How to use the fastparquet.write function in fastparquet

To help you get started, we’ve selected a few fastparquet.write examples based on popular ways it is used in public projects.

github danielhrisca / asammdf / asammdf / mdf.py
            if format == "7.3":
                savemat(
                    str(name),
                    mdict,
                    long_field_names=True,
                    format="7.3",
                    delete_unused_variables=False,
                    oned_as=oned_as,
                )
            else:
                savemat(str(name), mdict, long_field_names=True, oned_as=oned_as)

        elif fmt == "parquet":
            name = name.with_suffix(".parquet")
            # fastparquet's write, imported as write_parquet
            write_parquet(name, df)

        else:
            message = (
                'Unsupported export type "{}". '
                'Please select "csv", "excel", "hdf5", "mat", "parquet" or "pandas"'
            )
            logger.warning(message.format(fmt))
github holoviz / datashader / examples / pcap_to_parquet.py
        # assumes: import numpy as np, pandas as pd, fastparquet as fp
        edges = []
        for i, key in enumerate(traffic):
            edge = [i, nodes[key[1]], nodes[key[2]], key[0], traffic[key]]
            edges.append(edge)

        nodes_df = pd.DataFrame(np.arange(len(nodes)), columns=['id'])
        nodes_df = nodes_df.set_index('id')

        edges_df = pd.DataFrame(np.array(edges), columns=['id', 'source', 'target', 'protocol', 'weight'])
        edges_df = edges_df.set_index('id')
        # np.array() coerced every column to strings, so restore the dtypes
        edges_df['source'] = pd.to_numeric(edges_df['source'])
        edges_df['target'] = pd.to_numeric(edges_df['target'])
        edges_df['weight'] = pd.to_numeric(edges_df['weight'])
        edges_df['protocol'] = edges_df['protocol'].astype('category')

        fp.write('{}_nodes.parq'.format(prefix), nodes_df)
        fp.write('{}_edges.parq'.format(prefix), edges_df)
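A hedged follow-up to the snippet above: reading the edge table back with ParquetFile, where passing categories asks fastparquet to decode 'protocol' as a categorical column. The prefix value is illustrative, standing in for whatever the script used.

from fastparquet import ParquetFile

prefix = 'capture'  # illustrative; matches the prefix used above
edges_back = ParquetFile('{}_edges.parq'.format(prefix)).to_pandas(
    categories=['protocol'])
print(edges_back.dtypes)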
github ratal / mdfreader / mdfreader / mdfreader.py
        Parameters
        ----------
        file_name : str, optional
            file name; if not given, the original mdf name and path are used
            with a .parquet extension
        """
        try:
            from fastparquet import write as write_parquet
        except ImportError:
            warn('fastparquet not installed')
            return
        if file_name is None:
            file_name = splitext(self.fileName)[0]
            file_name = file_name + '.parquet'
        for master_channel_name in self.masterChannelList:
            frame = self.return_pandas_dataframe(master_channel_name)
            if frame is not None:
                # note: write() replaces file_name on every pass, so with more
                # than one master channel only the last frame is kept
                write_parquet(file_name, frame, compression='GZIP')
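Because write() replaces its target by default, the loop above keeps only the last master channel's frame. A minimal sketch of one way around that, with one file per channel; all names here are illustrative, not mdfreader's API.

import pandas as pd
from os.path import splitext
from fastparquet import write as write_parquet

# illustrative stand-in for the per-master-channel frames
frames_by_master = {
    't1': pd.DataFrame({'time': [0.0, 0.1], 'signal': [1, 2]}),
    't2': pd.DataFrame({'time': [0.0, 0.5], 'signal': [3, 4]}),
}

base = splitext('measurement.mf4')[0]
for channel_name, frame in frames_by_master.items():
    # one file per master channel, so nothing gets overwritten
    write_parquet('{}_{}.parquet'.format(base, channel_name),
                  frame, compression='GZIP')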
github JGCRI / xanthos / xanthos / data_writer / out_writer.py
def save_parquet(self, filename, df, col_names=None):
        """Write pandas DataFrame to parquet file."""
        import os
        from fastparquet import write as fp_write

        filename += ".parquet"
        # append new row groups if the dataset already exists on disk
        append = os.path.exists(filename)

        if col_names is not None:
            df.columns = col_names

        fp_write(filename, df,
                 row_group_offsets=len(df),
                 compression="GZIP",
                 file_scheme='hive',
                 has_nulls=False,
                 append=append)
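For context on what file_scheme='hive' plus append produces, a small sketch with illustrative data and paths: the target becomes a directory holding shared metadata plus one part.N.parquet file per write, rather than a single file.

import os
import pandas as pd
from fastparquet import write

df = pd.DataFrame({'basin': [1, 2], 'runoff': [0.5, 0.7]})  # illustrative

target = 'runoff.parquet'
write(target, df, file_scheme='hive', append=os.path.exists(target))
write(target, df, file_scheme='hive', append=os.path.exists(target))

# expect _common_metadata, _metadata, part.0.parquet, part.1.parquet
print(sorted(os.listdir(target)))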
github aws / lumberyard / dev / Gems / CloudGemMetric / v1 / AWS / common-code / ParquetUtils / parquet_writer.py
def append(bucket, key1, key2, s3, output_filename):
    # assumes: import pandas as pd
    # assumes: from fastparquet import ParquetFile, write as pwrite
    s3_open = s3.open

    path1 = '{}{}'.format(bucket, key1)
    pf1 = ParquetFile(path1, open_with=s3_open)
    df1 = pf1.to_pandas()

    path2 = '{}{}'.format(bucket, key2)
    pf2 = ParquetFile(path2, open_with=s3_open)
    df2 = pf2.to_pandas()

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    data = pd.concat([df1, df2])

    pwrite('{}{}'.format(bucket, output_filename), data,
           open_with=s3_open, compression='GZIP', append=False, has_nulls=True)
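The snippet above receives an already-constructed s3 filesystem object; here is a sketch of that wiring with the s3fs package, with bucket and key names illustrative. fastparquet never talks to S3 itself; open_with just has to be a callable that opens a path and returns a file-like object.

import s3fs
from fastparquet import ParquetFile, write

s3 = s3fs.S3FileSystem()   # credentials resolved by s3fs / boto
s3_open = s3.open

pf = ParquetFile('my-bucket/input.parquet', open_with=s3_open)
write('my-bucket/output.parquet', pf.to_pandas(),
      open_with=s3_open, compression='GZIP')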
github kylebarron / medicare_utils / medicare_utils / parquet.py
"""
        print(_mywrap(msg))

        if parquet_engine == 'pyarrow':
            if i == 1:
                # first chunk: derive the schema and open a single writer
                if manual_schema:
                    schema = _create_parquet_schema(df.dtypes)
                else:
                    schema = pa.Table.from_pandas(
                        df, preserve_index=False).schema
                writer = pq.ParquetWriter(outfile, schema, flavor='spark')

            writer.write_table(pa.Table.from_pandas(df, preserve_index=False))
        elif parquet_engine == 'fastparquet':
            if i == 1:
                # first chunk creates the file ...
                fp.write(
                    outfile,
                    df,
                    compression=compression_type,
                    has_nulls=False,
                    write_index=False,
                    object_encoding='utf8')
            else:
                # ... later chunks append row groups to it
                fp.write(
                    outfile,
                    df,
                    compression=compression_type,
                    has_nulls=False,
                    write_index=False,
                    object_encoding='utf8',
                    append=True)
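Stripped of the engine dispatch, the fastparquet branch above is the standard chunked-write pattern: the first chunk creates the file, later chunks append row groups with the same schema. A condensed sketch, with the chunking and compression choice illustrative:

import pandas as pd
from fastparquet import write

chunks = (pd.DataFrame({'id': [i], 'name': ['row{}'.format(i)]})
          for i in range(3))

for i, df in enumerate(chunks, start=1):
    write('combined.parquet', df,
          compression='GZIP', has_nulls=False,
          write_index=False, object_encoding='utf8',
          append=(i > 1))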