How to use the fastparquet.util.check_column_names function in fastparquet

To help you get started, we've selected a few examples that show popular ways fastparquet is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: dask/dask, file dask/dataframe/io/parquet.py (view on GitHub)
def _pf_validation(pf, columns, index, categories, filters):
    """Validate user options against metadata in dataset

    columns, index and categories must be in the list of columns available
    (both data columns and path-based partitioning - subject to possible
    renaming, if pandas metadata is present). The output index will
    be inferred from any available pandas metadata, if not given.

    Parameters
    ----------
    pf
        Dataset object. The visible code reads ``pf.columns``, ``pf.cats``
        and ``pf.fmd.key_value_metadata`` and calls ``pf._get_index()``.
        Presumably a ``fastparquet.ParquetFile`` - confirm against caller.
    columns
        Requested column names, checked against ``pf.columns`` plus the
        entries of ``pf.cats``. A tuple is converted to a list.
    index
        Not referenced in the portion of the function shown here.
    categories
        Requested categorical column names, checked against ``pf.columns``.
    filters
        Not referenced in the portion of the function shown here.
    """
    # Imported inside the function rather than at module level
    # (presumably so fastparquet stays an optional dependency - confirm).
    from fastparquet.util import check_column_names

    # Categories are checked against the data columns only; requested columns
    # are checked against data columns plus the entries of ``pf.cats``.
    # NOTE(review): presumably check_column_names raises when a requested name
    # is not available - confirm against the fastparquet documentation.
    check_column_names(pf.columns, categories)
    check_column_names(pf.columns + list(pf.cats or []), columns)
    if isinstance(columns, tuple):
        # ensure they tokenize the same
        columns = list(columns)

    # Collect the value of every key/value metadata entry whose key is
    # "pandas"; an absent or empty metadata list yields an empty result.
    if pf.fmd.key_value_metadata:
        pandas_md = [x.value for x in pf.fmd.key_value_metadata if x.key == "pandas"]
    else:
        pandas_md = []

    if len(pandas_md) == 0:
        # Fall back to the storage information
        index_names = pf._get_index()
        # Normalise a non-list result to a one-element list.
        if not isinstance(index_names, list):
            index_names = [index_names]
        # NOTE(review): unlike the validation call above, ``pf.cats`` is not
        # guarded with ``or []`` here, so a falsy non-iterable value such as
        # None would raise TypeError - confirm whether that can occur.
        column_names = pf.columns + list(pf.cats)
        # Without pandas metadata there is no renaming: each name maps to itself.
        storage_name_mapping = {k: k for k in column_names}
        # NOTE(review): the visible excerpt stops here without using or
        # returning these values; the remainder of the function is not shown.
GitHub: dask/dask, file dask/dataframe/io/parquet.py (view on GitHub)
def _pf_validation(pf, columns, index, categories, filters):
    """Validate user options against metadata in dataset

     columns, index and categories must be in the list of columns available
     (both data columns and path-based partitioning - subject to possible
     renaming, if pandas metadata is present). The output index will
     be inferred from any available pandas metadata, if not given.
     """
    from fastparquet.util import check_column_names

    check_column_names(pf.columns, categories)
    check_column_names(pf.columns + list(pf.cats or []), columns)
    if isinstance(columns, tuple):
        # ensure they tokenize the same
        columns = list(columns)

    if pf.fmd.key_value_metadata:
        pandas_md = [x.value for x in pf.fmd.key_value_metadata if x.key == "pandas"]
    else:
        pandas_md = []

    if len(pandas_md) == 0:
        # Fall back to the storage information
        index_names = pf._get_index()
        if not isinstance(index_names, list):
            index_names = [index_names]
        column_names = pf.columns + list(pf.cats)