How to use the fsspec.utils.stringify_path function in fsspec

To help you get started, we’ve selected a few fsspec examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github intake / filesystem_spec / fsspec / spec.py View on Github external
def _strip_protocol(cls, path):
        """Reduce a fully-qualified path to its file-system-specific form.

        The path is first passed through ``stringify_path``.  ``cls.protocol``
        may be a single string or a collection of strings; each entry is
        tried in turn.  On every pass trailing ``/`` characters are dropped
        and a leading ``<protocol>://`` (checked first) or ``<protocol>:``
        prefix is removed.  Every matching protocol is stripped, not only
        the first one.

        Subclasses may need FS-specific handling, e.g., for relative paths
        or links.

        Returns the stripped path, or ``cls.root_marker`` when nothing is
        left after stripping.
        """
        stripped = stringify_path(path)
        known = cls.protocol
        if isinstance(known, str):
            known = (known,)
        for proto in known:
            stripped = stripped.rstrip("/")
            # Prefer the longer "://" form; fall back to the bare ":" form.
            for separator in ("://", ":"):
                prefix = proto + separator
                if stripped.startswith(prefix):
                    stripped = stripped[len(prefix) :]
                    break
        # An empty result falls back to root_marker, the minimal path, e.g., "/"
        return stripped if stripped else cls.root_marker
github intake / filesystem_spec / fsspec / core.py View on Github external
If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        urlpath = [stringify_path(u) for u in urlpath]
    else:
        urlpath = stringify_path(urlpath)
    chain = _un_chain(urlpath, storage_options or {})
    if len(chain) > 1:
        storage_options = chain[0][2]
        inkwargs = storage_options
        urlpath = False
        for i, ch in enumerate(chain):
            urls, protocol, kw = ch
            if isinstance(urls, str):
                if not urlpath and split_protocol(urls)[1]:
                    urlpath = protocol + "://" + split_protocol(urls)[1]
            else:
                if not urlpath and any(split_protocol(u)[1] for u in urls):
                    urlpath = [protocol + "://" + split_protocol(u)[1] for u in urls]
            if i == 0:
                continue
            inkwargs["target_protocol"] = protocol
github intake / filesystem_spec / fsspec / implementations / local.py View on Github external
def _strip_protocol(cls, path):
        """Turn a possibly ``file://``-prefixed path into a plain local path.

        The path is passed through ``stringify_path``, a leading ``file://``
        scheme is removed if present, and ``~`` constructs are expanded with
        ``os.path.expanduser``.  The result of ``make_path_posix`` on that
        expanded path is returned (presumably a POSIX-style path -- confirm
        against the helper's definition).
        """
        scheme = "file://"
        local = stringify_path(path)
        if local.startswith(scheme):
            local = local[len(scheme) :]
        return make_path_posix(os.path.expanduser(local))
github dask / dask / dask / dataframe / io / parquet / core.py View on Github external
partition_on = partition_on or []
    if isinstance(partition_on, str):
        partition_on = [partition_on]

    if set(partition_on) - set(df.columns):
        raise ValueError(
            "Partitioning on non-existent column. "
            "partition_on=%s ."
            "columns=%s" % (str(partition_on), str(list(df.columns)))
        )

    if isinstance(engine, str):
        engine = get_engine(engine)

    if hasattr(path, "name"):
        path = stringify_path(path)
    fs, _, _ = get_fs_token_paths(path, mode="wb", storage_options=storage_options)
    # Trim any protocol information from the path before forwarding
    path = fs._strip_protocol(path)

    # Save divisions and corresponding index name. This is necessary,
    # because we may be resetting the index to write the file
    division_info = {"divisions": df.divisions, "name": df.index.name}
    if division_info["name"] is None:
        # As of 0.24.2, pandas will rename an index with name=None
        # when df.reset_index() is called.  The default name is "index",
        # (or "level_0" if "index" is already a column name)
        division_info["name"] = "index" if "index" not in df.columns else "level_0"

    # If write_index==True (default), reset the index and record the
    # name of the original index in `index_cols` (will be `index` if None,
    # or `level_0` if `index` is already a column name).
github dask / dask / dask / dataframe / io / parquet / core.py View on Github external
name = "read-parquet-" + tokenize(
        path,
        columns,
        filters,
        categories,
        index,
        storage_options,
        engine,
        gather_statistics,
    )

    if isinstance(engine, str):
        engine = get_engine(engine)

    if hasattr(path, "name"):
        path = stringify_path(path)
    fs, _, paths = get_fs_token_paths(path, mode="rb", storage_options=storage_options)

    paths = sorted(paths, key=natural_sort_key)  # numeric rather than glob ordering

    auto_index_allowed = False
    if index is None:
        # User is allowing auto-detected index
        auto_index_allowed = True
    if index and isinstance(index, str):
        index = [index]

    meta, statistics, parts = engine.read_metadata(
        fs,
        paths,
        categories=categories,
        index=index,