Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _strip_protocol(cls, path):
    """Turn a fully-qualified path into a filesystem-specific one.

    Strips every protocol prefix registered on ``cls.protocol`` (either a
    single string or an iterable of strings), accepting both the
    ``proto://path`` and the bare ``proto:path`` forms. May require
    FS-specific handling, e.g., for relative paths or links.
    """
    path = stringify_path(path)
    if isinstance(cls.protocol, str):
        protocols = (cls.protocol,)
    else:
        protocols = cls.protocol
    for proto in protocols:
        # trailing slashes never carry meaning for a qualified path
        path = path.rstrip("/")
        prefix = proto + "://"
        if path.startswith(prefix):
            path = path[len(prefix):]
        elif path.startswith(proto + ":"):
            path = path[len(proto) + 1:]
    # use of root_marker to make minimum required path, e.g., "/"
    return path or cls.root_marker
If opening in writing mode, number of files we expect to create.
name_function: callable, optional
If opening in writing mode, this callable is used to generate path
names. Names are generated for each partition by
``urlpath.replace('*', name_function(partition_index))``.
storage_options: dict, optional
Additional keywords to pass to the filesystem class.
protocol: str or None
To override the protocol specifier in the URL
expand: bool
Expand string paths for writing, assuming the path is a directory
"""
if isinstance(urlpath, (list, tuple, set)):
urlpath = [stringify_path(u) for u in urlpath]
else:
urlpath = stringify_path(urlpath)
chain = _un_chain(urlpath, storage_options or {})
if len(chain) > 1:
storage_options = chain[0][2]
inkwargs = storage_options
urlpath = False
for i, ch in enumerate(chain):
urls, protocol, kw = ch
if isinstance(urls, str):
if not urlpath and split_protocol(urls)[1]:
urlpath = protocol + "://" + split_protocol(urls)[1]
else:
if not urlpath and any(split_protocol(u)[1] for u in urls):
urlpath = [protocol + "://" + split_protocol(u)[1] for u in urls]
if i == 0:
continue
inkwargs["target_protocol"] = protocol
def _strip_protocol(cls, path):
    """Return a local, POSIX-style path with any ``file://`` prefix removed.

    Also expands a leading ``~`` to the user's home directory before
    normalizing separators via ``make_path_posix``.
    """
    path = stringify_path(path)
    prefix = "file://"
    if path.startswith(prefix):
        path = path[len(prefix):]
    expanded = os.path.expanduser(path)
    return make_path_posix(expanded)
partition_on = partition_on or []
if isinstance(partition_on, str):
partition_on = [partition_on]
if set(partition_on) - set(df.columns):
raise ValueError(
"Partitioning on non-existent column. "
"partition_on=%s ."
"columns=%s" % (str(partition_on), str(list(df.columns)))
)
if isinstance(engine, str):
engine = get_engine(engine)
if hasattr(path, "name"):
path = stringify_path(path)
fs, _, _ = get_fs_token_paths(path, mode="wb", storage_options=storage_options)
# Trim any protocol information from the path before forwarding
path = fs._strip_protocol(path)
# Save divisions and corresponding index name. This is necessary,
# because we may be resetting the index to write the file
division_info = {"divisions": df.divisions, "name": df.index.name}
if division_info["name"] is None:
# As of 0.24.2, pandas will rename an index with name=None
# when df.reset_index() is called. The default name is "index",
# (or "level_0" if "index" is already a column name)
division_info["name"] = "index" if "index" not in df.columns else "level_0"
# If write_index==True (default), reset the index and record the
# name of the original index in `index_cols` (will be `index` if None,
# or `level_0` if `index` is already a column name).
name = "read-parquet-" + tokenize(
path,
columns,
filters,
categories,
index,
storage_options,
engine,
gather_statistics,
)
if isinstance(engine, str):
engine = get_engine(engine)
if hasattr(path, "name"):
path = stringify_path(path)
fs, _, paths = get_fs_token_paths(path, mode="rb", storage_options=storage_options)
paths = sorted(paths, key=natural_sort_key) # numeric rather than glob ordering
auto_index_allowed = False
if index is None:
# User is allowing auto-detected index
auto_index_allowed = True
if index and isinstance(index, str):
index = [index]
meta, statistics, parts = engine.read_metadata(
fs,
paths,
categories=categories,
index=index,