Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
("file:///tmp/test.json", LocalFileSystem),
("/tmp/test.json", LocalFileSystem),
("gcs://bucket/file.json", GCSFileSystem),
],
)
def test_protocol_usage(self, filepath, instance_type):
    """Check the filesystem and stored path a data set derives from ``filepath``.

    Builds a ``JSONDataSet`` for ``filepath`` and verifies that its ``_fs``
    attribute is an ``instance_type``, that its ``_filepath`` equals the
    protocol-stripped form of ``filepath`` and is a ``PurePosixPath``.
    """
    json_data_set = JSONDataSet(filepath=filepath)

    # The filesystem implementation must match the path's protocol.
    filesystem = json_data_set._fs
    assert isinstance(filesystem, instance_type)

    # The stored path is the input with its protocol prefix removed.
    stored_path = json_data_set._filepath
    assert str(stored_path) == filesystem._strip_protocol(filepath)
    assert isinstance(json_data_set._filepath, PurePosixPath)
def make_path_posix(path, sep=os.sep):
""" Make path generic """
path = LocalFileSystem._strip_protocol(path)
if re.match('/[A-Za-z]:', path):
# for windows file URI like "file:///C:/folder/file"
# or "file:///C:\\dir\\file"
path = path[1:]
if path.startswith('\\') or re.match("[\\\\]*[A-Za-z]:", path):
# windows full path "\\server\\path" or "C:\\local\\path"
return path.lstrip('\\').replace('\\', '/').replace('//', '/')
if (
sep not in path and '/' not in path
or (sep == '/' and not path.startswith('/'))
or (sep == '\\' and ":" not in path)
):
# relative path like "path" or "rel\\path" (win) or "rel/path"
path = os.path.abspath(path)
if os.sep == "\\":
# abspath made some more '\\' separators
import os
from fsspec.spec import AbstractFileSystem
from fsspec.implementations.local import LocalFileSystem
import dvc.repo
lfs = LocalFileSystem()
class DVCFileSystem(AbstractFileSystem):
"""DVC backend (experimental)
Load data files that are versioned using the `Data Version Control`_ system
.. _Data Version Control: https://dvc.org/
This interface is incomplete and experimental.
"""
root_marker = ""
def __init__(self, path=None, **kwargs):
"""
header_first_partition_only = single_file
elif not header_first_partition_only and single_file:
raise ValueError(
"header_first_partition_only cannot be False in the single file mode."
)
file_options = dict(
compression=compression,
encoding=encoding,
newline="",
**(storage_options or {})
)
to_csv_chunk = delayed(_write_csv, pure=False)
dfs = df.to_delayed()
if single_file:
first_file = open_file(filename, mode=mode, **file_options)
if not isinstance(first_file.fs, fsspec.implementations.local.LocalFileSystem):
warn("Appending data to a network storage system may not work.")
value = to_csv_chunk(dfs[0], first_file, **kwargs)
append_mode = mode.replace("w", "") + "a"
append_file = open_file(filename, mode=append_mode, **file_options)
kwargs["header"] = False
for d in dfs[1:]:
value = to_csv_chunk(d, append_file, depend_on=value, **kwargs)
values = [value]
files = [first_file]
else:
files = open_files(
filename,
mode=mode,
name_function=name_function,
num=df.npartitions,
**file_options
def get(self, path1, path2, **kwargs):
    """Copy file to local.

    A plain (non-recursive) request is served by ``self.copy``; when the
    ``recursive`` keyword is truthy the call is forwarded to the ``get``
    of the class following ``LocalFileSystem`` in the method resolution
    order. All keyword arguments are passed through unchanged and the
    result of whichever call ran is returned.
    """
    if not kwargs.get("recursive"):
        return self.copy(path1, path2, **kwargs)
    return super(LocalFileSystem, self).get(path1, path2, **kwargs)
def _maybe_add_rm(fs):
    """Attach an ``rm`` callable to ``fs`` when it is a ``LocalFileSystem``.

    Any other filesystem object is left untouched. The attached ``rm``
    takes ``path`` and ``recursive`` (default ``False``): it removes a
    directory tree with ``shutil.rmtree`` when ``recursive`` is truthy and
    a single file with ``os.remove`` otherwise.
    """
    # Monkey-patch the local filesystem.
    # This goes away if we can use fsspec's local file-system.
    from fsspec.implementations.local import LocalFileSystem

    if not isinstance(fs, LocalFileSystem):
        return

    def rm(path, recursive=False):
        # Pick the remover matching the request, then apply it to the path.
        import os
        import shutil

        remover = shutil.rmtree if recursive else os.remove
        remover(path)

    fs.rm = rm
def put(self, lpath, rpath, recursive=False, **kwargs):
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    The local side is expanded with ``LocalFileSystem.expand_path`` and
    matched to remote targets by ``other_paths``; ``put_file`` is then
    called once per (source, target) pair with the extra keyword
    arguments.
    """
    from .implementations.local import LocalFileSystem, make_path_posix

    rpath = self._strip_protocol(rpath)
    if isinstance(lpath, str):
        # Only string inputs are normalised; other inputs go to
        # expand_path untouched.
        lpath = make_path_posix(lpath)

    sources = LocalFileSystem().expand_path(lpath, recursive=recursive)
    targets = other_paths(sources, rpath)
    for source, target in zip(sources, targets):
        self.put_file(source, target, **kwargs)
def put(self, lpath, rpath, recursive=False, **kwargs):
    """Copy file(s) from local by dispatching ``self._put`` through ``sync``.

    ``rpath`` has its protocol stripped and a string ``lpath`` is passed
    through ``make_path_posix``. The local side is expanded with
    ``LocalFileSystem.expand_path`` (honouring ``recursive``), matched to
    remote targets by ``other_paths``, and both lists are handed to
    ``sync(self.loop, self._put, ...)`` together with the extra keyword
    arguments. Nothing is returned.
    """
    from .implementations.local import LocalFileSystem, make_path_posix

    rpath = self._strip_protocol(rpath)
    if isinstance(lpath, str):
        lpath = make_path_posix(lpath)

    local_fs = LocalFileSystem()
    sources = local_fs.expand_path(lpath, recursive=recursive)
    targets = other_paths(sources, rpath)
    sync(self.loop, self._put, sources, targets, **kwargs)
def get(self, path1, path2, **kwargs):
    """Copy ``path1`` to ``path2``.

    When the ``recursive`` keyword is truthy the call is forwarded to the
    ``get`` of the class following ``LocalFileSystem`` in the method
    resolution order; otherwise ``self.copy`` does the work. Keyword
    arguments are passed through unchanged in both cases.
    """
    wants_recursive = kwargs.get("recursive")
    if wants_recursive:
        parent = super(LocalFileSystem, self)
        return parent.get(path1, path2, **kwargs)
    return self.copy(path1, path2, **kwargs)