Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from fsspec import AbstractFileSystem
for method in async_methods + default_async_methods + dir(AsyncFileSystem):
if not method.startswith("_"):
continue
smethod = method[1:]
if private.match(method):
isco = inspect.iscoroutinefunction(getattr(obj, method, None))
unsync = getattr(getattr(obj, smethod, False), "__func__", None)
is_default = unsync is getattr(AbstractFileSystem, smethod, "")
if isco and is_default:
mth = sync_wrapper(getattr(obj, method), obj=obj)
setattr(obj, smethod, mth)
if not mth.__doc__:
mth.__doc__ = getattr(
getattr(AbstractFileSystem, smethod, None), "__doc__", ""
)
elif (
hasattr(obj, smethod)
and inspect.ismethod(getattr(obj, smethod))
and not hasattr(obj, method)
):
setattr(obj, method, async_wrapper(getattr(obj, smethod)))
import logging
import os
import hashlib
from shutil import move, rmtree
import tempfile
import inspect
from fsspec import AbstractFileSystem, filesystem
from fsspec.spec import AbstractBufferedFile
from fsspec.core import MMapCache, BaseCache
from fsspec.utils import infer_compression
from fsspec.compression import compr
logger = logging.getLogger("fsspec")
class CachingFileSystem(AbstractFileSystem):
"""Locally caching filesystem, layer over any other FS
This class implements chunk-wise local storage of remote files, for quick
access after the initial download. The files are stored in a given
directory with random hashes for the filenames. If no directory is given,
a temporary one is used, which should be cleaned up by the OS after the
process ends. The files themselves are sparse (as implemented in
MMapCache), so only the data which is accessed takes up space.
Restrictions:
- the block-size must be the same for each access of a given file, unless
all blocks of the file have already been read
- caching can only be applied to file-systems which produce files
derived from fsspec.spec.AbstractBufferedFile ; LocalFileSystem is also
allowed, for testing
from fsspec import AbstractFileSystem
from fsspec.spec import AbstractBufferedFile
from fsspec.utils import tokenize, DEFAULT_BLOCK_SIZE
from fsspec.asyn import sync_wrapper, sync, AsyncFileSystem, maybe_sync
from ..caching import AllBytes
# Matches an HTML anchor tag and captures its href: group 1 is the quote
# character used, group 2 is the link target between the quotes.
# https://stackoverflow.com/a/15926317/3821154
ex = re.compile(r"""<a\s+(?:[^>]*?\s+)?href=(["'])(.*?)\1""")
# Matches any bare http:// or https:// URL appearing anywhere in the text.
ex2 = re.compile(r"""(http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")
async def get_client():
    """Create and return a new ``aiohttp.ClientSession``."""
    session = aiohttp.ClientSession()
    return session
class HTTPFileSystem(AsyncFileSystem, AbstractFileSystem):
"""
Simple File-System for fetching data via HTTP(S)
``ls()`` is implemented by loading the parent page and doing a regex
match on the result. If simple_links=True, anything of the form
"http(s)://server.com/stuff?thing=other" is considered a link; otherwise
only links within HTML href tags will be used.
"""
sep = "/"
def __init__(
self,
simple_links=True,
block_size=None,
same_scheme=True,
import paramiko
from stat import S_ISDIR, S_ISLNK
import types
import uuid
from .. import AbstractFileSystem
from ..utils import infer_storage_options
class SFTPFileSystem(AbstractFileSystem):
"""Files over SFTP/SSH
Peer-to-peer filesystem over SSH using paramiko.
Note: if using this with the ``open`` or ``open_files``, with full URLs,
there is no way to tell if a path is relative, so all paths are assumed
to be absolute.
"""
protocol = "sftp", "ssh"
def __init__(self, host, **ssh_kwargs):
"""
Parameters
----------
Uses the methods specified in
- async_methods: the set that an implementation is expected to provide
- default_async_methods: that can be derived from their sync version in
AbstractFileSystem
- AsyncFileSystem: async-specific default coroutines
"""
from fsspec import AbstractFileSystem
for method in async_methods + default_async_methods + dir(AsyncFileSystem):
if not method.startswith("_"):
continue
smethod = method[1:]
if private.match(method):
isco = inspect.iscoroutinefunction(getattr(obj, method, None))
unsync = getattr(getattr(obj, smethod, False), "__func__", None)
is_default = unsync is getattr(AbstractFileSystem, smethod, "")
if isco and is_default:
mth = sync_wrapper(getattr(obj, method), obj=obj)
setattr(obj, smethod, mth)
if not mth.__doc__:
mth.__doc__ = getattr(
getattr(AbstractFileSystem, smethod, None), "__doc__", ""
)
elif (
hasattr(obj, smethod)
and inspect.ismethod(getattr(obj, smethod))
and not hasattr(obj, method)
):
setattr(obj, method, async_wrapper(getattr(obj, smethod)))
from __future__ import print_function, division, absolute_import
from io import BytesIO
from fsspec import AbstractFileSystem
import logging
# Fetch the logger from the logging registry instead of instantiating
# ``logging.Logger`` directly, so it is part of the logger hierarchy and
# honours levels/handlers configured on its parents or the root logger.
logger = logging.getLogger("fsspec.memoryfs")
class MemoryFileSystem(AbstractFileSystem):
"""A filesystem based on a dict of BytesIO objects"""
store = {} # global
pseudo_dirs = []
protocol = "memory"
root_marker = ""
def ls(self, path, detail=False, **kwargs):
if path in self.store:
# there is a key with this exact name, but could also be directory
out = [
{
"name": path,
"size": self.store[path].getbuffer().nbytes,
"type": "file",
}
import datetime
import io
import os
import shutil
import posixpath
import re
import tempfile
from fsspec import AbstractFileSystem
from fsspec.utils import stringify_path
class LocalFileSystem(AbstractFileSystem):
"""Interface to files on local storage
Parameters
----------
auto_mkdir: bool
Whether, when opening a file, the directory containing it should
be created (if it doesn't already exist). This is assumed by pyarrow
code.
"""
root_marker = "/"
protocol = "file"
def __init__(self, auto_mkdir=False, **kwargs):
super().__init__(**kwargs)
self.auto_mkdir = auto_mkdir
Windows Samba network shares by using package smbprotocol
"""
from stat import S_ISDIR, S_ISLNK
import datetime
import uuid
import smbclient
from .. import AbstractFileSystem
from ..utils import infer_storage_options
# ! pylint: disable=bad-continuation
class SMBFileSystem(AbstractFileSystem):
"""Allow reading and writing to Windows and Samba network shares.
When using `fsspec.open()` for getting a file-like object the URI
should be specified as this format:
`smb://workgroup;user:password@server:port/share/folder/file.csv`.
Example::
>>> import fsspec
>>> with fsspec.open('smb://myuser:mypassword@myserver.com/'
... 'share/folder/file.csv') as smbfile:
... df = pd.read_csv(smbfile, sep='|', header=None)
Note that you need to pass in a valid hostname or IP address for the host
component of the URL. Do not use the Windows/NetBIOS machine name for the
host component.
from __future__ import print_function, division, absolute_import
import zipfile
from fsspec import AbstractFileSystem, open_files
from fsspec.utils import tokenize, DEFAULT_BLOCK_SIZE
class ZipFileSystem(AbstractFileSystem):
"""Read contents of ZIP archive as a file-system
Keeps file object open while instance lives.
This class is pickleable, but not necessarily thread-safe
"""
root_marker = ""
def __init__(
self,
fo="",
mode="r",
target_protocol=None,
target_options=None,
block_size=DEFAULT_BLOCK_SIZE,
Validate filesystem argument and return an fsspec file system object
Args:
path: Path as a string
filesystem: Optional fsspec filesystem object to use to open the file. If not
provided, filesystem type is inferred from path
Returns:
fsspec file system
"""
if filesystem is None:
return fsspec.open(path).fs
else:
if isinstance(filesystem, (str, pathlib.Path)):
return fsspec.filesystem(str(filesystem))
elif isinstance(filesystem, fsspec.AbstractFileSystem):
return filesystem
else:
raise ValueError(
"Received invalid filesystem value with type: {typ}".format(
typ=type(filesystem)
)