How to use the fsspec.AbstractFileSystem class in fsspec

To help you get started, we’ve selected a few fsspec examples, based on popular ways it is used in public projects.
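
Before diving into the project snippets, here is a minimal, hedged sketch of the two most common patterns: obtaining a ready-made AbstractFileSystem implementation by protocol name, and subclassing AbstractFileSystem to expose a new backend. All names in the sketch are illustrative rather than taken from the projects below.

import fsspec
from fsspec import AbstractFileSystem

# Pattern 1: look up a concrete implementation in the registry by protocol name.
fs = fsspec.filesystem("memory")              # an AbstractFileSystem subclass instance
with fs.open("/demo.txt", "wb") as f:
    f.write(b"hello")
print(fs.cat("/demo.txt"))                    # b'hello'

# Pattern 2 (sketch): subclass AbstractFileSystem to add a backend.
# Only ls() is shown; a real backend would override _open and friends too.
class DictFileSystem(AbstractFileSystem):     # hypothetical example class
    protocol = "dictfs"

    def __init__(self, data=None, **kwargs):
        super().__init__(**kwargs)
        self.data = data or {}                # maps path -> bytes

    def ls(self, path, detail=True, **kwargs):
        names = sorted(self.data)
        if detail:
            return [{"name": n, "size": len(self.data[n]), "type": "file"} for n in names]
        return names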


github intake / filesystem_spec / fsspec / implementations / cached.py
import logging
import os
import hashlib
from shutil import move, rmtree
import tempfile
import inspect
from fsspec import AbstractFileSystem, filesystem
from fsspec.spec import AbstractBufferedFile
from fsspec.core import MMapCache, BaseCache
from fsspec.utils import infer_compression
from fsspec.compression import compr

logger = logging.getLogger("fsspec")


class CachingFileSystem(AbstractFileSystem):
    """Locally caching filesystem, layer over any other FS

    This class implements chunk-wise local storage of remote files, for quick
    access after the initial download. The files are stored in a given
    directory with random hashes for the filenames. If no directory is given,
    a temporary one is used, which should be cleaned up by the OS after the
    process ends. The files themselves are sparse (as implemented in
    MMapCache), so only the data which is accessed takes up space.

    Restrictions:

    - the block-size must be the same for each access of a given file, unless
      all blocks of the file have already been read
    - caching can only be applied to file-systems which produce files
      derived from fsspec.spec.AbstractBufferedFile; LocalFileSystem is also
      allowed, for testing
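
As a usage note (a hedged sketch, not part of the project code above): CachingFileSystem registers under the "blockcache" protocol, so it is normally constructed through fsspec.filesystem, wrapping a target filesystem; the URL and cache directory below are placeholders.

import fsspec

# Wrap an HTTP filesystem with chunk-wise local caching ("blockcache").
fs = fsspec.filesystem(
    "blockcache",
    target_protocol="http",
    cache_storage="/tmp/fsspec-cache",        # placeholder cache directory
)
with fs.open("https://example.com/big.bin") as f:   # placeholder URL
    header = f.read(1024)                     # only the accessed blocks are fetched and cached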

github intake / filesystem_spec / fsspec / implementations / http.py
from fsspec import AbstractFileSystem
from fsspec.spec import AbstractBufferedFile
from fsspec.utils import tokenize, DEFAULT_BLOCK_SIZE
from fsspec.asyn import sync_wrapper, sync, AsyncFileSystem, maybe_sync
from ..caching import AllBytes

# https://stackoverflow.com/a/15926317/3821154
ex = re.compile(r"""]*?\s+)?href=(["'])(.*?)\1""")
ex2 = re.compile(r"""(http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)""")


async def get_client():
    return aiohttp.ClientSession()


class HTTPFileSystem(AsyncFileSystem, AbstractFileSystem):
    """
    Simple File-System for fetching data via HTTP(S)

    ``ls()`` is implemented by loading the parent page and doing a regex
    match on the result. If simple_links=True, anything of the form
    "http(s)://server.com/stuff?thing=other" is considered a link;
    otherwise only links within HTML href tags will be used.
    """

    sep = "/"

    def __init__(
        self,
        simple_links=True,
        block_size=None,
        same_scheme=True,
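
For context, a brief usage sketch (URLs are placeholders): HTTPFileSystem is reached through the "http"/"https" protocols, and open() serves file-like objects backed by range requests.

import fsspec

fs = fsspec.filesystem("https")                           # HTTPFileSystem instance
listing = fs.ls("https://example.com/files/")             # scrapes links per the regexes above
with fs.open("https://example.com/files/data.csv") as f:  # placeholder URL
    first_line = f.readline()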

github intake / filesystem_spec / fsspec / implementations / sftp.py
import paramiko
from stat import S_ISDIR, S_ISLNK
import types
import uuid
from .. import AbstractFileSystem
from ..utils import infer_storage_options


class SFTPFileSystem(AbstractFileSystem):
    """Files over SFTP/SSH

    Peer-to-peer filesystem over SSH using paramiko.

    Note: if using this with ``open`` or ``open_files`` with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "sftp", "ssh"

    def __init__(self, host, **ssh_kwargs):
        """

        Parameters
        ----------
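
A hedged usage sketch (host and credentials are placeholders); the extra keyword arguments are forwarded to paramiko's connect call:

import fsspec

fs = fsspec.filesystem(
    "sftp",
    host="ssh.example.com",                   # placeholder host
    username="user",
    password="secret",
    port=22,
)
fs.ls("/home/user")
with fs.open("/home/user/data.csv", "rb") as f:
    payload = f.read()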

github intake / filesystem_spec / fsspec / asyn.py
Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for method in async_methods + default_async_methods + dir(AsyncFileSystem):
        if not method.startswith("_"):
            continue
        smethod = method[1:]
        if private.match(method):
            isco = inspect.iscoroutinefunction(getattr(obj, method, None))
            unsync = getattr(getattr(obj, smethod, False), "__func__", None)
            is_default = unsync is getattr(AbstractFileSystem, smethod, "")
            if isco and is_default:
                mth = sync_wrapper(getattr(obj, method), obj=obj)
                setattr(obj, smethod, mth)
                if not mth.__doc__:
                    mth.__doc__ = getattr(
                        getattr(AbstractFileSystem, smethod, None), "__doc__", ""
                    )
            elif (
                hasattr(obj, smethod)
                and inspect.ismethod(getattr(obj, smethod))
                and not hasattr(obj, method)
            ):
                setattr(obj, method, async_wrapper(getattr(obj, smethod)))
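
To make the intent of the loop above concrete, here is a stripped-down, self-contained sketch of the same pattern (not fsspec's actual helpers; the real sync_wrapper runs coroutines on a dedicated event loop rather than asyncio.run): every private coroutine ``_x`` whose public ``x`` is still the inherited default gets a blocking wrapper.

import asyncio
import inspect

class Base:
    def cat(self, path):
        """Return the bytes of a file (sync default)."""
        raise NotImplementedError

def mirror_sync(obj, base=Base):
    # For each bound coroutine "_x" on obj whose public "x" is still the
    # default inherited from `base`, install a blocking wrapper.
    for name in dir(obj):
        if not name.startswith("_") or name.startswith("__"):
            continue
        coro = getattr(obj, name, None)
        public = name[1:]
        if inspect.iscoroutinefunction(coro) and getattr(type(obj), public, None) is getattr(base, public, None):
            def make_wrapper(coro=coro):
                def wrapper(*args, **kwargs):
                    return asyncio.run(coro(*args, **kwargs))
                return wrapper
            setattr(obj, public, make_wrapper())

class DemoFS(Base):
    async def _cat(self, path):
        return b"contents of " + path.encode()

fs = DemoFS()
mirror_sync(fs)
print(fs.cat("a.txt"))   # blocking call backed by the async _cat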

github intake / filesystem_spec / fsspec / implementations / memory.py
from __future__ import print_function, division, absolute_import

from io import BytesIO
from fsspec import AbstractFileSystem
import logging

logger = logging.getLogger("fsspec.memoryfs")


class MemoryFileSystem(AbstractFileSystem):
    """A filesystem based on a dict of BytesIO objects"""

    store = {}  # global
    pseudo_dirs = []
    protocol = "memory"
    root_marker = ""

    def ls(self, path, detail=False, **kwargs):
        if path in self.store:
            # there is a key with this exact name, but could also be directory
            out = [
                {
                    "name": path,
                    "size": self.store[path].getbuffer().nbytes,
                    "type": "file",
                }
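
Usage is as lightweight as the implementation suggests; a short sketch (paths are arbitrary, and note that `store` is shared across all instances):

import fsspec

fs = fsspec.filesystem("memory")              # MemoryFileSystem
with fs.open("/scratch/notes.txt", "wb") as f:
    f.write(b"kept entirely in RAM")
fs.ls("/scratch")                             # finds the BytesIO-backed entry
fs.cat("/scratch/notes.txt")                  # b'kept entirely in RAM'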

github intake / filesystem_spec / fsspec / implementations / local.py
import datetime
import io
import os
import shutil
import posixpath
import re
import tempfile
from fsspec import AbstractFileSystem
from fsspec.utils import stringify_path


class LocalFileSystem(AbstractFileSystem):
    """Interface to files on local storage

    Parameters
    ----------
    auto_mkdir: bool
        Whether, when opening a file, the directory containing it should
        be created (if it doesn't already exist). This is assumed by pyarrow
        code.
    """

    root_marker = "/"
    protocol = "file"

    def __init__(self, auto_mkdir=False, **kwargs):
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir
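
A brief usage sketch (paths are placeholders); note the keyword is auto_mkdir, matching __init__ above:

import fsspec

fs = fsspec.filesystem("file", auto_mkdir=True)      # LocalFileSystem
with fs.open("/tmp/fsspec-demo/out.txt", "wt") as f: # parent directory created on demand
    f.write("hello from LocalFileSystem")
fs.info("/tmp/fsspec-demo/out.txt")                  # size, type, mtime, ...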

github intake / filesystem_spec / fsspec / implementations / smb.py
Windows Samba network shares by using package smbprotocol
"""

from stat import S_ISDIR, S_ISLNK
import datetime
import uuid

import smbclient

from .. import AbstractFileSystem
from ..utils import infer_storage_options

# ! pylint: disable=bad-continuation


class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` to get a file-like object, the URI
    should be specified in this format:
    `smb://workgroup;user:password@server:port/share/folder/file.csv`.

    Example::
        >>> import fsspec
        >>> with fsspec.open('smb://myuser:mypassword@myserver.com/'
        ...                  'share/folder/file.csv') as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

github intake / filesystem_spec / fsspec / implementations / zip.py
from __future__ import print_function, division, absolute_import

import zipfile
from fsspec import AbstractFileSystem, open_files
from fsspec.utils import tokenize, DEFAULT_BLOCK_SIZE


class ZipFileSystem(AbstractFileSystem):
    """Read contents of ZIP archive as a file-system

    Keeps file object open while instance lives.

    This class is pickleable, but not necessarily thread-safe
    """

    root_marker = ""

    def __init__(
        self,
        fo="",
        mode="r",
        target_protocol=None,
        target_options=None,
        block_size=DEFAULT_BLOCK_SIZE,
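
A usage sketch (archive and member names are placeholders); `fo` may be a local path, a URL handled by another fsspec backend, or an already-open binary file:

import fsspec

fs = fsspec.filesystem("zip", fo="archive.zip")   # placeholder archive
fs.ls("/")                                        # list members without extracting
with fs.open("inner/data.csv", "rb") as f:        # placeholder member path
    rows = f.read()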

github holoviz / spatialpandas / spatialpandas / io / utils.py
Validate filesystem argument and return an fsspec file system object

    Args:
        path: Path as a string
        filesystem: Optional fsspec filesystem object to use to open the file. If not
            provided, filesystem type is inferred from path

    Returns:
        fsspec file system
    """
    if filesystem is None:
        return fsspec.open(path).fs
    else:
        if isinstance(filesystem, (str, pathlib.Path)):
            return fsspec.filesystem(str(filesystem))
        elif isinstance(filesystem, fsspec.AbstractFileSystem):
            return filesystem
        else:
            raise ValueError(
                "Received invalid filesystem value with type: {typ}".format(
                    typ=type(filesystem)
                )
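
The three branches above correspond to the three ways callers typically hand over a filesystem; a minimal sketch using fsspec directly (the "memory" protocol and paths are illustrative):

import fsspec

path = "memory://bucket/data.parquet"          # illustrative path

# 1. No filesystem given: infer one from the path's protocol.
fs_inferred = fsspec.open(path).fs

# 2. A protocol name given as a string (or pathlib.Path): registry lookup.
fs_by_name = fsspec.filesystem("memory")

# 3. An AbstractFileSystem instance given: used as-is.
assert isinstance(fs_by_name, fsspec.AbstractFileSystem)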