How to use the fsspec.spec.AbstractFileSystem class in fsspec

To help you get started, we’ve selected a few fsspec examples based on popular ways AbstractFileSystem is used in public projects.

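All of the examples below share one pattern: subclass AbstractFileSystem, forward storage options to the base __init__, and implement the handful of low-level methods (ls, _open, and so on) that the base class builds everything else on. Here is a minimal sketch of that pattern; the DictFileSystem class, its protocol name and its dict-of-bytes backing store are invented for illustration and appear in none of the projects below:

import io
from fsspec.spec import AbstractFileSystem


class DictFileSystem(AbstractFileSystem):
    """Toy read-only filesystem backed by a dict of path -> bytes."""

    protocol = "dictfs"

    def __init__(self, data=None, **kwargs):
        super().__init__(**kwargs)
        self.data = data or {}

    def ls(self, path, detail=True, **kwargs):
        path = self._strip_protocol(path)
        entries = [
            {"name": name, "size": len(blob), "type": "file"}
            for name, blob in self.data.items()
            if name.startswith(path)
        ]
        return entries if detail else [e["name"] for e in entries]

    def _open(self, path, mode="rb", **kwargs):
        # Only reads are supported; hand back a file-like view of the bytes.
        return io.BytesIO(self.data[self._strip_protocol(path)])


fs = DictFileSystem(data={"a.txt": b"hello"})
print(fs.cat("a.txt"))  # derived methods like cat() come from the base class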

Example from intake/filesystem_spec: fsspec/implementations/hdfs.py
from ..spec import AbstractFileSystem
from ..utils import infer_storage_options
from pyarrow.hdfs import HadoopFileSystem


class PyArrowHDFS(AbstractFileSystem):
    """Adapted version of Arrow's HadoopFileSystem

    This is a very simple wrapper over pa.hdfs.HadoopFileSystem, which
    passes on all calls to the underlying class.
    """

    def __init__(
        self,
        host="default",
        port=0,
        user=None,
        kerb_ticket=None,
        driver="libhdfs",
        extra_conf=None,
        **kwargs
    ):
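
Since this class is what fsspec registers for the "hdfs" protocol, it is normally reached through the registry rather than constructed directly. A hypothetical session (the host and port are placeholders; pyarrow and a reachable HDFS cluster are assumed):

import fsspec

fs = fsspec.filesystem("hdfs", host="namenode.example.com", port=8020)
fs.ls("/")  # forwarded to the underlying pyarrow HadoopFileSystem
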
Example from intake/filesystem_spec: fsspec/implementations/ftp.py
from ftplib import FTP, Error, error_perm
import uuid
from ..spec import AbstractBufferedFile, AbstractFileSystem
from ..utils import infer_storage_options


class FTPFileSystem(AbstractFileSystem):
    """A filesystem over classic """

    root_marker = "/"
    cachable = False
    protocol = "ftp"

    def __init__(
        self,
        host,
        port=21,
        username=None,
        password=None,
        acct=None,
        block_size=None,
        tempdir="/tmp",
        timeout=30,
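
The constructor arguments map directly onto a classic ftplib.FTP connection. A hypothetical use via the registry (host and credentials are placeholders; a reachable FTP server is assumed):

import fsspec

fs = fsspec.filesystem(
    "ftp", host="ftp.example.com", username="demo", password="secret"
)
print(fs.ls("/"))

Note that because cachable is False above, each such call builds a fresh instance (and a fresh connection) instead of returning a cached one.
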
Example from intake/filesystem_spec: fsspec/implementations/dropboxdrivefs.py
import requests
import dropbox
from ..spec import AbstractFileSystem, AbstractBufferedFile


class DropboxDriveFileSystem(AbstractFileSystem):
    """ Interface dropbox to connect, list and manage files
    Parameters:
    ----------
    token : str
          Generated key by adding a dropbox app in the user dropbox account. 
          Needs to be done by the user

    """

    def __init__(self, **storage_options):
        super().__init__(**storage_options)
        self.token = storage_options["token"]
        self.kwargs = storage_options
        self.connect()

    def connect(self):
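
Everything hinges on the token passed in through storage_options. A hypothetical instantiation (the token string and folder path are placeholders, and the dropbox package must be installed):

from fsspec.implementations.dropboxdrivefs import DropboxDriveFileSystem

fs = DropboxDriveFileSystem(token="YOUR-DROPBOX-APP-TOKEN")
fs.ls("/some-folder")
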
Example from intake/filesystem_spec: fsspec/implementations/webhdfs.py
# https://hadoop.apache.org/docs/r1.0.4/webhdfs.html

import requests
from urllib.parse import quote
import uuid
from ..spec import AbstractFileSystem, AbstractBufferedFile
from ..utils import infer_storage_options
import logging

logger = logging.getLogger("webhdfs")


class WebHDFS(AbstractFileSystem):
    """
    Interface to HDFS over HTTP using the WebHDFS API. Also supports HttpFS gateways.

    Three auth mechanisms are supported:

    insecure: no auth is done, and the user is assumed to be whoever they
        say they are (parameter `user`), or a predefined value such as
        "dr.who" if not given
    spnego: when kerberos authentication is enabled, auth is negotiated by
        requests_kerberos https://github.com/requests/requests-kerberos .
        This establishes a session based on existing kinit login and/or
        specified principal/password; parameters are passed with ``kerb_kwargs``
    token: uses an existing Hadoop delegation token from another secured
        service. Indeed, this client can also generate such tokens when
        not insecure. Note that tokens expire, but can be renewed (by a
        previously specified user) and may allow for proxying.
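
Each mechanism corresponds to constructor arguments. Hypothetical examples (hosts, users and tokens are placeholders; a reachable WebHDFS endpoint is assumed):

import fsspec

# insecure: identity is simply asserted
fs = fsspec.filesystem("webhdfs", host="namenode.example.com", user="alice")

# spnego: negotiate from an existing kinit session via requests_kerberos
fs = fsspec.filesystem("webhdfs", host="namenode.example.com", kerberos=True)

# token: reuse a delegation token issued by another secured service
fs = fsspec.filesystem("webhdfs", host="namenode.example.com", token="DELEGATION-TOKEN")
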
Example from intake/filesystem_spec: fsspec/implementations/hdfs.py
        host: str
            Hostname, IP or "default" to try to read from Hadoop config
        port: int
            Port to connect on, or default from Hadoop config if 0
        user: str or None
            If given, connect as this username
        kerb_ticket: str or None
            If given, use this ticket for authentication
        driver: 'libhdfs' or 'libhdfs3'
            Binary driver; libhdfs is the JNI library and the default
        extra_conf: None or dict
            Passed on to HadoopFileSystem
        """
        if self._cached:
            return
        AbstractFileSystem.__init__(self, **kwargs)
        self.pars = (host, port, user, kerb_ticket, driver, extra_conf)
        self.pahdfs = HadoopFileSystem(
            host=host,
            port=port,
            user=user,
            kerb_ticket=kerb_ticket,
            driver=driver,
            extra_conf=extra_conf,
        )
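
The `if self._cached: return` guard near the top exists because AbstractFileSystem caches instances keyed on their constructor arguments, so __init__ can run again on an instance that is already set up. The caching itself is easy to see with any cachable filesystem; "memory" is used here only to avoid needing a cluster:

import fsspec

fs1 = fsspec.filesystem("memory")
fs2 = fsspec.filesystem("memory")
assert fs1 is fs2  # same cached instance; the second __init__ short-circuits
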
Example from intake/filesystem_spec: fsspec/implementations/github.py
import requests
from ..spec import AbstractFileSystem
from ..utils import infer_storage_options
from .memory import MemoryFile


class GithubFileSystem(AbstractFileSystem):
    """Interface to files in github

    An instance of this class provides the files residing within a remote github
    repository. You may specify a point in the repo's history, by SHA, branch
    or tag (default is current master).

    Given that code files tend to be small, and that github does not support
    retrieving partial content, we always fetch whole files.

    When using fsspec.open, allows URIs of the form:

    - "github://path/file", in which case you must specify org, repo and
      may specify sha in the extra args
    - 'github://org:repo@/precip/catalog.yml', where the org and repo are
      part of the URI
    - 'github://org:repo@sha/precip/catalog.yml', where the sha is also included
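
Those URI forms plug straight into fsspec.open. A hypothetical read (the org, repo, ref and file path are placeholders; network access to the GitHub API is assumed):

import fsspec

with fsspec.open("github://someorg:somerepo@main/README.md") as f:
    print(f.read()[:100])
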
Example from intake/filesystem_spec: fsspec/implementations/dask.py
from distributed.worker import get_worker
from distributed.client import _get_global_client
import dask
from fsspec.spec import AbstractFileSystem, AbstractBufferedFile
from fsspec import filesystem


def make_instance(cls, args, kwargs):
    inst = cls(*args, **kwargs)
    inst._determine_worker()
    return inst


class DaskWorkerFileSystem(AbstractFileSystem):
    """View files accessible to a worker as any other remote file-system

    When instances are run on the worker, uses the real filesystem. When
    run on the client, they call the worker to provide information or data.

    **Warning** this implementation is experimental, and read-only for now.
    """

    def __init__(self, remote_protocol, remote_options=None, **kwargs):
        super().__init__(**kwargs)
        self.protocol = remote_protocol
        self.remote_options = remote_options
        self.worker = None
        self.client = None
        self.fs = None  # What is the type here?
        self._determine_worker()
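
The make_instance helper above exists so the filesystem can be pickled to workers and rebuilt there, where _determine_worker flips it into direct-access mode. A hypothetical client-side construction (a running distributed cluster is assumed, and "s3" stands in for any remote protocol):

import fsspec

fs = fsspec.filesystem("dask", remote_protocol="s3", remote_options={"anon": True})
fs.ls("some-bucket")  # proxied through a worker when called from the client
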
Example from intake/filesystem_spec: fsspec/implementations/git.py
import pygit2
from fsspec.spec import AbstractFileSystem
from .memory import MemoryFile
import os


class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)
    """

    root_marker = ""

    def __init__(self, path=None, ref=None, **kwargs):
        """

        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given)
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name. Defaults
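
A hypothetical session (the repo path and ref are placeholders; pygit2 must be installed):

import fsspec

fs = fsspec.filesystem("git", path="/path/to/local/repo", ref="master")
fs.ls("")  # list files at that ref; note root_marker is ""
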
Example from intake/filesystem_spec: fsspec/asyn.py
# the sync methods below all call expand_path, which in turn may call walk or glob
# (if passed paths with glob characters, or for recursive=True, respectively)
default_async_methods = [
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    "_walk",
    "_glob",
    "_find",
    "_du",
]


class AsyncFileSystem(AbstractFileSystem):
    """Async file operations, default implementations

    Passes bulk operations to asyncio.gather for concurrent operation.

    Implementations that have concurrent batch operations and/or async methods
    should inherit from this class instead of AbstractFileSystem. Docstrings are
    copied from the un-underscored method in AbstractFileSystem, if not given.
    """

    # note that methods do not have docstring here; they will be copied
    # for _* methods and inferred for overridden methods.

    async_impl = True

    def __init__(self, *args, asynchronous=False, loop=None, **kwargs):
        self.asynchronous = asynchronous
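
Subclasses implement the underscored coroutines, and matching synchronous methods are generated for them, so one class serves both async and blocking callers. A toy sketch of that contract; EchoFileSystem is invented for illustration and assumes fsspec's usual method mirroring:

import asyncio
from fsspec.asyn import AsyncFileSystem


class EchoFileSystem(AsyncFileSystem):
    """Toy backend: 'reading' any path just returns the path as bytes."""

    async def _cat_file(self, path, start=None, end=None, **kwargs):
        await asyncio.sleep(0)  # stand-in for real async I/O
        return path.encode()


fs = EchoFileSystem()
print(fs.cat_file("hello"))  # the generated sync wrapper drives the coroutine
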
Example from intake/filesystem_spec: fsspec/implementations/dvc.py
import os
from fsspec.spec import AbstractFileSystem
from fsspec.implementations.local import LocalFileSystem
import dvc.repo

lfs = LocalFileSystem()


class DVCFileSystem(AbstractFileSystem):
    """DVC backend (experimental)

    Load data files that are versioned using the `Data Version Control`_ system

    .. _Data Version Control: https://dvc.org/

    This interface is incomplete and experimental.
    """

    root_marker = ""

    def __init__(self, path=None, **kwargs):
        """

        Parameters
        ----------
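
Construction follows the same pattern as the git backend above. A hypothetical use (the repo path is a placeholder; dvc must be installed and the directory must be a DVC repository):

from fsspec.implementations.dvc import DVCFileSystem

fs = DVCFileSystem(path="/path/to/dvc/repo")
fs.ls("")  # list version-controlled files; root_marker is ""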