from ..spec import AbstractFileSystem
from ..utils import infer_storage_options
from pyarrow.hdfs import HadoopFileSystem
class PyArrowHDFS(AbstractFileSystem):
"""Adapted version of Arrow's HadoopFileSystem
This is a very simple wrapper over pa.hdfs.HadoopFileSystem, which
passes on all calls to the underlying class.
"""
    def __init__(
        self,
        host="default",
        port=0,
        user=None,
        kerb_ticket=None,
        driver="libhdfs",
        extra_conf=None,
        **kwargs
    ):
        """
        Parameters
        ----------
        host: str
            Hostname, IP or "default" to try to read from Hadoop config
        port: int
            Port to connect on, or default from Hadoop config if 0
        user: str or None
            If given, connect as this username
        kerb_ticket: str or None
            If given, use this ticket for authentication
        driver: 'libhdfs' or 'libhdfs3'
            Binary driver; libhdfs is the JNI library and the default
        extra_conf: None or dict
            Passed on to HadoopFileSystem
        """
        if self._cached:
            return
        AbstractFileSystem.__init__(self, **kwargs)
        self.pars = (host, port, user, kerb_ticket, driver, extra_conf)
        self.pahdfs = HadoopFileSystem(
            host=host,
            port=port,
            user=user,
            kerb_ticket=kerb_ticket,
            driver=driver,
            extra_conf=extra_conf,
        )
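# Usage sketch for PyArrowHDFS (not from the fsspec source). It assumes a
# reachable HDFS cluster and a working libhdfs install; the host name and the
# paths are hypothetical.
import fsspec

hdfs = fsspec.filesystem("hdfs", host="namenode.example.com", port=8020)
hdfs.ls("/")  # every call is passed through to pyarrow's HadoopFileSystem
with hdfs.open("/data/example.csv", "rb") as f:
    header = f.readline()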
from ftplib import FTP, Error, error_perm
import uuid
from ..spec import AbstractBufferedFile, AbstractFileSystem
from ..utils import infer_storage_options
class FTPFileSystem(AbstractFileSystem):
"""A filesystem over classic """
root_marker = "/"
cachable = False
protocol = "ftp"
def __init__(
self,
host,
port=21,
username=None,
password=None,
acct=None,
block_size=None,
tempdir="/tmp",
timeout=30,
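# Usage sketch for FTPFileSystem (not from the fsspec source): connecting to a
# hypothetical anonymous FTP server through the fsspec registry.
import fsspec

ftp = fsspec.filesystem(
    "ftp", host="ftp.example.com", username="anonymous", password="guest"
)
ftp.ls("/")  # directory listing via ftplib under the hood
with ftp.open("/pub/readme.txt", "rb") as f:
    text = f.read()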
import requests
import dropbox
from ..spec import AbstractFileSystem, AbstractBufferedFile
class DropboxDriveFileSystem(AbstractFileSystem):
""" Interface dropbox to connect, list and manage files
Parameters:
----------
token : str
Generated key by adding a dropbox app in the user dropbox account.
Needs to be done by the user
"""
def __init__(self, **storage_options):
super().__init__(**storage_options)
self.token = storage_options["token"]
self.kwargs = storage_options
self.connect()
    def connect(self):
        # create the Dropbox SDK client from the stored token
        self.dbx = dropbox.Dropbox(self.token)
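# Usage sketch for DropboxDriveFileSystem (not from the source): direct
# instantiation with a hypothetical app token from the Dropbox developer console.
dbx_fs = DropboxDriveFileSystem(token="sl.EXAMPLE-TOKEN")
dbx_fs.ls("/")  # list the root of the account's Dropbox folder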
# https://hadoop.apache.org/docs/r1.0.4/webhdfs.html
import requests
from urllib.parse import quote
import uuid
from ..spec import AbstractFileSystem, AbstractBufferedFile
from ..utils import infer_storage_options
import logging
logger = logging.getLogger("webhdfs")
class WebHDFS(AbstractFileSystem):
"""
    Interface to HDFS over HTTP using the WebHDFS API; also supports HttpFS
    gateways.
Three auth mechanisms are supported:
insecure: no auth is done, and the user is assumed to be whoever they
say they are (parameter `user`), or a predefined value such as
"dr.who" if not given
spnego: when kerberos authentication is enabled, auth is negotiated by
requests_kerberos https://github.com/requests/requests-kerberos .
This establishes a session based on existing kinit login and/or
        specified principal/password; parameters are passed with ``kerb_kwargs``
token: uses an existing Hadoop delegation token from another secured
service. Indeed, this client can also generate such tokens when
not insecure. Note that tokens expire, but can be renewed (by a
previously specified user) and may allow for proxying.
    """
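# Usage sketch for WebHDFS (not from the fsspec source), showing two of the
# auth mechanisms described above; the host and user are hypothetical.
import fsspec

# insecure: the supplied user name is taken at face value
whdfs = fsspec.filesystem(
    "webhdfs", host="namenode.example.com", port=50070, user="alice"
)
whdfs.ls("/user/alice")

# spnego: negotiated by requests_kerberos from an existing kinit login
# whdfs = fsspec.filesystem("webhdfs", host="namenode.example.com", kerberos=True)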
import requests
from ..spec import AbstractFileSystem
from ..utils import infer_storage_options
from .memory import MemoryFile
class GithubFileSystem(AbstractFileSystem):
"""Interface to files in github
An instance of this class provides the files residing within a remote github
    repository. You may specify a point in the repo's history, by SHA, branch
    or tag (default is current master).
Given that code files tend to be small, and that github does not support
retrieving partial content, we always fetch whole files.
When using fsspec.open, allows URIs of the form:
- "github://path/file", in which case you must specify org, repo and
may specify sha in the extra args
    - 'github://org:repo@/precip/catalog.yml', where the org and repo are
      part of the URI
    - 'github://org:repo@sha/precip/catalog.yml', where the sha is also included
    """
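# Usage sketch for GithubFileSystem (not from the fsspec source), using the URI
# forms listed above; the org, repo and reference are hypothetical.
import fsspec

gh = fsspec.filesystem("github", org="some-org", repo="some-repo", sha="main")
gh.ls("")  # list the repository root at that reference

with fsspec.open("github://some-org:some-repo@main/README.md", "rb") as f:
    readme = f.read()  # whole-file fetch; partial content is not supported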
from distributed.worker import get_worker
from distributed.client import _get_global_client
import dask
from fsspec.spec import AbstractFileSystem, AbstractBufferedFile
from fsspec import filesystem
def make_instance(cls, args, kwargs):
inst = cls(*args, **kwargs)
inst._determine_worker()
return inst
class DaskWorkerFileSystem(AbstractFileSystem):
"""View files accessible to a worker as any other remote file-system
When instances are run on the worker, uses the real filesystem. When
run on the client, they call the worker to provide information or data.
**Warning** this implementation is experimental, and read-only for now.
"""
def __init__(self, remote_protocol, remote_options=None, **kwargs):
super().__init__(**kwargs)
self.protocol = remote_protocol
self.remote_options = remote_options
self.worker = None
self.client = None
        self.fs = None  # the wrapped filesystem; set in _determine_worker()
self._determine_worker()
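# Usage sketch for DaskWorkerFileSystem (not from the fsspec source): on the
# client, view a hypothetical anonymous S3 bucket as the workers see it.
from distributed import Client

client = Client()  # assumes a local cluster can be started
dask_fs = DaskWorkerFileSystem(remote_protocol="s3", remote_options={"anon": True})
dask_fs.ls("mybucket/data")  # the listing is produced on a worker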
import pygit2
from fsspec.spec import AbstractFileSystem
from .memory import MemoryFile
import os
class GitFileSystem(AbstractFileSystem):
"""Browse the files of a local git repo at any hash/tag/branch
(experimental backend)
"""
root_marker = ""
def __init__(self, path=None, ref=None, **kwargs):
"""
Parameters
----------
path: str (optional)
Local location of the repo (uses current directory if not given)
ref: str (optional)
            Reference to work with, could be a hash, tag or branch name. Defaults
            to the current working tree. Note that ``ls`` and ``open`` also take a
            hash, so this becomes the default for those operations.
        """
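# Usage sketch for GitFileSystem (not from the fsspec source): browsing a
# hypothetical local repository at a tagged release.
import fsspec

git_fs = fsspec.filesystem("git", path="/path/to/repo", ref="v1.0")
git_fs.ls("")  # list the tree at that reference
with git_fs.open("src/module.py", "rb") as f:
    source = f.read()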
# the sync methods below all call expand_path, which in turn may call walk or glob
# (if passed paths with glob characters, or for recursive=True, respectively)
default_async_methods = [
"_expand_path",
"_info",
"_isfile",
"_isdir",
"_exists",
"_walk",
"_glob",
"_find",
"_du",
]
class AsyncFileSystem(AbstractFileSystem):
"""Async file operations, default implementations
Passes bulk operations to asyncio.gather for concurrent operation.
Implementations that have concurrent batch operations and/or async methods
should inherit from this class instead of AbstractFileSystem. Docstrings are
copied from the un-underscored method in AbstractFileSystem, if not given.
"""
# note that methods do not have docstring here; they will be copied
# for _* methods and inferred for overridden methods.
async_impl = True
def __init__(self, *args, asynchronous=False, loop=None, **kwargs):
self.asynchronous = asynchronous
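# Usage sketch (not from the fsspec source) for an async-capable backend such as
# HTTP: the coroutines are the underscored methods, and bulk variants gather
# their per-path calls concurrently. The URL is hypothetical.
import asyncio
import fsspec

async def main():
    http = fsspec.filesystem("http", asynchronous=True)
    session = await http.set_session()  # explicit session setup in async mode
    data = await http._cat("https://example.com/data.bin")
    await session.close()

asyncio.run(main())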
import os
from fsspec.spec import AbstractFileSystem
from fsspec.implementations.local import LocalFileSystem
import dvc.repo
lfs = LocalFileSystem()
class DVCFileSystem(AbstractFileSystem):
"""DVC backend (experimental)
Load data files that are versioned using the `Data Version Control`_ system
.. _Data Version Control: https://dvc.org/
This interface is incomplete and experimental.
"""
root_marker = ""
def __init__(self, path=None, **kwargs):
"""
Parameters
----------
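# Usage sketch for DVCFileSystem (not from the source): direct instantiation on
# a hypothetical DVC-tracked repository.
dvc_fs = DVCFileSystem(path="/path/to/dvc/repo")
dvc_fs.ls("")  # files in the repo, including DVC-tracked data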