How to use the fsspec.spec.AbstractBufferedFile class in fsspec

To help you get started, we’ve selected a few fsspec examples based on popular ways it is used in public projects.

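Every example below follows the same pattern: subclass AbstractBufferedFile, then override the hooks that actually move bytes, chiefly _fetch_range for reads and _initiate_upload/_upload_chunk for writes, while the base class supplies buffering, seek handling, and block caching. As a minimal sketch of that contract (DemoFileSystem and DemoFile are hypothetical names, not part of fsspec):

from fsspec.spec import AbstractBufferedFile, AbstractFileSystem


class DemoFileSystem(AbstractFileSystem):
    """Toy backend keeping whole file contents in a class-level dict."""

    store = {}

    def info(self, path, **kwargs):
        return {"name": path, "size": len(self.store.get(path, b"")), "type": "file"}

    def _open(self, path, mode="rb", block_size=None, autocommit=True,
              cache_options=None, **kwargs):
        return DemoFile(self, path, mode, block_size, autocommit,
                        cache_options=cache_options, **kwargs)


class DemoFile(AbstractBufferedFile):
    def _fetch_range(self, start, end):
        # the inherited read/seek machinery only ever asks for byte ranges
        return DemoFileSystem.store[self.path][start:end]

    def _initiate_upload(self):
        # called once, just before the first chunk is flushed
        DemoFileSystem.store[self.path] = b""

    def _upload_chunk(self, final=False):
        # self.buffer holds everything written since the last flush
        self.buffer.seek(0)
        DemoFileSystem.store[self.path] += self.buffer.read()
        return True


fs = DemoFileSystem()
with fs.open("a/file.bin", "wb") as f:
    f.write(b"hello world")
with fs.open("a/file.bin", "rb") as f:
    assert f.read() == b"hello world"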

github dask / gcsfs / gcsfs / core.py (View on Github)
            elif status == 502:
                raise ProxyError()
            elif "invalid" in str(msg):
                raise ValueError("Bad Request: %s\n%s" % (path, msg))
            elif error:
                raise HttpError(error)
            elif status:
                raise HttpError({"code": status})
            else:
                raise RuntimeError(msg)


GCSFileSystem.load_tokens()


class GCSFile(fsspec.spec.AbstractBufferedFile):
    def __init__(
        self,
        gcsfs,
        path,
        mode="rb",
        block_size=DEFAULT_BLOCK_SIZE,
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        acl=None,
        consistency="md5",
        metadata=None,
        content_type=None,
        **kwargs,
    ):
        """
github intake / filesystem_spec / fsspec / implementations / http.py (View on Github)
            try:
                size = await _file_size(
                    url, size_policy=policy, session=self.session, **self.kwargs
                )
                if size:
                    break
            except Exception:
                pass
        else:
            # get failed, so conclude URL does not exist
            if size is False:
                raise FileNotFoundError(url)
        return {"name": url, "size": size or None, "type": "file"}


class HTTPFile(AbstractBufferedFile):
    """
    A file-like object pointing to a remote HTTP(S) resource

    Supports only reading, with read-ahead of a predetermined block-size.

    In the case that the server does not supply the filesize, only reading of
    the complete file in one go is supported.

    Parameters
    ----------
    url: str
        Full URL of the remote resource, including the protocol
    session: requests.Session or None
        All calls will be made within this session, to avoid restarting
        connections where the server allows this
    block_size: int or None
github intake / filesystem_spec / fsspec / compression.py (View on Github)
    register_compression("lzma", LZMAFile, "xz")
    register_compression("xz", LZMAFile, "xz", force=True)
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass


class SnappyFile(AbstractBufferedFile):
    def __init__(self, infile, mode, **kwargs):
        import snappy

        self.details = {"size": 999999999}  # not true, but OK if we don't seek
        super().__init__(fs=None, path="snappy", mode=mode.strip("b") + "b", **kwargs)
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True
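SnappyFile above wraps an already-open binary file object and emits snappy-framed chunks into it each time the write buffer is flushed. A usage sketch, assuming the python-snappy package is installed (the output path is illustrative):

from fsspec.compression import SnappyFile

with open("data.snappy", "wb") as raw:        # illustrative local path
    with SnappyFile(raw, mode="wb") as f:     # compresses on flush/close
        f.write(b"some bytes worth compressing")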
github intake / filesystem_spec / fsspec / implementations / dropboxdrivefs.py (View on Github)
        """Get info of URL
        """
        metadata = self.dbx.files_get_metadata(url)
        if isinstance(metadata, dropbox.files.FileMetadata):
            return {
                "name": metadata.path_display,
                "size": metadata.size,
                "type": "file",
            }
        elif isinstance(metadata, dropbox.files.FolderMetadata):
            return {"name": metadata.path_display, "size": None, "type": "folder"}
        else:
            return {"name": url, "size": None, "type": "unknow"}


class DropboxDriveFile(AbstractBufferedFile):
    """ fetch_all, fetch_range, and read method are based from the http implementation
    """

    def __init__(
        self, fs, dbx, path, session=None, block_size=None, mode="rb", **kwargs
    ):
        """
        Open a file.
        Parameters
        ----------
        fs: instance of DropboxDriveFileSystem
        dbx : instance of dropbox
        session: requests.Session or None
                All calls will be made within this session, to avoid restarting connections
                where the server allows this
        path : str
github intake / filesystem_spec / fsspec / spec.py (View on Github)
    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs
    ):
        """Return raw bytes-mode file-like from the file-system"""
        return AbstractBufferedFile(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs
        )
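This default _open is the hook that AbstractFileSystem.open() ultimately dispatches to; concrete backends override it to return their own AbstractBufferedFile subclass, as the GCS, HTTP, Dropbox, and FTP examples on this page do. A sketch of the user-facing call chain (host and path are illustrative):

import fsspec

# open() strips the protocol and resolves block_size/autocommit, then
# delegates to the backend's _open(), which returns the buffered file
fs = fsspec.filesystem("ftp", host="ftp.example.com")
with fs.open("/pub/data.bin", "rb", block_size=2 ** 20) as f:
    header = f.read(1024)  # served from the file's readahead cache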
github intake / filesystem_spec / fsspec / implementations / cached.py (View on Github)
        else:
            store_path = path
        path = self.fs._strip_protocol(store_path)
        if "r" not in mode:
            return LocalTempFile(self, path, mode=mode)
        fn = self._check_file(path)
        if fn:
            return open(fn, mode)

        sha = hash_name(path, self.same_names)
        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache" % path)
        kwargs["mode"] = mode

        with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            if getattr(f, "blocksize", 0) and f.size:
                # opportunity to parallelise here (if not compressed)
                if self.compression:
                    comp = (
                        infer_compression(path)
                        if self.compression == "infer"
                        else self.compression
                    )
                    f = compr[comp](f, mode="rb")
                data = True
                while data:
                    data = f.read(f.blocksize)
                    f2.write(data)
            else:
github intake / filesystem_spec / fsspec / implementations / dask.py (View on Github)
                block_size=block_size,
                autocommit=autocommit,
                cache_options=cache_options,
                **kwargs
            )

    def fetch_range(self, path, mode, start, end):
        if self.worker:
            with self._open(path, mode) as f:
                f.seek(start)
                return f.read(end - start)
        else:
            return self.rfs.fetch_range(path, mode, start, end).compute()


class DaskFile(AbstractBufferedFile):
    def _upload_chunk(self, final=False):
        pass

    def _initiate_upload(self):
        """ Create remote file/upload """
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.fs.fetch_range(self.path, self.mode, start, end)
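For a read-only backend this is the entire contract: the write hooks can be stubs, and _fetch_range does all the work while the inherited cache turns arbitrary seek/read patterns into block-sized range requests. A hypothetical probe (assumes a recent fsspec whose AbstractBufferedFile accepts size= directly) makes that visible:

from fsspec.spec import AbstractBufferedFile


class ProbeFile(AbstractBufferedFile):
    """Hypothetical read-only file that logs the ranges it is asked for."""

    def _fetch_range(self, start, end):
        print("_fetch_range(%d, %d)" % (start, end))
        return bytes(end - start)  # zero-filled stand-in payload


f = ProbeFile(fs=None, path="probe", mode="rb", size=1_000_000,
              block_size=64 * 2 ** 10)
f.seek(100)
f.read(10)  # one block-sized _fetch_range via the readahead cache, not 10 bytes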
github dask / gcsfs / gcsfs / core.py (View on Github)
        if block_size is None:
            block_size = self.default_block_size
        const = consistency or self.consistency
        return GCSFile(self, path, mode, block_size, consistency=const,
                       metadata=metadata, acl=acl, autocommit=autocommit)

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.dircache = {}
        self.connect(self.token)


GCSFileSystem.load_tokens()


class GCSFile(fsspec.spec.AbstractBufferedFile):

    def __init__(self, gcsfs, path, mode='rb', block_size=DEFAULT_BLOCK_SIZE,
                 acl=None, consistency='md5', metadata=None,
                 autocommit=True):
        """
        Open a file.

        Parameters
        ----------
        gcsfs: instance of GCSFileSystem
        path: str
            location in GCS, like 'bucket/path/to/file'
        mode: str
            Normal file modes. Currently only 'wb' and 'rb'.
        block_size: int
            Buffer size for reading or writing
github intake / filesystem_spec / fsspec / implementations / ftp.py (View on Github)
        path2 = self._strip_protocol(path2)
        self.ftp.rename(path1, path2)
        self.invalidate_cache(self._parent(path1))
        self.invalidate_cache(self._parent(path2))

    def __del__(self):
        self.ftp.close()


class TransferDone(Exception):
    """Internal exception to break out of transfer"""

    pass


class FTPFile(AbstractBufferedFile):
    """Interact with a remote FTP file with read/write buffering"""

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        **kwargs
    ):
        super().__init__(
            fs,
            path,