How to use the fsspec.caching.BaseCache function in fsspec

To help you get started, we’ve selected a few fsspec examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github intake / filesystem_spec / fsspec / View on Github external
def __len__(self):
        return len(self.cache)

class AllBytes(object):
    """Cache entire contents of the file"""

    def __init__(self, data): = data

    def _fetch(self, start, end):

caches = {
    "none": BaseCache,
    "mmap": MMapCache,
    "bytes": BytesCache,
    "readahead": ReadAheadCache,
    "block": BlockCache,
github intake / filesystem_spec / fsspec / View on Github external
elif self.start <= start < self.end:
            # partial hit
            part = self.cache[start - self.start :]
            l -= len(part)
            start = self.end
            # miss
            part = b""
        end = min(self.size, end + self.blocksize)
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return part + self.cache[:l]

class BlockCache(BaseCache):
    Cache holding memory as a set of blocks.

    Requests are only ever made `blocksize` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than `maxblocks` are stored.

    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for `blocksize`, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
github intake / filesystem_spec / fsspec / View on Github external
def __init__(self, blocksize, fetcher, size):
        self.blocksize = blocksize
        self.fetcher = fetcher
        self.size = size

    def _fetch(self, start, stop):
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix

    def __init__(self, blocksize, fetcher, size, location=None, blocks=None):
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.cache = self._makefile()

    def _makefile(self):
        import tempfile
github intake / filesystem_spec / fsspec / View on Github external
out = []

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            for block_number in range(start_block_number + 1, end_block_number):

            # final block

            return b"".join(out)

class BytesCache(BaseCache):
    """Cache which holds data in a in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.

    def __init__(self, blocksize, fetcher, size, trim=True):
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start = None
github intake / filesystem_spec / fsspec / View on Github external
return self.cache[start:end]

    def __getstate__(self):
        state = self.__dict__.copy()
        # Remove the unpicklable entries.
        del state["cache"]
        return state

    def __setstate__(self, state):
        # Restore instance attributes
        self.cache = self._makefile()

class ReadAheadCache(BaseCache):
    """ Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).

    def __init__(self, blocksize, fetcher, size):
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start, end):
        if start is None:
            start = 0