How to use the fsspec.asyn.sync_wrapper function in fsspec

To help you get started, we’ve selected a few fsspec examples based on popular ways it is used in public projects.
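
Before diving into the project snippets, here is a minimal sketch of what sync_wrapper does: given a coroutine function, it returns a blocking function that submits the coroutine to a running event loop found on the first argument's .loop attribute. The Echo class and its _echo coroutine are hypothetical, used only to illustrate the pattern; only fsspec.asyn.sync_wrapper and fsspec.asyn.get_loop are assumed from the library.

import asyncio

from fsspec.asyn import get_loop, sync_wrapper


class Echo:
    def __init__(self):
        # fsspec keeps one dedicated IO event loop running in a background
        # thread; sync_wrapper dispatches the wrapped coroutine to self.loop.
        self.loop = get_loop()

    async def _echo(self, value):
        await asyncio.sleep(0)  # stand-in for real async I/O
        return value

    # Blocking callers get a plain method, mirroring the _merge -> merge and
    # _rmdir -> rmdir pairs in the snippets below.
    echo = sync_wrapper(_echo)


print(Echo().echo("hello"))  # -> "hello"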


github intake / filesystem_spec / fsspec / implementations / http.py
head["Accept-Encoding"] = "identity"
    session = session or await get_client()
    if size_policy == "head":
        r = await session.head(url, allow_redirects=ar, **kwargs)
    elif size_policy == "get":
        r = await session.get(url, allow_redirects=ar, **kwargs)
    else:
        raise TypeError('size_policy must be "head" or "get", got %s' % size_policy)
    async with r:
        if "Content-Length" in r.headers:
            return int(r.headers["Content-Length"])
        elif "Content-Range" in r.headers:
            return int(r.headers["Content-Range"].split("/")[1])


file_size = sync_wrapper(_file_size)
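
The snippet above wraps the module-level coroutine _file_size into a blocking file_size helper. In everyday use this code path is usually reached through the public fsspec API rather than by calling the helper directly; a hedged usage sketch follows (the URL is a placeholder).

import fsspec

fs = fsspec.filesystem("http")
# For the HTTP filesystem, size() is ultimately resolved from the
# Content-Length / Content-Range header checks shown above.
print(fs.size("https://example.com/some-file.bin"))  # placeholder URL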
github dask / gcsfs / gcsfs / core.py
        source = [{"name": self.split_path(p)[1]} for p in paths]
        await self._call(
            "POST",
            "b/{}/o/{}/compose",
            bucket,
            key,
            destinationPredefinedAcl=acl,
            headers={"Content-Type": "application/json"},
            json={
                "sourceObjects": source,
                "kind": "storage#composeRequest",
                "destination": {"name": key, "bucket": bucket},
            },
        )

    merge = sync_wrapper(_merge)

    async def _cp_file(self, path1, path2, acl=None, **kwargs):
        """Duplicate remote file
        """
        b1, k1 = self.split_path(path1)
        b2, k2 = self.split_path(path2)
        out = await self._call(
            "POST",
            "b/{}/o/{}/rewriteTo/b/{}/o/{}",
            b1,
            k1,
            b2,
            k2,
            headers={"Content-Type": "application/json"},
            destinationPredefinedAcl=acl,
            json_out=True,
github intake / filesystem_spec / fsspec / implementations / http.py
        self.session = session
        if mode != "rb":
            raise ValueError
        self.details = {"name": url, "size": None}
        super().__init__(fs=fs, path=url, mode=mode, cache_type="none", **kwargs)
        self.r = sync(self.loop, get, self.session, url, **kwargs)

    def seek(self, *args, **kwargs):
        raise ValueError("Cannot seek streaming HTTP file")

    async def _read(self, num=-1):
        out = await self.r.content.read(num)
        self.loc += len(out)
        return out

    read = sync_wrapper(_read)

    async def _close(self):
        self.r.close()

    def close(self):
        asyncio.run_coroutine_threadsafe(self._close(), self.loop)


async def get_range(session, url, start, end, file=None, **kwargs):
    # explicitly get a range when we know it must be safe
    kwargs = kwargs.copy()
    headers = kwargs.pop("headers", {}).copy()
    headers["Range"] = "bytes=%i-%i" % (start, end - 1)
    r = await session.get(url, headers=headers, **kwargs)
    r.raise_for_status()
    async with r:
github dask / gcsfs / gcsfs / core.py
"""Delete an empty bucket

        Parameters
        ----------
        bucket: str
            bucket name. If it contains '/' (i.e., looks like a subdir), the
            call has no effect because GCS doesn't have real directories.
        """
        bucket = bucket.rstrip("/")
        if "/" in bucket:
            return
        await self._call("DELETE", "b/" + bucket, json_out=True)
        self.invalidate_cache(bucket)
        self.invalidate_cache("")

    rmdir = sync_wrapper(_rmdir)

    async def _info(self, path, **kwargs):
        """File information about this path."""
        path = self._strip_protocol(path).rstrip("/")
        # Check directory cache for parent dir
        parent_path = self._parent(path)
        parent_cache = self._ls_from_cache(parent_path)
        bucket, key = self.split_path(path)
        if parent_cache:
            for o in parent_cache:
                if o["name"].rstrip("/") == path:
                    return o
        if self._ls_from_cache(path):
            # this is a directory
            return {
                "bucket": bucket,
github dask / gcsfs / gcsfs / core.py
        if bucket in ["", "/"]:
            raise ValueError("Cannot create root bucket")
        if "/" in bucket:
            return
        await self._call(
            method="POST",
            path="b/",
            predefinedAcl=acl,
            project=self.project,
            predefinedDefaultObjectAcl=default_acl,
            json={"name": bucket},
            json_out=True,
        )
        self.invalidate_cache(bucket)

    mkdir = sync_wrapper(_mkdir)

    async def _rmdir(self, bucket):
        """Delete an empty bucket

        Parameters
        ----------
        bucket: str
            bucket name. If it contains '/' (i.e., looks like a subdir), the
            call has no effect because GCS doesn't have real directories.
        """
        bucket = bucket.rstrip("/")
        if "/" in bucket:
            return
        await self._call("DELETE", "b/" + bucket, json_out=True)
        self.invalidate_cache(bucket)
        self.invalidate_cache("")
github intake / filesystem_spec / fsspec / implementations / http.py
                    chunk = await r.content.read(2 ** 20)
                    # data size unknown, let's see if it goes too big
                    if chunk:
                        out.append(chunk)
                        cl += len(chunk)
                        if cl > end - start:
                            raise ValueError(
                                "Got more bytes so far (>%i) than requested (%i)"
                                % (cl, end - start)
                            )
                    else:
                        break
                out = b"".join(out)
            return out

    _fetch_range = sync_wrapper(async_fetch_range)

    def close(self):
        pass


async def get(session, url, **kwargs):
    return await session.get(url, **kwargs)


class HTTPStreamFile(AbstractBufferedFile):
    def __init__(self, fs, url, mode="rb", loop=None, session=None, **kwargs):
        self.asynchronous = kwargs.pop("asynchronous", False)
        self.url = url
        self.loop = loop
        self.session = session
        if mode != "rb":
github intake / filesystem_spec / fsspec / implementations / http.py
    async def async_fetch_all(self):
        """Read whole file in one shot, without caching

        This is only called when position is still at zero,
        and read() is called without a byte-count.
        """
        if not isinstance(self.cache, AllBytes):
            r = await self.session.get(self.url, **self.kwargs)
            async with r:
                r.raise_for_status()
                out = await r.read()
                self.cache = AllBytes(out)
                self.size = len(out)

    _fetch_all = sync_wrapper(async_fetch_all)

    async def async_fetch_range(self, start, end):
        """Download a block of data

        The expectation is that the server returns only the requested bytes,
        with HTTP code 206. If this is not the case, we first check the headers,
        and then stream the output - if the data size is bigger than we
        requested, an exception is raised.
        """
        kwargs = self.kwargs.copy()
        headers = kwargs.pop("headers", {}).copy()
        headers["Range"] = "bytes=%i-%i" % (start, end - 1)
        r = await self.session.get(self.url, headers=headers, **kwargs)
        async with r:
            if r.status == 416:
                # range request outside file
github dask / gcsfs / gcsfs / core.py
i_json["contentEncoding"] = content_encoding

        bucket, key = self.split_path(path)
        o_json = await self._call(
            "PATCH",
            "b/{}/o/{}",
            bucket,
            key,
            fields="metadata",
            json=i_json,
            json_out=True,
        )
        (await self._info(path))["metadata"] = o_json.get("metadata", {})
        return o_json.get("metadata", {})

    setxattrs = sync_wrapper(_setxattrs)

    async def _merge(self, path, paths, acl=None):
        """Concatenate objects within a single bucket"""
        bucket, key = self.split_path(path)
        source = [{"name": self.split_path(p)[1]} for p in paths]
        await self._call(
            "POST",
            "b/{}/o/{}/compose",
            bucket,
            key,
            destinationPredefinedAcl=acl,
            headers={"Content-Type": "application/json"},
            json={
                "sourceObjects": source,
                "kind": "storage#composeRequest",
                "destination": {"name": key, "bucket": bucket},
github dask / gcsfs / gcsfs / core.py
                if retry == self.retries - 1:
                    logger.exception("_call out of retries on exception: %s", e)
                    raise e
                if is_retriable(e):
                    logger.debug("_call retrying after exception: %s", e)
                    continue
                logger.exception("_call non-retriable exception: %s", e)
                raise e
        if json_out:
            return json
        elif info_out:
            return info
        else:
            return headers, contents

    call = sync_wrapper(_call)

    @property
    def buckets(self):
        """Return list of available project buckets."""
        return [b["name"] for b in sync(self.loop, self._list_buckets())]

    @staticmethod
    def _process_object(bucket, object_metadata):
        """Process object resource into gcsfs object information format.

        Process GCS object resource via type casting and attribute updates to
        the cache-able gcsfs object information format. Returns an updated copy
        of the object resource.

        (See https://cloud.google.com/storage/docs/json_api/v1/objects#resource)
        """