How to use the s3fs.core.S3FileSystem class in s3fs

To help you get started, we’ve selected a few s3fs examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github quantumblacklabs / kedro / tests / io / test_csv_dataset.py View on Github external
            ("s3://bucket/file.csv", S3FileSystem),
            ("file:///tmp/test.csv", LocalFileSystem),
            ("/tmp/test.csv", LocalFileSystem),
            ("gcs://bucket/file.csv", GCSFileSystem),
            ("https://example.com/file.csv", HTTPFileSystem),
        ],
    )
    def test_protocol_usage(self, filepath, instance_type):
        data_set = CSVDataSet(filepath=filepath)
        assert isinstance(data_set._fs, instance_type)

        # _strip_protocol() doesn't strip http(s) protocol
        if data_set._protocol == "https":
            path = filepath.split("://")[-1]
        else:
            path = data_set._fs._strip_protocol(filepath)
github quantumblacklabs / kedro / tests / io / test_json_dataset.py View on Github external
            ("s3://bucket/file.json", S3FileSystem),
            ("file:///tmp/test.json", LocalFileSystem),
            ("/tmp/test.json", LocalFileSystem),
            ("gcs://bucket/file.json", GCSFileSystem),
        ],
    )
    def test_protocol_usage(self, filepath, instance_type):
        """Check filesystem selection and path handling for ``filepath``.

        Builds a ``JSONDataSet`` from ``filepath`` and asserts that its
        ``_fs`` attribute is an instance of ``instance_type``, that its
        stored ``_filepath`` equals the filesystem's protocol-stripped form
        of ``filepath``, and that ``_filepath`` is a ``PurePosixPath``.
        """
        data_set = JSONDataSet(filepath=filepath)
        assert isinstance(data_set._fs, instance_type)
        assert str(data_set._filepath) == data_set._fs._strip_protocol(filepath)
        assert isinstance(data_set._filepath, PurePosixPath)
github quantumblacklabs / kedro / kedro / io / csv_s3.py View on Github external
load_args: Pandas options for loading csv files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
                All defaults are preserved.
            save_args: Pandas options for saving csv files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
                All defaults are preserved, except "index", which is set to False.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        _credentials = copy.deepcopy(credentials) or {}
        _s3 = S3FileSystem(client_kwargs=_credentials)
        path = _s3._strip_protocol(filepath)  # pylint: disable=protected-access
        path = PurePosixPath("{}/{}".format(bucket_name, path) if bucket_name else path)
        super().__init__(
            path, version, exists_function=_s3.exists, glob_function=_s3.glob,
        )
        self._credentials = _credentials

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        self._s3 = _s3
github quantumblacklabs / kedro / kedro / io / pickle_s3.py View on Github external
including bucket and protocol, e.g. `s3://bucket-name/path/to/file.pkl`.
            bucket_name: S3 bucket name. Must be specified **only** if not
                present in ``filepath``.
            credentials: Credentials to access the S3 bucket, such as
                ``aws_access_key_id``, ``aws_secret_access_key``.
            load_args: Options for loading pickle files. Refer to the help
                file of ``pickle.loads`` for options.
            save_args: Options for saving pickle files. Refer to the help
                file of ``pickle.dumps`` for options.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
        """
        _credentials = deepcopy(credentials) or {}
        _s3 = S3FileSystem(client_kwargs=_credentials)
        path = _s3._strip_protocol(filepath)  # pylint: disable=protected-access
        path = PurePosixPath("{}/{}".format(bucket_name, path) if bucket_name else path)
        super().__init__(
            path, version, exists_function=_s3.exists, glob_function=_s3.glob,
        )
        self._credentials = _credentials

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        self._s3 = _s3
github quantumblacklabs / kedro / kedro / contrib / io / parquet / parquet_s3.py View on Github external
load_args: Additional loading options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.ParquetFile.to_pandas
            save_args: Additional saving options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
        """

        _credentials = deepcopy(credentials) or {}
        _s3 = S3FileSystem(client_kwargs=_credentials)
        path = _s3._strip_protocol(filepath)  # pylint: disable=protected-access
        path = PurePosixPath("{}/{}".format(bucket_name, path) if bucket_name else path)
        super().__init__(
            path, version, exists_function=_s3.exists, glob_function=_s3.glob,
        )

        default_load_args = {}  # type: Dict[str, Any]
        default_save_args = {}  # type: Dict[str, Any]

        self._load_args = (
            {**default_load_args, **load_args}
            if load_args is not None
            else default_load_args
        )
        self._save_args = (
            {**default_save_args, **save_args}
github augerai / a2ml / a2ml / api / utils / s3_fsclient.py View on Github external
def s3fs_open(self, path, mode):
        """Open ``path`` through an ``S3FileSystem`` and return the file object.

        The ``S3_ENDPOINT_URL`` environment variable, when set to a non-empty
        value, is forwarded to the filesystem as the ``endpoint_url`` client
        keyword argument; otherwise no client keyword arguments are passed.

        When ``mode`` contains ``'r'``, ``self.wait_for_path(path)`` is called
        before the filesystem is created (presumably to block until the
        object exists -- confirm against ``wait_for_path``).

        Args:
            path: Location handed unchanged to ``S3FileSystem.open``.
            mode: File mode string handed to ``S3FileSystem.open``.

        Returns:
            Whatever ``S3FileSystem.open(path, mode=mode)`` returns.
        """
        # Imported lazily, so s3fs is only loaded when this method is called.
        from s3fs.core import S3FileSystem

        custom_endpoint = os.environ.get('S3_ENDPOINT_URL')
        extra_client_args = (
            {'endpoint_url': custom_endpoint} if custom_endpoint else {}
        )

        if 'r' in mode:
            self.wait_for_path(path)

        filesystem = S3FileSystem(
            anon=False,
            default_fill_cache=False,
            client_kwargs=extra_client_args,
        )
        return filesystem.open(path, mode=mode)