How to use the s3fs.S3FileSystem class in s3fs

To help you get started, we’ve selected a few s3fs.S3FileSystem examples based on popular ways the library is used in public projects.
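
Before diving into the project examples below, here is a minimal sketch of the typical pattern. The bucket and key in the path are placeholders, not real objects:

import s3fs

# Instantiate a filesystem object. With anon=False (the default),
# credentials are resolved from the usual AWS sources: environment
# variables, shared config files, or an attached IAM role.
fs = s3fs.S3FileSystem(anon=False)

# Open and read an object; "my-bucket/path/to/data.csv" is a placeholder.
with fs.open("my-bucket/path/to/data.csv", "rb") as f:
    contents = f.read()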


github quantumblacklabs / kedro / tests / io / test_partitioned_dataset.py
def test_exists(self, dataset, mocked_csvs_in_s3):
    assert PartitionedDataSet(mocked_csvs_in_s3, dataset).exists()

    empty_folder = "/".join([mocked_csvs_in_s3, "empty", "folder"])
    assert not PartitionedDataSet(empty_folder, dataset).exists()

    s3fs.S3FileSystem().mkdir(empty_folder)
    assert not PartitionedDataSet(empty_folder, dataset).exists()
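
Note that the final assertion holds even after the mkdir call: S3 has no real directories, so creating an "empty folder" produces no partitions for the dataset to find.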
github mozilla / OpenWPM / test / utilities.py
def __init__(self, bucket, directory):
    self.bucket = bucket
    self.root_directory = directory
    self.visits_uri = '%s/%s/visits/%%s' % (
        self.bucket, self.root_directory)
    self.s3_fs = s3fs.S3FileSystem(session=LocalS3Session())
    boto3.DEFAULT_SESSION = LocalS3Session()
    self.s3_client = boto3.client('s3')
    self.s3_resource = boto3.resource('s3')
github dcs4cop / xcube / xcube / core / store / dataaccess.py
def _get_s3_and_consume_params(params: Dict[str, Any]):
    import s3fs
    key = params.pop('key', params.pop('aws_access_key_id', None))
    secret = params.pop('secret', params.pop('aws_secret_access_key', None))
    token = params.pop('token', params.pop('aws_access_key_token', None))
    anon = params.pop('anon', key is None and secret is None and token is None)
    client_kwargs = dict(region_name=params.pop('region_name', None))
    return s3fs.S3FileSystem(anon=anon, key=key, secret=secret, token=token, client_kwargs=client_kwargs), params
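
A design note on the xcube helper: anon defaults to True only when no key, secret, or token was supplied, so explicitly passed credentials always take precedence over anonymous access.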
github JinYang88 / atec_nlp_sim / pure / lib / python2.7 / site-packages / pandas / io / s3.py
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    fs = s3fs.S3FileSystem(anon=False)
    try:
        filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer))
    except (OSError, NoCredentialsError):
        # boto3 has trouble accessing a public file
        # when credentialed...
        # An OSError is raised if you have credentials, but they
        # aren't valid for that bucket.
        # A NoCredentialsError is raised if you don't have creds
        # for that bucket.
        fs = s3fs.S3FileSystem(anon=True)
        filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer))
    return filepath_or_buffer, None, compression
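
As the inline comments explain, a credentialed client can fail on objects its credentials do not cover (OSError), and a client with no credentials at all raises NoCredentialsError; in both cases, retrying with anon=True still succeeds for publicly readable objects.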
github aws / lumberyard / dev / Gems / CloudGemMetric / v1 / AWS / common-code / ParquetUtils / parquet_reader.py
def debug_file(context, args):
    if args.file_path:
        debug_local_file(context, args)
    else:
        s3 = s3fs.S3FileSystem()
        resources = util.get_resources(context)
        bucket = resources[c.RES_S3_STORAGE]
        key = args.s3_key
        if not key.startswith('/'):
            key = "/{}".format(key)
        print(read(s3, bucket, key))
github uber / petastorm / petastorm / fs_utils.py
            self._filesystem_factory = lambda: pyarrow.filesystem.S3FSWrapper(s3fs.S3FileSystem())
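
The petastorm fragment above stores a factory rather than a ready-made filesystem object, so each consumer can build its own instance on demand. A minimal sketch of the same pattern, noting that pyarrow.filesystem.S3FSWrapper belongs to pyarrow's legacy filesystem API and is deprecated in recent releases:

import pyarrow.filesystem
import s3fs

# Store a zero-argument factory instead of a filesystem instance, so
# each worker process can construct its own wrapped filesystem.
filesystem_factory = lambda: pyarrow.filesystem.S3FSWrapper(s3fs.S3FileSystem())

fs = filesystem_factory()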
github FeatureLabs / featuretools / featuretools / primitives / install.py
def download_archive(uri):
    # determine where to save locally
    filename = os.path.basename(urlparse(uri).path)
    local_archive = os.path.join(get_installation_temp_dir(), filename)

    with open(local_archive, 'wb') as f:
        try:
            remote_archive = smart_open(uri, 'rb', ignore_extension=True)
        except NoCredentialsError:
            # fallback to anonymous using s3fs
            try:
                import s3fs
            except ImportError:
                raise ImportError("The s3fs library is required to handle s3 files")

            s3 = s3fs.S3FileSystem(anon=True)
            remote_archive = s3.open(uri, 'rb')

        for line in remote_archive:
            f.write(line)

        remote_archive.close()

    return local_archive
github RCOSDP / weko / modules / invenio-s3 / invenio_s3 / storage.py
def _get_fs(self, *args, **kwargs):
    """Get PyFilesystem instance and S3 real path."""
    if not self.fileurl.startswith('s3://'):
        return super(S3FSFileStorage, self)._get_fs(*args, **kwargs)

    info = current_app.extensions['invenio-s3'].init_s3f3_info
    fs = s3fs.S3FileSystem(**info)

    return (fs, self.fileurl)
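
Here the extension supplies a dict of keyword arguments that is unpacked into the constructor. A hypothetical shape for that dict, limited to parameters S3FileSystem actually accepts (key, secret, token, anon, client_kwargs):

import s3fs

# Hypothetical configuration; the real dict comes from the
# application's invenio-s3 extension settings.
info = {
    "key": "AKIA...",      # placeholder access key id
    "secret": "...",       # placeholder secret access key
    "client_kwargs": {"endpoint_url": "https://s3.example.org"},
}
fs = s3fs.S3FileSystem(**info)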
github yeatmanlab / pyAFQ / AFQ / data.py
def s3fs_json_read(fname, fs=None):
    """
    Reads json directly from S3

    Parameters
    ----------
    fname : str
        Full path (including bucket name and extension) to the file on S3.
    fs : an s3fs.S3FileSystem class instance, optional
        A file-system to refer to. Defaults to creating a new file-system.

    """
    if fs is None:
        fs = s3fs.S3FileSystem()
    with fs.open(fname) as ff:
        data = json.load(ff)
    return data
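
A hypothetical call to the helper above; the path is a placeholder and must include the bucket name:

data = s3fs_json_read("my-bucket/derivatives/metadata.json")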
github ThreatResponse / margaritashotgun / margaritashotgun / memory.py
        :type bucket: str
        :param bucket: memory dump output s3 bucket
        :type filename: str
        :param filename: memory dump output filename
        :type tunnel_addr: str
        :param tunnel_addr: ssh tunnel hostname or ip
        :type tunnel_port: int
        :param tunnel_port: ssh tunnel port
        """
        if self.progressbar:
            self.bar = ProgressBar(widgets=self.widgets,
                                   maxval=self.max_size).start()
            self.bar.start()

        s3 = s3fs.S3FileSystem(anon=False)
        with s3.open('{0}/{1}'.format(bucket, filename), 'wb') as self.outfile:
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.sock.connect((tunnel_addr, tunnel_port))
            self.sock.settimeout(self.sock_timeout)
            bytes_since_update = 0

            while True:
                try:
                    data = self.sock.recv(self.recv_size)
                    data_length = len(data)
                    if not data:
                        break
                    self.outfile.write(data)
                    self.transfered = self.transfered + data_length
                    bytes_since_update = bytes_since_update + data_length
                    data = None

s3fs

Convenient filesystem interface over S3 (BSD-3-Clause licensed).