Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_exists(self, dataset, mocked_csvs_in_s3):
    """A partitioned prefix exists; an empty prefix never does."""
    # The mocked prefix holds CSV partitions, so the data set exists.
    assert PartitionedDataSet(mocked_csvs_in_s3, dataset).exists()

    # A prefix with nothing under it does not exist ...
    empty_folder = "{}/empty/folder".format(mocked_csvs_in_s3)
    assert not PartitionedDataSet(empty_folder, dataset).exists()

    # ... even after an S3 "directory" placeholder has been created,
    # because a directory marker alone contains no partitions.
    s3fs.S3FileSystem().mkdir(empty_folder)
    assert not PartitionedDataSet(empty_folder, dataset).exists()
def __init__(self, bucket, directory):
    """Bind S3 helpers to a bucket/directory backed by a local S3 endpoint.

    :param bucket: memory of the target S3 bucket name
    :param directory: root directory (key prefix) inside the bucket
    """
    self.bucket = bucket
    self.root_directory = directory
    # Escaped %%s leaves a trailing %s placeholder for a visit id later on.
    self.visits_uri = '%s/%s/visits/%%s' % (
        self.bucket, self.root_directory)
    # Build ONE local session and share it; the original constructed two
    # separate LocalS3Session instances for s3fs and boto3.
    session = LocalS3Session()
    self.s3_fs = s3fs.S3FileSystem(session=session)
    # NOTE(review): mutating boto3.DEFAULT_SESSION is a process-global side
    # effect that the subsequent client/resource calls rely on.
    boto3.DEFAULT_SESSION = session
    self.s3_client = boto3.client('s3')
    self.s3_resource = boto3.resource('s3')
def _get_s3_and_consume_params(params: Dict[str, Any]):
    """Build an ``s3fs.S3FileSystem`` from *params*, consuming credential keys.

    Both the short spelling ('key', 'secret', 'token') and the long AWS
    spelling ('aws_access_key_id', ...) are popped, so neither alias leaks
    into the leftover ``params`` returned to the caller.
    """
    import s3fs

    # Pop the long-form aliases unconditionally (the original evaluated the
    # inner pop eagerly as a default), then let the short form take priority.
    key_fallback = params.pop('aws_access_key_id', None)
    secret_fallback = params.pop('aws_secret_access_key', None)
    token_fallback = params.pop('aws_access_key_token', None)
    key = params.pop('key', key_fallback)
    secret = params.pop('secret', secret_fallback)
    token = params.pop('token', token_fallback)
    # Default to anonymous access only when no credential at all was given.
    anon = params.pop('anon', key is None and secret is None and token is None)
    client_kwargs = {'region_name': params.pop('region_name', None)}
    fs = s3fs.S3FileSystem(anon=anon, key=key, secret=secret, token=token,
                           client_kwargs=client_kwargs)
    return fs, params
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """Open an S3 object, retrying anonymously when credentials fail.

    Returns a ``(buffer, encoding, compression)``-style triple whose middle
    element is always ``None``.
    """
    try:
        fs = s3fs.S3FileSystem(anon=False)
        handle = fs.open(_strip_schema(filepath_or_buffer))
    except (OSError, NoCredentialsError):
        # boto3 has troubles when trying to access a public file
        # when credentialed...
        # An OSError is raised if you have credentials, but they
        # aren't valid for that bucket.
        # A NoCredentialsError is raised if you don't have creds
        # for that bucket.
        anon_fs = s3fs.S3FileSystem(anon=True)
        handle = anon_fs.open(_strip_schema(filepath_or_buffer))
    return handle, None, compression
def debug_file(context, args):
    """Dump a file for debugging: local when ``args.file_path`` is set,
    otherwise the object at ``args.s3_key`` in the project's S3 bucket.
    """
    if args.file_path:
        debug_local_file(context, args)
    else:
        s3 = s3fs.S3FileSystem()
        resources = util.get_resources(context)
        bucket = resources[c.RES_S3_STORAGE]
        key = args.s3_key
        # The read helper expects keys with a leading slash.
        if not key.startswith('/'):
            key = "/{}".format(key)
        # BUG FIX: the original used the Python-2 `print` statement, which is
        # a SyntaxError on Python 3; the single-argument call form behaves
        # identically on both interpreters.
        print(read(s3, bucket, key))
# Defer filesystem construction so every factory call yields a fresh s3fs
# connection wrapped for pyarrow.
# NOTE(review): assumes default s3fs credential resolution — confirm in caller.
self._filesystem_factory = lambda: pyarrow.filesystem.S3FSWrapper(s3fs.S3FileSystem())
def download_archive(uri):
    """Download the archive at *uri* into the installation temp directory.

    Credentialed access via ``smart_open`` is tried first; if no AWS
    credentials are available, it falls back to anonymous access via s3fs.

    :param uri: remote archive location (e.g. an ``s3://`` URL)
    :return: path to the downloaded local file
    :raises ImportError: when the anonymous fallback is needed but the
        optional s3fs dependency is not installed
    """
    # determine where to save locally
    filename = os.path.basename(urlparse(uri).path)
    local_archive = os.path.join(get_installation_temp_dir(), filename)
    with open(local_archive, 'wb') as f:
        try:
            remote_archive = smart_open(uri, 'rb', ignore_extension=True)
        except NoCredentialsError:
            # fallback to anonymous using s3fs
            try:
                import s3fs
            except ImportError:
                raise ImportError("The s3fs library is required to handle s3 files")
            s3 = s3fs.S3FileSystem(anon=True)
            remote_archive = s3.open(uri, 'rb')
        try:
            for line in remote_archive:
                f.write(line)
        finally:
            # BUG FIX: close the remote handle even when a write fails;
            # the original leaked it on any exception in the copy loop.
            remote_archive.close()
    return local_archive
def _get_fs(self, *args, **kwargs):
    """Get a PyFilesystem instance and the real S3 path.

    Non-``s3://`` URLs are delegated to the parent implementation; S3 URLs
    get an ``s3fs`` filesystem built from the invenio-s3 extension config.
    """
    if not self.fileurl.startswith('s3://'):
        return super(S3FSFileStorage, self)._get_fs(*args, **kwargs)
    # Connection parameters registered by the invenio-s3 Flask extension.
    # NOTE(review): 'init_s3f3_info' looks like a typo for 's3fs', but it is
    # the attribute name actually used at registration time — do not rename
    # here without changing the extension side as well.
    info = current_app.extensions['invenio-s3'].init_s3f3_info
    fs = s3fs.S3FileSystem(**info)
    return (fs, self.fileurl)
def s3fs_json_read(fname, fs=None):
    """
    Read JSON directly from S3.

    Parameters
    ----------
    fname : str
        Full path (including bucket name and extension) to the file on S3.
    fs : an s3fs.S3FileSystem class instance, optional
        A file-system to refer to. Defaults to creating a new file-system.

    Returns
    -------
    The deserialized JSON content of the file.
    """
    if fs is None:
        fs = s3fs.S3FileSystem()
    # fs.open yields a file-like context manager that json.load consumes.
    with fs.open(fname) as ff:
        data = json.load(ff)
    return data
:type bucket: str
:param bucket: memory dump output s3 bucket
:type filename: str
:param filename: memory dump output filename
:type tunnel_addr: str
:param tunnel_addr: ssh tunnel hostname or ip
:type tunnel_port: int
:param tunnel_port: ssh tunnel port
"""
if self.progressbar:
self.bar = ProgressBar(widgets=self.widgets,
maxval=self.max_size).start()
self.bar.start()
s3 = s3fs.S3FileSystem(anon=False)
with s3.open('{0}/{1}'.format(bucket, filename), 'wb') as self.outfile:
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.connect((tunnel_addr, tunnel_port))
self.sock.settimeout(self.sock_timeout)
bytes_since_update = 0
while True:
try:
data = self.sock.recv(self.recv_size)
data_length = len(data)
if not data:
break
self.outfile.write(data)
self.transfered = self.transfered + data_length
bytes_since_update = bytes_since_update + data_length
data = None