if 'multipart' in res:
    # Multipart path: stream the file to cloud storage through FileUploader.
    upload_details = res['multipart']
    config = upload_details['config']

    uploader = FileUploader(
        upload_details['bucket'],
        config['endpoint'],
        config['access_key'],
        config['secret_key'],
        config['security_token']
    )

    with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
        return uploader.multipart_upload_obj(
            f,
            upload_details['key'],
            callback=NewlineProgressPercentage(file_to_upload)
        )
else:
    # Single-request path: POST the file to the presigned S3 URL.
    url = res['url']
    file_path = res['fields']['key']

    with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
        files = {'file': (file_path, f)}
        res = requests.post(url, data=res['fields'], files=files)

    if res.status_code == 201 or res.status_code == 200:
        bucket = urllib.parse.urlparse(url).netloc.split('.')[0]
        return 's3://%s/%s' % (bucket, file_path)
    else:
        if res.status_code == 400 and b'EntityTooLarge' in res.content:
            max_size = ElementTree.fromstring(res.content).find('MaxSizeAllowed').text
            max_size_mb = int(max_size) / 1024 / 1024
            raise AugerException('Data set size is limited to %.1f MB' % max_size_mb)
        else:
            raise AugerException(
                'HTTP error [%s] "%s" while uploading file'
                ' to Auger Cloud...' % (res.status_code, res.content))

raise AugerException(
    'Error while uploading file to Auger Cloud...')
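A minimal, self-contained sketch of the EntityTooLarge branch above. The XML body is an invented example of the error document S3 can return when a presigned POST exceeds the allowed size; only the element names mirror the snippet, the sizes are made up.

# Sketch: parse an S3 EntityTooLarge error body the way the snippet does.
from xml.etree import ElementTree

sample_body = (
    b'<?xml version="1.0" encoding="UTF-8"?>'
    b'<Error><Code>EntityTooLarge</Code>'
    b'<MaxSizeAllowed>524288000</MaxSizeAllowed>'
    b'<ProposedSize>1073741824</ProposedSize></Error>'
)

max_size = ElementTree.fromstring(sample_body).find('MaxSizeAllowed').text
max_size_mb = int(max_size) / 1024 / 1024
print('Data set size is limited to %.1f MB' % max_size_mb)  # -> 500.0 MB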
def _load_azure_cred_file(self):
    # Read the subscription id from the locally cached Azure ML profile, if present.
    content = {}
    azure_creds_file = os.path.abspath('%s/.azureml/auth/azureProfile.json' % os.environ.get('HOME', ''))
    if os.path.exists(azure_creds_file):
        from a2ml.api.utils import fsclient
        try:
            with fsclient.open_file(azure_creds_file, "r", encoding='utf-8-sig', num_tries=0) as file:
                res = json.load(file)
                content = {
                    'subscription_id': res['subscriptions'][0]['id']
                }
        except Exception as e:
            if self.ctx.debug:
                import traceback
                traceback.print_exc()

    return content
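A sketch of the profile structure _load_azure_cred_file expects. The subscription id and name are placeholders; a real azureProfile.json typically carries additional fields that the method ignores.

# Sketch: the JSON shape the loader reads the subscription id from.
import io
import json

fake_profile = io.StringIO(json.dumps({
    'subscriptions': [
        {'id': '00000000-0000-0000-0000-000000000000', 'name': 'my-subscription'}
    ]
}))

res = json.load(fake_profile)
content = {'subscription_id': res['subscriptions'][0]['id']}
print(content)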
def loadFromFeatherFile(self, path, features=None):
    from pyarrow import feather

    with fsclient.open_file(path, 'rb', encoding=None) as local_file:
        self.df = feather.read_feather(local_file, columns=features, use_threads=True)

    return self.df
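A small round-trip sketch of the pyarrow Feather calls used above, assuming pandas and pyarrow are installed; the file name and column names are invented for illustration.

# Sketch: write a Feather file, then read back only selected columns.
import pandas as pd
from pyarrow import feather

df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'target': [0, 1, 0]})
feather.write_feather(df, 'example.feather')

subset = feather.read_feather('example.feather', columns=['x', 'y'], use_threads=True)
print(subset.columns.tolist())  # ['x', 'y']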
def load_from_file(self, filename):
    if not isinstance(filename, str) or len(filename) == 0:
        raise ValueError("please provide yaml file name")

    self.filename = filename
    with fsclient.open_file(filename, 'r') as f:
        self.yaml = ruamel.yaml.load(f, Loader=ruamel.yaml.RoundTripLoader)

    return self
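For reference, the legacy ruamel.yaml.load / RoundTripLoader call above is deprecated in recent ruamel.yaml releases; the equivalent round-trip read with the YAML class looks roughly like this (a sketch with an invented config, not the repo's actual config format):

# Sketch: round-trip YAML load that preserves comments, using the newer API.
import io
from ruamel.yaml import YAML

yaml = YAML()  # round-trip mode by default
doc = yaml.load(io.StringIO("name: my_experiment  # experiment name\ntarget: label\n"))
print(doc['name'], doc['target'])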
def load(filename, target, features=None, nrows=None, data=None):
    df = None
    if filename:
        # Dispatch on the file extension; CSV is the fallback.
        if filename.endswith('.json') or filename.endswith('.json.gz'):
            df = pandas.read_json(filename)
        elif filename.endswith('.xlsx') or filename.endswith('.xls'):
            df = pandas.read_excel(filename)
        elif filename.endswith('.feather') or filename.endswith('.feather.gz'):
            import feather
            with fsclient.open_file(filename, 'rb', encoding=None) as local_file:
                df = feather.read_dataframe(local_file, columns=features, use_threads=True)

        if df is None:
            # Try comma-separated first, then fall back to pipe-separated.
            try:
                df = DataFrame._read_csv(filename, ',', features, nrows)
            except Exception as e:
                df = DataFrame._read_csv(filename, '|', features, nrows)
    else:
        df = DataFrame.load_data(data, features)

    # Drop the target column if it is present among the loaded features.
    features = df.columns.tolist()
    if target in features:
        df.drop(columns=[target], inplace=True)

    return df
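A minimal illustration of the final target-drop step in isolation, using plain pandas; the column names are invented.

# Sketch: remove the target column from a loaded frame, as load() does.
import pandas

df = pandas.DataFrame({'f1': [1, 2], 'f2': [3, 4], 'label': [0, 1]})
target = 'label'

features = df.columns.tolist()
if target in features:
    df.drop(columns=[target], inplace=True)

print(df.columns.tolist())  # ['f1', 'f2']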