How to use the a2ml.api.utils.fsclient.open_file function in a2ml

To help you get started, we’ve selected a few a2ml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github augerai / a2ml / a2ml / api / auger / impl / cloud / dataset.py View on Github external
config['endpoint'],
                config['access_key'],
                config['secret_key'],
                config['security_token']
            )

            with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
                return uploader.multipart_upload_obj(
                    f,
                    upload_details['key'],
                    callback=NewlineProgressPercentage(file_to_upload)
                )
        else:
            url = res['url']
            file_path = res['fields']['key']
            with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
                files = {'file': (file_path, f)}
                res = requests.post(url, data=res['fields'], files=files)

            if res.status_code == 201 or res.status_code == 200:
                bucket = urllib.parse.urlparse(url).netloc.split('.')[0]
                return 's3://%s/%s' % (bucket, file_path)
            else:
                if res.status_code == 400 and b'EntityTooLarge' in res.content:
                    max_size = ElementTree.fromstring(res.content).find('MaxSizeAllowed').text
                    max_size_mb = int(max_size) / 1024 / 1024
                    raise AugerException('Data set size is limited to %.1f MB' % max_size_mb)
                else:
                    raise AugerException(
                        'HTTP error [%s] "%s" while uploading file'
                            ' to Auger Cloud...' % (res.status_code, res.content))
github augerai / a2ml / a2ml / api / auger / impl / cloud / dataset.py View on Github external
raise AugerException(
                'Error while uploading file to Auger Cloud...')

        if 'multipart' in res:
            upload_details = res['multipart']
            config = upload_details['config']

            uploader = FileUploader(
                upload_details['bucket'],
                config['endpoint'],
                config['access_key'],
                config['secret_key'],
                config['security_token']
            )

            with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
                return uploader.multipart_upload_obj(
                    f,
                    upload_details['key'],
                    callback=NewlineProgressPercentage(file_to_upload)
                )
        else:
            url = res['url']
            file_path = res['fields']['key']
            with fsclient.open_file(file_to_upload, 'rb', encoding=None, auto_decompression=False) as f:
                files = {'file': (file_path, f)}
                res = requests.post(url, data=res['fields'], files=files)

            if res.status_code == 201 or res.status_code == 200:
                bucket = urllib.parse.urlparse(url).netloc.split('.')[0]
                return 's3://%s/%s' % (bucket, file_path)
            else:
github augerai / a2ml / a2ml / api / azure / credentials.py View on Github external
def _load_azure_cred_file(self):
        """Read the subscription id from the local Azure profile file, if present.

        The file looked up is ``.azureml/auth/azureProfile.json`` under the
        directory named by the ``HOME`` environment variable (empty string
        when ``HOME`` is unset).

        Returns:
            A dict with a single ``subscription_id`` key, taken from the ``id``
            of the first entry in the file's ``subscriptions`` list, or an
            empty dict when the file does not exist or could not be
            opened/parsed.

        Any exception raised while opening or parsing the file is swallowed;
        a traceback is printed only when ``self.ctx.debug`` is truthy.
        """
        profile_path = os.path.abspath(
            '%s/.azureml/auth/azureProfile.json' % os.environ.get('HOME', ''))
        if not os.path.exists(profile_path):
            return {}

        # Imported lazily, and only once the profile file is known to exist.
        from a2ml.api.utils import fsclient

        credentials = {}
        try:
            # 'utf-8-sig' strips a leading byte-order mark if the file has one.
            with fsclient.open_file(profile_path, "r", encoding='utf-8-sig', num_tries=0) as profile:
                parsed = json.load(profile)
                credentials = {'subscription_id': parsed['subscriptions'][0]['id']}
        except Exception:
            # Best effort: a broken profile file must not fail the caller.
            if self.ctx.debug:
                import traceback
                traceback.print_exc()

        return credentials
github augerai / a2ml / a2ml / api / utils / data_source_api_pandas.py View on Github external
def loadFromFeatherFile(self, path, features=None):
        """Load a Feather file into ``self.df`` and return the resulting frame.

        Args:
            path: Location of the Feather file; opened in binary mode through
                ``fsclient.open_file``.
            features: Optional column selection, passed to
                ``feather.read_feather`` as ``columns``.

        Returns:
            The object produced by ``feather.read_feather``, which is also
            stored on ``self.df``.
        """
        from pyarrow import feather

        with fsclient.open_file(path, 'rb', encoding=None) as feather_stream:
            loaded = feather.read_feather(feather_stream, columns=features, use_threads=True)
            self.df = loaded

        return self.df
github augerai / a2ml / a2ml / api / utils / config_yaml.py View on Github external
def load_from_file(self, filename):
        """Parse a YAML file with the round-trip loader and keep the result.

        Args:
            filename: Non-empty string naming the YAML file to read.

        Returns:
            ``self``, with ``self.filename`` set to ``filename`` and
            ``self.yaml`` holding the loaded document.

        Raises:
            ValueError: If ``filename`` is not a string or is empty.
        """
        name_is_usable = isinstance(filename, str) and bool(filename)
        if not name_is_usable:
            raise ValueError("please provide yaml file name")

        self.filename = filename
        with fsclient.open_file(filename, 'r') as yaml_stream:
            document = ruamel.yaml.load(
                yaml_stream, Loader=ruamel.yaml.RoundTripLoader)
            self.yaml = document
        return self
github augerai / a2ml / a2ml / api / utils / dataframe.py View on Github external
def load(filename, target, features=None, nrows=None, data=None):
        """Build a pandas frame from a file or in-memory data, minus ``target``.

        Args:
            filename: Path of the file to read. The reader is chosen by
                suffix: JSON (``.json``/``.json.gz``), Excel
                (``.xlsx``/``.xls``) or Feather (``.feather``/``.feather.gz``).
                Any other name is read through ``DataFrame._read_csv``. When
                falsy, ``data`` is used instead.
            target: Column name removed from the result if present.
            features: Optional column selection; forwarded to the Feather and
                CSV readers and to ``DataFrame.load_data``.
            nrows: Optional row limit; forwarded to the CSV reader only.
            data: In-memory input handed to ``DataFrame.load_data`` when no
                ``filename`` is given.

        Returns:
            The loaded frame with the ``target`` column dropped when it exists.
        """
        if not filename:
            frame = DataFrame.load_data(data, features)
        else:
            frame = None
            if filename.endswith(('.json', '.json.gz')):
                frame = pandas.read_json(filename)
            elif filename.endswith(('.xlsx', '.xls')):
                frame = pandas.read_excel(filename)
            elif filename.endswith(('.feather', '.feather.gz')):
                import feather
                with fsclient.open_file(filename, 'rb', encoding=None) as local_file:
                    frame = feather.read_dataframe(local_file, columns=features, use_threads=True)

            if frame is None:
                # Not a recognised suffix (or the reader produced nothing):
                # treat the file as CSV, retrying with '|' if ',' fails.
                try:
                    frame = DataFrame._read_csv(filename, ',', features, nrows)
                except Exception:
                    frame = DataFrame._read_csv(filename, '|', features, nrows)

        if target in frame.columns.tolist():
            frame.drop(columns=[target], inplace=True)

        return frame