Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_feature_importances(self):
    """Return cached feature importances as a ``{feature: score}`` dict.

    The metrics cache directory comes from
    ``ModelHelper.get_metric_path(self.options)``. The
    ``feature_importance_data`` entry is looked up first in ``metrics.json``
    and, if missing or empty there, in
    ``metric_names_feature_importance.json``.

    Returns:
        dict: ``features`` zipped with ``scores`` from the cached entry, or
        an empty dict (after logging a warning) when there is no cache
        directory or no importance data in either file.
    """
    cache_path = ModelHelper.get_metric_path(self.options)

    importance_data = None
    if cache_path:
        # Try the primary metrics file first, then the fallback file.
        for file_name in ("metrics.json", "metric_names_feature_importance.json"):
            cached = fsclient.read_json_file(os.path.join(cache_path, file_name))
            importance_data = cached.get('feature_importance_data')
            if importance_data:
                break

    if not importance_data:
        # logging.warn is a deprecated alias (removed in Python 3.13);
        # logging.warning with lazy arguments emits the same message.
        logging.warning("No feature importance in cache: for model %s", cache_path)
        return {}

    return dict(zip(importance_data['features'], importance_data['scores']))
def _get_feature_importances(self):
    """Return cached feature importances as a ``{feature: score}`` dict.

    The metrics cache directory comes from
    ``ModelHelper.get_metric_path(self.options)``. The
    ``feature_importance_data`` entry is looked up first in ``metrics.json``
    and, if missing or empty there, in
    ``metric_names_feature_importance.json``.

    Returns:
        dict: ``features`` zipped with ``scores`` from the cached entry, or
        an empty dict (after logging a warning) when there is no cache
        directory or no importance data in either file.
    """
    cache_path = ModelHelper.get_metric_path(self.options)

    importance_data = None
    if cache_path:
        # Try the primary metrics file first, then the fallback file.
        for file_name in ("metrics.json", "metric_names_feature_importance.json"):
            cached = fsclient.read_json_file(os.path.join(cache_path, file_name))
            importance_data = cached.get('feature_importance_data')
            if importance_data:
                break

    if not importance_data:
        # logging.warn is a deprecated alias (removed in Python 3.13);
        # logging.warning with lazy arguments emits the same message.
        logging.warning("No feature importance in cache: for model %s", cache_path)
        return {}

    return dict(zip(importance_data['features'], importance_data['scores']))
def _predict_locally(self, predict_data, model_id, threshold):
# Run a prediction with a model stored locally: verify the model, load the
# fitted model object from model_path, optionally narrow predict_data to the
# feature columns recorded in the model's options.json, then call
# predict_proba (threshold given) or predict (threshold is None).
# Raises Exception when verify_local_model reports the model is not loaded.
# NOTE(review): indentation was lost in this copy and the visible body ends
# without a return statement; it looks truncated — confirm against the
# full source.
is_loaded, model_path = self.verify_local_model(model_id)
if not is_loaded:
raise Exception("Model should be deployed before predict.")
fitted_model = fsclient.load_object_from_file(model_path)
try:
# Best-effort column selection: any exception raised here is caught below
# and only logged.
options = fsclient.read_json_file(os.path.join(self.ctx.config.get_model_path(model_id), "options.json"))
model_features = options.get("originalFeatureColumns")
predict_data = predict_data[model_features]
# NOTE(review): writes "test_options.csv" to a relative path on every call;
# presumably a leftover debugging dump — confirm whether it is needed.
predict_data.to_csv("test_options.csv", index=False, compression=None, encoding='utf-8')
except Exception as e:
self.ctx.log('Cannot get columns from model.Use original columns from predicted data: %s'%e)
# Exactly one of results / results_proba is filled in below; the other stays None.
results_proba = None
proba_classes = None
results = None
if threshold is not None:
# A threshold was supplied: collect class probabilities and the class list.
results_proba = fitted_model.predict_proba(predict_data)
proba_classes = list(fitted_model.classes_)
else:
results = fitted_model.predict(predict_data)
def preprocess_target_ds(model_path, ds):
    """Extract the target column of ``ds`` as a flat array.

    Reads ``options.json`` and ``target_categoricals.json`` from
    ``model_path``. When the configured ``targetFeature`` is missing or is
    not one of ``ds.columns``, no target values are produced.

    Args:
        model_path: directory holding the model's JSON files.
        ds: dataset object exposing ``columns``, ``df`` and
            ``convertToCategorical``.

    Returns:
        tuple: ``(y_true, target_categoricals)`` where ``y_true`` is ``None``
        or the raveled (C-order) target column.
    """
    def _read_model_json(file_name):
        return fsclient.read_json_file(os.path.join(model_path, file_name))

    options = _read_model_json("options.json")
    target_categoricals = _read_model_json("target_categoricals.json")

    target = options.get('targetFeature')
    if not (target and target in ds.columns):
        return None, target_categoricals

    if options.get('timeSeriesFeatures'):
        # Time-series targets are cast to float64 before flattening.
        as_float = ds.df[target].astype(np.float64, copy=False)
        return np.ravel(as_float, order='C'), target_categoricals

    if target_categoricals and target in target_categoricals:
        # Re-apply the stored category list to the target column.
        categories = target_categoricals.get(target).get('categories')
        ds.convertToCategorical(target, is_target=True, categories=categories)

    return np.ravel(ds.df[target], order='C'), target_categoricals
model_features = options.get("originalFeatureColumns")
predict_data = predict_data[model_features]
predict_data.to_csv("test_options.csv", index=False, compression=None, encoding='utf-8')
except Exception as e:
self.ctx.log('Cannot get columns from model.Use original columns from predicted data: %s'%e)
results_proba = None
proba_classes = None
results = None
if threshold is not None:
results_proba = fitted_model.predict_proba(predict_data)
proba_classes = list(fitted_model.classes_)
else:
results = fitted_model.predict(predict_data)
target_categoricals = fsclient.read_json_file(os.path.join(
self.ctx.config.get_model_path(model_id), "target_categoricals.json"))
target_categories = target_categoricals.get(self.ctx.config.get('target'), {}).get("categories")
return results, results_proba, proba_classes, target_categories
start = text.find(to_find)
if start > 0:
end = text.find("\n", start)
if end > start:
code_to_run = text[start+len(to_find):end]
input_sample = eval(code_to_run)
model_features = input_sample.columns.tolist()
except Exception as e:
self.ctx.log('Cannot get columns from remote model.Use original columns from predicted data: %s'%e)
if self.ctx.config.get("model_type") == "classification":
try:
file_name = 'confusion_matrix'
remote_run.download_file('%s'%file_name, os.path.join(temp_dir, file_name))
cm_data = fsclient.read_json_file(os.path.join(temp_dir, file_name))
target_categories = cm_data.get('data', {}).get('class_labels')
except Exception as e:
self.ctx.log('Cannot get categorical target class labels from remote model.Use class codes: %s'%e)
fsclient.remove_folder(temp_dir)
return model_features, target_categories
def _predict_locally(self, filename_arg, model_id, threshold, data, columns, output):
# Run a prediction against a locally downloaded model through
# _docker_run_predict. When no input file name is given, the in-memory
# data/columns are first saved to a CSV file under the project's .augerml
# directory.
# Raises AugerException when verify_local_model reports the model is not
# loaded locally.
# NOTE(review): indentation was lost in this copy and the visible body ends
# after the finally block — `predicted`, `model_options` and `output` are
# never used here, so the snippet looks truncated; confirm against the full
# source.
model_deploy = ModelDeploy(self.ctx, None)
is_model_loaded, model_path, model_name = \
model_deploy.verify_local_model(model_id)
if not is_model_loaded:
raise AugerException('Model isn\'t loaded locally. '
'Please use a2ml deploy command to download model.')
# model_path is rebound to the value returned by _extract_model;
# model_existed is used below to decide whether to delete that path.
model_path, model_existed = self._extract_model(model_name)
model_options = fsclient.read_json_file(os.path.join(model_path, "model", "options.json"))
filename = filename_arg
if not filename:
# No file supplied: build a DataFrame from data/columns and save it as CSV.
ds = DataFrame.create_dataframe(filename, data, columns)
filename = os.path.join(self.ctx.config.get_path(), '.augerml', 'predict_data.csv')
ds.saveToCsvFile(filename, compression=None)
try:
predicted = \
self._docker_run_predict(filename, threshold, model_path)
finally:
# clean up unzipped model
# if it wasn't unzipped before
if not model_existed:
shutil.rmtree(model_path, ignore_errors=True)
def __init__(self, params):
    """Set up the model path, its stored options and the target feature.

    Args:
        params: mapping that may contain ``model_path`` and ``augerInfo``.
            When ``model_path`` is absent or empty, ``augerInfo`` must be
            present with a ``pipeline_id`` (and optionally ``projectPath``)
            so the path can be resolved through
            ``ModelHelper.get_model_path``.
    """
    self.model_path = params.get('model_path')
    if not self.model_path:
        # No explicit path: resolve it from the pipeline information.
        pipeline_info = params['augerInfo']
        self.model_path = ModelHelper.get_model_path(
            pipeline_info['pipeline_id'],
            pipeline_info.get('projectPath'),
        )

    options_file = os.path.join(self.model_path, "options.json")
    self.options = fsclient.read_json_file(options_file)

    # The caller-supplied augerInfo replaces whatever the options file held.
    auger_info = params.get('augerInfo')
    if auger_info:
        self.options['augerInfo'] = auger_info

    self.target_feature = self.options.get('targetFeature')