y_pred = y_pred * corr_max + corr_min
else:
logging.error("calculate_scores: no scaling found for target fold group: %s"%options['fold_group'])
all_scores = {}
for scoring in options.get('scoreNames', []):
try:
if options.get('task_type') == "timeseries":
from auger_ml.preprocessors.space import ppspace_is_timeseries_model
if ppspace_is_timeseries_model(options.get('algorithm_name')) and \
scoring != options.get('scoring'):
continue
scorer = ModelHelper._get_score_byname(scoring)
if options.get('minority_target_class_pos') is not None:
argSpec = inspect.getfullargspec(scorer._score_func)
if 'pos_label' in argSpec.args:
scorer._kwargs['pos_label'] = options.get('minority_target_class_pos')
#logging.info("Use minority class to calculate score: %s"%scorer._kwargs)
if y_pred is not None:
all_scores[scoring] = scorer._sign * scorer._score_func(y_test, y_pred, **scorer._kwargs)
else:
all_scores[scoring] = _score(estimator, X_test, y_test, scorer)
#all_scores['scoring'] = scorer(estimator, X_test, y_test)
if np.isnan(all_scores[scoring]):
all_scores[scoring] = 0
try:
predicted = \
self._docker_run_predict(filename, threshold, model_path)
finally:
# clean up unzipped model
# if it wasn't unzipped before
if not model_existed:
shutil.rmtree(model_path, ignore_errors=True)
if not filename_arg:
ds_result = DataFrame.create_dataframe(predicted)
ds_result.options['data_path'] = None
ds_result.loaded_columns = columns
return ModelHelper.save_prediction_result(ds_result,
    prediction_id=None, support_review_model=model_options.get("support_review_model"),
    json_result=False, count_in_result=False, prediction_date=None,
    model_path=None, model_id=model_id, output=output)
elif output:
fsclient.move_file(predicted, output)
predicted = output
return predicted
def __init__(self, params):
self.model_path = params.get('model_path')
if not self.model_path:
self.model_path = ModelHelper.get_model_path(params['augerInfo']['pipeline_id'],
params['augerInfo'].get('projectPath'))
self.options = fsclient.read_json_file(os.path.join(self.model_path, "options.json"))
if params.get('augerInfo'):
self.options['augerInfo'] = params['augerInfo']
self.target_feature = self.options.get('targetFeature')
def score_actuals_by_model_task(params):
return ModelReview(params).score_actuals(
actuals_path=params.get('actuals_path'),
actual_records=params.get('actual_records'),
prediction_group_id=params.get('prediction_group_id', None),
primary_prediction_group_id=params.get('primary_prediction_group_id', None),
primary_model_path=ModelHelper.get_model_path(params.get('primary_pipeline_id', None),
params.get('hub_info', {}).get('projectPath')),
actual_date=params.get('actual_date'),
actuals_id=params.get('actuals_id')
)
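# Hypothetical usage sketch (not part of the original module): a minimal params
# payload for score_actuals_by_model_task, assuming the actuals file lives at a
# local path and the model path is resolved from augerInfo. All literal values
# below are illustrative only.
def _example_score_actuals_params():
    return {
        'augerInfo': {'pipeline_id': 'pipeline-uid', 'projectPath': 'path/to/project'},
        'hub_info': {'projectPath': 'path/to/project'},
        'actuals_path': 'path/to/actuals.csv',
        'actual_records': None,
        'prediction_group_id': None,
        'primary_prediction_group_id': None,
        'primary_pipeline_id': None,
        'actual_date': None,
        'actuals_id': None,
    }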
# Fix: store original dtypes and force them after merging
for col in origin_columns:
if col != 'prediction_id':
ds_actuals.df[col] = ds_actuals.df[col].astype(origin_dtypes[col], copy=False)
ds_actuals.df['a2ml_actual'] = ds_actuals.df['a2ml_actual'].astype(
origin_dtypes[self.target_feature], copy=False
)
ds_true = DataFrame({})
ds_true.df = ds_actuals.df[['a2ml_actual']].rename(columns={'a2ml_actual':self.target_feature})
y_pred, _ = ModelHelper.preprocess_target_ds(self.model_path, ds_actuals)
y_true, _ = ModelHelper.preprocess_target_ds(self.model_path, ds_true)
score = ModelHelper.calculate_scores(self.options, y_test=y_true, y_pred=y_pred)
if not actuals_ds:
ds_actuals.drop(self.target_feature)
ds_actuals.df = ds_actuals.df.rename(columns={'a2ml_actual':self.target_feature})
if not actuals_id:
actuals_id = get_uid()
file_name = str(actual_date or datetime.date.today()) + '_' + actuals_id + "_actuals.feather.zstd"
ds_actuals.saveToFeatherFile(os.path.join(self.model_path, "predictions", file_name))
return score
def save_prediction(ds, prediction_id, support_review_model,
json_result, count_in_result, prediction_date, model_path, model_id, output=None):
# Ids for each row of prediction (prediction row's ids)
prediction_ids = [get_uid4() for _ in range(ds.count())]
ds.df.insert(loc=0, column='prediction_id', value=prediction_ids)
return ModelHelper.save_prediction_result(ds, prediction_id, support_review_model,
json_result, count_in_result, prediction_date, model_path, model_id, output)
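# Hypothetical usage sketch (illustrative values only, not part of the original
# module): build a dataset with DataFrame.create_dataframe and let save_prediction
# assign a per-row prediction_id before delegating to ModelHelper.save_prediction_result.
def _example_save_prediction(model_path, model_id):
    ds = DataFrame.create_dataframe(None, [[0.1, 'a'], [0.2, 'b']], ['feature1', 'feature2'])
    return save_prediction(ds, prediction_id=None, support_review_model=False,
        json_result=False, count_in_result=False, prediction_date=None,
        model_path=model_path, model_id=model_id)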
def save_metric(metric_id, project_path, metric_name, metric_data):
    metric_path = ModelHelper.get_metric_path({'augerInfo': {'projectPath': project_path}}, metric_id)
    fsclient.write_json_file(os.path.join(metric_path,
        "metric_names_feature_importance.json"), metric_data)
def preprocess_target(model_path, data_path=None, records=None, features=None):
ds = DataFrame.create_dataframe(data_path, records, features)
return ModelHelper.preprocess_target_ds(model_path, ds)
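# Hypothetical usage sketch (illustrative records): preprocess_target builds a
# DataFrame from raw records and delegates to preprocess_target_ds; callers above
# unpack the result as (target_values, _).
def _example_preprocess_target(model_path):
    y_true, _ = preprocess_target(model_path, records=[[1], [0], [1]], features=['target'])
    return y_true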
def get_metric_path(params, metric_id=None):
if not metric_id:
metric_id = params.get('augerInfo', {}).get('pipeline_id')
if not metric_id:
metric_id = params.get('uid')
metrics_path = ModelHelper.get_metrics_path(params)
if metrics_path:
return os.path.join(metrics_path, metric_id)
return None
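# Hypothetical usage sketch (illustrative ids): resolve the metric folder for a
# given pipeline; when augerInfo carries no pipeline_id, get_metric_path falls
# back to params['uid'].
def _example_get_metric_path(project_path):
    params = {'augerInfo': {'projectPath': project_path, 'pipeline_id': 'pipeline-uid'}}
    return ModelHelper.get_metric_path(params)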
def process_prediction(ds, results, results_proba, proba_classes,
threshold, minority_target_class, targetFeature, target_categories):
if results_proba is not None:
proba_classes_orig = None
if target_categories:
proba_classes_orig = ModelHelper.revertCategories(proba_classes, target_categories)
results = ModelHelper.calculate_proba_target(
results_proba, proba_classes, proba_classes_orig,
threshold, minority_target_class)
if proba_classes_orig is not None:
proba_classes = proba_classes_orig
try:
results = list(results)
except Exception as e:
results = [results]
if target_categories and results_proba is not None:
results = ModelHelper.revertCategories(results, target_categories)
# drop target
if targetFeature in ds.columns: