Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): stray closing bracket — the list it terminates starts before the
# visible region of this chunk; confirm against the full file.
]
# --- Tail of a test method (its `def` line is outside the visible region). ---
# Assumes `model_path`, `actuals`, `prediction_group_id`,
# `primary_prediction_group_id`, `primary_model_path`, `options`,
# `prediction_id`, `prediction_date`, `results_file_path`,
# `predicted_file_path` and `self` are bound earlier — TODO confirm.
# Score the supplied actual records against stored predictions.
res = ModelReview({'model_path': model_path}).score_actuals(
actual_records=actuals, prediction_group_id=prediction_group_id,
primary_prediction_group_id=primary_prediction_group_id, primary_model_path=primary_model_path
)
# score_actuals is expected to return a dict of metric name -> value.
assert type(res) == dict
assert res['accuracy'] == 1.0
# Exactly one actuals file should have been persisted under <model>/predictions,
# named with today's date (matches the file_name pattern built in score_actuals).
actual_files = glob.glob(model_path + '/predictions/*_actuals.feather.zstd')
assert len(actual_files) == 1
actual_file = actual_files[0]
assert str(datetime.date.today()) in actual_file
# Reload the stored actuals and verify their contents row by row.
stored_actuals = DataFrame({})
stored_actuals.loadFromFeatherFile(actual_file)
assert 'prediction_group_id' in stored_actuals.columns
# Sort by prediction_id for a deterministic row order before comparing.
stored_actuals = json.loads(
stored_actuals.df.sort_values(by=['prediction_id']).to_json(orient='records')
)
assert stored_actuals[0]['prediction_id'] == 'bef9be07-5534-434e-ab7c-c379d8fcfe77'
assert stored_actuals[0]['prediction_group_id'] == prediction_group_id
assert stored_actuals[0]['species'] == 'versicolor'
assert stored_actuals[1]['prediction_id'] == 'f61b1bbc-6f7b-4e7e-9a3b-6acb6e1462cd'
assert stored_actuals[1]['prediction_group_id'] == prediction_group_id
assert stored_actuals[1]['species'] == 'virginica'
self.assertTrue(fsclient.is_file_exists(results_file_path))
# --- Exercise ModelHelper.save_prediction with json_result=True. ---
ds = DataFrame.create_dataframe(os.path.join(model_path, "iris_test.csv"))
# Remove any prior outputs so the assertions below prove save_prediction recreated them.
fsclient.remove_file(results_file_path)
self.assertFalse(fsclient.is_file_exists(results_file_path))
fsclient.remove_file(predicted_file_path)
self.assertFalse(fsclient.is_file_exists(predicted_file_path))
res = ModelHelper.save_prediction(ds, prediction_id,
support_review_model=True, json_result=True, count_in_result=False, prediction_date=prediction_date,
model_path=model_path, model_id=options.get('uid'))
# json_result=True -> a JSON string with 'columns' and 'data' keys.
res = json.loads(res)
self.assertEqual( res['columns'], ds.columns)
self.assertEqual( len(res['data']), 6)
# --- Same call with json_result=False: expect a list of row dicts instead. ---
ds = DataFrame.create_dataframe(os.path.join(model_path, "iris_test.csv"))
fsclient.remove_file(results_file_path)
self.assertFalse(fsclient.is_file_exists(results_file_path))
fsclient.remove_file(predicted_file_path)
self.assertFalse(fsclient.is_file_exists(predicted_file_path))
ds.options['data_path'] = None
res = ModelHelper.save_prediction(ds, prediction_id,
support_review_model=False, json_result=False, count_in_result=False, prediction_date=prediction_date,
model_path=model_path, model_id=options.get('uid'))
self.assertEqual( type(res[0]), dict)
self.assertEqual( res[0][options['targetFeature']], 'setosa')
# --- Third setup pass; the rest of this test is outside the visible region. ---
ds = DataFrame.create_dataframe(os.path.join(model_path, "iris_test.csv"))
fsclient.remove_file(results_file_path)
self.assertFalse(fsclient.is_file_exists(results_file_path))
fsclient.remove_file(predicted_file_path)
def predict(self, filename, model_id, threshold=None, locally=False, data=None, columns=None,
predicted_at=None, output=None, json_result=False, count_in_result=False, prediction_id=None
):
# Run a prediction either locally or remotely and post-process the result.
# NOTE(review): this definition is truncated in the visible region — the
# ModelHelper.process_prediction(...) call at the end is cut off mid-argument
# list; the remaining arguments and the return statement are not shown here.
ds = DataFrame.create_dataframe(filename, data, columns)
model_path = self.ctx.config.get_model_path(model_id)
options = fsclient.read_json_file(os.path.join(model_path, "options.json"))
# Dispatch on `locally`; both helpers return the same 4-tuple.
results, results_proba, proba_classes, target_categories = \
self._predict_locally(ds.df, model_id, threshold) if locally else self._predict_remotely(ds.df, model_id, threshold)
# Binary classification: map string labels "False"/"True" back to real booleans.
if target_categories and len(target_categories) == 2:
for idx, item in enumerate(target_categories):
if item == "False":
target_categories[idx] = False
if item == "True":
target_categories[idx] = True
ModelHelper.process_prediction(ds,
results, results_proba, proba_classes,
threshold,
def preprocess_target(model_path, data_path=None, records=None, features=None):
    """Build a dataset from a path or in-memory records and preprocess its target.

    Constructs a DataFrame from ``data_path`` (or from ``records``/``features``)
    and delegates the actual target encoding to
    ``ModelHelper.preprocess_target_ds``; returns that helper's result.
    """
    dataset = DataFrame.create_dataframe(data_path, records, features)
    return ModelHelper.preprocess_target_ds(model_path, dataset)
def score_actuals(self, actuals_path = None, actual_records=None, actuals_ds=None,
prediction_group_id=None, primary_prediction_group_id=None, primary_model_path=None,
actual_date=None, actuals_id = None):
# Score actual outcomes against this model's stored predictions.
# NOTE(review): truncated — the merge/scoring logic and the return statement
# continue beyond the visible region (see the orphan fragment starting at
# ds_actuals.df.reset_index later in this chunk).
# Accept an already-built dataset, or construct one from a path / records.
ds_actuals = actuals_ds or DataFrame.create_dataframe(actuals_path, actual_records,
features=['prediction_id', 'a2ml_actual'])
actuals_count = ds_actuals.count()
# When a primary prediction group is given, load that group's predictions
# from the primary model's files (used to resolve prediction ids).
primary_ds = None
if primary_prediction_group_id:
files = ModelReview._get_prediction_files(primary_model_path, primary_prediction_group_id)
for (_, df) in DataFrame.load_from_files(files, features=['prediction_id']):
primary_ds = df
# should be only one file
break
# Original dtypes/columns are captured so they can be restored after merging.
origin_dtypes = []
origin_columns = []
prediction_files = ModelReview._get_prediction_files(self.model_path, prediction_group_id)
actual_index = False
def _predict_locally(self, filename_arg, model_id, threshold, data, columns, output):
# Run a prediction against a locally downloaded model via Docker.
# NOTE(review): truncated — the tail of this method (after ds_result is
# built) is not visible in this region.
model_deploy = ModelDeploy(self.ctx, None)
is_model_loaded, model_path, model_name = \
model_deploy.verify_local_model(model_id)
if not is_model_loaded:
raise AugerException('Model isn\'t loaded locally. '
'Please use a2ml deploy command to download model.')
# Unzip the model archive; remember whether it was already extracted so we
# only clean up archives we unpacked ourselves.
model_path, model_existed = self._extract_model(model_name)
model_options = fsclient.read_json_file(os.path.join(model_path, "model", "options.json"))
filename = filename_arg
if not filename:
# No input file given: materialize the in-memory data to a CSV file
# that the dockerized predictor can read.
ds = DataFrame.create_dataframe(filename, data, columns)
filename = os.path.join(self.ctx.config.get_path(), '.augerml', 'predict_data.csv')
ds.saveToCsvFile(filename, compression=None)
try:
predicted = \
self._docker_run_predict(filename, threshold, model_path)
finally:
# clean up unzipped model
# if it wasn't unzipped before
if not model_existed:
shutil.rmtree(model_path, ignore_errors=True)
if not filename_arg:
# Input came from memory; load the prediction back and detach it from disk.
ds_result = DataFrame.create_dataframe(predicted)
ds_result.options['data_path'] = None
# NOTE(review): the following lines appear to belong to a different function
# (a DataFrame loading helper) — `features`, `nrows`, `target` and `data` are
# not defined in the surrounding scope shown here; confirm against the full file.
if filename:
# Pick a reader based on the file extension; unknown extensions fall
# through to the CSV attempts below.
if filename.endswith('.json') or filename.endswith('.json.gz'):
df = pandas.read_json(filename)
elif filename.endswith('.xlsx') or filename.endswith('.xls'):
df = pandas.read_excel(filename)
elif filename.endswith('.feather') or filename.endswith('.feather.gz'):
import feather
with fsclient.open_file(filename, 'rb', encoding=None) as local_file:
df = feather.read_dataframe(local_file, columns=features, use_threads=bool(True))
if df is None:
# Try comma-separated first, then pipe-separated as a fallback.
try:
df = DataFrame._read_csv(filename, ',', features, nrows)
except Exception as e:
df = DataFrame._read_csv(filename, '|', features, nrows)
else:
# No filename: build the frame from in-memory records.
df = DataFrame.load_data(data, features)
features = df.columns.tolist()
# Never keep the target column in the feature frame.
if target in features:
df.drop(columns=[target], inplace=True)
return df
# NOTE(review): continuation of ModelReview.score_actuals — its `def` line and
# the preceding merge logic are outside the visible region; `origin_columns`,
# `origin_dtypes`, `actuals_ds`, `actuals_id` and `self` are bound there.
ds_actuals.df.reset_index(inplace=True)
# Rows without both a stored target value and an actual cannot be scored.
ds_actuals.dropna(columns=[self.target_feature, 'a2ml_actual'])
# combine_first changes original non-float64 dtypes to float64 when NaN values
# appear during merging tables.
# Good explanation: https://stackoverflow.com/a/15353297/898680
# Fix: store original dtypes and force them back after merging.
for col in origin_columns:
if col != 'prediction_id':
ds_actuals.df[col] = ds_actuals.df[col].astype(origin_dtypes[col], copy=False)
# The actuals column must match the target feature's original dtype for scoring.
ds_actuals.df['a2ml_actual'] = ds_actuals.df['a2ml_actual'].astype(
origin_dtypes[self.target_feature], copy=False
)
# Build a one-column frame of actuals named like the target so both predicted
# and actual series go through identical target preprocessing.
ds_true = DataFrame({})
ds_true.df = ds_actuals.df[['a2ml_actual']].rename(columns={'a2ml_actual':self.target_feature})
y_pred, _ = ModelHelper.preprocess_target_ds(self.model_path, ds_actuals)
y_true, _ = ModelHelper.preprocess_target_ds(self.model_path, ds_true)
score = ModelHelper.calculate_scores(self.options, y_test=y_true, y_pred=y_pred)
# Persist the scored actuals unless the caller supplied a ready-made dataset.
if not actuals_ds:
ds_actuals.drop(self.target_feature)
ds_actuals.df = ds_actuals.df.rename(columns={'a2ml_actual':self.target_feature})
if not actuals_id:
actuals_id = get_uid()
# File name pattern <date>_<id>_actuals.feather.zstd is relied on by
# score_model_performance_daily's glob and by the tests above.
file_name = str(actual_date or datetime.date.today()) + '_' + actuals_id + "_actuals.feather.zstd"
ds_actuals.saveToFeatherFile(os.path.join(self.model_path, "predictions", file_name))
def score_model_performance_daily(self, date_from, date_to):
    """Compute the model's score for each day in [date_from, date_to].

    For every day that has stored actuals files, loads and concatenates them,
    renames the target column to 'a2ml_actual', scores via score_actuals, and
    records the configured metric (self.options['score_name']) keyed by the
    day's string representation. Days with no rows are skipped.
    """
    load_features = ['prediction_id', self.target_feature]
    daily_scores = {}
    day_files = ModelReview._prediction_files_by_day(
        self.model_path, date_from, date_to, "_*_actuals.feather.zstd")
    for day, paths in day_files:
        # Accumulate all of the day's actuals into a single frame.
        combined = DataFrame({})
        for _path, part in DataFrame.load_from_files(paths, load_features):
            combined.df = pd.concat([combined.df, part.df])
        if combined.count() <= 0:
            continue
        # score_actuals expects the actual values under the 'a2ml_actual' column.
        combined.df.rename(columns={self.target_feature: 'a2ml_actual'}, inplace=True)
        day_score = self.score_actuals(actuals_ds=combined)
        daily_scores[str(day)] = day_score[self.options.get('score_name')]
    return daily_scores