Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_distribution_chart_stats_for_categorical_target():
    # Verifies distribution-chart statistics for a model whose target
    # ('income') is categorical, using the 'adult' fixture.
    model_path = 'tests/fixtures/test_distribution_chart_stats/adult'
    date_from = datetime.date(2020, 2, 16)
    date_to = datetime.date(2020, 2, 20)
    # Stats are computed over the requested date window from cached metric params.
    res = ModelReview(load_metric_task_params(model_path)).distribution_chart_stats(date_from, date_to)
    assert type(res) is dict
    # Results are keyed by ISO date string; each day maps feature -> stats.
    assert type(res[str(date_to)]) is dict
    # Categorical features report a value-count 'dist'; numeric features report
    # 'avg'/'std_dev'; 'imp' is the cached feature importance.
    assert res[str(date_to)] == {
        'income': {'dist': {' <=50K': 1}, 'imp': 0},
        'age': {'avg': 0.0, 'std_dev': 0, 'imp': 0.716105},
        'workclass': {'dist': {0: 1}, 'imp': 0.120064},
        'fnlwgt': {'avg': 0.0, 'std_dev': 0, 'imp': 1.0},
        'education': {'dist': {0: 1}, 'imp': 0.299958},
        'education-num': {'avg': 0.0, 'std_dev': 0, 'imp': 0},
        'marital-status': {'dist': {0: 1}, 'imp': 0.143296},
        'occupation': {'dist': {0: 1}, 'imp': 0.209677},
        'relationship': {'dist': {0: 1}, 'imp': 0.086982},
        'race': {'dist': {0: 1}, 'imp': 0.041806},
        'sex': {'dist': {0: 1}, 'imp': 0.039482},
        'capital-gain': {'avg': 0.0, 'std_dev': 0, 'imp': 0.313237},
        # NOTE(review): this expected-value dict appears truncated in this chunk —
        # the remaining feature entries and the closing '}' are missing. Restore
        # the full literal from version control before running.
def test_set_support_review_model_flag():
    """The support-review flag must be persisted into the model's options.json."""
    # setup: start each run from a pristine copy of the original options file
    fixture_dir = 'tests/fixtures/test_set_support_review_model_flag'
    options_file = fixture_dir + '/options.json'
    shutil.copyfile(fixture_dir + '/options_original.json', options_file)

    # test: setting the flag should rewrite options.json on disk
    ModelReview({'model_path': fixture_dir}).set_support_review_model_flag(True)
    with open(options_file, 'r') as f:
        saved_options = json.load(f)
    assert saved_options['support_review_model'] == True

    # teardown: remove the mutated copy so the fixture stays clean
    os.remove(options_file)
def test_count_actuals_by_prediction_id():
    """Actuals in the 'adult' fixture are tallied per prediction id."""
    review = ModelReview({'model_path': 'tests/fixtures/test_count_actuals_by_prediction_id/adult'})
    counts = review.count_actuals_by_prediction_id()

    assert type(counts) is dict
    assert len(counts) > 0
    expected = {
        'ffa89d52-5300-412d-b7a4-d21b3c9b7d16': 2,
        '5d9f640d-529a-42bd-be85-172107249a01': 1,
        '066f3c25-80ee-4c75-af15-38cda8a4ad57': 1,
    }
    assert counts == expected
def test_distribution_chart_stats():
    # Verifies distribution-chart statistics for a regression model
    # (continuous target 'cnt') using the 'bikesharing' fixture.
    model_path = 'tests/fixtures/test_distribution_chart_stats/bikesharing'
    date_from = datetime.date(2020, 2, 16)
    date_to = datetime.date(2020, 2, 19)
    # Stats are computed over the requested date window from cached metric params.
    res = ModelReview(load_metric_task_params(model_path)).distribution_chart_stats(date_from, date_to)
    assert type(res) is dict
    # Results are keyed by ISO date string; each day maps feature -> stats.
    assert type(res[str(date_to)]) is dict
    # Numeric features report 'avg'/'std_dev'; categorical features report a
    # value-count 'dist'; 'imp' is the cached feature importance.
    assert res[str(date_to)] == {
        'cnt': { 'avg': 483.18357849636016, 'std_dev': 0.0, 'imp': 0 },
        'dteday': { 'avg': 0.0, 'std_dev': 0.0, 'imp': 0 },
        'season': { 'dist': { 0: 2 }, 'imp': 0},
        'yr': { 'dist': { 0: 2 }, 'imp': 0 },
        'mnth': { 'avg': 0.0, 'std_dev': 0.0, 'imp': 0 },
        'holiday': { 'dist': { 0: 2 }, 'imp': 0},
        'weekday': { 'dist': { 0: 2 }, 'imp': 0},
        'workingday': { 'dist': { 0: 2 }, 'imp': 0},
        'weathersit': { 'dist': { 0: 2 }, 'imp': 0},
        'temp': { 'avg': 0.0, 'std_dev': 0.0, 'imp': 0 },
        'atemp': { 'avg': 0.0, 'std_dev': 0.0, 'imp': 0 },
        'hum': { 'avg': 0.0, 'std_dev': 0.0, 'imp': 0 },
        # NOTE(review): this expected-value dict appears truncated in this chunk —
        # the remaining feature entries and the closing '}' are missing. Restore
        # the full literal from version control before running.
def test_get_feature_importances_general_metrics_cache():
    """Feature importances are read back from the cached metrics of the fixture."""
    task_params = load_metric_task_params('tests/fixtures/test_distribution_chart_stats/adult')
    importances = ModelReview(task_params)._get_feature_importances()

    expected = {
        'workclass': 0.12006373015194421,
        'sex': 0.039481754114499897,
        'occupation': 0.20967661413259162,
        'education': 0.2999579889231273,
        'relationship': 0.08698243068672135,
        'marital-status': 0.14329620992107325,
        'race': 0.04180630794271793,
        'native-country': 0.02072552576600564,
        'capital-loss': 0.2571256791934569,
        'capital-gain': 0.31323744185565716,
        'hours-per-week': 0.4246393312722869,
        'age': 0.7161049235052714,
        'fnlwgt': 1.0,
    }
    assert importances == expected
def test_score_model_performance_daily_no_matching_actuals_and_predictions():
    """When no actuals match any prediction, the daily score for the day is 0."""
    review = ModelReview({'model_path': 'tests/fixtures/test_score_model_performance_daily/iris_no_matches'})
    date_from = datetime.date(2020, 2, 16)
    date_to = datetime.date(2020, 2, 18)

    # Note: date_to is deliberately passed as a string — the API accepts both.
    daily_scores = review.score_model_performance_daily(date_from, str(date_to))

    assert type(daily_scores) is dict
    assert daily_scores[str(date_from)] == 0
for feature in features:
if stats[feature]['count'] > 0 and stats[feature]['dist'] == None:
stats[feature]['average'] = stats[feature]['sum'] / stats[feature]['count']
# Second pass: sum of squares of value and average for std dev
for df in df_list:
ModelReview._remove_duplicates_by(df, 'prediction_id', second_pass_counter)
for feature in features:
if 'average' in stats[feature]:
avg = stats[feature]['average']
stats[feature]['sq_sum'] += ((df.df[feature] - avg)**2).sum()
# Calc std dev
if len(files) > 0:
res[str(curr_date)] = ModelReview._calc_stddev_for_features(stats, features, feature_mapper)
return res
def score_actuals_by_model_task(params):
    """Worker-task entry point: score actuals against the model described by params.

    All inputs arrive in the ``params`` dict; missing keys default to None.
    Returns whatever ``ModelReview.score_actuals`` returns.
    """
    review = ModelReview(params)
    # Resolve the primary model's path from its pipeline id and the hub project path.
    primary_model_path = ModelHelper.get_model_path(
        params.get('primary_pipeline_id', None),
        params.get('hub_info', {}).get('projectPath'),
    )
    return review.score_actuals(
        actuals_path=params.get('actuals_path'),
        actual_records=params.get('actual_records'),
        prediction_group_id=params.get('prediction_group_id', None),
        primary_prediction_group_id=params.get('primary_prediction_group_id', None),
        primary_model_path=primary_model_path,
        actual_date=params.get('actual_date'),
        actuals_id=params.get('actuals_id'),
    )
def actuals(self, model_id, filename=None, actual_records=None, actuals_at=None, locally=False):
    """Add actual values for a model, either against a local deployment or remotely.

    With ``locally=False`` the work is delegated to the remote ModelActual
    executor; otherwise the model must already be deployed locally, and the
    actuals are fed to a ModelReview over the extracted model directory.
    Raises AugerException when the model is not deployed locally.
    """
    # Remote path: hand everything off to the service-side executor.
    if not locally:
        return ModelActual(self.ctx).execute(model_id, filename, actual_records, actuals_at)

    # Local path: the model must already be deployed on this machine.
    is_loaded, model_path, model_name = \
        ModelDeploy(self.ctx, self.project).verify_local_model(model_id)
    if not is_loaded:
        raise AugerException('Model should be deployed locally.')

    # Unpack the deployed archive and score actuals against its 'model' subdir.
    model_path, model_existed = ModelPredict(self.ctx)._extract_model(model_name)
    review = ModelReview({'model_path': os.path.join(model_path, "model")})
    return review.add_actuals(
        actuals_path=filename, actual_records=actual_records, actual_date=actuals_at)
def score_model_performance_daily_task(params):
    """Worker-task entry point: compute daily performance scores for a model.

    Reads 'date_from' and 'date_to' out of ``params`` and forwards them to
    ``ModelReview.score_model_performance_daily``.
    """
    review = ModelReview(params)
    return review.score_model_performance_daily(
        date_from=params.get('date_from'),
        date_to=params.get('date_to'),
    )