Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_run_experiment_duplicate_feature_names():
    # Run the rsmtool experiment with duplicate feature names.
    # NOTE(review): no expected-failure decorator or assertion is visible
    # in this block -- confirm how the outcome of the run is verified.
    source = 'lr-with-duplicate-feature-names'
    experiment_id = 'lr_with_duplicate_feature_names'

    # the configuration is stored under
    # <rsmtool_test_dir>/data/experiments/<source>/<experiment_id>.json
    experiment_dir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))

    do_run_experiment(source, experiment_id, config_path)
def test_run_experiment_wrong_train_file_path():
    # Run a basic experiment whose `train_file` field points to a file
    # that does not exist.
    # NOTE(review): no expected-failure decorator or assertion is visible
    # in this block -- confirm how the outcome of the run is verified.
    source = 'lr-wrong-path'
    experiment_id = 'lr'

    # the configuration is stored under
    # <rsmtool_test_dir>/data/experiments/<source>/<experiment_id>.json
    experiment_dir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))

    do_run_experiment(source, experiment_id, config_path)
def test_run_experiment_adaboost():
    # Run a basic experiment with the AdaBoostRegressor model, then yield
    # checks for the generated CSV files and the HTML report.
    source = 'adaboost'
    experiment_id = 'AdaBoost'

    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_path)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    results_dir = join('test_outputs', source)
    expected_dir = join(experiment_dir, 'output')
    report_path = join(results_dir, 'report', '{}_report.html'.format(experiment_id))

    generated_csvs = glob(join(results_dir, 'output', '*.csv'))
    for generated_csv in generated_csvs:
        expected_csv = join(expected_dir, basename(generated_csv))
        # only compare files that have an expected counterpart
        if not exists(expected_csv):
            continue
        yield check_csv_output, generated_csv, expected_csv

    yield check_all_csv_exist, generated_csvs, experiment_id, 'skll'
    yield check_report, report_path
def test_run_experiment_lr_subgroups_with_h2():
    # Run a basic experiment with subgroups and second rater analyses,
    # then yield checks for the generated CSV files, the consistency
    # files, the scaled coefficients and the HTML report.
    source = 'lr-subgroups-with-h2'
    experiment_id = 'lr_subgroups_with_h2'
    config_file = join(test_dir,
                       'data',
                       'experiments',
                       source,
                       '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_file)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(test_dir, 'data', 'experiments', source, 'output')
    html_report = join('test_outputs', source, 'report', '{}_report.html'.format(experiment_id))

    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        # only compare files that have an expected counterpart
        if exists(expected_csv_file):
            yield check_csv_output, csv_file, expected_csv_file

    yield check_all_csv_exist, csv_files, experiment_id, 'rsmtool'
    yield check_consistency_files_exist, csv_files, experiment_id
    yield check_scaled_coefficients, source, experiment_id
    # the report path was computed but never checked; yield the report
    # check as the other output-checking experiment tests in this file do
    yield check_report, html_report
def test_run_experiment_lr_with_sc2_as_feature_name():
    # Run the rsmtool experiment in which `sc2` is used as a feature name
    # while the user also requests h2 analysis using a different column.
    # This block only runs the experiment; it yields no output checks.
    source = 'lr-with-sc2-as-feature-name'
    experiment_id = 'lr_with_sc2_as_feature_name'

    # the configuration is stored under
    # <test_dir>/data/experiments/<source>/<experiment_id>.json
    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))

    do_run_experiment(source, experiment_id, config_path)
def test_run_experiment_lr_with_large_integer_value():
    # Run a basic LinearRegression experiment in which some values of
    # FEATURE7 (the integer feature) are outside +/-4 SD, then yield
    # checks for the CSV files, the scaled coefficients and the report.
    source = 'lr-with-large-integer-value'
    experiment_id = 'lr_with_large_integer_value'

    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_path)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    results_dir = join('test_outputs', source)
    expected_dir = join(experiment_dir, 'output')
    report_path = join(results_dir, 'report', '{}_report.html'.format(experiment_id))

    generated_csvs = glob(join(results_dir, 'output', '*.csv'))
    for generated_csv in generated_csvs:
        expected_csv = join(expected_dir, basename(generated_csv))
        # only compare files that have an expected counterpart
        if not exists(expected_csv):
            continue
        yield check_csv_output, generated_csv, expected_csv

    yield check_all_csv_exist, generated_csvs, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, report_path
def test_run_experiment_lr_feature_json():
    # Run a basic LinearRegression experiment that uses a feature json
    # file. This block only runs the experiment; it yields no output checks.
    source = 'lr-feature-json'
    experiment_id = 'lr'

    # the configuration is stored under
    # <rsmtool_test_dir>/data/experiments/<source>/<experiment_id>.json
    experiment_dir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))

    # deprecation warnings are expected for this experiment, so ask the
    # runner to suppress them
    ignored_warnings = [DeprecationWarning]
    do_run_experiment(source,
                      experiment_id,
                      config_path,
                      suppress_warnings_for=ignored_warnings)
def test_run_experiment_lr_tsv_input_and_subset_files():
    # Run the rsmtool experiment whose input files, including the feature
    # subset file, are in .tsv format, then yield checks for the CSV
    # files, the scaled coefficients and the report.
    source = 'lr-tsv-input-and-subset-files'
    experiment_id = 'lr_tsv_input_and_subset_files'

    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_path)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    results_dir = join('test_outputs', source)
    expected_dir = join(experiment_dir, 'output')
    report_path = join(results_dir, 'report', '{}_report.html'.format(experiment_id))

    generated_csvs = glob(join(results_dir, 'output', '*.csv'))
    for generated_csv in generated_csvs:
        expected_csv = join(expected_dir, basename(generated_csv))
        # only compare files that have an expected counterpart
        if not exists(expected_csv):
            continue
        yield check_csv_output, generated_csv, expected_csv

    yield check_all_csv_exist, generated_csvs, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, report_path
def test_run_experiment_svr():
    # Run a basic experiment with the svr model, then yield checks for
    # the generated CSV files and the HTML report.
    source = 'svr'
    experiment_id = 'SVR'

    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_path)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    results_dir = join('test_outputs', source)
    expected_dir = join(experiment_dir, 'output')
    report_path = join(results_dir, 'report', '{}_report.html'.format(experiment_id))

    generated_csvs = glob(join(results_dir, 'output', '*.csv'))
    for generated_csv in generated_csvs:
        expected_csv = join(expected_dir, basename(generated_csv))
        # only compare files that have an expected counterpart
        if not exists(expected_csv):
            continue
        yield check_csv_output, generated_csv, expected_csv

    yield check_all_csv_exist, generated_csvs, experiment_id, 'skll'
    yield check_report, report_path
def test_run_experiment_lr():
    # Run a basic experiment with a LinearRegression model, then yield
    # checks for the CSV files, the scaled coefficients and the report.
    source = 'lr'
    experiment_id = 'lr'

    experiment_dir = join(test_dir, 'data', 'experiments', source)
    config_path = join(experiment_dir, '{}.json'.format(experiment_id))
    do_run_experiment(source, experiment_id, config_path)

    # generated files are read from test_outputs/<source>; the expected
    # CSV files are stored next to the experiment configuration
    results_dir = join('test_outputs', source)
    expected_dir = join(experiment_dir, 'output')
    report_path = join(results_dir, 'report', '{}_report.html'.format(experiment_id))

    generated_csvs = glob(join(results_dir, 'output', '*.csv'))
    for generated_csv in generated_csvs:
        expected_csv = join(expected_dir, basename(generated_csv))
        # only compare files that have an expected counterpart
        if not exists(expected_csv):
            continue
        yield check_csv_output, generated_csv, expected_csv

    yield check_all_csv_exist, generated_csvs, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, report_path