# The snippets below appear to come from the rsmtool code base (its
# DataWriter tests plus the rsmtool/rsmpredict drivers); the imports
# they rely on would look roughly like this:
import logging
import os
import pickle
from os.path import join, normpath

import numpy as np
import pandas as pd

from rsmtool.configuration_parser import configure
from rsmtool.container import DataContainer
from rsmtool.reader import DataReader
from rsmtool.writer import DataWriter
def test_data_container_save_wrong_format(self):
    # build a container with two small frames of random values
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)
    directory = 'temp_directory_container_save_wrong_format_xyz'
    writer = DataWriter()
    # 'html' is not a supported output format, so this call is
    # expected to fail (the test name suggests the failure is asserted)
    writer.write_experiment_output(directory,
                                   container,
                                   dataframe_names=['dataset1'],
                                   file_format='html')
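
# A standalone sketch of the same check, assuming pytest is available
# and that write_experiment_output raises on unsupported formats (the
# exact exception type is an assumption, hence the broad pytest.raises):
def test_html_format_rejected(tmp_path):
    import pytest
    frame = pd.DataFrame(np.random.normal(size=(10, 2)), columns=['A', 'B'])
    container = DataContainer([{'name': 'd1', 'frame': frame}])
    with pytest.raises(Exception):
        DataWriter().write_experiment_output(str(tmp_path),
                                             container,
                                             dataframe_names=['d1'],
                                             file_format='html')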
def test_data_container_save_files(self):
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)
    directory = 'temp_directory_data_container_save_files_xyz'
    os.makedirs(directory, exist_ok=True)
    writer = DataWriter()
    for file_type in ['json', 'csv', 'xlsx']:
        if file_type != 'json':
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
        else:
            # for JSON, also rename 'dataset1' to 'aaa' on output
            writer.write_experiment_output(directory,
                                           container,
                                           new_names_dict={'dataset1': 'aaa'},
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
    # read the renamed JSON file back in
    aaa_json = pd.read_json(os.path.join(directory, 'aaa.json'))
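    # A plausible continuation, assuming the original test verifies the
    # JSON round trip with pandas' testing helpers (column order is
    # normalized since read_json does not guarantee it):
    from pandas.testing import assert_frame_equal
    assert_frame_equal(aaa_json.sort_index(axis=1),
                       data_sets[0]['frame'].sort_index(axis=1))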
def test_dictionary_save_files(self):
    # same as above, but passing a plain dict of data frames
    # instead of a DataContainer
    data_sets = {'dataset1': pd.DataFrame(np.random.normal(size=(100, 2)),
                                          columns=['A', 'B']),
                 'dataset2': pd.DataFrame(np.random.normal(size=(120, 3)),
                                          columns=['A', 'B', 'C'])}
    directory = 'temp_directory_dictionary_save_files_xyz'
    os.makedirs(directory, exist_ok=True)
    writer = DataWriter()
    for file_type in ['json', 'csv', 'xlsx']:
        if file_type != 'json':
            writer.write_experiment_output(directory,
                                           data_sets,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
        else:
            writer.write_experiment_output(directory,
                                           data_sets,
                                           new_names_dict={'dataset1': 'aaa'},
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
    aaa_json = pd.read_json(os.path.join(directory, 'aaa.json'))
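
# These tests write into ad-hoc 'temp_directory_*_xyz' folders; the
# suite presumably removes them afterwards, e.g. with a teardown along
# these lines (a sketch, not part of the original snippets):
def tearDown(self):
    import glob
    import shutil
    for directory in glob.glob('temp_directory_*_xyz'):
        shutil.rmtree(directory, ignore_errors=True)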
def test_data_container_save_files_with_id(self):
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)
    directory = 'temp_directory_save_files_with_id_xyz'
    os.makedirs(directory, exist_ok=True)
    # this time the writer gets an experiment id, which is
    # prepended to every output file name
    writer = DataWriter('test')
    for file_type in ['json', 'csv', 'xlsx']:
        if file_type != 'json':
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
        else:
            writer.write_experiment_output(directory,
                                           container,
                                           new_names_dict={'dataset1': 'aaa'},
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
    # note the 'test_' prefix coming from the experiment id
    aaa_json = pd.read_json(os.path.join(directory, 'test_aaa.json'))
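
# The experiment id passed to DataWriter is prepended to output file
# names, which is why the file above is 'test_aaa.json'. A minimal
# sketch of the same behavior ('some_dir' and 'scores' are hypothetical):
os.makedirs('some_dir', exist_ok=True)
DataWriter('exp01').write_experiment_output('some_dir',
                                            {'scores': pd.DataFrame({'A': [1, 2, 3]})},
                                            dataframe_names=['scores'],
                                            file_format='csv')
# expected output file (based on the naming pattern above): some_dir/exp01_scores.csv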
# --- from an rsmtool run_experiment-style driver ---
# warn if the output directory already holds results from a previous
# run (condition reconstructed; the original snippet begins at the bare
# 'else:' branch of a check that is not shown)
if os.path.exists(output_dir) and os.listdir(output_dir):
    logger.warning("{} already contains a non-empty 'output' directory. "
                   "The generated report might contain "
                   "unexpected information from a previous "
                   "experiment.".format(output_dir))
configuration = configure('rsmtool', config_file_or_obj_or_dict)
logger.info('Saving configuration file.')
configuration.save(output_dir)
# Get output format
file_format = configuration.get('file_format', 'csv')
# Get DataWriter object
writer = DataWriter(configuration['experiment_id'])
# Get the paths and names for the DataReader
(file_names,
 file_paths_org) = configuration.get_names_and_paths(['train_file', 'test_file',
                                                      'features',
                                                      'feature_subset_file'],
                                                     ['train', 'test',
                                                      'feature_specs',
                                                      'feature_subset_specs'])
file_paths = DataReader.locate_files(file_paths_org, configuration.configdir)
# if there are any missing files after trying to locate all expected
# files, raise an error (body reconstructed; the snippet is truncated here)
if None in file_paths:
    missing = [org for org, located in zip(file_paths_org, file_paths)
               if located is None]
    raise FileNotFoundError('The following files were not found: '
                            '{}'.format(missing))
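
# For context, configure() accepts a config file path, a Configuration
# object, or a plain dict; a minimal rsmtool dict might look like this
# (field names follow rsmtool's required fields as I understand them;
# the values are hypothetical):
config_file_or_obj_or_dict = {'experiment_id': 'exp01',
                              'model': 'LinearRegression',
                              'train_file': 'train.csv',
                              'test_file': 'test.csv'}
configuration = configure('rsmtool', config_file_or_obj_or_dict)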
# --- from an rsmpredict-style driver; this snippet begins inside the
# function's numpydoc docstring, of which only the tail survives ---
    Raises
    ------
    ValueError
        If any of the required fields are missing or ill-specified.
    """
logger = logging.getLogger(__name__)
configuration = configure('rsmpredict', config_file_or_obj_or_dict)
# get the experiment ID
experiment_id = configuration['experiment_id']
# get the output format
file_format = configuration.get('file_format', 'csv')
# get the DataWriter object
writer = DataWriter(experiment_id)
# locate the input file containing the feature values
# for which we want to generate the predictions
input_features_file = DataReader.locate_files(configuration['input_features_file'],
                                              configuration.configdir)
if not input_features_file:
    raise FileNotFoundError('Input file {} does not '
                            'exist'.format(configuration['input_features_file']))
experiment_dir = DataReader.locate_files(configuration['experiment_dir'],
                                         configuration.configdir)
if not experiment_dir:
    raise FileNotFoundError('The directory {} does not '
                            'exist.'.format(configuration['experiment_dir']))
else:
    experiment_output_dir = normpath(join(experiment_dir, 'output'))
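
# DataReader.locate_files resolves paths relative to the configuration
# directory and returns a falsy result for anything it cannot find,
# which is what the two FileNotFoundError checks above rely on.
# A hypothetical call:
located = DataReader.locate_files('features.csv', configuration.configdir)
if not located:
    raise FileNotFoundError('features.csv could not be located')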
# let's first save the fitted models and their summaries: one OLS
# pickle and one text summary per model/group combination
for model, fit in fit_dictionary.items():
    ols_file = join(output_dir, '{}_{}_by_{}.ols'.format(experiment_id,
                                                         model,
                                                         group))
    summary_file = join(output_dir, '{}_{}_by_{}_ols_summary.txt'.format(experiment_id,
                                                                         model,
                                                                         group))
    with open(ols_file, 'wb') as olsf, open(summary_file, 'w') as summf:
        pickle.dump(fit, olsf)
        summf.write(str(fit.summary()))
# now write out the contents of the fairness data container
writer = DataWriter(experiment_id)
writer.write_experiment_output(output_dir,
                               fairness_container,
                               file_format=file_format,
                               index=True)
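
# The pickled fits can be reloaded later for inspection (a sketch,
# assuming statsmodels-style results objects, as .summary() suggests):
with open(ols_file, 'rb') as olsf:
    reloaded_fit = pickle.load(olsf)
print(reloaded_fit.summary())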