def test_data_container_save_wrong_format(self):
    # writing experiment output in an unsupported file format
    # is expected to fail
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)

    directory = 'temp_directory_container_save_wrong_format_xyz'
    writer = DataWriter()
    writer.write_experiment_output(directory,
                                   container,
                                   dataframe_names=['dataset1'],
                                   file_format='html')
def test_drop(self):
    # dropping a dataset by name removes it from the container
    container = DataContainer([{'frame': pd.DataFrame(), 'name': 'test'}])
    container.drop('test')
    assert_false('test' in container)
def test_data_container_save_files(self):
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)

    directory = 'temp_directory_data_container_save_files_xyz'
    os.makedirs(directory, exist_ok=True)

    writer = DataWriter()
    for file_type in ['json', 'csv', 'xlsx']:
        if file_type != 'json':
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
        else:
            # the JSON call was cut off in the source; the arguments here
            # are assumed to match the branch above
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
def test_copy_not_deep(self):
    expected = pd.DataFrame([['John', 1, 5.0],
                             ['Mary', 2, 4.0],
                             ['Sally', 6, np.nan],
                             ['Jeff', 3, 9.0],
                             ['Edwin', 9, 1.0]],
                            columns=['string', 'numeric', 'numeric_missing'])

    container = DataContainer([{'frame': expected, 'name': 'test', 'path': 'foo'}])
    new_container = container.copy(deep=False)

    # the container itself is a new object ...
    assert_not_equal(id(new_container), id(container))

    # ... but a shallow copy shares the underlying frame objects
    for name in new_container.keys():
        frame = new_container.get_frame(name)
        path = new_container.get_path(name)

        old_frame = container.get_frame(name)
        old_path = container.get_path(name)

        eq_(path, old_path)
        assert_frame_equal(frame, old_frame)
        assert_equal(id(frame), id(old_frame))
def test_copy(self):
    expected = pd.DataFrame([['John', 1, 5.0],
                             ['Mary', 2, 4.0],
                             ['Sally', 6, np.nan],
                             ['Jeff', 3, 9.0],
                             ['Edwin', 9, 1.0]],
                            columns=['string', 'numeric', 'numeric_missing'])

    container = DataContainer([{'frame': expected, 'name': 'test', 'path': 'foo'}])
    new_container = container.copy()

    # a deep copy creates a new container ...
    assert_not_equal(id(new_container), id(container))

    # ... and new frame objects with equal contents
    for name in new_container.keys():
        frame = new_container.get_frame(name)
        path = new_container.get_path(name)

        old_frame = container.get_frame(name)
        old_path = container.get_path(name)

        eq_(path, old_path)
        assert_frame_equal(frame, old_frame)
        assert_not_equal(id(frame), id(old_frame))
def test_data_container_save_files_with_id(self):
    data_sets = [{'name': 'dataset1',
                  'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                        columns=['A', 'B'])},
                 {'name': 'dataset2',
                  'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                        columns=['A', 'B', 'C'])}]
    container = DataContainer(data_sets)

    directory = 'temp_directory_save_files_with_id_xyz'
    os.makedirs(directory, exist_ok=True)

    # this writer prefixes output file names with the experiment ID 'test'
    writer = DataWriter('test')
    for file_type in ['json', 'csv', 'xlsx']:
        if file_type != 'json':
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
        else:
            # the JSON call was cut off in the source; the arguments here
            # are assumed to match the branch above
            writer.write_experiment_output(directory,
                                           container,
                                           dataframe_names=['dataset1'],
                                           file_format=file_type)
    Raises
    ------
    KeyError
        If there are duplicate keys in the two DataContainers.
    """
    if not isinstance(other, DataContainer):
        raise ValueError('Object must be `DataContainer`, '
                         'not {}.'.format(type(other)))

    # make sure there are no duplicate keys
    common_keys = set(other._names).intersection(self._names)
    if common_keys:
        raise KeyError('The key(s) `{}` already exist in the '
                       'DataContainer.'.format(', '.join(common_keys)))

    dicts = DataContainer.to_datasets(self)
    dicts.extend(DataContainer.to_datasets(other))
    return DataContainer(dicts)
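# For context: a minimal usage sketch of combining two containers, assuming
# the fragment above is the container's __add__ (so that `+` concatenates)
# and that DataContainer lives in rsmtool.container. The dataset names and
# frames below are illustrative.
import pandas as pd
from rsmtool.container import DataContainer

left = DataContainer([{'name': 'train', 'frame': pd.DataFrame({'A': [1, 2]})}])
right = DataContainer([{'name': 'test', 'frame': pd.DataFrame({'A': [3, 4]})}])

combined = left + right
print('train' in combined, 'test' in combined)  # True True

# re-using an existing dataset name raises a KeyError
duplicate = DataContainer([{'name': 'train', 'frame': pd.DataFrame({'A': [5]})}])
try:
    combined + duplicate
except KeyError as error:
    print(error)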
    # compute disattenuated correlations if we have the second human score
    if include_second_score:
        dis_corr_by_group = self.compute_disattenuated_correlations(
            eval_by_group['corr.{}_trim'.format(score_type)],
            consistency_by_group['corr'])
    else:
        dis_corr_by_group = pd.DataFrame()

    datasets.extend([{'name': 'eval_by_{}'.format(group),
                      'frame': eval_by_group},
                     {'name': 'consistency_by_{}'.format(group),
                      'frame': consistency_by_group},
                     {'name': 'disattenuated_correlations_by_{}'.format(group),
                      'frame': dis_corr_by_group}])

    return configuration, DataContainer(datasets=datasets)
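# For context: disattenuated correlations follow the standard correction for
# attenuation, dividing the observed human-machine correlation by the square
# root of human-human agreement (a reliability estimate). A self-contained
# sketch of that formula, not necessarily the exact implementation of the
# compute_disattenuated_correlations helper used above:
import numpy as np
import pandas as pd

def disattenuate(human_machine_corr, human_human_corr):
    # r_disattenuated = r_hm / sqrt(r_hh); undefined where r_hh <= 0
    reliability = human_human_corr.where(human_human_corr > 0)
    return human_machine_corr / np.sqrt(reliability)

# toy per-group values
human_machine = pd.Series([0.65, 0.70], index=['group1', 'group2'])
human_human = pd.Series([0.81, 0.77], index=['group1', 'group2'])
print(disattenuate(human_machine, human_human))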
        df_predictions_with_metadata = pd.merge(df_predictions,
                                                df_input[['spkitemid'] + columns_to_copy])
        assert len(df_predictions) == len(df_predictions_with_metadata)
    else:
        df_predictions_with_metadata = df_predictions.copy()

    # we need to make sure that `spkitemid` is the first column
    df_excluded = df_excluded[['spkitemid'] +
                              [column for column in df_excluded
                               if column != 'spkitemid']]

    datasets = [{'name': 'features_processed', 'frame': df_features_preprocessed},
                {'name': 'excluded', 'frame': df_excluded},
                {'name': 'predictions_with_metadata', 'frame': df_predictions_with_metadata},
                {'name': 'predictions', 'frame': df_predictions}]

    return config_obj, DataContainer(datasets)
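# For context: a self-contained toy example of the move-to-front column idiom
# used for `spkitemid` above. Iterating over a DataFrame yields its column
# names, so the list comprehension collects every other column in its
# original order.
import pandas as pd

df = pd.DataFrame({'score': [1, 2], 'spkitemid': ['a', 'b'], 'length': [10, 20]})
df = df[['spkitemid'] + [column for column in df if column != 'spkitemid']]
print(df.columns.tolist())  # ['spkitemid', 'score', 'length']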
        if not exists(set_path):
            raise FileNotFoundError('The file {} does not exist'.format(set_path))

        # pick up any reader keyword arguments registered for this dataset
        if kwargs_dict is not None:
            kwargs = kwargs_dict.get(name, {})
        else:
            kwargs = {}

        dataframe = self.read_from_file(set_path, converter, **kwargs)

        # add the newly read frame to the list of datasets
        self.datasets.append({'name': name.strip(),
                              'path': set_path,
                              'frame': dataframe})

    return DataContainer(self.datasets)
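# For context: a usage sketch of reading files into a container, assuming a
# DataReader constructed from parallel lists of file paths and frame names
# (as in rsmtool's reader module); the file names below are hypothetical.
from rsmtool.reader import DataReader

reader = DataReader(['train.csv', 'test.csv'], ['train', 'test'])
container = reader.read()
print(container.get_frame('train').head())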