How to use the rsmtool.container.DataContainer class in rsmtool

To help you get started, we’ve selected a few rsmtool examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_wrong_format(self):
        # Ask the writer to save one frame of a two-frame container using
        # 'html' as the file format.
        # NOTE(review): the test name suggests 'html' is an unsupported format
        # and that this call is expected to fail; the expected-exception check
        # is not visible in this excerpt -- confirm against the full test file.

        # Frames are built in the same order as before so the random draws
        # are consumed identically.
        first_frame = pd.DataFrame(np.random.normal(size=(100, 2)),
                                   columns=['A', 'B'])
        second_frame = pd.DataFrame(np.random.normal(size=(120, 3)),
                                    columns=['A', 'B', 'C'])

        container = DataContainer([{'name': 'dataset1', 'frame': first_frame},
                                   {'name': 'dataset2', 'frame': second_frame}])

        output_dir = 'temp_directory_container_save_wrong_format_xyz'

        # Only 'dataset1' is requested for output.
        DataWriter().write_experiment_output(output_dir,
                                             container,
                                             dataframe_names=['dataset1'],
                                             file_format='html')
github EducationalTestingService / rsmtool / tests / test_container.py View on Github external
def test_drop(self):
        # After dropping the only dataset, its name must no longer be
        # reported as a member of the container.
        dataset_name = 'test'

        container = DataContainer([{'frame': pd.DataFrame(), 'name': dataset_name}])
        container.drop(dataset_name)

        assert_false(dataset_name in container)
github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_files(self):

        data_sets = [{'name': 'dataset1', 'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                                                columns=['A', 'B'])},
                     {'name': 'dataset2', 'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                                                columns=['A', 'B', 'C'])}]

        container = DataContainer(data_sets)

        directory = 'temp_directory_data_container_save_files_xyz'
        os.makedirs(directory, exist_ok=True)

        writer = DataWriter()
        for file_type in ['json', 'csv', 'xlsx']:

            if file_type != 'json':

                writer.write_experiment_output(directory,
                                               container,
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)
            else:
                writer.write_experiment_output(directory,
                                               container,
github EducationalTestingService / rsmtool / tests / test_container.py View on Github external
def test_copy_not_deep(self):
        # A copy made with deep=False must be a different container object
        # whose frames are the very same objects as the originals and whose
        # paths are equal.
        rows = [['John', 1, 5.0],
                ['Mary', 2, 4.0],
                ['Sally', 6, np.nan],
                ['Jeff', 3, 9.0],
                ['Edwin', 9, 1.0]]
        column_names = ['string', 'numeric', 'numeric_missing']
        source_frame = pd.DataFrame(rows, columns=column_names)

        original = DataContainer([{'frame': source_frame, 'name': 'test', 'path': 'foo'}])
        shallow = original.copy(deep=False)

        # The container itself is a new object ...
        assert_not_equal(id(shallow), id(original))

        for name in shallow.keys():

            copied_frame = shallow.get_frame(name)
            copied_path = shallow.get_path(name)

            original_frame = original.get_frame(name)
            original_path = original.get_path(name)

            eq_(copied_path, original_path)
            assert_frame_equal(copied_frame, original_frame)
            # ... but each frame is shared, not duplicated.
            assert_equal(id(copied_frame), id(original_frame))
github EducationalTestingService / rsmtool / tests / test_container.py View on Github external
def test_copy(self):
        # A copy made with the default arguments must be a different
        # container object whose frames are equal in content to the originals
        # but are distinct objects, and whose paths are equal.
        rows = [['John', 1, 5.0],
                ['Mary', 2, 4.0],
                ['Sally', 6, np.nan],
                ['Jeff', 3, 9.0],
                ['Edwin', 9, 1.0]]
        column_names = ['string', 'numeric', 'numeric_missing']
        source_frame = pd.DataFrame(rows, columns=column_names)

        original = DataContainer([{'frame': source_frame, 'name': 'test', 'path': 'foo'}])
        duplicate = original.copy()

        # The container itself is a new object ...
        assert_not_equal(id(duplicate), id(original))

        for name in duplicate.keys():

            copied_frame = duplicate.get_frame(name)
            copied_path = duplicate.get_path(name)

            original_frame = original.get_frame(name)
            original_path = original.get_path(name)

            eq_(copied_path, original_path)
            assert_frame_equal(copied_frame, original_frame)
            # ... and each frame is a separate object with the same content.
            assert_not_equal(id(copied_frame), id(original_frame))
github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_files_with_id(self):

        data_sets = [{'name': 'dataset1', 'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                                                columns=['A', 'B'])},
                     {'name': 'dataset2', 'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                                                columns=['A', 'B', 'C'])}]

        container = DataContainer(data_sets)

        directory = 'temp_directory_save_files_with_id_xyz'
        os.makedirs(directory, exist_ok=True)

        writer = DataWriter('test')
        for file_type in ['json', 'csv', 'xlsx']:

            if file_type != 'json':

                writer.write_experiment_output(directory,
                                               container,
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)
            else:
                writer.write_experiment_output(directory,
                                               container,
github EducationalTestingService / rsmtool / rsmtool / container.py View on Github external
KeyError
            If there are duplicate keys in the two DataContainers.
        """
        if not isinstance(other, DataContainer):
            raise ValueError('Object must be `DataContainer`, '
                             'not {}.'.format(type(other)))

        # Make sure there are no duplicate keys
        common_keys = set(other._names).intersection(self._names)
        if common_keys:
            raise KeyError('The key(s) `{}` already exist in the '
                           'DataContainer.'.format(', '.join(common_keys)))

        dicts = DataContainer.to_datasets(self)
        dicts.extend(DataContainer.to_datasets(other))
        return DataContainer(dicts)
github EducationalTestingService / rsmtool / rsmtool / analyzer.py View on Github external
# compute disattenuated correlations if we have the second human score
            if include_second_score:
                dis_corr_by_group = self.compute_disattenuated_correlations(eval_by_group['corr.{}_trim'.format(score_type)],
                                                                            consistency_by_group['corr'])
            else:
                dis_corr_by_group = pd.DataFrame()

            datasets.extend([{'name': 'eval_by_{}'.format(group),
                              'frame': eval_by_group},
                             {'name': 'consistency_by_{}'.format(group),
                              'frame': consistency_by_group},
                             {'name': 'disattenuated_correlations_by_{}'.format(group),
                              'frame': dis_corr_by_group}])

        return configuration, DataContainer(datasets=datasets)
github EducationalTestingService / rsmtool / rsmtool / preprocessor.py View on Github external
df_predictions_with_metadata = pd.merge(df_predictions,
                                                    df_input[['spkitemid'] + columns_to_copy])
            assert(len(df_predictions) == len(df_predictions_with_metadata))
        else:
            df_predictions_with_metadata = df_predictions.copy()

        # we need to make sure that `spkitemid` is the first column
        df_excluded = df_excluded[['spkitemid'] + [column for column in df_excluded
                                                   if column != 'spkitemid']]

        datasets = [{'name': 'features_processed', 'frame': df_features_preprocessed},
                    {'name': 'excluded', 'frame': df_excluded},
                    {'name': 'predictions_with_metadata', 'frame': df_predictions_with_metadata},
                    {'name': 'predictions', 'frame': df_predictions}]

        return config_obj, DataContainer(datasets)
github EducationalTestingService / rsmtool / rsmtool / reader.py View on Github external
if not exists(set_path):
                raise FileNotFoundError('The file {} does not exist'.format(set_path))

            if kwargs_dict is not None:
                kwargs = kwargs_dict.get(name, {})
            else:
                kwargs = {}

            dataframe = self.read_from_file(set_path, converter, **kwargs)

            # Add to list of datasets
            self.datasets.append({'name': name.strip(),
                                  'path': set_path,
                                  'frame': dataframe})

        return DataContainer(self.datasets)