How to use the rsmtool.writer.DataWriter class in rsmtool

To help you get started, we’ve selected a few examples that show popular ways rsmtool's DataWriter is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_wrong_format(self):
        # Ask a DataWriter to write 'dataset1' from a two-frame DataContainer
        # with file_format='html'.
        # NOTE(review): the test name suggests 'html' is an unsupported format
        # and that an error is expected, but the expectation itself (e.g. a
        # decorator or assertion) is not visible in this excerpt -- confirm
        # against the upstream test file.

        # Two random frames of different shapes; only 'dataset1' is requested below.
        data_sets = [{'name': 'dataset1', 'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                                                columns=['A', 'B'])},
                     {'name': 'dataset2', 'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                                                columns=['A', 'B', 'C'])}]

        container = DataContainer(data_sets)

        # Unlike the other save tests shown here, this directory is not
        # created before the write call.
        directory = 'temp_directory_container_save_wrong_format_xyz'

        writer = DataWriter()
        writer.write_experiment_output(directory,
                                       container,
                                       dataframe_names=['dataset1'],
                                       file_format='html')
github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_files(self):
        # Write 'dataset1' from a DataContainer once per format (json, csv,
        # xlsx) into a local directory, then read the JSON output back.
        # NOTE(review): this excerpt ends right after the JSON file is loaded;
        # the assertions and any cleanup of the directory are not shown here.

        # Two random frames of different shapes; only 'dataset1' is written below.
        data_sets = [{'name': 'dataset1', 'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                                                columns=['A', 'B'])},
                     {'name': 'dataset2', 'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                                                columns=['A', 'B', 'C'])}]

        container = DataContainer(data_sets)

        directory = 'temp_directory_data_container_save_files_xyz'
        os.makedirs(directory, exist_ok=True)

        # No argument is passed to DataWriter here.
        writer = DataWriter()
        for file_type in ['json', 'csv', 'xlsx']:

            if file_type != 'json':

                # csv / xlsx: written without new_names_dict.
                writer.write_experiment_output(directory,
                                               container,
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)
            else:
                # json: additionally passes new_names_dict mapping
                # 'dataset1' -> 'aaa'; the file read back below is 'aaa.json'.
                writer.write_experiment_output(directory,
                                               container,
                                               new_names_dict={'dataset1': 'aaa'},
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)

        # Load the JSON output written under the new name.
        aaa_json = pd.read_json(os.path.join(directory, 'aaa.json'))
github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_dictionary_save_files(self):
        # Same flow as the container-based save test, but the frames are
        # passed to write_experiment_output as a plain dict (name -> DataFrame)
        # instead of a DataContainer.
        # NOTE(review): this excerpt ends right after the JSON file is loaded;
        # the assertions and any cleanup of the directory are not shown here.

        # Two random frames of different shapes; only 'dataset1' is written below.
        data_sets = {'dataset1': pd.DataFrame(np.random.normal(size=(100, 2)),
                                              columns=['A', 'B']),
                     'dataset2': pd.DataFrame(np.random.normal(size=(120, 3)),
                                              columns=['A', 'B', 'C'])}

        directory = 'temp_directory_dictionary_save_files_xyz'
        os.makedirs(directory, exist_ok=True)

        # No argument is passed to DataWriter here.
        writer = DataWriter()
        for file_type in ['json', 'csv', 'xlsx']:

            if file_type != 'json':

                # csv / xlsx: written without new_names_dict.
                writer.write_experiment_output(directory,
                                               data_sets,
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)
            else:
                # json: additionally passes new_names_dict mapping
                # 'dataset1' -> 'aaa'; the file read back below is 'aaa.json'.
                writer.write_experiment_output(directory,
                                               data_sets,
                                               new_names_dict={'dataset1': 'aaa'},
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)

        # Load the JSON output written under the new name.
        aaa_json = pd.read_json(os.path.join(directory, 'aaa.json'))
github EducationalTestingService / rsmtool / tests / test_writer.py View on Github external
def test_data_container_save_files_with_id(self):
        # Write 'dataset1' from a DataContainer once per format (json, csv,
        # xlsx) using a DataWriter constructed with the argument 'test', then
        # read back 'test_aaa.json'.
        # NOTE(review): this excerpt ends right after the JSON file is loaded;
        # the assertions and any cleanup of the directory are not shown here.

        # Two random frames of different shapes; only 'dataset1' is written below.
        data_sets = [{'name': 'dataset1', 'frame': pd.DataFrame(np.random.normal(size=(100, 2)),
                                                                columns=['A', 'B'])},
                     {'name': 'dataset2', 'frame': pd.DataFrame(np.random.normal(size=(120, 3)),
                                                                columns=['A', 'B', 'C'])}]

        container = DataContainer(data_sets)

        directory = 'temp_directory_save_files_with_id_xyz'
        os.makedirs(directory, exist_ok=True)

        # presumably 'test' is an experiment ID used as a filename prefix,
        # since the file read back below is 'test_aaa.json' -- TODO confirm
        # against the DataWriter implementation.
        writer = DataWriter('test')
        for file_type in ['json', 'csv', 'xlsx']:

            if file_type != 'json':

                # csv / xlsx: written without new_names_dict.
                writer.write_experiment_output(directory,
                                               container,
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)
            else:
                # json: additionally passes new_names_dict mapping
                # 'dataset1' -> 'aaa'.
                writer.write_experiment_output(directory,
                                               container,
                                               new_names_dict={'dataset1': 'aaa'},
                                               dataframe_names=['dataset1'],
                                               file_format=file_type)

        # Load the JSON output; note the 'test_' prefix on the file name.
        aaa_json = pd.read_json(os.path.join(directory, 'test_aaa.json'))
github EducationalTestingService / rsmtool / rsmtool / rsmtool.py View on Github external
else:
            logger.warning("{} already contains a non-empty 'output' directory. "
                           "The generated report might contain "
                           "unexpected information from a previous "
                           "experiment.".format(output_dir))

    configuration = configure('rsmtool', config_file_or_obj_or_dict)

    logger.info('Saving configuration file.')
    configuration.save(output_dir)

    # Get output format
    file_format = configuration.get('file_format', 'csv')

    # Get DataWriter object
    writer = DataWriter(configuration['experiment_id'])

    # Get the paths and names for the DataReader

    (file_names,
     file_paths_org) = configuration.get_names_and_paths(['train_file', 'test_file',
                                                          'features',
                                                          'feature_subset_file'],
                                                         ['train', 'test',
                                                          'feature_specs',
                                                          'feature_subset_specs'])

    file_paths = DataReader.locate_files(file_paths_org, configuration.configdir)

    # if there are any missing files after trying to locate
    # all expected files, raise an error
    if None in file_paths:
github EducationalTestingService / rsmtool / rsmtool / rsmpredict.py View on Github external
ValueError
        If any of the required fields are missing or ill-specified.
    """

    logger = logging.getLogger(__name__)

    configuration = configure('rsmpredict', config_file_or_obj_or_dict)

    # get the experiment ID
    experiment_id = configuration['experiment_id']

    # Get output format
    file_format = configuration.get('file_format', 'csv')

    # Get DataWriter object
    writer = DataWriter(experiment_id)

    # get the input file containing the feature values
    # for which we want to generate the predictions
    input_features_file = DataReader.locate_files(configuration['input_features_file'],
                                                  configuration.configdir)
    if not input_features_file:
        raise FileNotFoundError('Input file {} does not exist'
                                ''.format(configuration['input_features_file']))

    experiment_dir = DataReader.locate_files(configuration['experiment_dir'],
                                             configuration.configdir)
    if not experiment_dir:
        raise FileNotFoundError('The directory {} does not exist.'
                                ''.format(configuration['experiment_dir']))
    else:
        experiment_output_dir = normpath(join(experiment_dir, 'output'))
github EducationalTestingService / rsmtool / rsmtool / fairness_utils.py View on Github external
# let's first save model files and summaries
    for model in fit_dictionary:
        fit = fit_dictionary[model]

        ols_file = join(output_dir, '{}_{}_by_{}.ols'.format(experiment_id,
                                                             model,
                                                             group))
        summary_file = join(output_dir, '{}_{}_by_{}_ols_summary.txt'.format(experiment_id,
                                                                             model,
                                                                             group))
        with open(ols_file, 'wb') as olsf, open(summary_file, 'w') as summf:
            pickle.dump(fit, olsf)
            summf.write(str(fit.summary()))

    # Now let's write out the content of the data container
    writer = DataWriter(experiment_id)
    writer.write_experiment_output(output_dir,
                                   fairness_container,
                                   file_format=file_format,
                                   index=True)