How to use the rsmtool.reader.DataReader.locate_files function in rsmtool

To help you get started, we’ve selected a few rsmtool examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EducationalTestingService / rsmtool / rsmtool / preprocessor.py View on Github external
config_obj : configuration_parser.Configuration
            A new configuration object.
        data_container : container.DataContainer
            A new data container object.

        Raises
        ------
        ValueError
        """

        # get the directory where the config file lives
        # if this is the 'expm' directory, then go
        # up one level.
        configpath = config_obj.configdir

        pred_file_location = DataReader.locate_files(config_obj['predictions_file'],
                                                     configpath)

        # get the column names for the human and system scores
        human_score_column = config_obj['human_score_column']
        system_score_column = config_obj['system_score_column']

        # if the human score column is the same as the
        # system score column, raise an error
        if human_score_column == system_score_column:
            raise ValueError("'human_score_column' and "
                             "'system_score_column' "
                             "cannot have the same value.")

        # get the name of the optional column that
        # contains the second human score
        second_human_score_column = config_obj['second_human_score_column']
github EducationalTestingService / rsmtool / rsmtool / rsmsummarize.py View on Github external
Returns
    -------
    jsons : list
        A list of paths to all configuration JSON files contained in the output directory.

    Raises
    ------
    FileNotFoundError
        If the directory does not exist or does not contain the output
        of an RSMTool experiment.
    ValueError
        If the given experiment directory contains several JSON configuration
        files instead of just one.
    """
    full_path_experiment_dir = DataReader.locate_files(experiment_dir, configpath)
    if not full_path_experiment_dir:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(experiment_dir))
    else:
        # check that there is an output directory
        csvdir = normpath(join(full_path_experiment_dir, 'output'))
        if not exists(csvdir):
            raise FileNotFoundError("The directory {} does not contain "
                                    "the output of an rsmtool "
                                    "experiment.".format(full_path_experiment_dir))

        # find the json configuration files for all experiments stored in this directory
        jsons = glob.glob(join(csvdir, '*.json'))
        if len(jsons) == 0:
            raise FileNotFoundError("The directory {} does not contain "
                                    "the .json configuration files for rsmtool "
github EducationalTestingService / rsmtool / rsmtool / rsmpredict.py View on Github external
logger = logging.getLogger(__name__)

    configuration = configure('rsmpredict', config_file_or_obj_or_dict)

    # get the experiment ID
    experiment_id = configuration['experiment_id']

    # Get output format
    file_format = configuration.get('file_format', 'csv')

    # Get DataWriter object
    writer = DataWriter(experiment_id)

    # get the input file containing the feature values
    # for which we want to generate the predictions
    input_features_file = DataReader.locate_files(configuration['input_features_file'],
                                                  configuration.configdir)
    if not input_features_file:
        raise FileNotFoundError('Input file {} does not exist'
                                ''.format(configuration['input_features_file']))

    experiment_dir = DataReader.locate_files(configuration['experiment_dir'],
                                             configuration.configdir)
    if not experiment_dir:
        raise FileNotFoundError('The directory {} does not exist.'
                                ''.format(configuration['experiment_dir']))
    else:
        experiment_output_dir = normpath(join(experiment_dir, 'output'))
        if not exists(experiment_output_dir):
            raise FileNotFoundError('The directory {} does not contain '
                                    'the output of an rsmtool experiment.'.format(experiment_dir))
github EducationalTestingService / rsmtool / rsmtool / reporter.py View on Github external
Returns
        -------
        custom_report_sections :  list of str
            List of absolute paths to the custom section
            notebooks.

        Raises
        ------
        FileNotFoundError
            If any of the files cannot be found.
        """

        custom_report_sections = []
        for cs_path in custom_report_section_paths:
            cs_location = DataReader.locate_files(cs_path, configdir)
            if not cs_location:
                raise FileNotFoundError("Error: custom section not found at "
                                        "{}.".format(cs_path))
            else:
                custom_report_sections.append(cs_location)
        return custom_report_sections
github EducationalTestingService / rsmtool / rsmtool / preprocessor.py View on Github external
A DataContainer object.

        Raises
        ------
        ValueError
            If the columns in the config file do not exist in the data.
        """
        train = data_container_obj.train
        test = data_container_obj.test
        feature_specs = data_container_obj.get_frame('feature_specs')
        feature_subset = data_container_obj.get_frame('feature_subset_specs')

        configdir = config_obj.configdir

        (test_file_location,
         train_file_location) = DataReader.locate_files([config_obj['test_file'],
                                                         config_obj['train_file']],
                                                        configdir)

        feature_subset_file = config_obj['feature_subset_file']

        if feature_subset_file is not None:
            feature_subset_file = DataReader.locate_files(feature_subset_file, configdir)

        # get the column name for the labels for the training and testing data
        train_label_column = config_obj['train_label_column']
        test_label_column = config_obj['test_label_column']

        # get the column name that will hold the ID for
        # both the training and the test data
        id_column = config_obj['id_column']