How to use the ludwig.utils.data_utils.file_exists_with_diff_extension function in ludwig

To help you get started, we’ve selected a few examples of ludwig.utils.data_utils.file_exists_with_diff_extension, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: uber/ludwig — ludwig/data/preprocessing.py (View on GitHub)
test_set = load_data(
                    test_fp,
                    model_definition['input_features'],
                    model_definition['output_features'],
                    split_data=False
                )

    elif data_type == 'csv':
        data_hdf5_fp = replace_file_extension(
            all_data_fp, 'hdf5'
        )
        model_definition['data_hdf5_fp'] = data_hdf5_fp

        if all_data_fp is not None:
            if (file_exists_with_diff_extension(all_data_fp, 'hdf5') and
                    file_exists_with_diff_extension(all_data_fp, 'json')):
                # use hdf5 data instead
                logger.info(
                    'Found hdf5 and json with the same filename '
                    'of the csv, using them instead'
                )
                return preprocess_for_training_by_type(
                    model_definition,
                    'hdf5',
                    all_data_fp=replace_file_extension(all_data_fp, 'hdf5'),
                    train_set_metadata_json=replace_file_extension(all_data_fp,
                                                                   'json'),
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
            else:
Source: uber/ludwig — ludwig/data/preprocessing.py (View on GitHub)
if test_fp is not None:
                test_set = load_data(
                    test_fp,
                    model_definition['input_features'],
                    model_definition['output_features'],
                    split_data=False
                )

    elif data_type == 'csv':
        data_hdf5_fp = replace_file_extension(
            all_data_fp, 'hdf5'
        )
        model_definition['data_hdf5_fp'] = data_hdf5_fp

        if all_data_fp is not None:
            if (file_exists_with_diff_extension(all_data_fp, 'hdf5') and
                    file_exists_with_diff_extension(all_data_fp, 'json')):
                # use hdf5 data instead
                logger.info(
                    'Found hdf5 and json with the same filename '
                    'of the csv, using them instead'
                )
                return preprocess_for_training_by_type(
                    model_definition,
                    'hdf5',
                    all_data_fp=replace_file_extension(all_data_fp, 'hdf5'),
                    train_set_metadata_json=replace_file_extension(all_data_fp,
                                                                   'json'),
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
Source: uber/ludwig — ludwig/data/preprocessing.py (View on GitHub)
test_set,
                    validation_set,
                    train_set_metadata
                ) = _preprocess_csv_for_training(
                    features=features,
                    data_csv=all_data_fp,
                    data_train_csv=None,
                    data_validation_csv=None,
                    data_test_csv=None,
                    train_set_metadata_json=train_set_metadata_json,
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
        else:
            if (file_exists_with_diff_extension(train_fp, 'hdf5') and
                    file_exists_with_diff_extension(train_fp, 'json') and
                    file_exists_with_diff_extension(validation_fp, 'hdf5') and
                    file_exists_with_diff_extension(test_fp, 'hdf5')):
                logger.info(
                    'Found hdf5 and json with the same filename '
                    'of the csvs, using them instead.'
                )
                return preprocess_for_training_by_type(
                    model_definition,
                    'hdf5',
                    train_fp=replace_file_extension(train_fp, 'hdf5'),
                    validation_fp=replace_file_extension(
                        validation_fp,
                        'hdf5'
                    ),
                    test_fp=replace_file_extension(test_fp, 'hdf5'),
Source: uber/ludwig — ludwig/data/preprocessing.py (View on GitHub)
validation_set,
                    train_set_metadata
                ) = _preprocess_csv_for_training(
                    features=features,
                    data_csv=all_data_fp,
                    data_train_csv=None,
                    data_validation_csv=None,
                    data_test_csv=None,
                    train_set_metadata_json=train_set_metadata_json,
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
        else:
            if (file_exists_with_diff_extension(train_fp, 'hdf5') and
                    file_exists_with_diff_extension(train_fp, 'json') and
                    file_exists_with_diff_extension(validation_fp, 'hdf5') and
                    file_exists_with_diff_extension(test_fp, 'hdf5')):
                logger.info(
                    'Found hdf5 and json with the same filename '
                    'of the csvs, using them instead.'
                )
                return preprocess_for_training_by_type(
                    model_definition,
                    'hdf5',
                    train_fp=replace_file_extension(train_fp, 'hdf5'),
                    validation_fp=replace_file_extension(
                        validation_fp,
                        'hdf5'
                    ),
                    test_fp=replace_file_extension(test_fp, 'hdf5'),
                    train_set_metadata_json=replace_file_extension(
Source: uber/ludwig — ludwig/data/preprocessing.py (View on GitHub)
) = _preprocess_csv_for_training(
                    features=features,
                    data_csv=all_data_fp,
                    data_train_csv=None,
                    data_validation_csv=None,
                    data_test_csv=None,
                    train_set_metadata_json=train_set_metadata_json,
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
        else:
            if (file_exists_with_diff_extension(train_fp, 'hdf5') and
                    file_exists_with_diff_extension(train_fp, 'json') and
                    file_exists_with_diff_extension(validation_fp, 'hdf5') and
                    file_exists_with_diff_extension(test_fp, 'hdf5')):
                logger.info(
                    'Found hdf5 and json with the same filename '
                    'of the csvs, using them instead.'
                )
                return preprocess_for_training_by_type(
                    model_definition,
                    'hdf5',
                    train_fp=replace_file_extension(train_fp, 'hdf5'),
                    validation_fp=replace_file_extension(
                        validation_fp,
                        'hdf5'
                    ),
                    test_fp=replace_file_extension(test_fp, 'hdf5'),
                    train_set_metadata_json=replace_file_extension(
                        train_fp,
                        'json'