How to use the chemprop.parsing.modify_train_args function in chemprop

To help you get started, we’ve selected a few chemprop examples based on popular ways modify_train_args is used in public projects.

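All of the examples below follow the same basic pattern: register chemprop's standard training flags on an ArgumentParser, parse the command line, and pass the resulting Namespace through modify_train_args before any training or hyperparameter search begins. A minimal sketch of that pattern follows; add_train_args is assumed to live in the same chemprop.parsing module as modify_train_args, and the flags mentioned in the comments (such as --data_path and --dataset_type) depend on your chemprop version.

from argparse import ArgumentParser

# assumed import path for add_train_args; modify_train_args is the function this page covers
from chemprop.parsing import add_train_args, modify_train_args

parser = ArgumentParser()
add_train_args(parser)       # register chemprop's standard training arguments
args = parser.parse_args()   # e.g. --data_path data.csv --dataset_type regression
modify_train_args(args)      # finalize the parsed args before training, as in the examples below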

wengong-jin / chemprop / end_to_end.py (View on GitHub)
    parser.add_argument('--train_path', type=str, required=True,
                        help='Path to CSV file containing training data in chronological order')
    parser.add_argument('--val_path', type=str, required=True,
                        help='Path to CSV file containing val data in chronological order')
    parser.add_argument('--train_save', type=str, required=True,
                        help='Path to CSV file for new train data')
    parser.add_argument('--val_save', type=str, required=True,
                        help='Path to CSV file for new val data')
    parser.add_argument('--val_frac', type=float, default=0.2,
                        help='Fraction of data to use for validation')
    parser.add_argument('--train_val_save', type=str, required=True,
                        help='Path to CSV file for combined train and val data')
    args = parser.parse_args()

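    # Configure logging, then finalize the training and hyperparameter-optimization arguments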
    set_logger(logger, args.save_dir, args.quiet)
    modify_train_args(args)
    modify_hyper_opt_args(args)

    # Preprocess train and validation data
    resplit(args)
    merge_train_val(args)
    for path in [args.train_save, args.val_save, args.train_val_save]:
        args.data_path = path
        args.save_path = path
        average_duplicates(args)

    # Optimize hyperparameters
    args.data_path = args.train_save
    args.separate_test_set = args.val_save
    optimize_hyperparameters(args)

    # Determine best hyperparameters, update args, and train
wengong-jin / chemprop / web.py (View on GitHub)
        return render_train(warnings=warnings, errors=errors)

    if dataset_type == 'regression' and unique_targets <= {0, 1}:
        errors.append('Selected regression dataset but all labels are 0 or 1. Select classification instead.')

        return render_train(warnings=warnings, errors=errors)

    if gpu is not None:
        if gpu == 'None':
            args.no_cuda = True
        else:
            args.gpu = int(gpu)

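    # Save results to a temporary directory and finalize the parsed args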
    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir
        modify_train_args(args)

        logger = logging.getLogger('train')
        logger.setLevel(logging.DEBUG)
        logger.propagate = False
        set_logger(logger, args.save_dir, args.quiet)

        process = mp.Process(target=progress_bar, args=(args, progress))
        process.start()
        training = 1

        # Run training
        task_scores = run_training(args, logger)
        process.join()

        # Reset globals
        training = 0
wengong-jin / chemprop / hyperparameter_optimization.py (View on GitHub)
    # Save best hyperparameter settings as JSON config file
    with open(args.config_save_path, 'w') as f:
        json.dump(best_result['hyperparams'], f, indent=4, sort_keys=True)


if __name__ == '__main__':
    parser = ArgumentParser()
    add_train_args(parser)
    parser.add_argument('--num_iters', type=int, default=20,
                        help='Number of hyperparameter choices to try')
    parser.add_argument('--config_save_path', type=str, required=True,
                        help='Path to .json file where best hyperparameter settings will be written')
    parser.add_argument('--log_path', type=str,
                        help='(Optional) Path to .log file where all results of the hyperparameter optimization will be written')
    args = parser.parse_args()
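    # Finalize the parsed arguments before running the grid search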
    modify_train_args(args)

    grid_search(args)
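
A hypothetical invocation of this script, assuming --data_path and --dataset_type are among the flags registered by add_train_args and using placeholder paths:

python hyperparameter_optimization.py --data_path data/train.csv --dataset_type regression --num_iters 20 --config_save_path configs/best.json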
wengong-jin / chemprop / model_comparison.py (View on GitHub)
                   logger: logging.Logger,
                   features_dir: str = None):
    for dataset_name in experiment_args.datasets:
        dataset_type, dataset_path, num_folds, metric = DATASETS[dataset_name]
        logger.info(dataset_name)

        # Set up args
        args = deepcopy(experiment_args)
        args.data_path = dataset_path
        args.dataset_type = dataset_type
        args.save_dir = os.path.join(args.save_dir, dataset_name)
        args.num_folds = num_folds
        args.metric = metric
        if features_dir is not None:
            args.features_path = [os.path.join(features_dir, dataset_name + '.pckl')]
        modify_train_args(args)

        # Set up logging for training
        os.makedirs(args.save_dir, exist_ok=True)
        fh = logging.FileHandler(os.path.join(args.save_dir, args.log_name))
        fh.setLevel(logging.DEBUG)

        # Cross validate
        TRAIN_LOGGER.addHandler(fh)
        mean_score, std_score = cross_validate(args, TRAIN_LOGGER)
        TRAIN_LOGGER.removeHandler(fh)

        # Record results
        logger.info(f'{mean_score} +/- {std_score} {metric}')
        temp_model = build_model(args)
        logger.info(f'num params: {param_count(temp_model):,}')