How to use the autogluon.TabularPrediction.fit function in autogluon

To help you get started, we’ve selected a few autogluon examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github awslabs / autogluon / tests / unittests / test_tabularHPO.py View on Github external
savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True) # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label_column = dataset['label_column']
            train_data = task.Dataset(file_path=train_file_path)
            test_data = task.Dataset(file_path=test_file_path)
            y_test = test_data[label_column]
            test_data = test_data.drop(labels=[label_column], axis=1)
            if fast_benchmark:
                train_data = train_data.head(subsample_size) # subsample for fast_benchmark
            predictor = None # reset from last Dataset
            if fast_benchmark:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                    hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                    time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
            else:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                                     hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
            results = predictor.fit_summary(verbosity=0)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = None  # We delete predictor here to test loading previously-trained predictor from file
            predictor = task.load(savedir)
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy_score'] # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2_score'] # unexplained variance score.
            performance_vals[idx] = perf
            print("Performance on dataset %s: %s   (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
            if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
                warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
github awslabs / autogluon / tests / unittests / test_tabular.py View on Github external
savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True) # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label_column = dataset['label_column']
            train_data = task.Dataset(file_path=train_file_path)
            test_data = task.Dataset(file_path=test_file_path)
            y_test = test_data[label_column]
            test_data = test_data.drop(labels=[label_column], axis=1)
            if fast_benchmark:
                train_data = train_data.head(subsample_size) # subsample for fast_benchmark
            predictor = None # reset from last Dataset
            if fast_benchmark:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                    hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                    time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
            else:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                                     hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
            results = predictor.fit_summary(verbosity=0)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = None  # We delete predictor here to test loading previously-trained predictor from file
            predictor = task.load(savedir)
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy_score'] # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2_score'] # unexplained variance score.
            performance_vals[idx] = perf
            print("Performance on dataset %s: %s   (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
            if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
                warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
github awslabs / autogluon / tests / unittests / test_tabularHPO.py View on Github external
# fetch files from s3:
                print("%s data not found locally, so fetching from %s" % (dataset['name'],  dataset['url']))
                os.system("wget " + dataset['url'] + " -O temp.zip && unzip -o temp.zip && rm temp.zip")
            
            savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True) # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label_column = dataset['label_column']
            train_data = task.Dataset(file_path=train_file_path)
            test_data = task.Dataset(file_path=test_file_path)
            y_test = test_data[label_column]
            test_data = test_data.drop(labels=[label_column], axis=1)
            if fast_benchmark:
                train_data = train_data.head(subsample_size) # subsample for fast_benchmark
            predictor = None # reset from last Dataset
            if fast_benchmark:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                    hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                    time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
            else:
                predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                                     hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
            results = predictor.fit_summary(verbosity=0)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = None  # We delete predictor here to test loading previously-trained predictor from file
            predictor = task.load(savedir)
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy_score'] # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2_score'] # unexplained variance score.
github awslabs / autogluon / examples / tabular / example_advanced_tabular.py View on Github external
"""

import autogluon as ag
from autogluon import TabularPrediction as task

# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(100) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column do we want to predict
savedir = 'ag_hpo_models/' # where to save trained models

hyperparams = {'NN': {'num_epochs': 10, 'activation': 'relu', 'dropout_prob': ag.Real(0.0,0.5)}, 
               'GBM': {'num_boost_round': 1000, 'learning_rate': ag.Real(0.01,0.1,log=True)} }

predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, 
                     hyperparameter_tune=True, hyperparameters=hyperparams, 
                     num_trials=5, time_limits=1*60, num_bagging_folds=0, stack_ensemble_levels=0) # since tuning_data = None, automatically determines train/validation split

results = predictor.fit_summary() # display detailed summary of fit() process

# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
print(test_data.head())

perf = predictor.evaluate(test_data) # shorthand way to evaluate our predictor if test-labels available

# Otherwise we make predictions and can evaluate them later:
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # Delete labels from test data since we wouldn't have them in practice
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
github awslabs / autogluon / examples / tabular / example_simple_tabular.py View on Github external
""" Example script for predicting columns of tables, demonstrating simple use-case """

from autogluon import TabularPrediction as task

# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(500) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column do we want to predict
savedir = 'ag_models/' # where to save trained models

predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir) # since tuning_data = None, automatically determines train/validation split
results = predictor.fit_summary() # display summary of models trained during fit()

# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # delete labels from test data since we wouldn't have them in practice
print(test_data.head())

predictor = task.load(savedir) # Unnecessary, we reload predictor just to demonstrate how to load previously-trained predictor from file
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)