How to use the tpot.TPOTRegressor class in TPOT

To help you get started, we’ve selected a few TPOTRegressor examples based on popular ways it is used in public projects.
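
For orientation, here is a minimal end-to-end sketch of the public TPOTRegressor API before the project excerpts below; the dataset and parameter values are illustrative, not taken from any of those projects.

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from tpot import TPOTRegressor

# Illustrative data: any numeric feature matrix with a continuous target works.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Small generations/population_size keep the search short for a demo;
# random_state makes the run reproducible, as several tests below rely on.
tpot = TPOTRegressor(generations=5, population_size=20,
                     scoring='neg_mean_squared_error',
                     random_state=42, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

# Write the best pipeline found as a standalone Python script.
tpot.export('tpot_best_pipeline.py')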

github EpistasisLab / tpot / tests / tpot_tests.py
def test_random_ind_2():
    """Assert that the TPOTRegressor can generate the same pipeline with same random seed."""
    tpot_obj = TPOTRegressor(random_state=43)
    tpot_obj._fit_init()
    pipeline1 = str(tpot_obj._toolbox.individual())
    tpot_obj = TPOTRegressor(random_state=43)
    tpot_obj._fit_init()
    pipeline2 = str(tpot_obj._toolbox.individual())

    assert pipeline1 == pipeline2
github EpistasisLab / tpot / tests / export_tests.py
def test_set_param_recursive_2():
    """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    tpot_obj = TPOTRegressor()
    tpot_obj._fit_init()
    deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42
    assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
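
The helper under test, set_param_recursive from tpot.export_utils, can also be applied to an ordinary scikit-learn pipeline; here is a small sketch under that assumption, with an illustrative pipeline rather than a TPOT-compiled one.

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from tpot.export_utils import set_param_recursive

# A pipeline with a nested estimator inside SelectFromModel, mirroring the test above.
pipeline = make_pipeline(
    SelectFromModel(ExtraTreesRegressor(n_estimators=100), threshold=0.05),
    DecisionTreeRegressor(max_depth=8),
)

# Recursively set random_state on every step, including the nested estimator.
set_param_recursive(pipeline.steps, 'random_state', 42)
assert pipeline.steps[0][1].estimator.random_state == 42
assert pipeline.steps[1][1].random_state == 42
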
github EpistasisLab / tpot / tests / tpot_tests.py
    tpot_obj._fit_init()
    assert tpot_obj._config_dict == tpot_mdr_classifier_config_dict

    tpot_obj = TPOTClassifier(config_dict='TPOT sparse')
    tpot_obj._fit_init()
    assert tpot_obj._config_dict == classifier_config_sparse

    tpot_obj = TPOTRegressor(config_dict='TPOT light')
    tpot_obj._fit_init()
    assert tpot_obj._config_dict == regressor_config_dict_light

    tpot_obj = TPOTRegressor(config_dict='TPOT MDR')
    tpot_obj._fit_init()
    assert tpot_obj._config_dict == tpot_mdr_regressor_config_dict

    tpot_obj = TPOTRegressor(config_dict='TPOT sparse')
    tpot_obj._fit_init()
    assert tpot_obj._config_dict == regressor_config_sparse
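
The preset names exercised above ('TPOT light', 'TPOT MDR', 'TPOT sparse') can be passed straight to the constructor in normal use; a short sketch with illustrative search settings:

from tpot import TPOTRegressor

# 'TPOT light' restricts the search to a smaller set of fast operators;
# 'TPOT MDR' and 'TPOT sparse' are the other built-in presets shown above.
tpot = TPOTRegressor(config_dict='TPOT light',
                     generations=5, population_size=20, random_state=42)
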
github EpistasisLab / tpot / tests / tpot_tests.py
def test_timeout():
    """Assert that _wrapped_cross_val_score return Timeout in a time limit."""
    tpot_obj = TPOTRegressor(scoring='neg_mean_squared_error')
    tpot_obj._fit_init()
    # a complex pipeline for the test
    pipeline_string = (
        "ExtraTreesRegressor("
        "GradientBoostingRegressor(input_matrix, GradientBoostingRegressor__alpha=0.8,"
        "GradientBoostingRegressor__learning_rate=0.1,GradientBoostingRegressor__loss=huber,"
        "GradientBoostingRegressor__max_depth=5, GradientBoostingRegressor__max_features=0.5,"
        "GradientBoostingRegressor__min_samples_leaf=5, GradientBoostingRegressor__min_samples_split=5,"
        "GradientBoostingRegressor__n_estimators=100, GradientBoostingRegressor__subsample=0.25),"
        "ExtraTreesRegressor__bootstrap=True, ExtraTreesRegressor__max_features=0.5,"
        "ExtraTreesRegressor__min_samples_leaf=5, ExtraTreesRegressor__min_samples_split=5, "
        "ExtraTreesRegressor__n_estimators=100)"
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj.fitted_pipeline_ = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    # test _wrapped_cross_val_score with cv=20 so that it is impossible to finish in 1 second
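
The test drives the private _wrapped_cross_val_score helper directly; at the public-API level, per-pipeline and overall time limits are set through constructor arguments instead. A hedged sketch with illustrative values:

from tpot import TPOTRegressor

# max_eval_time_mins bounds how long a single candidate pipeline may be evaluated;
# max_time_mins bounds the whole optimization run.
tpot = TPOTRegressor(scoring='neg_mean_squared_error',
                     cv=5,
                     max_eval_time_mins=1,
                     max_time_mins=10,
                     random_state=42)
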
github EpistasisLab / tpot / tests / export_tests.py
    classifier_config_dict[test_operator_key_1]
)

TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2,
    classifier_config_dict[test_operator_key_2]
)

digits_data = load_digits()
training_features, testing_features, training_target, testing_target = \
    train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
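
In normal use, a script of this shape is produced by calling export() on a fitted instance; a minimal sketch using the training data prepared earlier in this test module (the output file name is illustrative):

from tpot import TPOTClassifier

tpot = TPOTClassifier(random_state=39, config_dict='TPOT light',
                      generations=5, population_size=20)
tpot.fit(training_features, training_target)
# Writes a standalone script similar to the expected_code string above.
tpot.export('tpot_exported_pipeline.py')
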
github EpistasisLab / tpot / tests / tpot_tests.py
def test_sample_weight_func():
    """Assert that the TPOTRegressor score function outputs a known score for a fixed pipeline with sample weights."""
    tpot_obj = TPOTRegressor(scoring='neg_mean_squared_error')
    tpot_obj._fit_init()
    # Reify pipeline with known score
    pipeline_string = (
        "ExtraTreesRegressor("
        "GradientBoostingRegressor(input_matrix, GradientBoostingRegressor__alpha=0.8,"
        "GradientBoostingRegressor__learning_rate=0.1,GradientBoostingRegressor__loss=huber,"
        "GradientBoostingRegressor__max_depth=5, GradientBoostingRegressor__max_features=0.5,"
        "GradientBoostingRegressor__min_samples_leaf=5, GradientBoostingRegressor__min_samples_split=5,"
        "GradientBoostingRegressor__n_estimators=100, GradientBoostingRegressor__subsample=0.25),"
        "ExtraTreesRegressor__bootstrap=True, ExtraTreesRegressor__max_features=0.5,"
        "ExtraTreesRegressor__min_samples_leaf=5, ExtraTreesRegressor__min_samples_split=5, "
        "ExtraTreesRegressor__n_estimators=100)"
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj.fitted_pipeline_ = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj.fitted_pipeline_.fit(training_features_r, training_target_r)
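
Sample weights are supplied through the public fit() method; a sketch under that assumption, with illustrative weights and the same training arrays used above:

import numpy as np
from tpot import TPOTRegressor

tpot = TPOTRegressor(generations=5, population_size=20,
                     scoring='neg_mean_squared_error', random_state=42)

# Up-weight later samples; fit() forwards sample_weight to pipeline steps that accept it.
weights = np.linspace(0.1, 1.0, len(training_target_r))
tpot.fit(training_features_r, training_target_r, sample_weight=weights)
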
github TwoRavens / TwoRavens / automl / solver_tpot.py
def solve_tpot(specification, fit_parameters):
    import tpot

    output_folder = os.path.join(*specification['results_path'].split('/'), 'output')

    # if os.path.exists(output_folder):
    #     shutil.rmtree(output_folder)

    data = pandas.read_csv(specification['dataset_path'])
    X = data[specification['problem']['predictors']]
    y = data[specification['problem']['target']]

    automl = {
        'regression': tpot.TPOTRegressor,
        'classification': tpot.TPOTClassifier
    }[specification['problem']['task']](**fit_parameters)

    automl.fit(X, y)

    responses = []
    # selected models along the cost-complexity vs accuracy frontier
    for model_str in automl.pareto_front_fitted_pipelines_:
        model = automl.pareto_front_fitted_pipelines_[model_str]

        actual_values = X[specification['problem']['target']]
        fitted_values = model.predict(X)

        data_pointer = os.path.join(output_folder, str(uuid.uuid4()) + '.csv')

        pandas.DataFrame(
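
pareto_front_fitted_pipelines_ maps each pipeline's string representation to its fitted scikit-learn pipeline (it is populated when verbosity is set to 3); a trimmed sketch of iterating it, reusing the automl and X objects from the excerpt above:

# Each entry pairs a pipeline string with a fitted scikit-learn pipeline
# sitting on the accuracy / complexity Pareto front.
for model_str, model in automl.pareto_front_fitted_pipelines_.items():
    fitted_values = model.predict(X)
    print(model_str, fitted_values[:5])
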
github hackingmaterials / automatminer / automatminer / automl / adaptors.py
"""
        # Prevent goofy pandas casting by casting to native
        y = df[target].values.tolist()
        X = df.drop(columns=target).values.tolist()

        # Determine learning type based on whether the problem is classification or regression
        self.mode = regression_or_classification(df[target])
        if self.mode == "classification":
            self.tpot_kwargs['config_dict'] = self.tpot_kwargs.get(
                'config_dict', classifier_config_dict_mb)
            self._backend = TPOTClassifier(**self.tpot_kwargs)
        elif self.mode == "regression":
            self.tpot_kwargs['config_dict'] = self.tpot_kwargs.get(
                'config_dict', regressor_config_dict_mb)
            self._backend = TPOTRegressor(**self.tpot_kwargs)
        else:
            raise ValueError("Learning type {} not recognized as a valid mode "
                             "for {}".format(self.mode, self.__class__.__name__))
        self._features = df.drop(columns=target).columns.tolist()
        self._ml_data = {"X": X, "y": y}
        self.fitted_target = target
        self._logger.info("TPOT fitting started.")
        self._backend = self._backend.fit(X, y, **fit_kwargs)
        self._logger.info("TPOT fitting finished.")
        return self
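
The same classification-vs-regression dispatch can be written more compactly when the task type is already known; an illustrative sketch (mode, X and y are assumed to be prepared as in the adaptor above):

from tpot import TPOTClassifier, TPOTRegressor

# mode is assumed to be either 'classification' or 'regression'.
backend_cls = TPOTClassifier if mode == 'classification' else TPOTRegressor
backend = backend_cls(generations=5, population_size=20, random_state=42)
backend = backend.fit(X, y)
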
github ritabratamaiti / RapidML / RapidML / __init__.py
def rapid_regressor_arr(X,
                        Y,
                        model=TPOTRegressor(
                            generations=5, population_size=50, verbosity=2),
                        name="RapidML_Files"):

    print(
        '\nUsing RapidML Regressor with arrays, Inputs will not be label encoded; Experimental, For Issues Visit: https://github.com/ritabratamaiti/RapidML/issues or Contact Author: ritabratamaiti@hiretrex.com'
    )

    if (type(model) != TPOTRegressor):
        raise ValueError('\nError!! Model must be a TPOTRegressor')

    newpath = name
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    str1 = '''
from flask import Flask, request
github ow2-proactive / proactive-examples / MachineLearningScripts / resources / catalog / Train_Model.py
        else:
            from sklearn.ensemble import RandomForestClassifier
            model = RandomForestClassifier(**alg.input_variables.__dict__)
    elif alg.name == 'XGBoost' and alg.type == 'classification':
        from xgboost.sklearn import XGBClassifier
        model = XGBClassifier(**alg.input_variables.__dict__)
    elif alg.name == 'CatBoost' and alg.type == 'classification':
        from catboost import CatBoostClassifier
        model = CatBoostClassifier(**alg.input_variables.__dict__)

    # -------------------------------------------------------------
    # Regression algorithms
    #
    elif alg.name == 'TPOT_Regressor':
        from tpot import TPOTRegressor
        model = TPOTRegressor(
            generations=alg.generations,
            cv=alg.cv,
            scoring=alg.scoring,
            verbosity=alg.verbosity
        )
    elif alg.name == 'AutoSklearn_Regressor':
        from autosklearn import regression
        if alg.sampling:
            model = regression.AutoSklearnRegressor(
                time_left_for_this_task=alg.task_time,
                per_run_time_limit=alg.run_time,
                resampling_strategy=alg.sampling_strategy,
                resampling_strategy_arguments={'folds': alg.folds}
            )
        else:
            model = regression.AutoSklearnRegressor(