Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_random_ind_2():
    """Check that two TPOTRegressor instances seeded with 43 produce the same individual."""
    rendered = []
    # Build two independent regressors from the same seed and record the
    # string form of the first individual each toolbox generates.
    for _ in range(2):
        regressor = TPOTRegressor(random_state=43)
        regressor._fit_init()
        rendered.append(str(regressor._toolbox.individual()))

    first_pipeline, second_pipeline = rendered
    assert first_pipeline == second_pipeline
def test_set_param_recursive_2():
    """Check that set_param_recursive sets random_state=42 on the estimator nested in SelectFromModel."""
    # SelectFromModel (wrapping ExtraTreesRegressor) feeding a DecisionTreeRegressor.
    nested_pipeline = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )

    regressor = TPOTRegressor()
    regressor._fit_init()

    # Parse the string into a DEAP individual, then compile it to a sklearn pipeline.
    individual = creator.Individual.from_string(nested_pipeline, regressor._pset)
    compiled = regressor._toolbox.compile(expr=individual)

    set_param_recursive(compiled.steps, 'random_state', 42)

    # Step 0 is the selector: the seed must have reached its inner estimator.
    selector = compiled.steps[0][1]
    assert selector.estimator.random_state == 42
    # Step 1 is the final regressor and carries the seed directly.
    final_step = compiled.steps[1][1]
    assert final_step.random_state == 42
# NOTE(review): the statement that builds `tpot_obj` for this first check is
# not visible in this excerpt; presumably a classifier created with
# config_dict='TPOT MDR' -- confirm against the full test.
tpot_obj._fit_init()
assert tpot_obj._config_dict == tpot_mdr_classifier_config_dict
# Classifier built with the 'TPOT sparse' preset: after _fit_init() its
# _config_dict must equal classifier_config_sparse.
tpot_obj = TPOTClassifier(config_dict='TPOT sparse')
tpot_obj._fit_init()
assert tpot_obj._config_dict == classifier_config_sparse
# Regressor built with the 'TPOT light' preset -> regressor_config_dict_light.
tpot_obj = TPOTRegressor(config_dict='TPOT light')
tpot_obj._fit_init()
assert tpot_obj._config_dict == regressor_config_dict_light
# Regressor built with the 'TPOT MDR' preset -> tpot_mdr_regressor_config_dict.
tpot_obj = TPOTRegressor(config_dict='TPOT MDR')
tpot_obj._fit_init()
assert tpot_obj._config_dict == tpot_mdr_regressor_config_dict
# Regressor built with the 'TPOT sparse' preset -> regressor_config_sparse.
tpot_obj = TPOTRegressor(config_dict='TPOT sparse')
tpot_obj._fit_init()
assert tpot_obj._config_dict == regressor_config_sparse
def test_timeout():
    """Set up a TPOTRegressor with a deliberately expensive pipeline for the timeout check.

    NOTE(review): this excerpt ends before the call to
    _wrapped_cross_val_score, so only the setup is visible here.
    """
    regressor = TPOTRegressor(scoring='neg_mean_squared_error')
    regressor._fit_init()

    # Gradient boosting stacked under extra trees: intentionally heavy.
    heavy_pipeline = (
        "ExtraTreesRegressor("
        "GradientBoostingRegressor(input_matrix, GradientBoostingRegressor__alpha=0.8,"
        "GradientBoostingRegressor__learning_rate=0.1,GradientBoostingRegressor__loss=huber,"
        "GradientBoostingRegressor__max_depth=5, GradientBoostingRegressor__max_features=0.5,"
        "GradientBoostingRegressor__min_samples_leaf=5, GradientBoostingRegressor__min_samples_split=5,"
        "GradientBoostingRegressor__n_estimators=100, GradientBoostingRegressor__subsample=0.25),"
        "ExtraTreesRegressor__bootstrap=True, ExtraTreesRegressor__max_features=0.5,"
        "ExtraTreesRegressor__min_samples_leaf=5, ExtraTreesRegressor__min_samples_split=5, "
        "ExtraTreesRegressor__n_estimators=100)"
    )

    individual = creator.Individual.from_string(heavy_pipeline, regressor._pset)
    regressor._optimized_pipeline = individual
    regressor.fitted_pipeline_ = regressor._toolbox.compile(
        expr=regressor._optimized_pipeline
    )
    # Next step (not shown in this excerpt): run _wrapped_cross_val_score with
    # cv=20 so that it cannot finish within 1 second.
classifier_config_dict[test_operator_key_1]
)
# Build the operator class, plus its companion argument classes, for the second
# test operator key from that key's entry in classifier_config_dict.
TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
test_operator_key_2,
classifier_config_dict[test_operator_key_2]
)
# Load the digits dataset, cast features and target to float64, and split them
# into train/test parts with a fixed seed (random_state=42).
digits_data = load_digits()
training_features, testing_features, training_target, testing_target = \
train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42)
# Default classifier instance with _fit_init() already called.
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
# Default regressor instance with _fit_init() already called.
tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()
def test_export_random_ind():
"""Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline = tpot_obj._toolbox.individual()
expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from tpot.export_utils import set_param_recursive
# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
def test_sample_weight_func():
    """Build a fixed TPOTRegressor pipeline and fit it on the regression training data.

    NOTE(review): the original description says this test checks a known
    score computed with sample weights; that part is not visible in this
    excerpt, which ends after the fit call.
    """
    regressor = TPOTRegressor(scoring='neg_mean_squared_error')
    regressor._fit_init()

    # Fixed pipeline with a known score.
    known_pipeline = (
        "ExtraTreesRegressor("
        "GradientBoostingRegressor(input_matrix, GradientBoostingRegressor__alpha=0.8,"
        "GradientBoostingRegressor__learning_rate=0.1,GradientBoostingRegressor__loss=huber,"
        "GradientBoostingRegressor__max_depth=5, GradientBoostingRegressor__max_features=0.5,"
        "GradientBoostingRegressor__min_samples_leaf=5, GradientBoostingRegressor__min_samples_split=5,"
        "GradientBoostingRegressor__n_estimators=100, GradientBoostingRegressor__subsample=0.25),"
        "ExtraTreesRegressor__bootstrap=True, ExtraTreesRegressor__max_features=0.5,"
        "ExtraTreesRegressor__min_samples_leaf=5, ExtraTreesRegressor__min_samples_split=5, "
        "ExtraTreesRegressor__n_estimators=100)"
    )

    individual = creator.Individual.from_string(known_pipeline, regressor._pset)
    regressor._optimized_pipeline = individual
    regressor.fitted_pipeline_ = regressor._toolbox.compile(
        expr=regressor._optimized_pipeline
    )

    fitted = regressor.fitted_pipeline_
    fitted.fit(training_features_r, training_target_r)
def solve_tpot(specification, fit_parameters):
import tpot
output_folder = os.path.join(*specification['results_path'].split('/'), 'output')
# if os.path.exists(output_folder):
# shutil.rmtree(output_folder)
data = pandas.read_csv(specification['dataset_path'])
X = data[specification['problem']['predictors']]
y = data[specification['problem']['target']]
automl = {
'regression': tpot.TPOTRegressor,
'classification': tpot.TPOTClassifier
}[specification['problem']['task']](**fit_parameters)
automl.fit(X, y)
responses = []
# selected models along the cost-complexity vs accuracy frontier
for model_str in automl.pareto_front_fitted_pipelines_:
model = automl.pareto_front_fitted_pipelines_[model_str]
actual_values = X[specification['problem']['target']]
fitted_values = model.predict(X)
data_pointer = os.path.join(output_folder, str(uuid.uuid4()) + '.csv')
pandas.DataFrame(
"""
# Prevent goofy pandas casting by casting to native
y = df[target].values.tolist()
X = df.drop(columns=target).values.tolist()
# Determine learning type based on whether classification or regression
self.mode = regression_or_classification(df[target])
if self.mode == "classification":
self.tpot_kwargs['config_dict'] = self.tpot_kwargs.get(
'config_dict', classifier_config_dict_mb)
self._backend = TPOTClassifier(**self.tpot_kwargs)
elif self.mode == "regression":
self.tpot_kwargs['config_dict'] = self.tpot_kwargs.get(
'config_dict', regressor_config_dict_mb)
self._backend = TPOTRegressor(**self.tpot_kwargs)
else:
raise ValueError("Learning type {} not recognized as a valid mode "
"for {}".format(self.mode, self.__class__.__name__))
self._features = df.drop(columns=target).columns.tolist()
self._ml_data = {"X": X, "y": y}
self.fitted_target = target
self._logger.info("TPOT fitting started.")
self._backend = self._backend.fit(X, y, **fit_kwargs)
self._logger.info("TPOT fitting finished.")
return self
def rapid_regressor_arr(X,
Y,
model=TPOTRegressor(
generations=5, population_size=50, verbosity=2),
name="RapidML_Files"):
print(
'\nUsing RapidML Regressor with arrays, Inputs will not be label encoded; Experimental, For Issues Visit: https://github.com/ritabratamaiti/RapidML/issues or Contact Author: ritabratamaiti@hiretrex.com'
)
if (type(model) != TPOTRegressor):
raise ValueError('\nError!! Model must be a TPOTRegressor')
newpath = name
if not os.path.exists(newpath):
os.makedirs(newpath)
str1 = '''
from flask import Flask, request
else:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(**alg.input_variables.__dict__)
elif alg.name == 'XGBoost' and alg.type == 'classification':
from xgboost.sklearn import XGBClassifier
model = XGBClassifier(**alg.input_variables.__dict__)
elif alg.name == 'CatBoost' and alg.type == 'classification':
from catboost import CatBoostClassifier
model = CatBoostClassifier(**alg.input_variables.__dict__)
# -------------------------------------------------------------
# Regression algorithms
#
elif alg.name == 'TPOT_Regressor':
from tpot import TPOTRegressor
model = TPOTRegressor(
generations=alg.generations,
cv=alg.cv,
scoring=alg.scoring,
verbosity=alg.verbosity
)
elif alg.name == 'AutoSklearn_Regressor':
from autosklearn import regression
if alg.sampling:
model = regression.AutoSklearnRegressor(
time_left_for_this_task=alg.task_time,
per_run_time_limit=alg.run_time,
resampling_strategy=alg.sampling_strategy,
resampling_strategy_arguments={'folds': alg.folds}
)
else:
model = regression.AutoSklearnRegressor(