Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_fit_6():
    """Check that TPOT fit yields an optimized pipeline when given pandas inputs."""
    settings = {
        'random_state': 42,
        'population_size': 1,
        'offspring_size': 2,
        'generations': 1,
        'verbosity': 0,
    }
    tpot_obj = TPOTClassifier(**settings)
    tpot_obj.fit(pd_features, pd_target)

    # The feature container is still a pandas DataFrame after fitting.
    assert isinstance(pd_features, pd.DataFrame)
    # Fitting must have produced a DEAP individual and recorded a start time.
    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert tpot_obj._start_datetime is not None
def test_predict_proba():
    """Assert that the TPOT predict_proba function returns an array of shape (num_testing_rows, num_labels)."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    # Build a fixed single-step pipeline by hand so no optimization run is needed.
    pipeline_string = (
        'DecisionTreeClassifier('
        'input_matrix, '
        'DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8, '
        'DecisionTreeClassifier__min_samples_leaf=5, '
        'DecisionTreeClassifier__min_samples_split=5)'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj.fitted_pipeline_ = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj.fitted_pipeline_.fit(training_features, training_target)

    result = tpot_obj.predict_proba(testing_features)
    # NOTE(review): assumes the labels are the contiguous integers 0..max, so
    # max + 1 equals the number of classes — confirm against the fixture.
    num_labels = np.amax(testing_target) + 1

    # Previously the result was computed but never checked, so the test
    # could not fail on a wrong shape.
    assert result.shape == (testing_features.shape[0], num_labels)
def test_warm_start():
    """Assert that the TPOT warm_start flag stores the pop and pareto_front from the first run."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
        warm_start=True,
    )
    tpot_obj.fit(pretest_X, pretest_y)

    # The first run must leave its population and Pareto front on the object.
    assert tpot_obj._pop is not None
    assert tpot_obj._pareto_front is not None

    first_pop = tpot_obj._pop
    # Change the seed so a fresh (non-warm) start would be expected to differ.
    tpot_obj.random_state = 21
    tpot_obj.fit(pretest_X, pretest_y)

    # Previously `first_pop` was captured but never compared, so the second
    # fit was not verified at all.
    assert tpot_obj._pop == first_pop
def test_check_dataset_4():
    """Assert that _check_dataset raises ValueError when sample_weight and target differ in length."""
    settings = {
        'random_state': 42,
        'population_size': 1,
        'offspring_size': 2,
        'generations': 1,
        'verbosity': 0,
        'config_dict': 'TPOT light',
    }
    tpot_obj = TPOTClassifier(**settings)
    tpot_obj._fit_init()

    # range(1, n) has n - 1 entries: one weight fewer than there are targets.
    short_weights = list(range(1, len(training_target)))
    assert_raises(
        ValueError,
        tpot_obj._check_dataset,
        training_features,
        training_target,
        short_weights,
    )
def test_sparse_matrix_5():
    """Check that fitting on a sparse matrix with a custom config file completes without raising."""
    settings = {
        'random_state': 42,
        'population_size': 1,
        'offspring_size': 2,
        'generations': 1,
        'verbosity': 0,
        # Operator configuration is loaded from this file path.
        'config_dict': 'tests/test_config_sparse.py',
    }
    tpot_obj = TPOTClassifier(**settings)

    # No explicit assertion: the test passes as long as fit() does not raise.
    tpot_obj.fit(sparse_features, sparse_target)
def test_varOr_3():
    """Assert that varOr() applies reproduction only and keeps the fitness values of the offspring."""
    tpot_obj = TPOTClassifier(random_state=42, verbosity=0, config_dict='TPOT light')
    tpot_obj._fit_init()
    # Attach a disabled progress bar to the estimator.
    tpot_obj._pbar = tqdm(total=1, disable=True)

    population = tpot_obj._toolbox.population(n=5)
    # Give every individual a fitness so it counts as already evaluated.
    for individual in population:
        individual.fitness.values = (2, 1.0)

    # With crossover and mutation probabilities both 0.0, only reproduction is left.
    offspring = varOr(population, tpot_obj._toolbox, 5, cxpb=0.0, mutpb=0.0)
    unevaluated = [child for child in offspring if not child.fitness.valid]

    assert len(offspring) == 5
    assert len(unevaluated) == 0
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import time
# Timing script: fit TPOT on a quarter of the digits dataset with n_jobs=2,
# then print the held-out score and the elapsed wall-clock time.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    train_size=0.25,
    test_size=0.75,
)

tpot = TPOTClassifier(
    generations=3,
    population_size=5,
    offspring_size=10,
    verbosity=3,
    n_jobs=2,
    random_state=44,
)

# Only fitting and scoring are timed; data loading and splitting happen above.
time_start = time.time()
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
print('\nTime used with num_cpu = 2:', time.time() - time_start)
def test_random_ind_2():
    """Check that two TPOTRegressor instances with the same seed generate the same pipeline."""
    generated = []
    # Build two independent estimators with an identical seed and draw one
    # individual from each.
    for _ in range(2):
        tpot_obj = TPOTRegressor(random_state=43)
        tpot_obj._fit_init()
        generated.append(str(tpot_obj._toolbox.individual()))

    first, second = generated
    assert first == second
def test_set_param_recursive_2():
    """Check that set_param_recursive sets "random_state" to 42 on the estimator nested in SelectFromModel."""
    tpot_obj = TPOTRegressor()
    tpot_obj._fit_init()

    # Two-step pipeline: SelectFromModel (wrapping an ExtraTreesRegressor)
    # feeding a DecisionTreeRegressor.
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)

    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    # Each entry of `steps` is indexed as a pair; element 1 is the step object.
    selector_step = sklearn_pipeline.steps[0][1]
    final_step = sklearn_pipeline.steps[1][1]
    assert selector_step.estimator.random_state == 42
    assert final_step.random_state == 42
tpot_obj._fit_init()
assert tpot_obj._config_dict == tpot_mdr_classifier_config_dict
tpot_obj = TPOTClassifier(config_dict='TPOT sparse')
tpot_obj._fit_init()
assert tpot_obj._config_dict == classifier_config_sparse
tpot_obj = TPOTRegressor(config_dict='TPOT light')
tpot_obj._fit_init()
assert tpot_obj._config_dict == regressor_config_dict_light
tpot_obj = TPOTRegressor(config_dict='TPOT MDR')
tpot_obj._fit_init()
assert tpot_obj._config_dict == tpot_mdr_regressor_config_dict
tpot_obj = TPOTRegressor(config_dict='TPOT sparse')
tpot_obj._fit_init()
assert tpot_obj._config_dict == regressor_config_sparse