# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of the `_pre_test` decorator body — the enclosing
# `def` and the rest of the failure handling are outside this chunk, and the
# original indentation was lost, so nesting here must be read from syntax.
# clone individual before each func call so it is not altered for
# the possible next cycle loop
args = [self._toolbox.clone(arg) if isinstance(arg, creator.Individual) else arg for arg in args]
try:
# Silence all warnings while test-fitting the candidate pipeline; only
# hard failures (exceptions) should mark it as bad.
with warnings.catch_warnings():
warnings.simplefilter('ignore')
expr = func(self, *args, **kwargs)
# mutation operator returns tuple (ind,); crossover operator
# returns tuple of (ind1, ind2)
expr_tuple = expr if isinstance(expr, tuple) else (expr,)
for expr_test in expr_tuple:
# Render the DEAP expression tree as executable sklearn pipeline source
pipeline_code = generate_pipeline_code(
expr_to_tree(expr_test, self._pset),
self.operators
)
# eval() of code generated just above by generate_pipeline_code —
# self-produced source, not untrusted external input.
sklearn_pipeline = eval(pipeline_code, self.operators_context)
# Smoke-test the pipeline: fit it on a small pretest sample
# (classification vs. regression data chosen by task type).
if self.classification:
sklearn_pipeline.fit(pretest_X, pretest_y)
else:
sklearn_pipeline.fit(pretest_X_reg, pretest_y_reg)
bad_pipeline = False
except BaseException as e:
# Build a diagnostic naming the decorated operator, the attempt count
# and the raised error; presumably logged below — handler tail not
# visible in this chunk.
message = '_pre_test decorator: {fname}: num_test={n} {e}'.format(
n=num_test,
fname=func.__name__,
e=e
)
# Use the pbar output stream if it's active
def _save_periodic_pipeline(self, gen):
"""Export each Pareto-front pipeline of generation *gen* to the periodic
checkpoint folder, skipping pipelines that were already exported.

NOTE(review): fragment — the body of the final ``with open(...)`` block and
the matching ``except`` handler are not visible in this chunk, and original
indentation was lost.
"""
try:
self._create_periodic_checkpoint_folder()
# `items` and `keys` are traversed in opposite orders, so reversed()
# pairs each pipeline with its own fitness tuple.
for pipeline, pipeline_scores in zip(self._pareto_front.items, reversed(self._pareto_front.keys)):
idx = self._pareto_front.items.index(pipeline)
# wvalues[1] — second weighted-fitness component; presumably the CV
# score (component 0 being pipeline size) — TODO confirm against
# the fitness definition elsewhere in the file.
pareto_front_pipeline_score = pipeline_scores.wvalues[1]
sklearn_pipeline_str = generate_pipeline_code(expr_to_tree(pipeline, self._pset), self.operators)
to_write = export_pipeline(pipeline,
self.operators, self._pset,
self._imputed, pareto_front_pipeline_score,
self.random_state)
# dont export a pipeline you had
if self._exported_pipeline_text.count(sklearn_pipeline_str):
self._update_pbar(pbar_num=0, pbar_msg='Periodic pipeline was not saved, probably saved before...')
else:
# Unique filename: generation, Pareto index and a timestamp.
filename = os.path.join(self.periodic_checkpoint_folder,
'pipeline_gen_{}_idx_{}_{}.py'.format(gen,
idx ,
datetime.now().strftime('%Y.%m.%d_%H-%M-%S')
)
)
self._update_pbar(pbar_num=0, pbar_msg='Saving periodic pipeline from pareto front to {}'.format(filename))
with open(filename, 'w') as output_file:
# NOTE(review): fragment of the individual-evaluation loop — the enclosing
# function definition and the rest of the `try` block at the bottom are
# outside this chunk; original indentation was lost.
stats_dicts = {}
# 2 lists of DEAP individuals' string, their sklearn pipelines for parallel computing
eval_individuals_str = []
sklearn_pipeline_list = []
for individual in unique_individuals:
# Disallow certain combinations of operators because they will take too long or take up too much RAM
# This is a fairly hacky way to prevent TPOT from getting stuck on bad pipelines and should be improved in a future release
individual_str = str(individual)
if not len(individual): # a pipeline cannot be randomly generated
# Record sentinel stats (5000., -inf) marking the pipeline invalid so
# it is never selected; presumably (operator_count, score) — TODO
# confirm against _combine_individual_stats.
self.evaluated_individuals_[individual_str] = self._combine_individual_stats(5000.,
-float('inf'),
individual.statistics)
self._update_pbar(pbar_msg='Invalid pipeline encountered. Skipping its evaluation.')
continue
sklearn_pipeline_str = generate_pipeline_code(expr_to_tree(individual, self._pset), self.operators)
# More than one PolynomialFeatures step explodes feature count — reject.
if sklearn_pipeline_str.count('PolynomialFeatures') > 1:
self.evaluated_individuals_[individual_str] = self._combine_individual_stats(5000.,
-float('inf'),
individual.statistics)
self._update_pbar(pbar_msg='Invalid pipeline encountered. Skipping its evaluation.')
# Check if the individual was evaluated before
elif individual_str in self.evaluated_individuals_:
self._update_pbar(pbar_msg=('Pipeline encountered that has previously been evaluated during the '
'optimization process. Using the score from the previous evaluation.'))
else:
try:
# Transform the tree expression into an sklearn pipeline
sklearn_pipeline = self._toolbox.compile(expr=individual)
# Count the number of pipeline operators as a measure of pipeline complexity
operator_count = self._operator_count(individual)
def _compile_to_sklearn(self, expr):
    """Compile a DEAP pipeline into a sklearn pipeline.

    Parameters
    ----------
    expr: DEAP individual
        The DEAP pipeline to be compiled

    Returns
    -------
    sklearn_pipeline: sklearn.pipeline.Pipeline
        The compiled pipeline, with caching (``memory``) attached and,
        when a random state was requested, seeded recursively.
    """
    # Render the expression tree as pipeline source code, then evaluate it.
    # NOTE: eval() runs self-generated code from generate_pipeline_code —
    # not untrusted external input.
    sklearn_pipeline_str = generate_pipeline_code(expr_to_tree(expr, self._pset), self.operators)
    sklearn_pipeline = eval(sklearn_pipeline_str, self.operators_context)
    sklearn_pipeline.memory = self._memory
    # BUGFIX: explicit None check — a plain truthiness test silently skipped
    # seeding when random_state == 0, which is a valid seed.
    if self.random_state is not None:
        # Fix random state when the operator allows
        set_param_recursive(sklearn_pipeline.steps, 'random_state', self.random_state)
    return sklearn_pipeline