Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_resumable_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath):
    """Fit a nested ResumablePipeline and verify which steps were saved.

    The pipeline under test is::

        ROOT
        |- SOME_STEP_1   (MultiplyByN, multiply_by=2)
        `- PIPELINE_2
           |- SOME_STEP_2   (MultiplyByN, multiply_by=4)
           |- CHECKPOINT    (DefaultCheckpoint)
           `- SOME_STEP_3   (MultiplyByN, multiply_by=6)

    After ``fit``, the root, the nested pipeline, both steps before the
    checkpoint and the checkpoint itself are expected on disk, while the step
    after the checkpoint is not.

    :param tmpdir: pytest temporary directory, used as the pipeline cache folder.
    """
    # Local import: the file's top-level import block is not visible from here.
    import os

    pipeline = ResumablePipeline(
        [
            (SOME_STEP_1, MultiplyByN(multiply_by=2)),
            (PIPELINE_2, ResumablePipeline([
                (SOME_STEP_2, MultiplyByN(multiply_by=4)),
                (CHECKPOINT, DefaultCheckpoint()),
                (SOME_STEP_3, MultiplyByN(multiply_by=6)),
            ])),
        ],
        cache_folder=tmpdir,
    )
    pipeline.name = ROOT

    pipeline = pipeline.fit(
        np.array(range(10)),
        np.array(range(10)),
    )

    not_saved_paths = [create_some_step3_path(tmpdir)]
    saved_paths = [
        create_root_path(tmpdir),
        create_pipeline2_path(tmpdir),
        create_some_step1_path(tmpdir),
        create_some_step2_path(tmpdir),
        create_some_checkpoint_path(tmpdir),
    ]

    # NOTE(review): assumes the create_*_path helpers return the filesystem
    # location of each step's saved artifact - confirm against the helpers.
    for saved_path in saved_paths:
        assert os.path.exists(saved_path), '{0} should have been saved'.format(saved_path)
    for not_saved_path in not_saved_paths:
        assert not os.path.exists(not_saved_path), '{0} should not have been saved'.format(not_saved_path)
def test_resumable_pipeline_fit_transform_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath):
    """Fit-transform a nested ResumablePipeline and verify outputs and saved steps.

    The pipeline under test is::

        ROOT
        |- SOME_STEP_1   (MultiplyByN, multiply_by=2)
        `- PIPELINE_2
           |- SOME_STEP_2   (MultiplyByN, multiply_by=4)
           |- CHECKPOINT    (DefaultCheckpoint)
           `- SOME_STEP_3   (MultiplyByN, multiply_by=6)

    After ``fit_transform``, the root, the nested pipeline, both steps before
    the checkpoint and the checkpoint itself are expected on disk, while the
    step after the checkpoint is not.

    :param tmpdir: pytest temporary directory, used as the pipeline cache folder.
    """
    # Local import: the file's top-level import block is not visible from here.
    import os

    data_inputs = np.array(range(10))

    pipeline = ResumablePipeline(
        [
            (SOME_STEP_1, MultiplyByN(multiply_by=2)),
            (PIPELINE_2, ResumablePipeline([
                (SOME_STEP_2, MultiplyByN(multiply_by=4)),
                (CHECKPOINT, DefaultCheckpoint()),
                (SOME_STEP_3, MultiplyByN(multiply_by=6)),
            ])),
        ],
        cache_folder=tmpdir,
    )
    pipeline.name = ROOT

    pipeline, outputs = pipeline.fit_transform(
        data_inputs,
        np.array(range(10)),
    )

    not_saved_paths = [create_some_step3_path(tmpdir)]
    saved_paths = [
        create_root_path(tmpdir),
        create_pipeline2_path(tmpdir),
        create_some_step1_path(tmpdir),
        create_some_step2_path(tmpdir),
        create_some_checkpoint_path(tmpdir),
    ]

    # The three MultiplyByN steps multiply by 2, 4 and 6 in sequence.
    # NOTE(review): assumes DefaultCheckpoint passes data through unchanged - confirm.
    assert np.array_equal(outputs, data_inputs * 2 * 4 * 6)

    # NOTE(review): assumes the create_*_path helpers return the filesystem
    # location of each step's saved artifact - confirm against the helpers.
    for saved_path in saved_paths:
        assert os.path.exists(saved_path), '{0} should have been saved'.format(saved_path)
    for not_saved_path in not_saved_paths:
        assert not os.path.exists(not_saved_path), '{0} should not have been saved'.format(not_saved_path)
def test_apply_method_on_pipeline_with_meta_step_should_call_method_on_each_steps():
    """Check that ``apply_method`` reaches the pipeline, a wrapped step and a plain step.

    A callable that sets ``multiply_by`` to 2 is passed to ``apply_method``.
    The pipeline itself, the step wrapped by ``OutputTransformerWrapper`` and
    the bare ``MultiplyByN`` step must all report ``multiply_by == 2`` afterwards.
    """
    def set_multiply_by_to_two(step):
        # A fresh HyperparameterSamples is built on every call, as in a lambda.
        return step.set_hyperparams(HyperparameterSamples({'multiply_by': 2}))

    wrapped_multiplier = OutputTransformerWrapper(MultiplyByN(1))
    plain_multiplier = MultiplyByN(1)
    pipeline = Pipeline([wrapped_multiplier, plain_multiplier])

    pipeline.apply_method(set_multiply_by_to_two)

    root_hyperparams = pipeline.get_hyperparams()
    assert root_hyperparams['multiply_by'] == 2

    wrapped_hyperparams = pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()
    assert wrapped_hyperparams['multiply_by'] == 2

    plain_hyperparams = pipeline['MultiplyByN'].get_hyperparams()
    assert plain_hyperparams['multiply_by'] == 2
(SOME_STEP_3, []),
]
dump(pipeline_2, create_pipeline2_path(tmpdir, True))
given_saved_some_step(multiply_by=2, name=SOME_STEP_1, path=create_some_step1_path(tmpdir, True))
given_saved_some_step(multiply_by=4, name=SOME_STEP_2, path=create_some_step2_path(tmpdir, True))
given_saved_some_step(multiply_by=6, name=SOME_STEP_3, path=create_some_step3_path(tmpdir, True))
checkpoint = DefaultCheckpoint()
checkpoint.name = CHECKPOINT
dump(checkpoint, create_some_checkpoint_path(tmpdir, True))
p = ResumablePipeline([
(SOME_STEP_1, MultiplyByN(multiply_by=1)),
(PIPELINE_2, ResumablePipeline([
(SOME_STEP_2, MultiplyByN(multiply_by=1)),
(CHECKPOINT, DefaultCheckpoint()),
(SOME_STEP_3, MultiplyByN(multiply_by=1))
]))
], cache_folder=tmpdir)
p.name = ROOT
return p
def test_apply_method_on_pipeline_with_meta_step_and_positional_argument_should_call_method_on_each_steps():
    """Check that ``apply_method`` forwards an extra argument to the callable.

    The callable receives the step plus a ``hyperparams`` argument supplied to
    ``apply_method``. The pipeline itself, the step wrapped by
    ``OutputTransformerWrapper`` and the bare ``MultiplyByN`` step must all
    report ``multiply_by == 2`` afterwards.
    """
    def set_given_hyperparams(step, hyperparams):
        return step.set_hyperparams(hyperparams)

    wrapped_multiplier = OutputTransformerWrapper(MultiplyByN(1))
    plain_multiplier = MultiplyByN(1)
    pipeline = Pipeline([wrapped_multiplier, plain_multiplier])

    pipeline.apply_method(
        set_given_hyperparams,
        hyperparams=HyperparameterSamples({'multiply_by': 2}),
    )

    root_hyperparams = pipeline.get_hyperparams()
    assert root_hyperparams['multiply_by'] == 2

    wrapped_hyperparams = pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()
    assert wrapped_hyperparams['multiply_by'] == 2

    plain_hyperparams = pipeline['MultiplyByN'].get_hyperparams()
    assert plain_hyperparams['multiply_by'] == 2
def test_apply_on_pipeline_with_meta_step_and_positional_argument_should_call_method_on_each_steps():
    """Check that ``apply`` invokes a method by name on the pipeline and its steps.

    ``apply`` is given the method name ``'set_hyperparams'`` and a
    ``hyperparams`` argument. The pipeline itself, the step wrapped by
    ``OutputTransformerWrapper`` and the bare ``MultiplyByN`` step must all
    report ``multiply_by == 2`` afterwards.
    """
    wrapped_multiplier = OutputTransformerWrapper(MultiplyByN(1))
    plain_multiplier = MultiplyByN(1)
    pipeline = Pipeline([wrapped_multiplier, plain_multiplier])

    new_hyperparams = HyperparameterSamples({'multiply_by': 2})
    pipeline.apply('set_hyperparams', hyperparams=new_hyperparams)

    root_hyperparams = pipeline.get_hyperparams()
    assert root_hyperparams['multiply_by'] == 2

    wrapped_hyperparams = pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()
    assert wrapped_hyperparams['multiply_by'] == 2

    plain_hyperparams = pipeline['MultiplyByN'].get_hyperparams()
    assert plain_hyperparams['multiply_by'] == 2
def main(tmpdir, sleep_time: float = 0, n_iter: int = 10):
    """Run a random hyperparameter search over a classic Pipeline and print the results.

    The pipeline alternates three ``MultiplyByN`` steps with two ``Sleep``
    steps applied per data input. ``RandomSearch`` is fitted on inputs 0..99
    against expected outputs 100..199, the returned model transforms the
    inputs, and the elapsed time and the outputs are printed.

    :param tmpdir: not used in the visible part of this function.
    :param sleep_time: value passed to each ``Sleep`` step.
    :param n_iter: number of iterations passed to ``RandomSearch``.
    """
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    search_space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 2),
        'multiplication_2__multiply_by': RandInt(1, 2),
        'multiplication_3__multiply_by': RandInt(1, 2),
    })

    print('Classic Pipeline:')

    steps = [
        ('multiplication_1', MultiplyByN()),
        ('sleep_1', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_2', MultiplyByN()),
        ('sleep_2', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_3', MultiplyByN()),
    ]
    pipeline = Pipeline(steps).set_hyperparams_space(search_space)

    # Time the search together with the final transform.
    started_at = time.time()
    random_search = RandomSearch(
        pipeline,
        n_iter=n_iter,
        higher_score_is_better=True,
    )
    best_model = random_search.fit(data_inputs, expected_outputs)
    outputs = best_model.transform(data_inputs)
    finished_at = time.time()

    # Computed but not used in the visible part of this function.
    actual_score = mean_squared_error(expected_outputs, outputs)

    elapsed_seconds = finished_at - started_at
    print('{0} seconds'.format(elapsed_seconds))
    print('output: {0}'.format(outputs))
def main():
    """Show that reversing a pipeline undoes its transform.

    A one-step pipeline multiplying by 2 transforms ``[1, 2]``. The reversed
    pipeline is then applied to the result. The round trip must give back the
    original inputs, and the forward pass must equal ``2 * data_inputs``.
    """
    data_inputs = np.array([1, 2])
    pipeline = Pipeline([MultiplyByN(multiply_by=2)])

    generated_outputs = pipeline.transform(data_inputs)

    reversed_pipeline = reversed(pipeline)
    regenerated_inputs = reversed_pipeline.transform(generated_outputs)

    assert np.array_equal(regenerated_inputs, data_inputs)
    assert np.array_equal(generated_outputs, 2 * data_inputs)
def main():
    """Build a nested pipeline, declare its hyperparameter space, and apply one sample.

    Two named ``MultiplyByN`` steps are followed by an unnamed inner pipeline
    holding two ``Identity`` steps and a wrapped ``PCA``. A hyperparameter
    space is set on the outer pipeline, one sample is drawn from it with
    ``rvs()`` and set back on the pipeline.
    """
    # Steps are created in their listed order: the named ones first, then the inner pipeline.
    outer_steps = [
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
    ]
    outer_steps.append(
        Pipeline([
            Identity(),
            Identity(),
            SKLearnWrapper(PCA(n_components=4)),
        ])
    )
    pipeline = Pipeline(outer_steps)

    hyperparams_space = HyperparameterSpace({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__SKLearnWrapper_PCA__n_components': RandInt(2, 3),
    })
    pipeline.set_hyperparams_space(hyperparams_space)

    samples = pipeline.get_hyperparams_space().rvs()
    pipeline.set_hyperparams(samples)