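# Module-level context assumed by the excerpted tests below (not shown in
# the excerpt): in the full test suite these imports live at the top of the
# file, and each test is a method of a unittest.TestCase subclass whose
# setUp provides self.X_train, self.y_train, self.X_test, and self.y_test.
# The exact import paths are assumptions based on common lale usage.
import random
import warnings

import sklearn.datasets
import sklearn.utils

import lale.type_checking
from lale.lib.lale import ConcatFeatures, NoOp
from lale.lib.sklearn import (
    KNeighborsClassifier,
    LinearSVC,
    LogisticRegression,
    Nystroem,
    OneHotEncoder,
    PCA,
    StandardScaler,
)
from lale.search.lale_grid_search_cv import get_grid_search_parameter_grids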
def test_increase_num_rows(self):
    from test.mock_custom_operators import IncreaseRows
    increase_rows = IncreaseRows()
    trainable = increase_rows >> LogisticRegression()
    iris = sklearn.datasets.load_iris()
    X, y = iris.data, iris.target
    trained = trainable.fit(X, y)
    predicted = trained.transform(X, y)
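    # Hedged sanity check (not in the original excerpt): the mock
    # IncreaseRows transformer passes extra rows downstream, so at minimum
    # the pipeline should produce a non-empty result.
    self.assertIsNotNone(predicted)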
def test_trained_pipeline_freeze_trainable(self):
    from lale.lib.sklearn import MinMaxScaler, LogisticRegression
    from lale.operators import TrainedPipeline
    trainable = MinMaxScaler() >> LogisticRegression()
    X = [[0.0], [1.0], [2.0]]
    y = [0.0, 0.0, 1.0]
    liquid = trainable.fit(X, y)
    self.assertIsInstance(liquid, TrainedPipeline)
    self.assertFalse(liquid.is_frozen_trainable())
    # freeze_trainable returns a frozen copy; the original pipeline
    # stays unfrozen.
    frozen = liquid.freeze_trainable()
    self.assertFalse(liquid.is_frozen_trainable())
    self.assertTrue(frozen.is_frozen_trainable())
    self.assertIsInstance(frozen, TrainedPipeline)
def test_comparison_with_scikit(self):
    import warnings
    warnings.filterwarnings("ignore")
    from lale.lib.sklearn import PCA
    import sklearn.datasets
    from lale.helpers import cross_val_score
    pca = PCA(n_components=3, random_state=42, svd_solver='arpack')
    nys = Nystroem(n_components=10, random_state=42)
    concat = ConcatFeatures()
    lr = LogisticRegression(random_state=42, C=0.1)
    trainable = (pca & nys) >> concat >> lr
    digits = sklearn.datasets.load_digits()
    X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)
    cv_results = cross_val_score(trainable, X, y)
    cv_results = ['{0:.1%}'.format(score) for score in cv_results]
    # Build the equivalent pure scikit-learn pipeline for comparison.
    from sklearn.pipeline import make_pipeline, FeatureUnion
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.linear_model import LogisticRegression as SklearnLR
    # Aliased so it does not shadow lale's cross_val_score imported above.
    from sklearn.model_selection import cross_val_score as sklearn_cross_val_score
    union = FeatureUnion([
        ("pca", SklearnPCA(n_components=3, random_state=42, svd_solver='arpack')),
        ("nys", SklearnNystroem(n_components=10, random_state=42))])
    lr = SklearnLR(random_state=42, C=0.1)
    pipeline = make_pipeline(union, lr)
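    # The excerpt stops before the actual comparison. A sketch of the likely
    # ending, assuming lale.helpers.cross_val_score defaults to 5-fold
    # accuracy scoring (hypothetical completion, not from the original):
    from sklearn.metrics import accuracy_score, make_scorer
    cv_results_scikit = sklearn_cross_val_score(
        pipeline, X, y, cv=5, scoring=make_scorer(accuracy_score))
    cv_results_scikit = ['{0:.1%}'.format(score) for score in cv_results_scikit]
    self.assertEqual(cv_results, cv_results_scikit)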
def test_clone_with_scikit1(self):
    lr = LogisticRegression()
    lr.get_params()
    from sklearn.base import clone
    lr_clone = clone(lr)
    self.assertNotEqual(lr, lr_clone)
    self.assertNotEqual(lr._impl, lr_clone._impl)
    iris = sklearn.datasets.load_iris()
    trained_lr = lr.fit(iris.data, iris.target)
    predicted = trained_lr.predict(iris.data)
    cloned_trained_lr = clone(trained_lr)
    self.assertNotEqual(trained_lr._impl, cloned_trained_lr._impl)
    predicted_clone = cloned_trained_lr.predict(iris.data)
    for i in range(len(iris.target)):
        self.assertEqual(predicted[i], predicted_clone[i])
def test_multiple_estimators_predict_predict_proba(self):
    pipeline = (
        StandardScaler() >>
        (LogisticRegression() & PCA()) >> ConcatFeatures() >>
        (NoOp() & LinearSVC()) >> ConcatFeatures() >>
        KNeighborsClassifier()
    )
    pipeline.fit(self.X_train, self.y_train)
    # Intermediate estimators feed their outputs forward as features; both
    # probability and class predictions come from the final classifier.
    predicted_proba = pipeline.predict_proba(self.X_test)
    predicted = pipeline.predict(self.X_test)
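    # Hedged shape checks (not in the original excerpt): one probability row
    # and one label per test sample.
    self.assertEqual(len(predicted_proba), len(self.X_test))
    self.assertEqual(len(predicted), len(self.X_test))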
def test_export_to_sklearn_pipeline3(self):
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import PCA, LogisticRegression, Nystroem
    from sklearn.feature_selection import SelectKBest
    from sklearn.pipeline import FeatureUnion
    lale_pipeline = (
        (PCA() >> SelectKBest(k=2)) &
        (Nystroem(random_state=42) >> SelectKBest(k=3)) &
        SelectKBest(k=3)
    ) >> ConcatFeatures() >> SelectKBest(k=2) >> LogisticRegression()
    trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
    sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
    self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'], FeatureUnion)
    self.assertIsInstance(sklearn_pipeline.named_steps['selectkbest'], SelectKBest)
    # Aliased so it does not shadow the lale LogisticRegression above.
    from sklearn.linear_model import LogisticRegression as SklearnLR
    self.assertIsInstance(sklearn_pipeline.named_steps['logisticregression'], SklearnLR)
    self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
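# assert_equal_predictions is a helper defined elsewhere in the test class.
# A minimal sketch of what it might look like (hypothetical, not from the
# original source): predict with both trained pipelines on the held-out
# data and compare element-wise.
def assert_equal_predictions(self, pipeline1, pipeline2):
    predictions1 = pipeline1.predict(self.X_test)
    predictions2 = pipeline2.predict(self.X_test)
    for p1, p2 in zip(predictions1, predictions2):
        self.assertEqual(p1, p2)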
def test_make_choice_with_instance(self):
    from lale.operators import make_choice
    from sklearn.datasets import load_iris
    iris = load_iris()
    X, y = iris.data, iris.target
    tfm = PCA() | Nystroem() | NoOp()
    # A planned operator choice cannot be fit directly; it must first be
    # resolved to a concrete operator, e.g. by an optimizer.
    with self.assertRaises(AttributeError):
        trained = tfm.fit(X, y)
    planned_pipeline1 = (OneHotEncoder | NoOp) >> tfm >> (LogisticRegression | KNeighborsClassifier)
    planned_pipeline2 = (OneHotEncoder | NoOp) >> (PCA | Nystroem) >> (LogisticRegression | KNeighborsClassifier)
    planned_pipeline3 = make_choice(OneHotEncoder, NoOp) >> make_choice(PCA, Nystroem) >> make_choice(LogisticRegression, KNeighborsClassifier)
def test_feature_preprocessor(self):
    # fproc_name is supplied by the enclosing test-generation code, which
    # stamps out one copy of this test per feature preprocessor.
    X_train, y_train = self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test
    import importlib
    module_name = ".".join(fproc_name.split('.')[0:-1])
    class_name = fproc_name.split('.')[-1]
    module = importlib.import_module(module_name)
    class_ = getattr(module, class_name)
    fproc = class_()
    from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
    if fproc._impl_class() == OneHotEncoderImpl:
        # fproc = OneHotEncoder(handle_unknown='ignore')
        # remove the hack when this is fixed
        fproc = PCA()
    # test_schemas_are_schemas
    lale.type_checking.validate_is_schema(fproc.input_schema_fit())
    lale.type_checking.validate_is_schema(fproc.input_schema_transform())
    lale.type_checking.validate_is_schema(fproc.output_schema_transform())
    lale.type_checking.validate_is_schema(fproc.hyperparam_schema())
    # test_init_fit_transform
    trained = fproc.fit(self.X_train, self.y_train)
    predictions = trained.transform(self.X_test)
    # test_predict_on_trainable
    trained = fproc.fit(X_train, y_train)
    fproc.transform(X_train)
    # test_to_json
    fproc.to_json()
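# A sketch of the test-generation pattern assumed above (hypothetical names,
# not from the original excerpt): a factory binds fproc_name and returns a
# uniquely named test method that is then attached to the TestCase class.
def create_function_test_feature_preprocessor(fproc_name):
    def test_feature_preprocessor(self):
        ...  # body as above, closing over fproc_name
    test_feature_preprocessor.__name__ = 'test_{0}'.format(fproc_name.split('.')[-1])
    return test_feature_preprocessor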
def dont_test_with_gridsearchcv2_auto(self):
    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    lr = LogisticRegression(random_state=42)
    pca = PCA(random_state=42, svd_solver='arpack')
    trainable = pca >> lr
    from sklearn.pipeline import Pipeline
    scikit_pipeline = Pipeline([
        (pca.name(), PCA(random_state=42, svd_solver='arpack')),
        (lr.name(), LogisticRegression(random_state=42))])
    all_parameters = get_grid_search_parameter_grids(trainable, num_samples=1)
    # sample a subset of the grids, otherwise the test takes too long
    parameters = random.sample(all_parameters, 2)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf = GridSearchCV(scikit_pipeline, parameters, cv=2,
                           scoring=make_scorer(accuracy_score))
        iris = load_iris()
        clf.fit(iris.data, iris.target)
        predicted = clf.predict(iris.data)
        accuracy_with_lale_operators = accuracy_score(iris.target, predicted)
    from sklearn.decomposition import PCA as SklearnPCA
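    # The excerpt is truncated here. Presumably the test goes on to build the
    # same pipeline from pure scikit-learn parts and compare accuracies; a
    # hypothetical completion along those lines (step names reuse pca.name()
    # and lr.name() so the sampled parameter grids still apply):
    from sklearn.linear_model import LogisticRegression as SklearnLR
    sklearn_only_pipeline = Pipeline([
        (pca.name(), SklearnPCA(random_state=42, svd_solver='arpack')),
        (lr.name(), SklearnLR(random_state=42))])
    clf = GridSearchCV(sklearn_only_pipeline, parameters, cv=2,
                       scoring=make_scorer(accuracy_score))
    clf.fit(iris.data, iris.target)
    predicted = clf.predict(iris.data)
    accuracy_with_sklearn_operators = accuracy_score(iris.target, predicted)
    self.assertEqual(accuracy_with_lale_operators, accuracy_with_sklearn_operators)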