How to use the lale.lib.sklearn.PCA function in lale

To help you get started, we’ve selected a few lale examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / lale / test / test_core_pipeline.py View on Github external
def test_export_to_sklearn_pipeline3(self):
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression, SVC 
        from sklearn.feature_selection import SelectKBest
        from lale.lib.sklearn import Nystroem
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = ((PCA() >> SelectKBest(k=2)) & (Nystroem(random_state = 42) >> SelectKBest(k=3))
         & (SelectKBest(k=3))) >> ConcatFeatures() >> SelectKBest(k=2) >> LogisticRegression()
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'], FeatureUnion)
        self.assertIsInstance(sklearn_pipeline.named_steps['selectkbest'], SelectKBest)
        from sklearn.linear_model import LogisticRegression
        self.assertIsInstance(sklearn_pipeline.named_steps['logisticregression'], LogisticRegression)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
github IBM / lale / test / test_core_operators.py View on Github external
def test_make_choice_with_instance(self):
        from lale.operators import make_union, make_choice, make_pipeline
        from sklearn.datasets import load_iris
        iris = load_iris()
        X, y = iris.data, iris.target
        tfm = PCA() | Nystroem() | NoOp()
        with self.assertRaises(AttributeError):
            trained = tfm.fit(X, y)
        planned_pipeline1 = (OneHotEncoder | NoOp) >> tfm >> (LogisticRegression | KNeighborsClassifier)
        planned_pipeline2 = (OneHotEncoder | NoOp) >> (PCA | Nystroem) >> (LogisticRegression | KNeighborsClassifier)
        planned_pipeline3 = make_choice(OneHotEncoder, NoOp) >> make_choice(PCA, Nystroem) >> make_choice(LogisticRegression, KNeighborsClassifier)
github IBM / lale / test / test_core_operators.py View on Github external
def test_feature_preprocessor(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(fproc_name.split('.')[0:-1])
        class_name = fproc_name.split('.')[-1]
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        fproc = class_()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
        if fproc._impl_class() == OneHotEncoderImpl:
            #fproc = OneHotEncoder(handle_unknown = 'ignore')
            #remove the hack when this is fixed
            fproc = PCA()
        #test_schemas_are_schemas
        lale.type_checking.validate_is_schema(fproc.input_schema_fit())
        lale.type_checking.validate_is_schema(fproc.input_schema_transform())
        lale.type_checking.validate_is_schema(fproc.output_schema_transform())
        lale.type_checking.validate_is_schema(fproc.hyperparam_schema())

        #test_init_fit_transform
        trained = fproc.fit(self.X_train, self.y_train)
        predictions = trained.transform(self.X_test)

        #test_predict_on_trainable
        trained = fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        #test_to_json
        fproc.to_json()
github IBM / lale / test / test_pipeline.py View on Github external
def dont_test_with_gridsearchcv2_auto(self):
        from sklearn.model_selection import GridSearchCV
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        lr = LogisticRegression(random_state = 42)
        pca = PCA(random_state = 42, svd_solver = 'arpack')
        trainable = pca >> lr
        from sklearn.pipeline import Pipeline
        scikit_pipeline = Pipeline([(pca.name(), PCA(random_state = 42, svd_solver = 'arpack')), (lr.name(), LogisticRegression(random_state = 42))])
        all_parameters = get_grid_search_parameter_grids(trainable, num_samples=1)
        # otherwise the test takes too long
        parameters = random.sample(all_parameters, 2)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            clf = GridSearchCV(scikit_pipeline, parameters, cv=2, scoring=make_scorer(accuracy_score))
            iris = load_iris()
            clf.fit(iris.data, iris.target)
            predicted = clf.predict(iris.data)
            accuracy_with_lale_operators = accuracy_score(iris.target, predicted)

        from sklearn.pipeline import Pipeline
        from sklearn.decomposition import PCA as SklearnPCA
github IBM / lale / test / test_optimizers.py View on Github external
def test_fit_smaller_trials(self):
        from sklearn.datasets import load_iris
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem
        from sklearn.metrics import accuracy_score

        ensemble = TopKVotingClassifier(estimator=(PCA() | Nystroem()) >> (LogisticRegression()|KNeighborsClassifier()), args_to_optimizer={'max_evals':3}, k=20)
        trained = ensemble.fit(self.X_train, self.y_train)
        final_ensemble = trained._impl._best_estimator
        self.assertLessEqual(len(final_ensemble._impl._wrapped_model.estimators), 3)
github IBM / lale / test / test_optimizers.py View on Github external
def test_with_gridsearchcv_auto_wrapped_pipe1(self):
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
  
        lr = LogisticRegression()
        pca = PCA()
        trainable = pca >> lr

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.lale import GridSearchCV
            clf = GridSearchCV(
                estimator=trainable, lale_num_samples=1, lale_num_grids=1,
                cv=2, scoring=make_scorer(accuracy_score))
            iris = load_iris()
            clf.fit(iris.data, iris.target)
github IBM / lale / test / test_interoperability.py View on Github external
with self.assertRaises(ValueError):
            res = class_()

        #test_schemas_are_schemas
        lale.type_checking.validate_is_schema(class_.input_schema_fit())
        lale.type_checking.validate_is_schema(class_.input_schema_predict())
        lale.type_checking.validate_is_schema(class_.output_schema_predict())
        lale.type_checking.validate_is_schema(class_.hyperparam_schema())

        #test_init_fit_predict
        from lale.operators import make_pipeline
        pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
        trained = pipeline1.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
        trained = pipeline2.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        #test_with_hyperopt
        from lale.lib.lale import Hyperopt
        optimizer = Hyperopt(estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())), max_evals = 1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline3 = class_(operator= PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression())
        optimizer = Hyperopt(estimator=pipeline3, max_evals = 1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline4 = (PCA >> class_(operator=make_pipeline(Nystroem())) & class_(operator=make_pipeline(Nystroem()))) >> ConcatFeatures >> LogisticRegression()
        optimizer = Hyperopt(estimator=pipeline4, max_evals = 1, scoring='roc_auc', show_progressbar=False)
github IBM / lale / test / test_optimizers.py View on Github external
def test_hyperparam_overriding_with_hyperopt(self):
        pca1 = PCA(n_components = 3)
        pca2 = PCA()
        search_space1 = hyperopt_search_space(pca1)
        search_space2 = hyperopt_search_space(pca2)
        self.assertNotEqual(search_space1, search_space2)