How to use the mleap.sklearn.preprocessing.data.FeatureExtractor class in mleap

To help you get started, we've selected a few mleap examples based on how FeatureExtractor is commonly used in public projects.

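Each example below follows the same pattern: build a FeatureExtractor over one or more scalar columns, chain it into a scikit-learn transformer with mlinit(prior_tf=...), then fit the transformer and serialize it to an MLeap bundle. Here is a minimal, self-contained sketch of that flow; it assumes a pandas DataFrame df with a numeric column 'a' and a writable tmp_dir, and the import layout may differ slightly between mleap versions.

import os
import json

import numpy as np
import pandas as pd

import mleap.sklearn.preprocessing.data  # importing this module adds mlinit/serialize_to_bundle to sklearn transformers
from mleap.sklearn.preprocessing.data import FeatureExtractor
from sklearn.preprocessing import Binarizer

df = pd.DataFrame({'a': np.random.randn(10)})
tmp_dir = '/tmp/mleap-feature-extractor-demo'  # hypothetical output directory
os.makedirs(tmp_dir, exist_ok=True)

# Assemble the scalar column 'a' into a named feature vector.
feature_extractor = FeatureExtractor(input_scalars=['a'],
                                     output_vector='extracted_a_output',
                                     output_vector_items=['a_out'])

# Chain the extractor into a transformer via prior_tf, then fit and serialize.
binarizer = Binarizer(threshold=0.0)
binarizer.mlinit(prior_tf=feature_extractor, output_features='a_binary')
binarizer.fit_transform(df[['a']])
binarizer.serialize_to_bundle(tmp_dir, binarizer.name)

# Inspect the serialized model definition, as the tests below do.
with open("{}/{}.node/model.json".format(tmp_dir, binarizer.name)) as json_data:
    print(json.load(json_data))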

combust/mleap: python/mleap/sklearn/base_tests.py (view on GitHub)

def test_logistic_regression_serializer(self):

        logistic_regression = LogisticRegression(fit_intercept=True)
        logistic_regression.mlinit(input_features='a',
                                 prediction_column='e_binary')

        extract_features = ['e']
        feature_extractor = FeatureExtractor(input_scalars=['e'],
                                             output_vector='extracted_e_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        binarizer = Binarizer(threshold=0.0)
        binarizer.mlinit(prior_tf=feature_extractor,
                         output_features='e_binary')

        Xres = binarizer.fit_transform(self.df[['e']])

        logistic_regression.fit(self.df[['a']], Xres)

        logistic_regression.serialize_to_bundle(self.tmp_dir, logistic_regression.name)

        # Test model.json
        with open("{}/{}.node/model.json".format(self.tmp_dir, logistic_regression.name)) as json_data:

combust/mleap: python/mleap/sklearn/base_tests.py (view on GitHub)

def test_logistic_regression_cv_deserializer(self):

        logistic_regression = LogisticRegressionCV(fit_intercept=True)
        logistic_regression.mlinit(input_features='a',
                                   prediction_column='e_binary')

        extract_features = ['e']
        feature_extractor = FeatureExtractor(input_scalars=['e'],
                                             output_vector='extracted_e_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        binarizer = Binarizer(threshold=0.0)
        binarizer.mlinit(prior_tf=feature_extractor,
                         output_features='e_binary')

        Xres = binarizer.fit_transform(self.df[['e']])

        logistic_regression.fit(self.df[['a']], Xres)

        logistic_regression.serialize_to_bundle(self.tmp_dir, logistic_regression.name)

        # Test model.json
        with open("{}/{}.node/model.json".format(self.tmp_dir, logistic_regression.name)) as json_data:
            model = json.load(json_data)

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def imputer_test(self):

        def _set_nulls(df):
            # Null out rows 2 and 5 so the imputer has missing values to fill.
            row = df['index']
            if row in [2, 5]:
                return np.nan
            return df.a

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        imputer = Imputer(strategy='mean')
        imputer.mlinit(prior_tf=feature_extractor,
                       output_features='a_imputed')

        df2 = self.df.copy()
        df2.reset_index(inplace=True)
        df2['a'] = df2.apply(_set_nulls, axis=1)

        imputer.fit(df2[['a']])

        self.assertAlmostEqual(imputer.statistics_[0], df2.a.mean(), places=7)

        imputer.serialize_to_bundle(self.tmp_dir, imputer.name)

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def test_standard_scaler_serializer(self):

        standard_scaler = StandardScaler(with_mean=True, with_std=True)

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        standard_scaler.mlinit(prior_tf=feature_extractor,
                               output_features='a_scaled')

        standard_scaler.fit(self.df[['a']])

        standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)

        expected_mean = self.df.a.mean()
        expected_std = np.sqrt(np.var(self.df.a))

        expected_model = {
            "op": "standard_scaler",
            "attributes": {

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def test_standard_scaler_multi_deserializer(self):

        extract_features = ['a', 'b']
        feature_extractor = FeatureExtractor(input_scalars=['a', 'b'],
                                             output_vector='extracted_multi_outputs',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        # Serialize a standard scaler to a bundle
        standard_scaler = StandardScaler(with_mean=True, with_std=True)

        standard_scaler.mlinit(prior_tf=feature_extractor,
                               output_features=['a_scaled', 'b_scaled'])

        standard_scaler.fit(self.df[['a', 'b']])

        standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)

        # Now deserialize it back

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def test_min_max_scaler_serializer(self):

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        scaler = MinMaxScaler()
        scaler.mlinit(prior_tf=feature_extractor,
                      output_features='a_scaled')

        scaler.fit(self.df[['a']])

        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        expected_min = self.df.a.min()
        expected_max = self.df.a.max()

        expected_model = {
           "op": "min_max_scaler",

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def binarizer_deserializer_test(self):

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        binarizer = Binarizer(threshold=0.0)
        binarizer.mlinit(prior_tf=feature_extractor,
                         output_features='a_binary')

        Xres = binarizer.fit_transform(self.df[['a']])

        # Test that the binarizer functions as expected
        self.assertEqual(float(len(self.df[self.df.a >= 0]))/10.0, Xres.mean())

        binarizer.serialize_to_bundle(self.tmp_dir, binarizer.name)

        # Deserialize the Binarizer
        node_name = "{}.node".format(binarizer.name)

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def test_min_max_scaler_multi_deserializer(self):

        extract_features = ['a', 'b']
        feature_extractor = FeatureExtractor(input_scalars=['a', 'b'],
                                             output_vector='extracted_multi_outputs',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        scaler = MinMaxScaler()
        scaler.mlinit(prior_tf=feature_extractor,
                      output_features=['a_scaled', 'b_scaled'])

        scaler.fit(self.df[['a', 'b']])

        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Deserialize the MinMaxScaler
        node_name = "{}.node".format(scaler.name)
        min_max_scaler_tf = MinMaxScaler()
        min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def test_min_max_scaler_deserializer(self):

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        scaler = MinMaxScaler()
        scaler.mlinit(prior_tf=feature_extractor,
                      output_features='a_scaled')

        scaler.fit(self.df[['a']])

        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Deserialize the MinMaxScaler
        node_name = "{}.node".format(scaler.name)
        min_max_scaler_tf = MinMaxScaler()
        min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

combust/mleap: python/mleap/sklearn/preprocessing/tests.py (view on GitHub)

def polynomial_expansion_test(self):

        extract_features = ['a']
        feature_extractor = FeatureExtractor(input_scalars=['a'],
                                             output_vector='extracted_a_output',
                                             output_vector_items=["{}_out".format(x) for x in extract_features])

        polynomial_exp = PolynomialFeatures(degree=2, include_bias=False)
        polynomial_exp.mlinit(prior_tf=feature_extractor,
                              output_features='poly')

        Xres = polynomial_exp.fit_transform(self.df[['a']])

        self.assertEqual(Xres[0][1], Xres[0][0] * Xres[0][0])

        polynomial_exp.serialize_to_bundle(self.tmp_dir, polynomial_exp.name)

        expected_model = {
          "op": "polynomial_expansion",
          "attributes": {