def test_logistic_regression_serializer(self):
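    """Fit a LogisticRegression on a binarized target, serialize it to a bundle,
    and inspect the written model.json."""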
    logistic_regression = LogisticRegression(fit_intercept=True)
    logistic_regression.mlinit(input_features='a',
                               prediction_column='e_binary')
    extract_features = ['e']
    feature_extractor = FeatureExtractor(input_scalars=['e'],
                                         output_vector='extracted_e_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    binarizer = Binarizer(threshold=0.0)
    binarizer.mlinit(prior_tf=feature_extractor,
                     output_features='e_binary')
    Xres = binarizer.fit_transform(self.df[['a']])
    logistic_regression.fit(self.df[['a']], Xres)
    logistic_regression.serialize_to_bundle(self.tmp_dir, logistic_regression.name)
    # Test model.json
    with open("{}/{}.node/model.json".format(self.tmp_dir, logistic_regression.name)) as json_data:
        model = json.load(json_data)
    # Assumed check: the serialized op name should identify the model type
    self.assertEqual('logistic_regression', model['op'])

def test_logistic_regression_cv_deserializer(self):
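    """Serialize a LogisticRegressionCV pipeline to a bundle, then deserialize it back."""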
    logistic_regression = LogisticRegressionCV(fit_intercept=True)
    logistic_regression.mlinit(input_features='a',
                               prediction_column='e_binary')
    extract_features = ['e']
    feature_extractor = FeatureExtractor(input_scalars=['e'],
                                         output_vector='extracted_e_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    binarizer = Binarizer(threshold=0.0)
    binarizer.mlinit(prior_tf=feature_extractor,
                     output_features='e_binary')
    Xres = binarizer.fit_transform(self.df[['a']])
    logistic_regression.fit(self.df[['a']], Xres)
    logistic_regression.serialize_to_bundle(self.tmp_dir, logistic_regression.name)
    # Test model.json
    with open("{}/{}.node/model.json".format(self.tmp_dir, logistic_regression.name)) as json_data:
        model = json.load(json_data)
    # Deserialize it back, mirroring the scaler deserializer tests below
    node_name = "{}.node".format(logistic_regression.name)
    logistic_regression_tf = LogisticRegressionCV(fit_intercept=True)
    logistic_regression_tf.deserialize_from_bundle(self.tmp_dir, node_name)

def imputer_test(self):
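    """Fit a mean-strategy Imputer over a column with injected NaNs, check the
    learned statistic, and serialize it to a bundle."""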
    def _set_nulls(df):
        # Null out rows 2 and 5 so the imputer has something to fill
        row = df['index']
        if row in [2, 5]:
            return np.nan
        return df.a

    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    imputer = Imputer(strategy='mean')
    imputer.mlinit(prior_tf=feature_extractor,
                   output_features='a_imputed')
    df2 = self.df.copy()  # avoid mutating the shared fixture
    df2.reset_index(inplace=True)
    df2['a'] = df2.apply(_set_nulls, axis=1)
    imputer.fit(df2[['a']])
    self.assertAlmostEqual(imputer.statistics_[0], df2.a.mean(), places=7)
    imputer.serialize_to_bundle(self.tmp_dir, imputer.name)

def test_standard_scaler_serializer(self):
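    """Serialize a StandardScaler to a bundle and compare the written model.json
    against the statistics of the fitted column."""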
    standard_scaler = StandardScaler(with_mean=True,
                                     with_std=True)
    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    standard_scaler.mlinit(prior_tf=feature_extractor,
                           output_features='a_scaled')
    standard_scaler.fit(self.df[['a']])
    standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)
    expected_mean = self.df.a.mean()
    # sklearn's StandardScaler uses the population variance (ddof=0)
    expected_std = np.sqrt(np.var(self.df.a))
    # NOTE: the exact attribute schema in model.json is assumed here; only the
    # op name and the mean/std values come from the fit above
    expected_model = {
        "op": "standard_scaler",
        "attributes": {
            "mean": expected_mean,
            "std": expected_std
        }
    }
    with open("{}/{}.node/model.json".format(self.tmp_dir, standard_scaler.name)) as json_data:
        model = json.load(json_data)
    self.assertEqual(expected_model['op'], model['op'])

def test_standard_scaler_multi_deserializer(self):
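    """Serialize a two-column StandardScaler to a bundle, then deserialize it back."""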
    extract_features = ['a', 'b']
    feature_extractor = FeatureExtractor(input_scalars=['a', 'b'],
                                         output_vector='extracted_multi_outputs',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    # Serialize a standard scaler to a bundle
    standard_scaler = StandardScaler(with_mean=True,
                                     with_std=True)
    standard_scaler.mlinit(prior_tf=feature_extractor,
                           output_features=['a_scaled', 'b_scaled'])
    standard_scaler.fit(self.df[['a', 'b']])
    standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)
    # Now deserialize it back
    node_name = "{}.node".format(standard_scaler.name)
    standard_scaler_tf = StandardScaler()
    standard_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

def test_min_max_scaler_serializer(self):
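    """Serialize a MinMaxScaler to a bundle and compare the written model.json
    against the min/max of the fitted column."""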
    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    scaler = MinMaxScaler()
    scaler.mlinit(prior_tf=feature_extractor,
                  output_features='a_scaled')
    scaler.fit(self.df[['a']])
    scaler.serialize_to_bundle(self.tmp_dir, scaler.name)
    expected_min = self.df.a.min()
    expected_max = self.df.a.max()
    # NOTE: the exact attribute schema in model.json is assumed here; only the
    # op name and the min/max values come from the fit above
    expected_model = {
        "op": "min_max_scaler",
        "attributes": {
            "min": expected_min,
            "max": expected_max
        }
    }
    with open("{}/{}.node/model.json".format(self.tmp_dir, scaler.name)) as json_data:
        model = json.load(json_data)
    self.assertEqual(expected_model['op'], model['op'])

def binarizer_deserializer_test(self):
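    """Check the Binarizer's output, serialize it to a bundle, then deserialize it back."""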
    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    binarizer = Binarizer(threshold=0.0)
    binarizer.mlinit(prior_tf=feature_extractor,
                     output_features='a_binary')
    Xres = binarizer.fit_transform(self.df[['a']])
    # Test that the binarizer functions as expected: the mean of the 0/1 output
    # equals the fraction of non-negative values
    self.assertEqual(float(len(self.df[self.df.a >= 0])) / 10.0, Xres.mean())
    binarizer.serialize_to_bundle(self.tmp_dir, binarizer.name)
    # Deserialize the Binarizer
    node_name = "{}.node".format(binarizer.name)
    binarizer_tf = Binarizer()
    binarizer_tf.deserialize_from_bundle(self.tmp_dir, node_name)

def test_min_max_scaler_multi_deserializer(self):
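    """Round-trip a two-column MinMaxScaler through serialize/deserialize."""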
    extract_features = ['a', 'b']
    feature_extractor = FeatureExtractor(input_scalars=['a', 'b'],
                                         output_vector='extracted_multi_outputs',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    scaler = MinMaxScaler()
    scaler.mlinit(prior_tf=feature_extractor,
                  output_features=['a_scaled', 'b_scaled'])
    # Fit on both extracted columns, matching the two output features
    scaler.fit(self.df[['a', 'b']])
    scaler.serialize_to_bundle(self.tmp_dir, scaler.name)
    # Deserialize the MinMaxScaler
    node_name = "{}.node".format(scaler.name)
    min_max_scaler_tf = MinMaxScaler()
    min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

def test_min_max_scaler_deserializer(self):
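    """Round-trip a single-column MinMaxScaler through serialize/deserialize."""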
    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    scaler = MinMaxScaler()
    scaler.mlinit(prior_tf=feature_extractor,
                  output_features='a_scaled')
    scaler.fit(self.df[['a']])
    scaler.serialize_to_bundle(self.tmp_dir, scaler.name)
    # Deserialize the MinMaxScaler
    node_name = "{}.node".format(scaler.name)
    min_max_scaler_tf = MinMaxScaler()
    min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

def polynomial_expansion_test(self):
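    """Fit a degree-2 PolynomialFeatures transformer, verify the squared term,
    and serialize it to a bundle."""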
    extract_features = ['a']
    feature_extractor = FeatureExtractor(input_scalars=['a'],
                                         output_vector='extracted_a_output',
                                         output_vector_items=["{}_out".format(x) for x in extract_features])
    polynomial_exp = PolynomialFeatures(degree=2, include_bias=False)
    polynomial_exp.mlinit(prior_tf=feature_extractor,
                          output_features='poly')
    Xres = polynomial_exp.fit_transform(self.df[['a']])
    # With degree=2 and no bias, the second output column is the square of the first
    self.assertEqual(Xres[0][1], Xres[0][0] * Xres[0][0])
    polynomial_exp.serialize_to_bundle(self.tmp_dir, polynomial_exp.name)
    # NOTE: the exact attribute schema in model.json is assumed here; only the
    # op name and the degree come from the transformer above
    expected_model = {
        "op": "polynomial_expansion",
        "attributes": {
            "degree": 2
        }
    }
    with open("{}/{}.node/model.json".format(self.tmp_dir, polynomial_exp.name)) as json_data:
        model = json.load(json_data)
    self.assertEqual(expected_model['op'], model['op'])