Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from sklearn.preprocessing.data import BaseEstimator, TransformerMixin
from mleap.bundle.serialize import MLeapSerializer
from gensim.models import Word2Vec
import uuid
import numpy as np
import pandas as pd
class MLeapWord2Vec(BaseEstimator, TransformerMixin, MLeapSerializer):
def __init__(self, input_features, output_features, kernel='sqrt', size=35, window=5, min_count=5):
    """
    Store the configuration of the word2vec transformer on the instance.

    :param input_features: stored unchanged as ``self.input_features``
    :param output_features: stored unchanged as ``self.output_features``
    :param kernel: stored unchanged as ``self.kernel`` (default ``'sqrt'``)
    :param size: stored as ``self.size`` (default 35)
    :param window: stored as ``self.window`` (default 5)
    :param min_count: stored as ``self.min_count`` (default 5)

    NOTE(review): size/window/min_count are presumably forwarded to
    gensim's Word2Vec when the model is fitted -- confirm against ``fit``.
    """
    self.input_features = input_features
    self.output_features = output_features
    self.op = 'word2vec'
    # A random UUID suffix gives every instance its own node name.
    self.name = "{}_{}".format(self.op, str(uuid.uuid4()))
    self.serializable = True
    self.kernel = kernel
    # No model yet; this attribute starts out empty.
    self.model = None
    self.output_features_pandas = []
    # Honour the caller's hyper-parameters. These were previously hard-coded
    # to 35 / 5 / 5, which silently discarded any non-default argument.
    self.size = size
    self.window = window
    self.min_count = min_count
def fit(self, X, y=None, **fit_params):
"""
# define node inputs and outputs
inputs = [{
"name": transformer.input_features,
"port": "input"
}]
outputs = [{
"name": transformer.prediction_column,
"port": "output"
}]
self.serialize(transformer, path, model_name, attributes, inputs, outputs)
class TfidfVectorizerSerializer(MLeapSerializer):
pipeline_serializer = PipelineSerializer()
def __init__(self):
    """Initialise the serializer by running the parent-class initialiser."""
    parent = super(TfidfVectorizerSerializer, self)
    parent.__init__()
@staticmethod
def set_prediction_column(transformer, prediction_column):
transformer.prediction_column = prediction_column
@staticmethod
def set_input_features(transformer, input_features):
transformer.input_features = input_features
def serialize_to_bundle(self, transformer, path, model_name):
num_features = transformer.idf_.shape[0]
vocabulary = [None] * num_features
'min': 'data_min_',
'max': 'data_max_'
}
full_node_path = os.path.join(node_path, node_name)
transformer = self.deserialize_single_input_output(transformer, full_node_path, attributes_map)
transformer.data_range_ = np.array(transformer.data_max_) - np.array(transformer.data_min_)
transformer.scale_ = ((feature_range[1] - feature_range[0]) / transformer.data_range_)
transformer.min_ = feature_range[0] - transformer.data_min_ * transformer.scale_
return transformer
class ImputerSerializer(MLeapSerializer):
def __init__(self):
    """Run the parent-class initialiser, then flag this serializer as not serializable."""
    parent = super(ImputerSerializer, self)
    parent.__init__()
    # Set after the parent initialiser so this value is the one that sticks.
    setattr(self, 'serializable', False)
def serialize_to_bundle(self, transformer, path, model_name):
# compile tuples of model attributes to serialize
attributes = list()
attributes.append(('strategy', transformer.strategy))
attributes.append(('surrogate_value', transformer.statistics_.tolist()[0]))
if transformer.missing_values is not "NaN":
attributes.append(('missing_value', transformer.missing_values))
# define node inputs and outputs
inputs = [{
"name": transformer.input_features,
return serializer
@staticmethod
def set_prediction_column(transformer, prediction_column):
transformer.prediction_column = prediction_column
@staticmethod
def set_input_features(transformer, input_features):
transformer.input_features = input_features
def serialize_to_bundle(self, transformer, path, model_name):
    """
    Delegate serialization to the serializer selected for this transformer.

    :param transformer: object passed to ``self._choose_serializer`` and then
        on to the chosen serializer
    :param path: forwarded unchanged to the chosen serializer
    :param model_name: forwarded unchanged to the chosen serializer
    :return: None
    """
    self._choose_serializer(transformer).serialize_to_bundle(transformer, path, model_name)
class CountVectorizerSerializer(MLeapSerializer):
def __init__(self):
    """Initialise the serializer by running the parent-class initialiser."""
    parent = super(CountVectorizerSerializer, self)
    parent.__init__()
def serialize_to_bundle(self, transformer, path, model_name):
# compile tuples of model attributes to serialize
attributes = None
# define node inputs and outputs
inputs = [{
"name": transformer.input_features,
"port": "input"
}]
outputs = [{
"name": transformer.prediction_column,
serializer = SimpleSerializer()
return serializer.serialize_to_bundle(self, path, model_name, serialize_node=serialize_node)
# Patch the decision-tree regressor class with its MLeap op name and hooks.
DecisionTreeRegressor.op = 'decision_tree_regression'
DecisionTreeRegressor.mlinit = mleap_init
DecisionTreeRegressor.serialize_to_bundle = serialize_to_bundle
DecisionTreeRegressor.serializable = True

# Same patching for the decision-tree classifier class.
DecisionTreeClassifier.op = 'decision_tree_classifier'
DecisionTreeClassifier.mlinit = mleap_init
DecisionTreeClassifier.serialize_to_bundle = serialize_to_bundle
DecisionTreeClassifier.serializable = True
class SimpleSerializer(MLeapSerializer):
def __init__(self):
    """Initialise the serializer by running the parent-class initialiser."""
    parent = super(SimpleSerializer, self)
    parent.__init__()
@staticmethod
def serialize_tree(tree, feature_names, outfile):
"""
:type feature_names: list
:type tree: sklearn.tree.tree.BaseDecisionTree
:param tree: sklearn.tree.tree
:param feature_names:
:return:
"""
tree_ = tree.tree_
feature_name = [feature_names[i] if i != _tree.TREE_UNDEFINED else 'n/a' for i in tree_.feature]