Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_truncated_svd(self):
    """Fit a TruncatedSVD, convert it to ONNX and dump data plus models.

    The converter is given a single float input named ``'input'`` whose
    declared shape is ``[1, n_features]``.
    """
    n_rows, n_features, n_components = 2, 3, 2
    samples = create_tensor(n_rows, n_features)

    svd = TruncatedSVD(n_components=n_components)
    svd.fit(samples)

    input_spec = [('input', FloatTensorType(shape=[1, n_features]))]
    model_onnx = onnxmltools.convert_sklearn(svd, initial_types=input_spec)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(samples, svd, model_onnx, basename="SklearnTruncatedSVD")
def test_combine_inputs(self):
    """Convert a two-step scaler pipeline that is fed through two inputs.

    The pipeline chains the same fitted ``StandardScaler`` twice and is
    converted with two ``[1, 1]`` float inputs (``input1`` and ``input2``).
    The test checks that a model was produced and that the last node of
    its graph has exactly one output, then dumps the per-input data and
    the models.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    data = numpy.array([[0., 0.], [0., 0.], [1., 1.], [1., 1.]], dtype=numpy.float32)
    scaler = StandardScaler()
    scaler.fit(data)
    model = Pipeline([('scaler1', scaler), ('scaler2', scaler)])

    model_onnx = convert_sklearn(model, 'pipeline',
                                 [('input1', FloatTensorType([1, 1])),
                                  ('input2', FloatTensorType([1, 1]))])

    # Check for None *before* touching ``model_onnx.graph``; otherwise a
    # missing model surfaces as an AttributeError instead of a test failure.
    self.assertIsNotNone(model_onnx)
    self.assertEqual(len(model_onnx.graph.node[-1].output), 1)

    # Each column of the training data is passed as its own named input.
    data = {'input1': data[:, 0], 'input2': data[:, 1]}
    dump_data_and_model(data, PipeConcatenateInput(model), model_onnx,
                        basename="SklearnPipelineScaler11-OneOff")
# NOTE(review): this is a fragment -- the enclosing function header and the
# `with` statement that opens `f` for the line below are not visible here,
# and the original indentation has been lost, so nesting is ambiguous.
pickle.dump(prediction, f)
# Pickle the input data to "<basename>.data.pkl" and record the path in `names`.
dest = os.path.join(folder, basename + ".data.pkl")
names.append(dest)
with open(dest, "wb") as f:
pickle.dump(data, f)
# Pickle the model object to "<basename>.model.pkl" and record the path too.
dest = os.path.join(folder, basename + ".model.pkl")
names.append(dest)
with open(dest, "wb") as f:
pickle.dump(model, f)
# No ONNX model supplied: build one with convert_model. When no `inputs` were
# given either, a single float input named 'input' shaped like the data array
# is used. NOTE(review): presumably the convert_model call sits inside the
# `if onnx is None` branch -- confirm against the properly indented source.
if onnx is None:
array = numpy.array(data)
if inputs is None:
inputs = [('input', FloatTensorType(list(array.shape)))]
onnx, _ = convert_model(model, basename, inputs)
# Write the serialized ONNX model to "<basename>.model.onnx" and remember its
# location under the "onnx" key of `runtime_test`.
dest = os.path.join(folder, basename + ".model.onnx")
names.append(dest)
with open(dest, "wb") as f:
f.write(onnx.SerializeToString())
runtime_test["onnx"] = dest
# backend
# A single backend is wrapped into a list so the loop handles both forms;
# backends for which is_backend_enabled() is false are skipped.
if backend is not None:
if not isinstance(backend, list):
backend = [backend]
for b in backend:
if not is_backend_enabled(b):
continue
def calculate_sparkml_normalizer_output_shapes(operator):
    """Type the first output as a float tensor shaped like the first input.

    The operator is first passed through the shared input/output count and
    type checks (float or int64 inputs, float outputs). The input shape is
    deep-copied so the output type does not share the input's shape object.
    """
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(
        operator,
        good_input_types=[FloatTensorType, Int64TensorType],
        good_output_types=[FloatTensorType],
    )

    shape_copy = copy.deepcopy(operator.inputs[0].type.shape)
    operator.outputs[0].type = FloatTensorType(shape_copy)
def calculate_keras_embed_output_shapes(operator):
    """Set the first output's type from the raw operator's ``output_shape``.

    Every dimension of ``operator.raw_operator.output_shape`` that is
    ``None`` is stored as the string ``'None'``; all other dimensions are
    kept as they are. The ``doc_string`` of the first input's type is passed
    on to the new output type.
    """
    doc_string = operator.inputs[0].type.doc_string
    shape = operator.raw_operator.output_shape
    # Identity test rather than ``== None``: equality can be overridden by the
    # dimension object, whereas ``is None`` only matches the None singleton.
    output_dims = ['None' if dim is None else dim for dim in shape]
    operator.outputs[0].type = FloatTensorType(output_dims, doc_string)
def getTensorTypeFromSpark(sparktype):
    """Return the ``[1, 1]`` tensor type matching a Spark type name.

    ``'StringType'`` maps to a string tensor; the decimal, floating-point,
    integer and boolean type names listed below all map to a float tensor.

    Raises:
        TypeError: if ``sparktype`` is none of the recognised names.
    """
    float_backed_types = (
        'DecimalType',
        'DoubleType',
        'FloatType',
        'LongType',
        'IntegerType',
        'ShortType',
        'ByteType',
        'BooleanType',
    )

    if sparktype == 'StringType':
        return StringTensorType([1, 1])
    if sparktype in float_backed_types:
        return FloatTensorType([1, 1])
    raise TypeError("Cannot map this type to Onnx types: " + sparktype)
2. [N, 'None'] ---> [N, 'None']
'''
# NOTE(review): fragment -- the function header and the start of its docstring
# are not visible here, and the original indentation has been lost.
op = operator.raw_operator
# encoded_slot_sizes[i] is the number of output coordinates associated with the ith categorical feature.
encoded_slot_sizes = op.categorySizes
# N is the first dimension of the first input's shape; it is reused unchanged
# as the first dimension of the output.
N = operator.inputs[0].type.shape[0]
# Calculate the output feature length by replacing the count of categorical
# features with their encoded widths
# NOTE(review): the `- 1` below drops exactly one input column before adding
# the encoded widths -- presumably a single categorical column is assumed;
# confirm against the converter.
if operator.inputs[0].type.shape[1] != 'None':
C = operator.inputs[0].type.shape[1] - 1 + sum(encoded_slot_sizes)
else:
# The string 'None' in the input's second dimension is carried to the output.
C = 'None'
operator.outputs[0].type = FloatTensorType([N, C])
def calculate_word2vec_output_shapes(operator):
    """Type the first output as a ``[N, vectorSize]`` float tensor.

    ``N`` is the first dimension of the first input's shape and must be 1;
    the second dimension comes from the raw operator's ``vectorSize``
    parameter.

    Raises:
        SparkMlConversionError: if the first input dimension is not 1.
    """
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[StringTensorType])

    batch_size = operator.inputs[0].type.shape[0]
    if batch_size != 1:
        raise SparkMlConversionError('Word2Vec converter cannot handle batch size of more than 1')

    vector_size = operator.raw_operator.getOrDefault('vectorSize')
    operator.outputs[0].type = FloatTensorType([batch_size, vector_size])
def calculate_bidirectional_lstm_output_shapes(operator):
'''
See bidirectional LSTM's conversion function for its output shapes.
'''
check_input_and_output_numbers(operator, input_count_range=[1, 5], output_count_range=[1, 5])
check_input_and_output_types(operator, good_input_types=[FloatTensorType])
input_shape = operator.inputs[0].type.shape
# LSTM accepts [N, C] and [N, C, 1, 1] inputs
if len(input_shape) not in [2, 4]:
raise RuntimeError('Input must be a 2-D or 4-D tensor')
params = operator.raw_operator.biDirectionalLSTM
# The following line is more accurate but it may break some tests
# output_shape = ['None', params.outputVectorSize] if params.params.sequenceOutput else [1, 2 *params.outputVectorSize]
output_shape = ['None', 2 * params.outputVectorSize]
state_shape = [1, params.outputVectorSize]
# TODO: Changing input shapes of an operator is dangerous, this should be move to Topology's _fix_shapes function
if len(operator.inputs) > 1:
Y_h_in = operator.inputs[1] # The forward initial hidden state of a single sequence