# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_robust_scaler_floats_no_bias(self):
    """Convert a RobustScaler fitted with ``with_centering=False``.

    The scaler is fitted on a small 4x3 float matrix, converted with a
    single float input of three columns, and the converted model is
    handed to ``dump_data_and_model`` together with the fitted scaler.
    """
    model = RobustScaler(with_centering=False)
    data = [
        [0.0, 0.0, 3.0],
        [1.0, 1.0, 0.0],
        [0.0, 2.0, 1.0],
        [1.0, 0.0, 2.0],
    ]
    model.fit(data)
    model_onnx = convert_sklearn(
        model, "scaler", [("input", FloatTensorType([None, 3]))])
    self.assertIsNotNone(model_onnx)
    # Pass the converted model explicitly, as the other tests in this
    # file do; previously it was built and asserted but never handed on.
    dump_data_and_model(
        numpy.array(data, dtype=numpy.float32),
        model,
        model_onnx,
        basename="SklearnRobustScalerWithCenteringFloat32",
    )
def test_standard_scaler_floats_no_mean_std(self):
    """Convert a StandardScaler with both mean and std scaling disabled.

    The scaler is fitted on a small 4x3 float matrix, converted with a
    single float input of three columns, and the converted model is
    handed to ``dump_data_and_model`` together with the fitted scaler.
    """
    model = StandardScaler(with_mean=False, with_std=False)
    data = [
        [0.0, 0.0, 3.0],
        [1.0, 1.0, 0.0],
        [0.0, 2.0, 1.0],
        [1.0, 0.0, 2.0],
    ]
    model.fit(data)
    model_onnx = convert_sklearn(
        model, "scaler", [("input", FloatTensorType([None, 3]))])
    self.assertIsNotNone(model_onnx)
    # Pass the converted model explicitly, as the other tests in this
    # file do; previously it was built and asserted but never handed on.
    dump_data_and_model(
        numpy.array(data, dtype=numpy.float32),
        model,
        model_onnx,
        basename="SklearnStandardScalerFloat32NoMeanStd",
    )
def test_model_knn_regressor_metric_cityblock(self):
    """Convert a KNeighborsRegressor using the ``cityblock`` metric.

    The fitted regressor is converted with a four-column float input and
    the first seven rows of the data are passed on for the dump.
    """
    knn, features = self._fit_model(KNeighborsRegressor(metric="cityblock"))
    input_types = [("input", FloatTensorType([None, 4]))]
    onnx_model = convert_sklearn(knn, "KNN regressor", input_types)
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(
        features.astype(numpy.float32)[:7],
        knn,
        onnx_model,
        basename="SklearnKNeighborsRegressorMetricCityblock",
    )
def test_convert_nusvr_int(self):
    """Convert a NuSVR fitted on integer features and dump the result.

    The input is declared as an int64 tensor whose column count follows
    the fitted data. ``allow_failure`` carries a version condition as a
    string (onnxruntime <= 0.2.1); how it is evaluated is up to
    ``dump_data_and_model``.
    """
    model, X = fit_regression_model(
        NuSVR(), is_int=True)
    model_onnx = convert_sklearn(
        model,
        "NuSVR",
        [("input", Int64TensorType([None, X.shape[1]]))],
    )
    self.assertIsNotNone(model_onnx)
    # The call below was left unclosed in the original text; the closing
    # parenthesis is restored so the method parses.
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnNuSVRInt-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
def test_model_linear_regression(self):
    """Convert a fitted LinearRegression and dump the result.

    The input is declared as a float tensor whose column count follows
    the fitted data. ``allow_failure`` carries a version condition as a
    string; how it is evaluated is up to ``dump_data_and_model``.
    """
    model, X = fit_regression_model(linear_model.LinearRegression())
    model_onnx = convert_sklearn(
        model, "linear regression",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    # NOTE(review): the original text stopped after
    # "onnxruntime.__version__)" with the call left unclosed. The
    # comparison below is completed to match the other tests in this
    # file, which all use "<= StrictVersion('0.2.1')" -- confirm against
    # the upstream test before merging.
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnLinearRegression-Dec4",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
def test_column_transformer_weights(self):
    """Convert a weighted ColumnTransformer (PCA + TruncatedSVD) and dump it.

    PCA is applied to columns 0-9 and TruncatedSVD to columns 10-99, with
    transformer weights 2 and 3 respectively. ``allow_failure`` carries a
    version condition as a string (onnxruntime <= 0.2.1); how it is
    evaluated is up to ``dump_data_and_model``.
    """
    model, X = fit_classification_model(
        ColumnTransformer(
            [('pca', PCA(n_components=5), slice(0, 10)),
             ('svd', TruncatedSVD(n_components=5), slice(10, 100))],
            transformer_weights={'pca': 2, 'svd': 3}), 3)
    model_onnx = convert_sklearn(
        model,
        "column transformer weights",
        [("input", FloatTensorType([None, X.shape[1]]))],
        dtype=numpy.float32,
    )
    self.assertIsNotNone(model_onnx)
    # The call below was left unclosed in the original text; the closing
    # parenthesis is restored so the method parses.
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnColumnTransformerWeights",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
# Load the dataset, hold out a test split and fit a Gaussian process
# regressor with a DotProduct + RBF kernel.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this
# snippet needs an older scikit-learn or a different dataset.
bost = load_boston()
X = bost.data
y = bost.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
gpr = GaussianProcessRegressor(kernel=DotProduct() + RBF(), alpha=1.)
gpr.fit(X_train, y_train)
print(gpr)
###########################
# First attempt to convert a model into ONNX
# ++++++++++++++++++++++++++++++++++++++++++
#
# The documentation suggests the following way to
# convert a model into ONNX.
# One float input named 'X' whose column count matches the training data.
initial_type = [
    ('X', FloatTensorType([None, X_train.shape[1]])),
]
onx = convert_sklearn(gpr, initial_types=initial_type)
sess = rt.InferenceSession(onx.SerializeToString())
try:
    pred_onx = sess.run(None, {'X': X_test.astype(numpy.float32)})[0]
except RuntimeError as e:
    # A runtime failure is printed instead of raised so the script goes on.
    print(str(e))
###########################
# Second attempt: variable dimensions
# +++++++++++++++++++++++++++++++++++
#
# Unfortunately, even though the conversion
# went well, the runtime fails to compute the prediction.
# The previous snippet of code imposes fixed dimensions
# on the input and therefore lets the runtime assume
# that the dimensions are fixed.
#
# *sklearn-onnx* converts a pipeline without knowing the training data;
# more specifically, it does not know the input variables. This is why
# it complains when the parameter *initial_types* is not filled
# when function :func:`skl2onnx.convert_sklearn`
# is called. Let's see what happens without it.
# Fit a logistic regression on the first two iris features, then try to
# convert it without declaring any input type and print whatever is raised.
data = load_iris()
X, y = data.data[:, :2], data.target
clf = LogisticRegression()
clf.fit(X, y)
try:
    model_onnx = convert_sklearn(clf)
except Exception as e:
    print(e)
################################
# We need to define the initial type.
# Let's write some code to automatically
# fill that parameter from a dataframe.
# Intended to build the initial types for the converter from a dataframe:
# walks the (column name, dtype) pairs of *df*, skipping names found in *drop*.
# NOTE(review): this definition looks truncated in this copy of the file --
# only the 'int64' branch is present, nothing is appended to ``inputs`` and
# nothing is returned. Restore the remainder from the upstream example
# before relying on it.
def convert_dataframe_schema(df, drop=None):
inputs = []
for k, v in zip(df.columns, df.dtypes):
if drop is not None and k in drop:
continue
if v == 'int64':
# int64 columns are declared as an Int64 tensor of shape [None, 1].
t = Int64TensorType([None, 1])
# NOTE(review): these three lines appear to be the tail of a helper whose
# ``def`` line is missing from this copy (``predict_with_onnxruntime`` is
# called further down but never defined in the visible text) -- confirm and
# restore the header and the creation of ``sess``.
# Feeds X, cast to float32, to the session's first input and returns the
# first output.
input_name = sess.get_inputs()[0].name
res = sess.run(None, {input_name: X.astype(np.float32)})
return res[0]
#################################
# Simple KMeans
# +++++++++++++
#
# The first way: :func:`convert_sklearn`.
# Fit a two-cluster KMeans on a 10x2 integer grid and convert it with an
# explicitly declared float input.
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2)
tr.fit(X)
onx = convert_sklearn(
    tr,
    initial_types=[
        ('X', FloatTensorType((None, X.shape[1]))),
    ],
)
print(predict_with_onnxruntime(onx, X))
#################################
# The second way: :func:`to_onnx`: no need to play with
# :class:`FloatTensorType` anymore.
# Same model as before, but the input type is taken from a float32 copy
# of the data passed to to_onnx.
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2)
tr.fit(X)
onx = to_onnx(
    tr,
    X.astype(np.float32),
)
print(predict_with_onnxruntime(onx, X))
#################################
# This second comparison is better as
# ONNX Runtime, in this experiment,
# computes the label and the probabilities
# in every case.
##########################################
# Benchmark with RandomForest
# +++++++++++++++++++++++++++
#
# We first train and save a model in ONNX format.
from sklearn.ensemble import RandomForestClassifier
# Fit the forest, convert it with a single float input of shape [1, 4]
# and write the serialized model to rf_iris.onnx.
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
initial_type = [
    ('float_input', FloatTensorType([1, 4])),
]
onx = convert_sklearn(rf, initial_types=initial_type)
with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())
###################################
# We compare.
# Open an inference session on the ONNX file written to disk.
sess = rt.InferenceSession("rf_iris.onnx")
def sess_predict_proba_rf(x):
    """Run the ONNX session on *x* and return the ``prob_name`` output.

    *x* is cast to float32 and fed under ``input_name``; both names are
    module-level values defined outside this function.
    """
    feeds = {input_name: x.astype(numpy.float32)}
    outputs = sess.run([prob_name], feeds)
    return outputs[0]
# Report timings for the scikit-learn model and then for the ONNX session.
# NOTE(review): ``speed`` and ``loop`` are not defined in the visible part
# of the file; presumably ``speed`` times the statement given as a string
# -- confirm against the full example.
print("Execution time for predict_proba")
speed("loop(X_test, rf.predict_proba, 100)")
print("Execution time for sess_predict_proba")
speed("loop(X_test, sess_predict_proba_rf, 100)")