Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if objective == "binary:logistic":
ncl = 2
else:
ncl = ntrees // params['n_estimators']
if objective == "reg:logistic" and ncl == 1:
ncl = 2
classes = xgb_node.classes_
if (np.issubdtype(classes.dtype, np.floating) or
np.issubdtype(classes.dtype, np.signedinteger)):
operator.outputs[0].type = Int64TensorType(shape=[N])
else:
operator.outputs[0].type = StringTensorType(shape=[N])
operator.outputs[1].type = operator.outputs[1].type = FloatTensorType([N, ncl])
register_shape_calculator('XGBClassifier', calculate_xgboost_classifier_output_shapes)
def calculate_xgboost_classifier_output_shapes(operator):
check_input_and_output_numbers(operator, input_count_range=1, output_count_range=2)
check_input_and_output_types(operator, good_input_types=[FloatTensorType, Int64TensorType])
N = operator.inputs[0].type.shape[0]
xgb_node = operator.raw_operator
params = get_xgb_params(xgb_node)
booster = xgb_node.get_booster()
atts = booster.attributes()
ntrees = len(booster.get_dump(with_stats=True, dump_format = 'json'))
objective = params["objective"]
if objective == "binary:logistic":
ncl = 2
else:
ncl = ntrees // params['n_estimators']
if objective == "reg:logistic" and ncl == 1:
ncl = 2
def test_truncated_svd(self):
N, C, K = 2, 3, 2
x = create_tensor(N, C)
svd = TruncatedSVD(n_components=K)
svd.fit(x)
model_onnx = onnxmltools.convert_sklearn(svd, initial_types=[('input', FloatTensorType(shape=[1, C]))])
self.assertTrue(model_onnx is not None)
dump_data_and_model(x, svd, model_onnx, basename="SklearnTruncatedSVD")
def test_combine_inputs(self):
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
data = numpy.array([[0., 0.], [0., 0.], [1., 1.], [1., 1.]], dtype=numpy.float32)
scaler = StandardScaler()
scaler.fit(data)
model = Pipeline([('scaler1', scaler), ('scaler2', scaler)])
model_onnx = convert_sklearn(model, 'pipeline',
[('input1', FloatTensorType([1, 1])),
('input2', FloatTensorType([1, 1]))])
self.assertTrue(len(model_onnx.graph.node[-1].output) == 1)
self.assertTrue(model_onnx is not None)
data = {'input1': data[:, 0], 'input2': data[:, 1]}
dump_data_and_model(data, PipeConcatenateInput(model), model_onnx,
basename="SklearnPipelineScaler11-OneOff")
def test_pipeline(self):
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=numpy.float32)
scaler = StandardScaler()
scaler.fit(data)
model = Pipeline([('scaler1',scaler), ('scaler2', scaler)])
model_onnx = convert_sklearn(model, 'pipeline', [('input', FloatTensorType([1, 2]))])
self.assertTrue(model_onnx is not None)
dump_data_and_model(data, model, model_onnx, basename="SklearnPipelineScaler")
def test_robust_scaler_floats_no_scaling(self):
model = RobustScaler(with_scaling=False)
data = [[0., 0., 3.], [1., 1., 0.], [0., 2., 1.], [1., 0., 2.]]
model.fit(data)
model_onnx = convert_sklearn(model, 'scaler', [('input', FloatTensorType([1, 3]))])
self.assertTrue(model_onnx is not None)
dump_data_and_model(numpy.array(data, dtype=numpy.float32),
model, basename="SklearnRobustScalerNoScalingFloat32")
def test_glm_regressor(self):
X, y = make_regression(n_features=4, random_state=0)
lr = LinearRegression()
lr.fit(X, y)
lr_coreml = coremltools.converters.sklearn.convert(lr)
lr_onnx = convert(lr_coreml.get_spec())
self.assertTrue(lr_onnx is not None)
dump_data_and_model(X.astype(numpy.float32), lr, lr_onnx, basename="CmlLinearRegression-Dec4")
svr = LinearSVR()
svr.fit(X, y)
svr_coreml = coremltools.converters.sklearn.convert(svr)
svr_onnx = convert(svr_coreml.get_spec())
self.assertTrue(svr_onnx is not None)
dump_data_and_model(X.astype(numpy.float32), svr, svr_onnx, basename="CmlLinearSvr-Dec4")
def _test_single_output_core(self, model):
X = [[0, 1], [1, 1], [2, 0]]
y = [100, -10, 50]
model.fit(X, y)
model_onnx = convert_sklearn(model, 'tree-based regressor', [('input', Int64TensorType([1, 2]))])
self.assertTrue(model_onnx is not None)
def test_pipeline(self):
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=numpy.float32)
scaler = StandardScaler()
scaler.fit(data)
model = Pipeline([('scaler1',scaler), ('scaler2', scaler)])
model_onnx = convert_sklearn(model, 'pipeline', [('input', FloatTensorType([1, 2]))])
self.assertTrue(model_onnx is not None)
dump_data_and_model(data, model, model_onnx, basename="SklearnPipelineScaler")
def test_one_hot_encoder_mixed_float_int(self):
# categorical_features will be removed in 0.22 (this test will fail by then).
model = OneHotEncoder()
model.fit([[0.4, 0.2, 3], [1.4, 1.2, 0], [0.2, 2.2, 1]])
model_onnx = convert_sklearn(model, 'one-hot encoder mixed-type inputs',
[('input1', FloatTensorType([1, 2])), ('input2', Int64TensorType([1, 1]))])
self.assertTrue(model_onnx is not None)