How to use the skl2onnx.common.data_types.StringTensorType class in skl2onnx

To help you get started, we’ve selected a few skl2onnx examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github onnx / sklearn-onnx / tests / test_sklearn_tfidf_vectorizer_converter.py View on Github external
def test_model_tfidf_vectorizer22(self):
        corpus = numpy.array([
            "This is the first document.",
            "This document is the second document.",
            "And this is the third one.",
            "Is this the first document?",
        ]).reshape((4, 1))
        vect = TfidfVectorizer(ngram_range=(2, 2), norm=None)
        vect.fit(corpus.ravel())
        model_onnx = convert_sklearn(vect, "TfidfVectorizer",
                                     [("input", StringTensorType([1]))],
                                     options=self.get_options())
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            corpus,
            vect,
            model_onnx,
            basename="SklearnTfidfVectorizer22-OneOff-SklCol",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.4.0')",
github onnx / sklearn-onnx / tests / test_sklearn_tfidf_vectorizer_converter.py View on Github external
def test_model_tfidf_vectorizer12_normL2(self):
        corpus = numpy.array([
            "This is the first document.",
            "This document is the second document.",
            "And this is the third one.",
            "Is this the first document?",
        ]).reshape((4, 1))
        vect = TfidfVectorizer(ngram_range=(1, 2), norm="l2")
        vect.fit(corpus.ravel())
        model_onnx = convert_sklearn(vect, "TfidfVectorizer",
                                     [("input", StringTensorType([1]))],
                                     options=self.get_options())
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            corpus,
            vect,
            model_onnx,
            basename="SklearnTfidfVectorizer22L2-OneOff-SklCol",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.4.0')",
github onnx / sklearn-onnx / tests / test_sklearn_label_encoder_converter.py View on Github external
def test_model_label_encoder(self):
        model = LabelEncoder()
        data = ["str3", "str2", "str0", "str1", "str3"]
        model.fit(data)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn label encoder",
            [("input", StringTensorType([None]))],
        )
        self.assertTrue(model_onnx is not None)
        self.assertTrue(model_onnx.graph.node is not None)
        dump_data_and_model(
            np.array(data),
            model,
            model_onnx,
            basename="SklearnLabelEncoder",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
github onnx / sklearn-onnx / tests / test_sklearn_pipeline.py View on Github external
TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4),
            ),
        ])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)
        initial_type = [
            ("numfeat", FloatTensorType([None, 3])),
            ("strfeat", StringTensorType([None, 2])),
        ]

        X_train = X_train[:11]
        model_onnx = convert_sklearn(model, initial_types=initial_type)

        dump_data_and_model(
            X_train,
            model,
            model_onnx,
            basename="SklearnPipelineColumnTransformerPipeliner",
            allow_failure="StrictVersion(onnx.__version__)"
                          " < StrictVersion('1.3') or "
                          "StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.4.0')",
        )
github onnx / sklearn-onnx / tests / test_sklearn_tfidf_vectorizer_converter.py View on Github external
def test_model_tfidf_vectorizer11_compose(self):
    """Convert a ColumnTransformer holding two TfidfVectorizer steps.

    The same four sentences are duplicated into two string columns, one
    vectorizer is fitted per column, and the output of the converted
    ONNX model is compared with the scikit-learn ``transform`` output.
    """
    sentences = [
        "This is the first document.",
        "This document is the second document.",
        "And this is the third one.",
        "Is this the first document?",
    ]
    one_column = numpy.array(sentences).reshape((4, 1))
    # Two identical columns, one per vectorizer of the transformer.
    corpus = numpy.hstack([one_column, one_column])
    y = numpy.array([0, 1, 0, 1])

    transformers = [
        ('a', TfidfVectorizer(), 0),
        ('b', TfidfVectorizer(), 1),
    ]
    model = ColumnTransformer(transformers)
    model.fit(corpus, y)

    initial_types = [("input", StringTensorType([4, 2]))]
    model_onnx = convert_sklearn(
        model, "TfIdfcomp", initial_types, options=self.get_options())

    # Run the converted graph first, then the scikit-learn reference.
    sess = InferenceSession(model_onnx.SerializeToString())
    outputs = sess.run(None, {'input': corpus})
    res = outputs[0]
    exp = model.transform(corpus)
    assert_almost_equal(res, exp)
github onnx / sklearn-onnx / tests / test_sklearn_tfidf_vectorizer_converter_regex.py View on Github external
def test_model_tfidf_vectorizer11_empty_string(self):
    """Convert a unigram TfidfVectorizer fitted on a corpus that
    contains an empty string and dump the data and model for
    comparison."""
    sentences = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        '',
    ]
    corpus = numpy.array(sentences).reshape((4, 1))

    vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
    # The vectorizer is fitted on the flattened corpus.
    vect.fit(corpus.ravel())

    initial_types = [('input', StringTensorType([1]))]
    model_onnx = convert_sklearn(
        vect, 'TfidfVectorizer', initial_types,
        options=self.get_options())
    self.assertTrue(model_onnx is not None)

    # TfidfVectorizer in onnxruntime fails with empty strings
    failure_condition = ("StrictVersion(onnxruntime.__version__) "
                         "<= StrictVersion('0.4.0')")
    dump_data_and_model(
        corpus,
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol",
        allow_failure=failure_condition,
    )
github onnx / sklearn-onnx / tests / test_algebra_test_helper.py View on Github external
def test_guess_type(self):
    """Check the tensor type ``_guess_type`` returns for several dtypes.

    Integer, float32 and string matrices must map to the matching
    tensor type. A float64 matrix is only passed through
    ``_guess_type``; its result is not inspected.
    """
    dtypes = [
        (np.int32, Int32TensorType),
        (np.int64, Int64TensorType),
        (np.float32, FloatTensorType),
        # ``np.str`` was only an alias of the builtin ``str``; the alias
        # was deprecated in NumPy 1.20 and removed in NumPy 1.24, so the
        # builtin is used directly.
        (str, StringTensorType),
    ]
    for dtype, exp in dtypes:
        if dtype is str:
            # String matrices are created empty and then filled with "".
            mat = np.empty((3, 3), dtype=dtype)
            mat[:, :] = ""
        else:
            mat = np.zeros((3, 3), dtype=dtype)
        res = _guess_type(mat)
        assert isinstance(res, exp)

    # The return value is discarded: this only checks that the call
    # completes without raising.
    dtypes = [np.float64]
    for dtype in dtypes:
        mat = np.zeros((3, 3), dtype=dtype)
        _guess_type(mat)
github onnx / sklearn-onnx / docs / examples / plot_errors_pipeline.py View on Github external
def convert_dataframe_schema(df, drop=None):
    """Describe every kept column of *df* as a ``(name, tensor type)`` pair.

    ``int64`` columns become ``Int64TensorType``, ``float64`` columns
    become ``FloatTensorType`` and any other dtype becomes
    ``StringTensorType``; every type is declared with shape ``[None, 1]``.

    :param df: object exposing ``columns`` and ``dtypes``
    :param drop: optional container of column names to leave out
    :return: list of ``(column name, tensor type)`` tuples, in column order
    """
    skipped = () if drop is None else drop
    schema = []
    for name, dtype in zip(df.columns, df.dtypes):
        if name in skipped:
            continue
        if dtype == 'int64':
            tensor_cls = Int64TensorType
        elif dtype == 'float64':
            tensor_cls = FloatTensorType
        else:
            tensor_cls = StringTensorType
        schema.append((name, tensor_cls([None, 1])))
    return schema
github onnx / sklearn-onnx / docs / examples / plot_complex_pipeline.py View on Github external
def convert_dataframe_schema(df, drop=None):
    """Build the list of ``(column name, tensor type)`` inputs for *df*.

    Columns listed in *drop* are ignored. The tensor type depends on the
    column dtype: ``Int64TensorType`` for ``int64``, ``FloatTensorType``
    for ``float64`` and ``StringTensorType`` otherwise, each created with
    shape ``[None, 1]``.

    :param df: object exposing ``columns`` and ``dtypes``
    :param drop: optional container of column names to exclude
    :return: list of ``(column name, tensor type)`` tuples
    """
    def _is_dropped(column):
        # No column is dropped when ``drop`` was not given.
        return drop is not None and column in drop

    declared = []
    for column, dtype in zip(df.columns, df.dtypes):
        if _is_dropped(column):
            continue
        shape = [None, 1]
        if dtype == 'float64':
            tensor = FloatTensorType(shape)
        elif dtype == 'int64':
            tensor = Int64TensorType(shape)
        else:
            tensor = StringTensorType(shape)
        declared.append((column, tensor))
    return declared
github onnx / sklearn-onnx / skl2onnx / shape_calculators / label_encoder.py View on Github external
def calculate_sklearn_label_encoder_output_shapes(operator):
    """
    Copy the input shape to the output, because a label encoder only
    alters the values of its input features, not their shape.

    The operator is first checked for its number of outputs and for an
    input type among ``FloatTensorType``, ``Int64TensorType`` and
    ``StringTensorType``. The type of its first output is then replaced
    by an ``Int64TensorType`` carrying a copy of the first input's shape.

    :param operator: operator whose ``inputs[0].type.shape`` is read and
        whose ``outputs[0].type`` is overwritten
    """
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[
                                 FloatTensorType, Int64TensorType,
                                 StringTensorType])

    # A single deep copy is enough to keep the output shape independent
    # from the input shape; copying the fresh copy again was redundant.
    input_shape = copy.deepcopy(operator.inputs[0].type.shape)
    operator.outputs[0].type = Int64TensorType(input_shape)