How to use the rasa.nlu.training_data.Message function in rasa

To help you get started, we've selected a few rasa examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github botfront / rasa-for-botfront / tests / nlu / featurizers / test_spacy_featurizer.py View on Github external
def test_spacy_featurizer_sequence(sentence, expected, spacy_nlp):
    """Dense features from SpacyFeaturizer should match the spaCy token vectors.

    Compares the first five dimensions of the first token's vector against
    both the featurizer output and the parametrized expected values.
    """
    from rasa.nlu.featurizers.dense_featurizer.spacy_featurizer import SpacyFeaturizer

    parsed = spacy_nlp(sentence)
    per_token_vectors = [token.vector for token in parsed]

    featurizer = SpacyFeaturizer.create({}, RasaNLUModelConfig())

    msg = Message(sentence, {"intent": "greet", "text_features": [0.5]})
    msg.set("text_spacy_doc", parsed)

    featurizer._set_spacy_features(msg)

    leading_dims = msg.get("text_dense_features")[0][:5]

    # Featurizer output must agree with raw spaCy vectors and the expectation.
    assert np.allclose(per_token_vectors[0][:5], leading_dims, atol=1e-4)
    assert np.allclose(leading_dims, expected, atol=1e-4)
github botfront / rasa-for-botfront / tests / nlu / classifiers / test_diet_classifier.py View on Github external
                Message(
                    "test b",
                    data={
                        SPARSE_FEATURE_NAMES[TEXT]: np.zeros(1),
                        DENSE_FEATURE_NAMES[TEXT]: np.zeros(1),
                    },
                ),
            ],
            True,
        ),
        (
            [
                Message(
                    "test a",
                    data={
                        SPARSE_FEATURE_NAMES[INTENT]: np.zeros(1),
                        DENSE_FEATURE_NAMES[INTENT]: np.zeros(1),
github RasaHQ / rasa / tests / nlu / featurizers / test_count_vectors_featurizer.py View on Github external
],
)
def test_count_vector_featurizer_oov_words(sentence, expected):
    """Words listed under ``OOV_words`` should be counted as the OOV token."""
    from rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer import (
        CountVectorsFeaturizer,
    )

    config = {
        "token_pattern": r"(?u)\b\w+\b",
        "OOV_token": "__oov__",
        "OOV_words": ["oov_word0", "OOV_word1"],
        "return_sequence": True,
    }
    featurizer = CountVectorsFeaturizer(config)

    training_example = Message(sentence)
    # An intent label is required for a valid training example.
    training_example.set("intent", "bla")
    featurizer.train(TrainingData([training_example]))

    processed = Message(sentence)
    featurizer.process(processed)

    assert np.all(processed.get("text_sparse_features").toarray()[0] == expected)
github RasaHQ / rasa / tests / nlu / base / test_featurizers.py View on Github external
@pytest.mark.parametrize(
    "sentence, expected",
    [
        ("hello hello hello hello hello ", [5]),
        ("hello goodbye hello", [1, 2]),
        ("a b c d e f", [1, 1, 1, 1, 1, 1]),
        ("a 1 2", [2, 1]),
    ],
)
def test_count_vector_featurizer(sentence, expected):
    """Bag-of-words counts emitted as ``text_features`` match the expectation."""
    from rasa.nlu.featurizers.count_vectors_featurizer import CountVectorsFeaturizer

    featurizer = CountVectorsFeaturizer({"token_pattern": r"(?u)\b\w+\b"})

    training_example = Message(sentence)
    # An intent label is required for a valid training example.
    training_example.set("intent", "bla")
    featurizer.train(TrainingData([training_example]))

    processed = Message(sentence)
    featurizer.process(processed)

    assert np.all(processed.get("text_features") == expected)
github RasaHQ / rasa / tests / nlu / featurizers / test_count_vectors_featurizer.py View on Github external
("__OOV__ a 1 2 __oov__ __OOV__", [[0, 1, 0]]),
    ],
)
def test_count_vector_featurizer_oov_token(sentence, expected):
    """Unknown words should be folded into the configured OOV token's count."""
    from rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer import (
        CountVectorsFeaturizer,
    )

    featurizer = CountVectorsFeaturizer(
        {
            "token_pattern": r"(?u)\b\w+\b",
            "OOV_token": "__oov__",
            "return_sequence": True,
        }
    )

    training_example = Message(sentence)
    # An intent label is required for a valid training example.
    training_example.set("intent", "bla")
    featurizer.train(TrainingData([training_example]))

    processed = Message(sentence)
    featurizer.process(processed)

    assert np.all(processed.get("text_sparse_features").toarray()[0] == expected)
github botfront / rasa-for-botfront / tests / nlu / featurizers / test_count_vectors_featurizer.py View on Github external
def test_count_vector_featurizer(sentence, expected):
    """Sequence featurization should yield a sparse COO matrix of token counts."""
    from rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer import (
        CountVectorsFeaturizer,
    )

    featurizer = CountVectorsFeaturizer(
        {"token_pattern": r"(?u)\b\w+\b", "return_sequence": True}
    )

    training_example = Message(sentence)
    # An intent label is required for a valid training example.
    training_example.set("intent", "bla")
    featurizer.train(TrainingData([training_example]))

    processed = Message(sentence)
    featurizer.process(processed)

    sparse_features = processed.get("text_sparse_features")
    assert isinstance(sparse_features, scipy.sparse.coo_matrix)
    assert np.all(sparse_features.toarray()[0] == expected)
github RasaHQ / rasa / tests / nlu / base / test_featurizers.py View on Github external
("__OOV__ a 1 2 __oov__ __OOV__", [2, 3, 1]),
    ],
)
def test_count_vector_featurizer_oov_token(sentence, expected):
    """Unknown words should be counted under the configured OOV token."""
    from rasa.nlu.featurizers.count_vectors_featurizer import CountVectorsFeaturizer

    featurizer = CountVectorsFeaturizer(
        {"token_pattern": r"(?u)\b\w+\b", "OOV_token": "__oov__"}
    )

    training_example = Message(sentence)
    # An intent label is required for a valid training example.
    training_example.set("intent", "bla")
    featurizer.train(TrainingData([training_example]))

    processed = Message(sentence)
    featurizer.process(processed)

    assert np.all(processed.get("text_features") == expected)
github RasaHQ / rasa / tests / nlu / extractors / text_crf_entity_extractor.py View on Github external
},
        ),
    ]

    # uses BILOU and the default features
    ext.train(TrainingData(training_examples=examples), RasaNLUModelConfig())
    sentence = "anywhere in the west"
    doc = {"spacy_doc": spacy_nlp(sentence)}
    crf_format = ext._from_text_to_crf(Message(sentence, doc))
    assert [word[0] for word in crf_format] == ["anywhere", "in", "the", "west"]
    feats = ext._sentence_to_features(crf_format)
    assert "BOS" in feats[0]
    assert "EOS" in feats[-1]
    assert feats[1]["0:low"] == "in"
    sentence = "anywhere in the west"
    ext.extract_entities(Message(sentence, {"spacy_doc": spacy_nlp(sentence)}))
    filtered = ext.filter_trainable_entities(examples)
    assert filtered[0].get("entities") == [
        {"start": 16, "end": 20, "value": "west", "entity": "location"}
    ], "Entity without extractor remains"
    assert filtered[1].get("entities") == [
        {
            "start": 8,
            "end": 14,
            "value": "indian",
            "entity": "cuisine",
            "extractor": "CRFEntityExtractor",
        }
    ], "Only CRFEntityExtractor entity annotation remains"
    assert examples[1].get("entities")[0] == {
        "start": 0,
        "end": 7,
github RasaHQ / rasa / rasa / nlu / model.py View on Github external
only_output_properties: bool = True,
    ) -> Dict[Text, Any]:
        """Parse the input text, classify it and return pipeline result.

        The pipeline result usually contains intent and entities."""

        if not text:
            # Not all components are able to handle empty strings. So we need
            # to prevent that... This default return will not contain all
            # output attributes of all components, but in the end, no one
            # should pass an empty string in the first place.
            output = self.default_output_attributes()
            output["text"] = ""
            return output

        message = Message(text, self.default_output_attributes(), time=time)

        for component in self.pipeline:
            component.process(message, **self.context)

        output = self.default_output_attributes()
        output.update(message.as_dict(only_output_properties=only_output_properties))
        return output
github botfront / rasa-for-botfront / rasa / nlu / extractors / __init__.py View on Github external
Creates a copy of entity_examples in which entities that have
        `extractor` set to something other than
        self.name (e.g. 'CRFEntityExtractor') are removed.
        """

        filtered = []
        for message in entity_examples:
            entities = []
            for ent in message.get(ENTITIES, []):
                extractor = ent.get(EXTRACTOR)
                if not extractor or extractor == self.name:
                    entities.append(ent)
            data = message.data.copy()
            data[ENTITIES] = entities
            filtered.append(
                Message(
                    text=message.text,
                    data=data,
                    output_properties=message.output_properties,
                    time=message.time,
                )
            )

        return filtered