How to use the rasa.nlu.training_data.load_data function in rasa

To help you get started, we've selected a few rasa examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github botfront / rasa-for-botfront / tests / nlu / base / test_featurizers.py View on Github external
def test_convert_featurizer_output_shape():
    from rasa.nlu.featurizers.convert_featurizer import ConveRTFeaturizer

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    convert_featurizer = ConveRTFeaturizer()
    convert_featurizer.train(td, config=None)

    text_features_dim = np.array(
        [
            example.get("text_features").shape[0]
            for example in td.intent_examples
            if example.get("text_features") is not None
        ]
    )

    response_features_dim = np.array(
        [
            example.get("response_features").shape[0]
            for example in td.intent_examples
github botfront / rasa-for-botfront / tests / nlu / training_data / test_training_data.py View on Github external
def test_dialogflow_data():
    td = training_data.load_data("data/examples/dialogflow/")
    assert not td.is_empty()
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {
        "mexico": "mexican",
        "china": "chinese",
        "india": "indian",
    }
    # The order changes based on different computers hence the grouping
    assert {td.lookup_tables[0]["name"], td.lookup_tables[1]["name"]} == {
        "location",
github RasaHQ / rasa / tests / nlu / base / test_featurizers.py View on Github external
def test_convert_featurizer_output_shape():
    from rasa.nlu.featurizers.convert_featurizer import ConveRTFeaturizer

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    convert_featurizer = ConveRTFeaturizer()
    convert_featurizer.train(td, config=None)

    text_features_dim = np.array(
        [
            example.get("text_features").shape[0]
            for example in td.intent_examples
            if example.get("text_features") is not None
        ]
    )

    response_features_dim = np.array(
        [
            example.get("response_features").shape[0]
            for example in td.intent_examples
github botfront / rasa-for-botfront / tests / nlu / training_data / test_training_data.py View on Github external
out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format, language)
    td = training_data.load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = training_data.load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, "json", language)
    rto = training_data.load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
github botfront / rasa-for-botfront / tests / nlu / base / test_featurizers.py View on Github external
def test_count_vector_featurizer_char_intent_featurizer():
    """Train a char-analyzer CountVectorsFeaturizer; expect no intent features."""
    from rasa.nlu.featurizers.count_vectors_featurizer import CountVectorsFeaturizer

    featurizer_config = {"min_ngram": 1, "max_ngram": 2, "analyzer": "char"}
    featurizer = CountVectorsFeaturizer(featurizer_config)

    data = training_data.load_data("data/examples/rasa/demo-rasa.json")
    featurizer.train(data, config=None)

    # One flag per intent example: does it carry an "intent_features" entry?
    has_intent_features = np.array(
        [
            sample.get("intent_features") is not None
            for sample in data.intent_examples
        ]
    )

    # no intent features should have been set
    assert not any(has_intent_features)
github botfront / rasa-for-botfront / tests / nlu / training_data / test_training_data.py View on Github external
def test_section_value_with_delimiter():
    """Load the section_with_delimiter fixture and check its entity synonyms."""
    fixture_path = "data/test/markdown_single_sections/section_with_delimiter.md"
    expected_synonyms = {"10:00 am": "10:00"}

    loaded = training_data.load_data(fixture_path)

    assert loaded.entity_synonyms == expected_synonyms
github botfront / rasa-for-botfront / tests / nlu / training_data / test_training_data.py View on Github external
def test_markdown_single_sections():
    """Load markdown fixtures that each hold a single section type.

    The regex-only fixture must yield exactly one regex feature, and the
    synonyms-only fixture must yield the expected synonym mapping.
    """
    fixture_dir = "data/test/markdown_single_sections/"

    # Regex-only file.
    regex_data = training_data.load_data(fixture_dir + "regex_only.md")
    expected_regexes = [{"name": "greet", "pattern": r"hey[^\s]*"}]
    assert regex_data.regex_features == expected_regexes

    # Synonyms-only file.
    synonym_data = training_data.load_data(fixture_dir + "synonyms_only.md")
    expected_synonyms = {"Chines": "chinese", "Chinese": "chinese"}
    assert synonym_data.entity_synonyms == expected_synonyms
github RasaHQ / rasa_lookup_demo / run_ngrams.py View on Github external
def train_test(td_file, config_file, model_dir, key="company", noise=0.1):
    """Train on a split of the training data and evaluate on the remainder.

    The data loaded from ``td_file`` is split with ``train_frac=0.8``. The
    test part is passed through ``add_noise`` (with ``key`` and ``noise``)
    before being written as JSON to a fixed temporary file, which is then
    handed to ``evaluate_model`` together with the persisted model location.

    Args:
        td_file: Path of the training data to load.
        config_file: Path of the config passed to ``config.load``.
        model_dir: Directory passed to ``Trainer.persist``.
        key: Forwarded to ``add_noise``.
        noise: Forwarded to ``add_noise`` as its ``noise`` keyword.
    """
    full_data = load_data(td_file)

    nlu_trainer = Trainer(config.load(config_file))
    train_part, test_part = full_data.train_test_split(train_frac=0.8)
    noisy_test = add_noise(test_part, key, noise=noise)

    nlu_trainer.train(train_part)
    test_json_path = "data/tmp/temp_test.json"
    persisted_model = nlu_trainer.persist(model_dir)

    # evaluate_model is given a file path, so the test split is dumped first.
    with open(test_json_path, "w", encoding="utf8") as out:
        out.write(noisy_test.as_json())

    evaluate_model(test_json_path, persisted_model)
github RasaHQ / rasa / rasa / nlu / convert.py View on Github external
def convert_training_data(
    data_file: Text, out_file: Text, output_format: Text, language: Text
):
    """Convert an NLU data file to JSON or markdown and write the result.

    An error is printed (and nothing is written) when ``data_file`` does not
    exist or when ``output_format`` is neither ``"json"`` nor ``"md"``.

    Args:
        data_file: Path of the NLU data file to convert.
        out_file: Path the converted output is written to.
        output_format: Target format, ``"json"`` or ``"md"``.
        language: Passed through to ``training_data.load_data``.
    """
    if not os.path.exists(data_file):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file)
        )
        return

    # Reject unsupported formats before loading anything.
    if not (output_format == "json" or output_format == "md"):
        print_error(
            "Did not recognize output format. Supported output formats: 'json' and "
            "'md'. Specify the desired output format with '--format'."
        )
        return

    loaded = training_data.load_data(data_file, language)
    if output_format == "json":
        converted = loaded.nlu_as_json(indent=2)
    else:
        converted = loaded.nlu_as_markdown()

    write_to_file(out_file, converted)
github RasaHQ / rasa_lookup_demo / run_lookup.py View on Github external
def train_model(td_file, config_file, model_dir):
    """Train a model using the training data and config.

    Args:
        td_file: Path of the training data to load.
        config_file: Path of the config passed to ``config.load``.
        model_dir: Directory passed to ``Trainer.persist``.

    Returns:
        The value returned by ``Trainer.persist``, i.e. the location of the
        persisted model, for use in a later evaluation step.
    """
    data = load_data(td_file)

    nlu_trainer = Trainer(config.load(config_file))
    nlu_trainer.train(data)

    # Persisting creates the model on disk; its location is the result.
    return nlu_trainer.persist(model_dir)