How to use rnnmorph - 10 common examples

To help you get started, we’ve selected a few rnnmorph examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IlyaGusev / rnnmorph / rnnmorph / test_predictor.py View on Github external
def setUpClass(cls):
        """Set up debug logging to stdout, download the NLTK resources, and build the predictors.

        Stores one ``RNNMorphPredictor`` per language on the class as
        ``en_predictor`` and ``ru_predictor``.
        """
        logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
        # Same three resources, requested in the same order.
        for resource in ("wordnet", 'averaged_perceptron_tagger', 'universal_tagset'):
            nltk.download(resource)
        cls.en_predictor = RNNMorphPredictor(language="en")
        cls.ru_predictor = RNNMorphPredictor(language="ru")
github IlyaGusev / rnnmorph / rnnmorph / test_predictor.py View on Github external
def setUpClass(cls):
        """Set up debug logging to stdout, download the NLTK resources, and build the predictors.

        Stores one ``RNNMorphPredictor`` per language on the class as
        ``en_predictor`` and ``ru_predictor``.
        """
        logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
        # Same three resources, requested in the same order.
        for resource in ("wordnet", 'averaged_perceptron_tagger', 'universal_tagset'):
            nltk.download(resource)
        cls.en_predictor = RNNMorphPredictor(language="en")
        cls.ru_predictor = RNNMorphPredictor(language="ru")
github IlyaGusev / rnnmorph / rnnmorph / test_predictor.py View on Github external
def test_en_accuracy(self):
        """Check that the ``tag_accuracy`` reported by ``tag_en_files`` is greater than 85."""
        en_quality = tag_en_files(self.en_predictor)
        self.assertGreater(en_quality.tag_accuracy, 85)
github IlyaGusev / rnnmorph / rnnmorph / data_preparation / converter.py View on Github external
with open(input_filename, "r", encoding='utf-8') as r, open(output_filename, "w", encoding='utf-8') as w:
            # Token counter within the current sentence; advanced only when add_number is set.
            i = 0
            for line in r:
                # Lines beginning with "#" or "=" are dropped entirely.
                if line[0] in ("#", "="):
                    continue
                # A bare newline is copied through and restarts the numbering.
                if line == "\n":
                    w.write("\n")
                    i = 0
                    continue
                records = line.split("\t")
                pos = records[3]
                # The grammeme column is index 5 when with_forth_column is set, otherwise index 4.
                gram = process_gram_tag(records[5] if with_forth_column else records[4])
                if not with_punct and pos == "PUNCT":
                    continue
                fields = [records[1], records[2].lower(), pos, gram]
                if add_number:
                    i += 1
                    fields.insert(0, str(i))
                w.write("\t".join(fields) + "\n")
github nsu-ai / russian_g2p / other_scripts / check_accentor.py View on Github external
def main():
    if sys.argv.__len__() > 1:
        init_dir_name = os.path.normpath(sys.argv[1])
        assert os.path.isdir(init_dir_name), 'Directory `{0}` does not exist!'.format(init_dir_name)
        all_prompts = sorted(list(get_all_prompts(init_dir_name)))
        accentor = Accentor()
        morpho_predictor = RNNMorphPredictor()
        i = 0
        for cur_prompt in all_prompts[:100]:
            trouble = False
            unknown_words = []
            for cur_subsentence in select_subsentences(cur_prompt):
                morphotags = ['{0} {1}'.format(cur_morpho.pos, cur_morpho.tag)
                              for cur_morpho in morpho_predictor.predict_sentence_tags(cur_subsentence)]
                accent_variants = accentor.do_accents(cur_subsentence, morphotags)
                if len(accent_variants) > 1:
                    trouble = True
                else:
                    accented_phrase = accent_variants[0]
                    for cur_word in accented_phrase:
                        vowels_counter = 0
                        for cur_char in cur_word.lower():
                            if cur_char in VOWEL_LETTERS:
github nsu-ai / russian_g2p / russian_g2p / Preprocessor.py View on Github external
def __init__(self, batch_size=1):
        """Keep the batch size and create an ``RNNMorphPredictor`` for Russian.

        :param batch_size: value stored on the instance as ``batch_size``.
        """
        self.batch_size = batch_size
        ru_predictor = RNNMorphPredictor(language="ru")
        self.predictor = ru_predictor
github IlyaGusev / rnnmorph / rnnmorph / tag_genres.py View on Github external
def tag_en_files(predictor: RNNMorphPredictor):
    """Tag the English EWT UD test file and measure the result against the gold file.

    :param predictor: predictor passed to ``tag``.
    :return: whatever ``measure`` returns for the gold/tagged pair.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(TEST_TAGGED_FOLDER, exist_ok=True)
    tag(predictor, TEST_GOLD_EN_EWT_UD, TEST_TAGGED_EN_EWT_UD)
    return measure(TEST_GOLD_EN_EWT_UD, TEST_TAGGED_EN_EWT_UD, True, None)
github IlyaGusev / rnnmorph / rnnmorph / tag_genres.py View on Github external
def tag_ru_files(predictor: RNNMorphPredictor) -> Dict:
    """Tag the Lenta, VK and JZ test files and measure each against its gold file.

    :param predictor: predictor passed to ``tag``.
    :return: dict with keys ``'Lenta'``, ``'VK'`` and ``'JZ'`` holding the
        per-file results of ``measure``, plus ``'All'``: a dict with
        ``tag_accuracy``, ``pos_accuracy`` and ``sentence_accuracy`` computed
        as ratios over the pooled counts of the three files.
    :raises ZeroDivisionError: if the pooled tag or sentence count is zero.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(TEST_TAGGED_FOLDER, exist_ok=True)
    tag(predictor, TEST_UNTAGGED_LENTA, TEST_TAGGED_LENTA)
    tag(predictor, TEST_UNTAGGED_VK, TEST_TAGGED_VK)
    tag(predictor, TEST_UNTAGGED_JZ, TEST_TAGGED_JZ)

    corpora = (
        ('Lenta', TEST_GOLD_LENTA, TEST_TAGGED_LENTA),
        ('VK', TEST_GOLD_VK, TEST_TAGGED_VK),
        ('JZ', TEST_GOLD_JZ, TEST_TAGGED_JZ),
    )
    quality = dict()
    for name, gold, tagged in corpora:
        print(name + ":")
        quality[name] = measure(gold, tagged, True, None)
    print("All:")

    # Pool the raw counts so that 'All' is weighted by file size.
    per_corpus = [quality[name] for name, _, _ in corpora]
    count_correct_tags = sum(q.correct_tags for q in per_corpus)
    count_correct_pos = sum(q.correct_pos for q in per_corpus)
    count_tags = sum(q.total_tags for q in per_corpus)
    count_correct_sentences = sum(q.correct_sentences for q in per_corpus)
    count_sentences = sum(q.total_sentences for q in per_corpus)

    quality['All'] = {
        'tag_accuracy': float(count_correct_tags) / count_tags,
        # POS accuracy shares the total_tags denominator with tag accuracy.
        'pos_accuracy': float(count_correct_pos) / count_tags,
        'sentence_accuracy': float(count_correct_sentences) / count_sentences,
    }
    return quality
github IlyaGusev / rnnmorph / rnnmorph / tag_genres.py View on Github external
def tag_ru_files(predictor: RNNMorphPredictor) -> Dict:
    """Tag the Lenta, VK and JZ test files and measure each against its gold file.

    :param predictor: predictor passed to ``tag``.
    :return: dict with keys ``'Lenta'``, ``'VK'`` and ``'JZ'`` holding the
        per-file results of ``measure``, plus ``'All'``: a dict with
        ``tag_accuracy``, ``pos_accuracy`` and ``sentence_accuracy`` computed
        as ratios over the pooled counts of the three files.
    :raises ZeroDivisionError: if the pooled tag or sentence count is zero.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(TEST_TAGGED_FOLDER, exist_ok=True)
    tag(predictor, TEST_UNTAGGED_LENTA, TEST_TAGGED_LENTA)
    tag(predictor, TEST_UNTAGGED_VK, TEST_TAGGED_VK)
    tag(predictor, TEST_UNTAGGED_JZ, TEST_TAGGED_JZ)

    corpora = (
        ('Lenta', TEST_GOLD_LENTA, TEST_TAGGED_LENTA),
        ('VK', TEST_GOLD_VK, TEST_TAGGED_VK),
        ('JZ', TEST_GOLD_JZ, TEST_TAGGED_JZ),
    )
    quality = dict()
    for name, gold, tagged in corpora:
        print(name + ":")
        quality[name] = measure(gold, tagged, True, None)
    print("All:")

    # Pool the raw counts so that 'All' is weighted by file size.
    per_corpus = [quality[name] for name, _, _ in corpora]
    count_correct_tags = sum(q.correct_tags for q in per_corpus)
    count_correct_pos = sum(q.correct_pos for q in per_corpus)
    count_tags = sum(q.total_tags for q in per_corpus)
    count_correct_sentences = sum(q.correct_sentences for q in per_corpus)
    count_sentences = sum(q.total_sentences for q in per_corpus)

    quality['All'] = {
        'tag_accuracy': float(count_correct_tags) / count_tags,
        # POS accuracy shares the total_tags denominator with tag accuracy.
        'pos_accuracy': float(count_correct_pos) / count_tags,
        'sentence_accuracy': float(count_correct_sentences) / count_sentences,
    }
    # The function is declared -> Dict, so hand the assembled result back to the caller.
    return quality
github IlyaGusev / rnnmorph / rnnmorph / tag_genres.py View on Github external
def tag_ru_files(predictor: RNNMorphPredictor) -> Dict:
    """Tag the Lenta, VK and JZ test files and measure each against its gold file.

    :param predictor: predictor passed to ``tag``.
    :return: dict with keys ``'Lenta'``, ``'VK'`` and ``'JZ'`` holding the
        per-file results of ``measure``, plus ``'All'``: a dict with
        ``tag_accuracy``, ``pos_accuracy`` and ``sentence_accuracy`` computed
        as ratios over the pooled counts of the three files.
    :raises ZeroDivisionError: if the pooled tag or sentence count is zero.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(TEST_TAGGED_FOLDER, exist_ok=True)
    tag(predictor, TEST_UNTAGGED_LENTA, TEST_TAGGED_LENTA)
    tag(predictor, TEST_UNTAGGED_VK, TEST_TAGGED_VK)
    tag(predictor, TEST_UNTAGGED_JZ, TEST_TAGGED_JZ)

    corpora = (
        ('Lenta', TEST_GOLD_LENTA, TEST_TAGGED_LENTA),
        ('VK', TEST_GOLD_VK, TEST_TAGGED_VK),
        ('JZ', TEST_GOLD_JZ, TEST_TAGGED_JZ),
    )
    quality = dict()
    for name, gold, tagged in corpora:
        print(name + ":")
        quality[name] = measure(gold, tagged, True, None)
    print("All:")

    # Pool the raw counts so that 'All' is weighted by file size.
    per_corpus = [quality[name] for name, _, _ in corpora]
    count_correct_tags = sum(q.correct_tags for q in per_corpus)
    count_correct_pos = sum(q.correct_pos for q in per_corpus)
    count_tags = sum(q.total_tags for q in per_corpus)
    count_correct_sentences = sum(q.correct_sentences for q in per_corpus)
    count_sentences = sum(q.total_sentences for q in per_corpus)

    quality['All'] = {
        'tag_accuracy': float(count_correct_tags) / count_tags,
        # POS accuracy shares the total_tags denominator with tag accuracy.
        'pos_accuracy': float(count_correct_pos) / count_tags,
        'sentence_accuracy': float(count_correct_sentences) / count_sentences,
    }
    return quality