How to use underthesea - 10 common examples

To help you get started, we’ve selected a few underthesea examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github undertheseanlp / underthesea / tests / ner / test_ner.py View on Github external
def load_input(input_file):
    """Build one space-separated string from the first column of a file.

    The file content is obtained through ``read``, stripped of surrounding
    whitespace and split into lines. For every line only the text before
    the first tab is kept; the kept pieces are joined with single spaces.
    """
    rows = read(input_file).strip().split("\n")
    # Only the leading tab-separated field of each row is used.
    return u" ".join(row.split("\t", 1)[0] for row in rows)
github undertheseanlp / underthesea / tests / ner / test_ner.py View on Github external
def load_output(input_file):
    """Parse a tab-separated file into a list of tuples.

    The file content is obtained through ``read`` and stripped of
    surrounding whitespace; each line becomes one tuple holding that
    line's tab-separated fields.
    """
    rows = read(input_file).strip().split("\n")
    return [tuple(row.split("\t")) for row in rows]
github undertheseanlp / underthesea / tests / word_sent / test_performance_2.py View on Github external
def test_1(self):
        """Check the tokens-per-second throughput of ``word_sent``.

        The token count is the number of space-separated pieces in
        ``tokenize(self.text)``. A single ``word_sent(self.text)`` call is
        timed with ``time.time()``; when the measured duration is exactly
        zero no speed can be computed and the assertion is skipped.
        """
        token_count = len(tokenize(self.text).split(" "))
        began = time.time()
        word_sent(self.text)
        elapsed = time.time() - began  # in seconds
        if elapsed == 0:
            # Clock did not advance: nothing meaningful to assert.
            return
        tokens_per_second = token_count / elapsed
        print("Speed: ", tokens_per_second)
        self.assertGreater(tokens_per_second, EXPECTED_SPEED)
github undertheseanlp / underthesea / tests / word_sent / test_performance.py View on Github external
def test_1(self):
        """Check the tokens-per-second throughput of ``word_sent``.

        The token count is the total number of space-separated pieces in
        ``tokenize(text)`` over every entry of ``self.texts``. All texts are
        then passed through ``word_sent`` while timing the loop with
        ``time.time()``, and the resulting speed must exceed
        ``EXPECTED_SPEED``.
        """
        n_tokens = sum(len(tokenize(text).split(" ")) for text in self.texts)
        start = time.time()
        for text in self.texts:
            word_sent(text)
        end = time.time()
        duration = end - start  # in seconds
        # time.time() can have coarse resolution, so a fast run may measure
        # a zero duration; skip the check instead of dividing by zero.
        if duration != 0:
            speed = n_tokens / duration
            print("Speed: ", speed)
            self.assertGreater(speed, EXPECTED_SPEED)
github undertheseanlp / underthesea / tests / test_corpus / test_plaintext.py View on Github external
def test_save(self):
        """Load a corpus, save it, and expect four entries in the output.

        The corpus is loaded from ``self.plaintext_folder`` and written to
        ``self.saved_plaintext_folder``; the test passes when listing that
        folder yields exactly four entries. The saved folder is removed
        afterwards whether or not the assertion succeeds.
        """
        corpus = PlainTextCorpus()
        corpus.load(self.plaintext_folder)
        try:
            corpus.save(self.saved_plaintext_folder)
            files = listdir(self.saved_plaintext_folder)
            self.assertEqual(4, len(files))
        finally:
            # Best-effort cleanup: runs even when the assertion fails so a
            # failed run does not leave stale output behind, and removal
            # errors are ignored rather than masking the test result.
            shutil.rmtree(self.saved_plaintext_folder, ignore_errors=True)
github undertheseanlp / underthesea / tests / test_corpus / test_plaintext.py View on Github external
def test___init__(self):
        """A freshly constructed PlainTextCorpus has ``documents`` set to None."""
        fresh_corpus = PlainTextCorpus()
        documents = fresh_corpus.documents
        self.assertIsNone(documents)
github undertheseanlp / underthesea / tests / feature_engineering / test_text.py View on Github external
def test_text_1(self):
        """The result of ``Text`` on a unicode literal must pass ``is_unicode``."""
        converted = Text(u"đi học")
        self.assertTrue(is_unicode(converted))
github undertheseanlp / underthesea / tests / ner / test_ner.py View on Github external
def save_temp(id, output):
    """Write ``output`` to ``<id>.actual`` inside ``samples_dir``.

    Each item of ``output`` is joined with tabs to form one line, and the
    lines are joined with newlines before being handed to ``write``.
    """
    destination = join(samples_dir, "%s.actual" % id)
    rendered_rows = [u"\t".join(fields) for fields in output]
    write(destination, u"\n".join(rendered_rows))
github undertheseanlp / underthesea / tests / word_tokenize / test_word_tokenize.py View on Github external
def test_special_cases_2(self):
        """A lone "=" must come back from ``word_tokenize`` as ``["="]``."""
        tokens = word_tokenize(u"=")
        self.assertEqual(tokens, ["="])
github undertheseanlp / underthesea / tests / word_tokenize / test_word_tokenize.py View on Github external
def test_special_cases_3(self):
        """The emoticon "=))" must come back from ``word_tokenize`` as one token."""
        tokens = word_tokenize(u"=))")
        self.assertEqual(tokens, ["=))"])