How to use the nlpaug.augmenter.word.RandomWordAug class in nlpaug

To help you get started, we’ve selected a few nlpaug examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github makcedward / nlpaug / test / augmenter / word / test_word.py View on Github external
def test_empty_input_for_swap(self):
        """Check how the swap action handles blank and ``None`` inputs.

        A whitespace-only string is expected to come back as ``''`` and a
        ``None`` input is expected to come back as ``None``.
        """
        blank_inputs = [' ']
        swap_aug = naw.RandomWordAug(action="swap")
        for blank in blank_inputs:
            self.assertEqual('', swap_aug.augment(blank))

        # Guard that the fixture still holds exactly one case.
        self.assertEqual(1, len(blank_inputs))

        none_inputs = [None]
        # A fresh augmenter is built for the second scenario.
        swap_aug = naw.RandomWordAug(action="swap")
        for missing in none_inputs:
            self.assertEqual(None, swap_aug.augment(missing))

        self.assertEqual(len(none_inputs), 1)
github makcedward / nlpaug / test / augmenter / word / test_random_word.py View on Github external
def test_substitute_with_target_word(self):
        """Substituting with ``target_words`` must inject one of those words.

        The augmented sentence has to contain at least one of the supplied
        replacement tokens and must differ from the input.
        """
        sentences = ['The quick brown fox jumps over the lazy dog']
        target_words = ['$', '#', '^^^']
        substitute_aug = naw.RandomWordAug(action='substitute', target_words=target_words)

        for sentence in sentences:
            result = substitute_aug.augment(sentence)

            # At least one replacement token has to appear in the output.
            self.assertTrue(any(token in result for token in target_words))
            self.assertNotEqual(sentence, result)
github makcedward / nlpaug / test / augmenter / word / test_random_word.py View on Github external
def test_delete(self):
        """An augmenter built with default arguments must change the sentence.

        NOTE(review): the test name suggests the default action is "delete";
        confirm against the ``RandomWordAug`` constructor.
        """
        sentences = ['The quick brown fox jumps over the lazy dog']
        default_aug = naw.RandomWordAug()

        for sentence in sentences:
            result = default_aug.augment(sentence)
            self.assertNotEqual(sentence, result)
github makcedward / nlpaug / test / flow / test_sequential.py View on Github external
def test_multiple_actions(self):
        """Every sequential pipeline must alter every sample sentence.

        Two ``Sequential`` flows are exercised: a character insert followed by
        a word-level augmenter, and a chain of three character-level
        augmenters.
        """
        samples = [
            'The quick brown fox jumps over the lazy dog',
            'Zology raku123456 fasdasd asd4123414 1234584',
        ]

        char_then_word = naf.Sequential([
            nac.RandomCharAug(action=Action.INSERT),
            naw.RandomWordAug(),
        ])
        char_only = naf.Sequential([
            nac.OcrAug(),
            nac.KeyboardAug(aug_char_min=1),
            nac.RandomCharAug(action=Action.SUBSTITUTE, aug_char_min=1, aug_char_p=0.6, aug_word_p=0.6),
        ])
        pipelines = [char_then_word, char_only]

        for pipeline in pipelines:
            for sample in samples:
                result = pipeline.augment(sample)

                self.assertNotEqual(sample, result)
                # Fixture guard: the input sentence itself is non-empty.
                self.assertGreater(len(sample), 0)

            # Fixture guard: there was at least one sentence to augment.
            self.assertGreater(len(samples), 0)

        # Fixture guard: there was at least one pipeline to run.
        self.assertGreater(len(pipelines), 0)
github makcedward / nlpaug / test / augmenter / word / test_word.py View on Github external
def test_empty_input_for_delete(self):
        """Deleting from a blank string must yield ``''`` or ``' '``.

        Checked for a plain delete augmenter and for one configured with
        stopwords.
        """
        blank = ' '
        delete_augs = [
            naw.RandomWordAug(action="delete"),
            naw.RandomWordAug(action="delete", stopwords=['a', 'an', 'the']),
        ]

        for delete_aug in delete_augs:
            result = delete_aug.augment(blank)
            # FIXME: standardize return
            self.assertTrue(result in ('', ' '))
github makcedward / nlpaug / test / augmenter / word / test_random_word.py View on Github external
def test_substitute_without_target_word(self):
        """Substituting without ``target_words`` must leave a ``'_'`` in the output.

        The augmented sentence is also required to differ from the input.
        """
        sentences = ['The quick brown fox jumps over the lazy dog']
        substitute_aug = naw.RandomWordAug(action='substitute')

        for sentence in sentences:
            result = substitute_aug.augment(sentence)

            self.assertIn('_', result)
            self.assertNotEqual(sentence, result)
github makcedward / nlpaug / test / augmenter / word / test_word.py View on Github external
def test_multi_thread(self):
        """Each augmenter must return ``n`` results for every thread count.

        Three word-level augmenters are run with ``num_thread`` set to 1 and
        then 3; the length of the returned collection must equal ``n``.

        Raises:
            KeyError: if the ``MODEL_DIR`` environment variable is not set.
        """
        text = 'The quick brown fox jumps over the lazy dog.'
        n = 3
        # os.path.join copes with MODEL_DIR both with and without a trailing
        # separator; plain concatenation produced a wrong path without one.
        word2vec_path = os.path.join(
            os.environ["MODEL_DIR"], 'GoogleNews-vectors-negative300.bin')
        augs = [
            naw.RandomWordAug(),
            naw.WordEmbsAug(model_type='word2vec', model_path=word2vec_path),
            naw.ContextualWordEmbsAug(
                model_path='xlnet-base-cased', action="substitute",
                skip_unknown_word=True, temperature=0.7, device='cpu'),
        ]

        for num_thread in [1, 3]:
            for aug in augs:
                augmented_data = aug.augment(text, n=n, num_thread=num_thread)
                self.assertEqual(len(augmented_data), n)