How to use the nlpaug.util.Action.INSERT function in nlpaug

To help you get started, we’ve selected a few nlpaug examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github makcedward / nlpaug / test / augmenter / word / test_tfidf.py View on Github external
def test_insert(self):
        """Check that TF-IDF insertion adds space-separated words to the text."""
        samples = ['The quick brown fox jumps over the lazy dog']
        model_dir = os.environ.get("MODEL_DIR")
        augmenter = naw.TfIdfAug(model_path=model_dir, action=Action.INSERT)

        for original in samples:
            # Guard against an empty fixture string before augmenting it.
            self.assertLess(0, len(original))
            augmented = augmenter.augment(original)

            words_before = len(original.split(' '))
            words_after = len(augmented.split(' '))
            self.assertLess(words_before, words_after)
            self.assertNotEqual(original, augmented)

        # The loop above is vacuous if the fixture list is empty.
        self.assertLess(0, len(samples))
github makcedward / nlpaug / test / augmenter / word / test_bert.py View on Github external
def test_insert(self):
        """Insert words with ``BertAug`` and verify the augmented output.

        For each sample the test asserts that the augmented text has more
        space-separated words than the input, differs from the input, and
        does not contain ``nml.Bert.SUBWORD_PREFIX``.
        """
        texts = [
            'The quick brown fox jumps over the lazy dog'
        ]

        aug = naw.BertAug(action=Action.INSERT)

        for text in texts:
            self.assertLess(0, len(text))
            augmented_text = aug.augment(text)

            self.assertLess(len(text.split(' ')), len(augmented_text.split(' ')))
            self.assertNotEqual(text, augmented_text)
            # assertNotIn reports both operands on failure, whereas
            # assertTrue(x not in y) only reports "False is not true".
            self.assertNotIn(nml.Bert.SUBWORD_PREFIX, augmented_text)

        # The loop above is vacuous if the fixture list is empty.
        self.assertLess(0, len(texts))
github makcedward / nlpaug / test / augmenter / word / test_wordembs.py View on Github external
def setUpClass(cls):
        env_config_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', '..', '.env'))
        load_dotenv(env_config_path)

        cls.insert_augmenters = [
            naw.Word2vecAug(
                model_path=os.environ.get("MODEL_DIR") + 'GoogleNews-vectors-negative300.bin',
                action=Action.INSERT),
            naw.FasttextAug(
                model_path=os.environ.get("MODEL_DIR") + 'wiki-news-300d-1M.vec',
                action=Action.INSERT),
            naw.GloVeAug(
                model_path=os.environ.get("MODEL_DIR") + 'glove.6B.50d.txt',
                action=Action.INSERT)
        ]

        cls.substitute_augmenters = [
            naw.Word2vecAug(
                model_path=os.environ.get("MODEL_DIR") + 'GoogleNews-vectors-negative300.bin',
                action=Action.SUBSTITUTE),
            naw.FasttextAug(
                model_path=os.environ.get("MODEL_DIR") + 'wiki-news-300d-1M.vec',
                action=Action.SUBSTITUTE),
            naw.GloVeAug(
github makcedward / nlpaug / test / augmenter / word / test_fasttext.py View on Github external
def test_insert(self):
        """Check that FastText insertion yields a result longer than the token count."""
        samples = ['The quick brown fox jumps over the lazy dog']
        vector_file = os.environ.get("MODEL_DIR") + 'wiki-news-300d-1M.vec'
        augmenter = naw.FasttextAug(model_path=vector_file, action=Action.INSERT)

        for sample in samples:
            original_tokens = augmenter.tokenizer(sample)
            augmented = augmenter.augment(sample)

            # NOTE(review): this compares a token count with len() of the raw
            # augment() result -- confirm that result is a token list, not a string.
            self.assertLess(len(original_tokens), len(augmented))
            self.assertLess(0, len(original_tokens))

        # The loop above is vacuous if the fixture list is empty.
        self.assertLess(0, len(samples))
github makcedward / nlpaug / test / flow / test_sequential.py View on Github external
def test_multiple_actions(self):
        """Run two multi-step ``Sequential`` flows and check every text is changed."""
        samples = [
            'The quick brown fox jumps over the lazy dog',
            'Zology raku123456 fasdasd asd4123414 1234584'
        ]

        # Flow 1: character insertion followed by a word-level augmenter.
        insert_then_word = naf.Sequential([
            nac.RandomCharAug(action=Action.INSERT),
            naw.RandomWordAug(),
        ])
        # Flow 2: three character-level augmenters chained together.
        char_noise = naf.Sequential([
            nac.OcrAug(),
            nac.KeyboardAug(aug_char_min=1),
            nac.RandomCharAug(
                action=Action.SUBSTITUTE, aug_char_min=1,
                aug_char_p=0.6, aug_word_p=0.6),
        ])
        pipelines = [insert_then_word, char_noise]

        for pipeline in pipelines:
            for sample in samples:
                augmented = pipeline.augment(sample)

                self.assertNotEqual(sample, augmented)
                self.assertLess(0, len(sample))

            self.assertLess(0, len(samples))

        # The loops above are vacuous if no flows were defined.
        self.assertLess(0, len(pipelines))
github makcedward / nlpaug / test / flow / test_sometimes.py View on Github external
def test_single_action(self):
        """Check that a ``Sometimes`` flow changes at least one text within 5 tries."""
        samples = [
            'The quick brown fox jumps over the lazy dog',
            'Zology raku123456 fasdasd asd4123414 1234584 s@#'
        ]

        # The probability may be low enough that no augmentation is performed
        # on a given pass, so retry up to 5 times before failing.
        any_changed = False
        for _attempt in range(5):
            pipeline = naf.Sometimes(
                [nac.RandomCharAug(action=Action.INSERT)],
                pipeline_p=0.6)
            for sample in samples:
                augmented = pipeline.augment(sample)

                if sample != augmented:
                    any_changed = True

                self.assertLess(0, len(sample))

            # Stop retrying as soon as one pass has altered any text.
            if any_changed:
                break

        self.assertTrue(any_changed)
        self.assertLess(0, len(samples))
github makcedward / nlpaug / test / flow / test_sequential.py View on Github external
def test_single_action(self):
        """Check that a one-step ``Sequential`` insert flow changes every text."""
        samples = [
            'The quick brown fox jumps over the lazy dog',
            'Zology raku123456 fasdasd asd4123414 1234584 s@#'
        ]

        char_inserter = nac.RandomCharAug(action=Action.INSERT, min_char=1)
        pipeline = naf.Sequential([char_inserter])

        for sample in samples:
            augmented = pipeline.augment(sample)

            self.assertNotEqual(sample, augmented)
            self.assertLess(0, len(sample))

        # The loop above is vacuous if the fixture list is empty.
        self.assertLess(0, len(samples))
github makcedward / nlpaug / test / augmenter / char / test_random_char.py View on Github external
def test_insert_multi_words(self):
        """Insert characters into a multi-word text and check the changes.

        For each sample the test asserts that fewer tokens changed than the
        input has tokens, that the text changed, and that the augmented text
        has more characters than the input.
        """
        texts = ['The quick brown fox jumps over the lazy dog']
        aug = RandomCharAug(action=Action.INSERT, min_char=1)
        for text in texts:
            augmented_text = aug.augment(text)

            tokens = aug.tokenizer(text)
            augmented_tokens = aug.tokenizer(augmented_text)

            # Count positions whose token differs after augmentation; zip()
            # stops at the shorter of the two token sequences.
            augmented_cnt = sum(
                1 for token, augmented_token in zip(tokens, augmented_tokens)
                if token != augmented_token)

            self.assertLess(augmented_cnt, len(tokens))
            self.assertNotEqual(text, augmented_text)
            self.assertLess(len(text), len(augmented_text))

        # assertLess reports the actual length on failure, unlike
        # assertTrue(len(texts) > 0).
        self.assertLess(0, len(texts))
github makcedward / nlpaug / test / augmenter / word / test_glove.py View on Github external
def test_insert(self):
        """Check that GloVe insertion yields a result longer than the token count."""
        samples = ['The quick brown fox jumps over the lazy dog']
        vector_file = os.environ.get("MODEL_DIR") + 'glove.6B.50d.txt'
        augmenter = naw.GloVeAug(model_path=vector_file, action=Action.INSERT)

        for sample in samples:
            original_tokens = augmenter.tokenizer(sample)
            augmented = augmenter.augment(sample)

            # NOTE(review): this compares a token count with len() of the raw
            # augment() result -- confirm that result is a token list, not a string.
            self.assertLess(len(original_tokens), len(augmented))
            self.assertLess(0, len(original_tokens))

        # The loop above is vacuous if the fixture list is empty.
        self.assertLess(0, len(samples))
github makcedward / nlpaug / nlpaug / base_augmenter.py View on Github external
exception.output()

                # Return empty value per data type
                if isinstance(data, str):
                    return ''
                elif isinstance(data, list):
                    return []
                elif isinstance(data, np.ndarray):
                    return np.array([])

                return None

        results = []
        action_fx = None
        clean_data = self.clean(data)
        if self.action == Action.INSERT:
            action_fx = self.insert
        elif self.action == Action.SUBSTITUTE:
            action_fx = self.substitute
        elif self.action == Action.SWAP:
            action_fx = self.swap
        elif self.action == Action.DELETE:
            action_fx = self.delete
        elif self.action == Action.SPLIT:
            action_fx = self.split

        for _ in range(max_retry_times+1):
            augmented_results = []
            if num_thread == 1 or self.device == 'cuda':
                # TODO: support multiprocessing for GPU
                # https://discuss.pytorch.org/t/using-cuda-multiprocessing-with-single-gpu/7300
                augmented_results = [action_fx(clean_data) for _ in range(n)]