How to use the nlpaug.util.Method class in nlpaug

To help you get started, we’ve selected a few nlpaug examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github makcedward / nlpaug / nlpaug / base_augmenter.py View on Github external
def _validate_augmenter(cls, method, action):
        """Check that ``method`` and ``action`` are supported values.

        ``method`` is checked against ``Method.getall()`` first; ``action`` is
        checked against ``Action.getall()`` only when the method is accepted.

        Raises:
            ValueError: if ``method`` or ``action`` is not in its allowed set.
        """
        allowed_methods = Method.getall()
        if method not in allowed_methods:
            message = 'Method must be one of {} while {} is passed'.format(allowed_methods, method)
            raise ValueError(message)

        allowed_actions = Action.getall()
        if action not in allowed_actions:
            message = 'Action must be one of {} while {} is passed'.format(allowed_actions, action)
            raise ValueError(message)
github makcedward / nlpaug / nlpaug / augmenter / char / keyboard.py View on Github external
def substitute(self, data):
        results = []
        tokens = self.tokenizer(data)
        aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)

        for token_i, token in enumerate(tokens):
            if token_i not in aug_word_idxes:
                results.append(token)
                continue

            result = ''
            chars = self.token2char(token)
            aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
                                                 Method.CHAR)
            if aug_char_idxes is None:
                results.append(token)
                continue

            for char_i, char in enumerate(chars):
                if char_i not in aug_char_idxes:
github makcedward / nlpaug / nlpaug / augmenter / audio / audio_augmenter.py View on Github external
def __init__(self, action, name='Audio_Aug', device='cpu', verbose=0):
        """Initialise the augmenter by delegating to the parent initializer.

        The method is fixed to ``Method.AUDIO`` and both ``aug_min`` and
        ``aug_max`` are passed as ``None``; ``action``, ``name``, ``device``
        and ``verbose`` are forwarded unchanged.
        """
        parent = super(AudioAugmenter, self)
        parent.__init__(
            name=name,
            method=Method.AUDIO,
            action=action,
            aug_min=None,
            aug_max=None,
            device=device,
            verbose=verbose,
        )
github makcedward / nlpaug / nlpaug / base_augmenter.py View on Github external
def _validate_augmenter(cls, method, action):
        """Validate the augmenter's ``method`` and ``action`` arguments.

        The method is validated before the action, so an invalid method is
        reported even when the action is invalid as well.

        Raises:
            ValueError: if ``method`` is not in ``Method.getall()`` or
                ``action`` is not in ``Action.getall()``.
        """
        known_methods = Method.getall()
        if method not in known_methods:
            error_text = 'Method must be one of {} while {} is passed'.format(known_methods, method)
            raise ValueError(error_text)

        known_actions = Action.getall()
        if action not in known_actions:
            error_text = 'Action must be one of {} while {} is passed'.format(known_actions, action)
            raise ValueError(error_text)
github makcedward / nlpaug / nlpaug / augmenter / word / word_augmenter.py View on Github external
def __init__(self, action, name='Word_Aug', aug_min=1, aug_max=10, aug_p=0.3, stopwords=None,
                 tokenizer=None, reverse_tokenizer=None, device='cpu', verbose=0):
        """Initialise a word-level augmenter.

        The parent initializer is called with the method fixed to
        ``Method.WORD``. ``aug_p`` and ``stopwords`` are stored as given. A
        falsy ``tokenizer`` or ``reverse_tokenizer`` is replaced by the
        instance's own ``_tokenizer`` / ``_reverse_tokenizer``.
        """
        super().__init__(
            name=name,
            method=Method.WORD,
            action=action,
            aug_min=aug_min,
            aug_max=aug_max,
            device=device,
            verbose=verbose,
        )
        self.aug_p = aug_p

        # Fall back to the built-in (reverse) tokenizer when none is supplied.
        if tokenizer:
            self.tokenizer = tokenizer
        else:
            self.tokenizer = self._tokenizer

        if reverse_tokenizer:
            self.reverse_tokenizer = reverse_tokenizer
        else:
            self.reverse_tokenizer = self._reverse_tokenizer

        self.stopwords = stopwords
github makcedward / nlpaug / nlpaug / augmenter / char / random.py View on Github external
def insert(self, data):
        """Tokenize ``data`` and choose which words and characters to augment.

        Returns ``data`` unchanged when no word index is selected. Tokens that
        are not selected, or for which no character index is selected, are
        appended to ``results`` as they are.

        NOTE(review): only the selection logic is visible in this excerpt; the
        insertion step and the final return value are not shown.
        """
        results = []
        tokens = self.tokenizer(data)
        # Indexes of the tokens selected for augmentation (None if there are none).
        aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
        if aug_word_idxes is None:
            # Nothing to augment: hand the input back untouched.
            return data

        for token_i, token in enumerate(tokens):
            # Tokens that were not selected are kept as they are.
            if token_i not in aug_word_idxes:
                results.append(token)
                continue

            chars = self.token2char(token)
            # Indexes of the characters selected within this token.
            aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
                                                 Method.CHAR)
            if aug_char_idxes is None:
                # No character position can be augmented: keep the token as is.
                results.append(token)
                continue

            # Process positions from the highest index down; presumably so that
            # an insertion does not shift positions still to be handled - TODO confirm.
            aug_char_idxes.sort(reverse=True)
github makcedward / nlpaug / nlpaug / augmenter / spectrogram / spectrogram_augmenter.py View on Github external
def __init__(self, action, name='Spectrogram_Aug', device='cpu', verbose=0):
        """Initialise the augmenter by delegating to the parent initializer.

        The method is fixed to ``Method.SPECTROGRAM`` and both ``aug_min`` and
        ``aug_max`` are passed as ``None``; ``action``, ``name``, ``device``
        and ``verbose`` are forwarded unchanged.
        """
        parent = super(SpectrogramAugmenter, self)
        parent.__init__(
            name=name,
            method=Method.SPECTROGRAM,
            action=action,
            aug_min=None,
            aug_max=None,
            device=device,
            verbose=verbose,
        )
github makcedward / nlpaug / nlpaug / augmenter / char / char_augmenter.py View on Github external
def _get_aug_idxes(self, tokens, aug_min, aug_max, aug_p, mode):
        """Work out which positions of ``tokens`` are candidates for augmentation.

        Args:
            tokens: Sequence of words (``Method.WORD``) or characters
                (``Method.CHAR``) to choose from.
            aug_min, aug_max, aug_p: Forwarded to ``self._generate_aug_cnt``
                together with ``len(tokens)`` to obtain the augmentation count.
            mode: ``Method.WORD`` or ``Method.CHAR``; selects the filtering rule.

        Returns:
            ``None`` when, in ``Method.CHAR`` mode, ``tokens`` is shorter than
            ``self.min_char``, or when no candidate index survives filtering.

        NOTE(review): the excerpt ends before the final selection; the return
        value on the remaining path is not visible here.
        """
        if mode == Method.CHAR:
            # A word with fewer than min_char characters is not augmented.
            if len(tokens) < self.min_char:
                return None

        # The count is computed from the full token length, before any filtering.
        aug_cnt = self._generate_aug_cnt(len(tokens), aug_min, aug_max, aug_p)
        # Start from every position in tokens.
        idxes = [i for i, t in enumerate(tokens)]
        if mode == Method.WORD:
            # Drop stopwords (only when a stopword collection was configured).
            idxes = [i for i in idxes if self.stopwords is None or tokens[i] not in self.stopwords]
            # Drop words shorter than min_char.
            idxes = [i for i in idxes if len(tokens[i]) >= self.min_char]

        elif mode == Method.CHAR:
            # Let skip_aug filter the candidate character positions.
            idxes = self.skip_aug(idxes, tokens)

        if len(idxes) == 0:
            # No candidates left: emit a warning in verbose mode, then give up.
            if self.verbose > 0:
                exception = WarningException(name=WarningName.OUT_OF_VOCABULARY,
                                             code=WarningCode.WARNING_CODE_002, msg=WarningMessage.NO_WORD)
                exception.output()
            return None
        # Never ask for more augmentations than there are candidates.
        if len(idxes) < aug_cnt:
            aug_cnt = len(idxes)
github makcedward / nlpaug / nlpaug / augmenter / char / ocr.py View on Github external
def substitute(self, data):
        results = []
        tokens = self.tokenizer(data)
        aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)

        for token_i, token in enumerate(tokens):
            if token_i not in aug_word_idxes:
                results.append(token)
                continue

            result = ''
            chars = self.token2char(token)
            aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
                                                 Method.CHAR)
            if aug_char_idxes is None:
                results.append(token)
                continue

            for char_i, char in enumerate(chars):
                if char_i not in aug_char_idxes:
github makcedward / nlpaug / nlpaug / augmenter / char / qwerty.py View on Github external
def substitute(self, data):
        results = []
        tokens = self.tokenizer(data)
        aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_p, Method.WORD)

        for token_i, token in enumerate(tokens):
            if token_i not in aug_word_idxes:
                results.append(token)
                continue

            result = ''
            chars = self.token2char(token)
            aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_p, Method.CHAR)
            if aug_char_idxes is None:
                results.append(token)
                continue

            for char_i, char in enumerate(chars):
                if char_i not in aug_char_idxes:
                    result += char