Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _validate_augmenter(cls, method, action):
    """Verify that ``method`` and ``action`` are supported values.

    ``method`` is checked first against ``Method.getall()``; ``action`` is
    then checked against ``Action.getall()``.

    Raises:
        ValueError: if either value is not in its corresponding collection.
    """
    if method not in Method.getall():
        method_error = 'Method must be one of {} while {} is passed'.format(Method.getall(), method)
        raise ValueError(method_error)

    if action not in Action.getall():
        action_error = 'Action must be one of {} while {} is passed'.format(Action.getall(), action)
        raise ValueError(action_error)
def substitute(self, data):
# Character-level substitution over the tokens of `data`.
# NOTE: this snippet is cut off before the per-character branch body and
# before any return statement, so only the selection logic is visible.
results = []
tokens = self.tokenizer(data)
# Choose which token positions are candidates for augmentation.
# NOTE(review): there is no None guard on this result before the `in` test
# below, whereas the `insert` snippet in this file returns early on None.
# Confirm `_get_aug_idxes` cannot return None here, otherwise the membership
# test raises TypeError.
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
for token_i, token in enumerate(tokens):
# Tokens that were not selected are copied through unchanged.
if token_i not in aug_word_idxes:
results.append(token)
continue
result = ''
chars = self.token2char(token)
# Choose which character positions inside the selected token to change.
aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
Method.CHAR)
# A None result means nothing in this token is augmented; keep it as is.
if aug_char_idxes is None:
results.append(token)
continue
for char_i, char in enumerate(chars):
# (snippet truncated here: the body of this branch is not shown)
if char_i not in aug_char_idxes:
def __init__(self, action, name='Audio_Aug', device='cpu', verbose=0):
    """Set up an audio augmenter by delegating to the parent initializer.

    The parent is always called with ``method=Method.AUDIO`` and with
    ``aug_min``/``aug_max`` set to ``None``; the remaining arguments are
    forwarded unchanged.
    """
    parent = super(AudioAugmenter, self)
    parent.__init__(
        name=name,
        method=Method.AUDIO,
        action=action,
        aug_min=None,
        aug_max=None,
        device=device,
        verbose=verbose,
    )
def _validate_augmenter(cls, method, action):
    """Reject unsupported ``method`` / ``action`` values.

    Raises:
        ValueError: if ``method`` is not in ``Method.getall()``, or (checked
            second) if ``action`` is not in ``Action.getall()``.
    """
    if method not in Method.getall():
        bad_method_msg = 'Method must be one of {} while {} is passed'.format(Method.getall(), method)
        raise ValueError(bad_method_msg)

    if action not in Action.getall():
        bad_action_msg = 'Action must be one of {} while {} is passed'.format(Action.getall(), action)
        raise ValueError(bad_action_msg)
def __init__(self, action, name='Word_Aug', aug_min=1, aug_max=10, aug_p=0.3, stopwords=None,
             tokenizer=None, reverse_tokenizer=None, device='cpu', verbose=0):
    """Set up a word-level augmenter.

    The parent initializer is called with ``method=Method.WORD``. A falsy
    ``tokenizer`` or ``reverse_tokenizer`` falls back to ``self._tokenizer``
    or ``self._reverse_tokenizer`` respectively.
    """
    super().__init__(
        name=name,
        method=Method.WORD,
        action=action,
        aug_min=aug_min,
        aug_max=aug_max,
        device=device,
        verbose=verbose,
    )

    self.aug_p = aug_p
    # Use the caller-supplied callables when given, else the instance defaults.
    self.tokenizer = tokenizer if tokenizer else self._tokenizer
    self.reverse_tokenizer = reverse_tokenizer if reverse_tokenizer else self._reverse_tokenizer
    self.stopwords = stopwords
def insert(self, data):
# Character-level insertion over the tokens of `data`.
# NOTE: this snippet is cut off after the character indexes are sorted; the
# actual insertion step and the return of the result are not shown.
results = []
tokens = self.tokenizer(data)
# Choose which token positions are candidates for augmentation.
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
# No candidate tokens: hand the input back untouched.
if aug_word_idxes is None:
return data
for token_i, token in enumerate(tokens):
# Tokens that were not selected are copied through unchanged.
if token_i not in aug_word_idxes:
results.append(token)
continue
chars = self.token2char(token)
# Choose which character positions inside the selected token to change.
aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
Method.CHAR)
# A None result means nothing in this token is augmented; keep it as is.
if aug_char_idxes is None:
results.append(token)
continue
# Sort positions from highest to lowest.
# Presumably so that later insertions do not shift the positions still to
# be processed; the code that consumes the list is not visible. TODO confirm.
aug_char_idxes.sort(reverse=True)
def __init__(self, action, name='Spectrogram_Aug', device='cpu', verbose=0):
    """Set up a spectrogram augmenter by delegating to the parent initializer.

    The parent is always called with ``method=Method.SPECTROGRAM`` and with
    ``aug_min``/``aug_max`` set to ``None``; the remaining arguments are
    forwarded unchanged.
    """
    parent = super(SpectrogramAugmenter, self)
    parent.__init__(
        name=name,
        method=Method.SPECTROGRAM,
        action=action,
        aug_min=None,
        aug_max=None,
        device=device,
        verbose=verbose,
    )
def _get_aug_idxes(self, tokens, aug_min, aug_max, aug_p, mode):
# Select candidate positions in `tokens` for augmentation.
# `mode` is compared against Method.CHAR / Method.WORD to choose the filter.
# NOTE: this snippet is cut off after the count is capped; the step that
# picks the final indexes and returns them is not shown.
if mode == Method.CHAR:
# If word is too short, do not augment it.
if len(tokens) < self.min_char:
return None
# Number of positions to augment, derived from the length and the limits.
aug_cnt = self._generate_aug_cnt(len(tokens), aug_min, aug_max, aug_p)
# Start with every position as a candidate.
idxes = [i for i, t in enumerate(tokens)]
if mode == Method.WORD:
# skip stopwords
idxes = [i for i in idxes if self.stopwords is None or tokens[i] not in self.stopwords]
# skip short word
idxes = [i for i in idxes if len(tokens[i]) >= self.min_char]
elif mode == Method.CHAR:
# Character mode delegates the filtering to `skip_aug`.
idxes = self.skip_aug(idxes, tokens)
# Nothing left to augment: optionally emit a warning, then return None.
# NOTE(review): indentation is lost in this snippet, so it is unclear whether
# the `return None` below sits under the empty-candidates check or under the
# verbose check. Presumably the former; confirm against the real file.
if len(idxes) == 0:
if self.verbose > 0:
exception = WarningException(name=WarningName.OUT_OF_VOCABULARY,
code=WarningCode.WARNING_CODE_002, msg=WarningMessage.NO_WORD)
exception.output()
return None
# Never request more positions than there are candidates.
if len(idxes) < aug_cnt:
aug_cnt = len(idxes)
def substitute(self, data):
# Character-level substitution over the tokens of `data`.
# NOTE: this snippet is cut off before the per-character branch body and
# before any return statement, so only the selection logic is visible.
results = []
tokens = self.tokenizer(data)
# Choose which token positions are candidates for augmentation.
# NOTE(review): the result is used in an `in` test below without a None
# check. Confirm `_get_aug_idxes` cannot return None for this call, otherwise
# the membership test raises TypeError.
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
for token_i, token in enumerate(tokens):
# Tokens that were not selected are copied through unchanged.
if token_i not in aug_word_idxes:
results.append(token)
continue
result = ''
chars = self.token2char(token)
# Choose which character positions inside the selected token to change.
aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
Method.CHAR)
# A None result means nothing in this token is augmented; keep it as is.
if aug_char_idxes is None:
results.append(token)
continue
for char_i, char in enumerate(chars):
# (snippet truncated here: the body of this branch is not shown)
if char_i not in aug_char_idxes:
def substitute(self, data):
# Character-level substitution over the tokens of `data`.
# NOTE: this snippet is cut off after the pass-through of unselected
# characters; the substitution itself and the return are not shown.
results = []
tokens = self.tokenizer(data)
# Choose which token positions are candidates for augmentation.
# NOTE(review): this call passes three arguments (tokens, probability, mode),
# while the `_get_aug_idxes` snippet in this file takes
# (tokens, aug_min, aug_max, aug_p, mode). Presumably this snippet comes from
# a different version of the helper; confirm before reuse.
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_p, Method.WORD)
for token_i, token in enumerate(tokens):
# Tokens that were not selected are copied through unchanged.
if token_i not in aug_word_idxes:
results.append(token)
continue
result = ''
chars = self.token2char(token)
# Choose which character positions inside the selected token to change.
aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_p, Method.CHAR)
# A None result means nothing in this token is augmented; keep it as is.
if aug_char_idxes is None:
results.append(token)
continue
for char_i, char in enumerate(chars):
# Characters that were not selected are appended to the rebuilt token as is.
if char_i not in aug_char_idxes:
result += char