Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def insert(self, aug):
    """Assert that ``aug`` lengthens ``self.text`` when it augments it.

    The augmented output must contain more space-separated pieces than the
    input, must differ from the input, and must not contain
    ``nml.Bert.SUBWORD_PREFIX``.

    Args:
        aug: Object exposing ``augment(text)`` that returns a string.
    """
    source_text = self.text
    # Guard against a silently empty fixture: an empty input would make the
    # comparisons below meaningless.
    self.assertLess(0, len(source_text))

    augmented = aug.augment(source_text)

    source_word_count = len(source_text.split(' '))
    augmented_word_count = len(augmented.split(' '))
    self.assertLess(source_word_count, augmented_word_count)
    self.assertNotEqual(source_text, augmented)
    # No sub-word marker may leak into the final text.
    self.assertNotIn(nml.Bert.SUBWORD_PREFIX, augmented)
def init_context_word_embs_sentence_model(model_path, device, force_reload=False, temperature=1.0, top_k=None,
                                          top_p=None, optimize=None):
    """Return the sentence-generation model for ``model_path``, building it if needed.

    Models are kept in the module-level ``CONTEXT_WORD_EMBS_SENTENCE_MODELS``
    mapping, keyed by the base name of ``model_path``.

    Args:
        model_path: Model name or path. Must contain ``'xlnet'`` or ``'gpt2'``
            when a new model has to be built.
        device: Forwarded to the model constructor.
        force_reload: When true, ignore any cached entry and build a new model.
        temperature: Sampling setting; forwarded on build, re-assigned on a cache hit.
        top_k: Sampling setting; forwarded on build, re-assigned on a cache hit.
        top_p: Sampling setting; forwarded on build, re-assigned on a cache hit.
        optimize: Forwarded to the model constructor.

    Returns:
        The cached or newly built model object.

    Raises:
        ValueError: If a new model is needed and ``model_path`` contains
            neither ``'xlnet'`` nor ``'gpt2'``.
    """
    global CONTEXT_WORD_EMBS_SENTENCE_MODELS

    model_name = os.path.basename(model_path)

    if not force_reload and model_name in CONTEXT_WORD_EMBS_SENTENCE_MODELS:
        # Cache hit: only the sampling settings are refreshed; ``device`` and
        # ``optimize`` are not re-applied to the existing object.
        cached = CONTEXT_WORD_EMBS_SENTENCE_MODELS[model_name]
        cached.temperature = temperature
        cached.top_k = top_k
        cached.top_p = top_p
        return cached

    # The family is detected by substring match on the full path, not only on
    # the base name.
    if 'xlnet' in model_path:
        model_cls = nml.XlNet
    elif 'gpt2' in model_path:
        model_cls = nml.Gpt2
    else:
        raise ValueError('Model name value is unexpected. Only support XLNet and GPT2 model.')

    model = model_cls(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p,
                      optimize=optimize)
    CONTEXT_WORD_EMBS_SENTENCE_MODELS[model_name] = model
    return model
def insert(self, data):
    """Insert model-predicted words into ``data`` at randomly chosen positions.

    For every chosen index a mask placeholder is inserted, the model is asked
    to predict candidates for it, and one sampled candidate replaces the
    placeholder. If the model returns no candidates for a position, the
    placeholder is removed again so the mask token never appears in the
    returned text.

    Args:
        data: Input text; it is split with ``self.tokenizer``.

    Returns:
        The result of ``self.reverse_tokenizer`` applied to the augmented tokens.
    """
    tokens = self.tokenizer(data)
    results = tokens.copy()

    aug_idxes = self._get_random_aug_idxes(tokens)
    # Process the highest index first so that an insertion never shifts a
    # position that is still waiting to be processed.
    aug_idxes.sort(reverse=True)

    for aug_idx in aug_idxes:
        results.insert(aug_idx, nml.BertDeprecated.MASK)
        predict_results = self.model.predict(results, nml.BertDeprecated.MASK, self.aug_n)

        if len(predict_results) > 0:
            results[aug_idx] = self.sample(predict_results, 1)[0]
        else:
            # Nothing to sample from: undo the insertion instead of leaving
            # the placeholder (or a stale word) in the output.
            del results[aug_idx]

    return self.reverse_tokenizer(results)
def init_context_word_embs_sentence_model(model_path, device, force_reload=False, temperature=1.0, top_k=None,
                                          top_p=None, optimize=None):
    """Fetch or build the XLNet/GPT2 sentence model identified by ``model_path``.

    The module-level ``CONTEXT_WORD_EMBS_SENTENCE_MODELS`` mapping acts as a
    cache whose keys are ``os.path.basename(model_path)``.

    Args:
        model_path: Model name or path; ``'xlnet'`` or ``'gpt2'`` must occur in
            it for a model to be built.
        device: Passed through to the model constructor.
        force_reload: If true, bypass the cache and construct a new model.
        temperature: Sampling setting applied to the returned model.
        top_k: Sampling setting applied to the returned model.
        top_p: Sampling setting applied to the returned model.
        optimize: Passed through to the model constructor.

    Returns:
        The model object, either reused from the cache or newly constructed.

    Raises:
        ValueError: If construction is required and ``model_path`` matches
            neither supported family.
    """
    global CONTEXT_WORD_EMBS_SENTENCE_MODELS

    model_name = os.path.basename(model_path)

    if not force_reload and model_name in CONTEXT_WORD_EMBS_SENTENCE_MODELS:
        # Reuse the existing object, refreshing only its sampling settings.
        existing = CONTEXT_WORD_EMBS_SENTENCE_MODELS[model_name]
        existing.temperature = temperature
        existing.top_k = top_k
        existing.top_p = top_p
        return existing

    if 'xlnet' in model_path:
        builder = nml.XlNet
    elif 'gpt2' in model_path:
        builder = nml.Gpt2
    else:
        raise ValueError('Model name value is unexpected. Only support XLNet and GPT2 model.')

    model = builder(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p,
                    optimize=optimize)
    CONTEXT_WORD_EMBS_SENTENCE_MODELS[model_name] = model
    return model
def skip_aug(self, token_idxes, tokens):
    """Filter out candidate indexes that point at sub-word continuation tokens.

    Args:
        token_idxes: Iterable of indexes into ``tokens``.
        tokens: Sequence of token strings.

    Returns:
        A list of the indexes from ``token_idxes``, in their original order,
        whose token's first two characters differ from
        ``nml.Bert.SUBWORD_PREFIX``.
    """
    return [
        token_idx
        for token_idx in token_idxes
        # Keep only tokens that are not marked as a partial word.
        if tokens[token_idx][:2] != nml.Bert.SUBWORD_PREFIX
    ]
model_name = os.path.basename(model_path)
if model_name in CONTEXT_WORD_EMBS_MODELS and not force_reload:
CONTEXT_WORD_EMBS_MODELS[model_name].temperature = temperature
CONTEXT_WORD_EMBS_MODELS[model_name].top_k = top_k
CONTEXT_WORD_EMBS_MODELS[model_name].top_p = top_p
return CONTEXT_WORD_EMBS_MODELS[model_name]
if 'distilbert' in model_path:
model = nml.DistilBert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'roberta' in model_path:
model = nml.Roberta(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'bert' in model_path:
model = nml.Bert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'xlnet' in model_path:
model = nml.XlNet(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p, optimize=optimize)
else:
raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')
CONTEXT_WORD_EMBS_MODELS[model_name] = model
return model
optimize=None):
global CONTEXT_WORD_EMBS_MODELS
model_name = os.path.basename(model_path)
if model_name in CONTEXT_WORD_EMBS_MODELS and not force_reload:
CONTEXT_WORD_EMBS_MODELS[model_name].temperature = temperature
CONTEXT_WORD_EMBS_MODELS[model_name].top_k = top_k
CONTEXT_WORD_EMBS_MODELS[model_name].top_p = top_p
return CONTEXT_WORD_EMBS_MODELS[model_name]
if 'distilbert' in model_path:
model = nml.DistilBert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'roberta' in model_path:
model = nml.Roberta(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'bert' in model_path:
model = nml.Bert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
elif 'xlnet' in model_path:
model = nml.XlNet(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p, optimize=optimize)
else:
raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')
CONTEXT_WORD_EMBS_MODELS[model_name] = model
return model
def init_context_word_embs_model(model_path, device, force_reload=False, temperature=1.0, top_k=None, top_p=None,
                                 optimize=None):
    """Return the contextual word-embedding model for ``model_path``, building it if needed.

    Models are cached in the module-level ``CONTEXT_WORD_EMBS_MODELS`` mapping
    under ``os.path.basename(model_path)``.

    Args:
        model_path: Model name or path. One of ``'distilbert'``, ``'roberta'``,
            ``'bert'`` or ``'xlnet'`` must occur in it for a model to be built.
        device: Forwarded to the model constructor.
        force_reload: When true, ignore any cached entry and build a new model.
        temperature: Sampling setting; forwarded on build, re-assigned on a cache hit.
        top_k: Sampling setting; forwarded on build, re-assigned on a cache hit.
        top_p: Sampling setting; forwarded on build, re-assigned on a cache hit.
        optimize: Forwarded to the XLNet constructor only.

    Returns:
        The cached or newly built model object.

    Raises:
        ValueError: If a new model is needed and ``model_path`` matches none of
            the supported families.
    """
    global CONTEXT_WORD_EMBS_MODELS

    model_name = os.path.basename(model_path)

    if not force_reload and model_name in CONTEXT_WORD_EMBS_MODELS:
        # Cache hit: refresh the sampling settings only; ``device`` and
        # ``optimize`` are not re-applied to the existing object.
        cached = CONTEXT_WORD_EMBS_MODELS[model_name]
        cached.temperature = temperature
        cached.top_k = top_k
        cached.top_p = top_p
        return cached

    sampling_kwargs = dict(device=device, temperature=temperature, top_k=top_k, top_p=top_p)

    # 'distilbert' and 'roberta' both contain the substring 'bert', so they
    # must be tested before the plain 'bert' case.
    # NOTE(review): matching is done on the full path, so a directory name
    # containing one of these substrings also influences the choice.
    if 'distilbert' in model_path:
        model = nml.DistilBert(model_path, **sampling_kwargs)
    elif 'roberta' in model_path:
        model = nml.Roberta(model_path, **sampling_kwargs)
    elif 'bert' in model_path:
        model = nml.Bert(model_path, **sampling_kwargs)
    elif 'xlnet' in model_path:
        model = nml.XlNet(model_path, **sampling_kwargs, optimize=optimize)
    else:
        raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')

    CONTEXT_WORD_EMBS_MODELS[model_name] = model
    return model