How to use the nlpaug.model.lang_models module in nlpaug

To help you get started, we’ve selected a few nlpaug examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github makcedward / nlpaug / test / augmenter / word / test_context_word_embs.py View on Github external
def insert(self, aug):
        """Check that ``aug`` lengthens ``self.text`` when augmenting it.

        The augmented text must have more space-separated pieces than the
        input, must differ from the input, and must not contain the BERT
        sub-word prefix marker.
        """
        source_text = self.text
        self.assertLess(0, len(source_text))

        augmented_text = aug.augment(source_text)

        # Compare piece counts obtained by splitting on a single space.
        source_count = len(source_text.split(' '))
        augmented_count = len(augmented_text.split(' '))
        self.assertLess(source_count, augmented_count)

        self.assertNotEqual(source_text, augmented_text)

        prefix_absent = nml.Bert.SUBWORD_PREFIX not in augmented_text
        self.assertTrue(prefix_absent)
github makcedward / nlpaug / nlpaug / augmenter / sentence / context_word_embs_sentence.py View on Github external
def init_context_word_embs_sentence_model(model_path, device, force_reload=False, temperature=1.0, top_k=None,
                                          top_p=None, optimize=None):
    """Return the XLNet or GPT2 model wrapper for ``model_path``, reusing a cached one when possible.

    Instances are cached in ``CONTEXT_WORD_EMBS_SENTENCE_MODELS`` under the
    base name of ``model_path``. On a cache hit (and ``force_reload`` false)
    only ``temperature``, ``top_k`` and ``top_p`` are refreshed on the cached
    instance; ``device`` and ``optimize`` are not re-applied.

    :raises ValueError: if ``model_path`` contains neither ``'xlnet'`` nor ``'gpt2'``.
    """
    cache = CONTEXT_WORD_EMBS_SENTENCE_MODELS
    cache_key = os.path.basename(model_path)

    if not force_reload and cache_key in cache:
        cached_model = cache[cache_key]
        cached_model.temperature = temperature
        cached_model.top_k = top_k
        cached_model.top_p = top_p
        return cached_model

    # The model family is inferred from a substring of the full path.
    if 'xlnet' in model_path:
        model_cls = nml.XlNet
    elif 'gpt2' in model_path:
        model_cls = nml.Gpt2
    else:
        raise ValueError('Model name value is unexpected. Only support XLNet and GPT2 model.')

    new_model = model_cls(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p,
                          optimize=optimize)
    cache[cache_key] = new_model
    return new_model
github makcedward / nlpaug / nlpaug / augmenter / word / bert.py View on Github external
def insert(self, data):
        """Insert model-predicted words into ``data`` at randomly chosen positions.

        ``data`` is tokenized, a mask placeholder is inserted at each index
        returned by ``self._get_random_aug_idxes`` and the model is asked to
        predict a replacement for it (``self.aug_n`` is passed to ``predict``).
        One of the predictions is sampled and written over the placeholder.

        If the model returns no prediction for a position, the placeholder is
        removed again so that the mask token never leaks into the output.

        :param data: Input text.
        :return: The result of ``self.reverse_tokenizer`` on the augmented tokens.
        """
        tokens = self.tokenizer(data)
        results = tokens.copy()

        # Process the highest index first so that an insertion never shifts
        # the positions still waiting to be handled. ``sorted`` is used so the
        # list handed back by the helper is not reordered in place.
        aug_idxes = sorted(self._get_random_aug_idxes(tokens), reverse=True)

        mask = nml.BertDeprecated.MASK
        for aug_idx in aug_idxes:
            results.insert(aug_idx, mask)
            predict_results = self.model.predict(results, mask, self.aug_n)
            # Temp fix for sampling issue: sampling needs at least one candidate.
            if len(predict_results) > 0:
                results[aug_idx] = self.sample(predict_results, 1)[0]
            else:
                # No candidate to sample: drop the placeholder instead of
                # leaving the raw mask token in the augmented text.
                del results[aug_idx]

        return self.reverse_tokenizer(results)
github makcedward / nlpaug / nlpaug / augmenter / sentence / context_word_embs_sentence.py View on Github external
def init_context_word_embs_sentence_model(model_path, device, force_reload=False, temperature=1.0, top_k=None,
                                          top_p=None, optimize=None):
    """Return the XLNet or GPT2 model wrapper for ``model_path``, reusing a cached one when possible.

    Instances are cached in ``CONTEXT_WORD_EMBS_SENTENCE_MODELS`` under the
    base name of ``model_path``. On a cache hit (and ``force_reload`` false)
    only ``temperature``, ``top_k`` and ``top_p`` are refreshed on the cached
    instance; ``device`` and ``optimize`` are not re-applied.

    :raises ValueError: if ``model_path`` contains neither ``'xlnet'`` nor ``'gpt2'``.
    """
    cache = CONTEXT_WORD_EMBS_SENTENCE_MODELS
    cache_key = os.path.basename(model_path)

    if not force_reload and cache_key in cache:
        cached_model = cache[cache_key]
        cached_model.temperature = temperature
        cached_model.top_k = top_k
        cached_model.top_p = top_p
        return cached_model

    # The model family is inferred from a substring of the full path.
    if 'xlnet' in model_path:
        model_cls = nml.XlNet
    elif 'gpt2' in model_path:
        model_cls = nml.Gpt2
    else:
        raise ValueError('Model name value is unexpected. Only support XLNet and GPT2 model.')

    new_model = model_cls(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p,
                          optimize=optimize)
    cache[cache_key] = new_model
    return new_model
github makcedward / nlpaug / nlpaug / augmenter / word / bert.py View on Github external
def skip_aug(self, token_idxes, tokens):
        """Filter ``token_idxes`` down to positions that do not hold a sub-word piece.

        An index is kept when the first two characters of ``tokens[index]``
        differ from ``nml.Bert.SUBWORD_PREFIX``. The relative order of the
        kept indexes is unchanged.

        :return: A new list of the retained indexes.
        """
        return [
            idx for idx in token_idxes
            if tokens[idx][:2] != nml.Bert.SUBWORD_PREFIX
        ]
github makcedward / nlpaug / nlpaug / augmenter / word / context_word_embs.py View on Github external
model_name = os.path.basename(model_path)
    if model_name in CONTEXT_WORD_EMBS_MODELS and not force_reload:
        CONTEXT_WORD_EMBS_MODELS[model_name].temperature = temperature
        CONTEXT_WORD_EMBS_MODELS[model_name].top_k = top_k
        CONTEXT_WORD_EMBS_MODELS[model_name].top_p = top_p
        return CONTEXT_WORD_EMBS_MODELS[model_name]

    if 'distilbert' in model_path:
        model = nml.DistilBert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'roberta' in model_path:
        model = nml.Roberta(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'bert' in model_path:
        model = nml.Bert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'xlnet' in model_path:
        model = nml.XlNet(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p, optimize=optimize)
    else:
        raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')

    CONTEXT_WORD_EMBS_MODELS[model_name] = model
    return model
github makcedward / nlpaug / nlpaug / augmenter / word / context_word_embs.py View on Github external
optimize=None):
    global CONTEXT_WORD_EMBS_MODELS

    model_name = os.path.basename(model_path)
    if model_name in CONTEXT_WORD_EMBS_MODELS and not force_reload:
        CONTEXT_WORD_EMBS_MODELS[model_name].temperature = temperature
        CONTEXT_WORD_EMBS_MODELS[model_name].top_k = top_k
        CONTEXT_WORD_EMBS_MODELS[model_name].top_p = top_p
        return CONTEXT_WORD_EMBS_MODELS[model_name]

    if 'distilbert' in model_path:
        model = nml.DistilBert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'roberta' in model_path:
        model = nml.Roberta(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'bert' in model_path:
        model = nml.Bert(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p)
    elif 'xlnet' in model_path:
        model = nml.XlNet(model_path, device=device, temperature=temperature, top_k=top_k, top_p=top_p, optimize=optimize)
    else:
        raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')

    CONTEXT_WORD_EMBS_MODELS[model_name] = model
    return model
github makcedward / nlpaug / nlpaug / augmenter / word / context_word_embs.py View on Github external
def init_context_word_embs_model(model_path, device, force_reload=False, temperature=1.0, top_k=None, top_p=None,
                                 optimize=None):
    """Return the language-model wrapper for ``model_path``, reusing a cached one when possible.

    Instances are cached in ``CONTEXT_WORD_EMBS_MODELS`` under the base name
    of ``model_path``. On a cache hit (and ``force_reload`` false) only
    ``temperature``, ``top_k`` and ``top_p`` are refreshed on the cached
    instance. ``optimize`` is forwarded to the XLNet wrapper only.

    :raises ValueError: if ``model_path`` contains none of ``'distilbert'``,
        ``'roberta'``, ``'bert'`` or ``'xlnet'``.
    """
    cache = CONTEXT_WORD_EMBS_MODELS
    cache_key = os.path.basename(model_path)

    if not force_reload and cache_key in cache:
        cached_model = cache[cache_key]
        cached_model.temperature = temperature
        cached_model.top_k = top_k
        cached_model.top_p = top_p
        return cached_model

    shared_kwargs = dict(device=device, temperature=temperature, top_k=top_k, top_p=top_p)

    # Order matters: 'distilbert' and 'roberta' both contain 'bert', so the
    # more specific names have to be tested before the plain 'bert' check.
    if 'distilbert' in model_path:
        new_model = nml.DistilBert(model_path, **shared_kwargs)
    elif 'roberta' in model_path:
        new_model = nml.Roberta(model_path, **shared_kwargs)
    elif 'bert' in model_path:
        new_model = nml.Bert(model_path, **shared_kwargs)
    elif 'xlnet' in model_path:
        new_model = nml.XlNet(model_path, optimize=optimize, **shared_kwargs)
    else:
        raise ValueError('Model name value is unexpected. Only support BERT, DistilBERT, RoBERTa and XLNet model.')

    cache[cache_key] = new_model
    return new_model