Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = BertConfig(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
if labels:
self.labels = labels
else:
self.labels = ["O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC"]
self.num_labels = len(self.labels)
if roberta_available:
MODEL_CLASSES = {
'bert': (BertConfig, BertForTokenClassification, BertTokenizer),
'roberta': (RobertaConfig, RobertaForTokenClassification, RobertaTokenizer),
'distilbert': (DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer),
'camembert': (CamembertConfig, CamembertForTokenClassification, CamembertTokenizer)
}
else:
MODEL_CLASSES = {
'bert': (BertConfig, BertForTokenClassification, BertTokenizer),
'distilbert': (DistilBertConfig, DistilBertForTokenClassification, BertTokenizer),
'camembert': (CamembertConfig, CamembertForTokenClassification, CamembertTokenizer)
}
config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
self.model = model_class.from_pretrained(model_name, num_labels=self.num_labels)
if use_cuda:
if torch.cuda.is_available():
if cuda_device == -1:
self.device = torch.device("cuda")
else:
self.device = torch.device(f"cuda:{cuda_device}")
else:
raise ValueError("'use_cuda' set to True when cuda is unavailable. Make sure CUDA is available or set use_cuda=False.")
)
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors
from transformers import glue_convert_examples_to_features as convert_examples_to_features
logger = logging.getLogger(__name__)
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, XLNetConfig, XLMConfig,
RobertaConfig, DistilBertConfig)), ())
MODEL_CLASSES = {
'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer),
'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
'roberta': (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
'distilbert': (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer),
'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer)
}
def set_seed(args):
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.n_gpu > 0:
torch.cuda.manual_seed_all(args.seed)
from transformers import AdamW, get_linear_schedule_with_warmup
from utils_squad import (read_squad_examples, convert_examples_to_features,
RawResult, write_predictions,
RawResultExtended, write_predictions_extended)
# The follwing import is the official SQuAD evaluation script (2.0).
# You can remove it from the dependencies if you are using this script outside of the library
# We've added it here for automated tests (see examples/test_examples.py file)
from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad
logger = logging.getLogger(__name__)
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \
for conf in (BertConfig, XLNetConfig, XLMConfig)), ())
MODEL_CLASSES = {
'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer),
'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
'distilbert': (DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
}
def set_seed(args):
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.n_gpu > 0:
torch.cuda.manual_seed_all(args.seed)
def to_list(tensor):
BertConfig,
BertForMultipleChoice,
BertTokenizer,
get_linear_schedule_with_warmup,
)
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
from tensorboardX import SummaryWriter
logger = logging.getLogger(__name__)
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in [BertConfig]), ())
MODEL_CLASSES = {
"bert": (BertConfig, BertForMultipleChoice, BertTokenizer),
}
class SwagExample(object):
"""A single training/test example for the SWAG dataset."""
def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None):
self.swag_id = swag_id
self.context_sentence = context_sentence
self.start_ending = start_ending
self.endings = [
ending_0,
ending_1,
import os
import random
import logging
import torch
import numpy as np
from transformers import BertTokenizer, BertConfig, AlbertConfig, AlbertTokenizer, RobertaConfig, RobertaTokenizer
from official_eval import official_f1
from model import RBERT
MODEL_CLASSES = {
'bert': (BertConfig, RBERT, BertTokenizer),
'roberta': (RobertaConfig, RBERT, RobertaTokenizer),
'albert': (AlbertConfig, RBERT, AlbertTokenizer)
}
MODEL_PATH_MAP = {
'bert': 'bert-base-uncased',
'roberta': 'roberta-base',
'albert': 'albert-xxlarge-v1'
}
ADDITIONAL_SPECIAL_TOKENS = ["", "", "", ""]
def get_label(args):
return [label.strip() for label in open(os.path.join(args.data_dir, args.label_file), 'r', encoding='utf-8')]
"bert": (
BertConfig,
TFBertForPreTraining,
BertForPreTraining,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-large-uncased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-large-cased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-base-cased-finetuned-mrpc": (
BertConfig,
TFBertForSequenceClassification,
BertForSequenceClassification,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"gpt2": (
GPT2Config,
TFGPT2LMHeadModel,
GPT2LMHeadModel,
def __init__(self, config):
super(Model, self).__init__()
bert_config_file = os.path.join(config.bert_path, f'bert_config.json')
bert_config = BertConfig.from_json_file(bert_config_file)
self.bert = BertModel.from_pretrained(config.bert_path,config=bert_config)
for param in self.bert.parameters():
param.requires_grad = True
self.fc = nn.Linear(config.hidden_size, config.num_classes)
import torch
from transformers import BertConfig, BertForMaskedLM, BertTokenizer
from transformers import RobertaConfig, RobertaForMaskedLM, RobertaTokenizer
from transformers import DistilBertConfig, DistilBertForMaskedLM, DistilBertTokenizer
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer
from distiller import Distiller
from utils import git_log, logger, init_gpu_params, set_seed
from lm_seqs_dataset import LmSeqsDataset
MODEL_CLASSES = {
'distilbert': (DistilBertConfig, DistilBertForMaskedLM, DistilBertTokenizer),
'roberta': (RobertaConfig, RobertaForMaskedLM, RobertaTokenizer),
'bert': (BertConfig, BertForMaskedLM, BertTokenizer),
'gpt2': (GPT2Config, GPT2LMHeadModel, GPT2Tokenizer)
}
def sanity_checks(args):
"""
A bunch of args sanity checks to perform even starting...
"""
assert (args.mlm and args.alpha_mlm > 0.) or (not args.mlm and args.alpha_mlm == 0.)
assert (args.alpha_mlm > 0. and args.alpha_clm == 0.) or (args.alpha_mlm == 0. and args.alpha_clm > 0.)
if args.mlm:
assert os.path.isfile(args.token_counts)
assert (args.student_type in ['roberta', 'distilbert']) and (args.teacher_type in ['roberta', 'bert'])
else:
assert (args.student_type in ['gpt2']) and (args.teacher_type in ['gpt2'])
assert args.teacher_type == args.student_type or (args.student_type=='distilbert' and args.teacher_type=='bert')
"""Initializes a PyTorch `BertForEntityAndRelationExtraction` model and its tokenizer.
Returns:
The loaded PyTorch `BertPreTrainedModel` model and its corresponding `BertTokenizer`.
"""
# If this is one of our preprocessed BERT models, download it from GDrive first
if self.pretrained_model_name_or_path in constants.PRETRAINED_MODELS:
self.pretrained_model_name_or_path = \
model_utils.download_model_from_gdrive(self.pretrained_model_name_or_path)
# Anything we need to add to BertConfig goes here
idx_to_ent = [dataset.idx_to_tag['ent'] for dataset in self.datasets]
num_ent_labels = self.num_ent_labels
num_rel_labels = self.num_rel_labels
config = BertConfig.from_pretrained(self.pretrained_model_name_or_path)
# Manually add these values to the BERT config.
config.__dict__['idx_to_ent'] = idx_to_ent
config.__dict__['num_ent_labels'] = num_ent_labels
config.__dict__['num_rel_labels'] = num_rel_labels
# This is different than BERTs hidden layer dropout rate
config.__dict__['dropout_rate'] = self.config.dropout_rate
# Initializes the model using our modified config
self.model = BertForEntityAndRelationExtraction.from_pretrained(
pretrained_model_name_or_path=self.pretrained_model_name_or_path,
config=config
).to(self.device)
self.tokenizer = BertTokenizer.from_pretrained(
pretrained_model_name_or_path=self.pretrained_model_name_or_path,