Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Initializes a MultiLabelClassification model.
Args:
model_type: The type of model (bert, roberta, xlnet, xlm, distilbert, albert)
model_name: Default Transformer model name or path to a directory containing Transformer model file (pytorch_model.bin).
num_labels (optional): The number of labels or classes in the dataset.
pos_weight (optional): A list of length num_labels containing the weights to assign to each label for loss calculation.
args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
"""
MODEL_CLASSES = {
'bert': (BertConfig, BertForMultiLabelSequenceClassification, BertTokenizer),
'roberta': (RobertaConfig, RobertaForMultiLabelSequenceClassification, RobertaTokenizer),
'xlnet': (XLNetConfig, XLNetForMultiLabelSequenceClassification, XLNetTokenizer),
'xlm': (XLMConfig, XLMForMultiLabelSequenceClassification, XLMTokenizer),
'distilbert': (DistilBertConfig, DistilBertForMultiLabelSequenceClassification, DistilBertTokenizer),
'albert': (AlbertConfig, AlbertForMultiLabelSequenceClassification, AlbertTokenizer)
}
config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
if num_labels:
self.config = config_class.from_pretrained(model_name, num_labels=num_labels)
self.num_labels = num_labels
else:
self.config = config_class.from_pretrained(model_name)
self.num_labels = self.config.num_labels
self.tokenizer = tokenizer_class.from_pretrained(model_name)
self.tokenizer = tokenizer_class.from_pretrained(model_name)
self.num_labels = num_labels
self.pos_weight = pos_weight
self.sliding_window = False
def __init__(self, config):
    """Build a DistilBERT encoder with a multi-label classification head.

    Mirrors the standard transformers sequence-classification layout:
    a DistilBERT backbone followed by a pre-classifier projection,
    dropout, and a final linear layer producing one logit per label.

    Args:
        config: A DistilBertConfig carrying `dim` (hidden size),
            `num_labels`, and `seq_classif_dropout`.
    """
    super(DistilBertForMultiLabelSequenceClassification, self).__init__(config)
    # Cached so forward/loss code can size targets without re-reading config.
    self.num_labels = config.num_labels
    # Backbone encoder.
    self.distilbert = DistilBertModel(config)
    # dim -> dim projection applied to the pooled output before classification.
    self.pre_classifier = nn.Linear(config.dim, config.dim)
    # Final head: dim -> num_labels logits (one independent logit per label).
    self.classifier = nn.Linear(config.dim, config.num_labels)
    self.dropout = nn.Dropout(config.seq_classif_dropout)
    # Re-initialize weights per the transformers convention (called last so
    # it covers every submodule created above).
    self.init_weights()
"""
Initializes a MultiLabelClassification model.
Args:
model_type: The type of model (bert, roberta, xlnet, xlm, distilbert)
model_name: Default Transformer model name or path to a directory containing Transformer model file (pytorch_model.bin).
num_labels (optional): The number of labels or classes in the dataset.
args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
"""
MODEL_CLASSES = {
'bert': (BertConfig, BertForMultiLabelSequenceClassification, BertTokenizer),
'roberta': (RobertaConfig, RobertaForMultiLabelSequenceClassification, RobertaTokenizer),
'xlnet': (XLNetConfig, XLNetForMultiLabelSequenceClassification, XLNetTokenizer),
'xlm': (XLMConfig, XLMForMultiLabelSequenceClassification, XLMTokenizer),
'distilbert': (DistilBertConfig, DistilBertForMultiLabelSequenceClassification, DistilBertTokenizer),
}
config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
self.tokenizer = tokenizer_class.from_pretrained(model_name)
self.model = model_class.from_pretrained(model_name, num_labels=num_labels)
self.num_labels = num_labels
if use_cuda:
if torch.cuda.is_available():
self.device = torch.device("cuda")
else:
raise ValueError("'use_cuda' set to True when cuda is unavailable. Make sure CUDA is available or set use_cuda=False.")
else:
self.device = "cpu"
self.results = {}