def __init__(self, dims=None):
    # cfg.transformer_model names the pretrained checkpoint to load (project config).
    if 'distilbert' in cfg.transformer_model:
        self.model = DistilBertModel.from_pretrained(cfg.transformer_model)
        self.tokenizer = DistilBertTokenizer.from_pretrained(cfg.transformer_model)
        self.model_dims = self.model.config.dim
    else:
        self.model = BertModel.from_pretrained(cfg.transformer_model)
        self.tokenizer = BertTokenizer.from_pretrained(cfg.transformer_model)
        self.model_dims = self.model.config.hidden_size
    # Make sure the model is in eval mode (disables dropout for inference).
    self.model.eval()
    self.terms = []
    self.embeddings = torch.FloatTensor([])
    # Optional linear projection down to `dims` dimensions, applied with a tanh.
    self.reduce = nn.Linear(self.model_dims, dims) if dims else None
    self.activation = nn.Tanh()
    self.sim_fn = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
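The constructor above loads a frozen (Distil)BERT encoder plus an optional linear-and-tanh reduction head and a cosine-similarity function. As a rough illustration of how such a class might embed and compare terms, here is a self-contained sketch; the class name TermEmbedder, the embed method, and mean pooling over tokens are assumptions for illustration, not part of the original snippet.

import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class TermEmbedder(nn.Module):
    # Hypothetical wrapper mirroring the constructor above (BERT branch only).
    def __init__(self, model_name="bert-base-uncased", dims=None):
        super().__init__()
        self.model = BertModel.from_pretrained(model_name)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model_dims = self.model.config.hidden_size
        self.model.eval()  # inference only: disables dropout
        self.reduce = nn.Linear(self.model_dims, dims) if dims else None
        self.activation = nn.Tanh()
        self.sim_fn = nn.CosineSimilarity(dim=-1, eps=1e-6)

    @torch.no_grad()
    def embed(self, texts):
        # Tokenize a batch of strings and mean-pool the last hidden state.
        batch = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        hidden = self.model(**batch).last_hidden_state        # (B, T, H)
        mask = batch["attention_mask"].unsqueeze(-1).float()  # (B, T, 1)
        emb = (hidden * mask).sum(dim=1) / mask.sum(dim=1)    # masked mean over tokens
        if self.reduce is not None:
            emb = self.activation(self.reduce(emb))
        return emb

embedder = TermEmbedder(dims=128)
vectors = embedder.embed(["neural network", "deep learning"])
print(embedder.sim_fn(vectors[0], vectors[1]).item())  # cosine similarity of the two terms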
@classmethod
def from_pretrained(cls, model_name: str):
    return cls(
        BertModel.from_pretrained(
            model_name,
            output_attentions=True,
            output_hidden_states=True,
            # Not a stock `transformers` argument; this relies on a patched BertModel.
            output_additional_info=True,
        ),
        BertAligner.from_pretrained(model_name),
    )
def __init__(self, model_name_or_path: str, max_seq_length: int = 128,
             do_lower_case: Optional[bool] = None,
             model_args: Optional[Dict] = None, tokenizer_args: Optional[Dict] = None):
    super(BERT, self).__init__()
    self.config_keys = ['max_seq_length', 'do_lower_case']
    self.do_lower_case = do_lower_case
    # Avoid mutable default arguments: give each instance its own dicts.
    model_args = model_args or {}
    tokenizer_args = tokenizer_args or {}
    if max_seq_length > 510:
        logging.warning("BERT only allows a max_seq_length of 510 (512 with special tokens). Value will be set to 510")
        max_seq_length = 510
    self.max_seq_length = max_seq_length
    if self.do_lower_case is not None:
        tokenizer_args['do_lower_case'] = do_lower_case
    self.bert = BertModel.from_pretrained(model_name_or_path, **model_args)
    self.tokenizer = BertTokenizer.from_pretrained(model_name_or_path, **tokenizer_args)
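The wrapper above, in the style of a sentence-embedding module, caps sequences at 510 word pieces so that, with [CLS] and [SEP] added, they fit BERT's 512-position limit. Below is a minimal, hypothetical sketch of encoding one sentence under that limit; the encode helper is an illustrative assumption, not this module's API.

import torch
from transformers import BertModel, BertTokenizer

max_seq_length = 510  # leaves room for [CLS] and [SEP] within BERT's 512 positions

bert = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert.eval()

def encode(sentence):
    # Truncate to 512 total positions, including the two special tokens.
    inputs = tokenizer(sentence, max_length=max_seq_length + 2, truncation=True, return_tensors="pt")
    with torch.no_grad():
        output = bert(**inputs)
    return output.last_hidden_state[:, 0]  # [CLS] embedding, shape (1, 768)

print(encode("BERT caps its input at 512 positions.").shape)  # torch.Size([1, 768])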
def __init__(self, pretrain_path):
    nn.Module.__init__(self)
    # Load the pretrained BERT encoder from a local path or a model name.
    self.bert = BertModel.from_pretrained(pretrain_path)
def __init__(self, max_length, pretrain_path, blank_padding=True):
    """
    Args:
        max_length: max length of sentence
        pretrain_path: path of pretrained model
        blank_padding: whether to pad sentences to max_length
    """
    super().__init__()
    self.max_length = max_length
    self.blank_padding = blank_padding
    # Twice BERT-base's 768-dim hidden state (e.g. for a concatenated pair of representations).
    self.hidden_size = 768 * 2
    self.bert = BertModel.from_pretrained(pretrain_path)
    self.tokenizer = BertTokenizer.from_pretrained(pretrain_path)
    self.linear = nn.Linear(self.hidden_size, self.hidden_size)
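The last constructor doubles BERT's 768-dimensional hidden size, which suggests two token representations (for example, a head and a tail entity) are concatenated before the linear projection. Here is a rough, hypothetical sketch of that forward step; the encode_pair helper and the chosen entity positions are assumptions for illustration, not the original model's code.

import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

bert = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
linear = nn.Linear(768 * 2, 768 * 2)

def encode_pair(token_ids, head_pos, tail_pos):
    # token_ids: (1, seq_len) word-piece ids; head_pos/tail_pos index the two entity tokens.
    hidden = bert(token_ids).last_hidden_state                            # (1, seq_len, 768)
    pair = torch.cat([hidden[:, head_pos], hidden[:, tail_pos]], dim=-1)  # (1, 1536)
    return linear(pair)

ids = tokenizer("Paris is the capital of France.", return_tensors="pt")["input_ids"]
print(encode_pair(ids, head_pos=1, tail_pos=6).shape)  # torch.Size([1, 1536])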