How to use the transformers.BertModel.from_pretrained function in transformers

To help you get started, we’ve selected a few transformers examples, based on popular ways BertModel.from_pretrained is used in public projects.
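
At its simplest, from_pretrained downloads (or loads from disk) a pretrained checkpoint and returns a ready-to-use model. The sketch below shows the typical loading pattern; the model name "bert-base-uncased" is just an illustrative choice, and the callable-tokenizer style assumes a reasonably recent transformers release.

import torch
from transformers import BertModel, BertTokenizer

# Load a checkpoint and its matching tokenizer. Any BERT checkpoint name,
# or a local directory produced by save_pretrained(), works the same way.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")
model.eval()  # inference mode: disables dropout

inputs = tokenizer("BERT embeddings in a few lines.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# outputs[0] holds the per-token hidden states: (batch, seq_len, hidden_size)
print(outputs[0].shape)
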

github jroakes / tech-seo-crawler / lib / bert.py
def __init__(self, dims=None):

        if 'distilbert' in cfg.transformer_model:
            # DistilBERT exposes its embedding width as config.dim
            self.model      = DistilBertModel.from_pretrained(cfg.transformer_model)
            self.tokenizer  = DistilBertTokenizer.from_pretrained(cfg.transformer_model)
            self.model_dims = self.model.config.dim
        else:
            # plain BERT exposes the same value as config.hidden_size
            self.model      = BertModel.from_pretrained(cfg.transformer_model)
            self.tokenizer  = BertTokenizer.from_pretrained(cfg.transformer_model)
            self.model_dims = self.model.config.hidden_size

        # Make sure model is in Eval mode.
        self.model.eval()

        self.terms          = []
        self.embeddings     = torch.FloatTensor([])
        self.reduce         = nn.Linear(self.model_dims, dims) if dims else None
        self.activation     = nn.Tanh()
        self.sim_fn         = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
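Aside from the class names, the only real difference between the two branches above is where the embedding width lives on the config object. A quick check (stock checkpoint names used purely for illustration):

from transformers import BertModel, DistilBertModel

bert = BertModel.from_pretrained("bert-base-uncased")
distil = DistilBertModel.from_pretrained("distilbert-base-uncased")

print(bert.config.hidden_size)  # 768 for bert-base
print(distil.config.dim)        # 768 for distilbert-base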
github bhoov / exbert / server / transformer_details.py
@classmethod
def from_pretrained(cls, model_name: str):
        return cls(
            BertModel.from_pretrained(
                model_name,
                output_attentions=True,        # also return per-layer attention weights
                output_hidden_states=True,     # also return every layer's hidden states
                output_additional_info=True,   # project-specific flag, not part of stock transformers
            ),
            BertAligner.from_pretrained(model_name),
        )
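Passing output_attentions=True and output_hidden_states=True to from_pretrained configures the model to return attention weights and every layer's hidden states alongside its usual outputs (output_additional_info comes from exbert's own tooling rather than the standard transformers API). A rough sketch of inspecting those extra outputs, with an illustrative checkpoint and a recent transformers version assumed:

import torch
from transformers import BertModel, BertTokenizer

model = BertModel.from_pretrained(
    "bert-base-uncased",        # illustrative checkpoint
    output_attentions=True,     # return per-layer attention weights
    output_hidden_states=True,  # return every layer's hidden states
)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model.eval()

inputs = tokenizer("Attention weights, please.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

print(len(outputs.attentions))      # 12 layers for bert-base
print(outputs.attentions[0].shape)  # (batch, num_heads, seq_len, seq_len)
print(len(outputs.hidden_states))   # 13 = embedding layer + 12 encoder layers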
github UKPLab / sentence-transformers / sentence_transformers / models / BERT.py
def __init__(self, model_name_or_path: str, max_seq_length: int = 128, do_lower_case: Optional[bool] = None, model_args: Optional[Dict] = None, tokenizer_args: Optional[Dict] = None):
        super(BERT, self).__init__()
        self.config_keys = ['max_seq_length', 'do_lower_case']
        self.do_lower_case = do_lower_case

        # Use None defaults rather than mutable dict defaults, which would be
        # shared (and mutated below) across instances.
        model_args = model_args or {}
        tokenizer_args = tokenizer_args or {}

        if max_seq_length > 510:
            logging.warning("BERT only allows a max_seq_length of 510 (512 with special tokens). Value will be set to 510")
            max_seq_length = 510
        self.max_seq_length = max_seq_length

        if self.do_lower_case is not None:
            tokenizer_args['do_lower_case'] = do_lower_case

        # model_args / tokenizer_args are forwarded verbatim to from_pretrained.
        self.bert = BertModel.from_pretrained(model_name_or_path, **model_args)
        self.tokenizer = BertTokenizer.from_pretrained(model_name_or_path, **tokenizer_args)
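Because model_args and tokenizer_args are forwarded verbatim to the two from_pretrained calls, anything those methods accept can be tunnelled through the wrapper. A hedged usage sketch (the checkpoint name and argument values are illustrative):

bert = BERT(
    "bert-base-uncased",
    max_seq_length=256,
    model_args={"cache_dir": "/tmp/hf-cache"},      # forwarded to BertModel.from_pretrained
    tokenizer_args={"do_basic_tokenize": False},    # forwarded to BertTokenizer.from_pretrained
)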
github thunlp / Neural-Snowball / nrekit / sentence_encoder.py
def __init__(self, pretrain_path): 
        nn.Module.__init__(self)
        self.bert = BertModel.from_pretrained(pretrain_path)
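from_pretrained accepts either a model identifier or a filesystem path, so pretrain_path above can just as well point at a locally saved checkpoint. For example (the local path is illustrative):

from transformers import BertModel

model = BertModel.from_pretrained("bert-base-uncased")            # hub identifier
model = BertModel.from_pretrained("/data/checkpoints/my-bert")    # local directory saved with save_pretrained()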
github thunlp / OpenNRE / opennre / encoder / bert_encoder.py
def __init__(self, max_length, pretrain_path, blank_padding=True):
        """
        Args:
            max_length: max length of sentence
            pretrain_path: path of pretrain model
        """
        super().__init__()
        self.max_length = max_length
        self.blank_padding = blank_padding
        self.hidden_size = 768 * 2  # two 768-dim entity representations are concatenated
        self.bert = BertModel.from_pretrained(pretrain_path)
        self.tokenizer = BertTokenizer.from_pretrained(pretrain_path)
        self.linear = nn.Linear(self.hidden_size, self.hidden_size)
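The 768 * 2 hidden size reflects that this encoder represents each sentence by concatenating two entity vectors, each the width of BERT's hidden state, before the linear projection. A minimal sketch of that concatenation (the real OpenNRE forward pass differs in detail, and the entity positions here are made up):

import torch

last_hidden = torch.randn(1, 128, 768)   # (batch, seq_len, bert hidden size)
head_pos, tail_pos = 3, 17               # assumed token positions of the two entities

head_vec = last_hidden[:, head_pos, :]           # (batch, 768)
tail_vec = last_hidden[:, tail_pos, :]           # (batch, 768)
pair = torch.cat([head_vec, tail_vec], dim=-1)   # (batch, 1536) == 768 * 2

linear = torch.nn.Linear(768 * 2, 768 * 2)
out = linear(pair)                               # same shape as the projection above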