Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.desired_error = 0.01
self.max_training_time = 7200
self._head = None
# Possible: speed, balance, accuracy
self.aim = aim
if self.aim == ENCODER_AIM.SPEED:
# uses more memory, takes very long to train and outputs weird debugging statements to the command line,
# consider waiting until it gets better or try to investigate why this happens
# (changing the pretrained model doesn't seem to help)
self._classifier_model_class = AlbertForSequenceClassification
self._embeddings_model_class = AlbertModel
self._tokenizer_class = AlbertTokenizer
self._pretrained_model_name = 'albert-base-v2'
self._model_max_len = 768
if self.aim == ENCODER_AIM.BALANCE:
self._classifier_model_class = DistilBertForSequenceClassification
self._embeddings_model_class = DistilBertModel
self._tokenizer_class = DistilBertTokenizer
self._pretrained_model_name = 'distilbert-base-uncased'
self._model_max_len = 768
if self.aim == ENCODER_AIM.ACCURACY:
self._classifier_model_class = DistilBertForSequenceClassification
self._embeddings_model_class = DistilBertModel
self._tokenizer_class = DistilBertTokenizer
self._pretrained_model_name = 'distilbert-base-uncased'
self._model_max_len = 768
self.device, _ = get_devices()
def __init__(self, is_target=False, aim=ENCODER_AIM.BALANCE):
    """Initialize the encoder, deriving n-gram range and feature cap from *aim*.

    Args:
        is_target: whether this encoder encodes the target column (kept for
            interface compatibility; not read inside this constructor).
        aim: ENCODER_AIM.SPEED / BALANCE / ACCURACY — trades vocabulary size
            against encoding speed.
    """
    self._prepared = False
    self.aim = aim
    self._pytorch_wrapper = torch.FloatTensor
    # (ngram_range, max_features) per aim. An unrecognized aim leaves both
    # attributes unset, exactly as the original if/elif chain did.
    profile_table = {
        ENCODER_AIM.SPEED: ((1, 3), 200),
        ENCODER_AIM.BALANCE: ((1, 5), 500),
        ENCODER_AIM.ACCURACY: ((1, 8), None),  # None = no feature cap
    }
    if self.aim in profile_table:
        self.ngram_range, self.max_features = profile_table[self.aim]
def __init__(self, is_target=False, aim=ENCODER_AIM.BALANCE):
    """Set up vectorizer defaults according to the requested aim.

    Args:
        is_target: whether this encoder targets the output column (accepted
            for interface compatibility; unused in this constructor).
        aim: ENCODER_AIM.SPEED / BALANCE / ACCURACY.
    """
    self._prepared = False
    self.aim = aim
    self._pytorch_wrapper = torch.FloatTensor
    # Stage the aim-dependent settings, then unpack in one place. A value of
    # None for max_features means "no cap"; an unknown aim assigns neither
    # attribute, matching the original behavior.
    if aim == ENCODER_AIM.SPEED:
        chosen = ((1, 3), 200)
    elif aim == ENCODER_AIM.BALANCE:
        chosen = ((1, 5), 500)
    elif aim == ENCODER_AIM.ACCURACY:
        chosen = ((1, 8), None)
    else:
        chosen = None
    if chosen is not None:
        self.ngram_range, self.max_features = chosen
def __init__(self, is_target=False, aim=ENCODER_AIM.BALANCE):
    """Build the transformer text encoder and select a pretrained model family.

    Args:
        is_target: whether this encoder encodes the target column (kept for
            interface compatibility; not read inside this constructor).
        aim: ENCODER_AIM.SPEED / BALANCE / ACCURACY; selects the transformer
            classes and pretrained checkpoint.

    NOTE(review): the extracted source was truncated right after the first
    `if self.aim == ENCODER_AIM.SPEED:` line; the branch bodies below are
    reconstructed from the byte-identical constructor tail that appears at
    the top of this file. The mutually exclusive `if` chain was tightened
    to `elif` — behavior is unchanged since `self.aim` is a single value.
    """
    self.name = 'Text Transformer Encoder'
    self._tokenizer = None
    self._model = None
    self._pad_id = None
    self._pytorch_wrapper = torch.FloatTensor
    self._max_len = None
    self._max_ele = None
    self._prepared = False
    self._model_type = None
    self.desired_error = 0.01
    self.max_training_time = 7200  # presumably seconds (2h) — TODO confirm against trainer
    self._head = None
    # Possible: speed, balance, accuracy
    self.aim = aim
    if self.aim == ENCODER_AIM.SPEED:
        # Albert uses more memory, takes very long to train and outputs weird
        # debugging statements to the command line; consider waiting until it
        # gets better or try to investigate why this happens (changing the
        # pretrained model doesn't seem to help).
        self._classifier_model_class = AlbertForSequenceClassification
        self._embeddings_model_class = AlbertModel
        self._tokenizer_class = AlbertTokenizer
        self._pretrained_model_name = 'albert-base-v2'
        self._model_max_len = 768
    elif self.aim == ENCODER_AIM.BALANCE:
        self._classifier_model_class = DistilBertForSequenceClassification
        self._embeddings_model_class = DistilBertModel
        self._tokenizer_class = DistilBertTokenizer
        self._pretrained_model_name = 'distilbert-base-uncased'
        self._model_max_len = 768
    elif self.aim == ENCODER_AIM.ACCURACY:
        # Currently identical to BALANCE (see the Albert note above for why
        # a heavier model is not used yet).
        self._classifier_model_class = DistilBertForSequenceClassification
        self._embeddings_model_class = DistilBertModel
        self._tokenizer_class = DistilBertTokenizer
        self._pretrained_model_name = 'distilbert-base-uncased'
        self._model_max_len = 768
    self.device, _ = get_devices()
def prepare_encoder(self, priming_data):
    """Instantiate the image-embedding backbone; may be called only once.

    Args:
        priming_data: accepted for interface compatibility with other
            encoders; not read by this implementation.

    Raises:
        Exception: if the encoder was already prepared.
    """
    if self._prepared:
        raise Exception('You can only call "prepare_encoder" once for a given encoder.')
    if self.model is None:
        # Map each aim to a backbone; anything unrecognized falls back to
        # Img2Vec's own default, exactly like the original else branch.
        backbone_by_aim = {
            ENCODER_AIM.SPEED: 'resnet-18',
            ENCODER_AIM.BALANCE: 'resnext-50-small',
            ENCODER_AIM.ACCURACY: 'resnext-50',
        }
        backbone = backbone_by_aim.get(self.aim)
        self.model = Img2Vec(model=backbone) if backbone is not None else Img2Vec()
    self._prepared = True