train_data_loader = DataLoader(list(zip(priming_data_str, priming_data_str)), batch_size=batch_size, shuffle=True)
test_data_loader = None
best_model, error, training_time = gym.fit(train_data_loader,
                                           test_data_loader,
                                           desired_error=self.desired_error,
                                           max_time=self.max_training_time,
                                           callback=self._train_callback,
                                           eval_every_x_epochs=1,
                                           max_unimproving_models=5)
self.net = best_model.to(self.net.device)
modules = [module for module in self.net.modules()
           if type(module) != torch.nn.Sequential and type(module) != DefaultNet]
self.encoder = torch.nn.Sequential(*modules[0:2]).eval()
self.decoder = torch.nn.Sequential(*modules[2:3]).eval()
logging.info('Categorical autoencoder ready')
self._prepared = True
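# --- Illustrative sketch (not from the original source): the encoder/decoder split
# above relies on torch.nn.Module.modules() flattening a Sequential into its leaf
# modules, so slicing the list yields reusable halves. All names and sizes below
# are toy assumptions.
import torch

toy = torch.nn.Sequential(torch.nn.Linear(8, 3), torch.nn.SELU(), torch.nn.Linear(3, 8))
parts = [m for m in toy.modules() if type(m) != torch.nn.Sequential]
encoder = torch.nn.Sequential(*parts[0:2]).eval()  # Linear + SELU -> 3-dim embedding
decoder = torch.nn.Sequential(*parts[2:3]).eval()  # final Linear -> reconstruction logits
emb = encoder(torch.randn(1, 8))
print(emb.shape, decoder(emb).shape)  # torch.Size([1, 3]) torch.Size([1, 8])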
if device_str == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    self.available_devices = torch.cuda.device_count()
self.dynamic_parameters = dynamic_parameters
"""
Here we define the basic building blocks of our model,
in forward we define how we put it all together along with an input
:param sample_batch: this is used to understand the characteristics of the input and target,
it is an object of type utils.libs.data_types.batch.Batch
"""
super(DefaultNet, self).__init__()
if shape is None and pretrained_net is None:
    # Infer in/out widths from one (input, output) sample of the dataset
    input_sample, output_sample = ds[0]
    self.input_size = len(input_sample)
    self.output_size = len(output_sample)
    '''
    small_input = self.input_size < 50
    small_output = self.output_size < 50
    large_input = self.input_size > 2000
    large_output = self.output_size > 2000
    # 2. Determine in/out proportions
    # @TODO: Maybe provide a warning if the output is larger; this really shouldn't usually be the case
    # (outside of very specific things, such as text to image)
    '''
# NOTE: arguments before custom_train_func are assumed to mirror the gym.fit call above
best_model, error, training_time = gym.fit(
    train_data_loader, test_data_loader,
    desired_error=self.desired_error, max_time=self.max_training_time,
    callback=self._train_callback, eval_every_x_epochs=1, max_unimproving_models=5,
    custom_train_func=partial(self.categorical_train_function, test=False),
    custom_test_func=partial(self.categorical_train_function, test=True)
)
self._model = best_model.to(self.device)
elif all([x['output_type'] in (COLUMN_DATA_TYPES.NUMERIC, COLUMN_DATA_TYPES.CATEGORICAL)
          for x in training_data['targets']]):
self.desired_error = 0.01
self._model_type = 'generic_target_predictor'
self._model = self._embeddings_model_class.from_pretrained(self._pretrained_model_name).to(self.device)
batch_size = 10
self._head = DefaultNet(dynamic_parameters={},
                        shape=funnel(768, sum(len(x['encoded_output'][0]) for x in training_data['targets']), depth=5),
                        selfaware=False)
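# Standard transformer fine-tuning practice: exempt biases and LayerNorm weights from weight decay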
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in self._head.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': 1e-6},
    {'params': [p for n, p in self._head.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=5e-5, eps=1e-8)
# optimizer = Ranger(self._head.parameters(),lr=5e-5)
# num_training_steps is a rough estimate
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=10, num_training_steps=len(priming_data) * 15 // 20)
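# --- Illustrative sketch (not from the original source): the two-group AdamW +
# linear-warmup setup above, on a toy head. The attribute is deliberately named
# `LayerNorm` so the substring 'LayerNorm.weight' matches, as it does in Hugging
# Face model parameter names; sizes and step counts here are assumptions.
import torch
from transformers import get_linear_schedule_with_warmup

class ToyHead(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = torch.nn.Linear(768, 64)
        self.LayerNorm = torch.nn.LayerNorm(64)

head = ToyHead()
no_decay = ['bias', 'LayerNorm.weight']
grouped = [
    {'params': [p for n, p in head.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': 1e-6},   # decayed: dense.weight
    {'params': [p for n, p in head.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},    # exempt: dense.bias, LayerNorm.weight, LayerNorm.bias
]
optimizer = torch.optim.AdamW(grouped, lr=5e-5, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=10, num_training_steps=100)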
self.out_types = None
self.net = None
self.optimizer = None
self.input_column_names = None
self.output_column_names = None
self.transformer = None
self.encoders = None
self.optimizer_class = None
self.optimizer_args = None
self.criterion_arr = None
self.unreduced_criterion_arr = None
self.batch_size = 200
self.epochs = 120000
self.nn_class = DefaultNet
self.dynamic_parameters = dynamic_parameters
self.awareness_criterion = None
self.start_selfaware_training = False
self.stop_selfaware_training = False
self.is_selfaware = False
self.last_unaware_net = False
self.max_confidence_per_output = []
self.monitor = None
self.quantiles = [0.5, 0.2, 0.8, 0.1, 0.9, 0.05, 0.95, 0.02, 0.98, 0.005, 0.995]
self.quantiles_pair = [9, 10]
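# Assumption: quantiles_pair holds indices into self.quantiles (9 and 10 -> 0.005 and 0.995), i.e. the widest interval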
self.map_mean_sc_qi = None
for k in CONFIG.MONITORING:
    if CONFIG.MONITORING[k]:
        from lightwood.mixers.helpers.debugging import TrainingMonitor
        # Assumed hook-up: the bare import would otherwise be unused
        self.monitor = TrainingMonitor()
random.seed(len(priming_data))
if self._prepared:
    raise Exception('You can only call "prepare_encoder" once for a given encoder.')
self.onehot_encoder.prepare_encoder(priming_data)
input_len = self.onehot_encoder._lang.n_words
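# An autoencoder is only worth training when the one-hot width exceeds the desired embedding length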
self.use_autoencoder = self.max_encoded_length is not None and input_len > self.max_encoded_length
if self.use_autoencoder:
logging.info('Preparing a categorical autoencoder, this might take a while')
embeddings_layer_len = self.max_encoded_length
self.net = DefaultNet(dynamic_parameters={},
                      shape=[input_len, embeddings_layer_len, input_len],
                      selfaware=False)
criterion = torch.nn.CrossEntropyLoss()
optimizer = Ranger(self.net.parameters())
gym = Gym(model=self.net, optimizer=optimizer, scheduler=None, loss_criterion=criterion,
          device=self.net.device, name=self.name, input_encoder=self.onehot_encoder.encode,
          output_encoder=self._encoder_targets)
batch_size = max(1, min(200, len(priming_data) // 50))  # floor at 1 so tiny priming sets still yield valid batches
priming_data_str = [str(x) for x in priming_data]
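# The autoencoder's target is its own input, hence the identical (input, target) pairs below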
train_data_loader = DataLoader(list(zip(priming_data_str, priming_data_str)), batch_size=batch_size, shuffle=True)
test_data_loader = None
def __init__(self, dynamic_parameters, is_categorical_output=False):
self.is_categorical_output = is_categorical_output
self.net = None
self.optimizer = None
self.input_column_names = None
self.output_column_names = None
self.data_loader = None
self.transformer = None
self.encoders = None
self.criterion = None
self.batch_size = 200
self.epochs = 120000
self.nn_class = DefaultNet
self.dynamic_parameters = dynamic_parameters
# Pyro stuff
self.softplus = torch.nn.Softplus()
torch.manual_seed(66)
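# Deterministic cuDNN kernels trade some speed for reproducible results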
if 'cuda' in str(self.device):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    self.available_devices = torch.cuda.device_count()
else:
    self.available_devices = 1
self.dynamic_parameters = dynamic_parameters
"""
Here we define the basic building blocks of our model,
in forward we define how we put it all together along with an input
"""
super(DefaultNet, self).__init__()
if shape is None and pretrained_net is None:
    # Default shape: one hidden layer at least 400 wide, or twice the larger of input/output
    shape = [self.input_size, max(self.input_size * 2, self.output_size * 2, 400), self.output_size]
if pretrained_net is None:
    logging.info(f'Building network of shape: {shape}')
    rectifier = torch.nn.SELU  # alternative: torch.nn.ReLU
    layers = []
    # Stack Linear layers with a rectifier between each pair; no activation after the output layer
    for ind in range(len(shape) - 1):
        linear_function = PLinear if CONFIG.USE_PROBABILISTIC_LINEAR else torch.nn.Linear
        layers.append(linear_function(shape[ind], shape[ind + 1]))
        if ind < len(shape) - 2:
            layers.append(rectifier())
    self.net = torch.nn.Sequential(*layers)
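# --- Illustrative sketch (not from the original source): what the loop above
# produces for a concrete shape. With shape = [10, 400, 3] and plain
# torch.nn.Linear, it yields Linear -> SELU -> Linear (no rectifier after the
# output layer). All numbers here are assumptions.
import torch

shape = [10, 400, 3]
layers = []
for ind in range(len(shape) - 1):
    layers.append(torch.nn.Linear(shape[ind], shape[ind + 1]))
    if ind < len(shape) - 2:
        layers.append(torch.nn.SELU())
net = torch.nn.Sequential(*layers)
print(net)                            # Sequential: Linear(10->400), SELU, Linear(400->3)
print(net(torch.randn(4, 10)).shape)  # torch.Size([4, 3])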