# Shared imports for the excerpts below (InputExample's module path may vary by simpletransformers version).
import os

import torch
from torch.utils.data import DataLoader, SequentialSampler
from tqdm.auto import tqdm
from simpletransformers.classification.classification_utils import InputExample

# Excerpt: ClassificationModel.predict() (simpletransformers); def line restored from the parameters used below.
def predict(self, to_predict, multi_label=False):
    """
    Performs predictions on a list of text.

    Args:
        to_predict: A python list of text (str) to be sent to the model for prediction.

    Returns:
        preds: A python list of the predictions (0 or 1) for each text.
        model_outputs: A python list of the raw model outputs for each text.
    """
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args
    self._move_model_to_device()

    # Dummy labels let prediction reuse the same feature-conversion path as evaluation.
    if multi_label:
        eval_examples = [InputExample(i, text, None, [0 for _ in range(self.num_labels)]) for i, text in enumerate(to_predict)]
    else:
        eval_examples = [InputExample(i, text, None, 0) for i, text in enumerate(to_predict)]
    eval_dataset = self.load_and_cache_examples(eval_examples, evaluate=True, multi_label=multi_label, no_cache=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in tqdm(eval_dataloader, disable=args["silent"]):
        model.eval()
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            ...
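# Usage sketch for predict(): assumes simpletransformers is installed and a
# trained checkpoint was saved to "outputs/" (a hypothetical path, not one
# taken from this excerpt).
from simpletransformers.classification import ClassificationModel

clf = ClassificationModel("roberta", "outputs/", use_cuda=False)
predictions, raw_outputs = clf.predict(["This movie was great!", "Terrible service."])
print(predictions)   # e.g. [1, 0] -- one label per input text
print(raw_outputs)   # raw model outputs, one row per input text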
# Excerpt: ClassificationModel.evaluate() (simpletransformers); def line restored from the parameters used below.
def evaluate(self, eval_df, output_dir):
    """
    Evaluates the model on eval_df.

    Utility function to be used by the eval_model() method. Not intended to be used directly.
    """
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args
    eval_output_dir = output_dir

    results = {}

    # Accept either named "text"/"labels" columns or the first two positional columns.
    if "text" in eval_df.columns and "labels" in eval_df.columns:
        eval_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(eval_df["text"], eval_df["labels"]))]
    else:
        eval_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(eval_df.iloc[:, 0], eval_df.iloc[:, 1]))]
    eval_dataset = self.load_and_cache_examples(eval_examples, evaluate=True)

    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    model.eval()
    for batch in tqdm(eval_dataloader, disable=args["silent"]):
        batch = tuple(t.to(device) for t in batch)
        ...
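# eval_model() is the public wrapper around the evaluate() utility above; a
# minimal sketch, assuming "clf" is the trained model from the previous
# example and a DataFrame in the "text"/"labels" layout this excerpt accepts:
import pandas as pd

eval_df = pd.DataFrame({"text": ["A readable, well-paced book.", "Dull and repetitive."], "labels": [1, 0]})
result, model_outputs, wrong_predictions = clf.eval_model(eval_df)
print(result)  # dict of evaluation metrics; wrong_predictions holds the misclassified examples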
# Excerpt: ClassificationModel.train_model() (simpletransformers); def line restored from the parameters used below.
def train_model(self, train_df, output_dir=None, show_running_loss=True, eval_df=None):
    if self.args["evaluate_during_training"] and eval_df is None:
        raise ValueError("evaluate_during_training is enabled but eval_df is not specified. Pass eval_df to model.train_model() if using evaluate_during_training.")

    if not output_dir:
        output_dir = self.args["output_dir"]

    # Refuse to clobber a non-empty output directory unless explicitly allowed.
    if os.path.exists(output_dir) and os.listdir(output_dir) and not self.args["overwrite_output_dir"]:
        raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(output_dir))

    self._move_model_to_device()

    # Accept either named "text"/"labels" columns or the first two positional columns.
    if "text" in train_df.columns and "labels" in train_df.columns:
        train_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(train_df["text"], train_df["labels"]))]
    else:
        train_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(train_df.iloc[:, 0], train_df.iloc[:, 1]))]
    train_dataset = self.load_and_cache_examples(train_examples)

    global_step, tr_loss = self.train(train_dataset, output_dir, show_running_loss=show_running_loss, eval_df=eval_df)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Unwrap DataParallel if present, then save model, tokenizer, and training args.
    model_to_save = self.model.module if hasattr(self.model, "module") else self.model
    model_to_save.save_pretrained(output_dir)
    self.tokenizer.save_pretrained(output_dir)
    torch.save(self.args, os.path.join(output_dir, "training_args.bin"))

    print("Training of {} model complete. Saved to {}.".format(self.args["model_type"], output_dir))