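# Imports these snippets rely on (reconstructed; in the original module they sit
# at file top, and numpy is only needed for the loop sketch at the very end):
import os
import numpy as np
from torch.utils.data import DataLoader, SequentialSampler
from tqdm import tqdm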
"""
Evaluates the model on eval_df.
Utility function to be used by the eval_model() method. Not intended to be used directly.
"""
tokenizer = self.tokenizer
device = self.device
model = self.model
args = self.args
eval_output_dir = output_dir
results = {}
# Build InputExamples from named columns when present, else from the first two
# positional columns.
if 'text' in eval_df.columns and 'labels' in eval_df.columns:
    eval_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(eval_df['text'], eval_df['labels']))]
else:
    eval_examples = [InputExample(i, text, None, label) for i, (text, label) in enumerate(zip(eval_df.iloc[:, 0], eval_df.iloc[:, 1]))]
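# The positional arguments above suggest an InputExample(guid, text_a, text_b,
# label) container. A minimal sketch of such a class, assumed for illustration
# (the real one ships with the library):
#
#   class InputExample:
#       def __init__(self, guid, text_a, text_b=None, label=None):
#           self.guid = guid      # unique example id
#           self.text_a = text_a  # the text to classify
#           self.text_b = text_b  # optional second segment, unused here (None)
#           self.label = label    # gold label, or a placeholder when predicting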
# With sliding windows enabled, load_and_cache_examples also returns how many
# windows each text produced.
if args['sliding_window']:
    eval_dataset, window_counts = self.load_and_cache_examples(eval_examples, evaluate=True)
else:
    eval_dataset = self.load_and_cache_examples(eval_examples, evaluate=True)
if not os.path.exists(eval_output_dir):
    os.makedirs(eval_output_dir)
eval_sampler = SequentialSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])
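# SequentialSampler yields indices 0..N-1 in order, so outputs stay aligned with
# the input rows; evaluation data is deliberately not shuffled.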
eval_loss = 0.0
nb_eval_steps = 0
preds = None
# Second snippet: the model's predict() method body.
"""
Performs predictions on a list of text.

Args:
    to_predict: A python list of text (str) to be sent to the model for prediction.

Returns:
    preds: A python list of the predictions (0 or 1) for each text.
    model_outputs: A python list of the raw model outputs for each text.
"""
tokenizer = self.tokenizer
device = self.device
model = self.model
args = self.args
self._move_model_to_device()
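# _move_model_to_device() presumably places self.model on self.device before
# inference; each batch is moved to the same device in the loop below.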
# There are no gold labels at prediction time, so placeholder labels are used.
if multi_label:
    eval_examples = [InputExample(i, text, None, [0 for i in range(self.num_labels)]) for i, text in enumerate(to_predict)]
else:
    eval_examples = [InputExample(i, text, None, 0) for i, text in enumerate(to_predict)]
if args['sliding_window']:
    eval_dataset, window_counts = self.load_and_cache_examples(eval_examples, evaluate=True, no_cache=True)
else:
    eval_dataset = self.load_and_cache_examples(eval_examples, evaluate=True, multi_label=multi_label, no_cache=True)
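# With sliding windows, one long text yields several windows and window_counts
# records how many windows each text produced. A hedged, illustrative helper for
# mapping flat per-window outputs back to per-text groups (not the library's code):
#
#   def regroup_by_text(flat_outputs, window_counts):
#       grouped, start = [], 0
#       for count in window_counts:
#           grouped.append(flat_outputs[start:start + count])
#           start += count
#       return grouped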
eval_sampler = SequentialSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])
eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
for batch in tqdm(eval_dataloader, disable=args['silent']):
    model.eval()  # disable dropout and other train-time behavior for inference
    batch = tuple(t.to(device) for t in batch)  # move every tensor in the batch to the target device
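    # The snippet is truncated here. A hedged sketch of how such an evaluation
    # loop commonly continues (a standard Hugging Face-style pattern; the input
    # keys and batch layout are assumptions, not confirmed by this snippet):
    #
    #   with torch.no_grad():
    #       inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[3]}
    #       outputs = model(**inputs)
    #       tmp_eval_loss, logits = outputs[:2]
    #       eval_loss += tmp_eval_loss.mean().item()
    #   nb_eval_steps += 1
    #   if preds is None:
    #       preds = logits.detach().cpu().numpy()
    #       out_label_ids = inputs['labels'].detach().cpu().numpy()
    #   else:
    #       preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
    #       out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)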