Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
to_predict: A python list of text (str) to be sent to the model for prediction.
Returns:
preds: A Python list of lists with dicts containing each word mapped to its NER tag.
model_outputs: A python list of the raw model outputs for each text.
"""
tokenizer = self.tokenizer
device = self.device
model = self.model
args = self.args
pad_token_label_id = self.pad_token_label_id
self._move_model_to_device()
predict_examples = [InputExample(i, sentence.split(), ["O" for word in sentence.split()]) for i, sentence in enumerate(to_predict)]
eval_dataset = self.load_and_cache_examples(None, to_predict=predict_examples)
eval_sampler = SequentialSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])
eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
model.eval()
for batch in tqdm(eval_dataloader, disable=args['silent']):
batch = tuple(t.to(device) for t in batch)
with torch.no_grad():
def read_examples_from_file(data_file, mode):
    """Read a token-per-line file into a list of ``InputExample`` objects.

    Every non-blank line is split on single spaces. The first field is the
    word; when the line has more than one field, the last field is its
    label, otherwise the label defaults to "O". A blank line, or a line
    starting with "-DOCSTART-", closes the sentence collected so far.

    Args:
        data_file: Path of the file to read (opened as UTF-8).
        mode: String used as the prefix of every example guid.

    Returns:
        The list of collected examples, with guids "<mode>-<n>" where n
        counts sentences from 1.
    """
    # NOTE(review): the end of this function was truncated in the reviewed
    # source; the closing ``labels=labels`` argument and ``return examples``
    # are reconstructed from the parallel call inside the loop - confirm.
    examples = []
    guid_index = 1
    words = []
    labels = []

    with open(data_file, encoding="utf-8") as f:
        for raw_line in f:
            # Drop the line terminator up front so it can never end up
            # inside a word when the line carries no label column.
            line = raw_line.rstrip("\n")

            if line.startswith("-DOCSTART-") or not line:
                # Sentence boundary: flush whatever has been collected.
                if words:
                    examples.append(
                        InputExample(
                            guid="{}-{}".format(mode, guid_index),
                            words=words,
                            labels=labels,
                        )
                    )
                    guid_index += 1
                    words = []
                    labels = []
                continue

            splits = line.split(" ")
            words.append(splits[0])
            if len(splits) > 1:
                labels.append(splits[-1])
            else:
                # Examples could have no label for mode = "test"
                labels.append("O")

    # The file may end without a trailing blank line; flush the last sentence.
    if words:
        # "{}" placeholders are required here: str.format ignores "%s"/"%d"
        # and would return the literal text "%s-%d" as the guid.
        examples.append(
            InputExample(
                guid="{}-{}".format(mode, guid_index),
                words=words,
                labels=labels,
            )
        )
    return examples
def get_examples_from_df(data):
    """Build one ``InputExample`` per sentence from a DataFrame.

    Args:
        data: DataFrame with ``sentence_id``, ``words`` and ``labels``
            columns. Rows sharing a ``sentence_id`` form one example.

    Returns:
        A list of ``InputExample`` objects, one per distinct
        ``sentence_id``, in the order ``groupby`` yields the groups. Each
        example's guid is the sentence id, and its words/labels are the
        group's column values as plain lists.
    """
    # Group by the bare column name, not a one-element list: with a list key
    # pandas 2.x yields 1-tuples such as ``(3,)`` as the group key, which
    # would silently change the guid from ``3`` to ``(3,)``.
    return [
        InputExample(
            guid=sentence_id,
            words=sentence_df["words"].tolist(),
            labels=sentence_df["labels"].tolist(),
        )
        for sentence_id, sentence_df in data.groupby("sentence_id")
    ]