Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_tagged_corpus_make_label_dictionary():
    """Check that ``make_label_dictionary`` yields exactly the labels in use."""
    # Three sentences carrying two distinct class labels between them.
    text_and_class = (
        ("sentence 1", "class_1"),
        ("sentence 2", "class_2"),
        ("sentence 3", "class_1"),
    )
    sentences = [
        Sentence(text, labels=[Label(class_name)])
        for text, class_name in text_and_class
    ]

    # The sentences go in as the first positional list; the other two stay empty.
    corpus: Corpus = Corpus(sentences, [], [])
    label_dictionary = corpus.make_label_dictionary()
    known_items = label_dictionary.get_items()

    assert len(label_dictionary) == 2
    # The empty string must not be registered as a label.
    assert "" not in known_items
    assert "class_1" in known_items
    assert "class_2" in known_items
def test_tagged_corpus_statistics():
    """Check the per-class counts and per-sentence token list computed by ``Corpus``."""
    # (text, class label) for three sentences, all tokenized with segtok_tokenizer.
    samples = (
        ("I love Berlin.", "class_1"),
        ("The sun is shining.", "class_2"),
        ("Berlin is sunny.", "class_1"),
    )
    sentences = [
        Sentence(text, labels=[Label(class_name)], use_tokenizer=segtok_tokenizer)
        for text, class_name in samples
    ]

    # "class_1" labels two of the sentences, "class_2" labels one.
    counts_by_class = Corpus._get_class_to_count(sentences)
    assert "class_1" in counts_by_class
    assert "class_2" in counts_by_class
    assert counts_by_class["class_1"] == 2
    assert counts_by_class["class_2"] == 1

    # One entry is expected per input sentence.
    token_counts = Corpus._get_tokens_per_sentence(sentences)
    assert len(token_counts) == 3
def _get_multi_label(self, label_scores) -> List[Label]:
    """Return every label whose sigmoid score exceeds the multi-label threshold.

    Each entry of ``label_scores`` is passed through a sigmoid; an entry is
    kept when the result is strictly greater than
    ``self.multi_label_threshold``. The label name is looked up in
    ``self.label_dictionary`` by the entry's position.

    :param label_scores: iterable of raw scores, one per dictionary index.
    :return: list of ``Label`` objects carrying the sigmoid value as score.
    """
    squash = torch.nn.Sigmoid()
    # Squash all scores first, then filter against the threshold.
    probabilities = [squash(raw_score) for raw_score in label_scores]
    return [
        Label(self.label_dictionary.get_item_for_index(position), probability.item())
        for position, probability in enumerate(probabilities)
        if probability > self.multi_label_threshold
    ]
confidences = score[:length].tolist()
tag_seq = prediction[:length].tolist()
scores = softmax[:length].tolist()
tags.append(
[
Label(self.tag_dictionary.get_item_for_index(tag), conf)
for conf, tag in zip(confidences, tag_seq)
]
)
if get_all_tags:
all_tags.append(
[
[
Label(
self.tag_dictionary.get_item_for_index(score_id), score
)
for score_id, score in enumerate(score_dist)
]
for score_dist in scores
]
)
return tags, all_tags
def convert_tag_scheme(self, tag_type: str = "ner", target_scheme: str = "iob"):
    """Re-encode the ``tag_type`` tags of this object's tokens in ``target_scheme``.

    The current tag of every token is collected, passed through ``iob2`` for
    the "iob" and "iobes" schemes (and additionally through ``iob_iobes`` for
    "iobes"), and then written back onto the tokens. Any other scheme name
    writes the collected tags back unchanged.

    :param tag_type: name of the tag to read from and write to each token.
    :param target_scheme: "iob" or "iobes".
    """
    collected: List[Label] = [token.get_tag(tag_type) for token in self.tokens]

    if target_scheme in ("iob", "iobes"):
        # NOTE(review): the return value of iob2 is unused, so it presumably
        # rewrites the list in place — confirm against its definition.
        iob2(collected)
    if target_scheme == "iobes":
        collected = iob_iobes(collected)

    # Write the (possibly converted) tags back, position by position.
    for position, converted in enumerate(collected):
        self.tokens[position].add_tag(tag_type, converted)
def add_label(self, label: Union[Label, str]):
    """Append a label to this object's ``labels`` list.

    A ``Label`` instance is stored as given; a string is first wrapped in a
    new ``Label``. A value of any other type is ignored.

    :param label: the label to add, either a ``Label`` or its string value.
    """
    # isinstance instead of an exact ``type(...) is`` comparison, so that
    # subclasses of Label and str are accepted rather than silently dropped.
    if isinstance(label, Label):
        self.labels.append(label)
    elif isinstance(label, str):
        self.labels.append(Label(label))
def _predict_label_prob(self, label_scores) -> List[Label]:
    """Return one label per dictionary index with its softmax probability.

    A softmax is taken over dimension 0 of ``label_scores``; the label name
    for each position is looked up in ``self.label_dictionary``.

    :param label_scores: tensor of raw scores, softmaxed along dimension 0.
    :return: list of ``Label`` objects, in index order, scored by probability.
    """
    probabilities = torch.nn.functional.softmax(label_scores, dim=0)
    return [
        Label(self.label_dictionary.get_item_for_index(position), probability.item())
        for position, probability in enumerate(probabilities)
    ]