Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE: indentation was stripped from this excerpt, so the nesting described in
# the comments below is inferred from statement order. The excerpt also stops
# before the candidate-handling code that the trailing comments announce.
# In the visible portion, `candidate_class`, `create_missing_cands` and the
# `labels` list are never used.
def reload_annotator_labels(session, candidate_class, annotator_name, split, filter_label_split=True, create_missing_cands=False):
"""Reloads stable annotator labels into the AnnotatorLabel table"""
# Sets up the AnnotatorLabelKey to use
# Look up the GoldLabelKey whose name equals `annotator_name`; when the query
# returns nothing, a new key is built, added to the session and committed.
ak = session.query(GoldLabelKey).filter(GoldLabelKey.name == annotator_name).first()
if ak is None:
ak = GoldLabelKey(name=annotator_name)
session.add(ak)
session.commit()
labels = []
# StableLabel rows for which not every referenced Context could be found.
missed = []
# Select this annotator's StableLabel rows; the extra `split` filter is applied
# only when `filter_label_split` is truthy.
sl_query = session.query(StableLabel).filter(StableLabel.annotator_name == annotator_name)
sl_query = sl_query.filter(StableLabel.split == split) if filter_label_split else sl_query
for sl in sl_query.all():
# The stored key is a '~~'-separated string of context stable ids.
context_stable_ids = sl.context_stable_ids.split('~~')
# Check for labeled Contexts
# TODO: Does not create the Contexts if they do not yet exist!
contexts = []
# One query per stable id: take the first Context with that stable_id and keep
# it only if one was found.
for stable_id in context_stable_ids:
context = session.query(Context).filter(Context.stable_id == stable_id).first()
if context:
contexts.append(context)
# Fewer Contexts than ids means at least one lookup failed: remember the
# label in `missed` and move on to the next one.
if len(contexts) < len(context_stable_ids):
missed.append(sl)
continue
# Check for Candidate
# Assemble candidate arguments
# NOTE: this is a body fragment; the enclosing `def` is not part of the excerpt,
# so `session`, `candidate_class`, `annotator_name` and `FPATH` come from
# outside the visible lines. Indentation was stripped, so nesting is inferred.
# Read the gold labels from the tab-separated file at FPATH.
gold_labels = pd.read_csv(FPATH, sep="\t")
for index, row in gold_labels.iterrows():
# We check if the label already exists, in case this cell was already executed
# Key for the (person1, person2) direction: the two column values joined by '~~'.
context_stable_ids = "~~".join([row['person1'], row['person2']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
# Add a StableLabel only when no row with this key and annotator exists yet.
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))
# Because it's a symmetric relation, load both directions...
# Same existence check and insert for the reversed (person2, person1) key,
# using the same `label` value.
context_stable_ids = "~~".join([row['person2'], row['person1']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))
# Commit session
# NOTE(review): with indentation lost it cannot be read off here whether the
# commit runs once after the loop or once per row -- confirm against the
# original file.
session.commit()
# Reload annotator labels
# Called once with split=1 and once with split=2, both with
# filter_label_split=False.
reload_annotator_labels(session, candidate_class, annotator_name, split=1, filter_label_split=False)
reload_annotator_labels(session, candidate_class, annotator_name, split=2, filter_label_split=False)
# NOTE: fragment of a method whose header and opening condition are outside the
# excerpt; `cid`, `value` and `content` are defined in the part not shown.
# Indentation was stripped, so nesting is inferred from statement order.
# An annotation is already stored for `cid`: when its value differs from the new
# one, update both the annotation and its stable counterpart, then commit.
if self.annotations[cid].value != value:
self.annotations[cid].value = value
self.annotations_stable[cid].value = value
self.session.commit()
# Otherwise, create a AnnotatorLabel *and a StableLabel*
# NOTE(review): the `if` that this `else` pairs with is not visible here.
else:
candidate = self.candidates[cid]
# Create AnnotatorLabel
self.annotations[cid] = GoldLabel(key=self.annotator, candidate=candidate, value=value)
self.session.add(self.annotations[cid])
# Create StableLabel
# The stable key is the candidate's context stable ids joined by '~~'.
context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
self.annotations_stable[cid] = StableLabel(context_stable_ids=context_stable_ids,\
annotator_name=self.annotator.name,\
value=value,\
split=candidate.split)
self.session.add(self.annotations_stable[cid])
self.session.commit()
# 'delete_label' event: delete both stored records through the session, reset
# the two per-candidate slots to None, then commit.
elif content.get('event', '') == 'delete_label':
cid = content.get('cid', None)
self.session.delete(self.annotations[cid])
self.annotations[cid] = None
self.session.delete(self.annotations_stable[cid])
self.annotations_stable[cid] = None
self.session.commit()
# NOTE: fragment that starts in the middle of a query expression; the start of
# the statement, the enclosing loop and the definitions of `i`, `candidate`,
# `name` and `init_labels_serialized` are outside the excerpt. Presumably the
# query result is bound to `existing_annotation`, which is read just below --
# confirm against the original file.
.filter(GoldLabel.candidate == candidate) \
.first()
if existing_annotation is not None:
self.annotations[i] = existing_annotation
# Map the stored value to the string form appended below: 1 -> 'true',
# -1 -> 'false'; any other value raises ValueError.
if existing_annotation.value == 1:
value_string = 'true'
elif existing_annotation.value == -1:
value_string = 'false'
else:
raise ValueError(str(existing_annotation) +
' has value not in {1, -1}, which Viewer does not support.')
# Serialized entry has the form '<i>~~<true|false>'.
init_labels_serialized.append(str(i) + '~~' + value_string)
# If the annotator label is in the main table, also get its stable version
context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
# one_or_none() returns None when no row matches.
# NOTE(review): this lookup filters on `name`, while the row created below is
# stored under `self.annotator.name` -- confirm the two are always equal.
existing_annotation_stable = self.session.query(StableLabel) \
.filter(StableLabel.context_stable_ids == context_stable_ids)\
.filter(StableLabel.annotator_name == name).one_or_none()
# If stable version is not available, create it here
# NOTE: This is for versioning issues, should be removed?
if existing_annotation_stable is None:
# Recomputes the same key that was already built above.
context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
existing_annotation_stable = StableLabel(context_stable_ids=context_stable_ids,\
annotator_name=self.annotator.name,\
split=candidate.split,\
value=existing_annotation.value)
self.session.add(existing_annotation_stable)
self.session.commit()
# Keep the stable record (found or newly created) in the per-index slot.
self.annotations_stable[i] = existing_annotation_stable
# NOTE: indentation was stripped from this excerpt and the final call is cut
# off mid-argument-list, so the end of the function is not visible.
# `id_fname` is not used in the visible portion.
def load_external_labels(session, candidate_class, split, annotator='gold',
label_fname='data/cdr_relations_gold.pkl', id_fname='data/doc_ids.pkl'):
# Load document-level relation annotations
# NOTE(review): `load` is imported outside this excerpt; if it is pickle's
# `load`, `label_fname` must only ever point at trusted files.
with open(label_fname, 'rb') as f:
relations = load(f)
# Get split candidates
candidates = session.query(candidate_class).filter(
candidate_class.split == split
).all()
for c in candidates:
# Get the label by mapping document annotations to mentions
# Relations are looked up by the name of the candidate's grandparent
# (`c.get_parent().get_parent().name`), defaulting to an empty set.
doc_relations = relations.get(c.get_parent().get_parent().name, set())
# 1 when the candidate's cids are in the document's relations, else -1.
label = 2 * int(c.get_cids() in doc_relations) - 1
# Get stable ids and check to see if label already exists
context_stable_ids = '~~'.join(x.get_stable_id() for x in c)
query = session.query(StableLabel).filter(
StableLabel.context_stable_ids == context_stable_ids
)
query = query.filter(StableLabel.annotator_name == annotator)
# If does not already exist, add label
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator,
value=label
))
# Commit session
session.commit()
# Reload annotator labels
# (the call below is truncated in this excerpt)
reload_annotator_labels(session, candidate_class, annotator,
# NOTE: fragment of a method; `annotations`, `name`, `key`, `spans`,
# `stable_labels_by_type` and `annotator_name` are defined outside the excerpt,
# and the final loop body is cut off. Indentation was stripped, so nesting is
# inferred from statement order.
# Each relation entry unpacks to its type and the keys of its two arguments.
rela_type, arg1, arg2 = annotations[name][key]
# Pair each argument's entity type with its span and sort the two pairs, so the
# order depends on the entity type first.
rela = sorted([[annotations[name][arg1]["entity_type"], spans[arg1]],
[annotations[name][arg2]["entity_type"],spans[arg2]]])
# Join the two spans (second element of each pair) with '~~'.
# NOTE(review): subscripting the result of zip() only works on Python 2, where
# zip returns a list; on Python 3 zip returns an iterator and this line raises
# TypeError.
stable_labels_by_type[rela_type].append("~~".join(zip(*rela)[1]))
# create stable labels
# NOTE: we store each label class type in a different split so that it is compatible with
# the current version of 'reload_annotator_labels', where we create candidates by split id
for i, class_type in enumerate(stable_labels_by_type):
for context_stable_id in stable_labels_by_type[class_type]:
query = self.session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_id)
query = query.filter(StableLabel.annotator_name == annotator_name)
# Skip keys that already have a StableLabel for this annotator.
if query.count() != 0:
continue
# New labels get value=1 and use the enumeration index as their split.
self.session.add(StableLabel(context_stable_ids=context_stable_id, split=i,
annotator_name=annotator_name, value=1))
# Neither container is used again in the visible portion.
abs_offsets = {}
entity_types = defaultdict(list)
for i, class_type in enumerate(stable_labels_by_type):
# Resolve the class for this type: direct lookup in `self.subclasses`, else
# lookup under the name returned by `self._get_normed_rela_name`.
if class_type in self.subclasses:
class_name = self.subclasses[class_type]
else:
class_name = self.subclasses[self._get_normed_rela_name(class_type)]
for et in stable_labels_by_type[class_type]:
contexts = et.split('~~')
spans = []
# NOTE(review): the inner loop variable `et` rebinds the outer loop's `et`.
for c,et in zip(contexts,class_name.__argnames__):
# NOTE: body fragment; the enclosing `def` is not part of this excerpt, so
# `session`, `candidate_class`, `split`, `annotator` and `label_fname` come from
# outside the visible lines. Indentation was stripped, so nesting is inferred.
# NOTE(review): `load` is imported outside this excerpt; if it is pickle's
# `load`, `label_fname` must only ever point at trusted files.
with open(label_fname, 'rb') as f:
relations = load(f)
# Get split candidates
candidates = session.query(candidate_class).filter(
candidate_class.split == split
).all()
for c in candidates:
# Get the label by mapping document annotations to mentions
# Relations are looked up by the name of the candidate's grandparent
# (`c.get_parent().get_parent().name`), defaulting to an empty set.
doc_relations = relations.get(c.get_parent().get_parent().name, set())
# 1 when the candidate's cids are in the document's relations, else -1.
label = 2 * int(c.get_cids() in doc_relations) - 1
# Get stable ids and check to see if label already exists
context_stable_ids = '~~'.join(x.get_stable_id() for x in c)
query = session.query(StableLabel).filter(
StableLabel.context_stable_ids == context_stable_ids
)
query = query.filter(StableLabel.annotator_name == annotator)
# If does not already exist, add label
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator,
value=label
))
# Commit session
session.commit()
# Reload annotator labels
# The split filter is disabled (filter_label_split=False) for this reload.
reload_annotator_labels(session, candidate_class, annotator,
split=split, filter_label_split=False)
# NOTE: fragment; the enclosing function header and the loop that binds `row`
# are outside this excerpt, as are `session`, `candidate_class` and
# `annotator_name`. Indentation was stripped, so nesting is inferred.
# We check if the label already exists, in case this cell was already executed
# Key for the (person1, person2) direction: the two column values joined by '~~'.
context_stable_ids = "~~".join([row['person1'], row['person2']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
# Add a StableLabel only when no row with this key and annotator exists yet.
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))
# Because it's a symmetric relation, load both directions...
# Same existence check and insert for the reversed (person2, person1) key,
# using the same `label` value.
context_stable_ids = "~~".join([row['person2'], row['person1']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))
# Commit session
session.commit()
# Reload annotator labels
# Called once with split=1 and once with split=2, both with
# filter_label_split=False.
reload_annotator_labels(session, candidate_class, annotator_name, split=1, filter_label_split=False)
reload_annotator_labels(session, candidate_class, annotator_name, split=2, filter_label_split=False)
# NOTE: indentation was stripped from this excerpt, so the nesting described in
# the comments below is inferred from statement order. The excerpt also stops
# before the candidate-handling code that the trailing comment announces.
# In the visible portion, `candidate_class`, `create_missing_cands` and the
# `labels` list are never used.
def reload_annotator_labels(session, candidate_class, annotator_name, split, filter_label_split=True, create_missing_cands=False):
"""Reloads stable annotator labels into the AnnotatorLabel table"""
# Sets up the AnnotatorLabelKey to use
# Look up the GoldLabelKey whose name equals `annotator_name`; when the query
# returns nothing, a new key is built, added to the session and committed.
ak = session.query(GoldLabelKey).filter(GoldLabelKey.name == annotator_name).first()
if ak is None:
ak = GoldLabelKey(name=annotator_name)
session.add(ak)
session.commit()
labels = []
# StableLabel rows for which not every referenced Context could be found.
missed = []
# Select this annotator's StableLabel rows; the extra `split` filter is applied
# only when `filter_label_split` is truthy.
sl_query = session.query(StableLabel).filter(StableLabel.annotator_name == annotator_name)
sl_query = sl_query.filter(StableLabel.split == split) if filter_label_split else sl_query
for sl in sl_query.all():
# The stored key is a '~~'-separated string of context stable ids.
context_stable_ids = sl.context_stable_ids.split('~~')
# Check for labeled Contexts
# TODO: Does not create the Contexts if they do not yet exist!
contexts = []
# One query per stable id: take the first Context with that stable_id and keep
# it only if one was found.
for stable_id in context_stable_ids:
context = session.query(Context).filter(Context.stable_id == stable_id).first()
if context:
contexts.append(context)
# Fewer Contexts than ids means at least one lookup failed: remember the
# label in `missed` and move on to the next one.
if len(contexts) < len(context_stable_ids):
missed.append(sl)
continue
# Check for Candidate
def load_external_labels(session, candidate_class, annotator_name='gold'):
gold_labels = pd.read_csv(FPATH, sep="\t")
for index, row in gold_labels.iterrows():
# We check if the label already exists, in case this cell was already executed
context_stable_ids = "~~".join([row['person1'], row['person2']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))
# Because it's a symmetric relation, load both directions...
context_stable_ids = "~~".join([row['person2'], row['person1']])
query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
query = query.filter(StableLabel.annotator_name == annotator_name)
if query.count() == 0:
session.add(StableLabel(
context_stable_ids=context_stable_ids,
annotator_name=annotator_name,
value=row['label']))