Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_canonical(resolver):
    """Tests that a canonical entity name resolves to its own entry in the map"""
    query_entity = Entity("Pine and Market", ENTITY_TYPE)
    top_result = resolver.predict(query_entity)[0]
    # Only the best-ranked candidate is inspected.
    want_id, want_cname = "2", "Pine and Market"
    assert top_result["id"] == want_id
    assert top_result["cname"] == want_cname
def test_entity_equality():
    """Tests that two entities built from identical arguments compare equal"""
    ctor_args = ("text", "type", "role", "value", "display")
    first = Entity(*ctor_args)
    second = Entity(*ctor_args)
    assert first == second
def test_entity_equality():
    """Tests that separately constructed but identical entities are equal"""
    left, right = (
        Entity("text", "type", "role", "value", "display") for _ in range(2)
    )
    assert left == right
def test_synonym(resolver):
    """Tests that a synonym in the map resolves to its canonical entity"""
    best_match = resolver.predict(Entity("Pine St", ENTITY_TYPE))[0]
    # The synonym "Pine St" must map back to the canonical entry.
    wanted = {"id": "2", "cname": "Pine and Market"}
    assert best_match["id"] == wanted["id"]
    assert best_match["cname"] == wanted["cname"]
# NOTE(review): fragment -- the enclosing function header and the code that
# defines dimension, type_, item, value and num_type are outside this view.
# Special handling of time dimension grain
if dimension == DucklingDimension.TIME.value:
if type_ == "value":
# Plain time value: the grain is read directly from the item's value dict
# (None when the key is absent, because .get() is used).
value["grain"] = item["value"].get("grain")
elif type_ == "interval":
# Want to predict time intervals as sys_interval
num_type = "interval"
# Prefer the grain of the "from" endpoint; fall back to "to". When neither
# key is present this branch does not assign value["grain"].
if "from" in item["value"]:
value["grain"] = item["value"]["from"].get("grain")
elif "to" in item["value"]:
value["grain"] = item["value"]["to"].get("grain")
# The entity type is "sys_" followed by num_type.
entity_type = "sys_{}".format(num_type)
# The entity text comes from the item's "body" field.
return Entity(item["body"], entity_type, value=value)
# NOTE(review): fragment -- the enclosing function, the loop or scope that
# provides ann, and the try/except that binds exc are outside this view.
msg = "Group between {} and {} missing children".format(
ann["start"], ann["end"]
)
# Chain the original exception so the root cause stays in the traceback.
raise MarkupError(msg) from exc
# Replace head in entities with the result of head.with_children(children).
entity = head.with_children(children)
entities.remove(head)
entities.append(entity)
# If this annotation has a (truthy) parent, add the new entity to the
# parent's "children" list, creating that list when it is missing.
if ann.get("parent"):
parent = ann.get("parent")
children = parent.get("children", [])
children.append(entity)
parent["children"] = children
if ann["ann_type"] == "entity":
span = Span(ann["start"], ann["end"])
if Entity.is_system_entity(ann["type"]):
# System entities are resolved from the query text over the annotated span.
try:
raw_entity = resolve_system_entity(query, ann["type"], span).entity
except SystemEntityResolutionError as e:
# Resolution failure is logged as a warning and processing returns early
# without raising.
logger.warning("Unable to load query: %s", e)
return
# The role is optional: a missing "role" key is deliberately ignored.
try:
raw_entity.role = ann["role"]
except KeyError:
pass
else:
# Other entities are built from the annotation itself; the value carries
# the annotation's children when that key is present, otherwise it is None.
try:
value = {"children": ann["children"]}
except KeyError:
value = None
raw_entity = Entity(
ann["text"], ann["type"], role=ann.get("role"), value=value
def __init__(
    self, app_path, resource_loader, entity_type, es_host=None, es_client=None
):
    """Sets up the state of an entity resolver for one entity type

    Args:
        app_path (str): The application path
        resource_loader (ResourceLoader): An object which can load resources for the resolver
        entity_type: The entity type associated with this entity resolver
        es_host (str): The Elasticsearch host server
        es_client: Stored under the "client" key of the resolver's
            Elasticsearch config; presumably an existing Elasticsearch
            client object -- confirm against callers
    """
    namespace = get_app_namespace(app_path)
    normalize = resource_loader.query_factory.normalize

    self._app_namespace = namespace
    self._resource_loader = resource_loader
    self._normalizer = normalize

    self.type = entity_type
    self._is_system_entity = Entity.is_system_entity(self.type)
    # The exact-match mapping starts out unset.
    self._exact_match_mapping = None

    self._er_config = get_classifier_config("entity_resolution", app_path=app_path)

    self._es_host = es_host
    # The id of the creating process is recorded next to the client.
    self._es_config = dict(client=es_client, pid=os.getpid())
# NOTE(review): fragment -- the enclosing method header, the condition that
# leads to this raise, and the definitions of er_data and model_path are
# outside this view.
msg = (
"Your trained models are incompatible with this version of MindMeld. "
"Please run a clean build to retrain models"
)
raise ClassifierLoadError(msg)
# Probe the loaded config: if to_dict() is unavailable (AttributeError), the
# config is resolved against the current model config instead.
try:
self._model.config.to_dict()
except AttributeError:
# Loaded model config is incompatible with app config.
self._model.config.resolve_config(self._get_model_config())
# Collect the resources the model needs from the resource loader.
gazetteers = self._resource_loader.get_gazetteers()
tokenizer = self._resource_loader.get_tokenizer()
# Only the system entity types among this object's entity types.
sys_types = set(
(t for t in self.entity_types if Entity.is_system_entity(t))
)
# N-gram frequencies are read from er_data; each is None when its key is absent.
w_ngram_freq = er_data.get("w_ngram_freq")
c_ngram_freq = er_data.get("c_ngram_freq")
self._model.register_resources(
gazetteers=gazetteers,
sys_types=sys_types,
w_ngram_freq=w_ngram_freq,
c_ngram_freq=c_ngram_freq,
tokenizer=tokenizer,
)
# Rebuild the classifier config from the (possibly resolved) model config.
self.config = ClassifierConfig.from_model_config(self._model.config)
self.hash = self._load_hash(model_path)