Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
return self.label
class NamedResult(Result):
    """Extracted entity whose label is a human-style name.

    The incoming label is passed through ``clean_name`` before the base
    initialiser runs. A lookup key is then derived from the stored label
    via ``label_key``; the result is marked invalid when no key comes back.
    """

    strict = False

    def __init__(self, ctx, label, start, end):
        """Clean ``label``, initialise the base result and derive ``key``.

        ``ctx``, ``start`` and ``end`` are forwarded unchanged to the base
        initialiser; only the label is pre-processed.
        """
        cleaned = self.clean_name(label)
        super(NamedResult, self).__init__(ctx, cleaned, start, end)
        # The key is computed from the label as stored by the base class,
        # not from the local cleaned value.
        self.key = self.label_key(self.label)
        has_key = self.key is not None
        self.valid = has_key
class OrganizationResult(NamedResult):
    """Named result tagged with the ORGANIZATION category.

    On top of the checks done by ``NamedResult``, a label without any
    space character is rejected.
    """

    category = ExtractedEntity.ORGANIZATION

    def __init__(self, ctx, label, start, end):
        """Run the ``NamedResult`` setup, then reject space-less labels."""
        super(OrganizationResult, self).__init__(ctx, label, start, end)
        # Nothing more to check once the parent already flagged it invalid.
        if not self.valid:
            return
        if ' ' not in self.label:
            self.valid = False
class PersonResult(NamedResult):
    """Named result tagged with the PERSON category.

    A result that passed the ``NamedResult`` checks is still invalidated
    when its label contains no space character.
    """

    category = ExtractedEntity.PERSON

    def __init__(self, ctx, label, start, end):
        """Run the ``NamedResult`` setup, then reject space-less labels."""
        super(PersonResult, self).__init__(ctx, label, start, end)
        # Short-circuits on an invalid result, so the label is only
        # inspected when the parent considered it valid.
        lacks_space = self.valid and ' ' not in self.label
        if lacks_space:
            self.valid = False
from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector
# Module-level logger, named after the importing module.
log = logging.getLogger(__name__)
# Shorthand alias for ExtractedEntity.Type.Value.
# NOTE(review): presumably the protobuf enum name-to-number lookup --
# confirm against the generated entityextract_pb2 module.
TYPE = ExtractedEntity.Type.Value
class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
SERVICE = settings.ENTITIES_SERVICE
ORIGIN = 'ner'
TYPES = {
ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
}
def __init__(self):
self.active = self.has_channel()
def extract(self, collector, document):
DocumentTagCollector(document, 'polyglot').save()
DocumentTagCollector(document, 'spacy').save()
try:
service = EntityExtractStub(self.channel)
texts = self.text_iterator(document)
entities = service.Extract(texts)
for entity in entities.entities:
type_ = self.TYPES.get(entity.type)
if type_ is None:
# Logger registered under the fixed name 'service' (not the module name).
log = logging.getLogger('service')
# NOTE(review): the polyglot configuration below is commented out, so none
# of these POLYGLOT_* names are defined by this snippet.
# POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
# POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# POLYGLOT_TYPES = {
# 'I-PER': ExtractedEntity.PERSON,
# 'I-ORG': ExtractedEntity.ORGANIZATION,
# 'I-LOC': ExtractedEntity.LOCATION
# }
# Maps spaCy named-entity labels to ExtractedEntity categories.
# 'PER' and 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to
# LOCATION. Labels not listed here have no entry in the mapping.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
'PER': ExtractedEntity.PERSON,
'PERSON': ExtractedEntity.PERSON,
'ORG': ExtractedEntity.ORGANIZATION,
'LOC': ExtractedEntity.LOCATION,
'GPE': ExtractedEntity.LOCATION
}
class EntityServicer(EntityExtractServicer):
def __init__(self):
log.info("Loading spaCy model xx...")
self.spacy = spacy.load('xx')
# def extract_polyglot(self, text):
# try:
# parsed = Text(text)
# lang = parsed.language
# if lang.confidence > 90:
import logging
from polyglot.text import Text
from concurrent import futures
from alephclient.services.entityextract_pb2_grpc import (
add_EntityExtractServicer_to_server, EntityExtractServicer
)
from alephclient.services.entityextract_pb2 import ExtractedEntity
# Logger registered under the fixed name 'service' (not the module name).
log = logging.getLogger('service')
# Base directory of the polyglot data, read from the POLYGLOT_DATA_PATH
# environment variable.
# NOTE(review): os.environ.get returns None when the variable is unset, in
# which case the os.path.join call below fails as soon as this module is
# loaded. Also, no `import os` is visible in this snippet -- confirm it
# exists in the full file.
POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# Directory listing of the NER data path, taken once at module load.
# Presumably one entry per available language -- confirm against the
# on-disk layout of the polyglot data.
POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# Maps polyglot entity tags to ExtractedEntity categories.
POLYGLOT_TYPES = {
'I-PER': ExtractedEntity.PERSON,
'I-ORG': ExtractedEntity.ORGANIZATION,
'I-LOC': ExtractedEntity.LOCATION
}
# Maps spaCy named-entity labels to ExtractedEntity categories.
# 'PER' and 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to
# LOCATION.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
'PER': ExtractedEntity.PERSON,
'PERSON': ExtractedEntity.PERSON,
'ORG': ExtractedEntity.ORGANIZATION,
'LOC': ExtractedEntity.LOCATION,
'GPE': ExtractedEntity.LOCATION
}
class EntityServicer(EntityExtractServicer):
def __init__(self):
from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector
# Module-level logger, named after the importing module.
log = logging.getLogger(__name__)
# Shorthand alias for ExtractedEntity.Type.Value.
# NOTE(review): presumably the protobuf enum name-to-number lookup --
# confirm against the generated entityextract_pb2 module.
TYPE = ExtractedEntity.Type.Value
class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
SERVICE = settings.ENTITIES_SERVICE
ORIGIN = 'ner'
TYPES = {
ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
}
def __init__(self):
self.active = self.has_channel()
def extract(self, collector, document):
DocumentTagCollector(document, 'polyglot').save()
DocumentTagCollector(document, 'spacy').save()
try:
service = EntityExtractStub(self.channel)
texts = self.text_iterator(document)
entities = service.Extract(texts)
for entity in entities.entities:
type_ = self.TYPES.get(entity.type)
if type_ is None: