How to use the alephclient.services.entityextract_pb2.ExtractedEntity.PERSON enum value in alephclient

To help you get started, we’ve selected a few alephclient examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alephdata / aleph / services / extract-entities / entityextractor / normalize.py View on Github external
def clean_label(text, category=None):
    """Normalise an extracted label, or reject it by returning None.

    The label is rejected when:

    * ``text`` is None or longer than ``MAX_LENGTH``;
    * the cleaned text is empty or shorter than ``MIN_LENGTH``;
    * ``category`` is None or ``ExtractedEntity.PERSON`` and the cleaned
      text contains no space character.

    Otherwise the cleaned text is returned.
    """
    if text is None or len(text) > MAX_LENGTH:
        return None

    # When the CLEANUP pattern matches, keep only its 'term' group.
    found = CLEANUP.match(text)
    candidate = text if found is None else found.group('term')
    candidate = collapse_spaces(candidate)

    size = len(candidate)
    if size == 0 or size < MIN_LENGTH:
        return None

    # Uncategorised and PERSON labels must contain at least one space.
    wants_space = category in (None, ExtractedEntity.PERSON)
    if wants_space and ' ' not in candidate:
        return None
    return candidate
github alephdata / aleph / services / extract-entities / entityextractor / service.py View on Github external
# Logger for this service, registered under the fixed name 'service'.
log = logging.getLogger('service')

# Base directory of the polyglot data, read from the environment.
# NOTE(review): os.environ.get() returns None when POLYGLOT_DATA_PATH is
# unset, so the os.path.join() below would fail at import time -- confirm
# the variable is always set where this module is loaded.
POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# Entries found in the NER directory, listed once at import time;
# presumably one entry per supported language -- verify against the
# polyglot data layout.
POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# Maps polyglot tag strings to ExtractedEntity type constants.
POLYGLOT_TYPES = {
    'I-PER': ExtractedEntity.PERSON,
    'I-ORG': ExtractedEntity.ORGANIZATION,
    'I-LOC': ExtractedEntity.LOCATION
}

# Maps spaCy entity labels to ExtractedEntity type constants. 'PER' and
# 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to LOCATION.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
    'PER': ExtractedEntity.PERSON,
    'PERSON': ExtractedEntity.PERSON,
    'ORG': ExtractedEntity.ORGANIZATION,
    'LOC': ExtractedEntity.LOCATION,
    'GPE': ExtractedEntity.LOCATION
}


class EntityServicer(EntityExtractServicer):

    def __init__(self):
        log.info("Loading spaCy model xx...")
        self.spacy = spacy.load('xx')

    def extract_polyglot(self, text):
        try:
            parsed = Text(text)
            lang = parsed.language
github alephdata / aleph / aleph / analyze / extract_entity.py View on Github external
from alephclient.services.entityextract_pb2 import ExtractedEntity

from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector

# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Shorthand alias for ExtractedEntity.Type.Value; presumably the protobuf
# enum lookup from a type name to its numeric value -- confirm.
TYPE = ExtractedEntity.Type.Value


class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
    SERVICE = settings.ENTITIES_SERVICE
    ORIGIN = 'ner'
    TYPES = {
        ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
        ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
        ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
    }

    def __init__(self):
        self.active = self.has_channel()

    def extract(self, collector, document):
        DocumentTagCollector(document, 'polyglot').save()
        DocumentTagCollector(document, 'spacy').save()
        try:
            service = EntityExtractStub(self.channel)
            texts = self.text_iterator(document)
            entities = service.Extract(texts)
            for entity in entities.entities:
                type_ = self.TYPES.get(entity.type)
github alephdata / aleph / aleph / analyze / extract_entity.py View on Github external
from alephclient.services.entityextract_pb2 import ExtractedEntity

from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector

# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Shorthand alias for ExtractedEntity.Type.Value; presumably the protobuf
# enum lookup from a type name to its numeric value -- confirm.
TYPE = ExtractedEntity.Type.Value


class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
    SERVICE = settings.ENTITIES_SERVICE
    ORIGIN = 'ner'
    TYPES = {
        ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
        ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
        ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
    }

    def __init__(self):
        self.active = self.has_channel()

    def extract(self, collector, document):
        DocumentTagCollector(document, 'polyglot').save()
        DocumentTagCollector(document, 'spacy').save()
        try:
            service = EntityExtractStub(self.channel)
            texts = self.text_iterator(document)
            entities = service.Extract(texts)
            for entity in entities.entities:
                type_ = self.TYPES.get(entity.type)
github alephdata / aleph / services / extract-entities / entityextractor / result.py View on Github external
super(NamedResult, self).__init__(ctx, label, start, end)
        self.key = self.label_key(self.label)
        self.valid = self.key is not None


class OrganizationResult(NamedResult):
    """Named result categorised as an organization.

    A label that the base class accepted is still marked invalid when it
    contains no space character.
    """
    category = ExtractedEntity.ORGANIZATION

    def __init__(self, ctx, label, start, end):
        super(OrganizationResult, self).__init__(ctx, label, start, end)
        # Nothing more to check if the base class already rejected it.
        if not self.valid:
            return
        if ' ' not in self.label:
            self.valid = False


class PersonResult(NamedResult):
    """Named result categorised as a person.

    A label that the base class accepted is still marked invalid when it
    contains no space character.
    """
    category = ExtractedEntity.PERSON

    def __init__(self, ctx, label, start, end):
        super(PersonResult, self).__init__(ctx, label, start, end)
        # Nothing more to check if the base class already rejected it.
        if not self.valid:
            return
        if ' ' not in self.label:
            self.valid = False


class LocationResult(NamedResult):
    """Named result categorised as a location, mapped to countries."""
    # A single resolver instance, created when the class body executes and
    # shared by all instances through the class attribute.
    resolver = LocationResolver()
    category = ExtractedEntity.LOCATION

    def __init__(self, ctx, label, start, end):
        super(LocationResult, self).__init__(ctx, label, start, end)
        # The lookup uses the label argument exactly as it was passed in.
        matched = self.resolver.get_countries(label)
        self.countries = matched
github alephdata / aleph / services / extract-entities / entityextractor / service.py View on Github external
from alephclient.services.entityextract_pb2 import ExtractedEntity

# Logger for this service, registered under the fixed name 'service'.
log = logging.getLogger('service')

# NOTE(review): the polyglot configuration below is commented out, so none
# of these POLYGLOT_* names are defined by this block.
# POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
# POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# POLYGLOT_TYPES = {
#     'I-PER': ExtractedEntity.PERSON,
#     'I-ORG': ExtractedEntity.ORGANIZATION,
#     'I-LOC': ExtractedEntity.LOCATION
# }

# Maps spaCy entity labels to ExtractedEntity type constants. 'PER' and
# 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to LOCATION.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
    'PER': ExtractedEntity.PERSON,
    'PERSON': ExtractedEntity.PERSON,
    'ORG': ExtractedEntity.ORGANIZATION,
    'LOC': ExtractedEntity.LOCATION,
    'GPE': ExtractedEntity.LOCATION
}


class EntityServicer(EntityExtractServicer):

    def __init__(self):
        log.info("Loading spaCy model xx...")
        self.spacy = spacy.load('xx')

    # def extract_polyglot(self, text):
    #     try:
    #         parsed = Text(text)