How to use the alephclient.services.entityextract_pb2.ExtractedEntity.ORGANIZATION constant (an entity-type value, not a function) in alephclient

To help you get started, we’ve selected a few alephclient examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alephdata / aleph / services / extract-entities / entityextractor / result.py View on Github external
return self.label


class NamedResult(Result):
    """Base for extracted entities whose label is a human-style name.

    The incoming label is passed through ``clean_name`` before the parent
    initializer runs. Afterwards ``label_key`` is applied to ``self.label``
    and the result is flagged valid only when that yields a key.
    """
    strict = False

    def __init__(self, ctx, label, start, end):
        cleaned = self.clean_name(label)
        super().__init__(ctx, cleaned, start, end)
        # A label for which no key can be derived marks the result invalid.
        self.key = self.label_key(self.label)
        self.valid = self.key is not None


class OrganizationResult(NamedResult):
    """Named result categorized as ``ExtractedEntity.ORGANIZATION``.

    In addition to the parent's validity check, a label that contains no
    space character is marked invalid.
    """
    category = ExtractedEntity.ORGANIZATION

    def __init__(self, ctx, label, start, end):
        super().__init__(ctx, label, start, end)
        if not self.valid:
            # Already rejected by the parent; nothing further to check.
            return
        if ' ' not in self.label:
            self.valid = False


class PersonResult(NamedResult):
    """Named result categorized as ``ExtractedEntity.PERSON``.

    In addition to the parent's validity check, a label that contains no
    space character is marked invalid.
    """
    category = ExtractedEntity.PERSON

    def __init__(self, ctx, label, start, end):
        super().__init__(ctx, label, start, end)
        if not self.valid:
            # Already rejected by the parent; nothing further to check.
            return
        has_space = ' ' in self.label
        if not has_space:
            self.valid = False
github alephdata / aleph / aleph / analyze / extract_entity.py View on Github external
from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Shorthand for ExtractedEntity.Type.Value.
# NOTE(review): presumably the protobuf enum name-to-number lookup — confirm.
TYPE = ExtractedEntity.Type.Value


class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
    SERVICE = settings.ENTITIES_SERVICE
    ORIGIN = 'ner'
    TYPES = {
        ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
        ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
        ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
    }

    def __init__(self):
        self.active = self.has_channel()

    def extract(self, collector, document):
        DocumentTagCollector(document, 'polyglot').save()
        DocumentTagCollector(document, 'spacy').save()
        try:
            service = EntityExtractStub(self.channel)
            texts = self.text_iterator(document)
            entities = service.Extract(texts)
            for entity in entities.entities:
                type_ = self.TYPES.get(entity.type)
                if type_ is None:
github alephdata / aleph / services / extract-entities / entityextractor / service.py View on Github external
# Logger registered under the fixed name 'service' (not the module name).
log = logging.getLogger('service')

# NOTE(review): the polyglot configuration below is disabled (commented out);
# it is kept here only as reference.
# POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
# POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# POLYGLOT_TYPES = {
#     'I-PER': ExtractedEntity.PERSON,
#     'I-ORG': ExtractedEntity.ORGANIZATION,
#     'I-LOC': ExtractedEntity.LOCATION
# }

# Maps spaCy entity label strings to ExtractedEntity type constants.
# 'PER' and 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to LOCATION.
# Labels not listed here have no entry in this table.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
    'PER': ExtractedEntity.PERSON,
    'PERSON': ExtractedEntity.PERSON,
    'ORG': ExtractedEntity.ORGANIZATION,
    'LOC': ExtractedEntity.LOCATION,
    'GPE': ExtractedEntity.LOCATION
}


class EntityServicer(EntityExtractServicer):

    def __init__(self):
        log.info("Loading spaCy model xx...")
        self.spacy = spacy.load('xx')

    # def extract_polyglot(self, text):
    #     try:
    #         parsed = Text(text)
    #         lang = parsed.language
    #         if lang.confidence > 90:
github alephdata / aleph / services / extract-entities / entityextractor / service.py View on Github external
import logging
from polyglot.text import Text
from concurrent import futures
from alephclient.services.entityextract_pb2_grpc import (
    add_EntityExtractServicer_to_server, EntityExtractServicer
)
from alephclient.services.entityextract_pb2 import ExtractedEntity

# Logger registered under the fixed name 'service' (not the module name).
log = logging.getLogger('service')

# Polyglot model location, resolved once at import time from the
# POLYGLOT_DATA_PATH environment variable.
# NOTE(review): os.environ.get returns None when the variable is unset, in
# which case the os.path.join call below raises TypeError on import; the
# os.listdir call likewise fails if the directory is missing. Confirm this
# fail-fast behavior is intended.
# NOTE(review): no `import os` is visible in this excerpt — verify it exists.
POLYGLOT_PATH = os.environ.get('POLYGLOT_DATA_PATH')
POLYGLOT_NER_PATH = os.path.join(POLYGLOT_PATH, 'polyglot_data/ner2')
# Directory entries found under the NER data path.
# NOTE(review): presumably one entry per language code — confirm.
POLYGLOT_LANGUAGES = os.listdir(POLYGLOT_NER_PATH)
# Maps polyglot tag strings to ExtractedEntity type constants.
POLYGLOT_TYPES = {
    'I-PER': ExtractedEntity.PERSON,
    'I-ORG': ExtractedEntity.ORGANIZATION,
    'I-LOC': ExtractedEntity.LOCATION
}

# Maps spaCy entity label strings to ExtractedEntity type constants.
# 'PER' and 'PERSON' both map to PERSON; 'LOC' and 'GPE' both map to LOCATION.
# https://spacy.io/api/annotation#named-entities
SPACY_TYPES = {
    'PER': ExtractedEntity.PERSON,
    'PERSON': ExtractedEntity.PERSON,
    'ORG': ExtractedEntity.ORGANIZATION,
    'LOC': ExtractedEntity.LOCATION,
    'GPE': ExtractedEntity.LOCATION
}


class EntityServicer(EntityExtractServicer):

    def __init__(self):
github alephdata / aleph / aleph / analyze / extract_entity.py View on Github external
from aleph import settings
from aleph.services import ServiceClientMixin
from aleph.analyze.analyzer import EntityAnalyzer, TextIterator
from aleph.model import DocumentTag, DocumentTagCollector

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Shorthand for ExtractedEntity.Type.Value.
# NOTE(review): presumably the protobuf enum name-to-number lookup — confirm.
TYPE = ExtractedEntity.Type.Value


class EntityExtractor(EntityAnalyzer, TextIterator, ServiceClientMixin):
    SERVICE = settings.ENTITIES_SERVICE
    ORIGIN = 'ner'
    TYPES = {
        ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
        ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
        ExtractedEntity.COMPANY: DocumentTag.TYPE_ORGANIZATION,
    }

    def __init__(self):
        self.active = self.has_channel()

    def extract(self, collector, document):
        DocumentTagCollector(document, 'polyglot').save()
        DocumentTagCollector(document, 'spacy').save()
        try:
            service = EntityExtractStub(self.channel)
            texts = self.text_iterator(document)
            entities = service.Extract(texts)
            for entity in entities.entities:
                type_ = self.TYPES.get(entity.type)
                if type_ is None: