How to use the followthemoney.model.get function in followthemoney

To help you get started, we’ve selected a few followthemoney examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alephdata / aleph / services / ingest-file / ingestors / documents / ooxml.py View on Github external
def ingest(self, file_path, entity):
        """Ingest an OOXML document by way of a PDF conversion.

        The entity is given the ``Pages`` schema, ``ooxml_extract_metadata``
        is run on ``file_path``, and the result of ``document_to_pdf`` is
        handed to ``pdf_alternative_extract`` together with the entity.
        """
        pages_schema = model.get('Pages')
        entity.schema = pages_schema
        self.ooxml_extract_metadata(file_path, entity)
        converted = self.document_to_pdf(file_path, entity)
        self.pdf_alternative_extract(entity, converted)
github alephdata / aleph / services / ingest-file / ingestors / manager.py View on Github external
def make_entity(self, schema, parent=None):
        """Create a new entity of ``schema`` and attach it to ``parent``.

        ``schema`` is resolved through ``model.get``; the name of the current
        job's dataset is passed to ``model.make_entity`` as ``key_prefix``.
        The new entity is registered via ``make_child`` and then returned.
        """
        resolved = model.get(schema)
        dataset_name = self.stage.job.dataset.name
        child = model.make_entity(resolved, key_prefix=dataset_name)
        self.make_child(parent, child)
        return child
github alephdata / aleph / aleph / views / reconcile_api.py View on Github external
def suggest_property():
    """Collect the schema properties that match the requested prefix.

    Reads ``prefix`` (lower-cased and stripped, defaulting to ``''``) and
    ``schema`` (defaulting to ``Entity.THING``) from the request arguments.
    A property matches when the prefix occurs anywhere in its lower-cased
    name or label; each match is appended to ``matches`` as a dict.
    """
    prefix = request.args.get('prefix', '').lower().strip()
    tag_request(prefix=prefix)
    schema = request.args.get('schema', Entity.THING)
    matches = []
    for prop in model.get(schema).properties.values():
        # An empty prefix is a substring of every string, so it selects every
        # property here without a separate ``not len(prefix)`` check.
        match = prefix in prop.name.lower()
        match = match or prefix in prop.label.lower()
        if match:
            matches.append({
                'id': prop.name,
                'quid': prop.name,
                'name': prop.label,
                'r:score': 100,
                'n:type': {
                    'id': '/properties/property',
                    'name': 'Property'
                }
            })
    return jsonify({
        "code": "/api/status/ok",
github alephdata / aleph / services / ingest-file / ingestors / tabular / xlsx.py View on Github external
def ingest(self, file_path, entity):
        """Ingest an Excel workbook.

        The entity is given the ``Workbook`` schema,
        ``ooxml_extract_metadata`` is run on ``file_path``, and the file is
        opened with ``load_workbook`` in read-only mode.

        Raises:
            ProcessingException: if ``load_workbook`` raises any exception;
                the original error is attached as the cause.
        """
        entity.schema = model.get('Workbook')
        self.ooxml_extract_metadata(file_path, entity)
        try:
            book = load_workbook(file_path, read_only=True)
        except Exception as err:
            # Chain explicitly so the traceback shows the loader error as the
            # direct cause of the processing failure.
            raise ProcessingException('Invalid Excel file: %s' % err) from err

        try:
            for name in book.sheetnames:
                table = self.manager.make_entity('Table', parent=entity)
                table.make_id(entity.id, name)
                table.set('title', name)
                log.debug('Sheet: %s', name)
                self.emit_row_tuples(table, self.generate_rows(book[name]))
                if table.has('csvHash'):
                    self.manager.emit_entity(table)
        except Exception as err:
github alephdata / aleph / aleph / model / entity.py View on Github external
def model(self):
        """Return whatever ``model.get`` yields for this object's ``schema``."""
        schema_name = self.schema
        return model.get(schema_name)
github alephdata / aleph / services / ingest-file / ingestors / directory.py View on Github external
def ingest(self, file_path, entity):
        """Ingest a directory by crawling its contents.

        An entity whose schema equals ``Document`` is switched to ``Folder``.
        ``crawl`` is only invoked when ``file_path`` is not ``None`` and
        reports itself as a directory; otherwise nothing further happens.
        """
        document_schema = model.get('Document')
        if entity.schema == document_schema:
            entity.schema = model.get('Folder')

        is_directory = file_path is not None and file_path.is_dir()
        if is_directory:
            self.crawl(self.manager, file_path, parent=entity)
github alephdata / opensanctions / opensanctions / crawlers / gb_hmt_sanctions.py View on Github external
def parse_entry(emitter, group, rows):
    """Build one entity and its sanction record from a group of rows.

    A ``LegalEntity`` is created with an ID derived from ``group``, and a
    ``Sanction`` is created with an ID derived from the entity ID and linked
    to the entity. Every row then contributes names, a title and a program.

    Each row must support ``pop(key)`` and ``pop(key, default)``; the rows
    are consumed destructively, and a missing key popped without a default
    raises from ``pop``.

    NOTE(review): nothing is returned or emitted in the lines visible here;
    the function may continue beyond this excerpt -- confirm against the
    full file.
    """
    entity = emitter.make('LegalEntity')
    entity.make_id(group)
    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id, 'Sanction')
    sanction.add('entity', entity)
    sanction.add('authority', 'HM Treasury Financial sanctions targets')
    sanction.add('country', 'gb')
    for row in rows:
        # Any row flagged as an individual switches the entity to Person.
        if row.pop('Group Type') == 'Individual':
            entity.schema = model.get('Person')
        # Dropped without being used; tolerated if absent.
        row.pop('Alias Type', None)
        name1 = row.pop('Name 1')
        entity.add('firstName', name1, quiet=True)
        name2 = row.pop('Name 2')
        name3 = row.pop('Name 3')
        name4 = row.pop('Name 4')
        name5 = row.pop('Name 5')
        name6 = row.pop('Name 6')
        entity.add('lastName', name6, quiet=True)
        # presumably jointext joins the non-empty parts into one string -- TODO confirm
        name = jointext(name1, name2, name3, name4, name5, name6)
        # While the entity has no 'name', the joined name is stored as the
        # name; otherwise it is stored as an alias.
        if not entity.has('name'):
            entity.add('name', name)
        else:
            entity.add('alias', name)
        entity.add('title', row.pop('Title'), quiet=True)
        sanction.add('program', row.pop('Regime'))
github alephdata / aleph / services / ingest-file / ingestors / email / msg.py View on Github external
def ingest(self, file_path, entity):
        """Parse an email file and pass its headers and parts to the entity.

        The entity is given the ``Email`` schema. ``file_path`` is opened in
        binary mode and parsed with ``email.message_from_binary_file`` using
        the ``default`` policy. A ``MessageError``, ``ValueError`` or
        ``IndexError`` raised while doing so is re-raised as a
        ``ProcessingException`` chained to the original error.
        """
        email_schema = model.get('Email')
        entity.schema = email_schema
        try:
            with open(file_path, 'rb') as handle:
                message = email.message_from_binary_file(handle, policy=default)
        except (MessageError, ValueError, IndexError) as exc:
            detail = 'Cannot parse email: %s' % exc
            raise ProcessingException(detail) from exc

        self.extract_msg_headers(entity, message)
        self.resolve_message_ids(entity)

        # Every part yielded by walk() is handed to parse_part in order.
        for section in message.walk():
            self.parse_part(entity, section)
github alephdata / aleph / aleph / logic / graph / expand.py View on Github external
def _iter_value_entities(type_, value):
    """Yield ``(entity_id, prop)`` pairs for indexed entities holding ``value``.

    A term query on ``type_.group`` is scanned over the read index for the
    schemata returned by ``model.get_type_schemata(type_)``. For each hit,
    every property of the hit's schema whose type is ``type_`` is checked:
    its stored values are normalised with ``type_.normalize_set`` and the
    pair is yielded when ``value`` is among them.
    """
    source_filter = {'includes': ['schema', 'properties']}
    term_clause = {'term': {type_.group: value}}
    search_body = {'query': term_clause, '_source': source_filter}
    read_index = entities_read_index(schema=model.get_type_schemata(type_))
    for hit in scan(es, index=read_index, query=search_body):
        doc_id = hit.get('_id')
        doc = hit.get('_source')
        stored = doc.get('properties')
        for prop in model.get(doc.get('schema')).properties.values():
            # Only properties of the requested type can carry the value.
            if prop.type != type_:
                continue
            normalized = type_.normalize_set(stored.get(prop.name))
            if value in normalized:
                yield doc_id, prop
github alephdata / aleph / services / ingest-file / ingestors / media / svg.py View on Github external
def ingest(self, file_path, entity):
        """Ingest a file as ``HyperText`` and store its extracted text.

        The decoded file content from ``read_file_decoded`` is passed to
        ``extract_html_content``; the result is added to the entity as
        ``bodyText``.
        """
        hypertext_schema = model.get('HyperText')
        entity.schema = hypertext_schema
        markup = self.read_file_decoded(entity, file_path)
        extracted = self.extract_html_content(entity, markup)
        entity.add('bodyText', extracted)