Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def aleph_emit(context, data):
# Visible part of this function: checks the Aleph settings, resolves the
# target collection, and skips items that the incremental check reports as
# already handled, before starting to build the `meta` dict.
# NOTE(review): this excerpt has lost its indentation and ends inside the
# `meta` dict literal below; the remainder of the function is not visible.
# Both the Aleph host and the API key must be configured; if either is
# missing, log a warning and return without doing anything.
if not settings.ALEPH_HOST:
context.log.warning("No $MEMORIOUS_ALEPH_HOST, skipping upload...")
return
if not settings.ALEPH_API_KEY:
context.log.warning("No $MEMORIOUS_ALEPH_API_KEY, skipping upload...")
return
# The session id is the crawler name prefixed with "memorious:".
session_id = 'memorious:%s' % context.crawler.name
api = AlephAPI(settings.ALEPH_HOST, settings.ALEPH_API_KEY,
session_id=session_id)
# Without a collection id there is nothing to target: warn and return.
collection_id = get_collection_id(context, api)
if collection_id is None:
context.log.warning("Cannot get aleph collection.")
return
content_hash = data.get('content_hash')
# Prefer 'source_url'; fall back to 'url'.
source_url = data.get('source_url', data.get('url'))
# Identifier fallback chain: 'foreign_id', then 'request_id', then the
# source URL resolved above.
foreign_id = data.get('foreign_id', data.get('request_id', source_url))
# Presumably skip_incremental() is true when this (collection, foreign id,
# content hash) combination was already processed -- confirm against the
# context implementation.
if context.skip_incremental(collection_id, foreign_id, content_hash):
context.log.info("Skip aleph upload: %s", foreign_id)
return
# Metadata for the item; only the first two keys are visible in this excerpt.
meta = {
'crawler': context.crawler.name,
'foreign_id': foreign_id,
def __init__(self):
    """Set up the instance with an Aleph API client.

    The client is created by calling ``AlephAPI()`` with no arguments and
    is stored on ``self.api``.
    """
    client = AlephAPI()
    self.api = client
def load_aleph(foreign_id, api_url, api_key):
# Stream entities from the Aleph instance at `api_url`, optionally limited to
# the collection whose foreign id is `foreign_id`, and tag each entity with
# its Aleph API URL.
# Raises click.BadParameter when `foreign_id` is given but no matching
# collection is found.
# NOTE(review): this excerpt has lost its indentation, and `stdout` is
# assigned but never used in the visible lines, so the loop body probably
# continues beyond this excerpt -- confirm against the full file.
api = AlephAPI(api_url, api_key)
# Stays None when no foreign id is given; it is passed unchanged to
# stream_entities() below.
collection_id = None
if foreign_id is not None:
collection = api.get_collection_by_foreign_id(foreign_id)
if collection is None:
raise click.BadParameter("Cannot find collection: %s" % foreign_id)
collection_id = collection.get('id')
stdout = click.get_text_stream('stdout')
# Only the 'schema' and 'properties' fields are requested for each entity.
entities = api.stream_entities(collection_id=collection_id,
include=['schema', 'properties'])
for data in entities:
# Records without a 'properties' key are skipped.
if 'properties' not in data:
continue
entity = model.get_proxy(data)
# NOTE: this rebinds the `api_url` parameter to the per-entity URL; the
# original argument value is no longer available after the first iteration.
api_url = api._make_url('entities/%s' % entity.id)
# quiet=True presumably suppresses the error when the schema has no
# 'alephUrl' property -- confirm against followthemoney.
entity.add('alephUrl', api_url, quiet=True)