def skip_incremental(self, *criteria):
"""Perform an incremental check on a set of criteria.
    This can be used to execute a part of a crawler only once per
    interval (which is specified by the ``expire`` setting). If the
operation has already been performed (and should thus be skipped),
this will return ``True``. If the operation needs to be executed,
the returned value will be ``False``.
"""
if not self.incremental:
return False
# this is pure convenience, and will probably backfire at some point.
key = make_key(*criteria)
if key is None:
return False
if self.check_tag(key):
return True
self.set_tag(key, None)
return False
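A minimal usage sketch (not part of the original snippets), assuming a crawler context that exposes ``skip_incremental`` as above; ``fetch_page`` is a hypothetical helper standing in for the expensive work to be skipped:

def crawl_url(context, data):
    url = data.get("url")
    # In incremental mode this returns True when the same criteria were
    # already processed within the ``expire`` interval, so the fetch is skipped.
    if context.skip_incremental(url):
        return
    # Hypothetical helper: only reached the first time, or after expiry.
    fetch_page(context, url)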
def save(cls, crawler, session):
    # Serialise the session object and base64-encode it so it can be stored
    # as a plain Redis string value.
    session = pickle.dumps(session)
    session = codecs.encode(session, 'base64')
    # Derive a short content-based identifier (first 15 hex characters of the
    # SHA-1) and namespace it per crawler.
    key = sha1(session).hexdigest()[:15]
    key = make_key(crawler, "session", key)
    # Store with an expiry so stale sessions are evicted automatically.
    cls.conn.set(key, session, ex=QUEUE_EXPIRE)
    return key
def record_operation_start(cls, crawler, run_id):
    # On first sight of this run, register it in the crawler's set of runs,
    # push it onto the run history list and record its start timestamp.
    if not cls.conn.sismember(make_key(crawler, "runs"), run_id):
        cls.conn.sadd(make_key(crawler, "runs"), run_id)
        cls.conn.lpush(make_key(crawler, "runs_list"), run_id)
        cls.conn.set(make_key("run", run_id, "start"), pack_now())
    # Increment the run's operation counters.
    cls.conn.incr(make_key("run", run_id))
    cls.conn.incr(make_key("run", run_id, "total_ops"))
def set_tag(self, key, value):
    data = dump_json(value)
    key = make_key(self.crawler, "tag", key)
    # ``conn`` here (and in check_tag below) appears to be a module-level
    # Redis connection; the tag expires with the crawler's ``expire`` setting.
    return conn.set(key, data, ex=self.crawler.expire)
def find(cls, crawler, key):
value = cls.conn.get(make_key(crawler, "tag", key))
if value is not None:
return load_json(value)
def get(cls, crawler, key):
    value = cls.conn.get(make_key(crawler, "session", key))
    if value is not None:
        # Reverse of save() above: base64-decode, then unpickle the session.
        session = codecs.decode(bytes(value, 'utf-8'), 'base64')
        return pickle.loads(session)
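A hedged round-trip sketch of the two session helpers above, assuming they live as classmethods on a hypothetical ``SessionStore`` class holding the Redis connection at ``cls.conn``:

# Any picklable object can be persisted; the dict here is just an example.
state = {"cookies": {"sid": "abc123"}}
key = SessionStore.save("example_crawler", state)   # pickle + base64, SHA-1-derived key
restored = SessionStore.get("example_crawler", key)
assert restored == state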
def save(cls, crawler, key, value):
data = dump_json(value)
key = make_key(crawler, "tag", key)
cls.conn.set(key, data, ex=crawler.expire)
def check_tag(self, key):
return conn.exists(make_key(self.crawler, "tag", key))