How to use queuelib - 10 common examples

To help you get started, we’ve selected a few queuelib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapy / scrapy / tests / test_squeues.py View on Github external
chunksize = 1


class ChunkSize2PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 2.
    chunksize = 2


class ChunkSize3PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 3.
    chunksize = 3


class ChunkSize4PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 4.
    chunksize = 4


class MarshalLifoDiskQueueTest(t.LifoDiskQueueTest):
    """LIFO disk-queue test suite backed by marshal serialization."""

    def queue(self):
        # Marshal-serializing LIFO queue rooted at the per-test path.
        return MarshalLifoDiskQueue(self.qpath)

    def test_serialize(self):
        """Pushed values marshal-round-trip and pop in LIFO order."""
        q = self.queue()
        items = ['a', 123, {'a': 'dict'}]
        for item in items:
            q.push(item)
        for expected in reversed(items):
            self.assertEqual(q.pop(), expected)

    # Shared test asserting that unserializable objects are rejected on push.
    test_nonserializable_object = nonserializable_object_test
github scrapy / scrapy / tests / test_squeues.py View on Github external
from scrapy.squeues import MarshalFifoDiskQueue, MarshalLifoDiskQueue, PickleFifoDiskQueue, PickleLifoDiskQueue
from scrapy.item import Item, Field
from scrapy.http import Request
from scrapy.loader import ItemLoader

class TestItem(Item):
    # Minimal scrapy Item with a single field, used as a serialization payload
    # in the queue tests below.
    name = Field()

def _test_procesor(x):
    return x + x

class TestLoader(ItemLoader):
    # Loader producing TestItem instances; the 'name' output is passed
    # through _test_procesor (returns x + x) before being stored.
    default_item_class = TestItem
    name_out = staticmethod(_test_procesor)

class MarshalFifoDiskQueueTest(t.FifoDiskQueueTest):
    """FIFO disk-queue test suite backed by marshal serialization."""

    # Large chunk size — presumably the number of items per on-disk data
    # file, so all test items land in one chunk; verify against FifoDiskQueue.
    chunksize = 100000

    def queue(self):
        # Marshal-serializing FIFO queue rooted at the per-test path.
        return MarshalFifoDiskQueue(self.qpath, chunksize=self.chunksize)

    def test_serialize(self):
        """Pushed values marshal-round-trip and pop in FIFO order."""
        q = self.queue()
        q.push('a')
        q.push(123)
        q.push({'a': 'dict'})
        self.assertEqual(q.pop(), 'a')
        self.assertEqual(q.pop(), 123)
        self.assertEqual(q.pop(), {'a': 'dict'})
    def test_nonserializable_object(self):
github scrapy / scrapy / tests / test_squeues.py View on Github external
assert r2.meta['request'] is r2

class ChunkSize1PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 1.
    chunksize = 1

class ChunkSize2PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 2.
    chunksize = 2

class ChunkSize3PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 3.
    chunksize = 3

class ChunkSize4PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    # Re-run the whole PickleFifoDiskQueueTest suite with chunksize 4.
    chunksize = 4


class MarshalLifoDiskQueueTest(t.LifoDiskQueueTest):
    """LIFO disk-queue test suite backed by marshal serialization."""

    def queue(self):
        # Marshal-serializing LIFO queue rooted at the per-test path.
        return MarshalLifoDiskQueue(self.qpath)

    def test_serialize(self):
        """Pushed values marshal-round-trip and pop in LIFO order."""
        q = self.queue()
        q.push('a')
        q.push(123)
        q.push({'a': 'dict'})
        self.assertEqual(q.pop(), {'a': 'dict'})
        self.assertEqual(q.pop(), 123)
        self.assertEqual(q.pop(), 'a')

    def test_nonserializable_object(self):
        # NOTE(review): this snippet appears truncated — the push /
        # assertRaises that should follow the import is not visible here.
        # Trigger Twisted bug #7989
        import twisted.persisted.styles  # NOQA
github scrapy / scrapy / tests / test_squeues.py View on Github external
except Exception:
        # Trigger Twisted bug #7989
        import twisted.persisted.styles  # NOQA
        self.assertRaises(ValueError, q.push, lambda x: x)
    else:
        # Use a different unpickleable object
        class A(object): pass
        a = A()
        a.__reduce__ = a.__reduce_ex__ = None
        self.assertRaises(ValueError, q.push, a)
    # Selectors should fail (lxml.html.HtmlElement objects can't be pickled)
    sel = Selector(text='<p>some text</p>')
    self.assertRaises(ValueError, q.push, sel)


class MarshalFifoDiskQueueTest(t.FifoDiskQueueTest):
    """FIFO disk-queue test suite backed by marshal serialization."""

    # Large chunk size — presumably items per on-disk data file, so all test
    # items fit in a single chunk; verify against FifoDiskQueue.
    chunksize = 100000

    def queue(self):
        # Marshal-serializing FIFO queue rooted at the per-test path.
        return MarshalFifoDiskQueue(self.qpath, chunksize=self.chunksize)

    def test_serialize(self):
        """Pushed values marshal-round-trip and pop in FIFO order."""
        q = self.queue()
        items = ['a', 123, {'a': 'dict'}]
        for item in items:
            q.push(item)
        for expected in items:
            self.assertEqual(q.pop(), expected)

    # Shared test asserting that unserializable objects are rejected on push.
    test_nonserializable_object = nonserializable_object_test
github openrightsgroup / Blocking-Middleware / backend / queue-services / categoryimporter.py View on Github external
import time
import psycopg2
import logging

import queuelib
from queuelib import QueueService

import urlparse
import requests

class CategoryImporter(QueueService):
    """Queue consumer handling site-categorisation messages.

    NOTE(review): this snippet appears truncated — process_message stops
    after normalising the domain; any call to CATEGORIFY_API is not
    visible here.  Python 2 code (uses the `urlparse` module).
    """

    # Categorify API endpoint — not used within the visible code; confirm
    # against the full file.
    CATEGORIFY_API = 'https://categorify.org/api'
    QUEUE_NAME = 'category'

    def setup_bindings(self):
        # Declare a durable 'category' queue and bind it to both the
        # url.org and url.public routing keys on the org.blocked exchange.
        self.ch.queue_declare("category", durable=True, auto_delete=False)
        self.ch.queue_bind("category", "org.blocked", "url.org")
        self.ch.queue_bind("category", "org.blocked", "url.public")
        # One shared HTTP session for all outgoing requests.
        self.session = requests.session()

    def process_message(self, data):
        # Extract the domain from the message's URL: lower-case the host
        # and drop a single leading "www." label.
        url = data['url']
        parsed_url = urlparse.urlparse(url)
        domain = parsed_url.netloc.lower()
        if domain.startswith('www.'):
            domain = domain.split('.', 1)[-1]
github wcong / ants / ants / core / scheduler.py View on Github external
def open(self, spider):
        # Scheduler start-up: remember the spider, build the in-memory
        # priority queue, and create the disk-backed queue only when a
        # disk-queue directory (dqdir) is configured.
        self.spider = spider
        self.mqs = PriorityQueue(self._newmq)
        self.dqs = self._dq() if self.dqdir else None
        # Open self.df (presumably the duplicate filter — confirm) and
        # return its result.
        return self.df.open()
github wcong / ants / ants / core / scheduler.py View on Github external
def _dq(self):
        """Build the disk-backed priority queue, resuming saved state.

        Loads the list of active priorities from dqdir/active.json
        (presumably written on a previous shutdown — confirm) and falls
        back to an empty tuple when the file does not exist.
        """
        activef = join(self.dqdir, 'active.json')
        if exists(activef):
            with open(activef) as f:
                prios = json.load(f)
        else:
            prios = ()
        q = PriorityQueue(self._newdq, startprios=prios)
        if q:
            # A non-empty queue means a previous crawl left requests pending.
            log.msg(format="Resuming crawl (%(queuesize)d requests scheduled)",
                    spider=self.spider, queuesize=len(q))
        return q
github scrapy / scrapy / scrapy / pqueues.py View on Github external
if slot not in self.pqueues:
            self.pqueues[slot] = self.pqfactory()
        queue = self.pqueues[slot]
        queue.push(obj, priority)

    def close(self):
        # Close every per-slot queue, collecting each close() result keyed
        # by its slot, then drop all queue references.
        active = {slot: queue.close()
                  for slot, queue in self.pqueues.items()}
        self.pqueues.clear()
        return active

    def __len__(self):
        # Total number of queued objects across all per-slot queues
        # (zero when no slot queues exist).
        total = 0
        for slot_queue in self.pqueues.values():
            total += len(slot_queue)
        return total


class ScrapyPriorityQueue(PriorityQueue):
    """
    PriorityQueue which works with scrapy.Request instances and
    can optionally convert them to/from dicts before/after putting to a queue.
    """
    def __init__(self, crawler, qfactory, startprios=(), serialize=False):
        # serialize=True converts Requests to plain dicts before queuing
        # (presumably needed for disk-backed queues — confirm).
        super(ScrapyPriorityQueue, self).__init__(qfactory, startprios)
        self.serialize = serialize
        self.spider = crawler.spider

    @classmethod
    def from_crawler(cls, crawler, qfactory, startprios=(), serialize=False):
        # Alternate constructor following Scrapy's from_crawler convention.
        return cls(crawler, qfactory, startprios, serialize)

    def push(self, request, priority=0):
        # NOTE(review): snippet truncated — the parent push call that
        # should follow is not visible here.
        if self.serialize:
            request = request_to_dict(request, self.spider)
github wcong / ants / ants / crawl / scheduler.py View on Github external
def __init__(self, settings):
        # Scheduler setup: resolve the memory-queue class named by the
        # SCHEDULER_MEMORY_QUEUE setting, then build the in-memory priority
        # queue and a status tracker.
        self.settings = settings
        self.mq_class = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
        self.mqs = PriorityQueue(self.priority)
        self.status = ScheduleStatus()
github openrightsgroup / Blocking-Middleware / backend / queue-services / cf-probe.py View on Github external
def main():
    """Entry point: parse CLI options, configure logging, run the probe."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--verbose', '-v', action='store_true')
    arg_parser.add_argument('queue', nargs='?')
    options = arg_parser.parse_args()

    queuelib.setup_logging()

    # An explicit queue argument swaps the '.org' suffix of the probe's
    # class-level queue name for '.<queue>'.
    if options.queue:
        renamed = CloudflareProbe.QUEUE_NAME.replace('.org', '.' + options.queue)
        CloudflareProbe.QUEUE_NAME = renamed

    probe = CloudflareProbe()
    logging.info("Listening on: %s", probe.QUEUE_NAME)
    probe.run()