How to use the scrapy.signals module in Scrapy

To help you get started, we’ve selected a few scrapy.signals examples based on popular ways the module is used in public projects.

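Most of the examples below follow the same pattern: a component gets hold of the running Crawler and registers a handler with crawler.signals.connect, passing one of the signal objects defined in scrapy.signals. As a minimal sketch of that pattern (the class and handler names here are illustrative, not taken from the projects below):

from scrapy import signals

class SpiderLifecycleLogger:
    """Illustrative extension that logs spider open/close events."""

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        # Register handlers for the built-in lifecycle signals.
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
        return ext

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s", spider.name)

    def spider_closed(self, spider, reason):
        spider.logger.info("Spider closed: %s (%s)", spider.name, reason)

An extension like this would be enabled through the EXTENSIONS setting in settings.py.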

From scrapy-plugins/scrapy-pagestorage, tests/test_pagestorage.py:
def test_writer_closed_on_spider_closed_signal(self):
        self.crawler_mock.signals.connect.assert_called_once_with(
            self.instance.spider_closed,
            signal=signals.spider_closed
        )
        with mock.patch.object(self.instance, '_writer') as writer_mock:
            self.instance.spider_closed(self.spider)
        writer_mock.close.assert_called_once_with()
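
The test asserts two things: that the extension registered its spider_closed handler when it was constructed, and that the handler closes the underlying writer. For context, a rough sketch of the kind of class such a test exercises (an assumption for illustration, not the actual scrapy-pagestorage implementation):

from scrapy import signals

class PageStorageExtension:
    def __init__(self, crawler, writer):
        # `writer` stands in for whatever storage backend the extension wraps.
        self._writer = writer
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_closed(self, spider):
        # Flush and release the writer when the spider finishes.
        self._writer.close()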

From scrapy/scrapy, scrapy/contrib/pipeline/images.py:
def __init__(self, basedir):
        if '://' in basedir:
            basedir = basedir.split('://', 1)[1]
        self.basedir = basedir
        self._mkdir(self.basedir)
        self.created_directories = defaultdict(set)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
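
This snippet comes from the old scrapy.contrib tree and uses the pydispatcher-style dispatcher.connect, an API that has since been deprecated in favor of the crawler's SignalManager. The modern equivalent of the same hookup looks roughly like this (a sketch, not the current images pipeline code):

from scrapy import signals

class ImagesStoreSketch:
    def __init__(self, basedir):
        self.basedir = basedir

    @classmethod
    def from_crawler(cls, crawler):
        store = cls(basedir=crawler.settings["IMAGES_STORE"])
        crawler.signals.connect(store.spider_closed, signal=signals.spider_closed)
        return store

    def spider_closed(self, spider):
        # Clean up per-spider state, mirroring the dispatcher-based original.
        pass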

From scrapy/scrapy, scrapy/contrib/resolver.py:
def __init__(self):
        self.spider_hostnames = defaultdict(set)
        self.resolver = _CachingThreadedResolver(reactor)
        reactor.installResolver(self.resolver)
        dispatcher.connect(self.request_received, signals.request_received)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
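
Note that the two registrations are equivalent in form: dispatcher.connect takes the signal as its second parameter, so it can be passed positionally (as with request_received) or by keyword (signal=...). Each handler then receives the arguments its signal sends; a sketch of what the two handlers might look like (hypothetical bodies, not the original resolver code):

from urllib.parse import urlparse

class HostnameTrackingSketch:
    def request_received(self, request, spider):
        # Remember which hostnames each spider has touched.
        self.spider_hostnames[spider.name].add(urlparse(request.url).hostname)

    def spider_closed(self, spider):
        # Drop the cached hostnames once the spider is done.
        self.spider_hostnames.pop(spider.name, None)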

From dangsh/hive, scrapySpider/bilibili/bilibili/middlewares.py:
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s
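
This is the middleware scaffold that scrapy startproject generates; the connected handler in the template is just a logger:

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)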

From foolcage/fooltrader, fooltrader/spiders/chinafuture/shfe_trading_calendar_spider.py:
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ShfeTradingCalendarSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider
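
Overriding from_crawler like this is the documented way to give a spider its own lifecycle hooks, since the crawler is available there before crawling starts. The companion handler is typically along these lines (illustrative; the real spider's cleanup logic will differ):

    def spider_closed(self, spider):
        # Teardown for this spider, e.g. flushing buffers or closing files.
        spider.logger.info("Spider closed: %s", spider.name)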

From wattlebird/Bangumi_Spider, bgmapi/middlewares.py (the same default middleware scaffold):
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

From ooclab/remotex, OLD/PyBots/PyBots/middlewares.py (again the default scaffold):
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

From hl10502/scrapy_site, scrapy_site/pipelines.py:
def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

        # Get the current directory, which is the root of the scrapy_site project
        self.curpath = os.getcwd()
        # Directory where the spiders' scraped data files are stored
        self.spidername_filepath = self.curpath + "/scrapy_site/msg/"

        # Load the keyword dictionary from the keyword.conf config file
        self.keywordsDict = dict()
        self.getKeywords()

        # Read the website names from the config file
        self.webnamesDict = dict()
        self.getWebnames()

        # Crawl info
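
Each handler's signature must be compatible with the arguments its signal sends: engine_started and engine_stopped send none, while spider_opened and spider_closed receive the spider (spider_closed also sends a close reason, which a handler may accept or omit). A sketch of matching handlers for the four connections above (illustrative bodies):

import logging

logger = logging.getLogger(__name__)

class HandlerSignatureSketch:
    def engine_started(self):
        # Fired once when the Scrapy engine starts; no arguments.
        logger.info("engine started")

    def engine_stopped(self):
        # Fired once when the engine stops.
        logger.info("engine stopped")

    def spider_opened(self, spider):
        # Receives the spider that was just opened.
        logger.info("opened %s", spider.name)

    def spider_closed(self, spider, reason):
        # Receives the spider plus a string describing why it closed.
        logger.info("closed %s (%s)", spider.name, reason)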

From vipulgupta2048/scrape, scrapeNews/scrapeNews/middlewares.py (the default scaffold once more):
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s