How to use the scrapy.signals.spider_opened signal in Scrapy

To help you get started, we’ve selected a few Scrapy examples drawn from popular public projects, showing common ways the spider_opened signal is connected and used.
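
spider_opened fires once for each spider, right after it is opened for crawling, and Scrapy calls every connected handler with the spider instance as an argument. The usual pattern is to connect a handler inside a from_crawler classmethod, as the examples below do. A minimal, self-contained sketch (the class and handler names are illustrative, not from any of the projects below):

from scrapy import signals

class SpiderOpenedLogger:
    """Toy extension: log a message whenever a spider is opened."""

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        # Scrapy will call ext.spider_opened(spider=...) when the signal fires.
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        return ext

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s", spider.name)

Registered under the EXTENSIONS setting like any other extension, this logs one line per spider at startup.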


github mozilla/spade: vendor/scrapy/contrib/closespider.py
from collections import defaultdict

from twisted.python import log as txlog

from scrapy import signals


class CloseSpider(object):

    def __init__(self, crawler):
        self.crawler = crawler
        self.timeout = crawler.settings.getint('CLOSESPIDER_TIMEOUT')
        self.itemcount = crawler.settings.getint('CLOSESPIDER_ITEMCOUNT')
        self.pagecount = crawler.settings.getint('CLOSESPIDER_PAGECOUNT')
        self.errorcount = crawler.settings.getint('CLOSESPIDER_ERRORCOUNT')

        self.errorcounts = defaultdict(int)
        self.pagecounts = defaultdict(int)
        self.counts = defaultdict(int)
        self.tasks = {}

        # Each limit is optional: a handler is only connected when the
        # corresponding setting is non-zero.
        if self.errorcount:
            txlog.addObserver(self.catch_log)
        if self.pagecount:
            crawler.signals.connect(self.page_count, signal=signals.response_received)
        if self.timeout:
            crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        if self.itemcount:
            crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
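
The excerpt only wires up the handlers; the spider_opened callback itself is not shown. A sketch of what it plausibly does, assuming the Twisted reactor is used to schedule the timeout (an assumption, since the handler body is not part of the excerpt):

from twisted.internet import reactor

def spider_opened(self, spider):
    # Assumption: schedule a forced close after `timeout` seconds, and keep
    # the task so spider_closed can cancel it if the spider finishes first.
    self.tasks[spider] = reactor.callLater(
        self.timeout, self.crawler.engine.close_spider,
        spider, reason='closespider_timeout')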
github Times125/encyclopediaCrawler: baiduSpider/middlewares.py
    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy calls this classmethod to create the middleware, which is
        # the natural place to hook into signals.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s
The identical boilerplate appears verbatim in several other projects, including SylvanasSun/scrapy-picture-spider (pixiv/pixiv_spider/middlewares.py), HITFRobot/happy-spiders (scrapy_templates/12-information/jobbole/jobbole/middlewares.py), crawlab-team/crawlab (backend/template/scrapy/config_spider/middlewares.py), and dangsh/hive (scrapySpider/fenbu/fenbu/middlewares.py): it comes straight from the middlewares.py file that scrapy startproject generates.
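
In that same generated template, the connected handler simply logs that the spider has started:

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)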
github scrapy/scrapy: scrapy/extensions/corestats.py
    @classmethod
    def from_crawler(cls, crawler):
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
        crawler.signals.connect(o.response_received, signal=signals.response_received)
        return o
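
CoreStats uses these hooks to populate the crawl statistics. A condensed sketch of the connected handlers, based on the extension's source (details vary across Scrapy versions):

from datetime import datetime

def spider_opened(self, spider):
    self.stats.set_value('start_time', datetime.utcnow(), spider=spider)

def spider_closed(self, spider, reason):
    self.stats.set_value('finish_time', datetime.utcnow(), spider=spider)
    self.stats.set_value('finish_reason', reason, spider=spider)

def item_scraped(self, item, spider):
    self.stats.inc_value('item_scraped_count', spider=spider)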
github hl10502/scrapy_site: scrapy_site/pipelines.py
    def __init__(self):
        # Legacy signal API: handlers are connected through pydispatch's
        # dispatcher rather than crawler.signals.
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

        # Get the current directory (the root of the scrapy_site project).
        self.curpath = os.getcwd()
        # Directory where the scraped data files are stored.
        self.spidername_filepath = self.curpath + "/scrapy_site/msg/"

        # Load the keyword dictionary from the keyword.conf config file.
        self.keywordsDict = dict()
        self.getKeywords()

        # Read the website names from the config file.
        self.webnamesDict = dict()
        self.getWebnames()
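
dispatcher.connect is the old pydispatch-based way of hooking signals and has been deprecated in Scrapy for a long time; current code does the same wiring through a from_crawler classmethod. A sketch of the equivalent modern setup (the class name is illustrative, and the same handler names are assumed):

from scrapy import signals

class SitePipeline:

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signal=signals.spider_closed)
        return pipeline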