def __init__(self, crawler):
    # Assumed imports for this excerpt: from collections import defaultdict;
    # from scrapy import signals; txlog presumably refers to twisted.python.log.
    # Limits are read from the project settings; a falsy value disables that check.
    self.timeout = crawler.settings.getint('CLOSESPIDER_TIMEOUT')
    self.itemcount = crawler.settings.getint('CLOSESPIDER_ITEMCOUNT')
    self.pagecount = crawler.settings.getint('CLOSESPIDER_PAGECOUNT')
    self.errorcount = crawler.settings.getint('CLOSESPIDER_ERRORCOUNT')
    # Per-spider counters and pending timeout tasks
    self.errorcounts = defaultdict(int)
    self.pagecounts = defaultdict(int)
    self.counts = defaultdict(int)
    self.tasks = {}
    # Only subscribe to the signals needed for the limits that are enabled
    if self.errorcount:
        txlog.addObserver(self.catch_log)
    if self.pagecount:
        crawler.signals.connect(self.page_count, signal=signals.response_received)
    if self.timeout:
        crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
    if self.itemcount:
        crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
    crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
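The handlers connected above are not part of the excerpt. A minimal sketch of what they might look like, assuming the extension closes the crawl through spider.crawler.engine.close_spider() once a per-spider counter reaches its limit, in the spirit of Scrapy's built-in CloseSpider extension; the close reasons and the timeout-task handling are assumptions, not taken from the excerpt:

    # Hypothetical handlers; method names follow the connect() calls above.
    def item_scraped(self, item, spider):
        self.counts[spider] += 1
        if self.counts[spider] == self.itemcount:
            spider.crawler.engine.close_spider(spider, 'closespider_itemcount')

    def page_count(self, response, request, spider):
        self.pagecounts[spider] += 1
        if self.pagecounts[spider] == self.pagecount:
            spider.crawler.engine.close_spider(spider, 'closespider_pagecount')

    def spider_closed(self, spider):
        # Drop per-spider state; the timeout task is assumed to be a Twisted
        # reactor.callLater() handle stored in self.tasks by spider_opened.
        task = self.tasks.pop(spider, None)
        if task and task.active():
            task.cancel()
        for counter in (self.counts, self.pagecounts, self.errorcounts):
            counter.pop(spider, None)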
@classmethod
def from_crawler(cls, crawler):
    # This method is used by Scrapy to create your spiders.
    s = cls()
    crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
    return s
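This from_crawler pattern is the boilerplate Scrapy generates for middlewares and pipelines. A small, self-contained sketch of how it might be used end to end; the class name and log message are illustrative, not from the excerpt:

    # Hypothetical middleware built around the boilerplate above.
    import logging
    from scrapy import signals

    logger = logging.getLogger(__name__)

    class SpiderOpenedLoggerMiddleware:
        @classmethod
        def from_crawler(cls, crawler):
            s = cls()
            crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
            return s

        def spider_opened(self, spider):
            logger.info('Spider opened: %s', spider.name)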
@classmethod
def from_crawler(cls, crawler):
    # Instantiate the extension with the crawler's stats collector and
    # subscribe to the signals it tracks.
    o = cls(crawler.stats)
    crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
    crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
    crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
    crawler.signals.connect(o.response_received, signal=signals.response_received)
    return o
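The handler methods themselves are not shown in this excerpt. A hedged sketch, assuming the extension's __init__ stores the collector passed in as crawler.stats and updates it through the stats collector API; the stat key names are illustrative:

    # Hypothetical handlers for the stats-driven excerpt above.
    def __init__(self, stats):
        self.stats = stats

    def spider_opened(self, spider):
        self.stats.set_value('custom/spider_name', spider.name)

    def spider_closed(self, spider, reason):
        self.stats.set_value('custom/finish_reason', reason)

    def item_scraped(self, item, spider):
        self.stats.inc_value('custom/items_scraped')

    def item_dropped(self, item, spider, exception):
        self.stats.inc_value('custom/items_dropped')

    def response_received(self, response, request, spider):
        self.stats.inc_value('custom/responses_received')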
def __init__(self):
    # Old-style signal registration through the pydispatch dispatcher
    # (assumed imports: import os; the dispatcher module; from scrapy import signals)
    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)
    dispatcher.connect(self.engine_stopped, signals.engine_stopped)
    dispatcher.connect(self.engine_started, signals.engine_started)
    # Current working directory, i.e. the root directory of the scrapy_site project
    self.curpath = os.getcwd()
    # Directory where the scraped data files are stored
    self.spidername_filepath = self.curpath + "/scrapy_site/msg/"
    # Load the keyword dictionary from the keyword.conf config file
    self.keywordsDict = dict()
    self.getKeywords()
    # Load the website names from the config file
    self.webnamesDict = dict()
    self.getWebnames()
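The dispatcher.connect calls above come from older Scrapy releases; the same pipeline could register its handlers through crawler.signals, consistent with the other snippets on this page. A minimal sketch, with an illustrative class name:

    # Modern equivalent of the dispatcher-based registration above.
    from scrapy import signals

    class SitePipeline:
        @classmethod
        def from_crawler(cls, crawler):
            pipeline = cls()
            crawler.signals.connect(pipeline.spider_opened, signal=signals.spider_opened)
            crawler.signals.connect(pipeline.spider_closed, signal=signals.spider_closed)
            crawler.signals.connect(pipeline.engine_started, signal=signals.engine_started)
            crawler.signals.connect(pipeline.engine_stopped, signal=signals.engine_stopped)
            return pipeline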