Coverage for scrapy/contrib/closespider : 69%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
|
"""CloseSpider is an extension that forces spiders to be closed after certain conditions are met.
See documentation in docs/topics/extensions.rst
"""
txlog.addObserver(self.catch_log) dispatcher.connect(self.page_count, signal=signals.response_received) dispatcher.connect(self.spider_opened, signal=signals.spider_opened) dispatcher.connect(self.item_scraped, signal=signals.item_scraped)
def from_crawler(cls, crawler):
if event.get('logLevel') == log.ERROR: spider = event.get('spider') if spider: self.errorcounts[spider] += 1 if self.errorcounts[spider] == self.errorcount: self.crawler.engine.close_spider(spider, 'closespider_errorcount')
self.pagecounts[spider] += 1 if self.pagecounts[spider] == self.pagecount: self.crawler.engine.close_spider(spider, 'closespider_pagecount')
self.tasks[spider] = reactor.callLater(self.timeout, \ self.crawler.engine.close_spider, spider=spider, \ reason='closespider_timeout')
self.counts[spider] += 1 if self.counts[spider] == self.itemcount: self.crawler.engine.close_spider(spider, 'closespider_itemcount')
tsk.cancel()