这是一个不在自定义命令中运行,而是手动运行Reactor并为每个蜘蛛创建新的Crawler的示例:
from twisted.internet import reactor
from scrapy.crawler import Crawler
# scrapy.conf.settings singleton was deprecated; use get_project_settings() instead.
from scrapy.utils.project import get_project_settings
from scrapy import log


def setup_crawler(spider_name, settings):
    """Create, configure and start a Crawler for one spider.

    Each spider gets its own Crawler instance; they all share the single
    Twisted reactor started below with ``reactor.run()``.

    :param spider_name: name of the spider as registered with the project.
    :param settings: the project Settings object used to build the Crawler.
    """
    crawler = Crawler(settings)
    crawler.configure()
    spider = crawler.spiders.create(spider_name)
    crawler.crawl(spider)
    # start() schedules the crawl on the reactor; nothing runs until
    # reactor.run() is called.
    crawler.start()


log.start()
settings = get_project_settings()

# A throwaway Crawler only to enumerate the spiders known to the project.
crawler = Crawler(settings)
crawler.configure()
for spider_name in crawler.spiders.list():
    setup_crawler(spider_name, settings)

# Blocks until reactor.stop() is called — you must wire up a signal
# (e.g. spider_closed) that stops the reactor once all spiders finish.
reactor.run()
您将必须设计某种信号机制,以便在所有蜘蛛都完成后停止 Twisted 的 reactor(例如监听 spider_closed 信号并计数)。
编辑:这是您可以在自定义命令中运行多个蜘蛛的方法:
from scrapy.command import ScrapyCommand
from scrapy.utils.project import get_project_settings
from scrapy.crawler import Crawler


class Command(ScrapyCommand):
    """Custom Scrapy command that runs every spider in the project.

    Invoke as ``scrapy runall`` (or whatever name the module is installed
    under via COMMANDS_MODULE).
    """

    # This command only makes sense inside a Scrapy project, since it
    # enumerates the project's spiders.
    requires_project = True

    def syntax(self):
        """Return the command-line syntax shown in help output."""
        return '[options]'

    def short_desc(self):
        """Return the one-line description shown in ``scrapy -h``."""
        return 'Runs all of the spiders'

    def run(self, args, opts):
        """Start one Crawler per spider, then start the shared crawler.

        :param args: positional command-line arguments (unused).
        :param opts: parsed command-line options (unused).
        """
        settings = get_project_settings()
        # self.crawler is the command's default crawler; use it only to
        # list spider names, then give each spider its own Crawler.
        for spider_name in self.crawler.spiders.list():
            crawler = Crawler(settings)
            crawler.configure()
            spider = crawler.spiders.create(spider_name)
            crawler.crawl(spider)
            crawler.start()
        # Finally start the command's own crawler, which runs the reactor.
        self.crawler.start()



