请参阅 Scrapy 的 Request 类。要抓取这种链式的请求,必须像下面这样使用 callback 参数:
class MySpider(baseSpider):
    """Example spider showing how to chain requests with ``callback``.

    ``parse`` yields the requests for A, D and E together, so those three
    are handled in parallel. The chain A -> B -> C is serial because each
    request is only yielded from the callback of the previous one.
    """

    ...

    # spider starts here
    def parse(self, response):
        """Entry callback: start the A chain and the independent D and E requests."""
        ...
        # A, D, E are done in parallel, A -> B -> C are done serially.
        # The quoted "<X url>" values are placeholders for the real URLs;
        # pass any other Request arguments alongside url/callback as needed.
        # Callbacks must be bound methods (self.parseX): a bare ``parseX``
        # is not in scope inside a method and would raise NameError.
        yield Request(url="<A url>", callback=self.parseA)
        yield Request(url="<D url>", callback=self.parseD)
        yield Request(url="<E url>", callback=self.parseE)

    def parseA(self, response):
        """Handle the A response, then request B (second step of the chain)."""
        ...
        yield Request(url="<B url>", callback=self.parseB)

    def parseB(self, response):
        """Handle the B response, then request C (last step of the chain)."""
        ...
        yield Request(url="<C url>", callback=self.parseC)

    def parseC(self, response):
        """Handle the C response; the A -> B -> C chain ends here."""
        ...

    def parseD(self, response):
        """Handle the D response (independent of the A chain)."""
        ...

    def parseE(self, response):
        """Handle the E response (independent of the A chain)."""
        ...



