我在这里发布我基于上面 drcolossos 的答案编写的 MultiCSVItemPipeline 代码。
该管道假定所有 Item 类都遵循 *Item 命名约定（例如 TeamItem、EventItem），并据此创建 team.csv、event.csv 等文件，将每条记录写入与其类型对应的 CSV 文件。
from scrapy.exporters import CsvItemExporter
from scrapy import signals
# NOTE(review): scrapy.xlib.pydispatch is reported as deprecated in newer
# Scrapy releases -- confirm it is available in the installed version.
from scrapy.xlib.pydispatch import dispatcher


def item_type(item):
    """Return the lowercase type key for *item*.

    The key is the item's class name with every occurrence of ``'Item'``
    removed and the remainder lowercased, e.g. ``TeamItem`` -> ``'team'``.
    """
    return type(item).__name__.replace('Item', '').lower()  # TeamItem => team


class MultiCSVItemPipeline(object):
    """Item pipeline that writes each item type to its own CSV file.

    One file named ``<type>.csv`` is opened per entry in ``SaveTypes`` when
    the spider opens. Each processed item whose ``item_type`` key matches an
    entry is exported to the corresponding file; all items are passed on
    unchanged.
    """

    # Type keys (as produced by item_type) that get a CSV file.
    SaveTypes = ['team', 'club', 'event', 'match']

    def __init__(self):
        """Hook the open/close handlers up to the spider signals."""
        # Start empty so spider_closed and process_item are safe even if
        # spider_opened never ran or failed part-way through.
        self.files = {}
        self.exporters = {}
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_opened(self, spider):
        """Open one CSV file and exporter per save type and start exporting.

        Raises whatever ``open`` raises; any files opened before the failure
        are closed first so no handles leak.
        """
        # NOTE(review): CSVDir is not defined in this snippet; it must exist
        # at module level. It is concatenated directly with the file name, so
        # it presumably ends with a path separator -- confirm.
        opened = {}
        try:
            for name in self.SaveTypes:
                opened[name] = open(CSVDir + name + '.csv', 'w+b')
        except Exception:
            for handle in opened.values():
                handle.close()
            raise

        self.files = opened
        self.exporters = {
            name: CsvItemExporter(handle) for name, handle in opened.items()
        }
        for exporter in self.exporters.values():
            exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish every exporter, then close every file."""
        try:
            for exporter in self.exporters.values():
                exporter.finish_exporting()
        finally:
            # Close the files even if an exporter fails to finish.
            for handle in self.files.values():
                handle.close()

    def process_item(self, item, spider):
        """Export *item* to the CSV matching its type, if any; return *item*."""
        exporter = self.exporters.get(item_type(item))
        if exporter is not None:
            exporter.export_item(item)
        return item


