我从头开始重写了一个 spider,下面的 spider 应该使用以下命令运行:
scrapy crawl amazon -t csv -o Amazon.csv --loglevel=INFO
这样,用电子表格软件打开生成的 CSV 文件,就可以直接查看抓取到的结果。
希望这对你有所帮助:
import scrapy


class AmazonItem(scrapy.Item):
    """Container for the fields scraped from a single review entry.

    Every field is filled with the list of strings returned by
    ``Selector.xpath(...).extract()`` in ``AmazonSpider.parse``.
    """

    rating = scrapy.Field()
    date = scrapy.Field()
    review = scrapy.Field()
    link = scrapy.Field()


class AmazonSpider(scrapy.Spider):
    """Crawl the review pages of one amazon.co.uk product.

    Starts from the product-reviews URL in ``start_urls``, yields one
    ``AmazonItem`` per review block, and follows the "Next" pagination
    link until no such link is found.
    """

    name = "amazon"
    allowed_domains = ['amazon.co.uk']
    start_urls = ['http://www.amazon.co.uk/product-reviews/B0042EU3A2/']

    def parse(self, response):
        """Yield review items from ``response`` and a request for the next page.

        Args:
            response: the Scrapy response for a product-reviews page.

        Yields:
            ``AmazonItem`` objects, one per matched review block, followed
            by at most one ``scrapy.Request`` for the next page, handled by
            this same callback.
        """
        for sel in response.xpath('//table[@id="productReviews"]//tr/td/div'):
            item = AmazonItem()
            item['rating'] = sel.xpath('./div/span/span/span/text()').extract()
            item['date'] = sel.xpath('./div/span/nobr/text()').extract()
            # NOTE(review): the original predicate was an empty `[@]`, which
            # is not valid XPath. `class="reviewText"` is an assumed
            # restoration -- confirm against the actual page markup.
            item['review'] = sel.xpath(
                './div[@class="reviewText"]/text()'
            ).extract()
            item['link'] = sel.xpath(
                './/a[contains(.,"Permalink")]/@href'
            ).extract()
            yield item

        # NOTE(review): same situation as above -- the `[@]` predicate on the
        # <span> was empty; `class="paging"` is assumed. Confirm before use.
        next_page_xpath = (
            './/table[@id="productReviews"]/following::*'
            '//span[@class="paging"]/a[contains(.,"Next")]/@href'
        )
        # Evaluate the expression once and reuse the result.
        next_page_links = response.xpath(next_page_xpath).extract()
        if next_page_links:
            # urljoin keeps absolute URLs as they are and resolves relative
            # hrefs against the current page; Request rejects scheme-less URLs.
            next_page_url = response.urljoin(next_page_links[0])
            yield scrapy.Request(next_page_url, callback=self.parse)


