对于解析过程中找到的每个链接,你都可以对其发起请求,并指定另一个回调函数来解析返回的内容:
class MySpider(scrapy.Spider):
    """Spider that requests every link derived from a listing table.

    ``parse`` walks the rows of the ``table.apas_tbl`` table, builds one URL
    per row from the values of the row's first two ``<input>`` elements and
    requests it; each of those responses is handled by
    ``parse_page_contents``.
    """

    name = "myspider"
    # TODO: fill in the listing-page URL(s) to start from. The original
    # snippet elided them with a placeholder that was not valid Python.
    start_urls = []

    def parse(self, response):
        """Yield one follow-up request per data row of ``table.apas_tbl``.

        Args:
            response: Response whose body contains the ``table.apas_tbl``
                table.

        Yields:
            scrapy.Request: A request for the URL built from the row's first
            two ``<input>`` values, with ``parse_page_contents`` as callback.
        """
        rows = response.css('table.apas_tbl tr').extract()
        # The first <tr> is skipped -- presumably the header row; confirm
        # against the target page.
        for row in rows[1:]:
            soup = BeautifulSoup(row, 'lxml')
            inputs = soup.find_all('input')
            if len(inputs) < 2:
                # Two <input> values are needed to build the URL; skip the
                # row instead of aborting the whole generator on IndexError.
                self.logger.warning(
                    "Skipping row with %d <input> element(s) on %s",
                    len(inputs),
                    response.url,
                )
                continue
            url = "http://myurl{}.com/{}".format(
                inputs[0]['value'], inputs[1]['value']
            )
            yield scrapy.Request(url, callback=self.parse_page_contents)

    def parse_page_contents(self, response):
        """Parse the ``div#apas_form`` section of a followed page.

        Args:
            response: Response for a URL requested by ``parse``.

        Returns:
            None. The links and the result table are located but nothing is
            yielded from them in this snippet.
        """
        form_html = response.xpath('//div[@id="apas_form"]').extract_first()
        if form_html is None:
            # extract_first() returns None when nothing matches, and
            # BeautifulSoup(None) would raise.
            self.logger.warning("No div#apas_form found on %s", response.url)
            return

        soup = BeautifulSoup(form_html, 'lxml')
        pages = soup.find(id='apas_form_text')
        # find() returns None when the element is absent.
        if pages is not None:
            for link in pages.find_all('a'):
                # NOTE(review): this URL is built but never used, and it has
                # no scheme -- presumably further pages are meant to be
                # requested from here; confirm the intended follow-up.
                url = 'myurl.com/{}'.format(link['href'])
        # NOTE(review): located but not consumed in the visible snippet;
        # presumably the data to extract lives in this table.
        result_table = soup.find("table", {"class": "apas_tbl"})


