运行 Scrapy 项目中的 spider:
新建 run.py 脚本,在代码中启动爬虫
from scrapy import cmdline

# Launch the 'tq' spider programmatically — equivalent to running
# `scrapy crawl tq` from the command line.
cmdline.execute(['scrapy', 'crawl', 'tq'])
判断获取到的元素类型:
print(type(sevenday))  # sevenday 为选择器返回的对象
对象.getall() 获取所有匹配的数据;对象.get() 获取其中的第一个元素
组装数据:
在 items.py 中定义字段,并在 spider 中组装成 Item
import scrapy
from ..items import TqybItem
class TqSpider(scrapy.Spider):
    """Spider that scrapes the 7-day forecast headings from weather.com.cn.

    Crawls the Beijing forecast page (city code 101010100) and yields one
    ``TqybItem`` per ``<h1>`` heading found inside the ``#7d`` forecast block.
    """

    name = 'tq'
    allowed_domains = ['weather.com.cn']
    start_urls = ['http://www.weather.com.cn/weather/101010100.shtml']

    def parse(self, response):
        """Extract the day headings and hand them to the pipelines.

        Args:
            response: the downloaded forecast page.

        Yields:
            TqybItem: one item per day, with the heading text in ``day``.
        """
        # NOTE(review): the original XPath contained an empty predicate
        # ('//div[@]...') which is invalid XPath and raises at runtime;
        # the predicate was dropped — confirm the selector still matches
        # the intended '#7d' forecast section.
        sevendays = response.xpath('//div[@id="7d"]//h1/text()').getall()
        for sevenday in sevendays:
            # Each yielded item is sent on to the item pipelines.
            yield TqybItem(day=sevenday)



