当下很多页面都是通过 JS 动态获取数据的。
这里抓取的数据是新发地的菜价信息。运行下面的代码前需要先导入 requests、csv 以及 concurrent.futures 中的 ThreadPoolExecutor。
# Step 1: use the browser developer tools to find the URL of the data request.
# Step 2: build the request headers and the form data.
#
# Form fields POSTed to the price endpoint. "current" is only a placeholder
# here: the driver loop sets it for every page it requests. The empty strings
# look like optional date/category/name filters left unset -- TODO confirm
# against the site's request payload.
data = {
    "limit": 20,
    "current": 3,
    "pubDateStartTime": "",
    "pubDateEndTime": "",
    "prodPcatid": "",
    "prodCatid": "",
    "prodName": "",
}
# Request headers passed to requests.post() by dowload_onr_page.
# NOTE: the name is a misspelling of "headers"; it is kept as-is because
# dowload_onr_page looks it up by this exact name.
headres = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    # Sent as its own header; it must not be appended to the User-Agent value.
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "http://www.xinfadi.com.cn/priceDetail.html",
}
三:请求接口,解析返回的 JSON 并提取数据
def dowload_onr_page(url, data):
    """Fetch one page of price records and write them to the CSV output.

    POSTs ``data`` as form fields to ``url`` with the module-level ``headres``
    headers, then writes one row per entry of the response's ``list`` array
    through the module-level ``csvwriter``.

    Args:
        url: Endpoint that answers with the price data as JSON.
        data: Form fields for the request (paging and filter values).

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a 4xx/5xx response status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload has no ``list`` key or a record lacks one of
            the exported fields.
    """
    # Without a timeout a stalled connection would block this call forever.
    resp = requests.post(url, data=data, headers=headres, timeout=30)
    # Fail loudly on an error page instead of trying to parse it as JSON.
    resp.raise_for_status()

    # Column order of every CSV row.
    fields = (
        'prodName',
        'lowPrice',
        'highPrice',
        'avgPrice',
        'place',
        'specInfo',
        'unitInfo',
        'pubDate',
    )
    for record in resp.json()['list']:
        csvwriter.writerow([record[name] for name in fields])
四:打开文件,创建线程池
if __name__ == '__main__':
    # Imported here so the entry point brings its own standard-library names
    # into scope. `requests` must still be importable at module level for
    # dowload_onr_page.
    import csv
    import threading
    from concurrent.futures import ThreadPoolExecutor

    class _LockedWriter:
        """Serialize ``writerow`` calls coming from the worker threads.

        Text-mode file objects are not thread-safe, so rows written
        concurrently must go through a lock.
        """

        def __init__(self, writer):
            self._writer = writer
            self._lock = threading.Lock()

        def writerow(self, row):
            with self._lock:
                return self._writer.writerow(row)

    url = 'http://www.xinfadi.com.cn/getPriceData.html'

    # `with` closes the file even when a download fails part-way through.
    with open("菜价100.csv", mode="w", newline="", encoding='utf-8-sig') as f:
        # dowload_onr_page writes through this module-level name.
        csvwriter = _LockedWriter(csv.writer(f))

        with ThreadPoolExecutor(50) as pool:
            # Each task gets its own copy of the form data; mutating the
            # shared dict would let one page's number leak into another task.
            futures = [
                pool.submit(dowload_onr_page, url, {**data, 'current': page})
                for page in range(1, 14392)
            ]
            # result() re-raises any exception from the worker, so a failed
            # page is not silently dropped. Rows land in the file in
            # completion order, not page order.
            for future in futures:
                future.result()

    print("OVER")



