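"""
Web-scraping practice example: fetch stock data.
East Money (东方财富) is great - it imposes no crawler restrictions:
https://www.eastmoney.com/robots.txt
Fetches data for 4,000 stocks and saves it as csv.
"""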
import requests
import re
import json
import time
def getApiRequest(url):
    """Fetch ``url`` with a browser-like User-Agent and a 30-second timeout.

    Returns the ``requests.Response`` when the request succeeds and
    ``raise_for_status()`` does not raise. On a request failure the sentinel
    string ``'http request error !'`` is returned instead of raising, so
    callers must be prepared to receive a non-response value.
    """
    headerParams = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }
    try:
        r = requests.get(url, timeout=30, headers=headerParams)
        r.raise_for_status()
    except requests.exceptions.RequestException:
        # Only request/HTTP failures are converted to the sentinel;
        # KeyboardInterrupt and programming errors now propagate instead of
        # being hidden by a bare ``except``.
        return 'http request error !'
    return r
def rinseData(res):
    """Extract the stock records from a JSONP API response.

    ``res`` is expected to expose the response body as ``res.text`` in the
    form ``callback({...});``. The JSON document between the first ``(`` and
    the last ``)`` is decoded and its ``data.diff`` value is returned.

    Returns ``[]`` whenever the records cannot be extracted: ``res`` has no
    ``text`` attribute (for example the error string returned by
    getApiRequest), the body has no parenthesised payload, the payload is
    not valid JSON, or ``data`` / ``diff`` is missing or null.
    """
    try:
        # Greedy match from the first "(" to the last ")" so parentheses that
        # appear inside JSON string values do not truncate the payload.
        wrapped = re.search(r'\((.*)\)', res.text, re.S)
        if wrapped is None:
            return []
        payload = json.loads(wrapped.group(1))
        # "data" may be present but null; treat that like a missing key.
        data = payload.get('data') or {}
        return data.get('diff') or []
    except (AttributeError, TypeError, ValueError):
        # AttributeError: ``res`` is not a response / payload is not a dict.
        # ValueError covers json.JSONDecodeError.
        return []
def writeCsv(f, lslist):
    """Write one CSV row per record in ``lslist`` to the open text file ``f``.

    Each record is a dict keyed by the API field codes. The columns are
    written in this order: f12, f14, f15, f3, f4, f5, f6, f7, f2, f16, f17,
    f18, f10, f8, f9, f23. The first four are read with ``dict.get`` (a
    missing key is written as the text ``None``); the rest are required.

    Returns ``None`` on success. If a record lacks a required key, is not a
    dict, or the write fails, processing stops and the string
    ``'write csv file Error'`` is returned; rows written before the failure
    remain in ``f``.
    """
    import csv  # local import so this function does not depend on file-level imports

    optional_keys = ('f12', 'f14', 'f15', 'f3')
    required_keys = ('f4', 'f5', 'f6', 'f7', 'f2', 'f16', 'f17', 'f18',
                     'f10', 'f8', 'f9', 'f23')
    # NOTE(review): the header written by main() labels column 3 as the
    # latest price and column 9 as the high, while f15 and f2 occupy those
    # positions here - confirm the two fields are not swapped.
    try:
        # csv.writer quotes values containing commas/quotes, and ends every
        # row with a real newline (the old code appended a literal "n").
        writer = csv.writer(f, lineterminator="\n")
        for item in lslist:
            values = [item.get(key) for key in optional_keys]
            values.extend(item[key] for key in required_keys)
            writer.writerow([str(value) for value in values])
    except (AttributeError, KeyError, TypeError, ValueError, OSError):
        return 'write csv file Error'
def main():
    """Download the stock list page by page and save it to ``gp.csv``.

    Writes the header row, then requests ``total`` pages of ``page`` records
    each from the East Money list API, appending every page's records to the
    file. A single-line progress bar with the elapsed time is redrawn on
    stdout before each request, and a closing banner is printed at the end.
    """
    total = 236
    page = 20
    start = time.perf_counter()  # start time, used for the elapsed-time display
    # ``with`` guarantees the file is closed even if a request or write raises.
    with open('gp.csv', 'w+', encoding='utf-8') as f:
        f.write('代码,名称,最新价,涨跌幅,涨跌额,成交量(手),成交额,振幅,最高,最低,今天,昨收,量比,换手率,市盈率(动态),市净率\n')
        for p in range(total):
            a = "*" * p
            b = "." * (total - p)
            c = (p / total) * 100
            dur = time.perf_counter() - start
            # "\r" returns to the start of the line so the bar redraws in place.
            print("\r{:^3.0f}%[{}->{}]{:.2f}s".format(c, a, b, dur), end="")
            # NOTE(review): pages are requested as pn=0..total-1; confirm
            # whether the API's page index is 0- or 1-based.
            url = (
                'http://81.push2.eastmoney.com/api/qt/clist/get'
                '?cb=jQuery1124017220261478010612_1636785244249'
                '&pn=' + str(p) +
                '&pz=' + str(page) +
                '&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281'
                '&fltt=2&invt=2&fid=f3'
                '&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23'
                '&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152'
                '&_=1636785244294'
            )
            res = getApiRequest(url)
            lslist = rinseData(res)
            writeCsv(f, lslist)
    print("\n" + "执行结束".center(total // 2, "-"))
# Run the scraper. The call is unconditional, so it also runs on import.
main()



