import requests
import re
# Request headers passed to requests.get by par_html; the User-Agent value
# presents the client as Edge 96 / Chrome 96 on Windows 10.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
    ),
}
def par_html(url):
    """Fetch a page and print the poems matched in its text.

    Downloads ``url`` using the module-level ``headers``, runs three regular
    expressions over the response text (titles, authors, contents), pairs
    the matches positionally, and prints each resulting poem followed by a
    line of 30 asterisks.

    Args:
        url: Address of the page to download.

    Returns:
        A list of dicts with the keys ``"title"``, ``"zuozhe"`` and
        ``"contents"``, one per printed poem, in match order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the call from blocking forever on an unresponsive host.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of running the patterns over an error page.
    resp.raise_for_status()
    text = resp.text

    # NOTE(review): these patterns contain no literal anchors, so every lazy
    # group matches the empty string at each position of ``text``. Presumably
    # the HTML tags that delimited title/author/content were stripped when
    # this code was pasted -- restore them against the page markup.
    titles = re.findall(r'.*?.*?(.*?)', text)
    authors = re.findall(r'.*?.*?(.*?)', text)
    # re.S lets "." span newlines inside the contents pattern.
    bodies = re.findall(r'.*?(.*?)', text, re.S)

    # zip stops at the shortest list, so surplus matches in the longer lists
    # are dropped rather than paired with the wrong poem.
    poems = [
        {"title": title, "zuozhe": author, "contents": body}
        for title, author, body in zip(titles, authors, bodies)
    ]

    for poem in poems:
        print(poem["title"])
        print(poem["zuozhe"])
        print(poem["contents"])
        print("*" * 30)

    return poems
def main():
    """Call par_html on the hard-coded gushiwen.com URL."""
    par_html("https://www.gushiwen.com/type/n/lianghan/n/2.html")
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger the page download.
if __name__ == "__main__":
    main()
# 爬取的结果
# 爬虫小案例实例总结
# 链接:https://pan.baidu.com/s/1V19M39M094hXeuqUSzPMnw?pwd=xnfn
# 提取码:xnfn
# --来自百度网盘超级会员V2的分享



