环境:Windows 10
IDE:PyCharm
1.获取页面
import requests

# Step 1 demo: download the page and dump the raw response for inspection.
link = "http://www.santostang.com"
# A timeout keeps the script from blocking forever if the server never answers.
r = requests.get(link, timeout=10)
print(r)       # the Response object itself (its repr shows the HTTP status)
print(r.text)  # the response body decoded to text
2.提取需要的数据
import requests
from bs4 import BeautifulSoup

# Step 2 demo: download the page and extract the text of the first post title.
link = "http://www.santostang.com"
# A timeout keeps the script from blocking forever if the server never answers.
r = requests.get(link, timeout=10)
# Fail early on an HTTP error instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

# The selector depends on the page's HTML structure (front-end knowledge):
# the title is the link text inside the first <h1 class="post-title">.
heading = soup.find("h1", class_="post-title")
if heading is None or heading.a is None:
    # find() returns None when nothing matches; report that explicitly rather
    # than failing with an opaque AttributeError on None.
    raise ValueError(f"no <h1 class='post-title'> link found at {link}")
title = heading.a.text.strip()
print(title)
3.存储数据
import requests
from bs4 import BeautifulSoup

# Step 3 demo: download the page, extract the first post title, and append it
# to a local text file.
link = "http://www.santostang.com"
# A timeout keeps the script from blocking forever if the server never answers.
r = requests.get(link, timeout=10)
# Fail early on an HTTP error instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

# The selector depends on the page's HTML structure (front-end knowledge):
# the title is the link text inside the first <h1 class="post-title">.
heading = soup.find("h1", class_="post-title")
if heading is None or heading.a is None:
    # find() returns None when nothing matches; report that explicitly rather
    # than failing with an opaque AttributeError on None.
    raise ValueError(f"no <h1 class='post-title'> link found at {link}")
title = heading.a.text.strip()
print(title)

# Explicit UTF-8 so the write does not depend on the platform's locale
# encoding, which may be unable to represent every character in the title.
# NOTE: the title is appended without a separator, so repeated runs
# concatenate titles on the same line.
with open("crawlertest.txt", "a+", encoding="utf-8") as f:
    f.write(title)



