requests模块
1、基本操作 2、UA伪装 3、小案例
requests模块 1、基本操作'''
- 指定url
- 发起请求
- 获取响应的数据
- 持久化存储
'''
import requests
if __name__ == "__main__":
    # Fetch the Sogou home page and save its HTML to a local file.

    # Step 1: the URL to request.
    url = "https://www.sogou.com/"

    # Step 2: send the GET request. requests applies no timeout by default,
    # so an unresponsive server would block this script forever.
    response = requests.get(url=url, timeout=10)
    # Raise on 4xx/5xx instead of saving an error page as if it were the
    # real content.
    response.raise_for_status()

    # Step 3: .text is the response body decoded to a str.
    page_text = response.text
    print(page_text)

    # Step 4: persist the page to disk.
    with open("./sougou.html", "w", encoding="UTF-8") as fp:
        fp.write(page_text)
2、UA伪装
# UA:user-agent
# UA伪装
import requests
if __name__ == "__main__":
    # Run a Sogou web search for a user-supplied keyword while presenting a
    # browser User-Agent, then save the result page to a local file.
    url = "https://www.sogou.com/web"
    kw = input('enter a word:')

    # UA spoofing: send a browser User-Agent header instead of the
    # library's default one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0'
    }

    # Query-string parameters; requests URL-encodes them onto the URL.
    param = {
        'query': kw
    }

    # Proxy mapping. requests looks entries up by the lowercase URL scheme,
    # so the key must be "http" (an uppercase "HTTP" key is never matched).
    # NOTE: this entry only applies to http:// URLs; the https:// URL above
    # is still fetched directly unless an "https" entry is added.
    proxies = {"http": "http://123.169.122.201:9999"}

    # Bound the wait: without a timeout the call can hang indefinitely.
    response = requests.get(
        url=url,
        params=param,
        headers=headers,
        proxies=proxies,
        timeout=10,
    )
    # Raise on 4xx/5xx rather than saving an error page as a search result.
    response.raise_for_status()

    page_text = response.text
    with open("./sougou.html", "w", encoding="UTF-8") as fp:
        fp.write(page_text)
3、小案例
# 整张页面的局部数据
# 破解百度翻译
'''
- post请求(携带了参数)
- 响应的数据是一组json数据
json.loads('json字符串')可以把json字符串解析成Python对象;json.dumps(obj)可以把对象序列化成字符串
'''
import requests
import json
if __name__ == "__main__":
    # Query the Baidu Translate suggestion endpoint for one word and save
    # the JSON reply to a local file.
    post_url = "https://fanyi.baidu.com/sug"

    # Form fields sent in the POST body.
    data = {
        'kw': 'dog'
    }

    # UA spoofing: send a browser User-Agent header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0'
    }

    # Bound the wait: without a timeout the call can hang indefinitely.
    response = requests.post(
        url=post_url,
        data=data,
        headers=headers,
        timeout=10,
    )
    # Raise on 4xx/5xx so an error reply is not stored as a result.
    response.raise_for_status()

    # .json() parses the response body into Python objects; it raises if
    # the body is not valid JSON.
    dic_obj = response.json()

    # Persist the parsed data; ensure_ascii=False keeps non-ASCII text
    # readable in the output file.
    with open('./dog.json', 'w', encoding='UTF-8') as fp:
        json.dump(dic_obj, fp=fp, ensure_ascii=False, indent=2)



