import requests
from lxml import etree
class Fsssezj():
    """Fetch a web page, extract the text of its <p> elements, and append it to a file.

    The pipeline is ``get_data`` -> ``parse_data`` -> ``write``; ``run`` chains
    the three steps.
    """

    # Seconds to wait for the server before giving up. Without a timeout
    # requests can block forever on a stalled connection.
    TIMEOUT = 10
    # File that write() appends to.
    OUTPUT_FILE = 'shiji.txt'

    def __init__(self):
        # Browser-like user agent sent with the request.
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
        self.url = 'http://ewenyan.com/'

    # Fetch the data
    def get_data(self):
        """Download ``self.url`` and return the decoded page text.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx HTTP status.
        """
        response = requests.get(self.url, headers=self.header, timeout=self.TIMEOUT)
        # Fail loudly on an HTTP error instead of scraping the error page.
        response.raise_for_status()
        # Decode as GBK to avoid garbled text on the gb2312 page.
        response.encoding = 'gbk'
        return response.text

    # Parse the data
    def parse_data(self, response):
        """Return the text nodes of every <p> element in the HTML string *response*.

        An empty or whitespace-only document yields an empty list.
        """
        if not response or not response.strip():
            return []
        html = etree.HTML(response)
        # NOTE(review): depending on the lxml version, a document with no
        # parsable content may come back as None - guard before using it.
        if html is None:
            return []
        return html.xpath('//p/text()')

    def write(self, content_text):
        """Concatenate the items of *content_text* and append them to ``OUTPUT_FILE``.

        Each item is passed through ``str()`` first, so lxml's string-result
        objects are stored as plain text.
        """
        data = ''.join(str(item) for item in content_text)
        with open(self.OUTPUT_FILE, 'a', encoding='utf8') as f:
            f.write(data)

    def run(self):
        """Run the full fetch -> parse -> write pipeline once."""
        response = self.get_data()
        content_text = self.parse_data(response)
        self.write(content_text)
if __name__ == '__main__':
    # Use a distinct variable name: binding the instance to ``Fsssezj`` would
    # shadow the class and make it impossible to instantiate again.
    scraper = Fsssezj()
    scraper.run()