# Python XPath scraper for downloading beauty-photo gallery images
import requests
from lxml import etree
#https://www.tupianzj.com/meinv/20210830/231951.html
# Start page of the gallery, read interactively when the module is loaded.
# The spider class below reads this name.
q = input('请输入网址:')

# Module-level accumulator for every image URL found while crawling.
# No `global` statement is needed here: a name assigned at module scope is
# already global, and `global` only has an effect inside a function body.
jpg_list = []
class tupianzj(object):
    """Spider that walks a paginated gallery and saves its images.

    Starting from ``self.url``, each page is fetched, the ``src`` of the
    element with id ``bigpicimg`` is appended to the module-level
    ``jpg_list``, and the "next page" link is followed until a page yields
    no such link.  Every collected image is then downloaded into the
    current working directory.
    """

    # Seconds to wait for the server before giving up.  Without an explicit
    # timeout a stalled connection would block the script indefinitely.
    timeout = 30

    def __init__(self):
        # ``q`` is the start URL read at module level.
        self.url = q
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0'
        }

    @staticmethod
    def _next_page_url(page_url, hrefs):
        """Return the first of *hrefs* placed in *page_url*'s directory.

        The pagination links are file names relative to the page that
        contains them, so the directory part of *page_url* (everything up to
        and including its last ``/``) is used as the prefix instead of a
        hard-coded gallery directory.  Returns ``None`` when *hrefs* is
        empty, which signals the end of the gallery.
        """
        if not hrefs:
            return None
        directory = page_url[:page_url.rfind('/') + 1]
        return directory + hrefs[0]

    def get_data(self, url):
        """Fetch *url* and return the raw response body as bytes."""
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        return res.content

    def parse_data(self, html, base_url=None):
        """Collect the image URL on a page and return the next page's URL.

        Args:
            html: Raw HTML of one gallery page.
            base_url: URL the page was fetched from; the next-page link is
                resolved against its directory.  Defaults to ``self.url``.

        Returns:
            The URL of the next page, or ``None`` when the page has no
            next-page link.

        Side effects:
            Extends the module-level ``jpg_list`` with the image URLs found.
        """
        parse_html = etree.HTML(html)  # parse the bytes into an element tree
        # Address of the full-size picture shown on this page.
        image_list = parse_html.xpath('//*[@id="bigpicimg"]/@src')
        jpg_list.extend(image_list)
        # href of the "next page" entry in the pagination list.
        p_list = parse_html.xpath('//*[@id="container"]/div/div/div[2]/div[2]/div[3]/ul/li[11]/a/@href')
        if base_url is None:
            base_url = self.url
        return self._next_page_url(base_url, p_list)

    def run(self):
        """Crawl every page of the gallery, then download the collected images."""
        next_url = self.url
        visited = set()
        # Stop when a page has no next link, or when the link points at a
        # page that was already crawled (which would otherwise loop forever).
        while next_url is not None and next_url not in visited:
            visited.add(next_url)
            html = self.get_data(next_url)
            next_url = self.parse_data(html, next_url)
        print(jpg_list)

        for image_url in jpg_list:
            # NOTE(review): the file name is a fixed slice of the image URL;
            # this presumably matches the image host's URL layout -- confirm,
            # since a shorter URL would yield the bare name ".jpg".
            filename = image_url[54:64] + ".jpg"
            response = requests.get(url=image_url, headers=self.headers, timeout=self.timeout)
            if not response.ok:
                # Do not save an HTTP error page under an image file name.
                print("%s下载失败 (HTTP %d)" % (filename, response.status_code))
                continue
            # The context manager closes the file even if the write fails.
            with open(filename, 'wb') as file:
                file.write(response.content)
            print("%s下载成功" % filename)
if __name__ == '__main__':
    # Script entry point: build the spider and run the crawl + download.
    tupianzj().run()


