今天写了一个简化版的美女图片爬虫,和之前的版本类似。用法:
先输入要下载的套图序号,再输入该套图所属分类的编号(0–5);程序会在 D 盘创建 pic 文件夹,下载的图片都保存在其中。下面给出代码:
import requests
from lxml import etree
import re
import os
# HTTP headers sent with every request: a desktop-browser User-Agent and a
# Referer pointing at the site's front page.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
    "Referer": "https://www.mm131.net/"}

# Menu index (as typed by the user) -> category path segment on the site.
dic = {"0": "/xinggan/", "1": "/qingchun/", "2": "/xiaohua/", "3": "/chemo/", "4": "/qipao/",
       "5": "/mingxing/"}

SITE_ROOT = "https://www.mm131.net"
IMAGE_ROOT = "https://img1.mmmw.net/pic"
SAVE_DIR = "D://pic/"
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the script
DIGITS = re.compile(r"\d+")


def build_album_url(category: str, number: int) -> str:
    """Return the URL of the first page of album *number* in *category*.

    *category* is a key of ``dic``. The stored path segment already carries
    leading and trailing slashes, so they are stripped before formatting to
    avoid producing ``//`` inside the URL.

    Raises:
        KeyError: if *category* is not a key of ``dic``.
    """
    return "{}/{}/{}.html".format(SITE_ROOT, dic[category].strip("/"), number)


def build_image_url(number: int, index: int) -> str:
    """Return the URL of image *index* (1-based) of album *number*."""
    return "{}/{}/{}.jpg".format(IMAGE_ROOT, number, index)


def image_filename(number: int, index: int) -> str:
    """Return the local file name used for image *index* of album *number*."""
    return "第" + str(number) + "(" + str(index) + ")" + "张" + ".jpg"


def parse_page_count(pages: list) -> int:
    """Extract the album's page count from the scraped pager text nodes.

    *pages* is the list of text nodes returned by the pager XPath query.
    The last run of digits found in them is taken as the total page count.

    Raises:
        ValueError: if the text contains no digits at all.
    """
    # Join the raw strings instead of using str(list): the list repr escapes
    # control characters (e.g. "\x0c"), which would inject spurious digits.
    found = DIGITS.findall(" ".join(str(node) for node in pages))
    if not found:
        raise ValueError("no page count found in {!r}".format(pages))
    return int(found[-1])


def main() -> None:
    """Prompt for an album number and category, then download every image.

    Images are written to ``SAVE_DIR``, one file per page of the album.
    Exits with a message when the category is unknown, the album page cannot
    be fetched, or the page count cannot be parsed from it.
    """
    number = int(input("请输入您的套图序号:"))
    print("0:xinggan,1:qingchun,2:xiaohua,3:chemo,4:qipao,5:mingxing")
    name = input("\n请输入目标在哪个套图范围").strip()
    if name not in dic:
        raise SystemExit("无效的分类编号: " + name)

    url = build_album_url(name, number)
    response = requests.get(url=url, headers=headers, allow_redirects=False,
                            timeout=REQUEST_TIMEOUT)
    # Redirects are not followed, so anything other than 200 means there is
    # no album page to parse.
    if response.status_code != 200:
        raise SystemExit("获取套图页面失败,HTTP状态码: " + str(response.status_code))
    response.encoding = response.apparent_encoding
    html = etree.HTML(response.text)
    pages = html.xpath("//span[@class='page-ch'][1]/text()")
    print(pages)
    try:
        biggest = parse_page_count(pages)  # total number of pages in the album
    except ValueError:
        raise SystemExit("未能从页面解析出总页数,请检查套图序号和分类是否匹配")
    print(biggest)

    # exist_ok makes re-runs work: a pre-existing folder is not an error.
    os.makedirs(SAVE_DIR, exist_ok=True)
    os.chdir(SAVE_DIR)
    print(os.getcwd())

    for i in range(1, biggest + 1):
        picture = requests.get(url=build_image_url(number, i), headers=headers,
                               allow_redirects=False, timeout=REQUEST_TIMEOUT)
        if picture.status_code != 200:
            # Do not save an error or redirect body under a .jpg name.
            print("第" + str(i) + "张下载失败,HTTP状态码: " + str(picture.status_code))
            continue
        # "wb" rather than "ab+": appending on a re-run would corrupt an
        # image that was already downloaded.
        with open(os.path.join(SAVE_DIR, image_filename(number, i)), "wb") as f:
            print("开始下载第" + str(i) + "张")
            f.write(picture.content)
    print("结束")


if __name__ == "__main__":
    main()
#



