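# 1. Fetch the HTML source of the main (listing) page.
# 2. Extract the child-page links from it (a -> href).
# 3. Visit each child page and fetch its HTML source.
# 4. Find the image URL in that source (img -> src).
# 5. Download the image (fetch the raw bytes).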
import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
LIST_URL = 'https://www.umei.cc/bizhitupian/weimeibizhi/'
IMG_DIR = 'img'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
REQUEST_DELAY = 1  # seconds to pause between child pages


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-2xx HTTP status.
    """
    resp = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # force UTF-8 so Chinese text is not garbled
    return BeautifulSoup(resp.text, 'html.parser')


def image_filename(src):
    """Return the last path segment of the image URL *src*.

    Any query string or fragment is ignored so it cannot leak into the
    file name. Returns an empty string when the URL path ends with '/'.
    """
    return urlsplit(src).path.rsplit('/', 1)[-1]


def download_image(src, dest_dir=IMG_DIR):
    """Download the image at *src* into *dest_dir*.

    Returns:
        The path of the written file, or None when no file name can be
        derived from *src*.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-2xx HTTP status.
    """
    name = image_filename(src)
    if not name:
        return None
    img_resp = requests.get(src, timeout=REQUEST_TIMEOUT)
    img_resp.raise_for_status()
    # Create the target directory on demand instead of assuming it exists.
    os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, name)
    with open(dest, mode='wb') as f:
        f.write(img_resp.content)
    return dest


def process_child_page(child_url):
    """Fetch one child page and download the image it displays.

    Returns:
        True when an image was saved, False when the page was skipped
        (request failed or the expected markup was not found).
    """
    try:
        child_page = fetch_soup(child_url)
        container = child_page.find('p', align='center')
        img = container.find('img') if container is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            print('skipped (no image found): ' + child_url)
            return False
        # The src may be relative; resolve it against the child page URL.
        saved = download_image(urljoin(child_url, src))
    except requests.RequestException as exc:
        # One bad page should not abort the remaining downloads.
        print('skipped (request failed): %s (%s)' % (child_url, exc))
        return False
    if saved is None:
        print('skipped (no file name in image URL): ' + child_url)
        return False
    print('下载完成')
    return True


def main():
    """Scrape the listing page and download one image per linked child page.

    Raises:
        requests.RequestException: when the listing page cannot be fetched.
        RuntimeError: when the listing page lacks the 'TypeList' container.
    """
    main_page = fetch_soup(LIST_URL)
    type_list = main_page.find('div', class_='TypeList')
    if type_list is None:
        raise RuntimeError("no <div class='TypeList'> found on " + LIST_URL)
    for link in type_list.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        # urljoin handles both site-relative and absolute hrefs.
        process_child_page(urljoin(LIST_URL, href))
        time.sleep(REQUEST_DELAY)  # be polite to the server


if __name__ == '__main__':
    main()