- 导入用到的库
- 目标网址
- 利用正则表达式获取图片详情页的网址
- 完整代码
import requests
import re
import time

目标网址
# Crawl several listing pages of the site, one call per page index.
for page in range(2, 140):
    download_one_page('https://pic.netbian.com/4kdongman/index_' + str(page) + '.html')
利用正则表达式获取图片详情页的网址
re_temp = '
import requests
import re
import time
def download(url, name, save_dir="E:\\壁纸\\动漫壁纸"):
    """Fetch the file at ``url`` and write it into ``save_dir``.

    The file is stored as ``<name without spaces>.<suffix>``, where the
    suffix is whatever follows the last ``.`` in ``url``.

    Args:
        url: Absolute URL of the file to fetch.
        name: Base file name; spaces are removed before it is used.
        save_dir: Target directory. It must already exist; this function
            does not create it.

    Returns:
        None. A non-200 response or a failed write is reported with
        ``print`` instead of raising.
    """
    # NOTE(review): the default directory was reconstructed from a copy in
    # which the backslashes appear to have been stripped
    # ("E:壁纸动漫壁纸" followed by a broken "\" literal) - confirm the path.
    time.sleep(2)  # throttle requests so the server is not hammered
    resp = requests.get(url)
    if resp.status_code != 200:
        # Do not save an error page under an image file name.
        print('下载错误:', name)
        return
    suffix = url.split('.')[-1]
    # str.replace returns a new string; the result has to be re-bound.
    name = name.replace(' ', '')
    address = save_dir + "\\" + name + '.' + suffix
    try:
        with open(address, 'wb') as file:
            file.write(resp.content)
    except OSError:
        # Missing directory, invalid characters in the name, full disk, ...
        print('下载错误:', name)
def download_next_one_page(url):
    """Fetch one page, extract the first pattern match and download it.

    The first match of ``re_temp`` in the page text must be a tuple with at
    least three capture groups: group 0 is appended to
    ``https://pic.netbian.com`` to form the download URL, and group 2 is
    passed to ``download`` as the file name.

    Args:
        url: URL of the page to fetch.

    Returns:
        None. If the page yields no usable match, a message is printed and
        nothing is downloaded.
    """
    time.sleep(2)  # throttle requests
    response = requests.get(url)
    response.encoding = 'gbk'  # page encoding, per the original comments
    # NOTE(review): the pattern is empty in this copy of the file - it looks
    # like it was lost when the source was extracted. Restore the original
    # expression (three or more capture groups) before running this.
    re_temp = ''
    result = re.findall(re_temp, response.text)
    # findall returns tuples only when the pattern has two or more groups;
    # guard so a missing or too-small match does not raise IndexError.
    if not result or not isinstance(result[0], tuple) or len(result[0]) < 3:
        print('下载错误:', url)
        return
    download('https://pic.netbian.com' + result[0][0], result[0][2])
def download_one_page(one_page_url):
time.sleep(1)
response = requests.get(one_page_url)
response.encoding = 'gbk' # 网站编码
re_temp = '仅用作技术交流和分享,请勿对网站进行破坏,要有逼格!



