Python网络爬虫实例（爬一些小网站的图片）

文章目录：前言；一、爬优美图库；二、爬ZOL壁纸网

前言

提示：以下是本篇文章正文内容，下面案例可供参考。

一、爬优美图库

# Techniques used:
# 1. requests      -- sends HTTP requests and fetches data from the server
# 2. BeautifulSoup -- parses the HTML source of each page
import os

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.umei.cc/"
TIMEOUT = 10  # seconds; without a timeout requests.get() may block indefinitely


def save_image(src, path):
    """Download the image at ``src`` and write its raw bytes to ``path``."""
    # "wb": the payload is binary image data, not text.  The ``with`` block
    # closes the file even if the download or the write raises.
    with open(path, mode="wb") as f:
        f.write(requests.get(src, timeout=TIMEOUT).content)


def find_image_src(page):
    """Return the ``src`` of the first <img> inside the ImageBody <div>."""
    return page.find("div", attrs={"class": "ImageBody"}).find("img").get("src")


# The images are written under picture/, so make sure it exists first.
os.makedirs("picture", exist_ok=True)

# Step 1: send a request to the server for the listing page.
resp = requests.get("https://www.umei.cc/bizhitupian/diannaobizhi/", timeout=TIMEOUT)
resp.encoding = "utf-8"  # set the character encoding used to decode the body

# Step 2: parse the HTML.
# find() returns the first match; find_all() returns every match.
main_page = BeautifulSoup(resp.text, "html.parser")
alist = main_page.find("div", attrs={"class": "TypeList"}).find_all(
    "a", attrs={"class": "TypeBigPics"}
)

# Running image counter used in the file names and progress messages.
# NOTE(review): its initialisation is not visible in the original listing;
# starting at 1 is an assumption.
n = 1
for a in alist:
    href = BASE_URL + a.get("href")

    # Request the child page that holds the full-size image.
    resp1 = requests.get(href, timeout=TIMEOUT)
    resp1.encoding = "utf-8"
    child_page = BeautifulSoup(resp1.text, "html.parser")

    # Download the image and save it locally.
    save_image(find_image_src(child_page), "picture/tu_%s.jpg" % n)
    print("恭喜你下好%s张图片" % n)
    n += 1

    # Visit every link in the NewPages <div> of the child page
    # (presumably the pagination bar -- verify against the live site).
    text = child_page.find("div", attrs={"class": "NewPages"}).find_all("a")
    for b in text:
        href2 = BASE_URL + b.get("href")
        resp2 = requests.get(href2, timeout=TIMEOUT)
        resp2.encoding = "utf-8"
        child_page1 = BeautifulSoup(resp2.text, "html.parser")

        save_image(find_image_src(child_page1), "picture/tu_%s.jpg" % n)
        print("恭喜你下好%s张图片" % n)
        n += 1

二、爬ZOL壁纸网

import os

import requests
from bs4 import BeautifulSoup

ZOL_BASE = "https://desk.zol.com.cn/"
TIMEOUT = 10  # seconds; without a timeout requests.get() may block indefinitely

# Links that must not be followed.  They are compared after the base URL has
# been prepended, which is why they look like two URLs glued together.
AD_LINK = ZOL_BASE + "https://file.cdn.cqttech.com/xzdesktop/XZDesktop_4020_2.0.11.12.exe"
DEAD_LINK = ZOL_BASE + "javascript:;"


def save_image(src, path):
    """Download the image at ``src`` and write its raw bytes to ``path``."""
    # "wb": the payload is binary image data, not text.  The ``with`` block
    # closes the file even if the download or the write raises.
    with open(path, mode="wb") as f:
        f.write(requests.get(src, timeout=TIMEOUT).content)


def find_image_src(page):
    """Return the ``src`` of the first <img> inside the #mouscroll <div>."""
    return page.find("div", attrs={"id": "mouscroll"}).find("img").get("src")


# The images are written under ZOL/, so make sure it exists first.
os.makedirs("ZOL", exist_ok=True)

resp = requests.get("https://desk.zol.com.cn/meinv/", timeout=TIMEOUT)
resp.encoding = "utf-8"
main_page = BeautifulSoup(resp.text, "html.parser")
alist = main_page.find("ul", attrs={"class": "pic-list2"}).find_all(
    "a", attrs={"class": "pic"}
)

# Running image counter used in the file names and progress messages.
# NOTE(review): its initialisation is not visible in the original listing;
# starting at 1 is an assumption.
n = 1
for a in alist:
    href = ZOL_BASE + a.get("href")
    if href == AD_LINK:
        continue

    resp1 = requests.get(href, timeout=TIMEOUT)
    resp1.encoding = "utf-8"
    child_page = BeautifulSoup(resp1.text, "html.parser")

    save_image(find_image_src(child_page), "ZOL/tu_%s.jpg" % n)
    print("恭喜你下好%s张图片" % n)
    n += 1

    # Follow the "photo-next" link up to nine times from this page.
    page = child_page
    for _ in range(1, 10):
        b = (
            page.find("div", attrs={"id": "mouscroll"})
            .find("div", attrs={"id": "photo-next"})
            .find("a")
        )
        href2 = ZOL_BASE + b.get("href")
        # ``page`` is not advanced when the link is skipped, so every later
        # iteration would find the same link again; stop instead of spinning.
        if href2 == AD_LINK or href2 == DEAD_LINK:
            break

        resp2 = requests.get(href2, timeout=TIMEOUT)
        resp2.encoding = "utf-8"
        new_page = BeautifulSoup(resp2.text, "html.parser")

        save_image(find_image_src(new_page), "ZOL/tu_%s.jpg" % n)
        print("恭喜你下好%s张图片" % n)
        n += 1
        page = new_page
 page new_page

Python网络爬虫实例（爬一些小网站的图片）

Python相关栏目本月热门文章