# --- Simple image crawler ---

# -*- coding: UTF-8 -*-

import urllib2
import urllib
import re
from bs4 import BeautifulSoup

def http_web(url_1):
    """Fetch a web page and return its raw body.

    Args:
        url_1: Absolute URL of the page to download.

    Returns:
        The response body exactly as read from the connection (not decoded).

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` when
            the server answers with an error status).
    """
    request_1 = urllib2.Request(url_1)
    # Identify as a browser instead of sending the default urllib2 agent.
    request_1.add_header("user-agent", "Mozilla/5.0")
    opendz = urllib2.urlopen(request_1)
    try:
        return opendz.read()
    finally:
        # Release the connection even when reading fails part-way.
        opendz.close()

def screen_link_1(readdz):
    """Extract the ``.html`` links of a page and save them to ``http.txt``.

    Every absolute ``http://....html`` URL found in *readdz* is kept once, in
    order of first appearance, and written one per line to ``http.txt`` in
    the current working directory (an existing file is overwritten).

    Args:
        readdz: HTML source of the page.
    """
    # Non-greedy and restricted to URL characters so that two links on the
    # same line are not merged into one match; the dot before "html" is
    # matched literally.
    fgzl_1 = re.findall(r'http://[^\s"\'<>]+?\.html', readdz)

    # Keep only the first occurrence of each link.
    seen = set()
    links = []
    for link in fgzl_1:
        if link not in seen:
            seen.add(link)
            links.append(link)

    print(links)
    with open('http.txt', 'w') as wj:
        for i, a in enumerate(links):
            print('%s : %s' % (i, a))
            wj.write(a + '\n')  # one link per line

def screen_link_2():
    """Expand each saved link into its numbered follow-up pages.

    Reads the links stored one per line in ``http.txt``. For each link the
    base address is the link up to its last digit; the pages
    ``<base>_2.html`` through ``<base>_49.html`` are requested, and every URL
    that answers without an error is written to ``http_web.txt``, one per
    line (an existing file is overwritten).
    """
    with open('http.txt', 'r') as wj, open('http_web.txt', 'w') as wj_web:
        for wjh in wj:
            # Greedy match up to the last digit, which drops the ".html" suffix.
            tupian_http = re.search(r'http://.+\d', wjh)
            if tupian_http is None:
                # Blank line or a link without a numeric part: nothing to expand.
                continue
            base = tupian_http.group(0)
            print(base)

            for i in range(2, 50):
                url_2 = '%s_%s.html' % (base, i)
                print(url_2)
                req = urllib2.Request(url_2)
                # Send the same agent header that http_web() uses.
                req.add_header("user-agent", "Mozilla/5.0")
                try:
                    response = urllib2.urlopen(req)
                except urllib2.HTTPError as e:
                    print('网页错误')
                    print(e.code)
                    print(e.reason)
                except urllib2.URLError as e:
                    # Failure without an HTTP status, so there is no code to show.
                    print('网页错误')
                    print(e.reason)
                else:
                    response.close()
                    # The page answered normally, so keep its address.
                    wj_web.write(url_2 + '\n')

# Get the data name (the image tag) from the web page

def s_s(html_string):
    """Return the first ``<img>`` tag of a page.

    Args:
        html_string: HTML source of the page; it is parsed as UTF-8.

    Returns:
        The first ``img`` element found by BeautifulSoup, or ``None`` when
        the page contains no ``img`` element.
    """
    soup_1 = BeautifulSoup(html_string, 'html.parser', from_encoding='utf-8')
    # Only the first image is wanted, so find() is enough; collecting every
    # tag with find_all() first would be wasted work.
    return soup_1.find('img')

# Save the file according to the data name

def p_f(string_1):
    """Download the image referenced by an ``<img>`` tag.

    The image is saved in the current working directory as ``<n>.jpg``, where
    ``n`` is the module-level counter set by the script section of this file.

    Args:
        string_1: The ``img`` tag to download (as returned by ``s_s``), or
            ``None`` when the page had no image. Nothing is downloaded for
            ``None`` or for a tag without a ``src`` attribute.
    """
    # Check for None first: a page without any image must not abort the run.
    src = string_1.get('src') if string_1 is not None else None
    if not src:
        print('--未找到图片，跳过--')
        return

    i = n  # module-level counter, used as the file name
    print('--下载图片中--')
    print('%s %s %s' % (string_1.name, src, string_1.get_text()))
    urllib.urlretrieve(src, '%s.jpg' % i)

# Call the functions above as a whole

def dywj(http_):
    """Download the first image of one page.

    Fetches the page with ``http_web``, picks its first ``img`` tag with
    ``s_s`` and saves that image with ``p_f``.

    Args:
        http_: URL of the page. Surrounding whitespace, such as the newline
            kept by ``readline``, is removed before the request is made.
    """
    http_string = http_web(http_.strip())
    soup_string = s_s(http_string)
    p_f(soup_string)

# ---------- Run ----------

# Script entry: collect the links of the start page, expand them into their
# numbered pages, then download the first image of every page that exists.
url_http = raw_input('#请输入官网网址：')
a_1 = http_web(url_http)
b_1 = screen_link_1(a_1)
screen_link_2()  # expand every saved link into its numbered pages
print('----开始抓取图片---')
# p_f() reads this module-level counter to name the file it saves.
n = 0
with open('http_web.txt', 'r') as wj:  # file holding the page URLs
    for wjh in wj:
        wjh = wjh.strip()
        if not wjh:
            # Skip blank lines instead of requesting an empty URL.
            continue
        dywj(wjh)
        # Advance the file-name counter by 50 for the next page.
        n += 50

print('#-----抓取图片完成------')

# --- Simple image crawler (end) ---

# Python column: this month's popular articles