from selenium import webdriver

# Fetch the page source
# Launch Chrome through the locally downloaded chromedriver binary.
# The path is a raw string so its backslashes are never read as escape
# sequences (the non-raw form relies on invalid escapes such as "\c").
driver = webdriver.Chrome(r'D:\数据分析\chromedriver_win32\chromedriver.exe')
try:
    driver.get('https://pvp.qq.com/web201605/herolist.shtml')  # Honor of Kings hero list page

    # Locate the hero names and hero links.
    # Clicking a hero's avatar opens that hero's own page, so inspecting the
    # avatar in the browser (right-click -> Inspect) gives the selectors for
    # the link (<a>) and for the avatar image (<img>).
    hrefs = driver.find_elements_by_css_selector(
        'body > div.wrapper > div > div > div.herolist-box > div.herolist-content > ul > li > a')
    imgs = driver.find_elements_by_css_selector(
        'body > div.wrapper > div > div > div.herolist-box > div.herolist-content > ul > li > a > img')

    # One dict per hero: the image's alt text and the link's href.
    # The key 'detial' (sic) is kept as-is because later code reads it.
    hero_detail = [
        {'hero': img.get_attribute("alt"), 'detial': link.get_attribute("href")}
        for link, img in zip(hrefs, imgs)
    ]
finally:
    # Always close Chrome, even when loading or scraping the page fails.
    driver.quit()

# Print the hero_detail list
print(hero_detail)

# Part 2: skin links for each hero -- import the dependencies
import lxml.html
import json
import os
# Collects one dict per skin: hero name, skin name and skin image URL.
skin_list = []

# A single browser is reused for every hero page instead of starting and
# stopping Chrome once per hero; try/finally guarantees it is closed even
# if a page fails to load or parse.
# Raw string: the Windows path's backslashes must not be read as escapes.
driver = webdriver.Chrome(r'D:\数据分析\chromedriver_win32\chromedriver.exe')
try:
    driver.maximize_window()
    for i in hero_detail:
        driver.get(i['detial'])  # hero page link (key spelled 'detial' in hero_detail)
        html_datas = driver.page_source  # page source as a string

        # Parse the HTML string with lxml's HTML parser.
        metree = lxml.html.etree
        parser = metree.HTML(html_datas)
        # <li> elements of the skin list (absolute XPath into the hero page).
        li_list = parser.xpath("/html/body/div[3]/div[1]/div/div/div[2]/ul//li")

        for li_element in li_list:
            names = li_element.xpath("./p/text()")  # skin name
            image_paths = li_element.xpath("./i/img/@data-imgname")  # skin image address
            # Skip entries missing a name or an image instead of aborting the
            # whole scrape with an IndexError.
            if not names or not image_paths:
                continue
            skin_list.append({
                "hero_name": i['hero'],
                "hero_skin_name": names[0],
                # The attribute value is prefixed with the scheme; presumably
                # it is a protocol-relative URL -- confirm against the page.
                "hero_skin_url": "https:" + image_paths[0],
            })
finally:
    driver.quit()
# Inspect the skin list, e.g. print(skin_list)
# Root folder that will contain one sub-folder per hero.
hero_image_dir_name = "./hero/pictures"
if not os.path.exists(hero_image_dir_name):
    os.makedirs(hero_image_dir_name)

# Walk every skin entry and make sure its hero's folder exists. A folder
# that is already present is skipped, so each hero is reported only when
# its folder is actually created.
for hero_element in skin_list:
    hero_pic_dir = hero_element["hero_name"]
    hero_folder = hero_image_dir_name + "/" + hero_pic_dir
    if os.path.exists(hero_folder):
        continue
    os.makedirs(hero_folder)
    print("英雄(%s)所存放目录已创建成功!" % hero_pic_dir)

# Final notice once every entry has been processed.
print("所有英雄存放目录已创建成功!")
# Download the images into the corresponding directories
import requests
# Root directory that holds one sub-folder per hero.
hero_file_dir = "./hero/pictures"

# The request headers are the same for every download, so build them once.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}

for hero_element in skin_list:
    dir_name = hero_element["hero_name"]  # folder name (the hero)
    image_url = hero_element["hero_skin_url"]  # skin image address
    hero_skin_name = hero_element["hero_skin_name"]  # skin name, used as the file name

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        # NOTE(review): verify=False disables TLS certificate verification; it
        # is kept from the original script -- confirm whether it is needed.
        response = requests.get(image_url, headers=headers, verify=False, timeout=30)
        # Do not save an HTTP error page under a .jpg name.
        response.raise_for_status()
    except requests.RequestException as exc:
        print("Skipping %s / %s: %s" % (dir_name, hero_skin_name, exc))
        continue

    hero_dir_skin = hero_file_dir + "/" + dir_name
    os.makedirs(hero_dir_skin, exist_ok=True)

    # Write the downloaded bytes to <hero folder>/<skin name>.jpg
    with open(hero_dir_skin + "/" + hero_skin_name + ".jpg", "wb") as fs:
        fs.write(response.content)
# Open the corresponding directories to view the downloaded images



