import os
import re
from time import sleep
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
class Xfl():
    """Selenium scraper that logs in to card.xinfuli.net and saves product images.

    Constructing an instance starts Chrome, applies a few settings that hide the
    usual automation markers (presumably to get past bot detection), and logs in.
    ``spider_png`` then visits the product pages listed in ``self.hrefs`` and
    writes their images into one directory per product, named after the text of
    the element matched by ``TITLE_XPATH``.

    NOTE(review): the three ``*_XPATH`` constants below are kept exactly as they
    were found, but ``[@]`` is not a valid XPath predicate -- the attribute test
    (e.g. a class or id) appears to have been lost. They must be restored from
    the live page before ``spider_png`` can locate anything.
    """

    # Element whose text is used as the per-product directory name.
    TITLE_XPATH = '//*[@]'
    # <img> elements of the gallery at the top of the product page.
    HEADER_IMG_XPATH = '//*[@]/div//img|//*[@]//img'
    # <img> elements of the product-detail section.
    DETAIL_IMG_XPATH = '//*[@]//img'

    # Thumbnail size tokens embedded in image URLs, such as "s450x450_" or
    # "@100h_100w"; removing them yields the URL without the size restriction.
    _SIZE_TOKEN_RE = re.compile(r's\d+x\d+_|@\d+h_\d+w')
    # Detail images that are rendered as CSS backgrounds in inline styles:
    # "...image:url(//host/path.jpg); height:...". Group 1 is "host/path.jpg".
    _BACKGROUND_IMAGE_RE = re.compile(r'image:url\(//(.*?\.jpg)\); height:')
    # Characters that cannot be used (or act as separators) in a directory name.
    _UNSAFE_PATH_CHARS_RE = re.compile(r'[\\/:?"<>|]')
    # Seconds to wait for an image download before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, username: str = '******', password: str = '**********') -> None:
        """Start Chrome and log in to the site.

        Args:
            username: Account name typed into the login form.
            password: Password typed into the login form.

        Raises:
            Exception: Whatever Selenium raises if the browser cannot be started
                or a login element is not found; the browser is closed first.
        """
        # Whitespace-separated list of product-detail page URLs to scrape.
        self.hrefs = """https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100036129448&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100012935520&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100017067564&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100002345600&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=900005329401&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100021111894&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD1422752&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=900007632001&saleType=0&pageType=2
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD5046984&saleType=0&pageType=2
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD1028001&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100002837215&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD3920768&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100013045806&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD7423549&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=900008132501&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100016429124&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD5504872&saleType=0&pageType=7
https://card.xinfuli.net/web/#/mall/productDetail?goodsCode=JD100012375204&saleType=0&pageType=7"""
        chrome_options = Options()
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.maximize_window()
            # Injected into every new document before page scripts run, so that
            # navigator.webdriver reads as undefined.
            self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    })
                """
            })
            self.driver.implicitly_wait(10)
            self._login(username, password)
        except BaseException:
            # Do not leave an orphaned browser behind when setup fails.
            self.driver.quit()
            raise

    def _login(self, username: str, password: str) -> None:
        """Open the landing page and submit the login form."""
        self.driver.get('https://card.xinfuli.net/web/#/')
        sleep(1)
        self.driver.find_element(By.CLASS_NAME, 'loginBtn').click()
        sleep(1)
        # The placeholders read "please enter user name" / "please enter password".
        self.driver.find_element(By.XPATH, '//*[@placeholder="请输入用户名"]').send_keys(username)
        self.driver.find_element(By.XPATH, '//*[@placeholder="请输入密码"]').send_keys(password)
        # Button labelled "log in" (the label includes the surrounding spaces).
        self.driver.find_element(By.XPATH, '//*[text()=" 登录 "]').click()
        sleep(1)

    def close(self) -> None:
        """Quit the browser and end the WebDriver session."""
        self.driver.quit()

    def spider_png(self, start: int = 16) -> None:
        """Scrape the images of the product pages in ``self.hrefs``.

        For every page a directory named after the product title is created in
        the current working directory; gallery images are saved there as
        ``头部_<n>.jpg`` and detail images as ``详情_<n>.jpg``.

        Args:
            start: Index (into the whitespace-split ``self.hrefs``) of the first
                URL to process. Defaults to 16, the previously hard-coded value,
                which processes only the last two of the 18 URLs; pass 0 to
                process all of them.

        Raises:
            Exception: Selenium, requests and OS errors propagate, except for
                failures on individual detail ``<img>`` elements, which are
                reported and skipped.
        """
        for href in self.hrefs.split()[start:]:
            print(href)
            self.driver.get(href)
            # The URLs differ only after "#"; presumably the refresh is what
            # forces the page to actually load the new product.
            self.driver.refresh()
            sleep(2)
            title = self.driver.find_element(By.XPATH, self.TITLE_XPATH).text
            path = self._safe_dirname(title)
            os.makedirs(path, exist_ok=True)
            self._save_header_images(path)
            self._save_detail_images(path)

    @classmethod
    def _safe_dirname(cls, title: str) -> str:
        """Turn a product title into a usable directory name.

        ``*`` becomes ``x`` (as in "6*330ml" -> "6x330ml"); path separators and
        other characters that file systems reject become ``_``.
        """
        return cls._UNSAFE_PATH_CHARS_RE.sub('_', title.replace('*', 'x'))

    @classmethod
    def _full_size_url(cls, url: str) -> str:
        """Return *url* with any thumbnail size token removed."""
        return cls._SIZE_TOKEN_RE.sub('', url)

    @classmethod
    def _background_image_urls(cls, page_source: str) -> list:
        """Return absolute URLs of the CSS background images in *page_source*."""
        return ['http://' + rest for rest in cls._BACKGROUND_IMAGE_RE.findall(page_source)]

    def _download(self, url: str, dest: str) -> None:
        """Fetch *url* and write the response body to the file *dest*."""
        content = requests.get(url, timeout=self.REQUEST_TIMEOUT).content
        with open(dest, 'wb') as f:
            f.write(content)

    def _save_header_images(self, path: str) -> None:
        """Save the gallery images of the current page as ``头部_<n>.jpg``."""
        index = 1
        for img in self.driver.find_elements(By.XPATH, self.HEADER_IMG_XPATH):
            src = img.get_attribute('src')
            if not src:
                # An <img> without a src has nothing to download.
                continue
            self._download(self._full_size_url(src), os.path.join(path, '头部_%d.jpg' % index))
            index += 1

    def _save_detail_images(self, path: str) -> None:
        """Save the detail images of the current page as ``详情_<n>.jpg``.

        Detail images are collected both from CSS backgrounds in the page source
        and from ``<img>`` elements. One counter is shared by both sources so
        that files from the second source never overwrite those from the first.
        """
        index = 1
        for url in self._background_image_urls(self.driver.page_source):
            self._download(url, os.path.join(path, '详情_%d.jpg' % index))
            index += 1
        for img in self.driver.find_elements(By.XPATH, self.DETAIL_IMG_XPATH):
            try:
                self._download(img.get_attribute('src'), os.path.join(path, '详情_%d.jpg' % index))
            except Exception as exc:
                # Best effort: one broken image must not abort the whole product.
                print('skipped a detail image: %s' % exc)
            else:
                index += 1
# Run the scraper, and always shut the browser down afterwards: without quit()
# the Chrome window and its driver process outlive the script, also on errors.
scraper = Xfl()
try:
    scraper.spider_png()
finally:
    scraper.driver.quit()