Selenium 基本用法
import time
from selenium import webdriver
class Tjgb:
    """Drive Chrome through Selenium and print the entries listed inside a page's ``ml`` iframe.

    The constructor starts the browser immediately; call ``load_webpage()`` to
    open ``url`` and print each list entry, then ``close_driver()`` to shut the
    browser down.

    Args:
        url: Address of the page to open.
        if_headless: When true, Chrome is started with ``--headless``.
        chrome_driver_path: Optional path to the chromedriver executable;
            defaults to ``DEFAULT_CHROME_DRIVER_PATH``.
        binary_location: Optional path to the Chrome executable; defaults to
            ``DEFAULT_BINARY_LOCATION``.
    """

    # Machine-specific Windows defaults; pass the constructor arguments to
    # override them on other machines.
    DEFAULT_CHROME_DRIVER_PATH = r'D:\Python3.6\scripts\chromedriver.exe'
    DEFAULT_BINARY_LOCATION = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'

    def __init__(self, url, if_headless=False, chrome_driver_path=None, binary_location=None):
        self.chrome_driver_path = chrome_driver_path or self.DEFAULT_CHROME_DRIVER_PATH
        self.binary_location = binary_location or self.DEFAULT_BINARY_LOCATION
        self.url = url
        # Pool of user-agent strings; only the first entry is used by opt_config().
        self.ua_pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
        ]
        self.if_headless = if_headless
        # Must come last: base_driver() reads the attributes assigned above.
        self.driver = self.base_driver()

    def opt_config(self):
        """Build and return the ``ChromeOptions`` used to start the browser."""
        user_agent = self.ua_pool[0]
        opt = webdriver.ChromeOptions()
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether '-disable-gpu' and '--no-sandbox' were meant to apply only in
        # headless mode. Only '--headless' is conditional here -- confirm.
        if self.if_headless:
            opt.add_argument('--headless')
        opt.add_argument('-disable-gpu')
        opt.add_argument('--no-sandbox')
        # The next three settings, together with the script injected in
        # base_driver(), are intended to make the session look less like an
        # automated browser.
        opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        opt.add_experimental_option('useAutomationExtension', False)
        opt.add_argument('--disable-blink-features=AutomationControlled')
        opt.add_argument(f'user-agent={user_agent}')
        opt.binary_location = self.binary_location
        return opt

    def base_driver(self):
        """Start Chrome with ``opt_config()`` and return the driver.

        A script is registered through the DevTools protocol so that every new
        document reports ``navigator.webdriver`` as ``undefined``.
        """
        driver = webdriver.Chrome(
            options=self.opt_config(),
            executable_path=self.chrome_driver_path,
        )
        # CDP method names are case-sensitive; the exact spelling matters.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    })
                """
            },
        )
        return driver

    def load_webpage(self, wait_seconds=15):
        """Open ``self.url`` and print the title and date of each list entry.

        Entries are the ``<li>`` children of ``<ul id="fanye">`` inside the
        frame named ``ml``. For each one, the ``title`` attribute of its link
        and the text content of its ``<span>`` are printed.

        Args:
            wait_seconds: Fixed pause after navigation, in seconds, before the
                page is inspected.
        """
        self.driver.get(self.url)
        time.sleep(wait_seconds)
        # Element lookups below may additionally wait up to 10 seconds.
        self.driver.implicitly_wait(10)
        self.driver.switch_to.frame('ml')
        try:
            li_list = self.driver.find_elements_by_xpath('//ul[@id="fanye"]/li')
            print(li_list)
            for li in li_list:
                title = li.find_element_by_xpath('./a').get_attribute('title')
                # textContent is read instead of .text so the value is
                # available even when the span is not rendered.
                pub_date = li.find_element_by_xpath('./span').get_attribute('textContent')
                print(title, pub_date)
        finally:
            # Always leave the iframe so later calls start from the top-level document.
            self.driver.switch_to.default_content()

    def close_driver(self):
        """Close every open window, then quit the browser session."""
        handles = self.driver.window_handles
        print(handles)
        try:
            for handle in handles:
                self.driver.switch_to.window(handle)
                self.driver.close()
        finally:
            # quit() runs even if closing a window fails, so the chromedriver
            # process is not left behind.
            self.driver.quit()
        print('close all')
if __name__ == '__main__':
    # Alternative target pages (disabled):
    # url = 'http://tjj.hefei.gov.cn/tjyw/tjgb/index.html'
    # url = 'http://jxf.jiangxi.gov.cn/col/col41574/index.html'
    url = 'http://tjj.ezhou.gov.cn/zwgk/fdzdgknr/?itemid=2392'
    tjgb = Tjgb(url=url, if_headless=False)
    try:
        tjgb.load_webpage()
    finally:
        # Shut the browser down even when loading or parsing the page raised.
        tjgb.close_driver()
Selenium 对 iframe 的操作
# Switch into an iframe identified by its id.
driver.switch_to.frame("frame1")

# Switch into an iframe identified by its name.
driver.switch_to.frame("slider")

# Switch into an iframe located as an element: find the container by class
# name, take the <iframe> inside it, and pass that element to switch_to.frame.
iframe_container = driver.find_element_by_class_name('x-iframe')
iframe_elem = iframe_container.find_element_by_tag_name('iframe')
driver.switch_to.frame(iframe_elem)
多个iframe嵌套



