PS:主要使用selenium和谷歌驱动
这个里面难点主要是如何自动登录163邮箱,因为163邮箱里面模块的id或者classname都是动态码
所以首先需要定位登录框的frame
这一步
需要使用selenium中XPath的模糊定位starts-with
driver.find_element_by_xpath("//iframe[starts-with(@id, 'x-URS-iframe')]")
之后找到账号框的name:email;和密码框的name:password
之后输入对应的邮箱账号密码
为了防止出现登录验证码,使用了time.sleep来模拟人工登录
之后,去判断是否存在未读邮件,如果存在,则点击进入
点击之后,进入邮件内容界面,之后选择对应的验证码所在行,进行提取
具体代码:
# -*- coding: utf-8 -*-
import time
import os
from selenium import webdriver
from bs4 import BeautifulSoup
# from docx import document
# acount_num = input('请输入账号:\n')
# passwd_str = input('请输入密码:\n')
def dynamic_code():
    """Log in to 163 webmail with Chrome and return a code parsed from a mail.

    The flow is driven entirely through the browser UI: open the login page,
    fill in the login form (which lives inside an iframe whose id is
    generated dynamically but starts with ``x-URS-iframe``), open a message,
    and read the ``<p>`` that is the second child of ``#content`` in the
    message body.

    Returns:
        str: The text found between the first and second ``:`` of that
        paragraph, with its final character dropped.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If any of the
            page elements cannot be located (e.g. the page layout changed).
        IndexError: If the paragraph text contains no ``:``.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block being changed.
    from selenium.webdriver.common.by import By

    # NOTE(review): this path looks like it lost its backslashes, and the
    # "webdriver.ie.driver" key does not appear to be read by Selenium's
    # Python bindings -- kept unchanged pending confirmation.
    chromedriver = "C:Program FilesGoogleChromeApplication"
    os.environ["webdriver.ie.driver"] = chromedriver

    option = webdriver.ChromeOptions()
    option.add_argument("headless")
    # The driver has to be created before use; run Chrome with the options
    # built above (no visible browser window).
    driver = webdriver.Chrome(options=option)
    try:
        url = 'http://mail.163.com/'
        driver.get(url)
        time.sleep(5)

        acount_num = '*'
        passwd_str = '*'

        # The 163 login form is nested in an iframe, so switch into it first.
        # Its id is dynamic, hence the starts-with() partial match.
        login_frame = driver.find_element(
            By.XPATH, "//iframe[starts-with(@id, 'x-URS-iframe')]")
        driver.switch_to.frame(login_frame)

        acount = driver.find_element(By.NAME, 'email')
        acount.clear()
        acount.send_keys(acount_num)

        passwd = driver.find_element(By.NAME, 'password')
        passwd.clear()
        passwd.send_keys(passwd_str)

        # Pause like a human would, to make a login captcha less likely.
        time.sleep(3)
        driver.find_element(By.ID, 'dologin').click()
        time.sleep(5)

        # Leave the login iframe so the following lookups are resolved
        # against the top-level mailbox document.
        driver.switch_to.default_content()

        # Show the unread-mail status text.
        num = driver.find_element(By.CLASS_NAME, 'gWel-mailInfo-status').text
        print(num)

        # Presumably the unread-mail entry on the welcome page, then the
        # first message in the list -- absolute paths, verify against the
        # current page layout.
        driver.find_element(
            By.XPATH,
            '/html/body/div[2]/div[1]/div/div/div[2]/div[1]/div[3]/ul/li[1]/div[1]/b[1]'
        ).click()
        time.sleep(2)
        driver.find_element(
            By.XPATH,
            '/html/body/div[2]/div[1]/div[2]/div/div/div/div[3]/div[2]/div'
        ).click()
        time.sleep(2)

        # The message body is rendered inside its own iframe.
        content_frame = driver.find_element(
            By.XPATH,
            "/html/body/div[2]/div[1]/div[3]/div/div[1]/div[6]/div/iframe")
        driver.switch_to.frame(content_frame)

        # XPath partial-match patterns, for reference:
        #   "//input[starts-with(@id,'file')]"
        #   "//input[contains(@id,'_')]"
        #   "//input[ends-with(@id,'_11')]" needs XPath 2.0; browsers
        #   implement XPath 1.0, so this form is not expected to work there.

        # The paragraph holds a whole sentence; the extraction below depends
        # on the mail's wording and must be adapted if the template changes.
        text = driver.find_element(
            By.CSS_SELECTOR, '#content > p:nth-child(2)').text
        return text.split(':')[1][:-1]
    finally:
        # Always end the session so the browser and chromedriver process are
        # not leaked when any step above fails.
        driver.quit()



