# Fund return ranking (original page title: 基金收益排名)

import xlwings as xw
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import xlwt
import xlwings as xw
from selenium import webdriver
import time
import pandas as pd
import csv
import re
from selenium.webdriver import Chrome, ChromeOptions, ActionChains
import multiprocessing

def web(url):
    """Fetch ``url`` with the shared Selenium ``driver`` and parse the result.

    The page is loaded, refreshed and given half a second before its source
    is read. The raw source is also written to ``FundRank.html``, replacing
    the snapshot left by any previously fetched page.

    Args:
        url: Address of the page to load.

    Returns:
        The ``BeautifulSoup`` tree built from the page source with
        ``html.parser``.
    """
    driver.get(url)
    # NOTE(review): the refresh presumably forces a reload when only the URL
    # fragment changed between two calls -- confirm.
    driver.refresh()
    time.sleep(0.5)

    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Keep a copy of the latest page on disk.
    with open('FundRank.html', 'w', encoding='utf-8-sig') as snapshot:
        snapshot.write(page_source)
    print('写入网页')
    return soup
def data(html):
    """Extract a fund's basic information from its parsed overview page.

    The fields are read, in order, from the children of the second
    ``<div class="ti-left">`` element. Children whose text is a lone newline
    are skipped.

    Args:
        html: Parsed page exposing ``find_all`` (the ``BeautifulSoup`` tree
            returned by ``web``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns 基金名称, 基金代码,
        基金类型 and 基金风险.

    Raises:
        IndexError: If the page has fewer than two matching blocks or the
            block yields fewer than four fields.
    """
    info_block = html.find_all('div', attrs={'class': 'ti-left'})[1]

    fields = []
    for node in info_block:
        try:
            text = node.text
        except AttributeError:
            # Best effort: a node without ``text`` carries no field.
            print('基本信息错误')
            continue
        if text != '\n':
            fields.append(text)

    df = pd.DataFrame({
        '基金名称': [fields[0]],
        '基金代码': [fields[1]],
        '基金类型': [fields[2]],
        '基金风险': [fields[3]],
    })
    print(df)
    return df

def data_b(html):  # fund management info
    """Extract fund-manager information from the parsed overview page.

    The text of every ``<p>`` inside each ``<div class="all">`` is collected
    with newlines, tabs and spaces removed. The result columns are taken from
    fixed positions in that list.

    Args:
        html: Parsed page exposing ``find_all`` (the ``BeautifulSoup`` tree
            returned by ``web``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns 基金经理, 从业年均回报,
        从业时间, 最大盈利 and 最大回撤.

    Raises:
        IndexError: If fewer than eight paragraphs were collected.
    """
    strip_ws = str.maketrans('', '', '\n\t ')

    entries = []
    for block in html.find_all('div', attrs={'class': 'all'}):
        for paragraph in block.find_all('p'):
            try:
                entries.append(paragraph.text.translate(strip_ws))
            except AttributeError:
                print('管理信息错误')

    df = pd.DataFrame({
        '基金经理': [entries[0]],
        # The label prefixes are part of the scraped text; keep only the value.
        '从业年均回报': [entries[1].replace('任职回报：', '')],
        '从业时间': [entries[2].replace('从业时间：', '')],
        '最大盈利': [entries[5]],
        '最大回撤': [entries[7]],
    })
    print(df)
    return df

def data_c(html):  # fund performance
    """Extract the fund's return figures from the parsed ranking page.

    The text of every ``<td>`` found under the children of the first
    ``<tbody id="tableCtn">`` is flattened into one list. The result columns
    are taken from fixed positions in that list.

    Args:
        html: Parsed page exposing ``find_all`` (the ``BeautifulSoup`` tree
            returned by ``web``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns 近1周, 近1月, 近3月,
        近6月, 近1年, 年度排名, 近2年, 近3年, 近5年 and 成立以来.

    Raises:
        IndexError: If the table is missing or holds fewer than 34 cells.
    """
    # (column name, position in the flattened cell list)
    layout = (
        ('近1周', 1),
        ('近1月', 5),
        ('近3月', 9),
        ('近6月', 13),
        ('近1年', 17),
        ('年度排名', 19),
        ('近2年', 21),
        ('近3年', 25),
        ('近5年', 29),
        ('成立以来', 33),
    )

    table_body = html.find_all('tbody', attrs={'id': 'tableCtn'})[0]

    cells = []
    for row in table_body:
        # Text nodes between rows cannot be searched; only element rows
        # contribute cells.
        if not hasattr(row, 'find_all'):
            continue
        for cell in row.find_all('td'):
            try:
                cells.append(cell.text)
            except AttributeError:
                print('基本信息错误')

    df = pd.DataFrame({name: [cells[position]] for name, position in layout})
    print(df)
    return df

def data_d(html):  # net value and fund size
    """Extract net value and fund-company details from the parsed history page.

    Two lists are built from every ``<div class="t-right">``:

    * the text of each ``<b>`` inside a ``<div class="data">`` (net value);
    * the text of each child of the ``<ul class="company">`` (company info).

    Newlines, tabs and spaces are removed from every entry. The result
    columns are taken from fixed positions in those lists.

    Args:
        html: Parsed page exposing ``find_all`` (the ``BeautifulSoup`` tree
            returned by ``web``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns 当前净值, 基金公司,
        基金规模 and 成立时间.

    Raises:
        IndexError: If no net value or fewer than five company entries were
            found.
    """
    strip_ws = str.maketrans('', '', '\n\t ')
    panels = html.find_all('div', attrs={'class': 't-right'})

    # Net value figures.
    net_values = []
    for panel in panels:
        for data_box in panel.find_all('div', attrs={'class': 'data'}):
            for bold in data_box.find_all('b'):
                try:
                    net_values.append(bold.text.translate(strip_ws))
                except AttributeError:
                    print('净值信息错误')

    # Fund company details.
    company_info = []
    for panel in panels:
        company = panel.find('ul', attrs={'class': 'company'})
        if company is None:
            # A panel without a company list has nothing to contribute.
            continue
        for entry in company:
            try:
                company_info.append(entry.text.translate(strip_ws))
            except AttributeError:
                print('公司信息错误')

    df = pd.DataFrame({
        '当前净值': [net_values[0]],
        # The label prefixes are part of the scraped text; keep only the value.
        '基金公司': [company_info[4].replace('管理人：', '')],
        '基金规模': [company_info[0].replace('基金规模：', '')],
        '成立时间': [company_info[1].replace('成立时间：', '')],
    })
    print(df)
    return df

def data_e(html):  # dividends
    """Extract the dividend amount and date from the parsed history page.

    The values come from the children of the 32nd ``<ul class="data">`` on
    the page (index 31). Newlines, tabs and spaces are removed from each
    child's text and empty results are dropped. The result columns are taken
    from positions 3 and 4 of what remains.

    NOTE(review): index 31 is a hard-coded position, presumably the first
    row of the third ``s-list`` group -- confirm against the live page.

    Args:
        html: Parsed page exposing ``find_all`` (the ``BeautifulSoup`` tree
            returned by ``web``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns 分红金额 and 分红时间.

    Raises:
        IndexError: If the page has no ``fundshare fund-module`` block, fewer
            than three ``s-list`` blocks, an empty third ``s-list``, fewer
            than 32 ``ul.data`` lists, or fewer than five non-empty entries.
    """
    strip_ws = str.maketrans('', '', '\n\t ')

    # The dividend row is only trusted when the surrounding dividend section
    # is actually present on the page.
    if not html.find_all('div', attrs={'class': 'fundshare fund-module'}):
        raise IndexError('dividend section not found')
    third_group = html.find_all('div', attrs={'class': 's-list'})[2]
    if not list(third_group):
        raise IndexError('dividend list is empty')

    dividend_row = html.find_all('ul', attrs={'class': 'data'})[31]

    entries = []
    for node in dividend_row:
        try:
            text = node.text.translate(strip_ws)
        except AttributeError:
            print('分红信息错误')
            continue
        if text != '':
            entries.append(text)

    df = pd.DataFrame({
        '分红金额': [entries[3]],
        '分红时间': [entries[4]],
    })
    print(df)
    return df

# One row per successfully scraped fund; rebuilt by ``main``.
df = pd.DataFrame()

# (zero-based worksheet column, record key) pairs used to fill FundRank.xlsx.
_SHEET_COLUMNS = (
    (1, '基金类型'),
    (3, '基金名称'),
    (4, '基金代码'),
    (6, '基金风险'),
    (7, '基金公司'),
    (8, '基金经理'),
    (9, '从业年均回报'),
    (10, '从业时间'),
    (12, '分红时间'),
    (13, '分红金额'),
    (14, '当前净值'),
    (16, '最大盈利'),
    (17, '最大回撤'),
    (18, '近1周'),
    (19, '近1月'),
    (20, '近3月'),
    (21, '近6月'),
    (22, '近1年'),
    (23, '近2年'),
    (24, '近3年'),
    (25, '近5年'),
    (26, '成立以来'),
    (27, '年度排名'),
    (28, '成立时间'),
    (29, '基金规模'),
)


def _fund_code(cell):
    """Return the fund code stored in a worksheet cell as a string.

    Whole-number cells are zero-padded to six digits, so a numeric cell
    holding 3298.0 becomes '003298' instead of '3298.0'.
    """
    if isinstance(cell, (int, float)) and float(cell).is_integer():
        return str(int(cell)).zfill(6)
    return str(cell).strip()


def _scrape_fund(code):
    """Scrape all pages for one fund and return its data as a one-row frame."""
    url = 'http://fund.10jqka.com.cn/' + code + '/'  # basic information
    url_increase = 'https://fund.10jqka.com.cn/public/newfund/syrank.html#' + code  # returns
    url_share = 'http://fund.10jqka.com.cn/' + code + '/historynet.html#dividends'  # dividends, size

    print(url)
    html = web(url)
    df_a = data(html)
    time.sleep(0.5)
    df_b = data_b(html)
    time.sleep(0.5)

    print(url_increase)
    df_c = data_c(web(url_increase))
    time.sleep(0.5)

    print(url_share)
    html_share = web(url_share)
    df_d = data_d(html_share)
    time.sleep(0.5)
    df_e = data_e(html_share)
    time.sleep(0.5)

    return pd.concat([df_a, df_b, df_c, df_d, df_e], axis=1)


def _write_sheet(sh, targets, records):
    """Write each record into the worksheet row its fund code was read from.

    ``targets`` holds one ``(sequence number, zero-based row)`` pair per
    record, in the same order as ``records``.
    """
    for idx, ((seq, row), record) in enumerate(zip(targets, records)):
        try:
            sh.cells[row, 0].value = seq
            for col, key in _SHEET_COLUMNS:
                sh.cells[row, col].value = record[key]
            print(str(idx), 'excel写入正常')
        except Exception as exc:
            print(str(idx), 'excel写入错误', exc)


def main():
    """Scrape every fund listed in column E of FundRank.xlsx and save the results.

    The first non-empty cell of column E is treated as the header; each
    following non-empty cell is a fund code. Every successfully scraped fund
    is appended to FundRank.csv, included in FundRank.json and written back
    to its own worksheet row. Funds that fail are reported and skipped, and
    their rows are left untouched.
    """
    # ``web`` reads the module-level ``driver``; ``df`` stays module-level
    # as in the original script.
    global df, driver
    df = pd.DataFrame()

    # Open Excel.
    app = xw.App(visible=True, add_book=False)
    try:
        wb = app.books.open('FundRank.xlsx')
        sh = wb.sheets['FundRank']
        column_e = sh.range('E:E').value
        # Remember the row of every code so results land on the right row
        # even when some funds fail.
        filled = [(row, cell) for row, cell in enumerate(column_e) if cell is not None]
        funds = filled[1:]
        app.display_alerts = False
        app.screen_updating = False

        # Open the browser.
        opt = ChromeOptions()
        opt.headless = False
        driver = Chrome(options=opt)

        frames = []
        targets = []
        try:
            for position, (row, cell) in enumerate(funds):
                started = time.time()
                code = _fund_code(cell)
                print(code)
                try:
                    fund_df = _scrape_fund(code)
                except Exception as exc:
                    # Skip this fund entirely so no stale data is stored for it.
                    print(str(position), '错误', exc)
                    continue
                print('总耗时{}'.format(time.time() - started))

                frames.append(fund_df)
                targets.append((position + 1, row))
                df = pd.concat(frames, axis=0, ignore_index=True)
                print(df)
                try:
                    # Append only the new row; the CSV already holds earlier ones.
                    fund_df.to_csv('FundRank.csv', mode='a', header=False, index=False,
                                   encoding='utf-8-sig', sep=',')
                    # The JSON file is rewritten in full after every fund.
                    df.to_json('FundRank.json', orient='records', indent=1, force_ascii=False)
                    print('写入jason正常')
                except Exception as exc:
                    print('写入jason错误', exc)
        finally:
            # ``quit`` closes every window and ends the session.
            driver.quit()

        _write_sheet(sh, targets, df.to_dict(orient='records'))

        try:
            wb.save('FundRank.xlsx')
            wb.close()
        except Exception as exc:
            print('有错误代码', exc)
    finally:
        app.quit()


if __name__ == "__main__":
    main()
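# Fund return ranking (original page title: 基金收益排名)

# Web-page footer residue: "Popular articles this month in the Python section" (Python相关栏目本月热门文章)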