栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

python根据财务指标寻找价值股票

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

python根据财务指标寻找价值股票

确定股票池

from concurrent.futures import ThreadPoolExecutor
import urllib
import os
from time import sleep
import pandas as pd
# Candidate Shanghai Stock Exchange codes: "600000" through "605999".
shanghaicode = [str(code) for code in range(600000, 606000)]

# Candidate Shenzhen Stock Exchange codes: "000000" through "004999",
# zero-padded to six digits.
shenzhencode = [f"{code:06d}" for code in range(5000)]

 爬取数据

def get_data(num, max_retries=5):
    """Download the income-statement CSV for stock code *num*.

    The file is fetched from quotes.money.163.com and written to
    ``利润表_multi/<num>.csv``. Nothing is downloaded when that file already
    exists. A 404 response is reported and treated as final; any other
    failure is printed and retried after a one-second pause.

    Args:
        num: Stock code; it is converted with ``str`` to build the URL and
            the file name.
        max_retries: Maximum number of download attempts before giving up.

    Returns:
        None.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``request`` and ``error`` submodules are loaded.
    import urllib.error
    import urllib.request

    out_dir = '利润表_multi/'
    path = out_dir + str(num) + '.csv'

    # Check the cache before touching the network so an existing file never
    # costs a request.
    if os.path.exists(path):
        print(path + " already existed!!!")
        return

    # Without the directory every write would fail on every attempt.
    os.makedirs(out_dir, exist_ok=True)

    url = 'http://quotes.money.163.com/service/lrb_' + str(num) + '.html'
    for _ in range(max_retries):
        try:
            with urllib.request.urlopen(url, timeout=2) as response:
                content = response.read()
            with open(path, 'wb') as f:
                f.write(content)
        except urllib.error.HTTPError as e:
            if e.code == 404:
                # The code does not exist on the server; retrying is useless.
                print(f"{num} : {e}")
                return
            print(e)
        except Exception as e:
            # Best-effort worker: report the failure and try again.
            print(e)
        else:
            print(num)
            sleep(1)  # throttle requests after a successful download
            return
        sleep(1)  # short pause before the next attempt

    print(f"{num} : giving up after {max_retries} attempts")

多线程运作

# Download with a pool of 10 worker threads, Shenzhen codes first and then
# Shanghai codes; leaving each `with` block waits for that pool to finish.
for codes in (shenzhencode, shanghaicode):
    with ThreadPoolExecutor(max_workers=10) as executor:
        executor.map(get_data, codes)

 读取本地数据

def generatefile(path):
    """Return the names of the files located directly inside *path*.

    Args:
        path: Directory to list.

    Returns:
        A list of file names (without directory components). The list is
        empty when *path* contains no files or cannot be walked.
    """
    # os.walk yields the top directory first; taking only that entry keeps
    # files from nested directories from replacing the top-level listing.
    _, _, filenames = next(os.walk(path), (path, [], []))
    return filenames
def _annual_net_profit(csv_path):
    """Return summed net profit per 报告日期 group, newest group first.

    The CSV is read with GBK encoding and no header, then transposed so the
    first original row supplies the column names. The last transposed row and
    the last (latest) 报告日期 group are dropped, as in the original script.
    """
    raw = pd.read_csv(csv_path, encoding='gbk', header=None)
    table = raw.T
    table.columns = table.loc[0]
    # Drop the row that became the header and the final row.
    table = table[1:-1]
    # Copy so the column assignment below does not write into a slice.
    table = table[['报告日期', '净利润(万元)']].copy()
    table['净利润(万元)'] = table['净利润(万元)'].astype(int)
    # NOTE(review): rows are grouped by the raw 报告日期 value because the
    # date conversion that used to precede this step is disabled - confirm
    # whether grouping per calendar year was intended.
    totals = table.groupby('报告日期')['净利润(万元)'].sum()
    # The original comment says this slice removes 2021 (the latest group).
    totals = totals.iloc[:-1]
    return totals.to_numpy()[::-1]


def _grew_each_period(profits, ratio, periods=4):
    """Return True when each of the newest *periods* steps grew by >= *ratio*.

    *profits* is ordered newest first; ``periods + 1`` values are required.
    """
    if len(profits) < periods + 1:
        return False
    newer = profits[:periods]
    older = profits[1:periods + 1]
    # NOTE(review): a negative or zero earlier value makes this quotient
    # misleading (e.g. a growing loss yields a positive rate) - confirm the
    # intended treatment of loss-making periods.
    growth = (newer - older) / older
    return bool((growth >= ratio).all())


datapath = '利润表_multi/'
datalist = generatefile(datapath)
ratio = 0.3  # minimum growth required for every compared period
invest = []
for data in datalist:
    try:
        profits = _annual_net_profit(datapath + data)
        if _grew_each_period(profits, ratio):
            invest.append(data)
    except Exception as e:
        # Best effort: report a file that cannot be parsed and move on.
        print(data + f':{e}')
invest = pd.DataFrame(invest)
# NOTE(review): recent pandas releases may lack a writer for the .xls
# extension - confirm the installed version can write this file.
invest.to_excel('连续4年增长30%.xls')

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/767114.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号