个人 tushare ID:458135
1.导入所需的库,设置全局变量
import calendar
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import tushare as ts
from scipy import stats
from sklearn import linear_model
# from linearmodels import FamaMacBeth
数据源采用的是 Tushare 大数据社区平台(waditu.com)。除使用免费的 basic 接口外,还可以通过提交学生证明获取积分,从而使用大多数接口完成代码测试任务,详情见 Tushare 大数据社区官网(waditu.com)。
2.F-score因子原理
参见石川等著《因子投资:方法与实践》第193页。
3.获取数据(以000001.SZ为例,实际上银行业会缺短期负债等数据,最好换一个公司)
# Tushare API setup.
# The token is taken from the TUSHARE_TOKEN environment variable so the real
# credential does not have to be written into the source file; the original
# placeholder string remains the fallback when the variable is not set.
token = os.environ.get('TUSHARE_TOKEN', '你的token')
ts.set_token(token)
pro = ts.pro_api(token)
# Build the list of stock codes (only needs to be run once).
# stock_basic is queried once per list_status value ('L', 'D', 'P' --
# presumably listed / delisted / suspended; confirm against the tushare docs)
# and the three results are stacked and sorted by ts_code.
L, D, P = (
    pro.query('stock_basic', exchange='', list_status=status, fields='ts_code')
    for status in ('L', 'D', 'P')
)
Stock = pd.concat([L, D, P])
Stock.sort_values(['ts_code'], inplace=True)
stock_list = Stock['ts_code'].tolist()
def YM(data):
    """Keep one row per report period and add its year and month as columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``end_date`` column whose values convert to integers
        of the form YYYYMMDD.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``end_date`` is an integer, only the first row
        of every distinct ``end_date`` is kept (original row order and index
        labels preserved), and ``year`` / ``month`` columns are appended.
        The frame passed in is left untouched.
    """
    # assign() builds new frames instead of writing into the caller's object
    # or into a groupby slice, which avoids chained-assignment warnings.
    periods = data.assign(end_date=data['end_date'].astype('int'))
    periods = periods.groupby(by='end_date').head(1)
    return periods.assign(
        year=periods['end_date'] // 10000,
        month=periods['end_date'] % 10000 // 100,
    )
# Read the required data from the individual tushare endpoints.
income = pro.income(ts_code='000001.SZ', start_date='20140501', end_date='20210501',
                    fields='ts_code,ann_date,end_date,operate_profit,n_income_attr_p')
balance = pro.balancesheet(ts_code='000001.SZ', start_date='20140501', end_date='20210501',
                           fields='ts_code,ann_date,end_date,total_assets,total_cur_assets,total_cur_liab,total_ncl')
# n_cashflow_act is requested in addition to n_cashflow_inv_act: per the
# tushare cashflow field list the former is the net cash flow from operating
# activities (what a CFO signal needs), the latter the one from investing
# activities. NOTE(review): confirm field meanings against the tushare docs.
cash = pro.cashflow(ts_code='000001.SZ', start_date='20140501', end_date='20210501',
                    fields='ts_code,ann_date,end_date,n_cashflow_act,n_cashflow_inv_act')
share = pro.daily_basic(ts_code='000001.SZ', start_date='20140501', end_date='20210501',
                        fields='ts_code,trade_date,total_share')
fina = pro.fina_indicator(ts_code='000001.SZ', start_date='20140501', end_date='20210501',
                          fields='ts_code,ann_date,end_date,grossprofit_margin,assets_turn')

# One row per report period, with integer end_date plus year / month columns.
income = YM(income)
balance = YM(balance)
cash = YM(cash)
fina = YM(fina)

# For every report period pick the share count of the latest trading day
# strictly before the period end.
date_list = income['end_date'].to_list()
share['trade_date'] = share['trade_date'].astype('int')
# Newest trade first, so the first remaining row after filtering is the
# latest trading day before the period end.
share = share.sort_values('trade_date', ascending=False)
share_rows = []
for period_end in date_list:
    earlier = share[share['trade_date'] < period_end]
    # A period with no earlier trading day still gets a row (all-NaN values),
    # so share_list stays positionally aligned with date_list.
    row = earlier.iloc[0].to_dict() if len(earlier) else {}
    row['end_date'] = period_end
    share_rows.append(row)
share_list = pd.DataFrame(share_rows, columns=['end_date', *share.columns])
tushare 返回的利润表、现金流量表数据是报告期累计值,没有经过 TTM(滚动十二个月)处理,这里先把它们转换成 TTM 口径;同时编写一个计算“同比变化”(本期值减去上年同期值)的函数(也可以直接用 shift(4) 近似处理)。
def TTM(data, year, month, table):
    """Return the trailing-twelve-month value of column ``data``.

    ``table`` must have ``year`` and ``month`` columns and contain a row for
    (``year``, ``month``).

    * ``month == 12``: the value of that row is returned unchanged.
    * Otherwise, when ``table`` also has rows for the same month and for
      month 12 of the previous year, the result is
      ``current + previous_year_month_12 - previous_year_same_month``.
    * Otherwise the current value is scaled by ``12 / month``.
    """
    def _cells(y, m):
        # Values of `data` in the rows matching the given year and month.
        return table.loc[(table['year'] == y) & (table['month'] == m), data]

    if month == 12:
        return _cells(year, month).values[0]

    prev_same = _cells(year - 1, month)
    prev_annual = _cells(year - 1, 12)
    if len(prev_same) and len(prev_annual):
        return _cells(year, month).values[0] + prev_annual.values[0] - prev_same.values[0]

    # Not enough history: annualise the year-to-date figure.
    return _cells(year, month).values[0] * 12 / month
def Delta_year(data, year, month, table):
    """Return the year-over-year change of column ``data``.

    The result is the value at (``year``, ``month``) minus the value at
    (``year - 1``, ``month``) in ``table``. When ``table`` has no row for the
    same month of the previous year, 0 is returned.
    """
    same_month = table['month'] == month
    previous = table.loc[(table['year'] == year - 1) & same_month, data]
    if not len(previous):
        return 0
    current = table.loc[(table['year'] == year) & same_month, data]
    return current.values[0] - previous.values[0]
4.制作F-score因子
# Trailing-twelve-month operating profit and net income.
income['OP_TTM'] = income.apply(lambda x: TTM('operate_profit', x['year'], x['month'], income), axis=1)
income['NI_TTM'] = income.apply(lambda x: TTM('n_income_attr_p', x['year'], x['month'], income), axis=1)

# CFO should be built from the operating cash flow. In tushare's cashflow
# table that is n_cashflow_act, while n_cashflow_inv_act is the investing
# cash flow (NOTE(review): confirm against the tushare field list). The
# operating column is used whenever `cash` carries it; otherwise this falls
# back to n_cashflow_inv_act so the script still runs on a table that only
# has that column.
cfo_field = 'n_cashflow_act' if 'n_cashflow_act' in cash.columns else 'n_cashflow_inv_act'
cash['CFO_TTM'] = cash.apply(lambda x: TTM(cfo_field, x['year'], x['month'], cash), axis=1)

# Average of total assets in this row and the next one; assumes rows are
# ordered newest period first, so the next row is the previous report --
# TODO confirm. The last row has no successor and gets NaN.
balance['TA_mean'] = (balance['total_assets'] + balance['total_assets'].shift(-1)) / 2
balance['LEVER'] = balance['total_ncl'] / balance['total_assets']
balance['LIQUID'] = balance['total_cur_assets'] / balance['total_cur_liab']

# Natural joins on all shared columns.
data = pd.merge(income, balance)
data = pd.merge(data, cash)
data = pd.merge(data, fina)
# NOTE(review): assigned by index label, so this relies on share_list rows
# being in the same order as the merged rows -- verify if a merge drops rows.
data['total_share'] = share_list['total_share']
data.fillna(0, inplace=True)

# fillna(0) turns a missing TA_mean into 0; dividing by it would give +/-inf,
# which then counts as a positive ROA / CFOA. Treat a zero denominator as
# missing so those ratios become NaN and earn no point.
avg_assets = data['TA_mean'].replace(0, np.nan)
data['ROA'] = data['NI_TTM'] / avg_assets
data['ROA_d'] = data.apply(lambda x: Delta_year('ROA', x['year'], x['month'], data), axis=1)
data['CFOA'] = data['CFO_TTM'] / avg_assets
data['AP'] = (data['OP_TTM'] - data['CFO_TTM']) / avg_assets
data['LEVER_d'] = data.apply(lambda x: Delta_year('LEVER', x['year'], x['month'], data), axis=1)
data['LIQUID_d'] = data.apply(lambda x: Delta_year('LIQUID', x['year'], x['month'], data), axis=1)
# Change in share count versus the next row (the previous report under the
# newest-first assumption above).
data['EQ_OFFER'] = data['total_share'] - data['total_share'].shift(-1)
data['MARGIN_d'] = data.apply(lambda x: Delta_year('grossprofit_margin', x['year'], x['month'], data), axis=1)
data['TURN_d'] = data.apply(lambda x: Delta_year('assets_turn', x['year'], x['month'], data), axis=1)
data['F_score'] = 0
def F_score(ROA, ROA_d, CFOA, AP, LEVER_d, LIQUID_d, EQ_OFFER, MARGIN_d, TURN_d):
    """Return the Piotroski F-score: the number of favourable signals (0-9).

    One point is awarded for each of:

    * ``ROA > 0``       -- positive return on assets
    * ``ROA_d > 0``     -- return on assets improved
    * ``CFOA > 0``      -- positive operating cash flow over assets
    * ``AP < 0``        -- negative accruals (cash flow exceeds profit)
    * ``LEVER_d < 0``   -- leverage decreased
    * ``LIQUID_d > 0``  -- current ratio increased
    * ``EQ_OFFER <= 0`` -- no increase in shares outstanding
    * ``MARGIN_d > 0``  -- gross margin improved
    * ``TURN_d > 0``    -- asset turnover improved

    NaN inputs compare as False and therefore earn no point.
    """
    signals = (
        ROA > 0,
        ROA_d > 0,
        CFOA > 0,
        # AP is (profit - cash flow) / assets, so a good signal is AP < 0.
        AP < 0,
        LEVER_d < 0,
        # A rising current ratio is the favourable direction.
        LIQUID_d > 0,
        EQ_OFFER <= 0,
        MARGIN_d > 0,
        TURN_d > 0,
    )
    return sum(1 for passed in signals if passed)
# Score every report period from its nine signal columns (passed to F_score
# in the order of its parameters), then keep only the identifying columns
# and the score, with ann_date converted to an integer.
data['F_score'] = data[
    ['ROA', 'ROA_d', 'CFOA', 'AP', 'LEVER_d', 'LIQUID_d', 'EQ_OFFER', 'MARGIN_d', 'TURN_d']
].apply(lambda signals: F_score(*signals), axis=1)
F = data.loc[:, ['ts_code', 'ann_date', 'end_date', 'F_score']]
F = F.assign(ann_date=F['ann_date'].astype('int'))
5.将F-score加入指标
grouped_all 是之前获取并保存的全量数据表(GroupedFamaData1620.csv),生成它的代码已经找不到了。
# Load the previously exported panel and keep this stock's rows and the
# columns needed below.
grouped_all = pd.read_csv('GroupedFamaData1620.csv')
BM_close = grouped_all.loc[
    grouped_all['ts_code'] == '000001.SZ',
    ['ts_code', 'trade_date', 'BM', 'total_mv', 'close'],
]
# trade_date is split on '-' into integer parts; the first two parts are
# combined with a fixed day of 31 into an integer key part0*10000 + part1*100 + 31.
data_df = BM_close['trade_date'].str.split('-', expand=True).astype('int')
data_df['time'] = data_df[0] * 10000 + data_df[1] * 100 + 31
# Append the key and a placeholder F_score column.
BM_close = pd.concat([BM_close, data_df['time']], axis=1)
BM_close = BM_close.assign(F_score=0)
def choose_F(time, F):
    """Return the F-score of the first report announced on or before ``time``.

    Parameters
    ----------
    time : int
        Cut-off, compared directly against ``F['ann_date']``.
    F : pandas.DataFrame
        Needs ``ann_date`` and ``F_score`` columns. The first matching row in
        the frame's own order is used, so the frame is expected to list the
        newest report first.

    Returns
    -------
    The ``F_score`` of that row, or NaN when no report had been announced by
    ``time`` (instead of raising IndexError).
    """
    known = F.loc[F['ann_date'] <= time, 'F_score']
    if known.empty:
        return float('nan')
    return known.iloc[0]
# For every row, look up the F-score that applies at its `time` key.
BM_close['F_score'] = BM_close['time'].map(lambda cutoff: choose_F(cutoff, F))



