Skip to content

Commit

Permalink
pure python code
Browse files Browse the repository at this point in the history
  • Loading branch information
evelynpurse committed Jun 11, 2019
0 parents commit 15286ee
Show file tree
Hide file tree
Showing 8 changed files with 464 additions and 0 deletions.
127 changes: 127 additions & 0 deletions backtest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pandas as pd
import numpy as np
import math


def _riskrt_stats(returns):
    """Return the risk/return statistics of one periodic return series.

    :param returns: Series of simple returns whose index labels can be
        parsed by ``pd.to_datetime``.
    :return: dict with keys rt, volatility, mdd, sharpe, calmar, win.
    """
    # Cumulative net value of one unit invested at the start.
    nav = (returns + 1).cumprod()
    holding_period = (pd.to_datetime(returns.index.values[-1])
                      - pd.to_datetime(returns.index.values[0]))
    # Annualised return. ``.iloc[-1]`` is used because the index holds date
    # labels, so ``nav[-1]`` would depend on the deprecated positional
    # fallback of ``Series.__getitem__``.
    annual_ret = pow(nav.iloc[-1], 365 / holding_period.days) - 1
    # Annualised volatility: sample standard deviation scaled by sqrt(250).
    volatility = returns.std() * math.sqrt(250)
    sharpe = annual_ret / volatility
    # Maximum drawdown: worst relative distance below the running peak.
    running_peak = nav.expanding(1).max()
    mdd = (nav / running_peak - 1).min()
    calmar = annual_ret / abs(mdd)
    # Win rate: share of observations with a strictly positive return.
    win_rate = len(returns[returns > 0]) / len(returns.dropna())
    return {
        'rt': annual_ret,
        'volatility': volatility,
        'mdd': mdd,
        'sharpe': sharpe,
        'calmar': calmar,
        'win': win_rate,
    }


def cal_riskrt(filename, D):
    """Compute risk/return statistics for the rt1, rt2 and rt3 series of a CSV.

    The file is read with its first column as the index, rows containing any
    missing value are dropped, and each of the three return columns is
    evaluated independently.

    :param filename: path of a CSV file with columns ``rt1``, ``rt2`` and
        ``rt3`` and a first column of dates.
    :param D: holding period; kept for call compatibility, not used here.
    :return: DataFrame indexed by ``['rt1', 'rt2', 'rt3']`` with the columns
        ``rt``, ``volatility``, ``mdd``, ``sharpe``, ``calmar`` and ``win``
        (in that order, so ``win`` is column position 5).
    :raises ZeroDivisionError: if the first and last index dates are less
        than one day apart.
    """
    source = pd.read_csv(filename, index_col=0).dropna()
    series_names = ['rt1', 'rt2', 'rt3']
    # One row per return series, one column per statistic.
    df = pd.DataFrame(
        columns=['rt', 'volatility', 'mdd', 'sharpe', 'calmar', 'win'],
        index=series_names,
    )
    for name in series_names:
        for metric, value in _riskrt_stats(source[name]).items():
            df.loc[name, metric] = value
    return df

# Tabulate the win rate of every (ranking window, holding period, threshold)
# back-test, one table per weighting scheme:
#   df_rt1 - equal-weight portfolio          (row 'rt1' of cal_riskrt's result)
#   df_rt2 - total-market-value weighted     (row 'rt2')
#   df_rt3 - float-market-value weighted     (row 'rt3')
# Each result file is parsed once and feeds all three tables; previously the
# same loop was repeated three times and every file was read three times.
thresholds = [-0.1, -0.05, -0.02, -0.01, 0, 0.01, 0.02, 0.05, 0.1]
iterables = [['rank60', 'rank120', 'rank250'], ['D5', 'D10', 'D20', 'D40']]
index = pd.MultiIndex.from_product(iterables, names=['ranking', 'holding_period'])
df_rt1 = pd.DataFrame(index=index, columns=thresholds)
df_rt2 = pd.DataFrame(index=index, columns=thresholds)
df_rt3 = pd.DataFrame(index=index, columns=thresholds)

for rank in [60, 120, 250]:
    for D in [5, 10, 20, 40]:
        for threshold in thresholds:
            ratio = cal_riskrt(
                "rank" + str(rank) + "D" + str(D) + "threshold" + str(threshold) + "_new.csv",
                D,
            )
            row = ('rank' + str(rank), 'D' + str(D))
            # 'win' is the sixth column of cal_riskrt's result (position 5).
            df_rt1.loc[row, threshold] = ratio.loc['rt1', 'win']
            df_rt2.loc[row, threshold] = ratio.loc['rt2', 'win']
            df_rt3.loc[row, threshold] = ratio.loc['rt3', 'win']

# NOTE(review): earlier, now disabled, exports wrote these tables to Excel
# files named after each weighting scheme and to mdd_1.csv / mdd_2.csv /
# mdd_3.csv, presumably after selecting a different statistic above.
# A single-file check was also disabled:
#   test = cal_riskrt("rank60D5threshold0.1_new.csv", 5); test.to_csv("ratio.csv")

df_rt1.to_csv("win_1.csv")
df_rt2.to_csv("win_2.csv")
df_rt3.to_csv("win_3.csv")


11 changes: 11 additions & 0 deletions cal_avg_hold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pandas as pd
import numpy as np
# Average number of holdings per rebalancing date for every
# (ranking window, holding period, threshold) combination.
iterables = [['rank60', 'rank120', 'rank250'], ['D5', 'D10', 'D20', 'D40']]
index = pd.MultiIndex.from_product(iterables, names=['ranking', 'holding_period'])
df = pd.DataFrame(
    index=index,
    columns=[-0.1, -0.05, -0.02, -0.01, 0, 0.01, 0.02, 0.05, 0.1],
)
for rank in [60, 120, 250]:
    for D in [5, 10, 20, 40]:
        row_key = ('rank' + str(rank), 'D' + str(D))
        for threshold in [-0.1, -0.05, -0.02, -0.01, 0, 0.01, 0.02, 0.05, 0.1]:
            position_file = "rank" + str(rank) + "D" + str(D) + "threshold" + str(threshold) + "_position.csv"
            position = pd.read_csv(position_file, index_col=0)
            # Sum each row of the position table, then average over rows.
            holdings_per_date = position.sum(axis=1)
            df.loc[row_key, threshold] = holdings_per_date.mean()
df.to_csv("平均持仓.csv")
2 changes: 2 additions & 0 deletions cal_holding_num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import pandas as pd
import numpy as np
83 changes: 83 additions & 0 deletions cal_rt1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import pandas as pd
import numpy as np
#### Build the position information
def _load_signal(window):
    """Read factor<window>.csv and reshape it into long rows.

    The wide table is stacked (missing values kept) so that each row holds
    trade_date, stcode and the column ``signal_<window>``.
    """
    wide = pd.read_csv("factor" + str(window) + ".csv", index_col=0)
    long_rows = wide.stack(dropna=False).reset_index()
    return long_rows.rename(
        columns={'level_0': 'trade_date', 'level_1': 'stcode', 0: 'signal_' + str(window)}
    )


# Read the signal values for the three ranking windows.
signal_60 = _load_signal(60)
signal_120 = _load_signal(120)
signal_250 = _load_signal(250)

# Read the stock status table.
status = pd.read_csv("stock_status.csv")

# Inner-join the three signals on (trade_date, stcode), then attach the
# status with a left join so rows without a status record are retained.
df_combined = signal_60.merge(signal_120, on=['trade_date', 'stcode'], how='inner')
df_combined = df_combined.merge(signal_250, on=['trade_date', 'stcode'], how='inner')
df_combined = df_combined.merge(status, on=['trade_date', 'stcode'], how='left')

#建仓
def set_position(rank, threshold, D):
    """Build the holdings table, sampled once every D rows.

    A stock is marked 1 on a date when its ``signal_<rank>`` value is strictly
    above ``threshold``; the mark is multiplied by the stock's ``status`` value
    and left missing (NaN) otherwise. The module-level ``df_combined`` frame
    is only read, never modified, so repeated calls are independent.

    :param rank: ranking window: 60, 120 or 250
    :param threshold: signal threshold, e.g. -0.1, -0.05, -0.02, -0.01, 0,
        0.01, 0.02, 0.05, 0.1
    :param D: rebalancing interval in rows: 5, 10, 20 or 40
    :return: DataFrame with a ``trade_date`` column and one column per stock
        code, containing every D-th trade date starting from the first
    """
    signal = df_combined['signal_' + str(rank)]
    # 1.0 where the signal exceeds the threshold, NaN elsewhere (a missing
    # signal compares False and therefore stays NaN).
    sign = np.where(signal > threshold, 1.0, np.nan)
    # Work on a fresh frame: the previous implementation aliased df_combined
    # and wrote through chained indexing, which mutated the shared frame and
    # is not guaranteed to take effect.
    df_position = df_combined[['trade_date', 'stcode']].copy()
    # Multiply by the stock status.
    df_position['sign'] = df_combined['status'] * sign
    # Reshape to trade_date x stcode.
    df_position = df_position.pivot(index='trade_date', columns='stcode', values='sign')
    # Keep every D-th trade date.
    df_position = df_position.iloc[::D, :]
    return df_position.reset_index()

# Export the holdings table of every ranking window / holding period pair.
for rank in [60, 120, 250]:
    for D in [5, 10, 20, 40]:
        for threshold in [0.3]:
            run_name = "rank" + str(rank) + "D" + str(D) + "threshold" + str(threshold)
            print(run_name + ".csv")
            position = set_position(rank, threshold, D)
            position.to_csv(run_name + "_position.csv", index=None)


# Load prices and reshape them to a trade_date x stcode table.
price = pd.read_csv("price.csv")
price = price.pivot(index='trade_date', columns='stcode', values='price')
# Convert the date labels from YYYYMMDD to YYYY-MM-DD strings.
price.index = pd.to_datetime(price.index, format='%Y%m%d').strftime("%Y-%m-%d")
# Convert the stock codes with rqdatac's id_convert.
import rqdatac as rq
rq.init()
new_col = rq.id_convert(list(price.columns))
price.columns = new_col
##处理个股收益率
def forward_return(price, holding_period):
    """Return the simple return earned over the next ``holding_period`` rows.

    For each row t the result is ``price[t + holding_period] / price[t] - 1``.
    The last ``holding_period`` rows have no future price and are NaN.

    The previous implementation divided the current price by the future
    price, which yields P_t / P_{t+h} - 1 and so reports a loss whenever the
    price rises.

    :param price: closing prices, one row per trade date in ascending order
    :param holding_period: number of rows to look ahead
    :return: return table with the same shape and labels as ``price``
    """
    # shift(-h) aligns the price h rows ahead with the current row.
    future_price = price.shift(-1 * holding_period)
    df_return = future_price / price - 1
    return df_return
# Write one return table per holding period.
for holding_days, out_name in (
    (5, "rt_5.csv"),
    (10, "rt_10.csv"),
    (20, "rt_20.csv"),
    (40, "rt_40.csv"),
):
    forward_return(price, holding_days).to_csv(out_name)




89 changes: 89 additions & 0 deletions cal_rt2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import pandas as pd
import numpy as np
import math

def _value_to_long(frame, value_col):
    """Return ``value_col`` of ``frame`` as long rows with reformatted dates.

    The column is pivoted to trade_date x stcode, the YYYYMMDD date labels are
    rewritten as YYYY-MM-DD strings, and the table is stacked back into rows
    of trade_date, stcode and ``value_col`` (missing values are dropped by
    ``stack``).
    """
    wide = frame.pivot(index='trade_date', columns='stcode', values=value_col)
    # Normalise the date format.
    wide.index = pd.to_datetime(wide.index, format='%Y%m%d').strftime("%Y-%m-%d")
    long_rows = wide.stack().reset_index()
    return long_rows.rename(columns={'level_0': 'trade_date', 0: value_col})


# Read the market values.
mkt_value = pd.read_csv("mkt_value.csv").rename(columns={'trade_dt': 'trade_date'})
total_value = _value_to_long(mkt_value, 'mkt_value')
flow_value = _value_to_long(mkt_value, 'flow_mkt_value')
#循环
# The two value-weighted aggregators are defined once, ahead of the loop,
# instead of being re-created on every iteration.
def cal_rt2(df):
    """Total-market-value weighted return of one trade date's rows.

    ``min_count=1`` makes each sum NaN rather than 0 when a group has no
    valid values.
    """
    return df['rt2'].sum(min_count=1) / df['mkt_value'].sum(min_count=1)


def cal_rt3(df):
    """Float-market-value weighted return of one trade date's rows."""
    return df['rt3'].sum(min_count=1) / df['flow_mkt_value'].sum(min_count=1)


for rank in [60, 120, 250]:
    for D in [5, 10, 20, 40]:
        for threshold in [0.1]:
            # Read the per-stock returns for this holding period.
            rt = pd.read_csv("rt_" + str(D) + ".csv", index_col=0)
            rt = rt.stack().reset_index()
            rt = rt.rename(columns={'level_0': 'trade_date', 'level_1': 'stcode', 0: 'rt'})
            # Cap returns to [-0.1, 0.1]. clip() replaces the chained
            # boolean assignment, which may write to a temporary copy.
            rt['rt'] = rt['rt'].clip(lower=-0.1, upper=0.1)
            position = pd.read_csv(
                "rank" + str(rank) + "D" + str(D) + "threshold" + str(threshold) + "_position.csv",
                index_col=0,
            )

            ## Transaction cost
            position_fill = position.fillna(0)
            position_abs = (position_fill - position_fill.shift(1)).abs()
            position_sum = position_abs.sum(axis=1)
            positon_len = position.apply(lambda x: len(x.dropna()), axis=1)
            # Turnover: absolute position change over the number of holdings.
            turnover = position_sum / positon_len
            # Cost: 0.001 per unit of turnover; a date with no holdings
            # gives an infinite ratio, which is treated as zero cost.
            cost = turnover * 0.001
            cost[cost == float('inf')] = 0
            # Move each cost one row earlier so it is charged on the
            # preceding rebalancing date.
            cost = cost.shift(-1).rename('cost')

            ## Returns of the three portfolios
            # First merge per-stock returns, positions and market values.
            keys = ['trade_date', 'stcode']
            position = position.stack().reset_index()
            position = position.rename(columns={'level_1': 'stcode', 0: 'position'})
            df_combined = position.merge(total_value, on=keys, how='inner')
            df_combined = df_combined.merge(flow_value, on=keys, how='inner')
            df_combined = df_combined.merge(rt, on=keys, how='inner')

            df_combined['rt1'] = df_combined['position'] * df_combined['rt']
            # rt2: numerator term of the total-market-value weighted return.
            df_combined['rt2'] = df_combined['position'] * df_combined['rt'] * df_combined['mkt_value']
            # rt3: numerator term of the float-market-value weighted return.
            df_combined['rt3'] = df_combined['position'] * df_combined['rt'] * df_combined['flow_mkt_value']

            by_date = df_combined.groupby('trade_date')
            # Equal-weight portfolio return series.
            rt1 = by_date['rt1'].mean()
            # Columns are selected with a list: tuple selection on a GroupBy
            # is rejected by pandas 2.0 and later. Naming the results here
            # avoids renaming suffix-generated columns after the join.
            rt2 = by_date[['mkt_value', 'rt2']].apply(cal_rt2).rename('rt2')
            rt3 = by_date[['flow_mkt_value', 'rt3']].apply(cal_rt3).rename('rt3')
            rt_df = rt1.to_frame().join(rt2, how='outer').join(rt3, how='outer')
            rt_df = rt_df.join(cost, how='left')

            # Net returns after cost.
            rt_df['rt1'] = rt_df['rt1'] - rt_df['cost']
            rt_df['rt2'] = rt_df['rt2'] - rt_df['cost']
            rt_df['rt3'] = rt_df['rt3'] - rt_df['cost']
            rt_df = rt_df.drop(columns='cost')
            rt_df = rt_df.reset_index()
            rt_df.to_csv(
                "rank" + str(rank) + "D" + str(D) + "threshold" + str(threshold) + "_new.csv",
                index=False,
            )

Loading

0 comments on commit 15286ee

Please sign in to comment.