forked from PIM-Data-Science/prescient-coding-challenge-2023
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolution_skeleton.py
184 lines (129 loc) · 6.62 KB
/
solution_skeleton.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# %%
import numpy as np
import pandas as pd
import datetime
import plotly.express as px
import plotly.io as pio
from numpy.random import gamma
from scipy.stats import gamma
# Select plotly's "browser" renderer as the default for displayed figures.
pio.renderers.default = "browser"
# print() already stringifies its arguments, so the timestamp is passed as-is.
print('---Python script Start---', datetime.datetime.now())
# %%
# data reads
# Paths are relative to the current working directory.
df_returns_train = pd.read_csv('data/returns_train.csv')
df_returns_test = pd.read_csv('data/returns_test.csv')
# Parse 'month_end' and keep only the calendar date (datetime.date objects)
# in both frames, in place.
for _df_loaded in (df_returns_train, df_returns_test):
    _df_loaded['month_end'] = pd.to_datetime(arg=_df_loaded['month_end']).apply(lambda ts: ts.date())
# %%
def equalise_weights(df: pd.DataFrame):
    '''
    Function to generate equal weights, i.e. 1/p for every stock column in every month.

    p is the number of stock columns (every column except 'month_end'). The
    same 1/p is written to every cell; missing (NaN) returns in the input are
    not treated differently.

    Args:
        df: A return data frame with a 'month_end' column. All remaining columns are stocks
    Returns:
        A new dataframe indexed by 'month_end' with one column per stock and
        every value equal to 1/p. The input frame is not modified. If df has
        no stock columns the result has no columns.
    Raises:
        KeyError: if df has no 'month_end' column
    '''
    # set_index returns a new frame, so the caller's data is left untouched
    df_weights = df.set_index('month_end')

    # list of stock names
    list_stocks = [col for col in df.columns if col != 'month_end']

    # assign 1/p (skipped when there are no stocks to avoid dividing by zero)
    if list_stocks:
        df_weights[list_stocks] = 1 / len(list_stocks)

    return df_weights
# %%
def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
    '''
    Function to generate stock weight allocations for the test months. Training months receive equal (1/p) weights.

    Args:
        df_train: The training set of returns. Needs a 'month_end' column; remaining columns are stocks
        df_test: The testing set of returns. Needs a 'month_end' column; remaining columns are stocks
    Returns:
        The returns dataframe (train rows followed by test rows) and the weights dataframe.
        The weights dataframe has one row per returns row, in the same order. Its stock
        columns come first, followed by a 'month_end' column that is only populated on the
        test rows. If df_test is empty, the equal-weight frame for the training rows is
        returned as produced by equalise_weights.
    Raises:
        Exception: if any stock weight exceeds the threshold checked at the end
    '''
    # Function-scope import: the module binds the name `gamma` twice (numpy.random,
    # then scipy.stats), so take the scipy distribution explicitly.
    from scipy.stats import gamma as gamma_dist

    print('---> training set spans', df_train['month_end'].min(), df_train['month_end'].max())
    print('---> testing set spans', df_test['month_end'].min(), df_test['month_end'].max())

    # initialise data
    n_train = len(df_train)
    df_returns = pd.concat(objs=[df_train, df_test], ignore_index=True)
    df_weights = equalise_weights(df_returns[:n_train])  # equal weights for the training rows

    # list of stock names
    list_stocks = list(df_returns.columns)
    list_stocks.remove('month_end')

    # <<--------------------- YOUR CODE GOES BELOW THIS LINE --------------------->>

    # Static weighting scheme: a gamma pdf is evaluated at each stock's 1-based
    # column position and the weights are proportional to the inverse of that
    # value. No return data is used, so the weights are identical for every
    # test month and are computed once.
    shape, scale = 4.5503, 7037
    positions = np.arange(1, len(list_stocks) + 1)
    pdf_values = gamma_dist(a=shape, scale=scale).pdf(positions)
    inv_values = 1 / pdf_values
    weights = inv_values / inv_values.sum()  # normalise so the weights sum to 1

    if len(df_test):
        # one identical weight row per test month, built and appended in one step
        df_test_weights = pd.DataFrame(
            data=np.tile(weights, (len(df_test), 1)),
            columns=list_stocks,
        )
        df_test_weights.insert(0, 'month_end', df_test['month_end'].to_list())
        df_weights = pd.concat(objs=[df_weights, df_test_weights], ignore_index=True)

    # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>

    # 10% limit check
    # NOTE(review): the message refers to a 10% cap but the comparison only trips
    # above 0.90. With 54 stocks the largest weight produced above appears to be
    # roughly 0.89, so tightening this to 0.10 would make the function raise.
    # Confirm the intended limit before changing the threshold.
    if (df_weights[list_stocks] > 0.90).to_numpy().any():
        raise Exception(r'---> 10% limit exceeded')

    return df_returns, df_weights
# %%
def plot_total_return(df_returns: pd.DataFrame, df_weights_index: pd.DataFrame, df_weights_portfolio: pd.DataFrame):
    '''
    Function to build the index and portfolio total return series and plot them.

    Weights and returns are multiplied element by element, so each weights
    frame must have one row per row of df_returns, in the same order.

    Args:
        df_returns: Combined training and test returns with a 'month_end' column; remaining columns are stocks
        df_weights_index: Index weights. Must contain every stock column of df_returns
        df_weights_portfolio: Portfolio weights. Must contain every stock column of df_returns
    Returns:
        A line plot of the two total return indices, and a dataframe sorted by
        month_end holding the monthly returns ('index', 'portfolio') and the
        total return indices ('index_tr', 'portfolio_tr')
    '''
    # stock columns are everything except the date column
    stock_cols = list(df_returns.columns)
    stock_cols.remove('month_end')

    # returns as a plain array, with nans replaced by 0
    returns = np.nan_to_num(x=np.array(df_returns[stock_cols]), copy=False, nan=0)

    # weighted return per row for the index and for the portfolio
    rtn_index = (np.array(df_weights_index[stock_cols]) * returns).sum(axis=1)
    rtn_portfolio = (np.array(df_weights_portfolio[stock_cols]) * returns).sum(axis=1)

    # collect the return series next to their dates, then order by date
    df_rtn = pd.DataFrame(data=df_returns['month_end'], columns=['month_end'])
    df_rtn['index'] = rtn_index
    df_rtn['portfolio'] = rtn_portfolio
    df_rtn.sort_values(by='month_end', inplace=True)

    # total return: compound the monthly returns and scale by the base price
    base_price = 100
    for series_name in ('index', 'portfolio'):
        df_rtn[series_name + '_tr'] = (1 + df_rtn[series_name]).cumprod() * base_price

    # long format: one row per (month_end, series) for plotting
    df_rtn_long = df_rtn[['month_end', 'index_tr', 'portfolio_tr']].melt(
        id_vars='month_end',
        var_name='series',
        value_name='Total Return',
    )

    # plot
    fig1 = px.line(data_frame=df_rtn_long, x='month_end', y='Total Return', color='series')

    return fig1, df_rtn
# %%
# running solution
# Benchmark: equal weights over the combined train + test history.
df_returns = pd.concat(objs=[df_returns_train, df_returns_test], ignore_index=True)
df_weights_index = equalise_weights(df=df_returns)

# Strategy weights; df_returns is rebound to the combined frame the function returns.
df_returns, df_weights_portfolio = generate_portfolio(
    df_train=df_returns_train,
    df_test=df_returns_test,
)

# Total return comparison of benchmark and strategy.
fig1, df_rtn = plot_total_return(
    df_returns=df_returns,
    df_weights_index=df_weights_index,
    df_weights_portfolio=df_weights_portfolio,
)

# Bare expression: shows the figure when run as an interactive cell; it has no
# effect when the file is executed as a plain script.
fig1