-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathfunctions.py
163 lines (129 loc) · 5.37 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 21 10:24:15 2016
@author: charles sutton
In this file, you will find the main filters
and basic functions for time series operations
"""
# Filters
# Every filter has to produce a filtered time series of prices from a sequence of price relatives
"""
Documentation for all the following filters :
INPUT :
dataset : a pandas.dataframe of price relatives to filter
of shape (nb_periods , nb_shares)
params : a dict of parameters
OUTPUT :
f_dataset : the filtered price series dataset
The resulting dataframe has the following properties :
- same shape as the original dataset,
- NaN values where a prediction cannot be made (most often the first days)
- at row t, the prediction made in period t
- Note that you have to adjust the dataframe to perform predictive analysis,
otherwise you are measuring with knowledge of future information.
Specific documentation added in each function
"""
def MA(dataset, params):
    """
    Moving average filter.

    The price relatives are first turned into a price sequence with
    ``to_absolute``; the rolling mean of that sequence is returned.

    params must contain
    window : the window argument forwarded to ``rolling``
    """
    assert "window" in params, "you should add the window parameter"
    window_size = params["window"]
    prices = to_absolute(dataset)
    return prices.rolling(window=window_size).mean()
def EMA(dataset, params):
    """
    Exponential moving average filter.

    The price relatives are first turned into a price sequence with
    ``to_absolute``; the exponentially weighted mean of that sequence is
    returned.

    params must contain
    com : the center of mass argument forwarded to ``ewm``
    """
    assert "com" in params, "you should add the com (center of mass) parameter"
    center_of_mass = params["com"]
    prices = to_absolute(dataset)
    return prices.ewm(com=center_of_mass).mean()
def ZLEMA(dataset, params):
    """
    Placeholder for the ZLEMA filter (original note: "cf filters").

    Not implemented: both arguments are ignored and None is returned.
    NOTE(review): ZLEMA presumably stands for zero-lag exponential moving
    average - confirm before implementing.
    """
    # TODO: implement. The original draft only contained this commented line:
    # p_dataset = to_absolute(dataset)
    return None
def KCA(dataset, params=None):
    """
    KCA performs Kalman filtering on every column of the dataset.

    The price relatives are turned into a price sequence with
    ``to_absolute`` and each column is filtered independently by
    ``kalman_filtering`` (an online implementation of the Kalman filter).

    dataset : dataframe of price relatives, one column per stock
    params  : unused, accepted so that KCA has the same signature as the
              other filters

    Returns a dataframe of filtered prices with the same row index and the
    same columns as the input, like MA and EMA do.

    NOTE(review): the original comment said the seed parameter is fixed
    to 1; no seed is set in this function - confirm where it applies.
    """
    p_dataset = to_absolute(dataset)
    filtered = {}
    for stock in p_dataset.columns:
        filtered[stock] = kalman_filtering(p_dataset[stock])
    # Build the frame in one go and reuse the input index: assigning plain
    # lists to an empty DataFrame replaced the index by 0..n-1.
    return pd.DataFrame(filtered, index=p_dataset.index, columns=p_dataset.columns)
from pykalman import KalmanFilter
def kalman_filtering(price_sequence):
    """
    Filter one price sequence with a Kalman filter, one observation at a time.

    The filter uses a three-dimensional state with the transition matrix
    [[1, h, h**2 / 2], [0, 1, h], [0, 0, 1]] (time step h = 1) and an
    identity transition covariance. The first component of each filtered
    state mean is reported as the filtered price.

    price_sequence : one-dimensional sequence of prices (list, array or
                     pandas Series); it is read by position, so the index
                     of a Series does not matter

    Returns a list with one filtered price per input price (an empty list
    for an empty input).
    """
    # Positional view of the data: indexing a Series with [i] is label based
    # and fails when the index is not 0..n-1.
    observations = np.asanyarray(price_sequence, dtype=float)
    if observations.size == 0:
        return []

    h = 1  # time step
    A = np.array([[1, h, .5 * h ** 2],
                  [0, 1, h],
                  [0, 0, 1]])
    Q = np.eye(A.shape[0])
    kf = KalmanFilter(transition_matrices=A, transition_covariance=Q)

    # Initialise the filter on the first observation only.
    means, covariances = kf.filter([observations[0]])
    mean = np.asarray(means[-1])
    covariance = np.asarray(covariances[-1])
    filtered_price_sequence = [mean[0]]

    # Streaming updates: each step only needs the previous state, so the
    # full history of means and covariances is not kept.
    for observation in observations[1:]:
        mean, covariance = kf.filter_update(mean, covariance, observation)
        mean = np.asarray(mean)
        covariance = np.asarray(covariance)
        filtered_price_sequence.append(mean[0])
    return filtered_price_sequence
# Predictive analysis
def adjust_data(dataset, prediction, horizon=1):
    """
    Align predictions with the realised values they are meant to forecast.

    Row t of ``prediction`` is compared with row t + horizon of ``dataset``.
    The last ``horizon`` prediction rows (nothing to compare them with) and
    the prediction rows that contain only NaN are dropped. All slicing is
    positional, so any kind of row index is supported.

    dataset    : dataframe of realised values
    prediction : dataframe of predictions, same shape as ``dataset``
    horizon    : number of periods between a prediction and the value it
                 forecasts (0 means no shift)

    Returns (adjusted_dataset, adjusted_prediction), two dataframes of
    identical shape.

    Raises
    AssertionError : the inputs have different shapes, or the adjusted
                     frames end up with different shapes (for example when
                     an all-NaN prediction row sits after a valid one)
    ValueError     : horizon is negative
    IndexError     : no prediction row is usable
    """
    assert dataset.shape == prediction.shape
    if horizon < 0:
        raise ValueError("horizon must be a non-negative number of periods")

    # Clamp at 0 so that a horizon larger than the data leaves nothing,
    # instead of turning into a slice counted from the end.
    stop = max(len(prediction) - horizon, 0)
    usable = prediction.iloc[:stop]
    has_value = usable.notnull().any(axis=1).values
    if not has_value.any():
        raise IndexError("prediction has no row with a non-NaN value to evaluate")

    # Position (not label) of the first usable prediction row.
    start = int(has_value.argmax())
    adjusted_prediction = usable[has_value]
    adjusted_dataset = dataset.iloc[start + horizon:]
    assert adjusted_dataset.shape == adjusted_prediction.shape
    return adjusted_dataset, adjusted_prediction
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np
import pandas as pd
def regression_report(adjusted_dataset, adjusted_prediction, output="all"):
    """
    Build a regression report for the adjusted datasets.

    The report has one row per stock, preceded by a 'uniform_average' row,
    and the columns
    MAE : mean absolute error
    DPA : direction prediction accuracy
    R2  : r2-score

    adjusted_dataset    : dataframe of realised values; its columns give
                          the row labels of the report
    adjusted_prediction : predictions aligned with ``adjusted_dataset``
    output              : "all" returns the whole report (average and
                          stock per stock), "average" returns only the
                          'uniform_average' row

    Raises ValueError when ``output`` is neither "all" nor "average".
    """
    # Validate first: an unknown value used to fall through and return None
    # after all the metrics had been computed.
    if output not in ("all", "average"):
        raise ValueError('output should be "all" or "average", got %r' % (output,))

    mae_per_stock = mean_absolute_error(adjusted_dataset, adjusted_prediction, multioutput="raw_values")
    mae_average = mean_absolute_error(adjusted_dataset, adjusted_prediction)
    r2_per_stock = r2_score(adjusted_dataset, adjusted_prediction, multioutput="raw_values")
    r2_average = r2_score(adjusted_dataset, adjusted_prediction)

    df = pd.DataFrame()
    # Each column holds the average in position 0, then one value per stock.
    df["MAE"] = np.insert(mae_per_stock, 0, mae_average)
    df["DPA"] = direction_prediction_accuracy(adjusted_dataset, adjusted_prediction)
    df["R2"] = np.insert(r2_per_stock, 0, r2_average)
    # setting stock names as index
    df.index = adjusted_dataset.columns.insert(0, u'uniform_average')

    if output == "average":
        return df.iloc[0]
    return df
def direction_prediction_accuracy(adjusted_dataset, adjusted_prediction):
    """
    Compute the direction prediction accuracy.

    Both inputs have 1 subtracted from them; a prediction counts as a
    success when the product of the two results is >= 0, i.e. when both
    lie on the same side of 1 (or one of them equals 1).

    Returns an array holding the mean of the per-column success rates
    followed by the success rate of each column.
    """
    actual_move = np.asanyarray(adjusted_dataset) - 1
    predicted_move = np.asanyarray(adjusted_prediction) - 1
    # 1.0 where the signs agree, 0.0 otherwise
    success = np.where(actual_move * predicted_move >= 0, 1.0, 0.0)
    per_column = np.mean(success, axis=0)
    overall = np.mean(per_column)
    return np.insert(per_column, 0, overall)
# Useful
def to_absolute(price_relatives):
    """
    Transform price relatives into a price sequence.

    Returns the cumulative product of ``price_relatives`` taken along
    axis 0, i.e. down the rows of each column.
    """
    price_sequence = price_relatives.cumprod(axis=0)
    return price_sequence