import numpy as np


def STRidge(X, y, lam, maxit, tol, normalize=2, print_results=False):
    """
    Sequential Threshold Ridge Regression algorithm for finding a (hopefully) sparse
    approximation to X^{-1}y. The idea is that this may do better with correlated observables.
    This assumes y is only one column.
    """
    from sklearn.linear_model import Ridge

    n, d = X.shape
    w = Ridge(alpha=lam, fit_intercept=False).fit(X, y).coef_.T
    num_relevant = d
    biginds = np.where(abs(w) > tol)[0]

    # Threshold and continue
    for j in range(maxit):

        # Figure out which items to cut out
        smallinds = np.where(abs(w) < tol)[0]
        new_biginds = [i for i in range(d) if i not in smallinds]

        # If nothing changes then stop
        if num_relevant == len(new_biginds):
            break
        else:
            num_relevant = len(new_biginds)

        # Also make sure we didn't just lose all the coefficients
        if len(new_biginds) == 0:
            if j == 0:
                return w
            else:
                break
        biginds = new_biginds

        # Otherwise get a new guess
        w[smallinds] = 0
        w[biginds] = Ridge(alpha=lam, fit_intercept=False).fit(X[:, biginds], y).coef_.T

    # Now that we have the sparsity pattern, use standard (unregularized) least squares to get w
    if len(biginds) > 0:
        w[biginds] = Ridge(alpha=0.0, fit_intercept=False).fit(X[:, biginds], y).coef_.T

    return w
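

# A minimal, self-contained usage sketch for STRidge (illustrative only; the synthetic data,
# hyperparameter values, and the helper name below are assumptions, not part of the original
# module). It builds a 3-column library in which only the first and third columns matter and
# asks STRidge to recover that sparsity pattern.
def _example_STRidge_usage():
    rng = np.random.default_rng(0)
    X = rng.standard_normal((200, 3))
    # True sparse model: w = [2, 0, -0.5]
    y = X[:, [0]] * 2.0 - X[:, [2]] * 0.5
    # Small ridge penalty, up to 10 thresholding passes, coefficient tolerance 0.1.
    w = STRidge(X, y, lam=1e-5, maxit=10, tol=0.1)
    return w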


def TrainSTRidge(
    TrainR,
    TrainY,
    TestR,
    TestY,
    lam,
    d_tol,
    maxit=200,
    STR_iters=200,
    l0_penalty=0.0,
    print_best_tol=False,
):
    """
    This function trains a predictor using STRidge.

    It sweeps over different values of the tolerance, trains a predictor on the training set for
    each one, and evaluates each predictor with a loss function on a holdout set.

    Note: the published article has a typo. The loss function used here for model selection
    evaluates fidelity with the 2-norm, not the squared 2-norm.
    """
    from sklearn.linear_model import Ridge

    D = TrainR.shape[1]

    # Set up the initial tolerance and l0 penalty
    d_tol = float(d_tol)
    tol = d_tol
    if l0_penalty is None:
        l0_penalty = 0.001 * np.linalg.cond(TrainR)

    # Get the standard least squares estimator
    w = np.zeros((D, 1))
    w_best = Ridge(alpha=0.0, fit_intercept=False).fit(TrainR, TrainY).coef_.T
    err_best = np.linalg.norm(
        TestY - TestR.dot(w_best), 2
    ) + l0_penalty * np.count_nonzero(w_best)
    tol_best = 0
    errors = [
        np.linalg.norm(TestY - TestR.dot(w_best), 2)
        + l0_penalty * np.count_nonzero(w_best)
    ]
    tolerances = [0]

    # Now increase the tolerance until test performance decreases
    for it in range(maxit):

        # Get a set of coefficients and error
        w = STRidge(TrainR, TrainY, lam, STR_iters, tol)
        err = np.linalg.norm(TestY - TestR.dot(w), 2) + l0_penalty * np.count_nonzero(w)
        errors.append(err)

        # Has the accuracy improved?
        if err <= err_best:
            err_best = err
            w_best = w
            tol_best = tol
            tol = tol + d_tol
        else:
            tol = max([0, tol - 2 * d_tol])
            d_tol = 2 * d_tol / (maxit - it)
            tol = tol + d_tol
        tolerances.append(tol)

    if print_best_tol:
        print("Optimal tolerance: " + str(tol_best))

    return w_best
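

# A minimal end-to-end sketch of TrainSTRidge (illustrative assumptions only: the synthetic
# library, the train/holdout split, and the hyperparameter values below are not from the
# original repository). A known sparse coefficient vector generates noisy observations, and
# the tolerance sweep should recover roughly that sparsity pattern.
if __name__ == "__main__":
    rng = np.random.default_rng(1)
    Theta = rng.standard_normal((400, 6))
    w_true = np.array([[1.5], [0.0], [0.0], [-0.7], [0.0], [0.3]])
    Y = Theta.dot(w_true) + 0.01 * rng.standard_normal((400, 1))

    # Hold out the last quarter of the rows for model selection.
    TrainR, TestR = Theta[:300], Theta[300:]
    TrainY, TestY = Y[:300], Y[300:]

    w_hat = TrainSTRidge(
        TrainR, TrainY, TestR, TestY, lam=1e-5, d_tol=0.05, print_best_tol=True
    )
    print("Recovered coefficients:")
    print(w_hat)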