Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add method to find the optimal fitting window #115

Merged
merged 2 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions lppls/lppls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
from datetime import datetime as date
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
import xarray as xr
from typing import Any, Dict, Optional
import warnings


class LPPLS(object):
Expand Down Expand Up @@ -634,3 +637,107 @@ def ordinal_to_date(self, ordinal):
return date.fromordinal(int(ordinal)).strftime("%Y-%m-%d")
except (ValueError, OutOfBoundsDatetime):
return str(pd.NaT)

def detect_bubble_start_time_via_lagrange(
    self,
    max_window_size: int,
    min_window_size: int,
    step_size: int = 1,
    max_searches: int = 25,
) -> Optional[Dict[str, Any]]:
    """Pick the fitting window (and bubble start time) by Lagrange-regularised SSE.

    Every candidate window ends at the last observation and starts
    ``window_size`` points earlier.  For each window the LPPLS model is
    fitted with ``self.fit``, the sum of squared residuals (SSE) is
    normalised by ``n - n_params``, and a straight line is fitted to the
    normalised SSE as a function of window size.  The slope of that line is
    used to detrend the normalised SSE; the window with the smallest
    detrended value is reported as optimal.

    Args:
        max_window_size: Largest window (number of observations) to try.
        min_window_size: Smallest window to try (inclusive).
        step_size: Decrement between consecutive window sizes.
        max_searches: Passed through to ``self.fit``.

    Returns:
        ``None`` (after emitting a warning) when fewer than two windows
        produced a usable fit.  Otherwise a dict with:

        * ``tau`` - time value at the start of the optimal window,
        * ``optimal_window_size`` - size of that window,
        * ``tc``, ``m``, ``w``, ``a``, ``b``, ``c1``, ``c2`` - fitted
          parameters for the optimal window,
        * ``window_sizes``, ``sse_list``, ``ssen_list``,
          ``lagrange_sse_list``, ``start_times`` - per-window diagnostics.
          These lists are index-aligned: entry ``i`` of each one describes
          the same window.

    Notes:
        Windows longer than the available data, or with no residual
        degrees of freedom (``window_size <= n_params``), are skipped
        without being fitted.  Windows whose fit raises, returns
        ``tc == 0.0`` or yields a non-finite SSE are skipped as well.
    """
    # NOTE(review): the original inline comment says the reference method
    # uses p = 8 degrees of freedom (7 LPPLS parameters plus the window
    # start t1), yet the value used here is 7.  Kept unchanged so results
    # do not shift; confirm which value is intended.
    n_params = 7

    # assumes self.observations is a 2-row array: row 0 = time, row 1 = values
    # (it is indexed as observations[0] and observations[:, a:b]) - TODO confirm
    total_obs = len(self.observations[0])

    window_sizes = []
    start_times = []
    sse_list = []
    ssen_list = []
    lppls_params_list = []

    for window_size in range(max_window_size, min_window_size - 1, -step_size):
        # A window longer than the series would give a negative start index
        # and silently slice the wrong span; a window with no residual
        # degrees of freedom cannot be normalised.  Both are rejected before
        # paying for a fit.
        if window_size > total_obs or window_size - n_params <= 0:
            continue

        start_idx = total_obs - window_size
        obs_window = self.observations[:, start_idx:total_obs]

        try:
            tc, m, w, a, b, _, c1, c2, _, _ = self.fit(max_searches, obs=obs_window)
            # presumably fit() signals failure with all-zero output - verify
            if tc == 0.0:
                continue

            y_hat = self.lppls(obs_window[0], tc, m, w, a, b, c1, c2)
            sse = np.sum((obs_window[1] - y_hat) ** 2)
        except Exception as exc:
            # Best-effort scan: one bad window must not abort the others.
            warnings.warn(
                f"LPPLS fit failed for window size {window_size}: {exc}"
            )
            continue

        # A NaN/inf SSE would poison the regression slope and the argmin.
        if not np.isfinite(sse):
            continue

        # Record everything for this window together so the diagnostic
        # lists can never drift out of alignment.
        window_sizes.append(window_size)
        start_times.append(self.observations[0][start_idx])
        sse_list.append(sse)
        ssen_list.append(sse / (window_size - n_params))
        lppls_params_list.append(
            {"tc": tc, "m": m, "w": w, "a": a, "b": b, "c1": c1, "c2": c2}
        )

    if len(ssen_list) < 2:
        warnings.warn("Not enough data points to compute Lagrange regularization.")
        return None

    # Ordinary-least-squares slope of normalised SSE against window size,
    # in closed form: cov(x, y) / var(x).  The accepted window sizes are
    # distinct, so the denominator is strictly positive.
    sizes = np.asarray(window_sizes, dtype=float)
    ssen = np.asarray(ssen_list, dtype=float)
    size_dev = sizes - sizes.mean()
    slope = float(np.dot(size_dev, ssen - ssen.mean()) / np.dot(size_dev, size_dev))

    # Lagrange-regularised SSE: normalised SSE with the linear trend in
    # window size removed.
    lagrange_sse_list = [
        ssen_i - slope * size_i for ssen_i, size_i in zip(ssen_list, window_sizes)
    ]

    min_index = int(np.argmin(lagrange_sse_list))
    optimal_params = lppls_params_list[min_index]

    return {
        "tau": start_times[min_index],
        "optimal_window_size": window_sizes[min_index],
        "tc": optimal_params["tc"],
        "m": optimal_params["m"],
        "w": optimal_params["w"],
        "a": optimal_params["a"],
        "b": optimal_params["b"],
        "c1": optimal_params["c1"],
        "c2": optimal_params["c2"],
        "window_sizes": window_sizes,
        "sse_list": sse_list,
        "ssen_list": ssen_list,
        "lagrange_sse_list": lagrange_sse_list,
        "start_times": start_times,
    }
193 changes: 193 additions & 0 deletions notebooks/lagrange_regularization.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ numpy>=1.17.0
pandas>=0.25.0
scipy>=1.3.0
tqdm>=4.62.3
xarray==0.19.0
xarray==0.19.0
scikit-learn>=1.2.2
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = fh.read()

setuptools.setup(name='lppls',
version='0.6.19',
version='0.6.20',
description='A Python module for fitting the LPPLS model to data.',
packages=['lppls'],
author='Josh Nielsen',
Expand Down
Loading