diff --git a/docs/source/how-to-guides/feature-guides/model_regressor_coefficients.ipynb b/docs/source/how-to-guides/feature-guides/model_regressor_coefficients.ipynb new file mode 100644 index 000000000..9132a2e18 --- /dev/null +++ b/docs/source/how-to-guides/feature-guides/model_regressor_coefficients.ipynb @@ -0,0 +1,206 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2ee6d94d", + "metadata": {}, + "source": [ + "# Retrieving regressor coefficients" + ] + }, + { + "cell_type": "markdown", + "id": "61d31237-c428-483a-bac1-419dddad3000", + "metadata": {}, + "source": [ + "Understanding the coefficients of various components in a forecasting model is crucial as it provides insights into how different factors influence the predicted values. We will demonstrate how to retrieve these coefficients using specific functions provided in NeuralProphet.\n", + "\n", + "The following functions are available:\n", + "- get_future_regressor_coefficients: Retrieves the coefficients for future regressors.\n", + "- get_event_coefficients: Retrieves the coefficients for events and holidays.\n", + "- get_lagged_regressor_coefficients: Retrieves the coefficients for lagged regressors.\n", + "- get_ar_coefficients: Retrieves the coefficients for autoregressive lags.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6575cb59", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from neuralprophet import NeuralProphet\n", + "\n", + "# Load tutorial datasets \n", + "df = pd.read_csv(\"https://github.com/ourownstory/neuralprophet-data/raw/main/kaggle-energy/datasets/tutorial04.csv\")\n", + "\n", + "df1 = pd.read_csv(\"https://github.com/ourownstory/neuralprophet-data/raw/main/kaggle-energy/datasets/tutorial01.csv\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "0d2ae750", + "metadata": {}, + "source": [ + "## Future regressors\n", + "\n", + "Useful for understanding the impact of external variables that are known in advance, 
such as temperature in this example. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95511f2b", + "metadata": {}, + "outputs": [], + "source": [ + "m = NeuralProphet(epochs=10)\n", + "\n", + "# Add the new future regressor\n", + "m.add_future_regressor(\"temperature\")\n", + "\n", + "\n", + "# Continue training the model and making a prediction\n", + "metrics = m.fit(df)\n", + "\n", + "print(\"Future regressor coefficients:\", m.model.get_future_regressor_coefficients())" + ] + }, + { + "cell_type": "markdown", + "id": "455b60e1", + "metadata": {}, + "source": [ + "## Events\n", + "\n", + "Helps in assessing the effect of specific events or holidays on the forecasted values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffd52d2b", + "metadata": {}, + "outputs": [], + "source": [ + "m = NeuralProphet(epochs=10)\n", + "\n", + "# Add holidays for the US as events \n", + "m.add_country_holidays(\"US\")\n", + "\n", + "metrics = m.fit(df1)\n", + "\n", + "print(\"Event coefficients:\", m.model.get_event_coefficients())" + ] + }, + { + "cell_type": "markdown", + "id": "757056b4", + "metadata": {}, + "source": [ + "## Lagged regressors\n", + "\n", + "Lagged regressor coefficients are useful for understanding the influence of past values of external variables on the forecast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c61347cb-bea9-4732-a7f6-4c05aa496354", + "metadata": {}, + "outputs": [], + "source": [ + "m = NeuralProphet(epochs=10)\n", + "\n", + "# Add temperature of last three days as lagged regressor\n", + "m.add_lagged_regressor(\"temperature\", n_lags=3)\n", + "\n", + "metrics = m.fit(df)\n", + "print(m.model.get_lagged_regressor_coefficients())" + ] + }, + { + "cell_type": "markdown", + "id": "a9440659", + "metadata": {}, + "source": [ + "## Autoregressive\n", + "\n", + "Useful for understanding how past values of the time series itself influence future predictions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feff9910", + "metadata": {}, + "outputs": [], + "source": [ + "m = NeuralProphet(n_lags=5, epochs=10)\n", + "\n", + "metrics = m.fit(df1)\n", + "\n", + "print(\"AR coefficients:\", m.model.get_ar_coefficients())" + ] + }, + { + "cell_type": "markdown", + "id": "bc77b042", + "metadata": {}, + "source": [ + "## Visualizing coefficients\n", + "\n", + "With the NeuralProphet plotting features it is easy to automatically create plots for model parameters that visualize the previously discussed coefficients." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f90dd1b", + "metadata": {}, + "outputs": [], + "source": [ + "m = NeuralProphet(\n", + " n_lags=10, # Autoregression\n", + " epochs=10\n", + ")\n", + "\n", + "# Add the new future regressor\n", + "m.add_future_regressor(\"temperature\")\n", + "\n", + "# Add holidays for the US as events\n", + "m.add_country_holidays(\"US\")\n", + "\n", + "metrics = m.fit(df)\n", + "\n", + "print(m.model.get_future_regressor_coefficients())\n", + "print(m.model.get_event_coefficients())\n", + "print(m.model.get_ar_coefficients())\n", + "\n", + "m.plot_parameters()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/neuralprophet/plot_utils.py b/neuralprophet/plot_utils.py index c468d3210..65594eabc 100644 --- a/neuralprophet/plot_utils.py +++ b/neuralprophet/plot_utils.py @@ -324,8 +324,6 @@ def get_valid_configuration( # move to utils # Identify components to be plotted # as dict, minimum: {plot_name} plot_components = [] - if validator == 
"plot_parameters": - quantile_index = m.model.quantiles.index(quantile) # Plot trend if "trend" in components: @@ -418,38 +416,32 @@ def get_valid_configuration( # move to utils multiplicative_events = [] if "events" in components: additive_events_flag = False - muliplicative_events_flag = False + multiplicative_events_flag = False + event_configs = {} if m.config_events is not None: - for event, configs in m.config_events.items(): - if validator == "plot_components" and configs.mode == "additive": - additive_events_flag = True - elif validator == "plot_components" and configs.mode == "multiplicative": - muliplicative_events_flag = True - elif validator == "plot_parameters": - event_params = m.model.get_event_weights(event) - weight_list = [ - (key, param.detach().numpy()[quantile_index, :]) for key, param in event_params.items() - ] - if configs.mode == "additive": - additive_events = additive_events + weight_list - elif configs.mode == "multiplicative": - multiplicative_events = multiplicative_events + weight_list - + event_configs.update(m.config_events) if m.config_country_holidays is not None: - for country_holiday in m.config_country_holidays.holiday_names: - if validator == "plot_components" and m.config_country_holidays.mode == "additive": - additive_events_flag = True - elif validator == "plot_components" and m.config_country_holidays.mode == "multiplicative": - muliplicative_events_flag = True - elif validator == "plot_parameters": - event_params = m.model.get_event_weights(country_holiday) - weight_list = [ - (key, param.detach().numpy()[quantile_index, :]) for key, param in event_params.items() - ] - if m.config_country_holidays.mode == "additive": - additive_events = additive_events + weight_list - elif m.config_country_holidays.mode == "multiplicative": - multiplicative_events = multiplicative_events + weight_list + event_configs.update( + {holiday: m.config_country_holidays for holiday in m.config_country_holidays.holiday_names} + ) + + if 
event_configs: + if validator == "plot_components": + additive_events_flag = any(config.mode == "additive" for config in event_configs.values()) + multiplicative_events_flag = any(config.mode == "multiplicative" for config in event_configs.values()) + + elif validator == "plot_parameters": + event_coefficients = m.model.get_event_coefficients() + for _, row in event_coefficients.iterrows(): + event = row["regressor"] + mode = row["regressor_mode"] + coef = row["coef"] + weight_tuple = (event, coef) + + if mode == "additive": + additive_events.append(weight_tuple) + elif mode == "multiplicative": + multiplicative_events.append(weight_tuple) if additive_events_flag: plot_components.append( @@ -458,7 +450,7 @@ def get_valid_configuration( # move to utils "comp_name": "events_additive", } ) - if muliplicative_events_flag: + if multiplicative_events_flag: plot_components.append( { "plot_name": "Multiplicative Events", @@ -488,11 +480,15 @@ def get_valid_configuration( # move to utils } ) elif validator == "plot_parameters": - regressor_param = m.model.future_regressors.get_reg_weights(regressor)[quantile_index, :] - if configs.mode == "additive": - additive_future_regressors.append((regressor, regressor_param.detach().numpy())) - elif configs.mode == "multiplicative": - multiplicative_future_regressors.append((regressor, regressor_param.detach().numpy())) + future_regressor_coefficients = m.model.get_future_regressor_coefficients() + for _, row in future_regressor_coefficients.iterrows(): + regressor = row["regressor"] + mode = row["regressor_mode"] + coef = row["coef"] + if mode == "additive": + additive_future_regressors.append((regressor, coef)) + elif mode == "multiplicative": + multiplicative_future_regressors.append((regressor, coef)) # Plot quantiles as a separate component, if present # If multiple steps in the future are predicted, only plot quantiles if highlight_forecast_step_n is set diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py index 
# Coefficient-retrieval methods added to TimeNet (neuralprophet/time_net.py).
# They rely on the module-level names ``np``, ``pd``, ``nn`` and ``utils_torch``.


def get_future_regressor_coefficients(self):
    """
    Retrieves the coefficients for future regressors.

    Every regressor is reduced to one representative value, the mean of its own weights:
    for the linear model the weights returned by ``get_reg_weights(name)`` (one value per
    quantile), for the neural-network model the weights of all ``nn.Linear`` layers of the
    regressor's network.

    Returns
    -------
        pd.DataFrame
            One row per configured future regressor with the columns

            - regressor: Name of the regressor.
            - regressor_mode: Mode of the regressor ('additive' or 'multiplicative').
            - coef: Averaged coefficient value; NaN if no weights could be located.

            The frame is empty (but keeps these columns) if no future regressors are configured.

    Example
    -------
        >>> m = NeuralProphet()
        >>> m.add_future_regressor("temperature")
        >>> m.fit(df)
        >>> m.model.get_future_regressor_coefficients()
    """
    columns = ["regressor", "regressor_mode", "coef"]
    config = self.config_regressors
    if config is None or config.regressors is None:
        return pd.DataFrame(columns=columns)

    component = self.future_regressors

    def find_weights(name, mode):
        """Return the weights belonging to regressor ``name`` as a numpy array, or None."""
        params = getattr(component, "regressor_params", None)
        if params is not None and mode in params:
            # Linear model: ``params[mode]`` holds the weights of *all* regressors of that mode,
            # so ask the component for the slice of this regressor whenever it can provide it.
            get_reg_weights = getattr(component, "get_reg_weights", None)
            if callable(get_reg_weights):
                return get_reg_weights(name).detach().cpu().numpy()
            return params[mode].detach().cpu().numpy()
        nets = getattr(component, "regressor_nets", None)
        if nets is not None and name in nets:
            # Neural-network model (either mode): flatten the weights of every linear layer.
            layer_weights = [
                layer.weight.detach().cpu().numpy() for layer in nets[name] if isinstance(layer, nn.Linear)
            ]
            if layer_weights:
                return np.concatenate(layer_weights, axis=None)
        return None

    rows = []
    for name, regressor_config in config.regressors.items():
        # Looked up afresh for every regressor so that a value can never leak from the previous one.
        weights = find_weights(name, regressor_config.mode)
        coef = np.nan if weights is None else np.asarray(weights).mean()
        rows.append({"regressor": name, "regressor_mode": regressor_config.mode, "coef": coef})

    return pd.DataFrame(rows, columns=columns)


def get_event_coefficients(self):
    """
    Retrieves the coefficients for events and holidays.

    The coefficient of an event is the mean over all weights returned by
    ``get_event_weights(event)``.

    Returns
    -------
        pd.DataFrame
            One row per entry of ``events_dims`` with the columns

            - regressor: Name of the event or holiday.
            - regressor_type: 'holiday' if the name is listed in ``config_holidays.holiday_names``,
              otherwise 'event'.
            - regressor_mode: Mode of the regressor ('additive' or 'multiplicative').
            - coef: Averaged coefficient value; NaN if the event has no weights.

            The frame is empty (but keeps these columns) if no events are configured.

    Example
    -------
        >>> m = NeuralProphet()
        >>> m.add_country_holidays("US")
        >>> m.fit(df)
        >>> m.model.get_event_coefficients()
    """
    columns = ["regressor", "regressor_type", "regressor_mode", "coef"]
    if self.events_dims is None:
        return pd.DataFrame(columns=columns)

    holiday_names = self.config_holidays.holiday_names if self.config_holidays else ()

    rows = []
    for event, dims in self.events_dims.items():
        # ``.cpu()`` keeps the conversion working when the model lives on an accelerator.
        weights = [param.detach().cpu().numpy().ravel() for param in self.get_event_weights(event).values()]
        coef = np.concatenate(weights).mean() if weights else np.nan
        rows.append(
            {
                "regressor": event,
                "regressor_type": "holiday" if event in holiday_names else "event",
                "regressor_mode": dims["mode"],
                "coef": coef,
            }
        )

    return pd.DataFrame(rows, columns=columns)


def get_ar_coefficients(self):
    """
    Retrieves the coefficients for the autoregressive (AR) components.

    The values are obtained from ``utils_torch.interprete_model`` on ``ar_net`` and averaged over
    the first axis. In case of hidden layers they only provide a rough approximation of the
    importance of each lag.

    Returns
    -------
        pd.DataFrame
            One row per lag with the columns

            - regressor: Name of the AR component (always 'AR').
            - lag: Lag value for the AR component (1-based position in the averaged weights).
            - coef: Coefficient value for the AR component.

            The frame is empty (but keeps these columns) if autoregression is not configured.

    Example
    -------
        >>> m = NeuralProphet(n_lags=10)
        >>> m.fit(df)
        >>> m.model.get_ar_coefficients()
    """
    columns = ["regressor", "lag", "coef"]
    if self.config_ar is None or not hasattr(self, "ar_net"):
        return pd.DataFrame(columns=columns)

    attributions = utils_torch.interprete_model(self, net="ar_net", forward_func="auto_regression")
    mean_weights = attributions.detach().cpu().numpy().mean(axis=0)

    # NOTE(review): assumes position 0 of the weights belongs to lag 1 - confirm against the
    # ordering of the lagged model inputs.
    rows = [{"regressor": "AR", "lag": lag, "coef": coef} for lag, coef in enumerate(mean_weights, start=1)]
    return pd.DataFrame(rows, columns=columns)


def get_lagged_regressor_coefficients(self):
    """
    Retrieves coefficients of lagged regressors, mapped to their corresponding lags.

    The weights returned by ``get_covar_weights()`` are averaged over their first axis; the first
    ``n_lags`` values of the result are reported for each regressor.

    Returns
    -------
        pd.DataFrame
            One row per regressor and lag with the columns

            - regressor: Name of the regressor.
            - lag: The specific lag associated with the coefficient (1-based position).
            - coef: Coefficient value for the regressor at the specific lag.

            The frame is empty (but keeps these columns) if no lagged regressors are configured.

    Example
    -------
        >>> m = NeuralProphet()
        >>> m.add_lagged_regressor('lagged_regressor1', n_lags=3)
        >>> m.fit(df)
        >>> m.model.get_lagged_regressor_coefficients()
    """
    columns = ["regressor", "lag", "coef"]
    if self.config_lagged_regressors is None:
        return pd.DataFrame(columns=columns)

    rows = []
    for name, weight_tensor in self.get_covar_weights().items():
        weights = weight_tensor.detach().cpu().numpy().mean(axis=0)
        n_lags = self.config_lagged_regressors[name].n_lags
        # NOTE(review): assumes position 0 of the weights belongs to lag 1 - confirm against the
        # ordering of the lagged model inputs.
        for lag in range(n_lags):
            rows.append({"regressor": name, "lag": lag + 1, "coef": weights[lag]})

    return pd.DataFrame(rows, columns=columns)
log = logging.getLogger("NP.test")
log.setLevel("DEBUG")
log.parent.setLevel("WARNING")

# Test data is read from <repository root>/tests/test-data
DIR = pathlib.Path(__file__).parent.parent.absolute()
DATA_DIR = os.path.join(DIR, "tests", "test-data")
PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv")
ENERGY_TEMP_DAILY_FILE = os.path.join(DATA_DIR, "tutorial04_kaggle_energy_daily_temperature.csv")
# Small data and training budgets keep the tests fast
NROWS = 512
EPOCHS = 2
BATCH_SIZE = 128
LR = 1.0


def test_future_regressor_coefficients_nn():
    """Coefficients of two additive future regressors fitted with the neural-nets regressor model."""
    log.info("Testing: Future Regressor Coefficients with NNs")
    df = pd.read_csv(PEYTON_FILE, nrows=NROWS + 50)
    m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=LR, future_regressors_model="neural_nets")
    # Synthetic regressors: rolling means of the target
    df["A"] = df["y"].rolling(7, min_periods=1).mean()
    df["B"] = df["y"].rolling(30, min_periods=1).mean()
    df = df[:-50]
    m = m.add_future_regressor(name="A")
    m = m.add_future_regressor(name="B", mode="additive")
    m.fit(df, freq="D")
    coefficients = m.model.get_future_regressor_coefficients()
    log.info(coefficients)
    assert not coefficients.empty, "No coefficients found"
    assert "regressor" in coefficients.columns, "Regressor column missing"
    assert "regressor_mode" in coefficients.columns, "Regressor mode column missing"
    assert "A" in coefficients["regressor"].values, "Regressor A not found"
    assert "B" in coefficients["regressor"].values, "Regressor B not found"
    a_mode = coefficients[coefficients["regressor"] == "A"]["regressor_mode"].values[0]
    b_mode = coefficients[coefficients["regressor"] == "B"]["regressor_mode"].values[0]
    assert a_mode == "additive", f"Unexpected mode for regressor A: {a_mode}"
    assert b_mode == "additive", f"Unexpected mode for regressor B: {b_mode}"


def test_event_regressor_coefficients():
    """Coefficients of the US country holidays."""
    log.info("Testing: Event Regressor Coefficients")
    df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
    m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=LR)
    m = m.add_country_holidays("US")
    m.fit(df, freq="D")
    coefficients = m.model.get_event_coefficients()
    log.info(coefficients)
    assert not coefficients.empty, "No coefficients found"
    assert "regressor" in coefficients.columns, "Regressor column missing"
    assert "regressor_type" in coefficients.columns, "Regressor type column missing"
    assert "regressor_mode" in coefficients.columns, "Regressor mode column missing"
    assert "coef" in coefficients.columns, "Coefficient column missing"
    assert len(coefficients) == 10, f"Incorrect number of coefficients found: {len(coefficients)}"


def test_lagged_regressor_coefficients():
    """One coefficient per lag of a lagged regressor with three lags."""
    log.info("Testing: Lagged Regressor Coefficients")
    df = pd.read_csv(ENERGY_TEMP_DAILY_FILE, nrows=NROWS)
    m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=LR)
    m.add_lagged_regressor("temperature", n_lags=3)
    m.fit(df, freq="D")
    coefficients = m.model.get_lagged_regressor_coefficients()
    log.info(coefficients)
    assert not coefficients.empty, "No coefficients found"
    assert "regressor" in coefficients.columns, "Regressor column missing"
    assert "lag" in coefficients.columns, "Lag column missing"
    assert "coef" in coefficients.columns, "Coefficient column missing"
    assert len(coefficients) == 3, "Incorrect number of lagged coefficients"


def test_ar_coefficients():
    """One AR coefficient per lag, with a lagged regressor present in the same model."""
    log.info("Testing: AR Coefficients")
    df = pd.read_csv(ENERGY_TEMP_DAILY_FILE, nrows=NROWS)
    m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=LR, n_lags=10)
    m.add_lagged_regressor("temperature")
    m.fit(df, freq="D")
    coefficients = m.model.get_ar_coefficients()
    log.info(coefficients)
    assert not coefficients.empty, "No coefficients found"
    assert "regressor" in coefficients.columns, "Regressor column missing"
    assert "lag" in coefficients.columns, "Lag column missing"
    assert "coef" in coefficients.columns, "Coefficient column missing"
    assert len(coefficients) == 10, "Incorrect number of AR coefficients"