feat: add streamlit, evaluating predictions
1 parent ce36415 · commit 61ad91a
Showing 7 changed files with 149 additions and 99 deletions.
@@ -1,17 +1,55 @@
import pandas as pd
import json
from sklearn.preprocessing import MinMaxScaler

def prepare_data(file_path, date_col='transaction_date', time_col='transaction_time'):
    data = pd.read_excel(file_path)

    # Convert date column to datetime
    data[date_col] = pd.to_datetime(data[date_col])

    # Set the date column as the index
    data.set_index(date_col, inplace=True)

    # Extract transaction hour if the time column exists
    if time_col in data.columns:
        data['transaction_hour'] = data[time_col].apply(lambda x: x.hour)

    return data

def generate_test_data(file_path, output_file='test_payload.json', seq_length=10):
    # Prepare data
    data = prepare_data(file_path)

    # Resample to daily transaction quantities
    daily_data = data.resample('D')['transaction_qty'].sum()

    # Scale the data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(daily_data.values.reshape(-1, 1))

    # Generate sequences
    sequences = [
        scaled_data[i:i + seq_length].flatten().tolist()
        for i in range(len(scaled_data) - seq_length)
    ]

    # Create a sample payload with the first sequence
    if sequences:
        test_data = {"data": sequences[0]}  # Taking the first sequence for testing

        # Save the test payload to a JSON file
        with open(output_file, 'w') as f:
            json.dump(test_data, f)

        print(f"Test data saved to {output_file}")
    else:
        print("Not enough data to generate sequences. Please ensure the dataset is sufficient.")

if __name__ == "__main__":
-    data = prepare_data('../data/cafecast_data.xlsx')
-    print(data.info())
+    file_path = 'data/cafecast_data.xlsx'
+    data = prepare_data(file_path)
+    print(data.info())
+
+    # Generate and save test data
+    generate_test_data(file_path, output_file='test_payload.json', seq_length=10)
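The payload written by generate_test_data is a single scaled sequence intended for testing a prediction service. As a rough illustration only, the sketch below loads the saved file and posts it over HTTP with requests; the endpoint URL is hypothetical and not part of this commit.

import json
import requests  # assumed to be available; not used elsewhere in this commit

with open('test_payload.json') as f:
    payload = json.load(f)  # {"data": [...seq_length scaled values...]}

# Hypothetical scoring URL -- replace with the real endpoint of the served model
response = requests.post('http://localhost:8000/predict', json=payload, timeout=30)
print(response.status_code, response.json())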
@@ -0,0 +1,54 @@
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

def evaluate_predictions(actuals, predictions):
    actuals = np.array(actuals)
    predictions = np.array(predictions)

    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))
    # MAPE assumes the actual values contain no zeros
    mape = np.mean(np.abs((actuals - predictions) / actuals)) * 100

    return {
        "MAE": mae,
        "RMSE": rmse,
        "MAPE": mape
    }

def plot_predictions(actuals, predictions):
    plt.figure(figsize=(10, 6))
    plt.plot(actuals, label="Actual", marker='o')
    plt.plot(predictions, label="Predicted", marker='x')
    plt.xlabel("Time Steps")
    plt.ylabel("Values")
    plt.title("Actual vs Predicted")
    plt.legend()
    plt.grid(True)
    plt.show()

def naive_forecast(actuals):
    return actuals[:-1]  # Use each previous observation as the forecast for the next step

def moving_average_forecast(actuals, window=3):
    return [np.mean(actuals[i-window:i]) for i in range(window, len(actuals))]

if __name__ == "__main__":
    # Example usage:
    # Replace these with your actual test and prediction data
    actual_values = [100, 105, 110, 120]  # Example actual values
    predicted_values = [98, 107, 115, 118]  # Example predicted values

    # Evaluate metrics
    metrics = evaluate_predictions(actual_values, predicted_values)
    print("Evaluation Metrics:", metrics)

    # Plot actual vs predicted values
    plot_predictions(actual_values, predicted_values)

    # Baselines
    naive = naive_forecast(actual_values)
    moving_avg = moving_average_forecast(actual_values, window=2)
    print("Naive Forecast:", naive)
    print("Moving Average Forecast:", moving_avg)
@@ -1,5 +1,12 @@
-{
-    "batch_size": 61,
-    "epochs": 214,
-    "num_units": 289
-}
+[
+    {
+        "batch_size": 61,
+        "epochs": 214,
+        "num_units": 289
+    },
+    {
+        "num_units": 50,
+        "batch_size": 40,
+        "epochs": 100
+    }
+]
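Note that the hyperparameter file now holds a list of configurations instead of a single object, so whatever reads it needs to iterate. A minimal sketch, assuming a hypothetical file name (the actual path is not visible in this diff):

import json

# 'best_params.json' is a placeholder name for the file shown above
with open('best_params.json') as f:
    param_sets = json.load(f)

for i, params in enumerate(param_sets):
    # Each entry carries num_units, batch_size and epochs
    print(f"Config {i}: num_units={params['num_units']}, "
          f"batch_size={params['batch_size']}, epochs={params['epochs']}")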
@@ -0,0 +1 @@
{"data": [0.11921032649962032, 0.11009870918754744, 0.13515565679574798, 0.06150341685649202, 0.10098709187547456, 0.06909643128321946, 0.1169324221716021, 0.12224753227031132, 0.0736522399392559, 0.15945330296127563]}