feat: add streamlit, evaluating predictions
freddysongg committed Nov 24, 2024
1 parent ce36415 commit 61ad91a
Showing 7 changed files with 149 additions and 99 deletions.
131 changes: 40 additions & 91 deletions app.py
@@ -1,52 +1,11 @@
import requests
import streamlit as st
import numpy as np
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import json
import tensorflow as tf
import torch
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime

# Helper functions
def load_scaler(path):
with open(path, 'r') as f:
scaler_data = json.load(f)
scaler = MinMaxScaler()
scaler.min_ = np.array(scaler_data['min_'])
scaler.scale_ = np.array(scaler_data['scale_'])
return scaler

def load_lstm_model():
return tf.keras.load_model('models/best_lstm_model.keras')

def load_transformer_model(params_path, model_path):
with open(params_path, 'r') as f:
params = json.load(f)
model = TimeSeriesTransformer(
input_size=params['d_model'],
num_layers=params['num_layers'],
num_heads=params['num_heads'],
d_model=params['d_model'],
dim_feedforward=params['dim_feedforward']
)
model.load_state_dict(torch.load(model_path))
return model, params

# TimeSeriesTransformer class definition (same as in your training code)
class TimeSeriesTransformer(torch.nn.Module):
def __init__(self, input_size, num_layers, num_heads, d_model, dim_feedforward):
super(TimeSeriesTransformer, self).__init__()
self.encoder_layer = torch.nn.TransformerEncoderLayer(
d_model=d_model, nhead=num_heads, dim_feedforward=dim_feedforward, batch_first=True
)
self.transformer_encoder = torch.nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
self.fc = torch.nn.Linear(d_model, 1)

def forward(self, x):
x = self.transformer_encoder(x)
x = self.fc(x[:, -1, :]) # Output of the last time step
return x
# API Base URL
API_BASE_URL = "http://127.0.0.1:8000"

# UI Setup
st.set_page_config(
@@ -77,7 +36,7 @@ def forward(self, x):
st.sidebar.title("⚙️ Settings")

# Sidebar options
model_type = st.sidebar.selectbox("Select Model Type", ["LSTM", "Transformer"])
model_type = st.sidebar.selectbox("Select Model Type", ["LSTM", "Transformer", "ARIMA"])
seq_length = st.sidebar.number_input("Sequence Length", min_value=5, max_value=50, value=10, step=1)
uploaded_file = st.sidebar.file_uploader("Upload Test Data (CSV)", type=["csv"])
dark_mode = st.sidebar.checkbox("Enable Dark Mode")
@@ -98,61 +57,51 @@ def forward(self, x):
""", unsafe_allow_html=True)

# Load Data
data = None
if uploaded_file:
data = pd.read_csv(uploaded_file)
st.sidebar.write(f"Data preview:")
st.sidebar.write("Data preview:")
st.sidebar.write(data.head())
else:
st.sidebar.warning("Upload a CSV file to proceed.")

# Load Models
scaler_path = 'models/scaler.json'
scaler = load_scaler(scaler_path)
lstm_model = None
transformer_model = None
if model_type == "LSTM":
lstm_model = load_lstm_model()
else:
transformer_model, transformer_params = load_transformer_model(
'params/best_ts_transformer_params.json',
'models/best_ts_transformer_model.pt'
)

# Inference
# Run Inference
if st.button("Run Inference"):
if not uploaded_file:
if data is None:
st.error("Please upload a test data file first.")
else:
# Scale and process data
scaled_data = scaler.transform(data.values)
sequences = [
scaled_data[i : i + seq_length]
for i in range(len(scaled_data) - seq_length)
]
sequences = np.array(sequences)

# Predict
if model_type == "LSTM":
predictions = lstm_model.predict(sequences)
# Convert data to JSON-friendly format
input_data = {"data": data.values.flatten().tolist()}

# Call API based on model type
endpoint = {
"LSTM": "/predict/lstm",
"Transformer": "/predict/transformer",
"ARIMA": "/predict/arima"
}.get(model_type)

if endpoint:
try:
response = requests.post(API_BASE_URL + endpoint, json=input_data)
response.raise_for_status()
predictions = response.json()["predictions"]

# Visualization
st.success("Inference complete! Here are the results:")
fig = go.Figure()
fig.add_trace(go.Scatter(y=data.values.flatten(), name="Actual", mode="lines"))
fig.add_trace(go.Scatter(y=predictions, name="Predicted", mode="lines"))
fig.update_layout(
title="Actual vs Predicted",
xaxis_title="Time Steps",
yaxis_title="Values",
template="plotly_dark" if dark_mode else "plotly_white",
)
st.plotly_chart(fig)
except requests.exceptions.RequestException as e:
st.error(f"API call failed: {e}")
else:
sequences_torch = torch.FloatTensor(sequences).unsqueeze(-1) # Add feature dim
predictions = transformer_model(sequences_torch).detach().numpy()

# Rescale predictions
predictions_rescaled = scaler.inverse_transform(predictions)

# Visualization
st.success("Inference complete! Here are the results:")
fig = go.Figure()
fig.add_trace(go.Scatter(y=data.values.flatten(), name="Actual", mode="lines"))
fig.add_trace(go.Scatter(y=predictions_rescaled.flatten(), name="Predicted", mode="lines"))
fig.update_layout(
title="Actual vs Predicted",
xaxis_title="Time Steps",
yaxis_title="Values",
template="plotly_dark" if dark_mode else "plotly_white",
)
st.plotly_chart(fig)
st.error("Invalid model type selected.")

# Footer
st.markdown("#### Made with ❤️ for CaféCast")
42 changes: 40 additions & 2 deletions data/modify_dataset.py
@@ -1,17 +1,55 @@
import pandas as pd
import json
from sklearn.preprocessing import MinMaxScaler

def prepare_data(file_path, date_col='transaction_date', time_col='transaction_time'):
    data = pd.read_excel(file_path)

    # Convert date column to datetime
    data[date_col] = pd.to_datetime(data[date_col])

    # Set the date column as the index
    data.set_index(date_col, inplace=True)

    # Extract transaction hour if the time column exists
    if time_col in data.columns:
        data['transaction_hour'] = data[time_col].apply(lambda x: x.hour)

    return data

def generate_test_data(file_path, output_file='test_payload.json', seq_length=10):
    # Prepare data
    data = prepare_data(file_path)

    # Resample to daily transaction quantities
    daily_data = data.resample('D')['transaction_qty'].sum()

    # Scale the data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(daily_data.values.reshape(-1, 1))

    # Generate sequences
    sequences = [
        scaled_data[i:i + seq_length].flatten().tolist()
        for i in range(len(scaled_data) - seq_length)
    ]

    # Create a sample payload with the first sequence
    if sequences:
        test_data = {"data": sequences[0]}  # Taking the first sequence for testing

        # Save the test payload to a JSON file
        with open(output_file, 'w') as f:
            json.dump(test_data, f)

        print(f"Test data saved to {output_file}")
    else:
        print("Not enough data to generate sequences. Please ensure the dataset is sufficient.")

if __name__ == "__main__":
    data = prepare_data('../data/cafecast_data.xlsx')
    print(data.info())
    file_path = 'data/cafecast_data.xlsx'
    data = prepare_data(file_path)
    print(data.info())

    # Generate and save test data
    generate_test_data(file_path, output_file='test_payload.json', seq_length=10)
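generate_test_data writes one scaled sequence of seq_length values to test_payload.json, which matches the {"data": [...]} shape the new Streamlit/API flow expects. A quick smoke test against a locally running API might look like this (a sketch, assuming the server from src/api.py is up on the same base URL app.py uses):

# Post the generated payload to the running API and print the predictions.
import json
import requests

with open("test_payload.json") as f:
    payload = json.load(f)  # {"data": [ ...10 scaled values... ]}

resp = requests.post("http://127.0.0.1:8000/predict/lstm", json=payload)
resp.raise_for_status()
print(resp.json()["predictions"])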
54 changes: 54 additions & 0 deletions evaluate_predictions.py
@@ -0,0 +1,54 @@

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

def evaluate_predictions(actuals, predictions):
    actuals = np.array(actuals)
    predictions = np.array(predictions)

    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))
    mape = np.mean(np.abs((actuals - predictions) / actuals)) * 100

    return {
        "MAE": mae,
        "RMSE": rmse,
        "MAPE": mape
    }

def plot_predictions(actuals, predictions):
    plt.figure(figsize=(10, 6))
    plt.plot(actuals, label="Actual", marker='o')
    plt.plot(predictions, label="Predicted", marker='x')
    plt.xlabel("Time Steps")
    plt.ylabel("Values")
    plt.title("Actual vs Predicted")
    plt.legend()
    plt.grid(True)
    plt.show()

def naive_forecast(actuals):
    return actuals[:-1]  # Use each previous observation as the forecast for the next step (lag-1 baseline)

def moving_average_forecast(actuals, window=3):
    return [np.mean(actuals[i-window:i]) for i in range(window, len(actuals))]

if __name__ == "__main__":
    # Example usage:
    # Replace these with your actual test and prediction data
    actual_values = [100, 105, 110, 120]  # Example actual values
    predicted_values = [98, 107, 115, 118]  # Example predicted values

    # Evaluate metrics
    metrics = evaluate_predictions(actual_values, predicted_values)
    print("Evaluation Metrics:", metrics)

    # Plot actual vs predicted values
    plot_predictions(actual_values, predicted_values)

    # Baselines
    naive = naive_forecast(actual_values)
    moving_avg = moving_average_forecast(actual_values, window=2)
    print("Naive Forecast:", naive)
    print("Moving Average Forecast:", moving_avg)
17 changes: 12 additions & 5 deletions params/best_lstm_params.json
@@ -1,5 +1,12 @@
{
"batch_size": 61,
"epochs": 214,
"num_units": 289
}
[
{
"batch_size": 61,
"epochs": 214,
"num_units": 289
},
{
"num_units": 50,
"batch_size": 40,
"epochs": 100
}
]
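Because the file now holds a list of configurations instead of a single object, any code that reads it has to pick an entry. A small sketch of a tolerant loader (assuming the first entry is the one to use; the training code's actual selection logic is not shown in this diff):

# Read the tuned LSTM hyperparameters from the (now list-valued) JSON file.
import json

with open("params/best_lstm_params.json") as f:
    configs = json.load(f)

# Handle both the old single-object format and the new list format.
params = configs[0] if isinstance(configs, list) else configs
print(params["num_units"], params["batch_size"], params["epochs"])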
1 change: 1 addition & 0 deletions requirements.txt
@@ -83,6 +83,7 @@ typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
uvicorn==0.32.1
watchdog==6.0.0
Werkzeug==3.1.3
wrapt==1.16.0
zipp==3.21.0
2 changes: 1 addition & 1 deletion src/api.py
@@ -21,7 +21,7 @@
TRANSFORMER_MODEL_PATH = "models/best_ts_transformer_model.pt"
ARIMA_MODEL_PATH = "models/arima_model.pkl"
PARAMS_DIR = "params/"
SCALER_PATH = "scaler.pkl"
SCALER_PATH = "models/scaler.pkl"

# LSTM Model
lstm_model = None
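The scaler artifact is now expected next to the model files. A hedged sketch of loading it from the updated path (src/api.py's actual deserialization code is outside this diff, and it may use joblib rather than pickle):

# Load the fitted MinMaxScaler from the relocated models/scaler.pkl.
# pickle is an assumption about the serialization format.
import pickle

with open("models/scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Predictions returned in scaled units can then be mapped back to real counts:
# real = scaler.inverse_transform(scaled_preds.reshape(-1, 1))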
1 change: 1 addition & 0 deletions test_payload.json
@@ -0,0 +1 @@
{"data": [0.11921032649962032, 0.11009870918754744, 0.13515565679574798, 0.06150341685649202, 0.10098709187547456, 0.06909643128321946, 0.1169324221716021, 0.12224753227031132, 0.0736522399392559, 0.15945330296127563]}
