hyper parameter search #6

Draft · wants to merge 2 commits into main
3 changes: 2 additions & 1 deletion .env
@@ -3,4 +3,5 @@ export RESAMPLING_FREQ=15 # Unit is minutes
 export ESP_OUTPUT_FOLDER=Processed
 export DAILY_OUTPUT_FOLDER=Cropped
 export OUTPUT_FOLDER=Predictions
-export BEST_CHECKPOINT=checkpoints/best-chckpt-v21.ckpt
+export BEST_CHECKPOINT=checkpoints/best-chckpt-v21.ckpt
+export PROBA_THRESHOLD=0.8
283 changes: 276 additions & 7 deletions notebooks/tensorboard_viz.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/test_plot.ipynb
@@ -161,7 +161,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.12"
+"version": "3.11.7"
 }
 },
 "nbformat": 4,
4 changes: 3 additions & 1 deletion requirements.txt
@@ -12,4 +12,6 @@ scipy==1.12.0
 tbparse==0.0.8
 tensorboard==2.16.2
 torch==2.2.1
-tqdm==4.66.2
+tqdm==4.66.2
+optuna==3.6.0
+optuna-integration==3.6.0
3 changes: 2 additions & 1 deletion scripts/env.py
@@ -20,4 +20,5 @@
 DAILY_OUTPUT_FOLDER = os.environ.get("DAILY_OUTPUT_FOLDER")
 RESAMPLING_FREQ = os.environ.get("RESAMPLING_FREQ")
 PRED_FOL = os.environ.get("OUTPUT_FOLDER")
-BEST_CHECKPOINT = os.environ.get("BEST_CHECKPOINT")
+BEST_CHECKPOINT = os.environ.get("BEST_CHECKPOINT")
+PROBA_THRESHOLD = float(os.environ.get("PROBA_THRESHOLD"))
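A note on robustness: float(os.environ.get("PROBA_THRESHOLD")) raises a TypeError whenever the variable is unset, for example when .env has not been sourced. A minimal hardening sketch, assuming the .env value of 0.8 is the intended fallback:

import os

# Fall back to the .env default so importing env.py never crashes on float(None).
PROBA_THRESHOLD = float(os.environ.get("PROBA_THRESHOLD", "0.8"))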
4 changes: 3 additions & 1 deletion scripts/evaluate.py
@@ -8,6 +8,8 @@
 from torch.utils.data import DataLoader
 from train import ESPFailureModel
 
+from env import PROBA_THRESHOLD
+
 
 class ESP_Eval_Chkpt:
     def __init__(self, checkpoint_path, batch_size=128):
@@ -40,7 +42,7 @@ def confusion_matrix(self):
         for batch in self.dataloader:
             yp = self.model(batch["features"])
             yp = np.squeeze(yp[1].cpu().detach().numpy())
-            yp = np.where(yp>=0.8, 1, 0)
+            yp = np.where(yp>=PROBA_THRESHOLD, 1, 0)
             yt = np.squeeze(batch["labels"].cpu().detach().numpy())
             ytest.append(yt)
             ypred.append(yp)
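The hunk cuts off before the accumulated ytest/ypred batches are flattened and tabulated. A sketch of one way to finish that step with scikit-learn; the concatenation and the sklearn dependency are assumptions, not shown in this diff:

import numpy as np
from sklearn.metrics import confusion_matrix

# Stack the per-batch arrays into flat label vectors, then tabulate.
y_true = np.concatenate([np.atleast_1d(a) for a in ytest])
y_pred = np.concatenate([np.atleast_1d(a) for a in ypred])
print(confusion_matrix(y_true, y_pred))  # rows = true class, columns = predicted class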
96 changes: 96 additions & 0 deletions scripts/hyper_search.py
@@ -0,0 +1,96 @@
import argparse

import optuna
import pytorch_lightning as pl
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

from data_loader import Train_Test_Split, ESPDataset, ESPDataModule
from env import *
from train import ESPFailureModel


def objective(trial, seed, split):
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128, 256])
    dropout = trial.suggest_float('dropout', 0.0, 0.5, step=0.1)
    num_stack_layers = trial.suggest_int('num_stack_layers', 1, 3)
    num_epochs = trial.suggest_categorical('num_epochs', [150, 200, 250, 300])
    learning_rate = trial.suggest_categorical('learning_rate', [1e-3, 1e-4, 1e-5])
    num_layers = trial.suggest_int('num_layers', 1, 3)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    # Set seed for reproducibility
    pl.seed_everything(seed=seed)

    tts = Train_Test_Split(f"{DAILY_OUTPUT_FOLDER}_{SLIDE_N}", split=split)
    data_paths = tts.split_data()

    # Create the dataloaders
    data_module = ESPDataModule(train_paths=data_paths["train"],
                                val_paths=data_paths["val"],
                                test_paths=data_paths["test"],
                                batch_size=batch_size)

    # Load a single file to get the model dimensions
    single_batch = next(iter(ESPDataset(data_paths["val"][:1])))
    n_features = single_batch["features"].shape[-1]
    n_classes = single_batch["labels"].shape[-1]

    # Initialize the model
    model = ESPFailureModel(n_features=n_features,
                            n_classes=n_classes,
                            lr=learning_rate,
                            dropout=dropout,
                            hidden_size=hidden_size,
                            num_stack_layers=num_stack_layers,
                            n_layers=num_layers)

    # Define the model callbacks
    checkpoint_call_back = ModelCheckpoint(
        dirpath=f"checkpoints/{trial.number}",
        filename="best-chckpt",
        save_top_k=1,
        verbose=True,
        monitor="val_loss",
        mode="min"
    )

    logger = TensorBoardLogger(save_dir="lightning_logs", name="JTK_Challenge")

    trainer = pl.Trainer(logger=logger,
                         callbacks=[checkpoint_call_back],
                         max_epochs=num_epochs,
                         deterministic=True,
                         enable_progress_bar=True)

    model.save_hyperparameters({"hidden_dim": hidden_size,
                                "learning_rate": learning_rate,
                                "dropout": dropout,
                                "num_stack_layers": num_stack_layers,
                                "num_layers": num_layers,
                                "num_epochs": num_epochs,
                                "seed": seed,
                                "split": split,
                                "batch_size": batch_size,
                                "device": trainer.accelerator})

    trainer.fit(model, data_module)

    return trainer.callback_metrics["val_loss"].item()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Hyperparameter search for the ESP failure model")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--split", type=float, default=0.8)
    parser.add_argument("--n_trials", type=int, default=300)
    args = parser.parse_args()

    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, args.seed, args.split), n_trials=args.n_trials)

    best_params = study.best_params
    print("Best hyperparameters:", best_params)
    print("Best value:", study.best_value)
4 changes: 2 additions & 2 deletions scripts/inference.py
@@ -13,7 +13,7 @@
 
 # Cropped_14/z8jfoj1ef3_esp#2_2022-08-03_PF.npz
 class ESP_Predictor:
-    def __init__(self, checkpoint_path, api, csv_folder_path="Train", probability=0.8):
+    def __init__(self, checkpoint_path, api, csv_folder_path="Train", probability=0.85):
         self.model = ESPFailureModel.load_from_checkpoint(checkpoint_path)
         self.model.eval()
         self.api = api
@@ -79,7 +79,7 @@ def predict(self, save_csv=True):
     parser.add_argument("--chkpt", type=str, default=BEST_CHECKPOINT, help="Model Checkpoint")
    parser.add_argument("--api", type=str, help="Well api number")
    parser.add_argument("--train_folder", default="Train", type=str, help="Training data folder path")
-    parser.add_argument("--prob", type=float, default=0.8, help="Prediction probability threshold")
+    parser.add_argument("--prob", type=float, default=PROBA_THRESHOLD, help="Prediction probability threshold")
     args = parser.parse_args()
 
     args.api = "z8jfojo31x"
46 changes: 26 additions & 20 deletions scripts/model.py
@@ -2,44 +2,50 @@
 
 
 class LSTMClassifier(nn.Module):
-    def __init__(self, n_features, n_classes, n_hidden=32, n_layer=1, dropout=0.75) -> None:
+    def __init__(self, n_features, n_classes, hidden_size=32, n_layer=1, num_stack_layers=1, dropout=0.75) -> None:
         """
         LSTM Classifier model
 
         Args:
             n_features (int): 13 features available from the ESP data
             n_classes (int): defaults to 1 because we're doing binary classification and we can scale probabilities.
-            n_hidden (int, optional): Number of LSTMs stacked on each other. Defaults to 32.
+            hidden_size (int, optional): hidden size of the LSTM. Defaults to 32.
+            num_stack_layers (int, optional): number of LSTMs stacked on each other. Defaults to 1.
             n_layer (int, optional): number of recurrent layers inside each nn.LSTM. Defaults to 1.
             dropout (float, optional): dropout probability between recurrent layers. Defaults to 0.75.
         """
         super(LSTMClassifier, self).__init__()
 
-        self.lstm1 = nn.LSTM(input_size=n_features,
-                             hidden_size=n_hidden*2,
-                             num_layers=n_layer,
-                             batch_first=True,
-                             dropout=dropout,)
-
-        self.lstm2 = nn.LSTM(input_size=n_hidden*2,
-                             hidden_size=n_hidden,
-                             num_layers=n_layer,
-                             batch_first=True,
-                             dropout=dropout,)
+        self.lstm_layers = nn.ModuleList()
+        self.num_stack_layers = num_stack_layers
+
+        # nn.LSTM applies dropout only between its recurrent layers, so disable it when n_layer == 1
+        dropout = dropout if n_layer > 1 else 0
+
+        current_input_dim = n_features
+        for i in range(self.num_stack_layers):
+            # Calculate the hidden size for the current layer: sizes widen toward the
+            # middle of the stack, then narrow back down to hidden_size
+            layer_hidden_size = hidden_size * (2 ** i) if i < self.num_stack_layers // 2 else hidden_size * (2 ** (self.num_stack_layers - i - 1))
+
+            self.lstm_layers.append(nn.LSTM(current_input_dim, layer_hidden_size, batch_first=True,
+                                            num_layers=n_layer, dropout=dropout))
+
+            # The next layer's input dimension is the current layer's output dimension
+            current_input_dim = layer_hidden_size
 
-        self.batch_norm = nn.BatchNorm1d(n_hidden)
+        self.batch_norm = nn.BatchNorm1d(hidden_size)
 
-        self.classifier = nn.Linear(n_hidden, n_classes)
+        self.classifier = nn.Linear(hidden_size, n_classes)
         self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
-        self.lstm1.flatten_parameters()
-        output, (hidden, _) = self.lstm1(x)
-        self.lstm2.flatten_parameters()
-        _, (hidden, _) = self.lstm2(output)
+        for lstm_layer in self.lstm_layers:
+            lstm_layer.flatten_parameters()
+            x, (hidden, _) = lstm_layer(x)
+
+        # Extract the last hidden state of the final stacked layer
         out = hidden[-1]
         out = self.batch_norm(out)
         out = self.classifier(out)
 
         return self.sigmoid(out)
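To make the widening-then-narrowing scheme concrete, here is a quick standalone check of the per-layer hidden sizes that the layer_hidden_size expression produces (a verification sketch, not part of the PR):

def stack_sizes(hidden_size, num_stack_layers):
    # Mirrors the layer_hidden_size expression in LSTMClassifier.__init__
    return [hidden_size * (2 ** i) if i < num_stack_layers // 2
            else hidden_size * (2 ** (num_stack_layers - i - 1))
            for i in range(num_stack_layers)]

print(stack_sizes(32, 1))  # [32]
print(stack_sizes(32, 2))  # [32, 32]
print(stack_sizes(32, 3))  # [32, 64, 32]

The last entry is always hidden_size, which is what keeps BatchNorm1d(hidden_size) and nn.Linear(hidden_size, n_classes) consistent with the stack for any num_stack_layers.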
4 changes: 3 additions & 1 deletion scripts/submission.py
@@ -5,6 +5,8 @@
 from tqdm import tqdm
 from utils import date_parser
 
+from env import PROBA_THRESHOLD
+
 if __name__ == "__main__":
     os.system("clear")
 
@@ -23,7 +25,7 @@
     model = ESP_Predictor(checkpoint_path=BEST_CHECKPOINT,
                           api=api,
                           csv_folder_path=data_path,
-                          probability=0.8)
+                          probability=PROBA_THRESHOLD)
     pred = model.predict()
 
     os.system("rm -rf scripts/__pycache__")
18 changes: 16 additions & 2 deletions scripts/train.py
@@ -10,14 +10,17 @@
 import torch
 from torchmetrics.classification import BinaryFBetaScore
 
 
+from env import *
 
 
 class ESPFailureModel(pl.LightningModule):
-    def __init__(self, n_features, n_classes, lr, dropout, n_layers):
+    def __init__(self, n_features, n_classes, lr, dropout, hidden_size, n_layers, num_stack_layers):
         super().__init__()
         self.model = LSTMClassifier(n_features=n_features,
                                     n_classes=n_classes,
+                                    num_stack_layers=num_stack_layers,
+                                    hidden_size=hidden_size,
                                     n_layer=n_layers,
                                     dropout=dropout)
         self.criterion = nn.BCELoss()
@@ -38,7 +41,11 @@ def training_step(self, batch, batch_idx):
         daily_sequence = batch["features"]
         labels = batch["labels"]
         loss, outputs = self(daily_sequence, labels)
-        predictions = torch.where(outputs > 0.8, 1, 0)
+        predictions = torch.where(outputs > PROBA_THRESHOLD, 1, 0)
         step_acc = self.metric(predictions, labels)
         step_fbeta = self.fbeta_score(predictions, labels)
 
@@ -51,7 +58,11 @@ def validation_step(self, batch, batch_idx):
         daily_sequence = batch["features"]
         labels = batch["labels"]
         loss, outputs = self(daily_sequence, labels)
-        predictions = torch.where(outputs > 0.8, 1, 0)
+        predictions = torch.where(outputs > PROBA_THRESHOLD, 1, 0)
         step_acc = self.metric(predictions, labels)
         step_fbeta = self.fbeta_score(predictions, labels)
 
@@ -64,7 +75,11 @@ def test_step(self, batch, batch_idx):
         daily_sequence = batch["features"]
         labels = batch["labels"]
         loss, outputs = self(daily_sequence, labels)
-        predictions = torch.where(outputs > 0.8, 1, 0)
+        predictions = torch.where(outputs > PROBA_THRESHOLD, 1, 0)
         step_acc = self.metric(predictions, labels)
         step_fbeta = self.fbeta_score(predictions, labels)
 
@@ -131,7 +146,6 @@ def trainer_wrapper(split, batch_size, learning_rate, num_epochs, dropout, num_l
     trainer.fit(model, data_module)
 
 
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Train and evaluate an LSTM model on NPZ data with dynamic configuration.")
     parser.add_argument("--split", type=float, default=0.8, help="Train test split percentage")
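Now that the three step methods are identical except for the logged stage, they invite a shared helper. A sketch under that assumption; of the log keys, only val_loss is confirmed by the ModelCheckpoint monitor in hyper_search.py, the rest are illustrative:

def _shared_step(self, batch, stage):
    # Identical forward, threshold, and metric logic for train, val, and test.
    loss, outputs = self(batch["features"], batch["labels"])
    predictions = torch.where(outputs > PROBA_THRESHOLD, 1, 0)
    self.log(f"{stage}_loss", loss, prog_bar=True)
    self.log(f"{stage}_accuracy", self.metric(predictions, batch["labels"]), prog_bar=True)
    self.log(f"{stage}_fbeta", self.fbeta_score(predictions, batch["labels"]), prog_bar=True)
    return loss

def training_step(self, batch, batch_idx):
    return self._shared_step(batch, "train")

def validation_step(self, batch, batch_idx):
    return self._shared_step(batch, "val")

def test_step(self, batch, batch_idx):
    return self._shared_step(batch, "test")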