Skip to content

Commit

Permalink
hyperparameter plot feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Josesx506 committed Mar 30, 2024
1 parent 83c8858 commit 06c4623
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Predictions_*
jtk
checkpoints
lightning_logs
images

# Extensions
.DS_Store
Expand Down
57 changes: 45 additions & 12 deletions notebooks/view_results.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ python-dotenv==1.0.1
pytorch-lightning==2.2.1
scikit-learn==1.4.1.post1
scipy==1.12.0
tbparse==0.0.8
tensorboard==2.16.2
torch==2.2.1
tqdm==4.66.2
96 changes: 96 additions & 0 deletions scripts/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from matplotlib.path import Path
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np


class Parallel_Coordinates:
    """Draw a parallel-coordinates plot of a dataframe on a matplotlib axis.

    The first dataframe column is treated as the row label (e.g. run name);
    every remaining column becomes one vertical axis. Each row is rendered
    as a smooth Bezier curve crossing all axes.
    """

    def __init__(self, dataframe, ax=None, fs=10, highlight=45) -> None:
        """
        Args:
            dataframe: table whose first column holds row labels and whose
                remaining (numeric) columns become the parallel axes.
            ax: matplotlib axes object to draw on.
            fs: base font size for tick and axis labels.
            highlight: index of the row drawn thick, black and dashed so a
                single run stands out (default 45 preserves prior behavior).
        """
        self.columns = dataframe.columns[1:]
        # Cast to float so the zero-replacement in transforms() is not
        # silently truncated back to 0 on integer/object columns.
        self.data = dataframe.to_numpy()[:, 1:].astype(float)
        self.index = dataframe.iloc[:, 0].to_list()
        self.ax = ax
        self.lim = self.data.shape[1] - 1  # x-position of the last axis
        self.fs = fs
        self.highlight = highlight

    def wrap_xlabel_strings(self, max_char=9):
        """Hard-wrap column names longer than ``max_char`` onto new lines."""
        wrapped = []
        for label in self.columns:
            if len(label) > max_char:
                chunks = [label[i:i + max_char]
                          for i in range(0, len(label), max_char)]
                wrapped.append('\n'.join(chunks))
            else:
                wrapped.append(label)
        self.columns = wrapped

    def transforms(self):
        """Rescale every column into the value range of column 0.

        Stores per-axis limits in ``self.ymin`` / ``self.ymax`` and the
        rescaled data in ``self.zs`` so all curves share one host axis.
        """
        # Avoid exact zeros (e.g. so log-scaled axes stay finite).
        self.data[self.data == 0] = 1e-5
        ys = self.data
        ymins = ys.min(axis=0)
        ymaxs = ys.max(axis=0)
        dys = ymaxs - ymins
        ymins -= dys * 0.05  # add 5% padding below and above
        ymaxs += dys * 0.05
        # Reverse axis 1 to reduce line crossings (assumes >= 2 data columns).
        ymaxs[1], ymins[1] = ymins[1], ymaxs[1]

        self.ymin = ymins
        self.ymax = ymaxs

        dys = ymaxs - ymins
        # Map every column linearly onto the range of column 0.
        zs = np.zeros_like(ys)
        zs[:, 0] = ys[:, 0]
        zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

        self.zs = zs

    def create_axes(self):
        """Create one twin y-axis per column and style the shared x-axis."""
        axes = [self.ax] + [self.ax.twinx() for _ in range(self.lim)]

        for i, nx in enumerate(axes):
            nx.set_ylim(self.ymin[i], self.ymax[i])
            nx.spines['top'].set_visible(False)
            nx.spines['bottom'].set_visible(False)
            nx.tick_params(axis='y', labelsize=self.fs - 2)
            if nx != self.ax:
                nx.spines['left'].set_visible(False)
                nx.yaxis.set_ticks_position('right')
                # Spread the twin axes evenly along the x-axis.
                nx.spines["right"].set_position(
                    ("axes", i / (self.data.shape[1] - 1)))

        self.ax.set_xlim(0, self.lim)
        self.ax.set_xticks(range(self.data.shape[1]))
        self.ax.set_xticklabels(self.columns, fontsize=self.fs, rotation=0)
        self.ax.tick_params(axis='x', which='major', pad=7)
        self.ax.spines['right'].set_visible(False)
        self.ax.xaxis.tick_top()

    def plot_curves(self):
        """Draw one Bezier curve per dataframe row across all axes."""
        base_colors = plt.cm.Set2.colors
        # Repeat the palette so there is a color for every row.
        repeats = int(np.ceil(len(self.data) / len(base_colors)))
        colors = list(base_colors) * repeats
        legend_handles = [None for _ in self.index]

        n_cols = self.data.shape[1]
        # Three control points per axis interval (step 1/3), spanning axis 0
        # to axis self.lim. Deriving the count from the COLUMN count (not the
        # row count, as before) keeps curves intact when there are fewer
        # rows than axes; for rows >= cols the x-values are identical.
        x_ctrl = np.linspace(0, self.lim, n_cols * 3 - 2, endpoint=True)

        for j in range(self.data.shape[0]):
            col, lw, ls = colors[j], 0.3, "-"
            if j == self.highlight:
                # Emphasize the highlighted run.
                col, lw, ls = "k", 2, "--"
            # Bezier control points: endpoints kept once, interior repeated.
            verts = list(zip(x_ctrl, np.repeat(self.zs[j, :], 3)[1:-1]))
            codes = [Path.MOVETO] + [Path.CURVE4 for _ in range(len(verts) - 1)]
            path = Path(verts, codes)
            patch = patches.PathPatch(path, facecolor='none', lw=lw,
                                      alpha=1, edgecolor=col, ls=ls)
            legend_handles[j] = patch
            self.ax.add_patch(patch)

    def plot(self):
        """Run the full pipeline: wrap labels, rescale, build axes, draw."""
        self.wrap_xlabel_strings()
        self.transforms()
        self.create_axes()
        self.plot_curves()
52 changes: 51 additions & 1 deletion scripts/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from glob import glob
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from tbparse import SummaryReader
import yaml

def date_parser(str_date):
"""
Expand Down Expand Up @@ -171,4 +174,51 @@ def normalize_timeseries(array):

# Normalize the array
norm_arr = (array - mean) / std
return norm_arr
return norm_arr


def load_hp_params(path):
    """Collect hyperparameters and best scalar metrics from tensorboard runs.

    Args:
        path: glob pattern matching one tensorboard log directory per run;
            each matched directory must contain an ``hparams.yaml`` file.

    Returns:
        pd.DataFrame with one row per valid run: the run name, the tuned
        hyperparameters, and the extreme value of each logged metric.
    """
    log_dirs = sorted(glob(path))

    hp_params = {"run": [],
                 "dropout": [],
                 "hidden_size": [],
                 "lr": [],
                 "n_layers": [],
                 "num_stack_layers": [],
                 "train_acc": [],
                 "val_acc": [],
                 "trn_fbeta": [],
                 "val_fbeta": [],
                 "trn_loss": [],
                 "val_loss": []}

    for i, log_dir in enumerate(log_dirs):
        df = SummaryReader(log_dir).scalars

        # Runs with <= 2 scalar rows never really trained: skip them.
        if len(df) <= 2:
            continue

        hp_params["run"].append(f"run_{i}")
        with open(f"{log_dir}/hparams.yaml", "r") as file:
            hparams = yaml.safe_load(file)

        for key, value in hparams.items():
            # n_features / n_classes are fixed by the dataset, not tuned.
            if key not in ["n_features", "n_classes"]:
                hp_params[key].append(value)

        # Extreme value observed over training for each scalar tag.
        # NOTE(review): nanmax is applied to the loss tags too, so the loss
        # columns record the WORST loss — confirm this is intentional
        # (nanmin would capture the best loss instead).
        metric_tags = [("train_acc", "train_acc"),
                       ("val_acc", "val_acc"),
                       ("train_fbeta", "trn_fbeta"),
                       ("val_fbeta", "val_fbeta"),
                       ("train_loss", "trn_loss"),
                       ("val_loss", "val_loss")]
        for tag, column in metric_tags:
            values = df[df.tag == tag].value.to_numpy()
            hp_params[column].append(np.nanmax(values))

    return pd.DataFrame(hp_params)

0 comments on commit 06c4623

Please sign in to comment.