Skip to content

Commit

Permalink
hyperparameter plot feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Josesx506 committed Mar 30, 2024
1 parent 83c8858 commit 06c4623
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Predictions_*
jtk
checkpoints
lightning_logs
images

# Extensions
.DS_Store
Expand Down
57 changes: 45 additions & 12 deletions notebooks/view_results.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ python-dotenv==1.0.1
pytorch-lightning==2.2.1
scikit-learn==1.4.1.post1
scipy==1.12.0
tbparse==0.0.8
tensorboard==2.16.2
torch==2.2.1
tqdm==4.66.2
96 changes: 96 additions & 0 deletions scripts/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from matplotlib.path import Path
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np


class Parallel_Coordinates:
    """Draw a parallel-coordinates plot of a dataframe on a matplotlib axis.

    The first dataframe column is treated as the row label (e.g. run name);
    every remaining column becomes one vertical axis. Each row is rendered
    as a smooth Bezier curve crossing all axes.
    """

    def __init__(self, dataframe, ax=None, fs=10, highlight=45) -> None:
        """
        Args:
            dataframe: table whose first column holds row labels and whose
                remaining (numeric) columns become the parallel axes.
            ax: matplotlib axes object to draw on.
            fs: base font size for tick and axis labels.
            highlight: index of the row drawn thick, black and dashed so a
                single run stands out (default 45 preserves prior behavior).
        """
        self.columns = dataframe.columns[1:]
        # Cast to float so the zero-replacement in transforms() is not
        # silently truncated back to 0 on integer/object columns.
        self.data = dataframe.to_numpy()[:, 1:].astype(float)
        self.index = dataframe.iloc[:, 0].to_list()
        self.ax = ax
        self.lim = self.data.shape[1] - 1  # x-position of the last axis
        self.fs = fs
        self.highlight = highlight

    def wrap_xlabel_strings(self, max_char=9):
        """Hard-wrap column names longer than ``max_char`` onto new lines."""
        wrapped = []
        for label in self.columns:
            if len(label) > max_char:
                chunks = [label[i:i + max_char]
                          for i in range(0, len(label), max_char)]
                wrapped.append('\n'.join(chunks))
            else:
                wrapped.append(label)
        self.columns = wrapped

    def transforms(self):
        """Rescale every column into the value range of column 0.

        Stores per-axis limits in ``self.ymin`` / ``self.ymax`` and the
        rescaled data in ``self.zs`` so all curves share one host axis.
        """
        # Avoid exact zeros (e.g. so log-scaled axes stay finite).
        self.data[self.data == 0] = 1e-5
        ys = self.data
        ymins = ys.min(axis=0)
        ymaxs = ys.max(axis=0)
        dys = ymaxs - ymins
        ymins -= dys * 0.05  # add 5% padding below and above
        ymaxs += dys * 0.05
        # Reverse axis 1 to reduce line crossings (assumes >= 2 data columns).
        ymaxs[1], ymins[1] = ymins[1], ymaxs[1]

        self.ymin = ymins
        self.ymax = ymaxs

        dys = ymaxs - ymins
        # Map every column linearly onto the range of column 0.
        zs = np.zeros_like(ys)
        zs[:, 0] = ys[:, 0]
        zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

        self.zs = zs

    def create_axes(self):
        """Create one twin y-axis per column and style the shared x-axis."""
        axes = [self.ax] + [self.ax.twinx() for _ in range(self.lim)]

        for i, nx in enumerate(axes):
            nx.set_ylim(self.ymin[i], self.ymax[i])
            nx.spines['top'].set_visible(False)
            nx.spines['bottom'].set_visible(False)
            nx.tick_params(axis='y', labelsize=self.fs - 2)
            if nx != self.ax:
                nx.spines['left'].set_visible(False)
                nx.yaxis.set_ticks_position('right')
                # Spread the twin axes evenly along the x-axis.
                nx.spines["right"].set_position(
                    ("axes", i / (self.data.shape[1] - 1)))

        self.ax.set_xlim(0, self.lim)
        self.ax.set_xticks(range(self.data.shape[1]))
        self.ax.set_xticklabels(self.columns, fontsize=self.fs, rotation=0)
        self.ax.tick_params(axis='x', which='major', pad=7)
        self.ax.spines['right'].set_visible(False)
        self.ax.xaxis.tick_top()

    def plot_curves(self):
        """Draw one Bezier curve per dataframe row across all axes."""
        base_colors = plt.cm.Set2.colors
        # Repeat the palette so there is a color for every row.
        repeats = int(np.ceil(len(self.data) / len(base_colors)))
        colors = list(base_colors) * repeats
        legend_handles = [None for _ in self.index]

        n_cols = self.data.shape[1]
        # Three control points per axis interval (step 1/3), spanning axis 0
        # to axis self.lim. Deriving the count from the COLUMN count (not the
        # row count, as before) keeps curves intact when there are fewer
        # rows than axes; for rows >= cols the x-values are identical.
        x_ctrl = np.linspace(0, self.lim, n_cols * 3 - 2, endpoint=True)

        for j in range(self.data.shape[0]):
            col, lw, ls = colors[j], 0.3, "-"
            if j == self.highlight:
                # Emphasize the highlighted run.
                col, lw, ls = "k", 2, "--"
            # Bezier control points: endpoints kept once, interior repeated.
            verts = list(zip(x_ctrl, np.repeat(self.zs[j, :], 3)[1:-1]))
            codes = [Path.MOVETO] + [Path.CURVE4 for _ in range(len(verts) - 1)]
            path = Path(verts, codes)
            patch = patches.PathPatch(path, facecolor='none', lw=lw,
                                      alpha=1, edgecolor=col, ls=ls)
            legend_handles[j] = patch
            self.ax.add_patch(patch)

    def plot(self):
        """Run the full pipeline: wrap labels, rescale, build axes, draw."""
        self.wrap_xlabel_strings()
        self.transforms()
        self.create_axes()
        self.plot_curves()
52 changes: 51 additions & 1 deletion scripts/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from glob import glob
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from tbparse import SummaryReader
import yaml

def date_parser(str_date):
"""
Expand Down Expand Up @@ -171,4 +174,51 @@ def normalize_timeseries(array):

# Normalize the array
norm_arr = (array - mean) / std
return norm_arr
return norm_arr


def load_hp_params(path):
    """Collect hyperparameters and best scalar metrics from tensorboard runs.

    Args:
        path: glob pattern matching one tensorboard log directory per run;
            each matched directory must contain an ``hparams.yaml`` file.

    Returns:
        pd.DataFrame with one row per valid run: the run name, the tuned
        hyperparameters, and the extreme value of each logged metric.
    """
    log_dirs = sorted(glob(path))

    hp_params = {"run": [],
                 "dropout": [],
                 "hidden_size": [],
                 "lr": [],
                 "n_layers": [],
                 "num_stack_layers": [],
                 "train_acc": [],
                 "val_acc": [],
                 "trn_fbeta": [],
                 "val_fbeta": [],
                 "trn_loss": [],
                 "val_loss": []}

    for i, log_dir in enumerate(log_dirs):
        df = SummaryReader(log_dir).scalars

        # Runs with <= 2 scalar rows never really trained: skip them.
        if len(df) <= 2:
            continue

        hp_params["run"].append(f"run_{i}")
        with open(f"{log_dir}/hparams.yaml", "r") as file:
            hparams = yaml.safe_load(file)

        for key, value in hparams.items():
            # n_features / n_classes are fixed by the dataset, not tuned.
            if key not in ["n_features", "n_classes"]:
                hp_params[key].append(value)

        # Extreme value observed over training for each scalar tag.
        # NOTE(review): nanmax is applied to the loss tags too, so the loss
        # columns record the WORST loss — confirm this is intentional
        # (nanmin would capture the best loss instead).
        metric_tags = [("train_acc", "train_acc"),
                       ("val_acc", "val_acc"),
                       ("train_fbeta", "trn_fbeta"),
                       ("val_fbeta", "val_fbeta"),
                       ("train_loss", "trn_loss"),
                       ("val_loss", "val_loss")]
        for tag, column in metric_tags:
            values = df[df.tag == tag].value.to_numpy()
            hp_params[column].append(np.nanmax(values))

    return pd.DataFrame(hp_params)

0 comments on commit 06c4623

Please sign in to comment.