Merge pull request #46 from Purg/dev/expand-aug-parameters
Dev/expand aug parameters
Purg authored Nov 20, 2024
2 parents 3c5cb82 + f579b98 commit 7dcef0e
Showing 12 changed files with 352 additions and 283 deletions.
4 changes: 2 additions & 2 deletions configs/callbacks/default.yaml
@@ -3,6 +3,7 @@ defaults:
- early_stopping
- model_summary
- rich_progress_bar
- learning_rate_monitor
- plot_metrics
- _self_

@@ -11,11 +12,10 @@ defaults:
model_checkpoint:
dirpath: ${paths.output_dir}/checkpoints
filename: "epoch_{epoch:03d}"
# monitor: "val/loss"
# mode: "min"
monitor: "val/f1"
mode: "max"
save_last: True
save_top_k: 10 # save k best models (determined by above metric)
auto_insert_metric_name: False

early_stopping:
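The checkpoint callback now tracks validation F1 and keeps the highest-scoring checkpoints instead of minimizing validation loss. A minimal sketch of the equivalent direct construction with PyTorch Lightning's ModelCheckpoint, assuming the Hydra interpolations resolve to literal values (the directory below is a stand-in for ${paths.output_dir}/checkpoints):

    from pytorch_lightning.callbacks import ModelCheckpoint

    # Keep the 10 best checkpoints ranked by validation F1 (higher is better)
    # and always retain the most recent epoch as "last.ckpt".
    checkpoint_cb = ModelCheckpoint(
        dirpath="outputs/example_run/checkpoints",  # stand-in for ${paths.output_dir}/checkpoints
        filename="epoch_{epoch:03d}",
        monitor="val/f1",
        mode="max",
        save_last=True,
        save_top_k=10,
        auto_insert_metric_name=False,
    )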
4 changes: 4 additions & 0 deletions configs/callbacks/learning_rate_monitor.yaml
@@ -0,0 +1,4 @@
learning_rate_monitor:
  _target_: pytorch_lightning.callbacks.LearningRateMonitor
  logging_interval: epoch
  log_momentum: false
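This new config wires in PyTorch Lightning's built-in LearningRateMonitor so the scheduler-driven learning rate is logged once per epoch. A minimal sketch of the equivalent direct construction and how such a callback is passed to the Trainer:

    import pytorch_lightning as pl
    from pytorch_lightning.callbacks import LearningRateMonitor

    # Log the current learning rate(s) to the attached logger once per epoch;
    # optimizer momentum values are not logged.
    lr_monitor = LearningRateMonitor(logging_interval="epoch", log_momentum=False)
    trainer = pl.Trainer(max_epochs=500, callbacks=[lr_monitor])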
10 changes: 9 additions & 1 deletion configs/data/ptg.yaml
@@ -30,6 +30,14 @@ val_dataset:

test_dataset: ${data.val_dataset}

# Match the test dataset's configuration **sans** augmentations.
pred_dataset:
  _target_: tcn_hpl.data.tcn_dataset.TCNDataset
  window_size: ${data.test_dataset.window_size}
  window_label_idx: ${data.test_dataset.window_label_idx}
  vectorize: ${data.test_dataset.vectorize}
  transform_frame_data: null

coco_train_activities: ""
coco_train_objects: ""
coco_train_poses: ""
@@ -42,5 +50,5 @@ coco_test_poses: ""
batch_size: 128
num_workers: 0
target_framerate: 15
epoch_length: 10000
epoch_sample_factor: 1
pin_memory: True
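Two changes here: pred_dataset mirrors the test dataset's windowing and vectorization but sets transform_frame_data to null so prediction runs without augmentation, and the fixed epoch_length is replaced by a relative epoch_sample_factor. Presumably the data module now derives the number of windows drawn per epoch from the dataset size times this factor; a hypothetical, self-contained sketch of that idea (names and sampler choice are illustrative, not taken from tcn_hpl):

    import torch
    from torch.utils.data import TensorDataset, WeightedRandomSampler

    # Stand-ins for the real window dataset and per-window sampling weights.
    dataset = TensorDataset(torch.arange(10_000))
    window_weights = torch.ones(len(dataset))
    epoch_sample_factor = 1

    # With a relative factor, one "epoch" visits roughly factor * len(dataset)
    # windows instead of a fixed count such as the old epoch_length: 10000.
    num_samples = int(len(dataset) * epoch_sample_factor)
    sampler = WeightedRandomSampler(window_weights, num_samples=num_samples, replacement=True)
    print(num_samples)  # 10000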
30 changes: 14 additions & 16 deletions configs/experiment/m2/feat_locsconfs.yaml
@@ -28,12 +28,6 @@ tags: ["m2", "ms_tcn", "debug"]

seed: 12345

#callbacks:
# model_checkpoint:
# # save all ~80MB checkpoints for post-training investigation.
# # Total: ~45GB
# save_top_k: 500

trainer:
min_epochs: 50
max_epochs: 500
@@ -42,16 +36,15 @@ trainer:
model:
num_classes: 9 # number of activity classification classes
compile: false
scheduler:
# Code change to track train/loss instead of val/loss.
factor: 0.9
patience: 10
net:
# Length of feature vector for a single frame.
# Currently derived from the parameterization of dataset vectorizer.
dim: 102

# # Once upon a time defaults
# num_stages: 4
# num_layers: 10
# num_f_maps: 64

data:
coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
@@ -68,10 +61,7 @@ data:
batch_size: 512
num_workers: 16
target_framerate: 15 # BBN Hololens2 Framerate
# This is a bit more than the number of windows in the training dataset so
# the weighted sampler has more of an opportunity to sample the space
# proportionally.
epoch_length: 300000
epoch_sample_factor: 1 # 1x the dataset size iterations for train/val

train_dataset:
window_size: 25
@@ -99,7 +89,15 @@ data:
pose_throughput_std: 0.2
fixed_pattern: false
- _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter
# Using default parameters to start
translate: 0.05
scale: [0.9, 1.1]
rotate: [-5, 5]
det_loc_jitter: 0.02
det_wh_jitter: 0.02
pose_kp_loc_jitter: 0.005
dets_score_jitter: 0.
pose_score_jitter: 0.
pose_kp_score_jitter: 0.
val_dataset:
# Augmentations on windows of frame data before performing vectorization.
# Sharing transform with training dataset as it is only the drop-out aug to
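The rotate/scale/translate jitter augmentation is now parameterized explicitly instead of relying on its defaults: small global rotation (±5 degrees), scaling (0.9–1.1), and translation (±5%), plus per-element location jitter for detection boxes and pose keypoints, with all score jitter disabled. A rough, self-contained sketch of what magnitudes like these do to a single keypoint, assuming translation and jitter are fractions of the frame size and rotation is in degrees (an illustration of the parameter scales, not the tcn_hpl implementation):

    import numpy as np

    rng = np.random.default_rng(0)
    img_w, img_h = 1280, 720
    kp = np.array([640.0, 360.0])  # one pose keypoint at the frame center

    angle = np.deg2rad(rng.uniform(-5, 5))                     # rotate: [-5, 5]
    scale = rng.uniform(0.9, 1.1)                              # scale: [0.9, 1.1]
    shift = rng.uniform(-0.05, 0.05, size=2) * (img_w, img_h)  # translate: 0.05
    jitter = rng.normal(0.0, 0.005, size=2) * (img_w, img_h)   # pose_kp_loc_jitter: 0.005

    # Rotate/scale about the frame center, then translate and jitter.
    center = np.array([img_w / 2.0, img_h / 2.0])
    rot = np.array([[np.cos(angle), -np.sin(angle)],
                    [np.sin(angle),  np.cos(angle)]])
    kp_aug = (rot @ (kp - center)) * scale + center + shift + jitter
    print(kp_aug)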
24 changes: 14 additions & 10 deletions configs/experiment/r18/feat_locsconfs.yaml
@@ -36,16 +36,15 @@ trainer:
model:
num_classes: 6 # number of activity classification classes
compile: false
scheduler:
# Code change to track train/loss instead of val/loss.
factor: 0.9
patience: 10
net:
# Length of feature vector for a single frame.
# Currently derived from the parameterization of dataset vectorizer.
dim: 102

# # Once upon a time defaults
# num_stages: 4
# num_layers: 10
# num_f_maps: 64

data:
coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
@@ -62,10 +61,7 @@ data:
batch_size: 512
num_workers: 16
target_framerate: 15 # BBN Hololens2 Framerate
# This is a bit more than the number of windows in the training dataset so
# the weighted sampler has more of an opportunity to sample the space
# proportionally.
epoch_length: 300000
epoch_sample_factor: 1 # 1x the dataset size iterations for train/val

train_dataset:
window_size: 25
@@ -93,7 +89,15 @@ data:
pose_throughput_std: 0.2
fixed_pattern: false
- _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter
# Using default parameters to start
translate: 0.05
scale: [0.9, 1.1]
rotate: [-5, 5]
det_loc_jitter: 0.02
det_wh_jitter: 0.02
pose_kp_loc_jitter: 0.005
dets_score_jitter: 0.
pose_score_jitter: 0.
pose_kp_score_jitter: 0.
val_dataset:
# Augmentations on windows of frame data before performing vectorization.
# Sharing transform with training dataset as it is only the drop-out aug to
1 change: 0 additions & 1 deletion configs/model/ptg.yaml
@@ -20,7 +20,6 @@ net:
num_stages: 4
num_layers: 5
num_f_maps: 128
# dim: 204
dim: 128
num_classes: ${model.num_classes}

12 changes: 6 additions & 6 deletions tcn_hpl/callbacks/plot_metrics.py
@@ -174,8 +174,8 @@ def on_train_epoch_end(
all_source_frames = torch.cat(self._train_all_source_frames) # shape: #frames

current_epoch = pl_module.current_epoch
curr_acc = pl_module.train_acc.compute()
curr_f1 = pl_module.train_f1.compute()
curr_acc = pl_module.train_metrics.acc.compute()
curr_f1 = pl_module.train_metrics.f1.compute()

#
# Plot per-video class predictions vs. GT across progressive frames in
@@ -265,8 +265,8 @@ def on_validation_epoch_end(
all_source_frames = torch.cat(self._val_all_source_frames) # shape: #frames

current_epoch = pl_module.current_epoch
curr_acc = pl_module.val_acc.compute()
curr_f1 = pl_module.val_f1.compute()
curr_acc = pl_module.val_metrics.acc.compute()
curr_f1 = pl_module.val_metrics.f1.compute()
best_f1 = pl_module.val_f1_best.compute()

#
@@ -359,8 +359,8 @@ def on_test_epoch_end(
all_source_frames = torch.cat(self._val_all_source_frames) # shape: #frames

current_epoch = pl_module.current_epoch
test_acc = pl_module.test_acc.compute()
test_f1 = pl_module.test_f1.compute()
test_acc = pl_module.test_metrics.acc.compute()
test_f1 = pl_module.test_metrics.f1.compute()

#
# Plot per-video class predictions vs. GT across progressive frames in
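The callback no longer reads flat pl_module.{train,val,test}_acc / _f1 attributes; accuracy and F1 are now grouped per stage (train_metrics, val_metrics, test_metrics) on the Lightning module. A hypothetical sketch of a container that supports the .acc / .f1 access pattern used here, built on torchmetrics (the actual class used by tcn_hpl may differ):

    import torch
    from dataclasses import dataclass
    from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score

    @dataclass
    class StageMetrics:
        # Hypothetical grouping matching the `<stage>_metrics.acc` / `.f1`
        # access pattern in plot_metrics.py; not necessarily tcn_hpl's own class.
        acc: MulticlassAccuracy
        f1: MulticlassF1Score

    num_classes = 9
    train_metrics = StageMetrics(
        acc=MulticlassAccuracy(num_classes=num_classes),
        f1=MulticlassF1Score(num_classes=num_classes),
    )

    preds = torch.randint(0, num_classes, (32,))
    target = torch.randint(0, num_classes, (32,))
    train_metrics.acc.update(preds, target)
    train_metrics.f1.update(preds, target)
    print(train_metrics.acc.compute(), train_metrics.f1.compute())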
33 changes: 32 additions & 1 deletion tcn_hpl/data/frame_data.py
@@ -38,6 +38,16 @@ def __post_init__(self):
def __bool__(self):
return bool(self.boxes.size)

def __eq__(self, other):
return (
(self.boxes == other.boxes).all()
and (self.labels == other.labels).all()
and (self.scores == other.scores).all()
)

def __ne__(self, other):
return not (self == other)


@dataclass
class FramePoses:
@@ -51,7 +61,8 @@ class FramePoses:
# Array of scores for each pose. Ostensibly the bbox score. Shape: (num_poses,)
scores: npt.NDArray[float]
# Pose joint 2D positions in ascending joint ID order. If the joint is not
# present, 0s are used. Shape: (num_poses, num_joints, 2)
# present, 0s are used. Points in (x, y) format.
# Shape: (num_poses, num_joints, 2)
joint_positions: npt.NDArray[float]
# Pose joint scores. Shape: (num_poses, num_joints)
joint_scores: npt.NDArray[float]
@@ -67,6 +78,16 @@ def __post_init__(self):
def __bool__(self):
return bool(self.scores.size)

def __eq__(self, other):
return (
(self.scores == other.scores).all()
and (self.joint_positions == other.joint_positions).all()
and (self.joint_scores == other.joint_scores).all()
)

def __ne__(self, other):
return not (self == other)


@dataclass
class FrameData:
Expand Down Expand Up @@ -114,3 +135,13 @@ def __bool__(self):
not.
"""
return bool(self.object_detections) or bool(self.poses)

def __eq__(self, other):
return (
(self.object_detections == other.object_detections)
and (self.poses == other.poses)
and (self.size == other.size)
)

def __ne__(self, other):
return not (self == other)
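The frame-data dataclasses gain explicit __eq__/__ne__ because their fields are numpy arrays: the dataclass-generated comparison would produce elementwise boolean arrays whose truth value is ambiguous, so each field comparison is reduced with .all(). A self-contained stand-in showing the pattern and its behavior (a simplified class, not the real tcn_hpl dataclass):

    import numpy as np
    from dataclasses import dataclass

    @dataclass
    class Dets:
        # Simplified stand-in for the detections dataclass in frame_data.py.
        boxes: np.ndarray
        labels: np.ndarray
        scores: np.ndarray

        def __eq__(self, other):
            # Elementwise numpy comparisons reduced to a single bool per field.
            return (
                (self.boxes == other.boxes).all()
                and (self.labels == other.labels).all()
                and (self.scores == other.scores).all()
            )

        def __ne__(self, other):
            return not (self == other)

    a = Dets(np.zeros((1, 4)), np.array([1]), np.array([0.9]))
    b = Dets(np.zeros((1, 4)), np.array([1]), np.array([0.9]))
    print(a == b, a != b)  # True False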