Modified FC block to be u-net skip connections, shared across frame tensors
Purg committed Nov 7, 2024
1 parent efee490 commit 3f89daa
Showing 5 changed files with 313 additions and 39 deletions.
configs/callbacks/default.yaml (6 changes: 4 additions & 2 deletions)
@@ -6,18 +6,20 @@ defaults:
  - plot_metrics
  - _self_

+# Checkpoint and early stop based on the metric logged by PTGLitModule.

model_checkpoint:
  dirpath: ${paths.output_dir}/checkpoints
  filename: "epoch_{epoch:03d}"
  # monitor: "val/loss"
  # mode: "min"
-  monitor: "val/acc"
+  monitor: "val/f1"
  mode: "max"
  save_last: True
  auto_insert_metric_name: False

early_stopping:
-  monitor: "val/acc"
+  monitor: "val/f1"
  patience: 100
  mode: "max"

configs/experiment/r18/feat_locsconfs-reduced_model.yaml (124 changes: 124 additions & 0 deletions)
@@ -0,0 +1,124 @@
# @package _global_

# To execute this experiment, run:
# python train.py experiment=r18/feat_locsconfs-reduced_model
topic: "medical"
task: "r18"

defaults:
  - override /data: ptg
  - override /model: ptg
  - override /callbacks: default
  - override /trainer: gpu
  - override /paths: default
  #- override /logger: aim
  - override /logger: csv

# All parameters below will be merged with parameters from the default
# configurations set above. This allows you to overwrite only specified
# parameters.

# Change this name to something descriptive and unique for this experiment.
# This will differentiate the run logs and output from those of other
# experiments that may have been run under the configured task_name.
# Setting this value influences:
# - the name of the directory under `${paths.root_dir}/logs/` in which
#   training run files are stored.
# Default is "train", set in the "configs/train.yaml" file.
#task_name:

tags: ["r18", "ms_tcn", "debug"]

seed: 12345

trainer:
  min_epochs: 50
  max_epochs: 500
  log_every_n_steps: 1

model:
  num_classes: 6  # number of activity classification classes
  compile: false
  net:
    # Length of the feature vector for a single frame.
    # Currently derived from the parameterization of the dataset vectorizer.
    dim: 102

    # # Once-upon-a-time defaults
    # num_stages: 4
    # num_layers: 10
    # num_f_maps: 64

# TRAINING
data:
  coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
  coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
  coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json"

  coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
  coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
  coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json"

  coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
  coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
  coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json"

  # Lower batch size than previously, now that we are augmenting and cannot
  # cache window vectorization. This value provided a good balance of CPU
  # load against GPU load (16 cores, ~14 load avg., ~80% GPU utilization,
  # ~10.35 GB VRAM).
  batch_size: 512
  num_workers: 16
  target_framerate: 15  # BBN HoloLens 2 frame rate
  # This is a little more than the number of windows in the training dataset.
  epoch_length: 80000

  train_dataset:
    window_size: 25
    vectorize:
      _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
      top_k: 1
      num_classes: 7
      use_joint_confs: True
      use_pixel_norm: True
      use_joint_obj_offsets: False
      background_idx: 0
    # Augmentations on windows of frame data before performing vectorization.
    transform_frame_data:
      transforms:
        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
          # These parameters are a fudge for now, for experimentation. Window
          # presence looks qualitatively right against what we're seeing live.
          frame_rate: ${data.target_framerate}
          dets_throughput_mean: 14.5
          pose_throughput_mean: 10
          dets_latency: 0
          pose_latency: 0.1
          dets_throughput_std: 0.2
          pose_throughput_std: 0.2
  val_dataset:
    # Augmentations on windows of frame data before performing vectorization.
    # The transform is shared with the training dataset, since it is only the
    # drop-out augmentation, so that stream-processing dropout is simulated
    # the same way.
    transform_frame_data: ${data.train_dataset.transform_frame_data}
    # transforms: []  # no transforms
    #   - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
    #     im_w: 1280
    #     im_h: 720
    #     num_obj_classes: 42
    #     feat_version: 2
    #     top_k_objects: 1
  # The test dataset is usually configured the same as val, unless there is
  # some different set of transforms that should be used during
  # test/prediction.

paths:
  # root_dir: "/data/PTG/medical/training/activity_classifier/TCN_HPL/"
  root_dir: "/data/paul.tunison/data/darpa-ptg/train-TCN-R18_bbn_hololens-yolo_v7-mmpose-window_dropout"

  # Convenience variable for where your train/val/test split COCO file
  # datasets are stored.
  coco_file_root: ${paths.root_dir}

#logger:
#  aim:
#    experiment: ${task_name}
#    capture_terminal_logs: true
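
The DropoutFrameDataTransform block above simulates upstream detectors and pose estimators that run slower than the 15 Hz camera: results arrive at a jittered throughput after some latency, and only the frames a result lands on are treated as having data. A hypothetical sketch of that idea follows; the function and its logic are assumptions for illustration, not the project's implementation.

    # Hypothetical sketch of throughput/latency-driven frame dropout.
    # NOT the tcn_hpl implementation; it only illustrates the parameters above.
    import numpy as np

    def simulate_coverage(n_frames, frame_rate, throughput_mean,
                          throughput_std, latency, rng):
        """Boolean mask over a window: which frames get a processor result."""
        frame_times = np.arange(n_frames) / frame_rate
        covered = np.zeros(n_frames, dtype=bool)
        t = latency  # the first result appears only after the pipeline latency
        while t <= frame_times[-1]:
            # A result at time t covers the most recent frame at or before t.
            covered[np.searchsorted(frame_times, t, side="right") - 1] = True
            # The processor's instantaneous rate jitters around its mean.
            t += 1.0 / max(rng.normal(throughput_mean, throughput_std), 1e-3)
        return covered

    rng = np.random.default_rng(12345)
    dets_mask = simulate_coverage(25, 15, 14.5, 0.2, 0.0, rng)  # near-full coverage
    pose_mask = simulate_coverage(25, 15, 10.0, 0.2, 0.1, rng)  # roughly 2/3 coverage

With detections at ~14.5 Hz against a 15 Hz stream, only the occasional frame is missed, while a 10 Hz pose estimator leaves roughly a third of frames without poses; the augmentation trains the model to tolerate exactly that pattern.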
configs/experiment/r18/feat_v6-reduced_model.yaml (126 changes: 126 additions & 0 deletions)
@@ -0,0 +1,126 @@
# @package _global_

# To execute this experiment, run:
# python train.py experiment=r18/feat_v6-reduced_model
topic: "medical"
task: "r18"

defaults:
  - override /data: ptg
  - override /model: ptg
  - override /callbacks: default
  - override /trainer: gpu
  - override /paths: default
  #- override /logger: aim
  - override /logger: csv

# All parameters below will be merged with parameters from the default
# configurations set above. This allows you to overwrite only specified
# parameters.

# Change this name to something descriptive and unique for this experiment.
# This will differentiate the run logs and output from those of other
# experiments that may have been run under the configured task_name.
# Setting this value influences:
# - the name of the directory under `${paths.root_dir}/logs/` in which
#   training run files are stored.
# Default is "train", set in the "configs/train.yaml" file.
#task_name:

tags: ["r18", "ms_tcn", "debug"]

seed: 12345

trainer:
  min_epochs: 50
  max_epochs: 500
  log_every_n_steps: 1

model:
  num_classes: 6  # number of activity classification classes
  compile: false
  net:
    # Length of the feature vector for a single frame.
    # Currently derived from the parameterization of the dataset vectorizer.
    dim: 297

    # # Once-upon-a-time defaults
    # num_stages: 4
    # num_layers: 10
    # num_f_maps: 64

# TRAINING
data:
  coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
  coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
  coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json"

  coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
  coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
  coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json"

  coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
  coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
  coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json"

  # Lower batch size than previously, now that we are augmenting and cannot
  # cache window vectorization. This value provided a good balance of CPU
  # load against GPU load (16 cores, ~14 load avg., ~80% GPU utilization,
  # ~10.35 GB VRAM).
  #batch_size: 64
  #batch_size: 56
  batch_size: 32
  num_workers: 16
  target_framerate: 15  # BBN HoloLens 2 frame rate
  # This is a little more than the number of windows in the training dataset.
  epoch_length: 80000

  train_dataset:
    window_size: 25
    vectorize:
      _target_: tcn_hpl.data.vectorize.classic.Classic
      feat_version: 6
      top_k: 1
      num_classes: 7
      background_idx: 0
      hand_left_idx: 5
      hand_right_idx: 6
    # Augmentations on windows of frame data before performing vectorization.
    transform_frame_data:
      transforms:
        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
          # These parameters are a fudge for now, for experimentation. Window
          # presence looks qualitatively right against what we're seeing live.
          frame_rate: ${data.target_framerate}
          dets_throughput_mean: 14.5
          pose_throughput_mean: 10
          dets_latency: 0
          pose_latency: 0.1
          dets_throughput_std: 0.2
          pose_throughput_std: 0.2
  val_dataset:
    # Augmentations on windows of frame data before performing vectorization.
    # The transform is shared with the training dataset, since it is only the
    # drop-out augmentation, so that stream-processing dropout is simulated
    # the same way.
    transform_frame_data: ${data.train_dataset.transform_frame_data}
    # transforms: []  # no transforms
    #   - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
    #     im_w: 1280
    #     im_h: 720
    #     num_obj_classes: 42
    #     feat_version: 2
    #     top_k_objects: 1
  # The test dataset is usually configured the same as val, unless there is
  # some different set of transforms that should be used during
  # test/prediction.

paths:
  # root_dir: "/data/PTG/medical/training/activity_classifier/TCN_HPL/"
  root_dir: "/data/paul.tunison/data/darpa-ptg/train-TCN-R18_bbn_hololens-yolo_v7-mmpose-window_dropout"

  # Convenience variable for where your train/val/test split COCO file
  # datasets are stored.
  coco_file_root: ${paths.root_dir}

#logger:
#  aim:
#    experiment: ${task_name}
#    capture_terminal_logs: true
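
Both experiment files build their vectorizer through Hydra's _target_ mechanism. A minimal sketch of what that resolution looks like, using the real hydra.utils.instantiate API (the Classic class is the project code referenced in the config above):

    # Sketch: how Hydra turns a _target_ block into a live object.
    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        """
        _target_: tcn_hpl.data.vectorize.classic.Classic
        feat_version: 6
        top_k: 1
        num_classes: 7
        background_idx: 0
        hand_left_idx: 5
        hand_right_idx: 6
        """
    )
    # instantiate() imports the module, looks up the class, and calls it
    # with the remaining keys as keyword arguments:
    vectorizer = instantiate(cfg)  # == Classic(feat_version=6, top_k=1, ...)

Interpolations such as ${data.target_framerate} are resolved by OmegaConf before instantiation, which is how the dropout transform stays in sync with the configured frame rate.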
configs/model/ptg.yaml (3 changes: 2 additions & 1 deletion)
@@ -15,13 +15,14 @@ scheduler:

net:
  _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel
+  fc_sequence_dims: [512, 1024, 2048]
+  fc_sequence_dropout_p: 0.25
  num_stages: 4
  num_layers: 5
  num_f_maps: 128
  # dim: 204
  dim: 128
  num_classes: ${model.num_classes}
  window_size: ${data.train_dataset.window_size}

criterion:
  _target_: tcn_hpl.models.components.focal_loss.FocalLoss
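
The two fc_sequence_* additions parameterize the reworked FC block named in the commit title. Below is a hypothetical sketch of a U-Net-style fully connected block with those widths, with weights shared across every frame tensor in the window; it illustrates the idea only, since the actual MultiStageModel changes live in the fifth changed file, which is not reproduced here.

    # Hypothetical sketch of a U-Net-style FC block matching
    # fc_sequence_dims=[512, 1024, 2048] and fc_sequence_dropout_p=0.25.
    # NOT the actual MultiStageModel code.
    import torch
    from torch import nn

    class FCUNetBlock(nn.Module):
        def __init__(self, dim=128, seq_dims=(512, 1024, 2048), dropout_p=0.25):
            super().__init__()
            enc_dims = [dim, *seq_dims]  # 128 -> 512 -> 1024 -> 2048
            self.enc = nn.ModuleList(
                nn.Sequential(nn.Linear(a, b), nn.ReLU(), nn.Dropout(dropout_p))
                for a, b in zip(enc_dims[:-1], enc_dims[1:])
            )
            # The decoder mirrors the encoder; each level also consumes the
            # skip-connected encoder activation of the same width.
            dec_dims = enc_dims[::-1]  # 2048 -> 1024 -> 512 -> 128
            self.dec = nn.ModuleList(
                nn.Sequential(nn.Linear(2 * a, b), nn.ReLU(), nn.Dropout(dropout_p))
                for a, b in zip(dec_dims[:-1], dec_dims[1:])
            )

        def forward(self, x):
            # x: (batch, window, dim); nn.Linear acts on the last axis, so the
            # same weights are shared across all frame tensors in the window.
            skips = []
            for layer in self.enc:
                x = layer(x)
                skips.append(x)
            for layer, skip in zip(self.dec, reversed(skips)):
                x = layer(torch.cat([x, skip], dim=-1))
            return x

    block = FCUNetBlock()
    out = block(torch.randn(8, 25, 128))  # 8 windows of 25 frames -> same shape

The criterion stays FocalLoss; the standard focal loss, FL(p_t) = -alpha * (1 - p_t)**gamma * log(p_t), down-weights well-classified frames so training concentrates on the hard ones.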