diff --git a/configs/experiment/m2/feat_locsconfs.yaml b/configs/experiment/m2/feat_locsconfs.yaml index 5ff7d9843..c723fde84 100644 --- a/configs/experiment/m2/feat_locsconfs.yaml +++ b/configs/experiment/m2/feat_locsconfs.yaml @@ -36,6 +36,8 @@ trainer: model: num_classes: 9 # number of activity classification classes compile: false + optimizer: + lr: 0.00005 scheduler: # Code change to track train/loss instead of val/loss. factor: 0.9 diff --git a/configs/experiment/m2/feat_locsconfs_residualLinear.yaml b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml new file mode 100644 index 000000000..c04f2aa69 --- /dev/null +++ b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml @@ -0,0 +1,144 @@ +# @package _global_ + +defaults: + - override /data: ptg + - override /model: ptg + - override /callbacks: default + - override /trainer: gpu + - override /paths: default + #- override /logger: aim + - override /logger: csv + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# Change this name to something descriptive and unique for this experiment. +# This will differentiate the run logs and output to be separate from other +# experiments that may have been run under the configured +# Setting this value influences: +# - the name of the directory under `${paths.root_dir}/logs/` in which training +# run files are stored. +# Default is "train" set in the "configs/train.yaml" file. +#task_name: + +# simply provide checkpoint path to resume training +#ckpt_path: null + +tags: ["m2", "ms_tcn", "debug"] + +seed: 12345 + +trainer: + min_epochs: 50 + max_epochs: 500 + log_every_n_steps: 1 + +model: + num_classes: 9 # number of activity classification classes + # This should stay false, related functionality is not available in torch + # version used. + compile: false + optimizer: + lr: 0.00005 + scheduler: + # Code change to track train/loss instead of val/loss. 
+ factor: 0.9 + patience: 10 + net: + _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel2 + # Utilize "linear" dilation layers instead of exponential ones. + linear_single_stage: true + # 12 here allows for the largest residual layer to cover the first, middle + # and last window index when in the middle of its convolutional sweep. + num_layers: 12 + # These do_* booleans match the behavior of the original MultiStageModule. + do_stage_softmax: true + do_stage_residual: false + # Length of feature vector for a single frame. + # Currently derived from the parameterization of dataset vectorizer. + dim: 102 + +data: + coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json" + coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json" + coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json" + + coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json" + coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json" + coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json" + + coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json" + coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json" + coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json" + + batch_size: 512 + num_workers: 16 + target_framerate: 15 # BBN Hololens2 Framerate + epoch_sample_factor: 1 # 1x the dataset size iterations for train/val + + train_dataset: + window_size: 25 + window_label_idx: ${model.pred_frame_index} + vectorize: + _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs + top_k: 1 + num_classes: 7 + use_joint_confs: True + use_pixel_norm: True + use_joint_obj_offsets: False + background_idx: 0 + # Augmentations on windows of frame data before performing vectorization. 
+ transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # These parameters are a fudge for now to experiment. Window presence + # looks qualitatively right with what we're seeing live. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: false + - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter + translate: 0.05 + scale: [0.9, 1.1] + rotate: [-5, 5] + det_loc_jitter: 0.02 + det_wh_jitter: 0.02 + pose_kp_loc_jitter: 0.005 + dets_score_jitter: 0. + pose_score_jitter: 0. + pose_kp_score_jitter: 0. + val_dataset: + # Augmentations on windows of frame data before performing vectorization. + # Sharing transform with training dataset as it is only the drop-out aug to + # simulate stream processing dropout the same. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # Mirror training hparams, except used fixed patterns. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: true + # Test dataset usually configured the same as val, unless there is some + # different set of transforms that should be used during test/prediction. + +paths: + # Base directory for training outputs. + root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root" + + # Convenience variable to where your train/val/test split COCO file datasets + # are stored. 
+ coco_file_root: ${paths.root_dir} + +#logger: +# aim: +# experiment: ${task_name} +# capture_terminal_logs: true diff --git a/configs/experiment/m3/feat_locsconfs.yaml b/configs/experiment/m3/feat_locsconfs.yaml index 551f774e6..6413b28f8 100644 --- a/configs/experiment/m3/feat_locsconfs.yaml +++ b/configs/experiment/m3/feat_locsconfs.yaml @@ -36,6 +36,8 @@ trainer: model: num_classes: 6 # number of activity classification classes compile: false + optimizer: + lr: 0.00005 scheduler: # Code change to track train/loss instead of val/loss. factor: 0.9 diff --git a/configs/experiment/m3/feat_locsconfs_residualConstant.yaml b/configs/experiment/m3/feat_locsconfs_residualConstant.yaml new file mode 100644 index 000000000..ea029c92a --- /dev/null +++ b/configs/experiment/m3/feat_locsconfs_residualConstant.yaml @@ -0,0 +1,139 @@ +# @package _global_ + +defaults: + - override /data: ptg + - override /model: ptg + - override /callbacks: default + - override /trainer: gpu + - override /paths: default + #- override /logger: aim + - override /logger: csv + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# Change this name to something descriptive and unique for this experiment. +# This will differentiate the run logs and output to be separate from other +# experiments that may have been run under the configured +# Setting this value influences: +# - the name of the directory under `${paths.root_dir}/logs/` in which training +# run files are stored. +# Default is "train" set in the "configs/train.yaml" file. +#task_name: + +# simply provide checkpoint path to resume training +#ckpt_path: null + +tags: ["m3", "ms_tcn", "debug"] + +seed: 12345 + +trainer: + min_epochs: 50 + max_epochs: 500 + log_every_n_steps: 1 + +model: + num_classes: 6 # number of activity classification classes + # This should stay false, related functionality is not available in torch + # version used. 
+ compile: false + optimizer: + lr: 0.00005 + scheduler: + # Code change to track train/loss instead of val/loss. + factor: 0.9 + patience: 10 + net: + _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel2 + # Utilize "constant" dilation layers instead of exponential ones. + constant_single_stage: true + constant_stage_dilation: 1 + # Length of feature vector for a single frame. + # Currently derived from the parameterization of dataset vectorizer. + dim: 97 + +data: + coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json" + coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json" + coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json" + + coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json" + coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json" + coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json" + + coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json" + coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json" + coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json" + + batch_size: 512 + num_workers: 16 + target_framerate: 15 # BBN Hololens2 Framerate + epoch_sample_factor: 1 # 1x the dataset size iterations for train/val + + train_dataset: + window_size: 25 + window_label_idx: ${model.pred_frame_index} + vectorize: + _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs + top_k: 1 + num_classes: 6 + use_joint_confs: True + use_pixel_norm: True + use_joint_obj_offsets: False + background_idx: 0 + # Augmentations on windows of frame data before performing vectorization. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # These parameters are a fudge for now to experiment. 
Window presence + # looks qualitatively right with what we're seeing live. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: false + - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter + translate: 0.05 + scale: [0.9, 1.1] + rotate: [-5, 5] + det_loc_jitter: 0.02 + det_wh_jitter: 0.02 + pose_kp_loc_jitter: 0.005 + dets_score_jitter: 0. + pose_score_jitter: 0. + pose_kp_score_jitter: 0. + val_dataset: + # Augmentations on windows of frame data before performing vectorization. + # Sharing transform with training dataset as it is only the drop-out aug to + # simulate stream processing dropout the same. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # Mirror training hparams, except used fixed patterns. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: true + # Test dataset usually configured the same as val, unless there is some + # different set of transforms that should be used during test/prediction. + +paths: + # Base directory for training outputs. + root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root" + + # Convenience variable to where your train/val/test split COCO file datasets + # are stored. 
+ coco_file_root: ${paths.root_dir} + +#logger: +# aim: +# experiment: ${task_name} +# capture_terminal_logs: true diff --git a/configs/experiment/m3/feat_locsconfs_residualLinear.yaml b/configs/experiment/m3/feat_locsconfs_residualLinear.yaml new file mode 100644 index 000000000..1a40825fb --- /dev/null +++ b/configs/experiment/m3/feat_locsconfs_residualLinear.yaml @@ -0,0 +1,144 @@ +# @package _global_ + +defaults: + - override /data: ptg + - override /model: ptg + - override /callbacks: default + - override /trainer: gpu + - override /paths: default + #- override /logger: aim + - override /logger: csv + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# Change this name to something descriptive and unique for this experiment. +# This will differentiate the run logs and output to be separate from other +# experiments that may have been run under the configured +# Setting this value influences: +# - the name of the directory under `${paths.root_dir}/logs/` in which training +# run files are stored. +# Default is "train" set in the "configs/train.yaml" file. +#task_name: + +# simply provide checkpoint path to resume training +#ckpt_path: null + +tags: ["m3", "ms_tcn", "debug"] + +seed: 12345 + +trainer: + min_epochs: 50 + max_epochs: 500 + log_every_n_steps: 1 + +model: + num_classes: 6 # number of activity classification classes + # This should stay false, related functionality is not available in torch + # version used. + compile: false + optimizer: + lr: 0.00005 + scheduler: + # Code change to track train/loss instead of val/loss. + factor: 0.9 + patience: 10 + net: + _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel2 + # Utilize "linear" dilation layers instead of exponential ones. 
+ linear_single_stage: true + # 12 here allows for the largest residual layer to cover the first, middle + # and last window index when in the middle of its convolutional sweep. + num_layers: 12 + # These do_* booleans match the behavior of the original MultiStageModule. + do_stage_softmax: true + do_stage_residual: false + # Length of feature vector for a single frame. + # Currently derived from the parameterization of dataset vectorizer. + dim: 97 + +data: + coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json" + coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json" + coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json" + + coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json" + coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json" + coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json" + + coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json" + coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json" + coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json" + + batch_size: 512 + num_workers: 16 + target_framerate: 15 # BBN Hololens2 Framerate + epoch_sample_factor: 1 # 1x the dataset size iterations for train/val + + train_dataset: + window_size: 25 + window_label_idx: ${model.pred_frame_index} + vectorize: + _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs + top_k: 1 + num_classes: 6 + use_joint_confs: True + use_pixel_norm: True + use_joint_obj_offsets: False + background_idx: 0 + # Augmentations on windows of frame data before performing vectorization. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # These parameters are a fudge for now to experiment. Window presence + # looks qualitatively right with what we're seeing live. 
+ frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: false + - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter + translate: 0.05 + scale: [0.9, 1.1] + rotate: [-5, 5] + det_loc_jitter: 0.02 + det_wh_jitter: 0.02 + pose_kp_loc_jitter: 0.005 + dets_score_jitter: 0. + pose_score_jitter: 0. + pose_kp_score_jitter: 0. + val_dataset: + # Augmentations on windows of frame data before performing vectorization. + # Sharing transform with training dataset as it is only the drop-out aug to + # simulate stream processing dropout the same. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # Mirror training hparams, except used fixed patterns. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: true + # Test dataset usually configured the same as val, unless there is some + # different set of transforms that should be used during test/prediction. + +paths: + # Base directory for training outputs. + root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root" + + # Convenience variable to where your train/val/test split COCO file datasets + # are stored. 
+ coco_file_root: ${paths.root_dir} + +#logger: +# aim: +# experiment: ${task_name} +# capture_terminal_logs: true diff --git a/configs/experiment/m5/feat_locsconfs.yaml b/configs/experiment/m5/feat_locsconfs.yaml new file mode 100644 index 000000000..15111a363 --- /dev/null +++ b/configs/experiment/m5/feat_locsconfs.yaml @@ -0,0 +1,133 @@ +# @package _global_ + +defaults: + - override /data: ptg + - override /model: ptg + - override /callbacks: default + - override /trainer: gpu + - override /paths: default + #- override /logger: aim + - override /logger: csv + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# Change this name to something descriptive and unique for this experiment. +# This will differentiate the run logs and output to be separate from other +# experiments that may have been run under the configured +# Setting this value influences: +# - the name of the directory under `${paths.root_dir}/logs/` in which training +# run files are stored. +# Default is "train" set in the "configs/train.yaml" file. +#task_name: + +# simply provide checkpoint path to resume training +#ckpt_path: null + +tags: ["m5", "ms_tcn", "debug"] + +seed: 12345 + +trainer: + min_epochs: 50 + max_epochs: 500 + log_every_n_steps: 1 + +model: + num_classes: 6 # number of activity classification classes + compile: false + optimizer: + lr: 0.00005 + scheduler: + # Code change to track train/loss instead of val/loss. + factor: 0.9 + patience: 10 + net: + # Length of feature vector for a single frame. + # Currently derived from the parameterization of dataset vectorizer. 
+ dim: 97 + +data: + coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json" + coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json" + coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json" + + coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json" + coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json" + coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json" + + coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json" + coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json" + coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json" + + batch_size: 512 + num_workers: 16 + target_framerate: 15 # BBN Hololens2 Framerate + epoch_sample_factor: 1 # 1x the dataset size iterations for train/val + + train_dataset: + window_size: 25 + window_label_idx: ${model.pred_frame_index} + vectorize: + _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs + top_k: 1 + num_classes: 6 + use_joint_confs: True + use_pixel_norm: True + use_joint_obj_offsets: False + background_idx: 0 + # Augmentations on windows of frame data before performing vectorization. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # These parameters are a fudge for now to experiment. Window presence + # looks qualitatively right with what we're seeing live. 
+ frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: false + - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter + translate: 0.05 + scale: [0.9, 1.1] + rotate: [-5, 5] + det_loc_jitter: 0.02 + det_wh_jitter: 0.02 + pose_kp_loc_jitter: 0.005 + dets_score_jitter: 0. + pose_score_jitter: 0. + pose_kp_score_jitter: 0. + val_dataset: + # Augmentations on windows of frame data before performing vectorization. + # Sharing transform with training dataset as it is only the drop-out aug to + # simulate stream processing dropout the same. + transform_frame_data: + transforms: + - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform + # Mirror training hparams, except used fixed patterns. + frame_rate: ${data.target_framerate} + dets_throughput_mean: 14.5 + pose_throughput_mean: 10 + dets_latency: 0 + pose_latency: 0.1 + dets_throughput_std: 0.2 + pose_throughput_std: 0.2 + fixed_pattern: true + # Test dataset usually configured the same as val, unless there is some + # different set of transforms that should be used during test/prediction. + +paths: + # Base directory for training outputs. + root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root" + + # Convenience variable to where your train/val/test split COCO file datasets + # are stored. 
+ coco_file_root: ${paths.root_dir} + +#logger: +# aim: +# experiment: ${task_name} +# capture_terminal_logs: true diff --git a/configs/experiment/r18/feat_locsconfs.yaml b/configs/experiment/r18/feat_locsconfs.yaml index 3a0902b4b..d24ec4ce4 100644 --- a/configs/experiment/r18/feat_locsconfs.yaml +++ b/configs/experiment/r18/feat_locsconfs.yaml @@ -36,11 +36,14 @@ trainer: model: num_classes: 6 # number of activity classification classes compile: false + optimizer: + lr: 0.00005 scheduler: # Code change to track train/loss instead of val/loss. factor: 0.9 patience: 10 net: + num_stages: 6 # Length of feature vector for a single frame. # Currently derived from the parameterization of dataset vectorizer. dim: 102 diff --git a/tcn_hpl/data/utils/pose_generation/generate_pose_data.py b/tcn_hpl/data/utils/pose_generation/generate_pose_data.py index eba6eea8e..6effb8114 100755 --- a/tcn_hpl/data/utils/pose_generation/generate_pose_data.py +++ b/tcn_hpl/data/utils/pose_generation/generate_pose_data.py @@ -516,6 +516,7 @@ def img_done_cb(out_dset: kwcoco.CocoDataset) -> None: pose_model_device=pose_device, ) output_dset = pg.predict_coco(input_dset, img_done_cb) + output_coco_filepath.parent.mkdir(parents=True, exist_ok=True) output_dset.dump( output_coco_filepath, newlines=True, diff --git a/tcn_hpl/models/components/ms_tcs_net.py b/tcn_hpl/models/components/ms_tcs_net.py index c9508f80a..a756eeb5b 100644 --- a/tcn_hpl/models/components/ms_tcs_net.py +++ b/tcn_hpl/models/components/ms_tcs_net.py @@ -1,4 +1,5 @@ import copy +import functools from typing import Sequence import einops @@ -56,10 +57,121 @@ def forward(self, x, mask): # Bring it back to input shape [batch_size, feat_dim, window_size] x = einops.rearrange(re_x, "b w d -> b d w") + # input here is not being softmaxed because dim1 is feature inputs, not + # predictions. 
out = self.stage1(x, mask) + # out shape: (batch_size, num_classes, window_size) outputs = out.unsqueeze(0) for s in self.stages: out = s(F.softmax(out, dim=1) * mask[:, None, :], mask) + # out shape: (batch_size, num_classes, window_size) + outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0) + + return outputs + + +class MultiStageModel2(nn.Module): + """ + Similar to the MultiStageModel class, but the output of each stage after + the first may optionally be added to the output of the previous stage. + + """ + def __init__( + self, + fc_sequence_dims: Sequence[int], + fc_sequence_dropout_p: float, + num_stages: int, + num_layers: int, + num_f_maps: int, + dim: int, + num_classes: int, + linear_single_stage: bool = False, + constant_single_stage: bool = False, + do_stage_residual: bool = False, + do_stage_softmax: bool = True, + constant_stage_dilation: int = 1, + ): + """Initialize a `MultiStageModel2` module. + + Default values match the behavior of the original `MultiStageModel` + implementation. + + :param fc_sequence_dims: Create N*2 linear layers with u-net-like skip + connections connecting inputs and outputs of the same dimensions. + If an empty sequence is provided, then no FC layers are created + :param fc_sequence_dropout_p: P-value for drop-out layers utilized in + the FC u-net block. + :param num_stages: Number of State Model Layers. + :param num_layers: Number of Layers within each State Model. + :param num_f_maps: Feature size within the state model + :param dim: Feature size between state models. + :param num_classes: Number of output classes. + :param linear_single_stage: Use `SingleStageModelLinear` class for + single stage layers, otherwise use `SingleStageModel`. + :param constant_single_stage: Use `SingleStageModelConstant` class for + single stage layers, otherwise use `SingleStageModel`. + :param do_stage_residual: Enable adding previous stage output to + successive stage outputs. Default False. 
+ :param do_stage_softmax: Enable performing a softmax operation on + previous stage outputs before input to successive stages. This only + affects the input to a stage, and does not affect the optional + residual stage addition via `do_stage_residual`. Default True. + :param constant_stage_dilation: If `constant_single_stage` is selected, + then use this value for the dilation amount. This parameter is not + used otherwise. + """ + super().__init__() + + # One FC sequence that is applied to a single frame's feature vector. + self.frame_fc = LinearSkipBlock([dim] + list(fc_sequence_dims), fc_sequence_dropout_p) + + if sum([linear_single_stage, constant_single_stage]) > 1: + raise ValueError("Only one of the stage class selectors may be " + "specified at a time.") + stage_class = SingleStageModel + if linear_single_stage: + stage_class = SingleStageModelLinear + if constant_single_stage: + stage_class = functools.partial( + SingleStageModelConstant, + dilation_amt=constant_stage_dilation, + ) + + self.stage1 = stage_class(num_layers, num_f_maps, dim, num_classes) + self.stages = nn.ModuleList( + [ + stage_class(num_layers, num_f_maps, num_classes, num_classes) + for _ in range(num_stages - 1) + ] + ) + + self.do_stage_residual = do_stage_residual + self.do_stage_softmax = do_stage_softmax + + def forward(self, x, mask): + # x shape: [batch_size, feat_dim, window_size] + # mask shape: [batch_size, window_size] + + # Shape [batch_size, window_size, feat_dim] + re_x = einops.rearrange(x, "b d w -> b w d") + re_x = self.frame_fc(re_x) + # Bring it back to input shape [batch_size, feat_dim, window_size] + x = einops.rearrange(re_x, "b w d -> b d w") + + # input here is not being softmaxed because dim1 is feature inputs, not + # predictions. 
+ out = self.stage1(x, mask) + # out shape: (batch_size, num_classes, window_size) + outputs = out.unsqueeze(0) + for stage in self.stages: + s_in = out + if self.do_stage_softmax: + s_in = F.softmax(s_in, dim=1) + s_out = stage(s_in * mask[:, None, :], mask) + if self.do_stage_residual: + s_out = out + s_out + out = s_out # update the temp "out" var for cross-loop interaction + # out shape: (batch_size, num_classes, window_size) outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0) return outputs @@ -158,6 +270,69 @@ def forward(self, x, mask): return out +class SingleStageModelLinear(nn.Module): + """ + Version of the SingleStageModel but where the dilation of + successive layers increases linearly instead of exponentially. + + Input to the forward method should be shape (batch, dim, window_size). + """ + def __init__(self, num_layers, num_f_maps, dim, num_classes): + super().__init__() + self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1) + self.layers = nn.ModuleList( + [ + copy.deepcopy(DilatedResidualLayer(1 + i, num_f_maps, num_f_maps)) + for i in range(num_layers) + ] + ) + self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1) + + def forward(self, x, mask): + + out = self.conv_1x1(x) + for layer in self.layers: + # the DR layers already add their output to the input, so no need + # to do that here again. + out = layer(out, mask) + out = self.conv_out(out) * mask[:, None, :] + + return out + + +class SingleStageModelConstant(nn.Module): + """ + Version of the SingleStageModel but where the amount of dilation in + successive layers is constant. + + The default dilation amount of 1 is the same as saying "don't dilate" as + this will result in no dilation in the 3x1 convolution layers. + + Input to the forward method should be shape (batch, dim, window_size). 
+ """ + def __init__(self, num_layers, num_f_maps, dim, num_classes, dilation_amt=1): + super().__init__() + self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1) + self.layers = nn.ModuleList( + [ + copy.deepcopy(DilatedResidualLayer(dilation_amt, num_f_maps, num_f_maps)) + for _ in range(num_layers) + ] + ) + self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1) + + def forward(self, x, mask): + + out = self.conv_1x1(x) + for layer in self.layers: + # the DR layers already add their output to the input, so no need + # to do that here again. + out = layer(out, mask) + out = self.conv_out(out) * mask[:, None, :] + + return out + + class DilatedResidualLayer(nn.Module): def __init__(self, dilation, in_channels, out_channels): super(DilatedResidualLayer, self).__init__()