Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding simple 'locsAndConfs' TCN input vector #41

Merged
merged 13 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions configs/data/ptg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@ _target_: tcn_hpl.data.ptg_datamodule.PTGDataModule
train_dataset:
_target_: tcn_hpl.data.tcn_dataset.TCNDataset
window_size: 15
vectorizer:
_target_: tcn_hpl.data.vectorize.classic.Classic
feat_version: 6
top_k: 1
num_classes: 7
background_idx: 0
hand_left_idx: 5
hand_right_idx: 6
# No vectorizer should be specified here, as there should be no "default".
# Example of a vectorizer:
# vectorizer:
# _target_: tcn_hpl.data.vectorize.classic.Classic
# feat_version: 6
# top_k: 1
# num_classes: 7
# background_idx: 0
# hand_left_idx: 5
# hand_right_idx: 6
transform:
_target_: torchvision.transforms.Compose
transforms: []
Expand Down
121 changes: 121 additions & 0 deletions configs/experiment/m2/feat_locsConfs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=example
task: "m2"
# feature_version: 6
topic: "medical"

defaults:
- override /data: ptg
- override /model: ptg
- override /callbacks: default
- override /trainer: gpu
- override /paths: default
#- override /logger: aim
- override /logger: csv

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

# Change this name to something descriptive and unique for this experiment.
# This will differentiate the run logs and output to be separate from other
# experiments that may have been run under the configured
# Setting this value influences:
# - the name of the directory under `${paths.root_dir}/logs/` in which training
# run files are stored.
# Default is "train" set in the "configs/train.yaml" file.
#task_name:

# simply provide checkpoint path to resume training
#ckpt_path: null

tags: ["m2", "ms_tcn", "debug"]

seed: 12345

trainer:
min_epochs: 50
max_epochs: 500
log_every_n_steps: 1

model:
compile: false
net:
# Length of feature vector for a single frame.
# Currently derived from feature version and other hyperparameters.
dim: 102
num_classes: 9

data:
coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimates.coco.json"

coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimates.coco.json"

coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimates.coco.json"

batch_size: 16384
num_workers: 16
target_framerate: 15 # BBN Hololens2 Framerate
epoch_length: 200000

train_dataset:
window_size: 25
vectorizer:
_target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
top_k: 1
num_classes: 7
use_joint_confs: True
use_pixel_norm: True
use_hand_obj_offsets: False
background_idx: 0
transform:
transforms: [] # no transforms
# - _target_: tcn_hpl.data.components.augmentations.MoveCenterPts
# hand_dist_delta: 0.05
# obj_dist_delta: 0.05
# joint_dist_delta: 0.025
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
# - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
val_dataset:
transform:
transforms: [] # no transforms
# - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
# Test dataset usually configured the same as val, unless there is some
# different set of transforms that should be used during test/prediction.

paths:
# root_dir: "/data/PTG/medical/training/activity_classifier/TCN_HPL/"
# root_dir: "/home/local/KHQ/paul.tunison/data/darpa-ptg/train-TCN-M2_bbn_hololens/training_root"
root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root"

# Convenience variable to where your train/val/test split COCO file datasets
# are stored.
# coco_file_root: "/home/local/KHQ/paul.tunison/data/darpa-ptg/train-TCN-M2_bbn_hololens"
coco_file_root: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/train-TCN-M2_bbn_hololens"

#exp_name: "tcn_training_revive"
#logger:
# aim:
# experiment: ${task_name}
# capture_terminal_logs: true
26 changes: 16 additions & 10 deletions tcn_hpl/data/tcn_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,12 @@ def load_data_offline(
)
else:
frame_dets = empty_dets

# Frame height and width should be available.
img_info = activity_coco.index.imgs[img_id]
assert "height" in img_info
assert "width" in img_info
frame_size = (img_info["width"], img_info["height"])

# Only consider annotations that actually have keypoints.
# There may be no poses on this frame.
Expand Down Expand Up @@ -336,7 +342,8 @@ def load_data_offline(
)
else:
frame_poses = empty_pose
vid_frame_data.append(FrameData(frame_dets, frame_poses))
# import ipdb; ipdb.set_trace()
vid_frame_data.append(FrameData(frame_dets, frame_poses, frame_size))

# Compose a list of indices into frame_data that this video's
# worth of content resides.
Expand Down Expand Up @@ -639,15 +646,14 @@ def test_dataset_for_input(
pose_coco = kwcoco.CocoDataset(pose_coco)

# TODO: Some method of configuring which vectorizer to use.
from tcn_hpl.data.vectorize.classic import Classic
vectorizer = Classic(
feat_version=6,
top_k=1,
# M2/R18 object detection class indices
num_classes=7,
background_idx=0,
hand_left_idx=5,
hand_right_idx=6,
from tcn_hpl.data.vectorize.locs_and_confs import LocsAndConfs
vectorizer = LocsAndConfs(
top_k = 1,
num_classes = 7,
use_joint_confs = True,
use_pixel_norm = True,
use_hand_obj_offsets = False,
background_idx = 0
)

dataset = TCNDataset(window_size=window_size, vectorizer=vectorizer)
Expand Down
3 changes: 3 additions & 0 deletions tcn_hpl/data/vectorize/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ class FrameData:
# This may be None, which implies that an object pose estimation was not
# run for this frame.
poses: tg.Optional[FramePoses]
# FrameSize: Length-2 tuple expected: (width, height).
# This is the video frame's width and height in pixels.
size: tg.Tuple[int, int]

def __bool__(self):
"""
Expand Down
161 changes: 161 additions & 0 deletions tcn_hpl/data/vectorize/locs_and_confs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import functools
import typing as tg

import numpy as np
from numpy import typing as npt

from tcn_hpl.data.vectorize._interface import Vectorize, FrameData

NUM_POSE_JOINTS = 22

class LocsAndConfs(Vectorize):
"""
Previous manual approach to vectorization.

Arguments:
top_k: The number of top per-class examples to use in vector
construction.
num_classes: the number of classes in the object detector.
use_joint_confs: use the confidence of each pose joint.
(changes the length of the input vector, which needs to
be manually updated if this flag changes.)
use_pixel_norm: Normalize pixel coordinates by dividing by
frame height and width, respectively. Normalized values
are between 0 and 1. Does not change input vector length.
use_joint_obj_offsets: add abs(X and Y offsets) for between joints and
each object.
(changes the length of the input vector, which needs to
be manually updated if this flag changes.)

"""

def __init__(
self,
top_k: int = 1,
num_classes: int = 7,
use_joint_confs: bool = True,
use_pixel_norm: bool = True,
use_joint_obj_offsets: bool = False,
background_idx: int = 0
):
super().__init__()

self._top_k = top_k
self._num_classes = num_classes
self._use_joint_confs = use_joint_confs
self._use_pixel_norm = use_pixel_norm
self._use_joint_obj_offsets = use_joint_obj_offsets
self._background_idx = background_idx

# Get the top "k" object indexes for each object
@staticmethod
def get_top_k_indexes_of_one_obj_type(f_dets, k, label_ind):
"""
Find all instances of a label index in object detections.
Then sort them and return the top K.
Inputs:
- object_dets:
"""
labels = f_dets.labels
scores = f_dets.scores
# Get all labels of an obj type
filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
if not filtered_idxs:
return None
filtered_scores = [scores[i] for i in filtered_idxs]
# Sort labels by score values.
sorted_inds = [i[1] for i in sorted(zip(filtered_scores, filtered_idxs))]
return sorted_inds[:k]

@staticmethod
def append_vector(frame_feat, i, number):
frame_feat[i] = number
return frame_feat, i + 1

def determine_vector_length(self, data: FrameData) -> int:
#########################
# Feature vector
#########################
# Length: pose confs * 22, pose X's * 22, pose Y's * 22,
# obj confs * num_objects(7 for M2),
# obj X * num_objects(7 for M2),
# obj Y * num_objects(7 for M2)
# obj W * num_objects(7 for M2)
# obj H * num_objects(7 for M2)
# casualty conf * 1
vector_length = 0
# Joint confidences
if self._use_joint_confs:
vector_length += NUM_POSE_JOINTS
# X and Y for each joint
vector_length += 2 * NUM_POSE_JOINTS
# [Conf, X, Y, W, H] for k instances of each object class.
vector_length = 5 * self._top_k * self._num_classes
return vector_length


def vectorize(self, data: FrameData) -> npt.NDArray[np.float32]:

vector_len = self.determine_vector_length(data)
frame_feat = np.zeros(vector_len, dtype=np.float32)
# TODO: instead of carrying around this vector_ind, we should
# directly compute the offset of each feature we add to the TCN
# input vector. This would be much easier to debug.
vector_ind = 0
if self._use_pixel_norm:
W = data.size[0]
H = data.size[1]
else:
W = 1
H = 1
f_dets = data.object_detections

# Loop through all classes: populate obj conf, obj X, obj Y.
# Assumption: class labels are [0, 1, 2,... num_classes-1].
# TODO: this will break if top_k is ever > 1. Fix that.
for obj_ind in range(0,self._num_classes):
top_k_idxs = self.get_top_k_indexes_of_one_obj_type(f_dets, self._top_k, obj_ind)
Purg marked this conversation as resolved.
Show resolved Hide resolved
if top_k_idxs: # This is None if there were no detections to sort for this class
for idx in top_k_idxs:
# Conf
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.scores[idx])
Purg marked this conversation as resolved.
Show resolved Hide resolved
# X
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][0] / W)
# Y
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][1] / H)
# W
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][2] / W)
# H
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][3] / H)
else:
for _ in range(0, self._top_k * 5):
# 5 Zeros
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)

f_poses = data.poses
if f_poses:
# Find most confident body detection
confident_pose_idx = np.argmax(f_poses.scores)
Purg marked this conversation as resolved.
Show resolved Hide resolved
num_joints = f_poses.joint_positions.shape[1]
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.scores[confident_pose_idx])

for joint_ind in range(0, num_joints):
# Conf
if self._use_joint_confs:
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_scores[confident_pose_idx][joint_ind])
# X
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][0] / W)
# Y
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][1] / H)
else:
num_joints = f_poses.joint_positions.shape[1]
if self._use_joint_confs:
rows_per_joint = 3
else:
rows_per_joint = 2
for _ in range(num_joints * rows_per_joint + 1):
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)

assert vector_ind == vector_len

return frame_feat
Loading