Commit d92845f

formatting

periakiva committed Apr 9, 2024
1 parent 5ee5262 commit d92845f
Showing 24 changed files with 1,477 additions and 1,181 deletions.
71 changes: 39 additions & 32 deletions tcn_hpl/data/components/PTG_dataset.py
@@ -1,23 +1,23 @@
 import torch
 
 import numpy as np
 
 from typing import Optional, Callable, Dict, List
 from torchvision.transforms import transforms
 
 
 class PTG_Dataset(torch.utils.data.Dataset):
     def __init__(
         self,
-        videos: List[str], 
+        videos: List[str],
         num_classes: int,
         actions_dict: Dict[str, int],
-        gt_path: str, 
+        gt_path: str,
         features_path: str,
         sample_rate: int,
         window_size: int,
-        transform: Optional[Callable]=None,
-        target_transform: Optional[Callable]=None
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
     ):
         self.num_classes = num_classes
         self.actions_dict = actions_dict
@@ -37,7 +37,7 @@ def __init__(
         source_frames_list = []
         for v, vid in enumerate(videos):
             features = np.load(self.features_path + vid.split(".")[0] + ".npy")
-            
+
             file_ptr = open(self.gt_path + vid, "r")
             content = file_ptr.read().split("\n")[:-1]
 
@@ -47,7 +47,7 @@ def __init__(
             for i in range(len(classes)):
                 classes[i] = self.actions_dict[content[i]]
                 source_vid[i] = v
-                source_frame[i] = i # find filename????
+                source_frame[i] = i  # find filename????
 
             # mask out the end of the window size of the end of the sequence to prevent overlap between videos.
             mask = np.ones_like(classes)
@@ -59,74 +59,81 @@ def __init__(
             source_vids_list.append(source_vid[:: self.sample_rate])
             source_frames_list.append(source_frame[:: self.sample_rate])
 
-        self.feature_frames = np.concatenate(input_frames_list, axis=1, dtype=np.single).transpose()
-        self.target_frames = np.concatenate(target_frames_list, axis=0, dtype=int, casting='unsafe')
-        self.mask_frames = np.concatenate(mask_frames_list, axis=0, dtype=int, casting='unsafe')
-        self.source_vids = np.concatenate(source_vids_list, axis=0, dtype=int, casting='unsafe')
+        self.feature_frames = np.concatenate(
+            input_frames_list, axis=1, dtype=np.single
+        ).transpose()
+        self.target_frames = np.concatenate(
+            target_frames_list, axis=0, dtype=int, casting="unsafe"
+        )
+        self.mask_frames = np.concatenate(
+            mask_frames_list, axis=0, dtype=int, casting="unsafe"
+        )
+        self.source_vids = np.concatenate(
+            source_vids_list, axis=0, dtype=int, casting="unsafe"
+        )
         self.source_frames = np.concatenate(source_frames_list, dtype=int, axis=0)
 
         # Transforms/Augmentations
         if self.transform is not None:
             self.feature_frames = self.transform(self.feature_frames.copy())
         if self.target_transform is not None:
             self.target_frames = self.target_transform(self.target_frames.copy())
 
-        #zero_idxs = random.sample(list(range(len(self.mask_frames))), len(self.mask_frames)*0.3)
-        #self.mask_frames[zero_idxs] = 0
+        # zero_idxs = random.sample(list(range(len(self.mask_frames))), len(self.mask_frames)*0.3)
+        # self.mask_frames[zero_idxs] = 0
 
-        self.norm_stats['mean'] = self.feature_frames.mean(axis=0)
-        self.norm_stats['std'] = self.feature_frames.std(axis=0)
-        self.norm_stats['max'] = self.feature_frames.max(axis=0)
-        self.norm_stats['min'] = self.feature_frames.min(axis=0)
+        self.norm_stats["mean"] = self.feature_frames.mean(axis=0)
+        self.norm_stats["std"] = self.feature_frames.std(axis=0)
+        self.norm_stats["max"] = self.feature_frames.max(axis=0)
+        self.norm_stats["min"] = self.feature_frames.min(axis=0)
 
         self.dataset_size = self.target_frames.shape[0] - self.window_size
 
-        # Get weights for sampler by inverse count. 
+        # Get weights for sampler by inverse count.
         # Weights represent the GT of the final frame of a window starting from idx
         class_name, counts = np.unique(self.target_frames, return_counts=True)
-        class_weights = 1. / counts
+        class_weights = 1.0 / counts
         class_lookup = dict()
         for i, cn in enumerate(class_name):
             class_lookup[cn] = class_weights[i]
         self.weights = np.zeros((self.dataset_size))
         for i in range(self.dataset_size):
-            self.weights[i] = class_lookup[self.target_frames[i+self.window_size]]
+            self.weights[i] = class_lookup[self.target_frames[i + self.window_size]]
         # Set weights to 0 for frames before the window length
         # So they don't get picked
-        self.weights[:self.window_size] = 0
+        self.weights[: self.window_size] = 0
 
     def __len__(self):
         return self.dataset_size
 
     def __getitem__(self, idx):
-        #print(f"window idx: {idx}:{idx+self.window_size}")
+        # print(f"window idx: {idx}:{idx+self.window_size}")
         """Grab a window of frames starting at ``idx``
         :param idx: The first index of the time window
         :return: features, targets, and mask of the window
         """
 
         # print(f"size of dataset: {self.__len__()}")
         # print(f"self.feature_frames: {self.feature_frames.shape}")
         # print(f"self.target_frames: {self.target_frames.shape}")
         # print(f"self.mask_frames: {self.mask_frames.shape}")
         # print(f"self.source_vids: {self.source_vids.shape}")
         # print(f"self.source_frames: {self.source_frames.shape}")
         # print(f"self.mask_frames: {self.mask_frames}")
-        features = self.feature_frames[idx:idx+self.window_size, :]
-        target = self.target_frames[idx:idx+self.window_size]
-        mask = self.mask_frames[idx:idx+self.window_size]
-        source_vid = self.source_vids[idx:idx+self.window_size]
-        source_frame = self.source_frames[idx:idx+self.window_size]
-
+        features = self.feature_frames[idx : idx + self.window_size, :]
+        target = self.target_frames[idx : idx + self.window_size]
+        mask = self.mask_frames[idx : idx + self.window_size]
+        source_vid = self.source_vids[idx : idx + self.window_size]
+        source_frame = self.source_frames[idx : idx + self.window_size]
 
         # print(f"mask: {mask}")
         # print(f"features: {features.shape}")
         # print(f"target: {target.shape}")
         # print(f"mask: {mask.shape}")
         # print(f"source_vid: {source_vid.shape}")
         # print(f"source_frame: {source_frame.shape}")
 
         return features, target, mask, np.array(source_vid), source_frame
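The `self.weights` array built in `__init__` weights each window by the inverse frequency of the class of its final frame, with the first `window_size` indices zeroed out so incomplete windows are never drawn. A minimal sketch of how these weights would feed PyTorch's `WeightedRandomSampler`; the constructor arguments below are placeholders, not values from this repository:

```python
import torch

# Placeholder arguments -- the real video list, label map, and paths
# come from this repo's configs, not from this sketch.
dataset = PTG_Dataset(
    videos=["video_1.txt"],
    num_classes=10,
    actions_dict={"background": 0},
    gt_path="gt/",
    features_path="features/",
    sample_rate=1,
    window_size=30,
)

# Draw window start indices in proportion to dataset.weights;
# zero-weight indices (the first window_size frames) are never picked.
sampler = torch.utils.data.WeightedRandomSampler(
    weights=dataset.weights,
    num_samples=len(dataset),
    replacement=True,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)
```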
55 changes: 31 additions & 24 deletions tcn_hpl/data/components/augmentations.py
@@ -10,7 +10,14 @@ class MoveCenterPts(torch.nn.Module):
     """
 
     def __init__(
-        self, hand_dist_delta, obj_dist_delta, window_size, im_w, im_h, num_obj_classes, feat_version
+        self,
+        hand_dist_delta,
+        obj_dist_delta,
+        window_size,
+        im_w,
+        im_h,
+        num_obj_classes,
+        feat_version,
     ):
         """
         :param hand_dist_delta: Decimal percentage to calculate the +-offset in
@@ -116,21 +123,19 @@ def forward(self, features):
 
             elif self.feat_version == 3:
                 # Right and left hand distances
-                right_idx1 = 1; right_idx2 = 2;
-                left_idx1 = 4; left_idx2 = 5
+                right_idx1 = 1
+                right_idx2 = 2
+                left_idx1 = 4
+                left_idx2 = 5
                 for hand_delta_x, hand_delta_y, start_idx, end_idx in zip(
                     [rhand_delta_x, lhand_delta_x],
                     [rhand_delta_y, lhand_delta_y],
                     [right_idx1, left_idx1],
                     [right_idx2, left_idx2],
                 ):
-                    frame[start_idx] = (
-                        frame[start_idx] + hand_delta_x
-                    )
+                    frame[start_idx] = frame[start_idx] + hand_delta_x
 
-                    frame[end_idx] = (
-                        frame[end_idx] + hand_delta_y
-                    )
+                    frame[end_idx] = frame[end_idx] + hand_delta_y
 
                 # Object distances
                 start_idx = 10
@@ -260,10 +265,12 @@ def forward(self, features):
             )
 
             # Distance between hands
-            hands_dist_idx = left_dist_idx2 
+            hands_dist_idx = left_dist_idx2
 
             features[:, hands_dist_idx] = features[:, hands_dist_idx] / self.im_w
-            features[:, hands_dist_idx + 1] = features[:, hands_dist_idx + 1] / self.im_h
+            features[:, hands_dist_idx + 1] = (
+                features[:, hands_dist_idx + 1] / self.im_h
+            )
 
         elif self.feat_version == 3:
             # Distances are from the center, skip
@@ -278,9 +285,10 @@ def __repr__(self) -> str:
         detail = f"(im_w={self.im_w}, im_h={self.im_h}, num_obj_classes={self.num_obj_classes}, feat_version={self.feat_version})"
         return f"{self.__class__.__name__}{detail}"
 
+
 class NormalizeFromCenter(torch.nn.Module):
     """Normalize the distances from -1 to 1 with respect to the image center
     Missing objects will be set to (2, 2)
     """
 
@@ -310,19 +318,17 @@ def forward(self, features):
 
         elif self.feat_version == 3:
             # Right and left hand distances
-            right_idx1 = 1; right_idx2 = 2;
-            left_idx1 = 4; left_idx2 = 5
+            right_idx1 = 1
+            right_idx2 = 2
+            left_idx1 = 4
+            left_idx2 = 5
             for start_idx, end_idx in zip(
                 [right_idx1, left_idx1],
                 [right_idx2, left_idx2],
             ):
 
-                features[:, start_idx] = (
-                    features[:, start_idx] / self.half_w
-                )
-                features[:, end_idx] = (
-                    features[:, end_idx] / self.half_h
-                )
+                features[:, start_idx] = features[:, start_idx] / self.half_w
+                features[:, end_idx] = features[:, end_idx] / self.half_h
 
             # Object distances
             start_idx = 10
@@ -337,6 +343,7 @@ def forward(self, features):
         return features
 
     def __repr__(self) -> str:
-        detail = f"(im_w={self.im_w}, im_h={self.im_h}, feat_version={self.feat_version})"
+        detail = (
+            f"(im_w={self.im_w}, im_h={self.im_h}, feat_version={self.feat_version})"
+        )
         return f"{self.__class__.__name__}{detail}"
 
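Both augmentations are `torch.nn.Module`s that operate on the windowed feature array, so they can be chained with `transforms.Compose` and passed as the dataset's `transform`. A sketch under assumptions: `MoveCenterPts`' argument list is taken from the diff above, while `NormalizeFromCenter` is assumed to take `(im_w, im_h, feat_version)` based on the attributes its `__repr__` reports; all numeric values are placeholders.

```python
from torchvision.transforms import transforms

# Jitter hand/object center points, then normalize distances about the
# image center. Every numeric value here is a placeholder.
train_transform = transforms.Compose(
    [
        MoveCenterPts(
            hand_dist_delta=0.05,
            obj_dist_delta=0.05,
            window_size=30,
            im_w=1280,
            im_h=720,
            num_obj_classes=42,
            feat_version=3,
        ),
        # Assumed signature -- check the class definition before relying on it.
        NormalizeFromCenter(im_w=1280, im_h=720, feat_version=3),
    ]
)
```

This would then be passed as `PTG_Dataset(..., transform=train_transform)`, matching how `__init__` applies `self.transform` to the concatenated feature frames.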
8 changes: 6 additions & 2 deletions tcn_hpl/data/mnist_datamodule.py
@@ -114,8 +114,12 @@ def setup(self, stage: Optional[str] = None) -> None:
         """
         # load and split datasets only if not loaded already
        if not self.data_train and not self.data_val and not self.data_test:
-            trainset = MNIST(self.hparams.data_dir, train=True, transform=self.transforms)
-            testset = MNIST(self.hparams.data_dir, train=False, transform=self.transforms)
+            trainset = MNIST(
+                self.hparams.data_dir, train=True, transform=self.transforms
+            )
+            testset = MNIST(
+                self.hparams.data_dir, train=False, transform=self.transforms
+            )
             dataset = ConcatDataset(datasets=[trainset, testset])
             self.data_train, self.data_val, self.data_test = random_split(
                 dataset=dataset,
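The reformatted `setup` follows the common Lightning datamodule pattern: load both MNIST splits, concatenate them, and re-split with a seeded generator so the partition is reproducible. A standalone sketch of that pattern; the split sizes and seed below are illustrative, not this repo's configured values:

```python
import torch
from torch.utils.data import ConcatDataset, random_split
from torchvision.datasets import MNIST
from torchvision.transforms import transforms

tfm = transforms.ToTensor()
trainset = MNIST("data/", train=True, download=True, transform=tfm)
testset = MNIST("data/", train=False, download=True, transform=tfm)
dataset = ConcatDataset(datasets=[trainset, testset])

# 70,000 images total; carve out fixed train/val/test subsets.
data_train, data_val, data_test = random_split(
    dataset=dataset,
    lengths=(55_000, 5_000, 10_000),
    generator=torch.Generator().manual_seed(42),
)
```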