Commit d92845f

formatting

periakiva committed Apr 9, 2024
1 parent 5ee5262 commit d92845f
Showing 24 changed files with 1,477 additions and 1,181 deletions.
71 changes: 39 additions & 32 deletions tcn_hpl/data/components/PTG_dataset.py
@@ -1,23 +1,23 @@
 import torch
 
 import numpy as np
 
 from typing import Optional, Callable, Dict, List
 from torchvision.transforms import transforms
 
 
 class PTG_Dataset(torch.utils.data.Dataset):
     def __init__(
         self,
-        videos: List[str], 
+        videos: List[str],
         num_classes: int,
         actions_dict: Dict[str, int],
-        gt_path: str, 
+        gt_path: str,
         features_path: str,
         sample_rate: int,
         window_size: int,
-        transform: Optional[Callable]=None,
-        target_transform: Optional[Callable]=None
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
     ):
         self.num_classes = num_classes
         self.actions_dict = actions_dict
@@ -37,7 +37,7 @@ def __init__(
         source_frames_list = []
         for v, vid in enumerate(videos):
             features = np.load(self.features_path + vid.split(".")[0] + ".npy")
-            
+
             file_ptr = open(self.gt_path + vid, "r")
             content = file_ptr.read().split("\n")[:-1]
 
@@ -47,7 +47,7 @@ def __init__(
             for i in range(len(classes)):
                 classes[i] = self.actions_dict[content[i]]
                 source_vid[i] = v
-                source_frame[i] = i # find filename????
+                source_frame[i] = i  # find filename????
 
             # mask out the end of the window size of the end of the sequence to prevent overlap between videos.
             mask = np.ones_like(classes)
@@ -59,74 +59,81 @@ def __init__(
             source_vids_list.append(source_vid[:: self.sample_rate])
             source_frames_list.append(source_frame[:: self.sample_rate])
 
-        self.feature_frames = np.concatenate(input_frames_list, axis=1, dtype=np.single).transpose()
-        self.target_frames = np.concatenate(target_frames_list, axis=0, dtype=int, casting='unsafe')
-        self.mask_frames = np.concatenate(mask_frames_list, axis=0, dtype=int, casting='unsafe')
-        self.source_vids = np.concatenate(source_vids_list, axis=0, dtype=int, casting='unsafe')
+        self.feature_frames = np.concatenate(
+            input_frames_list, axis=1, dtype=np.single
+        ).transpose()
+        self.target_frames = np.concatenate(
+            target_frames_list, axis=0, dtype=int, casting="unsafe"
+        )
+        self.mask_frames = np.concatenate(
+            mask_frames_list, axis=0, dtype=int, casting="unsafe"
+        )
+        self.source_vids = np.concatenate(
+            source_vids_list, axis=0, dtype=int, casting="unsafe"
+        )
         self.source_frames = np.concatenate(source_frames_list, dtype=int, axis=0)
 
         # Transforms/Augmentations
         if self.transform is not None:
             self.feature_frames = self.transform(self.feature_frames.copy())
         if self.target_transform is not None:
             self.target_frames = self.target_transform(self.target_frames.copy())
 
-        #zero_idxs = random.sample(list(range(len(self.mask_frames))), len(self.mask_frames)*0.3)
-        #self.mask_frames[zero_idxs] = 0
+        # zero_idxs = random.sample(list(range(len(self.mask_frames))), len(self.mask_frames)*0.3)
+        # self.mask_frames[zero_idxs] = 0
 
-        self.norm_stats['mean'] = self.feature_frames.mean(axis=0)
-        self.norm_stats['std'] = self.feature_frames.std(axis=0)
-        self.norm_stats['max'] = self.feature_frames.max(axis=0)
-        self.norm_stats['min'] = self.feature_frames.min(axis=0)
+        self.norm_stats["mean"] = self.feature_frames.mean(axis=0)
+        self.norm_stats["std"] = self.feature_frames.std(axis=0)
+        self.norm_stats["max"] = self.feature_frames.max(axis=0)
+        self.norm_stats["min"] = self.feature_frames.min(axis=0)
 
         self.dataset_size = self.target_frames.shape[0] - self.window_size
 
-        # Get weights for sampler by inverse count. 
+        # Get weights for sampler by inverse count.
         # Weights represent the GT of the final frame of a window starting from idx
         class_name, counts = np.unique(self.target_frames, return_counts=True)
-        class_weights = 1. / counts
+        class_weights = 1.0 / counts
         class_lookup = dict()
         for i, cn in enumerate(class_name):
             class_lookup[cn] = class_weights[i]
         self.weights = np.zeros((self.dataset_size))
         for i in range(self.dataset_size):
-            self.weights[i] = class_lookup[self.target_frames[i+self.window_size]]
+            self.weights[i] = class_lookup[self.target_frames[i + self.window_size]]
         # Set weights to 0 for frames before the window length
         # So they don't get picked
-        self.weights[:self.window_size] = 0
+        self.weights[: self.window_size] = 0
 
     def __len__(self):
         return self.dataset_size
 
     def __getitem__(self, idx):
-        #print(f"window idx: {idx}:{idx+self.window_size}")
+        # print(f"window idx: {idx}:{idx+self.window_size}")
         """Grab a window of frames starting at ``idx``
         :param idx: The first index of the time window
         :return: features, targets, and mask of the window
         """
 
         # print(f"size of dataset: {self.__len__()}")
         # print(f"self.feature_frames: {self.feature_frames.shape}")
         # print(f"self.target_frames: {self.target_frames.shape}")
         # print(f"self.mask_frames: {self.mask_frames.shape}")
         # print(f"self.source_vids: {self.source_vids.shape}")
         # print(f"self.source_frames: {self.source_frames.shape}")
         # print(f"self.mask_frames: {self.mask_frames}")
-        features = self.feature_frames[idx:idx+self.window_size, :]
-        target = self.target_frames[idx:idx+self.window_size]
-        mask = self.mask_frames[idx:idx+self.window_size]
-        source_vid = self.source_vids[idx:idx+self.window_size]
-        source_frame = self.source_frames[idx:idx+self.window_size]
-
+        features = self.feature_frames[idx : idx + self.window_size, :]
+        target = self.target_frames[idx : idx + self.window_size]
+        mask = self.mask_frames[idx : idx + self.window_size]
+        source_vid = self.source_vids[idx : idx + self.window_size]
+        source_frame = self.source_frames[idx : idx + self.window_size]
 
         # print(f"mask: {mask}")
         # print(f"features: {features.shape}")
         # print(f"target: {target.shape}")
         # print(f"mask: {mask.shape}")
         # print(f"source_vid: {source_vid.shape}")
         # print(f"source_frame: {source_frame.shape}")
 
         return features, target, mask, np.array(source_vid), source_frame
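The `self.weights` array built in `__init__` weights each window by the inverse frequency of the class of its final frame, with the first `window_size` indices zeroed out so incomplete windows are never drawn. A minimal sketch of how these weights would feed PyTorch's `WeightedRandomSampler`; the constructor arguments below are placeholders, not values from this repository:

```python
import torch

# Placeholder arguments -- the real video list, label map, and paths
# come from this repo's configs, not from this sketch.
dataset = PTG_Dataset(
    videos=["video_1.txt"],
    num_classes=10,
    actions_dict={"background": 0},
    gt_path="gt/",
    features_path="features/",
    sample_rate=1,
    window_size=30,
)

# Draw window start indices in proportion to dataset.weights;
# zero-weight indices (the first window_size frames) are never picked.
sampler = torch.utils.data.WeightedRandomSampler(
    weights=dataset.weights,
    num_samples=len(dataset),
    replacement=True,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)
```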
55 changes: 31 additions & 24 deletions tcn_hpl/data/components/augmentations.py
@@ -10,7 +10,14 @@ class MoveCenterPts(torch.nn.Module):
     """
 
     def __init__(
-        self, hand_dist_delta, obj_dist_delta, window_size, im_w, im_h, num_obj_classes, feat_version
+        self,
+        hand_dist_delta,
+        obj_dist_delta,
+        window_size,
+        im_w,
+        im_h,
+        num_obj_classes,
+        feat_version,
     ):
         """
         :param hand_dist_delta: Decimal percentage to calculate the +-offset in
@@ -116,21 +123,19 @@ def forward(self, features):
 
             elif self.feat_version == 3:
                 # Right and left hand distances
-                right_idx1 = 1; right_idx2 = 2;
-                left_idx1 = 4; left_idx2 = 5
+                right_idx1 = 1
+                right_idx2 = 2
+                left_idx1 = 4
+                left_idx2 = 5
                 for hand_delta_x, hand_delta_y, start_idx, end_idx in zip(
                     [rhand_delta_x, lhand_delta_x],
                     [rhand_delta_y, lhand_delta_y],
                     [right_idx1, left_idx1],
                     [right_idx2, left_idx2],
                 ):
-                    frame[start_idx] = (
-                        frame[start_idx] + hand_delta_x
-                    )
+                    frame[start_idx] = frame[start_idx] + hand_delta_x
 
-                    frame[end_idx] = (
-                        frame[end_idx] + hand_delta_y
-                    )
+                    frame[end_idx] = frame[end_idx] + hand_delta_y
 
                 # Object distances
                 start_idx = 10
@@ -260,10 +265,12 @@ def forward(self, features):
             )
 
             # Distance between hands
-            hands_dist_idx = left_dist_idx2 
+            hands_dist_idx = left_dist_idx2
 
             features[:, hands_dist_idx] = features[:, hands_dist_idx] / self.im_w
-            features[:, hands_dist_idx + 1] = features[:, hands_dist_idx + 1] / self.im_h
+            features[:, hands_dist_idx + 1] = (
+                features[:, hands_dist_idx + 1] / self.im_h
+            )
 
         elif self.feat_version == 3:
             # Distances are from the center, skip
@@ -278,9 +285,10 @@ def __repr__(self) -> str:
         detail = f"(im_w={self.im_w}, im_h={self.im_h}, num_obj_classes={self.num_obj_classes}, feat_version={self.feat_version})"
         return f"{self.__class__.__name__}{detail}"
 
+
 class NormalizeFromCenter(torch.nn.Module):
     """Normalize the distances from -1 to 1 with respect to the image center
     Missing objects will be set to (2, 2)
     """
 
@@ -310,19 +318,17 @@ def forward(self, features):
 
         elif self.feat_version == 3:
             # Right and left hand distances
-            right_idx1 = 1; right_idx2 = 2;
-            left_idx1 = 4; left_idx2 = 5
+            right_idx1 = 1
+            right_idx2 = 2
+            left_idx1 = 4
+            left_idx2 = 5
             for start_idx, end_idx in zip(
                 [right_idx1, left_idx1],
                 [right_idx2, left_idx2],
             ):
 
-                features[:, start_idx] = (
-                    features[:, start_idx] / self.half_w
-                )
-                features[:, end_idx] = (
-                    features[:, end_idx] / self.half_h
-                )
+                features[:, start_idx] = features[:, start_idx] / self.half_w
+                features[:, end_idx] = features[:, end_idx] / self.half_h
 
             # Object distances
             start_idx = 10
@@ -337,6 +343,7 @@ def forward(self, features):
         return features
 
     def __repr__(self) -> str:
-        detail = f"(im_w={self.im_w}, im_h={self.im_h}, feat_version={self.feat_version})"
+        detail = (
+            f"(im_w={self.im_w}, im_h={self.im_h}, feat_version={self.feat_version})"
+        )
         return f"{self.__class__.__name__}{detail}"
 
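Both augmentations are `torch.nn.Module`s that operate on the windowed feature array, so they can be chained with `transforms.Compose` and passed as the dataset's `transform`. A sketch under assumptions: `MoveCenterPts`' argument list is taken from the diff above, while `NormalizeFromCenter` is assumed to take `(im_w, im_h, feat_version)` based on the attributes its `__repr__` reports; all numeric values are placeholders.

```python
from torchvision.transforms import transforms

# Jitter hand/object center points, then normalize distances about the
# image center. Every numeric value here is a placeholder.
train_transform = transforms.Compose(
    [
        MoveCenterPts(
            hand_dist_delta=0.05,
            obj_dist_delta=0.05,
            window_size=30,
            im_w=1280,
            im_h=720,
            num_obj_classes=42,
            feat_version=3,
        ),
        # Assumed signature -- check the class definition before relying on it.
        NormalizeFromCenter(im_w=1280, im_h=720, feat_version=3),
    ]
)
```

This would then be passed as `PTG_Dataset(..., transform=train_transform)`, matching how `__init__` applies `self.transform` to the concatenated feature frames.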
8 changes: 6 additions & 2 deletions tcn_hpl/data/mnist_datamodule.py
@@ -114,8 +114,12 @@ def setup(self, stage: Optional[str] = None) -> None:
         """
         # load and split datasets only if not loaded already
        if not self.data_train and not self.data_val and not self.data_test:
-            trainset = MNIST(self.hparams.data_dir, train=True, transform=self.transforms)
-            testset = MNIST(self.hparams.data_dir, train=False, transform=self.transforms)
+            trainset = MNIST(
+                self.hparams.data_dir, train=True, transform=self.transforms
+            )
+            testset = MNIST(
+                self.hparams.data_dir, train=False, transform=self.transforms
+            )
             dataset = ConcatDataset(datasets=[trainset, testset])
             self.data_train, self.data_val, self.data_test = random_split(
                 dataset=dataset,
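The reformatted `setup` follows the common Lightning datamodule pattern: load both MNIST splits, concatenate them, and re-split with a seeded generator so the partition is reproducible. A standalone sketch of that pattern; the split sizes and seed below are illustrative, not this repo's configured values:

```python
import torch
from torch.utils.data import ConcatDataset, random_split
from torchvision.datasets import MNIST
from torchvision.transforms import transforms

tfm = transforms.ToTensor()
trainset = MNIST("data/", train=True, download=True, transform=tfm)
testset = MNIST("data/", train=False, download=True, transform=tfm)
dataset = ConcatDataset(datasets=[trainset, testset])

# 70,000 images total; carve out fixed train/val/test subsets.
data_train, data_val, data_test = random_split(
    dataset=dataset,
    lengths=(55_000, 5_000, 10_000),
    generator=torch.Generator().manual_seed(42),
)
```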