[Feature] Support Visualizer (#35)

* Implement portable eval script for server * lint format * implement visualizer for debugging * Update README * docstring * Refine some docstrings. * refine docstring * refine docstring
OpenRobotLab · Apr 23, 2024 · e144c4b · e144c4b
1 parent 7d79106
commit e144c4b
Show file tree

Hide file tree

Showing 8 changed files with 267 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -193,6 +193,10 @@ To inference and evaluate the model (e.g., the checkpoint `work_dirs/mv-3ddet/ep
 python tools/test.py configs/detection/mv-det3d_8xb4_embodiedscan-3d-284class-9dof.py work_dirs/mv-3ddet/epoch_12.pth --launcher="pytorch"
 ```
 
+### Using Visualizer during inference
+
+We provide `EmbodiedScanBaseVisualizer` to visualize the output of models during inference. Please refer to the [guide](embodiedscan/visualizer/README.md) for details.
+
 ### Inference and Submit your Results
 
 We preliminarily support format-only inference for multi-view 3D visual grounding. To achieve format-only inference during test, just set `format_only=True` in `test_evaluator` in the corresponding config like [here](https://github.com/OpenRobotLab/EmbodiedScan/blob/main/configs/grounding/mv-grounding_8xb12_embodiedscan-vg-9dof.py#L183). Then just run the test script like:

diff --git a/embodiedscan/datasets/embodiedscan_dataset.py b/embodiedscan/datasets/embodiedscan_dataset.py
@@ -105,6 +105,7 @@ def parse_data_info(self, info: dict) -> dict:
         """
         info['box_type_3d'] = self.box_type_3d
         info['axis_align_matrix'] = self._get_axis_align_matrix(info)
+        info['scan_id'] = info['sample_idx']
         ann_dataset = info['sample_idx'].split('/')[0]
         if ann_dataset == 'matterport3d':
             info['depth_shift'] = 4000.0

diff --git a/embodiedscan/datasets/transforms/formatting.py b/embodiedscan/datasets/transforms/formatting.py
@@ -75,7 +75,7 @@ def __init__(
             'cam2global', 'crop_offset', 'img_crop_offset', 'resize_img_shape',
             'lidar2cam', 'ori_lidar2img', 'num_ref_frames', 'num_views',
             'ego2global', 'fov_ori2aug', 'ego2cam', 'axis_align_matrix',
-            'text', 'tokens_positive')):
+            'text', 'tokens_positive', 'scan_id')):
         self.keys = keys
         self.meta_keys = meta_keys
 

diff --git a/embodiedscan/registry.py b/embodiedscan/registry.py
@@ -3,6 +3,8 @@
 from mmengine import MODELS as MMENGINE_MODELS
 from mmengine import TASK_UTILS as MMENGINE_TASK_UTILS
 from mmengine import TRANSFORMS as MMENGINE_TRANSFORMS
+from mmengine import VISBACKENDS as MMENGINE_VISBACKENDS
+from mmengine import VISUALIZERS as MMENGINE_VISUALIZERS
 from mmengine import Registry
 
 MODELS = Registry('model',
@@ -20,3 +22,10 @@
 TASK_UTILS = Registry('task util',
                       parent=MMENGINE_TASK_UTILS,
                       locations=['embodiedscan.models'])
+# manage visualizers
+VISUALIZERS = Registry('visualizer',
+                       parent=MMENGINE_VISUALIZERS,
+                       locations=['embodiedscan.visualizer'])
+# manage visualizer backend
+VISBACKENDS = Registry('vis_backend',
+                       parent=MMENGINE_VISBACKENDS,
+                       locations=['embodiedscan.visualizer'])
diff --git a/embodiedscan/visualization/utils.py b/embodiedscan/visualization/utils.py
@@ -1,6 +1,7 @@
 import cv2
 import numpy as np
 import open3d as o3d
+from torch import Tensor
 
 from .line_mesh import LineMesh
 
@@ -39,23 +40,106 @@ def _box_add_thickness(box, thickness):
     return results
 
 
-def _9dof_to_box(box, label, color_selector):
+def _9dof_to_box(box, label=None, color_selector=None, color=None):
+    """Convert 9-DoF box from array/tensor to open3d.OrientedBoundingBox.
+
+    Args:
+        box (numpy.ndarray|torch.Tensor|List[float]):
+            9-DoF box with shape (9,).
+        label (int, optional): Label of the box. Defaults to None.
+        color_selector (:obj:`ColorSelector`, optional):
+            Color selector for boxes. Defaults to None.
+        color (tuple[int], optional): Color of the box.
+            You can directly specify the color.
+            If you do, the color_selector and label will be ignored.
+            Defaults to None.
+
+    Returns:
+        :obj:`open3d.geometry.OrientedBoundingBox`: The converted box.
+            Its color is set from ``color`` if given, otherwise from
+            ``color_selector.get_color(label)`` when both are given;
+            each channel is divided by 255. If neither applies, the
+            color is left untouched.
+    """
     if isinstance(box, list):
         box = np.array(box)
+    if isinstance(box, Tensor):
+        box = box.cpu().numpy()
     center = box[:3].reshape(3, 1)
     scale = box[3:6].reshape(3, 1)
     rot = box[6:].reshape(3, 1)
     rot_mat = o3d.geometry.OrientedBoundingBox.get_rotation_matrix_from_zxy(
         rot)
     geo = o3d.geometry.OrientedBoundingBox(center, rot_mat, scale)
 
-    color = color_selector.get_color(label)
-    color = [x / 255.0 for x in color]
-    geo.color = color
+    # An explicit color takes precedence over the color_selector lookup.
+    if color is not None:
+        geo.color = [x / 255.0 for x in color]
+        return geo
+
+    if label is not None and color_selector is not None:
+        color = color_selector.get_color(label)
+        color = [x / 255.0 for x in color]
+        geo.color = color
     return geo
 
 
+def nms_filter(pred_results, iou_thr=0.15, score_thr=0.075, topk_per_class=10):
+    """Greedy Non-Maximum Suppression for 3D Euler boxes with a per-class cap.
+
+    Candidates are visited from the highest to the lowest score. A candidate
+    is kept only if its score reaches ``score_thr``, its category has not
+    yet reached ``topk_per_class`` kept boxes, and its IoU with every box
+    kept so far does not exceed ``iou_thr``.
+
+    Args:
+        pred_results (:obj:`InstanceData`):
+            Results predicted by the model. The attributes ``bboxes_3d``,
+            ``scores_3d`` and ``labels_3d`` are read.
+        iou_thr (float): IoU threshold for NMS. Defaults to 0.15.
+        score_thr (float): Score threshold.
+            Instances with scores below the threshold will not be kept.
+            Defaults to 0.075.
+        topk_per_class (int): Number of instances kept per category.
+            Defaults to 10.
+
+    Returns:
+        numpy.ndarray[float], np.ndarray[int]:
+            Filtered boxes with shape (N, 9) and labels with shape (N,).
+    """
+    boxes = pred_results.bboxes_3d
+    boxes_np = boxes.tensor.cpu().numpy()
+    pairwise_iou = boxes.overlaps(boxes, boxes, eps=1e-5)
+    scores = pred_results.scores_3d.cpu().numpy()
+    labels = pred_results.labels_3d.cpu().numpy()
+
+    # Candidate indices ordered by descending score (stable for ties).
+    order = sorted(range(boxes_np.shape[0]),
+                   key=lambda k: scores[k],
+                   reverse=True)
+
+    kept = []
+    kept_per_class = dict()
+    for cand in order:
+        category = labels[cand]
+        if scores[cand] < score_thr:
+            continue
+        if kept_per_class.get(category, 0) >= topk_per_class:
+            continue
+        # Suppress the candidate if it overlaps too much with any kept box.
+        if any(pairwise_iou[cand][prev] > iou_thr for prev in kept):
+            continue
+        kept.append(cand)
+        kept_per_class[category] = kept_per_class.get(category, 0) + 1
+
+    return boxes_np[kept], labels[kept]
+
+
 def draw_camera(camera_pose, camera_size=0.5, return_points=False):
+    """Draw the camera pose in the form of a cone.
+
+    Args:
+        camera_pose (numpy.ndarray): 4x4 camera pose from camera to world.
+        camera_size (float): Size of the camera cone. Defaults to 0.5.
+        return_points (bool): Whether to return the points of the camera cone.
+            Defaults to False.
+
+    Returns:
+        numpy.ndarray | :obj:`LineSet`:
+            if return_points is True, return the points of the camera cone.
+            Otherwise, return the camera cone as an open3d.LineSet.
+    """
     # camera_pose : 4*4 camera to world
     point = np.array([[0, 0, 0], [-camera_size, -camera_size, camera_size * 2],
                       [camera_size, -camera_size, camera_size * 2],

diff --git a/embodiedscan/visualizer/README.md b/embodiedscan/visualizer/README.md
@@ -0,0 +1,29 @@
+### EmbodiedScanBaseVisualizer Simple Tutorial
+
+To use the visualizer, you need to specify it in the config. Add the following line to your config file.
+
+```Python
+visualizer = dict(type='EmbodiedScanBaseVisualizer', vis_backends=[dict(type='LocalVisBackend')], save_dir='temp_dir')
+```
+
+Then call the visualizer from your model, for example in its `predict` method.
+
+```Python
+def predict(self, batch_inputs_dict, batch_data_samples, **kwargs):
+    x = self.extract_feat(batch_inputs_dict, batch_data_samples)
+    results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
+    predictions = self.add_pred_to_datasample(batch_data_samples, results_list)
+
+    # visualization
+    from embodiedscan.visualizer import EmbodiedScanBaseVisualizer
+    visualizer = EmbodiedScanBaseVisualizer.get_current_instance()
+    visualizer.visualize_scene(predictions)
+
+    return predictions
+```
+
+The visualizer will apply Non-Maximum Suppression (NMS) to avoid redundant boxes in the visualization. You can specify its parameters by passing `nms_args`.
+
+```Python
+visualizer.visualize_scene(predictions, nms_args = dict(iou_thr = 0.15, score_thr = 0.075, topk_per_class = 10))
+```
diff --git a/embodiedscan/visualizer/__init__.py b/embodiedscan/visualizer/__init__.py
@@ -0,0 +1,3 @@
+from .base_visualizer import EmbodiedScanBaseVisualizer
+
+__all__ = ['EmbodiedScanBaseVisualizer']
diff --git a/embodiedscan/visualizer/base_visualizer.py b/embodiedscan/visualizer/base_visualizer.py
@@ -0,0 +1,132 @@
+import os
+
+from mmengine.dist import master_only
+from mmengine.visualization import Visualizer
+
+from embodiedscan.registry import VISUALIZERS
+
+try:
+    import open3d as o3d
+
+    from embodiedscan.visualization.utils import _9dof_to_box, nms_filter
+except ImportError:
+    o3d = None
+
+
+@VISUALIZERS.register_module()
+class EmbodiedScanBaseVisualizer(Visualizer):
+    """EmbodiedScan Base Visualizer. Method to visualize 3D scenes and Euler
+    boxes.
+
+    Args:
+        name (str): Name of the visualizer. Defaults to 'visualizer'.
+        save_dir (str, optional): Directory to save visualizations.
+            Defaults to None.
+        vis_backends (list[ConfigType], optional):
+            List of visualization backends to use. Defaults to None.
+
+    Raises:
+        ImportError: If open3d (or the open3d-based helpers) could not be
+            imported.
+    """
+
+    def __init__(self,
+                 name: str = 'visualizer',
+                 save_dir: str = None,
+                 vis_backends=None) -> None:
+        # Check the optional dependency first so the parent constructor is
+        # not run for a visualizer that cannot render anything.
+        if o3d is None:
+            raise ImportError('Please install open3d.')
+
+        super().__init__(name=name,
+                         vis_backends=vis_backends,
+                         save_dir=save_dir)
+
+    @staticmethod
+    def get_root_dir(img_path):
+        """Get the root directory of the dataset.
+
+        Args:
+            img_path (str): Path of an image of the scene.
+
+        Returns:
+            str: The part of ``img_path`` preceding the first known image
+            folder marker found in it.
+
+        Raises:
+            ValueError: If ``img_path`` contains none of the known markers.
+        """
+        # Markers are tried in this order; the first one contained in the
+        # path decides where the path is cut.
+        markers = ('posed_images', 'sequence', 'matterport_color_images')
+        for marker in markers:
+            if marker in img_path:
+                return img_path.split(marker)[0]
+        raise ValueError('Custom datasets are not supported.')
+
+    @staticmethod
+    def get_ply(root_dir, scene_name):
+        """Get the path of the ply file.
+
+        Args:
+            root_dir (str): Root directory as returned by ``get_root_dir``.
+            scene_name (str): Scene identifier of the form
+                ``dataset/region`` or ``dataset/building/region``.
+
+        Returns:
+            str: Path of the mesh file of the scene.
+
+        Raises:
+            ValueError: If ``scene_name`` has neither two nor three
+                '/'-separated parts.
+            NotImplementedError: If the dataset is not one of 'scannet',
+                '3rscan' or 'matterport3d'.
+        """
+        parts = scene_name.split('/')
+        if len(parts) == 2:
+            dataset, region = parts
+        else:
+            # The middle part (building) is not needed to locate the mesh.
+            dataset, _, region = parts
+        if dataset == 'scannet':
+            filepath = os.path.join(root_dir, 'scans', region,
+                                    f'{region}_vh_clean.ply')
+        elif dataset == '3rscan':
+            filepath = os.path.join(root_dir, 'mesh.refined.v2.obj')
+        elif dataset == 'matterport3d':
+            filepath = os.path.join(root_dir, 'region_segmentations',
+                                    f'{region}.ply')
+        else:
+            raise NotImplementedError(
+                f'Dataset {dataset!r} is not supported.')
+        return filepath
+
+    @master_only
+    def visualize_scene(self,
+                        data_samples,
+                        class_filter=None,
+                        nms_args=None):
+        """Visualize the 3D scene with 3D boxes.
+
+        Predicted boxes are filtered with ``nms_filter`` and drawn with
+        color (255, 0, 0); ground-truth boxes are drawn with color
+        (0, 255, 0). The scene mesh, a coordinate frame and the boxes are
+        shown with ``open3d.visualization.draw_geometries``.
+
+        Note:
+            If the prediction has no ``labels_3d``, a pseudo label equal
+            to the single ground-truth label is written to
+            ``pred_instances_3d.labels_3d`` of the data sample.
+
+        Args:
+            data_samples (list[:obj:`Det3DDataSample`]):
+                The output of the model. Must contain exactly one sample.
+            class_filter (int, optional): Class filter for visualization.
+                Default to None to show all classes.
+            nms_args (dict, optional): NMS arguments for filtering boxes.
+                Defaults to None, which means
+                dict(iou_thr=0.15, score_thr=0.075, topk_per_class=10).
+        """
+        # Build the default per call instead of sharing one dict object
+        # across all calls through the function signature.
+        if nms_args is None:
+            nms_args = dict(iou_thr=0.15, score_thr=0.075, topk_per_class=10)
+
+        assert len(data_samples) == 1, \
+            'visualize_scene only supports a single data sample.'
+        data_sample = data_samples[0]
+
+        metainfo = data_sample.metainfo
+        pred = data_sample.pred_instances_3d
+        gt = data_sample.eval_ann_info
+
+        if not hasattr(pred, 'labels_3d'):
+            # Without predicted labels, every predicted box gets the label
+            # of the only ground-truth box.
+            assert gt['gt_labels_3d'].shape[0] == 1, \
+                'Pseudo labels require exactly one ground-truth label.'
+            gt_label = gt['gt_labels_3d'][0].item()
+            num_pred = pred.bboxes_3d.tensor.shape[0]
+            pred.labels_3d = pred.bboxes_3d.tensor.new_ones(
+                num_pred, ) * gt_label
+        pred_box, pred_label = nms_filter(pred, **nms_args)
+
+        root_dir = self.get_root_dir(metainfo['img_path'][0])
+        ply_file = self.get_ply(root_dir, metainfo['scan_id'])
+        axis_align_matrix = metainfo['axis_align_matrix']
+
+        mesh = o3d.io.read_triangle_mesh(ply_file, True)
+        mesh.transform(axis_align_matrix)
+        frame = o3d.geometry.TriangleMesh.create_coordinate_frame()
+        boxes = []
+        # pred 3D box
+        for box, label in zip(pred_box, pred_label):
+            if class_filter is not None and label != class_filter:
+                continue
+            boxes.append(_9dof_to_box(box, color=(255, 0, 0)))
+        # gt 3D box
+        gt_boxes = gt['gt_bboxes_3d'].tensor
+        for i in range(gt_boxes.shape[0]):
+            label = gt['gt_labels_3d'][i]
+            if class_filter is not None and label != class_filter:
+                continue
+            boxes.append(_9dof_to_box(gt_boxes[i], color=(0, 255, 0)))
+
+        o3d.visualization.draw_geometries([mesh, frame] + boxes)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .base_visualizer import EmbodiedScanBaseVisualizer

		__all__ = ['EmbodiedScanBaseVisualizer']