Skip to content

Commit

Permalink
[Feature] Support Visualizer (#35)
Browse files Browse the repository at this point in the history
* Implement portable eval script for server

* lint format

* implement visualizer for debugging

* Update README

* docstring

* Refine some docstrings.

* refine docstring

* refine docstring
  • Loading branch information
mxh1999 authored Apr 23, 2024
1 parent 7d79106 commit e144c4b
Show file tree
Hide file tree
Showing 8 changed files with 267 additions and 5 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ To inference and evaluate the model (e.g., the checkpoint `work_dirs/mv-3ddet/ep
python tools/test.py configs/detection/mv-det3d_8xb4_embodiedscan-3d-284class-9dof.py work_dirs/mv-3ddet/epoch_12.pth --launcher="pytorch"
```

### Using Visualizer during inference

We provide `EmbodiedScanBaseVisualizer` to visualize the outputs of models during inference. Please refer to the [guide](embodiedscan/visualizer/README.md) for details.

### Inference and Submit your Results

We preliminarily support format-only inference for multi-view 3D visual grounding. To achieve format-only inference during test, just set `format_only=True` in `test_evaluator` in the corresponding config like [here](https://github.com/OpenRobotLab/EmbodiedScan/blob/main/configs/grounding/mv-grounding_8xb12_embodiedscan-vg-9dof.py#L183). Then just run the test script like:
Expand Down
1 change: 1 addition & 0 deletions embodiedscan/datasets/embodiedscan_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def parse_data_info(self, info: dict) -> dict:
"""
info['box_type_3d'] = self.box_type_3d
info['axis_align_matrix'] = self._get_axis_align_matrix(info)
info['scan_id'] = info['sample_idx']
ann_dataset = info['sample_idx'].split('/')[0]
if ann_dataset == 'matterport3d':
info['depth_shift'] = 4000.0
Expand Down
2 changes: 1 addition & 1 deletion embodiedscan/datasets/transforms/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
'cam2global', 'crop_offset', 'img_crop_offset', 'resize_img_shape',
'lidar2cam', 'ori_lidar2img', 'num_ref_frames', 'num_views',
'ego2global', 'fov_ori2aug', 'ego2cam', 'axis_align_matrix',
'text', 'tokens_positive')):
'text', 'tokens_positive', 'scan_id')):
self.keys = keys
self.meta_keys = meta_keys

Expand Down
9 changes: 9 additions & 0 deletions embodiedscan/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from mmengine import MODELS as MMENGINE_MODELS
from mmengine import TASK_UTILS as MMENGINE_TASK_UTILS
from mmengine import TRANSFORMS as MMENGINE_TRANSFORMS
from mmengine import VISBACKENDS as MMENGINE_VISBACKENDS
from mmengine import VISUALIZERS as MMENGINE_VISUALIZERS
from mmengine import Registry

MODELS = Registry('model',
Expand All @@ -20,3 +22,10 @@
# Registry for task utilities, declared as a child of mmengine's TASK_UTILS.
TASK_UTILS = Registry(
    'task util',
    parent=MMENGINE_TASK_UTILS,
    locations=['embodiedscan.models'],
)
# Registry for visualizers, declared as a child of mmengine's VISUALIZERS.
VISUALIZERS = Registry(
    'visualizer',
    parent=MMENGINE_VISUALIZERS,
    locations=['embodiedscan.visualizer'],
)
# Registry for visualizer backends, declared as a child of mmengine's
# VISBACKENDS.
VISBACKENDS = Registry(
    'vis_backend',
    parent=MMENGINE_VISBACKENDS,
    locations=['embodiedscan.visualizer'],
)
92 changes: 88 additions & 4 deletions embodiedscan/visualization/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cv2
import numpy as np
import open3d as o3d
from torch import Tensor

from .line_mesh import LineMesh

Expand Down Expand Up @@ -39,23 +40,106 @@ def _box_add_thickness(box, thickness):
return results


def _9dof_to_box(box, label=None, color_selector=None, color=None):
    """Convert 9-DoF box from array/tensor to open3d.OrientedBoundingBox.

    Args:
        box (numpy.ndarray|torch.Tensor|List[float]):
            9-DoF box with shape (9,). The first three values are used as
            the center, the next three as the extent and the last three as
            the rotation angles passed to open3d's zxy rotation helper.
        label (int, optional): Label of the box. Defaults to None.
        color_selector (:obj:`ColorSelector`, optional):
            Color selector for boxes. Defaults to None.
        color (tuple[int], optional): Color of the box, with channels in
            the 0-255 range (each channel is divided by 255).
            You can directly specify the color.
            If you do, the color_selector and label will be ignored.
            Defaults to None.

    Returns:
        open3d.geometry.OrientedBoundingBox: The converted box. Its color is
        only set when ``color`` is given, or when both ``label`` and
        ``color_selector`` are given.
    """
    if isinstance(box, (list, tuple)):
        box = np.array(box)
    if isinstance(box, Tensor):
        # detach() so tensors that still track gradients can be converted.
        box = box.detach().cpu().numpy()
    center = box[:3].reshape(3, 1)
    scale = box[3:6].reshape(3, 1)
    rot = box[6:].reshape(3, 1)
    rot_mat = o3d.geometry.OrientedBoundingBox.get_rotation_matrix_from_zxy(
        rot)
    geo = o3d.geometry.OrientedBoundingBox(center, rot_mat, scale)

    # An explicit color wins; otherwise fall back to the selector, which is
    # only consulted when both a label and a selector were supplied.
    if color is None and label is not None and color_selector is not None:
        color = color_selector.get_color(label)
    if color is not None:
        geo.color = [x / 255.0 for x in color]
    return geo


def nms_filter(pred_results, iou_thr=0.15, score_thr=0.075, topk_per_class=10):
    """Greedy Non-Maximum Suppression for 3D Euler boxes.

    Candidates are visited in descending score order. A candidate is kept
    only if its category has not yet reached ``topk_per_class`` kept boxes,
    its score is not below ``score_thr`` and its IoU with every box kept so
    far does not exceed ``iou_thr``. Capping the number of boxes per
    category avoids redundant boxes in the visualization.

    Args:
        pred_results (:obj:`InstanceData`):
            Results predicted by the model. The attributes ``bboxes_3d``,
            ``scores_3d`` and ``labels_3d`` are read.
        iou_thr (float): IoU thresholds for NMS. Defaults to 0.15.
        score_thr (float): Score thresholds.
            Instances with scores below thresholds will not be kept.
            Defaults to 0.075.
        topk_per_class (int): Number of instances kept per category.
            Defaults to 10.

    Returns:
        numpy.ndarray[float], np.ndarray[int]:
            Filtered boxes with shape (N, 9) and labels with shape (N,).
    """
    boxes = pred_results.bboxes_3d
    boxes_tensor = boxes.tensor.cpu().numpy()
    # Pairwise IoU between all predicted boxes.
    iou = boxes.overlaps(boxes, boxes, eps=1e-5)
    score = pred_results.scores_3d.cpu().numpy()
    label = pred_results.labels_3d.cpu().numpy()

    # Candidate indices, highest score first.
    order = sorted(range(boxes_tensor.shape[0]),
                   key=lambda k: score[k],
                   reverse=True)

    kept = []
    kept_per_class = {}
    for cand in order:
        category = label[cand]
        if kept_per_class.get(category, 0) >= topk_per_class:
            continue
        if score[cand] < score_thr:
            continue
        # Suppress the candidate if it overlaps too much with a kept box.
        if any(iou[cand][prev] > iou_thr for prev in kept):
            continue
        kept.append(cand)
        kept_per_class[category] = kept_per_class.get(category, 0) + 1

    return boxes_tensor[kept], label[kept]


def draw_camera(camera_pose, camera_size=0.5, return_points=False):
"""Draw the camera pose in the form of a cone.
Args:
camera_pose (numpy.ndarray): 4x4 camera pose from camera to world.
camera_size (float): Size of the camera cone. Defaults to 0.5.
return_points (bool): Whether to return the points of the camera cone.
Defaults to False.
Returns:
numpy.ndarray | :obj:`LineSet`:
if return_points is True, return the points of the camera cone.
Otherwise, return the camera cone as an open3d.LineSet.
"""
# camera_pose : 4*4 camera to world
point = np.array([[0, 0, 0], [-camera_size, -camera_size, camera_size * 2],
[camera_size, -camera_size, camera_size * 2],
Expand Down
29 changes: 29 additions & 0 deletions embodiedscan/visualizer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
### EmbodiedScanBaseVisualizer Simple Tutorial

To use the visualizer, you need to specify it in the config. Add the following line to your config file.

```Python
visualizer = dict(type='EmbodiedScanBaseVisualizer', vis_backends=[dict(type='LocalVisBackend')], save_dir='temp_dir')
```

Then call the visualizer inside your model, for example at the end of its `predict` method.

```Python
def predict(self, batch_inputs_dict, batch_data_samples, **kwargs):
x = self.extract_feat(batch_inputs_dict, batch_data_samples)
results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
predictions = self.add_pred_to_datasample(batch_data_samples, results_list)

# visualization
from embodiedscan.visualizer import EmbodiedScanBaseVisualizer
visualizer = EmbodiedScanBaseVisualizer.get_current_instance()
visualizer.visualize_scene(predictions)

return predictions
```

The visualizer applies Non-Maximum Suppression (NMS) to avoid redundant boxes in the visualization. You can specify its parameters by passing `nms_args`.

```Python
visualizer.visualize_scene(predictions, nms_args = dict(iou_thr = 0.15, score_thr = 0.075, topk_per_class = 10))
```
3 changes: 3 additions & 0 deletions embodiedscan/visualizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .base_visualizer import EmbodiedScanBaseVisualizer

__all__ = ['EmbodiedScanBaseVisualizer']
132 changes: 132 additions & 0 deletions embodiedscan/visualizer/base_visualizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import os

from mmengine.dist import master_only
from mmengine.visualization import Visualizer

from embodiedscan.registry import VISUALIZERS

try:
import open3d as o3d

from embodiedscan.visualization.utils import _9dof_to_box, nms_filter
except ImportError:
o3d = None


@VISUALIZERS.register_module()
class EmbodiedScanBaseVisualizer(Visualizer):
    """EmbodiedScan Base Visualizer. Method to visualize 3D scenes and Euler
    boxes.

    Args:
        name (str): Name of the visualizer. Defaults to 'visualizer'.
        save_dir (str, optional): Directory to save visualizations.
            Defaults to None.
        vis_backends (list[ConfigType], optional):
            List of visualization backends to use. Defaults to None.

    Raises:
        ImportError: If open3d could not be imported.
    """

    def __init__(self,
                 name: str = 'visualizer',
                 save_dir: str = None,
                 vis_backends=None) -> None:
        super().__init__(name=name,
                         vis_backends=vis_backends,
                         save_dir=save_dir)

        # open3d is imported lazily at module level; fail loudly here so the
        # error surfaces at construction time instead of at draw time.
        if o3d is None:
            raise ImportError('Please install open3d.')

    @staticmethod
    def get_root_dir(img_path):
        """Get the root directory of the dataset.

        The root is everything in ``img_path`` that precedes the first
        occurrence of one of the known image sub-directory names.

        Args:
            img_path (str): Path of an image belonging to the scan.

        Returns:
            str: The part of ``img_path`` before the matched marker.

        Raises:
            ValueError: If none of the known markers occurs in ``img_path``.
        """
        # Checked in this order; the first marker found decides the split.
        for marker in ('posed_images', 'sequence', 'matterport_color_images'):
            if marker in img_path:
                return img_path.split(marker)[0]
        raise ValueError('Custom datasets are not supported.')

    @staticmethod
    def get_ply(root_dir, scene_name):
        """Get the path of the mesh file of a scene.

        Args:
            root_dir (str): Root directory, e.g. as returned by
                :meth:`get_root_dir`.
            scene_name (str): Scene identifier of the form
                ``dataset/region`` or ``dataset/building/region``.

        Returns:
            str: Path of the mesh file for the scene.

        Raises:
            NotImplementedError: If the dataset prefix is not one of
                'scannet', '3rscan' or 'matterport3d'.
        """
        parts = scene_name.split('/')
        if len(parts) == 2:
            dataset, region = parts
        else:
            # The middle component (building) is not needed for the path.
            dataset, _, region = parts
        if dataset == 'scannet':
            filepath = os.path.join(root_dir, 'scans', region,
                                    f'{region}_vh_clean.ply')
        elif dataset == '3rscan':
            filepath = os.path.join(root_dir, 'mesh.refined.v2.obj')
        elif dataset == 'matterport3d':
            filepath = os.path.join(root_dir, 'region_segmentations',
                                    f'{region}.ply')
        else:
            raise NotImplementedError(f'Unsupported dataset: {dataset}')
        return filepath

    @master_only
    def visualize_scene(self, data_samples, class_filter=None, nms_args=None):
        """Visualize the 3D scene with 3D boxes.

        Predicted boxes are drawn in red and ground-truth boxes in green,
        on top of the scene mesh transformed by the axis-align matrix.

        Args:
            data_samples (list[:obj:`Det3DDataSample`]):
                The output of the model. Must contain exactly one sample.
            class_filter (int, optional): Class filter for visualization.
                Default to None to show all classes.
            nms_args (dict, optional): NMS arguments for filtering boxes.
                Defaults to None, which is equivalent to
                dict(iou_thr = 0.15,
                     score_thr = 0.075,
                     topk_per_class = 10).
        """
        # Build the default per call instead of sharing one dict object
        # across calls through the signature.
        if nms_args is None:
            nms_args = dict(iou_thr=0.15, score_thr=0.075, topk_per_class=10)

        assert len(data_samples) == 1, \
            'visualize_scene expects exactly one data sample.'
        data_sample = data_samples[0]

        metainfo = data_sample.metainfo
        pred = data_sample.pred_instances_3d
        gt = data_sample.eval_ann_info

        if not hasattr(pred, 'labels_3d'):
            # Predictions without labels: reuse the single ground-truth
            # label as a pseudo label for every predicted box so that
            # nms_filter can run.
            assert gt['gt_labels_3d'].shape[0] == 1, \
                'Pseudo labels require exactly one ground-truth label.'
            gt_label = gt['gt_labels_3d'][0].item()
            num_pred = pred.bboxes_3d.tensor.shape[0]
            pseudo_label = pred.bboxes_3d.tensor.new_ones(
                num_pred, ) * gt_label
            pred.labels_3d = pseudo_label
        pred_box, pred_label = nms_filter(pred, **nms_args)

        root_dir = self.get_root_dir(metainfo['img_path'][0])
        ply_file = self.get_ply(root_dir, metainfo['scan_id'])
        axis_align_matrix = metainfo['axis_align_matrix']

        # NOTE(review): the second positional argument is presumably
        # open3d's enable_post_processing flag -- confirm against the docs.
        mesh = o3d.io.read_triangle_mesh(ply_file, True)
        mesh.transform(axis_align_matrix)
        frame = o3d.geometry.TriangleMesh.create_coordinate_frame()
        boxes = []
        # pred 3D box (red)
        for box, label in zip(pred_box, pred_label):
            if class_filter is not None and label != class_filter:
                continue
            boxes.append(_9dof_to_box(box, color=(255, 0, 0)))
        # gt 3D box (green)
        gt_boxes = gt['gt_bboxes_3d'].tensor
        for i in range(gt_boxes.shape[0]):
            label = gt['gt_labels_3d'][i]
            if class_filter is not None and label != class_filter:
                continue
            boxes.append(_9dof_to_box(gt_boxes[i], color=(0, 255, 0)))

        o3d.visualization.draw_geometries([mesh, frame] + boxes)

0 comments on commit e144c4b

Please sign in to comment.