Skip to content

Commit

Permalink
[Feature] Support for continuous occupancy rendering demo (#62)
Browse files Browse the repository at this point in the history
* Implement portable eval script for server

* lint format

* implement visualizer for debugging

* Update README

* docstring

* Refine some docstrings.

* refine docstring

* refine docstring

* update README for single gpu training/testing

* fix: rotate yaw instead of pitch

* fix: scale for euler 3d box

* lint format

* Minor fixes: 1) some info may have inaccurate tokens_positive 2) create_positive_maps in batch > 1 training

* Visual test complete. Need full pipeline test

* Pass Pipeline test

* Lint

---------

Co-authored-by: Tai-Wang <[email protected]>
  • Loading branch information
mxh1999 and Tai-Wang authored Aug 19, 2024
1 parent 1084674 commit 3698069
Show file tree
Hide file tree
Showing 4 changed files with 307 additions and 34 deletions.
58 changes: 41 additions & 17 deletions demo/demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,15 @@
" model.eval()\n",
" return model\n",
"\n",
"config_path = '../config/detection/embodied-det3d_8xb1_embodiedscan-3d-284class-9dof-mlvl.py'\n",
"config_path = '../configs/detection/cont-det3d_8xb1_embodiedscan-3d-284class-9dof.py'\n",
"checkpoint_path = '../ckpt/continuous.pth'\n",
"device = 'cuda:0'\n",
"\n",
"# Occupancy Settings\n",
"# config_path = '../configs/occupancy/cont-occ_8xb1_embodiedscan-occ-80class.py'\n",
"# checkpoint_path = '../ckpt/continuous_occupancy.pth'\n",
"# device = 'cuda:0'\n",
"\n",
"model = init_model(config_path, checkpoint_path, device=device)\n",
"cfg = model.cfg\n",
"classes = list(cfg.metainfo.classes)"
Expand Down Expand Up @@ -175,7 +180,10 @@
" ann_info=dict( # empty annotation\n",
" gt_bboxes_3d=np.zeros((0, 9), dtype=np.float32),\n",
" gt_labels_3d=np.zeros((0, ), dtype=np.int64),\n",
" visible_instance_masks=[[] for i in range(len(poses))]))\n",
" visible_instance_masks=[[] for i in range(len(poses))],\n",
" gt_occupancy=np.zeros((0,4), dtype=np.int64),\n",
" visible_occupancy_masks=[[] for i in range(len(poses))]\n",
" ))\n",
"n_frames = len(poses)\n",
"data = []\n",
"for i in range(1, n_frames):\n",
Expand Down Expand Up @@ -291,10 +299,19 @@
"\n",
" return boxes_tensor[selected_idx], label[selected_idx]\n",
"\n",
"is_occupancy = ('pred_occupancy' in results[0])\n",
"if is_occupancy:\n",
" classes = ['empty'] + classes # 0 = empty for occupancy\n",
"\n",
"filtered_results = []\n",
"for i in range(len(results)):\n",
" boxes, labels = nms_filter(results[i].pred_instances_3d)\n",
" filtered_results.append((boxes, labels))\n",
"if not is_occupancy:\n",
" for i in range(len(results)):\n",
" boxes, labels = nms_filter(results[i].pred_instances_3d)\n",
" filtered_results.append((boxes, labels))\n",
"else:\n",
" for i in range(len(results)):\n",
" pred_occ = results[i].pred_occupancy.cpu().numpy()\n",
" filtered_results.append(pred_occ)\n",
"\n",
"selected_image = [\n",
" info['img_path'].index(img_path)\n",
Expand All @@ -309,12 +326,15 @@
"for i in range(len(results)):\n",
" image_ann = info['images'][selected_image[i]]\n",
" image_ann['visible_instance_ids'] = []\n",
" boxes, labels = filtered_results[i]\n",
" for j in range(boxes.shape[0]):\n",
" pseudo_ann['instances'].append(\n",
" dict(bbox_3d=boxes[j], bbox_label_3d=labels[j]))\n",
" instance_id = len(pseudo_ann['instances']) - 1\n",
" image_ann['visible_instance_ids'].append(instance_id)\n",
" if is_occupancy:\n",
" image_ann['pred_occupancy'] = filtered_results[i]\n",
" else:\n",
" boxes, labels = filtered_results[i]\n",
" for j in range(boxes.shape[0]):\n",
" pseudo_ann['instances'].append(\n",
" dict(bbox_3d=boxes[j], bbox_label_3d=labels[j]))\n",
" instance_id = len(pseudo_ann['instances']) - 1\n",
" image_ann['visible_instance_ids'].append(instance_id)\n",
" pseudo_ann['images'].append(image_ann)\n",
"\n",
"metainfo = {'categories': classes}\n",
Expand Down Expand Up @@ -351,7 +371,10 @@
"from embodiedscan.explorer import EmbodiedScanExplorer\n",
"visualizer = EmbodiedScanExplorer(data_root={'demo': root_dir},\n",
" ann_file=[packed_pseudo_ann])\n",
"visualizer.render_continuous_scene(f'demo/{scene_name}')"
"if not is_occupancy:\n",
" visualizer.render_continuous_scene(f'demo/{scene_name}')\n",
"else:\n",
" visualizer.render_continuous_occupancy_prediction(f'demo/{scene_name}')"
]
},
{
Expand Down Expand Up @@ -861,11 +884,12 @@
}
],
"source": [
"for i in range(len(results)):\n",
" cam_name = pseudo_ann['images'][i]['img_path'].split('/')[-1][:-4]\n",
" visualizer.show_image(f'demo/{scene_name}',\n",
" camera_name=cam_name,\n",
" render_box=True)"
"if not is_occupancy:\n",
" for i in range(len(results)):\n",
" cam_name = pseudo_ann['images'][i]['img_path'].split('/')[-1][:-4]\n",
" visualizer.show_image(f'demo/{scene_name}',\n",
" camera_name=cam_name,\n",
" render_box=True)"
]
}
],
Expand Down
49 changes: 33 additions & 16 deletions demo/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ def main(args):
ann_info=dict( # empty annotation
gt_bboxes_3d=np.zeros((0, 9), dtype=np.float32),
gt_labels_3d=np.zeros((0, ), dtype=np.int64),
visible_instance_masks=[[] for i in range(len(poses))]))
visible_instance_masks=[[] for i in range(len(poses))],
gt_occupancy=np.zeros((0, 4), dtype=np.int64),
visible_occupancy_masks=[[] for i in range(len(poses))]))
n_frames = len(poses)
data = []
for i in range(1, n_frames):
Expand Down Expand Up @@ -208,10 +210,19 @@ def main(args):
torch.cuda.empty_cache()

# collect results and construct data for visualization
is_occupancy = ('pred_occupancy' in results[0])
if is_occupancy:
classes = ['empty'] + classes # 0 = empty for occupancy

filtered_results = []
for i in range(len(results)):
boxes, labels = nms_filter(results[i].pred_instances_3d)
filtered_results.append((boxes, labels))
if not is_occupancy:
for i in range(len(results)):
boxes, labels = nms_filter(results[i].pred_instances_3d)
filtered_results.append((boxes, labels))
else:
for i in range(len(results)):
pred_occ = results[i].pred_occupancy.cpu().numpy()
filtered_results.append(pred_occ)

selected_image = [
info['img_path'].index(img_path)
Expand All @@ -226,12 +237,15 @@ def main(args):
for i in range(len(results)):
image_ann = info['images'][selected_image[i]]
image_ann['visible_instance_ids'] = []
boxes, labels = filtered_results[i]
for j in range(boxes.shape[0]):
pseudo_ann['instances'].append(
dict(bbox_3d=boxes[j], bbox_label_3d=labels[j]))
instance_id = len(pseudo_ann['instances']) - 1
image_ann['visible_instance_ids'].append(instance_id)
if is_occupancy:
image_ann['pred_occupancy'] = filtered_results[i]
else:
boxes, labels = filtered_results[i]
for j in range(boxes.shape[0]):
pseudo_ann['instances'].append(
dict(bbox_3d=boxes[j], bbox_label_3d=labels[j]))
instance_id = len(pseudo_ann['instances']) - 1
image_ann['visible_instance_ids'].append(instance_id)
pseudo_ann['images'].append(image_ann)

metainfo = {'categories': classes}
Expand All @@ -240,12 +254,15 @@ def main(args):
# visualization
visualizer = EmbodiedScanExplorer(data_root={'demo': args.root_dir},
ann_file=[packed_pseudo_ann])
visualizer.render_continuous_scene(f'demo/{args.scene}')
for i in range(len(results)):
cam_name = pseudo_ann['images'][i]['img_path'].split('/')[-1][:-4]
visualizer.show_image(f'demo/{args.scene}',
camera_name=cam_name,
render_box=True)
if not is_occupancy:
visualizer.render_continuous_scene(f'demo/{args.scene}')
for i in range(len(results)):
cam_name = pseudo_ann['images'][i]['img_path'].split('/')[-1][:-4]
visualizer.show_image(f'demo/{args.scene}',
camera_name=cam_name,
render_box=True)
else:
visualizer.render_continuous_occupancy_prediction(f'demo/{args.scene}')


if __name__ == '__main__':
Expand Down
56 changes: 55 additions & 1 deletion embodiedscan/explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from embodiedscan.visualization.color_selector import ColorMap
from embodiedscan.visualization.continuous_drawer import (
ContinuousDrawer, ContinuousOccupancyDrawer)
ContinuousDrawer, ContinuousOccupancyDrawer,
ContinuousPredictionOccupancyDrawer)
from embodiedscan.visualization.img_drawer import ImageDrawer
from embodiedscan.visualization.utils import _9dof_to_box, _box_add_thickness

Expand Down Expand Up @@ -382,6 +383,59 @@ def render_continuous_occupancy(self, scene_name, start_cam=None):
self.color_selector, start_idx)
drawer.begin()

def render_continuous_occupancy_prediction(self,
                                           scene_name,
                                           start_cam=None):
    """Render occupancy prediction with continuous ego-centric
    observations.

    Looks up the scene whose ``sample_idx`` equals ``scene_name`` in
    ``self.data``, resolves the frame to start from, and hands both to a
    ``ContinuousPredictionOccupancyDrawer``. Failures are reported by
    printing a message and returning ``None``.

    Args:
        scene_name (str): Scene name, ``'<dataset>/<region>'`` or
            ``'<dataset>/<building>/<region>'``. Only the leading
            dataset component is used by this method; the full string
            is matched against each scene's ``sample_idx``.
        start_cam (str, optional): Camera frame from which the rendering
            starts. Defaults to None, corresponding to the first frame.
    """
    # Only the dataset prefix is needed below. Taking the first component
    # (instead of unpacking into 2 or 3 names) avoids an unrelated
    # ValueError for names with an unexpected number of '/' parts; such
    # names simply fall through to the 'No such scene' path.
    dataset = scene_name.split('/')[0]

    def _camera_name(img_path):
        # Strip the dataset-specific trailing characters from the image
        # file name so it can be compared with ``start_cam``.
        file_name = img_path.split('/')[-1]
        if dataset == '3rscan':
            return file_name[:-10]
        if dataset == 'matterport3d':
            return file_name[:-8] + file_name[-7:-4]
        # 'scannet' and any other dataset: drop the last 4 characters
        # (presumably the file extension).
        return file_name[:-4]

    # First scene with a matching sample_idx (assumed unique).
    selected_scene = next(
        (scene for scene in self.data
         if scene['sample_idx'] == scene_name), None)
    if selected_scene is None:
        print('No such scene')
        return

    start_idx = 0
    if start_cam is not None:
        start_idx = next(
            (idx for idx, img in enumerate(selected_scene['images'])
             if _camera_name(img['img_path']) == start_cam), -1)
        if start_idx == -1:
            print('No such camera')
            return

    drawer = ContinuousPredictionOccupancyDrawer(
        dataset, self.data_root[dataset], selected_scene, self.classes,
        self.id_to_index, self.color_selector, start_idx)
    drawer.begin()

def render_occupancy(self, scene_name):
"""Render the occupancy annotation of a given scene.
Expand Down
Loading

0 comments on commit 3698069

Please sign in to comment.