huggingface · jleibs · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py
@@ -144,7 +144,7 @@ def __getitem__(self, idx):
                 self.tolerance_s,
             )
 
-        if self.video:
+        if self.video and self.video_backend != "raw":
             item = load_from_videos(
                 item,
                 self.video_frame_keys,

diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py
@@ -70,6 +70,7 @@
 
 import numpy as np
 import rerun as rr
+import rerun.blueprint as rrb
 import torch
 import torch.utils.data
 import tqdm
@@ -110,14 +111,15 @@ def visualize_dataset(
     save: bool = False,
     root: Path | None = None,
     output_dir: Path | None = None,
+    decode_video: bool = False,
 ) -> Path | None:
     if save:
         assert (
             output_dir is not None
         ), "Set an output directory where to write .rrd files with `--output-dir path/to/directory`."
 
     logging.info("Loading dataset")
-    dataset = LeRobotDataset(repo_id, root=root)
+    dataset = LeRobotDataset(repo_id, root=root, video_backend=None if decode_video else "raw")
 
     logging.info("Loading dataloader")
     episode_sampler = EpisodeSampler(dataset, episode_index)
@@ -146,6 +148,16 @@ def visualize_dataset(
 
     logging.info("Logging to Rerun")
 
+    sent_videos = {}
+
+    # The viewer's layout heuristic doesn't pick the correct layout for video files,
+    # so set up the blueprint manually.
+    blueprint = rrb.Vertical(
+        rrb.Grid(contents=[rrb.Spatial2DView(origin=key) for key in dataset.camera_keys]),
+        rrb.TimeSeriesView(),
+    )
+    rr.send_blueprint(blueprint, make_active=False)
+
     for batch in tqdm.tqdm(dataloader, total=len(dataloader)):
         # iterate over the batch
         for i in range(len(batch["index"])):
@@ -154,8 +166,20 @@ def visualize_dataset(
 
             # display each camera image
             for key in dataset.camera_keys:
-                # TODO(rcadene): add `.compress()`? is it lossless?
-                rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
+                if isinstance(batch[key], torch.Tensor):
+                    rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
+                elif "path" in batch[key] and "timestamp" in batch[key]:
+                    if sent_videos.get(key) != batch[key]["path"][i]:
+                        sent_videos[key] = batch[key]["path"][i]
+                        rr.log(key, rr.AssetVideo(path=dataset.videos_dir.parent / batch[key]["path"][i]))
+                    rr.log(
+                        key,
+                        rr.VideoFrameReference(
+                            timestamp=rr.components.VideoTimestamp(seconds=batch[key]["timestamp"][i])
+                        ),
+                    )
+                else:
+                    logging.warning(f"Unsupported image schema for key {key}")
 
             # display each dimension of action space (e.g. actuators command)
             if "action" in batch:
@@ -266,6 +290,15 @@ def main():
             "Visualize the data by running `rerun path/to/file.rrd` on your local machine."
         ),
     )
+    parser.add_argument(
+        "--decode-video",
+        action="store_true",
+        default=False,
+        help=(
+            "Decode the video frames into images. "  # trailing space: adjacent literals are concatenated
+            "By default videos are sent to the viewer for direct visualization."
+        ),
+    )
 
     args = parser.parse_args()
     visualize_dataset(**vars(args))

diff --git a/pyproject.toml b/pyproject.toml
@@ -57,7 +57,7 @@ pytest-cov = {version = ">=5.0.0", optional = true}
 datasets = ">=2.19.0"
 imagecodecs = { version = ">=2024.1.1", optional = true }
 pyav = ">=12.0.5"
-rerun-sdk = ">=0.15.1"
+rerun-sdk = ">=0.19.0"
 deepdiff = ">=7.0.1"
 flask = ">=3.0.3"
 pandas = {version = ">=2.2.2", optional = true}