From dfbfba45a4b33998a33bb3a07a4d335f293c30d3 Mon Sep 17 00:00:00 2001
From: Jeremy Leibs <jeremy@rerun.io>
Date: Thu, 17 Oct 2024 17:14:27 -0400
Subject: [PATCH 1/3] Upgrade rerun-sdk to 0.19.0

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 47e982d1b..ae31efcde 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,7 @@ pytest-cov = {version = ">=5.0.0", optional = true}
 datasets = ">=2.19.0"
 imagecodecs = { version = ">=2024.1.1", optional = true }
 pyav = ">=12.0.5"
-rerun-sdk = ">=0.15.1"
+rerun-sdk = ">=0.19.0"
 deepdiff = ">=7.0.1"
 flask = ">=3.0.3"
 pandas = {version = ">=2.2.2", optional = true}

From 54ff7c101093b8ce3afb348f21925346ee82473c Mon Sep 17 00:00:00 2001
From: Jeremy Leibs <jeremy@rerun.io>
Date: Thu, 17 Oct 2024 17:14:54 -0400
Subject: [PATCH 2/3] Send video files directly to Rerun viewer

---
 lerobot/common/datasets/lerobot_dataset.py |  2 +-
 lerobot/scripts/visualize_dataset.py       | 38 ++++++++++++++++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py
index eb76f78d6..a5edfdba2 100644
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -144,7 +144,7 @@ def __getitem__(self, idx):
                 self.tolerance_s,
             )
 
-        if self.video:
+        if self.video and self.video_backend != "raw":
             item = load_from_videos(
                 item,
                 self.video_frame_keys,
diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py
index 6cff5752a..87092cad7 100644
--- a/lerobot/scripts/visualize_dataset.py
+++ b/lerobot/scripts/visualize_dataset.py
@@ -70,6 +70,7 @@
 
 import numpy as np
 import rerun as rr
+import rerun.blueprint as rrb
 import torch
 import torch.utils.data
 import tqdm
@@ -110,6 +111,7 @@ def visualize_dataset(
     save: bool = False,
     root: Path | None = None,
     output_dir: Path | None = None,
+    decode_video: bool = False,
 ) -> Path | None:
     if save:
         assert (
@@ -117,7 +119,7 @@ def visualize_dataset(
         ), "Set an output directory where to write .rrd files with `--output-dir path/to/directory`."
 
     logging.info("Loading dataset")
-    dataset = LeRobotDataset(repo_id, root=root)
+    dataset = LeRobotDataset(repo_id, root=root, video_backend=None if decode_video else "raw")
 
     logging.info("Loading dataloader")
     episode_sampler = EpisodeSampler(dataset, episode_index)
@@ -146,6 +148,17 @@ def visualize_dataset(
 
     logging.info("Logging to Rerun")
 
+    data_dir = dataset.videos_dir.parent
+    sent_videos = {}
+
+    # Video file heuristic doesn't trigger the correct layout, so set up a blueprint
+    # manually.
+    blueprint = rrb.Vertical(
+        rrb.Grid(contents=[rrb.Spatial2DView(origin=key) for key in dataset.camera_keys]),
+        rrb.TimeSeriesView(),
+    )
+    rr.send_blueprint(blueprint, make_active=False)
+
     for batch in tqdm.tqdm(dataloader, total=len(dataloader)):
         # iterate over the batch
         for i in range(len(batch["index"])):
@@ -154,8 +167,18 @@ def visualize_dataset(
 
             # display each camera image
             for key in dataset.camera_keys:
-                # TODO(rcadene): add `.compress()`? is it lossless?
-                rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
+                if decode_video:
+                    rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
+                else:
+                    if sent_videos.get(key) != batch[key]["path"][i]:
+                        sent_videos[key] = batch[key]["path"][i]
+                        rr.log(key, rr.AssetVideo(path=data_dir / batch[key]["path"][i]))
+                    rr.log(
+                        key,
+                        rr.VideoFrameReference(
+                            timestamp=rr.components.VideoTimestamp(seconds=batch[key]["timestamp"][i])
+                        ),
+                    )
 
             # display each dimension of action space (e.g. actuators command)
             if "action" in batch:
@@ -266,6 +289,15 @@ def main():
             "Visualize the data by running `rerun path/to/file.rrd` on your local machine."
         ),
     )
+    parser.add_argument(
+        "--decode-video",
+        action="store_true",
+        default=False,
+        help=(
+            "Decode the video frames into images. "
+            "By default videos are sent to the viewer for direct visualization."
+        ),
+    )
 
     args = parser.parse_args()
     visualize_dataset(**vars(args))

From 3e7bb8a387e92057ea6109d7184b593a4b17319e Mon Sep 17 00:00:00 2001
From: Jeremy Leibs <jeremy@rerun.io>
Date: Thu, 17 Oct 2024 17:25:39 -0400
Subject: [PATCH 3/3] Support datasets without video

---
 lerobot/scripts/visualize_dataset.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py
index 87092cad7..933420a6d 100644
--- a/lerobot/scripts/visualize_dataset.py
+++ b/lerobot/scripts/visualize_dataset.py
@@ -148,7 +148,6 @@ def visualize_dataset(
 
     logging.info("Logging to Rerun")
 
-    data_dir = dataset.videos_dir.parent
     sent_videos = {}
 
     # Video file heuristic doesn't trigger the correct layout, so set up a blueprint
@@ -167,18 +166,20 @@ def visualize_dataset(
 
             # display each camera image
             for key in dataset.camera_keys:
-                if decode_video:
+                if isinstance(batch[key], torch.Tensor):
                     rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
-                else:
+                elif "path" in batch[key] and "timestamp" in batch[key]:
                     if sent_videos.get(key) != batch[key]["path"][i]:
                         sent_videos[key] = batch[key]["path"][i]
-                        rr.log(key, rr.AssetVideo(path=data_dir / batch[key]["path"][i]))
+                        rr.log(key, rr.AssetVideo(path=dataset.videos_dir.parent / batch[key]["path"][i]))
                     rr.log(
                         key,
                         rr.VideoFrameReference(
                             timestamp=rr.components.VideoTimestamp(seconds=batch[key]["timestamp"][i])
                         ),
                     )
+                else:
+                    logging.warning(f"Unsupported image schema for key {key}")
 
             # display each dimension of action space (e.g. actuators command)
             if "action" in batch: