Fixed a bug in the action scale of the intervention actions and offline dataset actions (scale by inverse delta).

Co-authored-by: Adil Zouitine <[email protected]>
michel-aractingi and Adil Zouitine committed Feb 14, 2025
1 parent 36711d7 commit 7ae368e
Showing 5 changed files with 42 additions and 16 deletions.
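For context on what is being fixed: in the delta action space, the environment wrapper scales the policy's relative action by delta before applying it to the current joint positions, so an absolute teleop intervention (and, analogously, an offline dataset action) has to be divided by that same delta to land back in the policy's action space. Below is a minimal, self-contained sketch of this inverse scaling; every variable name and number in it is illustrative and not taken from the repository.

# Illustrative sketch of the inverse-delta scaling this commit applies to
# intervention (teleop) actions and offline dataset actions. All names and
# values are made up for the example.
import torch

delta = 0.1  # scale the env wrapper applies to policy actions
current_joint_positions = torch.tensor([0.10, -0.20, 0.30, 0.00])

# Forward mapping: a policy action is scaled by delta before being applied.
policy_action = torch.tensor([0.5, -0.5, 0.2, 0.0])
target_positions = current_joint_positions + delta * policy_action

# Inverse mapping: an absolute teleop intervention is converted back into the
# policy's delta-scaled action space (as in gym_manipulator.py below).
teleop_absolute = torch.tensor([0.15, -0.25, 0.32, 0.00])
teleop_in_policy_space = (teleop_absolute - current_joint_positions) / delta

# Offline dataset actions are divided by the same delta (see buffer.py below),
# so interventions and demonstrations end up on one common scale.
print(teleop_in_policy_space)  # roughly [0.5, -0.5, 0.2, 0.0]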
4 changes: 2 additions & 2 deletions lerobot/configs/policy/sac_real.yaml
@@ -8,8 +8,8 @@
# env.gym.obs_type=environment_state_agent_pos \

seed: 1
-dataset_repo_id: aractingi/push_cube_square_offline_demo_cropped_resized
-#aractingi/push_cube_square_light_offline_demo_cropped_resized
+dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
+#aractingi/push_cube_square_offline_demo_cropped_resized

training:
# Offline training dataloader
4 changes: 4 additions & 0 deletions lerobot/scripts/server/buffer.py
@@ -209,6 +209,7 @@ def from_lerobot_dataset(
state_keys: Optional[Sequence[str]] = None,
capacity: Optional[int] = None,
action_mask: Optional[Sequence[int]] = None,
+action_delta: Optional[float] = None,
) -> "ReplayBuffer":
"""
Convert a LeRobotDataset into a ReplayBuffer.
@@ -249,6 +250,9 @@ def from_lerobot_dataset(
else:
data["action"] = data["action"][:, action_mask]

+if action_delta is not None:
+data["action"] = data["action"] / action_delta
+
replay_buffer.add(
state=data["state"],
action=data["action"],
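To make the new branch concrete, here is a small self-contained sketch of what the mask-then-divide step above does to dataset actions; the tensor values and the mask are made up for illustration, and only the order of operations mirrors the code in this hunk.

# Sketch with made-up values: offline dataset actions are masked down to the
# controlled dimensions and then divided by action_delta before being added
# to the replay buffer.
import torch

data = {"action": torch.tensor([[0.02, -0.01, 0.00, 0.03, 1.00]])}  # e.g. 4 joints + gripper
action_mask = [0, 1, 2, 3]   # keep only the controlled joint dimensions
action_delta = 0.1           # the same delta the environment wrapper uses

data["action"] = data["action"][:, action_mask]      # shape (1, 4)
if action_delta is not None:
    data["action"] = data["action"] / action_delta   # tensor([[0.2000, -0.1000, 0.0000, 0.3000]])

print(data["action"])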
8 changes: 2 additions & 6 deletions lerobot/scripts/server/crop_dataset_roi.py
@@ -260,12 +260,8 @@ def convert_lerobot_dataset_to_cropper_lerobot_dataset(
rois = json.load(f)

-# rois = {
-# "observation.images.side": (92, 123, 379, 349),
-# "observation.images.front": (109, 37, 361, 557),
-# }
# rois = {
# "observation.images.front": [109, 37, 361, 557],
# "observation.images.side": [94, 161, 372, 315],
# "observation.images.front": [102, 43, 358, 523],
# "observation.images.side": [92, 123, 379, 349],
# }

# Print the selected rectangular ROIs
41 changes: 33 additions & 8 deletions lerobot/scripts/server/gym_manipulator.py
@@ -213,18 +213,18 @@ def step(

# When applying the delta action space, convert teleop absolute values to relative differences.
if self.use_delta_action_space:
-teleop_action = teleop_action - self.current_joint_positions
-if torch.any(teleop_action < -self.delta_relative_bounds_size * self.delta) and torch.any(
-teleop_action > self.delta_relative_bounds_size
+teleop_action = (teleop_action - self.current_joint_positions) / self.delta
+if torch.any(teleop_action < -self.relative_bounds_size) and torch.any(
+teleop_action > self.relative_bounds_size
):
logging.debug(
f"Relative teleop delta exceeded bounds {self.delta_relative_bounds_size}, teleop_action {teleop_action}\n"
f"lower bounds condition {teleop_action < -self.delta_relative_bounds_size}\n"
f"upper bounds condition {teleop_action > self.delta_relative_bounds_size}"
f"Relative teleop delta exceeded bounds {self.relative_bounds_size}, teleop_action {teleop_action}\n"
f"lower bounds condition {teleop_action < -self.relative_bounds_size}\n"
f"upper bounds condition {teleop_action > self.relative_bounds_size}"
)

teleop_action = torch.clamp(
-teleop_action, -self.delta_relative_bounds_size, self.delta_relative_bounds_size
+teleop_action, -self.relative_bounds_size, self.relative_bounds_size
)
# NOTE: To mimic the shape of a neural network output, we add a batch dimension to the teleop action.
if teleop_action.dim() == 1:
@@ -312,7 +312,7 @@ def step(self, action):
start_time = time.perf_counter()
with torch.inference_mode():
reward = (
-self.reward_classifier.predict_reward(images, threshold=0.6)
+self.reward_classifier.predict_reward(images, threshold=0.8)
if self.reward_classifier is not None
else 0.0
)
@@ -726,6 +726,24 @@ def get_classifier(pretrained_path, config_path, device="mps"):
return model


+def replay_episode(env, repo_id, root=None, episode=0):
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+local_files_only = root is not None
+dataset = LeRobotDataset(repo_id, root=root, episodes=[episode], local_files_only=local_files_only)
+actions = dataset.hf_dataset.select_columns("action")
+
+for idx in range(dataset.num_frames):
+start_episode_t = time.perf_counter()
+
+action = actions[idx]["action"][:4]
+print(action)
+env.step((action / env.unwrapped.delta, False))
+
+dt_s = time.perf_counter() - start_episode_t
+busy_wait(1 / 10 - dt_s)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--fps", type=int, default=30, help="control frequency")
@@ -776,6 +794,9 @@ def get_classifier(pretrained_path, config_path, device="mps"):
parser.add_argument("--env-overrides", type=str, default=None, help="Overrides for the env yaml file")
parser.add_argument("--control-time-s", type=float, default=20, help="Maximum episode length in seconds")
parser.add_argument("--reset-follower-pos", type=int, default=1, help="Reset follower between episodes")
parser.add_argument("--replay-repo-id", type=str, default=None, help="Repo ID of the episode to replay")
parser.add_argument("--replay-root", type=str, default=None, help="Root of the dataset to replay")
parser.add_argument("--replay-episode", type=int, default=0, help="Episode to replay")
args = parser.parse_args()

robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
@@ -795,6 +816,10 @@ def get_classifier(pretrained_path, config_path, device="mps"):

env.reset()

+if args.replay_repo_id is not None:
+replay_episode(env, args.replay_repo_id, root=args.replay_root, episode=args.replay_episode)
+exit()

# Retrieve the robot's action space for joint commands.
action_space_robot = env.action_space.spaces[0]

1 change: 1 addition & 0 deletions lerobot/scripts/server/learner_server.py
@@ -600,6 +600,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
device=device,
state_keys=cfg.policy.input_shapes.keys(),
action_mask=active_action_dims,
+action_delta=cfg.env.wrapper.delta_action,
)
batch_size: int = batch_size // 2 # We will sample from both replay buffer

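Putting the pieces together, here is a hedged sketch of the resulting call site in train(): the keyword arguments and cfg.env.wrapper.delta_action come from this diff, while the import path, the positional dataset argument, and the surrounding variables (device, active_action_dims, the offline dataset) are assumed to be as defined elsewhere in learner_server.py.

# Call-site sketch (not verbatim from the repo): the learner forwards the env
# wrapper's delta_action into the offline replay buffer so that offline dataset
# actions are divided by the same factor as live intervention actions.
from lerobot.scripts.server.buffer import ReplayBuffer  # assumed import path

offline_replay_buffer = ReplayBuffer.from_lerobot_dataset(
    offline_dataset,                             # placeholder LeRobotDataset variable
    device=device,
    state_keys=cfg.policy.input_shapes.keys(),
    action_mask=active_action_dims,
    action_delta=cfg.env.wrapper.delta_action,   # new in this commit
)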
