diff --git a/README.md b/README.md index 5aef58e3e..93fb43ed6 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,58 @@ a = test_envs.action_space.sample() # Sample an action obs, reward, terminate, truncate, info = test_envs.step(a) # Step all environments with the sampled random actions ``` +## Accessing Single Goal Environments +You may wish to only access individual environments used in the Meta-World benchmark for your research. +We provide constructors for creating environments where the goal has been hidden (by zeroing out the goal in +the observation) and environments where the goal is observable. They are called GoalHidden and GoalObservable +environments respectively. + +You can access them in the following way: +```python +from metaworld.envs import (ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE, + ALL_V2_ENVIRONMENTS_GOAL_HIDDEN) + # these are ordered dicts where the key : value + # is env_name : env_constructor + +import numpy as np + +door_open_goal_observable_cls = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE["door-open-v2-goal-observable"] +door_open_goal_hidden_cls = ALL_V2_ENVIRONMENTS_GOAL_HIDDEN["door-open-v2-goal-hidden"] + +env = door_open_goal_hidden_cls() +env.reset() # Reset environment +a = env.action_space.sample() # Sample an action +obs, reward, terminate, truncate, info = env.step(a) # Step the environment with the sampled random action +assert (obs[-3:] == np.zeros(3)).all() # goal will be zeroed out because env is HiddenGoal + +# You can choose to initialize the random seed of the environment. +# The state of your rng will remain unaffected after the environment is constructed.
+env1 = door_open_goal_observable_cls(seed=5) +env2 = door_open_goal_observable_cls(seed=5) + +env1.reset() # Reset environment +env2.reset() +a1 = env1.action_space.sample() # Sample an action +a2 = env2.action_space.sample() +next_obs1, _, _, _, _ = env1.step(a1) # Step the environment with the sampled random action + +next_obs2, _, _, _, _ = env2.step(a2) +assert (next_obs1[-3:] == next_obs2[-3:]).all() # 2 envs initialized with the same seed will have the same goal +assert not (next_obs2[-3:] == np.zeros(3)).all() # The envs are goal observable, meaning the goal is not zeroed out + +env3 = door_open_goal_observable_cls(seed=10) # Construct an environment with a different seed +env1.reset() # Reset environment +env3.reset() +a1 = env1.action_space.sample() # Sample an action +a3 = env3.action_space.sample() +next_obs1, _, _, _, _ = env1.step(a1) # Step the environment with the sampled random action +next_obs3, _, _, _, _ = env3.step(a3) + +assert not (next_obs1[-3:] == next_obs3[-3:]).all() # 2 envs initialized with different seeds will have different goals +assert not (next_obs1[-3:] == np.zeros(3)).all() # The envs are goal observable, meaning the goal is not zeroed out + +``` + ## Citing Meta-World In progress ...