Support Gymnasium 0.29 #73

Merged: 6 commits, Jul 24, 2023

6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

+# vscode
+.vscode/
+
+# videos
+videos/
4 changes: 3 additions & 1 deletion mo_gymnasium/envs/reacher/reacher.py
@@ -2,7 +2,7 @@

import numpy as np
from gymnasium import spaces
-from gymnasium.utils import EzPickle
+from gymnasium.utils import EzPickle, seeding
from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
@@ -87,6 +87,8 @@ def camera_adjust(self):

def reset(self, seed=None, **kwargs):
self._seed(seed)
+if seed is not None:
+    self._np_random, seed = seeding.np_random(seed)
obs = super().reset()
if self.render_mode == "human":
self._render(mode="human")
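For reference, a minimal sketch (not part of this diff) of the gymnasium.utils.seeding call the new lines rely on: np_random builds a fresh NumPy generator and also returns the seed that was actually used, so the same seed reproduces the same draws.

import numpy as np
from gymnasium.utils import seeding

# np_random(seed) -> (np.random.Generator, seed_actually_used); seed=None draws OS entropy.
rng, used_seed = seeding.np_random(42)
first = rng.uniform(-1.0, 1.0, size=3)
rng_again, _ = seeding.np_random(42)
assert np.allclose(first, rng_again.uniform(-1.0, 1.0, size=3))  # same seed, same draws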
32 changes: 15 additions & 17 deletions mo_gymnasium/utils.py
@@ -6,10 +6,9 @@

import gymnasium as gym
import numpy as np
-from gymnasium.utils import EzPickle
from gymnasium.vector import SyncVectorEnv
+from gymnasium.wrappers import RecordEpisodeStatistics
from gymnasium.wrappers.normalize import RunningMeanStd
-from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics


ObsType = TypeVar("ObsType")
@@ -29,7 +28,7 @@ def make(env_name: str, disable_env_checker: bool = True, **kwargs) -> gym.Env:
return gym.make(env_name, disable_env_checker=disable_env_checker, **kwargs)


-class LinearReward(gym.Wrapper, EzPickle):
+class LinearReward(gym.Wrapper, gym.utils.RecordConstructorArgs):
"""Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector."""

def __init__(self, env: gym.Env, weight: np.ndarray = None):
@@ -39,8 +38,8 @@ def __init__(self, env: gym.Env, weight: np.ndarray = None):
env: env to wrap
weight: weight vector to use in the dot product
"""
-super().__init__(env)
-EzPickle.__init__(self, env, weight)
+gym.utils.RecordConstructorArgs.__init__(self, weight=weight)
+gym.Wrapper.__init__(self, env)
if weight is None:
weight = np.ones(shape=env.reward_space.shape)
self.set_weight(weight)
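The two explicit base-class calls above are the pattern this PR applies to every wrapper in the file: gym.utils.RecordConstructorArgs records the constructor keyword arguments (the bookkeeping EzPickle used to provide), and the concrete wrapper base is then initialised directly. A minimal sketch of the same convention for a hypothetical custom wrapper, assuming Gymnasium >= 0.28:

import gymnasium as gym

class ScaleReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs):
    """Hypothetical example wrapper: scales the scalar reward by a constant factor."""

    def __init__(self, env: gym.Env, factor: float = 1.0):
        # Record kwargs first so the wrapper can be reconstructed from its spec,
        # then initialise the actual RewardWrapper machinery.
        gym.utils.RecordConstructorArgs.__init__(self, factor=factor)
        gym.RewardWrapper.__init__(self, env)
        self.factor = factor

    def reward(self, reward):
        return self.factor * reward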
@@ -70,7 +69,7 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
return observation, scalar_reward, terminated, truncated, info


-class MONormalizeReward(gym.Wrapper, EzPickle):
+class MONormalizeReward(gym.Wrapper, gym.utils.RecordConstructorArgs):
"""Wrapper to normalize the reward component at index idx. Does not touch other reward components."""

def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-8):
@@ -82,8 +81,8 @@ def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float =
epsilon (float): A stability parameter
gamma (float): The discount factor that is used in the exponential moving average.
"""
-super().__init__(env)
-EzPickle.__init__(self, env, idx, gamma, epsilon)
+gym.utils.RecordConstructorArgs.__init__(self, idx=idx, gamma=gamma, epsilon=epsilon)
+gym.Wrapper.__init__(self, env)
self.idx = idx
self.num_envs = getattr(env, "num_envs", 1)
self.is_vector_env = getattr(env, "is_vector_env", False)
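A sketch of how the per-component wrappers might be combined; the environment id and the top-level mo_gym exports are assumptions for illustration, not taken from this diff:

import mo_gymnasium as mo_gym

env = mo_gym.make("deep-sea-treasure-v0")                        # example env id (assumption)
env = mo_gym.MONormalizeReward(env, idx=0)                       # normalize reward component 0 only
env = mo_gym.MOClipReward(env, idx=0, min_r=-10.0, max_r=10.0)   # then clip that same component
obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
# vec_reward is still a vector; only index 0 was normalized and clipped.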
@@ -125,7 +124,7 @@ def normalize(self, rews):
return rews / np.sqrt(self.return_rms.var + self.epsilon)


-class MOClipReward(gym.RewardWrapper, EzPickle):
+class MOClipReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs):
"""Clip reward[idx] to [min, max]."""

def __init__(self, env: gym.Env, idx: int, min_r, max_r):
@@ -137,8 +136,8 @@ def __init__(self, env: gym.Env, idx: int, min_r, max_r):
min_r: min reward
max_r: max reward
"""
-super().__init__(env)
-EzPickle.__init__(self, env, idx, min_r, max_r)
+gym.utils.RecordConstructorArgs.__init__(self, idx=idx, min_r=min_r, max_r=max_r)
+gym.RewardWrapper.__init__(self, env)
self.idx = idx
self.min_r = min_r
self.max_r = max_r
@@ -154,7 +153,7 @@ def reward(self, reward):
return reward


-class MOSyncVectorEnv(SyncVectorEnv, EzPickle):
+class MOSyncVectorEnv(SyncVectorEnv):
"""Vectorized environment that serially runs multiple environments."""

def __init__(
@@ -168,8 +167,7 @@ def __init__(
env_fns: env constructors
copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations.
"""
-super().__init__(env_fns, copy=copy)
-EzPickle.__init__(self, env_fns, copy=copy)
+SyncVectorEnv.__init__(self, env_fns, copy=copy)
# Just overrides the rewards memory to add the number of objectives
self.reward_space = self.envs[0].reward_space
self._rewards = np.zeros(
@@ -181,7 +179,7 @@ def __init__(
)
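For context, a usage sketch of MOSyncVectorEnv under the same assumptions (example env id, wrappers exposed at the package top level):

import mo_gymnasium as mo_gym

# Run four copies of a multi-objective env in lockstep.
vec_env = mo_gym.MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)])
obs, infos = vec_env.reset(seed=0)
obs, vec_rewards, terminations, truncations, infos = vec_env.step(vec_env.action_space.sample())
# vec_rewards has shape (num_envs, reward_dim) thanks to the overridden _rewards buffer.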


-class MORecordEpisodeStatistics(RecordEpisodeStatistics, EzPickle):
+class MORecordEpisodeStatistics(RecordEpisodeStatistics, gym.utils.RecordConstructorArgs):
"""This wrapper will keep track of cumulative rewards and episode lengths.

After the completion of an episode, ``info`` will look like this::
@@ -220,8 +218,8 @@ def __init__(self, env: gym.Env, gamma: float = 1.0, deque_size: int = 100):
gamma (float): Discounting factor
deque_size: The size of the buffers :attr:`return_queue` and :attr:`length_queue`
"""
-super().__init__(env, deque_size)
-EzPickle.__init__(self, env, gamma, deque_size)
+gym.utils.RecordConstructorArgs.__init__(self, gamma=gamma, deque_size=deque_size)
+RecordEpisodeStatistics.__init__(self, env, deque_size=deque_size)
# CHANGE: Here we just override the standard implementation to extend to MO
# We also take care of the case where the env is vectorized
self.reward_dim = self.env.reward_space.shape[0]
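And a usage sketch for MORecordEpisodeStatistics; the exact layout of info["episode"] is truncated in this view, so the keys in the final comment follow the standard RecordEpisodeStatistics naming and should be treated as an assumption:

import mo_gymnasium as mo_gym

env = mo_gym.make("deep-sea-treasure-v0")                  # example env id (assumption)
env = mo_gym.MORecordEpisodeStatistics(env, gamma=0.99)
obs, info = env.reset(seed=0)
done = False
while not done:
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
# Assumption: when the episode ends, info["episode"]["r"] holds the vector return,
# with a discounted variant tracked using the gamma passed above.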