Support Gymnasium 0.29 #73

Merged: 6 commits, Jul 24, 2023

6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

+# vscode
+.vscode/
+
+# videos
+videos/
4 changes: 3 additions & 1 deletion mo_gymnasium/envs/reacher/reacher.py
@@ -2,7 +2,7 @@

import numpy as np
from gymnasium import spaces
-from gymnasium.utils import EzPickle
+from gymnasium.utils import EzPickle, seeding
from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
@@ -87,6 +87,8 @@ def camera_adjust(self):

def reset(self, seed=None, **kwargs):
self._seed(seed)
+if seed is not None:
+    self._np_random, seed = seeding.np_random(seed)
obs = super().reset()
if self.render_mode == "human":
self._render(mode="human")
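For reference, a minimal sketch (not part of this diff) of the gymnasium.utils.seeding call the new lines rely on: np_random builds a fresh NumPy generator and also returns the seed that was actually used, so the same seed reproduces the same draws.

import numpy as np
from gymnasium.utils import seeding

# np_random(seed) -> (np.random.Generator, seed_actually_used); seed=None draws OS entropy.
rng, used_seed = seeding.np_random(42)
first = rng.uniform(-1.0, 1.0, size=3)
rng_again, _ = seeding.np_random(42)
assert np.allclose(first, rng_again.uniform(-1.0, 1.0, size=3))  # same seed, same draws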
32 changes: 15 additions & 17 deletions mo_gymnasium/utils.py
@@ -6,10 +6,9 @@

import gymnasium as gym
import numpy as np
-from gymnasium.utils import EzPickle
from gymnasium.vector import SyncVectorEnv
+from gymnasium.wrappers import RecordEpisodeStatistics
from gymnasium.wrappers.normalize import RunningMeanStd
-from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics


ObsType = TypeVar("ObsType")
@@ -29,7 +28,7 @@ def make(env_name: str, disable_env_checker: bool = True, **kwargs) -> gym.Env:
return gym.make(env_name, disable_env_checker=disable_env_checker, **kwargs)


-class LinearReward(gym.Wrapper, EzPickle):
+class LinearReward(gym.Wrapper, gym.utils.RecordConstructorArgs):
"""Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector."""

def __init__(self, env: gym.Env, weight: np.ndarray = None):
@@ -39,8 +38,8 @@ def __init__(self, env: gym.Env, weight: np.ndarray = None):
env: env to wrap
weight: weight vector to use in the dot product
"""
-super().__init__(env)
-EzPickle.__init__(self, env, weight)
+gym.utils.RecordConstructorArgs.__init__(self, weight=weight)
+gym.Wrapper.__init__(self, env)
if weight is None:
weight = np.ones(shape=env.reward_space.shape)
self.set_weight(weight)
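The two explicit base-class calls above are the pattern this PR applies to every wrapper in the file: gym.utils.RecordConstructorArgs records the constructor keyword arguments (the bookkeeping EzPickle used to provide), and the concrete wrapper base is then initialised directly. A minimal sketch of the same convention for a hypothetical custom wrapper, assuming Gymnasium >= 0.28:

import gymnasium as gym

class ScaleReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs):
    """Hypothetical example wrapper: scales the scalar reward by a constant factor."""

    def __init__(self, env: gym.Env, factor: float = 1.0):
        # Record kwargs first so the wrapper can be reconstructed from its spec,
        # then initialise the actual RewardWrapper machinery.
        gym.utils.RecordConstructorArgs.__init__(self, factor=factor)
        gym.RewardWrapper.__init__(self, env)
        self.factor = factor

    def reward(self, reward):
        return self.factor * reward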
@@ -70,7 +69,7 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
return observation, scalar_reward, terminated, truncated, info


-class MONormalizeReward(gym.Wrapper, EzPickle):
+class MONormalizeReward(gym.Wrapper, gym.utils.RecordConstructorArgs):
"""Wrapper to normalize the reward component at index idx. Does not touch other reward components."""

def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-8):
@@ -82,8 +81,8 @@ def __init__(self, env: gym.Env, idx: int, gamma: float = 0.99, epsilon: float =
epsilon (float): A stability parameter
gamma (float): The discount factor that is used in the exponential moving average.
"""
-super().__init__(env)
-EzPickle.__init__(self, env, idx, gamma, epsilon)
+gym.utils.RecordConstructorArgs.__init__(self, idx=idx, gamma=gamma, epsilon=epsilon)
+gym.Wrapper.__init__(self, env)
self.idx = idx
self.num_envs = getattr(env, "num_envs", 1)
self.is_vector_env = getattr(env, "is_vector_env", False)
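A sketch of how the per-component wrappers might be combined; the environment id and the top-level mo_gym exports are assumptions for illustration, not taken from this diff:

import mo_gymnasium as mo_gym

env = mo_gym.make("deep-sea-treasure-v0")                        # example env id (assumption)
env = mo_gym.MONormalizeReward(env, idx=0)                       # normalize reward component 0 only
env = mo_gym.MOClipReward(env, idx=0, min_r=-10.0, max_r=10.0)   # then clip that same component
obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
# vec_reward is still a vector; only index 0 was normalized and clipped.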
@@ -125,7 +124,7 @@ def normalize(self, rews):
return rews / np.sqrt(self.return_rms.var + self.epsilon)


-class MOClipReward(gym.RewardWrapper, EzPickle):
+class MOClipReward(gym.RewardWrapper, gym.utils.RecordConstructorArgs):
"""Clip reward[idx] to [min, max]."""

def __init__(self, env: gym.Env, idx: int, min_r, max_r):
@@ -137,8 +136,8 @@ def __init__(self, env: gym.Env, idx: int, min_r, max_r):
min_r: min reward
max_r: max reward
"""
-super().__init__(env)
-EzPickle.__init__(self, env, idx, min_r, max_r)
+gym.utils.RecordConstructorArgs.__init__(self, idx=idx, min_r=min_r, max_r=max_r)
+gym.RewardWrapper.__init__(self, env)
self.idx = idx
self.min_r = min_r
self.max_r = max_r
@@ -154,7 +153,7 @@ def reward(self, reward):
return reward


-class MOSyncVectorEnv(SyncVectorEnv, EzPickle):
+class MOSyncVectorEnv(SyncVectorEnv):
"""Vectorized environment that serially runs multiple environments."""

def __init__(
@@ -168,8 +167,7 @@ def __init__(
env_fns: env constructors
copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations.
"""
-super().__init__(env_fns, copy=copy)
-EzPickle.__init__(self, env_fns, copy=copy)
+SyncVectorEnv.__init__(self, env_fns, copy=copy)
# Just overrides the rewards memory to add the number of objectives
self.reward_space = self.envs[0].reward_space
self._rewards = np.zeros(
@@ -181,7 +179,7 @@ def __init__(
)
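For context, a usage sketch of MOSyncVectorEnv under the same assumptions (example env id, wrappers exposed at the package top level):

import mo_gymnasium as mo_gym

# Run four copies of a multi-objective env in lockstep.
vec_env = mo_gym.MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)])
obs, infos = vec_env.reset(seed=0)
obs, vec_rewards, terminations, truncations, infos = vec_env.step(vec_env.action_space.sample())
# vec_rewards has shape (num_envs, reward_dim) thanks to the overridden _rewards buffer.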


-class MORecordEpisodeStatistics(RecordEpisodeStatistics, EzPickle):
+class MORecordEpisodeStatistics(RecordEpisodeStatistics, gym.utils.RecordConstructorArgs):
"""This wrapper will keep track of cumulative rewards and episode lengths.

After the completion of an episode, ``info`` will look like this::
@@ -220,8 +218,8 @@ def __init__(self, env: gym.Env, gamma: float = 1.0, deque_size: int = 100):
gamma (float): Discounting factor
deque_size: The size of the buffers :attr:`return_queue` and :attr:`length_queue`
"""
-super().__init__(env, deque_size)
-EzPickle.__init__(self, env, gamma, deque_size)
+gym.utils.RecordConstructorArgs.__init__(self, gamma=gamma, deque_size=deque_size)
+RecordEpisodeStatistics.__init__(self, env, deque_size=deque_size)
# CHANGE: Here we just override the standard implementation to extend to MO
# We also take care of the case where the env is vectorized
self.reward_dim = self.env.reward_space.shape[0]
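And a usage sketch for MORecordEpisodeStatistics; the exact layout of info["episode"] is truncated in this view, so the keys in the final comment follow the standard RecordEpisodeStatistics naming and should be treated as an assumption:

import mo_gymnasium as mo_gym

env = mo_gym.make("deep-sea-treasure-v0")                  # example env id (assumption)
env = mo_gym.MORecordEpisodeStatistics(env, gamma=0.99)
obs, info = env.reset(seed=0)
done = False
while not done:
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
# Assumption: when the episode ends, info["episode"]["r"] holds the vector return,
# with a discounted variant tracked using the gamma passed above.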