How does reset
work?
#119
-
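The `reset` function doesn't actually seem to reset the environment, at least for the "Pong-v5" env. In the following code, I'm using a random policy to step the env. I first step the env for `n_init_steps` steps, optionally call `env.reset()`, and then record the returns of the next three completed episodes.

def test_envpool_resets_correctly() -> None: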
def gather_rewards(
    n_init_steps: int, reset: bool = True
) -> tuple[list[float], list[float]]:
    """Collect episode returns from Pong-v5 around an optional ``env.reset()``.

    The env is stepped with randomly sampled actions for ``n_init_steps``
    steps, then optionally reset, then stepped until three more episodes
    report ``done``. Two parallel tallies are kept:

    * ``ep_returns`` zeroes the running return when ``reset`` is requested,
      i.e. it trusts that ``env.reset()`` started a fresh episode.
    * ``old_ep_returns`` keeps the pre-reset running return, i.e. it assumes
      the env was *not* reset.

    Args:
        n_init_steps: Number of steps to take before the optional reset.
        reset: Whether to call ``env.reset()`` after the initial steps.

    Returns:
        ``(ep_returns, old_ep_returns)``. Both lists contain any episodes
        that finished during the initial steps followed by the three
        episodes gathered afterwards.
    """
    env = envpool.make_gym("Pong-v5", num_envs=1, seed=0)
    ep_returns: list[float] = []

    def policy():
        # One randomly sampled action, batched for the single env.
        return np.asarray([env.action_space.sample()])

    curr_ep_return = 0.0
    for _ in range(n_init_steps):
        _, rewards, dones, _ = env.step(policy())
        curr_ep_return += rewards.item()
        if dones.item():
            ep_returns.append(curr_ep_return)
            curr_ep_return = 0.0

    # Remember the partial return accumulated so far; this tally deliberately
    # ignores the reset below.
    old_ep_return = curr_ep_return
    if reset:
        env.reset()
        curr_ep_return = 0.0

    # Copy ep_returns to avoid changing the original list.
    # old_ep_returns assumes env was not reset.
    old_ep_returns = list(ep_returns)

    for _ in range(3):
        # Step until the env reports the end of an episode.
        while True:
            _, rewards, dones, _ = env.step(policy())
            reward = rewards.item()
            curr_ep_return += reward
            old_ep_return += reward
            if dones.item():
                old_ep_returns.append(old_ep_return)
                ep_returns.append(curr_ep_return)
                old_ep_return = 0.0
                curr_ep_return = 0.0
                break

    return ep_returns, old_ep_returns
# Each trailing comment is the output the reporter recorded for that call, as
# (ep_returns, old_ep_returns). With reset=True and a non-zero number of
# initial steps, the two lists differ only in their first entry; with
# reset=False they are identical.
print(gather_rewards(0)) # ([-21.0, -21.0, -19.0], [-21.0, -21.0, -19.0])
print(gather_rewards(500)) # ([-10.0, -21.0, -21.0], [-21.0, -21.0, -21.0])
print(gather_rewards(800)) # ([-2.0, -21.0, -21.0], [-20.0, -21.0, -21.0])
print(gather_rewards(500, False)) # ([-20.0, -20.0, -21.0], [-20.0, -20.0, -21.0])
print(gather_rewards(800, False)) # ([-21.0, -20.0, -20.0, -21.0], [-21.0, -20.0, -20.0, -21.0])
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
See #121. Thanks for reporting this issue!
Beta Was this translation helpful? Give feedback.
See #121. Thanks for reporting this issue!