Describe the bug
In gym\utils\passive_env_checker.py, the check if not isinstance(terminated, (bool, np.bool8)): raises AttributeError: module 'numpy' has no attribute 'bool8'. np.bool8 was an alias of np.bool_ that NumPy deprecated in 1.24 and removed in 2.0, so gym's passive environment checker crashes on every step under a current NumPy.
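As a stopgap until gym is patched, the alias can be restored before gym imports it (a sketch, assuming the failure comes from NumPy removing bool8; np.bool_ is the type it aliased):

import numpy as np

# np.bool8 was an alias of np.bool_; restore it before gym's checker looks it up
if not hasattr(np, "bool8"):
    np.bool8 = np.bool_

import gym  # gym can now be imported without the AttributeError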
Code example
import gym
import torch
from torch.distributions import Categorical
from torch import nn, optim
import torch.nn.functional as F

def compute_policy_loss(n, log_p):
    # REINFORCE loss: the return for step t is the remaining episode length
    # (n, n-1, ..., 1), since CartPole pays +1 per surviving step.
    r = list()
    for i in range(n, 0, -1):
        r.append(i * 1.0)
    r = torch.tensor(r)
    r = (r - r.mean()) / (r.std() + 1e-8)  # normalize returns; eps avoids nan on one-step episodes
    loss = 0
    for pi, ri in zip(log_p, r):
        loss += -pi * ri  # policy gradient: -log pi(a|s) weighted by the return
    return loss

class CartPolePolicy(nn.Module):
    def __init__(self):
        super(CartPolePolicy, self).__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=128)
        self.fc2 = nn.Linear(128, 2)
        self.drop = nn.Dropout(p=0.6)

    def forward(self, x):
        x = self.fc1(x)
        x = self.drop(x)
        x = F.relu(x)
        x = self.fc2(x)
        return F.softmax(x, dim=1)  # probabilities over the two actions

if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    env.reset(seed=543)
    torch.manual_seed(543)
    policy = CartPolePolicy()
    optimizer = optim.Adam(policy.parameters(), lr=0.01)
    max_episode = 10000
    max_action = 10000
    max_steps = 5000
    for episode in range(1, max_episode + 1):
        state, _ = env.reset()
        step = 0
        log_p = list()
        for step in range(1, max_action + 1):
            state = torch.from_numpy(state).float().unsqueeze(0)
            probs = policy(state)
            m = Categorical(probs)
            action = m.sample()
            # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
            # this is the call where the passive checker trips over np.bool8
            state, _, done, _, _ = env.step(action.item())
            if done:
                break
            log_p.append(m.log_prob(action))
        if step > max_steps:
            print(f"Done! Final episode {episode} ran for {step} steps")
            break
        optimizer.zero_grad()
        loss = compute_policy_loss(step, log_p)
        loss.backward()
        optimizer.step()
        if episode % 10 == 0:
            print(f"Episode {episode}: ran for {step} steps")
    torch.save(policy.state_dict(), "cartpole_policy.pth")  # save the trained policy
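Once training finishes, the checkpoint can be sanity-checked with a short evaluation loop (a sketch, assuming CartPolePolicy from the script above is in scope; greedy argmax action selection is my choice, not part of the original script):

import gym
import torch

policy = CartPolePolicy()  # class defined in the script above
policy.load_state_dict(torch.load("cartpole_policy.pth"))
policy.eval()  # disable Dropout for deterministic inference

env = gym.make("CartPole-v0")
state, _ = env.reset(seed=0)
steps = 0
done = False
while not done:
    with torch.no_grad():
        probs = policy(torch.from_numpy(state).float().unsqueeze(0))
    action = probs.argmax(dim=1).item()  # greedy action; sampling also works
    state, _, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
    steps += 1
print(f"evaluation episode lasted {steps} steps")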
System Info
Describe the characteristics of your environment:
* gym installed via pip install gym
* Windows 11
* Python 3.12.3
I have checked that there is no similar issue in the repo (required)
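For completeness, the exact library versions can be captured with their standard __version__ attributes:

import sys
import gym
import numpy
import torch

print("python:", sys.version)
print("gym:", gym.__version__)
print("numpy:", numpy.__version__)
print("torch:", torch.__version__)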