Adapt to the new gym=0.25.2 and pettingzoo=1.24.1 #7

Open · wants to merge 1 commit into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
.idea/
/results/
/.venv/
.vscode/
__pycache__
6 changes: 3 additions & 3 deletions Agent.py
@@ -10,12 +10,12 @@
class Agent:
"""Agent that can interact with environment from pettingzoo"""

- def __init__(self, obs_dim, act_dim, global_obs_dim, actor_lr, critic_lr):
- self.actor = MLPNetwork(obs_dim, act_dim)
+ def __init__(self, obs_dim, act_dim, global_obs_dim, actor_lr, critic_lr, device):
+ self.actor = MLPNetwork(obs_dim, act_dim).to(device)

# critic input all the observations and actions
# if there are 3 agents for example, the input for critic is (obs1, obs2, obs3, act1, act2, act3)
- self.critic = MLPNetwork(global_obs_dim, 1)
+ self.critic = MLPNetwork(global_obs_dim, 1).to(device)
self.actor_optimizer = Adam(self.actor.parameters(), lr=actor_lr)
self.critic_optimizer = Adam(self.critic.parameters(), lr=critic_lr)
self.target_actor = deepcopy(self.actor)
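Note on the Agent.py change: the constructor now takes a device and moves both networks onto it; the target networks are created with deepcopy, so they inherit the same placement. A minimal usage sketch, assuming the script runs from the repo root and using made-up dimensions and learning rates (not values from this repo):

import torch
from Agent import Agent

# hypothetical example values, only to illustrate the new `device` argument
device = 'cuda' if torch.cuda.is_available() else 'cpu'
agent = Agent(obs_dim=10, act_dim=5, global_obs_dim=45,
              actor_lr=0.01, critic_lr=0.01, device=device)
# deepcopy copies parameters on their current device,
# so target_actor and target_critic also end up on `device`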
7 changes: 4 additions & 3 deletions MADDPG.py
@@ -27,6 +27,7 @@ def setup_logger(filename):

class MADDPG:
"""A MADDPG(Multi Agent Deep Deterministic Policy Gradient) agent"""
+ device = 'cuda'

def __init__(self, dim_info, capacity, batch_size, actor_lr, critic_lr, res_dir):
# sum all the dims of each agent to get input dim for critic
@@ -35,8 +36,8 @@ def __init__(self, dim_info, capacity, batch_size, actor_lr, critic_lr, res_dir)
self.agents = {}
self.buffers = {}
for agent_id, (obs_dim, act_dim) in dim_info.items():
- self.agents[agent_id] = Agent(obs_dim, act_dim, global_obs_act_dim, actor_lr, critic_lr)
- self.buffers[agent_id] = Buffer(capacity, obs_dim, act_dim, 'cpu')
+ self.agents[agent_id] = Agent(obs_dim, act_dim, global_obs_act_dim, actor_lr, critic_lr, self.device)
+ self.buffers[agent_id] = Buffer(capacity, obs_dim, act_dim, self.device)
self.dim_info = dim_info

self.batch_size = batch_size
@@ -81,7 +82,7 @@ def sample(self, batch_size):
def select_action(self, obs):
actions = {}
for agent, o in obs.items():
- o = torch.from_numpy(o).unsqueeze(0).float()
+ o = torch.from_numpy(o).unsqueeze(0).float().to(self.device)
a = self.agents[agent].action(o) # torch.Size([1, action_size])
# NOTE that the output is a tensor, convert it to int before input to the environment
actions[agent] = a.squeeze(0).argmax().item()
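Note on the MADDPG.py change: device = 'cuda' is a hard-coded class attribute, so every Agent, Buffer, and observation tensor is placed on the GPU; on a machine without a CUDA-enabled PyTorch build this fails at startup. A common alternative, shown only as a sketch and not part of this PR, is to pick the device at runtime:

import torch

class MADDPG:
    """A MADDPG(Multi Agent Deep Deterministic Policy Gradient) agent"""
    # fall back to CPU when CUDA is unavailable (sketch, not in the PR)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'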
15 changes: 8 additions & 7 deletions evaluate.py
@@ -10,8 +10,8 @@

if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('env_name', type=str, default='simple_adversary_v2', help='name of the env',
- choices=['simple_adversary_v2', 'simple_spread_v2', 'simple_tag_v2'])
+ parser.add_argument('env_name', type=str, default='simple_spread_v3', help='name of the env',
+ choices=['simple_adversary_v3', 'simple_spread_v3', 'simple_tag_v3'])
parser.add_argument('folder', type=str, help='name of the folder where model is saved')
parser.add_argument('--episode-num', type=int, default=10, help='total episode num during evaluation')
parser.add_argument('--episode-length', type=int, default=50, help='steps per episode')
@@ -32,19 +32,20 @@
# reward of each episode of each agent
episode_rewards = {agent: np.zeros(args.episode_num) for agent in env.agents}
for episode in range(args.episode_num):
- states = env.reset()
+ states, _ = env.reset()
agent_reward = {agent: 0 for agent in env.agents} # agent reward of the current episode
frame_list = [] # used to save gif
while env.agents: # interact with the env for an episode
actions = maddpg.select_action(states)
- next_states, rewards, dones, infos = env.step(actions)
- frame_list.append(Image.fromarray(env.render(mode='rgb_array')))
+ next_states, rewards, terminated, truncated, info = env.step(actions)
+ dones = terminated or truncated
+ frame_list.append(Image.fromarray(env.render()))
states = next_states

for agent_id, reward in rewards.items(): # update reward
agent_reward[agent_id] += reward

- env.close()
+ # env.close()
message = f'episode {episode + 1}, '
# episode finishes, record reward
for agent_id, reward in agent_reward.items():
@@ -54,7 +55,7 @@
# save gif
frame_list[0].save(os.path.join(gif_dir, f'out{gif_num + episode + 1}.gif'),
save_all=True, append_images=frame_list[1:], duration=1, loop=0)

+ env.close()
# training finishes, plot reward
fig, ax = plt.subplots()
x = range(1, args.episode_num + 1)
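Note on the evaluate.py change: it follows the PettingZoo >=1.24 parallel API, where reset() returns (observations, infos), step() returns five per-agent dicts, and render() takes no arguments because the render mode is fixed when the environment is constructed. A minimal sketch of that loop shape with random actions (the environment name and max_cycles here are placeholder choices, not values from this PR):

from pettingzoo.mpe import simple_spread_v3

env = simple_spread_v3.parallel_env(max_cycles=25, render_mode='rgb_array')
obs, infos = env.reset()
while env.agents:
    # random per-agent actions, just to exercise the API
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    obs, rewards, terminations, truncations, infos = env.step(actions)
    frame = env.render()  # numpy RGB array because render_mode='rgb_array'
env.close()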
25 changes: 13 additions & 12 deletions main.py
@@ -3,20 +3,20 @@

import matplotlib.pyplot as plt
import numpy as np
- from pettingzoo.mpe import simple_adversary_v2, simple_spread_v2, simple_tag_v2
+ from pettingzoo.mpe import simple_adversary_v3, simple_spread_v3, simple_tag_v3

from MADDPG import MADDPG


def get_env(env_name, ep_len=25):
"""create environment and get observation and action dimension of each agent in this environment"""
new_env = None
- if env_name == 'simple_adversary_v2':
- new_env = simple_adversary_v2.parallel_env(max_cycles=ep_len)
- if env_name == 'simple_spread_v2':
- new_env = simple_spread_v2.parallel_env(max_cycles=ep_len)
- if env_name == 'simple_tag_v2':
- new_env = simple_tag_v2.parallel_env(max_cycles=ep_len)
+ if env_name == 'simple_adversary_v3':
+ new_env = simple_adversary_v3.parallel_env(max_cycles=ep_len)
+ if env_name == 'simple_spread_v3':
+ new_env = simple_spread_v3.parallel_env(max_cycles=ep_len,render_mode="rgb_array")
+ if env_name == 'simple_tag_v3':
+ new_env = simple_tag_v3.parallel_env(max_cycles=ep_len)

new_env.reset()
_dim_info = {}
@@ -30,8 +30,8 @@ def get_env(env_name, ep_len=25):

if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('env_name', type=str, default='simple_adversary_v2', help='name of the env',
- choices=['simple_adversary_v2', 'simple_spread_v2', 'simple_tag_v2'])
+ parser.add_argument('env_name', type=str, default='simple_adversary_v3', help='name of the env',
+ choices=['simple_adversary_v3', 'simple_spread_v3', 'simple_tag_v3'])
parser.add_argument('--episode_num', type=int, default=30000,
help='total episode num during training procedure')
parser.add_argument('--episode_length', type=int, default=25, help='steps per episode')
@@ -64,16 +64,17 @@ def get_env(env_name, ep_len=25):
# reward of each episode of each agent
episode_rewards = {agent_id: np.zeros(args.episode_num) for agent_id in env.agents}
for episode in range(args.episode_num):
- obs = env.reset()
+ obs, _ = env.reset()
agent_reward = {agent_id: 0 for agent_id in env.agents} # agent reward of the current episode
while env.agents: # interact with the env for an episode
step += 1
if step < args.random_steps:
action = {agent_id: env.action_space(agent_id).sample() for agent_id in env.agents}
else:
action = maddpg.select_action(obs)

- next_obs, reward, done, info = env.step(action)

+ next_obs, reward, terminated, truncated, info = env.step(action)
+ done = terminated or truncated
# env.render()
maddpg.add(obs, action, reward, next_obs, done)

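One point worth double-checking in both main.py and evaluate.py: in the parallel API, terminated and truncated are per-agent dicts, so `terminated or truncated` evaluates to whichever dict is truthy rather than combining the flags agent by agent. If the replay buffer expects one done flag per agent, a per-agent merge would look like the sketch below; merge_done is a helper name introduced here for illustration, not something this PR or the repo defines.

# sketch: merge per-agent termination/truncation dicts into per-agent done flags
def merge_done(terminated: dict, truncated: dict) -> dict:
    return {agent_id: terminated[agent_id] or truncated[agent_id]
            for agent_id in terminated}

# usage inside the training loop would then be:
#     done = merge_done(terminated, truncated)
#     maddpg.add(obs, action, reward, next_obs, done)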