-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathtd3.py
128 lines (105 loc) · 3.5 KB
/
td3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
"""Run module for TD3 on LunarLanderContinuous-v2.
- Author: whikwon
- Contact: [email protected]
"""
import torch
import torch.optim as optim
from algorithms.common.networks.mlp import MLP
from algorithms.common.noise import GaussianNoise
from algorithms.td3.agent import Agent
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Hyper parameters.
# The whole dict is passed to Agent by get(); the keys commented below are
# the ones get() itself reads in this file. The remaining keys are consumed
# elsewhere (presumably by Agent -- verify against algorithms.td3.agent).
hyper_params = {
    "GAMMA": 0.99,
    "TAU": 5e-3,
    "BUFFER_SIZE": int(1e6),
    "BATCH_SIZE": 100,
    "LR_ACTOR": 1e-3,  # lr of the actor's Adam optimizer
    "LR_CRITIC": 1e-3,  # lr of the shared Adam optimizer for both critics
    "WEIGHT_DECAY": 0.0,  # weight_decay of both Adam optimizers
    "EXPLORATION_NOISE": 0.1,  # min and max sigma of the exploration noise
    "TARGET_POLICY_NOISE": 0.2,  # min and max sigma of the target policy noise
    "TARGET_POLICY_NOISE_CLIP": 0.5,
    "POLICY_UPDATE_FREQ": 2,
    # Stored as an int, like BUFFER_SIZE, so the value is safe wherever an
    # integer is required (presumably a step count -- verify against Agent).
    "INITIAL_RANDOM_ACTIONS": int(1e4),
    # Hidden layer sizes handed to MLP for the actor and the critics.
    "NETWORK": {"ACTOR_HIDDEN_SIZES": [400, 300], "CRITIC_HIDDEN_SIZES": [400, 300]},
}
def get(env, args):
    """Assemble the TD3 agent for the given environment.

    Creates the actor and two critics (each paired with a target network
    that starts from the same weights), one Adam optimizer for the actor and
    one shared by both critics, and the exploration / target-policy Gaussian
    noise objects, then passes everything to ``Agent``.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings

    Returns:
        Agent: agent built from ``env``, ``args``, ``hyper_params`` and the
            models, optimizers and noises created here.
    """
    obs_size = env.observation_space.shape[0]
    act_size = env.action_space.shape[0]

    net_cfg = hyper_params["NETWORK"]
    actor_hidden = net_cfg["ACTOR_HIDDEN_SIZES"]
    critic_hidden = net_cfg["CRITIC_HIDDEN_SIZES"]

    def build_with_target(**mlp_kwargs):
        # Build the network, then its target, then copy the weights across.
        online = MLP(**mlp_kwargs).to(device)
        target = MLP(**mlp_kwargs).to(device)
        target.load_state_dict(online.state_dict())
        return online, target

    # actor maps a state to an action squashed by tanh
    actor, actor_target = build_with_target(
        input_size=obs_size,
        output_size=act_size,
        hidden_sizes=actor_hidden,
        output_activation=torch.tanh,
    )

    # both critics take a concatenated (state, action) input and output a scalar
    critic_in = obs_size + act_size
    critic1, critic1_target = build_with_target(
        input_size=critic_in,
        output_size=1,
        hidden_sizes=critic_hidden,
    )
    critic2, critic2_target = build_with_target(
        input_size=critic_in,
        output_size=1,
        hidden_sizes=critic_hidden,
    )

    # optimizers: the two critics share a single Adam instance
    decay = hyper_params["WEIGHT_DECAY"]
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=decay,
    )
    critic_optim = optim.Adam(
        [*critic1.parameters(), *critic2.parameters()],
        lr=hyper_params["LR_CRITIC"],
        weight_decay=decay,
    )

    # noises: min and max sigma are equal for each noise object
    explore_sigma = hyper_params["EXPLORATION_NOISE"]
    exploration_noise = GaussianNoise(
        act_size, min_sigma=explore_sigma, max_sigma=explore_sigma
    )
    target_sigma = hyper_params["TARGET_POLICY_NOISE"]
    target_policy_noise = GaussianNoise(
        act_size, min_sigma=target_sigma, max_sigma=target_sigma
    )

    # bundle everything in the order Agent receives it
    return Agent(
        env,
        args,
        hyper_params,
        (actor, actor_target, critic1, critic1_target, critic2, critic2_target),
        (actor_optim, critic_optim),
        (exploration_noise, target_policy_noise),
    )