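# config.yaml: base training configuration for nmmo-baselines. The named
# sections at the bottom (neurips23_start_kit, yaofeng, takeru, hybrid) carry
# per-entry overrides that the training script presumably merges over the
# base sections.

# Weights & Biases logging; point entity/project at your own account when forking.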
wandb:
  project: nmmo-baselines
  entity: kywch
  group: ~
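
# Minimal settings for quick smoke tests; when debug mode is enabled, these
# presumably replace the matching keys in the train section below.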
debug:
  train:
    num_envs: 1
    envs_per_batch: 1  # check whether env batching works
    envs_per_worker: 1
    batch_size: 1024
    total_timesteps: 10000
    pool_kernel: [0, 1]
    checkpoint_interval: 3
    verbose: True
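
# PPO hyperparameters in CleanRL-style naming: gamma and gae_lambda control
# advantage estimation, clip_coef/clip_vloss/vf_coef shape the clipped PPO
# objective, and bptt_horizon sets the truncated-BPTT length used when
# training the recurrent policy.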
train:
  seed: 1
  torch_deterministic: True
  device: cuda
  total_timesteps: 10_000_000
  learning_rate: 1.5e-4
  anneal_lr: True
  gamma: 0.99
  gae_lambda: 0.95
  update_epochs: 3
  norm_adv: True
  clip_coef: 0.1
  clip_vloss: True
  ent_coef: 0.01
  vf_coef: 0.5
  max_grad_norm: 0.5
  target_kl: ~
  num_envs: 15
  envs_per_worker: 1
  envs_per_batch: 6
  env_pool: True
  verbose: True
  data_dir: runs
  checkpoint_interval: 200
  pool_kernel: [0]
  batch_size: 32768
  batch_rows: 128
  bptt_horizon: 8
  vf_clip_coef: 0.1
  compile: False
  compile_mode: reduce-overhead
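
# Random-search sweep over learning rate and batch geometry, maximizing
# episodic_return; since it is nested inside this file, it is presumably
# registered from the training script via wandb.sweep() rather than the CLI.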
sweep:
  method: random
  name: sweep
  metric:
    goal: maximize
    name: episodic_return
  # The nested 'parameters' keys are required by the WandB sweep API
  parameters:
    train:
      parameters:
        learning_rate: {
          'distribution': 'log_uniform_values',
          'min': 1e-4,
          'max': 1e-1,
        }
        batch_size: {
          'values': [128, 256, 512, 1024, 2048],
        }
        batch_rows: {
          'values': [16, 32, 64, 128, 256],
        }
        bptt_horizon: {
          'values': [4, 8, 16, 32],
        }
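
# Neural MMO environment settings: 128 learning agents plus 256 scripted NPCs
# per episode, on a pool of 256 pre-generated 128x128 maps.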
env:
  num_agents: 128
  num_npcs: 256
  max_episode_length: 1024
  maps_path: 'maps/train/'
  map_size: 128
  num_maps: 256
  map_force_generation: False
  death_fog_tick: ~
  task_size: 2048
  spawn_immunity: 20
  resilient_population: 0.2
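
# Policy network sizes; task_size must stay in sync with env.task_size above.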
policy:
  input_size: 256
  hidden_size: 256
  task_size: 2048  # must match env task_size
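
# Recurrent core of the policy (an LSTM in these baselines); num_layers: 0
# makes the policy purely feed-forward (see the takeru variant below).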
recurrent:
  input_size: 256
  hidden_size: 256
  num_layers: 1
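
# Base reward-wrapper settings; early_stop_agent_num cuts training episodes
# short once few enough agents remain (0 disables this, as in takeru below).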
reward_wrapper:
  eval_mode: False
  early_stop_agent_num: 8
  use_custom_reward: True
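
# The sections below are named after NeurIPS 2023 competition submissions;
# each nested key overrides the matching key in the base sections above.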
neurips23_start_kit:
  reward_wrapper:
    heal_bonus_weight: 0.03
    explore_bonus_weight: 0.01
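
# yaofeng's entry: densely shaped bonuses (HP, defense, gold), a two-layer
# recurrent core, and a larger custom map pool.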
yaofeng:
  env:
    maps_path: 'maps/train_yaofeng/'
    num_maps: 1024
    resilient_population: 0
  train:
    update_epochs: 2
    learning_rate: 1.0e-4
  recurrent:
    num_layers: 2
  reward_wrapper:
    hp_bonus_weight: 0.03
    exp_bonus_weight: 0.002
    defense_bonus_weight: 0.04
    attack_bonus_weight: 0.0
    gold_bonus_weight: 0.001
    custom_bonus_scale: 0.1
    disable_give: True
    donot_attack_dangerous_npc: True
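
# takeru's entry: a feed-forward policy (num_layers: 0), a single update
# epoch, exploration-only shaping, and no early stopping.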
takeru:
  env:
    maps_path: 'maps/train_takeru/'
    num_maps: 1280
    resilient_population: 0
  train:
    update_epochs: 1
  recurrent:
    num_layers: 0
  reward_wrapper:
    early_stop_agent_num: 0
    explore_bonus_weight: 0.01
    disable_give: True
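
# hybrid: yaofeng's maps and reward shaping combined with a single-layer
# recurrent core and one update epoch.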
hybrid:
  env:
    maps_path: 'maps/train_yaofeng/'
    num_maps: 1024
    resilient_population: 0
  train:
    update_epochs: 1
  recurrent:
    num_layers: 1
  reward_wrapper:
    hp_bonus_weight: 0.03
    exp_bonus_weight: 0.002
    defense_bonus_weight: 0.04
    attack_bonus_weight: 0.0
    gold_bonus_weight: 0.001
    custom_bonus_scale: 0.1
    disable_give: True
    donot_attack_dangerous_npc: True