import argparse
import json
import logging
import math
import os
import pickle

import gym
import numpy as np
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.configspace import ConfigurationSpace
from smac.facade.smac_mf_facade import SMAC4MF
from smac.scenario.scenario import Scenario
from stable_baselines3 import A2C, DQN, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

from training_base import create_model, get_pretuned_hyperparameters

logging.basicConfig(level=logging.INFO)
__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
__license__ = "3-clause BSD"
MAX_ITERATIONS_PER_LOOP = 10
MAX_TOTAL_ITERATIONS = 100
# evaluation is cut off once MAX_TOTAL_ITERATIONS training loops have been run, even if fewer than NUMBER_OF_CONFIGURATIONS configurations were tried
NUMBER_OF_CONFIGURATIONS = 30
INITIAL_BUDGET = 2
ETA = 2
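# With ETA = 2 the multi-fidelity intensifier roughly doubles the budget (the
# number of training loops) between successive-halving rungs, going from
# INITIAL_BUDGET up to MAX_ITERATIONS_PER_LOOP, and promotes roughly the
# better half of the configurations at each rung.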
algorithms = {
"A2C": A2C,
"DQN": DQN,
"PPO": PPO
}
environments = ["Acrobot-v1", "CartPole-v1", "MountainCar-v0"]
parser = argparse.ArgumentParser("python run_smac_mf.py")
parser.add_argument("algorithm", help="The search algorithm as string (A2C|DQN|PPO)", type=str)
parser.add_argument("environment", help="The gym environment as string", type=str)
parser.add_argument("seed", help="The random seed", type=int)
args = parser.parse_args()
if args.environment not in environments or args.algorithm not in algorithms:
print(f"passed {args.algorithm} as algorithm and {args.environment} as environment")
print(f"allowed algorithms: {list(algorithms.keys())}")
print(f"allowed environments: {environments}")
raise ValueError("Did not pass the correct environment or algorithm as an argument.")
algorithmString = args.algorithm
algorithmClass = algorithms[algorithmString]
environment = args.environment
seed = args.seed
def create_configspace_for_algorithm(algorithm: str) -> ConfigurationSpace:
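    """Build the search space: learning rate and gamma are tuned for every
    algorithm; DQN additionally tunes the exploration epsilon, PPO the clip range."""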
cs = ConfigurationSpace()
params = [
UniformFloatHyperparameter(
"learning_rate", math.pow(10, -6), math.pow(10, -2), default_value=0.001, log=True
),
UniformFloatHyperparameter(
"gamma", 0.8, 1.0, default_value=0.99, log=False
)
]
if algorithm == 'DQN':
params.append(UniformFloatHyperparameter(
"epsilon", 0.05, 0.3, default_value=0.2, log=False
))
    elif algorithm == 'PPO':
params.append(UniformFloatHyperparameter(
"clip", 0.05, 0.3, default_value=0.2, log=False
))
# Add all hyperparameters at once:
cs.add_hyperparameters(params)
return cs
def run_algorithm(config: dict, environment: str = environment, policy: str = 'MlpPolicy',
                  budget: int = 1):
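    """Target function for SMAC: train the agent for `budget` training loops,
    resuming from a checkpoint if this configuration was evaluated before, and
    return the negated best evaluation reward (SMAC minimizes the objective)."""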
    # "clip" (PPO only) and "epsilon" (DQN only) are not present in every
    # configuration, so use .get() to avoid a KeyError for the other algorithms.
    hyperparams = dict(
        lr=config["learning_rate"],
        gamma=config["gamma"],
        clip=config.get("clip"),
        epsilon=config.get("epsilon"))
# Create log dir if it doesn't exist
    log_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "..", "tmp", f"seed_{seed}", f"{algorithmString.lower()}-smac_{environment}")
os.makedirs(log_dir, exist_ok=True)
hyperparamsAsString = "_".join(f'{k}_{v}' for k, v in hyperparams.items() if v is not None)
checkpoint_dir = os.path.join(log_dir, "checkpoint_"+hyperparamsAsString)
    evaluation_dir = os.path.join(log_dir, f"{algorithmString.lower()}-smac_{environment}_seed{seed}_eval.json")
    model_evaluation_dir = os.path.join(checkpoint_dir, f"{algorithmString.lower()}-smac_{hyperparamsAsString}_model-evaluation.json")
model_dir = os.path.join(checkpoint_dir, "checkpoint_"+hyperparamsAsString)
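    # SMAC4MF re-invokes the target function with increasing budgets, so resume
    # from the checkpoint (model weights and recorded rewards) when one exists.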
if os.path.exists(checkpoint_dir):
# Create and wrap the environment
env = gym.make(environment)
env = Monitor(env, checkpoint_dir)
# TODO: Check if this is working as expected
model = algorithmClass.load(path=model_dir, env=env)
with open(model_evaluation_dir, 'r') as f:
data = json.load(f)
rewards = data["rewards"]
std_rewards = data["std_rewards"]
else:
os.makedirs(checkpoint_dir, exist_ok=True)
# Create and wrap the environment
env = gym.make(environment)
env = Monitor(env, checkpoint_dir)
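        # Build a fresh model from the sampled hyperparameters plus pre-tuned,
        # environment-specific settings from the local training_base module.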
optimal_env_params = get_pretuned_hyperparameters(algorithm=algorithmString, environment=environment)
model = create_model(policy, env, hyperparams["lr"], hyperparams["gamma"], hyperparams["clip"], hyperparams["epsilon"], optimal_env_params, algorithmString, seed)
rewards = []
std_rewards = []
# Train the agent
timesteps = budget - len(rewards)
if len(rewards) < budget:
for i in range(int(timesteps)):
print (f"training loop {i} of {timesteps}")
total_iteration_count = 0
            # The counter is pickled, so give the file a .pkl extension.
            total_iteration_count_path = os.path.join(log_dir, "total_iteration_count.pkl")
            if os.path.exists(total_iteration_count_path):
                with open(total_iteration_count_path, "rb") as f:
                    total_iteration_count = pickle.load(f)
print("total iteration count: ", total_iteration_count)
if total_iteration_count >= MAX_TOTAL_ITERATIONS:
print("reached total iteration limit of "+str(total_iteration_count)+" !")
break
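            # One budget unit corresponds to 10,000 environment timesteps here.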
model.learn(total_timesteps=int(1e4), callback=None)
# Returns average and standard deviation of the return from the evaluation
r, std_r = evaluate_policy(model=model, env=env)
rewards.append(r)
std_rewards.append(std_r)
with open(total_iteration_count_path, 'wb') as f:
pickle.dump(total_iteration_count+1, f)
if len(rewards) > 0:
data = {"gamma": hyperparams["gamma"], "learning_rate": hyperparams["lr"], "clip": hyperparams["clip"], "epsilon": hyperparams["epsilon"],
"rewards": rewards, "std_rewards": std_rewards}
with open(model_evaluation_dir, 'w+') as f:
json.dump(data, f)
with open(evaluation_dir, 'a+') as f:
json.dump(data, f)
f.write("\n")
    # Save the model so this configuration can be resumed at a higher budget.
    model.save(path=model_dir)
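    # SMAC minimizes the objective, so return the negated best evaluation reward.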
return -1 * np.amax(rewards)
if __name__ == "__main__":
    # Build the configuration space, which defines the tuned hyperparameters
    # and their ranges (all continuous floats here).
cs = create_configspace_for_algorithm(algorithmString)
# SMAC scenario object
scenario = Scenario(
{
"run_obj": "quality", # we optimize quality (alternative to runtime)
"runcount-limit": NUMBER_OF_CONFIGURATIONS, # number of configurations
"cs": cs, # configuration space
"deterministic": True,
# Uses pynisher to limit memory and runtime
# Alternatively, you can also disable this.
# Then you should handle runtime and memory yourself in the TA
"limit_resources": False,
# "cutoff": 30, # runtime limit for target algorithm
# "memory_limit": 3072, # adapt this to reasonable value for your hardware
}
)
    # Max budget for Hyperband; here it is the maximum number of training loops per configuration.
intensifier_kwargs = {"initial_budget": INITIAL_BUDGET, "max_budget": MAX_ITERATIONS_PER_LOOP, "eta": ETA}
# To optimize, we pass the function to the SMAC-object
smac = SMAC4MF(
scenario=scenario,
rng=np.random.RandomState(seed),
tae_runner=run_algorithm,
intensifier_kwargs=intensifier_kwargs,
)
try:
incumbent = smac.optimize()
finally:
incumbent = smac.solver.incumbent
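
    # Report the best configuration found (the incumbent).
    print(f"Incumbent: {incumbent}")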