-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplay.py
104 lines (90 loc) · 3.43 KB
/
play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
import numpy as np
import gym
from gym_brt.envs import (
QubeSwingupEnv,
QubeSwingupSparseEnv,
QubeSwingupFollowEnv,
QubeSwingupFollowSparseEnv,
QubeBalanceEnv,
QubeBalanceSparseEnv,
QubeBalanceFollowEnv,
QubeBalanceFollowSparseEnv,
QubeDampenEnv,
QubeDampenSparseEnv,
QubeDampenFollowEnv,
QubeDampenFollowSparseEnv,
QubeRotorEnv,
QubeRotorFollowEnv,
QubeBalanceFollowSineWaveEnv,
QubeSwingupFollowSineWaveEnv,
QubeRotorFollowSineWaveEnv,
QubeDampenFollowSineWaveEnv,
)
from stable_baselines.common.vec_env.vec_normalize import VecNormalize
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.common.cmd_util import arg_parser
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import set_global_seeds
from stable_baselines import bench, logger
from stable_baselines.ppo2 import PPO2
def main():
    """Load trained PPO2 parameters and run the policy on a Qube environment.

    Command-line arguments (added to the parser returned by ``arg_parser()``):
        -e / --env: name of the environment class to run. Optional; when it
            is omitted the name is looked up among the "/"-separated
            components of the ``--load`` path.
        -hw / --use-hardware: pass ``use_simulator=False`` to the environment
            (the simulator is used by default, and rendering is only done
            when the simulator is used).
        -l / --load: path of the saved model parameters to load.

    The control loop runs until the process is interrupted; the vectorized
    environment is always closed on the way out.

    Raises:
        ValueError: if ``--load`` is missing, or if ``--env`` is missing and
            no known environment name appears in the ``--load`` path.
    """
    envs = {
        "QubeSwingupEnv": QubeSwingupEnv,
        "QubeSwingupSparseEnv": QubeSwingupSparseEnv,
        "QubeSwingupFollowEnv": QubeSwingupFollowEnv,
        "QubeSwingupFollowSparseEnv": QubeSwingupFollowSparseEnv,
        "QubeBalanceEnv": QubeBalanceEnv,
        "QubeBalanceSparseEnv": QubeBalanceSparseEnv,
        "QubeBalanceFollowEnv": QubeBalanceFollowEnv,
        "QubeBalanceFollowSparseEnv": QubeBalanceFollowSparseEnv,
        "QubeDampenEnv": QubeDampenEnv,
        "QubeDampenSparseEnv": QubeDampenSparseEnv,
        "QubeDampenFollowEnv": QubeDampenFollowEnv,
        "QubeDampenFollowSparseEnv": QubeDampenFollowSparseEnv,
        "QubeRotorEnv": QubeRotorEnv,
        "QubeRotorFollowEnv": QubeRotorFollowEnv,
        "QubeBalanceFollowSineWaveEnv": QubeBalanceFollowSineWaveEnv,
        "QubeSwingupFollowSineWaveEnv": QubeSwingupFollowSineWaveEnv,
        "QubeRotorFollowSineWaveEnv": QubeRotorFollowSineWaveEnv,
        "QubeDampenFollowSineWaveEnv": QubeDampenFollowSineWaveEnv,
    }

    # Parse command line args
    parser = arg_parser()
    # --env is intentionally NOT marked required: when it is omitted the name
    # is inferred from the --load path below. (Marking it required made that
    # fallback unreachable.)
    parser.add_argument("-e", "--env", choices=list(envs), default=None)
    parser.add_argument("-hw", "--use-hardware", action="store_true")
    parser.add_argument("-l", "--load", type=str, default=None)
    args = parser.parse_args()

    # Fail before any environment (possibly real hardware) is created: there
    # is nothing to play without saved parameters.
    if args.load is None:
        raise ValueError("the following arguments are required: -l/--load")

    env_name = args.env
    if env_name is None:
        # If env isn't given try to find the env name in the filename
        for d in args.load.split("/"):
            if d in envs:
                env_name = d
                print("'env' argument is not given. Assuming env is '{}'".format(d))
    if env_name is None:
        raise ValueError("the following arguments are required: -e/--env")

    def make_env():
        # env_name is never rebound after this point, so the closure always
        # sees the environment name (not the vectorized env object).
        return envs[env_name](use_simulator=not args.use_hardware, frequency=250)

    # Built outside the try block so that `finally` only ever closes a real
    # environment; a construction failure propagates unmasked.
    vec_env = DummyVecEnv([make_env])
    try:
        model = PPO2(policy=MlpPolicy, env=vec_env)
        model.load_parameters(args.load)

        print("Running trained model")
        obs = np.zeros((vec_env.num_envs,) + vec_env.observation_space.shape)
        obs[:] = vec_env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:], _, done, _ = vec_env.step(actions)
            if not args.use_hardware:
                vec_env.render()
            # `done` comes back from the vectorized env; np.any makes the
            # check explicit instead of relying on array truthiness.
            if np.any(done):
                print("done")
                obs[:] = vec_env.reset()
    finally:
        vec_env.close()
# Run the player only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()