- Follow the instructions in
ppo-dash-training\README.md
to set up a conda environment and install the correct Obstacle Tower Environment for your platform
cd ppo-dash-training
cd pytorch-a2c-ppo-acktr-gail
python run.py --num-env-steps=50000000 --env=ObtRetro-v7 --num-processes=32 --num-mini-batch=8 --exp_name=cur047
python run.py --num-env-steps=50000000 --env=ObtRetro-v7 --num-processes=32 --num-mini-batch=8 --exp_name=cur047 --load --ppo-epoch=1 --lr=1e-4
- Using the folder
ppo-dash-validation
- This folder contains the trained model used for the paper submission.
- Follow the obstacle-tower-challenge instructions for running the Docker image. Note: I have tested this on a MacBook Pro and it works. Because it builds and runs from Docker images, it should work on any platform.
- You can contact the Unity team at [email protected] to ask whether this is possible, or to validate our claim that we placed 2nd.
- From the folder
ppo-dash-training
- Follow the instructions in the README to set up a conda environment and install the correct Obstacle Tower Environment for your platform
cd 001_baseline
python run.py --num-processes=50 --exp_name=000_baseline-01
python run.py --num-processes=50 --exp_name=000_baseline-02
python run.py --num-processes=50 --exp_name=000_baseline-03
cd 002_reduce_action_space
python run.py --num-processes=50 --exp_name=002_reduce_action_space-02
python run.py --num-processes=50 --exp_name=002_reduce_action_space-03
python run.py --num-processes=50 --exp_name=002_reduce_action_space-04
cd 003_recurrent
python run.py --num-processes=50 --exp_name=003_recurrent-01
python run.py --num-processes=50 --exp_name=003_recurrent-02
python run.py --num-processes=50 --exp_name=003_recurrent-03
cd 005_large_scale_hyperparms
python run.py --num-mini-batch=4 --exp_name=005_large_scale_hyperparms-01
python run.py --num-mini-batch=4 --exp_name=005_large_scale_hyperparms-02
python run.py --num-mini-batch=4 --exp_name=005_large_scale_hyperparms-03
cd 006_reduced_frame_stack
python run.py --num-processes=50 --exp_name=006_reduced_frame_stack-01
python run.py --num-processes=50 --exp_name=006_reduced_frame_stack-02
python run.py --num-processes=50 --exp_name=006_reduced_frame_stack-03
cd 007_reduced_action_space_and_frame_stack
python run.py --num-processes=50 --exp_name=007_reduced_action_space_and_frame_stack-01
python run.py --num-processes=50 --exp_name=007_reduced_action_space_and_frame_stack-02
python run.py --num-processes=50 --exp_name=007_reduced_action_space_and_frame_stack-03
cd 008_ra+rf+lshp
python run.py --exp_name=008_ra+rf+lshp-01
python run.py --exp_name=008_ra+rf+lshp-02
python run.py --exp_name=008_ra+rf+lshp-03
cd 009_ra+rf+lshp+recurrent
python run.py --exp_name=009_ra+rf+lshp+recurrent-01
python run.py --exp_name=009_ra+rf+lshp+recurrent-02
python run.py --exp_name=009_ra+rf+lshp+recurrent-03
cd 010_ra+rf+lshp+recurrent+vec_obs
python run.py --exp_name=010_ra+rf+lshp+recurrent+vec_obs-01
python run.py --exp_name=010_ra+rf+lshp+recurrent+vec_obs-02
python run.py --exp_name=010_ra+rf+lshp+recurrent+vec_obs-03
cd 011_ra+rf+lshp+recurrent+vec_obs+norm_obs
python run.py --exp_name=011_ra+rf+lshp+recurrent+vec_obs+norm_obs-01
python run.py --exp_name=011_ra+rf+lshp+recurrent+vec_obs+norm_obs-02
python run.py --exp_name=011_ra+rf+lshp+recurrent+vec_obs+norm_obs-03
cd 012_ra+no_stack+lshp+recurrent+vec_obs+norm_obs
python run.py --exp_name=012_ra+no_stack+lshp+recurrent+vec_obs+norm_obs-01
python run.py --exp_name=012_ra+no_stack+lshp+recurrent+vec_obs+norm_obs-02
python run.py --exp_name=012_ra+no_stack+lshp+recurrent+vec_obs+norm_obs-03
cd 013_ra+no_stack+lshp+recurrent+vec_obs+norm_obs+rew_hacking
python run.py --exp_name=013_ra+no_stack+lshp+recurrent+vec_obs+norm_obs+rew_hacking-01
python run.py --exp_name=013_ra+no_stack+lshp+recurrent+vec_obs+norm_obs+rew_hacking-02
python run.py --exp_name=013_ra+no_stack+lshp+recurrent+vec_obs+norm_obs+rew_hacking-03