From 34a88c995b48ba8ed687d645efa90bcf5cb67a7a Mon Sep 17 00:00:00 2001
From: Ziyi Wu
Date: Thu, 6 May 2021 15:48:49 -0700
Subject: [PATCH] Move tests

---
 tests/garage/experiment/test_torch_resume.py  | 248 +++++++++++++++++
 tests/garage/torch/algos/test_torch_resume.py | 252 ------------------
 2 files changed, 248 insertions(+), 252 deletions(-)
 create mode 100644 tests/garage/experiment/test_torch_resume.py
 delete mode 100644 tests/garage/torch/algos/test_torch_resume.py

diff --git a/tests/garage/experiment/test_torch_resume.py b/tests/garage/experiment/test_torch_resume.py
new file mode 100644
index 0000000000..c72505d5a4
--- /dev/null
+++ b/tests/garage/experiment/test_torch_resume.py
@@ -0,0 +1,248 @@
+"""This script creates a test which fails when
+    saving/resuming a model is unsuccessful."""
+
+import tempfile
+
+import numpy as np
+import pytest
+import torch
+from torch.nn import functional as F
+
+from garage.envs import GymEnv, normalize
+from garage.experiment import deterministic, SnapshotConfig
+from garage.replay_buffer import PathBuffer
+from garage.sampler import FragmentWorker, LocalSampler
+from garage.torch import set_gpu_mode
+from garage.torch.algos import SAC
+from garage.torch.policies import TanhGaussianMLPPolicy
+from garage.torch.q_functions import ContinuousMLPQFunction
+from garage.trainer import Trainer
+
+
+@pytest.mark.mujoco
+def test_torch_cpu_resume_cpu():
+    """Test saving on CPU and resuming on CPU."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
+                                         snapshot_mode='last',
+                                         snapshot_gap=1)
+        env = normalize(
+            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
+        deterministic.set_seed(0)
+        policy = TanhGaussianMLPPolicy(
+            env_spec=env.spec,
+            hidden_sizes=[32, 32],
+            hidden_nonlinearity=torch.nn.ReLU,
+            output_nonlinearity=None,
+            min_std=np.exp(-20.),
+            max_std=np.exp(2.),
+        )
+
+        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+
+        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
+        trainer = Trainer(snapshot_config=snapshot_config)
+        sampler = LocalSampler(agents=policy,
+                               envs=env,
+                               max_episode_length=env.spec.max_episode_length,
+                               worker_class=FragmentWorker)
+        sac = SAC(env_spec=env.spec,
+                  policy=policy,
+                  qf1=qf1,
+                  qf2=qf2,
+                  sampler=sampler,
+                  gradient_steps_per_itr=100,
+                  replay_buffer=replay_buffer,
+                  min_buffer_size=1e3,
+                  target_update_tau=5e-3,
+                  discount=0.99,
+                  buffer_batch_size=64,
+                  reward_scale=1.,
+                  steps_per_epoch=2)
+        sac.has_lambda = lambda x: x + 1
+        trainer.setup(sac, env)
+        set_gpu_mode(False)
+        sac.to()
+        trainer.setup(algo=sac, env=env)
+        trainer.train(n_epochs=10, batch_size=100)
+        trainer = Trainer(snapshot_config)
+        trainer.restore(temp_dir)
+        trainer.resume(n_epochs=20)
+
+
+@pytest.mark.gpu
+@pytest.mark.mujoco
+def test_torch_cpu_resume_gpu():
+    """Test saving on CPU and resuming on GPU."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
+                                         snapshot_mode='last',
+                                         snapshot_gap=1)
+        env = normalize(
+            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
+        deterministic.set_seed(0)
+        policy = TanhGaussianMLPPolicy(
+            env_spec=env.spec,
+            hidden_sizes=[32, 32],
+            hidden_nonlinearity=torch.nn.ReLU,
+            output_nonlinearity=None,
+            min_std=np.exp(-20.),
+            max_std=np.exp(2.),
+        )
+
+        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+
+        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
+        trainer = Trainer(snapshot_config=snapshot_config)
+        sampler = LocalSampler(agents=policy,
+                               envs=env,
+                               max_episode_length=env.spec.max_episode_length,
+                               worker_class=FragmentWorker)
+        sac = SAC(env_spec=env.spec,
+                  policy=policy,
+                  qf1=qf1,
+                  qf2=qf2,
+                  sampler=sampler,
+                  gradient_steps_per_itr=100,
+                  replay_buffer=replay_buffer,
+                  min_buffer_size=1e3,
+                  target_update_tau=5e-3,
+                  discount=0.99,
+                  buffer_batch_size=64,
+                  reward_scale=1.,
+                  steps_per_epoch=2)
+        sac.has_lambda = lambda x: x + 1
+        trainer.setup(sac, env)
+        set_gpu_mode(False)
+        sac.to()
+        trainer.setup(algo=sac, env=env)
+        trainer.train(n_epochs=10, batch_size=100)
+        trainer = Trainer(snapshot_config)
+        set_gpu_mode(True)
+        trainer.restore(temp_dir)
+        trainer.resume(n_epochs=20)
+
+
+@pytest.mark.gpu
+@pytest.mark.mujoco
+def test_torch_gpu_resume_cpu():
+    """Test saving on GPU and resuming on CPU."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
+                                         snapshot_mode='last',
+                                         snapshot_gap=1)
+        env = normalize(
+            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
+        deterministic.set_seed(0)
+        policy = TanhGaussianMLPPolicy(
+            env_spec=env.spec,
+            hidden_sizes=[32, 32],
+            hidden_nonlinearity=torch.nn.ReLU,
+            output_nonlinearity=None,
+            min_std=np.exp(-20.),
+            max_std=np.exp(2.),
+        )
+
+        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+
+        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
+        trainer = Trainer(snapshot_config=snapshot_config)
+        sampler = LocalSampler(agents=policy,
+                               envs=env,
+                               max_episode_length=env.spec.max_episode_length,
+                               worker_class=FragmentWorker)
+        sac = SAC(env_spec=env.spec,
+                  policy=policy,
+                  qf1=qf1,
+                  qf2=qf2,
+                  sampler=sampler,
+                  gradient_steps_per_itr=100,
+                  replay_buffer=replay_buffer,
+                  min_buffer_size=1e3,
+                  target_update_tau=5e-3,
+                  discount=0.99,
+                  buffer_batch_size=64,
+                  reward_scale=1.,
+                  steps_per_epoch=2)
+        sac.has_lambda = lambda x: x + 1
+        trainer.setup(sac, env)
+        set_gpu_mode(True)
+        sac.to()
+        trainer.setup(algo=sac, env=env)
+        trainer.train(n_epochs=10, batch_size=100)
+        set_gpu_mode(False)
+        trainer = Trainer(snapshot_config)
+        trainer.restore(temp_dir)
+        trainer.resume(n_epochs=20)
+
+
+@pytest.mark.gpu
+@pytest.mark.mujoco
+def test_torch_gpu_resume_gpu():
+    """Test saving on GPU and resuming on GPU."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
+                                         snapshot_mode='last',
+                                         snapshot_gap=1)
+        env = normalize(
+            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
+        deterministic.set_seed(0)
+        policy = TanhGaussianMLPPolicy(
+            env_spec=env.spec,
+            hidden_sizes=[32, 32],
+            hidden_nonlinearity=torch.nn.ReLU,
+            output_nonlinearity=None,
+            min_std=np.exp(-20.),
+            max_std=np.exp(2.),
+        )
+
+        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+
+        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
+                                     hidden_sizes=[32, 32],
+                                     hidden_nonlinearity=F.relu)
+        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
+        trainer = Trainer(snapshot_config=snapshot_config)
+        sampler = LocalSampler(agents=policy,
+                               envs=env,
+                               max_episode_length=env.spec.max_episode_length,
+                               worker_class=FragmentWorker)
+        sac = SAC(env_spec=env.spec,
+                  policy=policy,
+                  qf1=qf1,
+                  qf2=qf2,
+                  sampler=sampler,
+                  gradient_steps_per_itr=100,
+                  replay_buffer=replay_buffer,
+                  min_buffer_size=1e3,
+                  target_update_tau=5e-3,
+                  discount=0.99,
+                  buffer_batch_size=64,
+                  reward_scale=1.,
+                  steps_per_epoch=2)
+        sac.has_lambda = lambda x: x + 1
+        trainer.setup(sac, env)
+        set_gpu_mode(True)
+        sac.to()
+        trainer.setup(algo=sac, env=env)
+        trainer.train(n_epochs=10, batch_size=100)
+        trainer = Trainer(snapshot_config)
+        trainer.restore(temp_dir)
+        trainer.resume(n_epochs=20)
diff --git a/tests/garage/torch/algos/test_torch_resume.py b/tests/garage/torch/algos/test_torch_resume.py
deleted file mode 100644
index 43277ab758..0000000000
--- a/tests/garage/torch/algos/test_torch_resume.py
+++ /dev/null
@@ -1,252 +0,0 @@
-"""This script creates a test which fails when
-    saving/resuming a model is unsuccessful."""
-
-import tempfile
-
-import numpy as np
-import pytest
-import torch
-from torch.nn import functional as F
-
-from garage.envs import GymEnv, normalize
-from garage.experiment import deterministic, SnapshotConfig
-from garage.replay_buffer import PathBuffer
-from garage.sampler import FragmentWorker, LocalSampler
-from garage.torch import set_gpu_mode
-from garage.torch.algos import SAC
-from garage.torch.policies import TanhGaussianMLPPolicy
-from garage.torch.q_functions import ContinuousMLPQFunction
-from garage.trainer import Trainer
-
-
-@pytest.mark.mujoco
-def test_torch_cpu_resume_cpu():
-    """Test saving on CPU and resuming on CPU."""
-    temp_dir = tempfile.TemporaryDirectory()
-    snapshot_config = SnapshotConfig(snapshot_dir=temp_dir.name,
-                                     snapshot_mode='last',
-                                     snapshot_gap=1)
-    env = normalize(GymEnv('InvertedDoublePendulum-v2',
-                           max_episode_length=100))
-    deterministic.set_seed(0)
-    policy = TanhGaussianMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=[32, 32],
-        hidden_nonlinearity=torch.nn.ReLU,
-        output_nonlinearity=None,
-        min_std=np.exp(-20.),
-        max_std=np.exp(2.),
-    )
-
-    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-
-    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
-    trainer = Trainer(snapshot_config=snapshot_config)
-    sampler = LocalSampler(agents=policy,
-                           envs=env,
-                           max_episode_length=env.spec.max_episode_length,
-                           worker_class=FragmentWorker)
-    sac = SAC(env_spec=env.spec,
-              policy=policy,
-              qf1=qf1,
-              qf2=qf2,
-              sampler=sampler,
-              gradient_steps_per_itr=100,
-              replay_buffer=replay_buffer,
-              min_buffer_size=1e3,
-              target_update_tau=5e-3,
-              discount=0.99,
-              buffer_batch_size=64,
-              reward_scale=1.,
-              steps_per_epoch=2)
-    sac.has_lambda = lambda x: x + 1
-    trainer.setup(sac, env)
-    set_gpu_mode(False)
-    sac.to()
-    trainer.setup(algo=sac, env=env)
-    trainer.train(n_epochs=10, batch_size=100)
-    trainer = Trainer(snapshot_config)
-    trainer.restore(temp_dir.name)
-    trainer.resume(n_epochs=20)
-    temp_dir.cleanup()
-
-
-@pytest.mark.gpu
-@pytest.mark.mujoco
-def test_torch_cpu_resume_gpu():
-    """Test saving on CPU and resuming on GPU."""
-    temp_dir = tempfile.TemporaryDirectory()
-    snapshot_config = SnapshotConfig(snapshot_dir=temp_dir.name,
-                                     snapshot_mode='last',
-                                     snapshot_gap=1)
-    env = normalize(GymEnv('InvertedDoublePendulum-v2',
-                           max_episode_length=100))
-    deterministic.set_seed(0)
-    policy = TanhGaussianMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=[32, 32],
-        hidden_nonlinearity=torch.nn.ReLU,
-        output_nonlinearity=None,
-        min_std=np.exp(-20.),
-        max_std=np.exp(2.),
-    )
-
-    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-
-    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
-    trainer = Trainer(snapshot_config=snapshot_config)
-    sampler = LocalSampler(agents=policy,
-                           envs=env,
-                           max_episode_length=env.spec.max_episode_length,
-                           worker_class=FragmentWorker)
-    sac = SAC(env_spec=env.spec,
-              policy=policy,
-              qf1=qf1,
-              qf2=qf2,
-              sampler=sampler,
-              gradient_steps_per_itr=100,
-              replay_buffer=replay_buffer,
-              min_buffer_size=1e3,
-              target_update_tau=5e-3,
-              discount=0.99,
-              buffer_batch_size=64,
-              reward_scale=1.,
-              steps_per_epoch=2)
-    sac.has_lambda = lambda x: x + 1
-    trainer.setup(sac, env)
-    set_gpu_mode(False)
-    sac.to()
-    trainer.setup(algo=sac, env=env)
-    trainer.train(n_epochs=10, batch_size=100)
-    trainer = Trainer(snapshot_config)
-    set_gpu_mode(True)
-    trainer.restore(temp_dir.name)
-    trainer.resume(n_epochs=20)
-    temp_dir.cleanup()
-
-
-@pytest.mark.gpu
-@pytest.mark.mujoco
-def test_torch_gpu_resume_cpu():
-    """Test saving on GPU and resuming on CPU."""
-    temp_dir = tempfile.TemporaryDirectory()
-    snapshot_config = SnapshotConfig(snapshot_dir=temp_dir.name,
-                                     snapshot_mode='last',
-                                     snapshot_gap=1)
-    env = normalize(GymEnv('InvertedDoublePendulum-v2',
-                           max_episode_length=100))
-    deterministic.set_seed(0)
-    policy = TanhGaussianMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=[32, 32],
-        hidden_nonlinearity=torch.nn.ReLU,
-        output_nonlinearity=None,
-        min_std=np.exp(-20.),
-        max_std=np.exp(2.),
-    )
-
-    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-
-    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
-    trainer = Trainer(snapshot_config=snapshot_config)
-    sampler = LocalSampler(agents=policy,
-                           envs=env,
-                           max_episode_length=env.spec.max_episode_length,
-                           worker_class=FragmentWorker)
-    sac = SAC(env_spec=env.spec,
-              policy=policy,
-              qf1=qf1,
-              qf2=qf2,
-              sampler=sampler,
-              gradient_steps_per_itr=100,
-              replay_buffer=replay_buffer,
-              min_buffer_size=1e3,
-              target_update_tau=5e-3,
-              discount=0.99,
-              buffer_batch_size=64,
-              reward_scale=1.,
-              steps_per_epoch=2)
-    sac.has_lambda = lambda x: x + 1
-    trainer.setup(sac, env)
-    set_gpu_mode(True)
-    sac.to()
-    trainer.setup(algo=sac, env=env)
-    trainer.train(n_epochs=10, batch_size=100)
-    set_gpu_mode(False)
-    trainer = Trainer(snapshot_config)
-    trainer.restore(temp_dir.name)
-    trainer.resume(n_epochs=20)
-    temp_dir.cleanup()
-
-
-@pytest.mark.gpu
-@pytest.mark.mujoco
-def test_torch_gpu_resume_gpu():
-    """Test saving on GPU and resuming on GPU."""
-    temp_dir = tempfile.TemporaryDirectory()
-    snapshot_config = SnapshotConfig(snapshot_dir=temp_dir.name,
-                                     snapshot_mode='last',
-                                     snapshot_gap=1)
-    env = normalize(GymEnv('InvertedDoublePendulum-v2',
-                           max_episode_length=100))
-    deterministic.set_seed(0)
-    policy = TanhGaussianMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=[32, 32],
-        hidden_nonlinearity=torch.nn.ReLU,
-        output_nonlinearity=None,
-        min_std=np.exp(-20.),
-        max_std=np.exp(2.),
-    )
-
-    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-
-    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
-                                 hidden_sizes=[32, 32],
-                                 hidden_nonlinearity=F.relu)
-    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
-    trainer = Trainer(snapshot_config=snapshot_config)
-    sampler = LocalSampler(agents=policy,
-                           envs=env,
-                           max_episode_length=env.spec.max_episode_length,
-                           worker_class=FragmentWorker)
-    sac = SAC(env_spec=env.spec,
-              policy=policy,
-              qf1=qf1,
-              qf2=qf2,
-              sampler=sampler,
-              gradient_steps_per_itr=100,
-              replay_buffer=replay_buffer,
-              min_buffer_size=1e3,
-              target_update_tau=5e-3,
-              discount=0.99,
-              buffer_batch_size=64,
-              reward_scale=1.,
-              steps_per_epoch=2)
-    sac.has_lambda = lambda x: x + 1
-    trainer.setup(sac, env)
-    set_gpu_mode(True)
-    sac.to()
-    trainer.setup(algo=sac, env=env)
-    trainer.train(n_epochs=10, batch_size=100)
-    trainer = Trainer(snapshot_config)
-    trainer.restore(temp_dir.name)
-    trainer.resume(n_epochs=20)
-    temp_dir.cleanup()
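
Note (not part of the patch): the four tests above all exercise the same pattern, which is to train briefly under one device setting, snapshot every epoch via SnapshotConfig, then build a fresh Trainer, switch devices with set_gpu_mode, and call restore/resume. The minimal sketch below condenses that flow using only the calls that appear in the test bodies; the helper name save_then_resume is hypothetical, and `sac` and `env` are assumed to be constructed exactly as in the tests.

# Minimal sketch of the cross-device save/resume flow exercised by the tests.
# Assumption: `sac` and `env` are built as in the test bodies above.
import tempfile

from garage.experiment import SnapshotConfig
from garage.torch import set_gpu_mode
from garage.trainer import Trainer


def save_then_resume(sac, env, save_on_gpu=False, resume_on_gpu=True):
    with tempfile.TemporaryDirectory() as temp_dir:
        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
                                         snapshot_mode='last',
                                         snapshot_gap=1)
        # Train briefly on the save-side device, snapshotting each epoch.
        set_gpu_mode(save_on_gpu)
        sac.to()
        trainer = Trainer(snapshot_config=snapshot_config)
        trainer.setup(algo=sac, env=env)
        trainer.train(n_epochs=10, batch_size=100)

        # A fresh Trainer stands in for a new process; switch the device
        # before restoring, then continue training from the snapshot.
        set_gpu_mode(resume_on_gpu)
        trainer = Trainer(snapshot_config)
        trainer.restore(temp_dir)
        trainer.resume(n_epochs=20)

Each test then amounts to one call such as save_then_resume(sac, env, save_on_gpu=True, resume_on_gpu=False); the patch itself only relocates the file and switches the temporary directory handling to a `with` block so cleanup happens even when an assertion fails.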