Add tests for long running examples (#1079)
These tests take batch_size or buffer_size as an argument via click.
Command.main(standalone_mode=False) is used here to disable the
implicit sys.exit() at the end of click command processing, since
that exit would cause the experiment subprocess to exit as well.

Ref: https://click.palletsprojects.com/en/7.x/exceptions/
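
For reference, a minimal, self-contained sketch of this pattern (plain click,
nothing garage-specific): with standalone_mode=False, main() returns the
command callback's return value instead of calling sys.exit(), so module-level
code after the call keeps running.

import click


@click.command()
@click.option('--batch_size', type=int, default=2048)
def _args(batch_size):
    """Return the parsed value so main() can surface it."""
    return batch_size


# standalone_mode=False makes main() return the callback's return value
# rather than calling sys.exit(), so code below this line still runs.
batch_size = _args.main(args=['--batch_size', '4'], standalone_mode=False)
assert batch_size == 4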
gitanshu authored and mergify[bot] committed Dec 6, 2019
1 parent 1966673 commit 1def654
Showing 5 changed files with 143 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -31,7 +31,7 @@ ci-job-normal:
	bash <(curl -s https://codecov.io/bash)

ci-job-large:
-	pytest --cov=garage -v -m large
+	pytest --cov=garage -v -m large --durations=0
	coverage xml
	bash <(curl -s https://codecov.io/bash)

42 changes: 35 additions & 7 deletions examples/tf/dqn_pong.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with DQN algorithm in pixel environment.
+"""This is an example to train a task with DQN algorithm in pixel environment.
Here it creates a gym environment Pong, and trains a DQN with 1M steps.
"""
+import click
import gym

from garage.envs.wrappers.clip_reward import ClipReward
@@ -24,8 +24,18 @@
from garage.tf.q_functions import DiscreteCNNQFunction


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        n_epochs = 100
        n_epoch_cycles = 20
@@ -45,9 +55,10 @@ def run_task(snapshot_config, *_):

        env = TfEnv(env)

-        replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
-                                           size_in_transitions=int(5e4),
-                                           time_horizon=1)
+        replay_buffer = SimpleReplayBuffer(
+            env_spec=env.spec,
+            size_in_transitions=variant_data['buffer_size'],
+            time_horizon=1)

        qf = DiscreteCNNQFunction(env_spec=env.spec,
                                  filter_dims=(8, 4, 3),
@@ -83,10 +94,27 @@ def run_task(snapshot_config, *_):
                     batch_size=sampler_batch_size)


+@click.command()
+@click.option('--buffer_size', type=int, default=int(5e4))
+def _args(buffer_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        buffer_size (int): Size of replay buffer.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return buffer_size
+
+
+replay_buffer_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    n_parallel=1,
    snapshot_mode='last',
    seed=1,
    plot=False,
+    variant={'buffer_size': replay_buffer_size},
)
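
The hand-off above relies on run_experiment forwarding the variant dict to the
task function as variant_data. A toy sketch of the same flow, using a
hypothetical stand-in for run_experiment (fake_run_experiment is not a garage
API, just an illustration of the assumed contract):

import click


@click.command()
@click.option('--buffer_size', type=int, default=int(5e4))
def _args(buffer_size):
    return buffer_size


def run_task(snapshot_config, variant_data, *_):
    # The task reads its tunable size out of the variant dict.
    print('buffer size:', variant_data['buffer_size'])


def fake_run_experiment(task, variant=None, **_):
    # Hypothetical stand-in: forwards the variant dict to the task
    # as its second positional argument, as the examples above assume.
    task(None, variant or {})


replay_buffer_size = _args.main(args=['--buffer_size', '5'],
                                standalone_mode=False)
fake_run_experiment(run_task, variant={'buffer_size': replay_buffer_size})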
37 changes: 32 additions & 5 deletions examples/tf/ppo_memorize_digits.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with PPO algorithm.
+"""This is an example to train a task with PPO algorithm.
Here it runs MemorizeDigits-v0 environment with 1000 iterations.
"""
+import click
import gym

from garage.envs import normalize
@@ -15,8 +15,18 @@
from garage.tf.policies import CategoricalCNNPolicy


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('MemorizeDigits-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
@@ -44,11 +54,28 @@ def run_task(snapshot_config, *_):
                   flatten_input=False)

        runner.setup(algo, env)
-        runner.train(n_epochs=1000, batch_size=2048)
+        runner.train(n_epochs=1000, batch_size=variant_data['batch_size'])


+@click.command()
+@click.option('--batch_size', '_batch_size', type=int, default=2048)
+def _args(_batch_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        _batch_size (int): Number of environment steps in one batch.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return _batch_size
+
+
+batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
+    variant={'batch_size': batch_size},
)
37 changes: 32 additions & 5 deletions examples/tf/trpo_cubecrash.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with TRPO algorithm.
+"""This is an example to train a task with TRPO algorithm.
Here it runs CubeCrash-v0 environment with 100 iterations.
"""
+import click
import gym

from garage.envs import normalize
@@ -15,8 +15,18 @@
from garage.tf.policies import CategoricalCNNPolicy


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('CubeCrash-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
@@ -44,11 +54,28 @@ def run_task(snapshot_config, *_):
                    flatten_input=False)

        runner.setup(algo, env)
-        runner.train(n_epochs=100, batch_size=4000)
+        runner.train(n_epochs=100, batch_size=variant_data['batch_size'])


+@click.command()
+@click.option('--batch_size', '_batch_size', type=int, default=4000)
+def _args(_batch_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        _batch_size (int): Number of environment steps in one batch.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return _batch_size
+
+
+batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
+    variant={'batch_size': batch_size},
)
43 changes: 43 additions & 0 deletions tests/integration_tests/test_examples.py
@@ -54,6 +54,49 @@ def test_algo_examples(filepath):
    assert subprocess.run([filepath], check=False, env=env).returncode == 0


+@pytest.mark.no_cover
+@pytest.mark.timeout(180)
+def test_dqn_pong():
+    """Test tf/dqn_pong.py with reduced replay buffer size for reduced memory
+    consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    assert subprocess.run(
+        [str(EXAMPLES_ROOT_DIR / 'tf/dqn_pong.py'), '--buffer_size', '5'],
+        check=False,
+        env=env).returncode == 0
+
+
+@pytest.mark.no_cover
+@pytest.mark.timeout(30)
+def test_ppo_memorize_digits():
+    """Test tf/ppo_memorize_digits.py with reduced batch size for reduced
+    memory consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    command = [
+        str(EXAMPLES_ROOT_DIR / 'tf/ppo_memorize_digits.py'), '--batch_size',
+        '4'
+    ]
+    assert subprocess.run(command, check=False, env=env).returncode == 0
+
+
+@pytest.mark.no_cover
+@pytest.mark.timeout(40)
+def test_trpo_cubecrash():
+    """Test tf/trpo_cubecrash.py with reduced batch size for reduced memory
+    consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    assert subprocess.run(
+        [str(EXAMPLES_ROOT_DIR / 'tf/trpo_cubecrash.py'), '--batch_size', '4'],
+        check=False,
+        env=env).returncode == 0
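
Each test exports GARAGE_EXAMPLE_TEST_N_EPOCHS before launching its example;
the hook that consumes it is outside this diff, but a plausible sketch of the
consuming side (hypothetical, for illustration only) is:

import os

# Hypothetical: fall back to the example's normal epoch count unless the
# test harness has exported GARAGE_EXAMPLE_TEST_N_EPOCHS.
n_epochs = int(os.environ.get('GARAGE_EXAMPLE_TEST_N_EPOCHS', '100'))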


@pytest.mark.no_cover
@pytest.mark.timeout(10)
def test_step_env():
