Add tests for long running examples (#1079)
These tests take batch_size or buffer_size as an argument via click.
Command.main(standalone_mode=False) is used here to disable the
implicit sys.exit() at the end of click command processing, since
that exit would cause the experiment subprocess to exit as well.

Ref: https://click.palletsprojects.com/en/7.x/exceptions/
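
For reference, a minimal, self-contained sketch of this pattern (plain click,
nothing garage-specific): with standalone_mode=False, main() returns the
command callback's return value instead of calling sys.exit(), so module-level
code after the call keeps running.

import click


@click.command()
@click.option('--batch_size', type=int, default=2048)
def _args(batch_size):
    """Return the parsed value so main() can surface it."""
    return batch_size


# standalone_mode=False makes main() return the callback's return value
# rather than calling sys.exit(), so code below this line still runs.
batch_size = _args.main(args=['--batch_size', '4'], standalone_mode=False)
assert batch_size == 4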
gitanshu authored and mergify[bot] committed Dec 6, 2019
1 parent 1966673 commit 1def654
Showing 5 changed files with 143 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -31,7 +31,7 @@ ci-job-normal:
	bash <(curl -s https://codecov.io/bash)

ci-job-large:
-	pytest --cov=garage -v -m large
+	pytest --cov=garage -v -m large --durations=0
	coverage xml
	bash <(curl -s https://codecov.io/bash)

42 changes: 35 additions & 7 deletions examples/tf/dqn_pong.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with DQN algorithm in pixel environment.
+"""This is an example to train a task with DQN algorithm in pixel environment.
Here it creates a gym environment Pong, and trains a DQN with 1M steps.
"""
+import click
import gym

from garage.envs.wrappers.clip_reward import ClipReward
@@ -24,8 +24,18 @@
from garage.tf.q_functions import DiscreteCNNQFunction


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        n_epochs = 100
        n_epoch_cycles = 20
@@ -45,9 +55,10 @@ def run_task(snapshot_config, *_):

        env = TfEnv(env)

-        replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
-                                           size_in_transitions=int(5e4),
-                                           time_horizon=1)
+        replay_buffer = SimpleReplayBuffer(
+            env_spec=env.spec,
+            size_in_transitions=variant_data['buffer_size'],
+            time_horizon=1)

        qf = DiscreteCNNQFunction(env_spec=env.spec,
                                  filter_dims=(8, 4, 3),
@@ -83,10 +94,27 @@ def run_task(snapshot_config, *_):
                     batch_size=sampler_batch_size)


+@click.command()
+@click.option('--buffer_size', type=int, default=int(5e4))
+def _args(buffer_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        buffer_size (int): Size of replay buffer.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return buffer_size
+
+
+replay_buffer_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    n_parallel=1,
    snapshot_mode='last',
    seed=1,
    plot=False,
+    variant={'buffer_size': replay_buffer_size},
)
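
The hand-off above relies on run_experiment forwarding the variant dict to the
task function as variant_data. A toy sketch of the same flow, using a
hypothetical stand-in for run_experiment (fake_run_experiment is not a garage
API, just an illustration of the assumed contract):

import click


@click.command()
@click.option('--buffer_size', type=int, default=int(5e4))
def _args(buffer_size):
    return buffer_size


def run_task(snapshot_config, variant_data, *_):
    # The task reads its tunable size out of the variant dict.
    print('buffer size:', variant_data['buffer_size'])


def fake_run_experiment(task, variant=None, **_):
    # Hypothetical stand-in: forwards the variant dict to the task
    # as its second positional argument, as the examples above assume.
    task(None, variant or {})


replay_buffer_size = _args.main(args=['--buffer_size', '5'],
                                standalone_mode=False)
fake_run_experiment(run_task, variant={'buffer_size': replay_buffer_size})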
37 changes: 32 additions & 5 deletions examples/tf/ppo_memorize_digits.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with PPO algorithm.
+"""This is an example to train a task with PPO algorithm.
Here it runs MemorizeDigits-v0 environment with 1000 iterations.
"""
+import click
import gym

from garage.envs import normalize
@@ -15,8 +15,18 @@
from garage.tf.policies import CategoricalCNNPolicy


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('MemorizeDigits-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
@@ -44,11 +54,28 @@ def run_task(snapshot_config, *_):
                   flatten_input=False)

        runner.setup(algo, env)
-        runner.train(n_epochs=1000, batch_size=2048)
+        runner.train(n_epochs=1000, batch_size=variant_data['batch_size'])


+@click.command()
+@click.option('--batch_size', '_batch_size', type=int, default=2048)
+def _args(_batch_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        _batch_size (int): Number of environment steps in one batch.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return _batch_size
+
+
+batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
+    variant={'batch_size': batch_size},
)
37 changes: 32 additions & 5 deletions examples/tf/trpo_cubecrash.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
-"""
-This is an example to train a task with TRPO algorithm.
+"""This is an example to train a task with TRPO algorithm.
Here it runs CubeCrash-v0 environment with 100 iterations.
"""
+import click
import gym

from garage.envs import normalize
@@ -15,8 +15,18 @@
from garage.tf.policies import CategoricalCNNPolicy


-def run_task(snapshot_config, *_):
-    """Run task."""
+def run_task(snapshot_config, variant_data, *_):
+    """Run task.
+
+    Args:
+        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
+            configuration used by LocalRunner to create the snapshotter.
+        variant_data (dict): Custom arguments for the task.
+        *_ (object): Ignored by this function.
+
+    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('CubeCrash-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
@@ -44,11 +54,28 @@ def run_task(snapshot_config, *_):
                    flatten_input=False)

        runner.setup(algo, env)
-        runner.train(n_epochs=100, batch_size=4000)
+        runner.train(n_epochs=100, batch_size=variant_data['batch_size'])


+@click.command()
+@click.option('--batch_size', '_batch_size', type=int, default=4000)
+def _args(_batch_size):
+    """A click command to parse arguments for automated testing purposes.
+
+    Args:
+        _batch_size (int): Number of environment steps in one batch.
+
+    Returns:
+        int: The input argument as-is.
+
+    """
+    return _batch_size
+
+
+batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
+    variant={'batch_size': batch_size},
)
43 changes: 43 additions & 0 deletions tests/integration_tests/test_examples.py
@@ -54,6 +54,49 @@ def test_algo_examples(filepath):
    assert subprocess.run([filepath], check=False, env=env).returncode == 0


+@pytest.mark.no_cover
+@pytest.mark.timeout(180)
+def test_dqn_pong():
+    """Test tf/dqn_pong.py with reduced replay buffer size for reduced memory
+    consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    assert subprocess.run(
+        [str(EXAMPLES_ROOT_DIR / 'tf/dqn_pong.py'), '--buffer_size', '5'],
+        check=False,
+        env=env).returncode == 0
+
+
+@pytest.mark.no_cover
+@pytest.mark.timeout(30)
+def test_ppo_memorize_digits():
+    """Test tf/ppo_memorize_digits.py with reduced batch size for reduced
+    memory consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    command = [
+        str(EXAMPLES_ROOT_DIR / 'tf/ppo_memorize_digits.py'), '--batch_size',
+        '4'
+    ]
+    assert subprocess.run(command, check=False, env=env).returncode == 0
+
+
+@pytest.mark.no_cover
+@pytest.mark.timeout(40)
+def test_trpo_cubecrash():
+    """Test tf/trpo_cubecrash.py with reduced batch size for reduced memory
+    consumption.
+    """
+    env = os.environ.copy()
+    env['GARAGE_EXAMPLE_TEST_N_EPOCHS'] = '1'
+    assert subprocess.run(
+        [str(EXAMPLES_ROOT_DIR / 'tf/trpo_cubecrash.py'), '--batch_size', '4'],
+        check=False,
+        env=env).returncode == 0
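
Each test exports GARAGE_EXAMPLE_TEST_N_EPOCHS before launching its example;
the hook that consumes it is outside this diff, but a plausible sketch of the
consuming side (hypothetical, for illustration only) is:

import os

# Hypothetical: fall back to the example's normal epoch count unless the
# test harness has exported GARAGE_EXAMPLE_TEST_N_EPOCHS.
n_epochs = int(os.environ.get('GARAGE_EXAMPLE_TEST_N_EPOCHS', '100'))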


@pytest.mark.no_cover
@pytest.mark.timeout(10)
def test_step_env():
