diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 10e487a46..256a7598f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -61,7 +61,7 @@ body: required: true - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/) required: true - - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md) + - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io) required: true - label: I have provided a minimal working example to reproduce the bug required: true diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index a519436a7..4da584fec 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -24,7 +24,7 @@ body: required: true - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/) required: true - - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md) + - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io) required: true - label: If code there is, it is minimal and working required: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 622bba139..0afdeba88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + + # Install Atari Roms + pip install autorom + wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64 + base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz + AutoROM --accept-license --source-file Roms.tar.gz + pip install setuptools==65.5.0 # cpu version of pytorch - faster to download pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html @@ -41,17 +48,20 @@ jobs: # install parking-env to test HER (pinned so it works with gym 0.21) pip install highway-env==1.5.0 pip install -e . - - name: Type check + - name: Lint with ruff run: | - make type - # skip mypy type check for python3.7 (last forever for some reason) - if: "!(matrix.python-version == '3.7')" + make lint - name: Check codestyle run: | make check-codestyle - - name: Lint with flake8 + - name: Build doc run: | - make lint + make doc + - name: Type check + run: | + make type + # skip mypy type check for python3.7 (last forever for some reason) + if: "!(matrix.python-version == '3.7')" - name: Test with pytest run: | make pytest diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml index 3e2d6d27b..cf45a9fb7 100644 --- a/.github/workflows/trained_agents.yml +++ b/.github/workflows/trained_agents.yml @@ -14,6 +14,7 @@ jobs: env: TERM: xterm-256color FORCE_COLOR: 1 + # Skip CI if [ci skip] in the commit message if: "! 
contains(toJSON(github.event.commits.*.message), '[ci skip]')" runs-on: ubuntu-latest @@ -31,6 +32,14 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + + # Install Atari Roms + pip install autorom + wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64 + base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz + AutoROM --accept-license --source-file Roms.tar.gz + + pip install setuptools==65.5.0 # cpu version of pytorch - faster to download pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html diff --git a/.gitignore b/.gitignore index 87c26d1a2..b428b9386 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ runs hub *.mp4 *.json +_build/ + # Setuptools distribution and build folders. /dist/ diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000..6753d8744 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,16 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Set requirements using conda env +conda: + environment: docs/conda_env.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3596cdce6..ded42f17d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ -## Release 1.8.0a2 (WIP) +## Release 1.8.0a9 (WIP) ### Breaking Changes - Upgraded to SB3 >= 1.8.0 ### New Features - Tuned hyperparameters for RecurrentPPO on Swimmer +- Documentation is now built using Sphinx and hosted on read the doc ### Bug fixes - Set ``highway-env`` version to 1.5 and ``setuptools to`` v65.5 for the CI @@ -18,6 +19,7 @@ - Added support for `ruff` (fast alternative to flake8) in the Makefile - Removed Gitlab CI file - Replaced deprecated `optuna.suggest_loguniform(...)` by `optuna.suggest_float(..., log=True)` +- Switched to `ruff` and `pyproject.toml` ## Release 1.7.0 (2023-01-10) diff --git a/Makefile b/Makefile index cabb0ff0b..e87e65efb 100644 --- a/Makefile +++ b/Makefile @@ -18,32 +18,34 @@ type: pytype mypy lint: # stop the build if there are Python syntax errors or undefined names - # see https://lintlyci.github.io/Flake8Rules/ - flake8 ${LINT_PATHS} --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. - flake8 ${LINT_PATHS} --count --exit-zero --statistics - -ruff: - # stop the build if there are Python syntax errors or undefined names - # see https://lintlyci.github.io/Flake8Rules/ + # see https://www.flake8rules.com/ ruff ${LINT_PATHS} --select=E9,F63,F7,F82 --show-source # exit-zero treats all errors as warnings. 
- ruff ${LINT_PATHS} --exit-zero --line-length 127 + ruff ${LINT_PATHS} --exit-zero format: # Sort imports isort ${LINT_PATHS} # Reformat using black - black -l 127 ${LINT_PATHS} + black ${LINT_PATHS} check-codestyle: # Sort imports isort --check ${LINT_PATHS} # Reformat using black - black --check -l 127 ${LINT_PATHS} + black --check ${LINT_PATHS} commit-checks: format type lint +doc: + cd docs && make html + +spelling: + cd docs && make spelling + +clean: + cd docs && make clean + docker: docker-cpu docker-gpu docker-cpu: @@ -66,4 +68,4 @@ test-release: python setup.py bdist_wheel twine upload --repository-url https://test.pypi.org/legacy/ dist/* -.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest +.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest diff --git a/README.md b/README.md index cf672ba9a..600c2e54d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![CI](https://github.com/DLR-RM/rl-baselines3-zoo/workflows/CI/badge.svg) +[![Documentation Status](https://readthedocs.org/projects/rl-baselines3-zoo/badge/?version=master)](https://rl-baselines3-zoo.readthedocs.io/en/master/?badge=master) [![coverage report](https://gitlab.com/araffin/rl-baselines3-zoo/badges/master/coverage.svg)](https://gitlab.com/araffin/rl-baselines3-zoo/-/commits/master) [![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) @@ -26,6 +27,10 @@ Goals of this repository: This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo). +## Documentation + +Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io) + ## Installation ### Minimal installation @@ -59,99 +64,22 @@ If the environment exists in this file, then you can train an agent using: ``` python train.py --algo algo_name --env env_id ``` -You can use `-P` (`--progress`) option to display a progress bar. - -Using a custom config file when it is a yaml file with a which contains a `env_id` entry: -``` -python train.py --algo algo_name --env env_id --conf-file my_yaml.yml -``` - -You can also use a python file that contains a dictionary called `hyperparams` with an entry for each `env_id`. -(see `hyperparams/python/ppo_config_example.py` for an example) -```bash -# You can pass a path to a python file -python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py -# Or pass a path to a file from a module (for instance my_package.my_file) -python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example -``` -The advantage of this approach is that you can specify arbitrary python dictionaries -and ensure that all their dependencies are imported in the config file itself. - - -For example (with tensorboard support): -``` -python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/ -``` Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env): ``` python train.py --algo sac --env HalfCheetahBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1 ``` -Save a checkpoint of the agent every 100000 steps: -``` -python train.py --algo td3 --env HalfCheetahBulletEnv-v0 --save-freq 100000 -``` +More examples are available in the [documentation](https://rl-baselines3-zoo.readthedocs.io). 
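As a quick illustration of what such a hyperparameter entry can look like, here is a minimal sketch of a Python config file (the values are illustrative, not the tuned settings shipped with the zoo; see `hyperparams/python/ppo_config_example.py` and the documentation for a real example):

```python
# my_ppo_config.py -- hypothetical custom config file, passed with:
#   python train.py --algo ppo --env CartPole-v1 --conf-file my_ppo_config.py
# The zoo looks for a dictionary named `hyperparams` with one entry per env_id.
hyperparams = {
    "CartPole-v1": dict(
        policy="MlpPolicy",
        n_envs=8,
        n_timesteps=100_000,  # illustrative training budget
        n_steps=32,
        batch_size=256,
        gamma=0.98,
        gae_lambda=0.8,
        ent_coef=0.0,
        learning_rate=0.001,  # a "lin_0.001" string would request a linear schedule instead
    ),
}
```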
-Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps): -``` -python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000 -``` -When using off-policy algorithms, you can also save the replay buffer after training: -``` -python train.py --algo sac --env Pendulum-v1 --save-replay-buffer -``` -It will be automatically loaded if present when continuing training. +## Integrations -## Plot Scripts +The RL Zoo has some integration with other libraries/services like Weights & Biases for experiment tracking or Hugging Face for storing/sharing trained models. You can find out more in the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/integrations.html) of the documentation. -Plot scripts (to be documented, see "Results" sections in SB3 documentation): -- `scripts/all_plots.py`/`scripts/plot_from_file.py` for plotting evaluations -- `scripts/plot_train.py` for plotting training reward/success - -*Examples (on the current collection)* - -Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving window of 500 episodes for all the `Fetch` environment with `HER` algorithm: - -``` -python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps -``` - -Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and Ant PyBullet environments: - -``` -python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/ -``` - -## Plot with the rliable library - -The RL zoo integrates some of [rliable](https://agarwl.github.io/rliable/) library features. -You can find a visual explanation of the tools used by rliable in this [blog post](https://araffin.github.io/post/rliable/). - -First, you need to install [rliable](https://github.com/google-research/rliable). - -Note: Python 3.7+ is required in that case. - -Then export your results to a file using the `all_plots.py` script (see above): -``` -python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy -``` - -You can now use the `plot_from_file.py` script with `--rliable`, `--versus` and `--iqm` arguments: -``` -python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC -``` - -Note: you may need to edit `plot_from_file.py`, in particular the `env_key_to_env_id` dictionary -and the `scripts/score_normalization.py` which stores min and max score for each environment. - -Remark: plotting with the `--rliable` option is usually slow as confidence interval need to be computed using bootstrap sampling. - - -## Custom Environment +## Plot Scripts -The easiest way to add support for a custom environment is to edit `rl_zoo3/import_envs.py` and register your environment here. Then, you need to add a section for it in the hyperparameters file (`hyperparams/algo.yml` or a custom yaml file that you can specify using `--conf-file` argument). +Please the see [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/plot.html) of the documentation. 
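For reference, registering a custom environment in `rl_zoo3/import_envs.py` (as described above) amounts to a standard Gym registration call. A minimal sketch, assuming a hypothetical `my_package.my_envs.MyCustomEnv` class (gym 0.21 API, as pinned in `requirements.txt`):

```python
# Hypothetical snippet to add to rl_zoo3/import_envs.py
from gym.envs.registration import register

register(
    id="MyCustomEnv-v0",  # env id used with --env and as the key in the yaml config
    entry_point="my_package.my_envs:MyCustomEnv",  # path to your gym.Env subclass
    max_episode_steps=500,
)
```

A matching `MyCustomEnv-v0` section in `hyperparams/algo.yml` (or in a file passed via `--conf-file`) is still needed, as noted above.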
## Enjoy a Trained Agent @@ -168,237 +96,13 @@ For example, enjoy A2C on Breakout during 5000 timesteps: python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000 ``` -If you have trained an agent yourself, you need to do: -``` -# exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID -python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0 -``` - -To load the best model (when using evaluation environment): -``` -python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best -``` - -To load a checkpoint (here the checkpoint name is `rl_model_10000_steps.zip`): -``` -python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000 -``` - -To load the latest checkpoint: -``` -python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint -``` - -## Huggingface Hub Integration - -Upload model to hub (same syntax as for `enjoy.py`): -``` -python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit" -``` -you can choose custom `repo-name` (default: `{algo}-{env_id}`) by passing a `--repo-name` argument. - -Download model from hub: -``` -python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -``` - -## Hyperparameter yaml syntax - -The syntax used in `hyperparameters/algo_name.yml` for setting hyperparameters (likewise the syntax to [overwrite hyperparameters](https://github.com/DLR-RM/rl-baselines3-zoo#overwrite-hyperparameters) on the cli) may be specialized if the argument is a function. See examples in the `hyperparameters/` directory. For example: - -- Specify a linear schedule for the learning rate: - -```yaml - learning_rate: lin_0.012486195510232303 -``` - -Specify a different activation function for the network: - -```yaml - policy_kwargs: "dict(activation_fn=nn.ReLU)" -``` - -For a custom policy: - -```yaml - policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy -``` - - -## Hyperparameter Tuning - -We use [Optuna](https://optuna.org/) for optimizing the hyperparameters. -Not all hyperparameters are tuned, and tuning enforces certain default hyperparameter settings that may be different from the official defaults. See [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) for the current settings for each agent. - -Hyperparameters not specified in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) are taken from the associated YAML file and fallback to the default values of SB3 if not present. 
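To make the role of `rl_zoo3/hyperparams_opt.py` more concrete, a sampling function for an algorithm roughly follows the pattern sketched below (a simplified sketch; the ranges and the set of sampled parameters are illustrative, not the zoo's actual search space):

```python
# Simplified sketch of an Optuna sampling function, in the spirit of rl_zoo3/hyperparams_opt.py
from typing import Any, Dict

import optuna


def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
    """Sample a (reduced) set of PPO hyperparameters for one Optuna trial."""
    return {
        "n_steps": trial.suggest_categorical("n_steps", [256, 512, 1024, 2048]),
        "gamma": trial.suggest_categorical("gamma", [0.98, 0.99, 0.995]),
        # log=True replaces the deprecated optuna.suggest_loguniform (see changelog)
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "ent_coef": trial.suggest_float("ent_coef", 1e-8, 0.01, log=True),
    }
```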
- -Note: when using SuccessiveHalvingPruner ("halving"), you must specify `--n-jobs > 1` - -Budget of 1000 trials with a maximum of 50000 steps: - -``` -python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \ - --sampler tpe --pruner median -``` - -Distributed optimization using a shared database is also possible (see the corresponding [Optuna documentation](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/004_distributed.html)): -``` -python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db -``` - -Print and save best hyperparameters of an Optuna study: -``` -python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10 -``` - -The default budget for hyperparameter tuning is 500 trials and there is one intermediate evaluation for pruning/early stopping per 100k time steps. - -### Hyperparameters search space - -Note that the default hyperparameters used in the zoo when tuning are not always the same as the defaults provided in [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/modules/base.html). Consult the latest source code to be sure of these settings. For example: - -- PPO tuning assumes a network architecture with `ortho_init = False` when tuning, though it is `True` by [default](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#ppo-policies). You can change that by updating [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py). - -- Non-episodic rollout in TD3 and DDPG assumes `gradient_steps = train_freq` and so tunes only `train_freq` to reduce the search space. - -When working with continuous actions, we recommend to enable [gSDE](https://arxiv.org/abs/2005.05719) by uncommenting lines in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py). - - -## Experiment tracking - -We support tracking experiment data such as learning curves and hyperparameters via [Weights and Biases](https://wandb.ai). - -The following command -``` -python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3 -``` - -yields a tracked experiment at this [URL](https://wandb.ai/openrlbenchmark/sb3/runs/1b65ldmh). - -To add a tag to the run, (e.g. `optimized`), use the argument `--wandb-tags optimized`. - -## Env normalization +## Hyperparameters Tuning -In the hyperparameter file, `normalize: True` means that the training environment will be wrapped in a [VecNormalize](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/vec_normalize.py#L13) wrapper. - -[Normalization uses](https://github.com/DLR-RM/rl-baselines3-zoo/issues/64) the default parameters of `VecNormalize`, with the exception of `gamma` which is set to match that of the agent. This can be [overridden](https://github.com/DLR-RM/rl-baselines3-zoo/blob/v0.10.0/hyperparams/sac.yml#L239) using the appropriate `hyperparameters/algo_name.yml`, e.g. 
- -```yaml - normalize: "{'norm_obs': True, 'norm_reward': False}" -``` - - -## Env Wrappers - -You can specify in the hyperparameter config one or more wrapper to use around the environment: - -for one wrapper: -```yaml -env_wrapper: gym_minigrid.wrappers.FlatObsWrapper -``` - -for multiple, specify a list: - -```yaml -env_wrapper: - - rl_zoo3.wrappers.DoneOnSuccessWrapper: - reward_offset: 1.0 - - sb3_contrib.common.wrappers.TimeFeatureWrapper -``` - -Note that you can easily specify parameters too. - -By default, the environment is wrapped with a `Monitor` wrapper to record episode statistics. -You can specify arguments to it using `monitor_kwargs` parameter to log additional data. -That data *must* be present in the info dictionary at the last step of each episode. - -For instance, for recording success with goal envs (e.g. `FetchReach-v1`): - -```yaml -monitor_kwargs: dict(info_keywords=('is_success',)) -``` - -or recording final x position with `Ant-v3`: -```yaml -monitor_kwargs: dict(info_keywords=('x_position',)) -``` - -Note: for known `GoalEnv` like `FetchReach`, `info_keywords=('is_success',)` is actually the default. - -## VecEnvWrapper - -You can specify which `VecEnvWrapper` to use in the config, the same way as for env wrappers (see above), using the `vec_env_wrapper` key: - -For instance: -```yaml -vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor -``` - -Note: `VecNormalize` is supported separately using `normalize` keyword, and `VecFrameStack` has a dedicated keyword `frame_stack`. - -## Callbacks - -Following the same syntax as env wrappers, you can also add custom callbacks to use during training. - -```yaml -callback: - - rl_zoo3.callbacks.ParallelTrainCallback: - gradient_steps: 256 -``` - -## Env keyword arguments - -You can specify keyword arguments to pass to the env constructor in the command line, using `--env-kwargs`: - -``` -python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10 -``` +Please the see [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/tuning.html) of the documentation. -## Overwrite hyperparameters +## Custom Configuration -You can easily overwrite hyperparameters in the command line, using ``--hyperparams``: - -``` -python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])" -``` - -Note: if you want to pass a string, you need to escape it like that: `my_string:"'value'"` - -## Record a Video of a Trained Agent - -Record 1000 steps with the latest saved model: - -``` -python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 -``` - -Use the best saved model instead: - -``` -python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best -``` - -Record a video of a checkpoint saved during training (here the checkpoint name is `rl_model_10000_steps.zip`): - -``` -python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000 -``` - -## Record a Video of a Training Experiment - -Apart from recording videos of specific saved models, it is also possible to record a video of a training experiment where checkpoints have been saved. - -Record 1000 steps for each checkpoint, latest and best saved models: - -``` -python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic -``` - -The previous command will create a `mp4` file. 
To convert this file to `gif` format as well: - -``` -python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif -``` +Please the see [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/config.html) of the documentation. ## Current Collection: 195+ Trained Agents! @@ -577,34 +281,6 @@ train() ``` -### Docker Images - -Build docker image (CPU): -``` -make docker-cpu -``` - -GPU: -``` -USE_GPU=True make docker-gpu -``` - -Pull built docker image (CPU): -``` -docker pull stablebaselines/rl-baselines3-zoo-cpu -``` - -GPU image: -``` -docker pull stablebaselines/rl-baselines3-zoo -``` - -Run script in the docker image: - -``` -./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1 -``` - ## Tests To run tests, first install pytest, then: @@ -639,4 +315,4 @@ If you trained an agent that is not present in the RL Zoo, please submit a Pull ## Contributors -We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori) [@Shade5](https://github.com/Shade5) [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum) +We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori) [@Shade5](https://github.com/Shade5) [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum), [@qgallouedec](https://github.com/qgallouedec) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..938bf87e1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +# For debug: SPHINXOPTS = -nWT --keep-going -vvv +SPHINXOPTS = # -W # make warnings fatal (disabled because of gym in the wrappers) +SPHINXBUILD = sphinx-build +SPHINXPROJ = RLZoo +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..c007b52b2 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,26 @@ +## RL Zoo3 Documentation + +This folder contains documentation for the RL Zoo. + + +### Build the Documentation + +#### Install Sphinx and Theme +Execute this command in the project root: +``` +pip install stable_baselines3[docs] +pip install -e . +``` + +#### Building the Docs + +In the `docs/` folder: +``` +make html +``` + +if you want to building each time a file is changed: + +``` +sphinx-autobuild . 
_build/html +``` diff --git a/docs/_static/css/baselines_theme.css b/docs/_static/css/baselines_theme.css new file mode 100644 index 000000000..450864efe --- /dev/null +++ b/docs/_static/css/baselines_theme.css @@ -0,0 +1,61 @@ +/* Main colors adapted from pytorch doc */ +:root{ + --main-bg-color: #343A40; + --link-color: #FD7E14; +} + +/* Header fonts y */ +h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { + font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; +} + + +/* Docs background */ +.wy-side-nav-search{ + background-color: var(--main-bg-color); +} + +/* Mobile version */ +.wy-nav-top{ + background-color: var(--main-bg-color); +} + +/* Change link colors (except for the menu) */ +a { + color: var(--link-color); +} + +a:hover { + color: #4F778F; +} + +.wy-menu a { + color: #b3b3b3; +} + +.wy-menu a:hover { + color: #b3b3b3; +} + +a.icon.icon-home { + color: #b3b3b3; +} + +.version{ + color: var(--link-color) !important; +} + + +/* Make code blocks have a background */ +.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] { + background: #f8f8f8;; +} + +/* Change style of types in the docstrings .rst-content .field-list */ +.field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate +{ + border: None; + padding-left: 0; + padding-right: 0; + color: #404040; +} diff --git a/docs/_static/img/colab-badge.svg b/docs/_static/img/colab-badge.svg new file mode 100644 index 000000000..c08066ee3 --- /dev/null +++ b/docs/_static/img/colab-badge.svg @@ -0,0 +1 @@ + Open in ColabOpen in Colab diff --git a/docs/_static/img/colab.svg b/docs/_static/img/colab.svg new file mode 100644 index 000000000..c2d30e973 --- /dev/null +++ b/docs/_static/img/colab.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/docs/conda_env.yml b/docs/conda_env.yml new file mode 100644 index 000000000..98a550820 --- /dev/null +++ b/docs/conda_env.yml @@ -0,0 +1,20 @@ +name: root +channels: + - pytorch + - defaults +dependencies: + - cpuonly=1.0=0 + - pip=21.1 + - python=3.7 + - pytorch=1.11=py3.7_cpu_0 + - pip: + - gym==0.21 + - cloudpickle + - opencv-python-headless + - pandas + - numpy + - matplotlib + - sphinx_autodoc_typehints + - sphinx>=4.2 + - sphinx_rtd_theme>=1.0 + - sphinx_copybutton diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..3f0348af7 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,230 @@ +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +from typing import Dict, List +from unittest.mock import MagicMock + +# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support +# PyEnchant. 
+try: + import sphinxcontrib.spelling # noqa: F401 + + enable_spell_check = True +except ImportError: + enable_spell_check = False + +# Try to enable copy button +try: + import sphinx_copybutton # noqa: F401 + + enable_copy_button = True +except ImportError: + enable_copy_button = False + +# source code directory, relative to this file, for sphinx-autobuild +sys.path.insert(0, os.path.abspath("..")) + + +class Mock(MagicMock): + __subclasses__ = [] # type: ignore + + @classmethod + def __getattr__(cls, name): + return MagicMock() + + +# Mock modules that requires C modules +# Note: because of that we cannot test examples using CI +# 'torch', 'torch.nn', 'torch.nn.functional', +# DO not mock modules for now, we will need to do that for read the docs later +MOCK_MODULES: List[str] = [] +sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) + +# Read version from file +version_file = os.path.join(os.path.dirname(__file__), "../rl_zoo3", "version.txt") +with open(version_file) as file_handler: + __version__ = file_handler.read().strip() + +# -- Project information ----------------------------------------------------- + +project = "RL Baselines3 Zoo" +copyright = "2023, Stable Baselines3" +author = "Stable Baselines3 Contributors" + +# The short X.Y version +version = "master (" + __version__ + " )" +# The full version, including alpha/beta/rc tags +release = __version__ + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx_autodoc_typehints", + "sphinx.ext.autosummary", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + # 'sphinx.ext.intersphinx', + # 'sphinx.ext.doctest' +] + +if enable_spell_check: + extensions.append("sphinxcontrib.spelling") + +if enable_copy_button: + extensions.append("sphinx_copybutton") + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = ".rst" + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+ +# Fix for read the docs +on_rtd = os.environ.get("READTHEDOCS") == "True" +if on_rtd: + html_theme = "default" +else: + html_theme = "sphinx_rtd_theme" + +html_logo = "../images/car.jpg" + + +def setup(app): + app.add_css_file("css/baselines_theme.css") + + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "RLZoo3doc" + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements: Dict[str, str] = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, "RLZoo3.tex", "RL Baselines3 Zoo Documentation", "Stable Baselines3 Contributors", "manual"), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "rl_zoo3", "RL Baselines3 Zoo Documentation", [author], 1)] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "RLZoo3", + "RL Baselines3 Zoo Documentation", + author, + "RLZoo3", + "One line description of project.", + "Miscellaneous", + ), +] + + +# -- Extension configuration ------------------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +# intersphinx_mapping = { +# 'python': ('https://docs.python.org/3/', None), +# 'numpy': ('http://docs.scipy.org/doc/numpy/', None), +# 'torch': ('http://pytorch.org/docs/master/', None), +# } diff --git a/docs/guide/config.rst b/docs/guide/config.rst new file mode 100644 index 000000000..0aa5918f0 --- /dev/null +++ b/docs/guide/config.rst @@ -0,0 +1,122 @@ +.. _config: + +============= +Configuration +============= + +Hyperparameter yaml syntax +-------------------------- + +The syntax used in ``hyperparameters/algo_name.yml`` for setting +hyperparameters (likewise the syntax to `overwrite +hyperparameters `__ +on the cli) may be specialized if the argument is a function. 
See +examples in the ``hyperparameters/`` directory. For example: + +- Specify a linear schedule for the learning rate: + +.. code:: yaml + + learning_rate: lin_0.012486195510232303 + +Specify a different activation function for the network: + +.. code:: yaml + + policy_kwargs: "dict(activation_fn=nn.ReLU)" + +For a custom policy: + +.. code:: yaml + + policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy + +Env Normalization +----------------- + +In the hyperparameter file, ``normalize: True`` means that the training +environment will be wrapped in a +`VecNormalize `__ +wrapper. + +`Normalization +uses `__ the +default parameters of ``VecNormalize``, with the exception of ``gamma`` +which is set to match that of the agent. This can be +`overridden `__ +using the appropriate ``hyperparameters/algo_name.yml``, e.g. + +.. code:: yaml + + normalize: "{'norm_obs': True, 'norm_reward': False}" + +Env Wrappers +------------ + +You can specify in the hyperparameter config one or more wrapper to use +around the environment: + +for one wrapper: + +.. code:: yaml + + env_wrapper: gym_minigrid.wrappers.FlatObsWrapper + +for multiple, specify a list: + +.. code:: yaml + + env_wrapper: + - rl_zoo3.wrappers.DoneOnSuccessWrapper: + reward_offset: 1.0 + - sb3_contrib.common.wrappers.TimeFeatureWrapper + +Note that you can easily specify parameters too. + +By default, the environment is wrapped with a ``Monitor`` wrapper to +record episode statistics. You can specify arguments to it using +``monitor_kwargs`` parameter to log additional data. That data *must* be +present in the info dictionary at the last step of each episode. + +For instance, for recording success with goal envs +(e.g. ``FetchReach-v1``): + +.. code:: yaml + + monitor_kwargs: dict(info_keywords=('is_success',)) + +or recording final x position with ``Ant-v3``: + +.. code:: yaml + + monitor_kwargs: dict(info_keywords=('x_position',)) + +Note: for known ``GoalEnv`` like ``FetchReach``, +``info_keywords=('is_success',)`` is actually the default. + +VecEnvWrapper +------------- + +You can specify which ``VecEnvWrapper`` to use in the config, the same +way as for env wrappers (see above), using the ``vec_env_wrapper`` key: + +For instance: + +.. code:: yaml + + vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor + +Note: ``VecNormalize`` is supported separately using ``normalize`` +keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``. + +Callbacks +--------- + +Following the same syntax as env wrappers, you can also add custom +callbacks to use during training. + +.. code:: yaml + + callback: + - rl_zoo3.callbacks.ParallelTrainCallback: + gradient_steps: 256 diff --git a/docs/guide/custom_env.rst b/docs/guide/custom_env.rst new file mode 100644 index 000000000..5c6ab016d --- /dev/null +++ b/docs/guide/custom_env.rst @@ -0,0 +1,11 @@ +.. _custom: + +================== +Custom Environment +================== + +The easiest way to add support for a custom environment is to edit +``rl_zoo3/import_envs.py`` and register your environment here. Then, you +need to add a section for it in the hyperparameters file +(``hyperparams/algo.yml`` or a custom yaml file that you can specify +using ``--conf-file`` argument). diff --git a/docs/guide/enjoy.rst b/docs/guide/enjoy.rst new file mode 100644 index 000000000..95097f811 --- /dev/null +++ b/docs/guide/enjoy.rst @@ -0,0 +1,100 @@ +.. _enjoy: + +===================== +Enjoy a Trained Agent +===================== + +.. 
note:: + + To download the repo with the trained agents, you must use + ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo`` + in order to clone the submodule too. + + +Enjoy a trained agent +--------------------- + +If the trained agent exists, then you can see it in action using: + +:: + + python enjoy.py --algo algo_name --env env_id + +For example, enjoy A2C on Breakout during 5000 timesteps: + +:: + + python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000 + +If you have trained an agent yourself, you need to do: + +:: + + # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID + python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0 + +Load Checkpoints, Best Model +----------------------------- + +To load the best model (when using evaluation environment): + +:: + + python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best + +To load a checkpoint (here the checkpoint name is +``rl_model_10000_steps.zip``): + +:: + + python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000 + +To load the latest checkpoint: + +:: + + python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint + + +Record a Video of a Trained Agent +--------------------------------- + +Record 1000 steps with the latest saved model: + +:: + + python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 + +Use the best saved model instead: + +:: + + python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best + +Record a video of a checkpoint saved during training (here the +checkpoint name is ``rl_model_10000_steps.zip``): + +:: + + python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000 + + +Record a Video of a Training Experiment +--------------------------------------- + +Apart from recording videos of specific saved models, it is also +possible to record a video of a training experiment where checkpoints +have been saved. + +Record 1000 steps for each checkpoint, latest and best saved models: + +:: + + python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic + +The previous command will create a ``mp4`` file. To convert this file to +``gif`` format as well: + +:: + + python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif diff --git a/docs/guide/install.rst b/docs/guide/install.rst new file mode 100644 index 000000000..dcd5c8587 --- /dev/null +++ b/docs/guide/install.rst @@ -0,0 +1,87 @@ +.. _install: + +Installation +============ + +Prerequisites +------------- + +RL Zoo requires python 3.7+ and PyTorch >= 1.11 + + +Minimal Installation +-------------------- + +To install RL Zoo with pip, execute: + +.. code-block:: bash + + pip install rl_zoo3 + +From source: + +.. code-block:: bash + + git clone https://github.com/DLR-RM/rl-baselines3-zoo + cd rl-baselines3-zoo/ + pip install -e . + +.. note:: + + You can do ``python -m rl_zoo3.train`` from any folder and you have access to ``rl_zoo3`` command line interface, for instance, ``rl_zoo3 train`` is equivalent to ``python train.py`` + + + +Full installation +----------------- + +With extra envs and test dependencies: + + +.. note:: + + If you want to use Atari games, you will need to do ``pip install "autorom[accept-rom-license]"`` + additionally to download the ROMs + + +.. 
code-block:: bash + + apt-get install swig cmake ffmpeg + pip install -r requirements.txt + + +Please see `Stable Baselines3 documentation `_ for alternatives to install stable baselines3. + + +Docker Images +------------- + +Build docker image (CPU): + +:: + + make docker-cpu + +GPU: + +:: + + USE_GPU=True make docker-gpu + +Pull built docker image (CPU): + +:: + + docker pull stablebaselines/rl-baselines3-zoo-cpu + +GPU image: + +:: + + docker pull stablebaselines/rl-baselines3-zoo + +Run script in the docker image: + +:: + + ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1 diff --git a/docs/guide/integrations.rst b/docs/guide/integrations.rst new file mode 100644 index 000000000..e5890e4e7 --- /dev/null +++ b/docs/guide/integrations.rst @@ -0,0 +1,45 @@ +.. _integrations: + +============ +Integrations +============ + +Huggingface Hub Integration +--------------------------- + +List and videos of trained agents can be found on our Huggingface page: https://huggingface.co/sb3 + + +Upload model to hub (same syntax as for ``enjoy.py``): + +:: + + python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit" + +you can choose custom ``repo-name`` (default: ``{algo}-{env_id}``) by +passing a ``--repo-name`` argument. + +Download model from hub: + +:: + + python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 + + +Experiment tracking +------------------- + +We support tracking experiment data such as learning curves and +hyperparameters via `Weights and Biases `__. + +The following command + +:: + + python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3 + +yields a tracked experiment at this +`URL `__. + +To add a tag to the run, (e.g. ``optimized``), use the argument +``--wandb-tags optimized``. diff --git a/docs/guide/plot.rst b/docs/guide/plot.rst new file mode 100644 index 000000000..aa571f612 --- /dev/null +++ b/docs/guide/plot.rst @@ -0,0 +1,70 @@ +.. _plot: + +============ +Plot Scripts +============ + + +Plot scripts (to be documented, see "Results" sections in SB3 +documentation): + +- ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations + +- ``scripts/plot_train.py`` for plotting training reward/success + + +Examples +-------- + +Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving +window of 500 episodes for all the ``Fetch`` environment with ``HER`` +algorithm: + +:: + + python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps + +Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and +Ant PyBullet environments: + +:: + + python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/ + +Plot with the rliable library +----------------------------- + +The RL zoo integrates some of +`rliable `__ library features. You +can find a visual explanation of the tools used by rliable in this `blog +post `__. + +First, you need to install +`rliable `__. + +Note: Python 3.7+ is required in that case. + +Then export your results to a file using the ``all_plots.py`` script +(see above): + +:: + + python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy + +You can now use the ``plot_from_file.py`` script with ``--rliable``, +``--versus`` and ``--iqm`` arguments: + +:: + + python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC + +.. 
note:: + + you may need to edit ``plot_from_file.py``, in particular the + ``env_key_to_env_id`` dictionary and the + ``scripts/score_normalization.py`` which stores min and max score for + each environment. + + +Remark: plotting with the ``--rliable`` option is usually slow as +confidence interval need to be computed using bootstrap sampling. diff --git a/docs/guide/quickstart.rst b/docs/guide/quickstart.rst new file mode 100644 index 000000000..d527bb189 --- /dev/null +++ b/docs/guide/quickstart.rst @@ -0,0 +1,51 @@ +.. _quickstart: + +=============== +Getting Started +=============== + +.. note:: + + You can try the following examples online using Google colab |colab| + notebook: `RL Baselines zoo notebook`_ + + +.. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb +.. |colab| image:: ../_static/img/colab.svg + + +The hyperparameters for each environment are defined in +``hyperparameters/algo_name.yml``. + +If the environment exists in this file, then you can train an agent +using: + +:: + + python -m rl_zoo3.train --algo algo_name --env env_id + +Or if you are in the RL Zoo3 folder: + +:: + + python train.py --algo algo_name --env env_id + +For example (with evaluation and checkpoints): + +:: + + python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000 + + + +If the trained agent exists, then you can see it in action using: + +:: + + python -m rl_zoo3.enjoy --algo algo_name --env env_id + +For example, enjoy A2C on Breakout during 5000 timesteps: + +:: + + python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000 diff --git a/docs/guide/sbx.rst b/docs/guide/sbx.rst new file mode 100644 index 000000000..3205f33a3 --- /dev/null +++ b/docs/guide/sbx.rst @@ -0,0 +1,58 @@ +.. _sbx: + +========================== +Stable Baselines Jax (SBX) +========================== + +`Stable Baselines Jax (SBX) `_ is a proof of concept version of Stable-Baselines3 in Jax. + +It provides a minimal number of features compared to SB3 but can be much faster (up to 20x times!): https://twitter.com/araffin2/status/1590714558628253698 + + +It is also compatible with the RL Zoo. +For that you will need to create two files. + +``train_sbx.py``: + +.. code-block:: python + + import rl_zoo3 + import rl_zoo3.train + from rl_zoo3.train import train + from sbx import DQN, PPO, SAC, TQC, DroQ + + + rl_zoo3.ALGOS["tqc"] = TQC + rl_zoo3.ALGOS["droq"] = DroQ + rl_zoo3.ALGOS["sac"] = SAC + rl_zoo3.ALGOS["ppo"] = PPO + rl_zoo3.ALGOS["dqn"] = DQN + rl_zoo3.train.ALGOS = rl_zoo3.ALGOS + rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS + + if __name__ == "__main__": + train() + +Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI. + + +``enjoy_sbx.py``: + +.. code-block:: python + + import rl_zoo3 + import rl_zoo3.enjoy + from rl_zoo3.enjoy import enjoy + from sbx import DQN, PPO, SAC, TQC, DroQ + + + rl_zoo3.ALGOS["tqc"] = TQC + rl_zoo3.ALGOS["droq"] = DroQ + rl_zoo3.ALGOS["sac"] = SAC + rl_zoo3.ALGOS["ppo"] = PPO + rl_zoo3.ALGOS["dqn"] = DQN + rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS + rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS + + if __name__ == "__main__": + enjoy() diff --git a/docs/guide/train.rst b/docs/guide/train.rst new file mode 100644 index 000000000..8d8e18170 --- /dev/null +++ b/docs/guide/train.rst @@ -0,0 +1,120 @@ +.. 
_train: +============== +Train an Agent +============== + +Basic Usage +----------- + +The hyperparameters for each environment are defined in +``hyperparameters/algo_name.yml``. + + +.. note:: + + Once RL Zoo3 is installed, you can do ``python -m rl_zoo3.train`` from any folder; it is equivalent to ``python train.py`` + + +If the environment exists in this file, then you can train an agent using: + +:: + + python train.py --algo algo_name --env env_id + + +.. note:: + + You can use the ``-P`` (``--progress``) option to display a progress bar. + + +Custom Config File +------------------ + +You can use a custom config file, as long as it is a yaml file which contains an ``env_id`` entry: + +:: + + python train.py --algo algo_name --env env_id --conf-file my_yaml.yml + + +You can also use a python file that contains a dictionary called ``hyperparams`` with an entry for each ``env_id``. +(see ``hyperparams/python/ppo_config_example.py`` for an example) + +:: + + # You can pass a path to a python file + python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py + # Or pass a path to a file from a module (for instance my_package.my_file) + python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example + +The advantage of this approach is that you can specify arbitrary python dictionaries +and ensure that all their dependencies are imported in the config file itself. + +Tensorboard, Checkpoints, Evaluation +------------------------------------ + +To train with tensorboard support: + +:: + + python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/ + + +Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env): + +:: + + python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1 + + +Save a checkpoint of the agent every 100000 steps: + +:: + + python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000 + +Resume Training +--------------- + +Continue training (here, load a pretrained agent for Breakout and continue training for 5000 steps): + +:: + + python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000 + +Save Replay Buffer +------------------ + +When using off-policy algorithms, you can also **save the replay buffer** after training: + +:: + + python train.py --algo sac --env Pendulum-v1 --save-replay-buffer + +It will be automatically loaded if present when continuing training. + + +Env keyword arguments +--------------------- + +You can specify keyword arguments to pass to the env constructor in the +command line, using ``--env-kwargs``: + +:: + + python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10 + + +Overwrite hyperparameters +------------------------- + +You can easily overwrite hyperparameters in the command line, using +``--hyperparams``: + +:: + + python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])" + +Note: if you want to pass a string, you need to escape it like that: +``my_string:"'value'"`` diff --git a/docs/guide/tuning.rst b/docs/guide/tuning.rst new file mode 100644 index 000000000..159f15673 --- /dev/null +++ b/docs/guide/tuning.rst @@ -0,0 +1,71 @@ +..
_tuning: +===================== +Hyperparameter Tuning +===================== + +Hyperparameter Tuning +--------------------- + +We use `Optuna `__ for optimizing the +hyperparameters. Not all hyperparameters are tuned, and tuning enforces +certain default hyperparameter settings that may be different from the +official defaults. See +`rl_zoo3/hyperparams_opt.py `__ +for the current settings for each agent. + +Hyperparameters not specified in +`rl_zoo3/hyperparams_opt.py `__ +are taken from the associated YAML file and fall back to the default +values of SB3 if not present. + +Note: when using SuccessiveHalvingPruner (“halving”), you must specify +``--n-jobs > 1`` + +Budget of 1000 trials with a maximum of 50000 steps: + +:: + + python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \ + --sampler tpe --pruner median + +Distributed optimization using a shared database is also possible (see +the corresponding `Optuna +documentation `__): + +:: + + python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db + +Print and save best hyperparameters of an Optuna study: + +:: + + python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10 + +The default budget for hyperparameter tuning is 500 trials and there is +one intermediate evaluation for pruning/early stopping per 100k time +steps. + +Hyperparameters search space +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Note that the default hyperparameters used in the zoo when tuning are +not always the same as the defaults provided in +`stable-baselines3 `__. +Consult the latest source code to be sure of these settings. For +example: + +- PPO tuning assumes a network architecture with ``ortho_init = False`` + when tuning, though it is ``True`` by + `default `__. + You can change that by updating + `rl_zoo3/hyperparams_opt.py `__. + +- Non-episodic rollout in TD3 and DDPG assumes + ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to + reduce the search space. + +When working with continuous actions, we recommend enabling +`gSDE `__ by uncommenting lines in +`rl_zoo3/hyperparams_opt.py `__. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..3ef2d6bc0 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,74 @@ +RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3 +==================================================================== + +`RL Baselines3 Zoo `_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) `_, +reliable implementations of reinforcement learning algorithms in PyTorch. + +Github repository: https://github.com/DLR-RM/rl-baselines3-zoo + +It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos. + +In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings. + +.. toctree:: + :maxdepth: 2 + :caption: User Guide + + guide/install + guide/quickstart + guide/train + guide/plot + guide/enjoy + guide/custom_env + guide/config + guide/integrations + guide/tuning + guide/sbx + + +.. toctree:: + :maxdepth: 1 + :caption: RL Zoo API + + modules/exp_manager + modules/wrappers + modules/callbacks + modules/utils + +.. toctree:: + :maxdepth: 1 + :caption: Misc + + misc/changelog + + +Citing RL Baselines3 Zoo +------------------------ +To cite this project in publications: + +..
code-block:: bibtex + + @misc{rl-zoo3, + author = {Raffin, Antonin}, + title = {RL Baselines3 Zoo}, + year = {2020}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}}, + } + +Contributing +------------ + +To any interested in making the rl baselines better, there are still some improvements +that need to be done. +You can check issues in the `repo `_. + +If you want to contribute, please read `CONTRIBUTING.md `_ first. + +Indices and tables +------------------- + +* :ref:`genindex` +* :ref:`search` +* :ref:`modindex` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 000000000..22b5fff4e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=StableBaselines + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst new file mode 100644 index 000000000..cf5d4b9e1 --- /dev/null +++ b/docs/misc/changelog.rst @@ -0,0 +1,7 @@ +.. _changelog: + +Changelog +========== + + +See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md diff --git a/docs/modules/callbacks.rst b/docs/modules/callbacks.rst new file mode 100644 index 000000000..bb26e1737 --- /dev/null +++ b/docs/modules/callbacks.rst @@ -0,0 +1,8 @@ +.. _callbacks: + + +Callbacks +========= + +.. automodule:: rl_zoo3.callbacks + :members: diff --git a/docs/modules/exp_manager.rst b/docs/modules/exp_manager.rst new file mode 100644 index 000000000..4eb97a4e9 --- /dev/null +++ b/docs/modules/exp_manager.rst @@ -0,0 +1,15 @@ +.. _manager: + +.. automodule:: rl_zoo3.exp_manager + + +Experiment Manager +================== + + +Parameters +---------- + +.. autoclass:: ExperimentManager + :members: + :inherited-members: diff --git a/docs/modules/utils.rst b/docs/modules/utils.rst new file mode 100644 index 000000000..097dedbd4 --- /dev/null +++ b/docs/modules/utils.rst @@ -0,0 +1,8 @@ +.. _utils: + + +Utils +===== + +.. automodule:: rl_zoo3.utils + :members: diff --git a/docs/modules/wrappers.rst b/docs/modules/wrappers.rst new file mode 100644 index 000000000..97bd7b89a --- /dev/null +++ b/docs/modules/wrappers.rst @@ -0,0 +1,8 @@ +.. _wrappers: + + +Wrappers +======== + +.. 
+.. automodule:: rl_zoo3.wrappers
+  :members:
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
new file mode 100644
index 000000000..d306d7e78
--- /dev/null
+++ b/docs/spelling_wordlist.txt
@@ -0,0 +1,129 @@
+py
+env
+atari
+argparse
+Argparse
+TensorFlow
+feedforward
+envs
+VecEnv
+pretrain
+petrained
+tf
+th
+nn
+np
+str
+mujoco
+cpu
+ndarray
+ndarrays
+timestep
+timesteps
+stepsize
+dataset
+adam
+fn
+normalisation
+Kullback
+Leibler
+boolean
+deserialized
+pretrained
+minibatch
+subprocesses
+ArgumentParser
+Tensorflow
+Gaussian
+approximator
+minibatches
+hyperparameters
+hyperparameter
+vectorized
+rl
+colab
+dataloader
+npz
+datasets
+vf
+logits
+num
+Utils
+backpropagate
+prepend
+NaN
+preprocessing
+Cloudpickle
+async
+multiprocess
+tensorflow
+mlp
+cnn
+neglogp
+tanh
+coef
+repo
+Huber
+params
+ppo
+arxiv
+Arxiv
+func
+DQN
+Uhlenbeck
+Ornstein
+multithread
+cancelled
+Tensorboard
+parallelize
+customising
+serializable
+Multiprocessed
+cartpole
+toolset
+lstm
+rescale
+ffmpeg
+avconv
+unnormalized
+Github
+pre
+preprocess
+backend
+attr
+preprocess
+Antonin
+Raffin
+araffin
+Homebrew
+Numpy
+Theano
+rollout
+kfac
+Piecewise
+csv
+nvidia
+visdom
+tensorboard
+preprocessed
+namespace
+sklearn
+GoalEnv
+Torchy
+pytorch
+dicts
+optimizers
+Deprecations
+forkserver
+cuda
+Polyak
+gSDE
+rollouts
+Pyro
+softmax
+stdout
+Contrib
+Quantile
+Huggingface
+Jax
+Optuna
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..73f41c8b8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[tool.ruff]
+# Same as Black.
+line-length = 127
+# Assume Python 3.7
+target-version = "py37"
+# TODO(antonin): activate "RUF" https://beta.ruff.rs/docs/rules/#ruff-specific-rules-ruf
+select = ["E", "F", "B", "UP", "C90"]
+ignore = []
+
+[tool.ruff.per-file-ignores]
+# "./rl_zoo3/plots/all_plots.py"= ["E501"]
+# "./rl_zoo3/plots/plot_train.py"= ["E501"]
+
+
+[tool.ruff.mccabe]
+# Unlike Flake8, default to a complexity level of 10.
+max-complexity = 15
+
+[tool.black]
+line-length = 127
+
+[tool.isort]
+profile = "black"
+line_length = 127
+src_paths = ["stable_baselines3", "rl_zoo3"]
+
+[tool.pytype]
+inputs = ["."]
+# disable = []
+
+[tool.mypy]
+ignore_missing_imports = true
+follow_imports = "silent"
+show_error_codes = true
+exclude = """(?x)(
+    rl_zoo3/hyperparams_opt.py$
+    | rl_zoo3/exp_manager.py$
+  )"""
+
+[tool.pytest.ini_options]
+# Deterministic ordering for tests; useful for pytest-xdist.
+env = [
+    "PYTHONHASHSEED=0"
+]
+
+filterwarnings = [
+    # Tensorboard warnings
+    "ignore::DeprecationWarning:tensorboard",
+    # Gym warnings
+    "ignore::UserWarning:gym",
+]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')"
+]
diff --git a/requirements.txt b/requirements.txt
index 5bb2a0460..d33a536db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 gym==0.21
-stable-baselines3[extra,tests,docs]>=1.8.0a2
-sb3-contrib>=1.8.0a2
+stable-baselines3[extra_no_roms,tests,docs]>=1.8.0a9
+sb3-contrib>=1.8.0a9
 box2d-py==2.3.8
 pybullet
 gym-minigrid
@@ -13,8 +13,8 @@ plotly
 panda-gym==1.1.1  # tmp fix: until compatibility with panda-gym v2
 rliable>=1.0.5
 wandb
-huggingface_sb3>=2.2.1, <3.*
+huggingface_sb3>=2.2.1
 seaborn
 tqdm
 rich
-importlib-metadata~=4.13  # flake8 not compatible with importlib-metadata>5.0
+ruff
diff --git a/rl-trained-agents b/rl-trained-agents
index 1e2a45e5d..1935484b7 160000
--- a/rl-trained-agents
+++ b/rl-trained-agents
@@ -1 +1 @@
-Subproject commit 1e2a45e5d06efd6cc15da6cf2d1939d72dcbdf87
+Subproject commit 1935484b7458967f77d4da1a50b14078d2520071
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
index 556e9553a..53543df2b 100644
--- a/rl_zoo3/callbacks.py
+++ b/rl_zoo3/callbacks.py
@@ -102,12 +102,12 @@ class ParallelTrainCallback(BaseCallback):
 
     TODO:
     - blocking mode: wait for the model to finish updating the policy before collecting new experience
-        at the end of a rollout
+      at the end of a rollout
     - force sync mode: stop training to update to the latest policy for collecting
-        new experience
+      new experience
 
     :param gradient_steps: Number of gradient steps to do before
-        sending the new policy
+      sending the new policy
     :param verbose: Verbosity level
     :param sleep_time: Limit the fps in the thread collecting experience.
""" diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py index 4503121b0..713bb100a 100644 --- a/rl_zoo3/exp_manager.py +++ b/rl_zoo3/exp_manager.py @@ -782,7 +782,7 @@ def objective(self, trial: optuna.Trial) -> float: print("============") print("Sampled hyperparams:") pprint(sampled_hyperparams) - raise optuna.exceptions.TrialPruned() + raise optuna.exceptions.TrialPruned() from e is_pruned = eval_callback.is_pruned reward = eval_callback.last_mean_reward diff --git a/rl_zoo3/train.py b/rl_zoo3/train.py index 1e52a5fc0..f8a6926da 100644 --- a/rl_zoo3/train.py +++ b/rl_zoo3/train.py @@ -204,10 +204,10 @@ def train() -> None: if args.track: try: import wandb - except ImportError: + except ImportError as e: raise ImportError( "if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`" - ) + ) from e run_name = f"{args.env}__{args.algo}__{args.seed}__{int(time.time())}" tags = args.wandb_tags + [f"v{sb3.__version__}"] diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py index f23265883..91e9071b9 100644 --- a/rl_zoo3/utils.py +++ b/rl_zoo3/utils.py @@ -314,7 +314,7 @@ def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]: args_files = glob.glob(os.path.join(log_folder, algo, model_folder, "*/args.yml")) if len(args_files) != 1: continue # we expect only one sub-folder with an args.yml file - with open(args_files[0], "r") as fh: + with open(args_files[0]) as fh: env_id = yaml.load(fh, Loader=yaml.UnsafeLoader)["env"] model_name = ModelName(algo, EnvironmentName(env_id)) diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt index c3d22c01c..13ef2a834 100644 --- a/rl_zoo3/version.txt +++ b/rl_zoo3/version.txt @@ -1 +1 @@ -1.8.0a2 +1.8.0a9 diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py index e94e51a70..0634a7967 100644 --- a/rl_zoo3/wrappers.py +++ b/rl_zoo3/wrappers.py @@ -301,12 +301,12 @@ def __init__(self, env: gym.Env): env_id: str = env.unwrapped.spec.id # By default no masking - self.mask = np.ones_like((env.observation_space.sample())) + self.mask = np.ones_like(env.observation_space.sample()) try: # Mask velocity self.mask[self.velocity_indices[env_id]] = 0.0 - except KeyError: - raise NotImplementedError(f"Velocity masking not implemented for {env_id}") + except KeyError as e: + raise NotImplementedError(f"Velocity masking not implemented for {env_id}") from e def observation(self, observation: np.ndarray) -> np.ndarray: return observation * self.mask diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 14b7d4b95..000000000 --- a/setup.cfg +++ /dev/null @@ -1,47 +0,0 @@ -[metadata] -# This includes the license file in the wheel. -license_files = LICENSE - -[tool:pytest] -filterwarnings = - # Tensorboard warnings - ignore::DeprecationWarning:tensorboard - # Gym warnings - ignore::UserWarning:gym -markers = - slow: marks tests as slow (deselect with '-m "not slow"') - serial - -[pytype] -inputs = . 
-
-[flake8]
-# line breaks before and after binary operators
-ignore = W503,W504,E203,E231
-# Ignore import not used when aliases are defined
-per-file-ignores =
-    ./rl_zoo3/plots/all_plots.py:E501
-    ./rl_zoo3/plots/plot_train.py:E501
-
-exclude =
-    # No need to traverse our git directory
-    .git,
-    # There's no value in checking cache directories
-    __pycache__,
-max-complexity = 15
-# The GitHub editor is 127 chars wide
-max-line-length = 127
-
-[isort]
-profile = black
-line_length = 127
-src_paths = stable_baselines3,rl_zoo3
-
-[mypy]
-ignore_missing_imports = True
-follow_imports = silent
-show_error_codes = True
-exclude = (?x)(
-    rl_zoo3/hyperparams_opt.py$
-    | rl_zoo3/exp_manager.py$
-    )
diff --git a/setup.py b/setup.py
index 73ddc7a48..458a6ff2f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,8 +27,8 @@
     },
     entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
     install_requires=[
-        "sb3-contrib>=1.8.0a2",
-        "huggingface_sb3>=2.2.1, <3.*",
+        "sb3-contrib>=1.8.0a9",
+        "huggingface_sb3>=2.2.1",
         "tqdm",
         "rich",
         "optuna",