diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 10e487a46..256a7598f 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -61,7 +61,7 @@ body:
required: true
- label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
required: true
- - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md)
+ - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
required: true
- label: I have provided a minimal working example to reproduce the bug
required: true
diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml
index a519436a7..4da584fec 100644
--- a/.github/ISSUE_TEMPLATE/question.yml
+++ b/.github/ISSUE_TEMPLATE/question.yml
@@ -24,7 +24,7 @@ body:
required: true
- label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
required: true
- - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md)
+ - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
required: true
- label: If code there is, it is minimal and working
required: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 622bba139..0afdeba88 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,6 +31,13 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
+
+ # Install Atari Roms
+ pip install autorom
+ wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+ base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+ AutoROM --accept-license --source-file Roms.tar.gz
+
pip install setuptools==65.5.0
# cpu version of pytorch - faster to download
pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
@@ -41,17 +48,20 @@ jobs:
# install parking-env to test HER (pinned so it works with gym 0.21)
pip install highway-env==1.5.0
pip install -e .
- - name: Type check
+ - name: Lint with ruff
run: |
- make type
- # skip mypy type check for python3.7 (last forever for some reason)
- if: "!(matrix.python-version == '3.7')"
+ make lint
- name: Check codestyle
run: |
make check-codestyle
- - name: Lint with flake8
+ - name: Build doc
run: |
- make lint
+ make doc
+ - name: Type check
+ run: |
+ make type
+ # skip mypy type check for python3.7 (last forever for some reason)
+ if: "!(matrix.python-version == '3.7')"
- name: Test with pytest
run: |
make pytest
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index 3e2d6d27b..cf45a9fb7 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -14,6 +14,7 @@ jobs:
env:
TERM: xterm-256color
FORCE_COLOR: 1
+
# Skip CI if [ci skip] in the commit message
if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
runs-on: ubuntu-latest
@@ -31,6 +32,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
+
+ # Install Atari Roms
+ pip install autorom
+ wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+ base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+ AutoROM --accept-license --source-file Roms.tar.gz
+
+
pip install setuptools==65.5.0
# cpu version of pytorch - faster to download
pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/.gitignore b/.gitignore
index 87c26d1a2..b428b9386 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,8 @@ runs
hub
*.mp4
*.json
+_build/
+
# Setuptools distribution and build folders.
/dist/
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 000000000..6753d8744
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,16 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+ configuration: docs/conf.py
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats: all
+
+# Set requirements using conda env
+conda:
+ environment: docs/conda_env.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3596cdce6..ded42f17d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,11 @@
-## Release 1.8.0a2 (WIP)
+## Release 1.8.0a9 (WIP)
### Breaking Changes
- Upgraded to SB3 >= 1.8.0
### New Features
- Tuned hyperparameters for RecurrentPPO on Swimmer
+- Documentation is now built using Sphinx and hosted on Read the Docs
### Bug fixes
- Set ``highway-env`` version to 1.5 and ``setuptools to`` v65.5 for the CI
@@ -18,6 +19,7 @@
- Added support for `ruff` (fast alternative to flake8) in the Makefile
- Removed Gitlab CI file
- Replaced deprecated `optuna.suggest_loguniform(...)` by `optuna.suggest_float(..., log=True)`
+- Switched to `ruff` and `pyproject.toml`
## Release 1.7.0 (2023-01-10)
diff --git a/Makefile b/Makefile
index cabb0ff0b..e87e65efb 100644
--- a/Makefile
+++ b/Makefile
@@ -18,32 +18,34 @@ type: pytype mypy
lint:
# stop the build if there are Python syntax errors or undefined names
- # see https://lintlyci.github.io/Flake8Rules/
- flake8 ${LINT_PATHS} --count --select=E9,F63,F7,F82 --show-source --statistics
- # exit-zero treats all errors as warnings.
- flake8 ${LINT_PATHS} --count --exit-zero --statistics
-
-ruff:
- # stop the build if there are Python syntax errors or undefined names
- # see https://lintlyci.github.io/Flake8Rules/
+ # see https://www.flake8rules.com/
ruff ${LINT_PATHS} --select=E9,F63,F7,F82 --show-source
# exit-zero treats all errors as warnings.
- ruff ${LINT_PATHS} --exit-zero --line-length 127
+ ruff ${LINT_PATHS} --exit-zero
format:
# Sort imports
isort ${LINT_PATHS}
# Reformat using black
- black -l 127 ${LINT_PATHS}
+ black ${LINT_PATHS}
check-codestyle:
# Sort imports
isort --check ${LINT_PATHS}
# Reformat using black
- black --check -l 127 ${LINT_PATHS}
+ black --check ${LINT_PATHS}
commit-checks: format type lint
+doc:
+ cd docs && make html
+
+spelling:
+ cd docs && make spelling
+
+clean:
+ cd docs && make clean
+
docker: docker-cpu docker-gpu
docker-cpu:
@@ -66,4 +68,4 @@ test-release:
python setup.py bdist_wheel
twine upload --repository-url https://test.pypi.org/legacy/ dist/*
-.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest
+.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest
diff --git a/README.md b/README.md
index cf672ba9a..600c2e54d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
![CI](https://github.com/DLR-RM/rl-baselines3-zoo/workflows/CI/badge.svg)
+[![Documentation Status](https://readthedocs.org/projects/rl-baselines3-zoo/badge/?version=master)](https://rl-baselines3-zoo.readthedocs.io/en/master/?badge=master)
[![coverage report](https://gitlab.com/araffin/rl-baselines3-zoo/badges/master/coverage.svg)](https://gitlab.com/araffin/rl-baselines3-zoo/-/commits/master) [![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -26,6 +27,10 @@ Goals of this repository:
This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).
+## Documentation
+
+Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)
+
## Installation
### Minimal installation
@@ -59,99 +64,22 @@ If the environment exists in this file, then you can train an agent using:
```
python train.py --algo algo_name --env env_id
```
-You can use `-P` (`--progress`) option to display a progress bar.
-
-Using a custom config file when it is a yaml file with a which contains a `env_id` entry:
-```
-python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
-```
-
-You can also use a python file that contains a dictionary called `hyperparams` with an entry for each `env_id`.
-(see `hyperparams/python/ppo_config_example.py` for an example)
-```bash
-# You can pass a path to a python file
-python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
-# Or pass a path to a file from a module (for instance my_package.my_file)
-python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
-```
-The advantage of this approach is that you can specify arbitrary python dictionaries
-and ensure that all their dependencies are imported in the config file itself.
-
-
-For example (with tensorboard support):
-```
-python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
-```
Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
```
python train.py --algo sac --env HalfCheetahBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
```
-Save a checkpoint of the agent every 100000 steps:
-```
-python train.py --algo td3 --env HalfCheetahBulletEnv-v0 --save-freq 100000
-```
+More examples are available in the [documentation](https://rl-baselines3-zoo.readthedocs.io).
-Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
-```
-python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
-```
-When using off-policy algorithms, you can also save the replay buffer after training:
-```
-python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
-```
-It will be automatically loaded if present when continuing training.
+## Integrations
-## Plot Scripts
+The RL Zoo integrates with other libraries/services such as Weights & Biases for experiment tracking and Hugging Face for storing/sharing trained models. You can find out more in the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/integrations.html) of the documentation.
-Plot scripts (to be documented, see "Results" sections in SB3 documentation):
-- `scripts/all_plots.py`/`scripts/plot_from_file.py` for plotting evaluations
-- `scripts/plot_train.py` for plotting training reward/success
-
-*Examples (on the current collection)*
-
-Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving window of 500 episodes for all the `Fetch` environment with `HER` algorithm:
-
-```
-python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
-```
-
-Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and Ant PyBullet environments:
-
-```
-python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
-```
-
-## Plot with the rliable library
-
-The RL zoo integrates some of [rliable](https://agarwl.github.io/rliable/) library features.
-You can find a visual explanation of the tools used by rliable in this [blog post](https://araffin.github.io/post/rliable/).
-
-First, you need to install [rliable](https://github.com/google-research/rliable).
-
-Note: Python 3.7+ is required in that case.
-
-Then export your results to a file using the `all_plots.py` script (see above):
-```
-python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
-```
-
-You can now use the `plot_from_file.py` script with `--rliable`, `--versus` and `--iqm` arguments:
-```
-python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
-```
-
-Note: you may need to edit `plot_from_file.py`, in particular the `env_key_to_env_id` dictionary
-and the `scripts/score_normalization.py` which stores min and max score for each environment.
-
-Remark: plotting with the `--rliable` option is usually slow as confidence interval need to be computed using bootstrap sampling.
-
-
-## Custom Environment
+## Plot Scripts
-The easiest way to add support for a custom environment is to edit `rl_zoo3/import_envs.py` and register your environment here. Then, you need to add a section for it in the hyperparameters file (`hyperparams/algo.yml` or a custom yaml file that you can specify using `--conf-file` argument).
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/plot.html) of the documentation.
## Enjoy a Trained Agent
@@ -168,237 +96,13 @@ For example, enjoy A2C on Breakout during 5000 timesteps:
python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
```
-If you have trained an agent yourself, you need to do:
-```
-# exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
-```
-
-To load the best model (when using evaluation environment):
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
-```
-
-To load a checkpoint (here the checkpoint name is `rl_model_10000_steps.zip`):
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
-```
-
-To load the latest checkpoint:
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
-```
-
-## Huggingface Hub Integration
-
-Upload model to hub (same syntax as for `enjoy.py`):
-```
-python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
-```
-you can choose custom `repo-name` (default: `{algo}-{env_id}`) by passing a `--repo-name` argument.
-
-Download model from hub:
-```
-python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
-```
-
-## Hyperparameter yaml syntax
-
-The syntax used in `hyperparameters/algo_name.yml` for setting hyperparameters (likewise the syntax to [overwrite hyperparameters](https://github.com/DLR-RM/rl-baselines3-zoo#overwrite-hyperparameters) on the cli) may be specialized if the argument is a function. See examples in the `hyperparameters/` directory. For example:
-
-- Specify a linear schedule for the learning rate:
-
-```yaml
- learning_rate: lin_0.012486195510232303
-```
-
-Specify a different activation function for the network:
-
-```yaml
- policy_kwargs: "dict(activation_fn=nn.ReLU)"
-```
-
-For a custom policy:
-
-```yaml
- policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy
-```
-
-
-## Hyperparameter Tuning
-
-We use [Optuna](https://optuna.org/) for optimizing the hyperparameters.
-Not all hyperparameters are tuned, and tuning enforces certain default hyperparameter settings that may be different from the official defaults. See [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) for the current settings for each agent.
-
-Hyperparameters not specified in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) are taken from the associated YAML file and fallback to the default values of SB3 if not present.
-
-Note: when using SuccessiveHalvingPruner ("halving"), you must specify `--n-jobs > 1`
-
-Budget of 1000 trials with a maximum of 50000 steps:
-
-```
-python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
- --sampler tpe --pruner median
-```
-
-Distributed optimization using a shared database is also possible (see the corresponding [Optuna documentation](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/004_distributed.html)):
-```
-python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db
-```
-
-Print and save best hyperparameters of an Optuna study:
-```
-python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10
-```
-
-The default budget for hyperparameter tuning is 500 trials and there is one intermediate evaluation for pruning/early stopping per 100k time steps.
-
-### Hyperparameters search space
-
-Note that the default hyperparameters used in the zoo when tuning are not always the same as the defaults provided in [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/modules/base.html). Consult the latest source code to be sure of these settings. For example:
-
-- PPO tuning assumes a network architecture with `ortho_init = False` when tuning, though it is `True` by [default](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#ppo-policies). You can change that by updating [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py).
-
-- Non-episodic rollout in TD3 and DDPG assumes `gradient_steps = train_freq` and so tunes only `train_freq` to reduce the search space.
-
-When working with continuous actions, we recommend to enable [gSDE](https://arxiv.org/abs/2005.05719) by uncommenting lines in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py).
-
-
-## Experiment tracking
-
-We support tracking experiment data such as learning curves and hyperparameters via [Weights and Biases](https://wandb.ai).
-
-The following command
-```
-python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
-```
-
-yields a tracked experiment at this [URL](https://wandb.ai/openrlbenchmark/sb3/runs/1b65ldmh).
-
-To add a tag to the run, (e.g. `optimized`), use the argument `--wandb-tags optimized`.
-
-## Env normalization
+## Hyperparameter Tuning
-In the hyperparameter file, `normalize: True` means that the training environment will be wrapped in a [VecNormalize](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/vec_normalize.py#L13) wrapper.
-
-[Normalization uses](https://github.com/DLR-RM/rl-baselines3-zoo/issues/64) the default parameters of `VecNormalize`, with the exception of `gamma` which is set to match that of the agent. This can be [overridden](https://github.com/DLR-RM/rl-baselines3-zoo/blob/v0.10.0/hyperparams/sac.yml#L239) using the appropriate `hyperparameters/algo_name.yml`, e.g.
-
-```yaml
- normalize: "{'norm_obs': True, 'norm_reward': False}"
-```
-
-
-## Env Wrappers
-
-You can specify in the hyperparameter config one or more wrapper to use around the environment:
-
-for one wrapper:
-```yaml
-env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
-```
-
-for multiple, specify a list:
-
-```yaml
-env_wrapper:
- - rl_zoo3.wrappers.DoneOnSuccessWrapper:
- reward_offset: 1.0
- - sb3_contrib.common.wrappers.TimeFeatureWrapper
-```
-
-Note that you can easily specify parameters too.
-
-By default, the environment is wrapped with a `Monitor` wrapper to record episode statistics.
-You can specify arguments to it using `monitor_kwargs` parameter to log additional data.
-That data *must* be present in the info dictionary at the last step of each episode.
-
-For instance, for recording success with goal envs (e.g. `FetchReach-v1`):
-
-```yaml
-monitor_kwargs: dict(info_keywords=('is_success',))
-```
-
-or recording final x position with `Ant-v3`:
-```yaml
-monitor_kwargs: dict(info_keywords=('x_position',))
-```
-
-Note: for known `GoalEnv` like `FetchReach`, `info_keywords=('is_success',)` is actually the default.
-
-## VecEnvWrapper
-
-You can specify which `VecEnvWrapper` to use in the config, the same way as for env wrappers (see above), using the `vec_env_wrapper` key:
-
-For instance:
-```yaml
-vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
-```
-
-Note: `VecNormalize` is supported separately using `normalize` keyword, and `VecFrameStack` has a dedicated keyword `frame_stack`.
-
-## Callbacks
-
-Following the same syntax as env wrappers, you can also add custom callbacks to use during training.
-
-```yaml
-callback:
- - rl_zoo3.callbacks.ParallelTrainCallback:
- gradient_steps: 256
-```
-
-## Env keyword arguments
-
-You can specify keyword arguments to pass to the env constructor in the command line, using `--env-kwargs`:
-
-```
-python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
-```
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/tuning.html) of the documentation.
-## Overwrite hyperparameters
+## Custom Configuration
-You can easily overwrite hyperparameters in the command line, using ``--hyperparams``:
-
-```
-python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
-```
-
-Note: if you want to pass a string, you need to escape it like that: `my_string:"'value'"`
-
-## Record a Video of a Trained Agent
-
-Record 1000 steps with the latest saved model:
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
-```
-
-Use the best saved model instead:
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
-```
-
-Record a video of a checkpoint saved during training (here the checkpoint name is `rl_model_10000_steps.zip`):
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
-```
-
-## Record a Video of a Training Experiment
-
-Apart from recording videos of specific saved models, it is also possible to record a video of a training experiment where checkpoints have been saved.
-
-Record 1000 steps for each checkpoint, latest and best saved models:
-
-```
-python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
-```
-
-The previous command will create a `mp4` file. To convert this file to `gif` format as well:
-
-```
-python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
-```
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/config.html) of the documentation.
## Current Collection: 195+ Trained Agents!
@@ -577,34 +281,6 @@ train()
```
-### Docker Images
-
-Build docker image (CPU):
-```
-make docker-cpu
-```
-
-GPU:
-```
-USE_GPU=True make docker-gpu
-```
-
-Pull built docker image (CPU):
-```
-docker pull stablebaselines/rl-baselines3-zoo-cpu
-```
-
-GPU image:
-```
-docker pull stablebaselines/rl-baselines3-zoo
-```
-
-Run script in the docker image:
-
-```
-./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
-```
-
## Tests
To run tests, first install pytest, then:
@@ -639,4 +315,4 @@ If you trained an agent that is not present in the RL Zoo, please submit a Pull
## Contributors
-We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori) [@Shade5](https://github.com/Shade5) [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum)
+We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori), [@Shade5](https://github.com/Shade5), [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum), [@qgallouedec](https://github.com/qgallouedec)
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 000000000..938bf87e1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+# For debug: SPHINXOPTS = -nWT --keep-going -vvv
+SPHINXOPTS = # -W # make warnings fatal (disabled because of gym in the wrappers)
+SPHINXBUILD = sphinx-build
+SPHINXPROJ = RLZoo
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..c007b52b2
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,26 @@
+## RL Zoo3 Documentation
+
+This folder contains documentation for the RL Zoo.
+
+
+### Build the Documentation
+
+#### Install Sphinx and Theme
+Execute these commands in the project root:
+```
+pip install stable_baselines3[docs]
+pip install -e .
+```
+
+#### Building the Docs
+
+In the `docs/` folder:
+```
+make html
+```
+
+If you want to rebuild the documentation automatically each time a file is changed:
+
+```
+sphinx-autobuild . _build/html
+```
diff --git a/docs/_static/css/baselines_theme.css b/docs/_static/css/baselines_theme.css
new file mode 100644
index 000000000..450864efe
--- /dev/null
+++ b/docs/_static/css/baselines_theme.css
@@ -0,0 +1,61 @@
+/* Main colors adapted from pytorch doc */
+:root{
+ --main-bg-color: #343A40;
+ --link-color: #FD7E14;
+}
+
+/* Header fonts */
+h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
+ font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
+}
+
+
+/* Docs background */
+.wy-side-nav-search{
+ background-color: var(--main-bg-color);
+}
+
+/* Mobile version */
+.wy-nav-top{
+ background-color: var(--main-bg-color);
+}
+
+/* Change link colors (except for the menu) */
+a {
+ color: var(--link-color);
+}
+
+a:hover {
+ color: #4F778F;
+}
+
+.wy-menu a {
+ color: #b3b3b3;
+}
+
+.wy-menu a:hover {
+ color: #b3b3b3;
+}
+
+a.icon.icon-home {
+ color: #b3b3b3;
+}
+
+.version{
+ color: var(--link-color) !important;
+}
+
+
+/* Make code blocks have a background */
+.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] {
+ background: #f8f8f8;;
+}
+
+/* Change style of types in the docstrings .rst-content .field-list */
+.field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate
+{
+ border: None;
+ padding-left: 0;
+ padding-right: 0;
+ color: #404040;
+}
diff --git a/docs/_static/img/colab-badge.svg b/docs/_static/img/colab-badge.svg
new file mode 100644
index 000000000..c08066ee3
--- /dev/null
+++ b/docs/_static/img/colab-badge.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/img/colab.svg b/docs/_static/img/colab.svg
new file mode 100644
index 000000000..c2d30e973
--- /dev/null
+++ b/docs/_static/img/colab.svg
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/conda_env.yml b/docs/conda_env.yml
new file mode 100644
index 000000000..98a550820
--- /dev/null
+++ b/docs/conda_env.yml
@@ -0,0 +1,20 @@
+name: root
+channels:
+ - pytorch
+ - defaults
+dependencies:
+ - cpuonly=1.0=0
+ - pip=21.1
+ - python=3.7
+ - pytorch=1.11=py3.7_cpu_0
+ - pip:
+ - gym==0.21
+ - cloudpickle
+ - opencv-python-headless
+ - pandas
+ - numpy
+ - matplotlib
+ - sphinx_autodoc_typehints
+ - sphinx>=4.2
+ - sphinx_rtd_theme>=1.0
+ - sphinx_copybutton
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..3f0348af7
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,230 @@
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file does only contain a selection of the most common options. For a
+# full list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+from typing import Dict, List
+from unittest.mock import MagicMock
+
+# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
+# PyEnchant.
+try:
+ import sphinxcontrib.spelling # noqa: F401
+
+ enable_spell_check = True
+except ImportError:
+ enable_spell_check = False
+
+# Try to enable copy button
+try:
+ import sphinx_copybutton # noqa: F401
+
+ enable_copy_button = True
+except ImportError:
+ enable_copy_button = False
+
+# source code directory, relative to this file, for sphinx-autobuild
+sys.path.insert(0, os.path.abspath(".."))
+
+
+class Mock(MagicMock):
+ __subclasses__ = [] # type: ignore
+
+ @classmethod
+ def __getattr__(cls, name):
+ return MagicMock()
+
+
+# Mock modules that require C modules
+# Note: because of that we cannot test examples using CI
+# 'torch', 'torch.nn', 'torch.nn.functional',
+# Do not mock modules for now, we may need to do that for Read the Docs later
+MOCK_MODULES: List[str] = []
+sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
+
+# Read version from file
+version_file = os.path.join(os.path.dirname(__file__), "../rl_zoo3", "version.txt")
+with open(version_file) as file_handler:
+ __version__ = file_handler.read().strip()
+
+# -- Project information -----------------------------------------------------
+
+project = "RL Baselines3 Zoo"
+copyright = "2023, Stable Baselines3"
+author = "Stable Baselines3 Contributors"
+
+# The short X.Y version
+version = "master (" + __version__ + ")"
+# The full version, including alpha/beta/rc tags
+release = __version__
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx_autodoc_typehints",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.viewcode",
+ # 'sphinx.ext.intersphinx',
+ # 'sphinx.ext.doctest'
+]
+
+if enable_spell_check:
+ extensions.append("sphinxcontrib.spelling")
+
+if enable_copy_button:
+ extensions.append("sphinx_copybutton")
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ".rst"
+
+# The master toctree document.
+master_doc = "index"
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = "en"
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path .
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+
+# Fix for read the docs
+on_rtd = os.environ.get("READTHEDOCS") == "True"
+if on_rtd:
+ html_theme = "default"
+else:
+ html_theme = "sphinx_rtd_theme"
+
+html_logo = "../images/car.jpg"
+
+
+def setup(app):
+ app.add_css_file("css/baselines_theme.css")
+
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself. Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "RLZoo3doc"
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements: Dict[str, str] = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, "RLZoo3.tex", "RL Baselines3 Zoo Documentation", "Stable Baselines3 Contributors", "manual"),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "rl_zoo3", "RL Baselines3 Zoo Documentation", [author], 1)]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (
+ master_doc,
+ "RLZoo3",
+ "RL Baselines3 Zoo Documentation",
+ author,
+ "RLZoo3",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
+]
+
+
+# -- Extension configuration -------------------------------------------------
+
+# Example configuration for intersphinx: refer to the Python standard library.
+# intersphinx_mapping = {
+# 'python': ('https://docs.python.org/3/', None),
+# 'numpy': ('http://docs.scipy.org/doc/numpy/', None),
+# 'torch': ('http://pytorch.org/docs/master/', None),
+# }
diff --git a/docs/guide/config.rst b/docs/guide/config.rst
new file mode 100644
index 000000000..0aa5918f0
--- /dev/null
+++ b/docs/guide/config.rst
@@ -0,0 +1,122 @@
+.. _config:
+
+=============
+Configuration
+=============
+
+Hyperparameter yaml syntax
+--------------------------
+
+The syntax used in ``hyperparams/algo_name.yml`` for setting
+hyperparameters (likewise the syntax to `overwrite
+hyperparameters `__
+on the CLI) may be specialized if the argument is a function. See
+examples in the ``hyperparams/`` directory. For example:
+
+- Specify a linear schedule for the learning rate:
+
+.. code:: yaml
+
+ learning_rate: lin_0.012486195510232303
+
+Specify a different activation function for the network:
+
+.. code:: yaml
+
+ policy_kwargs: "dict(activation_fn=nn.ReLU)"
+
+For a custom policy:
+
+.. code:: yaml
+
+ policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy
+
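+For reference, the ``lin_`` prefix used above corresponds to a schedule that linearly
+anneals the value from the given initial value down to 0 over training. A minimal
+sketch of such a schedule function (only an illustration, the RL Zoo ships its own helper):
+
+.. code:: python
+
+    from typing import Callable
+
+    def linear_schedule(initial_value: float) -> Callable[[float], float]:
+        """Return a function that linearly anneals from ``initial_value`` to 0."""
+
+        def func(progress_remaining: float) -> float:
+            # progress_remaining goes from 1 (start of training) to 0 (end)
+            return progress_remaining * initial_value
+
+        return func
+
+    # e.g. ``learning_rate: lin_0.001`` behaves like linear_schedule(0.001)
+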
+Env Normalization
+-----------------
+
+In the hyperparameter file, ``normalize: True`` means that the training
+environment will be wrapped in a
+`VecNormalize `__
+wrapper.
+
+`Normalization
+uses `__ the
+default parameters of ``VecNormalize``, with the exception of ``gamma``
+which is set to match that of the agent. This can be
+`overridden `__
+using the appropriate ``hyperparams/algo_name.yml``, e.g.
+
+.. code:: yaml
+
+ normalize: "{'norm_obs': True, 'norm_reward': False}"
+
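+Concretely, ``normalize: True`` corresponds to wrapping the training env roughly as in
+the following simplified sketch (the experiment manager also takes care of saving and
+reloading the normalization statistics):
+
+.. code:: python
+
+    import gym
+
+    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+
+    # Simplified sketch of what ``normalize: True`` does for the training env
+    venv = DummyVecEnv([lambda: gym.make("Pendulum-v1")])
+    # gamma is set to match the agent discount factor (0.99 is only an example)
+    venv = VecNormalize(venv, norm_obs=True, norm_reward=True, gamma=0.99)
+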
+Env Wrappers
+------------
+
+You can specify in the hyperparameter config one or more wrappers to use
+around the environment:
+
+for one wrapper:
+
+.. code:: yaml
+
+ env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
+
+for multiple, specify a list:
+
+.. code:: yaml
+
+ env_wrapper:
+ - rl_zoo3.wrappers.DoneOnSuccessWrapper:
+ reward_offset: 1.0
+ - sb3_contrib.common.wrappers.TimeFeatureWrapper
+
+Note that you can easily specify parameters too.
+
+By default, the environment is wrapped with a ``Monitor`` wrapper to
+record episode statistics. You can pass arguments to it using the
+``monitor_kwargs`` parameter to log additional data. That data *must* be
+present in the info dictionary at the last step of each episode.
+
+For instance, for recording success with goal envs
+(e.g. ``FetchReach-v1``):
+
+.. code:: yaml
+
+ monitor_kwargs: dict(info_keywords=('is_success',))
+
+or recording final x position with ``Ant-v3``:
+
+.. code:: yaml
+
+ monitor_kwargs: dict(info_keywords=('x_position',))
+
+Note: for known ``GoalEnv`` like ``FetchReach``,
+``info_keywords=('is_success',)`` is actually the default.
+
+VecEnvWrapper
+-------------
+
+You can specify which ``VecEnvWrapper`` to use in the config, the same
+way as for env wrappers (see above), using the ``vec_env_wrapper`` key:
+
+For instance:
+
+.. code:: yaml
+
+ vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
+
+Note: ``VecNormalize`` is supported separately using the ``normalize``
+keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``.
+
+Callbacks
+---------
+
+Following the same syntax as env wrappers, you can also add custom
+callbacks to use during training.
+
+.. code:: yaml
+
+ callback:
+ - rl_zoo3.callbacks.ParallelTrainCallback:
+ gradient_steps: 256
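+
+A custom callback referenced this way is a regular SB3 callback. Below is a minimal,
+hypothetical sketch of one (the class and module names are placeholders, not part of
+the RL Zoo):
+
+.. code:: python
+
+    from stable_baselines3.common.callbacks import BaseCallback
+
+
+    class MyCustomCallback(BaseCallback):
+        """Hypothetical callback: stop training after a fixed number of steps."""
+
+        def __init__(self, max_steps: int = 100_000, verbose: int = 0):
+            super().__init__(verbose)
+            self.max_steps = max_steps
+
+        def _on_step(self) -> bool:
+            # Returning False stops training
+            return self.num_timesteps < self.max_steps
+
+It would then be referenced in the config as ``my_package.callbacks.MyCustomCallback``,
+together with its keyword arguments, exactly like ``ParallelTrainCallback`` above.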
diff --git a/docs/guide/custom_env.rst b/docs/guide/custom_env.rst
new file mode 100644
index 000000000..5c6ab016d
--- /dev/null
+++ b/docs/guide/custom_env.rst
@@ -0,0 +1,11 @@
+.. _custom:
+
+==================
+Custom Environment
+==================
+
+The easiest way to add support for a custom environment is to edit
+``rl_zoo3/import_envs.py`` and register your environment there. Then, you
+need to add a section for it in the hyperparameters file
+(``hyperparams/algo.yml`` or a custom yaml file that you can specify
+using the ``--conf-file`` argument).
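+
+As a minimal sketch (the environment id, module and class below are hypothetical),
+the registration in ``rl_zoo3/import_envs.py`` could look like this:
+
+.. code-block:: python
+
+    from gym.envs.registration import register
+
+    # Hypothetical custom environment defined in my_package/my_env.py
+    register(
+        id="MyCustomEnv-v0",
+        entry_point="my_package.my_env:MyCustomEnv",
+        max_episode_steps=500,
+    )
+
+A matching ``MyCustomEnv-v0`` section then goes into the hyperparameters file,
+using the same keys as the built-in environments.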
diff --git a/docs/guide/enjoy.rst b/docs/guide/enjoy.rst
new file mode 100644
index 000000000..95097f811
--- /dev/null
+++ b/docs/guide/enjoy.rst
@@ -0,0 +1,100 @@
+.. _enjoy:
+
+=====================
+Enjoy a Trained Agent
+=====================
+
+.. note::
+
+ To download the repo with the trained agents, you must use
+ ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo``
+ in order to clone the submodule too.
+
+
+Enjoy a trained agent
+---------------------
+
+If the trained agent exists, then you can see it in action using:
+
+::
+
+ python enjoy.py --algo algo_name --env env_id
+
+For example, enjoy A2C on Breakout during 5000 timesteps:
+
+::
+
+ python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
+
+If you have trained an agent yourself, you need to do:
+
+::
+
+ # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
+
+Load Checkpoints, Best Model
+-----------------------------
+
+To load the best model (when using evaluation environment):
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
+
+To load a checkpoint (here the checkpoint name is
+``rl_model_10000_steps.zip``):
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
+
+To load the latest checkpoint:
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
+
+
+Record a Video of a Trained Agent
+---------------------------------
+
+Record 1000 steps with the latest saved model:
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
+
+Use the best saved model instead:
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
+
+Record a video of a checkpoint saved during training (here the
+checkpoint name is ``rl_model_10000_steps.zip``):
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
+
+
+Record a Video of a Training Experiment
+---------------------------------------
+
+Apart from recording videos of specific saved models, it is also
+possible to record a video of a training experiment where checkpoints
+have been saved.
+
+Record 1000 steps for each checkpoint, latest and best saved models:
+
+::
+
+ python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
+
+The previous command will create an ``mp4`` file. To convert this file to
+``gif`` format as well:
+
+::
+
+ python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
diff --git a/docs/guide/install.rst b/docs/guide/install.rst
new file mode 100644
index 000000000..dcd5c8587
--- /dev/null
+++ b/docs/guide/install.rst
@@ -0,0 +1,87 @@
+.. _install:
+
+Installation
+============
+
+Prerequisites
+-------------
+
+RL Zoo requires Python 3.7+ and PyTorch >= 1.11.
+
+
+Minimal Installation
+--------------------
+
+To install RL Zoo with pip, execute:
+
+.. code-block:: bash
+
+ pip install rl_zoo3
+
+From source:
+
+.. code-block:: bash
+
+ git clone https://github.com/DLR-RM/rl-baselines3-zoo
+ cd rl-baselines3-zoo/
+ pip install -e .
+
+.. note::
+
+ You can run ``python -m rl_zoo3.train`` from any folder, and you also have access to the ``rl_zoo3`` command line interface: for instance, ``rl_zoo3 train`` is equivalent to ``python train.py``.
+
+
+
+Full installation
+-----------------
+
+With extra envs and test dependencies:
+
+
+.. note::
+
+ If you want to use Atari games, you will additionally need to run ``pip install "autorom[accept-rom-license]"``
+ to download the ROMs.
+
+
+.. code-block:: bash
+
+ apt-get install swig cmake ffmpeg
+ pip install -r requirements.txt
+
+
+Please see the `Stable Baselines3 documentation `_ for alternative ways to install Stable Baselines3.
+
+
+Docker Images
+-------------
+
+Build docker image (CPU):
+
+::
+
+ make docker-cpu
+
+GPU:
+
+::
+
+ USE_GPU=True make docker-gpu
+
+Pull built docker image (CPU):
+
+::
+
+ docker pull stablebaselines/rl-baselines3-zoo-cpu
+
+GPU image:
+
+::
+
+ docker pull stablebaselines/rl-baselines3-zoo
+
+Run script in the docker image:
+
+::
+
+ ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
diff --git a/docs/guide/integrations.rst b/docs/guide/integrations.rst
new file mode 100644
index 000000000..e5890e4e7
--- /dev/null
+++ b/docs/guide/integrations.rst
@@ -0,0 +1,45 @@
+.. _integrations:
+
+============
+Integrations
+============
+
+Huggingface Hub Integration
+---------------------------
+
+A list and videos of trained agents can be found on our Hugging Face page: https://huggingface.co/sb3
+
+
+Upload model to hub (same syntax as for ``enjoy.py``):
+
+::
+
+ python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
+
+You can choose a custom ``repo-name`` (default: ``{algo}-{env_id}``) by
+passing a ``--repo-name`` argument.
+
+Download model from hub:
+
+::
+
+ python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
+
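+Once downloaded, the agent is a regular SB3 model and can also be loaded directly
+in python. A minimal sketch (the exact path depends on where ``load_from_hub``
+stored the agent, the one below is only an example):
+
+.. code-block:: python
+
+    from stable_baselines3 import PPO
+
+    # Hypothetical path, adjust it to the downloaded agent location
+    model = PPO.load("logs/ppo/CartPole-v1_1/CartPole-v1.zip")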
+
+Experiment tracking
+-------------------
+
+We support tracking experiment data such as learning curves and
+hyperparameters via `Weights and Biases `__.
+
+The following command
+
+::
+
+ python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
+
+yields a tracked experiment at this
+`URL `__.
+
+To add a tag to the run (e.g. ``optimized``), use the argument
+``--wandb-tags optimized``.
diff --git a/docs/guide/plot.rst b/docs/guide/plot.rst
new file mode 100644
index 000000000..aa571f612
--- /dev/null
+++ b/docs/guide/plot.rst
@@ -0,0 +1,70 @@
+.. _plot:
+
+============
+Plot Scripts
+============
+
+
+Plot scripts (to be documented, see "Results" sections in SB3
+documentation):
+
+- ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations
+
+- ``scripts/plot_train.py`` for plotting training reward/success
+
+
+Examples
+--------
+
+Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving
+window of 500 episodes for all the ``Fetch`` environments with the ``HER``
+algorithm:
+
+::
+
+ python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
+
+Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and
+Ant PyBullet environments:
+
+::
+
+ python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
+
+Plot with the rliable library
+-----------------------------
+
+The RL Zoo integrates some of the
+`rliable `__ library features. You
+can find a visual explanation of the tools used by rliable in this `blog
+post `__.
+
+First, you need to install
+`rliable `__.
+
+Note: Python 3.7+ is required in that case.
+
+Then export your results to a file using the ``all_plots.py`` script
+(see above):
+
+::
+
+ python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
+
+You can now use the ``plot_from_file.py`` script with ``--rliable``,
+``--versus`` and ``--iqm`` arguments:
+
+::
+
+ python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
+
+.. note::
+
+ you may need to edit ``plot_from_file.py``, in particular the
+ ``env_key_to_env_id`` dictionary and the
+ ``scripts/score_normalization.py`` which stores min and max score for
+ each environment.
+
+
+Remark: plotting with the ``--rliable`` option is usually slow as
+confidence intervals need to be computed using bootstrap sampling.
diff --git a/docs/guide/quickstart.rst b/docs/guide/quickstart.rst
new file mode 100644
index 000000000..d527bb189
--- /dev/null
+++ b/docs/guide/quickstart.rst
@@ -0,0 +1,51 @@
+.. _quickstart:
+
+===============
+Getting Started
+===============
+
+.. note::
+
+ You can try the following examples online using Google colab |colab|
+ notebook: `RL Baselines zoo notebook`_
+
+
+.. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb
+.. |colab| image:: ../_static/img/colab.svg
+
+
+The hyperparameters for each environment are defined in
+``hyperparams/algo_name.yml``.
+
+If the environment exists in this file, then you can train an agent
+using:
+
+::
+
+ python -m rl_zoo3.train --algo algo_name --env env_id
+
+Or if you are in the RL Zoo3 folder:
+
+::
+
+ python train.py --algo algo_name --env env_id
+
+For example (with evaluation and checkpoints):
+
+::
+
+ python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000
+
+
+
+If the trained agent exists, then you can see it in action using:
+
+::
+
+ python -m rl_zoo3.enjoy --algo algo_name --env env_id
+
+For example, enjoy A2C on Breakout during 5000 timesteps:
+
+::
+
+ python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
diff --git a/docs/guide/sbx.rst b/docs/guide/sbx.rst
new file mode 100644
index 000000000..3205f33a3
--- /dev/null
+++ b/docs/guide/sbx.rst
@@ -0,0 +1,58 @@
+.. _sbx:
+
+==========================
+Stable Baselines Jax (SBX)
+==========================
+
+`Stable Baselines Jax (SBX) `_ is a proof of concept version of Stable-Baselines3 in Jax.
+
+It provides a minimal number of features compared to SB3 but can be much faster (up to 20x!): https://twitter.com/araffin2/status/1590714558628253698
+
+
+It is also compatible with the RL Zoo.
+For that you will need to create two files.
+
+``train_sbx.py``:
+
+.. code-block:: python
+
+ import rl_zoo3
+ import rl_zoo3.train
+ from rl_zoo3.train import train
+ from sbx import DQN, PPO, SAC, TQC, DroQ
+
+
+ rl_zoo3.ALGOS["tqc"] = TQC
+ rl_zoo3.ALGOS["droq"] = DroQ
+ rl_zoo3.ALGOS["sac"] = SAC
+ rl_zoo3.ALGOS["ppo"] = PPO
+ rl_zoo3.ALGOS["dqn"] = DQN
+ rl_zoo3.train.ALGOS = rl_zoo3.ALGOS
+ rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
+
+ if __name__ == "__main__":
+ train()
+
+Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI.
+
+
+``enjoy_sbx.py``:
+
+.. code-block:: python
+
+ import rl_zoo3
+ import rl_zoo3.enjoy
+ from rl_zoo3.enjoy import enjoy
+ from sbx import DQN, PPO, SAC, TQC, DroQ
+
+
+ rl_zoo3.ALGOS["tqc"] = TQC
+ rl_zoo3.ALGOS["droq"] = DroQ
+ rl_zoo3.ALGOS["sac"] = SAC
+ rl_zoo3.ALGOS["ppo"] = PPO
+ rl_zoo3.ALGOS["dqn"] = DQN
+ rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS
+ rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
+
+ if __name__ == "__main__":
+ enjoy()
diff --git a/docs/guide/train.rst b/docs/guide/train.rst
new file mode 100644
index 000000000..8d8e18170
--- /dev/null
+++ b/docs/guide/train.rst
@@ -0,0 +1,120 @@
+.. _train:
+
+==============
+Train an Agent
+==============
+
+Basic Usage
+-----------
+
+The hyperparameters for each environment are defined in
+``hyperparams/algo_name.yml``.
+
+
+.. note::
+
+ Once RL Zoo3 is installed, you can run ``python -m rl_zoo3.train`` from any folder; it is equivalent to ``python train.py``.
+
+
+If the environment exists in this file, then you can train an agent using:
+
+::
+
+ python train.py --algo algo_name --env env_id
+
+
+.. note::
+
+ You can use ``-P`` (``--progress``) option to display a progress bar.
+
+
+Custom Config File
+------------------
+
+You can use a custom config file, as long as it is a yaml file which contains an ``env_id`` entry:
+
+::
+
+ python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
+
+
+You can also use a python file that contains a dictionary called ``hyperparams`` with an entry for each ``env_id``.
+(see ``hyperparams/python/ppo_config_example.py`` for an example)
+
+::
+
+ # You can pass a path to a python file
+ python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
+ # Or pass a path to a file from a module (for instance my_package.my_file)
+ python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
+
+The advantage of this approach is that you can specify arbitrary python dictionaries
+and ensure that all their dependencies are imported in the config file itself.
+
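+For illustration, such a python config file could look like the following minimal
+sketch (the actual ``hyperparams/python/ppo_config_example.py`` may use different
+keys and values, the ones below are placeholders):
+
+.. code-block:: python
+
+    """Hypothetical python config: a ``hyperparams`` dict with one entry per env_id."""
+    import torch.nn as nn  # imported here so the config is self-contained
+
+    hyperparams = {
+        "MountainCarContinuous-v0": dict(
+            policy="MlpPolicy",
+            n_envs=1,
+            n_timesteps=20_000,
+            normalize=True,
+            # Arbitrary python objects are allowed, e.g. for policy_kwargs
+            policy_kwargs=dict(activation_fn=nn.ReLU, net_arch=[64, 64]),
+        ),
+    }
+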
+Tensorboard, Checkpoints, Evaluation
+------------------------------------
+
+For example (with tensorboard support):
+
+::
+
+ python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
+
+
+Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
+
+::
+
+ python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
+
+
+Save a checkpoint of the agent every 100000 steps:
+
+::
+
+ python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000
+
+Resume Training
+---------------
+
+Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
+
+::
+
+ python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
+
+Save Replay Buffer
+------------------
+
+When using off-policy algorithms, you can also **save the replay buffer** after training:
+
+::
+
+ python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
+
+It will be automatically loaded if present when continuing training.
+
+
+Env keyword arguments
+---------------------
+
+You can specify keyword arguments to pass to the env constructor in the
+command line, using ``--env-kwargs``:
+
+::
+
+ python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
+
+
+Overwrite hyperparameters
+-------------------------
+
+You can easily overwrite hyperparameters in the command line, using
+``--hyperparams``:
+
+::
+
+ python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
+
+Note: if you want to pass a string, you need to escape it like this:
+``my_string:"'value'"``
diff --git a/docs/guide/tuning.rst b/docs/guide/tuning.rst
new file mode 100644
index 000000000..159f15673
--- /dev/null
+++ b/docs/guide/tuning.rst
@@ -0,0 +1,71 @@
+.. _tuning:
+
+=====================
+Hyperparameter Tuning
+=====================
+
+Hyperparameter Tuning
+---------------------
+
+We use `Optuna `__ for optimizing the
+hyperparameters. Not all hyperparameters are tuned, and tuning enforces
+certain default hyperparameter settings that may be different from the
+official defaults. See
+`rl_zoo3/hyperparams_opt.py `__
+for the current settings for each agent.
+
+Hyperparameters not specified in
+`rl_zoo3/hyperparams_opt.py `__
+are taken from the associated YAML file and fallback to the default
+values of SB3 if not present.
+
+Note: when using SuccessiveHalvingPruner (“halving”), you must specify
+``--n-jobs > 1``
+
+Budget of 1000 trials with a maximum of 50000 steps:
+
+::
+
+ python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
+ --sampler tpe --pruner median
+
+Distributed optimization using a shared database is also possible (see
+the corresponding `Optuna
+documentation `__):
+
+::
+
+ python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db
+
+Print and save best hyperparameters of an Optuna study:
+
+::
+
+ python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10
+
+The default budget for hyperparameter tuning is 500 trials and there is
+one intermediate evaluation for pruning/early stopping per 100k time
+steps.
+
+Hyperparameters search space
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Note that the default hyperparameters used in the zoo when tuning are
+not always the same as the defaults provided in
+`stable-baselines3 `__.
+Consult the latest source code to be sure of these settings. For
+example:
+
+- PPO tuning assumes a network architecture with ``ortho_init = False``
+ when tuning, though it is ``True`` by
+ `default `__.
+ You can change that by updating
+ `rl_zoo3/hyperparams_opt.py `__.
+
+- Non-episodic rollout in TD3 and DDPG assumes
+ ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to
+ reduce the search space.
+
+When working with continuous actions, we recommend enabling
+`gSDE `__ by uncommenting lines in
+`rl_zoo3/hyperparams_opt.py `__.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 000000000..3ef2d6bc0
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,74 @@
+RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3
+===================================================================
+
+`RL Baselines3 Zoo `_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) `_,
+which provides reliable implementations of reinforcement learning algorithms in PyTorch.
+
+Github repository: https://github.com/DLR-RM/rl-baselines3-zoo
+
+It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.
+
+In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings.
+
+.. toctree::
+ :maxdepth: 2
+ :caption: User Guide
+
+ guide/install
+ guide/quickstart
+ guide/train
+ guide/plot
+ guide/enjoy
+ guide/custom_env
+ guide/config
+ guide/integrations
+ guide/tuning
+ guide/sbx
+
+
+.. toctree::
+ :maxdepth: 1
+ :caption: RL Zoo API
+
+ modules/exp_manager
+ modules/wrappers
+ modules/callbacks
+ modules/utils
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Misc
+
+ misc/changelog
+
+
+Citing RL Baselines3 Zoo
+------------------------
+To cite this project in publications:
+
+.. code-block:: bibtex
+
+ @misc{rl-zoo3,
+ author = {Raffin, Antonin},
+ title = {RL Baselines3 Zoo},
+ year = {2020},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}},
+ }
+
+Contributing
+------------
+
+To anyone interested in making the RL Zoo better, there are still some improvements
+that need to be done.
+You can check issues in the `repo `_.
+
+If you want to contribute, please read `CONTRIBUTING.md `_ first.
+
+Indices and tables
+-------------------
+
+* :ref:`genindex`
+* :ref:`search`
+* :ref:`modindex`
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 000000000..22b5fff4e
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,36 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+set SPHINXPROJ=RLZoo
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
new file mode 100644
index 000000000..cf5d4b9e1
--- /dev/null
+++ b/docs/misc/changelog.rst
@@ -0,0 +1,7 @@
+.. _changelog:
+
+Changelog
+==========
+
+
+See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md
diff --git a/docs/modules/callbacks.rst b/docs/modules/callbacks.rst
new file mode 100644
index 000000000..bb26e1737
--- /dev/null
+++ b/docs/modules/callbacks.rst
@@ -0,0 +1,8 @@
+.. _callbacks:
+
+
+Callbacks
+=========
+
+.. automodule:: rl_zoo3.callbacks
+ :members:
diff --git a/docs/modules/exp_manager.rst b/docs/modules/exp_manager.rst
new file mode 100644
index 000000000..4eb97a4e9
--- /dev/null
+++ b/docs/modules/exp_manager.rst
@@ -0,0 +1,15 @@
+.. _manager:
+
+.. automodule:: rl_zoo3.exp_manager
+
+
+Experiment Manager
+==================
+
+
+Parameters
+----------
+
+.. autoclass:: ExperimentManager
+ :members:
+ :inherited-members:
diff --git a/docs/modules/utils.rst b/docs/modules/utils.rst
new file mode 100644
index 000000000..097dedbd4
--- /dev/null
+++ b/docs/modules/utils.rst
@@ -0,0 +1,8 @@
+.. _utils:
+
+
+Utils
+=====
+
+.. automodule:: rl_zoo3.utils
+ :members:
diff --git a/docs/modules/wrappers.rst b/docs/modules/wrappers.rst
new file mode 100644
index 000000000..97bd7b89a
--- /dev/null
+++ b/docs/modules/wrappers.rst
@@ -0,0 +1,8 @@
+.. _wrappers:
+
+
+Wrappers
+========
+
+.. automodule:: rl_zoo3.wrappers
+ :members:
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
new file mode 100644
index 000000000..d306d7e78
--- /dev/null
+++ b/docs/spelling_wordlist.txt
@@ -0,0 +1,129 @@
+py
+env
+atari
+argparse
+Argparse
+TensorFlow
+feedforward
+envs
+VecEnv
+pretrain
+petrained
+tf
+th
+nn
+np
+str
+mujoco
+cpu
+ndarray
+ndarrays
+timestep
+timesteps
+stepsize
+dataset
+adam
+fn
+normalisation
+Kullback
+Leibler
+boolean
+deserialized
+pretrained
+minibatch
+subprocesses
+ArgumentParser
+Tensorflow
+Gaussian
+approximator
+minibatches
+hyperparameters
+hyperparameter
+vectorized
+rl
+colab
+dataloader
+npz
+datasets
+vf
+logits
+num
+Utils
+backpropagate
+prepend
+NaN
+preprocessing
+Cloudpickle
+async
+multiprocess
+tensorflow
+mlp
+cnn
+neglogp
+tanh
+coef
+repo
+Huber
+params
+ppo
+arxiv
+Arxiv
+func
+DQN
+Uhlenbeck
+Ornstein
+multithread
+cancelled
+Tensorboard
+parallelize
+customising
+serializable
+Multiprocessed
+cartpole
+toolset
+lstm
+rescale
+ffmpeg
+avconv
+unnormalized
+Github
+pre
+preprocess
+backend
+attr
+preprocess
+Antonin
+Raffin
+araffin
+Homebrew
+Numpy
+Theano
+rollout
+kfac
+Piecewise
+csv
+nvidia
+visdom
+tensorboard
+preprocessed
+namespace
+sklearn
+GoalEnv
+Torchy
+pytorch
+dicts
+optimizers
+Deprecations
+forkserver
+cuda
+Polyak
+gSDE
+rollouts
+Pyro
+softmax
+stdout
+Contrib
+Quantile
+Huggingface
+Jax
+Optuna
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..73f41c8b8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[tool.ruff]
+# Same as Black.
+line-length = 127
+# Assume Python 3.7
+target-version = "py37"
+# TODO(antonin): activate "RUF" https://beta.ruff.rs/docs/rules/#ruff-specific-rules-ruf
+select = ["E", "F", "B", "UP", "C90"]
+ignore = []
+
+[tool.ruff.per-file-ignores]
+# "./rl_zoo3/plots/all_plots.py"= ["E501"]
+# "./rl_zoo3/plots/plot_train.py"= ["E501"]
+
+
+[tool.ruff.mccabe]
+# Unlike Flake8, default to a complexity level of 10.
+max-complexity = 15
+
+[tool.black]
+line-length = 127
+
+[tool.isort]
+profile = "black"
+line_length = 127
+src_paths = ["stable_baselines3", "rl_zoo3"]
+
+[tool.pytype]
+inputs = ["."]
+# disable = []
+
+[tool.mypy]
+ignore_missing_imports = true
+follow_imports = "silent"
+show_error_codes = true
+exclude = """(?x)(
+ rl_zoo3/hyperparams_opt.py$
+ | rl_zoo3/exp_manager.py$
+ )"""
+
+[tool.pytest.ini_options]
+# Deterministic ordering for tests; useful for pytest-xdist.
+env = [
+ "PYTHONHASHSEED=0"
+]
+
+filterwarnings = [
+ # Tensorboard warnings
+ "ignore::DeprecationWarning:tensorboard",
+ # Gym warnings
+ "ignore::UserWarning:gym",
+]
+markers = [
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')"
+]
diff --git a/requirements.txt b/requirements.txt
index 5bb2a0460..d33a536db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
gym==0.21
-stable-baselines3[extra,tests,docs]>=1.8.0a2
-sb3-contrib>=1.8.0a2
+stable-baselines3[extra_no_roms,tests,docs]>=1.8.0a9
+sb3-contrib>=1.8.0a9
box2d-py==2.3.8
pybullet
gym-minigrid
@@ -13,8 +13,8 @@ plotly
panda-gym==1.1.1 # tmp fix: until compatibility with panda-gym v2
rliable>=1.0.5
wandb
-huggingface_sb3>=2.2.1, <3.*
+huggingface_sb3>=2.2.1
seaborn
tqdm
rich
-importlib-metadata~=4.13 # flake8 not compatible with importlib-metadata>5.0
+ruff
diff --git a/rl-trained-agents b/rl-trained-agents
index 1e2a45e5d..1935484b7 160000
--- a/rl-trained-agents
+++ b/rl-trained-agents
@@ -1 +1 @@
-Subproject commit 1e2a45e5d06efd6cc15da6cf2d1939d72dcbdf87
+Subproject commit 1935484b7458967f77d4da1a50b14078d2520071
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
index 556e9553a..53543df2b 100644
--- a/rl_zoo3/callbacks.py
+++ b/rl_zoo3/callbacks.py
@@ -102,12 +102,12 @@ class ParallelTrainCallback(BaseCallback):
TODO:
- blocking mode: wait for the model to finish updating the policy before collecting new experience
- at the end of a rollout
+ at the end of a rollout
- force sync mode: stop training to update to the latest policy for collecting
- new experience
+ new experience
:param gradient_steps: Number of gradient steps to do before
- sending the new policy
+ sending the new policy
:param verbose: Verbosity level
:param sleep_time: Limit the fps in the thread collecting experience.
"""
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
index 4503121b0..713bb100a 100644
--- a/rl_zoo3/exp_manager.py
+++ b/rl_zoo3/exp_manager.py
@@ -782,7 +782,7 @@ def objective(self, trial: optuna.Trial) -> float:
print("============")
print("Sampled hyperparams:")
pprint(sampled_hyperparams)
- raise optuna.exceptions.TrialPruned()
+ raise optuna.exceptions.TrialPruned() from e
is_pruned = eval_callback.is_pruned
reward = eval_callback.last_mean_reward
diff --git a/rl_zoo3/train.py b/rl_zoo3/train.py
index 1e52a5fc0..f8a6926da 100644
--- a/rl_zoo3/train.py
+++ b/rl_zoo3/train.py
@@ -204,10 +204,10 @@ def train() -> None:
if args.track:
try:
import wandb
- except ImportError:
+ except ImportError as e:
raise ImportError(
"if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`"
- )
+ ) from e
run_name = f"{args.env}__{args.algo}__{args.seed}__{int(time.time())}"
tags = args.wandb_tags + [f"v{sb3.__version__}"]
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index f23265883..91e9071b9 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -314,7 +314,7 @@ def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
args_files = glob.glob(os.path.join(log_folder, algo, model_folder, "*/args.yml"))
if len(args_files) != 1:
continue # we expect only one sub-folder with an args.yml file
- with open(args_files[0], "r") as fh:
+ with open(args_files[0]) as fh:
env_id = yaml.load(fh, Loader=yaml.UnsafeLoader)["env"]
model_name = ModelName(algo, EnvironmentName(env_id))
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index c3d22c01c..13ef2a834 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-1.8.0a2
+1.8.0a9
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
index e94e51a70..0634a7967 100644
--- a/rl_zoo3/wrappers.py
+++ b/rl_zoo3/wrappers.py
@@ -301,12 +301,12 @@ def __init__(self, env: gym.Env):
env_id: str = env.unwrapped.spec.id
# By default no masking
- self.mask = np.ones_like((env.observation_space.sample()))
+ self.mask = np.ones_like(env.observation_space.sample())
try:
# Mask velocity
self.mask[self.velocity_indices[env_id]] = 0.0
- except KeyError:
- raise NotImplementedError(f"Velocity masking not implemented for {env_id}")
+ except KeyError as e:
+ raise NotImplementedError(f"Velocity masking not implemented for {env_id}") from e
def observation(self, observation: np.ndarray) -> np.ndarray:
return observation * self.mask
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 14b7d4b95..000000000
--- a/setup.cfg
+++ /dev/null
@@ -1,47 +0,0 @@
-[metadata]
-# This includes the license file in the wheel.
-license_files = LICENSE
-
-[tool:pytest]
-filterwarnings =
- # Tensorboard warnings
- ignore::DeprecationWarning:tensorboard
- # Gym warnings
- ignore::UserWarning:gym
-markers =
- slow: marks tests as slow (deselect with '-m "not slow"')
- serial
-
-[pytype]
-inputs = .
-
-[flake8]
-# line breaks before and after binary operators
-ignore = W503,W504,E203,E231
-# Ignore import not used when aliases are defined
-per-file-ignores =
- ./rl_zoo3/plots/all_plots.py:E501
- ./rl_zoo3/plots/plot_train.py:E501
-
-exclude =
- # No need to traverse our git directory
- .git,
- # There's no value in checking cache directories
- __pycache__,
-max-complexity = 15
-# The GitHub editor is 127 chars wide
-max-line-length = 127
-
-[isort]
-profile = black
-line_length = 127
-src_paths = stable_baselines3,rl_zoo3
-
-[mypy]
-ignore_missing_imports = True
-follow_imports = silent
-show_error_codes = True
-exclude = (?x)(
- rl_zoo3/hyperparams_opt.py$
- | rl_zoo3/exp_manager.py$
- )
diff --git a/setup.py b/setup.py
index 73ddc7a48..458a6ff2f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,8 +27,8 @@
},
entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
install_requires=[
- "sb3-contrib>=1.8.0a2",
- "huggingface_sb3>=2.2.1, <3.*",
+ "sb3-contrib>=1.8.0a9",
+ "huggingface_sb3>=2.2.1",
"tqdm",
"rich",
"optuna",