diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 10e487a46..256a7598f 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -61,7 +61,7 @@ body:
required: true
- label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
required: true
- - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md)
+ - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
required: true
- label: I have provided a minimal working example to reproduce the bug
required: true
diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml
index a519436a7..4da584fec 100644
--- a/.github/ISSUE_TEMPLATE/question.yml
+++ b/.github/ISSUE_TEMPLATE/question.yml
@@ -24,7 +24,7 @@ body:
required: true
- label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
required: true
- - label: I have read the [RL Zoo README](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/README.md)
+ - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
required: true
- label: If code there is, it is minimal and working
required: true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 622bba139..0afdeba88 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,6 +31,13 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
+
+ # Install Atari Roms
+ pip install autorom
+ wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+ base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+ AutoROM --accept-license --source-file Roms.tar.gz
+
pip install setuptools==65.5.0
# cpu version of pytorch - faster to download
pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
@@ -41,17 +48,20 @@ jobs:
# install parking-env to test HER (pinned so it works with gym 0.21)
pip install highway-env==1.5.0
pip install -e .
- - name: Type check
+ - name: Lint with ruff
run: |
- make type
- # skip mypy type check for python3.7 (last forever for some reason)
- if: "!(matrix.python-version == '3.7')"
+ make lint
- name: Check codestyle
run: |
make check-codestyle
- - name: Lint with flake8
+ - name: Build doc
run: |
- make lint
+ make doc
+ - name: Type check
+ run: |
+ make type
+ # skip mypy type check for python3.7 (last forever for some reason)
+ if: "!(matrix.python-version == '3.7')"
- name: Test with pytest
run: |
make pytest
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index 3e2d6d27b..cf45a9fb7 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -14,6 +14,7 @@ jobs:
env:
TERM: xterm-256color
FORCE_COLOR: 1
+
# Skip CI if [ci skip] in the commit message
if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
runs-on: ubuntu-latest
@@ -31,6 +32,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
+
+ # Install Atari Roms
+ pip install autorom
+ wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+ base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+ AutoROM --accept-license --source-file Roms.tar.gz
+
+
pip install setuptools==65.5.0
# cpu version of pytorch - faster to download
pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/.gitignore b/.gitignore
index 87c26d1a2..b428b9386 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,8 @@ runs
hub
*.mp4
*.json
+_build/
+
# Setuptools distribution and build folders.
/dist/
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 000000000..6753d8744
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,16 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+ configuration: docs/conf.py
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats: all
+
+# Set requirements using conda env
+conda:
+ environment: docs/conda_env.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3596cdce6..ded42f17d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,11 @@
-## Release 1.8.0a2 (WIP)
+## Release 1.8.0a9 (WIP)
### Breaking Changes
- Upgraded to SB3 >= 1.8.0
### New Features
- Tuned hyperparameters for RecurrentPPO on Swimmer
+- Documentation is now built using Sphinx and hosted on Read the Docs
### Bug fixes
- Set ``highway-env`` version to 1.5 and ``setuptools to`` v65.5 for the CI
@@ -18,6 +19,7 @@
- Added support for `ruff` (fast alternative to flake8) in the Makefile
- Removed Gitlab CI file
- Replaced deprecated `optuna.suggest_loguniform(...)` by `optuna.suggest_float(..., log=True)`
+- Switched to `ruff` and `pyproject.toml`
## Release 1.7.0 (2023-01-10)
diff --git a/Makefile b/Makefile
index cabb0ff0b..e87e65efb 100644
--- a/Makefile
+++ b/Makefile
@@ -18,32 +18,34 @@ type: pytype mypy
lint:
# stop the build if there are Python syntax errors or undefined names
- # see https://lintlyci.github.io/Flake8Rules/
- flake8 ${LINT_PATHS} --count --select=E9,F63,F7,F82 --show-source --statistics
- # exit-zero treats all errors as warnings.
- flake8 ${LINT_PATHS} --count --exit-zero --statistics
-
-ruff:
- # stop the build if there are Python syntax errors or undefined names
- # see https://lintlyci.github.io/Flake8Rules/
+ # see https://www.flake8rules.com/
ruff ${LINT_PATHS} --select=E9,F63,F7,F82 --show-source
# exit-zero treats all errors as warnings.
- ruff ${LINT_PATHS} --exit-zero --line-length 127
+ ruff ${LINT_PATHS} --exit-zero
format:
# Sort imports
isort ${LINT_PATHS}
# Reformat using black
- black -l 127 ${LINT_PATHS}
+ black ${LINT_PATHS}
check-codestyle:
# Sort imports
isort --check ${LINT_PATHS}
# Reformat using black
- black --check -l 127 ${LINT_PATHS}
+ black --check ${LINT_PATHS}
commit-checks: format type lint
+doc:
+ cd docs && make html
+
+spelling:
+ cd docs && make spelling
+
+clean:
+ cd docs && make clean
+
docker: docker-cpu docker-gpu
docker-cpu:
@@ -66,4 +68,4 @@ test-release:
python setup.py bdist_wheel
twine upload --repository-url https://test.pypi.org/legacy/ dist/*
-.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest
+.PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest
diff --git a/README.md b/README.md
index cf672ba9a..600c2e54d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
![CI](https://github.com/DLR-RM/rl-baselines3-zoo/workflows/CI/badge.svg)
+[![Documentation Status](https://readthedocs.org/projects/rl-baselines3-zoo/badge/?version=master)](https://rl-baselines3-zoo.readthedocs.io/en/master/?badge=master)
[![coverage report](https://gitlab.com/araffin/rl-baselines3-zoo/badges/master/coverage.svg)](https://gitlab.com/araffin/rl-baselines3-zoo/-/commits/master) [![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -26,6 +27,10 @@ Goals of this repository:
This is the SB3 version of the original SB2 [rl-zoo](https://github.com/araffin/rl-baselines-zoo).
+## Documentation
+
+Documentation is available online: [https://rl-baselines3-zoo.readthedocs.io/](https://rl-baselines3-zoo.readthedocs.io)
+
## Installation
### Minimal installation
@@ -59,99 +64,22 @@ If the environment exists in this file, then you can train an agent using:
```
python train.py --algo algo_name --env env_id
```
-You can use `-P` (`--progress`) option to display a progress bar.
-
-Using a custom config file when it is a yaml file with a which contains a `env_id` entry:
-```
-python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
-```
-
-You can also use a python file that contains a dictionary called `hyperparams` with an entry for each `env_id`.
-(see `hyperparams/python/ppo_config_example.py` for an example)
-```bash
-# You can pass a path to a python file
-python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
-# Or pass a path to a file from a module (for instance my_package.my_file)
-python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
-```
-The advantage of this approach is that you can specify arbitrary python dictionaries
-and ensure that all their dependencies are imported in the config file itself.
-
-
-For example (with tensorboard support):
-```
-python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
-```
Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
```
python train.py --algo sac --env HalfCheetahBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
```
-Save a checkpoint of the agent every 100000 steps:
-```
-python train.py --algo td3 --env HalfCheetahBulletEnv-v0 --save-freq 100000
-```
+More examples are available in the [documentation](https://rl-baselines3-zoo.readthedocs.io).
-Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
-```
-python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
-```
-When using off-policy algorithms, you can also save the replay buffer after training:
-```
-python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
-```
-It will be automatically loaded if present when continuing training.
+## Integrations
-## Plot Scripts
+The RL Zoo integrates with other libraries/services such as Weights & Biases for experiment tracking and Hugging Face for storing/sharing trained models. You can find out more in the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/integrations.html) of the documentation.
-Plot scripts (to be documented, see "Results" sections in SB3 documentation):
-- `scripts/all_plots.py`/`scripts/plot_from_file.py` for plotting evaluations
-- `scripts/plot_train.py` for plotting training reward/success
-
-*Examples (on the current collection)*
-
-Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving window of 500 episodes for all the `Fetch` environment with `HER` algorithm:
-
-```
-python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
-```
-
-Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and Ant PyBullet environments:
-
-```
-python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
-```
-
-## Plot with the rliable library
-
-The RL zoo integrates some of [rliable](https://agarwl.github.io/rliable/) library features.
-You can find a visual explanation of the tools used by rliable in this [blog post](https://araffin.github.io/post/rliable/).
-
-First, you need to install [rliable](https://github.com/google-research/rliable).
-
-Note: Python 3.7+ is required in that case.
-
-Then export your results to a file using the `all_plots.py` script (see above):
-```
-python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
-```
-
-You can now use the `plot_from_file.py` script with `--rliable`, `--versus` and `--iqm` arguments:
-```
-python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
-```
-
-Note: you may need to edit `plot_from_file.py`, in particular the `env_key_to_env_id` dictionary
-and the `scripts/score_normalization.py` which stores min and max score for each environment.
-
-Remark: plotting with the `--rliable` option is usually slow as confidence interval need to be computed using bootstrap sampling.
-
-
-## Custom Environment
+## Plot Scripts
-The easiest way to add support for a custom environment is to edit `rl_zoo3/import_envs.py` and register your environment here. Then, you need to add a section for it in the hyperparameters file (`hyperparams/algo.yml` or a custom yaml file that you can specify using `--conf-file` argument).
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/plot.html) of the documentation.
## Enjoy a Trained Agent
@@ -168,237 +96,13 @@ For example, enjoy A2C on Breakout during 5000 timesteps:
python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
```
-If you have trained an agent yourself, you need to do:
-```
-# exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
-```
-
-To load the best model (when using evaluation environment):
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
-```
-
-To load a checkpoint (here the checkpoint name is `rl_model_10000_steps.zip`):
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
-```
-
-To load the latest checkpoint:
-```
-python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
-```
-
-## Huggingface Hub Integration
-
-Upload model to hub (same syntax as for `enjoy.py`):
-```
-python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
-```
-you can choose custom `repo-name` (default: `{algo}-{env_id}`) by passing a `--repo-name` argument.
-
-Download model from hub:
-```
-python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
-```
-
-## Hyperparameter yaml syntax
-
-The syntax used in `hyperparameters/algo_name.yml` for setting hyperparameters (likewise the syntax to [overwrite hyperparameters](https://github.com/DLR-RM/rl-baselines3-zoo#overwrite-hyperparameters) on the cli) may be specialized if the argument is a function. See examples in the `hyperparameters/` directory. For example:
-
-- Specify a linear schedule for the learning rate:
-
-```yaml
- learning_rate: lin_0.012486195510232303
-```
-
-Specify a different activation function for the network:
-
-```yaml
- policy_kwargs: "dict(activation_fn=nn.ReLU)"
-```
-
-For a custom policy:
-
-```yaml
- policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy
-```
-
-
-## Hyperparameter Tuning
-
-We use [Optuna](https://optuna.org/) for optimizing the hyperparameters.
-Not all hyperparameters are tuned, and tuning enforces certain default hyperparameter settings that may be different from the official defaults. See [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) for the current settings for each agent.
-
-Hyperparameters not specified in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py) are taken from the associated YAML file and fallback to the default values of SB3 if not present.
-
-Note: when using SuccessiveHalvingPruner ("halving"), you must specify `--n-jobs > 1`
-
-Budget of 1000 trials with a maximum of 50000 steps:
-
-```
-python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
- --sampler tpe --pruner median
-```
-
-Distributed optimization using a shared database is also possible (see the corresponding [Optuna documentation](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/004_distributed.html)):
-```
-python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db
-```
-
-Print and save best hyperparameters of an Optuna study:
-```
-python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10
-```
-
-The default budget for hyperparameter tuning is 500 trials and there is one intermediate evaluation for pruning/early stopping per 100k time steps.
-
-### Hyperparameters search space
-
-Note that the default hyperparameters used in the zoo when tuning are not always the same as the defaults provided in [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/modules/base.html). Consult the latest source code to be sure of these settings. For example:
-
-- PPO tuning assumes a network architecture with `ortho_init = False` when tuning, though it is `True` by [default](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#ppo-policies). You can change that by updating [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py).
-
-- Non-episodic rollout in TD3 and DDPG assumes `gradient_steps = train_freq` and so tunes only `train_freq` to reduce the search space.
-
-When working with continuous actions, we recommend to enable [gSDE](https://arxiv.org/abs/2005.05719) by uncommenting lines in [rl_zoo3/hyperparams_opt.py](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py).
-
-
-## Experiment tracking
-
-We support tracking experiment data such as learning curves and hyperparameters via [Weights and Biases](https://wandb.ai).
-
-The following command
-```
-python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
-```
-
-yields a tracked experiment at this [URL](https://wandb.ai/openrlbenchmark/sb3/runs/1b65ldmh).
-
-To add a tag to the run, (e.g. `optimized`), use the argument `--wandb-tags optimized`.
-
-## Env normalization
+## Hyperparameter Tuning
-In the hyperparameter file, `normalize: True` means that the training environment will be wrapped in a [VecNormalize](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/vec_normalize.py#L13) wrapper.
-
-[Normalization uses](https://github.com/DLR-RM/rl-baselines3-zoo/issues/64) the default parameters of `VecNormalize`, with the exception of `gamma` which is set to match that of the agent. This can be [overridden](https://github.com/DLR-RM/rl-baselines3-zoo/blob/v0.10.0/hyperparams/sac.yml#L239) using the appropriate `hyperparameters/algo_name.yml`, e.g.
-
-```yaml
- normalize: "{'norm_obs': True, 'norm_reward': False}"
-```
-
-
-## Env Wrappers
-
-You can specify in the hyperparameter config one or more wrapper to use around the environment:
-
-for one wrapper:
-```yaml
-env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
-```
-
-for multiple, specify a list:
-
-```yaml
-env_wrapper:
- - rl_zoo3.wrappers.DoneOnSuccessWrapper:
- reward_offset: 1.0
- - sb3_contrib.common.wrappers.TimeFeatureWrapper
-```
-
-Note that you can easily specify parameters too.
-
-By default, the environment is wrapped with a `Monitor` wrapper to record episode statistics.
-You can specify arguments to it using `monitor_kwargs` parameter to log additional data.
-That data *must* be present in the info dictionary at the last step of each episode.
-
-For instance, for recording success with goal envs (e.g. `FetchReach-v1`):
-
-```yaml
-monitor_kwargs: dict(info_keywords=('is_success',))
-```
-
-or recording final x position with `Ant-v3`:
-```yaml
-monitor_kwargs: dict(info_keywords=('x_position',))
-```
-
-Note: for known `GoalEnv` like `FetchReach`, `info_keywords=('is_success',)` is actually the default.
-
-## VecEnvWrapper
-
-You can specify which `VecEnvWrapper` to use in the config, the same way as for env wrappers (see above), using the `vec_env_wrapper` key:
-
-For instance:
-```yaml
-vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
-```
-
-Note: `VecNormalize` is supported separately using `normalize` keyword, and `VecFrameStack` has a dedicated keyword `frame_stack`.
-
-## Callbacks
-
-Following the same syntax as env wrappers, you can also add custom callbacks to use during training.
-
-```yaml
-callback:
- - rl_zoo3.callbacks.ParallelTrainCallback:
- gradient_steps: 256
-```
-
-## Env keyword arguments
-
-You can specify keyword arguments to pass to the env constructor in the command line, using `--env-kwargs`:
-
-```
-python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
-```
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/tuning.html) of the documentation.
-## Overwrite hyperparameters
+## Custom Configuration
-You can easily overwrite hyperparameters in the command line, using ``--hyperparams``:
-
-```
-python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
-```
-
-Note: if you want to pass a string, you need to escape it like that: `my_string:"'value'"`
-
-## Record a Video of a Trained Agent
-
-Record 1000 steps with the latest saved model:
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
-```
-
-Use the best saved model instead:
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
-```
-
-Record a video of a checkpoint saved during training (here the checkpoint name is `rl_model_10000_steps.zip`):
-
-```
-python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
-```
-
-## Record a Video of a Training Experiment
-
-Apart from recording videos of specific saved models, it is also possible to record a video of a training experiment where checkpoints have been saved.
-
-Record 1000 steps for each checkpoint, latest and best saved models:
-
-```
-python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
-```
-
-The previous command will create a `mp4` file. To convert this file to `gif` format as well:
-
-```
-python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
-```
+Please see the [dedicated section](https://rl-baselines3-zoo.readthedocs.io/en/master/guide/config.html) of the documentation.
## Current Collection: 195+ Trained Agents!
@@ -577,34 +281,6 @@ train()
```
-### Docker Images
-
-Build docker image (CPU):
-```
-make docker-cpu
-```
-
-GPU:
-```
-USE_GPU=True make docker-gpu
-```
-
-Pull built docker image (CPU):
-```
-docker pull stablebaselines/rl-baselines3-zoo-cpu
-```
-
-GPU image:
-```
-docker pull stablebaselines/rl-baselines3-zoo
-```
-
-Run script in the docker image:
-
-```
-./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
-```
-
## Tests
To run tests, first install pytest, then:
@@ -639,4 +315,4 @@ If you trained an agent that is not present in the RL Zoo, please submit a Pull
## Contributors
-We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori) [@Shade5](https://github.com/Shade5) [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum)
+We would like to thank our contributors: [@iandanforth](https://github.com/iandanforth), [@tatsubori](https://github.com/tatsubori), [@Shade5](https://github.com/Shade5), [@mcres](https://github.com/mcres), [@ernestum](https://github.com/ernestum), [@qgallouedec](https://github.com/qgallouedec)
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 000000000..938bf87e1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+# For debug: SPHINXOPTS = -nWT --keep-going -vvv
+SPHINXOPTS = # -W # make warnings fatal (disabled because of gym in the wrappers)
+SPHINXBUILD = sphinx-build
+SPHINXPROJ = RLZoo
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..c007b52b2
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,26 @@
+## RL Zoo3 Documentation
+
+This folder contains documentation for the RL Zoo.
+
+
+### Build the Documentation
+
+#### Install Sphinx and Theme
+Execute these commands in the project root:
+```
+pip install stable_baselines3[docs]
+pip install -e .
+```
+
+#### Building the Docs
+
+In the `docs/` folder:
+```
+make html
+```
+
+If you want to rebuild the documentation automatically each time a file is changed:
+
+```
+sphinx-autobuild . _build/html
+```
diff --git a/docs/_static/css/baselines_theme.css b/docs/_static/css/baselines_theme.css
new file mode 100644
index 000000000..450864efe
--- /dev/null
+++ b/docs/_static/css/baselines_theme.css
@@ -0,0 +1,61 @@
+/* Main colors adapted from pytorch doc */
+:root{
+ --main-bg-color: #343A40;
+ --link-color: #FD7E14;
+}
+
+/* Header fonts */
+h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
+ font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
+}
+
+
+/* Docs background */
+.wy-side-nav-search{
+ background-color: var(--main-bg-color);
+}
+
+/* Mobile version */
+.wy-nav-top{
+ background-color: var(--main-bg-color);
+}
+
+/* Change link colors (except for the menu) */
+a {
+ color: var(--link-color);
+}
+
+a:hover {
+ color: #4F778F;
+}
+
+.wy-menu a {
+ color: #b3b3b3;
+}
+
+.wy-menu a:hover {
+ color: #b3b3b3;
+}
+
+a.icon.icon-home {
+ color: #b3b3b3;
+}
+
+.version{
+ color: var(--link-color) !important;
+}
+
+
+/* Make code blocks have a background */
+.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] {
+ background: #f8f8f8;;
+}
+
+/* Change style of types in the docstrings .rst-content .field-list */
+.field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate
+{
+ border: None;
+ padding-left: 0;
+ padding-right: 0;
+ color: #404040;
+}
diff --git a/docs/_static/img/colab-badge.svg b/docs/_static/img/colab-badge.svg
new file mode 100644
index 000000000..c08066ee3
--- /dev/null
+++ b/docs/_static/img/colab-badge.svg
@@ -0,0 +1 @@
+
diff --git a/docs/_static/img/colab.svg b/docs/_static/img/colab.svg
new file mode 100644
index 000000000..c2d30e973
--- /dev/null
+++ b/docs/_static/img/colab.svg
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/conda_env.yml b/docs/conda_env.yml
new file mode 100644
index 000000000..98a550820
--- /dev/null
+++ b/docs/conda_env.yml
@@ -0,0 +1,20 @@
+name: root
+channels:
+ - pytorch
+ - defaults
+dependencies:
+ - cpuonly=1.0=0
+ - pip=21.1
+ - python=3.7
+ - pytorch=1.11=py3.7_cpu_0
+ - pip:
+ - gym==0.21
+ - cloudpickle
+ - opencv-python-headless
+ - pandas
+ - numpy
+ - matplotlib
+ - sphinx_autodoc_typehints
+ - sphinx>=4.2
+ - sphinx_rtd_theme>=1.0
+ - sphinx_copybutton
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..3f0348af7
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,230 @@
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file does only contain a selection of the most common options. For a
+# full list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+from typing import Dict, List
+from unittest.mock import MagicMock
+
+# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
+# PyEnchant.
+try:
+ import sphinxcontrib.spelling # noqa: F401
+
+ enable_spell_check = True
+except ImportError:
+ enable_spell_check = False
+
+# Try to enable copy button
+try:
+ import sphinx_copybutton # noqa: F401
+
+ enable_copy_button = True
+except ImportError:
+ enable_copy_button = False
+
+# source code directory, relative to this file, for sphinx-autobuild
+sys.path.insert(0, os.path.abspath(".."))
+
+
+class Mock(MagicMock):
+ __subclasses__ = [] # type: ignore
+
+ @classmethod
+ def __getattr__(cls, name):
+ return MagicMock()
+
+
+# Mock modules that require C modules
+# Note: because of that we cannot test examples using CI
+# 'torch', 'torch.nn', 'torch.nn.functional',
+# Do not mock modules for now, we may need to do that for Read the Docs later
+MOCK_MODULES: List[str] = []
+sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
+
+# Read version from file
+version_file = os.path.join(os.path.dirname(__file__), "../rl_zoo3", "version.txt")
+with open(version_file) as file_handler:
+ __version__ = file_handler.read().strip()
+
+# -- Project information -----------------------------------------------------
+
+project = "RL Baselines3 Zoo"
+copyright = "2023, Stable Baselines3"
+author = "Stable Baselines3 Contributors"
+
+# The short X.Y version
+version = "master (" + __version__ + ")"
+# The full version, including alpha/beta/rc tags
+release = __version__
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx_autodoc_typehints",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.viewcode",
+ # 'sphinx.ext.intersphinx',
+ # 'sphinx.ext.doctest'
+]
+
+if enable_spell_check:
+ extensions.append("sphinxcontrib.spelling")
+
+if enable_copy_button:
+ extensions.append("sphinx_copybutton")
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ".rst"
+
+# The master toctree document.
+master_doc = "index"
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = "en"
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path .
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+
+# Fix for read the docs
+on_rtd = os.environ.get("READTHEDOCS") == "True"
+if on_rtd:
+ html_theme = "default"
+else:
+ html_theme = "sphinx_rtd_theme"
+
+html_logo = "../images/car.jpg"
+
+
+def setup(app):
+ app.add_css_file("css/baselines_theme.css")
+
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself. Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "RLZoo3doc"
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements: Dict[str, str] = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, "RLZoo3.tex", "RL Baselines3 Zoo Documentation", "Stable Baselines3 Contributors", "manual"),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "rl_zoo3", "RL Baselines3 Zoo Documentation", [author], 1)]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (
+ master_doc,
+ "RLZoo3",
+ "RL Baselines3 Zoo Documentation",
+ author,
+ "RLZoo3",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
+]
+
+
+# -- Extension configuration -------------------------------------------------
+
+# Example configuration for intersphinx: refer to the Python standard library.
+# intersphinx_mapping = {
+# 'python': ('https://docs.python.org/3/', None),
+# 'numpy': ('http://docs.scipy.org/doc/numpy/', None),
+# 'torch': ('http://pytorch.org/docs/master/', None),
+# }
diff --git a/docs/guide/config.rst b/docs/guide/config.rst
new file mode 100644
index 000000000..0aa5918f0
--- /dev/null
+++ b/docs/guide/config.rst
@@ -0,0 +1,122 @@
+.. _config:
+
+=============
+Configuration
+=============
+
+Hyperparameter yaml syntax
+--------------------------
+
+The syntax used in ``hyperparams/algo_name.yml`` for setting
+hyperparameters (likewise the syntax to `overwrite
+hyperparameters `__
+on the CLI) may be specialized if the argument is a function. See
+examples in the ``hyperparams/`` directory. For example:
+
+- Specify a linear schedule for the learning rate:
+
+.. code:: yaml
+
+ learning_rate: lin_0.012486195510232303
+
+Specify a different activation function for the network:
+
+.. code:: yaml
+
+ policy_kwargs: "dict(activation_fn=nn.ReLU)"
+
+For a custom policy:
+
+.. code:: yaml
+
+ policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy
+
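+For reference, the ``lin_`` prefix used above corresponds to a schedule that linearly
+anneals the value from the given initial value down to 0 over training. A minimal
+sketch of such a schedule function (only an illustration, the RL Zoo ships its own helper):
+
+.. code:: python
+
+    from typing import Callable
+
+    def linear_schedule(initial_value: float) -> Callable[[float], float]:
+        """Return a function that linearly anneals from ``initial_value`` to 0."""
+
+        def func(progress_remaining: float) -> float:
+            # progress_remaining goes from 1 (start of training) to 0 (end)
+            return progress_remaining * initial_value
+
+        return func
+
+    # e.g. ``learning_rate: lin_0.001`` behaves like linear_schedule(0.001)
+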
+Env Normalization
+-----------------
+
+In the hyperparameter file, ``normalize: True`` means that the training
+environment will be wrapped in a
+`VecNormalize `__
+wrapper.
+
+`Normalization
+uses `__ the
+default parameters of ``VecNormalize``, with the exception of ``gamma``
+which is set to match that of the agent. This can be
+`overridden `__
+using the appropriate ``hyperparams/algo_name.yml``, e.g.
+
+.. code:: yaml
+
+ normalize: "{'norm_obs': True, 'norm_reward': False}"
+
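+Concretely, ``normalize: True`` corresponds to wrapping the training env roughly as in
+the following simplified sketch (the experiment manager also takes care of saving and
+reloading the normalization statistics):
+
+.. code:: python
+
+    import gym
+
+    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+
+    # Simplified sketch of what ``normalize: True`` does for the training env
+    venv = DummyVecEnv([lambda: gym.make("Pendulum-v1")])
+    # gamma is set to match the agent discount factor (0.99 is only an example)
+    venv = VecNormalize(venv, norm_obs=True, norm_reward=True, gamma=0.99)
+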
+Env Wrappers
+------------
+
+You can specify in the hyperparameter config one or more wrappers to use
+around the environment:
+
+for one wrapper:
+
+.. code:: yaml
+
+ env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
+
+for multiple, specify a list:
+
+.. code:: yaml
+
+ env_wrapper:
+ - rl_zoo3.wrappers.DoneOnSuccessWrapper:
+ reward_offset: 1.0
+ - sb3_contrib.common.wrappers.TimeFeatureWrapper
+
+Note that you can easily specify parameters too.
+
+By default, the environment is wrapped with a ``Monitor`` wrapper to
+record episode statistics. You can pass arguments to it using the
+``monitor_kwargs`` parameter to log additional data. That data *must* be
+present in the info dictionary at the last step of each episode.
+
+For instance, for recording success with goal envs
+(e.g. ``FetchReach-v1``):
+
+.. code:: yaml
+
+ monitor_kwargs: dict(info_keywords=('is_success',))
+
+or recording final x position with ``Ant-v3``:
+
+.. code:: yaml
+
+ monitor_kwargs: dict(info_keywords=('x_position',))
+
+Note: for known ``GoalEnv`` like ``FetchReach``,
+``info_keywords=('is_success',)`` is actually the default.
+
+VecEnvWrapper
+-------------
+
+You can specify which ``VecEnvWrapper`` to use in the config, the same
+way as for env wrappers (see above), using the ``vec_env_wrapper`` key:
+
+For instance:
+
+.. code:: yaml
+
+ vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
+
+Note: ``VecNormalize`` is supported separately using the ``normalize``
+keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``.
+
+Callbacks
+---------
+
+Following the same syntax as env wrappers, you can also add custom
+callbacks to use during training.
+
+.. code:: yaml
+
+ callback:
+ - rl_zoo3.callbacks.ParallelTrainCallback:
+ gradient_steps: 256
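+
+A custom callback referenced this way is a regular SB3 callback. Below is a minimal,
+hypothetical sketch of one (the class and module names are placeholders, not part of
+the RL Zoo):
+
+.. code:: python
+
+    from stable_baselines3.common.callbacks import BaseCallback
+
+
+    class MyCustomCallback(BaseCallback):
+        """Hypothetical callback: stop training after a fixed number of steps."""
+
+        def __init__(self, max_steps: int = 100_000, verbose: int = 0):
+            super().__init__(verbose)
+            self.max_steps = max_steps
+
+        def _on_step(self) -> bool:
+            # Returning False stops training
+            return self.num_timesteps < self.max_steps
+
+It would then be referenced in the config as ``my_package.callbacks.MyCustomCallback``,
+together with its keyword arguments, exactly like ``ParallelTrainCallback`` above.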
diff --git a/docs/guide/custom_env.rst b/docs/guide/custom_env.rst
new file mode 100644
index 000000000..5c6ab016d
--- /dev/null
+++ b/docs/guide/custom_env.rst
@@ -0,0 +1,11 @@
+.. _custom:
+
+==================
+Custom Environment
+==================
+
+The easiest way to add support for a custom environment is to edit
+``rl_zoo3/import_envs.py`` and register your environment there. Then, you
+need to add a section for it in the hyperparameters file
+(``hyperparams/algo.yml`` or a custom yaml file that you can specify
+using the ``--conf-file`` argument).
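+
+As a minimal sketch (the environment id, module and class below are hypothetical),
+the registration in ``rl_zoo3/import_envs.py`` could look like this:
+
+.. code-block:: python
+
+    from gym.envs.registration import register
+
+    # Hypothetical custom environment defined in my_package/my_env.py
+    register(
+        id="MyCustomEnv-v0",
+        entry_point="my_package.my_env:MyCustomEnv",
+        max_episode_steps=500,
+    )
+
+A matching ``MyCustomEnv-v0`` section then goes into the hyperparameters file,
+using the same keys as the built-in environments.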
diff --git a/docs/guide/enjoy.rst b/docs/guide/enjoy.rst
new file mode 100644
index 000000000..95097f811
--- /dev/null
+++ b/docs/guide/enjoy.rst
@@ -0,0 +1,100 @@
+.. _enjoy:
+
+=====================
+Enjoy a Trained Agent
+=====================
+
+.. note::
+
+ To download the repo with the trained agents, you must use
+ ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo``
+ in order to clone the submodule too.
+
+
+Enjoy a trained agent
+---------------------
+
+If the trained agent exists, then you can see it in action using:
+
+::
+
+ python enjoy.py --algo algo_name --env env_id
+
+For example, enjoy A2C on Breakout during 5000 timesteps:
+
+::
+
+ python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
+
+If you have trained an agent yourself, you need to do:
+
+::
+
+ # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
+
+Load Checkpoints, Best Model
+-----------------------------
+
+To load the best model (when using evaluation environment):
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
+
+To load a checkpoint (here the checkpoint name is
+``rl_model_10000_steps.zip``):
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
+
+To load the latest checkpoint:
+
+::
+
+ python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
+
+
+Record a Video of a Trained Agent
+---------------------------------
+
+Record 1000 steps with the latest saved model:
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
+
+Use the best saved model instead:
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
+
+Record a video of a checkpoint saved during training (here the
+checkpoint name is ``rl_model_10000_steps.zip``):
+
+::
+
+ python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
+
+
+Record a Video of a Training Experiment
+---------------------------------------
+
+Apart from recording videos of specific saved models, it is also
+possible to record a video of a training experiment where checkpoints
+have been saved.
+
+Record 1000 steps for each checkpoint, latest and best saved models:
+
+::
+
+ python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
+
+The previous command will create an ``mp4`` file. To convert this file to
+``gif`` format as well:
+
+::
+
+ python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
diff --git a/docs/guide/install.rst b/docs/guide/install.rst
new file mode 100644
index 000000000..dcd5c8587
--- /dev/null
+++ b/docs/guide/install.rst
@@ -0,0 +1,87 @@
+.. _install:
+
+Installation
+============
+
+Prerequisites
+-------------
+
+RL Zoo requires Python 3.7+ and PyTorch >= 1.11.
+
+
+Minimal Installation
+--------------------
+
+To install RL Zoo with pip, execute:
+
+.. code-block:: bash
+
+ pip install rl_zoo3
+
+From source:
+
+.. code-block:: bash
+
+ git clone https://github.com/DLR-RM/rl-baselines3-zoo
+ cd rl-baselines3-zoo/
+ pip install -e .
+
+.. note::
+
+ You can run ``python -m rl_zoo3.train`` from any folder, and you also have access to the ``rl_zoo3`` command line interface: for instance, ``rl_zoo3 train`` is equivalent to ``python train.py``.
+
+
+
+Full installation
+-----------------
+
+With extra envs and test dependencies:
+
+
+.. note::
+
+ If you want to use Atari games, you will additionally need to run ``pip install "autorom[accept-rom-license]"``
+ to download the ROMs.
+
+
+.. code-block:: bash
+
+ apt-get install swig cmake ffmpeg
+ pip install -r requirements.txt
+
+
+Please see the `Stable Baselines3 documentation `_ for alternative ways to install Stable Baselines3.
+
+
+Docker Images
+-------------
+
+Build docker image (CPU):
+
+::
+
+ make docker-cpu
+
+GPU:
+
+::
+
+ USE_GPU=True make docker-gpu
+
+Pull built docker image (CPU):
+
+::
+
+ docker pull stablebaselines/rl-baselines3-zoo-cpu
+
+GPU image:
+
+::
+
+ docker pull stablebaselines/rl-baselines3-zoo
+
+Run script in the docker image:
+
+::
+
+ ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
diff --git a/docs/guide/integrations.rst b/docs/guide/integrations.rst
new file mode 100644
index 000000000..e5890e4e7
--- /dev/null
+++ b/docs/guide/integrations.rst
@@ -0,0 +1,45 @@
+.. _integrations:
+
+============
+Integrations
+============
+
+Huggingface Hub Integration
+---------------------------
+
+A list and videos of trained agents can be found on our Hugging Face page: https://huggingface.co/sb3
+
+
+Upload model to hub (same syntax as for ``enjoy.py``):
+
+::
+
+ python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
+
+You can choose a custom ``repo-name`` (default: ``{algo}-{env_id}``) by
+passing a ``--repo-name`` argument.
+
+Download model from hub:
+
+::
+
+ python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
+
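+Once downloaded, the agent is a regular SB3 model and can also be loaded directly
+in python. A minimal sketch (the exact path depends on where ``load_from_hub``
+stored the agent, the one below is only an example):
+
+.. code-block:: python
+
+    from stable_baselines3 import PPO
+
+    # Hypothetical path, adjust it to the downloaded agent location
+    model = PPO.load("logs/ppo/CartPole-v1_1/CartPole-v1.zip")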
+
+Experiment tracking
+-------------------
+
+We support tracking experiment data such as learning curves and
+hyperparameters via `Weights and Biases `__.
+
+The following command
+
+::
+
+ python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
+
+yields a tracked experiment at this
+`URL `__.
+
+To add a tag to the run (e.g. ``optimized``), use the argument
+``--wandb-tags optimized``.
diff --git a/docs/guide/plot.rst b/docs/guide/plot.rst
new file mode 100644
index 000000000..aa571f612
--- /dev/null
+++ b/docs/guide/plot.rst
@@ -0,0 +1,70 @@
+.. _plot:
+
+============
+Plot Scripts
+============
+
+
+Plot scripts (to be documented, see "Results" sections in SB3
+documentation):
+
+- ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations
+
+- ``scripts/plot_train.py`` for plotting training reward/success
+
+
+Examples
+--------
+
+Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving
+window of 500 episodes for all the ``Fetch`` environments with the ``HER``
+algorithm:
+
+::
+
+ python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
+
+Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and
+Ant PyBullet environments:
+
+::
+
+ python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
+
+Plot with the rliable library
+-----------------------------
+
+The RL Zoo integrates some of the
+`rliable `__ library features. You
+can find a visual explanation of the tools used by rliable in this `blog
+post `__.
+
+First, you need to install
+`rliable `__.
+
+Note: Python 3.7+ is required in that case.
+
+Then export your results to a file using the ``all_plots.py`` script
+(see above):
+
+::
+
+ python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
+
+You can now use the ``plot_from_file.py`` script with ``--rliable``,
+``--versus`` and ``--iqm`` arguments:
+
+::
+
+ python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
+
+.. note::
+
+ you may need to edit ``plot_from_file.py``, in particular the
+ ``env_key_to_env_id`` dictionary and the
+ ``scripts/score_normalization.py`` which stores min and max score for
+ each environment.
+
+
+Remark: plotting with the ``--rliable`` option is usually slow as
+confidence intervals need to be computed using bootstrap sampling.
diff --git a/docs/guide/quickstart.rst b/docs/guide/quickstart.rst
new file mode 100644
index 000000000..d527bb189
--- /dev/null
+++ b/docs/guide/quickstart.rst
@@ -0,0 +1,51 @@
+.. _quickstart:
+
+===============
+Getting Started
+===============
+
+.. note::
+
+ You can try the following examples online using Google colab |colab|
+ notebook: `RL Baselines zoo notebook`_
+
+
+.. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb
+.. |colab| image:: ../_static/img/colab.svg
+
+
+The hyperparameters for each environment are defined in
+``hyperparams/algo_name.yml``.
+
+If the environment exists in this file, then you can train an agent
+using:
+
+::
+
+ python -m rl_zoo3.train --algo algo_name --env env_id
+
+Or if you are in the RL Zoo3 folder:
+
+::
+
+ python train.py --algo algo_name --env env_id
+
+For example (with evaluation and checkpoints):
+
+::
+
+ python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000
+
+
+
+If the trained agent exists, then you can see it in action using:
+
+::
+
+ python -m rl_zoo3.enjoy --algo algo_name --env env_id
+
+For example, enjoy A2C on Breakout during 5000 timesteps:
+
+::
+
+ python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
diff --git a/docs/guide/sbx.rst b/docs/guide/sbx.rst
new file mode 100644
index 000000000..3205f33a3
--- /dev/null
+++ b/docs/guide/sbx.rst
@@ -0,0 +1,58 @@
+.. _sbx:
+
+==========================
+Stable Baselines Jax (SBX)
+==========================
+
+`Stable Baselines Jax (SBX) `_ is a proof of concept version of Stable-Baselines3 in Jax.
+
+It provides a minimal number of features compared to SB3 but can be much faster (up to 20x!): https://twitter.com/araffin2/status/1590714558628253698
+
+
+It is also compatible with the RL Zoo.
+For that you will need to create two files.
+
+``train_sbx.py``:
+
+.. code-block:: python
+
+ import rl_zoo3
+ import rl_zoo3.train
+ from rl_zoo3.train import train
+ from sbx import DQN, PPO, SAC, TQC, DroQ
+
+
+ rl_zoo3.ALGOS["tqc"] = TQC
+ rl_zoo3.ALGOS["droq"] = DroQ
+ rl_zoo3.ALGOS["sac"] = SAC
+ rl_zoo3.ALGOS["ppo"] = PPO
+ rl_zoo3.ALGOS["dqn"] = DQN
+ rl_zoo3.train.ALGOS = rl_zoo3.ALGOS
+ rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
+
+ if __name__ == "__main__":
+ train()
+
+Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI.
+
+
+``enjoy_sbx.py``:
+
+.. code-block:: python
+
+ import rl_zoo3
+ import rl_zoo3.enjoy
+ from rl_zoo3.enjoy import enjoy
+ from sbx import DQN, PPO, SAC, TQC, DroQ
+
+
+ rl_zoo3.ALGOS["tqc"] = TQC
+ rl_zoo3.ALGOS["droq"] = DroQ
+ rl_zoo3.ALGOS["sac"] = SAC
+ rl_zoo3.ALGOS["ppo"] = PPO
+ rl_zoo3.ALGOS["dqn"] = DQN
+ rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS
+ rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
+
+ if __name__ == "__main__":
+ enjoy()
diff --git a/docs/guide/train.rst b/docs/guide/train.rst
new file mode 100644
index 000000000..8d8e18170
--- /dev/null
+++ b/docs/guide/train.rst
@@ -0,0 +1,120 @@
+.. _train:
+
+==============
+Train an Agent
+==============
+
+Basic Usage
+-----------
+
+The hyperparameters for each environment are defined in
+``hyperparams/algo_name.yml``.
+
+
+.. note::
+
+ Once RL Zoo3 is installed, you can run ``python -m rl_zoo3.train`` from any folder; it is equivalent to ``python train.py``.
+
+
+If the environment exists in this file, then you can train an agent using:
+
+::
+
+ python train.py --algo algo_name --env env_id
+
+
+.. note::
+
+ You can use ``-P`` (``--progress``) option to display a progress bar.
+
+
+Custom Config File
+------------------
+
+You can use a custom config file, as long as it is a yaml file which contains an ``env_id`` entry:
+
+::
+
+ python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
+
+
+You can also use a python file that contains a dictionary called ``hyperparams`` with an entry for each ``env_id``.
+(see ``hyperparams/python/ppo_config_example.py`` for an example)
+
+::
+
+ # You can pass a path to a python file
+ python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
+ # Or pass a path to a file from a module (for instance my_package.my_file)
+ python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
+
+The advantage of this approach is that you can specify arbitrary python dictionaries
+and ensure that all their dependencies are imported in the config file itself.
+
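+For illustration, such a python config file could look like the following minimal
+sketch (the actual ``hyperparams/python/ppo_config_example.py`` may use different
+keys and values, the ones below are placeholders):
+
+.. code-block:: python
+
+    """Hypothetical python config: a ``hyperparams`` dict with one entry per env_id."""
+    import torch.nn as nn  # imported here so the config is self-contained
+
+    hyperparams = {
+        "MountainCarContinuous-v0": dict(
+            policy="MlpPolicy",
+            n_envs=1,
+            n_timesteps=20_000,
+            normalize=True,
+            # Arbitrary python objects are allowed, e.g. for policy_kwargs
+            policy_kwargs=dict(activation_fn=nn.ReLU, net_arch=[64, 64]),
+        ),
+    }
+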
+Tensorboard, Checkpoints, Evaluation
+------------------------------------
+
+For example (with tensorboard support):
+
+::
+
+ python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
+
+
+Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
+
+::
+
+ python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
+
+
+Save a checkpoint of the agent every 100000 steps:
+
+::
+
+ python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000
+
+Resume Training
+---------------
+
+Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
+
+::
+
+ python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
+
+Save Replay Buffer
+------------------
+
+When using off-policy algorithms, you can also **save the replay buffer** after training:
+
+::
+
+ python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
+
+It will be automatically loaded if present when continuing training.
+
+
+Env keyword arguments
+---------------------
+
+You can specify keyword arguments to pass to the env constructor in the
+command line, using ``--env-kwargs``:
+
+::
+
+ python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
+
+
+Overwrite hyperparameters
+-------------------------
+
+You can easily overwrite hyperparameters in the command line, using
+``--hyperparams``:
+
+::
+
+ python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
+
+Note: if you want to pass a string, you need to escape it like this:
+``my_string:"'value'"``
diff --git a/docs/guide/tuning.rst b/docs/guide/tuning.rst
new file mode 100644
index 000000000..159f15673
--- /dev/null
+++ b/docs/guide/tuning.rst
@@ -0,0 +1,71 @@
+.. _tuning:
+
+=====================
+Hyperparameter Tuning
+=====================
+
+Hyperparameter Tuning
+---------------------
+
+We use `Optuna `__ for optimizing the
+hyperparameters. Not all hyperparameters are tuned, and tuning enforces
+certain default hyperparameter settings that may be different from the
+official defaults. See
+`rl_zoo3/hyperparams_opt.py `__
+for the current settings for each agent.
+
+Hyperparameters not specified in
+`rl_zoo3/hyperparams_opt.py `__
+are taken from the associated YAML file and fallback to the default
+values of SB3 if not present.
+
+Note: when using SuccessiveHalvingPruner (“halving”), you must specify
+``--n-jobs > 1``
+
+Budget of 1000 trials with a maximum of 50000 steps:
+
+::
+
+ python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
+ --sampler tpe --pruner median
+
+Distributed optimization using a shared database is also possible (see
+the corresponding `Optuna
+documentation `__):
+
+::
+
+ python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage sqlite:///example.db
+
+Print and save best hyperparameters of an Optuna study:
+
+::
+
+ python scripts/parse_study.py -i path/to/study.pkl --print-n-best-trials 10 --save-n-best-hyperparameters 10
+
+The default budget for hyperparameter tuning is 500 trials and there is
+one intermediate evaluation for pruning/early stopping per 100k time
+steps.
+
+Hyperparameters search space
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Note that the default hyperparameters used in the zoo when tuning are
+not always the same as the defaults provided in
+`stable-baselines3 `__.
+Consult the latest source code to be sure of these settings. For
+example:
+
+- PPO tuning assumes a network architecture with ``ortho_init = False``
+ when tuning, though it is ``True`` by
+ `default `__.
+ You can change that by updating
+ `rl_zoo3/hyperparams_opt.py `__.
+
+- Non-episodic rollout in TD3 and DDPG assumes
+ ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to
+ reduce the search space.
+
+When working with continuous actions, we recommend enabling
+`gSDE `__ by uncommenting lines in
+`rl_zoo3/hyperparams_opt.py `__.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 000000000..3ef2d6bc0
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,74 @@
+RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3
+===================================================================
+
+`RL Baselines3 Zoo `_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) `_,
+which provides reliable implementations of reinforcement learning algorithms in PyTorch.
+
+Github repository: https://github.com/DLR-RM/rl-baselines3-zoo
+
+It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.
+
+In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings.
+
+.. toctree::
+ :maxdepth: 2
+ :caption: User Guide
+
+ guide/install
+ guide/quickstart
+ guide/train
+ guide/plot
+ guide/enjoy
+ guide/custom_env
+ guide/config
+ guide/integrations
+ guide/tuning
+ guide/sbx
+
+
+.. toctree::
+ :maxdepth: 1
+ :caption: RL Zoo API
+
+ modules/exp_manager
+ modules/wrappers
+ modules/callbacks
+ modules/utils
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Misc
+
+ misc/changelog
+
+
+Citing RL Baselines3 Zoo
+------------------------
+To cite this project in publications:
+
+.. code-block:: bibtex
+
+ @misc{rl-zoo3,
+ author = {Raffin, Antonin},
+ title = {RL Baselines3 Zoo},
+ year = {2020},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}},
+ }
+
+Contributing
+------------
+
+To anyone interested in making the RL Zoo better, there are still some improvements
+that need to be done.
+You can check issues in the `repo `_.
+
+If you want to contribute, please read `CONTRIBUTING.md `_ first.
+
+Indices and tables
+-------------------
+
+* :ref:`genindex`
+* :ref:`search`
+* :ref:`modindex`
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 000000000..22b5fff4e
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,36 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+set SPHINXPROJ=RLZoo
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
new file mode 100644
index 000000000..cf5d4b9e1
--- /dev/null
+++ b/docs/misc/changelog.rst
@@ -0,0 +1,7 @@
+.. _changelog:
+
+Changelog
+==========
+
+
+See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md
diff --git a/docs/modules/callbacks.rst b/docs/modules/callbacks.rst
new file mode 100644
index 000000000..bb26e1737
--- /dev/null
+++ b/docs/modules/callbacks.rst
@@ -0,0 +1,8 @@
+.. _callbacks:
+
+
+Callbacks
+=========
+
+.. automodule:: rl_zoo3.callbacks
+ :members:
diff --git a/docs/modules/exp_manager.rst b/docs/modules/exp_manager.rst
new file mode 100644
index 000000000..4eb97a4e9
--- /dev/null
+++ b/docs/modules/exp_manager.rst
@@ -0,0 +1,15 @@
+.. _manager:
+
+.. automodule:: rl_zoo3.exp_manager
+
+
+Experiment Manager
+==================
+
+
+Parameters
+----------
+
+.. autoclass:: ExperimentManager
+ :members:
+ :inherited-members:
diff --git a/docs/modules/utils.rst b/docs/modules/utils.rst
new file mode 100644
index 000000000..097dedbd4
--- /dev/null
+++ b/docs/modules/utils.rst
@@ -0,0 +1,8 @@
+.. _utils:
+
+
+Utils
+=====
+
+.. automodule:: rl_zoo3.utils
+ :members:
diff --git a/docs/modules/wrappers.rst b/docs/modules/wrappers.rst
new file mode 100644
index 000000000..97bd7b89a
--- /dev/null
+++ b/docs/modules/wrappers.rst
@@ -0,0 +1,8 @@
+.. _wrappers:
+
+
+Wrappers
+========
+
+.. automodule:: rl_zoo3.wrappers
+ :members:
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
new file mode 100644
index 000000000..d306d7e78
--- /dev/null
+++ b/docs/spelling_wordlist.txt
@@ -0,0 +1,129 @@
+py
+env
+atari
+argparse
+Argparse
+TensorFlow
+feedforward
+envs
+VecEnv
+pretrain
+petrained
+tf
+th
+nn
+np
+str
+mujoco
+cpu
+ndarray
+ndarrays
+timestep
+timesteps
+stepsize
+dataset
+adam
+fn
+normalisation
+Kullback
+Leibler
+boolean
+deserialized
+pretrained
+minibatch
+subprocesses
+ArgumentParser
+Tensorflow
+Gaussian
+approximator
+minibatches
+hyperparameters
+hyperparameter
+vectorized
+rl
+colab
+dataloader
+npz
+datasets
+vf
+logits
+num
+Utils
+backpropagate
+prepend
+NaN
+preprocessing
+Cloudpickle
+async
+multiprocess
+tensorflow
+mlp
+cnn
+neglogp
+tanh
+coef
+repo
+Huber
+params
+ppo
+arxiv
+Arxiv
+func
+DQN
+Uhlenbeck
+Ornstein
+multithread
+cancelled
+Tensorboard
+parallelize
+customising
+serializable
+Multiprocessed
+cartpole
+toolset
+lstm
+rescale
+ffmpeg
+avconv
+unnormalized
+Github
+pre
+preprocess
+backend
+attr
+preprocess
+Antonin
+Raffin
+araffin
+Homebrew
+Numpy
+Theano
+rollout
+kfac
+Piecewise
+csv
+nvidia
+visdom
+tensorboard
+preprocessed
+namespace
+sklearn
+GoalEnv
+Torchy
+pytorch
+dicts
+optimizers
+Deprecations
+forkserver
+cuda
+Polyak
+gSDE
+rollouts
+Pyro
+softmax
+stdout
+Contrib
+Quantile
+Huggingface
+Jax
+Optuna
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..73f41c8b8
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[tool.ruff]
+# Same as Black.
+line-length = 127
+# Assume Python 3.7
+target-version = "py37"
+# TODO(antonin): activate "RUF" https://beta.ruff.rs/docs/rules/#ruff-specific-rules-ruf
+select = ["E", "F", "B", "UP", "C90"]
+ignore = []
+
+[tool.ruff.per-file-ignores]
+# "./rl_zoo3/plots/all_plots.py"= ["E501"]
+# "./rl_zoo3/plots/plot_train.py"= ["E501"]
+
+
+[tool.ruff.mccabe]
+# Unlike Flake8, default to a complexity level of 10.
+max-complexity = 15
+
+[tool.black]
+line-length = 127
+
+[tool.isort]
+profile = "black"
+line_length = 127
+src_paths = ["stable_baselines3", "rl_zoo3"]
+
+[tool.pytype]
+inputs = ["."]
+# disable = []
+
+[tool.mypy]
+ignore_missing_imports = true
+follow_imports = "silent"
+show_error_codes = true
+exclude = """(?x)(
+ rl_zoo3/hyperparams_opt.py$
+ | rl_zoo3/exp_manager.py$
+ )"""
+
+[tool.pytest.ini_options]
+# Deterministic ordering for tests; useful for pytest-xdist.
+env = [
+ "PYTHONHASHSEED=0"
+]
+
+filterwarnings = [
+ # Tensorboard warnings
+ "ignore::DeprecationWarning:tensorboard",
+ # Gym warnings
+ "ignore::UserWarning:gym",
+]
+markers = [
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')"
+]
diff --git a/requirements.txt b/requirements.txt
index 5bb2a0460..d33a536db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
gym==0.21
-stable-baselines3[extra,tests,docs]>=1.8.0a2
-sb3-contrib>=1.8.0a2
+stable-baselines3[extra_no_roms,tests,docs]>=1.8.0a9
+sb3-contrib>=1.8.0a9
box2d-py==2.3.8
pybullet
gym-minigrid
@@ -13,8 +13,8 @@ plotly
panda-gym==1.1.1 # tmp fix: until compatibility with panda-gym v2
rliable>=1.0.5
wandb
-huggingface_sb3>=2.2.1, <3.*
+huggingface_sb3>=2.2.1
seaborn
tqdm
rich
-importlib-metadata~=4.13 # flake8 not compatible with importlib-metadata>5.0
+ruff
diff --git a/rl-trained-agents b/rl-trained-agents
index 1e2a45e5d..1935484b7 160000
--- a/rl-trained-agents
+++ b/rl-trained-agents
@@ -1 +1 @@
-Subproject commit 1e2a45e5d06efd6cc15da6cf2d1939d72dcbdf87
+Subproject commit 1935484b7458967f77d4da1a50b14078d2520071
diff --git a/rl_zoo3/callbacks.py b/rl_zoo3/callbacks.py
index 556e9553a..53543df2b 100644
--- a/rl_zoo3/callbacks.py
+++ b/rl_zoo3/callbacks.py
@@ -102,12 +102,12 @@ class ParallelTrainCallback(BaseCallback):
TODO:
- blocking mode: wait for the model to finish updating the policy before collecting new experience
- at the end of a rollout
+ at the end of a rollout
- force sync mode: stop training to update to the latest policy for collecting
- new experience
+ new experience
:param gradient_steps: Number of gradient steps to do before
- sending the new policy
+ sending the new policy
:param verbose: Verbosity level
:param sleep_time: Limit the fps in the thread collecting experience.
"""
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
index 4503121b0..713bb100a 100644
--- a/rl_zoo3/exp_manager.py
+++ b/rl_zoo3/exp_manager.py
@@ -782,7 +782,7 @@ def objective(self, trial: optuna.Trial) -> float:
print("============")
print("Sampled hyperparams:")
pprint(sampled_hyperparams)
- raise optuna.exceptions.TrialPruned()
+ raise optuna.exceptions.TrialPruned() from e
is_pruned = eval_callback.is_pruned
reward = eval_callback.last_mean_reward
diff --git a/rl_zoo3/train.py b/rl_zoo3/train.py
index 1e52a5fc0..f8a6926da 100644
--- a/rl_zoo3/train.py
+++ b/rl_zoo3/train.py
@@ -204,10 +204,10 @@ def train() -> None:
if args.track:
try:
import wandb
- except ImportError:
+ except ImportError as e:
raise ImportError(
"if you want to use Weights & Biases to track experiment, please install W&B via `pip install wandb`"
- )
+ ) from e
run_name = f"{args.env}__{args.algo}__{args.seed}__{int(time.time())}"
tags = args.wandb_tags + [f"v{sb3.__version__}"]
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index f23265883..91e9071b9 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -314,7 +314,7 @@ def get_trained_models(log_folder: str) -> Dict[str, Tuple[str, str]]:
args_files = glob.glob(os.path.join(log_folder, algo, model_folder, "*/args.yml"))
if len(args_files) != 1:
continue # we expect only one sub-folder with an args.yml file
- with open(args_files[0], "r") as fh:
+ with open(args_files[0]) as fh:
env_id = yaml.load(fh, Loader=yaml.UnsafeLoader)["env"]
model_name = ModelName(algo, EnvironmentName(env_id))
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index c3d22c01c..13ef2a834 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-1.8.0a2
+1.8.0a9
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
index e94e51a70..0634a7967 100644
--- a/rl_zoo3/wrappers.py
+++ b/rl_zoo3/wrappers.py
@@ -301,12 +301,12 @@ def __init__(self, env: gym.Env):
env_id: str = env.unwrapped.spec.id
# By default no masking
- self.mask = np.ones_like((env.observation_space.sample()))
+ self.mask = np.ones_like(env.observation_space.sample())
try:
# Mask velocity
self.mask[self.velocity_indices[env_id]] = 0.0
- except KeyError:
- raise NotImplementedError(f"Velocity masking not implemented for {env_id}")
+ except KeyError as e:
+ raise NotImplementedError(f"Velocity masking not implemented for {env_id}") from e
def observation(self, observation: np.ndarray) -> np.ndarray:
return observation * self.mask
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 14b7d4b95..000000000
--- a/setup.cfg
+++ /dev/null
@@ -1,47 +0,0 @@
-[metadata]
-# This includes the license file in the wheel.
-license_files = LICENSE
-
-[tool:pytest]
-filterwarnings =
- # Tensorboard warnings
- ignore::DeprecationWarning:tensorboard
- # Gym warnings
- ignore::UserWarning:gym
-markers =
- slow: marks tests as slow (deselect with '-m "not slow"')
- serial
-
-[pytype]
-inputs = .
-
-[flake8]
-# line breaks before and after binary operators
-ignore = W503,W504,E203,E231
-# Ignore import not used when aliases are defined
-per-file-ignores =
- ./rl_zoo3/plots/all_plots.py:E501
- ./rl_zoo3/plots/plot_train.py:E501
-
-exclude =
- # No need to traverse our git directory
- .git,
- # There's no value in checking cache directories
- __pycache__,
-max-complexity = 15
-# The GitHub editor is 127 chars wide
-max-line-length = 127
-
-[isort]
-profile = black
-line_length = 127
-src_paths = stable_baselines3,rl_zoo3
-
-[mypy]
-ignore_missing_imports = True
-follow_imports = silent
-show_error_codes = True
-exclude = (?x)(
- rl_zoo3/hyperparams_opt.py$
- | rl_zoo3/exp_manager.py$
- )
diff --git a/setup.py b/setup.py
index 73ddc7a48..458a6ff2f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,8 +27,8 @@
},
entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
install_requires=[
- "sb3-contrib>=1.8.0a2",
- "huggingface_sb3>=2.2.1, <3.*",
+ "sb3-contrib>=1.8.0a9",
+ "huggingface_sb3>=2.2.1",
"tqdm",
"rich",
"optuna",