Merge branch 'dev' into fix-improvements/adjust-speech-rate-or-speed

idiap · Jan 20, 2025 · c868390 · c868390
2 parents ed1563b + 420a02f
commit c868390
Show file tree

Hide file tree

Showing 246 changed files with 4,895 additions and 5,157 deletions.
diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml
@@ -4,8 +4,9 @@ runs:
   using: 'composite'
   steps:
     - name: Install uv
-      uses: astral-sh/setup-uv@v4
+      uses: astral-sh/setup-uv@v5
       with:
-        version: "0.5.4"
+        version: "0.5.17"
         enable-cache: true
         cache-dependency-glob: "**/pyproject.toml"
+        python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml
@@ -9,15 +9,9 @@ on:
 jobs:
   lint:
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9]
     steps:
       - uses: actions/checkout@v4
       - name: Setup uv
         uses: ./.github/actions/setup-uv
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
       - name: Lint check
         run: make lint
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -22,22 +22,19 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.9, "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
         subset: ["data_tests", "inference_tests", "test_aux", "test_text"]
     steps:
       - uses: actions/checkout@v4
       - name: Setup uv
         uses: ./.github/actions/setup-uv
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
       - name: Install Espeak
         if: contains(fromJSON('["inference_tests", "test_text"]'), matrix.subset)
         run: |
           sudo apt-get update
           sudo apt-get install espeak espeak-ng
       - name: Install dependencies
         run: |
-          sudo apt-get update
           sudo apt-get install -y --no-install-recommends git make gcc
           make system-deps
       - name: Install custom Trainer and/or Coqpit if requested
@@ -51,7 +48,7 @@ jobs:
       - name: Unit tests
         run: |
           resolution=highest
-          if [ "${{ matrix.python-version }}" == "3.9" ]; then
+          if [ "${{ matrix.python-version }}" == "3.10" ]; then
             resolution=lowest-direct
           fi
           uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }}
@@ -67,22 +64,19 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.12"]
-        subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]
+        python-version: ["3.10", "3.12"]
+        subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts"]
     steps:
       - uses: actions/checkout@v4
       - name: Setup uv
         uses: ./.github/actions/setup-uv
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
       - name: Install Espeak
-        if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset)
+        if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts"]'), matrix.subset)
         run: |
           sudo apt-get update
           sudo apt-get install espeak espeak-ng
       - name: Install dependencies
         run: |
-          sudo apt-get update
           sudo apt-get install -y --no-install-recommends git make gcc
           make system-deps
       - name: Install custom Trainer and/or Coqpit if requested
@@ -96,7 +90,7 @@ jobs:
       - name: Integration tests
         run: |
           resolution=highest
-          if [ "${{ matrix.python-version }}" == "3.9" ]; then
+          if [ "${{ matrix.python-version }}" == "3.10" ]; then
             resolution=lowest-direct
           fi
           uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }}
@@ -107,9 +101,48 @@ jobs:
           name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }}
           path: .coverage.*
           if-no-files-found: ignore
+  zoo:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+        partition: ["0", "1", "2"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup uv
+        uses: ./.github/actions/setup-uv
+      - name: Install Espeak
+        run: |
+          sudo apt-get update
+          sudo apt-get install espeak espeak-ng
+      - name: Install dependencies
+        run: |
+          sudo apt-get install -y --no-install-recommends git make gcc
+          make system-deps
+      - name: Install custom Trainer and/or Coqpit if requested
+        run: |
+          if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then
+            uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }}
+          fi
+          if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then
+            uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }}
+          fi
+      - name: Zoo tests
+        run: uv run --extra server --extra languages make test_zoo
+        env:
+          NUM_PARTITIONS: 3
+          TEST_PARTITION: ${{ matrix.partition }}
+      - name: Upload coverage data
+        uses: actions/upload-artifact@v4
+        with:
+          include-hidden-files: true
+          name: coverage-data-zoo-${{ matrix.partition }}
+          path: .coverage.*
+          if-no-files-found: ignore
   coverage:
     if: always()
-    needs: [unit, integration]
+    needs: [unit, integration, zoo]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,13 +7,9 @@ repos:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
-  - repo: "https://github.com/psf/black"
-    rev: 24.2.0
-    hooks:
-      - id: black
-        language_version: python3
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.9.1
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -88,7 +88,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
     uv run make test_all  # run all the tests, report all the errors
     ```
 
-9. Format your code. We use ```black``` for code formatting.
+9. Format your code. We use ```ruff``` for code formatting.
 
     ```bash
     make style

diff --git a/Makefile b/Makefile
@@ -6,55 +6,48 @@ help:
 
 target_dirs := tests TTS notebooks recipes
 
-test_all:	## run tests and don't stop on an error.
-	nose2 --with-coverage --coverage TTS tests
-	./run_bash_tests.sh
-
 test:	## run tests.
-	coverage run -m nose2 -F -v -B tests
+	coverage run -m pytest -x -v --durations=0 tests
 
 test_vocoder:	## run vocoder tests.
-	coverage run -m nose2 -F -v -B tests.vocoder_tests
+	coverage run -m pytest -x -v --durations=0 tests/vocoder_tests
 
 test_tts:	## run tts tests.
-	coverage run -m nose2 -F -v -B tests.tts_tests
+	coverage run -m pytest -x -v --durations=0 tests/tts_tests
 
 test_tts2:	## run the second set of tts tests (tests/tts_tests2).
-	coverage run -m nose2 -F -v -B tests.tts_tests2
+	coverage run -m pytest -x -v --durations=0 tests/tts_tests2
 
 test_xtts:
-	coverage run -m nose2 -F -v -B tests.xtts_tests
+	coverage run -m pytest -x -v --durations=0 tests/xtts_tests
 
 test_aux:	## run aux tests.
-	coverage run -m nose2 -F -v -B tests.aux_tests
-	./run_bash_tests.sh
+	coverage run -m pytest -x -v --durations=0 tests/aux_tests
+
+test_zoo:	## run zoo tests.
+	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py
 
-test_zoo0:	## run zoo tests.
-	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_0_step_3 \
-	tests.zoo_tests.test_models.test_voice_conversion
-test_zoo1:	## run zoo tests.
-	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_1_step_3
-test_zoo2:	## run zoo tests.
-	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_2_step_3
+test_zoo_big:	## run tests for models that are too big for CI.
+	coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_big_models.py
 
 inference_tests: ## run inference tests.
-	coverage run -m nose2 -F -v -B tests.inference_tests
+	coverage run -m pytest -x -v --durations=0 tests/inference_tests
 
 data_tests: ## run data tests.
-	coverage run -m nose2 -F -v -B tests.data_tests
+	coverage run -m pytest -x -v --durations=0 tests/data_tests
 
 test_text: ## run text tests.
-	coverage run -m nose2 -F -v -B tests.text_tests
+	coverage run -m pytest -x -v --durations=0 tests/text_tests
 
 test_failed:  ## only run tests that failed the last time.
-	coverage run -m nose2 -F -v -B tests
+	coverage run -m pytest -x -v --last-failed tests
 
 style:	## update code style.
-	uv run --only-dev black ${target_dirs}
+	uv run --only-dev ruff format ${target_dirs}
 
 lint:	## run linters.
 	uv run --only-dev ruff check ${target_dirs}
-	uv run --only-dev black ${target_dirs} --check
+	uv run --only-dev ruff format ${target_dirs} --check
 
 system-deps:	## install linux system deps
 	sudo apt-get install -y libsndfile1-dev

diff --git a/README.md b/README.md
@@ -98,6 +98,7 @@ repository are also still a useful source of information.
 
 ### Voice Conversion
 - [FreeVC](https://arxiv.org/abs/2210.15418)
+- [kNN-VC](https://doi.org/10.21437/Interspeech.2023-419)
 - [OpenVoice](https://arxiv.org/abs/2312.01479)
 
 ### Others
@@ -115,7 +116,7 @@ You can also help us implement more models.
 <!-- start installation -->
 ## Installation
 
-🐸TTS is tested on Ubuntu 24.04 with **python >= 3.9, < 3.13**, but should also
+🐸TTS is tested on Ubuntu 24.04 with **python >= 3.10, < 3.13**, but should also
 work on Mac and Windows.
 
 If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the pretrained 🐸TTS models, installing from PyPI is the easiest option.
@@ -170,7 +171,7 @@ You can also try out Coqui TTS without installation with the docker image.
 Simply run the following command and you will be able to run TTS:
 
 ```bash
-docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/coqui-ai/tts-cpu
+docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/idiap/coqui-tts-cpu
 python3 TTS/server/server.py --list_models # To get the list of available models
 python3 TTS/server/server.py --model_name tts_models/en/vctk/vits # To start a server
 ```
@@ -234,7 +235,7 @@ tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
 
 #### Voice conversion (VC)
 
-Converting the voice in `source_wav` to the voice of `target_wav`
+Converting the voice in `source_wav` to the voice of `target_wav`:
 
 ```python
 tts = TTS("voice_conversion_models/multilingual/vctk/freevc24").to("cuda")
@@ -246,9 +247,13 @@ tts.voice_conversion_to_file(
 ```
 
 Other available voice conversion models:
+- `voice_conversion_models/multilingual/multi-dataset/knnvc`
 - `voice_conversion_models/multilingual/multi-dataset/openvoice_v1`
 - `voice_conversion_models/multilingual/multi-dataset/openvoice_v2`
 
+For more details, see the
+[documentation](https://coqui-tts.readthedocs.io/en/latest/vc.html).
+
 #### Voice cloning by combining single speaker TTS model with the default VC model
 
 This way, you can clone voices by using any model in 🐸TTS. The FreeVC model is

diff --git a/TTS/.models.json b/TTS/.models.json
@@ -787,6 +787,22 @@
                     "license": "apache 2.0"
                 }
             },
+            "librispeech100": {
+                "wavlm-hifigan": {
+                    "description": "HiFiGAN vocoder for WavLM features from kNN-VC",
+                    "github_rls_url": "https://github.com/idiap/coqui-ai-TTS/releases/download/v0.25.2_models/vocoder_models--en--librispeech100--wavlm-hifigan.zip",
+                    "commit": "cfba7e0",
+                    "author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
+                    "license": "MIT"
+                },
+                "wavlm-hifigan_prematched": {
+                    "description": "Prematched HiFiGAN vocoder for WavLM features from kNN-VC",
+                    "github_rls_url": "https://github.com/idiap/coqui-ai-TTS/releases/download/v0.25.2_models/vocoder_models--en--librispeech100--wavlm-hifigan_prematched.zip",
+                    "commit": "cfba7e0",
+                    "author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
+                    "license": "MIT"
+                }
+            },
             "ljspeech": {
                 "multiband-melgan": {
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip",
@@ -927,18 +943,27 @@
                 "freevc24": {
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
                     "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
+                    "default_vocoder": null,
                     "author": "Jing-Yi Li @OlaWod",
                     "license": "MIT",
                     "commit": null
                 }
             },
             "multi-dataset": {
+                "knnvc": {
+                    "description": "kNN-VC model from https://github.com/bshall/knn-vc",
+                    "default_vocoder": "vocoder_models/en/librispeech100/wavlm-hifigan_prematched",
+                    "author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
+                    "license": "MIT",
+                    "commit": null
+                },
                 "openvoice_v1": {
                     "hf_url": [
                         "https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/config.json",
                         "https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth"
                     ],
                     "description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoice",
+                    "default_vocoder": null,
                     "author": "MyShell.ai",
                     "license": "MIT",
                     "commit": null
@@ -949,6 +974,7 @@
                         "https://huggingface.co/myshell-ai/OpenVoiceV2/resolve/main/converter/checkpoint.pth"
                     ],
                     "description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoiceV2",
+                    "default_vocoder": null,
                     "author": "MyShell.ai",
                     "license": "MIT",
                     "commit": null