Skip to content

Commit

Permalink
Merge branch 'dev' into fix-improvements/adjust-speech-rate-or-speed
Browse files Browse the repository at this point in the history
  • Loading branch information
isikhi authored Jan 20, 2025
2 parents ed1563b + 420a02f commit c868390
Show file tree
Hide file tree
Showing 246 changed files with 4,895 additions and 5,157 deletions.
5 changes: 3 additions & 2 deletions .github/actions/setup-uv/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ runs:
using: 'composite'
steps:
- name: Install uv
uses: astral-sh/setup-uv@v4
uses: astral-sh/setup-uv@v5
with:
version: "0.5.4"
version: "0.5.17"
enable-cache: true
cache-dependency-glob: "**/pyproject.toml"
python-version: ${{ matrix.python-version }}
6 changes: 0 additions & 6 deletions .github/workflows/style_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,9 @@ on:
jobs:
lint:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.9]
steps:
- uses: actions/checkout@v4
- name: Setup uv
uses: ./.github/actions/setup-uv
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}
- name: Lint check
run: make lint
59 changes: 46 additions & 13 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,19 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.9, "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]
subset: ["data_tests", "inference_tests", "test_aux", "test_text"]
steps:
- uses: actions/checkout@v4
- name: Setup uv
uses: ./.github/actions/setup-uv
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}
- name: Install Espeak
if: contains(fromJSON('["inference_tests", "test_text"]'), matrix.subset)
run: |
sudo apt-get update
sudo apt-get install espeak espeak-ng
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends git make gcc
make system-deps
- name: Install custom Trainer and/or Coqpit if requested
Expand All @@ -51,7 +48,7 @@ jobs:
- name: Unit tests
run: |
resolution=highest
if [ "${{ matrix.python-version }}" == "3.9" ]; then
if [ "${{ matrix.python-version }}" == "3.10" ]; then
resolution=lowest-direct
fi
uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }}
Expand All @@ -67,22 +64,19 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.12"]
subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]
python-version: ["3.10", "3.12"]
subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts"]
steps:
- uses: actions/checkout@v4
- name: Setup uv
uses: ./.github/actions/setup-uv
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}
- name: Install Espeak
if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset)
if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts"]'), matrix.subset)
run: |
sudo apt-get update
sudo apt-get install espeak espeak-ng
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends git make gcc
make system-deps
- name: Install custom Trainer and/or Coqpit if requested
Expand All @@ -96,7 +90,7 @@ jobs:
- name: Integration tests
run: |
resolution=highest
if [ "${{ matrix.python-version }}" == "3.9" ]; then
if [ "${{ matrix.python-version }}" == "3.10" ]; then
resolution=lowest-direct
fi
uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }}
Expand All @@ -107,9 +101,48 @@ jobs:
name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }}
path: .coverage.*
if-no-files-found: ignore
zoo:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
partition: ["0", "1", "2"]
steps:
- uses: actions/checkout@v4
- name: Setup uv
uses: ./.github/actions/setup-uv
- name: Install Espeak
run: |
sudo apt-get update
sudo apt-get install espeak espeak-ng
- name: Install dependencies
run: |
sudo apt-get install -y --no-install-recommends git make gcc
make system-deps
- name: Install custom Trainer and/or Coqpit if requested
run: |
if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then
uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }}
fi
if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then
uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }}
fi
- name: Zoo tests
run: uv run --extra server --extra languages make test_zoo
env:
NUM_PARTITIONS: 3
TEST_PARTITION: ${{ matrix.partition }}
- name: Upload coverage data
uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: coverage-data-zoo-${{ matrix.partition }}
path: .coverage.*
if-no-files-found: ignore
coverage:
if: always()
needs: [unit, integration]
needs: [unit, integration, zoo]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
8 changes: 2 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,9 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: "https://github.com/psf/black"
rev: 24.2.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.0
rev: v0.9.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
uv run make test_all # run all the tests, report all the errors
```

9. Format your code. We use ```black``` for code formatting.
9. Format your code. We use ```ruff``` for code formatting.

```bash
make style
Expand Down
41 changes: 17 additions & 24 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,48 @@ help:

target_dirs := tests TTS notebooks recipes

test_all: ## run tests and don't stop on an error.
nose2 --with-coverage --coverage TTS tests
./run_bash_tests.sh

test: ## run tests.
coverage run -m nose2 -F -v -B tests
coverage run -m pytest -x -v --durations=0 tests

test_vocoder: ## run vocoder tests.
coverage run -m nose2 -F -v -B tests.vocoder_tests
coverage run -m pytest -x -v --durations=0 tests/vocoder_tests

test_tts: ## run tts tests.
coverage run -m nose2 -F -v -B tests.tts_tests
coverage run -m pytest -x -v --durations=0 tests/tts_tests

test_tts2: ## run tts2 tests.
coverage run -m nose2 -F -v -B tests.tts_tests2
coverage run -m pytest -x -v --durations=0 tests/tts_tests2

test_xtts:
coverage run -m nose2 -F -v -B tests.xtts_tests
coverage run -m pytest -x -v --durations=0 tests/xtts_tests

test_aux: ## run aux tests.
coverage run -m nose2 -F -v -B tests.aux_tests
./run_bash_tests.sh
coverage run -m pytest -x -v --durations=0 tests/aux_tests

test_zoo: ## run zoo tests.
coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_models.py

test_zoo0: ## run zoo tests.
coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_0_step_3 \
tests.zoo_tests.test_models.test_voice_conversion
test_zoo1: ## run zoo tests.
coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_1_step_3
test_zoo2: ## run zoo tests.
coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_2_step_3
test_zoo_big: ## run tests for models that are too big for CI.
coverage run -m pytest -x -v --durations=0 tests/zoo_tests/test_big_models.py

inference_tests: ## run inference tests.
coverage run -m nose2 -F -v -B tests.inference_tests
coverage run -m pytest -x -v --durations=0 tests/inference_tests

data_tests: ## run data tests.
coverage run -m nose2 -F -v -B tests.data_tests
coverage run -m pytest -x -v --durations=0 tests/data_tests

test_text: ## run text tests.
coverage run -m nose2 -F -v -B tests.text_tests
coverage run -m pytest -x -v --durations=0 tests/text_tests

test_failed: ## only run tests that failed the last time.
coverage run -m nose2 -F -v -B tests
coverage run -m pytest -x -v --last-failed tests

style: ## update code style.
uv run --only-dev black ${target_dirs}
uv run --only-dev ruff format ${target_dirs}

lint: ## run linters.
uv run --only-dev ruff check ${target_dirs}
uv run --only-dev black ${target_dirs} --check
uv run --only-dev ruff format ${target_dirs} --check

system-deps: ## install linux system deps
sudo apt-get install -y libsndfile1-dev
Expand Down
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ repository are also still a useful source of information.

### Voice Conversion
- [FreeVC](https://arxiv.org/abs/2210.15418)
- [kNN-VC](https://doi.org/10.21437/Interspeech.2023-419)
- [OpenVoice](https://arxiv.org/abs/2312.01479)

### Others
Expand All @@ -115,7 +116,7 @@ You can also help us implement more models.
<!-- start installation -->
## Installation

🐸TTS is tested on Ubuntu 24.04 with **python >= 3.9, < 3.13**, but should also
🐸TTS is tested on Ubuntu 24.04 with **python >= 3.10, < 3.13**, but should also
work on Mac and Windows.

If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the pretrained 🐸TTS models, installing from PyPI is the easiest option.
Expand Down Expand Up @@ -170,7 +171,7 @@ You can also try out Coqui TTS without installation with the docker image.
Simply run the following command and you will be able to run TTS:

```bash
docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/coqui-ai/tts-cpu
docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/idiap/coqui-tts-cpu
python3 TTS/server/server.py --list_models # To get the list of available models
python3 TTS/server/server.py --model_name tts_models/en/vctk/vits # To start a server
```
Expand Down Expand Up @@ -234,7 +235,7 @@ tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)

#### Voice conversion (VC)

Converting the voice in `source_wav` to the voice of `target_wav`
Converting the voice in `source_wav` to the voice of `target_wav`:

```python
tts = TTS("voice_conversion_models/multilingual/vctk/freevc24").to("cuda")
Expand All @@ -246,9 +247,13 @@ tts.voice_conversion_to_file(
```

Other available voice conversion models:
- `voice_conversion_models/multilingual/multi-dataset/knnvc`
- `voice_conversion_models/multilingual/multi-dataset/openvoice_v1`
- `voice_conversion_models/multilingual/multi-dataset/openvoice_v2`

For more details, see the
[documentation](https://coqui-tts.readthedocs.io/en/latest/vc.html).

#### Voice cloning by combining single speaker TTS model with the default VC model

This way, you can clone voices by using any model in 🐸TTS. The FreeVC model is
Expand Down
26 changes: 26 additions & 0 deletions TTS/.models.json
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,22 @@
"license": "apache 2.0"
}
},
"librispeech100": {
"wavlm-hifigan": {
"description": "HiFiGAN vocoder for WavLM features from kNN-VC",
"github_rls_url": "https://github.com/idiap/coqui-ai-TTS/releases/download/v0.25.2_models/vocoder_models--en--librispeech100--wavlm-hifigan.zip",
"commit": "cfba7e0",
"author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
"license": "MIT"
},
"wavlm-hifigan_prematched": {
"description": "Prematched HiFiGAN vocoder for WavLM features from kNN-VC",
"github_rls_url": "https://github.com/idiap/coqui-ai-TTS/releases/download/v0.25.2_models/vocoder_models--en--librispeech100--wavlm-hifigan_prematched.zip",
"commit": "cfba7e0",
"author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
"license": "MIT"
}
},
"ljspeech": {
"multiband-melgan": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip",
Expand Down Expand Up @@ -927,18 +943,27 @@
"freevc24": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
"description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
"default_vocoder": null,
"author": "Jing-Yi Li @OlaWod",
"license": "MIT",
"commit": null
}
},
"multi-dataset": {
"knnvc": {
"description": "kNN-VC model from https://github.com/bshall/knn-vc",
"default_vocoder": "vocoder_models/en/librispeech100/wavlm-hifigan_prematched",
"author": "Benjamin van Niekerk @bshall, Matthew Baas @RF5",
"license": "MIT",
"commit": null
},
"openvoice_v1": {
"hf_url": [
"https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/config.json",
"https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth"
],
"description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoice",
"default_vocoder": null,
"author": "MyShell.ai",
"license": "MIT",
"commit": null
Expand All @@ -949,6 +974,7 @@
"https://huggingface.co/myshell-ai/OpenVoiceV2/resolve/main/converter/checkpoint.pth"
],
"description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoiceV2",
"default_vocoder": null,
"author": "MyShell.ai",
"license": "MIT",
"commit": null
Expand Down
Loading

0 comments on commit c868390

Please sign in to comment.