From d8bb2d3bfeb9f1d9459a68d772a259f5b78861c0 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 11 Jan 2025 02:25:53 +0100 Subject: [PATCH] fix(configs): update config field types --- TTS/tts/configs/align_tts_config.py | 8 ++++---- TTS/tts/configs/fast_pitch_config.py | 6 +++--- TTS/tts/configs/fast_speech_config.py | 6 +++--- TTS/tts/configs/fastspeech2_config.py | 8 ++++---- TTS/tts/configs/glow_tts_config.py | 8 ++++---- TTS/tts/configs/speedy_speech_config.py | 6 +++--- TTS/tts/configs/tacotron_config.py | 6 +++--- TTS/tts/configs/vits_config.py | 2 +- TTS/vocoder/configs/multiband_melgan_config.py | 2 +- TTS/vocoder/configs/shared_configs.py | 2 +- 10 files changed, 27 insertions(+), 27 deletions(-) diff --git a/TTS/tts/configs/align_tts_config.py b/TTS/tts/configs/align_tts_config.py index 2224396d1e..0d323e9a69 100644 --- a/TTS/tts/configs/align_tts_config.py +++ b/TTS/tts/configs/align_tts_config.py @@ -69,7 +69,7 @@ class AlignTTSConfig(BaseTTSConfig): model: str = "align_tts" # model specific params model_args: AlignTTSArgs = field(default_factory=AlignTTSArgs) - phase_start_steps: list[int] = None + phase_start_steps: list[int] | None = None ssim_alpha: float = 1.0 spec_loss_alpha: float = 1.0 @@ -79,13 +79,13 @@ class AlignTTSConfig(BaseTTSConfig): # multi-speaker settings use_speaker_embedding: bool = False use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None # optimizer parameters optimizer: str = "Adam" optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6}) - lr_scheduler: str = None - lr_scheduler_params: dict = None + lr_scheduler: str | None = None + lr_scheduler_params: dict | None = None lr: float = 1e-4 grad_clip: float = 5.0 diff --git a/TTS/tts/configs/fast_pitch_config.py b/TTS/tts/configs/fast_pitch_config.py index 5b50122e09..86863b54af 100644 --- a/TTS/tts/configs/fast_pitch_config.py +++ b/TTS/tts/configs/fast_pitch_config.py @@ -116,10 +116,10 @@ class FastPitchConfig(BaseTTSConfig): # multi-speaker settings num_speakers: int = 0 - speakers_file: str = None + speakers_file: str | None = None use_speaker_embedding: bool = False use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None d_vector_dim: int = 0 # optimizer parameters @@ -149,7 +149,7 @@ class FastPitchConfig(BaseTTSConfig): # dataset configs compute_f0: bool = True - f0_cache_path: str = None + f0_cache_path: str | None = None # testing test_sentences: list[str] = field( diff --git a/TTS/tts/configs/fast_speech_config.py b/TTS/tts/configs/fast_speech_config.py index f375292256..099419df51 100644 --- a/TTS/tts/configs/fast_speech_config.py +++ b/TTS/tts/configs/fast_speech_config.py @@ -110,10 +110,10 @@ class FastSpeechConfig(BaseTTSConfig): # multi-speaker settings num_speakers: int = 0 - speakers_file: str = None + speakers_file: str | None = None use_speaker_embedding: bool = False use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None d_vector_dim: int = 0 # optimizer parameters @@ -143,7 +143,7 @@ class FastSpeechConfig(BaseTTSConfig): # dataset configs compute_f0: bool = False - f0_cache_path: str = None + f0_cache_path: str | None = None # testing test_sentences: list[str] = field( diff --git a/TTS/tts/configs/fastspeech2_config.py b/TTS/tts/configs/fastspeech2_config.py index 3d6ce4f4b3..7b16085a44 100644 --- a/TTS/tts/configs/fastspeech2_config.py +++ b/TTS/tts/configs/fastspeech2_config.py @@ -126,10 +126,10 @@ class Fastspeech2Config(BaseTTSConfig): # multi-speaker settings num_speakers: int = 0 - speakers_file: str = None + speakers_file: str | None = None use_speaker_embedding: bool = False use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None d_vector_dim: int = 0 # optimizer parameters @@ -160,11 +160,11 @@ class Fastspeech2Config(BaseTTSConfig): # dataset configs compute_f0: bool = True - f0_cache_path: str = None + f0_cache_path: str | None = None # dataset configs compute_energy: bool = True - energy_cache_path: str = None + energy_cache_path: str | None = None # testing test_sentences: list[str] = field( diff --git a/TTS/tts/configs/glow_tts_config.py b/TTS/tts/configs/glow_tts_config.py index 34b4057093..cb88e1b8db 100644 --- a/TTS/tts/configs/glow_tts_config.py +++ b/TTS/tts/configs/glow_tts_config.py @@ -100,7 +100,7 @@ class GlowTTSConfig(BaseTTSConfig): model: str = "glow_tts" # model params - num_chars: int = None + num_chars: int | None = None encoder_type: str = "rel_pos_transformer" encoder_params: dict = field( default_factory=lambda: { @@ -146,15 +146,15 @@ class GlowTTSConfig(BaseTTSConfig): data_dep_init_steps: int = 10 # inference params - style_wav_for_test: str = None + style_wav_for_test: str | None = None inference_noise_scale: float = 0.0 length_scale: float = 1.0 # multi-speaker settings use_speaker_embedding: bool = False - speakers_file: str = None + speakers_file: str | None = None use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None # optimizer parameters optimizer: str = "RAdam" diff --git a/TTS/tts/configs/speedy_speech_config.py b/TTS/tts/configs/speedy_speech_config.py index 29221d7b25..e7446100e7 100644 --- a/TTS/tts/configs/speedy_speech_config.py +++ b/TTS/tts/configs/speedy_speech_config.py @@ -128,10 +128,10 @@ class SpeedySpeechConfig(BaseTTSConfig): # multi-speaker settings num_speakers: int = 0 - speakers_file: str = None + speakers_file: str | None = None use_speaker_embedding: bool = False use_d_vector_file: bool = False - d_vector_file: str = False + d_vector_file: str | None = None d_vector_dim: int = 0 # optimizer parameters @@ -160,7 +160,7 @@ class SpeedySpeechConfig(BaseTTSConfig): # dataset configs compute_f0: bool = False - f0_cache_path: str = None + f0_cache_path: str | None = None # testing test_sentences: list[str] = field( diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py index e4b419d1fa..ed2aec14a6 100644 --- a/TTS/tts/configs/tacotron_config.py +++ b/TTS/tts/configs/tacotron_config.py @@ -169,7 +169,7 @@ class TacotronConfig(BaseTTSConfig): # attention layers attention_type: str = "original" - attention_heads: int = None + attention_heads: int | None = None attention_norm: str = "sigmoid" attention_win: bool = False windowing: bool = False @@ -188,8 +188,8 @@ class TacotronConfig(BaseTTSConfig): use_speaker_embedding: bool = False speaker_embedding_dim: int = 512 use_d_vector_file: bool = False - d_vector_file: str = False - d_vector_dim: int = None + d_vector_file: str | None = None + d_vector_dim: int | None = None # optimizer parameters optimizer: str = "RAdam" diff --git a/TTS/tts/configs/vits_config.py b/TTS/tts/configs/vits_config.py index d85684c721..9ad720da30 100644 --- a/TTS/tts/configs/vits_config.py +++ b/TTS/tts/configs/vits_config.py @@ -145,7 +145,7 @@ class VitsConfig(BaseTTSConfig): add_blank: bool = True # testing - test_sentences: list[list] = field( + test_sentences: list[str] | list[list[str]] = field( default_factory=lambda: [ ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."], ["Be a voice, not an echo."], diff --git a/TTS/vocoder/configs/multiband_melgan_config.py b/TTS/vocoder/configs/multiband_melgan_config.py index 763113537f..2139f47b0e 100644 --- a/TTS/vocoder/configs/multiband_melgan_config.py +++ b/TTS/vocoder/configs/multiband_melgan_config.py @@ -121,7 +121,7 @@ class MultibandMelganConfig(BaseGANVocoderConfig): pad_short: int = 2000 use_noise_augment: bool = False use_cache: bool = True - steps_to_start_discriminator: bool = 200000 + steps_to_start_discriminator: int = 200000 # LOSS PARAMETERS - overrides use_stft_loss: bool = True diff --git a/TTS/vocoder/configs/shared_configs.py b/TTS/vocoder/configs/shared_configs.py index a558cfcabb..98c925a380 100644 --- a/TTS/vocoder/configs/shared_configs.py +++ b/TTS/vocoder/configs/shared_configs.py @@ -168,7 +168,7 @@ class BaseGANVocoderConfig(BaseVocoderConfig): target_loss: str = "loss_0" # loss value to pick the best model to save after each epoch # optimizer - grad_clip: float = field(default_factory=lambda: [5, 5]) + grad_clip: float | list[float] = field(default_factory=lambda: [5, 5]) lr_gen: float = 0.0002 # Initial learning rate. lr_disc: float = 0.0002 # Initial learning rate. lr_scheduler_gen: str = "ExponentialLR" # one of the schedulers from https:#pytorch.org/docs/stable/optim.html