Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hifi_FS2 #15

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions configs/default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
data:
data_dir: 'H:\Deepsync\backup\fastspeech\data\'
wav_dir: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\wavs\'
data_dir: '/workspace/data/'
wav_dir: '/workspace/LJSpeech-1.1/wavs/'
# Computed statistics
e_mean: 21.578571319580078
e_std: 18.916799545288086
Expand Down Expand Up @@ -106,10 +106,12 @@ model:


train:
discriminator_start: 20000
rep_discriminator: 1
# optimization related
eos: False #True
opt: 'noam'
accum_grad: 4
accum_grad: 1
grad_clip: 1.0
weight_decay: 0.001
patience: 0
Expand All @@ -125,7 +127,7 @@ train:
seed: 1 # random seed number
resume: "" # the snapshot path to resume (if set empty, no effect)
use_phonemes: True
batch_size : 16
batch_size : 24
# other
melgan_vocoder : True
save_interval : 1000
Expand All @@ -134,4 +136,4 @@ train:
summary_interval : 200
validation_step : 500
tts_max_mel_len : 870 # if you have a couple of extremely long spectrograms you might want to use this
tts_bin_lengths : True # bins the spectrogram lengths before sampling in data loader - speeds up training
tts_bin_lengths : True # bins the spectrogram lengths before sampling in data loader - speeds up training
62 changes: 62 additions & 0 deletions core/discriminator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class Discriminator(nn.Module):
    """Fully-convolutional critic over a single-channel 2-D input.

    Four 3x3 convolutions (stride 1, padding 1) widen the channels
    1 -> 16 -> 32 -> 64 and then collapse them back to 1, with a
    LeakyReLU(0.2) after every convolution except the last.  Because each
    convolution uses a 3x3 kernel with stride 1 and padding 1, the output
    keeps the spatial size of the input.
    """

    def __init__(self):
        super(Discriminator, self).__init__()

        # Channel widths of the hidden stack; each consecutive pair becomes
        # one Conv2d followed by a LeakyReLU.
        widths = (1, 16, 32, 64)
        stack = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stack.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1)
            )
            stack.append(nn.LeakyReLU(0.2, inplace=True))

        # Final projection to a one-channel score map, left un-activated.
        stack.append(
            nn.Conv2d(widths[-1], 1, kernel_size=3, stride=1, padding=1)
        )

        self.discriminator = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw score map for ``x``.

        No sigmoid is applied to the output: the scores are meant for a
        Least Squares GAN objective (https://arxiv.org/abs/1611.04076).
        """
        scores = self.discriminator(x)
        return scores

def weights_init(m):
    """Re-initialise ``m`` in place based on its class name.

    Intended to be passed to ``nn.Module.apply``.

    * Class name contains ``"Conv"``: weights are drawn from N(0, 0.02);
      the bias is left untouched.
    * Otherwise, class name contains ``"BatchNorm2d"``: weights are drawn
      from N(1, 0.02) and the bias is set to zero.
    * Any other module is left unchanged.
    """
    kind = type(m).__name__

    if "Conv" in kind:
        m.weight.data.normal_(0.0, 0.02)
        return

    if "BatchNorm2d" in kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

class SFDiscriminator(nn.Module):
    """Three ``Discriminator`` instances, each scoring one band of a window.

    ``forward`` cuts a 40-long window out of the third axis of the input and
    hands three overlapping 40-wide bands of the fourth axis (0-40, 20-60,
    40-80) to ``disc1``, ``disc2`` and ``disc3`` respectively.
    """

    def __init__(self):
        super().__init__()
        self.disc1 = Discriminator()
        self.disc2 = Discriminator()
        self.disc3 = Discriminator()
        # Re-initialise every sub-module's weights with weights_init.
        self.apply(weights_init)

    def forward(self, x, start):
        """Score three overlapping bands of a 40-long window of ``x``.

        Args:
            x: 4-D input tensor; presumably (batch, 1, frames, 80) -- TODO
                confirm the axis meaning against the caller.
            start: index on the third axis where the 40-long window begins.

        Returns:
            A list ``[disc1_out, disc2_out, disc3_out]`` of score maps.
        """
        # The same window on the third axis is shared by all three bands.
        window = x[:, :, start: start + 40]

        # (sub-discriminator, first index of its 40-wide band on axis 3)
        bands = (
            (self.disc1, 0),
            (self.disc2, 20),
            (self.disc3, 40),
        )
        return [
            disc(window[:, :, :, low: low + 40])
            for disc, low in bands
        ]

if __name__ == '__main__':
    # Smoke test: build the model, push a random batch through it, and
    # report the number of returned score maps and trainable parameters.
    model = SFDiscriminator()

    # Dummy batch sized so that a single 40-long window with all three
    # 40-wide bands (0-40, 20-60, 40-80) fits exactly.
    x = torch.randn(16, 1, 40, 80)
    print(x.shape)

    # SFDiscriminator.forward requires the window offset ``start``; the
    # previous call omitted it and raised TypeError.  The dummy input is
    # exactly 40 long on the sliced axis, so the window starts at 0.
    out = model(x, 0)
    print(len(out), "Shape of output")

    pytorch_total_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )
    print(pytorch_total_params)
Loading