Merge pull request Idlak#4 from Idlak/dev_trainfix2
Reverted nnet training utils / scripts to previous version
dabraude authored Jun 15, 2018
2 parents 0dcf922 + 442dd3b commit 0210345
Showing 6 changed files with 72 additions and 15 deletions.
6 changes: 5 additions & 1 deletion egs/wsj/s5/steps/nnet/train_scheduler.sh
@@ -17,6 +17,7 @@ l2_penalty=0
train_tool="nnet-train-frmshuff"
train_tool_opts="--minibatch-size=256 --randomizer-size=32768 --randomizer-seed=777"
feature_transform=
output_feature_transform=

split_feats= # int -> number of splits 'feats.scp -> feats.${i}.scp', starting from feats.1.scp,
# (data are already shuffled and split to N parts),
@@ -84,6 +85,7 @@ mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
log=$dir/log/iter00.initial.log; hostname>$log
$train_tool --cross-validate=true --randomize=false --verbose=$verbose $train_tool_opts \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_cv" "$labels_cv" $mlp_best \
@@ -125,6 +127,7 @@ for iter in $(seq -w $max_iters); do
--learn-rate=$learn_rate --momentum=$momentum \
--l1-penalty=$l1_penalty --l2-penalty=$l2_penalty \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_tr_portion" "$labels_tr" $mlp_best $mlp_next \
@@ -137,6 +140,7 @@ for iter in $(seq -w $max_iters); do
log=$dir/log/iter${iter}.cv.log; hostname>$log
$train_tool --cross-validate=true --randomize=false --verbose=$verbose $train_tool_opts \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_cv" "$labels_cv" $mlp_next \
@@ -147,7 +151,7 @@ for iter in $(seq -w $max_iters); do

# accept or reject?
loss_prev=$loss
if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") -o $iter -le $keep_lr_iters -o $iter -le $min_iters ]; then
if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") -o $iter -le $keep_lr_iters ]; then
# accepting: the loss was better, or we had fixed learn-rate, or we had fixed epoch-number,
loss=$loss_new
mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.4f" $tr_loss)_cv$(printf "%.4f" $loss_new)
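The new --output-feature-transform flag is threaded through every nnet-train-frmshuff call with the same ${var:+...} shell expansion already used for the other optional arguments, so the flag is emitted only when the variable is set and non-empty. A minimal bash sketch of the idiom (the path below is purely illustrative):

    ft=exp/dnn/final.feature_transform                 # illustrative value, not from the recipe
    echo train_tool ${ft:+--feature-transform=$ft}     # -> train_tool --feature-transform=exp/dnn/final.feature_transform
    ft=
    echo train_tool ${ft:+--feature-transform=$ft}     # -> train_tool   (the flag disappears entirely)

Note also that the accept/reject test above drops the "-o $iter -le $min_iters" clause, so an iteration at or below min_iters is no longer accepted automatically when its cross-validation loss got worse.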
54 changes: 51 additions & 3 deletions egs/wsj/s5/utils/nnet/make_lstm_proto.py
@@ -17,21 +17,51 @@

# Generated Nnet prototype, to be initialized by 'nnet-initialize'.

import sys
import sys, math

###
### Parse options
###
from optparse import OptionParser
usage="%prog [options] <feat-dim> <num-leaves> >nnet-proto-file"
parser = OptionParser(usage)
# Softmax related,
parser.add_option('--no-softmax', dest='with_softmax',
help='Do not put <SoftMax> in the prototype [default: %default]',
default=True, action='store_false');
parser.add_option('--block-softmax-dims', dest='block_softmax_dims',
help='Generate <BlockSoftmax> with dims D1:D2:D3 [default: %default]',
default="", type='string');
# Required,
parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320,
help='Number of cells for one direction in LSTM [default: %default]');
parser.add_option('--proj-dim', dest='proj_dim', type='int', default=400,
help='Number of LSTM recurrent units [default: %default]');
parser.add_option('--num-layers', dest='num_layers', type='int', default=2,
help='Number of LSTM layers [default: %default]');
# Activation related,
parser.add_option('--activation-final', dest='activation_final',
help='If set, outputs an activation layer as final layer [default: %default]',
default=False, action='store_true');
parser.add_option('--activation-type', dest='activation_type',
help='Select type of activation function : (<Sigmoid>|<Tanh>|<ParametricRelu>) [default: %default]',
default='<Tanh>', type='string');
parser.add_option('--activation-opts', dest='activation_opts',
help='Additional options for prototype of activation function [default: %default]',
default='', type='string');
# Affine-transform related,
parser.add_option('--hid-bias-mean', dest='hid_bias_mean',
help='Set bias for hidden activations [default: %default]',
default=-2.0, type='float');
parser.add_option('--hid-bias-range', dest='hid_bias_range',
help='Set bias range for hidden activations (+/- 1/2 range around mean) [default: %default]',
default=4.0, type='float');
parser.add_option('--param-stddev-factor', dest='param_stddev_factor',
help='Factor to rescale Normal distribution for initializing weight matrices [default: %default]',
default=0.1, type='float');
parser.add_option('--no-glorot-scaled-stddev', dest='with_glorot',
help='Generate normalized weights according to X.Glorot paper, but mapping U->N with same variance (factor sqrt(x/(dim_in+dim_out)))',
action='store_false', default=True);
# Optional (default == 'None'),
parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float',
help='Range of initial LSTM parameters [default: %default]');
@@ -54,6 +84,15 @@

(feat_dim, num_leaves) = map(int,args);


# Optionally scale
def Glorot(dim1, dim2):
if o.with_glorot:
# 35.0 = magic number, gives ~1.0 in inner layers for hid-dim 1024dim,
return 35.0 * math.sqrt(2.0/(dim1+dim2));
else:
return 1.0

# Original prototype from Jiayu,
#<NnetProto>
#<Transmit> <InputDim> 40 <OutputDim> 40
@@ -78,9 +117,18 @@
print "<LstmProjected> <InputDim> %d <OutputDim> %d <CellDim> %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts

# Adding <Tanh> for more stability,
#print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_type, o.proj_dim, o.proj_dim, o.activation_opts) # Non-linearity
print "<Tanh> <InputDim> %d <OutputDim> %d" % (o.proj_dim, o.proj_dim)

# Softmax layer,
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts
print "<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves)
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" % (o.proj_dim, num_leaves, 0.0, 0.0, (o.param_stddev_factor * Glorot(o.proj_dim, num_leaves)), 1.0, 0.1)# + softmax_affine_opts

# Optionally append softmax
if o.with_softmax:
if o.block_softmax_dims == "":
print "<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves)
else:
print "<BlockSoftmax> <InputDim> %d <OutputDim> %d <BlockDims> %s" % (num_leaves, num_leaves, o.block_softmax_dims)

if o.activation_final:
print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_type, num_leaves, num_leaves, o.activation_opts)
6 changes: 6 additions & 0 deletions egs/wsj/s5/utils/nnet/make_nnet_proto.py
@@ -37,6 +37,9 @@
help='Generate <BlockSoftmax> with dims D1:D2:D3 [default: %default]',
default="", type='string');
# Activation related,
parser.add_option('--activation-final', dest='activation_final',
help='If set, outputs an activation layer as final layer [default: %default]',
default=None, type='string');
parser.add_option('--activation-type', dest='activation_type',
help='Select type of activation function : (<Sigmoid>|<Tanh>|<ParametricRelu>) [default: %default]',
default='<Sigmoid>', type='string');
@@ -231,6 +234,9 @@ def Glorot(dim1, dim2):
else:
print "<BlockSoftmax> <InputDim> %d <OutputDim> %d <BlockDims> %s" % (num_leaves, num_leaves, o.block_softmax_dims)

if o.activation_final:
print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_final, num_leaves, num_leaves, o.activation_opts)

# We are done!
sys.exit(0)
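In make_nnet_proto.py the new --activation-final option is a string holding the component tag itself (unlike the boolean flag in the LSTM generator above), and it names the extra layer printed after the softmax. A hedged sketch, assuming the same <feat-dim> <num-leaves> positional arguments as make_lstm_proto.py and placeholder dimensions:

    # illustrative dimensions only
    python utils/nnet/make_nnet_proto.py --activation-final '<Tanh>' 440 3000 > nnet.proto
    # per the added print statement, the prototype should now end with a line like:
    #   <Tanh> <InputDim> 3000 <OutputDim> 3000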

9 changes: 5 additions & 4 deletions idlak-egs/tts_tangle_arctic/s2/local/idlak_make_lang.py
@@ -427,10 +427,11 @@ def idlak_make_lang(textfile, datadir, langdir):
chars[c] = 1
# get phone set from transcription lexicon
for p in prons:
pp = p.split()
for phone in pp:
phones[phone] = 1
fplex.write(("%s %s\n" % (utf8w, p)).encode('utf-8'))
if len(p):
pp = p.split()
for phone in pp:
phones[phone] = 1
fplex.write(("%s %s\n" % (utf8w, p)).encode('utf-8'))
if handler.oov.has_key(w):
fpoov.write(("%s %s\n" % (utf8w, prons[0])).encode('utf-8'))
fplex.close()
@@ -182,7 +182,7 @@ if [ "$synth" = "cere" ]; then
elif [ "$synth" = "excitation" ]; then
echo "generating in $tmpdir"
x2x +af $mcep > $mcep.float
mlsacheck -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha < $mcep.float > $mcep.float.stable
mlsacheck 2> /dev/null -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha < $mcep.float > $mcep.float.stable
psize=`echo "$period * $srate / 1000" | bc`
# We have to drop the first few F0 frames to match SPTK behaviour
#cat $f0 | awk -v srate=$srate '(NR > 2){if ($1 > 0) print srate / $1; else print 0.0}' | x2x +af \
@@ -214,7 +214,7 @@ elif [ "$synth" = "WORLD" ]; then
echo $world/synth $fftlen $srate $f0.double $mcep.sp.double $bap.double $out_wav
$world/synth $fftlen $srate $f0.double $mcep.sp.double $bap.double $out_wav
else
x2x +af $mcep | mlsacheck -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha > $mcep.float
x2x +af $mcep | mlsacheck 2> /dev/null -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha > $mcep.float
psize=`echo "$period * $srate / 1000" | bc`
# We have to drop the first few F0 frames to match SPTK behaviour
cat $f0 | awk -v srate=$srate '(NR > 2){if ($1 > 0) print srate / $1; else print 0.0}' | x2x +af \
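The only change in this synthesis script is the 2> /dev/null added to both mlsacheck calls. A redirection may appear anywhere in a simple command, so placing it before the options is equivalent to placing it at the end: whatever mlsacheck writes to stderr (its stability diagnostics) is discarded, while the binary cepstra on stdout still flow down the pipe. A runnable bash stand-in (not SPTK) showing the effect:

    # only the first message survives; stderr is silenced before the pipe
    { echo "kept: stdout reaches the pipe"; echo "dropped: stderr" >&2; } 2>/dev/null | cat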
8 changes: 3 additions & 5 deletions idlak-egs/tts_tangle_arctic/s2/run.sh
@@ -514,11 +514,9 @@ echo "
*********************
** Congratulations **
*********************
TTS-DNN trained and sample synthesis done.
TTS-DNN trained.
Samples can be found in $dnndir/tst_forward/wav_mlpg/*.wav.
More synthesis can be performed using the utils/synthesis_test.sh utility,
Synthesis can be performed using the utils/synthesis_test.sh utility,
e.g.: echo 'Test 1 2 3' | utils/synthesis_test-48k.sh
"
echo "#### Step 6: packaging DNN voice ####"
@@ -527,6 +525,6 @@ local/make_dnn_voice_pitch.sh --spk $spk --srate $srate --mcep_order $order --bn

echo "Voice packaged successfully. Portable models have been stored in ${spk}_pmdl."
echo "Synthesis can be performed using:
echo \"This is a demo of D N N synthesis\" | local/synthesis_voice_pitch.sh ${spk}_pmdl <out_wav>"
echo \"This is a demo of D N N synthesis\" | local/synthesis_voice_pitch.sh ${spk}_pmdl <out_dir>"

