Merge pull request Idlak#2 from Idlak/master
updated
dabraude authored Jun 29, 2018
2 parents 4c5ad5b + 0210345 commit 69e750e
Showing 89 changed files with 36,755 additions and 47 deletions.
6 changes: 5 additions & 1 deletion egs/wsj/s5/steps/nnet/train_scheduler.sh
@@ -17,6 +17,7 @@ l2_penalty=0
train_tool="nnet-train-frmshuff"
train_tool_opts="--minibatch-size=256 --randomizer-size=32768 --randomizer-seed=777"
feature_transform=
output_feature_transform=

split_feats= # int -> number of splits 'feats.scp -> feats.${i}.scp', starting from feats.1.scp,
# (data are already shuffled and split to N parts),
@@ -84,6 +85,7 @@ mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
log=$dir/log/iter00.initial.log; hostname>$log
$train_tool --cross-validate=true --randomize=false --verbose=$verbose $train_tool_opts \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_cv" "$labels_cv" $mlp_best \
@@ -125,6 +127,7 @@ for iter in $(seq -w $max_iters); do
--learn-rate=$learn_rate --momentum=$momentum \
--l1-penalty=$l1_penalty --l2-penalty=$l2_penalty \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_tr_portion" "$labels_tr" $mlp_best $mlp_next \
@@ -137,6 +140,7 @@ for iter in $(seq -w $max_iters); do
log=$dir/log/iter${iter}.cv.log; hostname>$log
$train_tool --cross-validate=true --randomize=false --verbose=$verbose $train_tool_opts \
${feature_transform:+ --feature-transform=$feature_transform} \
${output_feature_transform:+ --output-feature-transform=$output_feature_transform} \
${frame_weights:+ "--frame-weights=$frame_weights"} \
${utt_weights:+ "--utt-weights=$utt_weights"} \
"$feats_cv" "$labels_cv" $mlp_next \
@@ -147,7 +151,7 @@ for iter in $(seq -w $max_iters); do

# accept or reject?
loss_prev=$loss
if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") -o $iter -le $keep_lr_iters -o $iter -le $min_iters ]; then
if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") -o $iter -le $keep_lr_iters ]; then
# accepting: the loss was better, or we had fixed learn-rate, or we had fixed epoch-number,
loss=$loss_new
mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.4f" $tr_loss)_cv$(printf "%.4f" $loss_new)
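
The accept/reject test above shells out to awk because the shell's `[` builtin only compares integers; awk performs the floating-point comparison and prints 1 or 0. A minimal standalone sketch with illustrative loss values:

loss=1.2345; loss_new=1.2001
if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") ]; then
  echo "loss improved, accepting"
fi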
54 changes: 51 additions & 3 deletions egs/wsj/s5/utils/nnet/make_lstm_proto.py
@@ -17,21 +17,51 @@

# Generated Nnet prototype, to be initialized by 'nnet-initialize'.

import sys
import sys, math

###
### Parse options
###
from optparse import OptionParser
usage="%prog [options] <feat-dim> <num-leaves> >nnet-proto-file"
parser = OptionParser(usage)
# Softmax related,
parser.add_option('--no-softmax', dest='with_softmax',
help='Do not put <SoftMax> in the prototype [default: %default]',
default=True, action='store_false');
parser.add_option('--block-softmax-dims', dest='block_softmax_dims',
help='Generate <BlockSoftmax> with dims D1:D2:D3 [default: %default]',
default="", type='string');
# Required,
parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320,
help='Number of cells for one direction in LSTM [default: %default]');
parser.add_option('--proj-dim', dest='proj_dim', type='int', default=400,
help='Number of LSTM recurrent units [default: %default]');
parser.add_option('--num-layers', dest='num_layers', type='int', default=2,
help='Number of LSTM layers [default: %default]');
# Activation related,
parser.add_option('--activation-final', dest='activation_final',
help='If set, outputs an activation layer as final layer [default: %default]',
default=False, action='store_true');
parser.add_option('--activation-type', dest='activation_type',
help='Select type of activation function : (<Sigmoid>|<Tanh>|<ParametricRelu>) [default: %default]',
default='<Tanh>', type='string');
parser.add_option('--activation-opts', dest='activation_opts',
help='Additional options for prototype of activation function [default: %default]',
default='', type='string');
# Affine-transform related,
parser.add_option('--hid-bias-mean', dest='hid_bias_mean',
help='Set bias for hidden activations [default: %default]',
default=-2.0, type='float');
parser.add_option('--hid-bias-range', dest='hid_bias_range',
help='Set bias range for hidden activations (+/- 1/2 range around mean) [default: %default]',
default=4.0, type='float');
parser.add_option('--param-stddev-factor', dest='param_stddev_factor',
help='Factor to rescale Normal distribution for initializing weight matrices [default: %default]',
default=0.1, type='float');
parser.add_option('--no-glorot-scaled-stddev', dest='with_glorot',
help='Generate normalized weights according to X.Glorot paper, but mapping U->N with same variance (factor sqrt(x/(dim_in+dim_out)))',
action='store_false', default=True);
# Optional (default == 'None'),
parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float',
help='Range of initial LSTM parameters [default: %default]');
@@ -54,6 +84,15 @@

(feat_dim, num_leaves) = map(int,args);


# Optionally scale
def Glorot(dim1, dim2):
    if o.with_glorot:
        # 35.0 = magic number, gives ~1.0 in inner layers for hid-dim 1024dim,
        return 35.0 * math.sqrt(2.0/(dim1+dim2));
    else:
        return 1.0

# Original prototype from Jiayu,
#<NnetProto>
#<Transmit> <InputDim> 40 <OutputDim> 40
@@ -78,9 +117,18 @@
print "<LstmProjected> <InputDim> %d <OutputDim> %d <CellDim> %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts

# Adding <Tanh> for more stability,
#print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_type, o.proj_dim, o.proj_dim, o.activation_opts) # Non-linearity
print "<Tanh> <InputDim> %d <OutputDim> %d" % (o.proj_dim, o.proj_dim)

# Softmax layer,
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts
print "<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves)
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" % (o.proj_dim, num_leaves, 0.0, 0.0, (o.param_stddev_factor * Glorot(o.proj_dim, num_leaves)), 1.0, 0.1)# + softmax_affine_opts

# Optionally append softmax
if o.with_softmax:
    if o.block_softmax_dims == "":
        print "<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves)
    else:
        print "<BlockSoftmax> <InputDim> %d <OutputDim> %d <BlockDims> %s" % (num_leaves, num_leaves, o.block_softmax_dims)

if o.activation_final:
    print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_type, num_leaves, num_leaves, o.activation_opts)
6 changes: 6 additions & 0 deletions egs/wsj/s5/utils/nnet/make_nnet_proto.py
@@ -37,6 +37,9 @@
help='Generate <BlockSoftmax> with dims D1:D2:D3 [default: %default]',
default="", type='string');
# Activation related,
parser.add_option('--activation-final', dest='activation_final',
help='If set, outputs an activation layer as final layer [default: %default]',
default=None, type='string');
parser.add_option('--activation-type', dest='activation_type',
help='Select type of activation function : (<Sigmoid>|<Tanh>|<ParametricRelu>) [default: %default]',
default='<Sigmoid>', type='string');
@@ -231,6 +234,9 @@ def Glorot(dim1, dim2):
    else:
        print "<BlockSoftmax> <InputDim> %d <OutputDim> %d <BlockDims> %s" % (num_leaves, num_leaves, o.block_softmax_dims)

if o.activation_final:
    print "%s <InputDim> %d <OutputDim> %d %s" % (o.activation_final, num_leaves, num_leaves, o.activation_opts)

# We are done!
sys.exit(0)
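
Note that the two prototype generators now treat --activation-final differently: make_lstm_proto.py defines it as a boolean flag and prints the component chosen by --activation-type, while make_nnet_proto.py takes the component name itself (e.g. '<Tanh>') as the option's string value and prints it directly.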

19 changes: 19 additions & 0 deletions idlak-data/en/abbrev-default.xml
@@ -0,0 +1,19 @@
<abbreviations>
<!-- The abbreviation can either be a whole tokenised token (in English, separated by whitespace), or it
can be some punctuation followed by a token followed by punctuation. In the first case the entire
token must match; in the second the punctuation before and after must match, but extra
punctuation is allowed before or after. The expansion is the text node. Optionally
a lex tag can be put around words to select a pronunciation type; no other XML is allowed -->
<abb token="Dr" pstpunc=".">doctor</abb>
<abb token="Dr">doctor</abb>
<abb token="Mr" pstpunc=".">mister</abb>
<abb token="Mr"> mister </abb>
<abb token="Mrs" pstpunc=".">missis</abb>
<abb token="Mrs">missis</abb>
<abb token="Ms" pstpunc=".">mizz</abb>
<abb token="Ms">mizz</abb>
<abb token="NASA"> nasa </abb>
<abb prepunc="(" token="US" pstpunc=")"> u s </abb>
<abb token="USA-wide"> u s <lex entry="letter">a</lex> wide </abb>
<abb token=":-)">smiley</abb>
</abbreviations>
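
With these rules, a token 'Dr' followed by '.' expands to 'doctor' via the pstpunc entry and a bare 'Dr' via the fallback entry; '(US)' expands only with both surrounding parentheses present; and 'USA-wide' wraps the 'a' in a lex tag to force its letter pronunciation.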
2 changes: 1 addition & 1 deletion idlak-data/en/nrules-default.xml
@@ -8,7 +8,7 @@
to deal with hyphenation.
</comment>
<exp>
<![CDATA[^([ \n\t\r\-]+)]]>
<![CDATA[^([ \n\t\r]+)]]>
</exp>
</regex>

9 changes: 5 additions & 4 deletions idlak-egs/tts_tangle_arctic/s2/local/idlak_make_lang.py
@@ -427,10 +427,11 @@ def idlak_make_lang(textfile, datadir, langdir):
        chars[c] = 1
    # get phone set from transcription lexicon
    for p in prons:
        pp = p.split()
        for phone in pp:
            phones[phone] = 1
        fplex.write(("%s %s\n" % (utf8w, p)).encode('utf-8'))
        if len(p):
            pp = p.split()
            for phone in pp:
                phones[phone] = 1
            fplex.write(("%s %s\n" % (utf8w, p)).encode('utf-8'))
    if handler.oov.has_key(w):
        fpoov.write(("%s %s\n" % (utf8w, prons[0])).encode('utf-8'))
    fplex.close()
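
The new `if len(p):` guard skips empty pronunciation strings, so blank entries in the transcription lexicon no longer contribute phones or produce malformed lexicon lines.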
@@ -182,7 +182,7 @@ if [ "$synth" = "cere" ]; then
elif [ "$synth" = "excitation" ]; then
echo "generating in $tmpdir"
x2x +af $mcep > $mcep.float
mlsacheck -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha < $mcep.float > $mcep.float.stable
mlsacheck 2> /dev/null -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha < $mcep.float > $mcep.float.stable
psize=`echo "$period * $srate / 1000" | bc`
# We have to drop the first few F0 frames to match SPTK behaviour
#cat $f0 | awk -v srate=$srate '(NR > 2){if ($1 > 0) print srate / $1; else print 0.0}' | x2x +af \
@@ -214,7 +214,7 @@ elif [ "$synth" = "WORLD" ]; then
echo $world/synth $fftlen $srate $f0.double $mcep.sp.double $bap.double $out_wav
$world/synth $fftlen $srate $f0.double $mcep.sp.double $bap.double $out_wav
else
x2x +af $mcep | mlsacheck -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha > $mcep.float
x2x +af $mcep | mlsacheck 2> /dev/null -l $fftlen -c 2 -r 0 -P 5 -m $order -a $alpha > $mcep.float
psize=`echo "$period * $srate / 1000" | bc`
# We have to drop the first few F0 frames to match SPTK behaviour
cat $f0 | awk -v srate=$srate '(NR > 2){if ($1 > 0) print srate / $1; else print 0.0}' | x2x +af \
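
In both hunks the added `2> /dev/null` silences mlsacheck's stability warnings while leaving stdout, and hence the data path, untouched; a redirection may appear at any position in a simple shell command. A minimal sketch of the equivalence (tool and file names are illustrative):

sometool 2>/dev/null --flag value < in.raw > out.raw
sometool --flag value < in.raw > out.raw 2>/dev/null
# both forms discard stderr and write the same out.raw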
8 changes: 3 additions & 5 deletions idlak-egs/tts_tangle_arctic/s2/run.sh
@@ -514,11 +514,9 @@ echo "
*********************
** Congratulations **
*********************
TTS-DNN trained and sample synthesis done.
TTS-DNN trained.
Samples can be found in $dnndir/tst_forward/wav_mlpg/*.wav.
More synthesis can be performed using the utils/synthesis_test.sh utility,
Synthesis can be performed using the utils/synthesis_test.sh utility,
e.g.: echo 'Test 1 2 3' | utils/synthesis_test-48k.sh
"
echo "#### Step 6: packaging DNN voice ####"
@@ -527,6 +525,6 @@ local/make_dnn_voice_pitch.sh --spk $spk --srate $srate --mcep_order $order --bn

echo "Voice packaged successfully. Portable models have been stored in ${spk}_pmdl."
echo "Synthesis can be performed using:
echo \"This is a demo of D N N synthesis\" | local/synthesis_voice_pitch.sh ${spk}_pmdl <out_wav>"
echo \"This is a demo of D N N synthesis\" | local/synthesis_voice_pitch.sh ${spk}_pmdl <out_dir>"


2 changes: 1 addition & 1 deletion src/idlaktxp/Makefile
@@ -11,7 +11,7 @@ TESTFILES = idlak-mod-test

OBJFILES = txpxmldata.o txputf8.o txppcre.o txpnrules.o txppos.o \
txppbreak.o txpsylmax.o txplexicon.o txplts.o txpmodule.o txpcexspec.o \
txpparse-options.o \
txpparse-options.o txpabbrev.o \
cexfunctions.o cexfunctionscatalog.o mod-tokenise.o \
mod-postag.o mod-pauses.o mod-phrasing.o mod-pronounce.o mod-syllabify.o mod-cex.o
