Merge pull request Idlak#2 from c-alexbrouwer/master

merge
Skaiste · Jul 27, 2018 · 1fa9024 · 1fa9024
2 parents f8e8700 + 732017b
commit 1fa9024
Show file tree

Hide file tree

Showing 127 changed files with 15,236 additions and 17,354 deletions.
diff --git a/.gitignore b/.gitignore
@@ -72,6 +72,10 @@ GSYMS
 /src/kaldi.mk.tmp
 /src/kaldi.mk.bak
 
+# Idlak python wrapper
+/src/pyIdlak/pyIdlak.py
+/src/pyIdlak/pyIdlak_wrap.cc
+
 # /egs/ & /idlak-egs/
 /egs/*/s*/mfcc
 /egs/*/s*/plp

diff --git a/.travis.yml b/.travis.yml
@@ -22,6 +22,7 @@ addons:
       - liblapack-dev
       - clang-3.8
       - csh
+      - swig
 
 branches:
   only:
@@ -40,8 +41,12 @@ before_script:
   - ccache --zero-stats --max-size=3G
 
 env:
-  - CI_TARGETS="all ext"        # Job1: Build everything.
-  - CI_TARGETS="test"           # Job2: Test libraries. #### ext_test? adds 5min compile, runs 1 test.
+  - CI_TARGETS="depend"     # Job1: Build main programs.
+  - CI_TARGETS="all"        # Job2: Build main programs.
+  - CI_TARGETS="ext_depend" # Job3: Build extra programs
+  - CI_TARGETS="ext"        # Job4: Build extra programs
+  - CI_TARGETS="test"       # Job5: Test libraries.
+  #### ext_test? adds 5min compile, runs 1 test.
 
 script:
   # See http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html and

diff --git a/README.md b/README.md
@@ -66,13 +66,13 @@ Development pattern for contributors
 ------------------------------------
 
 1. [Create a personal fork](https://help.github.com/articles/fork-a-repo/)
-   of the [main Kaldi repository](https://github.com/kaldi-asr/kaldi) in GitHub.
+   of the [main Idlak repository](https://github.com/Idlak/idlak) in GitHub.
 2. Make your changes in a named branch different from `master`, e.g. you create
    a branch `my-awesome-feature`.
 3. [Generate a pull request](https://help.github.com/articles/creating-a-pull-request/)
    through the Web interface of GitHub.
 4. As a general rule, please follow [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
-   There are a [few exceptions in Kaldi](http://kaldi-asr.org/doc/style.html).
+   There are a [few exceptions in Kaldi & Idlak](http://kaldi-asr.org/doc/style.html).
    You can use the [Google's cpplint.py](https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py)
    to verify that your code is free of basic mistakes.
 

diff --git a/egs/aishell2/README.txt b/egs/aishell2/README.txt
@@ -0,0 +1,50 @@
+# AISHELL-2
+
+AISHELL-2 is by far the largest free speech corpus available for Mandarin ASR research.
+## 1. DATA
+### training data
+* 1000 hours of speech data (around 1 million utterances)
+* 1991 speakers (845 male and 1146 female)
+* clean recording environment (studio or quiet living room)
+* read speech
+* reading prompts from various domains: entertainment, finance, technology, sports, control commands, places of interest, etc.
+* near-field recording via 3 parallel channels (iOS, Android, Microphone).
+* iOS data is free for non-commercial research and education use (e.g. universities and colleges)
+
+### evaluation data:
+Currently we release AISHELL2-2018A-EVAL, containing:
+* dev: 2500 utterances from 5 speakers
+* test: 5000 utterances from 10 speakers
+
+You can download the above evaluation set from:
+http://www.aishelltech.com/aishell_eval
+
+We may update and release other evaluation sets on the website later, targeting different applications and scenarios.
+
+## 2. RECIPE
+Based on the standard Kaldi system, AISHELL-2 provides a self-contained Mandarin ASR recipe, with:
+* a word segmentation module, which is a must-have component for Chinese ASR systems
+* an open-sourced Mandarin lexicon(DaCiDian)
+* a simplified GMM training recipe
+* acoustic channel adaptation recipe(AM fine-tuning)
+
+# CONTACT
+AISHELL foundation is a non-profit online organization, with members from speech industry and research institutes.
+
+We hope AISHELL-2 corpus and recipe could be beneficial to the entire speech community.
+
+Depending on your location and internet speed, we distribute the corpus in one of two ways:
+* hard-disk delivery
+* cloud-disk downloading
+
+To apply for the AISHELL-2 corpus for free, you need to fill in a very simple application form with:
+* your university department / education institute info
+* confirmation that the corpus is only for non-commercial research / education use
+
+AISHELL-foundation covers all data distribution fees (including the corpus, hard-disk cost etc)
+
+Data re-distribution inside your university department is OK for convenience. However, users are not supposed to re-distribute AISHELL-2 to other universities or education institutes.
+
+To get the application form, or if you come across any problem with the recipe, contact us via:
+
+aishell.foundation@gmail.com
diff --git a/egs/aishell2/s5/RESULTS b/egs/aishell2/s5/RESULTS
@@ -0,0 +1,6 @@
+%WER 44.78 [ 22176 / 49527, 370 ins, 2179 del, 19627 sub ] exp/mono/decode_test/cer_9_0.0
+%WER 24.78 [ 12271 / 49527, 394 ins, 815 del, 11062 sub ] exp/tri1/decode_test/cer_11_0.0
+%WER 22.54 [ 11165 / 49527, 390 ins, 665 del, 10110 sub ] exp/tri2/decode_test/cer_11_0.0
+%WER 19.78 [ 9795 / 49527, 313 ins, 684 del, 8798 sub ] exp/tri3/decode_test/cer_13_0.0
+
+# (Chain model results are at the beginning of corresponding scripts)
diff --git a/egs/aishell2/s5/cmd.sh b/egs/aishell2/s5/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G"
+export decode_cmd="queue.pl --mem 4G"
+export mkgraph_cmd="queue.pl --mem 8G"
diff --git a/egs/aishell2/s5/conf/decode.conf b/egs/aishell2/s5/conf/decode.conf
@@ -0,0 +1,3 @@
+beam=11.0 # beam for decoding.  Was 13.0 in the scripts.
+first_beam=8.0 # beam for 1st-pass decoding in SAT.
+
diff --git a/egs/aishell2/s5/conf/mfcc.conf b/egs/aishell2/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false
+--sample-frequency=16000
diff --git a/egs/aishell2/s5/conf/mfcc_hires.conf b/egs/aishell2/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false       # use average of log energy, not energy.
+--sample-frequency=16000 # AISHELL-2 is sampled at 16kHz
+--num-mel-bins=40        # similar to Google's setup.
+--num-ceps=40            # there is no dimensionality reduction.
+--low-freq=20            # low cutoff frequency for mel bins
+--high-freq=-400         # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/aishell2/s5/conf/pitch.conf b/egs/aishell2/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/aishell2/s5/local/chain/compare_wer.sh b/egs/aishell2/s5/local/chain/compare_wer.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Copyright 2018  Emotech LTD (Author: Xuechen LIU)
+# Apache 2.0
+
+# Compare WER between different models in the aishell2 chain directory
+
+set -e
+. ./cmd.sh
+. ./path.sh
+
+# Require at least one experiment directory; otherwise show usage and fail.
+if [ $# -lt 1 ]; then
+  printf '%s\n' "Usage: $0: <dir1> [<dir2> ... ]" "e.g.: $0 exp/chain/tdnn_7h_sp"
+  exit 1
+fi
+
+printf '# %s %s\n' "$0" "$*"
+
+set_names() {  # sets the global $dirname to the part of $1 before the first ':'
+  if [ $# != 1 ]; then
+    echo "$0: internal error: set_names expects exactly one argument" >&2
+    exit 1  # exit the program
+  fi
+  dirname=${1%%:*}  # same result as "cut -d: -f1" on a one-line value, without forking
+}
+
+# print model names
+echo -n "# Model               "
+for x in $*; do
+  printf "% 10s" " $(basename $x)"
+done
+echo
+
+# print number of parameters
+echo -n "# Num. of params        "
+for x in $*; do
+  set_names $x
+  params=$(steps/info/chain_dir_info.pl "$x" | grep -o 'num-params=[0-9]*\.[0-9]*M' | cut -d'=' -f2-)
+  printf "% 10s\n" $params
+done
+
+# print decode WER results; '|| true' keeps 'set -e' from aborting the script when a directory is missing
+echo -n "# WER(%)               "
+for x in "$@"; do
+  set_names "$x"
+  wer=$([ -d "$x" ] && grep WER "$x"/decode_test/cer_* | utils/best_wer.sh | awk '{print $2}' || true)
+  printf "% 10s" "$wer"
+done
+echo
+
+# print final log prob for train & validation
+echo -n "# Final train prob     "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf($8)}' | cut -c1-7)
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob     "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf($8)}' | cut -c1-7)
+  printf "% 10s" $prob
+done
+echo
+
+# do the same for xent objective
+echo -n "# Final train prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
diff --git a/egs/aishell2/s5/local/chain/run_tdnn.sh b/egs/aishell2/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh