diff --git a/evals/README.Rmd b/evals/README.Rmd index 0991a44..b65b949 100644 --- a/evals/README.Rmd +++ b/evals/README.Rmd @@ -102,8 +102,8 @@ asr_wers <- asr_results %>% wer = wer * 100, is_baseline = model == "wav2vec2-large-robust-ft-swbd-300h", - `Fine-tuning` = ifelse(model == "wav2vec2-large-robust-ft-swbd-300h", "None", "39 minutes"), - `Fine-tuning` = factor(`Fine-tuning`, levels = c("None", "39 minutes")), + `Fine-tuning` = ifelse(model == "wav2vec2-large-robust-ft-swbd-300h", "None", "<1 minute"), + `Fine-tuning` = factor(`Fine-tuning`, levels = c("None", "<1 minute")), `Language Model` = ifelse(model_lm, "2-gram", "None"), `Language Model` = factor(`Language Model`, levels = c("None", "2-gram")) ) diff --git a/evals/README.md b/evals/README.md index 155e2df..3034b92 100644 --- a/evals/README.md +++ b/evals/README.md @@ -1,7 +1,7 @@ SLI-ASR Evaluations ================ Nay San -15 April, 2022 +23 April, 2022 ``` r library(tidyverse) @@ -108,14 +108,17 @@ asr_results %>% knitr::kable() ``` -| model | model\_lm | testset | wer | cer | -|:-----------------------------------|:----------|:---------|-----:|-----:| -| train-100 | FALSE | test.tsv | 0.11 | 0.05 | -| train-80 | FALSE | test.tsv | 0.13 | 0.05 | -| train-60 | FALSE | test.tsv | 0.16 | 0.06 | -| train-40 | FALSE | test.tsv | 0.37 | 0.14 | -| train-20 | FALSE | test.tsv | 0.98 | 0.78 | -| wav2vec2-large-robust-ft-swbd-300h | FALSE | test.tsv | 0.37 | 0.22 | +| model | model\_lm | testset | wer | cer | +|:-----------------------------------|:----------|:---------|------:|------:| +| train-100 | FALSE | test.tsv | 0.101 | 0.042 | +| train-80 | FALSE | test.tsv | 0.101 | 0.044 | +| train-60 | FALSE | test.tsv | 0.118 | 0.052 | +| train-40 | FALSE | test.tsv | 0.123 | 0.055 | +| train-20 | FALSE | test.tsv | 0.132 | 0.061 | +| train-10 | FALSE | test.tsv | 0.134 | 0.061 | +| train-05 | FALSE | test.tsv | 0.151 | 0.067 | +| train-01 | FALSE | test.tsv | 0.191 | 0.088 | +| wav2vec2-large-robust-ft-swbd-300h | FALSE | test.tsv | 0.363 | 0.215 | ### Cross-validation experiments @@ -127,8 +130,8 @@ asr_wers <- asr_results %>% wer = wer * 100, is_baseline = model == "wav2vec2-large-robust-ft-swbd-300h", - `Fine-tuning` = ifelse(model == "wav2vec2-large-robust-ft-swbd-300h", "None", "39 minutes"), - `Fine-tuning` = factor(`Fine-tuning`, levels = c("None", "39 minutes")), + `Fine-tuning` = ifelse(model == "wav2vec2-large-robust-ft-swbd-300h", "None", "<1 minute"), + `Fine-tuning` = factor(`Fine-tuning`, levels = c("None", "<1 minute")), `Language Model` = ifelse(model_lm, "2-gram", "None"), `Language Model` = factor(`Language Model`, levels = c("None", "2-gram")) ) diff --git a/evals/README_files/figure-gfm/cross-validation-1.png b/evals/README_files/figure-gfm/cross-validation-1.png index 187ac6b..1734d53 100644 Binary files a/evals/README_files/figure-gfm/cross-validation-1.png and b/evals/README_files/figure-gfm/cross-validation-1.png differ diff --git a/evals/asr_wer-cer.csv b/evals/asr_wer-cer.csv index 0049b9a..b99b122 100644 --- a/evals/asr_wer-cer.csv +++ b/evals/asr_wer-cer.csv @@ -1,37 +1,40 @@ -model,model_lm,testset,wer,cer -train-100,False,test.tsv,0.11,0.05 -train-80,False,test.tsv,0.13,0.05 -train-60,False,test.tsv,0.16,0.06 -train-40,False,test.tsv,0.37,0.14 -train-20,False,test.tsv,0.98,0.78 -wav2vec2-large-robust-ft-swbd-300h,False,test.tsv,0.37,0.22 -b-1,False,bootstrap-1-test20.tsv,0.22,0.09 -b-2,False,bootstrap-2-test20.tsv,0.23,0.09 -b-3,False,bootstrap-3-test20.tsv,0.15,0.06 -b-4,False,bootstrap-4-test20.tsv,0.2,0.08 -b-5,False,bootstrap-5-test20.tsv,0.23,0.09 -b-6,False,bootstrap-6-test20.tsv,0.17,0.07 -b-7,False,bootstrap-7-test20.tsv,0.18,0.07 -b-8,False,bootstrap-8-test20.tsv,0.15,0.06 -b-9,False,bootstrap-9-test20.tsv,0.18,0.07 -b-10,False,bootstrap-10-test20.tsv,0.19,0.07 -b-1,True,bootstrap-1-test20.tsv,0.13,0.06 -b-2,True,bootstrap-2-test20.tsv,0.11,0.05 -b-3,True,bootstrap-3-test20.tsv,0.13,0.06 -b-4,True,bootstrap-4-test20.tsv,0.13,0.06 -b-5,True,bootstrap-5-test20.tsv,0.13,0.06 -b-6,True,bootstrap-6-test20.tsv,0.18,0.08 -b-7,True,bootstrap-7-test20.tsv,0.15,0.07 -b-8,True,bootstrap-8-test20.tsv,0.11,0.05 -b-9,True,bootstrap-9-test20.tsv,0.17,0.08 -b-10,True,bootstrap-10-test20.tsv,0.14,0.06 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-1-test20.tsv,0.36,0.23 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-2-test20.tsv,0.37,0.22 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-3-test20.tsv,0.35,0.22 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-4-test20.tsv,0.34,0.21 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-5-test20.tsv,0.38,0.24 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-6-test20.tsv,0.36,0.24 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-7-test20.tsv,0.35,0.22 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-8-test20.tsv,0.34,0.22 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-9-test20.tsv,0.38,0.25 -wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-10-test20.tsv,0.34,0.21 +model,model_lm,testset,wer,cer +train-100,False,test.tsv,0.101,0.042 +train-80,False,test.tsv,0.101,0.044 +train-60,False,test.tsv,0.118,0.052 +train-40,False,test.tsv,0.123,0.055 +train-20,False,test.tsv,0.132,0.061 +train-10,False,test.tsv,0.134,0.061 +train-05,False,test.tsv,0.151,0.067 +train-01,False,test.tsv,0.191,0.088 +wav2vec2-large-robust-ft-swbd-300h,False,test.tsv,0.363,0.215 +b-1,False,bootstrap-1-test20.tsv,0.187,0.09 +b-2,False,bootstrap-2-test20.tsv,0.167,0.077 +b-3,False,bootstrap-3-test20.tsv,0.173,0.079 +b-4,False,bootstrap-4-test20.tsv,0.173,0.086 +b-5,False,bootstrap-5-test20.tsv,0.195,0.093 +b-6,False,bootstrap-6-test20.tsv,0.173,0.084 +b-7,False,bootstrap-7-test20.tsv,0.187,0.087 +b-8,False,bootstrap-8-test20.tsv,0.194,0.09 +b-9,False,bootstrap-9-test20.tsv,0.188,0.085 +b-10,False,bootstrap-10-test20.tsv,0.186,0.088 +b-1,True,bootstrap-1-test20.tsv,0.196,0.087 +b-2,True,bootstrap-2-test20.tsv,0.188,0.083 +b-3,True,bootstrap-3-test20.tsv,0.179,0.083 +b-4,True,bootstrap-4-test20.tsv,0.178,0.089 +b-5,True,bootstrap-5-test20.tsv,0.219,0.103 +b-6,True,bootstrap-6-test20.tsv,0.196,0.089 +b-7,True,bootstrap-7-test20.tsv,0.21,0.092 +b-8,True,bootstrap-8-test20.tsv,0.201,0.09 +b-9,True,bootstrap-9-test20.tsv,0.213,0.093 +b-10,True,bootstrap-10-test20.tsv,0.216,0.094 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-1-test20.tsv,0.358,0.225 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-2-test20.tsv,0.365,0.223 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-3-test20.tsv,0.354,0.215 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-4-test20.tsv,0.34,0.212 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-5-test20.tsv,0.379,0.236 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-6-test20.tsv,0.356,0.237 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-7-test20.tsv,0.348,0.219 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-8-test20.tsv,0.341,0.221 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-9-test20.tsv,0.378,0.248 +wav2vec2-large-robust-ft-swbd-300h,False,bootstrap-10-test20.tsv,0.338,0.211 diff --git a/scripts/exp_asr-eval.py b/scripts/exp_asr-eval.py index a941460..873e342 100644 --- a/scripts/exp_asr-eval.py +++ b/scripts/exp_asr-eval.py @@ -86,8 +86,8 @@ def make_all_lowercase(batch): "model" : os.path.basename(model_path), "model_lm" : type(processor).__name__ == 'Wav2Vec2ProcessorWithLM', "testset" : os.path.basename(testset_path), - "wer" : round(wer(test_ds['sentence'], test_ds['transcription']), 2), - "cer" : round(cer(test_ds['sentence'], test_ds['transcription']), 2) + "wer" : round(wer(test_ds['sentence'], test_ds['transcription']), 3), + "cer" : round(cer(test_ds['sentence'], test_ds['transcription']), 3) }) results_df = pd.DataFrame(EVAL_RESULTS) diff --git a/scripts/exp_asr-train.sh b/scripts/exp_asr-train.sh index 2c4660b..52cdb3e 100755 --- a/scripts/exp_asr-train.sh +++ b/scripts/exp_asr-train.sh @@ -19,8 +19,9 @@ do python scripts/train_asr-by-w2v2-ft.py \ facebook/wav2vec2-large-robust-ft-swbd-300h \ "data/exps/asr/checkpoints/bootstrap/no-lm/b-$j" \ - "data/exps/asr/datasets/bootstrap-$j-train60.tsv" \ - "data/exps/asr/datasets/bootstrap-$j-test20.tsv" + "data/exps/asr/datasets/bootstrap-$j-train01.tsv" \ + "data/exps/asr/datasets/bootstrap-$j-test20.tsv" \ + --use_target_vocab False done # Cross-validation experiments with a bigram language model @@ -29,7 +30,8 @@ do python scripts/train_asr-by-w2v2-ft.py \ facebook/wav2vec2-large-robust-ft-swbd-300h \ "data/exps/asr/checkpoints/bootstrap/lm/b-$k" \ - "data/exps/asr/datasets/bootstrap-$k-train60.tsv" \ + "data/exps/asr/datasets/bootstrap-$k-train01.tsv" \ "data/exps/asr/datasets/bootstrap-$k-test20.tsv" \ - --lm_arpa data/exps/asr/2gram_correct.arpa + --lm_arpa data/exps/asr/datasets/20220422_2gram-correct.arpa \ + --use_target_vocab False done