Skip to content

Commit

Permalink
Fix C api for Go and MFC to support streaming paraformer (#268)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Aug 14, 2023
1 parent eb5ae18 commit bc791d4
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 66 deletions.
21 changes: 18 additions & 3 deletions .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,14 @@ jobs:
echo "Test transducer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
./run.sh
./run-transducer.sh
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
echo "Test paraformer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
- name: Test streaming decoding files (Win64)
if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
shell: bash
Expand All @@ -202,9 +207,14 @@ jobs:
echo "Test transducer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
./run.sh
./run-transducer.sh
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
echo "Test paraformer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
- name: Test streaming decoding files (Win32)
if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
shell: bash
Expand Down Expand Up @@ -235,5 +245,10 @@ jobs:
echo "Test transducer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
./run.sh
./run-transducer.sh
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
echo "Test paraformer"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.7.4")
set(SHERPA_ONNX_VERSION "1.7.5")

# Disable warning about
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ func main() {
config := sherpa.OnlineRecognizerConfig{}
config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model")
flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model")
flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model")
flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model")
flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model")
flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the transducer joiner model")
flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model")
flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model")
flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
Expand Down
8 changes: 5 additions & 3 deletions go-api-examples/streaming-decode-files/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ func main() {
config := sherpa.OnlineRecognizerConfig{}
config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model")
flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model")
flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model")
flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model")
flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model")
flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the transducer joiner model")
flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model")
flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model")
flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
Expand Down
21 changes: 21 additions & 0 deletions go-api-examples/streaming-decode-files/run-paraformer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
#
# Decodes a test wave file with the streaming paraformer (bilingual zh-en)
# model using the ./streaming-decode-files binary in the current directory.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

# Abort on the first failing command. Without this, a failed clone or cd
# would let the script continue from the wrong working directory.
set -e

if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
  # Use a subshell so the caller's working directory is never changed,
  # even if fetching the LFS objects fails.
  (
    cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
    git lfs pull --include "*.onnx"
  )
fi

./streaming-decode-files \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --decoding-method greedy_search \
  --model-type paraformer \
  --debug 0 \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,10 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
"https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
"\r\n";
msg += "to download a non-streaming model, i.e., an offline model.\r\n";
msg += "You need to rename them after downloading\r\n\r\n";
msg += "It supports transducer, paraformer, and whisper models.\r\n\r\n";
msg +=
"You need to rename them to encoder.onnx, decoder.onnx, and "
"joiner.onnx correspoondingly.\r\n\r\n";
msg += "It supports both transducer models and paraformer models.\r\n\r\n";
msg +=
"We give two examples below to show you how to download models\r\n\r\n";
"We give three examples below to show you how to download models\r\n\r\n";
msg += "(1) Transducer\r\n\r\n";
msg +=
"We use "
Expand Down Expand Up @@ -346,13 +344,82 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
"https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
"resolve/main/tokens.txt\r\n\r\n";
msg += "\r\n Now rename them\r\n";
msg += "mv model.onnx paraformer.onnx\r\n";
msg += "mv model.onnx paraformer.onnx\r\n\r\n";
msg += "(3) Whisper\r\n\r\n";
msg +=
"wget "
"https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
"main/tiny.en-encoder.onnx\r\n";
msg +=
"wget "
"https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
"main/tiny.en-decoder.onnx\r\n";
msg +=
"wget "
"https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
"main/tiny.en-tokens.txt\r\n";
msg += "\r\n Now rename them\r\n";
msg += "mv tiny.en-encoder.onnx whisper-encoder.onnx\r\n";
msg += "mv tiny.en-decoder.onnx whisper-decoder.onnx\r\n";
msg += "\r\n";
msg += "That's it!\r\n";

AppendLineToMultilineEditCtrl(msg);
}

// Configures config_ for a Whisper model located in the current directory
// and creates recognizer_ from it.
//
// For the encoder and the decoder, the int8 file
// (./whisper-{encoder,decoder}.int8.onnx) is preferred when it exists;
// otherwise the regular ./whisper-{encoder,decoder}.onnx is used.
// ./tokens.txt is always required. Every missing file is reported in the
// edit control; if anything is missing, the help message is shown and no
// recognizer is created.
void CNonStreamingSpeechRecognitionDlg::InitWhisper() {
  bool all_found = true;

  // Returns int8_path when it exists, otherwise default_path. When neither
  // exists, reports default_path as missing and clears all_found.
  auto pick_model = [&](const std::string &int8_path,
                        const std::string &default_path) -> std::string {
    if (Exists(int8_path)) {
      return int8_path;
    }

    if (!Exists(default_path)) {
      std::string err = default_path + " does not exist!";
      AppendLineToMultilineEditCtrl(err);
      all_found = false;
    }

    return default_path;
  };

  const std::string encoder_path =
      pick_model("./whisper-encoder.int8.onnx", "./whisper-encoder.onnx");
  const std::string decoder_path =
      pick_model("./whisper-decoder.int8.onnx", "./whisper-decoder.onnx");

  const std::string tokens_path = "./tokens.txt";
  if (!Exists(tokens_path)) {
    std::string err = tokens_path + " does not exist!";
    AppendLineToMultilineEditCtrl(err);
    all_found = false;
  }

  if (!all_found) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  // Start from an all-zero config so that unused model slots stay empty.
  memset(&config_, 0, sizeof(config_));

  auto &feat = config_.feat_config;
  feat.sample_rate = 16000;
  feat.feature_dim = 80;

  // NOTE(review): the three path pointers below refer to locals of this
  // function, so they dangle inside config_ once it returns. Confirm that
  // nothing reads these fields after CreateOfflineRecognizer().
  auto &model = config_.model_config;
  model.whisper.encoder = encoder_path.c_str();
  model.whisper.decoder = decoder_path.c_str();
  model.tokens = tokens_path.c_str();
  model.num_threads = 1;
  model.debug = 1;
  model.model_type = "whisper";

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = CreateOfflineRecognizer(&config_);
}

void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
std::string paraformer = "./paraformer.onnx";
std::string tokens = "./tokens.txt";
Expand Down Expand Up @@ -401,6 +468,11 @@ void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
return;
}

if (Exists("./whisper-encoder.onnx") || Exists("./whisper-encoder.int8.onnx")) {
InitWhisper();
return;
}

// assume it is transducer

std::string encoder = "./encoder.onnx";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
void InitRecognizer();

void InitParaformer();
void InitWhisper();
void ShowInitRecognizerHelpMessage();
};
Loading

0 comments on commit bc791d4

Please sign in to comment.