From be07f5e6077e5219b9901ad200b5b7145096a385 Mon Sep 17 00:00:00 2001 From: whp98 <32877126+whp98@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:08:02 +0800 Subject: [PATCH] update --- ...63\350\257\206\345\210\253fast-whisper.md" | 30 +++++++++++++++++++ ...06\345\214\226\345\244\204\347\220\206.md" | 11 +++++++ 2 files changed, 41 insertions(+) create mode 100644 "docs/python\347\233\270\345\205\263/python\350\257\255\351\237\263\350\257\206\345\210\253fast-whisper.md" create mode 100644 "docs/\345\205\264\350\266\243/ffmpeg\345\243\260\351\237\263\346\240\207\345\207\206\345\214\226\345\244\204\347\220\206.md" diff --git "a/docs/python\347\233\270\345\205\263/python\350\257\255\351\237\263\350\257\206\345\210\253fast-whisper.md" "b/docs/python\347\233\270\345\205\263/python\350\257\255\351\237\263\350\257\206\345\210\253fast-whisper.md" new file mode 100644 index 000000000..b7afea1d6 --- /dev/null +++ "b/docs/python\347\233\270\345\205\263/python\350\257\255\351\237\263\350\257\206\345\210\253fast-whisper.md" @@ -0,0 +1,30 @@ +# python语音识别faster-whisper + + +## 语音识别模型下载 + +```shell +git clone https://huggingface.co/Systran/faster-whisper-large-v3 +``` + + +## 相关使用代码如下 + + +```python +from faster_whisper import WhisperModel + +def wisper_generate(audio_path): + path = "./faster-whisper-medium" + model = WhisperModel(model_size_or_path=path, device="auto", compute_type="int8", + cpu_threads=12) + segments, info = model.transcribe(audio=audio_path, + vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=1000)) + print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) + return segments,info +if __name__ == "__main__": + wisper_generate('./test-cn.mp3') + wisper_generate('./test-en.mp3') + wisper_generate('./test-ja.wav') +``` \ No newline at end of file diff --git "a/docs/\345\205\264\350\266\243/ffmpeg\345\243\260\351\237\263\346\240\207\345\207\206\345\214\226\345\244\204\347\220\206.md" 
"b/docs/\345\205\264\350\266\243/ffmpeg\345\243\260\351\237\263\346\240\207\345\207\206\345\214\226\345\244\204\347\220\206.md" new file mode 100644 index 000000000..012d85b17 --- /dev/null +++ "b/docs/\345\205\264\350\266\243/ffmpeg\345\243\260\351\237\263\346\240\207\345\207\206\345\214\226\345\244\204\347\220\206.md" @@ -0,0 +1,11 @@ +# ffmpeg-normalize 声音标准化处理 + + +## 安装ffmpeg-normalize +```shell +pip3 install ffmpeg-normalize +``` +## 使用ffmpeg-normalize标准化声音到-14LUFS +```shell +ffmpeg-normalize ./output_temp.mp3 -o ./output.mp3 -ar 44100 --target -14 --loudness-range 50 --dual-mono -c:a libmp3lame -b:a 192k -pr --dynamic -f +``` \ No newline at end of file