feat: Add Speech to Text feature

osl-incubator · Feb 20, 2024 · ffdf895 · ffdf895
1 parent af3c6d1
commit ffdf895
Show file tree

Hide file tree

Showing 12 changed files with 686 additions and 179 deletions.
diff --git a/README.md b/README.md
@@ -31,17 +31,17 @@ $ mkdir /tmp/artbox
 
 ### Convert text to audio
 
-By default, the `artbox voice` uses
+By default, the `artbox speech` uses
 [`edge-tts`](https://pypi.org/project/edge-tts/) engine, but you can also
 specify [`gtts`](https://github.com/pndurette/gTTS) with the flag
 `--engine gtts`.
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts
 ```
 
@@ -50,10 +50,10 @@ If you need to generate the audio for a different language, you can use the flag
 
 ```bash
 $ echo "Bom dia, mundo!" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --lang pt
 ```
 
@@ -62,10 +62,10 @@ locale for that language, for example:
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en-IN
 ```
@@ -75,10 +75,10 @@ and `--pitch`, for example:
 
 ```bash
 $ echo "Do you want some coffee?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en \
     --rate +10% \

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -58,7 +58,7 @@
 
 ### Features
 
-- Add engine options for Voice class. ([#6](https://github.com/ggpedia/artbox/issues/6)) ([d4381f7](https://github.com/ggpedia/artbox/commit/d4381f781a98ffb51fb103d671c5a9115bb3f6d1))
+- Add engine options for Speech class. ([#6](https://github.com/ggpedia/artbox/issues/6)) ([d4381f7](https://github.com/ggpedia/artbox/commit/d4381f781a98ffb51fb103d671c5a9115bb3f6d1))
 
 # [0.2.0](https://github.com/ggpedia/artbox/compare/0.1.0...0.2.0) (2023-08-29)
 
@@ -69,4 +69,4 @@
 
 ### Features
 
-- Add the flag `--lang` for the voice command ([#2](https://github.com/ggpedia/artbox/issues/2)) ([cb937e9](https://github.com/ggpedia/artbox/commit/cb937e9e7a9de5a19b3dc4dc8d34f6daf4ba6304))
+- Add the flag `--lang` for the speech command ([#2](https://github.com/ggpedia/artbox/issues/2)) ([cb937e9](https://github.com/ggpedia/artbox/commit/cb937e9e7a9de5a19b3dc4dc8d34f6daf4ba6304))
diff --git a/docs/index.md b/docs/index.md
@@ -31,17 +31,17 @@ $ mkdir /tmp/artbox
 
 ### Convert text to audio
 
-By default, the `artbox voice` uses
+By default, the `artbox speech` uses
 [`edge-tts`](https://pypi.org/project/edge-tts/) engine, but you can also
 specify [`gtts`](https://github.com/pndurette/gTTS) with the flag
 `--engine gtts`.
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts
 ```
 
@@ -50,10 +50,10 @@ If you need to generate the audio for a different language, you can use the flag
 
 ```bash
 $ echo "Bom dia, mundo!" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --lang pt
 ```
 
@@ -62,10 +62,10 @@ locale for that language, for example:
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en-IN
 ```
@@ -75,10 +75,10 @@ and `--pitch`, for example:
 
 ```bash
 $ echo "Do you want some coffee?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en \
     --rate +10% \