diff --git a/README.md b/README.md
index 33f3300..a64784b 100644
--- a/README.md
+++ b/README.md
@@ -31,17 +31,17 @@ $ mkdir /tmp/artbox
 
 ### Convert text to audio
 
-By default, the `artbox voice` uses
+By default, the `artbox speech` command uses the
 [`edge-tts`](https://pypi.org/project/edge-tts/) engine, but if you can also
 specify [`gtts`](https://github.com/pndurette/gTTS) with the flag
 `--engine gtts`.
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts
 ```
 
@@ -50,10 +50,10 @@ If you need to generate the audio for different language, you can use the flag
 
 ```bash
 $ echo "Bom dia, mundo!" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --lang pt
 ```
 
@@ -62,10 +62,10 @@ locale for that language, for example:
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en-IN
 ```
@@ -75,10 +75,10 @@ and `--pitch`, for example:
 
 ```bash
 $ echo "Do you want some coffee?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en \
     --rate +10% \
diff --git a/docs/changelog.md b/docs/changelog.md
index 35e70bf..55c3d6c 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -58,7 +58,7 @@
 
 ### Features
 
-- Add engine options for Voice class. ([#6](https://github.com/ggpedia/artbox/issues/6)) ([d4381f7](https://github.com/ggpedia/artbox/commit/d4381f781a98ffb51fb103d671c5a9115bb3f6d1))
+- Add engine options for Speech class. ([#6](https://github.com/ggpedia/artbox/issues/6)) ([d4381f7](https://github.com/ggpedia/artbox/commit/d4381f781a98ffb51fb103d671c5a9115bb3f6d1))
 
 # [0.2.0](https://github.com/ggpedia/artbox/compare/0.1.0...0.2.0) (2023-08-29)
 
@@ -69,4 +69,4 @@
 
 ### Features
 
-- Add the flag `--lang` for the voice command ([#2](https://github.com/ggpedia/artbox/issues/2)) ([cb937e9](https://github.com/ggpedia/artbox/commit/cb937e9e7a9de5a19b3dc4dc8d34f6daf4ba6304))
+- Add the flag `--lang` for the speech command ([#2](https://github.com/ggpedia/artbox/issues/2)) ([cb937e9](https://github.com/ggpedia/artbox/commit/cb937e9e7a9de5a19b3dc4dc8d34f6daf4ba6304))
diff --git a/docs/index.md b/docs/index.md
index 2b78971..c4afe44 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -31,17 +31,17 @@ $ mkdir /tmp/artbox
 
 ### Convert text to audio
 
-By default, the `artbox voice` uses
+By default, the `artbox speech` command uses the
 [`edge-tts`](https://pypi.org/project/edge-tts/) engine, but if you can also
 specify [`gtts`](https://github.com/pndurette/gTTS) with the flag
 `--engine gtts`.
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts
 ```
 
@@ -50,10 +50,10 @@ If you need to generate the audio for different language, you can use the flag
 
 ```bash
 $ echo "Bom dia, mundo!" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --lang pt
 ```
 
@@ -62,10 +62,10 @@ locale for that language, for example:
 
 ```bash
 $ echo "Are you ready to join Link and Zelda in fighting off this unprecedented threat to Hyrule?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en-IN
 ```
@@ -75,10 +75,10 @@ and `--pitch`, for example:
 
 ```bash
 $ echo "Do you want some coffee?" > /tmp/artbox/text.md
-$ artbox voice text-to-speech \
+$ artbox speech text-to-speech \
     --title artbox \
     --text-path /tmp/artbox/text.md \
-    --output-path /tmp/artbox/voice.mp3 \
+    --output-path /tmp/artbox/speech.mp3 \
     --engine edge-tts \
     --lang en \
     --rate +10% \
diff --git a/poetry.lock b/poetry.lock
index 5a97ec4..f6f9602 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -412,6 +412,17 @@ webencodings = "*"
 [package.extras]
 css = ["tinycss2 (>=1.1.0,<1.3)"]
 
+[[package]]
+name = "cachetools"
+version = "5.3.2"
+description = "Extensible memoizing collections and decorators"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"},
+    {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"},
+]
+
 [[package]]
 name = "certifi"
 version = "2023.7.22"
@@ -1132,6 +1143,93 @@ python-dateutil = ">=2.8.1"
 [package.extras]
 dev = ["flake8", "markdown", "twine", "wheel"]
 
+[[package]]
+name = "google-api-core"
+version = "2.17.1"
+description = "Google API client core library"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "google-api-core-2.17.1.tar.gz", hash = "sha256:9df18a1f87ee0df0bc4eea2770ebc4228392d8cc4066655b320e2cfccb15db95"},
+    {file = "google_api_core-2.17.1-py3-none-any.whl", hash = "sha256:610c5b90092c360736baccf17bd3efbcb30dd380e7a6dc28a71059edb8bd0d8e"},
+]
+
+[package.dependencies]
+google-auth = ">=2.14.1,<3.0.dev0"
+googleapis-common-protos = ">=1.56.2,<2.0.dev0"
+grpcio = [
+    {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
+    {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
+]
+grpcio-status = [
+    {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
+    {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
+]
+protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0"
+requests = ">=2.18.0,<3.0.0.dev0"
+
+[package.extras]
+grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"]
+grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
+grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
+
+[[package]]
+name = "google-auth"
+version = "2.28.0"
+description = "Google Authentication Library"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "google-auth-2.28.0.tar.gz", hash = "sha256:3cfc1b6e4e64797584fb53fc9bd0b7afa9b7c0dba2004fa7dcc9349e58cc3195"},
+    {file = "google_auth-2.28.0-py2.py3-none-any.whl", hash = "sha256:7634d29dcd1e101f5226a23cbc4a0c6cda6394253bf80e281d9c5c6797869c53"},
+]
+
+[package.dependencies]
+cachetools = ">=2.0.0,<6.0"
+pyasn1-modules = ">=0.2.1"
+rsa = ">=3.1.4,<5"
+
+[package.extras]
+aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"]
+enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"]
+pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"]
+reauth = ["pyu2f (>=0.1.5)"]
+requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
+
+[[package]]
+name = "google-cloud-speech"
+version = "2.24.1"
+description = "Google Cloud Speech API client library"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "google-cloud-speech-2.24.1.tar.gz", hash = "sha256:68f5b99b5719357a5b89609442cd74e101fefeb56e79a9d6d321394c27605a84"},
+    {file = "google_cloud_speech-2.24.1-py2.py3-none-any.whl", hash = "sha256:4931b7e8edc662e2bf7baa36f5dc5145890d639961ebec9a5ca63fc854562476"},
+]
+
+[package.dependencies]
+google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
+google-auth = ">=2.14.1,<3.0.0dev"
+proto-plus = ">=1.22.3,<2.0.0dev"
+protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev"
+
+[[package]]
+name = "googleapis-common-protos"
+version = "1.62.0"
+description = "Common protobufs used in Google APIs"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "googleapis-common-protos-1.62.0.tar.gz", hash = "sha256:83f0ece9f94e5672cced82f592d2a5edf527a96ed1794f0bab36d5735c996277"},
+    {file = "googleapis_common_protos-1.62.0-py2.py3-none-any.whl", hash = "sha256:4750113612205514f9f6aa4cb00d523a94f3e8c06c5ad2fee466387dc4875f07"},
+]
+
+[package.dependencies]
+protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0"
+
+[package.extras]
+grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
+
 [[package]]
 name = "griffe"
 version = "0.40.0"
@@ -1146,6 +1244,88 @@ files = [
 [package.dependencies]
 colorama = ">=0.4"
 
+[[package]]
+name = "grpcio"
+version = "1.60.1"
+description = "HTTP/2-based RPC framework"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "grpcio-1.60.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:14e8f2c84c0832773fb3958240c69def72357bc11392571f87b2d7b91e0bb092"},
+    {file = "grpcio-1.60.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:33aed0a431f5befeffd9d346b0fa44b2c01aa4aeae5ea5b2c03d3e25e0071216"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:fead980fbc68512dfd4e0c7b1f5754c2a8e5015a04dea454b9cada54a8423525"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:082081e6a36b6eb5cf0fd9a897fe777dbb3802176ffd08e3ec6567edd85bc104"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55ccb7db5a665079d68b5c7c86359ebd5ebf31a19bc1a91c982fd622f1e31ff2"},
+    {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b54577032d4f235452f77a83169b6527bf4b77d73aeada97d45b2aaf1bf5ce0"},
+    {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7d142bcd604166417929b071cd396aa13c565749a4c840d6c702727a59d835eb"},
+    {file = "grpcio-1.60.1-cp310-cp310-win32.whl", hash = "sha256:2a6087f234cb570008a6041c8ffd1b7d657b397fdd6d26e83d72283dae3527b1"},
+    {file = "grpcio-1.60.1-cp310-cp310-win_amd64.whl", hash = "sha256:f2212796593ad1d0235068c79836861f2201fc7137a99aa2fea7beeb3b101177"},
+    {file = "grpcio-1.60.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:79ae0dc785504cb1e1788758c588c711f4e4a0195d70dff53db203c95a0bd303"},
+    {file = "grpcio-1.60.1-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:4eec8b8c1c2c9b7125508ff7c89d5701bf933c99d3910e446ed531cd16ad5d87"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8c9554ca8e26241dabe7951aa1fa03a1ba0856688ecd7e7bdbdd286ebc272e4c"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91422ba785a8e7a18725b1dc40fbd88f08a5bb4c7f1b3e8739cab24b04fa8a03"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cba6209c96828711cb7c8fcb45ecef8c8859238baf15119daa1bef0f6c84bfe7"},
+    {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c71be3f86d67d8d1311c6076a4ba3b75ba5703c0b856b4e691c9097f9b1e8bd2"},
+    {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:af5ef6cfaf0d023c00002ba25d0751e5995fa0e4c9eec6cd263c30352662cbce"},
+    {file = "grpcio-1.60.1-cp311-cp311-win32.whl", hash = "sha256:a09506eb48fa5493c58f946c46754ef22f3ec0df64f2b5149373ff31fb67f3dd"},
+    {file = "grpcio-1.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:49c9b6a510e3ed8df5f6f4f3c34d7fbf2d2cae048ee90a45cd7415abab72912c"},
+    {file = "grpcio-1.60.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:b58b855d0071575ea9c7bc0d84a06d2edfbfccec52e9657864386381a7ce1ae9"},
+    {file = "grpcio-1.60.1-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:a731ac5cffc34dac62053e0da90f0c0b8560396a19f69d9703e88240c8f05858"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:cf77f8cf2a651fbd869fbdcb4a1931464189cd210abc4cfad357f1cacc8642a6"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c557e94e91a983e5b1e9c60076a8fd79fea1e7e06848eb2e48d0ccfb30f6e073"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:069fe2aeee02dfd2135d562d0663fe70fbb69d5eed6eb3389042a7e963b54de8"},
+    {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb0af13433dbbd1c806e671d81ec75bd324af6ef75171fd7815ca3074fe32bfe"},
+    {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2f44c32aef186bbba254129cea1df08a20be414144ac3bdf0e84b24e3f3b2e05"},
+    {file = "grpcio-1.60.1-cp312-cp312-win32.whl", hash = "sha256:a212e5dea1a4182e40cd3e4067ee46be9d10418092ce3627475e995cca95de21"},
+    {file = "grpcio-1.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:6e490fa5f7f5326222cb9f0b78f207a2b218a14edf39602e083d5f617354306f"},
+    {file = "grpcio-1.60.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:4216e67ad9a4769117433814956031cb300f85edc855252a645a9a724b3b6594"},
+    {file = "grpcio-1.60.1-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:73e14acd3d4247169955fae8fb103a2b900cfad21d0c35f0dcd0fdd54cd60367"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6ecf21d20d02d1733e9c820fb5c114c749d888704a7ec824b545c12e78734d1c"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33bdea30dcfd4f87b045d404388469eb48a48c33a6195a043d116ed1b9a0196c"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53b69e79d00f78c81eecfb38f4516080dc7f36a198b6b37b928f1c13b3c063e9"},
+    {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:39aa848794b887120b1d35b1b994e445cc028ff602ef267f87c38122c1add50d"},
+    {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:72153a0d2e425f45b884540a61c6639436ddafa1829a42056aa5764b84108b8e"},
+    {file = "grpcio-1.60.1-cp37-cp37m-win_amd64.whl", hash = "sha256:50d56280b482875d1f9128ce596e59031a226a8b84bec88cb2bf76c289f5d0de"},
+    {file = "grpcio-1.60.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:6d140bdeb26cad8b93c1455fa00573c05592793c32053d6e0016ce05ba267549"},
+    {file = "grpcio-1.60.1-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:bc808924470643b82b14fe121923c30ec211d8c693e747eba8a7414bc4351a23"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:70c83bb530572917be20c21f3b6be92cd86b9aecb44b0c18b1d3b2cc3ae47df0"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b106bc52e7f28170e624ba61cc7dc6829566e535a6ec68528f8e1afbed1c41f"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e980cd6db1088c144b92fe376747328d5554bc7960ce583ec7b7d81cd47287"},
+    {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c5807e9152eff15f1d48f6b9ad3749196f79a4a050469d99eecb679be592acc"},
+    {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1c3dc536b3ee124e8b24feb7533e5c70b9f2ef833e3b2e5513b2897fd46763a"},
+    {file = "grpcio-1.60.1-cp38-cp38-win32.whl", hash = "sha256:d7404cebcdb11bb5bd40bf94131faf7e9a7c10a6c60358580fe83913f360f929"},
+    {file = "grpcio-1.60.1-cp38-cp38-win_amd64.whl", hash = "sha256:c8754c75f55781515a3005063d9a05878b2cfb3cb7e41d5401ad0cf19de14872"},
+    {file = "grpcio-1.60.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:0250a7a70b14000fa311de04b169cc7480be6c1a769b190769d347939d3232a8"},
+    {file = "grpcio-1.60.1-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:660fc6b9c2a9ea3bb2a7e64ba878c98339abaf1811edca904ac85e9e662f1d73"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:76eaaba891083fcbe167aa0f03363311a9f12da975b025d30e94b93ac7a765fc"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d97c65ea7e097056f3d1ead77040ebc236feaf7f71489383d20f3b4c28412a"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb2a2911b028f01c8c64d126f6b632fcd8a9ac975aa1b3855766c94e4107180"},
+    {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:5a1ebbae7e2214f51b1f23b57bf98eeed2cf1ba84e4d523c48c36d5b2f8829ff"},
+    {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a66f4d2a005bc78e61d805ed95dedfcb35efa84b7bba0403c6d60d13a3de2d6"},
+    {file = "grpcio-1.60.1-cp39-cp39-win32.whl", hash = "sha256:8d488fbdbf04283f0d20742b64968d44825617aa6717b07c006168ed16488804"},
+    {file = "grpcio-1.60.1-cp39-cp39-win_amd64.whl", hash = "sha256:61b7199cd2a55e62e45bfb629a35b71fc2c0cb88f686a047f25b1112d3810904"},
+    {file = "grpcio-1.60.1.tar.gz", hash = "sha256:dd1d3a8d1d2e50ad9b59e10aa7f07c7d1be2b367f3f2d33c5fade96ed5460962"},
+]
+
+[package.extras]
+protobuf = ["grpcio-tools (>=1.60.1)"]
+
+[[package]]
+name = "grpcio-status"
+version = "1.60.1"
+description = "Status proto mapping for gRPC"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "grpcio-status-1.60.1.tar.gz", hash = "sha256:61b5aab8989498e8aa142c20b88829ea5d90d18c18c853b9f9e6d407d37bf8b4"},
+    {file = "grpcio_status-1.60.1-py3-none-any.whl", hash = "sha256:3034fdb239185b6e0f3169d08c268c4507481e4b8a434c21311a03d9eb5889a0"},
+]
+
+[package.dependencies]
+googleapis-common-protos = ">=1.5.5"
+grpcio = ">=1.60.1"
+protobuf = ">=4.21.6"
+
 [[package]]
 name = "gtts"
 version = "2.5.1"
@@ -3355,6 +3535,43 @@ files = [
 [package.dependencies]
 wcwidth = "*"
 
+[[package]]
+name = "proto-plus"
+version = "1.23.0"
+description = "Beautiful, Pythonic protocol buffers."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "proto-plus-1.23.0.tar.gz", hash = "sha256:89075171ef11988b3fa157f5dbd8b9cf09d65fffee97e29ce403cd8defba19d2"},
+    {file = "proto_plus-1.23.0-py3-none-any.whl", hash = "sha256:a829c79e619e1cf632de091013a4173deed13a55f326ef84f05af6f50ff4c82c"},
+]
+
+[package.dependencies]
+protobuf = ">=3.19.0,<5.0.0dev"
+
+[package.extras]
+testing = ["google-api-core[grpc] (>=1.31.5)"]
+
+[[package]]
+name = "protobuf"
+version = "4.25.3"
+description = ""
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"},
+    {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"},
+    {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"},
+    {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"},
+    {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"},
+    {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"},
+    {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"},
+    {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"},
+    {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"},
+    {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"},
+    {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"},
+]
+
 [[package]]
 name = "psutil"
 version = "5.9.8"
@@ -3408,6 +3625,31 @@ files = [
 [package.extras]
 tests = ["pytest"]
 
+[[package]]
+name = "pyasn1"
+version = "0.5.1"
+description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+files = [
+    {file = "pyasn1-0.5.1-py2.py3-none-any.whl", hash = "sha256:4439847c58d40b1d0a573d07e3856e95333f1976294494c325775aeca506eb58"},
+    {file = "pyasn1-0.5.1.tar.gz", hash = "sha256:6d391a96e59b23130a5cfa74d6fd7f388dbbe26cc8f1edf39fdddf08d9d6676c"},
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.3.0"
+description = "A collection of ASN.1-based protocols modules"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+files = [
+    {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"},
+    {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"},
+]
+
+[package.dependencies]
+pyasn1 = ">=0.4.6,<0.6.0"
+
 [[package]]
 name = "pycairo"
 version = "1.25.1"
@@ -3802,6 +4044,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -4347,6 +4590,20 @@ files = [
     {file = "rpds_py-0.17.1.tar.gz", hash = "sha256:0210b2668f24c078307260bf88bdac9d6f1093635df5123789bfee4d8d7fc8e7"},
 ]
 
+[[package]]
+name = "rsa"
+version = "4.9"
+description = "Pure-Python RSA implementation"
+optional = false
+python-versions = ">=3.6,<4"
+files = [
+    {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"},
+    {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"},
+]
+
+[package.dependencies]
+pyasn1 = ">=0.1.3"
+
 [[package]]
 name = "ruff"
 version = "0.1.15"
@@ -4603,6 +4860,34 @@ numpy = "*"
 docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
 test = ["pytest"]
 
+[[package]]
+name = "speechrecognition"
+version = "3.10.1"
+description = "Library for performing speech recognition, with support for several engines and APIs, online and offline."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "SpeechRecognition-3.10.1-py2.py3-none-any.whl", hash = "sha256:69898b2b0bbd5bc65647c5ef833fe318ad74f02e8a6af5c9fd671933083660bc"},
+    {file = "SpeechRecognition-3.10.1.tar.gz", hash = "sha256:718731886b7836e20a06b9a2c6cace12a9e130971bb6af1b1dd130b22bad9f82"},
+]
+
+[package.dependencies]
+requests = ">=2.26.0"
+typing-extensions = "*"
+
+[package.extras]
+whisper-api = ["openai"]
+
+[[package]]
+name = "srt"
+version = "3.5.3"
+description = "A tiny library for parsing, modifying, and composing SRT files."
+optional = false
+python-versions = ">=2.7"
+files = [
+    {file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"},
+]
+
 [[package]]
 name = "stack-data"
 version = "0.6.3"
@@ -4871,6 +5156,26 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
+[[package]]
+name = "vosk"
+version = "0.3.45"
+description = "Offline open source speech recognition API based on Kaldi and Vosk"
+optional = false
+python-versions = ">=3"
+files = [
+    {file = "vosk-0.3.45-py3-none-linux_armv7l.whl", hash = "sha256:4221f83287eefe5abbe54fc6f1da5774e9e3ffcbbdca1705a466b341093b072e"},
+    {file = "vosk-0.3.45-py3-none-manylinux2014_aarch64.whl", hash = "sha256:54efb47dd890e544e9e20f0316413acec7f8680d04ec095c6140ab4e70262704"},
+    {file = "vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:25e025093c4399d7278f543568ed8cc5460ac3a4bf48c23673ace1e25d26619f"},
+    {file = "vosk-0.3.45-py3-none-win_amd64.whl", hash = "sha256:6994ddc68556c7e5730c3b6f6bad13320e3519b13ce3ed2aa25a86724e7c10ac"},
+]
+
+[package.dependencies]
+cffi = ">=1.0"
+requests = "*"
+srt = "*"
+tqdm = "*"
+websockets = "*"
+
 [[package]]
 name = "vulture"
 version = "2.11"
@@ -4977,6 +5282,87 @@ docs = ["Sphinx (>=6.0)", "sphinx-rtd-theme (>=1.1.0)"]
 optional = ["python-socks", "wsaccel"]
 test = ["websockets"]
 
+[[package]]
+name = "websockets"
+version = "12.0"
+description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
+    {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
+    {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
+    {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
+    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
+    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
+    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
+    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
+    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
+    {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
+    {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
+    {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
+    {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
+    {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
+    {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
+    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
+    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
+    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
+    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
+    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
+    {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
+    {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
+    {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
+    {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
+    {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
+    {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
+    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
+    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
+    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
+    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
+    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
+    {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
+    {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
+    {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"},
+    {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"},
+    {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"},
+    {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"},
+    {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"},
+    {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"},
+    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"},
+    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"},
+    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"},
+    {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"},
+    {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"},
+    {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"},
+    {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"},
+    {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"},
+    {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"},
+    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"},
+    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"},
+    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"},
+    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"},
+    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"},
+    {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"},
+    {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"},
+    {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
+    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
+    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
+    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
+    {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
+    {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
+    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
+    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
+    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
+    {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
+    {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
+    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
+    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
+    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
+    {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
+    {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
+    {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
+]
+
 [[package]]
 name = "xonsh"
 version = "0.14.0"
@@ -5124,4 +5510,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">3.8.1,<3.12"
-content-hash = "507bab25cdfcdb01af5c0d2869dfc46cfd3276f760ce0f702600b20140bf7d24"
+content-hash = "5edc9bcaa45d2f2410fe24b3787f776666f5b2c63737d8ddc8d58fa51704c699"
diff --git a/pyproject.toml b/pyproject.toml
index c9cc563..9910b4e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,9 @@ edge-tts = ">=6.1.8"
 numpy = ">=1.20"
 typer = ">=0.9.0"
 pytubefix = ">=1.13.3"
+speechrecognition = ">=3.10"
+vosk = ">=0.3.45"
+google-cloud-speech = ">=2.24.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = ">=7.3.2"
diff --git a/src/artbox/cli.py b/src/artbox/cli.py
index 26e40a3..0820bd7 100644
--- a/src/artbox/cli.py
+++ b/src/artbox/cli.py
@@ -5,8 +5,8 @@
 
 from artbox import __version__
 from artbox.sounds import Sound
+from artbox.speech import Speech
 from artbox.videos import Video, Youtube
-from artbox.voices import Voice
 
 app = typer.Typer(
     name="Artbox",
@@ -28,9 +28,9 @@
     short_help="Video processing commands.",
 )
 app_voice = typer.Typer(
-    name="voice",
-    help="Voice processing commands for Artbox.",
-    short_help="Voice processing commands.",
+    name="speech",
+    help="Speech processing commands for Artbox.",
+    short_help="Speech processing commands.",
 )
 app_youtube = typer.Typer(
     name="youtube",
@@ -40,7 +40,7 @@
 
 app.add_typer(app_sound, name="sound")
 app.add_typer(app_video, name="video")
-app.add_typer(app_voice, name="voice")
+app.add_typer(app_voice, name="speech")
 app.add_typer(app_youtube, name="youtube")
 
 
@@ -118,7 +118,7 @@ def voice_text_to_speech(
         "pitch": pitch,
     }
 
-    runner = Voice(args_dict)
+    runner = Speech(args_dict)
     runner.text_to_speech()
 
 
diff --git a/src/artbox/speech.py b/src/artbox/speech.py
new file mode 100644
index 0000000..8db5be7
--- /dev/null
+++ b/src/artbox/speech.py
@@ -0,0 +1,192 @@
+"""
+Utilities for text-to-speech and speech-to-text conversion.
+
+ref: https://thepythoncode.com/article/convert-text-to-speech-in-python
+"""
+import asyncio
+import os
+import random
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+import edge_tts
+import gtts
+import speech_recognition as sr
+
+from edge_tts import VoicesManager
+from pydub import AudioSegment
+
+from artbox.base import ArtBox
+
+
+class SpeechEngineBase(ArtBox, ABC):
+    """Abstract interface shared by the text-to-speech engines."""
+
+    @abstractmethod
+    def text_to_speech(self) -> None:
+        """Convert text to audio speech."""
+        ...
+
+
+class Speech(SpeechEngineBase):
+    """Speech class will run commands according to the selected engine."""
+
+    engine: SpeechEngineBase
+
+    def __init__(self, *args, **kwargs) -> None:
+        """Initialize Speech and build the engine chosen by `engine`."""
+        super().__init__(*args, **kwargs)
+        engine = self.args.get("engine", "edge-tts")
+
+        if engine == "edge-tts":
+            self.engine: SpeechEngineBase = SpeechEngineMSEdgeTTS(
+                *args, **kwargs
+            )
+        elif engine == "gtts":
+            self.engine: SpeechEngineBase = SpeechEngineGTTS(*args, **kwargs)
+        else:
+            raise Exception(f"Engine {engine} not found.")
+
+    def text_to_speech(self) -> None:
+        """Convert text to audio speech with the selected engine."""
+        return self.engine.text_to_speech()
+
+
+class SpeechEngineGTTS(SpeechEngineBase):
+    """Google-Text-To-Speech engine."""
+
+    def text_to_speech(self) -> None:
+        """Read the text file and save its gTTS audio to the output path."""
+        title: str = self.args.get("title", "")
+        text_path: str = self.args.get("text-path", "")
+        lang: str = self.args.get("lang", "en")
+
+        if not title:
+            raise Exception("Argument `title` not given")
+
+        if not text_path:
+            raise Exception("Argument `text_path` not given")
+
+        with open(text_path, "r") as f:
+            text = f.read()
+
+        tts = gtts.gTTS(text, lang=lang, slow=False)
+        tts.save(str(self.output_path))
+
+
+class SpeechEngineMSEdgeTTS(SpeechEngineBase):
+    """Microsoft Edge Text-To-Speech engine."""
+
+    async def async_text_to_speech(self) -> None:
+        """Stream the text file through edge-tts into the output file."""
+        title: str = self.args.get("title", "")
+        text_path: str = self.args.get("text-path", "")
+        lang: str = self.args.get("lang", "en")
+        rate = self.args.get("rate", "+0%")
+        volume = self.args.get("volume", "+0%")
+        pitch = self.args.get("pitch", "+0Hz")
+
+        if not title:
+            raise Exception("Argument `title` not given")
+
+        if not text_path:
+            raise Exception("Argument `text_path` not given")
+
+        with open(text_path, "r") as f:
+            text = f.read()
+
+        params = {"Locale": lang} if "-" in lang else {"Language": lang}
+        voices = await VoicesManager.create()
+        voice_options = voices.find(Gender="Female", **params)
+
+        communicate = edge_tts.Communicate(
+            text=text,
+            voice=random.choice(voice_options)["Name"],
+            rate=rate,
+            volume=volume,
+            pitch=pitch,
+        )
+        with open(self.output_path, "wb") as file:
+            async for chunk in communicate.stream():
+                if chunk["type"] == "audio":
+                    file.write(chunk["data"])
+                elif chunk["type"] == "WordBoundary":
+                    print(f"WordBoundary: {chunk}")
+
+    def text_to_speech(self) -> None:
+        """Convert text to audio speech."""
+        # `asyncio.run` creates a fresh event loop for every call and closes
+        # it when done, so repeated conversions in one process keep working.
+        # Fetching the policy's loop and closing it would hand the next call
+        # an already-closed loop ("Event loop is closed").
+        asyncio.run(self.async_text_to_speech())
+
+
+def convert_mp3_to_wav(input_path: str, output_path: str) -> None:
+    """Decode the MP3 at `input_path` and write it as WAV to `output_path`."""
+    AudioSegment.from_mp3(input_path).export(output_path, format="wav")
+
+
+class SpeechToText(ArtBox):
+    """Transcribe audio files to text with `speech_recognition` engines."""
+
+    def convert_from_mp3(self) -> None:
+        """Recognize speech from MP3 using various engines options."""
+        import tempfile  # local import: only needed for the scratch WAV
+        mp3_path = self.input_path
+        # Decode into a scratch WAV: naming it after the input would clobber
+        # and then delete any `.wav` stored next to it (e.g. a fixture).
+        fd, wav_path = tempfile.mkstemp(suffix=".wav")
+        os.close(fd)
+        try:
+            convert_mp3_to_wav(str(mp3_path), wav_path)
+            self.input_path = wav_path
+            self.convert_from_wav()
+        finally:
+            self.input_path = mp3_path  # leave the instance as we found it
+            os.remove(wav_path)
+
+    def convert_from_wav(self) -> None:
+        """Recognize speech from WAVE using various engines options."""
+        wav_path: str = str(self.input_path)  # AudioFile rejects Path
+        output_path: str = self.output_path
+        language: str = self.args.get("lang", "en-US")
+        engine: str = self.args.get("engine", "google")
+        recognizer = sr.Recognizer()
+
+        with sr.AudioFile(wav_path) as source:
+            audio_data = recognizer.record(source)
+            kwargs = {"audio_data": audio_data, "language": language}
+            try:
+                if engine == "google":
+                    text = recognizer.recognize_google(**kwargs)
+                elif engine == "google_cloud":
+                    text = recognizer.recognize_google_cloud(**kwargs)
+                elif engine == "wit":
+                    text = recognizer.recognize_wit(**kwargs)
+                elif engine == "azure":
+                    text = recognizer.recognize_azure(**kwargs)
+                elif engine == "houndify":
+                    text = recognizer.recognize_houndify(**kwargs)
+                elif engine == "ibm":
+                    text = recognizer.recognize_ibm(**kwargs)
+                elif engine == "vosk":
+                    text = recognizer.recognize_vosk(**kwargs)
+                elif engine == "whisper":
+                    text = recognizer.recognize_whisper(**kwargs)
+                elif engine == "whisper-api":
+                    text = recognizer.recognize_whisper_api(audio_data)
+                else:
+                    raise Exception(f"Engine '{engine}' is not supported.")
+            except sr.UnknownValueError:
+                raise Exception(
+                    f"{engine.title()} could not understand the audio"
+                )
+            except sr.RequestError as e:
+                raise Exception(
+                    f"Could not request results from {engine.title()}; {e}"
+                )
+
+        with open(output_path, "w") as f:
+            f.write(text)
diff --git a/src/artbox/voices.py b/src/artbox/voices.py
deleted file mode 100644
index 420386f..0000000
--- a/src/artbox/voices.py
+++ /dev/null
@@ -1,119 +0,0 @@
-"""
-Utilities for handling audio voices.
-
-ref: https://thepythoncode.com/article/convert-text-to-speech-in-python
-"""
-import asyncio
-import random
-
-from abc import ABC, abstractmethod
-
-import edge_tts
-import gtts
-
-from edge_tts import VoicesManager
-
-from artbox.base import ArtBox
-
-
-class VoiceEngineBase(ArtBox, ABC):
-    """Set of methods for handing audio voices."""
-
-    @abstractmethod
-    def text_to_speech(self) -> None:
-        """Convert text to audio voice."""
-        ...
-
-
-class Voice(VoiceEngineBase):
-    """Voice class will run commands according to the selected engine."""
-
-    engine: VoiceEngineBase
-
-    def __init__(self, *args, **kwargs) -> None:
-        """Initialize Voice class."""
-        super().__init__(*args, **kwargs)
-        engine = self.args.get("engine", "edge-tts")
-
-        if engine == "edge-tts":
-            self.engine: VoiceEngineBase = VoiceEngineMSEdgeTTS(
-                *args, **kwargs
-            )
-        elif engine == "gtts":
-            self.engine: VoiceEngineBase = VoiceEngineGTTS(*args, **kwargs)
-        else:
-            raise Exception(f"Engine {engine} not found.")
-
-    def text_to_speech(self) -> None:
-        """Convert text to audio voice."""
-        return self.engine.text_to_speech()
-
-
-class VoiceEngineGTTS(VoiceEngineBase):
-    """Google-Text-To-Speech engine."""
-
-    def text_to_speech(self) -> None:
-        """Convert text to audio voice."""
-        title: str = self.args.get("title", "")
-        text_path: str = self.args.get("text-path", "")
-        lang: str = self.args.get("lang", "en")
-
-        if not title:
-            raise Exception("Argument `title` not given")
-
-        if not text_path:
-            raise Exception("Argument `text_path` not given")
-
-        with open(text_path, "r") as f:
-            text = f.read()
-
-        tts = gtts.gTTS(text, lang=lang, slow=False)
-        tts.save(str(self.output_path))
-
-
-class VoiceEngineMSEdgeTTS(VoiceEngineBase):
-    """Microsoft Edge Text-To-Speech engine."""
-
-    async def async_text_to_speech(self) -> None:
-        """Convert text to audio voice in async mode."""
-        title: str = self.args.get("title", "")
-        text_path: str = self.args.get("text-path", "")
-        lang: str = self.args.get("lang", "en")
-        rate = self.args.get("rate", "+0%")
-        volume = self.args.get("volume", "+0%")
-        pitch = self.args.get("pitch", "+0Hz")
-
-        if not title:
-            raise Exception("Argument `title` not given")
-
-        if not text_path:
-            raise Exception("Argument `text_path` not given")
-
-        with open(text_path, "r") as f:
-            text = f.read()
-
-        params = {"Locale": lang} if "-" in lang else {"Language": lang}
-        voices = await VoicesManager.create()
-        voice_options = voices.find(Gender="Female", **params)
-
-        communicate = edge_tts.Communicate(
-            text=text,
-            voice=random.choice(voice_options)["Name"],
-            rate=rate,
-            volume=volume,
-            pitch=pitch,
-        )
-        with open(self.output_path, "wb") as file:
-            async for chunk in communicate.stream():
-                if chunk["type"] == "audio":
-                    file.write(chunk["data"])
-                elif chunk["type"] == "WordBoundary":
-                    print(f"WordBoundary: {chunk}")
-
-    def text_to_speech(self) -> None:
-        """Convert text to audio voice."""
-        loop = asyncio.get_event_loop_policy().get_event_loop()
-        try:
-            loop.run_until_complete(self.async_text_to_speech())
-        finally:
-            loop.close()
diff --git a/tests/data/audios/speech.mp3 b/tests/data/audios/speech.mp3
new file mode 100644
index 0000000..7819755
Binary files /dev/null and b/tests/data/audios/speech.mp3 differ
diff --git a/tests/data/audios/speech.wav b/tests/data/audios/speech.wav
new file mode 100644
index 0000000..0fbf92d
Binary files /dev/null and b/tests/data/audios/speech.wav differ
diff --git a/tests/test_speech.py b/tests/test_speech.py
new file mode 100644
index 0000000..f84e140
--- /dev/null
+++ b/tests/test_speech.py
@@ -0,0 +1,78 @@
+"""Set of tests for the speech module."""
+import os
+
+from pathlib import Path
+
+import pytest
+
+from artbox.speech import Speech, SpeechToText
+
+TMP_PATH = Path("/tmp/artbox")
+TEST_DATA_DIR = Path(__file__).parent / "data"
+
+os.makedirs(TMP_PATH, exist_ok=True)
+
+
+@pytest.mark.parametrize("engine", ["gtts", "edge-tts"])
+def test_convert_text_to_speech(engine) -> None:
+    """Check that each text-to-speech engine runs on a short text."""
+    text_path = TMP_PATH / f"totk-{engine}.txt"
+    params = {
+        "title": "totk",
+        "text-path": str(text_path),
+        "output-path": str(TMP_PATH / f"speech-{engine}.mp3"),
+        "engine": engine,
+    }
+
+    with open(text_path, "w") as f:
+        f.write(
+            "Are you ready to join Link and Zelda in fighting "
+            "off this unprecedented threat to Hyrule?"
+        )
+
+    speech = Speech(params)
+    speech.text_to_speech()
+
+
+@pytest.mark.parametrize(
+    "engine",
+    [
+        "google",
+        # The engines below need API credentials, so they stay disabled.
+        # 'google_cloud',
+        # 'wit',
+        # 'azure',
+        # 'houndify',
+        # 'ibm',
+        "vosk",
+        "whisper",
+        "whisper-api",  # NOTE(review): needs OPENAI_API_KEY (confirm)
+    ],
+)
+def test_convert_speech_to_text(engine) -> None:
+    """Test the conversion from audio speech to text."""
+    mp3_path = TEST_DATA_DIR / "audios" / "speech.mp3"
+    output_path = TMP_PATH / f"speech-{engine}.txt"
+
+    params = {
+        "input-path": str(mp3_path),
+        "output-path": str(output_path),
+        "engine": engine,
+    }
+
+    expected = (
+        "At Open Science Labs (OSL), we are dedicated to advancing the "
+        "scientific research through collaboration, innovation, and "
+        "education. Our mission is to create a more inclusive, transparent, "
+        "and accessible scientific community."
+    ).lower()
+
+    for char in ("(", ")", ",", "."):
+        expected = expected.replace(char, "")
+
+    speech = SpeechToText(params)
+    speech.convert_from_mp3()
+
+    with open(output_path, "r") as f:
+        result = f.read().lower()
+    assert result == expected
diff --git a/tests/test_voices.py b/tests/test_voices.py
deleted file mode 100644
index c88199b..0000000
--- a/tests/test_voices.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""Set of tests for the voices module."""
-import os
-
-from pathlib import Path
-
-import pytest
-
-from artbox.voices import Voice
-
-TMP_PATH = Path("/tmp/artbox")
-
-os.makedirs(TMP_PATH, exist_ok=True)
-
-
-@pytest.mark.parametrize("engine", ["gtts", "edge-tts"])
-def test_convert_text_to_speech(engine) -> None:
-    """Test the conversion from text to audio."""
-    text_path = TMP_PATH / f"totk-{engine}.txt"
-    params = {
-        "title": "totk",
-        "text-path": str(text_path),
-        "output-path": str(TMP_PATH / f"voice-{engine}.mp3"),
-        "engine": engine,
-    }
-
-    with open(text_path, "w") as f:
-        f.write(
-            "Are you ready to join Link and Zelda in fighting "
-            "off this unprecedented threat to Hyrule?"
-        )
-
-    voice = Voice(params)
-    voice.text_to_speech()