feat: Refactor and add new providers, improve configuration, and upda…

…te dependencies - Added a new `google-tts.go` file with the implementation of various methods for the Google Text-to-Speech provider. - Made changes to the configuration variables and added new structs in `internal/config/config.go`. - Made changes in the `main.go` file, including renaming functions, adding authentication middleware, and adding new routes. - Updated the default values and added new default values in `internal/config/parse.go`. - Made changes to import statements, renamed functions and fields, and added support for the Google Text-to-Speech provider in `internal/talker.go`. - Updated the `.gitignore` file to ignore specific files. - Added functionality, error handling, and logging statements in `pkg/providers/whisper.go`. - Added a new sample configuration file with additional sections and configurations in `configs/talk.sample.yaml`. - Updated the versions of various dependencies in the `go.mod` file. - Deleted the `configs/config.sample.yaml` file. - Added a new method and made changes to existing methods in `pkg/providers/chatgpt.go`. - Made changes to interface names, function signatures, and added comments in `pkg/providers/provider.go`. - Made changes to field types, added a new method, and modified existing methods in `pkg/providers/elevenlabs.go`. - Made changes to method names, added a new struct, modified method calls, and updated return messages in `internal/handler.go`.
moderato-app · Aug 13, 2023 · 2e073c1 · 2e073c1
1 parent c10dd1a
commit 2e073c1
Show file tree

Hide file tree

Showing 15 changed files with 483 additions and 153 deletions.
diff --git a/.gitignore b/.gitignore
@@ -17,9 +17,13 @@
 # Dependency directories (remove the comment below to include it)
 # vendor/
 
+# Do not touch the following 3 lines
+config.yaml
+talk.yaml
+google_credentials.json
+
 # Go workspace file
 go.work
 .idea
-config.yaml
 main
 build+push.sh
diff --git a/cmd/talk/main.go b/cmd/talk/main.go
@@ -23,8 +23,8 @@ func main() {
 	if err != nil {
 		logger.Sugar().Panicf("failed to create a talker: %+v", err)
 	}
-	if conf.Server.EagerCheckProviders {
-		t.MustCheckProviders()
+	if conf.Server.ProvidersMustFunction {
+		t.ProvidersMustFunction()
 	}
 
 	logger.Info("initialise web server...")

diff --git a/configs/config.sample.yaml b/configs/config.sample.yaml
diff --git a/configs/talk.sample.yaml b/configs/talk.sample.yaml
@@ -0,0 +1,62 @@
+server:
+  # Optional. Use port 8000 if not specified.
+  port: 8000
+  # Optional. Use true if not specified. Perform a startup request to each provider of speech-to-text,
+  # text-to-speech, and llm upon server initialization.
+  # Shut down the server if there are any errors, such as an invalid API key or a connection error.
+  # Log a warning if there are any issues, such as quota exhaustion or incorrect transcriptions, indicating a potential problem.
+  # These requests consume a minimal amount of quota or even no quota.
+  providers-must-function: true
+  # Optional. Enable basic auth only when there is at least one pair of username and password.
+  basic-auth:
+    - username1: password1
+    - username2: password2
+    - username3:            # match only empty password
+    - username4: "*"        # match any password, including empty password
+
+speech-to-text:
+  open-ai-whisper:
+    api-key: sk-abc123abc123abc123abc123abc123abc123
+
+text-to-speech:
+  elevenlabs:
+    api-key: abc123abc123abc1
+    # Optional. Use this voice whenever available, or randomly select a voice from the voice list
+    voice-id: P9sd8KYc82I23b9dUJm
+    # Optional. Range: 0.0~1.0. Use 0.5 if not specified. Increasing `stability` can make speech more stable and less expressive.
+    stability: 0.5
+    # Optional. Range: 0.0~1.0. Use 0.5 (50%) if not specified. Increasing `clarity` brings more clarity and more background artifacts.
+    clarity: 0.5
+  google-text-to-speech:
+    # Download a key file from Google Cloud. see https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console
+    path-to-keyfile: ./google_credentials.json
+    # Optional. Use en-US if not specified. Choose a language-code from https://www.rfc-editor.org/rfc/bcp/bcp47.txt
+    language-code: en-US
+    # Optional. Use this voice whenever available, or randomly select a voice from the voice list
+    voice-id: en-US-Standard-C
+    # Optional. Use female if not specified.
+    # When `voice-id` is not set, `gender` will be used to choose a voice.
+    #
+    # The preferred gender of the voice. If not set, the service will
+    # choose a voice based on the other parameters such as language_code and
+    # name. Note that this is only a preference, not requirement; if a
+    # voice of the appropriate gender is not available, the synthesizer should
+    # substitute a voice with a different gender rather than failing the request.
+    #
+    # Options: [male, female, neutral]
+    gender: female
+    # Optional. Range [0.25, 4.0]. Use 1.0 if not specified. Speaking speed
+    speaking-rate: 1.0
+    # Optional. Range [-20.0, 20.0]. Use 0 if not specified. Unit: semitones (12 semitones = 1 octave)
+    pitch: 0
+    # Optional. Range [-96.0, 16.0]. Use 0 if not specified. A larger value produces a louder voice.
+    volume-gain-db: 0
+
+llm:
+  open-ai-chat-gpt:
+    # typically, you would use the same API key as speech-to-text.open-ai-whisper
+    api-key: sk-abc123abc123abc123abc123abc123abc123
+    # Optional. Use gpt-3.5-turbo if not specified. For model list, see https://platform.openai.com/docs/models/gpt-4
+    model: gpt-3.5-turbo
+    # Optional. Use 2000 if not specified
+    max-generation-token: 2000
diff --git a/go.mod b/go.mod
@@ -3,8 +3,9 @@ module github.com/bubblelight/talk
 go 1.20
 
 require (
+	cloud.google.com/go/texttospeech v1.7.1
 	github.com/brpaz/echozap v1.1.3
-	github.com/google/uuid v1.1.2
+	github.com/google/uuid v1.3.0
 	github.com/haguro/elevenlabs-go v0.2.2
 	github.com/labstack/echo/v4 v4.11.1
 	github.com/patrickmn/go-cache v2.1.0+incompatible
@@ -16,8 +17,18 @@ require (
 )
 
 require (
+	cloud.google.com/go v0.110.7 // indirect
+	cloud.google.com/go/compute v1.19.3 // indirect
+	cloud.google.com/go/compute/metadata v0.2.3 // indirect
+	cloud.google.com/go/longrunning v0.5.0 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/s2a-go v0.1.4 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect
+	github.com/googleapis/gax-go/v2 v2.11.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/labstack/gommon v0.4.0 // indirect
 	github.com/magiconair/properties v1.8.7 // indirect
@@ -31,13 +42,22 @@ require (
 	github.com/subosito/gotenv v1.4.2 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/valyala/fasttemplate v1.2.2 // indirect
+	go.opencensus.io v0.24.0 // indirect
 	go.uber.org/atomic v1.9.0 // indirect
 	go.uber.org/multierr v1.8.0 // indirect
 	golang.org/x/crypto v0.11.0 // indirect
 	golang.org/x/net v0.12.0 // indirect
+	golang.org/x/oauth2 v0.8.0 // indirect
 	golang.org/x/sys v0.10.0 // indirect
 	golang.org/x/text v0.11.0 // indirect
 	golang.org/x/time v0.3.0 // indirect
+	google.golang.org/api v0.126.0 // indirect
+	google.golang.org/appengine v1.6.7 // indirect
+	google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc // indirect
+	google.golang.org/grpc v1.55.0 // indirect
+	google.golang.org/protobuf v1.30.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )