Skip to content

Commit

Permalink
feat(backend): support koemotion (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
nekomeowww authored Dec 30, 2024
1 parent f9338ec commit e2db1dc
Show file tree
Hide file tree
Showing 14 changed files with 681 additions and 33 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ unSpeech lets you use various online TTS with OpenAI-compatible API.

- [OpenAI](https://platform.openai.com/docs/api-reference/audio/createSpeech)
- [ElevenLabs](https://elevenlabs.io/docs/api-reference/text-to-speech/convert)
- [Koemotion (by Rinna)](https://koemotion.rinna.co.jp/)

## Getting Started

Expand Down Expand Up @@ -36,7 +37,7 @@ You can use unSpeech with most OpenAI clients.

The `model` parameter should be provider + model, e.g. `openai/tts-1-hd`, `elevenlabs/eleven_multilingual_v2`.

The `Authorization` header is auto-converted to the vendor's corresponding auth method, such as `xi-api-key`.
The `Authorization` header is auto-converted to the vendor's corresponding auth method, such as `xi-api-key`.

###### `curl`

Expand Down
5 changes: 5 additions & 0 deletions cspell.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ words:
- containedctx
- contextcheck
- cyclop
- dataurl
- depguard
- Describedby
- Detailf
Expand All @@ -20,6 +21,7 @@ words:
- exhaustive
- exhaustruct
- exportloopref
- Facemotion
- flac
- forcetypeassert
- funlen
Expand All @@ -43,6 +45,8 @@ words:
- hreflang
- ineffassign
- ireturn
- jsonpatch
- koemotion
- labstack
- lll
- maintidx
Expand All @@ -62,6 +66,7 @@ words:
- predeclared
- reassign
- revive
- Rinna
- samber
- staticcheck
- strconv
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ module github.com/moeru-ai/unspeech
go 1.23.2

require (
github.com/evanphx/json-patch/v5 v5.9.0
github.com/golang-module/carbon v1.7.3
github.com/labstack/echo/v4 v4.13.2
github.com/nekomeowww/fo v1.4.0
github.com/samber/lo v1.47.0
github.com/samber/mo v1.13.0
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.10.0
github.com/vincent-petithory/dataurl v1.0.0
k8s.io/client-go v0.32.0
)

Expand All @@ -23,6 +25,7 @@ require (
github.com/labstack/gommon v0.4.2 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/rogpeppe/go-internal v1.3.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/gobuffalo/envy v1.7.0 h1:GlXgaiBkmrYMHco6t4j7SacKO4XUjvh5pwXh0f4uxXU=
github.com/gobuffalo/envy v1.7.0/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI=
Expand Down Expand Up @@ -50,6 +52,8 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh
github.com/nekomeowww/fo v1.4.0 h1:ULX5KsnDzWHoDwHgtjd2wibpdpyh+5/5DITmvhJZyWY=
github.com/nekomeowww/fo v1.4.0/go.mod h1:ctwQ+BZ0UYUb2s+yM7h9SFHjqGCXeUIXFLK2ujAneWw=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
Expand Down Expand Up @@ -89,6 +93,8 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/vincent-petithory/dataurl v1.0.0 h1:cXw+kPto8NLuJtlMsI152irrVw9fRDX8AbShPRpg2CI=
github.com/vincent-petithory/dataurl v1.0.0/go.mod h1:FHafX5vmDzyP+1CQATJn7WFKc9CvnvxyvZy6I1MrG/U=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
Expand Down
77 changes: 60 additions & 17 deletions pkg/backend/backend.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
package backend

import (
"bytes"
"encoding/json"
"io"
"strings"

"github.com/labstack/echo/v4"
"github.com/samber/lo"
"github.com/samber/mo"

"github.com/moeru-ai/unspeech/pkg/apierrors"
"github.com/moeru-ai/unspeech/pkg/utils"
)

// Options represent API parameters refer to https://platform.openai.com/docs/api-reference/audio/createSpeech
type Options struct {
// OpenAISpeechRequestOptions represents the API parameters described at https://platform.openai.com/docs/api-reference/audio/createSpeech
type OpenAISpeechRequestOptions struct {
// (required) One of the available TTS models.
Model string `json:"model"`
// (required) The text to generate audio for.
Expand All @@ -29,20 +33,48 @@ type Options struct {
Speed int `json:"speed,omitempty"`
}

type FullOptions struct {
Options
type SpeechRequestOptions struct {
OpenAISpeechRequestOptions

Backend string `json:"backend"`
Model string `json:"model"`

body mo.Option[*bytes.Buffer]
bodyParsedMap map[string]any
}

func Speech(c echo.Context) mo.Result[any] {
var options Options
// AsBuffer returns the buffer stored in the unexported body field, wrapped in
// an Option. The Option holds a pointer, so the returned buffer is the same
// one held by o, not a copy.
func (o SpeechRequestOptions) AsBuffer() mo.Option[*bytes.Buffer] {
return o.body
}

if err := c.Bind(&options); err != nil {
return mo.Err[any](apierrors.NewErrBadRequest())
// AsMap returns the map stored in the unexported bodyParsedMap field. The map
// is returned as-is (not cloned), so mutations are visible through o as well.
func (o SpeechRequestOptions) AsMap() map[string]any {
return o.bodyParsedMap
}

func NewSpeechRequestOptions(body io.ReadCloser) mo.Result[SpeechRequestOptions] {
buffer := new(bytes.Buffer)

_, err := buffer.ReadFrom(body)
if err != nil {
return mo.Err[SpeechRequestOptions](apierrors.NewErrBadRequest().WithDetail(err.Error()))
}

var optionsMap map[string]any

err = json.Unmarshal(buffer.Bytes(), &optionsMap)
if err != nil {
return mo.Err[SpeechRequestOptions](apierrors.NewErrBadRequest().WithDetail(err.Error()))
}

var options OpenAISpeechRequestOptions

err = json.Unmarshal(buffer.Bytes(), &options)
if err != nil {
return mo.Err[SpeechRequestOptions](apierrors.NewErrBadRequest().WithDetail(err.Error()))
}

if options.Model == "" || options.Input == "" || options.Voice == "" {
return mo.Err[any](apierrors.NewErrInvalidArgument().WithDetail("either one of model, input, and voice parameter is required"))
return mo.Err[SpeechRequestOptions](apierrors.NewErrInvalidArgument().WithDetail("either one of model, input, and voice parameter is required"))
}

backendAndModel := lo.Ternary(
Expand All @@ -51,18 +83,29 @@ func Speech(c echo.Context) mo.Result[any] {
[]string{options.Model, ""},
)

fullOptions := FullOptions{
Options: options,
Backend: backendAndModel[0],
Model: backendAndModel[1],
return mo.Ok(SpeechRequestOptions{
OpenAISpeechRequestOptions: options,
Backend: backendAndModel[0],
Model: backendAndModel[1],
body: mo.Some(buffer),
bodyParsedMap: optionsMap,
})
}

func Speech(c echo.Context) mo.Result[any] {
options := NewSpeechRequestOptions(c.Request().Body)
if options.IsError() {
return mo.Err[any](options.Error())
}

switch backendAndModel[0] {
switch options.MustGet().Backend {
case "openai":
return openai(c, fullOptions)
return openai(c, utils.ResultToOption(options))
case "elevenlabs":
return elevenlabs(c, fullOptions)
return elevenlabs(c, utils.ResultToOption(options))
case "koemotion":
return koemotion(c, utils.ResultToOption(options))
default:
return mo.Err[any](apierrors.NewErrBadRequest())
return mo.Err[any](apierrors.NewErrBadRequest().WithDetail("unsupported backend"))
}
}
9 changes: 5 additions & 4 deletions pkg/backend/elevenlabs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ import (
type ElevenLabsOptions struct {
Text string `json:"text"`
ModelID string `json:"model_id,omitempty"`
// TODO: support other options
}

func elevenlabs(c echo.Context, options FullOptions) mo.Result[any] {
func elevenlabs(c echo.Context, options mo.Option[SpeechRequestOptions]) mo.Result[any] {
reqURL := lo.Must(url.Parse("https://api.elevenlabs.io/v1/text-to-speech")).
JoinPath(options.Voice).
JoinPath(options.MustGet().Voice).
String()

values := ElevenLabsOptions{
Text: options.Input,
ModelID: options.Model,
Text: options.MustGet().Input,
ModelID: options.MustGet().Model,
}

payload := lo.Must(json.Marshal(values))
Expand Down
92 changes: 92 additions & 0 deletions pkg/backend/koemotion.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package backend

import (
"bytes"
"encoding/json"
"log/slog"
"net/http"
"strings"

"github.com/labstack/echo/v4"
"github.com/moeru-ai/unspeech/pkg/apierrors"
"github.com/moeru-ai/unspeech/pkg/utils"
"github.com/moeru-ai/unspeech/pkg/utils/jsonpatch"
"github.com/samber/mo"
"github.com/vincent-petithory/dataurl"
)

// koemotion forwards a speech request to the Koemotion inference endpoint
// (https://api.rinna.co.jp/koemotion/infer) and replies to the client with the
// decoded audio bytes.
//
// The buffered request body is patched before it is sent upstream: the
// "/model", "/voice" and "/input" paths are removed and the input text is
// added under "/text". The incoming Authorization header, with its "Bearer "
// prefix trimmed, is sent as the Ocp-Apim-Subscription-Key header.
//
// On success the result is Ok and wraps the value returned by c.Blob. Every
// failure (patching, building or sending the request, an upstream 4xx/5xx
// status, an undecodable or empty response) is returned as Err carrying an
// apierrors value.
//
// options must hold a value: MustGet panics on an empty Option.
func koemotion(c echo.Context, options mo.Option[SpeechRequestOptions]) mo.Result[any] {
	opts := options.MustGet()

	// Fall back to an empty buffer when no body was captured so that Bytes()
	// is always safe to call.
	patchedPayload := jsonpatch.ApplyPatches(
		opts.body.OrElse(new(bytes.Buffer)).Bytes(),
		mo.Some(jsonpatch.ApplyOptions{AllowMissingPathOnRemove: true}),
		jsonpatch.NewRemove("/model"),
		jsonpatch.NewRemove("/voice"),
		jsonpatch.NewRemove("/input"),
		jsonpatch.NewAdd("/text", opts.Input),
	)
	if patchedPayload.IsError() {
		return mo.Err[any](apierrors.NewErrInternal().WithDetail(patchedPayload.Error().Error()).WithCaller())
	}

	req, err := http.NewRequestWithContext(
		c.Request().Context(),
		http.MethodPost,
		"https://api.rinna.co.jp/koemotion/infer",
		bytes.NewBuffer(patchedPayload.MustGet()),
	)
	if err != nil {
		// Keep the cause attached, like every other failure path in this function.
		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithError(err).WithCaller())
	}

	// Rewrite the Authorization header
	req.Header.Set("Ocp-Apim-Subscription-Key", strings.TrimPrefix(
		c.Request().Header.Get("Authorization"),
		"Bearer ",
	))
	req.Header.Set("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
	}

	defer func() { _ = res.Body.Close() }()

	if res.StatusCode >= 400 && res.StatusCode < 600 {
		contentType := res.Header.Get("Content-Type")

		switch {
		case strings.HasPrefix(contentType, "application/json"):
			return mo.Err[any](apierrors.
				NewUpstreamError(res.StatusCode).
				WithDetail(NewJSONResponseError(res.StatusCode, res.Body).OrEmpty().Error()))
		case strings.HasPrefix(contentType, "text/"):
			return mo.Err[any](apierrors.
				NewUpstreamError(res.StatusCode).
				WithDetail(NewTextResponseError(res.StatusCode, res.Body).OrEmpty().Error()))
		default:
			slog.Warn("unknown upstream error with unknown Content-Type",
				slog.Int("status", res.StatusCode),
				slog.String("content-type", contentType),
				slog.String("content-length", res.Header.Get("Content-Length")),
			)

			// An error status must never be parsed as a successful payload:
			// report the upstream status even though the body is not readable.
			return mo.Err[any](apierrors.
				NewUpstreamError(res.StatusCode).
				WithDetail("upstream returned an error response with an unsupported Content-Type"))
		}
	}

	var resBody map[string]any

	err = json.NewDecoder(res.Body).Decode(&resBody)
	if err != nil {
		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithError(err).WithCaller())
	}

	// The audio is read from the "audio" field of the JSON response and is
	// decoded as a data URL below.
	audioDataURLString := utils.GetByJSONPath[string](resBody, "{ .audio }")
	if audioDataURLString == "" {
		return mo.Err[any](apierrors.NewErrInternal().WithDetail("upstream returned empty audio data URL").WithCaller())
	}

	audioDataURL, err := dataurl.DecodeString(audioDataURLString)
	if err != nil {
		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithError(err).WithCaller())
	}

	return mo.Ok[any](c.Blob(http.StatusOK, "audio/mp3", audioDataURL.Data))
}
14 changes: 7 additions & 7 deletions pkg/backend/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import (
"github.com/samber/mo"
)

func openai(c echo.Context, options FullOptions) mo.Result[any] {
values := Options{
Model: options.Model,
Input: options.Input,
Voice: options.Voice,
ResponseFormat: options.ResponseFormat,
Speed: options.Speed,
func openai(c echo.Context, options mo.Option[SpeechRequestOptions]) mo.Result[any] {
values := OpenAISpeechRequestOptions{
Model: options.MustGet().Model,
Input: options.MustGet().Input,
Voice: options.MustGet().Voice,
ResponseFormat: options.MustGet().ResponseFormat,
Speed: options.MustGet().Speed,
}

payload := lo.Must(json.Marshal(values))
Expand Down
5 changes: 1 addition & 4 deletions pkg/utils/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ func GetByJSONPath[T any](input any, template string) T {
}

func ReadAsJSONWithClose(readCloser io.ReadCloser) (*bytes.Buffer, map[string]any, error) {
defer func() {
_ = readCloser.Close()
}()
defer func() { readCloser.Close() }()

buffer, jsonMap, err := ReadAsJSON(readCloser)
if err != nil {
Expand Down Expand Up @@ -72,7 +70,6 @@ func FromMap[T any, MK comparable, MV any](m map[MK]MV) (*T, error) {
if m == nil {
return nil, nil
}

if len(m) == 0 {
return nil, nil
}
Expand Down
Loading

0 comments on commit e2db1dc

Please sign in to comment.