diff --git a/docker_overlay/etc/neon/diana.yaml b/docker_overlay/etc/neon/diana.yaml
index a722370..14110bf 100644
--- a/docker_overlay/etc/neon/diana.yaml
+++ b/docker_overlay/etc/neon/diana.yaml
@@ -25,4 +25,6 @@ hana:
   server_host: "0.0.0.0"
   server_port: 8080
   fastapi_title: "Hana"
-  fastapi_summary: "HANA (HTTP API for Neon Applications) is the HTTP component of the Device Independent API for Neon Applications (DIANA)"
\ No newline at end of file
+  fastapi_summary: "HANA (HTTP API for Neon Applications) is the HTTP component of the Device Independent API for Neon Applications (DIANA)"
+  stt_max_length_encoded: 500000
+  tts_max_words: 128
\ No newline at end of file
diff --git a/neon_hana/app/routers/assist.py b/neon_hana/app/routers/assist.py
index 7875f33..a274df7 100644
--- a/neon_hana/app/routers/assist.py
+++ b/neon_hana/app/routers/assist.py
@@ -42,3 +42,8 @@ async def get_stt(audio_in: STTRequest) -> STTResponse:
 async def get_tts(request: TTSRequest) -> TTSResponse:
     return mq_connector.get_tts(**dict(request))
 
+
+@assist_route.post("/get_response")
+async def get_response(request: SkillRequest) -> SkillResponse:
+    """Get Neon's text answer to an utterance for the given user profile."""
+    return mq_connector.get_response(**dict(request))
diff --git a/neon_hana/mq_service_api.py b/neon_hana/mq_service_api.py
index 849e1c5..1d43fd6 100644
--- a/neon_hana/mq_service_api.py
+++ b/neon_hana/mq_service_api.py
@@ -31,6 +31,8 @@
 from typing import Optional, Dict, Any, List
 from uuid import uuid4
 from fastapi import HTTPException
+
+from neon_hana.schema.user_profile import UserProfile
 from neon_mq_connector.utils.client_utils import send_mq_request
 
 
@@ -44,7 +46,14 @@ class MQServiceManager:
     def __init__(self, config: dict):
         self.mq_default_timeout = config.get('mq_default_timeout', 10)
         self.mq_cliend_id = config.get('mq_client_id') or str(uuid4())
-        self.audio_tmp_path = mkdtemp("hana_audio")
+        # Only fall back to defaults when a limit is unset. Using `or` here
+        # would also replace a configured `0`, which the `0 < limit` guards
+        # in `get_stt` and `get_tts` treat as "no limit".
+        stt_max_length = config.get('stt_max_length_encoded')
+        tts_max_words = config.get('tts_max_words')
+        self.stt_max_length = (500000 if stt_max_length is None
+                               else stt_max_length)
+        self.tts_max_words = 128 if tts_max_words is None else tts_max_words
 
     def _validate_api_proxy_response(self, response: dict):
         if response['status_code'] == 200:
@@ -123,6 +132,10 @@ def get_coupons(self):
             raise APIError(status_code=500, detail=repr(e))
 
     def get_stt(self, encoded_audio: str, lang_code: str):
+        if 0 < self.stt_max_length < len(encoded_audio):
+            raise APIError(status_code=400,
+                           detail=f"Audio exceeds maximum encoded length of "
+                                  f"{self.stt_max_length}")
         request_data = {"msg_type": "neon.get_stt",
                         "data": {"audio_data": encoded_audio,
                                  "utterances": [""],  # TODO: Compat
@@ -136,18 +149,46 @@ def get_stt(self, encoded_audio: str, lang_code: str):
         return response['data']
 
     def get_tts(self, to_speak: str, lang_code: str, gender: str):
+        if 0 < self.tts_max_words < len(to_speak.split()):
+            raise APIError(status_code=400,
+                           detail=f"Text exceeds maximum word count of "
+                                  f"{self.tts_max_words}")
         request_data = {"msg_type": "neon.get_tts",
-                                    "data": {"text": to_speak,
-                                             "utterance": "",  # TODO: Compat
-                                             "speaker": {"name": "Neon",
-                                                         "gender": gender,
-                                                         "lang": lang_code},
+                        "data": {"text": to_speak,
+                                 "utterance": "",  # TODO: Compat
+                                 "speaker": {"name": "Neon",
+                                             "gender": gender,
                                              "lang": lang_code},
-                                    "context": {"source": "hana",
-                                                "ident": f"{self.mq_cliend_id}"
-                                                         f"{time()}"}}
+                                 "lang": lang_code},
+                        "context": {"source": "hana",
+                                    "ident": f"{self.mq_cliend_id}{time()}"}}
         response = send_mq_request("/neon_chat_api", request_data,
                                    "neon_chat_api_request",
                                    timeout=self.mq_default_timeout)
         audio = response['data'][lang_code]['audio'][gender]
         return {"encoded_audio": audio}
+
+    def get_response(self, utterance: str, lang_code: str,
+                     user_profile: UserProfile):
+        """
+        Get a text response to a user utterance from the Neon chat API.
+        :param utterance: text input to handle
+        :param lang_code: language of `utterance` and of the returned answer
+        :param user_profile: profile of the requesting user
+        :returns: dict with the response `answer` and its `lang_code`
+        """
+        # Requests without a username are attributed to this MQ client ID
+        user_profile.user.username = (user_profile.user.username or
+                                      self.mq_cliend_id)
+        request_data = {"msg_type": "recognizer_loop:utterance",
+                        "data": {"utterances": [utterance],
+                                 "lang": lang_code},
+                        "context": {"username": user_profile.user.username,
+                                    "user_profiles": [user_profile.model_dump(mode="json")],
+                                    "source": "hana",
+                                    "ident": f"{self.mq_cliend_id}{time()}"}}
+        response = send_mq_request("/neon_chat_api", request_data,
+                                   "neon_chat_api_request",
+                                   timeout=self.mq_default_timeout)
+        sentence = response['data']['responses'][lang_code]['sentence']
+        return {"answer": sentence, "lang_code": lang_code}
diff --git a/neon_hana/schema/assist_requests.py b/neon_hana/schema/assist_requests.py
index 5e08d2d..8acd25c 100644
--- a/neon_hana/schema/assist_requests.py
+++ b/neon_hana/schema/assist_requests.py
@@ -24,9 +24,11 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from typing import Optional, List
+from typing import List
 from pydantic import BaseModel
 
+from neon_hana.schema.user_profile import UserProfile
+
 
 class STTRequest(BaseModel):
     encoded_audio: str
@@ -77,11 +79,32 @@ class TTSResponse(BaseModel):
 
 
 # TODO: User profile model with below inputs?
-class TextInput(BaseModel):
+class SkillRequest(BaseModel):
+    """Request for a skill response to a text utterance."""
     utterance: str
     lang_code: str
+    user_profile: UserProfile
 
+    model_config = {
+        "json_schema_extra": {
+            "examples": [{
+                "utterance": "what time is it",
+                "lang_code": "en-us",
+                "user_profile": {"location": {"lat": 40.730610,
+                                              "lng": -73.935242,
+                                              "city": "New York",
+                                              "state": "New York"}}
+            }]}}
 
-class AudioInput(BaseModel):
-    encoded_audio: str
+
+class SkillResponse(BaseModel):
+    """Text answer returned for a `SkillRequest`."""
+    answer: str
     lang_code: str
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [{
+                "answer": "four forty three.",
+                "lang_code": "en-us"
+            }]}}
diff --git a/neon_hana/schema/user_profile.py b/neon_hana/schema/user_profile.py
new file mode 100644
index 0000000..91a9a05
--- /dev/null
+++ b/neon_hana/schema/user_profile.py
@@ -0,0 +1,112 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Optional, List
+from pydantic import BaseModel
+
+
+class ProfileUser(BaseModel):
+    """Identity, contact, and account fields for a user."""
+    first_name: str = ""
+    middle_name: str = ""
+    last_name: str = ""
+    preferred_name: str = ""
+    full_name: str = ""
+    dob: str = "YYYY/MM/DD"
+    age: str = ""
+    email: str = ""
+    username: str = ""
+    password: str = ""
+    picture: str = ""
+    about: str = ""
+    phone: str = ""
+    phone_verified: bool = False
+    email_verified: bool = False
+
+
+class ProfileBrands(BaseModel):
+    """Brand preference mappings; not currently included in `UserProfile`."""
+    ignored_brands: dict = {}
+    favorite_brands: dict = {}
+    specially_requested: dict = {}
+
+
+class ProfileSpeech(BaseModel):
+    """Language, gender, and voice preferences for STT and TTS."""
+    stt_language: str = "en-us"
+    alt_languages: List[str] = ['en']
+    tts_language: str = "en-us"
+    tts_gender: str = "female"
+    neon_voice: Optional[str] = ''
+    secondary_tts_language: Optional[str] = ''
+    secondary_tts_gender: str = "male"
+    secondary_neon_voice: str = ''
+    speed_multiplier: float = 1.0
+
+
+class ProfileUnits(BaseModel):
+    """Preferred time, date, and measurement formats."""
+    time: int = 12
+    # 12, 24
+    date: str = "MDY"
+    # MDY, YMD, YDM
+    measure: str = "imperial"
+    # imperial, metric
+
+
+class ProfileLocation(BaseModel):
+    """Coordinates, place names, and timezone; every field is optional."""
+    lat: Optional[float] = None
+    lng: Optional[float] = None
+    city: Optional[str] = None
+    state: Optional[str] = None
+    country: Optional[str] = None
+    tz: Optional[str] = None
+    utc: Optional[float] = None
+
+
+class ProfileResponseMode(BaseModel):
+    """Response mode settings."""
+    speed_mode: str = "quick"
+    hesitation: bool = False
+    limit_dialog: bool = False
+
+
+class ProfilePrivacy(BaseModel):
+    """Flags for saving audio and text; both default to `False`."""
+    save_audio: bool = False
+    save_text: bool = False
+
+
+class UserProfile(BaseModel):
+    """Full user profile; any omitted section uses that section's defaults."""
+    user: ProfileUser = ProfileUser()
+    # brands: ProfileBrands
+    speech: ProfileSpeech = ProfileSpeech()
+    units: ProfileUnits = ProfileUnits()
+    location: ProfileLocation = ProfileLocation()
+    response_mode: ProfileResponseMode = ProfileResponseMode()
+    privacy: ProfilePrivacy = ProfilePrivacy()