# Patch summary (text before the first "diff --git" header is commentary and is skipped by patch tools):
# - src/assistant/Dockerfile: rewrites the "RUN pip install poetry \" line; the removed and added
#   lines look identical in this copy, so this is presumably a whitespace-only change (TODO confirm).
# - voice_responce_handler.py: voice_response no longer takes voice_model, is annotated to return
#   fastapi.responses.StreamingResponse, raises HTTP 400 ("Speech recognition failed") when
#   recognition yields no text, and for now streams the uploaded bytes back as audio/ogg (stub).
# - lib/app/app.py: adds a TODO about wiring the tts and openai services into the handler.
# - lib/app/settings.py: drops "import pydantic" and replaces every pydantic.Field(default_factory=...)
#   with a directly constructed default instance.
#   NOTE(review): those defaults are now built when the class body executes rather than by a
#   per-field factory; confirm that evaluating them at import time is intended.
# NOTE(review): added lines in the handler and app.py hunks were edited during review, so the
#   post-image hashes on their "index" lines no longer describe the resulting content.
diff --git a/src/assistant/Dockerfile b/src/assistant/Dockerfile index 202c1a7..63a6bd7 100644 --- a/src/assistant/Dockerfile +++ b/src/assistant/Dockerfile @@ -13,7 +13,7 @@ COPY poetry.toml /opt/app/poetry.toml WORKDIR /opt/app -RUN pip install poetry \ +RUN pip install poetry \ && poetry install --no-dev COPY bin /opt/app/bin diff --git a/src/assistant/lib/api/v1/handlers/voice_responce_handler.py b/src/assistant/lib/api/v1/handlers/voice_responce_handler.py index 9888bf9..dc298a9 100644 --- a/src/assistant/lib/api/v1/handlers/voice_responce_handler.py +++ b/src/assistant/lib/api/v1/handlers/voice_responce_handler.py @@ -1,6 +1,8 @@ -import fastapi +import http +import io + +import fastapi -import lib.models.tts.voice as models_tts_voice import lib.stt.services as stt_services @@ -16,13 +18,17 @@ class VoiceResponseHandler: self.voice_response, methods=["POST"], summary="Ответ голосового помощника", - description="Ответ голосового помощника", + description="Маршрут возвращает потоковый ответ аудио", ) async def voice_response( self, voice: bytes = fastapi.File(...), - voice_model: models_tts_voice.VoiceModelProvidersEnum = fastapi.Depends(), - ) -> dict[str, str]: + ) -> fastapi.responses.StreamingResponse: voice_text: str = await self.stt.recognize(voice) - return {"text": voice_text} + if not voice_text: + raise fastapi.HTTPException(status_code=http.HTTPStatus.BAD_REQUEST, detail="Speech recognition failed") + # TODO: Add text processing via the openai client + # TODO: Add speech synthesis via the tts client + # TODO: Replace the stub with the real response + return fastapi.responses.StreamingResponse(io.BytesIO(voice), media_type="audio/ogg") diff --git a/src/assistant/lib/app/app.py b/src/assistant/lib/app/app.py index 7c1bc4f..09dc69d 100644 --- a/src/assistant/lib/app/app.py +++ b/src/assistant/lib/app/app.py @@ -86,6 +86,8 @@ class Application: logger.info("Initializing handlers") liveness_probe_handler = api_v1_handlers.basic_router + + # TODO: 
declare the tts and openai services and add them to voice_response_handler voice_response_handler = api_v1_handlers.VoiceResponseHandler(stt=stt_service).router logger.info("Creating application") diff --git a/src/assistant/lib/app/settings.py b/src/assistant/lib/app/settings.py index 72198a6..6189151 100644 --- a/src/assistant/lib/app/settings.py +++ b/src/assistant/lib/app/settings.py @@ -1,24 +1,14 @@ -import pydantic import pydantic_settings import lib.app.split_settings as app_split_settings class Settings(pydantic_settings.BaseSettings): - api: app_split_settings.ApiSettings = pydantic.Field(default_factory=lambda: app_split_settings.ApiSettings()) - app: app_split_settings.AppSettings = pydantic.Field(default_factory=lambda: app_split_settings.AppSettings()) - postgres: app_split_settings.PostgresSettings = pydantic.Field( - default_factory=lambda: app_split_settings.PostgresSettings() - ) - logger: app_split_settings.LoggingSettings = pydantic.Field( - default_factory=lambda: app_split_settings.LoggingSettings() - ) - openai: app_split_settings.OpenaiSettings = pydantic.Field( - default_factory=lambda: app_split_settings.OpenaiSettings() - ) - project: app_split_settings.ProjectSettings = pydantic.Field( - default_factory=lambda: app_split_settings.ProjectSettings() - ) - - proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings()) - voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings()) + api: app_split_settings.ApiSettings = app_split_settings.ApiSettings() + app: app_split_settings.AppSettings = app_split_settings.AppSettings() + postgres: app_split_settings.PostgresSettings = app_split_settings.PostgresSettings() + logger: app_split_settings.LoggingSettings = app_split_settings.LoggingSettings() + openai: app_split_settings.OpenaiSettings = app_split_settings.OpenaiSettings() + project: app_split_settings.ProjectSettings = 
app_split_settings.ProjectSettings() + proxy: app_split_settings.ProxySettings = app_split_settings.ProxySettings() + voice: app_split_settings.VoiceSettings = app_split_settings.VoiceSettings()