mirror of https://github.com/ijaric/voice_assistant.git synced 2025-12-18 05:16:18 +00:00

Merge branch 'main' into tasks/#47_tts_handlers_repositories

Author: Aleksandr Sukharev
Date: 2023-10-14 22:51:27 +03:00
Committed by: GitHub

42 changed files with 2243 additions and 88 deletions

View File

@@ -1,3 +1,7 @@
 from .health import basic_router
+from .voice_responce_handler import VoiceResponseHandler
 
-__all__ = ["basic_router"]
+__all__ = [
+    "VoiceResponseHandler",
+    "basic_router",
+]

View File

@@ -0,0 +1,45 @@
+import http
+import io
+
+import fastapi
+
+import lib.stt.services as stt_services
+
+# import lib.tts.services as tts_service
+# import lib.models as models
+
+
+class VoiceResponseHandler:
+    def __init__(
+        self,
+        stt: stt_services.SpeechService,
+        # tts: tts_service.TTSService,
+    ):
+        self.stt = stt
+        # self.tts = tts
+        self.router = fastapi.APIRouter()
+        self.router.add_api_route(
+            "/",
+            self.voice_response,
+            methods=["POST"],
+            summary="Voice assistant response",
+            description="The route returns a streaming audio response",
+        )
+
+    async def voice_response(
+        self,
+        voice: bytes = fastapi.File(...),
+    ) -> fastapi.responses.StreamingResponse:
+        voice_text: str = await self.stt.recognize(voice)
+        if voice_text == "":
+            raise fastapi.HTTPException(status_code=http.HTTPStatus.BAD_REQUEST, detail="Speech recognition failed")
+        # TODO: Add text processing via the openai client
+        # TODO: Add speech synthesis via the tts client
+        # TODO: Replace the stub with a real response
+        # response = await self.tts.get_audio_as_bytes(
+        #     models.TTSCreateRequestModel(
+        #         text=voice_text,
+        #     )
+        # )
+        # return fastapi.responses.StreamingResponse(io.BytesIO(response.audio_content), media_type="audio/ogg")
+        return fastapi.responses.StreamingResponse(io.BytesIO(voice), media_type="audio/ogg")
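For reference, a minimal sketch of exercising this handler end to end with FastAPI's TestClient. The DummySpeechService stub and the local app wiring are assumptions for illustration (they are not part of this commit), and python-multipart must be installed for fastapi.File to accept the upload:

# Sketch only: DummySpeechService and this wiring are assumptions, not commit code.
import fastapi
import fastapi.testclient


class DummySpeechService:  # hypothetical stand-in for stt_services.SpeechService
    async def recognize(self, voice: bytes) -> str:
        return "hello"


app = fastapi.FastAPI()
app.include_router(VoiceResponseHandler(stt=DummySpeechService()).router, prefix="/api/v1/voice")

client = fastapi.testclient.TestClient(app)
# The handler registers "/" with methods=["POST"], so the endpoint is POST /api/v1/voice/;
# the field name "voice" matches the parameter declared with fastapi.File(...).
response = client.post("/api/v1/voice/", files={"voice": ("question.ogg", b"fake-ogg-bytes", "audio/ogg")})
assert response.status_code == 200
assert response.headers["content-type"] == "audio/ogg"  # the stub echo streams the upload back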

View File

@@ -1,3 +1,5 @@
 from .base import HealthResponse
 
-__all__ = ["HealthResponse"]
+__all__ = [
+    "HealthResponse",
+]

View File

@@ -115,11 +115,18 @@ class Application:
                 models.VoiceModelProvidersEnum.ELEVEN_LABS: tts_eleven_labs_repository,
             },
         )
 
         # Handlers
 
         logger.info("Initializing handlers")
         liveness_probe_handler = api_v1_handlers.basic_router
+        # TODO: declare the tts and openai services and add them to voice_response_handler
+        voice_response_handler = api_v1_handlers.VoiceResponseHandler(
+            stt=stt_service,
+            # tts=tts_service,  # TODO
+        ).router
 
         logger.info("Creating application")
 
         fastapi_app = fastapi.FastAPI(
@@ -132,6 +139,7 @@ class Application:
 
         # Routes
         fastapi_app.include_router(liveness_probe_handler, prefix="/api/v1/health", tags=["health"])
+        fastapi_app.include_router(voice_response_handler, prefix="/api/v1/voice", tags=["voice"])
 
 application = Application(
     settings=settings,
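A hedged sketch of what the wiring flagged by the TODO might eventually look like. The names tts_service, tts.TTSService, and its constructor signature are assumptions extrapolated from the commented-out tts parameter and the provider-to-repository mapping above; none of this exists in the commit:

# Hypothetical future wiring (every name below is an assumption, not commit code):
# a TTS service wrapping the provider repositories gets passed alongside stt.
tts_service = tts.TTSService(  # assumed service and constructor
    repositories={
        models.VoiceModelProvidersEnum.YANDEX: tts_yandex_repository,
        models.VoiceModelProvidersEnum.ELEVEN_LABS: tts_eleven_labs_repository,
    },
)
voice_response_handler = api_v1_handlers.VoiceResponseHandler(
    stt=stt_service,
    tts=tts_service,
).router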

View File

@@ -1,30 +1,16 @@
-import pydantic
 import pydantic_settings
 
 import lib.app.split_settings as app_split_settings
 
 
 class Settings(pydantic_settings.BaseSettings):
-    api: app_split_settings.ApiSettings = pydantic.Field(default_factory=lambda: app_split_settings.ApiSettings())
-    app: app_split_settings.AppSettings = pydantic.Field(default_factory=lambda: app_split_settings.AppSettings())
-    postgres: app_split_settings.PostgresSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.PostgresSettings()
-    )
-    logger: app_split_settings.LoggingSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.LoggingSettings()
-    )
-    openai: app_split_settings.OpenaiSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.OpenaiSettings()
-    )
-    project: app_split_settings.ProjectSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.ProjectSettings()
-    )
-    proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings())
-    voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings())
-    tts_yandex: app_split_settings.TTSYandexSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.TTSYandexSettings()
-    )
-    tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = pydantic.Field(
-        default_factory=lambda: app_split_settings.TTSElevenLabsSettings()
-    )
+    api: app_split_settings.ApiSettings = app_split_settings.ApiSettings()
+    app: app_split_settings.AppSettings = app_split_settings.AppSettings()
+    postgres: app_split_settings.PostgresSettings = app_split_settings.PostgresSettings()
+    logger: app_split_settings.LoggingSettings = app_split_settings.LoggingSettings()
+    openai: app_split_settings.OpenaiSettings = app_split_settings.OpenaiSettings()
+    project: app_split_settings.ProjectSettings = app_split_settings.ProjectSettings()
+    proxy: app_split_settings.ProxySettings = app_split_settings.ProxySettings()
+    voice: app_split_settings.VoiceSettings = app_split_settings.VoiceSettings()
+    tts_yandex: app_split_settings.TTSYandexSettings = app_split_settings.TTSYandexSettings()
+    tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = app_split_settings.TTSElevenLabsSettings()
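The behavioral difference between the two idioms is when the sub-settings read the environment. A minimal sketch contrasting them (Inner, WithFactory, and WithInstance are illustrative names, not from the repository):

# Sketch only: class names are illustrative assumptions.
import pydantic
import pydantic_settings


class Inner(pydantic_settings.BaseSettings):
    value: int = 0


class WithFactory(pydantic_settings.BaseSettings):
    # default_factory runs on every WithFactory() call, so the environment
    # is re-read each time the parent settings object is built
    inner: Inner = pydantic.Field(default_factory=lambda: Inner())


class WithInstance(pydantic_settings.BaseSettings):
    # Inner() is evaluated once, when this class body executes at import time;
    # pydantic then copies that default into each new WithInstance()
    inner: Inner = Inner()

For a settings object constructed once at startup the two behave identically, so the commit trades the lazy factories for the shorter form.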

View File

@@ -1,8 +1,11 @@
+import http
 import mimetypes
 import tempfile
 
+import fastapi
 import magic
 import openai
+import pydantic
 
 import lib.app.settings as app_settings
 import lib.stt as stt
@@ -24,15 +27,24 @@ class OpenaiSpeechRepository:
     async def speech_to_text(self, audio: bytes) -> str:
         file_extension = self.__get_file_extension_from_bytes(audio)
-        if not file_extension:
-            raise ValueError("File extension is not supported")
-
-        voice: stt.models.SttVoice = stt.models.SttVoice(
-            audio_size=len(audio) // 1024,  # audio size in KB
-            audio_format=file_extension,
-            audio_data=audio,
-            voice_settings=self.settings.voice,
-        )
+        if not file_extension or file_extension not in self.settings.voice.available_formats:
+            raise fastapi.HTTPException(
+                status_code=http.HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
+                detail=f"File extension is not supported. "
+                f"Available extensions: {self.settings.voice.available_formats}",
+            )
+        try:
+            voice: stt.models.SttVoice = stt.models.SttVoice(
+                audio_size=len(audio) // 1024,  # audio size in KB
+                audio_format=file_extension,
+                audio_data=audio,
+                voice_settings=self.settings.voice,
+            )
+        except (pydantic.ValidationError, ValueError) as e:
+            raise fastapi.HTTPException(
+                status_code=http.HTTPStatus.BAD_REQUEST,
+                detail=f"Voice validation error: {e}",
+            )
 
         try:
             with tempfile.NamedTemporaryFile(suffix=f".{file_extension}") as temp_file:
@@ -40,8 +52,14 @@ class OpenaiSpeechRepository:
                 temp_file.seek(0)
                 transcript = openai.Audio.transcribe(self.settings.openai.stt_model, temp_file)  # type: ignore
         except openai.error.InvalidRequestError as e:  # type: ignore[reportGeneralTypeIssues]
-            raise ValueError(f"OpenAI API error: {e}")
+            raise fastapi.HTTPException(
+                status_code=http.HTTPStatus.BAD_REQUEST,
+                detail=f"OpenAI request error: {e}",
+            )
 except openai.error.OpenAIError as e:  # type: ignore[reportGeneralTypeIssues]
-            raise ValueError(f"OpenAI API error: {e}")
+            raise fastapi.HTTPException(
+                status_code=http.HTTPStatus.BAD_REQUEST,
+                detail=f"OpenAI API error: {e}",
+            )
 
         return transcript.text  # type: ignore[reportUnknownVariableType]
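The hunk calls self.__get_file_extension_from_bytes but never shows its body. A hedged reconstruction based on the magic and mimetypes imports at the top of this file; the function name and exact logic are assumptions, not code from the commit:

# Sketch only: reconstructed from the magic/mimetypes imports, not shown in the diff.
import mimetypes

import magic


def get_file_extension_from_bytes(audio: bytes) -> str | None:
    mime_type = magic.from_buffer(audio, mime=True)   # e.g. "audio/ogg"
    extension = mimetypes.guess_extension(mime_type)  # e.g. ".ogg", or None if unknown
    # strip the leading dot, since the caller re-adds it in the tempfile suffix
    return extension.lstrip(".") if extension else None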