mirror of
https://github.com/ijaric/voice_assistant.git
synced 2025-12-18 05:16:18 +00:00
Merge branch 'main' into tasks/#47_tts_handlers_repositories
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
from .health import basic_router
|
||||
from .voice_responce_handler import VoiceResponseHandler
|
||||
|
||||
__all__ = ["basic_router"]
|
||||
__all__ = [
|
||||
"VoiceResponseHandler",
|
||||
"basic_router",
|
||||
]
|
||||
|
||||
45
src/assistant/lib/api/v1/handlers/voice_responce_handler.py
Normal file
45
src/assistant/lib/api/v1/handlers/voice_responce_handler.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import http
|
||||
import io
|
||||
|
||||
import fastapi
|
||||
|
||||
import lib.stt.services as stt_services
|
||||
|
||||
# import lib.tts.services as tts_service
|
||||
# import lib.models as models
|
||||
|
||||
|
||||
class VoiceResponseHandler:
    """HTTP handler for the voice-assistant endpoint.

    On construction the handler creates its own ``fastapi.APIRouter`` and
    registers :meth:`voice_response` on it as ``POST /``. Callers mount
    ``self.router`` on the application under whatever prefix they choose.
    """

    def __init__(
        self,
        stt: stt_services.SpeechService,
        # tts: tts_service.TTSService,
    ):
        """Store the speech-to-text service and register the route.

        Args:
            stt: Service whose ``recognize`` coroutine is awaited with the
                uploaded audio bytes and yields the recognized text.
        """
        self.stt = stt
        # self.tts = tts
        self.router = fastapi.APIRouter()
        self.router.add_api_route(
            "/",
            self.voice_response,
            methods=["POST"],
            summary="Ответ голосового помощника",
            description="Маршрут возвращает потоковый ответ аудио",
        )

    async def voice_response(
        self,
        voice: bytes = fastapi.File(...),
    ) -> fastapi.responses.StreamingResponse:
        """Recognize the uploaded audio and stream an audio reply.

        Args:
            voice: Raw bytes of the uploaded audio file (required form file).

        Returns:
            A streaming response with media type ``audio/ogg``. For now this
            is a stub that streams the uploaded bytes back unchanged.

        Raises:
            fastapi.HTTPException: 400 when recognition yields no text.
        """
        voice_text: str = await self.stt.recognize(voice)
        # Treat any falsy transcript (empty string or None) as a failed
        # recognition instead of only the exact empty string.
        if not voice_text:
            raise fastapi.HTTPException(status_code=http.HTTPStatus.BAD_REQUEST, detail="Speech recognition failed")
        # TODO: Add text processing through the openai client
        # TODO: Add speech synthesis through the tts client
        # TODO: Replace the stub with the real response
        # response = await self.tts.get_audio_as_bytes(
        #     models.TTSCreateRequestModel(
        #         text=voice_text,
        #     )
        # )
        # return fastapi.responses.StreamingResponse(io.BytesIO(response.audio_content), media_type="audio/ogg")
        return fastapi.responses.StreamingResponse(io.BytesIO(voice), media_type="audio/ogg")
|
||||
@@ -1,3 +1,5 @@
|
||||
from .base import HealthResponse
|
||||
|
||||
__all__ = ["HealthResponse"]
|
||||
__all__ = [
|
||||
"HealthResponse",
|
||||
]
|
||||
|
||||
@@ -115,11 +115,18 @@ class Application:
|
||||
models.VoiceModelProvidersEnum.ELEVEN_LABS: tts_eleven_labs_repository,
|
||||
},
|
||||
)
|
||||
|
||||
# Handlers
|
||||
|
||||
logger.info("Initializing handlers")
|
||||
liveness_probe_handler = api_v1_handlers.basic_router
|
||||
|
||||
# TODO: объявить сервисы tts и openai и добавить их в voice_response_handler
|
||||
voice_response_handler = api_v1_handlers.VoiceResponseHandler(
|
||||
stt=stt_service,
|
||||
# tts=tts_service, # TODO
|
||||
).router
|
||||
|
||||
logger.info("Creating application")
|
||||
|
||||
fastapi_app = fastapi.FastAPI(
|
||||
@@ -132,6 +139,7 @@ class Application:
|
||||
|
||||
# Routes
|
||||
fastapi_app.include_router(liveness_probe_handler, prefix="/api/v1/health", tags=["health"])
|
||||
fastapi_app.include_router(voice_response_handler, prefix="/api/v1/voice", tags=["voice"])
|
||||
|
||||
application = Application(
|
||||
settings=settings,
|
||||
|
||||
@@ -1,30 +1,16 @@
|
||||
import pydantic
|
||||
import pydantic_settings
|
||||
|
||||
import lib.app.split_settings as app_split_settings
|
||||
|
||||
|
||||
class Settings(pydantic_settings.BaseSettings):
|
||||
api: app_split_settings.ApiSettings = pydantic.Field(default_factory=lambda: app_split_settings.ApiSettings())
|
||||
app: app_split_settings.AppSettings = pydantic.Field(default_factory=lambda: app_split_settings.AppSettings())
|
||||
postgres: app_split_settings.PostgresSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.PostgresSettings()
|
||||
)
|
||||
logger: app_split_settings.LoggingSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.LoggingSettings()
|
||||
)
|
||||
openai: app_split_settings.OpenaiSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.OpenaiSettings()
|
||||
)
|
||||
project: app_split_settings.ProjectSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.ProjectSettings()
|
||||
)
|
||||
|
||||
proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings())
|
||||
voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings())
|
||||
tts_yandex: app_split_settings.TTSYandexSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.TTSYandexSettings()
|
||||
)
|
||||
tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = pydantic.Field(
|
||||
default_factory=lambda: app_split_settings.TTSElevenLabsSettings()
|
||||
)
|
||||
api: app_split_settings.ApiSettings = app_split_settings.ApiSettings()
|
||||
app: app_split_settings.AppSettings = app_split_settings.AppSettings()
|
||||
postgres: app_split_settings.PostgresSettings = app_split_settings.PostgresSettings()
|
||||
logger: app_split_settings.LoggingSettings = app_split_settings.LoggingSettings()
|
||||
openai: app_split_settings.OpenaiSettings = app_split_settings.OpenaiSettings()
|
||||
project: app_split_settings.ProjectSettings = app_split_settings.ProjectSettings()
|
||||
proxy: app_split_settings.ProxySettings = app_split_settings.ProxySettings()
|
||||
voice: app_split_settings.VoiceSettings = app_split_settings.VoiceSettings()
|
||||
tts_yandex: app_split_settings.TTSYandexSettings = app_split_settings.TTSYandexSettings()
|
||||
tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = app_split_settings.TTSElevenLabsSettings()
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import http
|
||||
import mimetypes
|
||||
import tempfile
|
||||
|
||||
import fastapi
|
||||
import magic
|
||||
import openai
|
||||
import pydantic
|
||||
|
||||
import lib.app.settings as app_settings
|
||||
import lib.stt as stt
|
||||
@@ -24,15 +27,24 @@ class OpenaiSpeechRepository:
|
||||
|
||||
async def speech_to_text(self, audio: bytes) -> str:
|
||||
file_extension = self.__get_file_extension_from_bytes(audio)
|
||||
if not file_extension:
|
||||
raise ValueError("File extension is not supported")
|
||||
|
||||
voice: stt.models.SttVoice = stt.models.SttVoice(
|
||||
audio_size=len(audio) // 1024, # audio size in MB,
|
||||
audio_format=file_extension,
|
||||
audio_data=audio,
|
||||
voice_settings=self.settings.voice,
|
||||
)
|
||||
if not file_extension or file_extension not in self.settings.voice.available_formats:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=http.HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
|
||||
detail=f"File extension is not supported. "
|
||||
f"Available extensions: {self.settings.voice.available_formats}",
|
||||
)
|
||||
try:
|
||||
voice: stt.models.SttVoice = stt.models.SttVoice(
|
||||
audio_size=len(audio) // 1024, # audio size in MB,
|
||||
audio_format=file_extension,
|
||||
audio_data=audio,
|
||||
voice_settings=self.settings.voice,
|
||||
)
|
||||
except (pydantic.ValidationError, ValueError) as e:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=http.HTTPStatus.BAD_REQUEST,
|
||||
detail=f"Voice validation error: {e}",
|
||||
)
|
||||
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(suffix=f".{file_extension}") as temp_file:
|
||||
@@ -40,8 +52,14 @@ class OpenaiSpeechRepository:
|
||||
temp_file.seek(0)
|
||||
transcript = openai.Audio.transcribe(self.settings.openai.stt_model, temp_file) # type: ignore
|
||||
except openai.error.InvalidRequestError as e: # type: ignore[reportGeneralTypeIssues]
|
||||
raise ValueError(f"OpenAI API error: {e}")
|
||||
raise fastapi.HTTPException(
|
||||
status_code=http.HTTPStatus.BAD_REQUEST,
|
||||
detail=f"OpenAI request error: {e}",
|
||||
)
|
||||
except openai.error.OpenAIError as e: # type: ignore[reportGeneralTypeIssues]
|
||||
raise ValueError(f"OpenAI API error: {e}")
|
||||
raise fastapi.HTTPException(
|
||||
status_code=http.HTTPStatus.BAD_REQUEST,
|
||||
detail=f"OpenAI API error: {e}",
|
||||
)
|
||||
|
||||
return transcript.text # type: ignore[reportUnknownVariableType]
|
||||
|
||||
Reference in New Issue
Block a user