mirror of https://github.com/ijaric/voice_assistant.git
synced 2025-05-24 14:33:26 +00:00

Merge branch 'main' into tasks/#41_assistant_base_tests

This commit is contained in commit fed97f81ad
@@ -5,6 +5,12 @@ POSTGRES_USER=user
POSTGRES_PASSWORD=Qwe123
POSTGRES_DB_NAME=api_db

PROXY_HOST=255.255.255.255
PROXY_PORT=8888
PROXY_USER=YOUR_USER
PROXY_PASSWORD=YOUR_PASSWORD
PROXY_ENABLE=False

NGINX_PORT=80
API_HOST=0.0.0.0
API_PORT=8000

@@ -17,3 +23,10 @@ JWT_SECRET_KEY=v9LctjUWwol4XbvczPiLFMDtZ8aal7mm
JWT_ALGORITHM=HS256

APP_RELOAD=True

VOICE_AVAILABLE_FORMATS=mp3,ogg,wav
VOICE_MAX_INPUT_SIZE=5120 # 5MB
VOICE_MAX_INPUT_SECONDS=30

OPENAI_API_KEY=sk-1234567890
OPENAI_STT_MODEL=whisper-1
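The new PROXY_*, VOICE_* and OPENAI_* variables are picked up by the split settings classes introduced below through their env_prefix values. A minimal sketch of that mapping, assuming the project package is importable the same way the diff's own modules import it:

import lib.app.split_settings as app_split_settings

# Each class strips its prefix: PROXY_HOST -> ProxySettings.host,
# VOICE_MAX_INPUT_SIZE -> VoiceSettings.max_input_size, OPENAI_STT_MODEL -> OpenaiSettings.stt_model.
proxy = app_split_settings.ProxySettings()        # reads PROXY_* from the environment / .env
voice = app_split_settings.VoiceSettings()        # reads VOICE_*
openai_cfg = app_split_settings.OpenaiSettings()  # reads OPENAI_* (OPENAI_API_KEY is required)
print(proxy.enable, voice.max_input_seconds, openai_cfg.stt_model)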
@@ -11,6 +11,7 @@ import lib.app.errors as app_errors
import lib.app.settings as app_settings
import lib.app.split_settings as app_split_settings
import lib.clients as clients
import lib.stt as stt

logger = logging.getLogger(__name__)

@@ -57,9 +58,21 @@ class Application:

        logger.info("Initializing clients")

        http_yandex_tts_client = clients.AsyncHttpClient(
            base_url="yandex",  # todo add yandex api url from settings
            proxy_settings=settings.proxy,
        )
        disposable_resources.append(
            DisposableResource(
                name="http_client yandex",
                dispose_callback=http_yandex_tts_client.close(),
            )
        )

        # Repositories

        logger.info("Initializing repositories")
        stt_repository: stt.STTProtocol = stt.OpenaiSpeechRepository(settings=settings)

        # Caches

@@ -68,6 +81,7 @@ class Application:
        # Services

        logger.info("Initializing services")
        stt_service: stt.SpeechService = stt.SpeechService(repository=stt_repository)  # type: ignore

        # Handlers
@@ -13,6 +13,12 @@ class Settings(pydantic_settings.BaseSettings):
    logger: app_split_settings.LoggingSettings = pydantic.Field(
        default_factory=lambda: app_split_settings.LoggingSettings()
    )
    openai: app_split_settings.OpenaiSettings = pydantic.Field(
        default_factory=lambda: app_split_settings.OpenaiSettings()
    )
    project: app_split_settings.ProjectSettings = pydantic.Field(
        default_factory=lambda: app_split_settings.ProjectSettings()
    )

    proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings())
    voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings())
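Because every group is wired through default_factory, constructing the top-level Settings object pulls each prefix group from the environment in one call. A short sketch, assuming the .env values shown above are loaded:

import lib.app.settings as app_settings

settings = app_settings.Settings()         # each default_factory reads its own env_prefix group
print(settings.proxy.enable)               # PROXY_ENABLE
print(settings.voice.available_formats)    # VOICE_AVAILABLE_FORMATS, split into a list by the validator
print(settings.openai.stt_model)           # OPENAI_STT_MODEL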
@@ -1,14 +1,20 @@
from .api import *
from .app import *
from .logger import *
from .openai import *
from .postgres import *
from .project import *
from .proxy import *
from .voice import *

__all__ = [
    "ApiSettings",
    "AppSettings",
    "LoggingSettings",
    "OpenaiSettings",
    "PostgresSettings",
    "ProjectSettings",
    "ProxySettings",
    "VoiceSettings",
    "get_logging_config",
]
@@ -5,7 +5,9 @@ import lib.app.split_settings.utils as app_split_settings_utils

class LoggingSettings(pydantic_settings.BaseSettings):
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=app_split_settings_utils.ENV_PATH, env_file_encoding="utf-8", extra="ignore"
        env_file=app_split_settings_utils.ENV_PATH,
        env_file_encoding="utf-8",
        extra="ignore",
    )

    log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
src/assistant/lib/app/split_settings/openai.py (new file, 18 lines)

@@ -0,0 +1,18 @@
import pydantic
import pydantic_settings

import lib.app.split_settings.utils as app_split_settings_utils


class OpenaiSettings(pydantic_settings.BaseSettings):
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=app_split_settings_utils.ENV_PATH,
        env_prefix="OPENAI_",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    api_key: pydantic.SecretStr = pydantic.Field(
        default=..., validation_alias=pydantic.AliasChoices("api_key", "openai_api_key")
    )
    stt_model: str = "whisper-1"
src/assistant/lib/app/split_settings/proxy.py (new file, 43 lines)

@@ -0,0 +1,43 @@
import typing

import pydantic
import pydantic_settings

import lib.app.split_settings.utils as app_split_settings_utils


class ProxySettings(pydantic_settings.BaseSettings):
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=app_split_settings_utils.ENV_PATH,
        env_prefix="PROXY_",
        env_file_encoding="utf-8",
        extra="ignore",
    )
    protocol: typing.Literal["http", "socks5"] = "http"
    user: str | None = None
    password: pydantic.SecretStr | None = None
    host: str | None = None
    port: int | None = None
    enable: bool = False

    @property
    def dsn(self) -> str:
        if self.user and self.password:
            password = self.password.get_secret_value()
            return f"{self.protocol}://{self.user}:{password}@{self.host}:{self.port}"
        return f"{self.protocol}://{self.host}:{self.port}"

    @pydantic.computed_field
    @property
    def dsn_as_safe_url(self) -> str:
        if self.user and self.password:
            return f"{self.protocol}://{self.user}:{self.password}@{self.host}:{self.port}"
        return f"{self.protocol}://{self.host}:{self.port}"

    @pydantic.model_validator(mode="after")
    def check_proxy(self):
        if not self.enable:
            return self
        if self.host and self.port:
            return self
        raise ValueError("Proxy settings must be set if use_proxy is True")
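A small sketch of how the two DSN properties behave; the values are illustrative, and because password is a SecretStr the computed dsn_as_safe_url keeps it masked:

import lib.app.split_settings as app_split_settings

proxy = app_split_settings.ProxySettings(
    enable=True,
    host="127.0.0.1",
    port=8888,
    user="proxy_user",
    password="proxy_pass",      # coerced to pydantic.SecretStr
)
print(proxy.dsn)                # http://proxy_user:proxy_pass@127.0.0.1:8888
print(proxy.dsn_as_safe_url)    # http://proxy_user:**********@127.0.0.1:8888 (password masked)

# ProxySettings(enable=True) without host/port fails validation unless
# PROXY_HOST / PROXY_PORT are provided, because of the check_proxy validator.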
src/assistant/lib/app/split_settings/voice.py (new file, 21 lines)

@@ -0,0 +1,21 @@
import pydantic
import pydantic_settings

import lib.app.split_settings.utils as app_split_settings_utils


class VoiceSettings(pydantic_settings.BaseSettings):
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=app_split_settings_utils.ENV_PATH,
        env_prefix="VOICE_",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    max_input_seconds: int = 30
    max_input_size: int = 5120  # 5MB
    available_formats: str = "wav,mp3,ogg"

    @pydantic.field_validator("available_formats")
    def validate_available_formats(cls, v: str) -> list[str]:
        return v.split(",")
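The field validator turns the comma-separated VOICE_AVAILABLE_FORMATS string into a list after validation, so downstream checks can use plain membership tests. A quick illustration:

import lib.app.split_settings as app_split_settings

voice = app_split_settings.VoiceSettings(available_formats="mp3,ogg")
print(voice.available_formats)            # ['mp3', 'ogg'] (the validator's return value replaces the raw string)
print("wav" in voice.available_formats)   # False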
@@ -1,3 +1,7 @@
from .http_client import AsyncHttpClient
from .postgres import AsyncPostgresClient

__all__ = ["AsyncPostgresClient"]
__all__ = [
    "AsyncHttpClient",
    "AsyncPostgresClient",
]
src/assistant/lib/clients/http_client.py (new file, 29 lines)

@@ -0,0 +1,29 @@
import typing

import httpx

import lib.app.split_settings as app_split_settings


class AsyncHttpClient(httpx.AsyncClient):
    def __init__(
        self,
        proxy_settings: app_split_settings.ProxySettings,
        base_url: str | None = None,
        **client_params: typing.Any,
    ) -> None:
        self.base_url = base_url if base_url else ""
        self.proxy_settings = proxy_settings
        self.proxies = self.__get_proxies_from_settings()
        self.client_params = client_params

        super().__init__(base_url=self.base_url, proxies=self.proxies, **client_params)  # type: ignore[reportGeneralTypeIssues]

    def __get_proxies_from_settings(self) -> dict[str, str] | None:
        if not self.proxy_settings.enable:
            return None
        proxies = {"all://": self.proxy_settings.dsn}
        return proxies

    async def close(self) -> None:
        await self.aclose()
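A usage sketch for the wrapper; the URL is a placeholder, and with the proxy disabled no proxy mapping is passed through to httpx:

import asyncio

import lib.app.split_settings as app_split_settings
import lib.clients as clients


async def main() -> None:
    client = clients.AsyncHttpClient(
        base_url="https://example.com",                        # placeholder URL
        proxy_settings=app_split_settings.ProxySettings(enable=False),
    )
    try:
        response = await client.get("/")                       # regular httpx.AsyncClient call
        print(response.status_code)
    finally:
        await client.close()                                   # delegates to httpx's aclose()


asyncio.run(main())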
@@ -1,4 +1,20 @@
from .orm import Base, IdCreatedUpdatedBaseMixin
from .token import Token
from .tts import *

__all__ = ["Base", "IdCreatedUpdatedBaseMixin", "Token"]
__all__ = [
    "AVAILABLE_MODELS_TYPE",
    "Base",
    "BaseLanguageCodesEnum",
    "BaseVoiceModel",
    "ElevenLabsLanguageCodesEnum",
    "IdCreatedUpdatedBaseMixin",
    "LANGUAGE_CODES_ENUM_TYPE",
    "LIST_VOICE_MODELS_TYPE",
    "TTSCreateRequestModel",
    "TTSCreateResponseModel",
    "TTSSearchVoiceRequestModel",
    "Token",
    "VoiceModelProvidersEnum",
    "YandexLanguageCodesEnum",
]
src/assistant/lib/models/tts/__init__.py (new file, 16 lines)

@@ -0,0 +1,16 @@
from .models import *
from .voice import *

__all__ = [
    "AVAILABLE_MODELS_TYPE",
    "BaseLanguageCodesEnum",
    "BaseVoiceModel",
    "ElevenLabsLanguageCodesEnum",
    "LANGUAGE_CODES_ENUM_TYPE",
    "LIST_VOICE_MODELS_TYPE",
    "TTSCreateRequestModel",
    "TTSCreateResponseModel",
    "TTSSearchVoiceRequestModel",
    "VoiceModelProvidersEnum",
    "YandexLanguageCodesEnum",
]
src/assistant/lib/models/tts/models.py (new file, 31 lines)

@@ -0,0 +1,31 @@
import pydantic

import lib.models.tts.voice as models_tts_voice
import lib.models.tts.voice.languages as models_tts_languages

AVAILABLE_MODELS_TYPE = models_tts_voice.YandexVoiceModel | models_tts_voice.ElevenLabsVoiceModel
LIST_VOICE_MODELS_TYPE = models_tts_voice.YandexListVoiceModelsModel | models_tts_voice.ElevenLabsListVoiceModelsModel


class TTSCreateRequestModel(pydantic.BaseModel):
    model_config = pydantic.ConfigDict(use_enum_values=True)

    voice_model: AVAILABLE_MODELS_TYPE
    text: str


class TTSCreateResponseModel(pydantic.BaseModel):
    audio_content: bytes


class TTSSearchVoiceRequestModel(pydantic.BaseModel):
    voice_id: str | None = None
    voice_name: str | None = None
    languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE] | None = None
    company_name: str | None = None

    @pydantic.model_validator(mode="after")
    def check_at_least_one_field(self):
        if not any((self.voice_name, self.languages, self.company_name)):
            raise ValueError("At least one field required")
        return self
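The search model insists on at least one usable filter; note that the validator only counts voice_name, languages and company_name, so passing voice_id alone is rejected too. A short illustration:

import pydantic

import lib.models as models

models.TTSSearchVoiceRequestModel(voice_name="alena")    # ok: one filter present

try:
    models.TTSSearchVoiceRequestModel()                  # no filters at all
except pydantic.ValidationError as error:
    print(error)                                         # "At least one field required"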
src/assistant/lib/models/tts/voice/__init__.py (new file, 17 lines)

@@ -0,0 +1,17 @@
from .base import *
from .eleven_labs import *
from .languages import *
from .yandex import *

__all__ = [
    "BaseLanguageCodesEnum",
    "BaseVoiceModel",
    "ElevenLabsLanguageCodesEnum",
    "ElevenLabsListVoiceModelsModel",
    "ElevenLabsVoiceModel",
    "LANGUAGE_CODES_ENUM_TYPE",
    "VoiceModelProvidersEnum",
    "YandexLanguageCodesEnum",
    "YandexListVoiceModelsModel",
    "YandexVoiceModel",
]
src/assistant/lib/models/tts/voice/base.py (new file, 27 lines)

@@ -0,0 +1,27 @@
import enum
import typing

import pydantic

import lib.models.tts.voice.languages as models_tts_languages


class VoiceModelProvidersEnum(enum.Enum):
    YANDEX = "yandex"
    ELEVEN_LABS = "eleven_labs"


class BaseVoiceModel(pydantic.BaseModel):
    voice_id: str
    voice_name: str | None = None
    languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE]
    provider: VoiceModelProvidersEnum

    @pydantic.model_validator(mode="before")
    @classmethod
    def check_voice_name_exists(cls, data: typing.Any) -> typing.Any:
        voice_id = data.get("voice_id")
        voice_name = data.get("voice_name")
        if not voice_name and voice_id:
            data["voice_name"] = voice_id
        return data
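The before-mode validator backfills voice_name from voice_id whenever a name is not supplied, which is why the default voice lists below can omit it. For example:

import lib.models.tts.voice as models_tts_voice
import lib.models.tts.voice.languages as models_tts_languages

voice = models_tts_voice.BaseVoiceModel(
    voice_id="alena",                                                # no voice_name given
    languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN],
    provider=models_tts_voice.VoiceModelProvidersEnum.YANDEX,
)
print(voice.voice_name)                                              # "alena", copied from voice_id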
src/assistant/lib/models/tts/voice/eleven_labs.py (new file, 75 lines)

@@ -0,0 +1,75 @@
import typing

import pydantic

import lib.models.tts.voice.base as models_tts_base
import lib.models.tts.voice.languages as models_tts_languages


class ElevenLabsVoiceModel(models_tts_base.BaseVoiceModel):
    model_config = pydantic.ConfigDict(use_enum_values=True)
    voice_id: str
    voice_name: str | None = None
    languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE]
    provider: models_tts_base.VoiceModelProvidersEnum = models_tts_base.VoiceModelProvidersEnum.ELEVEN_LABS


class ElevenLabsListVoiceModelsModel(pydantic.BaseModel):
    models: list[ElevenLabsVoiceModel] = [
        ElevenLabsVoiceModel(
            voice_id="eleven_multilingual_v1",
            languages=[
                models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.GERMAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.POLISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.SPANISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.ITALIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.FRENCH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.PORTUGUESE,
                models_tts_languages.ElevenLabsLanguageCodesEnum.HINDI,
                models_tts_languages.ElevenLabsLanguageCodesEnum.ARABIC,
            ],
        ),
        ElevenLabsVoiceModel(
            voice_id="eleven_multilingual_v2",
            languages=[
                models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.JAPANESE,
                models_tts_languages.ElevenLabsLanguageCodesEnum.CHINESE,
                models_tts_languages.ElevenLabsLanguageCodesEnum.GERMAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.HINDI,
                models_tts_languages.ElevenLabsLanguageCodesEnum.FRENCH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.KOREAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.PORTUGUESE,
                models_tts_languages.ElevenLabsLanguageCodesEnum.ITALIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.SPANISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.INDONESIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.DUTCH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.TURKISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.FILIPINO,
                models_tts_languages.ElevenLabsLanguageCodesEnum.POLISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.SWEDISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.BULGARIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.ROMANIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.ARABIC,
                models_tts_languages.ElevenLabsLanguageCodesEnum.CZECH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.GREEK,
                models_tts_languages.ElevenLabsLanguageCodesEnum.FINNISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.CROATIAN,
                models_tts_languages.ElevenLabsLanguageCodesEnum.MALAY,
                models_tts_languages.ElevenLabsLanguageCodesEnum.SLOVAK,
                models_tts_languages.ElevenLabsLanguageCodesEnum.DANISH,
                models_tts_languages.ElevenLabsLanguageCodesEnum.TAMIL,
                models_tts_languages.ElevenLabsLanguageCodesEnum.UKRAINIAN,
            ],
        ),
        ElevenLabsVoiceModel(
            voice_id="eleven_multilingual_v2",
            languages=[models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH],
        ),
    ]

    @classmethod
    def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self:
        voice_models = [ElevenLabsVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api]
        return ElevenLabsListVoiceModelsModel(models=voice_models)
src/assistant/lib/models/tts/voice/languages.py (new file, 83 lines)

@@ -0,0 +1,83 @@
import enum


class BaseLanguageCodesEnum(enum.Enum):
    RUSSIAN = "ru"
    ENGLISH = "en"
    KAZAKH = "kk"
    GERMAN = "de"
    HEBREW = "he"
    UZBEK = "uz"
    JAPANESE = "ja"
    CHINESE = "zh"
    HINDI = "hi"
    FRENCH = "fr"
    KOREAN = "ko"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    SPANISH = "es"
    INDONESIAN = "id"
    DUTCH = "nl"
    TURKISH = "tr"
    FILIPINO = "fil"
    POLISH = "pl"
    SWEDISH = "sv"
    BULGARIAN = "bg"
    ROMANIAN = "ro"
    ARABIC = "ar"
    CZECH = "cs"
    GREEK = "el"
    FINNISH = "fi"
    CROATIAN = "hr"
    MALAY = "ms"
    SLOVAK = "sk"
    DANISH = "da"
    TAMIL = "ta"
    UKRAINIAN = "uk"


class ElevenLabsLanguageCodesEnum(enum.Enum):
    RUSSIAN = "ru"
    ENGLISH = "en"
    KAZAKH = "kk"
    GERMAN = "de"
    HEBREW = "he"
    UZBEK = "uz"
    JAPANESE = "ja"
    CHINESE = "zh"
    HINDI = "hi"
    FRENCH = "fr"
    KOREAN = "ko"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    SPANISH = "es"
    INDONESIAN = "id"
    DUTCH = "nl"
    TURKISH = "tr"
    FILIPINO = "fil"
    POLISH = "pl"
    SWEDISH = "sv"
    BULGARIAN = "bg"
    ROMANIAN = "ro"
    ARABIC = "ar"
    CZECH = "cs"
    GREEK = "el"
    FINNISH = "fi"
    CROATIAN = "hr"
    MALAY = "ms"
    SLOVAK = "sk"
    DANISH = "da"
    TAMIL = "ta"
    UKRAINIAN = "uk"


class YandexLanguageCodesEnum(enum.Enum):
    RUSSIAN = "ru-RU"
    ENGLISH = "en-US"
    KAZAKH = "kk-KK"
    GERMAN = "de-DE"
    HEBREW = "he-IL"
    UZBEK = "uz-UZ"


LANGUAGE_CODES_ENUM_TYPE = BaseLanguageCodesEnum | ElevenLabsLanguageCodesEnum | YandexLanguageCodesEnum
src/assistant/lib/models/tts/voice/yandex.py (new file, 97 lines)

@@ -0,0 +1,97 @@
import typing

import pydantic

import lib.models.tts.voice.base as models_tts_base
import lib.models.tts.voice.languages as models_tts_languages


class YandexVoiceModel(models_tts_base.BaseVoiceModel):
    voice_id: str
    voice_name: str | None = None
    languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE]
    provider: models_tts_base.VoiceModelProvidersEnum = models_tts_base.VoiceModelProvidersEnum.YANDEX
    role: str | None = None

    @pydantic.model_validator(mode="before")
    @classmethod
    def check_voice_name_exists(cls, data: typing.Any) -> typing.Any:
        voice_id = data.get("voice_id")
        voice_name = data.get("voice_name")
        role = data.get("role")
        if not voice_name and voice_id:
            data["voice_name"] = f"{voice_id} {role}" if role else voice_id
        return data


class YandexListVoiceModelsModel(pydantic.BaseModel):
    models: list[YandexVoiceModel] = [
        YandexVoiceModel(
            voice_id="ermil", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="ermil", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="alena", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="alena", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="jane", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="jane", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="jane", role="evil", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="omazh", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="omazh", role="evil", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="zahar", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="zahar", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="filipp", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="madirus", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(voice_id="dasha", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]),
        YandexVoiceModel(voice_id="julia", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]),
        YandexVoiceModel(voice_id="lera", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]),
        YandexVoiceModel(
            voice_id="marina", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="alexander", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(
            voice_id="kirill", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]
        ),
        YandexVoiceModel(voice_id="anton", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]),
        YandexVoiceModel(voice_id="john", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.ENGLISH]),
        YandexVoiceModel(voice_id="amira", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.KAZAKH]),
        YandexVoiceModel(voice_id="madi", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.KAZAKH]),
        YandexVoiceModel(voice_id="lea", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.GERMAN]),
        YandexVoiceModel(
            voice_id="naomi", role="modern", languages=[models_tts_languages.YandexLanguageCodesEnum.HEBREW]
        ),
        YandexVoiceModel(
            voice_id="naomi", role="classic", languages=[models_tts_languages.YandexLanguageCodesEnum.HEBREW]
        ),
        YandexVoiceModel(voice_id="nigora", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.UZBEK]),
    ]

    @classmethod
    def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self:
        voice_models = [YandexVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api]
        return YandexListVoiceModelsModel(models=voice_models)
src/assistant/lib/stt/__init__.py (new file, 10 lines)

@@ -0,0 +1,10 @@
from .models import *
from .repositories import *
from .services import *

__all__ = [
    "OpenaiSpeechRepository",
    "STTProtocol",
    "SpeechService",
    "SttVoice",
]
src/assistant/lib/stt/models.py (new file, 25 lines)

@@ -0,0 +1,25 @@
import typing

import pydantic

import lib.app.split_settings as app_split_settings


class SttVoice(pydantic.BaseModel):
    audio_size: int
    audio_format: str
    audio_name: str = "voice"
    audio_data: bytes
    voice_settings: app_split_settings.VoiceSettings

    @pydantic.model_validator(mode="before")
    @classmethod
    def validate_audio(cls, v: dict[str, typing.Any]) -> dict[str, typing.Any]:
        settings: app_split_settings.VoiceSettings = v["voice_settings"]
        if v["audio_size"] > settings.max_input_size:
            raise ValueError(f"Audio size is too big: {v['audio_size']}")
        if v["audio_format"] not in settings.available_formats:
            raise ValueError(f"Audio format is not supported: {v['audio_format']}")
        if "audio_name" not in v or not v["audio_name"]:
            v["audio_name"] = f"audio.{v['audio_format']}"
        return v
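The before-mode validator enforces the VoiceSettings limits and fills in a default file name derived from the format. A sketch, using the same KB units the settings imply:

import pydantic

import lib.app.split_settings as app_split_settings
import lib.stt as stt

settings = app_split_settings.VoiceSettings()   # max_input_size=5120 (KB), formats wav/mp3/ogg by default

voice = stt.SttVoice(
    audio_size=100,                             # KB, below the 5120 KB limit
    audio_format="mp3",
    audio_data=b"\x00\x01",                     # placeholder bytes
    voice_settings=settings,
)
print(voice.audio_name)                         # "audio.mp3", filled in by the validator

try:
    stt.SttVoice(
        audio_size=100,
        audio_format="flac",                    # not in available_formats
        audio_data=b"\x00\x01",
        voice_settings=settings,
    )
except pydantic.ValidationError as error:
    print(error)                                # "Audio format is not supported: flac"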
src/assistant/lib/stt/repositories.py (new file, 47 lines)

@@ -0,0 +1,47 @@
import mimetypes
import tempfile

import magic
import openai

import lib.app.settings as app_settings
import lib.stt as stt


class OpenaiSpeechRepository:
    def __init__(self, settings: app_settings.Settings):
        self.settings = settings
        openai.api_key = self.settings.openai.api_key.get_secret_value()

    @staticmethod
    def __get_file_extension_from_bytes(audio: bytes) -> str | None:
        mime: magic.Magic = magic.Magic(mime=True)
        mime_type: str = mime.from_buffer(audio)
        extension: str | None = mimetypes.guess_extension(mime_type)
        if extension:
            extension = extension.replace(".", "")
        return extension

    async def speech_to_text(self, audio: bytes) -> str:
        file_extension = self.__get_file_extension_from_bytes(audio)
        if not file_extension:
            raise ValueError("File extension is not supported")

        voice: stt.models.SttVoice = stt.models.SttVoice(
            audio_size=len(audio) // 1024,  # audio size in KB
            audio_format=file_extension,
            audio_data=audio,
            voice_settings=self.settings.voice,
        )

        try:
            with tempfile.NamedTemporaryFile(suffix=f".{file_extension}") as temp_file:
                temp_file.write(voice.audio_data)
                temp_file.seek(0)
                transcript = openai.Audio.transcribe(self.settings.openai.stt_model, temp_file)  # type: ignore
        except openai.error.InvalidRequestError as e:  # type: ignore[reportGeneralTypeIssues]
            raise ValueError(f"OpenAI API error: {e}")
        except openai.error.OpenAIError as e:  # type: ignore[reportGeneralTypeIssues]
            raise ValueError(f"OpenAI API error: {e}")

        return transcript.text  # type: ignore[reportUnknownVariableType]
src/assistant/lib/stt/services.py (new file, 14 lines)

@@ -0,0 +1,14 @@
import typing


class STTProtocol(typing.Protocol):
    async def speech_to_text(self, audio: bytes) -> str:
        ...


class SpeechService:
    def __init__(self, repository: STTProtocol):
        self.repository = repository

    async def recognize(self, audio: bytes) -> str:
        return await self.repository.speech_to_text(audio)
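Because the dependency is typed as a Protocol, any object with a matching speech_to_text coroutine satisfies it structurally, which keeps the service easy to test. A sketch with a stand-in repository (FakeSpeechRepository is hypothetical, not part of the diff):

import asyncio

import lib.stt as stt


class FakeSpeechRepository:
    """Satisfies stt.STTProtocol structurally; no inheritance needed."""

    async def speech_to_text(self, audio: bytes) -> str:
        return f"transcribed {len(audio)} bytes"


async def main() -> None:
    service = stt.SpeechService(repository=FakeSpeechRepository())
    print(await service.recognize(b"\x00" * 16))    # "transcribed 16 bytes"


asyncio.run(main())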
src/assistant/lib/tts/__init__.py (new file, 0 lines)

src/assistant/lib/tts/models/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
from .protocols import *

__all__ = [
    "TTSRepositoryProtocol",
]
src/assistant/lib/tts/models/protocols.py (new file, 14 lines)

@@ -0,0 +1,14 @@
import typing

import lib.models as models


class TTSRepositoryProtocol(typing.Protocol):
    def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel:
        ...

    def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None:
        ...

    def get_voice_models_by_fields(self, fields: models.TTSSearchVoiceRequestModel) -> models.LIST_VOICE_MODELS_TYPE:
        ...
src/assistant/lib/tts/repositories/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
from .base import *

__all__ = [
    "TTSBaseRepository",
]
src/assistant/lib/tts/repositories/base.py (new file, 57 lines)

@@ -0,0 +1,57 @@
import abc

import lib.models as models


class HttpClient:  # Mocked class todo remove and use real http client from lib.clients.http_client
    ...


class TTSBaseRepository(abc.ABC):
    def __init__(self, client: HttpClient, is_models_from_api: bool = False):
        self.http_client = client
        self.is_models_from_api = is_models_from_api

    @property
    @abc.abstractmethod
    def voice_models(self) -> models.LIST_VOICE_MODELS_TYPE:
        ...

    @abc.abstractmethod
    def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel:
        raise NotImplementedError

    def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None:
        """
        Search voice model by name
        :param voice_model_name: String name
        :return: Voice model that match the name
        """
        for voice_model in self.voice_models.models:
            if voice_model.voice_name == voice_model_name:
                return voice_model

    def get_list_voice_models_by_fields(
        self, fields: models.TTSSearchVoiceRequestModel
    ) -> list[models.AVAILABLE_MODELS_TYPE]:
        """
        Search voice model by fields
        :param fields: Any fields from TTSSearchVoiceRequestModel
        :return: All voice models that match the fields
        """
        fields_dump = fields.model_dump(exclude_none=True)
        voice_models_response = []
        for voice_model in self.voice_models.models:
            for field, field_value in fields_dump.items():
                if field == "languages":  # language is a list
                    language_names: set[str] = {item.name for item in field_value}
                    voice_model_language_names: set[str] = {item.name for item in voice_model.languages}
                    if language_names.issubset(voice_model_language_names):
                        continue
                    break
                voice_model_dump = voice_model.model_dump()
                if voice_model_dump[field] != field_value.name:
                    break
            else:
                voice_models_response.append(voice_model)
        return voice_models_response  # type: ignore[reportUnknownVariableType]
src/assistant/lib/tts/services.py (new file, 35 lines)

@@ -0,0 +1,35 @@
import lib.app.settings as app_settings
import lib.models as models
import lib.tts.models as tts_models


class TTSService:
    def __init__(
        self,
        settings: app_settings.Settings,
        repositories: dict[models.VoiceModelProvidersEnum, tts_models.TTSRepositoryProtocol],
    ):
        self.settings = settings
        self.repositories = repositories

    def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel:
        model = request.voice_model
        repository = self.repositories[model.provider]
        audio_response = repository.get_audio_as_bytes(request)
        return audio_response

    def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None:
        for repository in self.repositories.values():
            voice_model = repository.get_voice_model_by_name(voice_model_name)
            if voice_model:
                return voice_model

    def get_list_voice_models_by_fields(
        self, fields: models.TTSSearchVoiceRequestModel
    ) -> list[models.AVAILABLE_MODELS_TYPE]:
        response_models: list[models.AVAILABLE_MODELS_TYPE] = []
        for repository in self.repositories.values():
            voice_models = repository.get_voice_models_by_fields(fields)
            if voice_models.models:
                response_models.extend(voice_models.models)
        return response_models
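TTSService routes each request to the repository registered for the voice model's provider. A wiring sketch; FakeYandexRepository is hypothetical and only structurally satisfies TTSRepositoryProtocol, and Settings() assumes the environment from the .env example above is loaded:

import lib.app.settings as app_settings
import lib.models as models
import lib.models.tts.voice as models_tts_voice
import lib.tts.services as tts_services


class FakeYandexRepository:
    """Stand-in repository; satisfies TTSRepositoryProtocol structurally."""

    def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel:
        return models.TTSCreateResponseModel(audio_content=b"")    # no real synthesis in this sketch

    def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None:
        return None

    def get_voice_models_by_fields(self, fields: models.TTSSearchVoiceRequestModel) -> models.LIST_VOICE_MODELS_TYPE:
        return models_tts_voice.YandexListVoiceModelsModel()       # the default voice list from the diff


service = tts_services.TTSService(
    settings=app_settings.Settings(),
    repositories={models.VoiceModelProvidersEnum.YANDEX: FakeYandexRepository()},
)
print(service.get_voice_model_by_name("alena"))                    # None with the fake repository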
src/assistant/poetry.lock (generated, 1570 lines): diff suppressed because it is too large.
@@ -27,7 +27,8 @@ fastapi = "0.103.1"
greenlet = "^2.0.2"
httpx = "^0.25.0"
multidict = "^6.0.4"
orjson = "^3.9.7"
openai = "^0.28.1"
orjson = "3.9.7"
psycopg2-binary = "^2.9.9"
pydantic = {extras = ["email"], version = "^2.3.0"}
pydantic-settings = "^2.0.3"

@@ -35,6 +36,7 @@ pytest = "^7.4.2"
pytest-asyncio = "^0.21.1"
python = "^3.11"
python-jose = "^3.3.0"
python-magic = "^0.4.27"
sqlalchemy = "^2.0.20"
uvicorn = "^0.23.2"
wrapt = "^1.15.0"
@@ -1,11 +1,12 @@
import pydantic_settings

import lib.app.split_settings.utils as app_split_settings_utils
import pydantic_settings


class LoggingSettings(pydantic_settings.BaseSettings):
    model_config = pydantic_settings.SettingsConfigDict(
        env_file=app_split_settings_utils.ENV_PATH, env_file_encoding="utf-8", extra="ignore"
        env_file=app_split_settings_utils.ENV_PATH,
        env_file_encoding="utf-8",
        extra="ignore",
    )

    log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"