diff --git a/src/assistant/.env.example b/src/assistant/.env.example
index 224cc85..030bda3 100644
--- a/src/assistant/.env.example
+++ b/src/assistant/.env.example
@@ -30,3 +30,10 @@ VOICE_MAX_INPUT_SECONDS=30
 
 OPENAI_API_KEY=sk-1234567890
 OPENAI_STT_MODEL=whisper-1
+
+TTS_YANDEX_API_KEY=
+TTS_YANDEX_AUDIO_FORMAT=oggopus
+TTS_YANDEX_SAMPLE_RATE_HERTZ=48000
+
+TTS_ELEVEN_LABS_API_KEY=
+TTS_ELEVEN_LABS_DEFAULT_VOICE_ID=EXAVITQu4vr4xnSDxMaL
diff --git a/src/assistant/lib/app/settings.py b/src/assistant/lib/app/settings.py
index 72198a6..e3bdb38 100644
--- a/src/assistant/lib/app/settings.py
+++ b/src/assistant/lib/app/settings.py
@@ -22,3 +22,9 @@ class Settings(pydantic_settings.BaseSettings):
 
     proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings())
     voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings())
+    tts_yandex: app_split_settings.TTSYandexSettings = pydantic.Field(
+        default_factory=lambda: app_split_settings.TTSYandexSettings()
+    )
+    tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = pydantic.Field(
+        default_factory=lambda: app_split_settings.TTSElevenLabsSettings()
+    )
diff --git a/src/assistant/lib/app/split_settings/__init__.py b/src/assistant/lib/app/split_settings/__init__.py
index 3aa53b4..b7b24df 100644
--- a/src/assistant/lib/app/split_settings/__init__.py
+++ b/src/assistant/lib/app/split_settings/__init__.py
@@ -5,6 +5,7 @@ from .openai import *
 from .postgres import *
 from .project import *
 from .proxy import *
+from .tts import *
 from .voice import *
 
 __all__ = [
@@ -15,6 +16,8 @@ __all__ = [
     "PostgresSettings",
     "ProjectSettings",
     "ProxySettings",
+    "TTSElevenLabsSettings",
+    "TTSYandexSettings",
     "VoiceSettings",
     "get_logging_config",
 ]
diff --git a/src/assistant/lib/app/split_settings/tts/__init__.py b/src/assistant/lib/app/split_settings/tts/__init__.py
new file mode 100644
index 0000000..28c8188
--- /dev/null
+++ b/src/assistant/lib/app/split_settings/tts/__init__.py
@@ -0,0 +1,7 @@
+from .eleven_labs import *
+from .yandex import *
+
+__all__ = [
+    "TTSElevenLabsSettings",
+    "TTSYandexSettings",
+]
diff --git a/src/assistant/lib/app/split_settings/tts/eleven_labs.py b/src/assistant/lib/app/split_settings/tts/eleven_labs.py
new file mode 100644
index 0000000..496a66b
--- /dev/null
+++ b/src/assistant/lib/app/split_settings/tts/eleven_labs.py
@@ -0,0 +1,28 @@
+import pydantic
+import pydantic_settings
+
+import lib.app.split_settings.utils as app_split_settings_utils
+
+
+class TTSElevenLabsSettings(pydantic_settings.BaseSettings):
+    """ElevenLabs text-to-speech settings, read from ``TTS_ELEVEN_LABS_*`` variables."""
+
+    model_config = pydantic_settings.SettingsConfigDict(
+        env_file=app_split_settings_utils.ENV_PATH,
+        env_prefix="TTS_ELEVEN_LABS_",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    # Required: declared with ``...`` so there is no default value.
+    api_key: pydantic.SecretStr = pydantic.Field(default=...)
+    default_voice_id: str = "EXAVITQu4vr4xnSDxMaL"
+
+    @property
+    def base_headers(self) -> dict[str, str]:
+        """Return request headers that carry the unwrapped (plain-text) API key."""
+        return {
+            "Accept": "audio/mpeg",
+            "Content-Type": "application/json",
+            "xi-api-key": self.api_key.get_secret_value(),
+        }
diff --git a/src/assistant/lib/app/split_settings/tts/yandex.py b/src/assistant/lib/app/split_settings/tts/yandex.py
new file mode 100644
index 0000000..38393b0
--- /dev/null
+++ b/src/assistant/lib/app/split_settings/tts/yandex.py
@@ -0,0 +1,30 @@
+import typing
+
+import pydantic
+import pydantic_settings
+
+import lib.app.split_settings.utils as app_split_settings_utils
+
+
+class TTSYandexSettings(pydantic_settings.BaseSettings):
+    """Yandex text-to-speech settings, read from ``TTS_YANDEX_*`` variables."""
+
+    model_config = pydantic_settings.SettingsConfigDict(
+        env_file=app_split_settings_utils.ENV_PATH,
+        env_prefix="TTS_YANDEX_",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    audio_format: typing.Literal["oggopus", "mp3", "lpcm"] = "oggopus"
+    sample_rate_hertz: int = 48000
+    # Required: declared with ``...`` so there is no default value.
+    api_key: pydantic.SecretStr = pydantic.Field(default=...)
+
+    @property
+    def base_headers(self) -> dict[str, str]:
+        """Return request headers that carry the unwrapped (plain-text) API key."""
+        return {
+            "Authorization": f"Api-Key {self.api_key.get_secret_value()}",
+            "Content-Type": "application/x-www-form-urlencoded",
+        }