From d7528028ce3a3afd687045ed934a16e4e4160abf Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 14:22:50 +0300 Subject: [PATCH 01/18] feat: [#47] add http_client --- src/assistant/lib/tts/repositories/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/assistant/lib/tts/repositories/base.py b/src/assistant/lib/tts/repositories/base.py index cabbfc5..14f3275 100644 --- a/src/assistant/lib/tts/repositories/base.py +++ b/src/assistant/lib/tts/repositories/base.py @@ -1,14 +1,11 @@ import abc +import lib.clients as clients import lib.models as models -class HttpClient: # Mocked class todo remove and use real http client from lib.clients.http_client - ... - - class TTSBaseRepository(abc.ABC): - def __init__(self, client: HttpClient, is_models_from_api: bool = False): + def __init__(self, client: clients.AsyncHttpClient, is_models_from_api: bool = False): self.http_client = client self.is_models_from_api = is_models_from_api From 881c611650271341f88c1dbf826e6ce8fc760f80 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 14:23:34 +0300 Subject: [PATCH 02/18] fix: [#47] voice_models must be declared --- src/assistant/lib/tts/repositories/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assistant/lib/tts/repositories/base.py b/src/assistant/lib/tts/repositories/base.py index 14f3275..6d1ce15 100644 --- a/src/assistant/lib/tts/repositories/base.py +++ b/src/assistant/lib/tts/repositories/base.py @@ -12,7 +12,7 @@ class TTSBaseRepository(abc.ABC): @property @abc.abstractmethod def voice_models(self) -> models.LIST_VOICE_MODELS_TYPE: - ... + raise NotImplementedError @abc.abstractmethod def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: From 5b50d78b13fd6bdd29b19e9313e15d93bcce5766 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 14:28:35 +0300 Subject: [PATCH 03/18] fix: [#47] add voice models --- src/assistant/lib/models/__init__.py | 4 ++++ src/assistant/lib/models/tts/__init__.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/assistant/lib/models/__init__.py b/src/assistant/lib/models/__init__.py index 70993c4..d134ff8 100644 --- a/src/assistant/lib/models/__init__.py +++ b/src/assistant/lib/models/__init__.py @@ -8,6 +8,8 @@ __all__ = [ "BaseLanguageCodesEnum", "BaseVoiceModel", "ElevenLabsLanguageCodesEnum", + "ElevenLabsListVoiceModelsModel", + "ElevenLabsVoiceModel", "IdCreatedUpdatedBaseMixin", "LANGUAGE_CODES_ENUM_TYPE", "LIST_VOICE_MODELS_TYPE", @@ -17,4 +19,6 @@ __all__ = [ "Token", "VoiceModelProvidersEnum", "YandexLanguageCodesEnum", + "YandexListVoiceModelsModel", + "YandexVoiceModel", ] diff --git a/src/assistant/lib/models/tts/__init__.py b/src/assistant/lib/models/tts/__init__.py index 2eabaf4..85e38ee 100644 --- a/src/assistant/lib/models/tts/__init__.py +++ b/src/assistant/lib/models/tts/__init__.py @@ -6,6 +6,8 @@ __all__ = [ "BaseLanguageCodesEnum", "BaseVoiceModel", "ElevenLabsLanguageCodesEnum", + "ElevenLabsListVoiceModelsModel", + "ElevenLabsVoiceModel", "LANGUAGE_CODES_ENUM_TYPE", "LIST_VOICE_MODELS_TYPE", "TTSCreateRequestModel", @@ -13,4 +15,6 @@ __all__ = [ "TTSSearchVoiceRequestModel", "VoiceModelProvidersEnum", "YandexLanguageCodesEnum", + "YandexListVoiceModelsModel", + "YandexVoiceModel", ] From 8eef8b46a7c5bd4880f12d2a77752d791b3c657b Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 14:56:36 +0300 Subject: [PATCH 04/18] fix: [#47] methods to async --- src/assistant/lib/tts/repositories/base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/assistant/lib/tts/repositories/base.py b/src/assistant/lib/tts/repositories/base.py index 6d1ce15..5978061 100644 --- a/src/assistant/lib/tts/repositories/base.py +++ b/src/assistant/lib/tts/repositories/base.py @@ -11,24 +11,25 @@ class TTSBaseRepository(abc.ABC): @property @abc.abstractmethod - def voice_models(self) -> models.LIST_VOICE_MODELS_TYPE: + async def voice_models(self) -> models.LIST_VOICE_MODELS_TYPE: raise NotImplementedError @abc.abstractmethod - def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + async def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: raise NotImplementedError - def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: + async def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: """ Search voice model by name :param voice_model_name: String name :return: Voice model that match the name """ - for voice_model in self.voice_models.models: + voice_models = await self.voice_models + for voice_model in voice_models.models: if voice_model.voice_name == voice_model_name: return voice_model - def get_list_voice_models_by_fields( + async def get_list_voice_models_by_fields( self, fields: models.TTSSearchVoiceRequestModel ) -> list[models.AVAILABLE_MODELS_TYPE]: """ @@ -38,7 +39,8 @@ class TTSBaseRepository(abc.ABC): """ fields_dump = fields.model_dump(exclude_none=True) voice_models_response = [] - for voice_model in self.voice_models.models: + voice_models = await self.voice_models + for voice_model in voice_models.models: for field, field_value in fields_dump.items(): if field == "languages": # language is a list language_names: set[str] = {item.name for item in field_value} From 4b281170556c6fe82278fd5e2e995b4235b7d491 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 14:56:58 +0300 Subject: [PATCH 05/18] fix: [#47] rewrite from_api --- src/assistant/lib/models/tts/voice/eleven_labs.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/assistant/lib/models/tts/voice/eleven_labs.py b/src/assistant/lib/models/tts/voice/eleven_labs.py index 80804b4..50b6fbb 100644 --- a/src/assistant/lib/models/tts/voice/eleven_labs.py +++ b/src/assistant/lib/models/tts/voice/eleven_labs.py @@ -71,5 +71,13 @@ class ElevenLabsListVoiceModelsModel(pydantic.BaseModel): @classmethod def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self: - voice_models = [ElevenLabsVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api] + voice_models = [] + for voice_model in voice_models_from_api: + voice_model["voice_id"] = voice_model.pop("model_id") + voice_model["voice_name"] = voice_model.pop("name") + voice_model["languages"] = [ + models_tts_languages.ElevenLabsLanguageCodesEnum(item.get("language_id")) + for item in voice_model.pop("languages") + ] + voice_models.append(ElevenLabsVoiceModel.model_validate(voice_model)) return ElevenLabsListVoiceModelsModel(models=voice_models) From eca38ebe25a6e81af18c6ca01b994411cba0235d Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 15:01:30 +0300 Subject: [PATCH 06/18] feat: [#47] http_client proxy may not be passed --- src/assistant/lib/clients/http_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/assistant/lib/clients/http_client.py b/src/assistant/lib/clients/http_client.py index 532759d..e1bac11 100644 --- a/src/assistant/lib/clients/http_client.py +++ b/src/assistant/lib/clients/http_client.py @@ -8,7 +8,7 @@ import lib.app.split_settings as app_split_settings class AsyncHttpClient(httpx.AsyncClient): def __init__( self, - proxy_settings: app_split_settings.ProxySettings, + proxy_settings: app_split_settings.ProxySettings | None = None, base_url: str | None = None, **client_params: typing.Any, ) -> None: @@ -20,7 +20,7 @@ class AsyncHttpClient(httpx.AsyncClient): super().__init__(base_url=self.base_url, proxies=self.proxies, **client_params) # type: ignore[reportGeneralTypeIssues] def __get_proxies_from_settings(self) -> dict[str, str] | None: - if not self.proxy_settings.enable: + if not self.proxy_settings or not self.proxy_settings.enable: return None proxies = {"all://": self.proxy_settings.dsn} return proxies From c9a9abb0775692052569216a6887b74a86766e10 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:03:08 +0300 Subject: [PATCH 07/18] feat: [#47] add tts settings --- src/assistant/.env.example | 7 +++++ src/assistant/lib/app/settings.py | 6 +++++ .../lib/app/split_settings/__init__.py | 3 +++ .../lib/app/split_settings/tts/__init__.py | 7 +++++ .../lib/app/split_settings/tts/eleven_labs.py | 24 +++++++++++++++++ .../lib/app/split_settings/tts/yandex.py | 26 +++++++++++++++++++ 6 files changed, 73 insertions(+) create mode 100644 src/assistant/lib/app/split_settings/tts/__init__.py create mode 100644 src/assistant/lib/app/split_settings/tts/eleven_labs.py create mode 100644 src/assistant/lib/app/split_settings/tts/yandex.py diff --git a/src/assistant/.env.example b/src/assistant/.env.example index 224cc85..030bda3 100644 --- a/src/assistant/.env.example +++ b/src/assistant/.env.example @@ -30,3 +30,10 @@ VOICE_MAX_INPUT_SECONDS=30 OPENAI_API_KEY=sk-1234567890 OPENAI_STT_MODEL=whisper-1 + +TTS_YANDEX_API_KEY= +TTS_YANDEX_AUDIO_FORMAT=oggopus +TTS_YANDEX_SAMPLE_RATE_HERTZ=48000 + +TTS_ELEVEN_LABS_API_KEY= +TTS_ELEVEN_LABS_DEFAULT_VOICE_ID=EXAVITQu4vr4xnSDxMaL diff --git a/src/assistant/lib/app/settings.py b/src/assistant/lib/app/settings.py index 72198a6..e3bdb38 100644 --- a/src/assistant/lib/app/settings.py +++ b/src/assistant/lib/app/settings.py @@ -22,3 +22,9 @@ class Settings(pydantic_settings.BaseSettings): proxy: app_split_settings.ProxySettings = pydantic.Field(default_factory=lambda: app_split_settings.ProxySettings()) voice: app_split_settings.VoiceSettings = pydantic.Field(default_factory=lambda: app_split_settings.VoiceSettings()) + tts_yandex: app_split_settings.TTSYandexSettings = pydantic.Field( + default_factory=lambda: app_split_settings.TTSYandexSettings() + ) + tts_eleven_labs: app_split_settings.TTSElevenLabsSettings = pydantic.Field( + default_factory=lambda: app_split_settings.TTSElevenLabsSettings() + ) diff --git a/src/assistant/lib/app/split_settings/__init__.py b/src/assistant/lib/app/split_settings/__init__.py index 3aa53b4..b7b24df 100644 --- a/src/assistant/lib/app/split_settings/__init__.py +++ b/src/assistant/lib/app/split_settings/__init__.py @@ -5,6 +5,7 @@ from .openai import * from .postgres import * from .project import * from .proxy import * +from .tts import * from .voice import * __all__ = [ @@ -15,6 +16,8 @@ __all__ = [ "PostgresSettings", "ProjectSettings", "ProxySettings", + "TTSElevenLabsSettings", + "TTSYandexSettings", "VoiceSettings", "get_logging_config", ] diff --git a/src/assistant/lib/app/split_settings/tts/__init__.py b/src/assistant/lib/app/split_settings/tts/__init__.py new file mode 100644 index 0000000..28c8188 --- /dev/null +++ b/src/assistant/lib/app/split_settings/tts/__init__.py @@ -0,0 +1,7 @@ +from .eleven_labs import * +from .yandex import * + +__all__ = [ + "TTSElevenLabsSettings", + "TTSYandexSettings", +] diff --git a/src/assistant/lib/app/split_settings/tts/eleven_labs.py b/src/assistant/lib/app/split_settings/tts/eleven_labs.py new file mode 100644 index 0000000..496a66b --- /dev/null +++ b/src/assistant/lib/app/split_settings/tts/eleven_labs.py @@ -0,0 +1,24 @@ +import pydantic +import pydantic_settings + +import lib.app.split_settings.utils as app_split_settings_utils + + +class TTSElevenLabsSettings(pydantic_settings.BaseSettings): + model_config = pydantic_settings.SettingsConfigDict( + env_file=app_split_settings_utils.ENV_PATH, + env_prefix="TTS_ELEVEN_LABS_", + env_file_encoding="utf-8", + extra="ignore", + ) + + api_key: pydantic.SecretStr = pydantic.Field(default=...) + default_voice_id: str = "EXAVITQu4vr4xnSDxMaL" + + @property + def base_headers(self) -> dict[str, str]: + return { + "Accept": "audio/mpeg", + "Content-Type": "application/json", + "xi-api-key": self.api_key.get_secret_value(), + } diff --git a/src/assistant/lib/app/split_settings/tts/yandex.py b/src/assistant/lib/app/split_settings/tts/yandex.py new file mode 100644 index 0000000..38393b0 --- /dev/null +++ b/src/assistant/lib/app/split_settings/tts/yandex.py @@ -0,0 +1,26 @@ +import typing + +import pydantic +import pydantic_settings + +import lib.app.split_settings.utils as app_split_settings_utils + + +class TTSYandexSettings(pydantic_settings.BaseSettings): + model_config = pydantic_settings.SettingsConfigDict( + env_file=app_split_settings_utils.ENV_PATH, + env_prefix="TTS_YANDEX_", + env_file_encoding="utf-8", + extra="ignore", + ) + + audio_format: typing.Literal["oggopus", "mp3", "lpcm"] = "oggopus" + sample_rate_hertz: int = 48000 + api_key: pydantic.SecretStr = pydantic.Field(default=...) + + @property + def base_headers(self) -> dict[str, str]: + return { + "Authorization": f"Api-Key {self.api_key.get_secret_value()}", + "Content-Type": "application/x-www-form-urlencoded", + } From 0d5a2c8bae8f642370e82eff6c2a2301595e18b7 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:03:26 +0300 Subject: [PATCH 08/18] feat: [#47] add tts repositories --- src/assistant/lib/tts/__init__.py | 9 ++++ .../lib/tts/repositories/__init__.py | 4 ++ .../lib/tts/repositories/eleven_labs.py | 43 +++++++++++++++++ src/assistant/lib/tts/repositories/yandex.py | 48 +++++++++++++++++++ 4 files changed, 104 insertions(+) create mode 100644 src/assistant/lib/tts/repositories/eleven_labs.py create mode 100644 src/assistant/lib/tts/repositories/yandex.py diff --git a/src/assistant/lib/tts/__init__.py b/src/assistant/lib/tts/__init__.py index e69de29..49b8e2b 100644 --- a/src/assistant/lib/tts/__init__.py +++ b/src/assistant/lib/tts/__init__.py @@ -0,0 +1,9 @@ +from .repositories import * +from .services import * + +__all__ = [ + "TTSBaseRepository", + "TTSElevenLabsRepository", + "TTSService", + "TTSYandexRepository", +] diff --git a/src/assistant/lib/tts/repositories/__init__.py b/src/assistant/lib/tts/repositories/__init__.py index 45a1941..9d2c9e4 100644 --- a/src/assistant/lib/tts/repositories/__init__.py +++ b/src/assistant/lib/tts/repositories/__init__.py @@ -1,5 +1,9 @@ from .base import * +from .eleven_labs import * +from .yandex import * __all__ = [ "TTSBaseRepository", + "TTSElevenLabsRepository", + "TTSYandexRepository", ] diff --git a/src/assistant/lib/tts/repositories/eleven_labs.py b/src/assistant/lib/tts/repositories/eleven_labs.py new file mode 100644 index 0000000..eb1ef9a --- /dev/null +++ b/src/assistant/lib/tts/repositories/eleven_labs.py @@ -0,0 +1,43 @@ +import typing + +import lib.app.split_settings as app_split_settings +import lib.clients as clients +import lib.models as models +import lib.tts.repositories.base as tts_repositories_base + + +class TTSElevenLabsRepository(tts_repositories_base.TTSBaseRepository): + def __init__( + self, + tts_settings: app_split_settings.TTSElevenLabsSettings, + client: clients.AsyncHttpClient, + is_models_from_api: bool = False, + ): + self.tts_settings = tts_settings + super().__init__(client, is_models_from_api) + + @property + async def voice_models(self) -> models.ElevenLabsListVoiceModelsModel: + if self.is_models_from_api: + return models.ElevenLabsListVoiceModelsModel.from_api(await self.get_all_models_dict_from_api()) + return models.ElevenLabsListVoiceModelsModel() + + async def get_all_models_dict_from_api(self) -> list[dict[str, typing.Any]]: + response = await self.http_client.get("/models") + print(response) + return response.json() + + async def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + if not isinstance(request.voice_model, models.ElevenLabsVoiceModel): + raise ValueError("ElevenLabs TTS support only ElevenLabsVoiceModel") + response = await self.http_client.post( + f"/text-to-speech/{self.tts_settings.default_voice_id}", + json={"text": request.text, "model_id": request.voice_model.voice_id}, + ) + return models.TTSCreateResponseModel(audio_content=response.content) + + async def get_voice_models_by_fields( + self, fields: models.TTSSearchVoiceRequestModel + ) -> models.ElevenLabsListVoiceModelsModel: + list_voice_models = await self.get_list_voice_models_by_fields(fields) + return models.ElevenLabsListVoiceModelsModel(models=list_voice_models) # type: ignore diff --git a/src/assistant/lib/tts/repositories/yandex.py b/src/assistant/lib/tts/repositories/yandex.py new file mode 100644 index 0000000..9abf67d --- /dev/null +++ b/src/assistant/lib/tts/repositories/yandex.py @@ -0,0 +1,48 @@ +import logging + +import lib.app.split_settings as app_split_settings +import lib.clients as clients +import lib.models as models +import lib.tts.repositories.base as tts_repositories_base + +logger = logging.getLogger(__name__) + + +class TTSYandexRepository(tts_repositories_base.TTSBaseRepository): + def __init__( + self, + tts_settings: app_split_settings.TTSYandexSettings, + client: clients.AsyncHttpClient, + is_models_from_api: bool = False, + ): + self.tts_settings = tts_settings + if is_models_from_api: + logger.warning("Yandex TTS doesn't support getting models from API") + super().__init__(client, is_models_from_api=False) + + @property + async def voice_models(self) -> models.YandexListVoiceModelsModel: + return models.YandexListVoiceModelsModel() + + async def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + if not isinstance(request.voice_model, models.YandexVoiceModel): + raise ValueError("Yandex TTS support only YandexVoiceModel") + data = { + "text": request.text, + "lang": request.voice_model.languages[0].value, + "voice": request.voice_model.voice_id, + "emotion": request.voice_model.role, + "format": self.tts_settings.audio_format, + "sampleRateHertz": self.tts_settings.sample_rate_hertz, + } + response = await self.http_client.post( + "/tts:synthesize", + data=data, + ) + return models.TTSCreateResponseModel(audio_content=response.content) + + async def get_voice_models_by_fields( + self, fields: models.TTSSearchVoiceRequestModel + ) -> models.YandexListVoiceModelsModel: + list_voice_models = await self.get_list_voice_models_by_fields(fields) + return models.YandexListVoiceModelsModel(models=list_voice_models) # type: ignore From cc550ce1e7ef57d43819fe0e19dea6226a5e83e1 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:03:47 +0300 Subject: [PATCH 09/18] fix: [#47] methods to async --- src/assistant/lib/tts/models/protocols.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/assistant/lib/tts/models/protocols.py b/src/assistant/lib/tts/models/protocols.py index 5774c08..1912cce 100644 --- a/src/assistant/lib/tts/models/protocols.py +++ b/src/assistant/lib/tts/models/protocols.py @@ -4,11 +4,13 @@ import lib.models as models class TTSRepositoryProtocol(typing.Protocol): - def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + async def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: ... - def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: + async def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: ... - def get_voice_models_by_fields(self, fields: models.TTSSearchVoiceRequestModel) -> models.LIST_VOICE_MODELS_TYPE: + async def get_voice_models_by_fields( + self, fields: models.TTSSearchVoiceRequestModel + ) -> models.LIST_VOICE_MODELS_TYPE: ... From 6ed4928ced6e9f5a041df48eaa42443b09e09ce9 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:04:12 +0300 Subject: [PATCH 10/18] fix: [#47] check not is none --- src/assistant/lib/models/tts/voice/base.py | 2 ++ src/assistant/lib/models/tts/voice/yandex.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/assistant/lib/models/tts/voice/base.py b/src/assistant/lib/models/tts/voice/base.py index 45c3960..1682b02 100644 --- a/src/assistant/lib/models/tts/voice/base.py +++ b/src/assistant/lib/models/tts/voice/base.py @@ -20,6 +20,8 @@ class BaseVoiceModel(pydantic.BaseModel): @pydantic.model_validator(mode="before") @classmethod def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: + if not data: + return data voice_id = data.get("voice_id") voice_name = data.get("voice_name") if not voice_name and voice_id: diff --git a/src/assistant/lib/models/tts/voice/yandex.py b/src/assistant/lib/models/tts/voice/yandex.py index 476a1f0..90f8c12 100644 --- a/src/assistant/lib/models/tts/voice/yandex.py +++ b/src/assistant/lib/models/tts/voice/yandex.py @@ -16,6 +16,8 @@ class YandexVoiceModel(models_tts_base.BaseVoiceModel): @pydantic.model_validator(mode="before") @classmethod def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: + if not data: + return data voice_id = data.get("voice_id") voice_name = data.get("voice_name") role = data.get("role") From 1170532c9349235f46e7fe6870e42db9dc1927d6 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:04:23 +0300 Subject: [PATCH 11/18] fix: [#47] to async --- src/assistant/lib/tts/services.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/assistant/lib/tts/services.py b/src/assistant/lib/tts/services.py index 1d6a337..13fd340 100644 --- a/src/assistant/lib/tts/services.py +++ b/src/assistant/lib/tts/services.py @@ -1,35 +1,33 @@ -import lib.app.settings as app_settings -import lib.models as models +import lib.models as _models import lib.tts.models as tts_models class TTSService: def __init__( self, - settings: app_settings.Settings, - repositories: dict[models.VoiceModelProvidersEnum, tts_models.TTSRepositoryProtocol], + repositories: dict[_models.VoiceModelProvidersEnum, tts_models.TTSRepositoryProtocol], ): - self.settings = settings self.repositories = repositories - def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + async def get_audio_as_bytes(self, request: _models.TTSCreateRequestModel) -> _models.TTSCreateResponseModel: model = request.voice_model repository = self.repositories[model.provider] - audio_response = repository.get_audio_as_bytes(request) + audio_response = await repository.get_audio_as_bytes(request) return audio_response - def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: + async def get_voice_model_by_name(self, voice_model_name: str) -> _models.BaseVoiceModel | None: for repository in self.repositories.values(): - voice_model = repository.get_voice_model_by_name(voice_model_name) + voice_model = await repository.get_voice_model_by_name(voice_model_name) if voice_model: return voice_model + raise ValueError("Voice model not found") - def get_list_voice_models_by_fields( - self, fields: models.TTSSearchVoiceRequestModel - ) -> list[models.AVAILABLE_MODELS_TYPE]: - response_models: list[models.AVAILABLE_MODELS_TYPE] = [] + async def get_list_voice_models_by_fields( + self, fields: _models.TTSSearchVoiceRequestModel + ) -> list[_models.AVAILABLE_MODELS_TYPE]: + response_models: list[_models.AVAILABLE_MODELS_TYPE] = [] for repository in self.repositories.values(): - voice_models = repository.get_voice_models_by_fields(fields) + voice_models = await repository.get_voice_models_by_fields(fields) if voice_models.models: response_models.extend(voice_models.models) return response_models From cd966241358d69d7eeea9254b25b8d2effc737eb Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 17:04:36 +0300 Subject: [PATCH 12/18] feat: [#47] add tts --- src/assistant/lib/app/app.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/assistant/lib/app/app.py b/src/assistant/lib/app/app.py index c6183d7..5628aeb 100644 --- a/src/assistant/lib/app/app.py +++ b/src/assistant/lib/app/app.py @@ -11,7 +11,9 @@ import lib.app.errors as app_errors import lib.app.settings as app_settings import lib.app.split_settings as app_split_settings import lib.clients as clients +import lib.models as models import lib.stt as stt +import lib.tts as tts logger = logging.getLogger(__name__) @@ -59,21 +61,43 @@ class Application: logger.info("Initializing clients") http_yandex_tts_client = clients.AsyncHttpClient( - base_url="yandex", # todo add yandex api url from settings proxy_settings=settings.proxy, + base_url="https://tts.api.cloud.yandex.net/speech/v1/", + headers=settings.tts_yandex.base_headers, ) + http_eleven_labs_tts_client = clients.AsyncHttpClient( + base_url="https://api.elevenlabs.io/v1/", + headers=settings.tts_eleven_labs.base_headers, + ) + disposable_resources.append( DisposableResource( name="http_client yandex", dispose_callback=http_yandex_tts_client.close(), ) ) + disposable_resources.append( + DisposableResource( + name="http_client eleven labs", + dispose_callback=http_eleven_labs_tts_client.close(), + ) + ) # Repositories logger.info("Initializing repositories") stt_repository: stt.STTProtocol = stt.OpenaiSpeechRepository(settings=settings) + tts_yandex_repository = tts.TTSYandexRepository( + tts_settings=app_split_settings.TTSYandexSettings(), + client=http_yandex_tts_client, + ) + tts_eleven_labs_repository = tts.TTSElevenLabsRepository( + tts_settings=app_split_settings.TTSElevenLabsSettings(), + client=http_eleven_labs_tts_client, + is_models_from_api=True, + ) + # Caches logger.info("Initializing caches") @@ -83,6 +107,12 @@ class Application: logger.info("Initializing services") stt_service: stt.SpeechService = stt.SpeechService(repository=stt_repository) # type: ignore + tts_service: tts.TTSService = tts.TTSService( # type: ignore + repositories={ + models.VoiceModelProvidersEnum.YANDEX: tts_yandex_repository, + models.VoiceModelProvidersEnum.ELEVEN_LABS: tts_eleven_labs_repository, + }, + ) # Handlers logger.info("Initializing handlers") From 7285af0f9759b0f042f959caff407c9d4ddb3974 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 19:54:32 +0300 Subject: [PATCH 13/18] fix: [#47] remove debug code --- src/assistant/lib/tts/repositories/eleven_labs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/assistant/lib/tts/repositories/eleven_labs.py b/src/assistant/lib/tts/repositories/eleven_labs.py index eb1ef9a..96973d9 100644 --- a/src/assistant/lib/tts/repositories/eleven_labs.py +++ b/src/assistant/lib/tts/repositories/eleven_labs.py @@ -24,7 +24,6 @@ class TTSElevenLabsRepository(tts_repositories_base.TTSBaseRepository): async def get_all_models_dict_from_api(self) -> list[dict[str, typing.Any]]: response = await self.http_client.get("/models") - print(response) return response.json() async def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: From 7ad8972d01a5ace02968c0d8536e146b776de33b Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 19:59:04 +0300 Subject: [PATCH 14/18] feat: [#47] base_url from settings --- src/assistant/lib/app/app.py | 4 ++-- src/assistant/lib/app/split_settings/tts/eleven_labs.py | 1 + src/assistant/lib/app/split_settings/tts/yandex.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/assistant/lib/app/app.py b/src/assistant/lib/app/app.py index 5628aeb..d77f0c2 100644 --- a/src/assistant/lib/app/app.py +++ b/src/assistant/lib/app/app.py @@ -62,11 +62,11 @@ class Application: http_yandex_tts_client = clients.AsyncHttpClient( proxy_settings=settings.proxy, - base_url="https://tts.api.cloud.yandex.net/speech/v1/", + base_url=settings.tts_yandex.base_url, headers=settings.tts_yandex.base_headers, ) http_eleven_labs_tts_client = clients.AsyncHttpClient( - base_url="https://api.elevenlabs.io/v1/", + base_url=settings.tts_eleven_labs.base_url, headers=settings.tts_eleven_labs.base_headers, ) diff --git a/src/assistant/lib/app/split_settings/tts/eleven_labs.py b/src/assistant/lib/app/split_settings/tts/eleven_labs.py index 496a66b..d5b099a 100644 --- a/src/assistant/lib/app/split_settings/tts/eleven_labs.py +++ b/src/assistant/lib/app/split_settings/tts/eleven_labs.py @@ -14,6 +14,7 @@ class TTSElevenLabsSettings(pydantic_settings.BaseSettings): api_key: pydantic.SecretStr = pydantic.Field(default=...) default_voice_id: str = "EXAVITQu4vr4xnSDxMaL" + base_url: str = "https://api.elevenlabs.io/v1/" @property def base_headers(self) -> dict[str, str]: diff --git a/src/assistant/lib/app/split_settings/tts/yandex.py b/src/assistant/lib/app/split_settings/tts/yandex.py index 38393b0..e0dbc44 100644 --- a/src/assistant/lib/app/split_settings/tts/yandex.py +++ b/src/assistant/lib/app/split_settings/tts/yandex.py @@ -17,6 +17,7 @@ class TTSYandexSettings(pydantic_settings.BaseSettings): audio_format: typing.Literal["oggopus", "mp3", "lpcm"] = "oggopus" sample_rate_hertz: int = 48000 api_key: pydantic.SecretStr = pydantic.Field(default=...) + base_url: str = "https://tts.api.cloud.yandex.net/speech/v1/" @property def base_headers(self) -> dict[str, str]: From bf4ee5a5db3b3155a3f6ff95ff3645c933c62fc2 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 20:00:16 +0300 Subject: [PATCH 15/18] feat: [#47] add tts api keys from github.secrets --- .github/workflows/check-pr.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/check-pr.yaml b/.github/workflows/check-pr.yaml index cdb0be7..f74ca27 100644 --- a/.github/workflows/check-pr.yaml +++ b/.github/workflows/check-pr.yaml @@ -111,6 +111,8 @@ jobs: API_HOST: ${{ vars.API_HOST }} API_PORT: ${{ vars.API_PORT }} APP_RELOAD: ${{ vars.APP_RELOAD }} + TTS_YANDEX_API_KEY: ${{ secrets.TTS_YANDEX_API_KEY }} + TTS_ELEVEN_LABS_API_KEY: ${{ secrets.TTS_ELEVEN_LABS_API_KEY }} working-directory: src/${{ matrix.package }} run: | make ci-test From 77f246e7771406bd6519805d72cb6ba8dc451127 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Fri, 13 Oct 2023 20:06:06 +0300 Subject: [PATCH 16/18] feat: [#47] add tts settings from github.variables --- .github/workflows/check-pr.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/check-pr.yaml b/.github/workflows/check-pr.yaml index f74ca27..6ffc6aa 100644 --- a/.github/workflows/check-pr.yaml +++ b/.github/workflows/check-pr.yaml @@ -113,6 +113,9 @@ jobs: APP_RELOAD: ${{ vars.APP_RELOAD }} TTS_YANDEX_API_KEY: ${{ secrets.TTS_YANDEX_API_KEY }} TTS_ELEVEN_LABS_API_KEY: ${{ secrets.TTS_ELEVEN_LABS_API_KEY }} + TTS_YANDEX_AUDIO_FORMAT: ${{ vars.TTS_YANDEX_AUDIO_FORMAT }} + TTS_YANDEX_SAMPLE_RATE_HERTZ: ${{ vars.TTS_YANDEX_SAMPLE_RATE_HERTZ }} + TTS_ELEVEN_LABS_DEFAULT_VOICE_ID: ${{ vars.TTS_ELEVEN_LABS_DEFAULT_VOICE_ID }} working-directory: src/${{ matrix.package }} run: | make ci-test From 9544edfc6e247d0cc8d4e2e3e91ba26c3ed228cd Mon Sep 17 00:00:00 2001 From: ksieuk Date: Sat, 14 Oct 2023 19:12:10 +0300 Subject: [PATCH 17/18] feat: [#47] add default model --- src/assistant/lib/models/tts/models.py | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/assistant/lib/models/tts/models.py b/src/assistant/lib/models/tts/models.py index 712dee3..cfc3628 100644 --- a/src/assistant/lib/models/tts/models.py +++ b/src/assistant/lib/models/tts/models.py @@ -5,12 +5,45 @@ import lib.models.tts.voice.languages as models_tts_languages AVAILABLE_MODELS_TYPE = models_tts_voice.YandexVoiceModel | models_tts_voice.ElevenLabsVoiceModel LIST_VOICE_MODELS_TYPE = models_tts_voice.YandexListVoiceModelsModel | models_tts_voice.ElevenLabsListVoiceModelsModel +DEFAULT_MODEL = models_tts_voice.ElevenLabsVoiceModel( + voice_id="eleven_multilingual_v2", + languages=[ + models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.JAPANESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.CHINESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.GERMAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.HINDI, + models_tts_languages.ElevenLabsLanguageCodesEnum.FRENCH, + models_tts_languages.ElevenLabsLanguageCodesEnum.KOREAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.PORTUGUESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.ITALIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.SPANISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.INDONESIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.DUTCH, + models_tts_languages.ElevenLabsLanguageCodesEnum.TURKISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.FILIPINO, + models_tts_languages.ElevenLabsLanguageCodesEnum.POLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.SWEDISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.BULGARIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.ROMANIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.ARABIC, + models_tts_languages.ElevenLabsLanguageCodesEnum.CZECH, + models_tts_languages.ElevenLabsLanguageCodesEnum.GREEK, + models_tts_languages.ElevenLabsLanguageCodesEnum.FINNISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.CROATIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.MALAY, + models_tts_languages.ElevenLabsLanguageCodesEnum.SLOVAK, + models_tts_languages.ElevenLabsLanguageCodesEnum.DANISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.TAMIL, + models_tts_languages.ElevenLabsLanguageCodesEnum.UKRAINIAN, + ], +) class TTSCreateRequestModel(pydantic.BaseModel): model_config = pydantic.ConfigDict(use_enum_values=True) - voice_model: AVAILABLE_MODELS_TYPE + voice_model: AVAILABLE_MODELS_TYPE = DEFAULT_MODEL text: str From 2d47520386e78c9b9a96be834bf91407eaaf66c2 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Sat, 14 Oct 2023 22:02:45 +0300 Subject: [PATCH 18/18] feat: [#47] add timeout for tts clients --- src/assistant/.env.example | 2 ++ src/assistant/lib/app/app.py | 2 ++ src/assistant/lib/app/split_settings/tts/eleven_labs.py | 1 + src/assistant/lib/app/split_settings/tts/yandex.py | 1 + 4 files changed, 6 insertions(+) diff --git a/src/assistant/.env.example b/src/assistant/.env.example index 030bda3..469f268 100644 --- a/src/assistant/.env.example +++ b/src/assistant/.env.example @@ -34,6 +34,8 @@ OPENAI_STT_MODEL=whisper-1 TTS_YANDEX_API_KEY= TTS_YANDEX_AUDIO_FORMAT=oggopus TTS_YANDEX_SAMPLE_RATE_HERTZ=48000 +TTS_YANDEX_TIMEOUT_SECONDS=30 TTS_ELEVEN_LABS_API_KEY= TTS_ELEVEN_LABS_DEFAULT_VOICE_ID=EXAVITQu4vr4xnSDxMaL +TTS_ELEVEN_LABS_TIMEOUT_SECONDS=30 diff --git a/src/assistant/lib/app/app.py b/src/assistant/lib/app/app.py index d77f0c2..78258a3 100644 --- a/src/assistant/lib/app/app.py +++ b/src/assistant/lib/app/app.py @@ -64,10 +64,12 @@ class Application: proxy_settings=settings.proxy, base_url=settings.tts_yandex.base_url, headers=settings.tts_yandex.base_headers, + timeout=settings.tts_yandex.timeout_seconds, ) http_eleven_labs_tts_client = clients.AsyncHttpClient( base_url=settings.tts_eleven_labs.base_url, headers=settings.tts_eleven_labs.base_headers, + timeout=settings.tts_eleven_labs.timeout_seconds, ) disposable_resources.append( diff --git a/src/assistant/lib/app/split_settings/tts/eleven_labs.py b/src/assistant/lib/app/split_settings/tts/eleven_labs.py index d5b099a..f664176 100644 --- a/src/assistant/lib/app/split_settings/tts/eleven_labs.py +++ b/src/assistant/lib/app/split_settings/tts/eleven_labs.py @@ -15,6 +15,7 @@ class TTSElevenLabsSettings(pydantic_settings.BaseSettings): api_key: pydantic.SecretStr = pydantic.Field(default=...) default_voice_id: str = "EXAVITQu4vr4xnSDxMaL" base_url: str = "https://api.elevenlabs.io/v1/" + timeout_seconds: int = 30 @property def base_headers(self) -> dict[str, str]: diff --git a/src/assistant/lib/app/split_settings/tts/yandex.py b/src/assistant/lib/app/split_settings/tts/yandex.py index e0dbc44..76d0829 100644 --- a/src/assistant/lib/app/split_settings/tts/yandex.py +++ b/src/assistant/lib/app/split_settings/tts/yandex.py @@ -18,6 +18,7 @@ class TTSYandexSettings(pydantic_settings.BaseSettings): sample_rate_hertz: int = 48000 api_key: pydantic.SecretStr = pydantic.Field(default=...) base_url: str = "https://tts.api.cloud.yandex.net/speech/v1/" + timeout_seconds: int = 30 @property def base_headers(self) -> dict[str, str]: