From 48a6f4e1675eb6e9639c04510359be42533801b0 Mon Sep 17 00:00:00 2001 From: ksieuk Date: Wed, 11 Oct 2023 18:41:10 +0300 Subject: [PATCH] fix: [#28] review fixes --- src/assistant/lib/models/__init__.py | 4 +- src/assistant/lib/models/tts/__init__.py | 7 +- .../lib/models/tts/{base.py => models.py} | 26 ++--- .../lib/models/tts/voice/__init__.py | 17 ++++ src/assistant/lib/models/tts/voice/base.py | 27 ++++++ .../lib/models/tts/voice/eleven_labs.py | 75 ++++++++++++++ .../lib/models/tts/{ => voice}/languages.py | 0 src/assistant/lib/models/tts/voice/yandex.py | 97 +++++++++++++++++++ src/assistant/lib/tts/models/__init__.py | 14 +-- src/assistant/lib/tts/models/protocols.py | 14 +++ src/assistant/lib/tts/models/repository.py | 17 ---- src/assistant/lib/tts/models/service.py | 9 -- src/assistant/lib/tts/models/utils.py | 4 - .../lib/tts/models/voice/__init__.py | 9 -- .../lib/tts/models/voice/eleven_labs.py | 73 -------------- src/assistant/lib/tts/models/voice/yandex.py | 60 ------------ src/assistant/lib/tts/repositories/base.py | 8 +- src/assistant/lib/tts/services.py | 24 ++--- 18 files changed, 258 insertions(+), 227 deletions(-) rename src/assistant/lib/models/tts/{base.py => models.py} (53%) create mode 100644 src/assistant/lib/models/tts/voice/__init__.py create mode 100644 src/assistant/lib/models/tts/voice/base.py create mode 100644 src/assistant/lib/models/tts/voice/eleven_labs.py rename src/assistant/lib/models/tts/{ => voice}/languages.py (100%) create mode 100644 src/assistant/lib/models/tts/voice/yandex.py create mode 100644 src/assistant/lib/tts/models/protocols.py delete mode 100644 src/assistant/lib/tts/models/repository.py delete mode 100644 src/assistant/lib/tts/models/service.py delete mode 100644 src/assistant/lib/tts/models/utils.py delete mode 100644 src/assistant/lib/tts/models/voice/__init__.py delete mode 100644 src/assistant/lib/tts/models/voice/eleven_labs.py delete mode 100644 src/assistant/lib/tts/models/voice/yandex.py diff --git a/src/assistant/lib/models/__init__.py b/src/assistant/lib/models/__init__.py index 00fea5c..70993c4 100644 --- a/src/assistant/lib/models/__init__.py +++ b/src/assistant/lib/models/__init__.py @@ -3,16 +3,18 @@ from .token import Token from .tts import * __all__ = [ + "AVAILABLE_MODELS_TYPE", "Base", "BaseLanguageCodesEnum", "BaseVoiceModel", - "BaseVoiceModel", "ElevenLabsLanguageCodesEnum", "IdCreatedUpdatedBaseMixin", "LANGUAGE_CODES_ENUM_TYPE", + "LIST_VOICE_MODELS_TYPE", "TTSCreateRequestModel", "TTSCreateResponseModel", "TTSSearchVoiceRequestModel", "Token", + "VoiceModelProvidersEnum", "YandexLanguageCodesEnum", ] diff --git a/src/assistant/lib/models/tts/__init__.py b/src/assistant/lib/models/tts/__init__.py index e26941c..2eabaf4 100644 --- a/src/assistant/lib/models/tts/__init__.py +++ b/src/assistant/lib/models/tts/__init__.py @@ -1,13 +1,16 @@ -from .base import * -from .languages import * +from .models import * +from .voice import * __all__ = [ + "AVAILABLE_MODELS_TYPE", "BaseLanguageCodesEnum", "BaseVoiceModel", "ElevenLabsLanguageCodesEnum", "LANGUAGE_CODES_ENUM_TYPE", + "LIST_VOICE_MODELS_TYPE", "TTSCreateRequestModel", "TTSCreateResponseModel", "TTSSearchVoiceRequestModel", + "VoiceModelProvidersEnum", "YandexLanguageCodesEnum", ] diff --git a/src/assistant/lib/models/tts/base.py b/src/assistant/lib/models/tts/models.py similarity index 53% rename from src/assistant/lib/models/tts/base.py rename to src/assistant/lib/models/tts/models.py index 7be9b6a..712dee3 100644 --- a/src/assistant/lib/models/tts/base.py +++ b/src/assistant/lib/models/tts/models.py @@ -1,14 +1,16 @@ -import typing - import pydantic -import lib.models.tts.languages as models_tts_languages +import lib.models.tts.voice as models_tts_voice +import lib.models.tts.voice.languages as models_tts_languages + +AVAILABLE_MODELS_TYPE = models_tts_voice.YandexVoiceModel | models_tts_voice.ElevenLabsVoiceModel +LIST_VOICE_MODELS_TYPE = models_tts_voice.YandexListVoiceModelsModel | models_tts_voice.ElevenLabsListVoiceModelsModel class TTSCreateRequestModel(pydantic.BaseModel): model_config = pydantic.ConfigDict(use_enum_values=True) - voice_model_name: str + voice_model: AVAILABLE_MODELS_TYPE text: str @@ -16,22 +18,6 @@ class TTSCreateResponseModel(pydantic.BaseModel): audio_content: bytes -class BaseVoiceModel(pydantic.BaseModel): - voice_id: str - voice_name: str | None = None - languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE] - company_name: str - - @pydantic.model_validator(mode="before") - @classmethod - def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: - voice_id = data.get("voice_id") - voice_name = data.get("voice_name") - if not voice_name and voice_id: - data["voice_name"] = voice_id - return data - - class TTSSearchVoiceRequestModel(pydantic.BaseModel): voice_id: str | None = None voice_name: str | None = None diff --git a/src/assistant/lib/models/tts/voice/__init__.py b/src/assistant/lib/models/tts/voice/__init__.py new file mode 100644 index 0000000..f82a8ce --- /dev/null +++ b/src/assistant/lib/models/tts/voice/__init__.py @@ -0,0 +1,17 @@ +from .base import * +from .eleven_labs import * +from .languages import * +from .yandex import * + +__all__ = [ + "BaseLanguageCodesEnum", + "BaseVoiceModel", + "ElevenLabsLanguageCodesEnum", + "ElevenLabsListVoiceModelsModel", + "ElevenLabsVoiceModel", + "LANGUAGE_CODES_ENUM_TYPE", + "VoiceModelProvidersEnum", + "YandexLanguageCodesEnum", + "YandexListVoiceModelsModel", + "YandexVoiceModel", +] diff --git a/src/assistant/lib/models/tts/voice/base.py b/src/assistant/lib/models/tts/voice/base.py new file mode 100644 index 0000000..45c3960 --- /dev/null +++ b/src/assistant/lib/models/tts/voice/base.py @@ -0,0 +1,27 @@ +import enum +import typing + +import pydantic + +import lib.models.tts.voice.languages as models_tts_languages + + +class VoiceModelProvidersEnum(enum.Enum): + YANDEX = "yandex" + ELEVEN_LABS = "eleven_labs" + + +class BaseVoiceModel(pydantic.BaseModel): + voice_id: str + voice_name: str | None = None + languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE] + provider: VoiceModelProvidersEnum + + @pydantic.model_validator(mode="before") + @classmethod + def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: + voice_id = data.get("voice_id") + voice_name = data.get("voice_name") + if not voice_name and voice_id: + data["voice_name"] = voice_id + return data diff --git a/src/assistant/lib/models/tts/voice/eleven_labs.py b/src/assistant/lib/models/tts/voice/eleven_labs.py new file mode 100644 index 0000000..80804b4 --- /dev/null +++ b/src/assistant/lib/models/tts/voice/eleven_labs.py @@ -0,0 +1,75 @@ +import typing + +import pydantic + +import lib.models.tts.voice.base as models_tts_base +import lib.models.tts.voice.languages as models_tts_languages + + +class ElevenLabsVoiceModel(models_tts_base.BaseVoiceModel): + model_config = pydantic.ConfigDict(use_enum_values=True) + voice_id: str + voice_name: str | None = None + languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE] + provider: models_tts_base.VoiceModelProvidersEnum = models_tts_base.VoiceModelProvidersEnum.ELEVEN_LABS + + +class ElevenLabsListVoiceModelsModel(pydantic.BaseModel): + models: list[ElevenLabsVoiceModel] = [ + ElevenLabsVoiceModel( + voice_id="eleven_multilingual_v1", + languages=[ + models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.GERMAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.POLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.SPANISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.ITALIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.FRENCH, + models_tts_languages.ElevenLabsLanguageCodesEnum.PORTUGUESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.HINDI, + models_tts_languages.ElevenLabsLanguageCodesEnum.ARABIC, + ], + ), + ElevenLabsVoiceModel( + voice_id="eleven_multilingual_v2", + languages=[ + models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.JAPANESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.CHINESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.GERMAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.HINDI, + models_tts_languages.ElevenLabsLanguageCodesEnum.FRENCH, + models_tts_languages.ElevenLabsLanguageCodesEnum.KOREAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.PORTUGUESE, + models_tts_languages.ElevenLabsLanguageCodesEnum.ITALIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.SPANISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.INDONESIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.DUTCH, + models_tts_languages.ElevenLabsLanguageCodesEnum.TURKISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.FILIPINO, + models_tts_languages.ElevenLabsLanguageCodesEnum.POLISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.SWEDISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.BULGARIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.ROMANIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.ARABIC, + models_tts_languages.ElevenLabsLanguageCodesEnum.CZECH, + models_tts_languages.ElevenLabsLanguageCodesEnum.GREEK, + models_tts_languages.ElevenLabsLanguageCodesEnum.FINNISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.CROATIAN, + models_tts_languages.ElevenLabsLanguageCodesEnum.MALAY, + models_tts_languages.ElevenLabsLanguageCodesEnum.SLOVAK, + models_tts_languages.ElevenLabsLanguageCodesEnum.DANISH, + models_tts_languages.ElevenLabsLanguageCodesEnum.TAMIL, + models_tts_languages.ElevenLabsLanguageCodesEnum.UKRAINIAN, + ], + ), + ElevenLabsVoiceModel( + voice_id="eleven_multilingual_v2", + languages=[models_tts_languages.ElevenLabsLanguageCodesEnum.ENGLISH], + ), + ] + + @classmethod + def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self: + voice_models = [ElevenLabsVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api] + return ElevenLabsListVoiceModelsModel(models=voice_models) diff --git a/src/assistant/lib/models/tts/languages.py b/src/assistant/lib/models/tts/voice/languages.py similarity index 100% rename from src/assistant/lib/models/tts/languages.py rename to src/assistant/lib/models/tts/voice/languages.py diff --git a/src/assistant/lib/models/tts/voice/yandex.py b/src/assistant/lib/models/tts/voice/yandex.py new file mode 100644 index 0000000..476a1f0 --- /dev/null +++ b/src/assistant/lib/models/tts/voice/yandex.py @@ -0,0 +1,97 @@ +import typing + +import pydantic + +import lib.models.tts.voice.base as models_tts_base +import lib.models.tts.voice.languages as models_tts_languages + + +class YandexVoiceModel(models_tts_base.BaseVoiceModel): + voice_id: str + voice_name: str | None = None + languages: list[models_tts_languages.LANGUAGE_CODES_ENUM_TYPE] + provider: models_tts_base.VoiceModelProvidersEnum = models_tts_base.VoiceModelProvidersEnum.YANDEX + role: str | None = None + + @pydantic.model_validator(mode="before") + @classmethod + def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: + voice_id = data.get("voice_id") + voice_name = data.get("voice_name") + role = data.get("role") + if not voice_name and voice_id: + data["voice_name"] = f"{voice_id} {role}" if role else voice_id + return data + + +class YandexListVoiceModelsModel(pydantic.BaseModel): + models: list[YandexVoiceModel] = [ + YandexVoiceModel( + voice_id="ermil", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="ermil", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="alena", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="alena", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="jane", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="jane", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="jane", role="evil", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="omazh", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="omazh", role="evil", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="zahar", role="neutral", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="zahar", role="good", languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="filipp", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="madirus", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel(voice_id="dasha", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]), + YandexVoiceModel(voice_id="julia", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]), + YandexVoiceModel(voice_id="lera", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]), + YandexVoiceModel( + voice_id="marina", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="alexander", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel( + voice_id="kirill", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN] + ), + YandexVoiceModel(voice_id="anton", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.RUSSIAN]), + YandexVoiceModel(voice_id="john", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.ENGLISH]), + YandexVoiceModel(voice_id="amira", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.KAZAKH]), + YandexVoiceModel(voice_id="madi", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.KAZAKH]), + YandexVoiceModel(voice_id="lea", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.GERMAN]), + YandexVoiceModel( + voice_id="naomi", role="modern", languages=[models_tts_languages.YandexLanguageCodesEnum.HEBREW] + ), + YandexVoiceModel( + voice_id="naomi", role="classic", languages=[models_tts_languages.YandexLanguageCodesEnum.HEBREW] + ), + YandexVoiceModel(voice_id="nigora", role=None, languages=[models_tts_languages.YandexLanguageCodesEnum.UZBEK]), + ] + + @classmethod + def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self: + voice_models = [YandexVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api] + return YandexListVoiceModelsModel(models=voice_models) diff --git a/src/assistant/lib/tts/models/__init__.py b/src/assistant/lib/tts/models/__init__.py index fd5289e..1064aa8 100644 --- a/src/assistant/lib/tts/models/__init__.py +++ b/src/assistant/lib/tts/models/__init__.py @@ -1,17 +1,5 @@ -from .repository import * -from .service import * -from .utils import * -from .voice import * +from .protocols import * __all__ = [ - "ElevenLabsListVoiceModelsModel", - "ElevenLabsVoiceModel", - "LIST_VOICE_MODELS_TYPE", - "LIST_VOICE_MODELS_TYPE", - "TTSCreateRequestServiceModel", - "TTSCreateResponseServiceModel", "TTSRepositoryProtocol", - "VOICE_MODELS_TYPE", - "YandexListVoiceModelsModel", - "YandexVoiceModel", ] diff --git a/src/assistant/lib/tts/models/protocols.py b/src/assistant/lib/tts/models/protocols.py new file mode 100644 index 0000000..5774c08 --- /dev/null +++ b/src/assistant/lib/tts/models/protocols.py @@ -0,0 +1,14 @@ +import typing + +import lib.models as models + + +class TTSRepositoryProtocol(typing.Protocol): + def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + ... + + def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: + ... + + def get_voice_models_by_fields(self, fields: models.TTSSearchVoiceRequestModel) -> models.LIST_VOICE_MODELS_TYPE: + ... diff --git a/src/assistant/lib/tts/models/repository.py b/src/assistant/lib/tts/models/repository.py deleted file mode 100644 index 142d7c5..0000000 --- a/src/assistant/lib/tts/models/repository.py +++ /dev/null @@ -1,17 +0,0 @@ -import typing - -import lib.models as models -import lib.tts.models.utils as tts_models_utils - - -class TTSRepositoryProtocol(typing.Protocol): - def get_audio_as_bytes_from_text(self, text: str) -> models.TTSCreateResponseModel: - ... - - def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: - ... - - def get_voice_models_by_fields( - self, fields: models.TTSSearchVoiceRequestModel - ) -> tts_models_utils.LIST_VOICE_MODELS_TYPE: - ... diff --git a/src/assistant/lib/tts/models/service.py b/src/assistant/lib/tts/models/service.py deleted file mode 100644 index 694770e..0000000 --- a/src/assistant/lib/tts/models/service.py +++ /dev/null @@ -1,9 +0,0 @@ -import lib.models as models - - -class TTSCreateRequestServiceModel(models.TTSCreateRequestModel): - ... - - -class TTSCreateResponseServiceModel(models.TTSCreateResponseModel): - ... diff --git a/src/assistant/lib/tts/models/utils.py b/src/assistant/lib/tts/models/utils.py deleted file mode 100644 index 1a653b5..0000000 --- a/src/assistant/lib/tts/models/utils.py +++ /dev/null @@ -1,4 +0,0 @@ -import lib.tts.models.voice as tts_models_voice - -VOICE_MODELS_TYPE = tts_models_voice.YandexVoiceModel | tts_models_voice.ElevenLabsVoiceModel -LIST_VOICE_MODELS_TYPE = tts_models_voice.YandexListVoiceModelsModel | tts_models_voice.ElevenLabsListVoiceModelsModel diff --git a/src/assistant/lib/tts/models/voice/__init__.py b/src/assistant/lib/tts/models/voice/__init__.py deleted file mode 100644 index 7ca63ac..0000000 --- a/src/assistant/lib/tts/models/voice/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .eleven_labs import * -from .yandex import * - -__all__ = [ - "ElevenLabsListVoiceModelsModel", - "ElevenLabsVoiceModel", - "YandexListVoiceModelsModel", - "YandexVoiceModel", -] diff --git a/src/assistant/lib/tts/models/voice/eleven_labs.py b/src/assistant/lib/tts/models/voice/eleven_labs.py deleted file mode 100644 index 1e989c1..0000000 --- a/src/assistant/lib/tts/models/voice/eleven_labs.py +++ /dev/null @@ -1,73 +0,0 @@ -import typing - -import pydantic - -import lib.models as models - - -class ElevenLabsVoiceModel(models.BaseVoiceModel): - voice_id: str - voice_name: str | None = None - languages: list[models.LANGUAGE_CODES_ENUM_TYPE] - company_name: str = "eleven labs" - - -class ElevenLabsListVoiceModelsModel(pydantic.BaseModel): - models: list[ElevenLabsVoiceModel] = [ - ElevenLabsVoiceModel( - voice_id="eleven_multilingual_v1", - languages=[ - models.ElevenLabsLanguageCodesEnum.ENGLISH, - models.ElevenLabsLanguageCodesEnum.GERMAN, - models.ElevenLabsLanguageCodesEnum.POLISH, - models.ElevenLabsLanguageCodesEnum.SPANISH, - models.ElevenLabsLanguageCodesEnum.ITALIAN, - models.ElevenLabsLanguageCodesEnum.FRENCH, - models.ElevenLabsLanguageCodesEnum.PORTUGUESE, - models.ElevenLabsLanguageCodesEnum.HINDI, - models.ElevenLabsLanguageCodesEnum.ARABIC, - ], - ), - ElevenLabsVoiceModel( - voice_id="eleven_multilingual_v2", - languages=[ - models.ElevenLabsLanguageCodesEnum.ENGLISH, - models.ElevenLabsLanguageCodesEnum.JAPANESE, - models.ElevenLabsLanguageCodesEnum.CHINESE, - models.ElevenLabsLanguageCodesEnum.GERMAN, - models.ElevenLabsLanguageCodesEnum.HINDI, - models.ElevenLabsLanguageCodesEnum.FRENCH, - models.ElevenLabsLanguageCodesEnum.KOREAN, - models.ElevenLabsLanguageCodesEnum.PORTUGUESE, - models.ElevenLabsLanguageCodesEnum.ITALIAN, - models.ElevenLabsLanguageCodesEnum.SPANISH, - models.ElevenLabsLanguageCodesEnum.INDONESIAN, - models.ElevenLabsLanguageCodesEnum.DUTCH, - models.ElevenLabsLanguageCodesEnum.TURKISH, - models.ElevenLabsLanguageCodesEnum.FILIPINO, - models.ElevenLabsLanguageCodesEnum.POLISH, - models.ElevenLabsLanguageCodesEnum.SWEDISH, - models.ElevenLabsLanguageCodesEnum.BULGARIAN, - models.ElevenLabsLanguageCodesEnum.ROMANIAN, - models.ElevenLabsLanguageCodesEnum.ARABIC, - models.ElevenLabsLanguageCodesEnum.CZECH, - models.ElevenLabsLanguageCodesEnum.GREEK, - models.ElevenLabsLanguageCodesEnum.FINNISH, - models.ElevenLabsLanguageCodesEnum.CROATIAN, - models.ElevenLabsLanguageCodesEnum.MALAY, - models.ElevenLabsLanguageCodesEnum.SLOVAK, - models.ElevenLabsLanguageCodesEnum.DANISH, - models.ElevenLabsLanguageCodesEnum.TAMIL, - models.ElevenLabsLanguageCodesEnum.UKRAINIAN, - ], - ), - ElevenLabsVoiceModel( - voice_id="eleven_multilingual_v2", - languages=[models.ElevenLabsLanguageCodesEnum.ENGLISH], - ), - ] - - @classmethod - def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self: - voice_models = [ElevenLabsVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api] - return ElevenLabsListVoiceModelsModel(models=voice_models) diff --git a/src/assistant/lib/tts/models/voice/yandex.py b/src/assistant/lib/tts/models/voice/yandex.py deleted file mode 100644 index 099acde..0000000 --- a/src/assistant/lib/tts/models/voice/yandex.py +++ /dev/null @@ -1,60 +0,0 @@ -import typing - -import pydantic - -import lib.models as models - - -class YandexVoiceModel(models.BaseVoiceModel): - voice_id: str - voice_name: str | None = None - languages: list[models.LANGUAGE_CODES_ENUM_TYPE] - company_name: str = "yandex" - role: str | None = None - - @pydantic.model_validator(mode="before") - @classmethod - def check_voice_name_exists(cls, data: typing.Any) -> typing.Any: - voice_id = data.get("voice_id") - voice_name = data.get("voice_name") - role = data.get("role") - if not voice_name and voice_id: - data["voice_name"] = f"{voice_id} {role}" if role else voice_id - return data - - -class YandexListVoiceModelsModel(pydantic.BaseModel): - models: list[YandexVoiceModel] = [ - YandexVoiceModel(voice_id="ermil", role="neutral", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="ermil", role="good", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="alena", role="neutral", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="alena", role="good", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="jane", role="neutral", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="jane", role="good", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="jane", role="evil", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="omazh", role="neutral", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="omazh", role="evil", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="zahar", role="neutral", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="zahar", role="good", languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="filipp", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="madirus", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="dasha", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="julia", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="lera", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="marina", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="alexander", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="kirill", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="anton", role=None, languages=[models.YandexLanguageCodesEnum.RUSSIAN]), - YandexVoiceModel(voice_id="john", role=None, languages=[models.YandexLanguageCodesEnum.ENGLISH]), - YandexVoiceModel(voice_id="amira", role=None, languages=[models.YandexLanguageCodesEnum.KAZAKH]), - YandexVoiceModel(voice_id="madi", role=None, languages=[models.YandexLanguageCodesEnum.KAZAKH]), - YandexVoiceModel(voice_id="lea", role=None, languages=[models.YandexLanguageCodesEnum.GERMAN]), - YandexVoiceModel(voice_id="naomi", role="modern", languages=[models.YandexLanguageCodesEnum.HEBREW]), - YandexVoiceModel(voice_id="naomi", role="classic", languages=[models.YandexLanguageCodesEnum.HEBREW]), - YandexVoiceModel(voice_id="nigora", role=None, languages=[models.YandexLanguageCodesEnum.UZBEK]), - ] - - @classmethod - def from_api(cls, voice_models_from_api: list[dict[str, typing.Any]]) -> typing.Self: - voice_models = [YandexVoiceModel.model_validate(voice_model) for voice_model in voice_models_from_api] - return YandexListVoiceModelsModel(models=voice_models) diff --git a/src/assistant/lib/tts/repositories/base.py b/src/assistant/lib/tts/repositories/base.py index 238b873..cabbfc5 100644 --- a/src/assistant/lib/tts/repositories/base.py +++ b/src/assistant/lib/tts/repositories/base.py @@ -1,7 +1,6 @@ import abc import lib.models as models -import lib.tts.models as tts_models class HttpClient: # Mocked class todo remove and use real http client from lib.clients.http_client @@ -15,11 +14,11 @@ class TTSBaseRepository(abc.ABC): @property @abc.abstractmethod - def voice_models(self) -> tts_models.LIST_VOICE_MODELS_TYPE: + def voice_models(self) -> models.LIST_VOICE_MODELS_TYPE: ... @abc.abstractmethod - def get_audio_as_bytes_from_text(self, text: str) -> models.TTSCreateResponseModel: + def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: raise NotImplementedError def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: @@ -34,7 +33,7 @@ class TTSBaseRepository(abc.ABC): def get_list_voice_models_by_fields( self, fields: models.TTSSearchVoiceRequestModel - ) -> list[tts_models.VOICE_MODELS_TYPE]: + ) -> list[models.AVAILABLE_MODELS_TYPE]: """ Search voice model by fields :param fields: Any fields from TTSSearchVoiceRequestModel @@ -43,7 +42,6 @@ class TTSBaseRepository(abc.ABC): fields_dump = fields.model_dump(exclude_none=True) voice_models_response = [] for voice_model in self.voice_models.models: - voice_model: tts_models.VOICE_MODELS_TYPE for field, field_value in fields_dump.items(): if field == "languages": # language is a list language_names: set[str] = {item.name for item in field_value} diff --git a/src/assistant/lib/tts/services.py b/src/assistant/lib/tts/services.py index df8a00b..1d6a337 100644 --- a/src/assistant/lib/tts/services.py +++ b/src/assistant/lib/tts/services.py @@ -7,32 +7,28 @@ class TTSService: def __init__( self, settings: app_settings.Settings, - tts_repositories: list[tts_models.TTSRepositoryProtocol], + repositories: dict[models.VoiceModelProvidersEnum, tts_models.TTSRepositoryProtocol], ): self.settings = settings - self.tts_repositories = tts_repositories + self.repositories = repositories - def get_audio_as_bytes_from_text(self, tts_request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: - for repository in self.tts_repositories: - voice_model = repository.get_voice_model_by_name(tts_request.voice_model_name) - if voice_model: - audio_response = repository.get_audio_as_bytes_from_text(tts_request.text) - break - else: - raise ValueError(f"Voice model {tts_request.voice_model_name} not found") + def get_audio_as_bytes(self, request: models.TTSCreateRequestModel) -> models.TTSCreateResponseModel: + model = request.voice_model + repository = self.repositories[model.provider] + audio_response = repository.get_audio_as_bytes(request) return audio_response def get_voice_model_by_name(self, voice_model_name: str) -> models.BaseVoiceModel | None: - for repository in self.tts_repositories: + for repository in self.repositories.values(): voice_model = repository.get_voice_model_by_name(voice_model_name) if voice_model: return voice_model def get_list_voice_models_by_fields( self, fields: models.TTSSearchVoiceRequestModel - ) -> list[tts_models.VOICE_MODELS_TYPE]: - response_models: list[tts_models.VOICE_MODELS_TYPE] = [] - for repository in self.tts_repositories: + ) -> list[models.AVAILABLE_MODELS_TYPE]: + response_models: list[models.AVAILABLE_MODELS_TYPE] = [] + for repository in self.repositories.values(): voice_models = repository.get_voice_models_by_fields(fields) if voice_models.models: response_models.extend(voice_models.models)