diff --git a/src/assistant/lib/tts/__init__.py b/src/assistant/lib/tts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/assistant/lib/tts/models/__init__.py b/src/assistant/lib/tts/models/__init__.py
new file mode 100644
index 0000000..f3a089c
--- /dev/null
+++ b/src/assistant/lib/tts/models/__init__.py
@@ -0,0 +1,13 @@
+"""Re-exports the TTS request/response models and the voice models."""
+
+from .models import *
+from .voice import *
+
+__all__ = [
+    "LanguageCodes",
+    "TTSRequestModel",
+    "TTSResponseModel",
+    "VoiceModel",
+    "YandexVoiceModelNamesString",
+    "YandexVoiceModels",
+]
diff --git a/src/assistant/lib/tts/models/models.py b/src/assistant/lib/tts/models/models.py
new file mode 100644
index 0000000..14ee677
--- /dev/null
+++ b/src/assistant/lib/tts/models/models.py
@@ -0,0 +1,20 @@
+"""Pydantic models for TTS requests and responses."""
+
+import pydantic
+
+import lib.tts.models.voice as tts_models_voice
+
+
+class TTSRequestModel(pydantic.BaseModel):
+    """TTS request; the only field defined here is the voice selection."""
+
+    # Keep the enum's string value (not the enum member) on the validated model.
+    model_config = pydantic.ConfigDict(use_enum_values=True)
+
+    voice_model_name: tts_models_voice.YandexVoiceModelNamesString
+
+
+class TTSResponseModel(pydantic.BaseModel):
+    """TTS response holding the audio content as bytes."""
+
+    audio_content: bytes
diff --git a/src/assistant/lib/tts/models/voice/__init__.py b/src/assistant/lib/tts/models/voice/__init__.py
new file mode 100644
index 0000000..ce1a9cf
--- /dev/null
+++ b/src/assistant/lib/tts/models/voice/__init__.py
@@ -0,0 +1,11 @@
+"""Re-exports the base voice types and the Yandex voice enums."""
+
+from .base import *
+from .yandex import *
+
+__all__ = [
+    "LanguageCodes",
+    "VoiceModel",
+    "YandexVoiceModelNamesString",
+    "YandexVoiceModels",
+]
diff --git a/src/assistant/lib/tts/models/voice/base.py b/src/assistant/lib/tts/models/voice/base.py
new file mode 100644
index 0000000..65082f1
--- /dev/null
+++ b/src/assistant/lib/tts/models/voice/base.py
@@ -0,0 +1,28 @@
+"""Base types for describing TTS voices."""
+
+import enum
+
+import pydantic
+
+
+class LanguageCodes(enum.Enum):
+    """Locale tags (language-REGION) of the languages that voices are defined for."""
+
+    RUSSIAN = "ru-RU"
+    ENGLISH = "en-US"
+    # Kazakhstan's ISO 3166-1 region code is "KZ"; "KK" is not an assigned code.
+    KAZAKH = "kk-KZ"
+    GERMAN = "de-DE"
+    HEBREW = "he-IL"
+    UZBEK = "uz-UZ"
+
+
+class VoiceModel(pydantic.BaseModel):
+    """A single voice: its name, an optional role, and its language."""
+
+    # Store ``lang`` as the enum's string value rather than the enum member.
+    model_config = pydantic.ConfigDict(use_enum_values=True)
+
+    voice_name: str
+    role: str | None = None  # None for voices that are defined without a role
+    lang: LanguageCodes
diff --git a/src/assistant/lib/tts/models/voice/yandex.py b/src/assistant/lib/tts/models/voice/yandex.py
new file mode 100644
index 0000000..3a47c6b
--- /dev/null
+++ b/src/assistant/lib/tts/models/voice/yandex.py
@@ -0,0 +1,136 @@
+"""Yandex voices: their string identifiers and their full voice definitions."""
+
+import enum
+
+import lib.tts.models.voice.base as tts_models_voice_base
+
+
+class YandexVoiceModelNamesString(enum.Enum):
+    """Voice identifiers as strings: the voice name, plus its role where one exists.
+
+    Member names mirror those of ``YandexVoiceModels``.
+    """
+
+    # Russian
+    ERMIL_NEUTRAL = "ermil neutral"
+    ERMIL_GOOD = "ermil good"
+    ALENA_NEUTRAL = "alena neutral"
+    ALENA_GOOD = "alena good"
+    JANE_NEUTRAL = "jane neutral"
+    JANE_GOOD = "jane good"
+    JANE_EVIL = "jane evil"
+    OMAZH_NEUTRAL = "omazh neutral"
+    OMAZH_EVIL = "omazh evil"
+    ZAHAR_NEUTRAL = "zahar neutral"
+    ZAHAR_GOOD = "zahar good"
+    FILIPP = "filipp"
+    MADIRUS = "madirus"
+    DASHA = "dasha"
+    JULIA = "julia"
+    LERA = "lera"
+    MARINA = "marina"
+    ALEXANDER = "alexander"
+    KIRILL = "kirill"
+    ANTON = "anton"
+    # English
+    JOHN = "john"
+    # Kazakh
+    AMIRA = "amira"
+    MADI = "madi"
+    # German
+    LEA = "lea"
+    # Hebrew
+    NAOMI_MODERN = "naomi modern"
+    NAOMI_CLASSIC = "naomi classic"
+    # Uzbek
+    NIGORA = "nigora"
+
+
+class YandexVoiceModels(enum.Enum):
+    """Full ``VoiceModel`` definition (name, role, language) for each Yandex voice."""
+
+    # Russian
+    ERMIL_NEUTRAL = tts_models_voice_base.VoiceModel(
+        voice_name="ermil", role="neutral", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ERMIL_GOOD = tts_models_voice_base.VoiceModel(
+        voice_name="ermil", role="good", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ALENA_NEUTRAL = tts_models_voice_base.VoiceModel(
+        voice_name="alena", role="neutral", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ALENA_GOOD = tts_models_voice_base.VoiceModel(
+        voice_name="alena", role="good", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    JANE_NEUTRAL = tts_models_voice_base.VoiceModel(
+        voice_name="jane", role="neutral", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    JANE_GOOD = tts_models_voice_base.VoiceModel(
+        voice_name="jane", role="good", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    JANE_EVIL = tts_models_voice_base.VoiceModel(
+        voice_name="jane", role="evil", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    OMAZH_NEUTRAL = tts_models_voice_base.VoiceModel(
+        voice_name="omazh", role="neutral", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    OMAZH_EVIL = tts_models_voice_base.VoiceModel(
+        voice_name="omazh", role="evil", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ZAHAR_NEUTRAL = tts_models_voice_base.VoiceModel(
+        voice_name="zahar", role="neutral", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ZAHAR_GOOD = tts_models_voice_base.VoiceModel(
+        voice_name="zahar", role="good", lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    FILIPP = tts_models_voice_base.VoiceModel(
+        voice_name="filipp", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    MADIRUS = tts_models_voice_base.VoiceModel(
+        voice_name="madirus", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    DASHA = tts_models_voice_base.VoiceModel(
+        voice_name="dasha", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    JULIA = tts_models_voice_base.VoiceModel(
+        voice_name="julia", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    LERA = tts_models_voice_base.VoiceModel(
+        voice_name="lera", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    MARINA = tts_models_voice_base.VoiceModel(
+        voice_name="marina", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ALEXANDER = tts_models_voice_base.VoiceModel(
+        voice_name="alexander", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    KIRILL = tts_models_voice_base.VoiceModel(
+        voice_name="kirill", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    ANTON = tts_models_voice_base.VoiceModel(
+        voice_name="anton", role=None, lang=tts_models_voice_base.LanguageCodes.RUSSIAN
+    )
+    # English
+    JOHN = tts_models_voice_base.VoiceModel(
+        voice_name="john", role=None, lang=tts_models_voice_base.LanguageCodes.ENGLISH
+    )
+    # Kazakh
+    AMIRA = tts_models_voice_base.VoiceModel(
+        voice_name="amira", role=None, lang=tts_models_voice_base.LanguageCodes.KAZAKH
+    )
+    MADI = tts_models_voice_base.VoiceModel(
+        voice_name="madi", role=None, lang=tts_models_voice_base.LanguageCodes.KAZAKH
+    )
+    # German
+    LEA = tts_models_voice_base.VoiceModel(voice_name="lea", role=None, lang=tts_models_voice_base.LanguageCodes.GERMAN)
+    # Hebrew
+    NAOMI_MODERN = tts_models_voice_base.VoiceModel(
+        voice_name="naomi", role="modern", lang=tts_models_voice_base.LanguageCodes.HEBREW
+    )
+    NAOMI_CLASSIC = tts_models_voice_base.VoiceModel(
+        voice_name="naomi", role="classic", lang=tts_models_voice_base.LanguageCodes.HEBREW
+    )
+    # Uzbek
+    NIGORA = tts_models_voice_base.VoiceModel(
+        voice_name="nigora", role=None, lang=tts_models_voice_base.LanguageCodes.UZBEK
+    )