1
0
mirror of https://github.com/ijaric/voice_assistant.git synced 2025-05-24 14:33:26 +00:00
voice_assistant/src/assistant/lib/stt/openai_speech.py
2023-10-05 22:43:40 +03:00

48 lines
1.7 KiB
Python

import mimetypes
import tempfile
import magic
import openai
import lib.app.settings as app_settings
import lib.models as models
class OpenaiSpeech:
    """Speech-to-text client that sends audio to OpenAI's ``whisper-1`` model."""

    def __init__(self, settings: app_settings.Settings):
        """Keep the settings and configure the OpenAI API key.

        Note that assigning ``openai.api_key`` sets a module-level attribute
        of the ``openai`` package, so it affects every user of that module in
        the process, not only this instance.
        """
        self.settings = settings
        openai.api_key = self.settings.openai.api_key.get_secret_value()

    @staticmethod
    def __get_file_extension_from_bytes(audio: bytes) -> str | None:
        """Guess a file extension (without the leading dot) from raw bytes.

        The MIME type is sniffed from the buffer content with ``magic`` and
        then mapped to an extension through ``mimetypes.guess_extension``.

        Returns:
            The extension without its leading dot, or ``None`` when
            ``mimetypes`` has no extension for the detected MIME type.
        """
        mime: magic.Magic = magic.Magic(mime=True)
        mime_type: str = mime.from_buffer(audio)
        extension: str | None = mimetypes.guess_extension(mime_type)
        if extension:
            # Only the leading dot is a separator; keep any inner characters.
            extension = extension.removeprefix(".")
        return extension

    async def recognize(self, audio: bytes) -> str:
        """Transcribe ``audio`` to text using the OpenAI Whisper API.

        Args:
            audio: Raw bytes of the audio file to transcribe.

        Returns:
            The ``text`` attribute of the transcription response.

        Raises:
            ValueError: If no file extension can be derived from the audio
                bytes, or if the OpenAI API call fails (the original OpenAI
                error is attached as ``__cause__``).
        """
        file_extension: str | None = self.__get_file_extension_from_bytes(audio)
        if not file_extension:
            raise ValueError("File extension is not supported")

        # NOTE(review): presumably SttVoice validates size/format against the
        # voice settings; any error it raises propagates unchanged.
        voice: models.SttVoice = models.SttVoice(
            audio_size=len(audio) // 1024,  # size in whole KiB
            audio_format=file_extension,
            audio_data=audio,
            voice_settings=self.settings.voice,
        )

        try:
            # The suffix gives the temporary file a name ending in the detected
            # extension, which is passed along with the upload.
            with tempfile.NamedTemporaryFile(suffix=f".{file_extension}") as temp_file:
                temp_file.write(voice.audio_data)
                temp_file.seek(0)
                # Use the awaitable variant so the event loop is not blocked
                # for the duration of the HTTP request.
                transcript = await openai.Audio.atranscribe("whisper-1", temp_file)  # type: ignore
        except openai.error.OpenAIError as e:  # type: ignore
            # InvalidRequestError is a subclass of OpenAIError, so one handler
            # covers both cases the code previously listed separately.
            raise ValueError(f"OpenAI API error: {e}") from e

        return transcript.text  # type: ignore