diff --git a/src/assistant/alembic/env.py b/src/assistant/alembic/env.py index 1786b59..9d6b168 100644 --- a/src/assistant/alembic/env.py +++ b/src/assistant/alembic/env.py @@ -6,7 +6,7 @@ from sqlalchemy.engine import Connection from sqlalchemy.ext.asyncio import async_engine_from_config import lib.app.settings as app_settings -import lib.models as models +import lib.orm_models as orm_models from alembic import context # this is the Alembic Config object, which provides @@ -19,7 +19,11 @@ if config.config_file_name is not None: config.set_main_option("sqlalchemy.url", app_settings.Settings().postgres.dsn) -target_metadata = models.Base.metadata +print("BASE: ", orm_models.Base.metadata.schema) +for t in orm_models.Base.metadata.sorted_tables: + print(t.name) + +target_metadata = orm_models.Base.metadata def run_migrations_offline() -> None: diff --git a/src/assistant/alembic/versions/2023-10-02_9749b063b095_added_initial_table.py b/src/assistant/alembic/versions/2023-10-02_9749b063b095_added_initial_table.py deleted file mode 100644 index e300471..0000000 --- a/src/assistant/alembic/versions/2023-10-02_9749b063b095_added_initial_table.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Added initial table - -Revision ID: 9749b063b095 -Revises: -Create Date: 2023-10-02 19:46:05.078494 - -""" -from typing import Sequence, Union - -import sqlalchemy as sa - -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "9749b063b095" -down_revision: Union[str, None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "joke", - sa.Column("type", sa.String(), nullable=False), - sa.Column("setup", sa.String(), nullable=False), - sa.Column("punchline", sa.String(), nullable=False), - sa.Column("id", sa.Uuid(), nullable=False), - sa.PrimaryKeyConstraint("id"), - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table("joke") - # ### end Alembic commands ### diff --git a/src/assistant/alembic/versions/2023-10-12_3d448c6327cd_init_commit.py b/src/assistant/alembic/versions/2023-10-12_3d448c6327cd_init_commit.py new file mode 100644 index 0000000..d40648e --- /dev/null +++ b/src/assistant/alembic/versions/2023-10-12_3d448c6327cd_init_commit.py @@ -0,0 +1,138 @@ +"""init commit + +Revision ID: 3d448c6327cd +Revises: +Create Date: 2023-10-12 00:01:42.248941 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "3d448c6327cd" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "chat_history", + sa.Column("id", sa.Uuid(), nullable=False), + sa.Column("session_id", sa.Uuid(), nullable=False), + sa.Column("channel", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("content", sa.JSON(), nullable=False), + sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.Column("modified", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.PrimaryKeyConstraint("id"), + schema="content", + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("chat_history", schema="content") + # ### end Alembic commands ### diff --git a/src/assistant/lib/agent/__init__.py b/src/assistant/lib/agent/__init__.py new file mode 100644 index 0000000..ff2f391 --- /dev/null +++ b/src/assistant/lib/agent/__init__.py @@ -0,0 +1,3 @@ +from .chat_repository import 
ChatHistoryRepository + +__all__ = ["ChatHistoryRepository"] diff --git a/src/assistant/lib/agent/chat_repository.py b/src/assistant/lib/agent/chat_repository.py new file mode 100644 index 0000000..d632d4b --- /dev/null +++ b/src/assistant/lib/agent/chat_repository.py @@ -0,0 +1,83 @@ +import logging +import uuid + +import sqlalchemy as sa +import sqlalchemy.exc +import sqlalchemy.ext.asyncio as sa_asyncio + +import lib.models as models +import lib.orm_models as orm_models + + +class ChatHistoryRepository: + def __init__(self, pg_async_session: sa_asyncio.async_sessionmaker[sa_asyncio.AsyncSession]) -> None: + self.pg_async_session = pg_async_session + self.logger = logging.getLogger(__name__) + + async def get_last_session_id(self, request: models.RequestLastSessionId) -> uuid.UUID | None: + """Get the current session ID if it exists.""" + + self.logger.debug("get_last_session_id: %s", request) + try: + async with self.pg_async_session() as session: + statement = ( + sa.select(orm_models.ChatHistory) + .filter_by(channel=request.channel, user_id=request.user_id) + .filter( + ( + sa.func.extract("epoch", sa.text("NOW()")) + - sa.func.extract("epoch", orm_models.ChatHistory.created) + ) + / 60 + <= request.minutes_ago + ) + .order_by(orm_models.ChatHistory.created.desc()) + .limit(1) + ) + result = await session.execute(statement) + + chat_session = result.scalars().first() + if chat_session: + return chat_session.session_id + except sqlalchemy.exc.SQLAlchemyError as error: + self.logger.exception("Error: %s", error) + + async def get_messages_by_sid(self, request: models.RequestChatHistory) -> list[models.Message] | None: + """Get all messages of a chat by session ID.""" + + self.logger.debug("get_messages_by_sid: %s", request) + try: + async with self.pg_async_session() as session: + messages: list[models.Message] = [] + statement = ( + sa.select(orm_models.ChatHistory) + .filter_by(session_id=request.session_id) + .order_by(orm_models.ChatHistory.created.asc()) + ) + print("get_messages_by_sid:", statement) + result = await session.execute(statement) + for row in result.scalars().all(): + # TODO: Figure out why pyright complains about the line below and what the correct way to access these components is + messages.append(models.Message(role=row.content["role"], content=row.content["content"])) # type: ignore[reportGeneralTypeIssues] + return messages + except sqlalchemy.exc.SQLAlchemyError as error: + self.logger.exception("Error: %s", error) + + async def add_message(self, request: models.RequestChatMessage) -> None: + """Add a message to the chat history.""" + + self.logger.debug("add_message: %s", request) + try: + async with self.pg_async_session() as session: + chat_history = orm_models.ChatHistory( + id=uuid.uuid4(), + session_id=request.session_id, + user_id=request.user_id, + channel=request.channel, + content=request.message, + ) + session.add(chat_history) + await session.commit() + # TODO: Add refresh to session and return added object + except sqlalchemy.exc.SQLAlchemyError as error: + self.logger.exception("Error: %s", error) diff --git a/src/assistant/lib/agent/openai_functions.py b/src/assistant/lib/agent/openai_functions.py new file mode 100644 index 0000000..673a1ba --- /dev/null +++ b/src/assistant/lib/agent/openai_functions.py @@ -0,0 +1,58 @@ +import logging +import uuid + +import langchain.agents +import sqlalchemy as sa +import sqlalchemy.exc +import sqlalchemy.ext.asyncio as sa_asyncio + +import lib.agent.repositories as repositories +import lib.models as models +import lib.orm_models as orm_models + 
+class OpenAIFunctions: + """OpenAI Functions for langchain agents.""" + + def __init__( + self, + repository: repositories.EmbeddingRepository, + pg_async_session: sa_asyncio.async_sessionmaker[sa_asyncio.AsyncSession], + ) -> None: + self.logger = logging.getLogger(__name__) + self.pg_async_session = pg_async_session + self.repository = repository + + @langchain.agents.tool + async def get_movie_by_description(self, description: str) -> list[models.Movie] | None: + """Provide movie data by description.""" + + self.logger.info("Request to get movie by description: %s", description) + embedded_description = await self.repository.aget_embedding(description) + try: + async with self.pg_async_session() as session: + result: list[models.Movie] = [] + stmt = ( + sa.select(orm_models.FilmWork) + .order_by(orm_models.FilmWork.embedding.cosine_distance(embedded_description)) + .limit(5) + ) + neighbours = session.scalars(stmt) + for neighbour in await neighbours: + result.append(models.Movie(**neighbour.__dict__)) + return result + except sqlalchemy.exc.SQLAlchemyError as error: + self.logger.exception("Error: %s", error) + + @langchain.agents.tool + def get_movie_by_id(self, id: uuid.UUID) -> models.Movie | None: + """Provide movie data by movie ID.""" + self.logger.info("Request to get movie by id: %s", id) + return None + + @langchain.agents.tool + def get_similar_movies(self, id: uuid.UUID) -> list[models.Movie] | None: + """Provide similar movies for the given movie ID.""" + + self.logger.info("Request to get similar movies for id: %s", id) + return None diff --git a/src/assistant/lib/agent/repositories.py b/src/assistant/lib/agent/repositories.py new file mode 100644 index 0000000..f8fe6f1 --- /dev/null +++ b/src/assistant/lib/agent/repositories.py @@ -0,0 +1,40 @@ +import logging +import typing + +import openai +import openai.error + +import lib.app.settings as app_settings +import lib.models as models + + +class EmbeddingRepository: + """A service for getting embeddings from OpenAI.""" + + def __init__(self, settings: app_settings.Settings) -> None: + """Initialize the service with an OpenAI API key.""" + self.llm = openai.api_key = settings.openai.api_key + self.logger = logging.getLogger(__name__) + + def get_embedding(self, text: str, model: str = "text-embedding-ada-002") -> models.Embedding | None: + """Get the embedding for a given text.""" + try: + response: dict[str, typing.Any] = openai.Embedding.create( + input=text, + model=model, + ) # type: ignore[reportGeneralTypeIssues] + return models.Embedding(root=response["data"][0]["embedding"]) + except openai.error.OpenAIError: + self.logger.exception("Failed to get embedding for: %s", text) + + async def aget_embedding(self, text: str, model: str = "text-embedding-ada-002") -> models.Embedding | None: + """Get the embedding for a given text.""" + try: + response: dict[str, typing.Any] = await openai.Embedding.acreate( + input=text, + model=model, + ) # type: ignore[reportGeneralTypeIssues] + return models.Embedding(root=response["data"][0]["embedding"]) + + except openai.error.OpenAIError: + self.logger.exception("Failed to get async embedding for: %s", text) diff --git a/src/assistant/lib/agent/services.py b/src/assistant/lib/agent/services.py new file mode 100644 index 0000000..83f89a4 --- /dev/null +++ b/src/assistant/lib/agent/services.py @@ -0,0 +1,88 @@ +import asyncio +import logging +import uuid + +import langchain.agents +import langchain.agents.format_scratchpad +import langchain.agents.output_parsers +import 
langchain.chat_models +import langchain.prompts +import langchain.schema.agent +import langchain.schema.messages +import langchain.tools.render + +import lib.agent.openai_functions as openai_functions +import lib.app.settings as app_settings + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class AgentService: + def __init__(self, settings: app_settings.Settings, tools: openai_functions.OpenAIFunctions) -> None: + self.settings = settings + self.tools = tools + + async def process_request(self, request: str, chat_history: list[langchain.schema.messages.BaseMessage]) -> str: + llm = langchain.chat_models.ChatOpenAI(temperature=0.7, openai_api_key=self.settings.openai.api_key) + tools = [self.tools.get_movie_by_description, self.tools.get_movie_by_id, self.tools.get_similar_movies] + + chat_history_name = f"{chat_history=}".partition("=")[0] + prompt = langchain.prompts.ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are a very powerful assistant. If you are asked about movies you will use the provided functions.", + ), + langchain.prompts.MessagesPlaceholder(variable_name=chat_history_name), + ("user", "{input}"), + langchain.prompts.MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + llm_with_tools = llm.bind( + functions=[langchain.tools.render.format_tool_to_openai_function(tool) for tool in tools] + ) + + agent = ( + { + "input": lambda _: _["input"], + "agent_scratchpad": lambda _: langchain.agents.format_scratchpad.format_to_openai_functions( + _["intermediate_steps"] + ), + "chat_history": lambda _: _["chat_history"], + } + | prompt + | llm_with_tools + | langchain.agents.output_parsers.OpenAIFunctionsAgentOutputParser() + ) + + agent_executor = langchain.agents.AgentExecutor(agent=agent, tools=tools, verbose=True) + + result = await agent_executor.ainvoke({"input": request, "chat_history": chat_history}) + return result["output"] + + +# async def main(): +# agent_executor = langchain.agents.AgentExecutor(agent=agent, tools=tools, verbose=True) + +# # first_question = "What is the movie where halfling bring the ring to the volcano?" +# first_question = ( +# "What is the movie about a famous country singer meet a talented singer and songwriter who works as a waitress?" +# ) +# second_question = "So what is the rating of the movie? Do you recommend it?" +# third_question = "What are the similar movies?" 
+# first_result = await agent_executor.ainvoke({"input": first_question, "chat_history": chat_history}) +# chat_history.append(langchain.schema.messages.HumanMessage(content=first_question)) +# chat_history.append(langchain.schema.messages.AIMessage(content=first_result["output"])) +# second_result = await agent_executor.ainvoke({"input": second_question, "chat_history": chat_history}) +# chat_history.append(langchain.schema.messages.HumanMessage(content=second_question)) +# chat_history.append(langchain.schema.messages.AIMessage(content=second_result["output"])) +# final_result = await agent_executor.ainvoke({"input": third_question, "chat_history": chat_history}) + + +# if __name__ == "__main__": +# asyncio.run(main()) diff --git a/src/assistant/lib/api/v1/handlers/__init__.py b/src/assistant/lib/api/v1/handlers/__init__.py index 00d7189..178157c 100644 --- a/src/assistant/lib/api/v1/handlers/__init__.py +++ b/src/assistant/lib/api/v1/handlers/__init__.py @@ -1,7 +1,10 @@ +from .agent import AgentHandler from .health import basic_router from .voice_responce_handler import VoiceResponseHandler + __all__ = [ + "AgentHandler", "VoiceResponseHandler", "basic_router", ] diff --git a/src/assistant/lib/api/v1/handlers/agent.py b/src/assistant/lib/api/v1/handlers/agent.py new file mode 100644 index 0000000..bd584e0 --- /dev/null +++ b/src/assistant/lib/api/v1/handlers/agent.py @@ -0,0 +1,59 @@ +import uuid + +import fastapi + +import lib.agent as agent +import lib.models as models + + +class AgentHandler: + def __init__(self, chat_history_repository: agent.ChatHistoryRepository): + self.chat_history_repository = chat_history_repository + self.router = fastapi.APIRouter() + self.router.add_api_route( + "/", + self.get_agent, + methods=["GET"], + summary="Get last session ID", + description="Returns the last chat session ID for the test user.", + ) + self.router.add_api_route( + "/add", + self.add_message, + methods=["GET"], + summary="Add test message", + description="Adds a generated test message to the chat history.", + ) + self.router.add_api_route( + "/messages", + self.get_messages, + methods=["GET"], + summary="Get session messages", + description="Returns all messages of the test chat session.", + ) + + async def get_agent(self): + request = models.RequestLastSessionId(channel="test", user_id="user_id_1", minutes_ago=3) + response = await self.chat_history_repository.get_last_session_id(request=request) + print("RESPONSE: ", response) + return {"response": response} + + async def add_message(self): + sid: uuid.UUID = uuid.UUID("0cd3c882-affd-4929-aff1-e1724f5b54f2") + import faker + + fake = faker.Faker() + + message = models.RequestChatMessage( + session_id=sid, user_id="user_id_1", channel="test", message={"role": "system", "content": fake.sentence()} + ) + await self.chat_history_repository.add_message(request=message) + return {"response": "ok"} + + async def get_messages(self): + sid: uuid.UUID = uuid.UUID("0cd3c882-affd-4929-aff1-e1724f5b54f2") + + request = models.RequestChatHistory(session_id=sid) + response = await self.chat_history_repository.get_messages_by_sid(request=request) + print("RESPONSE: ", response) + return {"response": response} diff --git a/src/assistant/lib/app/app.py b/src/assistant/lib/app/app.py index 85729cf..a060a20 100644 --- a/src/assistant/lib/app/app.py +++ b/src/assistant/lib/app/app.py @@ -6,6 +6,7 @@ import typing import fastapi import uvicorn +import lib.agent as agent import lib.api.v1.handlers as api_v1_handlers import lib.app.errors as app_errors import 
lib.app.settings as app_settings @@ -89,6 +90,7 @@ class Application: logger.info("Initializing repositories") stt_repository: stt.STTProtocol = stt.OpenaiSpeechRepository(settings=settings) + chat_history_repository = agent.ChatHistoryRepository(pg_async_session=postgres_client.get_async_session()) tts_yandex_repository = tts.TTSYandexRepository( tts_settings=app_split_settings.TTSYandexSettings(), @@ -120,6 +122,7 @@ class Application: logger.info("Initializing handlers") liveness_probe_handler = api_v1_handlers.basic_router + agent_handler = api_v1_handlers.AgentHandler(chat_history_repository=chat_history_repository).router # TODO: объявить сервисы tts и openai и добавить их в voice_response_handler voice_response_handler = api_v1_handlers.VoiceResponseHandler( @@ -139,6 +142,7 @@ class Application: # Routes fastapi_app.include_router(liveness_probe_handler, prefix="/api/v1/health", tags=["health"]) + fastapi_app.include_router(agent_handler, prefix="/api/v1/agent", tags=["testing"]) fastapi_app.include_router(voice_response_handler, prefix="/api/v1/voice", tags=["voice"]) application = Application( diff --git a/src/assistant/lib/models/__init__.py b/src/assistant/lib/models/__init__.py index d134ff8..3990c57 100644 --- a/src/assistant/lib/models/__init__.py +++ b/src/assistant/lib/models/__init__.py @@ -1,7 +1,11 @@ -from .orm import Base, IdCreatedUpdatedBaseMixin +from .chat_history import Message, RequestChatHistory, RequestChatMessage, RequestLastSessionId +from .embedding import Embedding +from .movies import Movie from .token import Token from .tts import * + +__all__ = ["Embedding", "Message", "Movie", "RequestChatHistory", "RequestChatMessage", "RequestLastSessionId", "Token"] __all__ = [ "AVAILABLE_MODELS_TYPE", "Base", diff --git a/src/assistant/lib/models/chat_history.py b/src/assistant/lib/models/chat_history.py new file mode 100644 index 0000000..41a3dbe --- /dev/null +++ b/src/assistant/lib/models/chat_history.py @@ -0,0 +1,33 @@ +import uuid + +import pydantic + + +class RequestLastSessionId(pydantic.BaseModel): + """Request for a new session ID.""" + + channel: str + user_id: str + minutes_ago: int + + +class RequestChatMessage(pydantic.BaseModel): + """A chat message.""" + + session_id: uuid.UUID + user_id: str + channel: str + message: dict[str, str] + + +class RequestChatHistory(pydantic.BaseModel): + """Request for chat history.""" + + session_id: uuid.UUID + + +class Message(pydantic.BaseModel): + """A chat message.""" + + role: str + content: str diff --git a/src/assistant/lib/models/embedding.py b/src/assistant/lib/models/embedding.py new file mode 100644 index 0000000..3978dc9 --- /dev/null +++ b/src/assistant/lib/models/embedding.py @@ -0,0 +1,5 @@ +import pydantic + + +class Embedding(pydantic.RootModel[list[float]]): + root: list[float] diff --git a/src/assistant/lib/models/movies.py b/src/assistant/lib/models/movies.py new file mode 100644 index 0000000..e432111 --- /dev/null +++ b/src/assistant/lib/models/movies.py @@ -0,0 +1,14 @@ +import datetime +import uuid + +import pydantic + + +class Movie(pydantic.BaseModel): + id: uuid.UUID + title: str + description: str | None = None + rating: float + type: str + created: datetime.datetime + modified: datetime.datetime diff --git a/src/assistant/lib/models/orm/__init__.py b/src/assistant/lib/models/orm/__init__.py deleted file mode 100644 index 0a1fd51..0000000 --- a/src/assistant/lib/models/orm/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .base import Base, IdCreatedUpdatedBaseMixin - -__all__ = ["Base", 
"IdCreatedUpdatedBaseMixin"] diff --git a/src/assistant/lib/orm_models/__init__.py b/src/assistant/lib/orm_models/__init__.py new file mode 100644 index 0000000..869102e --- /dev/null +++ b/src/assistant/lib/orm_models/__init__.py @@ -0,0 +1,14 @@ +from .base import Base, IdCreatedUpdatedBaseMixin +from .chat_history import ChatHistory +from .movies import FilmWork, Genre, GenreFilmWork, Person, PersonFilmWork + +__all__ = [ + "Base", + "ChatHistory", + "FilmWork", + "Genre", + "GenreFilmWork", + "IdCreatedUpdatedBaseMixin", + "Person", + "PersonFilmWork", +] diff --git a/src/assistant/lib/models/orm/base.py b/src/assistant/lib/orm_models/base.py similarity index 66% rename from src/assistant/lib/models/orm/base.py rename to src/assistant/lib/orm_models/base.py index 3d1ace3..b3929f6 100644 --- a/src/assistant/lib/models/orm/base.py +++ b/src/assistant/lib/orm_models/base.py @@ -16,20 +16,12 @@ class Base(sa_orm.DeclarativeBase): return cls.__name__.lower() __mapper_args__ = {"eager_defaults": True} - - id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) + __table_args__ = {"schema": "content"} class IdCreatedUpdatedBaseMixin: - # id: sa_orm.Mapped[int] = sa_orm.mapped_column(primary_key=True) - # id_field: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(name="uuid", primary_key=True, unique=True, default=uuid.uuid4, nullable=False) + id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) created: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column(server_default=sa_sql.func.now()) updated: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( server_default=sa_sql.func.now(), onupdate=sa_sql.func.now() ) - - # __mapper_args__ = {"eager_defaults": True} - - # @sqlalchemy.ext.declarative.declared_attr.directive - # def __tablename__(cls) -> str: - # return cls.__name__.lower() diff --git a/src/assistant/lib/orm_models/chat_history.py b/src/assistant/lib/orm_models/chat_history.py new file mode 100644 index 0000000..04742e9 --- /dev/null +++ b/src/assistant/lib/orm_models/chat_history.py @@ -0,0 +1,24 @@ +import datetime +import uuid + +import sqlalchemy as sa +import sqlalchemy.orm as sa_orm +import sqlalchemy.sql as sa_sql + +import lib.orm_models.base as base_models + + +class ChatHistory(base_models.Base): + __tablename__: str = "chat_history" # type: ignore[reportIncompatibleVariableOverride] + + id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) + session_id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(nullable=False, unique=True) + channel: sa_orm.Mapped[str] = sa_orm.mapped_column() + user_id: sa_orm.Mapped[str] = sa_orm.mapped_column() + content: sa_orm.Mapped[sa.JSON] = sa_orm.mapped_column(sa.JSON) + created: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now() + ) + modified: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now(), onupdate=sa_sql.func.now() + ) diff --git a/src/assistant/lib/orm_models/movies.py b/src/assistant/lib/orm_models/movies.py new file mode 100644 index 0000000..88082e2 --- /dev/null +++ b/src/assistant/lib/orm_models/movies.py @@ -0,0 +1,76 @@ +import datetime +import uuid + +import pgvector.sqlalchemy +import sqlalchemy as sa +import sqlalchemy.orm as sa_orm +import sqlalchemy.sql as sa_sql + +import lib.orm_models.base as base_models + + +class Genre(base_models.Base): + __tablename__: str = "genre" # 
type: ignore[reportIncompatibleVariableOverride] + + id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) + name: sa_orm.Mapped[str] = sa_orm.mapped_column() + description: sa_orm.Mapped[str] = sa_orm.mapped_column(nullable=True) + created: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now() + ) + modified: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now(), onupdate=sa_sql.func.now() + ) + + +class Person(base_models.Base): + __tablename__: str = "person" # type: ignore[reportIncompatibleVariableOverride] + + id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) + full_name: sa_orm.Mapped[str] = sa_orm.mapped_column() + created: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now() + ) + modified: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now(), onupdate=sa_sql.func.now() + ) + + +class FilmWork(base_models.Base): + __tablename__: str = "film_work" # type: ignore[reportIncompatibleVariableOverride] + + id: sa_orm.Mapped[uuid.UUID] = sa_orm.mapped_column(primary_key=True, default=uuid.uuid4) + title: sa_orm.Mapped[str] = sa_orm.mapped_column() + description: sa_orm.Mapped[str] = sa_orm.mapped_column(nullable=True) + creation_date: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column(nullable=True) + file_path: sa_orm.Mapped[str] = sa_orm.mapped_column(nullable=True) + rating: sa_orm.Mapped[float] = sa_orm.mapped_column(nullable=True) + type: sa_orm.Mapped[str] = sa_orm.mapped_column() + created: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now() + ) + modified: sa_orm.Mapped[datetime.datetime] = sa_orm.mapped_column( + sa.DateTime(timezone=True), server_default=sa_sql.func.now(), onupdate=sa_sql.func.now() + ) + embedding: sa_orm.Mapped[list[float]] = sa_orm.mapped_column(pgvector.sqlalchemy.Vector(1536)) + genres: sa_orm.Mapped[list[Genre]] = sa_orm.relationship(secondary="genre_film_work") + + +GenreFilmWork = sa.Table( + "genre_film_work", + base_models.Base.metadata, + sa.Column("id", sa.UUID, primary_key=True), # type: ignore[reportUnknownVariableType] + sa.Column("genre_id", sa.ForeignKey(Genre.id), primary_key=True), # type: ignore[reportUnknownVariableType] + sa.Column("film_work_id", sa.ForeignKey(FilmWork.id), primary_key=True), # type: ignore[reportUnknownVariableType] + sa.Column("created", sa.DateTime(timezone=True), server_default=sa_sql.func.now()), +) + + +PersonFilmWork = sa.Table( + "person_film_work", + base_models.Base.metadata, + sa.Column("person_id", sa.ForeignKey(Person.id), primary_key=True), # type: ignore[reportUnknownVariableType] + sa.Column("film_work_id", sa.ForeignKey(FilmWork.id), primary_key=True), # type: ignore[reportUnknownVariableType] + sa.Column("role", sa.String(50), nullable=False), + sa.Column("created", sa.DateTime(timezone=True), server_default=sa_sql.func.now()), +) diff --git a/src/assistant/poetry.lock b/src/assistant/poetry.lock index 4b733cb..48b0fb8 100644 --- a/src/assistant/poetry.lock +++ b/src/assistant/poetry.lock @@ -448,6 +448,21 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "dataclasses-json" +version = "0.6.1" 
+description = "Easily serialize dataclasses to and from JSON." +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json-0.6.1-py3-none-any.whl", hash = "sha256:1bd8418a61fe3d588bb0079214d7fb71d44937da40742b787256fd53b26b6c80"}, + {file = "dataclasses_json-0.6.1.tar.gz", hash = "sha256:a53c220c35134ce08211a1057fd0e5bf76dc5331627c6b241cacbc570a89faae"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "dill" version = "0.3.7" @@ -514,6 +529,20 @@ files = [ dnspython = ">=2.0.0" idna = ">=2.0.0" +[[package]] +name = "faker" +version = "19.10.0" +description = "Faker is a Python package that generates fake data for you." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Faker-19.10.0-py3-none-any.whl", hash = "sha256:f321e657ed61616fbfe14dbb9ccc6b2e8282652bbcfcb503c1bd0231ff834df6"}, + {file = "Faker-19.10.0.tar.gz", hash = "sha256:63da90512d0cb3acdb71bd833bb3071cb8a196020d08b8567a01d232954f1820"}, +] + +[package.dependencies] +python-dateutil = ">=2.4" + [[package]] name = "fastapi" version = "0.103.1" @@ -771,6 +800,85 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain" +version = "0.0.312" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langchain-0.0.312-py3-none-any.whl", hash = "sha256:2c7ea6e80195b8747c25ca4b905bd4814f26f47719a27edcb369cb6cd2186df3"}, + {file = "langchain-0.0.312.tar.gz", hash = "sha256:4629233c158f23dcfb0cbc249b27d7d8bde1e71ce1d8972d53ae54c7504fc78a"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +anyio = "<4.0" +dataclasses-json = ">=0.5.7,<0.7" +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.0.43,<0.1.0" +numpy = ">=1,<2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect 
(>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.10.1,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"] +clarifai = ["clarifai (>=9.1.0)"] +cli = ["typer (>=0.9.0,<0.10.0)"] +cohere = ["cohere (>=4,<5)"] +docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] +embeddings = ["sentence-transformers (>=2,<3)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "amazon-textract-caller (<2)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", 
"sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] +javascript = ["esprima (>=4.0.1,<5.0.0)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.6.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] +text-helpers = ["chardet (>=5.1.0,<6.0.0)"] + +[[package]] +name = "langsmith" +version = "0.0.43" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.0.43-py3-none-any.whl", hash = "sha256:27854bebdae6a35c88e1c1172e6abba27592287b70511aca2a953a59fade0e87"}, + {file = "langsmith-0.0.43.tar.gz", hash = "sha256:f7705f13eb8ce3b8eb16c4d2b2760c62cfb9a3b3ab6aa0728afa84d26b2a6e55"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + [[package]] name = "lazy-object-proxy" version = "1.9.0" @@ -894,6 +1002,26 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] +[[package]] +name = "marshmallow" +version = "3.20.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, + {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "mccabe" version = "0.7.0" @@ -1013,6 +1141,40 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "numpy" +version = "1.25.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = 
"sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, +] + [[package]] name = "openai" version = "0.28.1" @@ -1126,6 +1288,19 @@ files = [ {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] +[[package]] +name = "pgvector" +version = "0.2.3" +description = "pgvector support for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pgvector-0.2.3-py2.py3-none-any.whl", hash = "sha256:9d53dc01138ecc7c9aca64e4680cfa9edf4c38f9cb8ed7098317871fdd211824"}, +] + +[package.dependencies] +numpy = "*" + [[package]] name = "platformdirs" version = "3.11.0" @@ -1504,6 +1679,18 @@ pluggy = ">=0.12,<2.0" testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", 
"mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" name = "pytest-asyncio" version = "0.21.1" description = "Pytest support for asyncio" @@ -1595,6 +1782,55 @@ files = [ [package.dependencies] tokenize-rt = ">=5.2.0" +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -1811,6 +2047,20 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", 
"jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + [[package]] name = "tokenize-rt" version = "5.2.0" @@ -1878,6 +2128,21 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "urllib3" version = "2.0.6" diff --git a/src/assistant/pyproject.toml b/src/assistant/pyproject.toml index 45753e6..b59205d 100644 --- a/src/assistant/pyproject.toml +++ b/src/assistant/pyproject.toml @@ -23,9 +23,13 @@ version = "0.1.0" alembic = "^1.12.0" asyncpg = "^0.28.0" dill = "^0.3.7" +faker = "^19.10.0" fastapi = "0.103.1" greenlet = "^2.0.2" httpx = "^0.25.0" +langchain = "^0.0.312" +openai = "^0.28.1" +pgvector = "^0.2.3" multidict = "^6.0.4" openai = "^0.28.1" orjson = "3.9.7" @@ -94,8 +98,9 @@ variable-rgx = "^_{0,2}[a-z][a-z0-9_]*$" [tool.pyright] exclude = [ + ".venv", + "alembic" ".pytest_cache", - ".venv" ] pythonPlatform = "All" pythonVersion = "3.11"