
feat: 🎸 14k
done
StanGirard committed Jan 27, 2024
1 parent 131caba commit d0421f4
Showing 15 changed files with 260 additions and 124 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
@@ -12,7 +12,7 @@
],
"editor.formatOnSave": true,
"[python]": {
"editor.defaultFormatter": "ms-python.python",
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",
5 changes: 3 additions & 2 deletions Pipfile
@@ -23,7 +23,7 @@ python-jose = "==3.3.0"
asyncpg = "==0.27.0"
flake8 = "==6.0.0"
flake8-black = "==0.3.6"
- sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"}
+ sentry-sdk = {extras = ["fastapi"] }
pyright = "==1.1.316"
resend = "==0.5.1"
html5lib = "==1.1"
@@ -34,7 +34,7 @@ redis = "==4.5.4"
flower = "*"
boto3 = "==1.33.7"
botocore = "==1.33.7"
- celery = {extras = ["sqs"], version = "*"}
+ celery = {extras = ["sqs"] }
python-dotenv = "*"
pytest-mock = "*"
pytest-celery = "*"
@@ -45,6 +45,7 @@ jq = "==1.6.0"
pytest = "*"

[dev-packages]
+ black = "*"

[requires]
python_version = "3.11"
81 changes: 75 additions & 6 deletions Pipfile.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion backend/llm/knowledge_brain_qa.py
@@ -78,7 +78,6 @@ def __init__(
brain_id: str,
chat_id: str,
max_tokens: int,
- max_input: int,
streaming: bool = False,
prompt_id: Optional[UUID] = None,
metadata: Optional[dict] = None,
21 changes: 16 additions & 5 deletions backend/llm/rags/quivr_rag.py
@@ -7,9 +7,11 @@
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM
- from langchain.prompts.chat import (ChatPromptTemplate,
-                                     HumanMessagePromptTemplate,
-                                     SystemMessagePromptTemplate)
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     HumanMessagePromptTemplate,
+     SystemMessagePromptTemplate,
+ )
from llm.rags.rag_interface import RAGInterface
from llm.utils.get_prompt_to_use import get_prompt_to_use
from logger import get_logger
@@ -58,7 +60,7 @@ class Config:
temperature: float = 0.1
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str = None # pyright: ignore reportPrivateUsage=none
- max_tokens: int = 2000
+ max_tokens: int = 2000  # Output length
max_input: int = 2000
streaming: bool = False

@@ -99,13 +101,22 @@ def __init__(
brain_id=brain_id,
chat_id=chat_id,
streaming=streaming,
max_tokens=max_tokens,
+ max_input=max_input,
**kwargs,
)
self.supabase_client = self._create_supabase_client()
self.vector_store = self._create_vector_store()
self.prompt_id = prompt_id
+ self.max_tokens = max_tokens
+ self.max_input = max_input
+ self.model = model
+ self.brain_id = brain_id
+ self.chat_id = chat_id
+ self.streaming = streaming

+ logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}")
+ logger.info("Max input length: " + str(self.max_input))

def _create_supabase_client(self) -> Client:
return create_client(
@@ -118,6 +129,7 @@ def _create_vector_store(self) -> CustomSupabaseVectorStore:
self.embeddings,
table_name="vectors",
brain_id=self.brain_id,
+ max_input=self.max_input,
)

def _create_llm(
@@ -152,7 +164,6 @@ def _create_llm(
def _create_prompt_template(self):
system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way.
----------------
{context}"""

prompt_content = (
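
In this commit, `max_tokens` caps only the completion length, while the new `max_input` travels into `CustomSupabaseVectorStore` so retrieval can respect the model's context window (the "14k" of the commit title). A minimal sketch of what such an input budget can look like — `count_tokens`, `trim_to_budget`, and the tiktoken tokenizer are assumptions for illustration, not the repository's actual implementation:

import tiktoken


def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    # Rough token count; the real project may use a different tokenizer
    return len(tiktoken.encoding_for_model(model).encode(text))


def trim_to_budget(documents: list[str], max_input: int) -> list[str]:
    """Keep the best-ranked documents until the max_input token budget is spent."""
    selected: list[str] = []
    used = 0
    for doc in documents:
        cost = count_tokens(doc)
        if used + cost > max_input:
            break
        selected.append(doc)
        used += cost
    return selected
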
5 changes: 3 additions & 2 deletions backend/models/databases/entity.py
@@ -1,12 +1,13 @@
from pydantic import BaseModel


- class Models(BaseModel):
+ class LLMModels(BaseModel):
"""LLM models stored in the database that are allowed to be used by the users.
Args:
BaseModel (BaseModel): Pydantic BaseModel
"""

name: str = "gpt-3.5-turbo-1106"
price: int = 1
+ max_input: int = 512
- max_output: int = 512
+ max_output: int = 512
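
The renamed entity also gains a `max_input` field next to `max_output`, so both directions of a call can be budgeted. A small usage sketch — the values here are illustrative, not actual database rows:

from models.databases.entity import LLMModels

# Illustrative values; real settings come from the database
settings = LLMModels(name="gpt-3.5-turbo-1106", price=1, max_input=12000, max_output=2000)

print(settings.max_input)   # budget for the question plus retrieved context
print(settings.max_output)  # budget for the generated answer
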
4 changes: 2 additions & 2 deletions backend/models/databases/repository.py
@@ -2,7 +2,7 @@
from datetime import datetime
from uuid import UUID

- from .entity import Models
+ from .entity import LLMModels


class Repository(ABC):
@@ -15,7 +15,7 @@ def get_user_usage(self, user_id: UUID):
pass

@abstractmethod
- def get_model_settings(self) -> Models | None:
+ def get_model_settings(self) -> LLMModels | None:
pass

@abstractmethod
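
Concrete repositories now return the renamed entity (or `None` when no row exists). A hypothetical in-memory implementation of the two methods visible in this diff — the real interface has more abstract methods, omitted here:

from models.databases.entity import LLMModels
from models.databases.repository import Repository


class InMemoryRepository(Repository):
    # Sketch only: the remaining abstract methods of Repository are omitted

    def get_user_usage(self, user_id):
        return 0

    def get_model_settings(self) -> LLMModels | None:
        # Fall back to the entity defaults (512 input / 512 output tokens)
        return LLMModels()
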
5 changes: 3 additions & 2 deletions backend/models/databases/supabase/user_usage.py
@@ -1,8 +1,9 @@
from ast import List
from datetime import datetime, timedelta
from uuid import UUID

from logger import get_logger
- from models.databases.entity import Models
+ from models.databases.entity import LLMModels
from models.databases.repository import Repository

logger = get_logger(__name__)
@@ -206,7 +207,7 @@ def get_user_settings(self, user_id):

return user_settings

- def get_model_settings(self) -> Models:
+ def get_model_settings(self):
"""
Fetch the user settings from the database
"""
60 changes: 60 additions & 0 deletions backend/modules/brain/service/brain_service.py
@@ -2,6 +2,10 @@
from uuid import UUID

from fastapi import HTTPException
+ from langchain.embeddings.ollama import OllamaEmbeddings
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from vectorstore.supabase import CustomSupabaseVectorStore
+ from models.settings import BrainSettings, get_supabase_client
from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties
from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain
from modules.brain.repository import (
@@ -44,6 +48,62 @@ def __init__(self):
def get_brain_by_id(self, brain_id: UUID):
return self.brain_repository.get_brain_by_id(brain_id)

+ def find_brain_from_question(
+     self, brain_id: UUID, question: str, user, chat_id: UUID, history
+ ) -> tuple[Optional[UUID], dict]:
+     """Find the brain to use for a question.
+     Args:
+         brain_id (UUID): ID of the brain to use, if one is already selected
+         question (str): Question for which to find the brain
+         user (UserEntity): User asking the question
+         chat_id (UUID): ID of the chat
+         history: Chat history, used as a fallback for the question and brain
+     Returns:
+         Tuple of the ID of the brain to use (or None) and metadata
+         containing the brains closest to the question
+     """
+     metadata = {}
+
+     brain_settings = BrainSettings()
+     supabase_client = get_supabase_client()
+     if brain_settings.ollama_api_base_url:
+         embeddings = OllamaEmbeddings(
+             base_url=brain_settings.ollama_api_base_url
+         )  # pyright: ignore reportPrivateUsage=none
+     else:
+         embeddings = OpenAIEmbeddings()
+     vector_store = CustomSupabaseVectorStore(
+         supabase_client, embeddings, table_name="vectors", user_id=user.id
+     )
+
+     brain_id_to_use = brain_id
+
+     if history and not brain_id_to_use:
+         # Fall back to the first message of the chat history
+         question = history[0].user_message
+         brain_id_to_use = history[0].brain_id
+
+     # Calculate the brains closest to the question
+     list_brains = vector_store.find_brain_closest_query(user.id, question)
+
+     metadata["close_brains"] = list_brains[:5]
+
+     if list_brains and not brain_id_to_use:
+         brain_id_to_use = list_brains[0]["id"]
+
+     return brain_id_to_use, metadata

def create_brain(
self,
user_id: UUID,
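
A sketch of how a caller might use the new `find_brain_from_question` helper; `StubUser` is an assumption for illustration — per the code above, the service only needs an object with an `id`:

from dataclasses import dataclass
from uuid import UUID, uuid4

from modules.brain.service.brain_service import BrainService


@dataclass
class StubUser:
    id: UUID  # stand-in for the real UserEntity; only `id` is used here


service = BrainService()

# No explicit brain and no history: the closest brain is picked from the vector store
brain_id_to_use, metadata = service.find_brain_from_question(
    brain_id=None,
    question="How do I deploy the backend?",
    user=StubUser(id=uuid4()),
    chat_id=uuid4(),
    history=[],
)

print(brain_id_to_use)               # id of the closest brain, or None
print(metadata.get("close_brains"))  # up to five closest matches
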