Skip to content

Commit

Permalink
feat: 🎸 rag
Browse files (browse the repository at this point in the history)
now works with 30 chunks
  • Loading branch information
StanGirard committed Jan 26, 2024
1 parent 67c71bb commit e7bd571
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 10 deletions.
2 changes: 1 addition & 1 deletion backend/llm/knowledge_brain_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Config:
temperature: float = 0.1
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str # pyright: ignore reportPrivateUsage=none
- max_tokens: int = 256
+ max_tokens: int = 2000
streaming: bool = False
knowledge_qa: Optional[RAGInterface]
metadata: Optional[dict] = None
Expand Down
4 changes: 3 additions & 1 deletion backend/llm/rags/quivr_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Config:
temperature: float = 0.1
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str = None # pyright: ignore reportPrivateUsage=none
- max_tokens: int = 256
+ max_tokens: int = 2000
streaming: bool = False

@property
Expand Down Expand Up @@ -91,6 +91,7 @@ def __init__(
chat_id: str,
streaming: bool = False,
prompt_id: Optional[UUID] = None,
+ max_tokens: int = 2000,
**kwargs,
):
super().__init__(
Expand All @@ -103,6 +104,7 @@ def __init__(
self.supabase_client = self._create_supabase_client()
self.vector_store = self._create_vector_store()
self.prompt_id = prompt_id
+ self.max_tokens = max_tokens

def _create_supabase_client(self) -> Client:
return create_client(
Expand Down
4 changes: 1 addition & 3 deletions backend/models/databases/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ def get_vectors_by_file_name(self, file_name: str):
pass

@abstractmethod
- def similarity_search(
- self, query_embedding, table: str, top_k: int, threshold: float
- ):
+ def similarity_search(self, query_embedding, table: str, k: int, threshold: float):
pass

@abstractmethod
Expand Down
4 changes: 2 additions & 2 deletions backend/models/databases/supabase/vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ def get_vectors_by_file_sha1(self, file_sha1):
return response

# TODO: remove duplicate similarity_search in supabase vector store
- def similarity_search(self, query_embedding, table, top_k, threshold):
+ def similarity_search(self, query_embedding, table, k, threshold):
response = self.db.rpc(
table,
{
"query_embedding": query_embedding,
- "match_count": top_k,
+ "match_count": k,
"match_threshold": threshold,
},
).execute()
Expand Down
1 change: 1 addition & 0 deletions (file path missing from this capture)
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
embeddings,
table_name="vectors",
brain_id=str(brain_id),
+ number_docs=20,
)
documents = vector_store.similarity_search(question, k=20, threshold=0.8)

Expand Down
9 changes: 6 additions & 3 deletions backend/vectorstore/supabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):

brain_id: str = "none"
user_id: str = "none"
+ number_docs: int = 35

def __init__(
self,
Expand All @@ -22,10 +23,12 @@ def __init__(
table_name: str,
brain_id: str = "none",
user_id: str = "none",
+ number_docs: int = 35,
):
super().__init__(client, embedding, table_name)
self.brain_id = brain_id
self.user_id = user_id
+ self.number_docs = number_docs

def find_brain_closest_query(
self,
Expand All @@ -42,7 +45,7 @@ def find_brain_closest_query(
table,
{
"query_embedding": query_embedding,
- "match_count": k,
+ "match_count": self.number_docs,
"p_user_id": str(self.user_id),
},
).execute()
Expand All @@ -62,7 +65,7 @@ def find_brain_closest_query(
def similarity_search(
self,
query: str,
- k: int = 6,
+ k: int = 35,
table: str = "match_vectors",
threshold: float = 0.5,
**kwargs: Any,
Expand All @@ -73,7 +76,7 @@ def similarity_search(
table,
{
"query_embedding": query_embedding,
- "match_count": k,
+ "match_count": self.number_docs,
"p_brain_id": str(self.brain_id),
},
).execute()
Expand Down

0 comments on commit e7bd571

Please sign in to comment.