feat: websearch, tool use, user intent, dynamic retrieval, multiple questions (#3424)

# Description

This PR includes far too many new features:

- detecting user intent (closes CORE-211)
- handling multiple questions in parallel (closes CORE-212)
- using the chat history when answering a question (closes CORE-213)
- filtering retrieved chunks by a relevance threshold (closes CORE-217)
- retrieving chunks dynamically (closes CORE-218)
- enabling web search via Tavily (closes CORE-220)
- enabling the agent/assistant to activate tools when relevant to complete the user's task (closes CORE-224); see the sketch below

Also closes CORE-205
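
As an illustration of the tool-use feature, here is a minimal sketch of how a LangChain chat model can be handed Tavily web search as a tool it may choose to invoke. This is a hedged illustration of the pattern, not this PR's exact wiring; it assumes `TAVILY_API_KEY` and `OPENAI_API_KEY` are set, and the model name is illustrative.

```python
# Hedged sketch: bind a Tavily web-search tool to a chat model so the model
# itself decides, per request, whether fresh web results are needed.
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI

web_search = TavilySearchResults(max_results=3)
llm = ChatOpenAI(model="gpt-4o").bind_tools([web_search])

response = llm.invoke("What are the latest features released in LangGraph?")

# When the model elects to use the tool, it returns tool calls instead of a
# final answer; the caller executes them and feeds the results back.
for call in response.tool_calls:
    print(call["name"], call["args"])
```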

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

---------

Co-authored-by: Stan Girard <[email protected]>
jacopo-chevallard and StanGirard authored Oct 31, 2024
1 parent 5401c01 commit 285fe5b
Showing 43 changed files with 2,165 additions and 1,452 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/backend-core-tests.yml
@@ -41,6 +41,4 @@ jobs:
sudo apt-get update
sudo apt-get install -y libmagic-dev poppler-utils libreoffice tesseract-ocr pandoc
cd core
rye run python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
rye run python -c "import nltk;nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')"
rye test -p quivr-core
3 changes: 2 additions & 1 deletion core/pyproject.toml
@@ -9,7 +9,7 @@ dependencies = [
"pydantic>=2.8.2",
"langchain-core>=0.2.38",
"langchain>=0.2.14,<0.3.0",
"langgraph>=0.2.14",
"langgraph>=0.2.38",
"httpx>=0.27.0",
"rich>=13.7.1",
"tiktoken>=0.7.0",
Expand All @@ -21,6 +21,7 @@ dependencies = [
"types-pyyaml>=6.0.12.20240808",
"transformers[sentencepiece]>=4.44.2",
"faiss-cpu>=1.8.0.post1",
"rapidfuzz>=3.10.1",
]
readme = "README.md"
requires-python = ">= 3.11"
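
The new `rapidfuzz` dependency suggests fuzzy string matching somewhere in the retrieval path. A hedged illustration of the library's basic API follows; how the PR actually uses it is not shown in this excerpt.

```python
from rapidfuzz import fuzz

# Scores range from 0 to 100; useful, for example, to spot near-duplicate
# retrieved chunks before handing them to the LLM.
a = "dynamic retrieval of chunks"
b = "chunks, retrieved dynamically"
print(fuzz.ratio(a, b))             # character-level similarity
print(fuzz.token_sort_ratio(a, b))  # word-order-insensitive similarity
```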
165 changes: 72 additions & 93 deletions core/quivr_core/brain/brain.py
@@ -10,7 +10,9 @@
from langchain_core.embeddings import Embeddings
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.vectorstores import VectorStore
+ from quivr_core.rag.entities.models import ParsedRAGResponse
from langchain_openai import OpenAIEmbeddings
+ from quivr_core.rag.quivr_rag import QuivrQARAG
from rich.console import Console
from rich.panel import Panel

@@ -22,19 +24,17 @@
LocalStorageConfig,
TransparentStorageConfig,
)
- from quivr_core.chat import ChatHistory
- from quivr_core.config import RetrievalConfig
+ from quivr_core.rag.entities.chat import ChatHistory
+ from quivr_core.rag.entities.config import RetrievalConfig
from quivr_core.files.file import load_qfile
from quivr_core.llm import LLMEndpoint
- from quivr_core.models import (
+ from quivr_core.rag.entities.models import (
ParsedRAGChunkResponse,
- ParsedRAGResponse,
QuivrKnowledge,
SearchResult,
)
from quivr_core.processor.registry import get_processor_class
- from quivr_core.quivr_rag import QuivrQARAG
- from quivr_core.quivr_rag_langgraph import QuivrQARAGLangGraph
+ from quivr_core.rag.quivr_rag_langgraph import QuivrQARAGLangGraph
from quivr_core.storage.local_storage import LocalStorage, TransparentStorage
from quivr_core.storage.storage_base import StorageBase

@@ -49,19 +49,15 @@ async def process_files(
"""
Process files in storage.
This function takes a StorageBase and return a list of langchain documents.
Args:
storage (StorageBase): The storage containing the files to process.
skip_file_error (bool): Whether to skip files that cannot be processed.
processor_kwargs (dict[str, Any]): Additional arguments for the processor.
Returns:
list[Document]: List of processed documents in the Langchain Document format.
Raises:
ValueError: If a file cannot be processed and skip_file_error is False.
Exception: If no processor is found for a file of a specific type and skip_file_error is False.
"""

knowledge = []
@@ -91,40 +87,32 @@ async def process_files(
class Brain:
"""
A class representing a Brain.
This class allows for the creation of a Brain, which is a collection of knowledge one wants to retrieve information from.
A Brain is set to:
* Store files in the storage of your choice (local, S3, etc.)
* Process the files in the storage to extract text and metadata in a wide range of format.
* Store the processed files in the vector store of your choice (FAISS, PGVector, etc.) - default to FAISS.
* Create an index of the processed files.
* Use the *Quivr* workflow for the retrieval augmented generation.
A Brain is able to:
* Search for information in the vector store.
* Answer questions about the knowledges in the Brain.
* Stream the answer to the question.
Attributes:
name (str): The name of the brain.
id (UUID): The unique identifier of the brain.
storage (StorageBase): The storage used to store the files.
llm (LLMEndpoint): The language model used to generate the answer.
vector_db (VectorStore): The vector store used to store the processed files.
embedder (Embeddings): The embeddings used to create the index of the processed files.
"""

def __init__(
self,
*,
name: str,
- id: UUID,
llm: LLMEndpoint,
+ id: UUID | None = None,
vector_db: VectorStore | None = None,
embedder: Embeddings | None = None,
storage: StorageBase | None = None,
@@ -156,19 +144,15 @@ def print_info(self):
def load(cls, folder_path: str | Path) -> Self:
"""
Load a brain from a folder path.
Args:
folder_path (str | Path): The path to the folder containing the brain.
Returns:
Brain: The brain loaded from the folder path.
Example:
```python
brain_loaded = Brain.load("path/to/brain")
brain_loaded.print_info()
```
"""
if isinstance(folder_path, str):
folder_path = Path(folder_path)
@@ -217,16 +201,13 @@ def load(cls, folder_path: str | Path) -> Self:
vector_db=vector_db,
)

async def save(self, folder_path: str | Path) -> str:
async def save(self, folder_path: str | Path):
"""
Save the brain to a folder path.
Args:
folder_path (str | Path): The path to the folder where the brain will be saved.
- Returns:
- str: The path to the folder where the brain was saved.
Example:
```python
await brain.save("path/to/brain")
@@ -324,10 +305,9 @@ async def afrom_files(
embedder: Embeddings | None = None,
skip_file_error: bool = False,
processor_kwargs: dict[str, Any] | None = None,
) -> Self:
):
"""
Create a brain from a list of file paths.
Args:
name (str): The name of the brain.
file_paths (list[str | Path]): The list of file paths to add to the brain.
@@ -337,10 +317,8 @@ async def afrom_files(
embedder (Embeddings | None): The embeddings used to create the index of the processed files.
skip_file_error (bool): Whether to skip files that cannot be processed.
processor_kwargs (dict[str, Any] | None): Additional arguments for the processor.
Returns:
Brain: The brain created from the file paths.
Example:
```python
brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -429,18 +407,15 @@ async def afrom_langchain_documents(
) -> Self:
"""
Create a brain from a list of langchain documents.
Args:
name (str): The name of the brain.
langchain_documents (list[Document]): The list of langchain documents to add to the brain.
vector_db (VectorStore | None): The vector store used to store the processed files.
storage (StorageBase): The storage used to store the files.
llm (LLMEndpoint | None): The language model used to generate the answer.
embedder (Embeddings | None): The embeddings used to create the index of the processed files.
Returns:
Brain: The brain created from the langchain documents.
Example:
```python
from langchain_core.documents import Document
@@ -449,6 +424,7 @@ async def afrom_langchain_documents(
brain.print_info()
```
"""

if llm is None:
llm = default_llm()

@@ -481,16 +457,13 @@ async def asearch(
) -> list[SearchResult]:
"""
Search for relevant documents in the brain based on a query.
Args:
query (str | Document): The query to search for.
n_results (int): The number of results to return.
filter (Callable | Dict[str, Any] | None): The filter to apply to the search.
fetch_n_neighbors (int): The number of neighbors to fetch.
Returns:
list[SearchResult]: The list of retrieved chunks.
Example:
```python
brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -517,57 +490,6 @@ def add_file(self) -> None:
# add it to vectorstore
raise NotImplementedError

- def ask(
- self,
- question: str,
- retrieval_config: RetrievalConfig | None = None,
- rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
- list_files: list[QuivrKnowledge] | None = None,
- chat_history: ChatHistory | None = None,
- ) -> ParsedRAGResponse:
- """
- Ask a question to the brain and get a generated answer.
- Args:
- question (str): The question to ask.
- retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
- rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
- list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
- chat_history (ChatHistory | None): The chat history to use.
- Returns:
- ParsedRAGResponse: The generated answer.
- Example:
- ```python
- brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
- answer = brain.ask("What is the meaning of life?")
- print(answer.answer)
- ```
- """
- async def collect_streamed_response():
- full_answer = ""
- async for response in self.ask_streaming(
- question=question,
- retrieval_config=retrieval_config,
- rag_pipeline=rag_pipeline,
- list_files=list_files,
- chat_history=chat_history
- ):
- full_answer += response.answer
- return full_answer
-
- # Run the async function in the event loop
- loop = asyncio.get_event_loop()
- full_answer = loop.run_until_complete(collect_streamed_response())
-
- chat_history = self.default_chat if chat_history is None else chat_history
- chat_history.append(HumanMessage(content=question))
- chat_history.append(AIMessage(content=full_answer))
-
- # Return the final response
- return ParsedRAGResponse(answer=full_answer)

async def ask_streaming(
self,
question: str,
@@ -578,24 +500,20 @@ async def ask_streaming(
) -> AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]:
"""
Ask a question to the brain and get a streamed generated answer.
Args:
question (str): The question to ask.
retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
chat_history (ChatHistory | None): The chat history to use.
Returns:
AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]: The streamed generated answer.
Example:
```python
brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
async for chunk in brain.ask_streaming("What is the meaning of life?"):
print(chunk.answer)
```
"""
llm = self.llm

@@ -630,3 +548,64 @@ async def ask_streaming(
chat_history.append(AIMessage(content=full_answer))
yield response

+ async def aask(
+ self,
+ question: str,
+ retrieval_config: RetrievalConfig | None = None,
+ rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+ list_files: list[QuivrKnowledge] | None = None,
+ chat_history: ChatHistory | None = None,
+ ) -> ParsedRAGResponse:
+ """
+ Asynchronous version that asks a question to the brain and gets a generated answer.
+ Args:
+ question (str): The question to ask.
+ retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+ rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+ list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+ chat_history (ChatHistory | None): The chat history to use.
+ Returns:
+ ParsedRAGResponse: The generated answer.
+ """
+ full_answer = ""
+
+ async for response in self.ask_streaming(
+ question=question,
+ retrieval_config=retrieval_config,
+ rag_pipeline=rag_pipeline,
+ list_files=list_files,
+ chat_history=chat_history,
+ ):
+ full_answer += response.answer
+
+ return ParsedRAGResponse(answer=full_answer)
+
+ def ask(
+ self,
+ question: str,
+ retrieval_config: RetrievalConfig | None = None,
+ rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+ list_files: list[QuivrKnowledge] | None = None,
+ chat_history: ChatHistory | None = None,
+ ) -> ParsedRAGResponse:
+ """
+ Fully synchronous version that asks a question to the brain and gets a generated answer.
+ Args:
+ question (str): The question to ask.
+ retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+ rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+ list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+ chat_history (ChatHistory | None): The chat history to use.
+ Returns:
+ ParsedRAGResponse: The generated answer.
+ """
+ loop = asyncio.get_event_loop()
+ return loop.run_until_complete(
+ self.aask(
+ question=question,
+ retrieval_config=retrieval_config,
+ rag_pipeline=rag_pipeline,
+ list_files=list_files,
+ chat_history=chat_history,
+ )
+ )
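
Taken together, the new `aask` coroutine and the thin synchronous `ask` wrapper give callers both entry points over the same streaming path. A minimal usage sketch, assuming `Brain` is importable from the package root and the file paths exist:

```python
import asyncio

from quivr_core import Brain  # assumed export; adjust to your installation

async def main():
    # afrom_files signature as shown in the diff above
    brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf"])
    answer = await brain.aask("What is this document about?")
    print(answer.answer)

asyncio.run(main())
```

Note that the synchronous `ask` drives `aask` with `loop.run_until_complete`, so it will fail inside an already-running event loop (e.g. a Jupyter notebook); prefer `aask` in async contexts.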
6 changes: 4 additions & 2 deletions core/quivr_core/brain/brain_defaults.py
@@ -4,7 +4,7 @@
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

- from quivr_core.config import LLMEndpointConfig
+ from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig
from quivr_core.llm import LLMEndpoint

logger = logging.getLogger("quivr_core")
@@ -46,7 +46,9 @@ def default_embedder() -> Embeddings:
def default_llm() -> LLMEndpoint:
try:
logger.debug("Loaded ChatOpenAI as default LLM for brain")
- llm = LLMEndpoint.from_config(LLMEndpointConfig())
+ llm = LLMEndpoint.from_config(
+ LLMEndpointConfig(supplier=DefaultModelSuppliers.OPENAI, model="gpt-4o")
+ )
return llm

except ImportError as e:
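
With this change the default LLM is pinned explicitly to OpenAI's gpt-4o through the config object rather than relying on implicit defaults. A hedged sketch of overriding that default with the same machinery; the alternative model name is illustrative:

```python
from quivr_core.llm import LLMEndpoint
from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig

# Same pattern as default_llm(), but with a hypothetical alternative model.
llm = LLMEndpoint.from_config(
    LLMEndpointConfig(supplier=DefaultModelSuppliers.OPENAI, model="gpt-4o-mini")
)
```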
