Merge branch 'main' into feat/remove_Pydantic_V1
jacopo-chevallard committed Dec 16, 2024
2 parents 7e34d67 + 09b4811 commit d51a0c1
Showing 8 changed files with 156 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
"core": "0.0.25"
"core": "0.0.27"
}
25 changes: 25 additions & 0 deletions core/CHANGELOG.md
@@ -1,5 +1,30 @@
# Changelog

## [0.0.27](https://github.com/QuivrHQ/quivr/compare/core-0.0.26...core-0.0.27) (2024-12-16)


### Features

* ensuring that max_context_tokens is never larger than what is supported by the models ([#3519](https://github.com/QuivrHQ/quivr/issues/3519)) ([d6e0ed4](https://github.com/QuivrHQ/quivr/commit/d6e0ed44df0ee7edafea85f704a15fd99969bafd))
* send all to megaparse_sdk ([#3521](https://github.com/QuivrHQ/quivr/issues/3521)) ([e48044d](https://github.com/QuivrHQ/quivr/commit/e48044d36ffda613f65da24641ed8da290195177))


### Bug Fixes

* fixing errors arising when the user input contains no tasks ([#3525](https://github.com/QuivrHQ/quivr/issues/3525)) ([e28f7bc](https://github.com/QuivrHQ/quivr/commit/e28f7bcb9ab9534bc011664525ae1f9c2cf6393e))

## [0.0.26](https://github.com/QuivrHQ/quivr/compare/core-0.0.25...core-0.0.26) (2024-12-10)


### Features

* first version (V0) of the Workflow Management System ([#3493](https://github.com/QuivrHQ/quivr/issues/3493)) ([6450a49](https://github.com/QuivrHQ/quivr/commit/6450a494e3efa8e8c267ca49aa0a7ec682586b4e))


### Bug Fixes

* dealing with empty tool_calls ([#3514](https://github.com/QuivrHQ/quivr/issues/3514)) ([e2f6389](https://github.com/QuivrHQ/quivr/commit/e2f6389189d911a382b2236ab39f28a1270528ac))

## [0.0.25](https://github.com/QuivrHQ/quivr/compare/core-0.0.24...core-0.0.25) (2024-11-28)


8 changes: 3 additions & 5 deletions core/pyproject.toml
@@ -1,10 +1,8 @@
[project]
name = "quivr-core"
version = "0.0.25"
version = "0.0.27"
description = "Quivr core RAG package"
authors = [
{ name = "Stan Girard", email = "[email protected]" }
]
authors = [{ name = "Stan Girard", email = "[email protected]" }]
dependencies = [
"pydantic>=2.8.2",
"langchain-core>=0.3,<0.4",
@@ -23,7 +21,7 @@ dependencies = [
"faiss-cpu>=1.8.0.post1",
"rapidfuzz>=3.10.1",
"markupsafe>=2.1.5",
"megaparse-sdk==0.1.7",
"megaparse-sdk>=0.1.9",
"langchain-mistralai>=0.2.3",
]
readme = "README.md"
3 changes: 1 addition & 2 deletions core/quivr_core/processor/implementations/megaparse_processor.py
@@ -31,6 +31,7 @@ class MegaparseProcessor(ProcessorBase):
"""

supported_extensions = [
FileExtension.txt,
FileExtension.pdf,
FileExtension.docx,
FileExtension.doc,
@@ -42,11 +43,9 @@ class MegaparseProcessor(ProcessorBase):
FileExtension.bib,
FileExtension.odt,
FileExtension.html,
FileExtension.py,
FileExtension.markdown,
FileExtension.md,
FileExtension.mdx,
FileExtension.ipynb,
]

def __init__(
10 changes: 9 additions & 1 deletion core/quivr_core/processor/registry.py
@@ -124,13 +124,21 @@ def defaults_to_proc_entries(
_append_proc_mapping(
mapping=base_processors,
file_exts=[
FileExtension.txt,
FileExtension.pdf,
FileExtension.xls,
FileExtension.docx,
FileExtension.doc,
FileExtension.pptx,
FileExtension.xls,
FileExtension.xlsx,
FileExtension.csv,
FileExtension.epub,
FileExtension.bib,
FileExtension.odt,
FileExtension.html,
FileExtension.markdown,
FileExtension.md,
FileExtension.mdx,
],
cls_mod="quivr_core.processor.implementations.megaparse_processor.MegaparseProcessor",
errtxt=f"can't import MegaparseProcessor. Please install quivr-core[{ext_str}] to access MegaparseProcessor",
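For orientation, here is a minimal, self-contained sketch of what such an extension-to-processor default mapping expresses; the `ProcEntry` dataclass and `append_proc_mapping` helper below are simplified stand-ins, not the actual quivr-core registry API.

```python
# Simplified, hypothetical sketch of an extension-to-processor default mapping;
# quivr-core's real registry types are not shown in this diff.
from dataclasses import dataclass


@dataclass
class ProcEntry:
    cls_mod: str  # dotted path to the processor class, imported lazily
    errtxt: str   # message raised if the optional dependency is missing


def append_proc_mapping(
    mapping: dict[str, list[ProcEntry]],
    file_exts: list[str],
    cls_mod: str,
    errtxt: str,
) -> None:
    # Register the same processor entry for every listed extension.
    for ext in file_exts:
        mapping.setdefault(ext, []).append(ProcEntry(cls_mod=cls_mod, errtxt=errtxt))


base_processors: dict[str, list[ProcEntry]] = {}
append_proc_mapping(
    mapping=base_processors,
    file_exts=[".txt", ".pdf", ".md"],
    cls_mod="quivr_core.processor.implementations.megaparse_processor.MegaparseProcessor",
    errtxt="can't import MegaparseProcessor",
)
print(base_processors[".txt"][0].cls_mod)
```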
133 changes: 100 additions & 33 deletions core/quivr_core/rag/entities/config.py
@@ -75,89 +75,139 @@ class DefaultModelSuppliers(str, Enum):


class LLMConfig(QuivrBaseConfig):
context: int | None = None
max_context_tokens: int | None = None
max_output_tokens: int | None = None
tokenizer_hub: str | None = None


class LLMModelConfig:
_model_defaults: Dict[DefaultModelSuppliers, Dict[str, LLMConfig]] = {
DefaultModelSuppliers.OPENAI: {
"gpt-4o": LLMConfig(context=128000, tokenizer_hub="Xenova/gpt-4o"),
"gpt-4o-mini": LLMConfig(context=128000, tokenizer_hub="Xenova/gpt-4o"),
"gpt-4-turbo": LLMConfig(context=128000, tokenizer_hub="Xenova/gpt-4"),
"gpt-4": LLMConfig(context=8192, tokenizer_hub="Xenova/gpt-4"),
"gpt-4o": LLMConfig(
max_context_tokens=128000,
max_output_tokens=16384,
tokenizer_hub="Xenova/gpt-4o",
),
"gpt-4o-mini": LLMConfig(
max_context_tokens=128000,
max_output_tokens=16384,
tokenizer_hub="Xenova/gpt-4o",
),
"gpt-4-turbo": LLMConfig(
max_context_tokens=128000,
max_output_tokens=4096,
tokenizer_hub="Xenova/gpt-4",
),
"gpt-4": LLMConfig(
max_context_tokens=8192,
max_output_tokens=8192,
tokenizer_hub="Xenova/gpt-4",
),
"gpt-3.5-turbo": LLMConfig(
context=16385, tokenizer_hub="Xenova/gpt-3.5-turbo"
max_context_tokens=16385,
max_output_tokens=4096,
tokenizer_hub="Xenova/gpt-3.5-turbo",
),
"text-embedding-3-large": LLMConfig(
context=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
),
"text-embedding-3-small": LLMConfig(
context=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
),
"text-embedding-ada-002": LLMConfig(
context=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
),
},
DefaultModelSuppliers.ANTHROPIC: {
"claude-3-5-sonnet": LLMConfig(
context=200000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=200000,
max_output_tokens=8192,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-3-opus": LLMConfig(
context=200000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=200000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-3-sonnet": LLMConfig(
context=200000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=200000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-3-haiku": LLMConfig(
context=200000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=200000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-2-1": LLMConfig(
context=200000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=200000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-2-0": LLMConfig(
context=100000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=100000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
"claude-instant-1-2": LLMConfig(
context=100000, tokenizer_hub="Xenova/claude-tokenizer"
max_context_tokens=100000,
max_output_tokens=4096,
tokenizer_hub="Xenova/claude-tokenizer",
),
},
# Unclear for LLAMA models...
# see https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct/discussions/6
DefaultModelSuppliers.META: {
"llama-3.1": LLMConfig(
context=128000, tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer"
max_context_tokens=128000,
max_output_tokens=4096,
tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
),
"llama-3": LLMConfig(
context=8192, tokenizer_hub="Xenova/llama3-tokenizer-new"
max_context_tokens=8192,
max_output_tokens=2048,
tokenizer_hub="Xenova/llama3-tokenizer-new",
),
"llama-2": LLMConfig(context=4096, tokenizer_hub="Xenova/llama2-tokenizer"),
"code-llama": LLMConfig(
context=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
max_context_tokens=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
),
},
DefaultModelSuppliers.GROQ: {
"llama-3.1": LLMConfig(
context=128000, tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer"
"llama-3.3-70b": LLMConfig(
max_context_tokens=128000,
max_output_tokens=32768,
tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
),
"llama-3.1-70b": LLMConfig(
max_context_tokens=128000,
max_output_tokens=32768,
tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
),
"llama-3": LLMConfig(
context=8192, tokenizer_hub="Xenova/llama3-tokenizer-new"
max_context_tokens=8192, tokenizer_hub="Xenova/llama3-tokenizer-new"
),
"llama-2": LLMConfig(context=4096, tokenizer_hub="Xenova/llama2-tokenizer"),
"code-llama": LLMConfig(
context=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
max_context_tokens=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
),
},
DefaultModelSuppliers.MISTRAL: {
"mistral-large": LLMConfig(
context=128000, tokenizer_hub="Xenova/mistral-tokenizer-v3"
max_context_tokens=128000,
max_output_tokens=4096,
tokenizer_hub="Xenova/mistral-tokenizer-v3",
),
"mistral-small": LLMConfig(
context=128000, tokenizer_hub="Xenova/mistral-tokenizer-v3"
max_context_tokens=128000,
max_output_tokens=4096,
tokenizer_hub="Xenova/mistral-tokenizer-v3",
),
"mistral-nemo": LLMConfig(
context=128000, tokenizer_hub="Xenova/Mistral-Nemo-Instruct-Tokenizer"
max_context_tokens=128000,
max_output_tokens=4096,
tokenizer_hub="Xenova/Mistral-Nemo-Instruct-Tokenizer",
),
"codestral": LLMConfig(
context=32000, tokenizer_hub="Xenova/mistral-tokenizer-v3"
max_context_tokens=32000, tokenizer_hub="Xenova/mistral-tokenizer-v3"
),
},
}
@@ -193,13 +243,12 @@ def get_llm_model_config(
class LLMEndpointConfig(QuivrBaseConfig):
supplier: DefaultModelSuppliers = DefaultModelSuppliers.OPENAI
model: str = "gpt-4o"
context_length: int | None = None
tokenizer_hub: str | None = None
llm_base_url: str | None = None
env_variable_name: str | None = None
llm_api_key: str | None = None
max_context_tokens: int = 2000
max_output_tokens: int = 2000
max_context_tokens: int = 10000
max_output_tokens: int = 4000
temperature: float = 0.7
streaming: bool = True
prompt: CustomPromptsModel | None = None
@@ -240,7 +289,25 @@ def set_llm_model_config(self):
self.supplier, self.model
)
if llm_model_config:
self.context_length = llm_model_config.context
if llm_model_config.max_context_tokens:
_max_context_tokens = (
llm_model_config.max_context_tokens
- llm_model_config.max_output_tokens
if llm_model_config.max_output_tokens
else llm_model_config.max_context_tokens
)
if self.max_context_tokens > _max_context_tokens:
logger.warning(
f"Lowering max_context_tokens from {self.max_context_tokens} to {_max_context_tokens}"
)
self.max_context_tokens = _max_context_tokens
if llm_model_config.max_output_tokens:
if self.max_output_tokens > llm_model_config.max_output_tokens:
logger.warning(
f"Lowering max_output_tokens from {self.max_output_tokens} to {llm_model_config.max_output_tokens}"
)
self.max_output_tokens = llm_model_config.max_output_tokens

self.tokenizer_hub = llm_model_config.tokenizer_hub

def set_llm_model(self, model: str):
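The hunk above caps the user-requested token budgets using the per-model defaults. As a rough illustration of the arithmetic, here is a standalone sketch (the `clamp_token_limits` helper is hypothetical, not the quivr-core API; the values are gpt-4o's defaults of 128000 context and 16384 output tokens from the table above):

```python
# Standalone sketch of the clamping logic added in set_llm_model_config above;
# the helper name and signature are hypothetical, the arithmetic mirrors the diff.
def clamp_token_limits(
    requested_context: int,
    requested_output: int,
    model_max_context: int | None,
    model_max_output: int | None,
) -> tuple[int, int]:
    """Lower the requested limits so they never exceed what the model supports."""
    if model_max_context:
        # Leave room in the context window for the model's own output tokens.
        ceiling = (
            model_max_context - model_max_output
            if model_max_output
            else model_max_context
        )
        requested_context = min(requested_context, ceiling)
    if model_max_output:
        requested_output = min(requested_output, model_max_output)
    return requested_context, requested_output


# gpt-4o defaults: 128000 context, 16384 output.
print(clamp_token_limits(200000, 32000, 128000, 16384))  # -> (111616, 16384)
```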
8 changes: 4 additions & 4 deletions core/quivr_core/rag/quivr_rag_langgraph.py
@@ -71,11 +71,11 @@ class SplittedInput(BaseModel):
class TasksCompletion(BaseModel):
is_task_completable_reasoning: Optional[str] = Field(
default=None,
description="The reasoning that leads to identifying whether the user task or question can be completed using the provided context and chat history.",
description="The reasoning that leads to identifying whether the user task or question can be completed using the provided context and chat history BEFORE any tool is used.",
)

is_task_completable: bool = Field(
description="Whether the user task or question can be completed using the provided context and chat history.",
description="Whether the user task or question can be completed using the provided context and chat history BEFORE any tool is used.",
)

tool_reasoning: Optional[str] = Field(
@@ -667,7 +667,7 @@ async def dynamic_retrieve(self, state: AgentState) -> AgentState:
MAX_ITERATIONS = 3

tasks = state["tasks"]
if not tasks.has_tasks():
if not tasks or not tasks.has_tasks():
return {**state}

k = self.retrieval_config.k
@@ -1031,7 +1031,7 @@ def _build_rag_prompt_inputs(
return {
"context": combine_documents(docs) if docs else "None",
"question": user_question,
"rephrased_task": state["tasks"].definitions,
"rephrased_task": state["tasks"].definitions if state["tasks"] else "None",
"custom_instructions": prompt if prompt else "None",
"files": files if files else "None",
"chat_history": state["chat_history"].to_list(),
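Both hunks above address the case where the graph state carries no tasks (the 0.0.27 fix for user input that contains no tasks): `state["tasks"]` can be empty or None, so it is now checked before `.has_tasks()` or `.definitions` is accessed. A minimal sketch of the guard pattern follows; the `Tasks` stand-in is hypothetical, not the actual quivr-core class.

```python
# Hypothetical stand-in illustrating why the extra truthiness check matters.
class Tasks:
    def __init__(self, definitions: list[str]):
        self.definitions = definitions

    def has_tasks(self) -> bool:
        return bool(self.definitions)


for tasks in (None, Tasks([]), Tasks(["summarize the document"])):
    # The old check, `if not tasks.has_tasks():`, raised AttributeError when tasks was None.
    if not tasks or not tasks.has_tasks():
        print("no tasks; returning state unchanged")
    else:
        print("retrieving documents for:", tasks.definitions)
```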
17 changes: 13 additions & 4 deletions core/quivr_core/rag/utils.py
@@ -96,13 +96,18 @@ def parse_chunk_response(
"""
rolling_msg += raw_chunk

if not supports_func_calling or not rolling_msg.tool_calls:
tool_calls = rolling_msg.tool_calls

if not supports_func_calling or not tool_calls:
new_content = raw_chunk.content # Just the new chunk's content
full_content = rolling_msg.content # The full accumulated content
return rolling_msg, new_content, full_content

current_answers = get_answers_from_tool_calls(rolling_msg.tool_calls)
current_answers = get_answers_from_tool_calls(tool_calls)
full_answer = "\n\n".join(current_answers)
if not full_answer:
full_answer = previous_content

new_content = full_answer[len(previous_content) :]

return rolling_msg, new_content, full_answer
@@ -111,8 +116,12 @@ def get_answers_from_tool_calls(tool_calls):
def get_answers_from_tool_calls(tool_calls):
answers = []
for tool_call in tool_calls:
if tool_call.get("name") == "cited_answer" and "args" in tool_call:
answers.append(tool_call["args"].get("answer", ""))
if tool_call.get("name") == "cited_answer":
args = tool_call.get("args", {})
if isinstance(args, dict):
answers.append(args.get("answer", ""))
else:
logger.warning(f"Expected dict for tool_call args, got {type(args)}")
return answers


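The hardened `get_answers_from_tool_calls` now tolerates tool calls whose `args` field is missing or not a dict instead of assuming a well-formed payload. A small sketch of the behaviour with assumed input shapes (the tool-call dicts below are illustrative, not captured from a real model response):

```python
# Illustrative tool-call payloads; shapes are assumed, not taken from a real LLM response.
tool_calls = [
    {"name": "cited_answer", "args": {"answer": "Paris"}},  # well-formed
    {"name": "cited_answer", "args": "not-a-dict"},         # malformed args, now skipped
    {"name": "other_tool", "args": {"answer": "ignored"}},  # different tool, ignored
]

answers = []
for tool_call in tool_calls:
    if tool_call.get("name") == "cited_answer":
        args = tool_call.get("args", {})
        if isinstance(args, dict):
            answers.append(args.get("answer", ""))

print(answers)  # -> ['Paris']
```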
