feat: remove dependencies on Pydantic v1 (#3526)
# Description

By moving to LangChain 0.3, we can remove every dependency on Pydantic V1, thus avoiding the conflicts that arise from mixing Pydantic V1 with Pydantic V2.
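
For illustration, the core of the migration is replacing the Pydantic V1 compatibility imports with their native V2 counterparts. A minimal before/after sketch (the `ExampleConfig` model below is hypothetical and not taken from the codebase):

```python
# Before (Pydantic V1 shims, needed while LangChain < 0.3 still ran on V1):
# from pydantic.v1 import BaseModel, Field, SecretStr
# from langchain_core.pydantic_v1 import BaseModel, Field

# After (native Pydantic V2, as used throughout this commit):
from pydantic import BaseModel, Field, SecretStr


class ExampleConfig(BaseModel):
    """Hypothetical model illustrating the V2-only style."""

    api_key: SecretStr = Field(..., description="Secrets handled by Pydantic V2")
    temperature: float = Field(0.7, description="Plain V2 field, no V1 shim needed")
```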



## Checklist before requesting a review


- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] Ideally, I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

jacopo-chevallard authored Dec 17, 2024
1 parent 09b4811 commit ebc4eb8
Showing 23 changed files with 559 additions and 3,430 deletions.
9 changes: 5 additions & 4 deletions core/pyproject.toml
@@ -5,23 +5,24 @@ description = "Quivr core RAG package"
authors = [{ name = "Stan Girard", email = "[email protected]" }]
dependencies = [
"pydantic>=2.8.2",
"langchain-core>=0.2.38",
"langchain>=0.2.14,<0.3.0",
"langgraph>=0.2.38",
"langchain-core>=0.3,<0.4",
"langchain>=0.3,<0.4",
"langgraph>=0.2.38,<0.3",
"httpx>=0.27.0",
"rich>=13.7.1",
"tiktoken>=0.7.0",
"aiofiles>=23.1.0",
"langchain-openai>=0.1.0",
"langchain-cohere>=0.1.0",
"langchain-community>=0.2.12",
"langchain-community>=0.3,<0.4",
"langchain-anthropic>=0.1.23",
"types-pyyaml>=6.0.12.20240808",
"transformers[sentencepiece]>=4.44.2",
"faiss-cpu>=1.8.0.post1",
"rapidfuzz>=3.10.1",
"markupsafe>=2.1.5",
"megaparse-sdk>=0.1.9",
"langchain-mistralai>=0.2.3",
]
readme = "README.md"
requires-python = ">= 3.11"
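
The new pins keep the whole LangChain stack on the 0.3 line, which in turn runs on Pydantic V2 only. As a quick, optional sanity check of a resolved environment, a minimal sketch using the standard library (package names taken from the list above):

```python
# Minimal sketch: confirm the installed stack matches the new 0.3-line pins.
from importlib.metadata import version

for package in ("langchain", "langchain-core", "langchain-community"):
    installed = version(package)
    assert installed.startswith("0.3."), f"{package} {installed} is outside the 0.3 line"
    print(f"{package}=={installed}")

print(f"pydantic=={version('pydantic')}")  # expected to be a 2.x release
```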
14 changes: 12 additions & 2 deletions core/quivr_core/llm/llm_endpoint.py
@@ -5,9 +5,10 @@

import tiktoken
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from pydantic.v1 import SecretStr
from pydantic import SecretStr

from quivr_core.brain.info import LLMInfo
from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig
@@ -54,7 +55,7 @@ def get_config(self):

@classmethod
def from_config(cls, config: LLMEndpointConfig = LLMEndpointConfig()):
_llm: Union[AzureChatOpenAI, ChatOpenAI, ChatAnthropic]
_llm: Union[AzureChatOpenAI, ChatOpenAI, ChatAnthropic, ChatMistralAI]
try:
if config.supplier == DefaultModelSuppliers.AZURE:
# Parse the URL
@@ -92,6 +93,15 @@ def from_config(cls, config: LLMEndpointConfig = LLMEndpointConfig()):
max_tokens=config.max_output_tokens,
temperature=config.temperature,
)
elif config.supplier == DefaultModelSuppliers.MISTRAL:
_llm = ChatMistralAI(
model=config.model,
api_key=SecretStr(config.llm_api_key)
if config.llm_api_key
else None,
base_url=config.llm_base_url,
temperature=config.temperature,
)
else:
_llm = ChatOpenAI(
model=config.model,
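
With the Mistral branch added above, a Mistral-backed endpoint goes through the same `from_config` path as the other suppliers. A usage sketch, assuming the enclosing class is named `LLMEndpoint` and that `LLMEndpointConfig` accepts the fields read inside `from_config` (both assumptions are inferred from this hunk rather than verified against the full file):

```python
# Sketch only: the class name and config fields are inferred from the hunk above.
from quivr_core.llm.llm_endpoint import LLMEndpoint
from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig

config = LLMEndpointConfig(
    supplier=DefaultModelSuppliers.MISTRAL,
    model="mistral-large-latest",        # hypothetical model identifier
    llm_api_key="your-mistral-api-key",  # wrapped in SecretStr inside from_config
    temperature=0.2,
)

llm_endpoint = LLMEndpoint.from_config(config)  # constructs a ChatMistralAI internally
```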
4 changes: 2 additions & 2 deletions core/quivr_core/llm_tools/web_search_tools.py
@@ -4,7 +4,7 @@
from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper
from quivr_core.llm_tools.entity import ToolsCategory
import os
from pydantic.v1 import SecretStr as SecretStrV1 # Ensure correct import
from pydantic import SecretStr # Ensure correct import
from quivr_core.llm_tools.entity import ToolWrapper, ToolRegistry
from langchain_core.documents import Document

@@ -23,7 +23,7 @@ def create_tavily_tool(config: Dict[str, Any]) -> ToolWrapper:
)

tavily_api_wrapper = TavilySearchAPIWrapper(
tavily_api_key=SecretStrV1(api_key),
tavily_api_key=SecretStr(api_key),
)
tool = TavilySearchResults(
api_wrapper=tavily_api_wrapper,
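
This works because langchain-community 0.3 models are themselves Pydantic V2, so a plain `pydantic.SecretStr` is accepted directly and the V1 alias is no longer needed. The pattern in isolation, as a minimal sketch with a placeholder key:

```python
# Sketch: pass a Pydantic V2 SecretStr straight into a langchain-community 0.3 wrapper.
from pydantic import SecretStr
from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper

wrapper = TavilySearchAPIWrapper(tavily_api_key=SecretStr("tvly-placeholder-key"))
# wrapper.results("query") would then perform the actual search (requires a valid key).
```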
17 changes: 7 additions & 10 deletions core/quivr_core/rag/entities/models.py
@@ -5,35 +5,33 @@

from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.pydantic_v1 import BaseModel as BaseModelV1
from langchain_core.pydantic_v1 import Field as FieldV1
from pydantic import BaseModel, Field
from typing_extensions import TypedDict


class cited_answer(BaseModelV1):
class cited_answer(BaseModel):
"""Answer the user question based only on the given sources, and cite the sources used."""

answer: str = FieldV1(
answer: str = Field(
...,
description="The answer to the user question, which is based only on the given sources.",
)
citations: list[int] = FieldV1(
citations: list[int] = Field(
...,
description="The integer IDs of the SPECIFIC sources which justify the answer.",
)

followup_questions: list[str] = FieldV1(
followup_questions: list[str] = Field(
...,
description="Generate up to 3 follow-up questions that could be asked based on the answer given or context provided.",
)


class ChatMessage(BaseModelV1):
class ChatMessage(BaseModel):
chat_id: UUID
message_id: UUID
brain_id: UUID | None
msg: AIMessage | HumanMessage
msg: HumanMessage | AIMessage
message_time: datetime
metadata: dict[str, Any]

@@ -108,7 +106,6 @@ class QuivrKnowledge(BaseModel):
metadata: Optional[Dict[str, str]] = None


# NOTE: for compatibility issues with langchain <-> PydanticV1
class SearchResult(BaseModelV1):
class SearchResult(BaseModel):
chunk: Document
distance: float
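
With the V1 shims gone, these models expose the regular Pydantic V2 API (`model_dump`, `model_validate`) rather than V1's `.dict()`/`.parse_obj()`. A small sketch using the `cited_answer` schema defined above, with made-up field values:

```python
# Sketch: build a cited_answer instance and serialize it with the Pydantic V2 API.
from quivr_core.rag.entities.models import cited_answer

answer = cited_answer(
    answer="Quivr core now targets the LangChain 0.3 line.",
    citations=[1, 3],
    followup_questions=["Which Pydantic version does LangChain 0.3 require?"],
)

print(answer.model_dump())  # .dict() was the Pydantic V1 spelling
```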
51 changes: 39 additions & 12 deletions core/requirements-dev.lock
@@ -21,7 +21,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.36.1
anthropic==0.40.0
# via langchain-anthropic
anyio==4.6.2.post1
# via anthropic
@@ -89,12 +89,16 @@ httpcore==1.0.6
httpx==0.27.2
# via anthropic
# via cohere
# via langchain-mistralai
# via langgraph-sdk
# via langsmith
# via megaparse-sdk
# via openai
# via quivr-core
httpx-sse==0.4.0
# via cohere
# via langchain-community
# via langchain-mistralai
# via langgraph-sdk
huggingface-hub==0.25.2
# via tokenizers
@@ -125,32 +129,35 @@ jupyter-client==8.6.3
jupyter-core==5.7.2
# via ipykernel
# via jupyter-client
langchain==0.2.16
langchain==0.3.9
# via langchain-community
# via quivr-core
langchain-anthropic==0.1.23
langchain-anthropic==0.3.0
# via quivr-core
langchain-cohere==0.2.4
langchain-cohere==0.3.3
# via quivr-core
langchain-community==0.2.17
langchain-community==0.3.9
# via langchain-experimental
# via quivr-core
langchain-core==0.2.41
langchain-core==0.3.21
# via langchain
# via langchain-anthropic
# via langchain-cohere
# via langchain-community
# via langchain-experimental
# via langchain-mistralai
# via langchain-openai
# via langchain-text-splitters
# via langgraph
# via langgraph-checkpoint
# via quivr-core
langchain-experimental==0.0.65
langchain-experimental==0.3.3
# via langchain-cohere
langchain-openai==0.1.25
langchain-mistralai==0.2.3
# via quivr-core
langchain-openai==0.2.11
# via quivr-core
langchain-text-splitters==0.2.4
langchain-text-splitters==0.3.2
# via langchain
langgraph==0.2.38
# via quivr-core
@@ -162,8 +169,12 @@ langsmith==0.1.135
# via langchain
# via langchain-community
# via langchain-core
loguru==0.7.2
# via megaparse-sdk
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via quivr-core
marshmallow==3.22.0
# via dataclasses-json
matplotlib-inline==0.1.7
@@ -173,6 +184,8 @@ mccabe==0.7.0
# via flake8
mdurl==0.1.2
# via markdown-it-py
megaparse-sdk==0.1.10
# via quivr-core
msgpack==1.1.0
# via langgraph-checkpoint
multidict==6.1.0
@@ -183,6 +196,8 @@ mypy-extensions==1.0.0
# via black
# via mypy
# via typing-inspect
nats-py==2.9.0
# via megaparse-sdk
nest-asyncio==1.6.0
# via ipykernel
nodeenv==1.9.1
@@ -193,7 +208,7 @@ numpy==1.26.4
# via langchain-community
# via pandas
# via transformers
openai==1.52.0
openai==1.56.2
# via langchain-openai
orjson==3.10.7
# via langgraph-sdk
@@ -230,8 +245,9 @@ propcache==0.2.0
# via yarl
protobuf==5.28.2
# via transformers
psutil==6.0.0
psutil==6.1.0
# via ipykernel
# via megaparse-sdk
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
@@ -240,17 +256,25 @@ py-cpuinfo==9.0.0
# via pytest-benchmark
pycodestyle==2.12.1
# via flake8
pycryptodome==3.21.0
# via megaparse-sdk
pydantic==2.9.2
# via anthropic
# via cohere
# via langchain
# via langchain-anthropic
# via langchain-cohere
# via langchain-core
# via langchain-mistralai
# via langsmith
# via openai
# via pydantic-settings
# via quivr-core
pydantic-core==2.23.4
# via cohere
# via pydantic
pydantic-settings==2.6.1
# via langchain-community
pyflakes==3.2.0
# via flake8
pygments==2.18.0
@@ -266,6 +290,9 @@ pytest-xdist==3.6.1
python-dateutil==2.8.2
# via jupyter-client
# via pandas
python-dotenv==1.0.1
# via megaparse-sdk
# via pydantic-settings
pytz==2024.2
# via pandas
pyyaml==6.0.2
@@ -324,8 +351,8 @@ tiktoken==0.8.0
# via langchain-openai
# via quivr-core
tokenizers==0.20.1
# via anthropic
# via cohere
# via langchain-mistralai
# via transformers
tornado==6.4.1
# via ipykernel