Feat/memory base (#1444)

* byom - short/entity memory * better * rm unneeded * fix text * use context * rm dep and sync * type check fix * fixed test using new cassette * fixing types * fixed types * fix types * fixed types * fixing types * fix type * cassette update * just mock the return of short term mem * remove print * try catch block * added docs * adding error handling here
crewAIInc · Oct 17, 2024 · 6d20ba7 · 6d20ba7
1 parent 67f55ba
commit 6d20ba7
Show file tree

Hide file tree

Showing 14 changed files with 242 additions and 559 deletions.
diff --git a/docs/concepts/memory.mdx b/docs/concepts/memory.mdx
@@ -34,7 +34,7 @@ By default, the memory system is disabled, and you can ensure it is active by se
 The memory will use OpenAI embeddings by default, but you can change it by setting `embedder` to a different model. 
 It's also possible to initialize the memory instance with your own instance.
 
-The 'embedder' only applies to **Short-Term Memory** which uses Chroma for RAG using the EmbedChain package.
+The 'embedder' only applies to **Short-Term Memory** which uses Chroma for RAG.
 The **Long-Term Memory** uses SQLite3 to store task results. Currently, there is no way to override these storage implementations.
 The data storage files are saved into a platform-specific location found using the appdirs package,
 and the name of the project can be overridden using the **CREWAI_STORAGE_DIR** environment variable.
@@ -105,12 +105,9 @@ my_crew = Crew(
     process=Process.sequential,
     memory=True,
     verbose=True,
-    embedder={
-        "provider": "openai",
-        "config": {
-            "model": 'text-embedding-3-small'
-        }
-    }
+    embedder=embedding_functions.OpenAIEmbeddingFunction(
+            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
+        )
 )
 ```
 
@@ -125,14 +122,10 @@ my_crew = Crew(
     process=Process.sequential,
     memory=True,
     verbose=True,
-    embedder={
-        "provider": "google",
-        "config": {
-            "model": 'models/embedding-001',
-            "task_type": "retrieval_document",
-            "title": "Embeddings for Embedchain"
-        }
-    }
+    embedder=embedding_functions.OpenAIEmbeddingFunction(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model_name="text-embedding-ada-002"
+    )
 )
 ```
 
@@ -147,30 +140,13 @@ my_crew = Crew(
     process=Process.sequential,
     memory=True,
     verbose=True,
-    embedder={
-        "provider": "azure_openai",
-        "config": {
-            "model": 'text-embedding-ada-002',
-            "deployment_name": "your_embedding_model_deployment_name"
-        }
-    }
-)
-```
-
-### Using GPT4ALL embeddings
-
-```python Code
-from crewai import Crew, Agent, Task, Process
-
-my_crew = Crew(
-    agents=[...],
-    tasks=[...],
-    process=Process.sequential,
-    memory=True,
-    verbose=True,
-    embedder={
-        "provider": "gpt4all"
-    }
+    embedder=embedding_functions.OpenAIEmbeddingFunction(
+        api_key="YOUR_API_KEY",
+        api_base="YOUR_API_BASE_PATH",
+        api_type="azure",
+        api_version="YOUR_API_VERSION",
+        model_name="text-embedding-3-small"
+    )
 )
 ```
 
@@ -185,12 +161,12 @@ my_crew = Crew(
     process=Process.sequential,
     memory=True,
     verbose=True,
-    embedder={
-        "provider": "vertexai",
-        "config": {
-            "model": 'textembedding-gecko'
-        }
-    }
+    embedder=embedding_functions.GoogleVertexEmbeddingFunction(
+        project_id="YOUR_PROJECT_ID",
+        region="YOUR_REGION",
+        api_key="YOUR_API_KEY",
+        model_name="textembedding-gecko"
+    )
 )
 ```
 
@@ -205,13 +181,10 @@ my_crew = Crew(
     process=Process.sequential,
     memory=True,
     verbose=True,
-    embedder={
-        "provider": "cohere",
-        "config": {
-            "model": "embed-english-v3.0",
-            "vector_dimension": 1024
-        }
-    }
+    embedder=embedding_functions.CohereEmbeddingFunction(
+        api_key=YOUR_API_KEY,
+        model_name="<model_name>"
+    )
 )
 ```
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,14 +21,13 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "appdirs>=1.4.4",
     "jsonref>=1.1.0",
-    "agentops>=0.3.0",
-    "embedchain>=0.1.114",
     "json-repair>=0.25.2",
     "auth0-python>=4.7.1",
     "litellm>=1.44.22",
     "pyvis>=0.3.2",
     "uv>=0.4.18",
     "tomli-w>=1.1.0",
+    "chromadb>=0.4.24",
 ]
 
 [project.urls]

diff --git a/src/crewai/agents/agent_builder/base_agent_executor_mixin.py b/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
@@ -17,7 +17,7 @@
 
 class CrewAgentExecutorMixin:
     crew: Optional["Crew"]
-    crew_agent: Optional["BaseAgent"]
+    agent: Optional["BaseAgent"]
     task: Optional["Task"]
     iterations: int
     have_forced_answer: bool
@@ -33,21 +33,21 @@ def _create_short_term_memory(self, output) -> None:
         """Create and save a short-term memory item if conditions are met."""
         if (
             self.crew
-            and self.crew_agent
+            and self.agent
             and self.task
-            and "Action: Delegate work to coworker" not in output.log
+            and "Action: Delegate work to coworker" not in output.text
         ):
             try:
                 if (
                     hasattr(self.crew, "_short_term_memory")
                     and self.crew._short_term_memory
                 ):
                     self.crew._short_term_memory.save(
-                        value=output.log,
+                        value=output.text,
                         metadata={
                             "observation": self.task.description,
                         },
-                        agent=self.crew_agent.role,
+                        agent=self.agent.role,
                     )
             except Exception as e:
                 print(f"Failed to add to short term memory: {e}")
@@ -61,18 +61,18 @@ def _create_long_term_memory(self, output) -> None:
             and self.crew._long_term_memory
             and self.crew._entity_memory
             and self.task
-            and self.crew_agent
+            and self.agent
         ):
             try:
-                ltm_agent = TaskEvaluator(self.crew_agent)
-                evaluation = ltm_agent.evaluate(self.task, output.log)
+                ltm_agent = TaskEvaluator(self.agent)
+                evaluation = ltm_agent.evaluate(self.task, output.text)
 
                 if isinstance(evaluation, ConverterError):
                     return
 
                 long_term_memory = LongTermMemoryItem(
                     task=self.task.description,
-                    agent=self.crew_agent.role,
+                    agent=self.agent.role,
                     quality=evaluation.quality,
                     datetime=str(time.time()),
                     expected_output=self.task.expected_output,

diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
@@ -19,6 +19,7 @@
 )
 from crewai.utilities.logger import Logger
 from crewai.utilities.training_handler import CrewTrainingHandler
+from crewai.agents.agent_builder.base_agent import BaseAgent
 
 
 class CrewAgentExecutor(CrewAgentExecutorMixin):
@@ -29,7 +30,7 @@ def __init__(
         llm: Any,
         task: Any,
         crew: Any,
-        agent: Any,
+        agent: BaseAgent,
         prompt: dict[str, str],
         max_iter: int,
         tools: List[Any],
@@ -103,7 +104,8 @@ def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
 
             if self.crew and self.crew._train:
                 self._handle_crew_training_output(formatted_answer)
-
+        self._create_short_term_memory(formatted_answer)
+        self._create_long_term_memory(formatted_answer)
         return {"output": formatted_answer.output}
 
     def _invoke_loop(self, formatted_answer=None):
@@ -176,6 +178,8 @@ def _invoke_loop(self, formatted_answer=None):
         return formatted_answer
 
     def _show_start_logs(self):
+        if self.agent is None:
+            raise ValueError("Agent cannot be None")
         if self.agent.verbose or (
             hasattr(self, "crew") and getattr(self.crew, "verbose", False)
         ):
@@ -188,6 +192,8 @@ def _show_start_logs(self):
             )
 
     def _show_logs(self, formatted_answer: Union[AgentAction, AgentFinish]):
+        if self.agent is None:
+            raise ValueError("Agent cannot be None")
         if self.agent.verbose or (
             hasattr(self, "crew") and getattr(self.crew, "verbose", False)
         ):
@@ -306,7 +312,7 @@ def _handle_crew_training_output(
         self, result: AgentFinish, human_feedback: str | None = None
     ) -> None:
         """Function to handle the process of the training data."""
-        agent_id = str(self.agent.id)
+        agent_id = str(self.agent.id)  # type: ignore
 
         # Load training data
         training_handler = CrewTrainingHandler(TRAINING_DATA_FILE)
@@ -339,7 +345,7 @@ def _handle_crew_training_output(
                 "initial_output": result.output,
                 "human_feedback": human_feedback,
                 "agent": agent_id,
-                "agent_role": self.agent.role,
+                "agent_role": self.agent.role,  # type: ignore
             }
             if self.crew is not None and hasattr(self.crew, "_train_iteration"):
                 train_iteration = self.crew._train_iteration

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
@@ -126,8 +126,8 @@ class Crew(BaseModel):
         default=None,
         description="An Instance of the EntityMemory to be used by the Crew",
     )
-    embedder: Optional[dict] = Field(
-        default={"provider": "openai"},
+    embedder: Optional[Any] = Field(
+        default=None,
         description="Configuration for the embedder to be used for the crew.",
     )
     usage_metrics: Optional[UsageMetrics] = Field(
@@ -774,7 +774,9 @@ def _add_delegation_tools(self, task: Task):
 
     def _log_task_start(self, task: Task, role: str = "None"):
         if self.output_log_file:
-            self._file_handler.log(task_name=task.name, task=task.description, agent=role, status="started")
+            self._file_handler.log(
+                task_name=task.name, task=task.description, agent=role, status="started"
+            )
 
     def _update_manager_tools(self, task: Task):
         if self.manager_agent:
@@ -796,7 +798,13 @@ def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
     def _process_task_result(self, task: Task, output: TaskOutput) -> None:
         role = task.agent.role if task.agent is not None else "None"
         if self.output_log_file:
-            self._file_handler.log(task_name=task.name, task=task.description, agent=role, status="completed", output=output.raw)
+            self._file_handler.log(
+                task_name=task.name,
+                task=task.description,
+                agent=role,
+                status="completed",
+                output=output.raw,
+            )
 
     def _create_crew_output(self, task_outputs: List[TaskOutput]) -> CrewOutput:
         if len(task_outputs) != 1:

diff --git a/src/crewai/memory/contextual/contextual_memory.py b/src/crewai/memory/contextual/contextual_memory.py
@@ -31,7 +31,9 @@ def _fetch_stm_context(self, query) -> str:
         formatted as bullet points.
         """
         stm_results = self.stm.search(query)
-        formatted_results = "\n".join([f"- {result}" for result in stm_results])
+        formatted_results = "\n".join(
+            [f"- {result['context']}" for result in stm_results]
+        )
         return f"Recent Insights:\n{formatted_results}" if stm_results else ""
 
     def _fetch_ltm_context(self, task) -> Optional[str]:

diff --git a/src/crewai/memory/long_term/long_term_memory.py b/src/crewai/memory/long_term/long_term_memory.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
 from crewai.memory.memory import Memory
@@ -28,7 +28,7 @@ def save(self, item: LongTermMemoryItem) -> None:  # type: ignore # BUG?: Signat
             datetime=item.datetime,
         )
 
-    def search(self, task: str, latest_n: int = 3) -> Dict[str, Any]:
+    def search(self, task: str, latest_n: int = 3) -> List[Dict[str, Any]]:  # type: ignore # signature of "search" incompatible with supertype "Memory"
         return self.storage.load(task, latest_n)  # type: ignore # BUG?: "Storage" has no attribute "load"
 
     def reset(self) -> None:

diff --git a/src/crewai/memory/memory.py b/src/crewai/memory/memory.py
@@ -1,14 +1,14 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, List
 
-from crewai.memory.storage.interface import Storage
+from crewai.memory.storage.rag_storage import RAGStorage
 
 
 class Memory:
     """
     Base class for memory, now supporting agent tags and generic metadata.
     """
 
-    def __init__(self, storage: Storage):
+    def __init__(self, storage: RAGStorage):
         self.storage = storage
 
     def save(
@@ -23,5 +23,5 @@ def save(
 
         self.storage.save(value, metadata)
 
-    def search(self, query: str) -> Dict[str, Any]:
+    def search(self, query: str) -> List[Dict[str, Any]]:
         return self.storage.search(query)