From 633c37533e15ddda0583287deb2368297af2a59f Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Fri, 16 Aug 2024 16:55:31 +0100
Subject: [PATCH 01/20] Skeleton

---
 .../agents/goal_manager.py                    | 59 +++++++++++++++++++
 .../agents/microchain_agent/deploy.py         | 24 ++++++++
 .../microchain_agent/microchain_agent.py      |  2 +
 prediction_market_agent/agents/utils.py       |  3 +
 4 files changed, 88 insertions(+)
 create mode 100644 prediction_market_agent/agents/goal_manager.py

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
new file mode 100644
index 00000000..95e54a82
--- /dev/null
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -0,0 +1,59 @@
+from pydantic import BaseModel
+
+from prediction_market_agent.agents.microchain_agent.memory import ChatHistory
+
+
+class Goal(BaseModel):
+    prompt: str
+    motivation: str
+    completion_criteria: str  # ?
+
+
+class EvaluatedGoal(Goal):
+    is_complete: bool
+    reasoning: str
+    output: str | None  # or 'learning'?
+
+    def to_goal(self) -> Goal:
+        return Goal(
+            prompt=self.prompt,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+        )
+
+
+class GoalManager:
+    def __init__(
+        self,
+        agent_id: str,
+    ):
+        self.agent_id: str = agent_id
+
+    def get_latest_goal_from_memory(self) -> EvaluatedGoal | None:
+        pass
+
+    def generate_goal(self) -> Goal:
+        """
+        If a goal exists from a previous session, load it and check its status.
+        Otherwise create a new one.
+        """
+        pass
+
+    def get_goal(self) -> Goal:
+        if goal := self.get_latest_goal_from_memory():
+            if goal.is_complete:
+                # Generate a new goal
+                return self.generate_goal()
+            else:
+                # Try again
+                return goal
+        return self.generate_goal()
+
+    def evaluate_goal_progress(
+        goal: Goal,
+        chat_history: ChatHistory,
+    ) -> EvaluatedGoal:
+        pass
+
+    def save_evaluated_goal(self, goal: EvaluatedGoal) -> None:
+        pass
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 4dfdd829..4011139c 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -2,6 +2,7 @@
 from prediction_market_agent_tooling.deploy.agent import DeployableAgent
 from prediction_market_agent_tooling.markets.markets import MarketType
 
+from prediction_market_agent.agents.goal_manager import GoalManager
 from prediction_market_agent.agents.microchain_agent.microchain_agent import (
     SupportedModel,
     build_agent,
@@ -28,6 +29,7 @@ class DeployableMicrochainAgent(DeployableAgent):
     load_historical_prompt: bool = False
     system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN
+    goal_manager: GoalManager | None = None
 
     def run(
         self,
@@ -47,6 +49,13 @@ def run(
                 prompt_handler if self.load_historical_prompt else None
             ),
         )
+
+        if self.goal_manager:
+            goal = self.goal_manager.get_goal()
+            prompt = goal.prompt
+        else:
+            prompt = None
+
         agent: Agent = build_agent(
             market_type=market_type,
             model=self.model,
@@ -57,6 +66,7 @@ def run(
             functions_config=FunctionsConfig.from_system_prompt_choice(
                 self.system_prompt_choice
             ),
+            prompt=prompt,
         )
 
         # Save formatted system prompt
@@ -70,6 +80,12 @@ def run(
         )
         prompt_handler.save_prompt(get_editable_prompt_from_agent(agent))
 
+        if self.goal_manager:
+            evaluated_goal = self.goal_manager.evaluate_goal_progress(
+                goal=goal, chat_history=agent.history
+            )
+            self.goal_manager.save_evaluated_goal(evaluated_goal)
+
 
 class DeployableMicrochainModifiableSystemPromptAgentAbstract(
     DeployableMicrochainAgent
@@ -102,3 +118,11 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 ):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3
     model = SupportedModel.llama_31_instruct
+
+
+class DeployableMicrochainModifiableSystemPromptAgent3(
+    DeployableMicrochainModifiableSystemPromptAgentAbstract
+):
+    task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
+    goal_manager: GoalManager = GoalManager(agent_id=task_description)
+    model = SupportedModel.gpt_4o
diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index 636eb0ae..35ed5079 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -147,6 +147,7 @@ def build_agent(
     long_term_memory: LongTermMemoryTableHandler | None = None,
     allow_stop: bool = True,
     bootstrap: str | None = None,
+    prompt: str | None = None,
     raise_on_error: bool = True,
 ) -> Agent:
     engine = Engine()
@@ -185,6 +186,7 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None:
         llm=LLM(generator=generator),
         engine=engine,
         on_iteration_step=on_iteration_step,
+        prompt=prompt,
     )
 
     for f in build_agent_functions(
diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py
index 8622e6b6..e59a1dab 100644
--- a/prediction_market_agent/agents/utils.py
+++ b/prediction_market_agent/agents/utils.py
@@ -25,6 +25,9 @@ class AgentIdentifier(str, Enum):
     MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2"
     MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3"
     MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app"
+    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = (
+        "microchain-agent-deployment-omen_with_goal_manager"
+    )
 
 
 MEMORIES_TO_LEARNINGS_TEMPLATE = """

From 68444b1c329e97245d7d9ef1c148d264777beb32 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Fri, 16 Aug 2024 19:08:29 +0100
Subject: [PATCH 02/20] Add table handler and test

---
 .../agents/goal_manager.py                    | 66 +++++++++++++++----
 .../agents/microchain_agent/deploy.py         |  4 +-
 .../db/evaluated_goal_table_handler.py        | 34 ++++++++++
 prediction_market_agent/db/models.py          | 19 ++++++
 tests/db/test_evaluated_goal_table_handler.py | 54 +++++++++++++++
 5 files changed, 163 insertions(+), 14 deletions(-)
 create mode 100644 prediction_market_agent/db/evaluated_goal_table_handler.py
 create mode 100644 tests/db/test_evaluated_goal_table_handler.py

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index 95e54a82..917b5f21 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -1,24 +1,45 @@
+from prediction_market_agent_tooling.tools.utils import utcnow
 from pydantic import BaseModel
 
 from prediction_market_agent.agents.microchain_agent.memory import ChatHistory
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+from prediction_market_agent.db.models import EvaluatedGoalModel
 
 
 class Goal(BaseModel):
     prompt: str
     motivation: str
-    completion_criteria: str  # ?
+    completion_criteria: str  # TODO maybe?
 
 
 class EvaluatedGoal(Goal):
     is_complete: bool
     reasoning: str
-    output: str | None  # or 'learning'?
+    output: str | None
 
-    def to_goal(self) -> Goal:
-        return Goal(
+    @classmethod
+    def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
+        return EvaluatedGoal(
+            prompt=model.prompt,
+            motivation=model.motivation,
+            completion_criteria=model.completion_criteria,
+            is_complete=model.is_complete,
+            reasoning=model.reasoning,
+            output=model.output,
+        )
+
+    def to_model(self, agent_id: str) -> EvaluatedGoalModel:
+        return EvaluatedGoalModel(
             prompt=self.prompt,
             motivation=self.motivation,
             completion_criteria=self.completion_criteria,
+            is_complete=self.is_complete,
+            reasoning=self.reasoning,
+            output=self.output,
+            agent_id=agent_id,
+            datetime_=utcnow(),
         )
 
 
@@ -26,21 +47,34 @@ class GoalManager:
     def __init__(
         self,
         agent_id: str,
+        sqlalchemy_db_url: str | None = None,
     ):
-        self.agent_id: str = agent_id
+        self.agent_id = agent_id
+        self.table_handler = EvaluatedGoalTableHandler(
+            agent_id=agent_id,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
 
-    def get_latest_goal_from_memory(self) -> EvaluatedGoal | None:
-        pass
+    def get_latest_evaluated_goal_from_memory(self) -> EvaluatedGoal | None:
+        evaluated_goal_model = self.table_handler.get_latest_evaluated_goal()
+        if evaluated_goal_model:
+            return EvaluatedGoal.from_model(model=evaluated_goal_model)
+        return None
 
     def generate_goal(self) -> Goal:
         """
         If a goal exists from a previous session, load it and check its status.
         Otherwise create a new one.
         """
-        pass
+        # TODO
+        return Goal(
+            prompt="foo",
+            motivation="bar",
+            completion_criteria="baz",
+        )
 
     def get_goal(self) -> Goal:
-        if goal := self.get_latest_goal_from_memory():
+        if goal := self.get_latest_evaluated_goal_from_memory():
             if goal.is_complete:
                 # Generate a new goal
                 return self.generate_goal()
@@ -50,10 +84,20 @@ def get_goal(self) -> Goal:
         return self.generate_goal()
 
     def evaluate_goal_progress(
+        self,
         goal: Goal,
         chat_history: ChatHistory,
     ) -> EvaluatedGoal:
-        pass
+        # TODO
+        return EvaluatedGoal(
+            prompt=goal.prompt,
+            motivation=goal.motivation,
+            completion_criteria=goal.completion_criteria,
+            is_complete=False,
+            reasoning="",
+            output="",
+        )
 
     def save_evaluated_goal(self, goal: EvaluatedGoal) -> None:
-        pass
+        model = goal.to_model(agent_id=self.agent_id)
+        self.table_handler.save_evaluated_goal(model)
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 6ad7bbe0..050d14b1 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -122,9 +122,7 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
     model = SupportedModel.llama_31_instruct
 
 
-class DeployableMicrochainModifiableSystemPromptAgent3(
-    DeployableMicrochainModifiableSystemPromptAgentAbstract
-):
+class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
     goal_manager: GoalManager = GoalManager(agent_id=task_description)
     model = SupportedModel.gpt_4o
diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py
new file mode 100644
index 00000000..6a0208eb
--- /dev/null
+++ b/prediction_market_agent/db/evaluated_goal_table_handler.py
@@ -0,0 +1,34 @@
+import typing as t
+
+from sqlmodel import col
+
+from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.db.sql_handler import SQLHandler
+
+
+class EvaluatedGoalTableHandler:
+    def __init__(
+        self,
+        agent_id: str,
+        sqlalchemy_db_url: str | None = None,
+    ):
+        self.agent_id = agent_id
+        self.sql_handler = SQLHandler(
+            model=EvaluatedGoalModel,
+            sqlalchemy_db_url=sqlalchemy_db_url,
+        )
+
+    def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None:
+        self.sql_handler.save_multiple([model])
+
+    def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None:
+        column_to_order: str = EvaluatedGoalModel.datetime_.key  # type: ignore
+        items: t.Sequence[
+            EvaluatedGoalModel
+        ] = self.sql_handler.get_with_filter_and_order(
+            query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id],
+            order_by_column_name=column_to_order,
+            order_desc=True,
+            limit=1,
+        )
+        return items[0] if items else None
diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py
index 0302a9da..7eb09e75 100644
--- a/prediction_market_agent/db/models.py
+++ b/prediction_market_agent/db/models.py
@@ -29,3 +29,22 @@ class Prompt(SQLModel, table=True):
     # user (or app) should be persisted.
     session_identifier: str
     datetime_: datetime
+
+
+class EvaluatedGoalModel(SQLModel, table=True):
+    """
+    Checkpoint for general agent's goals. Used to store the agent's progress
+    towards a goal, and to restore it in future sessions.
+    """
+
+    __tablename__ = "evaluated_goals"
+    __table_args__ = {"extend_existing": True}
+    id: Optional[int] = Field(default=None, primary_key=True)
+    agent_id: str  # Per-agent identifier
+    prompt: str
+    motivation: str
+    completion_criteria: str
+    is_complete: bool
+    reasoning: str
+    output: str | None  # TODO or 'learning'?
+    datetime_: datetime
diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py
new file mode 100644
index 00000000..d953171e
--- /dev/null
+++ b/tests/db/test_evaluated_goal_table_handler.py
@@ -0,0 +1,54 @@
+from typing import Generator
+
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
+
+SQLITE_DB_URL = "sqlite://"
+TEST_AGENT_ID = "test_agent_id"
+
+
+@pytest.fixture(scope="function")
+def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]:
+    """Creates a in-memory SQLite DB for testing"""
+    table_handler = EvaluatedGoalTableHandler(
+        sqlalchemy_db_url=SQLITE_DB_URL,
+        agent_id=TEST_AGENT_ID,
+    )
+    yield table_handler
+
+
+def test_save_load_evaluated_goal(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal = EvaluatedGoal(
+        prompt="abc",
+        motivation="def",
+        completion_criteria="ghi",
+        is_complete=True,
+        reasoning="jkl",
+        output="mno",
+    )
+    model = evaluated_goal.to_model(agent_id=TEST_AGENT_ID)
+    table_handler.save_evaluated_goal(model=model)
+
+    loaded_model = table_handler.get_latest_evaluated_goal()
+    assert loaded_model
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_model)
+    assert loaded_evaluated_goal == evaluated_goal
+
+
+# TODO
+# def test_load_latest_prompt(memory_prompt_handler: PromptTableHandler) -> None:
+#     prompt_text_first = "prompt_text_first"
+#     prompt_text_second = "prompt_text_second"
+
+#     memory_prompt_handler.save_prompt(prompt_text_first)
+#     memory_prompt_handler.save_prompt(prompt_text_second)
+
+#     # assert latest prompt is there
+#     result = memory_prompt_handler.fetch_latest_prompt()
+#     assert result
+#     # ignore timezone
+#     assert result.prompt == prompt_text_second

From 39d4300618cebae9aaae37678ec4a69aa5c76aea Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 10:29:54 +0100
Subject: [PATCH 03/20] Add test for goal generation

---
 .../agents/goal_manager.py                    | 187 +++++++++++++++---
 .../agents/microchain_agent/deploy.py         |   9 +-
 .../db/evaluated_goal_table_handler.py        |   6 +-
 prediction_market_agent/db/models.py          |   4 +-
 prediction_market_agent/run_agent.py          |   3 +
 tests/agents/test_goal_manager.py             | 139 +++++++++++++
 tests/db/test_evaluated_goal_table_handler.py |  96 +++++++--
 7 files changed, 390 insertions(+), 54 deletions(-)
 create mode 100644 tests/agents/test_goal_manager.py

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index 917b5f21..9475b1b8 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -1,28 +1,75 @@
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
 from prediction_market_agent_tooling.tools.utils import utcnow
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from prediction_market_agent.agents.microchain_agent.memory import ChatHistory
 from prediction_market_agent.db.evaluated_goal_table_handler import (
     EvaluatedGoalTableHandler,
 )
 from prediction_market_agent.db.models import EvaluatedGoalModel
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL, APIKeys
+
+GENERATE_GOAL_PROMPT_TEMPLATE = """
+Generate a specific goal for an open-ended, autonomous agent that has a high-level description and a number of specific capabilities.
+If applicable, use the agent's previous evaluated goals when considering its new goal.
+
+The goal should satisfy the following:
+- have a narrow focus
+- be realistically achievable given the agent's specific capabilities
+- not be contingent on external factors that are out of the agent's control
+- have a clear motivation and completion criteria
+- advance the aims of the agent
+- balance the need for exploration and exploitation
+
+[HIGH LEVEL DESCRIPTION]
+{high_level_description}
+
+[AGENT CAPABILITIES]
+{agent_capabilities}
+
+{previous_evaluated_goals}
+{format_instructions}
+"""
 
 
 class Goal(BaseModel):
-    prompt: str
-    motivation: str
-    completion_criteria: str  # TODO maybe?
+    goal: str = Field(..., description="A clear description of the goal")
+    motivation: str = Field(..., description="The reason for the goal")
+    completion_criteria: str = Field(
+        ...,
+        description="The criteria that will be used to evaluate whether the goal has been completed",
+    )
 
 
 class EvaluatedGoal(Goal):
-    is_complete: bool
-    reasoning: str
-    output: str | None
+    reasoning: str = Field(
+        ..., description="An explanation of why the goal is deemed completed or not"
+    )
+    is_complete: bool = Field(..., description="Whether the goal is complete")
+    output: str | None = Field(
+        ...,
+        description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output",
+    )
+
+    def __repr__(self) -> str:
+        return (
+            f"Goal: {self.goal}\n"
+            f"Motivation: {self.motivation}\n"
+            f"Completion Criteria: {self.completion_criteria}\n"
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
+    def __str__(self) -> str:
+        return self.__repr__()
 
     @classmethod
     def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
         return EvaluatedGoal(
-            prompt=model.prompt,
+            goal=model.goal,
             motivation=model.motivation,
             completion_criteria=model.completion_criteria,
             is_complete=model.is_complete,
@@ -32,7 +79,7 @@ def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
 
     def to_model(self, agent_id: str) -> EvaluatedGoalModel:
         return EvaluatedGoalModel(
-            prompt=self.prompt,
+            goal=self.goal,
             motivation=self.motivation,
             completion_criteria=self.completion_criteria,
             is_complete=self.is_complete,
@@ -42,46 +89,119 @@ def to_model(self, agent_id: str) -> EvaluatedGoalModel:
             datetime_=utcnow(),
         )
 
+    def to_goal(self) -> Goal:
+        return Goal(
+            goal=self.goal,
+            motivation=self.motivation,
+            completion_criteria=self.completion_criteria,
+        )
+
 
 class GoalManager:
     def __init__(
         self,
         agent_id: str,
+        high_level_description: str,
+        agent_capabilities: str,
+        retry_limit: int = 3,
+        model: str = DEFAULT_OPENAI_MODEL,
         sqlalchemy_db_url: str | None = None,
     ):
         self.agent_id = agent_id
+        self.high_level_description = high_level_description
+        self.agent_capabilities = agent_capabilities
+        self.retry_limit = retry_limit
+        self.model = model
         self.table_handler = EvaluatedGoalTableHandler(
             agent_id=agent_id,
             sqlalchemy_db_url=sqlalchemy_db_url,
         )
 
-    def get_latest_evaluated_goal_from_memory(self) -> EvaluatedGoal | None:
-        evaluated_goal_model = self.table_handler.get_latest_evaluated_goal()
-        if evaluated_goal_model:
-            return EvaluatedGoal.from_model(model=evaluated_goal_model)
-        return None
+    def get_latest_evaluated_goals_from_memory(self, limit: int) -> list[EvaluatedGoal]:
+        evaluated_goal_models = self.table_handler.get_latest_evaluated_goals(
+            limit=limit
+        )
+        return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models]
 
-    def generate_goal(self) -> Goal:
+    def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
         """
-        If a goal exists from a previous session, load it and check its status.
-        Otherwise create a new one.
+        Generate a new goal based on the high-level description and the latest
+        evaluated goals.
+
+        TODO support generation of long-horizon goals with a specified
+        completion date, until which the goal's status is 'pending'.
         """
-        # TODO
-        return Goal(
-            prompt="foo",
-            motivation="bar",
-            completion_criteria="baz",
+        parser = PydanticOutputParser(pydantic_object=Goal)
+        prompt = PromptTemplate(
+            template=GENERATE_GOAL_PROMPT_TEMPLATE,
+            input_variables=[
+                "high_level_description",
+                "agent_capabilities",
+                "previous_evaluated_goals",
+            ],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        latest_evaluated_goals_str = self.evaluated_goals_to_str(latest_evaluated_goals)
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal: Goal = chain.invoke(
+            {
+                "high_level_description": self.high_level_description,
+                "agent_capabilities": self.agent_capabilities,
+                "previous_evaluated_goals": latest_evaluated_goals_str,
+            }
         )
+        return goal
+
+    def have_reached_retry_limit(
+        self, latest_evaluated_goals: list[EvaluatedGoal]
+    ) -> bool:
+        if self.retry_limit == 0:
+            return True
+
+        if len(latest_evaluated_goals) < self.retry_limit + 1:
+            return False
+
+        latest_goal = latest_evaluated_goals[0].to_goal()
+        if all(
+            [
+                g.to_goal() == latest_goal
+                for g in latest_evaluated_goals[: self.retry_limit + 1]
+            ]
+        ):
+            return True
+
+        return False
 
     def get_goal(self) -> Goal:
-        if goal := self.get_latest_evaluated_goal_from_memory():
-            if goal.is_complete:
+        """
+        Manage the fetching of goals from memory, and deciding when to generate
+        a new goal vs. retrying an incomplete one.
+
+        TODO add the ability to continue from a previous session if the goal
+        is not complete.
+        """
+        latest_evaluated_goals = self.get_latest_evaluated_goals_from_memory(
+            limit=self.retry_limit + 1  # initial attempt + retry_limit retries
+        )
+        if latest_evaluated_goals:
+            evaluated_goal = latest_evaluated_goals[0]
+            if evaluated_goal.is_complete:
                 # Generate a new goal
-                return self.generate_goal()
+                return self.generate_goal(latest_evaluated_goals)
             else:
-                # Try again
-                return goal
-        return self.generate_goal()
+                # Try again, unless we've reached the retry limit
+                if self.have_reached_retry_limit(latest_evaluated_goals):
+                    return self.generate_goal(latest_evaluated_goals)
+                else:
+                    return evaluated_goal.to_goal()
+
+        return self.generate_goal(latest_evaluated_goals=[])
 
     def evaluate_goal_progress(
         self,
@@ -90,7 +210,7 @@ def evaluate_goal_progress(
     ) -> EvaluatedGoal:
         # TODO
         return EvaluatedGoal(
-            prompt=goal.prompt,
+            goal=goal.goal,
             motivation=goal.motivation,
             completion_criteria=goal.completion_criteria,
             is_complete=False,
@@ -101,3 +221,12 @@ def evaluate_goal_progress(
     def save_evaluated_goal(self, goal: EvaluatedGoal) -> None:
         model = goal.to_model(agent_id=self.agent_id)
         self.table_handler.save_evaluated_goal(model)
+
+    @staticmethod
+    def evaluated_goals_to_str(evaluated_goals: list[EvaluatedGoal]) -> str:
+        goals_str = ""
+        for i, goal in enumerate(evaluated_goals):
+            goals_str += f"## Goal {i+1}:\n{goal}\n"
+            if i < len(evaluated_goals) - 1:
+                goals_str += "\n"
+        return goals_str
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 050d14b1..a46b9336 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -52,7 +52,7 @@ def run(
 
         if self.goal_manager:
             goal = self.goal_manager.get_goal()
-            prompt = goal.prompt
+            prompt = goal.goal
         else:
             prompt = None
 
@@ -124,5 +124,10 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 
 class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
-    goal_manager: GoalManager = GoalManager(agent_id=task_description)
+    goal_manager = GoalManager(
+        agent_id=task_description,
+        high_level_description="foo",  # TODO
+        agent_capabilities="bar",  # TODO
+        retry_limit=3,
+    )
     model = SupportedModel.gpt_4o
diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py
index 6a0208eb..a95e02c2 100644
--- a/prediction_market_agent/db/evaluated_goal_table_handler.py
+++ b/prediction_market_agent/db/evaluated_goal_table_handler.py
@@ -21,7 +21,7 @@ def __init__(
     def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None:
         self.sql_handler.save_multiple([model])
 
-    def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None:
+    def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]:
         column_to_order: str = EvaluatedGoalModel.datetime_.key  # type: ignore
         items: t.Sequence[
             EvaluatedGoalModel
@@ -29,6 +29,6 @@ def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None:
             query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id],
             order_by_column_name=column_to_order,
             order_desc=True,
-            limit=1,
+            limit=limit,
         )
-        return items[0] if items else None
+        return list(items)
diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py
index 7eb09e75..94b5baaf 100644
--- a/prediction_market_agent/db/models.py
+++ b/prediction_market_agent/db/models.py
@@ -41,10 +41,10 @@ class EvaluatedGoalModel(SQLModel, table=True):
     __table_args__ = {"extend_existing": True}
     id: Optional[int] = Field(default=None, primary_key=True)
     agent_id: str  # Per-agent identifier
-    prompt: str
+    goal: str
     motivation: str
     completion_criteria: str
     is_complete: bool
     reasoning: str
-    output: str | None  # TODO or 'learning'?
+    output: str | None
     datetime_: datetime
diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py
index 407032ce..29b136de 100644
--- a/prediction_market_agent/run_agent.py
+++ b/prediction_market_agent/run_agent.py
@@ -24,6 +24,7 @@
     DeployableMicrochainModifiableSystemPromptAgent1,
     DeployableMicrochainModifiableSystemPromptAgent2,
     DeployableMicrochainModifiableSystemPromptAgent3,
+    DeployableMicrochainWithGoalManagerAgent0,
 )
 from prediction_market_agent.agents.prophet_agent.deploy import (
     DeployableOlasEmbeddingOAAgent,
@@ -54,6 +55,7 @@ class RunnableAgent(str, Enum):
     microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1"
     microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2"
     microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3"
+    microchain_with_goal_manager_agent0 = "microchain_with_goal_manager_agent0"
     metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent"
     prophet_gpt4o = "prophet_gpt4o"
     prophet_gpt4 = "prophet_gpt4"
@@ -74,6 +76,7 @@ class RunnableAgent(str, Enum):
     RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1,
     RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2,
     RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3,
+    RunnableAgent.microchain_with_goal_manager_agent0: DeployableMicrochainWithGoalManagerAgent0,
     RunnableAgent.social_media: DeployableSocialMediaAgent,
     RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent,
     RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent,
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
new file mode 100644
index 00000000..a48aefe1
--- /dev/null
+++ b/tests/agents/test_goal_manager.py
@@ -0,0 +1,139 @@
+import pytest
+
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal, GoalManager
+from tests.utils import RUN_PAID_TESTS
+
+
+def test_have_reached_retry_limit() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="foo",
+        agent_capabilities="bar",
+        retry_limit=0,
+    )
+
+    g0 = EvaluatedGoal(
+        goal="goal0",
+        motivation="motivation",
+        completion_criteria="completion_criteria",
+        is_complete=False,
+        reasoning="reasoning",
+        output=None,
+    )
+    g1 = g0.model_copy()
+    g1.goal = "goal1"
+
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is True
+
+    goal_manager.retry_limit = 1
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is True
+    )
+
+    goal_manager.retry_limit = 2
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False
+    assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0])
+        is True
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g1])
+        is False
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1])
+        is True
+    )
+    assert (
+        goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1])
+        is True
+    )
+
+
+def test_evaluated_goals_to_str() -> None:
+    gs = [
+        EvaluatedGoal(
+            goal="foo0",
+            motivation="bar0",
+            completion_criteria="baz0",
+            is_complete=False,
+            reasoning="qux0",
+            output=None,
+        ),
+        EvaluatedGoal(
+            goal="foo1",
+            motivation="bar1",
+            completion_criteria="baz1",
+            is_complete=True,
+            reasoning="qux1",
+            output="output",
+        ),
+    ]
+    goals_str = GoalManager.evaluated_goals_to_str(gs)
+    assert goals_str == (
+        "## Goal 1:\n"
+        "Goal: foo0\n"
+        "Motivation: bar0\n"
+        "Completion Criteria: baz0\n"
+        "Is Complete: False\n"
+        "Reasoning: qux0\n"
+        "Output: None\n"
+        "\n"
+        "## Goal 2:\n"
+        "Goal: foo1\n"
+        "Motivation: bar1\n"
+        "Completion Criteria: baz1\n"
+        "Is Complete: True\n"
+        "Reasoning: qux1\n"
+        "Output: output\n"
+    )
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_generate_goal() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.",
+        agent_capabilities=(
+            "- Web search\n"
+            "- Web scraping\n"
+            "- Accurate predictions of the probability of yes/no outcomes for a given event."
+        ),
+    )
+    goal0 = goal_manager.generate_goal(latest_evaluated_goals=[])
+
+    evaluated_goal = EvaluatedGoal(
+        goal="Investigate the top 5 contenders for the Tour de France, make predictions on their chances of overall victory, and compare these against the market odds.",
+        motivation="The Tour de France is a popular race, so markets are likely to have the highest liquidity",
+        completion_criteria="5 contenders identified, predictions made, and compared against market odds",
+        is_complete=False,
+        reasoning="The Tour de France is cancelled this year.",
+        output=None,
+    )
+    goal2 = goal_manager.generate_goal(latest_evaluated_goals=[evaluated_goal])
+
+    # Generates a goal related to the Tour de France
+    assert "Tour de France" in goal0.goal
+
+    # Does not generate a goal related to the Tour de France, based on the
+    # reasoning of the previous evaluated goal
+    assert "Tour de France" not in goal2.goal
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress() -> None:
+    goal_manager = GoalManager(
+        agent_id="test_agent",
+        high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.",
+        agent_capabilities=(
+            "- Web search\n"
+            "- Web scraping\n"
+            "- Accurate predictions of the probability of yes/no outcomes for a given event."
+        ),
+    )
diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py
index d953171e..b1731816 100644
--- a/tests/db/test_evaluated_goal_table_handler.py
+++ b/tests/db/test_evaluated_goal_table_handler.py
@@ -21,34 +21,94 @@ def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]:
     yield table_handler
 
 
-def test_save_load_evaluated_goal(table_handler: EvaluatedGoalTableHandler) -> None:
+def test_save_load_evaluated_goal_0(table_handler: EvaluatedGoalTableHandler) -> None:
     evaluated_goal = EvaluatedGoal(
-        prompt="abc",
+        goal="abc",
         motivation="def",
         completion_criteria="ghi",
         is_complete=True,
         reasoning="jkl",
         output="mno",
     )
-    model = evaluated_goal.to_model(agent_id=TEST_AGENT_ID)
-    table_handler.save_evaluated_goal(model=model)
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal.to_model(agent_id=TEST_AGENT_ID)
+    )
 
-    loaded_model = table_handler.get_latest_evaluated_goal()
-    assert loaded_model
-    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_model)
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
     assert loaded_evaluated_goal == evaluated_goal
 
 
-# TODO
-# def test_load_latest_prompt(memory_prompt_handler: PromptTableHandler) -> None:
-#     prompt_text_first = "prompt_text_first"
-#     prompt_text_second = "prompt_text_second"
+def test_save_load_evaluated_goal_1(table_handler: EvaluatedGoalTableHandler) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
+
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID)
+    )
+
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal1
+
+    for limit in [2, 3]:
+        loaded_models = table_handler.get_latest_evaluated_goals(limit=limit)
+        assert len(loaded_models) == 2
+        # Check LIFO order
+        assert loaded_models[0].datetime_ > loaded_models[1].datetime_
+        assert [EvaluatedGoal.from_model(model) for model in loaded_models] == [
+            evaluated_goal1,
+            evaluated_goal0,
+        ]
+
+
+def test_save_load_evaluated_goal_multiple_agents(
+    table_handler: EvaluatedGoalTableHandler,
+) -> None:
+    evaluated_goal0 = EvaluatedGoal(
+        goal="foo",
+        motivation="foo",
+        completion_criteria="foo",
+        is_complete=True,
+        reasoning="foo",
+        output="foo",
+    )
+    evaluated_goal1 = EvaluatedGoal(
+        goal="bar",
+        motivation="bar",
+        completion_criteria="bar",
+        is_complete=False,
+        reasoning="bar",
+        output="bar",
+    )
 
-#     memory_prompt_handler.save_prompt(prompt_text_first)
-#     memory_prompt_handler.save_prompt(prompt_text_second)
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID)
+    )
+    table_handler.save_evaluated_goal(
+        model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID + "1")
+    )
 
-#     # assert latest prompt is there
-#     result = memory_prompt_handler.fetch_latest_prompt()
-#     assert result
-#     # ignore timezone
-#     assert result.prompt == prompt_text_second
+    loaded_models = table_handler.get_latest_evaluated_goals(limit=1)
+    assert len(loaded_models) == 1
+    loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0])
+    assert loaded_evaluated_goal == evaluated_goal0

From 19ba6869c92eeec2ca52a3af804d41e81cb77c24 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 15:01:13 +0100
Subject: [PATCH 04/20] Add tests for GoalManager.evaluate_goal_progress

---
 .../agents/goal_manager.py                    |  77 ++++++++-
 .../agents/microchain_agent/deploy.py         |   2 +-
 .../agents/microchain_agent/memory.py         |   6 +
 tests/agents/test_goal_manager.py             | 156 +++++++++++++++++-
 tests/test_chat_history.py                    |  14 ++
 5 files changed, 236 insertions(+), 19 deletions(-)

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index 9475b1b8..b8f39aa3 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -33,6 +33,19 @@
 {format_instructions}
 """
 
+EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE = """
+An agent and user are working together to achieve a well defined goal.
+Given their chat history, and the goal definition, evaluate whether the goal has been completed.
+
+[GOAL]
+{goal_prompt}
+
+[CHAT HISTORY]
+{chat_history}
+
+{format_instructions}
+"""
+
 
 class Goal(BaseModel):
     goal: str = Field(..., description="A clear description of the goal")
@@ -42,8 +55,16 @@ class Goal(BaseModel):
         description="The criteria that will be used to evaluate whether the goal has been completed",
     )
 
+    def to_prompt(self) -> str:
+        return (
+            f"{self.goal}"
+            f"\n\n"
+            f"## Motivation\n{self.motivation}"
+            f"## Completion Criteria:\n\n{self.completion_criteria}"
+        )
 
-class EvaluatedGoal(Goal):
+
+class GoalEvaluation(BaseModel):
     reasoning: str = Field(
         ..., description="An explanation of why the goal is deemed completed or not"
     )
@@ -53,7 +74,13 @@ class EvaluatedGoal(Goal):
         description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output",
     )
 
-    def __repr__(self) -> str:
+
+class EvaluatedGoal(Goal):
+    reasoning: str
+    is_complete: bool
+    output: str | None
+
+    def __str__(self) -> str:
         return (
             f"Goal: {self.goal}\n"
             f"Motivation: {self.motivation}\n"
@@ -63,9 +90,6 @@ def __repr__(self) -> str:
             f"Output: {self.output}"
         )
 
-    def __str__(self) -> str:
-        return self.__repr__()
-
     @classmethod
     def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal":
         return EvaluatedGoal(
@@ -203,19 +227,54 @@ def get_goal(self) -> Goal:
 
         return self.generate_goal(latest_evaluated_goals=[])
 
+    @classmethod
+    def get_chat_history_after_goal_prompt(
+        cls, goal: Goal, chat_history: ChatHistory
+    ) -> ChatHistory:
+        """
+        Return the chat history after the goal prompt. Raises a ValueError if
+        the goal prompt is not found.
+        """
+        for i, chat_message in enumerate(chat_history.chat_messages):
+            if chat_message.content == goal.to_prompt():
+                return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :])
+        raise ValueError("Goal prompt not found in chat history")
+
     def evaluate_goal_progress(
         self,
         goal: Goal,
         chat_history: ChatHistory,
     ) -> EvaluatedGoal:
-        # TODO
+        relevant_chat_history = self.get_chat_history_after_goal_prompt(
+            goal=goal,
+            chat_history=chat_history,
+        )
+        parser = PydanticOutputParser(pydantic_object=GoalEvaluation)
+        prompt = PromptTemplate(
+            template=EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE,
+            input_variables=["goal_prompt", "chat_history"],
+            partial_variables={"format_instructions": parser.get_format_instructions()},
+        )
+        llm = ChatOpenAI(
+            temperature=0,
+            model=self.model,
+            api_key=APIKeys().openai_api_key_secretstr_v1,
+        )
+        chain = prompt | llm | parser
+
+        goal_evaluation: GoalEvaluation = chain.invoke(
+            {
+                "goal_prompt": goal.to_prompt(),
+                "chat_history": str(relevant_chat_history),
+            }
+        )
         return EvaluatedGoal(
             goal=goal.goal,
             motivation=goal.motivation,
             completion_criteria=goal.completion_criteria,
-            is_complete=False,
-            reasoning="",
-            output="",
+            is_complete=goal_evaluation.is_complete,
+            reasoning=goal_evaluation.reasoning,
+            output=goal_evaluation.output,
         )
 
     def save_evaluated_goal(self, goal: EvaluatedGoal) -> None:
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index a46b9336..348bce1c 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -52,7 +52,7 @@ def run(
 
         if self.goal_manager:
             goal = self.goal_manager.get_goal()
-            prompt = goal.goal
+            prompt = goal.to_prompt()
         else:
             prompt = None
 
diff --git a/prediction_market_agent/agents/microchain_agent/memory.py b/prediction_market_agent/agents/microchain_agent/memory.py
index 27788466..53f667bb 100644
--- a/prediction_market_agent/agents/microchain_agent/memory.py
+++ b/prediction_market_agent/agents/microchain_agent/memory.py
@@ -23,6 +23,9 @@ class ChatMessage(BaseModel):
     def is_system_message(self) -> bool:
         return self.role == "system"
 
+    def __str__(self) -> str:
+        return f"{self.role}: {self.content}"
+
 
 class DatedChatMessage(ChatMessage):
     datetime_: datetime
@@ -98,6 +101,9 @@ def iterations(self) -> int:
         else:
             return (self.num_messages - 1) // 2
 
+    def __str__(self) -> str:
+        return "\n".join(str(m) for m in self.chat_messages)
+
 
 class DatedChatHistory(ChatHistory):
     chat_messages: Sequence[DatedChatMessage]
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
index a48aefe1..9fabf4fc 100644
--- a/tests/agents/test_goal_manager.py
+++ b/tests/agents/test_goal_manager.py
@@ -1,6 +1,11 @@
 import pytest
 
-from prediction_market_agent.agents.goal_manager import EvaluatedGoal, GoalManager
+from prediction_market_agent.agents.goal_manager import EvaluatedGoal, Goal, GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
+from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL
 from tests.utils import RUN_PAID_TESTS
 
 
@@ -105,6 +110,7 @@ def test_generate_goal() -> None:
             "- Web scraping\n"
             "- Accurate predictions of the probability of yes/no outcomes for a given event."
         ),
+        model=DEFAULT_OPENAI_MODEL,
     )
     goal0 = goal_manager.generate_goal(latest_evaluated_goals=[])
 
@@ -126,14 +132,146 @@ def test_generate_goal() -> None:
     assert "Tour de France" not in goal2.goal
 
 
+def test_get_chat_history_after_goal_prompt() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    assistant_message = ChatMessage(role="assistant", content="The answer is 42.")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            assistant_message,
+        ]
+    )
+    assert GoalManager.get_chat_history_after_goal_prompt(
+        goal=goal, chat_history=chat_history
+    ) == ChatHistory(chat_messages=[assistant_message])
+
+
+def test_get_chat_history_after_goal_prompt_error() -> None:
+    goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
+    assistant_message = ChatMessage(role="assistant", content="The answer is 42.")
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+        ]
+    )
+    with pytest.raises(ValueError) as exc_info:
+        GoalManager.get_chat_history_after_goal_prompt(
+            goal=goal, chat_history=chat_history
+        )
+    # Unlike a bare try/except, pytest.raises also fails when nothing is raised.
+    assert str(exc_info.value) == "Goal prompt not found in chat history"
+
+
 @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
-def test_evaluate_goal_progress() -> None:
+def test_evaluate_goal_progress_0() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a 'None' output.
+    """
     goal_manager = GoalManager(
-        agent_id="test_agent",
-        high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.",
-        agent_capabilities=(
-            "- Web search\n"
-            "- Web scraping\n"
-            "- Accurate predictions of the probability of yes/no outcomes for a given event."
-        ),
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, place a small bet on them.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="If the winner is competing, place a small bet, otherwise do nothing.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing.",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+            ChatMessage(
+                role="assistant",
+                content="The market id is '0x123' for the TdF winner. Placing bet of 0.01 USD on Tadej Pogacar",
+            ),
+            ChatMessage(role="user", content="Bet successfully placed."),
+        ]
+    )
+    evaluated_goal = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert evaluated_goal.is_complete is True
+    assert evaluated_goal.output == None
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_1() -> None:
+    """
+    Test for the case where the evaluated goal:
+    - is completed
+    - should have a non-'None' output.
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Searching the web... Yes the winner, Tadej Pogacar, is competing. His winning probability: p_yes=0.27",
+            ),
+            ChatMessage(role="user", content="The reasoning has been recorded."),
+        ]
+    )
+    evaluated_goal = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
+    )
+    assert evaluated_goal.is_complete is True
+    assert "Tadej Pogacar" in evaluated_goal.output
+    assert "0.27" in evaluated_goal.output
+
+
+@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
+def test_evaluate_goal_progress_2() -> None:
+    """
+    Test for the case where the evaluated goal is not completed
+    """
+    goal_manager = GoalManager(
+        agent_id="",  # Not relevant to test
+        high_level_description="",  # Not relevant to test
+        agent_capabilities="",  # Not relevant to test
+        model=DEFAULT_OPENAI_MODEL,
+    )
+    goal = Goal(
+        goal="If last year's TdF winner is competing this year, get their probability of winning.",
+        motivation="The winner of the last Tour de France is likely to be in good form.",
+        completion_criteria="Return the name and odds of last year's winner for this year's TdF.",
+    )
+    chat_history0 = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content=goal.to_prompt()),
+            ChatMessage(
+                role="assistant",
+                content="Uhoh, I've hit some exception and need to quit",
+            ),
+        ]
+    )
+    evaluated_goal = goal_manager.evaluate_goal_progress(
+        goal=goal,
+        chat_history=chat_history0,
     )
+    assert evaluated_goal.is_complete is False
+    assert evaluated_goal.output == None
diff --git a/tests/test_chat_history.py b/tests/test_chat_history.py
index 103861cd..85c40822 100644
--- a/tests/test_chat_history.py
+++ b/tests/test_chat_history.py
@@ -5,6 +5,8 @@
 from prediction_market_agent_tooling.tools.utils import utcnow
 
 from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
     DatedChatHistory,
     DatedChatMessage,
 )
@@ -73,3 +75,15 @@ def test_save_to_and_load_from_memory(
         new_chat_history.to_undated_chat_history()
         == chat_history.to_undated_chat_history()
     )
+
+
+def test_stringified_chat_history() -> None:
+    chat_history = ChatHistory(
+        chat_messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content="What is the weather like today?"),
+        ]
+    )
+    assert str(chat_history) == (
+        "system: You are a helpful assistant.\nuser: What is the weather like today?"
+    )

From 8c0413755cf47b0938c601180291e618839b2b45 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 15:28:25 +0100
Subject: [PATCH 05/20] add evaluated goal to chat history

---
 .../agents/microchain_agent/deploy.py         | 30 ++++++++-----------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 348bce1c..128f88c8 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -3,6 +3,7 @@
 from prediction_market_agent_tooling.markets.markets import MarketType
 
 from prediction_market_agent.agents.goal_manager import GoalManager
+from prediction_market_agent.agents.microchain_agent.memory import ChatMessage
 from prediction_market_agent.agents.microchain_agent.microchain_agent import (
     SupportedModel,
     build_agent,
@@ -74,6 +75,18 @@ def run(
 
         agent.run(self.n_iterations)
 
+        if self.goal_manager:
+            evaluated_goal = self.goal_manager.evaluate_goal_progress(
+                goal=goal, chat_history=agent.history
+            )
+            self.goal_manager.save_evaluated_goal(evaluated_goal)
+            agent.history.append(
+                ChatMessage(
+                    role="user",
+                    content=str(evaluated_goal),
+                ).model_dump()
+            )
+
         save_agent_history(
             agent=agent,
             long_term_memory=long_term_memory,
@@ -82,12 +95,6 @@ def run(
         if agent.system_prompt != initial_formatted_system_prompt:
             prompt_handler.save_prompt(get_editable_prompt_from_agent(agent))
 
-        if self.goal_manager:
-            evaluated_goal = self.goal_manager.evaluate_goal_progress(
-                goal=goal, chat_history=agent.history
-            )
-            self.goal_manager.save_evaluated_goal(evaluated_goal)
-
 
 class DeployableMicrochainModifiableSystemPromptAgentAbstract(
     DeployableMicrochainAgent
@@ -120,14 +127,3 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 ):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3
     model = SupportedModel.llama_31_instruct
-
-
-class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
-    task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
-    goal_manager = GoalManager(
-        agent_id=task_description,
-        high_level_description="foo",  # TODO
-        agent_capabilities="bar",  # TODO
-        retry_limit=3,
-    )
-    model = SupportedModel.gpt_4o

From a75d284c6ea3c13141d463b5d0d07b0350493b34 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 17:40:42 +0100
Subject: [PATCH 06/20] tweaks

---
 poetry.lock                                   | 24 +++++++------
 .../agents/goal_manager.py                    | 35 ++++++++++++-------
 .../agents/microchain_agent/deploy.py         | 34 +++++++++++++++---
 .../microchain_agent/microchain_agent.py      |  2 +-
 prediction_market_agent/agents/utils.py       |  4 +--
 .../db/evaluated_goal_table_handler.py        |  9 +++++
 pyproject.toml                                |  2 +-
 scripts/delete_agent_db_entries.py            | 12 +++++++
 tests/agents/test_goal_manager.py             | 21 +++++------
 9 files changed, 100 insertions(+), 43 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index e3d50bed..8f277b5a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -5125,19 +5125,23 @@ psutil = "*"
 
 [[package]]
 name = "microchain-python"
-version = "0.4.4"
+version = "0.4.3"
 description = ""
 optional = false
 python-versions = "*"
-files = [
-    {file = "microchain_python-0.4.4-py3-none-any.whl", hash = "sha256:b27f7ce318a94342b094d49a0509e5c208166fa1fad4b87b69db75a5fe1c4a9e"},
-    {file = "microchain_python-0.4.4.tar.gz", hash = "sha256:33d2f18856988433f17b512ecc217de9d9fdd97ad50adac3f70cfcee7e490d67"},
-]
+files = []
+develop = false
 
 [package.dependencies]
 pydantic = ">=2,<3"
 termcolor = "2.4.0"
 
+[package.source]
+type = "git"
+url = "https://github.com/galatolofederico/microchain.git"
+reference = "56db91de72b6466080c26434631808cc20af670c"
+resolved_reference = "56db91de72b6466080c26434631808cc20af670c"
+
 [[package]]
 name = "mmh3"
 version = "4.1.0"
@@ -5562,13 +5566,13 @@ files = [
 
 [[package]]
 name = "narwhals"
-version = "1.5.0"
+version = "1.5.2"
 description = "Extremely lightweight compatibility layer between dataframe libraries"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "narwhals-1.5.0-py3-none-any.whl", hash = "sha256:6b63e4e3fd494fc201395e0f3c86cef32f4970f73fb15d5502a15d479f848023"},
-    {file = "narwhals-1.5.0.tar.gz", hash = "sha256:88c5cb329d7350c4cd688188068636f8fef5b385b31377b33d92a00ebd8d951b"},
+    {file = "narwhals-1.5.2-py3-none-any.whl", hash = "sha256:431d7fdca6104a8b6a72254e31ee6630b526c03544df8e338b3249ca9160f5d2"},
+    {file = "narwhals-1.5.2.tar.gz", hash = "sha256:4d8ce16ecf431a544c1740adbb523477565c9614e5e330ce444f3a432ff93d7f"},
 ]
 
 [package.extras]
@@ -10716,4 +10720,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.10.0"
-content-hash = "3af35337fac84c3278d1ebee06cf76b7ed5c3e2de77df9361bd9a7ce9a051a18"
+content-hash = "48447e694cc1d45cd5c510d3c017b031bf6b8cd24571429f07900400eea40f28"
diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index b8f39aa3..9392e5db 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -17,11 +17,12 @@
 
 The goal should satisfy the following:
 - have a narrow focus
+- be completable immediately, within a single session
 - be realistically achievable given the agen't specific capabilities
-- not be contingent on external factors that are out of the agent's control
 - have a clear motivation and completion criteria
 - advance the aims of the agent
 - balance the need for exploration and exploitation
+- not be contingent on external factors that are out of the agent's control
 
 [HIGH LEVEL DESCRIPTION]
 {high_level_description}
@@ -57,10 +58,10 @@ class Goal(BaseModel):
 
     def to_prompt(self) -> str:
         return (
-            f"{self.goal}"
-            f"\n\n"
-            f"## Motivation\n{self.motivation}"
-            f"## Completion Criteria:\n\n{self.completion_criteria}"
+            f"# Goal:\n"
+            f"{self.goal}\n\n"
+            f"## Motivation:\n{self.motivation}\n\n"
+            f"## Completion Criteria:\n{self.completion_criteria}"
         )
 
 
@@ -74,6 +75,13 @@ class GoalEvaluation(BaseModel):
         description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output",
     )
 
+    def __str__(self) -> str:
+        return (
+            f"Is Complete: {self.is_complete}\n"
+            f"Reasoning: {self.reasoning}\n"
+            f"Output: {self.output}"
+        )
+
 
 class EvaluatedGoal(Goal):
     reasoning: str
@@ -244,7 +252,7 @@ def evaluate_goal_progress(
         self,
         goal: Goal,
         chat_history: ChatHistory,
-    ) -> EvaluatedGoal:
+    ) -> GoalEvaluation:
         relevant_chat_history = self.get_chat_history_after_goal_prompt(
             goal=goal,
             chat_history=chat_history,
@@ -268,17 +276,18 @@ def evaluate_goal_progress(
                 "chat_history": str(relevant_chat_history),
             }
         )
-        return EvaluatedGoal(
+        return goal_evaluation
+
+    def save_evaluated_goal(self, goal: Goal, evaluation: GoalEvaluation) -> None:
+        evaluated_goal = EvaluatedGoal(
             goal=goal.goal,
             motivation=goal.motivation,
             completion_criteria=goal.completion_criteria,
-            is_complete=goal_evaluation.is_complete,
-            reasoning=goal_evaluation.reasoning,
-            output=goal_evaluation.output,
+            is_complete=evaluation.is_complete,
+            reasoning=evaluation.reasoning,
+            output=evaluation.output,
         )
-
-    def save_evaluated_goal(self, goal: EvaluatedGoal) -> None:
-        model = goal.to_model(agent_id=self.agent_id)
+        model = evaluated_goal.to_model(agent_id=self.agent_id)
         self.table_handler.save_evaluated_goal(model)
 
     @staticmethod
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 128f88c8..ae983c01 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -3,7 +3,10 @@
 from prediction_market_agent_tooling.markets.markets import MarketType
 
 from prediction_market_agent.agents.goal_manager import GoalManager
-from prediction_market_agent.agents.microchain_agent.memory import ChatMessage
+from prediction_market_agent.agents.microchain_agent.memory import (
+    ChatHistory,
+    ChatMessage,
+)
 from prediction_market_agent.agents.microchain_agent.microchain_agent import (
     SupportedModel,
     build_agent,
@@ -76,14 +79,15 @@ def run(
         agent.run(self.n_iterations)
 
         if self.goal_manager:
-            evaluated_goal = self.goal_manager.evaluate_goal_progress(
-                goal=goal, chat_history=agent.history
+            goal_evaluation = self.goal_manager.evaluate_goal_progress(
+                goal=goal,
+                chat_history=ChatHistory.from_list_of_dicts(agent.history),
             )
-            self.goal_manager.save_evaluated_goal(evaluated_goal)
+            self.goal_manager.save_evaluated_goal(goal=goal, evaluation=goal_evaluation)
             agent.history.append(
                 ChatMessage(
                     role="user",
-                    content=str(evaluated_goal),
+                    content=str(f"# Goal evaluation\n{goal_evaluation}"),
                 ).model_dump()
             )
 
@@ -127,3 +131,23 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 ):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3
     model = SupportedModel.llama_31_instruct
+
+
+class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
+    task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
+    model = SupportedModel.gpt_4o
+    goal_manager = GoalManager(
+        agent_id=task_description,
+        high_level_description="You are a trader agent in prediction markets to maximise your profit.",
+        agent_capabilities=(
+            "You are able to:"
+            "\n- List all binary markets that can be traded."
+            "\n- List the current outcome probabilities for each open market."
+            "\n- Predict the outcome probability for a market."
+            "\n- Buy, sell and hold outcome tokens in a market."
+            "\n- Query your wallet balance, and the positions you hold in open markets."
+            "\n- Query the past bets you've made, and their outcomes."
+        ),
+        retry_limit=3,
+    )
+    n_iterations = 100
diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index 75bb68ab..aeb96c71 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -186,7 +186,6 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None:
         llm=LLM(generator=generator),
         engine=engine,
         on_iteration_step=on_iteration_step,
-        prompt=prompt,
     )
 
     for f in build_agent_functions(
@@ -205,6 +204,7 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None:
     agent.system_prompt = unformatted_system_prompt.format(
         engine_help=agent.engine.help
     )
+    agent.prompt = prompt
     if bootstrap:
         agent.bootstrap = [bootstrap]
     return agent
diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py
index ac3d55a7..d7fcf4a7 100644
--- a/prediction_market_agent/agents/utils.py
+++ b/prediction_market_agent/agents/utils.py
@@ -29,9 +29,7 @@ class AgentIdentifier(str, Enum):
     MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2"
     MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3"
     MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app"
-    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = (
-        "microchain-agent-deployment-omen_with_goal_manager"
-    )
+    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "general-agent-4-with-goal-manager"
 
 
 MEMORIES_TO_LEARNINGS_TEMPLATE = """
diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py
index a95e02c2..e2f41abc 100644
--- a/prediction_market_agent/db/evaluated_goal_table_handler.py
+++ b/prediction_market_agent/db/evaluated_goal_table_handler.py
@@ -32,3 +32,12 @@ def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]:
             limit=limit,
         )
         return list(items)
+
+    def delete_all_evaluated_goals(self) -> None:
+        """
+        Delete all evaluated goals with `agent_id`
+        """
+        self.sql_handler.delete_all_entries(
+            col_name=EvaluatedGoalModel.agent_id.key,  # type: ignore
+            col_value=self.agent_id,
+        )
diff --git a/pyproject.toml b/pyproject.toml
index 6c77b063..c71714b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ autoflake = "^2.2.1"
 isort = "^5.13.2"
 markdownify = "^0.11.6"
 tavily-python = "^0.3.9"
-microchain-python = "^0.4.4"
+microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "56db91de72b6466080c26434631808cc20af670c" }
 pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"}
 psycopg2-binary = "^2.9.9"
 sqlmodel = "^0.0.21"
diff --git a/scripts/delete_agent_db_entries.py b/scripts/delete_agent_db_entries.py
index fa58f0d0..3c14dcc5 100644
--- a/scripts/delete_agent_db_entries.py
+++ b/scripts/delete_agent_db_entries.py
@@ -1,6 +1,9 @@
 import typer
 
 from prediction_market_agent.agents.utils import AgentIdentifier
+from prediction_market_agent.db.evaluated_goal_table_handler import (
+    EvaluatedGoalTableHandler,
+)
 from prediction_market_agent.db.long_term_memory_table_handler import (
     LongTermMemoryTableHandler,
 )
@@ -11,6 +14,7 @@ def main(
     session_id: AgentIdentifier,
     delete_memories: bool = True,
     delete_prompts: bool = True,
+    delete_goals: bool = True,
 ) -> None:
     """
     Delete all memories and prompts for a given agent, defined by the session_id.
@@ -31,6 +35,14 @@ def main(
         else:
             print("Memory entries successfully deleted.")
 
+    if delete_goals:
+        evaluated_goal_table_handler = EvaluatedGoalTableHandler(agent_id=session_id)
+        evaluated_goal_table_handler.delete_all_evaluated_goals()
+        if len(evaluated_goal_table_handler.get_latest_evaluated_goals(limit=1)) != 0:
+            raise Exception("Evaluated goal entries were not deleted.")
+        else:
+            print("Evaluated goal entries successfully deleted.")
+
 
 if __name__ == "__main__":
     typer.run(main)
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
index 9fabf4fc..2eba998d 100644
--- a/tests/agents/test_goal_manager.py
+++ b/tests/agents/test_goal_manager.py
@@ -197,12 +197,12 @@ def test_evaluate_goal_progress_0() -> None:
             ChatMessage(role="user", content="Bet successfully placed."),
         ]
     )
-    evaluated_goal = goal_manager.evaluate_goal_progress(
+    goal_evaluation = goal_manager.evaluate_goal_progress(
         goal=goal,
         chat_history=chat_history0,
     )
-    assert evaluated_goal.is_complete is True
-    assert evaluated_goal.output == None
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output == None
 
 
 @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
@@ -234,13 +234,14 @@ def test_evaluate_goal_progress_1() -> None:
             ChatMessage(role="user", content="The reasoning has been recorded."),
         ]
     )
-    evaluated_goal = goal_manager.evaluate_goal_progress(
+    goal_evaluation = goal_manager.evaluate_goal_progress(
         goal=goal,
         chat_history=chat_history0,
     )
-    assert evaluated_goal.is_complete is True
-    assert "Tadej Pogacar" in evaluated_goal.output
-    assert "0.27" in evaluated_goal.output
+    assert goal_evaluation.is_complete is True
+    assert goal_evaluation.output is not None
+    assert "Tadej Pogacar" in goal_evaluation.output
+    assert "0.27" in goal_evaluation.output
 
 
 @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
@@ -269,9 +270,9 @@ def test_evaluate_goal_progress_2() -> None:
             ),
         ]
     )
-    evaluated_goal = goal_manager.evaluate_goal_progress(
+    goal_evaluation = goal_manager.evaluate_goal_progress(
         goal=goal,
         chat_history=chat_history0,
     )
-    assert evaluated_goal.is_complete is False
-    assert evaluated_goal.output == None
+    assert goal_evaluation.is_complete is False
+    assert goal_evaluation.output == None

From e0e25937b3af7bb2c276e7dae00780fc3ed3ea1c Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 18:21:09 +0100
Subject: [PATCH 07/20] remove n_iterations override

---
 prediction_market_agent/agents/microchain_agent/deploy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index ae983c01..48350d50 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -150,4 +150,3 @@ class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
         ),
         retry_limit=3,
     )
-    n_iterations = 100

From 5fe023596d3209691a757927258396e0a10f6dbb Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 18:51:32 +0100
Subject: [PATCH 08/20] Use local db for test

---
 tests/agents/test_goal_manager.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
index 2eba998d..6e6f5f21 100644
--- a/tests/agents/test_goal_manager.py
+++ b/tests/agents/test_goal_manager.py
@@ -8,6 +8,8 @@
 from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL
 from tests.utils import RUN_PAID_TESTS
 
+SQLITE_DB_URL = "sqlite://"
+
 
 def test_have_reached_retry_limit() -> None:
     goal_manager = GoalManager(
@@ -111,6 +113,7 @@ def test_generate_goal() -> None:
             "- Accurate predictions of the probability of yes/no outcomes for a given event."
         ),
         model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
     )
     goal0 = goal_manager.generate_goal(latest_evaluated_goals=[])
 
@@ -175,6 +178,7 @@ def test_evaluate_goal_progress_0() -> None:
         high_level_description="",  # Not relevant to test
         agent_capabilities="",  # Not relevant to test
         model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
     )
     goal = Goal(
         goal="If last year's TdF winner is competing this year, place a small bet on them.",
@@ -217,6 +221,7 @@ def test_evaluate_goal_progress_1() -> None:
         high_level_description="",  # Not relevant to test
         agent_capabilities="",  # Not relevant to test
         model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
     )
     goal = Goal(
         goal="If last year's TdF winner is competing this year, get their probability of winning.",
@@ -254,6 +259,7 @@ def test_evaluate_goal_progress_2() -> None:
         high_level_description="",  # Not relevant to test
         agent_capabilities="",  # Not relevant to test
         model=DEFAULT_OPENAI_MODEL,
+        sqlalchemy_db_url=SQLITE_DB_URL,
     )
     goal = Goal(
         goal="If last year's TdF winner is competing this year, get their probability of winning.",

From e2b7760d4e3477c37e637f12abb6b1ba118803e2 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 22:16:02 +0100
Subject: [PATCH 09/20] Fixed test

---
 tests/agents/test_goal_manager.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
index 6e6f5f21..9a7d084c 100644
--- a/tests/agents/test_goal_manager.py
+++ b/tests/agents/test_goal_manager.py
@@ -17,6 +17,7 @@ def test_have_reached_retry_limit() -> None:
         high_level_description="foo",
         agent_capabilities="bar",
         retry_limit=0,
+        sqlalchemy_db_url=SQLITE_DB_URL,
     )
 
     g0 = EvaluatedGoal(

From 8df7f4028dd456ae4cf7e38e8dfd4d1196015aab Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 21 Aug 2024 22:23:49 +0100
Subject: [PATCH 10/20] coderabbit suggestions

---
 prediction_market_agent/agents/goal_manager.py | 13 ++++---------
 tests/agents/test_goal_manager.py              |  5 ++---
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index 9392e5db..d26892a1 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -200,15 +200,10 @@ def have_reached_retry_limit(
             return False
 
         latest_goal = latest_evaluated_goals[0].to_goal()
-        if all(
-            [
-                g.to_goal() == latest_goal
-                for g in latest_evaluated_goals[: self.retry_limit + 1]
-            ]
-        ):
-            return True
-
-        return False
+        return all(
+            g.to_goal() == latest_goal
+            for g in latest_evaluated_goals[: self.retry_limit + 1]
+        )
 
     def get_goal(self) -> Goal:
         """
diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py
index 9a7d084c..7db1ee3c 100644
--- a/tests/agents/test_goal_manager.py
+++ b/tests/agents/test_goal_manager.py
@@ -153,7 +153,6 @@ def test_get_chat_history_after_goal_prompt() -> None:
 
 def test_get_chat_history_after_goal_prompt_error() -> None:
     goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz")
-    assistant_message = ChatMessage(role="assistant", content="The answer is 42.")
     chat_history = ChatHistory(
         chat_messages=[
             ChatMessage(role="system", content="You are a helpful assistant."),
@@ -207,7 +206,7 @@ def test_evaluate_goal_progress_0() -> None:
         chat_history=chat_history0,
     )
     assert goal_evaluation.is_complete is True
-    assert goal_evaluation.output == None
+    assert goal_evaluation.output is None
 
 
 @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.")
@@ -282,4 +281,4 @@ def test_evaluate_goal_progress_2() -> None:
         chat_history=chat_history0,
     )
     assert goal_evaluation.is_complete is False
-    assert goal_evaluation.output == None
+    assert goal_evaluation.output is None

From 1f48aa797fcb9861c86493655348f9f66fc05f76 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Thu, 22 Aug 2024 15:35:08 +0100
Subject: [PATCH 11/20] Review comments

---
 prediction_market_agent/agents/goal_manager.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index d26892a1..e61f00c0 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -1,6 +1,10 @@
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_core.prompts import PromptTemplate
 from langchain_openai import ChatOpenAI
+from prediction_market_agent_tooling.tools.langfuse_ import (
+    get_langfuse_langchain_config,
+    observe,
+)
 from prediction_market_agent_tooling.tools.utils import utcnow
 from pydantic import BaseModel, Field
 
@@ -155,6 +159,7 @@ def get_latest_evaluated_goals_from_memory(self, limit: int) -> list[EvaluatedGo
         )
         return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models]
 
+    @observe()
     def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
         """
         Generate a new goal based on the high-level description and the latest
@@ -178,6 +183,7 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
             temperature=0,
             model=self.model,
             api_key=APIKeys().openai_api_key_secretstr_v1,
+            config=get_langfuse_langchain_config(),
         )
         chain = prompt | llm | parser
 
@@ -217,8 +223,11 @@ def get_goal(self) -> Goal:
             limit=self.retry_limit
         )
         if latest_evaluated_goals:
-            evaluated_goal = latest_evaluated_goals[0]
-            if evaluated_goal.is_complete:
+            # Previous goals have been retrieved from memory. Generate a new
+            # goal based on these, or retry the last one if it did not complete.
+            latest_evaluated_goal = latest_evaluated_goals[0]
+
+            if latest_evaluated_goal.is_complete:
                 # Generate a new goal
                 return self.generate_goal(latest_evaluated_goals)
             else:
@@ -226,8 +235,9 @@ def get_goal(self) -> Goal:
                 if self.have_reached_retry_limit(latest_evaluated_goals):
                     return self.generate_goal(latest_evaluated_goals)
                 else:
-                    return evaluated_goal.to_goal()
+                    return latest_evaluated_goal.to_goal()
 
+        # No evaluated goals in memory. Generate a new goal from scratch
         return self.generate_goal(latest_evaluated_goals=[])
 
     @classmethod
@@ -243,6 +253,7 @@ def get_chat_history_after_goal_prompt(
                 return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :])
         raise ValueError("Goal prompt not found in chat history")
 
+    @observe()
     def evaluate_goal_progress(
         self,
         goal: Goal,
@@ -262,6 +273,7 @@ def evaluate_goal_progress(
             temperature=0,
             model=self.model,
             api_key=APIKeys().openai_api_key_secretstr_v1,
+            config=get_langfuse_langchain_config(),
         )
         chain = prompt | llm | parser
 

From 3b0359acbbea83dc9964d9afbbfe341d7ee40da9 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Thu, 22 Aug 2024 15:51:41 +0100
Subject: [PATCH 12/20] Review comments

---
 prediction_market_agent/agents/goal_manager.py         |  8 ++++----
 .../agents/microchain_agent/deploy.py                  | 10 ++++------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py
index e61f00c0..b9039f35 100644
--- a/prediction_market_agent/agents/goal_manager.py
+++ b/prediction_market_agent/agents/goal_manager.py
@@ -183,7 +183,6 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
             temperature=0,
             model=self.model,
             api_key=APIKeys().openai_api_key_secretstr_v1,
-            config=get_langfuse_langchain_config(),
         )
         chain = prompt | llm | parser
 
@@ -192,7 +191,8 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal:
                 "high_level_description": self.high_level_description,
                 "agent_capabilities": self.agent_capabilities,
                 "previous_evaluated_goals": latest_evaluated_goals_str,
-            }
+            },
+            config=get_langfuse_langchain_config(),
         )
         return goal
 
@@ -273,7 +273,6 @@ def evaluate_goal_progress(
             temperature=0,
             model=self.model,
             api_key=APIKeys().openai_api_key_secretstr_v1,
-            config=get_langfuse_langchain_config(),
         )
         chain = prompt | llm | parser
 
@@ -281,7 +280,8 @@ def evaluate_goal_progress(
             {
                 "goal_prompt": goal.to_prompt(),
                 "chat_history": str(relevant_chat_history),
-            }
+            },
+            config=get_langfuse_langchain_config(),
         )
         return goal_evaluation
 
diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 48350d50..0d5ce93a 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -1,6 +1,7 @@
 from microchain import Agent
 from prediction_market_agent_tooling.deploy.agent import DeployableAgent
 from prediction_market_agent_tooling.markets.markets import MarketType
+from prediction_market_agent_tooling.tools.utils import check_not_none
 
 from prediction_market_agent.agents.goal_manager import GoalManager
 from prediction_market_agent.agents.microchain_agent.memory import (
@@ -54,11 +55,7 @@ def run(
             ),
         )
 
-        if self.goal_manager:
-            goal = self.goal_manager.get_goal()
-            prompt = goal.to_prompt()
-        else:
-            prompt = None
+        goal = self.goal_manager.get_goal() if self.goal_manager else None
 
         agent: Agent = build_agent(
             market_type=market_type,
@@ -70,7 +67,7 @@ def run(
             functions_config=FunctionsConfig.from_system_prompt_choice(
                 self.system_prompt_choice
             ),
-            prompt=prompt,
+            prompt=goal.to_prompt() if goal else None,
         )
 
         # Save formatted system prompt
@@ -79,6 +76,7 @@ def run(
         agent.run(self.n_iterations)
 
         if self.goal_manager:
+            goal = check_not_none(goal)
             goal_evaluation = self.goal_manager.evaluate_goal_progress(
                 goal=goal,
                 chat_history=ChatHistory.from_list_of_dicts(agent.history),

From 3d39d173795ec19b32abf8f10c9ec629b81f1c29 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Fri, 23 Aug 2024 09:40:35 +0100
Subject: [PATCH 13/20] Tweak high_level_description

---
 prediction_market_agent/agents/microchain_agent/deploy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index 0d5ce93a..3ace5082 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -136,7 +136,7 @@ class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
     model = SupportedModel.gpt_4o
     goal_manager = GoalManager(
         agent_id=task_description,
-        high_level_description="You are a trader agent in prediction markets to maximise your profit.",
+        high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
         agent_capabilities=(
             "You are able to:"
             "\n- List all binary markets that can be traded."

From 3168372c831dcb2da9eb4390197fccc164711b3a Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Sat, 24 Aug 2024 01:26:53 +0100
Subject: [PATCH 14/20] tidy

---
 prediction_market_agent/run_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py
index 615da4f0..33a5002e 100644
--- a/prediction_market_agent/run_agent.py
+++ b/prediction_market_agent/run_agent.py
@@ -56,7 +56,7 @@ class RunnableAgent(str, Enum):
     microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1"
     microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2"
     microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3"
-    microchain_with_goal_manager_agent0 = "microchain_with_goal_manager_agent0"
+    microchain_with_goal_manager_agent_0 = "microchain_with_goal_manager_agent_0"
     metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent"
     prophet_gpt4o = "prophet_gpt4o"
     prophet_gpt4 = "prophet_gpt4"
@@ -77,7 +77,7 @@ class RunnableAgent(str, Enum):
     RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1,
     RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2,
     RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3,
-    RunnableAgent.microchain_with_goal_manager_agent0: DeployableMicrochainWithGoalManagerAgent0,
+    RunnableAgent.microchain_with_goal_manager_agent_0: DeployableMicrochainWithGoalManagerAgent0,
     RunnableAgent.social_media: DeployableSocialMediaAgent,
     RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent,
     RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent,

From 399abb5adb78543b259b3fd2bb31e5adccc3c73b Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 28 Aug 2024 12:33:43 +0100
Subject: [PATCH 15/20] Move goal manager initialisation to inside
 DeployableMicrochainAgent.run

---
 .../agents/microchain_agent/deploy.py         | 46 ++++++++++---------
 .../microchain_agent/microchain_agent.py      |  6 ++-
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index cfd698d9..a1f4fda2 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -13,6 +13,7 @@
     SupportedModel,
     build_agent,
     get_editable_prompt_from_agent,
+    get_functions_summary_list,
     get_unformatted_system_prompt,
     save_agent_history,
 )
@@ -35,7 +36,12 @@ class DeployableMicrochainAgent(DeployableAgent):
     load_historical_prompt: bool = False
     system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN
-    goal_manager: GoalManager | None = None
+
+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager | None:
+        return None
 
     def run(
         self,
@@ -56,8 +62,6 @@ def run(
             ),
         )
 
-        goal = self.goal_manager.get_goal() if self.goal_manager else None
-
         agent: Agent = build_agent(
             market_type=market_type,
             model=self.model,
@@ -68,9 +72,12 @@ def run(
             functions_config=FunctionsConfig.from_system_prompt_choice(
                 self.system_prompt_choice
             ),
-            prompt=goal.to_prompt() if goal else None,
         )
 
+        if goal_manager := self.build_goal_manager(agent=agent):
+            goal = goal_manager.get_goal()
+        agent.prompt = (goal.to_prompt() if goal else None,)
+
         # Save formatted system prompt
         initial_formatted_system_prompt = agent.system_prompt
 
@@ -80,13 +87,13 @@ def run(
             logger.error(e)
             raise e
         finally:
-            if self.goal_manager:
+            if goal_manager:
                 goal = check_not_none(goal)
-                goal_evaluation = self.goal_manager.evaluate_goal_progress(
+                goal_evaluation = goal_manager.evaluate_goal_progress(
                     goal=goal,
                     chat_history=ChatHistory.from_list_of_dicts(agent.history),
                 )
-                self.goal_manager.save_evaluated_goal(
+                goal_manager.save_evaluated_goal(
                     goal=goal,
                     evaluation=goal_evaluation,
                 )
@@ -142,17 +149,14 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
     model = SupportedModel.gpt_4o
-    goal_manager = GoalManager(
-        agent_id=task_description,
-        high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
-        agent_capabilities=(
-            "You are able to:"
-            "\n- List all binary markets that can be traded."
-            "\n- List the current outcome probabilities for each open market."
-            "\n- Predict the outcome probability for a market."
-            "\n- Buy, sell and hold outcome tokens in a market."
-            "\n- Query your wallet balance, and the positions you hold in open markets."
-            "\n- Query the past bets you've made, and their outcomes."
-        ),
-        retry_limit=3,
-    )
+
+    def build_goal_manager(
+        self,
+        agent: Agent,
+    ) -> GoalManager | None:
+        return GoalManager(
+            agent_id=self.task_description,
+            high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
+            agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine.functions)}",
+            retry_limit=3,
+        )
diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index aeb96c71..d94c3ac0 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -147,7 +147,6 @@ def build_agent(
     long_term_memory: LongTermMemoryTableHandler | None = None,
     allow_stop: bool = True,
     bootstrap: str | None = None,
-    prompt: str | None = None,
     raise_on_error: bool = True,
 ) -> Agent:
     engine = Engine()
@@ -204,7 +203,6 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None:
     agent.system_prompt = unformatted_system_prompt.format(
         engine_help=agent.engine.help
     )
-    agent.prompt = prompt
     if bootstrap:
         agent.bootstrap = [bootstrap]
     return agent
@@ -245,3 +243,7 @@ def save_agent_history(
 
 def get_editable_prompt_from_agent(agent: Agent) -> str:
     return extract_updatable_system_prompt(str(agent.system_prompt))
+
+
+def get_functions_summary_list(functions: list[Function]) -> str:
+    return "\n".join([f"- {fname}: {f.description}" for fname, f in functions.items()])

From eaa6ca6acb6ebe15d5adfc55cb2c3cc16bd41ebc Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 28 Aug 2024 12:40:43 +0100
Subject: [PATCH 16/20] mypy

---
 .../agents/microchain_agent/microchain_agent.py             | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
index d94c3ac0..68a90628 100644
--- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py
+++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py
@@ -245,5 +245,7 @@ def get_editable_prompt_from_agent(agent: Agent) -> str:
     return extract_updatable_system_prompt(str(agent.system_prompt))
 
 
-def get_functions_summary_list(functions: list[Function]) -> str:
-    return "\n".join([f"- {fname}: {f.description}" for fname, f in functions.items()])
+def get_functions_summary_list(engine: Engine) -> str:
+    return "\n".join(
+        [f"- {fname}: {f.description}" for fname, f in engine.functions.items()]
+    )

From 14c13bfea6cba228023fe2fa312f59c9226f148a Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 28 Aug 2024 13:04:06 +0100
Subject: [PATCH 17/20] Fixes

---
 prediction_market_agent/agents/microchain_agent/deploy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index a1f4fda2..a4d7b6de 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -76,7 +76,7 @@ def run(
 
         if goal_manager := self.build_goal_manager(agent=agent):
             goal = goal_manager.get_goal()
-        agent.prompt = (goal.to_prompt() if goal else None,)
+        agent.prompt = goal.to_prompt() if goal else None
 
         # Save formatted system prompt
         initial_formatted_system_prompt = agent.system_prompt
@@ -157,6 +157,6 @@ def build_goal_manager(
         return GoalManager(
             agent_id=self.task_description,
             high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.",
-            agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine.functions)}",
+            agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine)}",
             retry_limit=3,
         )

From b220db1b19ad77b85a8cbcf4d84097b4b37121a0 Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Wed, 28 Aug 2024 13:41:13 +0100
Subject: [PATCH 18/20] Add TRADING_AGENT_SYSTEM_PROMPT_MINIMAL, use for
 microchain agent with goal manager

---
 .../agents/microchain_agent/deploy.py         |  1 +
 .../agents/microchain_agent/prompts.py        | 23 ++++++++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index a4d7b6de..31c123ca 100644
--- a/prediction_market_agent/agents/microchain_agent/deploy.py
+++ b/prediction_market_agent/agents/microchain_agent/deploy.py
@@ -149,6 +149,7 @@ class DeployableMicrochainModifiableSystemPromptAgent3(
 class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent):
     task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER
     model = SupportedModel.gpt_4o
+    system_prompt_choice = SystemPromptChoice.TRADING_AGENT_MINIMAL
 
     def build_goal_manager(
         self,
diff --git a/prediction_market_agent/agents/microchain_agent/prompts.py b/prediction_market_agent/agents/microchain_agent/prompts.py
index 9737146e..442b76b0 100644
--- a/prediction_market_agent/agents/microchain_agent/prompts.py
+++ b/prediction_market_agent/agents/microchain_agent/prompts.py
@@ -33,6 +33,22 @@
 Only output a single function call per message.
 Make 'Reasoning' calls frequently - at least every other call.
 """
+
+# Similar to TRADING_AGENT_SYSTEM_PROMPT, except that it doesn't contain any
+# specific instructions on what to do. It is intended for an agent whose
+# instructions for the session are supplied separately, in the user prompt
+# that accompanies this system prompt.
+TRADING_AGENT_SYSTEM_PROMPT_MINIMAL = f"""You are a helpful assistant, who specializes as an expert trader agent in prediction markets.
+
+{NON_UPDATABLE_DIVIDOR}
+
+{{engine_help}}
+
+Only output valid Python function calls, without code formatting characters, without any other text. i.e. it should run if passed to Python's `eval` function.
+Only output a single function call per message.
+Make 'Reasoning' calls frequently - at least every other call. You need to reason step by step.
+"""
+
 # Experimental system prompt for task-solving agent.
 TASK_AGENT_SYSTEM_PROMPT = f"""Act as a task-solving agents that picks up available tasks and solves them for getting rewards.
 
@@ -74,6 +90,7 @@ def build_full_unformatted_system_prompt(system_prompt: str) -> str:
 class SystemPromptChoice(str, Enum):
     JUST_BORN = "just_born"
     TRADING_AGENT = "trading_agent"
+    TRADING_AGENT_MINIMAL = "trading_agent_minimal"
     TASK_AGENT = "task_agent"
 
 
@@ -95,7 +112,10 @@ def from_system_prompt_choice(
             include_learning_functions = True
             include_trading_functions = True
 
-        elif system_prompt_choice == SystemPromptChoice.TRADING_AGENT:
+        elif system_prompt_choice in [
+            SystemPromptChoice.TRADING_AGENT,
+            SystemPromptChoice.TRADING_AGENT_MINIMAL,
+        ]:
             include_trading_functions = True
 
         elif system_prompt_choice == SystemPromptChoice.TASK_AGENT:
@@ -113,4 +133,5 @@ def from_system_prompt_choice(
     SystemPromptChoice.JUST_BORN: SYSTEM_PROMPT,
     SystemPromptChoice.TRADING_AGENT: TRADING_AGENT_SYSTEM_PROMPT,
     SystemPromptChoice.TASK_AGENT: TASK_AGENT_SYSTEM_PROMPT,
+    SystemPromptChoice.TRADING_AGENT_MINIMAL: TRADING_AGENT_SYSTEM_PROMPT_MINIMAL,
 }

From a53909194cd3e5499c6559da00160ecebff36386 Mon Sep 17 00:00:00 2001
From: Evan Griffiths <56087052+evangriffiths@users.noreply.github.com>
Date: Thu, 29 Aug 2024 14:18:27 +0100
Subject: [PATCH 19/20] Update pyproject.toml

Co-authored-by: Peter Jung <peter@jung.ninja>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 27c23053..d22bcace 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ autoflake = "^2.2.1"
 isort = "^5.13.2"
 markdownify = "^0.11.6"
 tavily-python = "^0.3.9"
-microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "56db91de72b6466080c26434631808cc20af670c" }
+microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" }
 pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"}
 psycopg2-binary = "^2.9.9"
 sqlmodel = "^0.0.21"

From 97ec5ac60910915913aff1b89725da91f9b48f5b Mon Sep 17 00:00:00 2001
From: evangriffiths <egriffiths93@gmail.com>
Date: Thu, 29 Aug 2024 15:06:09 +0100
Subject: [PATCH 20/20] Review comment

---
 prediction_market_agent/agents/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py
index d7fcf4a7..ee848320 100644
--- a/prediction_market_agent/agents/utils.py
+++ b/prediction_market_agent/agents/utils.py
@@ -29,7 +29,7 @@ class AgentIdentifier(str, Enum):
     MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2"
     MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3"
     MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app"
-    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "general-agent-4-with-goal-manager"
+    MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "trader-agent-0-with-goal-manager"
 
 
 MEMORIES_TO_LEARNINGS_TEMPLATE = """