From 633c37533e15ddda0583287deb2368297af2a59f Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Fri, 16 Aug 2024 16:55:31 +0100 Subject: [PATCH 01/20] Skeleton --- .../agents/goal_manager.py | 59 +++++++++++++++++++ .../agents/microchain_agent/deploy.py | 24 ++++++++ .../microchain_agent/microchain_agent.py | 2 + prediction_market_agent/agents/utils.py | 3 + 4 files changed, 88 insertions(+) create mode 100644 prediction_market_agent/agents/goal_manager.py diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py new file mode 100644 index 00000000..95e54a82 --- /dev/null +++ b/prediction_market_agent/agents/goal_manager.py @@ -0,0 +1,59 @@ +from pydantic import BaseModel + +from prediction_market_agent.agents.microchain_agent.memory import ChatHistory + + +class Goal(BaseModel): + prompt: str + motivation: str + completion_criteria: str # ? + + +class EvaluatedGoal(Goal): + is_complete: bool + reasoning: str + output: str | None # or 'learning'? + + def to_goal(self) -> Goal: + return Goal( + prompt=self.prompt, + motivation=self.motivation, + completion_criteria=self.completion_criteria, + ) + + +class GoalManager: + def __init__( + self, + agent_id: str, + ): + self.agent_id: str = agent_id + + def get_latest_goal_from_memory(self) -> EvaluatedGoal | None: + pass + + def generate_goal(self) -> Goal: + """ + If a goal exists from a previous session, load it and check its status. + Otherwise create a new one. 
+ """ + pass + + def get_goal(self) -> Goal: + if goal := self.get_latest_goal_from_memory(): + if goal.is_complete: + # Generate a new goal + return self.generate_goal() + else: + # Try again + return goal + return self.generate_goal() + + def evaluate_goal_progress( + goal: Goal, + chat_history: ChatHistory, + ) -> EvaluatedGoal: + pass + + def save_evaluated_goal(self, goal: EvaluatedGoal) -> None: + pass diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 4dfdd829..4011139c 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -2,6 +2,7 @@ from prediction_market_agent_tooling.deploy.agent import DeployableAgent from prediction_market_agent_tooling.markets.markets import MarketType +from prediction_market_agent.agents.goal_manager import GoalManager from prediction_market_agent.agents.microchain_agent.microchain_agent import ( SupportedModel, build_agent, @@ -28,6 +29,7 @@ class DeployableMicrochainAgent(DeployableAgent): load_historical_prompt: bool = False system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN + goal_manager: GoalManager | None = None def run( self, @@ -47,6 +49,13 @@ def run( prompt_handler if self.load_historical_prompt else None ), ) + + if self.goal_manager: + goal = self.goal_manager.get_goal() + prompt = goal.prompt + else: + prompt = None + agent: Agent = build_agent( market_type=market_type, model=self.model, @@ -57,6 +66,7 @@ def run( functions_config=FunctionsConfig.from_system_prompt_choice( self.system_prompt_choice ), + prompt=prompt, ) # Save formatted system prompt @@ -70,6 +80,12 @@ def run( ) prompt_handler.save_prompt(get_editable_prompt_from_agent(agent)) + if self.goal_manager: + evaluated_goal = self.goal_manager.evaluate_goal_progress( + goal=goal, chat_history=agent.history + ) 
+ self.goal_manager.save_evaluated_goal(evaluated_goal) + class DeployableMicrochainModifiableSystemPromptAgentAbstract( DeployableMicrochainAgent @@ -102,3 +118,11 @@ class DeployableMicrochainModifiableSystemPromptAgent3( ): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3 model = SupportedModel.llama_31_instruct + + +class DeployableMicrochainModifiableSystemPromptAgent3( + DeployableMicrochainModifiableSystemPromptAgentAbstract +): + task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER + goal_manager: GoalManager = GoalManager(agent_id=task_description) + model = SupportedModel.gpt_4o diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py index 636eb0ae..35ed5079 100644 --- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py +++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py @@ -147,6 +147,7 @@ def build_agent( long_term_memory: LongTermMemoryTableHandler | None = None, allow_stop: bool = True, bootstrap: str | None = None, + prompt: str | None = None, raise_on_error: bool = True, ) -> Agent: engine = Engine() @@ -185,6 +186,7 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None: llm=LLM(generator=generator), engine=engine, on_iteration_step=on_iteration_step, + prompt=prompt, ) for f in build_agent_functions( diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py index 8622e6b6..e59a1dab 100644 --- a/prediction_market_agent/agents/utils.py +++ b/prediction_market_agent/agents/utils.py @@ -25,6 +25,9 @@ class AgentIdentifier(str, Enum): MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2" MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3" MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app" + MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = ( + "microchain-agent-deployment-omen_with_goal_manager" + ) MEMORIES_TO_LEARNINGS_TEMPLATE 
= """ From 68444b1c329e97245d7d9ef1c148d264777beb32 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Fri, 16 Aug 2024 19:08:29 +0100 Subject: [PATCH 02/20] Add table handler and test --- .../agents/goal_manager.py | 66 +++++++++++++++---- .../agents/microchain_agent/deploy.py | 4 +- .../db/evaluated_goal_table_handler.py | 34 ++++++++++ prediction_market_agent/db/models.py | 19 ++++++ tests/db/test_evaluated_goal_table_handler.py | 54 +++++++++++++++ 5 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 prediction_market_agent/db/evaluated_goal_table_handler.py create mode 100644 tests/db/test_evaluated_goal_table_handler.py diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index 95e54a82..917b5f21 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -1,24 +1,45 @@ +from prediction_market_agent_tooling.tools.utils import utcnow from pydantic import BaseModel from prediction_market_agent.agents.microchain_agent.memory import ChatHistory +from prediction_market_agent.db.evaluated_goal_table_handler import ( + EvaluatedGoalTableHandler, +) +from prediction_market_agent.db.models import EvaluatedGoalModel class Goal(BaseModel): prompt: str motivation: str - completion_criteria: str # ? + completion_criteria: str # TODO maybe? class EvaluatedGoal(Goal): is_complete: bool reasoning: str - output: str | None # or 'learning'?
+ output: str | None - def to_goal(self) -> Goal: - return Goal( + @classmethod + def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal": + return EvaluatedGoal( + prompt=model.prompt, + motivation=model.motivation, + completion_criteria=model.completion_criteria, + is_complete=model.is_complete, + reasoning=model.reasoning, + output=model.output, + ) + + def to_model(self, agent_id: str) -> EvaluatedGoalModel: + return EvaluatedGoalModel( prompt=self.prompt, motivation=self.motivation, completion_criteria=self.completion_criteria, + is_complete=self.is_complete, + reasoning=self.reasoning, + output=self.output, + agent_id=agent_id, + datetime_=utcnow(), ) @@ -26,21 +47,34 @@ class GoalManager: def __init__( self, agent_id: str, + sqlalchemy_db_url: str | None = None, ): - self.agent_id: str = agent_id + self.agent_id = agent_id + self.table_handler = EvaluatedGoalTableHandler( + agent_id=agent_id, + sqlalchemy_db_url=sqlalchemy_db_url, + ) - def get_latest_goal_from_memory(self) -> EvaluatedGoal | None: - pass + def get_latest_evaluated_goal_from_memory(self) -> EvaluatedGoal | None: + evaluated_goal_model = self.table_handler.get_latest_evaluated_goal() + if evaluated_goal_model: + return EvaluatedGoal.from_model(model=evaluated_goal_model) + return None def generate_goal(self) -> Goal: """ If a goal exists from a previous session, load it and check its status. Otherwise create a new one. 
""" - pass + # TODO + return Goal( + prompt="foo", + motivation="bar", + completion_criteria="baz", + ) def get_goal(self) -> Goal: - if goal := self.get_latest_goal_from_memory(): + if goal := self.get_latest_evaluated_goal_from_memory(): if goal.is_complete: # Generate a new goal return self.generate_goal() @@ -50,10 +84,20 @@ def get_goal(self) -> Goal: return self.generate_goal() def evaluate_goal_progress( + self, goal: Goal, chat_history: ChatHistory, ) -> EvaluatedGoal: - pass + # TODO + return EvaluatedGoal( + prompt=goal.prompt, + motivation=goal.motivation, + completion_criteria=goal.completion_criteria, + is_complete=False, + reasoning="", + output="", + ) def save_evaluated_goal(self, goal: EvaluatedGoal) -> None: - pass + model = goal.to_model(agent_id=self.agent_id) + self.table_handler.save_evaluated_goal(model) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 6ad7bbe0..050d14b1 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -122,9 +122,7 @@ class DeployableMicrochainModifiableSystemPromptAgent3( model = SupportedModel.llama_31_instruct -class DeployableMicrochainModifiableSystemPromptAgent3( - DeployableMicrochainModifiableSystemPromptAgentAbstract -): +class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER goal_manager: GoalManager = GoalManager(agent_id=task_description) model = SupportedModel.gpt_4o diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py new file mode 100644 index 00000000..6a0208eb --- /dev/null +++ b/prediction_market_agent/db/evaluated_goal_table_handler.py @@ -0,0 +1,34 @@ +import typing as t + +from sqlmodel import col + +from prediction_market_agent.db.models import 
EvaluatedGoalModel +from prediction_market_agent.db.sql_handler import SQLHandler + + +class EvaluatedGoalTableHandler: + def __init__( + self, + agent_id: str, + sqlalchemy_db_url: str | None = None, + ): + self.agent_id = agent_id + self.sql_handler = SQLHandler( + model=EvaluatedGoalModel, + sqlalchemy_db_url=sqlalchemy_db_url, + ) + + def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None: + self.sql_handler.save_multiple([model]) + + def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None: + column_to_order: str = EvaluatedGoalModel.datetime_.key # type: ignore + items: t.Sequence[ + EvaluatedGoalModel + ] = self.sql_handler.get_with_filter_and_order( + query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id], + order_by_column_name=column_to_order, + order_desc=True, + limit=1, + ) + return items[0] if items else None diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py index 0302a9da..7eb09e75 100644 --- a/prediction_market_agent/db/models.py +++ b/prediction_market_agent/db/models.py @@ -29,3 +29,22 @@ class Prompt(SQLModel, table=True): # user (or app) should be persisted. session_identifier: str datetime_: datetime + + +class EvaluatedGoalModel(SQLModel, table=True): + """ + Checkpoint for general agent's goals. Used to store the agent's progress + towards a goal, and to restore it in future sessions. + """ + + __tablename__ = "evaluated_goals" + __table_args__ = {"extend_existing": True} + id: Optional[int] = Field(default=None, primary_key=True) + agent_id: str # Per-agent identifier + prompt: str + motivation: str + completion_criteria: str + is_complete: bool + reasoning: str + output: str | None # TODO or 'learning'? 
+ datetime_: datetime diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py new file mode 100644 index 00000000..d953171e --- /dev/null +++ b/tests/db/test_evaluated_goal_table_handler.py @@ -0,0 +1,54 @@ +from typing import Generator + +import pytest + +from prediction_market_agent.agents.goal_manager import EvaluatedGoal +from prediction_market_agent.db.evaluated_goal_table_handler import ( + EvaluatedGoalTableHandler, +) + +SQLITE_DB_URL = "sqlite://" +TEST_AGENT_ID = "test_agent_id" + + +@pytest.fixture(scope="function") +def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]: + """Creates a in-memory SQLite DB for testing""" + table_handler = EvaluatedGoalTableHandler( + sqlalchemy_db_url=SQLITE_DB_URL, + agent_id=TEST_AGENT_ID, + ) + yield table_handler + + +def test_save_load_evaluated_goal(table_handler: EvaluatedGoalTableHandler) -> None: + evaluated_goal = EvaluatedGoal( + prompt="abc", + motivation="def", + completion_criteria="ghi", + is_complete=True, + reasoning="jkl", + output="mno", + ) + model = evaluated_goal.to_model(agent_id=TEST_AGENT_ID) + table_handler.save_evaluated_goal(model=model) + + loaded_model = table_handler.get_latest_evaluated_goal() + assert loaded_model + loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_model) + assert loaded_evaluated_goal == evaluated_goal + + +# TODO +# def test_load_latest_prompt(memory_prompt_handler: PromptTableHandler) -> None: +# prompt_text_first = "prompt_text_first" +# prompt_text_second = "prompt_text_second" + +# memory_prompt_handler.save_prompt(prompt_text_first) +# memory_prompt_handler.save_prompt(prompt_text_second) + +# # assert latest prompt is there +# result = memory_prompt_handler.fetch_latest_prompt() +# assert result +# # ignore timezone +# assert result.prompt == prompt_text_second From 39d4300618cebae9aaae37678ec4a69aa5c76aea Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 10:29:54 
+0100 Subject: [PATCH 03/20] Add test for goal generation --- .../agents/goal_manager.py | 187 +++++++++++++++--- .../agents/microchain_agent/deploy.py | 9 +- .../db/evaluated_goal_table_handler.py | 6 +- prediction_market_agent/db/models.py | 4 +- prediction_market_agent/run_agent.py | 3 + tests/agents/test_goal_manager.py | 139 +++++++++++++ tests/db/test_evaluated_goal_table_handler.py | 96 +++++++-- 7 files changed, 390 insertions(+), 54 deletions(-) create mode 100644 tests/agents/test_goal_manager.py diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index 917b5f21..9475b1b8 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -1,28 +1,75 @@ +from langchain_core.output_parsers import PydanticOutputParser +from langchain_core.prompts import PromptTemplate +from langchain_openai import ChatOpenAI from prediction_market_agent_tooling.tools.utils import utcnow -from pydantic import BaseModel +from pydantic import BaseModel, Field from prediction_market_agent.agents.microchain_agent.memory import ChatHistory from prediction_market_agent.db.evaluated_goal_table_handler import ( EvaluatedGoalTableHandler, ) from prediction_market_agent.db.models import EvaluatedGoalModel +from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL, APIKeys + +GENERATE_GOAL_PROMPT_TEMPLATE = """ +Generate a specific goal for an open-ended, autonomous agent that has a high-level description and a number of specific capabilities. +If applicable, use the agent's previous evaluated goals when considering its new goal. 
+ +The goal should satisfy the following: +- have a narrow focus +- be realistically achievable given the agen't specific capabilities +- not be contingent on external factors that are out of the agent's control +- have a clear motivation and completion criteria +- advance the aims of the agent +- balance the need for exploration and exploitation + +[HIGH LEVEL DESCRIPTION] +{high_level_description} + +[AGENT CAPABILITIES] +{agent_capabilities} + +{previous_evaluated_goals} +{format_instructions} +""" class Goal(BaseModel): - prompt: str - motivation: str - completion_criteria: str # TODO maybe? + goal: str = Field(..., description="A clear description of the goal") + motivation: str = Field(..., description="The reason for the goal") + completion_criteria: str = Field( + ..., + description="The criteria that will be used to evaluate whether the goal has been completed", + ) class EvaluatedGoal(Goal): - is_complete: bool - reasoning: str - output: str | None + reasoning: str = Field( + ..., description="An explanation of why the goal is deemed completed or not" + ) + is_complete: bool = Field(..., description="Whether the goal is complete") + output: str | None = Field( + ..., + description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output", + ) + + def __repr__(self) -> str: + return ( + f"Goal: {self.goal}\n" + f"Motivation: {self.motivation}\n" + f"Completion Criteria: {self.completion_criteria}\n" + f"Is Complete: {self.is_complete}\n" + f"Reasoning: {self.reasoning}\n" + f"Output: {self.output}" + ) + + def __str__(self) -> str: + return self.__repr__() @classmethod def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal": return EvaluatedGoal( - prompt=model.prompt, + goal=model.goal, motivation=model.motivation, completion_criteria=model.completion_criteria, is_complete=model.is_complete, @@ -32,7 +79,7 @@ def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal": def 
to_model(self, agent_id: str) -> EvaluatedGoalModel: return EvaluatedGoalModel( - prompt=self.prompt, + goal=self.goal, motivation=self.motivation, completion_criteria=self.completion_criteria, is_complete=self.is_complete, @@ -42,46 +89,119 @@ def to_model(self, agent_id: str) -> EvaluatedGoalModel: datetime_=utcnow(), ) + def to_goal(self) -> Goal: + return Goal( + goal=self.goal, + motivation=self.motivation, + completion_criteria=self.completion_criteria, + ) + class GoalManager: def __init__( self, agent_id: str, + high_level_description: str, + agent_capabilities: str, + retry_limit: int = 3, + model: str = DEFAULT_OPENAI_MODEL, sqlalchemy_db_url: str | None = None, ): self.agent_id = agent_id + self.high_level_description = high_level_description + self.agent_capabilities = agent_capabilities + self.retry_limit = retry_limit + self.model = model self.table_handler = EvaluatedGoalTableHandler( agent_id=agent_id, sqlalchemy_db_url=sqlalchemy_db_url, ) - def get_latest_evaluated_goal_from_memory(self) -> EvaluatedGoal | None: - evaluated_goal_model = self.table_handler.get_latest_evaluated_goal() - if evaluated_goal_model: - return EvaluatedGoal.from_model(model=evaluated_goal_model) - return None + def get_latest_evaluated_goals_from_memory(self, limit: int) -> list[EvaluatedGoal]: + evaluated_goal_models = self.table_handler.get_latest_evaluated_goals( + limit=limit + ) + return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models] - def generate_goal(self) -> Goal: + def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal: """ - If a goal exists from a previous session, load it and check its status. - Otherwise create a new one. + Generate a new goal based on the high-level description and the latest + evaluated goals. + + TODO support generation of long-horizon goals with a specified + completion date, until which the goal's status is 'pending'. 
""" - # TODO - return Goal( - prompt="foo", - motivation="bar", - completion_criteria="baz", + parser = PydanticOutputParser(pydantic_object=Goal) + prompt = PromptTemplate( + template=GENERATE_GOAL_PROMPT_TEMPLATE, + input_variables=[ + "high_level_description", + "agent_capabilities", + "previous_evaluated_goals", + ], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + latest_evaluated_goals_str = self.evaluated_goals_to_str(latest_evaluated_goals) + llm = ChatOpenAI( + temperature=0, + model=self.model, + api_key=APIKeys().openai_api_key_secretstr_v1, + ) + chain = prompt | llm | parser + + goal: Goal = chain.invoke( + { + "high_level_description": self.high_level_description, + "agent_capabilities": self.agent_capabilities, + "previous_evaluated_goals": latest_evaluated_goals_str, + } ) + return goal + + def have_reached_retry_limit( + self, latest_evaluated_goals: list[EvaluatedGoal] + ) -> bool: + if self.retry_limit == 0: + return True + + if len(latest_evaluated_goals) < self.retry_limit + 1: + return False + + latest_goal = latest_evaluated_goals[0].to_goal() + if all( + [ + g.to_goal() == latest_goal + for g in latest_evaluated_goals[: self.retry_limit + 1] + ] + ): + return True + + return False def get_goal(self) -> Goal: - if goal := self.get_latest_evaluated_goal_from_memory(): - if goal.is_complete: + """ + Manage the fetching of goals from memory, and deciding when to generate + a new goal vs. retrying an incomplete one. + + TODO add the ability to continue from a previous session if the goal + is not complete. 
+ """ + latest_evaluated_goals = self.get_latest_evaluated_goals_from_memory( + limit=self.retry_limit + ) + if latest_evaluated_goals: + evaluated_goal = latest_evaluated_goals[0] + if evaluated_goal.is_complete: # Generate a new goal - return self.generate_goal() + return self.generate_goal(latest_evaluated_goals) else: - # Try again - return goal - return self.generate_goal() + # Try again, unless we've reached the retry limit + if self.have_reached_retry_limit(latest_evaluated_goals): + return self.generate_goal(latest_evaluated_goals) + else: + return evaluated_goal.to_goal() + + return self.generate_goal(latest_evaluated_goals=[]) def evaluate_goal_progress( self, @@ -90,7 +210,7 @@ def evaluate_goal_progress( ) -> EvaluatedGoal: # TODO return EvaluatedGoal( - prompt=goal.prompt, + goal=goal.goal, motivation=goal.motivation, completion_criteria=goal.completion_criteria, is_complete=False, @@ -101,3 +221,12 @@ def evaluate_goal_progress( def save_evaluated_goal(self, goal: EvaluatedGoal) -> None: model = goal.to_model(agent_id=self.agent_id) self.table_handler.save_evaluated_goal(model) + + @staticmethod + def evaluated_goals_to_str(evaluated_goals: list[EvaluatedGoal]) -> str: + goals_str = "" + for i, goal in enumerate(evaluated_goals): + goals_str += f"## Goal {i+1}:\n{goal}\n" + if i < len(evaluated_goals) - 1: + goals_str += "\n" + return goals_str diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 050d14b1..a46b9336 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -52,7 +52,7 @@ def run( if self.goal_manager: goal = self.goal_manager.get_goal() - prompt = goal.prompt + prompt = goal.goal else: prompt = None @@ -124,5 +124,10 @@ class DeployableMicrochainModifiableSystemPromptAgent3( class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): task_description = 
AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER - goal_manager: GoalManager = GoalManager(agent_id=task_description) + goal_manager = GoalManager( + agent_id=task_description, + high_level_description="foo", # TODO + agent_capabilities="bar", # TODO + retry_limit=3, + ) model = SupportedModel.gpt_4o diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py index 6a0208eb..a95e02c2 100644 --- a/prediction_market_agent/db/evaluated_goal_table_handler.py +++ b/prediction_market_agent/db/evaluated_goal_table_handler.py @@ -21,7 +21,7 @@ def __init__( def save_evaluated_goal(self, model: EvaluatedGoalModel) -> None: self.sql_handler.save_multiple([model]) - def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None: + def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]: column_to_order: str = EvaluatedGoalModel.datetime_.key # type: ignore items: t.Sequence[ EvaluatedGoalModel @@ -29,6 +29,6 @@ def get_latest_evaluated_goal(self) -> EvaluatedGoalModel | None: query_filters=[col(EvaluatedGoalModel.agent_id) == self.agent_id], order_by_column_name=column_to_order, order_desc=True, - limit=1, + limit=limit, ) - return items[0] if items else None + return list(items) diff --git a/prediction_market_agent/db/models.py b/prediction_market_agent/db/models.py index 7eb09e75..94b5baaf 100644 --- a/prediction_market_agent/db/models.py +++ b/prediction_market_agent/db/models.py @@ -41,10 +41,10 @@ class EvaluatedGoalModel(SQLModel, table=True): __table_args__ = {"extend_existing": True} id: Optional[int] = Field(default=None, primary_key=True) agent_id: str # Per-agent identifier - prompt: str + goal: str motivation: str completion_criteria: str is_complete: bool reasoning: str - output: str | None # TODO or 'learning'? 
+ output: str | None datetime_: datetime diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py index 407032ce..29b136de 100644 --- a/prediction_market_agent/run_agent.py +++ b/prediction_market_agent/run_agent.py @@ -24,6 +24,7 @@ DeployableMicrochainModifiableSystemPromptAgent1, DeployableMicrochainModifiableSystemPromptAgent2, DeployableMicrochainModifiableSystemPromptAgent3, + DeployableMicrochainWithGoalManagerAgent0, ) from prediction_market_agent.agents.prophet_agent.deploy import ( DeployableOlasEmbeddingOAAgent, @@ -54,6 +55,7 @@ class RunnableAgent(str, Enum): microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1" microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2" microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3" + microchain_with_goal_manager_agent0 = "microchain_with_goal_manager_agent0" metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent" prophet_gpt4o = "prophet_gpt4o" prophet_gpt4 = "prophet_gpt4" @@ -74,6 +76,7 @@ class RunnableAgent(str, Enum): RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1, RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2, RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3, + RunnableAgent.microchain_with_goal_manager_agent0: DeployableMicrochainWithGoalManagerAgent0, RunnableAgent.social_media: DeployableSocialMediaAgent, RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent, RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent, diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py new file mode 100644 index 00000000..a48aefe1 --- /dev/null +++ b/tests/agents/test_goal_manager.py @@ -0,0 +1,139 @@ +import pytest + +from prediction_market_agent.agents.goal_manager import 
EvaluatedGoal, GoalManager +from tests.utils import RUN_PAID_TESTS + + +def test_have_reached_retry_limit() -> None: + goal_manager = GoalManager( + agent_id="test_agent", + high_level_description="foo", + agent_capabilities="bar", + retry_limit=0, + ) + + g0 = EvaluatedGoal( + goal="goal0", + motivation="motivation", + completion_criteria="completion_criteria", + is_complete=False, + reasoning="reasoning", + output=None, + ) + g1 = g0.model_copy() + g1.goal = "goal1" + + assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is True + + goal_manager.retry_limit = 1 + assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False + assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is True + ) + + goal_manager.retry_limit = 2 + assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[]) is False + assert goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0]) is False + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0]) is False + ) + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0]) + is True + ) + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g1]) + is False + ) + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1]) + is True + ) + assert ( + goal_manager.have_reached_retry_limit(latest_evaluated_goals=[g0, g0, g0, g1]) + is True + ) + + +def test_evaluated_goals_to_str() -> None: + gs = [ + EvaluatedGoal( + goal="foo0", + motivation="bar0", + completion_criteria="baz0", + is_complete=False, + reasoning="qux0", + output=None, + ), + EvaluatedGoal( + goal="foo1", + motivation="bar1", + completion_criteria="baz1", + is_complete=True, + reasoning="qux1", + output="output", + ), + ] + goals_str = GoalManager.evaluated_goals_to_str(gs) + assert goals_str == ( + 
"## Goal 1:\n" + "Goal: foo0\n" + "Motivation: bar0\n" + "Completion Criteria: baz0\n" + "Is Complete: False\n" + "Reasoning: qux0\n" + "Output: None\n" + "\n" + "## Goal 2:\n" + "Goal: foo1\n" + "Motivation: bar1\n" + "Completion Criteria: baz1\n" + "Is Complete: True\n" + "Reasoning: qux1\n" + "Output: output\n" + ) + + +@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") +def test_generate_goal() -> None: + goal_manager = GoalManager( + agent_id="test_agent", + high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.", + agent_capabilities=( + "- Web search\n" + "- Web scraping\n" + "- Accurate predictions of the probability of yes/no outcomes for a given event." + ), + ) + goal0 = goal_manager.generate_goal(latest_evaluated_goals=[]) + + evaluated_goal = EvaluatedGoal( + goal="Investigate the top 5 contenders for the Tour de France, make predictions on their chances of overall victory, and compare these against the market odds.", + motivation="The Tour de France is a popular race, so markets are likely to have the highest liquidity", + completion_criteria="5 contenders identified, predictions made, and compared against market odds", + is_complete=False, + reasoning="The Tour de France is cancelled this year.", + output=None, + ) + goal2 = goal_manager.generate_goal(latest_evaluated_goals=[evaluated_goal]) + + # Generates a goal related to the Tour de France + assert "Tour de France" in goal0.goal + + # Does not generate a goal related to the Tour de France, based on the + # reasoning of the previous evaluated goal + assert "Tour de France" not in goal2.goal + + +@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") +def test_evaluate_goal_progress() -> None: + goal_manager = GoalManager( + agent_id="test_agent", + high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.", + agent_capabilities=( + "- Web 
search\n" + "- Web scraping\n" + "- Accurate predictions of the probability of yes/no outcomes for a given event." + ), + ) diff --git a/tests/db/test_evaluated_goal_table_handler.py b/tests/db/test_evaluated_goal_table_handler.py index d953171e..b1731816 100644 --- a/tests/db/test_evaluated_goal_table_handler.py +++ b/tests/db/test_evaluated_goal_table_handler.py @@ -21,34 +21,94 @@ def table_handler() -> Generator[EvaluatedGoalTableHandler, None, None]: yield table_handler -def test_save_load_evaluated_goal(table_handler: EvaluatedGoalTableHandler) -> None: +def test_save_load_evaluated_goal_0(table_handler: EvaluatedGoalTableHandler) -> None: evaluated_goal = EvaluatedGoal( - prompt="abc", + goal="abc", motivation="def", completion_criteria="ghi", is_complete=True, reasoning="jkl", output="mno", ) - model = evaluated_goal.to_model(agent_id=TEST_AGENT_ID) - table_handler.save_evaluated_goal(model=model) + table_handler.save_evaluated_goal( + model=evaluated_goal.to_model(agent_id=TEST_AGENT_ID) + ) - loaded_model = table_handler.get_latest_evaluated_goal() - assert loaded_model - loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_model) + loaded_models = table_handler.get_latest_evaluated_goals(limit=1) + assert len(loaded_models) == 1 + loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0]) assert loaded_evaluated_goal == evaluated_goal -# TODO -# def test_load_latest_prompt(memory_prompt_handler: PromptTableHandler) -> None: -# prompt_text_first = "prompt_text_first" -# prompt_text_second = "prompt_text_second" +def test_save_load_evaluated_goal_1(table_handler: EvaluatedGoalTableHandler) -> None: + evaluated_goal0 = EvaluatedGoal( + goal="foo", + motivation="foo", + completion_criteria="foo", + is_complete=True, + reasoning="foo", + output="foo", + ) + evaluated_goal1 = EvaluatedGoal( + goal="bar", + motivation="bar", + completion_criteria="bar", + is_complete=False, + reasoning="bar", + output="bar", + ) + + 
table_handler.save_evaluated_goal( + model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID) + ) + table_handler.save_evaluated_goal( + model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID) + ) + + loaded_models = table_handler.get_latest_evaluated_goals(limit=1) + assert len(loaded_models) == 1 + loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0]) + assert loaded_evaluated_goal == evaluated_goal1 + + for limit in [2, 3]: + loaded_models = table_handler.get_latest_evaluated_goals(limit=limit) + assert len(loaded_models) == 2 + # Check LIFO order + assert loaded_models[0].datetime_ > loaded_models[1].datetime_ + assert [EvaluatedGoal.from_model(model) for model in loaded_models] == [ + evaluated_goal1, + evaluated_goal0, + ] + + +def test_save_load_evaluated_goal_multiple_agents( + table_handler: EvaluatedGoalTableHandler, +) -> None: + evaluated_goal0 = EvaluatedGoal( + goal="foo", + motivation="foo", + completion_criteria="foo", + is_complete=True, + reasoning="foo", + output="foo", + ) + evaluated_goal1 = EvaluatedGoal( + goal="bar", + motivation="bar", + completion_criteria="bar", + is_complete=False, + reasoning="bar", + output="bar", + ) -# memory_prompt_handler.save_prompt(prompt_text_first) -# memory_prompt_handler.save_prompt(prompt_text_second) + table_handler.save_evaluated_goal( + model=evaluated_goal0.to_model(agent_id=TEST_AGENT_ID) + ) + table_handler.save_evaluated_goal( + model=evaluated_goal1.to_model(agent_id=TEST_AGENT_ID + "1") + ) -# # assert latest prompt is there -# result = memory_prompt_handler.fetch_latest_prompt() -# assert result -# # ignore timezone -# assert result.prompt == prompt_text_second + loaded_models = table_handler.get_latest_evaluated_goals(limit=1) + assert len(loaded_models) == 1 + loaded_evaluated_goal = EvaluatedGoal.from_model(model=loaded_models[0]) + assert loaded_evaluated_goal == evaluated_goal0 From 19ba6869c92eeec2ca52a3af804d41e81cb77c24 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 
21 Aug 2024 15:01:13 +0100 Subject: [PATCH 04/20] Add tests for GoalManager.evaluate_goal_progress --- .../agents/goal_manager.py | 77 ++++++++- .../agents/microchain_agent/deploy.py | 2 +- .../agents/microchain_agent/memory.py | 6 + tests/agents/test_goal_manager.py | 156 +++++++++++++++++- tests/test_chat_history.py | 14 ++ 5 files changed, 236 insertions(+), 19 deletions(-) diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index 9475b1b8..b8f39aa3 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -33,6 +33,19 @@ {format_instructions} """ +EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE = """ +An agent and user are working together to achieve a well defined goal. +Given their chat history, and the goal definition, evaluate whether the goal has been completed. + +[GOAL] +{goal_prompt} + +[CHAT HISTORY] +{chat_history} + +{format_instructions} +""" + class Goal(BaseModel): goal: str = Field(..., description="A clear description of the goal") @@ -42,8 +55,16 @@ class Goal(BaseModel): description="The criteria that will be used to evaluate whether the goal has been completed", ) + def to_prompt(self) -> str: + return ( + f"{self.goal}" + f"\n\n" + f"## Motivation\n{self.motivation}" + f"## Completion Criteria:\n\n{self.completion_criteria}" + ) -class EvaluatedGoal(Goal): + +class GoalEvaluation(BaseModel): reasoning: str = Field( ..., description="An explanation of why the goal is deemed completed or not" ) @@ -53,7 +74,13 @@ class EvaluatedGoal(Goal): description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output", ) - def __repr__(self) -> str: + +class EvaluatedGoal(Goal): + reasoning: str + is_complete: bool + output: str | None + + def __str__(self) -> str: return ( f"Goal: {self.goal}\n" f"Motivation: {self.motivation}\n" @@ -63,9 +90,6 @@ def __repr__(self) -> str: f"Output: 
{self.output}" ) - def __str__(self) -> str: - return self.__repr__() - @classmethod def from_model(cls, model: EvaluatedGoalModel) -> "EvaluatedGoal": return EvaluatedGoal( @@ -203,19 +227,54 @@ def get_goal(self) -> Goal: return self.generate_goal(latest_evaluated_goals=[]) + @classmethod + def get_chat_history_after_goal_prompt( + cls, goal: Goal, chat_history: ChatHistory + ) -> ChatHistory: + """ + Return the chat history after the goal prompt. Raises ValueError if + the goal prompt is not found. + """ + for i, chat_message in enumerate(chat_history.chat_messages): + if chat_message.content == goal.to_prompt(): + return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :]) + raise ValueError("Goal prompt not found in chat history") + def evaluate_goal_progress( self, goal: Goal, chat_history: ChatHistory, ) -> EvaluatedGoal: - # TODO + relevant_chat_history = self.get_chat_history_after_goal_prompt( + goal=goal, + chat_history=chat_history, + ) + parser = PydanticOutputParser(pydantic_object=GoalEvaluation) + prompt = PromptTemplate( + template=EVALUATE_GOAL_PROGRESS_PROMPT_TEMPLATE, + input_variables=["goal_prompt", "chat_history"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + llm = ChatOpenAI( + temperature=0, + model=self.model, + api_key=APIKeys().openai_api_key_secretstr_v1, + ) + chain = prompt | llm | parser + + goal_evaluation: GoalEvaluation = chain.invoke( + { + "goal_prompt": goal.to_prompt(), + "chat_history": str(relevant_chat_history), + } + ) return EvaluatedGoal( goal=goal.goal, motivation=goal.motivation, completion_criteria=goal.completion_criteria, - is_complete=False, - reasoning="", - output="", + is_complete=goal_evaluation.is_complete, + reasoning=goal_evaluation.reasoning, + output=goal_evaluation.output, ) def save_evaluated_goal(self, goal: EvaluatedGoal) -> None: diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py
index a46b9336..348bce1c 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -52,7 +52,7 @@ def run( if self.goal_manager: goal = self.goal_manager.get_goal() - prompt = goal.goal + prompt = goal.to_prompt() else: prompt = None diff --git a/prediction_market_agent/agents/microchain_agent/memory.py b/prediction_market_agent/agents/microchain_agent/memory.py index 27788466..53f667bb 100644 --- a/prediction_market_agent/agents/microchain_agent/memory.py +++ b/prediction_market_agent/agents/microchain_agent/memory.py @@ -23,6 +23,9 @@ class ChatMessage(BaseModel): def is_system_message(self) -> bool: return self.role == "system" + def __str__(self) -> str: + return f"{self.role}: {self.content}" + class DatedChatMessage(ChatMessage): datetime_: datetime @@ -98,6 +101,9 @@ def iterations(self) -> int: else: return (self.num_messages - 1) // 2 + def __str__(self) -> str: + return "\n".join(str(m) for m in self.chat_messages) + class DatedChatHistory(ChatHistory): chat_messages: Sequence[DatedChatMessage] diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py index a48aefe1..9fabf4fc 100644 --- a/tests/agents/test_goal_manager.py +++ b/tests/agents/test_goal_manager.py @@ -1,6 +1,11 @@ import pytest -from prediction_market_agent.agents.goal_manager import EvaluatedGoal, GoalManager +from prediction_market_agent.agents.goal_manager import EvaluatedGoal, Goal, GoalManager +from prediction_market_agent.agents.microchain_agent.memory import ( + ChatHistory, + ChatMessage, +) +from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL from tests.utils import RUN_PAID_TESTS @@ -105,6 +110,7 @@ def test_generate_goal() -> None: "- Web scraping\n" "- Accurate predictions of the probability of yes/no outcomes for a given event." 
), + model=DEFAULT_OPENAI_MODEL, ) goal0 = goal_manager.generate_goal(latest_evaluated_goals=[]) @@ -126,14 +132,146 @@ def test_generate_goal() -> None: assert "Tour de France" not in goal2.goal +def test_get_chat_history_after_goal_prompt() -> None: + goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz") + assistant_message = ChatMessage(role="assistant", content="The answer is 42.") + chat_history = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ChatMessage(role="user", content=goal.to_prompt()), + assistant_message, + ] + ) + assert GoalManager.get_chat_history_after_goal_prompt( + goal=goal, chat_history=chat_history + ) == ChatHistory(chat_messages=[assistant_message]) + + +def test_get_chat_history_after_goal_prompt_error() -> None: + goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz") + assistant_message = ChatMessage(role="assistant", content="The answer is 42.") + chat_history = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ] + ) + try: + GoalManager.get_chat_history_after_goal_prompt( + goal=goal, chat_history=chat_history + ) + except ValueError as e: + assert str(e) == "Goal prompt not found in chat history" + + @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") -def test_evaluate_goal_progress() -> None: +def test_evaluate_goal_progress_0() -> None: + """ + Test for the case where the evaluated goal: + - is completed + - should have a 'None' output. + """ goal_manager = GoalManager( - agent_id="test_agent", - high_level_description="You are a gambler that focuses on cycling races, predominantly the Tour de France.", - agent_capabilities=( - "- Web search\n" - "- Web scraping\n" - "- Accurate predictions of the probability of yes/no outcomes for a given event." 
- ), + agent_id="", # Not relevant to test + high_level_description="", # Not relevant to test + agent_capabilities="", # Not relevant to test + model=DEFAULT_OPENAI_MODEL, + ) + goal = Goal( + goal="If last year's TdF winner is competing this year, place a small bet on them.", + motivation="The winner of the last Tour de France is likely to be in good form.", + completion_criteria="If the winner is competing, place a small bet, otherwise do nothing.", + ) + chat_history0 = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ChatMessage(role="user", content=goal.to_prompt()), + ChatMessage( + role="assistant", + content="Searching the web... Yes the winner, Tadej Pogacar, is competing.", + ), + ChatMessage(role="user", content="The reasoning has been recorded."), + ChatMessage( + role="assistant", + content="The market id is '0x123' for the TdF winner. Placing bet of 0.01 USD on Tadej Pogacar", + ), + ChatMessage(role="user", content="Bet successfully placed."), + ] + ) + evaluated_goal = goal_manager.evaluate_goal_progress( + goal=goal, + chat_history=chat_history0, + ) + assert evaluated_goal.is_complete is True + assert evaluated_goal.output == None + + +@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") +def test_evaluate_goal_progress_1() -> None: + """ + Test for the case where the evaluated goal: + - is completed + - should have a non-'None' output. 
+ """ + goal_manager = GoalManager( + agent_id="", # Not relevant to test + high_level_description="", # Not relevant to test + agent_capabilities="", # Not relevant to test + model=DEFAULT_OPENAI_MODEL, + ) + goal = Goal( + goal="If last year's TdF winner is competing this year, get their probability of winning.", + motivation="The winner of the last Tour de France is likely to be in good form.", + completion_criteria="Return the name and odds of last year's winner for this year's TdF.", + ) + chat_history0 = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ChatMessage(role="user", content=goal.to_prompt()), + ChatMessage( + role="assistant", + content="Searching the web... Yes the winner, Tadej Pogacar, is competing. His winning probability: p_yes=0.27", + ), + ChatMessage(role="user", content="The reasoning has been recorded."), + ] + ) + evaluated_goal = goal_manager.evaluate_goal_progress( + goal=goal, + chat_history=chat_history0, + ) + assert evaluated_goal.is_complete is True + assert "Tadej Pogacar" in evaluated_goal.output + assert "0.27" in evaluated_goal.output + + +@pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") +def test_evaluate_goal_progress_2() -> None: + """ + Test for the case where the evaluated goal is not completed + """ + goal_manager = GoalManager( + agent_id="", # Not relevant to test + high_level_description="", # Not relevant to test + agent_capabilities="", # Not relevant to test + model=DEFAULT_OPENAI_MODEL, + ) + goal = Goal( + goal="If last year's TdF winner is competing this year, get their probability of winning.", + motivation="The winner of the last Tour de France is likely to be in good form.", + completion_criteria="Return the name and odds of last year's winner for this year's TdF.", + ) + chat_history0 = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ChatMessage(role="user", 
content=goal.to_prompt()), + ChatMessage( + role="assistant", + content="Uhoh, I've hit some exception and need to quit", + ), + ] + ) + evaluated_goal = goal_manager.evaluate_goal_progress( + goal=goal, + chat_history=chat_history0, ) + assert evaluated_goal.is_complete is False + assert evaluated_goal.output == None diff --git a/tests/test_chat_history.py b/tests/test_chat_history.py index 103861cd..85c40822 100644 --- a/tests/test_chat_history.py +++ b/tests/test_chat_history.py @@ -5,6 +5,8 @@ from prediction_market_agent_tooling.tools.utils import utcnow from prediction_market_agent.agents.microchain_agent.memory import ( + ChatHistory, + ChatMessage, DatedChatHistory, DatedChatMessage, ) @@ -73,3 +75,15 @@ def test_save_to_and_load_from_memory( new_chat_history.to_undated_chat_history() == chat_history.to_undated_chat_history() ) + + +def test_stringified_chat_history() -> None: + chat_history = ChatHistory( + chat_messages=[ + ChatMessage(role="system", content="You are a helpful assistant."), + ChatMessage(role="user", content="What is the weather like today?"), + ] + ) + assert str(chat_history) == ( + "system: You are a helpful assistant.\nuser: What is the weather like today?" 
+ ) From 8c0413755cf47b0938c601180291e618839b2b45 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 15:28:25 +0100 Subject: [PATCH 05/20] add evaluated goal to chat history --- .../agents/microchain_agent/deploy.py | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 348bce1c..128f88c8 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -3,6 +3,7 @@ from prediction_market_agent_tooling.markets.markets import MarketType from prediction_market_agent.agents.goal_manager import GoalManager +from prediction_market_agent.agents.microchain_agent.memory import ChatMessage from prediction_market_agent.agents.microchain_agent.microchain_agent import ( SupportedModel, build_agent, @@ -74,6 +75,18 @@ def run( agent.run(self.n_iterations) + if self.goal_manager: + evaluated_goal = self.goal_manager.evaluate_goal_progress( + goal=goal, chat_history=agent.history + ) + self.goal_manager.save_evaluated_goal(evaluated_goal) + agent.history.append( + ChatMessage( + role="user", + content=str(evaluated_goal), + ).model_dump() + ) + save_agent_history( agent=agent, long_term_memory=long_term_memory, @@ -82,12 +95,6 @@ def run( if agent.system_prompt != initial_formatted_system_prompt: prompt_handler.save_prompt(get_editable_prompt_from_agent(agent)) - if self.goal_manager: - evaluated_goal = self.goal_manager.evaluate_goal_progress( - goal=goal, chat_history=agent.history - ) - self.goal_manager.save_evaluated_goal(evaluated_goal) - class DeployableMicrochainModifiableSystemPromptAgentAbstract( DeployableMicrochainAgent @@ -120,14 +127,3 @@ class DeployableMicrochainModifiableSystemPromptAgent3( ): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3 model = SupportedModel.llama_31_instruct - - -class 
DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): - task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER - goal_manager = GoalManager( - agent_id=task_description, - high_level_description="foo", # TODO - agent_capabilities="bar", # TODO - retry_limit=3, - ) - model = SupportedModel.gpt_4o From a75d284c6ea3c13141d463b5d0d07b0350493b34 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 17:40:42 +0100 Subject: [PATCH 06/20] tweaks --- poetry.lock | 24 +++++++------ .../agents/goal_manager.py | 35 ++++++++++++------- .../agents/microchain_agent/deploy.py | 34 +++++++++++++++--- .../microchain_agent/microchain_agent.py | 2 +- prediction_market_agent/agents/utils.py | 4 +-- .../db/evaluated_goal_table_handler.py | 9 +++++ pyproject.toml | 2 +- scripts/delete_agent_db_entries.py | 12 +++++++ tests/agents/test_goal_manager.py | 21 +++++------ 9 files changed, 100 insertions(+), 43 deletions(-) diff --git a/poetry.lock b/poetry.lock index e3d50bed..8f277b5a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -5125,19 +5125,23 @@ psutil = "*" [[package]] name = "microchain-python" -version = "0.4.4" +version = "0.4.3" description = "" optional = false python-versions = "*" -files = [ - {file = "microchain_python-0.4.4-py3-none-any.whl", hash = "sha256:b27f7ce318a94342b094d49a0509e5c208166fa1fad4b87b69db75a5fe1c4a9e"}, - {file = "microchain_python-0.4.4.tar.gz", hash = "sha256:33d2f18856988433f17b512ecc217de9d9fdd97ad50adac3f70cfcee7e490d67"}, -] +files = [] +develop = false [package.dependencies] pydantic = ">=2,<3" termcolor = "2.4.0" +[package.source] +type = "git" +url = "https://github.com/galatolofederico/microchain.git" +reference = "56db91de72b6466080c26434631808cc20af670c" +resolved_reference = "56db91de72b6466080c26434631808cc20af670c" + [[package]] name = "mmh3" version = "4.1.0" @@ -5562,13 +5566,13 @@ files = [ [[package]] name = "narwhals" -version = "1.5.0" +version = "1.5.2" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.8" files = [ - {file = "narwhals-1.5.0-py3-none-any.whl", hash = "sha256:6b63e4e3fd494fc201395e0f3c86cef32f4970f73fb15d5502a15d479f848023"}, - {file = "narwhals-1.5.0.tar.gz", hash = "sha256:88c5cb329d7350c4cd688188068636f8fef5b385b31377b33d92a00ebd8d951b"}, + {file = "narwhals-1.5.2-py3-none-any.whl", hash = "sha256:431d7fdca6104a8b6a72254e31ee6630b526c03544df8e338b3249ca9160f5d2"}, + {file = "narwhals-1.5.2.tar.gz", hash = "sha256:4d8ce16ecf431a544c1740adbb523477565c9614e5e330ce444f3a432ff93d7f"}, ] [package.extras] @@ -10716,4 +10720,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "~3.10.0" -content-hash = "3af35337fac84c3278d1ebee06cf76b7ed5c3e2de77df9361bd9a7ce9a051a18" +content-hash = "48447e694cc1d45cd5c510d3c017b031bf6b8cd24571429f07900400eea40f28" diff --git a/prediction_market_agent/agents/goal_manager.py 
b/prediction_market_agent/agents/goal_manager.py index b8f39aa3..9392e5db 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -17,11 +17,12 @@ The goal should satisfy the following: - have a narrow focus +- be completable immediately, within a single session - be realistically achievable given the agen't specific capabilities -- not be contingent on external factors that are out of the agent's control - have a clear motivation and completion criteria - advance the aims of the agent - balance the need for exploration and exploitation +- not be contingent on external factors that are out of the agent's control [HIGH LEVEL DESCRIPTION] {high_level_description} @@ -57,10 +58,10 @@ class Goal(BaseModel): def to_prompt(self) -> str: return ( - f"{self.goal}" - f"\n\n" - f"## Motivation\n{self.motivation}" - f"## Completion Criteria:\n\n{self.completion_criteria}" + f"# Goal:\n" + f"{self.goal}\n\n" + f"## Motivation:\n{self.motivation}\n\n" + f"## Completion Criteria:\n{self.completion_criteria}" ) @@ -74,6 +75,13 @@ class GoalEvaluation(BaseModel): description="If the goal description implied a 'return value', and the goal is complete, this field should contain the output", ) + def __str__(self) -> str: + return ( + f"Is Complete: {self.is_complete}\n" + f"Reasoning: {self.reasoning}\n" + f"Output: {self.output}" + ) + class EvaluatedGoal(Goal): reasoning: str @@ -244,7 +252,7 @@ def evaluate_goal_progress( self, goal: Goal, chat_history: ChatHistory, - ) -> EvaluatedGoal: + ) -> GoalEvaluation: relevant_chat_history = self.get_chat_history_after_goal_prompt( goal=goal, chat_history=chat_history, @@ -268,17 +276,18 @@ def evaluate_goal_progress( "chat_history": str(relevant_chat_history), } ) - return EvaluatedGoal( + return goal_evaluation + + def save_evaluated_goal(self, goal: Goal, evaluation: GoalEvaluation) -> None: + evaluated_goal = EvaluatedGoal( goal=goal.goal, motivation=goal.motivation, 
completion_criteria=goal.completion_criteria, - is_complete=goal_evaluation.is_complete, - reasoning=goal_evaluation.reasoning, - output=goal_evaluation.output, + is_complete=evaluation.is_complete, + reasoning=evaluation.reasoning, + output=evaluation.output, ) - - def save_evaluated_goal(self, goal: EvaluatedGoal) -> None: - model = goal.to_model(agent_id=self.agent_id) + model = evaluated_goal.to_model(agent_id=self.agent_id) self.table_handler.save_evaluated_goal(model) @staticmethod diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 128f88c8..ae983c01 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -3,7 +3,10 @@ from prediction_market_agent_tooling.markets.markets import MarketType from prediction_market_agent.agents.goal_manager import GoalManager -from prediction_market_agent.agents.microchain_agent.memory import ChatMessage +from prediction_market_agent.agents.microchain_agent.memory import ( + ChatHistory, + ChatMessage, +) from prediction_market_agent.agents.microchain_agent.microchain_agent import ( SupportedModel, build_agent, @@ -76,14 +79,15 @@ def run( agent.run(self.n_iterations) if self.goal_manager: - evaluated_goal = self.goal_manager.evaluate_goal_progress( - goal=goal, chat_history=agent.history + goal_evaluation = self.goal_manager.evaluate_goal_progress( + goal=goal, + chat_history=ChatHistory.from_list_of_dicts(agent.history), ) - self.goal_manager.save_evaluated_goal(evaluated_goal) + self.goal_manager.save_evaluated_goal(goal=goal, evaluation=goal_evaluation) agent.history.append( ChatMessage( role="user", - content=str(evaluated_goal), + content=str(f"# Goal evaluation\n{goal_evaluation}"), ).model_dump() ) @@ -127,3 +131,23 @@ class DeployableMicrochainModifiableSystemPromptAgent3( ): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_LEARNING_3 model = 
SupportedModel.llama_31_instruct + + +class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): + task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER + model = SupportedModel.gpt_4o + goal_manager = GoalManager( + agent_id=task_description, + high_level_description="You are a trader agent in prediction markets to maximise your profit.", + agent_capabilities=( + "You are able to:" + "\n- List all binary markets that can be traded." + "\n- List the current outcome probabilities for each open market." + "\n- Predict the outcome probability for a market." + "\n- Buy, sell and hold outcome tokens in a market." + "\n- Query your wallet balance, and the positions you hold in open markets." + "\n- Query the past bets you've made, and their outcomes." + ), + retry_limit=3, + ) + n_iterations = 100 diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py index 75bb68ab..aeb96c71 100644 --- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py +++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py @@ -186,7 +186,6 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None: llm=LLM(generator=generator), engine=engine, on_iteration_step=on_iteration_step, - prompt=prompt, ) for f in build_agent_functions( @@ -205,6 +204,7 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None: agent.system_prompt = unformatted_system_prompt.format( engine_help=agent.engine.help ) + agent.prompt = prompt if bootstrap: agent.bootstrap = [bootstrap] return agent diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py index ac3d55a7..d7fcf4a7 100644 --- a/prediction_market_agent/agents/utils.py +++ b/prediction_market_agent/agents/utils.py @@ -29,9 +29,7 @@ class AgentIdentifier(str, Enum): MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2" 
MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3" MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app" - MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = ( - "microchain-agent-deployment-omen_with_goal_manager" - ) + MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "general-agent-4-with-goal-manager" MEMORIES_TO_LEARNINGS_TEMPLATE = """ diff --git a/prediction_market_agent/db/evaluated_goal_table_handler.py b/prediction_market_agent/db/evaluated_goal_table_handler.py index a95e02c2..e2f41abc 100644 --- a/prediction_market_agent/db/evaluated_goal_table_handler.py +++ b/prediction_market_agent/db/evaluated_goal_table_handler.py @@ -32,3 +32,12 @@ def get_latest_evaluated_goals(self, limit: int) -> list[EvaluatedGoalModel]: limit=limit, ) return list(items) + + def delete_all_evaluated_goals(self) -> None: + """ + Delete all evaluated goals with `agent_id` + """ + self.sql_handler.delete_all_entries( + col_name=EvaluatedGoalModel.agent_id.key, # type: ignore + col_value=self.agent_id, + ) diff --git a/pyproject.toml b/pyproject.toml index 6c77b063..c71714b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ autoflake = "^2.2.1" isort = "^5.13.2" markdownify = "^0.11.6" tavily-python = "^0.3.9" -microchain-python = "^0.4.4" +microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "56db91de72b6466080c26434631808cc20af670c" } pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"} psycopg2-binary = "^2.9.9" sqlmodel = "^0.0.21" diff --git a/scripts/delete_agent_db_entries.py b/scripts/delete_agent_db_entries.py index fa58f0d0..3c14dcc5 100644 --- a/scripts/delete_agent_db_entries.py +++ b/scripts/delete_agent_db_entries.py @@ -1,6 +1,9 @@ import typer from prediction_market_agent.agents.utils import AgentIdentifier +from prediction_market_agent.db.evaluated_goal_table_handler import ( + EvaluatedGoalTableHandler, +) from prediction_market_agent.db.long_term_memory_table_handler import ( 
LongTermMemoryTableHandler, ) @@ -11,6 +14,7 @@ def main( session_id: AgentIdentifier, delete_memories: bool = True, delete_prompts: bool = True, + delete_goals: bool = True, ) -> None: """ Delete all memories and prompts for a given agent, defined by the session_id. @@ -31,6 +35,14 @@ def main( else: print("Memory entries successfully deleted.") + if delete_goals: + evaluated_goal_table_handler = EvaluatedGoalTableHandler(agent_id=session_id) + evaluated_goal_table_handler.delete_all_evaluated_goals() + if len(evaluated_goal_table_handler.get_latest_evaluated_goals(limit=1)) != 0: + raise Exception("Evaluated goal entries were not deleted.") + else: + print("Evaluated goal entries successfully deleted.") + if __name__ == "__main__": typer.run(main) diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py index 9fabf4fc..2eba998d 100644 --- a/tests/agents/test_goal_manager.py +++ b/tests/agents/test_goal_manager.py @@ -197,12 +197,12 @@ def test_evaluate_goal_progress_0() -> None: ChatMessage(role="user", content="Bet successfully placed."), ] ) - evaluated_goal = goal_manager.evaluate_goal_progress( + goal_evaluation = goal_manager.evaluate_goal_progress( goal=goal, chat_history=chat_history0, ) - assert evaluated_goal.is_complete is True - assert evaluated_goal.output == None + assert goal_evaluation.is_complete is True + assert goal_evaluation.output == None @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") @@ -234,13 +234,14 @@ def test_evaluate_goal_progress_1() -> None: ChatMessage(role="user", content="The reasoning has been recorded."), ] ) - evaluated_goal = goal_manager.evaluate_goal_progress( + goal_evaluation = goal_manager.evaluate_goal_progress( goal=goal, chat_history=chat_history0, ) - assert evaluated_goal.is_complete is True - assert "Tadej Pogacar" in evaluated_goal.output - assert "0.27" in evaluated_goal.output + assert goal_evaluation.is_complete is True + assert goal_evaluation.output is 
not None + assert "Tadej Pogacar" in goal_evaluation.output + assert "0.27" in goal_evaluation.output @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") @@ -269,9 +270,9 @@ def test_evaluate_goal_progress_2() -> None: ), ] ) - evaluated_goal = goal_manager.evaluate_goal_progress( + goal_evaluation = goal_manager.evaluate_goal_progress( goal=goal, chat_history=chat_history0, ) - assert evaluated_goal.is_complete is False - assert evaluated_goal.output == None + assert goal_evaluation.is_complete is False + assert goal_evaluation.output == None From e0e25937b3af7bb2c276e7dae00780fc3ed3ea1c Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 18:21:09 +0100 Subject: [PATCH 07/20] remove n_iterations override --- prediction_market_agent/agents/microchain_agent/deploy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index ae983c01..48350d50 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -150,4 +150,3 @@ class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): ), retry_limit=3, ) - n_iterations = 100 From 5fe023596d3209691a757927258396e0a10f6dbb Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 18:51:32 +0100 Subject: [PATCH 08/20] Use local db for test --- tests/agents/test_goal_manager.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py index 2eba998d..6e6f5f21 100644 --- a/tests/agents/test_goal_manager.py +++ b/tests/agents/test_goal_manager.py @@ -8,6 +8,8 @@ from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL from tests.utils import RUN_PAID_TESTS +SQLITE_DB_URL = "sqlite://" + def test_have_reached_retry_limit() -> None: goal_manager = GoalManager( @@ -111,6 +113,7 @@ def 
test_generate_goal() -> None: "- Accurate predictions of the probability of yes/no outcomes for a given event." ), model=DEFAULT_OPENAI_MODEL, + sqlalchemy_db_url=SQLITE_DB_URL, ) goal0 = goal_manager.generate_goal(latest_evaluated_goals=[]) @@ -175,6 +178,7 @@ def test_evaluate_goal_progress_0() -> None: high_level_description="", # Not relevant to test agent_capabilities="", # Not relevant to test model=DEFAULT_OPENAI_MODEL, + sqlalchemy_db_url=SQLITE_DB_URL, ) goal = Goal( goal="If last year's TdF winner is competing this year, place a small bet on them.", @@ -217,6 +221,7 @@ def test_evaluate_goal_progress_1() -> None: high_level_description="", # Not relevant to test agent_capabilities="", # Not relevant to test model=DEFAULT_OPENAI_MODEL, + sqlalchemy_db_url=SQLITE_DB_URL, ) goal = Goal( goal="If last year's TdF winner is competing this year, get their probability of winning.", @@ -254,6 +259,7 @@ def test_evaluate_goal_progress_2() -> None: high_level_description="", # Not relevant to test agent_capabilities="", # Not relevant to test model=DEFAULT_OPENAI_MODEL, + sqlalchemy_db_url=SQLITE_DB_URL, ) goal = Goal( goal="If last year's TdF winner is competing this year, get their probability of winning.", From e2b7760d4e3477c37e637f12abb6b1ba118803e2 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 22:16:02 +0100 Subject: [PATCH 09/20] Fixed test --- tests/agents/test_goal_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py index 6e6f5f21..9a7d084c 100644 --- a/tests/agents/test_goal_manager.py +++ b/tests/agents/test_goal_manager.py @@ -17,6 +17,7 @@ def test_have_reached_retry_limit() -> None: high_level_description="foo", agent_capabilities="bar", retry_limit=0, + sqlalchemy_db_url=SQLITE_DB_URL, ) g0 = EvaluatedGoal( From 8df7f4028dd456ae4cf7e38e8dfd4d1196015aab Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 21 Aug 2024 22:23:49 +0100 Subject: [PATCH 
10/20] coderabbit suggestions --- prediction_market_agent/agents/goal_manager.py | 13 ++++--------- tests/agents/test_goal_manager.py | 5 ++--- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index 9392e5db..d26892a1 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -200,15 +200,10 @@ def have_reached_retry_limit( return False latest_goal = latest_evaluated_goals[0].to_goal() - if all( - [ - g.to_goal() == latest_goal - for g in latest_evaluated_goals[: self.retry_limit + 1] - ] - ): - return True - - return False + return all( + g.to_goal() == latest_goal + for g in latest_evaluated_goals[: self.retry_limit + 1] + ) def get_goal(self) -> Goal: """ diff --git a/tests/agents/test_goal_manager.py b/tests/agents/test_goal_manager.py index 9a7d084c..7db1ee3c 100644 --- a/tests/agents/test_goal_manager.py +++ b/tests/agents/test_goal_manager.py @@ -153,7 +153,6 @@ def test_get_chat_history_after_goal_prompt() -> None: def test_get_chat_history_after_goal_prompt_error() -> None: goal = Goal(goal="Foo", motivation="Bar", completion_criteria="Baz") - assistant_message = ChatMessage(role="assistant", content="The answer is 42.") chat_history = ChatHistory( chat_messages=[ ChatMessage(role="system", content="You are a helpful assistant."), @@ -207,7 +206,7 @@ def test_evaluate_goal_progress_0() -> None: chat_history=chat_history0, ) assert goal_evaluation.is_complete is True - assert goal_evaluation.output == None + assert goal_evaluation.output is None @pytest.mark.skipif(not RUN_PAID_TESTS, reason="This test costs money to run.") @@ -282,4 +281,4 @@ def test_evaluate_goal_progress_2() -> None: chat_history=chat_history0, ) assert goal_evaluation.is_complete is False - assert goal_evaluation.output == None + assert goal_evaluation.output is None From 1f48aa797fcb9861c86493655348f9f66fc05f76 Mon Sep 
17 00:00:00 2001 From: evangriffiths Date: Thu, 22 Aug 2024 15:35:08 +0100 Subject: [PATCH 11/20] Review comments --- prediction_market_agent/agents/goal_manager.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index d26892a1..e61f00c0 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -1,6 +1,10 @@ from langchain_core.output_parsers import PydanticOutputParser from langchain_core.prompts import PromptTemplate from langchain_openai import ChatOpenAI +from prediction_market_agent_tooling.tools.langfuse_ import ( + get_langfuse_langchain_config, + observe, +) from prediction_market_agent_tooling.tools.utils import utcnow from pydantic import BaseModel, Field @@ -155,6 +159,7 @@ def get_latest_evaluated_goals_from_memory(self, limit: int) -> list[EvaluatedGo ) return [EvaluatedGoal.from_model(model) for model in evaluated_goal_models] + @observe() def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal: """ Generate a new goal based on the high-level description and the latest @@ -178,6 +183,7 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal: temperature=0, model=self.model, api_key=APIKeys().openai_api_key_secretstr_v1, + config=get_langfuse_langchain_config(), ) chain = prompt | llm | parser @@ -217,8 +223,11 @@ def get_goal(self) -> Goal: limit=self.retry_limit ) if latest_evaluated_goals: - evaluated_goal = latest_evaluated_goals[0] - if evaluated_goal.is_complete: + # Previous goals have been retrieved from memory. Generate a new + # goal based on these, or retry the last one if it did not complete.
+ latest_evaluated_goal = latest_evaluated_goals[0] + + if latest_evaluated_goal.is_complete: # Generate a new goal return self.generate_goal(latest_evaluated_goals) else: @@ -226,8 +235,9 @@ def get_goal(self) -> Goal: if self.have_reached_retry_limit(latest_evaluated_goals): return self.generate_goal(latest_evaluated_goals) else: - return evaluated_goal.to_goal() + return latest_evaluated_goal.to_goal() + # No evaluated goals in memory. Generate a new goal from scratch return self.generate_goal(latest_evaluated_goals=[]) @classmethod @@ -243,6 +253,7 @@ def get_chat_history_after_goal_prompt( return ChatHistory(chat_messages=chat_history.chat_messages[i + 1 :]) raise ValueError("Goal prompt not found in chat history") + @observe() def evaluate_goal_progress( self, goal: Goal, @@ -262,6 +273,7 @@ def evaluate_goal_progress( temperature=0, model=self.model, api_key=APIKeys().openai_api_key_secretstr_v1, + config=get_langfuse_langchain_config(), ) chain = prompt | llm | parser From 3b0359acbbea83dc9964d9afbbfe341d7ee40da9 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Thu, 22 Aug 2024 15:51:41 +0100 Subject: [PATCH 12/20] Review comments --- prediction_market_agent/agents/goal_manager.py | 8 ++++---- .../agents/microchain_agent/deploy.py | 10 ++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/prediction_market_agent/agents/goal_manager.py b/prediction_market_agent/agents/goal_manager.py index e61f00c0..b9039f35 100644 --- a/prediction_market_agent/agents/goal_manager.py +++ b/prediction_market_agent/agents/goal_manager.py @@ -183,7 +183,6 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal: temperature=0, model=self.model, api_key=APIKeys().openai_api_key_secretstr_v1, - config=get_langfuse_langchain_config(), ) chain = prompt | llm | parser @@ -192,7 +191,8 @@ def generate_goal(self, latest_evaluated_goals: list[EvaluatedGoal]) -> Goal: "high_level_description": self.high_level_description, 
"agent_capabilities": self.agent_capabilities, "previous_evaluated_goals": latest_evaluated_goals_str, - } + }, + config=get_langfuse_langchain_config(), ) return goal @@ -273,7 +273,6 @@ def evaluate_goal_progress( temperature=0, model=self.model, api_key=APIKeys().openai_api_key_secretstr_v1, - config=get_langfuse_langchain_config(), ) chain = prompt | llm | parser @@ -281,7 +280,8 @@ def evaluate_goal_progress( { "goal_prompt": goal.to_prompt(), "chat_history": str(relevant_chat_history), - } + }, + config=get_langfuse_langchain_config(), ) return goal_evaluation diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 48350d50..0d5ce93a 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -1,6 +1,7 @@ from microchain import Agent from prediction_market_agent_tooling.deploy.agent import DeployableAgent from prediction_market_agent_tooling.markets.markets import MarketType +from prediction_market_agent_tooling.tools.utils import check_not_none from prediction_market_agent.agents.goal_manager import GoalManager from prediction_market_agent.agents.microchain_agent.memory import ( @@ -54,11 +55,7 @@ def run( ), ) - if self.goal_manager: - goal = self.goal_manager.get_goal() - prompt = goal.to_prompt() - else: - prompt = None + goal = self.goal_manager.get_goal() if self.goal_manager else None agent: Agent = build_agent( market_type=market_type, @@ -70,7 +67,7 @@ def run( functions_config=FunctionsConfig.from_system_prompt_choice( self.system_prompt_choice ), - prompt=prompt, + prompt=goal.to_prompt() if goal else None, ) # Save formatted system prompt @@ -79,6 +76,7 @@ def run( agent.run(self.n_iterations) if self.goal_manager: + goal = check_not_none(goal) goal_evaluation = self.goal_manager.evaluate_goal_progress( goal=goal, chat_history=ChatHistory.from_list_of_dicts(agent.history), From 
3d39d173795ec19b32abf8f10c9ec629b81f1c29 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Fri, 23 Aug 2024 09:40:35 +0100 Subject: [PATCH 13/20] Tweak high_level_description --- prediction_market_agent/agents/microchain_agent/deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index 0d5ce93a..3ace5082 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -136,7 +136,7 @@ class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): model = SupportedModel.gpt_4o goal_manager = GoalManager( agent_id=task_description, - high_level_description="You are a trader agent in prediction markets to maximise your profit.", + high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.", agent_capabilities=( "You are able to:" "\n- List all binary markets that can be traded." 
From 3168372c831dcb2da9eb4390197fccc164711b3a Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Sat, 24 Aug 2024 01:26:53 +0100 Subject: [PATCH 14/20] tidy --- prediction_market_agent/run_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prediction_market_agent/run_agent.py b/prediction_market_agent/run_agent.py index 615da4f0..33a5002e 100644 --- a/prediction_market_agent/run_agent.py +++ b/prediction_market_agent/run_agent.py @@ -56,7 +56,7 @@ class RunnableAgent(str, Enum): microchain_modifiable_system_prompt_1 = "microchain_modifiable_system_prompt_1" microchain_modifiable_system_prompt_2 = "microchain_modifiable_system_prompt_2" microchain_modifiable_system_prompt_3 = "microchain_modifiable_system_prompt_3" - microchain_with_goal_manager_agent0 = "microchain_with_goal_manager_agent0" + microchain_with_goal_manager_agent_0 = "microchain_with_goal_manager_agent_0" metaculus_bot_tournament_agent = "metaculus_bot_tournament_agent" prophet_gpt4o = "prophet_gpt4o" prophet_gpt4 = "prophet_gpt4" @@ -77,7 +77,7 @@ class RunnableAgent(str, Enum): RunnableAgent.microchain_modifiable_system_prompt_1: DeployableMicrochainModifiableSystemPromptAgent1, RunnableAgent.microchain_modifiable_system_prompt_2: DeployableMicrochainModifiableSystemPromptAgent2, RunnableAgent.microchain_modifiable_system_prompt_3: DeployableMicrochainModifiableSystemPromptAgent3, - RunnableAgent.microchain_with_goal_manager_agent0: DeployableMicrochainWithGoalManagerAgent0, + RunnableAgent.microchain_with_goal_manager_agent_0: DeployableMicrochainWithGoalManagerAgent0, RunnableAgent.social_media: DeployableSocialMediaAgent, RunnableAgent.metaculus_bot_tournament_agent: DeployableMetaculusBotTournamentAgent, RunnableAgent.prophet_gpt4o: DeployablePredictionProphetGPT4oAgent, From 399abb5adb78543b259b3fd2bb31e5adccc3c73b Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 28 Aug 2024 12:33:43 +0100 Subject: [PATCH 15/20] Move goal manager initialisation to inside 
DeployableMicrochainAgent.run --- .../agents/microchain_agent/deploy.py | 46 ++++++++++--------- .../microchain_agent/microchain_agent.py | 6 ++- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index cfd698d9..a1f4fda2 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -13,6 +13,7 @@ SupportedModel, build_agent, get_editable_prompt_from_agent, + get_functions_summary_list, get_unformatted_system_prompt, save_agent_history, ) @@ -35,7 +36,12 @@ class DeployableMicrochainAgent(DeployableAgent): load_historical_prompt: bool = False system_prompt_choice: SystemPromptChoice = SystemPromptChoice.TRADING_AGENT task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN - goal_manager: GoalManager | None = None + + def build_goal_manager( + self, + agent: Agent, + ) -> GoalManager | None: + return None def run( self, @@ -56,8 +62,6 @@ def run( ), ) - goal = self.goal_manager.get_goal() if self.goal_manager else None - agent: Agent = build_agent( market_type=market_type, model=self.model, @@ -68,9 +72,12 @@ def run( functions_config=FunctionsConfig.from_system_prompt_choice( self.system_prompt_choice ), - prompt=goal.to_prompt() if goal else None, ) + if goal_manager := self.build_goal_manager(agent=agent): + goal = goal_manager.get_goal() + agent.prompt = (goal.to_prompt() if goal else None,) + # Save formatted system prompt initial_formatted_system_prompt = agent.system_prompt @@ -80,13 +87,13 @@ def run( logger.error(e) raise e finally: - if self.goal_manager: + if goal_manager: goal = check_not_none(goal) - goal_evaluation = self.goal_manager.evaluate_goal_progress( + goal_evaluation = goal_manager.evaluate_goal_progress( goal=goal, chat_history=ChatHistory.from_list_of_dicts(agent.history), ) - self.goal_manager.save_evaluated_goal( + 
goal_manager.save_evaluated_goal( goal=goal, evaluation=goal_evaluation, ) @@ -142,17 +149,14 @@ class DeployableMicrochainModifiableSystemPromptAgent3( class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER model = SupportedModel.gpt_4o - goal_manager = GoalManager( - agent_id=task_description, - high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.", - agent_capabilities=( - "You are able to:" - "\n- List all binary markets that can be traded." - "\n- List the current outcome probabilities for each open market." - "\n- Predict the outcome probability for a market." - "\n- Buy, sell and hold outcome tokens in a market." - "\n- Query your wallet balance, and the positions you hold in open markets." - "\n- Query the past bets you've made, and their outcomes." - ), - retry_limit=3, - ) + + def build_goal_manager( + self, + agent: Agent, + ) -> GoalManager | None: + return GoalManager( + agent_id=self.task_description, + high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.", + agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine.functions)}", + retry_limit=3, + ) diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py index aeb96c71..d94c3ac0 100644 --- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py +++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py @@ -147,7 +147,6 @@ def build_agent( long_term_memory: LongTermMemoryTableHandler | None = None, allow_stop: bool = True, bootstrap: str | None = None, - prompt: str | None = None, raise_on_error: bool = True, ) -> Agent: engine = Engine() @@ -204,7 +203,6 @@ def step_end_callback(agent: Agent, step_output: StepOutput) -> None: 
agent.system_prompt = unformatted_system_prompt.format( engine_help=agent.engine.help ) - agent.prompt = prompt if bootstrap: agent.bootstrap = [bootstrap] return agent @@ -245,3 +243,7 @@ def save_agent_history( def get_editable_prompt_from_agent(agent: Agent) -> str: return extract_updatable_system_prompt(str(agent.system_prompt)) + + +def get_functions_summary_list(functions: list[Function]) -> str: + return "\n".join([f"- {fname}: {f.description}" for fname, f in functions.items()]) From eaa6ca6acb6ebe15d5adfc55cb2c3cc16bd41ebc Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 28 Aug 2024 12:40:43 +0100 Subject: [PATCH 16/20] mypy --- .../agents/microchain_agent/microchain_agent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/prediction_market_agent/agents/microchain_agent/microchain_agent.py b/prediction_market_agent/agents/microchain_agent/microchain_agent.py index d94c3ac0..68a90628 100644 --- a/prediction_market_agent/agents/microchain_agent/microchain_agent.py +++ b/prediction_market_agent/agents/microchain_agent/microchain_agent.py @@ -245,5 +245,7 @@ def get_editable_prompt_from_agent(agent: Agent) -> str: return extract_updatable_system_prompt(str(agent.system_prompt)) -def get_functions_summary_list(functions: list[Function]) -> str: - return "\n".join([f"- {fname}: {f.description}" for fname, f in functions.items()]) +def get_functions_summary_list(engine: Engine) -> str: + return "\n".join( + [f"- {fname}: {f.description}" for fname, f in engine.functions.items()] + ) From 14c13bfea6cba228023fe2fa312f59c9226f148a Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 28 Aug 2024 13:04:06 +0100 Subject: [PATCH 17/20] Fixes --- prediction_market_agent/agents/microchain_agent/deploy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index a1f4fda2..a4d7b6de 100644 --- 
a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -76,7 +76,7 @@ def run( if goal_manager := self.build_goal_manager(agent=agent): goal = goal_manager.get_goal() - agent.prompt = (goal.to_prompt() if goal else None,) + agent.prompt = goal.to_prompt() if goal else None # Save formatted system prompt initial_formatted_system_prompt = agent.system_prompt @@ -157,6 +157,6 @@ def build_goal_manager( return GoalManager( agent_id=self.task_description, high_level_description="You are a trader agent in prediction markets, aiming to maximise your long-term profit.", - agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine.functions)}", + agent_capabilities=f"You have the following capabilities:\n{get_functions_summary_list(agent.engine)}", retry_limit=3, ) From b220db1b19ad77b85a8cbcf4d84097b4b37121a0 Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Wed, 28 Aug 2024 13:41:13 +0100 Subject: [PATCH 18/20] Add TRADING_AGENT_SYSTEM_PROMPT_MINIMAL, use for microchain agent with goal manager --- .../agents/microchain_agent/deploy.py | 1 + .../agents/microchain_agent/prompts.py | 23 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/prediction_market_agent/agents/microchain_agent/deploy.py b/prediction_market_agent/agents/microchain_agent/deploy.py index a4d7b6de..31c123ca 100644 --- a/prediction_market_agent/agents/microchain_agent/deploy.py +++ b/prediction_market_agent/agents/microchain_agent/deploy.py @@ -149,6 +149,7 @@ class DeployableMicrochainModifiableSystemPromptAgent3( class DeployableMicrochainWithGoalManagerAgent0(DeployableMicrochainAgent): task_description = AgentIdentifier.MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER model = SupportedModel.gpt_4o + system_prompt_choice = SystemPromptChoice.TRADING_AGENT_MINIMAL def build_goal_manager( self, diff --git a/prediction_market_agent/agents/microchain_agent/prompts.py 
b/prediction_market_agent/agents/microchain_agent/prompts.py index 9737146e..442b76b0 100644 --- a/prediction_market_agent/agents/microchain_agent/prompts.py +++ b/prediction_market_agent/agents/microchain_agent/prompts.py @@ -33,6 +33,22 @@ Only output a single function call per message. Make 'Reasoning' calls frequently - at least every other call. """ + +# This is similar to the TRADING_AGENT_SYSTEM_PROMPT, except that it doesn't +# contain any specific instructions on what to do. This is appropriate to use +# for an agent when combined with a user-prompt containing the instructions for +# the session. +TRADING_AGENT_SYSTEM_PROMPT_MINIMAL = f"""You are a helpful assistant, who specializes as an expert trader agent in prediction markets. + +{NON_UPDATABLE_DIVIDOR} + +{{engine_help}} + +Only output valid Python function calls, without code formatting characters, without any other text. i.e. it should run if passed to Python's `eval` function. +Only output a single function call per message. +Make 'Reasoning' calls frequently - at least every other call. You need to reason step by step. +""" + # Experimental system prompt for task-solving agent. TASK_AGENT_SYSTEM_PROMPT = f"""Act as a task-solving agents that picks up available tasks and solves them for getting rewards. 
@@ -74,6 +90,7 @@ def build_full_unformatted_system_prompt(system_prompt: str) -> str: class SystemPromptChoice(str, Enum): JUST_BORN = "just_born" TRADING_AGENT = "trading_agent" + TRADING_AGENT_MINIMAL = "trading_agent_minimal" TASK_AGENT = "task_agent" @@ -95,7 +112,10 @@ def from_system_prompt_choice( include_learning_functions = True include_trading_functions = True - elif system_prompt_choice == SystemPromptChoice.TRADING_AGENT: + elif system_prompt_choice in [ + SystemPromptChoice.TRADING_AGENT, + SystemPromptChoice.TRADING_AGENT_MINIMAL, + ]: include_trading_functions = True elif system_prompt_choice == SystemPromptChoice.TASK_AGENT: @@ -113,4 +133,5 @@ def from_system_prompt_choice( SystemPromptChoice.JUST_BORN: SYSTEM_PROMPT, SystemPromptChoice.TRADING_AGENT: TRADING_AGENT_SYSTEM_PROMPT, SystemPromptChoice.TASK_AGENT: TASK_AGENT_SYSTEM_PROMPT, + SystemPromptChoice.TRADING_AGENT_MINIMAL: TRADING_AGENT_SYSTEM_PROMPT_MINIMAL, } From a53909194cd3e5499c6559da00160ecebff36386 Mon Sep 17 00:00:00 2001 From: Evan Griffiths <56087052+evangriffiths@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:18:27 +0100 Subject: [PATCH 19/20] Update pyproject.toml Co-authored-by: Peter Jung --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 27c23053..d22bcace 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ autoflake = "^2.2.1" isort = "^5.13.2" markdownify = "^0.11.6" tavily-python = "^0.3.9" -microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "56db91de72b6466080c26434631808cc20af670c" } +microchain-python = { git = "https://github.com/galatolofederico/microchain.git", rev = "98e601f6b7413ea48fb0b099309d686c4b10ff5c" } pysqlite3-binary = {version="^0.5.2.post3", markers = "sys_platform == 'linux'"} psycopg2-binary = "^2.9.9" sqlmodel = "^0.0.21" From 97ec5ac60910915913aff1b89725da91f9b48f5b Mon Sep 17 00:00:00 2001 From: evangriffiths Date: Thu, 29 
Aug 2024 15:06:09 +0100 Subject: [PATCH 20/20] Review comment --- prediction_market_agent/agents/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction_market_agent/agents/utils.py b/prediction_market_agent/agents/utils.py index d7fcf4a7..ee848320 100644 --- a/prediction_market_agent/agents/utils.py +++ b/prediction_market_agent/agents/utils.py @@ -29,7 +29,7 @@ class AgentIdentifier(str, Enum): MICROCHAIN_AGENT_OMEN_LEARNING_2 = "general-agent-2" MICROCHAIN_AGENT_OMEN_LEARNING_3 = "general-agent-3" MICROCHAIN_AGENT_STREAMLIT = "microchain-streamlit-app" - MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "general-agent-4-with-goal-manager" + MICROCHAIN_AGENT_OMEN_WITH_GOAL_MANAGER = "trader-agent-0-with-goal-manager" MEMORIES_TO_LEARNINGS_TEMPLATE = """