-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use newest PMAT (Metaculus fixes, supported_markets property) #529
Changes from 3 commits
9d74498
4403e61
d239fce
6475688
aa37a41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,84 +1,82 @@ | ||
import sys | ||
from typing import Sequence | ||
|
||
from prediction_market_agent_tooling.deploy.agent import DeployableAgent | ||
from prediction_market_agent_tooling.deploy.agent import DeployablePredictionAgent | ||
from prediction_market_agent_tooling.gtypes import Probability | ||
from prediction_market_agent_tooling.loggers import logger | ||
from prediction_market_agent_tooling.markets.agent_market import FilterBy, SortBy | ||
from prediction_market_agent_tooling.markets.agent_market import ( | ||
AgentMarket, | ||
FilterBy, | ||
SortBy, | ||
) | ||
from prediction_market_agent_tooling.markets.data_models import ProbabilisticAnswer | ||
from prediction_market_agent_tooling.markets.markets import MarketType | ||
from prediction_market_agent_tooling.markets.metaculus.metaculus import ( | ||
MetaculusAgentMarket, | ||
) | ||
from prediction_market_agent_tooling.tools.utils import check_not_none | ||
|
||
from prediction_market_agent.agents.think_thoroughly_agent.think_thoroughly_agent import ( | ||
ThinkThoroughlyWithItsOwnResearch, | ||
from prediction_market_agent.agents.prophet_agent.deploy import ( | ||
DeployablePredictionProphetGPTo1PreviewAgent, | ||
) | ||
from prediction_market_agent.utils import DEFAULT_OPENAI_MODEL | ||
|
||
WARMUP_TOURNAMENT_ID = 3294 | ||
TOURNAMENT_ID = 3349 | ||
|
||
|
||
class DeployableMetaculusBotTournamentAgent(DeployableAgent): | ||
model: str = DEFAULT_OPENAI_MODEL | ||
class DeployableMetaculusBotTournamentAgent(DeployablePredictionAgent): | ||
bet_on_n_markets_per_run: int = ( | ||
sys.maxsize | ||
) # On Metaculus "betting" is free, we can just bet on everything available in one run. | ||
dummy_prediction: bool = False | ||
repeat_predictions: bool = False | ||
tournament_id: int = TOURNAMENT_ID | ||
supported_markets = [MarketType.METACULUS] | ||
|
||
def run( | ||
self, | ||
market_type: MarketType = MarketType.METACULUS, | ||
) -> None: | ||
""" | ||
Submit predictions to Metaculus markets using the CrewAIAgentSubquestions | ||
|
||
https://www.metaculus.com/notebooks/25525/announcing-the-ai-forecasting-benchmark-series--july-8-120k-in-prizes/ | ||
""" | ||
|
||
if market_type != MarketType.METACULUS: | ||
raise ValueError("Only Metaculus markets are supported for this agent") | ||
|
||
agent = ThinkThoroughlyWithItsOwnResearch( | ||
model=self.model, enable_langfuse=self.enable_langfuse, memory=False | ||
def load(self) -> None: | ||
# Using this one because it had the lowest `p_yes mse` from the `match_bets_with_langfuse_traces.py` evaluation at the time of writing this. | ||
self.agent = DeployablePredictionProphetGPTo1PreviewAgent( | ||
enable_langfuse=self.enable_langfuse | ||
) | ||
|
||
def get_markets(self, market_type: MarketType) -> Sequence[AgentMarket]: # type: ignore # TODO: Needs to be decided in https://github.com/gnosis/prediction-market-agent/pull/511#discussion_r1810034688 and then I'll implement it here. | ||
markets: Sequence[ | ||
MetaculusAgentMarket | ||
] = MetaculusAgentMarket.get_binary_markets( | ||
limit=sys.maxsize, | ||
limit=self.bet_on_n_markets_per_run, | ||
tournament_id=self.tournament_id, | ||
filter_by=FilterBy.OPEN, | ||
sort_by=SortBy.NEWEST, | ||
) | ||
logger.info(f"Found {len(markets)} open markets to submit predictions for.") | ||
return markets | ||
|
||
if not self.repeat_predictions: | ||
# Filter out markets that we have already answered | ||
markets = [market for market in markets if not market.have_predicted] | ||
logger.info( | ||
f"Found {len(markets)} unanswered markets to submit predictions for." | ||
) | ||
def verify_market(self, market_type: MarketType, market: AgentMarket) -> bool: | ||
assert isinstance( | ||
market, MetaculusAgentMarket | ||
), "Just making mypy happy. It's true thanks to the check in the `run` method via `supported_markets`." | ||
|
||
for market in markets: | ||
logger.info(f"Answering market {market.id}, question: {market.question}") | ||
if not self.dummy_prediction: | ||
# TODO incorporate 'Resolution criteria', 'Fine print', and | ||
# 'Background info' into the prompt given to the agent. | ||
answer = agent.answer_binary_market( | ||
market.question, created_time=market.created_time | ||
) | ||
else: | ||
answer = ProbabilisticAnswer( | ||
p_yes=Probability(0.5), | ||
reasoning="Just a test.", | ||
confidence=0.5, | ||
) | ||
# Filter out the market if the agent isn't configured to re-bet. | ||
if not self.repeat_predictions and market.have_predicted: | ||
return False | ||
|
||
if answer is None: | ||
logger.error("No answer was given. Skipping") | ||
else: | ||
market.submit_prediction( | ||
p_yes=answer.p_yes, | ||
reasoning=check_not_none(answer.reasoning), | ||
) | ||
# Otherwise all markets on Metaculus are fine. | ||
return True | ||
kongzii marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def answer_binary_market(self, market: AgentMarket) -> ProbabilisticAnswer | None: | ||
assert isinstance( | ||
market, MetaculusAgentMarket | ||
), "Just making mypy happy. It's true thanks to the check in the `run` method via `supported_markets`." | ||
logger.info(f"Answering market {market.id}, question: {market.question}") | ||
answer: ProbabilisticAnswer | None | ||
if not self.dummy_prediction: | ||
full_question = f"""Question: {market.question} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The previous Metaculus tournament wasn't tracked in Langfuse, so there is not much to debug. Thanks to this refactoring it will be tracked now, example: https://cloud.langfuse.com/project/clv2hvvyw0006z9uchz6u1irw/traces/15a2dd9d-20e7-4ef3-9235-c41050064661?observation=3c1861b6-8866-4e27-8182-4196d091e08e I thought this was a fine way to add additional fields to the existing agents -- if it turns out that it works, we could make it the official approach. Something like the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🤞 |
||
Question's description: {market.description} | ||
Question's fine print: {market.fine_print} | ||
Question's resolution criteria: {market.resolution_criteria}""" | ||
answer = self.agent.agent.predict(full_question).outcome_prediction | ||
kongzii marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
answer = ProbabilisticAnswer( | ||
p_yes=Probability(0.5), | ||
reasoning="Just a test.", | ||
confidence=0.5, | ||
) | ||
Comment on lines
+77
to
+81
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh haha! it must be right then... |
||
return answer | ||
kongzii marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -78,7 +78,9 @@ def predict( | |||||||||||||
enable_langfuse: bool, | ||||||||||||||
) -> None: | ||||||||||||||
agent = AgentClass( | ||||||||||||||
place_bet=False, | ||||||||||||||
place_trades=False, | ||||||||||||||
store_prediction=False, | ||||||||||||||
store_trades=False, | ||||||||||||||
Comment on lines
+81
to
+83
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Consider making agent behavior configurable. Currently, all new parameters (`place_trades`, `store_prediction`, `store_trades`) are hard-coded to `False`. Consider adding these as configurable options in the Streamlit UI: agent = AgentClass(
- place_trades=False,
- store_prediction=False,
- store_trades=False,
+ place_trades=st.checkbox("Place trades", value=False),
+ store_prediction=st.checkbox("Store prediction", value=False),
+ store_trades=st.checkbox("Store trades", value=False),
enable_langfuse=enable_langfuse,
) 📝 Committable suggestion
Suggested change
Function signature doesn't match the implementation. The function signature of `predict` does not include the `place_trades`, `store_prediction`, and `store_trades` parameters. Update the function signature to match the implementation: def predict(
AgentClass: SupportedAgentType,
market_source: MarketType,
market: AgentMarket,
skip_market_verification: bool,
enable_langfuse: bool,
+ place_trades: bool = False,
+ store_prediction: bool = False,
+ store_trades: bool = False,
) -> None:
|
||||||||||||||
enable_langfuse=enable_langfuse, | ||||||||||||||
) | ||||||||||||||
|
||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Consider using runtime type checking instead of assertions.
While the assertion helps with type checking, it has some limitations:
Consider this alternative implementation that provides better runtime safety:
This approach:
📝 Committable suggestion