Skip to content

Commit

Permalink
Format code and remove animations
Browse files Browse the repository at this point in the history
  • Loading branch information
Gongsta committed Jun 14, 2024
1 parent 62d64e4 commit 10d3df3
Show file tree
Hide file tree
Showing 8 changed files with 1,083 additions and 1,848 deletions.
927 changes: 0 additions & 927 deletions animation/scene.py

This file was deleted.

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ matplotlib
pot # Not really needed, I found it to be too slow
scikit-learn
labml

358 changes: 193 additions & 165 deletions research/kuhn/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
python main.py --play
---
"""

from random import shuffle
import joblib
import numpy as np
Expand All @@ -24,175 +25,202 @@
# Seat indices used to address both the score list and the dealt cards:
# index 0 belongs to the (human or scripted) player, index 1 to the AI.
PLAYER = 0
AI = 1


def get_action(strategy):
    """Sample an action from a probability distribution over actions.

    Args:
        strategy: Sequence of action probabilities. Index 0 is the
            probability of passing; every other index maps to betting.

    Returns:
        "p" if the sampled index is 0, otherwise "b". An empty strategy
        yields "p" because the sampled index stays at its initial value 0.
    """
    r = np.random.random()
    cumulative_probability = 0
    action = 0
    for action, probability in enumerate(strategy):
        cumulative_probability += probability
        if r < cumulative_probability:
            break

    # If the probabilities sum to slightly less than r (rounding), the loop
    # runs to completion and the last action index is kept.
    return "p" if action == 0 else "b"


def get_strategy(card, strategy=0):
    """Choose an action ("p" to pass, "b" to bet) for a card under a policy.

    strategy=0 -> CFR (sample from the average strategy stored in nodeMap)
    strategy=1 -> Pass if you have 1, Bet if you have 3, and play 50% of your hands with 2
    strategy=2 -> Always pass
    strategy=3 -> Always bet
    strategy=4 -> Random (50% pass, 50% bet)

    Args:
        card: The card held by the acting player (1, 2 or 3).
        strategy: Identifier of the policy to apply, see the list above.

    Returns:
        "p" or "b".

    Raises:
        ValueError: If ``strategy`` is not one of the identifiers above.
    """
    if strategy == 0:
        # NOTE(review): the lookup key is the card alone, so the betting
        # history is not part of the information set used here -- confirm
        # this matches how nodeMap keys were built during training.
        return get_action(nodeMap[str(card)].getAverageStrategy())

    if strategy == 1:
        if card == 1:
            return "p"
        if card == 3:
            return "b"
        # Middle card: flip a fair coin.
        return "p" if np.random.random() <= 0.5 else "b"

    if strategy == 2:
        return "p"

    if strategy == 3:
        return "b"

    if strategy == 4:
        choices = ["p", "b"]
        shuffle(choices)
        return choices[0]

    # Fail loudly instead of returning None, which would only surface later
    # as a confusing TypeError when the action is appended to the history.
    raise ValueError("Unknown strategy: {!r} (expected 0, 1, 2, 3 or 4)".format(strategy))


def terminal(history):
    """Return True when the betting history describes a finished round.

    A round is over once at least two actions have been taken and either the
    last action was a pass (e.g. "pp", "bp", "pbp") or the last two actions
    were both bets (e.g. "bb", "pbb").

    Args:
        history: String of actions taken so far, one character per action
            ("p" for pass, "b" for bet).

    Returns:
        bool: Whether no further action is required in this round.
    """
    return len(history) > 1 and (history[-1] == "p" or history[-2:] == "bb")


def _prompt_action():
    """Ask the human player for an action until they enter "p" or "b".

    Re-prompting keeps malformed input (empty, multi-character such as "bb",
    or unknown letters) out of the betting history, which the termination
    check and the payoff logic assume is built from single "p"/"b" actions.
    """
    while True:
        action = input('Please decide whether to pass or bet ("p" or "b"): ').strip().lower()
        if action in ("p", "b"):
            return action
        print('Invalid action, please type "p" or "b".')


if __name__ == "__main__":
    # Parse the command line first so that --help works without a trained model.
    parser = argparse.ArgumentParser(description="Play Kuhn Poker against the best AI possible.")
    parser.add_argument(
        "-p",
        "--play",
        action="store_true",
        dest="user_input",
        default=False,
        help="Manually play against the AI through the terminal.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help="Print each simulated round to the terminal.",
    )

    args = parser.parse_args()
    user_input = args.user_input
    verbose = args.verbose
    # Round-by-round output is shown both in manual play and in verbose simulation.
    show_rounds = user_input or verbose

    # Load the nodeMap. It must be a module-level name because get_strategy reads it.
    # NOTE: joblib.load unpickles the file, so only load node maps you trained yourself.
    try:
        nodeMap: Node = joblib.load("KuhnNodeMap.joblib")
    except Exception as err:  # top-level boundary: report and stop
        print("Could not load nodeMap. Please train the model first by running: python main.py")
        print("Reason:", err)
        raise SystemExit(1)

    print("NodeMap loaded:")
    print("InfoSet - Actions Probability")
    for key in nodeMap.keys():
        print(key, "-", [round(val, 2) for val in nodeMap[key].getAverageStrategy()])

    score = [0, 0]  # [PLAYER_SCORE, AI_SCORE]
    first_player_to_move = 0

    user_scores_over_time = []
    opponent_scores_over_time = []

    cards = [1, 2, 3]  # index 0 is for PLAYER, index 1 is for AI
    for _ in range(1000000):
        # Setup a new round; the player who acts first alternates every round.
        history = ""
        first_player_to_move = (first_player_to_move + 1) % 2
        shuffle(cards)

        if show_rounds:
            print("--------------------------")
            print("Current Scoreboard:")
            print(f"You: {score[PLAYER]}, Opponent: {score[AI]}\n")
            print("You have been dealt a:", cards[PLAYER])

        # terminal("") is False, so this loop also handles the opening action.
        while not terminal(history):
            player = (first_player_to_move + len(history)) % 2

            if player == PLAYER:
                if user_input:  # Manual input
                    action = _prompt_action()
                else:
                    # Scripted player: random opening action (strategy 4),
                    # fixed heuristic (strategy 1) for any later action.
                    # NOTE(review): the plot legend calls this the
                    # "Deterministic Strategy" -- confirm the mix is intended.
                    action = get_strategy(cards[PLAYER], 4 if not history else 1)
            else:
                action = get_strategy(cards[AI])
                if show_rounds:
                    print("Your opponent has decided to play:", action)

            history += action

        # Payoff for the terminal state. A terminal history ends either in a
        # showdown ("pp" for 1 chip, "...bb" for 2 chips) or in a fold.
        if history == "pp" or history[-2:] == "bb":
            winner = PLAYER if cards[PLAYER] > cards[AI] else AI
            stake = 2 if history[-2:] == "bb" else 1
        else:
            # The last action was a pass facing a bet (a fold): the seat that
            # would have acted next, i.e. the bettor, wins one chip.
            winner = (first_player_to_move + len(history)) % 2
            stake = 1

        temp_score = [0, 0]
        temp_score[winner] += stake
        temp_score[1 - winner] -= stake

        if show_rounds:
            if temp_score[PLAYER] > temp_score[AI]:
                print(f"Congratulations, you won the round with {temp_score[PLAYER]} extra chips!\n")
            else:
                print(f"You lost to a {cards[AI]} :( You lose {temp_score[AI]} chips.\n")

        score[PLAYER] += temp_score[PLAYER]
        score[AI] += temp_score[AI]

        # Store scores so they can be plotted afterwards
        user_scores_over_time.append(score[PLAYER])
        opponent_scores_over_time.append(score[AI])

        # Only echo the action sequence when round output was requested;
        # printing it for every simulated round floods the terminal.
        if show_rounds:
            print(history)

    plt.plot(user_scores_over_time)
    plt.plot(opponent_scores_over_time)
    if user_input:
        plt.legend(["User Strategy", "CFR Strategy"], loc="upper left")
    else:
        plt.legend(["Deterministic Strategy", "CFR Strategy"], loc="upper left")
    plt.xlabel("Number of Rounds")
    plt.ylabel("Number of Chips Gained")
    # plt.savefig("AI_score_over_time.png", bbox_inches='tight')  # Uncomment to save the figure
    plt.show()
Loading

0 comments on commit 10d3df3

Please sign in to comment.