diff --git a/game/poker_main.py b/game/poker_main.py index bc9f8fb..452866e 100644 --- a/game/poker_main.py +++ b/game/poker_main.py @@ -13,37 +13,17 @@ pygame.mixer.init() # For sounds SCALE = 1 -WIDTH, HEIGHT = 1280, 720 -# WIDTH, HEIGHT = 1289, 791 -# WIDTH, HEIGHT = 1600, 900 +WIDTH, HEIGHT = 1280, 720 # you can resize this dynamically afterwards -WIN = pygame.display.set_mode((WIDTH, HEIGHT)) +WIN = pygame.display.set_mode((WIDTH, HEIGHT), pygame.RESIZABLE) pygame.display.set_caption("Poker By Steven Gong") - WHITE = (255, 255, 255) BLACK = (25, 25, 25) GREEN = (0, 255, 0) RED = (255, 0, 0) FPS = 60 -POKER_BACKGROUND = pygame.transform.scale( - pygame.image.load("assets/poker-table.png"), (WIDTH, HEIGHT) -) - -FLOP_1_CARD_POSITION = (400, HEIGHT / 2 - 65) -FLOP_2_CARD_POSITION = (490, HEIGHT / 2 - 65) -FLOP_3_CARD_POSITION = (580, HEIGHT / 2 - 65) -TURN_CARD_POSITION = (670, HEIGHT / 2 - 65) -RIVER_CARD_POSITION = (760, HEIGHT / 2 - 65) - -PLAYER_CARD_1 = (550, HEIGHT - 220) -PLAYER_CARD_2 = (600, HEIGHT - 220) - -OPPONENT_CARD_1 = (550, 35) -OPPONENT_CARD_2 = (600, 35) - - INVERSE_RANK_KEY = { 14: "A", 2: "02", @@ -59,82 +39,136 @@ 12: "Q", 13: "K", } -"""Events -post event: pygame.event.post(pygame.event.Event(pygame.USER_EVENT + 1)) -And then you check this event in the while True loop -""" +# INITIALIZE GLOBAL VARIABLES +FLOP_1_CARD_POSITION = None +FLOP_2_CARD_POSITION = None +FLOP_3_CARD_POSITION = None +TURN_CARD_POSITION = None +RIVER_CARD_POSITION = None +PLAYER_CARD_1 = None +PLAYER_CARD_2 = None +OPPONENT_CARD_1 = None +OPPONENT_CARD_2 = None +DEALER_BUTTON = None +DEALER_BUTTON_POSITION_1 = None +DEALER_BUTTON_POSITION_2 = None +CARD_BACK = None +POT_FONT = None +BET_BUTTON_FONT = None +BET_FONT = None +PLAYERS_FONT = None +START_NEW_ROUND_BUTTON = None +fold_rect = None +check_rect = None +custom_rect = None +start_new_round_rect = None +buttons = None +input_box = None +POKER_BACKGROUND = None + +scale_factor = 1 +color_inactive = pygame.Color("lightskyblue3") +color_active = pygame.Color("dodgerblue2") +color = color_inactive +active = False +input_bet_text = "" +warning_text = "" +done = False +cursor_counter = 0 -dealer_button = pygame.transform.scale(pygame.image.load("assets/dealer_button.png"), (30, 30)) -CARD_BACK = pygame.transform.scale(pygame.image.load("../assets/back.png"), (263 / 3, 376 / 3)) +def resize(width, height): + global scale_factor, WIN + scale_factor = width / WIDTH + WIN = pygame.display.set_mode((width, height), pygame.RESIZABLE) + update_asset_positions() -POT_FONT = pygame.font.SysFont("Roboto", 30, bold=True) -BET_BUTTON_FONT = pygame.font.SysFont("Roboto", 24, bold=True) -BET_FONT = pygame.font.SysFont("Roboto", 26, bold=True) -PLAYERS_FONT = pygame.font.SysFont("Roboto", 24, bold=True) -# To rescale: pygame.transform.scale(card, (width, height)) -# pygame.transform.rotate(card, degrees) +def scale_tuple(tup, factor): + return tuple([x * factor for x in tup]) -# Pause time: pygame.time.delay(5000) -# rect = Pygame.Rect(x,y, width, height) -# You can then access coordinates with rect.x, rect.y, rect.width, rect.height, +def update_asset_positions(): + global FLOP_1_CARD_POSITION, FLOP_2_CARD_POSITION, FLOP_3_CARD_POSITION, TURN_CARD_POSITION, RIVER_CARD_POSITION, PLAYER_CARD_1, PLAYER_CARD_2, OPPONENT_CARD_1, OPPONENT_CARD_2 + global DEALER_BUTTON, DEALER_BUTTON_POSITION_1, DEALER_BUTTON_POSITION_2, CARD_BACK, POT_FONT, BET_BUTTON_FONT, BET_FONT, PLAYERS_FONT, fold_rect, check_rect, custom_rect, start_new_round_rect, buttons, input_box, POKER_BACKGROUND -# checking for collisions, use colliderectd + FLOP_1_CARD_POSITION = scale_tuple((400, HEIGHT / 2 - 65), scale_factor) + FLOP_2_CARD_POSITION = scale_tuple((490, HEIGHT / 2 - 65), scale_factor) + FLOP_3_CARD_POSITION = scale_tuple((580, HEIGHT / 2 - 65), scale_factor) + TURN_CARD_POSITION = scale_tuple((670, HEIGHT / 2 - 65), scale_factor) + RIVER_CARD_POSITION = scale_tuple((760, HEIGHT / 2 - 65), scale_factor) + PLAYER_CARD_1 = scale_tuple((WIDTH / 2 - 70, HEIGHT - 220), scale_factor) + PLAYER_CARD_2 = scale_tuple((WIDTH / 2, HEIGHT - 220), scale_factor) + OPPONENT_CARD_1 = scale_tuple((WIDTH / 2 - 70, 35), scale_factor) + OPPONENT_CARD_2 = scale_tuple((WIDTH / 2, 35), scale_factor) + DEALER_BUTTON = pygame.transform.scale( + pygame.image.load("assets/dealer_button.png"), scale_tuple((30, 30), scale_factor) + ) + DEALER_BUTTON_POSITION_1 = scale_tuple((500, HEIGHT - 200), scale_factor) + DEALER_BUTTON_POSITION_2 = scale_tuple((515, 120), scale_factor) -# BUTTONS -fold_rect = pygame.Rect(800, HEIGHT - 80, 80, 45) -check_rect = pygame.Rect(887, HEIGHT - 80, 100, 45) # Can also be call button -custom_rect = pygame.Rect(995, HEIGHT - 80, 80, 45) -buttons = [fold_rect, check_rect, custom_rect] + CARD_BACK = pygame.transform.scale( + pygame.image.load("../assets/back.png"), scale_tuple((263 / 3, 376 / 3), scale_factor) + ) + POT_FONT = pygame.font.SysFont("Roboto", int(scale_factor * 30), bold=True) + BET_BUTTON_FONT = pygame.font.SysFont("Roboto", int(scale_factor * 24), bold=True) + BET_FONT = pygame.font.SysFont("Roboto", int(scale_factor * 26), bold=True) + PLAYERS_FONT = pygame.font.SysFont("Roboto", int(scale_factor * 30), bold=True) + + fold_rect = pygame.Rect(*scale_tuple((800, HEIGHT - 80, 80, 45), scale_factor)) + check_rect = pygame.Rect( + *scale_tuple((887, HEIGHT - 80, 100, 45), scale_factor) + ) # Can also be call button + custom_rect = pygame.Rect(*scale_tuple((995, HEIGHT - 80, 80, 45), scale_factor)) + start_new_round_rect = pygame.Rect( + *scale_tuple((WIDTH - 300, HEIGHT - 80, 250, 45), scale_factor) + ) + buttons = [fold_rect, check_rect, custom_rect, start_new_round_rect] -input_box = pygame.Rect(1060, HEIGHT - 80, 140, 45) -color_inactive = pygame.Color("lightskyblue3") -color_active = pygame.Color("dodgerblue2") -color = color_inactive -active = False -input_bet_text = "" -warning_text = "" -done = False + input_box = pygame.Rect(*scale_tuple((1060, HEIGHT - 80, 140, 45), scale_factor)) + + POKER_BACKGROUND = pygame.transform.scale( + pygame.image.load("assets/poker-table.png"), scale_tuple((WIDTH, HEIGHT), scale_factor) + ) + + +update_asset_positions() -cursor_counter = 0 def load_card_image(card: Card): # 263 × 376 return pygame.transform.scale( - pygame.image.load("../assets/" + str(card) + ".png"), (263 / 3, 376 / 3) + pygame.image.load("../assets/" + str(card) + ".png"), + scale_tuple((263 / 3, 376 / 3), scale_factor), ) -def display_total_pot_balance(env: PokerEnvironment): - pot_information = POT_FONT.render("Total Pot: $" + str(env.total_pot_balance), 1, WHITE) - WIN.blit(pot_information, (900, HEIGHT / 2 - 30)) - -def display_stage_pot_balance(env: PokerEnvironment): - pot_information = POT_FONT.render("Current Pot: $" + str(env.stage_pot_balance), 1, WHITE) - WIN.blit(pot_information, (900, HEIGHT / 2)) +def display_total_pot_balance(env: PokerEnvironment): + pot_information = POT_FONT.render( + "Total Pot: $" + str(env.total_pot_balance + env.stage_pot_balance), 1, WHITE + ) + WIN.blit(pot_information, scale_tuple((875, HEIGHT / 2 - 15), scale_factor)) def display_user_balance(env: PokerEnvironment): player_balance = PLAYERS_FONT.render( - "$" + str(env.players[0].player_balance - env.players[0].current_bet), 1, GREEN + "$" + str((env.players[0].player_balance - env.players[0].current_bet)), 1, GREEN ) - WIN.blit(player_balance, (WIDTH / 2 + 80, HEIGHT - 200)) + WIN.blit(player_balance, scale_tuple((WIDTH / 2 + 130, HEIGHT - 200), scale_factor)) def display_opponent_balance(env: PokerEnvironment): opponent_balance = PLAYERS_FONT.render( - "$" + str(env.players[1].player_balance - env.players[1].current_bet), 1, GREEN + "$" + str((env.players[1].player_balance - env.players[1].current_bet)), 1, GREEN ) - WIN.blit(opponent_balance, (WIDTH / 2 + 80, 100)) + WIN.blit(opponent_balance, scale_tuple((WIDTH / 2 + 130, 100), scale_factor)) def display_user_bet(env: PokerEnvironment): pot_information = BET_FONT.render("Bet: $" + str(env.players[0].current_bet), 1, WHITE) - WIN.blit(pot_information, (WIDTH / 2 - 30, HEIGHT - 280)) + WIN.blit(pot_information, scale_tuple((WIDTH / 2 - 30, HEIGHT - 280), scale_factor)) def display_opponent_bet(env: PokerEnvironment): @@ -143,17 +177,27 @@ def display_opponent_bet(env: PokerEnvironment): def display_sessions_winnings(env: PokerEnvironment): - winnings = sum(env.players_balance_history[0]) + # winnings = sum(env.players_balance_history[0]) + winnings = 0 if winnings < 0: text = POT_FONT.render("Session Winnings: -$" + str(-winnings), 1, WHITE) else: text = POT_FONT.render("Session Winnings: $" + str(winnings), 1, WHITE) + + +def display_turn(env: PokerEnvironment): + if env.position_in_play == 0: # AI + text = POT_FONT.render("YOUR TURN", 1, WHITE) + else: + text = POT_FONT.render("OPPONENT TURN", 1, RED) WIN.blit(text, (70, 40)) def display_user_cards(env: PokerEnvironment): WIN.blit(load_card_image(env.players[0].hand[0]), PLAYER_CARD_1) WIN.blit(load_card_image(env.players[0].hand[1]), PLAYER_CARD_2) + # WIN.blit(CARD_BACK, PLAYER_CARD_1) + # WIN.blit(CARD_BACK, PLAYER_CARD_2) def display_opponent_cards(env: PokerEnvironment): @@ -183,13 +227,12 @@ def display_community_cards(env: PokerEnvironment): def display_dealer_button(env: PokerEnvironment): if env.dealer_button_position == 0: # User is the dealer - WIN.blit(dealer_button, (500, HEIGHT - 200)) + WIN.blit(DEALER_BUTTON, DEALER_BUTTON_POSITION_1) else: # Opponent is the dealer - WIN.blit(dealer_button, (515, 120)) + WIN.blit(DEALER_BUTTON, DEALER_BUTTON_POSITION_2) def draw_window(env: PokerEnvironment, god_mode=False, user_input=False): - WIN.blit(POKER_BACKGROUND, (0, 0)) if env.showdown and env.end_of_round(): # Reveal opponent's cards at showdown @@ -207,7 +250,6 @@ def draw_window(env: PokerEnvironment, god_mode=False, user_input=False): # Display Pot Information display_total_pot_balance(env) - display_stage_pot_balance(env) display_dealer_button(env) # TODO: Display Current bet information @@ -221,7 +263,9 @@ def draw_window(env: PokerEnvironment, god_mode=False, user_input=False): # Display Session Winnings display_sessions_winnings(env) - # if env.showdown and env.end_of_round(): # Show who won + # Display turn + display_turn(env) + if env.end_of_round(): winning_players = env.get_winning_players_idx() if len(winning_players) == 2: # Split the pot @@ -231,42 +275,60 @@ def draw_window(env: PokerEnvironment, god_mode=False, user_input=False): else: text = BET_FONT.render("You lost.", 1, WHITE) - WIN.blit(text, (250, 350)) + WIN.blit(text, scale_tuple((250, 350), scale_factor)) + + start_new_round = BET_BUTTON_FONT.render("Start New Round", 1, WHITE) + + AAfilledRoundedRect(WIN, RED, start_new_round_rect, radius=0.4) + WIN.blit( + start_new_round, + (start_new_round_rect.x + 28 * scale_factor, start_new_round_rect.y + 7 * scale_factor), + ) # Pressable Buttons for Check / Fold / Raise. Only display buttons if it is your turn warning_text_rendered = BET_FONT.render(warning_text, 1, RED) WIN.blit(warning_text_rendered, (WIDTH - 250, HEIGHT - 120)) if user_input: - if env.position_in_play == 0 or env.play_as_AI: - # AAfilledRoundedRect(WIN, RED, pygame.Rect(392,400, 120,50), radius=0.4) + # AAfilledRoundedRect(WIN, RED, pygame.Rect(392,400, 120,50), radius=0.4) + if not env.end_of_round() and not env.players[env.position_in_play].is_AI: AAfilledRoundedRect(WIN, RED, check_rect, radius=0.4) AAfilledRoundedRect(WIN, RED, custom_rect, radius=0.4) AAfilledRoundedRect(WIN, WHITE, input_box, radius=0.4) + AAfilledRoundedRect(WIN, RED, fold_rect, radius=0.4) - if "f" in env.infoset.actions(): - AAfilledRoundedRect(WIN, RED, fold_rect, radius=0.4) + if "f" in env.valid_actions(): fold_bet = BET_BUTTON_FONT.render("Fold", 1, WHITE) - WIN.blit(fold_bet, (fold_rect.x + 15, fold_rect.y + 7)) + WIN.blit( + fold_bet, (fold_rect.x + 15 * scale_factor, fold_rect.y + 9 * scale_factor) + ) - if "k" in env.infoset.actions(): + if "k" in env.valid_actions(): check_bet = BET_BUTTON_FONT.render("Check", 1, WHITE) - WIN.blit(check_bet, (check_rect.x + 15, check_rect.y + 7)) - else: # TODO: Min bet size is not 0 when you are the small blind, so it should be call, not check right. - # I forgot how the logic is handled for the preflop betting sizes + WIN.blit( + check_bet, (check_rect.x + 15 * scale_factor, check_rect.y + 9 * scale_factor) + ) + else: call_bet = BET_BUTTON_FONT.render("Call", 1, WHITE) - WIN.blit(call_bet, (check_rect.x + 28, check_rect.y + 7)) + WIN.blit( + call_bet, (check_rect.x + 28 * scale_factor, check_rect.y + 9 * scale_factor) + ) # TODO: Handle edges cases where these buttons are impossible, in which case you need to grey it out custom_bet = BET_BUTTON_FONT.render("Bet", 1, WHITE) - WIN.blit(custom_bet, (custom_rect.x + 15, custom_rect.y + 7)) + WIN.blit( + custom_bet, (custom_rect.x + 15 * scale_factor, custom_rect.y + 9 * scale_factor) + ) custom_input_bet_text = BET_BUTTON_FONT.render(input_bet_text, 1, BLACK) - WIN.blit(custom_input_bet_text, (input_box.x + 7, input_box.y + 7)) + WIN.blit( + custom_input_bet_text, + (input_box.x + 7 * scale_factor, input_box.y + 9 * scale_factor), + ) - if cursor_counter < 15 and active: - pygame.draw.rect( - WIN, (0, 0, 0), (WIDTH - 210 + 13 * len(input_bet_text), HEIGHT - 70, 1, 20), 1 - ) + if cursor_counter < 15 and active: + pygame.draw.rect( + WIN, (0, 0, 0), (WIDTH - 210 + 13 * len(input_bet_text), HEIGHT - 70, 1, 20), 1 + ) pygame.display.update() @@ -310,7 +372,6 @@ def main(): game = 0 game_i = 0 - env: PokerEnvironment = PokerEnvironment() # if user_input or replay: # env.add_player() # You / replay @@ -323,21 +384,24 @@ def main(): # env.add_AI_player() # Opponent # play as the AI env.add_AI_player() - env.add_player() # play as the opponent too + env.add_AI_player() + # env.add_player() + # env.add_player() # play as the opponent too + env.start_new_round() clock = pygame.time.Clock() run = True def place_custom_bet(): global input_bet_text, warning_text - if input_bet_text != "": - bet = "b" + input_bet_text - print(bet) - if bet in env.history.actions(): - env.handle_game_stage(bet) - input_bet_text = "" - warning_text = "" - else: + if input_bet_text != "" and input_bet_text.isdigit(): + bet = "b" + str(int(input_bet_text)) + if int(input_bet_text) == env.get_highest_current_bet(): + warning_text = "Cannot bet the same amount" + return + + env.handle_game_stage(bet) + if int(input_bet_text) != env.get_highest_current_bet(): warning_text = "Invalid bet size" while run: @@ -347,33 +411,37 @@ def place_custom_bet(): if user_input or replay: # If you want to render PyGame clock.tick(FPS) - handler_called = False + if env.players[env.position_in_play].is_AI: + env.handle_game_stage() for event in pygame.event.get(): if event.type == pygame.QUIT: run = False - # elif event.type == pygame.VIDEORESIZE: # For resizing of the window - # global WIN - # WIN = pygame.display.set_mode((event.w, event.h), pygame.RESIZABLE) + elif event.type == pygame.VIDEORESIZE: # For resizing of the window + resize(event.w, event.h) # Check if the buttons are clicked, only process if it is our turn if user_input: - if event.type == pygame.MOUSEBUTTONDOWN and env.position_in_play == 0: - for i in range(len(buttons)): # Check for willision with the three buttons + # and env.position_in_play == 0, we can make decisions as the opponent too + if event.type == pygame.MOUSEBUTTONDOWN: + for i in range(len(buttons)): if buttons[i].collidepoint(pygame.mouse.get_pos()): warning_text = "" - # TODO: Change this for no-limit version if i == 0: env.handle_game_stage("f") # Fold elif i == 1: - if "k" in env.history.actions(): + if "k" in env.valid_actions(): env.handle_game_stage("k") # Check else: env.handle_game_stage("c") # Call elif i == 2: place_custom_bet() + elif i == 3 and env.end_of_round(): + env.start_new_round() + else: + continue - handler_called = True + input_bet_text = "" break if event.type == pygame.MOUSEBUTTONDOWN: # If the user clicked on the input_box rect. @@ -394,48 +462,40 @@ def place_custom_bet(): else: input_bet_text += event.unicode - if not handler_called: - if replay: - if game_i == 0: # New game, update player's hands - # TODO: Show the appropriate community cards. Right now it shows the right player cards, but the board is still the old way. - # TODO: This is a little buggy right now too. It doesn't show the right cards. - env.players[0].hand = [ - Card(rank_suit=history[game]["player_cards"][0]), - Card(rank_suit=history[game]["player_cards"][1]), - ] - env.players[1].hand = [ - Card(rank_suit=history[game]["opponent_cards"][0]), - Card(rank_suit=history[game]["opponent_cards"][1]), - ] - - env.handle_game_stage(history[game]["history"][game_i]) - game_i += 1 - if game_i >= len(history[game]["history"]): # Move onto the next game - print( - "Finished game with history: {}. Player: {} Opponent: {} Board: {}".format( - history[game]["history"], - history[game]["player_cards"], - history[game]["opponent_cards"], - history[game]["community_cards"], - ) + if replay: + if game_i == 0: # New game, update player's hands + # TODO: Show the appropriate community cards. Right now it shows the right player cards, but the board is still the old way. + # TODO: This is a little buggy right now too. It doesn't show the right cards. + env.players[0].hand = [ + Card(rank_suit=history[game]["player_cards"][0]), + Card(rank_suit=history[game]["player_cards"][1]), + ] + env.players[1].hand = [ + Card(rank_suit=history[game]["opponent_cards"][0]), + Card(rank_suit=history[game]["opponent_cards"][1]), + ] + + env.handle_game_stage(history[game]["history"][game_i]) + game_i += 1 + if game_i >= len(history[game]["history"]): # Move onto the next game + print( + "Finished game with history: {}. Player: {} Opponent: {} Board: {}".format( + history[game]["history"], + history[game]["player_cards"], + history[game]["opponent_cards"], + history[game]["community_cards"], ) - game += 1 - game_i = 0 - if game == len(history): - print("Finished replay of all games") - return - - else: - env.handle_game_stage() + ) + game += 1 + game_i = 0 + if game == len(history): + print("Finished replay of all games") + return # At Showdown, reveal opponent's cards and add a delay if replay or user_input: draw_window(env, god_mode, user_input) - if user_input and env.end_of_round(): - draw_window(env, god_mode, False) - time.sleep(2) - pygame.quit() diff --git a/requirements.txt b/requirements.txt index 15f3491..e3dd2f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,5 @@ joblib # parallel computation and store models phevaluator # fast poker hand evaluator using string representation of cards torch matplotlib -pot # Not really needed, I found it to be too slow scikit-learn -labml - +py3-tts diff --git a/src/environment.py b/src/environment.py index 03e57e7..383200b 100644 --- a/src/environment.py +++ b/src/environment.py @@ -1,155 +1,48 @@ # The Poker Environment from evaluator import * from typing import List -from holdem import HoldEmHistory, HoldemInfoSet # To get the legal actions -from abstraction import predict_cluster_fast -import joblib - - -def load_holdem_infosets(): - print("loading holdem infosets") - global holdem_infosets - holdem_infosets = joblib.load("../src/infoSets_300.joblib") - print("loaded holdem infosets!") - - -class Player: # This is the POV - def __init__(self, balance) -> None: - self.is_AI = False - - self.hand: List[Card] = ( - [] - ) # The hand is also known as hole cards: https://en.wikipedia.org/wiki/Texas_hold_%27em - self.player_balance: int = ( - balance # TODO: Important that this value cannot be modified easily... - ) - self.current_bet = 0 - self.playing_current_round = True - - # Wellformedness, hand is always either 0 or 2 cards - def add_card_to_hand(self, card: Card): - self.hand.append(card) - assert len(self.hand) <= 2 - - def clear_hand(self): - self.hand = [] - - def place_bet(self, action: str, observed_env) -> int: - - legal_actions = observed_env.infoset.actions() - print("here are your legal actions that the AI can react to", legal_actions) - # make action to nearest number - # ----- BET ABSTRACTION ------ - if action[0] == "b": - closest_action = legal_actions[-1] - for legal_action in legal_actions: - if int(action[1:]) < int(legal_action[1:]): - closest_action = legal_action - break - else: - closest_action = action - - if closest_action not in legal_actions: - raise Exception(f"Invalid Action: {action}") - - print("closest bet found", closest_action) - - return closest_action - - def calculate_pot_odds( - self, - ): # Calculate Pot Odds helper function, basically look at how many hands can you currently beat - """ - Simple logic, does not account for the pot values. - """ - - -import numpy as np - - -def getAction(strategy): - return np.random.choice(strategy.keys(), p=strategy.values()) - - -class AIPlayer(Player): - def __init__(self, balance) -> None: - super().__init__(balance) - self.is_AI = True - - # We are going to have the dumbest AI possible, which is to call every time - def place_bet(self, observed_env) -> int: # AI will call every time - # Very similar function to Player.place_bet, we only call and check - action = "k" - strategy = observed_env.infoset.get_average_strategy() - action = getAction(strategy) - print("AI strategy", strategy) - print("AI action", action) - - if action == "k": # check - if observed_env.game_stage == 2: - self.current_bet = 2 - else: - self.current_bet = 0 - - elif action == "c": - # If you call on the preflop - if observed_env.game_stage == 2: - self.current_bet = observed_env.big_blind - else: # Set the current bet to the amount of the last bet - self.current_bet = observed_env.players[ - (observed_env.position_in_play + 1) % 2 - ].current_bet - else: - self.current_bet = int(action[1:]) - - return action - +from player import Player, AIPlayer class PokerEnvironment: """ - Also see the HoldEmHistory class in holdem.py, which defines the set of legal actions every time + Also see the HoldEmHistory class in holdem.py """ def __init__(self) -> None: self.players: List[Player] = [] self.deck = Deck() - load_holdem_infosets() - - """Game Stages: - 1: Starting a new round, giving players their cards. Automatically goes into state 2 + """Game Stages (move_to_next_game_stage() is called to transition between stages (except for stage 1)): + 1: Initial stage. Call start_new_round() to enter the preflop stage 2: Preflop betting round. Goes into state 3 once everyone has made their decision 3: Flop round. Goes into turn (state 4) /ends round (state 6) once everyone " " 4: Turn round. Goes into river (state 5) /ends round (state 6) once everyone " " 5: River round. Ends round (state 6) once everyone " " - 6: Round is over. Distribute pot winnings. + 6: Round is over. Distribute pot winnings. Call start_new_round() to start a new round Game Stage - 2 = number of "/" in the holdem infoset and history """ - self.play_as_AI = True # play as the AI (used in video) self.game_stage = 1 # To keep track of which phase of the game we are at, new_round is 0 - # If self.finished_playing_game_stage = True, we can move to the next game state. This is needed to go around each player and await their decision - self.finished_playing_game_stage = False - # Changes every round self.dealer_button_position = 0 # This button will move every round + self.position_in_play = 0 + self.total_pot_balance = 0 # keep track of pot size of total round self.stage_pot_balance = 0 # keep track of pot size for current round self.community_cards: List[Card] = [] # a.k.a. the board - self.position_in_play = 0 - self.first_player_to_place_highest_bet = 0 # This is to keep track of who is the first player to have placed the highest bet, so we know when to end the round + self.raise_position = 0 # This is to keep track of who is the first player to have placed the highest bet, so we know when to end the round + self.showdown = False # flag that can be used to reveal opponents cards if needed - # These values should rarely change. TODO: Figure out how to integrate with holdem.py + # FIXED BALANCES self.new_player_balance = 100 - self.small_blind = 1 - self.big_blind = 2 - # holdem infosets - # use the infosets for the AI to make predictions - self.infoSet_key = [] - self.infoset = None + self.SMALL_BLIND = 1 + self.BIG_BLIND = 2 + + self.INPUT_CARDS = False + self.history = [] self.players_balance_history = [] # List of "n" list for "n" players def add_player(self): @@ -181,6 +74,7 @@ def get_winning_players_idx(self) -> List: return winning_players def distribute_pot_to_winning_players(self): # Run when self.game_stage = 5 + assert self.game_stage == 6 winning_players = self.get_winning_players() pot_winning = self.total_pot_balance / len(winning_players) @@ -200,8 +94,17 @@ def distribute_pot_to_winning_players(self): # Run when self.game_stage = 5 int(player.player_balance - self.new_player_balance) ) - self.total_pot_balance = 0 # Reset the pot just to be safe - self.stage_pot_balance = 0 # Reset the pot just to be safe + def valid_actions(self): + """ + Make sure this logic is unified with holdem.py + """ + valid_actions = ["b1", "b2", "f"] + if self.players[0].current_bet == self.players[1].current_bet: + valid_actions.append("k") + else: + valid_actions.append("c") + + return valid_actions def count_remaining_players_in_round(self): # Helper function to count the total number of players still in the round @@ -211,12 +114,7 @@ def count_remaining_players_in_round(self): total += 1 return total - def print_board(self): - for card in self.community_cards: - card.print() - def start_new_round(self): - self.showdown = False assert len(self.players) >= 2 # We cannot start a poker round with less than 2 players... # Reset Players @@ -224,7 +122,6 @@ def start_new_round(self): player.playing_current_round = True player.current_bet = 0 player.clear_hand() - # TODO: Remove this when you are ready player.player_balance = self.new_player_balance # Reset Deck (shuffles it as well), reset pot size @@ -237,36 +134,20 @@ def start_new_round(self): self.dealer_button_position += 1 self.dealer_button_position %= len(self.players) - # Big Blind - self.players[((self.dealer_button_position + 1) % len(self.players))].current_bet = ( - self.big_blind - ) - - # Small Blind - self.players[((self.dealer_button_position + 2) % len(self.players))].current_bet = ( - self.small_blind - ) - - self.update_stage_pot_balance() - # 3. Deal Cards - # We start dealing with the player directly clockwise of the dealer button - position_to_deal = self.dealer_button_position + 1 - - for _ in range(len(self.players)): - position_to_deal %= len(self.players) - card_str = "" - for i in range(2): - if self.play_as_AI and self.players[position_to_deal].is_AI: - card = Card(input(f"Enter the {i}-th card that was dealt to the AI (ex: Ah): ")) - else: - card = self.deck.draw() + self.showdown = False + self.history = [] # reset history for the round - card_str += str(card) - self.players[position_to_deal].add_card_to_hand(card) + # Proceed to preflop + self.game_stage = 1 + self.move_to_next_game_stage() - position_to_deal += 1 + def get_highest_current_bet(self): + highest_bet = 0 + for player in self.players: + if player.current_bet > highest_bet and player.playing_current_round: + highest_bet = player.current_bet - self.finished_playing_game_stage = True + return highest_bet def update_stage_pot_balance(self): """ @@ -279,42 +160,38 @@ def update_stage_pot_balance(self): def play_current_stage(self, action: str = ""): self.update_stage_pot_balance() if self.players[self.position_in_play].is_AI: - action = self.players[self.position_in_play].place_bet( - self - ) # Pass the Environment as an argument - - self.infoSet_key += [action] - self.infoset = holdem_infosets["".join(self.infoSet_key)] + action = self.players[self.position_in_play].place_bet(self) else: # Real player's turn if action == "": # No decision has yet been made return else: - self.players[self.position_in_play].place_bet(action, self) - # Update the history - self.history += action + action = self.players[self.position_in_play].place_bet(action, self) + if action is None: # invalid action + return - if action[0] == "b": - self.first_player_to_place_highest_bet = self.position_in_play + self.history += [action] + if action[0] == "b": + self.raise_position = self.position_in_play elif action == "f": self.players[self.position_in_play].playing_current_round = False # Player has folded self.update_stage_pot_balance() - if self.count_remaining_players_in_round() == 1: # Round is over, distribute winnings - self.finished_playing_game_stage = True - self.game_stage = 6 + # ---- Terminate Round if 1 player left ------ + if self.count_remaining_players_in_round() == 1: + self.end_round() return - else: - self.move_to_next_player() - if ( - self.position_in_play == self.first_player_to_place_highest_bet - ): # Stage is over, move to the next stage (see flop) - self.finished_playing_game_stage = True + self.move_to_next_playing_player() + + if self.position_in_play == self.raise_position: # Everyone has called with no new raises + self.move_to_next_game_stage() - def move_to_next_player(self): + def move_to_next_playing_player(self, from_position=None): assert self.count_remaining_players_in_round() > 1 + if from_position is not None: + self.position_in_play = from_position self.position_in_play += 1 self.position_in_play %= len(self.players) @@ -328,101 +205,84 @@ def play_preflop(self): The "small blind" is placed by the player to the left of the dealer button and the "big blind" is then posted by the next player to the left. The one exception is when there are only two players (a "heads-up" game), when the player on the button is the small blind, and the other player is the big blind. """ + # Set the blind values + # Big Blind + self.players[((self.dealer_button_position + 1) % len(self.players))].current_bet = ( + self.BIG_BLIND + ) + # Small Blind + self.players[((self.dealer_button_position + 2) % len(self.players))].current_bet = ( + self.SMALL_BLIND + ) + + self.update_stage_pot_balance() + if len(self.players) == 2: self.position_in_play = self.dealer_button_position else: self.position_in_play = (self.dealer_button_position + 3) % len(self.players) - self.first_player_to_place_highest_bet = self.position_in_play - - self.finished_playing_game_stage = False + self.raise_position = self.position_in_play - # Assign to cluster + for player_idx in range(len(self.players)): + card_str = "" + for i in range(2): + if self.INPUT_CARDS and player_idx == 0: + card = Card(input(f"Enter the card that was dealt (ex: Ah): ")) + else: + card = self.deck.draw() - if self.position_in_play != self.AI_player_idx: # AI doesn't know what the opponent has - self.infoSet_key += ["?"] - self.infoSet_key += [ - predict_cluster_fast( - [str(card) for card in self.players[self.AI_player_idx].hand], - n=3000, - total_clusters=20, - ) - ] - else: - self.infoSet_key += [ - predict_cluster_fast( - [str(card) for card in self.players[self.AI_player_idx].hand], - n=3000, - total_clusters=20, - ) - ] - self.infoSet_key += ["?"] + card_str += str(card) + self.players[player_idx].add_card_to_hand(card) - self.infoset = holdem_infosets["".join(self.infoSet_key)] + self.history += [card_str] def play_flop(self): - # 3. Flop - self.infoSet_key += ["/"] - self.deck.draw() # We must first burn one card, TODO: Show on video for i in range(3): # Draw 3 cards - if self.play_as_AI: + if self.INPUT_CARDS: card = Card(input(f"Input the {i}-th community card (ex: 'Ah'): ")) else: card = self.deck.draw() self.community_cards.append(card) - cards = [str(card) for card in self.community_cards] - - self.infoSet_key += [predict_cluster_fast(cards, n=1000, total_clusters=10)] - self.infoset = holdem_infosets["".join(self.infoSet_key)] + self.history += ["/"] + self.history += ["".join([str(card) for card in self.community_cards])] # The person that should play is the first person after the dealer position - self.position_in_play = self.dealer_button_position - self.move_to_next_player() - self.first_player_to_place_highest_bet = self.position_in_play - - self.finished_playing_game_stage = False + self.move_to_next_playing_player(from_position=self.dealer_button_position) + self.raise_position = self.position_in_play def play_turn(self): - # 4. Turn - self.infoSet_key += ["/"] - self.deck.draw() # We must first burn one card, TODO: Show on video - if self.play_as_AI: + self.deck.draw() + if self.INPUT_CARDS: card = Card(input("Input the turn card (ex: '5d'): ")) else: card = self.deck.draw() - self.community_cards.append(card) - cards = [str(card) for card in self.community_cards] - self.infoSet_key += [predict_cluster_fast(cards, n=500, total_clusters=5)] - self.infoset = holdem_infosets["".join(self.infoSet_key)] - # The person that should play is the first person after the dealer position - self.position_in_play = self.dealer_button_position - self.move_to_next_player() - self.first_player_to_place_highest_bet = self.position_in_play + self.history += ["/"] + self.history += [str(card)] - self.finished_playing_game_stage = False + # The person that should play is the first person after the dealer position that is STILL in the game + self.move_to_next_playing_player(from_position=self.dealer_button_position) + self.raise_position = self.position_in_play def play_river(self): - # 5. River - self.infoSet_key += ["/"] - - self.deck.draw() # We must first burn one card, TODO: Show on video - if self.play_as_AI: - card = input("Input the river card (ex: '5d'): ") + self.deck.draw() + if self.INPUT_CARDS: + card = Card(input(f"Input the river card (ex: 'Ah'): ")) else: card = self.deck.draw() - self.community_cards.append(card) - cards = [str(card) for card in self.community_cards] - self.infoSet_key += [predict_cluster_fast(cards, n=200, total_clusters=5)] - self.infoset = holdem_infosets["".join(self.infoSet_key)] - self.finished_playing_game_stage = False + self.history += ["/"] + self.history += [str(card)] + + self.move_to_next_playing_player(from_position=self.dealer_button_position) + self.raise_position = self.position_in_play def update_player_balances_at_end_of_stage(self): for player in self.players: @@ -434,57 +294,60 @@ def move_stage_to_total_pot_balance(self): self.stage_pot_balance = 0 def handle_game_stage(self, action=""): - if self.finished_playing_game_stage: - if self.game_stage != 1: - self.update_player_balances_at_end_of_stage() - self.move_stage_to_total_pot_balance() - self.game_stage += 1 - - if self.game_stage == 2: - self.play_preflop() - elif self.game_stage == 3: - self.play_flop() - elif self.game_stage == 4: - self.play_turn() - elif self.game_stage == 5: - self.play_river() - else: - if ( - self.game_stage == 6 - ): # We reached the river, and are now in the showdown. We need the evaluator to get the winners, set all losers to playing_current_round false - self.showdown = True - evaluator = Evaluator() - - indices_of_potential_winners = [] - for idx, player in enumerate(self.players): - if player.playing_current_round: - indices_of_potential_winners.append(idx) - hand = CombinedHand(self.community_cards + player.hand) - evaluator.add_hands(hand) - - winners = evaluator.get_winner() - for player in self.players: - player.playing_current_round = False - - for winner in winners: - self.players[indices_of_potential_winners[winner]].playing_current_round = ( - True + if self.game_stage != 1 and self.game_stage != 6: # nothing to do at start or end of round + self.play_current_stage(action) + + def move_to_next_game_stage(self, input_cards=None): + self.update_player_balances_at_end_of_stage() + self.move_stage_to_total_pot_balance() + + self.game_stage += 1 + + if self.game_stage == 2: + self.play_preflop() + elif self.game_stage == 3: + self.play_flop() + elif self.game_stage == 4: + self.play_turn() + elif self.game_stage == 5: + self.play_river() + else: + self.end_round() + return + + # If both players are out of balance, it's a showdown until the end + if self.total_pot_balance == len(self.players) * self.new_player_balance: + self.move_to_next_game_stage() + + def end_of_round(self): + return self.game_stage == 6 + + def end_round(self): + if self.count_remaining_players_in_round() > 1: + self.showdown = True + evaluator = Evaluator() + indices_of_potential_winners = [] + for idx, player in enumerate(self.players): + if player.playing_current_round: + indices_of_potential_winners.append(idx) + if self.INPUT_CARDS and idx == 1: + # Add opponents hand to calculate showdown winner + self.players[1].clear_hand() + self.players[1].add_card_to_hand( + Card(input("Enter the first card from opponent (ex: 5h): ")) + ) + self.players[1].add_card_to_hand( + Card(input("Enter the second card from opponent (ex: As): ")) ) + hand = CombinedHand(self.community_cards + player.hand) + evaluator.add_hands(hand) - self.game_stage = 1 - self.finished_playing_game_stage = ( - False # on the next call of the handler, we will start a new round - ) + winners = evaluator.get_winner() + for player in self.players: + player.playing_current_round = False - print(self.infoSet_key) - else: - if self.game_stage == 1: - # This function was put here instead of at game_stage == 6 to visualize the game - self.distribute_pot_to_winning_players() - self.start_new_round() - self.handle_game_stage() - else: - self.play_current_stage(action) + for winner in winners: + self.players[indices_of_potential_winners[winner]].playing_current_round = True - def end_of_round(self): - return self.game_stage == 1 and self.finished_playing_game_stage == False + self.game_stage = 6 # mark end of round + self.distribute_pot_to_winning_players() diff --git a/src/evaluator.py b/src/evaluator.py index e0d7608..01b9788 100644 --- a/src/evaluator.py +++ b/src/evaluator.py @@ -1,29 +1,7 @@ -# Uses Cactus Kev’s 5-Card Evaluator: http://suffe.cool/poker/evaluator.html -# I am aware that this method is not the most efficient, I will look into implementations if needed - -# https://www.codingthewheel.com/archives/poker-hand-evaluator-roundup - """ -# Some thoughts, the evaluator is going to be important because it determines the rules -# of the game. However, we won't give our AI this. The environment will simply -# feed the final reward, which is win or not win. - -# Reward = the amount of money you win or lose. -# The optimal policy probably has to be probabilistic, actually no... maybe -because the opponent will start figuring it out and calling / folding every time. - -Also, what kind of opponents do we want to be playing? - -Card representation also needs to be fast. Everything needs to be fast, for the AI -to run lots of simulations and figure out the optimal strategy. - -# I think I will consider two approaches -- Card Kev's which is a 5-card lookup table, will need to do lookup 21 times... -- Two + Two algorithm which is for a 7-card lookup table -https://github.com/chenosaurus/poker-evaluator/ - -Speed is pretty important, since I want to train the AI as fast as possible, so that it learns the optimal -policy. If the game is slow, then there is no point. +A custom evaluator implemented using bit representation for cards to evaluate hands for Texas Hold'Em Poker. +Note that this is still slow compared to other open-source implementations, so I only use this for running the main +game (see `poker_main.py`). """ # Representation is key to performance. This is going to be terrifying, as I am going to be working with bits.. @@ -511,6 +489,7 @@ def get_winner( hand_strengths = [hand.hand_strength for hand in self.hands] best_hand_val = min(hand_strengths) potential_winners = [i for i, x in enumerate(hand_strengths) if x == best_hand_val] + print(potential_winners) # TODO: Idea to optimize in the future, just make the best hand as a list, and then compare if necessary. diff --git a/src/holdem.py b/src/holdem.py index 8cb3f6b..e291b30 100644 --- a/src/holdem.py +++ b/src/holdem.py @@ -1,3 +1,6 @@ +""" +Abstracted version of Holdem Poker, used for training. +""" import base import numpy as np from base import Player, Action @@ -632,8 +635,12 @@ def generate_dataset(iterations=1000, num_samples=10000, save=True): # generate_dataset() load_dataset() cfr = HoldemCFR(create_infoSet, create_history) - # cfr.infoSets = joblib.load("infosets.joblib") + # cfr.infoSets = joblib.load("infoSets_2500.joblib") + # print("finished loading") cfr.solve() + # cfr.solve_multiprocess( + # initializer=load_dataset, + # ) # """ # When we work with these abstractions, we have two types: diff --git a/src/player.py b/src/player.py new file mode 100644 index 0000000..9d3407d --- /dev/null +++ b/src/player.py @@ -0,0 +1,112 @@ +import joblib +import pyttsx3 +import numpy as np +from evaluator import * +from typing import List + + +class Player: # This is the POV + def __init__(self, balance) -> None: + self.is_AI = False + + self.hand: List[Card] = ( + [] + ) # The hand is also known as hole cards: https://en.wikipedia.org/wiki/Texas_hold_%27em + self.player_balance: int = ( + balance # TODO: Important that this value cannot be modified easily... + ) + self.current_bet = 0 + self.playing_current_round = True + + # Wellformedness, hand is always either 0 or 2 cards + def add_card_to_hand(self, card: Card): + self.hand.append(card) + assert len(self.hand) <= 2 + + def clear_hand(self): + self.hand = [] + + def place_bet(self, action: str, observed_env) -> int: + if action == "c": + self.current_bet = observed_env.get_highest_current_bet() + + elif action[0] == "b": # bet X amount + bet_size = int(action[1:]) + if bet_size < observed_env.get_highest_current_bet(): + print("you must raise more than the current highest bet") + return None + elif bet_size > self.player_balance: + print("you cannot bet more than your balance") + return None + elif bet_size == observed_env.get_highest_current_bet(): + print("you must call, not bet") + else: + self.current_bet = int(action[1:]) + + return action + + +def getAction(strategy): + return np.random.choice(list(strategy.keys()), p=list(strategy.values())) + + +class AIPlayer(Player): + def __init__(self, balance) -> None: + super().__init__(balance) + self.is_AI = True + + self.engine = pyttsx3.init() + + # We are going to have the dumbest AI possible, which is to call every time + def place_bet(self, observed_env) -> int: # AI will call every time + print(observed_env) + # Very similar function to Player.place_bet, we only call and check + # use the the history + # strategy = observed_env.get_average_strategy() + if "k" in observed_env.valid_actions(): + action = "k" + else: + action = "c" + + print(observed_env.history) + + # action = getAction(strategy) + # print("AI strategy", strategy) + # print("AI action", action) + + if action == "k": # check + if observed_env.game_stage == 2: + self.current_bet = 2 + else: + self.current_bet = 0 + + self.engine.say("I Check") + elif action == "c": + self.engine.say("I Call") + # If you call on the preflop + self.current_bet = observed_env.get_highest_current_bet() + elif action == "f": + self.engine.say("I Fold") + else: + self.current_bet = int(action[1:]) + self.engine.say(f"I bet {self.current_bet * 100}") + + self.engine.runAndWait() + return action + + +def load_holdem_infosets(): + print("loading holdem infosets") + global holdem_infosets + # holdem_infosets = joblib.load("../src/infoSets_100.joblib") + holdem_infosets = joblib.load("../src/infoSets_0.joblib") + print("loaded holdem infosets!") + + +def get_infoset(infoSet_key): + print("getting infoset", infoSet_key) + key = "".join(infoSet_key) + if key in holdem_infosets: + return holdem_infosets[key] + else: + return None diff --git a/src/train.py b/src/train.py index 7cfad06..268161a 100644 --- a/src/train.py +++ b/src/train.py @@ -3,7 +3,7 @@ of CFR with Kuhn Poker, under `research/kuhn/train.py`. That version of the game does not implement Card Abstraction. This version does. -This is the main training file for Poker CFR. It generates a blueprint strategy. Then, we will improve +TOO SLOW This is the main training file for Poker CFR. It generates a blueprint strategy. Then, we will improve it in real-time by implementing depth-limited solving. """