We now use common tic-tac-toe code.

mhahsler · Apr 23, 2024 · 95f5f8b · 95f5f8b
1 parent e140960
commit 95f5f8b
Show file tree

Hide file tree

Showing 9 changed files with 1,186 additions and 2,836 deletions.
diff --git a/Games/README.md b/Games/README.md
@@ -3,6 +3,9 @@
 
 ## Contents
 
+### Defining a Game
+* Example [Tic-Tac-Toe](https://colab.research.google.com/github/mhahsler/CS7320-AI/blob/master/Games/tictactoe_definitions.ipynb)
+
 ### Connection to search with nondeterministic actions (from Chapter 4.3)
 * Example: [Solving Tic-Tac-Toe with And-Or-Tree Search](https://colab.research.google.com/github/mhahsler/CS7320-AI/blob/master/Games/tictactoe_and_or_tree_search.ipynb). Here the opponent is seen as part of the environment, i.e.,
 each action by the player is followed by an unknown action of the opponent which, from the viewpoint of the player, makes the outcomes of actions nondeterministic.

diff --git a/Games/tictactoe.py b/Games/tictactoe.py
@@ -0,0 +1,150 @@
+# Tic Tac Toe Game functions 
+
+# Games are defined by (see Chapter 5):
+# * Actions(s) Legal moves in state s.
+# * Result(s, a) Transition model.
+# * Terminal(s) Test for terminal states.
+# * Utility(s) Utility for player Max for terminal states.
+
+# I represent the state (board) as a vector of length 9. 
+# The values are ' ', 'x', 'o'. 
+
+import numpy as np
+
+def empty_board():
+    """Create an empty board: a list of nine ' ' entries, one per square."""
+    return [' ' for _ in range(9)]
+
+def actions(board):
+    """Return the legal moves for a board as a list with the indices of its empty squares."""
+    cells = np.array(board)
+    open_squares = np.nonzero(cells == ' ')[0]
+
+    # Note: to randomize the action order, shuffle the index array with
+    # np.random.shuffle() before converting it to a list.
+    return open_squares.tolist()
+
+def result(state, player, action):
+    """Transition model: return the board that results from a move.
+
+    A copy of state is made and player's mark is written at index action;
+    the state that was passed in is left unchanged.
+    """
+    successor = state.copy()
+    successor[action] = player
+    return successor
+
+def terminal(board):
+    """Terminal test: is the game over (won by a player or drawn)?"""
+    status = check_board(board)
+    return status != 'n'
+
+def utility(state, player = 'x'):
+    """Utility of a state from the point of view of player.
+
+    Returns +1 for a win, 0 for a draw, and -1 for a loss. The utility is not
+    defined for non-terminal states; None is returned in that case.
+    """
+    outcome = check_board(state)
+
+    if outcome == player:
+        value = +1                       # win
+    elif outcome == 'd':
+        value = 0                        # draw
+    elif outcome == other(player):
+        value = -1                       # loss
+    else:
+        value = None                     # utility is not defined
+
+    return value
+
+
+## helper functions
+def check_board(board):
+    """Check the board.
+
+    Returns the mark of the winner ('x' or 'o') if a row, column, or diagonal
+    holds three identical marks, 'd' for a draw (no winner and no empty square
+    left), or 'n' if the game is not over and the next move can be made.
+    """
+    grid = np.array(board).reshape((3, 3))
+
+    # All lines in the order they are checked: rows, columns, main diagonal,
+    # anti-diagonal.
+    lines = [*grid, *grid.T, grid.diagonal(), grid[:, ::-1].diagonal()]
+
+    for line in lines:
+        first = line[0]
+        if first != ' ' and all(cell == first for cell in line):
+            return first
+
+    # no winner: a completely filled board is a draw
+    if not np.any(grid == ' '):
+        return 'd'
+
+    return 'n'
+
+def other(player):
+    """Return the opponent's mark: 'o' for 'x', and 'x' for anything else."""
+    return 'o' if player == 'x' else 'x'
+
+def show_board_text(board):
+    """Print the board as a 3x3 array."""
+    print(np.array(board).reshape((3, 3)))
+
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+
+def show_board(board, help = True, dpi = 40, colors = None):
+    """Show the tic-tac-toe board as a matplotlib plot.
+
+    board:  the state as a vector of length 9.
+    help:   if True, the array index of each square is added to the square.
+    dpi:    dpi of the figure; changes the size of the plot.
+    colors: dict that maps each board value (' ', 'x', 'o') to the color used
+            to draw it. Defaults to white for ' ', red for 'x', and black
+            for 'o'.
+    """
+    # Create the default here so that no mutable dict is shared between calls.
+    if colors is None:
+        colors = {' ': 'white', 'x': 'red', 'o': 'black'}
+
+    b = np.array(board).reshape((3,3))
+
+    # white 3x3 background, created with the requested dpi
+    with plt.rc_context({'figure.dpi': dpi}):
+        image = plt.matshow(np.zeros((3, 3)), cmap = ListedColormap(['w']))
+    image.axes.axis('off')
+
+    # grid lines between the squares
+    plt.hlines([.5, 1.5], -.5, 2.5)
+    plt.vlines([.5, 1.5], -.5, 2.5)
+
+    # marks: the x coordinate is the column and the y coordinate is the row
+    for row in range(3):
+        for col in range(3):
+            plt.text(col, row, b[row, col],
+                 fontsize = 64,
+                 color = colors[b[row, col]],
+                 horizontalalignment = 'center',
+                 verticalalignment = 'center')
+
+    # small index labels slightly above the center of each square
+    if help:
+        for row in range(3):
+            for col in range(3):
+                plt.text(col, row - .35, col + 3 * row,
+                     fontsize = 12,
+                     color = 'gray',
+                     horizontalalignment = 'center',
+                     verticalalignment = 'center')
+
+    plt.show()
+
+
+# Random Baseline player
+def random_player(board, player = None):
+    """Baseline agent that picks a random empty square (all permissible actions
+    have equal probability).
+
+    player is accepted as an argument but is not used.
+    """
+    legal_moves = actions(board)
+    return np.random.choice(legal_moves)
+
+# Simple Environment
+def play(x, o, N = 100, show_final_board = False):
+    """Let two agents play each other N times. x always makes the first move.
+
+    x and o are agent functions that get the board as the percept together
+    with their mark and return their next action. If show_final_board is True,
+    the final board of each game is shown.
+
+    Returns a dict with the number of games won by 'x', won by 'o', and
+    drawn ('d').
+    """
+    results = {'x': 0, 'o': 0, 'd': 0}
+    turn_order = (('x', x), ('o', o))
+
+    for _ in range(N):
+        board = empty_board()
+        moves_made = 0
+
+        while True:
+            # the players alternate, starting with x
+            mark, agent = turn_order[moves_made % 2]
+            move = agent(board, mark)
+            board = result(board, mark, move)
+            moves_made += 1
+
+            outcome = check_board(board)   # 'n' means the game is not done
+            if outcome != 'n':
+                results[outcome] += 1
+                break
+
+        if show_final_board:
+            show_board(board)
+
+    return results