large tournament

ymahlau · Jan 11, 2024 · 5ff6dab · 5ff6dab
1 parent 5f0eaff
commit 5ff6dab
Show file tree

Hide file tree

Showing 6 changed files with 276 additions and 21 deletions.
diff --git a/environment.yml b/environment.yml
@@ -28,3 +28,4 @@ dependencies:
     - rliable
     - pygame
     - ipywidgets
+    - trueskill
diff --git a/scripts/depth/eval_tournament.py b/scripts/depth/eval_tournament.py
@@ -4,7 +4,12 @@
 from pathlib import Path
 import pickle
 import random
+from matplotlib import pyplot as plt
 import numpy as np
+import scipy
+import seaborn
+from trueskill import Rating, rate
+import trueskill
 from src.agent import Agent
 from src.agent.albatross import AlbatrossAgent, AlbatrossAgentConfig
 from src.agent.initialization import get_agent_from_config
@@ -64,19 +69,18 @@ def play_single_game(
 
 def play_tournament(experiment_id: int):
     num_seeds = 5
-    num_parts = 20
+    num_parts = 5
 
-    depths = np.asarray(list(range(200, 2001, 200)), dtype=int)
+    depths = np.asarray(list(range(50, 2001, 50)), dtype=int)
     depth_dict = {
         x: d for x, d in enumerate(depths)
     }
     depth_dict[len(depths)] = 1  # albatross
     depth_dict[len(depths) + 1] = 1  # alphaZero
 
-    save_path = Path(__file__).parent.parent.parent / 'a_data' / 'bs_depth'
-    base_name = 'trnmt_small'
-    prefix = '4nd7'
-    num_games_per_part = 100
+    save_path = Path(__file__).parent.parent.parent / 'a_data' / 'trnmt'
+    base_name = 'trnmt'
+    num_games_per_part = 1000
 
 
     game_dict = {
@@ -87,12 +91,16 @@ def play_tournament(experiment_id: int):
     }
 
     pref_lists = [
+        list(game_dict.keys()),
         list(range(num_seeds)),
         list(range(int(num_parts)))
     ]
     prod = list(itertools.product(*pref_lists))
-    seed, cur_part = prod[experiment_id]
+    prefix, seed, cur_part = prod[experiment_id]
+    full_save_path = save_path / f'{base_name}_{prefix}_{seed}_{cur_part}.pkl'
     num_agents = 4 if prefix.startswith("4") else 2
+    print(f"{datetime.now()} - Started {prefix} with {seed=}, {cur_part=}", flush=True)
+    print(f"{full_save_path=}", flush=True)
 
     set_seed((seed + 1) * cur_part)  
     game_cfg = game_dict[prefix]
@@ -145,25 +153,145 @@ def play_tournament(experiment_id: int):
         sampled_indices = random.sample(range(len(depth_dict)), 4)
         cur_agent_list = [agent_dict[idx] for idx in sampled_indices]
         cur_iterations = [depth_dict[idx] for idx in sampled_indices]
+        cur_temperatures = [math.inf if idx in [len(depths), len(depths) + 1] else 1 for idx in sampled_indices]
 
         cur_result, cur_length = play_single_game(
             game=game,
             agent_list=cur_agent_list,
             iterations=cur_iterations,
-            temperatures=[math.inf for _ in range(num_agents)],
+            temperatures=cur_temperatures,
             prevent_draw=False,
             verbose_level=0,
         )
         result_list.append((sampled_indices, cur_result, cur_length))
 
-        full_save_path = save_path / f'{base_name}_{prefix}_{seed}_{cur_part}.pkl'
         with open(full_save_path, 'wb') as f:
             pickle.dump(result_list, f)
-        print(f"{datetime.now()} - {game_idx}: {sampled_indices} - {cur_result}, {cur_length}")
+        print(f"{datetime.now()} - {game_idx}/{num_games_per_part}: {sampled_indices} - {cur_result}, {cur_length}", flush=True)
 
 
 
+def plot_avg_outcomes():
+    """Plot the average tournament outcome of every agent as a bar chart.
+
+    Reads the pickled result lists of the tournament (one file per seed and
+    part), averages the outcomes of each agent within a seed, and draws the
+    mean over seeds with the standard deviation over seeds as error bars.
+    The chart is written to ``<base_name>_mean_vals.pdf`` in the image folder.
+    """
+    depths = np.asarray(list(range(200, 2001, 200)), dtype=int)
+    save_path = Path(__file__).parent.parent.parent / 'a_data' / 'trnmt'
+    img_path = Path(__file__).parent.parent.parent / 'a_img' / 'trnmt'
+    base_name = 'trnmt_small'
+    prefix = '4nd7'
+    num_seeds = 5
+    num_parts = 5
+
+    # Agent indices: one per search depth, then Albatross and AlphaZero.
+    # The two trailing indices are derived from len(depths) instead of being
+    # hard-coded, so the labels stay aligned if the depth grid is changed.
+    name_dict = {idx: f'd{x}' for idx, x in enumerate(depths)}
+    name_dict[len(depths)] = 'Alb.'
+    name_dict[len(depths) + 1] = 'AlphaZero'
+    labels = list(name_dict.values())
+
+    # result_dict[agent_idx][seed] -> all outcomes of that agent in that seed
+    result_dict = {
+        x: [[] for _ in range(num_seeds)] for x in name_dict
+    }
+
+    for seed in range(num_seeds):
+        for part in range(num_parts):
+            full_save_path = save_path / f'{base_name}_{prefix}_{seed}_{part}.pkl'
+            with open(full_save_path, 'rb') as f:
+                cur_list = pickle.load(f)
+            # tpl[0] holds the agent indices of a game, tpl[1] their results
+            for tpl in cur_list:
+                for idx, res in zip(tpl[0], tpl[1]):
+                    result_dict[idx][seed].append(res)
+
+    # rows: agents, columns: per-seed average outcome
+    data_arr = np.asarray([
+        [
+            np.asarray(seed_vals).mean() for seed_vals in agent_vals
+        ]
+        for agent_vals in result_dict.values()
+    ])
+    mean_vals = data_arr.mean(axis=-1)
+    std_vals = data_arr.std(axis=-1)
+
+    # Open exactly one figure and close it after saving. Calling plt.clf()
+    # before plt.figure() would leave an additional empty figure open.
+    fig = plt.figure(dpi=600)
+    seaborn.set_theme(style='whitegrid')
+
+    ind = np.arange(len(name_dict))
+    plt.bar(ind, mean_vals, yerr=std_vals, align='center', alpha=0.7, ecolor='black', capsize=10)
 
+    fontsize = 'medium'
+    plt.xlabel('Agents', fontsize=fontsize)
+    plt.ylabel('Avg. Outcome', fontsize=fontsize)
+    plt.yticks(fontsize=fontsize)
+    plt.gca().set_xticks(ind)
+    plt.gca().set_xticklabels(labels)
+    plt.xticks(fontsize='xx-small')
+    plt.tight_layout()
+    # savefig does not create missing directories
+    img_path.mkdir(parents=True, exist_ok=True)
+    plt.savefig(img_path / f'{base_name}_mean_vals.pdf', bbox_inches='tight', pad_inches=0.0)
+    plt.close(fig)
+
+
+def plot_trueskill_ratings():
+    """Plot the TrueSkill rating of every tournament agent as a box plot.
+
+    For each seed all agents start from a default ``Rating()`` and are updated
+    game by game in the order the results were stored. The final ``mu`` of
+    every agent is collected per seed, and the distribution over seeds is
+    drawn as one box per agent. The plot is written to
+    ``<base_name>_<prefix>_boxplot.pdf`` in the image folder.
+    """
+    # Imported here so the submodule is loaded explicitly: a bare
+    # ``import scipy`` does not expose ``scipy.stats`` on every SciPy version.
+    from scipy.stats import rankdata
+
+    depths = np.asarray(list(range(200, 2001, 200)), dtype=int)
+    save_path = Path(__file__).parent.parent.parent / 'a_data' / 'trnmt'
+    img_path = Path(__file__).parent.parent.parent / 'a_img' / 'trnmt'
+    base_name = 'trnmt_small'
+    prefix = '4nd7'
+    num_seeds = 5
+    num_parts = 5
+
+    # Agent indices: one per search depth, then Albatross and AlphaZero.
+    # The two trailing indices are derived from len(depths) instead of being
+    # hard-coded, so the labels stay aligned if the depth grid is changed.
+    name_dict = {idx: f'd{x}' for idx, x in enumerate(depths)}
+    name_dict[len(depths)] = 'Alb.'
+    name_dict[len(depths) + 1] = 'AlphaZero'
+    labels = list(name_dict.values())
+
+    result_list = []
+    for seed in range(num_seeds):
+        # ratings are independent between seeds
+        rating_dict = {
+            x: Rating() for x in name_dict
+        }
+        for part in range(num_parts):
+            full_save_path = save_path / f'{base_name}_{prefix}_{seed}_{part}.pkl'
+            with open(full_save_path, 'rb') as f:
+                cur_list = pickle.load(f)
+            # tpl[0] holds the agent indices of a game, tpl[1] their results
+            for tpl in cur_list:
+                # Convert first so negation also works if the stored results
+                # are a plain sequence rather than an array.
+                outcomes = np.asarray(tpl[1], dtype=float)
+                # Highest result gets rank 0; equal results share a rank.
+                ranks = (rankdata(-outcomes, method='dense') - 1).tolist()
+                # every agent forms its own single-player team
+                rating_groups = [(rating_dict[idx],) for idx in tpl[0]]
+                new_rating_groups = trueskill.rate(rating_groups, ranks=ranks)
+                for idx, new_r in zip(tpl[0], new_rating_groups):
+                    rating_dict[idx] = new_r[0]
+        seed_means = [r.mu for r in rating_dict.values()]
+        result_list.append(seed_means)
+
+    # rows: seeds, columns: agents
+    data_arr = np.asarray(result_list)
+
+    # Open exactly one figure and close it after saving. Calling plt.clf()
+    # before plt.figure() would leave an additional empty figure open.
+    fig = plt.figure(dpi=600)
+    seaborn.set_theme(style='whitegrid')
+    # NOTE(review): newer matplotlib releases rename `labels` to `tick_labels`;
+    # confirm against the matplotlib version pinned for this project.
+    plt.boxplot(data_arr, labels=labels, showfliers=True)
+
+    fontsize = 'medium'
+    plt.xlabel('Agents', fontsize=fontsize)
+    plt.ylabel('TrueSkill Score', fontsize=fontsize)
+    plt.yticks(fontsize=fontsize)
+    plt.xticks(fontsize='xx-small')
+    plt.tight_layout()
+    # savefig does not create missing directories
+    img_path.mkdir(parents=True, exist_ok=True)
+    plt.savefig(img_path / f'{base_name}_{prefix}_boxplot.pdf', bbox_inches='tight', pad_inches=0.0)
+    plt.close(fig)
 
 if __name__ == '__main__':
     play_tournament(0)
+    # plot_avg_outcomes()
+    # plot_trueskill_ratings()
diff --git a/scripts/depth/evaluate_bs_depth.py b/scripts/depth/evaluate_bs_depth.py
@@ -22,9 +22,10 @@ def evaluate_bs_depth_func(experiment_id: int):
     num_parts = 10
     search_iterations = np.arange(50, 2001, 50)
     # search_iterations = np.asarray([500])
+    # search_iterations = np.arange(100, 1001, 100)
     save_path = Path(__file__).parent.parent.parent / 'a_data' / 'bs_depth'
     # save_path = Path(__file__).parent.parent.parent / 'a_data' / 'temp'
-    base_name = 'nodraw_bs'
+    base_name = 'base_sampl'
     eval_az = True
 
     game_dict = {
@@ -43,8 +44,8 @@ def evaluate_bs_depth_func(experiment_id: int):
     prefix, seed, cur_game_id = prod[experiment_id]
     assert isinstance(prefix, str)
     num_games_per_part = 100
-    if 'n' in prefix:
-        num_games_per_part = 50
+    # if 'n' in prefix:
+    #     num_games_per_part = 50
 
     # we do not want to set the same seed in every game and repeat the same play.
     # Therefore, set a different seed for every game and base seed
@@ -97,8 +98,8 @@ def evaluate_bs_depth_func(experiment_id: int):
     if os.path.exists(full_save_path):
         with open(full_save_path, 'rb') as f:
             last_result_dict = pickle.load(f)
-        # full_result_list_alb = last_result_dict['results_alb'].tolist()
-        # full_length_list_alb = last_result_dict['lengths_alb'].tolist()
+        full_result_list_alb = last_result_dict['results_alb'].tolist()
+        full_length_list_alb = last_result_dict['lengths_alb'].tolist()
         if eval_az:
             full_result_list_az = last_result_dict['results_az'].tolist()
             full_length_list_az = last_result_dict['lengths_az'].tolist()
@@ -123,7 +124,7 @@ def evaluate_bs_depth_func(experiment_id: int):
             opponent_list=[base_agent],
             num_episodes=[num_games_per_part],
             enemy_iterations=cur_iterations,
-            temperature_list=[math.inf],
+            temperature_list=[1],
             own_temperature=math.inf,
             prevent_draw=False,
             switch_positions=False,
@@ -140,7 +141,7 @@ def evaluate_bs_depth_func(experiment_id: int):
                 opponent_list=[base_agent],
                 num_episodes=[num_games_per_part],
                 enemy_iterations=cur_iterations,
-                temperature_list=[math.inf],
+                temperature_list=[1],
                 own_temperature=math.inf,
                 prevent_draw=False,
                 switch_positions=False,

diff --git a/scripts/plotting/plot_depth_bs_temp.py b/scripts/plotting/plot_depth_bs_temp.py
@@ -0,0 +1,104 @@
+
+from pathlib import Path
+import pickle
+from matplotlib import pyplot as plt
+
+import numpy as np
+import scipy
+import seaborn
+from src.misc.const import COLORS, LIGHT_COLORS, LINESTYLES
+
+from src.misc.plotting import plot_filled_std_curves
+
+
+def plot_bs_depth():
+    """Plot reward against enemy search iterations for AlphaZero and Albatross.
+
+    For every game abbreviation in ``base_names`` the pickled evaluation
+    results of all seeds and parts are loaded, averaged per seed, and drawn as
+    mean curves with a standard-deviation band over seeds. One PDF per
+    abbreviation is written to the image folder.
+    """
+    data_path = Path(__file__).parent.parent.parent / 'a_data' / 'temp'
+    img_path = Path(__file__).parent.parent.parent / 'a_img' / 'temp'
+    num_parts = 10
+    num_seeds = 5
+    depths = np.arange(100, 1001, 100)
+
+    # prefix -> (alb, az)
+    base_names = {
+        'd7': ('both_sampl', 'both_sampl'),
+        '4d7': ('both_sampl', 'both_sampl'),
+        'nd7': ('both_sampl', 'both_sampl')
+    }
+
+    # savefig does not create missing directories
+    img_path.mkdir(parents=True, exist_ok=True)
+
+    for abbrev, (alb_base, az_base) in base_names.items():
+        full_list_alb, full_list_az = [], []
+        # seed-major order: all parts of seed 0, then all parts of seed 1, ...
+        for seed in range(num_seeds):
+            for part in range(num_parts):
+                file_name_alb = f'{alb_base}_{abbrev}_{seed}_{part}.pkl'
+                with open(data_path / file_name_alb, 'rb') as f:
+                    cur_dict = pickle.load(f)
+                full_list_alb.append(cur_dict['results_alb'])
+
+                file_name_az = f'{az_base}_{abbrev}_{seed}_{part}.pkl'
+                with open(data_path / file_name_az, 'rb') as f:
+                    cur_dict = pickle.load(f)
+                full_list_az.append(cur_dict['results_az'])
+        # assumes each stored array is (depth, opponent, game) - TODO confirm
+        full_arr_alb = np.concatenate(full_list_alb, axis=2)[:, 0, :]
+        full_arr_az = np.concatenate(full_list_az, axis=2)[:, 0, :]
+
+        # Because files were concatenated seed-major, splitting the last axis
+        # into num_seeds equal chunks yields one average per depth and seed.
+        full_arr_alb = full_arr_alb.reshape(len(depths), num_seeds, -1).mean(axis=-1)
+        full_arr_az = full_arr_az.reshape(len(depths), num_seeds, -1).mean(axis=-1)
+
+        # One figure per abbreviation, closed after saving so figures do not
+        # accumulate across loop iterations.
+        fig = plt.figure(dpi=600)
+        seaborn.set_theme(style='whitegrid')
+
+        # AlphaZero
+        plot_filled_std_curves(
+            x=depths,
+            mean=full_arr_az.mean(axis=-1),
+            std=full_arr_az.std(axis=-1),
+            color=COLORS[0],
+            lighter_color=LIGHT_COLORS[0],
+            linestyle=LINESTYLES[0],
+            label='AlphaZero',
+        )
+
+        # albatross
+        plot_filled_std_curves(
+            x=depths,
+            mean=full_arr_alb.mean(axis=-1),
+            std=full_arr_alb.std(axis=-1),
+            color=COLORS[1],
+            lighter_color=LIGHT_COLORS[1],
+            linestyle=LINESTYLES[1],
+            label='Albatross',
+        )
+
+        fontsize = 'large'
+        plt.xlabel('Enemy Search Iterations', fontsize=fontsize)
+        plt.ylabel('Reward', fontsize=fontsize)
+        plt.xlim(depths[0], depths[-1])
+        plt.xticks(fontsize=fontsize)
+        plt.yticks(fontsize=fontsize)
+        plt.legend(fontsize='x-large')
+        plt.tight_layout()
+        plt.savefig(img_path / f'{alb_base}_{abbrev}.pdf', bbox_inches='tight', pad_inches=0.0)
+        plt.close(fig)
+
+
+if __name__ == '__main__':
+    plot_bs_depth()
-Original file line number
+Diff line change
@@ Expand Up / @@ -28,3 +28,4 @@ dependencies: @@
         - rliable
         - pygame
         - ipywidgets
+        - trueskill