diff --git a/cutechess.py b/cutechess.py index b048c44..cb98af8 100644 --- a/cutechess.py +++ b/cutechess.py @@ -12,53 +12,69 @@ class MatchResult: class CutechessMan: - def __init__(self, engine: str, book: str, games: int = 120, tc: float = 5.0, hash_size: int = 8, threads: int = 1): + def __init__( + self, + engine: str, + book: str, + games: int = 120, + tc: float = 5.0, + hash: int = 8, + threads: int = 1 + ): self.engine = engine self.book = book self.games = games self.tc = tc self.inc = tc / 100 - self.hash_size = hash_size + self.hash_size = hash self.threads = threads - def get_cutechess_cmd(self, params_a: list[str], params_b: list[str]) -> str: - return f"./tuner/cutechess-cli \ - -engine cmd=./tuner/{self.engine} name={self.engine} proto=uci option.Hash={self.hash_size} {' '.join(params_a)} \ - -engine cmd=./tuner/{self.engine} name={self.engine} proto=uci option.Hash={self.hash_size} {' '.join(params_b)} \ - -resign movecount=3 score=400 \ - -draw movenumber=40 movecount=8 score=10 \ - -repeat \ - -recover \ - -concurrency {self.threads} \ - -each tc={self.tc}+{self.inc} \ - -openings file=tuner/{self.book} format={self.book.split('.')[-1]} order=random plies=16 \ - -games {self.games} \ - -pgnout tuner/games.pgn" + def get_cutechess_cmd( + self, + params_a: list[str], + params_b: list[str] + ) -> str: + return ( + "./tuner/cutechess-cli " + f"-engine cmd=./tuner/{self.engine} name={self.engine} proto=uci " + f"option.Hash={self.hash_size} {' '.join(params_a)} " + f"-engine cmd=./tuner/{self.engine} name={self.engine} proto=uci " + f"option.Hash={self.hash_size} {' '.join(params_b)} " + "-resign movecount=3 score=400 " + "-draw movenumber=40 movecount=8 score=10 " + "-repeat " + "-recover " + f"-concurrency {self.threads} " + f"-each tc={self.tc}+{self.inc} " + f"-openings file=tuner/{self.book} " + f"format={self.book.split('.')[-1]} order=random plies=16 " + f"-games {self.games} " + "-pgnout tuner/games.pgn" + ) def run(self, params_a: list[str], params_b: list[str]) -> MatchResult: cmd = self.get_cutechess_cmd(params_a, params_b) cutechess = Popen(cmd.split(), stdout=PIPE) score = [0, 0, 0] - elo_diff = 0 + elo_diff = 0.0 while True: # Read each line of output until the pipe closes line = cutechess.stdout.readline().strip().decode('ascii') - if line != '': + if line: print(line) else: cutechess.wait() - return MatchResult(score[0], score[1], score[2], elo_diff) + return MatchResult(*score, elo_diff) # Parse WLD score if line.startswith("Score of"): start_index = line.find(":") + 1 end_index = line.find("[") split = line[start_index:end_index].split(" - ") - for i in range(3): - score[i] = int(split[i]) + score = [int(i) for i in split] # Parse Elo Difference if line.startswith("Elo difference"): diff --git a/graph.py b/graph.py index eea4c0b..460232f 100644 --- a/graph.py +++ b/graph.py @@ -1,23 +1,21 @@ -import copy from spsa import Param -from matplotlib import pyplot as plt +import matplotlib.pyplot as plt class Graph: def __init__(self): - self.history = [] + self.history: list[list[Param]] = [] def update(self, params: list[Param]): self.history.append(params) def save(self, file_name: str): - iters = list(range(len(self.history))) - param_values = {} + param_values: dict[str, list[float]] = {} for params in self.history: for param in params: if param.name not in param_values: param_values[param.name] = [] param_values[param.name].append(param.value) - for param in param_values: - plt.plot(iters, param_values[param], label=param) + for name, value in param_values.items(): + plt.plot(range(len(self.history)), value, label=name) plt.savefig(f"tuner/{file_name}") diff --git a/main.py b/main.py index bb0404c..872d127 100644 --- a/main.py +++ b/main.py @@ -6,37 +6,22 @@ import copy -def cutechess_from_config(config: str) -> CutechessMan: - with open(config, "r") as config: - config = json.load(config) - return CutechessMan(config["engine"], - config["book"], - config["games"], - config["tc"], - config["hash"], - config["threads"]) - - -def params_from_config(config: str): - with open(config, "r") as config: - params = [] - config = json.load(config) - for name in config: - param = config[name] - params.append(Param( - name, - param["value"], - param["min_value"], - param["max_value"], - param["elo_per_val"] - )) - return params - - -def spsa_from_config(config: str): - with open(config, "r") as config: - config = json.load(config) - return SpsaParams(config["a"], config["c"], config["A"], config["alpha"], config["gamma"], config["elo"]) +def cutechess_from_config(config_path: str) -> CutechessMan: + with open(config_path) as config_file: + config = json.load(config_file) + return CutechessMan(**config) + + +def params_from_config(config_path: str) -> list[Param]: + with open(config_path) as config_file: + config = json.load(config_file) + return [Param(name, **cfg) for name, cfg in config.items()] + + +def spsa_from_config(config_path: str): + with open(config_path) as config_file: + config = json.load(config_file) + return SpsaParams(**config) def main(): @@ -48,10 +33,10 @@ def main(): while True: spsa.step() - graph.update(copy.deepcopy(spsa.params())) + graph.update(copy.deepcopy(spsa.params)) graph.save("graph.png") - for param in spsa.params(): - print(param.pretty()) + for param in spsa.params: + print(param) if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 4b43f7e..1573235 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -matplotlib \ No newline at end of file +matplotlib==3.5.1 \ No newline at end of file diff --git a/spsa.json b/spsa.json index 17ecbb2..346e621 100644 --- a/spsa.json +++ b/spsa.json @@ -4,5 +4,5 @@ "A": 100, "alpha": 0.6, "gamma": 0.1, - "elo": 2 + "target_elo": 2 } \ No newline at end of file diff --git a/spsa.py b/spsa.py index 178e5ec..b36cf6d 100644 --- a/spsa.py +++ b/spsa.py @@ -1,33 +1,36 @@ - - +from cutechess import CutechessMan from dataclasses import dataclass -import random +from random import randint import copy -from cutechess import CutechessMan, MatchResult - +@dataclass class Param: - def __init__(self, name: str, value: int, min_value: int, max_value: int, elo_per_val: float): - assert elo_per_val > 0 - self.name = name - self.value = value - self.min_value = min_value - self.max_value = max_value - self.elo_per_val = elo_per_val + name: str + value: float + min_value: int + max_value: int + elo_per_val: float + + def __post_init__(self): + assert self.elo_per_val > 0 def get(self) -> int: return round(self.value) def update(self, amt: float): - self.value += amt - self.value = min(max(self.value, self.min_value), self.max_value) + self.value = min(max(self.value + amt, self.min_value), self.max_value) + @property def as_uci(self) -> str: return f"option.{self.name}={self.get()}" - def pretty(self) -> str: - return f"{self.name} = {self.get()} in [{self.min_value}, {self.max_value}] with Elo diff {self.elo_per_val}" + def __str__(self) -> str: + return ( + f"{self.name} = {self.get()} in " + f"[{self.min_value}, {self.max_value}] " + f"with Elo diff {self.elo_per_val}" + ) @dataclass @@ -42,7 +45,12 @@ class SpsaParams: class SpsaTuner: - def __init__(self, spsa_params: SpsaParams, uci_params: list[Param], cutechess: CutechessMan): + def __init__( + self, + spsa_params: SpsaParams, + uci_params: list[Param], + cutechess: CutechessMan + ): self.uci_params = uci_params self.spsa = spsa_params self.cutechess = cutechess @@ -50,16 +58,15 @@ def __init__(self, spsa_params: SpsaParams, uci_params: list[Param], cutechess: self.t = 0 def step(self): - a_t = self.spsa.a / (self.t + 1 + self.spsa.A) ** self.spsa.alpha - c_t = self.spsa.c / (self.t + 1) ** self.spsa.gamma self.t += 1 + a_t = self.spsa.a / (self.t + self.spsa.A) ** self.spsa.alpha + c_t = self.spsa.c / self.t ** self.spsa.gamma - for i in range(len(self.delta)): - self.delta[i] = random.randint(0, 1) * 2 - 1 + self.delta = [randint(0, 1) * 2 - 1 for _ in range(len(self.delta))] uci_params_a = [] uci_params_b = [] - for (param, delta) in zip(self.uci_params, self.delta): + for param, delta in zip(self.uci_params, self.delta): curr_delta = self.spsa.target_elo / param.elo_per_val step = delta * curr_delta * c_t @@ -75,15 +82,16 @@ def step(self): gradient = self.gradient(uci_params_a, uci_params_b) - for (param, delta) in zip(self.uci_params, self.delta): + for param, delta in zip(self.uci_params, self.delta): param_grad = gradient / (delta * c_t) param.update(-param_grad * a_t) + @property def params(self) -> list[Param]: return self.uci_params def gradient(self, params_a: list[Param], params_b: list[Param]) -> float: - params_a = [p.as_uci() for p in params_a] - params_b = [p.as_uci() for p in params_b] - game_result: MatchResult = self.cutechess.run(params_a, params_b) + str_params_a = [p.as_uci for p in params_a] + str_params_b = [p.as_uci for p in params_b] + game_result = self.cutechess.run(str_params_a, str_params_b) return -game_result.elo_diff