# numpy_impl.py
import collections
import numpy as np
import math
class UCTNode:
    """A node in a UCT search tree.

    A node does not store its own visit count or accumulated value.  Those
    live in the parent's ``child_number_visits`` / ``child_total_value``
    containers under the key ``move`` and are exposed here through the
    ``number_visits`` and ``total_value`` properties.  Every node therefore
    needs a parent object providing those two containers, the root included.
    """

    # Length of each per-child statistics array.
    # NOTE(review): presumably 19 * 19 board points plus one pass move;
    # confirm against the game implementation.
    NUM_MOVES = 362

    def __init__(self, game_state, move, parent=None):
        """Create the node for ``game_state``, reached from ``parent`` by ``move``.

        Args:
            game_state: Position at this node.  This class calls its
                ``play(move)`` method and reads its ``to_play`` attribute.
            move: Key of this node inside the parent's statistics containers.
            parent: Object holding this node's statistics.  With the default
                of None the ``number_visits`` / ``total_value`` properties
                cannot be used.
        """
        self.game_state = game_state
        self.move = move
        self.is_expanded = False
        self.parent = parent  # Optional[UCTNode]
        self.children = {}  # Dict[move, UCTNode]
        self.child_priors = np.zeros([self.NUM_MOVES], dtype=np.float32)
        self.child_total_value = np.zeros([self.NUM_MOVES], dtype=np.float32)
        self.child_number_visits = np.zeros([self.NUM_MOVES], dtype=np.float32)

    @property
    def number_visits(self):
        """Visit count of this node, read from the parent's container."""
        return self.parent.child_number_visits[self.move]

    @number_visits.setter
    def number_visits(self, value):
        self.parent.child_number_visits[self.move] = value

    @property
    def total_value(self):
        """Accumulated value of this node, read from the parent's container."""
        return self.parent.child_total_value[self.move]

    @total_value.setter
    def total_value(self, value):
        self.parent.child_total_value[self.move] = value

    def child_Q(self):
        """Return per-child mean value: total value / (1 + visits)."""
        # The +1 keeps the division defined for children never visited.
        return self.child_total_value / (1 + self.child_number_visits)

    def child_U(self):
        """Return per-child exploration term.

        Computed as sqrt(own visits) * prior / (1 + child visits).
        """
        return math.sqrt(self.number_visits) * (
            self.child_priors / (1 + self.child_number_visits))

    def best_child(self):
        """Return the index of the child maximising ``child_Q() + child_U()``."""
        return np.argmax(self.child_Q() + self.child_U())

    def select_leaf(self):
        """Descend along best children and return the first unexpanded node.

        Child nodes along the path are created on demand.
        """
        current = self
        while current.is_expanded:
            best_move = current.best_child()
            current = current.maybe_add_child(best_move)
        return current

    def expand(self, child_priors):
        """Mark this node as expanded and store ``child_priors`` as given."""
        self.is_expanded = True
        self.child_priors = child_priors

    def maybe_add_child(self, move):
        """Return the child for ``move``, creating it on first use.

        A new child's position is ``self.game_state.play(move)``.
        """
        if move not in self.children:
            self.children[move] = UCTNode(
                self.game_state.play(move), move, parent=self)
        return self.children[move]

    def backup(self, value_estimate: float):
        """Propagate ``value_estimate`` from this node towards the root.

        Every node on the path, starting with this one, receives one more
        visit and the same signed value.  The walk stops at the first object
        whose ``parent`` is None; that object itself is not updated.
        """
        # Loop-invariant, so computed once.  The sign is taken from the node
        # backup() was called on and applied unchanged to every ancestor.
        # NOTE(review): confirm a per-node sign (current.game_state.to_play)
        # was not intended.
        signed_value = value_estimate * self.game_state.to_play
        current = self
        while current.parent is not None:
            current.number_visits += 1
            current.total_value += signed_value
            current = current.parent
class DummyNode:
    """Stand-in parent that holds the statistics of a search-tree root.

    It exposes ``parent``, ``child_total_value`` and ``child_number_visits``.
    The two statistics containers are ``defaultdict(float)``, so any key,
    including ``None``, reads as 0.0 until it is written.
    """

    def __init__(self):
        # No parent of its own.
        self.parent = None
        self.child_total_value = collections.defaultdict(float)
        self.child_number_visits = collections.defaultdict(float)
def UCT_search(game_state, num_reads):
    """Run ``num_reads`` search iterations from ``game_state``.

    Each iteration selects a leaf, evaluates its position with
    ``NeuralNet.evaluate``, expands the leaf with the returned priors and
    backs up the returned value.

    Returns:
        The index of the root's most-visited child (``np.argmax`` result).
    """
    # The root keeps its own statistics in a DummyNode parent.
    tree_root = UCTNode(game_state, move=None, parent=DummyNode())
    for _ in range(num_reads):
        node = tree_root.select_leaf()
        evaluation = NeuralNet.evaluate(node.game_state)
        priors, value = evaluation
        node.expand(priors)
        node.backup(value)
    visit_counts = tree_root.child_number_visits
    return np.argmax(visit_counts)
class NeuralNet:
    """Placeholder evaluator that returns random numbers instead of a model."""

    @classmethod
    def evaluate(cls, game_state):
        """Return ``(child_priors, value_estimate)`` for ``game_state``.

        ``game_state`` is not inspected.  The priors are a length-362 array
        and the value a single float, both drawn from ``np.random.random``
        (uniform over [0, 1)).
        """
        return np.random.random([362]), np.random.random()
class GameState():
    """Minimal game position that records only which side is to move."""

    def __init__(self, to_play=1):
        # Side to move; play() negates it for the successor position.
        self.to_play = to_play

    def play(self, move):
        """Return a new GameState with ``to_play`` negated.

        ``move`` is accepted but not used.
        """
        next_to_play = -self.to_play
        return GameState(next_to_play)
# Benchmark: time one search and report the process's peak memory.
import resource
import sys
import time

num_reads = 10000

# perf_counter() is the clock intended for measuring short intervals;
# time.time() can jump if the system clock is adjusted mid-run.
tick = time.perf_counter()
UCT_search(GameState(), num_reads)
tock = time.perf_counter()
print("Took %s sec to run %s times" % (tock - tick, num_reads))

# ru_maxrss is reported in kilobytes on Linux but in bytes on macOS.
# Normalise to bytes so the value matches the "B" suffix printed below.
peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
if sys.platform != "darwin":
    peak_rss *= 1024
print("Consumed %sB memory" % peak_rss)