-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseeker_trees.py
279 lines (222 loc) · 11.3 KB
/
seeker_trees.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
from cyberchase import Seeker, Hider, Match, Board
from typing import List, Tuple
import numpy as np
from scipy.special import softmax
import time
class Seeker_trees(Seeker):
def __init__(self):
self.MAX_SQUARES = 80
self.PROB_CUTOFF = 1e-3
self.impatience = 0
self.MAX_IMPATIENCE = 2
self.N_MOVES_KEPT = 18
self.prev_moves = np.array([(2,2)] * self.N_MOVES_KEPT)
self.PREV_WEIGHT = 12.
self.PREV_DROPOFF = 2.0
self.CAMP_TIMER = 260
self.CAMP_TIMER_SEEK = 50
self.hider_known = True
self.hider_location = np.array((27,27))
self.time_since_seen = np.zeros((30,30),dtype=int)
self.passable = None
self.hider_probabilities = None
self.cum_hider_probabilities = None
#self.passable_nonvisible = None
self.i = -1
self.hider_possible_moves = np.array([[ 0, -1, 0, 1, -2, -1, 0, 1, 2, -3, -2, -1, 0, 1, 2, 3,
-4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3,
-2, -1, 0, 1, 2, -1, 0, 1, 0],
[-4, -3, -3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 3, 3, 3, 4]])
def get_action_from_state(self,
board_state: List[List[int]],
visible_squares: List[Tuple[int, int]],
valid_moves: List[Tuple[int, int]]) -> Tuple[int, int]:
board_state = np.array(board_state)
visible_squares = np.array(visible_squares)
valid_moves = np.array(valid_moves)
#Get our position from the board state
my_pos = np.array(np.unravel_index(np.argmax((board_state.ravel() == Board.PLAYER_1)), (30,30))).ravel()
#Update which squares we can see
self.time_since_seen += 1
self.time_since_seen[visible_squares[:,0],visible_squares[:,1]] = 0
# Keep track of turn number in case we want it
self.i += 1
self.hider_location = self._get_hider_loc(board_state, visible_squares)
# Figure out where we want to go
if self.passable is None:
# Only runs on first turn
self.passable = board_state != 3
self.init_hider_probabilities()
else:
# Diffuse probabilities
self.update_hider_probabilities(board_state, visible_squares, self.MAX_SQUARES)
#Find the best location, move towards it
if self.hider_location is None:
best_loc = self._get_best_target_loc()
else:
best_loc = self.hider_location
if self.hider_location is not None and np.min(np.sum(np.abs(valid_moves - self.hider_location.reshape(1,-1)), axis=1)) < 0.5:
# i.e. if we can move to the hider
best_move = self.hider_location
else:
best_move = self._get_best_move(my_pos, best_loc, valid_moves)
#update some things before returning
self.prev_moves[self.i % self.N_MOVES_KEPT,:] = my_pos
print(my_pos, best_move, self.hider_location)
return tuple(best_move)
def _get_best_target_loc(self):
time_weights = 1 + np.sqrt(self.time_since_seen)
if self.i < self.CAMP_TIMER:
value = self.hider_probabilities * time_weights
else:
value = self.cum_hider_probabilities * time_weights
mask = self.time_since_seen > self.CAMP_TIMER_SEEK
if np.any(mask):
time_weights[mask] += 0.05 * (self.time_since_seen[mask] - self.CAMP_TIMER_SEEK)**2
best_loc_unraveled = np.argmax(value.ravel())
return np.array(np.unravel_index(best_loc_unraveled, (30,30)))
def init_hider_probabilities(self):
self.hider_probabilities = np.zeros_like(self.passable, dtype=float)
self.hider_probabilities[27, 27] = 1.
self.cum_hider_probabilities = np.zeros_like(self.hider_probabilities)
def reinit_hider_probabilities_random(self, n):
self.hider_probabilities[:,:] = 0.
for _ in range(n):
x,y = np.random.randint(30,size=2)
if self.passable[x,y]:
self.hider_probabilities[x,y] = 1.
break
self.cum_hider_probabilities = np.copy(self.hider_probabilities)
def update_hider_probabilities(self, board_state, visible_squares, max_squares):
# Check if the hider is visible
if self.hider_location is not None:
#Reset probabilities and time-since-seen
self.hider_known = True
self.hider_probabilities[:,:] = 0.
self.hider_probabilities[self.hider_location[0], self.hider_location[1]] = 1.
self.time_since_seen[:,:] = 0
self.cum_hider_probabilities = np.copy(self.hider_probabilities)
else:
self.hider_known = False
# Otherwise, update probabilities
# Find all possibly valid squares
possible_squares = np.copy(self.passable)
possible_squares[visible_squares[:,0],visible_squares[:,1]] = False
pass_padded = np.full((38,38), False)
pass_padded[4:-4,4:-4] = possible_squares
#Initialize array
new_probs = np.zeros((30, 30))
# We can't do all of the squares because that takes too long
update_mask = possible_squares * (self.hider_probabilities > self.PROB_CUTOFF)
total_count = np.sum(update_mask)
if total_count <= max_squares:
#print(self.i,"Using all")
coords = np.array(np.where(update_mask)).T
else:
#print(self.i,"Using sample")
#Take only a subset of them
coords_unraveled = np.random.choice(900, size=int(max_squares*1.1))
coords = np.array(np.unravel_index(coords_unraveled, (30,30))).T
mask = update_mask[coords[:,0], coords[:,1]]
coords = coords[mask,:]
#Update
for x,y in coords:
X = x+self.hider_possible_moves[0]
Y = y+self.hider_possible_moves[1]
#Check which are valid; this will exclude walls and also things that are out of bounds
c_mask = pass_padded[X+4,Y+4]
#k = np.sum(self.passable[X[c_mask],Y[c_mask]])
k = np.sum(c_mask)
new_probs[X[c_mask],Y[c_mask]] += self.hider_probabilities[x,y]/k
# Preserve old probabilities if needed
if total_count > max_squares:
new_probs += self.hider_probabilities * (total_count / max_squares) * update_mask
if np.sum(new_probs) > 0:
# Normalize
new_probs /= np.sum(new_probs)
self.hider_probabilities = new_probs
else:
#Something went wrong
self.reinit_hider_probabilities_random(3)
#Update cum probabilities
self.cum_hider_probabilities += self.hider_probabilities
self.cum_hider_probabilities[visible_squares[:,0],visible_squares[:,1]] = 0.
def _get_hider_loc(self, board_state, visible_squares) -> Tuple:
"""Vectorized version of function from scotland_yard.py.
Returns None if hider is not in sight, otherwise returns hider's coordinate"""
player2 = board_state[visible_squares[:,0],visible_squares[:,1]] == Board.PLAYER_2
if np.any(player2):
return visible_squares[np.argmax(player2)]
else:
return None
def convert_id_to_cord(self, i):
return i // 30, i % 30
def convert_coord_to_id(self, y_location, x_location):
return y_location * 30 + x_location
def convert_to_index(self, i, j):
return i * 30 + j
def _get_best_move(self, start, end, dummy):
unexplored_areas = {i for i in range(30*30)}
# check if end points are valid
if not(self.passable[start[0],start[1]]) or not(self.passable[end[0],end[1]]):
return -1
def check_if_valid(i, j):
id = self.convert_to_index(i, j)
if i > 0 and i < 30 and j > 0 and j < 30:
if id in unexplored_areas and self.passable[i,j]:
return 1
return 0
possible_moves = [start]
lengths_to_goal = [ np.linalg.norm(start - end) ]
paths_used = [[start]]
step_size = 1
def look_around(i, j, paths_used_reserved):
if i > 0:
if check_if_valid(i - step_size, j):
possible_moves.append(np.array([i - step_size, j]))
lengths_to_goal.append( np.linalg.norm(np.array([i - step_size, j]) - end) )
paths_used.append( paths_used_reserved + [possible_moves[-1]] )
if i < 30:
if check_if_valid(i + step_size, j):
possible_moves.append(np.array([i + step_size, j]))
lengths_to_goal.append(np.linalg.norm(np.array([i + step_size, j]) - end))
paths_used.append( paths_used_reserved + [possible_moves[-1]] )
if j > 0:
if check_if_valid(i, j - step_size):
possible_moves.append(np.array([i, j - step_size]))
lengths_to_goal.append(np.linalg.norm(np.array([i, j - step_size]) - end))
paths_used.append( paths_used_reserved + [possible_moves[-1]] )
if j < 30:
if check_if_valid(i, j + step_size):
possible_moves.append(np.array([i, j + step_size]))
lengths_to_goal.append(np.linalg.norm(np.array([i, j + step_size]) - end))
paths_used.append( paths_used_reserved + [possible_moves[-1]] )
# return possible_moves, lengths_to_goal
iteration = 0
start_time = time.time()
while len(possible_moves) > 0:
inverse_lengths = (1 / np.array(lengths_to_goal))**2
probability_moves = softmax(inverse_lengths)
if (time.time() - start_time) > 0.030:
best_path = np.argmin(lengths_to_goal)
if len(paths_used[best_path]) > 5:
return paths_used[best_path][4]
return paths_used[best_path][-1]
if np.isnan(probability_moves).any():
best_path = np.where(probability_moves != probability_moves)[0][0]
if len(paths_used[best_path]) > 5:
return paths_used[best_path][4]
return paths_used[best_path][-1]
path_to_explore = np.random.choice( len(probability_moves), 1, p=probability_moves )[0]
paths_used_reserved = paths_used[path_to_explore]
look_around(*possible_moves[path_to_explore], paths_used_reserved)
try:
unexplored_areas.remove(self.convert_to_index(*possible_moves[path_to_explore]))
except:
pass
del possible_moves[path_to_explore]
del lengths_to_goal[path_to_explore]
del paths_used[path_to_explore]
iteration += 1