Agent.py
import matplotlib
from matplotlib.patches import Circle
import matplotlib.pyplot as plt
import math
import random
import pickle


class Agent():
    def __init__(self, ax):
        # Agent body, drawn as a small circle at the center of the axes
        self.circle = Circle((ax.get_xlim()[1] / 2, ax.get_ylim()[1] / 2), 0.025, color='blue')
        self.center = self.circle.center
        self.numEyes = 5
        self.viewDist = 0.2
        self.viewAngle = 60.0 / (self.numEyes - 1)
        # Each eye is a line segment [x0, x1, y0, y1] from the agent's center to a
        # point viewDist away, spread evenly over a 60-degree field of view
        self.eyes = []
        for i in range(self.numEyes):
            eyeAngle = (-30.0 + i * self.viewAngle) * math.pi / 180
            self.eyes.append([self.center[0],
                              self.center[0] + math.sin(eyeAngle) * self.viewDist,
                              self.center[1],
                              self.center[1] + math.cos(eyeAngle) * self.viewDist])
        self.axis = ax
        self.angle = math.pi / 2
        self.axis.add_artist(self.circle)
        self.eyesPlot = []
        for i in range(self.numEyes):
            temp1, = self.axis.plot(self.eyes[i][:2], self.eyes[i][2:], color='black')
            self.eyesPlot.append(temp1)
        self.epsilon = 0.1  # exploration rate [0, 1] (higher means more random actions)
        self.alpha = 0.2    # learning rate (0, 1] (higher means it forgets old info quicker)
        self.gamma = 0.7    # discount factor [0, 1] (lower means it cares more about immediate rewards)
        self.q = {}         # Q-table mapping (state, action) pairs to values
        self.actions = [0, 1, 2, 3, 4]  # forward, turn left little, turn right little, turn left more, turn right more
    def move(self, dist):
        # Move agent in the direction of the center eye by the specified distance
        distx = -math.sin(self.angle - math.pi / 2) * dist
        disty = math.cos(self.angle - math.pi / 2) * dist
        self.center = self.center[0] + distx, self.center[1] + disty
        self.circle.center = self.center
        for i in range(self.numEyes):
            self.eyes[i] = [self.center[0], self.eyes[i][1] + distx, self.center[1], self.eyes[i][3] + disty]
            self.eyesPlot[i].set_xdata(self.eyes[i][:2])
            self.eyesPlot[i].set_ydata(self.eyes[i][2:])
    def turn(self, angle):
        # Turn agent in place by the specified angle (degrees, counterclockwise for positive values)
        angle = angle * math.pi / 180
        for i in range(self.numEyes):
            # Rotate each eye endpoint about the agent's center
            newx = (self.eyes[i][1] - self.eyes[i][0]) * math.cos(angle) - (self.eyes[i][3] - self.eyes[i][2]) * math.sin(angle) + self.eyes[i][0]
            newy = (self.eyes[i][1] - self.eyes[i][0]) * math.sin(angle) + (self.eyes[i][3] - self.eyes[i][2]) * math.cos(angle) + self.eyes[i][2]
            self.eyes[i] = [self.eyes[i][0], newx, self.eyes[i][2], newy]
            self.eyesPlot[i].set_xdata(self.eyes[i][:2])
            self.eyesPlot[i].set_ydata(self.eyes[i][2:])
        self.angle = (self.angle + angle) % (2 * math.pi)
    def atEdge(self):
        # Check if any eye sees far enough beyond an edge that the agent itself is at the edge
        for i in range(self.numEyes):
            if not (-self.viewDist + self.circle.radius * 2 < self.eyes[i][1] < self.axis.get_xlim()[1] + self.viewDist - self.circle.radius * 2) or \
               not (-self.viewDist + self.circle.radius * 2 < self.eyes[i][3] < self.axis.get_ylim()[1] + self.viewDist - self.circle.radius * 2):
                return True
        return False
    def nearEdge(self):
        # Check if any eye extends any distance beyond an edge
        for i in range(self.numEyes):
            if not (0 < self.eyes[i][1] < self.axis.get_xlim()[1]) or not (0 < self.eyes[i][3] < self.axis.get_ylim()[1]):
                return True
        return False
    def getQ(self, state, action):
        # Look up the stored Q-value for a state/action pair (0.0 if unseen)
        return self.q.get((state, action), 0.0)

    def learnQ(self, state, action, reward, value):
        # Set the value for a new state/action pair, or move an existing entry
        # toward the target value: Q(s, a) += alpha * (target - Q(s, a))
        oldv = self.q.get((state, action), None)
        if oldv is None:
            self.q[(state, action)] = reward
        else:
            self.q[(state, action)] = oldv + self.alpha * (value - oldv)
    def chooseAction(self, state):
        # Choose a random action based on the exploration rate (epsilon),
        # otherwise choose the best action based on the stored Q-values
        if random.random() < self.epsilon:
            action = random.choice(self.actions)
        else:
            q = [self.getQ(state, a) for a in self.actions]
            maxQ = max(q)
            count = q.count(maxQ)
            if count > 1:
                # Break ties between equally valued actions at random
                best = [i for i in range(len(self.actions)) if q[i] == maxQ]
                i = random.choice(best)
            else:
                i = q.index(maxQ)
            action = self.actions[i]
        return action
    def learn(self, state1, action1, reward, state2):
        # Q-learning update: look one step ahead for the best future value and
        # move Q(state1, action1) toward reward + gamma * max_a Q(state2, a)
        maxqnew = max([self.getQ(state2, a) for a in self.actions])
        self.learnQ(state1, action1, reward, reward + self.gamma * maxqnew)

    def saveQ(self, filename):
        # Persist the Q-table to disk
        with open(filename, 'wb') as f:
            pickle.dump(self.q, f)

    def loadQ(self, filename):
        # Restore a previously saved Q-table
        with open(filename, 'rb') as f:
            self.q = pickle.load(f)
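

# ---------------------------------------------------------------------------
# This file does not include a driver loop, so the block below is only a
# minimal usage sketch of how the class above could be exercised. The state
# encoding (a tuple of the nearEdge/atEdge flags), the action-to-motion
# mapping, the step sizes, the reward values, the episode length, and the
# 'qtable.pkl' filename are all assumptions made for illustration; they are
# not part of the original code.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    fig, ax = plt.subplots()
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    agent = Agent(ax)

    def sense(a):
        # Hypothetical state encoding built only from the sensors defined above
        return (a.nearEdge(), a.atEdge())

    for step in range(1000):
        state = sense(agent)
        action = agent.chooseAction(state)
        # Assumed mapping of the five actions to motion commands
        if action == 0:
            agent.move(0.01)   # forward
        elif action == 1:
            agent.turn(5)      # turn left a little
        elif action == 2:
            agent.turn(-5)     # turn right a little
        elif action == 3:
            agent.turn(15)     # turn left more
        else:
            agent.turn(-15)    # turn right more
        # Assumed reward: penalize reaching the edge, small bonus for safe forward motion
        if agent.atEdge():
            reward = -1.0
        elif action == 0:
            reward = 0.1
        else:
            reward = 0.0
        agent.learn(state, action, reward, sense(agent))

    agent.saveQ('qtable.pkl')  # hypothetical filename
    plt.show()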