DoubleDQN.py
import tensorflow as tf


class Qnetwork():
    def __init__(self, s_size, a_size):
        self.a_size = a_size
        self.s_size = s_size
        self.structure()

    def structure(self):
        init = tf.glorot_normal_initializer()
        # State input; the [[180.0, 180.0]] divisor rescales each feature and assumes s_size == 2.
        self.input = tf.placeholder(dtype=tf.float32, shape=[None, self.s_size])
        self.input_scaled = tf.truediv(self.input, [[180.0, 180.0]])
        # Dropout keep probability; feed 1.0 at evaluation time so predictions are deterministic.
        self.keep_prob = tf.placeholder_with_default(0.75, shape=[])
        # Hidden layer 1: s_size -> 10 units, ReLU, then dropout.
        self.W_1 = tf.get_variable(shape=[self.s_size, 10], dtype=tf.float32, name='w1', initializer=init)
        self.b_1 = tf.get_variable(shape=[10], dtype=tf.float32, name='b1')
        h_1 = tf.nn.relu(tf.matmul(self.input_scaled, self.W_1) + self.b_1)
        self.h_1_drop = tf.nn.dropout(h_1, keep_prob=self.keep_prob)
        # Hidden layer 2: 10 -> 6 units, ReLU, then dropout, fed from the dropped-out layer 1.
        self.W_2 = tf.get_variable(shape=[10, 6], dtype=tf.float32, name='w2', initializer=init)
        self.b_2 = tf.get_variable(shape=[6], dtype=tf.float32, name='b2')
        h_2 = tf.nn.relu(tf.matmul(self.h_1_drop, self.W_2) + self.b_2)
        self.h_2_drop = tf.nn.dropout(h_2, keep_prob=self.keep_prob)
        # Output layer: 6 -> a_size, one Q-value per action.
        self.W_3 = tf.get_variable(shape=[6, self.a_size], dtype=tf.float32, name='w3', initializer=init)
        self.action = tf.matmul(self.h_2_drop, self.W_3)  # Q-value for each action
        self.predict = tf.argmax(input=self.action, axis=1)  # greedy action per state
        # Training inputs: target_y holds the (Double) DQN targets, a the actions actually taken.
        self.target_y = tf.placeholder(dtype=tf.float32, shape=[None])
        self.a = tf.placeholder(dtype=tf.int32, shape=[None])
        # One-hot mask over the taken actions; depth must match the action space.
        self.predict_onehot = tf.one_hot(indices=self.a, depth=self.a_size, on_value=1, off_value=0)
        self.floatpre = tf.cast(self.predict_onehot, tf.float32)
        # Q(s, a) of the taken action for each row in the batch.
        self.Q = tf.reduce_sum(tf.multiply(self.floatpre, self.action), axis=1)
        self.error = tf.square(self.target_y - self.Q)
        self.loss = tf.reduce_mean(self.error)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.update = self.optimizer.minimize(self.loss)
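
For context, here is a minimal Double DQN training-step sketch built around this class. It is an illustrative assumption, not part of the original file: the scope names, gamma, batch size, and the random dummy batch are placeholders for a real environment and replay buffer. The point it shows is the Double DQN decoupling: the online network selects the greedy next action, and the target network evaluates it.

import numpy as np
import tensorflow as tf

s_size, a_size, gamma, batch = 2, 4, 0.99, 32  # assumed hyperparameters

# Separate variable scopes keep the two networks' tf.get_variable names distinct.
with tf.variable_scope('online'):
    online = Qnetwork(s_size, a_size)
with tf.variable_scope('target'):
    target = Qnetwork(s_size, a_size)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Dummy transitions standing in for replay-buffer samples.
    s = np.random.uniform(-180, 180, size=(batch, s_size))
    a = np.random.randint(0, a_size, size=batch)
    r = np.random.randn(batch)
    s_next = np.random.uniform(-180, 180, size=(batch, s_size))
    done = np.zeros(batch)

    # Double DQN target: a* = argmax_a Q_online(s', a), evaluated by Q_target(s', a*).
    a_star = sess.run(online.predict,
                      feed_dict={online.input: s_next, online.keep_prob: 1.0})
    q_target_all = sess.run(target.action,
                            feed_dict={target.input: s_next, target.keep_prob: 1.0})
    q_next = q_target_all[np.arange(batch), a_star]
    y = r + gamma * (1.0 - done) * q_next

    # One gradient step on the online network.
    sess.run(online.update,
             feed_dict={online.input: s, online.a: a, online.target_y: y})

In a full agent the target network's variables would also be synced from the online network every so many steps; tf.assign between the two variable scopes is the usual TF1 way to do that.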