Merge pull request #146 from dee0512/master

Breakout RL experiments
BindsNET · Oct 28, 2018 · da730bc · da730bc
2 parents 2cc97a8 + a3942d8
commit da730bc
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 11 deletions.
diff --git a/bindsnet/pipeline/__init__.py b/bindsnet/pipeline/__init__.py
@@ -79,7 +79,6 @@ def __init__(self, network: Network, environment: Environment, encoding: Callabl
             for l in self.network.layers:
                 self.network.add_monitor(Monitor(self.network.layers[l], 's', self.plot_interval * self.time),
                                          name=f'{l}_spikes')
-
                 if 'v' in self.network.layers[l].__dict__:
                     self.network.add_monitor(Monitor(self.network.layers[l], 'v', self.plot_interval * self.time),
                                              name=f'{l}_voltages')

diff --git a/bindsnet/pipeline/action.py b/bindsnet/pipeline/action.py
@@ -60,11 +60,12 @@ def select_softmax(pipeline: Pipeline, **kwargs) -> int:
     assert pipeline.network.layers[output].n == pipeline.env.action_space.n, \
         'Output layer size not equal to size of action space.'
 
+    assert hasattr(pipeline, 'spike_record'), 'Pipeline has not attribute named: spike_record.'
+
     # Sum of previous iterations' spikes (Not yet implemented)
-    spikes = pipeline.network.layers[output].s
+    spikes = torch.sum(pipeline.spike_record[output], dim=1)
     _sum = torch.sum(torch.exp(spikes.float()))
 
-    # Choose action based on readout neuron spiking
     if _sum == 0:
         action = np.random.choice(pipeline.env.action_space.n)
     else:

diff --git a/examples/breakout/breakout.py b/examples/breakout/breakout.py
@@ -17,23 +17,23 @@
 out = LIFNodes(n=4, refrac=0, traces=True)
 
 # Connections between layers.
-inpt_middle = Connection(source=inpt, target=middle, wmax=1e-2)
-middle_out = Connection(source=middle, target=out, wmax=1e-1, nu=2e-2)
+inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
+middle_out = Connection(source=middle, target=out, wmin=0, wmax=1)
 
 # Add all layers and connections to the network.
-network.add_layer(inpt, name='X')
-network.add_layer(middle, name='Y')
-network.add_layer(out, name='Z')
-network.add_connection(inpt_middle, source='X', target='Y')
-network.add_connection(middle_out, source='Y', target='Z')
+network.add_layer(inpt, name='Input Layer')
+network.add_layer(middle, name='Hidden Layer')
+network.add_layer(out, name='Output Layer')
+network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
+network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')
 
 # Load SpaceInvaders environment.
 environment = GymEnvironment('BreakoutDeterministic-v4')
 environment.reset()
 
 # Build pipeline from specified components.
 pipeline = Pipeline(network, environment, encoding=bernoulli,
-                    action_function=select_softmax, output='Z',
+                    action_function=select_softmax, output='Output Layer',
                     time=100, history_length=1, delta=1,
                     plot_interval=1, render_interval=1)
 

diff --git a/examples/breakout/breakout_stdp.py b/examples/breakout/breakout_stdp.py
@@ -0,0 +1,68 @@
+import torch
+
+from bindsnet.network import Network
+from bindsnet.pipeline import Pipeline
+from bindsnet.learning import MSTDP
+from bindsnet.encoding import bernoulli
+from bindsnet.network.topology import Connection
+from bindsnet.environment import GymEnvironment
+from bindsnet.network.nodes import Input, LIFNodes
+from bindsnet.pipeline.action import select_softmax
+
+# Build network.
+network = Network(dt=1.0)
+
+# Layers of neurons.
+inpt = Input(n=80 * 80, shape=[80, 80], traces=True)
+middle = LIFNodes(n=100, traces=True)
+out = LIFNodes(n=4, refrac=0, traces=True)
+
+# Connections between layers.
+inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
+middle_out = Connection(source=middle, target=out, wmin=0, wmax=1, update_rule=MSTDP, nu=1e-1, norm=0.5 * middle.n)
+
+# Add all layers and connections to the network.
+network.add_layer(inpt, name='Input Layer')
+network.add_layer(middle, name='Hidden Layer')
+network.add_layer(out, name='Output Layer')
+network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
+network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')
+
+# Load Breakout environment.
+environment = GymEnvironment('BreakoutDeterministic-v4')
+environment.reset()
+
+# Build pipeline from specified components.
+pipeline = Pipeline(network, environment, encoding=bernoulli,
+                    action_function=select_softmax, output='Output Layer',
+                    time=100, history_length=1, delta=1,
+                    plot_interval=1, render_interval=1)
+
+
+# Train agent for 100 episodes.
+print("Training: ")
+for i in range(100):
+    pipeline.reset_()
+    # initialize episode reward
+    reward = 0
+    while True:
+        pipeline.step()
+        reward += pipeline.reward
+        if pipeline.done:
+            break
+    print("Episode " + str(i) + " reward:", reward)
+
+# Disable learning (stops MSTDP) for the testing phase.
+pipeline.network.learning = False
+
+print("Testing: ")
+for i in range(100):
+    pipeline.reset_()
+    # initialize episode reward
+    reward = 0
+    while True:
+        pipeline.step()
+        reward += pipeline.reward
+        if pipeline.done:
+            break
+    print("Episode " + str(i) + " reward:", reward)