Added visualizations using W&B
afzalxo committed Oct 13, 2019
1 parent 64adb6a commit ec70876
Showing 3 changed files with 125 additions and 18 deletions.
17 changes: 15 additions & 2 deletions bnn_misc.py
@@ -12,15 +12,28 @@ def sign_binarize(inp):
def bin_dense_layer(in_act, num_out, training=True, name='Bin_Dense_L'):
with tf.variable_scope(name+'_params', reuse=False):
l_w = tf.get_variable('weight', [in_act.shape[1], num_out], initializer = tf.random_uniform_initializer(-1, 1), constraint=lambda w: tf.clip_by_value(w, -1., 1.), trainable=True)
#l_w1 = tf.get_variable('weight1', [in_act.shape[1], num_out], initializer = tf.random_uniform_initializer(-1, 1), constraint=lambda w: tf.clip_by_value(w, -1., 1.), trainable=True)
l_b = tf.get_variable('bias', [num_out], initializer=tf.zeros_initializer(), trainable=True)
l_mod = tf.get_variable('modulo', [1], initializer=tf.random_uniform_initializer(-1, 1), constraint=lambda w: w, dtype=tf.float32, trainable=True)
#l_mod = tf.get_variable('modulo', [1], initializer=tf.random_uniform_initializer(-1, 1), constraint=lambda w: w, dtype=tf.float32, trainable=True)
#l_mod1 = tf.get_variable('modulo1', [1], initializer=tf.random_uniform_initializer(-1, 1), constraint=lambda w: w, dtype=tf.float32, trainable=True)

tf.add_to_collection(name+'_w', l_w)
layer_names.append(name)
bin_w = sign_binarize(l_w)
res = tf.matmul(in_act, bin_w)
res = (tf.nn.bias_add(res, sign_binarize(l_b)))
#res = tf.floormod(res, l_mod)
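# Modulo-style activation (experimental): floormod bounds the pre-activation to [0, 200.1);
# the tf.floormod(0.98, l_mod) term appears to exist only to keep the trainable l_mod in the graph (assumption).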
res = tf.floormod(res, 200.1) + tf.floormod(0.98, l_mod)
#res = l_mod1*tf.math.sin(res/l_mod)
#res = tf.floormod(res, 0.21) #+ tf.floormod(0.98, l_mod)
return res

def dense_layer(in_act, num_out, training=True, name='dense_l'):
with tf.variable_scope(name + '_params', reuse=False):
l_w = tf.get_variable('weight', [in_act.shape[1], num_out], initializer=tf.random_uniform_initializer(-1., 1.), trainable=True)
l_w1 = tf.get_variable('weight1', [num_out, num_out], initializer=tf.random_uniform_initializer(-1., 1.), trainable=True)
#res = tf.matmul(tf.math.square(in_act), l_w1) + tf.matmul(in_act, l_w)
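# Sinusoidal dense layer: inputs are scaled by 2*pi, projected with l_w, passed through sin, then mapped again with l_w1.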
res = tf.math.sin(tf.matmul(2*np.pi*in_act, l_w)) #*(tf.matmul(in_act, l_w1))
res = tf.matmul(res, l_w1)
return res

def compute_gradients(loss, optimizer):
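Note: the body of sign_binarize is collapsed in this hunk. A minimal straight-through-estimator binarizer, shown only as a reference sketch and not necessarily the repository's actual implementation, could look like this:

def sign_binarize_ste(x):
    # Forward: hard sign in {-1, +1} (tf.sign maps exact zeros to 0; BNN code usually folds that case into +1).
    # Backward: gradient of clip_by_value, i.e. identity inside [-1, 1] and zero outside.
    clipped = tf.clip_by_value(x, -1., 1.)
    return clipped + tf.stop_gradient(tf.sign(x) - clipped)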
25 changes: 25 additions & 0 deletions bnn_mlp.py
@@ -1,7 +1,9 @@
from bnn_misc import bin_dense_layer
from bnn_misc import dense_layer
import tensorflow as tf
from bnn_misc import sign_binarize


def binn_mlp_mnist(inp, training=True):
alpha = .9
epsilon = 1e-4
@@ -29,3 +31,26 @@ def binn_mlp_mnist(inp, training=True):
return l3_bn


def mlp_mnist(inp, training=True):
alpha = .9
epsilon = 1e-4
inp = tf.nn.dropout(inp, rate=.2)
inp = tf.contrib.layers.flatten(inp)
l0 = dense_layer(inp, 4096, training=training, name='dense_l0')
l0_bn = tf.layers.batch_normalization(l0, training=training, momentum=alpha, epsilon=epsilon)
l0_htanh = l0_bn  # tf.nn.relu(l0_bn)

l1 = dense_layer(l0_htanh, 4096, training=training, name='dense_l1')
l1_bn = tf.layers.batch_normalization(l1, training=training, momentum=alpha, epsilon=epsilon)
l1_htanh = l1_bn  # tf.nn.relu(l1_bn)

l2 = dense_layer(l1_htanh, 4096, training=training, name='dense_l2')
l2_bn = tf.layers.batch_normalization(l2, training=training, momentum=alpha, epsilon=epsilon)
l2_htanh = l2_bn  # tf.nn.relu(l2_bn)

l3 = dense_layer(l2_htanh, 10, training=training, name='dense_l3')
l3_bn = tf.layers.batch_normalization(l3, training=training, momentum=alpha, epsilon=epsilon)
#l3_bn = tf.nn.relu(l3_bn)
#l3_bn = tf.nn.softmax(l3_bn)

return l3_bn
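
A rough usage sketch for either network builder follows; the placeholder names here are illustrative, and the real wiring lives in main_mnist.py below.

# Build the graph: feed flattened MNIST images, get 10 un-normalized class scores back.
inp = tf.placeholder(tf.float32, [None, 784])
training = tf.placeholder(tf.bool)
logits = mlp_mnist(inp, training=training)  # or binn_mlp_mnist(inp, training=training)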
101 changes: 85 additions & 16 deletions main_mnist.py
@@ -5,8 +5,13 @@
import math
from tensorflow.examples.tutorials.mnist import input_data
from bnn_mlp import binn_mlp_mnist
from bnn_mlp import mlp_mnist
from bnn_misc import compute_gradients
import matplotlib.pyplot as plt
import wandb
import plotly

wandb.init(project='modulo_nn', sync_tensorboard=True)

def one_hot_labels(labels, dimension=10):
res = np.zeros((labels.shape[0], dimension))
@@ -37,13 +42,24 @@ def shuffle(X,y):

return X,y

def train_epoch(inp, y, grad_m, training, acc, lo, X, lab, sess, train_bn_step, train_mod_step, batch_size=100):
def train_epoch(inp, y, res, res1, grad_w, training, acc, lo, X, lab, sess, train_bn_step, batch_size=100):
batches = int(len(X)/batch_size)
for i in range(batches):
hist0 = sess.run([acc, lo, grad_m, train_bn_step, train_mod_step], feed_dict={inp:X[i*batch_size:(i+1)*batch_size], y: lab[i*batch_size:(i+1)*batch_size], training:True})
hist0 = sess.run([acc, lo, res, y, res1, grad_w, train_bn_step], feed_dict={inp:X[i*batch_size:(i+1)*batch_size], y: lab[i*batch_size:(i+1)*batch_size], training:True})
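# hist0 unpacks as [batch accuracy, batch loss, hinge terms (res), labels, network outputs (res1), evaluated gradients, train-op result].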
print("Train accuracy: %f, Loss: %f" % (hist0[0], hist0[1]))
wandb.log({'Training Accuracy': hist0[0], 'Training Loss': hist0[1]})
#print(hist0[4][0,:])
#print(hist0[2][0,:])
#print(hist0[3][0,:])
#print(hist0[2][0][0:10][0:10])
#for i in grad_w:
# print(i[1].name)
#for grad, v in hist0[2]:
# print(grad[1000:1002], v[1000:1002])
# print(grad, v)
#if(np.isnan(np.sum(grad)) or np.isnan(np.sum(v))):
# print("Is NAN true")
# print(grad[1000:1002], v[1000:1002])


def main():
batch_size = 100
@@ -52,7 +68,7 @@ def main():
n_output = 10
drop_in = 0.2
drop_hidden = 0.5
epochs = 10
epochs = 1
learning_rate_start = 3e-2
learning_rate_end = 3e-5
learning_rate_decay = (learning_rate_end/learning_rate_start)**(1./epochs)
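# Per-epoch multiplicative decay: with decay_steps set to one epoch of batches below, the
# rate decays from learning_rate_start to roughly learning_rate_end over `epochs` epochs.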
@@ -68,7 +84,15 @@
mnist_data.train.labels[i] = mnist_data.train.labels[i] * 2 - 1
for i in range(mnist_data.test.labels.shape[0]):
mnist_data.test.labels[i] = mnist_data.test.labels[i] * 2 - 1


#Initializing WandB
wandb.config.batch_size = batch_size
wandb.config.epochs = epochs
wandb.config.n_hidden = 4096
wandb.config.learning_rate_start = learning_rate_start
wandb.config.learning_rate_end = learning_rate_end
wandb.config.learning_rate_decay = learning_rate_decay

inp = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_output])
training = tf.placeholder(tf.bool)
@@ -77,9 +101,10 @@ def main():
learning_rate = tf.train.exponential_decay(learning_rate_start, global_step = g_step_kern, decay_steps = int(mnist_data.train.images.shape[0]/batch_size), decay_rate=learning_rate_decay)
lr_mod = tf.train.exponential_decay(lr_mod_start, global_step = g_step_mod, decay_steps = int(mnist_data.train.images.shape[0]/batch_size), decay_rate=lr_mod_decay)
res = binn_mlp_mnist(inp, training=training)
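# Squared hinge loss: labels were remapped to {-1, +1} above, so each output on the
# correct side of the margin (y*res >= 1) contributes zero loss.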
cross_entropy = tf.square(tf.maximum(0., 1.-y*res))
gg = tf.maximum(0., 1.-y*res)
cross_entropy = tf.square(gg)
loss = tf.reduce_mean(cross_entropy)
all_trainable_vars = [var for var in tf.trainable_variables() if not var.name.endswith('modulo:0')]
all_trainable_vars = [var for var in tf.trainable_variables()] # if not var.name.endswith('modulo:0')]
train_mod_vars = [var for var in tf.trainable_variables() if var.name.endswith('modulo:0')]
print("--All Trainable Vars------------------------>>>>>>>")
print(all_trainable_vars)
@@ -94,11 +119,11 @@
print("--End Update Ops---------------------------->>>>>>>")
with tf.control_dependencies(update_operations):
optimizer = tf.train.AdamOptimizer(learning_rate)
optimizer_mod = tf.train.AdamOptimizer(lr_mod)
#optimizer_mod = tf.train.AdamOptimizer(lr_mod)
grad_w = optimizer.compute_gradients(loss = loss, var_list = all_trainable_vars)
train_bn_step = optimizer.apply_gradients(grad_w, global_step = g_step_kern)
grad_m = optimizer_mod.compute_gradients(loss = loss, var_list = train_mod_vars)
train_mod_step = optimizer_mod.apply_gradients(grad_m, global_step = g_step_mod)
#grad_m = optimizer_mod.compute_gradients(loss = loss, var_list = train_mod_vars)
#train_mod_step = optimizer_mod.apply_gradients(grad_m, global_step = g_step_mod)
#train_bn_step = optimizer.minimize(loss = loss, var_list=all_trainable_vars, global_step=g_step_kern)
correct_pred = tf.equal(tf.argmax(res, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
@@ -113,7 +138,7 @@ def main():
X_train, y_train = shuffle(mnist_data.train.images, mnist_data.train.labels)
t_start = time.time()
for i in range(epochs):
train_epoch(inp, y, grad_m, training, accuracy, loss, X_train, y_train, sess, train_bn_step, train_mod_step, batch_size)
train_epoch(inp, y, gg, res, grad_w, training, accuracy, loss, X_train, y_train, sess, train_bn_step, batch_size)
X_train, y_train = shuffle(mnist_data.train.images, mnist_data.train.labels)

hist = sess.run([accuracy, loss],
@@ -123,17 +148,61 @@
training: False
})
print("Epoch %d, Test Acc: %f, Loss %f, Current Best Acc: %f" % (i, hist[0], hist[1], old_acc))
wandb.log({"Test Accuracy": hist[0], "Test Loss": hist[1]})
if hist[0] > old_acc:
net_params = sess.run(tf.global_variables())
net_params = net_params[2:]
np.savez('bnn_mnist_10ep.npz', l0_w=net_params[0], l0_b=net_params[1], l0_gamma=net_params[2], l0_beta=net_params[3], l0_mean=net_params[4], l0_variance=net_params[5], l1_w=net_params[6], l1_b=net_params[7], l1_gamma=net_params[8], l1_beta = net_params[9], l1_mean=net_params[10], l1_variance=net_params[11], l2_w=net_params[12],l2_b=net_params[13], l2_gamma=net_params[14], l2_beta=net_params[15], l2_mean = net_params[16], l2_variance=net_params[17],l3_w=net_params[18],l3_b=net_params[19], l3_gamma=net_params[20], l3_beta=net_params[21], l3_mean = net_params[22], l3_variance=net_params[23])
old_acc = hist[0]
store_epoch = i
save_path = saver.save(sess, "./binn_model/model.ckpt")
t_end = time.time()
np.set_printoptions(edgeitems=500)
reto = sess.run(res, feed_dict={inp: mnist_data.test.images[0:2], training:False})
print(reto)

net_params = sess.run(tf.global_variables())
net_params = net_params[2:]
plt.hist(net_params[0].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L0_w histogram with 64 Bins, Matrix Size: 784x4096": plt})
plt.clf()
plt.hist(net_params[1].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L0_b histogram with 64 Bins, Vector Size: 4096": plt})
plt.clf()
plt.hist(net_params[6].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L1_w histogram with 64 Bins, Matrix Size: 4096x4096": plt})
plt.clf()
plt.hist(net_params[7].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L1_b histogram with 64 Bins, Vector Size: 4096": plt})
plt.clf()
plt.hist(net_params[12].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L2_w histogram with 64 Bins, Matrix Size: 4096x4096": plt})
plt.clf()
plt.hist(net_params[13].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L2_b histogram with 64 Bins, Vector Size: 4096": plt})
plt.clf()
plt.hist(net_params[18].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L3_w histogram with 64 Bins, Matrix Size: 4096x10": plt})
plt.clf()
plt.hist(net_params[19].flatten(), bins=64)
plt.xlabel('Value Bin')
plt.ylabel('Number of Occurrences')
wandb.log({"Flattened L3_b histogram with 64 Bins, Vector Size: 10": plt})
plt.clf()
#wandb.log({"L0_w Histogram": wandb.Histogram(np_histogram=np.histogram(net_params[0].flatten()))})
#wandb.log({"l0_weights":net_params[0].flatten()}, step=np.arange(0, len(net_params[0].flatten())))
#np.savez('bnn_mnist_10ep.npz', l0_w=net_params[0], l0_b=net_params[1], l0_gamma=net_params[2], l0_beta=net_params[3], l0_mean=net_params[4], l0_variance=net_params[5], l1_w=net_params[6], l1_b=net_params[7], l1_gamma=net_params[8], l1_beta = net_params[9], l1_mean=net_params[10], l1_variance=net_params[11], l2_w=net_params[12],l2_b=net_params[13], l2_gamma=net_params[14], l2_beta=net_params[15], l2_mean = net_params[16], l2_variance=net_params[17],l3_w=net_params[18],l3_b=net_params[19], l3_gamma=net_params[20], l3_beta=net_params[21], l3_mean = net_params[22], l3_variance=net_params[23])
#reto = sess.run(res, feed_dict={inp: mnist_data.test.images[0:2], training:False})
#print(reto)
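
As the commented-out line above suggests, the matplotlib detour can also be skipped by logging native W&B histograms; a minimal sketch (panel names are illustrative):

# Log W&B histogram objects directly instead of rendered matplotlib figures.
wandb.log({"L0_w": wandb.Histogram(net_params[0].flatten()),
           "L0_b": wandb.Histogram(net_params[1].flatten())})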


'''
