mnist.py

import tensorflow as tf
import input_data
import cv2
import numpy as np
import math
from scipy import ndimage


def getBestShift(img):
    cy,cx = ndimage.measurements.center_of_mass(img)

    rows,cols = img.shape
    shiftx = np.round(cols/2.0-cx).astype(int)
    shifty = np.round(rows/2.0-cy).astype(int)

    return shiftx,shifty


def shift(img,sx,sy):
    rows,cols = img.shape
    M = np.float32([[1,0,sx],[0,1,sy]])
    shifted = cv2.warpAffine(img,M,(cols,rows))
    return shifted


def train_and_predict(input_images):
	# create a MNIST_data folder with the MNIST dataset if necessary
	mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

	"""
	a placeholder for our image data:
	None stands for an unspecified number of images
	784 = 28*28 pixel
	"""
	x = tf.placeholder("float", [None, 784])

	# we need our weights for our neural net
	W = tf.Variable(tf.zeros([784,10]))
	# and the biases
	b = tf.Variable(tf.zeros([10]))

	"""
	softmax provides a probability based output
	we need to multiply the image values x and the weights
	and add the biases
	(the normal procedure, explained in previous articles)
	"""
	y = tf.nn.softmax(tf.matmul(x,W) + b)

	"""
	y_ will be filled with the real values
	which we want to train (digits 0-9)
	for an undefined number of images
	"""
	y_ = tf.placeholder("float", [None,10])

	"""
	we use the cross_entropy function
	which we want to minimize to improve our model
	"""
	cross_entropy = -tf.reduce_sum(y_*tf.log(y))

	"""
	use a learning rate of 0.01
	to minimize the cross_entropy error
	"""
	train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

	# initialize all variables
	init = tf.initialize_all_variables()

	# create a session
	sess = tf.Session()
	sess.run(init)

	# use 1000 batches with a size of 100 each to train our net
	for i in range(1000):
	  batch_xs, batch_ys = mnist.train.next_batch(100)
	  # run the train_step function with the given image values (x) and the real output (y_)
	  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

	"""
	Let's get the accuracy of our model:
	our model is correct if the index with the highest y value
	is the same as in the real digit vector
	The mean of the correct_prediction gives us the accuracy.
	We need to run the accuracy function
	with our test set (mnist.test)
	We use the keys "images" and "labels" for x and y_
	"""
	correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
	print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

	# create an an array where we can store our 4 pictures
	images = np.zeros((4,784))
	# and the correct values
	correct_vals = np.zeros((4,10))

	# we want to test our images which you saw at the top of this page
	i = 0
	# for no in [8,0,4,3]:
	for no in input_images:

		# read the image
		gray = cv2.imread("blog/own_"+str(no)+".png", 0)
		# gray = cv2.imread(no, 0)

		# rescale it
		gray = cv2.resize(255-gray, (28, 28))
		# better black and white version
		(thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

		while np.sum(gray[0]) == 0:
			gray = gray[1:]

		while np.sum(gray[:,0]) == 0:
			gray = np.delete(gray,0,1)

		while np.sum(gray[-1]) == 0:
			gray = gray[:-1]

		while np.sum(gray[:,-1]) == 0:
			gray = np.delete(gray,-1,1)

		rows,cols = gray.shape

		if rows > cols:
			factor = 20.0/rows
			rows = 20
			cols = int(round(cols*factor))
			# first cols than rows
			gray = cv2.resize(gray, (cols,rows))
		else:
			factor = 20.0/cols
			cols = 20
			rows = int(round(rows*factor))
			# first cols than rows
			gray = cv2.resize(gray, (cols, rows))

		colsPadding = (int(math.ceil((28-cols)/2.0)),int(math.floor((28-cols)/2.0)))
		rowsPadding = (int(math.ceil((28-rows)/2.0)),int(math.floor((28-rows)/2.0)))
		gray = np.lib.pad(gray,(rowsPadding,colsPadding),'constant')

		shiftx,shifty = getBestShift(gray)
		shifted = shift(gray,shiftx,shifty)
		gray = shifted

		# save the processed images
		cv2.imwrite("pro-img/image_"+str(no)+".png", gray)
		"""
		all images in the training set have an range from 0-1
		and not from 0-255 so we divide our flatten images
		(a one dimensional vector with our 784 pixels)
		to use the same 0-1 based range
		"""
		flatten = gray.flatten() / 255.0
		"""
		we need to store the flatten image and generate
		the correct_vals array
		correct_val for the first digit (9) would be
		[0,0,0,0,0,0,0,0,0,1]
		"""
		images[i] = flatten
		correct_val = np.zeros((10))
		correct_val[no] = 1
		correct_vals[i] = correct_val
		i += 1

	"""
	the prediction will be an array with four values,
	which show the predicted number
	"""
	prediction = tf.argmax(y,1)
	print(prediction)
	"""
	we want to run the prediction and the accuracy function
	using our generated arrays (images and correct_vals)
	"""
	#print(sess.run(prediction, feed_dict={x: images, y_: correct_vals}))
	return(sess.run(prediction, feed_dict={x: images, y_: correct_vals}))
	print(sess.run(accuracy, feed_dict={x: images, y_: correct_vals}))

if __name__ == '__main__':
    train_and_predict([0, 8, 4, 3])