# mnist.py -- train a softmax regression on MNIST and classify your own handwritten digit images
import tensorflow as tf
import input_data
import cv2
import numpy as np
import math
from scipy import ndimage
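
# NOTE: this script uses the TensorFlow 1.x API (tf.placeholder, tf.Session,
# tf.train.GradientDescentOptimizer) together with the input_data helper from
# the old TensorFlow MNIST tutorial. If only TensorFlow 2.x is installed, one
# possible workaround (a sketch, not tested here) is to import the
# compatibility layer instead of the plain "import tensorflow as tf" above:
#
#   import tensorflow.compat.v1 as tf
#   tf.disable_v2_behavior()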


def getBestShift(img):
    """Return the (x, y) shift that moves the image's center of mass to the image center."""
    # ndimage.measurements is a deprecated namespace in newer SciPy;
    # center_of_mass is available directly in scipy.ndimage
    cy, cx = ndimage.center_of_mass(img)
    rows, cols = img.shape
    shiftx = np.round(cols/2.0 - cx).astype(int)
    shifty = np.round(rows/2.0 - cy).astype(int)
    return shiftx, shifty


def shift(img, sx, sy):
    """Shift the image by (sx, sy) pixels using an affine warp."""
    rows, cols = img.shape
    M = np.float32([[1, 0, sx], [0, 1, sy]])
    shifted = cv2.warpAffine(img, M, (cols, rows))
    return shifted
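
# A quick sanity check for the two helpers above (a minimal sketch, not part of
# the pipeline): an off-center blob in a 28x28 frame should end up roughly
# centered after applying the computed shift.
#
#   demo = np.zeros((28, 28), dtype=np.float32)
#   demo[4:8, 4:8] = 255.0
#   sx, sy = getBestShift(demo)
#   print(ndimage.center_of_mass(shift(demo, sx, sy)))  # close to (13.5, 13.5)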


def train_and_predict(input_images):
    # create a MNIST_data folder with the MNIST dataset if necessary
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    """
    a placeholder for our image data:
    None stands for an unspecified number of images
    784 = 28*28 pixels
    """
    x = tf.placeholder("float", [None, 784])

    # we need our weights for our neural net
    W = tf.Variable(tf.zeros([784, 10]))
    # and the biases
    b = tf.Variable(tf.zeros([10]))

    """
    softmax provides a probability-based output:
    we multiply the image values x with the weights
    and add the biases
    (the normal procedure, explained in previous articles)
    """
    y = tf.nn.softmax(tf.matmul(x, W) + b)

    """
    y_ will be filled with the real values
    which we want to train on (digits 0-9)
    for an undefined number of images
    """
    y_ = tf.placeholder("float", [None, 10])

    """
    we use the cross-entropy function,
    which we want to minimize to improve our model
    """
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))

    """
    use a learning rate of 0.01
    to minimize the cross-entropy error
    """
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    # initialize all variables
    # (tf.initialize_all_variables is deprecated in favor of this call)
    init = tf.global_variables_initializer()

    # create a session
    sess = tf.Session()
    sess.run(init)

    # use 1000 batches with a size of 100 each to train our net
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        # run the train_step function with the given image values (x) and the real output (y_)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    """
    Let's get the accuracy of our model:
    our model is correct if the index with the highest y value
    is the same as in the real digit vector.
    The mean of correct_prediction gives us the accuracy.
    We need to run the accuracy function
    with our test set (mnist.test),
    feeding its images and labels as x and y_.
    """
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
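
    # For intuition, with hypothetical numbers: if for one test image
    # y = [0.1, 0.7, 0.2, ...] and y_ = [0, 1, 0, ...], both argmax calls return
    # index 1, so correct_prediction is True for that image; casting the booleans
    # to floats and taking the mean gives the fraction of correctly classified
    # test images, which is what gets printed above.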

    # create an array where we can store our input pictures
    images = np.zeros((len(input_images), 784))
    # and the correct values
    correct_vals = np.zeros((len(input_images), 10))

    # now preprocess and classify our own images
    i = 0
    for no in input_images:
        # read the image as grayscale
        gray = cv2.imread("blog/own_" + str(no) + ".png", 0)

        # rescale and invert it (MNIST digits are white on black)
        gray = cv2.resize(255 - gray, (28, 28))

        # better black and white version
        (thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        # remove completely black rows and columns at the borders
        while np.sum(gray[0]) == 0:
            gray = gray[1:]
        while np.sum(gray[:, 0]) == 0:
            gray = np.delete(gray, 0, 1)
        while np.sum(gray[-1]) == 0:
            gray = gray[:-1]
        while np.sum(gray[:, -1]) == 0:
            gray = np.delete(gray, -1, 1)

        rows, cols = gray.shape

        # resize the digit so that its longer side is 20 pixels,
        # matching the 20x20 bounding box used for the MNIST digits
        if rows > cols:
            factor = 20.0 / rows
            rows = 20
            cols = int(round(cols * factor))
            # cv2.resize takes (cols, rows), i.e. first cols, then rows
            gray = cv2.resize(gray, (cols, rows))
        else:
            factor = 20.0 / cols
            cols = 20
            rows = int(round(rows * factor))
            # cv2.resize takes (cols, rows), i.e. first cols, then rows
            gray = cv2.resize(gray, (cols, rows))

        # pad to 28x28 and shift the digit so that its center of mass
        # sits in the center of the image
        colsPadding = (int(math.ceil((28 - cols) / 2.0)), int(math.floor((28 - cols) / 2.0)))
        rowsPadding = (int(math.ceil((28 - rows) / 2.0)), int(math.floor((28 - rows) / 2.0)))
        gray = np.lib.pad(gray, (rowsPadding, colsPadding), 'constant')

        shiftx, shifty = getBestShift(gray)
        shifted = shift(gray, shiftx, shifty)
        gray = shifted

        # save the processed image (the pro-img folder has to exist)
        cv2.imwrite("pro-img/image_" + str(no) + ".png", gray)

        """
        all images in the training set have a range from 0 to 1
        and not from 0 to 255, so we divide our flattened image
        (a one-dimensional vector with our 784 pixels)
        by 255 to use the same 0-1 based range
        """
        flatten = gray.flatten() / 255.0

        """
        we need to store the flattened image and generate
        the correct_vals array:
        correct_val for the digit 3, for example, would be
        [0,0,0,1,0,0,0,0,0,0]
        """
        images[i] = flatten
        correct_val = np.zeros((10))
        correct_val[no] = 1
        correct_vals[i] = correct_val
        i += 1
"""
the prediction will be an array with four values,
which show the predicted number
"""
prediction = tf.argmax(y,1)
print(prediction)
"""
we want to run the prediction and the accuracy function
using our generated arrays (images and correct_vals)
"""
#print(sess.run(prediction, feed_dict={x: images, y_: correct_vals}))
return(sess.run(prediction, feed_dict={x: images, y_: correct_vals}))
print(sess.run(accuracy, feed_dict={x: images, y_: correct_vals}))


if __name__ == '__main__':
    train_and_predict([0, 8, 4, 3])
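
# Expected directory layout, inferred from the hard-coded paths above
# (an assumption; adjust the paths in train_and_predict for your own setup):
#
#   blog/own_0.png, blog/own_8.png, blog/own_4.png, blog/own_3.png  # input digit photos
#   pro-img/      # must already exist; processed 28x28 images are written here
#   MNIST_data/   # created and filled by input_data.read_data_sets if missing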