# *******************************
# File : solveCaptcha.py
# Author : Kavish N. Dahekar
# Email : [email protected]
# Details : Using model for predicting captcha text
# *******************************
import sys, os, pickle, random
import numpy as np
import tensorflow as tf  # NOTE: this script uses the TensorFlow 1.x API (tf.placeholder / tf.Session)
import cv2
import matplotlib
# Recreate the neural network graph used during training so the saved weights can be restored
# input
x = tf.placeholder(tf.float32, [None, 2925])
# weights
W = tf.Variable(tf.zeros([2925, 28]))
# biases
b = tf.Variable(tf.zeros([28]))
# model
y = tf.nn.softmax(tf.matmul(x, W) + b)
# correct_answers
y_ = tf.placeholder(tf.float32, [None, 28])
# error function
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
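# Note on dimensions: the 2925-dimensional input is a flattened 65 x 45 binary character crop
# (charWidth is set to 45 below, and 65 * 45 = 2925), and the 28 output classes correspond to
# the 28 characters listed in charMap further down. The cross_entropy and train_step ops above
# only mirror the training script and are not used when solving a captcha.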
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
sess = tf.Session()
# load model from file
saver.restore(sess, "training/trainedModels/softmaxNNModel.model")
print("Model restored from file : training/trainedModels/softmaxNNModel.model")
# --------------------------------------------------------------
# --------------------------------------------------------------
# --------------------------------------------------------------
# get the captcha image to be solved (loaded in color, then converted to grayscale below)
if len(sys.argv) != 2:
    print("\nPlease enter the name of the image to be processed as the first argument.")
    print("\nExample usage :")
    print("\t\tpython solveCaptcha.py mycaptcha.png")
    sys.exit(-1)
img = cv2.imread(sys.argv[1],1)
# convert to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# apply b&w threshold
ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
# medianBlur for removing salt and pepper noise
median = cv2.medianBlur(thresh,3)
# back to BGR so we have 3 channel colors for each pixel
img = cv2.cvtColor(median,cv2.COLOR_GRAY2BGR)
# erode and dilate
kernel = np.ones((2,2),np.uint8)
img = cv2.erode(img,kernel,iterations = 1)
img = cv2.dilate(img,kernel,iterations = 1)
charcpy = np.empty_like(img)
np.copyto(charcpy,img)
height, width, ch = img.shape
print(height,"x",width)
# count the white (foreground) pixels in each column and in total
colwise_wlist = []
totalwhite = 0
for col in range(width):
    wctr = 0
    for row in range(height):
        if np.array_equal(img[row][col], np.array([255, 255, 255])):
            wctr += 1
            totalwhite += 1
    colwise_wlist.append(wctr)
# prepare data for clustering : (row, col) coordinates of every white pixel
a = np.zeros(shape=(totalwhite, 2))
ctr = 0
for row in range(height):
    for col in range(width):
        if np.array_equal(img[row][col], np.array([255, 255, 255])):
            a[ctr] = np.array([row, col])
            ctr += 1
# apply k-means clustering to the coordinates of the white pixels
z = np.float32(a)
# Define criteria = ( type, max_iter = 10 , epsilon = 1.0 )
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
# Set flags (Just to avoid line break in the code)
flags = cv2.KMEANS_RANDOM_CENTERS
K = 5
# Apply KMeans
compactness,labels,centers = cv2.kmeans(z,K,None,criteria,10,flags)
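# The K = 5 clusters correspond to the 5 characters of the captcha; each cluster center
# approximates the (row, col) centroid of one character, and its column coordinate is used
# below to crop a fixed-width window around that character.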
# extract the characters
charHeight, charWidth = height, 45
char_imgs = []
# sort the cluster centers left to right by their column coordinate
centers = sorted(centers, key=lambda pt: pt[1])
for c in range(len(centers)):
    Crow, Ccol = centers[c][0], int(centers[c][1])
    x1 = (Ccol - int(charWidth/2)) if ((Ccol - int(charWidth/2)) > 0) else 0
    x2 = (Ccol + int(charWidth/2)) if ((Ccol + int(charWidth/2)) < width) else width
    temp_image = charcpy[0:height, x1:x2]
    # pad the crop with black columns until it is exactly charWidth pixels wide
    for xx in range(len(temp_image[0]), 45):
        temp_image = np.insert(temp_image, len(temp_image[0]), values=0, axis=1)
    temp_image = cv2.cvtColor(temp_image, cv2.COLOR_BGR2GRAY)
    tempret, tempthresh = cv2.threshold(temp_image, 0, 1, cv2.THRESH_BINARY)
    temp_image = np.reshape(tempthresh, (1, 2925))
    char_imgs.append(temp_image)
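# Each entry in char_imgs is now a 1 x 2925 binary row vector, matching the input shape
# expected by the softmax model restored above.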
# -------------------------------------------------
# -------------------------------------------------
# -------------------------------------------------
charMap = ['2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','k','m','n','p','r','s','t','v','w','x','y','z']
# define the model (the same single softmax layer as above)
def model(x_in, Wts, bs):
    yop = tf.nn.softmax(tf.matmul(x_in, Wts) + bs)
    return yop
# index of character
testi = 0
# final output
finOP = ""
# pass each of the 5 segmented characters through the network
for testi in range(5):
    test_x = np.asarray(char_imgs[testi], dtype=np.float32)
    predict_op = model(test_x, W, b)
    op = sess.run(predict_op, feed_dict={x: test_x})
    # find the index of the maximum probability in the softmax distribution
    maxprob = op[0][0]
    maxi = 0
    for i in range(28):
        if op[0][i] > maxprob:
            maxprob = op[0][i]
            maxi = i
    # append the predicted character to the final output
    finOP += charMap[maxi]
print("\n\n\nOUTPUT :",finOP.upper())
print("\n\n\n")