Preface
[Tensorflow] Building LeNet with Tensorflow for Handwritten Digit Recognition
[Tensorflow] Building VGG with Tensorflow to Classify the 17-Category Flower Dataset
In the previous two posts we built the classic CNN architectures LeNet and VGG with Tensorflow and used them for handwritten digit recognition and 17-category flower classification. In this post we will build our own CNN to recognize captchas.
The steps are as follows:
- Data acquisition
- Building the CNN
- Training, testing and saving the model
- Loading the model and recognizing captchas
The code is available on Github.
1. Data Acquisition
Here the captchas are generated by code. Assume a captcha contains only digits and upper/lower-case letters and has 4 characters, e.g. Gx3f. It is best not to generate a fresh random captcha for every training step; instead, pre-generate something like 100,000 captcha images first and train on those, otherwise convergence will be very slow and may not happen at all.
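As a rough sketch of that idea (the output directory captcha_dataset/ and the helper name pregenerate are my own placeholders, not part of the original project), pre-generating a fixed pool of labelled images could look like this:

import os
import random
from captcha.image import ImageCaptcha

CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

def pregenerate(n_images=100000, code_size=4, out_dir='captcha_dataset'):
    """Write a fixed pool of captcha images to disk; the label is the file name."""
    os.makedirs(out_dir, exist_ok=True)
    image = ImageCaptcha()
    for _ in range(n_images):
        text = ''.join(random.choice(CHARS) for _ in range(code_size))
        # duplicates simply overwrite the same file, which is acceptable here
        image.write(text, os.path.join(out_dir, text + '.jpg'))

# pregenerate(100000)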
import numpy as np
import random
# captcha is a Python captcha library; install it with: pip install captcha
from captcha.image import ImageCaptcha
from PIL import Image
import matplotlib.pyplot as plt

code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                 'q', 'a', 'z', 'w', 's', 'x', 'e', 'd', 'c', 'r',
                 'f', 'v', 't', 'g', 'b', 'y', 'h', 'n', 'u', 'j',
                 'm', 'i', 'k', 'o', 'l', 'p', 'Q', 'A', 'Z', 'W',
                 'S', 'X', 'E', 'D', 'C', 'R', 'F', 'V', 'T', 'G',
                 'B', 'Y', 'H', 'N', 'U', 'J', 'M', 'I', 'K', 'O',
                 'L', 'P']
# size of the character set
code_char_set_size = len(code_char_set)
# character -> index dictionary
code_char_2_number_dict = dict(zip(code_char_set, range(len(code_char_set))))
# index -> character dictionary
code_number_2_char_dict = dict(zip(range(len(code_char_set)), code_char_set))
# number of characters in one captcha
code_size = 4


# randomly pick the characters of a captcha
def random_code_text(code_size=4):  # code_size is the captcha length
    code_text = []
    for i in range(code_size):
        c = random.choice(code_char_set)  # pick one character from the character set
        code_text.append(c)               # append it to the list
    return code_text                      # return a list of 4 characters


# convert the character list into a captcha Image object
def generate_code_image(code_size=4):
    image = ImageCaptcha()
    code_text = random_code_text(code_size)
    code_text = ''.join(code_text)
    # render the string into a captcha (returned as a byte stream)
    captcha = image.generate(code_text)
    # save the captcha image to disk (the 'captcha/' directory must already exist)
    image.write(code_text, 'captcha/' + code_text + '.jpg')
    # turn the stream into an image and then into a numpy array
    code_image = Image.open(captcha)
    code_image = np.array(code_image)
    return code_text, code_image


if __name__ == '__main__':
    text, image = generate_code_image(4)
    fig = plt.figure()
    fig.text(0.1, 0.9, text, ha='center', va='center')
    plt.imshow(image)
    plt.show()
2. Building the CNN
Network structure: we build a simple CNN. Captcha images are actually fairly simple data, so there is no need for a very complex architecture. Of course, with such a simple network, reaching 80%+ accuracy is fairly easy, but going beyond 80% is hard.
conv -> relu6 -> max_pool -> conv -> relu6 -> max_pool -> dropout -> conv -> relu6 -> max_pool -> full connection -> full connection
def code_cnn(x, y):
    """
    Build the captcha-recognition CNN.
    :param x: Tensor, the input features, a 4-D tensor: [batch_size, height, width, channels]
    :param y: Tensor, the labels, a 2-D tensor: [batch_size, code_size * code_char_set_size]
    :return: the output tensor of the network
    """
    # shape of the input: [batch_size, height, width, channels]
    x_shape = x.get_shape()
    # kernel_size_k is the number of convolution kernels (output channels) of layer k
    kernel_size_1 = 32
    kernel_size_2 = 64
    kernel_size_3 = 64
    unit_number_1 = 1024
    unit_number_2 = code_size * code_char_set_size
    with tf.variable_scope('net', initializer=tf.random_normal_initializer(0, 0.1), dtype=tf.float32):
        with tf.variable_scope('conv1'):
            w = tf.get_variable('w', shape=[5, 5, x_shape[3], kernel_size_1])
            b = tf.get_variable('b', shape=[kernel_size_1])
            net = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu1'):
            # Difference between relu6 and relu: relu6 clips values above 6 to 6,
            # while relu leaves all positive values untouched, so relu6 has an upper bound.
            # relu:  max(0, net)
            # relu6: min(6, max(0, net))
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool1'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('conv2'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_1, kernel_size_2])
            b = tf.get_variable('b', shape=[kernel_size_2])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu2'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool2'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('dropout1'):
            # keep_prob is a module-level constant (0.75, see section 4); the result must be
            # assigned back to net, otherwise the dropout has no effect
            net = tf.nn.dropout(net, keep_prob=keep_prob)
        with tf.variable_scope('conv3'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_2, kernel_size_3])
            b = tf.get_variable('b', shape=[kernel_size_3])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu3'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool3'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('fc1'):
            net_shape = net.get_shape()
            net_sample_feature_number = net_shape[1] * net_shape[2] * net_shape[3]
            net = tf.reshape(net, shape=[-1, net_sample_feature_number])
            w = tf.get_variable('w', shape=[net_sample_feature_number, unit_number_1])
            b = tf.get_variable('b', shape=[unit_number_1])
            net = tf.add(tf.matmul(net, w), b)
        with tf.variable_scope('softmax'):
            w = tf.get_variable('w', shape=[unit_number_1, unit_number_2])
            b = tf.get_variable('b', shape=[unit_number_2])
            net = tf.add(tf.matmul(net, w), b)
    return net
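To make the relu/relu6 comment in the first conv block concrete, here is a tiny standalone check (not part of the model code):

import tensorflow as tf

with tf.Session() as sess:
    v = tf.constant([-3.0, 0.5, 5.0, 8.0])
    print(sess.run(tf.nn.relu(v)))   # [0.  0.5 5.  8.]  -> no upper bound
    print(sess.run(tf.nn.relu6(v)))  # [0.  0.5 5.  6.]  -> values above 6 are clipped to 6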
3. Training, Testing and Saving the Model
Batch generation:
def text_2_vec(text):
    # one-hot encode the captcha text: (code_size, code_char_set_size), flattened to 1-D
    vec = np.zeros((code_size, code_char_set_size))
    k = 0
    for ch in text:
        index = code_char_2_number_dict[ch]
        vec[k][index] = 1
        k += 1
    return np.array(vec.flat)


def random_next_batch(batch_size=64, code_size=4):
    """
    Randomly generate the next batch of data.
    :param batch_size:
    :param code_size:
    :return:
    """
    batch_x = []
    batch_y = []
    for i in range(batch_size):
        code, image = generate_code_image(code_size)
        # convert the captcha text into its one-hot vector
        code_number = text_2_vec(code)
        batch_x.append(image)
        batch_y.append(code_number)
    return np.array(batch_x), np.array(batch_y)
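A quick sanity check of the two helpers above (the shapes assume the captcha library's default 60x160 RGB image and the 62-character set defined earlier; the captcha/ output directory used by generate_code_image must exist):

xs, ys = random_next_batch(batch_size=8, code_size=4)
print(xs.shape)  # (8, 60, 160, 3) -- batch of RGB captcha images
print(ys.shape)  # (8, 248)        -- 4 positions x 62 characters, one-hot and flattened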
Model training:
def train_code_cnn(model_path):
    """
    Train the model.
    :param model_path:
    :return:
    """
    # 1. Placeholders for the inputs
    in_image_height = 60
    in_image_weight = 160
    x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_weight, 1], name='x')
    y = tf.placeholder(tf.float32, shape=[None, code_size * code_char_set_size], name='y')

    # 2. Build the network
    network = code_cnn(x, y)

    # 3. Loss function (if any of the four positions is predicted wrongly, the loss is large)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=y))

    # 4. Optimizer
    train = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(cost)

    # 5. Accuracy (per character)
    predict = tf.reshape(network, [-1, code_size, code_char_set_size])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_y = tf.argmax(tf.reshape(y, [-1, code_size, code_char_set_size]), 2)
    correct = tf.equal(max_idx_p, max_idx_y)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # 6. Training
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # a. initialize the variables
        sess.run(tf.global_variables_initializer())

        # b. start training
        step = 1
        while step <= 100:
            # 1. fetch a batch of training data (the data is generated fresh for every batch)
            batch_x, batch_y = random_next_batch(batch_size=64, code_size=code_size)

            # 2. preprocess the batch: RGB -> grayscale, resize to 60x160
            batch_x = tf.image.rgb_to_grayscale(batch_x)
            batch_x = tf.image.resize_images(batch_x, size=(in_image_height, in_image_weight),
                                             method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

            # 3. run one training step
            _, cost_, accuracy_ = sess.run([train, cost, accuracy],
                                           feed_dict={x: batch_x.eval(), y: batch_y})
            print("Step:{}, Cost:{}, Accuracy:{}".format(step, cost_, accuracy_))

            # 4. evaluate on a fresh batch every 10 steps
            if step % 10 == 0:
                test_batch_x, test_batch_y = random_next_batch(batch_size=64, code_size=code_size)
                # apply exactly the same preprocessing as for the training batch
                test_batch_x = tf.image.rgb_to_grayscale(test_batch_x)
                test_batch_x = tf.image.resize_images(test_batch_x, size=(in_image_height, in_image_weight),
                                                      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                acc = sess.run(accuracy, feed_dict={x: test_batch_x.eval(), y: test_batch_y})
                print("Test accuracy: {}".format(acc))
                # # save the model and stop once the accuracy exceeds 0.7
                # if acc > 0.7 and accuracy_ > 0.7:
                #     saver.save(sess, model_path, global_step=step)
                #     break
            step += 1
        saver.save(sess, model_path, global_step=step)

        # write the graph for TensorBoard visualization
        writer = tf.summary.FileWriter('./graph/code', tf.get_default_graph())
        writer.close()
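The accuracy above is a per-character accuracy: the 248-dimensional output is reshaped to (batch, 4, 62) and argmax is taken per position. A small NumPy illustration of the same bookkeeping (toy numbers, not real model output):

import numpy as np

code_size, charset_size = 4, 62
logits = np.random.randn(1, code_size * charset_size)              # fake network output
labels = np.zeros((1, code_size * charset_size))
labels[0, [5, charset_size + 10, 2 * charset_size + 0, 3 * charset_size + 33]] = 1

pred_idx = logits.reshape(-1, code_size, charset_size).argmax(axis=2)  # predicted char index per position
true_idx = labels.reshape(-1, code_size, charset_size).argmax(axis=2)  # true char index per position
print((pred_idx == true_idx).mean())  # same per-character accuracy as the TF graph above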
Because training takes quite a long time, only 100 iterations are run here, just enough to save a model for the loading-and-recognition step below. If you are interested, run the version in the source code, where training stops once the accuracy reaches at least 0.7; see the source for details.
The figure above shows the visualization result (the graph written to ./graph/code, viewable with TensorBoard).
4. Loading the Model and Recognizing Captchas
import tensorflow as tf
from verification_code_recognition import code_recognition

code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                 'q', 'a', 'z', 'w', 's', 'x', 'e', 'd', 'c', 'r',
                 'f', 'v', 't', 'g', 'b', 'y', 'h', 'n', 'u', 'j',
                 'm', 'i', 'k', 'o', 'l', 'p', 'Q', 'A', 'Z', 'W',
                 'S', 'X', 'E', 'D', 'C', 'R', 'F', 'V', 'T', 'G',
                 'B', 'Y', 'H', 'N', 'U', 'J', 'M', 'I', 'K', 'O',
                 'L', 'P']
code_char_set_size = len(code_char_set)
code_char_2_number_dict = dict(zip(code_char_set, range(len(code_char_set))))
code_number_2_char_dict = dict(zip(range(len(code_char_set)), code_char_set))
# keep probability used for dropout in the network (how many neurons are kept);
# 0.75 means 75% of the neurons are kept and the other 25% are randomly dropped
# (dropping effectively sets their outputs to 0)
keep_prob = 0.75
# number of characters in one captcha
code_size = 4

# 1. Placeholders for the inputs
in_image_height = 60
in_image_weight = 160
x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_weight, 1], name='x')
y = tf.placeholder(tf.float32, shape=[None, code_size * code_char_set_size], name='y')


# 2. Load the model and predict
def predict_captcha(captcha_image, model_path):
    # a. build the network
    output = code_recognition.code_cnn(x, y)
    saver = tf.train.Saver()
    # open a session
    with tf.Session() as sess:
        # restore the latest checkpoint
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        predict = tf.argmax(tf.reshape(output, [-1, code_size, code_char_set_size]), 2)
        text_list = sess.run(predict, feed_dict={x: captcha_image.eval()})
        return text_list


# 3. Generate a single captcha for prediction (generating several would need small code changes)
batch_x, batch_y = code_recognition.random_next_batch(batch_size=1, code_size=code_size)

# 4. Reshape the one-hot label back to (code_size, code_char_set_size)
code_vec = batch_y[0].reshape((code_size, code_char_set_size))


# 5. Convert the true one-hot label back to text
def vec_2_text(vec):
    text = ''
    for char_vec in vec:
        for index, value in enumerate(char_vec):
            if value == 1.0:
                char = code_char_set[index]
                text += char
    return text


# 6. Preprocess the image for prediction: (1, 60, 160, 3) -> (1, 60, 160, 1)
batch_x = tf.image.rgb_to_grayscale(batch_x)
code_img = tf.image.resize_images(batch_x, size=(in_image_height, in_image_weight),
                                  method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

# 7. Path of the saved model
model_path = './model/code/'

# 8. Predict
text_list = predict_captcha(code_img, model_path)


# 9. Convert the network output into text
def out_2_text(text_list):
    pre_text = ''
    for i in text_list[0]:
        char = code_char_set[i]
        pre_text += char
    return pre_text


print("Ground truth: {}, Prediction: {}".format(vec_2_text(code_vec), out_2_text(text_list)))
Since training takes a long time, I did not run it to completion; I only verified that the code works.
5. Digits-Only Captchas
If the captcha may contain digits plus upper- and lower-case letters, the one-hot encoding of a 4-character captcha has length 248, i.e. the output layer has 248 neurons. That means quite a lot of parameters, so training requires a reasonably powerful machine; mine did not have enough memory to finish. Therefore this section gives a digits-only captcha recognizer, which needs roughly 1200 iterations to reach about 99% accuracy.
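The output-layer sizes mentioned above follow directly from code_size times the size of the character set:

code_size = 4
full_charset = 10 + 26 + 26       # digits + lowercase + uppercase = 62
digits_only = 10

print(code_size * full_charset)   # 248 output units for the mixed character set
print(code_size * digits_only)    # 40 output units for the digits-only version below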
The code:
# -- encoding:utf-8 --
"""
File:   onlyNumber
Author: Danycym
Date:   2019/5/18
"""
import tensorflow as tf
from captcha.image import ImageCaptcha
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


# randomly pick 4 digits
def random_captcha_text(char_set=code_char_set, captcha_size=4):
    captcha_text = []
    for i in range(captcha_size):
        c = random.choice(char_set)
        captcha_text.append(c)
    return captcha_text


# generate a captcha image together with its text
def gen_captcha_text_image():
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_text = ''.join(captcha_text)
    captcha = image.generate(captcha_text)
    captcha_image = Image.open(captcha)
    captcha_image = np.array(captcha_image)
    return captcha_text, captcha_image


# convert an RGB image to grayscale (BT.601 luma weights)
def convert2gray(img):
    if len(img.shape) > 2:
        r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return gray
    else:
        return img
# convert the captcha text into its one-hot vector
def text2vec(text):
    text_len = len(text)
    if text_len > max_captcha:
        raise ValueError('captcha text is at most 4 characters long')
    vector = np.zeros(max_captcha * char_set_len)

    def char2pos(c):
        # map '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61, '_' -> 62
        if c == '_':
            k = 62
            return k
        k = ord(c) - 48
        if k > 9:
            k = ord(c) - 55
            if k > 35:
                k = ord(c) - 61
                if k > 61:
                    raise ValueError('No Map')
        return k

    for i, c in enumerate(text):
        idx = i * char_set_len + char2pos(c)
        vector[idx] = 1
    return vector
# fetch the next batch of training data
def get_next_batch(batch_size=128):
    batch_x = np.zeros([batch_size, image_height * image_width])
    batch_y = np.zeros([batch_size, max_captcha * char_set_len])

    # keep generating until we get an image with the expected shape
    def wrap_gen_captcha_text_and_image():
        while True:
            text, image = gen_captcha_text_image()
            if image.shape == (60, 160, 3):
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
# Build the CNN:
# three (conv + relu + max-pool + dropout) blocks + one fully connected layer + output layer
def crack_captcha_cnn(b_alpha=0.1):
    x = tf.reshape(X, shape=[-1, image_height, image_width, 1])

    wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc1 = tf.Variable(b_alpha * tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, keep_prob)

    wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc2 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc3 = tf.Variable(b_alpha * tf.random_normal([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)

    # fully connected layer: the 60x160 input pooled three times -> 8x20 feature maps with 128 channels
    wd1 = tf.get_variable(name='wd1', shape=[8 * 20 * 128, 1024], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bd1 = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
    dense = tf.nn.dropout(dense, keep_prob)

    # output layer
    wout = tf.get_variable('wout', shape=[1024, max_captcha * char_set_len], dtype=tf.float32,
                           initializer=tf.contrib.layers.xavier_initializer())
    # wout = tf.Variable(w_alpha * tf.random_normal([1024, max_captcha * char_set_len]))
    bout = tf.Variable(b_alpha * tf.random_normal([max_captcha * char_set_len]))
    out = tf.add(tf.matmul(dense, wout), bout)
    return out
# train the model
def train_cnn():
    output = crack_captcha_cnn()
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    predict = tf.reshape(output, [-1, max_captcha, char_set_len])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, max_captcha, char_set_len]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(100)
            _, cost_ = sess.run([optimizer, cost], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
            print(step, cost_)
            # evaluate on a fresh batch every 10 steps; stop once accuracy exceeds 0.99
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                print(step, acc)
                if acc > 0.99:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1
# load the model and predict
def crack_captcha(captcha_image):
    output = crack_captcha_cnn()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "./model/crack_capcha.model-1120")
        predict = tf.argmax(tf.reshape(output, [-1, max_captcha, char_set_len]), 2)
        text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1.})
        text = text_list[0].tolist()
        return text
if __name__ == '__main__':
    # train = 0: train the model
    # train = 1: load the trained model and predict
    train = 0
    if train == 0:
        text, image = gen_captcha_text_image()
        print("captcha image shape:", image.shape)  # (60, 160, 3)
        image_height = 60
        image_width = 160
        max_captcha = len(text)
        print("max number of characters in a captcha:", max_captcha)
        char_set = code_char_set
        char_set_len = len(char_set)
        # placeholders
        X = tf.placeholder(tf.float32, [None, image_height * image_width])
        Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
        keep_prob = tf.placeholder(tf.float32)
        # train the model
        train_cnn()

    if train == 1:
        image_height = 60
        image_width = 160
        char_set = code_char_set
        char_set_len = len(char_set)

        text, image = gen_captcha_text_image()

        f = plt.figure()
        ax = f.add_subplot(111)
        ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
        plt.imshow(image)

        max_captcha = len(text)
        image = convert2gray(image)
        image = image.flatten() / 255
        # placeholders
        X = tf.placeholder(tf.float32, [None, image_height * image_width])
        Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
        keep_prob = tf.placeholder(tf.float32)
        # load the model and predict
        predict_text = crack_captcha(image)
        print("Ground truth: {}  Prediction: {}".format(text, predict_text))
        plt.show()
6. Summary
- In this example the batches are generated randomly on the fly during training. Normally you would generate all the data first, e.g. 100,000 captcha images, and then split it into training and test sets.
- Whatever preprocessing is applied to the data during training must also be applied during testing and prediction (see the sketch after this list).
- Since only 100 training steps with 64 samples each were run, the model quality cannot really be judged, but it does show that the pipeline runs; to improve the results, use more data and more training steps.
- The digits-only captcha recognizer given here reaches an accuracy of about 99%.
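As a minimal sketch of the second point (the helper name preprocess is my own, not from the original code), keeping the preprocessing in a single function shared between training and prediction avoids accidental mismatches:

import numpy as np

def preprocess(image):
    """Identical preprocessing for training, testing and prediction:
    RGB -> grayscale (BT.601 weights) and scale pixel values to [0, 1]."""
    if image.ndim == 3:
        image = 0.2989 * image[:, :, 0] + 0.5870 * image[:, :, 1] + 0.1140 * image[:, :, 2]
    return image.flatten() / 255.0

# Call preprocess() both inside get_next_batch() and on the single image passed to
# crack_captcha(), so the network always sees identically prepared inputs.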