Preface
[Tensorflow] Building LeNet with Tensorflow for Handwritten Digit Recognition
[Tensorflow] Building VGG with Tensorflow to Classify the 17-Category Flower Dataset
In the previous two posts we built the classic CNN architectures LeNet and VGG with Tensorflow and used them for handwritten digit recognition and 17-category flower classification. In this post we will build our own CNN to recognize captchas.
The steps are as follows:
- Data acquisition
- Building the CNN
- Training, testing and saving the model
- Loading the model and recognizing captchas
The code is available on Github.
1. Data Acquisition
Here the captchas are generated by code. Assume a captcha contains only digits and upper/lower-case letters and has 4 characters, e.g. Gx3f. It is best not to generate a fresh random captcha for every training step; instead, pre-generate something like 100,000 captcha images first and train on those, otherwise convergence will be very slow and may not happen at all.
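As a rough sketch of that idea (the output directory captcha_dataset/ and the helper name pregenerate are my own placeholders, not part of the original project), pre-generating a fixed pool of labelled images could look like this:

import os
import random
from captcha.image import ImageCaptcha

CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

def pregenerate(n_images=100000, code_size=4, out_dir='captcha_dataset'):
    """Write a fixed pool of captcha images to disk; the label is the file name."""
    os.makedirs(out_dir, exist_ok=True)
    image = ImageCaptcha()
    for _ in range(n_images):
        text = ''.join(random.choice(CHARS) for _ in range(code_size))
        # duplicates simply overwrite the same file, which is acceptable here
        image.write(text, os.path.join(out_dir, text + '.jpg'))

# pregenerate(100000)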
import numpy as np
import random
# captcha is a Python captcha library; install it with: pip install captcha
from captcha.image import ImageCaptcha
from PIL import Image
import matplotlib.pyplot as plt

code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                 'q', 'a', 'z', 'w', 's', 'x', 'e', 'd', 'c', 'r',
                 'f', 'v', 't', 'g', 'b', 'y', 'h', 'n', 'u', 'j',
                 'm', 'i', 'k', 'o', 'l', 'p', 'Q', 'A', 'Z', 'W',
                 'S', 'X', 'E', 'D', 'C', 'R', 'F', 'V', 'T', 'G',
                 'B', 'Y', 'H', 'N', 'U', 'J', 'M', 'I', 'K', 'O',
                 'L', 'P']
# size of the character set
code_char_set_size = len(code_char_set)
# character -> index dictionary
code_char_2_number_dict = dict(zip(code_char_set, range(len(code_char_set))))
# index -> character dictionary
code_number_2_char_dict = dict(zip(range(len(code_char_set)), code_char_set))
# number of characters in one captcha
code_size = 4


# randomly pick the characters of a captcha
def random_code_text(code_size=4):  # code_size is the captcha length
    code_text = []
    for i in range(code_size):
        c = random.choice(code_char_set)  # pick one character from the character set
        code_text.append(c)               # append it to the list
    return code_text                      # return a list of 4 characters


# convert the character list into a captcha Image object
def generate_code_image(code_size=4):
    image = ImageCaptcha()
    code_text = random_code_text(code_size)
    code_text = ''.join(code_text)
    # render the string into a captcha (returned as a byte stream)
    captcha = image.generate(code_text)
    # save the captcha image to disk (the 'captcha/' directory must already exist)
    image.write(code_text, 'captcha/' + code_text + '.jpg')
    # turn the stream into an image and then into a numpy array
    code_image = Image.open(captcha)
    code_image = np.array(code_image)
    return code_text, code_image


if __name__ == '__main__':
    text, image = generate_code_image(4)
    fig = plt.figure()
    fig.text(0.1, 0.9, text, ha='center', va='center')
    plt.imshow(image)
    plt.show()
2. Building the CNN
Network structure: we build a simple CNN. Captcha images are actually fairly simple data, so there is no need for a very complex architecture. Of course, with such a simple network, reaching 80%+ accuracy is fairly easy, but going beyond 80% is hard.
conv -> relu6 -> max_pool -> conv -> relu6 -> max_pool -> dropout -> conv -> relu6 -> max_pool -> full connection -> full connection
def code_cnn(x, y):
    """
    Build the captcha-recognition CNN.
    :param x: Tensor, the input features, a 4-D tensor: [batch_size, height, width, channels]
    :param y: Tensor, the labels, a 2-D tensor: [batch_size, code_size * code_char_set_size]
    :return: the output tensor of the network
    """
    # shape of the input: [batch_size, height, width, channels]
    x_shape = x.get_shape()
    # kernel_size_k is the number of convolution kernels (output channels) of layer k
    kernel_size_1 = 32
    kernel_size_2 = 64
    kernel_size_3 = 64
    unit_number_1 = 1024
    unit_number_2 = code_size * code_char_set_size
    with tf.variable_scope('net', initializer=tf.random_normal_initializer(0, 0.1), dtype=tf.float32):
        with tf.variable_scope('conv1'):
            w = tf.get_variable('w', shape=[5, 5, x_shape[3], kernel_size_1])
            b = tf.get_variable('b', shape=[kernel_size_1])
            net = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu1'):
            # Difference between relu6 and relu: relu6 clips values above 6 to 6,
            # while relu leaves all positive values untouched, so relu6 has an upper bound.
            # relu:  max(0, net)
            # relu6: min(6, max(0, net))
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool1'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('conv2'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_1, kernel_size_2])
            b = tf.get_variable('b', shape=[kernel_size_2])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu2'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool2'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('dropout1'):
            # keep_prob is a module-level constant (0.75, see section 4); the result must be
            # assigned back to net, otherwise the dropout has no effect
            net = tf.nn.dropout(net, keep_prob=keep_prob)
        with tf.variable_scope('conv3'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_2, kernel_size_3])
            b = tf.get_variable('b', shape=[kernel_size_3])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu3'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool3'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('fc1'):
            net_shape = net.get_shape()
            net_sample_feature_number = net_shape[1] * net_shape[2] * net_shape[3]
            net = tf.reshape(net, shape=[-1, net_sample_feature_number])
            w = tf.get_variable('w', shape=[net_sample_feature_number, unit_number_1])
            b = tf.get_variable('b', shape=[unit_number_1])
            net = tf.add(tf.matmul(net, w), b)
        with tf.variable_scope('softmax'):
            w = tf.get_variable('w', shape=[unit_number_1, unit_number_2])
            b = tf.get_variable('b', shape=[unit_number_2])
            net = tf.add(tf.matmul(net, w), b)
    return net
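To make the relu/relu6 comment in the first conv block concrete, here is a tiny standalone check (not part of the model code):

import tensorflow as tf

with tf.Session() as sess:
    v = tf.constant([-3.0, 0.5, 5.0, 8.0])
    print(sess.run(tf.nn.relu(v)))   # [0.  0.5 5.  8.]  -> no upper bound
    print(sess.run(tf.nn.relu6(v)))  # [0.  0.5 5.  6.]  -> values above 6 are clipped to 6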
3. Training, Testing and Saving the Model
Batch generation:
def text_2_vec(text):
    # one-hot encode the captcha text: (code_size, code_char_set_size), flattened to 1-D
    vec = np.zeros((code_size, code_char_set_size))
    k = 0
    for ch in text:
        index = code_char_2_number_dict[ch]
        vec[k][index] = 1
        k += 1
    return np.array(vec.flat)


def random_next_batch(batch_size=64, code_size=4):
    """
    Randomly generate the next batch of data.
    :param batch_size:
    :param code_size:
    :return:
    """
    batch_x = []
    batch_y = []
    for i in range(batch_size):
        code, image = generate_code_image(code_size)
        # convert the captcha text into its one-hot vector
        code_number = text_2_vec(code)
        batch_x.append(image)
        batch_y.append(code_number)
    return np.array(batch_x), np.array(batch_y)
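A quick sanity check of the two helpers above (the shapes assume the captcha library's default 60x160 RGB image and the 62-character set defined earlier; the captcha/ output directory used by generate_code_image must exist):

xs, ys = random_next_batch(batch_size=8, code_size=4)
print(xs.shape)  # (8, 60, 160, 3) -- batch of RGB captcha images
print(ys.shape)  # (8, 248)        -- 4 positions x 62 characters, one-hot and flattened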
Model training:
def train_code_cnn(model_path):
    """
    Train the model.
    :param model_path:
    :return:
    """
    # 1. Placeholders for the inputs
    in_image_height = 60
    in_image_weight = 160
    x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_weight, 1], name='x')
    y = tf.placeholder(tf.float32, shape=[None, code_size * code_char_set_size], name='y')

    # 2. Build the network
    network = code_cnn(x, y)

    # 3. Loss function (if any of the four positions is predicted wrongly, the loss is large)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=y))

    # 4. Optimizer
    train = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(cost)

    # 5. Accuracy (per character)
    predict = tf.reshape(network, [-1, code_size, code_char_set_size])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_y = tf.argmax(tf.reshape(y, [-1, code_size, code_char_set_size]), 2)
    correct = tf.equal(max_idx_p, max_idx_y)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # 6. Training
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # a. initialize the variables
        sess.run(tf.global_variables_initializer())

        # b. start training
        step = 1
        while step <= 100:
            # 1. fetch a batch of training data (the data is generated fresh for every batch)
            batch_x, batch_y = random_next_batch(batch_size=64, code_size=code_size)

            # 2. preprocess the batch: RGB -> grayscale, resize to 60x160
            batch_x = tf.image.rgb_to_grayscale(batch_x)
            batch_x = tf.image.resize_images(batch_x, size=(in_image_height, in_image_weight),
                                             method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

            # 3. run one training step
            _, cost_, accuracy_ = sess.run([train, cost, accuracy],
                                           feed_dict={x: batch_x.eval(), y: batch_y})
            print("Step:{}, Cost:{}, Accuracy:{}".format(step, cost_, accuracy_))

            # 4. evaluate on a fresh batch every 10 steps
            if step % 10 == 0:
                test_batch_x, test_batch_y = random_next_batch(batch_size=64, code_size=code_size)
                # apply exactly the same preprocessing as for the training batch
                test_batch_x = tf.image.rgb_to_grayscale(test_batch_x)
                test_batch_x = tf.image.resize_images(test_batch_x, size=(in_image_height, in_image_weight),
                                                      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                acc = sess.run(accuracy, feed_dict={x: test_batch_x.eval(), y: test_batch_y})
                print("Test accuracy: {}".format(acc))
                # # save the model and stop once the accuracy exceeds 0.7
                # if acc > 0.7 and accuracy_ > 0.7:
                #     saver.save(sess, model_path, global_step=step)
                #     break
            step += 1
        saver.save(sess, model_path, global_step=step)

        # write the graph for TensorBoard visualization
        writer = tf.summary.FileWriter('./graph/code', tf.get_default_graph())
        writer.close()
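The accuracy above is a per-character accuracy: the 248-dimensional output is reshaped to (batch, 4, 62) and argmax is taken per position. A small NumPy illustration of the same bookkeeping (toy numbers, not real model output):

import numpy as np

code_size, charset_size = 4, 62
logits = np.random.randn(1, code_size * charset_size)              # fake network output
labels = np.zeros((1, code_size * charset_size))
labels[0, [5, charset_size + 10, 2 * charset_size + 0, 3 * charset_size + 33]] = 1

pred_idx = logits.reshape(-1, code_size, charset_size).argmax(axis=2)  # predicted char index per position
true_idx = labels.reshape(-1, code_size, charset_size).argmax(axis=2)  # true char index per position
print((pred_idx == true_idx).mean())  # same per-character accuracy as the TF graph above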
Because training takes quite a long time, only 100 iterations are run here, just enough to save a model for the loading-and-recognition step below. If you are interested, run the version in the source code, where training stops once the accuracy reaches at least 0.7; see the source for details.
The figure above shows the visualization result (the graph written to ./graph/code, viewable with TensorBoard).
4. Loading the Model and Recognizing Captchas
import tensorflow as tf
from verification_code_recognition import code_recognition

code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                 'q', 'a', 'z', 'w', 's', 'x', 'e', 'd', 'c', 'r',
                 'f', 'v', 't', 'g', 'b', 'y', 'h', 'n', 'u', 'j',
                 'm', 'i', 'k', 'o', 'l', 'p', 'Q', 'A', 'Z', 'W',
                 'S', 'X', 'E', 'D', 'C', 'R', 'F', 'V', 'T', 'G',
                 'B', 'Y', 'H', 'N', 'U', 'J', 'M', 'I', 'K', 'O',
                 'L', 'P']
code_char_set_size = len(code_char_set)
code_char_2_number_dict = dict(zip(code_char_set, range(len(code_char_set))))
code_number_2_char_dict = dict(zip(range(len(code_char_set)), code_char_set))
# keep probability used for dropout in the network (how many neurons are kept);
# 0.75 means 75% of the neurons are kept and the other 25% are randomly dropped
# (dropping effectively sets their outputs to 0)
keep_prob = 0.75
# number of characters in one captcha
code_size = 4

# 1. Placeholders for the inputs
in_image_height = 60
in_image_weight = 160
x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_weight, 1], name='x')
y = tf.placeholder(tf.float32, shape=[None, code_size * code_char_set_size], name='y')


# 2. Load the model and predict
def predict_captcha(captcha_image, model_path):
    # a. build the network
    output = code_recognition.code_cnn(x, y)
    saver = tf.train.Saver()
    # open a session
    with tf.Session() as sess:
        # restore the latest checkpoint
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        predict = tf.argmax(tf.reshape(output, [-1, code_size, code_char_set_size]), 2)
        text_list = sess.run(predict, feed_dict={x: captcha_image.eval()})
        return text_list


# 3. Generate a single captcha for prediction (generating several would need small code changes)
batch_x, batch_y = code_recognition.random_next_batch(batch_size=1, code_size=code_size)

# 4. Reshape the one-hot label back to (code_size, code_char_set_size)
code_vec = batch_y[0].reshape((code_size, code_char_set_size))


# 5. Convert the true one-hot label back to text
def vec_2_text(vec):
    text = ''
    for char_vec in vec:
        for index, value in enumerate(char_vec):
            if value == 1.0:
                char = code_char_set[index]
                text += char
    return text


# 6. Preprocess the image for prediction: (1, 60, 160, 3) -> (1, 60, 160, 1)
batch_x = tf.image.rgb_to_grayscale(batch_x)
code_img = tf.image.resize_images(batch_x, size=(in_image_height, in_image_weight),
                                  method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

# 7. Path of the saved model
model_path = './model/code/'

# 8. Predict
text_list = predict_captcha(code_img, model_path)


# 9. Convert the network output into text
def out_2_text(text_list):
    pre_text = ''
    for i in text_list[0]:
        char = code_char_set[i]
        pre_text += char
    return pre_text


print("Ground truth: {}, Prediction: {}".format(vec_2_text(code_vec), out_2_text(text_list)))
Since training takes a long time, I did not run it to completion; I only verified that the code works.
5. Digits-Only Captchas
If the captcha may contain digits plus upper- and lower-case letters, the one-hot encoding of a 4-character captcha has length 248, i.e. the output layer has 248 neurons. That means quite a lot of parameters, so training requires a reasonably powerful machine; mine did not have enough memory to finish. Therefore this section gives a digits-only captcha recognizer, which needs roughly 1200 iterations to reach about 99% accuracy.
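The output-layer sizes mentioned above follow directly from code_size times the size of the character set:

code_size = 4
full_charset = 10 + 26 + 26       # digits + lowercase + uppercase = 62
digits_only = 10

print(code_size * full_charset)   # 248 output units for the mixed character set
print(code_size * digits_only)    # 40 output units for the digits-only version below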
The code:
# -- encoding:utf-8 --
"""
File:   onlyNumber
Author: Danycym
Date:   2019/5/18
"""
import tensorflow as tf
from captcha.image import ImageCaptcha
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
code_char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


# randomly pick 4 digits
def random_captcha_text(char_set=code_char_set, captcha_size=4):
    captcha_text = []
    for i in range(captcha_size):
        c = random.choice(char_set)
        captcha_text.append(c)
    return captcha_text


# generate a captcha image together with its text
def gen_captcha_text_image():
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_text = ''.join(captcha_text)
    captcha = image.generate(captcha_text)
    captcha_image = Image.open(captcha)
    captcha_image = np.array(captcha_image)
    return captcha_text, captcha_image


# convert an RGB image to grayscale (BT.601 luma weights)
def convert2gray(img):
    if len(img.shape) > 2:
        r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return gray
    else:
        return img
# convert the captcha text into its one-hot vector
def text2vec(text):
    text_len = len(text)
    if text_len > max_captcha:
        raise ValueError('captcha text is at most 4 characters long')
    vector = np.zeros(max_captcha * char_set_len)

    def char2pos(c):
        # map '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61, '_' -> 62
        if c == '_':
            k = 62
            return k
        k = ord(c) - 48
        if k > 9:
            k = ord(c) - 55
            if k > 35:
                k = ord(c) - 61
                if k > 61:
                    raise ValueError('No Map')
        return k

    for i, c in enumerate(text):
        idx = i * char_set_len + char2pos(c)
        vector[idx] = 1
    return vector
# fetch the next batch of training data
def get_next_batch(batch_size=128):
    batch_x = np.zeros([batch_size, image_height * image_width])
    batch_y = np.zeros([batch_size, max_captcha * char_set_len])

    # keep generating until we get an image with the expected shape
    def wrap_gen_captcha_text_and_image():
        while True:
            text, image = gen_captcha_text_image()
            if image.shape == (60, 160, 3):
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
# Build the CNN:
# three (conv + relu + max-pool + dropout) blocks + one fully connected layer + output layer
def crack_captcha_cnn(b_alpha=0.1):
    x = tf.reshape(X, shape=[-1, image_height, image_width, 1])

    wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc1 = tf.Variable(b_alpha * tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, keep_prob)

    wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc2 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc3 = tf.Variable(b_alpha * tf.random_normal([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)

    # fully connected layer: the 60x160 input pooled three times -> 8x20 feature maps with 128 channels
    wd1 = tf.get_variable(name='wd1', shape=[8 * 20 * 128, 1024], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bd1 = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
    dense = tf.nn.dropout(dense, keep_prob)

    # output layer
    wout = tf.get_variable('wout', shape=[1024, max_captcha * char_set_len], dtype=tf.float32,
                           initializer=tf.contrib.layers.xavier_initializer())
    # wout = tf.Variable(w_alpha * tf.random_normal([1024, max_captcha * char_set_len]))
    bout = tf.Variable(b_alpha * tf.random_normal([max_captcha * char_set_len]))
    out = tf.add(tf.matmul(dense, wout), bout)
    return out
# train the model
def train_cnn():
    output = crack_captcha_cnn()
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    predict = tf.reshape(output, [-1, max_captcha, char_set_len])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, max_captcha, char_set_len]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(100)
            _, cost_ = sess.run([optimizer, cost], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
            print(step, cost_)
            # evaluate on a fresh batch every 10 steps; stop once accuracy exceeds 0.99
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                print(step, acc)
                if acc > 0.99:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1
# load the model and predict
def crack_captcha(captcha_image):
    output = crack_captcha_cnn()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "./model/crack_capcha.model-1120")
        predict = tf.argmax(tf.reshape(output, [-1, max_captcha, char_set_len]), 2)
        text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1.})
        text = text_list[0].tolist()
        return text
if __name__ == '__main__':
    # train = 0: train the model
    # train = 1: load the trained model and predict
    train = 0
    if train == 0:
        text, image = gen_captcha_text_image()
        print("captcha image shape:", image.shape)  # (60, 160, 3)
        image_height = 60
        image_width = 160
        max_captcha = len(text)
        print("max number of characters in a captcha:", max_captcha)
        char_set = code_char_set
        char_set_len = len(char_set)
        # placeholders
        X = tf.placeholder(tf.float32, [None, image_height * image_width])
        Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
        keep_prob = tf.placeholder(tf.float32)
        # train the model
        train_cnn()

    if train == 1:
        image_height = 60
        image_width = 160
        char_set = code_char_set
        char_set_len = len(char_set)

        text, image = gen_captcha_text_image()

        f = plt.figure()
        ax = f.add_subplot(111)
        ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
        plt.imshow(image)

        max_captcha = len(text)
        image = convert2gray(image)
        image = image.flatten() / 255
        # placeholders
        X = tf.placeholder(tf.float32, [None, image_height * image_width])
        Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
        keep_prob = tf.placeholder(tf.float32)
        # load the model and predict
        predict_text = crack_captcha(image)
        print("Ground truth: {}  Prediction: {}".format(text, predict_text))
        plt.show()
6. Summary
- In this example the batches are generated randomly on the fly during training. Normally you would generate all the data first, e.g. 100,000 captcha images, and then split it into training and test sets.
- Whatever preprocessing is applied to the data during training must also be applied during testing and prediction (see the sketch after this list).
- Since only 100 training steps with 64 samples each were run, the model quality cannot really be judged, but it does show that the pipeline runs; to improve the results, use more data and more training steps.
- The digits-only captcha recognizer given here reaches an accuracy of about 99%.
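As a minimal sketch of the second point (the helper name preprocess is my own, not from the original code), keeping the preprocessing in a single function shared between training and prediction avoids accidental mismatches:

import numpy as np

def preprocess(image):
    """Identical preprocessing for training, testing and prediction:
    RGB -> grayscale (BT.601 weights) and scale pixel values to [0, 1]."""
    if image.ndim == 3:
        image = 0.2989 * image[:, :, 0] + 0.5870 * image[:, :, 1] + 0.1140 * image[:, :, 2]
    return image.flatten() / 255.0

# Call preprocess() both inside get_next_batch() and on the single image passed to
# crack_captcha(), so the network always sees identically prepared inputs.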