图像分类的tensorflow实现

目录

1.背景

2.LeNet-5网络实现手写数字识别

2.1.LeNet-5介绍

2.2.程序实现(tensorflow版)


1.背景

图像分类,根据各自在图像信息中所反映的不同特征,把不同类别的目标区分开来的图像处理方法。它利用计算机对图像进行定量分析,把图像或图像中的每个像元或区域划归为若干个类别中的某一种,以代替人的视觉判读。

2.LeNet-5网络实现手写数字识别

2.1.LeNet-5介绍

LeNet-5是一种高效的卷积神经网络,在论文《Gradient-Based Learning Applied to Document Recognition》中有详细的解释,它在大家熟知的手写数字识别项目中得到了广泛的使用。该网络(不含输入层)一共有7层,依次为:卷积层、池化层、卷积层、池化层、全连接层、全连接层、全连接层(输出层)。

2.2.程序实现(tensorflow版)

import tensorflow as tf
from tensorflow.keras.datasets import mnist
import os 

# Pin the job to GPU 0. This is done before the first TensorFlow op runs:
# the CUDA runtime reads these variables when it initializes, so setting
# them after tensors have already been created is too late to take effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # number GPUs by PCI bus order
os.environ["CUDA_VISIBLE_DEVICES"] = "0"        # expose only GPU 0 to this process

# Load the MNIST dataset.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()


def _to_model_input(images):
    """Append a channel axis and scale pixel values from 0-255 to 0-1.

    The network below uses sigmoid activations with a large learning rate;
    feeding raw 0-255 intensities saturates the first sigmoid layer, so the
    images are converted to float32 in the [0, 1] range first.
    """
    with_channel = tf.reshape(
        images, (images.shape[0], images.shape[1], images.shape[2], 1))
    return tf.cast(with_channel, tf.float32) / 255.0


# Reshape both splits from (N, H, W) to (N, H, W, 1) and normalize them.
train_images = _to_model_input(train_images)
test_images = _to_model_input(test_images)

# Build the LeNet-5 model.
net = tf.keras.Sequential([
    # Convolution: 6 kernels of size 5x5, sigmoid activation.
    tf.keras.layers.Conv2D(filters=6, kernel_size=5, activation='sigmoid',
                           input_shape=(28, 28, 1)),
    # Max pooling.
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Convolution: 16 kernels of size 5x5, sigmoid activation.
    tf.keras.layers.Conv2D(filters=16, kernel_size=5, activation='sigmoid'),
    # Max pooling.
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Flatten the feature maps into one vector per sample.
    tf.keras.layers.Flatten(),
    # Fully connected layer, sigmoid activation.
    tf.keras.layers.Dense(120, activation='sigmoid'),
    # Fully connected layer, sigmoid activation.
    tf.keras.layers.Dense(84, activation='sigmoid'),
    # Fully connected output layer, softmax over the 10 digit classes.
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Compile the model: optimizer, loss and evaluation metric.
# Labels are integer class ids, hence the sparse cross-entropy loss.
# 'accuracy' is the documented metric identifier (the original 'ACCURACY'
# is not a documented spelling).
net.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.9),
            loss=tf.keras.losses.sparse_categorical_crossentropy,
            metrics=['accuracy'])

# Train the model.
net.fit(train_images, train_labels, epochs=10, batch_size=128, verbose=1)

# Evaluate the model on the test split.
net.evaluate(test_images, test_labels, verbose=1)

 3.基于AlexNet的CIFAR10数据集分类

本节参考于:【深度学习实战—5】:基于AlexNet的CIFAR10数据集分类(附Keras实现)_米开朗琪罗~的博客-CSDN博客

 完整程序:

from keras.datasets import cifar10
import matplotlib.pyplot as plt
import pickle
from keras.utils import np_utils
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.utils import np_utils


# Select the GPU used for training. These variables are set here, before any
# model is built, so they are in place when the GPU is first initialized.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # number GPU devices by PCI bus ID so that index 0 is stable
os.environ["CUDA_VISIBLE_DEVICES"]="0"  # expose only the GPU with index 0 to this process


def get_cifar10_data():
    """Load CIFAR-10 and return normalized train/validation/test splits.

    The validation split is carved out of the official test set: its first
    5000 samples become validation data and the remainder stays as test
    data. The training set is used unchanged.

    Returns:
        A tuple ``(x_train, y_train, x_val, y_val, x_test, y_test)`` where
        the ``x_*`` arrays are float32 images scaled to the 0-1 range and
        the ``y_*`` arrays are one-hot label matrices with 10 columns.
    """
    num_classes = 10   # CIFAR-10 labels are the integers 0-9
    val_size = 5000    # number of test samples reserved for validation

    # *_original hold the raw images and integer labels as shipped by Keras.
    (x_train_original, y_train_original), (x_test_original, y_test_original) = cifar10.load_data()

    # Split the official test set into validation and test parts.
    x_val, y_val = x_test_original[:val_size], y_test_original[:val_size]
    x_test, y_test = x_test_original[val_size:], y_test_original[val_size:]
    x_train, y_train = x_train_original, y_train_original

    # Convert the unsigned-integer pixels to float32 and map 0-255 to 0-1.
    x_train = x_train.astype('float32') / 255
    x_val = x_val.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # One-hot encode the labels. num_classes is passed explicitly so every
    # split gets the same width even if a class were missing from it;
    # otherwise the width is inferred from the largest label present.
    y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
    y_val = np_utils.to_categorical(y_val, num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

    return x_train, y_train, x_val, y_val, x_test, y_test

def alexnet():
    """Build an AlexNet-style classifier for 32x32 RGB images.

    The network stacks five convolution layers (the first two each followed
    by max pooling and batch normalization), then two 4096-unit dense layers
    with 50% dropout and a 10-way softmax output. A layer summary is printed
    as a side effect.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Settings shared by every convolution layer.
    conv_kwargs = dict(padding='same', activation='relu',
                       kernel_initializer='uniform')

    model = Sequential()

    # Block 1: large-kernel strided convolution, pooling, normalization.
    model.add(Conv2D(96, (11, 11), strides=(4, 4), input_shape=(32, 32, 3),
                     **conv_kwargs))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(BatchNormalization())

    # Block 2: 5x5 convolution, pooling, normalization.
    model.add(Conv2D(256, (5, 5), strides=(1, 1), **conv_kwargs))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(BatchNormalization())

    # Blocks 3-5: three consecutive 3x3 convolutions.
    model.add(Conv2D(384, (3, 3), strides=(1, 1), **conv_kwargs))
    model.add(Conv2D(384, (3, 3), strides=(1, 1), **conv_kwargs))
    model.add(Conv2D(256, (3, 3), strides=(1, 1), **conv_kwargs))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()

    return model

# Compile the network and train it.
x_train, y_train, x_val, y_val, x_test, y_test = get_cifar10_data()
model = alexnet()

# Compile: Adam optimizer, categorical cross-entropy loss (labels are
# one-hot), accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs with batches of 64, validating after every epoch;
# verbose=2 prints one line per epoch.
train_history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    verbose=2,
    validation_data=(x_val, y_val),
)

# Save the trained model to disk.
model.save('alexnet_cifar10.h5')

def show_train_history(train_history, train, validation):
    """Plot one training metric against its validation counterpart.

    Args:
        train_history: Object whose ``history`` mapping holds per-epoch
            metric lists (as returned by ``model.fit``).
        train: Key of the training-set metric, also used as the y-axis label.
        validation: Key of the matching validation-set metric.
    """
    # Draw the training curve first, then the validation curve, so the
    # legend entries below line up with them.
    for metric_key in (train, validation):
        plt.plot(train_history.history[metric_key])

    plt.title('Train History')
    plt.xlabel('Epoch')
    plt.ylabel(train)
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

# Plot the accuracy and loss curves recorded during training.
show_train_history(train_history, 'accuracy', 'val_accuracy')
show_train_history(train_history, 'loss', 'val_loss')

# Report loss and accuracy on the test split. The evaluation runs once; the
# original repeated this block verbatim, doing a second full pass over the
# test data only to print the same numbers again.
score = model.evaluate(x_test, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Predict the test split and reduce each probability vector to the index of
# its most likely class.
predictions = model.predict(x_test)
predictions = np.argmax(predictions, axis=1)
print('前20张图片预测结果:', predictions[:20])

# Visualize predictions next to the true labels.
(x_train_original, y_train_original), (x_test_original, y_test_original) = cifar10.load_data()

# `predictions` was computed on x_test, which get_cifar10_data() takes from
# x_test_original[5000:]. Prediction i therefore belongs to original test
# sample 5000 + i, not to sample i (which is a validation image).
_TEST_SPLIT_OFFSET = 5000


def cifar10_visualize_multiple_predict(start, end, length, width):
    """Show test images in a grid, titled with true and predicted labels.

    Args:
        start: Index (within the test split) of the first image to show.
        end: Index one past the last image to show.
        length: Number of rows in the subplot grid.
        width: Number of columns in the subplot grid.
    """
    for i in range(start, end):
        # Subplot positions are 1-based and count from the first shown
        # image, so the grid is filled correctly even when start != 0.
        plt.subplot(length, width, 1 + i - start)

        # Map the test-split index back to the raw test arrays.
        sample = _TEST_SPLIT_OFFSET + i
        plt.imshow(x_test_original[sample], cmap=plt.get_cmap('gray'))
        title_true = 'true=' + str(y_test_original[sample])            # true label
        title_prediction = ',' + 'prediction' + str(predictions[i])    # predicted label
        title = title_true + title_prediction
        plt.title(title)
        plt.xticks([])
        plt.yticks([])
    plt.show()


cifar10_visualize_multiple_predict(start=0, end=9, length=3, width=3)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值