- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍖 原作者:K同学啊 | 接辅导、项目定制
环境
- 系统: Linux
- 语言: Python3.8.10
- 深度学习框架: Pytorch2.0.0+cu118
步骤
环境设置
包引用
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import copy, random, pathlib
import matplotlib.pyplot as plt
from PIL import Image
from torchinfo import summary
import numpy as np
设置一个全局的设备,使后面的模型和数据放置在统一的设备中
# One global device so the model and all tensors are co-located (GPU when available).
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
数据准备
从K同学提供的网盘中下载鸟类数据集,解压到data目录下,数据集的结构如下:
其中bird_photos下不同的文件夹中保存了不同类型的鸟类图像,这个目录结构可以使用torchvision.datasets.ImageFolder直接加载
图像信息查看
1. 获取到所有的图像
# Collect every image path under data/bird_photos/<class_name>/.
root_dir = 'data/bird_photos'
root_directory = pathlib.Path(root_dir)
# Materialize the glob generator into a list: random.choice() below needs a
# sequence with a length, and a generator would also be exhausted after a
# single pass, breaking the later sampling loops.
image_list = list(root_directory.glob("*/*"))
2. 随机打印5个图像的尺寸
# Spot-check the data: print the numpy array shape of 5 randomly chosen images.
for _ in range(5):
    sample_path = random.choice(image_list)
    print(np.array(Image.open(str(sample_path))).shape)
发现都是224*224大小的三通道图像,所以我们可以在数据集处理时省略Resize这一步,或者加上224的Resize排除异常情况
3. 随机打印20个图像
# Preview 20 random images in a 2x10 grid, each titled with its class folder name.
plt.figure(figsize=(20, 4))
for idx in range(20):
    ax = plt.subplot(2, 10, idx + 1)
    ax.axis('off')
    sample = random.choice(image_list)
    # Parent folder name is the class label (ImageFolder layout).
    ax.set_title(sample.parts[-2])
    ax.imshow(Image.open(str(sample)))
4. 创建数据集
首先定义一个图像的预处理
# Preprocessing pipeline. The sampled images were all 224x224, but the
# explicit Resize (suggested in the note above the sampling step) guards
# against any off-sized outliers; it is a no-op for conforming images.
# Normalization uses the standard ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet means (R, G, B)
        std=[0.229, 0.224, 0.225],   # ImageNet stds (R, G, B)
    ),
])
然后通过datasets.ImageFolder加载图像文件夹
# ImageFolder infers labels from the sub-folder names and applies the transform.
dataset = datasets.ImageFolder(root=root_dir, transform=transform)
从数据中提取图像不同的分类名称
# Iterating class_to_idx yields the class names (its keys).
class_names = list(dataset.class_to_idx)
划分训练集和验证集
# 80/20 random train/validation split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
最后,将数据集划分批次
# Batch the datasets; only the training split is shuffled each epoch.
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
模型设计
基于上次的ResNet-50,v2主要改进了BatchNormalization和激活函数的顺序。
ResidualBlock块
class ResidualBlock(nn.Module):
    """Pre-activation bottleneck block (ResNet v2).

    BatchNorm + ReLU run *before* the convolutions (the v2 change). The
    projection shortcut consumes the pre-activated tensor, while the
    identity/pooling shortcut bypasses pre-activation entirely, matching
    the Keras ResNetV2 reference implementation.

    Args:
        input_size: number of input channels.
        filters: bottleneck width; the block outputs 4 * filters channels.
        kernel_size: spatial size of the middle convolution (odd).
        stride: stride of the middle convolution and of the shortcut.
        conv_shortcut: use a 1x1 projection shortcut (required whenever the
            channel count changes, i.e. input_size != 4 * filters).
    """

    def __init__(self, input_size, filters, kernel_size=3, stride=1, conv_shortcut=False):
        super().__init__()
        self.preact = nn.Sequential(nn.BatchNorm2d(input_size), nn.ReLU())
        self.conv_shortcut = conv_shortcut
        if conv_shortcut:
            # Projection shortcut: match the 4*filters output channel count.
            self.shortcut = nn.Conv2d(input_size, 4 * filters, 1, stride=stride, bias=False)
        elif stride > 1:
            # Identity shortcut with downsampling: a 1x1 max-pool with stride
            # is equivalent to a strided slice.
            self.shortcut = nn.MaxPool2d(1, stride=stride)
        else:
            self.shortcut = nn.Identity()
        self.conv1 = nn.Sequential(
            nn.Conv2d(input_size, filters, 1, stride=1, bias=False),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
        )
        # Fix: padding was hard-coded to 1, which only preserves spatial size
        # for kernel_size=3; kernel_size // 2 generalizes "same" padding to
        # any odd kernel while keeping the default behavior unchanged.
        self.conv2 = nn.Sequential(
            nn.Conv2d(filters, filters, kernel_size,
                      padding=kernel_size // 2, stride=stride, bias=False),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
        )
        self.conv3 = nn.Conv2d(filters, 4 * filters, 1, bias=False)

    def forward(self, x):
        pre = self.preact(x)
        # Projection shortcut sees the pre-activated input; the plain
        # shortcut sees the raw input (ResNet v2 convention).
        shortcut = self.shortcut(pre) if self.conv_shortcut else self.shortcut(x)
        x = self.conv1(pre)
        x = self.conv2(x)
        x = self.conv3(x)
        return x + shortcut
stack堆叠
class ResidualStack(nn.Module):
    """One stage of ResNet v2: projection block, middle blocks, strided block.

    Args:
        input_size: channels entering the stage.
        filters: bottleneck width of every block (stage outputs 4 * filters).
        blocks: total number of residual blocks in the stage.
        stride: downsampling stride applied by the final block.
    """

    def __init__(self, input_size, filters, blocks, stride=2):
        super().__init__()
        # First block projects the channel count up to 4*filters.
        self.first = ResidualBlock(input_size, filters, conv_shortcut=True)
        # blocks - 2 plain identity blocks in the middle.
        self.module_list = nn.ModuleList(
            ResidualBlock(4 * filters, filters) for _ in range(blocks - 2)
        )
        # Last block downsamples spatially via its stride.
        self.last = ResidualBlock(4 * filters, filters, stride=stride)

    def forward(self, x):
        out = self.first(x)
        for block in self.module_list:
            out = block(out)
        return self.last(out)
resnet50v2模型
class ResNet50v2(nn.Module):
def __init__(self,
include_top=True,
preact=True,
use_bias=True,
input_size=None,
pooling=None,
classes=1000,
classifier_activation = 'softmax'):
super().__init__()
self.input_conv = nn.Conv2d(3, 64, 7, padding=3, stride=2, bias=use_bias)
if not preact:
self