SPP的作用主要是扩大感受野,融合不同尺度特征图的信息,完成特征融合
SPP主要结构:由多个kernel_size不同的池化层并联组成
池化层可以:
提取更高阶的特征,加强图像特征的不变性,增加图像的鲁棒性
对卷积提取出来的信息做更进一步的降维。
算法实现:
class SpatialPyramidPooling(nn.Module):
    """Spatial pyramid pooling (SPP) module.

    Runs the input through several stride-1 max-pooling layers with different
    kernel sizes (5, 9 and 13 by default) and concatenates the pooled maps
    with the input itself along dim 1, so the result combines features of the
    same image at several spatial scales.

    Args:
        pool_sizes: Kernel sizes of the max-pooling branches. Sizes should be
            odd: the padding is ``size // 2``, which keeps the spatial size
            unchanged only for odd kernels, and the branches can only be
            concatenated when their spatial shapes match.
    """

    # A tuple default avoids sharing one mutable list between all instances.
    def __init__(self, pool_sizes=(5, 9, 13)):
        super().__init__()
        self.maxpools = nn.ModuleList(
            [nn.MaxPool2d(pool_size, 1, pool_size // 2) for pool_size in pool_sizes]
        )

    def forward(self, x):
        """Concatenate the pooled maps and ``x`` along dim 1.

        The pooled maps come first, in reverse order of ``pool_sizes``
        (13, 9, 5 for the defaults), and the unpooled input comes last.
        For batched NCHW input dim 1 is the channel axis, so the output has
        ``(len(pool_sizes) + 1)`` times the input channels.
        """
        features = [maxpool(x) for maxpool in self.maxpools[::-1]]
        return torch.cat(features + [x], dim=1)


class SPPF(nn.Module):
    """Faster variant of SPP that produces the same pooled features.

    Instead of three pooling layers with different kernel sizes, a single
    stride-1 max-pooling layer is applied one, two and three times in
    sequence. With the default ``kernel_size=5`` the three stages cover the
    same windows as 5x5, 9x9 and 13x13 pooling, while re-using the previous
    stage's result instead of recomputing it.

    Args:
        kernel_size: Kernel size of the shared max-pooling layer. It should
            be odd so that the ``kernel_size // 2`` padding keeps the spatial
            size unchanged.
    """

    def __init__(self, kernel_size=5):
        super().__init__()
        self.maxpool = nn.MaxPool2d(kernel_size, 1, kernel_size // 2)

    def forward(self, x):
        """Concatenate ``x`` and its 1x, 2x and 3x pooled versions along dim 1.

        Note the order is input first, then increasing receptive field, which
        is the reverse of ``SpatialPyramidPooling.forward``.
        """
        pooled_once = self.maxpool(x)
        pooled_twice = self.maxpool(pooled_once)
        pooled_thrice = self.maxpool(pooled_twice)
        return torch.cat([x, pooled_once, pooled_twice, pooled_thrice], dim=1)
在yolo中的实现:
class Conv(nn.Module):
    """Standard convolution block: Conv2d, then BatchNorm2d, then activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Build the block.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Convolution kernel size.
            s: Convolution stride.
            p: Padding passed to ``autopad(k, p)``, which is defined outside
                this snippet and supplies the actual padding value.
            g: Number of convolution groups.
            act: ``True`` selects ``nn.SiLU``; an ``nn.Module`` instance is
                used as given; anything else disables the activation.
        """
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalisation and activation in turn."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.act(normalised)

    def forward_fuse(self, x):
        """Apply convolution and activation only, skipping ``self.bn``.

        NOTE(review): presumably meant for use after the batch norm has been
        folded into the convolution weights - confirm against the caller.
        """
        convolved = self.conv(x)
        return self.act(convolved)


class SPP(nn.Module):
    """Spatial pyramid pooling wrapped between two 1x1 ``Conv`` blocks."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Build the layer.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel sizes of the parallel max-pooling branches.
        """
        super().__init__()
        hidden = c1 // 2  # hidden channels
        n_branches = len(k) + 1  # one per pooling layer plus the unpooled map
        # First Conv-BN-SiLU block: reduces the channels to the hidden width.
        self.cv1 = Conv(c1, hidden, 1, 1)
        # Last Conv-BN-SiLU block: mixes the concatenated branches into c2 channels.
        self.cv2 = Conv(hidden * n_branches, c2, 1, 1)
        # Core of SPP: one stride-1 max-pooling layer per kernel size in k.
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Reduce channels, pool at every scale, concatenate on dim 1 and project."""
        reduced = self.cv1(x)
        with warnings.catch_warnings():
            # Suppress the torch 1.9.0 max_pool2d() warning.
            warnings.simplefilter('ignore')
            branches = [reduced]
            branches.extend(pool(reduced) for pool in self.m)
            return self.cv2(torch.cat(branches, 1))


class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):
        """Build the layer; with ``k=5`` it is equivalent to ``SPP(k=(5, 9, 13))``.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size of the single max-pooling layer that is applied
                repeatedly.
        """
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # Four maps are concatenated: the reduced input and three pooled stages.
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Reduce channels, pool three times in sequence, concatenate on dim 1 and project."""
        reduced = self.cv1(x)
        with warnings.catch_warnings():
            # Suppress the torch 1.9.0 max_pool2d() warning.
            warnings.simplefilter('ignore')
            stages = [reduced]
            for _ in range(3):
                # Each stage pools the previous one, widening the receptive field.
                stages.append(self.m(stages[-1]))
            return self.cv2(torch.cat(stages, 1))