Using the SE Module in Detectron2
Add the SELayer directly to detectron2/detectron2/modeling/backbone/resnet.py. The alternative of creating a new file with a rewritten model class would require updating build_resnet_backbone and the other functions referenced from fpn.py and elsewhere, and it would be easy to miss one of those call sites, so the changes are made in the original file instead. The SELayer code is adapted from the PyTorch SENet implementation at https://github.com/moskomule/senet.pytorch/blob/master/senet. Since resnet.py already imports nn from torch, the class needs no new imports. First, SELayer is added to the module's __all__ list:
__all__ = [
    "SELayer",
    "ResNetBlockBase",
    "BasicBlock",
    "BottleneckBlock",
    "DeformBottleneckBlock",
    "BasicStem",
    "ResNet",
    "make_stage",
    "build_resnet_backbone",
]
class SELayer(nn.Module):
    """Squeeze-and-Excitation: channel-wise attention from globally pooled features."""

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        # Squeeze: global average pooling collapses each channel map to a scalar.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: a bottleneck MLP maps the pooled vector to per-channel
        # gates in (0, 1).
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        # Scale: reweight each input channel by its learned gate.
        return x * y.expand_as(x)
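As a quick sanity check, independent of Detectron2 and assuming the SELayer above is in scope, the layer should leave the input shape unchanged, since it only rescales channels (the tensor sizes below are arbitrary):

import torch

se = SELayer(channel=256, reduction=16)
x = torch.randn(2, 256, 14, 14)  # arbitrary batch and spatial sizes
y = se(x)
assert y.shape == x.shape  # SE only reweights channels; the shape is preserved

Next, the SE layer is wired into BottleneckBlock: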
class BottleneckBlock(CNNBlockBase):
    """
    The standard bottleneck residual block used by ResNet-50, 101 and 152
    defined in :paper:`ResNet`. It contains 3 conv layers with kernels
    1x1, 3x3, 1x1, and a projection shortcut if needed.
    This version additionally applies an SELayer to the output of the last 1x1 conv.
    """
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv.
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
        # have stride in the 3x3 conv.
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
        # SE layer on the residual branch. conv3 produces out_channels channels
        # (equal to bottleneck_channels * 4 in the standard ResNet configs), so
        # out_channels is used here rather than hard-coding the multiplier.
        self.se = SELayer(out_channels, reduction=16)
        # Zero-initialize the last normalization in each residual branch,
        # so that at the beginning, the residual branch starts with zeros,
        # and each residual block behaves like an identity.
        # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
        # "For BN layers, the learnable scaling coefficient γ is initialized
        # to be 1, except for each residual block's last BN
        # where γ is initialized to be 0."
        # nn.init.constant_(self.conv3.norm.weight, 0)

        # TODO this somehow hurts performance when training GN models from scratch.
        # Add it as an option when we need to use this code to train a backbone.
    def forward(self, x):
        out = self.conv1(x)
        out = F.relu_(out)

        out = self.conv2(out)
        out = F.relu_(out)

        out = self.conv3(out)
        out = self.se(out)  # added: recalibrate the conv3 output with the SE layer

        if self.shortcut is not None:
            shortcut = self.shortcut(x)
        else:
            shortcut = x

        out += shortcut
        out = F.relu_(out)
        return out
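To verify the modified block end to end, a minimal smoke test can be run. This is a sketch assuming the edited resnet.py is the one Detectron2 imports; the channel sizes below mirror a res2-stage block and are otherwise arbitrary:

import torch
from detectron2.modeling.backbone.resnet import BottleneckBlock

# A res2-style block: 256 -> 256 output channels with a 64-channel bottleneck,
# so out_channels == bottleneck_channels * 4 as in the standard configs.
block = BottleneckBlock(
    in_channels=256,
    out_channels=256,
    bottleneck_channels=64,
    stride=1,
    norm="BN",
)
block.eval()  # use running BN statistics for a deterministic forward pass

x = torch.randn(2, 256, 56, 56)
with torch.no_grad():
    out = block(x)
print(out.shape)  # expected: torch.Size([2, 256, 56, 56])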
Using the SE Module in Mask2Former
Mask2Former builds its backbone with the ResNet from Detectron2, so once the SE module has been added there, Mask2Former picks up the change as well. Note that when multiple Detectron2 source trees exist locally, the modification must be made in the Detectron2 installed into the virtual environment, i.e. the copy that Python actually imports.
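A quick way to confirm which copy of Detectron2 will actually pick up the edits is to print the path of the imported package from inside the virtual environment:

import detectron2
from detectron2.modeling.backbone import resnet

print(detectron2.__file__)  # root of the installation Python actually uses
print(resnet.__file__)      # this is the resnet.py that must contain the SE changes

If the printed path points at a different checkout than the one edited, the SE module will silently not take effect.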