在任意位置写入下面这行导入语句:
from torchvision.models.resnet import resnet50
跳转到resnet50
def resnet50(pretrained=False, progress=True, **kwargs):
    r"""Build a ResNet-50 model.

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Extra keyword arguments, forwarded unchanged to ``_resnet``.

    Returns:
        The model object produced by ``_resnet`` for the ``'resnet50'``
        configuration.
    """
    # ResNet-50 = four stages of Bottleneck blocks, holding 3, 4, 6 and 3
    # blocks respectively.
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet50', Bottleneck, stage_depths, pretrained, progress,
                   **kwargs)
_resnet
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    """Construct a ``ResNet`` and optionally load downloaded weights into it.

    Args:
        arch: Key into ``model_urls`` identifying the weights to download,
            e.g. ``'resnet50'``. Only consulted when ``pretrained`` is truthy.
        block: Residual block class handed to ``ResNet`` (e.g. ``Bottleneck``).
        layers: Number of blocks in each of the four stages,
            e.g. ``[3, 4, 6, 3]``.
        pretrained: When truthy, fetch the state dict at ``model_urls[arch]``
            and load it into the freshly built model.
        progress: Forwarded to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: Extra keyword arguments forwarded to the ``ResNet``
            constructor.

    Returns:
        The constructed ``ResNet`` instance.
    """
    net = ResNet(block, layers, **kwargs)
    if not pretrained:
        return net

    # Pretrained path: download the weights registered for this architecture
    # name and load them into the model.
    weights = load_state_dict_from_url(model_urls[arch], progress=progress)
    net.load_state_dict(weights)
    return net
ResNet类
class ResNet(nn.Module):
    """Convolutional stem, four residual stages, global pooling and a linear head.

    Shape notes in the comments below assume a ``B x 3 x 224 x 224`` input and
    a block with ``expansion == 4`` (i.e. ``Bottleneck``).
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build all sub-modules and initialize their weights.

        Args:
            block: Residual block class (e.g. ``Bottleneck``). It must expose
                an ``expansion`` class attribute.
            layers: Number of blocks in each of the four stages,
                e.g. ``[3, 4, 6, 3]``.
            num_classes: Output size of the final linear layer.
            zero_init_residual: If True, set the weight of the last norm layer
                of every ``Bottleneck`` (``bn3``) / ``BasicBlock`` (``bn2``)
                to zero.
            groups: Stored and forwarded to every block (per the original
                notes, only relevant for ResNeXt-style variants).
            width_per_group: Stored as ``base_width`` and forwarded to every
                block (per the original notes, only relevant for ResNeXt).
            replace_stride_with_dilation: ``None`` or a 3-element sequence of
                flags, one for each of stages 2-4; a truthy flag makes that
                stage use dilation instead of its stride of 2.
            norm_layer: Callable that builds a normalization layer from a
                channel count. Defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given but does
                not have exactly three elements.
        """
        super().__init__()

        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer

        # Running input-channel count for the next block; starts at the stem
        # width and is updated by _make_layer after each stage.
        self.inplanes = 64
        # Only grows when a stage swaps its stride for dilation.
        self.dilation = 1

        dilate_flags = replace_stride_with_dilation
        if dilate_flags is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            dilate_flags = [False, False, False]
        if len(dilate_flags) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(dilate_flags))

        self.groups = groups
        self.base_width = width_per_group

        # Stem: B x 3 x 224 x 224 -> B x 64 x 112 x 112
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # B x 64 x 112 x 112 -> B x 64 x 56 x 56
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 1: B x 64 x 56 x 56 -> B x 256 x 56 x 56 (stride 1, so no
        # spatial downsampling).
        self.layer1 = self._make_layer(block, 64, layers[0])
        # Stage 2: B x 256 x 56 x 56 -> B x 512 x 28 x 28; the first block of
        # the stage receives stride=2.
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=dilate_flags[0])
        # Stage 3: B x 512 x 28 x 28 -> B x 1024 x 14 x 14
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=dilate_flags[1])
        # Stage 4: B x 1024 x 14 x 14 -> B x 2048 x 7 x 7
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=dilate_flags[2])

        # B x 2048 x 7 x 7 -> B x 2048 x 1 x 1
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # B x 2048 -> B x num_classes
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual
        # block behaves like an identity.
        # This improves the model by 0.2~0.3% according to
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    last_norm = module.bn3
                elif isinstance(module, BasicBlock):
                    last_norm = module.bn2
                else:
                    continue
                nn.init.constant_(last_norm.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Assemble one stage made of ``blocks`` consecutive residual blocks.

        Only the first block of the stage receives ``stride`` and the
        projection shortcut; the remaining ones use the block's defaults.
        Updates ``self.inplanes`` (and ``self.dilation`` when ``dilate`` is
        truthy) as a side effect.

        Args:
            block: Residual block class, e.g. ``Bottleneck``.
            planes: Base channel count of the stage (64 / 128 / 256 / 512);
                the stage's output has ``planes * block.expansion`` channels.
            blocks: Number of blocks in this stage.
            stride: Stride handed to the first block of the stage.
            dilate: If truthy, multiply ``self.dilation`` by ``stride`` and
                build the stage with stride 1 instead.

        Returns:
            An ``nn.Sequential`` containing the stage's blocks in order.
        """
        norm_layer = self._norm_layer
        # The first block keeps the dilation in effect *before* this stage.
        first_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        out_channels = planes * block.expansion

        # A projection shortcut is needed whenever the residual branch changes
        # the spatial size (stride) or the channel count.
        shortcut = None
        if stride != 1 or self.inplanes != out_channels:
            shortcut = nn.Sequential(
                conv1x1(self.inplanes, out_channels, stride),
                norm_layer(out_channels),
            )

        stage = [block(self.inplanes, planes, stride, shortcut, self.groups,
                       self.base_width, first_dilation, norm_layer)]

        # Every later block (and the next stage) consumes the expanded width.
        self.inplanes = out_channels
        stage.extend(
            block(self.inplanes, planes, groups=self.groups,
                  base_width=self.base_width, dilation=self.dilation,
                  norm_layer=norm_layer)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*stage)

    def _forward_impl(self, x):
        """Run stem, four stages, global pooling and the linear head on ``x``."""
        # See note [TorchScript super()]
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))

        # Pool, then flatten everything after the batch dimension for the
        # linear layer.
        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

    def forward(self, x):
        """Return the output of ``_forward_impl`` for ``x``."""
        return self._forward_impl(x)
Bottleneck 瓶颈模块
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv, plus shortcut.

    Bottleneck in torchvision places the stride for downsampling at 3x3
    convolution(self.conv2) while original implementation places the stride
    at the first 1x1 convolution(self.conv1) according to "Deep residual
    learning for image recognition" https://arxiv.org/abs/1512.03385.
    This variant is also known as ResNet V1.5 and improves accuracy according
    to https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    """

    # Ratio between the block's output channel count and ``planes``. Per the
    # original notes, it is also the factor by which a Bottleneck's output
    # width exceeds that of a BasicBlock.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        """Create the three conv/norm pairs of the block.

        Args:
            inplanes: Number of input channels.
            planes: Base channel count (64 / 128 / 256 / 512); the block
                outputs ``planes * expansion`` channels.
            stride: Stride passed to the 3x3 convolution (``conv2``).
            downsample: Optional module applied to the input to produce the
                shortcut branch; ``None`` means the input is used as-is.
            groups: Group count passed to the 3x3 convolution (per the
                original notes, only relevant for ResNeXt; see
                https://zhuanlan.zhihu.com/p/28749411).
            base_width: Channels per group (``width_per_group``); scales the
                inner width relative to the reference value 64.
            dilation: Dilation passed to the 3x3 convolution; 1 means no
                dilation.
            norm_layer: Callable that builds a normalization layer from a
                channel count. Defaults to ``nn.BatchNorm2d``.
        """
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        # Channel count of the two inner convolutions.
        width = int(planes * (base_width / 64.)) * groups
        out_channels = planes * self.expansion

        # Both self.conv2 and self.downsample layers downsample the input
        # when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_channels)
        self.bn3 = norm_layer(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the residual branch, add the shortcut, then apply ReLU."""
        shortcut = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No activation here: ReLU is applied only after the shortcut is added.
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
pytorch版ResNet
图片原始来自:
https://zhuanlan.zhihu.com/p/353235794
原始图片是论文中的版本,即 ResNet V1.0;上面的图片是我在此基础上修改得到的 PyTorch 版,即 ResNet V1.5(参见
https://ngc.nvidia.com/catalog/resources/nvidia:resnet_50_v1_5_for_pytorch )。
我在图中补充了一些内容,觉得这样可以更好地理解程序。
注:
图片右边
C对应程序里inplanes
C1对应程序里planes
上面是整个 ResNet 流程中 inplanes 和 planes 的变化。
最后resnet家族图:
网友评论