美文网首页
torchvision.models.resnet.resnet

torchvision.models.resnet.resnet

作者: blair_liu | 来源:发表于2021-03-12 22:26 被阅读0次

    在代码的任意位置写下下面这行导入语句:

    from torchvision.models.resnet import resnet50
    

    然后在IDE中跳转到resnet50的定义:

    def resnet50(pretrained=False, progress=True, **kwargs):
        """Build a ResNet-50 model.

        Architecture from
        `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

        Args:
            pretrained (bool): If True, returns a model pre-trained on ImageNet
            progress (bool): If True, displays a progress bar of the download to stderr
            **kwargs: Extra keyword arguments, forwarded unchanged to ``_resnet``.

        Returns:
            Whatever ``_resnet`` returns for the ``'resnet50'`` configuration.
        """
        # Delegate to the shared builder: Bottleneck blocks, [3, 4, 6, 3] blocks per stage.
        return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                       **kwargs)
    

    _resnet

    def _resnet(arch, block, layers, pretrained, progress, **kwargs):
        """Construct a ``ResNet`` and optionally load weights into it.

        Args:
            arch: Key into ``model_urls`` naming the architecture, e.g. ``'resnet50'``.
            block: Residual block class handed to ``ResNet`` (``Bottleneck`` for resnet50).
            layers: Number of blocks in each of the four stages, e.g. ``[3, 4, 6, 3]``.
            pretrained: If truthy, fetch a state dict from ``model_urls[arch]`` and load it.
            progress: Forwarded to ``load_state_dict_from_url`` as ``progress``.
            **kwargs: Extra keyword arguments forwarded to the ``ResNet`` constructor.

        Returns:
            The constructed ``ResNet`` instance.
        """
        net = ResNet(block, layers, **kwargs)
        if not pretrained:
            return net

        # Fetch the weights registered for this architecture and load them into the model.
        weights = load_state_dict_from_url(model_urls[arch], progress=progress)
        net.load_state_dict(weights)
        return net
    

    ResNet类

    class ResNet(nn.Module):
        """Residual network: a convolutional stem, four stages of residual blocks, and a linear head.

        The stem is a 7x7 convolution, a norm layer, ReLU and a 3x3 max-pool. The four
        stages are built by ``_make_layer``; they are followed by adaptive average
        pooling and a fully connected classifier.

        Shape comments in ``__init__`` assume ``block`` is ``Bottleneck`` (expansion 4)
        and a 224x224 RGB input; other blocks or input sizes give different shapes.
        """
    
        def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                     groups=1, width_per_group=64, replace_stride_with_dilation=None,
                     norm_layer=None):
            """
            :param block: residual block class, e.g. Bottleneck
            :param layers: number of blocks in each of the four stages, e.g. [3, 4, 6, 3]
            :param num_classes: output size of the final fully connected layer
            :param zero_init_residual: if True, set the weight of the last norm layer of
                every Bottleneck (bn3) / BasicBlock (bn2) to zero
            :param groups: stored as self.groups and passed to every block
                (not needed for plain ResNet; used by ResNeXt)
            :param width_per_group: stored as self.base_width and passed to every block
                (not needed for plain ResNet; used by ResNeXt)
            :param replace_stride_with_dilation: None, or a 3-element sequence with one
                flag each for layer2, layer3 and layer4; a true flag makes that stage use
                dilation instead of stride
            :param norm_layer: callable that builds a norm layer from a channel count;
                defaults to nn.BatchNorm2d
            :raises ValueError: if replace_stride_with_dilation does not have 3 elements
            """
            super(ResNet, self).__init__()
            if norm_layer is None:  # default to BatchNorm2d
                norm_layer = nn.BatchNorm2d
            self._norm_layer = norm_layer
    
            self.inplanes = 64  # input channels of the next block; _make_layer updates it (256, 512, 1024, 2048 with Bottleneck)
            self.dilation = 1  # only changes when a stage replaces its stride with dilation
            if replace_stride_with_dilation is None:
                # each element in the tuple indicates if we should replace
                # the 2x2 stride with a dilated convolution instead
                replace_stride_with_dilation = [False, False, False]
            if len(replace_stride_with_dilation) != 3:
                raise ValueError("replace_stride_with_dilation should be None "
                                 "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
            self.groups = groups
            self.base_width = width_per_group
            # B*3*224*224 -> B*64*112*112
            self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                                   bias=False)
            self.bn1 = norm_layer(self.inplanes)
            self.relu = nn.ReLU(inplace=True)
            # B*64*112*112 -> B*64*56*56
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            # B*64*56*56 -> B*256*56*56; layer1 uses stride 1, so no spatial downsampling
            self.layer1 = self._make_layer(block, 64, layers[0])
            # B*256*56*56 -> B*512*28*28; stride 2 is handed to the first block of the stage
            self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                           dilate=replace_stride_with_dilation[0])
            # B*512*28*28 -> B*1024*14*14; stride 2 is handed to the first block of the stage
            self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                           dilate=replace_stride_with_dilation[1])
            # B*1024*14*14 -> B*2048*7*7; stride 2 is handed to the first block of the stage
            self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                           dilate=replace_stride_with_dilation[2])
            # B*2048*7*7 -> B*2048*1*1
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            # B*2048 -> B*num_classes
            self.fc = nn.Linear(512 * block.expansion, num_classes)
    
            # Kaiming-normal init for conv weights; norm layers start with weight 1 and bias 0.
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
    
            # Zero-initialize the last BN in each residual branch,
            # so that the residual branch starts with zeros, and each residual block behaves like an identity.
            # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
            if zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        nn.init.constant_(m.bn3.weight, 0)
                    elif isinstance(m, BasicBlock):
                        nn.init.constant_(m.bn2.weight, 0)
    
        def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
            """
            Build one stage as an nn.Sequential of `blocks` residual blocks.

            Updates self.inplanes (and self.dilation when `dilate` is true) as a side effect.

            :param block: residual block class, e.g. Bottleneck
            :param planes: base channel count of the stage (64, 128, 256, 512); the stage
                outputs planes * block.expansion channels (expansion is 4 for Bottleneck)
            :param blocks: number of blocks in this stage (3, 4, 6, 3 for resnet50)
            :param stride: stride given to the first block of the stage
            :param dilate: if True, multiply self.dilation by stride and use stride 1 instead
            :return: nn.Sequential containing the stage's blocks
            """
            norm_layer = self._norm_layer
            downsample = None
            previous_dilation = self.dilation  # dilation before this stage; used by the first block
            if dilate:  # trade the stride for dilation
                self.dilation *= stride
                stride = 1
            if stride != 1 or self.inplanes != planes * block.expansion:
                # projection for the skip path of the first block, needed when the
                # stride or the channel count changes
                downsample = nn.Sequential(
                    conv1x1(self.inplanes, planes * block.expansion, stride),
                    norm_layer(planes * block.expansion),
                )
    
            layers = []  # blocks of this stage, in order
            # only the first block of the stage receives the stride and the downsample projection
            layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                                self.base_width, previous_dilation, norm_layer))
            # the remaining blocks take the expanded channel count as input
            self.inplanes = planes * block.expansion
            for _ in range(1, blocks):
                layers.append(block(self.inplanes, planes, groups=self.groups,
                                    base_width=self.base_width, dilation=self.dilation,
                                    norm_layer=norm_layer))
    
            return nn.Sequential(*layers)
    
        def _forward_impl(self, x):
            """Run x through the stem, the four stages, pooling, flatten and the fc layer."""
            # See note [TorchScript super()]
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.maxpool(x)
    
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
    
            x = self.avgpool(x)
            x = torch.flatten(x, 1)  # flatten every dimension after the first
            x = self.fc(x)
    
            return x
    
        def forward(self, x):
            """Forward pass; delegates to _forward_impl."""
            return self._forward_impl(x)
    

    Bottleneck 瓶颈模块

    class Bottleneck(nn.Module):
        """Residual block made of a 1x1, a 3x3 and a 1x1 convolution plus a skip connection.

        Each convolution is followed by a norm layer; ReLU follows the first two norm
        layers and the final addition. The block outputs ``planes * expansion`` channels.
        """
        # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
        # while original implementation places the stride at the first 1x1 convolution(self.conv1)
        # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
        # This variant is also known as ResNet V1.5 and improves accuracy according to
        # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

        # Ratio of the block's output channels to ``planes`` (the stage's base channel
        # count). Kept as a plain comment: a bare string after the assignment would be
        # a no-op statement, not documentation.
        expansion = 4

        def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                     base_width=64, dilation=1, norm_layer=None):
            """
            :param inplanes: number of input channels
            :param planes: base channel count (64, 128, 256, 512 in ResNet); the block
                outputs planes * expansion channels
            :param stride: stride passed to the 3x3 convolution (conv2)
            :param downsample: optional module applied to the input on the skip path,
                or None to add the input unchanged
            :param groups: group count passed to conv2 (not needed for plain ResNet;
                used by ResNeXt), see https://zhuanlan.zhihu.com/p/28749411
            :param base_width: width per group; scales the inner width as
                int(planes * base_width / 64) * groups
            :param dilation: dilation passed to conv2; 1 means no dilation
            :param norm_layer: callable that builds a norm layer from a channel count;
                defaults to nn.BatchNorm2d
            """
            super(Bottleneck, self).__init__()
            if norm_layer is None:
                norm_layer = nn.BatchNorm2d
            out_channels = planes * self.expansion
            width = int(planes * (base_width / 64.)) * groups
            # Both self.conv2 and self.downsample layers downsample the input when stride != 1
            # Submodules are created in the same order as before so parameter
            # registration order and random initialisation order are unchanged.
            self.conv1 = conv1x1(inplanes, width)
            self.bn1 = norm_layer(width)
            self.conv2 = conv3x3(width, width, stride, groups, dilation)
            self.bn2 = norm_layer(width)
            self.conv3 = conv1x1(width, out_channels)
            self.bn3 = norm_layer(out_channels)
            self.relu = nn.ReLU(inplace=True)
            self.downsample = downsample
            self.stride = stride

        def forward(self, x):
            """Return relu(residual_branch(x) + skip(x))."""
            out = self.relu(self.bn1(self.conv1(x)))
            out = self.relu(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))

            # Skip path: the raw input, or its projection when a downsample module is set.
            identity = x if self.downsample is None else self.downsample(x)

            out += identity
            return self.relu(out)
    
    pytorch版ResNet

    图片原始来自:

    https://zhuanlan.zhihu.com/p/353235794
    

    原始图片是论文的版本,即ResNet V1.0;上面的图片是我在此基础上修改的pytorch版,即ResNet V1.5,参见:

    https://ngc.nvidia.com/catalog/resources/nvidia:resnet_50_v1_5_for_pytorch
    

    加了点东西,我觉得这样可以更好地理解程序
    注:
    图片右边
    C对应程序里inplanes
    C1对应程序里planes
    $$\begin{array}{c|c} \hline \text{inplanes} & \text{planes} \\ \hline 64 & 64 \\ 256 & 64 \\ 256 & 64 \\ \hline 256 & 128 \\ 512 & 128 \\ 512 & 128 \\ 512 & 128 \\ \hline 512 & 256 \\ 1024 & 256 \\ 1024 & 256 \\ 1024 & 256 \\ 1024 & 256 \\ 1024 & 256 \\ \hline 1024 & 512 \\ 2048 & 512 \\ 2048 & 512 \\ \hline \end{array}$$
    上表是resnet50整个流程中各个Bottleneck的inplanes和planes的变化(每个分隔段对应一个layer)
    最后resnet家族图:

    resnet家族

    相关文章

      网友评论

          本文标题:torchvision.models.resnet.resnet

          本文链接:https://www.haomeiwen.com/subject/pbioqltx.html