上个星期开始,一直在改这一块的代码.数据集挺简单的,caffe 都给处理好了.有个问题就是:这位作者在文章中说他们已经测试过 cifar10 和 cifar100 了,但最后给的模型却是 imagenet 的.虽然确实是他自己改的代码,但开头说好的配置不提供,反而给一个不容易复现的,真让人头大.最后也不知道改得对不对,不过这也是我第一次搭建网络,算是能出现输入和输出了,这里记一下中间过程踩过的坑吧.
首先就是这个网络:
# Residual Attention Network "Attention-92" (deploy-style prototxt excerpt).
# Legacy input declaration: one 3-channel 224x224 image blob named "data".
# (Newer Caffe versions prefer an explicit Input layer, but this form still parses.)
name: "Attention-92"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 224
input_dim: 224
# --- Stem: 7x7/s2 conv -> BN+ReLU -> 3x3/s2 max pool -> BN+ReLU ---
# Two stride-2 stages reduce the 224x224 input to 56x56 before the residual units.
layer{
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 64
pad: 3
kernel_size: 7
stride: 2
bias_term: false
}
}
# NOTE(review): type "BN" with bn_param{frozen: true} comes from a custom Caffe
# fork (the residual-attention-network repo), not mainline Caffe's "BatchNorm".
# "frozen" presumably means stored statistics are used at inference time --
# confirm the fork's BN layer is compiled into the Caffe build before running.
layer{
name: "conv1/bn"
type: "BN"
bottom: "conv1"
top: "conv1/bn"
bn_param {
frozen: true
}
}
# In-place ReLU (top == bottom) to save memory.
layer{
name: "conv1/bn/relu"
type: "ReLU"
bottom: "conv1/bn"
top: "conv1/bn"
}
layer{
name: "pool1_3x3_s2"
type: "Pooling"
bottom: "conv1/bn"
top: "pool1_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "pool1_3x3_s2/bn"
type: "BN"
bottom: "pool1_3x3_s2"
top: "pool1_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "pool1_3x3_s2/bn/relu"
type: "ReLU"
bottom: "pool1_3x3_s2/bn"
top: "pool1_3x3_s2/bn"
}
# --- pre_res_1: bottleneck residual unit (1x1/64 -> 3x3/64 -> 1x1/256) ---
# branch1 is the bottleneck path; branch2 is a 1x1 projection that lifts the
# shortcut from 64 to 256 channels so the Eltwise SUM shapes match.
layer{
name: "pre_res_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "pool1_3x3_s2/bn"
top: "pre_res_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "pre_res_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "pre_res_1/branch1/conv1_1x1"
top: "pre_res_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "pre_res_1/branch1/conv1_1x1/bn"
top: "pre_res_1/branch1/conv1_1x1/bn"
}
layer{
name: "pre_res_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "pre_res_1/branch1/conv1_1x1/bn"
top: "pre_res_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "pre_res_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "pre_res_1/branch1/conv2_3x3"
top: "pre_res_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "pre_res_1/branch1/conv2_3x3/bn"
top: "pre_res_1/branch1/conv2_3x3/bn"
}
# Expansion 1x1 conv: 64 -> 256 channels (no BN/ReLU before the sum,
# consistent with the pre-activation layout used throughout this net).
layer{
name: "pre_res_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "pre_res_1/branch1/conv2_3x3/bn"
top: "pre_res_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Projection shortcut (branch2): 1x1 conv from the same input as branch1.
layer{
name: "pre_res_1/branch2/conv1_1x1"
type: "Convolution"
bottom: "pool1_3x3_s2/bn"
top: "pre_res_1/branch2/conv1_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Residual sum of shortcut and bottleneck path (both 256 channels).
layer{
name: "pre_res_1"
type: "Eltwise"
bottom: "pre_res_1/branch2/conv1_1x1"
bottom: "pre_res_1/branch1/conv3_1x1"
top: "pre_res_1"
eltwise_param {
operation: SUM
}
}
layer{
name: "pre_res_1/bn"
type: "BN"
bottom: "pre_res_1"
top: "pre_res_1/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/bn/relu"
type: "ReLU"
bottom: "pre_res_1/bn"
top: "pre_res_1/bn"
}
# --- AttentionA_1 trunk, residual unit 1 (bottleneck 1x1/64 -> 3x3/64 -> 1x1/256) ---
# Identity shortcut is taken from "pre_res_1" (the raw Eltwise sum, before its
# BN/ReLU), while the bottleneck path starts from "pre_res_1/bn" -- the
# pre-activation residual pattern.
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
type: "Convolution"
bottom: "pre_res_1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Identity shortcut: both bottoms are 256-channel, so a plain SUM works.
layer{
name: "AttentionA_1/trunk/res1"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
bottom: "pre_res_1"
top: "AttentionA_1/trunk/res1"
eltwise_param {
operation: SUM
}
}
layer{
name: "AttentionA_1/trunk/res1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/trunk/res1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/bn"
top: "AttentionA_1/trunk/res1/bn"
}
# --- AttentionA_1 trunk, residual unit 2 (same bottleneck layout as res1) ---
# Bottleneck path reads res1's post-BN/ReLU output; identity shortcut takes
# the pre-BN sum "AttentionA_1/trunk/res1".
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/trunk/res2"
eltwise_param {
operation: SUM
}
}
layer{
name: "AttentionA_1/trunk/res2/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2"
top: "AttentionA_1/trunk/res2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/bn"
top: "AttentionA_1/trunk/res2/bn"
}
# --- AttentionA_1 trunk, residual unit 3 ---
# Same bottleneck layout; ends at the Eltwise sum with no trailing BN/ReLU.
# NOTE(review): "AttentionA_1/trunk/res3" is not consumed anywhere in this
# excerpt -- in the full Attention-92 net it presumably feeds the attention
# fusion (trunk * (1 + mask)); confirm against the complete model.
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/bn"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
bottom: "AttentionA_1/trunk/res2"
top: "AttentionA_1/trunk/res3"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 1 ---
# Max-pool (stride 2) then one bottleneck residual unit. The residual sum
# uses the raw pool output (pre-BN) as its identity path.
# NOTE(review): the mask branch is fed from "AttentionA_1/trunk/res1" here;
# verify against the reference prototxt that this is the intended branch point.
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
top: "AttentionA_1/mask/down_sample/res1_1"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 2 ---
# Same pattern as stage 1: stride-2 max-pool, BN+ReLU, one bottleneck unit
# with the raw pool output as the identity path.
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res1_1"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
top: "AttentionA_1/mask/down_sample/res2_1"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 3 ---
# Third and deepest down-sample stage of the mask branch in this excerpt.
# NOTE(review): the full Attention module would follow this with up-sampling
# (interp/deconv) and sigmoid layers before fusing with the trunk -- those
# layers are not present in this excerpt.
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res2_1"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
top: "AttentionA_1/mask/down_sample/res3_1"
eltwise_param {
operation: SUM
}
}
# --- Classifier head ---
# BUG FIX: the original excerpt referenced bottom "ave_pool", but no layer in
# this file produced a top named "ave_pool", so Caffe could not instantiate
# the net. Added a global average pooling layer named "ave_pool", fed by the
# last top defined in this excerpt. NOTE(review): in the complete Attention-92
# model the pool should follow the final residual stage instead -- confirm
# against the full model definition.
layer{
name: "ave_pool"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res3_1"
top: "ave_pool"
pooling_param {
pool: AVE
global_pooling: true
}
}
# Fully-connected layer mapping pooled features to 1000 class scores
# (ImageNet-1k logits).
layer{
name: "classifier"
type: "InnerProduct"
bottom: "ave_pool"
top: "classifier"
inner_product_param {
num_output: 1000
}
}
# Softmax converts the logits into class probabilities.
layer{
name: "cls"
type: "Softmax"
bottom: "classifier"
top: "cls"
}
这个是作者给的用来测试训练imagenet的,而且木有给输入和输出,这里参考了一下caffe里面自带的cifar_quick_solver.prototxt的写法,自己加了输入输出,然后后面改了一改.因为服务器现在没法可视化,我只能从ssh上截图发出来了.
2018-09-18 17-35-51屏幕截图.png 2018-09-18 17-35-59屏幕截图.png
这样和上面两张图看到的一样,这里把该导入的数据导入进去.然后因为这个网络是 92 层的,中间有好多地方维度是不匹配的,很头疼最后应该怎么改.记得中间改了不少 64 的 num_output,我查了一下,这就是卷积核的个数,影响的是复杂度:多个小的卷积核有时候会比一个大的卷积核要简单,所以这个实际上和原来的 64 说不定没什么区别,但是复杂度低一些,因为这里设置的时候是 22 的.
2018-09-18 17-40-53屏幕截图.png
这个是最后加的 loss 和 error.因为开始老是发现这个 accuracy 很低,我在想如果真的是改的有问题我也认了;但这个人论文中讲的是他最后测的 error 和 loss,我想是不是按他源网络来就可以呢.于是最后把 accuracy 顺便改成了 error,这样看来的话和原文的差别倒不是很大了.
中间还有一个大问题是会出现 weight 以及各种参数存不进去的情况.对于这一点我其实在上周五就饱受困扰,最后发现是作者给的源码的问题:他起的各种 layer 的 name 都带 "/" 这个符号,从 github 上查到说这个符号不能出现在命名中,于是我全改成了下划线,终于可以存进去了,万岁!这个链接就是他原工程的代码:\url{https://github.com/fwang91/residual-attention-network},讲道理他这个真的能跑通吗?如果这个 layer 命名有问题,那不管在哪个数据集上跑都不可能把参数存到模型里啊.
网友评论