上个星期开始,一直在改这一块的代码.数据集挺简单的,caffe 都给处理好了.有个问题就是:这位作者在文章中说他们已经测试过 cifar10 和 cifar100 了,但最后给的模型却是 imagenet 的.虽然确实是他自己改的代码,但开头说好的配置不提供,反而给一个不容易复现的,真让人头大.最后也不知道改得对不对,不过这也是我第一次搭建网络,算是能出现输入和输出了,这里记一下中间过程踩过的坑吧.
首先就是这个网络:
# Residual Attention Network "Attention-92" (deploy-style prototxt excerpt).
# Legacy input declaration: one 3-channel 224x224 image blob named "data".
# (Newer Caffe versions prefer an explicit Input layer, but this form still parses.)
name: "Attention-92"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 224
input_dim: 224
# --- Stem: 7x7/s2 conv -> BN+ReLU -> 3x3/s2 max pool -> BN+ReLU ---
# Two stride-2 stages reduce the 224x224 input to 56x56 before the residual units.
layer{
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 64
pad: 3
kernel_size: 7
stride: 2
bias_term: false
}
}
# NOTE(review): type "BN" with bn_param{frozen: true} comes from a custom Caffe
# fork (the residual-attention-network repo), not mainline Caffe's "BatchNorm".
# "frozen" presumably means stored statistics are used at inference time --
# confirm the fork's BN layer is compiled into the Caffe build before running.
layer{
name: "conv1/bn"
type: "BN"
bottom: "conv1"
top: "conv1/bn"
bn_param {
frozen: true
}
}
# In-place ReLU (top == bottom) to save memory.
layer{
name: "conv1/bn/relu"
type: "ReLU"
bottom: "conv1/bn"
top: "conv1/bn"
}
layer{
name: "pool1_3x3_s2"
type: "Pooling"
bottom: "conv1/bn"
top: "pool1_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "pool1_3x3_s2/bn"
type: "BN"
bottom: "pool1_3x3_s2"
top: "pool1_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "pool1_3x3_s2/bn/relu"
type: "ReLU"
bottom: "pool1_3x3_s2/bn"
top: "pool1_3x3_s2/bn"
}
# --- pre_res_1: bottleneck residual unit (1x1/64 -> 3x3/64 -> 1x1/256) ---
# branch1 is the bottleneck path; branch2 is a 1x1 projection that lifts the
# shortcut from 64 to 256 channels so the Eltwise SUM shapes match.
layer{
name: "pre_res_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "pool1_3x3_s2/bn"
top: "pre_res_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "pre_res_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "pre_res_1/branch1/conv1_1x1"
top: "pre_res_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "pre_res_1/branch1/conv1_1x1/bn"
top: "pre_res_1/branch1/conv1_1x1/bn"
}
layer{
name: "pre_res_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "pre_res_1/branch1/conv1_1x1/bn"
top: "pre_res_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "pre_res_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "pre_res_1/branch1/conv2_3x3"
top: "pre_res_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "pre_res_1/branch1/conv2_3x3/bn"
top: "pre_res_1/branch1/conv2_3x3/bn"
}
# Expansion 1x1 conv: 64 -> 256 channels (no BN/ReLU before the sum,
# consistent with the pre-activation layout used throughout this net).
layer{
name: "pre_res_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "pre_res_1/branch1/conv2_3x3/bn"
top: "pre_res_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Projection shortcut (branch2): 1x1 conv from the same input as branch1.
layer{
name: "pre_res_1/branch2/conv1_1x1"
type: "Convolution"
bottom: "pool1_3x3_s2/bn"
top: "pre_res_1/branch2/conv1_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Residual sum of shortcut and bottleneck path (both 256 channels).
layer{
name: "pre_res_1"
type: "Eltwise"
bottom: "pre_res_1/branch2/conv1_1x1"
bottom: "pre_res_1/branch1/conv3_1x1"
top: "pre_res_1"
eltwise_param {
operation: SUM
}
}
layer{
name: "pre_res_1/bn"
type: "BN"
bottom: "pre_res_1"
top: "pre_res_1/bn"
bn_param {
frozen: true
}
}
layer{
name: "pre_res_1/bn/relu"
type: "ReLU"
bottom: "pre_res_1/bn"
top: "pre_res_1/bn"
}
# --- AttentionA_1 trunk, residual unit 1 (bottleneck 1x1/64 -> 3x3/64 -> 1x1/256) ---
# Identity shortcut is taken from "pre_res_1" (the raw Eltwise sum, before its
# BN/ReLU), while the bottleneck path starts from "pre_res_1/bn" -- the
# pre-activation residual pattern.
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
type: "Convolution"
bottom: "pre_res_1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
# Identity shortcut: both bottoms are 256-channel, so a plain SUM works.
layer{
name: "AttentionA_1/trunk/res1"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res1/branch1/conv3_1x1"
bottom: "pre_res_1"
top: "AttentionA_1/trunk/res1"
eltwise_param {
operation: SUM
}
}
layer{
name: "AttentionA_1/trunk/res1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/trunk/res1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res1/bn"
top: "AttentionA_1/trunk/res1/bn"
}
# --- AttentionA_1 trunk, residual unit 2 (same bottleneck layout as res1) ---
# Bottleneck path reads res1's post-BN/ReLU output; identity shortcut takes
# the pre-BN sum "AttentionA_1/trunk/res1".
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res2"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res2/branch1/conv3_1x1"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/trunk/res2"
eltwise_param {
operation: SUM
}
}
layer{
name: "AttentionA_1/trunk/res2/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res2"
top: "AttentionA_1/trunk/res2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res2/bn"
top: "AttentionA_1/trunk/res2/bn"
}
# --- AttentionA_1 trunk, residual unit 3 ---
# Same bottleneck layout; ends at the Eltwise sum with no trailing BN/ReLU.
# NOTE(review): "AttentionA_1/trunk/res3" is not consumed anywhere in this
# excerpt -- in the full Attention-92 net it presumably feeds the attention
# fusion (trunk * (1 + mask)); confirm against the complete model.
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res2/bn"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/trunk/res3/branch1/conv1_1x1/bn"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/trunk/res3/branch1/conv2_3x3/bn"
top: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/trunk/res3"
type: "Eltwise"
bottom: "AttentionA_1/trunk/res3/branch1/conv3_1x1"
bottom: "AttentionA_1/trunk/res2"
top: "AttentionA_1/trunk/res3"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 1 ---
# Max-pool (stride 2) then one bottleneck residual unit. The residual sum
# uses the raw pool output (pre-BN) as its identity path.
# NOTE(review): the mask branch is fed from "AttentionA_1/trunk/res1" here;
# verify against the reference prototxt that this is the intended branch point.
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/trunk/res1"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res1_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res1_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool1_3x3_s2"
top: "AttentionA_1/mask/down_sample/res1_1"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 2 ---
# Same pattern as stage 1: stride-2 max-pool, BN+ReLU, one bottleneck unit
# with the raw pool output as the identity path.
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res1_1"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res2_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res2_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool2_3x3_s2"
top: "AttentionA_1/mask/down_sample/res2_1"
eltwise_param {
operation: SUM
}
}
# --- AttentionA_1 soft-mask branch, down-sample stage 3 ---
# Third and deepest down-sample stage of the mask branch in this excerpt.
# NOTE(review): the full Attention module would follow this with up-sampling
# (interp/deconv) and sigmoid layers before fusing with the trunk -- those
# layers are not present in this excerpt.
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res2_1"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv1_1x1/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
type: "BN"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
bn_param {
frozen: true
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn/relu"
type: "ReLU"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
type: "Convolution"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv2_3x3/bn"
top: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
bias_term: false
}
}
layer{
name: "AttentionA_1/mask/down_sample/res3_1"
type: "Eltwise"
bottom: "AttentionA_1/mask/down_sample/res3_1/branch1/conv3_1x1"
bottom: "AttentionA_1/mask/down_sample/pool3_3x3_s2"
top: "AttentionA_1/mask/down_sample/res3_1"
eltwise_param {
operation: SUM
}
}
# --- Classifier head ---
# BUG FIX: the original excerpt referenced bottom "ave_pool", but no layer in
# this file produced a top named "ave_pool", so Caffe could not instantiate
# the net. Added a global average pooling layer named "ave_pool", fed by the
# last top defined in this excerpt. NOTE(review): in the complete Attention-92
# model the pool should follow the final residual stage instead -- confirm
# against the full model definition.
layer{
name: "ave_pool"
type: "Pooling"
bottom: "AttentionA_1/mask/down_sample/res3_1"
top: "ave_pool"
pooling_param {
pool: AVE
global_pooling: true
}
}
# Fully-connected layer mapping pooled features to 1000 class scores
# (ImageNet-1k logits).
layer{
name: "classifier"
type: "InnerProduct"
bottom: "ave_pool"
top: "classifier"
inner_product_param {
num_output: 1000
}
}
# Softmax converts the logits into class probabilities.
layer{
name: "cls"
type: "Softmax"
bottom: "classifier"
top: "cls"
}
这个是作者给的用来测试训练imagenet的,而且木有给输入和输出,这里参考了一下caffe里面自带的cifar_quick_solver.prototxt的写法,自己加了输入输出,然后后面改了一改.因为服务器现在没法可视化,我只能从ssh上截图发出来了.
2018-09-18 17-35-51屏幕截图.png 2018-09-18 17-35-59屏幕截图.png
这样和上面两张图看到的一样,这里把该导入的数据导入进去.然后因为这个网络是 92 层的,中间有好多地方维度是不匹配的,很头疼最后应该怎么改.记得中间改了不少 64 的 num_output,我查了一下,这就是卷积核的个数,影响的是复杂度:多个小的卷积核有时候会比一个大的卷积核要简单,所以这个实际上和原来的 64 说不定没什么区别,但是复杂度低一些,因为这里设置的时候是 22 的.
2018-09-18 17-40-53屏幕截图.png
这个是最后加的 loss 和 error.因为开始老是发现这个 accuracy 很低,我在想如果真的是改的有问题我也认了;但这个人论文中讲的是他最后测的 error 和 loss,我想是不是按他源网络来就可以呢.于是最后把 accuracy 顺便改成了 error,这样看来的话和原文的差别倒不是很大了.
中间还有一个大问题是会出现 weight 以及各种参数存不进去的情况.对于这一点我其实在上周五就饱受困扰,最后发现是作者给的源码的问题:他起的各种 layer 的 name 都带 "/" 这个符号,从 github 上查到说这个符号不能出现在命名中,于是我全改成了下划线,终于可以存进去了,万岁!这个链接就是他原工程的代码:\url{https://github.com/fwang91/residual-attention-network},讲道理他这个真的能跑通吗?如果这个 layer 命名有问题,那不管在哪个数据集上跑都不可能把参数存到模型里啊.
网友评论