Net 完整的CNN模型
- 模型描述文件(prototxt)
- 模型权重(caffemodel)
类对象 | 含义 |
---|---|
vector<shared_ptr<Layer<Dtype> > > layers_ | 记录出现的每个layer |
vector<string> layer_names_ | 记录出现的每个layer的名称 |
map<string, int> layer_names_index_ | 记录每个layer名称与顺序索引的对应关系 |
vector<bool> layer_need_backward_ | 记录每个layer是否需要反向传播 |
vector<shared_ptr<Blob<Dtype> > > blobs_ | 记录所有Blob |
vector<string> blob_names_ | 记录所有Blob的名称 |
map<string, int> blob_names_index_ | 记录每个Blob名称与顺序索引的对应关系 |
vector<bool> blob_need_backward_ | 记录每个Blob是否需要反向传播 |
vector<vector<Blob<Dtype>*> > bottom_vecs_ | blobs_的影子,记录每个layer的输入Blob |
vector<vector<Blob<Dtype>*> > top_vecs_ | blobs_的影子,记录每个layer的输出Blob |
vector<int> net_input_blob_indices_ | 输入Blob在blobs_中的索引 |
vector<int> net_output_blob_indices_ | 输出Blob在blobs_中的索引 |
vector<Blob<Dtype>*> net_input_blobs_ | Net的输入Blob |
vector<Blob<Dtype>*> net_output_blobs_ | Net的输出Blob |
vector<shared_ptr<Blob<Dtype> > > params_ | Net的权值Blob |
vector<Blob<Dtype>*> learnable_params_ | Net的可训练权值Blob |
vector<float> params_lr_ | 每个元素的学习因子 |
Net中的Blob分为两类,一类是以blob开头命名的Layer输入/输出Blob,一类是以param开头命名的权值Blob。权值Blob归属于模型,layer输入/输出Blob归属于数据会随着输入变化。
重要成员
成员函数
explicit Net(const NetParameter& param, const Net* root_net = NULL);
explicit Net(const string& param_file, Phase phase,const Net* root_net = NULL);
/// @brief Initialize a network with a NetParameter.
void Init(const NetParameter& param);
//前向传播的几种形式
Dtype ForwardFromTo(int start, int end);
Dtype ForwardFrom(int start);
Dtype ForwardTo(int end);
void Backward();
void BackwardFromTo(int start, int end);
void BackwardFrom(int start);
void BackwardTo(int end);
void Reshape();
//Updates the network weights based on the diff values computed.
void Update();
1.Net 的构造和初始化
/// @brief Constructs a Net from an already-populated NetParameter.
/// @param param     network description passed straight to Init().
/// @param root_net  stored in root_net_ before Init() runs.
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
: root_net_(root_net) {
// All construction work is delegated to Init().
Init(param);
}
/// @brief Constructs a Net from a text-format network description file.
/// @param param_file  path handed to ReadNetParamsFromTextFileOrDie().
/// @param phase       phase written into the parsed parameter's state
///                    before initialization.
/// @param root_net    stored in root_net_ before Init() runs.
template <typename Dtype>
Net<Dtype>::Net(const string& param_file, Phase phase, const Net* root_net)
    : root_net_(root_net) {
  NetParameter param;
  // The reader fills `param` through a pointer (output argument).
  ReadNetParamsFromTextFileOrDie(param_file, &param);
  // Record the requested phase in the parameter's state before Init().
  param.mutable_state()->set_phase(phase);
  Init(param);
}
两个构造函数都调用了Init()函数
对Init()分析,取重要的步骤:
1.Init之前,先读取prototxt的内容到NetParameter中
NetParameter param;
ReadNetParamsFromTextFileOrDie(param_file, &param);
param.mutable_state()->set_phase(phase);
2.对NetParameter进行解析
- 过滤一些参数,只保留当前阶段参数,然后重新写到NetParameter param中,param包含每一层的所有参数
NetParameter filtered_param;
FilterNet(in_param, &filtered_param);
NetParameter param;
InsertSplits(filtered_param, &param);
- 根据layer的数量,为每个vector分配空间,以下都是二维vector,只对第0维resize到layer_size,以便于遍历
bottom_vecs_.resize(param.layer_size());
top_vecs_.resize(param.layer_size());
bottom_id_vecs_.resize(param.layer_size());
param_id_vecs_.resize(param.layer_size());
top_id_vecs_.resize(param.layer_size());
bottom_need_backward_.resize(param.layer_size());
- 遍历param做以下操作
- 创建网络中用到的层
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
//blob_name_to_idx 用来记录blob名称到其在blobs_中索引的映射
map<string, int> blob_name_to_idx;
//available_blobs 用来记录已经作为top产生、但尚未被后续层作为bottom使用的blob名称
set<string> available_blobs;
- 为该层创建输入输出Blob
for (int bottom_id = 0; bottom_id < layer_param.bottom_size(); ++bottom_id)
{
const int blob_id = AppendBottom(param, layer_id, bottom_id,
&available_blobs, &blob_name_to_idx);
// If a blob needs backward, this layer should provide it.
need_backward |= blob_need_backward_[blob_id];
}
int num_top = layer_param.top_size();
for (int top_id = 0; top_id < num_top; ++top_id)
{
AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
// Collect Input layer tops as Net inputs.
if (layer_param.type() == "Input") {
const int blob_id = blobs_.size() - 1;
net_input_blob_indices_.push_back(blob_id);
net_input_blobs_.push_back(blobs_[blob_id].get());
}
}
详细分析AppendBottom和AppendTop
AppendBottom
// Helper for Net::Init: connects bottom number `bottom_id` of layer
// `layer_id` to a blob that an earlier top registered, and returns that
// blob's index in blobs_.
//   param             network description the layer is read from.
//   available_blobs   names of blobs currently available as inputs; the
//                     consumed name is erased from it.
//   blob_name_to_idx  map from blob name to index in blobs_.
// Aborts via LOG(FATAL) when the bottom name is not in available_blobs.
template <typename Dtype>
int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
const int bottom_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx) {
const LayerParameter& layer_param = param.layer(layer_id);
const string& blob_name = layer_param.bottom(bottom_id);
// A bottom must name a blob that is currently available.
if (available_blobs->find(blob_name) == available_blobs->end()) {
LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '"
<< layer_param.name() << "', bottom index " << bottom_id << ")";
}
const int blob_id = (*blob_name_to_idx)[blob_name];
LOG_IF(INFO, Caffe::root_solver())
<< layer_names_[layer_id] << " <- " << blob_name;
// Record both the raw blob pointer and its index for this layer.
bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
bottom_id_vecs_[layer_id].push_back(blob_id);
// The name has been consumed, so remove it from the available set.
available_blobs->erase(blob_name);
bool propagate_down = true;
// Check if the backpropagation on bottom_id should be skipped
if (layer_param.propagate_down_size() > 0)
propagate_down = layer_param.propagate_down(bottom_id);
// Backward is needed only if the blob itself needs it and the layer
// parameter has not disabled propagation for this bottom.
const bool need_backward = blob_need_backward_[blob_id] &&
propagate_down;
bottom_need_backward_[layer_id].push_back(need_backward);
return blob_id;
}
1.下一层的bottom blob是上一层的top blob。因此寻找登记的blob的索引
if (available_blobs->find(blob_name) == available_blobs->end())//判断是否存在该blob
const int blob_id = (*blob_name_to_idx)[blob_name];//获得索引
2.将bottom blob的指针和索引分别放到相应的vector中
bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
bottom_id_vecs_[layer_id].push_back(blob_id);
available_blobs->erase(blob_name);
.
AppendTop
// Helper for Net::Init: add a new top blob to the net.
// Registers top number `top_id` of layer `layer_id`. Three cases:
//   1. in-place: the top has the same name as the bottom at the same index,
//      so the existing blob is reused;
//   2. the name already exists but not in-place: fatal error;
//   3. otherwise a new Blob is created and registered.
// `available_blobs` and `blob_name_to_idx` may be NULL; every use below is
// guarded by a null check.
template <typename Dtype>
void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
const int top_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx) {
shared_ptr<LayerParameter> layer_param(
new LayerParameter(param.layer(layer_id)));
// Falls back to the placeholder name "(automatic)" when the layer
// parameter declares no top at this index.
const string& blob_name = (layer_param->top_size() > top_id) ?
layer_param->top(top_id) : "(automatic)";
// Check if we are doing in-place computation
if (blob_name_to_idx && layer_param->bottom_size() > top_id &&
blob_name == layer_param->bottom(top_id)) {
// In-place computation
LOG_IF(INFO, Caffe::root_solver())
<< layer_param->name() << " -> " << blob_name << " (in-place)";
// Reuse the already registered blob; nothing new is appended to blobs_.
top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get());
top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]);
} else if (blob_name_to_idx &&
blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) {
// If we are not doing in-place computation but have duplicated blobs,
// raise an error.
LOG(FATAL) << "Top blob '" << blob_name
<< "' produced by multiple sources.";
} else {
// Normal output.
if (Caffe::root_solver()) {
LOG(INFO) << layer_param->name() << " -> " << blob_name;
}
// Create the blob; its index is the position it takes in blobs_.
shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
const int blob_id = blobs_.size();
blobs_.push_back(blob_pointer);
blob_names_.push_back(blob_name);
// New blobs start with need_backward == false.
blob_need_backward_.push_back(false);
if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
top_id_vecs_[layer_id].push_back(blob_id);
top_vecs_[layer_id].push_back(blob_pointer.get());
}
// Make the name available as a bottom for subsequent layers.
if (available_blobs) { available_blobs->insert(blob_name); }
}
1.判断是否为原位计算
if (blob_name_to_idx && layer_param->bottom_size() > top_id &&
blob_name == layer_param->bottom(top_id))
2.是原位计算则把指针和索引放到vector中
top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get());
top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]);
3.如果不是原位计算但名字重复则报错
4.正常输出时,创建新的输入输出blob,并将指针和索引放到相应的vector中
shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
const int blob_id = blobs_.size();
blobs_.push_back(blob_pointer);
blob_names_.push_back(blob_name);
blob_need_backward_.push_back(false);
if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
top_id_vecs_[layer_id].push_back(blob_id);
top_vecs_[layer_id].push_back(blob_pointer.get());
5.插入已登记的blob到available_blobs中
if (available_blobs) { available_blobs->insert(blob_name); }
- 设置每个layer
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
// Common set-up entry point for a layer. Runs the fixed sequence below on
// the given bottom (input) and top (output) blobs.
void SetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
InitMutex();
// Check the bottom/top blobs before any layer-specific work.
CheckBlobCounts(bottom, top);
// Layer-specific configuration.
LayerSetUp(bottom, top);
// Reshape according to the bottoms.
Reshape(bottom, top);
// Loss-weight handling for the top blobs.
SetLossWeights(top);
}
这里是SetUp函数:首先通过CheckBlobCounts检查bottom和top blob的数量是否正确,再调用LayerSetUp对具体的层做进一步设置,之后调用Reshape来设置top blob和内部缓冲区(internal buffer)的形状,最后通过SetLossWeights为每个loss权重非零的top blob设置loss weight multiplier。一般来说,SetUp这个方法在子类中是不会被重写的。
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
这三个函数中,LayerSetUp和Reshape由各个具体的层通过继承(重写虚函数)来实现,SetLossWeights则由Layer基类统一实现。我们以conv层来分析
// One-time configuration of a convolution layer. Reads the convolution
// parameters, fills kernel_shape_ / stride_ / pad_ / dilation_, derives the
// channel and group configuration, and validates or creates the weight blob
// (blobs_[0]) and the optional bias blob (blobs_[1]).
//   bottom  input blobs; only bottom[0] is inspected here.
//   top     output blobs; not used in this function.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Configure the kernel size, padding, stride, and inputs.
ConvolutionParameter conv_param = this->layer_param_.convolution_param();// copy of this layer's convolution parameters
force_nd_im2col_ = conv_param.force_nd_im2col();// flag read from the parameters; presumably forces the N-d im2col path (confirm)
/* channel_axis_ is the `axis` parameter canonicalized against bottom[0].
NOTE(review): the default is presumably 1, i.e. the C axis of an
(N, C, H, W) blob -- confirm against the parameter definition. Every axis
after channel_axis_ is treated as a spatial axis. */
channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis());
const int first_spatial_axis = channel_axis_ + 1;// first spatial axis (typically H, the height)
const int num_axes = bottom[0]->num_axes();// number of axes of the bottom blob
num_spatial_axes_ = num_axes - first_spatial_axis;// number of spatial axes the convolution works on
CHECK_GE(num_spatial_axes_, 0);// the number of spatial axes must be >= 0 (zero is allowed)
vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);// one-element shape {num_spatial_axes_ + 1}; not referenced again in this function
vector<int> spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1));// one-element shape with at least one entry, shared by the kernel/stride/pad/dilation blobs
// Setup filter kernel dimensions (kernel_shape_).
kernel_shape_.Reshape(spatial_dim_blob_shape);// one entry per spatial axis
int* kernel_shape_data = kernel_shape_.mutable_cpu_data();// writable pointer to the kernel shape entries
/* If kernel_h / kernel_w are given they are used directly; this is only
valid for 2D convolution and kernel_size must then be absent. Otherwise the
kernel shape comes from kernel_size, given either once (applied to every
spatial axis) or once per spatial axis. */
if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) {
CHECK_EQ(num_spatial_axes_, 2)
<< "kernel_h & kernel_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.kernel_size_size())
<< "Either kernel_size or kernel_h/w should be specified; not both.";
kernel_shape_data[0] = conv_param.kernel_h();
kernel_shape_data[1] = conv_param.kernel_w();
} else {
const int num_kernel_dims = conv_param.kernel_size_size();
CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
<< "kernel_size must be specified once, or once per spatial dimension "
<< "(kernel_size specified " << num_kernel_dims << " times; "
<< num_spatial_axes_ << " spatial dims).";
for (int i = 0; i < num_spatial_axes_; ++i) {
kernel_shape_data[i] =
conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
}
}
// Every kernel dimension must be strictly positive.
for (int i = 0; i < num_spatial_axes_; ++i) {
CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero.";
}
// Setup stride dimensions (stride_).
stride_.Reshape(spatial_dim_blob_shape);// one stride entry per spatial axis
int* stride_data = stride_.mutable_cpu_data();// writable pointer to the stride entries
/* If stride_h / stride_w are given they are used directly (2D only, and
`stride` must then be absent). Otherwise the values come from `stride`:
absent -> kDefaultStride (1) on every axis; given once -> the same value on
every axis; given once per axis -> used as is. */
if (conv_param.has_stride_h() || conv_param.has_stride_w()) {
CHECK_EQ(num_spatial_axes_, 2)
<< "stride_h & stride_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.stride_size())
<< "Either stride or stride_h/w should be specified; not both.";
stride_data[0] = conv_param.stride_h();
stride_data[1] = conv_param.stride_w();
} else {
const int num_stride_dims = conv_param.stride_size();
CHECK(num_stride_dims == 0 || num_stride_dims == 1 ||
num_stride_dims == num_spatial_axes_)
<< "stride must be specified once, or once per spatial dimension "
<< "(stride specified " << num_stride_dims << " times; "
<< num_spatial_axes_ << " spatial dims).";
const int kDefaultStride = 1;
for (int i = 0; i < num_spatial_axes_; ++i) {
stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
conv_param.stride((num_stride_dims == 1) ? 0 : i);
CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero.";
}
}
// Setup pad dimensions (pad_).
/* If pad_h / pad_w are given they are used directly (2D only, and `pad`
must then be absent). Otherwise the values come from `pad`: absent ->
kDefaultPad (0) on every axis; given once -> the same value on every axis;
given once per axis -> used as is. */
pad_.Reshape(spatial_dim_blob_shape);
int* pad_data = pad_.mutable_cpu_data();
if (conv_param.has_pad_h() || conv_param.has_pad_w()) {
CHECK_EQ(num_spatial_axes_, 2)
<< "pad_h & pad_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.pad_size())
<< "Either pad or pad_h/w should be specified; not both.";
pad_data[0] = conv_param.pad_h();
pad_data[1] = conv_param.pad_w();
} else {
const int num_pad_dims = conv_param.pad_size();
CHECK(num_pad_dims == 0 || num_pad_dims == 1 ||
num_pad_dims == num_spatial_axes_)
<< "pad must be specified once, or once per spatial dimension "
<< "(pad specified " << num_pad_dims << " times; "
<< num_spatial_axes_ << " spatial dims).";
const int kDefaultPad = 0;
for (int i = 0; i < num_spatial_axes_; ++i) {
pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
}
/* Dilation has no _h/_w variants here: the values come from `dilation`:
absent -> kDefaultDilation (1) on every axis; given once -> the same value
on every axis; given once per axis -> used as is. */
// Setup dilation dimensions (dilation_).
dilation_.Reshape(spatial_dim_blob_shape);
int* dilation_data = dilation_.mutable_cpu_data();
const int num_dilation_dims = conv_param.dilation_size();
CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
num_dilation_dims == num_spatial_axes_)
<< "dilation must be specified once, or once per spatial dimension "
<< "(dilation specified " << num_dilation_dims << " times; "
<< num_spatial_axes_ << " spatial dims).";
const int kDefaultDilation = 1;
for (int i = 0; i < num_spatial_axes_; ++i) {
dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
}
// Special case: im2col is the identity for 1x1 convolution with stride 1
// and no padding, so flag for skipping the buffer and transformation.
// is_1x1_ stays true only if every spatial axis has kernel 1, stride 1, pad 0.
is_1x1_ = true;
for (int i = 0; i < num_spatial_axes_; ++i) {
is_1x1_ &=
kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0;
if (!is_1x1_) { break; }
}
// Configure output channels and groups.
channels_ = bottom[0]->shape(channel_axis_);// number of input channels, read from bottom[0]
num_output_ = this->layer_param_.convolution_param().num_output();// number of output channels
CHECK_GT(num_output_, 0);// the number of output channels must be positive
group_ = this->layer_param_.convolution_param().group();// number of convolution groups
CHECK_EQ(channels_ % group_, 0);// input channels must be divisible by the group count
CHECK_EQ(num_output_ % group_, 0)// output channels must be divisible by the group count
<< "Number of output should be multiples of group.";
if (reverse_dimensions()) {// when reverse_dimensions() is true the in/out channel roles are swapped
conv_out_channels_ = channels_;
conv_in_channels_ = num_output_;
} else {
conv_out_channels_ = num_output_;
conv_in_channels_ = channels_;
}
// Handle the parameters: weights and biases.
// - blobs_[0] holds the filter weights
// - blobs_[1] holds the biases (optional)
vector<int> weight_shape(2);// expected shape of the weight blob
weight_shape[0] = conv_out_channels_;// axis 0: one filter per output channel
weight_shape[1] = conv_in_channels_ / group_;// axis 1: input channels per group
for (int i = 0; i < num_spatial_axes_; ++i) {
weight_shape.push_back(kernel_shape_data[i]);// remaining axes: the kernel size along each spatial axis
}
bias_term_ = this->layer_param_.convolution_param().bias_term();// whether a bias is used
vector<int> bias_shape(bias_term_, num_output_);// bias_term_ copies of num_output_: {num_output_} when true, empty when false
if (this->blobs_.size() > 0) {
CHECK_EQ(1 + bias_term_, this->blobs_.size())// blobs already present: their count must match
<< "Incorrect number of weight blobs.";
if (weight_shape != this->blobs_[0]->shape()) {// existing weights must have exactly the expected shape
Blob<Dtype> weight_shaped_blob(weight_shape);
LOG(FATAL) << "Incorrect weight shape: expected shape "
<< weight_shaped_blob.shape_string() << "; instead, shape was "
<< this->blobs_[0]->shape_string();
}
if (bias_term_ && bias_shape != this->blobs_[1]->shape()) {// existing bias must have exactly the expected shape
Blob<Dtype> bias_shaped_blob(bias_shape);
LOG(FATAL) << "Incorrect bias shape: expected shape "
<< bias_shaped_blob.shape_string() << "; instead, shape was "
<< this->blobs_[1]->shape_string();
}
LOG(INFO) << "Skipping parameter initialization";
} else {// no blobs yet: allocate one (weights) or two (weights + bias)
if (bias_term_) {
this->blobs_.resize(2);
} else {
this->blobs_.resize(1);
}
// Initialize and fill the weights:
// output channels x input channels per-group x kernel height x kernel width
this->blobs_[0].reset(new Blob<Dtype>(weight_shape));// allocate the weight blob with weight_shape
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().weight_filler()));// filler built from the weight_filler parameter (its default is not visible here)
weight_filler->Fill(this->blobs_[0].get());// fill the weights
// If necessary, initialize and fill the biases.
if (bias_term_) {
this->blobs_[1].reset(new Blob<Dtype>(bias_shape));// allocate the bias blob with bias_shape
shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().bias_filler()));
bias_filler->Fill(this->blobs_[1].get());// fill the biases
}
}
kernel_dim_ = this->blobs_[0]->count(1);// element count of the weight blob from axis 1 on: (conv_in_channels_ / group_) * product of kernel dims
weight_offset_ = conv_out_channels_ * kernel_dim_ / group_;// element count of the weights belonging to one group
// Propagate gradients to the parameters (as directed by backward pass).
this->param_propagate_down_.resize(this->blobs_.size(), true);// one flag per parameter blob; new entries are true
}
Blob<int> kernel_shape_用来保存核的形状 kernel_shape_data[0]为H,kernel_shape_data[1]为W
stride_、pad_等同理
weight_shape存储权值的形状:conv_out_channels_ * (conv_in_channels_ / group_) * k_h * k_w,group_为分组卷积的参数,默认为1
@Handle the parameters: weights and biases.
@- blobs_[0] holds the filter weights
@- blobs_[1] holds the biases (optional)
vector<int> weight_shape(2);
weight_shape[0] = conv_out_channels_;
weight_shape[1] = conv_in_channels_ / group_;
for (int i = 0; i < num_spatial_axes_; ++i) {
weight_shape.push_back(kernel_shape_data[i]);
}
若存在权重,则严格检查权重的维度,blobs_[0]为权重存储的地方;若不存在权重则申请内存并按一定规则初始化权重
if (this->blobs_.size() > 0) {
CHECK_EQ(1 + bias_term_, this->blobs_.size())
<< "Incorrect number of weight blobs.";
if (weight_shape != this->blobs_[0]->shape()) {
Blob<Dtype> weight_shaped_blob(weight_shape);
LOG(FATAL) << "Incorrect weight shape: expected shape "
<< weight_shaped_blob.shape_string() << "; instead, shape was "
<< this->blobs_[0]->shape_string();
}
if (bias_term_ && bias_shape != this->blobs_[1]->shape()) {
Blob<Dtype> bias_shaped_blob(bias_shape);
LOG(FATAL) << "Incorrect bias shape: expected shape "
<< bias_shaped_blob.shape_string() << "; instead, shape was "
<< this->blobs_[1]->shape_string();
}
LOG(INFO) << "Skipping parameter initialization";
} else {
if (bias_term_) {
this->blobs_.resize(2);
} else {
this->blobs_.resize(1);
}
// Initialize and fill the weights:
// output channels x input channels per-group x kernel height x kernel width
this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().weight_filler()));
weight_filler->Fill(this->blobs_[0].get());
// If necessary, initialize and fill the biases.
if (bias_term_) {
this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
this->layer_param_.convolution_param().bias_filler()));
bias_filler->Fill(this->blobs_[1].get());
}
}
kernel_dim_ = this->blobs_[0]->count(1);
weight_offset_ = conv_out_channels_ * kernel_dim_ / group_;
可见,kernel_dim_(一个输出通道对应的卷积核数据量)为 (conv_in_channels_ / group_) * k_h * k_w,weight_offset_ 为一组卷积的权值偏移量
// Recomputes every shape-dependent quantity of the convolution layer from
// the current bottom blobs: validates the bottoms, reshapes all tops, and
// updates the im2col buffer shape, the per-group offsets and the bias
// multiplier.
//   bottom  input blobs; all must share the shape of bottom[0].
//   top     output blobs; each is reshaped to the computed output shape.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int first_spatial_axis = channel_axis_ + 1;// index of the first spatial axis (typically height)
/* The number of axes of the bottom must still equal
first_spatial_axis + num_spatial_axes_. */
CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_)
<< "bottom num_axes may not change.";
num_ = bottom[0]->count(0, channel_axis_);// product of all axes before the channel axis (the number of items)
CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)// the channel count must still equal channels_
<< "Input size incompatible with convolution kernel.";
// TODO: generalize to handle inputs of different shapes.
for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())// with several bottoms, all must have the same shape
<< "All inputs must have the same shape.";
}
// Shape the tops.
bottom_shape_ = &bottom[0]->shape();// keep a pointer to the shape vector of bottom[0]
compute_output_shape();// defined elsewhere; presumably fills output_shape_ (confirm)
vector<int> top_shape(bottom[0]->shape().begin(),// start with the axes of bottom[0] that precede the channel axis
bottom[0]->shape().begin() + channel_axis_);
top_shape.push_back(num_output_);// then the number of output channels
for (int i = 0; i < num_spatial_axes_; ++i) {
top_shape.push_back(output_shape_[i]);// then the output size along each spatial axis
}
for (int top_id = 0; top_id < top.size(); ++top_id) {
top[top_id]->Reshape(top_shape);// every top gets the same shape
}
if (reverse_dimensions()) {
/* Reversed case: the spatial element count is taken from bottom[0]. */
conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
} else {
/* Otherwise the spatial element count is taken from top[0]. */
conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
}
col_offset_ = kernel_dim_ * conv_out_spatial_dim_;// kernel_dim_ times the spatial element count
output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;// output element count belonging to one group
// Setup input dimensions (conv_input_shape_).
vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);// one-element shape {num_spatial_axes_ + 1}
conv_input_shape_.Reshape(bottom_dim_blob_shape);// holds num_spatial_axes_ + 1 entries (3 for 2D convolution: C, H, W)
int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
for (int i = 0; i < num_spatial_axes_ + 1; ++i) {// copy the channel axis and every spatial axis, in order
if (reverse_dimensions()) {
conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
} else {
conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
}
}
// The im2col result buffer will only hold one image at a time to avoid
// overly large memory usage. In the special case of 1x1 convolution
// it goes lazily unused to save memory.
col_buffer_shape_.clear();
col_buffer_shape_.push_back(kernel_dim_ * group_);// first axis: kernel_dim_ * group_
for (int i = 0; i < num_spatial_axes_; ++i) {// then one entry per spatial axis
if (reverse_dimensions()) {
col_buffer_shape_.push_back(input_shape(i + 1));
} else {
col_buffer_shape_.push_back(output_shape_[i]);
}
}
col_buffer_.Reshape(col_buffer_shape_);// reshape the column buffer accordingly
bottom_dim_ = bottom[0]->count(channel_axis_);// element count of bottom[0] from the channel axis on (one item: channels x spatial), not a single channel
top_dim_ = top[0]->count(channel_axis_);// element count of top[0] from the channel axis on (one item: channels x spatial)
num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;// conv_in_channels_ times the spatial element count; presumably the work-item count for im2col (confirm)
num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;// per-item element count on the side col2im writes to; presumably its work-item count (confirm)
// Set up the all ones "bias multiplier" for adding biases by BLAS
out_spatial_dim_ = top[0]->count(first_spatial_axis);// spatial element count of top[0] (one output channel)
if (bias_term_) {// the multiplier is only needed when a bias is used
// The multiplier is a vector of length out_spatial_dim_ whose entries are all 1.
vector<int> bias_multiplier_shape(1, out_spatial_dim_);
bias_multiplier_.Reshape(bias_multiplier_shape);
caffe_set(bias_multiplier_.count(), Dtype(1),// set every entry to 1
bias_multiplier_.mutable_cpu_data());
}
}
1.compute_output_shape();计算输出的shape
kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
output_dim = (input_dim + 2 * pad_data[i] - kernel_extent) / stride_data[i] + 1;
2.reshape top vector
vector<int> top_shape(bottom[0]->shape().begin(),
bottom[0]->shape().begin() + channel_axis_);
top_shape.push_back(num_output_);
for (int i = 0; i < num_spatial_axes_; ++i) {
top_shape.push_back(output_shape_[i]);
}
for (int top_id = 0; top_id < top.size(); ++top_id) {
top[top_id]->Reshape(top_shape);
}
top_shape:bottom_num、num_output_、output_shape_h、output_shape_w
3.设置卷积输入的形状
// Setup input dimensions (conv_input_shape_).
vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
conv_input_shape_.Reshape(bottom_dim_blob_shape);
int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
if (reverse_dimensions()) {
conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
} else {
conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
}
}
conv_input_shape_ 为卷积输入的变量,一般为c * h * w
4.im2col缓存区的设置
// The im2col result buffer will only hold one image at a time to avoid
// overly large memory usage. In the special case of 1x1 convolution
// it goes lazily unused to save memory.
col_buffer_shape_.clear();
col_buffer_shape_.push_back(kernel_dim_ * group_);
for (int i = 0; i < num_spatial_axes_; ++i) {
if (reverse_dimensions()) {
col_buffer_shape_.push_back(input_shape(i + 1));
} else {
col_buffer_shape_.push_back(output_shape_[i]);
}
}
col_buffer_.Reshape(col_buffer_shape_);
bottom_dim_ = bottom[0]->count(channel_axis_);
top_dim_ = top[0]->count(channel_axis_);
num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
im2col结果缓冲区每次只保存一张图像,以避免内存占用过大。在1x1卷积的特殊情况下,该缓冲区不会被使用(延迟分配),从而节省内存。
5.建立全1的“偏置乘数”(bias multiplier),以便通过BLAS完成偏置的加法
if (bias_term_) {//若启用了偏置,那么初始化偏置乘数blob
//偏置乘数的长度为输出的单通道数据量(out_spatial_dim_),其元素全部置为1
vector<int> bias_multiplier_shape(1, out_spatial_dim_);
bias_multiplier_.Reshape(bias_multiplier_shape);
caffe_set(bias_multiplier_.count(), Dtype(1),//先将这些乘数置为1
bias_multiplier_.mutable_cpu_data());
- 为该层设置可学习参数
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
AppendParam(param, layer_id, param_id);
}
网友评论