美文网首页OpenPose
OpenPose训练过程解析(4)

OpenPose训练过程解析(4)

作者: LaLa_2539 | 来源:发表于2018-10-09 10:56 被阅读0次

    CPMTransformationParameter参数解析

    // CPMData input layer of the training network; it exposes two top blobs, "data" and "label".
    layer {
      name: "data"
      type: "CPMData"
      top: "data"
      top: "label"
      data_param {      // see caffe.proto, line 687
        source: "/mnt/sdb/yangbin/COCO_kpt/lmdb"
        batch_size: 10
        backend: LMDB
      }
      cpm_transform_param {   // fields are declared in message CPMTransformationParameter
        stride: 8             // label maps are (crop size / stride) per side: 368 / 8 = 46
        max_rotate_degree: 40
        visualize: false
        crop_size_x: 368
        crop_size_y: 368
        scale_prob: 1
        scale_min: 0.5
        scale_max: 1.1
        target_dist: 0.6
        center_perterb_max: 40
        do_clahe: false
        num_parts: 56         // label blob gets 2 * (num_parts + 1) = 114 channels
        np_in_lmdb: 17
      }
    }
    
    // Options read by the CPMData layer / CPMDataTransformer (set via cpm_transform_param in the prototxt).
    // Note: field numbers jump from 35 to 38; 36 and 37 are not used in this message.
    message CPMTransformationParameter {
      // Data pre-processing: simple scaling and/or subtraction of the image mean
      // (the mean is subtracted first, then the result is multiplied by scale).
      optional float scale = 1 [default = 1];
      // Whether to mirror the data (default: false).
      optional bool mirror = 2 [default = false];
      // Square crop size (default 0; in practice crop_size_x and crop_size_y are set instead).
      optional uint32 crop_size = 3 [default = 0];
      // Mean file. When mean_file is set, Transform() uses it and ignores mean_value.
      // NOTE(review): upstream Caffe documents mean_file and mean_value as mutually exclusive - confirm.
      optional string mean_file = 4;
      // if specified can be repeated once  (the same mean is subtracted from every channel)
      // or can be repeated the same number of times as channels  (one mean per channel)
      repeated float mean_value = 5;
      optional uint32 stride = 6 [default = 4];
      optional float scale_cvg = 7 [default = 0.5];
      optional uint32 max_cvg_len = 8 [default = 50];
      optional uint32 min_cvg_len = 9 [default = 50];
      optional bool opaque_coverage = 10 [default = true];
      optional string coverage = 11 [default = "gridbox_max"];
      optional float flip_prob = 12 [default = 0.5];
      optional float max_rotate_degree = 13 [default = 5.0];
      optional bool visualize = 14 [default = false];
      optional uint32 crop_size_x = 15 [default = 368];
      optional uint32 crop_size_y = 16 [default = 368];
      optional float scale_prob = 17 [default = 0.5];
      optional float scale_min = 18 [default = 0.9];
      optional float scale_max = 19 [default = 1.1];
      optional float bbox_norm_factor = 20 [default = 300];
      optional string img_header = 21 [default = "."];
      // Force the decoded image to have 3 color channels.
      optional bool force_color = 22 [default = false];
      // Force the decoded image to have 1 color channels.
      optional bool force_gray = 23 [default = false];
      optional float target_dist = 24 [default = 1.0];
      optional float center_perterb_max = 25 [default = 10.0];
      optional float sigma = 26 [default = 7.0];
      optional float sigma_center = 27 [default = 21.0];
      optional float clahe_tile_size = 28 [default = 8.0];
      optional float clahe_clip_limit = 29 [default = 4.0];
      optional bool do_clahe = 30 [default = false];
      optional uint32 num_parts = 31 [default = 14];
      optional uint32 num_total_augs = 32 [default = 82];
      optional string aug_way = 33 [default = "rand"];
      optional uint32 gray = 34 [default = 0];
      optional uint32 np_in_lmdb = 35 [default = 16];
      optional bool transform_body_joint = 38 [default = true];
    }
    

    一个Datum有三个维度:channels、height和width,可以看做是少了num维度的Blob。存放数据的地方有两个:data(bytes类型)和float_data,分别存放字节(整数)型和浮点型数据。图像数据一般是整型,放在data里;特征向量一般是浮点型,放在float_data里。label存放数据的类别标签,是整数型。encoded标识数据是否需要被解码(里面有可能放的是JPEG或者PNG之类经过编码的数据)。

    // One data sample: its dimensions, its payload (raw bytes or floats) and an integer label.
    message Datum {
      optional int32 channels = 1;    // sample dimensions: channels * height * width
      optional int32 height = 2;
      optional int32 width = 3;
      // the actual image data, in bytes
      optional bytes data = 4;        // image data, stored as raw bytes
      optional int32 label = 5;
      // Optionally, the datum could also hold float data.
      repeated float float_data = 6;    // optional: the data may be stored as floats instead
      // If true data contains an encoded image that need to be decoded
      optional bool encoded = 7 [default = false];  // marks the payload as an encoded image (e.g. JPEG or PNG) that must be decoded first
    }
    

    DataLayerSetUp函数实现层设置

    /**
     * Constructs the CPMData layer from its LayerParameter.
     *
     * The parameter is forwarded to the prefetching base class and to the
     * reader, and the layer's cpm_transform_param sub-message is copied into
     * cpm_transform_param_ for later use.
     */
    template <typename Dtype>
    CPMDataLayer<Dtype>::CPMDataLayer(const LayerParameter& param)
        : BasePrefetchingDataLayer<Dtype>(param),
          reader_(param),
          cpm_transform_param_(param.cpm_transform_param()) {}
    
    // Layer set-up (excerpt): creates the CPM data transformer used by this layer.
    template <typename Dtype>
    void CPMDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {
      cpm_data_transformer_.reset(
         new CPMDataTransformer<Dtype>(cpm_transform_param_, this->phase_));   // the transformer receives this layer's phase_ (train or test)
      cpm_data_transformer_->InitRand();                                       // initialise cpm_data_transformer_ via InitRand()
    

    上述两部分代码都位于cpm_data_layer.cpp中。第一部分为CPMData层的构造函数:LayerParameter是可以描述所有类型层(例如Loss层、ReLU层、Data层……)参数的类,param就是从我们编写的网络定义中读入的该层参数;构造函数用param.cpm_transform_param()来初始化成员cpm_transform_param_(声明为CPMTransformationParameter   cpm_transform_param_;),这样cpm_transform_param_就包含了CPMData层cpm_transform_param块中的所有参数,即如下所示。

    // The cpm_transform_param block of the CPMData layer (same values as in the layer definition).
    cpm_transform_param {
      stride: 8
      max_rotate_degree: 40
      visualize: false
      crop_size_x: 368
      crop_size_y: 368
      scale_prob: 1
      scale_min: 0.5
      scale_max: 1.1
      target_dist: 0.6
      center_perterb_max: 40
      do_clahe: false
      num_parts: 56
      np_in_lmdb: 17
    }
    
    • 设置crop_size_x和crop_size_y
      // image
      // Excerpt of DataLayerSetUp: shapes top[0], the prefetch data buffers and transformed_data_.
      const int crop_size = this->layer_param_.cpm_transform_param().crop_size();
      const int batch_size = this->layer_param_.data_param().batch_size();
      if (crop_size > 0) {              // not taken in practice: crop_size is left at its default of 0
        // top[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
        // for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
        //   this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);
        // }
        // //this->transformed_data_.Reshape(1, 4, crop_size, crop_size);
        // this->transformed_data_.Reshape(1, 6, crop_size, crop_size);
      } 
      else {
        // In TRAIN the blob is crop_size_y x crop_size_x (368 x 368 in the prototxt shown earlier);
        // in any other phase it takes the datum's own height and width.
        const int height = this->phase_ != TRAIN ? datum.height() :
          this->layer_param_.cpm_transform_param().crop_size_y();
        const int width = this->phase_ != TRAIN ? datum.width() :
          this->layer_param_.cpm_transform_param().crop_size_x();
        LOG(INFO) << "PREFETCH_COUNT is " << this->PREFETCH_COUNT;
        top[0]->Reshape(batch_size, datum.channels(), height, width);
        for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
          this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), height, width);
        }
        //this->transformed_data_.Reshape(1, 4, height, width);
        // Single-sample view (num = 1) with the same channel count and spatial size.
        this->transformed_data_.Reshape(1, datum.channels(), height, width);
      }
    
    • 设置num_parts stride
      // label
      // Excerpt of DataLayerSetUp: shapes top[1], the prefetch label buffers and transformed_label_.
      // The label has 2*(num_parts+1) channels and is the image size divided by stride.
      if (this->output_labels_) {
        const int stride = this->layer_param_.cpm_transform_param().stride();
        const int height = this->phase_ != TRAIN ? datum.height() :
          this->layer_param_.cpm_transform_param().crop_size_y();
        const int width = this->phase_ != TRAIN ? datum.width() :
          this->layer_param_.cpm_transform_param().crop_size_x();
    
        int num_parts = this->layer_param_.cpm_transform_param().num_parts(); // num_parts = 56 for COCO in this setup
        top[1]->Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);
        // Log output observed during training:
        //I1008 14:29:50.468617 33177 net.cpp:157] Top shape: 10 6 368 368 (8125440)
        //I1008 14:29:50.468626 33177 net.cpp:157] Top shape: 10 114 46 46 (2412240)
    
        for (int i = 0; i < this->PREFETCH_COUNT; ++i) {      //static const int PREFETCH_COUNT = 3;
          this->prefetch_[i].label_.Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);  // 10,114,46,46
        }
        this->transformed_label_.Reshape(1, 2*(num_parts+1), height/stride, width/stride);  // 1,114,46,46
      }
    
    • 接下来的load_batch是一个纯虚函数,因此继承BasePrefetchingDataLayer类的子类都必须实现这个函数,用于读取数据并填充Batch数据结构
    virtual void load_batch(Batch<Dtype>* batch) = 0;
    
    • 调用data transformations(mirror, scale, crop……)
        // Apply data transformations (mirror, scale, crop...)
        // Excerpt of load_batch: points transformed_data_/transformed_label_ at this item's
        // slot inside the batch buffers, then lets the transformer write into them.
        timer.Start();
        const int offset_data = batch->data_.offset(item_id);
        const int offset_label = batch->label_.offset(item_id);
        this->transformed_data_.set_cpu_data(top_data + offset_data);
        this->transformed_label_.set_cpu_data(top_label + offset_label);
        if (datum.encoded()) {
          this->cpm_data_transformer_->Transform(cv_img, &(this->transformed_data_));  // encoded datum: call Transform (image only)
        } else {
          this->cpm_data_transformer_->Transform_nv(datum, 
            &(this->transformed_data_),
            &(this->transformed_label_), cnt); // raw datum: call Transform_nv (image and label)
          ++cnt;
        }
    

    接下来通过调用Transform或Transform_nv函数来对数据进行处理

    • Transform函数
    /**
     * Writes one OpenCV image into transformed_blob in C*H*W order.
     *
     * In order: validates the shapes, optionally selects a square crop window
     * (random offset in TRAIN, centred otherwise), then for every pixel
     * subtracts the mean (the mean file takes precedence over mean values) and
     * multiplies by scale. When mirroring is selected, columns are written
     * right-to-left.
     *
     * @param cv_img            source image; its depth must be CV_8U
     * @param transformed_blob  destination blob; channels/height/width must
     *                          match the (cropped) image
     */
    template<typename Dtype>
    void CPMDataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
                                           Blob<Dtype>* transformed_blob) {
      // Geometry of the source image.
      const int img_channels = cv_img.channels();
      const int img_height = cv_img.rows;
      const int img_width = cv_img.cols;

      // Geometry of the destination blob.
      const int channels = transformed_blob->channels();
      const int height = transformed_blob->height();
      const int width = transformed_blob->width();
      const int num = transformed_blob->num();

      // The destination must fit inside the source and agree on channel count.
      CHECK_EQ(channels, img_channels);
      CHECK_LE(height, img_height);
      CHECK_LE(width, img_width);
      CHECK_GE(num, 1);

      CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";

      const int crop_size = param_.crop_size();
      const Dtype scale_factor = param_.scale();
      // Short-circuit: Rand(2) is only drawn when mirroring is enabled.
      const bool flip_columns = param_.mirror() && Rand(2);
      const bool use_mean_file = param_.has_mean_file();
      const bool use_mean_values = mean_values_.size() > 0;

      CHECK_GT(img_channels, 0);
      CHECK_GE(img_height, crop_size);
      CHECK_GE(img_width, crop_size);

      // Per-pixel mean image; it must have exactly the source image's shape.
      Dtype* mean_pixels = NULL;
      if (use_mean_file) {
        CHECK_EQ(img_channels, data_mean_.channels());
        CHECK_EQ(img_height, data_mean_.height());
        CHECK_EQ(img_width, data_mean_.width());
        mean_pixels = data_mean_.mutable_cpu_data();
      }
      if (use_mean_values) {
        CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
         "Specify either 1 mean_value or as many as channels: " << img_channels;
        const bool broadcast_single_mean =
            img_channels > 1 && mean_values_.size() == 1;
        if (broadcast_single_mean) {
          // Repeat the single mean value so there is one entry per channel.
          for (int missing = img_channels - 1; missing > 0; --missing) {
            mean_values_.push_back(mean_values_[0]);
          }
        }
      }

      // Top-left corner of the crop window inside the source image.
      int row_shift = 0;
      int col_shift = 0;
      cv::Mat cv_cropped_img = cv_img;
      if (crop_size == 0) {
        // No cropping: source and destination must have the same size.
        CHECK_EQ(img_height, height);
        CHECK_EQ(img_width, width);
      } else {
        CHECK_EQ(crop_size, height);
        CHECK_EQ(crop_size, width);
        const int spare_rows = img_height - crop_size;
        const int spare_cols = img_width - crop_size;
        if (phase_ == TRAIN) {
          // Random crop only while training (row offset is drawn first).
          row_shift = Rand(spare_rows + 1);
          col_shift = Rand(spare_cols + 1);
        } else {
          // Centre crop otherwise.
          row_shift = spare_rows / 2;
          col_shift = spare_cols / 2;
        }
        cv_cropped_img =
            cv_img(cv::Rect(col_shift, row_shift, crop_size, crop_size));
      }

      CHECK(cv_cropped_img.data);

      Dtype* out = transformed_blob->mutable_cpu_data();
      for (int h = 0; h < height; ++h) {
        // Source row is interleaved (one value per channel for each pixel).
        const uchar* src = cv_cropped_img.ptr<uchar>(h);
        const int mean_row = row_shift + h;
        for (int w = 0; w < width; ++w) {
          // Mirroring writes source column w to destination column width-1-w.
          const int out_col = flip_columns ? (width - 1 - w) : w;
          const int mean_col = col_shift + w;
          for (int c = 0; c < img_channels; ++c) {
            // C*H*W layout: c full planes of height*width values, then h full
            // rows of width values, then the column within the current row.
            const int out_index = (c * height + h) * width + out_col;
            const Dtype pixel = static_cast<Dtype>(*src++);
            if (use_mean_file) {
              // The mean image is indexed at the un-cropped source position.
              const int mean_index =
                  (c * img_height + mean_row) * img_width + mean_col;
              out[out_index] = (pixel - mean_pixels[mean_index]) * scale_factor;
            } else if (use_mean_values) {
              // Per-channel mean subtraction.
              out[out_index] = (pixel - mean_values_[c]) * scale_factor;
            } else {
              out[out_index] = pixel * scale_factor;
            }
          }
        }
      }
    }
    
    • Transform_nv函数

    load_batch函数中:datum是数据的来源,作为Transform_nv函数中的data,应该是制作好的LMDB数据

    // Excerpt of load_batch: take the next Datum from the reader's "full" queue.
    Datum& datum = *(reader_.full().pop("Waiting for data"));
    
    // Pass the datum plus the data/label blobs to Transform_nv; cnt is incremented after the call.
    this->cpm_data_transformer_->Transform_nv(datum, 
            &(this->transformed_data_),
            &(this->transformed_label_), cnt);
          ++cnt;
    
    /**
     * Turns one raw Datum into a network input image and its label maps.
     *
     * Steps, in order:
     *   1. rebuild the 3-channel image and the "miss" mask from the datum payload;
     *   2. optional clahe and gray conversion;
     *   3. read the metadata stored behind the image channels (ReadMetaData);
     *   4. in TRAIN: scale, rotate, crop/pad and flip augmentation;
     *   5. write the image into transformed_data as (value - 128) / 256;
     *   6. write mask weights into transformed_label, then call generateLabelMap.
     *
     * @param datum              source sample (bytes in data(), or floats in float_data())
     * @param transformed_data   output image buffer, C*H*W
     * @param transformed_label  output label buffer, one grid_y*grid_x plane per channel
     * @param cnt                not used in this function body
     */
    template<typename Dtype> void CPMDataTransformer<Dtype>::Transform_nv(const Datum& datum, Dtype* transformed_data, Dtype* transformed_label, int cnt) {
      
      //TODO: some parameter should be set in prototxt
      // Both clahe parameters are declared as float in the proto message and are narrowed to int here.
      int clahe_tileSize = param_.clahe_tile_size();
      int clahe_clipLimit = param_.clahe_clip_limit();
      //float targetDist = 41.0/35.0;
      AugmentSelection as = {
        false,              //bool flip
        0.0,                //float degree
        Size(),             //Size crop
        0,                  //float scale
      };
      MetaData meta;
      
      const string& data = datum.data();        // raw byte payload of the datum (input image data)
      const int datum_channels = datum.channels();
      const int datum_height = datum.height();
      const int datum_width = datum.width();
      // To do: make this a parameter in caffe.proto
      //const int mode = 5; //related to datum.channels();
      // mode is fixed at 5, so the mask_miss paths (mode >= 5) run and the
      // mask_all paths (mode == 6 / mode > 5) are never taken.
      const int mode = 5;
    
      /** The following are the equivalents used in Transform():
      //const int crop_size = param_.crop_size();
      //const Dtype scale = param_.scale();
      //const bool do_mirror = param_.mirror() && Rand(2);
      //const bool has_mean_file = param_.has_mean_file();
      **/
      // Non-empty byte payload => read bytes; otherwise read datum.float_data().
      const bool has_uint8 = data.size() > 0;
      //const bool has_mean_values = mean_values_.size() > 0;
      int crop_x = param_.crop_size_x();
      int crop_y = param_.crop_size_y();
    
      CHECK_GT(datum_channels, 0);
      //CHECK_GE(datum_height, crop_size);
      //CHECK_GE(datum_width, crop_size);
      CPUTimer timer1;
      timer1.Start();
      //before any transformation, get the image from datum
      Mat img = Mat::zeros(datum_height, datum_width, CV_8UC3);
      Mat mask_all, mask_miss;
      if(mode >= 5){
        mask_miss = Mat::ones(datum_height, datum_width, CV_8UC1);
      }
      if(mode == 6){
        mask_all = Mat::zeros(datum_height, datum_width, CV_8UC1);
      }
    
      // offset = number of values in one channel plane of the datum.
      int offset = img.rows * img.cols;
      int dindex;
      Dtype d_element;
      for (int i = 0; i < img.rows; ++i) {
        for (int j = 0; j < img.cols; ++j) {
          Vec3b& rgb = img.at<Vec3b>(i, j);
          for(int c = 0; c < 3; c++){
            dindex = c*offset + i*img.cols + j;  // the datum is stored in C*H*W order
            if (has_uint8)
              d_element = static_cast<Dtype>(static_cast<uint8_t>(data[dindex]));
            else
              d_element = datum.float_data(dindex);
            rgb[c] = d_element;    // channel c of pixel (i, j), stored as uchar
          }
    
          // The miss mask is read from channel plane 4 of the datum.
          if(mode >= 5){
            dindex = 4*offset + i*img.cols + j;
            if (has_uint8)
              d_element = static_cast<Dtype>(static_cast<uint8_t>(data[dindex]));
            else
              d_element = datum.float_data(dindex);
            // Diagnostic only: print the value when d_element/255 rounds to neither 0 nor 1.
            if (round(d_element/255)!=1 && round(d_element/255)!=0){
              cout << d_element << " " << round(d_element/255) << endl;
            }
            // The raw value is stored; the division by 255 happens later when weights are written.
            mask_miss.at<uchar>(i, j) = d_element; //round(d_element/255);
          }
    
          // The "all" mask would come from channel plane 5 (unreachable while mode == 5).
          if(mode == 6){
            dindex = 5*offset + i*img.cols + j;
            if (has_uint8)
              d_element = static_cast<Dtype>(static_cast<uint8_t>(data[dindex]));
            else
              d_element = datum.float_data(dindex);
            mask_all.at<uchar>(i, j) = d_element;
          }
        }
      }
      VLOG(2) << "  rgb[:] = datum: " << timer1.MicroSeconds()/1000.0 << " ms";
      timer1.Start();
    
      //color, contract
      if(param_.do_clahe())
        clahe(img, clahe_tileSize, clahe_clipLimit);    // histogram equalisation (clahe helper)
      if(param_.gray() == 1){
        // Convert to gray and back, so img keeps 3 channels.
        cv::cvtColor(img, img, CV_BGR2GRAY);
        cv::cvtColor(img, img, CV_GRAY2BGR);
      }
      VLOG(2) << "  color: " << timer1.MicroSeconds()/1000.0 << " ms";
      timer1.Start();
    
      // offset3 skips the three image planes; offset1 is the datum row length.
      int offset3 = 3 * offset;
      int offset1 = datum_width;
      int stride = param_.stride();    //stride = 8
      ReadMetaData(meta, data, offset3, offset1);
      if(param_.transform_body_joint()) // we expect to transform body joints, and not to transform hand joints
        TransformMetaJoints(meta);
    
      VLOG(2) << "  ReadMeta+MetaJoints: " << timer1.MicroSeconds()/1000.0 << " ms";
      timer1.Start();
      //visualize original
      if(0 && param_.visualize()) 
        visualize(img, meta, as);
    
      //Start transforming
      Mat img_aug = Mat::zeros(crop_y, crop_x, CV_8UC3);
      Mat mask_miss_aug, mask_all_aug ;
      //Mat mask_miss_aug = Mat::zeros(crop_y, crop_x, CV_8UC1);
      //Mat mask_all_aug = Mat::zeros(crop_y, crop_x, CV_8UC1);
      Mat img_temp, img_temp2, img_temp3; //size determined by scale
      VLOG(2) << "   input size (" << img.cols << ", " << img.rows << ")"; 
      // We only do random transform as augmentation when training.
      if (phase_ == TRAIN) {
        // Augmentations are chained: each call reads the previous step's image and
        // receives meta, and its return value is recorded in `as`.
        as.scale = augmentation_scale(img, img_temp, mask_miss, mask_all, meta, mode);
        //LOG(INFO) << meta.joint_self.joints.size();
        //LOG(INFO) << meta.joint_self.joints[0];
        as.degree = augmentation_rotate(img_temp, img_temp2, mask_miss, mask_all, meta, mode);
        //LOG(INFO) << meta.joint_self.joints.size();
        //LOG(INFO) << meta.joint_self.joints[0];
        if(0 && param_.visualize()) 
          visualize(img_temp2, meta, as);
        as.crop = augmentation_croppad(img_temp2, img_temp3, mask_miss, mask_miss_aug, mask_all, mask_all_aug, meta, mode);
        //LOG(INFO) << meta.joint_self.joints.size();
        //LOG(INFO) << meta.joint_self.joints[0];
        if(0 && param_.visualize()) 
          visualize(img_temp3, meta, as);
        as.flip = augmentation_flip(img_temp3, img_aug, mask_miss_aug, mask_all_aug, meta, mode);
        //LOG(INFO) << meta.joint_self.joints.size();
        //LOG(INFO) << meta.joint_self.joints[0];
        if(param_.visualize()) 
          visualize(img_aug, meta, as);
    
        // imshow("img_aug", img_aug);
        // Mat label_map = mask_miss_aug;
        // applyColorMap(label_map, label_map, COLORMAP_JET);
        // addWeighted(label_map, 0.5, img_aug, 0.5, 0.0, label_map);
        // imshow("mask_miss_aug", label_map);
    
        // Shrink the masks by 1/stride so they match the label grid.
        if (mode > 4){
          resize(mask_miss_aug, mask_miss_aug, Size(), 1.0/stride, 1.0/stride, INTER_CUBIC);
        }
        if (mode > 5){
          resize(mask_all_aug, mask_all_aug, Size(), 1.0/stride, 1.0/stride, INTER_CUBIC);
        }
      }
      else {
        // No augmentation outside TRAIN: use the image as-is.
        // NOTE(review): mask_miss_aug is not assigned on this path, yet it is read
        // below because mode == 5 - confirm that non-TRAIN use is safe.
        img_aug = img.clone();
        as.scale = 1;
        as.crop = Size();
        as.flip = 0;
        as.degree = 0;
      }
      VLOG(2) << "  Aug: " << timer1.MicroSeconds()/1000.0 << " ms";
      timer1.Start();
      //LOG(INFO) << "scale: " << as.scale << "; crop:(" << as.crop.width << "," << as.crop.height 
      //          << "); flip:" << as.flip << "; degree: " << as.degree;
    
      //copy transformed img (img_aug) into transformed_data, do the mean-subtraction here
      offset = img_aug.rows * img_aug.cols;
      int rezX = img_aug.cols;
      int rezY = img_aug.rows;
      int grid_x = rezX / stride;
      int grid_y = rezY / stride;
      // Number of values in one label channel plane.
      int channelOffset = grid_y * grid_x;
    
      // Each channel value v is written as (v - 128) / 256.0, one plane per channel.
      for (int i = 0; i < img_aug.rows; ++i) {
        for (int j = 0; j < img_aug.cols; ++j) {
          Vec3b& rgb = img_aug.at<Vec3b>(i, j);
          transformed_data[0*offset + i*img_aug.cols + j] = (rgb[0] - 128)/256.0;
          transformed_data[1*offset + i*img_aug.cols + j] = (rgb[1] - 128)/256.0;
          transformed_data[2*offset + i*img_aug.cols + j] = (rgb[2] - 128)/256.0;
        }
      }
      
      // label size is image size/ stride
      // NOTE(review): np is not declared in this function - presumably a class member; confirm.
      if (mode > 4){
        for (int g_y = 0; g_y < grid_y; g_y++){
          for (int g_x = 0; g_x < grid_x; g_x++){
            // Channels [0, np): the mask value / 255 is written as weight,
            // skipped for entries whose isVisible value is 3.
            for (int i = 0; i < np; i++){
              float weight = float(mask_miss_aug.at<uchar>(g_y, g_x)) /255; //mask_miss_aug.at<uchar>(i, j); 
              if (meta.joint_self.isVisible[i] != 3){
                transformed_label[i*channelOffset + g_y*grid_x + g_x] = weight;
              }
            }  
            // background channel
            if(mode == 5){
              transformed_label[np*channelOffset + g_y*grid_x + g_x] = float(mask_miss_aug.at<uchar>(g_y, g_x)) /255;
            }
            // Unreachable while mode == 5.
            if(mode > 5){
              transformed_label[np*channelOffset + g_y*grid_x + g_x] = 1;
              transformed_label[(2*np+1)*channelOffset + g_y*grid_x + g_x] = float(mask_all_aug.at<uchar>(g_y, g_x)) /255;
            }
          }
        }
      }  
    
      //putGaussianMaps(transformed_data + 3*offset, meta.objpos, 1, img_aug.cols, img_aug.rows, param_.sigma_center());
      //LOG(INFO) << "image transformation done!";
      generateLabelMap(transformed_label, img_aug, meta);
    
      VLOG(2) << "  putGauss+genLabel: " << timer1.MicroSeconds()/1000.0 << " ms";
      //starts to visualize everything (transformed_data in 4 ch, label) fed into conv1
      //if(param_.visualize()){
        //dumpEverything(transformed_data, transformed_label, meta);
      //}
    }
    

    相关文章

      网友评论

        本文标题:OpenPose训练过程解析(4)

        本文链接:https://www.haomeiwen.com/subject/pvreaftx.html