Since dlib's source code is written in C++, some C++ background helps when reading it.
There is no need to study the pasted code closely; skimming the commented parts is enough.
This time let's look at the face_detection_ex demo. Its code is shown below (the original English comments have been removed).
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#include <iostream>

using namespace dlib;
using namespace std;

int main(int argc, char** argv)
{
    try
    {
        if (argc == 1)
        {
            cout << "Give some image files as arguments to this program." << endl;
            return 0;
        }

        frontal_face_detector detector = get_frontal_face_detector(); // obtain a detector
        image_window win;
        for (int i = 1; i < argc; ++i)
        {
            cout << "processing image " << argv[i] << endl;
            array2d<unsigned char> img;
            load_image(img, argv[i]); // load the image
            pyramid_up(img);          // upsample the image
            std::vector<rectangle> dets = detector(img); // run detection; returns the location of every face in the image
            cout << "Number of faces detected: " << dets.size() << endl;
            win.clear_overlay();
            win.set_image(img);
            win.add_overlay(dets, rgb_pixel(255,0,0)); // draw a red box around each face
            cout << "Hit enter to process the next image..." << endl;
            cin.get();
        }
    }
    catch (exception& e)
    {
        cout << "\nexception thrown!" << endl;
        cout << e.what() << endl;
    }
}
From this we can see dlib's main face-detection pipeline (a condensed sketch follows the list):
1. Obtain the detector
2. Load the image
3. Image processing (optional)
4. Detect faces in the image with the detector
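Condensed into code, the four steps look like the minimal sketch below, with the GUI part dropped. This is only a sketch: the file name face.jpg is made up, and loading a JPEG assumes dlib was built with JPEG support.

#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_io.h>
#include <iostream>

int main()
{
    dlib::frontal_face_detector detector = dlib::get_frontal_face_detector(); // 1. obtain the detector
    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg");  // 2. load the image (hypothetical file name)
    dlib::pyramid_up(img);              // 3. optional: upsample so smaller faces are found
    std::vector<dlib::rectangle> dets = detector(img); // 4. detect faces
    for (const auto& r : dets)
        std::cout << "face at " << r.left() << "," << r.top()
                  << " " << r.width() << "x" << r.height() << std::endl;
}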
Obtaining the detector
frontal_face_detector detector = get_frontal_face_detector();
Let's see how it is implemented:
#ifndef DLIB_FRONTAL_FACE_DETECTOr_Hh_
#define DLIB_FRONTAL_FACE_DETECTOr_Hh_

#include "frontal_face_detector_abstract.h"
#include "../image_processing/object_detector.h"
#include "../image_processing/scan_fhog_pyramid.h"
#include <sstream>
#include "../compress_stream.h"
#include "../base64.h"

namespace dlib
{
    typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;

    inline const std::string get_serialized_frontal_faces();

    inline frontal_face_detector get_frontal_face_detector()
    {
        std::istringstream sin(get_serialized_frontal_faces());
        frontal_face_detector detector;
        deserialize(detector, sin);
        return detector;
    }

    /*
        It is built out of 5 HOG filters. A front looking, left looking, right looking,
        front looking but rotated left, and finally a front looking but rotated right one.
        In other words, the detector was trained from 5 HOG filters facing different
        directions (the training parameters for the 5 views are omitted here).
    */
    inline const std::string get_serialized_frontal_faces()
    {
        dlib::base64 base64_coder;
        dlib::compress_stream::kernel_1ea compressor;
        std::ostringstream sout;
        std::istringstream sin;

        // The base64 encoded data from the file 'object_detector.dat' we want to decode and return.
        // This is the data of the already trained detector.
        sout << "AW2B5ZIvv09mlKLVYjKqbJC05yeR2KsCpPGEGOgn2QlwM92S4UT4HgQkV0V9WqYRf6xETTSVKz7Z";
        sout << "YcJ84Jc4C3+VdPgZDhV+LDt6qAt3OI4nA9zN4Y9cCIb6ivlETkN/JMmapbOAUW2mrSzDif5zjAaq";
        sout << "+NFvw/5V0Jciopw9tR6nYtV41unWGvyyfsO9CcqvDy81QIydToHh0a7UaL0jCtA2DYzkViDufxyv";
        sout << "wqMklOYYJag/XNyoQs8g44qAha1rVyeq4eXodi0JegvjkXWEB4Mq8jBuHXbYjYiRiHoL68/9mry5";
        /* more than a thousand similar lines omitted here */
        sout << "nlN2Duwp7g5yl982CZLZc0k7uSjKaDkWyynH60MwLnmVj2sA";

        sin.str(sout.str());
        sout.str("");

        // Decode the base64 text into its compressed binary form
        base64_coder.decode(sin,sout);
        sin.clear();
        sin.str(sout.str());
        sout.str("");

        // Decompress the data into its original form
        compressor.decompress(sin,sout);

        return sout.str();
    }
}

#endif // DLIB_FRONTAL_FACE_DETECTOr_Hh_
All this really does is return an already trained detector; the decoding and decompression details are the author's own choice. Hard-coding the detector's data into the source is a bit brute-force, though.
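Instead of embedding the serialized model in the source, a detector of the same type can also be deserialized from a file on disk. Below is a minimal sketch assuming you have such a file; the names my_detector.svm and face.jpg are made up, and the detector type simply mirrors the frontal_face_detector typedef shown above.

#include <dlib/image_processing/object_detector.h>
#include <dlib/image_processing/scan_fhog_pyramid.h>
#include <dlib/image_io.h>
#include <dlib/serialize.h>
#include <iostream>

int main()
{
    // Same detector type as the frontal_face_detector typedef above.
    typedef dlib::scan_fhog_pyramid<dlib::pyramid_down<6> > image_scanner_type;
    dlib::object_detector<image_scanner_type> detector;

    // Read the serialized detector from disk instead of from a string embedded
    // in the source. "my_detector.svm" is a hypothetical file name.
    dlib::deserialize("my_detector.svm") >> detector;

    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg"); // hypothetical image file
    std::cout << "faces found: " << detector(img).size() << std::endl;
}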
Loading the image
array2d<unsigned char> img;
load_image(img, argv[i]);
You do not need to understand this part in depth. It determines the image type from the file name, uses the appropriate loader for that type, and then fills an array2d<unsigned char> by assigning the pixel values one by one. Part of the code is shown below.
template <typename image_type>
void load_image ( image_type& image, const std::string& file_name )
{
    const image_file_type::type im_type = image_file_type::read_type(file_name); // determine the image type
    switch (im_type) // each type uses a different loader
    {
        case image_file_type::BMP: load_bmp(image, file_name); return;
        case image_file_type::DNG: load_dng(image, file_name); return;
#ifdef DLIB_PNG_SUPPORT
        case image_file_type::PNG: load_png(image, file_name); return;
#endif
#ifdef DLIB_JPEG_SUPPORT
        case image_file_type::JPG: load_jpeg(image, file_name); return; // this uses the third-party libjpeg library
#endif
    }
}
// Here the decoded image data is converted to dlib's own image type simply by assigning pixel values
template<typename T>
void get_image( T& t_) const
{
    image_view<T> t(t_);
    t.set_size( height_, width_ );
    for ( unsigned n = 0; n < height_; n++ )
    {
        const unsigned char* v = get_row( n );
        for ( unsigned m = 0; m < width_; m++ )
        {
            if ( is_gray() )
            {
                unsigned char p = v[m];
                assign_pixel( t[n][m], p );
            }
            else if ( is_rgba() ) {
                rgb_alpha_pixel p;
                p.red = v[m*4];
                p.green = v[m*4+1];
                p.blue = v[m*4+2];
                p.alpha = v[m*4+3];
                assign_pixel( t[n][m], p );
            }
            else // if ( is_rgb() )
            {
                rgb_pixel p;
                p.red = v[m*3];
                p.green = v[m*3+1];
                p.blue = v[m*3+2];
                assign_pixel( t[n][m], p );
            }
        }
    }
}
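Incidentally, the array2d<unsigned char> used by the demo is not the only option: load_image also works with other pixel types, and assign_pixel/assign_image handle the conversion (for example RGB to grayscale). A small sketch, with a made-up file name:

#include <dlib/array2d.h>
#include <dlib/image_io.h>
#include <dlib/image_transforms.h>

int main()
{
    dlib::array2d<dlib::rgb_pixel> color_img;
    dlib::load_image(color_img, "face.jpg"); // hypothetical file; loaded as RGB this time

    // Convert to the grayscale type used by the demo; assign_image applies
    // assign_pixel to every pixel, much like get_image() above.
    dlib::array2d<unsigned char> gray_img;
    dlib::assign_image(gray_img, color_img);
}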
Image processing (optional)
Upsampling enlarges the image. The program still runs even if this step is commented out, so it is fine to come back to it once you are familiar with upsampling and downsampling.
pyramid_up(img);
The one-argument pyramid_up(img) call above resolves to the overload below, which in turn calls the two-parameter pyramid_up:
template <typename image_type>
void pyramid_up ( image_type& img)
{
    pyramid_down<2> pyr;
    pyramid_up(img, pyr);
}
The two-parameter pyramid_up calls the three-parameter pyramid_up:
template <
    typename image_type,
    typename pyramid_type
    >
void pyramid_up ( image_type& img, const pyramid_type& pyr )
{
    image_type temp;
    pyramid_up(img, temp, pyr);
    swap(temp, img);
}
And the three-parameter pyramid_up finally calls the four-parameter pyramid_up:
template <
    typename image_type1,
    typename image_type2,
    typename pyramid_type
    >
void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
        "\t void pyramid_up()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
        );

    pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
}
pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
Finally, this line shows that the image is enlarged using bilinear interpolation (interpolate_bilinear). The interpolation itself is just a lot of arithmetic and assignments; the code is fairly long, so it is not reproduced here.
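To give a feel for what bilinear interpolation does, here is a standalone sketch of 2x upsampling on a plain grayscale buffer. It only illustrates the idea and is not dlib's actual implementation.

#include <vector>
#include <cstddef>

// Illustrative only: enlarge a grayscale image by 2x with bilinear interpolation.
// Each output pixel is mapped back into the source image and computed as a
// weighted average of the four surrounding source pixels.
std::vector<unsigned char> upsample2x_bilinear(const std::vector<unsigned char>& src,
                                               std::size_t w, std::size_t h)
{
    const std::size_t W = 2 * w, H = 2 * h;
    std::vector<unsigned char> dst(W * H);
    for (std::size_t y = 0; y < H; ++y)
    {
        for (std::size_t x = 0; x < W; ++x)
        {
            // position of this output pixel in source coordinates
            const double sx = x / 2.0, sy = y / 2.0;
            const std::size_t x0 = static_cast<std::size_t>(sx);
            const std::size_t y0 = static_cast<std::size_t>(sy);
            const std::size_t x1 = (x0 + 1 < w) ? x0 + 1 : x0;
            const std::size_t y1 = (y0 + 1 < h) ? y0 + 1 : y0;
            const double fx = sx - x0, fy = sy - y0;

            // blend the four neighbours according to the fractional offsets
            const double top    = (1 - fx) * src[y0 * w + x0] + fx * src[y0 * w + x1];
            const double bottom = (1 - fx) * src[y1 * w + x0] + fx * src[y1 * w + x1];
            dst[y * W + x] = static_cast<unsigned char>(top * (1 - fy) + bottom * fy + 0.5);
        }
    }
    return dst;
}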
Detecting faces in the image with the detector
std::vector<rectangle> dets = detector(img);
Looking up the function's definition, we find that it overloads the () operator:
template < typename image_scanner_type >
template < typename image_type >
std::vector<rectangle> object_detector<image_scanner_type>::
operator() ( const image_type& img, double adjust_threshold )
{
    std::vector<rect_detection> dets;
    (*this)(img,dets,adjust_threshold); // calls the other overload of operator()
    std::vector<rectangle> final_dets(dets.size());
    for (unsigned long i = 0; i < dets.size(); ++i)
        final_dets[i] = dets[i].rect;
    return final_dets;
}
The line (*this)(img, dets, adjust_threshold); calls the function below:
template < typename image_scanner_type >
template <typename image_type>
void object_detector<image_scanner_type>::
operator() ( const image_type& img, std::vector<rect_detection>& final_dets,
             double adjust_threshold )
{
    scanner.load(img);
    std::vector<std::pair<double, rectangle> > dets;
    std::vector<rect_detection> dets_accum;
    for (unsigned long i = 0; i < w.size(); ++i)
    {
        const double thresh = w[i].w(scanner.get_num_dimensions());
        // every candidate detection is collected into dets
        scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
        for (unsigned long j = 0; j < dets.size(); ++j)
        {
            rect_detection temp;
            temp.detection_confidence = dets[j].first-thresh; // the confidence of each detection
            temp.weight_index = i;
            temp.rect = dets[j].second;
            dets_accum.push_back(temp);
        }
    }

    // non-maximum suppression
    final_dets.clear();
    if (w.size() > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend()); // sort by detection_confidence, highest first
    for (unsigned long i = 0; i < dets_accum.size(); ++i) // keep only boxes that do not overlap an already kept box
    {
        if (overlaps_any_box(final_dets, dets_accum[i].rect))
            continue;
        final_dets.push_back(dets_accum[i]);
    }
}
The detection above yields the coordinates of a bounding box for each face, and the boxes can then be drawn directly on the image.
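The non-maximum suppression at the end is worth a closer look: after sorting by confidence, a box is kept only if it does not overlap any box kept so far. Below is a standalone sketch of that greedy step. It is not dlib's code: the overlap test here is a crude intersection check, whereas dlib's overlaps_any_box applies its own overlap criterion.

#include <vector>
#include <algorithm>

struct Det
{
    double confidence;
    long left, top, right, bottom; // box coordinates
};

static bool boxes_overlap(const Det& a, const Det& b)
{
    // true if the two rectangles intersect at all (a deliberately simple test)
    return a.left <= b.right && b.left <= a.right &&
           a.top <= b.bottom && b.top <= a.bottom;
}

// Greedy non-maximum suppression: keep the highest-confidence boxes,
// dropping any box that overlaps one already kept.
std::vector<Det> nms(std::vector<Det> dets)
{
    std::sort(dets.begin(), dets.end(),
              [](const Det& a, const Det& b) { return a.confidence > b.confidence; });
    std::vector<Det> kept;
    for (const Det& d : dets)
    {
        bool overlaps = false;
        for (const Det& k : kept)
            if (boxes_overlap(d, k)) { overlaps = true; break; }
        if (!overlaps)
            kept.push_back(d);
    }
    return kept;
}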
The most important line is
scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
Digging deeper, you can see that it calls a function named detect_from_fhog_pyramid. The name alone tells us that detection is done with FHOG features on an image pyramid; for the concrete implementation, it is best to read the source yourself.
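If you just want a feel for the FHOG half of "fhog + pyramid" without digging through scan_fhog_pyramid, dlib exposes the feature extractor directly as extract_fhog_features. A small sketch (the image file name is made up):

#include <dlib/array2d.h>
#include <dlib/image_io.h>
#include <dlib/image_transforms.h>
#include <dlib/matrix.h>
#include <iostream>

int main()
{
    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg"); // hypothetical file name

    // Compute the 31-dimensional FHOG descriptor for every 8x8 cell of the image
    // (8 is the default cell size). The sliding-window detector scores windows of
    // these cells at every level of the image pyramid.
    dlib::array2d<dlib::matrix<float, 31, 1> > hog;
    dlib::extract_fhog_features(img, hog);

    std::cout << "FHOG feature map size: " << hog.nr() << " x " << hog.nc() << std::endl;
}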
When reading the source on your own, you may find a function with many overloads and be unsure which one is actually called; setting a breakpoint and stepping through with a debugger sorts that out.