美文网首页
从demo简读dlib的源码

从demo简读dlib的源码

作者: 神昀 | 来源:发表于2019-11-07 13:15 被阅读0次

    因为dlib的源码是用C++写的,阅读源码最好有些许C++的基础。
    贴出来的代码不必仔细看,有中文注释的地方看看就好。
    这次看看face_detection_ex这个demo,它的代码如下(删除了英文注释)。

    #include <dlib/image_processing/frontal_face_detector.h>
    #include <dlib/gui_widgets.h>
    #include <dlib/image_io.h>
    #include <iostream>
    
    using namespace dlib;
    using namespace std;
    
    int main(int argc, char** argv)
    {  
        try
        {
            if (argc == 1)
            {
                cout << "Give some image files as arguments to this program." << endl;
                return 0;
            }
            
            frontal_face_detector detector = get_frontal_face_detector(); //获得一个detector
            image_window win;
    
            for (int i = 1; i < argc; ++i)
            {
                cout << "processing image " << argv[i] << endl;
                array2d<unsigned char> img;
                load_image(img, argv[i]);  //加载图片
                pyramid_up(img);  //上采样处理
    
                std::vector<rectangle> dets = detector(img); //检测图片,返回图片中每个人脸的位置
    
                cout << "Number of faces detected: " << dets.size() << endl;
              
                win.clear_overlay();
                win.set_image(img);
                win.add_overlay(dets, rgb_pixel(255,0,0)); //在图片上画出每个人脸的方框
    
                cout << "Hit enter to process the next image..." << endl;
                cin.get();
            }
        }
        catch (exception& e)
        {
            cout << "\nexception thrown!" << endl;
            cout << e.what() << endl;
        }
    }
    

    从中可以得知dlib的人脸检测主流程:
    1.获取detector
    2.加载图片
    3.图像处理(可选)
    4.用detector检测图片中的人脸

    获取detector

    frontal_face_detector detector = get_frontal_face_detector(); 
    

    看看它怎么实现的

    #ifndef DLIB_FRONTAL_FACE_DETECTOr_Hh_
    #define DLIB_FRONTAL_FACE_DETECTOr_Hh_
    
    #include "frontal_face_detector_abstract.h"
    #include "../image_processing/object_detector.h"
    #include "../image_processing/scan_fhog_pyramid.h"
    #include <sstream>
    #include "../compress_stream.h"
    #include "../base64.h"
    
    namespace dlib
    {
        // The frontal face detector type: an object_detector whose image scanner
        // is a scan_fhog_pyramid built on pyramid_down<6>.
        typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
        // Forward declaration: get_frontal_face_detector() calls this function
        // before its definition appears further down in this header.
        inline const std::string get_serialized_frontal_faces();
    
        // Builds a ready-to-use frontal face detector by deserializing the model
        // data returned by get_serialized_frontal_faces().
        inline frontal_face_detector get_frontal_face_detector()
        {
            frontal_face_detector result;
            // Wrap the serialized model text in a stream so deserialize() can read it.
            std::istringstream model_stream(get_serialized_frontal_faces());
            deserialize(result, model_stream);
            return result;
        }
    
      /*
      It is built out of 5 HOG filters. A front looking, left looking, right looking, 
      front looking but rotated left, and finally a front looking but rotated right one.
      In other words, the detector was trained from HOG filters for five different
      face orientations (the training parameters of the five orientations have been
      removed from this excerpt).

      Returns the serialized detector data: the embedded base64 text is decoded
      and then decompressed, and the resulting string is returned.
      */
       inline const std::string get_serialized_frontal_faces()
        {
            dlib::base64 base64_coder;
            dlib::compress_stream::kernel_1ea compressor;
            std::ostringstream sout;
            std::istringstream sin;
    
            // The base64 encoded data from the file 'object_detector.dat' we want to decode and return.
            // This is the data of the already-trained detector.
            sout << "AW2B5ZIvv09mlKLVYjKqbJC05yeR2KsCpPGEGOgn2QlwM92S4UT4HgQkV0V9WqYRf6xETTSVKz7Z";
            sout << "YcJ84Jc4C3+VdPgZDhV+LDt6qAt3OI4nA9zN4Y9cCIb6ivlETkN/JMmapbOAUW2mrSzDif5zjAaq";
            sout << "+NFvw/5V0Jciopw9tR6nYtV41unWGvyyfsO9CcqvDy81QIydToHh0a7UaL0jCtA2DYzkViDufxyv";
            sout << "wqMklOYYJag/XNyoQs8g44qAha1rVyeq4eXodi0JegvjkXWEB4Mq8jBuHXbYjYiRiHoL68/9mry5";
            /* over a thousand lines omitted here */
            sout << "nlN2Duwp7g5yl982CZLZc0k7uSjKaDkWyynH60MwLnmVj2sA";
    
            // Hand the accumulated base64 text to the input stream and empty the
            // output stream so it can receive the decoded bytes.
            sin.str(sout.str());
            sout.str("");
    
            // Decode the base64 text into its compressed binary form
            base64_coder.decode(sin,sout);
            // Reset the input stream's state flags before reusing it
            // (presumably decode() left it at end-of-file), then feed it the
            // decoded bytes and empty the output stream again.
            sin.clear();
            sin.str(sout.str());
            sout.str("");
    
            // Decompress the data into its original form
            compressor.decompress(sin,sout);
            return sout.str();
        }
    
    }
    
    #endif // DLIB_FRONTAL_FACE_DETECTOr_Hh_
    

    实际做的工作就是返回一个已经训练好的detector,解码、解压这些细节操作因作者而异。不过把detector的数据写在代码里有点暴力。

    加载图片

    array2d<unsigned char> img;
    load_image(img, argv[i]);
    

    这个不深入理解也可以,它根据文件名得到图片类型,再根据图片类型用不同的方法读入图片,并通过重新赋值的方式得到array2d<unsigned char>类型的结果。下面贴出部分代码。

        template <typename image_type>
        void load_image ( image_type& image,  const std::string& file_name )
        {
            const image_file_type::type im_type = image_file_type::read_type(file_name);//得到图片类型
            switch (im_type) //不同类型使用不同的加载方法
            {
                case image_file_type::BMP: load_bmp(image, file_name); return;
                case image_file_type::DNG: load_dng(image, file_name); return;
    #ifdef DLIB_PNG_SUPPORT
                case image_file_type::PNG: load_png(image, file_name); return;
    #endif
    #ifdef DLIB_JPEG_SUPPORT
                case image_file_type::JPG: load_jpeg(image, file_name); return; //这里用到jpeglib第三方库
    #endif
            }
    }
    
    //这里直接用赋值的方式把图片数据转化为作者定义的类型
           template<typename T>
            void get_image( T& t_) const
            {
                image_view<T> t(t_);
                t.set_size( height_, width_ );
                for ( unsigned n = 0; n < height_;n++ )
                {
                    const unsigned char* v = get_row( n );
                    for ( unsigned m = 0; m < width_;m++ )
                    {
                        if ( is_gray() )
                        {
                            unsigned char p = v[m];
                            assign_pixel( t[n][m], p );
                        }
                        else if ( is_rgba() ) {
                            rgb_alpha_pixel p;
                            p.red = v[m*4];
                            p.green = v[m*4+1];
                            p.blue = v[m*4+2];
                            p.alpha = v[m*4+3];
                            assign_pixel( t[n][m], p );
                        }
                        else // if ( is_rgb() )
                        {
                            rgb_pixel p;
                            p.red = v[m*3];
                            p.green = v[m*3+1];
                            p.blue = v[m*3+2];
                            assign_pixel( t[n][m], p );
                        }
                    }
                }
            }
    

    图像处理(可选)

    上采样,把图片放大,这步即使注释掉也是可以跑通的,等了解了上采样、下采样技术再回来参透也是可以的。

    pyramid_up(img);
    

    上面调用的pyramid_up(1个参数)定义如下,它内部又调用了pyramid_up(2个参数)

    template <typename image_type>
    void pyramid_up ( image_type& img)
    {
        pyramid_down<2> pyr;
        pyramid_up(img, pyr);
    }
    

    上面调用的pyramid_up(2个参数)定义如下,它内部又调用了pyramid_up(3个参数)

     template <typename image_type, typename pyramid_type>
    void pyramid_up ( image_type& img,   const pyramid_type& pyr )
    {
        // Upsample into a separate image first (the three-argument overload
        // writes to a distinct output image), then swap the result into img.
        image_type upsampled;
        pyramid_up(img, upsampled, pyr);
        swap(img, upsampled);
    }
    

    上面调用的pyramid_up(3个参数)定义如下,它内部又调用了pyramid_up(4个参数)

        // Upsamples in_img into out_img using the pyramid pyr.
        //
        // in_img and out_img must be different objects; this is checked with
        // DLIB_ASSERT. The work is delegated to the four-argument overload with
        // interpolate_bilinear() as the interpolation method.
        template <
            typename image_type1,
            typename image_type2,
            typename pyramid_type
            >
        void pyramid_up ( const image_type1& in_img, image_type2& out_img,  const pyramid_type& pyr)
        {
            // make sure requires clause is not broken
            DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
                "\t void pyramid_up()"
                << "\n\t Invalid inputs were given to this function."
                << "\n\t is_same_object(in_img, out_img):  " << is_same_object(in_img, out_img)
                );
    
            // Use bilinear interpolation for the actual upsampling.
            pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
        }
    
    pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
    

    最后,从这句可以看出使用双线性插值(interpolate_bilinear)的方法把图片放大,至于具体怎么个插法,就是各种计算、各种赋值,代码较长,不放出来了。

    用detector检测图片中的人脸

    std::vector<rectangle> dets = detector(img);
    

    查看函数的定义时发现它重载了()运算符

        // Runs the detector on img and returns only the rectangles of the
        // detections. adjust_threshold is passed through unchanged to the
        // overload that produces full rect_detection records.
        template < typename image_scanner_type >
        template < typename image_type  >
        std::vector<rectangle> object_detector<image_scanner_type>::
        operator() (  const image_type& img, double adjust_threshold ) 
        {
            // Delegate to the overload that fills a vector of rect_detection.
            std::vector<rect_detection> detections;
            (*this)(img, detections, adjust_threshold);

            // Keep just the rectangle of each detection, in the same order.
            std::vector<rectangle> boxes;
            boxes.reserve(detections.size());
            for (unsigned long k = 0; k < detections.size(); ++k)
                boxes.push_back(detections[k].rect);

            return boxes;
        }
    

    (*this)(img,dets,adjust_threshold);调用了下面的函数

        // Runs every weight vector in w over img and writes the surviving
        // detections to final_dets.
        //
        // img              : image handed to scanner.load().
        // final_dets       : output; cleared, then filled with the accepted detections.
        // adjust_threshold : added to each weight vector's threshold before scanning.
        template < typename image_scanner_type >
        template <typename image_type>
        void object_detector<image_scanner_type>::
        operator() (  const image_type& img, std::vector<rect_detection>& final_dets,  
              double adjust_threshold  ) 
        {
            scanner.load(img);

            std::vector<rect_detection> candidates;
            std::vector<std::pair<double, rectangle> > raw_hits;
            for (unsigned long k = 0; k < w.size(); ++k)
            {
                // The threshold is read from index get_num_dimensions() of this weight vector.
                const double bias = w[k].w(scanner.get_num_dimensions());

                // The scanner reports its possible detections through raw_hits.
                scanner.detect(w[k].get_detect_argument(), raw_hits, bias + adjust_threshold);

                for (unsigned long h = 0; h < raw_hits.size(); ++h)
                {
                    rect_detection hit;
                    hit.rect = raw_hits[h].second;
                    hit.weight_index = k;
                    // Confidence of a detection is its score minus the threshold.
                    hit.detection_confidence = raw_hits[h].first - bias;
                    candidates.push_back(hit);
                }
            }

            // Non-maximum suppression.
            final_dets.clear();
            if (w.size() > 1)
            {
                // Sorting through reverse iterators yields descending order
                // (presumably by detection_confidence, via rect_detection's operator<).
                std::sort(candidates.rbegin(), candidates.rend());
            }
            for (unsigned long k = 0; k < candidates.size(); ++k)
            {
                // Accept a candidate only if it does not overlap an already accepted box.
                if (!overlaps_any_box(final_dets, candidates[k].rect))
                    final_dets.push_back(candidates[k]);
            }
        }
    

    上面通过检测得到包围每张脸的方框的坐标,然后就可以直接把方框画出来。

    最核心的一句

    scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
    

    继续深入阅读可以看到它调用的函数名为detect_from_fhog_pyramid。从名字就可以知道使用了fhog+pyramid的方法检测,具体实现建议自行阅读源码。

    自己读源码的时候可能看到一个函数有许多定义,不知道到底调用了哪个,这个时候打个断点debug一下就好。

    相关文章

      网友评论

          本文标题:从demo简读dlib的源码

          本文链接:https://www.haomeiwen.com/subject/gwzsbctx.html