Since dlib's source code is written in C++, some C++ background helps when reading it.
There is no need to study the pasted code closely; skimming the commented parts is enough.
This time let's look at the face_detection_ex demo. Its code is shown below (the original English comments have been removed).
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#include <iostream>

using namespace dlib;
using namespace std;

int main(int argc, char** argv)
{
    try
    {
        if (argc == 1)
        {
            cout << "Give some image files as arguments to this program." << endl;
            return 0;
        }

        frontal_face_detector detector = get_frontal_face_detector(); // obtain a detector
        image_window win;
        for (int i = 1; i < argc; ++i)
        {
            cout << "processing image " << argv[i] << endl;
            array2d<unsigned char> img;
            load_image(img, argv[i]); // load the image
            pyramid_up(img);          // upsample the image
            std::vector<rectangle> dets = detector(img); // run detection; returns the location of every face in the image
            cout << "Number of faces detected: " << dets.size() << endl;
            win.clear_overlay();
            win.set_image(img);
            win.add_overlay(dets, rgb_pixel(255,0,0)); // draw a red box around each face
            cout << "Hit enter to process the next image..." << endl;
            cin.get();
        }
    }
    catch (exception& e)
    {
        cout << "\nexception thrown!" << endl;
        cout << e.what() << endl;
    }
}
From this we can see dlib's main face-detection pipeline (a condensed sketch follows the list):
1. Obtain the detector
2. Load the image
3. Image processing (optional)
4. Detect faces in the image with the detector
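Condensed into code, the four steps look like the minimal sketch below, with the GUI part dropped. This is only a sketch: the file name face.jpg is made up, and loading a JPEG assumes dlib was built with JPEG support.

#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_io.h>
#include <iostream>

int main()
{
    dlib::frontal_face_detector detector = dlib::get_frontal_face_detector(); // 1. obtain the detector
    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg");  // 2. load the image (hypothetical file name)
    dlib::pyramid_up(img);              // 3. optional: upsample so smaller faces are found
    std::vector<dlib::rectangle> dets = detector(img); // 4. detect faces
    for (const auto& r : dets)
        std::cout << "face at " << r.left() << "," << r.top()
                  << " " << r.width() << "x" << r.height() << std::endl;
}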
Obtaining the detector
frontal_face_detector detector = get_frontal_face_detector();
Let's see how it is implemented:
#ifndef DLIB_FRONTAL_FACE_DETECTOr_Hh_
#define DLIB_FRONTAL_FACE_DETECTOr_Hh_

#include "frontal_face_detector_abstract.h"
#include "../image_processing/object_detector.h"
#include "../image_processing/scan_fhog_pyramid.h"
#include <sstream>
#include "../compress_stream.h"
#include "../base64.h"

namespace dlib
{
    typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;

    inline const std::string get_serialized_frontal_faces();

    inline frontal_face_detector get_frontal_face_detector()
    {
        std::istringstream sin(get_serialized_frontal_faces());
        frontal_face_detector detector;
        deserialize(detector, sin);
        return detector;
    }

    /*
        It is built out of 5 HOG filters. A front looking, left looking, right looking,
        front looking but rotated left, and finally a front looking but rotated right one.
        In other words, the detector was trained from 5 HOG filters facing different
        directions (the training parameters for the 5 views are omitted here).
    */
    inline const std::string get_serialized_frontal_faces()
    {
        dlib::base64 base64_coder;
        dlib::compress_stream::kernel_1ea compressor;
        std::ostringstream sout;
        std::istringstream sin;

        // The base64 encoded data from the file 'object_detector.dat' we want to decode and return.
        // This is the data of the already trained detector.
        sout << "AW2B5ZIvv09mlKLVYjKqbJC05yeR2KsCpPGEGOgn2QlwM92S4UT4HgQkV0V9WqYRf6xETTSVKz7Z";
        sout << "YcJ84Jc4C3+VdPgZDhV+LDt6qAt3OI4nA9zN4Y9cCIb6ivlETkN/JMmapbOAUW2mrSzDif5zjAaq";
        sout << "+NFvw/5V0Jciopw9tR6nYtV41unWGvyyfsO9CcqvDy81QIydToHh0a7UaL0jCtA2DYzkViDufxyv";
        sout << "wqMklOYYJag/XNyoQs8g44qAha1rVyeq4eXodi0JegvjkXWEB4Mq8jBuHXbYjYiRiHoL68/9mry5";
        /* more than a thousand similar lines omitted here */
        sout << "nlN2Duwp7g5yl982CZLZc0k7uSjKaDkWyynH60MwLnmVj2sA";

        sin.str(sout.str());
        sout.str("");

        // Decode the base64 text into its compressed binary form
        base64_coder.decode(sin,sout);
        sin.clear();
        sin.str(sout.str());
        sout.str("");

        // Decompress the data into its original form
        compressor.decompress(sin,sout);

        return sout.str();
    }
}

#endif // DLIB_FRONTAL_FACE_DETECTOr_Hh_
All this really does is return an already trained detector; the decoding and decompression details are the author's own choice. Hard-coding the detector's data into the source is a bit brute-force, though.
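Instead of embedding the serialized model in the source, a detector of the same type can also be deserialized from a file on disk. Below is a minimal sketch assuming you have such a file; the names my_detector.svm and face.jpg are made up, and the detector type simply mirrors the frontal_face_detector typedef shown above.

#include <dlib/image_processing/object_detector.h>
#include <dlib/image_processing/scan_fhog_pyramid.h>
#include <dlib/image_io.h>
#include <dlib/serialize.h>
#include <iostream>

int main()
{
    // Same detector type as the frontal_face_detector typedef above.
    typedef dlib::scan_fhog_pyramid<dlib::pyramid_down<6> > image_scanner_type;
    dlib::object_detector<image_scanner_type> detector;

    // Read the serialized detector from disk instead of from a string embedded
    // in the source. "my_detector.svm" is a hypothetical file name.
    dlib::deserialize("my_detector.svm") >> detector;

    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg"); // hypothetical image file
    std::cout << "faces found: " << detector(img).size() << std::endl;
}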
Loading the image
array2d<unsigned char> img;
load_image(img, argv[i]);
You do not need to understand this part in depth. It determines the image type from the file name, uses the appropriate loader for that type, and then fills an array2d<unsigned char> by assigning the pixel values one by one. Part of the code is shown below.
template <typename image_type>
void load_image ( image_type& image, const std::string& file_name )
{
    const image_file_type::type im_type = image_file_type::read_type(file_name); // determine the image type
    switch (im_type) // each type uses a different loader
    {
        case image_file_type::BMP: load_bmp(image, file_name); return;
        case image_file_type::DNG: load_dng(image, file_name); return;
#ifdef DLIB_PNG_SUPPORT
        case image_file_type::PNG: load_png(image, file_name); return;
#endif
#ifdef DLIB_JPEG_SUPPORT
        case image_file_type::JPG: load_jpeg(image, file_name); return; // this uses the third-party libjpeg library
#endif
    }
}
// Here the decoded image data is converted to dlib's own image type simply by assigning pixel values
template<typename T>
void get_image( T& t_) const
{
    image_view<T> t(t_);
    t.set_size( height_, width_ );
    for ( unsigned n = 0; n < height_; n++ )
    {
        const unsigned char* v = get_row( n );
        for ( unsigned m = 0; m < width_; m++ )
        {
            if ( is_gray() )
            {
                unsigned char p = v[m];
                assign_pixel( t[n][m], p );
            }
            else if ( is_rgba() ) {
                rgb_alpha_pixel p;
                p.red = v[m*4];
                p.green = v[m*4+1];
                p.blue = v[m*4+2];
                p.alpha = v[m*4+3];
                assign_pixel( t[n][m], p );
            }
            else // if ( is_rgb() )
            {
                rgb_pixel p;
                p.red = v[m*3];
                p.green = v[m*3+1];
                p.blue = v[m*3+2];
                assign_pixel( t[n][m], p );
            }
        }
    }
}
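Incidentally, the array2d<unsigned char> used by the demo is not the only option: load_image also works with other pixel types, and assign_pixel/assign_image handle the conversion (for example RGB to grayscale). A small sketch, with a made-up file name:

#include <dlib/array2d.h>
#include <dlib/image_io.h>
#include <dlib/image_transforms.h>

int main()
{
    dlib::array2d<dlib::rgb_pixel> color_img;
    dlib::load_image(color_img, "face.jpg"); // hypothetical file; loaded as RGB this time

    // Convert to the grayscale type used by the demo; assign_image applies
    // assign_pixel to every pixel, much like get_image() above.
    dlib::array2d<unsigned char> gray_img;
    dlib::assign_image(gray_img, color_img);
}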
Image processing (optional)
Upsampling enlarges the image. The program still runs even if this step is commented out, so it is fine to come back to it once you are familiar with upsampling and downsampling.
pyramid_up(img);
The one-argument pyramid_up(img) call above resolves to the overload below, which in turn calls the two-parameter pyramid_up:
template <typename image_type>
void pyramid_up ( image_type& img)
{
    pyramid_down<2> pyr;
    pyramid_up(img, pyr);
}
The two-parameter pyramid_up calls the three-parameter pyramid_up:
template <
    typename image_type,
    typename pyramid_type
    >
void pyramid_up ( image_type& img, const pyramid_type& pyr )
{
    image_type temp;
    pyramid_up(img, temp, pyr);
    swap(temp, img);
}
And the three-parameter pyramid_up finally calls the four-parameter pyramid_up:
template <
    typename image_type1,
    typename image_type2,
    typename pyramid_type
    >
void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
        "\t void pyramid_up()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
        );

    pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
}
pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
Finally, this line shows that the image is enlarged using bilinear interpolation (interpolate_bilinear). The interpolation itself is just a lot of arithmetic and assignments; the code is fairly long, so it is not reproduced here.
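To give a feel for what bilinear interpolation does, here is a standalone sketch of 2x upsampling on a plain grayscale buffer. It only illustrates the idea and is not dlib's actual implementation.

#include <vector>
#include <cstddef>

// Illustrative only: enlarge a grayscale image by 2x with bilinear interpolation.
// Each output pixel is mapped back into the source image and computed as a
// weighted average of the four surrounding source pixels.
std::vector<unsigned char> upsample2x_bilinear(const std::vector<unsigned char>& src,
                                               std::size_t w, std::size_t h)
{
    const std::size_t W = 2 * w, H = 2 * h;
    std::vector<unsigned char> dst(W * H);
    for (std::size_t y = 0; y < H; ++y)
    {
        for (std::size_t x = 0; x < W; ++x)
        {
            // position of this output pixel in source coordinates
            const double sx = x / 2.0, sy = y / 2.0;
            const std::size_t x0 = static_cast<std::size_t>(sx);
            const std::size_t y0 = static_cast<std::size_t>(sy);
            const std::size_t x1 = (x0 + 1 < w) ? x0 + 1 : x0;
            const std::size_t y1 = (y0 + 1 < h) ? y0 + 1 : y0;
            const double fx = sx - x0, fy = sy - y0;

            // blend the four neighbours according to the fractional offsets
            const double top    = (1 - fx) * src[y0 * w + x0] + fx * src[y0 * w + x1];
            const double bottom = (1 - fx) * src[y1 * w + x0] + fx * src[y1 * w + x1];
            dst[y * W + x] = static_cast<unsigned char>(top * (1 - fy) + bottom * fy + 0.5);
        }
    }
    return dst;
}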
Detecting faces in the image with the detector
std::vector<rectangle> dets = detector(img);
Looking up the function's definition, we find that it overloads the () operator:
template < typename image_scanner_type >
template < typename image_type >
std::vector<rectangle> object_detector<image_scanner_type>::
operator() ( const image_type& img, double adjust_threshold )
{
    std::vector<rect_detection> dets;
    (*this)(img,dets,adjust_threshold); // calls the other overload of operator()
    std::vector<rectangle> final_dets(dets.size());
    for (unsigned long i = 0; i < dets.size(); ++i)
        final_dets[i] = dets[i].rect;
    return final_dets;
}
The line (*this)(img, dets, adjust_threshold); calls the function below:
template < typename image_scanner_type >
template <typename image_type>
void object_detector<image_scanner_type>::
operator() ( const image_type& img, std::vector<rect_detection>& final_dets,
             double adjust_threshold )
{
    scanner.load(img);
    std::vector<std::pair<double, rectangle> > dets;
    std::vector<rect_detection> dets_accum;
    for (unsigned long i = 0; i < w.size(); ++i)
    {
        const double thresh = w[i].w(scanner.get_num_dimensions());
        // every candidate detection is collected into dets
        scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
        for (unsigned long j = 0; j < dets.size(); ++j)
        {
            rect_detection temp;
            temp.detection_confidence = dets[j].first-thresh; // the confidence of each detection
            temp.weight_index = i;
            temp.rect = dets[j].second;
            dets_accum.push_back(temp);
        }
    }

    // non-maximum suppression
    final_dets.clear();
    if (w.size() > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend()); // sort by detection_confidence, highest first
    for (unsigned long i = 0; i < dets_accum.size(); ++i) // keep only boxes that do not overlap an already kept box
    {
        if (overlaps_any_box(final_dets, dets_accum[i].rect))
            continue;
        final_dets.push_back(dets_accum[i]);
    }
}
The detection above yields the coordinates of a bounding box for each face, and the boxes can then be drawn directly on the image.
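The non-maximum suppression at the end is worth a closer look: after sorting by confidence, a box is kept only if it does not overlap any box kept so far. Below is a standalone sketch of that greedy step. It is not dlib's code: the overlap test here is a crude intersection check, whereas dlib's overlaps_any_box applies its own overlap criterion.

#include <vector>
#include <algorithm>

struct Det
{
    double confidence;
    long left, top, right, bottom; // box coordinates
};

static bool boxes_overlap(const Det& a, const Det& b)
{
    // true if the two rectangles intersect at all (a deliberately simple test)
    return a.left <= b.right && b.left <= a.right &&
           a.top <= b.bottom && b.top <= a.bottom;
}

// Greedy non-maximum suppression: keep the highest-confidence boxes,
// dropping any box that overlaps one already kept.
std::vector<Det> nms(std::vector<Det> dets)
{
    std::sort(dets.begin(), dets.end(),
              [](const Det& a, const Det& b) { return a.confidence > b.confidence; });
    std::vector<Det> kept;
    for (const Det& d : dets)
    {
        bool overlaps = false;
        for (const Det& k : kept)
            if (boxes_overlap(d, k)) { overlaps = true; break; }
        if (!overlaps)
            kept.push_back(d);
    }
    return kept;
}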
The most important line is
scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
Digging deeper, you can see that it calls a function named detect_from_fhog_pyramid. The name alone tells us that detection is done with FHOG features on an image pyramid; for the concrete implementation, it is best to read the source yourself.
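If you just want a feel for the FHOG half of "fhog + pyramid" without digging through scan_fhog_pyramid, dlib exposes the feature extractor directly as extract_fhog_features. A small sketch (the image file name is made up):

#include <dlib/array2d.h>
#include <dlib/image_io.h>
#include <dlib/image_transforms.h>
#include <dlib/matrix.h>
#include <iostream>

int main()
{
    dlib::array2d<unsigned char> img;
    dlib::load_image(img, "face.jpg"); // hypothetical file name

    // Compute the 31-dimensional FHOG descriptor for every 8x8 cell of the image
    // (8 is the default cell size). The sliding-window detector scores windows of
    // these cells at every level of the image pyramid.
    dlib::array2d<dlib::matrix<float, 31, 1> > hog;
    dlib::extract_fhog_features(img, hog);

    std::cout << "FHOG feature map size: " << hog.nr() << " x " << hog.nc() << std::endl;
}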
When reading the source on your own, you may find a function with many overloads and be unsure which one is actually called; setting a breakpoint and stepping through with a debugger sorts that out.