1、人脸检测算法的原理

目前人脸检测以基于深度学习的方法更为准确。本文不解析算法原理，而是以介绍和比较已训练好的开源模型为主，主要介绍 MTCNN、Resnet10_SSD、RFBNet 三个开源模型。

2、MTCNN算法

MTCNN原理介绍：https://zhuanlan.zhihu.com/p/38520597
Github上参考来源：https://github.com/LeslieZhoa/tensorflow-MTCNN

MTCNN

import cv2
import sys

from MtcnnDetector import MtcnnDetector
from detector import Detector
from fcn_detector import FcnDetector
from model import P_Net,R_Net,O_Net
from utils import *
import testconfig as config

# Load the three MTCNN models
def load_align():
    """Build an ``MtcnnDetector`` from the settings in ``testconfig``.

    PNet is always created. RNet is added when ``config.test_mode`` is
    "RNet" or "ONet", and ONet only when it is "ONet"; stages that are not
    created stay ``None`` in the detector list.

    Returns:
        The ``MtcnnDetector`` constructed from the selected stages.
    """
    threshold = config.thresh
    smallest_face = config.min_face
    step = config.stride
    mode = config.test_mode
    # Where the trained models are stored
    pnet_dir, rnet_dir, onet_dir = './model/PNet/', './model/RNet/', './model/ONet'
    batches = config.batches

    cascade = [FcnDetector(P_Net, pnet_dir), None, None]

    if mode in ("RNet", "ONet"):
        # 24 is presumably the RNet input size -- confirm against Detector.
        cascade[1] = Detector(R_Net, 24, batches[1], rnet_dir)

    if mode == "ONet":
        # 48 is presumably the ONet input size -- confirm against Detector.
        cascade[2] = Detector(O_Net, 48, batches[2], onet_dir)

    return MtcnnDetector(detectors=cascade, min_face_size=smallest_face,
                         stride=step, threshold=threshold)

# Detection step
def align_face(img, mtcnn_detector):
    """Detect faces in ``img`` and draw a green rectangle around each one.

    Args:
        img: Image array whose first two ``shape`` entries are read as
            (height, width). It is drawn on in place by ``cv2.rectangle``.
        mtcnn_detector: Object whose ``detect(img)`` returns a pair; the
            first item must support ``.shape`` and ``[:, :4]`` indexing,
            with the four columns read as x1, y1, x2, y2.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes clipped to the image
        when at least one face is found. When ``detect`` raises or no box
        is found, the tuple ``([], [], [])`` is returned (kept unchanged
        for existing callers, although it differs from the success shape).
    """
    try:
        boxes_c, _ = mtcnn_detector.detect(img)
    except Exception:
        # Previously a bare ``except:``, which also swallowed
        # KeyboardInterrupt and SystemExit.
        print('找不到脸')
        return [], [], []

    # Number of face boxes
    if boxes_c.shape[0] == 0:
        print('找不到脸 ')
        return [], [], []

    img_height, img_width = img.shape[:2]
    # Box coordinates
    bb_arr = []
    for det in boxes_c[:, :4]:
        det = np.squeeze(det)
        # Clip the box to the image bounds before drawing.
        bb = [int(max(det[0], 0)), int(max(det[1], 0)),
              int(min(det[2], img_width)), int(min(det[3], img_height))]
        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
        # The original never stored the box, so the result was always empty.
        bb_arr.append(bb)
    return bb_arr

if __name__ == '__main__':
    # Demo: load the detector, mark the faces in one image and display it.
    mtcnn = load_align()
    img_path = '../1.jpg'
    image = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None, not by raising.
    if image is None:
        raise SystemExit('cannot read image: {0}'.format(img_path))
    align_face(image, mtcnn)
    cv2.imshow('main', image)
    # Keep the window open until a key is pressed.
    cv2.waitKey()

MTCNN 的模型过于复杂，运行速度较慢。我减少了 PNet 前做图像金字塔的次数，同时提高了检测阈值，也没有得到较好的结果。在我的 Mac 上测得的结果如下：

MTCNN检测结果

3、Resnet10_SSD算法

在这里推荐一个工具——opencv_dnn 模块：它能够脱离深度学习框架直接运行已经训练好的模型，可以直接解析 caffe、tensorflow、pytorch、onnx 的预训练模型。
这次的人脸检测算法参考：https://github.com/thegopieffect/computer_vision/tree/master/CAFFE_DNN

import numpy as np
import cv2

# Load the Caffe face-detection network once at import time from the network
# definition (deploy.prototxt) and the trained weights (.caffemodel), both
# resolved relative to the current working directory.
net = cv2.dnn.readNetFromCaffe("./deploy.prototxt",
                               "./res10_300x300_ssd_iter_140000.caffemodel")
def find_face(frame, min_confidence=0.8, show=True):
    """Detect faces in ``frame`` with the module-level ``net`` and mark them.

    Args:
        frame: Image array; its first two ``shape`` entries are read as
            (height, width). Rectangles are drawn on it in place.
        min_confidence: Detections scoring below this value are skipped.
            Defaults to 0.8, the previously hard-coded threshold.
        show: When true (the default, matching the previous behaviour), the
            annotated frame is displayed after every accepted detection and
            the call blocks until a key is pressed. Pass ``False`` to run
            without any GUI interaction.

    Returns:
        A list with one array per accepted detection, holding the box
        scaled to the frame size as ``[startX, startY, endX, endY]``
        (not rounded). When the network reports no detections at all, the
        tuple ``([], [], [])`` is returned, as before.
    """
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (320, 240)), 1.0,
                                 (320, 240), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()

    if detections.shape[2] == 0:
        print('find no face!!!')
        return [], [], []

    # The network outputs box corners as fractions of the image size.
    scale = np.array([w, h, w, h])
    bb_arr = []
    for i in range(detections.shape[2]):
        # Confidence (probability) associated with this prediction.
        confidence = detections[0, 0, i, 2]
        if confidence < min_confidence:
            continue

        box = detections[0, 0, i, 3:7] * scale
        # Convert to plain Python ints so cv2.rectangle accepts the points.
        start_x, start_y, end_x, end_y = box.astype("int").tolist()
        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        bb_arr.append(box)
        if show:
            cv2.imshow('main', frame)
            cv2.waitKey(0)

    return bb_arr

if __name__ == '__main__':
    img = cv2.imread('../1.jpg')
    # cv2.imread reports an unreadable file by returning None, not by raising.
    if img is None:
        raise SystemExit("cannot read image: ../1.jpg")
    # Run the detector ten times on the same image.
    for _ in range(10):
        find_face(img)

这里模型直接输出的是 bounding box 坐标相对于图像真实尺寸的比值，因此需要乘以图像的宽和高才能得到像素坐标。可以通过 Netron 在线查看模型的结构图。可以看出，它比 MTCNN 模型更加快速，占用内存更少，运行也更为简单。其测试结果如下：

Resnet_SSD测试结果

4、RFBNet

RFBNet 使用了 GitHub 上一个很出色的项目：https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB 。该模型仅有 1MB 大小，但效果却非常好。作者提供了 pytorch、caffe、MNN、ONNX 等模型和推理代码，其中 caffe 代码同样利用了 opencv_dnn 模块，读者可自行验证。本次介绍另外一个能够脱离深度学习框架、直接运行已训练好的模型的框架——onnxruntime，而且它还有 GPU 版本。

import onnxruntime as rt
import numpy as np
import cv2
from box_util import *
import time

# Benchmark script: run the RFB-320 ONNX face detector ten times on one image
# with onnxruntime, draw the detected boxes and print the per-run timing.
model_path = './version-RFB-320.onnx'
img_path = '../27.jpg'
orig_image = cv2.imread(img_path)
# cv2.imread reports an unreadable file by returning None, not by raising.
if orig_image is None:
    raise SystemExit('cannot read image: {0}'.format(img_path))

sess = rt.InferenceSession(model_path)
input_name = sess.get_inputs()[0].name
# The output names are looked up but not needed below, because
# sess.run(None, ...) returns every output.
label = sess.get_outputs()[0].name
box = sess.get_outputs()[1].name

for _ in range(10):
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    # Resize to 320x240 and normalise pixel values with (x - 127) / 128.
    image = (cv2.resize(image, (320, 240)) - 127.0)/128

    # HWC -> CHW, then add a leading batch axis.
    image = np.transpose(image, [2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)
    # With several outputs, passing None fetches all of them at once.
    confidences, boxes = sess.run(None, {input_name: image})
    boxes = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, 0.6, iou_threshold=0.3)

    # A separate loop name avoids clobbering the module-level ``box`` above.
    for face_box in boxes:
        # Plain Python ints so cv2.rectangle accepts the points.
        x1, y1, x2, y2 = (int(v) for v in face_box[:4])
        cv2.rectangle(orig_image, (x1, y1), (x2, y2), (255, 255, 0), 4)

    end = time.perf_counter()
    seconds = end - start
    print("Time taken : {0} seconds".format(seconds))
    # Calculate frames per second; guard against a zero-length interval.
    fps = 1 / seconds if seconds > 0 else float('inf')
    print("Estimated frames per second : {0}".format(fps))

cv2.imshow('main', orig_image)
# Keep the window open until a key is pressed.
cv2.waitKey()

整个运行过程非常简洁，适合做快速验证。可以通过 Netron 在线查看模型的结构图。其测试结果如下：