1、人脸检测算法的原理
人脸检测算法目前以深度学习方向更为准确,本文不进行算法的原理解析,直接以开源的训练好的模型介绍和比较为主。主要介绍MTCNN、Resnet10_SSD、RFBNet三个开源的模型。
2、MTCNN算法
MTCNN原理介绍:https://zhuanlan.zhihu.com/p/38520597
Github上参考来源:https://github.com/LeslieZhoa/tensorflow-MTCNN
import cv2
import sys
from MtcnnDetector import MtcnnDetector
from detector import Detector
from fcn_detector import FcnDetector
from model import P_Net,R_Net,O_Net
from utils import *
import testconfig as config
# 加载MTCNN的三个模型
def load_align():
    """Build an ``MtcnnDetector`` from the settings in ``config``.

    The PNet stage is always created. The RNet stage is added when
    ``config.test_mode`` is ``"RNet"`` or ``"ONet"``, and the ONet stage only
    when it is ``"ONet"``. Stages that are not requested stay ``None``.

    Returns:
        The configured ``MtcnnDetector`` instance.
    """
    score_thresh = config.thresh
    smallest_face = config.min_face
    step = config.stride
    mode = config.test_mode
    # Checkpoint directories for the P/R/O networks, in that order.
    pnet_dir, rnet_dir, onet_dir = './model/PNet/', './model/RNet/', './model/ONet'
    batches = config.batches

    stages = [FcnDetector(P_Net, pnet_dir), None, None]
    if mode in ("RNet", "ONet"):
        # 24 is presumably the RNet input size -- confirm against Detector.
        stages[1] = Detector(R_Net, 24, batches[1], rnet_dir)
    if mode == "ONet":
        # 48 is presumably the ONet input size -- confirm against Detector.
        stages[2] = Detector(O_Net, 48, batches[2], onet_dir)

    return MtcnnDetector(
        detectors=stages,
        min_face_size=smallest_face,
        stride=step,
        threshold=score_thresh,
    )
# 检测部分
def align_face(img, mtcnn_detector):
    """Detect faces in ``img`` and draw a green rectangle around each one.

    Args:
        img: Image array (rows, cols, ...) such as the result of
            ``cv2.imread``. Rectangles are drawn onto it in place.
        mtcnn_detector: Object whose ``detect(img)`` returns a pair; the first
            element is an array with one row per face whose first four
            columns are ``x1, y1, x2, y2``.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes, clipped to the image
        bounds, when at least one face is found. When detection raises or
        yields no boxes, the historical value ``([], [], [])`` is returned so
        existing callers keep working.
    """
    try:
        boxes_c, _ = mtcnn_detector.detect(img)
    except Exception:
        # Best effort: a detector failure is reported as "no face" instead of
        # propagating, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print('找不到脸')
        return [], [], []

    if boxes_c.shape[0] == 0:
        print('找不到脸 ')
        return [], [], []

    img_h, img_w = img.shape[:2]
    bb_arr = []
    for det in boxes_c[:, :4]:
        det = np.squeeze(det)
        # Clip the box to the image so drawing never leaves the frame.
        bb = [
            int(max(det[0], 0)),
            int(max(det[1], 0)),
            int(min(det[2], img_w)),
            int(min(det[3], img_h)),
        ]
        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
        # The original never stored the box, so callers always got [].
        bb_arr.append(bb)
    return bb_arr
if __name__ == '__main__':
    # Demo: run MTCNN on a single image and show the annotated result.
    mtcnn = load_align()
    img_path = '../1.jpg'
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail early with a clear message.
        raise SystemExit('cannot read image: {0}'.format(img_path))
    align_face(image, mtcnn)
    cv2.imshow('main', image)
    cv2.waitKey()
MTCNN的模型过于复杂,运行速度较慢。我减少了PNet前做图像金字塔的次数,同时提高了检测阈值,也没有得到较好的结果。在我的MAC上测得结果如下:
MTCNN检测结果
3、Resnet10_SSD算法
在这推荐一个能够脱离深度学习框架、直接运行已经训练好的模型的工具——opencv_dnn模块,它能够直接解析caffe、tensorflow、pytorch、onnx的预训练模型。
这次的人脸检测算法参考:https://github.com/thegopieffect/computer_vision/tree/master/CAFFE_DNN
import numpy as np
import cv2
# Load the Caffe ResNet-10 SSD face detector once at import time; both paths
# are relative to the current working directory.
_PROTOTXT_PATH = "./deploy.prototxt"
_WEIGHTS_PATH = "./res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNetFromCaffe(_PROTOTXT_PATH, _WEIGHTS_PATH)
def find_face(frame):
    """Detect faces in ``frame`` with the module-level ``net`` and show them.

    The frame is resized to 320x240 for inference. Each detection row holds a
    confidence at index 2 and a box at indices 3..6 expressed as fractions of
    the frame size, so boxes are scaled back by the original width/height.
    Detections with confidence below 0.8 are skipped. Green rectangles are
    drawn on ``frame`` in place, then the frame is shown in a window named
    ``'main'`` and the call blocks until a key is pressed.

    Args:
        frame: Image array (rows, cols, channels) as returned by ``cv2.imread``.

    Returns:
        A list of float arrays ``[x1, y1, x2, y2]`` in frame pixels for the
        accepted detections. If the network returns zero detection rows, the
        historical value ``([], [], [])`` is returned.
    """
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(
        cv2.resize(frame, (320, 240)), 1.0, (320, 240), (104.0, 177.0, 123.0)
    )
    net.setInput(blob)
    detections = net.forward()

    if detections.shape[2] == 0:
        print('find no face!!!')
        return [], [], []

    # Hoisted: the scale from fractional to pixel coordinates is loop-invariant.
    scale = np.array([w, h, w, h])
    bb_arr = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # Drop weak detections.
        if confidence < 0.8:
            continue
        box = detections[0, 0, i, 3:7] * scale
        # Cast to plain Python ints; some OpenCV builds reject NumPy scalars
        # as point coordinates.
        start_x, start_y, end_x, end_y = (int(v) for v in box.astype("int"))
        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        bb_arr.append(box)

    cv2.imshow('main', frame)
    cv2.waitKey(0)
    return bb_arr
if __name__ == '__main__':
    # Demo: run the detector ten times on the same image.
    img = cv2.imread('../1.jpg')
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail early with a clear message.
        raise SystemExit('cannot read image: ../1.jpg')
    for _ in range(10):
        find_face(img)
这里直接输出了boundingbox的坐标相对真实尺寸的比值,可以通过Netron在线看模型的结构图。可以得出,其比MTCNN模型更加快速,占用内存更少,运行更为简单。同样其测试结果如下:
Resnet_SSD测试结果
4、RFBnet
RFBnet使用了github上的一个厉害的项目:https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB ,模型仅有1MB大小但效果却非常好。作者提供了pytorch、caffe、MNN、ONNX等模型和推理代码。其中caffe代码也是利用opencv_dnn模块,读者可自行验证。本次介绍另外一个能够脱离深度学习框架、直接运行已经训练好的模型的框架——onnxruntime,而且其还有GPU的版本。
import onnxruntime as rt
import numpy as np
import cv2
from box_util import *
import time
# RFBNet demo: run the ONNX face detector ten times on one image, print the
# per-iteration latency and FPS, then show the annotated image.
model_path = './version-RFB-320.onnx'
img_path = '../27.jpg'
orig_image = cv2.imread(img_path)
if orig_image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise SystemExit('cannot read image: {0}'.format(img_path))

sess = rt.InferenceSession(model_path)
input_name = sess.get_inputs()[0].name
# Output names are only needed when requesting a subset of outputs; the run
# call below passes None to fetch every output.
label_name = sess.get_outputs()[0].name
box_name = sess.get_outputs()[1].name

for _ in range(10):
    # perf_counter is monotonic and higher resolution than time.time().
    start = time.perf_counter()
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    # Resize to 320x240 and normalise pixel values with (x - 127) / 128.
    image = (cv2.resize(image, (320, 240)) - 127.0) / 128
    # HWC -> CHW, then add a leading batch axis.
    image = np.transpose(image, [2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)
    # Passing None as the output list returns all model outputs.
    confidences, boxes = sess.run(None, {input_name: image})
    # predict is presumably provided by box_util (star import) -- confirm.
    boxes = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, 0.6, iou_threshold=0.3)
    # Rectangles accumulate on orig_image across iterations.
    for face in boxes:
        # Plain ints keep cv2.rectangle happy regardless of the array dtype.
        x1, y1, x2, y2 = (int(v) for v in face[:4])
        cv2.rectangle(orig_image, (x1, y1), (x2, y2), (255, 255, 0), 4)
    seconds = time.perf_counter() - start
    print("Time taken : {0} seconds".format(seconds))
    # Calculate frames per second; guard against a zero-length interval.
    fps = 1 / seconds if seconds > 0 else float('inf')
    print("Estimated frames per second : {0}".format(fps))

cv2.imshow('main', orig_image)
cv2.waitKey()
整个运行过程非常简洁,适合做快速验证。可以通过Netron在线看模型的结构图,同样其测试结果如下
RFBNet测试结果
5、总结
最后本文选择使用第三种人脸检测模型,效果好而且速度快。将其通过TensorRT部署在Jetson Nano上,得到一个至少30fps的人脸检测器。
网友评论