Face and Mask Detection with cv.FaceNet and MobileNetV2

Author: FredricZhu | Published 2024-04-22 12:21

    This example uses the model trained in the previous article to run mask detection.
    The previous article is here:
    https://www.jianshu.com/p/0ac62d3750b1

    First, cv.FaceNet detects the face locations; the detected face regions are then cropped from the frame and fed into MobileNetV2 for mask classification.
    The cv.FaceNet model file and prototxt file can be downloaded here:
    model: https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/res10_300x300_ssd_iter_140000.caffemodel?ref_type=heads
    prototxt: https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/deploy.prototxt.txt?ref_type=heads

    The test video can be downloaded below. You could also use a webcam as the input, but this example runs in a Docker environment where connecting to the host camera is cumbersome, so a video file is used as the input stream instead:

    https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/masks.mp4?ref_type=heads
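
    If you prefer to fetch the files from a script rather than the browser, here is a minimal download sketch. It assumes GitLab's raw-file URL convention (swap /blob/ for /raw/ in the links above); adjust the base URL if the repository layout changes.

    # Minimal download helper (a sketch, assuming the GitLab /raw/ URL convention)
    import urllib.request

    BASE = ('https://gitlab.com/zhuge20100104/cpp_practice/-/raw/master/'
            'simple_learn/deep_learning/19_training_neural_network_with_keras2/')

    for name in ['res10_300x300_ssd_iter_140000.caffemodel',
                 'deploy.prototxt.txt',
                 'masks.mp4']:
        urllib.request.urlretrieve(BASE + name, name)
        print('downloaded', name)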

    The code is as follows; the notebook is available at:
    https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/19.%20Training%20Neural%20Network%20with%20Keras%202.ipynb?ref_type=heads

    # 1. Run detection with the model trained in the previous article.
    # We also need an OpenCV face detector model to locate the faces.
    # Import the libraries.
    
    # import the necessary packages
    from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
    from tensorflow.keras.preprocessing.image import img_to_array
    from tensorflow.keras.models import load_model
    import numpy as np
    import imutils
    import time
    import cv2
    import os
    
    def detect_and_predict_mask(frame, faceNet, maskNet):
        # grab the dimensions of the frame and then construct a blob from it
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))
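        # Note: blobFromImage resizes the frame to 224x224 and subtracts the
        # per-channel BGR means (104.0, 177.0, 123.0) this face detector was
        # trained with; the res10 SSD was trained at 300x300 but also runs here.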
        # pass the blob through the network and obtain the face detections
        faceNet.setInput(blob)
        detections = faceNet.forward()
        print(detections.shape)
        # initialize our list of faces, their corresponding locations,
        # and the list of predictions from our face mask network
        faces = []
        locs = []
        preds = []
        # loop over the detections
        # detections holds n 7-element arrays:
        # index 2 of each array is the confidence,
        # indices 3, 4, 5, 6 are startX, startY, endX, endY as fractions of the frame size.
        # Keep the faces with confidence > 0.5, crop each face region,
        # and run the mask prediction on the crops.
        for i in range(0, detections.shape[2]):
            # extract the confidence (i.e., probability) associated with the detection
            confidence = detections[0, 0, i, 2]
            # filter out weak detections by ensuring the confidence is greater than
            # the minimum confidence
            if confidence > 0.5:
                # compute the (x, y) coordinates of the bounding box for the object
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype('int')
                # ensuring the bounding boxes fall within the dimensions of the frame
                (startX, startY) = (max(0, startX), max(0, startY))
                (endX, endY) = (min(w-1, endX), min(h-1, endY))
                # extract the face ROI, convert it from BGR to RGB channel, 
                # ordering, resize it to 224 * 224, and preprocessing it for face mask detect model
                face = frame[startY: endY, startX: endX]
                # face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = cv2.resize(face, (224, 224))
                face = img_to_array(face)
                face = preprocess_input(face)
                # add the face and bounding boxes to their respective lists
                faces.append(face)
                locs.append((startX, startY, endX, endY))
        # only make predictions if at least one face was detected
        if len(faces) > 0:
            # for faster inference, we will make batch predictions on all
            # faces at the same time rather than one-by-one predictions in the above 'for' loop
            faces = np.array(faces, dtype='float32')
            preds = maskNet.predict(faces, batch_size=32)
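            # preds has shape (n_faces, 2); each row is (mask, withoutMask),
            # matching the two-class output of the model from the previous article.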
    
        # return a 2-tuple of the face locations and their corresponding predictions
        return (locs, preds)
    
    
    # load our serialized face detector model from disk
    protoTxtPath = r'./deploy.prototxt.txt'
    weightsPath = r'./res10_300x300_ssd_iter_140000.caffemodel'
    faceNet = cv2.dnn.readNet(protoTxtPath, weightsPath)
    
    # load the face mask detector model from disk
    maskNet = load_model('./mask_detector.keras')
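    # Note: cv2.dnn.readNet infers the framework from the file extensions;
    # cv2.dnn.readNetFromCaffe(protoTxtPath, weightsPath) would be equivalent here.
    # Optional sanity check (a sketch, assuming a local image 'test.jpg' exists):
    # test_frame = cv2.imread('test.jpg')
    # print(detect_and_predict_mask(test_frame, faceNet, maskNet))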
    
    # Start the video, run detection with the models, and draw the results onto each frame
    import imageio
    from datetime import datetime
    import matplotlib.pyplot as plt
    
    
    input_video = 'masks'
    
    video_reader = imageio.get_reader('{}.mp4'.format(input_video))
    video_writer = imageio.get_writer('{}_annotated.mp4'.format(input_video), fps=10)
    
    t0 = datetime.now()
    n_frames = 0
    for frame in video_reader:
        n_frames += 1
        # grab the frame from the video and resize it to have a maximum width of 400 pixels
        frame = imutils.resize(frame, width=400)
        # detect faces in the frame and determine if they are wearing a face mask or not
        (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
        # loop over the detected face locations and their corresponding predictions
        for (box, pred) in zip(locs, preds):
            # unpack the bounding box and predictions
            (startX, startY, endX, endY) = box
            (mask, withoutMask) = pred
            # determine the class label and color we'll use to draw the bounding box and text
            label = 'Mask' if mask > withoutMask else 'No Mask'
            color = (0, 255, 0) if label == 'Mask' else (255, 0, 0)  # imageio frames are RGB, so red is (255, 0, 0)
            # include the probability in the label
            label = '{}: {:.2f}%'.format(label, max(mask, withoutMask) * 100)
            cv2.putText(frame, label, (startX, startY+20), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
        video_writer.append_data(frame)
    
    fps = n_frames/(datetime.now() - t0).total_seconds()
    print('Frames processed: {}, speed: {} fps'.format(n_frames, fps))
    video_reader.close()
    video_writer.close()
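
    # Optional: preview the last annotated frame inline in the notebook, using
    # the matplotlib import above. imageio frames are already RGB, so no
    # BGR-to-RGB conversion is needed before plotting.
    plt.imshow(frame)
    plt.axis('off')
    plt.show()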
    

    The result looks like this:

    [Screenshots: annotated video frames showing Mask / No Mask detections.]
