预处理
- 使用 OpenCV 的 warpAffine 对图片做 letterbox（等比缩放 + 居中填充）。
详细解释见简书文章《使用 opencv 的 warpaffine 完成 letter box 图片预处理》(jianshu.com)。
预处理代码
def warpAffine(self, image):
    """Letterbox *image* to (self.image_width, self.image_height).

    Scales the image uniformly so it fits inside the target size, centers
    it, and pads the border with the YOLOv5 convention gray (114, 114, 114).

    Returns:
        (letterboxed_image, M_inv) where M_inv is the inverse affine matrix
        that maps network coordinates back to original-image coordinates.
    """
    # Uniform scale: the tighter of the two per-axis ratios keeps aspect ratio.
    ratio_w = self.image_width / image.shape[1]
    ratio_h = self.image_height / image.shape[0]
    ratio = min(ratio_w, ratio_h)
    # Forward affine: scale then translate so the result is centered.
    tx = (-ratio * image.shape[1] + self.image_width + ratio - 1) * 0.5
    ty = (-ratio * image.shape[0] + self.image_height + ratio - 1) * 0.5
    forward = np.array([[ratio, 0, tx],
                        [0, ratio, ty]])
    inverse = cv2.invertAffineTransform(forward)
    letterboxed = cv2.warpAffine(
        image,
        forward,
        (self.image_width, self.image_height),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(114, 114, 114),
    )
    return letterboxed, inverse
后处理
OpenCV 的 NMSBoxes 算法有一个缺陷：它在做 NMS 时不区分类别。OpenCV 4.7.0 版本中新增了 NMSBoxesBatched 函数，该函数可以按类别分别做 NMS。
Performs batched non maximum suppression on given boxes and corresponding scores across different classes.
代码
def decode(self, outputs, M_T):
    """Decode raw YOLOv5 output into final detections.

    Args:
        outputs: network forward() result; outputs[0] is expected to have
            shape (1, rows, 5 + num_classes) with each row laid out as
            [cx, cy, w, h, objectness, class scores...].
        M_T: inverse letterbox affine matrix (2x3) mapping network
            coordinates back to the original image.

    Returns:
        List of [left, top, right, bottom, score, class_id] in
        original-image pixel coordinates.
    """
    results = []
    class_ids = []
    confidences = []
    boxes = []
    rows = outputs[0].shape[1]
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]  # objectness score
        if confidence >= self.confidence_threshold:
            classes_scores = row[5:]
            class_id = np.argmax(classes_scores)
            score = confidence * classes_scores[class_id]
            if score >= self.confidence_threshold:
                confidences.append(float(score))
                class_ids.append(class_id)
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                # Fix: apply the inverse affine BEFORE any int() truncation.
                # The original cast (cx - w/2) to int first, losing up to
                # 1/scale pixels of precision in the original image.
                left = (cx - w / 2) * M_T[0][0] + M_T[0][2]
                top = (cy - h / 2) * M_T[0][0] + M_T[1][2]
                right = (cx + w / 2) * M_T[0][0] + M_T[0][2]
                bottom = (cy + h / 2) * M_T[0][0] + M_T[1][2]
                boxes.append([left, top, right - left, bottom - top])
    # NOTE: NMSBoxes suppresses across ALL classes; OpenCV >= 4.7.0 provides
    # cv2.dnn.NMSBoxesBatched(boxes, confidences, class_ids, ...) for
    # per-class NMS.
    indices = cv2.dnn.NMSBoxes(boxes, confidences,
                               self.confidence_threshold, self.nms_threshold)
    # Fix: NMSBoxes returns shape (N, 1) before OpenCV 4.5.4 and a flat (N,)
    # array after; the original `i = i[0]` crashes on newer versions.
    # reshape(-1) normalizes both, and also handles the empty no-detection
    # tuple.
    for i in np.array(indices).reshape(-1):
        left, top, width, height = (int(v) for v in boxes[i])
        results.append([left, top, left + width, top + height,
                        confidences[i], class_ids[i]])
    return results
整体代码
import cv2
import numpy as np
class YoloV5(object):
    """YOLOv5 detector on OpenCV DNN.

    Pipeline: letterbox preprocessing via warpAffine -> forward pass ->
    decode + NMS -> boxes mapped back to original-image coordinates.
    """

    def __init__(self, model_path, confidence_threshold, nms_threshold,
                 image_width, image_height):
        """Load the ONNX model and store thresholds and network input size."""
        self.model = cv2.dnn.readNet(model_path)
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.image_width = image_width
        self.image_height = image_height

    def warpAffine(self, image):
        """Letterbox *image* to the network input size.

        Returns:
            (letterboxed_image, M_T) where M_T is the inverse affine matrix
            mapping network coordinates back to the original image.
        """
        scale_x = self.image_width / image.shape[1]
        scale_y = self.image_height / image.shape[0]
        scale = min(scale_x, scale_y)  # uniform scale keeps aspect ratio
        # Scale then translate so the resized image sits centered in the canvas.
        M = np.array([
            [scale, 0, (-scale * image.shape[1] + self.image_width + scale - 1) * 0.5],
            [0, scale, (-scale * image.shape[0] + self.image_height + scale - 1) * 0.5],
        ])
        M_T = cv2.invertAffineTransform(M)
        image = cv2.warpAffine(image,
                               M,
                               (self.image_width, self.image_height),
                               flags=cv2.INTER_NEAREST,
                               borderMode=cv2.BORDER_CONSTANT,
                               borderValue=(114, 114, 114))
        return image, M_T

    def decode(self, outputs, M_T):
        """Decode raw network output into [l, t, r, b, score, class_id] boxes.

        outputs[0] is expected to have shape (1, rows, 5 + num_classes) with
        rows laid out as [cx, cy, w, h, objectness, class scores...].
        """
        results = []
        class_ids = []
        confidences = []
        boxes = []
        rows = outputs[0].shape[1]
        for r in range(rows):
            row = outputs[0][0][r]
            confidence = row[4]  # objectness score
            if confidence >= self.confidence_threshold:
                classes_scores = row[5:]
                class_id = np.argmax(classes_scores)
                score = confidence * classes_scores[class_id]
                if score >= self.confidence_threshold:
                    confidences.append(float(score))
                    class_ids.append(class_id)
                    cx, cy, w, h = row[0], row[1], row[2], row[3]
                    # Fix: apply the inverse affine BEFORE int() truncation;
                    # the original cast lost up to 1/scale original-image
                    # pixels of precision.
                    left = (cx - w / 2) * M_T[0][0] + M_T[0][2]
                    top = (cy - h / 2) * M_T[0][0] + M_T[1][2]
                    right = (cx + w / 2) * M_T[0][0] + M_T[0][2]
                    bottom = (cy + h / 2) * M_T[0][0] + M_T[1][2]
                    boxes.append([left, top, right - left, bottom - top])
        # NOTE: NMSBoxes is class-agnostic; OpenCV >= 4.7.0 offers
        # cv2.dnn.NMSBoxesBatched for per-class NMS.
        indices = cv2.dnn.NMSBoxes(boxes, confidences,
                                   self.confidence_threshold, self.nms_threshold)
        # Fix: NMSBoxes returns (N, 1) before OpenCV 4.5.4 and flat (N,)
        # after; the original `i = i[0]` crashes on newer versions.
        # reshape(-1) handles both, plus the empty no-detection tuple.
        for i in np.array(indices).reshape(-1):
            left, top, width, height = (int(v) for v in boxes[i])
            results.append([left, top, left + width, top + height,
                            confidences[i], class_ids[i]])
        return results

    def detect(self, image):
        """Run the full pipeline on a BGR image and return decoded boxes."""
        affine_image, M_T = self.warpAffine(image)
        # Normalize to [0, 1] and swap BGR -> RGB for the ONNX model.
        blob = cv2.dnn.blobFromImage(affine_image, 1 / 255.0,
                                     (self.image_width, self.image_height),
                                     [0, 0, 0], swapRB=True, crop=False)
        self.model.setInput(blob)
        outs = self.model.forward(self.model.getUnconnectedOutLayersNames())
        return self.decode(outs, M_T)

    def draw(self, image, boxes):
        """Draw detection rectangles on *image* and save it as result.jpg."""
        for box in boxes:
            left, top, right, bottom, conf, class_id = box
            cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 255), 2)
        cv2.imwrite("result.jpg", image)
if __name__ == "__main__":
    # conf_threshold=0.3, nms_threshold=0.45, network input 896x512
    yolo = YoloV5("model.onnx", 0.3, 0.45, 896, 512)
    image = cv2.imread("test.jpg")
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface later as a confusing
    # shape/attribute error inside detect().
    if image is None:
        raise FileNotFoundError("failed to read test.jpg")
    results = yolo.detect(image)
    yolo.draw(image, results)
网友评论