预处理
- 使用 OpenCV 的 warpAffine 对图片做 letterbox（等比缩放 + 居中填充）。
详细解释见简书文章《使用 opencv 的 warpaffine 完成 letter box 图片预处理》(jianshu.com)。
预处理代码
def warpAffine(self, image):
    """Letterbox *image* to (self.image_width, self.image_height).

    Scales the image uniformly so it fits inside the target size, centers
    it, and pads the border with the YOLOv5 convention gray (114, 114, 114).

    Returns:
        (letterboxed_image, M_inv) where M_inv is the inverse affine matrix
        that maps network coordinates back to original-image coordinates.
    """
    # Uniform scale: the tighter of the two per-axis ratios keeps aspect ratio.
    ratio_w = self.image_width / image.shape[1]
    ratio_h = self.image_height / image.shape[0]
    ratio = min(ratio_w, ratio_h)
    # Forward affine: scale then translate so the result is centered.
    tx = (-ratio * image.shape[1] + self.image_width + ratio - 1) * 0.5
    ty = (-ratio * image.shape[0] + self.image_height + ratio - 1) * 0.5
    forward = np.array([[ratio, 0, tx],
                        [0, ratio, ty]])
    inverse = cv2.invertAffineTransform(forward)
    letterboxed = cv2.warpAffine(
        image,
        forward,
        (self.image_width, self.image_height),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(114, 114, 114),
    )
    return letterboxed, inverse
后处理
OpenCV 的 NMSBoxes 算法有一个缺陷：它在做 NMS 时不区分类别。OpenCV 4.7.0 版本中新增了 NMSBoxesBatched 函数，该函数可以按类别分别做 NMS。
Performs batched non maximum suppression on given boxes and corresponding scores across different classes.
代码
def decode(self, outputs, M_T):
    """Decode raw YOLOv5 output into final detections.

    Args:
        outputs: network forward() result; outputs[0] is expected to have
            shape (1, rows, 5 + num_classes) with each row laid out as
            [cx, cy, w, h, objectness, class scores...].
        M_T: inverse letterbox affine matrix (2x3) mapping network
            coordinates back to the original image.

    Returns:
        List of [left, top, right, bottom, score, class_id] in
        original-image pixel coordinates.
    """
    results = []
    class_ids = []
    confidences = []
    boxes = []
    rows = outputs[0].shape[1]
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]  # objectness score
        if confidence >= self.confidence_threshold:
            classes_scores = row[5:]
            class_id = np.argmax(classes_scores)
            score = confidence * classes_scores[class_id]
            if score >= self.confidence_threshold:
                confidences.append(float(score))
                class_ids.append(class_id)
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                # Fix: apply the inverse affine BEFORE any int() truncation.
                # The original cast (cx - w/2) to int first, losing up to
                # 1/scale pixels of precision in the original image.
                left = (cx - w / 2) * M_T[0][0] + M_T[0][2]
                top = (cy - h / 2) * M_T[0][0] + M_T[1][2]
                right = (cx + w / 2) * M_T[0][0] + M_T[0][2]
                bottom = (cy + h / 2) * M_T[0][0] + M_T[1][2]
                boxes.append([left, top, right - left, bottom - top])
    # NOTE: NMSBoxes suppresses across ALL classes; OpenCV >= 4.7.0 provides
    # cv2.dnn.NMSBoxesBatched(boxes, confidences, class_ids, ...) for
    # per-class NMS.
    indices = cv2.dnn.NMSBoxes(boxes, confidences,
                               self.confidence_threshold, self.nms_threshold)
    # Fix: NMSBoxes returns shape (N, 1) before OpenCV 4.5.4 and a flat (N,)
    # array after; the original `i = i[0]` crashes on newer versions.
    # reshape(-1) normalizes both, and also handles the empty no-detection
    # tuple.
    for i in np.array(indices).reshape(-1):
        left, top, width, height = (int(v) for v in boxes[i])
        results.append([left, top, left + width, top + height,
                        confidences[i], class_ids[i]])
    return results
整体代码
import cv2
import numpy as np
class YoloV5(object):
    """YOLOv5 detector on OpenCV DNN.

    Pipeline: letterbox preprocessing via warpAffine -> forward pass ->
    decode + NMS -> boxes mapped back to original-image coordinates.
    """

    def __init__(self, model_path, confidence_threshold, nms_threshold,
                 image_width, image_height):
        """Load the ONNX model and store thresholds and network input size."""
        self.model = cv2.dnn.readNet(model_path)
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.image_width = image_width
        self.image_height = image_height

    def warpAffine(self, image):
        """Letterbox *image* to the network input size.

        Returns:
            (letterboxed_image, M_T) where M_T is the inverse affine matrix
            mapping network coordinates back to the original image.
        """
        scale_x = self.image_width / image.shape[1]
        scale_y = self.image_height / image.shape[0]
        scale = min(scale_x, scale_y)  # uniform scale keeps aspect ratio
        # Scale then translate so the resized image sits centered in the canvas.
        M = np.array([
            [scale, 0, (-scale * image.shape[1] + self.image_width + scale - 1) * 0.5],
            [0, scale, (-scale * image.shape[0] + self.image_height + scale - 1) * 0.5],
        ])
        M_T = cv2.invertAffineTransform(M)
        image = cv2.warpAffine(image,
                               M,
                               (self.image_width, self.image_height),
                               flags=cv2.INTER_NEAREST,
                               borderMode=cv2.BORDER_CONSTANT,
                               borderValue=(114, 114, 114))
        return image, M_T

    def decode(self, outputs, M_T):
        """Decode raw network output into [l, t, r, b, score, class_id] boxes.

        outputs[0] is expected to have shape (1, rows, 5 + num_classes) with
        rows laid out as [cx, cy, w, h, objectness, class scores...].
        """
        results = []
        class_ids = []
        confidences = []
        boxes = []
        rows = outputs[0].shape[1]
        for r in range(rows):
            row = outputs[0][0][r]
            confidence = row[4]  # objectness score
            if confidence >= self.confidence_threshold:
                classes_scores = row[5:]
                class_id = np.argmax(classes_scores)
                score = confidence * classes_scores[class_id]
                if score >= self.confidence_threshold:
                    confidences.append(float(score))
                    class_ids.append(class_id)
                    cx, cy, w, h = row[0], row[1], row[2], row[3]
                    # Fix: apply the inverse affine BEFORE int() truncation;
                    # the original cast lost up to 1/scale original-image
                    # pixels of precision.
                    left = (cx - w / 2) * M_T[0][0] + M_T[0][2]
                    top = (cy - h / 2) * M_T[0][0] + M_T[1][2]
                    right = (cx + w / 2) * M_T[0][0] + M_T[0][2]
                    bottom = (cy + h / 2) * M_T[0][0] + M_T[1][2]
                    boxes.append([left, top, right - left, bottom - top])
        # NOTE: NMSBoxes is class-agnostic; OpenCV >= 4.7.0 offers
        # cv2.dnn.NMSBoxesBatched for per-class NMS.
        indices = cv2.dnn.NMSBoxes(boxes, confidences,
                                   self.confidence_threshold, self.nms_threshold)
        # Fix: NMSBoxes returns (N, 1) before OpenCV 4.5.4 and flat (N,)
        # after; the original `i = i[0]` crashes on newer versions.
        # reshape(-1) handles both, plus the empty no-detection tuple.
        for i in np.array(indices).reshape(-1):
            left, top, width, height = (int(v) for v in boxes[i])
            results.append([left, top, left + width, top + height,
                            confidences[i], class_ids[i]])
        return results

    def detect(self, image):
        """Run the full pipeline on a BGR image and return decoded boxes."""
        affine_image, M_T = self.warpAffine(image)
        # Normalize to [0, 1] and swap BGR -> RGB for the ONNX model.
        blob = cv2.dnn.blobFromImage(affine_image, 1 / 255.0,
                                     (self.image_width, self.image_height),
                                     [0, 0, 0], swapRB=True, crop=False)
        self.model.setInput(blob)
        outs = self.model.forward(self.model.getUnconnectedOutLayersNames())
        return self.decode(outs, M_T)

    def draw(self, image, boxes):
        """Draw detection rectangles on *image* and save it as result.jpg."""
        for box in boxes:
            left, top, right, bottom, conf, class_id = box
            cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 255), 2)
        cv2.imwrite("result.jpg", image)
if __name__ == "__main__":
    # conf_threshold=0.3, nms_threshold=0.45, network input 896x512
    yolo = YoloV5("model.onnx", 0.3, 0.45, 896, 512)
    image = cv2.imread("test.jpg")
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface later as a confusing
    # shape/attribute error inside detect().
    if image is None:
        raise FileNotFoundError("failed to read test.jpg")
    results = yolo.detect(image)
    yolo.draw(image, results)
网友评论