本文介绍
基于 Python 和 OpenCV 的 YOLO 算法目标检测基础框架,可用于检测图片、视频以及实时的摄像头监控画面,检测结果也可以很好地保存下来。我的 opencv-python 版本是 4.5.3.56,Python 版本是 3.8.10。本文用到的 coco.names、yolo-fastest-xl.cfg、yolo-fastest-xl.weights 等文件可从 https://github.com/dog-qiuqiu/Yolo-Fastest 下载获得。
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path
# Detection thresholds and the network input resolution.
confThreshold = 0.5  # a detection is kept only if its best class score exceeds this
nmsThreshold = 0.4  # overlap threshold handed to non-maximum suppression
inpWidth = inpHeight = 416  # size of the blob fed to the network

# Command-line interface. Both options are optional.
# Usage: python <script>.py --video clip.mp4   (or: --image picture.jpg)
parser = argparse.ArgumentParser(
    description='Object Detection using YOLO in OPENCV',
)
parser.add_argument(
    '--image',
    help='Path to image file.',
)
parser.add_argument(
    '--video',
    help='Path to video file.',
)
args = parser.parse_args()
获取coco文件的种类,根据种类标记不同的颜色
# Read the class names, one per line, from the label file.
classesFile = "data/coco.names"
with open(classesFile, 'rt') as labelFile:
    labelText = labelFile.read()
classes = labelText.rstrip('\n').split('\n')

# One random colour triple per class, used when drawing that class's boxes.
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")

# Build the network from the Darknet layer description (.cfg) and the trained
# weights, then select OpenCV's own DNN backend with the CPU as target.
modelConfiguration = "data/yolo-fastest-xl.cfg"
modelWeights = "model/yolo-fastest-xl.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
获取输出层的名称
def getOutputsNames(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here the ``cv.dnn`` network).

    Returns:
        list: Layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    layersNames = net.getLayerNames()
    # The reported layer ids are 1-based. Depending on the OpenCV version they
    # arrive either as an N x 1 array or as a flat array, so flatten first
    # instead of indexing every entry with [0].
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]
def drawPred(color, classId, conf, left, top, right, bottom):
    """Draw one detection box and its caption on the module-level ``frame``.

    Reads the globals ``frame`` (image drawn on) and ``classes`` (label list).

    Args:
        color: Colour used for the box outline and the caption text.
        classId: Index into ``classes``; must be smaller than ``len(classes)``.
        conf: Confidence value, rendered with two decimals.
        left, top, right, bottom: Box corners in pixel coordinates.
    """
    font = cv.FONT_HERSHEY_SIMPLEX
    cv.rectangle(frame, (left, top), (right, bottom), color, 3)

    # Caption is "<class>:<confidence>", or just the confidence when no class
    # list is loaded.
    caption = '%.2f' % conf
    if classes:
        assert (classId < len(classes))
        caption = '%s:%s' % (classes[classId], caption)

    # The text is measured at scale 0.5 but drawn at 0.75, hence the 1.5
    # factor on the white background plate.
    (textWidth, textHeight), baseLine = cv.getTextSize(caption, font, 0.5, 1)
    top = max(top, textHeight)  # keep the caption from starting above the image
    plateTopLeft = (left, top - round(1.5 * textHeight))
    plateBottomRight = (left + round(1.5 * textWidth), top + baseLine)
    cv.rectangle(frame, plateTopLeft, plateBottomRight, (255, 255, 255), cv.FILLED)
    cv.putText(frame, caption, (left, top), font, 0.75, color, 1)
去掉一些置信度低的标签框,扫描从网络输出的所有边界框,只保留置信度高的,将框的类标签指定为得分最高的类。
def postprocess(frame, outs):
    """Filter the raw network outputs and draw the surviving detections.

    Each detection row is read as ``[cx, cy, w, h, <unused>, score, ...]``:
    elements 0-3 are scaled by the frame size to get a pixel box, and the
    entries from index 5 on are per-class scores. A row is kept when its best
    class score exceeds ``confThreshold``; overlapping boxes are then reduced
    with ``cv.dnn.NMSBoxes`` and the rest are passed to ``drawPred``.

    Args:
        frame: Image the detections refer to. Only its height
            (``shape[0]``) and width (``shape[1]``) are read here; the
            drawing itself is done by ``drawPred``.
        outs: Iterable of network outputs, each an iterable of detection rows.
    """
    frameHeight, frameWidth = frame.shape[0], frame.shape[1]

    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence <= confThreshold:
                continue
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            width = int(detection[2] * frameWidth)
            height = int(detection[3] * frameHeight)
            # Convert centre/size to the top-left corner expected by NMSBoxes.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(classId)
            confidences.append(confidence)
            boxes.append([left, top, width, height])

    # Drop redundant overlapping boxes. Depending on the OpenCV version the
    # result is an N x 1 array, a flat array, or an empty tuple; flattening
    # handles all three.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for keptIndex in np.array(indices).flatten():
        keptIndex = int(keptIndex)
        left, top, width, height = boxes[keptIndex]
        color = [int(c) for c in colors[classIds[keptIndex]]]
        drawPred(color, classIds[keptIndex], confidences[keptIndex],
                 left, top, left + width, top + height)
输入可以是图片、视频或摄像头。在终端执行 python 文件名.py --video 视频名.mp4 就可以检测视频;执行 python 文件名.py --image 图片名.jpg 可以检测图片;不带参数时使用摄像头。
# Title of the window the results are shown in.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Pick the input source and derive the output file name from it.
outputFile = "yolo_out_py.avi"
if args.image:
    # Single image file.
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext instead of [:-4] so extensions of any length (".jpeg") work.
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Video file.
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    print('input_video')
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # Camera stream (device index 0). A stream URL string (e.g. rtsp/http
    # from an IP camera app) can be passed to VideoCapture instead.
    cap = cv.VideoCapture(0)

# Stop early instead of silently producing an empty result.
if not cap.isOpened():
    print("Cannot open input source")
    sys.exit(1)

# Video writer for the annotated output; not needed for a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        30,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
         round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )
# The output layer names do not change between frames; resolve them once.
outputLayerNames = getOutputsNames(net)
try:
    # Runs until a key is pressed or the input has no more frames.
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            print("完成 !!!")
            print("输出文件 ", outputFile)
            cv.waitKey(3000)  # keep the last result visible for 3000 ms
            break

        # Scale pixel values by 1/255 and resize to the network input size.
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outs = net.forward(outputLayerNames)

        # Drop low-confidence boxes and draw the remaining ones.
        postprocess(frame, outs)

        # Overlay the inference time reported by the network.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

        # Save the annotated frame.
        if args.image:
            cv.imwrite(outputFile, frame.astype(np.uint8))
        else:
            vid_writer.write(frame.astype(np.uint8))
        cv.imshow(winName, frame)
finally:
    # Release the capture and the writer on every exit path so the output
    # video is finalised, then close the preview window.
    cap.release()
    if not args.image:
        vid_writer.release()
    cv.destroyAllWindows()
完整代码如下
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path
# Detection thresholds and the network input resolution.
confThreshold = 0.5  # a detection is kept only if its best class score exceeds this
nmsThreshold = 0.4  # overlap threshold handed to non-maximum suppression
inpWidth = inpHeight = 416  # size of the blob fed to the network

# Command-line interface. Both options are optional.
# Usage: python <script>.py --video clip.mp4   (or: --image picture.jpg)
parser = argparse.ArgumentParser(
    description='Object Detection using YOLO in OPENCV',
)
parser.add_argument(
    '--image',
    help='Path to image file.',
)
parser.add_argument(
    '--video',
    help='Path to video file.',
)
args = parser.parse_args()
# Read the class names, one per line, from the label file.
classesFile = "data/coco.names"
with open(classesFile, 'rt') as labelFile:
    labelText = labelFile.read()
classes = labelText.rstrip('\n').split('\n')

# One random colour triple per class, used when drawing that class's boxes.
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")

# Build the network from the Darknet layer description (.cfg) and the trained
# weights, then select OpenCV's own DNN backend with the CPU as target.
modelConfiguration = "data/yolo-fastest-xl.cfg"
modelWeights = "model/yolo-fastest-xl.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
def getOutputsNames(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here the ``cv.dnn`` network).

    Returns:
        list: Layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    layersNames = net.getLayerNames()
    # The reported layer ids are 1-based. Depending on the OpenCV version they
    # arrive either as an N x 1 array or as a flat array, so flatten first
    # instead of indexing every entry with [0].
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]
def drawPred(color, classId, conf, left, top, right, bottom):
    """Draw one detection box and its caption on the module-level ``frame``.

    Reads the globals ``frame`` (image drawn on) and ``classes`` (label list).

    Args:
        color: Colour used for the box outline and the caption text.
        classId: Index into ``classes``; must be smaller than ``len(classes)``.
        conf: Confidence value, rendered with two decimals.
        left, top, right, bottom: Box corners in pixel coordinates.
    """
    font = cv.FONT_HERSHEY_SIMPLEX
    cv.rectangle(frame, (left, top), (right, bottom), color, 3)

    # Caption is "<class>:<confidence>", or just the confidence when no class
    # list is loaded.
    caption = '%.2f' % conf
    if classes:
        assert (classId < len(classes))
        caption = '%s:%s' % (classes[classId], caption)

    # The text is measured at scale 0.5 but drawn at 0.75, hence the 1.5
    # factor on the white background plate.
    (textWidth, textHeight), baseLine = cv.getTextSize(caption, font, 0.5, 1)
    top = max(top, textHeight)  # keep the caption from starting above the image
    plateTopLeft = (left, top - round(1.5 * textHeight))
    plateBottomRight = (left + round(1.5 * textWidth), top + baseLine)
    cv.rectangle(frame, plateTopLeft, plateBottomRight, (255, 255, 255), cv.FILLED)
    cv.putText(frame, caption, (left, top), font, 0.75, color, 1)
def postprocess(frame, outs):
    """Filter the raw network outputs and draw the surviving detections.

    Each detection row is read as ``[cx, cy, w, h, <unused>, score, ...]``:
    elements 0-3 are scaled by the frame size to get a pixel box, and the
    entries from index 5 on are per-class scores. A row is kept when its best
    class score exceeds ``confThreshold``; overlapping boxes are then reduced
    with ``cv.dnn.NMSBoxes`` and the rest are passed to ``drawPred``.

    Args:
        frame: Image the detections refer to. Only its height
            (``shape[0]``) and width (``shape[1]``) are read here; the
            drawing itself is done by ``drawPred``.
        outs: Iterable of network outputs, each an iterable of detection rows.
    """
    frameHeight, frameWidth = frame.shape[0], frame.shape[1]

    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence <= confThreshold:
                continue
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            width = int(detection[2] * frameWidth)
            height = int(detection[3] * frameHeight)
            # Convert centre/size to the top-left corner expected by NMSBoxes.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(classId)
            confidences.append(confidence)
            boxes.append([left, top, width, height])

    # Drop redundant overlapping boxes. Depending on the OpenCV version the
    # result is an N x 1 array, a flat array, or an empty tuple; flattening
    # handles all three.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for keptIndex in np.array(indices).flatten():
        keptIndex = int(keptIndex)
        left, top, width, height = boxes[keptIndex]
        color = [int(c) for c in colors[classIds[keptIndex]]]
        drawPred(color, classIds[keptIndex], confidences[keptIndex],
                 left, top, left + width, top + height)
# The input can be an image, a video, or the camera.
# Usage: python <script>.py --video clip.mp4   (or: --image cat.jpg)
# Title of the window the results are shown in.
winName = '目标检测'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Pick the input source and derive the output file name from it.
outputFile = "res.avi"
if args.image:
    # Single image file.
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext instead of [:-4] so extensions of any length (".jpeg") work.
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Video file.
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    print('input_video')
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # Camera stream (device index 0). A stream URL string (e.g. rtsp/http
    # from an IP camera app) can be passed to VideoCapture instead.
    cap = cv.VideoCapture(0)

# Stop early instead of silently producing an empty result.
if not cap.isOpened():
    print("Cannot open input source")
    sys.exit(1)

# Video writer for the annotated output; not needed for a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        30,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
         round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )
# The output layer names do not change between frames; resolve them once.
outputLayerNames = getOutputsNames(net)
try:
    # Runs until a key is pressed or the input has no more frames.
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            print("完成 !!!")
            print("输出文件 ", outputFile)
            cv.waitKey(3000)  # keep the last result visible for 3000 ms
            break

        # Scale pixel values by 1/255 and resize to the network input size.
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outs = net.forward(outputLayerNames)

        # Drop low-confidence boxes and draw the remaining ones.
        postprocess(frame, outs)

        # Overlay the inference time reported by the network.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

        # Save the annotated frame.
        if args.image:
            cv.imwrite(outputFile, frame.astype(np.uint8))
        else:
            vid_writer.write(frame.astype(np.uint8))
        cv.imshow(winName, frame)
finally:
    # Release the capture and the writer on every exit path so the output
    # video is finalised, then close the preview window.
    cap.release()
    if not args.image:
        vid_writer.release()
    cv.destroyAllWindows()
网友评论