美文网首页
darknet框架下yolov3-tiny训练车辆数据集

darknet框架下yolov3-tiny训练车辆数据集

作者: 奋斗_登 | 来源:发表于2020-05-22 16:40 被阅读0次

主机环境:Ubuntu 18.04.4 LTS,GPU : GeForce GTX 1660

1. darknet安装

下载源码

git clone https://github.com/pjreddie/darknet

修改Makefile

GPU=1 #启用gpu 
CUDNN=1
OPENCV=1
OPENMP=0
DEBUG=1
eg: Makefile

编译安装:

cd darknet 
make

2. 目标检测测试

下载预训练模型

mkdir model 
cd model
wget https://pjreddie.com/media/files/yolov3-tiny.weights

测试

#测试图片
./darknet detect cfg/yolov3-tiny.cfg model/yolov3-tiny.weights data/dog.jpg
#测试实时视频
./darknet detector demo cfg/coco.data cfg/yolov3-tiny.cfg model/yolov3-tiny.weights

3. 制作车辆数据集

在darknet根目录下创建以下文件夹

cd scripts
mkdir  car && cd car
mkdir Annotations && mkdir ImageSets && mkdir JPEGImages && mkdir labels

准备车辆图片(分为三个部分)

1.DETRAC(车辆检测和跟踪的大规模数据集)

数据集主要拍摄于北京和天津的道路过街天桥(京津冀场景有福了),并 手动标注了 8250 个车辆 和 121万目标对象外框

图片:2755
car: 17609个区域,2725个图片
bus: 1144个区域, 873个图片
truck:0个区域 0个图片
挑选数据代码&转换标记文件

import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
import cv2
import time
from shutil import copyfile


def _add_child(doc, parent, tag, text=None):
    """Append a new ``tag`` element (optionally holding ``text``) to ``parent`` and return it."""
    node = doc.createElement(tag)
    if text is not None:
        node.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(node)
    return node


def ConvertVOCXml(file_path="", file_name="", src_img_basepath=None):
    """Convert one UA-DETRAC sequence annotation file into per-frame VOC XML files.

    Only frames whose number satisfies ``num % 30 == 1`` (1, 31, 61, ...) are
    exported. For each exported frame a PASCAL-VOC style XML listing the
    "car" and "bus" targets is written to ``file_path`` and the matching
    source image is copied next to it.

    Args:
        file_path: Output directory for the generated XML files and images.
        file_name: Path of the UA-DETRAC sequence XML file to convert.
        src_img_basepath: Directory that contains one sub-folder of
            ``imgNNNNN.jpg`` frames per sequence. ``None`` selects the
            original hard-coded location.

    Returns:
        The number of frames that were exported.
    """
    if src_img_basepath is None:
        src_img_basepath = "D:\\UA-DETRAC(车辆检测数据集8250车辆)\\Insight-MVT_Annotation_Train\\"
    # Frame size written into <size> and used to clamp the box's far corner.
    img_width, img_height = 960, 540

    root = ET.parse(file_name).getroot()
    num = 0  # exported frame counter

    for child in root:
        if child.tag != "frame":
            continue
        if int(child.attrib["num"]) % 30 != 1:
            continue

        pic_id = child.attrib["num"].zfill(5)
        output_name = root.attrib["name"] + "__img" + pic_id
        pic_name = output_name + ".jpg"

        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)

        _add_child(doc, annotation, "folder", "VOC2007")
        _add_child(doc, annotation, "filename", pic_name)
        _add_child(doc, annotation, "path")
        source = _add_child(doc, annotation, "source")
        _add_child(doc, source, "database", "Unknown")
        size = _add_child(doc, annotation, "size")
        _add_child(doc, size, "width", img_width)
        _add_child(doc, size, "height", img_height)
        _add_child(doc, size, "depth", "3")
        _add_child(doc, annotation, "segmented", "0")

        # Element.getchildren() was removed in Python 3.9; index the element
        # directly. The first child of a frame is its target list.
        target_list = child[0] if len(child) else ()
        for target in target_list:
            if target.tag != "target":
                continue
            attribute = target.find("attribute")
            box = target.find("box")
            # Skip incomplete targets instead of emitting a box left over
            # from the previous target.
            if attribute is None or box is None:
                continue
            vehicle_type = attribute.attrib.get("vehicle_type")
            if vehicle_type not in ("car", "bus"):
                continue

            xmin_value = int(float(box.attrib["left"]))
            ymin_value = int(float(box.attrib["top"]))
            box_width_value = int(float(box.attrib["width"]))
            box_height_value = int(float(box.attrib["height"]))

            obj_node = _add_child(doc, annotation, 'object')
            _add_child(doc, obj_node, 'name', vehicle_type)
            _add_child(doc, obj_node, 'pose', "Unspecified")  # placeholder value
            _add_child(doc, obj_node, 'truncated', "0")  # placeholder value
            _add_child(doc, obj_node, 'difficult', "0")  # placeholder value
            bndbox = _add_child(doc, obj_node, 'bndbox')
            _add_child(doc, bndbox, 'xmin', xmin_value)
            _add_child(doc, bndbox, 'ymin', ymin_value)
            _add_child(doc, bndbox, 'xmax', min(xmin_value + box_width_value, img_width))
            _add_child(doc, bndbox, 'ymax', min(ymin_value + box_height_value, img_height))

        file_path_out = os.path.join(file_path, output_name + ".xml")
        # toprettyxml() emits no encoding declaration, so the file must be UTF-8.
        with open(file_path_out, 'w', encoding='utf-8') as f:
            f.write(doc.toprettyxml(indent=' ' * 4))
        copyfile(os.path.join(src_img_basepath, root.attrib["name"], "img" + pic_id + ".jpg"),
                 os.path.join(file_path, pic_name))
        num += 1
    return num


'''
画方框
'''


def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
    """Print ``bbox`` and draw it as a rectangle on ``img`` via cv2.rectangle.

    Args:
        img: Image passed straight to ``cv2.rectangle``.
        bbox: Mapping with "xmin", "ymin", "xmax" and "ymax" entries, each
            convertible with ``float()``; values are truncated to int.
        color: Rectangle colour forwarded to ``cv2.rectangle``.
        thickness: Line thickness forwarded to ``cv2.rectangle``.
    """
    print(bbox)
    top_left = tuple(int(float(bbox[key])) for key in ("xmin", "ymin"))
    bottom_right = tuple(int(float(bbox[key])) for key in ("xmax", "ymax"))
    cv2.rectangle(img, top_left, bottom_right, color, thickness)


def visualization_image(image_name, xml_file_name):
    """Show an image with the boxes from its VOC annotation drawn on top.

    Prints the folder, filename and size fields found in the XML, collects
    the name and bndbox corners of every <object>, draws them with
    ``bboxes_draw_on_img`` and displays the result until a key is pressed.

    Args:
        image_name: Path of the image handed to ``cv2.imread``.
        xml_file_name: Path of the VOC XML annotation for that image.
    """
    corner_tags = ("xmin", "ymin", "xmax", "ymax")
    annotation_root = ET.parse(xml_file_name).getroot()

    boxes = []
    for node in annotation_root:
        if node.tag in ("folder", "filename"):
            print(node.tag, node.text)
        elif node.tag == "size":
            for dimension in node:
                if dimension.tag in ("width", "height", "depth"):
                    print(dimension.tag, dimension.text)
        elif node.tag == "object":
            record = {}
            for field in node:
                if field.tag == "name":
                    record["name"] = field.text
                elif field.tag == "bndbox":
                    for corner in field:
                        if corner.tag in corner_tags:
                            record[corner.tag] = corner.text
            # Objects that yielded no recognised field are ignored.
            if record:
                boxes.append(record)

    img = cv2.imread(image_name)
    for box in boxes:
        bboxes_draw_on_img(img, box)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    # Convert every UA-DETRAC sequence annotation found in basePath and keep
    # a per-file count in xml_statistical.txt for troubleshooting.
    basePath = "D:\\UA-DETRAC(车辆检测数据集8250车辆)\\DETRAC-Train-Annotations-XML"
    totalxml = os.listdir(basePath)
    total_num = 0
    print("正在转换")
    saveBasePath = "xml_test"
    os.makedirs(saveBasePath, exist_ok=True)

    start = time.time()
    # The context manager guarantees the log is flushed and closed even when
    # a conversion fails part-way through.
    with open("xml_statistical.txt", "w") as log:
        for xml in totalxml:
            file_name = os.path.join(basePath, xml)
            print(file_name)
            num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
            print(num)
            total_num = total_num + num
            log.write(file_name + " " + str(num) + "\n")
        seconds = time.time() - start
        print("Time taken : {0} seconds".format(seconds))
        print(total_num)
        log.write(str(total_num) + "\n")
    # To inspect one converted sample:
    # visualization_image(saveBasePath + "/MVI_20011__img00581.jpg", "xml_test/MVI_20011__img00581.xml")

2.bdd100k(伯克利大学AI实验室(BAIR)发布了目前最大规模、内容最具多样性的公开驾驶数据集BDD100K)

图片:2301个
car: 23440个区域 2294个图片
bus: 873个区域 676个图片
truck:2198个区域 1361个图片
挑选2301个参考代码

import random
import os
import xml.etree.ElementTree as ET

# Directory with the bdd100k annotations in VOC XML form.
path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\Annotations"
xmls = [x for x in os.listdir(path) if x.endswith('xml')]
# random.sample raises ValueError when k exceeds the population size, so cap
# the sample at the number of annotation files actually present.
selected_xmls = random.sample(xmls, k=min(10000, len(xmls)))
# Show the sample size plus two example entries when they exist.
print(len(selected_xmls), *[selected_xmls[idx] for idx in (0, 200) if idx < len(selected_xmls)])

# Running totals: annotated regions per class and images containing each class.
car_num = 0
bus_num = 0
truck_num = 0
car_jpg_num = 0
bus_jpg_num = 0
truck_jpg_num = 0
i = 0
xml_list = []
for xml in selected_xmls:
    root = ET.parse(os.path.join(path, xml)).getroot()
    _car_num = 0
    _bus_num = 0
    _truck_num = 0
    i += 1
    for child in root:
        if child.tag != "object" or len(child) == 0:
            continue
        # Element.getchildren() was removed in Python 3.9; the class name is
        # read from the first child of <object>, as before.
        temp_text = child[0].text
        if temp_text == "car":
            _car_num += 1
        elif temp_text == "bus":
            _bus_num += 1
        elif temp_text == "truck":
            _truck_num += 1

    xml_count = len(xml_list)
    if xml_count > 2300:
        break
    # While at most 1500 files are selected only images containing a truck or
    # a bus are accepted; after that every image is accepted.
    if xml_count > 1500 or _truck_num > 0 or _bus_num > 0:
        xml_list.append(xml)
        car_num += _car_num
        bus_num += _bus_num
        truck_num += _truck_num
        if _car_num > 0:
            car_jpg_num += 1
        if _bus_num > 0:
            bus_jpg_num += 1
        if _truck_num > 0:
            truck_jpg_num += 1
    print (i, xml_count, car_num, car_jpg_num, bus_num, bus_jpg_num, truck_num, truck_jpg_num)
print ("over", car_num, car_jpg_num, bus_num, bus_jpg_num, truck_num, truck_jpg_num)
print (xml_list)
#复制文件
from shutil import copyfile

# Copy each selected annotation and its same-named .jpg image into the
# dataset folders, printing a numbered progress line per pair.
i = 0
xml_base_path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\Annotations\\"
img_base_path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\targetimg\\"
for i, xml in enumerate(xml_list, start=1):
    jpg = xml.replace(".xml", ".jpg")
    xml_src = xml_base_path + xml
    jpg_src = img_base_path + jpg
    print (i, xml_src, jpg_src)
    copyfile(xml_src, "D:\\AI\\darknet\\scripts\\DETRAC\\Annotations\\" + xml)
    copyfile(jpg_src, "D:\\AI\\darknet\\scripts\\DETRAC\\JPEGImages\\" + jpg)


3.自己标记(labelImg标记)

图片:372 个
car: 677个区域 360个图片
bus: 19个区域 19个图片
truck:10个区域 10个图片


合计:

图片:5428个
car: 41726个区域 5379个图片
bus: 2036个区域 1568个图片
truck:2208个区域 1371个图片

将以上3处的数据中的图片和xml分别复制到JPEGImages和Annotations文件夹下

4. 指定训练和测试数据集

我们做好的数据集要一部分作为训练集来训练模型,需要另一部分作为测试集来帮助我们验证模型的可靠性.因此首先要将所有的图像文件随机分配为训练集和测试集.
首先切换到ImageSets目录中,新建Main目录,然后在Main目录中新建两个文本文档train.txt和val.txt.分别用于存放训练集的文件名列表和测试集的文件名列表.

cd ImageSets 
mkdir -p Main && cd Main
touch train.txt val.txt 

85%的数据作为训练数据,15%作为测试数据

import os
from os import listdir, getcwd
from os.path import join

if __name__ == '__main__':
    import random

    source_folder = 'DETRAC/JPEGImages/'  # change to your own path
    dest = 'DETRAC/ImageSets/Main/train.txt'  # change to your own path
    dest2 = 'DETRAC/ImageSets/Main/val.txt'  # change to your own path
    train_ratio = 0.85  # share of the images that goes into the training list

    # Sort first so the seeded shuffle gives the same split on every run,
    # whatever order os.listdir happens to return.
    names = sorted(os.path.splitext(file_obj)[0] for file_obj in os.listdir(source_folder))
    random.Random(0).shuffle(names)
    split_at = int(len(names) * train_ratio)

    # 'w' instead of 'a': re-running must replace the lists, not duplicate them.
    with open(dest, 'w') as train_file, open(dest2, 'w') as val_file:
        train_file.writelines(name + '\n' for name in names[:split_at])
        val_file.writelines(name + '\n' for name in names[split_at:])
执行后train.txt和val.txt文件如下所示

5.生成训练集和测试集的图片路径列表和标签文件

修改script文件夹根目录下的voc_label.py文件
sets=['train', 'val']
classes = ["car", "bus", "truck"]

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# Image-set names to process.
sets = ['train', 'val']

# Class names; a class id is the index of the name in this list.
classes = ["car", "bus", "truck"]


def convert(size, box):
    """Turn a corner-style box into a normalised centre/size tuple.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        ``(x, y, w, h)`` where x/y are the box centre minus one, and all four
        values are scaled by the reciprocal of the image width or height.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    left, right, top, bottom = box[0], box[1], box[2], box[3]
    center_x = (left + right) / 2.0 - 1
    center_y = (top + bottom) / 2.0 - 1
    box_w = right - left
    box_h = bottom - top
    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)

def convert_annotation(image_id):
    """Write the label file for one image from its VOC annotation.

    Reads ``car_20200512/Annotations/<image_id>.xml`` and writes
    ``car_20200512/labels/<image_id>.txt`` with one
    ``<class_id> <x> <y> <w> <h>`` line per object. Objects whose name is not
    in ``classes`` or that are flagged difficult are left out.

    Args:
        image_id: Image base name without extension.
    """
    # Passing the path lets ElementTree open, decode and close the file itself.
    root = ET.parse('car_20200512/Annotations/%s.xml' % (image_id)).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('car_20200512/labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            # Some annotation tools capitalise the tag.
            difficult_obj = obj.find('difficult')
            if difficult_obj is None:
                difficult_obj = obj.find('Difficult')
            # An object without either tag is treated as not difficult.
            difficult = difficult_obj.text if difficult_obj is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()

# For every image set, write car_<set>.txt with the absolute image paths and
# generate the label file of each listed image.
for image_set in sets:
    with open('car_20200512/ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    # The list is named car_<set>.txt so that it matches the files merged below.
    with open('car_%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            print(image_id)
            list_file.write('%s/car_20200512/JPEGImages/%s.jpg\n' % (wd, image_id))
            convert_annotation(image_id)

# Concatenate the per-set lists into car_hjtrain.txt (done in Python rather
# than with a shell `cat`, so it also works where `cat` is unavailable).
with open('car_hjtrain.txt', 'w') as merged_file:
    for image_set in sets:
        with open('car_%s.txt' % (image_set)) as part_file:
            merged_file.write(part_file.read())
执行后labels下文件内容如下: scripts/DETRAC_train.txt、scripts/DETRAC_val.txt内容如下:

6.修改配置文件

6.1 修改data/car.names
car
bus
truck
6.2 修改cfg/car.data
classes= 3 #三种类型
train  = /data/project/darknet/scripts/car_train.txt
valid  = /data/project/darknet/scripts/car_val.txt
names = data/car.names
backup = car_backup
6.3 修改yolov3-tiny_car.cfg文件

有两处需要修改:每个[yolo]层的classes,以及该[yolo]层前面紧邻的[convolutional]层的filters。
将classes修改为3,因为我们只有3类;将该卷积层的filters修改为24,即 3*(classes+5) = 3*(3+5) = 24


[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

[yolo]
mask = 3,4,5
anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
classes=3
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

文件上方Testing下两行的batch=1,subdivisions=1全部注释掉,将# Training下两行的batch=64,subdivisions=16全部取消注释.

[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

7. 训练

#生成预训练模型
./darknet partial cfg/yolov3-tiny_car.cfg model/yolov3-tiny.weights model/yolov3-tiny.conv.15 15
#开始训练
./darknet detector train cfg/car.data cfg/yolov3-tiny_car.cfg model/yolov3-tiny.conv.15 | tee car_train_log.txt

保存log时会生成两个文件,一个保存的是网络加载信息和checkpoint保存信息,另一个保存的是训练信息
训练耗时较长,查看log当loss较小,且不再发生变化时,可按"ctrl+c"终止训练

8. 计算map

注意:删除annots.pkl

8.1 运行detector valid命令,进行测试
./darknet detector valid cfg/car.data cfg/yolov3-tiny_car.cfg car_backup/yolov3-tiny_car_130000.weights -thresh 0.5 -out ""
运行结束后,在results文件夹下生成3个.txt文件
8.2 创建voc_eval.py
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

import xml.etree.ElementTree as ET
import os
import pickle
import numpy as np

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename: Path of the annotation file.

    Returns:
        A list with one dict per <object>, holding 'name', 'pose',
        'truncated', 'difficult' and 'bbox' ([xmin, ymin, xmax, ymax] as
        ints). A missing pose becomes None and a missing truncated or
        difficult flag becomes 0.
    """
    def _text(node, tag, default=None):
        # Text of the first `tag` child, or `default` when there is no such child.
        found = node.find(tag)
        return found.text if found is not None else default

    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        # Some annotation tools capitalise the difficult tag.
        difficult = _text(obj, 'difficult')
        if difficult is None:
            difficult = _text(obj, 'Difficult', '0')
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'pose': _text(obj, 'pose'),
            'truncated': int(_text(obj, 'truncated', '0')),
            'difficult': int(difficult),
            # int(float(...)) also accepts coordinates stored as "592.75".
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })

    return objects

def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC average precision from recall and precision arrays.

    Args:
        rec: Array of recall values.
        prec: Array of precision values, same length as ``rec``.
        use_07_metric: When true, average the best precision at the 11
            recall thresholds 0, 0.1, ..., 1.0; otherwise integrate the
            precision envelope over recall.

    Returns:
        The average precision.
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for threshold in np.arange(0., 1.1, 0.1):
            reached = rec >= threshold
            best = np.max(prec[reached]) if np.sum(reached) != 0 else 0
            ap = ap + best / 11.
        return ap

    # Pad both curves with sentinel values at either end.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: running maximum taken from the right-hand side.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Positions where recall changes value.
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # Sum of (delta recall) * precision.
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])

def _load_annotations(annopath, imagenames, cachefile):
    """Return {imagename: parse_rec(...)} for ``imagenames``, reusing ``cachefile`` when it covers them."""
    if os.path.isfile(cachefile):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # cachedir at a location you control.
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)
        if all(name in recs for name in imagenames):
            return recs
        # A cache written for another image set would otherwise fail later
        # with a KeyError, so rebuild it.
        print('Cached annotations in {:s} do not cover the image set; rebuilding'.format(cachefile))

    recs = {}
    for i, imagename in enumerate(imagenames):
        recs[imagename] = parse_rec(annopath.format(imagename))
        if i % 100 == 0:
            print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames)))
    print('Saving cached annotations to {:s}'.format(cachefile))
    with open(cachefile, 'wb') as f:
        pickle.dump(recs, f)
    return recs


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                cachedir,
                                [ovthresh],
                                [use_07_metric])
    Top level function that does the PASCAL VOC evaluation.
    detpath: Path to detections
        detpath.format(classname) should produce the detection results file,
        one "image_id confidence xmin ymin xmax ymax" line per detection.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the annotations (annots.pkl); created
        when missing.
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    Returns the cumulative recall and precision arrays (detections ordered
    by descending confidence) and the average precision.
    """
    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images, ignoring blank lines
    with open(imagesetfile, 'r') as f:
        imagenames = [x.strip() for x in f if x.strip()]

    recs = _load_annotations(annopath, imagenames, cachefile)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets, ignoring blank lines
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        splitlines = [x.split() for x in f if x.strip()]

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)

    # With no detections BB is not 2-D, so the sorting and matching below
    # must be skipped.
    if nd > 0:
        # sort by confidence, highest first
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)

            if BBGT.size > 0:
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                # A match with a difficult box counts as neither TP nor FP.
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        # ground-truth box already claimed by a better detection
                        fp[d] = 1.
            else:
                fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
8.3 创建 computer_Single_ALL_mAP.py
from voc_eval import voc_eval

import os

current_path = os.getcwd()
results_path = current_path+"/results"
sub_files = os.listdir(results_path)

# One result file per class: results/<class_name>.txt. Anything else in the
# folder is ignored, and names are sorted so the report order is stable.
mAP = []
for sub_file in sorted(sub_files):
    if not sub_file.endswith(".txt"):
        continue
    class_name = sub_file[:-len(".txt")]
    rec, prec, ap = voc_eval('results/{}.txt', 'scripts/DETRAC/Annotations/{}.xml', 'scripts/DETRAC/ImageSets/Main/val.txt', class_name, '.')
    print("{} :\t {} ".format(class_name, ap))
    mAP.append(ap)

mAP = tuple(mAP)

print("***************************")
if mAP:
    print("mAP :\t {}".format( float( sum(mAP)/len(mAP)) ))
else:
    # Avoid a ZeroDivisionError when no result file was found.
    print("mAP :\t no result files found in {}".format(results_path))

9. iou、loss可视化

extract_log.py

# coding=utf-8
# 该文件用来提取训练log,去除不可解析的log后使log文件格式化,生成新的log文件供可视化工具绘图

import inspect
import os
import random
import sys


def extract_log(log_file, new_log_file, key_word):
    """Copy the lines of ``log_file`` that contain ``key_word`` into ``new_log_file``.

    Lines containing 'Syncing' (multi-GPU sync messages) or 'nan' are
    dropped even when they contain the key word.

    Args:
        log_file: Path of the training log to read.
        new_log_file: Path of the filtered log to write (overwritten).
        key_word: Substring a line must contain to be kept.
    """
    rejected_markers = ('Syncing', 'nan')
    with open(log_file, 'r') as source, open(new_log_file, 'w') as target:
        for entry in source:
            if any(marker in entry for marker in rejected_markers):
                continue
            if key_word in entry:
                target.write(entry)


# The training command pipes its output through `tee car_train_log.txt`, so
# that is the log to split into the loss lines and the IOU lines.
extract_log('car_train_log.txt', 'train_log_loss.txt', 'images')
extract_log('car_train_log.txt', 'train_log_iou.txt', 'IOU')

train_iou_visualization.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# %matplotlib inline

# lines = 122956  # 根据train_log_iou.txt的行数修改
# result = pd.read_csv('train_log_iou.txt', skiprows=[x for x in range(lines) if (x % 10 == 0 or x % 10 == 9)],
#                      error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])
# result = pd.read_csv('train_log_iou.txt', skiprows=[],
#                      error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])

# Set `lines` to the number of lines in your own train_log_iou.txt.
lines = 613980
step = 5000
start_ite = 0
end_ite = 50200
igore = 1000
data_path = 'train_log_iou.txt'  # path of the extracted IOU log

names = ['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count']
# Skip the share of rows that corresponds to the first `igore` iterations,
# then keep only every `step`-th row.
first_kept_row = lines * 1.0 / ((end_ite - start_ite) * 1.0) * igore
skipped_rows = [x for x in range(lines) if (x < first_kept_row or x % step != 0)]
try:
    # pandas >= 1.3 (error_bad_lines was removed in pandas 2.0)
    result = pd.read_csv(data_path, skiprows=skipped_rows, on_bad_lines='skip', names=names)
except TypeError:
    # older pandas that does not know on_bad_lines
    result = pd.read_csv(data_path, skiprows=skipped_rows, error_bad_lines=False, names=names)

# Every field looks like "<label>: <value>"; keep the value part.
for column in names:
    result[column] = result[column].str.split(': ').str.get(1)

print(result['Region Avg IOU'])

for column in names:
    result[column] = pd.to_numeric(result[column])

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['Region Avg IOU'].values, label='Region Avg IOU')
# Other columns can be plotted the same way, e.g.:
# ax.plot(result['Avg Recall'].values, label='Avg Recall')
ax.legend(loc='best')
ax.set_title('The Region Avg IOU curves')
ax.set_xlabel('batches')
fig.savefig('Region Avg IOU')

train_loss_visualization.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# %matplotlib inline

lines = 5124  # set to the number of lines in your own train_log_loss.txt
names = ['loss', 'avg', 'rate', 'seconds', 'images']
# Skip the first 1000 rows, then keep one row out of every ten.
skipped_rows = [x for x in range(lines) if (x % 10 != 9 or x < 1000)]
try:
    # pandas >= 1.3 (error_bad_lines was removed in pandas 2.0)
    result = pd.read_csv('train_log_loss.txt', skiprows=skipped_rows,
                         on_bad_lines='skip', names=names)
except TypeError:
    # older pandas that does not know on_bad_lines
    result = pd.read_csv('train_log_loss.txt', skiprows=skipped_rows,
                         error_bad_lines=False, names=names)

# The numeric value is the second space-separated token of every field.
for column in names:
    result[column] = result[column].str.split(' ').str.get(1)

for column in names:
    print(result[column])

for column in names:
    result[column] = pd.to_numeric(result[column])

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['avg'].values, label='avg_loss')
# ax.plot(result['loss'].values, label='loss')
ax.legend(loc='best')  # let matplotlib pick the legend position
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig('avg_loss')

相关文章

网友评论

      本文标题:darknet框架下yolov3-tiny训练车辆数据集

      本文链接:https://www.haomeiwen.com/subject/suwynhtx.html