美文网首页
darknet框架下yolov3-tiny训练车辆数据集

darknet框架下yolov3-tiny训练车辆数据集

作者: 奋斗_登 | 来源:发表于2020-05-22 16:40 被阅读0次

    主机环境:Ubuntu 18.04.4 LTS,GPU : GeForce GTX 1660

    1. darknet安装

    下载源码

    git clone https://github.com/pjreddie/darknet
    

    修改Makefile

    GPU=1 #启用gpu 
    CUDNN=1
    OPENCV=1
    OPENMP=0
    DEBUG=1
    
    eg: Makefile

    编译安装:

    cd darknet 
    make
    

    2. 目标检测测试

    下载预训练模型

    mkdir model 
    cd model
    wget https://pjreddie.com/media/files/yolov3-tiny.weights
    

    测试

    #测试图片
    ./darknet detect cfg/yolov3-tiny.cfg model/yolov3-tiny.weights data/dog.jpg
    #测试实时视频
    ./darknet detector demo cfg/coco.data cfg/yolov3-tiny.cfg model/yolov3-tiny.weights
    

    3. 制作车辆数据集

    在darknet根目录下创建以下文件夹

    cd scripts
    mkdir  car && cd car
    mkdir Annotations && mkdir ImageSets && mkdir JPEGImages && mkdir labels
    

    准备车辆图片(分为三个部分)

    1.DETRAC(车辆检测和跟踪的大规模数据集)

    数据集主要拍摄于北京和天津的道路过街天桥(京津冀场景有福了),并 手动标注了 8250 个车辆 和 121万目标对象外框

    图片:2755
    car: 17609个区域,2725个图片
    bus: 1144个区域, 873个图片
    truck:0个区域 0个图片
    挑选数据代码&转换标记文件

    import xml.etree.ElementTree as ET
    from xml.dom.minidom import Document
    import os
    import cv2
    import time
    from shutil import copyfile
    
    
    def _append_text_element(doc, parent, tag, text):
        """Create ``<tag>text</tag>``, append it to *parent* and return the new element."""
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(str(text)))
        parent.appendChild(element)
        return element


    def ConvertVOCXml(file_path="", file_name="",
                      src_img_basepath="D:\\UA-DETRAC(车辆检测数据集8250车辆)\\Insight-MVT_Annotation_Train\\"):
        """Convert one UA-DETRAC sequence annotation into per-frame VOC XML files.

        Only frames whose ``num`` attribute satisfies ``num % 30 == 1`` are kept.
        For every kept frame a VOC-style XML file is written to *file_path* and
        the matching image is copied next to it. Only targets whose
        ``vehicle_type`` is ``car`` or ``bus`` are exported.

        Args:
            file_path: Output directory for the generated ``.xml`` files and the
                copied ``.jpg`` files. It must already exist.
            file_name: Path of the DETRAC sequence XML file to convert.
            src_img_basepath: Directory holding one sub-directory of images per
                sequence (``<sequence name>/img00001.jpg`` ...).

        Returns:
            The number of frames that were exported.
        """
        # The image size is hard-coded; it is written to <size> and used to clamp boxes.
        image_width, image_height = 960, 540
        wanted_types = ("car", "bus")

        root = ET.parse(file_name).getroot()
        sequence_name = root.attrib["name"]
        num = 0  # number of exported frames

        for frame in root:
            if frame.tag != "frame":
                continue
            # Sub-sample the sequence: keep frames 1, 31, 61, ...
            if int(frame.attrib["num"]) % 30 != 1:
                continue

            doc = Document()
            annotation = doc.createElement('annotation')
            doc.appendChild(annotation)

            pic_id = frame.attrib["num"].zfill(5)
            output_name = sequence_name + "__img" + pic_id
            pic_name = output_name + ".jpg"

            _append_text_element(doc, annotation, "folder", "VOC2007")
            _append_text_element(doc, annotation, "filename", pic_name)
            annotation.appendChild(doc.createElement("path"))  # intentionally left empty

            source = doc.createElement("source")
            _append_text_element(doc, source, "database", "Unknown")
            annotation.appendChild(source)

            size = doc.createElement("size")
            _append_text_element(doc, size, "width", image_width)
            _append_text_element(doc, size, "height", image_height)
            _append_text_element(doc, size, "depth", 3)
            annotation.appendChild(size)

            _append_text_element(doc, annotation, "segmented", "0")

            # Element.getchildren() was removed in Python 3.9, so look the
            # children up by tag instead of by position.
            target_list = frame.find("target_list")
            for target in (target_list if target_list is not None else ()):
                if target.tag != "target":
                    continue
                attribute = target.find("attribute")
                box = target.find("box")
                # A target without a box or a type cannot be exported.
                if attribute is None or box is None:
                    continue
                vehicle_type = attribute.attrib.get("vehicle_type")
                if vehicle_type not in wanted_types:
                    continue

                xmin_value = int(float(box.attrib["left"]))
                ymin_value = int(float(box.attrib["top"]))
                box_width_value = int(float(box.attrib["width"]))
                box_height_value = int(float(box.attrib["height"]))
                # Clamp the far corner to the image bounds.
                xmax_value = min(xmin_value + box_width_value, image_width)
                ymax_value = min(ymin_value + box_height_value, image_height)

                voc_object = doc.createElement('object')
                _append_text_element(doc, voc_object, "name", vehicle_type)
                # pose / truncated / difficult are placeholder values.
                _append_text_element(doc, voc_object, "pose", "Unspecified")
                _append_text_element(doc, voc_object, "truncated", "0")
                _append_text_element(doc, voc_object, "difficult", "0")

                bndbox = doc.createElement("bndbox")
                _append_text_element(doc, bndbox, "xmin", xmin_value)
                _append_text_element(doc, bndbox, "ymin", ymin_value)
                _append_text_element(doc, bndbox, "xmax", xmax_value)
                _append_text_element(doc, bndbox, "ymax", ymax_value)
                voc_object.appendChild(bndbox)
                annotation.appendChild(voc_object)

            file_path_out = os.path.join(file_path, output_name + ".xml")
            with open(file_path_out, 'w', encoding="utf-8") as f:
                f.write(doc.toprettyxml(indent=' ' * 4))
            copyfile(os.path.join(src_img_basepath, sequence_name, "img" + pic_id + ".jpg"),
                     os.path.join(file_path, pic_name))
            num += 1
        return num
    
    
    '''
    画方框
    '''
    
    
    def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
        """Draw one rectangle on *img* (in place) for the box described by *bbox*.

        *bbox* is a mapping with the keys ``xmin``, ``ymin``, ``xmax`` and
        ``ymax``; each value is converted with ``int(float(...))`` before
        drawing. The mapping is also printed.
        """
        print(bbox)
        top_left, bottom_right = (
            tuple(int(float(bbox[key])) for key in corner)
            for corner in (("xmin", "ymin"), ("xmax", "ymax"))
        )
        cv2.rectangle(img, top_left, bottom_right, color, thickness)
    
    
    def visualization_image(image_name, xml_file_name):
        """Show *image_name* with every box found in the VOC file *xml_file_name*.

        The folder, file name and image size found in the XML are printed, each
        ``<object>`` is collected as a dict (``name`` plus the four ``bndbox``
        coordinates as strings), and the boxes are drawn on the image, which is
        displayed in a window until a key is pressed.
        """
        root = ET.parse(xml_file_name).getroot()

        boxes = []
        for node in root:
            if node.tag in ("folder", "filename"):
                print(node.tag, node.text)
            elif node.tag == "size":
                for dimension in node:
                    if dimension.tag in ("width", "height", "depth"):
                        print(dimension.tag, dimension.text)
            elif node.tag == "object":
                record = {}
                for field in node:
                    if field.tag == "name":
                        record["name"] = field.text
                    elif field.tag == "bndbox":
                        for coordinate in field:
                            if coordinate.tag in ("xmin", "ymin", "xmax", "ymax"):
                                record[coordinate.tag] = coordinate.text
                # Objects that yielded no usable field are dropped.
                if record:
                    boxes.append(record)

        img = cv2.imread(image_name)
        for box in boxes:
            bboxes_draw_on_img(img, box)
        cv2.imshow("capture", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    
    
    if __name__ == "__main__":
        # Convert every DETRAC sequence annotation under basePath to VOC XML
        # files in saveBasePath, and log how many frames each sequence yielded.
        basePath = "D:\\UA-DETRAC(车辆检测数据集8250车辆)\\DETRAC-Train-Annotations-XML"
        saveBasePath = "xml_test"
        total_num = 0
        print("正在转换")
        os.makedirs(saveBasePath, exist_ok=True)

        start = time.time()
        # Per-sequence statistics, kept for troubleshooting. The context manager
        # guarantees the log is flushed and closed even if a conversion fails.
        with open("xml_statistical.txt", "w", encoding="utf-8") as log:
            for xml in os.listdir(basePath):
                # Skip anything in the directory that is not an annotation file.
                if not xml.lower().endswith(".xml"):
                    continue
                file_name = os.path.join(basePath, xml)
                print(file_name)
                num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
                print(num)
                total_num = total_num + num
                log.write(file_name + " " + str(num) + "\n")

            seconds = time.time() - start
            print("Time taken : {0} seconds".format(seconds))
            print(total_num)
            log.write(str(total_num) + "\n")
        # To inspect a converted frame visually:
        # visualization_image(saveBasePath + "/MVI_20011__img00581.jpg", "xml_test/MVI_20011__img00581.xml")
    

    2.bdd100k(伯克利大学AI实验室(BAIR)发布了目前最大规模、内容最具多样性的公开驾驶数据集BDD100K)

    图片:2301个
    car: 23440个区域 2294个图片
    bus: 873个区域 676个图片
    truck:2198个区域 1361个图片
    挑选2301个参考代码

    import random
    import os
    import xml.etree.ElementTree as ET
    
    # Pick a subset of bdd100k VOC annotations: the first 1501 picks must contain
    # at least one bus or truck, the following 800 picks may be any image.
    path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\Annotations"
    xmls = [x for x in os.listdir(path) if x.endswith('xml')]
    # random.sample raises ValueError when k exceeds the population size.
    selected_xmls = random.sample(xmls, k=min(10000, len(xmls)))
    if len(selected_xmls) > 200:
        print(len(selected_xmls), selected_xmls[0], selected_xmls[200])
    else:
        print(len(selected_xmls))

    # Region counts per class, and number of selected images containing each class.
    car_num = 0
    bus_num = 0
    truck_num = 0
    car_jpg_num = 0
    bus_jpg_num = 0
    truck_jpg_num = 0
    i = 0
    xml_list = []
    for xml in selected_xmls:
        root = ET.parse(os.path.join(path, xml)).getroot()
        i += 1
        _car_num = 0
        _bus_num = 0
        _truck_num = 0
        for child in root:
            if child.tag != "object":
                continue
            # Element.getchildren() was removed in Python 3.9; read the class
            # name by tag instead of by child position.
            name_node = child.find("name")
            temp_text = name_node.text if name_node is not None else None
            if temp_text == "car":
                _car_num += 1
            elif temp_text == "bus":
                _bus_num += 1
            elif temp_text == "truck":
                _truck_num += 1

        xml_count = len(xml_list)
        if xml_count > 2300:
            break
        # While the list is still short, only accept images with a bus or truck.
        if xml_count > 1500 or _truck_num > 0 or _bus_num > 0:
            xml_list.append(xml)
            car_num += _car_num
            bus_num += _bus_num
            truck_num += _truck_num
            if _car_num > 0:
                car_jpg_num += 1
            if _bus_num > 0:
                bus_jpg_num += 1
            if _truck_num > 0:
                truck_jpg_num += 1
        print(i, xml_count, car_num, car_jpg_num, bus_num, bus_jpg_num, truck_num, truck_jpg_num)
    print("over", car_num, car_jpg_num, bus_num, bus_jpg_num, truck_num, truck_jpg_num)
    print(xml_list)
    #复制文件
    from shutil import copyfile
    
    # Copy every selected annotation and its image into the DETRAC dataset folders.
    i = 0
    xml_base_path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\Annotations\\"
    img_base_path = "D:\\AI\\dataset\\bdd100k\\bdd100k\\targetimg\\"
    for i, xml in enumerate(xml_list, start=1):
        jpg = xml.replace(".xml", ".jpg")
        xml_source = xml_base_path + xml
        jpg_source = img_base_path + jpg
        print(i, xml_source, jpg_source)
        copyfile(xml_source, "D:\\AI\\darknet\\scripts\\DETRAC\\Annotations\\" + xml)
        copyfile(jpg_source, "D:\\AI\\darknet\\scripts\\DETRAC\\JPEGImages\\" + jpg)
    
    

    3.自己标记(labelImg标记)

    图片:372 个
    car: 677个区域 360个图片
    bus: 19个区域 19个图片
    truck:10个区域 10个图片


    合计:

    图片:5428个
    car: 41726个区域 5379个图片
    bus: 2036个区域 1568个图片
    truck:2208个区域 1371个图片

    将以上3处的数据中的图片和xml分别复制到JPEGImages和Annotations文件夹下

    4. 指定训练和测试数据集

    我们做好的数据集要一部分作为训练集来训练模型,需要另一部分作为测试集来帮助我们验证模型的可靠性.因此首先要将所有的图像文件随机分配为训练集和测试集.
    首先切换到ImageSets目录中,新建Main目录,然后在Main目录中新建两个文本文档train.txt和val.txt.分别用于存放训练集的文件名列表和测试集的文件名列表.

    cd ImageSets 
    mkdir -p Main && cd Main
    touch train.txt val.txt 
    

    85%的数据做为训练,15%做为测试数据

    import os
    from os import listdir, getcwd
    from os.path import join
    
    if __name__ == '__main__':
        # Split the image ids in JPEGImages into a training list and a
        # validation list (one id per line, without file extension).
        import random

        source_folder = 'DETRAC/JPEGImages/'  # change to your own path
        dest = 'DETRAC/ImageSets/Main/train.txt'  # change to your own path
        dest2 = 'DETRAC/ImageSets/Main/val.txt'  # change to your own path
        train_ratio = 0.85  # share of the images used for training

        # Sort first so the result does not depend on the file system's listing
        # order, then shuffle with a fixed seed: the split is random but
        # reproducible, instead of following the raw os.listdir order.
        file_names = sorted(os.path.splitext(file_obj)[0]
                            for file_obj in os.listdir(source_folder))
        random.Random(0).shuffle(file_names)
        train_count = int(len(file_names) * train_ratio)

        # 'w' instead of 'a': re-running the script must not append a second,
        # differently ordered copy of the ids to the existing lists.
        with open(dest, 'w') as train_file, open(dest2, 'w') as val_file:
            train_file.writelines(name + '\n' for name in file_names[:train_count])
            val_file.writelines(name + '\n' for name in file_names[train_count:])
    
    执行后train.txt和val.txt文件如下所示

    5.生成训练集和测试集的路径列表和标签文件

    修改scripts文件夹根目录下的voc_label.py文件
    sets=['train', 'val']
    classes = ["car", "bus", "truck"]

    import xml.etree.ElementTree as ET
    import pickle
    import os
    from os import listdir, getcwd
    from os.path import join
    
    # Image-set names; each one is read from car_20200512/ImageSets/Main/<name>.txt.
    sets=['train', 'val']
    
    # Class names to export; the class id written to a label file is the
    # position of the name in this list.
    classes = ["car", "bus", "truck"]
    
    
    def convert(size, box):
        """Turn a pixel box into a normalised ``(x, y, w, h)`` tuple.

        *size* is ``(image_width, image_height)`` and *box* is
        ``(xmin, xmax, ymin, ymax)``. The returned centre ``x``/``y`` and the
        extent ``w``/``h`` are scaled by the reciprocal of the image width and
        height respectively; 1 is subtracted from the centre before scaling.
        """
        img_w, img_h = size[0], size[1]
        left, right, top, bottom = box[0], box[1], box[2], box[3]
        scale_x = 1./(img_w)
        scale_y = 1./(img_h)
        centre_x = (left + right)/2.0 - 1
        centre_y = (top + bottom)/2.0 - 1
        return (centre_x*scale_x,
                centre_y*scale_y,
                (right - left)*scale_x,
                (bottom - top)*scale_y)
    
    def convert_annotation(image_id):
        """Write the darknet label file for one image from its VOC annotation.

        Reads ``car_20200512/Annotations/<image_id>.xml`` and writes
        ``car_20200512/labels/<image_id>.txt`` with one line per kept object:
        ``<class id> <x> <y> <w> <h>``, the box being normalised by ``convert``.
        Objects whose class is not listed in ``classes`` or that are flagged as
        difficult are skipped.

        Args:
            image_id: Image name without extension.
        """
        # Pass the path (not a text-mode handle) so the XML parser handles the
        # file encoding itself and closes the file when done.
        root = ET.parse('car_20200512/Annotations/%s.xml' % (image_id)).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        with open('car_20200512/labels/%s.txt' % (image_id), 'w') as out_file:
            for obj in root.iter('object'):
                # Some tools write the tag capitalised; a missing tag means "not difficult".
                difficult_obj = obj.find('difficult')
                if difficult_obj is None:
                    difficult_obj = obj.find('Difficult')
                difficult = difficult_obj.text if difficult_obj is not None else '0'
                cls = obj.find('name').text
                if cls not in classes or int(difficult) == 1:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                # convert() expects (xmin, xmax, ymin, ymax).
                b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                     float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
                bb = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    
    wd = getcwd()

    # For every image set, write the list of absolute image paths that darknet
    # trains on, and generate the label file of each listed image.
    for image_set in sets:
        with open('car_20200512/ImageSets/Main/%s.txt' % (image_set)) as ids_file:
            image_ids = ids_file.read().strip().split()
        # The lists are named car_<set>.txt so that they match both the
        # concatenation step below and the train/valid entries of cfg/car.data.
        with open('car_%s.txt' % (image_set), 'w') as list_file:
            for image_id in image_ids:
                print(image_id)
                list_file.write('%s/car_20200512/JPEGImages/%s.jpg\n' % (wd, image_id))
                convert_annotation(image_id)

    # Concatenate the train and val lists (portable replacement for `cat`).
    with open('car_hjtrain.txt', 'w') as merged_file:
        for part_name in ('car_train.txt', 'car_val.txt'):
            with open(part_name) as part_file:
                merged_file.write(part_file.read())
    
    执行后labels下文件内容如下: scripts/DETRAC_train.txt、scripts/DETRAC_val.txt内容如下:

    6.修改配置文件

    6.1 修改data/car.names
    car
    bus
    truck
    
    6.2 修改cfg/car.data
    classes= 3 #三种类型
    train  = /data/project/darknet/scripts/car_train.txt
    valid  = /data/project/darknet/scripts/car_val.txt
    names = data/car.names
    backup = car_backup
    
    6.3 修改yolov3-tiny_car.cfg文件

    有两处需要修改classes和filters,
    将classes修改为3,因为我们只有3类;将每个[yolo]层前一个[convolutional]层的filters修改为24,即 3*(classes+5) = 3*(3+5) = 24


    [convolutional]
    size=1
    stride=1
    pad=1
    filters=24
    activation=linear
    
    [yolo]
    mask = 3,4,5
    anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
    classes=3
    num=6
    jitter=.3
    ignore_thresh = .7
    truth_thresh = 1
    random=1
    

    文件上方Testing下两行的batch=1,subdivisions=1全部注释掉,将# Training下两行的batch=64,subdivisions=16全部取消注释.

    [net]
    # Testing
    #batch=1
    #subdivisions=1
    # Training
    batch=64
    subdivisions=16
    width=416
    height=416
    channels=3
    momentum=0.9
    decay=0.0005
    angle=0
    saturation = 1.5
    exposure = 1.5
    hue=.1
    

    7. 训练

    #生成预训练模型
    ./darknet partial cfg/yolov3-tiny_car.cfg model/yolov3-tiny.weights model/yolov3-tiny.conv.15 15
    #开始训练
    ./darknet detector train cfg/car.data cfg/yolov3-tiny_car.cfg model/yolov3-tiny.conv.15 | tee car_train_log.txt
    

    保存log时会生成两个文件,一个保存的是网络加载信息和checkpoint保存信息,另一个保存的是训练信息
    训练耗时较长,查看log当loss较小,且不再发生变化时,可按"ctrl+c"终止训练

    8. 计算map

    注意:删除annots.pkl

    8.1 运行detector valid命令,进行测试
    ./darknet detector valid cfg/car.data cfg/yolov3-tiny_car.cfg car_backup/yolov3-tiny_car_130000.weights -thresh 0.5 -out ""
    
    运行结束后,在results文件夹下生成3个.txt文件
    8.2 创建voc_eval.py
    # --------------------------------------------------------
    # Fast/er R-CNN
    # Licensed under The MIT License [see LICENSE for details]
    # Written by Bharath Hariharan
    # --------------------------------------------------------
    
    import xml.etree.ElementTree as ET
    import os
    import pickle
    import numpy as np
    
    def parse_rec(filename):
        """Parse a PASCAL VOC xml file.

        Returns a list with one dict per ``<object>``, holding the keys
        ``name``, ``pose``, ``truncated``, ``difficult`` and ``bbox``
        (``[xmin, ymin, xmax, ymax]`` as ints).

        A missing ``pose`` becomes ``'Unspecified'``; a missing ``truncated`` or
        ``difficult`` becomes ``0``. The ``difficult`` tag is also accepted as
        ``Difficult``. Coordinates written as floats (e.g. ``"12.0"``) are
        truncated to int.
        """
        def _text(parent, tag, default):
            node = parent.find(tag)
            return default if node is None or node.text is None else node.text

        tree = ET.parse(filename)
        objects = []
        for obj in tree.findall('object'):
            difficult_obj = obj.find('difficult')
            if difficult_obj is None:
                difficult_obj = obj.find('Difficult')
            bbox = obj.find('bndbox')
            objects.append({
                'name': obj.find('name').text,
                'pose': _text(obj, 'pose', 'Unspecified'),
                'truncated': int(_text(obj, 'truncated', '0')),
                'difficult': int(difficult_obj.text) if difficult_obj is not None else 0,
                # int(float(...)) so that "12.0" does not raise ValueError.
                'bbox': [int(float(bbox.find(tag).text))
                         for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
            })

        return objects
    
    def voc_ap(rec, prec, use_07_metric=False):
        """ ap = voc_ap(rec, prec, [use_07_metric])
        Compute the VOC average precision from recall and precision arrays.
        With use_07_metric=True the VOC07 11-point method is used; otherwise
        the area under the monotone precision envelope is returned
        (default: False).
        """
        if use_07_metric:
            # 11-point metric: average the best precision reached at each
            # recall threshold 0.0, 0.1, ..., 1.0.
            ap = 0.
            for threshold in np.arange(0., 1.1, 0.1):
                reached = rec >= threshold
                best = np.max(prec[reached]) if np.sum(reached) != 0 else 0
                ap = ap + best / 11.
            return ap

        # Pad both curves with sentinel values at either end.
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # Precision envelope: running maximum taken from the right.
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]

        # Indices where recall changes value; only those contribute area.
        change = np.where(mrec[1:] != mrec[:-1])[0]

        # Sum of (delta recall) * precision.
        return np.sum((mrec[change + 1] - mrec[change]) * mpre[change + 1])
    
    def voc_eval(detpath,
                 annopath,
                 imagesetfile,
                 classname,
                 cachedir,
                 ovthresh=0.5,
                 use_07_metric=False):
        """rec, prec, ap = voc_eval(detpath,
                                    annopath,
                                    imagesetfile,
                                    classname,
                                    [ovthresh],
                                    [use_07_metric])
        Top level function that does the PASCAL VOC evaluation.
        detpath: Path to detections
            detpath.format(classname) should produce the detection results file.
            Each line is: image id, confidence, then the four box coordinates,
            separated by single spaces.
        annopath: Path to annotations
            annopath.format(imagename) should be the xml annotations file.
        imagesetfile: Text file containing the list of images, one image per line.
        classname: Category name
        cachedir: Directory for caching the annotations (created if missing)
        [ovthresh]: Overlap threshold (default = 0.5)
        [use_07_metric]: Whether to use VOC07's 11 point AP computation
            (default False)

        Returns the cumulative recall array, the cumulative precision array and
        the average precision.
        Raises KeyError if a detection refers to an image that is not listed
        in imagesetfile.
        """
        # first load gt
        if not os.path.isdir(cachedir):
            os.mkdir(cachedir)
        cachefile = os.path.join(cachedir, 'annots.pkl')
        # read list of images, ignoring blank lines
        with open(imagesetfile, 'r') as f:
            imagenames = [x.strip() for x in f if x.strip()]

        recs = None
        if os.path.isfile(cachefile):
            # NOTE(security): pickle.load executes arbitrary code from the file;
            # only point cachedir at a directory you control.
            with open(cachefile, 'rb') as f:
                recs = pickle.load(f)
            # A cache written for a different image set is stale: rebuild it
            # instead of failing later with a KeyError.
            if any(imagename not in recs for imagename in imagenames):
                recs = None

        if recs is None:
            # load annots
            recs = {}
            for i, imagename in enumerate(imagenames):
                recs[imagename] = parse_rec(annopath.format(imagename))
                if i % 100 == 0:
                    print('Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
            # save
            print('Saving cached annotations to {:s}'.format(cachefile))
            with open(cachefile, 'wb') as f:
                pickle.dump(recs, f)

        # extract gt objects for this class
        class_recs = {}
        npos = 0
        for imagename in imagenames:
            R = [obj for obj in recs[imagename] if obj['name'] == classname]
            bbox = np.array([x['bbox'] for x in R])
            # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
            det = [False] * len(R)
            # difficult objects do not count as positives
            npos = npos + int(np.sum(~difficult))
            class_recs[imagename] = {'bbox': bbox,
                                     'difficult': difficult,
                                     'det': det}

        # read dets
        detfile = detpath.format(classname)
        with open(detfile, 'r') as f:
            splitlines = [x.strip().split(' ') for x in f if x.strip()]

        image_ids = [x[0] for x in splitlines]
        confidence = np.array([float(x[1]) for x in splitlines])
        # reshape keeps BB two-dimensional even when there are no detections
        BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

        # sort by decreasing confidence
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)

            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                # a match with a difficult object is neither TP nor FP
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = True
                    else:
                        # the ground-truth box was already claimed by a
                        # higher-scoring detection
                        fp[d] = 1.
            else:
                fp[d] = 1.

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        return rec, prec, ap
    
    8.3 创建 computer_Single_ALL_mAP.py
    from voc_eval import voc_eval
    
    import os
    
    # Every <class>.txt file in ./results is evaluated as one class; the mean
    # of the per-class APs is reported as mAP.
    current_path = os.getcwd()
    results_path = current_path+"/results"
    sub_files = os.listdir(results_path)

    mAP = []
    for sub_file in sub_files:
        # Ignore anything in the results folder that is not a detection file.
        if not sub_file.endswith(".txt"):
            continue
        class_name = sub_file.split(".txt")[0]
        rec, prec, ap = voc_eval('results/{}.txt', 'scripts/DETRAC/Annotations/{}.xml', 'scripts/DETRAC/ImageSets/Main/val.txt', class_name, '.')
        print("{} :\t {} ".format(class_name, ap))
        mAP.append(ap)

    mAP = tuple(mAP)

    print("***************************")
    if mAP:
        print("mAP :\t {}".format( float( sum(mAP)/len(mAP)) ))
    else:
        # Avoid a ZeroDivisionError when no detection file was found.
        print("mAP :\t no result files found in {}".format(results_path))
    

    9. iou、loss可视化

    extract_log.py

    # coding=utf-8
    # 该文件用来提取训练log,去除不可解析的log后使log文件格式化,生成新的log文件供可视化工具绘图
    
    import inspect
    import os
    import random
    import sys
    
    
    def extract_log(log_file, new_log_file, key_word):
        """Copy the lines of *log_file* that contain *key_word* to *new_log_file*.

        Lines containing 'Syncing' (multi-GPU synchronisation messages) or
        'nan' are dropped even when they contain *key_word*.
        """
        with open(log_file, 'r') as source, open(new_log_file, 'w') as sink:
            sink.writelines(
                entry for entry in source
                if 'Syncing' not in entry
                and 'nan' not in entry
                and key_word in entry
            )
    
    
    # The training command in section 7 tees its output to car_train_log.txt,
    # so that is the file to read here.
    extract_log('car_train_log.txt', 'train_log_loss.txt', 'images')
    extract_log('car_train_log.txt', 'train_log_iou.txt', 'IOU')
    

    train_iou_visualization.py

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    # %matplotlib inline
    
    # lines = 122956  # 根据train_log_iou.txt的行数修改
    # result = pd.read_csv('train_log_iou.txt', skiprows=[x for x in range(lines) if (x % 10 == 0 or x % 10 == 9)],
    #                      error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])
    # result = pd.read_csv('train_log_iou.txt', skiprows=[],
    #                      error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])
    
    # Adjust these numbers to match your own train_log_iou.txt.
    lines = 613980  # number of rows considered when building the skip list
    step = 5000  # keep one row out of every `step`
    start_ite = 0  # presumably the first training iteration in the log - confirm
    end_ite = 50200  # presumably the last training iteration in the log - confirm
    igore = 1000  # presumably the number of initial iterations to leave out - confirm
    data_path = 'train_log_iou.txt'  # path of the extracted IOU log

    names = ['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count']

    # Rows below this index are always skipped.
    first_kept_row = lines * 1.0 / ((end_ite - start_ite) * 1.0) * igore
    skipped_rows = [x for x in range(lines) if (x < first_kept_row or x % step != 0)]
    try:
        # pandas >= 1.3; error_bad_lines was removed in pandas 2.0.
        result = pd.read_csv(data_path, skiprows=skipped_rows, on_bad_lines='skip', names=names)
    except TypeError:
        # Older pandas that does not know on_bad_lines yet.
        result = pd.read_csv(data_path, skiprows=skipped_rows, error_bad_lines=False, names=names)

    # Every field looks like "<label>: <value>"; keep only the value.
    for column in names:
        result[column] = result[column].str.split(': ').str.get(1)

    print(result['Region Avg IOU'])

    for column in names:
        result[column] = pd.to_numeric(result[column])

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(result['Region Avg IOU'].values, label='Region Avg IOU')
    # The other columns can be plotted the same way, e.g.:
    # ax.plot(result['Avg Recall'].values, label='Avg Recall')
    ax.legend(loc='best')
    ax.set_title('The Region Avg IOU curves')
    ax.set_xlabel('batches')
    fig.savefig('Region Avg IOU')
    

    train_loss_visualization.py

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    # %matplotlib inline
    
    lines = 5124  # set to the number of lines in your own train_log_loss.txt
    names = ['loss', 'avg', 'rate', 'seconds', 'images']

    # Skip the first 1000 rows, then keep one row out of every ten.
    skipped_rows = [x for x in range(lines) if (x % 10 != 9 or x < 1000)]
    try:
        # pandas >= 1.3; error_bad_lines was removed in pandas 2.0.
        result = pd.read_csv('train_log_loss.txt', skiprows=skipped_rows,
                             on_bad_lines='skip', names=names)
    except TypeError:
        # Older pandas that does not know on_bad_lines yet.
        result = pd.read_csv('train_log_loss.txt', skiprows=skipped_rows,
                             error_bad_lines=False, names=names)

    # The value is the second space-separated token of every field.
    for column in names:
        result[column] = result[column].str.split(' ').str.get(1)

    for column in names:
        print(result[column])

    for column in names:
        result[column] = pd.to_numeric(result[column])

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(result['avg'].values, label='avg_loss')
    # ax.plot(result['loss'].values, label='loss')
    ax.legend(loc='best')  # let matplotlib pick the legend position
    ax.set_title('The loss curves')
    ax.set_xlabel('batches')
    fig.savefig('avg_loss')
    

    相关文章

      网友评论

          本文标题:darknet框架下yolov3-tiny训练车辆数据集

          本文链接:https://www.haomeiwen.com/subject/suwynhtx.html