目标

关于COCO注解文件的结构可以参考 https://blog.csdn.net/yeyang911/article/details/78675942 。本文的目的是将COCO中每张图片注解中的bbox信息转为.txt文件，每一张图片对应一个txt。下图右边的txt文件就是我们想要的结果：每一行代表一个box，5维向量分别为(label, cen_x, cen_y, width, height)，其中中心坐标和宽高都已除以图片的宽高（归一化）。

最终结果

转化脚本

from pycocotools.coco import COCO
import numpy as np
import tqdm
import argparse


def arg_parser():
    """Parse the command-line options of the conversion script.

    Returns:
        argparse.Namespace with two string attributes:
        ``annotation_path`` (the COCO annotation JSON to read) and
        ``save_base_path`` (the directory the .txt files are written to).
    """
    cli = argparse.ArgumentParser(prog='code by rbj')
    option_defaults = (
        ('--annotation_path', 'data/coco/annotations/instances_train2017.json'),
        # directory where the generated .txt files are saved
        ('--save_base_path', 'data/coco/labels/train2017/'),
    )
    for flag, default_value in option_defaults:
        cli.add_argument(flag, type=str, default=default_value)
    return cli.parse_args()


def coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert one COCO bbox to a normalized, center-based box.

    Args:
        bbox: ``[top_left_x, top_left_y, box_width, box_height]`` in pixels.
        img_width: width of the image in pixels.
        img_height: height of the image in pixels.

    Returns:
        ``(cen_x, cen_y, width, height)``, each divided by the matching image
        dimension and rounded to 6 decimals, or ``None`` when the box is
        narrower or shorter than one pixel. ``bbox`` itself is not modified.
    """
    left, top, box_w, box_h = bbox
    # some annotations have basically no width / height, skip them
    if box_w < 1 or box_h < 1:
        return None
    # top_x, top_y, width, height ----> cen_x, cen_y, width, height
    return (
        round((left + box_w / 2) / img_width, 6),
        round((top + box_h / 2) / img_height, 6),
        round(box_w / img_width, 6),
        round(box_h / img_height, 6),
    )


if __name__ == '__main__':
    import os

    args = arg_parser()
    annotation_path = args.annotation_path
    save_base_path = args.save_base_path
    # Create the output directory up front so open() below does not fail
    # on a missing folder.
    if save_base_path:
        os.makedirs(save_base_path, exist_ok=True)

    data_source = COCO(annotation_file=annotation_path)
    # Map every category id to its rank (0..N-1) in ascending id order;
    # that rank is the label written to the .txt files.
    categories = sorted(data_source.loadCats(data_source.getCatIds()),
                        key=lambda c: c['id'])
    coco_labels_inverse = {c['id']: rank for rank, c in enumerate(categories)}

    img_ids = data_source.getImgIds()
    # Iterate the list itself (not enumerate(...)) so tqdm can read its
    # length and display real progress.
    for img_id in tqdm.tqdm(img_ids, desc='change .json file to .txt file'):
        img_info = data_source.loadImgs(img_id)[0]
        # splitext removes only the extension, so names with extra dots survive.
        file_name = os.path.splitext(img_info['file_name'])[0]
        height = img_info['height']
        width = img_info['width']

        lines = []
        for annotation in data_source.loadAnns(data_source.getAnnIds(img_id)):
            # The helper returns a new tuple, leaving the bbox stored in
            # data_source unchanged.
            yolo_box = coco_bbox_to_yolo(annotation['bbox'], width, height)
            if yolo_box is None:
                continue
            label = coco_labels_inverse[annotation['category_id']]
            lines.append(' '.join(str(v) for v in (label, *yolo_box)) + '\n')

        # One file per image; images without usable boxes get an empty file.
        save_path = os.path.join(save_base_path, file_name + '.txt')
        with open(save_path, mode='w') as fp:
            fp.writelines(lines)
    print('finish')

验证txt文件是否正确

def load_classes(path):
    """Read class names from a text file, one name per line.

    Args:
        path: path of the names file.

    Returns:
        List of the file's lines without their line terminators, in file
        order. The last line is kept whether or not the file ends with a
        newline; blank lines are kept as empty strings so positions in the
        list still match line numbers.
    """
    with open(path, "r") as fp:
        # Iterating the file yields each line once; a missing final newline
        # no longer causes the last class name to be dropped.
        names = [line.rstrip("\n") for line in fp]
    return names

def load_pixel_boxes(label_path, img_width, img_height):
    """Load a label .txt file and return its boxes in pixel units.

    Each row of the file is read as ``label cen_x cen_y width height``, where
    the four geometry values are fractions of the image size.

    Args:
        label_path: path of the .txt label file.
        img_width: width of the image in pixels.
        img_height: height of the image in pixels.

    Returns:
        Float array of shape ``(N, 5)`` whose rows are
        ``(label, top_left_x, top_left_y, width, height)`` in pixels.
    """
    # Use the builtin float: the np.float alias was removed in NumPy 1.24.
    boxes = np.loadtxt(label_path, dtype=float).reshape(-1, 5)
    # normalized center + size -> pixel top-left corner + size
    boxes[:, 1] = (boxes[:, 1] - boxes[:, 3] / 2) * img_width
    boxes[:, 2] = (boxes[:, 2] - boxes[:, 4] / 2) * img_height
    boxes[:, 3] *= img_width
    boxes[:, 4] *= img_height
    return boxes


if __name__ == '__main__':
    # NOTE(review): Image, plt and patches are used below without a visible
    # import; presumably PIL.Image, matplotlib.pyplot and matplotlib.patches
    # -- confirm and import them in the script that hosts this snippet.
    class_path = 'data/coco/coco.names'
    class_list = load_classes(class_path)
    img_path = 'data/coco/images/000000581886.jpg'
    img = np.array(Image.open(img_path))
    # Take only the first two axes so a 2-D (grayscale) array also works.
    H, W = img.shape[:2]
    label_path = 'data/coco/labels/000000581886.txt'
    boxes = load_pixel_boxes(label_path, W, H)
    fig = plt.figure()
    ax = fig.subplots(1)
    # Draw the image once, before the overlays, so it is displayed even when
    # the label file contains no boxes.
    ax.imshow(img)
    for box in boxes:
        bbox = patches.Rectangle((box[1], box[2]), box[3], box[4], linewidth=2,
                                 edgecolor='r', facecolor="none")
        label = class_list[int(box[0])]
        # Add the bbox to the plot
        ax.add_patch(bbox)
        # Add label
        ax.text(
            box[1],
            box[2],
            s=label,
            color="white",
            verticalalignment="top",
            bbox={"color": 'g', "pad": 0},
        )
    plt.show()

如果得到的txt文件，能够正确在图片中绘制出box且label正确的话，则证明转化有效，效果图也就是第一张图片。
注意：COCO的bbox用4维坐标表示，分别为box左上角的坐标(x, y)和box自身的宽、高（不是图片的宽高），单位为像素。