目标
关于COCO注解文件的结构,可以参考 https://blog.csdn.net/yeyang911/article/details/78675942 。
本文的目的是将COCO中每张图片注解里的bbox信息转为.txt文件,每一张图片对应一个txt文件。下图右边的txt文件就是我们想要的结果:每一行代表一个box,5维向量分别为(label, cen_x, cen_y, width, height),其中中心坐标和宽高都已除以图片的宽高,归一化到[0, 1]。
转化脚本
from pycocotools.coco import COCO
import numpy as np
import tqdm
import argparse
def arg_parser(argv=None):
    """Parse the command-line options of the COCO-to-txt conversion script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv``, which
            is what a plain ``arg_parser()`` call has always done.

    Returns:
        An ``argparse.Namespace`` with two string attributes:
        ``annotation_path`` and ``save_base_path``.
    """
    parser = argparse.ArgumentParser('code by rbj')
    parser.add_argument('--annotation_path', type=str,
                        default='data/coco/annotations/instances_train2017.json',
                        help='path to the COCO instances annotation .json file')
    # Directory where the generated .txt files are saved.
    parser.add_argument('--save_base_path', type=str,
                        default='data/coco/labels/train2017/',
                        help='directory that receives one .txt file per image')
    return parser.parse_args(argv)
def bbox_to_yolo_line(label, bbox, img_width, img_height):
    """Format one COCO box as a normalized ``label cen_x cen_y width height`` line.

    Args:
        label: Class index written as the first field of the line.
        bbox: Box as ``[top_x, top_y, width, height]`` in pixels.
        img_width: Width of the image in pixels.
        img_height: Height of the image in pixels.

    Returns:
        The text line without a trailing newline, or ``None`` when the box
        is less than one pixel wide or high and should be skipped.
    """
    # Work on local copies: ``bbox`` belongs to the loaded annotation and
    # must not be overwritten with the normalized values.
    left, top, box_w, box_h = bbox
    # Some annotations have basically no width / height, skip them.
    if box_w < 1 or box_h < 1:
        return None
    # top_x, top_y, width, height ----> cen_x, cen_y, width, height,
    # each divided by the matching image dimension.
    values = (
        round((left + box_w / 2) / img_width, 6),
        round((top + box_h / 2) / img_height, 6),
        round(box_w / img_width, 6),
        round(box_h / img_height, 6),
    )
    return ' '.join([str(label)] + [str(v) for v in values])


if __name__ == '__main__':
    import os

    args = arg_parser()
    annotation_path = args.annotation_path
    save_base_path = args.save_base_path
    # open() does not create missing parent directories, so make sure the
    # output directory exists before the first file is written.
    if save_base_path:
        os.makedirs(save_base_path, exist_ok=True)

    data_source = COCO(annotation_file=annotation_path)

    # Map every category id to a contiguous zero-based index, assigned in
    # ascending id order.
    categories = data_source.loadCats(data_source.getCatIds())
    categories.sort(key=lambda x: x['id'])
    coco_labels_inverse = {c['id']: i for i, c in enumerate(categories)}

    img_ids = data_source.getImgIds()
    # Iterating the list directly lets tqdm know the total number of images.
    for img_id in tqdm.tqdm(img_ids, desc='change .json file to .txt file'):
        img_info = data_source.loadImgs(img_id)[0]
        height = img_info['height']
        width = img_info['width']
        # splitext removes only the final extension, so names that contain
        # additional dots are kept intact.
        file_name = os.path.splitext(img_info['file_name'])[0]
        save_path = os.path.join(save_base_path, file_name + '.txt')

        lines = []
        annotation_id = data_source.getAnnIds(img_id)
        if len(annotation_id) > 0:
            for annotation in data_source.loadAnns(annotation_id):
                label = coco_labels_inverse[annotation['category_id']]
                line = bbox_to_yolo_line(label, annotation['bbox'], width, height)
                if line is not None:
                    lines.append(line + '\n')

        # An image without usable boxes still gets an (empty) .txt file.
        with open(save_path, mode='w') as fp:
            fp.writelines(lines)
    print('finish')
验证txt文件是否正确
def load_classes(path):
    """Read class names from a text file that holds one name per line.

    Args:
        path: Path of the class-name file.

    Returns:
        A list with one string per line of the file, in file order, so a
        name's position in the list is its class index. An empty file
        yields an empty list.
    """
    with open(path, "r") as fp:
        # splitlines() ignores only the final line terminator, so the last
        # name is kept whether or not the file ends with a newline.
        names = fp.read().splitlines()
    return names
def yolo_to_pixel_boxes(boxes, img_width, img_height):
    """Convert normalized center boxes to pixel top-left boxes.

    Args:
        boxes: Array-like of rows ``(label, cen_x, cen_y, width, height)``
            with the four box values normalized by the image size. A single
            flat row of five values is accepted as well.
        img_width: Width of the image in pixels.
        img_height: Height of the image in pixels.

    Returns:
        A new float array of shape ``(N, 5)`` whose rows are
        ``(label, left, top, width, height)`` in pixels. The input is left
        unmodified.
    """
    # np.array copies, so the caller's data is not changed by the in-place
    # updates below.
    pixel = np.array(boxes, dtype=float).reshape(-1, 5)
    # Center -> top-left corner. The corner must be computed while columns
    # 3 and 4 still hold the normalized width / height.
    pixel[:, 1] = (pixel[:, 1] - pixel[:, 3] / 2) * img_width
    pixel[:, 2] = (pixel[:, 2] - pixel[:, 4] / 2) * img_height
    pixel[:, 3] *= img_width
    pixel[:, 4] *= img_height
    return pixel


if __name__ == '__main__':
    # The script uses Image, plt and patches; import them here so it runs
    # on its own.
    from PIL import Image
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    class_path = 'data/coco/coco.names'
    class_list = load_classes(class_path)
    img_path = 'data/coco/images/000000581886.jpg'
    img = np.array(Image.open(img_path))
    # Only height and width are needed; taking the first two dimensions
    # also works for a 2-D (single-channel) image array.
    H, W = img.shape[:2]
    label_path = 'data/coco/labels/000000581886.txt'
    # The builtin float replaces the np.float alias, which no longer exists
    # in current NumPy releases.
    boxes = yolo_to_pixel_boxes(np.loadtxt(label_path, dtype=float), W, H)

    fig = plt.figure()
    ax = fig.subplots(1)
    for box in boxes:
        # Rectangle takes the top-left corner followed by width and height.
        bbox = patches.Rectangle((box[1], box[2]), box[3], box[4], linewidth=2,
                                 edgecolor='r', facecolor="none")
        label = class_list[int(box[0])]
        # Add the bbox to the plot
        ax.add_patch(bbox)
        # Add label
        plt.text(
            box[1],
            box[2],
            s=label,
            color="white",
            verticalalignment="top",
            bbox={"color": 'g', "pad": 0},
        )
    ax.imshow(img)
    plt.show()
如果得到的txt文件,能够正确在图片中绘制出box且label正确的话,则证明转化有效,效果图也就是第一张图片。
注意:COCO的bbox用4个值表示,分别是box左上角的坐标(x, y)以及box自身的宽和高(单位为像素),而不是图片的宽高。
网友评论