DOTA数据集转voc

作者: ForCLovC | 来源:发表于2020-02-19 00:49 被阅读0次

DOTA数据集转voc
PASCAL VOC 数据集格式
【机器学习】目标检测（2）
在本机一键搞定COCO和VOC数据集
RCNN 训练模型
目标检测：YoloV3
Pascal Voc 数据集格式解释
【行人重识别】PaddlePaddle目标检测项目
Tensorflow Pascal VOC数据集转 recor
ncnn上基于Caffe用MobileNet_SSD训练和测试自

1. 将 DOTA 数据集的 label 转成 VOC 可用的 xml 文件（只需要修改脚本中的三个路径：原 DOTA txt 标签文件夹 txt_path、保存 xml 的文件夹 xml_path，以及图片文件夹 img_path——脚本需要读取图片来获取宽、高和通道数）

import os

from xml.dom.minidom import Document

from xml.dom.minidom import parse

import xml.dom.minidom

import numpy as np

import csv

import cv2

import string

def WriterXMLFiles(filename, path, box_list, label_list, w, h, d):
    """Write one VOC-style annotation XML file with 8-point boxes.

    The document has an ``<annotation>`` root holding ``folder``,
    ``filename``, ``path``, ``source``, ``size`` and ``segmented`` elements,
    followed by one ``<object>`` per (box, label) pair. Each ``<bndbox>``
    stores the eight values of a box as ``x1, y1, ..., x4, y4``.

    Args:
        filename: Name of the XML file; also written into ``<filename>``.
        path: Output directory prefix. It is concatenated directly with
            ``filename``, so it must end with a path separator.
        box_list: Iterable of boxes; each box is indexable with at least
            eight values in the order x1, y1, x2, y2, x3, y3, x4, y4.
        label_list: Iterable of labels, paired with ``box_list`` by ``zip``
            (extra items in the longer one are ignored).
        w: Value written to ``<width>``.
        h: Value written to ``<height>``.
        d: Value written to ``<depth>``.

    Raises:
        IndexError: If a box has fewer than eight values.
        OSError: If the output file cannot be opened for writing.
    """
    doc = xml.dom.minidom.Document()

    def add_text_child(parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the element."""
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(text)))
        parent.appendChild(node)
        return node

    root = doc.createElement('annotation')
    doc.appendChild(root)

    add_text_child(root, 'folder', 'JPEGImages')
    add_text_child(root, 'filename', filename)
    add_text_child(root, 'path', 'xxxx')

    # Placeholder provenance block; the values are fixed strings.
    source = doc.createElement('source')
    add_text_child(source, 'database', 'Unknown')
    add_text_child(source, 'annotation', 'xxx')
    add_text_child(source, 'image', 'xxx')
    add_text_child(source, 'flickrid', '0')
    root.appendChild(source)

    size = doc.createElement('size')
    add_text_child(size, 'width', w)
    add_text_child(size, 'height', h)
    add_text_child(size, 'depth', d)
    root.appendChild(size)

    add_text_child(root, 'segmented', '0')

    corner_tags = ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4')
    for box, label in zip(box_list, label_list):
        obj = doc.createElement('object')
        add_text_child(obj, 'name', label)

        bndbox = doc.createElement('bndbox')
        # Index explicitly so a box with fewer than eight values still fails
        # loudly instead of producing a truncated <bndbox>.
        for idx, tag in enumerate(corner_tags):
            add_text_child(bndbox, tag, box[idx])
        obj.appendChild(bndbox)

        root.appendChild(obj)

    # minidom writes the `indent` string in front of every element, so
    # passing '\n' puts each element on its own line. The XML declaration
    # carries no encoding attribute, which means UTF-8, so the file is
    # written as UTF-8 explicitly. The context manager closes the handle
    # even if serialization raises.
    with open(path + filename, 'w', encoding='utf-8') as fp:
        doc.writexml(fp, indent='\n')

def load_annoataion(p, header_lines=2):
    """Load polygon annotations from a whitespace-separated label file.

    Every data line must hold at least nine fields: eight coordinates
    (x1 y1 x2 y2 x3 y3 x4 y4) followed by the label. Any further fields
    on the line are ignored.

    Args:
        p: Path of the label text file.
        header_lines: Number of leading lines to skip before parsing.
            Defaults to 2, the value the original code hard-coded
            (presumably the metadata lines at the top of DOTA label files;
            confirm for your data).

    Returns:
        A tuple ``(polys, tags)``. ``polys`` is an ``np.int32`` array with
        one row of eight coordinates per object; the coordinates are parsed
        as floats and then cast to int32. ``tags`` is a string array of
        labels. Both arrays are empty when the file does not exist or
        contains no objects.

    Raises:
        ValueError: If a non-blank data line has fewer than nine fields or
            a coordinate is not a number.
    """
    text_polys = []
    text_tags = []

    # A missing file yields the same (empty, empty) pair as a file without
    # objects, so callers can always unpack two values.
    if os.path.exists(p):
        with open(p, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                if line_no <= header_lines:
                    continue
                # split() without arguments tolerates tabs, repeated spaces
                # and the trailing newline.
                fields = line.split()
                if not fields:
                    continue  # blank line, e.g. at the end of the file
                if len(fields) < 9:
                    raise ValueError(
                        '%s:%d: expected 8 coordinates and a label, got %r'
                        % (p, line_no, line)
                    )
                text_polys.append([float(value) for value in fields[:8]])
                text_tags.append(fields[8])

    # The `np.str` alias was removed from NumPy; the builtin `str` is the
    # equivalent dtype.
    return np.array(text_polys, dtype=np.int32), np.array(text_tags, dtype=str)

if __name__ == "__main__":
    # Convert every label file in txt_path into an XML annotation in
    # xml_path; the matching .png in img_path supplies width/height/depth.
    txt_path = './txts-hbb/'
    xml_path = './Annotations-hbb/'
    # NOTE(review): 'JEPGImages' looks like a transposition of 'JPEGImages';
    # left unchanged because it must match the directory name on disk.
    img_path = './JEPGImages/'

    print(os.path.exists(txt_path))

    # Create the output directory up front so the first write cannot fail
    # on a missing folder.
    os.makedirs(xml_path, exist_ok=True)

    txts = os.listdir(txt_path)
    for count, t in enumerate(txts):
        print(count, t)

        # Only label files are converted. Any other entry would otherwise
        # be written out under its own, unchanged name.
        stem, ext = os.path.splitext(t)
        if ext != '.txt':
            continue

        boxes, labels = load_annoataion(os.path.join(txt_path, t))

        xml_name = stem + '.xml'
        img_name = stem + '.png'
        print(img_name)

        # cv2.imread returns None instead of raising when the image is
        # missing or unreadable.
        img = cv2.imread(os.path.join(img_path, img_name))
        if img is None:
            print('skip %s: cannot read image %s' % (t, os.path.join(img_path, img_name)))
            continue
        h, w, d = img.shape

        WriterXMLFiles(xml_name, xml_path, boxes, labels, w, h, d)

        if count % 1000 == 0:
            print(count)

2. 生成 VOC 的 ImageSets/Main 文件夹所需的索引文件（只需要把路径改为原 txt 标签文件夹；脚本会把每个标签文件去掉扩展名后的文件名逐行追加写入当前目录下的 ./train.txt）

import os

# Folder holding the original txt label files; one index line is written
# per entry in this folder.
label_dir = './txts-hbb'

# List the folder first so a wrong path fails before train.txt is touched.
shotnames = [os.path.splitext(entry)[0] for entry in os.listdir(label_dir)]

# Append mode keeps any existing content, as the original read-then-write
# did, and also creates train.txt when it does not exist yet. The file is
# opened once instead of once per entry.
with open('./train.txt', 'a') as index_file:
    index_file.writelines(shotname + '\n' for shotname in shotnames)