美文网首页
将自己标注的xml数据转为voc格式

将自己标注的xml数据转为voc格式

作者: 几时见得清梦 | 来源:发表于2019-07-27 16:24 被阅读0次
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
#from PIL import Image
import shutil

save_img_path = '/data_p/image/'
save_txt_path = '/data_p/label_txt/'
save_xml_path = '/data_p/label_xml/'

def delete_error(path_img, path_txt):
    """Remove images that have no label file and label files that have no image.

    An entry of ``path_img`` is kept only when ``<stem>.txt`` exists in
    ``path_txt``; an entry of ``path_txt`` is kept only when ``<stem>.jpg``
    exists in ``path_img``.  Both directory listings are taken once, before
    anything is deleted.  Every removed path is printed, followed by a summary
    line holding the number of removed label files and the number of removed
    images (in that order).

    Args:
        path_img: Directory containing the image files.
        path_txt: Directory containing the ``.txt`` label files.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be removed.
    """
    img_files = os.listdir(path_img)
    txt_files = os.listdir(path_txt)
    # Sets give O(1) membership tests instead of scanning a list per file.
    img_names = set(img_files)
    txt_names = set(txt_files)

    removed_imgs = 0
    for img_file in img_files:
        stem = os.path.splitext(img_file)[0]
        if stem + '.txt' not in txt_names:
            # os.path.join is correct with or without a trailing separator
            # on the directory argument.
            rm_path = os.path.join(path_img, img_file)
            print(rm_path)
            os.remove(rm_path)
            removed_imgs += 1

    removed_txts = 0
    for txt_file in txt_files:
        stem = os.path.splitext(txt_file)[0]
        if stem + '.jpg' not in img_names:
            rm_path = os.path.join(path_txt, txt_file)
            print(rm_path)
            os.remove(rm_path)
            removed_txts += 1

    print(removed_txts, ", ", removed_imgs)


# Class names accepted by convert_annotation; the position of a name in this
# list is the integer id written to the label file.
classes = ["class"]


def convert_annotation(xml_path, im_name):
    """Write the bounding boxes of one annotation XML file as a text label file.

    For every ``<object>`` element whose ``<name>`` is listed in ``classes``,
    one line ``"<class_id> <xmin> <ymin> <xmax> <ymax>"`` is written to
    ``save_txt_path + im_name + ".txt"``, with the coordinates formatted as
    floats.  Objects named ``"wrong_class"`` are treated as ``"class"``;
    objects with any other unlisted name are skipped.

    Args:
        xml_path: Path of the annotation XML file to read.
        im_name: Base name (without extension) of the label file to create.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML file is malformed.
        ValueError: If a ``"wrong_class"`` object is found while ``"class"``
            is not in ``classes``.
    """
    # Passing the path lets ElementTree open and close the file itself, so no
    # handle is left open.
    root = ET.parse(xml_path).getroot()

    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            if cls != "wrong_class":
                continue
            # Known labelling mistake: map it onto the intended class name.
            cls = "class"
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        box = [float(xmlbox.find(tag).text)
               for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        lines.append(str(cls_id) + " " + " ".join(str(v) for v in box) + '\n')

    # The output is opened only after parsing succeeded, and the with-block
    # closes it even if writing fails.
    with open(save_txt_path + im_name + ".txt", 'w') as out_file:
        out_file.writelines(lines)

def save_train_txt(path_img, train_file='/data_p/train_name.txt',
                   val_file='/data_p/val_name.txt', val_every=20):
    """Split the file stems found in ``path_img`` into train and validation lists.

    The directory entries are visited in sorted order so the split is
    reproducible.  Every ``val_every``-th entry, starting with the first one,
    is written to ``val_file``; all other entries are written to
    ``train_file``.  Each line holds the file name without its extension.

    Args:
        path_img: Directory whose entries are to be split.
        train_file: Path of the training list to create.
        val_file: Path of the validation list to create.
        val_every: Distance between two consecutive validation entries.

    Raises:
        ValueError: If ``val_every`` is smaller than 1.
    """
    if val_every < 1:
        raise ValueError("val_every must be >= 1")

    # The with-block closes both lists even if listing or writing fails.
    with open(train_file, 'w') as train_fp, open(val_file, 'w') as val_fp:
        for index, file in enumerate(sorted(os.listdir(path_img))):
            stem = os.path.splitext(file)[0]
            target = val_fp if index % val_every == 0 else train_fp
            target.write(stem + '\n')



# Driver: walk every sub-directory of the raw data root, copy each annotated
# image/XML pair into the flat output directories under a new numeric name,
# write its text label file, and record the copied pair in train.txt.
j = 1  # running counter; output names are j + 99999, i.e. they start at 100000
path1 = "/raw_data/"
# The with-block closes train.txt even if a copy or conversion fails.
with open('/data_p/train.txt', 'w') as fp:
    # Sorted listings make the numbering reproducible between runs.
    for files in sorted(os.listdir(path1)):
        im_path = os.path.join(path1, files)
        if not os.path.isdir(im_path):
            # A regular file next to the data folders cannot be listed.
            continue
        for file in sorted(os.listdir(im_path)):
            filename, filetype = os.path.splitext(file)
            if filetype != '.xml':
                continue
            old_path_xml = os.path.join(im_path, file)
            old_path_jpg = os.path.join(im_path, filename + '.jpg')
            if not os.path.isfile(old_path_jpg):
                # Check before copying anything so no orphan XML is left in
                # the output directory.
                print('skipped, image not found: ' + old_path_jpg)
                continue
            new_name = str(j + 99999)
            new_path_xml = save_xml_path + new_name + '.xml'
            new_path_jpg = save_img_path + new_name + '.jpg'
            shutil.copyfile(old_path_xml, new_path_xml)
            shutil.copyfile(old_path_jpg, new_path_jpg)
            convert_annotation(old_path_xml, new_name)
            fp.write(new_path_jpg + ' ' + new_path_xml + '\n')
            j = j + 1

print(j)


# Drop images and label files that ended up without a counterpart.
delete_error(save_img_path, save_txt_path)

# Split the remaining images into the train / validation name lists.
save_train_txt(save_img_path)

相关文章

网友评论

      本文标题:将自己标注的xml数据转为voc格式

      本文链接:https://www.haomeiwen.com/subject/aayerctx.html