制作VOC2007数据集常用代码

作者: 晓智AI | 来源:发表于2018-07-24 16:22 被阅读0次

制作VOC2007数据集常用代码
Caltech行人数据集转为VOC数据集
TensorFlow（二）制做自己的数据集
制作ILSVRC2015数据集常用代码
Faster R-CNN for Tensorflow
VOC2007数据集制作与训练自己的数据
【从零开始学习YOLOv3】8. YOLOv3中Loss部分计算
keras 数据集学习笔记 2/3
pytorch数据集相关操作
ncnn上基于Caffe用MobileNet_SSD训练和测试自

研究背景

由于研究时常根据使用情况，制作符合要求的数据集，因而将需要的代码整理。

数据集结构

└── VOCdevkit #根目录
    └── VOC2012 #不同年份的数据集，这里只下载了2012的，还有2007等其它年份的
        ├── Annotations #存放xml文件，与JPEGImages中的图片一一对应，解释图片的内容等等
        ├── ImageSets #该目录下存放的都是txt文件，txt文件中每一行包含一个图片的名称，末尾会加上±1表示正负样本
        │   ├── Action
        │   ├── Layout
        │   ├── Main
        │   └── Segmentation
        ├── JPEGImages #存放源图片
        ├── SegmentationClass #存放的是图片，分割后的效果，见下文的例子
        └── SegmentationObject #存放的是图片，分割后的效果，见下文的例子

Annotations文件夹存放的是xml文件，该文件是对图片的解释，每张图片都对应一个同名的xml文件。
ImageSets文件夹存放的是txt文件，这些txt将数据集的图片分成了各种集合。如Main下的train.txt中记录的是用于训练的图片集合
JPEGImages文件夹存放的是数据集的原图片
SegmentationClass以及SegmentationObject文件夹存放的都是图片，且都是图像分割结果图

参考链接制作VOC格式数据集
参考链接修改xml文件的节点值方法详解

代码实现

图片重命名保存在JPEGImages，将原命名数字+12682，再补0到6位数字。

import os

path = "/home/henry/File/URPC2018/VOC/VOC2007/JPEG/YDXJ0013"
OFFSET = 12682  # value added to every numeric file name

# Collect (number, extension, original name) for every regular file whose
# stem is an integer; anything else is left untouched instead of crashing.
numbered = []
for entry in os.listdir(path):
    if os.path.isdir(os.path.join(path, entry)):
        continue  # sub-directories are skipped
    stem, ext = os.path.splitext(entry)
    try:
        number = int(stem)
    except ValueError:
        continue  # not a numeric file name
    numbered.append((number, ext, entry))

# The rename happens inside the same directory, so process the largest number
# first: with a positive OFFSET a target name can then never be a source file
# that is still waiting to be renamed (os.rename would silently overwrite it).
for number, ext, entry in sorted(numbered, reverse=True):
    new_name = str(number + OFFSET).zfill(6) + ext  # zero-pad to six digits
    os.rename(os.path.join(path, entry), os.path.join(path, new_name))

VOC格式数据集从000000.jpg转换为从1.jpg开始的自然排列。

import os

path = "/home/henry/Files/URPC2018/UPRC2018UnderWaterDetection/cla6/JPEGImagesc"
path1 = "/home/henry/Files/URPC2018/UPRC2018UnderWaterDetection/cla6/1"

# Move every regular file from `path` into `path1`, turning the zero-padded
# numeric name into a plain 1-based number (000000.jpg -> 1.jpg).
for file in os.listdir(path):
    source = os.path.join(path, file)
    if not os.path.isdir(source):
        stem, suffix = os.path.splitext(file)
        target = os.path.join(path1, "%d%s" % (int(stem) + 1, suffix))
        os.rename(source, target)

Layout和Main文件夹所需txt文档。
制作VOC2007数据集中的trainval.txt， train.txt ， test.txt ， val.txt
trainval占总数据集的50%，test占总数据集的50%；train占trainval的50%，val占trainval的50%；

import os
import random

# Share of all samples that goes to trainval (the rest becomes test), and the
# share of trainval that goes to train (the rest becomes val).
trainval_percent = 0.5
train_percent = 0.5
xmlfilepath = 'Anno/G0024173'
txtsavepath = 'test'
total_xml = os.listdir(xmlfilepath)

num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# Sets give O(1) membership tests in the loop below.
trainval_set = set(trainval)
train_set = set(train)

# Build the output paths from txtsavepath with os.path.join; the previous
# hard-coded 'test\\...' names only worked as paths on Windows.
os.makedirs(txtsavepath, exist_ok=True)
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval:
    for i in indices:
        # One sample name per line, without the file extension.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)

标签文件txt转xml格式代码。
python解析VOC的xml文件并转成自己需要的txt格式

# ! /usr/bin/python
# -*- coding:UTF-8 -*-
import os, sys
import glob
from PIL import Image

# Directory that holds the source images (labelled "VEDAI" by the author)
src_img_dir = os.path.abspath('.') + '/13'
# Directory that receives / holds the ground-truth xml files
src_xml_dir = '/home/henry/File/URPC2018/all_train_data_0829/111'


# Paths of all .jpg files found so far; filled in by get_img_list()
img_Lists = []
def get_img_list(dir_path):
    """Append every file below dir_path whose name ends in '.jpg' to img_Lists.

    Directories are descended into recursively; a plain file is recorded when
    the text after its last '.' is exactly 'jpg' (case-sensitive).
    """
    if os.path.isdir(dir_path):
        for entry in os.listdir(dir_path):
            get_img_list(os.path.join(dir_path, entry))
        return
    if not os.path.isfile(dir_path):
        return
    extension = dir_path.rsplit('.', 1)[-1]
    if extension == 'jpg':
        img_Lists.append(dir_path)

get_img_list(src_img_dir)
img_Lists.sort(key=lambda x: x[-10:])

# Step 1: create one xml skeleton per image (header + image size).
# xml_paths maps the image stem (file name up to the first '.') to its xml file
# so that later steps only touch files that really belong to an image.
xml_paths = {}
for img_item in img_Lists:
    # The image is opened only to read its width and height.
    with Image.open(img_item) as im:
        width, height = im.size
    img = os.path.split(img_item)[1].split('.')[0]

    xml_path = src_xml_dir + '/' + img + '.xml'
    with open(xml_path, 'w') as xml_file:
        xml_file.write('<annotation>\n')
        xml_file.write('    <folder>VOC2007</folder>\n')
        xml_file.write('    <filename>' + str(img) + '.jpg' + '</filename>\n')
        xml_file.write('    <size>\n')
        xml_file.write('        <width>' + str(width) + '</width>\n')
        xml_file.write('        <height>' + str(height) + '</height>\n')
        xml_file.write('        <depth>3</depth>\n')
        xml_file.write('    </size>\n')
    xml_paths[img] = xml_path

# Step 2: append one <object> per label line.
# Columns are separated by a single space (use split(',') for comma-separated
# files). Columns used: 1-4 = xmin, ymin, xmax, ymax, 5 = frame number,
# 6 = flag (only '0' is written), 9 = class name.
# NOTE(review): this looks like a VATIC-style export where column 6 is the
# "lost" flag -- confirm against the labelling tool.
with open('YDXJ0013.txt') as txt_file:
    for line in txt_file:
        spt = line.rstrip('\r\n').split(' ')
        if len(spt) < 10 or spt[6] != '0':
            continue  # blank/short line, or label flagged as not to be written

        xml_path = xml_paths.get(spt[5].zfill(6))
        if xml_path is None:
            # No image for this frame: skip instead of creating a broken xml.
            continue

        with open(xml_path, 'a') as xml_file:
            xml_file.write('    <object>\n')
            xml_file.write('        <name>' + str(spt[9]) + '</name>\n')
            xml_file.write('        <pose>Unspecified</pose>\n')
            xml_file.write('        <truncated>0</truncated>\n')
            xml_file.write('        <difficult>0</difficult>\n')
            xml_file.write('        <bndbox>\n')
            xml_file.write('            <xmin>' + str(spt[1]) + '</xmin>\n')
            xml_file.write('            <ymin>' + str(spt[2]) + '</ymin>\n')
            xml_file.write('            <xmax>' + str(spt[3]) + '</xmax>\n')
            xml_file.write('            <ymax>' + str(spt[4]) + '</ymax>\n')
            xml_file.write('        </bndbox>\n')
            xml_file.write('    </object>\n')

# Step 3: close the root element of exactly the files created in step 1
# (previously a fixed range(4500) was used, which created stray files and
# left images numbered 4500 and above without a closing tag).
for xml_path in xml_paths.values():
    with open(xml_path, 'a') as xml_file:
        xml_file.write('</annotation>')

生成的xml文档格式，以000017.xml为例

<annotation>
<folder>VOC2007</folder>
<filename>000017.jpg</filename>
<size>                        //图像尺寸（长宽以及通道数）
<width>720</width>
<height>405</height>
<depth>3</depth></size>
<object>           //检测到的物体
<name>"scallop"</name>       //物体类别
<pose>Unspecified</pose>    //拍摄角度
<truncated>0</truncated>      //是否被截断（0表示完整）
<difficult>0</difficult>             //目标是否难以识别（0表示容易识别）
<bndbox>                               //bounding-box  目标框坐标
<xmin>690</xmin>   左上角x
<ymin>299</ymin>   左上角y
<xmax>718</xmax>  右下角x
<ymax>356</ymax>  右下角y
</bndbox>
</object>                                //检测到多个物体
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>472</xmin>
<ymin>296</ymin>
<xmax>709</xmax>
<ymax>403</ymax>
</bndbox>
</object>
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>674</xmin>
<ymin>89</ymin>
<xmax>717</xmax>
<ymax>155</ymax>
</bndbox>
</object>
<object>
<name>"seaurchin"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>663</xmin>
<ymin>12</ymin>
<xmax>716</xmax>
<ymax>67</ymax>
</bndbox>
</object>
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>507</xmin>
<ymin>110</ymin>
<xmax>647</xmax>
<ymax>210</ymax>
</bndbox></object>
<object>
<name>"seaurchin"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>576</xmin>
<ymin>173</ymin>
<xmax>714</xmax>
<ymax>297</ymax>
</bndbox></object>
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>90</xmin>
<ymin>122</ymin>
<xmax>199</xmax>
<ymax>187</ymax>
</bndbox>
</object>
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>202</xmin>
<ymin>76</ymin>
<xmax>303</xmax>
<ymax>138</ymax>
</bndbox>
</object>
<object>
<name>"scallop"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>201</xmin>
<ymin>170</ymin>
<xmax>366</xmax>
<ymax>294</ymax>
</bndbox>
</object>
<object>
<name>"seaurchin"</name>   
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>52</xmin>
<ymin>137</ymin>
<xmax>189</xmax>
<ymax>279</ymax>
</bndbox></object>
<object>
<name>"seacucumber"</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>36</xmin>
<ymin>234</ymin>
<xmax>137</xmax>
<ymax>373</ymax>
</bndbox>
</object>
</annotation>

修改xml文件，frame改filename。

from xml.etree import ElementTree
import os, sys
import glob
from PIL import Image

path = "/media/leequens/File/YOLO/test/xml/11"

# For every entry listed in `path`, load the same-numbered xml from rebox/,
# rename its first top-level <frame> element to <filename> and save the result
# one directory up.
# NOTE(review): names are listed from xml/11 but the files are parsed from
# xml/rebox/ -- confirm this is intended.
for file in os.listdir(path):
    # Normalise the numeric stem to a six-digit, zero-padded xml file name.
    xml_name = str(int(os.path.splitext(file)[0])).zfill(6) + '.xml'
    xmldoc = ElementTree.parse('/media/leequens/File/YOLO/test/xml/rebox/' + xml_name)

    # Only direct children of the root are inspected, and only the first match is renamed.
    frame_node = next((node for node in xmldoc.getroot() if node.tag == 'frame'), None)
    if frame_node is not None:
        frame_node.tag = 'filename'

    xmldoc.write('/media/leequens/File/YOLO/test/xml/' + xml_name)

根据xml框出图片物体

import os
import os.path
import numpy as np
import xml.etree.ElementTree as xmlET
from PIL import Image, ImageDraw

# Class names; the position in this tuple is the numeric label stored per box.
classes = ('__background__', # always index 0
           '"seacucumber"', '"seaurchin"', '"scallop"', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')

file_path_img = '/home/henry/File/URPC2018/all_train_data_0829/13'
file_path_xml = '/home/henry/File/URPC2018/all_train_data_0829/111'
save_file_path = '/home/henry/File/URPC2018/all_train_data_0829/test'

os.makedirs(save_file_path, exist_ok=True)

# For every annotation file: read all boxes, draw them (with the class name)
# on the matching .jpg and save the result to save_file_path.
for filename in os.listdir(file_path_xml):
    if not filename.endswith('.xml'):
        continue  # ignore anything that is not an annotation file
    tree = xmlET.parse(os.path.join(file_path_xml, filename))
    objs = tree.findall('object')

    # One row per object: x1, y1, x2, y2, label.
    # A signed dtype is required: the "- 1" below turns a coordinate of 0 into
    # -1, which cannot be represented in the previously used uint16.
    boxes = np.zeros((len(objs), 5), dtype=np.int32)

    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # Make pixel indexes 0-based
        boxes[ix, 0:4] = [float(bbox.find(tag).text) - 1
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        # Raises ValueError if the xml names a class missing from `classes`.
        boxes[ix, 4] = classes.index(obj.find('name').text)

    image_name = os.path.splitext(filename)[0]
    img = Image.open(os.path.join(file_path_img, image_name + '.jpg'))

    draw = ImageDraw.Draw(img)
    for xmin, ymin, xmax, ymax, label in boxes.tolist():
        draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
        draw.text([xmin, ymin], classes[label], (255, 0, 0))

    img.save(os.path.join(save_file_path, image_name + '.jpg'))

生成/VOCdevkit/VOC2007/ImageSets/Layout/train.txt，含图片路径，目前图片保存在/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173路径，在终端运行下面命令，txt文档生成到根目录～。其他txt文件类似处理。

ls -R /home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/*.jpg >train.txt

/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000000.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000001.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000002.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000003.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000004.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000005.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000006.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000007.jpg
/home/henry/File/URPC2018/VOC/VOC2007/JPEGImages/G0024173/000008.jpg

生成/VOCdevkit/VOC2007/test/train.txt，不含图片路径，代码应在VOC2007文件夹下运行，图片从000000.jpg到001799.jpg（共1800张）。

import os
import random

# Write the six-digit ids 000000 .. 001799, one per line.
# The path is relative on purpose: the script is meant to be run from inside
# the VOC2007 folder, whereas the former '/test/train.txt' pointed at the
# filesystem root.
with open(os.path.join('test', 'train.txt'), 'w') as file:
    for i in range(0, 1800):
        file.write("%06d" % i + '\n')

Linux Shell 批量重命名的方法总览
Linux Shell 批量重命名的方法总览

c=0;for i in *.jpg;do mv -f $i $((c+=1)).jpg;done

ubuntu系统对txt文本文档的批量处理
Linux sed命令操作删除文件每一行的前k个字符、在文本的行尾或行首添加字符
常用操作
在行尾添加.jpg ^代表行首，$代表行尾如果要在原文件上修改，加上参数-i
tmp.txt原内容:

23456789
23456789
23456789

终端在tmp.txt路径下运行

sed -i 's/$/.jpg&/g' tmp.txt

23456789.jpg
23456789.jpg
23456789.jpg

删除共同后缀，可以用查找替换方法。

去掉文件名前导0方法

rename "s/^0{1,2}//g" *

Linux文件批量改名/排序总结（rename，sort等）

将比赛用的test_list.txt保存为字典，并按照该字典把测试结果result.txt中的图片名替换为对应的编号。
test_list.txt内容

CHN083846_0000 1
CHN083846_0043 2
CHN083846_0076 3
CHN083846_0099 4
CHN083846_0124 5
CHN083846_0182 6
CHN083846_0237 7
CHN083846_0257 8
CHN083846_0262 9
CHN083846_0268 10
CHN083846_0276 11
CHN083846_0286 12
CHN083846_0290 13
CHN083846_0300 14
CHN083846_0308 15
CHN083846_0311 16
CHN083846_0321 17
CHN083846_0324 18
CHN083846_0326 19
CHN083846_0334 20

result.txt内容

CHN083846_0000.jpg 2 0.99687284 311 234 389 304
CHN083846_0000.jpg 2 0.9967654 379 105 464 184
CHN083846_0000.jpg 2 0.99383944 394 219 465 294
CHN083846_0000.jpg 2 0.993507 366 99 416 157
CHN083846_0000.jpg 2 0.98956084 498 164 571 238
CHN083846_0000.jpg 2 0.9875843 491 370 584 474
CHN083846_0000.jpg 2 0.98697644 373 178 430 232
CHN083846_0000.jpg 2 0.9807468 316 201 383 250
CHN083846_0043.jpg 2 0.99795675 325 120 404 198
CHN083846_0043.jpg 2 0.9977519 228 219 305 288
CHN083846_0043.jpg 2 0.9969998 309 111 359 156
CHN083846_0043.jpg 2 0.99581474 427 193 495 270
CHN083846_0043.jpg 2 0.9956038 304 229 374 302
CHN083846_0043.jpg 2 0.9946083 543 405 585 457
CHN083846_0043.jpg 2 0.9940837 372 394 466 479
CHN083846_0043.jpg 2 0.9934238 295 183 360 240
CHN083846_0043.jpg 2 0.99061626 233 188 307 249
CHN083846_0043.jpg 2 0.9718845 552 86 585 130
CHN083846_0076.jpg 2 0.99764097 293 227 363 295
CHN083846_0076.jpg 2 0.9971411 217 198 289 255
CHN083846_0076.jpg 2 0.9971004 548 27 585 72
CHN083846_0076.jpg 2 0.99674976 282 171 358 243

转换需要的代码convertxt-zidian.py：

# Build the lookup table "image name -> numeric id" from test_list.txt
# (one "<name> <id>" pair per line, separated by a single space).
result = {}
with open('test_list.txt', 'r') as f:
    for line in f:
        line = line.strip()   # drop surrounding whitespace
        if not line:
            continue
        fields = line.split(' ')
        result[fields[0]] = fields[1]

with open(r'/home/henry/Files/URPC2018/常用pythoncodes/result3.txt', "r") as source:
    input_file = source.read()

# Plain substring replacement over the whole text.
# NOTE(review): if one name is a prefix of another, the shorter one may be
# replaced inside the longer one -- fine for fixed-length names as shown.
for key, value in result.items():
    input_file = input_file.replace(key, value)
print(input_file)
with open('text_trans.txt', 'w+') as filetwo:
    filetwo.write(input_file)

test_list.txt保存在当前路径下，与convertxt-zidian.py放在一起，result.txt放在/home/henry/Files/URPC2018/常用pythoncodes/result3.txt，生成的文件为text_trans.txt，内容如下：

1 2 0.99687284 311 234 389 304
1 2 0.9967654 379 105 464 184
1 2 0.99383944 394 219 465 294
1 2 0.993507 366 99 416 157
1 2 0.98956084 498 164 571 238
1 2 0.9875843 491 370 584 474
1 2 0.98697644 373 178 430 232
1 2 0.9807468 316 201 383 250
2 2 0.99795675 325 120 404 198
2 2 0.9977519 228 219 305 288
2 2 0.9969998 309 111 359 156
2 2 0.99581474 427 193 495 270
2 2 0.9956038 304 229 374 302
2 2 0.9946083 543 405 585 457
2 2 0.9940837 372 394 466 479
2 2 0.9934238 295 183 360 240
2 2 0.99061626 233 188 307 249
2 2 0.9718845 552 86 585 130

图片格式转换将* .png转换为* .jpg

henry@henry-Rev-1-0:~/Files/URPC2018/UPRC2018UnderWaterDetection/enhanced0815/B6
2 (复件)$ for i in *.png;do convert ${i} ${i%png}jpg;done
henry@henry-Rev-1-0:~/Files/URPC2018/UPRC2018UnderWaterDetection/enhanced0815/B6
2 (复件)$ rm -rf *.png

图片批量修改格式.sh文件，将当前目录中* .png图片转换为*.jpg图片，并删除.png图片

# Convert every .png in the current directory to .jpg.
# Each .png is deleted only after its own conversion succeeded.
for i in *.png; do
    [ -e "$i" ] || continue   # no .png present: the glob stays literal
    # ${i%png} strips the trailing "png" (the old ${i%bmp} stripped nothing
    # and produced names like x.pngjpg).
    convert "$i" "${i%png}jpg" && rm -f -- "$i"
done

comm命令进行文本编辑操作

多文本排序

sort A.txt -o A.txt; sort B.txt -o B.txt

两文本比较并输出至2.txt

comm train.txt test.txt>2.txt

参考资料comm比较两个文件的异同

diff命令进行文本内容比较操作

文件1中有、文件2中没有的行输出到_1_not_in_2.txt

diff -u a.txt b.txt|grep '^-' |grep -v '^---' > '_1_not_in_2.txt'

文件2中有、文件1中没有的行输出到_2_not_in_1.txt

diff -u a.txt b.txt|grep '^+' |grep -v '^+++' > '_2_not_in_1.txt'

文件1和文件2都是每行一串字符，要选出相同的行输出到same.txt

diff -u a.txt b.txt|grep '^ ' > same.txt

python批量修改xml属性

修改xml文件，将其filename部分与自身*.xml对应

#coding=utf-8
import os
import os.path
import xml.dom.minidom
 
path = "./7"
files = os.listdir(path)  # every entry in the folder

# For each xml file, set the text of every <frame> element to the xml's own
# name with a .jpg extension, then write the document back in place.
for xmlFile in files:
    full_path = os.path.join(path, xmlFile)
    stem, ext = os.path.splitext(xmlFile)
    # The directory test must use the joined path (the bare name would be
    # resolved against the current working directory); non-xml files are
    # skipped so no undefined or stale name is ever written.
    if os.path.isdir(full_path) or ext != ".xml":
        continue

    dom = xml.dom.minidom.parse(full_path)
    root = dom.documentElement
    newname = stem + ".jpg"

    for node in root.getElementsByTagName('frame'):
        print(xmlFile)
        print(newname)
        node.firstChild.data = newname
        print(node.firstChild.data)

    # Save the modified document over the original file.
    with open(full_path, 'w', encoding='UTF-8') as fh:
        dom.writexml(fh)
        print('修改filename OK!')

参考资料python批量修改xml属性

python批量修改xml的bounding box数值，修改为图片镜像翻转之后的包围框坐标。

# coding:utf-8
import cv2
import math
import numpy as np
import xml.etree.ElementTree as ET
import os

xmlpath = './5801xml/'
imgpath = './imgs/'
rotated_imgpath = './rotatedimg/'
rotated_xmlpath = './rotatedxml/'

os.makedirs(rotated_xmlpath, exist_ok=True)

# Rewrite every annotation so that its boxes match the horizontally mirrored
# image: xmin/xmax are reflected around the image width, ymin/ymax stay as is.
for i in os.listdir(xmlpath):
    a, b = os.path.splitext(i)
    if b != '.xml':
        continue  # only annotation files are processed
    print(str(i))
    tree = ET.parse(xmlpath + a + '.xml')
    root = tree.getroot()

    # Read the width of this file explicitly; without it the value of the
    # previous file would silently be reused.
    width_node = root.find('size/width')
    if width_node is None:
        print(str(i) + ' has no <size>/<width>, skipped')
        continue
    width = int(width_node.text)

    for box in root.iter('bndbox'):
        xmin = int(box.find('xmin').text)
        xmax = int(box.find('xmax').text)
        # The old right edge becomes the new left edge and vice versa.
        box.find('xmin').text = str(width - xmax)
        box.find('xmax').text = str(width - xmin)

    # Write once per file, after all boxes are updated; this also writes
    # annotations that contain no box at all.
    tree.write(rotated_xmlpath + a + '.xml')
    print(str(a) + '.xml has been rotated for  '+'°')

脚本批量修改图片大小尺寸和翻转操作
修改图片大小，将当前文件夹中图片原地修改为256x256

set -e  # or use "set -o errexit" to quit on error.
set -x  # or use "set -o xtrace" to print the statement before you execute it.

# Resize every .jpg in the current directory in place to exactly 256x256
# ("!" tells convert to ignore the aspect ratio). Variables are quoted so
# that file names containing spaces work.
for f in *.jpg
do
        echo "$f"
        convert "$f" -resize '256x256!' "$f"
done

在当前文件夹中对图片镜像翻转

set -e  # or use "set -o errexit" to quit on error.
set -x  # or use "set -o xtrace" to print the statement before you execute it.

# Mirror every .jpg in the current directory left-to-right, in place.
# Variables are quoted so that file names containing spaces work.
for f in *.jpg
do
        echo "$f"
        convert "$f" -flop "$f"
done

读取图像的尺寸大小

import cv2
import os

dirfile = './copy'
filenames = sorted(os.listdir(dirfile))

# Append "<file name> <shape tuple>" for every readable image in dirfile.
with open('image_shape1.txt', 'a+') as f:
    for filename in filenames:
        path = dirfile + '/' + filename
        print(path)

        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip (not a readable image): ' + path)
            continue

        size = img.shape
        print(size)
        f.write(filename + ' ' + str(size) + '\n')

由csv制作xml文件

import os
from utilscsv import *
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import cv2

countnum = 0

def save_xml(image_name, bbox_class, save_dir='./VOC2007/Annotations', width=1609, height=500, channel=3):
  """Write a VOC-style annotation xml for one image that carries one point target.

  A small box is built around the point (x, y): +/-5 pixels normally, +/-1
  pixel when the point lies within 6 pixels of an image border. The <object>
  is only written when the whole box lies inside [1, width] x [1, height];
  otherwise the image name and box are appended to log.txt. The xml file
  itself is written in both cases.

  Args:
    image_name: Image name without extension; the image is read from
      ./JPEGImages/<image_name>.jpg.
    bbox_class: Sequence indexed as [x, y, class_name]; x and y go through int().
    save_dir: Directory that receives <image_name>.xml (created if missing).
    width, height, channel: Kept for backward compatibility only; they are
      always overwritten with the shape of the image read from disk.

  Raises:
    FileNotFoundError: If the image cannot be read.
  """
  global countnum

  path = './JPEGImages/' + image_name + '.jpg'

  img = cv2.imread(path)  # read image.jpg from dirfile
  if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError('cannot read image: ' + path)
  height, width, channel = img.shape

  node_root = Element('annotation')
  node_folder = SubElement(node_root, 'folder')
  node_folder.text = 'JPEGImages'

  node_filename = SubElement(node_root, 'filename')
  node_filename.text = image_name + '.jpg'

  node_size = SubElement(node_root, 'size')
  node_width = SubElement(node_size, 'width')
  node_width.text = '%s' % width
  node_height = SubElement(node_size, 'height')
  node_height.text = '%s' % height
  node_depth = SubElement(node_size, 'depth')
  node_depth.text = '%s' % channel

  print("bbox_class: ",bbox_class)

  x = int(bbox_class[0])
  y = int(bbox_class[1])
  x_near_edge = x < 6 or abs(x - width) < 6
  y_near_edge = y < 6 or abs(y - height) < 6

  if x_near_edge and y_near_edge:
    print("x near bbox_class[1]: ", y)
    # Point sits in a corner region: do not step outside on the x axis.
    left = x if x == 1 else x - 1
    right = x if x == width else x + 1
    top, bottom = y - 1, y + 1
  elif x_near_edge:
    print("x near y away bbox_class[0]: ", x)
    left, top, right, bottom = x - 1, y - 1, x + 1, y + 1
  elif y_near_edge:
    print("y near bbox_class[1]: ", y)
    left, top, right, bottom = x - 1, y - 1, x + 1, y + 1
  else:
    left, top, right, bottom = x - 5, y - 5, x + 5, y + 5

  box_inside = (1 <= left <= width and 1 <= top <= height
                and 1 <= right <= width and 1 <= bottom <= height)
  if box_inside:
    countnum += 1
    print("lefttop and rightbottom are in the range!", countnum)
    node_object = SubElement(node_root, 'object')
    node_name = SubElement(node_object, 'name')
    node_name.text = '%s' % bbox_class[2]
    node_difficult = SubElement(node_object, 'difficult')
    node_difficult.text = '0'
    node_bndbox = SubElement(node_object, 'bndbox')
    node_xmin = SubElement(node_bndbox, 'xmin')
    node_xmin.text = '%s' % left
    node_ymin = SubElement(node_bndbox, 'ymin')
    node_ymin.text = '%s' % top
    node_xmax = SubElement(node_bndbox, 'xmax')
    node_xmax.text = '%s' % right
    node_ymax = SubElement(node_bndbox, 'ymax')
    node_ymax.text = '%s' % bottom
  else:
    print("There is an error: ",node_filename.text)
    with open('log.txt', 'a+') as file_object:
      file_object.write("There is an error: "+ node_filename.text + '\t')
      file_object.write(str(left)+' '+str(top)+' '+str(right)+' '+ str(bottom)+'\n')

  xml = tostring(node_root, pretty_print=True)

  # Build the xml name from image_name directly; replacing 'jpg' inside the
  # file name would also alter names that merely contain "jpg".
  os.makedirs(save_dir, exist_ok=True)
  xml_path = os.path.join(save_dir, image_name + '.xml')
  with open(xml_path, 'wb') as f:
    f.write(xml)

  return

def change2xml(label_dict=None):
    """Write one annotation xml per entry of label_dict via save_xml.

    Args:
        label_dict: Mapping of image path -> label data; only the last path
            component is passed on as the image name. None (the default) is
            treated as an empty mapping, avoiding a shared mutable default.
    """
    for image, bbox_object in (label_dict or {}).items():
        image_name = os.path.split(image)[-1]
        save_xml(image_name, bbox_object)
    return


if __name__ == '__main__':
    # Read ./list.csv (image names are joined onto ./JPEGImages) and write
    # one annotation xml per image.
    label_dict = read_csv(r'./list.csv', r'./JPEGImages')
    change2xml(label_dict)

相关配置文件


import csv
import os

def read_csv(csv_path, pre_dir):
    """Read a label csv into a dict keyed by image path.

    The first row is treated as a header and skipped; blank rows are ignored.
    For every other row, column 0 is joined onto pre_dir to form the key and
    columns 1-3 become the value (a list of three strings). When an image
    appears more than once, only its first row is kept.

    Args:
        csv_path: Path of the csv file.
        pre_dir: Directory prefix joined in front of column 0.

    Returns:
        dict mapping image path -> [col1, col2, col3].

    Raises:
        IndexError: If a non-blank row has fewer than four columns.
    """
    label_dict = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, "r", newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for line in reader:
            if not line:
                continue  # blank line, e.g. at the end of the file
            image_path = os.path.join(pre_dir, line[0])
            bbox_object = [line[i] for i in range(1, 4)]
            label_dict.setdefault(image_path, bbox_object)
    return label_dict


def write_csv(result_dict, out_path='out.csv'):
    """Write result_dict to a two-column csv ('name', 'coordinate').

    Each key contributes its last path component as the name. Its value is
    iterated as a collection of boxes: the numbers of one box are truncated
    with int() and joined by '_', and the boxes are joined by ';'.
    """
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['name', 'coordinate'])

        for image, bbox in result_dict.items():
            encoded_boxes = []
            for box in bbox:
                encoded_boxes.append('_'.join(str(int(value)) for value in box))
            writer.writerow([os.path.split(image)[-1], ';'.join(encoded_boxes)])


if __name__ == '__main__':
    # Round trip: load ./train_b.csv and dump it again to the default out.csv.
    # NOTE(review): read_csv yields a flat list of strings per image while
    # write_csv iterates each value as a list of boxes -- confirm the two are
    # meant to be chained like this.
    label_dict = read_csv(r'./train_b.csv', r'/home/matthew/dataset')
    write_csv(label_dict)

读取csv文件并保存到txt文件中

import csv
import os

def read_csv(csv_path, pre_dir):
    """Read a label csv into a dict keyed by image path.

    The first row is treated as a header and skipped; blank rows are ignored.
    For every other row, column 0 is joined onto pre_dir to form the key and
    columns 1-3 become the value (a list of three strings). When an image
    appears more than once, only its first row is kept.

    Args:
        csv_path: Path of the csv file.
        pre_dir: Directory prefix joined in front of column 0.

    Returns:
        dict mapping image path -> [col1, col2, col3].

    Raises:
        IndexError: If a non-blank row has fewer than four columns.
    """
    label_dict = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, "r", newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for line in reader:
            if not line:
                continue  # blank line, e.g. at the end of the file
            image_path = os.path.join(pre_dir, line[0])
            bbox_object = [line[i] for i in range(1, 4)]
            label_dict.setdefault(image_path, bbox_object)
    return label_dict


def write_csv(result_dict, out_path='out.csv'):
    """Write result_dict to a two-column csv ('name', 'coordinate').

    Each key contributes its last path component as the name. Its value is
    iterated as a collection of boxes: the numbers of one box are truncated
    with int() and joined by '_', and the boxes are joined by ';'.
    """
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['name', 'coordinate'])

        for image, bbox in result_dict.items():
            encoded_boxes = []
            for box in bbox:
                encoded_boxes.append('_'.join(str(int(value)) for value in box))
            writer.writerow([os.path.split(image)[-1], ';'.join(encoded_boxes)])

if __name__ == '__main__':
    # Round trip: load ./train_b.csv and dump it again to the default out.csv.
    # NOTE(review): read_csv yields a flat list of strings per image while
    # write_csv iterates each value as a list of boxes -- confirm the two are
    # meant to be chained like this.
    label_dict = read_csv(r'./train_b.csv', r'/home/matthew/dataset')
    write_csv(label_dict)

import os
import shutil

srcimage_dir_path = "./VOC2007_6280/VOC_nova_all/JPEGImages"
srcxml_dir_path = "./VOC2007_6280/VOC_nova_all/Annotations_2class_0401"

imageto_dir_path = "./VOC2007_6280/all_star/copy_star_image/"
xmlto_dir_path = "./VOC2007_6280/all_star/copy_star_xml/"

txt_path = './csv_reader.txt'

key = '_a'

count = 0

# Classes (4th tab-separated column) whose image and xml are copied.
wanted_classes = {'newtarget', 'isstar', 'asteroid', 'isnova', 'known'}

# makedirs also creates missing parent folders (os.mkdir would fail on them).
for target_dir in (imageto_dir_path, xmlto_dir_path):
    if not os.path.exists(target_dir):
        print("to_dir_path not exist,so create the dir")
        os.makedirs(target_dir)

# Each line of the txt file is a tab-separated record; column 0 is the sample
# name (without extension) and column 3 the class.
with open(txt_path) as fr:
    stringClass = [line.strip().split('\t') for line in fr]

for fields in stringClass:
    if len(fields) < 4 or fields[3] not in wanted_classes:
        continue  # blank/short record or a class that is not wanted
    image_name = fields[0] + '.jpg'
    xml_name = fields[0] + '.xml'
    count += 1
    print(image_name, ' ', count)
    shutil.copy(srcimage_dir_path + '/' + image_name, imageto_dir_path + image_name)
    shutil.copy(srcxml_dir_path + '/' + xml_name, xmlto_dir_path + xml_name)

对图像进行镜像翻转操作，扩充训练集同时，增加正样本数量

import cv2
import copy
import os

"""
#水平镜像可按公式
#I = i
#J = N - j + 1
#垂直镜像可按公式
#I = M - i + 1
#J = j
#对角镜像可按公式
#I = M - i + 1
#J = N - j + 1
"""

def mirror_imgs(imgs_path, save_path):
  """Mirror every image in imgs_path left-to-right and save it to save_path.

  This function was a line-for-line duplicate of horizontal_mirror_imgs, so it
  now simply delegates to it and is kept as a backward-compatible alias.
  """
  horizontal_mirror_imgs(imgs_path, save_path)

def horizontal_mirror_imgs(imgs_path, save_path):
  """Save a left-right mirrored copy of every image in imgs_path.

  Each readable image is written to save_path as '<stem>_zym.jpg' with JPEG
  quality 100. Files that OpenCV cannot decode are reported and skipped.

  Args:
    imgs_path: Directory containing the source images.
    save_path: Existing directory that receives the mirrored images.
  """
  for name in os.listdir(imgs_path):
    print(name)
    image = cv2.imread(os.path.join(imgs_path, name), 1)
    if image is None:
      # cv2.imread returns None for anything it cannot decode.
      print('skip (not a readable image): ' + name)
      continue

    # flipCode 1 mirrors around the vertical axis (pixel j -> width - 1 - j),
    # which replaces the former per-pixel Python loop.
    iLR = cv2.flip(image, 1)
    save_name = os.path.splitext(name)[0] + '_zym' + '.jpg'

    cv2.imwrite(os.path.join(save_path, save_name), iLR,
                [int(cv2.IMWRITE_JPEG_QUALITY), 100])  # save the image

def vertical_mirror_imgs(imgs_path, save_path):
  """Save a top-bottom mirrored copy of every image in imgs_path.

  Each readable image is written to save_path as '<stem>_sxm.jpg' with JPEG
  quality 100. Files that OpenCV cannot decode are reported and skipped.

  Args:
    imgs_path: Directory containing the source images.
    save_path: Existing directory that receives the mirrored images.
  """
  for name in os.listdir(imgs_path):
    print(name)
    image = cv2.imread(os.path.join(imgs_path, name), 1)
    if image is None:
      # cv2.imread returns None for anything it cannot decode.
      print('skip (not a readable image): ' + name)
      continue

    # flipCode 0 mirrors around the horizontal axis (pixel i -> height - 1 - i),
    # which replaces the former per-pixel Python loop.
    iLR = cv2.flip(image, 0)
    save_name = os.path.splitext(name)[0] + '_sxm' + '.jpg'

    cv2.imwrite(os.path.join(save_path, save_name), iLR,
                [int(cv2.IMWRITE_JPEG_QUALITY), 100])  # save the image


imgs_path = '/home/henry/Files/FutureAi/VOC2007_6280/all_star/copy_star_image'
save_path = "/home/henry/Files/FutureAi/VOC2007_6280/all_star/copy_star_image_mirror"

# exist_ok avoids the check-then-create race of testing os.path.exists first.
os.makedirs(save_path, exist_ok=True)

# Only the left-right mirror is produced; call vertical_mirror_imgs(imgs_path,
# save_path) as well to add top-bottom mirrored copies.
horizontal_mirror_imgs(imgs_path, save_path)

制作VOC2007数据集常用代码
研究背景由于研究时常根据使用情况，制作符合要求的数据集，因而将需要的代码整理。数据集结构 └── VOCdev...
Caltech行人数据集转为VOC数据集
在跑通Faster-rcnn的demo,以及机智地在VOC2007上只使用行人对象训练跑通后，还是要自己制作数据集...
TensorFlow（二）制做自己的数据集
1.数据集格式 2. 数据集制作代码参考资料 [1] TensorFlow 制作自己的TFRecord数据集读...
制作ILSVRC2015数据集常用代码
研究背景由于研究时常根据使用情况，制作符合要求的数据集，因而将需要的代码整理。数据集结构 └── VOCdev...
Faster R-CNN for Tensorflow
研究背景根据老师要求，采用Faster-RCNN算法，使用VOC2007数据集和比赛数据集训练模型，测试图片并进...
VOC2007数据集制作与训练自己的数据
1. VOC2007数据集的格式 JPEGImages文件夹：该文件夹里包含了训练用的图像以及测试用的图像，混合存...
【从零开始学习YOLOv3】8. YOLOv3中Loss部分计算
YOLOv1是一个anchor-free的，从YOLOv2开始引入了Anchor，在VOC2007数据集上将mAP...
keras 数据集学习笔记 2/3
keras 数据集的学习笔记 2/3 上次学习一些常用的数据集，本次将学习数据集的具体应用。各种常用的数据集数...
pytorch数据集相关操作
常用数据集读取对于常用的数据集，可以通过torchvision.datasets读取，torchvision.d...
ncnn上基于Caffe用MobileNet_SSD训练和测试自
1.数据集标注 a.采用VoTT用于图像检测任务的数据集制作voc格式 2.lmdb数据集制作 a.采用 weil...