1. 官方demo
from gluoncv.data import VOCDetection
class VOCLike(VOCDetection):
    """Custom object-detection dataset stored in a Pascal VOC style layout.

    Reuses gluoncv's ``VOCDetection`` loader and only overrides the category
    list, so the annotation files may use this dataset's own class names.
    """

    # Category names of this dataset; they correspond to the <name> entries
    # in the annotation XML files ('dog' and 'person' in the sample file).
    CLASSES = ['person', 'dog']

    def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
        """Forward every argument unchanged to ``VOCDetection.__init__``.

        Parameters
        ----------
        root : str
            Dataset root folder (the demo passes ``'VOCtemplate'``).
        splits : tuple of (year, name) pairs
            Splits to load (the demo passes ``((2018, 'train'),)``).
        transform, index_map, preload_label
            Passed through as-is; see the ``VOCDetection`` documentation
            for their exact meaning.
        """
        super(VOCLike, self).__init__(root, splits, transform, index_map, preload_label)
# Load the (2018, 'train') split of the dataset rooted at 'VOCtemplate'.
dataset = VOCLike(root='VOCtemplate', splits=((2018, 'train'),))
print('length of dataset:', len(dataset))
print('label example:')
# A sample is an (image, label) pair; only the label is printed here.
_, first_label = dataset[0]
print(first_label)
在官方demo的基础上,增加了图片和标注框的可视化显示:
from gluoncv import utils
import mxnet as mx
import numpy as np
from matplotlib import pyplot as plt
from gluoncv.data import VOCDetection
class VOCLike(VOCDetection):
    """Custom object-detection dataset stored in a Pascal VOC style layout.

    Reuses gluoncv's ``VOCDetection`` loader and only overrides the category
    list, so the annotation files may use this dataset's own class names.
    """

    # Category names of this dataset; they correspond to the <name> entries
    # in the annotation XML files ('dog' and 'person' in the sample file).
    CLASSES = ['person', 'dog']

    def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
        """Forward every argument unchanged to ``VOCDetection.__init__``.

        Parameters
        ----------
        root : str
            Dataset root folder (the demo passes ``'VOCtemplate'``).
        splits : tuple of (year, name) pairs
            Splits to load (the demo passes ``((2018, 'train'),)``).
        transform, index_map, preload_label
            Passed through as-is; see the ``VOCDetection`` documentation
            for their exact meaning.
        """
        super(VOCLike, self).__init__(root, splits, transform, index_map, preload_label)
# Load the (2018, 'train') split of the dataset rooted at 'VOCtemplate'.
train_dataset = VOCLike(root='VOCtemplate', splits=((2018, 'train'),))
print('length of dataset:', len(train_dataset))
print('label example:')
print(train_dataset[0][1])

# Take the first sample as an (image, label) pair and split the label array:
# the first four columns are used as box coordinates, the fifth as class id.
train_image, train_label = train_dataset[0]
bounding_boxes = train_label[:, :4]
class_ids = train_label[:, 4:5]

print('Image size (height, width, RGB):', train_image.shape)
print('Num of objects:', bounding_boxes.shape[0])
print('Bounding boxes (num_boxes, x_min, y_min, x_max, y_max):\n', bounding_boxes)
print('Class IDs (num_boxes, ):\n', class_ids)

# Render the boxes and class names on top of the image, then show the figure.
ax = utils.viz.plot_bbox(
    train_image.asnumpy(),
    bounding_boxes,
    scores=None,
    labels=class_ids,
    class_names=train_dataset.classes,
)
plt.show()

# Optional: read and print the contents of the XML annotation file.
# with open('VOCtemplate/VOC2018/Annotations/000001.xml', 'r') as fid:
#     print(fid.read())
2. 数据准备
2.1 voc数据集结构
image.png
将VOCtemplate文件夹和.py文件放在相同目录下。
image.png
2.2 Annotations文件夹下存放000001.xml这样的标注文件,内容示例如下:
<annotation>
<filename>000001.jpg</filename>
<size>
<width>353</width>
<height>500</height>
<depth>3</depth>
</size>
<object>
<name>dog</name>
<difficult>0</difficult>
<bndbox>
<xmin>48</xmin>
<ymin>240</ymin>
<xmax>195</xmax>
<ymax>371</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>12</ymin>
<xmax>352</xmax>
<ymax>498</ymax>
</bndbox>
</object>
</annotation>
2.3 JPEGImages文件夹下存放000001.jpg图片文件:
000001.jpg
2.4 ImageSets文件夹下存放Main文件夹,Main文件夹下存放train.txt文件.
image.png
image.png
train.txt文件的内容是图片名称(不加后缀),例如000001。
image.png
3.运行结果:
image.png
image.png
网友评论