from xml.dom.minidom import parse
import os
# File path: locate the annotation XML file to parse.
# Directory that holds the annotation files, and the one file to inspect.
filepath = r"C:\Users\bxzyz\Desktop\doc\ocr\data\OCR-caiji"
xmlname = r"201704051510530998-8587102386237784086-6.xml"
# From here on xmlname holds the full path rather than the bare file name.
xmlname = os.path.join(filepath, xmlname)

# Parse the file into a DOM document and echo it back as an XML string.
dom = parse(xmlname)
print(dom.toxml())
# The XML document printed by the code above is shown below:
<?xml version="1.0" ?><annotation>
<folder>OCV - 3</folder>
<filename>201704051510530998-8587102386237784086-6.bmp</filename>
<path>D:\ocv收集\OCV - 3\201704051510530998-8587102386237784086-6.bmp</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>0</width>
<height>0</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>R68</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>95</xmin>
<ymin>50</ymin>
<xmax>239</xmax>
<ymax>133</ymax>
</bndbox>
</object>
<object>
<name>704</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>95</xmin>
<ymin>133</ymin>
<xmax>235</xmax>
<ymax>225</ymax>
</bndbox>
</object>
<object>
<name>003</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>101</xmin>
<ymin>228</ymin>
<xmax>250</xmax>
<ymax>333</ymax>
</bndbox>
</object>
</annotation>
# Read all <object> nodes from the XML document.
# Gather every <object> element found anywhere in the parsed document,
# then report how many there are.
objs = dom.getElementsByTagName("object")
print(objs.length)
3
# objs = eldoc.getElementsByTagName( "object" )
# Print the label and the bounding-box coordinates of each <object> element,
# one "tag:value" pair per line.
for obj in objs:
    # Tags are emitted in this fixed order: name, xmin, xmax, ymin, ymax.
    for tag in ("name", "xmin", "xmax", "ymin", "ymax"):
        # Take the first descendant with this tag; its first child node is
        # read as the value (raises IndexError if the tag is absent or the
        # element has no children).
        node = obj.getElementsByTagName(tag)[0]
        print(node.nodeName + ":" + node.childNodes[0].nodeValue)
    # NOTE(review): the source had no indentation; this blank-line print is
    # assumed to separate objects, i.e. to sit inside the loop -- confirm.
    print()
name:R68
xmin:95
xmax:239
ymin:50
ymax:133
name:704
xmin:95
xmax:235
ymin:133
ymax:225
name:003
xmin:101
xmax:250
ymin:228
ymax:333
网友评论