xml --> dict
from lxml import etree
def parse_xml_to_dict(xml):
    """Recursively convert an XML element tree into nested dictionaries.

    Modelled on TensorFlow's ``recursive_parse_xml_to_dict``.

    Args:
        xml: Element obtained by parsing XML contents with ``lxml.etree``.

    Returns:
        A single-key dictionary ``{xml.tag: value}``. ``value`` is the
        element's text when it has no child elements, otherwise a dict
        keyed by child tag. Children tagged ``object`` are gathered into a
        list because one annotation may hold several of them; any other
        repeated tag keeps only its last occurrence.
    """
    # Comments and processing instructions are yielded during iteration too,
    # but their ``tag`` is a callable rather than a string. Skipping them
    # keeps non-string keys out of the result.
    children = [child for child in xml if isinstance(child.tag, str)]
    if not children:
        # Reached a leaf: return the text stored under this tag.
        return {xml.tag: xml.text}

    result = {}
    for child in children:
        value = parse_xml_to_dict(child)[child.tag]
        if child.tag == 'object':
            result.setdefault(child.tag, []).append(value)
        else:
            result[child.tag] = value
    return {xml.tag: result}
dict --> voc xml dict
def generate_xml_dict(image_name, im_dict, folder, path, width, height, depth=3):
    """Build a Pascal VOC style annotation dictionary for one image.

    Args:
        image_name: Value stored under ``filename``.
        im_dict: Iterable of mappings, one per object, each providing the
            keys ``label``, ``x``, ``y``, ``w`` and ``h``.
        folder: Value stored under ``folder``.
        path: Value stored under ``path``.
        width: Image width; stored as a string.
        height: Image height; stored as a string.
        depth: Number of image channels; stored as a string. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        A dictionary with a single ``annotation`` key whose ``object`` entry
        is a list with one dictionary per item of ``im_dict``.
    """
    objects = []
    for info in im_dict:
        left, top = info["x"], info["y"]
        objects.append({
            "name": info["label"],
            "pose": "Unspecified",
            "truncated": "0",
            "difficult": "0",
            "bndbox": {
                "xmin": str(left),
                "ymin": str(top),
                # The far corner is the origin shifted by the box size.
                "xmax": str(left + info["w"]),
                "ymax": str(top + info["h"]),
            },
        })

    return {
        "annotation": {
            "folder": folder,
            "filename": image_name,
            "path": path,
            "source": {
                "database": "Unknown",
            },
            "size": {
                "width": str(width),
                "height": str(height),
                "depth": str(depth),
            },
            "segmented": "0",
            "object": objects,
        }
    }
dict --> xml
import dict2xml
# Serialize a plain dictionary to an XML string and save it to disk.
data = {"age": 20}
xml_str_data = dict2xml.dict2xml(data)
# Write UTF-8 explicitly: without ``encoding`` the locale's code page is used,
# which can fail on, or mis-encode, non-ASCII values.
with open("test.xml", "w", encoding="utf-8") as f:
    f.write(xml_str_data)
使用例子
# 解析xml文件打印
import os
from lxml import etree
def parse_xml_to_dict(xml):
    """Recursively convert an XML element into nested dictionaries.

    Args:
        xml: Element obtained by parsing XML contents with ``lxml.etree``.

    Returns:
        ``{xml.tag: value}``, where ``value`` is the element's text when it
        has no child elements, or a dict keyed by child tag otherwise.
        Children tagged ``object`` are collected into a list; any other
        repeated tag keeps only its last occurrence.
    """
    # Keep real elements only: comments and processing instructions carry a
    # callable instead of a string as ``tag`` and would otherwise end up as
    # non-string keys in the result.
    elements = [node for node in xml if isinstance(node.tag, str)]
    if not elements:
        return {xml.tag: xml.text}

    result = {}
    for node in elements:
        parsed = parse_xml_to_dict(node)[node.tag]
        if node.tag != 'object':
            result[node.tag] = parsed
            continue
        # Several objects may appear, so they accumulate in a list.
        result.setdefault(node.tag, []).append(parsed)
    return {xml.tag: result}
# Directory holding the annotation files to parse.
xml_path = "C:\\Users\\Administrator\\Desktop\\xml"
for xml_name in os.listdir(xml_path):
    xml_name = os.path.join(xml_path, xml_name)
    # Read raw bytes and let the XML parser resolve the encoding. A text-mode
    # read decodes with the locale's code page, and lxml refuses ``str`` input
    # that carries an encoding declaration.
    with open(xml_name, "rb") as fid:
        xml_bytes = fid.read()
    xml = etree.fromstring(xml_bytes)
    info = parse_xml_to_dict(xml)
    print(info)
对于xml中存在中文的应对方法
# Workaround for files containing Chinese text: decode leniently as GB18030,
# dropping any bytes that cannot be decoded.
with open(xml_path, "r", encoding='gb18030', errors='ignore') as f:
    xml_str = f.read()
# lxml rejects ``str`` input that carries an XML encoding declaration, so pass
# bytes instead. The parser is pinned to UTF-8 so that a declaration naming a
# different encoding cannot cause the re-encoded text to be misread.
xml = etree.fromstring(
    xml_str.encode("utf-8"), parser=etree.XMLParser(encoding="utf-8")
)
info = parse_xml_to_dict(xml)
网友评论