# Converting xml to dictionary using ElementTree
# XML转字典 (XML to dictionary)
from collections import defaultdict
def etree_to_dict(t):
    """Convert an ElementTree element into nested dicts, lists and strings.

    The result is a one-key dictionary ``{t.tag: content}`` where
    ``content`` is:

    * ``None`` for an element with no attributes, children or text;
    * the stripped text for an element that has only text;
    * otherwise a dict holding
        - one entry per child tag (a list when the tag occurs more than
          once, the single converted child otherwise),
        - one ``'@name'`` entry per attribute,
        - a ``'#text'`` entry when the stripped text is non-empty.

    Only ``t.text`` is read; tail text of child elements is not captured.

    Args:
        t: element exposing ``tag``, ``attrib``, ``text`` and iteration
            over its children.

    Returns:
        dict: ``{t.tag: content}`` as described above.
    """
    children = list(t)
    if children:
        # Group converted children by tag so repeated tags become lists.
        grouped = defaultdict(list)
        for child_dict in map(etree_to_dict, children):
            for tag, value in child_dict.items():
                grouped[tag].append(value)
        content = {
            tag: values[0] if len(values) == 1 else values
            for tag, values in grouped.items()
        }
    elif t.attrib:
        content = {}
    else:
        content = None

    if t.attrib:
        # Attributes are prefixed with '@' to keep them apart from child tags.
        content.update(
            ('@' + name, value) for name, value in t.attrib.items()
        )

    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # Whitespace-only text between children is layout, not data.
            if text:
                content['#text'] = text
        else:
            content = text

    return {t.tag: content}
# 字典转XML (dictionary to XML)
try:
    basestring
except NameError:  # Python 3: basestring is gone, str is the only text type
    basestring = str


class _InvalidDictError(TypeError, AssertionError):
    """Input that dict_to_etree cannot turn into an element tree.

    Also derives from AssertionError so that callers catching the
    AssertionError produced by assert-style validation keep working.
    """


def dict_to_etree(d):
    """Build an element tree from a one-key dictionary.

    The single key of ``d`` is the root tag and its value is the root's
    body. A body is either falsy (the element stays empty), a string (the
    element text) or a dict whose keys are interpreted as:

    * ``'#text'`` -- text of the element (string value);
    * ``'@name'`` -- attribute ``name`` (string value);
    * anything else -- a child tag; a list value yields one child element
      per item, any other value yields a single child.

    Elements are created through the module-level ``ET`` alias.

    Args:
        d: dictionary with exactly one key.

    Returns:
        The root element created with ``ET.Element``.

    Raises:
        TypeError: (also an ``AssertionError``) if ``d`` is not a one-key
            dict, or if a key, text value, attribute value or body has an
            unsupported type.
    """
    def _fill(body, element):
        # Populate `element` (text, attributes, children) from `body`.
        if not body:
            # None, '' and {} all mean "nothing to add".
            return
        if isinstance(body, basestring):
            element.text = body
            return
        if not isinstance(body, dict):
            raise _InvalidDictError(
                'invalid type: %r (%r)' % (type(body), body)
            )
        for key, value in body.items():
            if not isinstance(key, basestring):
                raise _InvalidDictError('key must be a string: %r' % (key,))
            if key.startswith('#'):
                if key != '#text' or not isinstance(value, basestring):
                    raise _InvalidDictError(
                        "only '#text' with a string value is supported: "
                        '%r=%r' % (key, value)
                    )
                element.text = value
            elif key.startswith('@'):
                if not isinstance(value, basestring):
                    raise _InvalidDictError(
                        'attribute value must be a string: '
                        '%r=%r' % (key, value)
                    )
                element.set(key[1:], value)
            elif isinstance(value, list):
                # Repeated tag: one sibling element per list item.
                for item in value:
                    _fill(item, ET.SubElement(element, key))
            else:
                _fill(value, ET.SubElement(element, key))

    # Explicit raises (rather than assert) so validation survives `python -O`.
    if not isinstance(d, dict) or len(d) != 1:
        raise _InvalidDictError(
            'expected a dict with exactly one root key, got %r' % (d,)
        )
    tag, body = next(iter(d.items()))
    node = ET.Element(tag)
    _fill(body, node)
    return node
# 试验 (trial run)
try:
    # C-accelerated implementation; exists on Python 2 and Python 3 < 3.9.
    from xml.etree import cElementTree as ET
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up
    # the C accelerator on its own when it is available.
    from xml.etree import ElementTree as ET
# Parse a sample timetable document into an element tree.
e = ET.XML('''
<train file_name="208-2.xls" table_number="350" train_number="6184," train_type="TGV">
<station depart="1457" font="b" name="Nice Ville" stop_id="0">
<ref>360</ref>
</station>
<station depart="1513" name="Antibes" stop_id="1">
<ref>360</ref>
</station>
<station depart="1527" font="b" name="Cannes" stop_id="2">
<ref>360</ref>
</station>
<station depart="1554" name="Saint-Raphaël-Valescure" stop_id="3">
<ref>360</ref>
</station>
<station depart="1648" font="b" name="Toulon" stop_id="4">
<ref>360</ref>
</station>
<station depart="1801" font="b" name="Avignon TGV" stop_id="5"/>
<station arrive="2041" font="b" name="Paris Gare de Lyon" stop_id="6"/>
</train>
''')
from pprint import pprint
# Convert the parsed tree with etree_to_dict and pretty-print the result.
d = etree_to_dict(e)
pprint(d)
"""
输出字典为 (the resulting dictionary):
{'train': {'@file_name': '208-2.xls',
           '@table_number': '350',
           '@train_number': '6184,',
           '@train_type': 'TGV',
           'station': [{'@depart': '1457',
                        '@font': 'b',
                        '@name': 'Nice Ville',
                        '@stop_id': '0',
                        'ref': '360'},
                       {'@depart': '1513',
                        '@name': 'Antibes',
                        '@stop_id': '1',
                        'ref': '360'},
                       {'@depart': '1527',
                        '@font': 'b',
                        '@name': 'Cannes',
                        '@stop_id': '2',
                        'ref': '360'},
                       {'@depart': '1554',
                        '@name': 'Saint-Raphaël-Valescure',
                        '@stop_id': '3',
                        'ref': '360'},
                       {'@depart': '1648',
                        '@font': 'b',
                        '@name': 'Toulon',
                        '@stop_id': '4',
                        'ref': '360'},
                       {'@depart': '1801',
                        '@font': 'b',
                        '@name': 'Avignon TGV',
                        '@stop_id': '5'},
                       {'@arrive': '2041',
                        '@font': 'b',
                        '@name': 'Paris Gare de Lyon',
                        '@stop_id': '6'}]}}
"""
# Convert the dictionary back into an element tree and print it serialized.
print(ET.tostring(dict_to_etree(d)))
# 网友评论 (reader comments)