converting XML to hash/dict/CustomTreeCtrl
Astan Chee
astan.chee at al.com.au
Tue Feb 2 16:07:50 EST 2010
Hi,
Sorry for being vague, but here is my question about converting an XML
document into a dict. I found some examples online, but none of them gives
the dict/result I want. The XML looks like this:
<doc>
<stats name="position1" description="Calculation statistics"
kind="position">
<stats name="time" description="Timing summary" kind="section">
<string name="timersNote" description="Note:">This is the note
on calculation times</string>
<stats name="timers" kind="timers">
<timer name="totaltime" description="Total time">
<elapsed>609.081574</elapsed>
<user>2531.972081</user>
<system>65.119100</system>
</timer>
<timer name="partialTimer" description="Gravitational Displacement">
<elapsed>1772.011230</elapsed>
</timer>
<stats name="subTimers" description="All non-phased time"
kind="timers">
<timer name="subATimer" description="Phase time A">
<elapsed>72.418861</elapsed>
</timer>
<timer name="subBTimer" description="Phase time B">
<elapsed>28.285192</elapsed>
</timer>
<timer name="spaceMem" description="Space memory">
<elapsed>0.000</elapsed>
</timer>
</stats>
<timer name="endTime" description="End">
<elapsed>607.432373</elapsed>
</timer>
</stats>
</stats>
<stats name="space" description="Space usage summary" kind="section">
<stats name="systemSpace" description="System Space">
<memory name="heapSpace" description="Total Space">
<peak>4833280000</peak>
<current>4833280000</current>
</memory>
<memory name="spaceResidentSize" description="Space resident size">
<peak>4182777856</peak>
<current>4182777856</current>
</memory>
<int name="pageMem">1</int>
<int name="memReclaims">1943498</int>
<int name="memSwaps">0</int>
</stats>
<stats name="subsystems" kind="memstats">
<memory name="geoSpace" description="Geo-Space">
<peak>1640100156</peak>
<current>411307840</current>
</memory>
<memory name="gridSpace" description="Grid-Space">
<peak>709596712</peak>
<current>1406752</current>
</memory>
<memory name="spaceMem" description="Space memory">
<peak>737720720</peak>
<current>0</current>
</memory>
<memory name="endTime" description="End">
<peak>607.432373</peak>
</memory>
</stats>
<memory name="subsystemSpace" description="Subsystem space
total">
<peak>5164184694</peak>
<current>2054715622</current>
</memory>
</stats>
</stats>
</doc>
using this script (taken from http://code.activestate.com/recipes/410469/):
from xml.etree import cElementTree as ElementTree
class XmlListConfig(list):
    '''
    List built from the children of an ElementTree element.

    Each child of ``aList`` contributes at most one entry:

    * a child that has sub-elements becomes an XmlDictConfig when it has
      exactly one sub-element or when its first two sub-elements carry
      different tags, and a nested XmlListConfig otherwise;
    * a child without sub-elements contributes its stripped text, and is
      skipped when that text is missing or blank.

    The attributes of a child that is turned into a nested XmlListConfig,
    and of a text-only child, are never read here, so they do not appear
    in the resulting list.
    '''
    def __init__(self, aList):
        super(XmlListConfig, self).__init__()
        for element in aList:
            # len() counts sub-elements; truth-testing an Element directly
            # is deprecated, so the child count is checked explicitly.
            if len(element):
                # treat like dict - only the first two tags are compared
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list - the first two tags are the same
                else:
                    self.append(XmlListConfig(element))
            else:
                # leaf node: keep only non-blank text
                text = (element.text or '').strip()
                if text:
                    self.append(text)
class XmlDictConfig(dict):
    '''
    Dict built from an ElementTree element.

    The attributes of ``parent_element`` are copied in first, then each
    child element is stored under its tag name:

    * a child with sub-elements becomes a nested XmlDictConfig, or, when
      its first two sub-elements share a tag, a plain dict holding an
      XmlListConfig under that shared tag; the child's own attributes are
      then merged into that mapping;
    * a child with attributes but no sub-elements becomes a dict of those
      attributes (its text is not kept);
    * any other child is stored as its text (None when it has none).

    Children that share a tag overwrite one another, so only the last one
    is kept.

    Example usage:
    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)
    Or, if you want to use an XML string:
    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)
    And then use xmldict for what it is... a dict.
    '''
    def __init__(self, parent_element):
        super(XmlDictConfig, self).__init__()
        self.update(parent_element.items())
        for element in parent_element:
            attributes = element.items()
            # len() counts sub-elements; truth-testing an Element directly
            # is deprecated, so the child count is checked explicitly.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict; this is
                # applied last, so an attribute wins over a same-named child
                aDict.update(attributes)
                self[element.tag] = aDict
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif attributes:
                self[element.tag] = dict(attributes)
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self[element.tag] = element.text
# Parse the sample document from disk and convert its root element.
tree = ElementTree.parse('test.xml')
root = tree.getroot()
xmldict = XmlDictConfig(root)
# The parenthesised form prints the same on Python 2 and is also valid
# syntax on Python 3, where the bare print statement is an error.
print(xmldict)
From which I get this dict:
{'stats': {'kind': 'position', 'stats': [{'kind': 'section', 'stats':
{'kind': 'timers', 'name': 'timers', 'timer': [{'system': '65.119100',
'user': '2531.972081', 'elapsed': '609.081574', 'name': 'totaltime',
'description': 'Total time'}, {'elapsed': '1772.011230', 'name':
'partialTimer', 'description': 'Gravitational Displacement'},
[{'elapsed': '72.418861', 'name': 'subATimer', 'description': 'Phase
time A'}, {'elapsed': '28.285192', 'name': 'subBTimer', 'description':
'Phase time B'}, {'elapsed': '0.000', 'name': 'spaceMem', 'description':
'Space memory'}], {'elapsed': '607.432373', 'name': 'endTime',
'description': 'End'}]}, 'name': 'time', 'string': {'name':
'timersNote', 'description': 'Note:'}, 'description': 'Timing summary'},
[[{'current': '4833280000', 'peak': '4833280000', 'name': 'heapSpace',
'description': 'Total Space'}, {'current': '4182777856', 'peak':
'4182777856', 'name': 'spaceResidentSize', 'description': 'Space
resident size'}, '1', '1943498', '0'], [{'current': '411307840', 'peak':
'1640100156', 'name': 'geoSpace', 'description': 'Geo-Space'},
{'current': '1406752', 'peak': '709596712', 'name': 'gridSpace',
'description': 'Grid-Space'}, {'current': '0', 'peak': '737720720',
'name': 'spaceMem', 'description': 'Space memory'}, {'peak':
'607.432373', 'name': 'endTime', 'description': 'End'}], {'current':
'2054715622', 'peak': '5164184694', 'name': 'subsystemSpace',
'description': 'Subsystem space total'}]], 'name': 'position1',
'description': 'Calculation statistics'}}
This is kind of wrong: I expect the dict to contain the "Space usage
summary" entry, but it doesn't (is it dropped as a duplicate?). What am I
doing wrong here?
I am attempting to use an attribute value of each XML element as the key
(if the element doesn't have one, then just the tag name will do) and
associate it with the text value of that element, while also reflecting
the hierarchical structure of the XML in the dict. Does this make sense?
Anyway, the python script above is just the first step or an example for me.
Cheers and thanks again
Astan
More information about the Python-list
mailing list