converting XML to hash/dict/CustomTreeCtrl

Astan Chee astan.chee at al.com.au
Tue Feb 2 16:07:50 EST 2010


Hi,
Sorry for being vague, but here is my question about converting an XML
document into a dict. I found some examples online, but none gives the
dict/result I want. The XML looks like this:
<doc>
    <stats name="position1" description="Calculation statistics"
kind="position">
    <stats name="time" description="Timing summary" kind="section">
        <string name="timersNote" description="Note:">This is the note
on calculation times</string>
        <stats name="timers" kind="timers">

        <timer name="totaltime" description="Total time">
            <elapsed>609.081574</elapsed>
            <user>2531.972081</user>
            <system>65.119100</system>
        </timer>
        <timer name="partialTimer" description="Gravitational Displacement">
            <elapsed>1772.011230</elapsed>
        </timer>
        <stats name="subTimers" description="All non-phased time" 
kind="timers">
            <timer name="subATimer" description="Phase time A">
            <elapsed>72.418861</elapsed>
            </timer>

            <timer name="subBTimer" description="Phase time B">
            <elapsed>28.285192</elapsed>
            </timer>
            <timer name="spaceMem" description="Space memory">
            <elapsed>0.000</elapsed>
            </timer>
        </stats>
        <timer name="endTime" description="End">
            <elapsed>607.432373</elapsed>
        </timer>
        </stats>
    </stats>
    <stats name="space" description="Space usage summary" kind="section">
        <stats name="systemSpace" description="System Space">
        <memory name="heapSpace" description="Total Space">
            <peak>4833280000</peak>

            <current>4833280000</current>
        </memory>
        <memory name="spaceResidentSize" description="Space resident size">
            <peak>4182777856</peak>
            <current>4182777856</current>
        </memory>
        <int name="pageMem">1</int>

        <int name="memReclaims">1943498</int>
        <int name="memSwaps">0</int>
        </stats>
        <stats name="subsystems" kind="memstats">
        <memory name="geoSpace" description="Geo-Space">
            <peak>1640100156</peak>
            <current>411307840</current>
        </memory>
        <memory name="gridSpace" description="Grid-Space">
            <peak>709596712</peak>
            <current>1406752</current>
        </memory>
        <memory name="spaceMem" description="Space memory">
            <peak>737720720</peak>
            <current>0</current>
        </memory>
                <memory name="endTime" description="End">
            <peak>607.432373</peak>
        </memory>
        </stats>
            <memory name="subsystemSpace" description="Subsystem space
total">
        <peak>5164184694</peak>
        <current>2054715622</current>
        </memory>
   
    </stats>
    </stats>
 </doc>

I am using this script (taken from http://code.activestate.com/recipes/410469/):
from xml.etree import cElementTree as ElementTree

class XmlListConfig(list):
    '''
    List built from the children of an XML element (or from any iterable
    of elements).

    Each child contributes at most one item:

    * a child with sub-elements becomes a nested XmlDictConfig when it has
      exactly one sub-element or when its first two sub-elements have
      different tags; otherwise it becomes a nested XmlListConfig;
    * a child without sub-elements contributes its text, stripped of
      surrounding whitespace, and is skipped when the text is missing or
      blank.

    Note that a child turned into a nested list or into plain text does
    not carry its own attributes over: only its sub-elements (or its text)
    are looked at.
    '''
    def __init__(self, aList):
        for element in aList:
            # len() counts sub-elements. It is used instead of the truth
            # value of the element, which ElementTree documents as
            # deprecated and subject to change.
            if len(element):
                # treat like dict - only the first two sub-element tags
                # are compared; the rest are assumed to follow suit
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)


class XmlDictConfig(dict):
    '''
    Dict built from an XML element: the element's attributes plus one
    entry per child element, keyed by the child's tag.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    How each child is stored:

    * child with sub-elements whose first two tags differ (or with a single
      sub-element): a nested XmlDictConfig;
    * child with sub-elements whose first two tags are equal: a plain dict
      mapping that shared tag to an XmlListConfig of the sub-elements,
      plus the child's own attributes;
    * child with attributes but no sub-elements: a dict of its attributes
      (its text, if any, is not kept);
    * child with neither: its text (None when the element is empty).

    Because entries are keyed by tag, a later child replaces an earlier
    child (or an attribute of the parent) that uses the same key.
    '''
    def __init__(self, parent_element):
        # dict.update accepts the (name, value) pairs directly; an element
        # without attributes yields an empty list, which is a no-op.
        self.update(parent_element.items())
        for element in parent_element:
            attributes = element.items()
            # len() counts sub-elements. It is used instead of the truth
            # value of the element, which ElementTree documents as
            # deprecated and subject to change.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                if attributes:
                    aDict.update(attributes)
                self[element.tag] = aDict
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif attributes:
                self[element.tag] = dict(attributes)
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self[element.tag] = element.text
           
# Parse the XML file from the current directory (presumably the sample
# document shown above -- confirm) and print its dict representation.
tree = ElementTree.parse('test.xml')
root = tree.getroot()

xmldict = XmlDictConfig(root)
# Parenthesised form: prints the same thing on Python 2 and is also valid
# syntax on Python 3.
print(xmldict)

With it, I get this dict:
{'stats': {'kind': 'position', 'stats': [{'kind': 'section', 'stats': 
{'kind': 'timers', 'name': 'timers', 'timer': [{'system': '65.119100', 
'user': '2531.972081', 'elapsed': '609.081574', 'name': 'totaltime', 
'description': 'Total time'}, {'elapsed': '1772.011230', 'name': 
'partialTimer', 'description': 'Gravitational Displacement'}, 
[{'elapsed': '72.418861', 'name': 'subATimer', 'description': 'Phase 
time A'}, {'elapsed': '28.285192', 'name': 'subBTimer', 'description': 
'Phase time B'}, {'elapsed': '0.000', 'name': 'spaceMem', 'description': 
'Space memory'}], {'elapsed': '607.432373', 'name': 'endTime', 
'description': 'End'}]}, 'name': 'time', 'string': {'name': 
'timersNote', 'description': 'Note:'}, 'description': 'Timing summary'}, 
[[{'current': '4833280000', 'peak': '4833280000', 'name': 'heapSpace', 
'description': 'Total Space'}, {'current': '4182777856', 'peak': 
'4182777856', 'name': 'spaceResidentSize', 'description': 'Space 
resident size'}, '1', '1943498', '0'], [{'current': '411307840', 'peak': 
'1640100156', 'name': 'geoSpace', 'description': 'Geo-Space'}, 
{'current': '1406752', 'peak': '709596712', 'name': 'gridSpace', 
'description': 'Grid-Space'}, {'current': '0', 'peak': '737720720', 
'name': 'spaceMem', 'description': 'Space memory'}, {'peak': 
'607.432373', 'name': 'endTime', 'description': 'End'}], {'current': 
'2054715622', 'peak': '5164184694', 'name': 'subsystemSpace', 
'description': 'Subsystem space  total'}]], 'name': 'position1', 
'description': 'Calculation statistics'}}

This is kind of wrong. I expect the dict to contain the "Space usage
summary" entry, but it doesn't (is it being dropped as a duplicate?). What
am I doing wrong here?
I am attempting to keep an element's attribute value as the key (if the
element doesn't have one, then just the tag name will do) and to associate
it with the text value of that element, while also reflecting the
hierarchical structure of the XML in the dict. Does this make sense?
Anyway, the python script above is just the first step or an example for me.
Cheers and thanks again
Astan




More information about the Python-list mailing list