[Chicago] xml to py object

Lukasz Szybalski szybalski at gmail.com
Fri May 1 17:03:00 CEST 2009


On Fri, May 1, 2009 at 9:42 AM, Massimo Di Pierro
<mdipierro at cs.depaul.edu> wrote:
> This is a quick solution with no dependencies:
>
> import re
>
>
> class Parser:
>    class S(dict):
>        def __setattr__(self,k,v): self[k]=v
>        def __getattr__(self,k): return self[k]
>    r1=re.compile('^\s*\<(?P<tag>\w+)(?P<attr>.*?)>')
>    r2=re.compile('(?P<name>\w+)\s*=\s*"(?P<value>([^\\\\]|\\\\")*?)"')
>    def __init__(self,text): (self.text,self.value)=(text,Parser.S())
>    def parse(self,text=None,value=None):
>        if not text: (text,value)=(self.text,self.value)
>        done=False
>        while True:
>            m=self.r1.search(text)
>            if not m:
>                if not done: value.value=text
>                break
>            tag=m.group('tag')
>            k=text.find('</%s>'%tag)
>            if k<0:
>                if not done: value.value=text
>                break
>            nested = value[tag] = Parser.S()
>            nested.attr=Parser.S()
>            for item in self.r2.finditer(m.group('attr')):
>                nested.attr[item.group('name')]=item.group('value')
>            self.parse(text[m.end():k],nested)
>            (text, done) = (text[k+3+len(tag):],True)
>        return value
>
>
> x=Parser('<hello n="2"><a n="4" m="6">in a</a><b>in b</b></hello>').parse()
> print x
> print x.hello
> print x.hello.attr.n
> print x.hello.a.value
> print x.hello.a.attr.n
> print x.hello.a.attr.m
> print x.hello.b.value
>


I'm going with lxml's objectify:


from lxml import etree
from lxml import objectify
#import lxml.usedoctest
# Parse Example1.xml into an lxml.objectify tree, then print the root
# element's tag and the tags of its direct children.
f = open('Example1.xml', 'r')
try:
    tree = objectify.parse(f)
finally:
    # Close the file even when parsing raises, so the handle is not leaked.
    # try/finally (rather than `with`) keeps this runnable on older
    # Python 2 releases as well.
    f.close()
root = tree.getroot()
# A single parenthesised argument prints the same text whether `print` is
# the Python 2 statement or the Python 3 function.
print('Parsing Root: %s' % root.tag)
print([el.tag for el in root.iterchildren()])


Now I can overwrite the data, save it, and send it on for further processing... nice.

Thanks,
Lucas


More information about the Chicago mailing list