[Chicago] xml to py object
Lukasz Szybalski
szybalski at gmail.com
Fri May 1 17:03:00 CEST 2009
On Fri, May 1, 2009 at 9:42 AM, Massimo Di Pierro
<mdipierro at cs.depaul.edu> wrote:
> This is a quick solution with no dependencies:
>
> import re
>
>
class Parser:
    """Minimal, dependency-free XML-to-object parser.

    Each element becomes a ``Parser.S`` mapping stored on its parent under
    the element's tag name.  Every element mapping has an ``attr`` entry
    (a ``Parser.S`` of attribute name -> string value); an element whose
    content contains no parseable child element also gets a ``value`` entry
    holding its raw inner text.

    Known limitations (inherent to the regex approach):
      * sibling elements with the same tag overwrite each other;
      * an element nested inside another element of the same tag is matched
        against the first closing tag found;
      * self-closing tags (``<a/>``), comments, CDATA and namespaces are not
        understood;
      * tags named ``attr`` or ``value`` collide with the bookkeeping keys.
    """

    class S(dict):
        """A dict whose items can also be read and written as attributes."""

        def __setattr__(self, k, v):
            self[k] = v

        def __getattr__(self, k):
            # Only called when normal attribute lookup fails.  Raise
            # AttributeError (not KeyError) so hasattr(), copy and pickle
            # behave correctly.
            try:
                return self[k]
            except KeyError:
                raise AttributeError(k)

    # Opening tag at the start of the text: captures tag name and the raw
    # attribute string up to the first '>'.
    r1 = re.compile(r'^\s*<(?P<tag>\w+)(?P<attr>.*?)>')
    # One name="value" attribute pair; the value may contain \" escapes.
    r2 = re.compile(r'(?P<name>\w+)\s*=\s*"(?P<value>([^\\]|\\")*?)"')

    def __init__(self, text):
        """Store the XML ``text`` to parse and create the empty root mapping."""
        self.text = text
        self.value = Parser.S()

    def parse(self, text=None, value=None):
        """Parse ``text`` into ``value`` and return ``value``.

        Args:
            text: XML fragment to parse; defaults to the text given to the
                constructor.  An empty string is a valid (empty) fragment.
            value: ``Parser.S`` mapping to populate; defaults to the root
                mapping created by the constructor.

        Returns:
            The populated ``Parser.S`` mapping.
        """
        # Compare against None explicitly: the recursive call below passes ''
        # for an empty element (<a></a>), and treating that as "missing" would
        # restart parsing from self.text and recurse forever.
        if text is None:
            text = self.text
        if value is None:
            value = self.value
        done = False  # becomes True once at least one child element is parsed
        while True:
            m = self.r1.search(text)
            if not m:
                # No (further) opening tag: if nothing was parsed at this
                # level, the whole fragment is plain text content.
                if not done:
                    value.value = text
                break
            tag = m.group('tag')
            k = text.find('</%s>' % tag)
            if k < 0:
                # Opening tag without a matching close: keep the raw text.
                if not done:
                    value.value = text
                break
            nested = value[tag] = Parser.S()
            nested.attr = Parser.S()
            for item in self.r2.finditer(m.group('attr')):
                nested.attr[item.group('name')] = item.group('value')
            # Recurse into the element's inner content.
            self.parse(text[m.end():k], nested)
            # Skip past '</' + tag + '>' (3 + len(tag) characters).
            text = text[k + 3 + len(tag):]
            done = True
        return value
>
>
# Demo: parse a small document and walk the resulting object tree.
# print(...) with a single argument behaves the same on Python 2 and 3.
x = Parser('<hello n="2"><a n="4" m="6">in a</a><b>in b</b></hello>').parse()
print(x)                   # the whole tree (a nested dict)
print(x.hello)             # the <hello> element
print(x.hello.attr.n)      # attribute n of <hello>  -> 2
print(x.hello.a.value)     # text of <a>             -> in a
print(x.hello.a.attr.n)    # attribute n of <a>      -> 4
print(x.hello.a.attr.m)    # attribute m of <a>      -> 6
print(x.hello.b.value)     # text of <b>             -> in b
>
With lxml's objectify, it is:
from lxml import etree
from lxml import objectify
#import lxml.usedoctest
# Parse inside a context manager so the file handle is released even if
# objectify.parse() raises (e.g. on malformed XML).
with open('Example1.xml', 'r') as f:
    tree = objectify.parse(f)
root = tree.getroot()
# print(...) with a single argument behaves the same on Python 2 and 3.
print('Parsing Root: %s' % root.tag)
# List the tag names of the root's direct children.
print([el.tag for el in root.iterchildren()])
Now I can overwrite the data, save it, and send it on for further processing... nice.
Thanks,
Lucas
More information about the Chicago
mailing list