[XML-SIG] XPath in Python 2
Henry S. Thompson
ht@cogsci.ed.ac.uk
10 Jul 2000 14:37:14 +0100
Here's an existing implementation of your 10%, plus a bit more
(..../@foo). It's open source [1], available as part of the XSV XML
Schema validator [2]. It's currently operating against an XML substrate
provided by our PyLTXML embedding in Python [3] of our LT XML API
[4]. PyLTXML will itself become open source in the next few weeks.
Enjoy.
ht
# Copyright (C) 2000 LTG -- See accompanying COPYRIGHT and COPYING files
import string
import types
import XML
class XPath:
    """Matcher for a small subset of XPath over a tree of XML.Element nodes.

    Syntax understood by parse/patBit:
      a|b       alternatives, split on '|'
      a/b       steps, split on '/'
      .         the context element itself
      @name     value of attribute ``name``
      name      child elements whose ``local`` name equals ``name``
      name[p]   as above, kept only if the single step ``p`` matches
      //        the following step is applied at any depth below

    Namespaces are not handled.  Because steps are split on '/' before
    predicates are looked at, a predicate cannot itself contain '/'.

    Every query method returns a non-empty list of matches, or None when
    nothing matched.
    """

    def __init__(self, str):
        """Store the expression text and its compiled form.

        str -- the expression text (the parameter name shadows the builtin
               but is kept so existing keyword callers continue to work).
        """
        self.str = str
        self.pats = self.parse(str)

    def parse(self, str):
        """Compile an expression into a list of alternatives.

        Each alternative is a list holding one entry per '/'-separated
        step: a callable built by patBit, or None for an empty step.
        Empty steps arise from '//' and from a leading or trailing '/';
        for example '//a' splits into ['', '', 'a'], i.e. two Nones
        followed by one callable.
        """
        # Build real lists rather than map objects: process() indexes,
        # slices and takes len() of each alternative.
        return [[self.patBit(step) for step in alternative.split('/')]
                for alternative in str.split('|')]

    def patBit(self, part):
        """Compile a single step.

        Returns None for an empty step, otherwise a callable.  Except for
        the '.' step, the callable accepts (element, anywhere=None) and
        returns a list of matches or None.

        Raises ValueError if the step opens a '[' predicate that is never
        closed.
        """
        # TODO: handle namespaces
        if part == '':
            # // in string
            return None
        if part == '.':
            # NOTE: this step takes no descendant flag, so a '.' placed
            # directly after '//' is not supported.
            return lambda e: [e]
        if part[0] == '@':
            aname = part[1:]

            def attr_step(e, y=None):
                return self.attrs(e, aname, y)
            return attr_step

        opening = part.find('[')
        if opening == -1:
            def child_step(e, y=None):
                return self.children(e, part, y)
            return child_step

        closing = part.find(']', opening + 1)
        if closing == -1:
            # Without this check the -1 would be used as a slice bound and
            # silently chop the last character off the predicate.
            raise ValueError("unterminated '[' in XPath step: %r" % (part,))
        cname = part[:opening]
        # The predicate is itself compiled as one step; anything after the
        # closing ']' is ignored.
        predicate = self.patBit(part[opening + 1:closing])

        def filtered_child_step(e, y=None):
            return self.children(e, cname, y, predicate)
        return filtered_child_step

    def find(self, element):
        """Evaluate every alternative against ``element``.

        Returns the concatenated matches in alternative order, or None.
        """
        res = []
        for pat in self.pats:
            sub = self.process(element, pat)
            if sub:
                res.extend(sub)
        return res or None

    def find1(self, nodelist, pat):
        """Evaluate the step list ``pat`` against each node in ``nodelist``.

        Returns the concatenated matches in node order, or None.
        """
        res = []
        for e in nodelist:
            sub = self.process(e, pat)
            if sub:
                res.extend(sub)
        return res or None

    def process(self, element, pat):
        """Apply the step list ``pat`` starting from ``element``.

        Leading None entries mean "descendant": they are skipped and the
        first real step is called with its ``anywhere`` flag set to 1.
        A step list with no real step left (empty expression, trailing
        '/', bare '//') matches nothing and yields None.
        """
        # Splitting '//a' leaves two Nones up front and '/a' leaves one;
        # skip however many there are instead of indexing pat[1] blindly.
        start = 0
        while start < len(pat) and pat[start] is None:
            start += 1
        if start == len(pat):
            return None
        pat = pat[start:]
        if start:
            res = pat[0](element, 1)
        else:
            res = pat[0](element)
        if not res:
            return None
        if len(pat) > 1:
            return self.find1(res, pat[1:])
        return res

    def attrs(self, element, aname, anywhere):
        """Collect the value of attribute ``aname``.

        Looks on ``element`` itself and, when ``anywhere`` is true, on
        every XML.Element below it (depth first).  Returns a list of the
        ``.value`` of each attribute found, or None.
        """
        # assume this is the end of the line
        res = []
        # 'in' replaces dict.has_key, which no longer exists in Python 3
        if aname in element.attrs:
            res.append(element.attrs[aname].value)
        if anywhere:
            for c in element.children:
                if isinstance(c, XML.Element):
                    sr = self.attrs(c, aname, 1)
                    if sr:
                        res.extend(sr)
        return res or None

    def children(self, element, cname, anywhere, subPat=None):
        """Collect child elements of ``element`` whose ``local`` is ``cname``.

        subPat   -- optional predicate callable; a child is kept only if
                    subPat(child) returns something true.
        anywhere -- when true, also recurse into every XML.Element child
                    (matching or not), appending its matches right after
                    the child itself.

        Returns a list of elements, or None.
        """
        # trickier, we need to stay in control
        # TODO: handle namespaces!!!
        res = []
        for c in element.children:
            if not isinstance(c, XML.Element):
                continue
            if c.local == cname and ((not subPat) or subPat(c)):
                res.append(c)
            if anywhere:
                sr = self.children(c, cname, 1, subPat)
                if sr:
                    res.extend(sr)
        return res or None
ht
[1] http://dev.w3.org/cvsweb/xmlschema/xpath.py
[2] http://www.ltg.ed.ac.uk/~ht/xsv-status.html
[3] ftp://ftp.cogsci.ed.ac.uk/pub/ht/PyLTXML12.EXE
[4] http://www.ltg.ed.ac.uk/software/xml/
--
Henry S. Thompson, HCRC Language Technology Group, University of Edinburgh
W3C Fellow 1999--2001, part-time member of W3C Team
2 Buccleuch Place, Edinburgh EH8 9LW, SCOTLAND -- (44) 131 650-4440
Fax: (44) 131 650-4587, e-mail: ht@cogsci.ed.ac.uk
URL: http://www.ltg.ed.ac.uk/~ht/