[XML-SIG] XPath in Python 2

Henry S. Thompson ht@cogsci.ed.ac.uk
10 Jul 2000 14:37:14 +0100


Here's an existing implementation of your 10%, plus a bit more
(..../@foo).  It's open source [1], available as part of the XSV XML
Schema validator [2].  It's currently operating against an XML substrate
provided by our PyLTXML embedding in Python [3] of our LT XML API
[4].  PyLTXML will itself become open source in the next few weeks.

Enjoy.

ht

# Copyright (C) 2000 LTG -- See accompanying COPYRIGHT and COPYING files
import string
import types
import XML

class XPath:
  """Matcher for a small subset of XPath over a tree of XML.Element nodes.

  Syntax handled by parse/patBit:
    a|b       alternatives; results of every alternative are concatenated
    a/b       steps applied left to right
    .         the context element itself
    @name     the value of attribute `name`
    name      child elements whose `local` equals `name`
    name[p]   as above, kept only when the nested step `p` yields a result
    //        (an empty step) the following step searches all descendants

  Namespaces are not handled.  Every lookup method returns a non-empty
  list on success and None when nothing matched.
  """

  def __init__(self,str):
    """Compile the path expression `str`; the step lists go in self.pats."""
    self.str=str
    self.pats=self.parse(str)

  def parse(self,str):
    """Return one list of step functions per '|'-separated alternative.

    Steps come from patBit, so the empty strings produced by splitting
    '//' (or a leading '/') show up as None entries.
    """
    # Build real lists rather than map() results: process() indexes and
    # slices each pattern, which a lazy map object does not support.
    return [[self.patBit(step) for step in alternative.split('/')]
            for alternative in str.split('|')]

  def patBit(self,part):
    """Compile a single step into a callable, or None for an empty step.

    Callables take (element, anywhere=None) -- except the one for '.',
    which takes only the element -- and return a list or None.
    """
    # TODO: handle namespaces
    if part=='':
      # // in string
      return None
    if part=='.':
      return lambda e:[e]
    if part[0]=='@':
      return lambda e,y=None,s=self,a=part[1:]:s.attrs(e,a,y)
    open_at=part.find('[')
    if open_at>-1:
      # name[predicate]: the text between the brackets is compiled as a
      # step of its own and used as a filter on each candidate child.
      close_at=part.find(']')
      name=part[0:open_at]
      predicate=self.patBit(part[open_at+1:close_at])
      return lambda e,y=None,s=self,n=name,m=predicate:s.children(e,n,y,m)
    return lambda e,y=None,s=self,n=part:s.children(e,n,y)

  def find(self,element):
    """Apply every alternative to `element`.

    Returns the concatenated results in alternative order, or None if no
    alternative matched.
    """
    res=[]
    for pat in self.pats:
      sub=self.process(element,pat)
      if sub:
        res.extend(sub)
    if res:
      return res
    return None

  def find1(self,nodelist,pat):
    """Apply the step list `pat` to each node in `nodelist`.

    Returns the concatenated results in node order, or None if none of
    the nodes matched.
    """
    res=[]
    for e in nodelist:
      sub=self.process(e,pat)
      if sub:
        res.extend(sub)
    if res:
      return res
    return None

  def process(self,element,pat):
    """Run the first step of `pat` on `element`, then recurse on the rest.

    Returns the final step's results, or None as soon as a step yields
    nothing.
    """
    pe=pat[0]
    if pe:
      res=pe(element)
    else:
      # None means descendant, side effect of split is two Nones in first
      # place; drop them and run the next real step with anywhere=1.
      if pat[1]:
        pat=pat[1:]
      else:
        pat=pat[2:]
      res=pat[0](element,1)
    if not res:
      return None
    if len(pat)>1:
      return self.find1(res,pat[1:])
    return res

  def attrs(self,element,aname,anywhere):
    """Return the value(s) of attribute `aname`, or None if absent.

    Looks at `element` itself and, when `anywhere` is true, at every
    XML.Element descendant as well (depth-first, document order).
    """
    # assume this is the end of the line
    # Index-and-catch instead of has_key(), which Python 3 dicts lack.
    try:
      attr=element.attrs[aname]
    except KeyError:
      res=None
    else:
      res=[attr.value]
    if anywhere:
      for c in element.children:
        if isinstance(c,XML.Element):
          sr=self.attrs(c,aname,1)
          if sr:
            if res:
              res=res+sr
            else:
              res=sr
    return res

  def children(self,element,cname,anywhere,subPat=None):
    """Return child elements of `element` whose `local` equals `cname`.

    With `subPat`, a child is kept only if subPat(child) is true.  With
    `anywhere` true the search continues into every XML.Element child,
    so all matching descendants are returned.  Returns None if nothing
    matched.
    """
    # trickier, we need to stay in control
    # TODO: handle namespaces!!!
    res=[]
    for c in element.children:
      if isinstance(c,XML.Element):
        if c.local==cname:
          if (not subPat) or subPat(c):
            res.append(c)
        if anywhere:
          sr=self.children(c,cname,1,subPat)
          if sr:
            res.extend(sr)
    if res:
      return res
    return None

ht

[1] http://dev.w3.org/cvsweb/xmlschema/xpath.py
[2] http://www.ltg.ed.ac.uk/~ht/xsv-status.html
[3] ftp://ftp.cogsci.ed.ac.uk/pub/ht/PyLTXML12.EXE
[4] http://www.ltg.ed.ac.uk/software/xml/
-- 
  Henry S. Thompson, HCRC Language Technology Group, University of Edinburgh
          W3C Fellow 1999--2001, part-time member of W3C Team
     2 Buccleuch Place, Edinburgh EH8 9LW, SCOTLAND -- (44) 131 650-4440
	    Fax: (44) 131 650-4587, e-mail: ht@cogsci.ed.ac.uk
		     URL: http://www.ltg.ed.ac.uk/~ht/