[XML-SIG] ANN: Minidom 0.6
Paul Prescod
paul@prescod.net
Thu, 22 Apr 1999 16:37:05 -0500
This is a multi-part message in MIME format.
--------------5BA374B008AE171CF0613077
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Attached is a miniature, lightweight subset of the DOM with a few
extensions for namespace handling. (I guess an extended subset is a
contradiction in terms but you get the idea!)
I propose that
* this become part of the xml package
* we consider the DOM-creation functions and namespace extensions for
adoption in a standard Python DOM API
* DOM-haters try this out and clearly describe where it falls down in
their applications
* we try to figure out the right set of convenience functions to make the
DOM more palatable for everybody (if possible).
--
Paul Prescod - ISOGEN Consulting Engineer speaking for only himself
http://itrc.uwaterloo.ca/~papresco
"The Excursion [Sport Utility Vehicle] is so large that it will come
equipped with adjustable pedals to fit smaller drivers and sensor
devices that warn the driver when he or she is about to back into a
Toyota or some other object." -- Dallas Morning News
--------------5BA374B008AE171CF0613077
Content-Type: text/plain; charset=us-ascii;
name="minidom.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="minidom.py"
"""
minidom.py -- a lightweight DOM implementation based on SAX.
Version 0.6
Usage:
======
dom = DOMFromString( string )
dom = DOMFromURL( URL, builder=None )
dom = DOMFromFile( file, builder=None )
Actually, the three constructor methods work with PyDOM as well as minidom.
Use xml.dom.sax_builder.SaxBuilder() for PyDOM.
Classes:
=======
The main classes are Document, Element and Text
Document:
childNodes: heterogeneous Python list
documentElement: root element
Element:
# main properties
tagName: element type name (with colon, if it has one)
childNodes: heterogeneous Python list
# attribute getting methods
getAttribute( "foo" ): string value of foo attribute
getAttribute( "foo", "someURI" ): string value of foo attribute in namespace named by URI
# namespaces stuff:
prefix: type name prefix
localName: type name following colon
uri: uri associated with prefix
#advanced attribute stuff
attributes: returns attribute mapping object
Text:
data: get the text data
Todo:
=====
* convenience methods for getting elements and text.
* more testing
* bring some of the writer and linearizer code into conformance with this
interface
"""
from xml.sax import saxexts
from xml.sax.saxlib import HandlerBase
import string
from StringIO import StringIO
import dom.core
class Node:
    """Base class shared by the node classes in this module.

    For any attribute ``foo`` a zero-argument accessor ``get_foo()`` is
    synthesised on demand, so both ``node.foo`` and ``node.get_foo()``
    read the same value.
    """

    # Guard flag consulted by __getattr__; nothing in this class sets it.
    inGetAttr = None

    def __getattr__(self, key):
        """Resolve names that ordinary attribute lookup did not find.

        A name of the form ``get_<attr>`` yields a callable that returns
        ``getattr(self, "<attr>")`` when invoked.  Every other name, and
        every lookup made while ``inGetAttr`` is true, raises
        ``AttributeError``.
        """
        is_accessor = key[:4] == "get_"
        if self.inGetAttr or not is_accessor:
            raise AttributeError(key)

        # Defaults bind the instance and the requested name at creation
        # time; the target attribute is only looked up when called.
        def accessor(self=self, key=key):
            return getattr(self, key[4:])

        return accessor
        # Disabled alternative (mapping plain names onto get_ methods):
        # self.inGetAttr=1
        # func = getattr( self, "get_"+key )
        # del self.inGetAttr
        # return func()
class Document(Node):
    """Top-level node of a tree: holds the root-level children."""

    nodeType = dom.core.DOCUMENT_NODE

    def __init__(self):
        # No root element is known until one is assigned from outside.
        self.documentElement = None
        # Top-level nodes, in the order they are appended.
        self.childNodes = []
# Positions inside the two kinds of 2-tuples used by the attribute tables:
# keys are (prefix, localName) and values are (namespaceURI, value).
__PREFIX, __LOCAL = 0, 1
__URI, __VALUE = 0, 1


def _qname2String(key):
    """Turn a (prefix, localName) key into its qualified-name string.

    A false prefix (None or "") gives just the local name; otherwise the
    parts are joined with a colon.
    """
    if not key[__PREFIX]:
        return key[__LOCAL]
    return ":".join(key)


def _getVal(val):
    """Return the value member of a (namespaceURI, value) pair."""
    return val[__VALUE]
class Attribute(Node):
    """Name/value pair describing one attribute."""

    def __init__(self, name, value):
        self.value = value
        self.name = name
class AttributeList:
    """Mapping-style view over a namespace-aware attribute table.

    The wrapped dictionary maps ``(prefix, localName)`` keys to
    ``(namespaceURI, value)`` pairs.  Lookups accept either a qualified
    name string or a ``(localName, namespaceURI)`` tuple.
    """

    def __init__(self, attrs):
        """Wrap *attrs*, the ``{(prefix, local): (uri, value)}`` table."""
        self.__attrs = attrs

    def items(self):
        """Return a list of ``(qualifiedName, Attribute)`` pairs."""
        pairs = []
        for key, val in self.__attrs.items():
            name = _qname2String(key)
            pairs.append((name, Attribute(name, _getVal(val))))
        return pairs

    def keys(self):
        """Return the qualified names of all attributes as a list."""
        return [_qname2String(key) for key in self.__attrs.keys()]

    def values(self):
        """Return the attribute values as a list."""
        return [_getVal(val) for val in self.__attrs.values()]

    def __getitem__(self, attname):
        """Look up one attribute value.

        attname is either a string (``"local"`` or ``"prefix:local"``) or
        a ``(localName, namespaceURI)`` tuple.

        Raises KeyError when no attribute matches and TypeError when
        *attname* is neither a string nor a 2-tuple.
        """
        # The pairs are unpacked directly rather than indexed with the
        # module-level __URI/__VALUE/__LOCAL constants: inside a class
        # body those names are mangled to _AttributeList__... and would
        # be undefined.
        if isinstance(attname, str):
            parts = attname.split(":")
            if len(parts) == 1:
                key = (None, parts[0])
            else:
                key = tuple(parts)
            _uri, value = self.__attrs[key]
            return value
        if isinstance(attname, tuple) and len(attname) == 2:
            local, uri = attname
            for (_prefix, key_local), (attr_uri, value) in self.__attrs.items():
                if attr_uri == uri and key_local == local:
                    return value
            raise KeyError(attname)
        raise TypeError(attname)
class Element(Node):
    """An element node: a tag name plus an ordered list of children.

    SaxBuilder.startElement attaches further attributes to each element
    it creates: ``attributes`` (an AttributeList), ``prefix``,
    ``localName`` and ``uri``.
    """

    nodeType = dom.core.ELEMENT_NODE

    def __init__(self, tagName):
        """Create an element named *tagName* with no children."""
        self.tagName = tagName
        self.childNodes = []

    def getAttribute(self, attname, uri=None):
        """Return the value of one attribute.

        attname -- qualified name, or the local name when *uri* is given
        uri     -- optional namespace URI used to qualify *attname*

        Raises KeyError when the element has no attribute mapping; the
        mapping's own lookup errors pass through unchanged.
        """
        # The mapping lives on ``attributes`` (assigned by the builder).
        # The previous private ``__attrs`` slot was initialised to None
        # and never filled, so every lookup failed with TypeError.
        attrs = getattr(self, "attributes", None)
        if attrs is None:
            raise KeyError(attname)
        if uri:
            return attrs[(attname, uri)]
        return attrs[attname]
class Comment(Node):
    """Comment node; the comment text is stored in ``data``."""

    nodeType = dom.core.COMMENT_NODE

    def __init__(self, data):
        self.data = data
class ProcessingInstruction(Node):
    """Processing-instruction node holding a ``target`` and its ``data``."""

    nodeType = dom.core.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.data = data
        self.target = target
class Text(Node):
    """Text node; the character data is stored in ``data``."""

    nodeType = dom.core.TEXT_NODE

    def __init__(self, data):
        self.data = data
class Error(Node):
    """Error object raised by the builder in this module.

    ``message`` is the repr of every constructor argument, separated by
    single spaces; it is also what ``repr()`` of the error returns.
    """

    def __init__(self, *args):
        rendered = [repr(arg) for arg in args]
        self.message = " ".join(rendered)

    def __repr__(self):
        return self.message
class SaxBuilder(HandlerBase):
    """SAX document handler that assembles a minidom tree.

    While parsing, every open node carries two temporary attributes:
    ``parent`` (the enclosing node, None for the document) and
    ``namespace`` (a dict mapping prefixes declared on that node to
    URIs, with the default namespace stored under the key None).  Both
    are removed again when the node is closed.  The finished tree is
    available as ``self.document``.
    """

    def __init__(self):
        HandlerBase.__init__(self)
        self.cur_node = self.document = Document()
        # Outermost scope: the predefined "xml" prefix, plus "no
        # namespace" for unprefixed names and for the "xmlns" prefix.
        self.cur_node.namespace = {
            "xml": "http://www.w3.org/XML/1998/namespace",
            None: None,
            "xmlns": None,
        }
        self.cur_node.parent = None

    def addChild(self, node):
        """Append *node* to the children of the node currently open."""
        self.cur_node.childNodes.append(node)

    def nssplit(self, qname):
        """Split *qname* and resolve its prefix against the open scopes.

        Returns a ``(prefix, localName, uri)`` tuple; prefix is None for
        an unprefixed name.  Raises Error when no open node declares the
        prefix.
        """
        if ":" in qname:
            # Split on the first colon only, so a stray extra colon ends
            # up in the local part instead of breaking the unpacking.
            prefix, local = qname.split(":", 1)
        else:
            prefix, local = None, qname
        # Innermost declaration wins: walk from the open node outwards.
        node = self.cur_node
        while node is not None:
            if prefix in node.namespace:
                return (prefix, local, node.namespace[prefix])
            node = node.parent
        raise Error("Namespace def not found for " + prefix)

    def handleAttrs(self, attrs):
        """Convert SAX attributes into the AttributeList table format.

        Returns a dict mapping ``(prefix, localName)`` to ``(uri, value)``.
        Namespace declarations are recorded in the current node's scope
        first, so that the remaining attributes can use prefixes declared
        on the same element regardless of attribute order.

        NOTE(review): unprefixed attributes are resolved like element
        names and therefore receive the default namespace in scope; the
        Namespaces in XML recommendation puts them in no namespace.
        Confirm which behaviour callers expect.
        """
        outattrs = {}
        deferred = []
        scope = self.cur_node.namespace
        for attrname, value in attrs.items():
            if attrname[:6] == "xmlns:":
                prefix, local = attrname.split(":", 1)
                outattrs[(prefix, local)] = (None, value)
                scope[local] = value
            elif attrname == "xmlns":
                outattrs[(None, "xmlns")] = (None, value)
                scope[None] = value
            else:
                deferred.append((attrname, value))
        for attrname, value in deferred:
            prefix, local, uri = self.nssplit(attrname)
            outattrs[(prefix, local)] = (uri, value)
        return outattrs

    def startElement(self, tagname, attrs=None):
        """Open a new Element as a child of the current node."""
        if attrs is None:
            attrs = {}
        node = Element(tagname)
        self.addChild(node)
        node.parent = self.cur_node
        self.cur_node = node
        # Start with an empty scope so that any prefix -- including the
        # default namespace, keyed by None -- not declared on this
        # element is looked up on its ancestors.  Pre-seeding None here
        # would hide a default namespace declared further up.
        node.namespace = {}
        node.attributes = AttributeList(self.handleAttrs(attrs))
        node.tagname = tagname
        node.prefix, node.localName, node.uri = self.nssplit(tagname)

    def endElement(self, name, attrs=None):
        """Close the current element and make its parent current again."""
        node = self.cur_node
        del node.namespace
        self.cur_node = node.parent
        del node.parent

    def comment(self, s):
        """Add a Comment node with text *s* to the current node."""
        self.addChild(Comment(s))

    def processingInstruction(self, target, data):
        """Add a ProcessingInstruction node to the current node."""
        self.addChild(ProcessingInstruction(target, data))

    def characters(self, chars, start, length):
        """Add a Text node for ``chars[start:start+length]``."""
        self.addChild(Text(chars[start:start + length]))

    def endDocument(self):
        """Finish the tree: set documentElement and drop parse-time state.

        Raises Error when the document has no element child.
        """
        top = self.cur_node
        # Every element must have been closed by now.
        assert not top.parent
        del top.parent
        # If several top-level elements exist, the last one is kept.
        for node in top.childNodes:
            if node.nodeType == dom.core.ELEMENT_NODE:
                self.document.documentElement = node
        if self.document.documentElement is None:
            raise Error("No document element")
        del top.namespace
# public constructors
def DOMFromString(string):
    """Build a document from XML source held in the text *string*."""
    source = StringIO(string)
    return DOMFromFile(source)
def DOMFromURL(URL, builder=None):
    """Parse the resource named by *URL* and return the built document.

    builder -- SAX document handler exposing a ``document`` attribute;
               a fresh SaxBuilder is used when none (or a false value)
               is supplied.
    """
    if not builder:
        builder = SaxBuilder()
    parser = saxexts.make_parser()
    parser.setDocumentHandler(builder)
    parser.parse(URL)
    return builder.document
def DOMFromFile(file, builder=None):
    """Parse the open file object *file* and return the built document.

    builder -- SAX document handler exposing a ``document`` attribute;
               a fresh SaxBuilder is used when none (or a false value)
               is supplied.
    """
    if not builder:
        builder = SaxBuilder()
    parser = saxexts.make_parser()
    parser.setDocumentHandler(builder)
    parser.parseFile(file)
    return builder.document
# Self-test, run only when the module is executed as a script: parse
# test/quotes.xml (looked up relative to the script's own directory) through
# each public constructor, serialise every resulting tree with XmlWriter and
# check that all serialisations are identical.
# NOTE(review): the indentation of this section has been lost, so the nesting
# below is not recoverable from the text alone -- in particular whether the
# final print runs once after the comparison loop or once per iteration.
# Restore the nesting from an intact copy before running.
if __name__=="__main__":
import sys, os
file = os.path.join( os.path.dirname( sys.argv[0] ), "test/quotes.xml" )
docs=[]
# Same input via URL/path, open file object, and in-memory string.
docs.append( DOMFromURL( file ) )
docs.append( DOMFromFile( open( file ) ) )
docs.append( DOMFromString( open( file ).read() ) )
from xml.dom.writer import XmlWriter
import xml.dom.sax_builder
# test against PyDOM
docs.append( DOMFromURL( file, xml.dom.sax_builder.SaxBuilder() ) )
# One StringIO buffer per document, filled by walking the tree.
outputs=[]
for doc in docs:
outputs.append( StringIO() )
XmlWriter(outputs[-1]).walk( doc )
# Every later serialisation must match the first one exactly.
for output in outputs[1:]:
assert output.getvalue() == outputs[0].getvalue()
print output.getvalue()
# I don't like modules that export their imported modules, so the public
# names are listed explicitly: "from <module> import *" then brings in
# only these.  The imported modules must stay in the module namespace --
# the functions and classes above look up string, dom and saxexts at call
# time, so deleting them from the namespace would break every one of them.
__all__ = [
    "Node",
    "Document",
    "Attribute",
    "AttributeList",
    "Element",
    "Comment",
    "ProcessingInstruction",
    "Text",
    "Error",
    "SaxBuilder",
    "DOMFromString",
    "DOMFromURL",
    "DOMFromFile",
]
--------------5BA374B008AE171CF0613077--