[XML-SIG] ANN: Minidom 0.6

Paul Prescod paul@prescod.net
Thu, 22 Apr 1999 16:37:05 -0500


This is a multi-part message in MIME format.
--------------5BA374B008AE171CF0613077
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Attached is a miniature, lightweight subset of the DOM with a few
extensions for namespace handling. (I guess an extended subset is a
contradiction in terms but you get the idea!)

I propose that

 * this become part of the xml package

 *  we consider the DOM-creation functions and namespace extensions for
adoption in a standard Python DOM API

 * DOM-haters try this out and clearly describe where it falls down in
their applications

 * we try to figure out the right set of convenience functions to make the
DOM more palatable for everybody (if possible).

-- 
 Paul Prescod  - ISOGEN Consulting Engineer speaking for only himself
 http://itrc.uwaterloo.ca/~papresco

"The Excursion [Sport Utility Vehicle] is so large that it will come
equipped with adjustable pedals to fit smaller drivers and sensor 
devices that warn the driver when he or she is about to back into a
Toyota or some other object." -- Dallas Morning News
--------------5BA374B008AE171CF0613077
Content-Type: text/plain; charset=us-ascii;
 name="minidom.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="minidom.py"

"""
minidom.py -- a lightweight DOM implementation based on SAX.

Version 0.6

Usage:
======

dom = DOMFromString( string )
dom = DOMFromURL( URL, builder=None )
dom = DOMFromFile( file, builder=None )

Actually, DOMFromURL and DOMFromFile work with PyDOM as well as minidom.
Pass xml.dom.sax_builder.SaxBuilder() as the second argument for PyDOM.

Classes:
=======
The main classes are Document, Element and Text

Document:
	childNodes: heterogenous Python list
	documentElement: root element

Element:
	# main properties
	tagName: element type name (with colon, if it has one)
	childNodes: heterogenous Python list

	# attribute getting methods
	getAttribute( "foo" ): string value of foo attribute
	getAttribute( "foo", "someURI" ): string value of foo attribute in
		namespace named by URI
	# namespaces stuff:
	prefix: type name prefix
	localName: type name following colon
        uri: uri associated with prefix

	#advanced attribute stuff
	attributes: returns attribute mapping object

Text:
	data: get the text data

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
"""
from xml.sax import saxexts
from xml.sax.saxlib import HandlerBase
import string
from StringIO import StringIO
import dom.core

class Node:
	"""Base class shared by the node types in this module.

	Provides a fallback accessor protocol: asking for an attribute named
	``get_<name>`` that does not otherwise exist yields a zero-argument
	callable that returns the current value of ``<name>``.
	"""

	# Guard consulted by __getattr__.  Nothing in this class sets it to a
	# true value; it is kept so existing code that reads or sets it still
	# works.
	inGetAttr=None

	def __getattr__( self, key ):
		"""Synthesize ``get_<name>()`` accessors for missing attributes.

		Python calls this only after normal attribute lookup has failed.
		Raises AttributeError for any name that does not start with
		``get_`` (and for every name while ``inGetAttr`` is true).
		"""
		if self.inGetAttr:
			raise AttributeError( key )
		if key[0:4]=="get_":
			# self and key are bound as default arguments so the callable
			# carries them without relying on nested scopes.
			return (lambda self=self, key=key:
				getattr( self, key[4:] ))
		raise AttributeError( key )


class Document( Node ):
	"""Top of a tree: holds the top-level child nodes and the root element."""
	nodeType = dom.core.DOCUMENT_NODE

	def __init__( self ):
		# No root element is known until one is assigned.
		self.documentElement = None
		# Top-level children, initially none.
		self.childNodes = []

# Index positions inside an attribute *value* tuple: (uri, value).
# NOTE: these names begin with two underscores, so a reference written
# inside a class body is subject to Python's private-name mangling
# (__VALUE becomes _ClassName__VALUE there) and will not find these
# module-level globals.  Use them from module-level functions only.
__URI=0
__VALUE=1

# Index positions inside an attribute *key* tuple: (prefix, localname).
__PREFIX=0
__LOCAL=1

def _qname2String( key ):
	"""Turn a ``(prefix, localname)`` key into a qualified name string.

	Returns the bare local name when the prefix is empty or None, and the
	parts joined with a colon otherwise.
	"""
	if not key[__PREFIX]:
		return key[__LOCAL]
	return string.join( key, ":" )

def _getVal( val ):
	"""Return the value component of a ``(uri, value)`` attribute tuple."""
	value = val[__VALUE]
	return value

class Attribute( Node ):
	"""A single attribute: a name together with its value."""

	def __init__( self, name, value ):
		# Store both constructor arguments unchanged.
		self.name, self.value = name, value

class AttributeList:
	"""Read-only, mapping-style view over an element's attributes.

	The wrapped dictionary maps ``(prefix, localname)`` keys to
	``(uri, value)`` tuples; prefix is None for an unprefixed name.
	"""

	def __init__( self, attrs ):
		self.__attrs=attrs

	def items( self ):
		"""Return a list of ``(qualified name, Attribute)`` pairs."""
		pairs = []
		for key, (uri, value) in self.__attrs.items():
			name = _qname2String( key )
			pairs.append( (name, Attribute( name, value )) )
		return pairs

	def keys( self ):
		"""Return the qualified attribute names as a list."""
		return [_qname2String( key ) for key in self.__attrs.keys()]

	def values( self ):
		"""Return the attribute values as a list."""
		return [value for (uri, value) in self.__attrs.values()]

	def __getitem__( self, attname ):
		"""Look up an attribute value.

		attname is either a qualified name string (``"local"`` or
		``"prefix:local"``) or a ``(localname, uri)`` tuple.

		Raises KeyError when no such attribute exists and TypeError when
		attname is neither string-like nor a 2-tuple.
		"""
		# The stored tuples are unpacked instead of being indexed with the
		# module's __URI/__VALUE/__LOCAL constants: inside a class body
		# those double-underscore names are mangled and cannot be resolved.
		if isinstance( attname, tuple ):
			if len( attname )!=2:
				raise TypeError( attname )
			wanted_local, wanted_uri = attname
			for (prefix, local), (uri, value) in self.__attrs.items():
				if uri==wanted_uri and local==wanted_local:
					return value
			raise KeyError( attname )

		# Anything that can be split like a string is treated as a
		# qualified name; everything else is a type error.
		try:
			parts = attname.split( ":" )
		except AttributeError:
			raise TypeError( attname )
		if len( parts )==1:
			key = (None, parts[0])
		else:
			key = tuple( parts )
		uri, value = self.__attrs[key]
		return value

class Element( Node ):
	"""An element node: a tag name, child nodes and attributes."""
	nodeType=dom.core.ELEMENT_NODE

	def __init__( self, tagName ):
		self.tagName = tagName
		self.childNodes=[]
		# Begin with an empty attribute mapping.  SaxBuilder.startElement
		# replaces it with the attributes read from the document.
		self.attributes = AttributeList( {} )

	def getAttribute( self, attname, uri=None ):
		"""Return the value of one of this element's attributes.

		Without uri, attname is a qualified name ("foo" or "pre:foo").
		With uri, attname is a local name and the attribute is looked up
		by ``(attname, uri)``.

		The lookup goes through ``self.attributes``; a private
		``self.__attrs`` slot would be name-mangled and is never filled in
		by the builder.  Errors raised by the attribute mapping (for
		example KeyError for a missing attribute) propagate to the caller.
		"""
		if uri:
			return self.attributes[(attname,uri)]
		return self.attributes[attname]


class Comment( Node ):
	"""A comment node; ``data`` is whatever was passed to the constructor."""
	nodeType = dom.core.COMMENT_NODE

	def __init__( self, data ):
		self.data = data

class ProcessingInstruction( Node ):
	"""A processing-instruction node with a ``target`` and its ``data``."""
	nodeType = dom.core.PROCESSING_INSTRUCTION_NODE

	def __init__( self, target, data ):
		# Keep both constructor arguments as given.
		self.data = data
		self.target = target

class Text( Node ):
	"""A text node; ``data`` is whatever was passed to the constructor."""
	nodeType = dom.core.TEXT_NODE

	def __init__( self, data ):
		self.data = data

class Error( Node ):
	"""Error object raised by the builder.

	The message is the repr() of each constructor argument, joined with
	single spaces.
	"""

	def __init__( self, *args ):
		pieces = map( repr, args )
		self.message = string.join( pieces )

	def __repr__( self ):
		# The representation is simply the stored message.
		return self.message

class SaxBuilder( HandlerBase ):
	"""SAX document handler that builds a minidom tree.

	Install an instance as a parser's document handler; the tree being
	built is available as the ``document`` attribute.

	While a node is open it temporarily carries a ``parent`` link and a
	``namespace`` dictionary mapping prefixes to URIs (the None key stands
	for the default namespace).  Both are removed when the node is closed.
	"""

	def __init__(self ):
		HandlerBase.__init__(self)
		self.cur_node = self.document = Document()
		# Bindings in force for the whole document.  Having None and
		# "xmlns" here guarantees that prefix lookups for them terminate
		# at the document when no element rebinds them.
		self.cur_node.namespace={"xml": 
					"http://www.w3.org/XML/1998/namespace",
					None:None, "xmlns":None}
		self.cur_node.parent=None

	def addChild( self, node ):
		"""Append node to the children of the node currently being built."""
		self.cur_node.childNodes.append( node )

	def nssplit( self, qname ):
		"""Split a qualified name and resolve its namespace.

		Returns a ``(prefix, localname, uri)`` tuple; prefix is None for
		an unprefixed name.  The prefix is looked up on the current node
		first and then on each enclosing open node in turn.

		Raises Error when no open node binds the prefix.
		"""
		if string.find( qname, ":" )!=-1:
			prefix,local = string.split( qname, ":" )
		else:
			prefix,local = None,qname
		
		node = self.cur_node
		while node:
			if prefix in node.namespace:
				uri = node.namespace[prefix]
				return (prefix,local,uri)
			node=node.parent

		raise Error( "Namespace def not found for "+prefix )

	def handleAttrs( self, attrs ):
		"""Convert SAX attributes into the mapping AttributeList wraps.

		Returns a dictionary from ``(prefix, localname)`` to
		``(uri, value)``.  Namespace declarations (``xmlns`` and
		``xmlns:prefix``) are processed first and recorded on the current
		node, so the remaining attributes of the same element can already
		use the prefixes they declare.
		"""
		outattrs = {}
		handleLater = []

		for (attrname,value) in attrs.items():
			if attrname[0:6]=="xmlns:":
				prefix,local=string.split( attrname, ":" )
				outattrs[(prefix,local)]=(None,value)
				self.cur_node.namespace[local]=value
			elif attrname=="xmlns":
				prefix,local=(None,"xmlns")
				outattrs[(prefix,local)]=(None,value)
				self.cur_node.namespace[None]=value
			else: 
				handleLater.append( (attrname, value ) )

		# NOTE(review): unprefixed attributes are resolved through the
		# default namespace here, as before.  The Namespaces in XML
		# recommendation says default declarations do not apply to
		# attribute names -- confirm whether callers rely on this.
		for (attrname,value) in handleLater:
			(prefix,local,uri)=self.nssplit( attrname )
			outattrs[(prefix, local)]=(uri,value)

		return outattrs

	def startElement( self, tagname , attrs={} ):
		"""SAX callback: open a new Element and make it the current node."""
		node = Element( tagname )
		self.addChild( node )

		node.parent = self.cur_node
		self.cur_node = node
		# The element starts with no bindings of its own.  Seeding this
		# dictionary with None:None would shadow a default namespace
		# declared on an ancestor, so inherited bindings are instead found
		# by nssplit() walking up the parent chain.
		node.namespace = {}
		node.attributes = AttributeList( self.handleAttrs( attrs ) )

		# Lower-case alias, set in addition to Element.tagName.
		node.tagname = tagname
		(node.prefix, node.localName, node.uri)= self.nssplit( tagname )


	def endElement( self, name, attrs={} ):
		"""SAX callback: close the current element and return to its parent."""
		# The build-time helpers are dropped from the finished node.
		del self.cur_node.namespace

		node = self.cur_node
		self.cur_node = node.parent
		del node.parent

	def comment( self, s):
		"""Add a Comment node holding s to the current node."""
		self.addChild( Comment( s  ) )

	def processingInstruction( self, target, data ):
		"""SAX callback: add a ProcessingInstruction node to the current node."""
		node = ProcessingInstruction( target, data )
		self.addChild( node )

	def characters( self, chars, start, length ): 
		"""SAX callback: add a Text node for chars[start:start+length]."""
		node = Text( chars[start:start+length] )
		self.addChild( node )

	def endDocument( self ):
		"""SAX callback: finish the tree and record the document element.

		Raises Error when the document has no element child.
		"""
		# Every element must have been closed, leaving the document current.
		assert( not self.cur_node.parent )
		del self.cur_node.parent
		for node in self.cur_node.childNodes:
			if node.nodeType==dom.core.ELEMENT_NODE:
				self.document.documentElement = node
		if not self.document.documentElement:
			raise Error( "No document element" )

		del self.cur_node.namespace

# public constructors
def DOMFromString( string ):
	"""Parse XML held in a string and return the resulting document.

	The text is wrapped in a StringIO object and handed to DOMFromFile.
	"""
	source = StringIO( string )
	return DOMFromFile( source )

def DOMFromURL( URL, builder=None ):
	"""Parse the document at URL and return ``builder.document``.

	builder is the SAX document handler to use; when it is omitted (or
	false) a fresh SaxBuilder is created.
	"""
	if not builder:
		builder = SaxBuilder()
	parser = saxexts.make_parser()
	parser.setDocumentHandler( builder )
	parser.parse( URL )
	return builder.document

def DOMFromFile( file, builder=None ):
	"""Parse an open file-like object and return ``builder.document``.

	builder is the SAX document handler to use; when it is omitted (or
	false) a fresh SaxBuilder is created.
	"""
	if not builder:
		builder = SaxBuilder()
	parser = saxexts.make_parser()
	parser.setDocumentHandler( builder )
	parser.parseFile( file )
	return builder.document

if __name__=="__main__":
	# Self-test: build the same sample document through each public
	# constructor, plus once through PyDOM's builder, and check that all
	# of the trees serialize to the same text.
	import sys, os
	here = os.path.dirname( sys.argv[0] )
	sample = os.path.join( here, "test/quotes.xml" )
	trees = [
		DOMFromURL( sample ),
		DOMFromFile( open( sample ) ),
		DOMFromString( open( sample ).read() ),
	]

	from xml.dom.writer import XmlWriter 
	import xml.dom.sax_builder

	# test against PyDOM
	trees.append( DOMFromURL( sample, xml.dom.sax_builder.SaxBuilder() ) )

	rendered = []
	for tree in trees:
		sink = StringIO()
		rendered.append( sink )
		XmlWriter( sink ).walk( tree )

	# Every later serialization must match the first one.
	for sink in rendered[1:]:
		assert sink.getvalue() == rendered[0].getvalue()
	# sink is still the last buffer compared above.
	print( sink.getvalue() )

# I don't like modules that export their imported modules.
#
# The imported modules have to stay in the module namespace, though: the
# functions and methods above look up names such as string, saxexts and dom
# at call time, so deleting them here would break every one of those calls
# once this file is imported.  Listing everything else in __all__ keeps the
# modules out of "from <module> import *" without removing them.
_module_type = type( string )
__all__ = []
# Iterate over a snapshot: the loop variables themselves are globals.
for _name, _value in list( globals().items() ):
	if _name[:1]!="_" and type( _value ) is not _module_type:
		__all__.append( _name )
del _name, _value, _module_type


--------------5BA374B008AE171CF0613077--