[Python-checkins] CVS: python/dist/src/Lib/xml/dom minidom.py,1.13,1.14

Fred L. Drake python-dev@python.org
Tue, 21 Nov 2000 14:02:24 -0800


Update of /cvsroot/python/python/dist/src/Lib/xml/dom
In directory slayer.i.sourceforge.net:/tmp/cvs-serv16177/Lib/xml/dom

Modified Files:
	minidom.py 
Log Message:
Reduce the visibility of imported modules for cleaner "from ... import *"
behavior.

Added support for the Attr.ownerElement attribute.

Everywhere:  Define constant object attributes in the classes rather than
on the instances during object construction.  This reduces the amount of
work needed for object construction and destruction; these need to be
lightweight operations on a DOM.

Node._get_firstChild(),
Node._get_lastChild():  Return None if there are no children (required for
        compliance with DOM level 1).

Node.insertBefore():  If refChild is None, append the new node instead of
        failing (required for compliance).  Also, update the sibling
        relationships.  Return the inserted node (required for compliance).

Node.appendChild():  Update the parent of the appended node.

Node.replaceChild():  Actually replace the old child!  Update the parent
        and sibling relationships of both the old and new children.  Return
        the replaced child (required for compliance).

Node.normalize():  Implemented the normalize() method.  Required for
        compliance, but missing from the release.  Useful for joining
        adjacent Text nodes into a single node for easier processing.

Node.cloneNode():  Actually make this work.  Don't let the new node share
        the instance __dict__ with the original.  Do proper recursion if
        doing a "deep" clone.  Move the attribute cloning out of the base
        class, since only Element is supposed to have attributes.

Node.unlink():  Simplify handling of child nodes for efficiency, and
        remove the attribute handling since only Element nodes support
        attributes.

Attr.cloneNode():  Extend this to clear the ownerElement attribute in
        the clone.

AttributeList.items(),
AttributeList.itemsNS():  Slight performance improvement (avoid lambda).

Element.cloneNode():  Extend Node.cloneNode() with support for the
        attributes.  Clone the Attr objects after creating the underlying
        clone.

Element.unlink():  Clean out the attributes here instead of in the base
        class, since this is the only class that will have them.

Element.toxml():  Adjust to create only one AttributeList instance; minor
        efficiency improvement.

_nssplit():  No need to re-import string.

Document.__init__():  Removed; no longer needed now that constant attributes
        are initialized in the class itself.

Document.createElementNS(),
Document.createAttributeNS():  Use the defined constructors rather than
        directly accessing the classes.

_get_StringIO():  New function.  Create an output StringIO using the most
        efficient available flavor.

parse(),
parseString():  Import pulldom here instead of in the public namespace of
        the module.


Index: minidom.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/xml/dom/minidom.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -r1.13 -r1.14
*** minidom.py	2000/10/23 18:09:50	1.13
--- minidom.py	2000/11/21 22:02:22	1.14
***************
*** 15,22 ****
  """
  
- import pulldom
  import string
! from StringIO import StringIO
  import types
  
  class Node:
--- 15,31 ----
  """
  
  import string
! _string = string
! del string
! 
! # localize the types, and allow support for Unicode values if available:
  import types
+ _TupleType = types.TupleType
+ try:
+     _StringTypes = (types.StringType, types.UnicodeType)
+ except AttributeError:
+     _StringTypes = (types.StringType,)
+ del types
+ 
  
  class Node:
***************
*** 45,49 ****
              Node.allnodes[index] = repr(self.__dict__)
              if Node.debug is None:
!                 Node.debug = StringIO()
                  #open( "debug4.out", "w" )
              Node.debug.write("create %s\n" % index)
--- 54,58 ----
              Node.allnodes[index] = repr(self.__dict__)
              if Node.debug is None:
!                 Node.debug = _get_StringIO()
                  #open( "debug4.out", "w" )
              Node.debug.write("create %s\n" % index)
***************
*** 80,84 ****
  
      def toxml(self):
!         writer = StringIO()
          self.writexml(writer)
          return writer.getvalue()
--- 89,93 ----
  
      def toxml(self):
!         writer = _get_StringIO()
          self.writexml(writer)
          return writer.getvalue()
***************
*** 91,104 ****
  
      def _get_firstChild(self):
!         return self.childNodes[0]
  
      def _get_lastChild(self):
!         return self.childNodes[-1]
  
      def insertBefore(self, newChild, refChild):
!         index = self.childNodes.index(refChild)
!         self.childNodes.insert(index, newChild)
!         if self._makeParentNodes:
!             newChild.parentNode = self
  
      def appendChild(self, node):
--- 100,127 ----
  
      def _get_firstChild(self):
!         if self.childNodes:
!             return self.childNodes[0]
  
      def _get_lastChild(self):
!         if self.childNodes:
!             return self.childNodes[-1]
  
      def insertBefore(self, newChild, refChild):
!         if refChild is None:
!             self.appendChild(newChild)
!         else:
!             index = self.childNodes.index(refChild)
!             self.childNodes.insert(index, newChild)
!             newChild.nextSibling = refChild
!             refChild.previousSibling = newChild
!             if index:
!                 node = self.childNodes[index-1]
!                 node.nextSibling = newChild
!                 newChild.previousSibling = node
!             else:
!                 newChild.previousSibling = None
!             if self._makeParentNodes:
!                 newChild.parentNode = self
!         return newChild
  
      def appendChild(self, node):
***************
*** 111,147 ****
          node.nextSibling = None
          self.childNodes.append(node)
          return node
  
      def replaceChild(self, newChild, oldChild):
          index = self.childNodes.index(oldChild)
!         self.childNodes[index] = oldChild
  
      def removeChild(self, oldChild):
!         index = self.childNodes.index(oldChild)
!         del self.childNodes[index]
  
      def cloneNode(self, deep):
          import new
!         clone = new.instance(self.__class__, self.__dict__)
!         clone.attributes = self.attributes.copy()
!         if not deep:
!             clone.childNodes = []
!         else:
!             clone.childNodes = map(lambda x: x.cloneNode, self.childNodes)
          return clone
  
      def unlink(self):
          self.parentNode = None
!         while self.childNodes:
!             self.childNodes[-1].unlink()
!             del self.childNodes[-1] # probably not most efficient!
          self.childNodes = None
          self.previousSibling = None
          self.nextSibling = None
-         if self.attributes:
-             for attr in self._attrs.values():
-                 self.removeAttributeNode(attr)
-             assert not len(self._attrs)
-             assert not len(self._attrsNS)
          if Node._debug:
              index = repr(id(self)) + repr(self.__class__)
--- 134,200 ----
          node.nextSibling = None
          self.childNodes.append(node)
+         if self._makeParentNodes:
+             node.parentNode = self
          return node
  
      def replaceChild(self, newChild, oldChild):
+         if newChild is oldChild:
+             return
          index = self.childNodes.index(oldChild)
!         self.childNodes[index] = newChild
!         if self._makeParentNodes:
!             newChild.parentNode = self
!             oldChild.parentNode = None
!         newChild.nextSibling = oldChild.nextSibling
!         newChild.previousSibling = oldChild.previousSibling
!         oldChild.newChild = None
!         oldChild.previousSibling = None
!         return oldChild
  
      def removeChild(self, oldChild):
!         self.childNodes.remove(oldChild)
!         if self._makeParentNodes:
!             oldChild.parentNode = None
!         return oldChild
  
+     def normalize(self):
+         if len(self.childNodes) > 1:
+             L = [self.childNodes[0]]
+             for child in self.childNodes[1:]:
+                 if (  child.nodeType == Node.TEXT_NODE
+                       and L[-1].nodeType == child.nodeType):
+                     # collapse text node
+                     node = L[-1]
+                     node.data = node.nodeValue = node.data + child.data
+                     node.nextSibling = child.nextSibling
+                     child.unlink()
+                 else:
+                     L[-1].nextSibling = child
+                     child.previousSibling = L[-1]
+                     L.append(child)
+                     child.normalize()
+             self.childNodes = L
+         elif self.childNodes:
+             # exactly one child -- just recurse
+             self.childNodes[0].normalize()
+ 
      def cloneNode(self, deep):
          import new
!         clone = new.instance(self.__class__, self.__dict__.copy())
!         if self._makeParentNodes:
!             clone.parentNode = None
!         clone.childNodes = []
!         if deep:
!             for child in self.childNodes:
!                 clone.appendChild(child.cloneNode(1))
          return clone
  
      def unlink(self):
          self.parentNode = None
!         for child in self.childNodes:
!             child.unlink()
          self.childNodes = None
          self.previousSibling = None
          self.nextSibling = None
          if Node._debug:
              index = repr(id(self)) + repr(self.__class__)
***************
*** 151,158 ****
  def _write_data(writer, data):
      "Writes datachars to writer."
!     data = string.replace(data, "&", "&amp;")
!     data = string.replace(data, "<", "&lt;")
!     data = string.replace(data, "\"", "&quot;")
!     data = string.replace(data, ">", "&gt;")
      writer.write(data)
  
--- 204,212 ----
  def _write_data(writer, data):
      "Writes datachars to writer."
!     replace = _string.replace
!     data = replace(data, "&", "&amp;")
!     data = replace(data, "<", "&lt;")
!     data = replace(data, "\"", "&quot;")
!     data = replace(data, ">", "&gt;")
      writer.write(data)
  
***************
*** 175,186 ****
  class Attr(Node):
      nodeType = Node.ATTRIBUTE_NODE
  
      def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
          # skip setattr for performance
!         self.__dict__["localName"] = localName or qName
!         self.__dict__["nodeName"] = self.__dict__["name"] = qName
!         self.__dict__["namespaceURI"] = namespaceURI
!         self.__dict__["prefix"] = prefix
!         self.attributes = None
          Node.__init__(self)
          # nodeValue and value are set elsewhere
--- 229,242 ----
  class Attr(Node):
      nodeType = Node.ATTRIBUTE_NODE
+     attributes = None
+     ownerElement = None
  
      def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
          # skip setattr for performance
!         d = self.__dict__
!         d["localName"] = localName or qName
!         d["nodeName"] = d["name"] = qName
!         d["namespaceURI"] = namespaceURI
!         d["prefix"] = prefix
          Node.__init__(self)
          # nodeValue and value are set elsewhere
***************
*** 192,203 ****
              self.__dict__[name] = value
  
  class AttributeList:
!     """the attribute list is a transient interface to the underlying
!     dictionaries.  mutations here will change the underlying element's
      dictionary"""
      def __init__(self, attrs, attrsNS):
          self._attrs = attrs
          self._attrsNS = attrsNS
!         self.length = len(self._attrs.keys())
  
      def item(self, index):
--- 248,266 ----
              self.__dict__[name] = value
  
+     def cloneNode(self, deep):
+         clone = Node.cloneNode(self, deep)
+         if clone.__dict__.has_key("ownerElement"):
+             del clone.ownerElement
+         return clone
+ 
  class AttributeList:
!     """The attribute list is a transient interface to the underlying
!     dictionaries.  Mutations here will change the underlying element's
      dictionary"""
+ 
      def __init__(self, attrs, attrsNS):
          self._attrs = attrs
          self._attrsNS = attrsNS
!         self.length = len(self._attrs)
  
      def item(self, index):
***************
*** 208,217 ****
  
      def items(self):
!         return map(lambda node: (node.tagName, node.value),
!                    self._attrs.values())
  
      def itemsNS(self):
!         return map(lambda node: ((node.URI, node.localName), node.value),
!                    self._attrs.values())
  
      def keys(self):
--- 271,284 ----
  
      def items(self):
!         L = []
!         for node in self._attrs.values():
!             L.append((node.tagName, node.value))
!         return L
  
      def itemsNS(self):
!         L = []
!         for node in self._attrs.values():
!             L.append(((node.URI, node.localName), node.value))
!         return L
  
      def keys(self):
***************
*** 235,239 ****
      #FIXME: is it appropriate to return .value?
      def __getitem__(self, attname_or_tuple):
!         if type(attname_or_tuple) is types.TupleType:
              return self._attrsNS[attname_or_tuple]
          else:
--- 302,306 ----
      #FIXME: is it appropriate to return .value?
      def __getitem__(self, attname_or_tuple):
!         if type(attname_or_tuple) is _TupleType:
              return self._attrsNS[attname_or_tuple]
          else:
***************
*** 242,250 ****
      # same as set
      def __setitem__(self, attname, value):
!         if type(value) is types.StringType:
              node = Attr(attname)
!             node.value=value
          else:
!             assert isinstance(value, Attr) or type(value) is types.StringType
              node = value
          old = self._attrs.get(attname, None)
--- 309,318 ----
      # same as set
      def __setitem__(self, attname, value):
!         if type(value) in _StringTypes:
              node = Attr(attname)
!             node.value = value
          else:
!             if not isinstance(value, Attr):
!                 raise TypeError, "value must be a string or Attr object"
              node = value
          old = self._attrs.get(attname, None)
***************
*** 262,265 ****
--- 330,335 ----
  class Element(Node):
      nodeType = Node.ELEMENT_NODE
+     nextSibling = None
+     previousSibling = None
  
      def __init__(self, tagName, namespaceURI="", prefix="",
***************
*** 271,281 ****
          self.namespaceURI = namespaceURI
          self.nodeValue = None
  
!         self._attrs={}  # attributes are double-indexed:
!         self._attrsNS={}#    tagName -> Attribute
!                 #    URI,localName -> Attribute
!                 # in the future: consider lazy generation of attribute objects
!                 #                this is too tricky for now because of headaches
!                 #                with namespaces.
  
      def getAttribute(self, attname):
--- 341,370 ----
          self.namespaceURI = namespaceURI
          self.nodeValue = None
+ 
+         self._attrs = {}   # attributes are double-indexed:
+         self._attrsNS = {} #    tagName -> Attribute
+                            #    URI,localName -> Attribute
+                            # in the future: consider lazy generation
+                            # of attribute objects this is too tricky
+                            # for now because of headaches with
+                            # namespaces.
  
!     def cloneNode(self, deep):
!         clone = Node.cloneNode(self, deep)
!         clone._attrs = {}
!         clone._attrsNS = {}
!         for attr in self._attrs.values():
!             node = attr.cloneNode(1)
!             clone._attrs[node.name] = node
!             clone._attrsNS[(node.namespaceURI, node.localName)] = node
!             node.ownerElement = clone
!         return clone
! 
!     def unlink(self):
!         for attr in self._attrs.values():
!             attr.unlink()
!         self._attrs = None
!         self._attrsNS = None
!         Node.unlink(self)
  
      def getAttribute(self, attname):
***************
*** 297,301 ****
          attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
          self.setAttributeNode(attr)
-         # FIXME: return original node if something changed.
  
      def getAttributeNode(self, attrname):
--- 386,389 ----
***************
*** 306,309 ****
--- 394,399 ----
  
      def setAttributeNode(self, attr):
+         if attr.ownerElement not in (None, self):
+             raise ValueError, "attribute node already owned"
          old = self._attrs.get(attr.name, None)
          if old:
***************
*** 311,315 ****
          self._attrs[attr.name] = attr
          self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
!         # FIXME: return old value if something changed
  
      def removeAttribute(self, name):
--- 401,414 ----
          self._attrs[attr.name] = attr
          self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
! 
!         # This creates a circular reference, but Element.unlink()
!         # breaks the cycle since the references to the attribute
!         # dictionaries are tossed.
!         attr.ownerElement = self
! 
!         if old is not attr:
!             # It might have already been part of this node, in which case
!             # it doesn't represent a change, and should not be returned.
!             return old
  
      def removeAttribute(self, name):
***************
*** 335,348 ****
          return "<DOM Element: %s at %s>" % (self.tagName, id(self))
  
-     # undocumented
      def writexml(self, writer):
          writer.write("<" + self.tagName)
  
!         a_names = self._get_attributes().keys()
          a_names.sort()
  
          for a_name in a_names:
              writer.write(" %s=\"" % a_name)
!             _write_data(writer, self._get_attributes()[a_name].value)
              writer.write("\"")
          if self.childNodes:
--- 434,447 ----
          return "<DOM Element: %s at %s>" % (self.tagName, id(self))
  
      def writexml(self, writer):
          writer.write("<" + self.tagName)
  
!         attrs = self._get_attributes()
!         a_names = attrs.keys()
          a_names.sort()
  
          for a_name in a_names:
              writer.write(" %s=\"" % a_name)
!             _write_data(writer, attrs[a_name].value)
              writer.write("\"")
          if self.childNodes:
***************
*** 359,368 ****
  class Comment(Node):
      nodeType = Node.COMMENT_NODE
  
      def __init__(self, data):
          Node.__init__(self)
          self.data = self.nodeValue = data
-         self.nodeName = "#comment"
-         self.attributes = None
  
      def writexml(self, writer):
--- 458,467 ----
  class Comment(Node):
      nodeType = Node.COMMENT_NODE
+     nodeName = "#comment"
+     attributes = None
  
      def __init__(self, data):
          Node.__init__(self)
          self.data = self.nodeValue = data
  
      def writexml(self, writer):
***************
*** 371,374 ****
--- 470,474 ----
  class ProcessingInstruction(Node):
      nodeType = Node.PROCESSING_INSTRUCTION_NODE
+     attributes = None
  
      def __init__(self, target, data):
***************
*** 376,380 ****
          self.target = self.nodeName = target
          self.data = self.nodeValue = data
-         self.attributes = None
  
      def writexml(self, writer):
--- 476,479 ----
***************
*** 384,392 ****
      nodeType = Node.TEXT_NODE
      nodeName = "#text"
  
      def __init__(self, data):
          Node.__init__(self)
          self.data = self.nodeValue = data
-         self.attributes = None
  
      def __repr__(self):
--- 483,491 ----
      nodeType = Node.TEXT_NODE
      nodeName = "#text"
+     attributes = None
  
      def __init__(self, data):
          Node.__init__(self)
          self.data = self.nodeValue = data
  
      def __repr__(self):
***************
*** 401,406 ****
  
  def _nssplit(qualifiedName):
!     import string
!     fields = string.split(qualifiedName,':', 1)
      if len(fields) == 2:
          return fields
--- 500,504 ----
  
  def _nssplit(qualifiedName):
!     fields = _string.split(qualifiedName, ':', 1)
      if len(fields) == 2:
          return fields
***************
*** 410,421 ****
  class Document(Node):
      nodeType = Node.DOCUMENT_NODE
      documentElement = None
  
-     def __init__(self):
-         Node.__init__(self)
-         self.attributes = None
-         self.nodeName = "#document"
-         self.nodeValue = None
- 
      def appendChild(self, node):
          if node.nodeType == Node.ELEMENT_NODE:
--- 508,516 ----
  class Document(Node):
      nodeType = Node.DOCUMENT_NODE
+     nodeName = "#document"
+     nodeValue = None
+     attributes = None
      documentElement = None
  
      def appendChild(self, node):
          if node.nodeType == Node.ELEMENT_NODE:
***************
*** 424,429 ****
              else:
                  self.documentElement = node
!         Node.appendChild(self, node)
!         return node
  
      createElement = Element
--- 519,523 ----
              else:
                  self.documentElement = node
!         return Node.appendChild(self, node)
  
      createElement = Element
***************
*** 438,447 ****
  
      def createElementNS(self, namespaceURI, qualifiedName):
!         prefix,localName = _nssplit(qualifiedName)
!         return Element(qualifiedName, namespaceURI, prefix, localName)
  
      def createAttributeNS(self, namespaceURI, qualifiedName):
!         prefix,localName = _nssplit(qualifiedName)
!         return Attr(qualifiedName, namespaceURI, localName, prefix)
  
      def getElementsByTagNameNS(self, namespaceURI, localName):
--- 532,543 ----
  
      def createElementNS(self, namespaceURI, qualifiedName):
!         prefix, localName = _nssplit(qualifiedName)
!         return self.createElement(qualifiedName, namespaceURI,
!                                   prefix, localName)
  
      def createAttributeNS(self, namespaceURI, qualifiedName):
!         prefix, localName = _nssplit(qualifiedName)
!         return self.createAttribute(qualifiedName, namespaceURI,
!                                     localName, prefix)
  
      def getElementsByTagNameNS(self, namespaceURI, localName):
***************
*** 461,464 ****
--- 557,567 ----
              node.writexml(writer)
  
+ def _get_StringIO():
+     try:
+         from cStringIO import StringIO
+     except ImportError:
+         from StringIO import StringIO
+     return StringIO()
+ 
  def _doparse(func, args, kwargs):
      events = apply(func, args, kwargs)
***************
*** 469,475 ****
--- 572,580 ----
  def parse(*args, **kwargs):
      "Parse a file into a DOM by filename or file object"
+     from xml.dom import pulldom
      return _doparse(pulldom.parse, args, kwargs)
  
  def parseString(*args, **kwargs):
      "Parse a file into a DOM from a string"
+     from xml.dom import pulldom
      return _doparse(pulldom.parseString, args, kwargs)