[XML-SIG] DOM toxml() method

Fred L. Drake, Jr. <fdrake@acm.org>
Fri, 7 May 1999 16:37:35 -0400 (EDT)


  The toxml() methods in PyDOM perform a huge amount of string
copying; just about every operation is implemented as a string
addition, which requires a malloc() and data copying.  When many
strings have to be combined, collecting them in a list and calling
string.join() once can be a lot faster.
  I've modified xml.dom.core to use string.join(); the patch is
below.


  -Fred

--
Fred L. Drake, Jr.	     <fdrake@acm.org>
Corporation for National Research Initiatives

Index: core.py
===================================================================
RCS file: /projects/cvsroot/xml/dom/core.py,v
retrieving revision 1.45
diff -c -c -r1.45 core.py
*** core.py	1999/03/27 18:47:28	1.45
--- core.py	1999/05/07 20:33:59
***************
*** 743,755 ****
          return '<Attribute node %s>' % (repr(self._node.name),)
  
      def toxml(self):
!         s = ""
          for c in self._node.children:
              if c.type == TEXT_NODE:
!                 s = s + c.value
              elif c.type == ENTITY_REFERENCE_NODE:
!                 s = s + '&' + c.name + ';'
!         return s
      
      def get_nodeName(self):
          return self._node.name
--- 743,755 ----
          return '<Attribute node %s>' % (repr(self._node.name),)
  
      def toxml(self):
!         L = []
          for c in self._node.children:
              if c.type == TEXT_NODE:
!                 L.append(c.value)
              elif c.type == ENTITY_REFERENCE_NODE:
!                 L.extend(["&", c.name, ";"])
!         return string.join(L, "")
      
      def get_nodeName(self):
          return self._node.name
***************
*** 798,822 ****
          return "<Element '%s'>" % (self._node.name)
  
      def toxml(self):
!         s = "<" + self._node.name
          for attr, attrnode in self._node.attributes.items():
!             s = s + " %s='" % (attr,)
              for value in attrnode.children:
                  if value.type == TEXT_NODE:
!                     s = s + escape(value.value) 
                  else:
                      n = NODE_CLASS[ value.type ] (value, self._document)
!                     s = s + value.toxml()
!             s = s + "'"
              
          if len(self._node.children) == 0:
!             return s + " />"
!         s = s + '>'
          for child in self._node.children:
              n = NODE_CLASS[ child.type ] (child, self._document)
!             s = s + n.toxml()
!         s = s + "</" + self._node.name + '>'
!         return s
  
      # Attributes
      
--- 799,824 ----
          return "<Element '%s'>" % (self._node.name)
  
      def toxml(self):
!         L = ["<", self._node.name]
          for attr, attrnode in self._node.attributes.items():
!             L.append(" %s='" % (attr,))
              for value in attrnode.children:
                  if value.type == TEXT_NODE:
!                     L.append(escape(value.value) )
                  else:
                      n = NODE_CLASS[ value.type ] (value, self._document)
!                     L.append(value.toxml())
!             L.append("'")
              
          if len(self._node.children) == 0:
!             L.append("/>")
!             return string.join(L, "")
!         L.append(">")
          for child in self._node.children:
              n = NODE_CLASS[ child.type ] (child, self._document)
!             L.append(n.toxml())
!         L.extend(["</", self._node.name, ">"])
!         return string.join(L, "")
  
      # Attributes
      
***************
*** 1109,1121 ****
  	self._document = node
  
      def toxml(self):
!         s = '<?xml version="1.0"?>\n'
          if self.documentType:
!             s = s + self.documentType.toxml()
          for n in self._node.children:
              n = NODE_CLASS[ n.type ] (n, self._document)
!             s = s + n.toxml()
!         return s
  
      def __repr__(self):
          return '<DOM Document; root=%s >' % (repr(self.get_documentElement()),)
--- 1112,1124 ----
  	self._document = node
  
      def toxml(self):
!         L = ['<?xml version="1.0"?>\n']
          if self.documentType:
!             L.append(self.documentType.toxml())
          for n in self._node.children:
              n = NODE_CLASS[ n.type ] (n, self._document)
!             L.append(n.toxml())
!         return string.join(L, "")
  
      def __repr__(self):
          return '<DOM Document; root=%s >' % (repr(self.get_documentElement()),)
***************
*** 1327,1337 ****
  	return None    
  
      def toxml(self):
!         s = ""
          for child in self._node.children:
              n = NODE_CLASS[ child.type ] (child, self._document)
!             s = s + n.toxml()
!         return s
      
  # Dictionary mapping types to the corresponding class object
  
--- 1330,1340 ----
  	return None    
  
      def toxml(self):
!         L = []
          for child in self._node.children:
              n = NODE_CLASS[ child.type ] (child, self._document)
!             L.append(n.toxml())
!         return string.join(L, "")
      
  # Dictionary mapping types to the corresponding class object