[XML-SIG] PyDOM performance

Carey Evans c.evans@clear.net.nz
25 Aug 1999 23:37:33 +1200


--=-=-=

Hi.

I've been rather disappointed with the speed when trying out the DOM
support in the XML 0.5.1 package.  Constructing a tree of the fairly
simple document at

    http://home.clear.net.nz/pages/c.evans/diary/hols199901.xml

took about 45 seconds.  I tried out the CVS tree and got this down to
17.8 seconds, which is quite an impressive improvement by itself,
given that PyDOM doesn't seem to have changed much.

Looking at this with the profiler, dom/core.py spends a *lot* of time
in __getattr__ and __setattr__.  I didn't have anything better to do,
so I rewrote these methods and got the time down to 11.7 seconds.
I've attached the patch to do this below.

My questions are:

  Is what I'm doing in this patch actually working, or am I on the
  wrong track?

  And, is it worth doing anything to PyDOM, or would I be better off
  looking at 4DOM, for example?

Thanks.

-- 
	 Carey Evans  http://home.clear.net.nz/pages/c.evans/

	       "This is where your sanity gives in..."


--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=dom-core.diff

--- core.py.dist	Fri Aug 13 14:33:42 1999
+++ core.py	Wed Aug 25 23:03:37 1999
@@ -323,16 +323,18 @@
     # to attributes such as .parentNode are redirected into calls to 
     # get_parentNode or set_parentNode.
     def __getattr__(self, key):
-        if key[0:4] == 'get_' or key[0:4] == 'set_':
-            raise AttributeError, repr(key[4:])
-        func = getattr(self, 'get_'+key)
-        return func()
+        method = self._get_dict.get(key)
+        if method is not None:
+            return method(self)
+        else:
+            raise AttributeError, key
 
     def __setattr__(self, key, value):
-        if hasattr(self, 'set_'+key):
-            func = getattr(self, 'set_'+key)
-            func( value )
-        self.__dict__[key] = value
+        method = self._set_dict.get(key)
+        if method is not None:
+            method(self, value)
+        else:
+            self.__dict__[key] = value
 
     def __cmp__(self, other):
 	if isinstance(other, Node):
@@ -637,6 +639,19 @@
                       "%s is an ancestor of %s" % (repr(child), repr(parent) )
             p = p.get_parentNode()
 
+    # Dictionaries of allowed get/set properties.
+    _get_dict = {
+        'nodeName': get_nodeName, 'name': get_name,
+        'nodeValue': get_nodeValue, 'value': get_value,
+        'nodeType': get_nodeType, 'attributes': get_attributes,
+        'childNodes': get_childNodes, 'parentNode': get_parentNode,
+        'firstChild': get_firstChild, 'lastChild': get_lastChild,
+        'previousSibling': get_previousSibling,
+        'nextSibling': get_nextSibling,
+        'ownerDocument': get_ownerDocument,
+        }
+    _set_dict = {}
+
         
 class CharacterData(Node):
     # Attributes
@@ -733,7 +748,14 @@
         d.name = "#text"
         d.value = value
         return Text(d, self._document)
-    
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({ 'data': get_data, 'length': get_length })
+    _set_dict = Node._set_dict.copy()
+    _set_dict.update({ 'data': set_data, 'nodeValue': set_nodeValue })
+
+
 class Attr(Node):
     childNodeTypes = [TEXT_NODE, ENTITY_REFERENCE_NODE]
     
@@ -789,7 +811,23 @@
     def get_parentNode(self): return None
     def get_previousSibling(self): return None
     def get_nextSibling(self): return None
-    
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({
+        'nodeName': get_nodeName, 'name': get_name,
+        'nodeValue': get_nodeValue, 'value': get_value,
+        'specified': get_specified,
+        'parentNode': get_parentNode,
+        'previousSibling': get_previousSibling,
+        'nextSibling': get_nextSibling,
+        })
+    _set_dict = Node._set_dict.copy()
+    _set_dict.update({
+        'nodeValue': set_nodeValue, 'value': set_value,
+        })
+
+
 class Element(Node):
     childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE, COMMENT_NODE,
                       TEXT_NODE, CDATA_SECTION_NODE, ENTITY_REFERENCE_NODE]
@@ -971,6 +1009,11 @@
             if L[i].type == ELEMENT_NODE:
                 n = NODE_CLASS[ L[i].type ] (L[i], self._document)
                 n.normalize()
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({ 'tagName': get_tagName, 'attributes': get_attributes })
+
     
 class Text(CharacterData):
     childNodeTypes = []
@@ -1040,6 +1083,13 @@
 
     def toxml(self):
         return '<!DOCTYPE %s>\n' % (self._node.name,)
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({
+        'name': get_name, 'entities': get_entities,
+        'notations': get_notations })
+
         
 class Notation(Node):
     readonly = 1    # This is a read-only class
@@ -1061,7 +1111,11 @@
             return '<!NOTATION %s PUBLIC %s %s>' % (self._node.name,
                                                     self._node.publicId,
                                                     self._node.systemId)
-        
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({ 'publicId': get_publicId, 'systemId': get_systemId })
+
         
 class Entity(Node):
     readonly = 1    # This is a read-only class
@@ -1077,6 +1131,14 @@
     def get_notationName(self):
         return self._node.notationName
 
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({
+        'publicId': get_publicId, 'systemId': get_systemId,
+        'notationName': get_notationName
+        })
+
+
 class EntityReference(Node):
     childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
                       COMMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE,
@@ -1106,6 +1168,12 @@
             raise NoModificationAllowedException("Read-only object")
         self._node.value = data
 
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({ 'target': get_target, 'data': get_data })
+    _set_dict = Node._set_dict.copy()
+    _set_dict.update({ 'data': set_data })
+
 
 class Document(Node):
     childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
@@ -1325,6 +1393,17 @@
 
 	Node.replaceChild(self, newChild, oldChild)
 
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({
+        'doctype': get_doctype,
+        'implementation': get_implementation,
+        'childNodes': get_childNodes,
+        'documentElement': get_documentElement,
+        'ownerDocument': get_ownerDocument,
+        })
+
+
 class DocumentFragment(Node):
     childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
                       COMMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE,
@@ -1341,7 +1420,12 @@
             n = NODE_CLASS[ child.type ] (child, self._document)
             L.append(n.toxml())
         return string.join(L, "")
-    
+
+    # Dictionaries of allowed get/set properties.
+    _get_dict = Node._get_dict.copy()
+    _get_dict.update({ 'parentNode': get_parentNode })
+
+
 # Dictionary mapping types to the corresponding class object
 
 NODE_CLASS = {

--=-=-=--