[XML-SIG] PyDOM performance
Carey Evans
c.evans@clear.net.nz
25 Aug 1999 23:37:33 +1200
--=-=-=
Hi.
I've been rather disappointed with the speed when trying out the DOM
support in the XML 0.5.1 package. Constructing a tree for the fairly
simple document at
http://home.clear.net.nz/pages/c.evans/diary/hols199901.xml
took about 45 seconds. I tried out the CVS tree and got this down to
17.8 seconds, which is quite an impressive improvement by itself, given
that PyDOM itself doesn't seem to have changed much.
Looking at this with the profiler, dom/core.py spends a *lot* of time
in __getattr__ and __setattr__. I didn't have anything better to do,
so I rewrote these methods and got the time down to 11.7 seconds.
I've attached the patch to do this below.
My questions are:
Is what I'm doing in this patch actually working, or am I on the
wrong track?
And, is it worth doing anything to PyDOM, or would I be better off
looking at 4DOM, for example?
Thanks.
--
Carey Evans http://home.clear.net.nz/pages/c.evans/
"This is where your sanity gives in..."
--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=dom-core.diff
--- core.py.dist Fri Aug 13 14:33:42 1999
+++ core.py Wed Aug 25 23:03:37 1999
@@ -323,16 +323,18 @@
# to attributes such as .parentNode are redirected into calls to
# get_parentNode or set_parentNode.
def __getattr__(self, key):
- if key[0:4] == 'get_' or key[0:4] == 'set_':
- raise AttributeError, repr(key[4:])
- func = getattr(self, 'get_'+key)
- return func()
+ method = self._get_dict.get(key)
+ if method is not None:
+ return method(self)
+ else:
+ raise AttributeError, key
def __setattr__(self, key, value):
- if hasattr(self, 'set_'+key):
- func = getattr(self, 'set_'+key)
- func( value )
- self.__dict__[key] = value
+ method = self._set_dict.get(key)
+ if method is not None:
+ method(self, value)
+ else:
+ self.__dict__[key] = value
def __cmp__(self, other):
if isinstance(other, Node):
@@ -637,6 +639,19 @@
"%s is an ancestor of %s" % (repr(child), repr(parent) )
p = p.get_parentNode()
+ # Dictionaries of allowed get/set properties.
+ _get_dict = {
+ 'nodeName': get_nodeName, 'name': get_name,
+ 'nodeValue': get_nodeValue, 'value': get_value,
+ 'nodeType': get_nodeType, 'attributes': get_attributes,
+ 'childNodes': get_childNodes, 'parentNode': get_parentNode,
+ 'firstChild': get_firstChild, 'lastChild': get_lastChild,
+ 'previousSibling': get_previousSibling,
+ 'nextSibling': get_nextSibling,
+ 'ownerDocument': get_ownerDocument,
+ }
+ _set_dict = {}
+
class CharacterData(Node):
# Attributes
@@ -733,7 +748,14 @@
d.name = "#text"
d.value = value
return Text(d, self._document)
-
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({ 'data': get_data, 'length': get_length })
+ _set_dict = Node._set_dict.copy()
+ _set_dict.update({ 'data': set_data, 'nodeValue': set_nodeValue })
+
+
class Attr(Node):
childNodeTypes = [TEXT_NODE, ENTITY_REFERENCE_NODE]
@@ -789,7 +811,23 @@
def get_parentNode(self): return None
def get_previousSibling(self): return None
def get_nextSibling(self): return None
-
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({
+ 'nodeName': get_nodeName, 'name': get_name,
+ 'nodeValue': get_nodeValue, 'value': get_value,
+ 'specified': get_specified,
+ 'parentNode': get_parentNode,
+ 'previousSibling': get_previousSibling,
+ 'nextSibling': get_nextSibling,
+ })
+ _set_dict = Node._set_dict.copy()
+ _set_dict.update({
+ 'nodeValue': set_nodeValue, 'value': set_value,
+ })
+
+
class Element(Node):
childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE, COMMENT_NODE,
TEXT_NODE, CDATA_SECTION_NODE, ENTITY_REFERENCE_NODE]
@@ -971,6 +1009,11 @@
if L[i].type == ELEMENT_NODE:
n = NODE_CLASS[ L[i].type ] (L[i], self._document)
n.normalize()
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({ 'tagName': get_tagName, 'attributes': get_attributes })
+
class Text(CharacterData):
childNodeTypes = []
@@ -1040,6 +1083,13 @@
def toxml(self):
return '<!DOCTYPE %s>\n' % (self._node.name,)
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({
+ 'name': get_name, 'entities': get_entities,
+ 'notations': get_notations })
+
class Notation(Node):
readonly = 1 # This is a read-only class
@@ -1061,7 +1111,11 @@
return '<!NOTATION %s PUBLIC %s %s>' % (self._node.name,
self._node.publicId,
self._node.systemId)
-
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({ 'publicId': get_publicId, 'systemId': get_systemId })
+
class Entity(Node):
readonly = 1 # This is a read-only class
@@ -1077,6 +1131,14 @@
def get_notationName(self):
return self._node.notationName
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({
+ 'publicId': get_publicId, 'systemId': get_systemId,
+ 'notationName': get_notationName
+ })
+
+
class EntityReference(Node):
childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
COMMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE,
@@ -1106,6 +1168,12 @@
raise NoModificationAllowedException("Read-only object")
self._node.value = data
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({ 'target': get_target, 'data': get_data })
+ _set_dict = Node._set_dict.copy()
+ _set_dict.update({ 'data': set_data })
+
class Document(Node):
childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
@@ -1325,6 +1393,17 @@
Node.replaceChild(self, newChild, oldChild)
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({
+ 'doctype': get_doctype,
+ 'implementation': get_implementation,
+ 'childNodes': get_childNodes,
+ 'documentElement': get_documentElement,
+ 'ownerDocument': get_ownerDocument,
+ })
+
+
class DocumentFragment(Node):
childNodeTypes = [ELEMENT_NODE, PROCESSING_INSTRUCTION_NODE,
COMMENT_NODE, TEXT_NODE, CDATA_SECTION_NODE,
@@ -1341,7 +1420,12 @@
n = NODE_CLASS[ child.type ] (child, self._document)
L.append(n.toxml())
return string.join(L, "")
-
+
+ # Dictionaries of allowed get/set properties.
+ _get_dict = Node._get_dict.copy()
+ _get_dict.update({ 'parentNode': get_parentNode })
+
+
# Dictionary mapping types to the corresponding class object
NODE_CLASS = {
--=-=-=--