[Python-checkins] cpython (3.3): Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element

antoine.pitrou python-checkins at python.org
Thu Oct 4 19:56:17 CEST 2012


http://hg.python.org/cpython/rev/7bd9626d8b4f
changeset:   79460:7bd9626d8b4f
branch:      3.3
parent:      79457:aa7f9b4a7508
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Thu Oct 04 19:53:29 2012 +0200
summary:
  Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element element_factory (fixes a regression in SimpleTAL).

files:
  Lib/test/test_xml_etree.py   |   44 +++++++++-
  Lib/xml/etree/ElementTree.py |    4 +-
  Misc/NEWS                    |    3 +
  Modules/_elementtree.c       |  107 ++++++++++++++++------
  4 files changed, 127 insertions(+), 31 deletions(-)


diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1893,10 +1893,23 @@
     sample1 = ('<!DOCTYPE html PUBLIC'
         ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
-        '<html>text</html>')
+        '<html>text<div>subtext</div>tail</html>')
 
     sample2 = '''<toplevel>sometext</toplevel>'''
 
+    def _check_sample1_element(self, e):
+        self.assertEqual(e.tag, 'html')
+        self.assertEqual(e.text, 'text')
+        self.assertEqual(e.tail, None)
+        self.assertEqual(e.attrib, {})
+        children = list(e)
+        self.assertEqual(len(children), 1)
+        child = children[0]
+        self.assertEqual(child.tag, 'div')
+        self.assertEqual(child.text, 'subtext')
+        self.assertEqual(child.tail, 'tail')
+        self.assertEqual(child.attrib, {})
+
     def test_dummy_builder(self):
         class BaseDummyBuilder:
             def close(self):
@@ -1929,7 +1942,7 @@
         parser.feed(self.sample1)
 
         e = parser.close()
-        self.assertEqual(e.tag, 'html')
+        self._check_sample1_element(e)
 
     def test_element_factory(self):
         lst = []
@@ -1945,6 +1958,33 @@
 
         self.assertEqual(lst, ['toplevel'])
 
+    def _check_element_factory_class(self, cls):
+        tb = ET.TreeBuilder(element_factory=cls)
+
+        parser = ET.XMLParser(target=tb)
+        parser.feed(self.sample1)
+        e = parser.close()
+        self.assertIsInstance(e, cls)
+        self._check_sample1_element(e)
+
+    def test_element_factory_subclass(self):
+        class MyElement(ET.Element):
+            pass
+        self._check_element_factory_class(MyElement)
+
+    def test_element_factory_pure_python_subclass(self):
+        # Mimick SimpleTAL's behaviour (issue #16089): both versions of
+        # TreeBuilder should be able to cope with a subclass of the
+        # pure Python Element class.
+        base = ET._Element
+        # Not from a C extension
+        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
+        # Force some multiple inheritance with a C class to make things
+        # more interesting.
+        class MyElement(base, ValueError):
+            pass
+        self._check_element_factory_class(MyElement)
+
     def test_doctype(self):
         class DoctypeParser:
             _doctype = None
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -303,7 +303,9 @@
         self._children.insert(index, element)
 
     def _assert_is_element(self, e):
-        if not isinstance(e, Element):
+        # Need to refer to the actual Python implementation, not the
+        # shadowing C implementation.
+        if not isinstance(e, _Element):
             raise TypeError('expected an Element, not %s' % type(e).__name__)
 
     ##
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@
 Library
 -------
 
+- Issue #16089: Allow ElementTree.TreeBuilder to work again with a non-Element
+  element_factory (fixes a regression in SimpleTAL).
+
 - Issue #16034: Fix performance regressions in the new `bz2.BZ2File`
   implementation.  Initial patch by Serhiy Storchaka.
 
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1997,8 +1997,8 @@
 
     PyObject *root; /* root node (first created node) */
 
-    ElementObject *this; /* current node */
-    ElementObject *last; /* most recently created node */
+    PyObject *this; /* current node */
+    PyObject *last; /* most recently created node */
 
     PyObject *data; /* data collector (string or list), or NULL */
 
@@ -2030,9 +2030,9 @@
         t->root = NULL;
 
         Py_INCREF(Py_None);
-        t->this = (ElementObject *)Py_None;
+        t->this = Py_None;
         Py_INCREF(Py_None);
-        t->last = (ElementObject *)Py_None;
+        t->last = Py_None;
 
         t->data = NULL;
         t->element_factory = NULL;
@@ -2113,6 +2113,64 @@
 }
 
 /* -------------------------------------------------------------------- */
+/* helpers for handling of arbitrary element-like objects */
+
+static int
+treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
+                                     PyObject **dest, _Py_Identifier *name)
+{
+    if (Element_CheckExact(element)) {
+        Py_DECREF(JOIN_OBJ(*dest));
+        *dest = JOIN_SET(data, PyList_CheckExact(data));
+        return 0;
+    }
+    else {
+        PyObject *joined = list_join(data);
+        int r;
+        if (joined == NULL)
+            return -1;
+        r = _PyObject_SetAttrId(element, name, joined);
+        Py_DECREF(joined);
+        return r;
+    }
+}
+
+/* These two functions steal a reference to data */
+static int
+treebuilder_set_element_text(PyObject *element, PyObject *data)
+{
+    _Py_IDENTIFIER(text);
+    return treebuilder_set_element_text_or_tail(
+        element, data, &((ElementObject *) element)->text, &PyId_text);
+}
+
+static int
+treebuilder_set_element_tail(PyObject *element, PyObject *data)
+{
+    _Py_IDENTIFIER(tail);
+    return treebuilder_set_element_text_or_tail(
+        element, data, &((ElementObject *) element)->tail, &PyId_tail);
+}
+
+static int
+treebuilder_add_subelement(PyObject *element, PyObject *child)
+{
+    _Py_IDENTIFIER(append);
+    if (Element_CheckExact(element)) {
+        ElementObject *elem = (ElementObject *) element;
+        return element_add_subelement(elem, child);
+    }
+    else {
+        PyObject *res;
+        res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
+        if (res == NULL)
+            return -1;
+        Py_DECREF(res);
+        return 0;
+    }
+}
+
+/* -------------------------------------------------------------------- */
 /* handlers */
 
 LOCAL(PyObject*)
@@ -2124,15 +2182,12 @@
 
     if (self->data) {
         if (self->this == self->last) {
-            Py_DECREF(JOIN_OBJ(self->last->text));
-            self->last->text = JOIN_SET(
-                self->data, PyList_CheckExact(self->data)
-                );
-        } else {
-            Py_DECREF(JOIN_OBJ(self->last->tail));
-            self->last->tail = JOIN_SET(
-                self->data, PyList_CheckExact(self->data)
-                );
+            if (treebuilder_set_element_text(self->last, self->data))
+                return NULL;
+        }
+        else {
+            if (treebuilder_set_element_tail(self->last, self->data))
+                return NULL;
         }
         self->data = NULL;
     }
@@ -2146,10 +2201,10 @@
         return NULL;
     }
 
-    this = (PyObject*) self->this;
+    this = self->this;
 
     if (this != Py_None) {
-        if (element_add_subelement((ElementObject*) this, node) < 0)
+        if (treebuilder_add_subelement(this, node) < 0)
             goto error;
     } else {
         if (self->root) {
@@ -2175,11 +2230,11 @@
 
     Py_DECREF(this);
     Py_INCREF(node);
-    self->this = (ElementObject*) node;
+    self->this = node;
 
     Py_DECREF(self->last);
     Py_INCREF(node);
-    self->last = (ElementObject*) node;
+    self->last = node;
 
     if (self->start_event_obj) {
         PyObject* res;
@@ -2203,7 +2258,7 @@
 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
 {
     if (!self->data) {
-        if (self->last == (ElementObject*) Py_None) {
+        if (self->last == Py_None) {
             /* ignore calls to data before the first call to start */
             Py_RETURN_NONE;
         }
@@ -2243,15 +2298,11 @@
 
     if (self->data) {
         if (self->this == self->last) {
-            Py_DECREF(JOIN_OBJ(self->last->text));
-            self->last->text = JOIN_SET(
-                self->data, PyList_CheckExact(self->data)
-                );
+            if (treebuilder_set_element_text(self->last, self->data))
+                return NULL;
         } else {
-            Py_DECREF(JOIN_OBJ(self->last->tail));
-            self->last->tail = JOIN_SET(
-                self->data, PyList_CheckExact(self->data)
-                );
+            if (treebuilder_set_element_tail(self->last, self->data))
+                return NULL;
         }
         self->data = NULL;
     }
@@ -2271,8 +2322,8 @@
 
     Py_DECREF(self->last);
 
-    self->last = (ElementObject*) self->this;
-    self->this = (ElementObject*) item;
+    self->last = self->this;
+    self->this = item;
 
     if (self->end_event_obj) {
         PyObject* res;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list