[Python-checkins] cpython (merge 3.5 -> default): Issue #25902: Fixed various refcount issues in ElementTree iteration.

serhiy.storchaka python-checkins at python.org
Mon Dec 21 04:13:37 EST 2015


https://hg.python.org/cpython/rev/e09cb2af3092
changeset:   99654:e09cb2af3092
parent:      99652:e1418fc70e82
parent:      99653:00b6a13cfd70
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Mon Dec 21 11:11:12 2015 +0200
summary:
  Issue #25902: Fixed various refcount issues in ElementTree iteration.

files:
  Lib/test/test_xml_etree.py   |  51 +++++++++++++
  Lib/xml/etree/ElementTree.py |  10 +-
  Misc/NEWS                    |   2 +
  Modules/_elementtree.c       |  90 +++++++++++++++--------
  4 files changed, 117 insertions(+), 36 deletions(-)


diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1668,6 +1668,57 @@
         ET.register_namespace('test10777', 'http://myuri/')
         ET.register_namespace('test10777', 'http://myuri/')
 
+    def test_lost_text(self):
+        # Issue #25902: Borrowed text can disappear
+        class Text:
+            def __bool__(self):
+                e.text = 'changed'
+                return True
+
+        e = ET.Element('tag')
+        e.text = Text()
+        i = e.itertext()
+        t = next(i)
+        self.assertIsInstance(t, Text)
+        self.assertIsInstance(e.text, str)
+        self.assertEqual(e.text, 'changed')
+
+    def test_lost_tail(self):
+        # Issue #25902: Borrowed tail can disappear
+        class Text:
+            def __bool__(self):
+                e[0].tail = 'changed'
+                return True
+
+        e = ET.Element('root')
+        e.append(ET.Element('tag'))
+        e[0].tail = Text()
+        i = e.itertext()
+        t = next(i)
+        self.assertIsInstance(t, Text)
+        self.assertIsInstance(e[0].tail, str)
+        self.assertEqual(e[0].tail, 'changed')
+
+    def test_lost_elem(self):
+        # Issue #25902: Borrowed element can disappear
+        class Tag:
+            def __eq__(self, other):
+                e[0] = ET.Element('changed')
+                next(i)
+                return True
+
+        e = ET.Element('root')
+        e.append(ET.Element(Tag()))
+        e.append(ET.Element('tag'))
+        i = e.iter('tag')
+        try:
+            t = next(i)
+        except ValueError:
+            self.skipTest('generators are not reentrant')
+        self.assertIsInstance(t.tag, Tag)
+        self.assertIsInstance(e[0].tag, str)
+        self.assertEqual(e[0].tag, 'changed')
+
 
 # --------------------------------------------------------------------
 
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -429,12 +429,14 @@
         tag = self.tag
         if not isinstance(tag, str) and tag is not None:
             return
-        if self.text:
-            yield self.text
+        t = self.text
+        if t:
+            yield t
         for e in self:
             yield from e.itertext()
-            if e.tail:
-                yield e.tail
+            t = e.tail
+            if t:
+                yield t
 
 
 def SubElement(parent, tag, attrib={}, **extra):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -115,6 +115,8 @@
 Library
 -------
 
+- Issue #25902: Fixed various refcount issues in ElementTree iteration.
+
 - Issue #22227: The TarFile iterator is reimplemented using generator.
   This implementation is simpler that using class.
 
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2070,6 +2070,7 @@
     ElementObject *cur_parent;
     Py_ssize_t child_index;
     int rc;
+    ElementObject *elem;
 
     while (1) {
         /* Handle the case reached in the beginning and end of iteration, where
@@ -2083,38 +2084,47 @@
                 PyErr_SetNone(PyExc_StopIteration);
                 return NULL;
             } else {
+                elem = it->root_element;
                 it->parent_stack = parent_stack_push_new(it->parent_stack,
-                                                         it->root_element);
+                                                         elem);
                 if (!it->parent_stack) {
                     PyErr_NoMemory();
                     return NULL;
                 }
 
+                Py_INCREF(elem);
                 it->root_done = 1;
                 rc = (it->sought_tag == Py_None);
                 if (!rc) {
-                    rc = PyObject_RichCompareBool(it->root_element->tag,
+                    rc = PyObject_RichCompareBool(elem->tag,
                                                   it->sought_tag, Py_EQ);
-                    if (rc < 0)
+                    if (rc < 0) {
+                        Py_DECREF(elem);
                         return NULL;
+                    }
                 }
                 if (rc) {
                     if (it->gettext) {
-                        PyObject *text = element_get_text(it->root_element);
-                        if (!text)
+                        PyObject *text = element_get_text(elem);
+                        if (!text) {
+                            Py_DECREF(elem);
                             return NULL;
+                        }
+                        Py_INCREF(text);
+                        Py_DECREF(elem);
                         rc = PyObject_IsTrue(text);
+                        if (rc > 0)
+                            return text;
+                        Py_DECREF(text);
                         if (rc < 0)
                             return NULL;
-                        if (rc) {
-                            Py_INCREF(text);
-                            return text;
-                        }
                     } else {
-                        Py_INCREF(it->root_element);
-                        return (PyObject *)it->root_element;
+                        return (PyObject *)elem;
                     }
                 }
+                else {
+                    Py_DECREF(elem);
+                }
             }
         }
 
@@ -2124,54 +2134,68 @@
         cur_parent = it->parent_stack->parent;
         child_index = it->parent_stack->child_index;
         if (cur_parent->extra && child_index < cur_parent->extra->length) {
-            ElementObject *child = (ElementObject *)
-                cur_parent->extra->children[child_index];
+            elem = (ElementObject *)cur_parent->extra->children[child_index];
             it->parent_stack->child_index++;
             it->parent_stack = parent_stack_push_new(it->parent_stack,
-                                                     child);
+                                                     elem);
             if (!it->parent_stack) {
                 PyErr_NoMemory();
                 return NULL;
             }
 
+            Py_INCREF(elem);
             if (it->gettext) {
-                PyObject *text = element_get_text(child);
-                if (!text)
+                PyObject *text = element_get_text(elem);
+                if (!text) {
+                    Py_DECREF(elem);
                     return NULL;
+                }
+                Py_INCREF(text);
+                Py_DECREF(elem);
                 rc = PyObject_IsTrue(text);
+                if (rc > 0)
+                    return text;
+                Py_DECREF(text);
                 if (rc < 0)
                     return NULL;
-                if (rc) {
-                    Py_INCREF(text);
-                    return text;
-                }
             } else {
                 rc = (it->sought_tag == Py_None);
                 if (!rc) {
-                    rc = PyObject_RichCompareBool(child->tag,
+                    rc = PyObject_RichCompareBool(elem->tag,
                                                   it->sought_tag, Py_EQ);
-                    if (rc < 0)
+                    if (rc < 0) {
+                        Py_DECREF(elem);
                         return NULL;
+                    }
                 }
                 if (rc) {
-                    Py_INCREF(child);
-                    return (PyObject *)child;
+                    return (PyObject *)elem;
                 }
+                Py_DECREF(elem);
             }
         }
         else {
             PyObject *tail;
-            ParentLocator *next = it->parent_stack->next;
+            ParentLocator *next;
             if (it->gettext) {
+                Py_INCREF(cur_parent);
                 tail = element_get_tail(cur_parent);
-                if (!tail)
+                if (!tail) {
+                    Py_DECREF(cur_parent);
                     return NULL;
+                }
+                Py_INCREF(tail);
+                Py_DECREF(cur_parent);
             }
-            else
+            else {
                 tail = Py_None;
-            Py_XDECREF(it->parent_stack->parent);
+                Py_INCREF(tail);
+            }
+            next = it->parent_stack->next;
+            cur_parent = it->parent_stack->parent;
             PyObject_Free(it->parent_stack);
             it->parent_stack = next;
+            Py_XDECREF(cur_parent);
 
             /* Note that extra condition on it->parent_stack->parent here;
              * this is because itertext() is supposed to only return *inner*
@@ -2179,12 +2203,14 @@
              */
             if (it->parent_stack->parent) {
                 rc = PyObject_IsTrue(tail);
+                if (rc > 0)
+                    return tail;
+                Py_DECREF(tail);
                 if (rc < 0)
                     return NULL;
-                if (rc) {
-                    Py_INCREF(tail);
-                    return tail;
-                }
+            }
+            else {
+                Py_DECREF(tail);
             }
         }
     }

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list