[Python-checkins] cpython: Close #18990: remove root attribute from XMLPullParser

nick.coghlan python-checkins at python.org
Sat Sep 28 15:50:57 CEST 2013


http://hg.python.org/cpython/rev/c5e206b9df2e
changeset:   85817:c5e206b9df2e
user:        Nick Coghlan <ncoghlan at gmail.com>
date:        Sat Sep 28 23:50:35 2013 +1000
summary:
  Close #18990: remove root attribute from XMLPullParser

- this was an internal implementation detail for iterparse
- this has been changed to use a new private method instead
- XMLPullParser.close docs are now more explicit about not
  returning a root element and instead direct users towards
  read_events
- also added missing docstrings and clarified some details
  related to exactly *when* events are consumed from the
  internal queue

(Initial patch by Stefan Behnel)

files:
  Doc/library/xml.etree.elementtree.rst |  13 ++++-
  Lib/test/test_xml_etree.py            |  19 ++------
  Lib/xml/etree/ElementTree.py          |  32 ++++++++++----
  3 files changed, 37 insertions(+), 27 deletions(-)


diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -1031,15 +1031,22 @@
 
    .. method:: close()
 
-      Signal the parser that the data stream is terminated.
+      Signal the parser that the data stream is terminated. Unlike
+      :meth:`XMLParser.close`, this method always returns :const:`None`.
+      Any events not yet retrieved when the parser is closed can still be
+      read with :meth:`read_events`.
 
    .. method:: read_events()
 
       Iterate over the events which have been encountered in the data fed to the
       parser.  This method yields ``(event, elem)`` pairs, where *event* is a
       string representing the type of event (e.g. ``"end"``) and *elem* is the
-      encountered :class:`Element` object.  Events provided in a previous call
-      to :meth:`read_events` will not be yielded again.
+      encountered :class:`Element` object.
+
+      Events provided in a previous call to :meth:`read_events` will not be
+      yielded again. As events are consumed from the internal queue only as
+      they are retrieved from the iterator, multiple readers calling
+      :meth:`read_events` in parallel will have unpredictable results.
 
    .. note::
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -985,10 +985,7 @@
                     ])
                 self._feed(parser, "</root>\n", chunk_size)
                 self.assert_event_tags(parser, [('end', 'root')])
-                # Closing sets the `root` attribute
-                self.assertIs(parser.root, None)
-                parser.close()
-                self.assertEqual(parser.root.tag, 'root')
+                self.assertIsNone(parser.close())
 
     def test_feed_while_iterating(self):
         parser = ET.XMLPullParser()
@@ -1021,10 +1018,7 @@
             ])
         self._feed(parser, "</root>\n")
         self.assert_event_tags(parser, [('end', '{namespace}root')])
-        # Closing sets the `root` attribute
-        self.assertIs(parser.root, None)
-        parser.close()
-        self.assertEqual(parser.root.tag, '{namespace}root')
+        self.assertIsNone(parser.close())
 
     def test_ns_events(self):
         parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
@@ -1039,7 +1033,7 @@
         self._feed(parser, "<empty-element/>\n")
         self._feed(parser, "</root>\n")
         self.assertEqual(list(parser.read_events()), [('end-ns', None)])
-        parser.close()
+        self.assertIsNone(parser.close())
 
     def test_events(self):
         parser = ET.XMLPullParser(events=())
@@ -1064,10 +1058,8 @@
             ('end', '{foo}element'),
             ])
         self._feed(parser, "</root>")
-        parser.close()
-        self.assertIs(parser.root, None)
+        self.assertIsNone(parser.close())
         self.assert_event_tags(parser, [('end', 'root')])
-        self.assertEqual(parser.root.tag, 'root')
 
         parser = ET.XMLPullParser(events=('start',))
         self._feed(parser, "<!-- comment -->\n")
@@ -1085,8 +1077,7 @@
             ('start', '{foo}empty-element'),
             ])
         self._feed(parser, "</root>")
-        parser.close()
-        self.assertEqual(parser.root.tag, 'root')
+        self.assertIsNone(parser.close())
 
     def test_events_sequence(self):
         # Test that events can be some sequence that's not just a tuple or list
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1220,7 +1220,6 @@
         # _elementtree.c expects a list, not a deque
         self._events_queue = []
         self._index = 0
-        self.root = self._root = None
         self._parser = _parser or XMLParser(target=TreeBuilder())
         # wire up the parser for event reporting
         if events is None:
@@ -1228,6 +1227,7 @@
         self._parser._setevents(self._events_queue, events)
 
     def feed(self, data):
+        """Feed encoded data to parser."""
         if self._parser is None:
             raise ValueError("feed() called after end of stream")
         if data:
@@ -1236,13 +1236,26 @@
             except SyntaxError as exc:
                 self._events_queue.append(exc)
 
+    def _close_and_return_root(self):
+        # iterparse needs this to set its root attribute properly :(
+        root = self._parser.close()
+        self._parser = None
+        return root
+
     def close(self):
-        self._root = self._parser.close()
-        self._parser = None
-        if self._index >= len(self._events_queue):
-            self.root = self._root
+        """Finish feeding data to parser.
+
+        Unlike XMLParser, does not return the root element. Use
+        read_events() to consume elements from XMLPullParser.
+        """
+        self._close_and_return_root()
 
     def read_events(self):
+        """Iterate over currently available (event, elem) pairs.
+
+        Events are consumed from the internal event queue as they are
+        retrieved from the iterator.
+        """
         events = self._events_queue
         while True:
             index = self._index
@@ -1254,6 +1267,7 @@
                 break
             index += 1
             # Compact the list in a O(1) amortized fashion
+            # As noted above, _elementtree.c needs a list, not a deque
             if index * 2 >= len(events):
                 events[:index] = []
                 self._index = 0
@@ -1263,8 +1277,6 @@
                 raise event
             else:
                 yield event
-        if self._parser is None:
-            self.root = self._root
 
 
 class _IterParseIterator:
@@ -1275,14 +1287,14 @@
         self._parser = XMLPullParser(events=events, _parser=parser)
         self._file = source
         self._close_file = close_source
-        self.root = None
+        self.root = self._root = None
 
     def __next__(self):
         while 1:
             for event in self._parser.read_events():
                 return event
             if self._parser._parser is None:
-                self.root = self._parser.root
+                self.root = self._root
                 if self._close_file:
                     self._file.close()
                 raise StopIteration
@@ -1291,7 +1303,7 @@
             if data:
                 self._parser.feed(data)
             else:
-                self._parser.close()
+                self._root = self._parser._close_and_return_root()
 
     def __iter__(self):
         return self

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list