[Python-checkins] cpython (3.2): Closes #2892: preserve iterparse events in case of SyntaxError.

florent.xicluna python-checkins at python.org
Tue Nov 1 23:35:17 CET 2011


http://hg.python.org/cpython/rev/23ffaf975267
changeset:   73291:23ffaf975267
branch:      3.2
parent:      73289:8f9f490b4777
user:        Florent Xicluna <florent.xicluna at gmail.com>
date:        Tue Nov 01 23:31:09 2011 +0100
summary:
  Closes #2892: preserve iterparse events in case of SyntaxError.

files:
  Lib/test/test_xml_etree.py   |   1 +
  Lib/xml/etree/ElementTree.py |  38 ++++++++++++++---------
  Misc/NEWS                    |   2 +
  Modules/_elementtree.c       |  38 ++++++++++++++---------
  4 files changed, 49 insertions(+), 30 deletions(-)


diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -754,6 +754,7 @@
     ...     print(action, elem.tag)
     ... except ET.ParseError as v:
     ...   print(v)
+    end document
     junk after document element: line 1, column 12
     """
 
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1250,6 +1250,7 @@
         self._close_file = close_source
         self._events = []
         self._index = 0
+        self._error = None
         self.root = self._root = None
         self._parser = parser
         # wire up the parser for event reporting
@@ -1291,24 +1292,31 @@
         while 1:
             try:
                 item = self._events[self._index]
+                self._index += 1
+                return item
             except IndexError:
-                if self._parser is None:
-                    self.root = self._root
-                    if self._close_file:
-                        self._file.close()
-                    raise StopIteration
-                # load event buffer
-                del self._events[:]
-                self._index = 0
-                data = self._file.read(16384)
-                if data:
+                pass
+            if self._error:
+                e = self._error
+                self._error = None
+                raise e
+            if self._parser is None:
+                self.root = self._root
+                if self._close_file:
+                    self._file.close()
+                raise StopIteration
+            # load event buffer
+            del self._events[:]
+            self._index = 0
+            data = self._file.read(16384)
+            if data:
+                try:
                     self._parser.feed(data)
-                else:
-                    self._root = self._parser.close()
-                    self._parser = None
+                except SyntaxError as exc:
+                    self._error = exc
             else:
-                self._index = self._index + 1
-                return item
+                self._root = self._parser.close()
+                self._parser = None
 
     def __iter__(self):
         return self
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,8 @@
 Library
 -------
 
+- Issue #2892: preserve iterparse events in case of SyntaxError.
+
 - Issue #670664: Fix HTMLParser to correctly handle the content of
   ``<script>...</script>`` and ``<style>...</style>``.
 
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3000,6 +3000,7 @@
         "  self._file = file\n"
         "  self._events = []\n"
         "  self._index = 0\n"
+        "  self._error = None\n"
         "  self.root = self._root = None\n"
         "  b = cElementTree.TreeBuilder()\n"
         "  self._parser = cElementTree.XMLParser(b)\n"
@@ -3008,24 +3009,31 @@
         "  while 1:\n"
         "    try:\n"
         "      item = self._events[self._index]\n"
+        "      self._index += 1\n"
+        "      return item\n"
         "    except IndexError:\n"
-        "      if self._parser is None:\n"
-        "        self.root = self._root\n"
-        "        if self._close_file:\n"
-        "          self._file.close()\n"
-        "        raise StopIteration\n"
-        "      # load event buffer\n"
-        "      del self._events[:]\n"
-        "      self._index = 0\n"
-        "      data = self._file.read(16384)\n"
-        "      if data:\n"
+        "      pass\n"
+        "    if self._error:\n"
+        "      e = self._error\n"
+        "      self._error = None\n"
+        "      raise e\n"
+        "    if self._parser is None:\n"
+        "      self.root = self._root\n"
+        "      if self._close_file:\n"
+        "        self._file.close()\n"
+        "      raise StopIteration\n"
+        "    # load event buffer\n"
+        "    del self._events[:]\n"
+        "    self._index = 0\n"
+        "    data = self._file.read(16384)\n"
+        "    if data:\n"
+        "      try:\n"
         "        self._parser.feed(data)\n"
-        "      else:\n"
-        "        self._root = self._parser.close()\n"
-        "        self._parser = None\n"
+        "      except SyntaxError as exc:\n"
+        "        self._error = exc\n"
         "    else:\n"
-        "      self._index = self._index + 1\n"
-        "      return item\n"
+        "      self._root = self._parser.close()\n"
+        "      self._parser = None\n"
         " def __iter__(self):\n"
         "  return self\n"
         "cElementTree.iterparse = iterparse\n"

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list