[Python-checkins] r74429 - in python/trunk: Lib/test/test_pyexpat.py Misc/ACKS Misc/NEWS Modules/expat/xmltok_impl.c

brett.cannon python-checkins at python.org
Thu Aug 13 21:27:13 CEST 2009


Author: brett.cannon
Date: Thu Aug 13 21:27:12 2009
New Revision: 74429

Log:
Expat could crash when given certain malformed input because its
tokenizing step would never stop.

Thanks to Ivan Krstić for the patch.


Modified:
   python/trunk/Lib/test/test_pyexpat.py
   python/trunk/Misc/ACKS
   python/trunk/Misc/NEWS
   python/trunk/Modules/expat/xmltok_impl.c

Modified: python/trunk/Lib/test/test_pyexpat.py
==============================================================================
--- python/trunk/Lib/test/test_pyexpat.py	(original)
+++ python/trunk/Lib/test/test_pyexpat.py	Thu Aug 13 21:27:12 2009
@@ -559,6 +559,24 @@
         parser.Parse(xml2, 1)
         self.assertEquals(self.n, 4)
 
+class MalformedInputText(unittest.TestCase):
+    def test1(self):
+        xml = "\0\r\n"
+        parser = expat.ParserCreate()
+        try:
+            parser.Parse(xml, True)
+            self.fail()
+        except expat.ExpatError as e:
+            self.assertEquals(str(e), 'no element found: line 2, column 1')
+
+    def test2(self):
+        xml = "<?xml version\xc2\x85='1.0'?>\r\n"
+        parser = expat.ParserCreate()
+        try:
+            parser.Parse(xml, True)
+            self.fail()
+        except expat.ExpatError as e:
+            self.assertEquals(str(e), 'XML declaration not well-formed: line 1, column 14')
 
 def test_main():
     run_unittest(SetAttributeTest,
@@ -569,7 +587,8 @@
                  HandlerExceptionTest,
                  PositionTest,
                  sf1296433Test,
-                 ChardataBufferTest)
+                 ChardataBufferTest,
+                 MalformedInputText)
 
 if __name__ == "__main__":
     test_main()

Modified: python/trunk/Misc/ACKS
==============================================================================
--- python/trunk/Misc/ACKS	(original)
+++ python/trunk/Misc/ACKS	Thu Aug 13 21:27:12 2009
@@ -183,6 +183,7 @@
 Dima Dorfman
 Cesar Douady
 Dean Draayer
+Fred L. Drake, Jr.
 John DuBois
 Paul Dubois
 Graham Dumpleton
@@ -371,7 +372,6 @@
 Lucas de Jonge
 John Jorgensen
 Jens B. Jorgensen
-Fred L. Drake, Jr.
 Andreas Jung
 Tattoo Mabonzo K.
 Bob Kahn
@@ -408,6 +408,7 @@
 Michael Kremer
 Fabian Kreutz
 Hannu Krosing
+Ivan Krstić
 Andrew Kuchling
 Vladimir Kushnir
 Cameron Laird

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Aug 13 21:27:12 2009
@@ -1227,6 +1227,9 @@
 Extension Modules
 -----------------
 
+- Fix a segfault in expat where specially crafted input led to the
+  tokenizer not stopping.
+
 - Issue #6561: '\d' in a regex now matches only characters with
   Unicode category 'Nd' (Number, Decimal Digit).  Previously it also
   matched characters with category 'No'.

Modified: python/trunk/Modules/expat/xmltok_impl.c
==============================================================================
--- python/trunk/Modules/expat/xmltok_impl.c	(original)
+++ python/trunk/Modules/expat/xmltok_impl.c	Thu Aug 13 21:27:12 2009
@@ -1741,7 +1741,7 @@
                        const char *end,
                        POSITION *pos)
 {
-  while (ptr != end) {
+  while (ptr < end) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: \


More information about the Python-checkins mailing list