[Python-checkins] cpython (merge 3.4 -> default): Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString().

serhiy.storchaka python-checkins at python.org
Thu Apr 2 22:22:08 CEST 2015


https://hg.python.org/cpython/rev/846c165cf643
changeset:   95404:846c165cf643
parent:      95401:407883c52bf3
parent:      95403:ca8666310eb3
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Thu Apr 02 23:07:19 2015 +0300
summary:
  Issue #10590: Added tests for xml.sax.parse() and xml.sax.parseString().

files:
  Lib/test/test_sax.py |  124 ++++++++++++++++++++++++++++++-
  1 files changed, 123 insertions(+), 1 deletions(-)


diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -16,10 +16,11 @@
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from io import BytesIO, StringIO
 import codecs
+import gc
 import os.path
 import shutil
 from test import support
-from test.support import findfile, run_unittest
+from test.support import findfile, run_unittest, TESTFN
 
 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
 TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
@@ -95,6 +96,126 @@
         self.assertEqual(attrs["attr"], "val")
         self.assertEqual(attrs.getQNameByName("attr"), "attr")
 
+
+def xml_str(doc, encoding=None):
+    if encoding is None:
+        return doc
+    return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
+
+def xml_bytes(doc, encoding, decl_encoding=...):
+    if decl_encoding is ...:
+        decl_encoding = encoding
+    return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
+
+def make_xml_file(doc, encoding, decl_encoding=...):
+    if decl_encoding is ...:
+        decl_encoding = encoding
+    with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
+        f.write(xml_str(doc, decl_encoding))
+
+
+class ParseTest(unittest.TestCase):
+    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'
+
+    def tearDown(self):
+        support.unlink(TESTFN)
+
+    def check_parse(self, f):
+        from xml.sax import parse
+        result = StringIO()
+        parse(f, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
+
+    def test_parse_text(self):
+        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
+                     'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parse(StringIO(xml_str(self.data, encoding)))
+            make_xml_file(self.data, encoding)
+            with open(TESTFN, 'r', encoding=encoding) as f:
+                self.check_parse(f)
+            self.check_parse(StringIO(self.data))
+            make_xml_file(self.data, encoding, None)
+            with open(TESTFN, 'r', encoding=encoding) as f:
+                self.check_parse(f)
+
+    def test_parse_bytes(self):
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
+            make_xml_file(self.data, encoding)
+            self.check_parse(TESTFN)
+            with open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+            self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
+            make_xml_file(self.data, encoding, None)
+            self.check_parse(TESTFN)
+            with open(TESTFN, 'rb') as f:
+                self.check_parse(f)
+        # accept UTF-8 with BOM
+        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
+        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
+        self.check_parse(TESTFN)
+        with open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
+        make_xml_file(self.data, 'utf-8-sig', None)
+        self.check_parse(TESTFN)
+        with open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # accept data with declared encoding
+        self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
+        make_xml_file(self.data, 'iso-8859-1')
+        self.check_parse(TESTFN)
+        with open(TESTFN, 'rb') as f:
+            self.check_parse(f)
+        # fail on UTF-8-incompatible data without a declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with support.check_warnings(('unclosed file', ResourceWarning)):
+            # XXX Failed parser leaks an opened file.
+            with self.assertRaises(SAXException):
+                self.check_parse(TESTFN)
+            # Collect leaked file.
+            gc.collect()
+        with open(TESTFN, 'rb') as f:
+            with self.assertRaises(SAXException):
+                self.check_parse(f)
+
+    def test_parse_InputSource(self):
+        # accept data with no declared encoding when one is specified explicitly
+        make_xml_file(self.data, 'iso-8859-1', None)
+        with open(TESTFN, 'rb') as f:
+            input = InputSource()
+            input.setByteStream(f)
+            input.setEncoding('iso-8859-1')
+            self.check_parse(input)
+
+    def check_parseString(self, s):
+        from xml.sax import parseString
+        result = StringIO()
+        parseString(s, XMLGenerator(result, 'utf-8'))
+        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
+
+    def test_parseString_bytes(self):
+        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
+        # UTF-16 is autodetected
+        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
+        for encoding in encodings:
+            self.check_parseString(xml_bytes(self.data, encoding))
+            self.check_parseString(xml_bytes(self.data, encoding, None))
+        # accept UTF-8 with BOM
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
+        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
+        # accept data with declared encoding
+        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
+        # fail on UTF-8-incompatible data without a declared encoding
+        with self.assertRaises(SAXException):
+            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
+
 class MakeParserTest(unittest.TestCase):
     def test_make_parser2(self):
         # Creating parsers several times in a row should succeed.
@@ -1115,6 +1236,7 @@
 
 def test_main():
     run_unittest(MakeParserTest,
+                 ParseTest,
                  SaxutilsTest,
                  PrepareInputSourceTest,
                  StringXmlgenTest,

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list