[Python-checkins] cpython (2.7): Issue #11159: SAX parser now supports unicode file names.

serhiy.storchaka python-checkins at python.org
Sat Feb 2 09:44:35 CET 2013


http://hg.python.org/cpython/rev/d3e7aea8a550
changeset:   81909:d3e7aea8a550
branch:      2.7
parent:      81901:2274f3196a44
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Feb 02 10:28:30 2013 +0200
summary:
  Issue #11159: SAX parser now supports unicode file names.

files:
  Lib/test/test_sax.py       |  50 ++++++++++++++++++++++++++
  Lib/xml/sax/expatreader.py |   5 ++-
  Lib/xml/sax/saxutils.py    |  28 +++++++++++--
  Misc/NEWS                  |   2 +
  4 files changed, 79 insertions(+), 6 deletions(-)


diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -14,6 +14,8 @@
 from xml.sax.handler import feature_namespaces
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from cStringIO import StringIO
+import shutil
+import test.test_support as support
 from test.test_support import findfile, run_unittest
 import unittest
 
@@ -384,6 +386,22 @@
 
         self.assertEqual(result.getvalue(), xml_test_out)
 
+    @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+                         'Requires unicode filenames support')
+    def test_expat_file_unicode(self):
+        fname = support.TESTFN_UNICODE
+        shutil.copyfile(TEST_XMLFILE, fname)
+        self.addCleanup(support.unlink, fname)
+
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        parser.parse(open(fname))
+
+        self.assertEqual(result.getvalue(), xml_test_out)
+
     # ===== DTDHandler support
 
     class TestDTDHandler:
@@ -523,6 +541,22 @@
 
         self.assertEqual(result.getvalue(), xml_test_out)
 
+    @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+                         'Requires unicode filenames support')
+    def test_expat_inpsource_sysid_unicode(self):
+        fname = support.TESTFN_UNICODE
+        shutil.copyfile(TEST_XMLFILE, fname)
+        self.addCleanup(support.unlink, fname)
+
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        parser.parse(InputSource(fname))
+
+        self.assertEqual(result.getvalue(), xml_test_out)
+
     def test_expat_inpsource_stream(self):
         parser = create_parser()
         result = StringIO()
@@ -596,6 +630,22 @@
         self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
         self.assertEqual(parser.getPublicId(), None)
 
+    @unittest.skipUnless(hasattr(support, 'TESTFN_UNICODE'),
+                         'Requires unicode filenames support')
+    def test_expat_locator_withinfo_unicode(self):
+        fname = support.TESTFN_UNICODE
+        shutil.copyfile(TEST_XMLFILE, fname)
+        self.addCleanup(support.unlink, fname)
+
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+        parser = create_parser()
+        parser.setContentHandler(xmlgen)
+        parser.parse(fname)
+
+        self.assertEqual(parser.getSystemId(), fname)
+        self.assertEqual(parser.getPublicId(), None)
+
 
 # ===========================================================================
 #
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -108,7 +108,10 @@
 
     def prepareParser(self, source):
         if source.getSystemId() is not None:
-            self._parser.SetBase(source.getSystemId())
+            base = source.getSystemId()
+            if isinstance(base, unicode):
+                base = base.encode('utf-8')
+            self._parser.SetBase(base)
 
     # Redefined setContentHandler to allow changing handlers during parsing
 
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -4,6 +4,7 @@
 """
 
 import os, urlparse, urllib, types
+import sys
 import handler
 import xmlreader
 
@@ -293,14 +294,31 @@
             source.setSystemId(f.name)
 
     if source.getByteStream() is None:
-        sysid = source.getSystemId()
-        basehead = os.path.dirname(os.path.normpath(base))
-        sysidfilename = os.path.join(basehead, sysid)
-        if os.path.isfile(sysidfilename):
+        try:
+            sysid = source.getSystemId()
+            basehead = os.path.dirname(os.path.normpath(base))
+            encoding = sys.getfilesystemencoding()
+            if isinstance(sysid, unicode):
+                if not isinstance(basehead, unicode):
+                    try:
+                        basehead = basehead.decode(encoding)
+                    except UnicodeDecodeError:
+                        sysid = sysid.encode(encoding)
+            else:
+                if isinstance(basehead, unicode):
+                    try:
+                        sysid = sysid.decode(encoding)
+                    except UnicodeDecodeError:
+                        basehead = basehead.encode(encoding)
+            sysidfilename = os.path.join(basehead, sysid)
+            isfile = os.path.isfile(sysidfilename)
+        except UnicodeError:
+            isfile = False
+        if isfile:
             source.setSystemId(sysidfilename)
             f = open(sysidfilename, "rb")
         else:
-            source.setSystemId(urlparse.urljoin(base, sysid))
+            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
             f = urllib.urlopen(source.getSystemId())
 
         source.setByteStream(f)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -202,6 +202,8 @@
 Library
 -------
 
+- Issue #11159: SAX parser now supports unicode file names.
+
 - Issue #6972: The zipfile module no longer overwrites files outside of
   its destination path when extracting malicious zip files.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list