[Python-checkins] r85764 - python/branches/issue4388/Lib/test/test_cmd_line.py

victor.stinner python-checkins at python.org
Thu Oct 21 00:37:39 CEST 2010


Author: victor.stinner
Date: Thu Oct 21 00:37:38 2010
New Revision: 85764

Log:
test_cmd_line: more osx utf8 checks


Modified:
   python/branches/issue4388/Lib/test/test_cmd_line.py

Modified: python/branches/issue4388/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/issue4388/Lib/test/test_cmd_line.py	(original)
+++ python/branches/issue4388/Lib/test/test_cmd_line.py	Thu Oct 21 00:37:38 2010
@@ -119,14 +119,35 @@
 
     @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
     def test_osx_utf8_cmdline(self):
-        env = os.environ.copy()
-        # C locale gives ASCII locale encoding, but Python uses UTF-8
-        # to parse the command line arguments
-        env['LC_ALL'] = 'C'
-        non_ascii_char = "\xe9"
-        command = "assert('%s' == %a)" % (non_ascii_char, non_ascii_char)
-        command = command.encode('utf-8')
-        assert_python_ok('-c', command, env=env)
+        def check_output(text):
+            decoded = text.decode('utf8', 'surrogateescape')
+            expected = ascii(decoded).encode('ascii') + b'\n'
+
+            env = os.environ.copy()
+            # C locale gives ASCII locale encoding, but Python uses UTF-8
+            # to parse the command line arguments on Mac OS X
+            env['LC_ALL'] = 'C'
+
+            p = subprocess.Popen(
+                (sys.executable, "-c", "import sys; print(ascii(sys.argv[1]))", text),
+                stdout=subprocess.PIPE,
+                env=env)
+            stdout, stderr = p.communicate()
+            self.assertEqual(stdout, expected)
+            self.assertEqual(p.returncode, 0)
+
+        # test valid utf-8
+        text = 'e:\xe9, euro:\u20ac, non-bmp:\U0010ffff'.encode('utf-8')
+        check_output(text)
+
+        # test invalid utf-8
+        text = (
+            b'\xff'         # invalid byte
+            b'\xc3\xa9'     # valid utf-8 character
+            b'\xc3\xff'     # invalid byte sequence
+            b'\xed\xa0\x80' # lone surrogate character (invalid)
+        )
+        check_output(text)
 
     def test_unbuffered_output(self):
         # Test expected operation of the '-u' switch


More information about the Python-checkins mailing list