[Python-checkins] cpython (merge default -> default): Merge

antoine.pitrou python-checkins at python.org
Tue Oct 4 19:15:58 CEST 2011


http://hg.python.org/cpython/rev/ec6ee2a82583
changeset:   72669:ec6ee2a82583
parent:      72668:e6cc71820bf3
parent:      72666:f39b26ca7f3d
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Tue Oct 04 19:11:34 2011 +0200
summary:
  Merge

files:
  Lib/sre_compile.py              |   4 +++-
  Lib/test/test_builtin.py        |   3 +--
  Lib/test/test_codeccallbacks.py |  16 ++++------------
  Lib/test/test_multibytecodec.py |   7 +------
  Lib/test/test_unicode.py        |  20 ++++----------------
  Tools/pybench/pybench.py        |   1 +
  Tools/unicode/comparecodecs.py  |   2 +-
  7 files changed, 15 insertions(+), 38 deletions(-)


diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -318,11 +318,13 @@
                 # XXX: could expand category
                 return charset # cannot compress
     except IndexError:
-        # non-BMP characters
+        # non-BMP characters; XXX now they should work
         return charset
     if negate:
         if sys.maxunicode != 65535:
             # XXX: negation does not work with big charsets
+            # XXX2: now they should work, but removing this will make the
+            # charmap 17 times bigger
             return charset
         for i in range(65536):
             charmap[i] = not charmap[i]
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -249,8 +249,7 @@
         self.assertEqual(chr(0xff), '\xff')
         self.assertRaises(ValueError, chr, 1<<24)
         self.assertEqual(chr(sys.maxunicode),
-                         str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
-                             'unicode-escape'))
+                         str('\\U0010ffff'.encode("ascii"), 'unicode-escape'))
         self.assertRaises(TypeError, chr)
         self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
         self.assertEqual(chr(0x00010000), "\U00010000")
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -138,22 +138,14 @@
     def test_backslashescape(self):
         # Does the same as the "unicode-escape" encoding, but with different
         # base encodings.
-        sin = "a\xac\u1234\u20ac\u8000"
-        if sys.maxunicode > 0xffff:
-            sin += chr(sys.maxunicode)
-        sout = b"a\\xac\\u1234\\u20ac\\u8000"
-        if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+        sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+        sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
 
-        sout = b"a\xac\\u1234\\u20ac\\u8000"
-        if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+        sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff"
         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
 
-        sout = b"a\xac\\u1234\xa4\\u8000"
-        if sys.maxunicode > 0xffff:
-            sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+        sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 
     def test_decoding_callbacks(self):
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -247,14 +247,9 @@
             self.assertFalse(any(x > 0x80 for x in e))
 
     def test_bug1572832(self):
-        if sys.maxunicode >= 0x10000:
-            myunichr = chr
-        else:
-            myunichr = lambda x: chr(0xD7C0+(x>>10)) + chr(0xDC00+(x&0x3FF))
-
         for x in range(0x10000, 0x110000):
             # Any ISO 2022 codec will cause the segfault
-            myunichr(x).encode('iso_2022_jp', 'ignore')
+            chr(x).encode('iso_2022_jp', 'ignore')
 
 class TestStateful(unittest.TestCase):
     text = '\u4E16\u4E16'
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -13,10 +13,6 @@
 from test import support, string_tests
 import _string
 
-# decorator to skip tests on narrow builds
-requires_wide_build = unittest.skipIf(sys.maxunicode == 65535,
-                                      'requires wide build')
-
 # Error handling (bad decoder return)
 def search_function(encoding):
     def decode1(input, errors="strict"):
@@ -519,7 +515,6 @@
                 self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
 
 
-    @requires_wide_build
     def test_lower(self):
         string_tests.CommonTest.test_lower(self)
         self.assertEqual('\U00010427'.lower(), '\U0001044F')
@@ -530,7 +525,6 @@
         self.assertEqual('X\U00010427x\U0001044F'.lower(),
                          'x\U0001044Fx\U0001044F')
 
-    @requires_wide_build
     def test_upper(self):
         string_tests.CommonTest.test_upper(self)
         self.assertEqual('\U0001044F'.upper(), '\U00010427')
@@ -541,7 +535,6 @@
         self.assertEqual('X\U00010427x\U0001044F'.upper(),
                          'X\U00010427X\U00010427')
 
-    @requires_wide_build
     def test_capitalize(self):
         string_tests.CommonTest.test_capitalize(self)
         self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
@@ -554,7 +547,6 @@
         self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
                          'X\U0001044Fx\U0001044F')
 
-    @requires_wide_build
     def test_title(self):
         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
         self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -569,7 +561,6 @@
         self.assertEqual('X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
                          'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
 
-    @requires_wide_build
     def test_swapcase(self):
         string_tests.CommonTest.test_swapcase(self)
         self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
@@ -1114,15 +1105,12 @@
     def test_codecs_utf8(self):
         self.assertEqual(''.encode('utf-8'), b'')
         self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
-        if sys.maxunicode == 65535:
-            self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
-            self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
+        self.assertEqual('\U00010002'.encode('utf-8'), b'\xf0\x90\x80\x82')
+        self.assertEqual('\U00023456'.encode('utf-8'), b'\xf0\xa3\x91\x96')
         self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
         self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
-        if sys.maxunicode == 65535:
-            self.assertEqual(
-                ('\ud800\udc02'*1000).encode('utf-8'),
-                b'\xf0\x90\x80\x82'*1000)
+        self.assertEqual(('\U00010002'*10).encode('utf-8'),
+                         b'\xf0\x90\x80\x82'*10)
         self.assertEqual(
             '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
             '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
diff --git a/Tools/pybench/pybench.py b/Tools/pybench/pybench.py
--- a/Tools/pybench/pybench.py
+++ b/Tools/pybench/pybench.py
@@ -107,6 +107,7 @@
         print('Getting machine details...')
     buildno, builddate = platform.python_build()
     python = platform.python_version()
+    # XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+?
     if sys.maxunicode == 65535:
         # UCS2 build (standard)
         unitype = 'UCS2'
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
--- a/Tools/unicode/comparecodecs.py
+++ b/Tools/unicode/comparecodecs.py
@@ -14,7 +14,7 @@
     print('Comparing encoding/decoding of   %r and   %r' % (encoding1, encoding2))
     mismatch = 0
     # Check encoding
-    for i in range(sys.maxunicode):
+    for i in range(sys.maxunicode+1):
         u = chr(i)
         try:
             c1 = u.encode(encoding1)

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list