[pypy-commit] pypy py3k: also handle surrogates when hosted on a narrow build

pjenvey noreply at buildbot.pypy.org
Fri May 23 02:27:05 CEST 2014


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k
Changeset: r71681:556155656b47
Date: 2014-05-22 17:26 -0700
http://bitbucket.org/pypy/pypy/changeset/556155656b47/

Log:	also handle surrogates when hosted on a narrow build

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -11,6 +11,7 @@
 else:
     MAXUNICODE = 0xffff
 
+NARROW_HOST = not we_are_translated() and sys.maxunicode == 0xFFFF
 BYTEORDER = sys.byteorder
 
 # python 2.7 has a preview of py3k behavior, so those functions
@@ -63,7 +64,7 @@
 
 if MAXUNICODE > 0xFFFF:
     def code_to_unichr(code):
-        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+        if NARROW_HOST:
             # Host CPython is narrow build, generate surrogates
             return unichr_returns_surrogate(code)
         else:
@@ -334,7 +335,8 @@
                         ch2 = ord(s[pos])
                         # Check for low surrogate and combine the two to
                         # form a UCS4 value
-                        if ((allow_surrogates or MAXUNICODE < 65536) and
+                        if ((allow_surrogates or MAXUNICODE < 65536
+                             or NARROW_HOST) and
                             ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF):
                             ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
                             assert ch3 >= 0
@@ -1342,8 +1344,7 @@
 
             # The following logic is enabled only if MAXUNICODE == 0xffff, or
             # for testing on top of a host Python where sys.maxunicode == 0xffff
-            if ((MAXUNICODE < 65536 or
-                    (not we_are_translated() and sys.maxunicode < 65536))
+            if ((MAXUNICODE < 65536 or NARROW_HOST)
                 and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
                 # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
                 pos += 1


More information about the pypy-commit mailing list