[pypy-commit] pypy py3k: also handle surrogates when hosted on a narrow build
pjenvey
noreply at buildbot.pypy.org
Fri May 23 02:27:05 CEST 2014
Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k
Changeset: r71681:556155656b47
Date: 2014-05-22 17:26 -0700
http://bitbucket.org/pypy/pypy/changeset/556155656b47/
Log: also handle surrogates when hosted on a narrow build
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -11,6 +11,7 @@
else:
MAXUNICODE = 0xffff
+NARROW_HOST = not we_are_translated() and sys.maxunicode == 0xFFFF
BYTEORDER = sys.byteorder
# python 2.7 has a preview of py3k behavior, so those functions
@@ -63,7 +64,7 @@
if MAXUNICODE > 0xFFFF:
def code_to_unichr(code):
- if not we_are_translated() and sys.maxunicode == 0xFFFF:
+ if NARROW_HOST:
# Host CPython is narrow build, generate surrogates
return unichr_returns_surrogate(code)
else:
@@ -334,7 +335,8 @@
ch2 = ord(s[pos])
# Check for low surrogate and combine the two to
# form a UCS4 value
- if ((allow_surrogates or MAXUNICODE < 65536) and
+ if ((allow_surrogates or MAXUNICODE < 65536
+ or NARROW_HOST) and
ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF):
ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
assert ch3 >= 0
@@ -1342,8 +1344,7 @@
# The following logic is enabled only if MAXUNICODE == 0xffff, or
# for testing on top of a host Python where sys.maxunicode == 0xffff
- if ((MAXUNICODE < 65536 or
- (not we_are_translated() and sys.maxunicode < 65536))
+ if ((MAXUNICODE < 65536 or NARROW_HOST)
and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
# Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
pos += 1
More information about the pypy-commit mailing list