[pypy-commit] pypy jitframe-on-heap: merge default

fijal noreply at buildbot.pypy.org
Sun Jan 20 16:01:23 CET 2013


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: jitframe-on-heap
Changeset: r60241:53ce6999a6b1
Date: 2013-01-20 17:01 +0200
http://bitbucket.org/pypy/pypy/changeset/53ce6999a6b1/

Log:	merge default

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -9,7 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.runicode import MAXUNICODE
 from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0
-from rpython.rlib.unicodedata.ucd import code_to_unichr, ORD
+from rpython.rlib.runicode import code_to_unichr, ORD
 import sys
 
 
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -3,9 +3,91 @@
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rlib.unicodedata import unicodedb
-from rpython.rlib.unicodedata.ucd import MAXUNICODE, UNICHR, BYTEORDER
+from rpython.rtyper.lltypesystem import lltype, rffi
 
 
+if rffi.sizeof(lltype.UniChar) == 4:
+    MAXUNICODE = 0x10ffff
+else:
+    MAXUNICODE = 0xffff
+
+BYTEORDER = sys.byteorder
+
+if MAXUNICODE > sys.maxunicode:
+    # A version of unichr which allows codes outside the BMP
+    # even on narrow unicode builds.
+    # It will be used when interpreting code on top of a UCS2 CPython,
+    # when sizeof(wchar_t) == 4.
+    # Note that Python3 uses a similar implementation.
+    def UNICHR(c):
+        assert not we_are_translated()
+        if c <= sys.maxunicode or c > MAXUNICODE:
+            return unichr(c)
+        else:
+            c -= 0x10000
+            return (unichr(0xD800 + (c >> 10)) +
+                    unichr(0xDC00 + (c & 0x03FF)))
+    UNICHR._flowspace_rewrite_directly_as_ = unichr
+    # ^^^ NB.: for translation, it's essential to use this hack instead
+    # of calling unichr() from UNICHR(), because unichr() detects if there
+    # is a "try:except ValueError" immediately around it.
+
+    def ORD(u):
+        assert not we_are_translated()
+        if isinstance(u, unicode) and len(u) == 2:
+            ch1 = ord(u[0])
+            ch2 = ord(u[1])
+            if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+                return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+        return ord(u)
+    ORD._flowspace_rewrite_directly_as_ = ord
+
+else:
+    UNICHR = unichr
+    ORD = ord
+
+if MAXUNICODE > 0xFFFF:
+    def code_to_unichr(code):
+        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+            # Host CPython is narrow build, generate surrogates
+            return UNICHR(code)
+        else:
+            return unichr(code)
+else:
+    def code_to_unichr(code):
+        # generate surrogates for large codes
+        return UNICHR(code)
+
+
+def UNICHR(c):
+    if c <= sys.maxunicode and c <= MAXUNICODE:
+        return unichr(c)
+    else:
+        c -= 0x10000
+        return (unichr(0xD800 + (c >> 10)) +
+                unichr(0xDC00 + (c & 0x03FF)))
+
+def ORD(u):
+    assert isinstance(u, unicode)
+    if len(u) == 1:
+        return ord(u[0])
+    elif len(u) == 2:
+        ch1 = ord(u[0])
+        ch2 = ord(u[1])
+        if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+            return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+    raise ValueError
+
+def _STORECHAR(result, CH, byteorder):
+    hi = chr(((CH) >> 8) & 0xff)
+    lo = chr((CH) & 0xff)
+    if byteorder == 'little':
+        result.append(lo)
+        result.append(hi)
+    else:
+        result.append(hi)
+        result.append(lo)
+
 def default_unicode_error_decode(errors, encoding, msg, s,
                                  startingpos, endingpos):
     if errors == 'replace':
diff --git a/rpython/rlib/unicodedata/ucd.py b/rpython/rlib/unicodedata/ucd.py
deleted file mode 100644
--- a/rpython/rlib/unicodedata/ucd.py
+++ /dev/null
@@ -1,5 +0,0 @@
-
-import sys
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.objectmodel import we_are_translated
-


More information about the pypy-commit mailing list