[pypy-svn] r75591 - pypy/branch/interplevel-codecs/pypy/rlib

afa at codespeak.net afa at codespeak.net
Fri Jun 25 15:49:12 CEST 2010


Author: afa
Date: Fri Jun 25 15:49:11 2010
New Revision: 75591

Modified:
   pypy/branch/interplevel-codecs/pypy/rlib/runicode.py
Log:
Move code, order functions by codec


Modified: pypy/branch/interplevel-codecs/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/interplevel-codecs/pypy/rlib/runicode.py	(original)
+++ pypy/branch/interplevel-codecs/pypy/rlib/runicode.py	Fri Jun 25 15:49:11 2010
@@ -55,8 +55,8 @@
     assert isinstance(u, unicode)
     raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
 
-# ____________________________________________________________ 
-# unicode decoding
+# ____________________________________________________________
+# utf-8
 
 utf8_code_length = [
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -199,6 +199,51 @@
 
     return u"".join(result), pos
 
+def _encodeUCS4(result, ch):
+    # Encode UCS4 Unicode ordinals
+    result.append((chr((0xf0 | (ch >> 18)))))
+    result.append((chr((0x80 | ((ch >> 12) & 0x3f)))))
+    result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
+    result.append((chr((0x80 | (ch & 0x3f)))))
+
+def unicode_encode_utf_8(s, size, errors, errorhandler=None):
+    assert(size >= 0)
+    result = []
+    i = 0
+    while i < size:
+        ch = ord(s[i])
+        i += 1
+        if (ch < 0x80):
+            # Encode ASCII 
+            result.append(chr(ch))
+        elif (ch < 0x0800) :
+            # Encode Latin-1 
+            result.append(chr((0xc0 | (ch >> 6))))
+            result.append(chr((0x80 | (ch & 0x3f))))
+        else:
+            # Encode UCS2 Unicode ordinals
+            if (ch < 0x10000):
+                # Special case: check for high surrogate
+                if (0xD800 <= ch and ch <= 0xDBFF and i != size) :
+                    ch2 = ord(s[i])
+                    # Check for low surrogate and combine the two to
+                    # form a UCS4 value
+                    if (0xDC00 <= ch2 and ch2 <= 0xDFFF) :
+                        ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
+                        i += 1
+                        _encodeUCS4(result, ch3)
+                        continue
+                # Fall through: handles isolated high surrogates
+                result.append((chr((0xe0 | (ch >> 12)))))
+                result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
+                result.append((chr((0x80 | (ch & 0x3f)))))
+                continue
+            else:
+                _encodeUCS4(result, ch)
+    return "".join(result)
+
+# ____________________________________________________________
+# utf-16
 
 def str_decode_utf_16(s, size, errors, final=True,
                       errorhandler=None):
@@ -320,6 +365,60 @@
             result.append(r)
     return u"".join(result), pos, bo
 
+def _STORECHAR(result, CH, byteorder):
+    hi = chr(((CH) >> 8) & 0xff)
+    lo = chr((CH) & 0xff)
+    if byteorder == 'little':
+        result.append(lo)
+        result.append(hi)
+    else:
+        result.append(hi)
+        result.append(lo)
+
+def unicode_encode_utf_16_helper(s, size, errors,
+                                 errorhandler=None,
+                                 byteorder='little'):
+    result = []
+    if (byteorder == 'native'):
+        _STORECHAR(result, 0xFEFF, BYTEORDER)
+        byteorder = BYTEORDER
+        
+    if size == 0:
+        return ""
+
+    i = 0
+    while i < size:
+        ch = ord(s[i])
+        i += 1
+        ch2 = 0
+        if (ch >= 0x10000) :
+            ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF)
+            ch  = 0xD800 | ((ch-0x10000) >> 10)
+
+        _STORECHAR(result, ch, byteorder)
+        if ch2:
+            _STORECHAR(result, ch2, byteorder)
+
+    return "".join(result)
+
+def unicode_encode_utf_16(s, size, errors,
+                          errorhandler=None):
+    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "native")
+
+
+def unicode_encode_utf_16_be(s, size, errors,
+                             errorhandler=None):
+    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "big")
+
+
+def unicode_encode_utf_16_le(s, size, errors,
+                             errorhandler=None):
+    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "little")
+
+
+# ____________________________________________________________
+# ascii and latin-1
+
 def str_decode_latin_1(s, size, errors, final=False,
                        errorhandler=None):
     # latin1 is equivalent to the first 256 ordinals in Unicode.
@@ -350,54 +449,6 @@
     return u"".join(result), pos
 
 
-# ____________________________________________________________ 
-# unicode encoding 
-
-
-def unicode_encode_utf_8(s, size, errors, errorhandler=None):
-    assert(size >= 0)
-    result = []
-    i = 0
-    while i < size:
-        ch = ord(s[i])
-        i += 1
-        if (ch < 0x80):
-            # Encode ASCII 
-            result.append(chr(ch))
-        elif (ch < 0x0800) :
-            # Encode Latin-1 
-            result.append(chr((0xc0 | (ch >> 6))))
-            result.append(chr((0x80 | (ch & 0x3f))))
-        else:
-            # Encode UCS2 Unicode ordinals
-            if (ch < 0x10000):
-                # Special case: check for high surrogate
-                if (0xD800 <= ch and ch <= 0xDBFF and i != size) :
-                    ch2 = ord(s[i])
-                    # Check for low surrogate and combine the two to
-                    # form a UCS4 value
-                    if (0xDC00 <= ch2 and ch2 <= 0xDFFF) :
-                        ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
-                        i += 1
-                        _encodeUCS4(result, ch3)
-                        continue
-                # Fall through: handles isolated high surrogates
-                result.append((chr((0xe0 | (ch >> 12)))))
-                result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
-                result.append((chr((0x80 | (ch & 0x3f)))))
-                continue
-            else:
-                _encodeUCS4(result, ch)
-    return "".join(result)
-
-def _encodeUCS4(result, ch):
-    # Encode UCS4 Unicode ordinals
-    result.append((chr((0xf0 | (ch >> 18)))))
-    result.append((chr((0x80 | ((ch >> 12) & 0x3f)))))
-    result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
-    result.append((chr((0x80 | (ch & 0x3f)))))
-
-
 def unicode_encode_ucs1_helper(p, size, errors,
                                errorhandler=None, limit=256):
     if errorhandler is None:
@@ -440,57 +491,6 @@
     return res
 
 
-def _STORECHAR(result, CH, byteorder):
-    hi = chr(((CH) >> 8) & 0xff)
-    lo = chr((CH) & 0xff)
-    if byteorder == 'little':
-        result.append(lo)
-        result.append(hi)
-    else:
-        result.append(hi)
-        result.append(lo)
-
-def unicode_encode_utf_16_helper(s, size, errors,
-                                 errorhandler=None,
-                                 byteorder='little'):
-    result = []
-    if (byteorder == 'native'):
-        _STORECHAR(result, 0xFEFF, BYTEORDER)
-        byteorder = BYTEORDER
-        
-    if size == 0:
-        return ""
-
-    i = 0
-    while i < size:
-        ch = ord(s[i])
-        i += 1
-        ch2 = 0
-        if (ch >= 0x10000) :
-            ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF)
-            ch  = 0xD800 | ((ch-0x10000) >> 10)
-
-        _STORECHAR(result, ch, byteorder)
-        if ch2:
-            _STORECHAR(result, ch2, byteorder)
-
-    return "".join(result)
-
-def unicode_encode_utf_16(s, size, errors,
-                          errorhandler=None):
-    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "native")
-
-
-def unicode_encode_utf_16_be(s, size, errors,
-                             errorhandler=None):
-    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "big")
-
-
-def unicode_encode_utf_16_le(s, size, errors,
-                             errorhandler=None):
-    return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "little")
-
-
 # ____________________________________________________________
 # MBCS codecs for Windows
 



More information about the Pypy-commit mailing list