[pypy-svn] r48618 - pypy/branch/more-unicode-improvements/pypy/module/_codecs

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Nov 12 23:02:18 CET 2007


Author: cfbolz
Date: Mon Nov 12 23:02:18 2007
New Revision: 48618

Modified:
   pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
Log:
this is no longer needed either: remove PyUnicode_DecodeUTF16Stateful and
its STORECHAR helper from app_codecs.py


Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py	Mon Nov 12 23:02:18 2007
@@ -604,129 +604,6 @@
     return p
 
 
-def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=True):
-
-    bo = 0       #/* assume native ordering by default */
-    consumed = 0
-    errmsg = ""
-
-    if sys.byteorder == 'little':
-        ihi = 1
-        ilo = 0
-    else:
-        ihi = 0
-        ilo = 1
-    
-
-    #/* Unpack UTF-16 encoded data */
-
-##    /* Check for BOM marks (U+FEFF) in the input and adjust current
-##       byte order setting accordingly. In native mode, the leading BOM
-##       mark is skipped, in all other modes, it is copied to the output
-##       stream as-is (giving a ZWNBSP character). */
-    q = 0
-    p = []
-    if byteorder == 'native':
-        if (size >= 2):
-            bom = (ord(s[ihi]) << 8) | ord(s[ilo])
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if sys.byteorder == 'little':
-                if (bom == 0xFEFF):
-                    q += 2
-                    bo = -1
-                elif bom == 0xFFFE:
-                    q += 2
-                    bo = 1
-            else:
-                if bom == 0xFEFF:
-                    q += 2
-                    bo = 1
-                elif bom == 0xFFFE:
-                    q += 2
-                    bo = -1
-    elif byteorder == 'little':
-        bo = -1
-    else:
-        bo = 1
-        
-    if (size == 0):
-        return [u''], 0, bo
-    
-    if (bo == -1):
-        #/* force LE */
-        ihi = 1
-        ilo = 0
-
-    elif (bo == 1):
-        #/* force BE */
-        ihi = 0
-        ilo = 1
-
-    while (q < len(s)):
-    
-        #/* remaining bytes at the end? (size should be even) */
-        if (len(s)-q<2):
-            if not final:
-                break
-            errmsg = "truncated data"
-            startinpos = q
-            endinpos = len(s)
-            unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-#           /* The remaining input chars are ignored if the callback
-##             chooses to skip the input */
-    
-        ch = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
-        q += 2
-    
-        if (ch < 0xD800 or ch > 0xDFFF):
-            p += unichr(ch)
-            continue
-    
-        #/* UTF-16 code pair: */
-        if (q >= len(s)):
-            errmsg = "unexpected end of data"
-            startinpos = q-2
-            endinpos = len(s)
-            unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-
-        if (0xD800 <= ch and ch <= 0xDBFF):
-            ch2 = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
-            q += 2
-            if (0xDC00 <= ch2 and ch2 <= 0xDFFF):
-    #ifndef Py_UNICODE_WIDE
-                if sys.maxunicode < 65536:
-                    p += unichr(ch)
-                    p += unichr(ch2)
-                else:
-                    p += unichr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000)
-    #endif
-                continue
-
-            else:
-                errmsg = "illegal UTF-16 surrogate"
-                startinpos = q-4
-                endinpos = startinpos+2
-                unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-           
-        errmsg = "illegal encoding"
-        startinpos = q-2
-        endinpos = startinpos+2
-        unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-        
-    return p, q, bo
-
-# moved out of local scope, especially because it didn't
-# have any nested variables.
-
-def STORECHAR(CH, byteorder):
-    hi = chr(((CH) >> 8) & 0xff)
-    lo = chr((CH) & 0xff)
-    if byteorder == 'little':
-        return [lo, hi]
-    else:
-        return [hi, lo]
-
-
 def PyUnicode_DecodeMBCS(s, size, errors):
     pass
 



More information about the Pypy-commit mailing list