[pypy-svn] r48618 - pypy/branch/more-unicode-improvements/pypy/module/_codecs
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Nov 12 23:02:18 CET 2007
Author: cfbolz
Date: Mon Nov 12 23:02:18 2007
New Revision: 48618
Modified:
pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
Log:
this is no longer needed either
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py Mon Nov 12 23:02:18 2007
@@ -604,129 +604,6 @@
return p
-def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=True):
-
- bo = 0 #/* assume native ordering by default */
- consumed = 0
- errmsg = ""
-
- if sys.byteorder == 'little':
- ihi = 1
- ilo = 0
- else:
- ihi = 0
- ilo = 1
-
-
- #/* Unpack UTF-16 encoded data */
-
-## /* Check for BOM marks (U+FEFF) in the input and adjust current
-## byte order setting accordingly. In native mode, the leading BOM
-## mark is skipped, in all other modes, it is copied to the output
-## stream as-is (giving a ZWNBSP character). */
- q = 0
- p = []
- if byteorder == 'native':
- if (size >= 2):
- bom = (ord(s[ihi]) << 8) | ord(s[ilo])
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if sys.byteorder == 'little':
- if (bom == 0xFEFF):
- q += 2
- bo = -1
- elif bom == 0xFFFE:
- q += 2
- bo = 1
- else:
- if bom == 0xFEFF:
- q += 2
- bo = 1
- elif bom == 0xFFFE:
- q += 2
- bo = -1
- elif byteorder == 'little':
- bo = -1
- else:
- bo = 1
-
- if (size == 0):
- return [u''], 0, bo
-
- if (bo == -1):
- #/* force LE */
- ihi = 1
- ilo = 0
-
- elif (bo == 1):
- #/* force BE */
- ihi = 0
- ilo = 1
-
- while (q < len(s)):
-
- #/* remaining bytes at the end? (size should be even) */
- if (len(s)-q<2):
- if not final:
- break
- errmsg = "truncated data"
- startinpos = q
- endinpos = len(s)
- unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-# /* The remaining input chars are ignored if the callback
-## chooses to skip the input */
-
- ch = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
- q += 2
-
- if (ch < 0xD800 or ch > 0xDFFF):
- p += unichr(ch)
- continue
-
- #/* UTF-16 code pair: */
- if (q >= len(s)):
- errmsg = "unexpected end of data"
- startinpos = q-2
- endinpos = len(s)
- unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-
- if (0xD800 <= ch and ch <= 0xDBFF):
- ch2 = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
- q += 2
- if (0xDC00 <= ch2 and ch2 <= 0xDFFF):
- #ifndef Py_UNICODE_WIDE
- if sys.maxunicode < 65536:
- p += unichr(ch)
- p += unichr(ch2)
- else:
- p += unichr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000)
- #endif
- continue
-
- else:
- errmsg = "illegal UTF-16 surrogate"
- startinpos = q-4
- endinpos = startinpos+2
- unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-
- errmsg = "illegal encoding"
- startinpos = q-2
- endinpos = startinpos+2
- unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
-
- return p, q, bo
-
-# moved out of local scope, especially because it didn't
-# have any nested variables.
-
-def STORECHAR(CH, byteorder):
- hi = chr(((CH) >> 8) & 0xff)
- lo = chr((CH) & 0xff)
- if byteorder == 'little':
- return [lo, hi]
- else:
- return [hi, lo]
-
-
def PyUnicode_DecodeMBCS(s, size, errors):
pass
More information about the Pypy-commit mailing list