[pypy-svn] r48913 - in pypy/branch/ropes-unicode/pypy: module/_codecs rlib rlib/test
cfbolz at codespeak.net
cfbolz at codespeak.net
Wed Nov 21 19:46:22 CET 2007
Author: cfbolz
Date: Wed Nov 21 19:46:21 2007
New Revision: 48913
Modified:
pypy/branch/ropes-unicode/pypy/module/_codecs/interp_codecs.py
pypy/branch/ropes-unicode/pypy/rlib/runicode.py
pypy/branch/ropes-unicode/pypy/rlib/test/test_runicode.py
Log:
hack even differently, by separating the decode from the encode error handler
Modified: pypy/branch/ropes-unicode/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/module/_codecs/interp_codecs.py (original)
+++ pypy/branch/ropes-unicode/pypy/module/_codecs/interp_codecs.py Wed Nov 21 19:46:21 2007
@@ -8,11 +8,12 @@
self.codec_search_cache = {}
self.codec_error_registry = {}
self.codec_need_encodings = True
- self.error_handler = self.make_errorhandler(space)
+ self.decode_error_handler = self.make_errorhandler(space, True)
+ self.encode_error_handler = self.make_errorhandler(space, False)
- def make_errorhandler(self, space):
+ def make_errorhandler(self, space, decode):
def unicode_call_errorhandler(errors, encoding, reason, input,
- startpos, endpos, decode=True):
+ startpos, endpos):
w_errorhandler = lookup_error(space, errors)
if decode:
@@ -205,7 +206,7 @@
def wrap_encoder(space, uni, errors="strict"):
state = space.fromcache(CodecState)
func = getattr(runicode, rname)
- result = func(uni, len(uni), errors, state.error_handler)
+ result = func(uni, len(uni), errors, state.encode_error_handler)
return space.newtuple([space.wrap(result), space.wrap(len(result))])
wrap_encoder.unwrap_spec = [ObjSpace, unicode, str]
globals()[name] = wrap_encoder
@@ -218,7 +219,7 @@
state = space.fromcache(CodecState)
func = getattr(runicode, rname)
result, consumed = func(string, len(string), errors,
- final, state.error_handler)
+ final, state.decode_error_handler)
return space.newtuple([space.wrap(result), space.wrap(consumed)])
wrap_decoder.unwrap_spec = [ObjSpace, str, str, W_Root]
globals()[name] = wrap_decoder
@@ -259,7 +260,7 @@
if final:
consumed = 0
res, consumed, byteorder = runicode.str_decode_utf_16_helper(
- data, len(data), errors, final, state.error_handler, byteorder)
+ data, len(data), errors, final, state.decode_error_handler, byteorder)
return space.newtuple([space.wrap(res), space.wrap(consumed),
space.wrap(byteorder)])
utf_16_ex_decode.unwrap_spec = [ObjSpace, str, str, int, W_Root]
Modified: pypy/branch/ropes-unicode/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rlib/runicode.py (original)
+++ pypy/branch/ropes-unicode/pypy/rlib/runicode.py Wed Nov 21 19:46:21 2007
@@ -5,14 +5,17 @@
BYTEORDER = sys.byteorder
-def raise_unicode_exception(errors, encoding, msg, s, startingpos, endingpos,
- decode=True):
- if decode:
- raise UnicodeDecodeError(
- encoding, s[startingpos], startingpos, endingpos, msg)
- else:
- raise UnicodeEncodeError(
- encoding, s[startingpos], startingpos, endingpos, msg)
+def raise_unicode_exception_decode(errors, encoding, msg, s,
+ startingpos, endingpos):
+ assert isinstance(s, str)
+ raise UnicodeDecodeError(
+ encoding, s[startingpos], startingpos, endingpos, msg)
+
+def raise_unicode_exception_encode(errors, encoding, msg, u,
+ startingpos, endingpos):
+ assert isinstance(u, unicode)
+ raise UnicodeEncodeError(
+ encoding, u[startingpos], startingpos, endingpos, msg)
# ____________________________________________________________
# unicode decoding
@@ -37,7 +40,7 @@
]
def str_decode_utf_8(s, size, errors, final=False,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
if (size == 0):
return u'', 0
result = []
@@ -158,25 +161,25 @@
def str_decode_utf_16(s, size, errors, final=True,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
errorhandler, "native")
return result, length
def str_decode_utf_16_be(s, size, errors, final=True,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
errorhandler, "big")
return result, length
def str_decode_utf_16_le(s, size, errors, final=True,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
errorhandler, "little")
return result, length
def str_decode_utf_16_helper(s, size, errors, final=True,
- errorhandler=raise_unicode_exception,
+ errorhandler=raise_unicode_exception_decode,
byteorder="native"):
bo = 0
@@ -277,7 +280,7 @@
return u"".join(result), pos, bo
def str_decode_latin_1(s, size, errors, final=False,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
# latin1 is equivalent to the first 256 ordinals in Unicode.
pos = 0
result = []
@@ -288,7 +291,7 @@
def str_decode_ascii(s, size, errors, final=False,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_decode):
# ASCII is equivalent to the first 128 ordinals in Unicode.
result = []
pos = 0
@@ -308,7 +311,7 @@
# unicode encoding
-def unicode_encode_utf_8(s, size, errors, errorhandler=raise_unicode_exception):
+def unicode_encode_utf_8(s, size, errors, errorhandler=raise_unicode_exception_encode):
assert(size >= 0)
result = []
i = 0
@@ -353,7 +356,7 @@
def unicode_encode_ucs1_helper(p, size, errors,
- errorhandler=raise_unicode_exception, limit=256):
+ errorhandler=raise_unicode_exception_encode, limit=256):
if limit == 256:
reason = "ordinal not in range(256)"
encoding = "latin-1"
@@ -378,16 +381,16 @@
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
r, pos = errorhandler(errors, encoding, reason, p,
- collstart, collend, False)
+ collstart, collend)
result.append(r)
return "".join(result)
-def unicode_encode_latin_1(p, size, errors, errorhandler=raise_unicode_exception):
+def unicode_encode_latin_1(p, size, errors, errorhandler=raise_unicode_exception_encode):
res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 256)
return res
-def unicode_encode_ascii(p, size, errors, errorhandler=raise_unicode_exception):
+def unicode_encode_ascii(p, size, errors, errorhandler=raise_unicode_exception_encode):
res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 128)
return res
@@ -403,7 +406,7 @@
result.append(lo)
def unicode_encode_utf_16_helper(s, size, errors,
- errorhandler=raise_unicode_exception,
+ errorhandler=raise_unicode_exception_encode,
byteorder='little'):
result = []
if (byteorder == 'native'):
@@ -429,15 +432,15 @@
return "".join(result)
def unicode_encode_utf_16(s, size, errors,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_encode):
return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "native")
def unicode_encode_utf_16_be(s, size, errors,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_encode):
return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "big")
def unicode_encode_utf_16_le(s, size, errors,
- errorhandler=raise_unicode_exception):
+ errorhandler=raise_unicode_exception_encode):
return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "little")
Modified: pypy/branch/ropes-unicode/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rlib/test/test_runicode.py (original)
+++ pypy/branch/ropes-unicode/pypy/rlib/test/test_runicode.py Wed Nov 21 19:46:21 2007
@@ -38,14 +38,13 @@
def checkencodeerror(self, s, encoding, start, stop):
called = [False]
def errorhandler(errors, enc, msg, t, startingpos,
- endingpos, decode):
+ endingpos):
called[0] = True
assert errors == "foo!"
assert enc == encoding
assert t is s
assert start == startingpos
assert stop == endingpos
- assert not decode
return "42424242", stop
encoder = self.getencoder(encoding)
result = encoder(s, len(s), "foo!", errorhandler)
@@ -55,7 +54,7 @@
def checkdecodeerror(self, s, encoding, start, stop, addstuff=True):
called = [0]
def errorhandler(errors, enc, msg, t, startingpos,
- endingpos, decode=True):
+ endingpos):
called[0] += 1
if called[0] == 1:
assert errors == "foo!"
@@ -63,7 +62,6 @@
assert t is s
assert start == startingpos
assert stop == endingpos
- assert decode
return u"42424242", stop
return "", endingpos
decoder = self.getdecoder(encoding)
More information about the Pypy-commit mailing list