[pypy-commit] pypy default: Be stricter: like CPython, check that the encoding error handlers
arigo
noreply at buildbot.pypy.org
Mon Jun 6 09:53:49 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r44726:45ef0fa73152
Date: 2011-06-06 09:53 +0200
http://bitbucket.org/pypy/pypy/changeset/45ef0fa73152/
Log: Be stricter: like CPython, check that the codec error handlers (used
for both encoding and decoding) really return a unicode, not a string, as
the first item of their result tuple. Fix a few built-in error handlers
(ignore, replace) to return a unicode.
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -32,7 +32,10 @@
space.wrap(reason))
w_res = space.call_function(w_errorhandler, w_exc)
if (not space.is_true(space.isinstance(w_res, space.w_tuple))
- or space.len_w(w_res) != 2):
+ or space.len_w(w_res) != 2
+ or not space.is_true(space.isinstance(
+ space.getitem(w_res, space.wrap(0)),
+ space.w_unicode))):
if decode:
msg = ("decoding error handler must return "
"(unicode, int) tuple, not %s")
@@ -172,15 +175,7 @@
def ignore_errors(space, w_exc):
check_exception(space, w_exc)
w_end = space.getattr(w_exc, space.wrap('end'))
- if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- return space.newtuple([space.wrap(''), w_end])
- elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
- space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
- return space.newtuple([space.wrap(u''), w_end])
- else:
- typename = space.type(w_exc).getname(space, '?')
- raise operationerrfmt(space.w_TypeError,
- "don't know how to handle %s in error callback", typename)
+ return space.newtuple([space.wrap(u''), w_end])
def replace_errors(space, w_exc):
check_exception(space, w_exc)
@@ -188,7 +183,7 @@
w_end = space.getattr(w_exc, space.wrap('end'))
size = space.int_w(w_end) - space.int_w(w_start)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- text = '?' * size
+ text = u'?' * size
return space.newtuple([space.wrap(text), w_end])
elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
text = u'\ufffd'
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -604,3 +604,11 @@
assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
assert 'cafx\e9'.decode('mbcs') == u'cafx\e9'
+
+ def test_bad_handler_string_result(self):
+ import _codecs
+ def f(exc):
+ return ('foo', exc.end)
+ _codecs.register_error("test.test_codecs_not_a_string", f)
+ raises(TypeError, u'\u1234'.encode, 'ascii',
+ 'test.test_codecs_not_a_string')
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -63,7 +63,7 @@
import codecs
import sys
codecs.register_error("test.test_decode_custom_error_handler_overflow",
- lambda e: ('', sys.maxint + 1))
+ lambda e: (u'', sys.maxint + 1))
raises(IndexError, "abc\xDD".decode, "hz", "test.test_decode_custom_error_handler_overflow")
def test_encode_hz(self):
More information about the pypy-commit mailing list