[pypy-svn] pypy default: Fix the "replace" error handler: on UnicodeDecodeError, only one U+FFFD is returned,
amauryfa
commits-noreply at bitbucket.org
Mon Feb 14 10:51:23 CET 2011
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r41892:ed6b9cdcc22f
Date: 2011-02-14 10:50 +0100
http://bitbucket.org/pypy/pypy/changeset/ed6b9cdcc22f/
Log: Fix the "replace" error handler: on UnicodeDecodeError it now returns
a single U+FFFD, even when the error spans multiple bytes.
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -415,6 +415,13 @@
assert codecs.replace_errors(UnicodeTranslateError(
u"\u3042", 0, 1, "ouch")) == (u"\ufffd", 1)
+ assert codecs.replace_errors(UnicodeEncodeError(
+ "ascii", u"\u3042\u3042", 0, 2, "ouch")) == (u"??", 2)
+ assert codecs.replace_errors(UnicodeDecodeError(
+ "ascii", "\xff\xff", 0, 2, "ouch")) == (u"\ufffd", 2)
+ assert codecs.replace_errors(UnicodeTranslateError(
+ u"\u3042\u3042", 0, 2, "ouch")) == (u"\ufffd\ufffd", 2)
+
class BadStartUnicodeEncodeError(UnicodeEncodeError):
def __init__(self):
UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -525,6 +525,7 @@
raises(UnicodeError, "\\".decode, "unicode-escape")
raises(UnicodeError, "\xc2".decode, "utf-8")
+ assert '\xe1\x80'.decode('utf-8', 'replace') == u"\ufffd"
def test_repr_bug(self):
assert (repr(u'\U00090418\u027d\U000582b9\u54c3\U000fcb6e') ==
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -181,8 +181,10 @@
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
text = '?' * size
return space.newtuple([space.wrap(text), w_end])
- elif (space.isinstance_w(w_exc, space.w_UnicodeDecodeError) or
- space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
+ elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
+ text = u'\ufffd'
+ return space.newtuple([space.wrap(text), w_end])
+ elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError):
text = u'\ufffd' * size
return space.newtuple([space.wrap(text), w_end])
else:
More information about the Pypy-commit
mailing list