[pypy-svn] r48604 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Nov 12 18:18:43 CET 2007
Author: cfbolz
Date: Mon Nov 12 18:18:42 2007
New Revision: 48604
Modified:
pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
Add tests for the encoding error handlers and fix the problems they uncovered.
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py Mon Nov 12 18:18:42 2007
@@ -347,10 +347,9 @@
def unicode_encode_ucs1_helper(p, size, errors,
errorhandler=raise_unicode_exception, limit=256):
-
if limit == 256:
reason = "ordinal not in range(256)"
- encoding = "latin-1"
+ encoding = "latin1"
else:
reason = "ordinal not in range(128)"
encoding = "ascii"
@@ -371,19 +370,18 @@
collend = pos+1
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
- x = errorhandler(errors, encoding, reason, p,
- collstart, collend, False)
- res += str(x[0])
- pos = x[1]
+ r, pos = errorhandler(errors, encoding, reason, p,
+ collstart, collend, False)
+ res += r
return "".join(res)
-def unicode_encode_latin1(p, size, errors):
- res = unicode_encode_ucs1_helper(p, size, errors, 256)
+def unicode_encode_latin1(p, size, errors, errorhandler=raise_unicode_exception):
+ res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 256)
return res
-def unicode_encode_ascii(p, size, errors):
- res = unicode_encode_ucs1_helper(p, size, errors, 128)
+def unicode_encode_ascii(p, size, errors, errorhandler=raise_unicode_exception):
+ res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 128)
return res
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py Mon Nov 12 18:18:42 2007
@@ -28,6 +28,23 @@
result = encoder(s, len(s), True)
self.typeequals(trueresult, result)
+ def checkencodeerror(self, s, encoding, start, stop):
+ called = [False]
+ def errorhandler(errors, enc, msg, t, startingpos,
+ endingpos, decode):
+ called[0] = True
+ assert errors == "foo!"
+ assert enc == encoding
+ assert t is s
+ assert start == startingpos
+ assert stop == endingpos
+ assert not decode
+ return "42424242", stop
+ encoder = getattr(runicode,
+ "unicode_encode_%s" % encoding.replace("-", ""))
+ result = encoder(s, len(s), "foo!", errorhandler)
+ assert called[0]
+ assert "42424242" in result
class TestDecoding(UnicodeTests):
@@ -76,3 +93,9 @@
def test_single_chars_utf8(self):
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
self.checkencode(s, "utf8")
+
+ def test_ascii_error(self):
+ self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)
+
+ def test_latin1_error(self):
+ self.checkencodeerror(u"abc\uffff\uffff\uffffcde", "latin1", 3, 6)
More information about the Pypy-commit mailing list