[pypy-svn] r48649 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Tue Nov 13 15:34:32 CET 2007
Author: cfbolz
Date: Tue Nov 13 15:34:32 2007
New Revision: 48649
Modified:
pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
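Fix a bug in the UTF-16 decoder: the branch previously marked 'assert 0, "unreachable"' can be reached, so it now reports "illegal encoding" through the error handler. The default error handler also now passes the encoding, offending byte, start/end positions and message as separate arguments to UnicodeDecodeError/UnicodeEncodeError instead of a single preformatted string.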
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py Tue Nov 13 15:34:32 2007
@@ -9,12 +9,10 @@
decode=True):
if decode:
raise UnicodeDecodeError(
- "%s can't decode byte %s in position %s: %s" % (
- encoding, s[startingpos], startingpos, msg))
+ encoding, s[startingpos], startingpos, endingpos, msg)
else:
raise UnicodeEncodeError(
- "%s can't encode byte %s in position %s: %s" % (
- encoding, s[startingpos], startingpos, msg))
+ encoding, s[startingpos], startingpos, endingpos, msg)
# ____________________________________________________________
# unicode decoding
@@ -272,7 +270,10 @@
s, pos - 4, pos - 2)
result.append(r)
else:
- assert 0, "unreachable"
+ r, pos = errorhandler(errors, 'utf-16',
+ "illegal encoding",
+ s, pos - 2, pos)
+ result.append(r)
return u"".join(result), pos, bo
def str_decode_latin_1(s, size, errors, final=False,
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py Tue Nov 13 15:34:32 2007
@@ -1,3 +1,4 @@
+import py
import sys, random
from pypy.rlib import runicode
@@ -140,6 +141,12 @@
]:
self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False)
+ def test_utf16_bugs(self):
+ s = '\x80-\xe9\xdeL\xa3\x9b'
+ py.test.raises(UnicodeDecodeError, runicode.str_decode_utf_16_le,
+ s, len(s), True)
+
+
class TestEncoding(UnicodeTests):
def test_all_ascii(self):
for i in range(128):
@@ -175,3 +182,14 @@
def test_latin1_error(self):
self.checkencodeerror(u"abc\uffff\uffff\uffffcde", "latin-1", 3, 6)
+
+class TestTranslation(object):
+ def test_utf8(self):
+ from pypy.rpython.test.test_llinterp import interpret
+ def f(x):
+ s1 = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93" * x
+ u = runicode.str_decode_utf_8(s1, len(s1), True)
+ s2 = runicode.unicode_encode_utf_8(u, len(u), True)
+ return s1 == s2
+ res = interpret(f, [2])
+ assert res
More information about the Pypy-commit mailing list