[pypy-svn] r48649 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test

cfbolz at codespeak.net
Tue Nov 13 15:34:32 CET 2007


Author: cfbolz
Date: Tue Nov 13 15:34:32 2007
New Revision: 48649

Modified:
   pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
   pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
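Fix a bug in the UTF-16 decoder: an illegal encoding is now reported through
the error handler instead of hitting an "unreachable" assertion. Also raise
UnicodeDecodeError/UnicodeEncodeError with structured arguments rather than a
preformatted message, and add a regression test plus a translation test.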


Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	Tue Nov 13 15:34:32 2007
@@ -9,12 +9,10 @@
                             decode=True):
     if decode:
         raise UnicodeDecodeError(
-                "%s can't decode byte %s in position %s: %s" % (
-                encoding, s[startingpos], startingpos, msg))
+                encoding, s[startingpos], startingpos, endingpos, msg)
     else:
         raise UnicodeEncodeError(
-                "%s can't encode byte %s in position %s: %s" % (
-                encoding, s[startingpos], startingpos, msg))
+                encoding, s[startingpos], startingpos, endingpos, msg)
 
 # ____________________________________________________________ 
 # unicode decoding
@@ -272,7 +270,10 @@
                                       s, pos - 4, pos - 2)
                 result.append(r)
         else:
-            assert 0, "unreachable"
+            r, pos = errorhandler(errors, 'utf-16',
+                                  "illegal encoding",
+                                  s, pos - 2, pos)
+            result.append(r)
     return u"".join(result), pos, bo
 
 def str_decode_latin_1(s, size, errors, final=False,

Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	Tue Nov 13 15:34:32 2007
@@ -1,3 +1,4 @@
+import py
 import sys, random
 from pypy.rlib import runicode
 
@@ -140,6 +141,12 @@
                   ]:
             self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False)
 
+    def test_utf16_bugs(self):
+        s = '\x80-\xe9\xdeL\xa3\x9b'
+        py.test.raises(UnicodeDecodeError, runicode.str_decode_utf_16_le,
+                       s, len(s), True)
+
+
 class TestEncoding(UnicodeTests):
     def test_all_ascii(self):
         for i in range(128):
@@ -175,3 +182,14 @@
 
     def test_latin1_error(self):
         self.checkencodeerror(u"abc\uffff\uffff\uffffcde", "latin-1", 3, 6)
+
+class TestTranslation(object):
+    def test_utf8(self):
+        from pypy.rpython.test.test_llinterp import interpret
+        def f(x):
+            s1 = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93" * x
+            u = runicode.str_decode_utf_8(s1, len(s1), True)
+            s2 = runicode.unicode_encode_utf_8(u, len(u), True)
+            return s1 == s2
+        res = interpret(f, [2])
+        assert res



More information about the Pypy-commit mailing list