[pypy-svn] r61085 - in pypy/trunk/pypy/module/_codecs: . test

fijal at codespeak.net fijal at codespeak.net
Sun Jan 18 21:53:12 CET 2009


Author: fijal
Date: Sun Jan 18 21:53:12 2009
New Revision: 61085

Modified:
   pypy/trunk/pypy/module/_codecs/app_codecs.py
   pypy/trunk/pypy/module/_codecs/test/test_codecs.py
Log:
I *think* this is how it should work. Completely unsure though, so let's look
at the tests.


Modified: pypy/trunk/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/app_codecs.py	(original)
+++ pypy/trunk/pypy/module/_codecs/app_codecs.py	Sun Jan 18 21:53:12 2009
@@ -135,10 +135,10 @@
         while i < len(unistr):
             if len(unistr) - i < unicode_bytes:
                 msg = 'truncated input'
-                next, _ = unicode_call_errorhandler(errors, 'unicode_internal', msg,
-                                                    unistr, i, i + unicode_bytes)
+                next, i = unicode_call_errorhandler(errors, 'unicode_internal', msg,
+                                                    unistr, i, len(unistr))
                 p += next
-                break
+                continue
             t = 0
             h = 0
             for j in range(start, stop, step):
@@ -151,7 +151,7 @@
                 startpos = i - unicode_bytes
                 endpos = i
                 msg = "unichr(%s) not in range" % (t,)
-                next, _ = unicode_call_errorhandler(errors, 'unicode_internal', msg,
+                next, i = unicode_call_errorhandler(errors, 'unicode_internal', msg,
                                                     unistr, startpos, endpos)
                 p += next
         res = u''.join(p)
@@ -407,7 +407,7 @@
             ##               it in a 16-bit character 
                         surrogate = 1
                         msg = "code pairs are not supported"
-                        out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+                        out, i = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
                         p += out
                         bitsleft = 0
                         break
@@ -419,7 +419,7 @@
 ##                       bitsleft < 6 then we could just classify it as padding
 ##                       but that is not the case here */
                     msg = "partial character in shift sequence"
-                    out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+                    out, i = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
                     
 ##                /* According to RFC2152 the remaining bits should be zero. We
 ##                   choose to signal an error/insert a replacement character
@@ -435,7 +435,7 @@
                     
                 elif SPECIAL(ch, 0, 0) :
                     msg = "unexpected special character"
-                    out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+                    out, i = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
                     p += out
                 else:  
                     p +=  ch 
@@ -457,7 +457,7 @@
         elif (SPECIAL(ch, 0, 0)):
             i += 1
             msg = "unexpected special character"
-            out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+            out, i = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
             p += out
         else:
             p +=  ch 
@@ -467,7 +467,7 @@
         #XXX This aint right
         endinpos = size
         msg = "unterminated shift sequence"
-        out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+        out, i = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
         p += out
     return p
 

Modified: pypy/trunk/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/test/test_codecs.py	(original)
+++ pypy/trunk/pypy/module/_codecs/test/test_codecs.py	Sun Jan 18 21:53:12 2009
@@ -470,6 +470,7 @@
                 )
 
     def test_unicode_internal(self):
+        import codecs
         try:
             '\x00'.decode('unicode-internal')
         except UnicodeDecodeError:
@@ -479,3 +480,16 @@
 
         res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace")
         assert res == u"\u0000\ufffd"
+        def handler_unicodeinternal(exc):
+            if not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            return (u"\x01", 1)
+
+        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore")
+        assert res == u"\u0000"
+        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace")
+        assert res == u"\u0000\ufffd"
+        codecs.register_error("test.hui", handler_unicodeinternal)
+        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
+        assert res == u"\u0000\u0001\u0000"
+



More information about the Pypy-commit mailing list