[pypy-svn] r48604 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Nov 12 18:18:43 CET 2007


Author: cfbolz
Date: Mon Nov 12 18:18:42 2007
New Revision: 48604

Modified:
   pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
   pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
Add tests for the encoding error handlers and fix the problems they found.


Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	Mon Nov 12 18:18:42 2007
@@ -347,10 +347,9 @@
 
 def unicode_encode_ucs1_helper(p, size, errors,
                                errorhandler=raise_unicode_exception, limit=256):
-    
     if limit == 256:
         reason = "ordinal not in range(256)"
-        encoding = "latin-1"
+        encoding = "latin1"
     else:
         reason = "ordinal not in range(128)"
         encoding = "ascii"
@@ -371,19 +370,18 @@
             collend = pos+1 
             while collend < len(p) and ord(p[collend]) >= limit:
                 collend += 1
-            x = errorhandler(errors, encoding, reason, p,
-                             collstart, collend, False)
-            res += str(x[0])
-            pos = x[1]
+            r, pos = errorhandler(errors, encoding, reason, p,
+                                  collstart, collend, False)
+            res += r
     
     return "".join(res)
 
-def unicode_encode_latin1(p, size, errors):
-    res = unicode_encode_ucs1_helper(p, size, errors, 256)
+def unicode_encode_latin1(p, size, errors, errorhandler=raise_unicode_exception):
+    res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 256)
     return res
 
-def unicode_encode_ascii(p, size, errors):
-    res = unicode_encode_ucs1_helper(p, size, errors, 128)
+def unicode_encode_ascii(p, size, errors, errorhandler=raise_unicode_exception):
+    res = unicode_encode_ucs1_helper(p, size, errors, errorhandler, 128)
     return res
 
 

Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	Mon Nov 12 18:18:42 2007
@@ -28,6 +28,23 @@
         result = encoder(s, len(s), True)
         self.typeequals(trueresult, result)
 
+    def checkencodeerror(self, s, encoding, start, stop):
+        called = [False]
+        def errorhandler(errors, enc, msg, t, startingpos,
+                         endingpos, decode):
+            called[0] = True
+            assert errors == "foo!"
+            assert enc == encoding
+            assert t is s
+            assert start == startingpos
+            assert stop == endingpos
+            assert not decode
+            return "42424242", stop
+        encoder = getattr(runicode,
+                          "unicode_encode_%s" % encoding.replace("-", ""))
+        result = encoder(s, len(s), "foo!", errorhandler)
+        assert called[0]
+        assert "42424242" in result
 
 class TestDecoding(UnicodeTests):
     
@@ -76,3 +93,9 @@
     def test_single_chars_utf8(self):
         for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
             self.checkencode(s, "utf8")
+
+    def test_ascii_error(self):
+        self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)
+
+    def test_latin1_error(self):
+        self.checkencodeerror(u"abc\uffff\uffff\uffffcde", "latin1", 3, 6)



More information about the Pypy-commit mailing list