[pypy-svn] r48598 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Nov 12 15:29:44 CET 2007


Author: cfbolz
Date: Mon Nov 12 15:29:42 2007
New Revision: 48598

Modified:
   pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
   pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
decoding latin1 is simple: each input byte maps directly to the unicode character with the same ordinal value


Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py	Mon Nov 12 15:29:42 2007
@@ -152,3 +152,11 @@
     return u"".join(p), pos
 
 
+def str_decode_latin1(s, size, errors, final=False,
+                      errorhandler=raise_unicode_exception):
+    pos = 0
+    p = []
+    while (pos < size):
+        p += unichr(ord(s[pos]))
+        pos += 1
+    return u"".join(p), pos

Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py	Mon Nov 12 15:29:42 2007
@@ -7,17 +7,29 @@
 
     def checkdecode(self, s, encoding):
         decoder = getattr(runicode, "str_decode_%s" % encoding)
-        trueresult = s.decode(encoding)
+        if isinstance(s, str):
+            trueresult = s.decode(encoding)
+        else:
+            trueresult = s
+            s = s.encode(encoding)
         result, consumed = decoder(s, len(s), True)
         assert consumed == len(s)
         self.typeequals(trueresult, result)
 
+
 class TestDecoding(UnicodeTests):
     
     def test_all_ascii(self):
         for i in range(128):
-            self.checkdecode(chr(i), "utf8")
+            for encoding in "utf8 latin1".split():
+                self.checkdecode(chr(i), encoding)
+
+    def test_all_first_256(self):
+        for i in range(256):
+            for encoding in "utf8 latin1".split():
+                self.checkdecode(unichr(i), encoding)
 
-    def test_single_chars(self):
+    def test_single_chars_utf8(self):
         for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
             self.checkdecode(s, "utf8")
+



More information about the Pypy-commit mailing list