[pypy-svn] r63138 - in pypy/trunk/pypy: module/marshal/test rlib rlib/test

Fri Mar 20 15:00:01 CET 2009

Author: pedronis
Date: Fri Mar 20 14:59:58 2009
New Revision: 63138

Modified:
   pypy/trunk/pypy/module/marshal/test/test_marshal.py
   pypy/trunk/pypy/rlib/runicode.py
   pypy/trunk/pypy/rlib/test/test_runicode.py
Log:
(iko, pedronis)

- fix a utf-8 decoding off-by-one at MAXUNICODE that was also breaking unmarshaling of unicode
- extend the runicode tests a bit



Modified: pypy/trunk/pypy/module/marshal/test/test_marshal.py
==============================================================================

--- pypy/trunk/pypy/module/marshal/test/test_marshal.py	(original)
+++ pypy/trunk/pypy/module/marshal/test/test_marshal.py	Fri Mar 20 14:59:58 2009
@@ -577,6 +577,17 @@
         assert obj2b == obj2
         assert tail == 'END'
 
+    def test_unicode(self):
+        import marshal, sys
+
+        u = u'\uFFFF'
+        u1 = marshal.loads(marshal.dumps(u))
+        assert u == u1
+
+        u = unichr(sys.maxunicode)
+        u1 = marshal.loads(marshal.dumps(u))
+        assert u == u1        
+
 
 class AppTestMultiDict(object):
     def setup_class(cls):

Modified: pypy/trunk/pypy/rlib/runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/runicode.py	(original)
+++ pypy/trunk/pypy/rlib/runicode.py	Fri Mar 20 14:59:58 2009
@@ -178,7 +178,7 @@
                     result.append(r)
                 else:
                     # convert to UTF-16 if necessary
-                    if c < MAXUNICODE:
+                    if c <= MAXUNICODE:
                         result.append(UNICHR(c))
                     else:
                         # compute and append the two surrogates:

Modified: pypy/trunk/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/test/test_runicode.py	(original)
+++ pypy/trunk/pypy/rlib/test/test_runicode.py	Fri Mar 20 14:59:58 2009
@@ -89,12 +89,25 @@
             for encoding in "utf-8 latin-1 utf-16 utf-16-be utf-16-le".split():
                 self.checkdecode(unichr(i), encoding)
 
-    def test_random(self):
+    def test_first_10000(self):
         for i in range(10000):
-            uni = unichr(random.randrange(sys.maxunicode))
             for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
                 self.checkdecode(unichr(i), encoding)
 
+    def test_random(self):
+        for i in range(10000):
+            v = random.randrange(sys.maxunicode)
+            if 0xd800 <= v <= 0xdfff:
+                continue
+            uni = unichr(v)
+            for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+                self.checkdecode(uni, encoding)                
+
+    def test_maxunicode(self):
+        uni = unichr(sys.maxunicode)
+        for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+            self.checkdecode(uni, encoding)        
+
     def test_single_chars_utf8(self):
         for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
             self.checkdecode(s, "utf-8")
@@ -156,12 +169,25 @@
             for encoding in "utf-8 latin-1 utf-16 utf-16-be utf-16-le".split():
                 self.checkencode(unichr(i), encoding)
 
-    def test_random(self):
+    def test_first_10000(self):
         for i in range(10000):
-            uni = unichr(random.randrange(sys.maxunicode))
             for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
                 self.checkencode(unichr(i), encoding)
 
+    def test_random(self):
+        for i in range(10000):
+            v = random.randrange(sys.maxunicode)
+            if 0xd800 <= v <= 0xdfff:
+                continue
+            uni = unichr(v)
+            for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+                self.checkencode(uni, encoding)                
+
+    def test_maxunicode(self):
+        uni = unichr(sys.maxunicode)
+        for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+            self.checkencode(uni, encoding)        
+
     def test_single_chars_utf8(self):
         # check every number of bytes per char
         for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]: