[pypy-svn] r63138 - in pypy/trunk/pypy: module/marshal/test rlib rlib/test
pedronis at codespeak.net
pedronis at codespeak.net
Fri Mar 20 15:00:01 CET 2009
Author: pedronis
Date: Fri Mar 20 14:59:58 2009
New Revision: 63138
Modified:
pypy/trunk/pypy/module/marshal/test/test_marshal.py
pypy/trunk/pypy/rlib/runicode.py
pypy/trunk/pypy/rlib/test/test_runicode.py
Log:
(iko, pedronis)
- fix an off-by-one in utf-8 decoding (c == MAXUNICODE was not emitted as a single character), which was also breaking unmarshaling of unicode
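- extend the runicode tests: split test_random into test_first_10000 and a real random test that skips surrogates, and add test_maxunicode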
Modified: pypy/trunk/pypy/module/marshal/test/test_marshal.py
==============================================================================
--- pypy/trunk/pypy/module/marshal/test/test_marshal.py (original)
+++ pypy/trunk/pypy/module/marshal/test/test_marshal.py Fri Mar 20 14:59:58 2009
@@ -577,6 +577,17 @@
assert obj2b == obj2
assert tail == 'END'
+ def test_unicode(self):
+ import marshal, sys
+
+ u = u'\uFFFF'
+ u1 = marshal.loads(marshal.dumps(u))
+ assert u == u1
+
+ u = unichr(sys.maxunicode)
+ u1 = marshal.loads(marshal.dumps(u))
+ assert u == u1
+
class AppTestMultiDict(object):
def setup_class(cls):
Modified: pypy/trunk/pypy/rlib/runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/runicode.py (original)
+++ pypy/trunk/pypy/rlib/runicode.py Fri Mar 20 14:59:58 2009
@@ -178,7 +178,7 @@
result.append(r)
else:
# convert to UTF-16 if necessary
- if c < MAXUNICODE:
+ if c <= MAXUNICODE:
result.append(UNICHR(c))
else:
# compute and append the two surrogates:
Modified: pypy/trunk/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/test/test_runicode.py (original)
+++ pypy/trunk/pypy/rlib/test/test_runicode.py Fri Mar 20 14:59:58 2009
@@ -89,12 +89,25 @@
for encoding in "utf-8 latin-1 utf-16 utf-16-be utf-16-le".split():
self.checkdecode(unichr(i), encoding)
- def test_random(self):
+ def test_first_10000(self):
for i in range(10000):
- uni = unichr(random.randrange(sys.maxunicode))
for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
self.checkdecode(unichr(i), encoding)
+ def test_random(self):
+ for i in range(10000):
+ v = random.randrange(sys.maxunicode)
+ if 0xd800 <= v <= 0xdfff:
+ continue
+ uni = unichr(v)
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+ self.checkdecode(uni, encoding)
+
+ def test_maxunicode(self):
+ uni = unichr(sys.maxunicode)
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+ self.checkdecode(uni, encoding)
+
def test_single_chars_utf8(self):
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
self.checkdecode(s, "utf-8")
@@ -156,12 +169,25 @@
for encoding in "utf-8 latin-1 utf-16 utf-16-be utf-16-le".split():
self.checkencode(unichr(i), encoding)
- def test_random(self):
+ def test_first_10000(self):
for i in range(10000):
- uni = unichr(random.randrange(sys.maxunicode))
for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
self.checkencode(unichr(i), encoding)
+ def test_random(self):
+ for i in range(10000):
+ v = random.randrange(sys.maxunicode)
+ if 0xd800 <= v <= 0xdfff:
+ continue
+ uni = unichr(v)
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+ self.checkencode(uni, encoding)
+
+ def test_maxunicode(self):
+ uni = unichr(sys.maxunicode)
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
+ self.checkencode(uni, encoding)
+
def test_single_chars_utf8(self):
# check every number of bytes per char
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
More information about the Pypy-commit
mailing list