[pypy-svn] r48884 - in pypy/branch/ropes-unicode/pypy/objspace/std: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Wed Nov 21 10:20:35 CET 2007
Author: cfbolz
Date: Wed Nov 21 10:20:34 2007
New Revision: 48884
Modified:
pypy/branch/ropes-unicode/pypy/objspace/std/rope.py
pypy/branch/ropes-unicode/pypy/objspace/std/ropeunicodeobject.py
pypy/branch/ropes-unicode/pypy/objspace/std/test/test_rope.py
Log:
Fix rope translation failures. Add tests for the code that tries to quickly
encode/decode ropes.
Modified: pypy/branch/ropes-unicode/pypy/objspace/std/rope.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/objspace/std/rope.py (original)
+++ pypy/branch/ropes-unicode/pypy/objspace/std/rope.py Wed Nov 21 10:20:34 2007
@@ -233,7 +233,7 @@
def getrope(self, index):
ch = ord(self.u[index])
if ch < 256:
- return LiteralStringNode.PREBUILT[ord(self.s[index])]
+ return LiteralStringNode.PREBUILT[ch]
if len(self.u) == 1:
return self
return LiteralUnicodeNode(unichr(ch))
@@ -606,7 +606,7 @@
unichunk.append(unichr(c))
i += 1
if unichunk:
- nodelist.append(LiteralUnicodeNode("".join(unichunk)))
+ nodelist.append(LiteralUnicodeNode(u"".join(unichunk)))
strchunk = []
while i < length:
c = ord(charlist[i])
@@ -1379,8 +1379,8 @@
str_decode_utf8(rope.right))
elif isinstance(rope, LiteralStringNode):
try:
- result, consumed = str_decode_utf_8(rope.s, len(rope.s), False,
- "strict")
+ result, consumed = str_decode_utf_8(rope.s, len(rope.s), "strict",
+ False)
except UnicodeDecodeError:
return None
if consumed < len(rope.s):
@@ -1388,7 +1388,7 @@
return rope_from_unicode(result)
s = rope.flatten_string()
try:
- result, consumed = str_decode_utf_8(s, len(s), True)
+ result, consumed = str_decode_utf_8(s, len(s), "strict", True)
return rope_from_unicode(result)
except UnicodeDecodeError:
pass
@@ -1410,11 +1410,8 @@
return BinaryConcatNode(unicode_encode_utf8(rope.left),
unicode_encode_utf8(rope.right))
elif isinstance(rope, LiteralUnicodeNode):
- try:
- return LiteralStringNode(
- unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
- except UnicodeDecodeError:
- return None
+ return LiteralStringNode(
+ unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
elif isinstance(rope, LiteralStringNode):
return LiteralStringNode(_str_encode_utf_8(rope.s))
@@ -1428,7 +1425,8 @@
if (ch < 0x80):
# Encode ASCII
result.append(chr(ch))
+ continue
# Encode Latin-1
result.append(chr((0xc0 | (ch >> 6))))
result.append(chr((0x80 | (ch & 0x3f))))
- return LiteralStringNode("".join(s))
+ return "".join(result)
Modified: pypy/branch/ropes-unicode/pypy/objspace/std/ropeunicodeobject.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/objspace/std/ropeunicodeobject.py (original)
+++ pypy/branch/ropes-unicode/pypy/objspace/std/ropeunicodeobject.py Wed Nov 21 10:20:34 2007
@@ -39,7 +39,9 @@
result = rope.str_decode_utf8(node)
if result is not None:
return W_RopeUnicodeObject(result)
- return unicode_from_encoded_object(space, w_str, encoding, errors)
+ result = unicode_from_encoded_object(space, w_str, encoding, errors)
+ assert isinstance(result, W_RopeUnicodeObject)
+ return result
def encode_unicode(space, w_unistr, encoding, errors):
from pypy.objspace.std.unicodetype import getdefaultencoding, \
Modified: pypy/branch/ropes-unicode/pypy/objspace/std/test/test_rope.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/objspace/std/test/test_rope.py (original)
+++ pypy/branch/ropes-unicode/pypy/objspace/std/test/test_rope.py Wed Nov 21 10:20:34 2007
@@ -770,3 +770,69 @@
assert node.length() == 90
assert not node.is_ascii()
assert not node.is_bytestring()
+
+def test_encode():
+ node = LiteralStringNode("abc")
+ assert unicode_encode_ascii(node) is node
+ assert unicode_encode_latin1(node) is node
+ assert unicode_encode_utf8(node) is node
+ node = LiteralStringNode("abc\xff")
+ assert unicode_encode_ascii(node) is None
+ assert unicode_encode_latin1(node) is node
+ assert unicode_encode_utf8(node).s == 'abc\xc3\xbf'
+ node = LiteralUnicodeNode(u"\uffffab")
+ assert unicode_encode_ascii(node) is None
+ assert unicode_encode_latin1(node) is None
+ assert unicode_encode_utf8(node).s == '\xef\xbf\xbfab'
+ node = BinaryConcatNode(LiteralStringNode("abc"),
+ LiteralUnicodeNode(u"\uffffab"))
+ assert unicode_encode_ascii(node) is None
+ assert unicode_encode_latin1(node) is None
+ res = unicode_encode_utf8(node)
+ assert res.left is node.left
+ assert res.right.s == '\xef\xbf\xbfab'
+
+def test_decode():
+ node = LiteralStringNode("abc")
+ assert str_decode_ascii(node) is node
+ assert str_decode_latin1(node) is node
+ assert str_decode_utf8(node) is node
+ node = LiteralStringNode("abc\xff")
+ assert str_decode_ascii(node) is None
+ assert str_decode_latin1(node) is node
+
+def test_decode_utf8():
+ # bad data
+ node = LiteralStringNode("\xd7\x50")
+ assert str_decode_utf8(node) is None
+ node = LiteralStringNode("\xf0\x90\x91")
+ assert str_decode_utf8(node) is None
+
+ # correct data in one node
+ node = LiteralStringNode('\xef\xbf\xbfab')
+ assert str_decode_utf8(node).u == u"\uffffab"
+
+ # binary node, left node can be decoded
+ node = BinaryConcatNode(LiteralStringNode('\xef\xbf\xbfab'),
+ LiteralStringNode('\xef\xbf\xbfab'))
+ res = str_decode_utf8(node)
+ assert res.left.u == u"\uffffab"
+ assert res.right.u == u"\uffffab"
+
+ # binary node, left node alone cannot be decoded
+ node = BinaryConcatNode(LiteralStringNode('\xef'),
+ LiteralStringNode('\xbf\xbfab'))
+ res = str_decode_utf8(node)
+ assert res.u == u"\uffffab"
+
+ # binary node, left node cannot be decoded, bad data
+ node = BinaryConcatNode(LiteralStringNode("\xf0\x90"),
+ LiteralStringNode("\x91"))
+ assert str_decode_utf8(node) is None
+
+ # binary node, incomplete data
+ node = BinaryConcatNode(LiteralStringNode('ab\xef'),
+ LiteralStringNode('\xbf'))
+
+ res = str_decode_utf8(node)
+ assert res is None
More information about the Pypy-commit
mailing list