python/dist/src/Lib/test test_codeccallbacks.py, 1.17, 1.18 test_codecs.py, 1.25, 1.26
![](https://secure.gravatar.com/avatar/3ff3f5066caa2b87d58b2dc8a60516a0.jpg?s=120&d=mm&r=g)
Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1718/Lib/test Modified Files: test_codeccallbacks.py test_codecs.py Log Message: SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain about illegal code points. The codec now supports PEP 293 style error handlers. (This is a variant of the Nik Haldimann's patch that detects truncated data) Index: test_codeccallbacks.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codeccallbacks.py,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- test_codeccallbacks.py 14 Dec 2004 21:28:07 -0000 1.17 +++ test_codeccallbacks.py 30 Aug 2005 10:23:13 -0000 1.18 @@ -111,7 +111,7 @@ sout += "\\U%08x" % sys.maxunicode self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) - def test_relaxedutf8(self): + def test_decoderelaxedutf8(self): # This is the test for a decoding callback handler, # that relaxes the UTF-8 minimal encoding restriction. 
# A null byte that is encoded as "\xc0\x80" will be @@ -158,6 +158,35 @@ charmap[ord("?")] = u"XYZ" self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) + def test_decodeunicodeinternal(self): + self.assertRaises( + UnicodeDecodeError, + "\x00\x00\x00\x00\x00".decode, + "unicode-internal", + ) + if sys.maxunicode > 0xffff: + def handler_unicodeinternal(exc): + if not isinstance(exc, UnicodeDecodeError): + raise TypeError("don't know how to handle %r" % exc) + return (u"\x01", 1) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), + u"\u0000" + ) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), + u"\u0000\ufffd" + ) + + codecs.register_error("test.hui", handler_unicodeinternal) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), + u"\u0000\u0001\u0000" + ) + def test_callbacks(self): def handler1(exc): if not isinstance(exc, UnicodeEncodeError) \ @@ -503,7 +532,8 @@ for (enc, bytes) in ( ("ascii", "\xff"), ("utf-8", "\xff"), - ("utf-7", "+x-") + ("utf-7", "+x-"), + ("unicode-internal", "\x00"), ): self.assertRaises( TypeError, Index: test_codecs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v retrieving revision 1.25 retrieving revision 1.26 diff -u -d -r1.25 -r1.26 --- test_codecs.py 25 Aug 2005 11:03:38 -0000 1.25 +++ test_codecs.py 30 Aug 2005 10:23:13 -0000 1.26 @@ -1,7 +1,7 @@ from test import test_support import unittest import codecs -import StringIO +import sys, StringIO class Queue(object): """ @@ -453,6 +453,54 @@ for uni, puny in punycode_testcases: self.assertEquals(uni, puny.decode("punycode")) +class UnicodeInternalTest(unittest.TestCase): + def test_bug1251300(self): + # Decoding with unicode_internal used to not correctly handle "code + # points" above 0x10ffff on UCS-4 builds. 
+ if sys.maxunicode > 0xffff: + ok = [ + ("\x00\x10\xff\xff", u"\U0010ffff"), + ("\x00\x00\x01\x01", u"\U00000101"), + ("", u""), + ] + not_ok = [ + "\x7f\xff\xff\xff", + "\x80\x00\x00\x00", + "\x81\x00\x00\x00", + "\x00", + "\x00\x00\x00\x00\x00", + ] + for internal, uni in ok: + if sys.byteorder == "little": + internal = "".join(reversed(internal)) + self.assertEquals(uni, internal.decode("unicode_internal")) + for internal in not_ok: + if sys.byteorder == "little": + internal = "".join(reversed(internal)) + self.assertRaises(UnicodeDecodeError, internal.decode, + "unicode_internal") + + def test_decode_error_attributes(self): + if sys.maxunicode > 0xffff: + try: + "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") + except UnicodeDecodeError, ex: + self.assertEquals("unicode_internal", ex.encoding) + self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) + self.assertEquals(4, ex.start) + self.assertEquals(8, ex.end) + else: + self.fail() + + def test_decode_callback(self): + if sys.maxunicode > 0xffff: + codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) + decoder = codecs.getdecoder("unicode_internal") + ab = u"ab".encode("unicode_internal") + ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), + "UnicodeInternalTest") + self.assertEquals((u"ab", 12), ignored) + # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html nameprep_tests = [ # 3.1 Map to nothing. @@ -885,6 +933,7 @@ EscapeDecodeTest, RecodingTest, PunycodeTest, + UnicodeInternalTest, NameprepTest, CodecTest, CodecsModuleTest,
participants (1)
-
doerwalter@users.sourceforge.net