[Python-checkins] cpython: Write tests for invalid characters (U+00110000)
victor.stinner
python-checkins at python.org
Fri Dec 9 20:47:39 CET 2011
http://hg.python.org/cpython/rev/bfa9d1ba36ae
changeset: 73906:bfa9d1ba36ae
user: Victor Stinner <victor.stinner at haypocalc.com>
date: Fri Dec 09 20:49:49 2011 +0100
summary:
Write tests for invalid characters (U+00110000)
Test the following functions and decoders:
* codecs.raw_unicode_escape_decode()
* PyUnicode_FromWideChar()
* PyUnicode_FromUnicode()
* "unicode_internal" and "unicode_escape" decoders
files:
Lib/test/test_codecs.py | 16 ++++++++++++++++
Modules/_testcapimodule.c | 18 ++++++++++++++++++
2 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1034,6 +1034,16 @@
'deprecated', DeprecationWarning)):
self.assertRaises(UnicodeDecodeError, internal.decode,
"unicode_internal")
+ if sys.byteorder == "little":
+ invalid = b"\x00\x00\x11\x00"
+ else:
+ invalid = b"\x00\x11\x00\x00"
+ with support.check_warnings():
+ self.assertRaises(UnicodeDecodeError,
+ invalid.decode, "unicode_internal")
+ with support.check_warnings():
+ self.assertEqual(invalid.decode("unicode_internal", "replace"),
+ '\ufffd')
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_error_attributes(self):
@@ -1729,6 +1739,12 @@
self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
+ self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
+ self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
+
+ self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
+ self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
+
class SurrogateEscapeTest(unittest.TestCase):
def test_utf8(self):
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1409,6 +1409,7 @@
#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};
size_t wtextlen = 1;
+ const wchar_t invalid[1] = {(wchar_t)0x110000u};
#else
const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};
size_t wtextlen = 2;
@@ -1444,6 +1445,23 @@
Py_DECREF(wide);
Py_DECREF(utf8);
+
+#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+ wide = PyUnicode_FromWideChar(invalid, 1);
+ if (wide == NULL)
+ PyErr_Clear();
+ else
+ return raiseTestError("test_widechar",
+ "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
+
+ wide = PyUnicode_FromUnicode(invalid, 1);
+ if (wide == NULL)
+ PyErr_Clear();
+ else
+ return raiseTestError("test_widechar",
+ "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail");
+#endif
+
Py_RETURN_NONE;
}
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list