[Python-checkins] cpython (3.3): Issue #19279: UTF-7 decoder no longer produces illegal strings.
serhiy.storchaka
python-checkins at python.org
Sat Oct 19 19:39:57 CEST 2013
http://hg.python.org/cpython/rev/f471f2f05621
changeset: 86477:f471f2f05621
branch: 3.3
parent: 86463:2d8d5fea6194
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sat Oct 19 20:38:19 2013 +0300
summary:
Issue #19279: UTF-7 decoder no longer produces illegal strings.
files:
Lib/test/test_codecs.py | 30 +++++++++++++++++++++++++++++
Misc/NEWS | 2 +
Objects/unicodeobject.c | 2 +
3 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -820,6 +820,36 @@
]
)
+ def test_errors(self):
+ tests = [
+ (b'a\xffb', 'a\ufffdb'),
+ (b'a+IK', 'a\ufffd'),
+ (b'a+IK-b', 'a\ufffdb'),
+ (b'a+IK,b', 'a\ufffdb'),
+ (b'a+IKx', 'a\u20ac\ufffd'),
+ (b'a+IKx-b', 'a\u20ac\ufffdb'),
+ (b'a+IKwgr', 'a\u20ac\ufffd'),
+ (b'a+IKwgr-b', 'a\u20ac\ufffdb'),
+ (b'a+IKwgr,', 'a\u20ac\ufffd'),
+ (b'a+IKwgr,-b', 'a\u20ac\ufffd-b'),
+ (b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'),
+ (b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'),
+ (b'a+/,+IKw-b', 'a\ufffd\u20acb'),
+ (b'a+//,+IKw-b', 'a\ufffd\u20acb'),
+ (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'),
+ (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
+ ]
+ for raw, expected in tests:
+ with self.subTest(raw=raw):
+ self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
+ raw, 'strict', True)
+ self.assertEqual(raw.decode('utf-7', 'replace'), expected)
+
+ def test_nonbmp(self):
+ self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
+ self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
+ self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0')
+
class UTF16ExTest(unittest.TestCase):
def test_errors(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@
Core and Builtins
-----------------
+- Issue #19279: UTF-7 decoder no more produces illegal strings.
+
- Fix macro expansion of _PyErr_OCCURRED(), and make sure to use it in at
least one place so as to avoid regressions.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4359,6 +4359,7 @@
Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
base64bits -= 16;
base64buffer &= (1 << base64bits) - 1; /* clear high bits */
+ assert(outCh <= 0xffff);
if (surrogate) {
/* expecting a second surrogate */
if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
@@ -4426,6 +4427,7 @@
inShift = 1;
shiftOutStart = outpos;
base64bits = 0;
+ base64buffer = 0;
}
}
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list