[Python-checkins] cpython (2.7): Issue #13333: The UTF-7 decoder now accepts lone surrogates
antoine.pitrou
python-checkins at python.org
Tue Nov 15 01:54:57 CET 2011
http://hg.python.org/cpython/rev/050772822bde
changeset: 73554:050772822bde
branch: 2.7
parent: 73550:555871844962
user: Antoine Pitrou <solipsis at pitrou.net>
date: Tue Nov 15 01:49:40 2011 +0100
summary:
Issue #13333: The UTF-7 decoder now accepts lone surrogates
(the encoder already accepts them).
files:
Lib/test/test_unicode.py | 14 +++++++++++---
Misc/NEWS | 3 +++
Objects/unicodeobject.c | 14 +++++---------
3 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -771,10 +771,18 @@
for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y)
- # Unpaired surrogates not supported
- self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
+ # Unpaired surrogates are passed through
+ self.assertEqual(u'\uD801'.encode('utf-7'), '+2AE-')
+ self.assertEqual(u'\uD801x'.encode('utf-7'), '+2AE-x')
+ self.assertEqual(u'\uDC01'.encode('utf-7'), '+3AE-')
+ self.assertEqual(u'\uDC01x'.encode('utf-7'), '+3AE-x')
+ self.assertEqual('+2AE-'.decode('utf-7'), u'\uD801')
+ self.assertEqual('+2AE-x'.decode('utf-7'), u'\uD801x')
+ self.assertEqual('+3AE-'.decode('utf-7'), u'\uDC01')
+ self.assertEqual('+3AE-x'.decode('utf-7'), u'\uDC01x')
- self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
+ self.assertEqual(u'\uD801\U000abcde'.encode('utf-7'), '+2AHab9ze-')
+ self.assertEqual('+2AHab9ze-'.decode('utf-7'), u'\uD801\U000abcde')
# Direct encoded characters
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@
Core and Builtins
-----------------
+- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
+ already accepts them).
+
- Remove Py3k warning for callable.
- Issue #10519: Avoid unnecessary recursive function calls in
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1628,21 +1628,17 @@
*p++ = outCh;
#endif
surrogate = 0;
+ continue;
}
else {
+ *p++ = surrogate;
surrogate = 0;
- errmsg = "second surrogate missing";
- goto utf7Error;
}
}
- else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
+ if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */
surrogate = outCh;
}
- else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
- errmsg = "unexpected second surrogate";
- goto utf7Error;
- }
else {
*p++ = outCh;
}
@@ -1652,8 +1648,8 @@
inShift = 0;
s++;
if (surrogate) {
- errmsg = "second surrogate missing at end of shift sequence";
- goto utf7Error;
+ *p++ = surrogate;
+ surrogate = 0;
}
if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) {
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list