[Python-checkins] r67936 - in python/branches/release26-maint: Lib/test/pickletester.py Misc/NEWS Modules/cPickle.c
alexandre.vassalotti
python-checkins at python.org
Sat Dec 27 08:16:41 CET 2008
Author: alexandre.vassalotti
Date: Sat Dec 27 08:16:40 2008
New Revision: 67936
Log:
Merged revisions 67934-67935 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r67934 | alexandre.vassalotti | 2008-12-27 02:08:47 -0500 (Sat, 27 Dec 2008) | 4 lines
Fix issue #4730: cPickle corrupts high-unicode strings.
Update outdated copy of PyUnicode_EncodeRawUnicodeEscape.
Add a test case.
........
r67935 | alexandre.vassalotti | 2008-12-27 02:13:01 -0500 (Sat, 27 Dec 2008) | 2 lines
Add Misc/NEWS entry for r67934.
........
Modified:
python/branches/release26-maint/ (props changed)
python/branches/release26-maint/Lib/test/pickletester.py
python/branches/release26-maint/Misc/NEWS
python/branches/release26-maint/Modules/cPickle.c
Modified: python/branches/release26-maint/Lib/test/pickletester.py
==============================================================================
--- python/branches/release26-maint/Lib/test/pickletester.py (original)
+++ python/branches/release26-maint/Lib/test/pickletester.py Sat Dec 27 08:16:40 2008
@@ -480,14 +480,21 @@
if have_unicode:
def test_unicode(self):
- endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'),
- unicode('<\n>'), unicode('<\\>')]
+ endcases = [u'', u'<\\u>', u'<\\\\u1234>', u'<\n>',
+ u'<\\>', u'<\\\\U00012345>']
for proto in protocols:
for u in endcases:
p = self.dumps(u, proto)
u2 = self.loads(p)
self.assertEqual(u2, u)
+ def test_unicode_high_plane(self):
+ t = u'\U00012345'
+ for proto in protocols:
+ p = self.dumps(t, proto)
+ t2 = self.loads(p)
+ self.assertEqual(t2, t)
+
def test_ints(self):
import sys
for proto in protocols:
Modified: python/branches/release26-maint/Misc/NEWS
==============================================================================
--- python/branches/release26-maint/Misc/NEWS (original)
+++ python/branches/release26-maint/Misc/NEWS Sat Dec 27 08:16:40 2008
@@ -173,6 +173,9 @@
- Issue #4014: Don't claim that Python has an Alpha release status, in addition
to claiming it is Mature.
+- Issue #4730: Fixed the cPickle module to correctly handle astral characters
+ when protocol 0 is used.
+
Build
-----
Modified: python/branches/release26-maint/Modules/cPickle.c
==============================================================================
--- python/branches/release26-maint/Modules/cPickle.c (original)
+++ python/branches/release26-maint/Modules/cPickle.c Sat Dec 27 08:16:40 2008
@@ -1255,42 +1255,91 @@
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
backslash and newline characters to \uXXXX escapes. */
static PyObject *
-modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size)
+modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size)
{
- PyObject *repr;
- char *p;
- char *q;
+ PyObject *repr;
+ char *p;
+ char *q;
+
+ static const char *hexdigit = "0123456789abcdef";
+#ifdef Py_UNICODE_WIDE
+ const Py_ssize_t expandsize = 10;
+#else
+ const Py_ssize_t expandsize = 6;
+#endif
- static const char *hexdigit = "0123456789ABCDEF";
+ if (size > PY_SSIZE_T_MAX / expandsize)
+ return PyErr_NoMemory();
- repr = PyString_FromStringAndSize(NULL, 6 * size);
- if (repr == NULL)
- return NULL;
- if (size == 0)
- return repr;
+ repr = PyString_FromStringAndSize(NULL, expandsize * size);
+ if (repr == NULL)
+ return NULL;
+ if (size == 0)
+ return repr;
- p = q = PyString_AS_STRING(repr);
- while (size-- > 0) {
- Py_UNICODE ch = *s++;
- /* Map 16-bit characters to '\uxxxx' */
- if (ch >= 256 || ch == '\\' || ch == '\n') {
- *p++ = '\\';
- *p++ = 'u';
- *p++ = hexdigit[(ch >> 12) & 0xf];
- *p++ = hexdigit[(ch >> 8) & 0xf];
- *p++ = hexdigit[(ch >> 4) & 0xf];
- *p++ = hexdigit[ch & 15];
- }
- /* Copy everything else as-is */
- else
- *p++ = (char) ch;
+ p = q = PyString_AS_STRING(repr);
+ while (size-- > 0) {
+ Py_UNICODE ch = *s++;
+#ifdef Py_UNICODE_WIDE
+ /* Map 32-bit characters to '\Uxxxxxxxx' */
+ if (ch >= 0x10000) {
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigit[(ch >> 28) & 0xf];
+ *p++ = hexdigit[(ch >> 24) & 0xf];
+ *p++ = hexdigit[(ch >> 20) & 0xf];
+ *p++ = hexdigit[(ch >> 16) & 0xf];
+ *p++ = hexdigit[(ch >> 12) & 0xf];
+ *p++ = hexdigit[(ch >> 8) & 0xf];
+ *p++ = hexdigit[(ch >> 4) & 0xf];
+ *p++ = hexdigit[ch & 15];
+ }
+ else
+#else
+ /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
+ if (ch >= 0xD800 && ch < 0xDC00) {
+ Py_UNICODE ch2;
+ Py_UCS4 ucs;
+
+ ch2 = *s++;
+ size--;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigit[(ucs >> 28) & 0xf];
+ *p++ = hexdigit[(ucs >> 24) & 0xf];
+ *p++ = hexdigit[(ucs >> 20) & 0xf];
+ *p++ = hexdigit[(ucs >> 16) & 0xf];
+ *p++ = hexdigit[(ucs >> 12) & 0xf];
+ *p++ = hexdigit[(ucs >> 8) & 0xf];
+ *p++ = hexdigit[(ucs >> 4) & 0xf];
+ *p++ = hexdigit[ucs & 0xf];
+ continue;
+ }
+ /* Fall through: isolated surrogates are escaped as '\uxxxx' below */
+ s--;
+ size++;
}
- *p = '\0';
- _PyString_Resize(&repr, p - q);
- return repr;
+#endif
+ /* Map 16-bit characters to '\uxxxx' */
+ if (ch >= 256 || ch == '\\' || ch == '\n') {
+ *p++ = '\\';
+ *p++ = 'u';
+ *p++ = hexdigit[(ch >> 12) & 0xf];
+ *p++ = hexdigit[(ch >> 8) & 0xf];
+ *p++ = hexdigit[(ch >> 4) & 0xf];
+ *p++ = hexdigit[ch & 15];
+ }
+ /* Copy everything else as-is */
+ else
+ *p++ = (char) ch;
+ }
+ *p = '\0';
+ _PyString_Resize(&repr, p - q);
+ return repr;
}
-
static int
save_unicode(Picklerobject *self, PyObject *args, int doput)
{
More information about the Python-checkins
mailing list