[Python-checkins] cpython (3.2): Issue #13505: Make pickling of bytes objects compatible with Python 2.

alexandre.vassalotti python-checkins at python.org
Tue Dec 13 19:23:20 CET 2011


http://hg.python.org/cpython/rev/14695b4825dc
changeset:   73957:14695b4825dc
branch:      3.2
parent:      73955:44ca4264dc88
user:        Alexandre Vassalotti <alexandre at peadrop.com>
date:        Tue Dec 13 13:08:09 2011 -0500
summary:
  Issue #13505: Make pickling of bytes objects compatible with Python 2.

Initial patch by sbt.

files:
  Lib/pickle.py            |   6 ++-
  Lib/pickletools.py       |  70 ++++++++++++---------------
  Lib/test/pickletester.py |  12 +++-
  Misc/NEWS                |   3 +
  Modules/_pickle.c        |  60 ++++++++++++++++++-----
  5 files changed, 94 insertions(+), 57 deletions(-)


diff --git a/Lib/pickle.py b/Lib/pickle.py
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -487,7 +487,11 @@
 
     def save_bytes(self, obj, pack=struct.pack):
         if self.proto < 3:
-            self.save_reduce(bytes, (list(obj),), obj=obj)
+            if len(obj) == 0:
+                self.save_reduce(bytes, (), obj=obj)
+            else:
+                self.save_reduce(codecs.encode,
+                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
             return
         n = len(obj)
         if n < 256:
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2083,27 +2083,22 @@
    29: (    MARK
    30: d        DICT       (MARK at 29)
    31: p    PUT        2
-   34: c    GLOBAL     '__builtin__ bytes'
-   53: p    PUT        3
-   56: (    MARK
-   57: (        MARK
-   58: l            LIST       (MARK at 57)
+   34: c    GLOBAL     '_codecs encode'
+   50: p    PUT        3
+   53: (    MARK
+   54: V        UNICODE    'abc'
    59: p        PUT        4
-   62: L        LONG       97
-   67: a        APPEND
-   68: L        LONG       98
-   73: a        APPEND
-   74: L        LONG       99
-   79: a        APPEND
-   80: t        TUPLE      (MARK at 56)
-   81: p    PUT        5
-   84: R    REDUCE
-   85: p    PUT        6
-   88: V    UNICODE    'def'
-   93: p    PUT        7
-   96: s    SETITEM
-   97: a    APPEND
-   98: .    STOP
+   62: V        UNICODE    'latin1'
+   70: p        PUT        5
+   73: t        TUPLE      (MARK at 53)
+   74: p    PUT        6
+   77: R    REDUCE
+   78: p    PUT        7
+   81: V    UNICODE    'def'
+   86: p    PUT        8
+   89: s    SETITEM
+   90: a    APPEND
+   91: .    STOP
 highest protocol among opcodes = 0
 
 Try again with a "binary" pickle.
@@ -2122,25 +2117,22 @@
    14: q        BINPUT     1
    16: }        EMPTY_DICT
    17: q        BINPUT     2
-   19: c        GLOBAL     '__builtin__ bytes'
-   38: q        BINPUT     3
-   40: (        MARK
-   41: ]            EMPTY_LIST
-   42: q            BINPUT     4
-   44: (            MARK
-   45: K                BININT1    97
-   47: K                BININT1    98
-   49: K                BININT1    99
-   51: e                APPENDS    (MARK at 44)
-   52: t            TUPLE      (MARK at 40)
-   53: q        BINPUT     5
-   55: R        REDUCE
-   56: q        BINPUT     6
-   58: X        BINUNICODE 'def'
-   66: q        BINPUT     7
-   68: s        SETITEM
-   69: e        APPENDS    (MARK at 3)
-   70: .    STOP
+   19: c        GLOBAL     '_codecs encode'
+   35: q        BINPUT     3
+   37: (        MARK
+   38: X            BINUNICODE 'abc'
+   46: q            BINPUT     4
+   48: X            BINUNICODE 'latin1'
+   59: q            BINPUT     5
+   61: t            TUPLE      (MARK at 37)
+   62: q        BINPUT     6
+   64: R        REDUCE
+   65: q        BINPUT     7
+   67: X        BINUNICODE 'def'
+   75: q        BINPUT     8
+   77: s        SETITEM
+   78: e        APPENDS    (MARK at 3)
+   79: .    STOP
 highest protocol among opcodes = 1
 
 Exercise the INST/OBJ/BUILD family.
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -636,9 +636,15 @@
 
     def test_bytes(self):
         for proto in protocols:
-            for u in b'', b'xyz', b'xyz'*100:
-                p = self.dumps(u)
-                self.assertEqual(self.loads(p), u)
+            for s in b'', b'xyz', b'xyz'*100:
+                p = self.dumps(s)
+                self.assertEqual(self.loads(p), s)
+            for s in [bytes([i]) for i in range(256)]:
+                p = self.dumps(s)
+                self.assertEqual(self.loads(p), s)
+            for s in [bytes([i, i]) for i in range(256)]:
+                p = self.dumps(s)
+                self.assertEqual(self.loads(p), s)
 
     def test_ints(self):
         import sys
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #13505: Pickle bytes objects in a way that is compatible with
+  Python 2 when using protocols <= 2.
+
 - Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER.  (Fix
   given by Campbell Barton).
 
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -369,7 +369,7 @@
     char *errors;               /* Name of errors handling scheme to used when
                                    decoding strings. The default value is
                                    "strict". */
-    Py_ssize_t *marks;                 /* Mark stack, used for unpickling container
+    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                    objects. */
     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
@@ -1700,26 +1700,58 @@
     if (self->proto < 3) {
         /* Older pickle protocols do not have an opcode for pickling bytes
            objects. Therefore, we need to fake the copy protocol (i.e.,
-           the __reduce__ method) to permit bytes object unpickling. */
+           the __reduce__ method) to permit bytes object unpickling.
+
+           Here we use a hack to be compatible with Python 2. Since in Python
+           2 'bytes' is just an alias for 'str' (which has different
+           parameters than the actual bytes object), we use codecs.encode
+           to create the appropriate 'str' object when unpickled using
+           Python 2 *and* the appropriate 'bytes' object when unpickled
+           using Python 3. Again this is a hack and we don't need to do this
+           with newer protocols. */
+        static PyObject *codecs_encode = NULL;
         PyObject *reduce_value = NULL;
-        PyObject *bytelist = NULL;
         int status;
 
-        bytelist = PySequence_List(obj);
-        if (bytelist == NULL)
+        if (codecs_encode == NULL) {
+            PyObject *codecs_module = PyImport_ImportModule("codecs");
+            if (codecs_module == NULL) {
+                return -1;
+            }
+            codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
+            Py_DECREF(codecs_module);
+            if (codecs_encode == NULL) {
+                return -1;
+            }
+        }
+
+        if (PyBytes_GET_SIZE(obj) == 0) {
+            reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
+        }
+        else {
+            static PyObject *latin1 = NULL;
+            PyObject *unicode_str =
+                PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
+                                       PyBytes_GET_SIZE(obj),
+                                       "strict");
+            if (unicode_str == NULL)
+                return -1;
+            if (latin1 == NULL) {
+                latin1 = PyUnicode_InternFromString("latin1");
+                if (latin1 == NULL)
+                    return -1;
+            }
+            reduce_value = Py_BuildValue("(O(OO))",
+                                         codecs_encode, unicode_str, latin1);
+            Py_DECREF(unicode_str);
+        }
+
+        if (reduce_value == NULL)
             return -1;
 
-        reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
-                                     bytelist);
-        if (reduce_value == NULL) {
-            Py_DECREF(bytelist);
-            return -1;
-        }
-
         /* save_reduce() will memoize the object automatically. */
         status = save_reduce(self, reduce_value, obj);
         Py_DECREF(reduce_value);
-        Py_DECREF(bytelist);
         return status;
     }
     else {
@@ -1727,7 +1759,7 @@
         char header[5];
         Py_ssize_t len;
 
-        size = PyBytes_Size(obj);
+        size = PyBytes_GET_SIZE(obj);
         if (size < 0)
             return -1;
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list