[Python-checkins] bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. (#4407)

Serhiy Storchaka webhook-mailer at python.org
Thu Nov 16 02:44:46 EST 2017


https://github.com/python/cpython/commit/3daaafb700df45716bb55f3a293f88773baf3463
commit: 3daaafb700df45716bb55f3a293f88773baf3463
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-11-16T09:44:43+02:00
summary:

bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. (#4407)

files:
A Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst
M Lib/pickle.py
M Lib/pickletools.py
M Lib/test/pickletester.py
M Modules/_pickle.c

diff --git a/Lib/pickle.py b/Lib/pickle.py
index faa8fd7e557..350d4a46c06 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -674,7 +674,10 @@ def save_long(self, obj):
             else:
                 self.write(LONG4 + pack("<i", n) + encoded)
             return
-        self.write(LONG + repr(obj).encode("ascii") + b'L\n')
+        if -0x80000000 <= obj <= 0x7fffffff:
+            self.write(INT + repr(obj).encode("ascii") + b'\n')
+        else:
+            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
     dispatch[int] = save_long
 
     def save_float(self, obj):
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 0c8dddc10bb..408c2ac1360 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2480,35 +2480,35 @@ def __init__(self, value):
     0: (    MARK
     1: l        LIST       (MARK at 0)
     2: p    PUT        0
-    5: L    LONG       1
-    9: a    APPEND
-   10: L    LONG       2
-   14: a    APPEND
-   15: (    MARK
-   16: L        LONG       3
-   20: L        LONG       4
-   24: t        TUPLE      (MARK at 15)
-   25: p    PUT        1
-   28: a    APPEND
-   29: (    MARK
-   30: d        DICT       (MARK at 29)
-   31: p    PUT        2
-   34: c    GLOBAL     '_codecs encode'
-   50: p    PUT        3
-   53: (    MARK
-   54: V        UNICODE    'abc'
-   59: p        PUT        4
-   62: V        UNICODE    'latin1'
-   70: p        PUT        5
-   73: t        TUPLE      (MARK at 53)
-   74: p    PUT        6
-   77: R    REDUCE
-   78: p    PUT        7
-   81: V    UNICODE    'def'
-   86: p    PUT        8
-   89: s    SETITEM
-   90: a    APPEND
-   91: .    STOP
+    5: I    INT        1
+    8: a    APPEND
+    9: I    INT        2
+   12: a    APPEND
+   13: (    MARK
+   14: I        INT        3
+   17: I        INT        4
+   20: t        TUPLE      (MARK at 13)
+   21: p    PUT        1
+   24: a    APPEND
+   25: (    MARK
+   26: d        DICT       (MARK at 25)
+   27: p    PUT        2
+   30: c    GLOBAL     '_codecs encode'
+   46: p    PUT        3
+   49: (    MARK
+   50: V        UNICODE    'abc'
+   55: p        PUT        4
+   58: V        UNICODE    'latin1'
+   66: p        PUT        5
+   69: t        TUPLE      (MARK at 49)
+   70: p    PUT        6
+   73: R    REDUCE
+   74: p    PUT        7
+   77: V    UNICODE    'def'
+   82: p    PUT        8
+   85: s    SETITEM
+   86: a    APPEND
+   87: .    STOP
 highest protocol among opcodes = 0
 
 Try again with a "binary" pickle.
@@ -2577,13 +2577,13 @@ def __init__(self, value):
    93: p    PUT        6
    96: V    UNICODE    'value'
   103: p    PUT        7
-  106: L    LONG       42
-  111: s    SETITEM
-  112: b    BUILD
-  113: a    APPEND
-  114: g    GET        5
-  117: a    APPEND
-  118: .    STOP
+  106: I    INT        42
+  110: s    SETITEM
+  111: b    BUILD
+  112: a    APPEND
+  113: g    GET        5
+  116: a    APPEND
+  117: .    STOP
 highest protocol among opcodes = 0
 
 >>> dis(pickle.dumps(x, 1))
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 243bc94ee49..bf6116b2dfb 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1821,7 +1821,7 @@ def test_simple_newobj(self):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s)  # LONG
+                    self.assertIn(b'\nI64206', s)  # INT
                 else:
                     self.assertIn(b'M\xce\xfa', s)  # BININT2
                 self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
@@ -1837,7 +1837,7 @@ def test_complex_newobj(self):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s)  # LONG
+                    self.assertIn(b'\nI64206', s)  # INT
                 elif proto < 2:
                     self.assertIn(b'M\xce\xfa', s)  # BININT2
                 elif proto < 4:
@@ -1857,7 +1857,7 @@ def test_complex_newobj_ex(self):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s)  # LONG
+                    self.assertIn(b'\nI64206', s)  # INT
                 elif proto < 2:
                     self.assertIn(b'M\xce\xfa', s)  # BININT2
                 elif proto < 4:
diff --git a/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst b/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst
new file mode 100644
index 00000000000..d077d0e2110
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst
@@ -0,0 +1,4 @@
+Integers that fit in a signed 32-bit integer will now be pickled with
+protocol 0 using the INT opcode.  This will decrease the size of a pickle,
+speed up pickling and unpickling, and make these integers be unpickled as
+int instances in Python 2.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 4b7f1ed66b3..943c70112b7 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1858,18 +1858,13 @@ save_long(PicklerObject *self, PyObject *obj)
     PyObject *repr = NULL;
     Py_ssize_t size;
     long val;
+    int overflow;
     int status = 0;
 
-    const char long_op = LONG;
-
-    val= PyLong_AsLong(obj);
-    if (val == -1 && PyErr_Occurred()) {
-        /* out of range for int pickling */
-        PyErr_Clear();
-    }
-    else if (self->bin &&
-             (sizeof(long) <= 4 ||
-              (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
+    val= PyLong_AsLongAndOverflow(obj, &overflow);
+    if (!overflow && (sizeof(long) <= 4 ||
+            (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
+    {
         /* result fits in a signed 4-byte integer.
 
            Note: we can't use -0x80000000L in the above condition because some
@@ -1882,31 +1877,35 @@ save_long(PicklerObject *self, PyObject *obj)
         char pdata[32];
         Py_ssize_t len = 0;
 
-        pdata[1] = (unsigned char)(val & 0xff);
-        pdata[2] = (unsigned char)((val >> 8) & 0xff);
-        pdata[3] = (unsigned char)((val >> 16) & 0xff);
-        pdata[4] = (unsigned char)((val >> 24) & 0xff);
-
-        if ((pdata[4] == 0) && (pdata[3] == 0)) {
-            if (pdata[2] == 0) {
-                pdata[0] = BININT1;
-                len = 2;
+        if (self->bin) {
+            pdata[1] = (unsigned char)(val & 0xff);
+            pdata[2] = (unsigned char)((val >> 8) & 0xff);
+            pdata[3] = (unsigned char)((val >> 16) & 0xff);
+            pdata[4] = (unsigned char)((val >> 24) & 0xff);
+
+            if ((pdata[4] != 0) || (pdata[3] != 0)) {
+                pdata[0] = BININT;
+                len = 5;
             }
-            else {
+            else if (pdata[2] != 0) {
                 pdata[0] = BININT2;
                 len = 3;
             }
+            else {
+                pdata[0] = BININT1;
+                len = 2;
+            }
         }
         else {
-            pdata[0] = BININT;
-            len = 5;
+            sprintf(pdata, "%c%ld\n", INT,  val);
+            len = strlen(pdata);
         }
-
         if (_Pickler_Write(self, pdata, len) < 0)
             return -1;
 
         return 0;
     }
+    assert(!PyErr_Occurred());
 
     if (self->proto >= 2) {
         /* Linear-time pickling. */
@@ -1986,6 +1985,7 @@ save_long(PicklerObject *self, PyObject *obj)
             goto error;
     }
     else {
+        const char long_op = LONG;
         const char *string;
 
         /* proto < 2: write the repr and newline.  This is quadratic-time (in



More information about the Python-checkins mailing list