[Python-checkins] cpython (3.4): Issue #15513: Added a __sizeof__ implementation for pickle classes.

serhiy.storchaka python-checkins at python.org
Tue Dec 16 19:03:00 CET 2014


https://hg.python.org/cpython/rev/fed774c926f5
changeset:   93910:fed774c926f5
branch:      3.4
parent:      93908:c49b7acba06f
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Tue Dec 16 19:39:08 2014 +0200
summary:
  Issue #15513: Added a __sizeof__ implementation for pickle classes.

files:
  Lib/test/test_pickle.py    |  70 +++++++++++++++++++++++++-
  Misc/NEWS                  |   4 +-
  Modules/_pickle.c          |  58 +++++++++++++++++++++-
  Modules/clinic/_pickle.c.h |  56 ++++++++++++++++++++-
  4 files changed, 184 insertions(+), 4 deletions(-)


diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -1,7 +1,10 @@
 import pickle
 import io
 import collections
+import struct
+import sys
 
+import unittest
 from test import support
 
 from test.pickletester import AbstractPickleTests
@@ -138,6 +141,71 @@
         def get_dispatch_table(self):
             return collections.ChainMap({}, pickle.dispatch_table)
 
+    @support.cpython_only
+    class SizeofTests(unittest.TestCase):
+        check_sizeof = support.check_sizeof
+
+        def test_pickler(self):
+            basesize = support.calcobjsize('5P2n3i2n3iP')
+            p = _pickle.Pickler(io.BytesIO())
+            self.assertEqual(object.__sizeof__(p), basesize)
+            MT_size = struct.calcsize('3nP0n')
+            ME_size = struct.calcsize('Pn0P')
+            check = self.check_sizeof
+            check(p, basesize +
+                MT_size + 8 * ME_size +  # Minimal memo table size.
+                sys.getsizeof(b'x'*4096))  # Minimal write buffer size.
+            for i in range(6):
+                p.dump(chr(i))
+            check(p, basesize +
+                MT_size + 32 * ME_size +  # Size of memo table required to
+                                          # save references to 6 objects.
+                0)  # Write buffer is cleared after every dump().
+
+        def test_unpickler(self):
+            basesize = support.calcobjsize('2Pn2P 2P2n2i5P 2P3n6P2n2i')
+            unpickler = _pickle.Unpickler
+            P = struct.calcsize('P')  # Size of memo table entry.
+            n = struct.calcsize('n')  # Size of mark table entry.
+            check = self.check_sizeof
+            for encoding in 'ASCII', 'UTF-16', 'latin-1':
+                for errors in 'strict', 'replace':
+                    u = unpickler(io.BytesIO(),
+                                  encoding=encoding, errors=errors)
+                    self.assertEqual(object.__sizeof__(u), basesize)
+                    check(u, basesize +
+                             32 * P +  # Minimal memo table size.
+                             len(encoding) + 1 + len(errors) + 1)
+
+            stdsize = basesize + len('ASCII') + 1 + len('strict') + 1
+            def check_unpickler(data, memo_size, marks_size):
+                dump = pickle.dumps(data)
+                u = unpickler(io.BytesIO(dump),
+                              encoding='ASCII', errors='strict')
+                u.load()
+                check(u, stdsize + memo_size * P + marks_size * n)
+
+            check_unpickler(0, 32, 0)
+            # 20 is minimal non-empty mark stack size.
+            check_unpickler([0] * 100, 32, 20)
+            # 128 is memo table size required to save references to 100 objects.
+            check_unpickler([chr(i) for i in range(100)], 128, 20)
+            def recurse(deep):
+                data = 0
+                for i in range(deep):
+                    data = [data, data]
+                return data
+            check_unpickler(recurse(0), 32, 0)
+            check_unpickler(recurse(1), 32, 20)
+            check_unpickler(recurse(20), 32, 58)
+            check_unpickler(recurse(50), 64, 58)
+            check_unpickler(recurse(100), 128, 134)
+
+            u = unpickler(io.BytesIO(pickle.dumps('a', 0)),
+                          encoding='ASCII', errors='strict')
+            u.load()
+            check(u, stdsize + 32 * P + 2 + 1)
+
 
 def test_main():
     tests = [PickleTests, PyPicklerTests, PyPersPicklerTests,
@@ -148,7 +216,7 @@
                       PyPicklerUnpicklerObjectTests,
                       CPicklerUnpicklerObjectTests,
                       CDispatchTableTests, CChainDispatchTableTests,
-                      InMemoryPickleTests])
+                      InMemoryPickleTests, SizeofTests])
     support.run_unittest(*tests)
     support.run_doctest(pickle)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -41,7 +41,9 @@
 Library
 -------
 
-- Issue #19858:  pickletools.optimize() now aware of the MEMOIZE opcode, can
+- Issue #15513: Added a __sizeof__ implementation for pickle classes.
+
+- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
   produce more compact result and no longer produces invalid output if input
   data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
 
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -375,7 +375,7 @@
     PyVarObject_HEAD_INIT(NULL, 0)
     "_pickle.Pdata",              /*tp_name*/
     sizeof(Pdata),                /*tp_basicsize*/
-    0,                            /*tp_itemsize*/
+    sizeof(PyObject *),           /*tp_itemsize*/
     (destructor)Pdata_dealloc,    /*tp_dealloc*/
 };
 
@@ -3930,9 +3930,37 @@
     Py_RETURN_NONE;
 }
 
+/*[clinic input]
+
+_pickle.Pickler.__sizeof__ -> Py_ssize_t
+
+Returns size in memory, in bytes.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_pickle_Pickler___sizeof___impl(PicklerObject *self)
+/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
+{
+    Py_ssize_t res, s;
+
+    res = sizeof(PicklerObject);
+    if (self->memo != NULL) {
+        res += sizeof(PyMemoTable);
+        res += self->memo->mt_allocated * sizeof(PyMemoEntry);
+    }
+    if (self->output_buffer != NULL) {
+        s = _PySys_GetSizeOf(self->output_buffer);
+        if (s == -1)
+            return -1;
+        res += s;
+    }
+    return res;
+}
+
 static struct PyMethodDef Pickler_methods[] = {
     _PICKLE_PICKLER_DUMP_METHODDEF
     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
+    _PICKLE_PICKLER___SIZEOF___METHODDEF
     {NULL, NULL}                /* sentinel */
 };
 
@@ -6289,9 +6317,37 @@
     return global;
 }
 
+/*[clinic input]
+
+_pickle.Unpickler.__sizeof__ -> Py_ssize_t
+
+Returns size in memory, in bytes.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
+/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
+{
+    Py_ssize_t res;
+
+    res = sizeof(UnpicklerObject);
+    if (self->memo != NULL)
+        res += self->memo_size * sizeof(PyObject *);
+    if (self->marks != NULL)
+        res += self->marks_size * sizeof(Py_ssize_t);
+    if (self->input_line != NULL)
+        res += strlen(self->input_line) + 1;
+    if (self->encoding != NULL)
+        res += strlen(self->encoding) + 1;
+    if (self->errors != NULL)
+        res += strlen(self->errors) + 1;
+    return res;
+}
+
 static struct PyMethodDef Unpickler_methods[] = {
     _PICKLE_UNPICKLER_LOAD_METHODDEF
     _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
+    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
     {NULL, NULL}                /* sentinel */
 };
 
diff --git a/Modules/clinic/_pickle.c.h b/Modules/clinic/_pickle.c.h
--- a/Modules/clinic/_pickle.c.h
+++ b/Modules/clinic/_pickle.c.h
@@ -34,6 +34,33 @@
 #define _PICKLE_PICKLER_DUMP_METHODDEF    \
     {"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__},
 
+PyDoc_STRVAR(_pickle_Pickler___sizeof____doc__,
+"__sizeof__($self, /)\n"
+"--\n"
+"\n"
+"Returns size in memory, in bytes.");
+
+#define _PICKLE_PICKLER___SIZEOF___METHODDEF    \
+    {"__sizeof__", (PyCFunction)_pickle_Pickler___sizeof__, METH_NOARGS, _pickle_Pickler___sizeof____doc__},
+
+static Py_ssize_t
+_pickle_Pickler___sizeof___impl(PicklerObject *self);
+
+static PyObject *
+_pickle_Pickler___sizeof__(PicklerObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t _return_value;
+
+    _return_value = _pickle_Pickler___sizeof___impl(self);
+    if ((_return_value == -1) && PyErr_Occurred())
+        goto exit;
+    return_value = PyLong_FromSsize_t(_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_pickle_Pickler___init____doc__,
 "Pickler(file, protocol=None, fix_imports=True)\n"
 "--\n"
@@ -191,6 +218,33 @@
     return return_value;
 }
 
+PyDoc_STRVAR(_pickle_Unpickler___sizeof____doc__,
+"__sizeof__($self, /)\n"
+"--\n"
+"\n"
+"Returns size in memory, in bytes.");
+
+#define _PICKLE_UNPICKLER___SIZEOF___METHODDEF    \
+    {"__sizeof__", (PyCFunction)_pickle_Unpickler___sizeof__, METH_NOARGS, _pickle_Unpickler___sizeof____doc__},
+
+static Py_ssize_t
+_pickle_Unpickler___sizeof___impl(UnpicklerObject *self);
+
+static PyObject *
+_pickle_Unpickler___sizeof__(UnpicklerObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t _return_value;
+
+    _return_value = _pickle_Unpickler___sizeof___impl(self);
+    if ((_return_value == -1) && PyErr_Occurred())
+        goto exit;
+    return_value = PyLong_FromSsize_t(_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
 "Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
 "--\n"
@@ -488,4 +542,4 @@
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=f965b6c7018c898d input=a9049054013a1b77]*/
+/*[clinic end generated code: output=3aba79576e240c62 input=a9049054013a1b77]*/

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list