[Python-checkins] r64066 - in python/trunk: Lib/test/test_sys.py Objects/unicodeobject.c

robert.schuppenies python-checkins at python.org
Tue Jun 10 12:10:32 CEST 2008


Author: robert.schuppenies
Date: Tue Jun 10 12:10:31 2008
New Revision: 64066

Log:
Issue 3048: Fixed sys.getsizeof for unicode objects.


Modified:
   python/trunk/Lib/test/test_sys.py
   python/trunk/Objects/unicodeobject.c

Modified: python/trunk/Lib/test/test_sys.py
==============================================================================
--- python/trunk/Lib/test/test_sys.py	(original)
+++ python/trunk/Lib/test/test_sys.py	Tue Jun 10 12:10:31 2008
@@ -421,11 +421,14 @@
         self.file.close()
         test.test_support.unlink(test.test_support.TESTFN)
 
-    def check_sizeof(self, o, size):
+    def check_sizeof(self, o, size, size2=None):
+        """Check that sys.getsizeof(o) equals size or, if given, size2."""
         result = sys.getsizeof(o)
-        msg = 'wrong size for %s: got %d, expected %d' \
-            % (type(o), result, size)
-        self.assertEqual(result, size, msg)
+        msg = 'wrong size for %s: got %d, expected ' % (type(o), result)
+        if (size2 != None) and (result != size):
+            self.assertEqual(result, size2, msg + str(size2))
+        else:
+            self.assertEqual(result, size, msg + str(size))
 
     def align(self, value):
         mod = value % self.p
@@ -517,10 +520,10 @@
                 pass
         # type (PyTypeObject + PyNumberMethods +  PyMappingMethods +
         #       PySequenceMethods +  PyBufferProcs)
-        len_typeobject = p + 2*l + 15*p + l + 4*p + l + 9*p + l + 11*p
+        len_typeobject = p + 2*l + 15*p + l + 4*p + l + 9*p +\
+                         l + 11*p + self.align(4)
         self.check_sizeof(class_newstyle,
-                          h + len_typeobject + 42*p + 10*p + 3*p + 6*p)
-
+                          h + len_typeobject + 41*p + 10*p + 3*p + 6*p)
 
     def test_specialtypes(self):
         i = self.i
@@ -534,6 +537,24 @@
         # list
         self.check_sizeof([], h + l + p + l)
         self.check_sizeof([1, 2, 3], h + l + p + l + 3*l)
+        # unicode
+        import math
+        usize = math.log(sys.maxunicode + 1, 2) / 8
+        samples = [u'', u'1'*100]
+        # we need to test for both sizes, because we don't know if the string
+        # has been cached
+        for s in samples:
+            basicsize =  h + l + p + l + p + usize * (len(s) + 1)
+            self.check_sizeof(s, basicsize,\
+                                  size2=basicsize + sys.getsizeof(str(s)))
+        # XXX trigger caching encoded version as Python string
+        s = samples[1]
+        try:
+            getattr(sys, s)
+        except AttributeError:
+            pass
+        finally:
+            self.check_sizeof(s, basicsize + sys.getsizeof(str(s)))
 
         h += l
         # long

Modified: python/trunk/Objects/unicodeobject.c
==============================================================================
--- python/trunk/Objects/unicodeobject.c	(original)
+++ python/trunk/Objects/unicodeobject.c	Tue Jun 10 12:10:31 2008
@@ -7895,6 +7895,29 @@
 \n\
 ");
 
+/* Return the size of v in bytes as a new int reference: the object struct,
+   the Py_UNICODE buffer (length + 1 units), plus the size of the cached
+   encoded string v->defenc if set.  Returns NULL on error. */
+static PyObject *
+unicode__sizeof__(PyUnicodeObject *v)
+{
+    PyObject *res, *defsize, *total;
+    res = PyInt_FromSsize_t(sizeof(PyUnicodeObject) +
+                            sizeof(Py_UNICODE) * (v->length + 1));
+    if (res == NULL || v->defenc == NULL)
+        return res;
+    defsize = PyObject_CallMethod(v->defenc, "__sizeof__", NULL);
+    /* PyNumber_Add returns a new reference, so drop both operands. */
+    total = defsize ? PyNumber_Add(res, defsize) : NULL;
+    Py_DECREF(res);
+    Py_XDECREF(defsize);
+    return total;
+}
+
+PyDoc_STRVAR(sizeof__doc__,  /* docstring for the "__sizeof__" method */
+"S.__sizeof__() -> size of S in memory, in bytes\n\
+\n\
+");
 
 static PyObject *
 unicode_getnewargs(PyUnicodeObject *v)
@@ -7952,6 +7975,7 @@
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
+    {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
 #endif


More information about the Python-checkins mailing list