[Python-checkins] bpo-42536: GC track recycled tuples (GH-23623) (GH-23651)

pablogsal webhook-mailer at python.org
Mon Dec 7 15:07:53 EST 2020


https://github.com/python/cpython/commit/60463e8e4f79e5b5e96dc43fb83ded373b489e33
commit: 60463e8e4f79e5b5e96dc43fb83ded373b489e33
branch: 3.9
author: Brandt Bucher <brandtbucher at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2020-12-07T20:07:48Z
summary:

bpo-42536: GC track recycled tuples (GH-23623) (GH-23651)

Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector:

- collections.OrderedDict.items
- dict.items
- enumerate
- functools.reduce
- itertools.combinations
- itertools.combinations_with_replacement
- itertools.permutations
- itertools.product
- itertools.zip_longest
- zip

Previously, they could have become untracked by a prior garbage collection.
(cherry picked from commit 226a012d1cd61f42ecd3056c554922f359a1a35d)

files:
A Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst
M Lib/test/test_builtin.py
M Lib/test/test_dict.py
M Lib/test/test_enumerate.py
M Lib/test/test_itertools.py
M Lib/test/test_ordered_dict.py
M Modules/_functoolsmodule.c
M Modules/itertoolsmodule.c
M Objects/dictobject.c
M Objects/enumobject.c
M Objects/odictobject.c
M Python/bltinmodule.c

diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 4df1b95bca79a..d009f57e47e05 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -6,6 +6,7 @@
 import collections
 import decimal
 import fractions
+import gc
 import io
 import locale
 import os
@@ -1606,6 +1607,18 @@ def __iter__(self):
 
         self.assertIs(cm.exception, exception)
 
+    @support.cpython_only
+    def test_zip_result_gc(self):
+        # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
+        # about what can be untracked. Make sure we re-track result tuples
+        # whenever we reuse them.
+        it = zip([[]])
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None,). Make sure it's re-tracked when
+        # it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
     def test_format(self):
         # Test the basic machinery of the format() builtin.  Don't test
         #  the specifics of the various formatters
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 6b8596fff6a9f..b10c07534ddb2 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -1422,6 +1422,25 @@ def items(self):
         d = CustomReversedDict(pairs)
         self.assertEqual(pairs[::-1], list(dict(d).items()))
 
+    @support.cpython_only
+    def test_dict_items_result_gc(self):
+        # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = iter({None: []}.items())
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_dict_items_result_gc(self):
+        # Same as test_dict_items_result_gc above, but reversed.
+        it = reversed({None: []}.items())
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
 
 class CAPITest(unittest.TestCase):
 
diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py
index 5785cb46492ef..906bfc21a26ae 100644
--- a/Lib/test/test_enumerate.py
+++ b/Lib/test/test_enumerate.py
@@ -2,6 +2,7 @@
 import operator
 import sys
 import pickle
+import gc
 
 from test import support
 
@@ -134,6 +135,18 @@ def test_tuple_reuse(self):
         self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
         self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
 
+    @support.cpython_only
+    def test_enumerate_result_gc(self):
+        # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = self.enum([[]])
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
 class MyEnum(enumerate):
     pass
 
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 702cf0820316b..7101264284919 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -12,6 +12,8 @@
 import sys
 import struct
 import threading
+import gc
+
 maxsize = support.MAX_Py_ssize_t
 minsize = -maxsize-1
 
@@ -1554,6 +1556,51 @@ def test_StopIteration(self):
             self.assertRaises(StopIteration, next, f(lambda x:x, []))
             self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
 
+    @support.cpython_only
+    def test_combinations_result_gc(self):
+        # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = combinations([None, []], 1)
+        next(it)
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which has the value (None,). Make sure it's re-tracked when
+        # it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_combinations_with_replacement_result_gc(self):
+        # Ditto for combinations_with_replacement.
+        it = combinations_with_replacement([None, []], 1)
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_permutations_result_gc(self):
+        # Ditto for permutations.
+        it = permutations([None, []], 1)
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_product_result_gc(self):
+        # Ditto for product.
+        it = product([None, []])
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_zip_longest_result_gc(self):
+        # Ditto for zip_longest.
+        it = zip_longest([[]])
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+
 class TestExamples(unittest.TestCase):
 
     def test_accumulate(self):
diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py
index fdea44e4d8596..9df4efbfffa22 100644
--- a/Lib/test/test_ordered_dict.py
+++ b/Lib/test/test_ordered_dict.py
@@ -697,6 +697,17 @@ def test_merge_operator(self):
         with self.assertRaises(ValueError):
             a |= "BAD"
 
+    @support.cpython_only
+    def test_ordered_dict_items_result_gc(self):
+        # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = iter(self.OrderedDict({None: []}).items())
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
 
 class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst
new file mode 100644
index 0000000000000..6ccacab1f64f6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst	
@@ -0,0 +1,26 @@
+Several built-in and standard library types now ensure that their internal
+result tuples are always tracked by the :term:`garbage collector
+<garbage collection>`:
+
+- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
+
+- :meth:`dict.items`
+
+- :func:`enumerate`
+
+- :func:`functools.reduce`
+
+- :func:`itertools.combinations`
+
+- :func:`itertools.combinations_with_replacement`
+
+- :func:`itertools.permutations`
+
+- :func:`itertools.product`
+
+- :func:`itertools.zip_longest`
+
+- :func:`zip`
+
+Previously, they could have become untracked by a prior garbage collection.
+Patch by Brandt Bucher.
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index d158d3bae157b..42764a181d26b 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "pycore_object.h"        // _PyObject_GC_TRACK
 #include "pycore_pystate.h"       // _PyThreadState_GET()
 #include "pycore_tupleobject.h"
 #include "structmember.h"         // PyMemberDef
@@ -673,6 +674,11 @@ functools_reduce(PyObject *self, PyObject *args)
             if ((result = PyObject_Call(func, args, NULL)) == NULL) {
                 goto Fail;
             }
+            // bpo-42536: The GC may have untracked this args tuple. Since we're
+            // recycling it, make sure it's tracked again:
+            if (!_PyObject_GC_IS_TRACKED(args)) {
+                _PyObject_GC_TRACK(args);
+            }
         }
     }
 
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index 18fcebdf25b46..95ef8d79a1653 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -2,6 +2,7 @@
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "pycore_tupleobject.h"
+#include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include <stddef.h>               // offsetof()
 
 /* Itertools module written and maintained
@@ -2245,6 +2246,11 @@ product_next(productobject *lz)
             lz->result = result;
             Py_DECREF(old_result);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        else if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         /* Now, we've got the only copy so we can update it in-place */
         assert (npools==0 || Py_REFCNT(result) == 1);
 
@@ -2568,6 +2574,11 @@ combinations_next(combinationsobject *co)
             co->result = result;
             Py_DECREF(old_result);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        else if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         /* Now, we've got the only copy so we can update it in-place
          * CPython's empty tuple is a singleton and cached in
          * PyTuple's freelist.
@@ -2902,6 +2913,11 @@ cwr_next(cwrobject *co)
             co->result = result;
             Py_DECREF(old_result);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        else if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         /* Now, we've got the only copy so we can update it in-place CPython's
            empty tuple is a singleton and cached in PyTuple's freelist. */
         assert(r == 0 || Py_REFCNT(result) == 1);
@@ -3246,6 +3262,11 @@ permutations_next(permutationsobject *po)
             po->result = result;
             Py_DECREF(old_result);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        else if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         /* Now, we've got the only copy so we can update it in-place */
         assert(r == 0 || Py_REFCNT(result) == 1);
 
@@ -4515,6 +4536,11 @@ zip_longest_next(ziplongestobject *lz)
             PyTuple_SET_ITEM(result, i, item);
             Py_DECREF(olditem);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
     } else {
         result = PyTuple_New(tuplesize);
         if (result == NULL)
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index faee6bc901eeb..8a056530a4545 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -3861,6 +3861,11 @@ dictiter_iternextitem(dictiterobject *di)
         Py_INCREF(result);
         Py_DECREF(oldkey);
         Py_DECREF(oldvalue);
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
     }
     else {
         result = PyTuple_New(2);
@@ -3976,6 +3981,11 @@ dictreviter_iternext(dictiterobject *di)
             Py_INCREF(result);
             Py_DECREF(oldkey);
             Py_DECREF(oldvalue);
+            // bpo-42536: The GC may have untracked this result tuple. Since
+            // we're recycling it, make sure it's tracked again:
+            if (!_PyObject_GC_IS_TRACKED(result)) {
+                _PyObject_GC_TRACK(result);
+            }
         }
         else {
             result = PyTuple_New(2);
diff --git a/Objects/enumobject.c b/Objects/enumobject.c
index 4a83bb45aa667..bdd0ea5f3971b 100644
--- a/Objects/enumobject.c
+++ b/Objects/enumobject.c
@@ -1,6 +1,7 @@
 /* enumerate object */
 
 #include "Python.h"
+#include "pycore_object.h"        // _PyObject_GC_TRACK()
 
 #include "clinic/enumobject.c.h"
 
@@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
         PyTuple_SET_ITEM(result, 1, next_item);
         Py_DECREF(old_index);
         Py_DECREF(old_item);
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         return result;
     }
     result = PyTuple_New(2);
@@ -175,6 +181,11 @@ enum_next(enumobject *en)
         PyTuple_SET_ITEM(result, 1, next_item);
         Py_DECREF(old_index);
         Py_DECREF(old_item);
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
         return result;
     }
     result = PyTuple_New(2);
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index d5bf499575d09..b9f2169a72a8a 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -1817,6 +1817,11 @@ odictiter_iternext(odictiterobject *di)
         Py_INCREF(result);
         Py_DECREF(PyTuple_GET_ITEM(result, 0));  /* borrowed */
         Py_DECREF(PyTuple_GET_ITEM(result, 1));  /* borrowed */
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
     }
     else {
         result = PyTuple_New(2);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 199b09c4d8c41..614012df9b12a 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -2619,6 +2619,11 @@ zip_next(zipobject *lz)
             PyTuple_SET_ITEM(result, i, item);
             Py_DECREF(olditem);
         }
+        // bpo-42536: The GC may have untracked this result tuple. Since we're
+        // recycling it, make sure it's tracked again:
+        if (!_PyObject_GC_IS_TRACKED(result)) {
+            _PyObject_GC_TRACK(result);
+        }
     } else {
         result = PyTuple_New(tuplesize);
         if (result == NULL)



More information about the Python-checkins mailing list