[Python-checkins] bpo-42536: GC track recycled tuples (GH-23623) (GH-23652)
pablogsal
webhook-mailer at python.org
Mon Dec 7 15:08:29 EST 2020
https://github.com/python/cpython/commit/7c797982383ebfd9cca39c480dcf6132979dd06f
commit: 7c797982383ebfd9cca39c480dcf6132979dd06f
branch: 3.8
author: Brandt Bucher <brandtbucher at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2020-12-07T20:08:24Z
summary:
bpo-42536: GC track recycled tuples (GH-23623) (GH-23652)
Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector:
- collections.OrderedDict.items
- dict.items
- enumerate
- functools.reduce
- itertools.combinations
- itertools.combinations_with_replacement
- itertools.permutations
- itertools.product
- itertools.zip_longest
- zip
Previously, they could have become untracked by a prior garbage collection.
(cherry picked from commit 226a012d1cd61f42ecd3056c554922f359a1a35d)
files:
A Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst
M Lib/test/test_builtin.py
M Lib/test/test_dict.py
M Lib/test/test_enumerate.py
M Lib/test/test_itertools.py
M Lib/test/test_ordered_dict.py
M Modules/_functoolsmodule.c
M Modules/itertoolsmodule.c
M Objects/dictobject.c
M Objects/enumobject.c
M Objects/odictobject.c
M Python/bltinmodule.c
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 4a498262ba637..729f15dc4b232 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -6,6 +6,7 @@
import collections
import decimal
import fractions
+import gc
import io
import locale
import os
@@ -27,7 +28,7 @@
from operator import neg
from test.support import (
EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink,
- maybe_get_event_loop_policy)
+ maybe_get_event_loop_policy, cpython_only)
from test.support.script_helper import assert_python_ok
from unittest.mock import MagicMock, patch
try:
@@ -1573,6 +1574,18 @@ def __iter__(self):
self.assertIs(cm.exception, exception)
+ @cpython_only
+ def test_zip_result_gc(self):
+ # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
+ # about what can be untracked. Make sure we re-track result tuples
+ # whenever we reuse them.
+ it = zip([[]])
+ gc.collect()
+ # That GC collection probably untracked the recycled internal result
+ # tuple, which is initialized to (None,). Make sure it's re-tracked when
+ # it's mutated and returned from __next__:
+ self.assertTrue(gc.is_tracked(next(it)))
+
def test_format(self):
# Test the basic machinery of the format() builtin. Don't test
# the specifics of the various formatters
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index de483ab552155..ce044c8684ff9 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -1377,6 +1377,25 @@ def items(self):
d = CustomReversedDict(pairs)
self.assertEqual(pairs[::-1], list(dict(d).items()))
+ @support.cpython_only
+ def test_dict_items_result_gc(self):
+ # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
+ # assumptions about what can be untracked. Make sure we re-track result
+ # tuples whenever we reuse them.
+ it = iter({None: []}.items())
+ gc.collect()
+ # That GC collection probably untracked the recycled internal result
+ # tuple, which is initialized to (None, None). Make sure it's re-tracked
+ # when it's mutated and returned from __next__:
+ self.assertTrue(gc.is_tracked(next(it)))
+
+ @support.cpython_only
+ def test_dict_items_result_gc(self):
+ # Same as test_dict_items_result_gc above, but reversed.
+ it = reversed({None: []}.items())
+ gc.collect()
+ self.assertTrue(gc.is_tracked(next(it)))
+
class CAPITest(unittest.TestCase):
diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py
index 5785cb46492ef..906bfc21a26ae 100644
--- a/Lib/test/test_enumerate.py
+++ b/Lib/test/test_enumerate.py
@@ -2,6 +2,7 @@
import operator
import sys
import pickle
+import gc
from test import support
@@ -134,6 +135,18 @@ def test_tuple_reuse(self):
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
+ @support.cpython_only
+ def test_enumerate_result_gc(self):
+ # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
+ # assumptions about what can be untracked. Make sure we re-track result
+ # tuples whenever we reuse them.
+ it = self.enum([[]])
+ gc.collect()
+ # That GC collection probably untracked the recycled internal result
+ # tuple, which is initialized to (None, None). Make sure it's re-tracked
+ # when it's mutated and returned from __next__:
+ self.assertTrue(gc.is_tracked(next(it)))
+
class MyEnum(enumerate):
pass
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index eaa6197bec395..d7c09222b7009 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -12,6 +12,8 @@
import sys
import struct
import threading
+import gc
+
maxsize = support.MAX_Py_ssize_t
minsize = -maxsize-1
@@ -1554,6 +1556,51 @@ def test_StopIteration(self):
self.assertRaises(StopIteration, next, f(lambda x:x, []))
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
+ @support.cpython_only
+ def test_combinations_result_gc(self):
+ # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
+ # assumptions about what can be untracked. Make sure we re-track result
+ # tuples whenever we reuse them.
+ it = combinations([None, []], 1)
+ next(it)
+ gc.collect()
+ # That GC collection probably untracked the recycled internal result
+ # tuple, which has the value (None,). Make sure it's re-tracked when
+ # it's mutated and returned from __next__:
+ self.assertTrue(gc.is_tracked(next(it)))
+
+ @support.cpython_only
+ def test_combinations_with_replacement_result_gc(self):
+ # Ditto for combinations_with_replacement.
+ it = combinations_with_replacement([None, []], 1)
+ next(it)
+ gc.collect()
+ self.assertTrue(gc.is_tracked(next(it)))
+
+ @support.cpython_only
+ def test_permutations_result_gc(self):
+ # Ditto for permutations.
+ it = permutations([None, []], 1)
+ next(it)
+ gc.collect()
+ self.assertTrue(gc.is_tracked(next(it)))
+
+ @support.cpython_only
+ def test_product_result_gc(self):
+ # Ditto for product.
+ it = product([None, []])
+ next(it)
+ gc.collect()
+ self.assertTrue(gc.is_tracked(next(it)))
+
+ @support.cpython_only
+ def test_zip_longest_result_gc(self):
+ # Ditto for zip_longest.
+ it = zip_longest([[]])
+ gc.collect()
+ self.assertTrue(gc.is_tracked(next(it)))
+
+
class TestExamples(unittest.TestCase):
def test_accumulate(self):
diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py
index 085e5f60ed93b..bcda8f83cea7c 100644
--- a/Lib/test/test_ordered_dict.py
+++ b/Lib/test/test_ordered_dict.py
@@ -654,6 +654,17 @@ def test_free_after_iterating(self):
support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict)
support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict)
+ @support.cpython_only
+ def test_ordered_dict_items_result_gc(self):
+ # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
+ # assumptions about what can be untracked. Make sure we re-track result
+ # tuples whenever we reuse them.
+ it = iter(self.OrderedDict({None: []}).items())
+ gc.collect()
+ # That GC collection probably untracked the recycled internal result
+ # tuple, which is initialized to (None, None). Make sure it's re-tracked
+ # when it's mutated and returned from __next__:
+ self.assertTrue(gc.is_tracked(next(it)))
class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst
new file mode 100644
index 0000000000000..6ccacab1f64f6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst
@@ -0,0 +1,26 @@
+Several built-in and standard library types now ensure that their internal
+result tuples are always tracked by the :term:`garbage collector
+<garbage collection>`:
+
+- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
+
+- :meth:`dict.items`
+
+- :func:`enumerate`
+
+- :func:`functools.reduce`
+
+- :func:`itertools.combinations`
+
+- :func:`itertools.combinations_with_replacement`
+
+- :func:`itertools.permutations`
+
+- :func:`itertools.product`
+
+- :func:`itertools.zip_longest`
+
+- :func:`zip`
+
+Previously, they could have become untracked by a prior garbage collection.
+Patch by Brandt Bucher.
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index a101363bf02a0..148d051714f89 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -3,6 +3,7 @@
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
#include "structmember.h"
+#include "pycore_object.h" // _PyObject_GC_TRACK
/* _functools module written and maintained
by Hye-Shik Chang <perky at FreeBSD.org>
@@ -633,6 +634,11 @@ functools_reduce(PyObject *self, PyObject *args)
if ((result = PyObject_Call(func, args, NULL)) == NULL) {
goto Fail;
}
+ // bpo-42536: The GC may have untracked this args tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(args)) {
+ _PyObject_GC_TRACK(args);
+ }
}
}
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index eba59ba1b8803..eb04c26beb81d 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_tupleobject.h"
#include "structmember.h"
+#include "pycore_object.h" // _PyObject_GC_TRACK()
/* Itertools module written and maintained
by Raymond D. Hettinger <python at rcn.com>
@@ -2255,6 +2256,11 @@ product_next(productobject *lz)
lz->result = result;
Py_DECREF(old_result);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ else if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
/* Now, we've got the only copy so we can update it in-place */
assert (npools==0 || Py_REFCNT(result) == 1);
@@ -2580,6 +2586,11 @@ combinations_next(combinationsobject *co)
co->result = result;
Py_DECREF(old_result);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ else if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
/* Now, we've got the only copy so we can update it in-place
* CPython's empty tuple is a singleton and cached in
* PyTuple's freelist.
@@ -2916,6 +2927,11 @@ cwr_next(cwrobject *co)
co->result = result;
Py_DECREF(old_result);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ else if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
/* Now, we've got the only copy so we can update it in-place CPython's
empty tuple is a singleton and cached in PyTuple's freelist. */
assert(r == 0 || Py_REFCNT(result) == 1);
@@ -3259,6 +3275,11 @@ permutations_next(permutationsobject *po)
po->result = result;
Py_DECREF(old_result);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ else if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
/* Now, we've got the only copy so we can update it in-place */
assert(r == 0 || Py_REFCNT(result) == 1);
@@ -4536,6 +4557,11 @@ zip_longest_next(ziplongestobject *lz)
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 3c56f4a515e8a..edc02372a1d92 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -3769,6 +3769,11 @@ dictiter_iternextitem(dictiterobject *di)
Py_INCREF(result);
Py_DECREF(oldkey);
Py_DECREF(oldvalue);
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
}
else {
result = PyTuple_New(2);
@@ -3884,6 +3889,11 @@ dictreviter_iternext(dictiterobject *di)
Py_INCREF(result);
Py_DECREF(oldkey);
Py_DECREF(oldvalue);
+ // bpo-42536: The GC may have untracked this result tuple. Since
+ // we're recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
}
else {
result = PyTuple_New(2);
diff --git a/Objects/enumobject.c b/Objects/enumobject.c
index 4786297c41ace..478a9dd93a32c 100644
--- a/Objects/enumobject.c
+++ b/Objects/enumobject.c
@@ -1,6 +1,7 @@
/* enumerate object */
#include "Python.h"
+#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "clinic/enumobject.c.h"
@@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index);
Py_DECREF(old_item);
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
return result;
}
result = PyTuple_New(2);
@@ -175,6 +181,11 @@ enum_next(enumobject *en)
PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index);
Py_DECREF(old_item);
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
return result;
}
result = PyTuple_New(2);
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index ac0da9bd5bae3..6076b03455f62 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -1766,6 +1766,11 @@ odictiter_iternext(odictiterobject *di)
Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
}
else {
result = PyTuple_New(2);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index e42d5f246c37a..3767f552bb17a 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -4,6 +4,7 @@
#include <ctype.h>
#include "ast.h"
#undef Yield /* undefine macro conflicting with <winbase.h> */
+#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
@@ -2618,6 +2619,11 @@ zip_next(zipobject *lz)
PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem);
}
+ // bpo-42536: The GC may have untracked this result tuple. Since we're
+ // recycling it, make sure it's tracked again:
+ if (!_PyObject_GC_IS_TRACKED(result)) {
+ _PyObject_GC_TRACK(result);
+ }
} else {
result = PyTuple_New(tuplesize);
if (result == NULL)
More information about the Python-checkins
mailing list