Python-checkins
Threads by month
- ----- 2025 -----
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2003 -----
- December
- November
- October
- September
- August
March 2025
- 1 participants
- 715 discussions

[3.12] gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (#131930)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/5d4e89141137d98a546f81210c0c449ab1…
commit: 5d4e89141137d98a546f81210c0c449ab1d76937
branch: 3.12
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T14:50:13+02:00
summary:
[3.12] gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (#131930)
gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124)
(cherry picked from commit bab1398a47f6d0cfc1be70497f306874c749ef7c)
Co-authored-by: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
files:
A Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 137e23867c5939..f1b420ed7944a9 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -18,9 +18,11 @@
import textwrap
import types
import unittest
+import unittest.mock as mock
import warnings
import weakref
+from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
@@ -121,6 +123,21 @@
</foo>
"""
+def is_python_implementation():
+ assert ET is not None, "ET must be initialized"
+ assert pyET is not None, "pyET must be initialized"
+ return ET is pyET
+
+
+def equal_wrapper(cls):
+ """Mock cls.__eq__ to check whether it has been called or not.
+
+ The behaviour of cls.__eq__ (side-effects included) is left as is.
+ """
+ eq = cls.__eq__
+ return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
+
+
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -2562,6 +2579,7 @@ def test_pickle_issue18997(self):
class BadElementTest(ElementTestCase, unittest.TestCase):
+
def test_extend_mutable_list(self):
class X:
@property
@@ -2600,18 +2618,168 @@ class Y(X, ET.Element):
e = ET.Element('foo')
e.extend(L)
- def test_remove_with_mutating(self):
- class X(ET.Element):
+ def test_remove_with_clear_assume_missing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=True)
+
+ def test_remove_with_clear_assume_existing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=False)
+
+ def do_test_remove_with_clear(self, *, raises):
+
+ # Until the discrepency between "del root[:]" and "root.clear()" is
+ # resolved, we need to keep two tests. Previously, using "del root[:]"
+ # did not crash with the reproducer of gh-126033 while "root.clear()"
+ # did.
+
+ class E(ET.Element):
+ """Local class to be able to mock E.__eq__ for introspection."""
+
+ class X(E):
def __eq__(self, o):
- del e[:]
- return False
- e = ET.Element('foo')
- e.extend([X('bar')])
- self.assertRaises(ValueError, e.remove, ET.Element('baz'))
+ del root[:]
+ return not raises
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- self.assertRaises(ValueError, e.remove, X('baz'))
+ class Y(E):
+ def __eq__(self, o):
+ root.clear()
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ self.assertIs(E.__eq__, object.__eq__)
+
+ for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
+ self.enterContext(self.subTest(side_effect=side_effect))
+
+ # test removing R() from [U()]
+ for R, U, description in [
+ (E, Z, "remove missing E() from [Z()]"),
+ (Z, E, "remove missing Z() from [E()]"),
+ (Z, Z, "remove missing Z() from [Z()]"),
+ ]:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # Test removing root[0] from [Z()].
+ #
+ # Since we call root.remove() with root[0], Z.__eq__()
+ # will not be called (we branch on the fast Py_EQ path).
+ with self.subTest("remove root[0] from [Z()]"):
+ root = E('top')
+ root.append(Z('rem'))
+ with equal_wrapper(E) as f, equal_wrapper(Z) as g:
+ root.remove(root[0])
+ f.assert_not_called()
+ g.assert_not_called()
+
+ # Test removing root[1] (of type R) from [U(), R()].
+ is_special = is_python_implementation() and raises and Z is Y
+ if is_python_implementation() and raises and Z is Y:
+ # In pure Python, using root.clear() sets the children
+ # list to [] without calling list.clear().
+ #
+ # For this reason, the call to root.remove() first
+ # checks root[0] and sets the children list to []
+ # since either root[0] or root[1] is an evil element.
+ #
+ # Since checking root[1] still uses the old reference
+ # to the children list, PyObject_RichCompareBool() branches
+ # to the fast Py_EQ path and Y.__eq__() is called exactly
+ # once (when checking root[0]).
+ continue
+ else:
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def test_remove_with_mutate_root_assume_missing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=True)
+
+ def test_remove_with_mutate_root_assume_existing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=False)
+
+ def do_test_remove_with_mutate_root(self, *, raises):
+ E = ET.Element
+
+ class Z(E):
+ def __eq__(self, o):
+ del root[0]
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing root[1] (of type R) from [U(), R()]
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def cases_for_remove_missing_with_mutations(self, E, Z):
+ # Cases for removing R() from [U(), V()].
+ # The case U = V = R = E is not interesting as there is no mutation.
+ for U, V in [(E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {E.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield E, U, V, description
+
+ for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {Z.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield Z, U, V, description
+
+ def cases_for_remove_existing_with_mutations(self, E, Z):
+ # Cases for removing root[1] (of type R) from [U(), R()].
+ # The case U = R = E is not interesting as there is no mutation.
+ for U, R, description in [
+ (E, Z, "remove root[1] from [E(), Z()]"),
+ (Z, E, "remove root[1] from [Z(), E()]"),
+ (Z, Z, "remove root[1] from [Z(), Z()]"),
+ ]:
+ description = (f"remove root[1] (of type {R.__name__}) "
+ f"from [{U.__name__}(), {R.__name__}()]")
+ yield R, U, description
@support.infinite_recursion(25)
def test_recursive_repr(self):
diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
new file mode 100644
index 00000000000000..fa09c712aa0c4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
@@ -0,0 +1,3 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
+<xml.etree.ElementTree.Element.remove>` when the element is
+concurrently mutated. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index ad65eedcd6464f..56d1508af137da 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -845,6 +845,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0)
goto error;
+ // TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) {
@@ -1627,42 +1628,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{
Py_ssize_t i;
- int rc;
- PyObject *found;
-
- if (!self->extra) {
- /* element has no children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
- return NULL;
- }
-
- for (i = 0; i < self->extra->length; i++) {
- if (self->extra->children[i] == subelement)
- break;
- rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
- if (rc > 0)
+ // When iterating over the list of children, we need to check that the
+ // list is not cleared (self->extra != NULL) and that we are still within
+ // the correct bounds (i < self->extra->length).
+ //
+ // We deliberately avoid protecting against children lists that grow
+ // faster than the index since list objects do not protect against it.
+ int rc = 0;
+ for (i = 0; self->extra && i < self->extra->length; i++) {
+ if (self->extra->children[i] == subelement) {
+ rc = 1;
break;
- if (rc < 0)
+ }
+ PyObject *child = Py_NewRef(self->extra->children[i]);
+ rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
+ Py_DECREF(child);
+ if (rc < 0) {
return NULL;
+ }
+ else if (rc > 0) {
+ break;
+ }
}
- if (i >= self->extra->length) {
- /* subelement is not in children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
+ if (rc == 0) {
+ PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
- found = self->extra->children[i];
+ // An extra check must be done if the mutation occurs at the very last
+ // step and removes or clears the 'extra' list (the condition on the
+ // length would not be satisfied any more).
+ if (self->extra == NULL || i >= self->extra->length) {
+ Py_RETURN_NONE;
+ }
+
+ PyObject *found = self->extra->children[i];
self->extra->length--;
- for (; i < self->extra->length; i++)
+ for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1];
+ }
Py_DECREF(found);
Py_RETURN_NONE;
1
0

[3.13] gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (#131929)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/b41c8cc671f454743c076ced412eec0156…
commit: b41c8cc671f454743c076ced412eec01565d0625
branch: 3.13
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T14:50:03+02:00
summary:
[3.13] gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124) (#131929)
gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (GH-126124)
(cherry picked from commit bab1398a47f6d0cfc1be70497f306874c749ef7c)
Co-authored-by: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
files:
A Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 235f4b54fac50b..3878e3a9cd5732 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -18,9 +18,11 @@
import textwrap
import types
import unittest
+import unittest.mock as mock
import warnings
import weakref
+from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
@@ -121,6 +123,21 @@
</foo>
"""
+def is_python_implementation():
+ assert ET is not None, "ET must be initialized"
+ assert pyET is not None, "pyET must be initialized"
+ return ET is pyET
+
+
+def equal_wrapper(cls):
+ """Mock cls.__eq__ to check whether it has been called or not.
+
+ The behaviour of cls.__eq__ (side-effects included) is left as is.
+ """
+ eq = cls.__eq__
+ return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
+
+
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -2642,6 +2659,7 @@ def test_pickle_issue18997(self):
class BadElementTest(ElementTestCase, unittest.TestCase):
+
def test_extend_mutable_list(self):
class X:
@property
@@ -2680,18 +2698,168 @@ class Y(X, ET.Element):
e = ET.Element('foo')
e.extend(L)
- def test_remove_with_mutating(self):
- class X(ET.Element):
+ def test_remove_with_clear_assume_missing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=True)
+
+ def test_remove_with_clear_assume_existing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=False)
+
+ def do_test_remove_with_clear(self, *, raises):
+
+ # Until the discrepency between "del root[:]" and "root.clear()" is
+ # resolved, we need to keep two tests. Previously, using "del root[:]"
+ # did not crash with the reproducer of gh-126033 while "root.clear()"
+ # did.
+
+ class E(ET.Element):
+ """Local class to be able to mock E.__eq__ for introspection."""
+
+ class X(E):
def __eq__(self, o):
- del e[:]
- return False
- e = ET.Element('foo')
- e.extend([X('bar')])
- self.assertRaises(ValueError, e.remove, ET.Element('baz'))
+ del root[:]
+ return not raises
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- self.assertRaises(ValueError, e.remove, X('baz'))
+ class Y(E):
+ def __eq__(self, o):
+ root.clear()
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ self.assertIs(E.__eq__, object.__eq__)
+
+ for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
+ self.enterContext(self.subTest(side_effect=side_effect))
+
+ # test removing R() from [U()]
+ for R, U, description in [
+ (E, Z, "remove missing E() from [Z()]"),
+ (Z, E, "remove missing Z() from [E()]"),
+ (Z, Z, "remove missing Z() from [Z()]"),
+ ]:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # Test removing root[0] from [Z()].
+ #
+ # Since we call root.remove() with root[0], Z.__eq__()
+ # will not be called (we branch on the fast Py_EQ path).
+ with self.subTest("remove root[0] from [Z()]"):
+ root = E('top')
+ root.append(Z('rem'))
+ with equal_wrapper(E) as f, equal_wrapper(Z) as g:
+ root.remove(root[0])
+ f.assert_not_called()
+ g.assert_not_called()
+
+ # Test removing root[1] (of type R) from [U(), R()].
+ is_special = is_python_implementation() and raises and Z is Y
+ if is_python_implementation() and raises and Z is Y:
+ # In pure Python, using root.clear() sets the children
+ # list to [] without calling list.clear().
+ #
+ # For this reason, the call to root.remove() first
+ # checks root[0] and sets the children list to []
+ # since either root[0] or root[1] is an evil element.
+ #
+ # Since checking root[1] still uses the old reference
+ # to the children list, PyObject_RichCompareBool() branches
+ # to the fast Py_EQ path and Y.__eq__() is called exactly
+ # once (when checking root[0]).
+ continue
+ else:
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def test_remove_with_mutate_root_assume_missing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=True)
+
+ def test_remove_with_mutate_root_assume_existing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=False)
+
+ def do_test_remove_with_mutate_root(self, *, raises):
+ E = ET.Element
+
+ class Z(E):
+ def __eq__(self, o):
+ del root[0]
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing root[1] (of type R) from [U(), R()]
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def cases_for_remove_missing_with_mutations(self, E, Z):
+ # Cases for removing R() from [U(), V()].
+ # The case U = V = R = E is not interesting as there is no mutation.
+ for U, V in [(E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {E.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield E, U, V, description
+
+ for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {Z.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield Z, U, V, description
+
+ def cases_for_remove_existing_with_mutations(self, E, Z):
+ # Cases for removing root[1] (of type R) from [U(), R()].
+ # The case U = R = E is not interesting as there is no mutation.
+ for U, R, description in [
+ (E, Z, "remove root[1] from [E(), Z()]"),
+ (Z, E, "remove root[1] from [Z(), E()]"),
+ (Z, Z, "remove root[1] from [Z(), Z()]"),
+ ]:
+ description = (f"remove root[1] (of type {R.__name__}) "
+ f"from [{U.__name__}(), {R.__name__}()]")
+ yield R, U, description
@support.infinite_recursion(25)
def test_recursive_repr(self):
diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
new file mode 100644
index 00000000000000..fa09c712aa0c4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
@@ -0,0 +1,3 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
+<xml.etree.ElementTree.Element.remove>` when the element is
+concurrently mutated. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index b89f455cd8f1ca..3dd60c2eace07e 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -847,6 +847,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0)
goto error;
+ // TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) {
@@ -1625,42 +1626,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{
Py_ssize_t i;
- int rc;
- PyObject *found;
-
- if (!self->extra) {
- /* element has no children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
- return NULL;
- }
-
- for (i = 0; i < self->extra->length; i++) {
- if (self->extra->children[i] == subelement)
- break;
- rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
- if (rc > 0)
+ // When iterating over the list of children, we need to check that the
+ // list is not cleared (self->extra != NULL) and that we are still within
+ // the correct bounds (i < self->extra->length).
+ //
+ // We deliberately avoid protecting against children lists that grow
+ // faster than the index since list objects do not protect against it.
+ int rc = 0;
+ for (i = 0; self->extra && i < self->extra->length; i++) {
+ if (self->extra->children[i] == subelement) {
+ rc = 1;
break;
- if (rc < 0)
+ }
+ PyObject *child = Py_NewRef(self->extra->children[i]);
+ rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
+ Py_DECREF(child);
+ if (rc < 0) {
return NULL;
+ }
+ else if (rc > 0) {
+ break;
+ }
}
- if (i >= self->extra->length) {
- /* subelement is not in children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
+ if (rc == 0) {
+ PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
- found = self->extra->children[i];
+ // An extra check must be done if the mutation occurs at the very last
+ // step and removes or clears the 'extra' list (the condition on the
+ // length would not be satisfied any more).
+ if (self->extra == NULL || i >= self->extra->length) {
+ Py_RETURN_NONE;
+ }
+
+ PyObject *found = self->extra->children[i];
self->extra->length--;
- for (; i < self->extra->length; i++)
+ for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1];
+ }
Py_DECREF(found);
Py_RETURN_NONE;
1
0

[3.13] gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when current mutations happen (#127964) (#131931)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/588bb6ddf4388cefc926006e9e4752b7e6…
commit: 588bb6ddf4388cefc926006e9e4752b7e62ea077
branch: 3.13
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T14:48:42+02:00
summary:
[3.13] gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when current mutations happen (#127964) (#131931)
gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964)
We fix a use-after-free in the `find`, `findtext` and `findall` methods of `xml.etree.ElementTree.Element`
objects that can be triggered when the tag to find implements an `__eq__` method that mutates the
element being queried.
(cherry picked from commit c57623c221d46daeaedfbf2b32d041fde0c882de)
files:
A Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index ebec9d8f18a2f6..235f4b54fac50b 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2793,20 +2793,38 @@ def element_factory(x, y):
gc_collect()
-class MutatingElementPath(str):
+class MutationDeleteElementPath(str):
def __new__(cls, elem, *args):
self = str.__new__(cls, *args)
self.elem = elem
return self
+
def __eq__(self, o):
del self.elem[:]
return True
-MutatingElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
+
+class MutationClearElementPath(str):
+ def __new__(cls, elem, *args):
+ self = str.__new__(cls, *args)
+ self.elem = elem
+ return self
+
+ def __eq__(self, o):
+ self.elem.clear()
+ return True
+
+ __hash__ = str.__hash__
+
class BadElementPath(str):
def __eq__(self, o):
raise 1/0
-BadElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
class BadElementPathTest(ElementTestCase, unittest.TestCase):
def setUp(self):
@@ -2821,9 +2839,11 @@ def tearDown(self):
super().tearDown()
def test_find_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.find(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.find(cls(e, 'x'))
def test_find_with_error(self):
e = ET.Element('foo')
@@ -2834,9 +2854,11 @@ def test_find_with_error(self):
pass
def test_findtext_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findtext(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findtext(cls(e, 'x'))
def test_findtext_with_error(self):
e = ET.Element('foo')
@@ -2861,9 +2883,11 @@ def test_findtext_with_none_text_attribute(self):
self.assertEqual(root_elem.findtext('./bar'), '')
def test_findall_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findall(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findall(cls(e, 'x'))
def test_findall_with_error(self):
e = ET.Element('foo')
diff --git a/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
new file mode 100644
index 00000000000000..30098f6e13e986
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
@@ -0,0 +1,5 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.find <xml.etree.ElementTree.Element.find>`,
+:meth:`Element.findtext <xml.etree.ElementTree.Element.findtext>` and
+:meth:`Element.findall <xml.etree.ElementTree.Element.findall>` when the tag
+to find implements an :meth:`~object.__eq__` method mutating the element
+being queried. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index ec999582d2fb9d..b89f455cd8f1ca 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1249,29 +1249,28 @@ _elementtree_Element_find_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_find, self, path, namespaces, NULL
- );
+ );
}
- if (!self->extra)
- Py_RETURN_NONE;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
- if (rc > 0)
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
+ if (rc > 0) {
return item;
+ }
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
Py_RETURN_NONE;
@@ -1294,38 +1293,34 @@ _elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls,
PyObject *namespaces)
/*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None)
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findtext,
self, path, default_value, namespaces, NULL
- );
-
- if (!self->extra) {
- return Py_NewRef(default_value);
- }
+ );
- for (i = 0; i < self->extra->length; i++) {
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
PyObject *item = self->extra->children[i];
- int rc;
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc > 0) {
- PyObject* text = element_get_text((ElementObject*)item);
+ PyObject *text = element_get_text((ElementObject *)item);
+ Py_DECREF(item);
if (text == Py_None) {
- Py_DECREF(item);
- return PyUnicode_New(0, 0);
+ return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
}
Py_XINCREF(text);
- Py_DECREF(item);
return text;
}
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
return Py_NewRef(default_value);
@@ -1346,29 +1341,26 @@ _elementtree_Element_findall_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/
{
- Py_ssize_t i;
- PyObject* out;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findall, self, path, namespaces, NULL
- );
+ );
}
- out = PyList_New(0);
- if (!out)
+ PyObject *out = PyList_New(0);
+ if (out == NULL) {
return NULL;
+ }
- if (!self->extra)
- return out;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
Py_DECREF(item);
Py_DECREF(out);
1
0

[3.12] gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964) (#131932)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/f1689b61fe16fa58cf5dab4e448a8ff1ca…
commit: f1689b61fe16fa58cf5dab4e448a8ff1cac32f2d
branch: 3.12
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T14:47:22+02:00
summary:
[3.12] gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964) (#131932)
gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964)
We fix a use-after-free in the `find`, `findtext` and `findall` methods of `xml.etree.ElementTree.Element`
objects that can be triggered when the tag to find implements an `__eq__` method that mutates the
element being queried.
(cherry picked from commit c57623c221d46daeaedfbf2b32d041fde0c882de)
files:
A Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index f8c2e5ccaa41a1..137e23867c5939 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2713,20 +2713,38 @@ def element_factory(x, y):
gc_collect()
-class MutatingElementPath(str):
+class MutationDeleteElementPath(str):
def __new__(cls, elem, *args):
self = str.__new__(cls, *args)
self.elem = elem
return self
+
def __eq__(self, o):
del self.elem[:]
return True
-MutatingElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
+
+class MutationClearElementPath(str):
+ def __new__(cls, elem, *args):
+ self = str.__new__(cls, *args)
+ self.elem = elem
+ return self
+
+ def __eq__(self, o):
+ self.elem.clear()
+ return True
+
+ __hash__ = str.__hash__
+
class BadElementPath(str):
def __eq__(self, o):
raise 1/0
-BadElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
class BadElementPathTest(ElementTestCase, unittest.TestCase):
def setUp(self):
@@ -2741,9 +2759,11 @@ def tearDown(self):
super().tearDown()
def test_find_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.find(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.find(cls(e, 'x'))
def test_find_with_error(self):
e = ET.Element('foo')
@@ -2754,9 +2774,11 @@ def test_find_with_error(self):
pass
def test_findtext_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findtext(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findtext(cls(e, 'x'))
def test_findtext_with_error(self):
e = ET.Element('foo')
@@ -2781,9 +2803,11 @@ def test_findtext_with_none_text_attribute(self):
self.assertEqual(root_elem.findtext('./bar'), '')
def test_findall_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findall(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findall(cls(e, 'x'))
def test_findall_with_error(self):
e = ET.Element('foo')
diff --git a/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
new file mode 100644
index 00000000000000..30098f6e13e986
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
@@ -0,0 +1,5 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.find <xml.etree.ElementTree.Element.find>`,
+:meth:`Element.findtext <xml.etree.ElementTree.Element.findtext>` and
+:meth:`Element.findall <xml.etree.ElementTree.Element.findall>` when the tag
+to find implements an :meth:`~object.__eq__` method mutating the element
+being queried. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 386f38add16f94..ad65eedcd6464f 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1249,29 +1249,28 @@ _elementtree_Element_find_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_find, self, path, namespaces, NULL
- );
+ );
}
- if (!self->extra)
- Py_RETURN_NONE;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
- if (rc > 0)
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
+ if (rc > 0) {
return item;
+ }
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
Py_RETURN_NONE;
@@ -1294,38 +1293,34 @@ _elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls,
PyObject *namespaces)
/*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None)
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findtext,
self, path, default_value, namespaces, NULL
- );
-
- if (!self->extra) {
- return Py_NewRef(default_value);
- }
+ );
- for (i = 0; i < self->extra->length; i++) {
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
PyObject *item = self->extra->children[i];
- int rc;
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc > 0) {
- PyObject* text = element_get_text((ElementObject*)item);
+ PyObject *text = element_get_text((ElementObject *)item);
+ Py_DECREF(item);
if (text == Py_None) {
- Py_DECREF(item);
return PyUnicode_New(0, 0);
}
Py_XINCREF(text);
- Py_DECREF(item);
return text;
}
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
return Py_NewRef(default_value);
@@ -1346,29 +1341,26 @@ _elementtree_Element_findall_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/
{
- Py_ssize_t i;
- PyObject* out;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findall, self, path, namespaces, NULL
- );
+ );
}
- out = PyList_New(0);
- if (!out)
+ PyObject *out = PyList_New(0);
+ if (out == NULL) {
return NULL;
+ }
- if (!self->extra)
- return out;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
Py_DECREF(item);
Py_DECREF(out);
1
0
https://github.com/python/cpython/commit/ba11f45dd969dfb039dfb47270de4f8c6a…
commit: ba11f45dd969dfb039dfb47270de4f8c6a03d241
branch: main
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T14:32:54+02:00
summary:
gh-130843: expose 48-bit timestamp for UUIDv7 (#131838)
files:
M Doc/library/uuid.rst
M Lib/test/test_uuid.py
M Lib/uuid.py
diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index 1c46d7d40bbdfa..13c00d22ef0299 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -103,28 +103,29 @@ which relays any information about the UUID's safety, using this enumeration:
- Meaning
* - .. attribute:: UUID.time_low
- - The first 32 bits of the UUID.
+ - The first 32 bits of the UUID. Only relevant to version 1.
* - .. attribute:: UUID.time_mid
- - The next 16 bits of the UUID.
+ - The next 16 bits of the UUID. Only relevant to version 1.
* - .. attribute:: UUID.time_hi_version
- - The next 16 bits of the UUID.
+ - The next 16 bits of the UUID. Only relevant to version 1.
* - .. attribute:: UUID.clock_seq_hi_variant
- - The next 8 bits of the UUID.
+ - The next 8 bits of the UUID. Only relevant to versions 1 and 6.
* - .. attribute:: UUID.clock_seq_low
- - The next 8 bits of the UUID.
+ - The next 8 bits of the UUID. Only relevant to versions 1 and 6.
* - .. attribute:: UUID.node
- - The last 48 bits of the UUID.
+ - The last 48 bits of the UUID. Only relevant to version 1.
* - .. attribute:: UUID.time
- - The 60-bit timestamp.
+ - The 60-bit timestamp for version 1 and 6,
+ or the 48-bit timestamp for version 7.
* - .. attribute:: UUID.clock_seq
- - The 14-bit sequence number.
+ - The 14-bit sequence number. Only relevant to versions 1 and 6.
.. attribute:: UUID.hex
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 91210923991f8e..958be5408ce90a 100755
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -913,6 +913,7 @@ def test_uuid7(self):
equal(self.uuid._last_counter_v7, counter)
unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
+ equal(u.time, unix_ts_ms)
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
equal((u.int >> 75) & 1, 0) # check that the MSB is 0
@@ -966,6 +967,7 @@ def test_uuid7_monotonicity(self):
urand.assert_called_once_with(10)
equal(self.uuid._last_timestamp_v7, timestamp_ms)
equal(self.uuid._last_counter_v7, counter)
+ equal(u1.time, timestamp_ms)
equal((u1.int >> 64) & 0xfff, counter_hi)
equal((u1.int >> 32) & 0x3fff_ffff, counter_lo)
equal(u1.int & 0xffff_ffff, tail)
@@ -988,6 +990,7 @@ def test_uuid7_monotonicity(self):
equal(self.uuid._last_timestamp_v7, timestamp_ms)
# 42-bit counter advanced by 1
equal(self.uuid._last_counter_v7, counter + 1)
+ equal(u2.time, timestamp_ms)
equal((u2.int >> 64) & 0xfff, counter_hi)
equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1)
equal(u2.int & 0xffff_ffff, next_fail)
@@ -1025,6 +1028,7 @@ def test_uuid7_timestamp_backwards(self):
equal(u.version, 7)
equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1)
unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff
+ equal(u.time, unix_ts_ms)
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
# 42-bit counter advanced by 1
equal(self.uuid._last_counter_v7, counter + 1)
@@ -1064,6 +1068,7 @@ def test_uuid7_overflow_counter(self):
# timestamp advanced due to overflow
equal(self.uuid._last_timestamp_v7, timestamp_ms + 1)
unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff
+ equal(u.time, unix_ts_ms)
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
# counter overflowed, so we picked a new one
equal(self.uuid._last_counter_v7, new_counter)
diff --git a/Lib/uuid.py b/Lib/uuid.py
index 984981512d64a6..2c16c3f0f5a5b5 100644
--- a/Lib/uuid.py
+++ b/Lib/uuid.py
@@ -134,9 +134,16 @@ class UUID:
fields a tuple of the six integer fields of the UUID,
which are also available as six individual attributes
- and two derived attributes. The time_* attributes are
- only relevant to version 1, while the others are only
- relevant to versions 1 and 6:
+ and two derived attributes. Those attributes are not
+ always relevant to all UUID versions:
+
+ The 'time_*' attributes are only relevant to version 1.
+
+ The 'clock_seq*' and 'node' attributes are only relevant
+ to versions 1 and 6.
+
+ The 'time' attribute is only relevant to versions 1, 6
+ and 7.
time_low the first 32 bits of the UUID
time_mid the next 16 bits of the UUID
@@ -145,7 +152,8 @@ class UUID:
clock_seq_low the next 8 bits of the UUID
node the last 48 bits of the UUID
- time the 60-bit timestamp
+ time the 60-bit timestamp for UUIDv1/v6,
+ or the 48-bit timestamp for UUIDv7
clock_seq the 14-bit sequence number
hex the UUID as a 32-character hexadecimal string
@@ -366,6 +374,9 @@ def time(self):
time_hi = self.int >> 96
time_lo = (self.int >> 64) & 0x0fff
return time_hi << 28 | (self.time_mid << 12) | time_lo
+ elif self.version == 7:
+ # unix_ts_ms (48) | ... (80)
+ return self.int >> 80
else:
# time_lo (32) | time_mid (16) | ver (4) | time_hi (12) | ... (64)
#
1
0

gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (#126124)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/bab1398a47f6d0cfc1be70497f306874c7…
commit: bab1398a47f6d0cfc1be70497f306874c749ef7c
branch: main
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T12:31:26+02:00
summary:
gh-126033: fix UAF in `xml.etree.ElementTree.Element.remove` when concurrent mutations happen (#126124)
files:
A Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 9f319169a945da..7f5945d8515fba 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -18,9 +18,11 @@
import textwrap
import types
import unittest
+import unittest.mock as mock
import warnings
import weakref
+from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
@@ -121,6 +123,21 @@
</foo>
"""
+def is_python_implementation():
+ assert ET is not None, "ET must be initialized"
+ assert pyET is not None, "pyET must be initialized"
+ return ET is pyET
+
+
+def equal_wrapper(cls):
+ """Mock cls.__eq__ to check whether it has been called or not.
+
+ The behaviour of cls.__eq__ (side-effects included) is left as is.
+ """
+ eq = cls.__eq__
+ return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
+
+
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -2642,6 +2659,7 @@ def test_pickle_issue18997(self):
class BadElementTest(ElementTestCase, unittest.TestCase):
+
def test_extend_mutable_list(self):
class X:
@property
@@ -2680,18 +2698,168 @@ class Y(X, ET.Element):
e = ET.Element('foo')
e.extend(L)
- def test_remove_with_mutating(self):
- class X(ET.Element):
+ def test_remove_with_clear_assume_missing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=True)
+
+ def test_remove_with_clear_assume_existing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_clear(raises=False)
+
+ def do_test_remove_with_clear(self, *, raises):
+
+ # Until the discrepency between "del root[:]" and "root.clear()" is
+ # resolved, we need to keep two tests. Previously, using "del root[:]"
+ # did not crash with the reproducer of gh-126033 while "root.clear()"
+ # did.
+
+ class E(ET.Element):
+ """Local class to be able to mock E.__eq__ for introspection."""
+
+ class X(E):
def __eq__(self, o):
- del e[:]
- return False
- e = ET.Element('foo')
- e.extend([X('bar')])
- self.assertRaises(ValueError, e.remove, ET.Element('baz'))
+ del root[:]
+ return not raises
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- self.assertRaises(ValueError, e.remove, X('baz'))
+ class Y(E):
+ def __eq__(self, o):
+ root.clear()
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ self.assertIs(E.__eq__, object.__eq__)
+
+ for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
+ self.enterContext(self.subTest(side_effect=side_effect))
+
+ # test removing R() from [U()]
+ for R, U, description in [
+ (E, Z, "remove missing E() from [Z()]"),
+ (Z, E, "remove missing Z() from [E()]"),
+ (Z, Z, "remove missing Z() from [Z()]"),
+ ]:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # Test removing root[0] from [Z()].
+ #
+ # Since we call root.remove() with root[0], Z.__eq__()
+ # will not be called (we branch on the fast Py_EQ path).
+ with self.subTest("remove root[0] from [Z()]"):
+ root = E('top')
+ root.append(Z('rem'))
+ with equal_wrapper(E) as f, equal_wrapper(Z) as g:
+ root.remove(root[0])
+ f.assert_not_called()
+ g.assert_not_called()
+
+ # Test removing root[1] (of type R) from [U(), R()].
+ is_special = is_python_implementation() and raises and Z is Y
+ if is_python_implementation() and raises and Z is Y:
+ # In pure Python, using root.clear() sets the children
+ # list to [] without calling list.clear().
+ #
+ # For this reason, the call to root.remove() first
+ # checks root[0] and sets the children list to []
+ # since either root[0] or root[1] is an evil element.
+ #
+ # Since checking root[1] still uses the old reference
+ # to the children list, PyObject_RichCompareBool() branches
+ # to the fast Py_EQ path and Y.__eq__() is called exactly
+ # once (when checking root[0]).
+ continue
+ else:
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def test_remove_with_mutate_root_assume_missing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=True)
+
+ def test_remove_with_mutate_root_assume_existing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ self.do_test_remove_with_mutate_root(raises=False)
+
+ def do_test_remove_with_mutate_root(self, *, raises):
+ E = ET.Element
+
+ class Z(E):
+ def __eq__(self, o):
+ del root[0]
+ return not raises
+
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing root[1] (of type R) from [U(), R()]
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def cases_for_remove_missing_with_mutations(self, E, Z):
+ # Cases for removing R() from [U(), V()].
+ # The case U = V = R = E is not interesting as there is no mutation.
+ for U, V in [(E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {E.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield E, U, V, description
+
+ for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {Z.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield Z, U, V, description
+
+ def cases_for_remove_existing_with_mutations(self, E, Z):
+ # Cases for removing root[1] (of type R) from [U(), R()].
+ # The case U = R = E is not interesting as there is no mutation.
+ for U, R, description in [
+ (E, Z, "remove root[1] from [E(), Z()]"),
+ (Z, E, "remove root[1] from [Z(), E()]"),
+ (Z, Z, "remove root[1] from [Z(), Z()]"),
+ ]:
+ description = (f"remove root[1] (of type {R.__name__}) "
+ f"from [{U.__name__}(), {R.__name__}()]")
+ yield R, U, description
@support.infinite_recursion(25)
def test_recursive_repr(self):
diff --git a/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
new file mode 100644
index 00000000000000..fa09c712aa0c4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.sM3uCn.rst
@@ -0,0 +1,3 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
+<xml.etree.ElementTree.Element.remove>` when the element is
+concurrently mutated. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index b73bd844626b11..f4f48538e30abe 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -850,6 +850,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0)
goto error;
+ // TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) {
@@ -1629,42 +1630,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{
Py_ssize_t i;
- int rc;
- PyObject *found;
-
- if (!self->extra) {
- /* element has no children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
- return NULL;
- }
-
- for (i = 0; i < self->extra->length; i++) {
- if (self->extra->children[i] == subelement)
- break;
- rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
- if (rc > 0)
+ // When iterating over the list of children, we need to check that the
+ // list is not cleared (self->extra != NULL) and that we are still within
+ // the correct bounds (i < self->extra->length).
+ //
+ // We deliberately avoid protecting against children lists that grow
+ // faster than the index since list objects do not protect against it.
+ int rc = 0;
+ for (i = 0; self->extra && i < self->extra->length; i++) {
+ if (self->extra->children[i] == subelement) {
+ rc = 1;
break;
- if (rc < 0)
+ }
+ PyObject *child = Py_NewRef(self->extra->children[i]);
+ rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
+ Py_DECREF(child);
+ if (rc < 0) {
return NULL;
+ }
+ else if (rc > 0) {
+ break;
+ }
}
- if (i >= self->extra->length) {
- /* subelement is not in children, so raise exception */
- PyErr_SetString(
- PyExc_ValueError,
- "list.remove(x): x not in list"
- );
+ if (rc == 0) {
+ PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
- found = self->extra->children[i];
+ // An extra check must be done if the mutation occurs at the very last
+ // step and removes or clears the 'extra' list (the condition on the
+ // length would not be satisfied any more).
+ if (self->extra == NULL || i >= self->extra->length) {
+ Py_RETURN_NONE;
+ }
+
+ PyObject *found = self->extra->children[i];
self->extra->length--;
- for (; i < self->extra->length; i++)
+ for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1];
+ }
Py_DECREF(found);
Py_RETURN_NONE;
1
0

gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964)
by picnixz March 31, 2025
by picnixz March 31, 2025
March 31, 2025
https://github.com/python/cpython/commit/c57623c221d46daeaedfbf2b32d041fde0…
commit: c57623c221d46daeaedfbf2b32d041fde0c882de
branch: main
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-31T12:26:52+02:00
summary:
gh-126037: fix UAF in `xml.etree.ElementTree.Element.find*` when concurrent mutations happen (#127964)
We fix a use-after-free in the `find`, `findtext` and `findall` methods of `xml.etree.ElementTree.Element`
objects that can be triggered when the tag to find implements an `__eq__` method that mutates the
element being queried.
files:
A Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
M Lib/test/test_xml_etree.py
M Modules/_elementtree.c
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index ae06a9cc11855f..9f319169a945da 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2793,20 +2793,38 @@ def element_factory(x, y):
gc_collect()
-class MutatingElementPath(str):
+class MutationDeleteElementPath(str):
def __new__(cls, elem, *args):
self = str.__new__(cls, *args)
self.elem = elem
return self
+
def __eq__(self, o):
del self.elem[:]
return True
-MutatingElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
+
+class MutationClearElementPath(str):
+ def __new__(cls, elem, *args):
+ self = str.__new__(cls, *args)
+ self.elem = elem
+ return self
+
+ def __eq__(self, o):
+ self.elem.clear()
+ return True
+
+ __hash__ = str.__hash__
+
class BadElementPath(str):
def __eq__(self, o):
raise 1/0
-BadElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
class BadElementPathTest(ElementTestCase, unittest.TestCase):
def setUp(self):
@@ -2821,9 +2839,11 @@ def tearDown(self):
super().tearDown()
def test_find_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.find(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.find(cls(e, 'x'))
def test_find_with_error(self):
e = ET.Element('foo')
@@ -2834,9 +2854,11 @@ def test_find_with_error(self):
pass
def test_findtext_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findtext(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findtext(cls(e, 'x'))
def test_findtext_with_error(self):
e = ET.Element('foo')
@@ -2861,9 +2883,11 @@ def test_findtext_with_none_text_attribute(self):
self.assertEqual(root_elem.findtext('./bar'), '')
def test_findall_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findall(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findall(cls(e, 'x'))
def test_findall_with_error(self):
e = ET.Element('foo')
diff --git a/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
new file mode 100644
index 00000000000000..30098f6e13e986
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-15-15-07-22.gh-issue-126037.OyA7JP.rst
@@ -0,0 +1,5 @@
+:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.find <xml.etree.ElementTree.Element.find>`,
+:meth:`Element.findtext <xml.etree.ElementTree.Element.findtext>` and
+:meth:`Element.findall <xml.etree.ElementTree.Element.findall>` when the tag
+to find implements an :meth:`~object.__eq__` method mutating the element
+being queried. Patch by Bénédikt Tran.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index d20bbcd21fd371..b73bd844626b11 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1252,29 +1252,28 @@ _elementtree_Element_find_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_find, self, path, namespaces, NULL
- );
+ );
}
- if (!self->extra)
- Py_RETURN_NONE;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
- if (rc > 0)
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
+ if (rc > 0) {
return item;
+ }
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
Py_RETURN_NONE;
@@ -1297,38 +1296,34 @@ _elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls,
PyObject *namespaces)
/*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/
{
- Py_ssize_t i;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None)
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findtext,
self, path, default_value, namespaces, NULL
- );
-
- if (!self->extra) {
- return Py_NewRef(default_value);
- }
+ );
- for (i = 0; i < self->extra->length; i++) {
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
PyObject *item = self->extra->children[i];
- int rc;
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc > 0) {
- PyObject* text = element_get_text((ElementObject*)item);
+ PyObject *text = element_get_text((ElementObject *)item);
+ Py_DECREF(item);
if (text == Py_None) {
- Py_DECREF(item);
return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
}
Py_XINCREF(text);
- Py_DECREF(item);
return text;
}
Py_DECREF(item);
- if (rc < 0)
+ if (rc < 0) {
return NULL;
+ }
}
return Py_NewRef(default_value);
@@ -1349,29 +1344,26 @@ _elementtree_Element_findall_impl(ElementObject *self, PyTypeObject *cls,
PyObject *path, PyObject *namespaces)
/*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/
{
- Py_ssize_t i;
- PyObject* out;
elementtreestate *st = get_elementtree_state_by_cls(cls);
if (checkpath(path) || namespaces != Py_None) {
return PyObject_CallMethodObjArgs(
st->elementpath_obj, st->str_findall, self, path, namespaces, NULL
- );
+ );
}
- out = PyList_New(0);
- if (!out)
+ PyObject *out = PyList_New(0);
+ if (out == NULL) {
return NULL;
+ }
- if (!self->extra)
- return out;
-
- for (i = 0; i < self->extra->length; i++) {
- PyObject* item = self->extra->children[i];
- int rc;
+ for (Py_ssize_t i = 0; self->extra && i < self->extra->length; i++) {
+ PyObject *item = self->extra->children[i];
assert(Element_Check(st, item));
Py_INCREF(item);
- rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
+ PyObject *tag = Py_NewRef(((ElementObject *)item)->tag);
+ int rc = PyObject_RichCompareBool(tag, path, Py_EQ);
+ Py_DECREF(tag);
if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
Py_DECREF(item);
Py_DECREF(out);
1
0
https://github.com/python/cpython/commit/6aa88a2cb36240fe2b587f2e8204387327…
commit: 6aa88a2cb36240fe2b587f2e82043873270a27cf
branch: main
author: Adam Turner <9087854+AA-Turner(a)users.noreply.github.com>
committer: AA-Turner <9087854+AA-Turner(a)users.noreply.github.com>
date: 2025-03-31T00:35:12Z
summary:
gh-130167: Optimise ``textwrap.dedent()`` (#131919)
Co-authored-by: Marius Juston <marius.juston(a)hotmail.fr>
Co-authored-by: Pieter Eendebak <pieter.eendebak(a)gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
files:
A Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst
M Lib/test/test_textwrap.py
M Lib/textwrap.py
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index dfbc2b93dfc0d6..77366988b57fa7 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -769,6 +769,56 @@ def assertUnchanged(self, text):
"""assert that dedent() has no effect on 'text'"""
self.assertEqual(text, dedent(text))
+ def test_dedent_whitespace(self):
+ # The empty string.
+ text = ""
+ self.assertUnchanged(text)
+
+ # Only spaces.
+ text = " "
+ expect = ""
+ self.assertEqual(expect, dedent(text))
+
+ # Only tabs.
+ text = "\t\t\t\t"
+ expect = ""
+ self.assertEqual(expect, dedent(text))
+
+ # A mixture.
+ text = " \t \t\t \t "
+ expect = ""
+ self.assertEqual(expect, dedent(text))
+
+ # ASCII whitespace.
+ text = "\f\n\r\t\v "
+ expect = "\n"
+ self.assertEqual(expect, dedent(text))
+
+ # One newline.
+ text = "\n"
+ expect = "\n"
+ self.assertEqual(expect, dedent(text))
+
+ # Windows-style newlines.
+ text = "\r\n" * 5
+ expect = "\n" * 5
+ self.assertEqual(expect, dedent(text))
+
+ # Whitespace mixture.
+ text = " \n\t\n \n\t\t\n\n\n "
+ expect = "\n\n\n\n\n\n"
+ self.assertEqual(expect, dedent(text))
+
+ # Lines consisting only of whitespace are always normalised
+ text = "a\n \n\t\n"
+ expect = "a\n\n\n"
+ self.assertEqual(expect, dedent(text))
+
+ # Whitespace characters on non-empty lines are retained
+ text = "a\r\n\r\n\r\n"
+ expect = "a\r\n\n\n"
+ self.assertEqual(expect, dedent(text))
+
def test_dedent_nomargin(self):
# No lines indented.
text = "Hello there.\nHow are you?\nOh good, I'm glad."
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 1bf07aa46cad99..bb6a1186316275 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):
# -- Loosely related functionality -------------------------------------
-_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
-_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
-
def dedent(text):
"""Remove any common leading whitespace from every line in `text`.
@@ -429,42 +426,21 @@ def dedent(text):
Entirely blank lines are normalized to a newline character.
"""
- # Look for the longest leading string of spaces and tabs common to
- # all lines.
- margin = None
- text = _whitespace_only_re.sub('', text)
- indents = _leading_whitespace_re.findall(text)
- for indent in indents:
- if margin is None:
- margin = indent
-
- # Current line more deeply indented than previous winner:
- # no change (previous winner is still on top).
- elif indent.startswith(margin):
- pass
-
- # Current line consistent with and no deeper than previous winner:
- # it's the new winner.
- elif margin.startswith(indent):
- margin = indent
-
- # Find the largest common whitespace between current line and previous
- # winner.
- else:
- for i, (x, y) in enumerate(zip(margin, indent)):
- if x != y:
- margin = margin[:i]
- break
+ if not text:
+ return text
+
+ lines = text.split('\n')
- # sanity check (testing/debugging only)
- if 0 and margin:
- for line in text.split("\n"):
- assert not line or line.startswith(margin), \
- "line = %r, margin = %r" % (line, margin)
+ # Get length of leading whitespace, inspired by ``os.path.commonprefix()``.
+ non_blank_lines = [l for l in lines if l and not l.isspace()]
+ l1 = min(non_blank_lines, default='')
+ l2 = max(non_blank_lines, default='')
+ margin = 0
+ for margin, c in enumerate(l1):
+ if c != l2[margin] or c not in ' \t':
+ break
- if margin:
- text = re.sub(r'(?m)^' + margin, '', text)
- return text
+ return '\n'.join([l[margin:] if not l.isspace() else '' for l in lines])
def indent(text, prefix, predicate=None):
diff --git a/Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst b/Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst
new file mode 100644
index 00000000000000..62b619c0d80f4d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst
@@ -0,0 +1,5 @@
+Improved performance of :func:`textwrap.dedent` by an average of ~2.4x,
+(with improvements of up to 4x for large inputs),
+and fixed a bug where blank lines with whitespace characters other than space
+or horizontal tab were not normalised to the newline.
+Patch by Adam Turner, Marius Juston, and Pieter Eendebak.
1
0

March 30, 2025
https://github.com/python/cpython/commit/685fd74f81e673dc0430120f50009636b0…
commit: 685fd74f81e673dc0430120f50009636b0e437a5
branch: main
author: Amit Lavon <amitlavon1.spam(a)gmail.com>
committer: brandtbucher <brandtbucher(a)gmail.com>
date: 2025-03-30T16:07:25-07:00
summary:
GH-131798: Remove type checks for _TO_BOOL_STR (GH-131816)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-03-28-19-02-55.gh-issue-131798.fNZ5-2.rst
M Include/internal/pycore_opcode_metadata.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Lib/test/test_capi/test_opt.py
M Python/bytecodes.c
M Python/executor_cases.c.h
M Python/generated_cases.c.h
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index 9c101302591872..32bafaf09ce3da 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -1465,7 +1465,7 @@ _PyOpcode_macro_expansion[256] = {
[TO_BOOL_INT] = { .nuops = 1, .uops = { { _TO_BOOL_INT, OPARG_SIMPLE, 3 } } },
[TO_BOOL_LIST] = { .nuops = 1, .uops = { { _TO_BOOL_LIST, OPARG_SIMPLE, 3 } } },
[TO_BOOL_NONE] = { .nuops = 1, .uops = { { _TO_BOOL_NONE, OPARG_SIMPLE, 3 } } },
- [TO_BOOL_STR] = { .nuops = 1, .uops = { { _TO_BOOL_STR, OPARG_SIMPLE, 3 } } },
+ [TO_BOOL_STR] = { .nuops = 2, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _TO_BOOL_STR, OPARG_SIMPLE, 3 } } },
[UNARY_INVERT] = { .nuops = 1, .uops = { { _UNARY_INVERT, OPARG_SIMPLE, 0 } } },
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { _UNARY_NEGATIVE, OPARG_SIMPLE, 0 } } },
[UNARY_NOT] = { .nuops = 1, .uops = { { _UNARY_NOT, OPARG_SIMPLE, 0 } } },
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index fb5486171ef4ca..1d9c2bef4cedda 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -135,17 +135,18 @@ extern "C" {
#define _GUARD_NOT_EXHAUSTED_TUPLE 381
#define _GUARD_TOS_FLOAT 382
#define _GUARD_TOS_INT 383
-#define _GUARD_TYPE_VERSION 384
-#define _GUARD_TYPE_VERSION_AND_LOCK 385
+#define _GUARD_TOS_UNICODE 384
+#define _GUARD_TYPE_VERSION 385
+#define _GUARD_TYPE_VERSION_AND_LOCK 386
#define _IMPORT_FROM IMPORT_FROM
#define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 386
-#define _INIT_CALL_PY_EXACT_ARGS 387
-#define _INIT_CALL_PY_EXACT_ARGS_0 388
-#define _INIT_CALL_PY_EXACT_ARGS_1 389
-#define _INIT_CALL_PY_EXACT_ARGS_2 390
-#define _INIT_CALL_PY_EXACT_ARGS_3 391
-#define _INIT_CALL_PY_EXACT_ARGS_4 392
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 387
+#define _INIT_CALL_PY_EXACT_ARGS 388
+#define _INIT_CALL_PY_EXACT_ARGS_0 389
+#define _INIT_CALL_PY_EXACT_ARGS_1 390
+#define _INIT_CALL_PY_EXACT_ARGS_2 391
+#define _INIT_CALL_PY_EXACT_ARGS_3 392
+#define _INIT_CALL_PY_EXACT_ARGS_4 393
#define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
#define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION
#define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD
@@ -155,153 +156,153 @@ extern "C" {
#define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
#define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
#define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _IS_NONE 393
+#define _IS_NONE 394
#define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 394
-#define _ITER_CHECK_RANGE 395
-#define _ITER_CHECK_TUPLE 396
-#define _ITER_JUMP_LIST 397
-#define _ITER_JUMP_RANGE 398
-#define _ITER_JUMP_TUPLE 399
-#define _ITER_NEXT_LIST 400
-#define _ITER_NEXT_LIST_TIER_TWO 401
-#define _ITER_NEXT_RANGE 402
-#define _ITER_NEXT_TUPLE 403
-#define _JUMP_TO_TOP 404
+#define _ITER_CHECK_LIST 395
+#define _ITER_CHECK_RANGE 396
+#define _ITER_CHECK_TUPLE 397
+#define _ITER_JUMP_LIST 398
+#define _ITER_JUMP_RANGE 399
+#define _ITER_JUMP_TUPLE 400
+#define _ITER_NEXT_LIST 401
+#define _ITER_NEXT_LIST_TIER_TWO 402
+#define _ITER_NEXT_RANGE 403
+#define _ITER_NEXT_TUPLE 404
+#define _JUMP_TO_TOP 405
#define _LIST_APPEND LIST_APPEND
#define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 405
-#define _LOAD_ATTR_CLASS 406
+#define _LOAD_ATTR 406
+#define _LOAD_ATTR_CLASS 407
#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 407
-#define _LOAD_ATTR_METHOD_LAZY_DICT 408
-#define _LOAD_ATTR_METHOD_NO_DICT 409
-#define _LOAD_ATTR_METHOD_WITH_VALUES 410
-#define _LOAD_ATTR_MODULE 411
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 412
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 413
-#define _LOAD_ATTR_PROPERTY_FRAME 414
-#define _LOAD_ATTR_SLOT 415
-#define _LOAD_ATTR_WITH_HINT 416
+#define _LOAD_ATTR_INSTANCE_VALUE 408
+#define _LOAD_ATTR_METHOD_LAZY_DICT 409
+#define _LOAD_ATTR_METHOD_NO_DICT 410
+#define _LOAD_ATTR_METHOD_WITH_VALUES 411
+#define _LOAD_ATTR_MODULE 412
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 413
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 414
+#define _LOAD_ATTR_PROPERTY_FRAME 415
+#define _LOAD_ATTR_SLOT 416
+#define _LOAD_ATTR_WITH_HINT 417
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 417
+#define _LOAD_BYTECODE 418
#define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
#define _LOAD_CONST LOAD_CONST
#define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL
-#define _LOAD_CONST_INLINE 418
-#define _LOAD_CONST_INLINE_BORROW 419
+#define _LOAD_CONST_INLINE 419
+#define _LOAD_CONST_INLINE_BORROW 420
#define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL
#define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 420
-#define _LOAD_FAST_0 421
-#define _LOAD_FAST_1 422
-#define _LOAD_FAST_2 423
-#define _LOAD_FAST_3 424
-#define _LOAD_FAST_4 425
-#define _LOAD_FAST_5 426
-#define _LOAD_FAST_6 427
-#define _LOAD_FAST_7 428
+#define _LOAD_FAST 421
+#define _LOAD_FAST_0 422
+#define _LOAD_FAST_1 423
+#define _LOAD_FAST_2 424
+#define _LOAD_FAST_3 425
+#define _LOAD_FAST_4 426
+#define _LOAD_FAST_5 427
+#define _LOAD_FAST_6 428
+#define _LOAD_FAST_7 429
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 429
-#define _LOAD_GLOBAL_BUILTINS 430
-#define _LOAD_GLOBAL_MODULE 431
+#define _LOAD_GLOBAL 430
+#define _LOAD_GLOBAL_BUILTINS 431
+#define _LOAD_GLOBAL_MODULE 432
#define _LOAD_LOCALS LOAD_LOCALS
#define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 432
-#define _LOAD_SMALL_INT_0 433
-#define _LOAD_SMALL_INT_1 434
-#define _LOAD_SMALL_INT_2 435
-#define _LOAD_SMALL_INT_3 436
+#define _LOAD_SMALL_INT 433
+#define _LOAD_SMALL_INT_0 434
+#define _LOAD_SMALL_INT_1 435
+#define _LOAD_SMALL_INT_2 436
+#define _LOAD_SMALL_INT_3 437
#define _LOAD_SPECIAL LOAD_SPECIAL
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 437
+#define _MAKE_CALLARGS_A_TUPLE 438
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 438
+#define _MAKE_WARM 439
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 439
-#define _MAYBE_EXPAND_METHOD_KW 440
-#define _MONITOR_CALL 441
-#define _MONITOR_CALL_KW 442
-#define _MONITOR_JUMP_BACKWARD 443
-#define _MONITOR_RESUME 444
+#define _MAYBE_EXPAND_METHOD 440
+#define _MAYBE_EXPAND_METHOD_KW 441
+#define _MONITOR_CALL 442
+#define _MONITOR_CALL_KW 443
+#define _MONITOR_JUMP_BACKWARD 444
+#define _MONITOR_RESUME 445
#define _NOP NOP
#define _POP_EXCEPT POP_EXCEPT
-#define _POP_JUMP_IF_FALSE 445
-#define _POP_JUMP_IF_TRUE 446
+#define _POP_JUMP_IF_FALSE 446
+#define _POP_JUMP_IF_TRUE 447
#define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE 447
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 448
-#define _POP_TWO_LOAD_CONST_INLINE_BORROW 449
+#define _POP_TOP_LOAD_CONST_INLINE 448
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 449
+#define _POP_TWO_LOAD_CONST_INLINE_BORROW 450
#define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 450
+#define _PUSH_FRAME 451
#define _PUSH_NULL PUSH_NULL
-#define _PUSH_NULL_CONDITIONAL 451
-#define _PY_FRAME_GENERAL 452
-#define _PY_FRAME_KW 453
-#define _QUICKEN_RESUME 454
-#define _REPLACE_WITH_TRUE 455
+#define _PUSH_NULL_CONDITIONAL 452
+#define _PY_FRAME_GENERAL 453
+#define _PY_FRAME_KW 454
+#define _QUICKEN_RESUME 455
+#define _REPLACE_WITH_TRUE 456
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 456
-#define _SEND 457
-#define _SEND_GEN_FRAME 458
+#define _SAVE_RETURN_OFFSET 457
+#define _SEND 458
+#define _SEND_GEN_FRAME 459
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 459
-#define _STORE_ATTR 460
-#define _STORE_ATTR_INSTANCE_VALUE 461
-#define _STORE_ATTR_SLOT 462
-#define _STORE_ATTR_WITH_HINT 463
+#define _START_EXECUTOR 460
+#define _STORE_ATTR 461
+#define _STORE_ATTR_INSTANCE_VALUE 462
+#define _STORE_ATTR_SLOT 463
+#define _STORE_ATTR_WITH_HINT 464
#define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 464
-#define _STORE_FAST_0 465
-#define _STORE_FAST_1 466
-#define _STORE_FAST_2 467
-#define _STORE_FAST_3 468
-#define _STORE_FAST_4 469
-#define _STORE_FAST_5 470
-#define _STORE_FAST_6 471
-#define _STORE_FAST_7 472
+#define _STORE_FAST 465
+#define _STORE_FAST_0 466
+#define _STORE_FAST_1 467
+#define _STORE_FAST_2 468
+#define _STORE_FAST_3 469
+#define _STORE_FAST_4 470
+#define _STORE_FAST_5 471
+#define _STORE_FAST_6 472
+#define _STORE_FAST_7 473
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 473
-#define _STORE_SUBSCR 474
+#define _STORE_SLICE 474
+#define _STORE_SUBSCR 475
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
#define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 475
-#define _TO_BOOL 476
+#define _TIER2_RESUME_CHECK 476
+#define _TO_BOOL 477
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST TO_BOOL_LIST
#define _TO_BOOL_NONE TO_BOOL_NONE
-#define _TO_BOOL_STR TO_BOOL_STR
+#define _TO_BOOL_STR 478
#define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 477
+#define _UNPACK_SEQUENCE 479
#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 477
+#define MAX_UOP_ID 479
#ifdef __cplusplus
}
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index 59572fca3753d6..4f5f6bbde2571a 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -64,7 +64,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_TO_BOOL_INT] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG,
[_TO_BOOL_LIST] = HAS_EXIT_FLAG,
[_TO_BOOL_NONE] = HAS_EXIT_FLAG,
- [_TO_BOOL_STR] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG,
+ [_GUARD_TOS_UNICODE] = HAS_EXIT_FLAG,
+ [_TO_BOOL_STR] = HAS_ESCAPES_FLAG,
[_REPLACE_WITH_TRUE] = HAS_ESCAPES_FLAG,
[_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_GUARD_BOTH_INT] = HAS_EXIT_FLAG,
@@ -413,6 +414,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_GUARD_NOT_EXHAUSTED_TUPLE] = "_GUARD_NOT_EXHAUSTED_TUPLE",
[_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT",
[_GUARD_TOS_INT] = "_GUARD_TOS_INT",
+ [_GUARD_TOS_UNICODE] = "_GUARD_TOS_UNICODE",
[_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION",
[_GUARD_TYPE_VERSION_AND_LOCK] = "_GUARD_TYPE_VERSION_AND_LOCK",
[_IMPORT_FROM] = "_IMPORT_FROM",
@@ -649,6 +651,8 @@ int _PyUop_num_popped(int opcode, int oparg)
return 1;
case _TO_BOOL_NONE:
return 1;
+ case _GUARD_TOS_UNICODE:
+ return 0;
case _TO_BOOL_STR:
return 1;
case _REPLACE_WITH_TRUE:
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index cc6b4c88fe0fce..feab3b8b84f566 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1581,6 +1581,24 @@ def testfunc(n):
self.assertNotIn("_COMPARE_OP_INT", uops)
self.assertIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
+ def test_remove_guard_for_known_type_str(self):
+ def f(n):
+ for i in range(n):
+ false = i == TIER2_THRESHOLD
+ empty = "X"[:false]
+ empty += "" # Make JIT realize this is a string.
+ if empty:
+ return 1
+ return 0
+
+ res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD)
+ self.assertEqual(res, 0)
+ self.assertIsNotNone(ex)
+ uops = get_opnames(ex)
+ self.assertIn("_TO_BOOL_STR", uops)
+ self.assertNotIn("_GUARD_TOS_UNICODE", uops)
+
+
def global_identity(x):
return x
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-28-19-02-55.gh-issue-131798.fNZ5-2.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-28-19-02-55.gh-issue-131798.fNZ5-2.rst
new file mode 100644
index 00000000000000..69c63ffd7fe986
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-28-19-02-55.gh-issue-131798.fNZ5-2.rst
@@ -0,0 +1 @@
+Allow JIT to omit str guard in truthiness test when str type is known.
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 63367141c6951b..2d18f658702fe7 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -512,10 +512,14 @@ dummy_func(
res = PyStackRef_False;
}
- inst(TO_BOOL_STR, (unused/1, unused/2, value -- res)) {
+ op(_GUARD_TOS_UNICODE, (value -- value)) {
PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
EXIT_IF(!PyUnicode_CheckExact(value_o));
+ }
+
+ op(_TO_BOOL_STR, (value -- res)) {
STAT_INC(TO_BOOL, hit);
+ PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
if (value_o == &_Py_STR(empty)) {
assert(_Py_IsImmortal(value_o));
DEAD(value);
@@ -528,6 +532,9 @@ dummy_func(
}
}
+ macro(TO_BOOL_STR) =
+ _GUARD_TOS_UNICODE + unused/1 + unused/2 + _TO_BOOL_STR;
+
op(_REPLACE_WITH_TRUE, (value -- res)) {
PyStackRef_CLOSE(value);
res = PyStackRef_True;
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index bf21c7bfcef393..29c3a270a27526 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -653,16 +653,23 @@
break;
}
- case _TO_BOOL_STR: {
+ case _GUARD_TOS_UNICODE: {
_PyStackRef value;
- _PyStackRef res;
value = stack_pointer[-1];
PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
if (!PyUnicode_CheckExact(value_o)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
+ break;
+ }
+
+ case _TO_BOOL_STR: {
+ _PyStackRef value;
+ _PyStackRef res;
+ value = stack_pointer[-1];
STAT_INC(TO_BOOL, hit);
+ PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
if (value_o == &_Py_STR(empty)) {
assert(_Py_IsImmortal(value_o));
res = PyStackRef_False;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 4e37c68e2bd8bc..416035ae29d71c 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -11616,30 +11616,37 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
_PyStackRef value;
_PyStackRef res;
+ // _GUARD_TOS_UNICODE
+ {
+ value = stack_pointer[-1];
+ PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
+ if (!PyUnicode_CheckExact(value_o)) {
+ UPDATE_MISS_STATS(TO_BOOL);
+ assert(_PyOpcode_Deopt[opcode] == (TO_BOOL));
+ JUMP_TO_PREDICTED(TO_BOOL);
+ }
+ }
/* Skip 1 cache entry */
/* Skip 2 cache entries */
- value = stack_pointer[-1];
- PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
- if (!PyUnicode_CheckExact(value_o)) {
- UPDATE_MISS_STATS(TO_BOOL);
- assert(_PyOpcode_Deopt[opcode] == (TO_BOOL));
- JUMP_TO_PREDICTED(TO_BOOL);
- }
- STAT_INC(TO_BOOL, hit);
- if (value_o == &_Py_STR(empty)) {
- assert(_Py_IsImmortal(value_o));
- res = PyStackRef_False;
- }
- else {
- assert(Py_SIZE(value_o));
- stack_pointer += -1;
- assert(WITHIN_STACK_BOUNDS());
- _PyFrame_SetStackPointer(frame, stack_pointer);
- PyStackRef_CLOSE(value);
- stack_pointer = _PyFrame_GetStackPointer(frame);
- res = PyStackRef_True;
- stack_pointer += 1;
- assert(WITHIN_STACK_BOUNDS());
+ // _TO_BOOL_STR
+ {
+ STAT_INC(TO_BOOL, hit);
+ PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
+ if (value_o == &_Py_STR(empty)) {
+ assert(_Py_IsImmortal(value_o));
+ res = PyStackRef_False;
+ }
+ else {
+ assert(Py_SIZE(value_o));
+ stack_pointer += -1;
+ assert(WITHIN_STACK_BOUNDS());
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ PyStackRef_CLOSE(value);
+ stack_pointer = _PyFrame_GetStackPointer(frame);
+ res = PyStackRef_True;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
+ }
}
stack_pointer[-1] = res;
DISPATCH();
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 2ccfd0a152f547..8d9e94a2445182 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -414,10 +414,16 @@ dummy_func(void) {
}
}
+ op(_GUARD_TOS_UNICODE, (value -- value)) {
+ if (sym_matches_type(value, &PyUnicode_Type)) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+ sym_set_type(value, &PyUnicode_Type);
+ }
+
op(_TO_BOOL_STR, (value -- res)) {
if (!optimize_to_bool(this_instr, ctx, value, &res)) {
res = sym_new_truthiness(ctx, value, true);
- sym_set_type(value, &PyUnicode_Type);
}
}
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 85fa8f4a2a7910..6a8ac75b63eb0e 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -208,13 +208,22 @@
break;
}
+ case _GUARD_TOS_UNICODE: {
+ JitOptSymbol *value;
+ value = stack_pointer[-1];
+ if (sym_matches_type(value, &PyUnicode_Type)) {
+ REPLACE_OP(this_instr, _NOP, 0, 0);
+ }
+ sym_set_type(value, &PyUnicode_Type);
+ break;
+ }
+
case _TO_BOOL_STR: {
JitOptSymbol *value;
JitOptSymbol *res;
value = stack_pointer[-1];
if (!optimize_to_bool(this_instr, ctx, value, &res)) {
res = sym_new_truthiness(ctx, value, true);
- sym_set_type(value, &PyUnicode_Type);
}
stack_pointer[-1] = res;
break;
1
0

gh-131895: fix clang `$PATH` on Darwin runners for tail-calling interpreter (#131903)
by picnixz March 30, 2025
by picnixz March 30, 2025
March 30, 2025
https://github.com/python/cpython/commit/39fa19a4ccb4c75d2421187e5b8d83c73b…
commit: 39fa19a4ccb4c75d2421187e5b8d83c73b48ca4d
branch: main
author: Bénédikt Tran <10796600+picnixz(a)users.noreply.github.com>
committer: picnixz <10796600+picnixz(a)users.noreply.github.com>
date: 2025-03-30T18:48:00Z
summary:
gh-131895: fix clang `$PATH` on Darwin runners for tail-calling interpreter (#131903)
files:
M .github/workflows/tail-call.yml
diff --git a/.github/workflows/tail-call.yml b/.github/workflows/tail-call.yml
index 3c9098b88ee3b1..572ff45e51ef00 100644
--- a/.github/workflows/tail-call.yml
+++ b/.github/workflows/tail-call.yml
@@ -112,8 +112,8 @@ jobs:
find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete
brew install llvm@${{ matrix.llvm }}
export SDKROOT="$(xcrun --show-sdk-path)"
- export PATH="/opt/homebrew/opt/llvm/bin:$PATH"
- export PATH="/usr/local/opt/llvm/bin:$PATH"
+ export PATH="/opt/homebrew/opt/llvm@${{ matrix.llvm }}/bin:$PATH"
+ export PATH="/usr/local/opt/llvm@${{ matrix.llvm }}/bin:$PATH"
CC=clang-19 ./configure --with-tail-call-interp
make all --jobs 4
./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3
1
0