[Python-checkins] bpo-47084: Clear Unicode cached representations on finalization (GH-32032)

vstinner webhook-mailer at python.org
Tue Mar 22 08:54:00 EDT 2022


https://github.com/python/cpython/commit/88872a29f19092d2fde27365af230abd6d301941
commit: 88872a29f19092d2fde27365af230abd6d301941
branch: main
author: Jeremy Kloth <jeremy.kloth at gmail.com>
committer: vstinner <vstinner at python.org>
date: 2022-03-22T13:53:51+01:00
summary:

bpo-47084: Clear Unicode cached representations on finalization (GH-32032)

files:
M Include/internal/pycore_unicodeobject.h
M Lib/__hello__.py
M Lib/test/test_embed.py
M Objects/unicodeobject.c
M Tools/scripts/deepfreeze.py

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 4394ce939b567..c7f06051a622f 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
 extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
 extern void _PyUnicode_Fini(PyInterpreterState *);
 extern void _PyUnicode_FiniTypes(PyInterpreterState *);
+extern void _PyStaticUnicode_Dealloc(PyObject *);
 
 
 /* other API */
diff --git a/Lib/__hello__.py b/Lib/__hello__.py
index d37bd2766ac1c..c09d6a4f52332 100644
--- a/Lib/__hello__.py
+++ b/Lib/__hello__.py
@@ -1,5 +1,14 @@
 initialized = True
 
+class TestFrozenUtf8_1:  # docstring is the single code point U+00B6; presumably a non-ASCII constant for the frozen-module leak test -- confirm
+    """\u00b6"""
+
+class TestFrozenUtf8_2:  # docstring is the single code point U+03C0; presumably a non-ASCII constant for the frozen-module leak test -- confirm
+    """\u03c0"""
+
+class TestFrozenUtf8_4:  # docstring is the single code point U+1F600; presumably a non-ASCII constant for the frozen-module leak test -- confirm
+    """\U0001f600"""
+
 def main():
     print("Hello world!")
 
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 80b9674c1c258..f0c88de68e89e 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -1645,24 +1645,29 @@ def test_frozenmain(self):
                          '-X showrefcount requires a Python debug build')
     def test_no_memleak(self):
         # bpo-1635741: Python must release all memory at exit
-        cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"]
-        proc = subprocess.run(cmd,
-                              stdout=subprocess.PIPE,
-                              stderr=subprocess.STDOUT,
-                              text=True)
-        self.assertEqual(proc.returncode, 0)
-        out = proc.stdout.rstrip()
-        match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
-        if not match:
-            self.fail(f"unexpected output: {out!a}")
-        refs = int(match.group(1))
-        blocks = int(match.group(2))
-        self.assertEqual(refs, 0, out)
-        if not MS_WINDOWS:
-            self.assertEqual(blocks, 0, out)
-        else:
-            # bpo-46857: on Windows, Python still leaks 1 memory block at exit
-            self.assertIn(blocks, (0, 1), out)
+        tests = (
+            ('off', 'pass'),
+            ('on', 'pass'),
+            ('off', 'import __hello__'),
+            ('on', 'import __hello__'),
+        )
+        for flag, stmt in tests:
+            xopt = f"frozen_modules={flag}"
+            cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
+            proc = subprocess.run(cmd,
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.STDOUT,
+                                  text=True)
+            self.assertEqual(proc.returncode, 0)
+            out = proc.stdout.rstrip()
+            match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
+            if not match:
+                self.fail(f"unexpected output: {out!a}")
+            refs = int(match.group(1))
+            blocks = int(match.group(2))
+            with self.subTest(frozen_modules=flag, stmt=stmt):
+                self.assertEqual(refs, 0, out)
+                self.assertEqual(blocks, 0, out)
 
 
 class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5dfe6e1e93f9f..ce3ebce1ff72d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
 }
 
 
+static void unicode_static_dealloc(PyObject *op)  /* Release the cached wstr/utf8 buffers hanging off op; op itself is never freed here. */
+{
+    PyASCIIObject* ascii = (PyASCIIObject*)op;
+
+    assert(ascii->state.compact);  /* only compact strings are expected */
+
+    if (ascii->state.ascii) {  /* ASCII string: only the wstr field is examined */
+        if (ascii->wstr) {
+            PyObject_Free(ascii->wstr);
+            ascii->wstr = NULL;  /* reset so the freed pointer is not left behind */
+        }
+    }
+    else {
+        PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
+        void* data = (void*)(compact + 1);  /* address immediately following the compact header */
+        if (ascii->wstr && ascii->wstr != data) {  /* free wstr only when it does not point at data */
+            PyObject_Free(ascii->wstr);
+            ascii->wstr = NULL;
+            compact->wstr_length = 0;
+        }
+        if (compact->utf8) {  /* free the utf8 buffer if one is set */
+            PyObject_Free(compact->utf8);
+            compact->utf8 = NULL;
+            compact->utf8_length = 0;
+        }
+    }
+}
+
+
 void
 _PyUnicode_Fini(PyInterpreterState *interp)
 {
@@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
     _PyUnicode_FiniEncodings(&state->fs_codec);
 
     unicode_clear_identifiers(state);
+
+    // Clear the single character singletons
+    for (int i = 0; i < 128; i++) {
+        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
+    }
+    for (int i = 0; i < 128; i++) {
+        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
+    }
+}
+
+
+void
+_PyStaticUnicode_Dealloc(PyObject *op)  /* extern wrapper around the file-local unicode_static_dealloc(); deepfreeze.py emits calls to it in generated code */
+{
+    unicode_static_dealloc(op);
 }
 
 
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index d208258dbc54c..1831c15784af7 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -185,6 +185,7 @@ def generate_unicode(self, name: str, s: str) -> str:
                 else:
                     self.write("PyCompactUnicodeObject _compact;")
                 self.write(f"{datatype} _data[{len(s)+1}];")
+        self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
         with self.block(f"{name} =", ";"):
             if ascii:
                 with self.block("._ascii =", ","):



More information about the Python-checkins mailing list