[Python-checkins] gh-85858: Remove PyUnicode_InternImmortal() function (#92579)

vstinner webhook-mailer at python.org
Fri May 13 07:40:34 EDT 2022


https://github.com/python/cpython/commit/059b5baf98c9503d9d59c79fba117826caa5a3e1
commit: 059b5baf98c9503d9d59c79fba117826caa5a3e1
branch: main
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2022-05-13T13:40:22+02:00
summary:

gh-85858: Remove PyUnicode_InternImmortal() function (#92579)

Remove the PyUnicode_InternImmortal() function and the
SSTATE_INTERNED_IMMORTAL macro.

The PyUnicode_InternImmortal() function is still exported in the
stable ABI, but it is no longer declared in the public C API headers.

The PyASCIIObject.state.interned bit field is now 1 bit wide, rather
than 2 bits.

Keep SSTATE_NOT_INTERNED and SSTATE_INTERNED_MORTAL macros for
backward compatibility, but no longer use them internally since the
interned member is now a single bit and so can only have two values
(interned or not interned).

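Update the INTERNED_STATS output of _PyUnicode_ClearInterned(): it now
reports a single total length of all interned strings instead of
separate mortal/immortal sizes.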

files:
A Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst
M Doc/data/stable_abi.dat
M Doc/whatsnew/3.12.rst
M Include/cpython/unicodeobject.h
M Include/unicodeobject.h
M Misc/stable_abi.toml
M Objects/unicodeobject.c

diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 3486f33c02539..3912a7c1242de 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -762,7 +762,6 @@ function,PyUnicode_FromWideChar,3.2,,
 function,PyUnicode_GetDefaultEncoding,3.2,,
 function,PyUnicode_GetLength,3.7,,
 function,PyUnicode_InternFromString,3.2,,
-function,PyUnicode_InternImmortal,3.2,,
 function,PyUnicode_InternInPlace,3.2,,
 function,PyUnicode_IsIdentifier,3.2,,
 function,PyUnicode_Join,3.2,,
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index d5017c0350e16..70b26ba48cf62 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -174,3 +174,7 @@ Removed
    * :c:func:`PyUnicode_GET_SIZE`
    * :c:func:`PyUnicode_GetSize`
    * :c:func:`PyUnicode_GET_DATA_SIZE`
+
+* Remove the ``PyUnicode_InternImmortal()`` function and the
+  ``SSTATE_INTERNED_IMMORTAL`` macro.
+  (Contributed by Victor Stinner in :gh:`85858`.)
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 16db2cb7bffb9..37bb13cbe5397 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -98,15 +98,9 @@ typedef struct {
     Py_ssize_t length;          /* Number of code points in the string */
     Py_hash_t hash;             /* Hash value; -1 if not set */
     struct {
-        /*
-           SSTATE_NOT_INTERNED (0)
-           SSTATE_INTERNED_MORTAL (1)
-           SSTATE_INTERNED_IMMORTAL (2)
-
-           If interned != SSTATE_NOT_INTERNED, the two references from the
-           dictionary to this object are *not* counted in ob_refcnt.
-         */
-        unsigned int interned:2;
+        /* If interned is set, the two references from the
+           dictionary to this object are *not* counted in ob_refcnt. */
+        unsigned int interned:1;
         /* Character size:
 
            - PyUnicode_1BYTE_KIND (1):
@@ -189,7 +183,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
 /* Interning state. */
 #define SSTATE_NOT_INTERNED 0
 #define SSTATE_INTERNED_MORTAL 1
-#define SSTATE_INTERNED_IMMORTAL 2
 
 /* Use only if you know it's a string */
 static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index f71f37978a1ba..ed3e8d2c6cc99 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -256,10 +256,6 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
-// PyUnicode_InternImmortal() is deprecated since Python 3.10
-// and will be removed in Python 3.12. Use PyUnicode_InternInPlace() instead.
-Py_DEPRECATED(3.10) PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
-
 /* --- wchar_t support for platforms which support it --------------------- */
 
 #ifdef HAVE_WCHAR_H
diff --git a/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst b/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst
new file mode 100644
index 0000000000000..c175d1efee388
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst	
@@ -0,0 +1,2 @@
+Remove the ``PyUnicode_InternImmortal()`` function and the
+``SSTATE_INTERNED_IMMORTAL`` macro. Patch by Victor Stinner.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 07cce369fe80a..d848f18d68ff6 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -1563,6 +1563,7 @@
     added = '3.2'
 [function.PyUnicode_InternImmortal]
     added = '3.2'
+    abi_only = true
 [function.PyUnicode_InternInPlace]
     added = '3.2'
 [data.PyUnicode_Type]
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ee3275793528c..e935829072483 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1516,13 +1516,8 @@ unicode_dealloc(PyObject *unicode)
     }
 #endif
 
-    switch (PyUnicode_CHECK_INTERNED(unicode)) {
-    case SSTATE_NOT_INTERNED:
-        break;
-
-    case SSTATE_INTERNED_MORTAL:
-    {
 #ifdef INTERNED_STRINGS
+    if (PyUnicode_CHECK_INTERNED(unicode)) {
         /* Revive the dead object temporarily. PyDict_DelItem() removes two
            references (key and value) which were ignored by
            PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
@@ -1536,17 +1531,8 @@ unicode_dealloc(PyObject *unicode)
         }
         assert(Py_REFCNT(unicode) == 1);
         Py_SET_REFCNT(unicode, 0);
-#endif
-        break;
-    }
-
-    case SSTATE_INTERNED_IMMORTAL:
-        _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
-        break;
-
-    default:
-        Py_UNREACHABLE();
     }
+#endif
 
     if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
         PyObject_Free(_PyUnicode_UTF8(unicode));
@@ -14674,7 +14660,7 @@ PyUnicode_InternInPlace(PyObject **p)
        refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
        this. */
     Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
-    _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+    _PyUnicode_STATE(s).interned = 1;
 #else
     // PyDict expects that interned strings have their hash
     // (PyASCIIObject.hash) already computed.
@@ -14682,23 +14668,14 @@ PyUnicode_InternInPlace(PyObject **p)
 #endif
 }
 
+// Function kept for the stable ABI.
+PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
 void
 PyUnicode_InternImmortal(PyObject **p)
 {
-    if (PyErr_WarnEx(PyExc_DeprecationWarning,
-            "PyUnicode_InternImmortal() is deprecated; "
-            "use PyUnicode_InternInPlace() instead", 1) < 0)
-    {
-        // The function has no return value, the exception cannot
-        // be reported to the caller, so just log it.
-        PyErr_WriteUnraisable(NULL);
-    }
-
     PyUnicode_InternInPlace(p);
-    if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
-        _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
-        Py_INCREF(*p);
-    }
+    // Leak a reference on purpose
+    Py_INCREF(*p);
 }
 
 PyObject *
@@ -14733,37 +14710,25 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
     fprintf(stderr, "releasing %zd interned strings\n",
             PyDict_GET_SIZE(interned));
 
-    Py_ssize_t immortal_size = 0, mortal_size = 0;
+    Py_ssize_t total_length = 0;
 #endif
     Py_ssize_t pos = 0;
     PyObject *s, *ignored_value;
     while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
-        switch (PyUnicode_CHECK_INTERNED(s)) {
-        case SSTATE_INTERNED_IMMORTAL:
-            Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
-#ifdef INTERNED_STATS
-            immortal_size += PyUnicode_GET_LENGTH(s);
-#endif
-            break;
-        case SSTATE_INTERNED_MORTAL:
-            // Restore the two references (key and value) ignored
-            // by PyUnicode_InternInPlace().
-            Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
+        assert(PyUnicode_CHECK_INTERNED(s));
+        // Restore the two references (key and value) ignored
+        // by PyUnicode_InternInPlace().
+        Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
 #ifdef INTERNED_STATS
-            mortal_size += PyUnicode_GET_LENGTH(s);
+        total_length += PyUnicode_GET_LENGTH(s);
 #endif
-            break;
-        case SSTATE_NOT_INTERNED:
-            /* fall through */
-        default:
-            Py_UNREACHABLE();
-        }
-        _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
+
+        _PyUnicode_STATE(s).interned = 0;
     }
 #ifdef INTERNED_STATS
     fprintf(stderr,
-            "total size of all interned strings: %zd/%zd mortal/immortal\n",
-            mortal_size, immortal_size);
+            "total length of all interned strings: %zd characters\n",
+            total_length);
 #endif
 
     PyDict_Clear(interned);



More information about the Python-checkins mailing list