gh-111972: Make Unicode name C APIcapsule initialization thread-safe (#112249)

https://github.com/python/cpython/commit/0785c685599aaa052f85d6163872bdecb9c... commit: 0785c685599aaa052f85d6163872bdecb9c66486 branch: main author: Kirill Podoprigora <kirill.bast9@mail.ru> committer: erlend-aasland <erlend.aasland@protonmail.com> date: 2023-11-30T11:12:49+01:00 summary: gh-111972: Make Unicode name C APIcapsule initialization thread-safe (#112249) files: M Include/internal/pycore_ucnhash.h M Objects/unicodeobject.c M Python/codecs.c diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h index 187dd68e7347ff..1561dfbb3150d3 100644 --- a/Include/internal/pycore_ucnhash.h +++ b/Include/internal/pycore_ucnhash.h @@ -28,6 +28,8 @@ typedef struct { } _PyUnicode_Name_CAPI; +extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void); + #ifdef __cplusplus } #endif diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cffc06297a9aee..10022e23c04abf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode) return _PyUnicode_EncodeUTF16(unicode, NULL, 0); } +_PyUnicode_Name_CAPI * +_PyUnicode_GetNameCAPI(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_Name_CAPI *ucnhash_capi; + + ucnhash_capi = _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi); + if (ucnhash_capi == NULL) { + ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + PyUnicodeData_CAPSULE_NAME, 1); + + // It's fine if we overwite the value here. It's always the same value. + _Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, ucnhash_capi); + } + return ucnhash_capi; +} + /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * @@ -5884,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; _PyUnicode_Name_CAPI *ucnhash_capi; - PyInterpreterState *interp = _PyInterpreterState_GET(); // so we can remember if we've seen an invalid escape char or not *first_invalid_escape = NULL; @@ -6032,19 +6048,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, /* \N{name} */ case 'N': - ucnhash_capi = interp->unicode.ucnhash_capi; + ucnhash_capi = _PyUnicode_GetNameCAPI(); if (ucnhash_capi == NULL) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (ucnhash_capi == NULL) { - PyErr_SetString( + PyErr_SetString( PyExc_UnicodeError, "\\N escapes not supported (can't load unicodedata module)" - ); - goto onError; - } - interp->unicode.ucnhash_capi = ucnhash_capi; + ); + goto onError; } message = "malformed \\N character escape"; diff --git a/Python/codecs.c b/Python/codecs.c index 545bf82e00dca1..d8fe7b22063a80 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -931,8 +931,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } -static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; - PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { @@ -953,13 +951,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; - if (!ucnhash_capi) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (!ucnhash_capi) { - return NULL; - } + _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); + if (ucnhash_capi == NULL) { + return NULL; } for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */
participants (1)
-
erlend-aasland