[Python-checkins] bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (GH-20878)

Inada Naoki webhook-mailer at python.org
Wed Jun 17 07:09:50 EDT 2020


https://github.com/python/cpython/commit/2c4928d37edc5e4aeec3c0b79fa3460b1ec9b60d
commit: 2c4928d37edc5e4aeec3c0b79fa3460b1ec9b60d
branch: master
author: Inada Naoki <songofacandy at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-06-17T20:09:44+09:00
summary:

bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (GH-20878)

Co-authored-by: Kyle Stanley <aeros167 at gmail.com>
Co-authored-by: Victor Stinner <vstinner at python.org>

files:
A Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst
M Doc/whatsnew/3.9.rst
M Include/cpython/unicodeobject.h
M Modules/_testcapimodule.c
M Objects/unicodeobject.c
M Python/getargs.c

diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index 67a83bc958457..15fca8fa9d4c9 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -1097,6 +1097,12 @@ Porting to Python 3.9
   internal C API (``pycore_gc.h``).
   (Contributed by Victor Stinner in :issue:`40241`.)
 
+* The ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
+  :c:func:`PyUnicode_FromUnicode`, :c:func:`PyUnicode_AsUnicode`,
+  ``_PyUnicode_AsUnicode``, and :c:func:`PyUnicode_AsUnicodeAndSize` are
+  marked as deprecated in C.  They have been deprecated by :pep:`393` since
+  Python 3.3.
+  (Contributed by Inada Naoki in :issue:`36346`.)
 
 Removed
 -------
@@ -1165,3 +1171,8 @@ Removed
 
 * Remove ``_PyUnicode_ClearStaticStrings()`` function.
   (Contributed by Victor Stinner in :issue:`39465`.)
+
+* Remove ``Py_UNICODE_MATCH``. It has been deprecated by :pep:`393`, and
+  broken since Python 3.3. The :c:func:`PyUnicode_Tailmatch` function can be
+  used instead.
+  (Contributed by Inada Naoki in :issue:`36346`.)
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 3b49ce7759037..569bdb1e2a94b 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -46,13 +46,17 @@
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
 
-#define Py_UNICODE_COPY(target, source, length) \
-    memcpy((target), (source), (length)*sizeof(Py_UNICODE))
-
-#define Py_UNICODE_FILL(target, value, length) \
-    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
-        for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
-    } while (0)
+Py_DEPRECATED(3.3) static inline void
+Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
+    memcpy(target, source, length * sizeof(Py_UNICODE));
+}
+
+Py_DEPRECATED(3.3) static inline void
+Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
+    for (Py_ssize_t i = 0; i < length; i++) {
+        target[i] = value;
+    }
+}
 
 /* macros to work with surrogates */
 #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
@@ -67,14 +71,6 @@
 /* low surrogate = bottom 10 bits added to DC00 */
 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
 
-/* Check if substring matches at given offset.  The offset must be
-   valid, and the substring must not be empty. */
-
-#define Py_UNICODE_MATCH(string, offset, substring) \
-    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
-     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
-     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
-
 /* --- Unicode Type ------------------------------------------------------- */
 
 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -247,10 +243,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
     int check_content);
 
 /* Fast access macros */
-#define PyUnicode_WSTR_LENGTH(op) \
-    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
-     ((PyASCIIObject*)op)->length :                    \
-     ((PyCompactUnicodeObject*)op)->wstr_length)
 
 /* Returns the deprecated Py_UNICODE representation's size in code units
    (this includes surrogate pairs as 2 units).
@@ -445,6 +437,14 @@ enum PyUnicode_Kind {
         (0xffffU) :                                                     \
         (0x10ffffU)))))
 
+Py_DEPRECATED(3.3)
+static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
+    return PyUnicode_IS_COMPACT_ASCII(op) ?
+            ((PyASCIIObject*)op)->length :
+            ((PyCompactUnicodeObject*)op)->wstr_length;
+}
+#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
+
 /* === Public API ========================================================= */
 
 /* --- Plain Py_UNICODE --------------------------------------------------- */
@@ -543,7 +543,7 @@ PyAPI_FUNC(void) _PyUnicode_FastFill(
    only allowed if u was set to NULL.
 
    The buffer is copied into the new object. */
-/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
+Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
     const Py_UNICODE *u,        /* Unicode buffer */
     Py_ssize_t size             /* size of buffer */
     );
@@ -572,13 +572,13 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
    Py_UNICODE buffer.
    If the wchar_t/Py_UNICODE representation is not yet available, this
    function will calculate it. */
-/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
+Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
     PyObject *unicode           /* Unicode object */
     );
 
 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
    contains null characters. */
-PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
+Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
     PyObject *unicode           /* Unicode object */
     );
 
@@ -587,7 +587,7 @@ PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
    If the wchar_t/Py_UNICODE representation is not yet available, this
    function will calculate it. */
 
-/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
+Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
     PyObject *unicode,          /* Unicode object */
     Py_ssize_t *size            /* location where to save the length */
     );
diff --git a/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst b/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst
new file mode 100644
index 0000000000000..902a0e60727e6
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst	
@@ -0,0 +1,4 @@
+Mark ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
+``PyUnicode_FromUnicode``, ``PyUnicode_AsUnicode``, ``_PyUnicode_AsUnicode``,
+and ``PyUnicode_AsUnicodeAndSize`` as deprecated in C. Remove ``Py_UNICODE_MATCH``,
+which has been deprecated and broken since Python 3.3.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index e0457ae5dfa55..5302641a9a37e 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1668,6 +1668,10 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)
 
 static volatile int x;
 
+/* Ignore use of deprecated APIs */
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+
 /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
    of an error.
 */
@@ -1844,6 +1848,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
 
     Py_RETURN_NONE;
 }
+_Py_COMP_DIAG_POP
 
 static PyObject *
 unicode_aswidechar(PyObject *self, PyObject *args)
@@ -2064,6 +2069,10 @@ unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
     return PyUnicode_TransformDecimalToASCII(unicode, length);
 }
 
+/* Ignore use of deprecated APIs */
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+
 static PyObject *
 unicode_legacy_string(PyObject *self, PyObject *args)
 {
@@ -2086,6 +2095,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)
 
     return u;
 }
+_Py_COMP_DIAG_POP
 
 static PyObject *
 getargs_w_star(PyObject *self, PyObject *args)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c75eb077e0c80..1433848c81f8e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -120,6 +120,13 @@ extern "C" {
          _PyUnicode_UTF8_LENGTH(op))
 #define _PyUnicode_WSTR(op)                             \
     (((PyASCIIObject*)(op))->wstr)
+
+/* Don't use the deprecated macro from unicodeobject.h */
+#undef PyUnicode_WSTR_LENGTH
+#define PyUnicode_WSTR_LENGTH(op) \
+    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
+     ((PyASCIIObject*)op)->length :                    \
+     ((PyCompactUnicodeObject*)op)->wstr_length)
 #define _PyUnicode_WSTR_LENGTH(op)                      \
     (((PyCompactUnicodeObject*)(op))->wstr_length)
 #define _PyUnicode_LENGTH(op)                           \
@@ -970,11 +977,14 @@ ensure_unicode(PyObject *obj)
 #include "stringlib/find_max_char.h"
 #include "stringlib/undef.h"
 
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
 #include "stringlib/unicodedefs.h"
 #include "stringlib/fastsearch.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
 #include "stringlib/undef.h"
+_Py_COMP_DIAG_POP
 
 /* --- Unicode Object ----------------------------------------------------- */
 
@@ -4097,6 +4107,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
     return w;
 }
 
+/* Deprecated APIs */
+
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+
 Py_UNICODE *
 PyUnicode_AsUnicode(PyObject *unicode)
 {
@@ -4135,6 +4150,8 @@ PyUnicode_GetSize(PyObject *unicode)
     return -1;
 }
 
+_Py_COMP_DIAG_POP
+
 Py_ssize_t
 PyUnicode_GetLength(PyObject *unicode)
 {
@@ -12364,6 +12381,8 @@ PyUnicode_IsIdentifier(PyObject *self)
         return len && i == len;
     }
     else {
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
         Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
         if (len == 0) {
             /* an empty string is not a valid identifier */
@@ -12401,6 +12420,7 @@ PyUnicode_IsIdentifier(PyObject *self)
             }
         }
         return 1;
+_Py_COMP_DIAG_POP
     }
 }
 
@@ -15955,7 +15975,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
         PyErr_BadArgument();
         return NULL;
     }
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
     u = PyUnicode_AsUnicodeAndSize(unicode, &len);
+_Py_COMP_DIAG_POP
     if (u == NULL)
         return NULL;
     /* Ensure we won't overflow the size. */
diff --git a/Python/getargs.c b/Python/getargs.c
index d2dba49966d47..cf0cc0783687a 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -1027,6 +1027,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
     case 'u': /* raw unicode buffer (Py_UNICODE *) */
     case 'Z': /* raw unicode buffer or None */
     {
+        // TODO: Raise DeprecationWarning
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
         Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);
 
         if (*format == '#') {
@@ -1066,6 +1069,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                                   arg, msgbuf, bufsize);
         }
         break;
+_Py_COMP_DIAG_POP
     }
 
     case 'e': {/* encoded string */



More information about the Python-checkins mailing list