[Python-checkins] gh-89653: Use int type for Unicode kind (#92704)

vstinner webhook-mailer at python.org
Fri May 13 06:41:16 EDT 2022


https://github.com/python/cpython/commit/f62ad4f2c4214fdc05cc45c27a5c068553c7942c
commit: f62ad4f2c4214fdc05cc45c27a5c068553c7942c
branch: main
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2022-05-13T12:41:05+02:00
summary:

gh-89653: Use int type for Unicode kind (#92704)

Use the same type that PyUnicode_FromKindAndData() kind parameter
type (public C API): int.

files:
M Include/cpython/unicodeobject.h
M Modules/_csv.c
M Modules/_datetimemodule.c
M Modules/_decimal/_decimal.c
M Modules/_elementtree.c
M Modules/_operator.c
M Modules/_pickle.c
M Modules/pyexpat.c
M Objects/bytesobject.c
M Objects/longobject.c
M Objects/stringlib/localeutil.h
M Objects/unicodeobject.c
M Python/formatter_unicode.c

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 030614e263344d..16db2cb7bffb99 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -507,7 +507,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
 typedef struct {
     PyObject *buffer;
     void *data;
-    enum PyUnicode_Kind kind;
+    int kind;
     Py_UCS4 maxchar;
     Py_ssize_t size;
     Py_ssize_t pos;
@@ -566,7 +566,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
    macro instead. */
 PyAPI_FUNC(int)
 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
-                                     enum PyUnicode_Kind kind);
+                                     int kind);
 
 /* Append a Unicode character.
    Return 0 on success, raise an exception and return -1 on error. */
diff --git a/Modules/_csv.c b/Modules/_csv.c
index cbf4c5de51968e..d34d0a1296ae72 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -868,7 +868,7 @@ Reader_iternext(ReaderObj *self)
     PyObject *fields = NULL;
     Py_UCS4 c;
     Py_ssize_t pos, linelen;
-    unsigned int kind;
+    int kind;
     const void *data;
     PyObject *lineobj;
 
@@ -1066,7 +1066,7 @@ join_reset(WriterObj *self)
  * record length.
  */
 static Py_ssize_t
-join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
+join_append_data(WriterObj *self, int field_kind, const void *field_data,
                  Py_ssize_t field_len, int *quoted,
                  int copy_phase)
 {
@@ -1179,7 +1179,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
 static int
 join_append(WriterObj *self, PyObject *field, int quoted)
 {
-    unsigned int field_kind = -1;
+    int field_kind = -1;
     const void *field_data = NULL;
     Py_ssize_t field_len = 0;
     Py_ssize_t rec_len;
@@ -1211,7 +1211,7 @@ static int
 join_append_lineterminator(WriterObj *self)
 {
     Py_ssize_t terminator_len, i;
-    unsigned int term_kind;
+    int term_kind;
     const void *term_data;
 
     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index e0bb4ee602c425..06dff8fdd9eed9 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -5284,7 +5284,7 @@ _sanitize_isoformat_str(PyObject *dtstr)
     //
     // The result of this, if not NULL, returns a new reference
     const void* const unicode_data = PyUnicode_DATA(dtstr);
-    const unsigned int kind = PyUnicode_KIND(dtstr);
+    const int kind = PyUnicode_KIND(dtstr);
 
     // Depending on the format of the string, the separator can only ever be
     // in positions 7, 8 or 10. We'll check each of these for a surrogate and
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
index 5cbddac6232487..548bd601b0eb87 100644
--- a/Modules/_decimal/_decimal.c
+++ b/Modules/_decimal/_decimal.c
@@ -1918,7 +1918,7 @@ dec_dealloc(PyObject *dec)
 /******************************************************************************/
 
 Py_LOCAL_INLINE(int)
-is_space(enum PyUnicode_Kind kind, const void *data, Py_ssize_t pos)
+is_space(int kind, const void *data, Py_ssize_t pos)
 {
     Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
     return Py_UNICODE_ISSPACE(ch);
@@ -1935,7 +1935,7 @@ is_space(enum PyUnicode_Kind kind, const void *data, Py_ssize_t pos)
 static char *
 numeric_as_ascii(PyObject *u, int strip_ws, int ignore_underscores)
 {
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
     Py_UCS4 ch;
     char *res, *cp;
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 89f877f6e1279f..453e57a94f2c6b 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -1121,7 +1121,7 @@ checkpath(PyObject* tag)
     if (PyUnicode_Check(tag)) {
         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
         const void *data = PyUnicode_DATA(tag);
-        unsigned int kind = PyUnicode_KIND(tag);
+        int kind = PyUnicode_KIND(tag);
         if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
                 PyUnicode_READ(kind, data, 1) == '}' || (
                 PyUnicode_READ(kind, data, 1) == '*' &&
diff --git a/Modules/_operator.c b/Modules/_operator.c
index 739ae5b229e1e0..1af4a4feb7728a 100644
--- a/Modules/_operator.c
+++ b/Modules/_operator.c
@@ -1230,9 +1230,6 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     /* prepare attr while checking args */
     for (idx = 0; idx < nattrs; ++idx) {
         PyObject *item = PyTuple_GET_ITEM(args, idx);
-        Py_ssize_t item_len;
-        const void *data;
-        unsigned int kind;
         int dot_count;
 
         if (!PyUnicode_Check(item)) {
@@ -1245,9 +1242,9 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
             Py_DECREF(attr);
             return NULL;
         }
-        item_len = PyUnicode_GET_LENGTH(item);
-        kind = PyUnicode_KIND(item);
-        data = PyUnicode_DATA(item);
+        Py_ssize_t item_len = PyUnicode_GET_LENGTH(item);
+        int kind = PyUnicode_KIND(item);
+        const void *data = PyUnicode_DATA(item);
 
         /* check whether the string is dotted */
         dot_count = 0;
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 851feadd27df82..0adfa4103cc0d0 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2579,7 +2579,7 @@ raw_unicode_escape(PyObject *obj)
     char *p;
     Py_ssize_t i, size;
     const void *data;
-    unsigned int kind;
+    int kind;
     _PyBytesWriter writer;
 
     if (PyUnicode_READY(obj))
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index ad8148adae98be..b8535dfe7115a1 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1088,7 +1088,7 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
     PyObject* u;
     int i;
     const void *data;
-    unsigned int kind;
+    int kind;
 
     if (PyErr_Occurred())
         return XML_STATUS_ERROR;
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index b429f7687f7f1e..1dfa1d57cf65a1 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -2396,7 +2396,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
 
     if (!PyUnicode_IS_ASCII(string)) {
         const void *data = PyUnicode_DATA(string);
-        unsigned int kind = PyUnicode_KIND(string);
+        int kind = PyUnicode_KIND(string);
         Py_ssize_t i;
 
         /* search for the first non-ASCII character */
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 78360a58facc63..7f60866d2eb280 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1714,7 +1714,7 @@ long_to_decimal_string_internal(PyObject *aa,
     digit *pout, *pin, rem, tenpow;
     int negative;
     int d;
-    enum PyUnicode_Kind kind;
+    int kind;
 
     a = (PyLongObject *)aa;
     if (a == NULL || !PyLong_Check(a)) {
@@ -1904,7 +1904,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
     PyObject *v = NULL;
     Py_ssize_t sz;
     Py_ssize_t size_a;
-    enum PyUnicode_Kind kind;
+    int kind;
     int negative;
     int bits;
 
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h
index bd16e0a172802f..d77715ec0de9ef 100644
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@@ -75,7 +75,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
 
     if (n_zeros) {
         *buffer_pos -= n_zeros;
-        enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
+        int kind = PyUnicode_KIND(writer->buffer);
         void *data = PyUnicode_DATA(writer->buffer);
         unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
     }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index cc50fcd76791c2..ee3275793528cd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -247,7 +247,7 @@ static inline PyObject* unicode_new_empty(void)
     } while (0)
 
 static inline void
-unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
+unicode_fill(int kind, void *data, Py_UCS4 value,
              Py_ssize_t start, Py_ssize_t length)
 {
     assert(0 <= start);
@@ -483,7 +483,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
     CHECK(PyUnicode_Check(op));
 
     PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
-    unsigned int kind = ascii->state.kind;
+    int kind = ascii->state.kind;
 
     if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
         CHECK(kind == PyUnicode_1BYTE_KIND);
@@ -612,7 +612,7 @@ backslashreplace(_PyBytesWriter *writer, char *str,
 {
     Py_ssize_t size, i;
     Py_UCS4 ch;
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
 
     kind = PyUnicode_KIND(unicode);
@@ -678,7 +678,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
 {
     Py_ssize_t size, i;
     Py_UCS4 ch;
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
 
     kind = PyUnicode_KIND(unicode);
@@ -1128,7 +1128,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
     PyObject *obj;
     PyCompactUnicodeObject *unicode;
     void *data;
-    enum PyUnicode_Kind kind;
+    int kind;
     int is_ascii;
     Py_ssize_t char_size;
     Py_ssize_t struct_size;
@@ -1268,7 +1268,7 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
                  PyObject *from, Py_ssize_t from_start,
                  Py_ssize_t how_many, int check_maxchar)
 {
-    unsigned int from_kind, to_kind;
+    int from_kind, to_kind;
     const void *from_data;
     void *to_data;
 
@@ -1663,7 +1663,7 @@ static void
 unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
                    const char *str, Py_ssize_t len)
 {
-    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    int kind = PyUnicode_KIND(unicode);
     const void *data = PyUnicode_DATA(unicode);
     const char *end = str + len;
 
@@ -1950,7 +1950,7 @@ _PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
 }
 
 static Py_UCS4
-kind_maxchar_limit(unsigned int kind)
+kind_maxchar_limit(int kind)
 {
     switch (kind) {
     case PyUnicode_1BYTE_KIND:
@@ -2064,7 +2064,7 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
 Py_UCS4
 _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
 {
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *startptr, *endptr;
 
     assert(0 <= start);
@@ -2105,7 +2105,7 @@ unicode_adjust_maxchar(PyObject **p_unicode)
     PyObject *unicode, *copy;
     Py_UCS4 max_char;
     Py_ssize_t len;
-    unsigned int kind;
+    int kind;
 
     assert(p_unicode != NULL);
     unicode = *p_unicode;
@@ -2170,7 +2170,7 @@ _PyUnicode_Copy(PyObject *unicode)
    character. Return NULL on error. */
 
 static void*
-unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
+unicode_askind(int skind, void const *data, Py_ssize_t len, int kind)
 {
     void *result;
 
@@ -4984,7 +4984,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
         return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
                                          PyUnicode_UTF8_LENGTH(unicode));
 
-    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    int kind = PyUnicode_KIND(unicode);
     const void *data = PyUnicode_DATA(unicode);
     Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
 
@@ -5020,7 +5020,7 @@ unicode_fill_utf8(PyObject *unicode)
     /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
     assert(!PyUnicode_IS_ASCII(unicode));
 
-    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    int kind = PyUnicode_KIND(unicode);
     const void *data = PyUnicode_DATA(unicode);
     Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
 
@@ -5155,7 +5155,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
         Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer);
 
         if (e - q >= 4) {
-            enum PyUnicode_Kind kind = writer.kind;
+            int kind = writer.kind;
             void *data = writer.data;
             const unsigned char *last = e - 4;
             Py_ssize_t pos = writer.pos;
@@ -5240,7 +5240,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
                        const char *errors,
                        int byteorder)
 {
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
     Py_ssize_t len;
     PyObject *v;
@@ -5557,7 +5557,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
                        const char *errors,
                        int byteorder)
 {
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
     Py_ssize_t len;
     PyObject *v;
@@ -6022,7 +6022,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
     Py_ssize_t i, len;
     PyObject *repr;
     char *p;
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
     Py_ssize_t expandsize;
 
@@ -7365,7 +7365,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
         }
         else {
             Py_ssize_t i;
-            enum PyUnicode_Kind kind;
+            int kind;
             const void *data;
 
             outsize = PyUnicode_GET_LENGTH(rep);
@@ -7504,7 +7504,7 @@ charmap_decode_string(const char *s,
     Py_ssize_t startinpos, endinpos;
     PyObject *errorHandler = NULL, *exc = NULL;
     Py_ssize_t maplen;
-    enum PyUnicode_Kind mapkind;
+    int mapkind;
     const void *mapdata;
     Py_UCS4 x;
     unsigned char ch;
@@ -7542,7 +7542,7 @@ charmap_decode_string(const char *s,
 
     while (s < e) {
         if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
-            enum PyUnicode_Kind outkind = writer->kind;
+            int outkind = writer->kind;
             const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata;
             if (outkind == PyUnicode_1BYTE_KIND) {
                 Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
@@ -8061,7 +8061,7 @@ charmap_encoding_error(
     PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
     Py_ssize_t size, repsize;
     Py_ssize_t newpos;
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *data;
     Py_ssize_t index;
     /* startpos for collecting unencodable chars */
@@ -9422,7 +9422,7 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
     int use_memcpy;
     unsigned char *res_data = NULL, *sep_data = NULL;
     PyObject *last_obj;
-    unsigned int kind = 0;
+    int kind = 0;
 
     /* If empty sequence, return u"". */
     if (seqlen == 0) {
@@ -9581,7 +9581,7 @@ void
 _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
                     Py_UCS4 fill_char)
 {
-    const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    const int kind = PyUnicode_KIND(unicode);
     void *data = PyUnicode_DATA(unicode);
     assert(unicode_modifiable(unicode));
     assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
@@ -11049,7 +11049,7 @@ static PyObject *
 unicode_getitem(PyObject *self, Py_ssize_t index)
 {
     const void *data;
-    enum PyUnicode_Kind kind;
+    int kind;
     Py_UCS4 ch;
 
     if (!PyUnicode_Check(self)) {
@@ -12985,7 +12985,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
 
 int
 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
-                                     enum PyUnicode_Kind kind)
+                                     int kind)
 {
     Py_UCS4 maxchar;
 
@@ -13444,7 +13444,7 @@ struct unicode_formatter_t {
     Py_ssize_t arglen, argidx;
     PyObject *dict;
 
-    enum PyUnicode_Kind fmtkind;
+    int fmtkind;
     Py_ssize_t fmtcnt, fmtpos;
     const void *fmtdata;
     PyObject *fmtstr;
@@ -14106,7 +14106,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
                           PyObject *str)
 {
     Py_ssize_t len;
-    enum PyUnicode_Kind kind;
+    int kind;
     const void *pbuf;
     Py_ssize_t pindex;
     Py_UCS4 signchar;
@@ -14422,7 +14422,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
     PyObject *self;
     Py_ssize_t length, char_size;
     int share_utf8;
-    unsigned int kind;
+    int kind;
     void *data;
 
     assert(PyType_IsSubtype(type, &PyUnicode_Type));
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index 04d37c0be28cdd..38e5f69bfb4289 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -608,7 +608,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
 {
     /* Used to keep track of digits, decimal, and remainder. */
     Py_ssize_t d_pos = d_start;
-    const unsigned int kind = writer->kind;
+    const int kind = writer->kind;
     const void *data = writer->data;
     Py_ssize_t r;
 
@@ -1215,7 +1215,7 @@ format_complex_internal(PyObject *value,
     int flags = 0;
     int result = -1;
     Py_UCS4 maxchar = 127;
-    enum PyUnicode_Kind rkind;
+    int rkind;
     void *rdata;
     Py_UCS4 re_sign_char = '\0';
     Py_UCS4 im_sign_char = '\0';



More information about the Python-checkins mailing list