https://github.com/python/cpython/commit/36f341ca3ecd5f0d54073c6dbfa82b95d84... commit: 36f341ca3ecd5f0d54073c6dbfa82b95d843cab8 branch: main author: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> committer: encukou <encukou@gmail.com> date: 2025-01-21T11:45:53+01:00 summary: gh-127787: allow retrieving the clipped slice length in `_PyUnicodeError_GetParams` (GH-128980) files: M Include/internal/pycore_pyerrors.h M Objects/exceptions.c diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index 8dea2d34117430..fa7d9ee36d095d 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -196,9 +196,9 @@ extern int _PyUnicodeError_GetParams( Py_ssize_t *objlen, Py_ssize_t *start, Py_ssize_t *end, + Py_ssize_t *slen, int as_bytes); - #ifdef __cplusplus } #endif diff --git a/Objects/exceptions.c b/Objects/exceptions.c index d23b7f7c76c3e7..20fe55d2cc2955 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) * The 'start' can be negative or not, but when adjusting the value, * we clip it in [0, max(0, objlen - 1)] and do not interpret it as * a relative offset. + * + * This function always succeeds. */ -static inline Py_ssize_t +static Py_ssize_t unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) { assert(objlen >= 0); @@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) } +/* Assert some properties of the adjusted 'start' value. */ +#ifndef NDEBUG +static void +assert_adjusted_unicode_error_start(Py_ssize_t start, Py_ssize_t objlen) +{ + assert(objlen >= 0); + /* in the future, `min_start` may be something else */ + Py_ssize_t min_start = 0; + assert(start >= min_start); + /* in the future, `max_start` may be something else */ + Py_ssize_t max_start = Py_MAX(min_start, objlen - 1); + assert(start <= max_start); +} +#else +#define assert_adjusted_unicode_error_start(...) +#endif + + /* * Adjust the (exclusive) 'end' value of a UnicodeError object. * * The 'end' can be negative or not, but when adjusting the value, * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and * do not interpret it as a relative offset. + * + * This function always succeeds. */ -static inline Py_ssize_t +static Py_ssize_t unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) { assert(objlen >= 0); @@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) } +/* Assert some properties of the adjusted 'end' value. */ +#ifndef NDEBUG +static void +assert_adjusted_unicode_error_end(Py_ssize_t end, Py_ssize_t objlen) +{ + assert(objlen >= 0); + /* in the future, `min_end` may be something else */ + Py_ssize_t min_end = Py_MIN(1, objlen); + assert(end >= min_end); + /* in the future, `max_end` may be something else */ + Py_ssize_t max_end = Py_MAX(min_end, objlen); + assert(end <= max_end); +} +#else +#define assert_adjusted_unicode_error_end(...) +#endif + + +/* + * Adjust the length of the range described by a UnicodeError object. + * + * The 'start' and 'end' arguments must have been obtained by + * unicode_error_adjust_start() and unicode_error_adjust_end(). + * + * The result is clipped in [0, objlen]. By construction, it + * will always be smaller than 'objlen' as 'start' and 'end' + * are smaller than 'objlen'. + */ +static Py_ssize_t +unicode_error_adjust_len(Py_ssize_t start, Py_ssize_t end, Py_ssize_t objlen) +{ + assert_adjusted_unicode_error_start(start, objlen); + assert_adjusted_unicode_error_end(end, objlen); + Py_ssize_t ranlen = end - start; + assert(ranlen <= objlen); + return ranlen < 0 ? 0 : ranlen; +} + + +/* Assert some properties of the adjusted range 'len' value. */ +#ifndef NDEBUG +static void +assert_adjusted_unicode_error_len(Py_ssize_t ranlen, Py_ssize_t objlen) +{ + assert(objlen >= 0); + assert(ranlen >= 0); + assert(ranlen <= objlen); +} +#else +#define assert_adjusted_unicode_error_len(...) +#endif + + /* * Get various common parameters of a UnicodeError object. * @@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) * objlen The 'object' length. * start The clipped 'start' attribute. * end The clipped 'end' attribute. + * slen The length of the slice described by the clipped 'start' + * and 'end' values. It always lies in [0, objlen]. * * An output parameter can be NULL to indicate that * the corresponding value does not need to be stored. * * Input parameter: * - * as_bytes If 1, the error's 'object' attribute must be a bytes object, - * i.e. the call is for a `UnicodeDecodeError`. Otherwise, the - * 'object' attribute must be a string. + * as_bytes If true, the error's 'object' attribute must be a `bytes`, + * i.e. 'self' is a `UnicodeDecodeError` instance. Otherwise, + * the 'object' attribute must be a string. * * A TypeError is raised if the 'object' type is incompatible. */ int _PyUnicodeError_GetParams(PyObject *self, PyObject **obj, Py_ssize_t *objlen, - Py_ssize_t *start, Py_ssize_t *end, + Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t *slen, int as_bytes) { assert(self != NULL); @@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self, if (objlen != NULL) { *objlen = n; } + + Py_ssize_t start_value = -1; + if (start != NULL || slen != NULL) { + start_value = unicode_error_adjust_start(exc->start, n); + } if (start != NULL) { - *start = unicode_error_adjust_start(exc->start, n); - assert(*start >= 0); - assert(*start <= n); + assert_adjusted_unicode_error_start(start_value, n); + *start = start_value; + } + + Py_ssize_t end_value = -1; + if (end != NULL || slen != NULL) { + end_value = unicode_error_adjust_end(exc->end, n); } if (end != NULL) { - *end = unicode_error_adjust_end(exc->end, n); - assert(*end >= 0); - assert(*end <= n); + assert_adjusted_unicode_error_end(end_value, n); + *end = end_value; + } + + if (slen != NULL) { + *slen = unicode_error_adjust_len(start_value, end_value, n); + assert_adjusted_unicode_error_len(*slen, n); } + if (obj != NULL) { *obj = r; } @@ -3111,7 +3202,9 @@ static inline int unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) { assert(self != NULL); - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes); + return _PyUnicodeError_GetParams(self, NULL, NULL, + start, NULL, NULL, + as_bytes); } @@ -3177,7 +3270,9 @@ static inline int unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) { assert(self != NULL); - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes); + return _PyUnicodeError_GetParams(self, NULL, NULL, + NULL, end, NULL, + as_bytes); }