https://github.com/python/cpython/commit/02e6bf7f2025cddcbde6432f6b6396198ab313f4 commit: 02e6bf7f2025cddcbde6432f6b6396198ab313f4 branch: master author: Victor Stinner <vstinner@redhat.com> committer: GitHub <noreply@github.com> date: 2018-11-20T16:20:16+01:00 summary: bpo-28604: Fix localeconv() for different LC_MONETARY (GH-10606) locale.localeconv() now sets temporarily the LC_CTYPE locale to the LC_MONETARY locale if the two locales are different and monetary strings are non-ASCII. This temporary change affects other threads. Changes: * locale.localeconv() can now set LC_CTYPE to LC_MONETARY to decode monetary fields. * Add LocaleInfo.grouping_buffer: copy localeconv() grouping string since it can be replaced anytime if a different thread calls localeconv(). * _Py_GetLocaleconvNumeric() now requires a "struct lconv *" structure, so locale.localeconv() no longer calls localeconv() twice. Moreover, the function now requires all arguments to be non-NULL. * Rename STATIC_LOCALE_INFO_INIT to LocaleInfo_STATIC_INIT. * Move _Py_GetLocaleconvNumeric() declaration from fileutils.h to pycore_fileutils.h. pycore_fileutils.h now includes locale.h. * The _locale module is now built with Py_BUILD_CORE defined. 
files: A Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst M Doc/library/locale.rst M Include/fileutils.h M Include/internal/pycore_fileutils.h M Modules/Setup M Modules/_localemodule.c M Python/fileutils.c M Python/formatter_unicode.c diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 2fd44fe8e90a..bf57a0835591 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -148,10 +148,8 @@ The :mod:`locale` module defines the following exception and functions: +--------------+-----------------------------------------+ The function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` - locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if - they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is - different than the ``LC_CTYPE`` locale. This temporary change affects other - threads. + locale or the ``LC_MONETARY`` locale if locales are different and numeric or + monetary strings are non-ASCII. This temporary change affects other threads. .. 
versionchanged:: 3.7 The function now sets temporarily the ``LC_CTYPE`` locale to the diff --git a/Include/fileutils.h b/Include/fileutils.h index fdd60fffcd55..830e56ad367a 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -170,11 +170,6 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd); PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking); #endif /* !MS_WINDOWS */ -PyAPI_FUNC(int) _Py_GetLocaleconvNumeric( - PyObject **decimal_point, - PyObject **thousands_sep, - const char **grouping); - #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index d577e099d1f5..98eb258fe61e 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -8,6 +8,8 @@ extern "C" { # error "Py_BUILD_CORE must be defined to include this header" #endif +#include <locale.h> /* struct lconv */ + PyAPI_FUNC(int) _Py_DecodeUTF8Ex( const char *arg, Py_ssize_t arglen, @@ -30,6 +32,11 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape( PyAPI_FUNC(int) _Py_GetForceASCII(void); +PyAPI_FUNC(int) _Py_GetLocaleconvNumeric( + struct lconv *lc, + PyObject **decimal_point, + PyObject **thousands_sep); + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst b/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst new file mode 100644 index 000000000000..289e484c35d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst @@ -0,0 +1,3 @@ +:func:`locale.localeconv` now sets temporarily the ``LC_CTYPE`` locale to the +``LC_MONETARY`` locale if the two locales are different and monetary strings +are non-ASCII. This temporary change affects other threads. 
diff --git a/Modules/Setup b/Modules/Setup index e7b939d55182..11ddd0c7b202 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -120,7 +120,7 @@ time -DPy_BUILD_CORE -I$(srcdir)/Include/internal timemodule.c # -lm # time oper _thread -DPy_BUILD_CORE -I$(srcdir)/Include/internal _threadmodule.c # low-level threading interface # access to ISO C locale support -_locale _localemodule.c # -lintl +_locale -DPy_BUILD_CORE _localemodule.c # -lintl # Standard I/O baseline _io -DPy_BUILD_CORE -I$(srcdir)/Include/internal -I$(srcdir)/Modules/_io _io/_iomodule.c _io/iobase.c _io/fileio.c _io/bytesio.c _io/bufferedio.c _io/textio.c _io/stringio.c diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 3fdbc5ea8122..4202cc401414 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -11,6 +11,7 @@ This software comes with no warranty. Use at your own risk. #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "pycore_fileutils.h" #include <stdio.h> #include <locale.h> @@ -128,6 +129,82 @@ PyLocale_setlocale(PyObject* self, PyObject* args) return result_object; } +static int +locale_is_ascii(const char *str) +{ + return (strlen(str) == 1 && ((unsigned char)str[0]) <= 127); +} + +static int +locale_decode_monetary(PyObject *dict, struct lconv *lc) +{ + int change_locale; + change_locale = (!locale_is_ascii(lc->int_curr_symbol) + || !locale_is_ascii(lc->currency_symbol) + || !locale_is_ascii(lc->mon_decimal_point) + || !locale_is_ascii(lc->mon_thousands_sep)); + + /* Keep a copy of the LC_CTYPE locale */ + char *oldloc = NULL, *loc = NULL; + if (change_locale) { + oldloc = setlocale(LC_CTYPE, NULL); + if (!oldloc) { + PyErr_SetString(PyExc_RuntimeWarning, + "failed to get LC_CTYPE locale"); + return -1; + } + + oldloc = _PyMem_Strdup(oldloc); + if (!oldloc) { + PyErr_NoMemory(); + return -1; + } + + loc = setlocale(LC_MONETARY, NULL); + if (loc != NULL && strcmp(loc, oldloc) == 0) { + loc = NULL; + } + + if (loc != NULL) { + /* Only set the locale 
temporarily the LC_CTYPE locale + to the LC_MONETARY locale if the two locales are different and + at least one string is non-ASCII. */ + setlocale(LC_CTYPE, loc); + } + } + + int res = -1; + +#define RESULT_STRING(ATTR) \ + do { \ + PyObject *obj; \ + obj = PyUnicode_DecodeLocale(lc->ATTR, NULL); \ + if (obj == NULL) { \ + goto done; \ + } \ + if (PyDict_SetItemString(dict, Py_STRINGIFY(ATTR), obj) < 0) { \ + Py_DECREF(obj); \ + goto done; \ + } \ + Py_DECREF(obj); \ + } while (0) + + RESULT_STRING(int_curr_symbol); + RESULT_STRING(currency_symbol); + RESULT_STRING(mon_decimal_point); + RESULT_STRING(mon_thousands_sep); +#undef RESULT_STRING + + res = 0; + +done: + if (loc != NULL) { + setlocale(LC_CTYPE, oldloc); + } + PyMem_Free(oldloc); + return res; +} + PyDoc_STRVAR(localeconv__doc__, "() -> dict. Returns numeric and monetary locale-specific parameters."); @@ -135,7 +212,7 @@ static PyObject* PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) { PyObject* result; - struct lconv *l; + struct lconv *lc; PyObject *x; result = PyDict_New(); @@ -144,7 +221,7 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) } /* if LC_NUMERIC is different in the C library, use saved value */ - l = localeconv(); + lc = localeconv(); /* hopefully, the localeconv result survives the C library calls involved herein */ @@ -162,22 +239,21 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) #define RESULT_STRING(s)\ do { \ - x = PyUnicode_DecodeLocale(l->s, NULL); \ + x = PyUnicode_DecodeLocale(lc->s, NULL); \ RESULT(#s, x); \ } while (0) #define RESULT_INT(i)\ do { \ - x = PyLong_FromLong(l->i); \ + x = PyLong_FromLong(lc->i); \ RESULT(#i, x); \ } while (0) - /* Monetary information */ - RESULT_STRING(int_curr_symbol); - RESULT_STRING(currency_symbol); - RESULT_STRING(mon_decimal_point); - RESULT_STRING(mon_thousands_sep); - x = copy_grouping(l->mon_grouping); + /* Monetary information: LC_MONETARY encoding */ + if 
(locale_decode_monetary(result, lc) < 0) { + goto failed; + } + x = copy_grouping(lc->mon_grouping); RESULT("mon_grouping", x); RESULT_STRING(positive_sign); @@ -191,12 +267,9 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) RESULT_INT(p_sign_posn); RESULT_INT(n_sign_posn); - /* Numeric information */ + /* Numeric information: LC_NUMERIC encoding */ PyObject *decimal_point, *thousands_sep; - const char *grouping; - if (_Py_GetLocaleconvNumeric(&decimal_point, - &thousands_sep, - &grouping) < 0) { + if (_Py_GetLocaleconvNumeric(lc, &decimal_point, &thousands_sep) < 0) { goto failed; } @@ -213,7 +286,7 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) } Py_DECREF(thousands_sep); - x = copy_grouping(grouping); + x = copy_grouping(lc->grouping); RESULT("grouping", x); return result; @@ -221,6 +294,10 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored)) failed: Py_DECREF(result); return NULL; + +#undef RESULT +#undef RESULT_STRING +#undef RESULT_INT } #if defined(HAVE_WCSCOLL) diff --git a/Python/fileutils.c b/Python/fileutils.c index c9a8e58dd122..033c2ff71b91 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1868,22 +1868,17 @@ _Py_set_blocking(int fd, int blocking) int -_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep, - const char **grouping) +_Py_GetLocaleconvNumeric(struct lconv *lc, + PyObject **decimal_point, PyObject **thousands_sep) { - int res = -1; - - struct lconv *lc = localeconv(); + assert(decimal_point != NULL); + assert(thousands_sep != NULL); int change_locale = 0; - if (decimal_point != NULL && - (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) - { + if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) { change_locale = 1; } - if (thousands_sep != NULL && - (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) - { + if ((strlen(lc->thousands_sep) > 1 || ((unsigned 
char)lc->thousands_sep[0]) > 127)) { change_locale = 1; } @@ -1892,7 +1887,8 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep, if (change_locale) { oldloc = setlocale(LC_CTYPE, NULL); if (!oldloc) { - PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale"); + PyErr_SetString(PyExc_RuntimeWarning, + "failed to get LC_CTYPE locale"); return -1; } @@ -1908,7 +1904,7 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep, } if (loc != NULL) { - /* Only set the locale temporarilty the LC_CTYPE locale + /* Only set the locale temporarily the LC_CTYPE locale if LC_NUMERIC locale is different than LC_CTYPE locale and decimal_point and/or thousands_sep are non-ASCII or longer than 1 byte */ @@ -1916,26 +1912,21 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep, } } - if (decimal_point != NULL) { - *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL); - if (*decimal_point == NULL) { - goto error; - } - } - if (thousands_sep != NULL) { - *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL); - if (*thousands_sep == NULL) { - goto error; - } + int res = -1; + + *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL); + if (*decimal_point == NULL) { + goto done; } - if (grouping != NULL) { - *grouping = lc->grouping; + *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL); + if (*thousands_sep == NULL) { + goto done; } res = 0; -error: +done: if (loc != NULL) { setlocale(LC_CTYPE, oldloc); } diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index ba09cc67becf..e12ba49bd2c1 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -3,6 +3,7 @@ of int.__float__, etc., that take and return unicode objects */ #include "Python.h" +#include "pycore_fileutils.h" #include <locale.h> /* Raises an exception about an unknown presentation type for this @@ -396,9 +397,10 @@ typedef struct { PyObject *decimal_point; 
PyObject *thousands_sep; const char *grouping; + char *grouping_buffer; } LocaleInfo; -#define STATIC_LOCALE_INFO_INIT {0, 0, 0} +#define LocaleInfo_STATIC_INIT {0, 0, 0, 0} /* describes the layout for an integer, see the comment in calc_number_widths() for details */ @@ -705,11 +707,22 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { - if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point, - &locale_info->thousands_sep, - &locale_info->grouping) < 0) { + struct lconv *lc = localeconv(); + if (_Py_GetLocaleconvNumeric(lc, + &locale_info->decimal_point, + &locale_info->thousands_sep) < 0) { return -1; } + + /* localeconv() grouping can become a dangling pointer or point + to a different string if another thread calls localeconv() during + the string formatting. Copy the string to avoid this risk. */ + locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping); + if (locale_info->grouping_buffer == NULL) { + PyErr_NoMemory(); + return -1; + } + locale_info->grouping = locale_info->grouping_buffer; break; } case LT_DEFAULT_LOCALE: @@ -743,6 +756,7 @@ free_locale_info(LocaleInfo *locale_info) { Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->thousands_sep); + PyMem_Free(locale_info->grouping_buffer); } /************************************************************************/ @@ -855,7 +869,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale = STATIC_LOCALE_INFO_INIT; + LocaleInfo locale = LocaleInfo_STATIC_INIT; /* no precision allowed on integers */ if (format->precision != -1) { @@ -1027,7 +1041,7 @@ format_float_internal(PyObject *value, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale = STATIC_LOCALE_INFO_INIT; + LocaleInfo locale = LocaleInfo_STATIC_INIT; if (format->precision > INT_MAX) { 
PyErr_SetString(PyExc_ValueError, "precision too big"); @@ -1190,7 +1204,7 @@ format_complex_internal(PyObject *value, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale = STATIC_LOCALE_INFO_INIT; + LocaleInfo locale = LocaleInfo_STATIC_INIT; if (format->precision > INT_MAX) { PyErr_SetString(PyExc_ValueError, "precision too big");