Mailman 3 October 2020 - Python-checkins

bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050)
by vstinner 31 Oct '20

31 Oct '20

https://github.com/python/cpython/commit/710e82630775774dceba5e8f24b1b10e6d… commit: 710e82630775774dceba5e8f24b1b10e6dfaf9b7 branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-31T01:02:09+01:00 summary: bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050) _io.TextIOWrapper no longer calls getpreferredencoding(False) of _bootlocale to get the locale encoding, but calls _Py_GetLocaleEncoding() instead. Add config_get_fs_encoding() sub-function. Reorganize also config_get_locale_encoding() code. files: M Include/internal/pycore_fileutils.h M Modules/_io/_iomodule.c M Modules/_io/_iomodule.h M Modules/_io/textio.c M Python/fileutils.c M Python/initconfig.c diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 9cb5fc66ee2e0..ff7bc4874c797 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -50,6 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric( PyAPI_FUNC(void) _Py_closerange(int first, int last); +PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void); + #ifdef __cplusplus } #endif diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index e430352a48e21..9147648b243be 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -593,31 +593,6 @@ _PyIO_get_module_state(void) return state; } -PyObject * -_PyIO_get_locale_module(_PyIO_State *state) -{ - PyObject *mod; - if (state->locale_module != NULL) { - assert(PyWeakref_CheckRef(state->locale_module)); - mod = PyWeakref_GET_OBJECT(state->locale_module); - if (mod != Py_None) { - Py_INCREF(mod); - return mod; - } - Py_CLEAR(state->locale_module); - } - mod = PyImport_ImportModule("_bootlocale"); - if (mod == NULL) - return NULL; - state->locale_module = PyWeakref_NewRef(mod, NULL); - if (state->locale_module == NULL) { - Py_DECREF(mod); - return NULL; - } - return mod; -} - - static int iomodule_traverse(PyObject *mod, visitproc visit, void *arg) { 
_PyIO_State *state = get_io_state(mod); diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index a8f3951e57feb..638797fd35736 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -150,7 +150,6 @@ typedef struct { #define IO_STATE() _PyIO_get_module_state() extern _PyIO_State *_PyIO_get_module_state(void); -extern PyObject *_PyIO_get_locale_module(_PyIO_State *); #ifdef MS_WINDOWS extern char _PyIO_get_console_type(PyObject *); diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 699b7e94c93bb..2078bb316b282 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -10,6 +10,7 @@ #include "Python.h" #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_fileutils.h" // _Py_GetLocaleEncoding() #include "pycore_object.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "structmember.h" // PyMemberDef @@ -27,7 +28,6 @@ _Py_IDENTIFIER(_dealloc_warn); _Py_IDENTIFIER(decode); _Py_IDENTIFIER(fileno); _Py_IDENTIFIER(flush); -_Py_IDENTIFIER(getpreferredencoding); _Py_IDENTIFIER(isatty); _Py_IDENTIFIER(mode); _Py_IDENTIFIER(name); @@ -1155,29 +1155,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } } if (encoding == NULL && self->encoding == NULL) { - PyObject *locale_module = _PyIO_get_locale_module(state); - if (locale_module == NULL) - goto catch_ImportError; - self->encoding = _PyObject_CallMethodIdOneArg( - locale_module, &PyId_getpreferredencoding, Py_False); - Py_DECREF(locale_module); + self->encoding = _Py_GetLocaleEncoding(); if (self->encoding == NULL) { - catch_ImportError: - /* - Importing locale can raise an ImportError because of - _functools, and locale.getpreferredencoding can raise an - ImportError if _locale is not available. These will happen - during module building. 
- */ - if (PyErr_ExceptionMatches(PyExc_ImportError)) { - PyErr_Clear(); - self->encoding = PyUnicode_FromString("ascii"); - } - else - goto error; + goto error; } - else if (!PyUnicode_Check(self->encoding)) - Py_CLEAR(self->encoding); + assert(PyUnicode_Check(self->encoding)); } if (self->encoding != NULL) { encoding = PyUnicode_AsUTF8(self->encoding); diff --git a/Python/fileutils.c b/Python/fileutils.c index e125ba46c21ba..ba2690429f366 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1,5 +1,6 @@ #include "Python.h" -#include "pycore_fileutils.h" +#include "pycore_fileutils.h" // fileutils definitions +#include "pycore_runtime.h" // _PyRuntime #include "osdefs.h" // SEP #include <locale.h> @@ -820,6 +821,46 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str, } +// Get the current locale encoding: locale.getpreferredencoding(False). +// See also config_get_locale_encoding() +PyObject * +_Py_GetLocaleEncoding(void) +{ +#ifdef _Py_FORCE_UTF8_LOCALE + // On Android langinfo.h and CODESET are missing, + // and UTF-8 is always used in mbstowcs() and wcstombs(). + return PyUnicode_FromString("UTF-8"); +#else + const PyPreConfig *preconfig = &_PyRuntime.preconfig; + if (preconfig->utf8_mode) { + return PyUnicode_FromString("UTF-8"); + } + +#if defined(MS_WINDOWS) + return PyUnicode_FromFormat("cp%u", GetACP()); +#else + const char *encoding = nl_langinfo(CODESET); + if (!encoding || encoding[0] == '\0') { +#ifdef _Py_FORCE_UTF8_FS_ENCODING + // nl_langinfo() can return an empty string when the LC_CTYPE locale is + // not supported. Default to UTF-8 in that case, because UTF-8 is the + // default charset on macOS. 
+ encoding = "UTF-8"; +#else + PyErr_SetString(PyExc_ValueError, + "failed to get the locale encoding: " + "nl_langinfo(CODESET) returns an empty string"); + return NULL; +#endif + } + // Decode from UTF-8 + return PyUnicode_FromString(encoding); +#endif // !CODESET + +#endif +} + + #ifdef MS_WINDOWS static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ diff --git a/Python/initconfig.c b/Python/initconfig.c index 6a13dc52ed776..e129278d8f8ad 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str, configured. */ PyStatus PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str, - const char *str) + const char *str) { return CONFIG_SET_BYTES_STR(config, config_str, str, "string"); } @@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config) static const wchar_t * -config_get_stdio_errors(void) +config_get_stdio_errors(const PyPreConfig *preconfig) { + if (preconfig->utf8_mode) { + /* UTF-8 Mode uses UTF-8/surrogateescape */ + return L"surrogateescape"; + } + #ifndef MS_WINDOWS const char *loc = setlocale(LC_CTYPE, NULL); if (loc != NULL) { @@ -1492,26 +1497,41 @@ config_get_stdio_errors(void) } +// See also _Py_GetLocaleEncoding() and config_get_fs_encoding() static PyStatus -config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding) +config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig, + wchar_t **locale_encoding) { +#ifdef _Py_FORCE_UTF8_LOCALE + return PyConfig_SetString(config, locale_encoding, L"utf-8"); +#else + if (preconfig->utf8_mode) { + return PyConfig_SetString(config, locale_encoding, L"utf-8"); + } + #ifdef MS_WINDOWS char encoding[20]; PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP()); return PyConfig_SetBytesString(config, locale_encoding, encoding); -#elif defined(_Py_FORCE_UTF8_LOCALE) - return PyConfig_SetString(config, locale_encoding, L"utf-8"); #else const char 
*encoding = nl_langinfo(CODESET); if (!encoding || encoding[0] == '\0') { +#ifdef _Py_FORCE_UTF8_FS_ENCODING + // nl_langinfo() can return an empty string when the LC_CTYPE locale is + // not supported. Default to UTF-8 in that case, because UTF-8 is the + // default charset on macOS. + encoding = "UTF-8"; +#else return _PyStatus_ERR("failed to get the locale encoding: " - "nl_langinfo(CODESET) failed"); + "nl_langinfo(CODESET) returns an empty string"); +#endif } /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */ return CONFIG_SET_BYTES_STR(config, locale_encoding, encoding, "nl_langinfo(CODESET)"); -#endif +#endif // !MS_WINDOWS +#endif // !_Py_FORCE_UTF8_LOCALE } @@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config, PyMem_RawFree(pythonioencoding); } - /* UTF-8 Mode uses UTF-8/surrogateescape */ - if (preconfig->utf8_mode) { - if (config->stdio_encoding == NULL) { - status = PyConfig_SetString(config, &config->stdio_encoding, - L"utf-8"); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - } - if (config->stdio_errors == NULL) { - status = PyConfig_SetString(config, &config->stdio_errors, - L"surrogateescape"); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - } - } - /* Choose the default error handler based on the current locale. 
*/ if (config->stdio_encoding == NULL) { - status = config_get_locale_encoding(config, &config->stdio_encoding); + status = config_get_locale_encoding(config, preconfig, + &config->stdio_encoding); if (_PyStatus_EXCEPTION(status)) { return status; } } if (config->stdio_errors == NULL) { - const wchar_t *errors = config_get_stdio_errors(); + const wchar_t *errors = config_get_stdio_errors(preconfig); assert(errors != NULL); status = PyConfig_SetString(config, &config->stdio_errors, errors); @@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config, } +// See also config_get_locale_encoding() +static PyStatus +config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig, + wchar_t **fs_encoding) +{ +#ifdef _Py_FORCE_UTF8_FS_ENCODING + return PyConfig_SetString(config, fs_encoding, L"utf-8"); +#elif defined(MS_WINDOWS) + const wchar_t *encoding; + if (preconfig->legacy_windows_fs_encoding) { + // Legacy Windows filesystem encoding: mbcs/replace + encoding = L"mbcs"; + } + else { + // Windows defaults to utf-8/surrogatepass (PEP 529) + encoding = L"utf-8"; + } + return PyConfig_SetString(config, fs_encoding, encoding); +#else // !MS_WINDOWS + if (preconfig->utf8_mode) { + return PyConfig_SetString(config, fs_encoding, L"utf-8"); + } + else if (_Py_GetForceASCII()) { + return PyConfig_SetString(config, fs_encoding, L"ascii"); + } + else { + return config_get_locale_encoding(config, preconfig, fs_encoding); + } +#endif // !MS_WINDOWS +} + + static PyStatus config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig) { PyStatus status; if (config->filesystem_encoding == NULL) { -#ifdef _Py_FORCE_UTF8_FS_ENCODING - status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8"); -#else - -#ifdef MS_WINDOWS - if (preconfig->legacy_windows_fs_encoding) { - /* Legacy Windows filesystem encoding: mbcs/replace */ - status = PyConfig_SetString(config, &config->filesystem_encoding, - L"mbcs"); - } - else -#endif - if 
(preconfig->utf8_mode) { - status = PyConfig_SetString(config, &config->filesystem_encoding, - L"utf-8"); - } -#ifndef MS_WINDOWS - else if (_Py_GetForceASCII()) { - status = PyConfig_SetString(config, &config->filesystem_encoding, - L"ascii"); - } -#endif - else { -#ifdef MS_WINDOWS - /* Windows defaults to utf-8/surrogatepass (PEP 529). */ - status = PyConfig_SetString(config, &config->filesystem_encoding, - L"utf-8"); -#else - status = config_get_locale_encoding(config, - &config->filesystem_encoding); -#endif - } -#endif /* !_Py_FORCE_UTF8_FS_ENCODING */ - + status = config_get_fs_encoding(config, preconfig, + &config->filesystem_encoding); if (_PyStatus_EXCEPTION(status)) { return status; }

1 0

bpo-42214: Fix check for NOTEQUAL token in the PEG parser for the barry_as_flufl rule (GH-23048)
by pablogsal 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/06f8c3328dcd81c84d1ee2b3a57b5381dc… commit: 06f8c3328dcd81c84d1ee2b3a57b5381dcb38482 branch: master author: Pablo Galindo <Pablogsal(a)gmail.com> committer: pablogsal <Pablogsal(a)gmail.com> date: 2020-10-30T23:48:42Z summary: bpo-42214: Fix check for NOTEQUAL token in the PEG parser for the barry_as_flufl rule (GH-23048) files: A Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst M Grammar/python.gram M Lib/test/test_syntax.py M Parser/parser.c M Parser/pegen.c M Parser/pegen.h diff --git a/Grammar/python.gram b/Grammar/python.gram index b8da554b8ec99..ae5e4b5d4ca64 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -428,7 +428,7 @@ compare_op_bitwise_or_pair[CmpopExprPair*]: | is_bitwise_or eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) } noteq_bitwise_or[CmpopExprPair*]: - | (tok='!=' {_PyPegen_check_barry_as_flufl(p) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) } + | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) } lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) } lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) } gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) } diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index c25b85246b919..e89d9401f2c39 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -955,6 +955,23 @@ def test_nested_named_except_blocks(self): code += f"{' '*4*12}pass" self._check_error(code, "too many statically nested blocks") + def test_barry_as_flufl_with_syntax_errors(self): + # The "barry_as_flufl" rule can produce some "bugs-at-a-distance" if + # is reading the wrong token in the presence of syntax errors later + # in the file. See bpo-42214 for more information. 
+ code = """ +def func1(): + if a != b: + raise ValueError + +def func2(): + try + return 1 + finally: + pass +""" + self._check_error(code, "invalid syntax") + def test_main(): support.run_unittest(SyntaxTestCase) from test import test_syntax diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst new file mode 100644 index 0000000000000..3f85bbe83901a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst @@ -0,0 +1,2 @@ +Fixed a possible crash in the PEG parser when checking for the '!=' token in +the ``barry_as_flufl`` rule. Patch by Pablo Galindo. diff --git a/Parser/parser.c b/Parser/parser.c index a22cf2752d18d..a882a81344cc6 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -21288,7 +21288,7 @@ _tmp_93_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ _tmp_93[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!='")); - _res = _PyPegen_check_barry_as_flufl ( p ) ? NULL : tok; + _res = _PyPegen_check_barry_as_flufl ( p , tok ) ? 
NULL : tok; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); diff --git a/Parser/pegen.c b/Parser/pegen.c index 216edd810e246..188fd282b7604 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -62,8 +62,7 @@ init_normalization(Parser *p) /* Checks if the NOTEQUAL token is valid given the current parser flags 0 indicates success and nonzero indicates failure (an exception may be set) */ int -_PyPegen_check_barry_as_flufl(Parser *p) { - Token *t = p->tokens[p->fill - 1]; +_PyPegen_check_barry_as_flufl(Parser *p, Token* t) { assert(t->bytes != NULL); assert(t->type == NOTEQUAL); diff --git a/Parser/pegen.h b/Parser/pegen.h index 841f1e5eb4396..f82a3a00b2ba0 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -263,7 +263,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *, int end_col_offset, PyArena *arena); expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *); -int _PyPegen_check_barry_as_flufl(Parser *); +int _PyPegen_check_barry_as_flufl(Parser *, Token *); mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *); // Error reporting helpers

1 0

GitHub Action: Add gdb to posix dependencies (GH-23043)
by miss-islington 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/10260c737b19a99eab532fcccf2b0a1ffc… commit: 10260c737b19a99eab532fcccf2b0a1ffcb0805d branch: 3.8 author: Miss Skeleton (bot) <31488909+miss-islington(a)users.noreply.github.com> committer: miss-islington <31488909+miss-islington(a)users.noreply.github.com> date: 2020-10-30T15:36:31-07:00 summary: GitHub Action: Add gdb to posix dependencies (GH-23043) Sort also dependencies and remove duplicates (liblzma-dev). (cherry picked from commit 6e03c0ad156797cd6e9132e895d55dac0344d340) Co-authored-by: Victor Stinner <vstinner(a)python.org> files: M .github/workflows/posix-deps-apt.sh diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 2b879d32f8150..5c7b9988be451 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -3,19 +3,19 @@ apt-get update apt-get -yq install \ build-essential \ - zlib1g-dev \ + gdb \ + lcov \ libbz2-dev \ + libffi-dev \ + libgdbm-dev \ liblzma-dev \ libncurses5-dev \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ - libgdbm-dev \ - tk-dev \ lzma \ lzma-dev \ - liblzma-dev \ - libffi-dev \ + tk-dev \ uuid-dev \ xvfb \ - lcov + zlib1g-dev

1 0

bpo-42208: GitHub Action: Add gdb to posix dependencies (GH-23043) (GH-23047)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/09c6120be8c70366495b027ae3daa21360… commit: 09c6120be8c70366495b027ae3daa213609de3ed branch: 3.9 author: Miss Skeleton (bot) <31488909+miss-islington(a)users.noreply.github.com> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T23:16:17+01:00 summary: bpo-42208: GitHub Action: Add gdb to posix dependencies (GH-23043) (GH-23047) Sort also dependencies and remove duplicates (liblzma-dev). (cherry picked from commit 6e03c0ad156797cd6e9132e895d55dac0344d340) Co-authored-by: Victor Stinner <vstinner(a)python.org> Co-authored-by: Victor Stinner <vstinner(a)python.org> files: M .github/workflows/posix-deps-apt.sh diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 2b879d32f8150..5c7b9988be451 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -3,19 +3,19 @@ apt-get update apt-get -yq install \ build-essential \ - zlib1g-dev \ + gdb \ + lcov \ libbz2-dev \ + libffi-dev \ + libgdbm-dev \ liblzma-dev \ libncurses5-dev \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ - libgdbm-dev \ - tk-dev \ lzma \ lzma-dev \ - liblzma-dev \ - libffi-dev \ + tk-dev \ uuid-dev \ xvfb \ - lcov + zlib1g-dev

1 0

GitHub Action: Add gdb to posix dependencies (GH-23043)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/6e03c0ad156797cd6e9132e895d55dac03… commit: 6e03c0ad156797cd6e9132e895d55dac0344d340 branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T22:52:30+01:00 summary: GitHub Action: Add gdb to posix dependencies (GH-23043) Sort also dependencies and remove duplicates (liblzma-dev). files: M .github/workflows/posix-deps-apt.sh diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 2b879d32f8150..5c7b9988be451 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -3,19 +3,19 @@ apt-get update apt-get -yq install \ build-essential \ - zlib1g-dev \ + gdb \ + lcov \ libbz2-dev \ + libffi-dev \ + libgdbm-dev \ liblzma-dev \ libncurses5-dev \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ - libgdbm-dev \ - tk-dev \ lzma \ lzma-dev \ - liblzma-dev \ - libffi-dev \ + tk-dev \ uuid-dev \ xvfb \ - lcov + zlib1g-dev

1 0

bpo-42208: Call GC collect earlier in PyInterpreterState_Clear() (GH-23044)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/eba5bf2f5672bf4861c626937597b85ac0… commit: eba5bf2f5672bf4861c626937597b85ac0c242b9 branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T22:51:02+01:00 summary: bpo-42208: Call GC collect earlier in PyInterpreterState_Clear() (GH-23044) The last GC collection is now done before clearing builtins and sys dictionaries. Add also assertions to ensure that gc.collect() is no longer called after _PyGC_Fini(). Pass also the tstate to PyInterpreterState_Clear() to pass the correct tstate to _PyGC_CollectNoFail() and _PyGC_Fini(). files: M Include/internal/pycore_interp.h M Modules/gcmodule.c M Python/pylifecycle.c M Python/pystate.c diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index eee369a44bfc7..69d2108da4322 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -267,6 +267,7 @@ extern PyStatus _PyInterpreterState_SetConfig( PyInterpreterState *interp, const PyConfig *config); +extern void _PyInterpreterState_Clear(PyThreadState *tstate); /* cross-interpreter data registry */ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index d90ff33684fe8..e6ad0f2dd4222 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1191,6 +1191,11 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() + assert(gcstate->garbage != NULL); + assert(!_PyErr_Occurred(tstate)); + #ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS if (tstate->interp->config._isolated_interpreter) { // bpo-40533: The garbage collector must not be run on parallel on @@ -2073,16 +2078,13 @@ PyGC_Collect(void) Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate) { - assert(!_PyErr_Occurred(tstate)); - - GCState *gcstate = &tstate->interp->gc; 
- /* Ideally, this function is only called on interpreter shutdown, and therefore not recursively. Unfortunately, when there are daemon threads, a daemon thread can start a cyclic garbage collection during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ + GCState *gcstate = &tstate->interp->gc; if (gcstate->collecting) { return 0; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index adef1617f6132..ff58c1b9153bd 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1576,10 +1576,7 @@ finalize_interp_clear(PyThreadState *tstate) int is_main_interp = _Py_IsMainInterpreter(tstate); /* Clear interpreter state and all thread states */ - PyInterpreterState_Clear(tstate->interp); - - /* Last explicit GC collection */ - _PyGC_CollectNoFail(tstate); + _PyInterpreterState_Clear(tstate); /* Clear all loghooks */ /* Both _PySys_Audit function and users still need PyObject, such as tuple. @@ -1588,8 +1585,6 @@ finalize_interp_clear(PyThreadState *tstate) _PySys_ClearAuditHooks(tstate); } - _PyGC_Fini(tstate); - if (is_main_interp) { _Py_HashRandomization_Fini(); _PyArg_Fini(); diff --git a/Python/pystate.c b/Python/pystate.c index eb24f2b800607..e88898670cdff 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -268,14 +268,11 @@ PyInterpreterState_New(void) } -void -PyInterpreterState_Clear(PyInterpreterState *interp) +static void +interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) { _PyRuntimeState *runtime = interp->runtime; - /* Use the current Python thread state to call audit hooks, - not the current Python thread state of 'interp'. 
*/ - PyThreadState *tstate = _PyThreadState_GET(); if (_PySys_Audit(tstate, "cpython.PyInterpreterState_Clear", NULL) < 0) { _PyErr_Clear(tstate); } @@ -306,6 +303,12 @@ PyInterpreterState_Clear(PyInterpreterState *interp) if (_PyRuntimeState_GetFinalizing(runtime) == NULL) { _PyWarnings_Fini(interp); } + + /* Last garbage collection on this interpreter */ + _PyGC_CollectNoFail(tstate); + + _PyGC_Fini(tstate); + /* We don't clear sysdict and builtins until the end of this function. Because clearing other attributes can execute arbitrary Python code which requires sysdict and builtins. */ @@ -320,6 +323,25 @@ PyInterpreterState_Clear(PyInterpreterState *interp) } +void +PyInterpreterState_Clear(PyInterpreterState *interp) +{ + // Use the current Python thread state to call audit hooks and to collect + // garbage. It can be different than the current Python thread state + // of 'interp'. + PyThreadState *current_tstate = _PyThreadState_GET(); + + interpreter_clear(interp, current_tstate); +} + + +void +_PyInterpreterState_Clear(PyThreadState *tstate) +{ + interpreter_clear(tstate->interp, tstate); +} + + static void zapthreads(PyInterpreterState *interp, int check_current) {

1 0

bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045)
by ericsnowcurrently 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/4fe72090deb7fb7bc09bfa56c92f6b3b09… commit: 4fe72090deb7fb7bc09bfa56c92f6b3b0967d395 branch: master author: Eric Snow <ericsnowcurrently(a)gmail.com> committer: ericsnowcurrently <ericsnowcurrently(a)gmail.com> date: 2020-10-30T15:46:52-06:00 summary: bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045) This is a little bit of clean-up, small fixes, and additional helpers prior to building an updated & accurate list of globals to eliminate. files: A Tools/c-analyzer/c_analyzer/match.py A Tools/c-analyzer/c_parser/match.py M Tools/c-analyzer/c_analyzer/__init__.py M Tools/c-analyzer/c_analyzer/__main__.py M Tools/c-analyzer/c_analyzer/analyze.py M Tools/c-analyzer/c_analyzer/datafiles.py M Tools/c-analyzer/c_analyzer/info.py M Tools/c-analyzer/c_common/scriptutil.py M Tools/c-analyzer/c_parser/datafiles.py M Tools/c-analyzer/c_parser/info.py M Tools/c-analyzer/c_parser/parser/__init__.py M Tools/c-analyzer/c_parser/parser/_info.py M Tools/c-analyzer/c_parser/parser/_regexes.py M Tools/c-analyzer/cpython/__main__.py M Tools/c-analyzer/cpython/_analyzer.py M Tools/c-analyzer/cpython/_parser.py diff --git a/Tools/c-analyzer/c_analyzer/__init__.py b/Tools/c-analyzer/c_analyzer/__init__.py index 4a01cd396f5f5..171fa25102bff 100644 --- a/Tools/c-analyzer/c_analyzer/__init__.py +++ b/Tools/c-analyzer/c_analyzer/__init__.py @@ -4,10 +4,12 @@ from c_parser.info import ( KIND, TypeDeclaration, - filter_by_kind, - collate_by_kind_group, resolve_parsed, ) +from c_parser.match import ( + filter_by_kind, + group_by_kinds, +) from . 
import ( analyze as _analyze, datafiles as _datafiles, @@ -55,7 +57,7 @@ def analyze_decls(decls, known, *, ) decls = list(decls) - collated = collate_by_kind_group(decls) + collated = group_by_kinds(decls) types = {decl: None for decl in collated['type']} typespecs = _analyze.get_typespecs(types) diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py index 1fd45b985d9bc..4cff1d4efb5fe 100644 --- a/Tools/c-analyzer/c_analyzer/__main__.py +++ b/Tools/c-analyzer/c_analyzer/__main__.py @@ -1,5 +1,6 @@ import io import logging +import os import os.path import re import sys @@ -9,6 +10,7 @@ add_verbosity_cli, add_traceback_cli, add_sepval_cli, + add_progress_cli, add_files_cli, add_commands_cli, process_args_by_key, @@ -17,11 +19,13 @@ filter_filenames, iter_marks, ) -from c_parser.info import KIND, is_type_decl +from c_parser.info import KIND +from c_parser.match import is_type_decl +from .match import filter_forward from . import ( analyze as _analyze, - check_all as _check_all, datafiles as _datafiles, + check_all as _check_all, ) @@ -44,7 +48,7 @@ TABLE_SECTIONS = { 'types': ( ['kind', 'name', 'data', 'file'], - is_type_decl, + KIND.is_type_decl, (lambda v: (v.kind.value, v.filename or '', v.name)), ), 'typedefs': 'types', @@ -167,9 +171,7 @@ def handle_failure(failure, data): print(f'{data.filename}:{name} - {failure}') elif fmt == 'summary': def handle_failure(failure, data): - parent = data.parent or '' - funcname = parent if isinstance(parent, str) else parent.name - print(f'{data.filename:35}\t{funcname or "-":35}\t{data.name:40}\t{failure}') + print(_fmt_one_summary(data, failure)) elif fmt == 'full': div = '' def handle_failure(failure, data): @@ -230,6 +232,15 @@ def section(name): yield f'grand total: {total}' +def _fmt_one_summary(item, extra=None): + parent = item.parent or '' + funcname = parent if isinstance(parent, str) else parent.name + if extra: + return f'{item.filename:35}\t{funcname or 
"-":35}\t{item.name:40}\t{extra}' + else: + return f'{item.filename:35}\t{funcname or "-":35}\t{item.name}' + + def fmt_full(analysis): # XXX Support sorting. items = sorted(analysis, key=lambda v: v.key) @@ -272,10 +283,12 @@ def process_checks(args): args.checks = [check] else: process_checks = add_checks_cli(parser, checks=checks) + process_progress = add_progress_cli(parser) process_output = add_output_cli(parser, default=None) process_files = add_files_cli(parser, **kwargs) return [ process_checks, + process_progress, process_output, process_files, ] @@ -288,6 +301,7 @@ def cmd_check(filenames, *, relroot=None, failfast=False, iter_filenames=None, + track_progress=None, verbosity=VERBOSITY, _analyze=_analyze, _CHECKS=CHECKS, @@ -304,36 +318,53 @@ def cmd_check(filenames, *, ) = _get_check_handlers(fmt, printer, verbosity) filenames = filter_filenames(filenames, iter_filenames) + if track_progress: + filenames = track_progress(filenames) - logger.info('analyzing...') + logger.info('analyzing files...') analyzed = _analyze(filenames, **kwargs) if relroot: analyzed.fix_filenames(relroot) + decls = filter_forward(analyzed, markpublic=True) - logger.info('checking...') - numfailed = 0 - for data, failure in _check_all(analyzed, checks, failfast=failfast): + logger.info('checking analysis results...') + failed = [] + for data, failure in _check_all(decls, checks, failfast=failfast): if data is None: printer.info('stopping after one failure') break - if div is not None and numfailed > 0: + if div is not None and len(failed) > 0: printer.info(div) - numfailed += 1 + failed.append(data) handle_failure(failure, data) handle_after() printer.info('-------------------------') - logger.info(f'total failures: {numfailed}') + logger.info(f'total failures: {len(failed)}') logger.info('done checking') - if numfailed > 0: - sys.exit(numfailed) + if fmt == 'summary': + print('Categorized by storage:') + print() + from .match import group_by_storage + grouped = 
group_by_storage(failed, ignore_non_match=False) + for group, decls in grouped.items(): + print() + print(group) + for decl in decls: + print(' ', _fmt_one_summary(decl)) + print(f'subtotal: {len(decls)}') + + if len(failed) > 0: + sys.exit(len(failed)) def _cli_analyze(parser, **kwargs): + process_progress = add_progress_cli(parser) process_output = add_output_cli(parser) process_files = add_files_cli(parser, **kwargs) return [ + process_progress, process_output, process_files, ] @@ -343,6 +374,7 @@ def _cli_analyze(parser, **kwargs): def cmd_analyze(filenames, *, fmt=None, iter_filenames=None, + track_progress=None, verbosity=None, _analyze=_analyze, formats=FORMATS, @@ -356,49 +388,46 @@ def cmd_analyze(filenames, *, raise ValueError(f'unsupported fmt {fmt!r}') filenames = filter_filenames(filenames, iter_filenames) - if verbosity == 2: - def iter_filenames(filenames=filenames): - marks = iter_marks() - for filename in filenames: - print(next(marks), end='') - yield filename - filenames = iter_filenames() - elif verbosity > 2: - def iter_filenames(filenames=filenames): - for filename in filenames: - print(f'<{filename}>') - yield filename - filenames = iter_filenames() - - logger.info('analyzing...') + if track_progress: + filenames = track_progress(filenames) + + logger.info('analyzing files...') analyzed = _analyze(filenames, **kwargs) + decls = filter_forward(analyzed, markpublic=True) - for line in do_fmt(analyzed): + for line in do_fmt(decls): print(line) def _cli_data(parser, filenames=None, known=None): ArgumentParser = type(parser) common = ArgumentParser(add_help=False) - if filenames is None: - common.add_argument('filenames', metavar='FILE', nargs='+') + # These flags will get processed by the top-level parse_args(). 
+ add_verbosity_cli(common) + add_traceback_cli(common) subs = parser.add_subparsers(dest='datacmd') sub = subs.add_parser('show', parents=[common]) if known is None: sub.add_argument('--known', required=True) + if filenames is None: + sub.add_argument('filenames', metavar='FILE', nargs='+') - sub = subs.add_parser('dump') + sub = subs.add_parser('dump', parents=[common]) if known is None: sub.add_argument('--known') sub.add_argument('--show', action='store_true') + process_progress = add_progress_cli(sub) - sub = subs.add_parser('check') + sub = subs.add_parser('check', parents=[common]) if known is None: sub.add_argument('--known', required=True) - return None + def process_args(args): + if args.datacmd == 'dump': + process_progress(args) + return process_args def cmd_data(datacmd, filenames, known=None, *, @@ -406,6 +435,7 @@ def cmd_data(datacmd, filenames, known=None, *, formats=FORMATS, extracolumns=None, relroot=None, + track_progress=None, **kwargs ): kwargs.pop('verbosity', None) @@ -417,6 +447,8 @@ def cmd_data(datacmd, filenames, known=None, *, for line in do_fmt(known): print(line) elif datacmd == 'dump': + if track_progress: + filenames = track_progress(filenames) analyzed = _analyze(filenames, **kwargs) if known is None or usestdout: outfile = io.StringIO() diff --git a/Tools/c-analyzer/c_analyzer/analyze.py b/Tools/c-analyzer/c_analyzer/analyze.py index d8ae915e42002..267d058e07abd 100644 --- a/Tools/c-analyzer/c_analyzer/analyze.py +++ b/Tools/c-analyzer/c_analyzer/analyze.py @@ -3,15 +3,19 @@ TypeDeclaration, POTSType, FuncPtr, +) +from c_parser.match import ( is_pots, is_funcptr, ) from .info import ( IGNORED, UNKNOWN, - is_system_type, SystemType, ) +from .match import ( + is_system_type, +) def get_typespecs(typedecls): diff --git a/Tools/c-analyzer/c_analyzer/datafiles.py b/Tools/c-analyzer/c_analyzer/datafiles.py index 0de438cce470f..d37a4eefe351a 100644 --- a/Tools/c-analyzer/c_analyzer/datafiles.py +++ 
b/Tools/c-analyzer/c_analyzer/datafiles.py @@ -1,5 +1,6 @@ import c_common.tables as _tables import c_parser.info as _info +import c_parser.match as _match import c_parser.datafiles as _parser from . import analyze as _analyze @@ -17,7 +18,7 @@ def analyze_known(known, *, handle_unresolved=True, ): knowntypes = knowntypespecs = {} - collated = _info.collate_by_kind_group(known) + collated = _match.group_by_kinds(known) types = {decl: None for decl in collated['type']} typespecs = _analyze.get_typespecs(types) def analyze_decl(decl): diff --git a/Tools/c-analyzer/c_analyzer/info.py b/Tools/c-analyzer/c_analyzer/info.py index 23d77611a4c3c..be9281502d250 100644 --- a/Tools/c-analyzer/c_analyzer/info.py +++ b/Tools/c-analyzer/c_analyzer/info.py @@ -7,7 +7,11 @@ HighlevelParsedItem, Declaration, TypeDeclaration, +) +from c_parser.match import ( is_type_decl, +) +from .match import ( is_process_global, ) @@ -16,44 +20,6 @@ UNKNOWN = _misc.Labeled('UNKNOWN') -# XXX Use known.tsv for these? -SYSTEM_TYPES = { - 'int8_t', - 'uint8_t', - 'int16_t', - 'uint16_t', - 'int32_t', - 'uint32_t', - 'int64_t', - 'uint64_t', - 'size_t', - 'ssize_t', - 'intptr_t', - 'uintptr_t', - 'wchar_t', - '', - # OS-specific - 'pthread_cond_t', - 'pthread_mutex_t', - 'pthread_key_t', - 'atomic_int', - 'atomic_uintptr_t', - '', - # lib-specific - 'WINDOW', # curses - 'XML_LChar', - 'XML_Size', - 'XML_Parser', - 'enum XML_Error', - 'enum XML_Status', - '', -} - - -def is_system_type(typespec): - return typespec in SYSTEM_TYPES - - class SystemType(TypeDeclaration): def __init__(self, name): diff --git a/Tools/c-analyzer/c_analyzer/match.py b/Tools/c-analyzer/c_analyzer/match.py new file mode 100644 index 0000000000000..5c27e4a224afc --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/match.py @@ -0,0 +1,212 @@ +import os.path + +from c_parser import ( + info as _info, + match as _match, +) + + +_KIND = _info.KIND + + +# XXX Use known.tsv for these? 
+SYSTEM_TYPES = { + 'int8_t', + 'uint8_t', + 'int16_t', + 'uint16_t', + 'int32_t', + 'uint32_t', + 'int64_t', + 'uint64_t', + 'size_t', + 'ssize_t', + 'intptr_t', + 'uintptr_t', + 'wchar_t', + '', + # OS-specific + 'pthread_cond_t', + 'pthread_mutex_t', + 'pthread_key_t', + 'atomic_int', + 'atomic_uintptr_t', + '', + # lib-specific + 'WINDOW', # curses + 'XML_LChar', + 'XML_Size', + 'XML_Parser', + 'enum XML_Error', + 'enum XML_Status', + '', +} + + +def is_system_type(typespec): + return typespec in SYSTEM_TYPES + + +################################## +# decl matchers + +def is_public(decl): + if not decl.filename.endswith('.h'): + return False + if 'Include' not in decl.filename.split(os.path.sep): + return False + return True + + +def is_process_global(vardecl): + kind, storage, _, _, _ = _info.get_parsed_vartype(vardecl) + if kind is not _KIND.VARIABLE: + raise NotImplementedError(vardecl) + if 'static' in (storage or ''): + return True + + if hasattr(vardecl, 'parent'): + parent = vardecl.parent + else: + parent = vardecl.get('parent') + return not parent + + +def is_fixed_type(vardecl): + if not vardecl: + return None + _, _, _, typespec, abstract = _info.get_parsed_vartype(vardecl) + if 'typeof' in typespec: + raise NotImplementedError(vardecl) + elif not abstract: + return True + + if '*' not in abstract: + # XXX What about []? + return True + elif _match._is_funcptr(abstract): + return True + else: + for after in abstract.split('*')[1:]: + if not after.lstrip().startswith('const'): + return False + else: + return True + + +def is_immutable(vardecl): + if not vardecl: + return None + if not is_fixed_type(vardecl): + return False + _, _, typequal, _, _ = _info.get_parsed_vartype(vardecl) + # If there, it can only be "const" or "volatile". 
+ return typequal == 'const' + + +def is_public_api(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return not _match.is_forward_decl(decl) + else: + return _match.is_external_reference(decl) + + +def is_public_declaration(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return _match.is_forward_decl(decl) + else: + return _match.is_external_reference(decl) + + +def is_public_definition(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return not _match.is_forward_decl(decl) + else: + return not _match.is_external_reference(decl) + + +def is_public_impl(decl): + if not _KIND.is_decl(decl.kind): + return False + # See filter_forward() about "is_public". + return getattr(decl, 'is_public', False) + + +def is_module_global_decl(decl): + if is_public_impl(decl): + return False + if _match.is_forward_decl(decl): + return False + return not _match.is_local_var(decl) + + +################################## +# filtering with matchers + +def filter_forward(items, *, markpublic=False): + if markpublic: + public = set() + actual = [] + for item in items: + if is_public_api(item): + public.add(item.id) + elif not _match.is_forward_decl(item): + actual.append(item) + else: + # non-public duplicate! + # XXX + raise Exception(item) + for item in actual: + _info.set_flag(item, 'is_public', item.id in public) + yield item + else: + for item in items: + if _match.is_forward_decl(item): + continue + yield item + + +################################## +# grouping with matchers + +def group_by_storage(decls, **kwargs): + def is_module_global(decl): + if not is_module_global_decl(decl): + return False + if decl.kind == _KIND.VARIABLE: + if _info.get_effective_storage(decl) == 'static': + # This is covered by is_static_module_global(). 
+ return False + return True + def is_static_module_global(decl): + if not _match.is_global_var(decl): + return False + return _info.get_effective_storage(decl) == 'static' + def is_static_local(decl): + if not _match.is_local_var(decl): + return False + return _info.get_effective_storage(decl) == 'static' + #def is_local(decl): + # if not _match.is_local_var(decl): + # return False + # return _info.get_effective_storage(decl) != 'static' + categories = { + #'extern': is_extern, + 'published': is_public_impl, + 'module-global': is_module_global, + 'static-module-global': is_static_module_global, + 'static-local': is_static_local, + } + return _match.group_by_category(decls, categories, **kwargs) diff --git a/Tools/c-analyzer/c_common/scriptutil.py b/Tools/c-analyzer/c_common/scriptutil.py index 939a85003b296..222059015d76e 100644 --- a/Tools/c-analyzer/c_common/scriptutil.py +++ b/Tools/c-analyzer/c_common/scriptutil.py @@ -10,6 +10,9 @@ from . import fsutil, strutil, iterutil, logging as loggingutil +_NOT_SET = object() + + def get_prog(spec=None, *, absolute=False, allowsuffix=True): if spec is None: _, spec = _find_script() @@ -313,6 +316,22 @@ def _parse_files(filenames): yield filename.strip() +def add_progress_cli(parser, *, threshold=VERBOSITY, **kwargs): + parser.add_argument('--progress', dest='track_progress', action='store_const', const=True) + parser.add_argument('--no-progress', dest='track_progress', action='store_false') + parser.set_defaults(track_progress=True) + + def process_args(args): + if args.track_progress: + ns = vars(args) + verbosity = ns.get('verbosity', VERBOSITY) + if verbosity <= threshold: + args.track_progress = track_progress_compact + else: + args.track_progress = track_progress_flat + return process_args + + def add_failure_filtering_cli(parser, pool, *, default=False): parser.add_argument('--fail', action='append', metavar=f'"{{all|{"|".join(sorted(pool))}}},..."') @@ -551,13 +570,39 @@ def _iter_filenames(filenames, 
iter_files): raise NotImplementedError -def iter_marks(mark='.', *, group=5, groups=2, lines=10, sep=' '): +def track_progress_compact(items, *, groups=5, **mark_kwargs): + last = os.linesep + marks = iter_marks(groups=groups, **mark_kwargs) + for item in items: + last = next(marks) + print(last, end='', flush=True) + yield item + if not last.endswith(os.linesep): + print() + + +def track_progress_flat(items, fmt='<{}>'): + for item in items: + print(fmt.format(item), flush=True) + yield item + + +def iter_marks(mark='.', *, group=5, groups=2, lines=_NOT_SET, sep=' '): mark = mark or '' + group = group if group and group > 1 else 1 + groups = groups if groups and groups > 1 else 1 + sep = f'{mark}{sep}' if sep else mark end = f'{mark}{os.linesep}' div = os.linesep perline = group * groups - perlines = perline * lines + if lines is _NOT_SET: + # By default we try to put about 100 in each line group. + perlines = 100 // perline * perline + elif not lines or lines < 0: + perlines = None + else: + perlines = perline * lines if perline == 1: yield end @@ -568,7 +613,7 @@ def iter_marks(mark='.', *, group=5, groups=2, lines=10, sep=' '): while True: if count % perline == 0: yield end - if count % perlines == 0: + if perlines and count % perlines == 0: yield div elif count % group == 0: yield sep diff --git a/Tools/c-analyzer/c_parser/datafiles.py b/Tools/c-analyzer/c_parser/datafiles.py index 5bdb946b1772a..cdd69b1f9b2d8 100644 --- a/Tools/c-analyzer/c_parser/datafiles.py +++ b/Tools/c-analyzer/c_parser/datafiles.py @@ -92,7 +92,7 @@ def write_decls_tsv(decls, outfile, extracolumns=None, *, **kwargs ): # XXX Move the row rendering here. 
- _write_decls_tsv(rows, outfile, extracolumns, relroot, kwargs) + _write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs) def _iter_decls_tsv(infile, extracolumns=None, relroot=None): diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py index a07ce2e0ccb8d..798a45d2e08e7 100644 --- a/Tools/c-analyzer/c_parser/info.py +++ b/Tools/c-analyzer/c_parser/info.py @@ -7,85 +7,12 @@ import c_common.misc as _misc import c_common.strutil as _strutil import c_common.tables as _tables -from .parser._regexes import SIMPLE_TYPE +from .parser._regexes import SIMPLE_TYPE, _STORAGE FIXED_TYPE = _misc.Labeled('FIXED_TYPE') -POTS_REGEX = re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE) - - -def is_pots(typespec): - if not typespec: - return None - if type(typespec) is not str: - _, _, _, typespec, _ = get_parsed_vartype(typespec) - return POTS_REGEX.match(typespec) is not None - - -def is_funcptr(vartype): - if not vartype: - return None - _, _, _, _, abstract = get_parsed_vartype(vartype) - return _is_funcptr(abstract) - - -def _is_funcptr(declstr): - if not declstr: - return None - # XXX Support "(<name>*)(". - return '(*)(' in declstr.replace(' ', '') - - -def is_exported_symbol(decl): - _, storage, _, _, _ = get_parsed_vartype(decl) - raise NotImplementedError - - -def is_process_global(vardecl): - kind, storage, _, _, _ = get_parsed_vartype(vardecl) - if kind is not KIND.VARIABLE: - raise NotImplementedError(vardecl) - if 'static' in (storage or ''): - return True - - if hasattr(vardecl, 'parent'): - parent = vardecl.parent - else: - parent = vardecl.get('parent') - return not parent - - -def is_fixed_type(vardecl): - if not vardecl: - return None - _, _, _, typespec, abstract = get_parsed_vartype(vardecl) - if 'typeof' in typespec: - raise NotImplementedError(vardecl) - elif not abstract: - return True - - if '*' not in abstract: - # XXX What about []? 
- return True - elif _is_funcptr(abstract): - return True - else: - for after in abstract.split('*')[1:]: - if not after.lstrip().startswith('const'): - return False - else: - return True - - -def is_immutable(vardecl): - if not vardecl: - return None - if not is_fixed_type(vardecl): - return False - _, _, typequal, _, _ = get_parsed_vartype(vardecl) - # If there, it can only be "const" or "volatile". - return typequal == 'const' +STORAGE = frozenset(_STORAGE) ############################# @@ -214,58 +141,8 @@ def resolve_group(cls, group): KIND._GROUPS.update((k.value, {k}) for k in KIND) -# The module-level kind-related helpers (below) deal with <item>.kind: - -def is_type_decl(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.is_type_decl(kind) - - -def is_decl(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.is_decl(kind) - - -def filter_by_kind(items, kind): - if kind == 'type': - kinds = KIND._TYPE_DECLS - elif kind == 'decl': - kinds = KIND._TYPE_DECLS - try: - okay = kind in KIND - except TypeError: - kinds = set(kind) - else: - kinds = {kind} if okay else set(kind) - for item in items: - if item.kind in kinds: - yield item - - -def collate_by_kind(items): - collated = {kind: [] for kind in KIND} - for item in items: - try: - collated[item.kind].append(item) - except KeyError: - raise ValueError(f'unsupported kind in {item!r}') - return collated - - -def get_kind_group(kind): - # Handle ParsedItem, Declaration, etc.. 
- kind = getattr(kind, 'kind', kind) - return KIND.get_group(kind) - - -def collate_by_kind_group(items): - collated = {KIND.get_group(k): [] for k in KIND} - for item in items: - group = KIND.get_group(item.kind) - collated[group].append(item) - return collated +def get_kind_group(item): + return KIND.get_group(item.kind) ############################# @@ -484,6 +361,27 @@ def get_parsed_vartype(decl): return kind, storage, typequal, typespec, abstract +def get_default_storage(decl): + if decl.kind not in (KIND.VARIABLE, KIND.FUNCTION): + return None + return 'extern' if decl.parent is None else 'auto' + + +def get_effective_storage(decl, *, default=None): + # Note that "static" limits access to just that C module + # and "extern" (the default for module-level) allows access + # outside the C module. + if default is None: + default = get_default_storage(decl) + if default is None: + return None + try: + storage = decl.storage + except AttributeError: + storage, _ = _get_vartype(decl.data) + return storage or default + + ############################# # high-level @@ -997,7 +895,7 @@ def _unformat_data(cls, datastr, fmt=None): def __init__(self, file, name, data, parent=None, storage=None): super().__init__(file, name, data, parent, - _extra={'storage': storage}, + _extra={'storage': storage or None}, _shortkey=f'({parent.name}).{name}' if parent else name, _key=(str(file), # Tilde comes after all other ascii characters. @@ -1005,6 +903,11 @@ def __init__(self, file, name, data, parent=None, storage=None): name, ), ) + if storage: + if storage not in STORAGE: + # The parser must need an update. + raise NotImplementedError(storage) + # Otherwise we trust the compiler to have validated it. 
@property def vartype(self): @@ -1413,6 +1316,13 @@ def resolve_parsed(parsed): return cls.from_parsed(parsed) +def set_flag(item, name, value): + try: + setattr(item, name, value) + except AttributeError: + object.__setattr__(item, name, value) + + ############################# # composite diff --git a/Tools/c-analyzer/c_parser/match.py b/Tools/c-analyzer/c_parser/match.py new file mode 100644 index 0000000000000..3b5068fd11b68 --- /dev/null +++ b/Tools/c-analyzer/c_parser/match.py @@ -0,0 +1,177 @@ +import re + +from . import info as _info +from .parser._regexes import SIMPLE_TYPE + + +_KIND = _info.KIND + + +def match_storage(decl, expected): + default = _info.get_default_storage(decl) + #assert default + if expected is None: + expected = {default} + elif isinstance(expected, str): + expected = {expected or default} + elif not expected: + expected = _info.STORAGE + else: + expected = {v or default for v in expected} + storage = _info.get_effective_storage(decl, default=default) + return storage in expected + + +################################## +# decl matchers + +def is_type_decl(item): + return _KIND.is_type_decl(item.kind) + + +def is_decl(item): + return _KIND.is_decl(item.kind) + + +def is_pots(typespec, *, + _regex=re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE), + ): + + if not typespec: + return None + if type(typespec) is not str: + _, _, _, typespec, _ = _info.get_parsed_vartype(typespec) + return _regex.match(typespec) is not None + + +def is_funcptr(vartype): + if not vartype: + return None + _, _, _, _, abstract = _info.get_parsed_vartype(vartype) + return _is_funcptr(abstract) + + +def _is_funcptr(declstr): + if not declstr: + return None + # XXX Support "(<name>*)(". + return '(*)(' in declstr.replace(' ', '') + + +def is_forward_decl(decl): + if decl.kind is _KIND.TYPEDEF: + return False + elif is_type_decl(decl): + return not decl.data + elif decl.kind is _KIND.FUNCTION: + # XXX This doesn't work with ParsedItem. 
+ return decl.signature.isforward + elif decl.kind is _KIND.VARIABLE: + # No var decls are considered forward (or all are...). + return False + else: + raise NotImplementedError(decl) + + +def can_have_symbol(decl): + return decl.kind in (_KIND.VARIABLE, _KIND.FUNCTION) + + +def has_external_symbol(decl): + if not can_have_symbol(decl): + return False + if _info.get_effective_storage(decl) != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return not decl.signature.isforward + else: + # It must be a variable, which can only be implicitly extern here. + return decl.storage != 'extern' + + +def has_internal_symbol(decl): + if not can_have_symbol(decl): + return False + return _info.get_actual_storage(decl) == 'static' + + +def is_external_reference(decl): + if not can_have_symbol(decl): + return False + # We have to check the declared storage rather tnan the effective. + if decl.storage != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return decl.signature.isforward + # Otherwise it's a variable. + return True + + +def is_local_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return True if decl.parent else False + + +def is_global_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return False if decl.parent else True + + +################################## +# filtering with matchers + +def filter_by_kind(items, kind): + if kind == 'type': + kinds = _KIND._TYPE_DECLS + elif kind == 'decl': + kinds = _KIND._TYPE_DECLS + try: + okay = kind in _KIND + except TypeError: + kinds = set(kind) + else: + kinds = {kind} if okay else set(kind) + for item in items: + if item.kind in kinds: + yield item + + +################################## +# grouping with matchers + +def group_by_category(decls, categories, *, ignore_non_match=True): + collated = {} + for decl in decls: + # Matchers should be mutually exclusive. (First match wins.) 
+ for category, match in categories.items(): + if match(decl): + if category not in collated: + collated[category] = [decl] + else: + collated[category].append(decl) + break + else: + if not ignore_non_match: + raise Exception(f'no match for {decl!r}') + return collated + + +def group_by_kind(items): + collated = {kind: [] for kind in _KIND} + for item in items: + try: + collated[item.kind].append(item) + except KeyError: + raise ValueError(f'unsupported kind in {item!r}') + return collated + + +def group_by_kinds(items): + # Collate into kind groups (decl, type, etc.). + collated = {_KIND.get_group(k): [] for k in _KIND} + for item in items: + group = _KIND.get_group(item.kind) + collated[group].append(item) + return collated diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py index 7cb34caf09eba..4b201c6354023 100644 --- a/Tools/c-analyzer/c_parser/parser/__init__.py +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -163,6 +163,8 @@ def _parse(srclines, anon_name): def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): + maxtext = maxtext if maxtext and maxtext > 0 else None + maxlines = maxlines if maxlines and maxlines > 0 else None filestack = [] allinfo = {} # "lines" should be (fileinfo, data), as produced by the preprocessor code. 
@@ -181,9 +183,7 @@ def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): _logger.debug(f'-> {line}') srcinfo._add_line(line, fileinfo.lno) - if len(srcinfo.text) > maxtext: - break - if srcinfo.end - srcinfo.start > maxlines: + if srcinfo.too_much(maxtext, maxlines): break while srcinfo._used(): yield srcinfo diff --git a/Tools/c-analyzer/c_parser/parser/_info.py b/Tools/c-analyzer/c_parser/parser/_info.py index 2dcd5e5e760b7..cc21931b66cc5 100644 --- a/Tools/c-analyzer/c_parser/parser/_info.py +++ b/Tools/c-analyzer/c_parser/parser/_info.py @@ -1,3 +1,5 @@ +import re + from ..info import KIND, ParsedItem, FileInfo @@ -121,6 +123,19 @@ def resolve(self, kind, data, name, parent=None): def done(self): self._set_ready() + def too_much(self, maxtext, maxlines): + if maxtext and len(self.text) > maxtext: + pass + elif maxlines and self.end - self.start > maxlines: + pass + else: + return False + + #if re.fullmatch(r'[^;]+\[\][ ]*=[ ]*[{]([ ]*\d+,)*([ ]*\d+,?)\s*', + # self._current.text): + # return False + return True + def _set_ready(self): if self._current is None: self._ready = False diff --git a/Tools/c-analyzer/c_parser/parser/_regexes.py b/Tools/c-analyzer/c_parser/parser/_regexes.py index e9bc31d335a7d..cb85a59aaa16c 100644 --- a/Tools/c-analyzer/c_parser/parser/_regexes.py +++ b/Tools/c-analyzer/c_parser/parser/_regexes.py @@ -137,7 +137,8 @@ def _ind(text, level=1, edges='both'): ####################################### # variable declarations -STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )' +_STORAGE = 'auto register static extern'.split() +STORAGE_CLASS = rf'(?: \b (?: {" | ".join(_STORAGE)} ) \b )' TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )' PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? 
)' diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py index 23a3de06f639c..23ce29776ca68 100644 --- a/Tools/c-analyzer/cpython/__main__.py +++ b/Tools/c-analyzer/cpython/__main__.py @@ -31,6 +31,9 @@ def _resolve_filenames(filenames): return resolved +####################################### +# the formats + def fmt_summary(analysis): # XXX Support sorting and grouping. supported = [] @@ -179,7 +182,7 @@ def analyze(files, **kwargs): analyze_resolved=_analyzer.analyze_resolved, ) return _analyzer.Analysis.from_results(results) - else: + else: # check known = _analyzer.read_known() def analyze(files, **kwargs): return _analyzer.iter_decls(files, **kwargs) diff --git a/Tools/c-analyzer/cpython/_analyzer.py b/Tools/c-analyzer/cpython/_analyzer.py index 98f8888651e57..978831d1fd949 100644 --- a/Tools/c-analyzer/cpython/_analyzer.py +++ b/Tools/c-analyzer/cpython/_analyzer.py @@ -11,9 +11,14 @@ Struct, Member, FIXED_TYPE, +) +from c_parser.match import ( is_type_decl, is_pots, is_funcptr, +) +from c_analyzer.match import ( + is_system_type, is_process_global, is_fixed_type, is_immutable, @@ -246,7 +251,7 @@ def _check_typespec(decl, typedecl, types, knowntypes): # Fall back to default known types. 
if is_pots(typespec): return None - elif _info.is_system_type(typespec): + elif is_system_type(typespec): return None elif is_funcptr(decl.vartype): return None diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 35fa296251e2e..7c8c296665398 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -46,10 +46,14 @@ def clean_lines(text): GLOBS = [ 'Include/*.h', 'Include/internal/*.h', + 'Modules/**/*.h', 'Modules/**/*.c', + 'Objects/**/*.h', 'Objects/**/*.c', + 'Python/**/*.h', + 'Parser/**/*.c', + 'Python/**/*.h', 'Parser/**/*.c', - 'Python/**/*.c', ] EXCLUDED = clean_lines(''' @@ -67,11 +71,24 @@ def clean_lines(text): Modules/_winapi.c # windows.h Modules/overlapped.c # winsock.h Python/dynload_win.c # windows.h +Modules/expat/winconfig.h +Python/thread_nt.h # other OS-dependent Python/dynload_dl.c # dl.h Python/dynload_hpux.c # dl.h Python/dynload_aix.c # sys/ldr.h +Python/thread_pthread.h + +# only huge constants (safe but parsing is slow) +Modules/_ssl_data.h +Modules/unicodedata_db.h +Modules/unicodename_db.h +Modules/cjkcodecs/mappings_*.h +Objects/unicodetype_db.h +Python/importlib.h +Python/importlib_external.h +Python/importlib_zipimport.h # @end=conf@ ''') @@ -80,6 +97,17 @@ def clean_lines(text): EXCLUDED += clean_lines(''' # The tool should be able to parse these... 
+Modules/hashlib.h +Objects/stringlib/codecs.h +Objects/stringlib/count.h +Objects/stringlib/ctype.h +Objects/stringlib/fastsearch.h +Objects/stringlib/find.h +Objects/stringlib/find_max_char.h +Objects/stringlib/partition.h +Objects/stringlib/replace.h +Objects/stringlib/split.h + Modules/_dbmmodule.c Modules/cjkcodecs/_codecs_*.c Modules/expat/xmlrole.c @@ -134,6 +162,9 @@ def clean_lines(text): Modules/_ctypes/cfield.c Py_BUILD_CORE 1 Modules/_heapqmodule.c Py_BUILD_CORE 1 Modules/_posixsubprocess.c Py_BUILD_CORE 1 +Objects/stringlib/codecs.h Py_BUILD_CORE 1 +Python/ceval_gil.h Py_BUILD_CORE 1 +Python/condvar.h Py_BUILD_CORE 1 Modules/_json.c Py_BUILD_CORE_BUILTIN 1 Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1 @@ -177,6 +208,12 @@ def clean_lines(text): Python/import.c PyMODINIT_FUNC PyObject* Modules/_testcapimodule.c PyAPI_FUNC(RTYPE) RTYPE Python/getargs.c PyAPI_FUNC(RTYPE) RTYPE +Objects/stringlib/unicode_format.h Py_LOCAL_INLINE(type) static inline type + +# implied include of pymacro.h +*/clinic/*.c.h PyDoc_VAR(name) static const char name[] +*/clinic/*.c.h PyDoc_STR(str) str +*/clinic/*.c.h PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) # implied include of exports.h #Modules/_io/bytesio.c Py_EXPORTED_SYMBOL /* */ @@ -212,6 +249,11 @@ def clean_lines(text): Modules/expat/xmlparse.c XML_POOR_ENTROPY 1 Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1 +# others +Modules/sre_lib.h LOCAL(type) static inline type +Modules/sre_lib.h SRE(F) sre_ucs2_##F +Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 + # @end=tsv@ ''')[1:]

1 0

bpo-42208: Fix test_gdb for gc_collect_main() name (GH-23041)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/b9ee4af4c643a323779fd7076e80b29d611f2709 commit: b9ee4af4c643a323779fd7076e80b29d611f2709 branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T21:09:48+01:00 summary: bpo-42208: Fix test_gdb for gc_collect_main() name (GH-23041) The gcmodule.c collect() function was renamed to gc_collect_main(): update gdb/libpython.py (python-gdb.py). files: M Tools/gdb/libpython.py diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 33bf5ac821fff..83a5fa93cf447 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -1605,8 +1605,8 @@ def is_waiting_for_gil(self): return (name == 'take_gil') def is_gc_collect(self): - '''Is this frame "collect" within the garbage-collector?''' - return self._gdbframe.name() == 'collect' + '''Is this frame gc_collect_main() within the garbage-collector?''' + return self._gdbframe.name() in ('collect', 'gc_collect_main') def get_pyop(self): try:

1 0

bpo-42208: Move _PyImport_Cleanup() to pylifecycle.c (GH-23040)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/dff1ad509051f7e07e77d1e3ec83314d53… commit: dff1ad509051f7e07e77d1e3ec83314d53fb1118 branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T18:03:28+01:00 summary: bpo-42208: Move _PyImport_Cleanup() to pylifecycle.c (GH-23040) Move _PyImport_Cleanup() to pylifecycle.c, rename it to finalize_modules(), split it (200 lines) into many smaller sub-functions and cleanup the code. files: M Python/import.c M Python/pylifecycle.c diff --git a/Python/import.c b/Python/import.c index 8b9cc3066fc4a..77e6baef011e3 100644 --- a/Python/import.c +++ b/Python/import.c @@ -406,233 +406,6 @@ import_ensure_initialized(PyThreadState *tstate, PyObject *mod, PyObject *name) } -/* List of names to clear in sys */ -static const char * const sys_deletes[] = { - "path", "argv", "ps1", "ps2", - "last_type", "last_value", "last_traceback", - "path_hooks", "path_importer_cache", "meta_path", - "__interactivehook__", - NULL -}; - -static const char * const sys_files[] = { - "stdin", "__stdin__", - "stdout", "__stdout__", - "stderr", "__stderr__", - NULL -}; - -/* Un-initialize things, as good as we can */ - -void -_PyImport_Cleanup(PyThreadState *tstate) -{ - PyInterpreterState *interp = tstate->interp; - PyObject *modules = interp->modules; - if (modules == NULL) { - /* Already done */ - return; - } - - /* Delete some special variables first. These are common - places where user values hide and people complain when their - destructors fail. Since the modules containing them are - deleted *last* of all, they would come too late in the normal - destruction order. Sigh. */ - - /* XXX Perhaps these precautions are obsolete. Who knows? 
*/ - - int verbose = _PyInterpreterState_GetConfig(interp)->verbose; - if (verbose) { - PySys_WriteStderr("# clear builtins._\n"); - } - if (PyDict_SetItemString(interp->builtins, "_", Py_None) < 0) { - PyErr_WriteUnraisable(NULL); - } - - const char * const *p; - for (p = sys_deletes; *p != NULL; p++) { - if (verbose) { - PySys_WriteStderr("# clear sys.%s\n", *p); - } - if (PyDict_SetItemString(interp->sysdict, *p, Py_None) < 0) { - PyErr_WriteUnraisable(NULL); - } - } - for (p = sys_files; *p != NULL; p+=2) { - if (verbose) { - PySys_WriteStderr("# restore sys.%s\n", *p); - } - PyObject *value = _PyDict_GetItemStringWithError(interp->sysdict, - *(p+1)); - if (value == NULL) { - if (_PyErr_Occurred(tstate)) { - PyErr_WriteUnraisable(NULL); - } - value = Py_None; - } - if (PyDict_SetItemString(interp->sysdict, *p, value) < 0) { - PyErr_WriteUnraisable(NULL); - } - } - - /* We prepare a list which will receive (name, weakref) tuples of - modules when they are removed from sys.modules. The name is used - for diagnosis messages (in verbose mode), while the weakref helps - detect those modules which have been held alive. 
*/ - PyObject *weaklist = PyList_New(0); - if (weaklist == NULL) { - PyErr_WriteUnraisable(NULL); - } - -#define STORE_MODULE_WEAKREF(name, mod) \ - if (weaklist != NULL) { \ - PyObject *wr = PyWeakref_NewRef(mod, NULL); \ - if (wr) { \ - PyObject *tup = PyTuple_Pack(2, name, wr); \ - if (!tup || PyList_Append(weaklist, tup) < 0) { \ - PyErr_WriteUnraisable(NULL); \ - } \ - Py_XDECREF(tup); \ - Py_DECREF(wr); \ - } \ - else { \ - PyErr_WriteUnraisable(NULL); \ - } \ - } -#define CLEAR_MODULE(name, mod) \ - if (PyModule_Check(mod)) { \ - if (verbose && PyUnicode_Check(name)) { \ - PySys_FormatStderr("# cleanup[2] removing %U\n", name); \ - } \ - STORE_MODULE_WEAKREF(name, mod); \ - if (PyObject_SetItem(modules, name, Py_None) < 0) { \ - PyErr_WriteUnraisable(NULL); \ - } \ - } - - /* Remove all modules from sys.modules, hoping that garbage collection - can reclaim most of them. */ - if (PyDict_CheckExact(modules)) { - Py_ssize_t pos = 0; - PyObject *key, *value; - while (PyDict_Next(modules, &pos, &key, &value)) { - CLEAR_MODULE(key, value); - } - } - else { - PyObject *iterator = PyObject_GetIter(modules); - if (iterator == NULL) { - PyErr_WriteUnraisable(NULL); - } - else { - PyObject *key; - while ((key = PyIter_Next(iterator))) { - PyObject *value = PyObject_GetItem(modules, key); - if (value == NULL) { - PyErr_WriteUnraisable(NULL); - continue; - } - CLEAR_MODULE(key, value); - Py_DECREF(value); - Py_DECREF(key); - } - if (PyErr_Occurred()) { - PyErr_WriteUnraisable(NULL); - } - Py_DECREF(iterator); - } - } - - /* Clear the modules dict. */ - if (PyDict_CheckExact(modules)) { - PyDict_Clear(modules); - } - else { - _Py_IDENTIFIER(clear); - if (_PyObject_CallMethodIdNoArgs(modules, &PyId_clear) == NULL) { - PyErr_WriteUnraisable(NULL); - } - } - /* Restore the original builtins dict, to ensure that any - user data gets cleared. 
*/ - PyObject *dict = PyDict_Copy(interp->builtins); - if (dict == NULL) { - PyErr_WriteUnraisable(NULL); - } - PyDict_Clear(interp->builtins); - if (PyDict_Update(interp->builtins, interp->builtins_copy)) { - _PyErr_Clear(tstate); - } - Py_XDECREF(dict); - /* Collect references */ - _PyGC_CollectNoFail(tstate); - /* Dump GC stats before it's too late, since it uses the warnings - machinery. */ - _PyGC_DumpShutdownStats(tstate); - - /* Now, if there are any modules left alive, clear their globals to - minimize potential leaks. All C extension modules actually end - up here, since they are kept alive in the interpreter state. - - The special treatment of "builtins" here is because even - when it's not referenced as a module, its dictionary is - referenced by almost every module's __builtins__. Since - deleting a module clears its dictionary (even if there are - references left to it), we need to delete the "builtins" - module last. Likewise, we don't delete sys until the very - end because it is implicitly referenced (e.g. by print). */ - if (weaklist != NULL) { - Py_ssize_t i; - /* Since dict is ordered in CPython 3.6+, modules are saved in - importing order. First clear modules imported later. 
*/ - for (i = PyList_GET_SIZE(weaklist) - 1; i >= 0; i--) { - PyObject *tup = PyList_GET_ITEM(weaklist, i); - PyObject *name = PyTuple_GET_ITEM(tup, 0); - PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1)); - if (mod == Py_None) - continue; - assert(PyModule_Check(mod)); - dict = PyModule_GetDict(mod); - if (dict == interp->builtins || dict == interp->sysdict) - continue; - Py_INCREF(mod); - if (verbose && PyUnicode_Check(name)) { - PySys_FormatStderr("# cleanup[3] wiping %U\n", name); - } - _PyModule_Clear(mod); - Py_DECREF(mod); - } - Py_DECREF(weaklist); - } - - /* Next, delete sys and builtins (in that order) */ - if (verbose) { - PySys_FormatStderr("# cleanup[3] wiping sys\n"); - } - _PyModule_ClearDict(interp->sysdict); - if (verbose) { - PySys_FormatStderr("# cleanup[3] wiping builtins\n"); - } - _PyModule_ClearDict(interp->builtins); - - /* Clear module dict copies stored in the interpreter state */ - _PyInterpreterState_ClearModules(interp); - - /* Clear and delete the modules directory. Actual modules will - still be there only if imported during the execution of some - destructor. */ - interp->modules = NULL; - Py_DECREF(modules); - - /* Once more */ - _PyGC_CollectNoFail(tstate); - -#undef CLEAR_MODULE -#undef STORE_MODULE_WEAKREF -} - - /* Helper for pythonrun.c -- return magic number and tag. 
*/ long diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 71834f63f2a78..adef1617f6132 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -8,7 +8,6 @@ #include "pycore_ceval.h" // _PyEval_FiniGIL() #include "pycore_context.h" // _PyContext_Init() #include "pycore_fileutils.h" // _Py_ResetForceASCII() -#include "pycore_import.h" // _PyImport_Cleanup() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyDebug_PrintTotalRefs() #include "pycore_pathconfig.h" // _PyConfig_WritePathConfig() @@ -1192,6 +1191,292 @@ Py_Initialize(void) } +static void +finalize_modules_delete_special(PyThreadState *tstate, int verbose) +{ + // List of names to clear in sys + static const char * const sys_deletes[] = { + "path", "argv", "ps1", "ps2", + "last_type", "last_value", "last_traceback", + "path_hooks", "path_importer_cache", "meta_path", + "__interactivehook__", + NULL + }; + + static const char * const sys_files[] = { + "stdin", "__stdin__", + "stdout", "__stdout__", + "stderr", "__stderr__", + NULL + }; + + PyInterpreterState *interp = tstate->interp; + if (verbose) { + PySys_WriteStderr("# clear builtins._\n"); + } + if (PyDict_SetItemString(interp->builtins, "_", Py_None) < 0) { + PyErr_WriteUnraisable(NULL); + } + + const char * const *p; + for (p = sys_deletes; *p != NULL; p++) { + if (verbose) { + PySys_WriteStderr("# clear sys.%s\n", *p); + } + if (PyDict_SetItemString(interp->sysdict, *p, Py_None) < 0) { + PyErr_WriteUnraisable(NULL); + } + } + for (p = sys_files; *p != NULL; p+=2) { + const char *name = p[0]; + const char *orig_name = p[1]; + if (verbose) { + PySys_WriteStderr("# restore sys.%s\n", name); + } + PyObject *value = _PyDict_GetItemStringWithError(interp->sysdict, + orig_name); + if (value == NULL) { + if (_PyErr_Occurred(tstate)) { + PyErr_WriteUnraisable(NULL); + } + value = Py_None; + } + if (PyDict_SetItemString(interp->sysdict, name, value) < 0) { + PyErr_WriteUnraisable(NULL); + } + } +} + + +static 
PyObject* +finalize_remove_modules(PyObject *modules, int verbose) +{ + PyObject *weaklist = PyList_New(0); + if (weaklist == NULL) { + PyErr_WriteUnraisable(NULL); + } + +#define STORE_MODULE_WEAKREF(name, mod) \ + if (weaklist != NULL) { \ + PyObject *wr = PyWeakref_NewRef(mod, NULL); \ + if (wr) { \ + PyObject *tup = PyTuple_Pack(2, name, wr); \ + if (!tup || PyList_Append(weaklist, tup) < 0) { \ + PyErr_WriteUnraisable(NULL); \ + } \ + Py_XDECREF(tup); \ + Py_DECREF(wr); \ + } \ + else { \ + PyErr_WriteUnraisable(NULL); \ + } \ + } + +#define CLEAR_MODULE(name, mod) \ + if (PyModule_Check(mod)) { \ + if (verbose && PyUnicode_Check(name)) { \ + PySys_FormatStderr("# cleanup[2] removing %U\n", name); \ + } \ + STORE_MODULE_WEAKREF(name, mod); \ + if (PyObject_SetItem(modules, name, Py_None) < 0) { \ + PyErr_WriteUnraisable(NULL); \ + } \ + } + + if (PyDict_CheckExact(modules)) { + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(modules, &pos, &key, &value)) { + CLEAR_MODULE(key, value); + } + } + else { + PyObject *iterator = PyObject_GetIter(modules); + if (iterator == NULL) { + PyErr_WriteUnraisable(NULL); + } + else { + PyObject *key; + while ((key = PyIter_Next(iterator))) { + PyObject *value = PyObject_GetItem(modules, key); + if (value == NULL) { + PyErr_WriteUnraisable(NULL); + continue; + } + CLEAR_MODULE(key, value); + Py_DECREF(value); + Py_DECREF(key); + } + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(NULL); + } + Py_DECREF(iterator); + } + } +#undef CLEAR_MODULE +#undef STORE_MODULE_WEAKREF + + return weaklist; +} + + +static void +finalize_clear_modules_dict(PyObject *modules) +{ + if (PyDict_CheckExact(modules)) { + PyDict_Clear(modules); + } + else { + _Py_IDENTIFIER(clear); + if (_PyObject_CallMethodIdNoArgs(modules, &PyId_clear) == NULL) { + PyErr_WriteUnraisable(NULL); + } + } +} + + +static void +finalize_restore_builtins(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + PyObject *dict = 
PyDict_Copy(interp->builtins); + if (dict == NULL) { + PyErr_WriteUnraisable(NULL); + } + PyDict_Clear(interp->builtins); + if (PyDict_Update(interp->builtins, interp->builtins_copy)) { + _PyErr_Clear(tstate); + } + Py_XDECREF(dict); +} + + +static void +finalize_modules_clear_weaklist(PyInterpreterState *interp, + PyObject *weaklist, int verbose) +{ + // First clear modules imported later + for (Py_ssize_t i = PyList_GET_SIZE(weaklist) - 1; i >= 0; i--) { + PyObject *tup = PyList_GET_ITEM(weaklist, i); + PyObject *name = PyTuple_GET_ITEM(tup, 0); + PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1)); + if (mod == Py_None) { + continue; + } + assert(PyModule_Check(mod)); + PyObject *dict = PyModule_GetDict(mod); + if (dict == interp->builtins || dict == interp->sysdict) { + continue; + } + Py_INCREF(mod); + if (verbose && PyUnicode_Check(name)) { + PySys_FormatStderr("# cleanup[3] wiping %U\n", name); + } + _PyModule_Clear(mod); + Py_DECREF(mod); + } +} + + +static void +finalize_clear_sys_builtins_dict(PyInterpreterState *interp, int verbose) +{ + // Clear sys dict + if (verbose) { + PySys_FormatStderr("# cleanup[3] wiping sys\n"); + } + _PyModule_ClearDict(interp->sysdict); + + // Clear builtins dict + if (verbose) { + PySys_FormatStderr("# cleanup[3] wiping builtins\n"); + } + _PyModule_ClearDict(interp->builtins); +} + + +/* Clear modules, as good as we can */ +static void +finalize_modules(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + PyObject *modules = interp->modules; + if (modules == NULL) { + // Already done + return; + } + int verbose = _PyInterpreterState_GetConfig(interp)->verbose; + + // Delete some special builtins._ and sys attributes first. These are + // common places where user values hide and people complain when their + // destructors fail. Since the modules containing them are + // deleted *last* of all, they would come too late in the normal + // destruction order. Sigh. 
+ // + // XXX Perhaps these precautions are obsolete. Who knows? + finalize_modules_delete_special(tstate, verbose); + + // Remove all modules from sys.modules, hoping that garbage collection + // can reclaim most of them: set all sys.modules values to None. + // + // We prepare a list which will receive (name, weakref) tuples of + // modules when they are removed from sys.modules. The name is used + // for diagnosis messages (in verbose mode), while the weakref helps + // detect those modules which have been held alive. + PyObject *weaklist = finalize_remove_modules(modules, verbose); + + // Clear the modules dict + finalize_clear_modules_dict(modules); + + // Restore the original builtins dict, to ensure that any + // user data gets cleared. + finalize_restore_builtins(tstate); + + // Collect garbage + _PyGC_CollectNoFail(tstate); + + // Dump GC stats before it's too late, since it uses the warnings + // machinery. + _PyGC_DumpShutdownStats(tstate); + + if (weaklist != NULL) { + // Now, if there are any modules left alive, clear their globals to + // minimize potential leaks. All C extension modules actually end + // up here, since they are kept alive in the interpreter state. + // + // The special treatment of "builtins" here is because even + // when it's not referenced as a module, its dictionary is + // referenced by almost every module's __builtins__. Since + // deleting a module clears its dictionary (even if there are + // references left to it), we need to delete the "builtins" + // module last. Likewise, we don't delete sys until the very + // end because it is implicitly referenced (e.g. by print). + // + // Since dict is ordered in CPython 3.6+, modules are saved in + // importing order. First clear modules imported later. 
+ finalize_modules_clear_weaklist(interp, weaklist, verbose); + Py_DECREF(weaklist); + } + + // Clear sys and builtins modules dict + finalize_clear_sys_builtins_dict(interp, verbose); + + // Clear module dict copies stored in the interpreter state: + // clear PyInterpreterState.modules_by_index and + // clear PyModuleDef.m_base.m_copy (of extensions not using the multi-phase + // initialization API) + _PyInterpreterState_ClearModules(interp); + + // Clear and delete the modules directory. Actual modules will + // still be there only if imported during the execution of some + // destructor. + Py_SETREF(interp->modules, NULL); + + // Collect garbage once more + _PyGC_CollectNoFail(tstate); +} + + /* Flush stdout and stderr */ static int @@ -1210,6 +1495,7 @@ file_is_closed(PyObject *fobj) return r > 0; } + static int flush_std_files(void) { @@ -1417,7 +1703,7 @@ Py_FinalizeEx(void) PyGC_Collect(); /* Destroy all modules */ - _PyImport_Cleanup(tstate); + finalize_modules(tstate); /* Print debug stats if any */ _PyEval_Fini(); @@ -1660,7 +1946,8 @@ Py_EndInterpreter(PyThreadState *tstate) Py_FatalError("not the last thread"); } - _PyImport_Cleanup(tstate); + finalize_modules(tstate); + finalize_interp_clear(tstate); finalize_interp_delete(tstate); }

1 0

bpo-42208: Pass tstate to _PyGC_CollectNoFail() (GH-23038)
by vstinner 30 Oct '20

30 Oct '20

https://github.com/python/cpython/commit/8b3414818f5289eac530bf38bcfbd7b2b8… commit: 8b3414818f5289eac530bf38bcfbd7b2b851805c branch: master author: Victor Stinner <vstinner(a)python.org> committer: vstinner <vstinner(a)python.org> date: 2020-10-30T17:00:00+01:00 summary: bpo-42208: Pass tstate to _PyGC_CollectNoFail() (GH-23038) Move private _PyGC_CollectNoFail() to the internal C API. Remove the private _PyGC_CollectIfEnabled() which was just an alias to the public PyGC_Collect() function since Python 3.8. Rename functions: * collect() => gc_collect_main() * collect_with_callback() => gc_collect_with_callback() * collect_generations() => gc_collect_generations() files: M Include/cpython/objimpl.h M Include/internal/pycore_gc.h M Modules/gcmodule.c M Python/import.c M Python/pylifecycle.c diff --git a/Include/cpython/objimpl.h b/Include/cpython/objimpl.h index 15999a239f7a9..d83700e2a4647 100644 --- a/Include/cpython/objimpl.h +++ b/Include/cpython/objimpl.h @@ -79,10 +79,6 @@ PyAPI_FUNC(void) PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator); PyAPI_FUNC(void) PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator); -PyAPI_FUNC(Py_ssize_t) _PyGC_CollectNoFail(void); -PyAPI_FUNC(Py_ssize_t) _PyGC_CollectIfEnabled(void); - - /* Test if an object implements the garbage collector protocol */ PyAPI_FUNC(int) PyObject_IS_GC(PyObject *obj); diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index da202a1df532e..e2d47c90c10d8 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -161,7 +161,9 @@ struct _gc_runtime_state { Py_ssize_t long_lived_pending; }; -PyAPI_FUNC(void) _PyGC_InitState(struct _gc_runtime_state *); +extern void _PyGC_InitState(struct _gc_runtime_state *); + +extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate); // Functions to clear types free lists diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 8833400caba75..d90ff33684fe8 100644 --- a/Modules/gcmodule.c +++ 
b/Modules/gcmodule.c @@ -1176,8 +1176,9 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, /* This is the main function. Read this to understand how the * collection process works. */ static Py_ssize_t -collect(PyThreadState *tstate, int generation, - Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) +gc_collect_main(PyThreadState *tstate, int generation, + Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, + int nofail) { int i; Py_ssize_t m = 0; /* # objects collected */ @@ -1395,19 +1396,19 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, * progress callbacks. */ static Py_ssize_t -collect_with_callback(PyThreadState *tstate, int generation) +gc_collect_with_callback(PyThreadState *tstate, int generation) { assert(!_PyErr_Occurred(tstate)); Py_ssize_t result, collected, uncollectable; invoke_gc_callback(tstate, "start", generation, 0, 0); - result = collect(tstate, generation, &collected, &uncollectable, 0); + result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0); invoke_gc_callback(tstate, "stop", generation, collected, uncollectable); assert(!_PyErr_Occurred(tstate)); return result; } static Py_ssize_t -collect_generations(PyThreadState *tstate) +gc_collect_generations(PyThreadState *tstate) { GCState *gcstate = &tstate->interp->gc; /* Find the oldest generation (highest numbered) where the count @@ -1455,7 +1456,7 @@ collect_generations(PyThreadState *tstate) if (i == NUM_GENERATIONS - 1 && gcstate->long_lived_pending < gcstate->long_lived_total / 4) continue; - n = collect_with_callback(tstate, i); + n = gc_collect_with_callback(tstate, i); break; } } @@ -1541,7 +1542,7 @@ gc_collect_impl(PyObject *module, int generation) } else { gcstate->collecting = 1; - n = collect_with_callback(tstate, generation); + n = gc_collect_with_callback(tstate, generation); gcstate->collecting = 0; } return n; @@ -2041,7 +2042,7 @@ PyInit_gc(void) return m; } -/* API to invoke gc.collect() 
from C */ +/* Public API to invoke gc.collect() from C */ Py_ssize_t PyGC_Collect(void) { @@ -2061,7 +2062,7 @@ PyGC_Collect(void) PyObject *exc, *value, *tb; gcstate->collecting = 1; _PyErr_Fetch(tstate, &exc, &value, &tb); - n = collect_with_callback(tstate, NUM_GENERATIONS - 1); + n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1); _PyErr_Restore(tstate, exc, value, tb); gcstate->collecting = 0; } @@ -2070,19 +2071,11 @@ PyGC_Collect(void) } Py_ssize_t -_PyGC_CollectIfEnabled(void) +_PyGC_CollectNoFail(PyThreadState *tstate) { - return PyGC_Collect(); -} - -Py_ssize_t -_PyGC_CollectNoFail(void) -{ - PyThreadState *tstate = _PyThreadState_GET(); assert(!_PyErr_Occurred(tstate)); GCState *gcstate = &tstate->interp->gc; - Py_ssize_t n; /* Ideally, this function is only called on interpreter shutdown, and therefore not recursively. Unfortunately, when there are daemon @@ -2091,13 +2084,13 @@ _PyGC_CollectNoFail(void) See http://bugs.python.org/issue8713#msg195178 for an example. */ if (gcstate->collecting) { - n = 0; - } - else { - gcstate->collecting = 1; - n = collect(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1); - gcstate->collecting = 0; + return 0; } + + Py_ssize_t n; + gcstate->collecting = 1; + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1); + gcstate->collecting = 0; return n; } @@ -2240,7 +2233,7 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) !_PyErr_Occurred(tstate)) { gcstate->collecting = 1; - collect_generations(tstate); + gc_collect_generations(tstate); gcstate->collecting = 0; } PyObject *op = FROM_GC(g); diff --git a/Python/import.c b/Python/import.c index b79bda058db82..8b9cc3066fc4a 100644 --- a/Python/import.c +++ b/Python/import.c @@ -566,7 +566,7 @@ _PyImport_Cleanup(PyThreadState *tstate) } Py_XDECREF(dict); /* Collect references */ - _PyGC_CollectNoFail(); + _PyGC_CollectNoFail(tstate); /* Dump GC stats before it's too late, since it uses the warnings machinery. 
*/ _PyGC_DumpShutdownStats(tstate); @@ -626,7 +626,7 @@ _PyImport_Cleanup(PyThreadState *tstate) Py_DECREF(modules); /* Once more */ - _PyGC_CollectNoFail(); + _PyGC_CollectNoFail(tstate); #undef CLEAR_MODULE #undef STORE_MODULE_WEAKREF diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 774a4f9de08e0..71834f63f2a78 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1293,7 +1293,7 @@ finalize_interp_clear(PyThreadState *tstate) PyInterpreterState_Clear(tstate->interp); /* Last explicit GC collection */ - _PyGC_CollectNoFail(); + _PyGC_CollectNoFail(tstate); /* Clear all loghooks */ /* Both _PySys_Audit function and users still need PyObject, such as tuple. @@ -1414,7 +1414,7 @@ Py_FinalizeEx(void) * XXX but I'm unclear on exactly how that one happens. In any case, * XXX I haven't seen a real-life report of either of these. */ - _PyGC_CollectIfEnabled(); + PyGC_Collect(); /* Destroy all modules */ _PyImport_Cleanup(tstate);

1 0