[Python-checkins] bpo-46541: Discover the global strings. (gh-31346)

ericsnowcurrently webhook-mailer at python.org
Mon Feb 14 19:37:00 EST 2022


https://github.com/python/cpython/commit/12360aa159c42c7798fd14225d271e6fd84db7eb
commit: 12360aa159c42c7798fd14225d271e6fd84db7eb
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2022-02-14T17:36:51-07:00
summary:

bpo-46541: Discover the global strings. (gh-31346)

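Instead of manually enumerating the global strings in Tools/scripts/generate_global_objects.py, we now extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files. Identifiers are discovered directly from _Py_ID(NAME); string literals are discovered from the new no-op _Py_DECLARE_STR(name, "value") marker that this change places next to _Py_STR() uses, since _Py_STR(NAME) alone does not carry the string's value.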

This is partly inspired by gh-31261.

https://bugs.python.org/issue46541

files:
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init.h
M Objects/typeobject.c
M Objects/weakrefobject.c
M Python/_warnings.c
M Python/ast_opt.c
M Python/compile.c
M Python/pythonrun.c
M Tools/scripts/generate_global_objects.py

diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 17241b3a3dd16..aa597bc8281a5 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -28,13 +28,6 @@ extern "C" {
 /* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
 struct _Py_global_strings {
     struct {
-        STRUCT_FOR_STR(empty, "")
-        STRUCT_FOR_STR(dot, ".")
-        STRUCT_FOR_STR(comma_sep, ", ")
-        STRUCT_FOR_STR(percent, "%")
-        STRUCT_FOR_STR(dbl_percent, "%%")
-
-        // "anonymous" labels
         STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>")
         STRUCT_FOR_STR(anon_genexpr, "<genexpr>")
         STRUCT_FOR_STR(anon_lambda, "<lambda>")
@@ -42,7 +35,12 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(anon_module, "<module>")
         STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
         STRUCT_FOR_STR(anon_string, "<string>")
+        STRUCT_FOR_STR(comma_sep, ", ")
+        STRUCT_FOR_STR(dbl_percent, "%%")
+        STRUCT_FOR_STR(dot, ".")
         STRUCT_FOR_STR(dot_locals, ".<locals>")
+        STRUCT_FOR_STR(empty, "")
+        STRUCT_FOR_STR(percent, "%")
     } literals;
 
     struct {
@@ -330,6 +328,7 @@ struct _Py_global_strings {
 #define _Py_STR(NAME) \
      (_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base))
 
+#define _Py_DECLARE_STR(name, str)
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index 045ae5d2835b1..04c1e671235ea 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -644,12 +644,6 @@ extern "C" {
         \
         .strings = { \
             .literals = { \
-                INIT_STR(empty, ""), \
-                INIT_STR(dot, "."), \
-                INIT_STR(comma_sep, ", "), \
-                INIT_STR(percent, "%"), \
-                INIT_STR(dbl_percent, "%%"), \
-                \
                 INIT_STR(anon_dictcomp, "<dictcomp>"), \
                 INIT_STR(anon_genexpr, "<genexpr>"), \
                 INIT_STR(anon_lambda, "<lambda>"), \
@@ -657,7 +651,12 @@ extern "C" {
                 INIT_STR(anon_module, "<module>"), \
                 INIT_STR(anon_setcomp, "<setcomp>"), \
                 INIT_STR(anon_string, "<string>"), \
+                INIT_STR(comma_sep, ", "), \
+                INIT_STR(dbl_percent, "%%"), \
+                INIT_STR(dot, "."), \
                 INIT_STR(dot_locals, ".<locals>"), \
+                INIT_STR(empty, ""), \
+                INIT_STR(percent, "%"), \
             }, \
             .identifiers = { \
                 INIT_ID(Py_Repr), \
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 3f8f36a9c4648..8c4901119de7d 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
             Py_DECREF(sorted_methods);
             return NULL;
         }
+        _Py_DECLARE_STR(comma_sep, ", ");
         joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods);
         method_count = PyObject_Length(sorted_methods);
         Py_DECREF(sorted_methods);
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index 71dfa640ebf57..1712533a39d80 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy)
         return res; \
     }
 
-#define WRAP_METHOD(method, special) \
+#define WRAP_METHOD(method, SPECIAL) \
     static PyObject * \
     method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \
             UNWRAP(proxy); \
             Py_INCREF(proxy); \
-            PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \
+            PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \
             Py_DECREF(proxy); \
             return res; \
         }
diff --git a/Python/_warnings.c b/Python/_warnings.c
index a47e5fef6865f..03e6ffcee0ac2 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
     return rc;
 }
 
-#define GET_WARNINGS_ATTR(interp, attr, try_import) \
-    get_warnings_attr(interp, &_Py_ID(attr), try_import)
+#define GET_WARNINGS_ATTR(interp, ATTR, try_import) \
+    get_warnings_attr(interp, &_Py_ID(ATTR), try_import)
 
 /*
    Returns a new reference.
diff --git a/Python/ast_opt.c b/Python/ast_opt.c
index 2911370649790..77ed29d0cdddd 100644
--- a/Python/ast_opt.c
+++ b/Python/ast_opt.c
@@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
     PyObject *str = PyUnicode_Substring(fmt, start, pos);
     /* str = str.replace('%%', '%') */
     if (str && has_percents) {
+        _Py_DECLARE_STR(percent, "%");
+        _Py_DECLARE_STR(dbl_percent, "%%");
         Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent),
                                          &_Py_STR(percent), -1));
     }
diff --git a/Python/compile.c b/Python/compile.c
index ac4960b5df320..1cf20d3a36ac1 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c)
                 || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
                 || parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
             {
+                _Py_DECLARE_STR(dot_locals, ".<locals>");
                 base = PyUnicode_Concat(parent->u_qualname,
                                         &_Py_STR(dot_locals));
                 if (base == NULL)
@@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
 {
     PyCodeObject *co;
     int addNone = 1;
+    _Py_DECLARE_STR(anon_module, "<module>");
     if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
                               mod, 1)) {
         return NULL;
@@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
         return 0;
     }
 
+    _Py_DECLARE_STR(anon_lambda, "<lambda>");
     if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
                               (void *)e, e->lineno)) {
         return 0;
@@ -5347,6 +5350,7 @@ static int
 compiler_genexp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == GeneratorExp_kind);
+    _Py_DECLARE_STR(anon_genexpr, "<genexpr>");
     return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
                                   e->v.GeneratorExp.generators,
                                   e->v.GeneratorExp.elt, NULL);
@@ -5356,6 +5360,7 @@ static int
 compiler_listcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == ListComp_kind);
+    _Py_DECLARE_STR(anon_listcomp, "<listcomp>");
     return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
                                   e->v.ListComp.generators,
                                   e->v.ListComp.elt, NULL);
@@ -5365,6 +5370,7 @@ static int
 compiler_setcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == SetComp_kind);
+    _Py_DECLARE_STR(anon_setcomp, "<setcomp>");
     return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
                                   e->v.SetComp.generators,
                                   e->v.SetComp.elt, NULL);
@@ -5375,6 +5381,7 @@ static int
 compiler_dictcomp(struct compiler *c, expr_ty e)
 {
     assert(e->kind == DictComp_kind);
+    _Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
     return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
                                   e->v.DictComp.generators,
                                   e->v.DictComp.key, e->v.DictComp.value);
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index b34a22391822b..38ca952838a1f 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
         goto finally;
     if (v == Py_None) {
         Py_DECREF(v);
+        _Py_DECLARE_STR(anon_string, "<string>");
         *filename = &_Py_STR(anon_string);
         Py_INCREF(*filename);
     }
@@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
     if (arena == NULL)
         return NULL;
 
+    _Py_DECLARE_STR(anon_string, "<string>");
     mod = _PyParser_ASTFromString(
             str, &_Py_STR(anon_string), start, flags, arena);
 
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
index 73068894d974b..e989f3c086f34 100644
--- a/Tools/scripts/generate_global_objects.py
+++ b/Tools/scripts/generate_global_objects.py
@@ -13,298 +13,112 @@
 STRING_LITERALS = {
     'empty': '',
     'dot': '.',
-    'comma_sep': ', ',
-    'percent': '%',
-    'dbl_percent': '%%',
-
-    '"anonymous" labels': None,
-    'anon_dictcomp': '<dictcomp>',
-    'anon_genexpr': '<genexpr>',
-    'anon_lambda': '<lambda>',
-    'anon_listcomp': '<listcomp>',
-    'anon_module': '<module>',
-    'anon_setcomp': '<setcomp>',
-    'anon_string': '<string>',
-    'dot_locals': '.<locals>',
+}
+IGNORED = {
+    'ACTION',  # Python/_warnings.c
+    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
+    'DUNDER',  # Objects/typeobject.c
+    'RDUNDER',  # Objects/typeobject.c
+    'SPECIAL',  # Objects/weakrefobject.c
 }
 IDENTIFIERS = [
-    'Py_Repr',
-    'TextIOWrapper',
+    # from ADD() in Python/_warnings.c
+    'default',
+    'ignore',
+
+    # from GET_WARNINGS_ATTR() in Python/_warnings.c
     'WarningMessage',
-    '_',
-    '__IOBase_closed',
-    '__abc_tpflags__',
+    '_showwarnmsg',
+    '_warn_unawaited_coroutine',
+    'defaultaction',
+    'filters',
+    'onceregistry',
+
+    # from WRAP_METHOD() in Objects/weakrefobject.c
+    '__bytes__',
+    '__reversed__',
+
+    # from COPY_ATTR() in Objects/funcobject.c
+    '__module__',
+    '__name__',
+    '__qualname__',
+    '__doc__',
+    '__annotations__',
+
+    # from SLOT* in Objects/typeobject.c
     '__abs__',
-    '__abstractmethods__',
     '__add__',
-    '__aenter__',
-    '__aexit__',
-    '__aiter__',
-    '__all__',
     '__and__',
-    '__anext__',
-    '__annotations__',
-    '__args__',
-    '__await__',
-    '__bases__',
-    '__bool__',
-    '__build_class__',
-    '__builtins__',
-    '__bytes__',
-    '__call__',
-    '__cantrace__',
-    '__class__',
-    '__class_getitem__',
-    '__classcell__',
-    '__complex__',
-    '__contains__',
-    '__copy__',
-    '__del__',
-    '__delattr__',
-    '__delete__',
-    '__delitem__',
-    '__dict__',
-    '__dir__',
     '__divmod__',
-    '__doc__',
-    '__enter__',
-    '__eq__',
-    '__exit__',
-    '__file__',
     '__float__',
     '__floordiv__',
-    '__format__',
-    '__fspath__',
-    '__ge__',
-    '__get__',
-    '__getattr__',
-    '__getattribute__',
-    '__getinitargs__',
     '__getitem__',
-    '__getnewargs__',
-    '__getnewargs_ex__',
-    '__getstate__',
-    '__gt__',
-    '__hash__',
     '__iadd__',
     '__iand__',
     '__ifloordiv__',
     '__ilshift__',
     '__imatmul__',
     '__imod__',
-    '__import__',
     '__imul__',
-    '__index__',
-    '__init__',
-    '__init_subclass__',
-    '__instancecheck__',
     '__int__',
     '__invert__',
     '__ior__',
-    '__ipow__',
     '__irshift__',
-    '__isabstractmethod__',
     '__isub__',
-    '__iter__',
     '__itruediv__',
     '__ixor__',
-    '__le__',
-    '__len__',
-    '__length_hint__',
-    '__loader__',
     '__lshift__',
-    '__lt__',
-    '__ltrace__',
-    '__main__',
     '__matmul__',
-    '__missing__',
     '__mod__',
-    '__module__',
-    '__mro_entries__',
     '__mul__',
-    '__name__',
-    '__ne__',
     '__neg__',
-    '__new__',
-    '__newobj__',
-    '__newobj_ex__',
-    '__next__',
-    '__note__',
     '__or__',
-    '__origin__',
-    '__package__',
-    '__parameters__',
-    '__path__',
     '__pos__',
     '__pow__',
-    '__prepare__',
-    '__qualname__',
     '__radd__',
     '__rand__',
     '__rdivmod__',
-    '__reduce__',
-    '__reduce_ex__',
-    '__repr__',
-    '__reversed__',
     '__rfloordiv__',
     '__rlshift__',
     '__rmatmul__',
     '__rmod__',
     '__rmul__',
     '__ror__',
-    '__round__',
     '__rpow__',
     '__rrshift__',
     '__rshift__',
     '__rsub__',
     '__rtruediv__',
     '__rxor__',
-    '__set__',
-    '__set_name__',
-    '__setattr__',
-    '__setitem__',
-    '__setstate__',
-    '__sizeof__',
-    '__slotnames__',
-    '__slots__',
-    '__spec__',
     '__str__',
     '__sub__',
-    '__subclasscheck__',
-    '__subclasshook__',
     '__truediv__',
-    '__trunc__',
-    '__warningregistry__',
-    '__weakref__',
     '__xor__',
-    '_abc_impl',
-    '_blksize',
-    '_dealloc_warn',
-    '_finalizing',
-    '_find_and_load',
-    '_fix_up_module',
-    '_get_sourcefile',
-    '_handle_fromlist',
-    '_initializing',
-    '_is_text_encoding',
-    '_lock_unlock_module',
-    '_showwarnmsg',
-    '_shutdown',
-    '_slotnames',
-    '_strptime_time',
-    '_uninitialized_submodules',
-    '_warn_unawaited_coroutine',
-    '_xoptions',
-    'add',
-    'append',
-    'big',
-    'buffer',
-    'builtins',
-    'clear',
-    'close',
-    'code',
-    'copy',
-    'copyreg',
-    'decode',
-    'default',
-    'defaultaction',
-    'difference_update',
-    'dispatch_table',
-    'displayhook',
-    'enable',
-    'encoding',
-    'end_lineno',
-    'end_offset',
-    'errors',
-    'excepthook',
-    'extend',
-    'filename',
-    'fileno',
-    'fillvalue',
-    'filters',
-    'find_class',
-    'flush',
-    'get',
-    'get_source',
-    'getattr',
-    'ignore',
-    'importlib',
-    'intersection',
-    'isatty',
-    'items',
-    'iter',
-    'keys',
-    'last_traceback',
-    'last_type',
-    'last_value',
-    'latin1',
-    'lineno',
-    'little',
-    'match',
-    'metaclass',
-    'mode',
-    'modules',
-    'mro',
-    'msg',
-    'n_fields',
-    'n_sequence_fields',
-    'n_unnamed_fields',
-    'name',
-    'obj',
-    'offset',
-    'onceregistry',
-    'open',
-    'parent',
-    'partial',
-    'path',
-    'peek',
-    'persistent_id',
-    'persistent_load',
-    'print_file_and_line',
-    'ps1',
-    'ps2',
-    'raw',
-    'read',
-    'read1',
-    'readable',
-    'readall',
-    'readinto',
-    'readinto1',
-    'readline',
-    'reducer_override',
-    'reload',
-    'replace',
-    'reset',
-    'return',
-    'reversed',
-    'seek',
-    'seekable',
-    'send',
-    'setstate',
-    'sort',
-    'stderr',
-    'stdin',
-    'stdout',
-    'strict',
-    'symmetric_difference_update',
-    'tell',
-    'text',
-    'threading',
-    'throw',
-    'unraisablehook',
-    'values',
-    'version',
-    'warnings',
-    'warnoptions',
-    'writable',
-    'write',
-    'zipimporter',
 ]
 
 
 #######################################
 # helpers
 
+def iter_global_strings():
+    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+    for dirname, _, files in os.walk(ROOT):
+        if os.path.relpath(dirname, ROOT).startswith('Include'):
+            continue
+        for name in files:
+            if not name.endswith(('.c', '.h')):
+                continue
+            filename = os.path.join(dirname, name)
+            with open(os.path.join(filename), encoding='utf-8') as infile:
+                for lno, line in enumerate(infile, 1):
+                    for m in id_regex.finditer(line):
+                        identifier, = m.groups()
+                        yield identifier, None, filename, lno, line
+                    for m in str_regex.finditer(line):
+                        varname, string = m.groups()
+                        yield varname, string, filename, lno, line
+
 def iter_to_marker(lines, marker):
     for line in lines:
         if line.rstrip() == marker:
@@ -354,7 +168,7 @@ def block(self, prefix, suffix="", *, continuation=None):
 END = '/* End auto-generated code */'
 
 
-def generate_global_strings():
+def generate_global_strings(identifiers, strings):
     filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
 
     # Read the non-generated part of the file.
@@ -371,22 +185,18 @@ def generate_global_strings():
         printer.write(START)
         with printer.block('struct _Py_global_strings', ';'):
             with printer.block('struct', ' literals;'):
-                for name, literal in STRING_LITERALS.items():
-                    if literal is None:
-                        outfile.write('\n')
-                        printer.write(f'// {name}')
-                    else:
-                        printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+                for name, literal in sorted(strings.items()):
+                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
             outfile.write('\n')
             with printer.block('struct', ' identifiers;'):
-                for name in sorted(IDENTIFIERS):
+                for name in sorted(identifiers):
                     assert name.isidentifier(), name
                     printer.write(f'STRUCT_FOR_ID({name})')
         printer.write(END)
         printer.write(after)
 
 
-def generate_runtime_init():
+def generate_runtime_init(identifiers, strings):
     # First get some info from the declarations.
     nsmallposints = None
     nsmallnegints = None
@@ -432,13 +242,10 @@ def generate_runtime_init():
                 # Global strings.
                 with printer.block('.strings =', ','):
                     with printer.block('.literals =', ','):
-                        for name, literal in STRING_LITERALS.items():
-                            if literal is None:
-                                printer.write('')
-                            else:
-                                printer.write(f'INIT_STR({name}, "{literal}"),')
+                        for name, literal in sorted(strings.items()):
+                            printer.write(f'INIT_STR({name}, "{literal}"),')
                     with printer.block('.identifiers =', ','):
-                        for name in sorted(IDENTIFIERS):
+                        for name in sorted(identifiers):
                             assert name.isidentifier(), name
                             printer.write(f'INIT_ID({name}),')
         printer.write(END)
@@ -507,9 +314,9 @@ def err(msg):
      )
 ''', re.VERBOSE)
 
-def check_orphan_strings():
+def check_orphan_strings(identifiers):
     literals = set(n for n, s in STRING_LITERALS.items() if s)
-    identifiers = set(IDENTIFIERS)
+    identifiers = set(identifiers)
     files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
     for i, filename in enumerate(files, start=1):
         print('.', end='')
@@ -586,11 +393,23 @@ def check_orphan_strings():
 # the script
 
 def main(*, check=False) -> None:
-    generate_global_strings()
-    generate_runtime_init()
+    identifiers = set(IDENTIFIERS)
+    strings = dict(STRING_LITERALS)
+    for name, string, filename, lno, _ in iter_global_strings():
+        if string is None:
+            if name not in IGNORED:
+                identifiers.add(name)
+        else:
+            if name not in strings:
+                strings[name] = string
+            elif string != strings[name]:
+                raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}')
+
+    generate_global_strings(identifiers, strings)
+    generate_runtime_init(identifiers, strings)
 
     if check:
-        check_orphan_strings()
+        check_orphan_strings(identifiers)
 
 
 if __name__ == '__main__':



More information about the Python-checkins mailing list