[Python-checkins] bpo-46541: Discover the global strings. (gh-31346)
ericsnowcurrently
webhook-mailer at python.org
Mon Feb 14 19:37:00 EST 2022
https://github.com/python/cpython/commit/12360aa159c42c7798fd14225d271e6fd84db7eb
commit: 12360aa159c42c7798fd14225d271e6fd84db7eb
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2022-02-14T17:36:51-07:00
summary:
bpo-46541: Discover the global strings. (gh-31346)
Instead of manually enumerating the global strings in generate_global_objects.py, we extrapolate the list from usage of _Py_ID() and _Py_STR() in the source files.
This is partly inspired by gh-31261.
https://bugs.python.org/issue46541
files:
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init.h
M Objects/typeobject.c
M Objects/weakrefobject.c
M Python/_warnings.c
M Python/ast_opt.c
M Python/compile.c
M Python/pythonrun.c
M Tools/scripts/generate_global_objects.py
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 17241b3a3dd16..aa597bc8281a5 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -28,13 +28,6 @@ extern "C" {
/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
struct _Py_global_strings {
struct {
- STRUCT_FOR_STR(empty, "")
- STRUCT_FOR_STR(dot, ".")
- STRUCT_FOR_STR(comma_sep, ", ")
- STRUCT_FOR_STR(percent, "%")
- STRUCT_FOR_STR(dbl_percent, "%%")
-
- // "anonymous" labels
STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>")
STRUCT_FOR_STR(anon_genexpr, "<genexpr>")
STRUCT_FOR_STR(anon_lambda, "<lambda>")
@@ -42,7 +35,12 @@ struct _Py_global_strings {
STRUCT_FOR_STR(anon_module, "<module>")
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
STRUCT_FOR_STR(anon_string, "<string>")
+ STRUCT_FOR_STR(comma_sep, ", ")
+ STRUCT_FOR_STR(dbl_percent, "%%")
+ STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(dot_locals, ".<locals>")
+ STRUCT_FOR_STR(empty, "")
+ STRUCT_FOR_STR(percent, "%")
} literals;
struct {
@@ -330,6 +328,7 @@ struct _Py_global_strings {
#define _Py_STR(NAME) \
(_Py_SINGLETON(strings.literals._ ## NAME._ascii.ob_base))
+#define _Py_DECLARE_STR(name, str)
#ifdef __cplusplus
}
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index 045ae5d2835b1..04c1e671235ea 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -644,12 +644,6 @@ extern "C" {
\
.strings = { \
.literals = { \
- INIT_STR(empty, ""), \
- INIT_STR(dot, "."), \
- INIT_STR(comma_sep, ", "), \
- INIT_STR(percent, "%"), \
- INIT_STR(dbl_percent, "%%"), \
- \
INIT_STR(anon_dictcomp, "<dictcomp>"), \
INIT_STR(anon_genexpr, "<genexpr>"), \
INIT_STR(anon_lambda, "<lambda>"), \
@@ -657,7 +651,12 @@ extern "C" {
INIT_STR(anon_module, "<module>"), \
INIT_STR(anon_setcomp, "<setcomp>"), \
INIT_STR(anon_string, "<string>"), \
+ INIT_STR(comma_sep, ", "), \
+ INIT_STR(dbl_percent, "%%"), \
+ INIT_STR(dot, "."), \
INIT_STR(dot_locals, ".<locals>"), \
+ INIT_STR(empty, ""), \
+ INIT_STR(percent, "%"), \
}, \
.identifiers = { \
INIT_ID(Py_Repr), \
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 3f8f36a9c4648..8c4901119de7d 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -4546,6 +4546,7 @@ object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_DECREF(sorted_methods);
return NULL;
}
+ _Py_DECLARE_STR(comma_sep, ", ");
joined = PyUnicode_Join(&_Py_STR(comma_sep), sorted_methods);
method_count = PyObject_Length(sorted_methods);
Py_DECREF(sorted_methods);
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index 71dfa640ebf57..1712533a39d80 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -458,12 +458,12 @@ proxy_checkref(PyWeakReference *proxy)
return res; \
}
-#define WRAP_METHOD(method, special) \
+#define WRAP_METHOD(method, SPECIAL) \
static PyObject * \
method(PyObject *proxy, PyObject *Py_UNUSED(ignored)) { \
UNWRAP(proxy); \
Py_INCREF(proxy); \
- PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(special)); \
+ PyObject* res = PyObject_CallMethodNoArgs(proxy, &_Py_ID(SPECIAL)); \
Py_DECREF(proxy); \
return res; \
}
diff --git a/Python/_warnings.c b/Python/_warnings.c
index a47e5fef6865f..03e6ffcee0ac2 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -186,8 +186,8 @@ check_matched(PyInterpreterState *interp, PyObject *obj, PyObject *arg)
return rc;
}
-#define GET_WARNINGS_ATTR(interp, attr, try_import) \
- get_warnings_attr(interp, &_Py_ID(attr), try_import)
+#define GET_WARNINGS_ATTR(interp, ATTR, try_import) \
+ get_warnings_attr(interp, &_Py_ID(ATTR), try_import)
/*
Returns a new reference.
diff --git a/Python/ast_opt.c b/Python/ast_opt.c
index 2911370649790..77ed29d0cdddd 100644
--- a/Python/ast_opt.c
+++ b/Python/ast_opt.c
@@ -268,6 +268,8 @@ parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena)
PyObject *str = PyUnicode_Substring(fmt, start, pos);
/* str = str.replace('%%', '%') */
if (str && has_percents) {
+ _Py_DECLARE_STR(percent, "%");
+ _Py_DECLARE_STR(dbl_percent, "%%");
Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent),
&_Py_STR(percent), -1));
}
diff --git a/Python/compile.c b/Python/compile.c
index ac4960b5df320..1cf20d3a36ac1 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -667,6 +667,7 @@ compiler_set_qualname(struct compiler *c)
|| parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
|| parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
{
+ _Py_DECLARE_STR(dot_locals, ".<locals>");
base = PyUnicode_Concat(parent->u_qualname,
&_Py_STR(dot_locals));
if (base == NULL)
@@ -2022,6 +2023,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
{
PyCodeObject *co;
int addNone = 1;
+ _Py_DECLARE_STR(anon_module, "<module>");
if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
mod, 1)) {
return NULL;
@@ -2876,6 +2878,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
return 0;
}
+ _Py_DECLARE_STR(anon_lambda, "<lambda>");
if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
(void *)e, e->lineno)) {
return 0;
@@ -5347,6 +5350,7 @@ static int
compiler_genexp(struct compiler *c, expr_ty e)
{
assert(e->kind == GeneratorExp_kind);
+ _Py_DECLARE_STR(anon_genexpr, "<genexpr>");
return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
e->v.GeneratorExp.generators,
e->v.GeneratorExp.elt, NULL);
@@ -5356,6 +5360,7 @@ static int
compiler_listcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == ListComp_kind);
+ _Py_DECLARE_STR(anon_listcomp, "<listcomp>");
return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
e->v.ListComp.generators,
e->v.ListComp.elt, NULL);
@@ -5365,6 +5370,7 @@ static int
compiler_setcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == SetComp_kind);
+ _Py_DECLARE_STR(anon_setcomp, "<setcomp>");
return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
e->v.SetComp.generators,
e->v.SetComp.elt, NULL);
@@ -5375,6 +5381,7 @@ static int
compiler_dictcomp(struct compiler *c, expr_ty e)
{
assert(e->kind == DictComp_kind);
+ _Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
e->v.DictComp.generators,
e->v.DictComp.key, e->v.DictComp.value);
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index b34a22391822b..38ca952838a1f 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -515,6 +515,7 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
goto finally;
if (v == Py_None) {
Py_DECREF(v);
+ _Py_DECLARE_STR(anon_string, "<string>");
*filename = &_Py_STR(anon_string);
Py_INCREF(*filename);
}
@@ -1562,6 +1563,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
if (arena == NULL)
return NULL;
+ _Py_DECLARE_STR(anon_string, "<string>");
mod = _PyParser_ASTFromString(
str, &_Py_STR(anon_string), start, flags, arena);
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
index 73068894d974b..e989f3c086f34 100644
--- a/Tools/scripts/generate_global_objects.py
+++ b/Tools/scripts/generate_global_objects.py
@@ -13,298 +13,112 @@
STRING_LITERALS = {
'empty': '',
'dot': '.',
- 'comma_sep': ', ',
- 'percent': '%',
- 'dbl_percent': '%%',
-
- '"anonymous" labels': None,
- 'anon_dictcomp': '<dictcomp>',
- 'anon_genexpr': '<genexpr>',
- 'anon_lambda': '<lambda>',
- 'anon_listcomp': '<listcomp>',
- 'anon_module': '<module>',
- 'anon_setcomp': '<setcomp>',
- 'anon_string': '<string>',
- 'dot_locals': '.<locals>',
+}
+IGNORED = {
+ 'ACTION', # Python/_warnings.c
+ 'ATTR', # Python/_warnings.c and Objects/funcobject.c
+ 'DUNDER', # Objects/typeobject.c
+ 'RDUNDER', # Objects/typeobject.c
+ 'SPECIAL', # Objects/weakrefobject.c
}
IDENTIFIERS = [
- 'Py_Repr',
- 'TextIOWrapper',
+ # from ADD() Python/_warnings.c
+ 'default',
+ 'ignore',
+
+ # from GET_WARNINGS_ATTR() in Python/_warnings.c
'WarningMessage',
- '_',
- '__IOBase_closed',
- '__abc_tpflags__',
+ '_showwarnmsg',
+ '_warn_unawaited_coroutine',
+ 'defaultaction',
+ 'filters',
+ 'onceregistry',
+
+ # from WRAP_METHOD() in Objects/weakrefobject.c
+ '__bytes__',
+ '__reversed__',
+
+ # from COPY_ATTR() in Objects/funcobject.c
+ '__module__',
+ '__name__',
+ '__qualname__',
+ '__doc__',
+ '__annotations__',
+
+ # from SLOT* in Objects/typeobject.c
'__abs__',
- '__abstractmethods__',
'__add__',
- '__aenter__',
- '__aexit__',
- '__aiter__',
- '__all__',
'__and__',
- '__anext__',
- '__annotations__',
- '__args__',
- '__await__',
- '__bases__',
- '__bool__',
- '__build_class__',
- '__builtins__',
- '__bytes__',
- '__call__',
- '__cantrace__',
- '__class__',
- '__class_getitem__',
- '__classcell__',
- '__complex__',
- '__contains__',
- '__copy__',
- '__del__',
- '__delattr__',
- '__delete__',
- '__delitem__',
- '__dict__',
- '__dir__',
'__divmod__',
- '__doc__',
- '__enter__',
- '__eq__',
- '__exit__',
- '__file__',
'__float__',
'__floordiv__',
- '__format__',
- '__fspath__',
- '__ge__',
- '__get__',
- '__getattr__',
- '__getattribute__',
- '__getinitargs__',
'__getitem__',
- '__getnewargs__',
- '__getnewargs_ex__',
- '__getstate__',
- '__gt__',
- '__hash__',
'__iadd__',
'__iand__',
'__ifloordiv__',
'__ilshift__',
'__imatmul__',
'__imod__',
- '__import__',
'__imul__',
- '__index__',
- '__init__',
- '__init_subclass__',
- '__instancecheck__',
'__int__',
'__invert__',
'__ior__',
- '__ipow__',
'__irshift__',
- '__isabstractmethod__',
'__isub__',
- '__iter__',
'__itruediv__',
'__ixor__',
- '__le__',
- '__len__',
- '__length_hint__',
- '__loader__',
'__lshift__',
- '__lt__',
- '__ltrace__',
- '__main__',
'__matmul__',
- '__missing__',
'__mod__',
- '__module__',
- '__mro_entries__',
'__mul__',
- '__name__',
- '__ne__',
'__neg__',
- '__new__',
- '__newobj__',
- '__newobj_ex__',
- '__next__',
- '__note__',
'__or__',
- '__origin__',
- '__package__',
- '__parameters__',
- '__path__',
'__pos__',
'__pow__',
- '__prepare__',
- '__qualname__',
'__radd__',
'__rand__',
'__rdivmod__',
- '__reduce__',
- '__reduce_ex__',
- '__repr__',
- '__reversed__',
'__rfloordiv__',
'__rlshift__',
'__rmatmul__',
'__rmod__',
'__rmul__',
'__ror__',
- '__round__',
'__rpow__',
'__rrshift__',
'__rshift__',
'__rsub__',
'__rtruediv__',
'__rxor__',
- '__set__',
- '__set_name__',
- '__setattr__',
- '__setitem__',
- '__setstate__',
- '__sizeof__',
- '__slotnames__',
- '__slots__',
- '__spec__',
'__str__',
'__sub__',
- '__subclasscheck__',
- '__subclasshook__',
'__truediv__',
- '__trunc__',
- '__warningregistry__',
- '__weakref__',
'__xor__',
- '_abc_impl',
- '_blksize',
- '_dealloc_warn',
- '_finalizing',
- '_find_and_load',
- '_fix_up_module',
- '_get_sourcefile',
- '_handle_fromlist',
- '_initializing',
- '_is_text_encoding',
- '_lock_unlock_module',
- '_showwarnmsg',
- '_shutdown',
- '_slotnames',
- '_strptime_time',
- '_uninitialized_submodules',
- '_warn_unawaited_coroutine',
- '_xoptions',
- 'add',
- 'append',
- 'big',
- 'buffer',
- 'builtins',
- 'clear',
- 'close',
- 'code',
- 'copy',
- 'copyreg',
- 'decode',
- 'default',
- 'defaultaction',
- 'difference_update',
- 'dispatch_table',
- 'displayhook',
- 'enable',
- 'encoding',
- 'end_lineno',
- 'end_offset',
- 'errors',
- 'excepthook',
- 'extend',
- 'filename',
- 'fileno',
- 'fillvalue',
- 'filters',
- 'find_class',
- 'flush',
- 'get',
- 'get_source',
- 'getattr',
- 'ignore',
- 'importlib',
- 'intersection',
- 'isatty',
- 'items',
- 'iter',
- 'keys',
- 'last_traceback',
- 'last_type',
- 'last_value',
- 'latin1',
- 'lineno',
- 'little',
- 'match',
- 'metaclass',
- 'mode',
- 'modules',
- 'mro',
- 'msg',
- 'n_fields',
- 'n_sequence_fields',
- 'n_unnamed_fields',
- 'name',
- 'obj',
- 'offset',
- 'onceregistry',
- 'open',
- 'parent',
- 'partial',
- 'path',
- 'peek',
- 'persistent_id',
- 'persistent_load',
- 'print_file_and_line',
- 'ps1',
- 'ps2',
- 'raw',
- 'read',
- 'read1',
- 'readable',
- 'readall',
- 'readinto',
- 'readinto1',
- 'readline',
- 'reducer_override',
- 'reload',
- 'replace',
- 'reset',
- 'return',
- 'reversed',
- 'seek',
- 'seekable',
- 'send',
- 'setstate',
- 'sort',
- 'stderr',
- 'stdin',
- 'stdout',
- 'strict',
- 'symmetric_difference_update',
- 'tell',
- 'text',
- 'threading',
- 'throw',
- 'unraisablehook',
- 'values',
- 'version',
- 'warnings',
- 'warnoptions',
- 'writable',
- 'write',
- 'zipimporter',
]
#######################################
# helpers
+def iter_global_strings():
+ id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+ str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+ for dirname, _, files in os.walk(ROOT):
+ if os.path.relpath(dirname, ROOT).startswith('Include'):
+ continue
+ for name in files:
+ if not name.endswith(('.c', '.h')):
+ continue
+ filename = os.path.join(dirname, name)
+ with open(os.path.join(filename), encoding='utf-8') as infile:
+ for lno, line in enumerate(infile, 1):
+ for m in id_regex.finditer(line):
+ identifier, = m.groups()
+ yield identifier, None, filename, lno, line
+ for m in str_regex.finditer(line):
+ varname, string = m.groups()
+ yield varname, string, filename, lno, line
+
def iter_to_marker(lines, marker):
for line in lines:
if line.rstrip() == marker:
@@ -354,7 +168,7 @@ def block(self, prefix, suffix="", *, continuation=None):
END = '/* End auto-generated code */'
-def generate_global_strings():
+def generate_global_strings(identifiers, strings):
filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
# Read the non-generated part of the file.
@@ -371,22 +185,18 @@ def generate_global_strings():
printer.write(START)
with printer.block('struct _Py_global_strings', ';'):
with printer.block('struct', ' literals;'):
- for name, literal in STRING_LITERALS.items():
- if literal is None:
- outfile.write('\n')
- printer.write(f'// {name}')
- else:
- printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+ for name, literal in sorted(strings.items()):
+ printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
outfile.write('\n')
with printer.block('struct', ' identifiers;'):
- for name in sorted(IDENTIFIERS):
+ for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'STRUCT_FOR_ID({name})')
printer.write(END)
printer.write(after)
-def generate_runtime_init():
+def generate_runtime_init(identifiers, strings):
# First get some info from the declarations.
nsmallposints = None
nsmallnegints = None
@@ -432,13 +242,10 @@ def generate_runtime_init():
# Global strings.
with printer.block('.strings =', ','):
with printer.block('.literals =', ','):
- for name, literal in STRING_LITERALS.items():
- if literal is None:
- printer.write('')
- else:
- printer.write(f'INIT_STR({name}, "{literal}"),')
+ for name, literal in sorted(strings.items()):
+ printer.write(f'INIT_STR({name}, "{literal}"),')
with printer.block('.identifiers =', ','):
- for name in sorted(IDENTIFIERS):
+ for name in sorted(identifiers):
assert name.isidentifier(), name
printer.write(f'INIT_ID({name}),')
printer.write(END)
@@ -507,9 +314,9 @@ def err(msg):
)
''', re.VERBOSE)
-def check_orphan_strings():
+def check_orphan_strings(identifiers):
literals = set(n for n, s in STRING_LITERALS.items() if s)
- identifiers = set(IDENTIFIERS)
+ identifiers = set(identifiers)
files = glob.iglob(os.path.join(ROOT, '**', '*.[ch]'), recursive=True)
for i, filename in enumerate(files, start=1):
print('.', end='')
@@ -586,11 +393,23 @@ def check_orphan_strings():
# the script
def main(*, check=False) -> None:
- generate_global_strings()
- generate_runtime_init()
+ identifiers = set(IDENTIFIERS)
+ strings = dict(STRING_LITERALS)
+ for name, string, filename, lno, _ in iter_global_strings():
+ if string is None:
+ if name not in IGNORED:
+ identifiers.add(name)
+ else:
+ if name not in strings:
+ strings[name] = string
+ elif string != strings[name]:
+ raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}')
+
+ generate_global_strings(identifiers, strings)
+ generate_runtime_init(identifiers, strings)
if check:
- check_orphan_strings()
+ check_orphan_strings(identifiers)
if __name__ == '__main__':
More information about the Python-checkins mailing list