[Python-Dev] cpython: replace custom validation logic in the parse module with a simple DFA validator
Christian Heimes
christian at python.org
Sat Jun 4 20:26:01 EDT 2016
On 2016-06-02 11:32, benjamin.peterson wrote:
> https://hg.python.org/cpython/rev/4a9159ea2536
> changeset: 101601:4a9159ea2536
> user: Benjamin Peterson <benjamin at python.org>
> date: Thu Jun 02 11:30:18 2016 -0700
> summary:
> replace custom validation logic in the parse module with a simple DFA validator (closes #26526)
>
> Patch from A. Skrobov.
>
> files:
> Misc/NEWS | 3 +
> Modules/parsermodule.c | 2545 +--------------------------
> 2 files changed, 96 insertions(+), 2452 deletions(-)
>
>
> diff --git a/Misc/NEWS b/Misc/NEWS
> --- a/Misc/NEWS
> +++ b/Misc/NEWS
> @@ -22,6 +22,9 @@
> Library
> -------
>
> +- Issue #26526: Replace custom parse tree validation in the parser
> + module with a simple DFA validator.
> +
> - Issue #27114: Fix SSLContext._load_windows_store_certs fails with
> PermissionError
>
> diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c
> --- a/Modules/parsermodule.c
> +++ b/Modules/parsermodule.c
> @@ -670,9 +670,75 @@
>
>
> static node* build_node_tree(PyObject *tuple);
> -static int validate_expr_tree(node *tree);
> -static int validate_file_input(node *tree);
> -static int validate_encoding_decl(node *tree);
> +
> +static int
> +validate_node(node *tree)
> +{
> + int type = TYPE(tree);
> + int nch = NCH(tree);
> + dfa *nt_dfa;
> + state *dfa_state;
> + int pos, arc;
> +
> + assert(ISNONTERMINAL(type));
> + type -= NT_OFFSET;
> + if (type >= _PyParser_Grammar.g_ndfas) {
> + PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
> + return 0;
> + }
> + nt_dfa = &_PyParser_Grammar.g_dfa[type];
> + REQ(tree, nt_dfa->d_type);
> +
> + /* Run the DFA for this nonterminal. */
> + dfa_state = &nt_dfa->d_state[nt_dfa->d_initial];
> + for (pos = 0; pos < nch; ++pos) {
> + node *ch = CHILD(tree, pos);
> + int ch_type = TYPE(ch);
> + for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
> + short a_label = dfa_state->s_arc[arc].a_lbl;
> + assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
> + if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) {
> + /* The child is acceptable; if non-terminal, validate it recursively. */
> + if (ISNONTERMINAL(ch_type) && !validate_node(ch))
> + return 0;
> +
> + /* Update the state, and move on to the next child. */
> + dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
> + goto arc_found;
> + }
> + }
> + /* What would this state have accepted? */
> + {
> + short a_label = dfa_state->s_arc->a_lbl;
> + int next_type;
> + if (!a_label) /* Wouldn't accept any more children */
> + goto illegal_num_children;
> +
> + next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
> + if (ISNONTERMINAL(next_type))
> + PyErr_Format(parser_error, "Expected node type %d, got %d.",
> + next_type, ch_type);
> + else
> + PyErr_Format(parser_error, "Illegal terminal: expected %s.",
> + _PyParser_TokenNames[next_type]);
Coverity doesn't that line:
CID 1362505 (#1 of 1): Out-of-bounds read (OVERRUN)
20. overrun-local: Overrunning array _PyParser_TokenNames of 58 8-byte
elements at element index 255 (byte offset 2040) using index next_type
(which evaluates to 255).
Can you add a check to verify, that next_type is not out-of-bounds, e.g.
+ else if (next_type > N_TOKENS)
+ PyErr_Format(parser_error, "Illegal node type %d",
next_type);
> + return 0;
> + }
> +
> +arc_found:
> + continue;
> + }
> + /* Are we in a final state? If so, return 1 for successful validation. */
> + for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
> + if (!dfa_state->s_arc[arc].a_lbl) {
> + return 1;
> + }
> + }
> +
> +illegal_num_children:
> + PyErr_Format(parser_error,
> + "Illegal number of children for %s node.", nt_dfa->d_name);
> + return 0;
> +}
More information about the Python-Dev
mailing list