[Python-Dev] cpython: replace custom validation logic in the parse module with a simple DFA validator

Christian Heimes christian at python.org
Sat Jun 4 20:26:01 EDT 2016


On 2016-06-02 11:32, benjamin.peterson wrote:
> https://hg.python.org/cpython/rev/4a9159ea2536
> changeset:   101601:4a9159ea2536
> user:        Benjamin Peterson <benjamin at python.org>
> date:        Thu Jun 02 11:30:18 2016 -0700
> summary:
>   replace custom validation logic in the parse module with a simple DFA validator (closes #26526)
> 
> Patch from A. Skrobov.
> 
> files:
>   Misc/NEWS              |     3 +
>   Modules/parsermodule.c |  2545 +--------------------------
>   2 files changed, 96 insertions(+), 2452 deletions(-)
> 
> 
> diff --git a/Misc/NEWS b/Misc/NEWS
> --- a/Misc/NEWS
> +++ b/Misc/NEWS
> @@ -22,6 +22,9 @@
>  Library
>  -------
>  
> +- Issue #26526: Replace custom parse tree validation in the parser
> +  module with a simple DFA validator.
> +
>  - Issue #27114: Fix SSLContext._load_windows_store_certs fails with
>    PermissionError
>  
> diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c
> --- a/Modules/parsermodule.c
> +++ b/Modules/parsermodule.c
> @@ -670,9 +670,75 @@
>  
>  
>  static node* build_node_tree(PyObject *tuple);
> -static int   validate_expr_tree(node *tree);
> -static int   validate_file_input(node *tree);
> -static int   validate_encoding_decl(node *tree);
> +
> +static int
> +validate_node(node *tree)
> +{
> +    int type = TYPE(tree);
> +    int nch = NCH(tree);
> +    dfa *nt_dfa;
> +    state *dfa_state;
> +    int pos, arc;
> +
> +    assert(ISNONTERMINAL(type));
> +    type -= NT_OFFSET;
> +    if (type >= _PyParser_Grammar.g_ndfas) {
> +        PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
> +        return 0;
> +    }
> +    nt_dfa = &_PyParser_Grammar.g_dfa[type];
> +    REQ(tree, nt_dfa->d_type);
> +
> +    /* Run the DFA for this nonterminal. */
> +    dfa_state = &nt_dfa->d_state[nt_dfa->d_initial];
> +    for (pos = 0; pos < nch; ++pos) {
> +        node *ch = CHILD(tree, pos);
> +        int ch_type = TYPE(ch);
> +        for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
> +            short a_label = dfa_state->s_arc[arc].a_lbl;
> +            assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
> +            if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) {
> +     	        /* The child is acceptable; if non-terminal, validate it recursively. */
> +                if (ISNONTERMINAL(ch_type) && !validate_node(ch))
> +                    return 0;
> +
> +                /* Update the state, and move on to the next child. */
> +                dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
> +                goto arc_found;
> +            }
> +        }
> +        /* What would this state have accepted? */
> +        {
> +            short a_label = dfa_state->s_arc->a_lbl;
> +            int next_type;
> +            if (!a_label) /* Wouldn't accept any more children */
> +                goto illegal_num_children;
> +
> +            next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
> +            if (ISNONTERMINAL(next_type))
> +                PyErr_Format(parser_error, "Expected node type %d, got %d.",
> +                             next_type, ch_type);
> +            else
> +                PyErr_Format(parser_error, "Illegal terminal: expected %s.",
> +                             _PyParser_TokenNames[next_type]);

Coverity doesn't that line:

CID 1362505 (#1 of 1): Out-of-bounds read (OVERRUN)
20. overrun-local: Overrunning array _PyParser_TokenNames of 58 8-byte
elements at element index 255 (byte offset 2040) using index next_type
(which evaluates to 255).

Can you add a check to verify, that next_type is not out-of-bounds, e.g.

+            else if (next_type > N_TOKENS)
+                PyErr_Format(parser_error, "Illegal node type %d",
next_type);



> +            return 0;
> +        }
> +
> +arc_found:
> +        continue;
> +    }
> +    /* Are we in a final state? If so, return 1 for successful validation. */
> +    for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
> +        if (!dfa_state->s_arc[arc].a_lbl) {
> +            return 1;
> +        }
> +    }
> +
> +illegal_num_children:
> +    PyErr_Format(parser_error,
> +                 "Illegal number of children for %s node.", nt_dfa->d_name);
> +    return 0;
> +}





More information about the Python-Dev mailing list