On 2016-06-02 11:32, benjamin.peterson wrote:
https://hg.python.org/cpython/rev/4a9159ea2536 changeset: 101601:4a9159ea2536 user: Benjamin Peterson
date: Thu Jun 02 11:30:18 2016 -0700 summary: replace custom validation logic in the parse module with a simple DFA validator (closes #26526) Patch from A. Skrobov.
files: Misc/NEWS | 3 + Modules/parsermodule.c | 2545 +-------------------------- 2 files changed, 96 insertions(+), 2452 deletions(-)
diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -22,6 +22,9 @@ Library -------
+- Issue #26526: Replace custom parse tree validation in the parser + module with a simple DFA validator. + - Issue #27114: Fix SSLContext._load_windows_store_certs fails with PermissionError
diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -670,9 +670,75 @@
static node* build_node_tree(PyObject *tuple); -static int validate_expr_tree(node *tree); -static int validate_file_input(node *tree); -static int validate_encoding_decl(node *tree); + +static int +validate_node(node *tree) +{ + int type = TYPE(tree); + int nch = NCH(tree); + dfa *nt_dfa; + state *dfa_state; + int pos, arc; + + assert(ISNONTERMINAL(type)); + type -= NT_OFFSET; + if (type >= _PyParser_Grammar.g_ndfas) { + PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree)); + return 0; + } + nt_dfa = &_PyParser_Grammar.g_dfa[type]; + REQ(tree, nt_dfa->d_type); + + /* Run the DFA for this nonterminal. */ + dfa_state = &nt_dfa->d_state[nt_dfa->d_initial]; + for (pos = 0; pos < nch; ++pos) { + node *ch = CHILD(tree, pos); + int ch_type = TYPE(ch); + for (arc = 0; arc < dfa_state->s_narcs; ++arc) { + short a_label = dfa_state->s_arc[arc].a_lbl; + assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); + if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) { + /* The child is acceptable; if non-terminal, validate it recursively. */ + if (ISNONTERMINAL(ch_type) && !validate_node(ch)) + return 0; + + /* Update the state, and move on to the next child. */ + dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow]; + goto arc_found; + } + } + /* What would this state have accepted? */ + { + short a_label = dfa_state->s_arc->a_lbl; + int next_type; + if (!a_label) /* Wouldn't accept any more children */ + goto illegal_num_children; + + next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; + if (ISNONTERMINAL(next_type)) + PyErr_Format(parser_error, "Expected node type %d, got %d.", + next_type, ch_type); + else + PyErr_Format(parser_error, "Illegal terminal: expected %s.", + _PyParser_TokenNames[next_type]);
Coverity doesn't that line: CID 1362505 (#1 of 1): Out-of-bounds read (OVERRUN) 20. overrun-local: Overrunning array _PyParser_TokenNames of 58 8-byte elements at element index 255 (byte offset 2040) using index next_type (which evaluates to 255). Can you add a check to verify, that next_type is not out-of-bounds, e.g. + else if (next_type > N_TOKENS) + PyErr_Format(parser_error, "Illegal node type %d", next_type);
+ return 0; + } + +arc_found: + continue; + } + /* Are we in a final state? If so, return 1 for successful validation. */ + for (arc = 0; arc < dfa_state->s_narcs; ++arc) { + if (!dfa_state->s_arc[arc].a_lbl) { + return 1; + } + } + +illegal_num_children: + PyErr_Format(parser_error, + "Illegal number of children for %s node.", nt_dfa->d_name); + return 0; +}