[Patches] patch for core dump in parser

Jeremy Hylton jeremy@beopen.com
Fri, 16 Jun 2000 18:35:43 -0400 (EDT)


This patch fixes a bug reported on the python list today.
http://www.python.org/pipermail/python-list/2000-June/061698.html
Brief summary: eval("2+2+" * 8192 + "2") causes a core dump in the parser.

The patch mucks with the parser code, which I've never looked at
seriously before.  The basic idea is to catch the error -- an overflow
of the short used to count the number of children -- and cause a new
parser error, E_OVERFLOW, to be returned.  Python turns that error code
into the SyntaxError "expression too long".

Note that I've changed the signature of PyNode_AddChild.  It used to
return a node * or NULL; it now returns an int: 0 on success, or an
error code (E_NOMEM or E_OVERFLOW) on failure.  Callers only checked
for NULL, so the change seems safe.

Jeremy

Index: Include/errcode.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/errcode.h,v
retrieving revision 2.8
diff -c -r2.8 errcode.h
*** Include/errcode.h	1998/04/09 21:37:20	2.8
--- Include/errcode.h	2000/06/16 22:19:53
***************
*** 52,57 ****
--- 52,58 ----
  #define E_DONE		16	/* Parsing complete */
  #define E_ERROR		17	/* Execution error */
  #define E_INDENT	18	/* Invalid indentation detected */
+ #define E_OVERFLOW      19      /* Node had too many children */
  
  #ifdef __cplusplus
  }
Index: Include/node.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/node.h,v
retrieving revision 2.12
diff -c -r2.12 node.h
*** Include/node.h	1998/12/04 18:48:11	2.12
--- Include/node.h	2000/06/16 22:19:53
***************
*** 46,52 ****
  } node;
  
  extern DL_IMPORT(node *) PyNode_New Py_PROTO((int type));
! extern DL_IMPORT(node *) PyNode_AddChild Py_PROTO((node *n, int type, char *str, int lineno));
  extern DL_IMPORT(void) PyNode_Free Py_PROTO((node *n));
  
  /* Node access functions */
--- 46,52 ----
  } node;
  
  extern DL_IMPORT(node *) PyNode_New Py_PROTO((int type));
! extern DL_IMPORT(int) PyNode_AddChild Py_PROTO((node *n, int type, char *str, int lineno));
  extern DL_IMPORT(void) PyNode_Free Py_PROTO((node *n));
  
  /* Node access functions */
Index: Parser/node.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/node.c,v
retrieving revision 2.7
diff -c -r2.7 node.c
*** Parser/node.c	1997/04/29 21:02:42	2.7
--- Parser/node.c	2000/06/16 22:19:53
***************
*** 33,38 ****
--- 33,39 ----
  
  #include "pgenheaders.h"
  #include "node.h"
+ #include "errcode.h"
  
  node *
  PyNode_New(type)
***************
*** 52,58 ****
  #define XXX 3 /* Node alignment factor to speed up realloc */
  #define XXXROUNDUP(n) ((n) == 1 ? 1 : ((n) + XXX - 1) / XXX * XXX)
  
! node *
  PyNode_AddChild(n1, type, str, lineno)
  	register node *n1;
  	int type;
--- 53,59 ----
  #define XXX 3 /* Node alignment factor to speed up realloc */
  #define XXXROUNDUP(n) ((n) == 1 ? 1 : ((n) + XXX - 1) / XXX * XXX)
  
! int
  PyNode_AddChild(n1, type, str, lineno)
  	register node *n1;
  	int type;
***************
*** 62,73 ****
  	register int nch = n1->n_nchildren;
  	register int nch1 = nch+1;
  	register node *n;
  	if (XXXROUNDUP(nch) < nch1) {
  		n = n1->n_child;
  		nch1 = XXXROUNDUP(nch1);
  		PyMem_RESIZE(n, node, nch1);
  		if (n == NULL)
! 			return NULL;
  		n1->n_child = n;
  	}
  	n = &n1->n_child[n1->n_nchildren++];
--- 63,76 ----
  	register int nch = n1->n_nchildren;
  	register int nch1 = nch+1;
  	register node *n;
+ 	if (nch == 32767)
+ 		return E_OVERFLOW;
  	if (XXXROUNDUP(nch) < nch1) {
  		n = n1->n_child;
  		nch1 = XXXROUNDUP(nch1);
  		PyMem_RESIZE(n, node, nch1);
  		if (n == NULL)
! 			return E_NOMEM;
  		n1->n_child = n;
  	}
  	n = &n1->n_child[n1->n_nchildren++];
***************
*** 76,82 ****
  	n->n_lineno = lineno;
  	n->n_nchildren = 0;
  	n->n_child = NULL;
! 	return n;
  }
  
  /* Forward */
--- 79,85 ----
  	n->n_lineno = lineno;
  	n->n_nchildren = 0;
  	n->n_child = NULL;
! 	return 0;
  }
  
  /* Forward */
Index: Parser/parser.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/parser.c,v
retrieving revision 2.10
diff -c -r2.10 parser.c
*** Parser/parser.c	1997/04/29 21:02:45	2.10
--- Parser/parser.c	2000/06/16 22:19:54
***************
*** 153,163 ****
  	int newstate;
  	int lineno;
  {
  	assert(!s_empty(s));
! 	if (PyNode_AddChild(s->s_top->s_parent, type, str, lineno) == NULL) {
! 		fprintf(stderr, "shift: no mem in addchild\n");
! 		return -1;
! 	}
  	s->s_top->s_state = newstate;
  	return 0;
  }
--- 153,163 ----
  	int newstate;
  	int lineno;
  {
+ 	int err;
  	assert(!s_empty(s));
! 	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
! 	if (err)
! 		return err;
  	s->s_top->s_state = newstate;
  	return 0;
  }
***************
*** 172,184 ****
  	int newstate;
  	int lineno;
  {
  	register node *n;
  	n = s->s_top->s_parent;
  	assert(!s_empty(s));
! 	if (PyNode_AddChild(n, type, (char *)NULL, lineno) == NULL) {
! 		fprintf(stderr, "push: no mem in addchild\n");
! 		return -1;
! 	}
  	s->s_top->s_state = newstate;
  	return s_push(s, d, CHILD(n, NCH(n)-1));
  }
--- 172,184 ----
  	int newstate;
  	int lineno;
  {
+ 	int err;
  	register node *n;
  	n = s->s_top->s_parent;
  	assert(!s_empty(s));
! 	err = PyNode_AddChild(n, type, (char *)NULL, lineno);
! 	if (err)
! 		return err;
  	s->s_top->s_state = newstate;
  	return s_push(s, d, CHILD(n, NCH(n)-1));
  }
***************
*** 233,238 ****
--- 233,239 ----
  	int lineno;
  {
  	register int ilabel;
+ 	int err;
  	
  	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
  	
***************
*** 260,279 ****
  					int arrow = x & ((1<<7)-1);
  					dfa *d1 = PyGrammar_FindDFA(
  						ps->p_grammar, nt);
! 					if (push(&ps->p_stack, nt, d1,
! 						arrow, lineno) < 0) {
  						D(printf(" MemError: push\n"));
! 						return E_NOMEM;
  					}
  					D(printf(" Push ...\n"));
  					continue;
  				}
  				
  				/* Shift the token */
! 				if (shift(&ps->p_stack, type, str,
! 						x, lineno) < 0) {
  					D(printf(" MemError: shift.\n"));
! 					return E_NOMEM;
  				}
  				D(printf(" Shift.\n"));
  				/* Pop while we are in an accept-only state */
--- 261,280 ----
  					int arrow = x & ((1<<7)-1);
  					dfa *d1 = PyGrammar_FindDFA(
  						ps->p_grammar, nt);
! 					if ((err = push(&ps->p_stack, nt, d1,
! 						arrow, lineno)) > 0) {
  						D(printf(" MemError: push\n"));
! 						return err;
  					}
  					D(printf(" Push ...\n"));
  					continue;
  				}
  				
  				/* Shift the token */
! 				if ((err = shift(&ps->p_stack, type, str,
! 						x, lineno)) > 0) {
  					D(printf(" MemError: shift.\n"));
! 					return err;
  				}
  				D(printf(" Shift.\n"));
  				/* Pop while we are in an accept-only state */
Index: Python/pythonrun.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v
retrieving revision 2.97
diff -c -r2.97 pythonrun.c
*** Python/pythonrun.c	2000/05/25 23:09:49	2.97
--- Python/pythonrun.c	2000/06/16 22:19:56
***************
*** 1033,1038 ****
--- 1033,1041 ----
  	case E_INDENT:
  		msg = "inconsistent use of tabs and spaces in indentation";
  		break;
+ 	case E_OVERFLOW:
+ 		msg = "expression too long";
+ 		break;
  	default:
  		fprintf(stderr, "error=%d\n", err->error);
  		msg = "unknown parsing error";