[Patches] Support for EXTENDED_ARG, fixes for bugs open/32 and open/134

Charles G Waldman cgw@fnal.gov
Mon, 22 May 2000 17:47:29 -0500 (CDT)


Description: 

This patch introduces a new opcode, EXTENDED_ARG == 143, which allows
for opcode arguments to have values outside the 16-bit range.  The
existing Python bytecode format allows for an opcode to be followed by
two bytes of argument value.  With this patch, an EXTENDED_ARG opcode
immediately following such an instruction supplies two more
(high-order) bytes of argument.  (The EXTENDED_ARG opcode could be
repeated to allow more than 4 bytes of oparg, but the utility of this
is questionable.)

Benefits: 

(A) Source files longer than 32K lines no longer kill the interpreter
(bug open/32).  Although good coding practice would be to not generate
such big files, it is possible for machine-generated files to exceed
this size.  Furthermore arbitrary limits are not very nice, and the
current CVS Python dumps core on files right near the 32767-line
boundary.

(B) Source files containing explicit list literals with more than 32K
elements are no longer a problem; e.g.,
 
 python -c 'eval( "["+"1,"*16384+"1]" )'

no longer dumps core (bug open/134).

In fact, even python -c 'eval( "["+"1,"*163840+"1]" )' is not a problem.

Note also that although this introduces a new opcode, the .pyc files
are still basically compatible; an old Python version will choke on
the new EXTENDED_ARG bytecode, but that opcode will only appear in
modules which would have been a problem for that version anyway.

CVS commit notice:

This patch introduces a new opcode, EXTENDED_ARG == 143, which allows
for opcode arguments to have values outside the 16-bit range.  This
fixes bugs open/32 and open/134.


Disclaimer:
                   I confirm that, to the best of my knowledge and belief, this
                   contribution is free of any claims of third parties under
                   copyright, patent or other rights or interests ("claims").  To
                   the extent that I have any such claims, I hereby grant to CNRI a
                   nonexclusive, irrevocable, royalty-free, worldwide license to
                   reproduce, distribute, perform and/or display publicly, prepare
                   derivative versions, and otherwise use this contribution as part
                   of the Python software and its related documentation, or any
                   derivative versions thereof, at no cost to CNRI or its licensed
                   users, and to authorize others to do so.

                   I acknowledge that CNRI may, at its sole discretion, decide
                   whether or not to incorporate this contribution in the Python
                   software and its related documentation.  I further grant CNRI
                   permission to use my name and other identifying information
                   provided to CNRI by me for use in connection with the Python
                   software and its related documentation.


Patch:


Index: Include/node.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/node.h,v
retrieving revision 2.12
diff -c -r2.12 node.h
*** Include/node.h	1998/12/04 18:48:11	2.12
--- Include/node.h	2000/05/22 22:45:09
***************
*** 40,47 ****
  typedef struct _node {
  	short		n_type;
  	char		*n_str;
! 	short		n_lineno;
! 	short		n_nchildren;
  	struct _node	*n_child;
  } node;
  
--- 40,47 ----
  typedef struct _node {
  	short		n_type;
  	char		*n_str;
! 	unsigned int	n_lineno;
! 	unsigned int	n_nchildren;
  	struct _node	*n_child;
  } node;
  
Index: Include/opcode.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/opcode.h,v
retrieving revision 2.23
diff -c -r2.23 opcode.h
*** Include/opcode.h	2000/03/29 00:10:03	2.23
--- Include/opcode.h	2000/05/22 22:45:09
***************
*** 149,154 ****
--- 149,157 ----
  #define CALL_FUNCTION_KW           141	/* #args + (#kwargs<<8) */
  #define CALL_FUNCTION_VAR_KW       142	/* #args + (#kwargs<<8) */
  
+ /* Support for opargs more than 16 bits long */
+ #define EXTENDED_ARG	143
+ 
  /* Comparison operator codes (argument to COMPARE_OP) */
  enum cmp_op {LT, LE, EQ, NE, GT, GE, IN, NOT_IN, IS, IS_NOT, EXC_MATCH, BAD};
  
Index: Lib/dis.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/dis.py,v
retrieving revision 1.21
diff -c -r1.21 dis.py
*** Lib/dis.py	2000/03/30 15:02:11	1.21
--- Lib/dis.py	2000/05/22 22:45:10
***************
*** 70,75 ****
--- 70,80 ----
  		if op >= HAVE_ARGUMENT:
  			oparg = ord(code[i]) + ord(code[i+1])*256
  			i = i+2
+ 			if ord(code[i])  == EXTENDED_ARG:
+ 				i = i+1
+ 				extension = ord(code[i]) + ord(code[i+1])*256
+ 				oparg = oparg + 65536*extension
+ 				i = i+2
  			print string.rjust(`oparg`, 5),
  			if op in hasconst:
  				print '(' + `co.co_consts[oparg]` + ')',
***************
*** 256,261 ****
--- 261,268 ----
  def_op('CALL_FUNCTION_KW', 141)      # #args + (#kwargs << 8)
  def_op('CALL_FUNCTION_VAR_KW', 142)  # #args + (#kwargs << 8)
  
+ def_op('EXTENDED_ARG', 143) 
+ EXTENDED_ARG = 143
  
  def _test():
  	"""Simple test program to disassemble a file."""
Index: Python/ceval.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v
retrieving revision 2.179
diff -c -r2.179 ceval.c
*** Python/ceval.c	2000/05/08 14:06:50	2.179
--- Python/ceval.c	2000/05/22 22:45:12
***************
*** 382,387 ****
--- 382,389 ----
  #define INSTR_OFFSET()	(next_instr - first_instr)
  #define NEXTOP()	(*next_instr++)
  #define NEXTARG()	(next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
+ #define IS_EXTENDED()	(*next_instr == EXTENDED_ARG)
+ #define EXTEND_ARG(lo, hi)  (lo | ( hi<<16))
  #define JUMPTO(x)	(next_instr = first_instr + (x))
  #define JUMPBY(x)	(next_instr += (x))
  
***************
*** 643,652 ****
  #if defined(Py_DEBUG) || defined(LLTRACE)
  		f->f_lasti = INSTR_OFFSET();
  #endif
- 		
  		opcode = NEXTOP();
! 		if (HAS_ARG(opcode))
  			oparg = NEXTARG();
  #ifdef DYNAMIC_EXECUTION_PROFILE
  #ifdef DXPAIRS
  		dxpairs[lastopcode][opcode]++;
--- 645,660 ----
  #if defined(Py_DEBUG) || defined(LLTRACE)
  		f->f_lasti = INSTR_OFFSET();
  #endif
  		opcode = NEXTOP();
! 		if (HAS_ARG(opcode)) {
  			oparg = NEXTARG();
+ 			if (IS_EXTENDED()) {
+ 				int extension;
+ 				NEXTOP(); /* skip over EXTENDED_ARG bytecode*/
+ 				extension = NEXTARG();
+ 				oparg = EXTEND_ARG(oparg, extension);
+ 			}
+ 		}
  #ifdef DYNAMIC_EXECUTION_PROFILE
  #ifdef DXPAIRS
  		dxpairs[lastopcode][opcode]++;
***************
*** 660,667 ****
  		
  		if (lltrace) {
  			if (HAS_ARG(opcode)) {
  				printf("%d: %d, %d\n",
! 					(int) (INSTR_OFFSET() - 3),
  					opcode, oparg);
  			}
  			else {
--- 668,678 ----
  		
  		if (lltrace) {
  			if (HAS_ARG(opcode)) {
+ 				int backup = 3;
+ 				if (first_instr[INSTR_OFFSET() - backup] == EXTENDED_ARG)
+ 					backup = 6;
  				printf("%d: %d, %d\n",
! 					(int) (INSTR_OFFSET() - backup),
  					opcode, oparg);
  			}
  			else {
***************
*** 1821,1828 ****
  		
  		if (why == WHY_EXCEPTION) {
  			f->f_lasti = INSTR_OFFSET() - 1;
! 			if (HAS_ARG(opcode))
  				f->f_lasti -= 2;
  			PyTraceBack_Here(f);
  
  			if (f->f_trace)
--- 1832,1846 ----
  		
  		if (why == WHY_EXCEPTION) {
  			f->f_lasti = INSTR_OFFSET() - 1;
! 			if (HAS_ARG(opcode)) {
! 				unsigned char *code;
! 				_PyCode_GETCODEPTR(f->f_code, &code);
  				f->f_lasti -= 2;
+ 				if (code[f->f_lasti] == EXTENDED_ARG) {
+ 					f->f_lasti -= 3;
+ 				}
+ 			}
+ 				
  			PyTraceBack_Here(f);
  
  			if (f->f_trace)
Index: Python/compile.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v
retrieving revision 2.108
diff -c -r2.108 compile.c
*** Python/compile.c	2000/05/03 23:44:38	2.108
--- Python/compile.c	2000/05/22 22:45:12
***************
*** 554,560 ****
  	int x;
  {
  	com_addbyte(c, x & 0xff);
! 	com_addbyte(c, x >> 8); /* XXX x should be positive */
  }
  
  static void
--- 554,568 ----
  	int x;
  {
  	com_addbyte(c, x & 0xff);
! 	x >>= 8;
! 	com_addbyte(c, x & 0xff);
! 	x >>= 8;
! 	if (x) {
! 		com_addbyte(c, EXTENDED_ARG);
! 		com_addbyte(c, x & 0xff);
! 		x >>= 8;
! 		com_addbyte(c, x & 0xff);
! 	}
  }
  
  static void
***************
*** 653,659 ****
  		prev = code[anchor] + (code[anchor+1] << 8);
  		dist = target - (anchor+2);
  		code[anchor] = dist & 0xff;
! 		code[anchor+1] = dist >> 8;
  		if (!prev)
  			break;
  		anchor -= prev;
--- 661,674 ----
  		prev = code[anchor] + (code[anchor+1] << 8);
  		dist = target - (anchor+2);
  		code[anchor] = dist & 0xff;
! 		dist >>= 8;
! 		code[anchor+1] = dist;
! 		dist >>= 8;
! 		if (dist) {
! 			com_error(c, PyExc_SystemError,
! 				  "com_backpatch: offset too large");
! 			break;
! 		}
  		if (!prev)
  			break;
  		anchor -= prev;