[Patches] Support for EXTENDED_ARG, fixes for bugs open/32 and open/134
Charles G Waldman
cgw@fnal.gov
Mon, 22 May 2000 17:47:29 -0500 (CDT)
Description:
This patch introduces a new opcode, EXTENDED_ARG == 143, which allows
for opcode arguments to have values outside the 16-bit range. The
existing Python bytecode format allows for an opcode to be followed by
two bytes of argument value. With this patch, a subsequent opcode of
EXTENDED_ARG allows for two more bytes of argument. (The EXTENDED_ARG
opcode could be repeated to allow more than 4 bytes of oparg, but the
utility of this is questionable.)
Benefits:
(A) Source files longer than 32K lines no longer kill the interpreter
(bug open/32). Although good coding practice would be to not generate
such big files, it is possible for machine-generated files to exceed
this size. Furthermore arbitrary limits are not very nice, and the
current CVS Python dumps core on files right near the 32767-line
boundary.
(B) Source files containing explicit arrays with more than 32K
elements are no longer a problem - e.g.
python -c 'eval( "["+"1,"*16384+"1]" )'
no longer dumps core (bug open/134).
In fact, even python -c 'eval( "["+"1,"*163840+"1]" )' is not a problem.
Note also that although this introduces a new opcode, the .pyc files
are still basically compatible; an old Python version will choke on
the new EXTENDED_ARG bytecode, but this will only appear in modules
which would have been a problem for that version anyway.
CVS commit notice:
This patch introduces a new opcode, EXTENDED_ARG == 143, which allows
for opcode arguments to have values outside the 16-bit range. This
fixes bugs open/32 and open/134.
Disclaimer:
I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims"). To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.
I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation. I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.
Patch:
Index: Include/node.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/node.h,v
retrieving revision 2.12
diff -c -r2.12 node.h
*** Include/node.h 1998/12/04 18:48:11 2.12
--- Include/node.h 2000/05/22 22:45:09
***************
*** 40,47 ****
typedef struct _node {
short n_type;
char *n_str;
! short n_lineno;
! short n_nchildren;
struct _node *n_child;
} node;
--- 40,47 ----
typedef struct _node {
short n_type;
char *n_str;
! unsigned int n_lineno;
! unsigned int n_nchildren;
struct _node *n_child;
} node;
Index: Include/opcode.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/opcode.h,v
retrieving revision 2.23
diff -c -r2.23 opcode.h
*** Include/opcode.h 2000/03/29 00:10:03 2.23
--- Include/opcode.h 2000/05/22 22:45:09
***************
*** 149,154 ****
--- 149,157 ----
#define CALL_FUNCTION_KW 141 /* #args + (#kwargs<<8) */
#define CALL_FUNCTION_VAR_KW 142 /* #args + (#kwargs<<8) */
+ /* Support for opargs more than 16 bits long */
+ #define EXTENDED_ARG 143
+
/* Comparison operator codes (argument to COMPARE_OP) */
enum cmp_op {LT, LE, EQ, NE, GT, GE, IN, NOT_IN, IS, IS_NOT, EXC_MATCH, BAD};
Index: Lib/dis.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/dis.py,v
retrieving revision 1.21
diff -c -r1.21 dis.py
*** Lib/dis.py 2000/03/30 15:02:11 1.21
--- Lib/dis.py 2000/05/22 22:45:10
***************
*** 70,75 ****
--- 70,80 ----
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
+ if ord(code[i]) == EXTENDED_ARG:
+ i = i+1
+ extension = ord(code[i]) + ord(code[i+1])*256
+ oparg = oparg + 65536*extension
+ i = i+2
print string.rjust(`oparg`, 5),
if op in hasconst:
print '(' + `co.co_consts[oparg]` + ')',
***************
*** 256,261 ****
--- 261,268 ----
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
+ def_op('EXTENDED_ARG', 143)
+ EXTENDED_ARG = 143
def _test():
"""Simple test program to disassemble a file."""
Index: Python/ceval.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v
retrieving revision 2.179
diff -c -r2.179 ceval.c
*** Python/ceval.c 2000/05/08 14:06:50 2.179
--- Python/ceval.c 2000/05/22 22:45:12
***************
*** 382,387 ****
--- 382,389 ----
#define INSTR_OFFSET() (next_instr - first_instr)
#define NEXTOP() (*next_instr++)
#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
+ #define IS_EXTENDED() (*next_instr == EXTENDED_ARG)
+ #define EXTEND_ARG(lo, hi) (lo | ( hi<<16))
#define JUMPTO(x) (next_instr = first_instr + (x))
#define JUMPBY(x) (next_instr += (x))
***************
*** 643,652 ****
#if defined(Py_DEBUG) || defined(LLTRACE)
f->f_lasti = INSTR_OFFSET();
#endif
-
opcode = NEXTOP();
! if (HAS_ARG(opcode))
oparg = NEXTARG();
#ifdef DYNAMIC_EXECUTION_PROFILE
#ifdef DXPAIRS
dxpairs[lastopcode][opcode]++;
--- 645,660 ----
#if defined(Py_DEBUG) || defined(LLTRACE)
f->f_lasti = INSTR_OFFSET();
#endif
opcode = NEXTOP();
! if (HAS_ARG(opcode)) {
oparg = NEXTARG();
+ if (IS_EXTENDED()) {
+ int extension;
+ NEXTOP(); /* skip over EXTENDED_ARG bytecode*/
+ extension = NEXTARG();
+ oparg = EXTEND_ARG(oparg, extension);
+ }
+ }
#ifdef DYNAMIC_EXECUTION_PROFILE
#ifdef DXPAIRS
dxpairs[lastopcode][opcode]++;
***************
*** 660,667 ****
if (lltrace) {
if (HAS_ARG(opcode)) {
printf("%d: %d, %d\n",
! (int) (INSTR_OFFSET() - 3),
opcode, oparg);
}
else {
--- 668,678 ----
if (lltrace) {
if (HAS_ARG(opcode)) {
+ int backup = 3;
+ if (first_instr[INSTR_OFFSET() - backup] == EXTENDED_ARG)
+ backup = 6;
printf("%d: %d, %d\n",
! (int) (INSTR_OFFSET() - backup),
opcode, oparg);
}
else {
***************
*** 1821,1828 ****
if (why == WHY_EXCEPTION) {
f->f_lasti = INSTR_OFFSET() - 1;
! if (HAS_ARG(opcode))
f->f_lasti -= 2;
PyTraceBack_Here(f);
if (f->f_trace)
--- 1832,1846 ----
if (why == WHY_EXCEPTION) {
f->f_lasti = INSTR_OFFSET() - 1;
! if (HAS_ARG(opcode)) {
! unsigned char *code;
! _PyCode_GETCODEPTR(f->f_code, &code);
f->f_lasti -= 2;
+ if (code[f->f_lasti] == EXTENDED_ARG) {
+ f->f_lasti -= 3;
+ }
+ }
+
PyTraceBack_Here(f);
if (f->f_trace)
Index: Python/compile.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v
retrieving revision 2.108
diff -c -r2.108 compile.c
*** Python/compile.c 2000/05/03 23:44:38 2.108
--- Python/compile.c 2000/05/22 22:45:12
***************
*** 554,560 ****
int x;
{
com_addbyte(c, x & 0xff);
! com_addbyte(c, x >> 8); /* XXX x should be positive */
}
static void
--- 554,568 ----
int x;
{
com_addbyte(c, x & 0xff);
! x >>= 8;
! com_addbyte(c, x & 0xff);
! x >>= 8;
! if (x) {
! com_addbyte(c, EXTENDED_ARG);
! com_addbyte(c, x & 0xff);
! x >>= 8;
! com_addbyte(c, x & 0xff);
! }
}
static void
***************
*** 653,659 ****
prev = code[anchor] + (code[anchor+1] << 8);
dist = target - (anchor+2);
code[anchor] = dist & 0xff;
! code[anchor+1] = dist >> 8;
if (!prev)
break;
anchor -= prev;
--- 661,674 ----
prev = code[anchor] + (code[anchor+1] << 8);
dist = target - (anchor+2);
code[anchor] = dist & 0xff;
! dist >>= 8;
! code[anchor+1] = dist;
! dist >>= 8;
! if (dist) {
! com_error(c, PyExc_SystemError,
! "com_backpatch: offset too large");
! break;
! }
if (!prev)
break;
anchor -= prev;