[Python-Dev] Catching "return" and "return expr" at compile time

Skip Montanaro skip@mojam.com (Skip Montanaro)
Sat, 4 Sep 1999 16:26:37 -0500


--Multipart_Sat_Sep__4_16:26:37_1999-1
Content-Type: text/plain; charset=US-ASCII


Attached is a context diff against the latest version of Python/compile.c
that checks at compile time for functions that both return expressions or
execute return statements with no expression (or equivalently, fall off the
end of the function).  I figured I'd post it here to get a little friendly
feedback and bug discovery before shooting it off to c.l.py.  I modified
compile.c instead of some preexisting PyLint script because I don't know
what's popular out there.  On the other hand, I'm sure most people who would
be interested in this sort of thing have access to the C source...

The basic idea is that each straight line chunk of code is terminated one of 
four ways:

    1. return with no expression
    2. return an expression
    3. raise an exception
    4. fall off the end of the chunk

Falling off the end of the function is obviously treated like return with no
expression.  (This is, after all, what motivated me to do this. ;-)

This information is recorded in a new bit vector added to the struct
compiling object that's carried around during the compilation.  Compound
statements simply aggregate the bit vectors for their various clauses in
ways appropriate to their semantics.

At the end of a function's compilation, the set of return bits computed up
to that point tells you whether or not to spit out a warning.  Note that it
does nothing to recognize constant expressions.  The following function will
generate a warning:

    def f():
        i = 0
        while 1:
            i = i + 1
            if i > 10: return i

even though the only way to return from the function is the return
statement.  To get the above to shut up the compiler you'd have to do
something like

    class CantGetHere: pass

    def f():
        i = 0
        while 1:
            i = i + 1
            if i > 10: return i
    raise CantGetHere

Raise statements are treated as a valid way to "return" from a function.
Registering them as separate styles of returns serves effectively to turn
off the "no return" bit for a block of code.  Raise is compatible with
either form of return, though they aren't compatible with each other.

The code is run whenever a module is compiled.  I didn't bother to add a new
flag to the python interpreter to enable/disable warnings during
compilation, though a -w switch for Python has been mentioned before.

I ran the modified byte code compiler over a silly test module as well as
executing 

    ./python Lib/test/regrtest.py
    ./python Lib/compileall.py

It uncovered some interesting programming practices and one item I think is
an actual bug.  In Lib/gzip.py, GzipFile._read returns EOFError at one point
instead of raising it.  At other points in the method it raises EOFError.
There are no other return statements in the function.  (I haven't taken the
time/had the nerve to run it against my own Python code yet. ;-)

I'm firmly of the opinion that more subtle bugs exist in the way people
write functions that return and raise values than in the code that calls
those functions, contrary to a few vocal folks on c.l.py who may believe
otherwise.

Enjoy,

Skip Montanaro | http://www.mojam.com/
skip@mojam.com | http://www.musi-cal.com/~skip/
847-971-7098   | Python: Programming the way Guido indented...


--Multipart_Sat_Sep__4_16:26:37_1999-1
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="compile.diffs"
Content-Transfer-Encoding: quoted-printable

Index: compile.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /projects/cvsroot/python/dist/src/Python/compile.c,v
retrieving revision 2.97
diff -c -r2.97 compile.c
*** compile.c	1999/01/28 15:08:09	2.97
--- compile.c	1999/09/04 20:48:05
***************
*** 320,325 ****
--- 320,330 ----
  	int c_stacklevel;	/* Current stack level */
  	int c_maxstacklevel;	/* Maximum stack level */
  	int c_firstlineno;
+ #define R_NORET 1		/* no return */
+ #define R_EXPR 2		/* return expression */
+ #define R_NONE 4		/* return */
+ #define R_RAISE 8		/* raise */
+ 	int c_rettype;		/* how a function is exited */
  	PyObject *c_lnotab;	/* Table mapping address to line number */
  	int c_last_addr, c_last_line, c_lnotab_next;
  #ifdef PRIVATE_NAME_MANGLING
***************
*** 2053,2061 ****
  	if (NCH(n) < 2) {
  		com_addoparg(c, LOAD_CONST, com_addconst(c, Py_None));
  		com_push(c, 1);
  	}
! 	else
  		com_node(c, CHILD(n, 1));
  	com_addbyte(c, RETURN_VALUE);
  	com_pop(c, 1);
  }
--- 2058,2069 ----
  	if (NCH(n) < 2) {
  		com_addoparg(c, LOAD_CONST, com_addconst(c, Py_None));
  		com_push(c, 1);
+ 		c->c_rettype =3D R_NONE;
  	}
! 	else {
  		com_node(c, CHILD(n, 1));
+ 		c->c_rettype =3D R_EXPR;
+ 	}
  	com_addbyte(c, RETURN_VALUE);
  	com_pop(c, 1);
  }
***************
*** 2078,2083 ****
--- 2086,2092 ----
  	i =3D NCH(n)/2;
  	com_addoparg(c, RAISE_VARARGS, i);
  	com_pop(c, i);
+ 	c->c_rettype =3D R_RAISE;
  }
  =

  static void
***************
*** 2299,2310 ****
--- 2308,2342 ----
  }
  =

  static void
+ com_set_rettype(c, in_rettype, stmt_rettype)
+      struct compiling *c;
+      int in_rettype;
+      int stmt_rettype;
+ {
+ 	/* merge the accumulated return type coming into the statement
+ 	   with the return type generated by the statement */
+ 	if (!(in_rettype & R_NORET))
+ 		/* all branches before this statement led to a return
+ 		   of some type, so the return possibilities of this
+ 		   statement are irrelevant */
+ 		c->c_rettype =3D in_rettype;
+ 	else
+ 		/* there was at least one path leading to this statement
+ 		   that did not return - merge the return possibilities
+ 		   coming in with the return possibilities from this
+ 		   statement, which might include R_NORET */
+ 		c->c_rettype =3D (in_rettype & ~R_NORET) | stmt_rettype;
+ }
+ =

+ static void
  com_if_stmt(c, n)
  	struct compiling *c;
  	node *n;
  {
  	int i;
  	int anchor =3D 0;
+ 	int in_rettype =3D c->c_rettype;
+ 	int if_rettype =3D 0;
  	REQ(n, if_stmt);
  	/*'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] */
  	for (i =3D 0; i+3 < NCH(n); i+=3D4) {
***************
*** 2318,2331 ****
  		com_addfwref(c, JUMP_IF_FALSE, &a);
  		com_addbyte(c, POP_TOP);
  		com_pop(c, 1);
  		com_node(c, CHILD(n, i+3));
  		com_addfwref(c, JUMP_FORWARD, &anchor);
  		com_backpatch(c, a);
  		/* We jump here with an extra entry which we now pop */
  		com_addbyte(c, POP_TOP);
  	}
! 	if (i+2 < NCH(n))
  		com_node(c, CHILD(n, i+2));
  	if (anchor)
  		com_backpatch(c, anchor);
  }
--- 2350,2373 ----
  		com_addfwref(c, JUMP_IF_FALSE, &a);
  		com_addbyte(c, POP_TOP);
  		com_pop(c, 1);
+ 		c->c_rettype =3D R_NORET;
  		com_node(c, CHILD(n, i+3));
+ 		if_rettype |=3D c->c_rettype;
  		com_addfwref(c, JUMP_FORWARD, &anchor);
  		com_backpatch(c, a);
  		/* We jump here with an extra entry which we now pop */
  		com_addbyte(c, POP_TOP);
  	}
! 	if (i+2 < NCH(n)) {
! 		c->c_rettype =3D R_NORET;
  		com_node(c, CHILD(n, i+2));
+ 		if_rettype |=3D c->c_rettype;
+ 	}
+ 	else
+ 		/* no else clause - have to consider that we might
+ 		   get here without returning */
+ 		if_rettype |=3D R_NORET;
+ 	com_set_rettype(c, in_rettype, if_rettype);
  	if (anchor)
  		com_backpatch(c, anchor);
  }
***************
*** 2338,2343 ****
--- 2380,2387 ----
  	int break_anchor =3D 0;
  	int anchor =3D 0;
  	int save_begin =3D c->c_begin;
+ 	int in_rettype =3D c->c_rettype;
+ 	int while_rettype =3D 0;
  	REQ(n, while_stmt); /* 'while' test ':' suite ['else' ':' suite] */
  	com_addfwref(c, SETUP_LOOP, &break_anchor);
  	block_push(c, SETUP_LOOP);
***************
*** 2348,2354 ****
--- 2392,2400 ----
  	com_addbyte(c, POP_TOP);
  	com_pop(c, 1);
  	c->c_loops++;
+ 	c->c_rettype =3D R_NORET;
  	com_node(c, CHILD(n, 3));
+ 	while_rettype =3D c->c_rettype;
  	c->c_loops--;
  	com_addoparg(c, JUMP_ABSOLUTE, c->c_begin);
  	c->c_begin =3D save_begin;
***************
*** 2357,2364 ****
  	com_addbyte(c, POP_TOP);
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_LOOP);
! 	if (NCH(n) > 4)
  		com_node(c, CHILD(n, 6));
  	com_backpatch(c, break_anchor);
  }
  =

--- 2403,2418 ----
  	com_addbyte(c, POP_TOP);
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_LOOP);
! 	if (NCH(n) > 4) {
! 		c->c_rettype =3D R_NORET;
  		com_node(c, CHILD(n, 6));
+ 		while_rettype |=3D c->c_rettype;
+ 	}
+ 	else
+ 		/* no else clause - have to consider that we might
+ 		   never execute the loop body */
+ 		while_rettype |=3D R_NORET;
+ 	com_set_rettype(c, in_rettype, while_rettype);
  	com_backpatch(c, break_anchor);
  }
  =

***************
*** 2371,2376 ****
--- 2425,2432 ----
  	int break_anchor =3D 0;
  	int anchor =3D 0;
  	int save_begin =3D c->c_begin;
+ 	int in_rettype =3D c->c_rettype;
+ 	int for_rettype =3D 0;
  	REQ(n, for_stmt);
  	/* 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] */
  	com_addfwref(c, SETUP_LOOP, &break_anchor);
***************
*** 2388,2394 ****
--- 2444,2452 ----
  	com_push(c, 1);
  	com_assign(c, CHILD(n, 1), OP_ASSIGN);
  	c->c_loops++;
+ 	c->c_rettype =3D R_NORET;
  	com_node(c, CHILD(n, 5));
+ 	for_rettype =3D c->c_rettype;
  	c->c_loops--;
  	com_addoparg(c, JUMP_ABSOLUTE, c->c_begin);
  	c->c_begin =3D save_begin;
***************
*** 2396,2403 ****
  	com_pop(c, 2); /* FOR_LOOP has popped these */
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_LOOP);
! 	if (NCH(n) > 8)
  		com_node(c, CHILD(n, 8));
  	com_backpatch(c, break_anchor);
  }
  =

--- 2454,2469 ----
  	com_pop(c, 2); /* FOR_LOOP has popped these */
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_LOOP);
! 	if (NCH(n) > 8) {
! 		c->c_rettype =3D R_NORET;
  		com_node(c, CHILD(n, 8));
+ 		for_rettype |=3D c->c_rettype;
+ 	}
+ 	else
+ 		/* no else clause - have to consider that we might
+ 		   never execute the loop body */
+ 		for_rettype |=3D R_NORET;
+ 	com_set_rettype(c, in_rettype, for_rettype);
  	com_backpatch(c, break_anchor);
  }
  =

***************
*** 2477,2486 ****
--- 2543,2557 ----
  	int else_anchor =3D 0;
  	int i;
  	node *ch;
+ 	int in_rettype =3D c->c_rettype;
+ 	int try_rettype =3D 0;
+ 	int tryclause_rettype =3D 0;
  =

  	com_addfwref(c, SETUP_EXCEPT, &except_anchor);
  	block_push(c, SETUP_EXCEPT);
+ 	c->c_rettype =3D R_NORET;
  	com_node(c, CHILD(n, 2));
+ 	tryclause_rettype =3D c->c_rettype;
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_EXCEPT);
  	com_addfwref(c, JUMP_FORWARD, &else_anchor);
***************
*** 2517,2523 ****
--- 2588,2596 ----
  		}
  		com_addbyte(c, POP_TOP);
  		com_pop(c, 1);
+ 		c->c_rettype =3D R_NORET;
  		com_node(c, CHILD(n, i+2));
+ 		try_rettype |=3D c->c_rettype;
  		com_addfwref(c, JUMP_FORWARD, &end_anchor);
  		if (except_anchor) {
  			com_backpatch(c, except_anchor);
***************
*** 2533,2540 ****
  	   anything. */
  	com_addbyte(c, END_FINALLY);
  	com_backpatch(c, else_anchor);
! 	if (i < NCH(n))
  		com_node(c, CHILD(n, i+2));
  	com_backpatch(c, end_anchor);
  }
  =

--- 2606,2622 ----
  	   anything. */
  	com_addbyte(c, END_FINALLY);
  	com_backpatch(c, else_anchor);
! 	if (i < NCH(n)) {
! 		/* pick up with the return stuff where the try clause ended */
! 		c->c_rettype =3D tryclause_rettype;
  		com_node(c, CHILD(n, i+2));
+ 		try_rettype |=3D c->c_rettype;
+ 	}
+ 	else
+ 		/* no else clause - just merge try clause return info
+ 		   with return info from all the except clauses */
+ 		try_rettype |=3D tryclause_rettype;
+ 	com_set_rettype(c, in_rettype, try_rettype);
  	com_backpatch(c, end_anchor);
  }
  =

***************
*** 2545,2554 ****
--- 2627,2640 ----
  {
  	int finally_anchor =3D 0;
  	node *ch;
+ 	int in_rettype =3D c->c_rettype;
+ 	int try_rettype =3D 0;
  =

  	com_addfwref(c, SETUP_FINALLY, &finally_anchor);
  	block_push(c, SETUP_FINALLY);
+ 	c->c_rettype =3D R_NORET;
  	com_node(c, CHILD(n, 2));
+ 	try_rettype =3D c->c_rettype;
  	com_addbyte(c, POP_BLOCK);
  	block_pop(c, SETUP_FINALLY);
  	block_push(c, END_FINALLY);
***************
*** 2561,2570 ****
--- 2647,2661 ----
  	com_backpatch(c, finally_anchor);
  	ch =3D CHILD(n, NCH(n)-1);
  	com_addoparg(c, SET_LINENO, ch->n_lineno);
+ 	/* shouldn't start with R_NORET, because it's essentially
+ 	   the tail end of the try clause */
+ 	c->c_rettype =3D 0;
  	com_node(c, ch);
+ 	try_rettype |=3D c->c_rettype;
  	com_addbyte(c, END_FINALLY);
  	block_pop(c, END_FINALLY);
  	com_pop(c, 3); /* Matches the com_push above */
+ 	com_set_rettype(c, in_rettype, try_rettype);
  }
  =

  static void
***************
*** 2886,2891 ****
--- 2977,2984 ----
  				  "'break' outside loop");
  		}
  		com_addbyte(c, BREAK_LOOP);
+ 		/* else clause wouldn't get executed, so R_NORET is possible */
+ 		c->c_rettype =3D R_NORET;
  		break;
  	case continue_stmt:
  		com_continue_stmt(c, n);
***************
*** 3156,3167 ****
--- 3249,3267 ----
  	if (TYPE(ch) =3D=3D varargslist)
  		com_arglist(c, ch);
  	c->c_infunction =3D 1;
+ 	c->c_rettype =3D R_NORET;
  	com_node(c, CHILD(n, 4));
  	c->c_infunction =3D 0;
  	com_addoparg(c, LOAD_CONST, com_addconst(c, Py_None));
  	com_push(c, 1);
  	com_addbyte(c, RETURN_VALUE);
  	com_pop(c, 1);
+ 	com_set_rettype(c, c->c_rettype, R_NONE);
+ 	if ((c->c_rettype & R_NONE) && (c->c_rettype & R_EXPR)) {
+ 		PySys_WriteStderr("warning: File \"%s\", line %d: %s",
+ 				  c->c_filename, c->c_lineno, c->c_name);
+ 		PySys_WriteStderr(" has \"return\" and \"return expr\"\n");
+ 	}
  }
  =

  static void

--Multipart_Sat_Sep__4_16:26:37_1999-1--