[Python-checkins] python/dist/src/Modules gcmodule.c,2.47,2.48

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Sun, 30 Jun 2002 20:52:21 -0700


Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv31850/python/Modules

Modified Files:
	gcmodule.c 
Log Message:
OK, I couldn't stand it <0.5 wink>:  removed all uncertainty about what's
in gc_refs, even at the cost of putting back a test+branch in
visit_decref.

The good news:  since gc_refs became utterly tame then, it became
clear that another special value could be useful.  The move_roots() and
move_root_reachable() passes have now been replaced by a single
move_unreachable() pass.  Besides saving a pass over the generation, this
has a better effect:  most of the time everything turns out to be
reachable, so we were breaking the generation list apart and moving it
into into the reachable list, one element at a time.  Now the reachable
stuff stays in the generation list, and the unreachable stuff is moved
instead.  This isn't quite as good as it sounds, since sometimes we
guess wrongly that a thing is unreachable, and have to move it back again.

Still, overall, it yields a significant (but not dramatic) boost in
collection speed.


Index: gcmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/gcmodule.c,v
retrieving revision 2.47
retrieving revision 2.48
diff -C2 -d -r2.47 -r2.48
*** gcmodule.c	30 Jun 2002 21:31:03 -0000	2.47
--- gcmodule.c	1 Jul 2002 03:52:19 -0000	2.48
***************
*** 75,89 ****
  /* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
   * the objects in the generation being collected, called the "young"
!  * generation at that point.  As collection proceeds, when it's determined
!  * that one of these can't be collected (e.g., because it's reachable from
!  * outside, or has a __del__ method), the object is moved out of young, and
!  * gc_refs is set to a negative value.  The latter is so we can distinguish
!  * collection candidates from non-candidates just by looking at the object.
   */
- /* Special gc_refs value, although any negative value means "moved". */
- #define GC_MOVED  -123
  
! /* True iff an object is still a candidate for collection. */
! #define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
  
  /* list of uncollectable objects */
--- 75,92 ----
  /* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
   * the objects in the generation being collected, called the "young"
!  * generation at that point.  As collection proceeds, the gc_refs members
!  * of young objects are set to GC_REACHABLE when it becomes known that they're
!  * uncollectable, and to GC_TENTATIVELY_UNREACHABLE when the evidence
!  * suggests they are collectable (this can't be known for certain until all
!  * of the young generation is scanned).
   */
  
! /* Special gc_refs values. */
! #define GC_REACHABLE  -123
! #define GC_TENTATIVELY_UNREACHABLE -42
! 
! #define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
! #define IS_TENTATIVELY_UNREACHABLE(o) ( \
! 	(AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
  
  /* list of uncollectable objects */
***************
*** 169,176 ****
  
  
! 
! /* Set all gc_refs = ob_refcnt.  After this, STILL_A_CANDIDATE(o) is true
!  * for all objects in containers, and false for all tracked gc objects not
!  * in containers (although see the comment in visit_decref).
   */
  static void
--- 172,178 ----
  
  
! /* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
!  * in containers, and is GC_REACHABLE for all tracked gc objects not in
!  * containers.
   */
  static void
***************
*** 178,207 ****
  {
  	PyGC_Head *gc = containers->gc.gc_next;
! 	for (; gc != containers; gc=gc->gc.gc_next) {
  		gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
- 	}
  }
  
  static int
  visit_decref(PyObject *op, void *data)
  {
-         /* There's no point to decrementing gc_refs unless
-          * STILL_A_CANDIDATE(op) is true.  It would take extra cycles to
-          * check that, though.  If STILL_A_CANDIDATE(op) is false,
-          * decrementing gc_refs almost always makes it "even more negative",
-          * so doesn't change that STILL_A_CANDIDATE is false, and no harm is
-          * done.  However, it's possible that, after many collections, this
-          * could underflow gc_refs in a long-lived old object.  In that case,
-          * visit_move() may move the old object back to the generation
-          * getting collected.  That would be a waste of time, but wouldn't
-          * cause an error.
-          */
          assert(op != NULL);
! 	if (PyObject_IS_GC(op))
! 	        AS_GC(op)->gc.gc_refs--;
  	return 0;
  }
  
! /* Subtract internal references from gc_refs */
  static void
  subtract_refs(PyGC_Head *containers)
--- 180,209 ----
  {
  	PyGC_Head *gc = containers->gc.gc_next;
! 	for (; gc != containers; gc = gc->gc.gc_next)
  		gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
  }
  
+ /* A traversal callback for subtract_refs. */
  static int
  visit_decref(PyObject *op, void *data)
  {
          assert(op != NULL);
! 	if (PyObject_IS_GC(op)) {
! 		PyGC_Head *gc = AS_GC(op);
! 		/* We're only interested in gc_refs for objects in the
! 		 * generation being collected, which can be recognized
! 		 * because only they have positive gc_refs.
! 		 */
! 		if (gc->gc.gc_refs > 0)
! 			gc->gc.gc_refs--;
! 	}
  	return 0;
  }
  
! /* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
!  * for all objects in containers, and is GC_REACHABLE for all tracked gc
!  * objects not in containers.  The ones with gc_refs > 0 are directly
!  * reachable from outside containers, and so can't be collected.
!  */
  static void
  subtract_refs(PyGC_Head *containers)
***************
*** 217,266 ****
  }
  
! /* Move objects with gc_refs > 0 to roots list.  They can't be collected. */
! static void
! move_roots(PyGC_Head *containers, PyGC_Head *roots)
! {
! 	PyGC_Head *next;
! 	PyGC_Head *gc = containers->gc.gc_next;
! 	while (gc != containers) {
! 		next = gc->gc.gc_next;
! 		if (gc->gc.gc_refs > 0) {
! 			gc_list_remove(gc);
! 			gc_list_append(gc, roots);
! 			gc->gc.gc_refs = GC_MOVED;
! 		}
! 		gc = next;
! 	}
! }
! 
  static int
! visit_move(PyObject *op, PyGC_Head *tolist)
  {
! 	if (PyObject_IS_GC(op)) {
! 		if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
! 			PyGC_Head *gc = AS_GC(op);
  			gc_list_remove(gc);
! 			gc_list_append(gc, tolist);
! 			gc->gc.gc_refs = GC_MOVED;
  		}
  	}
  	return 0;
  }
  
! /* Move candidates referenced from reachable to reachable set (they're no
!  * longer candidates).
   */
  static void
! move_root_reachable(PyGC_Head *reachable)
  {
! 	traverseproc traverse;
! 	PyGC_Head *gc = reachable->gc.gc_next;
! 	for (; gc != reachable; gc=gc->gc.gc_next) {
! 		/* careful, reachable list is growing here */
! 		PyObject *op = FROM_GC(gc);
! 		traverse = op->ob_type->tp_traverse;
! 		(void) traverse(op,
! 			       (visitproc)visit_move,
! 			       (void *)reachable);
  	}
  }
--- 219,316 ----
  }
  
! /* A traversal callback for move_unreachable. */
  static int
! visit_reachable(PyObject *op, PyGC_Head *reachable)
  {
! 	if (PyObject_IS_GC(op) && IS_TRACKED(op)) {
! 		PyGC_Head *gc = AS_GC(op);
! 		const int gc_refs = gc->gc.gc_refs;
! 
! 		if (gc_refs == 0) {
! 			/* This is in move_unreachable's 'young' list, but
! 			 * the traversal hasn't yet gotten to it.  All
! 			 * we need to do is tell move_unreachable that it's
! 			 * reachable.
! 			 */
! 			gc->gc.gc_refs = 1;
! 		}
! 		else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
! 			/* This had gc_refs = 0 when move_unreachable got
! 			 * to it, but turns out it's reachable after all.
! 			 * Move it back to move_unreachable's 'young' list,
! 			 * and move_unreachable will eventually get to it
! 			 * again.
! 			 */
  			gc_list_remove(gc);
! 			gc_list_append(gc, reachable);
! 			gc->gc.gc_refs = 1;
  		}
+ 		/* Else there's nothing to do.
+ 		 * If gc_refs > 0, it must be in move_unreachable's 'young'
+ 		 * list, and move_unreachable will eventually get to it.
+ 		 * If gc_refs == GC_REACHABLE, it's either in some other
+ 		 * generation so we don't care about it, or move_unreachable
+ 		 * already dealt with it.
+ 		 */
  	}
  	return 0;
  }
  
! /* Move the unreachable objects from young to unreachable.  After this,
!  * all objects in young have gc_refs = GC_REACHABLE, and all objects in
!  * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
!  * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
!  * All objects in young after this are directly or indirectly reachable
!  * from outside the original young; and all objects in unreachable are
!  * not.
   */
  static void
! move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
  {
! 	PyGC_Head *gc = young->gc.gc_next;
! 
! 	/* Invariants:  all objects "to the left" of us in young have gc_refs
! 	 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
! 	 * from outside the young list as it was at entry.  All other objects
! 	 * from the original young "to the left" of us are in unreachable now,
! 	 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
! 	 * left of us in 'young' now have been scanned, and no objects here
! 	 * or to the right have been scanned yet.
! 	 */
! 
! 	while (gc != young) {
! 		PyGC_Head *next;
! 
! 		if (gc->gc.gc_refs == 0) {
! 			/* This *may* be unreachable.  To make progress,
! 			 * assume it is.  gc isn't directly reachable from
! 			 * any object we've already traversed, but may be
! 			 * reachable from an object we haven't gotten to yet.
! 			 * visit_reachable will eventually move gc back into
! 			 * young if that's so, and we'll see it again.
! 			 */
! 			next = gc->gc.gc_next;
! 			gc_list_remove(gc);
! 			gc_list_append(gc, unreachable);
! 			gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
! 		}
! 		else {
! 			/* gc is definitely reachable from outside the
! 			 * original 'young'.  Mark it as such, and traverse
! 			 * its pointers to find any other objects that may
! 			 * be directly reachable from it.  Note that the
! 			 * call to tp_traverse may append objects to young,
! 			 * so we have to wait until it returns to determine
! 			 * the next object to visit.
! 			 */
! 			PyObject *op = FROM_GC(gc);
! 			traverseproc traverse = op->ob_type->tp_traverse;
! 			gc->gc.gc_refs = GC_REACHABLE;
! 			(void) traverse(op,
! 			       		(visitproc)visit_reachable,
! 			    		(void *)young);
! 			next = gc->gc.gc_next;
! 		}
! 		gc = next;
  	}
  }
***************
*** 293,302 ****
  			gc_list_remove(gc);
  			gc_list_append(gc, finalizers);
! 			gc->gc.gc_refs = GC_MOVED;
  		}
  	}
  }
  
! /* Move objects referenced from roots to roots */
  static void
  move_finalizer_reachable(PyGC_Head *finalizers)
--- 343,369 ----
  			gc_list_remove(gc);
  			gc_list_append(gc, finalizers);
! 			gc->gc.gc_refs = GC_REACHABLE;
  		}
  	}
  }
  
! /* A traversal callback for move_finalizer_reachable. */
! static int
! visit_move(PyObject *op, PyGC_Head *tolist)
! {
! 	if (PyObject_IS_GC(op)) {
! 		if (IS_TRACKED(op) && IS_TENTATIVELY_UNREACHABLE(op)) {
! 			PyGC_Head *gc = AS_GC(op);
! 			gc_list_remove(gc);
! 			gc_list_append(gc, tolist);
! 			gc->gc.gc_refs = GC_REACHABLE;
! 		}
! 	}
! 	return 0;
! }
! 
! /* Move objects that are reachable from finalizers, from the unreachable set
!  * into the finalizers set.
!  */
  static void
  move_finalizer_reachable(PyGC_Head *finalizers)
***************
*** 354,362 ****
  			 * finalizers to the list of garbage.  All objects in
  			 * the finalizers list are reachable from those
! 			 * objects. */
  			PyList_Append(garbage, op);
  		}
  		/* object is now reachable again */
! 		assert(!STILL_A_CANDIDATE(op));
  		gc_list_remove(gc);
  		gc_list_append(gc, old);
--- 421,430 ----
  			 * finalizers to the list of garbage.  All objects in
  			 * the finalizers list are reachable from those
! 			 * objects.
! 			 */
  			PyList_Append(garbage, op);
  		}
  		/* object is now reachable again */
! 		assert(IS_REACHABLE(op));
  		gc_list_remove(gc);
  		gc_list_append(gc, old);
***************
*** 366,370 ****
  /* Break reference cycles by clearing the containers involved.	This is
   * tricky business as the lists can be changing and we don't know which
!  * objects may be freed.  It is possible I screwed something up here. */
  static void
  delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
--- 434,439 ----
  /* Break reference cycles by clearing the containers involved.	This is
   * tricky business as the lists can be changing and we don't know which
!  * objects may be freed.  It is possible I screwed something up here.
!  */
  static void
  delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
***************
*** 376,380 ****
  		PyObject *op = FROM_GC(gc);
  
! 		assert(STILL_A_CANDIDATE(op));
  		if (debug & DEBUG_SAVEALL) {
  			PyList_Append(garbage, op);
--- 445,449 ----
  		PyObject *op = FROM_GC(gc);
  
! 		assert(IS_TENTATIVELY_UNREACHABLE(op));
  		if (debug & DEBUG_SAVEALL) {
  			PyList_Append(garbage, op);
***************
*** 391,395 ****
  			gc_list_remove(gc);
  			gc_list_append(gc, old);
! 			gc->gc.gc_refs = GC_MOVED;
  		}
  	}
--- 460,464 ----
  			gc_list_remove(gc);
  			gc_list_append(gc, old);
! 			gc->gc.gc_refs = GC_REACHABLE;
  		}
  	}
***************
*** 402,410 ****
  {
  	int i;
! 	long n = 0;
! 	long m = 0;
  	PyGC_Head *young; /* the generation we are examining */
  	PyGC_Head *old; /* next older generation */
- 	PyGC_Head reachable;
  	PyGC_Head unreachable;
  	PyGC_Head finalizers;
--- 471,478 ----
  {
  	int i;
! 	long m = 0;	/* # objects collected */
! 	long n = 0;	/* # unreachable objects that couldn't be collected */
  	PyGC_Head *young; /* the generation we are examining */
  	PyGC_Head *old; /* next older generation */
  	PyGC_Head unreachable;
  	PyGC_Head finalizers;
***************
*** 434,469 ****
  	/* handy references */
  	young = GEN_HEAD(generation);
! 	if (generation < NUM_GENERATIONS-1) {
  		old = GEN_HEAD(generation+1);
! 	} else {
! 		old = GEN_HEAD(NUM_GENERATIONS-1);
! 	}
  
  	/* Using ob_refcnt and gc_refs, calculate which objects in the
  	 * container set are reachable from outside the set (ie. have a
  	 * refcount greater than 0 when all the references within the
! 	 * set are taken into account */
  	update_refs(young);
  	subtract_refs(young);
  
! 	/* Move everything reachable from outside the set into the
! 	 * reachable set (ie. gc_refs > 0).  Next, move everything
! 	 * reachable from objects in the reachable set. */
! 	gc_list_init(&reachable);
! 	move_roots(young, &reachable);
! 	move_root_reachable(&reachable);
! 
! 	/* move unreachable objects to a temporary list, new objects can be
! 	 * allocated after this point */
  	gc_list_init(&unreachable);
! 	gc_list_move(young, &unreachable);
  
! 	/* move reachable objects to next generation */
! 	gc_list_merge(&reachable, old);
  
! 	/* Move objects reachable from finalizers, we can't safely delete
! 	 * them.  Python programmers should take care not to create such
! 	 * things.  For Python finalizers means instance objects with
! 	 * __del__ methods. */
  	gc_list_init(&finalizers);
  	move_finalizers(&unreachable, &finalizers);
--- 502,536 ----
  	/* handy references */
  	young = GEN_HEAD(generation);
! 	if (generation < NUM_GENERATIONS-1)
  		old = GEN_HEAD(generation+1);
! 	else
! 		old = young;
  
  	/* Using ob_refcnt and gc_refs, calculate which objects in the
  	 * container set are reachable from outside the set (ie. have a
  	 * refcount greater than 0 when all the references within the
! 	 * set are taken into account
! 	 */
  	update_refs(young);
  	subtract_refs(young);
  
! 	/* Leave everything reachable from outside young in young, and move
! 	 * everything else (in young) to unreachable.
! 	 * NOTE:  This used to move the reachable objects into a reachable
! 	 * set instead.  But most things usually turn out to be reachable,
! 	 * so it's more efficient to move the unreachable things.
! 	 */
  	gc_list_init(&unreachable);
! 	move_unreachable(young, &unreachable);
  
! 	/* Move reachable objects to next generation. */
! 	if (young != old)
! 		gc_list_merge(young, old);
  
! 	/* All objects in unreachable are trash, but objects reachable from
! 	 * finalizers can't safely be deleted.  Python programmers should take
! 	 * care not to create such things.  For Python, finalizers means
! 	 * instance objects with __del__ methods.
! 	 */
  	gc_list_init(&finalizers);
  	move_finalizers(&unreachable, &finalizers);
***************
*** 479,483 ****
  		}
  	}
! 	/* call tp_clear on objects in the collectable set.  This will cause
  	 * the reference cycles to be broken. It may also cause some objects in
  	 * finalizers to be freed */
--- 546,550 ----
  		}
  	}
! 	/* Call tp_clear on objects in the collectable set.  This will cause
  	 * the reference cycles to be broken. It may also cause some objects in
  	 * finalizers to be freed */