garbage collection / reference cycles (cont.)
Aaron Brady
castironpi at gmail.com
Wed Mar 25 01:12:30 EDT 2009
On Mar 25, 12:11 am, Aaron Brady <castiro... at gmail.com> wrote:
> Hello,
>
> I am posting the code I mentioned on Saturday that collects garbage
> and cyclic garbage in a flattened two-step process. The code takes
> 122 lines incl. comments, with 100 in tests. It should be in a reply
> to this.
>
> My aim is a buffer-like object which can contain reference-counted
> objects. This is a preliminary Python version of the cycle detector.
> I expect to port it to C++, but the buffer object as well as object
> proxies are Python objects. The memory management strategy,
> synchronization, etc., are other modules. It is similar in principle
> to Python's own 'gc'. If it's sound, it may have some educational and
> explanatory value also.
>
> Anyway, since I received a little interest in it, I wanted to follow
> up. It is free to play with. If there's a better group to ask about
> this, or there are more scholarly, widely-used, or thorough treatments
> or implementations, I'm interested.
from collections import deque
class Globals:
to_collect= deque() # FIFO of garbage that has been decref'ed;
# Queue them instead of nested 'gc' calls
to_collect_set= set() # hash lookup of the same information
ser_gc_running= False # bool flag if the GC is running
def schedule_collect( ob ):
''' Add to FIFO- no gc call '''
if ob in Globals.to_collect_set:
return
Globals.to_collect.append( ob )
Globals.to_collect_set.add( ob )
def serial_gc( ):
''' Visit objects which have been decref'ed. If they
have left reachability, enqueue the entire cycle they
are in; this as opposed to nested 'final' calls. '''
if Globals.ser_gc_running:
return
Globals.ser_gc_running= True
while Globals.to_collect:
ob= Globals.to_collect.popleft( )
Globals.to_collect_set.remove( ob )
if ob.ref_ct== 0:
ob.final( )
else:
incycle= Globals.cycle_detect( ob )
if incycle:
# Request object to drop its referenecs;
# re-queue the object. (Potential
# infinite loop, if objects do not comply.)
for k, v in list( ob.__dict__.items( ) ):
if not isinstance( v, ManagedOb ):
continue
ob.final_attr( k )
Globals.schedule_collect( ob )
Globals.ser_gc_running= False
def cycle_detect( ob ):
''' Detect an unreachable reference cycle in the
descendants of 'ob'. Return True if so, False if
still reachable. Only called when walking the
'to_collect' queue. '''
parents= { } # disjunction( ancestors, descendants )
bfs= deque( [ ob ] )
refct_copy= { ob: ob.ref_ct }
# copy the ref_ct's to a map;
# decrement the copies on visit (breadth-first)
while bfs:
x= bfs.popleft( )
for k, v in x.__dict__.items( ):
if not isinstance( v, ManagedOb ):
continue
if v not in refct_copy:
refct_copy[ v ]= v.ref_ct
bfs.append( v )
if v not in parents:
parents[ v ]= set( )
refct_copy[ v ]-= 1
parents[ v ].add( ( x, k ) )
print( 'parents', parents )
print( 'refct_copy', refct_copy )
# any extra-cyclic references?
if refct_copy[ ob ]:
return False
# (ancestors && descendants) all zero?
# --(breadth-first)
bfs= deque( [ ob ] )
visited= set( [ ob ] )
while bfs:
x= bfs.popleft( )
for n, _ in parents[ x ]:
if n in visited:
continue
if refct_copy[ n ]:
return False
visited.add( n )
bfs.append( n )
print( 'cycle of', ob, 'found' )
return True
class ManagedOb:
def __init__( self, name ):
self.ref_ct= 1
self.name= name
def assign( self, attr, other ):
''' setattr function (basically) '''
if hasattr( self, attr ):
getattr( self, attr ).decref( )
other.incref( )
setattr( self, attr, other )
def incref( self ):
self.ref_ct+= 1
def decref( self ):
print( self, 'decref' )
self.ref_ct-= 1
# check for cycles and poss. delete
Globals.schedule_collect( self )
Globals.serial_gc( ) # trip the collector
def final_attr( self, attr ):
''' magic function. your object has left
reachability and is requested to drop its
reference to 'attr'. '''
ob= getattr( self, attr )
delattr( self, attr )
ob.decref( )
def final( self ):
for _, v in self.__dict__.items( ):
if not isinstance( v, ManagedOb ):
continue
v.decref( )
print( 'finalizing', self )
def __repr__( self ):
return '<%s,%i>'% ( self.name, self.ref_ct )
def run( externals ):
''' create six global variables, which refer
to eachother in different shapes. only keep
references to those in the 'externals' string. '''
global p, q, r, s, t, u
print( )
p= ManagedOb( 'p' ) # created with reference count 1
q= ManagedOb( 'q' )
r= ManagedOb( 'r' )
s= ManagedOb( 's' )
t= ManagedOb( 't' )
u= ManagedOb( 'u' )
p.assign( 'at', q )
q.assign( 'at', p )
q.assign( 'at1', q )
q.assign( 'at2', r )
r.assign( 'at', q )
s.assign( 'at', p )
q.assign( 'at3', t )
u.assign( 'at', t )
# release references not in 'externals'
for x in p, q, r, s, t, u:
if x.name not in externals:
x.decref()
print( 'p: (q), q: (pqrt), r: (q), s: (p), t: (), u: (t)' )
def assert_exist( *args ):
for arg in args:
print( arg, 'exists' )
assert arg.ref_ct> 0
def assert_destroyed( *args ):
for arg in args:
print( arg, 'destroyed' )
assert arg.ref_ct== 0
run( 'psu' ) # external references to 'p', 's', and 'u'
p.decref() # decref 'p'. should not free any.
assert_exist( p, q, r, s, t, u )
run( 'psu' ) # start over
p.decref()
s.decref() # decref 'p' and 's'. should decref 'q', 'r',
# and 't'. should finalize 's', 'p', 'r', 'q'.
assert_exist( t, u )
assert_destroyed( p, q, r, s )
run( 'psu' )
s.decref()
p.decref() # same result, different order
assert_exist( t, u )
assert_destroyed( p, q, r, s )
run( 'psu' )
s.decref() # should finalize 's'.
assert_exist( p, q, r, t, u )
assert_destroyed( s )
run( 'qsu' ) # more.
q.decref()
assert_exist( p, q, r, s, t, u )
run( 'qsu' )
q.decref()
s.decref()
assert_exist( t, u )
assert_destroyed( p, q, r, s )
run( 'qsu' )
s.decref()
q.decref()
assert_exist( t, u )
assert_destroyed( p, q, r, s )
run( 'qsu' )
s.decref()
assert_exist( p, q, r, t, u )
assert_destroyed( s )
More information about the Python-list
mailing list