[pypy-svn] r54587 - in pypy/branch/gc-tweak/pypy/rpython/memory: . gc gc/test test
arigo at codespeak.net
arigo at codespeak.net
Fri May 9 13:37:21 CEST 2008
Author: arigo
Date: Fri May 9 13:37:18 2008
New Revision: 54587
Modified:
pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py
pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py
pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
pypy/branch/gc-tweak/pypy/rpython/memory/support.py
pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
Log:
(likely in progress)
Kill kill kill the obscure id() logic, and replace it
with something custom but sane in each GC class.
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/base.py Fri May 9 13:37:18 2008
@@ -198,110 +198,6 @@
class MovingGCBase(GCBase):
moving_gc = True
- def __init__(self):
- # WaRnInG! Putting GC objects as fields of the GC itself is
- # basically *not* working in general! When running tests with
- # the gcwrapper, there is no way they can be returned from
- # get_roots_from_llinterp(). When the whole GC goes through the
- # gctransformer, though, it works if the fields are read-only
- # (and thus only ever reference a prebuilt list or dict). These
- # prebuilt lists or dicts themselves can be mutated and point to
- # more non-prebuild GC objects; this is fine because the
- # internal GC ptr in the prebuilt list or dict is found by
- # gctypelayout and listed in addresses_of_static_ptrs.
-
- # XXX I'm not sure any more about the warning above. The fields
- # of 'self' are found by gctypelayout and added to
- # addresses_of_static_ptrs_in_nongc, so in principle they could
- # be mutated and still be found by collect().
-
- self.wr_to_objects_with_id = []
- self.object_id_dict = {}
- self.object_id_dict_ends_at = 0
-
- def id(self, ptr):
- self.disable_finalizers()
- try:
- return self._compute_id(ptr)
- finally:
- self.enable_finalizers()
-
- def _compute_id(self, ptr):
- # XXX this may explode if --no-translation-rweakref is specified
- # ----------------------------------------------------------------
- # Basic logic: the list item wr_to_objects_with_id[i] contains a
- # weakref to the object whose id is i + 1. The object_id_dict is
- # an optimization that tries to reduce the number of linear
- # searches in this list.
- # ----------------------------------------------------------------
- # Invariant: if object_id_dict_ends_at >= 0, then object_id_dict
- # contains all pairs {address: id}, for the addresses
- # of all objects that are the targets of the weakrefs of the
- # following slice: wr_to_objects_with_id[:object_id_dict_ends_at].
- # ----------------------------------------------------------------
- # Essential: as long as notify_objects_just_moved() is not called,
- # we assume that the objects' addresses did not change. We also
- # assume that the address of a live object cannot be reused for
- # another object without an intervening notify_objects_just_moved()
- # call, but this could be fixed easily if needed.
- # ----------------------------------------------------------------
- # First check the dictionary
- i = self.object_id_dict_ends_at
- if i < 0:
- self.object_id_dict.clear() # dictionary invalid
- self.object_id_dict_ends_at = 0
- i = 0
- else:
- adr = llmemory.cast_ptr_to_adr(ptr)
- try:
- i = self.object_id_dict[adr]
- except KeyError:
- pass
- else:
- # double-check that the answer we got is correct
- lst = self.wr_to_objects_with_id
- target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
- ll_assert(target == ptr, "bogus object_id_dict")
- return i + 1 # found via the dict
- # Walk the tail of the list, where entries are not also in the dict
- lst = self.wr_to_objects_with_id
- end = len(lst)
- freeentry = -1
- while i < end:
- target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
- if not target:
- freeentry = i
- else:
- ll_assert(self.get_type_id(llmemory.cast_ptr_to_adr(target))
- > 0, "bogus weakref in compute_id()")
- # record this entry in the dict
- adr = llmemory.cast_ptr_to_adr(target)
- self.object_id_dict[adr] = i
- if target == ptr:
- break # found
- i += 1
- else:
- # not found
- wr = llmemory.weakref_create(ptr)
- if freeentry < 0:
- ll_assert(end == len(lst), "unexpected lst growth in gc_id")
- i = end
- lst.append(wr)
- else:
- i = freeentry # reuse the id() of a dead object
- lst[i] = wr
- adr = llmemory.cast_ptr_to_adr(ptr)
- self.object_id_dict[adr] = i
- # all entries up to and including index 'i' are now valid in the dict
- # unless a collection occurred while we were working, in which case
- # the object_id_dict is bogus anyway
- if self.object_id_dict_ends_at >= 0:
- self.object_id_dict_ends_at = i + 1
- return i + 1 # this produces id() values 1, 2, 3, 4...
-
- def notify_objects_just_moved(self):
- self.object_id_dict_ends_at = -1
-
def choose_gc_from_config(config):
"""Return a (GCClass, GC_PARAMS) from the given config object.
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/generation.py Fri May 9 13:37:18 2008
@@ -62,6 +62,7 @@
# it lists exactly the old and static objects whose
# GCFLAG_NO_YOUNG_PTRS bit is not set.
self.young_objects_with_weakrefs = self.AddressStack()
+ self.young_objects_with_id = self.AddressDict()
self.reset_nursery()
# compute the constant lower bounds for the attributes
@@ -249,6 +250,7 @@
def semispace_collect(self, size_changing=False):
self.reset_young_gcflags() # we are doing a full collection anyway
self.weakrefs_grow_older()
+ self.ids_grow_older()
self.reset_nursery()
if DEBUG_PRINT:
llop.debug_print(lltype.Void, "major collect, size changing", size_changing)
@@ -343,7 +345,8 @@
# GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
if self.young_objects_with_weakrefs.non_empty():
self.invalidate_young_weakrefs()
- self.notify_objects_just_moved()
+ if self.young_objects_with_id.length() > 0:
+ self.update_young_objects_with_id()
# mark the nursery as free and fill it with zeroes again
llarena.arena_reset(self.nursery, self.nursery_size, True)
if DEBUG_PRINT:
@@ -454,6 +457,28 @@
# overridden by HybridGC
return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
+ def _compute_id(self, obj):
+ if self.is_in_nursery(obj):
+ result = self.young_objects_with_id.get(obj)
+ if not result:
+ result = self._next_id()
+ self.young_objects_with_id.setitem(obj, result)
+ return result
+ else:
+ return SemiSpaceGC._compute_id(self, obj)
+
+ def update_young_objects_with_id(self):
+ self.young_objects_with_id.foreach(self._update_object_id,
+ self.objects_with_id)
+ self.young_objects_with_id.clear()
+
+ def ids_grow_older(self):
+ self.young_objects_with_id.foreach(self._id_grow_older, None)
+ self.young_objects_with_id.clear()
+
+ def _id_grow_older(self, obj, id, ignored):
+ self.objects_with_id.setitem(obj, id)
+
def debug_check_object(self, obj):
"""Check the invariants about 'obj' that should be true
between collections."""
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/hybrid.py Fri May 9 13:37:18 2008
@@ -466,6 +466,22 @@
dead_size, "bytes in",
dead_count, "objs")
+ def _compute_id_for_external(self, obj):
+ # the base classes make the assumption that all external objects
+ # have an id equal to their address. This is wrong if the object
+ # is a generation 3 rawmalloced object that initially lived in
+ # the semispaces.
+ if self.is_last_generation(obj):
+ # in this case, we still need to check if the object had its
+ # id taken before. If not, we can use its address as its id.
+ return self.objects_with_id.get(obj, obj)
+ else:
+ # a generation 2 external object was never non-external in
+ # the past, so it cannot be listed in self.objects_with_id.
+ return obj
+ # XXX a possible optimization would be to use three dicts, one
+ # for each generation, instead of mixing gen2 and gen3 objects.
+
def debug_check_object(self, obj):
"""Check the invariants about 'obj' that should be true
between collections."""
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/semispace.py Fri May 9 13:37:18 2008
@@ -3,6 +3,7 @@
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
from pypy.rpython.memory.support import get_address_stack, get_address_deque
+from pypy.rpython.memory.support import AddressDict
from pypy.rpython.memory.gcheader import GCHeaderBuilder
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rlib.objectmodel import free_non_gc_object
@@ -50,8 +51,11 @@
self.gcheaderbuilder = GCHeaderBuilder(self.HDR)
self.AddressStack = get_address_stack(chunk_size)
self.AddressDeque = get_address_deque(chunk_size)
+ self.AddressDict = AddressDict
self.finalizer_lock_count = 0
self.red_zone = 0
+ self.id_free_list = self.AddressStack()
+ self.next_free_id = 1
def setup(self):
if DEBUG_PRINT:
@@ -66,6 +70,7 @@
self.objects_with_finalizers = self.AddressDeque()
self.run_finalizers = self.AddressDeque()
self.objects_with_weakrefs = self.AddressStack()
+ self.objects_with_id = self.AddressDict()
def disable_finalizers(self):
self.finalizer_lock_count += 1
@@ -235,9 +240,9 @@
scan = self.deal_with_objects_with_finalizers(scan)
if self.objects_with_weakrefs.non_empty():
self.invalidate_weakrefs()
+ self.update_objects_with_id()
self.finished_full_collect()
self.debug_check_consistency()
- self.notify_objects_just_moved()
if not size_changing:
llarena.arena_reset(fromspace, self.space_size, True)
self.record_red_zone()
@@ -573,6 +578,62 @@
finally:
self.finalizer_lock_count -= 1
+ def id(self, ptr):
+ obj = llmemory.cast_ptr_to_adr(ptr)
+ if self.header(obj).tid & GCFLAG_EXTERNAL:
+ result = self._compute_id_for_external(obj)
+ else:
+ result = self._compute_id(obj)
+ return llmemory.cast_adr_to_int(result)
+
+ def _next_id(self):
+ # return an id not currently in use (as an address instead of an int)
+ if self.id_free_list.non_empty():
+ result = self.id_free_list.pop() # reuse a dead id
+ else:
+ # make up a fresh id number
+ result = llmemory.cast_int_to_adr(self.next_free_id)
+ self.next_free_id += 2 # only odd numbers, to make lltype
+ # and llmemory happy and to avoid
+ # clashes with real addresses
+ return result
+
+ def _compute_id(self, obj):
+ # look if the object is listed in objects_with_id
+ result = self.objects_with_id.get(obj)
+ if not result:
+ result = self._next_id()
+ self.objects_with_id.setitem(obj, result)
+ return result
+
+ def _compute_id_for_external(self, obj):
+ # For prebuilt objects, we can simply return their address.
+ # This method is overridden by the HybridGC.
+ return obj
+
+ def update_objects_with_id(self):
+ old = self.objects_with_id
+ new_objects_with_id = self.AddressDict(old.length())
+ old.foreach(self._update_object_id_FAST, new_objects_with_id)
+ old.delete()
+ self.objects_with_id = new_objects_with_id
+
+ def _update_object_id(self, obj, id, new_objects_with_id):
+ # safe version (used by subclasses)
+ if self.surviving(obj):
+ newobj = self.get_forwarding_address(obj)
+ new_objects_with_id.setitem(newobj, id)
+ else:
+ self.id_free_list.append(id)
+
+ def _update_object_id_FAST(self, obj, id, new_objects_with_id):
+ # unsafe version, assumes that the new_objects_with_id is large enough
+ if self.surviving(obj):
+ newobj = self.get_forwarding_address(obj)
+ new_objects_with_id.insertclean(newobj, id)
+ else:
+ self.id_free_list.append(id)
+
def debug_check_object(self, obj):
"""Check the invariants about 'obj' that should be true
between collections."""
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py Fri May 9 13:37:18 2008
@@ -238,6 +238,32 @@
self.gc.collect()
verify()
+ def test_id(self):
+ ids = {}
+ def allocate_bunch(count=50):
+ base = len(self.stackroots)
+ for i in range(count):
+ p = self.malloc(S)
+ self.stackroots.append(p)
+ for i in range(count):
+ j = base + (i*1291) % count
+ pid = self.gc.id(self.stackroots[j])
+ assert isinstance(pid, int)
+ ids[j] = pid
+ def verify():
+ for j, expected in ids.items():
+ assert self.gc.id(self.stackroots[j]) == expected
+ allocate_bunch(5)
+ verify()
+ allocate_bunch(75)
+ verify()
+ allocate_bunch(5)
+ verify()
+ self.gc.collect()
+ verify()
+ self.gc.collect()
+ verify()
+
class TestSemiSpaceGC(DirectGCTest):
from pypy.rpython.memory.gc.semispace import SemiSpaceGC as GCClass
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py Fri May 9 13:37:18 2008
@@ -57,6 +57,9 @@
def dict_add(d, key):
rdict.ll_dict_setitem(d, key, llmemory.NULL)
+def dict_insertclean(d, key, value):
+ rdict.ll_dict_insertclean(d, key, value, _hash(key))
+
def dict_foreach(d, callback, arg):
entries = d.entries
i = len(entries) - 1
@@ -87,6 +90,8 @@
'setitem': rdict.ll_dict_setitem,
'get': dict_get,
'add': dict_add,
+ 'insertclean': dict_insertclean,
+ 'clear': rdict.ll_clear,
'foreach': dict_foreach,
'keyhash': dict_keyhash,
'keyeq': None,
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/support.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/support.py Fri May 9 13:37:18 2008
@@ -228,6 +228,9 @@
def _key(self, addr):
return addr._fixup().ptr._obj
+ def _wrapkey(self, obj):
+ return llmemory.cast_ptr_to_adr(obj._as_ptr())
+
def delete(self):
pass
@@ -244,14 +247,23 @@
assert keyaddr
self.data[self._key(keyaddr)] = valueaddr
+ def insertclean(self, keyaddr, valueaddr):
+ assert keyaddr
+ key = self._key(keyaddr)
+ assert key not in self.data
+ self.data[key] = valueaddr
+
def add(self, keyaddr):
self.setitem(keyaddr, llmemory.NULL)
+ def clear(self):
+ self.data.clear()
+
def foreach(self, callback, arg):
"""Invoke 'callback(key, value, arg)' for all items in the dict.
Typically, 'callback' is a bound method and 'arg' can be None."""
for key, value in self.data.iteritems():
- callback(key, value, arg)
+ callback(self._wrapkey(key), value, arg)
def copy_and_update(dict, surviving, updated_address):
Modified: pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py (original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py Fri May 9 13:37:18 2008
@@ -42,9 +42,9 @@
assert lldict.alloc_count == 0
def test_copy_and_update(self):
- d = lldict.newdict()
+ d = lldict.newdict(3)
d.setitem(intaddr(41), intaddr(44))
- d.setitem(intaddr(42), intaddr(45))
+ d.insertclean(intaddr(42), intaddr(45))
d.setitem(intaddr(43), intaddr(46))
def surviving(key):
return key.intval != 41
@@ -60,6 +60,15 @@
d2.delete()
assert lldict.alloc_count == 0
+ def test_clear(self):
+ d = lldict.newdict()
+ d.setitem(intaddr(41), intaddr(42))
+ d.clear()
+ assert d.length() == 0
+ assert not d.contains(intaddr(41))
+ d.delete()
+ assert lldict.alloc_count == 0
+
def test_random(self):
for i in range(8) + range(8, 80, 10):
examples = {}
More information about the Pypy-commit
mailing list