[pypy-svn] r71472 - in pypy/trunk/pypy: rpython/lltypesystem rpython/memory/gc rpython/memory/gc/test translator/c/test
arigo at codespeak.net
arigo at codespeak.net
Wed Feb 24 21:17:10 CET 2010
Author: arigo
Date: Wed Feb 24 21:17:05 2010
New Revision: 71472
Modified:
pypy/trunk/pypy/rpython/lltypesystem/llarena.py
pypy/trunk/pypy/rpython/lltypesystem/llmemory.py
pypy/trunk/pypy/rpython/memory/gc/generation.py
pypy/trunk/pypy/rpython/memory/gc/hybrid.py
pypy/trunk/pypy/rpython/memory/gc/semispace.py
pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
pypy/trunk/pypy/translator/c/test/test_newgc.py
Log:
Merge the gc-better-hash branch.
It improves the identityhash of young objects -- they are now not just
the address, because the address of young objects is always within the
small nursery; they are the address plus a number that changes at every
nursery collection.
Also, it contains a bit of work to let the identityhash logic be tested
by test_direct.py, at least on SemiSpace and Generation GC -- there is
still an issue with testing it on the Hybrid GC. (Of course, it's still
fully tested by test_newgc.py.)
Modified: pypy/trunk/pypy/rpython/lltypesystem/llarena.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llarena.py (original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llarena.py Wed Feb 24 21:17:05 2010
@@ -227,7 +227,7 @@
return self.arena._getid() + self.offset
-def _getfakearenaaddress(addr):
+def getfakearenaaddress(addr):
"""Logic to handle test_replace_object_with_stub()."""
if isinstance(addr, fakearenaaddress):
return addr
@@ -306,7 +306,7 @@
* 1: clear, optimized for a very large area of memory
* 2: clear, optimized for a small or medium area of memory
"""
- arena_addr = _getfakearenaaddress(arena_addr)
+ arena_addr = getfakearenaaddress(arena_addr)
arena_addr.arena.reset(zero, arena_addr.offset, size)
def arena_reserve(addr, size, check_alignment=True):
@@ -315,7 +315,7 @@
overlap. The size must be symbolic; in non-translated version
this is used to know what type of lltype object to allocate."""
from pypy.rpython.memory.lltypelayout import memory_alignment
- addr = _getfakearenaaddress(addr)
+ addr = getfakearenaaddress(addr)
if check_alignment and (addr.offset & (memory_alignment-1)) != 0:
raise ArenaError("object at offset %d would not be correctly aligned"
% (addr.offset,))
@@ -324,7 +324,7 @@
def arena_shrink_obj(addr, newsize):
""" Mark object as shorter than it was
"""
- addr = _getfakearenaaddress(addr)
+ addr = getfakearenaaddress(addr)
addr.arena.shrink_obj(addr.offset, newsize)
def round_up_for_allocation(size, minsize=0):
@@ -505,3 +505,11 @@
register_external(arena_new_view, [llmemory.Address], llmemory.Address,
'll_arena.arena_new_view', llimpl=llimpl_arena_new_view,
llfakeimpl=arena_new_view, sandboxsafe=True)
+
+def llimpl_getfakearenaaddress(addr):
+ return addr
+register_external(getfakearenaaddress, [llmemory.Address], llmemory.Address,
+ 'll_arena.getfakearenaaddress',
+ llimpl=llimpl_getfakearenaaddress,
+ llfakeimpl=getfakearenaaddress,
+ sandboxsafe=True)
Modified: pypy/trunk/pypy/rpython/lltypesystem/llmemory.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llmemory.py (original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llmemory.py Wed Feb 24 21:17:05 2010
@@ -490,7 +490,7 @@
if self.ptr is not None and self.ptr._was_freed():
# hack to support llarena.test_replace_object_with_stub()
from pypy.rpython.lltypesystem import llarena
- return llarena._getfakearenaaddress(self)
+ return llarena.getfakearenaaddress(self)
else:
return self
Modified: pypy/trunk/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/trunk/pypy/rpython/memory/gc/generation.py Wed Feb 24 21:17:05 2010
@@ -1,13 +1,14 @@
import sys
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN
+from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
from pypy.rlib.objectmodel import free_non_gc_object
from pypy.rlib.debug import ll_assert
from pypy.rlib.debug import debug_print, debug_start, debug_stop
+from pypy.rlib.rarithmetic import intmask
from pypy.rpython.lltypesystem.lloperation import llop
# The following flag is never set on young objects, i.e. the ones living
@@ -43,6 +44,8 @@
'min_nursery_size': 48*1024,
'auto_nursery_size': True}
+ nursery_hash_base = -1
+
def __init__(self, config, chunk_size=DEFAULT_CHUNK_SIZE,
nursery_size=128,
min_nursery_size=128,
@@ -234,7 +237,7 @@
GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0 # NO_YOUNG_PTRS never set on young objs
GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
GCFLAG_NO_YOUNG_PTRS |
- GCFLAG_HASHTAKEN)
+ GC_HASH_TAKEN_ADDR)
# ____________________________________________________________
# Support code for full collections
@@ -355,8 +358,21 @@
self.nursery_top = self.nursery + self.nursery_size
self.free = self.nursery_top
self.nursery_free = self.nursery
+ # at this point we know that the nursery is empty
+ self.change_nursery_hash_base()
return self.nursery_free
+ def change_nursery_hash_base(self):
+ # The following should be enough to ensure that young objects
+ # tend to always get a different hash. It also makes sure that
+ # nursery_hash_base is not a multiple of 4, to avoid collisions
+ # with the hash of non-young objects.
+ hash_base = self.nursery_hash_base
+ hash_base += self.nursery_size - 1
+ if (hash_base & 3) == 0:
+ hash_base -= 1
+ self.nursery_hash_base = intmask(hash_base)
+
# NB. we can use self.copy() to move objects out of the nursery,
# but only if the object was really in the nursery.
@@ -534,6 +550,9 @@
def _id_grow_older(self, obj, id, ignored):
self.objects_with_id.setitem(obj, id)
+ def _compute_current_nursery_hash(self, obj):
+ return intmask(llmemory.cast_adr_to_int(obj) + self.nursery_hash_base)
+
def heap_stats_walk_roots(self):
self.last_generation_root_objects.foreach(
self._track_heap_ext, None)
Modified: pypy/trunk/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/hybrid.py (original)
+++ pypy/trunk/pypy/rpython/memory/gc/hybrid.py Wed Feb 24 21:17:05 2010
@@ -2,9 +2,11 @@
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.generation import GenerationGC
from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN, GCFLAG_HASHFIELD
+from pypy.rpython.memory.gc.semispace import GCFLAG_HASHMASK
from pypy.rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
from pypy.rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
+from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
+from pypy.rpython.memory.gc.semispace import GC_HASH_HASFIELD
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.lltypesystem.lloperation import llop
@@ -359,7 +361,7 @@
# it's not an issue.
totalsize = self.size_gc_header() + objsize
tid = self.header(obj).tid
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
+ if tid & GCFLAG_HASHMASK:
totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
else:
totalsize_incl_hash = totalsize
@@ -370,14 +372,10 @@
self._nonmoving_copy_size += raw_malloc_usage(totalsize)
llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- # check if we need to write a hash value at the end of the new obj
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
- if tid & GCFLAG_HASHFIELD:
- hash = (obj + objsize).signed[0]
- else:
- hash = llmemory.cast_adr_to_int(obj)
- tid |= GCFLAG_HASHFIELD
+ if tid & GCFLAG_HASHMASK:
+ hash = self._get_object_hash(obj, objsize, tid)
(newaddr + totalsize).signed[0] = hash
+ tid |= GC_HASH_HASFIELD
#
# GCFLAG_UNVISITED is not set
# GCFLAG_NO_HEAP_PTRS is not set either, conservatively. It may be
@@ -544,8 +542,8 @@
tid = self.header(obj).tid
ll_assert(bool(tid & GCFLAG_EXTERNAL),
"gen2: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GCFLAG_HASHTAKEN),
- "gen2: missing GCFLAG_HASHTAKEN")
+ ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
+ "gen2: missing GC_HASH_TAKEN_ADDR")
ll_assert(bool(tid & GCFLAG_UNVISITED),
"gen2: missing GCFLAG_UNVISITED")
ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
@@ -554,8 +552,8 @@
tid = self.header(obj).tid
ll_assert(bool(tid & GCFLAG_EXTERNAL),
"gen3: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GCFLAG_HASHTAKEN),
- "gen3: missing GCFLAG_HASHTAKEN")
+ ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
+ "gen3: missing GC_HASH_TAKEN_ADDR")
ll_assert(not (tid & GCFLAG_UNVISITED),
"gen3: unexpected GCFLAG_UNVISITED")
ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
Modified: pypy/trunk/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/semispace.py (original)
+++ pypy/trunk/pypy/rpython/memory/gc/semispace.py Wed Feb 24 21:17:05 2010
@@ -21,11 +21,22 @@
# either immortal objects or (for HybridGC) externally raw_malloc'ed
GCFLAG_EXTERNAL = first_gcflag << 1
GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
-GCFLAG_HASHTAKEN = first_gcflag << 3 # someone already asked for the hash
-GCFLAG_HASHFIELD = first_gcflag << 4 # we have an extra hash field
+
+_GCFLAG_HASH_BASE = first_gcflag << 3
+GCFLAG_HASHMASK = _GCFLAG_HASH_BASE * 0x3 # also consumes 'first_gcflag << 4'
+# the two bits in GCFLAG_HASHMASK can have one of the following values:
+# - nobody ever asked for the hash of the object
+GC_HASH_NOTTAKEN = _GCFLAG_HASH_BASE * 0x0
+# - someone asked, and we gave the address of the object
+GC_HASH_TAKEN_ADDR = _GCFLAG_HASH_BASE * 0x1
+# - someone asked, and we gave the address plus 'nursery_hash_base'
+GC_HASH_TAKEN_NURS = _GCFLAG_HASH_BASE * 0x2
+# - we have our own extra field to store the hash
+GC_HASH_HASFIELD = _GCFLAG_HASH_BASE * 0x3
memoryError = MemoryError()
+
class SemiSpaceGC(MovingGCBase):
_alloc_flavor_ = "raw"
inline_simple_malloc = True
@@ -35,9 +46,9 @@
HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT?
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', GCFLAG_HASHFIELD
- # ^^^ all prebuilt objects have GCFLAG_HASHTAKEN, but only some have
- # GCFLAG_HASHFIELD (and then they are one word longer).
+ withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
+ # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
+ # have GC_HASH_HASFIELD (and then they are one word longer).
FORWARDSTUB = lltype.GcStruct('forwarding_stub',
('forw', llmemory.Address))
FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
@@ -316,7 +327,7 @@
def get_size_incl_hash(self, obj):
size = self.get_size(obj)
hdr = self.header(obj)
- if hdr.tid & GCFLAG_HASHFIELD:
+ if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
size += llmemory.sizeof(lltype.Signed)
return size
@@ -351,22 +362,31 @@
self.set_forwarding_address(obj, newobj, objsize)
return newobj
+ def _get_object_hash(self, obj, objsize, tid):
+ # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
+ gc_hash = tid & GCFLAG_HASHMASK
+ if gc_hash == GC_HASH_HASFIELD:
+ obj = llarena.getfakearenaaddress(obj)
+ return (obj + objsize).signed[0]
+ elif gc_hash == GC_HASH_TAKEN_ADDR:
+ return llmemory.cast_adr_to_int(obj)
+ elif gc_hash == GC_HASH_TAKEN_NURS:
+ return self._compute_current_nursery_hash(obj)
+ else:
+ assert 0, "gc_hash == GC_HASH_NOTTAKEN"
+
def _make_a_copy_with_tid(self, obj, objsize, tid):
totalsize = self.size_gc_header() + objsize
newaddr = self.free
llarena.arena_reserve(newaddr, totalsize)
raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- #
- # check if we need to write a hash value at the end of the new obj
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
- if tid & GCFLAG_HASHFIELD:
- hash = (obj + objsize).signed[0]
- else:
- hash = llmemory.cast_adr_to_int(obj)
- tid |= GCFLAG_HASHFIELD
+ if tid & GCFLAG_HASHMASK:
+ hash = self._get_object_hash(obj, objsize, tid)
+ llarena.arena_reserve(newaddr + totalsize,
+ llmemory.sizeof(lltype.Signed))
(newaddr + totalsize).signed[0] = hash
+ tid |= GC_HASH_HASFIELD
totalsize += llmemory.sizeof(lltype.Signed)
- #
self.free += totalsize
newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
newhdr.tid = tid
@@ -446,7 +466,7 @@
def init_gc_object_immortal(self, addr, typeid16, flags=0):
hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
- flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GCFLAG_HASHTAKEN
+ flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GC_HASH_TAKEN_ADDR
hdr.tid = self.combine(typeid16, flags)
# immortal objects always have GCFLAG_FORWARDED set;
# see get_forwarding_address().
@@ -611,36 +631,46 @@
STATISTICS_NUMBERS = 0
+ def is_in_nursery(self, addr):
+ # overridden in generation.py.
+ return False
+
+ def _compute_current_nursery_hash(self, obj):
+ # overridden in generation.py.
+ raise AssertionError("should not be called")
+
def identityhash(self, gcobj):
- # The following code should run at most twice.
+ # The following loop should run at most twice.
while 1:
obj = llmemory.cast_ptr_to_adr(gcobj)
hdr = self.header(obj)
- #
- if hdr.tid & GCFLAG_HASHFIELD: # the hash is in a field at the end
- obj += self.get_size(obj)
- return obj.signed[0]
- #
- if not (hdr.tid & GCFLAG_HASHTAKEN):
- # It's the first time we ask for a hash, and it's not an
- # external object. Shrink the top of space by the extra
- # hash word that will be needed after a collect.
- shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
- if shrunk_top < self.free:
- # Cannot shrink! Do a collection, asking for at least
- # one word of free space, and try again. May raise
- # MemoryError. Obscure: not called directly, but
- # across an llop, to make sure that there is the
- # correct push_roots/pop_roots around the call...
- llop.gc_obtain_free_space(llmemory.Address,
- llmemory.sizeof(lltype.Signed))
- continue
- # Now we can have side-effects: set GCFLAG_HASHTAKEN
- # and lower the top of space.
+ if hdr.tid & GCFLAG_HASHMASK:
+ break
+ # It's the first time we ask for a hash, and it's not an
+ # external object. Shrink the top of space by the extra
+ # hash word that will be needed after a collect.
+ shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
+ if shrunk_top < self.free:
+ # Cannot shrink! Do a collection, asking for at least
+ # one word of free space, and try again. May raise
+ # MemoryError. Obscure: not called directly, but
+ # across an llop, to make sure that there is the
+ # correct push_roots/pop_roots around the call...
+ llop.gc_obtain_free_space(llmemory.Address,
+ llmemory.sizeof(lltype.Signed))
+ continue
+ else:
+ # Now we can have side-effects: lower the top of space
+ # and set one of the GC_HASH_TAKEN_xxx flags.
self.top_of_space = shrunk_top
- hdr.tid |= GCFLAG_HASHTAKEN
- #
- return llmemory.cast_adr_to_int(obj) # direct case
+ if self.is_in_nursery(obj):
+ hdr.tid |= GC_HASH_TAKEN_NURS
+ else:
+ hdr.tid |= GC_HASH_TAKEN_ADDR
+ break
+ # Now we can return the result
+ objsize = self.get_size(obj)
+ return self._get_object_hash(obj, objsize, hdr.tid)
def track_heap_parent(self, obj, parent):
addr = obj.address[0]
Modified: pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py (original)
+++ pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py Wed Feb 24 21:17:05 2010
@@ -264,6 +264,51 @@
self.gc.collect()
verify()
+ def test_identityhash(self):
+ # a "does not crash" kind of test
+ p_const = lltype.malloc(S, immortal=True)
+ self.consider_constant(p_const)
+ # (1) p is in the nursery
+ self.gc.collect()
+ p = self.malloc(S)
+ hash = self.gc.identityhash(p)
+ print hash
+ assert isinstance(hash, (int, long))
+ assert hash == self.gc.identityhash(p)
+ self.stackroots.append(p)
+ for i in range(6):
+ self.gc.collect()
+ assert hash == self.gc.identityhash(self.stackroots[-1])
+ self.stackroots.pop()
+ # (2) p is an older object
+ p = self.malloc(S)
+ self.stackroots.append(p)
+ self.gc.collect()
+ hash = self.gc.identityhash(self.stackroots[-1])
+ print hash
+ assert isinstance(hash, (int, long))
+ for i in range(6):
+ self.gc.collect()
+ assert hash == self.gc.identityhash(self.stackroots[-1])
+ self.stackroots.pop()
+ # (3) p is a gen3 object (for hybrid)
+ p = self.malloc(S)
+ self.stackroots.append(p)
+ for i in range(6):
+ self.gc.collect()
+ hash = self.gc.identityhash(self.stackroots[-1])
+ print hash
+ assert isinstance(hash, (int, long))
+ for i in range(2):
+ self.gc.collect()
+ assert hash == self.gc.identityhash(self.stackroots[-1])
+ self.stackroots.pop()
+ # (4) p is a prebuilt object
+ hash = self.gc.identityhash(p_const)
+ print hash
+ assert isinstance(hash, (int, long))
+ assert hash == self.gc.identityhash(p_const)
+
class TestSemiSpaceGC(DirectGCTest):
from pypy.rpython.memory.gc.semispace import SemiSpaceGC as GCClass
@@ -376,6 +421,9 @@
assert calls == [('semispace_collect', True)]
calls = []
+ def test_identityhash(self):
+ py.test.skip("does not support raw_mallocs(sizeof(S)+sizeof(hash))")
+
class TestMarkCompactGC(DirectGCTest):
from pypy.rpython.memory.gc.markcompact import MarkCompactGC as GCClass
Modified: pypy/trunk/pypy/translator/c/test/test_newgc.py
==============================================================================
--- pypy/trunk/pypy/translator/c/test/test_newgc.py (original)
+++ pypy/trunk/pypy/translator/c/test/test_newgc.py Wed Feb 24 21:17:05 2010
@@ -1019,6 +1019,30 @@
res = self.run('string_builder_over_allocation')
assert res[1000] == 'y'
+ def define_nursery_hash_base(cls):
+ from pypy.rlib.objectmodel import compute_identity_hash
+ class A:
+ pass
+ def fn():
+ objects = []
+ hashes = []
+ for i in range(200):
+ rgc.collect(0) # nursery-only collection, if possible
+ obj = A()
+ objects.append(obj)
+ hashes.append(compute_identity_hash(obj))
+ unique = {}
+ for i in range(len(objects)):
+ assert compute_identity_hash(objects[i]) == hashes[i]
+ unique[hashes[i]] = None
+ return len(unique)
+ return fn
+
+ def test_nursery_hash_base(self):
+ res = self.run('nursery_hash_base')
+ assert res >= 195
+
+
class TestGenerationalGC(TestSemiSpaceGC):
gcpolicy = "generation"
should_be_moving = True
More information about the Pypy-commit
mailing list