[pypy-svn] r71452 - pypy/branch/gc-better-hash/pypy/rpython/memory/gc
arigo at codespeak.net
arigo at codespeak.net
Wed Feb 24 16:55:48 CET 2010
Author: arigo
Date: Wed Feb 24 16:55:46 2010
New Revision: 71452
Modified:
pypy/branch/gc-better-hash/pypy/rpython/memory/gc/generation.py
pypy/branch/gc-better-hash/pypy/rpython/memory/gc/hybrid.py
pypy/branch/gc-better-hash/pypy/rpython/memory/gc/semispace.py
Log:
Write all the code needed around the result I'm looking for.
Right now it always says 0 for the hash of objects in the
nursery. The test pass, though, which shows that it does not
get confused: only objects that really live in the nursery
when their hashes are taken get the hash 0.
Modified: pypy/branch/gc-better-hash/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/gc-better-hash/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/branch/gc-better-hash/pypy/rpython/memory/gc/generation.py Wed Feb 24 16:55:46 2010
@@ -1,7 +1,7 @@
import sys
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN
+from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
@@ -234,7 +234,7 @@
GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0 # NO_YOUNG_PTRS never set on young objs
GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
GCFLAG_NO_YOUNG_PTRS |
- GCFLAG_HASHTAKEN)
+ GC_HASH_TAKEN_ADDR)
# ____________________________________________________________
# Support code for full collections
@@ -534,6 +534,9 @@
def _id_grow_older(self, obj, id, ignored):
self.objects_with_id.setitem(obj, id)
+ def _compute_current_nursery_hash(self, obj):
+ return 0 # XXX temporary!
+
def heap_stats_walk_roots(self):
self.last_generation_root_objects.foreach(
self._track_heap_ext, None)
Modified: pypy/branch/gc-better-hash/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/branch/gc-better-hash/pypy/rpython/memory/gc/hybrid.py (original)
+++ pypy/branch/gc-better-hash/pypy/rpython/memory/gc/hybrid.py Wed Feb 24 16:55:46 2010
@@ -2,9 +2,11 @@
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.generation import GenerationGC
from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from pypy.rpython.memory.gc.semispace import GCFLAG_HASHTAKEN, GCFLAG_HASHFIELD
+from pypy.rpython.memory.gc.semispace import GCFLAG_HASHMASK
from pypy.rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
from pypy.rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
+from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
+from pypy.rpython.memory.gc.semispace import GC_HASH_HASFIELD
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.lltypesystem.lloperation import llop
@@ -359,7 +361,7 @@
# it's not an issue.
totalsize = self.size_gc_header() + objsize
tid = self.header(obj).tid
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
+ if tid & GCFLAG_HASHMASK:
totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
else:
totalsize_incl_hash = totalsize
@@ -370,14 +372,10 @@
self._nonmoving_copy_size += raw_malloc_usage(totalsize)
llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- # check if we need to write a hash value at the end of the new obj
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
- if tid & GCFLAG_HASHFIELD:
- hash = (obj + objsize).signed[0]
- else:
- hash = llmemory.cast_adr_to_int(obj)
- tid |= GCFLAG_HASHFIELD
+ if tid & GCFLAG_HASHMASK:
+ hash = self._get_object_hash(obj, objsize, tid)
(newaddr + totalsize).signed[0] = hash
+ tid |= GC_HASH_HASFIELD
#
# GCFLAG_UNVISITED is not set
# GCFLAG_NO_HEAP_PTRS is not set either, conservatively. It may be
@@ -544,8 +542,8 @@
tid = self.header(obj).tid
ll_assert(bool(tid & GCFLAG_EXTERNAL),
"gen2: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GCFLAG_HASHTAKEN),
- "gen2: missing GCFLAG_HASHTAKEN")
+ ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
+ "gen2: missing GC_HASH_TAKEN_ADDR")
ll_assert(bool(tid & GCFLAG_UNVISITED),
"gen2: missing GCFLAG_UNVISITED")
ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
@@ -554,8 +552,8 @@
tid = self.header(obj).tid
ll_assert(bool(tid & GCFLAG_EXTERNAL),
"gen3: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GCFLAG_HASHTAKEN),
- "gen3: missing GCFLAG_HASHTAKEN")
+ ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
+ "gen3: missing GC_HASH_TAKEN_ADDR")
ll_assert(not (tid & GCFLAG_UNVISITED),
"gen3: unexpected GCFLAG_UNVISITED")
ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
Modified: pypy/branch/gc-better-hash/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/branch/gc-better-hash/pypy/rpython/memory/gc/semispace.py (original)
+++ pypy/branch/gc-better-hash/pypy/rpython/memory/gc/semispace.py Wed Feb 24 16:55:46 2010
@@ -21,11 +21,22 @@
# either immortal objects or (for HybridGC) externally raw_malloc'ed
GCFLAG_EXTERNAL = first_gcflag << 1
GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
-GCFLAG_HASHTAKEN = first_gcflag << 3 # someone already asked for the hash
-GCFLAG_HASHFIELD = first_gcflag << 4 # we have an extra hash field
+
+_GCFLAG_HASH_BASE = first_gcflag << 3
+GCFLAG_HASHMASK = _GCFLAG_HASH_BASE * 0x3 # also consumes 'first_gcflag << 4'
+# the two bits in GCFLAG_HASHMASK can have one of the following values:
+# - nobody ever asked for the hash of the object
+GC_HASH_NOTTAKEN = _GCFLAG_HASH_BASE * 0x0
+# - someone asked, and we gave the address of the object
+GC_HASH_TAKEN_ADDR = _GCFLAG_HASH_BASE * 0x1
+# - someone asked, and we gave the address xor'ed with nursery_hash_base
+GC_HASH_TAKEN_NURS = _GCFLAG_HASH_BASE * 0x2
+# - we have our own extra field to store the hash
+GC_HASH_HASFIELD = _GCFLAG_HASH_BASE * 0x3
memoryError = MemoryError()
+
class SemiSpaceGC(MovingGCBase):
_alloc_flavor_ = "raw"
inline_simple_malloc = True
@@ -35,9 +46,9 @@
HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT?
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', GCFLAG_HASHFIELD
- # ^^^ all prebuilt objects have GCFLAG_HASHTAKEN, but only some have
- # GCFLAG_HASHFIELD (and then they are one word longer).
+ withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
+ # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
+ # have GC_HASH_HASFIELD (and then they are one word longer).
FORWARDSTUB = lltype.GcStruct('forwarding_stub',
('forw', llmemory.Address))
FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
@@ -316,7 +327,7 @@
def get_size_incl_hash(self, obj):
size = self.get_size(obj)
hdr = self.header(obj)
- if hdr.tid & GCFLAG_HASHFIELD:
+ if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
size += llmemory.sizeof(lltype.Signed)
return size
@@ -351,22 +362,31 @@
self.set_forwarding_address(obj, newobj, objsize)
return newobj
+ def _get_object_hash(self, obj, objsize, tid):
+ # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
+ gc_hash = tid & GCFLAG_HASHMASK
+ if gc_hash == GC_HASH_HASFIELD:
+ obj = llarena.getfakearenaaddress(obj)
+ return (obj + objsize).signed[0]
+ elif gc_hash == GC_HASH_TAKEN_ADDR:
+ return llmemory.cast_adr_to_int(obj)
+ elif gc_hash == GC_HASH_TAKEN_NURS:
+ return self._compute_current_nursery_hash(obj)
+ else:
+ assert 0, "gc_hash == GC_HASH_NOTTAKEN"
+
def _make_a_copy_with_tid(self, obj, objsize, tid):
totalsize = self.size_gc_header() + objsize
newaddr = self.free
llarena.arena_reserve(newaddr, totalsize)
raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- #
- # check if we need to write a hash value at the end of the new obj
- if tid & (GCFLAG_HASHTAKEN|GCFLAG_HASHFIELD):
- if tid & GCFLAG_HASHFIELD:
- hash = (obj + objsize).signed[0]
- else:
- hash = llmemory.cast_adr_to_int(obj)
- tid |= GCFLAG_HASHFIELD
+ if tid & GCFLAG_HASHMASK:
+ hash = self._get_object_hash(obj, objsize, tid)
+ llarena.arena_reserve(newaddr + totalsize,
+ llmemory.sizeof(lltype.Signed))
(newaddr + totalsize).signed[0] = hash
+ tid |= GC_HASH_HASFIELD
totalsize += llmemory.sizeof(lltype.Signed)
- #
self.free += totalsize
newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
newhdr.tid = tid
@@ -446,7 +466,7 @@
def init_gc_object_immortal(self, addr, typeid16, flags=0):
hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
- flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GCFLAG_HASHTAKEN
+ flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GC_HASH_TAKEN_ADDR
hdr.tid = self.combine(typeid16, flags)
# immortal objects always have GCFLAG_FORWARDED set;
# see get_forwarding_address().
@@ -611,36 +631,46 @@
STATISTICS_NUMBERS = 0
+ def is_in_nursery(self, addr):
+ # overridden in generation.py.
+ return False
+
+ def _compute_current_nursery_hash(self, obj):
+ # overridden in generation.py.
+ raise AssertionError("should not be called")
+
def identityhash(self, gcobj):
- # The following code should run at most twice.
+ # The following loop should run at most twice.
while 1:
obj = llmemory.cast_ptr_to_adr(gcobj)
hdr = self.header(obj)
- #
- if hdr.tid & GCFLAG_HASHFIELD: # the hash is in a field at the end
- obj += self.get_size(obj)
- return obj.signed[0]
- #
- if not (hdr.tid & GCFLAG_HASHTAKEN):
- # It's the first time we ask for a hash, and it's not an
- # external object. Shrink the top of space by the extra
- # hash word that will be needed after a collect.
- shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
- if shrunk_top < self.free:
- # Cannot shrink! Do a collection, asking for at least
- # one word of free space, and try again. May raise
- # MemoryError. Obscure: not called directly, but
- # across an llop, to make sure that there is the
- # correct push_roots/pop_roots around the call...
- llop.gc_obtain_free_space(llmemory.Address,
- llmemory.sizeof(lltype.Signed))
- continue
- # Now we can have side-effects: set GCFLAG_HASHTAKEN
- # and lower the top of space.
+ if hdr.tid & GCFLAG_HASHMASK:
+ break
+ # It's the first time we ask for a hash, and it's not an
+ # external object. Shrink the top of space by the extra
+ # hash word that will be needed after a collect.
+ shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
+ if shrunk_top < self.free:
+ # Cannot shrink! Do a collection, asking for at least
+ # one word of free space, and try again. May raise
+ # MemoryError. Obscure: not called directly, but
+ # across an llop, to make sure that there is the
+ # correct push_roots/pop_roots around the call...
+ llop.gc_obtain_free_space(llmemory.Address,
+ llmemory.sizeof(lltype.Signed))
+ continue
+ else:
+ # Now we can have side-effects: lower the top of space
+ # and set one of the GC_HASH_TAKEN_xxx flags.
self.top_of_space = shrunk_top
- hdr.tid |= GCFLAG_HASHTAKEN
- #
- return llmemory.cast_adr_to_int(obj) # direct case
+ if self.is_in_nursery(obj):
+ hdr.tid |= GC_HASH_TAKEN_NURS
+ else:
+ hdr.tid |= GC_HASH_TAKEN_ADDR
+ break
+ # Now we can return the result
+ objsize = self.get_size(obj)
+ return self._get_object_hash(obj, objsize, hdr.tid)
def track_heap_parent(self, obj, parent):
addr = obj.address[0]
More information about the Pypy-commit
mailing list