[pypy-svn] pypy default: Merge branch/gc-minimark-largeobj again: adds support for young

arigo commits-noreply at bitbucket.org
Wed Jan 5 11:18:29 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r40405:765055a6a7e5
Date: 2011-01-05 11:18 +0100
http://bitbucket.org/pypy/pypy/changeset/765055a6a7e5/

Log:	Merge branch/gc-minimark-largeobj again: adds support for young
	objects that are raw-malloced (i.e. that are big). Gives a speed-up
	of ~15% on running hg.

diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -55,15 +55,29 @@
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.tool.sourcetools import func_with_new_name
 
+#
+# Handles the objects in 2 generations:
+#
+#  * young objects: allocated in the nursery if they are not too large, or
+#    raw-malloced otherwise.  The nursery is a fixed-size memory buffer of
+#    half the size of the L2 cache.  When full, we do a minor collection;
+#    the surviving objects from the nursery are moved outside, and the
+#    non-surviving raw-malloced objects are freed.  All surviving objects
+#    become old.
+#
+#  * old objects: never move again.  These objects are either allocated by
+#    minimarkpage.py (if they are small), or raw-malloced (if they are not
+#    small).  Collected by regular mark-n-sweep during major collections.
+#
+
 WORD = LONG_BIT // 8
 NULL = llmemory.NULL
 
 first_gcflag = 1 << (LONG_BIT//2)
 
-# The following flag is never set on young objects, i.e. the ones living
-# in the nursery.  It is initially set on all prebuilt and old objects,
-# and gets cleared by the write_barrier() when we write in them a
-# pointer to a young object.
+# The following flag is never set on young objects.  It is initially set
+# on all prebuilt and old objects, and gets cleared by the write_barrier()
+# when we write in them a pointer to a young object.
 GCFLAG_NO_YOUNG_PTRS = first_gcflag << 0
 
 # The following flag is set on some prebuilt objects.  The flag is set
@@ -73,7 +87,8 @@
 # 'prebuilt_root_objects'.
 GCFLAG_NO_HEAP_PTRS = first_gcflag << 1
 
-# The following flag is set on surviving objects during a major collection.
+# The following flag is set on surviving objects during a major collection,
+# and on surviving raw-malloced young objects during a minor collection.
 GCFLAG_VISITED      = first_gcflag << 2
 
 # The following flag is set on nursery objects of which we asked the id
@@ -178,9 +193,9 @@
 
         # Objects whose total size is at least 'large_object' bytes are
         # allocated out of the nursery immediately, as old objects.  The
-        # minimal allocated size of the nursery is 1.9x the following
-        # number (by default, at least 500KB on 32-bit and 1000KB on 64-bit).
-        "large_object": 65792*WORD,
+        # minimal allocated size of the nursery is 2x the following
+        # number (by default, at least 132KB on 32-bit and 264KB on 64-bit).
+        "large_object": (16384+512)*WORD,
         }
 
     def __init__(self, config,
@@ -255,11 +270,12 @@
         # we implement differently anyway.  So directly call GCBase.setup().
         GCBase.setup(self)
         #
-        # A list of all raw_malloced objects (the objects too large)
-        self.rawmalloced_objects = self.AddressStack()
+        # Two lists of all raw_malloced objects (the objects too large)
+        self.young_rawmalloced_objects = self.null_address_dict()
+        self.old_rawmalloced_objects = self.AddressStack()
         self.rawmalloced_total_size = r_uint(0)
         #
-        # A list of all objects with finalizers (never in the nursery).
+        # A list of all objects with finalizers (these are never young).
         self.objects_with_finalizers = self.AddressDeque()
         #
         # Two lists of the objects with weakrefs.  No weakref can be an
@@ -272,7 +288,7 @@
         # Support for id and identityhash: map nursery objects with
         # GCFLAG_HAS_SHADOW to their future location at the next
         # minor collection.
-        self.young_objects_shadows = self.AddressDict()
+        self.nursery_objects_shadows = self.AddressDict()
         #
         # Allocate a nursery.  In case of auto_nursery_size, start by
         # allocating a very small nursery, enough to do things like look
@@ -283,9 +299,7 @@
         else:
             #
             defaultsize = self.nursery_size
-            minsize = int(1.9 * self.nonlarge_max)
-            if we_are_translated():
-                minsize = (minsize + 4095) & ~4095
+            minsize = 2 * (self.nonlarge_max + 1)
             self.nursery_size = minsize
             self.allocate_nursery()
             #
@@ -435,7 +449,7 @@
         if needs_finalizer:
             ll_assert(not contains_weakptr,
                      "'needs_finalizer' and 'contains_weakptr' both specified")
-            obj = self.external_malloc(typeid, 0)
+            obj = self.external_malloc(typeid, 0, can_make_young=False)
             self.objects_with_finalizers.append(obj)
         #
         # If totalsize is greater than nonlarge_max (which should never be
@@ -563,7 +577,7 @@
     collect_and_reserve._dont_inline_ = True
 
 
-    def external_malloc(self, typeid, length):
+    def external_malloc(self, typeid, length, can_make_young=True):
         """Allocate a large object using the ArenaCollection or
         raw_malloc(), possibly as an object with card marking enabled,
         if it has gc pointers in its var-sized part.  'length' should be
@@ -605,7 +619,12 @@
             # Allocate from the ArenaCollection and clear the memory returned.
             result = self.ac.malloc(totalsize)
             llmemory.raw_memclear(result, totalsize)
-            extra_flags = 0
+            #
+            # An object allocated from ArenaCollection is always old, even
+            # if 'can_make_young'.  The interesting case of 'can_make_young'
+            # is for large objects, bigger than the 'large_object' threshold,
+            # which are raw-malloced but still young.
+            extra_flags = GCFLAG_NO_YOUNG_PTRS
             #
         else:
             # No, so proceed to allocate it externally with raw_malloc().
@@ -624,6 +643,8 @@
                 extra_words = self.card_marking_words_for_length(length)
                 cardheadersize = WORD * extra_words
                 extra_flags = GCFLAG_HAS_CARDS
+                # note that if 'can_make_young', then card marking will only
+                # be used later, after (and if) the object becomes old
             #
             # Detect very rare cases of overflows
             if raw_malloc_usage(totalsize) > (sys.maxint - (WORD-1)
@@ -656,11 +677,18 @@
             llarena.arena_reserve(result, totalsize)
             #
             # Record the newly allocated object and its full malloced size.
+            # The object is young or old depending on the argument.
             self.rawmalloced_total_size += allocsize
-            self.rawmalloced_objects.append(result + size_gc_header)
+            if can_make_young:
+                if not self.young_rawmalloced_objects:
+                    self.young_rawmalloced_objects = self.AddressDict()
+                self.young_rawmalloced_objects.add(result + size_gc_header)
+            else:
+                self.old_rawmalloced_objects.append(result + size_gc_header)
+                extra_flags |= GCFLAG_NO_YOUNG_PTRS
         #
         # Common code to fill the header and length of the object.
-        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS | extra_flags)
+        self.init_gc_object(result, typeid, extra_flags)
         if self.is_varsize(typeid):
             offset_to_length = self.varsize_offset_to_length(typeid)
             (result + size_gc_header + offset_to_length).signed[0] = length
@@ -756,13 +784,35 @@
                   "odd-valued (i.e. tagged) pointer unexpected here")
         return self.nursery <= addr < self.nursery_top
 
-    def appears_to_be_in_nursery(self, addr):
-        # same as is_in_nursery(), but may return True accidentally if
-        # 'addr' is a tagged pointer with just the wrong value.
+    def appears_to_be_young(self, addr):
+        # "is a valid addr to a young object?"
+        # but it's ok to occasionally return True accidentally.
+        # Maybe the best implementation would be a bloom filter
+        # of some kind instead of the dictionary lookup that is
+        # sometimes done below.  But the expected common answer
+        # is "Yes" because addr points to the nursery, so it may
+        # not be useful to optimize the other case too much.
+        #
+        # First, if 'addr' appears to be a pointer to some place within
+        # the nursery, return True
         if not self.translated_to_c:
+            # When non-translated, filter out tagged pointers explicitly.
+            # When translated, it may occasionally give a wrong answer
+            # of True if 'addr' is a tagged pointer with just the wrong value.
             if not self.is_valid_gc_object(addr):
                 return False
-        return self.nursery <= addr < self.nursery_top
+
+        if self.nursery <= addr < self.nursery_top:
+            return True      # addr is in the nursery
+        #
+        # Else, it may be in the set 'young_rawmalloced_objects'
+        return (bool(self.young_rawmalloced_objects) and
+                self.young_rawmalloced_objects.contains(addr))
+    appears_to_be_young._always_inline_ = True
+
+    def debug_is_old_object(self, addr):
+        return (self.is_valid_gc_object(addr)
+                and not self.appears_to_be_young(addr))
 
     def is_forwarded(self, obj):
         """Returns True if the nursery obj is marked as forwarded.
@@ -802,6 +852,14 @@
             ((r_uint(length) + ((8 << self.card_page_shift) - 1)) >>
              (self.card_page_shift + 3)))
 
+    def debug_check_consistency(self):
+        if self.DEBUG:
+            ll_assert(not self.young_rawmalloced_objects,
+                      "young raw-malloced objects in a major collection")
+            ll_assert(not self.young_objects_with_weakrefs.non_empty(),
+                      "young objects with weakrefs in a major collection")
+            MovingGCBase.debug_check_consistency(self)
+
     def debug_check_object(self, obj):
         # after a minor or major collection, no object should be in the nursery
         ll_assert(not self.is_in_nursery(obj),
@@ -809,10 +867,6 @@
         # similarily, all objects should have this flag:
         ll_assert(self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS,
                   "missing GCFLAG_NO_YOUNG_PTRS")
-        # if we have GCFLAG_NO_HEAP_PTRS, then we have GCFLAG_NO_YOUNG_PTRS
-        if self.header(obj).tid & GCFLAG_NO_HEAP_PTRS:
-            ll_assert(self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS,
-                      "GCFLAG_NO_HEAP_PTRS && !GCFLAG_NO_YOUNG_PTRS")
         # the GCFLAG_VISITED should not be set between collections
         ll_assert(self.header(obj).tid & GCFLAG_VISITED == 0,
                   "unexpected GCFLAG_VISITED")
@@ -885,17 +939,17 @@
             # 'addr_struct' is the address of the object in which we write.
             # 'newvalue' is the address that we are going to write in there.
             if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(not self.is_in_nursery(addr_struct),
-                          "nursery object with GCFLAG_NO_YOUNG_PTRS")
+                ll_assert(self.debug_is_old_object(addr_struct),
+                          "young object with GCFLAG_NO_YOUNG_PTRS")
             #
             # If it seems that what we are writing is a pointer to the nursery
-            # (as checked with appears_to_be_in_nursery()), then we need
+            # (as checked with appears_to_be_young()), then we need
             # to remove the flag GCFLAG_NO_YOUNG_PTRS and add the old object
             # to the list 'old_objects_pointing_to_young'.  We know that
             # 'addr_struct' cannot be in the nursery, because nursery objects
             # never have the flag GCFLAG_NO_YOUNG_PTRS to start with.
             objhdr = self.header(addr_struct)
-            if self.appears_to_be_in_nursery(newvalue):
+            if self.appears_to_be_young(newvalue):
                 self.old_objects_pointing_to_young.append(addr_struct)
                 objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
             #
@@ -922,8 +976,8 @@
             # which must have an array part;  'index' is the index of the
             # item that is (or contains) the pointer that we write.
             if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(not self.is_in_nursery(addr_array),
-                          "nursery array with GCFLAG_NO_YOUNG_PTRS")
+                ll_assert(self.debug_is_old_object(addr_array),
+                          "young array with GCFLAG_NO_YOUNG_PTRS")
             objhdr = self.header(addr_array)
             if objhdr.tid & GCFLAG_HAS_CARDS == 0:
                 #
@@ -992,7 +1046,7 @@
         #
         if (source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0 or
             source_hdr.tid & GCFLAG_CARDS_SET != 0):
-            # there might be an object in source that is in nursery
+            # there might be in source a pointer to a young object
             self.old_objects_pointing_to_young.append(dest_addr)
             dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
         #
@@ -1012,11 +1066,12 @@
         #
         debug_start("gc-minor")
         #
-        # First, find the roots that point to nursery objects.  These
-        # nursery objects are copied out of the nursery.  Note that
-        # references to further nursery objects are not modified by
-        # this step; only objects directly referenced by roots are
-        # copied out.  They are also added to the list
+        # First, find the roots that point to young objects.  All nursery
+        # objects found are copied out of the nursery, and the occasional
+        # young raw-malloced object is flagged with GCFLAG_VISITED.
+        # Note that during this step, we ignore references to further
+        # young objects; only objects directly referenced by roots
+        # are copied out or flagged.  They are also added to the list
         # 'old_objects_pointing_to_young'.
         self.collect_roots_in_nursery()
         #
@@ -1028,17 +1083,23 @@
         # Now trace objects from 'old_objects_pointing_to_young'.
         # All nursery objects they reference are copied out of the
         # nursery, and again added to 'old_objects_pointing_to_young'.
+        # Every young raw-malloced object found is flagged GCFLAG_VISITED.
         # We proceed until 'old_objects_pointing_to_young' is empty.
         self.collect_oldrefs_to_nursery()
         #
-        # Now all live nursery objects should be out.  Update the
-        # young weakrefs' targets.
+        # Now all live nursery objects should be out.  Update the young
+        # weakrefs' targets.
         if self.young_objects_with_weakrefs.non_empty():
             self.invalidate_young_weakrefs()
         #
         # Clear this mapping.
-        if self.young_objects_shadows.length() > 0:
-            self.young_objects_shadows.clear()
+        if self.nursery_objects_shadows.length() > 0:
+            self.nursery_objects_shadows.clear()
+        #
+        # Walk the list of young raw-malloced objects, and either free
+        # them or make them old.
+        if self.young_rawmalloced_objects:
+            self.free_young_rawmalloced_objects()
         #
         # All live nursery objects are out, and the rest dies.  Fill
         # the whole nursery with zero and reset the current nursery pointer.
@@ -1157,9 +1218,22 @@
 
     def _trace_drag_out(self, root, ignored):
         obj = root.address[0]
+        #print '_trace_drag_out(%x: %r)' % (hash(obj.ptr._obj), obj)
         #
-        # If 'obj' is not in the nursery, nothing to change.
+        # If 'obj' is not in the nursery, nothing to change -- except
+        # that we must set GCFLAG_VISITED on young raw-malloced objects.
         if not self.is_in_nursery(obj):
+            # cache usage trade-off: I think that it is a better idea to
+            # check if 'obj' is in young_rawmalloced_objects with an access
+            # to this (small) dictionary, rather than risk a lot of cache
+            # misses by reading a flag in the header of all the 'objs' that
+            # arrive here.
+            if (bool(self.young_rawmalloced_objects)
+                and self.young_rawmalloced_objects.contains(obj)):
+                # 'obj' points to a young, raw-malloced object
+                if (self.header(obj).tid & GCFLAG_VISITED) == 0:
+                    self.header(obj).tid |= GCFLAG_VISITED
+                    self.old_objects_pointing_to_young.append(obj)
             return
         #
         # If 'obj' was already forwarded, change it to its forwarding address.
@@ -1179,7 +1253,7 @@
             #
         else:
             # The object has already a shadow.
-            newobj = self.young_objects_shadows.get(obj)
+            newobj = self.nursery_objects_shadows.get(obj)
             ll_assert(newobj != NULL, "GCFLAG_HAS_SHADOW but no shadow found")
             newhdr = newobj - size_gc_header
             #
@@ -1236,9 +1310,20 @@
         #
         size_gc_header = self.gcheaderbuilder.size_gc_header
         self.rawmalloced_total_size += raw_malloc_usage(totalsize)
-        self.rawmalloced_objects.append(arena + size_gc_header)
+        self.old_rawmalloced_objects.append(arena + size_gc_header)
         return arena
 
+    def free_young_rawmalloced_objects(self):
+        self.young_rawmalloced_objects.foreach(
+            self._free_young_rawmalloced_obj, None)
+        self.young_rawmalloced_objects.delete()
+        self.young_rawmalloced_objects = self.null_address_dict()
+
+    def _free_young_rawmalloced_obj(self, obj, ignored1, ignored2):
+        # If 'obj' has GCFLAG_VISITED, it was seen by _trace_drag_out
+        # and survives.  Otherwise, it dies.
+        self.free_rawmalloced_object_if_unvisited(obj)
+
 
     # ----------
     # Full collection
@@ -1351,37 +1436,39 @@
     def _reset_gcflag_visited(self, obj, ignored):
         self.header(obj).tid &= ~GCFLAG_VISITED
 
+    def free_rawmalloced_object_if_unvisited(self, obj):
+        if self.header(obj).tid & GCFLAG_VISITED:
+            self.header(obj).tid &= ~GCFLAG_VISITED   # survives
+            self.old_rawmalloced_objects.append(obj)
+        else:
+            size_gc_header = self.gcheaderbuilder.size_gc_header
+            totalsize = size_gc_header + self.get_size(obj)
+            allocsize = raw_malloc_usage(totalsize)
+            arena = llarena.getfakearenaaddress(obj - size_gc_header)
+            #
+            # Must also include the card marker area, if any
+            if (self.card_page_indices > 0    # <- this is constant-folded
+                and self.header(obj).tid & GCFLAG_HAS_CARDS):
+                #
+                # Get the length and compute the number of extra bytes
+                typeid = self.get_type_id(obj)
+                ll_assert(self.has_gcptr_in_varsize(typeid),
+                          "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
+                offset_to_length = self.varsize_offset_to_length(typeid)
+                length = (obj + offset_to_length).signed[0]
+                extra_words = self.card_marking_words_for_length(length)
+                arena -= extra_words * WORD
+                allocsize += extra_words * WORD
+            #
+            llarena.arena_free(arena)
+            self.rawmalloced_total_size -= allocsize
+
     def free_unvisited_rawmalloc_objects(self):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        list = self.rawmalloced_objects
-        self.rawmalloced_objects = self.AddressStack()
+        list = self.old_rawmalloced_objects
+        self.old_rawmalloced_objects = self.AddressStack()
         #
         while list.non_empty():
-            obj = list.pop()
-            if self.header(obj).tid & GCFLAG_VISITED:
-                self.header(obj).tid &= ~GCFLAG_VISITED   # survives
-                self.rawmalloced_objects.append(obj)
-            else:
-                totalsize = size_gc_header + self.get_size(obj)
-                allocsize = raw_malloc_usage(totalsize)
-                arena = llarena.getfakearenaaddress(obj - size_gc_header)
-                #
-                # Must also include the card marker area, if any
-                if (self.card_page_indices > 0    # <- this is constant-folded
-                    and self.header(obj).tid & GCFLAG_HAS_CARDS):
-                    #
-                    # Get the length and compute the number of extra bytes
-                    typeid = self.get_type_id(obj)
-                    ll_assert(self.has_gcptr_in_varsize(typeid),
-                              "GCFLAG_HAS_CARDS but not has_gcptr_in_varsize")
-                    offset_to_length = self.varsize_offset_to_length(typeid)
-                    length = (obj + offset_to_length).signed[0]
-                    extra_words = self.card_marking_words_for_length(length)
-                    arena -= extra_words * WORD
-                    allocsize += extra_words * WORD
-                #
-                llarena.arena_free(arena)
-                self.rawmalloced_total_size -= allocsize
+            self.free_rawmalloced_object_if_unvisited(list.pop())
         #
         list.delete()
 
@@ -1464,7 +1551,7 @@
                 # where the object will be moved by the next minor
                 # collection
                 if self.header(obj).tid & GCFLAG_HAS_SHADOW:
-                    shadow = self.young_objects_shadows.get(obj)
+                    shadow = self.nursery_objects_shadows.get(obj)
                     ll_assert(shadow != NULL,
                               "GCFLAG_HAS_SHADOW but no shadow found")
                 else:
@@ -1488,7 +1575,7 @@
                         (shadow + lenofs).signed[0] = (obj + lenofs).signed[0]
                     #
                     self.header(obj).tid |= GCFLAG_HAS_SHADOW
-                    self.young_objects_shadows.setitem(obj, shadow)
+                    self.nursery_objects_shadows.setitem(obj, shadow)
                 #
                 # The answer is the address of the shadow.
                 obj = shadow
@@ -1635,6 +1722,16 @@
                 else:
                     (obj + offset).address[0] = llmemory.NULL
                     continue    # no need to remember this weakref any longer
+            #
+            elif (bool(self.young_rawmalloced_objects) and
+                  self.young_rawmalloced_objects.contains(pointing_to)):
+                # young weakref to a young raw-malloced object
+                if self.header(pointing_to).tid & GCFLAG_VISITED:
+                    pass    # survives, but does not move
+                else:
+                    (obj + offset).address[0] = llmemory.NULL
+                    continue    # no need to remember this weakref any longer
+            #
             self.old_objects_with_weakrefs.append(obj)
 
 

diff --git a/pypy/rpython/memory/gc/base.py b/pypy/rpython/memory/gc/base.py
--- a/pypy/rpython/memory/gc/base.py
+++ b/pypy/rpython/memory/gc/base.py
@@ -3,7 +3,7 @@
 from pypy.rpython.memory.gcheader import GCHeaderBuilder
 from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
 from pypy.rpython.memory.support import get_address_stack, get_address_deque
-from pypy.rpython.memory.support import AddressDict
+from pypy.rpython.memory.support import AddressDict, null_address_dict
 from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
 
 TYPEID_MAP = lltype.GcStruct('TYPEID_MAP', ('count', lltype.Signed),
@@ -26,6 +26,7 @@
         self.AddressStack = get_address_stack(chunk_size)
         self.AddressDeque = get_address_deque(chunk_size)
         self.AddressDict = AddressDict
+        self.null_address_dict = null_address_dict
         self.config = config
         assert isinstance(translated_to_c, bool)
         self.translated_to_c = translated_to_c


More information about the Pypy-commit mailing list