[pypy-commit] pypy nogil-unsafe: Start, mostly just playing around

arigo pypy.commits at gmail.com
Sat Jan 7 04:22:42 EST 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: nogil-unsafe
Changeset: r89408:c17b7079746f
Date: 2017-01-07 10:20 +0100
http://bitbucket.org/pypy/pypy/changeset/c17b7079746f/

Log:	Start, mostly just playing around

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -190,6 +190,11 @@
 FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
 NURSARRAY = lltype.Array(llmemory.Address)
 
+GCTL = lltype.Struct('GCThreadLocal',
+                     ('nursery_free', llmemory.Address),
+                     ('nursery_top', llmemory.Address),
+                     hints={'thread_local': True})
+
 # ____________________________________________________________
 
 class IncrementalMiniMarkGC(MovingGCBase):
@@ -269,12 +274,23 @@
         "card_page_indices": 128,
 
         # Objects whose total size is at least 'large_object' bytes are
-        # allocated out of the nursery immediately, as old objects.  The
-        # minimal allocated size of the nursery is 2x the following
-        # number (by default, at least 132KB on 32-bit and 264KB on 64-bit).
-        "large_object": (16384+512)*WORD,
+        # allocated out of the nursery immediately, as old objects.
+        "large_object": 13000,
+
+        # Thread-local Block size: the nursery is divided into blocks of
+        # at most this size each, and allocations go on in a
+        # thread-local manner inside each block.  "large_object" must be
+        # significantly smaller, but at the same time the total nursery
+        # size must be many times bigger than "tl_block_size"; the minimum
+        # allocated nursery size is 2 times "tl_block_size".
+        # "cache_line_min" is used to round the actual thread-local
+        # blocks to a cache line, to avoid pointless cache conflicts.
+        "tl_block_size": 32768,
+        "cache_line_min": 256,
         }
 
+    tl = lltype.malloc(GCTL, flavor='raw', immortal=True)
+
     def __init__(self, config,
                  read_from_env=False,
                  nursery_size=32*WORD,
@@ -286,6 +302,8 @@
                  growth_rate_max=2.5,   # for tests
                  card_page_indices=0,
                  large_object=8*WORD,
+                 tl_block_size=9*WORD,
+                 cache_line_min=1*WORD,
                  ArenaCollectionClass=None,
                  **kwds):
         "NOT_RPYTHON"
@@ -313,10 +331,12 @@
         # 'large_object' limit how big objects can be in the nursery, so
         # it gives a lower bound on the allowed size of the nursery.
         self.nonlarge_max = large_object - 1
+        self.tl_block_size = tl_block_size
+        self.cache_line_min = cache_line_min
         #
         self.nursery      = llmemory.NULL
-        self.nursery_free = llmemory.NULL
-        self.nursery_top  = llmemory.NULL
+        self.tl.nursery_free = llmemory.NULL
+        self.tl.nursery_top  = llmemory.NULL
         self.debug_tiny_nursery = -1
         self.debug_rotating_nurseries = lltype.nullptr(NURSARRAY)
         self.extra_threshold = 0
@@ -437,7 +457,7 @@
         else:
             #
             defaultsize = self.nursery_size
-            minsize = 2 * (self.nonlarge_max + 1)
+            minsize = 2 * self.tl_block_size
             self.nursery_size = minsize
             self.allocate_nursery()
             #
@@ -513,6 +533,13 @@
             # Estimate this number conservatively
             bigobj = self.nonlarge_max + 1
             self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2)
+        #
+        # Round up 'tl_block_size' to a multiple of 'cache_line_min'
+        ll_assert((self.cache_line_min & (self.cache_line_min - 1)) == 0,
+                  "cache_line_min is not a power a two")
+        self.tl_block_size = ((self.tl_block_size + self.cache_line_min - 1)
+                              & ~(self.cache_line_min - 1))
+
 
     def _nursery_memory_size(self):
         extra = self.nonlarge_max + 1
@@ -532,10 +559,6 @@
         debug_start("gc-set-nursery-size")
         debug_print("nursery size:", self.nursery_size)
         self.nursery = self._alloc_nursery()
-        # the current position in the nursery:
-        self.nursery_free = self.nursery
-        # the end of the nursery:
-        self.nursery_top = self.nursery + self.nursery_size
         # initialize the threshold
         self.min_heap_size = max(self.min_heap_size, self.nursery_size *
                                               self.major_collection_threshold)
@@ -608,7 +631,6 @@
             #
             llarena.arena_protect(newnurs, self._nursery_memory_size(), False)
             self.nursery = newnurs
-            self.nursery_top = self.nursery + self.nursery_size
             debug_print("switching from nursery", oldnurs,
                         "to nursery", self.nursery,
                         "size", self.nursery_size)
@@ -651,10 +673,10 @@
             #
             # Get the memory from the nursery.  If there is not enough space
             # there, do a collect first.
-            result = self.nursery_free
+            result = self.tl.nursery_free
             ll_assert(result != llmemory.NULL, "uninitialized nursery")
-            self.nursery_free = new_free = result + totalsize
-            if new_free > self.nursery_top:
+            self.tl.nursery_free = new_free = result + totalsize
+            if new_free > self.tl.nursery_top:
                 result = self.collect_and_reserve(totalsize)
             #
             # Build the object.
@@ -711,10 +733,10 @@
             #
             # Get the memory from the nursery.  If there is not enough space
             # there, do a collect first.
-            result = self.nursery_free
+            result = self.tl.nursery_free
             ll_assert(result != llmemory.NULL, "uninitialized nursery")
-            self.nursery_free = new_free = result + totalsize
-            if new_free > self.nursery_top:
+            self.tl.nursery_free = new_free = result + totalsize
+            if new_free > self.tl.nursery_top:
                 result = self.collect_and_reserve(totalsize)
             #
             # Build the object.
@@ -802,13 +824,14 @@
         Otherwise do a minor collection, and possibly some steps of a
         major collection, and finally reserve totalsize bytes.
         """
-
         minor_collection_count = 0
+        must_downgrade_gil = False
         while True:
-            self.nursery_free = llmemory.NULL      # debug: don't use me
+            self.tl.nursery_free = llmemory.NULL      # debug: don't use me
             # note: no "raise MemoryError" between here and the next time
             # we initialize nursery_free!
 
+            self._gc_lock()
             if self.nursery_barriers.non_empty():
                 # Pinned object in front of nursery_top. Try reserving totalsize
                 # by jumping into the next, yet unused, area inside the
@@ -822,7 +845,7 @@
                 #     v     v    v  jump over this
                 # +---------+--------+--------+--------+-----------+ }
                 # | used    | pinned | empty  | pinned |  empty    | }- nursery
-                # +---------+--------+--------+--------+-----------+ }
+                # +---------+--------B--------B--------B-----------B }
                 #                       ^- try reserving totalsize in here next
                 #
                 # All pinned objects are represented by entries in
@@ -833,15 +856,21 @@
                 # totalsize) starts at the end of the pinned object and ends at
                 # nursery's end.
                 #
-                # find the size of the pinned object after nursery_top
-                size_gc_header = self.gcheaderbuilder.size_gc_header
-                pinned_obj_size = size_gc_header + self.get_size(
-                        self.nursery_top + size_gc_header)
+                # In the diagram above, self.nursery_barriers contains
+                # four addresses which match the four "B".
                 #
                 # update used nursery space to allocate objects
-                self.nursery_free = self.nursery_top + pinned_obj_size
-                self.nursery_top = self.nursery_barriers.popleft()
+                self.tl.nursery_free = self.nursery_barriers.popleft()
+                self.tl.nursery_top = self.nursery_barriers.popleft()
+                self._gc_unlock()
+                prev_gil = False
             else:
+                self._gc_unlock()
+                if not llop.gil_is_exclusive(lltype.Bool):
+                    ll_assert(not must_downgrade_gil,
+                              "collect_and_reverse: bad gil state")
+                    must_downgrade_gil = llop.gil_wait(lltype.Bool)
+                    continue      # waited, maybe the situation changed
                 minor_collection_count += 1
                 if minor_collection_count == 1:
                     self.minor_collection_with_major_progress()
@@ -868,16 +897,18 @@
             # Tried to do something about nursery_free overflowing
             # nursery_top before this point. Try to reserve totalsize now.
             # If this succeeds break out of loop.
-            result = self.nursery_free
-            if self.nursery_free + totalsize <= self.nursery_top:
-                self.nursery_free = result + totalsize
-                ll_assert(self.nursery_free <= self.nursery_top, "nursery overflow")
+            result = self.tl.nursery_free
+            if self.tl.nursery_free + totalsize <= self.tl.nursery_top:
+                self.tl.nursery_free = result + totalsize
+                ll_assert(self.tl.nursery_free <= self.tl.nursery_top, "nursery overflow")
                 break
             #
+        if must_downgrade_gil:
+            llop.gil_downgrade(lltype.Void)
         #
         if self.debug_tiny_nursery >= 0:   # for debugging
-            if self.nursery_top - self.nursery_free > self.debug_tiny_nursery:
-                self.nursery_free = self.nursery_top - self.debug_tiny_nursery
+            if self.tl.nursery_top - self.tl.nursery_free > self.debug_tiny_nursery:
+                self.tl.nursery_free = self.tl.nursery_top - self.debug_tiny_nursery
         #
         return result
     collect_and_reserve._dont_inline_ = True
@@ -1037,7 +1068,7 @@
         if self.next_major_collection_threshold < 0:
             # cannot trigger a full collection now, but we can ensure
             # that one will occur very soon
-            self.nursery_free = self.nursery_top
+            self.tl.nursery_free = self.tl.nursery_top
 
     def can_optimize_clean_setarrayitems(self):
         if self.card_page_indices > 0:
@@ -1144,7 +1175,7 @@
         # Check if the object at 'addr' is young.
         if not self.is_valid_gc_object(addr):
             return False     # filter out tagged pointers explicitly.
-        if self.nursery <= addr < self.nursery_top:
+        if self.nursery <= addr < self.nursery + self.nursery_size:
             return True      # addr is in the nursery
         # Else, it may be in the set 'young_rawmalloced_objects'
         return (bool(self.young_rawmalloced_objects) and
@@ -1756,51 +1787,82 @@
         # pointer.
         size_gc_header = self.gcheaderbuilder.size_gc_header
         nursery_barriers = self.AddressDeque()
-        prev = self.nursery
-        self.surviving_pinned_objects.sort()
+        if self.surviving_pinned_objects.non_empty():
+            self.surviving_pinned_objects.sort()
+            next_pinned_object = self.surviving_pinned_objects.pop()
+        else:
+            next_pinned_object = llmemory.NULL
         ll_assert(
             self.pinned_objects_in_nursery == \
             self.surviving_pinned_objects.length(),
             "pinned_objects_in_nursery != surviving_pinned_objects.length()")
-        while self.surviving_pinned_objects.non_empty():
+
+        # The following loop divides the nursery into small blocks whose
+        # size is generally about 'self.tl_block_size', but skipping
+        # over any pinned object.  Depending on the position of pinned
+        # objects, it is possible that one or two of these blocks are
+        # unusable because they are too small, but it should not matter.
+        prev = self.nursery
+        full_end = self.nursery + self.nursery_size
+
+        while True:
+            # Round up 'prev' to a multiple of 'cache_line_min'
+            prev_num = llmemory.cast_adr_to_int(prev)
+            prev += (-prev_num) & (self.cache_line_min - 1)
             #
-            cur = self.surviving_pinned_objects.pop()
-            ll_assert(
-                cur >= prev, "pinned objects encountered in backwards order")
+            # Compute the next TL block limit as 'cur1' and 'cur2'.
+            # These two addresses are normally equal to each other,
+            # but if there is a pinned object, then 'cur1' is the
+            # start of the pinned object and 'cur2' the end.
             #
-            # clear the arena between the last pinned object (or arena start)
-            # and the pinned object
-            pinned_obj_size = llarena.getfakearenaaddress(cur) - prev
+            if full_end - prev <= block_size:
+                cur1 = full_end
+            else:
+                cur1 = prev + block_size
+            #
+            if next_pinned_object and next_pinned_object <= cur1:
+                cur1 = next_pinned_object
+                if self.surviving_pinned_objects.non_empty():
+                    next_pinned_object = self.surviving_pinned_objects.pop()
+                else:
+                    next_pinned_object = llmemory.NULL
+                ll_assert(cur1 >= prev,
+                          "pinned objects encountered in backwards order")
+                # clean up object's flags
+                obj = cur1 + size_gc_header
+                self.header(obj).tid &= ~GCFLAG_VISITED
+                # set up 'cur1' and 'cur2'
+                cur1 = llarena.getfakearenaaddress(cur1)
+                cur2 = cur1 + (size_gc_header + self.get_size(obj))
+            else:
+                # no pinned object in this TL block.
+                cur2 = cur1
+            #
+            # clear this block in the arena
+            free_range_size = cur1 - prev
             if self.gc_nursery_debug:
-                llarena.arena_reset(prev, pinned_obj_size, 3)
+                llarena.arena_reset(prev, free_range_size, 3)
             else:
-                llarena.arena_reset(prev, pinned_obj_size, 0)
-            #
-            # clean up object's flags
-            obj = cur + size_gc_header
-            self.header(obj).tid &= ~GCFLAG_VISITED
+                llarena.arena_reset(prev, free_range_size, 0)
             #
             # create a new nursery barrier for the pinned object
-            nursery_barriers.append(cur)
+            nursery_barriers.append(cur1)    # pinned object
+            if cur1 == full_end:
+                break
+            nursery_barriers.append(cur2)    # end of pinned object
             #
-            # update 'prev' to the end of the 'cur' object
-            prev = prev + pinned_obj_size + \
-                (size_gc_header + self.get_size(obj))
+            # update 'prev' for the next iteration
+            prev = cur2
         #
-        # reset everything after the last pinned object till the end of the arena
+        ll_assert(not next_pinned_object, "bad pinned object location")
         if self.gc_nursery_debug:
-            llarena.arena_reset(prev, self.nursery + self.nursery_size - prev, 3)
             if not nursery_barriers.non_empty():   # no pinned objects
                 self.debug_rotate_nursery()
-        else:
-            llarena.arena_reset(prev, self.nursery + self.nursery_size - prev, 0)
-        #
-        # always add the end of the nursery to the list
-        nursery_barriers.append(self.nursery + self.nursery_size)
         #
         self.nursery_barriers = nursery_barriers
         self.surviving_pinned_objects.delete()
         #
+        XXX must clear out the other threads nursery_free/nursery_top
         self.nursery_free = self.nursery
         self.nursery_top = self.nursery_barriers.popleft()
         #
diff --git a/rpython/translator/exceptiontransform.py b/rpython/translator/exceptiontransform.py
--- a/rpython/translator/exceptiontransform.py
+++ b/rpython/translator/exceptiontransform.py
@@ -452,7 +452,9 @@
     def setup_excdata(self):
         EXCDATA = lltype.Struct('ExcData',
             ('exc_type',  self.lltype_of_exception_type),
-            ('exc_value', self.lltype_of_exception_value))
+            ('exc_value', self.lltype_of_exception_value),
+            #('have_debug_prints', lltype.Signed),
+            hints={'thread_local': True})
         self.EXCDATA = EXCDATA
 
         exc_data = lltype.malloc(EXCDATA, immortal=True)


More information about the pypy-commit mailing list