[pypy-commit] pypy default: 'gc_bit': trying out a way that seems to generate better code with gcc.

arigo pypy.commits at gmail.com
Wed Jun 1 07:35:15 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r84856:45e1ec5bcb37
Date: 2016-06-01 13:35 +0200
http://bitbucket.org/pypy/pypy/changeset/45e1ec5bcb37/

Log:	'gc_bit': trying out a way that seems to generate better code with
	gcc.

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -1357,11 +1357,14 @@
         return cls.minimal_size_in_nursery
 
     def write_barrier(self, addr_struct):
-        if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        # see OP_GC_BIT in translator/c/gc.py
+        if llop.gc_bit(lltype.Signed, self.header(addr_struct),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             self.remember_young_pointer(addr_struct)
 
     def write_barrier_from_array(self, addr_array, index):
-        if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        if llop.gc_bit(lltype.Signed, self.header(addr_array),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             if self.card_page_indices > 0:
                 self.remember_young_pointer_from_array2(addr_array, index)
             else:
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -470,6 +470,7 @@
     'gc_pin'              : LLOp(canrun=True),
     'gc_unpin'            : LLOp(canrun=True),
     'gc__is_pinned'        : LLOp(canrun=True),
+    'gc_bit'              : LLOp(sideeffects=False, canrun=True),
 
     'gc_get_rpy_roots'    : LLOp(),
     'gc_get_rpy_referents': LLOp(),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -1,3 +1,4 @@
+import random, sys
 from rpython.flowspace.operation import op
 from rpython.rlib import debug
 from rpython.rlib.rarithmetic import is_valid_int
@@ -680,6 +681,11 @@
 def op_gc_writebarrier(addr):
     pass
 
+def op_gc_bit(hdr, bitmask):
+    if hdr.tid & bitmask:
+        return random.randrange(1, sys.maxint)
+    return 0
+
 def op_shrink_array(array, smallersize):
     return False
 
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -391,6 +391,34 @@
             raise AssertionError(subopnum)
         return ' '.join(parts)
 
+    def OP_GC_BIT(self, funcgen, op):
+        # This is a two-argument operation (x, y) where x is a
+        # pointer and y is a constant power of two.  It returns 0 if
+        # "((*(Signed*)x) & y) == 0", and non-zero if it is "== y".
+        #
+        # On x86-64, emitting this is better than emitting a load
+        # followed by an INT_AND for the case where y doesn't fit in
+        # 32 bits.  I've seen situations where a register was wasted
+        # to contain the constant 2**32 throughout a complete messy
+        # function; the goal of this GC_BIT is to avoid that.
+        #
+        # Don't abuse, though.  If you need to check several bits in
+        # sequence, then it's likely better to load the whole Signed
+        # first; using GC_BIT would result in multiple accesses to
+        # memory.
+        #
+        bitmask = op.args[1].value
+        assert bitmask > 0 and (bitmask & (bitmask - 1)) == 0
+        offset = 0
+        while bitmask >= 0x100:
+            offset += 1
+            bitmask >>= 8
+        if sys.byteorder == 'big':
+            offset = 'sizeof(Signed)-%s' % (offset+1)
+        return '%s = ((char *)%s)[%s] & %d;' % (funcgen.expr(op.result),
+                                                funcgen.expr(op.args[0]),
+                                                offset, bitmask)
+
 class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy):
 
     def gettransformer(self, translator):


More information about the pypy-commit mailing list