[pypy-commit] pypy default: 'gc_bit': trying out a way that seems to generate better code with gcc.
arigo
pypy.commits at gmail.com
Wed Jun 1 07:35:15 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r84856:45e1ec5bcb37
Date: 2016-06-01 13:35 +0200
http://bitbucket.org/pypy/pypy/changeset/45e1ec5bcb37/
Log: 'gc_bit': trying out a way that seems to generate better code with
gcc.
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -1357,11 +1357,14 @@
         return cls.minimal_size_in_nursery
 
     def write_barrier(self, addr_struct):
-        if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        # see OP_GC_BIT in translator/c/gc.py
+        if llop.gc_bit(lltype.Signed, self.header(addr_struct),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             self.remember_young_pointer(addr_struct)
 
     def write_barrier_from_array(self, addr_array, index):
-        if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        if llop.gc_bit(lltype.Signed, self.header(addr_array),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             if self.card_page_indices > 0:
                 self.remember_young_pointer_from_array2(addr_array, index)
             else:
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -470,6 +470,7 @@
'gc_pin' : LLOp(canrun=True),
'gc_unpin' : LLOp(canrun=True),
'gc__is_pinned' : LLOp(canrun=True),
+ 'gc_bit' : LLOp(sideeffects=False, canrun=True),
'gc_get_rpy_roots' : LLOp(),
'gc_get_rpy_referents': LLOp(),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -1,3 +1,4 @@
+import random, sys
from rpython.flowspace.operation import op
from rpython.rlib import debug
from rpython.rlib.rarithmetic import is_valid_int
@@ -680,6 +681,11 @@
 def op_gc_writebarrier(addr):
     pass
 
+def op_gc_bit(hdr, bitmask):
+    if hdr.tid & bitmask:
+        return random.randrange(1, sys.maxint)
+    return 0
+
 def op_shrink_array(array, smallersize):
     return False
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -391,6 +391,34 @@
raise AssertionError(subopnum)
return ' '.join(parts)
+    def OP_GC_BIT(self, funcgen, op):
+        # This is a two-arguments operation (x, y) where x is a
+        # pointer and y is a constant power of two.  It returns 0 if
+        # "(*(Signed*)x) & y == 0", and non-zero if it is "== y".
+        #
+        # On x86-64, emitting this is better than emitting a load
+        # followed by an INT_AND for the case where y doesn't fit in
+        # 32 bits.  I've seen situations where a register was wasted
+        # to contain the constant 2**32 throughout a complete messy
+        # function; the goal of this GC_BIT is to avoid that.
+        #
+        # Don't abuse, though.  If you need to check several bits in
+        # sequence, then it's likely better to load the whole Signed
+        # first; using GC_BIT would result in multiple accesses to
+        # memory.
+        #
+        bitmask = op.args[1].value
+        assert bitmask > 0 and (bitmask & (bitmask - 1)) == 0
+        offset = 0
+        while bitmask >= 0x100:
+            offset += 1
+            bitmask >>= 8
+        if sys.byteorder == 'big':
+            offset = 'sizeof(Signed)-%s' % (offset+1)
+        return '%s = ((char *)%s)[%s] & %d;' % (funcgen.expr(op.result),
+                                                funcgen.expr(op.args[0]),
+                                                offset, bitmask)
+
class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy):
def gettransformer(self, translator):
More information about the pypy-commit
mailing list