[pypy-commit] pypy default: merge call-loopinvariant-into-bridges: speed up branchy code that does a lot of inlining

cfbolz pypy.commits at gmail.com
Mon Mar 12 08:58:48 EDT 2018


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: 
Changeset: r93971:ed869ecba520
Date: 2018-03-12 13:58 +0100
http://bitbucket.org/pypy/pypy/changeset/ed869ecba520/

Log:	merge call-loopinvariant-into-bridges: speed up branchy code that
	does a lot of inlining

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -48,3 +48,9 @@
 .. branch: refactor-slots
 
 Refactor cpyext slots.
+
+
+.. branch: call-loopinvariant-into-bridges
+
+Speed up branchy code that does a lot of function inlining by saving one call
+to read the TLS in most bridges.
diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
--- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
@@ -17,11 +17,17 @@
 # <length>
 # (<box1> <descr> <box2>) length times, if getfield(box1, descr) == box2
 #                         both boxes should be in the liveboxes
+#                         (or constants)
 #
 # <length>
 # (<box1> <index> <descr> <box2>) length times, if getarrayitem_gc(box1, index, descr) == box2
 #                                 both boxes should be in the liveboxes
+#                                 (or constants)
 #
+# ---- call_loopinvariant knowledge
+# <length>
+# (<const> <box2>) length times, if call_loopinvariant(const) == box2
+#                  box2 should be in liveboxes
 # ----
 
 
@@ -55,11 +61,11 @@
     return box
 
 def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_from_env, memo):
+    from rpython.jit.metainterp.history import ConstInt
     available_boxes = {}
     for box in liveboxes:
         if box is not None and box in liveboxes_from_env:
             available_boxes[box] = None
-    metainterp_sd = optimizer.metainterp_sd
 
     # class knowledge is stored as bits, true meaning the class is known, false
     # means unknown. on deserializing we look at the bits, and read the runtime
@@ -106,7 +112,19 @@
         numb_state.append_int(0)
         numb_state.append_int(0)
 
+    if optimizer.optrewrite:
+        tuples_loopinvariant = optimizer.optrewrite.serialize_optrewrite(
+                available_boxes)
+        numb_state.append_int(len(tuples_loopinvariant))
+        for constarg0, box in tuples_loopinvariant:
+            numb_state.append_short(
+                    tag_box(ConstInt(constarg0), liveboxes_from_env, memo))
+            numb_state.append_short(tag_box(box, liveboxes_from_env, memo))
+    else:
+        numb_state.append_int(0)
+
 def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes):
+    from rpython.jit.metainterp.history import ConstInt
     reader = resumecode.Reader(resumestorage.rd_numb)
     assert len(frontend_boxes) == len(liveboxes)
     metainterp_sd = optimizer.metainterp_sd
@@ -131,8 +149,6 @@
             optimizer.make_constant_class(box, cls)
 
     # heap knowledge
-    if not optimizer.optheap:
-        return
     length = reader.next_item()
     result_struct = []
     for i in range(length):
@@ -154,4 +170,19 @@
         tagged = reader.next_item()
         box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
         result_array.append((box1, index, descr, box2))
-    optimizer.optheap.deserialize_optheap(result_struct, result_array)
+    if optimizer.optheap:
+        optimizer.optheap.deserialize_optheap(result_struct, result_array)
+
+    # call_loopinvariant knowledge
+    length = reader.next_item()
+    result_loopinvariant = []
+    for i in range(length):
+        tagged1 = reader.next_item()
+        const = decode_box(resumestorage, tagged1, liveboxes, metainterp_sd.cpu)
+        assert isinstance(const, ConstInt)
+        i = const.getint()
+        tagged2 = reader.next_item()
+        box = decode_box(resumestorage, tagged2, liveboxes, metainterp_sd.cpu)
+        result_loopinvariant.append((i, box))
+    if optimizer.optrewrite:
+        optimizer.optrewrite.deserialize_optrewrite(result_loopinvariant)
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -877,6 +877,18 @@
     optimize_SAME_AS_R = optimize_SAME_AS_I
     optimize_SAME_AS_F = optimize_SAME_AS_I
 
+    def serialize_optrewrite(self, available_boxes):
+        res = []
+        for i, box in self.loop_invariant_results.iteritems():
+            box = self.get_box_replacement(box)
+            if box in available_boxes:
+                res.append((i, box))
+        return res
+
+    def deserialize_optrewrite(self, tups):
+        for i, box in tups:
+            self.loop_invariant_results[i] = box
+
 dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
                                       default=OptRewrite.emit)
 optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py
--- a/rpython/jit/metainterp/test/test_bridgeopt.py
+++ b/rpython/jit/metainterp/test/test_bridgeopt.py
@@ -1,6 +1,9 @@
 # tests that check that information is fed from the optimizer into the bridges
 
+import pytest
+
 import math
+
 from rpython.rlib import jit
 from rpython.jit.metainterp.test.support import LLJitMixin
 from rpython.jit.metainterp.optimizeopt.bridgeopt import serialize_optimizer_knowledge
@@ -27,6 +30,7 @@
 class FakeOptimizer(object):
     metainterp_sd = None
     optheap = None
+    optrewrite = None
 
     def __init__(self, dct={}, cpu=None):
         self.dct = dct
@@ -61,7 +65,8 @@
 
     serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
 
-    assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0]
+    assert unpack_numbering(numb_state.create_numbering()) == [
+            1, 0b010000, 0, 0, 0]
 
     rbox1 = InputArgRef()
     rbox2 = InputArgRef()
@@ -100,7 +105,7 @@
 
     serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
 
-    assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0)
+    assert len(numb_state.create_numbering().code) == 4 + math.ceil(len(refboxes) / 6.0)
 
     dct = {box: cls
               for box, known_class in boxes_known_classes
@@ -321,3 +326,74 @@
         self.check_trace_count(3)
         self.check_resops(guard_value=1)
         self.check_resops(getarrayitem_gc_i=5)
+
+    def test_bridge_call_loopinvariant(self):
+        class A(object):
+            pass
+        class B(object):
+            pass
+
+        aholder = B()
+        aholder.a = A()
+
+        @jit.loop_invariant
+        def get():
+            return aholder.a
+
+        myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n'])
+        def f(x, y, n):
+            if x == 10001121:
+                aholder.a = A()
+            if x:
+                get().x = 1
+            else:
+                get().x = 2
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point(y=y, n=n, res=res)
+                a = get()
+                a = get()
+                res += a.x
+                if y > n:
+                    res += 1
+                res += get().x + a.x
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        self.check_trace_count(3)
+        self.check_resops(call_r=1)
+
+    @pytest.mark.xfail()
+    def test_bridge_call_loopinvariant_2(self):
+        class A(object):
+            pass
+        class B(object):
+            pass
+
+        aholder = B()
+        aholder.a = A()
+
+        @jit.loop_invariant
+        def get():
+            return aholder.a
+
+        myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n'])
+        def f(x, y, n):
+            if x == 10001121:
+                aholder.a = A()
+            if x:
+                get().x = 1
+            else:
+                get().x = 2
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point(y=y, n=n, res=res)
+                if y > n:
+                    res += get().x
+                    res += 1
+                res += get().x
+                y -= 1
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        self.check_trace_count(3)
+        self.check_resops(call_r=1)
diff --git a/rpython/jit/metainterp/test/test_resume.py b/rpython/jit/metainterp/test/test_resume.py
--- a/rpython/jit/metainterp/test/test_resume.py
+++ b/rpython/jit/metainterp/test/test_resume.py
@@ -40,7 +40,7 @@
 
 class FakeOptimizer(object):
     metainterp_sd = None
-    optheap = None
+    optheap = optrewrite = None
 
     def __init__(self, trace=None):
         self.trace = trace


More information about the pypy-commit mailing list