[pypy-svn] r25560 - pypy/branch/stacklesscfg/pypy/translator/backendopt

Sat Apr 8 17:49:54 CEST 2006

Author: mwh
Date: Sat Apr  8 17:49:52 2006
New Revision: 25560

Modified:
   pypy/branch/stacklesscfg/pypy/translator/backendopt/inline.py
Log:
merging of http://codespeak.net/svn/pypy/dist/pypy/translator/backendopt/inline.py
revisions 25473 to 25557:

    ------------------------------------------------------------------------
    r25555 | hpk | 2006-04-08 17:32:31 +0200 (Sat, 08 Apr 2006) | 2 lines
    
    merge r25548 of stacklesscfg branch (split_block returns splitlink instead of afterblock)
    
    ------------------------------------------------------------------------
    r25544 | cfbolz | 2006-04-08 14:25:51 +0200 (Sat, 08 Apr 2006) | 2 lines
    
    remove another debug print :-(
    
    ------------------------------------------------------------------------
    r25543 | cfbolz | 2006-04-08 14:25:27 +0200 (Sat, 08 Apr 2006) | 2 lines
    
    remove debug print
    
    ------------------------------------------------------------------------
    r25542 | cfbolz | 2006-04-08 14:22:51 +0200 (Sat, 08 Apr 2006) | 3 lines
    
    (pedronis, cfbolz): trying to make the order of inlining even more
    deterministic
    
    ------------------------------------------------------------------------
    r25541 | cfbolz | 2006-04-08 14:17:15 +0200 (Sat, 08 Apr 2006) | 2 lines
    
    improve the weights of some operations. for some definition of improve
    
    ------------------------------------------------------------------------
    r25535 | cfbolz | 2006-04-08 10:33:27 +0200 (Sat, 08 Apr 2006) | 2 lines
    
    wua! bad me
    
    ------------------------------------------------------------------------
    r25532 | cfbolz | 2006-04-08 10:14:47 +0200 (Sat, 08 Apr 2006) | 5 lines
    
    (pedronis, cfbolz):
    
    increase the weight of edges that go back in a loop because they are much more
    likely
    
    ------------------------------------------------------------------------
    r25495 | cfbolz | 2006-04-07 15:18:05 +0200 (Fri, 07 Apr 2006) | 6 lines
    
    (pedronis, cfbolz):
    
    introduced an optimizations remove_duplicate_casts to reduce the size of the
    live variable set. For this we needed to make the exception heuristic in
    inlining cleverer.
    
    ------------------------------------------------------------------------


Modified: pypy/branch/stacklesscfg/pypy/translator/backendopt/inline.py
==============================================================================

--- pypy/branch/stacklesscfg/pypy/translator/backendopt/inline.py	(original)
+++ pypy/branch/stacklesscfg/pypy/translator/backendopt/inline.py	Sat Apr  8 17:49:52 2006
@@ -6,11 +6,11 @@
 from pypy.objspace.flow.model import SpaceOperation, c_last_exception
 from pypy.objspace.flow.model import traverse, mkentrymap, checkgraph
 from pypy.annotation import model as annmodel
-from pypy.rpython.lltypesystem.lltype import Bool, typeOf, Void
+from pypy.rpython.lltypesystem.lltype import Bool, typeOf, Void, Ptr
 from pypy.rpython import rmodel
 from pypy.tool.algo import sparsemat
 from pypy.translator.backendopt.support import log, split_block_with_keepalive
-from pypy.translator.backendopt.support import generate_keepalive 
+from pypy.translator.backendopt.support import generate_keepalive, find_backedges, find_loop_blocks
 
 BASE_INLINE_THRESHOLD = 32.4    # just enough to inline add__Int_Int()
 # and just small enough to prevend inlining of some rlist functions.
@@ -68,25 +68,28 @@
     except StopIteration:
         return False
 
-def inline_function(translator, inline_func, graph):
-    inliner = Inliner(translator, graph, inline_func)
+def inline_function(translator, inline_func, graph, lltype_to_classdef):
+    inliner = Inliner(translator, graph, inline_func, lltype_to_classdef)
     return inliner.inline_all()
 
+def simple_inline_function(translator, inline_func, graph):
+    inliner = Inliner(translator, graph, inline_func,
+                      translator.rtyper.lltype_to_classdef_mapping())
+    return inliner.inline_all()
+
+
 def _find_exception_type(block):
     #XXX slightly brittle: find the exception type for simple cases
     #(e.g. if you do only raise XXXError) by doing pattern matching
-    ops = [op for op in block.operations if op.opname != 'keepalive'] 
-    if (len(ops) < 6 or
-        ops[-6].opname != "malloc" or ops[-5].opname != "cast_pointer" or
-        ops[-4].opname != "setfield" or ops[-3].opname != "cast_pointer" or
-        ops[-2].opname != "getfield" or ops[-1].opname != "cast_pointer" or
-        len(block.exits) != 1 or block.exits[0].args[0] != ops[-2].result or
-        block.exitswitch is not None or
-        block.exits[0].args[1] != ops[-1].result or
-        not isinstance(ops[-4].args[1], Constant) or
-        ops[-4].args[1].value != "typeptr"):
+    currvar = block.exits[0].args[1]
+    for op in block.operations[::-1]:
+        if op.opname in ("same_as", "cast_pointer") and op.result is currvar:
+            currvar = op.args[0]
+        elif op.opname == "malloc" and op.result is currvar:
+            break
+    else:
         return None, None
-    return ops[-4].args[2].value, block.exits[0]
+    return Ptr(op.args[0].value), block.exits[0]
 
 def does_raise_directly(graph, raise_analyzer):
     """ this function checks, whether graph contains operations which can raise
@@ -102,7 +105,8 @@
     return False
 
 class BaseInliner(object):
-    def __init__(self, translator, graph, inline_guarded_calls=False,
+    def __init__(self, translator, graph, lltype_to_classdef, 
+                 inline_guarded_calls=False,
                  inline_guarded_calls_no_matter_what=False, raise_analyzer=None):
         self.translator = translator
         self.graph = graph
@@ -115,6 +119,7 @@
             self.raise_analyzer = raise_analyzer
         else:
             self.raise_analyzer = None
+        self.lltype_to_classdef = lltype_to_classdef
 
     def inline_all(self):
         count = 0
@@ -275,22 +280,27 @@
     def rewire_exceptblock_with_guard(self, afterblock, copiedexceptblock):
         # this rewiring does not always succeed. in the cases where it doesn't
         # there will be generic code inserted
+        from pypy.rpython.lltypesystem import rclass
         exc_match = self.translator.rtyper.getexceptiondata().fn_exception_match
         for link in self.entrymap[self.graph_to_inline.exceptblock]:
             copiedblock = self.copy_block(link.prevblock)
-            eclass, copiedlink = _find_exception_type(copiedblock)
+            VALUE, copiedlink = _find_exception_type(copiedblock)
             #print copiedblock.operations
-            if eclass is None:
+            if VALUE is None or VALUE not in self.lltype_to_classdef:
                 continue
-            etype = copiedlink.args[0]
-            evalue = copiedlink.args[1]
+            classdef = self.lltype_to_classdef[VALUE]
+            rtyper = self.translator.rtyper
+            classrepr = rclass.getclassrepr(rtyper, classdef)
+            vtable = classrepr.getruntime()
+            var_etype = copiedlink.args[0]
+            var_evalue = copiedlink.args[1]
             for exceptionlink in afterblock.exits[1:]:
-                if exc_match(eclass, exceptionlink.llexitcase):
+                if exc_match(vtable, exceptionlink.llexitcase):
                     passon_vars = self.passon_vars(link.prevblock)
                     copiedblock.operations += generate_keepalive(passon_vars)
                     copiedlink.target = exceptionlink.target
                     linkargs = self.find_args_in_exceptional_case(
-                        exceptionlink, link.prevblock, etype, evalue, afterblock, passon_vars)
+                        exceptionlink, link.prevblock, var_etype, var_evalue, afterblock, passon_vars)
                     copiedlink.args = linkargs
                     break
 
@@ -388,9 +398,9 @@
 
 
 class Inliner(BaseInliner):
-    def __init__(self, translator, graph, inline_func, inline_guarded_calls=False,
+    def __init__(self, translator, graph, inline_func, lltype_to_classdef, inline_guarded_calls=False,
                  inline_guarded_calls_no_matter_what=False, raise_analyzer=None):
-        BaseInliner.__init__(self, translator, graph, 
+        BaseInliner.__init__(self, translator, graph, lltype_to_classdef,
                              inline_guarded_calls,
                              inline_guarded_calls_no_matter_what,
                              raise_analyzer)
@@ -404,21 +414,10 @@
             self.block_to_index.setdefault(block, {})[i] = g
 
 class OneShotInliner(BaseInliner):
-    def __init__(self, translator, graph, inline_guarded_calls=False,
-                 inline_guarded_calls_no_matter_what=False, raise_analyzer=None):
-         BaseInliner.__init__(self, translator, graph, 
-                              inline_guarded_calls,
-                              inline_guarded_calls_no_matter_what,
-                              raise_analyzer)
-
     def search_for_calls(self, block):
         pass
 
 
-def _inline_function(translator, graph, block, index_operation):
-    inline_func = block.operations[index_operation].args[0].value._obj._callable
-    inliner = Inliner(translator, graph, inline_func)
-
 # ____________________________________________________________
 #
 # Automatic inlining
@@ -426,14 +425,17 @@
 OP_WEIGHTS = {'same_as': 0,
               'cast_pointer': 0,
               'keepalive': 0,
-              'direct_call': 2,    # guess
-              'indirect_call': 2,  # guess
+              'malloc': 2,
               'yield_current_frame_to_caller': sys.maxint, # XXX bit extreme
               }
 
 def block_weight(block, weights=OP_WEIGHTS):
     total = 0
     for op in block.operations:
+        if op.opname == "direct_call":
+            total += 1.5 + len(op.args) / 2
+        elif op.opname == "indirect_call":
+            total += 2 + len(op.args) / 2
         total += weights.get(op.opname, 1)
     if block.exitswitch is not None:
         total += 1
@@ -446,15 +448,34 @@
     for block in graph.iterblocks():
         blockmap[block] = len(blocks)
         blocks.append(block)
+    backedges = dict.fromkeys(find_backedges(graph), True)
+    loops = find_loop_blocks(graph)
     M = sparsemat.SparseMatrix(len(blocks))
     vector = []
     for i, block in enumerate(blocks):
         vector.append(block_weight(block))
         M[i, i] = 1
         if block.exits:
-            f = 1.0 / len(block.exits)
+            if block not in loops:
+                current_loop_start = None
+            else:
+                current_loop_start = loops[block]
+            loop_exits = []
             for link in block.exits:
-                M[i, blockmap[link.target]] -= f
+                if (link.target in loops and
+                    loops[link.target] is current_loop_start):
+                    loop_exits.append(link)
+            if len(loop_exits) and len(loop_exits) < len(block.exits):
+                f = 0.3 / (len(block.exits) - len(loop_exits))
+                b = 0.7 / len(loop_exits)
+            else:
+                b = f = 1.0 / len(block.exits)
+            for link in block.exits:
+                if (link.target in loops and
+                    loops[link.target] is current_loop_start):
+                    M[i, blockmap[link.target]] -= b
+                else:
+                    M[i, blockmap[link.target]] -= f
     try:
         Solution = M.solve(vector)
     except ValueError:
@@ -507,30 +528,33 @@
     for graph1, graph2 in static_callers(translator, ignore_primitives=True):
         callers.setdefault(graph2, {})[graph1] = True
         callees.setdefault(graph1, {})[graph2] = True
-    fiboheap = [(0.0, graph) for graph in callers]
+    heap = [(0.0, -len(callers[graph]), graph) for graph in callers]
     valid_weight = {}
     couldnt_inline = {}
+    lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping()
 
-    while fiboheap:
-        weight, graph = fiboheap[0]
+    while heap:
+        weight, _, graph = heap[0]
         if not valid_weight.get(graph):
             weight = inlining_heuristic(graph, callers.get(graph), callees.get(graph))
             #print '  + cost %7.2f %50s' % (weight, graph.name)
-            heapreplace(fiboheap, (weight, graph))
+            heapreplace(heap, (weight, -len(callers[graph]), graph))
             valid_weight[graph] = True
             continue
 
         if weight >= threshold:
             break   # finished
 
-        heappop(fiboheap)
-        log.inlining('%7.2f %50s' % (weight, graph.name))
+        heappop(heap)
+        if callers[graph]:
+            log.inlining('%7.2f %50s' % (weight, graph.name))
         for parentgraph in callers[graph]:
             if parentgraph == graph:
                 continue
             sys.stdout.flush()
             try:
-                res = bool(inline_function(translator, graph, parentgraph))
+                res = bool(inline_function(translator, graph, parentgraph,
+                                           lltype_to_classdef))
             except CannotInline:
                 couldnt_inline[graph] = True
                 res = CannotInline
@@ -545,5 +569,5 @@
                     # been modified.  Maybe now we can inline it into further
                     # parents?
                     del couldnt_inline[parentgraph]
-                    heappush(fiboheap, (0.0, parentgraph))
+                    heappush(heap, (0.0, -len(callers[parentgraph]), parentgraph))
                 valid_weight[parentgraph] = False