[pypy-svn] r69680 - pypy/branch/virtual-forcing/pypy/jit/backend/x86

arigo at codespeak.net arigo at codespeak.net
Fri Nov 27 12:33:42 CET 2009


Author: arigo
Date: Fri Nov 27 12:33:42 2009
New Revision: 69680

Modified:
   pypy/branch/virtual-forcing/pypy/jit/backend/x86/assembler.py
   pypy/branch/virtual-forcing/pypy/jit/backend/x86/regalloc.py
Log:
(pedronis, fijal partly, arigo)

Kill RET_BP and replace it with a FRAME_FIXED_SIZE constant
with a hopefully clearer meaning.  Use it instead of hard-coding
the value 4 in get_ebp_ofs().

Also kill the parts of force_token, call_may_force, and
guard_not_forced that we plan to change.


Modified: pypy/branch/virtual-forcing/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/virtual-forcing/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/branch/virtual-forcing/pypy/jit/backend/x86/assembler.py	Fri Nov 27 12:33:42 2009
@@ -9,7 +9,8 @@
 from pypy.rpython.annlowlevel import llhelper
 from pypy.tool.uid import fixid
 from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\
-     X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs
+     X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs, FRAME_FIXED_SIZE,\
+     FORCE_INDEX_OFS
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.jit.backend.x86 import codebuf
 from pypy.jit.backend.x86.ri386 import *
@@ -22,8 +23,6 @@
 # our calling convention - we pass first 6 args in registers
 # and the rest stays on the stack
 
-RET_BP = 5 # ret ip + bp + bx + esi + edi = 5 words
-
 if sys.platform == 'darwin':
     # darwin requires the stack to be 16 bytes aligned on calls
     CALL_ALIGN = 4
@@ -200,7 +199,11 @@
         # patch stack adjustment LEA
         # possibly align, e.g. for Mac OS X        
         mc = codebuf.InMemoryCodeBuilder(adr_lea, adr_lea + 4)
-        mc.write(packimm32(-(stack_depth + RET_BP - 2) * WORD))
+        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words].
+        # Given that [EBP] is where we saved EBP, i.e. in the last word
+        # of our fixed frame, then the 'words' value is:
+        words = (FRAME_FIXED_SIZE - 1) + stack_depth
+        mc.write(packimm32(-WORD * words))
         mc.done()
 
     def _assemble_bootstrap_code(self, inputargs, arglocs):
@@ -210,9 +213,8 @@
         self.mc.PUSH(ebx)
         self.mc.PUSH(esi)
         self.mc.PUSH(edi)
-        self.mc.PUSH(imm(0)) # the virtualizable flag
-        # NB. exactly 4 pushes above; if this changes, fix stack_pos().
-        # You must also keep get_basic_shape() in sync.
+        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
+        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
         adr_stackadjust = self._patchable_stackadjust()
         tmp = X86RegisterManager.all_regs[0]
         xmmtmp = X86XMMRegisterManager.all_regs[0]
@@ -683,11 +685,6 @@
         self.mc.CMP(heap(self.cpu.pos_exception()), imm(0))
         return self.implement_guard(addr, self.mc.JNZ)
 
-    def genop_guard_guard_not_forced(self, ign_1, guard_op, addr,
-                                     locs, ign_2):
-        self.mc.CMP(locs[0], imm(0))
-        return self.implement_guard(addr, self.mc.JNZ)
-
     def genop_guard_guard_exception(self, ign_1, guard_op, addr,
                                     locs, resloc):
         loc = locs[0]
@@ -1003,11 +1000,11 @@
         # now we return from the complete frame, which starts from
         # _assemble_bootstrap_code().  The LEA below throws away most
         # of the frame, including all the PUSHes that we did just above.
-        mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD)))
-        mc.POP(edi)
-        mc.POP(esi)
-        mc.POP(ebx)
-        mc.POP(ebp)
+        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
+        mc.POP(edi)    # [ebp-12]
+        mc.POP(esi)    # [ebp-8]
+        mc.POP(ebx)    # [ebp-4]
+        mc.POP(ebp)    # [ebp]
         mc.RET()
         self.mc2.done()
         self.failure_recovery_code[exc + 2 * withfloats] = recovery_addr
@@ -1048,14 +1045,14 @@
         addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
         mc.CALL(rel32(addr))
 
-        # don't break the following code sequence!
+        # don't break the following code sequence!   xxx no reason any more?
         mc = mc._mc
-        mc.LEA(esp, addr_add(ebp, imm((-RET_BP + 2) * WORD)))
+        mc.LEA(esp, addr_add(ebp, imm(-3 * WORD)))
         mc.MOV(eax, imm(fail_index))
-        mc.POP(edi)
-        mc.POP(esi)
-        mc.POP(ebx)
-        mc.POP(ebp)
+        mc.POP(edi)    # [ebp-12]
+        mc.POP(esi)    # [ebp-8]
+        mc.POP(ebx)    # [ebp-4]
+        mc.POP(ebp)    # [ebp]
         mc.RET()
 
     @specialize.arg(2)
@@ -1110,7 +1107,10 @@
             self.mc.AND(eax, imm(0xffff))
 
     genop_call_pure = genop_call
-    genop_call_may_force = genop_call
+    
+    def genop_guard_call_may_force(self, op, guard_op, addr,
+                                   arglocs, result_loc):
+        xxx #...
 
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # use 'mc._mc' directly instead of 'mc', to avoid
@@ -1142,7 +1142,7 @@
         mc.overwrite(jz_location-1, [chr(offset)])
 
     def genop_force_token(self, op, arglocs, resloc):
-        self.mc.LEA(resloc, arglocs[0])
+        xxx  #self.mc.LEA(resloc, ...)
 
     def not_implemented_op_discard(self, op, arglocs):
         msg = "not implemented operation: %s" % op.getopname()

Modified: pypy/branch/virtual-forcing/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/branch/virtual-forcing/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/branch/virtual-forcing/pypy/jit/backend/x86/regalloc.py	Fri Nov 27 12:33:42 2009
@@ -19,6 +19,8 @@
      TempBox
 
 WORD = 4
+FRAME_FIXED_SIZE = 5     # ebp + ebx + esi + edi + force_index = 5 words
+FORCE_INDEX_OFS = -4*WORD
 
 width_of_type = {
     INT : 1,
@@ -130,8 +132,6 @@
 
     def _prepare(self, inputargs, operations):
         self.sm = X86StackManager()
-        # a bit of a hack - always grab one position at the beginning
-        self.vable_loc = self.sm.loc(TempBox(), 1)
         cpu = self.assembler.cpu
         cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
         # compute longevity of variables
@@ -310,7 +310,10 @@
             self.assembler.dump('%s(%s)' % (op, arglocs))
         self.assembler.regalloc_perform_discard(op, arglocs)
 
-    def can_optimize_cmp_op(self, op, i, operations):
+    def can_merge_with_next_guard(self, op, i, operations):
+        if op.opnum == rop.CALL_MAY_FORCE:
+            assert operations[i + 1].opnum == rop.GUARD_NOT_FORCED
+            return True
         if not op.is_comparison():
             return False
         if (operations[i + 1].opnum != rop.GUARD_TRUE and
@@ -334,7 +337,7 @@
                 i += 1
                 self.possibly_free_vars(op.args)
                 continue
-            if self.can_optimize_cmp_op(op, i, operations):
+            if self.can_merge_with_next_guard(op, i, operations):
                 oplist[op.opnum](self, op, operations[i + 1])
                 i += 1
             else:
@@ -397,9 +400,6 @@
     consider_guard_nonnull = _consider_guard
     consider_guard_isnull = _consider_guard
 
-    def consider_guard_not_forced(self, op, ignored):
-        self.perform_guard(op, [self.vable_loc], None)
-
     def consider_finish(self, op, ignored):
         locs = [self.loc(v) for v in op.args]
         locs_are_ref = [v.type == REF for v in op.args]
@@ -609,34 +609,36 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.args[0])
 
-    def _call(self, op, arglocs, force_store=[], save_all_regs=False):
+    def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+        save_all_regs = guard_not_forced_op is not None
         self.rm.before_call(force_store, save_all_regs=save_all_regs)
         self.xrm.before_call(force_store, save_all_regs=save_all_regs)
-        self.Perform(op, arglocs, eax)
+        if guard_not_forced_op is not None:
+            self.perform_with_guard(op, guard_not_forced_op, arglocs, eax)
+        else:
+            self.Perform(op, arglocs, eax)
         if op.result is not None:
             if op.result.type == FLOAT:
                 self.xrm.after_call(op.result)
             else:
                 self.rm.after_call(op.result)
 
-    def consider_call(self, op, ignored):
-        calldescr = op.descr
-        assert isinstance(calldescr, BaseCallDescr)
-        assert len(calldescr.arg_classes) == len(op.args) - 1
-        size = calldescr.get_result_size(self.translate_support_code)
-        self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args])
-
-    def consider_call_may_force(self, op, ignored):
+    def _consider_call(self, op, guard_not_forced_op=None):
         calldescr = op.descr
         assert isinstance(calldescr, BaseCallDescr)
         assert len(calldescr.arg_classes) == len(op.args) - 1
         size = calldescr.get_result_size(self.translate_support_code)
-        self._call(op, [imm(size)] +
-                   [self.loc(arg) for arg in op.args],
-                   save_all_regs=True)
+        self._call(op, [imm(size)] + [self.loc(arg) for arg in op.args],
+                   guard_not_forced_op=guard_not_forced_op)
 
+    def consider_call(self, op, ignored):
+        self._consider_call(op)
     consider_call_pure = consider_call
 
+    def consider_call_may_force(self, op, guard_op):
+        assert guard_op is not None
+        self._consider_call(op, guard_op)
+
     def consider_cond_call_gc_wb(self, op, ignored):
         assert op.result is None
         arglocs = [self.loc(arg) for arg in op.args]
@@ -943,7 +945,7 @@
 
     def consider_force_token(self, op, ignored):
         loc = self.rm.force_allocate_reg(op.result)
-        self.Perform(op, [self.vable_loc], loc)
+        self.Perform(op, [], loc)
 
     def not_implemented_op(self, op, ignored):
         msg = "[regalloc] Not implemented operation: %s" % op.getopname()
@@ -960,10 +962,9 @@
 
 def get_ebp_ofs(position):
     # Argument is a stack position (0, 1, 2...).
-    # Returns (ebp-16), (ebp-20), (ebp-24)...
-    # This depends on the fact that our function prologue contains
-    # exactly 4 PUSHes.
-    return -WORD * (4 + position)
+    # Returns (ebp-20), (ebp-24), (ebp-28)...
+    # i.e. the n'th word beyond the fixed frame size.
+    return -WORD * (FRAME_FIXED_SIZE + position)
 
 def lower_byte(reg):
     # argh, kill, use lowest8bits instead



More information about the Pypy-commit mailing list