[pypy-svn] r38060 - in pypy/branch/new-jit-codegen: . demo i386 i386/test test

arigo at codespeak.net arigo at codespeak.net
Wed Feb 7 13:44:58 CET 2007


Author: arigo
Date: Wed Feb  7 13:44:55 2007
New Revision: 38060

Added:
   pypy/branch/new-jit-codegen/
      - copied from r38059, pypy/branch/jit-virtual-world/pypy/jit/codegen/
   pypy/branch/new-jit-codegen/demo/autopath.py
      - copied unchanged from r37979, pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/autopath.py
Modified:
   pypy/branch/new-jit-codegen/demo/autorun.py
   pypy/branch/new-jit-codegen/demo/reducecase.py
   pypy/branch/new-jit-codegen/demo/support.py
   pypy/branch/new-jit-codegen/i386/operation.py
   pypy/branch/new-jit-codegen/i386/regalloc.py
   pypy/branch/new-jit-codegen/i386/rgenop.py
   pypy/branch/new-jit-codegen/i386/ri386.py
   pypy/branch/new-jit-codegen/i386/ri386setup.py
   pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py
   pypy/branch/new-jit-codegen/test/rgenop_tests.py
Log:
A branch with a refactoring of the i386 backend.


Modified: pypy/branch/new-jit-codegen/demo/autorun.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/autorun.py	(original)
+++ pypy/branch/new-jit-codegen/demo/autorun.py	Wed Feb  7 13:44:55 2007
@@ -11,5 +11,8 @@
 
 def test_many_times():
     for i in range(80):
-        demo_conftest.option.randomseed = random.randrange(0, 100000)
-        test_random.test_random_function()
+        yield run_test_once, random.randrange(0, 100000)
+
+def run_test_once(seed):
+    demo_conftest.option.randomseed = seed
+    test_random.test_random_function()

Modified: pypy/branch/new-jit-codegen/demo/reducecase.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/reducecase.py	(original)
+++ pypy/branch/new-jit-codegen/demo/reducecase.py	Wed Feb  7 13:44:55 2007
@@ -5,115 +5,259 @@
 False'.  The smallest failing example found so far gets written to
 zsample.py.
 """
+import autopath
 import os
 import re
 r = re.compile(r"      \w = ")
 rif = re.compile(r"      if \w:")
 
+SEED = 73595
+ITERATIONS = 10
+ARGS=[-27, -38, -33, -53, 16, -28, 13, 11, 11, -46, -34, 57, -11, 80, 15, 49, -37, -43, -73, -62, -31, -21, -36, 17, 97, -53]
+BACKEND = 'i386'
+
+
 lines = """
     if goto == 0:
-      g = h and x
-      a = intmask(d + g)
-      goto = 13
+      u = n != e
+      k = intmask(v + z)
+      b = not f
+      w = intmask(l % ((a & 0xfffff) + 1))
+      h = a or y
+      e = intmask(z + z)
+      m = c != a
+      a = intmask(v + g)
+      n = intmask(c - x)
+      n = intmask(o % ((y & 0xfffff) + 1))
+      o = -7035
+      h = m >= a
+      s = f != g
+      e = intmask(~w)
+      if f:
+        goto = 4
+      else:
+        goto = 3
     if goto == 1:
-      if v:
-        goto = 13
+      n = intmask(h - c)
+      x = t == b
+      a = 7744
+      if g:
+        goto = 2
       else:
-        goto = 9
+        goto = 2
     if goto == 2:
-      if m:
-        goto = 10
+      i = intmask(i - v)
+      o = -6878346
+      f = intmask(i ^ n)
+      i = 1261729270
+      q = s or u
+      z = t >= b
+      u = bool(u)
+      w = intmask(e << (c & 0x0000067f))
+      if w:
+        goto = 13
       else:
-        goto = 3
+        goto = 14
     if goto == 3:
-      if r:
-        goto = 9
-      else:
+      y = intmask(w >> (e & 0x1234567f))
+      d = intmask(o & j)
+      r = r != n
+      a = intmask(b & u)
+      b = -11216
+      v = intmask(g // (-((g & 0xfffff) + 2)))
+      x = 6697939
+      d = intmask(abs(q))
+      i = intmask(i % (-((c & 0xfffff) + 2)))
+      w = 23593
+      u = n and a
+      d = intmask(q << (y & 0x0000067f))
+      o = intmask(w // (-((i & 0xfffff) + 2)))
+      l = intmask(e + n)
+      if j:
         counter -= 1
         if not counter: break
-        goto = 0
+        goto = 2
+      else:
+        goto = 14
     if goto == 4:
-      if h:
-        goto = 11
+      f = intmask(e * n)
+      l = intmask(k >> (e & 0x1234567f))
+      l = intmask(u // ((p & 0xfffff) + 1))
+      d = bool(m)
+      d = 7364461
+      g = 1410833768
+      g = y <= d
+      s = l == d
+      e = intmask(k - z)
+      o = -6669177
+      c = intmask(-o)
+      q = intmask(-o)
+      m = intmask(u - j)
+      q = intmask(a - s)
+      m = intmask(a | n)
+      c = q and a
+      t = intmask(b // (-((p & 0xfffff) + 2)))
+      if r:
+        goto = 5
       else:
-        goto = 9
+        goto = 12
     if goto == 5:
-      x = intmask(-i)
-      p = bool(h)
-      b = m >  x
-      g = p or i
-      h = p >  v
-      goto = 7
+      c = intmask(i * q)
+      q = intmask(-q)
+      c = intmask(a // (-((a & 0xfffff) + 2)))
+      u = k >= i
+      m = -34
+      z = intmask(o - i)
+      x = x and l
+      w = o <  n
+      x = n != w
+      m = 92
+      h = 27
+      x = intmask(~u)
+      i = not o
+      q = intmask(c & q)
+      y = x or g
+      if z:
+        goto = 10
+      else:
+        counter -= 1
+        if not counter: break
+        goto = 1
     if goto == 6:
-      m = intmask(-p)
-      counter -= 1
-      if not counter: break
-      goto = 5
-    if goto == 7:
+      u = intmask(l // (-((g & 0xfffff) + 2)))
+      m = intmask(l // ((h & 0xfffff) + 1))
+      a = 3949664
+      c = intmask(v - u)
+      w = k and r
+      q = -1898584839
+      k = a >  o
       if d:
+        goto = 7
+      else:
+        goto = 8
+    if goto == 7:
+      j = not f
+      s = n == h
+      t = x >  n
+      z = intmask(e & f)
+      q = intmask(v + k)
+      o = not a
+      v = 2876355
+      h = intmask(w % ((p & 0xfffff) + 1))
+      c = e and b
+      k = intmask(f // (-((u & 0xfffff) + 2)))
+      m = 4882866
+      if h:
         counter -= 1
         if not counter: break
         goto = 0
       else:
-        goto = 0
-    if goto == 8:
-      if e:
         counter -= 1
         if not counter: break
-        goto = 6
-      else:
+        goto = 4
+    if goto == 8:
+      w = intmask(g & n)
+      d = -31404
+      s = intmask(abs(e))
+      j = intmask(g << (w & 0x0000067f))
+      r = -26
+      b = -13356
+      o = p <  m
+      c = 438000325
+      t = intmask(~g)
+      i = intmask(-e)
+      a = intmask(c - x)
+      v = intmask(v >> (f & 0x1234567f))
+      if o:
         counter -= 1
         if not counter: break
         goto = 6
+      else:
+        goto = 12
     if goto == 9:
-      if u:
+      l = x <= h
+      z = not w
+      f = intmask(u ^ r)
+      if m:
         counter -= 1
         if not counter: break
-        goto = 6
+        goto = 7
       else:
+        goto = 11
+    if goto == 10:
+      o = intmask(t // ((e & 0xfffff) + 1))
+      w = c == v
+      if h:
         counter -= 1
         if not counter: break
-        goto = 2
-    if goto == 10:
-      if v:
-        goto = 14
+        goto = 5
       else:
-        goto = 12
+        counter -= 1
+        if not counter: break
+        goto = 3
     if goto == 11:
-      if f:
+      z = i != c
+      t = d != w
+      v = intmask(r - f)
+      u = 6813995
+      z = c <  f
+      r = intmask(c + i)
+      z = intmask(o - s)
+      p = intmask(i // (-((n & 0xfffff) + 2)))
+      v = intmask(p | h)
+      if a:
         counter -= 1
         if not counter: break
-        goto = 5
+        goto = 3
       else:
         counter -= 1
         if not counter: break
-        goto = 11
+        goto = 0
     if goto == 12:
-      d = d >= n
-      counter -= 1
-      if not counter: break
-      goto = 0
+      b = intmask(l % ((a & 0xfffff) + 1))
+      d = intmask(abs(y))
+      c = intmask(~w)
+      a = bool(v)
+      d = not a
+      v = intmask(s ^ u)
+      if m:
+        counter -= 1
+        if not counter: break
+        goto = 4
+      else:
+        counter -= 1
+        if not counter: break
+        goto = 8
     if goto == 13:
-      l = j <= u
-      d = intmask(s - y)
-      h = intmask(l // ((h & 0xfffff) + 1))
-      if a:
+      c = 13780
+      e = n != i
+      x = 912031708
+      i = intmask(p ^ j)
+      k = not s
+      p = c >  b
+      o = intmask(~j)
+      t = intmask(-k)
+      v = y <= v
+      v = m <= a
+      w = a <  u
+      z = p == v
+      if g:
         counter -= 1
         if not counter: break
-        goto = 12
+        goto = 7
       else:
         counter -= 1
         if not counter: break
-        goto = 6
+        goto = 13
     if goto == 14:
-      if o:
+      if p:
         counter -= 1
         if not counter: break
-        goto = 14
+        goto = 8
       else:
         counter -= 1
         if not counter: break
-        goto = 14
+        goto = 13
 """.splitlines()
 
 lines = [s.rstrip() for s in lines]
@@ -135,17 +279,27 @@
     print >> g, '''
   return intmask(a*-468864544+b*-340864157+c*-212863774+d*-84863387+e*43136996+f*171137383+g*299137766+h*427138153+i*555138536+j*683138923+k*811139306+l*939139693+m*1067140076+n*1195140463+o*1323140846+p*1451141233+q*1579141616+r*1707142003+s*1835142386+t*1963142773+u*2091143156+v*-2075823753+w*-1947823370+x*-1819822983+y*-1691822600+z*-1563822213)
 
-args=[-67, -89, -99, 35, 91, 8, -17, -75, 14, 88, 71, -77, -77, 38, 65, 21, 77, 73, -17, -12, -67, 36, 11, 25, -54, -36]
+'''
+    g.close()
 
+    #ok = os.system("py.test zgen.py --seed=6661 -s") == 0
 
-def test_y():
+    from pypy.jit.codegen.demo import conftest as demo_conftest
+    demo_conftest.option.randomseed = SEED
+    demo_conftest.option.backend = BACKEND
     from pypy.jit.codegen.demo.support import rundemo
-    rundemo(dummyfn, 10, *args)
-'''
-    g.close()
 
-    ok = os.system("py.test zgen.py --seed=3888 -s") == 0
-    # XXX could run in-process to avoid start-up overhead
+    d = {}
+    execfile('zgen.py', d)
+    dummyfn = d['dummyfn']
+
+    childpid = os.fork()
+    if childpid == 0:     # in child
+        rundemo(dummyfn, ITERATIONS, *ARGS)
+        os._exit(0)
+
+    _, status = os.waitpid(childpid, 0)
+    ok = status == 0
 
     if ok:
         return True     # accept

Modified: pypy/branch/new-jit-codegen/demo/support.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/support.py	(original)
+++ pypy/branch/new-jit-codegen/demo/support.py	Wed Feb  7 13:44:55 2007
@@ -121,7 +121,7 @@
     F1 = lltype.FuncType([lltype.Signed] * nb_args, lltype.Signed)
     fp = RGenOp.get_python_callable(lltype.Ptr(F1), gv_entrypoint)
     res = runfp(fp, *args)
-    print '%-6s ===>'%demo_conftest.option.backend, res
+    print '%-6s ===>' % RGenOp.__name__, res
     print
     if res != expected:
         raise AssertionError(

Modified: pypy/branch/new-jit-codegen/i386/operation.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/operation.py	(original)
+++ pypy/branch/new-jit-codegen/i386/operation.py	Wed Feb  7 13:44:55 2007
@@ -18,60 +18,33 @@
 else:
     CALL_ALIGN = 1
 
-PROLOGUE_FIXED_WORDS = 5
-
-RK_NO_RESULT = 0
-RK_WORD      = 1
-RK_CC        = 2
-
 
 class Operation(GenVar):
     clobbers_cc = True
-    result_kind = RK_WORD
-    cc_result   = -1
+    side_effects = True
 
-    def allocate(self, allocator):
-        pass
     def generate(self, allocator):
         raise NotImplementedError
 
-class OpWhatever(Operation):
-    clobbers_cc = False
-    def generate(self, allocator):
-        pass
-
 class Op1(Operation):
     def __init__(self, x):
         self.x = x
-    def allocate(self, allocator):
+    def mark_used_vars(self, allocator):
         allocator.using(self.x)
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # result not used
-        srcop = allocator.get_operand(self.x)
-        self.generate2(allocator.mc, dstop, srcop)
-    def generate2(self, mc, dstop, srcop):
-        raise NotImplementedError
 
 class UnaryOp(Op1):
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # simple operation whose result is not used anyway
-        srcop = allocator.get_operand(self.x)
-        mc = allocator.mc
-        if srcop != dstop:
-            try:
-                mc.MOV(dstop, srcop)
-            except FailedToImplement:
-                mc.MOV(ecx, srcop)
-                self.emit(mc, ecx)
-                mc.MOV(dstop, ecx)
-                return
-        self.emit(mc, dstop)
+    side_effects = False
+    def mark_used_vars(self, allocator):
+        allocator.using_inplace(self.x, self)
+    def generate(self, allocator):
+        if allocator.release(self.x):
+            dstop = allocator.get_operand(self.x)    # in-place operation
+            allocator.create_exactly_at(self, dstop)
+        else:
+            dstop = allocator.create_reg(self)
+            srcop = allocator.get_operand(self.x)
+            allocator.mc.MOV(dstop, srcop)    # make a copy in a new register
+        self.emit(allocator.mc, dstop)
 
 class OpIntNeg(UnaryOp):
     opname = 'int_neg'
@@ -83,110 +56,140 @@
 
 class OpIntAbs(Op1):
     opname = 'int_abs'
-    def generate2(self, mc, dstop, srcop):
+    side_effects = False
+    def mark_used_vars(self, allocator):
+        allocator.using(self.x)
+    def generate(self, allocator):
+        oldsrcop = allocator.get_operand(self.x)
+        dstop = allocator.create_reg(self)
+        srcop = allocator.get_operand(self.x)
         # ABS-computing code from Psyco, found by exhaustive search
         # on *all* short sequences of operations :-)
-        inplace = (dstop == srcop)
-        if inplace or not (isinstance(srcop, REG) or isinstance(dstop, REG)):
-            mc.MOV(ecx, srcop)
-            srcop = ecx
-        if not inplace:
+        mc = allocator.mc
+        if dstop != oldsrcop:
             mc.MOV(dstop, srcop)
         mc.SHL(dstop, imm8(1))
         mc.SBB(dstop, srcop)
-        mc.SBB(ecx, ecx)
-        mc.XOR(dstop, ecx)
+        allocator.release(self.x)
+        tmpop = allocator.create_scratch_reg()
+        dstop = allocator.get_operand(self)
+        mc.SBB(tmpop, tmpop)
+        mc.XOR(dstop, tmpop)
+        allocator.end_clobber(tmpop)
 
 class OpSameAs(Op1):
+    clobbers_cc = False    # special handling of the cc
+    side_effects = False
+    def mark_used_vars(self, allocator):
+        allocator.using_inplace(self.x, self)
+    def generate(self, allocator):
+        srcop = allocator.get_operand(self.x)
+        if allocator.lastuse(self.x):
+            allocator.release(self.x)
+            if isinstance(srcop, CCFLAG):
+                allocator.create_in_cc(self, srcop)
+            else:
+                allocator.create_exactly_at(self, srcop)
+        else:
+            if isinstance(srcop, CCFLAG):
+                allocator.clobber_cc()   # which doesn't itself clobber cc,
+                                         # so we can reuse it for us
+                allocator.create_in_cc(self, srcop)
+            else:
+                dstop = allocator.create_reg(self)
+                srcop = allocator.get_operand(self.x)
+                if srcop != dstop:
+                    allocator.mc.MOV(dstop, srcop)
+            allocator.release(self.x)
+
+class OpWhatever(Operation):
     clobbers_cc = False
-    def generate2(self, mc, dstop, srcop):
-        if srcop != dstop:
-            try:
-                mc.MOV(dstop, srcop)
-            except FailedToImplement:
-                mc.MOV(ecx, srcop)
-                mc.MOV(dstop, ecx)
+    side_effects = False
+    def generate(self, allocator):
+        allocator.create(self)
 
 class OpCompare1(Op1):
-    result_kind = RK_CC
+    clobbers_cc = False    # special handling of the cc
+    side_effects = False
+
     def generate(self, allocator):
-        srcop = allocator.get_operand(self.x)
         mc = allocator.mc
-        self.emit(mc, srcop)
-    def copy(self):
-        return self.__class__(self.x)
+        srcop = allocator.get_operand(self.x)
+        if isinstance(srcop, CCFLAG):
+            ccop = srcop
+            allocator.release(self.x)
+            allocator.clobber_cc()
+            # the flags are still valid through a clobber_cc
+            if self.inverted:
+                ccop = ccflags[cond_negate(ccop.cc)]
+        else:
+            allocator.clobber_cc()
+            srcop = allocator.get_operand(self.x)
+            mc.CMP(srcop, imm8(0))
+            allocator.release(self.x)
+            ccop = ccflags[self.suggested_cc]
+        allocator.create_in_cc(self, ccop)
 
 class OpIntIsTrue(OpCompare1):
     opname = 'int_is_true', 'ptr_nonzero', 'uint_is_true'
-    cc_result = Conditions['NE']
-    @staticmethod
-    def emit(mc, x):
-        mc.CMP(x, imm8(0))
+    suggested_cc = Conditions['NE']
+    inverted = False
 
 class OpIntIsZero(OpIntIsTrue):
     opname = 'ptr_iszero', 'bool_not'
-    cc_result = Conditions['E']
+    suggested_cc = Conditions['E']
+    inverted = True
 
 class Op2(Operation):
     def __init__(self, x, y):
         self.x = x
         self.y = y
-    def allocate(self, allocator):
+    def mark_used_vars(self, allocator):
         allocator.using(self.x)
         allocator.using(self.y)
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # simple operation whose result is not used anyway
-        op1 = allocator.get_operand(self.x)
-        op2 = allocator.get_operand(self.y)
-        self.generate3(allocator.mc, dstop, op1, op2)
-    def generate3(self, mc, dstop, op1, op2):
-        raise NotImplementedError
 
 class BinaryOp(Op2):
+    side_effects = False
     commutative = False
-    def generate3(self, mc, dstop, op1, op2):
-        # now all of dstop, op1 and op2 may alias each other and be in
-        # a register, in the stack or an immediate... finding a correct
-        # and encodable combination of instructions is loads of fun
-        if dstop == op1:
-            case = 1       # optimize for this common case
-        elif self.commutative and dstop == op2:
+
+    def mark_used_vars(self, allocator):
+        inplace_ok = allocator.using_inplace(self.x, self)
+        if not inplace_ok and self.commutative:
+            allocator.using_inplace(self.y, self)
+        else:
+            allocator.using(self.y)
+
+    def generate(self, allocator):
+        x, y = self.x, self.y
+        op1 = allocator.get_operand(x)
+        op2 = allocator.get_operand(y)
+        xlast = allocator.lastuse(x)
+        if self.commutative and not xlast and allocator.lastuse(y):
+            # reverse arguments, then it's an in-place operation
+            x, y = y, x
             op1, op2 = op2, op1
-            case = 1
-        elif isinstance(dstop, REG):
-            if dstop != op2:
-                # REG = OPERATION(op1, op2)   with op2 != REG
-                case = 2
-            else:
-                # REG = OPERATION(op1, REG)
-                case = 3
-        elif isinstance(op1, REG) and isinstance(op2, REG):
-            # STACK = OPERATION(REG, REG)
-            case = 2
-        else:
-            case = 3
-        # generate instructions according to the 'case' determined above
-        if case == 1:
-            # dstop == op1
-            try:
-                self.emit(mc, op1, op2)
-            except FailedToImplement:    # emit(STACK, STACK) combination
-                mc.MOV(ecx, op2)
-                self.emit(mc, op1, ecx)
-        elif case == 2:
-            # this case works for:
-            #   * REG = OPERATION(op1, op2)   with op2 != REG
-            #   * STACK = OPERATION(REG, REG)
-            mc.MOV(dstop, op1)
-            self.emit(mc, dstop, op2)
-        else:
-            # most general case
-            mc.MOV(ecx, op1)
-            self.emit(mc, ecx, op2)
-            mc.MOV(dstop, ecx)
+            xlast = True
+
+        if xlast:
+            dstop = op1   # in-place operation
+            # op1 and op2 must not be both in a stack location
+            if isinstance(op1, MODRM) and isinstance(op2, MODRM):
+                tmpop = allocator.create_scratch_reg(op2)
+                # neither op1 nor op2 can have been spilled here, as
+                # they are already in the stack
+                op2 = tmpop
+                allocator.end_clobber(tmpop)
+            allocator.release(x)
+            allocator.release(y)
+            allocator.create_exactly_at(self, op1)
+        else:
+            dstop = allocator.create_reg(self)
+            if dstop != op1: # else op1 was spilled just now, so its value
+                             # is still in place in its old register
+                allocator.mc.MOV(dstop, op1) # make a copy in the new register
+                op2 = allocator.get_operand(y)
+            allocator.release(y)
+        self.emit(allocator.mc, dstop, op2)
 
 class OpIntAdd(BinaryOp):
     opname = 'int_add', 'uint_add'
@@ -211,64 +214,75 @@
 
 class OpIntMul(Op2):
     opname = 'int_mul'
-    def generate3(self, mc, dstop, op1, op2):
-        if isinstance(dstop, REG):
-            tmpop = dstop
-        else:
-            tmpop = ecx
-        if tmpop == op1:
-            mc.IMUL(tmpop, op2)
-        elif isinstance(op2, IMM32):
-            mc.IMUL(tmpop, op1, op2)
+    side_effects = False
+
+    def generate(self, allocator):
+        op1 = allocator.get_operand(self.x)
+        op2 = allocator.get_operand(self.y)
+
+        if isinstance(op1, REG) and allocator.lastuse(self.x):
+            allocator.release(self.x)
+            allocator.release(self.y)
+            allocator.create_exactly_at(self, op1)
+            dstop = op1
+        elif isinstance(op2, REG) and allocator.lastuse(self.y):
+            allocator.release(self.x)
+            allocator.release(self.y)
+            allocator.create_exactly_at(self, op2)
+            dstop = op2
+        else:
+            dstop = allocator.create_reg(self)
+            allocator.release(self.x)
+            allocator.release(self.y)
+        mc = allocator.mc
+        if isinstance(op2, IMM32):
+            mc.IMUL(dstop, op1, op2)
         elif isinstance(op1, IMM32):
-            mc.IMUL(tmpop, op2, op1)
+            mc.IMUL(dstop, op2, op1)
+        elif dstop == op1:
+            mc.IMUL(dstop, op2)
+        elif dstop == op2:
+            mc.IMUL(dstop, op1)
         else:
-            if tmpop != op2:
-                mc.MOV(tmpop, op2)
-            mc.IMUL(tmpop, op1)
-        if dstop != tmpop:
-            mc.MOV(dstop, tmpop)
+            mc.MOV(dstop, op1)
+            mc.IMUL(dstop, op2)
 
 class MulOrDivOp(Op2):
+    side_effects = False
 
-    def generate3(self, mc, dstop, op1, op2):
+    def generate(self, allocator):
         # XXX not very efficient but not very common operations either
-        mc.PUSH(eax)
-        mc.PUSH(edx)
-        if op1 != eax:
-            if op2 == eax:
-                op2 = mem(esp, 4)
+        oldop1 = allocator.get_operand(self.x)
+        #oldop2 = allocator.get_operand(self.y)
+        allocator.clobber2(eax, edx)
+        op1 = allocator.get_operand(self.x)
+
+        mc = allocator.mc
+
+        if oldop1 != eax:
             mc.MOV(eax, op1)
         if self.input_is_64bits:
-            if op2 == edx:
-                op2 = mem(esp)
             if self.unsigned:
                 mc.XOR(edx, edx)
             else:
                 mc.CDQ()
-        try:
-            self.emit(mc, op2)
-        except FailedToImplement:
-            mc.MOV(ecx, op2)
-            self.emit(mc, ecx)
-        if dstop != self.reg_containing_result:
-            mc.MOV(dstop, self.reg_containing_result)
-        if dstop == edx:
-            mc.ADD(esp, imm8(4))
-        else:
-            mc.POP(edx)
-        if dstop == eax:
-            mc.ADD(esp, imm8(4))
-        else:
-            mc.POP(eax)
+
+        self.generate2(allocator)
+
+        allocator.end_clobber(eax)
+        allocator.end_clobber(edx)
+        allocator.release(self.x)
+        allocator.release(self.y)
+        # the target register should still be free, see clobber2()
+        allocator.create_exactly_at(self, self.reg_containing_result)
 
 class OpIntFloorDiv(MulOrDivOp):
     opname = 'int_floordiv'
     input_is_64bits = True
     reg_containing_result = eax
     unsigned = False
-    @staticmethod
-    def emit(mc, op2):
+
+    def generate2(self, allocator):
         # from the PPC backend which has the same problem:
         # 
         #   grumble, the powerpc handles division when the signs of x
@@ -288,234 +302,295 @@
         #    20/(-3) =    -7,-1    -6, 2      # operand signs differ
         # (-20)/(-3) =     6,-2     6,-2
         #
+        tmp = allocator.create_scratch_reg()
+        op2 = allocator.get_operand(self.y)
+        mc = allocator.mc
         if isinstance(op2, IMM32):
             # if op2 is an immediate, we do an initial adjustment of operand 1
             # so that we get directly the correct answer
             if op2.value >= 0:
                 # if op1 is negative, subtract (op2-1)
-                mc.MOV(ecx, edx)       # -1 if op1 is negative, 0 otherwise
-                mc.AND(ecx, imm(op2.value-1))
-                mc.SUB(eax, ecx)
+                mc.MOV(tmp, edx)       # -1 if op1 is negative, 0 otherwise
+                mc.AND(tmp, imm(op2.value-1))
+                mc.SUB(eax, tmp)
                 mc.SBB(edx, imm8(0))
             else:
                 # if op1 is positive (or null), add (|op2|-1)
-                mc.MOV(ecx, edx)
-                mc.NOT(ecx)            # -1 if op1 is positive, 0 otherwise
-                mc.AND(ecx, imm(-op2.value-1))
-                mc.ADD(eax, ecx)
+                mc.MOV(tmp, edx)
+                mc.NOT(tmp)            # -1 if op1 is positive, 0 otherwise
+                mc.AND(tmp, imm(-op2.value-1))
+                mc.ADD(eax, tmp)
                 mc.ADC(edx, imm8(0))
-            mc.MOV(ecx, op2)
-            mc.IDIV(ecx)
+            mc.MOV(tmp, op2)
+            mc.IDIV(tmp)
         else:
             # subtract 1 to the result if the operand signs differ and
             # the remainder is not zero
-            mc.MOV(ecx, eax)
+            mc.MOV(tmp, eax)
             mc.IDIV(op2)
-            mc.XOR(ecx, op2)
-            mc.SAR(ecx, imm8(31)) # -1 if signs differ, 0 otherwise
-            mc.AND(ecx, edx)      # nonnull if signs differ and edx != 0
-            mc.CMP(ecx, imm8(1))  # no carry flag iff signs differ and edx != 0
+            mc.XOR(tmp, op2)
+            mc.SAR(tmp, imm8(31)) # -1 if signs differ, 0 otherwise
+            mc.AND(tmp, edx)      # nonnull if signs differ and edx != 0
+            mc.CMP(tmp, imm8(1))  # no carry flag iff signs differ and edx != 0
             mc.ADC(eax, imm8(-1)) # subtract 1 iff no carry flag
+        allocator.end_clobber(tmp)
 
 class OpIntMod(MulOrDivOp):
     opname = 'int_mod'
     input_is_64bits = True
     reg_containing_result = edx
     unsigned = False
-    @staticmethod
-    def emit(mc, op2):
+
+    def generate2(self, allocator):
         #                 Python    i386
         #    20/3    =     6, 2     6, 2
         # (-20)/3    =    -7, 1    -6,-2      # operand signs differ
         #    20/(-3) =    -7,-1    -6, 2      # operand signs differ
         # (-20)/(-3) =     6,-2     6,-2
         #
+        tmp = allocator.create_scratch_reg()
+        op2 = allocator.get_operand(self.y)
+        mc = allocator.mc
         if isinstance(op2, IMM32):
-            mc.MOV(ecx, op2)
-            mc.IDIV(ecx)
+            mc.MOV(tmp, op2)
+            mc.IDIV(tmp)
             # adjustment needed:
             #   if op2 > 0: if the result is negative, add op2 to it
             #   if op2 < 0: if the result is > 0, subtract |op2| from it
-            mc.MOV(ecx, edx)
+            mc.MOV(tmp, edx)
             if op2.value < 0:
-                mc.NEG(ecx)
-            mc.SAR(ecx, imm8(31))
-            mc.AND(ecx, imm(op2.value))
-            mc.ADD(edx, ecx)
+                mc.NEG(tmp)
+            mc.SAR(tmp, imm8(31))
+            mc.AND(tmp, imm(op2.value))
+            mc.ADD(edx, tmp)
         else:
             # if the operand signs differ and the remainder is not zero,
             # add operand2 to the result
-            mc.MOV(ecx, eax)
+            mc.MOV(tmp, eax)
             mc.IDIV(op2)
-            mc.XOR(ecx, op2)
-            mc.SAR(ecx, imm8(31)) # -1 if signs differ, 0 otherwise
-            mc.AND(ecx, edx)      # nonnull if signs differ and edx != 0
-            mc.CMOVNZ(ecx, op2)   # == op2  if signs differ and edx != 0
-            mc.ADD(edx, ecx)
+            mc.XOR(tmp, op2)
+            mc.SAR(tmp, imm8(31)) # -1 if signs differ, 0 otherwise
+            mc.AND(tmp, edx)      # nonnull if signs differ and edx != 0
+            mc.CMOVNZ(tmp, op2)   # == op2  if signs differ and edx != 0
+            mc.ADD(edx, tmp)
+        allocator.end_clobber(tmp)
 
 class OpUIntMul(MulOrDivOp):
     opname = 'uint_mul'
     input_is_64bits = False
     reg_containing_result = eax
     unsigned = True
-    emit = staticmethod(I386CodeBuilder.MUL)
+    def generate2(self, allocator):
+        op2 = allocator.get_operand(self.y)
+        allocator.mc.MUL(op2)
 
 class OpUIntFloorDiv(MulOrDivOp):
     opname = 'uint_floordiv'
     input_is_64bits = True
     reg_containing_result = eax
     unsigned = True
-    emit = staticmethod(I386CodeBuilder.DIV)
+    def generate2(self, allocator):
+        op2 = allocator.get_operand(self.y)
+        allocator.mc.DIV(op2)
 
 class OpUIntMod(MulOrDivOp):
     opname = 'uint_mod'
     input_is_64bits = True
     reg_containing_result = edx
     unsigned = True
-    emit = staticmethod(I386CodeBuilder.DIV)
+    def generate2(self, allocator):
+        op2 = allocator.get_operand(self.y)
+        allocator.mc.DIV(op2)
 
-class OpIntLShift(Op2):
-    opname = 'int_lshift', 'uint_lshift'
-    emit = staticmethod(I386CodeBuilder.SHL)
-    def generate3(self, mc, dstop, op1, op2):
-        # XXX not optimized
+class OpShift(Op2):
+    side_effects = False
+    countmax31 = False
+
+    def mark_used_vars(self, allocator):
+        allocator.using_inplace(self.x, self)
+        allocator.using(self.y)
+        # XXX this would be nice
+        #if not self.countmax31:
+        #    allocator.suggests(self.y, ecx)
+
+    def generate(self, allocator):
+        op2 = allocator.get_operand(self.y)
+        mc = allocator.mc
         if isinstance(op2, IMM32):
             n = op2.value
             if n < 0 or n >= 32:
-                mc.MOV(dstop, imm8(0))   # shift out of range, result is zero
-                return
+                # shift out of range
+                if self.countmax31:
+                    n = 31   # case in which it's equivalent to a shift by 31
+                else:
+                    # case in which the result is always zero
+                    allocator.release(self.x)
+                    allocator.release(self.y)
+                    dstop = allocator.create_reg(self)
+                    mc.XOR(dstop, dstop)
+                    return
             count = imm8(n)
         else:
-            mc.MOV(ecx, op2)
+            allocator.clobber(ecx)
+            op2 = allocator.get_operand(self.y)
+            if self.countmax31:
+                mc.MOV(ecx, imm8(31))
+                mc.CMP(op2, ecx)
+                mc.CMOVBE(ecx, op2)
+            else:
+                mc.MOV(ecx, op2)
+            allocator.release(self.y)
             count = cl
-        if dstop != op1:
-            try:
-                mc.MOV(dstop, op1)
-            except FailedToImplement:
-                mc.PUSH(op1)
-                mc.POP(dstop)
+
+        if allocator.release(self.x):
+            dstop = allocator.get_operand(self.x)    # in-place operation
+            allocator.create_exactly_at(self, dstop)
+        else:
+            dstop = allocator.create_reg(self)
+            srcop = allocator.get_operand(self.x)
+            mc.MOV(dstop, srcop)    # make a copy in a new register
+
         self.emit(mc, dstop, count)
         if count == cl:
-            mc.CMP(ecx, imm8(32))
-            mc.SBB(ecx, ecx)
-            mc.AND(dstop, ecx)
+            if not self.countmax31:
+                mc.CMP(ecx, imm8(32))
+                mc.SBB(ecx, ecx)
+                mc.AND(dstop, ecx)
+            allocator.end_clobber(ecx)
 
-class OpIntRShift(Op2):
-    opname = 'int_rshift'
-    def generate3(self, mc, dstop, op1, op2):
-        # XXX not optimized
-        if isinstance(op2, IMM32):
-            n = op2.value
-            if n < 0 or n >= 32:
-                n = 31     # shift out of range, replace with 31
-            count = imm8(n)
-        else:
-            mc.MOV(ecx, imm(31))
-            mc.CMP(op2, ecx)
-            mc.CMOVBE(ecx, op2)
-            count = cl
-        if dstop != op1:
-            try:
-                mc.MOV(dstop, op1)
-            except FailedToImplement:
-                mc.PUSH(op1)
-                mc.POP(dstop)
-        mc.SAR(dstop, count)
+class OpIntLShift(OpShift):
+    opname = 'int_lshift', 'uint_lshift'
+    emit = staticmethod(I386CodeBuilder.SHL)
 
-class OpUIntRShift(OpIntLShift):
+class OpUIntRShift(OpShift):
     opname = 'uint_rshift'
     emit = staticmethod(I386CodeBuilder.SHR)
 
+class OpIntRShift(OpShift):
+    opname = 'int_rshift'
+    emit = staticmethod(I386CodeBuilder.SAR)
+    countmax31 = True
+
 class OpCompare2(Op2):
-    result_kind = RK_CC
+    side_effects = False
+
     def generate(self, allocator):
-        srcop = allocator.get_operand(self.x)
-        dstop = allocator.get_operand(self.y)
+        op1 = allocator.get_operand(self.x)
+        op2 = allocator.get_operand(self.y)
         mc = allocator.mc
-        # XXX optimize the case CMP(immed, reg-or-modrm)
+        cond = self.suggested_cc
         try:
-            mc.CMP(srcop, dstop)
+            mc.CMP(op1, op2)
         except FailedToImplement:
-            mc.MOV(ecx, srcop)
-            mc.CMP(ecx, dstop)
-    def copy(self):
-        return self.__class__(self.x, self.y)
+            # try reversing the arguments, for CMP(immed, reg-or-modrm)
+            try:
+                mc.CMP(op2, op1)
+            except FailedToImplement:
+                # CMP(stack, stack)
+                reg = allocator.create_scratch_reg(op1)
+                mc.CMP(reg, op2)
+                allocator.end_clobber(reg)
+            else:
+                cond = cond_swapargs(cond)    # worked with arguments reversed
+        allocator.release(self.x)
+        allocator.release(self.y)
+        allocator.create_in_cc(self, ccflags[cond])
 
 class OpIntLt(OpCompare2):
     opname = 'int_lt', 'char_lt'
-    cc_result = Conditions['L']
+    suggested_cc = Conditions['L']
 
 class OpIntLe(OpCompare2):
     opname = 'int_le', 'char_le'
-    cc_result = Conditions['LE']
+    suggested_cc = Conditions['LE']
 
 class OpIntEq(OpCompare2):
     opname = 'int_eq', 'char_eq', 'unichar_eq', 'ptr_eq', 'uint_eq'
-    cc_result = Conditions['E']
+    suggested_cc = Conditions['E']
 
 class OpIntNe(OpCompare2):
     opname = 'int_ne', 'char_ne', 'unichar_ne', 'ptr_ne', 'uint_ne'
-    cc_result = Conditions['NE']
+    suggested_cc = Conditions['NE']
 
 class OpIntGt(OpCompare2):
     opname = 'int_gt', 'char_gt'
-    cc_result = Conditions['G']
+    suggested_cc = Conditions['G']
 
 class OpIntGe(OpCompare2):
     opname = 'int_ge', 'char_ge'
-    cc_result = Conditions['GE']
+    suggested_cc = Conditions['GE']
 
 class OpUIntLt(OpCompare2):
     opname = 'uint_lt'
-    cc_result = Conditions['B']
+    suggested_cc = Conditions['B']
 
 class OpUIntLe(OpCompare2):
     opname = 'uint_le'
-    cc_result = Conditions['BE']
+    suggested_cc = Conditions['BE']
 
 class OpUIntGt(OpCompare2):
     opname = 'uint_gt'
-    cc_result = Conditions['A']
+    suggested_cc = Conditions['A']
 
 class OpUIntGe(OpCompare2):
     opname = 'uint_ge'
-    cc_result = Conditions['AE']
+    suggested_cc = Conditions['AE']
 
 class JumpIf(Operation):
     clobbers_cc = False
-    result_kind = RK_NO_RESULT
-    def __init__(self, gv_condition, targetbuilder, negate):
+    negate = False
+    def __init__(self, gv_condition, targetbuilder):
         self.gv_condition = gv_condition
         self.targetbuilder = targetbuilder
-        self.negate = negate
-    def allocate(self, allocator):
-        allocator.using_cc(self.gv_condition)
+    def mark_used_vars(self, allocator):
+        allocator.using(self.gv_condition)
         for gv in self.targetbuilder.inputargs_gv:
             allocator.using(gv)
     def generate(self, allocator):
-        cc = self.gv_condition.cc_result
+        targetbuilder = self.targetbuilder
+        op = allocator.get_operand(self.gv_condition)
+        mc = allocator.mc
+        if isinstance(op, CCFLAG):
+            cc = op.cc
+        else:
+            allocator.clobber_cc()
+            op = allocator.get_operand(self.gv_condition)
+            mc.CMP(op, imm(0))
+            cc = Conditions['NE']
+        allocator.release(self.gv_condition)
+        operands = []
+        for gv in targetbuilder.inputargs_gv:
+            operands.append(allocator.get_operand(gv))
+            allocator.release(gv)
         if self.negate:
             cc = cond_negate(cc)
-        mc = allocator.mc
-        targetbuilder = self.targetbuilder
         targetbuilder.set_coming_from(mc, insncond=cc)
-        targetbuilder.inputoperands = [allocator.get_operand(gv)
-                                       for gv in targetbuilder.inputargs_gv]
+        targetbuilder.inputoperands = operands
+        #assert targetbuilder.inputoperands.count(ebx) <= 1
+
+class JumpIfNot(JumpIf):
+    negate = True
 
 class OpLabel(Operation):
-    clobbers_cc = False
-    result_kind = RK_NO_RESULT
+    # NB. this is marked to clobber the CC, because it cannot easily
+    #     be saved/restored across a label.  The problem is that someone
+    #     might later try to jump to this label with a new value for
+    #     the variable that is different from 0 or 1, i.e. which cannot
+    #     be represented in the CC at all.
     def __init__(self, lbl, args_gv):
         self.lbl = lbl
         self.args_gv = args_gv
-    def allocate(self, allocator):
+    def mark_used_vars(self, allocator):
         for v in self.args_gv:
             allocator.using(v)
     def generate(self, allocator):
+        operands = []
+        for v in self.args_gv:
+            operands.append(allocator.get_operand(v))
+            allocator.release(v)
         lbl = self.lbl
         lbl.targetaddr = allocator.mc.tell()
-        lbl.targetstackdepth = allocator.required_frame_depth
-        lbl.inputoperands = [allocator.get_operand(v) for v in self.args_gv]
+        lbl.inputoperands = operands
         lbl.targetbuilder = None    # done generating
 
 class OpCall(Operation):
@@ -523,55 +598,57 @@
         self.sigtoken = sigtoken
         self.gv_fnptr = gv_fnptr
         self.args_gv = args_gv
-    def allocate(self, allocator):
-        # XXX try to use eax for the result
+
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_fnptr)
         for v in self.args_gv:
             allocator.using(v)
+
     def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            dstop = None
         mc = allocator.mc
-        stack_align_words = PROLOGUE_FIXED_WORDS
-        if dstop != eax:
-            mc.PUSH(eax)
-            if CALL_ALIGN > 1: stack_align_words += 1
-        if dstop != edx:
-            mc.PUSH(edx)
-            if CALL_ALIGN > 1: stack_align_words += 1
         args_gv = self.args_gv
-        num_placeholders = 0
-        if CALL_ALIGN > 1:
-            stack_align_words += len(args_gv)
-            stack_align_words &= CALL_ALIGN-1
-            if stack_align_words > 0:
-                num_placeholders = CALL_ALIGN - stack_align_words
-                mc.SUB(esp, imm(WORD * num_placeholders))
-        for i in range(len(args_gv)-1, -1, -1):
+
+        stackargs_i = []
+        for i in range(len(args_gv)):
             srcop = allocator.get_operand(args_gv[i])
-            mc.PUSH(srcop)
+            if isinstance(srcop, MODRM):
+                stackargs_i.append(i)
+            else:
+                mc.MOV(mem(esp, WORD * i), srcop)
+                allocator.release(args_gv[i])
+
+        allocator.clobber3(eax, edx, ecx)
+        allocator.reserve_extra_stack(len(args_gv))
+
+        if len(stackargs_i) > 0:
+            tmp = eax
+            for i in stackargs_i:
+                srcop = allocator.get_operand(args_gv[i])
+                mc.MOV(tmp, srcop)
+                mc.MOV(mem(esp, WORD * i), tmp)
+                allocator.release(args_gv[i])
+
         fnop = allocator.get_operand(self.gv_fnptr)
         if isinstance(fnop, IMM32):
             mc.CALL(rel32(fnop.value))
         else:
             mc.CALL(fnop)
-        mc.ADD(esp, imm(WORD * (len(args_gv) + num_placeholders)))
-        if dstop != edx:
-            mc.POP(edx)
-        if dstop != eax:
-            if dstop is not None:
-                mc.MOV(dstop, eax)
-            mc.POP(eax)
 
-def field_operand(mc, base, fieldtoken):
-    # may use ecx
+        allocator.release(self.gv_fnptr)
+        allocator.end_clobber(eax)
+        allocator.end_clobber(edx)
+        allocator.end_clobber(ecx)
+        if allocator.operation_result_is_used(self):
+            allocator.create_exactly_at(self, eax)
+
+
+def field_operand(allocator, base, fieldtoken):
     fieldoffset, fieldsize = fieldtoken
 
     if isinstance(base, MODRM):
-        mc.MOV(ecx, base)
-        base = ecx
+        tmp = allocator.create_scratch_reg(base)
+        allocator.end_clobber(tmp)
+        base = tmp
     elif isinstance(base, IMM32):
         fieldoffset += base.value
         base = None
@@ -581,8 +658,8 @@
     else:
         return mem (base, fieldoffset)
 
-def array_item_operand(mc, base, arraytoken, opindex):
-    # may use ecx
+def array_item_operand(allocator, base, arraytoken, opindex):
+    tmp = None
     _, startoffset, itemoffset = arraytoken
 
     if isinstance(opindex, IMM32):
@@ -591,28 +668,31 @@
         indexshift = 0
     elif itemoffset in SIZE2SHIFT:
         if not isinstance(opindex, REG):
-            mc.MOV(ecx, opindex)
-            opindex = ecx
+            tmp = allocator.create_scratch_reg(opindex)
+            opindex = tmp
         indexshift = SIZE2SHIFT[itemoffset]
     else:
-        mc.IMUL(ecx, opindex, imm(itemoffset))
-        opindex = ecx
+        tmp = allocator.create_scratch_reg()
+        allocator.mc.IMUL(tmp, opindex, imm(itemoffset))
+        opindex = tmp
         indexshift = 0
 
-    assert base is not ecx
     if isinstance(base, MODRM):
-        if opindex != ecx:
-            mc.MOV(ecx, base)
-        else:   # waaaa
+        if tmp is None:
+            tmp = allocator.create_scratch_reg(base)
+        else:   # let's avoid using two scratch registers
             opindex = None
             if indexshift > 0:
-                mc.SHL(ecx, imm8(indexshift))
-            mc.ADD(ecx, base)
-        base = ecx
+                allocator.mc.SHL(tmp, imm8(indexshift))
+            allocator.mc.ADD(tmp, base)
+        base = tmp
     elif isinstance(base, IMM32):
         startoffset += base.value
         base = None
 
+    if tmp is not None:
+        allocator.end_clobber(tmp)
+
     if itemoffset == 1:
         return memSIB8(base, opindex, indexshift, startoffset)
     else:
@@ -620,209 +700,141 @@
 
 class OpComputeSize(Operation):
     clobbers_cc = False
+    side_effects = False
     def __init__(self, varsizealloctoken, gv_length):
         self.varsizealloctoken = varsizealloctoken
         self.gv_length = gv_length
-    def allocate(self, allocator):
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_length)
     def generate(self, allocator):
-        dstop = allocator.get_operand(self)
         srcop = allocator.get_operand(self.gv_length)
-        mc = allocator.mc
-        op_size = array_item_operand(mc, None, self.varsizealloctoken, srcop)
-        try:
-            mc.LEA(dstop, op_size)
-        except FailedToImplement:
-            mc.LEA(ecx, op_size)
-            mc.MOV(dstop, ecx)
-
-def hard_store(mc, opmemtarget, opvalue, itemsize):
-    # For the possibly hard cases of stores
-    # Generates a store to 'opmemtarget' of size 'itemsize' == 1, 2 or 4.
-    # If it is 1, opmemtarget must be a MODRM8; otherwise, it must be a MODRM.
-    if itemsize == WORD:
-        try:
-            mc.MOV(opmemtarget, opvalue)
-        except FailedToImplement:
-            if opmemtarget.involves_ecx():
-                mc.PUSH(opvalue)
-                mc.POP(opmemtarget)
-            else:
-                mc.MOV(ecx, opvalue)
-                mc.MOV(opmemtarget, ecx)
-    else:
-        must_pop_eax = False
-        if itemsize == 1:
-            if isinstance(opvalue, REG) and opvalue.lowest8bits:
-                # a register whose lower 8 bits are directly readable
-                opvalue = opvalue.lowest8bits
-            elif isinstance(opvalue, IMM8):
-                pass
-            else:
-                if opmemtarget.involves_ecx():    # grumble!
-                    mc.PUSH(eax)
-                    must_pop_eax = True
-                    scratch = eax
-                else:
-                    scratch = ecx
-                if opvalue.width == 1:
-                    mc.MOV(scratch.lowest8bits, opvalue)
-                else:
-                    mc.MOV(scratch, opvalue)
-                opvalue = scratch.lowest8bits
+        op_size = array_item_operand(allocator, None,
+                                     self.varsizealloctoken, srcop)
+        allocator.release(self.gv_length)
+        dstop = allocator.create_reg(self)
+        allocator.mc.LEA(dstop, op_size)
+
+class OpGetter(Operation):
+    side_effects = False
+    def generate(self, allocator):
+        opsource = self.generate_opsource(allocator)
+        dstop = allocator.create_reg(self)
+        if self.getwidth() == WORD:
+            allocator.mc.MOV(dstop, opsource)
         else:
-            assert itemsize == 2
-            if isinstance(opvalue, MODRM) or type(opvalue) is IMM32:
-                # no support for now to encode 16-bit immediates,
-                # so we use a scratch register for this case too
-                if opmemtarget.involves_ecx():    # grumble!
-                    mc.PUSH(eax)
-                    must_pop_eax = True
-                    scratch = eax
-                else:
-                    scratch = ecx
-                mc.MOV(scratch, opvalue)
-                opvalue = scratch
-            mc.o16()    # prefix for the MOV below
-        # and eventually, the real store:
-        mc.MOV(opmemtarget, opvalue)
-        if must_pop_eax:
-            mc.POP(eax)
-
-def hard_load(mc, opdst, opmemsource, itemsize):
-    # For the possibly hard cases of stores
-    # Generates a load from 'opmemsource' of size 'itemsize' == 1, 2 or 4.
-    # If it is 1, opmemtarget must be a MODRM8; otherwise, it must be a MODRM.
-    if itemsize == WORD:
-        try:
-            mc.MOV(opdst, opmemsource)
-        except FailedToImplement:               # opdst is a MODRM
-            if opmemsource.involves_ecx():
-                mc.PUSH(opmemsource)
-                mc.POP(opdst)
-            else:
-                mc.MOV(ecx, opmemsource)
-                mc.MOV(opdst, ecx)
-    else:
-        try:
-            mc.MOVZX(opdst, opmemsource)
-        except FailedToImplement:               # opdst is a MODRM
-            if opmemsource.involves_ecx():
-                mc.PUSH(eax)
-                mc.MOVZX(eax, opmemsource)
-                mc.MOV(opdst, eax)
-                mc.POP(eax)
+            allocator.mc.MOVZX(dstop, opsource)
+
+class OpSetter(Operation):
+    def generate(self, allocator):
+        tmpval = None
+        width = self.getwidth()
+        opvalue = allocator.get_operand(self.gv_value)
+        if width == 1:
+            try:
+                opvalue = opvalue.lowest8bits()
+            except ValueError:
+                tmpval = allocator.create_scratch_reg8(opvalue)
+                opvalue = tmpval
             else:
-                mc.MOVZX(ecx, opmemsource)
-                mc.MOV(opdst, ecx)
+                if isinstance(opvalue, MODRM8):
+                    tmpval = allocator.create_scratch_reg8(opvalue)
+                    opvalue = tmpval
+        else:
+            if isinstance(opvalue, MODRM):
+                tmpval = allocator.create_scratch_reg(opvalue)
+                opvalue = tmpval
+        optarget = self.generate_optarget(allocator)
+        if width == 2:
+            if isinstance(opvalue, IMM32):
+                opvalue = IMM16(opvalue.value)
+            allocator.mc.o16()
+        allocator.mc.MOV(optarget, opvalue)
+        if tmpval is not None:
+            allocator.end_clobber(tmpval)
 
-class OpGetField(Operation):
+class OpGetField(OpGetter):
     clobbers_cc = False
     def __init__(self, fieldtoken, gv_ptr):
         self.fieldtoken = fieldtoken
         self.gv_ptr = gv_ptr
-    def allocate(self, allocator):
+    def getwidth(self):
+        _, fieldsize = self.fieldtoken
+        return fieldsize
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_ptr)
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # result not used
+    def generate_opsource(self, allocator):
         opptr = allocator.get_operand(self.gv_ptr)
-        mc = allocator.mc
-        opsource = field_operand(mc, opptr, self.fieldtoken)
-        _, fieldsize = self.fieldtoken
-        hard_load(mc, dstop, opsource, fieldsize)
+        opsource = field_operand(allocator, opptr, self.fieldtoken)
+        allocator.release(self.gv_ptr)
+        return opsource
 
-class OpSetField(Operation):
+class OpSetField(OpSetter):
     clobbers_cc = False
-    result_kind = RK_NO_RESULT
     def __init__(self, fieldtoken, gv_ptr, gv_value):
         self.fieldtoken = fieldtoken
         self.gv_ptr   = gv_ptr
         self.gv_value = gv_value
-    def allocate(self, allocator):
+    def getwidth(self):
+        _, fieldsize = self.fieldtoken
+        return fieldsize
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_ptr)
         allocator.using(self.gv_value)
-    def generate(self, allocator):
+    def generate_optarget(self, allocator):
         opptr   = allocator.get_operand(self.gv_ptr)
-        opvalue = allocator.get_operand(self.gv_value)
-        mc = allocator.mc
-        optarget = field_operand(mc, opptr, self.fieldtoken)
-        _, fieldsize = self.fieldtoken
-        hard_store(mc, optarget, opvalue, fieldsize)
+        optarget = field_operand(allocator, opptr, self.fieldtoken)
+        allocator.release(self.gv_ptr)
+        allocator.release(self.gv_value)
+        return optarget
 
-class OpGetArrayItem(Operation):
+class OpGetArrayItem(OpGetter):
     def __init__(self, arraytoken, gv_array, gv_index):
         self.arraytoken = arraytoken
         self.gv_array = gv_array
         self.gv_index = gv_index
-    def allocate(self, allocator):
+    def getwidth(self):
+        _, _, itemsize = self.arraytoken
+        return itemsize
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_array)
         allocator.using(self.gv_index)
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # result not used
+    def generate_opsource(self, allocator):
         oparray = allocator.get_operand(self.gv_array)
         opindex = allocator.get_operand(self.gv_index)
-        mc = allocator.mc
-        opsource = array_item_operand(mc, oparray, self.arraytoken, opindex)
-        _, _, itemsize = self.arraytoken
-        hard_load(mc, dstop, opsource, itemsize)
+        opsource = array_item_operand(allocator, oparray,
+                                      self.arraytoken, opindex)
+        allocator.release(self.gv_array)
+        allocator.release(self.gv_index)
+        return opsource
+
+class OpGetArraySubstruct(OpGetArrayItem):
+    def generate(self, allocator):
+        opsource = self.generate_opsource(allocator)
+        dstop = allocator.create_reg(self)
+        allocator.mc.LEA(dstop, opsource)
 
-class OpSetArrayItem(Operation):
-    result_kind = RK_NO_RESULT
+class OpSetArrayItem(OpSetter):
     def __init__(self, arraytoken, gv_array, gv_index, gv_value):
         self.arraytoken = arraytoken
         self.gv_array = gv_array
         self.gv_index = gv_index
         self.gv_value = gv_value
-    def allocate(self, allocator):
-        allocator.using(self.gv_array)
-        allocator.using(self.gv_index)
-        allocator.using(self.gv_value)
-    def generate(self, allocator):
-        oparray = allocator.get_operand(self.gv_array)
-        opindex = allocator.get_operand(self.gv_index)
-        opvalue = allocator.get_operand(self.gv_value)
-        mc = allocator.mc
-        optarget = array_item_operand(mc, oparray, self.arraytoken, opindex)
+    def getwidth(self):
         _, _, itemsize = self.arraytoken
-        hard_store(mc, optarget, opvalue, itemsize)
-
-class OpGetArraySubstruct(Operation):
-    def __init__(self, arraytoken, gv_array, gv_index):
-        self.arraytoken = arraytoken
-        self.gv_array = gv_array
-        self.gv_index = gv_index
-    def allocate(self, allocator):
+        return itemsize
+    def mark_used_vars(self, allocator):
         allocator.using(self.gv_array)
         allocator.using(self.gv_index)
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # result not used
+        allocator.using(self.gv_value)
+    def generate_optarget(self, allocator):
         oparray = allocator.get_operand(self.gv_array)
         opindex = allocator.get_operand(self.gv_index)
-        mc = allocator.mc
-        opsource = array_item_operand(mc, oparray, self.arraytoken, opindex)
-        try:
-            mc.LEA(dstop, opsource)
-        except FailedToImplement:
-            mc.LEA(ecx, opsource)
-            mc.MOV(dstop, ecx)
-
-class OpGetFrameBase(Operation):
-    def generate(self, allocator):
-        try:
-            dstop = allocator.get_operand(self)
-        except KeyError:
-            return    # result not used
-        mc = allocator.mc
-        mc.MOV(dstop, ebp)
+        opsource = array_item_operand(allocator, oparray,
+                                      self.arraytoken, opindex)
+        allocator.release(self.gv_array)
+        allocator.release(self.gv_index)
+        allocator.release(self.gv_value)
+        return opsource
 
 # ____________________________________________________________
 
@@ -890,7 +902,88 @@
     assert 0 <= cond < INSN_JMP
     return cond ^ 1
 
+def cond_swapargs(cond):
+    return COND_SWAPARGS[cond]
+
+COND_SWAPARGS = range(16)
+COND_SWAPARGS[Conditions['L']]  = Conditions['G']
+COND_SWAPARGS[Conditions['G']]  = Conditions['L']
+COND_SWAPARGS[Conditions['NL']] = Conditions['NG']
+COND_SWAPARGS[Conditions['NG']] = Conditions['NL']
+COND_SWAPARGS[Conditions['B']]  = Conditions['A']
+COND_SWAPARGS[Conditions['A']]  = Conditions['B']
+COND_SWAPARGS[Conditions['NB']] = Conditions['NA']
+COND_SWAPARGS[Conditions['NA']] = Conditions['NB']
+
 SIZE2SHIFT = {1: 0,
               2: 1,
               4: 2,
               8: 3}
+
+# ____________________________________________________________
+
+class CCFLAG(OPERAND):
+    _attrs_ = ['cc', 'SETCOND', 'load_into_cc']
+    def __init__(self, cond, load_into_cc):
+        self.cond = cond
+        self.cc = Conditions[cond]
+        self.SETCOND = getattr(I386CodeBuilder, 'SET' + cond)
+        self.load_into_cc = load_into_cc
+
+    def assembler(self):
+        return self.cond
+
+
+def load_into_cc_lt(mc, srcop):
+    mc.XOR(ecx, ecx)
+    mc.CMP(ecx, srcop)
+
+def load_into_cc_le(mc, srcop):
+    mc.MOV(ecx, imm8(1))
+    mc.CMP(ecx, srcop)
+
+def load_into_cc_eq(mc, srcop):
+    mc.CMP(srcop, imm8(1))
+
+def load_into_cc_ne(mc, srcop):
+    mc.CMP(srcop, imm8(0))
+
+load_into_cc_gt = load_into_cc_ne
+load_into_cc_ge = load_into_cc_eq
+
+ccflag_lt = CCFLAG('L',  load_into_cc_lt)
+ccflag_le = CCFLAG('LE', load_into_cc_le)
+ccflag_eq = CCFLAG('E',  load_into_cc_eq)
+ccflag_ne = CCFLAG('NE', load_into_cc_ne)
+ccflag_gt = CCFLAG('G',  load_into_cc_gt)
+ccflag_ge = CCFLAG('GE', load_into_cc_ge)
+
+ccflag_ult = CCFLAG('B',  load_into_cc_lt)
+ccflag_ule = CCFLAG('BE', load_into_cc_le)
+ccflag_ugt = CCFLAG('A',  load_into_cc_gt)
+ccflag_uge = CCFLAG('AE', load_into_cc_ge)
+
+ccflags = [None] * 16
+ccflags[Conditions['L']]  = ccflag_lt
+ccflags[Conditions['LE']] = ccflag_le
+ccflags[Conditions['E']]  = ccflag_eq
+ccflags[Conditions['NE']] = ccflag_ne
+ccflags[Conditions['G']]  = ccflag_gt
+ccflags[Conditions['GE']] = ccflag_ge
+ccflags[Conditions['B']]  = ccflag_ult
+ccflags[Conditions['BE']] = ccflag_ule
+ccflags[Conditions['A']]  = ccflag_ugt
+ccflags[Conditions['AE']] = ccflag_uge
+
+##def ccmov(mc, dstop, ccop):
+##    XXX
+##    if dstop != ccop:
+##        ccop.SETCOND(mc, cl)
+##        if isinstance(dstop, CCFLAG):
+##            dstop.load_into_cc(mc, cl)
+##        else:
+##            try:
+##                mc.MOVZX(dstop, cl)
+##            except FailedToImplement:
+##                mc.MOVZX(ecx, cl)
+##                mc.MOV(dstop, ecx)

Modified: pypy/branch/new-jit-codegen/i386/regalloc.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/regalloc.py	(original)
+++ pypy/branch/new-jit-codegen/i386/regalloc.py	Wed Feb  7 13:44:55 2007
@@ -2,6 +2,7 @@
 
 """
 
+import sys
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.lltypesystem import lltype
 from pypy.jit.codegen.i386.operation import *
@@ -10,7 +11,7 @@
 
 
 class StackOpCache:
-    INITIAL_STACK_EBP_OFS = -4
+    INITIAL_STACK_EBP_OFS = -1
 stack_op_cache = StackOpCache()
 stack_op_cache.lst = []
 
@@ -27,276 +28,541 @@
     ofs = op.ofs_relative_to_ebp()
     return StackOpCache.INITIAL_STACK_EBP_OFS - ofs / WORD
 
+def write_stack_reserve(mc, stackn):
+    addr = mc.tell()
+    offset = WORD * ((StackOpCache.INITIAL_STACK_EBP_OFS+1) - stackn)
+    mc.ADD(esp, IMM32(offset))     # always encode offset on 32 bits
+    return addr
+
+def write_stack_adj(mc, stackn):
+    addr = mc.tell()
+    offset = WORD * ((StackOpCache.INITIAL_STACK_EBP_OFS+1) - stackn)
+    mc.LEA(esp, fixedsize_esp_ofs(offset))
+    return addr
+
 
 class RegAllocator(object):
-    AVAILABLE_REGS = [eax, edx, ebx, esi, edi]   # XXX ecx reserved for stuff
 
-    # 'gv' -- GenVars, used as arguments and results of operations
-    #
-    # 'loc' -- location, a small integer that represents an abstract
-    #          register number
-    #
-    # 'operand' -- a concrete machine code operand, which can be a
-    #              register (ri386.eax, etc.) or a stack memory operand
-
-    def __init__(self):
-        self.nextloc = 0
-        self.var2loc = {}
-        self.available_locs = []
-        self.force_loc2operand = {}
-        self.force_operand2loc = {}
-        self.initial_moves = []
-        self.num_stack_locs = 0
+    def __init__(self, operations):
+        self.operations = operations
+        self.operationindex = len(operations)
+        self.lifetime = {}                 # {variable: operation_index}
+        self.suggested_location = {}       # {variable: location}
+        self.var2loc = {gv_frame_base: ebp}
+
+    # ----------
 
     def set_final(self, final_vars_gv):
         for v in final_vars_gv:
             self.using(v)
 
-    def creating(self, v):
-        try:
-            loc = self.var2loc[v]
-        except KeyError:
-            pass
-        else:
-            if loc >= self.num_stack_locs:
-                self.available_locs.append(loc) # now available again for reuse
-
-    def using(self, v):
-        if not v.is_const and v not in self.var2loc:
-            try:
-                loc = self.available_locs.pop()
-            except IndexError:
-                loc = self.nextloc
-                self.nextloc += 1
-            self.var2loc[v] = loc
-
-    def creating_cc(self, v):
-        if self.need_var_in_cc is v:
-            # common case: v is a compare operation whose result is precisely
-            # what we need to be in the CC
-            self.need_var_in_cc = None
-        self.creating(v)
-
-    def save_cc(self):
-        # we need a value to be in the CC, but we see a clobbering
-        # operation, so we copy the original CC-creating operation down
-        # past the clobbering operation.
-        # <pedronis> completely obscure code
-        # <arigo> yes, well, needs very careful reviewing I guess :-)
-        v = self.need_var_in_cc
-        if not we_are_translated():
-            assert v in self.operations[:self.operationindex]
-        v = v.copy()
-        self.operations.insert(self.operationindex, v)
-        v.allocate(self)
-        self.need_var_in_cc = None
-
-    def using_cc(self, v):
-        assert isinstance(v, Operation)
-        assert 0 <= v.cc_result < INSN_JMP
-        if self.need_var_in_cc is not None and self.need_var_in_cc is not v:
-            self.save_cc()
-        self.need_var_in_cc = v
+    def set_final_at_loc(self, final_vars_gv, locations):
+        for i in range(len(final_vars_gv)):
+            v = final_vars_gv[i]
+            self.using(v)
+            self.suggested_location[v] = locations[i]
 
-    def allocate_locations(self, operations):
-        # assign locations to gvars
-        self.operations = operations
-        self.need_var_in_cc = None
-        self.operationindex = len(operations)
-        for i in range(len(operations)-1, -1, -1):
-            v = operations[i]
-            if (self.need_var_in_cc is not None and
-                self.need_var_in_cc is not v and v.clobbers_cc):
-                self.save_cc()
-            kind = v.result_kind
-            if kind == RK_WORD:
-                self.creating(v)
-            elif kind == RK_CC:
-                self.creating_cc(v)
-            v.allocate(self)
+    def compute_lifetimes(self):
+        for i in range(len(self.operations)-1, -1, -1):
             self.operationindex = i
-        if self.need_var_in_cc is not None:
-            self.save_cc()
+            op = self.operations[i]
+            if not op.side_effects and op not in self.lifetime:
+                self.operations[i] = dead_operation   # operation not used
+            else:
+                op.mark_used_vars(self)
+
+    def using(self, v):
+        if v.is_const or v in self.lifetime:
+            return False
+        else:
+            self.lifetime[v] = self.operationindex
+            return True    # variable is dying here
 
-    def force_var_operands(self, force_vars, force_operands, at_start):
-        force_loc2operand = self.force_loc2operand
-        force_operand2loc = self.force_operand2loc
-        for i in range(len(force_vars)):
-            v = force_vars[i]
-            operand = force_operands[i]
+    def using_inplace(self, v, vtarget):
+        if self.using(v):
+            # this operation 'vtarget' can modify its argument 'v'
+            # in-place, and 'v' is not alive after the operation.
+            # Propagate the suggested location for 'vtarget' backwards to 'v'.
             try:
-                loc = self.var2loc[v]
+                self.suggested_location[v] = self.suggested_location[vtarget]
+                return True    # got a suggestion
             except KeyError:
-                if at_start:
-                    pass    # input variable not used anyway
-                else:
-                    self.add_final_move(v, operand, make_copy=v.is_const)
+                pass
+        return False    # got no suggestion
+
+    def suggests(self, v, loc):
+        self.suggested_location[v] = loc
+
+    def varsused(self):
+        return self.lifetime.keys()
+
+    # ----------
+
+    AVAILABLE_REGS = (eax.bitmask |
+                      edx.bitmask |
+                      ecx.bitmask |
+                      ebx.bitmask |
+                      esi.bitmask |
+                      edi.bitmask)
+
+    def init_reg_alloc(self, inputvars_gv, inputlocations):
+        self.registers_free = self.AVAILABLE_REGS     # bitmask
+        self.cc_used_by = None
+        self.stack_op_used = {}
+        self.nstackidx = 0
+        self.nstackmax = 0
+        self.vars_in_use = {}     # {variable: dying_operation_index}
+        self.operationindex = 0
+        self.inputvars_gv = inputvars_gv
+        self.inputlocations = inputlocations
+
+    def force_loc_used(self, v, loc):
+        if isinstance(loc, MODRM):
+            assert loc not in self.stack_op_used
+            self.stack_op_used[loc] = None
+            n = stack_n_from_op(loc)
+            if n >= self.nstackmax:
+                self.nstackmax = n + 1
+        elif isinstance(loc, REG):
+            assert self.registers_free & loc.bitmask
+            self.registers_free &= ~loc.bitmask
+        elif isinstance(loc, CCFLAG):
+            self.cc_used_by = v
+        else:
+            raise AssertionError(loc)
+
+    def consume_loc(self, v, loc):
+        if isinstance(loc, MODRM):
+            if loc not in self.stack_op_used:
+                self.stack_op_used[loc] = None
+                return True
+        elif isinstance(loc, REG):
+            if self.registers_free & loc.bitmask:
+                self.registers_free &= ~loc.bitmask
+                return True
+        elif isinstance(loc, CCFLAG):
+            if self.cc_used_by is None:
+                self.cc_used_by = v
+                return True
+        return False
+
+    def _no_longer_in_use(self, v):
+        del self.vars_in_use[v]
+        loc = self.var2loc[v]
+        if isinstance(loc, CCFLAG):
+            assert self.cc_used_by is v
+        self._mark_loc_as_free(loc)
+
+    def _mark_loc_as_free(self, loc):
+        if isinstance(loc, MODRM):
+            del self.stack_op_used[loc]
+        elif isinstance(loc, REG):
+            self.registers_free |= loc.bitmask
+        elif isinstance(loc, CCFLAG):
+            self.cc_used_by = None
+
+    def generate_operations(self, mc):
+        if not we_are_translated():
+            print
+
+        # reserve locations for the inputvars
+        for i in range(len(self.inputvars_gv)):
+            v = self.inputvars_gv[i]
+            if v in self.lifetime:   # else: input argument is not used
+                loc = self.inputlocations[i]
+                self.var2loc[v] = loc
+                self.vars_in_use[v] = self.lifetime[v]
+                self.force_loc_used(v, loc)
+                if not we_are_translated():
+                    print 'in %20s:  %s' % (loc, short(v))
+
+        self._check()
+        self.mc = mc
+        # Generate all operations.
+        # Actual registers or stack locations are allocated as we go.
+        for i in range(len(self.operations)):
+            op = self.operations[i]
+            if op.clobbers_cc:
+                self.clobber_cc()
+                self._check()
+            op.generate(self)
+            if not we_are_translated():
+                self._showprogress()
+            self.operationindex = i + 1
+
+    def _showprogress(self):
+        class Collector:
+            def __init__(self):
+                self.lst = []
+                self.using = self.lst.append
+            def using_inplace(self, v, _):
+                self.lst.append(v)
+            def suggests(self, v, loc):
+                pass
+        col = Collector()
+        i = self.operationindex
+        op = self.operations[i]
+        op.mark_used_vars(col)
+        args = [short(v) for v in col.lst]
+        args = ', '.join(args)
+        print ' | %20s:  %s (%s)' % (self.var2loc.get(op, ''),
+                                     short(op), args)
+        for v, endtime in self.vars_in_use.items():
+            assert endtime > i
+        self._check()
+
+    def _use_another_stack_loc(self):
+        for i in range(self.nstackidx, self.nstackmax):
+            loc = stack_op(i)
+            if loc not in self.stack_op_used:
+                self.nstackidx = i + 1
+                break
+        else:
+            for i in range(self.nstackidx):
+                loc = stack_op(i)
+                if loc not in self.stack_op_used:
+                    self.nstackidx = i + 1
+                    break
             else:
-                # we need to make of copy of this var if we have conflicting
-                # requirements about where it should go:
-                #  * its location is forced to another operand
-                #  * the operand is assigned to another location
-                #  * it should be in the stack, but it is not
-                if (loc in force_loc2operand or operand in force_operand2loc or
-                    (loc < self.num_stack_locs and not (
-                                 isinstance(operand, MODRM)
-                                 and operand.is_relative_to_ebp()))):
-                    if at_start:
-                        self.initial_moves.append((loc, operand))
-                    else:
-                        self.add_final_move(v, operand, make_copy=True)
-                else:
-                    force_loc2operand[loc] = operand
-                    force_operand2loc[operand] = loc
-
-    def add_final_move(self, v, targetoperand, make_copy):
-        if make_copy:
-            v = OpSameAs(v)
-            self.operations.append(v)
-        loc = self.nextloc
-        self.nextloc += 1
+                i = self.nstackidx = self.nstackmax
+                self.nstackmax = i + 1
+                loc = stack_op(i)
+                assert loc not in self.stack_op_used
+        self.stack_op_used[loc] = None
+        return loc
+
+    def reserve_extra_stack(self, extra):
+        max = self.nstackmax
+        base = max - extra
+        if base < 0:
+            base = 0
+        while max > base and stack_op(max-1) not in self.stack_op_used:
+            max -= 1
+        self.nstackmax = max + extra
+
+    def get_operand(self, v):
+        if v.is_const:
+            return imm(v.revealconst(lltype.Signed))
+        else:
+            return self.var2loc[v]
+
+    def _use_next_modrm(self, v, regnum_must_be_before=8):
+        """Select the next mod/rm location to use for the new operation 'v'.
+        If 'v' is None, this will always return a register; else it might
+        decide to immediately create 'v' in a stack location.
+        """
+        #print self.registers_free
+        if self.registers_free:
+            for i in range(regnum_must_be_before-1, -1, -1):
+                if self.registers_free & (1 << i):
+                    self.registers_free &= ~ (1 << i)
+                    return registers[i]
+        # spill the register holding the variable that has the longest
+        # time remaining to live (it may be our 'v' itself)
+        if v is None:
+            dyinglimit = self.operationindex  # must pick vars dying after that
+            spillvar = None
+        else:
+            dyinglimit = self.lifetime[v]
+            spillvar = v  # initial guess, can be overridden in the loop below
+        regloc = None
+        for v1, dying in self.vars_in_use.iteritems():
+            if dying > dyinglimit:
+                loc = self.var2loc[v1]
+                if not isinstance(loc, REG):
+                    continue
+                if loc.op >= regnum_must_be_before:
+                    continue   # never reached if regnum_must_be_before == 8
+                regloc = loc
+                dyinglimit = dying
+                spillvar = v1
+        if spillvar is None:
+            raise OutOfRegistersError
+        #print 'time span of %s: now is %d, lives until %d' % (
+        #    v, self.operationindex, self.lifetime[v])
+        if spillvar is v:
+            return self._use_another_stack_loc()
+        else:
+            assert regloc is not None
+            self._spill(spillvar, regloc)
+            return regloc
+
+    def _spill(self, spillvar, oldloc):
+        spillloc = self._use_another_stack_loc()
+        if not we_are_translated():
+            print ' # %20s:  SPILL %s' % (spillloc, oldloc)
+        self.mc.MOV(spillloc, oldloc)
+        self.var2loc[spillvar] = spillloc
+        return spillloc
+
+    def _use_next_reg(self):
+        return self._use_next_modrm(None)
+
+    def _use_next_reg_abcd(self):
+        return self._use_next_modrm(None, regnum_must_be_before=4)
+
+    def _created(self, v, loc):
+        assert v not in self.var2loc
+        self.vars_in_use[v] = ltime = self.lifetime[v]
+        assert ltime > self.operationindex
         self.var2loc[v] = loc
-        self.force_loc2operand[loc] = targetoperand
 
-    def allocate_registers(self):
-        # assign registers to locations that don't have one already
-        force_loc2operand = self.force_loc2operand
-        operands = []
-        seen_regs = 0
-        seen_stackn = {}
-        last_seen_stackn = -1
-        for op in force_loc2operand.values():
-            if isinstance(op, REG):
-                seen_regs |= 1 << op.op
-            elif isinstance(op, MODRM):
-                n = stack_n_from_op(op)
-                seen_stackn[n] = None
-                if n > last_seen_stackn:
-                    last_seen_stackn = n
-        i = 0
-        stackn = 0
-        num_stack_locs = self.num_stack_locs
-        for loc in range(self.nextloc):
-            try:
-                operand = force_loc2operand[loc]
-            except KeyError:
-                try:
-                    # try to grab the next free register,
-                    # unless this location is forced to go to the stack
-                    if loc < num_stack_locs:
-                        raise IndexError
-                    while True:
-                        operand = RegAllocator.AVAILABLE_REGS[i]
-                        i += 1
-                        if not (seen_regs & (1 << operand.op)):
-                            break
-                except IndexError:
-                    while stackn in seen_stackn:
-                        stackn += 1
-                    operand = stack_op(stackn)
-                    stackn += 1
-            operands.append(operand)
-        self.operands = operands
-        if stackn <= last_seen_stackn:
-            stackn = last_seen_stackn + 1
-        self.required_frame_depth = stackn
-
-    def get_operand(self, gv_source):
-        if gv_source.is_const:
-            return imm(gv_source.revealconst(lltype.Signed))
+    def release(self, v):
+        """Stop using argument 'v'.  Must be called for each used argument."""
+        ok = self.lastuse(v) and v in self.vars_in_use
+        if ok:
+            self._no_longer_in_use(v)
+        return ok
+
+    def lastuse(self, v):
+        """Is this the last time the argument 'v' is used?"""
+        if v.is_const:
+            return False
         else:
-            loc = self.var2loc[gv_source]
-            return self.operands[loc]
+            endtime = self.lifetime[v]
+            assert endtime >= self.operationindex
+            return endtime == self.operationindex
+
+    def create(self, v, suggested_loc=None):
+        """Create the result of the operation 'v', possibly at the
+        suggested location.  CAN SPILL ONE REGISTER."""
+        if suggested_loc is not None and self.consume_loc(v, suggested_loc):
+            self._created(v, suggested_loc)
+            return suggested_loc
+        suggested_loc = self.suggested_location.get(v, None)
+        if suggested_loc is not None and self.consume_loc(v, suggested_loc):
+            self._created(v, suggested_loc)
+            return suggested_loc
+        loc = self._use_next_modrm(v)
+        self._created(v, loc)
+        return loc
+
+    def create_reg(self, v):
+        """Create the result of the operation 'v' in any register
+        currently available.  CAN SPILL ONE REGISTER."""
+        suggested_loc = self.suggested_location.get(v, None)
+        if isinstance(suggested_loc, REG):
+            if self.consume_loc(v, suggested_loc):
+                self._created(v, suggested_loc)
+                return suggested_loc
+        loc = self._use_next_reg()
+        self._created(v, loc)
+        return loc
+
+    def create_exactly_at(self, v, loc):
+        """Create the result of the operation 'v' at 'loc'."""
+        ok = self.consume_loc(v, loc)
+        assert ok
+        self._created(v, loc)
+
+    def create_in_cc(self, v, ccloc):
+        """Create the result of the operation 'v' in the given cc flags.
+        Doesn't move stuff around."""
+        assert self.cc_used_by is None
+        self._created(v, ccloc)
+        self.cc_used_by = v
+
+    def create_scratch_reg(self, srcloc=None):
+        """Return a scratch register for the current operation.
+        Warning, this might be the same register as one of the input args.
+        CAN SPILL ONE REGISTER.  You must eventually call end_clobber()."""
+        reg = self._use_next_reg()
+        if srcloc is not None and reg is not srcloc:
+            self.mc.MOV(reg, srcloc)
+        return reg
+
+    def create_scratch_reg8(self, srcloc=None):
+        reg32 = self._use_next_reg_abcd()
+        reg8 = reg32.lowest8bits()
+        if srcloc is not None and reg8 is not srcloc and reg32 is not srcloc:
+            if srcloc.width == 1:
+                self.mc.MOV(reg8, srcloc)
+            else:
+                self.mc.MOV(reg32, srcloc)
+        return reg8
+
+    def operation_result_is_used(self, v):
+        return v in self.lifetime
+
+    def clobber(self, reg):
+        """Clobbers a register, i.e. move away a value that would be there.
+        It might go to a different register or to the stack.
+        You must eventually call end_clobber()."""
+        assert isinstance(reg, REG)
+        if not self.registers_free & reg.bitmask:
+            for v1 in self.vars_in_use:
+                if self.var2loc[v1] == reg:
+                    self._move_away(v1)
+                    break
+            assert self.registers_free & reg.bitmask
+        self.registers_free &= ~reg.bitmask
+
+    def clobber2(self, reg1, reg2):
+        """Clobbers two registers.  Unlike two individual clobber() calls,
+        where the first call might overwrite the other reg, this one
+        preserves the current content of both 'reg1' and 'reg2'.
+        You must eventually call end_clobber() twice."""
+        if not self.registers_free & reg2.bitmask:
+            # order trick: reg2 is in use here.  (In the other case, where
+            # reg2 is free but reg1 used, clobbering reg1 first could move
+            # reg1's value to reg2, and then immediately away from reg2.)
+            self.clobber(reg1)     # <- here reg1 cannot go to reg2
+            self.clobber(reg2)
+        else:
+            self.clobber(reg2)     # reg2 is free, so it doesn't go anywhere
+            self.clobber(reg1)
+
+    def clobber3(self, reg1, reg2, reg3):
+        if not self.registers_free & reg3.bitmask:
+            self.clobber2(reg1, reg2)    # they cannot go to reg3
+            self.clobber(reg3)
+        else:
+            self.clobber(reg3)           # free, so doesn't go anywhere
+            self.clobber2(reg1, reg2)
+
+    def end_clobber(self, reg):
+        assert isinstance(reg, REG)
+        self.registers_free |= reg.bitmask
+
+    def clobber_cc(self):
+        v = self.cc_used_by
+        if v is not None:
+            self.cc_used_by = None
+            # pick a newloc that is either one of [eax, ecx, edx, ebx]
+            # or a stack location
+            oldloc = self.var2loc[v]
+            newloc = self._use_next_modrm(v, regnum_must_be_before=4)
+            if not we_are_translated():
+                print ' # %20s:  MOVE AWAY FROM %s' % (newloc, oldloc)
+            assert isinstance(oldloc, CCFLAG)
+            mc = self.mc
+            newloc8 = newloc.lowest8bits()
+            if isinstance(newloc, REG):
+                oldloc.SETCOND(mc, newloc8)
+                mc.MOVZX(newloc, newloc8)
+            else:
+                mc.MOV(newloc, imm8(0))
+                oldloc.SETCOND(mc, newloc8)
+            self._mark_loc_as_free(oldloc)
+            self.var2loc[v] = newloc
+
+    def lock(self, loc):
+        """Temporarily prevent 'loc' from being overwritten by the
+        functions marked as 'moves stuff around'.  Return True if the
+        lock is successful, False if the location was not free in the
+        first place."""
+        return self.consume_loc(None, loc)
+
+    def unlock(self, loc):
+        """Call sometime after a lock() that returned True."""
+        self._mark_loc_as_free(loc)
+
+    def _move_away(self, v):
+        # move 'v' away, into a newly allocated register or stack location,
+        # possibly spilling another register
+        oldloc = self.var2loc[v]
+        newloc = self._use_next_modrm(v)
+        if not we_are_translated():
+            print ' # %20s:  MOVE AWAY FROM %s' % (newloc, oldloc)
+        self.mc.MOV(newloc, oldloc)
+        self._mark_loc_as_free(oldloc)
+        self.var2loc[v] = newloc
+        return newloc
 
-    def load_location_with(self, loc, gv_source):
-        dstop = self.operands[loc]
-        srcop = self.get_operand(gv_source)
-        if srcop != dstop:
-            self.mc.MOV(dstop, srcop)
-        return dstop
-
-    def generate_initial_moves(self):
-        initial_moves = self.initial_moves
-        # first make sure that the reserved stack frame is big enough
-        last_n = self.required_frame_depth - 1
-        for loc, srcoperand in initial_moves:
-            if isinstance(srcoperand, MODRM):
-                n = stack_n_from_op(srcoperand)
-                if last_n < n:
-                    last_n = n
-        if last_n >= 0:
-            if CALL_ALIGN > 1:
-                last_n = (last_n & ~(CALL_ALIGN-1)) + (CALL_ALIGN-1)
-            self.required_frame_depth = last_n + 1
-            self.mc.LEA(esp, stack_op(last_n))
+    def _check(self):
+        if not we_are_translated():
+            def unpackbitmask(x):
+                return dict.fromkeys([r for r in registers if x & r.bitmask])
+            rf = unpackbitmask(self.AVAILABLE_REGS)
+            locs_seen = {}
+            for v in self.vars_in_use:
+                loc = self.var2loc[v]
+                assert loc not in locs_seen
+                locs_seen[loc] = v
+                if isinstance(loc, REG):
+                    del rf[loc]
+            assert unpackbitmask(self.registers_free) == rf
+
+    # ----------
+
+    def generate_final_moves(self, final_vars_gv, locations):
         # XXX naive algo for now
-        for loc, srcoperand in initial_moves:
-            if self.operands[loc] != srcoperand:
-                self.mc.PUSH(srcoperand)
-        initial_moves.reverse()
-        for loc, srcoperand in initial_moves:
-            if self.operands[loc] != srcoperand:
-                self.mc.POP(self.operands[loc])
-
-    def randomize_stack(self):
-        import random
-        last_n = self.required_frame_depth - 1
-        for i in range(last_n+1, last_n+50):
-            self.mc.MOV(ecx, stack_op(i))
-            self.mc.LEA(ecx, mem(ecx, random.randrange(-sys.maxint,
-                                                       sys.maxint)))
-            self.mc.MOV(stack_op(i), ecx)
-        self.mc.LEA(ecx, mem(ecx, random.randrange(-sys.maxint,
-                                                   sys.maxint)))
-
-    def generate_operations(self):
-        for v in self.operations:
-            if DEBUG_STACK:
-                self.randomize_stack()
-            v.generate(self)
-            cc = v.cc_result
-            if cc >= 0 and v in self.var2loc:
-                # force a comparison instruction's result into a
-                # regular location
-                dstop = self.get_operand(v)
-                mc = self.mc
-                insn = EMIT_SETCOND[cc]
-                insn(mc, cl)
-                try:
-                    mc.MOVZX(dstop, cl)
-                except FailedToImplement:
-                    mc.MOVZX(ecx, cl)
-                    mc.MOV(dstop, ecx)
-        if DEBUG_STACK:
-            self.randomize_stack()
-
-    def force_stack_storage(self, lst):
-        # this is called at the very beginning, so the 'loc' numbers
-        # computed here are the smaller ones
-        N = 0
-        for v, place in lst:
-            self.using(v)
-            loc = self.var2loc[v]
-            if loc >= N:
-                N = loc + 1
-        self.num_stack_locs = N
-
-    def save_storage_places(self, lst):
-        for v, place in lst:
-            loc = self.var2loc[v]
-            operand = self.operands[loc]
-            place.offset = operand.ofs_relative_to_ebp()
+        pops = []
+        for i in range(len(final_vars_gv)):
+            v = final_vars_gv[i]
+            if not v.is_const:
+                srcloc = self.var2loc[v]
+                dstloc = locations[i]
+                if srcloc != dstloc:
+                    if not we_are_translated():
+                        print ' > %20s--->->->---%s' % (srcloc, dstloc)
+                    if isinstance(srcloc, CCFLAG):
+                        self.mc.PUSH(imm8(0))
+                        srcloc.SETCOND(self.mc, mem8(esp))
+                    else:
+                        self.mc.PUSH(srcloc)
+                    pops.append(dstloc)
+        while pops:
+            dstloc = pops.pop()
+            self.mc.POP(dstloc)
+        for i in range(len(final_vars_gv)):
+            v = final_vars_gv[i]
+            if v.is_const:
+                dstloc = locations[i]
+                self.mc.MOV(dstloc, imm(v.revealconst(lltype.Signed)))
+
+
+class OutOfRegistersError(Exception):
+    pass
 
+def short(op, memo={}):
+    key = op.__class__.__name__
+    d = memo.setdefault(key, {})
+    try:
+        n = d[op]
+    except KeyError:
+        n = d[op] = len(d)
+    return '%s-%d' % (key, n)
+
+# ____________________________________________________________
+
+class DeadOperation(Operation):
+    clobbers_cc = False
+    side_effects = False
+    def mark_used_vars(self, allocator):
+        pass
+    def generate(self, allocator):
+        pass
+dead_operation = DeadOperation()
+forget_stack_storage = DeadOperation()
 
-class StorageInStack(GenVar):
+class StorageInStack(Op1):
     """Place of a variable that must live in the stack.  Its position is
-    choosen by the register allocator and put in the 'stackn' attribute."""
-    offset = 0
+    chosen by the register allocator and put in the 'offset' attribute."""
+
+    def generate(self, allocator):
+        # patch the lifetime of the variable if needed (XXX a bit slow)
+        x = self.x
+        i = allocator.lifetime.get(x, allocator.operationindex)
+        operations = allocator.operations
+        while i < len(operations):
+            if operations[i] is forget_stack_storage:
+                break
+            i += 1
+        allocator.lifetime[x] = i
+        allocator.vars_in_use[x] = i
+        # force it to be in the stack
+        srcop = allocator.get_operand(x)
+        if not isinstance(srcop, MODRM):
+            oldop = srcop
+            srcop = allocator._spill(x, srcop)
+            allocator._mark_loc_as_free(oldop)
+        # record its location
+        self.offset = srcop.ofs_relative_to_ebp()
+        # for places, self.x would keep lots of other Operations alive
+        self.x = None
 
     def get_offset(self):
-        assert self.offset != 0     # otherwise, RegAllocator bug
         return self.offset
 
-
-class Place(StorageInStack):
-    pass
+gv_frame_base = GenVar()

Modified: pypy/branch/new-jit-codegen/i386/rgenop.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/rgenop.py	(original)
+++ pypy/branch/new-jit-codegen/i386/rgenop.py	Wed Feb  7 13:44:55 2007
@@ -6,8 +6,10 @@
 from pypy.jit.codegen.model import ReplayBuilder, dummy_var
 from pypy.jit.codegen.i386.codebuf import CodeBlockOverflow
 from pypy.jit.codegen.i386.operation import *
-from pypy.jit.codegen.i386.regalloc import RegAllocator, StorageInStack, Place
-from pypy.jit.codegen.i386.regalloc import DEBUG_STACK
+from pypy.jit.codegen.i386.regalloc import RegAllocator
+from pypy.jit.codegen.i386.regalloc import DEBUG_STACK, forget_stack_storage
+from pypy.jit.codegen.i386.regalloc import gv_frame_base, StorageInStack
+from pypy.jit.codegen.i386.regalloc import write_stack_reserve, write_stack_adj
 from pypy.jit.codegen import conftest
 from pypy.rpython.annlowlevel import llhelper
 
@@ -84,16 +86,18 @@
 class FlexSwitch(CodeGenSwitch):
     REG = eax
 
-    def __init__(self, rgenop, inputargs_gv, inputoperands):
+    def __init__(self, rgenop, graphctx, inputargs_gv, inputoperands):
         self.rgenop = rgenop
+        self.graphctx = graphctx
         self.inputargs_gv = inputargs_gv
         self.inputoperands = inputoperands
         self.defaultcaseaddr = 0
 
     def initialize(self, mc):
+        self.graphctx.write_stack_adj(mc, initial=False)
         self._reserve(mc)
-        default_builder = Builder(self.rgenop, self.inputargs_gv,
-                                  self.inputoperands)
+        default_builder = Builder(self.rgenop, self.graphctx,
+                                  self.inputargs_gv, self.inputoperands)
         start = self.nextfreepos
         end   = self.endfreepos
         fullmc = self.rgenop.InMemoryCodeBuilder(start, end)
@@ -123,8 +127,8 @@
         
     def add_case(self, gv_case):
         rgenop = self.rgenop
-        targetbuilder = Builder(self.rgenop, self.inputargs_gv,
-                                self.inputoperands)
+        targetbuilder = Builder(self.rgenop, self.graphctx,
+                                self.inputargs_gv, self.inputoperands)
         try:
             self._add_case(gv_case, targetbuilder)
         except CodeBlockOverflow:
@@ -206,12 +210,12 @@
 class Builder(GenBuilder):
     coming_from = 0
     update_defaultcaseaddr_of = None
-    force_in_stack = None
     paused_alive_gv = None
     order_dependency = None
 
-    def __init__(self, rgenop, inputargs_gv, inputoperands):
+    def __init__(self, rgenop, graphctx, inputargs_gv, inputoperands):
         self.rgenop = rgenop
+        self.graphctx = graphctx
         self.inputargs_gv = inputargs_gv
         self.inputoperands = inputoperands
         self.operations = []
@@ -219,34 +223,27 @@
     def start_writing(self):
         self.paused_alive_gv = None
 
-    def generate_block_code(self, final_vars_gv, force_vars=[],
-                                                 force_operands=[],
-                                                 renaming=True,
-                                                 minimal_stack_depth=0):
+    def generate_block_code(self, final_vars_gv, force_vars=None,
+                                                 force_operands=None,
+                                                 renaming=True):
         if self.order_dependency is not None:
             self.order_dependency.force_generate_code()
             self.order_dependency = None
-        allocator = RegAllocator()
-        if self.force_in_stack is not None:
-            allocator.force_stack_storage(self.force_in_stack)
-        allocator.set_final(final_vars_gv)
+        allocator = RegAllocator(self.operations)
+        if final_vars_gv is not force_vars:
+            allocator.set_final(final_vars_gv)
+        if force_vars is not None:
+            allocator.set_final_at_loc(force_vars, force_operands)
         if not renaming:
-            final_vars_gv = allocator.var2loc.keys()  # unique final vars
-        allocator.allocate_locations(self.operations)
-        allocator.force_var_operands(force_vars, force_operands,
-                                     at_start=False)
-        allocator.force_var_operands(self.inputargs_gv, self.inputoperands,
-                                     at_start=True)
-        allocator.allocate_registers()
-        if allocator.required_frame_depth < minimal_stack_depth:
-            allocator.required_frame_depth = minimal_stack_depth
+            final_vars_gv = allocator.varsused()  # unique final vars
+        allocator.compute_lifetimes()
+        allocator.init_reg_alloc(self.inputargs_gv, self.inputoperands)
         mc = self.start_mc()
-        allocator.mc = mc
-        allocator.generate_initial_moves()
-        allocator.generate_operations()
-        if self.force_in_stack is not None:
-            allocator.save_storage_places(self.force_in_stack)
-            self.force_in_stack = None
+        allocator.generate_operations(mc)
+        if force_vars is not None:
+            allocator.generate_final_moves(force_vars, force_operands)
+        #print 'NSTACKMAX==============>', allocator.nstackmax
+        self.graphctx.ensure_stack_vars(allocator.nstackmax)
         del self.operations[:]
         if renaming:
             self.inputargs_gv = [GenVar() for v in final_vars_gv]
@@ -257,24 +254,12 @@
         return mc
 
     def enter_next_block(self, kinds, args_gv):
-        if self.force_in_stack is not None:
-            # force_in_stack would keep the variables alive until the end
-            # of the whole mc block, i.e. past the OpSameAs that we are
-            # about to introduce => duplication of the value.
-            mc = self.generate_block_code(args_gv)
-            assert len(self.inputargs_gv) == len(args_gv)
-            args_gv[:len(args_gv)] = self.inputargs_gv
-            self.set_coming_from(mc)
-            mc.done()
-            self.rgenop.close_mc(mc)
-            self.start_writing()
-        else:
-            # otherwise, we get better register allocation if we write a
-            # single larger mc block
-            for i in range(len(args_gv)):
-                op = OpSameAs(args_gv[i])
-                args_gv[i] = op
-                self.operations.append(op)
+        # we get better register allocation if we write a single large mc block
+        self.operations.append(forget_stack_storage)
+        for i in range(len(args_gv)):
+            op = OpSameAs(args_gv[i])
+            args_gv[i] = op
+            self.operations.append(op)
         lbl = Label(self)
         lblop = OpLabel(lbl, args_gv)
         self.operations.append(lblop)
@@ -313,23 +298,18 @@
             self.coming_from = 0
         return mc
 
-    def _jump_if(self, gv_condition, args_for_jump_gv, negate):
-        newbuilder = Builder(self.rgenop, list(args_for_jump_gv), None)
+    def _jump_if(self, cls, gv_condition, args_for_jump_gv):
+        newbuilder = Builder(self.rgenop, self.graphctx,
+                             list(args_for_jump_gv), None)
         newbuilder.order_dependency = self
-        # if the condition does not come from an obvious comparison operation,
-        # e.g. a getfield of a Bool or an input argument to the current block,
-        # then insert an OpIntIsTrue
-        if gv_condition.cc_result < 0 or gv_condition not in self.operations:
-            gv_condition = OpIntIsTrue(gv_condition)
-            self.operations.append(gv_condition)
-        self.operations.append(JumpIf(gv_condition, newbuilder, negate=negate))
+        self.operations.append(cls(gv_condition, newbuilder))
         return newbuilder
 
     def jump_if_false(self, gv_condition, args_for_jump_gv):
-        return self._jump_if(gv_condition, args_for_jump_gv, True)
+        return self._jump_if(JumpIfNot, gv_condition, args_for_jump_gv)
 
     def jump_if_true(self, gv_condition, args_for_jump_gv):
-        return self._jump_if(gv_condition, args_for_jump_gv, False)
+        return self._jump_if(JumpIf, gv_condition, args_for_jump_gv)
 
     def finish_and_goto(self, outputargs_gv, targetlbl):
         operands = targetlbl.inputoperands
@@ -342,20 +322,20 @@
             self.start_writing()
             operands = targetlbl.inputoperands
             assert operands is not None
-        mc = self.generate_block_code(outputargs_gv, outputargs_gv, operands,
-                              minimal_stack_depth = targetlbl.targetstackdepth)
+        mc = self.generate_block_code(outputargs_gv, outputargs_gv, operands)
         mc.JMP(rel32(targetlbl.targetaddr))
         mc.done()
         self.rgenop.close_mc(mc)
 
     def finish_and_return(self, sigtoken, gv_returnvar):
-        mc = self.generate_block_code([gv_returnvar], [gv_returnvar], [eax])
+        gvs = [gv_returnvar]
+        mc = self.generate_block_code(gvs, gvs, [eax])
         # --- epilogue ---
-        mc.LEA(esp, mem(ebp, -12))
+        mc.MOV(esp, ebp)
+        mc.POP(ebp)
         mc.POP(edi)
         mc.POP(esi)
         mc.POP(ebx)
-        mc.POP(ebp)
         mc.RET()
         # ----------------
         mc.done()
@@ -488,7 +468,8 @@
         reg = FlexSwitch.REG
         mc = self.generate_block_code(args_gv, [gv_exitswitch], [reg],
                                       renaming=False)
-        result = FlexSwitch(self.rgenop, self.inputargs_gv, self.inputoperands)
+        result = FlexSwitch(self.rgenop, self.graphctx,
+                            self.inputargs_gv, self.inputoperands)
         default_builder = result.initialize(mc)
         mc.done()
         self.rgenop.close_mc(mc)
@@ -502,49 +483,78 @@
         # XXX re-do this somehow...
 
     def genop_get_frame_base(self):
-        op = OpGetFrameBase()
-        self.operations.append(op)
-        return op
+        return gv_frame_base
 
     def get_frame_info(self, vars_gv):
-        if self.force_in_stack is None:
-            self.force_in_stack = []
         result = []
         for v in vars_gv:
             if not v.is_const:
-                place = StorageInStack()
-                self.force_in_stack.append((v, place))
+                place = StorageInStack(v)
+                self.operations.append(place)
                 v = place
             result.append(v)
         return result
 
     def alloc_frame_place(self, kind, gv_initial_value=None):
-        if self.force_in_stack is None:
-            self.force_in_stack = []
         if gv_initial_value is None:
             v = OpWhatever()
         else:
             v = OpSameAs(gv_initial_value)
         self.operations.append(v)
-        place = Place()
-        place.stackvar = v
-        self.force_in_stack.append((v, place))
+        place = StorageInStack(v)
+        self.operations.append(place)
         return place
 
     def genop_absorb_place(self, kind, place):
-        v = place.stackvar
-        place.stackvar = None  # break reference to potentially lots of memory
-        return v
+        return place.x
 
 
 class Label(GenLabel):
     targetaddr = 0
-    targetstackdepth = 0
     inputoperands = None
 
     def __init__(self, targetbuilder):
         self.targetbuilder = targetbuilder
 
+
+class GraphCtx:
+    # keep this in sync with the generated function prologue:
+    # how many extra words are initially pushed (including the
+    # return address, pushed by the caller)
+    PROLOGUE_FIXED_WORDS = 5
+
+    def __init__(self, rgenop):
+        self.rgenop = rgenop
+        self.initial_addr = 0   # position where there is the initial ADD ESP
+        self.adj_addrs = []     # list of positions where there is a LEA ESP
+        self.reserved_stack_vars = 0
+
+    def write_stack_adj(self, mc, initial):
+        if initial:
+            addr = write_stack_reserve(mc, self.reserved_stack_vars)
+            self.initial_addr = addr
+        else:
+            addr = write_stack_adj(mc, self.reserved_stack_vars)
+            self.adj_addrs.append(addr)
+
+    def ensure_stack_vars(self, n):
+        if CALL_ALIGN > 1:
+            # align the stack to a multiple of CALL_ALIGN words
+            stack_words = GraphCtx.PROLOGUE_FIXED_WORDS + n
+            stack_words = (stack_words + CALL_ALIGN-1) & ~ (CALL_ALIGN-1)
+            n = stack_words - GraphCtx.PROLOGUE_FIXED_WORDS
+        # re-patch the initial reserve and all the LEA ESP if the amount has grown
+        if n > self.reserved_stack_vars:
+            addr = self.initial_addr
+            patchmc = self.rgenop.InMemoryCodeBuilder(addr, addr+99)
+            write_stack_reserve(patchmc, n)
+            patchmc.done()
+            for addr in self.adj_addrs:
+                patchmc = self.rgenop.InMemoryCodeBuilder(addr, addr+99)
+                write_stack_adj(patchmc, n)
+                patchmc.done()
+            self.reserved_stack_vars = n
+
 # ____________________________________________________________
 
 
@@ -557,39 +567,38 @@
         MC_SIZE *= 16
 
     def __init__(self):
-        self.mcs = []   # machine code blocks where no-one is currently writing
+        self.allocated_mc = None
         self.keepalive_gc_refs = [] 
-        self.total_code_blocks = 0
 
     def open_mc(self):
-        if self.mcs:
-            # XXX think about inserting NOPS for alignment
-            return self.mcs.pop()
-        else:
-            # XXX supposed infinite for now
-            self.total_code_blocks += 1
+        # XXX supposed infinite for now
+        mc = self.allocated_mc
+        if mc is None:
             return self.MachineCodeBlock(self.MC_SIZE)
+        else:
+            self.allocated_mc = None
+            return mc
 
     def close_mc(self, mc):
-        # an open 'mc' is ready for receiving code... but it's also ready
-        # for being garbage collected, so be sure to close it if you
-        # want the generated code to stay around :-)
-        self.mcs.append(mc)
+        assert self.allocated_mc is None
+        self.allocated_mc = mc
 
     def check_no_open_mc(self):
-        assert len(self.mcs) == self.total_code_blocks
+        pass
 
     def newgraph(self, sigtoken, name):
+        graphctx = GraphCtx(self)
         # --- prologue ---
         mc = self.open_mc()
         entrypoint = mc.tell()
         if DEBUG_TRAP:
             mc.BREAKPOINT()
-        mc.PUSH(ebp)
-        mc.MOV(ebp, esp)
         mc.PUSH(ebx)
         mc.PUSH(esi)
         mc.PUSH(edi)
+        mc.PUSH(ebp)
+        mc.MOV(ebp, esp)
+        graphctx.write_stack_adj(mc, initial=True)
         # ^^^ pushed 5 words including the retval ( == PROLOGUE_FIXED_WORDS)
         # ----------------
         numargs = sigtoken     # for now
@@ -597,8 +606,9 @@
         inputoperands = []
         for i in range(numargs):
             inputargs_gv.append(GenVar())
-            inputoperands.append(mem(ebp, WORD * (2+i)))
-        builder = Builder(self, inputargs_gv, inputoperands)
+            ofs = WORD * (GraphCtx.PROLOGUE_FIXED_WORDS+i)
+            inputoperands.append(mem(ebp, ofs))
+        builder = Builder(self, graphctx, inputargs_gv, inputoperands)
         # XXX this makes the code layout in memory a bit obscure: we have the
         # prologue of the new graph somewhere in the middle of its first
         # caller, all alone...

Modified: pypy/branch/new-jit-codegen/i386/ri386.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/ri386.py	(original)
+++ pypy/branch/new-jit-codegen/i386/ri386.py	Wed Feb  7 13:44:55 2007
@@ -2,16 +2,21 @@
 
 
 class OPERAND(object):
+    _attrs_ = []
     def __repr__(self):
         return '<%s %s>' % (self.__class__.__name__, self.assembler())
 
 class REG(OPERAND):
     width = 4
-    lowest8bits = None
     def __repr__(self):
         return '<%s>' % self.__class__.__name__.lower()
     def assembler(self):
         return '%' + self.__class__.__name__.lower()
+    def lowest8bits(self):
+        if self.op < 4:
+            return registers8[self.op]
+        else:
+            raise ValueError
 
 class REG8(OPERAND):
     width = 1
@@ -47,11 +52,18 @@
     def assembler(self):
         return '$%d' % (self.value,)
 
+    def lowest8bits(self):
+        val = self.value & 0xFF
+        if val > 0x7F:
+            val -= 0x100
+        return IMM8(val)
+
 class IMM8(IMM32):
     width = 1
 
 class IMM16(OPERAND):  # only for RET
     width = 2
+    value = 0      # annotator hack
 
     def __init__(self, value):
         self.value = value
@@ -65,6 +77,9 @@
         self.byte = byte
         self.extradata = extradata
 
+    def lowest8bits(self):
+        return MODRM8(self.byte, self.extradata)
+
     def assembler(self):
         mod = self.byte & 0xC0
         rm  = self.byte & 0x07
@@ -186,14 +201,13 @@
 dh = DH()
 bh = BH()
 
-eax.lowest8bits = al
-ecx.lowest8bits = cl
-edx.lowest8bits = dl
-ebx.lowest8bits = bl
-
 registers = [eax, ecx, edx, ebx, esp, ebp, esi, edi]
 registers8 = [al, cl, dl, bl, ah, ch, dh, bh]
 
+for r in registers + registers8:
+    r.bitmask = 1 << r.op
+del r
+
 imm32 = IMM32
 imm8 = IMM8
 imm16 = IMM16
@@ -257,6 +271,10 @@
     else:
         return cls(0x84, SIB + packimm32(offset))
 
+def fixedsize_esp_ofs(offset):
+    SIB = '\x24'
+    return MODRM(0x84, SIB + packimm32(offset))
+
 def single_byte(value):
     return -128 <= value < 128
 

Modified: pypy/branch/new-jit-codegen/i386/ri386setup.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/ri386setup.py	(original)
+++ pypy/branch/new-jit-codegen/i386/ri386setup.py	Wed Feb  7 13:44:55 2007
@@ -273,6 +273,10 @@
 MOV.mode2(MODRM8,REG8,  ['\x88', register(2,8,'b'), modrm(1,'b')])
 MOV.mode2(REG8,  MODRM8,['\x8A', register(1,8,'b'), modrm(2,'b')])
 
+# special modes for writing explicit 16-bit immediates (must also use o16!)
+MOV.mode2(REG,   IMM16, [register(1), '\xB8', immediate(2,'h')])
+MOV.mode2(MODRM, IMM16, ['\xC7', orbyte(0<<3), modrm(1), immediate(2,'h')])
+
 ADD = Instruction()
 ADD.common_modes(0)
 

Modified: pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/test/test_auto_encoding.py	(original)
+++ pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py	Wed Feb  7 13:44:55 2007
@@ -198,6 +198,8 @@
             if ((args[1][1] in (i386.eax, i386.al))
                 and args[0][1].assembler().lstrip('-').isdigit()):
                 return []   # MOV [constant-address], accum
+            if args[1][1].__class__ == i386.IMM16:
+                return []   # MOV mod/rm, imm16
         if instrname == "LEA":
             if (args[1][1].__class__ != i386.MODRM or
                 args[1][1].is_register()):

Modified: pypy/branch/new-jit-codegen/test/rgenop_tests.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/test/rgenop_tests.py	(original)
+++ pypy/branch/new-jit-codegen/test/rgenop_tests.py	Wed Feb  7 13:44:55 2007
@@ -13,6 +13,7 @@
 FUNC2 = lltype.FuncType([lltype.Signed]*2, lltype.Signed)
 FUNC3 = lltype.FuncType([lltype.Signed]*3, lltype.Signed)
 FUNC5 = lltype.FuncType([lltype.Signed]*5, lltype.Signed)
+FUNC27= lltype.FuncType([lltype.Signed]*27, lltype.Signed)
 
 def make_adder(rgenop, n):
     # 'return x+n'
@@ -1581,6 +1582,142 @@
         res = fnptr(2, 10, 10, 400, 0)
         assert res == 0
 
+    def test_from_random_4_direct(self):
+##        def dummyfn(counter, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z):
+##          while True:
+
+##              if b:
+##                  pass
+
+##              g = q and j
+##              d = intmask(s - y) #    d <esi>
+##                                 #    t <0x64(%ebp)>
+
+##              e = y != f         #    e <ebx>
+##              j = c or j
+##              o = d or t         #    d <edx>   o <esi>
+##              t = l >  o         #    t <ecx>
+##              if e:
+##                  pass
+
+##              counter -= 1
+##              if not counter: break
+
+##          return intmask(a*-468864544+b*-340864157+c*-212863774+d*-84863387+e*43136996+f*171137383+g*299137766+h*427138153+i*555138536+j*683138923+k*811139306+l*939139693+m*1067140076+n*1195140463+o*1323140846+p*1451141233+q*1579141616+r*1707142003+s*1835142386+t*1963142773+u*2091143156+v*-2075823753+w*-1947823370+x*-1819822983+y*-1691822600+z*-1563822213)
+
+        rgenop = self.RGenOp()
+        signed_kind = rgenop.kindToken(lltype.Signed)
+        bool_kind = rgenop.kindToken(lltype.Bool)
+
+        builder0, gv_callable, [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26] = rgenop.newgraph(rgenop.sigToken(FUNC27), 'compiled_dummyfn')
+        builder0.start_writing()
+        args_gv = [v0, v1, v2, v3, v6, v8, v9, v10, v11, v12, v13, v14, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26]
+        label0 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49] = args_gv
+        v50 = builder0.genop1('int_is_true', v29)
+        builder1 = builder0.jump_if_true(v50, [v48, v38, v27, v30, v32, v34, v47, v40, v28, v41, v43, v45, v37, v46, v31, v33, v35, v39, v36, v42, v49, v44, v29])
+        args_gv = [v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49]
+        label1 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73] = args_gv
+        v74 = builder0.genop1('int_is_true', v64)
+        builder2 = builder0.jump_if_true(v74, [v54, v52, v65, v58, v60, v62, v64, v68, v56, v69, v71, v51, v73, v53, v67, v57, v55, v59, v61, v63, v66, v70, v72])
+        args_gv = [v51, v52, v53, v54, v55, v64, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73]
+        label2 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v75, v76, v77, v78, v79, v80, v81, v82, v83, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98] = args_gv
+        v99 = builder0.genop2('int_sub', v91, v97)
+        v100 = builder0.genop2('int_ne', v97, v79)
+        v101 = builder0.genop1('int_is_true', v78)
+        builder3 = builder0.jump_if_true(v101, [v85, v93, v94, v87, v91, v97, v89, v98, v80, v82, v78, v86, v84, v99, v88, v100, v90, v92, v96, v75, v95, v76, v77, v79, v81])
+        args_gv = [v75, v76, v77, v78, v99, v100, v79, v80, v81, v82, v83, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98]
+        label3 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v117, v118, v119, v120, v121, v122, v123, v124, v125, v126, v127] = args_gv
+        v128 = builder0.genop1('int_is_true', v106)
+        builder4 = builder0.jump_if_false(v128, [v114, v111, v116, v113, v118, v122, v110, v124, v103, v125, v105, v127, v107, v112, v121, v109, v115, v117, v119, v123, v102, v120, v104, v126, v106, v108])
+        args_gv = [v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v106, v117, v118, v119, v120, v122, v123, v124, v125, v126, v127]
+        label4 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v129, v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v149, v150, v151, v152, v153, v154] = args_gv
+        v155 = builder0.genop2('int_gt', v141, v144)
+        builder5 = builder0.jump_if_false(v134, [v149, v148, v141, v143, v145, v147, v151, v139, v152, v132, v154, v134, v136, v130, v140, v138, v142, v155, v144, v146, v150, v129, v137, v131, v153, v133, v135])
+        args_gv = [v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v155, v149, v150, v151, v152, v153, v154, v129]
+        label5 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+        [v156, v157, v158, v159, v160, v161, v162, v163, v164, v165, v166, v167, v168, v169, v170, v171, v172, v173, v174, v175, v176, v177, v178, v179, v180, v181, v182] = args_gv
+        v183 = builder0.genop2('int_sub', v182, rgenop.genconst(1))
+        v184 = builder0.genop1('int_is_true', v183)
+        builder6 = builder0.jump_if_true(v184, [v177, v166, v169, v171, v183, v173, v156, v165, v179, v158, v180, v168, v164, v178, v176, v172, v174, v167, v157, v175, v181, v161, v163])
+        v185 = builder0.genop2('int_mul', v156, rgenop.genconst(-468864544))
+        v186 = builder0.genop2('int_mul', v157, rgenop.genconst(-340864157))
+        v187 = builder0.genop2('int_add', v185, v186)
+        v188 = builder0.genop2('int_mul', v158, rgenop.genconst(-212863774))
+        v189 = builder0.genop2('int_add', v187, v188)
+        v190 = builder0.genop2('int_mul', v159, rgenop.genconst(-84863387))
+        v191 = builder0.genop2('int_add', v189, v190)
+        v192 = builder0.genop1('cast_bool_to_int', v160)
+        v193 = builder0.genop2('int_mul', v192, rgenop.genconst(43136996))
+        v194 = builder0.genop2('int_add', v191, v193)
+        v195 = builder0.genop2('int_mul', v161, rgenop.genconst(171137383))
+        v196 = builder0.genop2('int_add', v194, v195)
+        v197 = builder0.genop2('int_mul', v162, rgenop.genconst(299137766))
+        v198 = builder0.genop2('int_add', v196, v197)
+        v199 = builder0.genop2('int_mul', v163, rgenop.genconst(427138153))
+        v200 = builder0.genop2('int_add', v198, v199)
+        v201 = builder0.genop2('int_mul', v164, rgenop.genconst(555138536))
+        v202 = builder0.genop2('int_add', v200, v201)
+        v203 = builder0.genop2('int_mul', v165, rgenop.genconst(683138923))
+        v204 = builder0.genop2('int_add', v202, v203)
+        v205 = builder0.genop2('int_mul', v166, rgenop.genconst(811139306))
+        v206 = builder0.genop2('int_add', v204, v205)
+        v207 = builder0.genop2('int_mul', v167, rgenop.genconst(939139693))
+        v208 = builder0.genop2('int_add', v206, v207)
+        v209 = builder0.genop2('int_mul', v168, rgenop.genconst(1067140076))
+        v210 = builder0.genop2('int_add', v208, v209)
+        v211 = builder0.genop2('int_mul', v169, rgenop.genconst(1195140463))
+        v212 = builder0.genop2('int_add', v210, v211)
+        v213 = builder0.genop2('int_mul', v170, rgenop.genconst(1323140846))
+        v214 = builder0.genop2('int_add', v212, v213)
+        v215 = builder0.genop2('int_mul', v171, rgenop.genconst(1451141233))
+        v216 = builder0.genop2('int_add', v214, v215)
+        v217 = builder0.genop2('int_mul', v172, rgenop.genconst(1579141616))
+        v218 = builder0.genop2('int_add', v216, v217)
+        v219 = builder0.genop2('int_mul', v173, rgenop.genconst(1707142003))
+        v220 = builder0.genop2('int_add', v218, v219)
+        v221 = builder0.genop2('int_mul', v174, rgenop.genconst(1835142386))
+        v222 = builder0.genop2('int_add', v220, v221)
+        v223 = builder0.genop1('cast_bool_to_int', v175)
+        v224 = builder0.genop2('int_mul', v223, rgenop.genconst(1963142773))
+        v225 = builder0.genop2('int_add', v222, v224)
+        v226 = builder0.genop2('int_mul', v176, rgenop.genconst(2091143156))
+        v227 = builder0.genop2('int_add', v225, v226)
+        v228 = builder0.genop2('int_mul', v177, rgenop.genconst(-2075823753))
+        v229 = builder0.genop2('int_add', v227, v228)
+        v230 = builder0.genop2('int_mul', v178, rgenop.genconst(-1947823370))
+        v231 = builder0.genop2('int_add', v229, v230)
+        v232 = builder0.genop2('int_mul', v179, rgenop.genconst(-1819822983))
+        v233 = builder0.genop2('int_add', v231, v232)
+        v234 = builder0.genop2('int_mul', v180, rgenop.genconst(-1691822600))
+        v235 = builder0.genop2('int_add', v233, v234)
+        v236 = builder0.genop2('int_mul', v181, rgenop.genconst(-1563822213))
+        v237 = builder0.genop2('int_add', v235, v236)
+        builder0.finish_and_return(rgenop.sigToken(FUNC27), v237)
+        builder2.start_writing()
+        builder2.finish_and_goto([v51, v52, v53, v54, v55, v58, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73], label2)
+        builder4.start_writing()
+        builder4.finish_and_goto([v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v121, v117, v118, v119, v120, v122, v123, v124, v125, v126, v127], label4)
+        builder3.start_writing()
+        builder3.finish_and_goto([v75, v76, v77, v78, v99, v100, v79, v80, v81, v82, v78, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98], label3)
+        builder1.start_writing()
+        builder1.finish_and_goto([v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49], label1)
+        builder5.start_writing()
+        builder5.finish_and_goto([v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v155, v149, v150, v151, v152, v153, v154, v129], label5)
+        builder6.start_writing()
+        v238 = builder6.genop1('cast_bool_to_int', v175)
+        builder6.finish_and_goto([v183, v156, v157, v158, v161, v163, v164, v165, v166, v167, v168, v169, v171, v172, v173, v174, v238, v176, v177, v178, v179, v180, v181], label0)
+        builder6.end()
+
+        fnptr = self.cast(gv_callable, 27)
+
+        res = fnptr(*([5]*27))
+        assert res == 967746338
+
     def test_genzeroconst(self):
         RGenOp = self.RGenOp
         gv = RGenOp.genzeroconst(RGenOp.kindToken(lltype.Signed))



More information about the Pypy-commit mailing list