[pypy-svn] r38060 - in pypy/branch/new-jit-codegen: . demo i386 i386/test test
arigo at codespeak.net
arigo at codespeak.net
Wed Feb 7 13:44:58 CET 2007
Author: arigo
Date: Wed Feb 7 13:44:55 2007
New Revision: 38060
Added:
pypy/branch/new-jit-codegen/
- copied from r38059, pypy/branch/jit-virtual-world/pypy/jit/codegen/
pypy/branch/new-jit-codegen/demo/autopath.py
- copied unchanged from r37979, pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/autopath.py
Modified:
pypy/branch/new-jit-codegen/demo/autorun.py
pypy/branch/new-jit-codegen/demo/reducecase.py
pypy/branch/new-jit-codegen/demo/support.py
pypy/branch/new-jit-codegen/i386/operation.py
pypy/branch/new-jit-codegen/i386/regalloc.py
pypy/branch/new-jit-codegen/i386/rgenop.py
pypy/branch/new-jit-codegen/i386/ri386.py
pypy/branch/new-jit-codegen/i386/ri386setup.py
pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py
pypy/branch/new-jit-codegen/test/rgenop_tests.py
Log:
A branch with a refactoring of the i386 backend.
Modified: pypy/branch/new-jit-codegen/demo/autorun.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/autorun.py (original)
+++ pypy/branch/new-jit-codegen/demo/autorun.py Wed Feb 7 13:44:55 2007
@@ -11,5 +11,8 @@
def test_many_times():
for i in range(80):
- demo_conftest.option.randomseed = random.randrange(0, 100000)
- test_random.test_random_function()
+ yield run_test_once, random.randrange(0, 100000)
+
+def run_test_once(seed):
+ demo_conftest.option.randomseed = seed
+ test_random.test_random_function()
Modified: pypy/branch/new-jit-codegen/demo/reducecase.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/reducecase.py (original)
+++ pypy/branch/new-jit-codegen/demo/reducecase.py Wed Feb 7 13:44:55 2007
@@ -5,115 +5,259 @@
False'. The smallest failing example found so far gets written to
zsample.py.
"""
+import autopath
import os
import re
r = re.compile(r" \w = ")
rif = re.compile(r" if \w:")
+SEED = 73595
+ITERATIONS = 10
+ARGS=[-27, -38, -33, -53, 16, -28, 13, 11, 11, -46, -34, 57, -11, 80, 15, 49, -37, -43, -73, -62, -31, -21, -36, 17, 97, -53]
+BACKEND = 'i386'
+
+
lines = """
if goto == 0:
- g = h and x
- a = intmask(d + g)
- goto = 13
+ u = n != e
+ k = intmask(v + z)
+ b = not f
+ w = intmask(l % ((a & 0xfffff) + 1))
+ h = a or y
+ e = intmask(z + z)
+ m = c != a
+ a = intmask(v + g)
+ n = intmask(c - x)
+ n = intmask(o % ((y & 0xfffff) + 1))
+ o = -7035
+ h = m >= a
+ s = f != g
+ e = intmask(~w)
+ if f:
+ goto = 4
+ else:
+ goto = 3
if goto == 1:
- if v:
- goto = 13
+ n = intmask(h - c)
+ x = t == b
+ a = 7744
+ if g:
+ goto = 2
else:
- goto = 9
+ goto = 2
if goto == 2:
- if m:
- goto = 10
+ i = intmask(i - v)
+ o = -6878346
+ f = intmask(i ^ n)
+ i = 1261729270
+ q = s or u
+ z = t >= b
+ u = bool(u)
+ w = intmask(e << (c & 0x0000067f))
+ if w:
+ goto = 13
else:
- goto = 3
+ goto = 14
if goto == 3:
- if r:
- goto = 9
- else:
+ y = intmask(w >> (e & 0x1234567f))
+ d = intmask(o & j)
+ r = r != n
+ a = intmask(b & u)
+ b = -11216
+ v = intmask(g // (-((g & 0xfffff) + 2)))
+ x = 6697939
+ d = intmask(abs(q))
+ i = intmask(i % (-((c & 0xfffff) + 2)))
+ w = 23593
+ u = n and a
+ d = intmask(q << (y & 0x0000067f))
+ o = intmask(w // (-((i & 0xfffff) + 2)))
+ l = intmask(e + n)
+ if j:
counter -= 1
if not counter: break
- goto = 0
+ goto = 2
+ else:
+ goto = 14
if goto == 4:
- if h:
- goto = 11
+ f = intmask(e * n)
+ l = intmask(k >> (e & 0x1234567f))
+ l = intmask(u // ((p & 0xfffff) + 1))
+ d = bool(m)
+ d = 7364461
+ g = 1410833768
+ g = y <= d
+ s = l == d
+ e = intmask(k - z)
+ o = -6669177
+ c = intmask(-o)
+ q = intmask(-o)
+ m = intmask(u - j)
+ q = intmask(a - s)
+ m = intmask(a | n)
+ c = q and a
+ t = intmask(b // (-((p & 0xfffff) + 2)))
+ if r:
+ goto = 5
else:
- goto = 9
+ goto = 12
if goto == 5:
- x = intmask(-i)
- p = bool(h)
- b = m > x
- g = p or i
- h = p > v
- goto = 7
+ c = intmask(i * q)
+ q = intmask(-q)
+ c = intmask(a // (-((a & 0xfffff) + 2)))
+ u = k >= i
+ m = -34
+ z = intmask(o - i)
+ x = x and l
+ w = o < n
+ x = n != w
+ m = 92
+ h = 27
+ x = intmask(~u)
+ i = not o
+ q = intmask(c & q)
+ y = x or g
+ if z:
+ goto = 10
+ else:
+ counter -= 1
+ if not counter: break
+ goto = 1
if goto == 6:
- m = intmask(-p)
- counter -= 1
- if not counter: break
- goto = 5
- if goto == 7:
+ u = intmask(l // (-((g & 0xfffff) + 2)))
+ m = intmask(l // ((h & 0xfffff) + 1))
+ a = 3949664
+ c = intmask(v - u)
+ w = k and r
+ q = -1898584839
+ k = a > o
if d:
+ goto = 7
+ else:
+ goto = 8
+ if goto == 7:
+ j = not f
+ s = n == h
+ t = x > n
+ z = intmask(e & f)
+ q = intmask(v + k)
+ o = not a
+ v = 2876355
+ h = intmask(w % ((p & 0xfffff) + 1))
+ c = e and b
+ k = intmask(f // (-((u & 0xfffff) + 2)))
+ m = 4882866
+ if h:
counter -= 1
if not counter: break
goto = 0
else:
- goto = 0
- if goto == 8:
- if e:
counter -= 1
if not counter: break
- goto = 6
- else:
+ goto = 4
+ if goto == 8:
+ w = intmask(g & n)
+ d = -31404
+ s = intmask(abs(e))
+ j = intmask(g << (w & 0x0000067f))
+ r = -26
+ b = -13356
+ o = p < m
+ c = 438000325
+ t = intmask(~g)
+ i = intmask(-e)
+ a = intmask(c - x)
+ v = intmask(v >> (f & 0x1234567f))
+ if o:
counter -= 1
if not counter: break
goto = 6
+ else:
+ goto = 12
if goto == 9:
- if u:
+ l = x <= h
+ z = not w
+ f = intmask(u ^ r)
+ if m:
counter -= 1
if not counter: break
- goto = 6
+ goto = 7
else:
+ goto = 11
+ if goto == 10:
+ o = intmask(t // ((e & 0xfffff) + 1))
+ w = c == v
+ if h:
counter -= 1
if not counter: break
- goto = 2
- if goto == 10:
- if v:
- goto = 14
+ goto = 5
else:
- goto = 12
+ counter -= 1
+ if not counter: break
+ goto = 3
if goto == 11:
- if f:
+ z = i != c
+ t = d != w
+ v = intmask(r - f)
+ u = 6813995
+ z = c < f
+ r = intmask(c + i)
+ z = intmask(o - s)
+ p = intmask(i // (-((n & 0xfffff) + 2)))
+ v = intmask(p | h)
+ if a:
counter -= 1
if not counter: break
- goto = 5
+ goto = 3
else:
counter -= 1
if not counter: break
- goto = 11
+ goto = 0
if goto == 12:
- d = d >= n
- counter -= 1
- if not counter: break
- goto = 0
+ b = intmask(l % ((a & 0xfffff) + 1))
+ d = intmask(abs(y))
+ c = intmask(~w)
+ a = bool(v)
+ d = not a
+ v = intmask(s ^ u)
+ if m:
+ counter -= 1
+ if not counter: break
+ goto = 4
+ else:
+ counter -= 1
+ if not counter: break
+ goto = 8
if goto == 13:
- l = j <= u
- d = intmask(s - y)
- h = intmask(l // ((h & 0xfffff) + 1))
- if a:
+ c = 13780
+ e = n != i
+ x = 912031708
+ i = intmask(p ^ j)
+ k = not s
+ p = c > b
+ o = intmask(~j)
+ t = intmask(-k)
+ v = y <= v
+ v = m <= a
+ w = a < u
+ z = p == v
+ if g:
counter -= 1
if not counter: break
- goto = 12
+ goto = 7
else:
counter -= 1
if not counter: break
- goto = 6
+ goto = 13
if goto == 14:
- if o:
+ if p:
counter -= 1
if not counter: break
- goto = 14
+ goto = 8
else:
counter -= 1
if not counter: break
- goto = 14
+ goto = 13
""".splitlines()
lines = [s.rstrip() for s in lines]
@@ -135,17 +279,27 @@
print >> g, '''
return intmask(a*-468864544+b*-340864157+c*-212863774+d*-84863387+e*43136996+f*171137383+g*299137766+h*427138153+i*555138536+j*683138923+k*811139306+l*939139693+m*1067140076+n*1195140463+o*1323140846+p*1451141233+q*1579141616+r*1707142003+s*1835142386+t*1963142773+u*2091143156+v*-2075823753+w*-1947823370+x*-1819822983+y*-1691822600+z*-1563822213)
-args=[-67, -89, -99, 35, 91, 8, -17, -75, 14, 88, 71, -77, -77, 38, 65, 21, 77, 73, -17, -12, -67, 36, 11, 25, -54, -36]
+'''
+ g.close()
+ #ok = os.system("py.test zgen.py --seed=6661 -s") == 0
-def test_y():
+ from pypy.jit.codegen.demo import conftest as demo_conftest
+ demo_conftest.option.randomseed = SEED
+ demo_conftest.option.backend = BACKEND
from pypy.jit.codegen.demo.support import rundemo
- rundemo(dummyfn, 10, *args)
-'''
- g.close()
- ok = os.system("py.test zgen.py --seed=3888 -s") == 0
- # XXX could run in-process to avoid start-up overhead
+ d = {}
+ execfile('zgen.py', d)
+ dummyfn = d['dummyfn']
+
+ childpid = os.fork()
+ if childpid == 0: # in child
+ rundemo(dummyfn, ITERATIONS, *ARGS)
+ os._exit(0)
+
+ _, status = os.waitpid(childpid, 0)
+ ok = status == 0
if ok:
return True # accept
Modified: pypy/branch/new-jit-codegen/demo/support.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/demo/support.py (original)
+++ pypy/branch/new-jit-codegen/demo/support.py Wed Feb 7 13:44:55 2007
@@ -121,7 +121,7 @@
F1 = lltype.FuncType([lltype.Signed] * nb_args, lltype.Signed)
fp = RGenOp.get_python_callable(lltype.Ptr(F1), gv_entrypoint)
res = runfp(fp, *args)
- print '%-6s ===>'%demo_conftest.option.backend, res
+ print '%-6s ===>' % RGenOp.__name__, res
print
if res != expected:
raise AssertionError(
Modified: pypy/branch/new-jit-codegen/i386/operation.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/operation.py (original)
+++ pypy/branch/new-jit-codegen/i386/operation.py Wed Feb 7 13:44:55 2007
@@ -18,60 +18,33 @@
else:
CALL_ALIGN = 1
-PROLOGUE_FIXED_WORDS = 5
-
-RK_NO_RESULT = 0
-RK_WORD = 1
-RK_CC = 2
-
class Operation(GenVar):
clobbers_cc = True
- result_kind = RK_WORD
- cc_result = -1
+ side_effects = True
- def allocate(self, allocator):
- pass
def generate(self, allocator):
raise NotImplementedError
-class OpWhatever(Operation):
- clobbers_cc = False
- def generate(self, allocator):
- pass
-
class Op1(Operation):
def __init__(self, x):
self.x = x
- def allocate(self, allocator):
+ def mark_used_vars(self, allocator):
allocator.using(self.x)
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # result not used
- srcop = allocator.get_operand(self.x)
- self.generate2(allocator.mc, dstop, srcop)
- def generate2(self, mc, dstop, srcop):
- raise NotImplementedError
class UnaryOp(Op1):
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # simple operation whose result is not used anyway
- srcop = allocator.get_operand(self.x)
- mc = allocator.mc
- if srcop != dstop:
- try:
- mc.MOV(dstop, srcop)
- except FailedToImplement:
- mc.MOV(ecx, srcop)
- self.emit(mc, ecx)
- mc.MOV(dstop, ecx)
- return
- self.emit(mc, dstop)
+ side_effects = False
+ def mark_used_vars(self, allocator):
+ allocator.using_inplace(self.x, self)
+ def generate(self, allocator):
+ if allocator.release(self.x):
+ dstop = allocator.get_operand(self.x) # in-place operation
+ allocator.create_exactly_at(self, dstop)
+ else:
+ dstop = allocator.create_reg(self)
+ srcop = allocator.get_operand(self.x)
+ allocator.mc.MOV(dstop, srcop) # make a copy in a new register
+ self.emit(allocator.mc, dstop)
class OpIntNeg(UnaryOp):
opname = 'int_neg'
@@ -83,110 +56,140 @@
class OpIntAbs(Op1):
opname = 'int_abs'
- def generate2(self, mc, dstop, srcop):
+ side_effects = False
+ def mark_used_vars(self, allocator):
+ allocator.using(self.x)
+ def generate(self, allocator):
+ oldsrcop = allocator.get_operand(self.x)
+ dstop = allocator.create_reg(self)
+ srcop = allocator.get_operand(self.x)
# ABS-computing code from Psyco, found by exhaustive search
# on *all* short sequences of operations :-)
- inplace = (dstop == srcop)
- if inplace or not (isinstance(srcop, REG) or isinstance(dstop, REG)):
- mc.MOV(ecx, srcop)
- srcop = ecx
- if not inplace:
+ mc = allocator.mc
+ if dstop != oldsrcop:
mc.MOV(dstop, srcop)
mc.SHL(dstop, imm8(1))
mc.SBB(dstop, srcop)
- mc.SBB(ecx, ecx)
- mc.XOR(dstop, ecx)
+ allocator.release(self.x)
+ tmpop = allocator.create_scratch_reg()
+ dstop = allocator.get_operand(self)
+ mc.SBB(tmpop, tmpop)
+ mc.XOR(dstop, tmpop)
+ allocator.end_clobber(tmpop)
class OpSameAs(Op1):
+ clobbers_cc = False # special handling of the cc
+ side_effects = False
+ def mark_used_vars(self, allocator):
+ allocator.using_inplace(self.x, self)
+ def generate(self, allocator):
+ srcop = allocator.get_operand(self.x)
+ if allocator.lastuse(self.x):
+ allocator.release(self.x)
+ if isinstance(srcop, CCFLAG):
+ allocator.create_in_cc(self, srcop)
+ else:
+ allocator.create_exactly_at(self, srcop)
+ else:
+ if isinstance(srcop, CCFLAG):
+ allocator.clobber_cc() # which doesn't itself clobber cc,
+ # so we can reuse it for us
+ allocator.create_in_cc(self, srcop)
+ else:
+ dstop = allocator.create_reg(self)
+ srcop = allocator.get_operand(self.x)
+ if srcop != dstop:
+ allocator.mc.MOV(dstop, srcop)
+ allocator.release(self.x)
+
+class OpWhatever(Operation):
clobbers_cc = False
- def generate2(self, mc, dstop, srcop):
- if srcop != dstop:
- try:
- mc.MOV(dstop, srcop)
- except FailedToImplement:
- mc.MOV(ecx, srcop)
- mc.MOV(dstop, ecx)
+ side_effects = False
+ def generate(self, allocator):
+ allocator.create(self)
class OpCompare1(Op1):
- result_kind = RK_CC
+ clobbers_cc = False # special handling of the cc
+ side_effects = False
+
def generate(self, allocator):
- srcop = allocator.get_operand(self.x)
mc = allocator.mc
- self.emit(mc, srcop)
- def copy(self):
- return self.__class__(self.x)
+ srcop = allocator.get_operand(self.x)
+ if isinstance(srcop, CCFLAG):
+ ccop = srcop
+ allocator.release(self.x)
+ allocator.clobber_cc()
+ # the flags are still valid through a clobber_cc
+ if self.inverted:
+ ccop = ccflags[cond_negate(ccop.cc)]
+ else:
+ allocator.clobber_cc()
+ srcop = allocator.get_operand(self.x)
+ mc.CMP(srcop, imm8(0))
+ allocator.release(self.x)
+ ccop = ccflags[self.suggested_cc]
+ allocator.create_in_cc(self, ccop)
class OpIntIsTrue(OpCompare1):
opname = 'int_is_true', 'ptr_nonzero', 'uint_is_true'
- cc_result = Conditions['NE']
- @staticmethod
- def emit(mc, x):
- mc.CMP(x, imm8(0))
+ suggested_cc = Conditions['NE']
+ inverted = False
class OpIntIsZero(OpIntIsTrue):
opname = 'ptr_iszero', 'bool_not'
- cc_result = Conditions['E']
+ suggested_cc = Conditions['E']
+ inverted = True
class Op2(Operation):
def __init__(self, x, y):
self.x = x
self.y = y
- def allocate(self, allocator):
+ def mark_used_vars(self, allocator):
allocator.using(self.x)
allocator.using(self.y)
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # simple operation whose result is not used anyway
- op1 = allocator.get_operand(self.x)
- op2 = allocator.get_operand(self.y)
- self.generate3(allocator.mc, dstop, op1, op2)
- def generate3(self, mc, dstop, op1, op2):
- raise NotImplementedError
class BinaryOp(Op2):
+ side_effects = False
commutative = False
- def generate3(self, mc, dstop, op1, op2):
- # now all of dstop, op1 and op2 may alias each other and be in
- # a register, in the stack or an immediate... finding a correct
- # and encodable combination of instructions is loads of fun
- if dstop == op1:
- case = 1 # optimize for this common case
- elif self.commutative and dstop == op2:
+
+ def mark_used_vars(self, allocator):
+ inplace_ok = allocator.using_inplace(self.x, self)
+ if not inplace_ok and self.commutative:
+ allocator.using_inplace(self.y, self)
+ else:
+ allocator.using(self.y)
+
+ def generate(self, allocator):
+ x, y = self.x, self.y
+ op1 = allocator.get_operand(x)
+ op2 = allocator.get_operand(y)
+ xlast = allocator.lastuse(x)
+ if self.commutative and not xlast and allocator.lastuse(y):
+ # reverse arguments, then it's an in-place operation
+ x, y = y, x
op1, op2 = op2, op1
- case = 1
- elif isinstance(dstop, REG):
- if dstop != op2:
- # REG = OPERATION(op1, op2) with op2 != REG
- case = 2
- else:
- # REG = OPERATION(op1, REG)
- case = 3
- elif isinstance(op1, REG) and isinstance(op2, REG):
- # STACK = OPERATION(REG, REG)
- case = 2
- else:
- case = 3
- # generate instructions according to the 'case' determined above
- if case == 1:
- # dstop == op1
- try:
- self.emit(mc, op1, op2)
- except FailedToImplement: # emit(STACK, STACK) combination
- mc.MOV(ecx, op2)
- self.emit(mc, op1, ecx)
- elif case == 2:
- # this case works for:
- # * REG = OPERATION(op1, op2) with op2 != REG
- # * STACK = OPERATION(REG, REG)
- mc.MOV(dstop, op1)
- self.emit(mc, dstop, op2)
- else:
- # most general case
- mc.MOV(ecx, op1)
- self.emit(mc, ecx, op2)
- mc.MOV(dstop, ecx)
+ xlast = True
+
+ if xlast:
+ dstop = op1 # in-place operation
+ # op1 and op2 must not be both in a stack location
+ if isinstance(op1, MODRM) and isinstance(op2, MODRM):
+ tmpop = allocator.create_scratch_reg(op2)
+ # neither op1 nor op2 can have been spilled here, as
+ # they are already in the stack
+ op2 = tmpop
+ allocator.end_clobber(tmpop)
+ allocator.release(x)
+ allocator.release(y)
+ allocator.create_exactly_at(self, op1)
+ else:
+ dstop = allocator.create_reg(self)
+ if dstop != op1: # else op1 was spilled just now, so its value
+ # is still in place in its old register
+ allocator.mc.MOV(dstop, op1) # make a copy in the new register
+ op2 = allocator.get_operand(y)
+ allocator.release(y)
+ self.emit(allocator.mc, dstop, op2)
class OpIntAdd(BinaryOp):
opname = 'int_add', 'uint_add'
@@ -211,64 +214,75 @@
class OpIntMul(Op2):
opname = 'int_mul'
- def generate3(self, mc, dstop, op1, op2):
- if isinstance(dstop, REG):
- tmpop = dstop
- else:
- tmpop = ecx
- if tmpop == op1:
- mc.IMUL(tmpop, op2)
- elif isinstance(op2, IMM32):
- mc.IMUL(tmpop, op1, op2)
+ side_effects = False
+
+ def generate(self, allocator):
+ op1 = allocator.get_operand(self.x)
+ op2 = allocator.get_operand(self.y)
+
+ if isinstance(op1, REG) and allocator.lastuse(self.x):
+ allocator.release(self.x)
+ allocator.release(self.y)
+ allocator.create_exactly_at(self, op1)
+ dstop = op1
+ elif isinstance(op2, REG) and allocator.lastuse(self.y):
+ allocator.release(self.x)
+ allocator.release(self.y)
+ allocator.create_exactly_at(self, op2)
+ dstop = op2
+ else:
+ dstop = allocator.create_reg(self)
+ allocator.release(self.x)
+ allocator.release(self.y)
+ mc = allocator.mc
+ if isinstance(op2, IMM32):
+ mc.IMUL(dstop, op1, op2)
elif isinstance(op1, IMM32):
- mc.IMUL(tmpop, op2, op1)
+ mc.IMUL(dstop, op2, op1)
+ elif dstop == op1:
+ mc.IMUL(dstop, op2)
+ elif dstop == op2:
+ mc.IMUL(dstop, op1)
else:
- if tmpop != op2:
- mc.MOV(tmpop, op2)
- mc.IMUL(tmpop, op1)
- if dstop != tmpop:
- mc.MOV(dstop, tmpop)
+ mc.MOV(dstop, op1)
+ mc.IMUL(dstop, op2)
class MulOrDivOp(Op2):
+ side_effects = False
- def generate3(self, mc, dstop, op1, op2):
+ def generate(self, allocator):
# XXX not very efficient but not very common operations either
- mc.PUSH(eax)
- mc.PUSH(edx)
- if op1 != eax:
- if op2 == eax:
- op2 = mem(esp, 4)
+ oldop1 = allocator.get_operand(self.x)
+ #oldop2 = allocator.get_operand(self.y)
+ allocator.clobber2(eax, edx)
+ op1 = allocator.get_operand(self.x)
+
+ mc = allocator.mc
+
+ if oldop1 != eax:
mc.MOV(eax, op1)
if self.input_is_64bits:
- if op2 == edx:
- op2 = mem(esp)
if self.unsigned:
mc.XOR(edx, edx)
else:
mc.CDQ()
- try:
- self.emit(mc, op2)
- except FailedToImplement:
- mc.MOV(ecx, op2)
- self.emit(mc, ecx)
- if dstop != self.reg_containing_result:
- mc.MOV(dstop, self.reg_containing_result)
- if dstop == edx:
- mc.ADD(esp, imm8(4))
- else:
- mc.POP(edx)
- if dstop == eax:
- mc.ADD(esp, imm8(4))
- else:
- mc.POP(eax)
+
+ self.generate2(allocator)
+
+ allocator.end_clobber(eax)
+ allocator.end_clobber(edx)
+ allocator.release(self.x)
+ allocator.release(self.y)
+ # the target register should still be free, see clobber2()
+ allocator.create_exactly_at(self, self.reg_containing_result)
class OpIntFloorDiv(MulOrDivOp):
opname = 'int_floordiv'
input_is_64bits = True
reg_containing_result = eax
unsigned = False
- @staticmethod
- def emit(mc, op2):
+
+ def generate2(self, allocator):
# from the PPC backend which has the same problem:
#
# grumble, the powerpc handles division when the signs of x
@@ -288,234 +302,295 @@
# 20/(-3) = -7,-1 -6, 2 # operand signs differ
# (-20)/(-3) = 6,-2 6,-2
#
+ tmp = allocator.create_scratch_reg()
+ op2 = allocator.get_operand(self.y)
+ mc = allocator.mc
if isinstance(op2, IMM32):
# if op2 is an immediate, we do an initial adjustment of operand 1
# so that we get directly the correct answer
if op2.value >= 0:
# if op1 is negative, subtract (op2-1)
- mc.MOV(ecx, edx) # -1 if op1 is negative, 0 otherwise
- mc.AND(ecx, imm(op2.value-1))
- mc.SUB(eax, ecx)
+ mc.MOV(tmp, edx) # -1 if op1 is negative, 0 otherwise
+ mc.AND(tmp, imm(op2.value-1))
+ mc.SUB(eax, tmp)
mc.SBB(edx, imm8(0))
else:
# if op1 is positive (or null), add (|op2|-1)
- mc.MOV(ecx, edx)
- mc.NOT(ecx) # -1 if op1 is positive, 0 otherwise
- mc.AND(ecx, imm(-op2.value-1))
- mc.ADD(eax, ecx)
+ mc.MOV(tmp, edx)
+ mc.NOT(tmp) # -1 if op1 is positive, 0 otherwise
+ mc.AND(tmp, imm(-op2.value-1))
+ mc.ADD(eax, tmp)
mc.ADC(edx, imm8(0))
- mc.MOV(ecx, op2)
- mc.IDIV(ecx)
+ mc.MOV(tmp, op2)
+ mc.IDIV(tmp)
else:
# subtract 1 to the result if the operand signs differ and
# the remainder is not zero
- mc.MOV(ecx, eax)
+ mc.MOV(tmp, eax)
mc.IDIV(op2)
- mc.XOR(ecx, op2)
- mc.SAR(ecx, imm8(31)) # -1 if signs differ, 0 otherwise
- mc.AND(ecx, edx) # nonnull if signs differ and edx != 0
- mc.CMP(ecx, imm8(1)) # no carry flag iff signs differ and edx != 0
+ mc.XOR(tmp, op2)
+ mc.SAR(tmp, imm8(31)) # -1 if signs differ, 0 otherwise
+ mc.AND(tmp, edx) # nonnull if signs differ and edx != 0
+ mc.CMP(tmp, imm8(1)) # no carry flag iff signs differ and edx != 0
mc.ADC(eax, imm8(-1)) # subtract 1 iff no carry flag
+ allocator.end_clobber(tmp)
class OpIntMod(MulOrDivOp):
opname = 'int_mod'
input_is_64bits = True
reg_containing_result = edx
unsigned = False
- @staticmethod
- def emit(mc, op2):
+
+ def generate2(self, allocator):
# Python i386
# 20/3 = 6, 2 6, 2
# (-20)/3 = -7, 1 -6,-2 # operand signs differ
# 20/(-3) = -7,-1 -6, 2 # operand signs differ
# (-20)/(-3) = 6,-2 6,-2
#
+ tmp = allocator.create_scratch_reg()
+ op2 = allocator.get_operand(self.y)
+ mc = allocator.mc
if isinstance(op2, IMM32):
- mc.MOV(ecx, op2)
- mc.IDIV(ecx)
+ mc.MOV(tmp, op2)
+ mc.IDIV(tmp)
# adjustment needed:
# if op2 > 0: if the result is negative, add op2 to it
# if op2 < 0: if the result is > 0, subtract |op2| from it
- mc.MOV(ecx, edx)
+ mc.MOV(tmp, edx)
if op2.value < 0:
- mc.NEG(ecx)
- mc.SAR(ecx, imm8(31))
- mc.AND(ecx, imm(op2.value))
- mc.ADD(edx, ecx)
+ mc.NEG(tmp)
+ mc.SAR(tmp, imm8(31))
+ mc.AND(tmp, imm(op2.value))
+ mc.ADD(edx, tmp)
else:
# if the operand signs differ and the remainder is not zero,
# add operand2 to the result
- mc.MOV(ecx, eax)
+ mc.MOV(tmp, eax)
mc.IDIV(op2)
- mc.XOR(ecx, op2)
- mc.SAR(ecx, imm8(31)) # -1 if signs differ, 0 otherwise
- mc.AND(ecx, edx) # nonnull if signs differ and edx != 0
- mc.CMOVNZ(ecx, op2) # == op2 if signs differ and edx != 0
- mc.ADD(edx, ecx)
+ mc.XOR(tmp, op2)
+ mc.SAR(tmp, imm8(31)) # -1 if signs differ, 0 otherwise
+ mc.AND(tmp, edx) # nonnull if signs differ and edx != 0
+ mc.CMOVNZ(tmp, op2) # == op2 if signs differ and edx != 0
+ mc.ADD(edx, tmp)
+ allocator.end_clobber(tmp)
class OpUIntMul(MulOrDivOp):
opname = 'uint_mul'
input_is_64bits = False
reg_containing_result = eax
unsigned = True
- emit = staticmethod(I386CodeBuilder.MUL)
+ def generate2(self, allocator):
+ op2 = allocator.get_operand(self.y)
+ allocator.mc.MUL(op2)
class OpUIntFloorDiv(MulOrDivOp):
opname = 'uint_floordiv'
input_is_64bits = True
reg_containing_result = eax
unsigned = True
- emit = staticmethod(I386CodeBuilder.DIV)
+ def generate2(self, allocator):
+ op2 = allocator.get_operand(self.y)
+ allocator.mc.DIV(op2)
class OpUIntMod(MulOrDivOp):
opname = 'uint_mod'
input_is_64bits = True
reg_containing_result = edx
unsigned = True
- emit = staticmethod(I386CodeBuilder.DIV)
+ def generate2(self, allocator):
+ op2 = allocator.get_operand(self.y)
+ allocator.mc.DIV(op2)
-class OpIntLShift(Op2):
- opname = 'int_lshift', 'uint_lshift'
- emit = staticmethod(I386CodeBuilder.SHL)
- def generate3(self, mc, dstop, op1, op2):
- # XXX not optimized
+class OpShift(Op2):
+ side_effects = False
+ countmax31 = False
+
+ def mark_used_vars(self, allocator):
+ allocator.using_inplace(self.x, self)
+ allocator.using(self.y)
+ # XXX this would be nice
+ #if not self.countmax31:
+ # allocator.suggests(self.y, ecx)
+
+ def generate(self, allocator):
+ op2 = allocator.get_operand(self.y)
+ mc = allocator.mc
if isinstance(op2, IMM32):
n = op2.value
if n < 0 or n >= 32:
- mc.MOV(dstop, imm8(0)) # shift out of range, result is zero
- return
+ # shift out of range
+ if self.countmax31:
+ n = 31 # case in which it's equivalent to a shift by 31
+ else:
+ # case in which the result is always zero
+ allocator.release(self.x)
+ allocator.release(self.y)
+ dstop = allocator.create_reg(self)
+ mc.XOR(dstop, dstop)
+ return
count = imm8(n)
else:
- mc.MOV(ecx, op2)
+ allocator.clobber(ecx)
+ op2 = allocator.get_operand(self.y)
+ if self.countmax31:
+ mc.MOV(ecx, imm8(31))
+ mc.CMP(op2, ecx)
+ mc.CMOVBE(ecx, op2)
+ else:
+ mc.MOV(ecx, op2)
+ allocator.release(self.y)
count = cl
- if dstop != op1:
- try:
- mc.MOV(dstop, op1)
- except FailedToImplement:
- mc.PUSH(op1)
- mc.POP(dstop)
+
+ if allocator.release(self.x):
+ dstop = allocator.get_operand(self.x) # in-place operation
+ allocator.create_exactly_at(self, dstop)
+ else:
+ dstop = allocator.create_reg(self)
+ srcop = allocator.get_operand(self.x)
+ mc.MOV(dstop, srcop) # make a copy in a new register
+
self.emit(mc, dstop, count)
if count == cl:
- mc.CMP(ecx, imm8(32))
- mc.SBB(ecx, ecx)
- mc.AND(dstop, ecx)
+ if not self.countmax31:
+ mc.CMP(ecx, imm8(32))
+ mc.SBB(ecx, ecx)
+ mc.AND(dstop, ecx)
+ allocator.end_clobber(ecx)
-class OpIntRShift(Op2):
- opname = 'int_rshift'
- def generate3(self, mc, dstop, op1, op2):
- # XXX not optimized
- if isinstance(op2, IMM32):
- n = op2.value
- if n < 0 or n >= 32:
- n = 31 # shift out of range, replace with 31
- count = imm8(n)
- else:
- mc.MOV(ecx, imm(31))
- mc.CMP(op2, ecx)
- mc.CMOVBE(ecx, op2)
- count = cl
- if dstop != op1:
- try:
- mc.MOV(dstop, op1)
- except FailedToImplement:
- mc.PUSH(op1)
- mc.POP(dstop)
- mc.SAR(dstop, count)
+class OpIntLShift(OpShift):
+ opname = 'int_lshift', 'uint_lshift'
+ emit = staticmethod(I386CodeBuilder.SHL)
-class OpUIntRShift(OpIntLShift):
+class OpUIntRShift(OpShift):
opname = 'uint_rshift'
emit = staticmethod(I386CodeBuilder.SHR)
+class OpIntRShift(OpShift):
+ opname = 'int_rshift'
+ emit = staticmethod(I386CodeBuilder.SAR)
+ countmax31 = True
+
class OpCompare2(Op2):
- result_kind = RK_CC
+ side_effects = False
+
def generate(self, allocator):
- srcop = allocator.get_operand(self.x)
- dstop = allocator.get_operand(self.y)
+ op1 = allocator.get_operand(self.x)
+ op2 = allocator.get_operand(self.y)
mc = allocator.mc
- # XXX optimize the case CMP(immed, reg-or-modrm)
+ cond = self.suggested_cc
try:
- mc.CMP(srcop, dstop)
+ mc.CMP(op1, op2)
except FailedToImplement:
- mc.MOV(ecx, srcop)
- mc.CMP(ecx, dstop)
- def copy(self):
- return self.__class__(self.x, self.y)
+ # try reversing the arguments, for CMP(immed, reg-or-modrm)
+ try:
+ mc.CMP(op2, op1)
+ except FailedToImplement:
+ # CMP(stack, stack)
+ reg = allocator.create_scratch_reg(op1)
+ mc.CMP(reg, op2)
+ allocator.end_clobber(reg)
+ else:
+ cond = cond_swapargs(cond) # worked with arguments reversed
+ allocator.release(self.x)
+ allocator.release(self.y)
+ allocator.create_in_cc(self, ccflags[cond])
class OpIntLt(OpCompare2):
opname = 'int_lt', 'char_lt'
- cc_result = Conditions['L']
+ suggested_cc = Conditions['L']
class OpIntLe(OpCompare2):
opname = 'int_le', 'char_le'
- cc_result = Conditions['LE']
+ suggested_cc = Conditions['LE']
class OpIntEq(OpCompare2):
opname = 'int_eq', 'char_eq', 'unichar_eq', 'ptr_eq', 'uint_eq'
- cc_result = Conditions['E']
+ suggested_cc = Conditions['E']
class OpIntNe(OpCompare2):
opname = 'int_ne', 'char_ne', 'unichar_ne', 'ptr_ne', 'uint_ne'
- cc_result = Conditions['NE']
+ suggested_cc = Conditions['NE']
class OpIntGt(OpCompare2):
opname = 'int_gt', 'char_gt'
- cc_result = Conditions['G']
+ suggested_cc = Conditions['G']
class OpIntGe(OpCompare2):
opname = 'int_ge', 'char_ge'
- cc_result = Conditions['GE']
+ suggested_cc = Conditions['GE']
class OpUIntLt(OpCompare2):
opname = 'uint_lt'
- cc_result = Conditions['B']
+ suggested_cc = Conditions['B']
class OpUIntLe(OpCompare2):
opname = 'uint_le'
- cc_result = Conditions['BE']
+ suggested_cc = Conditions['BE']
class OpUIntGt(OpCompare2):
opname = 'uint_gt'
- cc_result = Conditions['A']
+ suggested_cc = Conditions['A']
class OpUIntGe(OpCompare2):
opname = 'uint_ge'
- cc_result = Conditions['AE']
+ suggested_cc = Conditions['AE']
class JumpIf(Operation):
clobbers_cc = False
- result_kind = RK_NO_RESULT
- def __init__(self, gv_condition, targetbuilder, negate):
+ negate = False
+ def __init__(self, gv_condition, targetbuilder):
self.gv_condition = gv_condition
self.targetbuilder = targetbuilder
- self.negate = negate
- def allocate(self, allocator):
- allocator.using_cc(self.gv_condition)
+ def mark_used_vars(self, allocator):
+ allocator.using(self.gv_condition)
for gv in self.targetbuilder.inputargs_gv:
allocator.using(gv)
def generate(self, allocator):
- cc = self.gv_condition.cc_result
+ targetbuilder = self.targetbuilder
+ op = allocator.get_operand(self.gv_condition)
+ mc = allocator.mc
+ if isinstance(op, CCFLAG):
+ cc = op.cc
+ else:
+ allocator.clobber_cc()
+ op = allocator.get_operand(self.gv_condition)
+ mc.CMP(op, imm(0))
+ cc = Conditions['NE']
+ allocator.release(self.gv_condition)
+ operands = []
+ for gv in targetbuilder.inputargs_gv:
+ operands.append(allocator.get_operand(gv))
+ allocator.release(gv)
if self.negate:
cc = cond_negate(cc)
- mc = allocator.mc
- targetbuilder = self.targetbuilder
targetbuilder.set_coming_from(mc, insncond=cc)
- targetbuilder.inputoperands = [allocator.get_operand(gv)
- for gv in targetbuilder.inputargs_gv]
+ targetbuilder.inputoperands = operands
+ #assert targetbuilder.inputoperands.count(ebx) <= 1
+
+class JumpIfNot(JumpIf):
+ negate = True
class OpLabel(Operation):
- clobbers_cc = False
- result_kind = RK_NO_RESULT
+ # NB. this is marked to clobber the CC, because it cannot easily
+ # be saved/restored across a label. The problem is that someone
+ # might later try to jump to this label with a new value for
+ # the variable that is different from 0 or 1, i.e. which cannot
+ # be represented in the CC at all.
def __init__(self, lbl, args_gv):
self.lbl = lbl
self.args_gv = args_gv
- def allocate(self, allocator):
+ def mark_used_vars(self, allocator):
for v in self.args_gv:
allocator.using(v)
def generate(self, allocator):
+ operands = []
+ for v in self.args_gv:
+ operands.append(allocator.get_operand(v))
+ allocator.release(v)
lbl = self.lbl
lbl.targetaddr = allocator.mc.tell()
- lbl.targetstackdepth = allocator.required_frame_depth
- lbl.inputoperands = [allocator.get_operand(v) for v in self.args_gv]
+ lbl.inputoperands = operands
lbl.targetbuilder = None # done generating
class OpCall(Operation):
@@ -523,55 +598,57 @@
self.sigtoken = sigtoken
self.gv_fnptr = gv_fnptr
self.args_gv = args_gv
- def allocate(self, allocator):
- # XXX try to use eax for the result
+
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_fnptr)
for v in self.args_gv:
allocator.using(v)
+
def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- dstop = None
mc = allocator.mc
- stack_align_words = PROLOGUE_FIXED_WORDS
- if dstop != eax:
- mc.PUSH(eax)
- if CALL_ALIGN > 1: stack_align_words += 1
- if dstop != edx:
- mc.PUSH(edx)
- if CALL_ALIGN > 1: stack_align_words += 1
args_gv = self.args_gv
- num_placeholders = 0
- if CALL_ALIGN > 1:
- stack_align_words += len(args_gv)
- stack_align_words &= CALL_ALIGN-1
- if stack_align_words > 0:
- num_placeholders = CALL_ALIGN - stack_align_words
- mc.SUB(esp, imm(WORD * num_placeholders))
- for i in range(len(args_gv)-1, -1, -1):
+
+ stackargs_i = []
+ for i in range(len(args_gv)):
srcop = allocator.get_operand(args_gv[i])
- mc.PUSH(srcop)
+ if isinstance(srcop, MODRM):
+ stackargs_i.append(i)
+ else:
+ mc.MOV(mem(esp, WORD * i), srcop)
+ allocator.release(args_gv[i])
+
+ allocator.clobber3(eax, edx, ecx)
+ allocator.reserve_extra_stack(len(args_gv))
+
+ if len(stackargs_i) > 0:
+ tmp = eax
+ for i in stackargs_i:
+ srcop = allocator.get_operand(args_gv[i])
+ mc.MOV(tmp, srcop)
+ mc.MOV(mem(esp, WORD * i), tmp)
+ allocator.release(args_gv[i])
+
fnop = allocator.get_operand(self.gv_fnptr)
if isinstance(fnop, IMM32):
mc.CALL(rel32(fnop.value))
else:
mc.CALL(fnop)
- mc.ADD(esp, imm(WORD * (len(args_gv) + num_placeholders)))
- if dstop != edx:
- mc.POP(edx)
- if dstop != eax:
- if dstop is not None:
- mc.MOV(dstop, eax)
- mc.POP(eax)
-def field_operand(mc, base, fieldtoken):
- # may use ecx
+ allocator.release(self.gv_fnptr)
+ allocator.end_clobber(eax)
+ allocator.end_clobber(edx)
+ allocator.end_clobber(ecx)
+ if allocator.operation_result_is_used(self):
+ allocator.create_exactly_at(self, eax)
+
+
+def field_operand(allocator, base, fieldtoken):
fieldoffset, fieldsize = fieldtoken
if isinstance(base, MODRM):
- mc.MOV(ecx, base)
- base = ecx
+ tmp = allocator.create_scratch_reg(base)
+ allocator.end_clobber(tmp)
+ base = tmp
elif isinstance(base, IMM32):
fieldoffset += base.value
base = None
@@ -581,8 +658,8 @@
else:
return mem (base, fieldoffset)
-def array_item_operand(mc, base, arraytoken, opindex):
- # may use ecx
+def array_item_operand(allocator, base, arraytoken, opindex):
+ tmp = None
_, startoffset, itemoffset = arraytoken
if isinstance(opindex, IMM32):
@@ -591,28 +668,31 @@
indexshift = 0
elif itemoffset in SIZE2SHIFT:
if not isinstance(opindex, REG):
- mc.MOV(ecx, opindex)
- opindex = ecx
+ tmp = allocator.create_scratch_reg(opindex)
+ opindex = tmp
indexshift = SIZE2SHIFT[itemoffset]
else:
- mc.IMUL(ecx, opindex, imm(itemoffset))
- opindex = ecx
+ tmp = allocator.create_scratch_reg()
+ allocator.mc.IMUL(tmp, opindex, imm(itemoffset))
+ opindex = tmp
indexshift = 0
- assert base is not ecx
if isinstance(base, MODRM):
- if opindex != ecx:
- mc.MOV(ecx, base)
- else: # waaaa
+ if tmp is None:
+ tmp = allocator.create_scratch_reg(base)
+ else: # let's avoid using two scratch registers
opindex = None
if indexshift > 0:
- mc.SHL(ecx, imm8(indexshift))
- mc.ADD(ecx, base)
- base = ecx
+ allocator.mc.SHL(tmp, imm8(indexshift))
+ allocator.mc.ADD(tmp, base)
+ base = tmp
elif isinstance(base, IMM32):
startoffset += base.value
base = None
+ if tmp is not None:
+ allocator.end_clobber(tmp)
+
if itemoffset == 1:
return memSIB8(base, opindex, indexshift, startoffset)
else:
@@ -620,209 +700,141 @@
class OpComputeSize(Operation):
clobbers_cc = False
+ side_effects = False
def __init__(self, varsizealloctoken, gv_length):
self.varsizealloctoken = varsizealloctoken
self.gv_length = gv_length
- def allocate(self, allocator):
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_length)
def generate(self, allocator):
- dstop = allocator.get_operand(self)
srcop = allocator.get_operand(self.gv_length)
- mc = allocator.mc
- op_size = array_item_operand(mc, None, self.varsizealloctoken, srcop)
- try:
- mc.LEA(dstop, op_size)
- except FailedToImplement:
- mc.LEA(ecx, op_size)
- mc.MOV(dstop, ecx)
-
-def hard_store(mc, opmemtarget, opvalue, itemsize):
- # For the possibly hard cases of stores
- # Generates a store to 'opmemtarget' of size 'itemsize' == 1, 2 or 4.
- # If it is 1, opmemtarget must be a MODRM8; otherwise, it must be a MODRM.
- if itemsize == WORD:
- try:
- mc.MOV(opmemtarget, opvalue)
- except FailedToImplement:
- if opmemtarget.involves_ecx():
- mc.PUSH(opvalue)
- mc.POP(opmemtarget)
- else:
- mc.MOV(ecx, opvalue)
- mc.MOV(opmemtarget, ecx)
- else:
- must_pop_eax = False
- if itemsize == 1:
- if isinstance(opvalue, REG) and opvalue.lowest8bits:
- # a register whose lower 8 bits are directly readable
- opvalue = opvalue.lowest8bits
- elif isinstance(opvalue, IMM8):
- pass
- else:
- if opmemtarget.involves_ecx(): # grumble!
- mc.PUSH(eax)
- must_pop_eax = True
- scratch = eax
- else:
- scratch = ecx
- if opvalue.width == 1:
- mc.MOV(scratch.lowest8bits, opvalue)
- else:
- mc.MOV(scratch, opvalue)
- opvalue = scratch.lowest8bits
+ op_size = array_item_operand(allocator, None,
+ self.varsizealloctoken, srcop)
+ allocator.release(self.gv_length)
+ dstop = allocator.create_reg(self)
+ allocator.mc.LEA(dstop, op_size)
+
+class OpGetter(Operation):
+ side_effects = False
+ def generate(self, allocator):
+ opsource = self.generate_opsource(allocator)
+ dstop = allocator.create_reg(self)
+ if self.getwidth() == WORD:
+ allocator.mc.MOV(dstop, opsource)
else:
- assert itemsize == 2
- if isinstance(opvalue, MODRM) or type(opvalue) is IMM32:
- # no support for now to encode 16-bit immediates,
- # so we use a scratch register for this case too
- if opmemtarget.involves_ecx(): # grumble!
- mc.PUSH(eax)
- must_pop_eax = True
- scratch = eax
- else:
- scratch = ecx
- mc.MOV(scratch, opvalue)
- opvalue = scratch
- mc.o16() # prefix for the MOV below
- # and eventually, the real store:
- mc.MOV(opmemtarget, opvalue)
- if must_pop_eax:
- mc.POP(eax)
-
-def hard_load(mc, opdst, opmemsource, itemsize):
- # For the possibly hard cases of stores
- # Generates a load from 'opmemsource' of size 'itemsize' == 1, 2 or 4.
- # If it is 1, opmemtarget must be a MODRM8; otherwise, it must be a MODRM.
- if itemsize == WORD:
- try:
- mc.MOV(opdst, opmemsource)
- except FailedToImplement: # opdst is a MODRM
- if opmemsource.involves_ecx():
- mc.PUSH(opmemsource)
- mc.POP(opdst)
- else:
- mc.MOV(ecx, opmemsource)
- mc.MOV(opdst, ecx)
- else:
- try:
- mc.MOVZX(opdst, opmemsource)
- except FailedToImplement: # opdst is a MODRM
- if opmemsource.involves_ecx():
- mc.PUSH(eax)
- mc.MOVZX(eax, opmemsource)
- mc.MOV(opdst, eax)
- mc.POP(eax)
+ allocator.mc.MOVZX(dstop, opsource)
+
+class OpSetter(Operation):
+ def generate(self, allocator):
+ tmpval = None
+ width = self.getwidth()
+ opvalue = allocator.get_operand(self.gv_value)
+ if width == 1:
+ try:
+ opvalue = opvalue.lowest8bits()
+ except ValueError:
+ tmpval = allocator.create_scratch_reg8(opvalue)
+ opvalue = tmpval
else:
- mc.MOVZX(ecx, opmemsource)
- mc.MOV(opdst, ecx)
+ if isinstance(opvalue, MODRM8):
+ tmpval = allocator.create_scratch_reg8(opvalue)
+ opvalue = tmpval
+ else:
+ if isinstance(opvalue, MODRM):
+ tmpval = allocator.create_scratch_reg(opvalue)
+ opvalue = tmpval
+ optarget = self.generate_optarget(allocator)
+ if width == 2:
+ if isinstance(opvalue, IMM32):
+ opvalue = IMM16(opvalue.value)
+ allocator.mc.o16()
+ allocator.mc.MOV(optarget, opvalue)
+ if tmpval is not None:
+ allocator.end_clobber(tmpval)
-class OpGetField(Operation):
+class OpGetField(OpGetter):
clobbers_cc = False
def __init__(self, fieldtoken, gv_ptr):
self.fieldtoken = fieldtoken
self.gv_ptr = gv_ptr
- def allocate(self, allocator):
+ def getwidth(self):
+ _, fieldsize = self.fieldtoken
+ return fieldsize
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_ptr)
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # result not used
+ def generate_opsource(self, allocator):
opptr = allocator.get_operand(self.gv_ptr)
- mc = allocator.mc
- opsource = field_operand(mc, opptr, self.fieldtoken)
- _, fieldsize = self.fieldtoken
- hard_load(mc, dstop, opsource, fieldsize)
+ opsource = field_operand(allocator, opptr, self.fieldtoken)
+ allocator.release(self.gv_ptr)
+ return opsource
-class OpSetField(Operation):
+class OpSetField(OpSetter):
clobbers_cc = False
- result_kind = RK_NO_RESULT
def __init__(self, fieldtoken, gv_ptr, gv_value):
self.fieldtoken = fieldtoken
self.gv_ptr = gv_ptr
self.gv_value = gv_value
- def allocate(self, allocator):
+ def getwidth(self):
+ _, fieldsize = self.fieldtoken
+ return fieldsize
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_ptr)
allocator.using(self.gv_value)
- def generate(self, allocator):
+ def generate_optarget(self, allocator):
opptr = allocator.get_operand(self.gv_ptr)
- opvalue = allocator.get_operand(self.gv_value)
- mc = allocator.mc
- optarget = field_operand(mc, opptr, self.fieldtoken)
- _, fieldsize = self.fieldtoken
- hard_store(mc, optarget, opvalue, fieldsize)
+ optarget = field_operand(allocator, opptr, self.fieldtoken)
+ allocator.release(self.gv_ptr)
+ allocator.release(self.gv_value)
+ return optarget
-class OpGetArrayItem(Operation):
+class OpGetArrayItem(OpGetter):
def __init__(self, arraytoken, gv_array, gv_index):
self.arraytoken = arraytoken
self.gv_array = gv_array
self.gv_index = gv_index
- def allocate(self, allocator):
+ def getwidth(self):
+ _, _, itemsize = self.arraytoken
+ return itemsize
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_array)
allocator.using(self.gv_index)
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # result not used
+ def generate_opsource(self, allocator):
oparray = allocator.get_operand(self.gv_array)
opindex = allocator.get_operand(self.gv_index)
- mc = allocator.mc
- opsource = array_item_operand(mc, oparray, self.arraytoken, opindex)
- _, _, itemsize = self.arraytoken
- hard_load(mc, dstop, opsource, itemsize)
+ opsource = array_item_operand(allocator, oparray,
+ self.arraytoken, opindex)
+ allocator.release(self.gv_array)
+ allocator.release(self.gv_index)
+ return opsource
+
+class OpGetArraySubstruct(OpGetArrayItem):
+ def generate(self, allocator):
+ opsource = self.generate_opsource(allocator)
+ dstop = allocator.create_reg(self)
+ allocator.mc.LEA(dstop, opsource)
-class OpSetArrayItem(Operation):
- result_kind = RK_NO_RESULT
+class OpSetArrayItem(OpSetter):
def __init__(self, arraytoken, gv_array, gv_index, gv_value):
self.arraytoken = arraytoken
self.gv_array = gv_array
self.gv_index = gv_index
self.gv_value = gv_value
- def allocate(self, allocator):
- allocator.using(self.gv_array)
- allocator.using(self.gv_index)
- allocator.using(self.gv_value)
- def generate(self, allocator):
- oparray = allocator.get_operand(self.gv_array)
- opindex = allocator.get_operand(self.gv_index)
- opvalue = allocator.get_operand(self.gv_value)
- mc = allocator.mc
- optarget = array_item_operand(mc, oparray, self.arraytoken, opindex)
+ def getwidth(self):
_, _, itemsize = self.arraytoken
- hard_store(mc, optarget, opvalue, itemsize)
-
-class OpGetArraySubstruct(Operation):
- def __init__(self, arraytoken, gv_array, gv_index):
- self.arraytoken = arraytoken
- self.gv_array = gv_array
- self.gv_index = gv_index
- def allocate(self, allocator):
+ return itemsize
+ def mark_used_vars(self, allocator):
allocator.using(self.gv_array)
allocator.using(self.gv_index)
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # result not used
+ allocator.using(self.gv_value)
+ def generate_optarget(self, allocator):
oparray = allocator.get_operand(self.gv_array)
opindex = allocator.get_operand(self.gv_index)
- mc = allocator.mc
- opsource = array_item_operand(mc, oparray, self.arraytoken, opindex)
- try:
- mc.LEA(dstop, opsource)
- except FailedToImplement:
- mc.LEA(ecx, opsource)
- mc.MOV(dstop, ecx)
-
-class OpGetFrameBase(Operation):
- def generate(self, allocator):
- try:
- dstop = allocator.get_operand(self)
- except KeyError:
- return # result not used
- mc = allocator.mc
- mc.MOV(dstop, ebp)
+ opsource = array_item_operand(allocator, oparray,
+ self.arraytoken, opindex)
+ allocator.release(self.gv_array)
+ allocator.release(self.gv_index)
+ allocator.release(self.gv_value)
+ return opsource
# ____________________________________________________________
@@ -890,7 +902,88 @@
assert 0 <= cond < INSN_JMP
return cond ^ 1
+def cond_swapargs(cond):
+ return COND_SWAPARGS[cond]
+
+COND_SWAPARGS = range(16)
+COND_SWAPARGS[Conditions['L']] = Conditions['G']
+COND_SWAPARGS[Conditions['G']] = Conditions['L']
+COND_SWAPARGS[Conditions['NL']] = Conditions['NG']
+COND_SWAPARGS[Conditions['NG']] = Conditions['NL']
+COND_SWAPARGS[Conditions['B']] = Conditions['A']
+COND_SWAPARGS[Conditions['A']] = Conditions['B']
+COND_SWAPARGS[Conditions['NB']] = Conditions['NA']
+COND_SWAPARGS[Conditions['NA']] = Conditions['NB']
+
SIZE2SHIFT = {1: 0,
2: 1,
4: 2,
8: 3}
+
+# ____________________________________________________________
+
+class CCFLAG(OPERAND):
+ _attrs_ = ['cc', 'SETCOND', 'load_into_cc']
+ def __init__(self, cond, load_into_cc):
+ self.cond = cond
+ self.cc = Conditions[cond]
+ self.SETCOND = getattr(I386CodeBuilder, 'SET' + cond)
+ self.load_into_cc = load_into_cc
+
+ def assembler(self):
+ return self.cond
+
+
+def load_into_cc_lt(mc, srcop):
+ mc.XOR(ecx, ecx)
+ mc.CMP(ecx, srcop)
+
+def load_into_cc_le(mc, srcop):
+ mc.MOV(ecx, imm8(1))
+ mc.CMP(ecx, srcop)
+
+def load_into_cc_eq(mc, srcop):
+ mc.CMP(srcop, imm8(1))
+
+def load_into_cc_ne(mc, srcop):
+ mc.CMP(srcop, imm8(0))
+
+load_into_cc_gt = load_into_cc_ne
+load_into_cc_ge = load_into_cc_eq
+
+ccflag_lt = CCFLAG('L', load_into_cc_lt)
+ccflag_le = CCFLAG('LE', load_into_cc_le)
+ccflag_eq = CCFLAG('E', load_into_cc_eq)
+ccflag_ne = CCFLAG('NE', load_into_cc_ne)
+ccflag_gt = CCFLAG('G', load_into_cc_gt)
+ccflag_ge = CCFLAG('GE', load_into_cc_ge)
+
+ccflag_ult = CCFLAG('B', load_into_cc_lt)
+ccflag_ule = CCFLAG('BE', load_into_cc_le)
+ccflag_ugt = CCFLAG('A', load_into_cc_gt)
+ccflag_uge = CCFLAG('AE', load_into_cc_ge)
+
+ccflags = [None] * 16
+ccflags[Conditions['L']] = ccflag_lt
+ccflags[Conditions['LE']] = ccflag_le
+ccflags[Conditions['E']] = ccflag_eq
+ccflags[Conditions['NE']] = ccflag_ne
+ccflags[Conditions['G']] = ccflag_gt
+ccflags[Conditions['GE']] = ccflag_ge
+ccflags[Conditions['B']] = ccflag_ult
+ccflags[Conditions['BE']] = ccflag_ule
+ccflags[Conditions['A']] = ccflag_ugt
+ccflags[Conditions['AE']] = ccflag_uge
+
+##def ccmov(mc, dstop, ccop):
+## XXX
+## if dstop != ccop:
+## ccop.SETCOND(mc, cl)
+## if isinstance(dstop, CCFLAG):
+## dstop.load_into_cc(mc, cl)
+## else:
+## try:
+## mc.MOVZX(dstop, cl)
+## except FailedToImplement:
+## mc.MOVZX(ecx, cl)
+## mc.MOV(dstop, ecx)
Modified: pypy/branch/new-jit-codegen/i386/regalloc.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/regalloc.py (original)
+++ pypy/branch/new-jit-codegen/i386/regalloc.py Wed Feb 7 13:44:55 2007
@@ -2,6 +2,7 @@
"""
+import sys
from pypy.rlib.objectmodel import we_are_translated
from pypy.rpython.lltypesystem import lltype
from pypy.jit.codegen.i386.operation import *
@@ -10,7 +11,7 @@
class StackOpCache:
- INITIAL_STACK_EBP_OFS = -4
+ INITIAL_STACK_EBP_OFS = -1
stack_op_cache = StackOpCache()
stack_op_cache.lst = []
@@ -27,276 +28,541 @@
ofs = op.ofs_relative_to_ebp()
return StackOpCache.INITIAL_STACK_EBP_OFS - ofs / WORD
+def write_stack_reserve(mc, stackn):
+ addr = mc.tell()
+ offset = WORD * ((StackOpCache.INITIAL_STACK_EBP_OFS+1) - stackn)
+ mc.ADD(esp, IMM32(offset)) # always encode offset on 32 bits
+ return addr
+
+def write_stack_adj(mc, stackn):
+ addr = mc.tell()
+ offset = WORD * ((StackOpCache.INITIAL_STACK_EBP_OFS+1) - stackn)
+ mc.LEA(esp, fixedsize_esp_ofs(offset))
+ return addr
+
class RegAllocator(object):
- AVAILABLE_REGS = [eax, edx, ebx, esi, edi] # XXX ecx reserved for stuff
- # 'gv' -- GenVars, used as arguments and results of operations
- #
- # 'loc' -- location, a small integer that represents an abstract
- # register number
- #
- # 'operand' -- a concrete machine code operand, which can be a
- # register (ri386.eax, etc.) or a stack memory operand
-
- def __init__(self):
- self.nextloc = 0
- self.var2loc = {}
- self.available_locs = []
- self.force_loc2operand = {}
- self.force_operand2loc = {}
- self.initial_moves = []
- self.num_stack_locs = 0
+ def __init__(self, operations):
+ self.operations = operations
+ self.operationindex = len(operations)
+ self.lifetime = {} # {variable: operation_index}
+ self.suggested_location = {} # {variable: location}
+ self.var2loc = {gv_frame_base: ebp}
+
+ # ----------
def set_final(self, final_vars_gv):
for v in final_vars_gv:
self.using(v)
- def creating(self, v):
- try:
- loc = self.var2loc[v]
- except KeyError:
- pass
- else:
- if loc >= self.num_stack_locs:
- self.available_locs.append(loc) # now available again for reuse
-
- def using(self, v):
- if not v.is_const and v not in self.var2loc:
- try:
- loc = self.available_locs.pop()
- except IndexError:
- loc = self.nextloc
- self.nextloc += 1
- self.var2loc[v] = loc
-
- def creating_cc(self, v):
- if self.need_var_in_cc is v:
- # common case: v is a compare operation whose result is precisely
- # what we need to be in the CC
- self.need_var_in_cc = None
- self.creating(v)
-
- def save_cc(self):
- # we need a value to be in the CC, but we see a clobbering
- # operation, so we copy the original CC-creating operation down
- # past the clobbering operation.
- # <pedronis> completely obscure code
- # <arigo> yes, well, needs very careful reviewing I guess :-)
- v = self.need_var_in_cc
- if not we_are_translated():
- assert v in self.operations[:self.operationindex]
- v = v.copy()
- self.operations.insert(self.operationindex, v)
- v.allocate(self)
- self.need_var_in_cc = None
-
- def using_cc(self, v):
- assert isinstance(v, Operation)
- assert 0 <= v.cc_result < INSN_JMP
- if self.need_var_in_cc is not None and self.need_var_in_cc is not v:
- self.save_cc()
- self.need_var_in_cc = v
+ def set_final_at_loc(self, final_vars_gv, locations):
+ for i in range(len(final_vars_gv)):
+ v = final_vars_gv[i]
+ self.using(v)
+ self.suggested_location[v] = locations[i]
- def allocate_locations(self, operations):
- # assign locations to gvars
- self.operations = operations
- self.need_var_in_cc = None
- self.operationindex = len(operations)
- for i in range(len(operations)-1, -1, -1):
- v = operations[i]
- if (self.need_var_in_cc is not None and
- self.need_var_in_cc is not v and v.clobbers_cc):
- self.save_cc()
- kind = v.result_kind
- if kind == RK_WORD:
- self.creating(v)
- elif kind == RK_CC:
- self.creating_cc(v)
- v.allocate(self)
+ def compute_lifetimes(self):
+ for i in range(len(self.operations)-1, -1, -1):
self.operationindex = i
- if self.need_var_in_cc is not None:
- self.save_cc()
+ op = self.operations[i]
+ if not op.side_effects and op not in self.lifetime:
+ self.operations[i] = dead_operation # operation not used
+ else:
+ op.mark_used_vars(self)
+
+ def using(self, v):
+ if v.is_const or v in self.lifetime:
+ return False
+ else:
+ self.lifetime[v] = self.operationindex
+ return True # variable is dying here
- def force_var_operands(self, force_vars, force_operands, at_start):
- force_loc2operand = self.force_loc2operand
- force_operand2loc = self.force_operand2loc
- for i in range(len(force_vars)):
- v = force_vars[i]
- operand = force_operands[i]
+ def using_inplace(self, v, vtarget):
+ if self.using(v):
+ # this operation 'vtarget' can modify its argument 'v'
+ # in-place, and 'v' is not alive after the operation.
+ # Propagate the suggested location for 'vtarget' backwards to 'v'.
try:
- loc = self.var2loc[v]
+ self.suggested_location[v] = self.suggested_location[vtarget]
+ return True # got a suggestion
except KeyError:
- if at_start:
- pass # input variable not used anyway
- else:
- self.add_final_move(v, operand, make_copy=v.is_const)
+ pass
+ return False # got no suggestion
+
+ def suggests(self, v, loc):
+ self.suggested_location[v] = loc
+
+ def varsused(self):
+ return self.lifetime.keys()
+
+ # ----------
+
+ AVAILABLE_REGS = (eax.bitmask |
+ edx.bitmask |
+ ecx.bitmask |
+ ebx.bitmask |
+ esi.bitmask |
+ edi.bitmask)
+
+ def init_reg_alloc(self, inputvars_gv, inputlocations):
+ self.registers_free = self.AVAILABLE_REGS # bitmask
+ self.cc_used_by = None
+ self.stack_op_used = {}
+ self.nstackidx = 0
+ self.nstackmax = 0
+ self.vars_in_use = {} # {variable: dying_operation_index}
+ self.operationindex = 0
+ self.inputvars_gv = inputvars_gv
+ self.inputlocations = inputlocations
+
+ def force_loc_used(self, v, loc):
+ if isinstance(loc, MODRM):
+ assert loc not in self.stack_op_used
+ self.stack_op_used[loc] = None
+ n = stack_n_from_op(loc)
+ if n >= self.nstackmax:
+ self.nstackmax = n + 1
+ elif isinstance(loc, REG):
+ assert self.registers_free & loc.bitmask
+ self.registers_free &= ~loc.bitmask
+ elif isinstance(loc, CCFLAG):
+ self.cc_used_by = v
+ else:
+ raise AssertionError(loc)
+
+ def consume_loc(self, v, loc):
+ if isinstance(loc, MODRM):
+ if loc not in self.stack_op_used:
+ self.stack_op_used[loc] = None
+ return True
+ elif isinstance(loc, REG):
+ if self.registers_free & loc.bitmask:
+ self.registers_free &= ~loc.bitmask
+ return True
+ elif isinstance(loc, CCFLAG):
+ if self.cc_used_by is None:
+ self.cc_used_by = v
+ return True
+ return False
+
+ def _no_longer_in_use(self, v):
+ del self.vars_in_use[v]
+ loc = self.var2loc[v]
+ if isinstance(loc, CCFLAG):
+ assert self.cc_used_by is v
+ self._mark_loc_as_free(loc)
+
+ def _mark_loc_as_free(self, loc):
+ if isinstance(loc, MODRM):
+ del self.stack_op_used[loc]
+ elif isinstance(loc, REG):
+ self.registers_free |= loc.bitmask
+ elif isinstance(loc, CCFLAG):
+ self.cc_used_by = None
+
+ def generate_operations(self, mc):
+ if not we_are_translated():
+ print
+
+ # reserve locations for the inputvars
+ for i in range(len(self.inputvars_gv)):
+ v = self.inputvars_gv[i]
+ if v in self.lifetime: # else: input argument is not used
+ loc = self.inputlocations[i]
+ self.var2loc[v] = loc
+ self.vars_in_use[v] = self.lifetime[v]
+ self.force_loc_used(v, loc)
+ if not we_are_translated():
+ print 'in %20s: %s' % (loc, short(v))
+
+ self._check()
+ self.mc = mc
+ # Generate all operations.
+ # Actual registers or stack locations are allocated as we go.
+ for i in range(len(self.operations)):
+ op = self.operations[i]
+ if op.clobbers_cc:
+ self.clobber_cc()
+ self._check()
+ op.generate(self)
+ if not we_are_translated():
+ self._showprogress()
+ self.operationindex = i + 1
+
+ def _showprogress(self):
+ class Collector:
+ def __init__(self):
+ self.lst = []
+ self.using = self.lst.append
+ def using_inplace(self, v, _):
+ self.lst.append(v)
+ def suggests(self, v, loc):
+ pass
+ col = Collector()
+ i = self.operationindex
+ op = self.operations[i]
+ op.mark_used_vars(col)
+ args = [short(v) for v in col.lst]
+ args = ', '.join(args)
+ print ' | %20s: %s (%s)' % (self.var2loc.get(op, ''),
+ short(op), args)
+ for v, endtime in self.vars_in_use.items():
+ assert endtime > i
+ self._check()
+
+ def _use_another_stack_loc(self):
+ for i in range(self.nstackidx, self.nstackmax):
+ loc = stack_op(i)
+ if loc not in self.stack_op_used:
+ self.nstackidx = i + 1
+ break
+ else:
+ for i in range(self.nstackidx):
+ loc = stack_op(i)
+ if loc not in self.stack_op_used:
+ self.nstackidx = i + 1
+ break
else:
- # we need to make of copy of this var if we have conflicting
- # requirements about where it should go:
- # * its location is forced to another operand
- # * the operand is assigned to another location
- # * it should be in the stack, but it is not
- if (loc in force_loc2operand or operand in force_operand2loc or
- (loc < self.num_stack_locs and not (
- isinstance(operand, MODRM)
- and operand.is_relative_to_ebp()))):
- if at_start:
- self.initial_moves.append((loc, operand))
- else:
- self.add_final_move(v, operand, make_copy=True)
- else:
- force_loc2operand[loc] = operand
- force_operand2loc[operand] = loc
-
- def add_final_move(self, v, targetoperand, make_copy):
- if make_copy:
- v = OpSameAs(v)
- self.operations.append(v)
- loc = self.nextloc
- self.nextloc += 1
+ i = self.nstackidx = self.nstackmax
+ self.nstackmax = i + 1
+ loc = stack_op(i)
+ assert loc not in self.stack_op_used
+ self.stack_op_used[loc] = None
+ return loc
+
+ def reserve_extra_stack(self, extra):
+ max = self.nstackmax
+ base = max - extra
+ if base < 0:
+ base = 0
+ while max > base and stack_op(max-1) not in self.stack_op_used:
+ max -= 1
+ self.nstackmax = max + extra
+
+ def get_operand(self, v):
+ if v.is_const:
+ return imm(v.revealconst(lltype.Signed))
+ else:
+ return self.var2loc[v]
+
+ def _use_next_modrm(self, v, regnum_must_be_before=8):
+ """Select the next mod/rm location to use for the new operation 'v'.
+ If 'v' is None, this will always return a register; else it might
+ decide to immediately create 'v' in a stack location.
+ """
+ #print self.registers_free
+ if self.registers_free:
+ for i in range(regnum_must_be_before-1, -1, -1):
+ if self.registers_free & (1 << i):
+ self.registers_free &= ~ (1 << i)
+ return registers[i]
+ # spill the register holding the variable that has the longest
+ # time remaining to live (it may be our 'v' itself)
+ if v is None:
+ dyinglimit = self.operationindex # must pick vars dying after that
+ spillvar = None
+ else:
+ dyinglimit = self.lifetime[v]
+ spillvar = v # initial guess, can be overridden in the loop below
+ regloc = None
+ for v1, dying in self.vars_in_use.iteritems():
+ if dying > dyinglimit:
+ loc = self.var2loc[v1]
+ if not isinstance(loc, REG):
+ continue
+ if loc.op >= regnum_must_be_before:
+ continue # never reached if regnum_must_be_before == 8
+ regloc = loc
+ dyinglimit = dying
+ spillvar = v1
+ if spillvar is None:
+ raise OutOfRegistersError
+ #print 'time span of %s: now is %d, lives until %d' % (
+ # v, self.operationindex, self.lifetime[v])
+ if spillvar is v:
+ return self._use_another_stack_loc()
+ else:
+ assert regloc is not None
+ self._spill(spillvar, regloc)
+ return regloc
+
+ def _spill(self, spillvar, oldloc):
+ spillloc = self._use_another_stack_loc()
+ if not we_are_translated():
+ print ' # %20s: SPILL %s' % (spillloc, oldloc)
+ self.mc.MOV(spillloc, oldloc)
+ self.var2loc[spillvar] = spillloc
+ return spillloc
+
+ def _use_next_reg(self):
+ return self._use_next_modrm(None)
+
+ def _use_next_reg_abcd(self):
+ return self._use_next_modrm(None, regnum_must_be_before=4)
+
+ def _created(self, v, loc):
+ assert v not in self.var2loc
+ self.vars_in_use[v] = ltime = self.lifetime[v]
+ assert ltime > self.operationindex
self.var2loc[v] = loc
- self.force_loc2operand[loc] = targetoperand
- def allocate_registers(self):
- # assign registers to locations that don't have one already
- force_loc2operand = self.force_loc2operand
- operands = []
- seen_regs = 0
- seen_stackn = {}
- last_seen_stackn = -1
- for op in force_loc2operand.values():
- if isinstance(op, REG):
- seen_regs |= 1 << op.op
- elif isinstance(op, MODRM):
- n = stack_n_from_op(op)
- seen_stackn[n] = None
- if n > last_seen_stackn:
- last_seen_stackn = n
- i = 0
- stackn = 0
- num_stack_locs = self.num_stack_locs
- for loc in range(self.nextloc):
- try:
- operand = force_loc2operand[loc]
- except KeyError:
- try:
- # try to grab the next free register,
- # unless this location is forced to go to the stack
- if loc < num_stack_locs:
- raise IndexError
- while True:
- operand = RegAllocator.AVAILABLE_REGS[i]
- i += 1
- if not (seen_regs & (1 << operand.op)):
- break
- except IndexError:
- while stackn in seen_stackn:
- stackn += 1
- operand = stack_op(stackn)
- stackn += 1
- operands.append(operand)
- self.operands = operands
- if stackn <= last_seen_stackn:
- stackn = last_seen_stackn + 1
- self.required_frame_depth = stackn
-
- def get_operand(self, gv_source):
- if gv_source.is_const:
- return imm(gv_source.revealconst(lltype.Signed))
+ def release(self, v):
+ """Stop using argument 'v'. Must be called for each used argument."""
+ ok = self.lastuse(v) and v in self.vars_in_use
+ if ok:
+ self._no_longer_in_use(v)
+ return ok
+
+ def lastuse(self, v):
+ """Is this the last time the argument 'v' is used?"""
+ if v.is_const:
+ return False
else:
- loc = self.var2loc[gv_source]
- return self.operands[loc]
+ endtime = self.lifetime[v]
+ assert endtime >= self.operationindex
+ return endtime == self.operationindex
+
+ def create(self, v, suggested_loc=None):
+ """Create the result of the operation 'v', possibly at the
+ suggested location. CAN SPILL ONE REGISTER."""
+ if suggested_loc is not None and self.consume_loc(v, suggested_loc):
+ self._created(v, suggested_loc)
+ return suggested_loc
+ suggested_loc = self.suggested_location.get(v, None)
+ if suggested_loc is not None and self.consume_loc(v, suggested_loc):
+ self._created(v, suggested_loc)
+ return suggested_loc
+ loc = self._use_next_modrm(v)
+ self._created(v, loc)
+ return loc
+
+ def create_reg(self, v):
+ """Create the result of the operation 'v' in any register
+ currently available. CAN SPILL ONE REGISTER."""
+ suggested_loc = self.suggested_location.get(v, None)
+ if isinstance(suggested_loc, REG):
+ if self.consume_loc(v, suggested_loc):
+ self._created(v, suggested_loc)
+ return suggested_loc
+ loc = self._use_next_reg()
+ self._created(v, loc)
+ return loc
+
+ def create_exactly_at(self, v, loc):
+ """Create the result of the operation 'v' at 'loc'."""
+ ok = self.consume_loc(v, loc)
+ assert ok
+ self._created(v, loc)
+
+ def create_in_cc(self, v, ccloc):
+ """Create the result of the operation 'v' in the given cc flags.
+ Doesn't move stuff around."""
+ assert self.cc_used_by is None
+ self._created(v, ccloc)
+ self.cc_used_by = v
+
+ def create_scratch_reg(self, srcloc=None):
+ """Return a scratch register for the current operation.
+ Warning, this might be the same register as one of the input args.
+ CAN SPILL ONE REGISTER. You must eventually call end_clobber()."""
+ reg = self._use_next_reg()
+ if srcloc is not None and reg is not srcloc:
+ self.mc.MOV(reg, srcloc)
+ return reg
+
+ def create_scratch_reg8(self, srcloc=None):
+ reg32 = self._use_next_reg_abcd()
+ reg8 = reg32.lowest8bits()
+ if srcloc is not None and reg8 is not srcloc and reg32 is not srcloc:
+ if srcloc.width == 1:
+ self.mc.MOV(reg8, srcloc)
+ else:
+ self.mc.MOV(reg32, srcloc)
+ return reg8
+
+ def operation_result_is_used(self, v):
+ return v in self.lifetime
+
+ def clobber(self, reg):
+ """Clobbers a register, i.e. move away a value that would be there.
+ It might go to a different register or to the stack.
+ You must eventually call end_clobber()."""
+ assert isinstance(reg, REG)
+ if not self.registers_free & reg.bitmask:
+ for v1 in self.vars_in_use:
+ if self.var2loc[v1] == reg:
+ self._move_away(v1)
+ break
+ assert self.registers_free & reg.bitmask
+ self.registers_free &= ~reg.bitmask
+
+ def clobber2(self, reg1, reg2):
+ """Clobbers two registers. Unlike two individual clobber() calls,
+ where the first call might overwrite the other reg, this one
+ preserves the current content of both 'reg1' and 'reg2'.
+ You must eventually call end_clobber() twice."""
+ if not self.registers_free & reg2.bitmask:
+ # order trick: if reg2 is free but reg1 used, doing clobber() in
+ # the following order could first move reg1 to reg2, and then
+ # immediately away from reg2.
+ self.clobber(reg1) # <- here reg1 cannot go to reg2
+ self.clobber(reg2)
+ else:
+ self.clobber(reg2) # reg2 is free, so it doesn't go anywhere
+ self.clobber(reg1)
+
+ def clobber3(self, reg1, reg2, reg3):
+ if not self.registers_free & reg3.bitmask:
+ self.clobber2(reg1, reg2) # they cannot go to reg3
+ self.clobber(reg3)
+ else:
+ self.clobber(reg3) # free, so doesn't go anywhere
+ self.clobber2(reg1, reg2)
+
+ def end_clobber(self, reg):
+ assert isinstance(reg, REG)
+ self.registers_free |= reg.bitmask
+
+ def clobber_cc(self):
+ v = self.cc_used_by
+ if v is not None:
+ self.cc_used_by = None
+ # pick a newloc that is either one of [eax, ecx, edx, ebx]
+ # or a stack location
+ oldloc = self.var2loc[v]
+ newloc = self._use_next_modrm(v, regnum_must_be_before=4)
+ if not we_are_translated():
+ print ' # %20s: MOVE AWAY FROM %s' % (newloc, oldloc)
+ assert isinstance(oldloc, CCFLAG)
+ mc = self.mc
+ newloc8 = newloc.lowest8bits()
+ if isinstance(newloc, REG):
+ oldloc.SETCOND(mc, newloc8)
+ mc.MOVZX(newloc, newloc8)
+ else:
+ mc.MOV(newloc, imm8(0))
+ oldloc.SETCOND(mc, newloc8)
+ self._mark_loc_as_free(oldloc)
+ self.var2loc[v] = newloc
+
+ def lock(self, loc):
+ """Temporarily prevent 'loc' from being overwritten by the
+ functions marked as 'moves stuff around'. Return True if the
+ lock is successful, False if the location was not free in the
+ first place."""
+ return self.consume_loc(None, loc)
+
+ def unlock(self, loc):
+ """Call sometime after a lock() that returned True."""
+ self._mark_loc_as_free(loc)
+
+ def _move_away(self, v):
+ # move 'v' away, into a newly allocated register or stack location,
+ # possibly spilling another register
+ oldloc = self.var2loc[v]
+ newloc = self._use_next_modrm(v)
+ if not we_are_translated():
+ print ' # %20s: MOVE AWAY FROM %s' % (newloc, oldloc)
+ self.mc.MOV(newloc, oldloc)
+ self._mark_loc_as_free(oldloc)
+ self.var2loc[v] = newloc
+ return newloc
- def load_location_with(self, loc, gv_source):
- dstop = self.operands[loc]
- srcop = self.get_operand(gv_source)
- if srcop != dstop:
- self.mc.MOV(dstop, srcop)
- return dstop
-
- def generate_initial_moves(self):
- initial_moves = self.initial_moves
- # first make sure that the reserved stack frame is big enough
- last_n = self.required_frame_depth - 1
- for loc, srcoperand in initial_moves:
- if isinstance(srcoperand, MODRM):
- n = stack_n_from_op(srcoperand)
- if last_n < n:
- last_n = n
- if last_n >= 0:
- if CALL_ALIGN > 1:
- last_n = (last_n & ~(CALL_ALIGN-1)) + (CALL_ALIGN-1)
- self.required_frame_depth = last_n + 1
- self.mc.LEA(esp, stack_op(last_n))
+ def _check(self):
+ if not we_are_translated():
+ def unpackbitmask(x):
+ return dict.fromkeys([r for r in registers if x & r.bitmask])
+ rf = unpackbitmask(self.AVAILABLE_REGS)
+ locs_seen = {}
+ for v in self.vars_in_use:
+ loc = self.var2loc[v]
+ assert loc not in locs_seen
+ locs_seen[loc] = v
+ if isinstance(loc, REG):
+ del rf[loc]
+ assert unpackbitmask(self.registers_free) == rf
+
+ # ----------
+
+ def generate_final_moves(self, final_vars_gv, locations):
# XXX naive algo for now
- for loc, srcoperand in initial_moves:
- if self.operands[loc] != srcoperand:
- self.mc.PUSH(srcoperand)
- initial_moves.reverse()
- for loc, srcoperand in initial_moves:
- if self.operands[loc] != srcoperand:
- self.mc.POP(self.operands[loc])
-
- def randomize_stack(self):
- import random
- last_n = self.required_frame_depth - 1
- for i in range(last_n+1, last_n+50):
- self.mc.MOV(ecx, stack_op(i))
- self.mc.LEA(ecx, mem(ecx, random.randrange(-sys.maxint,
- sys.maxint)))
- self.mc.MOV(stack_op(i), ecx)
- self.mc.LEA(ecx, mem(ecx, random.randrange(-sys.maxint,
- sys.maxint)))
-
- def generate_operations(self):
- for v in self.operations:
- if DEBUG_STACK:
- self.randomize_stack()
- v.generate(self)
- cc = v.cc_result
- if cc >= 0 and v in self.var2loc:
- # force a comparison instruction's result into a
- # regular location
- dstop = self.get_operand(v)
- mc = self.mc
- insn = EMIT_SETCOND[cc]
- insn(mc, cl)
- try:
- mc.MOVZX(dstop, cl)
- except FailedToImplement:
- mc.MOVZX(ecx, cl)
- mc.MOV(dstop, ecx)
- if DEBUG_STACK:
- self.randomize_stack()
-
- def force_stack_storage(self, lst):
- # this is called at the very beginning, so the 'loc' numbers
- # computed here are the smaller ones
- N = 0
- for v, place in lst:
- self.using(v)
- loc = self.var2loc[v]
- if loc >= N:
- N = loc + 1
- self.num_stack_locs = N
-
- def save_storage_places(self, lst):
- for v, place in lst:
- loc = self.var2loc[v]
- operand = self.operands[loc]
- place.offset = operand.ofs_relative_to_ebp()
+ pops = []
+ for i in range(len(final_vars_gv)):
+ v = final_vars_gv[i]
+ if not v.is_const:
+ srcloc = self.var2loc[v]
+ dstloc = locations[i]
+ if srcloc != dstloc:
+ if not we_are_translated():
+ print ' > %20s--->->->---%s' % (srcloc, dstloc)
+ if isinstance(srcloc, CCFLAG):
+ self.mc.PUSH(imm8(0))
+ srcloc.SETCOND(self.mc, mem8(esp))
+ else:
+ self.mc.PUSH(srcloc)
+ pops.append(dstloc)
+ while pops:
+ dstloc = pops.pop()
+ self.mc.POP(dstloc)
+ for i in range(len(final_vars_gv)):
+ v = final_vars_gv[i]
+ if v.is_const:
+ dstloc = locations[i]
+ self.mc.MOV(dstloc, imm(v.revealconst(lltype.Signed)))
+
+
+class OutOfRegistersError(Exception):
+ pass
+def short(op, memo={}):
+ key = op.__class__.__name__
+ d = memo.setdefault(key, {})
+ try:
+ n = d[op]
+ except KeyError:
+ n = d[op] = len(d)
+ return '%s-%d' % (key, n)
+
+# ____________________________________________________________
+
+class DeadOperation(Operation):
+ clobbers_cc = False
+ side_effects = False
+ def mark_used_vars(self, allocator):
+ pass
+ def generate(self, allocator):
+ pass
+dead_operation = DeadOperation()
+forget_stack_storage = DeadOperation()
-class StorageInStack(GenVar):
+class StorageInStack(Op1):
"""Place of a variable that must live in the stack. Its position is
- choosen by the register allocator and put in the 'stackn' attribute."""
- offset = 0
+ chosen by the register allocator and put in the 'offset' attribute."""
+
+ def generate(self, allocator):
+ # patch the lifetime of the variable if needed (XXX a bit slow)
+ x = self.x
+ i = allocator.lifetime.get(x, allocator.operationindex)
+ operations = allocator.operations
+ while i < len(operations):
+ if operations[i] is forget_stack_storage:
+ break
+ i += 1
+ allocator.lifetime[x] = i
+ allocator.vars_in_use[x] = i
+ # force it to be in the stack
+ srcop = allocator.get_operand(x)
+ if not isinstance(srcop, MODRM):
+ oldop = srcop
+ srcop = allocator._spill(x, srcop)
+ allocator._mark_loc_as_free(oldop)
+ # record its location
+ self.offset = srcop.ofs_relative_to_ebp()
+ # for places, self.x would keep lots of other Operations alive
+ self.x = None
def get_offset(self):
- assert self.offset != 0 # otherwise, RegAllocator bug
return self.offset
-
-class Place(StorageInStack):
- pass
+gv_frame_base = GenVar()
Modified: pypy/branch/new-jit-codegen/i386/rgenop.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/rgenop.py (original)
+++ pypy/branch/new-jit-codegen/i386/rgenop.py Wed Feb 7 13:44:55 2007
@@ -6,8 +6,10 @@
from pypy.jit.codegen.model import ReplayBuilder, dummy_var
from pypy.jit.codegen.i386.codebuf import CodeBlockOverflow
from pypy.jit.codegen.i386.operation import *
-from pypy.jit.codegen.i386.regalloc import RegAllocator, StorageInStack, Place
-from pypy.jit.codegen.i386.regalloc import DEBUG_STACK
+from pypy.jit.codegen.i386.regalloc import RegAllocator
+from pypy.jit.codegen.i386.regalloc import DEBUG_STACK, forget_stack_storage
+from pypy.jit.codegen.i386.regalloc import gv_frame_base, StorageInStack
+from pypy.jit.codegen.i386.regalloc import write_stack_reserve, write_stack_adj
from pypy.jit.codegen import conftest
from pypy.rpython.annlowlevel import llhelper
@@ -84,16 +86,18 @@
class FlexSwitch(CodeGenSwitch):
REG = eax
- def __init__(self, rgenop, inputargs_gv, inputoperands):
+ def __init__(self, rgenop, graphctx, inputargs_gv, inputoperands):
self.rgenop = rgenop
+ self.graphctx = graphctx
self.inputargs_gv = inputargs_gv
self.inputoperands = inputoperands
self.defaultcaseaddr = 0
def initialize(self, mc):
+ self.graphctx.write_stack_adj(mc, initial=False)
self._reserve(mc)
- default_builder = Builder(self.rgenop, self.inputargs_gv,
- self.inputoperands)
+ default_builder = Builder(self.rgenop, self.graphctx,
+ self.inputargs_gv, self.inputoperands)
start = self.nextfreepos
end = self.endfreepos
fullmc = self.rgenop.InMemoryCodeBuilder(start, end)
@@ -123,8 +127,8 @@
def add_case(self, gv_case):
rgenop = self.rgenop
- targetbuilder = Builder(self.rgenop, self.inputargs_gv,
- self.inputoperands)
+ targetbuilder = Builder(self.rgenop, self.graphctx,
+ self.inputargs_gv, self.inputoperands)
try:
self._add_case(gv_case, targetbuilder)
except CodeBlockOverflow:
@@ -206,12 +210,12 @@
class Builder(GenBuilder):
coming_from = 0
update_defaultcaseaddr_of = None
- force_in_stack = None
paused_alive_gv = None
order_dependency = None
- def __init__(self, rgenop, inputargs_gv, inputoperands):
+ def __init__(self, rgenop, graphctx, inputargs_gv, inputoperands):
self.rgenop = rgenop
+ self.graphctx = graphctx
self.inputargs_gv = inputargs_gv
self.inputoperands = inputoperands
self.operations = []
@@ -219,34 +223,27 @@
def start_writing(self):
self.paused_alive_gv = None
- def generate_block_code(self, final_vars_gv, force_vars=[],
- force_operands=[],
- renaming=True,
- minimal_stack_depth=0):
+ def generate_block_code(self, final_vars_gv, force_vars=None,
+ force_operands=None,
+ renaming=True):
if self.order_dependency is not None:
self.order_dependency.force_generate_code()
self.order_dependency = None
- allocator = RegAllocator()
- if self.force_in_stack is not None:
- allocator.force_stack_storage(self.force_in_stack)
- allocator.set_final(final_vars_gv)
+ allocator = RegAllocator(self.operations)
+ if final_vars_gv is not force_vars:
+ allocator.set_final(final_vars_gv)
+ if force_vars is not None:
+ allocator.set_final_at_loc(force_vars, force_operands)
if not renaming:
- final_vars_gv = allocator.var2loc.keys() # unique final vars
- allocator.allocate_locations(self.operations)
- allocator.force_var_operands(force_vars, force_operands,
- at_start=False)
- allocator.force_var_operands(self.inputargs_gv, self.inputoperands,
- at_start=True)
- allocator.allocate_registers()
- if allocator.required_frame_depth < minimal_stack_depth:
- allocator.required_frame_depth = minimal_stack_depth
+ final_vars_gv = allocator.varsused() # unique final vars
+ allocator.compute_lifetimes()
+ allocator.init_reg_alloc(self.inputargs_gv, self.inputoperands)
mc = self.start_mc()
- allocator.mc = mc
- allocator.generate_initial_moves()
- allocator.generate_operations()
- if self.force_in_stack is not None:
- allocator.save_storage_places(self.force_in_stack)
- self.force_in_stack = None
+ allocator.generate_operations(mc)
+ if force_vars is not None:
+ allocator.generate_final_moves(force_vars, force_operands)
+ #print 'NSTACKMAX==============>', allocator.nstackmax
+ self.graphctx.ensure_stack_vars(allocator.nstackmax)
del self.operations[:]
if renaming:
self.inputargs_gv = [GenVar() for v in final_vars_gv]
@@ -257,24 +254,12 @@
return mc
def enter_next_block(self, kinds, args_gv):
- if self.force_in_stack is not None:
- # force_in_stack would keep the variables alive until the end
- # of the whole mc block, i.e. past the OpSameAs that we are
- # about to introduce => duplication of the value.
- mc = self.generate_block_code(args_gv)
- assert len(self.inputargs_gv) == len(args_gv)
- args_gv[:len(args_gv)] = self.inputargs_gv
- self.set_coming_from(mc)
- mc.done()
- self.rgenop.close_mc(mc)
- self.start_writing()
- else:
- # otherwise, we get better register allocation if we write a
- # single larger mc block
- for i in range(len(args_gv)):
- op = OpSameAs(args_gv[i])
- args_gv[i] = op
- self.operations.append(op)
+ # we get better register allocation if we write a single large mc block
+ self.operations.append(forget_stack_storage)
+ for i in range(len(args_gv)):
+ op = OpSameAs(args_gv[i])
+ args_gv[i] = op
+ self.operations.append(op)
lbl = Label(self)
lblop = OpLabel(lbl, args_gv)
self.operations.append(lblop)
@@ -313,23 +298,18 @@
self.coming_from = 0
return mc
- def _jump_if(self, gv_condition, args_for_jump_gv, negate):
- newbuilder = Builder(self.rgenop, list(args_for_jump_gv), None)
+ def _jump_if(self, cls, gv_condition, args_for_jump_gv):
+ newbuilder = Builder(self.rgenop, self.graphctx,
+ list(args_for_jump_gv), None)
newbuilder.order_dependency = self
- # if the condition does not come from an obvious comparison operation,
- # e.g. a getfield of a Bool or an input argument to the current block,
- # then insert an OpIntIsTrue
- if gv_condition.cc_result < 0 or gv_condition not in self.operations:
- gv_condition = OpIntIsTrue(gv_condition)
- self.operations.append(gv_condition)
- self.operations.append(JumpIf(gv_condition, newbuilder, negate=negate))
+ self.operations.append(cls(gv_condition, newbuilder))
return newbuilder
def jump_if_false(self, gv_condition, args_for_jump_gv):
- return self._jump_if(gv_condition, args_for_jump_gv, True)
+ return self._jump_if(JumpIfNot, gv_condition, args_for_jump_gv)
def jump_if_true(self, gv_condition, args_for_jump_gv):
- return self._jump_if(gv_condition, args_for_jump_gv, False)
+ return self._jump_if(JumpIf, gv_condition, args_for_jump_gv)
def finish_and_goto(self, outputargs_gv, targetlbl):
operands = targetlbl.inputoperands
@@ -342,20 +322,20 @@
self.start_writing()
operands = targetlbl.inputoperands
assert operands is not None
- mc = self.generate_block_code(outputargs_gv, outputargs_gv, operands,
- minimal_stack_depth = targetlbl.targetstackdepth)
+ mc = self.generate_block_code(outputargs_gv, outputargs_gv, operands)
mc.JMP(rel32(targetlbl.targetaddr))
mc.done()
self.rgenop.close_mc(mc)
def finish_and_return(self, sigtoken, gv_returnvar):
- mc = self.generate_block_code([gv_returnvar], [gv_returnvar], [eax])
+ gvs = [gv_returnvar]
+ mc = self.generate_block_code(gvs, gvs, [eax])
# --- epilogue ---
- mc.LEA(esp, mem(ebp, -12))
+ mc.MOV(esp, ebp)
+ mc.POP(ebp)
mc.POP(edi)
mc.POP(esi)
mc.POP(ebx)
- mc.POP(ebp)
mc.RET()
# ----------------
mc.done()
@@ -488,7 +468,8 @@
reg = FlexSwitch.REG
mc = self.generate_block_code(args_gv, [gv_exitswitch], [reg],
renaming=False)
- result = FlexSwitch(self.rgenop, self.inputargs_gv, self.inputoperands)
+ result = FlexSwitch(self.rgenop, self.graphctx,
+ self.inputargs_gv, self.inputoperands)
default_builder = result.initialize(mc)
mc.done()
self.rgenop.close_mc(mc)
@@ -502,49 +483,78 @@
# XXX re-do this somehow...
def genop_get_frame_base(self):
- op = OpGetFrameBase()
- self.operations.append(op)
- return op
+ return gv_frame_base
def get_frame_info(self, vars_gv):
- if self.force_in_stack is None:
- self.force_in_stack = []
result = []
for v in vars_gv:
if not v.is_const:
- place = StorageInStack()
- self.force_in_stack.append((v, place))
+ place = StorageInStack(v)
+ self.operations.append(place)
v = place
result.append(v)
return result
def alloc_frame_place(self, kind, gv_initial_value=None):
- if self.force_in_stack is None:
- self.force_in_stack = []
if gv_initial_value is None:
v = OpWhatever()
else:
v = OpSameAs(gv_initial_value)
self.operations.append(v)
- place = Place()
- place.stackvar = v
- self.force_in_stack.append((v, place))
+ place = StorageInStack(v)
+ self.operations.append(place)
return place
def genop_absorb_place(self, kind, place):
- v = place.stackvar
- place.stackvar = None # break reference to potentially lots of memory
- return v
+ return place.x
class Label(GenLabel):
targetaddr = 0
- targetstackdepth = 0
inputoperands = None
def __init__(self, targetbuilder):
self.targetbuilder = targetbuilder
+
+class GraphCtx:
+ # keep this in sync with the generated function prologue:
+ # how many extra words are initially pushed (including the
+ # return value, pushed by the caller)
+ PROLOGUE_FIXED_WORDS = 5
+
+ def __init__(self, rgenop):
+ self.rgenop = rgenop
+ self.initial_addr = 0 # position where there is the initial ADD ESP
+ self.adj_addrs = [] # list of positions where there is a LEA ESP
+ self.reserved_stack_vars = 0
+
+ def write_stack_adj(self, mc, initial):
+ if initial:
+ addr = write_stack_reserve(mc, self.reserved_stack_vars)
+ self.initial_addr = addr
+ else:
+ addr = write_stack_adj(mc, self.reserved_stack_vars)
+ self.adj_addrs.append(addr)
+
+ def ensure_stack_vars(self, n):
+ if CALL_ALIGN > 1:
+ # align the stack to a multiple of CALL_ALIGN words
+ stack_words = GraphCtx.PROLOGUE_FIXED_WORDS + n
+ stack_words = (stack_words + CALL_ALIGN-1) & ~ (CALL_ALIGN-1)
+ n = stack_words - GraphCtx.PROLOGUE_FIXED_WORDS
+ # patch all the LEA ESP if the requested amount has grown
+ if n > self.reserved_stack_vars:
+ addr = self.initial_addr
+ patchmc = self.rgenop.InMemoryCodeBuilder(addr, addr+99)
+ write_stack_reserve(patchmc, n)
+ patchmc.done()
+ for addr in self.adj_addrs:
+ patchmc = self.rgenop.InMemoryCodeBuilder(addr, addr+99)
+ write_stack_adj(patchmc, n)
+ patchmc.done()
+ self.reserved_stack_vars = n
+
# ____________________________________________________________
@@ -557,39 +567,38 @@
MC_SIZE *= 16
def __init__(self):
- self.mcs = [] # machine code blocks where no-one is currently writing
+ self.allocated_mc = None
self.keepalive_gc_refs = []
- self.total_code_blocks = 0
def open_mc(self):
- if self.mcs:
- # XXX think about inserting NOPS for alignment
- return self.mcs.pop()
- else:
- # XXX supposed infinite for now
- self.total_code_blocks += 1
+ # XXX supposed infinite for now
+ mc = self.allocated_mc
+ if mc is None:
return self.MachineCodeBlock(self.MC_SIZE)
+ else:
+ self.allocated_mc = None
+ return mc
def close_mc(self, mc):
- # an open 'mc' is ready for receiving code... but it's also ready
- # for being garbage collected, so be sure to close it if you
- # want the generated code to stay around :-)
- self.mcs.append(mc)
+ assert self.allocated_mc is None
+ self.allocated_mc = mc
def check_no_open_mc(self):
- assert len(self.mcs) == self.total_code_blocks
+ pass
def newgraph(self, sigtoken, name):
+ graphctx = GraphCtx(self)
# --- prologue ---
mc = self.open_mc()
entrypoint = mc.tell()
if DEBUG_TRAP:
mc.BREAKPOINT()
- mc.PUSH(ebp)
- mc.MOV(ebp, esp)
mc.PUSH(ebx)
mc.PUSH(esi)
mc.PUSH(edi)
+ mc.PUSH(ebp)
+ mc.MOV(ebp, esp)
+ graphctx.write_stack_adj(mc, initial=True)
# ^^^ pushed 5 words including the retval ( == PROLOGUE_FIXED_WORDS)
# ----------------
numargs = sigtoken # for now
@@ -597,8 +606,9 @@
inputoperands = []
for i in range(numargs):
inputargs_gv.append(GenVar())
- inputoperands.append(mem(ebp, WORD * (2+i)))
- builder = Builder(self, inputargs_gv, inputoperands)
+ ofs = WORD * (GraphCtx.PROLOGUE_FIXED_WORDS+i)
+ inputoperands.append(mem(ebp, ofs))
+ builder = Builder(self, graphctx, inputargs_gv, inputoperands)
# XXX this makes the code layout in memory a bit obscure: we have the
# prologue of the new graph somewhere in the middle of its first
# caller, all alone...
Modified: pypy/branch/new-jit-codegen/i386/ri386.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/ri386.py (original)
+++ pypy/branch/new-jit-codegen/i386/ri386.py Wed Feb 7 13:44:55 2007
@@ -2,16 +2,21 @@
class OPERAND(object):
+ _attrs_ = []
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, self.assembler())
class REG(OPERAND):
width = 4
- lowest8bits = None
def __repr__(self):
return '<%s>' % self.__class__.__name__.lower()
def assembler(self):
return '%' + self.__class__.__name__.lower()
+ def lowest8bits(self):
+ if self.op < 4:
+ return registers8[self.op]
+ else:
+ raise ValueError
class REG8(OPERAND):
width = 1
@@ -47,11 +52,18 @@
def assembler(self):
return '$%d' % (self.value,)
+ def lowest8bits(self):
+ val = self.value & 0xFF
+ if val > 0x7F:
+ val -= 0x100
+ return IMM8(val)
+
class IMM8(IMM32):
width = 1
class IMM16(OPERAND): # only for RET
width = 2
+ value = 0 # annotator hack
def __init__(self, value):
self.value = value
@@ -65,6 +77,9 @@
self.byte = byte
self.extradata = extradata
+ def lowest8bits(self):
+ return MODRM8(self.byte, self.extradata)
+
def assembler(self):
mod = self.byte & 0xC0
rm = self.byte & 0x07
@@ -186,14 +201,13 @@
dh = DH()
bh = BH()
-eax.lowest8bits = al
-ecx.lowest8bits = cl
-edx.lowest8bits = dl
-ebx.lowest8bits = bl
-
registers = [eax, ecx, edx, ebx, esp, ebp, esi, edi]
registers8 = [al, cl, dl, bl, ah, ch, dh, bh]
+for r in registers + registers8:
+ r.bitmask = 1 << r.op
+del r
+
imm32 = IMM32
imm8 = IMM8
imm16 = IMM16
@@ -257,6 +271,10 @@
else:
return cls(0x84, SIB + packimm32(offset))
+def fixedsize_esp_ofs(offset):
+ SIB = '\x24'
+ return MODRM(0x84, SIB + packimm32(offset))
+
def single_byte(value):
return -128 <= value < 128
Modified: pypy/branch/new-jit-codegen/i386/ri386setup.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/ri386setup.py (original)
+++ pypy/branch/new-jit-codegen/i386/ri386setup.py Wed Feb 7 13:44:55 2007
@@ -273,6 +273,10 @@
MOV.mode2(MODRM8,REG8, ['\x88', register(2,8,'b'), modrm(1,'b')])
MOV.mode2(REG8, MODRM8,['\x8A', register(1,8,'b'), modrm(2,'b')])
+# special modes for writing explicit 16-bit immediates (must also use o16!)
+MOV.mode2(REG, IMM16, [register(1), '\xB8', immediate(2,'h')])
+MOV.mode2(MODRM, IMM16, ['\xC7', orbyte(0<<3), modrm(1), immediate(2,'h')])
+
ADD = Instruction()
ADD.common_modes(0)
Modified: pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/i386/test/test_auto_encoding.py (original)
+++ pypy/branch/new-jit-codegen/i386/test/test_auto_encoding.py Wed Feb 7 13:44:55 2007
@@ -198,6 +198,8 @@
if ((args[1][1] in (i386.eax, i386.al))
and args[0][1].assembler().lstrip('-').isdigit()):
return [] # MOV [constant-address], accum
+ if args[1][1].__class__ == i386.IMM16:
+ return [] # MOV mod/rm, imm16
if instrname == "LEA":
if (args[1][1].__class__ != i386.MODRM or
args[1][1].is_register()):
Modified: pypy/branch/new-jit-codegen/test/rgenop_tests.py
==============================================================================
--- pypy/branch/jit-virtual-world/pypy/jit/codegen/test/rgenop_tests.py (original)
+++ pypy/branch/new-jit-codegen/test/rgenop_tests.py Wed Feb 7 13:44:55 2007
@@ -13,6 +13,7 @@
FUNC2 = lltype.FuncType([lltype.Signed]*2, lltype.Signed)
FUNC3 = lltype.FuncType([lltype.Signed]*3, lltype.Signed)
FUNC5 = lltype.FuncType([lltype.Signed]*5, lltype.Signed)
+FUNC27= lltype.FuncType([lltype.Signed]*27, lltype.Signed)
def make_adder(rgenop, n):
# 'return x+n'
@@ -1581,6 +1582,142 @@
res = fnptr(2, 10, 10, 400, 0)
assert res == 0
+ def test_from_random_4_direct(self):
+## def dummyfn(counter, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z):
+## while True:
+
+## if b:
+## pass
+
+## g = q and j
+## d = intmask(s - y) # d <esi>
+## # t <0x64(%ebp)>
+
+## e = y != f # e <ebx>
+## j = c or j
+## o = d or t # d <edx> o <esi>
+## t = l > o # t <ecx>
+## if e:
+## pass
+
+## counter -= 1
+## if not counter: break
+
+## return intmask(a*-468864544+b*-340864157+c*-212863774+d*-84863387+e*43136996+f*171137383+g*299137766+h*427138153+i*555138536+j*683138923+k*811139306+l*939139693+m*1067140076+n*1195140463+o*1323140846+p*1451141233+q*1579141616+r*1707142003+s*1835142386+t*1963142773+u*2091143156+v*-2075823753+w*-1947823370+x*-1819822983+y*-1691822600+z*-1563822213)
+
+ rgenop = self.RGenOp()
+ signed_kind = rgenop.kindToken(lltype.Signed)
+ bool_kind = rgenop.kindToken(lltype.Bool)
+
+ builder0, gv_callable, [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26] = rgenop.newgraph(rgenop.sigToken(FUNC27), 'compiled_dummyfn')
+ builder0.start_writing()
+ args_gv = [v0, v1, v2, v3, v6, v8, v9, v10, v11, v12, v13, v14, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26]
+ label0 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49] = args_gv
+ v50 = builder0.genop1('int_is_true', v29)
+ builder1 = builder0.jump_if_true(v50, [v48, v38, v27, v30, v32, v34, v47, v40, v28, v41, v43, v45, v37, v46, v31, v33, v35, v39, v36, v42, v49, v44, v29])
+ args_gv = [v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49]
+ label1 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73] = args_gv
+ v74 = builder0.genop1('int_is_true', v64)
+ builder2 = builder0.jump_if_true(v74, [v54, v52, v65, v58, v60, v62, v64, v68, v56, v69, v71, v51, v73, v53, v67, v57, v55, v59, v61, v63, v66, v70, v72])
+ args_gv = [v51, v52, v53, v54, v55, v64, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73]
+ label2 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v75, v76, v77, v78, v79, v80, v81, v82, v83, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98] = args_gv
+ v99 = builder0.genop2('int_sub', v91, v97)
+ v100 = builder0.genop2('int_ne', v97, v79)
+ v101 = builder0.genop1('int_is_true', v78)
+ builder3 = builder0.jump_if_true(v101, [v85, v93, v94, v87, v91, v97, v89, v98, v80, v82, v78, v86, v84, v99, v88, v100, v90, v92, v96, v75, v95, v76, v77, v79, v81])
+ args_gv = [v75, v76, v77, v78, v99, v100, v79, v80, v81, v82, v83, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98]
+ label3 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v117, v118, v119, v120, v121, v122, v123, v124, v125, v126, v127] = args_gv
+ v128 = builder0.genop1('int_is_true', v106)
+ builder4 = builder0.jump_if_false(v128, [v114, v111, v116, v113, v118, v122, v110, v124, v103, v125, v105, v127, v107, v112, v121, v109, v115, v117, v119, v123, v102, v120, v104, v126, v106, v108])
+ args_gv = [v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v106, v117, v118, v119, v120, v122, v123, v124, v125, v126, v127]
+ label4 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v129, v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v149, v150, v151, v152, v153, v154] = args_gv
+ v155 = builder0.genop2('int_gt', v141, v144)
+ builder5 = builder0.jump_if_false(v134, [v149, v148, v141, v143, v145, v147, v151, v139, v152, v132, v154, v134, v136, v130, v140, v138, v142, v155, v144, v146, v150, v129, v137, v131, v153, v133, v135])
+ args_gv = [v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v155, v149, v150, v151, v152, v153, v154, v129]
+ label5 = builder0.enter_next_block([signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, bool_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind, signed_kind], args_gv)
+ [v156, v157, v158, v159, v160, v161, v162, v163, v164, v165, v166, v167, v168, v169, v170, v171, v172, v173, v174, v175, v176, v177, v178, v179, v180, v181, v182] = args_gv
+ v183 = builder0.genop2('int_sub', v182, rgenop.genconst(1))
+ v184 = builder0.genop1('int_is_true', v183)
+ builder6 = builder0.jump_if_true(v184, [v177, v166, v169, v171, v183, v173, v156, v165, v179, v158, v180, v168, v164, v178, v176, v172, v174, v167, v157, v175, v181, v161, v163])
+ v185 = builder0.genop2('int_mul', v156, rgenop.genconst(-468864544))
+ v186 = builder0.genop2('int_mul', v157, rgenop.genconst(-340864157))
+ v187 = builder0.genop2('int_add', v185, v186)
+ v188 = builder0.genop2('int_mul', v158, rgenop.genconst(-212863774))
+ v189 = builder0.genop2('int_add', v187, v188)
+ v190 = builder0.genop2('int_mul', v159, rgenop.genconst(-84863387))
+ v191 = builder0.genop2('int_add', v189, v190)
+ v192 = builder0.genop1('cast_bool_to_int', v160)
+ v193 = builder0.genop2('int_mul', v192, rgenop.genconst(43136996))
+ v194 = builder0.genop2('int_add', v191, v193)
+ v195 = builder0.genop2('int_mul', v161, rgenop.genconst(171137383))
+ v196 = builder0.genop2('int_add', v194, v195)
+ v197 = builder0.genop2('int_mul', v162, rgenop.genconst(299137766))
+ v198 = builder0.genop2('int_add', v196, v197)
+ v199 = builder0.genop2('int_mul', v163, rgenop.genconst(427138153))
+ v200 = builder0.genop2('int_add', v198, v199)
+ v201 = builder0.genop2('int_mul', v164, rgenop.genconst(555138536))
+ v202 = builder0.genop2('int_add', v200, v201)
+ v203 = builder0.genop2('int_mul', v165, rgenop.genconst(683138923))
+ v204 = builder0.genop2('int_add', v202, v203)
+ v205 = builder0.genop2('int_mul', v166, rgenop.genconst(811139306))
+ v206 = builder0.genop2('int_add', v204, v205)
+ v207 = builder0.genop2('int_mul', v167, rgenop.genconst(939139693))
+ v208 = builder0.genop2('int_add', v206, v207)
+ v209 = builder0.genop2('int_mul', v168, rgenop.genconst(1067140076))
+ v210 = builder0.genop2('int_add', v208, v209)
+ v211 = builder0.genop2('int_mul', v169, rgenop.genconst(1195140463))
+ v212 = builder0.genop2('int_add', v210, v211)
+ v213 = builder0.genop2('int_mul', v170, rgenop.genconst(1323140846))
+ v214 = builder0.genop2('int_add', v212, v213)
+ v215 = builder0.genop2('int_mul', v171, rgenop.genconst(1451141233))
+ v216 = builder0.genop2('int_add', v214, v215)
+ v217 = builder0.genop2('int_mul', v172, rgenop.genconst(1579141616))
+ v218 = builder0.genop2('int_add', v216, v217)
+ v219 = builder0.genop2('int_mul', v173, rgenop.genconst(1707142003))
+ v220 = builder0.genop2('int_add', v218, v219)
+ v221 = builder0.genop2('int_mul', v174, rgenop.genconst(1835142386))
+ v222 = builder0.genop2('int_add', v220, v221)
+ v223 = builder0.genop1('cast_bool_to_int', v175)
+ v224 = builder0.genop2('int_mul', v223, rgenop.genconst(1963142773))
+ v225 = builder0.genop2('int_add', v222, v224)
+ v226 = builder0.genop2('int_mul', v176, rgenop.genconst(2091143156))
+ v227 = builder0.genop2('int_add', v225, v226)
+ v228 = builder0.genop2('int_mul', v177, rgenop.genconst(-2075823753))
+ v229 = builder0.genop2('int_add', v227, v228)
+ v230 = builder0.genop2('int_mul', v178, rgenop.genconst(-1947823370))
+ v231 = builder0.genop2('int_add', v229, v230)
+ v232 = builder0.genop2('int_mul', v179, rgenop.genconst(-1819822983))
+ v233 = builder0.genop2('int_add', v231, v232)
+ v234 = builder0.genop2('int_mul', v180, rgenop.genconst(-1691822600))
+ v235 = builder0.genop2('int_add', v233, v234)
+ v236 = builder0.genop2('int_mul', v181, rgenop.genconst(-1563822213))
+ v237 = builder0.genop2('int_add', v235, v236)
+ builder0.finish_and_return(rgenop.sigToken(FUNC27), v237)
+ builder2.start_writing()
+ builder2.finish_and_goto([v51, v52, v53, v54, v55, v58, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66, v67, v68, v69, v70, v71, v72, v73], label2)
+ builder4.start_writing()
+ builder4.finish_and_goto([v102, v103, v104, v105, v106, v107, v108, v109, v110, v111, v112, v113, v114, v115, v116, v121, v117, v118, v119, v120, v122, v123, v124, v125, v126, v127], label4)
+ builder3.start_writing()
+ builder3.finish_and_goto([v75, v76, v77, v78, v99, v100, v79, v80, v81, v82, v78, v84, v85, v86, v87, v88, v89, v90, v91, v92, v93, v94, v95, v96, v97, v98], label3)
+ builder1.start_writing()
+ builder1.finish_and_goto([v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49], label1)
+ builder5.start_writing()
+ builder5.finish_and_goto([v130, v131, v132, v133, v134, v135, v136, v137, v138, v139, v140, v141, v142, v143, v144, v145, v146, v147, v148, v155, v149, v150, v151, v152, v153, v154, v129], label5)
+ builder6.start_writing()
+ v238 = builder6.genop1('cast_bool_to_int', v175)
+ builder6.finish_and_goto([v183, v156, v157, v158, v161, v163, v164, v165, v166, v167, v168, v169, v171, v172, v173, v174, v238, v176, v177, v178, v179, v180, v181], label0)
+ builder6.end()
+
+ fnptr = self.cast(gv_callable, 27)
+
+ res = fnptr(*([5]*27))
+ assert res == 967746338
+
def test_genzeroconst(self):
RGenOp = self.RGenOp
gv = RGenOp.genzeroconst(RGenOp.kindToken(lltype.Signed))
More information about the Pypy-commit
mailing list