[pypy-commit] pypy default: - add in the backend, for binary instructions, a memo function
arigo
noreply at buildbot.pypy.org
Thu Nov 3 10:24:39 CET 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r48677:eb27c44ca6ad
Date: 2011-11-03 08:21 +0100
http://bitbucket.org/pypy/pypy/changeset/eb27c44ca6ad/
Log: - add in the backend, for binary instructions, a memo function
that returns True if there is any NAME_xy that could match. If it
returns False we know the whole subcase can be omitted from
translated code. Without this hack, the size of most _binaryop
INSN functions ends up quite large in C code.
- found out that a lot of instructions have a missing case on 64
bits, because INSN_m used to fall back to INSN_a if the constant
offset doesn't fit in 32 bits --- but most instructions that have
an 'm' form don't have an 'a' form. Fixed by generating an extra
LEA and not falling back to the 'a' form.
- location_code() is an indirect method call for no really good
reason. Turn it into a monomorphic method that always reads a
_location_code attribute.
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -17,7 +17,7 @@
class AssemblerLocation(object):
# XXX: Is adding "width" here correct?
- __slots__ = ('value', 'width')
+ _attrs_ = ('value', 'width', '_location_code')
_immutable_ = True
def _getregkey(self):
return self.value
@@ -25,6 +25,9 @@
def is_memory_reference(self):
return self.location_code() in ('b', 's', 'j', 'a', 'm')
+ def location_code(self):
+ return self._location_code
+
def value_r(self): return self.value
def value_b(self): return self.value
def value_s(self): return self.value
@@ -38,6 +41,8 @@
class StackLoc(AssemblerLocation):
_immutable_ = True
+ _location_code = 'b'
+
def __init__(self, position, ebp_offset, num_words, type):
assert ebp_offset < 0 # so no confusion with RegLoc.value
self.position = position
@@ -49,9 +54,6 @@
def __repr__(self):
return '%d(%%ebp)' % (self.value,)
- def location_code(self):
- return 'b'
-
def assembler(self):
return repr(self)
@@ -63,8 +65,10 @@
self.is_xmm = is_xmm
if self.is_xmm:
self.width = 8
+ self._location_code = 'x'
else:
self.width = WORD
+ self._location_code = 'r'
def __repr__(self):
if self.is_xmm:
return rx86.R.xmmnames[self.value]
@@ -79,12 +83,6 @@
assert not self.is_xmm
return RegLoc(rx86.high_byte(self.value), False)
- def location_code(self):
- if self.is_xmm:
- return 'x'
- else:
- return 'r'
-
def assembler(self):
return '%' + repr(self)
@@ -97,14 +95,13 @@
class ImmedLoc(AssemblerLocation):
_immutable_ = True
width = WORD
+ _location_code = 'i'
+
def __init__(self, value):
from pypy.rpython.lltypesystem import rffi, lltype
# force as a real int
self.value = rffi.cast(lltype.Signed, value)
- def location_code(self):
- return 'i'
-
def getint(self):
return self.value
@@ -149,9 +146,6 @@
info = getattr(self, attr, '?')
return '<AddressLoc %r: %s>' % (self._location_code, info)
- def location_code(self):
- return self._location_code
-
def value_a(self):
return self.loc_a
@@ -191,6 +185,7 @@
# we want a width of 8 (... I think. Check this!)
_immutable_ = True
width = 8
+ _location_code = 'j'
def __init__(self, address):
self.value = address
@@ -198,9 +193,6 @@
def __repr__(self):
return '<ConstFloatLoc @%s>' % (self.value,)
- def location_code(self):
- return 'j'
-
if IS_X86_32:
class FloatImmedLoc(AssemblerLocation):
# This stands for an immediate float. It cannot be directly used in
@@ -209,6 +201,7 @@
# instead; see below.
_immutable_ = True
width = 8
+ _location_code = '#' # don't use me
def __init__(self, floatstorage):
self.aslonglong = floatstorage
@@ -229,9 +222,6 @@
floatvalue = longlong.getrealfloat(self.aslonglong)
return '<FloatImmedLoc(%s)>' % (floatvalue,)
- def location_code(self):
- raise NotImplementedError
-
if IS_X86_64:
def FloatImmedLoc(floatstorage):
from pypy.rlib.longlong2float import float2longlong
@@ -270,6 +260,11 @@
else:
raise AssertionError(methname + " undefined")
+def _missing_binary_insn(name, code1, code2):
+ raise AssertionError(name + "_" + code1 + code2 + " missing")
+_missing_binary_insn._dont_inline_ = True
+
+
class LocationCodeBuilder(object):
_mixin_ = True
@@ -310,6 +305,23 @@
_rx86_getattr(self, methname)(val1, val2)
invoke._annspecialcase_ = 'specialize:arg(1)'
+ def has_implementation_for(loc1, loc2):
+ # A memo function that returns True if there is any NAME_xy that could match.
+ # If it returns False we know the whole subcase can be omitted from translated
+ # code. Without this hack, the size of most _binaryop INSN functions ends up
+ # quite large in C code.
+ if loc1 == '?':
+ return any([has_implementation_for(loc1, loc2)
+ for loc1 in unrolling_location_codes])
+ methname = name + "_" + loc1 + loc2
+ if not hasattr(rx86.AbstractX86CodeBuilder, methname):
+ return False
+ # any NAME_j should have a NAME_m as a fallback, too. Check it
+ if loc1 == 'j': assert has_implementation_for('m', loc2), methname
+ if loc2 == 'j': assert has_implementation_for(loc1, 'm'), methname
+ return True
+ has_implementation_for._annspecialcase_ = 'specialize:memo'
+
def INSN(self, loc1, loc2):
code1 = loc1.location_code()
code2 = loc2.location_code()
@@ -325,6 +337,8 @@
assert code2 not in ('j', 'i')
for possible_code2 in unrolling_location_codes:
+ if not has_implementation_for('?', possible_code2):
+ continue
if code2 == possible_code2:
val2 = getattr(loc2, "value_" + possible_code2)()
#
@@ -335,28 +349,32 @@
#
# Regular case
for possible_code1 in unrolling_location_codes:
+ if not has_implementation_for(possible_code1,
+ possible_code2):
+ continue
if code1 == possible_code1:
val1 = getattr(loc1, "value_" + possible_code1)()
# More faking out of certain operations for x86_64
- if possible_code1 == 'j' and not rx86.fits_in_32bits(val1):
+ fits32 = rx86.fits_in_32bits
+ if possible_code1 == 'j' and not fits32(val1):
val1 = self._addr_as_reg_offset(val1)
invoke(self, "m" + possible_code2, val1, val2)
- elif possible_code2 == 'j' and not rx86.fits_in_32bits(val2):
+ return
+ if possible_code2 == 'j' and not fits32(val2):
val2 = self._addr_as_reg_offset(val2)
invoke(self, possible_code1 + "m", val1, val2)
- elif possible_code1 == 'm' and not rx86.fits_in_32bits(val1[1]):
+ return
+ if possible_code1 == 'm' and not fits32(val1[1]):
val1 = self._fix_static_offset_64_m(val1)
- invoke(self, "a" + possible_code2, val1, val2)
- elif possible_code2 == 'm' and not rx86.fits_in_32bits(val2[1]):
+ if possible_code2 == 'm' and not fits32(val2[1]):
val2 = self._fix_static_offset_64_m(val2)
- invoke(self, possible_code1 + "a", val1, val2)
- else:
- if possible_code1 == 'a' and not rx86.fits_in_32bits(val1[3]):
- val1 = self._fix_static_offset_64_a(val1)
- if possible_code2 == 'a' and not rx86.fits_in_32bits(val2[3]):
- val2 = self._fix_static_offset_64_a(val2)
- invoke(self, possible_code1 + possible_code2, val1, val2)
+ if possible_code1 == 'a' and not fits32(val1[3]):
+ val1 = self._fix_static_offset_64_a(val1)
+ if possible_code2 == 'a' and not fits32(val2[3]):
+ val2 = self._fix_static_offset_64_a(val2)
+ invoke(self, possible_code1 + possible_code2, val1, val2)
return
+ _missing_binary_insn(name, code1, code2)
return func_with_new_name(INSN, "INSN_" + name)
@@ -431,12 +449,14 @@
def _fix_static_offset_64_m(self, (basereg, static_offset)):
# For cases where an AddressLoc has the location_code 'm', but
# where the static offset does not fit in 32-bits. We have to fall
- # back to the X86_64_SCRATCH_REG. Note that this returns a location
- # encoded as mode 'a'. These are all possibly rare cases; don't try
+ # back to the X86_64_SCRATCH_REG. Returns a new location encoded
+ # as mode 'm' too. These are all possibly rare cases; don't try
# to reuse a past value of the scratch register at all.
self._scratch_register_known = False
self.MOV_ri(X86_64_SCRATCH_REG.value, static_offset)
- return (basereg, X86_64_SCRATCH_REG.value, 0, 0)
+ self.LEA_ra(X86_64_SCRATCH_REG.value,
+ (basereg, X86_64_SCRATCH_REG.value, 0, 0))
+ return (X86_64_SCRATCH_REG.value, 0)
def _fix_static_offset_64_a(self, (basereg, scalereg,
scale, static_offset)):
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -745,6 +745,7 @@
assert insnname_template.count('*') == 1
add_insn('x', register(2), '\xC0')
add_insn('j', abs_, immediate(2))
+ add_insn('m', mem_reg_plus_const(2))
define_pxmm_insn('PADDQ_x*', '\xD4')
define_pxmm_insn('PSUBQ_x*', '\xFB')
diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -146,8 +146,10 @@
expected_instructions = (
# mov r11, 0xFEDCBA9876543210
'\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
- # mov rcx, [rdx+r11]
- '\x4A\x8B\x0C\x1A'
+ # lea r11, [rdx+r11]
+ '\x4E\x8D\x1C\x1A'
+ # mov rcx, [r11]
+ '\x49\x8B\x0B'
)
assert cb.getvalue() == expected_instructions
@@ -217,8 +219,10 @@
expected_instructions = (
# mov r11, 0xFEDCBA9876543210
'\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
- # mov [rdx+r11], -0x01234567
- '\x4A\xC7\x04\x1A\x99\xBA\xDC\xFE'
+ # lea r11, [rdx+r11]
+ '\x4E\x8D\x1C\x1A'
+ # mov [r11], -0x01234567
+ '\x49\xC7\x03\x99\xBA\xDC\xFE'
)
assert cb.getvalue() == expected_instructions
@@ -300,8 +304,10 @@
'\x48\xBA\xEF\xCD\xAB\x89\x67\x45\x23\x01'
# mov r11, 0xFEDCBA9876543210
'\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'
- # mov [rax+r11], rdx
- '\x4A\x89\x14\x18'
+ # lea r11, [rax+r11]
+ '\x4E\x8D\x1C\x18'
+ # mov [r11], rdx
+ '\x49\x89\x13'
# pop rdx
'\x5A'
)
More information about the pypy-commit
mailing list