[pypy-commit] pypy remove-list-smm-2: hg merge default
Manuel Jacob
noreply at buildbot.pypy.org
Tue May 21 15:10:14 CEST 2013
Author: Manuel Jacob
Branch: remove-list-smm-2
Changeset: r64384:b9d6357b218a
Date: 2013-05-21 14:53 +0200
http://bitbucket.org/pypy/pypy/changeset/b9d6357b218a/
Log: hg merge default
diff --git a/lib-python/2.7/distutils/sysconfig.py b/lib-python/2.7/distutils/sysconfig.py
--- a/lib-python/2.7/distutils/sysconfig.py
+++ b/lib-python/2.7/distutils/sysconfig.py
@@ -1,30 +1,16 @@
-"""Provide access to Python's configuration information. The specific
-configuration variables available depend heavily on the platform and
-configuration. The values may be retrieved using
-get_config_var(name), and the list of variables is available via
-get_config_vars().keys(). Additional convenience functions are also
-available.
-
-Written by: Fred L. Drake, Jr.
-Email: <fdrake at acm.org>
-"""
-
-__revision__ = "$Id: sysconfig.py 85358 2010-10-10 09:54:59Z antoine.pitrou $"
-
-import sys
-
# The content of this file is redirected from
# sysconfig_cpython or sysconfig_pypy.
+# All underscore names are imported too, because
+# people like to use undocumented sysconfig._xxx
+# directly.
+import sys
if '__pypy__' in sys.builtin_module_names:
- from distutils.sysconfig_pypy import *
- from distutils.sysconfig_pypy import _config_vars # needed by setuptools
- from distutils.sysconfig_pypy import _variable_rx # read_setup_file()
+ from distutils import sysconfig_pypy as _sysconfig_module
else:
- from distutils.sysconfig_cpython import *
- from distutils.sysconfig_cpython import _config_vars # needed by setuptools
- from distutils.sysconfig_cpython import _variable_rx # read_setup_file()
+ from distutils import sysconfig_cpython as _sysconfig_module
+globals().update(_sysconfig_module.__dict__)
_USE_CLANG = None
diff --git a/lib-python/2.7/distutils/sysconfig_cpython.py b/lib-python/2.7/distutils/sysconfig_cpython.py
--- a/lib-python/2.7/distutils/sysconfig_cpython.py
+++ b/lib-python/2.7/distutils/sysconfig_cpython.py
@@ -9,7 +9,7 @@
Email: <fdrake at acm.org>
"""
-__revision__ = "$Id$"
+__revision__ = "$Id: sysconfig.py 85358 2010-10-10 09:54:59Z antoine.pitrou $"
import os
import re
diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py
--- a/lib-python/2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/2.7/distutils/sysconfig_pypy.py
@@ -1,6 +1,15 @@
-"""PyPy's minimal configuration information.
+"""Provide access to Python's configuration information.
+This is actually PyPy's minimal configuration information.
+
+The specific configuration variables available depend heavily on the
+platform and configuration. The values may be retrieved using
+get_config_var(name), and the list of variables is available via
+get_config_vars().keys(). Additional convenience functions are also
+available.
"""
+__revision__ = "$Id: sysconfig.py 85358 2010-10-10 09:54:59Z antoine.pitrou $"
+
import sys
import os
import imp
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -363,6 +363,9 @@
# ignore names from 'essential_modules', notably 'exceptions', which
# may not be present in config.objspace.usemodules at all
modules = [name for name in modules if name not in essential_modules]
+
+ if config.translation.platform == 'arm' and '_continuation' in modules:
+ modules.remove('_continuation')
config.objspace.usemodules.suggest(**dict.fromkeys(modules, True))
def enable_translationmodules(config):
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -47,7 +47,7 @@
# The short X.Y version.
version = '2.0'
# The full version, including alpha/beta/rc tags.
-release = '2.0.1'
+release = '2.0.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -40,7 +40,7 @@
* `FAQ`_: some frequently asked questions.
-* `Release 2.0.1`_: the latest official release
+* `Release 2.0.2`_: the latest official release
* `PyPy Blog`_: news and status info about PyPy
@@ -110,7 +110,7 @@
.. _`Getting Started`: getting-started.html
.. _`Papers`: extradoc.html
.. _`Videos`: video-index.html
-.. _`Release 2.0.1`: http://pypy.org/download.html
+.. _`Release 2.0.2`: http://pypy.org/download.html
.. _`speed.pypy.org`: http://speed.pypy.org
.. _`RPython toolchain`: translation.html
.. _`potential project ideas`: project-ideas.html
diff --git a/pypy/doc/release-2.0.2.rst b/pypy/doc/release-2.0.2.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-2.0.2.rst
@@ -0,0 +1,46 @@
+=========================
+PyPy 2.0.2 - Fermi Panini
+=========================
+
+We're pleased to announce PyPy 2.0.2. This is a stable bugfix release
+over `2.0`_ and `2.0.1`_. You can download it here:
+
+ http://pypy.org/download.html
+
+It fixes a crash in the JIT when calling external C functions (with
+ctypes/cffi) in a multithreaded context.
+
+.. _2.0: release-2.0.0.html
+.. _2.0.1: release-2.0.1.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7. It's fast (`pypy 2.0 and cpython 2.7.3`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports x86 machines running Linux 32/64, Mac OS X 64 or
+Windows 32. Support for ARM is progressing but not bug-free yet.
+
+.. _`pypy 2.0 and cpython 2.7.3`: http://speed.pypy.org
+
+Highlights
+==========
+
+This release contains only the fix described above. A crash (or wrong
+results) used to occur if all these conditions were true:
+
+- your program is multithreaded;
+
+- it runs on a single-core machine or a heavily-loaded multi-core one;
+
+- it uses ctypes or cffi to issue external calls to C functions.
+
+This was fixed in the branch `emit-call-x86`__ (see the example file
+``bug1.py``).
+
+.. __: https://bitbucket.org/pypy/pypy/commits/7c80121abbf4
+
+Cheers,
+arigo et al. for the PyPy team
diff --git a/pypy/module/pypyjit/test_pypy_c/bug1.py b/pypy/module/pypyjit/test_pypy_c/bug1.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/bug1.py
@@ -0,0 +1,57 @@
+import cffi, thread, time, sys
+
+
+ffi = cffi.FFI()
+
+ffi.cdef("""
+ long foobar(long a, long b, long c, long d, long e, long f,
+ long a2, long b2, long c2, long d2, long e2, long f2,
+ long a3, long b3, long c3, long d3, long e3, long f3,
+ long a4, long b4, long c4, long d4, long e4, long f4);
+""")
+
+lib = ffi.verify("""
+ long foobar(long a, long b, long c, long d, long e, long f,
+ long a2, long b2, long c2, long d2, long e2, long f2,
+ long a3, long b3, long c3, long d3, long e3, long f3,
+ long a4, long b4, long c4, long d4, long e4, long f4)
+ {
+ return a * 1 + b * 2 + c * 3 + d * 4 + e * 5 + f * 6 +
+ (a2 * 1 + b2 * 2 + c2 * 3 + d2 * 4 + e2 * 5 + f2 * 6) * (-3) +
+ (a3 * 1 + b3 * 2 + c3 * 3 + d3 * 4 + e3 * 5 + f3 * 6) * (-5) +
+ (a4 * 1 + b4 * 2 + c4 * 3 + d4 * 4 + e4 * 5 + f4 * 6) * (-7);
+ }
+""")
+
+
+def runme():
+ for j in range(10):
+ for i in range(10000):
+ args = [i-k for k in range(24)]
+ x = lib.foobar(*args)
+ (a,b,c,d,e,f,a2,b2,c2,d2,e2,f2,
+ a3,b3,c3,d3,e3,f3,a4,b4,c4,d4,e4,f4) = args
+ assert x == (
+ a * 1 + b * 2 + c * 3 + d * 4 + e * 5 + f * 6 +
+ (a2 * 1 + b2 * 2 + c2 * 3 + d2 * 4 + e2 * 5 + f2 * 6) * (-3) +
+ (a3 * 1 + b3 * 2 + c3 * 3 + d3 * 4 + e3 * 5 + f3 * 6) * (-5) +
+ (a4 * 1 + b4 * 2 + c4 * 3 + d4 * 4 + e4 * 5 + f4 * 6) * (-7))
+
+done = []
+
+def submain():
+ try:
+ runme()
+ err = None
+ except:
+ err = sys.exc_info()
+ done.append(err)
+
+for i in range(2):
+ thread.start_new_thread(submain, ())
+while len(done) < 2:
+ time.sleep(0.1)
+
+for err in done:
+ if err is not None:
+ raise err[0], err[1], err[2]
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
--- a/pypy/module/pypyjit/test_pypy_c/test_array.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -105,6 +105,7 @@
assert loop.match("""
i10 = int_lt(i6, 1000)
guard_true(i10, descr=...)
+ guard_not_invalidated(descr=...)
i11 = int_lt(i6, i7)
guard_true(i11, descr=...)
f13 = getarrayitem_raw(i8, i6, descr=<ArrayF 8>)
@@ -141,6 +142,7 @@
assert loop.match("""
i10 = int_lt(i6, 1000)
guard_true(i10, descr=...)
+ guard_not_invalidated(descr=...)
i11 = int_lt(i6, i7)
guard_true(i11, descr=...)
i13 = getarrayitem_raw(i8, i6, descr=<Array. 4>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_bug.py b/pypy/module/pypyjit/test_pypy_c/test_bug.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/pypyjit/test_pypy_c/test_bug.py
@@ -0,0 +1,14 @@
+import os, sys, py, subprocess
+
+localdir = os.path.dirname(os.path.abspath(__file__))
+
+
+def test_bug1():
+ if not sys.platform.startswith('linux'):
+ py.test.skip("linux-only test")
+
+ cmdline = ['taskset', '-c', '0',
+ sys.executable, os.path.join(localdir, 'bug1.py')]
+ popen = subprocess.Popen(cmdline)
+ err = popen.wait()
+ assert err == 0
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -487,6 +487,7 @@
assert loop.match("""
i2 = int_lt(i0, i1)
guard_true(i2, descr=...)
+ guard_not_invalidated(descr=...)
i3 = force_token()
i4 = int_add(i0, 1)
--TICK--
@@ -586,7 +587,6 @@
""", [1000])
loop, = log.loops_by_id('call')
assert loop.match_by_id('call', '''
- guard_not_invalidated(descr=<.*>)
i1 = force_token()
''')
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_containers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -44,6 +44,7 @@
# gc_id call is hoisted out of the loop, the id of a value obviously
# can't change ;)
assert loop.match_by_id("getitem", """
+ ...
i26 = call(ConstClass(ll_dict_lookup), p18, p6, i25, descr=...)
...
p33 = getinteriorfield_gc(p31, i26, descr=<InteriorFieldDescr <FieldP dictentry.value .*>>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py
--- a/pypy/module/pypyjit/test_pypy_c/test_misc.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py
@@ -36,7 +36,7 @@
assert loop0.match(expected)
# XXX: The retracing fails to form a loop since j
# becomes constant 0 after the bridge and constant 1 at the end of the
- # loop. A bridge back to the peramble is produced instead.
+ # loop. A bridge back to the preamble is produced instead.
#assert loop1.match(expected)
def test_factorial(self):
@@ -242,6 +242,7 @@
i19 = int_add(i12, 1)
setfield_gc(p9, i19, descr=<FieldS .*W_AbstractSeqIterObject.inst_index .*>)
guard_nonnull_class(p17, ..., descr=...)
+ guard_not_invalidated(descr=...)
i21 = getfield_gc(p17, descr=<FieldS .*W_Array.*.inst_len .*>)
i23 = int_lt(0, i21)
guard_true(i23, descr=...)
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -355,54 +355,63 @@
def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
callee_only=False):
+ # Push general purpose registers
base_ofs = self.cpu.get_baseofs_of_frame_field()
if callee_only:
regs = CoreRegisterManager.save_around_call_regs
else:
regs = CoreRegisterManager.all_regs
- # XXX use STMDB ops here
- for i, gpr in enumerate(regs):
- if gpr in ignored_regs:
- continue
- self.store_reg(mc, gpr, r.fp, base_ofs + i * WORD)
+ # XXX add special case if ignored_regs are a block at the start of regs
+ if not ignored_regs: # we want to push a contiguous block of regs
+ assert check_imm_arg(base_ofs)
+ mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
+ mc.STM(r.ip.value, [reg.value for reg in regs])
+ else:
+ for reg in ignored_regs:
+ assert not reg.is_vfp_reg() # sanity check
+ # we can have holes in the list of regs
+ for i, gpr in enumerate(regs):
+ if gpr in ignored_regs:
+ continue
+ self.store_reg(mc, gpr, r.fp, base_ofs + i * WORD)
+
if withfloats:
- if callee_only:
- regs = VFPRegisterManager.save_around_call_regs
- else:
- regs = VFPRegisterManager.all_regs
- for i, vfpr in enumerate(regs):
- if vfpr in ignored_regs:
- continue
- ofs = len(CoreRegisterManager.all_regs) * WORD
- ofs += i * DOUBLE_WORD + base_ofs
- self.store_reg(mc, vfpr, r.fp, ofs)
+ # Push VFP regs
+ regs = VFPRegisterManager.all_regs
+ ofs = len(CoreRegisterManager.all_regs) * WORD
+ assert check_imm_arg(ofs+base_ofs)
+ mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs+base_ofs)
+ mc.VSTM(r.ip.value, [vfpr.value for vfpr in regs])
def _pop_all_regs_from_jitframe(self, mc, ignored_regs, withfloats,
callee_only=False):
- # Pop all general purpose registers
+ # Pop general purpose registers
base_ofs = self.cpu.get_baseofs_of_frame_field()
if callee_only:
regs = CoreRegisterManager.save_around_call_regs
else:
regs = CoreRegisterManager.all_regs
- # XXX use LDMDB ops here
- for i, gpr in enumerate(regs):
- if gpr in ignored_regs:
- continue
- ofs = i * WORD + base_ofs
- self.load_reg(mc, gpr, r.fp, ofs)
+ # XXX add special case if ignored_regs are a block at the start of regs
+ if not ignored_regs: # we want to pop a contiguous block of regs
+ assert check_imm_arg(base_ofs)
+ mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
+ mc.LDM(r.ip.value, [reg.value for reg in regs])
+ else:
+ for reg in ignored_regs:
+ assert not reg.is_vfp_reg() # sanity check
+ # we can have holes in the list of regs
+ for i, gpr in enumerate(regs):
+ if gpr in ignored_regs:
+ continue
+ ofs = i * WORD + base_ofs
+ self.load_reg(mc, gpr, r.fp, ofs)
if withfloats:
- # Pop all XMM regs
- if callee_only:
- regs = VFPRegisterManager.save_around_call_regs
- else:
- regs = VFPRegisterManager.all_regs
- for i, vfpr in enumerate(regs):
- if vfpr in ignored_regs:
- continue
- ofs = len(CoreRegisterManager.all_regs) * WORD
- ofs += i * DOUBLE_WORD + base_ofs
- self.load_reg(mc, vfpr, r.fp, ofs)
+ # Pop VFP regs
+ regs = VFPRegisterManager.all_regs
+ ofs = len(CoreRegisterManager.all_regs) * WORD
+ assert check_imm_arg(ofs+base_ofs)
+ mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs+base_ofs)
+ mc.VLDM(r.ip.value, [vfpr.value for vfpr in regs])
def _build_failure_recovery(self, exc, withfloats=False):
mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
diff --git a/rpython/jit/backend/arm/test/test_push_pop_frame.py b/rpython/jit/backend/arm/test/test_push_pop_frame.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/arm/test/test_push_pop_frame.py
@@ -0,0 +1,177 @@
+import py
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm import registers as r
+from rpython.jit.backend.arm.arch import WORD
+from rpython.jit.backend.arm.test.test_regalloc_mov import BaseMovTest, mi
+
+base_ofs = 23
+class MockCPU(object):
+ def get_baseofs_of_frame_field(self):
+ return base_ofs
+
+
+class TestRegallocPush(BaseMovTest):
+ def setup_method(self, method):
+ BaseMovTest.setup_method(self, method)
+ self.asm.cpu = MockCPU()
+
+ def test_callee_only(self):
+ expected = [
+ mi('ADD_ri', r.ip.value, r.fp.value, base_ofs),
+ mi('STM', r.ip.value, [r.r0.value, r.r1.value,
+ r.r2.value, r.r3.value]),
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=[],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_with_holes(self):
+ expected = [
+ mi('STR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ mi('STR_ri', r.r2.value, r.fp.value, cond=c.AL, imm=base_ofs + 8),
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=[r.r1, r.r3],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_with_holes_in_front(self):
+ expected = [
+ mi('STR_ri', r.r2.value, r.fp.value, cond=c.AL, imm=base_ofs + 8),
+ mi('STR_ri', r.r3.value, r.fp.value, cond=c.AL, imm=base_ofs + 12),
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=[r.r0, r.r1],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_ignore_more_than_saved(self):
+ expected = [
+ mi('STR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc,
+ ignored_regs=[r.r1, r.r2, r.r3, r.r4, r.r5],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_with_floats(self):
+ expected = [
+ mi('STR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ mi('ADD_ri', r.ip.value, r.fp.value, imm=base_ofs + len(r.all_regs) * WORD),
+ mi('VSTM', r.ip.value, [v.value for v in r.all_vfp_regs])
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc,
+ ignored_regs=[r.r1, r.r2, r.r3],
+ withfloats=True, callee_only=True)
+ self.validate(expected)
+
+ def test_try_ignore_vfp_reg(self):
+ py.test.raises(AssertionError, self.asm._push_all_regs_to_jitframe, self.asm.mc,
+ ignored_regs=[r.d0, r.r2, r.r3], withfloats=True, callee_only=True)
+
+ def test_all_regs(self):
+ expected = [
+ mi('ADD_ri', r.ip.value, r.fp.value, base_ofs),
+ mi('STM', r.ip.value, [reg.value for reg in r.all_regs]),
+ ]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=[],
+ withfloats=False, callee_only=False)
+ self.validate(expected)
+
+ def test_all_regs_with_holes(self):
+ ignored = [r.r1, r.r6]
+ expected = [mi('STR_ri', reg.value, r.fp.value, cond=c.AL, imm=base_ofs + reg.value * WORD)
+ for reg in r.all_regs if reg not in ignored]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=ignored,
+ withfloats=False, callee_only=False)
+ self.validate(expected)
+
+ def test_all_regs_with_holes_in_front(self):
+ ignored = [r.r0, r.r1]
+ expected = [mi('STR_ri', reg.value, r.fp.value, cond=c.AL, imm=base_ofs + reg.value * WORD)
+ for reg in r.all_regs if reg not in ignored]
+ self.asm._push_all_regs_to_jitframe(self.asm.mc, ignored_regs=ignored,
+ withfloats=False, callee_only=False)
+ self.validate(expected)
+
+
+
+class TestRegallocPop(BaseMovTest):
+ def setup_method(self, method):
+ BaseMovTest.setup_method(self, method)
+ self.asm.cpu = MockCPU()
+
+ def test_callee_only(self):
+ expected = [
+ mi('ADD_ri', r.ip.value, r.fp.value, base_ofs),
+ mi('LDM', r.ip.value, [r.r0.value, r.r1.value,
+ r.r2.value, r.r3.value]),
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=[],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_with_holes(self):
+ expected = [
+ mi('LDR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ mi('LDR_ri', r.r2.value, r.fp.value, cond=c.AL, imm=base_ofs + 8),
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=[r.r1, r.r3],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_with_holes_in_front(self):
+ expected = [
+ mi('LDR_ri', r.r2.value, r.fp.value, cond=c.AL, imm=base_ofs + 8),
+ mi('LDR_ri', r.r3.value, r.fp.value, cond=c.AL, imm=base_ofs + 12),
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=[r.r0, r.r1],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_callee_only_ignore_more_than_saved(self):
+ expected = [
+ mi('LDR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc,
+ ignored_regs=[r.r1, r.r2, r.r3, r.r4, r.r5],
+ withfloats=False, callee_only=True)
+ self.validate(expected)
+
+ def test_with_floats(self):
+ expected = [
+ mi('LDR_ri', r.r0.value, r.fp.value, cond=c.AL, imm=base_ofs),
+ mi('ADD_ri', r.ip.value, r.fp.value, imm=base_ofs + len(r.all_regs) * WORD),
+ mi('VLDM', r.ip.value, [v.value for v in r.all_vfp_regs])
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc,
+ ignored_regs=[r.r1, r.r2, r.r3],
+ withfloats=True, callee_only=True)
+ self.validate(expected)
+
+ def test_try_ignore_vfp_reg(self):
+ py.test.raises(AssertionError, self.asm._pop_all_regs_from_jitframe, self.asm.mc,
+ ignored_regs=[r.d0, r.r2, r.r3], withfloats=True, callee_only=True)
+
+ def test_all_regs(self):
+ expected = [
+ mi('ADD_ri', r.ip.value, r.fp.value, base_ofs),
+ mi('LDM', r.ip.value, [reg.value for reg in r.all_regs]),
+ ]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=[],
+ withfloats=False, callee_only=False)
+ self.validate(expected)
+
+ def test_all_regs_with_holes(self):
+ ignored = [r.r1, r.r6]
+ expected = [mi('LDR_ri', reg.value, r.fp.value, cond=c.AL, imm=base_ofs + reg.value * WORD)
+ for reg in r.all_regs if reg not in ignored]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=ignored,
+ withfloats=False, callee_only=False)
+ self.validate(expected)
+
+ def test_all_regs_with_holes_in_front(self):
+ ignored = [r.r0, r.r1]
+ expected = [mi('LDR_ri', reg.value, r.fp.value, cond=c.AL, imm=base_ofs + reg.value * WORD)
+ for reg in r.all_regs if reg not in ignored]
+ self.asm._pop_all_regs_from_jitframe(self.asm.mc, ignored_regs=ignored,
+ withfloats=False, callee_only=False)
+ self.validate(expected)
diff --git a/rpython/jit/backend/llgraph/test/test_llgraph.py b/rpython/jit/backend/llgraph/test/test_llgraph.py
--- a/rpython/jit/backend/llgraph/test/test_llgraph.py
+++ b/rpython/jit/backend/llgraph/test/test_llgraph.py
@@ -15,6 +15,9 @@
def test_memoryerror(self):
py.test.skip("does not make much sense on the llgraph backend")
+ def test_call_release_gil_variable_function_and_arguments(self):
+ py.test.skip("the arguments seem not correctly casted")
+
def test_cast_adr_to_int_and_back():
X = lltype.Struct('X', ('foo', lltype.Signed))
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -108,8 +108,7 @@
self.malloc_slowpath_unicode = None
self._build_stack_check_slowpath()
- if gc_ll_descr.gcrootmap:
- self._build_release_gil(gc_ll_descr.gcrootmap)
+ self._build_release_gil(gc_ll_descr.gcrootmap)
if not self._debug:
# if self._debug is already set it means that someone called
# set_debug by hand before initializing the assembler. Leave it
@@ -348,12 +347,19 @@
if after:
after()
+ @staticmethod
+ def _no_op():
+ pass
+
_NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
_CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
lltype.Void))
def _build_release_gil(self, gcrootmap):
- if gcrootmap.is_shadow_stack:
+ if gcrootmap is None:
+ releasegil_func = llhelper(self._NOARG_FUNC, self._no_op)
+ reacqgil_func = llhelper(self._NOARG_FUNC, self._no_op)
+ elif gcrootmap.is_shadow_stack:
releasegil_func = llhelper(self._NOARG_FUNC,
self._release_gil_shadowstack)
reacqgil_func = llhelper(self._NOARG_FUNC,
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2532,6 +2532,219 @@
assert rffi.charp2strn(buffer, buflen) == cwd
lltype.free(buffer, flavor='raw')
+ def test_call_release_gil_return_types(self):
+ from rpython.rlib.libffi import types
+ from rpython.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
+ from rpython.rlib.rarithmetic import r_singlefloat
+ cpu = self.cpu
+
+ for ffitype, result, TP in [
+ (types.ulong, r_uint(sys.maxint + 10), lltype.Unsigned),
+ (types.slong, -4321, lltype.Signed),
+ (types.uint8, 200, rffi.UCHAR),
+ (types.sint8, -42, rffi.SIGNEDCHAR),
+ (types.uint16, 50000, rffi.USHORT),
+ (types.sint16, -20000, rffi.SHORT),
+ (types.uint32, r_uint(3000000000), rffi.UINT),
+ (types.sint32, -2000000000, rffi.INT),
+ (types.uint64, r_ulonglong(9999999999999999999),
+ lltype.UnsignedLongLong),
+ (types.sint64, r_longlong(-999999999999999999),
+ lltype.SignedLongLong),
+ (types.double, 12.3475226, rffi.DOUBLE),
+ (types.float, r_singlefloat(-592.75), rffi.FLOAT),
+ ]:
+ if sys.maxint < 2**32 and TP in (lltype.SignedLongLong,
+ lltype.UnsignedLongLong):
+ if not cpu.supports_longlong:
+ continue
+ if TP == rffi.DOUBLE:
+ if not cpu.supports_floats:
+ continue
+ if TP == rffi.FLOAT:
+ if not cpu.supports_singlefloats:
+ continue
+ #
+ result = rffi.cast(TP, result)
+ #
+ def pseudo_c_function():
+ return result
+ #
+ FPTR = self.Ptr(self.FuncType([], TP))
+ func_ptr = llhelper(FPTR, pseudo_c_function)
+ funcbox = self.get_funcbox(cpu, func_ptr)
+ calldescr = cpu._calldescr_dynamic_for_tests([], ffitype)
+ faildescr = BasicFailDescr(1)
+ kind = types.getkind(ffitype)
+ if kind in 'uis':
+ b3 = BoxInt()
+ elif kind in 'fUI':
+ b3 = BoxFloat()
+ else:
+ assert 0, kind
+ #
+ ops = [
+ ResOperation(rop.CALL_RELEASE_GIL, [funcbox], b3,
+ descr=calldescr),
+ ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+ ResOperation(rop.FINISH, [b3], None, descr=BasicFinalDescr(0))
+ ]
+ ops[1].setfailargs([])
+ looptoken = JitCellToken()
+ self.cpu.compile_loop([], ops, looptoken)
+
+ deadframe = self.cpu.execute_token(looptoken)
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail.identifier == 0
+ if isinstance(b3, BoxInt):
+ r = self.cpu.get_int_value(deadframe, 0)
+ if isinstance(result, r_singlefloat):
+ assert -sys.maxint-1 <= r <= 0xFFFFFFFF
+ r, = struct.unpack("f", struct.pack("I", r & 0xFFFFFFFF))
+ result = float(result)
+ else:
+ r = rffi.cast(TP, r)
+ assert r == result
+ elif isinstance(b3, BoxFloat):
+ r = self.cpu.get_float_value(deadframe, 0)
+ if isinstance(result, float):
+ r = longlong.getrealfloat(r)
+ else:
+ r = rffi.cast(TP, r)
+ assert r == result
+
+ def test_call_release_gil_variable_function_and_arguments(self):
+ from rpython.rlib.libffi import types
+ from rpython.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
+ from rpython.rlib.rarithmetic import r_singlefloat
+
+ cpu = self.cpu
+ rnd = random.Random(525)
+
+ ALL_TYPES = [
+ (types.ulong, lltype.Unsigned),
+ (types.slong, lltype.Signed),
+ (types.uint8, rffi.UCHAR),
+ (types.sint8, rffi.SIGNEDCHAR),
+ (types.uint16, rffi.USHORT),
+ (types.sint16, rffi.SHORT),
+ (types.uint32, rffi.UINT),
+ (types.sint32, rffi.INT),
+ ]
+ if sys.maxint < 2**32 and cpu.supports_longlong:
+ ALL_TYPES += [
+ (types.uint64, lltype.UnsignedLongLong),
+ (types.sint64, lltype.SignedLongLong),
+ ] * 2
+ if cpu.supports_floats:
+ ALL_TYPES += [
+ (types.double, rffi.DOUBLE),
+ ] * 4
+ if cpu.supports_singlefloats:
+ ALL_TYPES += [
+ (types.float, rffi.FLOAT),
+ ] * 4
+
+ for k in range(100):
+ POSSIBLE_TYPES = [rnd.choice(ALL_TYPES)
+ for i in range(random.randrange(2, 5))]
+ load_factor = rnd.random()
+ keepalive_factor = rnd.random()
+ #
+ def pseudo_c_function(*args):
+ seen.append(list(args))
+ #
+ ffitypes = []
+ ARGTYPES = []
+ for i in range(rnd.randrange(4, 20)):
+ ffitype, TP = rnd.choice(POSSIBLE_TYPES)
+ ffitypes.append(ffitype)
+ ARGTYPES.append(TP)
+ #
+ FPTR = self.Ptr(self.FuncType(ARGTYPES, lltype.Void))
+ func_ptr = llhelper(FPTR, pseudo_c_function)
+ funcbox = self.get_funcbox(cpu, func_ptr)
+ calldescr = cpu._calldescr_dynamic_for_tests(ffitypes, types.void)
+ faildescr = BasicFailDescr(1)
+ #
+ argboxes = [BoxInt()] # for the function to call
+ codes = ['X']
+ for ffitype in ffitypes:
+ kind = types.getkind(ffitype)
+ codes.append(kind)
+ if kind in 'uis':
+ b1 = BoxInt()
+ elif kind in 'fUI':
+ b1 = BoxFloat()
+ else:
+ assert 0, kind
+ argboxes.append(b1)
+ codes = ''.join(codes) # useful for pdb
+ print
+ print codes
+ #
+ argvalues = [funcbox.getint()]
+ for TP in ARGTYPES:
+ r = (rnd.random() - 0.5) * 999999999999.9
+ r = rffi.cast(TP, r)
+ argvalues.append(r)
+ #
+ argvalues_normal = argvalues[:1]
+ for ffitype, r in zip(ffitypes, argvalues[1:]):
+ kind = types.getkind(ffitype)
+ if kind in 'ui':
+ r = rffi.cast(lltype.Signed, r)
+ elif kind in 's':
+ r, = struct.unpack("i", struct.pack("f", float(r)))
+ elif kind in 'f':
+ r = longlong.getfloatstorage(r)
+ elif kind in 'UI': # 32-bit only
+ r = rffi.cast(lltype.SignedLongLong, r)
+ else:
+ assert 0
+ argvalues_normal.append(r)
+ #
+ ops = []
+ loadcodes = []
+ insideboxes = []
+ for b1 in argboxes:
+ load = rnd.random() < load_factor
+ loadcodes.append(' ^'[load])
+ if load:
+ b2 = b1.clonebox()
+ ops.insert(rnd.randrange(0, len(ops)+1),
+ ResOperation(rop.SAME_AS, [b1], b2))
+ b1 = b2
+ insideboxes.append(b1)
+ loadcodes = ''.join(loadcodes)
+ print loadcodes
+ ops += [
+ ResOperation(rop.CALL_RELEASE_GIL, insideboxes, None,
+ descr=calldescr),
+ ResOperation(rop.GUARD_NOT_FORCED, [], None, descr=faildescr),
+ ResOperation(rop.FINISH, [], None, descr=BasicFinalDescr(0))
+ ]
+ ops[-2].setfailargs([])
+ # keep alive a random subset of the insideboxes
+ for b1 in insideboxes:
+ if rnd.random() < keepalive_factor:
+ ops.insert(-1, ResOperation(rop.SAME_AS, [b1],
+ b1.clonebox()))
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(argboxes, ops, looptoken)
+ #
+ seen = []
+ deadframe = self.cpu.execute_token(looptoken, *argvalues_normal)
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail.identifier == 0
+ expected = argvalues[1:]
+ [got] = seen
+ different_values = ['%r != %r' % (a, b)
+ for a, b in zip(got, expected)
+ if a != b]
+ assert got == expected, ', '.join(different_values)
+
+
def test_guard_not_invalidated(self):
cpu = self.cpu
i0 = BoxInt()
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -40,4 +40,4 @@
PASS_ON_MY_FRAME = 12
JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
-assert PASS_ON_MY_FRAME >= 11 # asmgcc needs at least JIT_USE_WORDS + 2
+assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -6,7 +6,7 @@
DEBUG_COUNTER, debug_bridge)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
-from rpython.jit.metainterp.history import Const, Box
+from rpython.jit.metainterp.history import Const, Box, VOID
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
@@ -25,28 +25,17 @@
RegLoc, FrameLoc, ConstFloatLoc, ImmedLoc, AddressLoc, imm,
imm0, imm1, FloatImmedLoc, RawEbpLoc, RawEspLoc)
from rpython.rlib.objectmodel import we_are_translated
-from rpython.jit.backend.x86 import rx86, codebuf
+from rpython.jit.backend.x86 import rx86, codebuf, callbuilder
from rpython.jit.metainterp.resoperation import rop
from rpython.jit.backend.x86 import support
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib import rgc
-from rpython.rlib.clibffi import FFI_DEFAULT_ABI
-from rpython.jit.backend.x86.jump import remap_frame_layout
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.codewriter import longlong
from rpython.rlib.rarithmetic import intmask, r_uint
from rpython.rlib.objectmodel import compute_unique_id
-# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
-# better safe than sorry
-CALL_ALIGN = 16 // WORD
-
-
-def align_stack_words(words):
- return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
-
-
class Assembler386(BaseAssembler):
_regalloc = None
_output_loop_log = None
@@ -131,10 +120,10 @@
mc.MOV_rs(esi.value, WORD*2)
# push first arg
mc.MOV_rr(edi.value, ebp.value)
- align = align_stack_words(1)
+ align = callbuilder.align_stack_words(1)
mc.SUB_ri(esp.value, (align - 1) * WORD)
else:
- align = align_stack_words(3)
+ align = callbuilder.align_stack_words(3)
mc.MOV_rs(eax.value, WORD * 2)
mc.SUB_ri(esp.value, (align - 1) * WORD)
mc.MOV_sr(WORD, eax.value)
@@ -1014,175 +1003,24 @@
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
return bool(gcrootmap) and not gcrootmap.is_shadow_stack
- def _emit_call(self, x, arglocs, start=0, tmp=eax,
- argtypes=None, callconv=FFI_DEFAULT_ABI,
- # whether to worry about a CALL that can collect; this
- # is always true except in call_release_gil
- can_collect=True,
- # max number of arguments we can pass on esp; if more,
- # we need to decrease esp temporarily
- stack_max=PASS_ON_MY_FRAME):
- #
- if IS_X86_64:
- return self._emit_call_64(x, arglocs, start, argtypes,
- can_collect, stack_max)
- stack_depth = 0
- n = len(arglocs)
- for i in range(start, n):
- loc = arglocs[i]
- stack_depth += loc.get_width() // WORD
- if stack_depth > stack_max:
- align = align_stack_words(stack_depth - stack_max)
- self.mc.SUB_ri(esp.value, align * WORD)
- if can_collect:
- self.set_extra_stack_depth(self.mc, align * WORD)
+ def simple_call(self, fnloc, arglocs, result_loc=eax):
+ if result_loc is xmm0:
+ result_type = FLOAT
+ result_size = 8
+ elif result_loc is None:
+ result_type = VOID
+ result_size = 0
else:
- align = 0
- p = 0
- for i in range(start, n):
- loc = arglocs[i]
- if isinstance(loc, RegLoc):
- if loc.is_xmm:
- self.mc.MOVSD_sx(p, loc.value)
- else:
- self.mc.MOV_sr(p, loc.value)
- p += loc.get_width()
- p = 0
- for i in range(start, n):
- loc = arglocs[i]
- if not isinstance(loc, RegLoc):
- if loc.get_width() == 8:
- self.mc.MOVSD(xmm0, loc)
- self.mc.MOVSD_sx(p, xmm0.value)
- else:
- self.mc.MOV(tmp, loc)
- self.mc.MOV_sr(p, tmp.value)
- p += loc.get_width()
- # x is a location
- if can_collect:
- # we push *now* the gcmap, describing the status of GC registers
- # after the rearrangements done just above, ignoring the return
- # value eax, if necessary
- noregs = self.cpu.gc_ll_descr.is_shadow_stack()
- gcmap = self._regalloc.get_gcmap([eax], noregs=noregs)
- self.push_gcmap(self.mc, gcmap, store=True)
- #
- self.mc.CALL(x)
- if callconv != FFI_DEFAULT_ABI:
- self._fix_stdcall(callconv, p - align * WORD)
- elif align:
- self.mc.ADD_ri(esp.value, align * WORD)
- #
- if can_collect:
- self._reload_frame_if_necessary(self.mc)
- if align:
- self.set_extra_stack_depth(self.mc, 0)
- self.pop_gcmap(self.mc)
+ result_type = INT
+ result_size = WORD
+ cb = callbuilder.CallBuilder(self, fnloc, arglocs,
+ result_loc, result_type,
+ result_size)
+ cb.emit()
- def _fix_stdcall(self, callconv, p):
- from rpython.rlib.clibffi import FFI_STDCALL
- assert callconv == FFI_STDCALL
- # it's a bit stupid, but we're just going to cancel the fact that
- # the called function just added 'p' to ESP, by subtracting it again.
- self.mc.SUB_ri(esp.value, p)
-
- def _emit_call_64(self, x, arglocs, start, argtypes,
- can_collect, stack_max):
- src_locs = []
- dst_locs = []
- xmm_src_locs = []
- xmm_dst_locs = []
- singlefloats = None
-
- # In reverse order for use with pop()
- unused_gpr = [r9, r8, ecx, edx, esi, edi]
- unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
-
- on_stack = 0
- # count the stack depth
- floats = 0
- for i in range(start, len(arglocs)):
- arg = arglocs[i]
- if arg.is_float() or argtypes and argtypes[i - start] == 'S':
- floats += 1
- all_args = len(arglocs) - start
- stack_depth = (max(all_args - floats - len(unused_gpr), 0) +
- max(floats - len(unused_xmm), 0))
- align = 0
- if stack_depth > stack_max:
- align = align_stack_words(stack_depth - stack_max)
- if can_collect:
- self.set_extra_stack_depth(self.mc, align * WORD)
- self.mc.SUB_ri(esp.value, align * WORD)
- for i in range(start, len(arglocs)):
- loc = arglocs[i]
- if loc.is_float():
- xmm_src_locs.append(loc)
- if len(unused_xmm) > 0:
- xmm_dst_locs.append(unused_xmm.pop())
- else:
- xmm_dst_locs.append(RawEspLoc(on_stack * WORD, FLOAT))
- on_stack += 1
- elif argtypes is not None and argtypes[i-start] == 'S':
- # Singlefloat argument
- if singlefloats is None:
- singlefloats = []
- if len(unused_xmm) > 0:
- singlefloats.append((loc, unused_xmm.pop()))
- else:
- singlefloats.append((loc, RawEspLoc(on_stack * WORD, INT)))
- on_stack += 1
- else:
- src_locs.append(loc)
- if len(unused_gpr) > 0:
- dst_locs.append(unused_gpr.pop())
- else:
- dst_locs.append(RawEspLoc(on_stack * WORD, INT))
- on_stack += 1
-
- # Handle register arguments: first remap the xmm arguments
- remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
- X86_64_XMM_SCRATCH_REG)
- # Load the singlefloat arguments from main regs or stack to xmm regs
- if singlefloats is not None:
- for src, dst in singlefloats:
- if isinstance(dst, RawEspLoc):
- # XXX too much special logic
- if isinstance(src, RawEbpLoc):
- self.mc.MOV32(X86_64_SCRATCH_REG, src)
- self.mc.MOV32(dst, X86_64_SCRATCH_REG)
- else:
- self.mc.MOV32(dst, src)
- continue
- if isinstance(src, ImmedLoc):
- self.mc.MOV(X86_64_SCRATCH_REG, src)
- src = X86_64_SCRATCH_REG
- self.mc.MOVD(dst, src)
- # Finally remap the arguments in the main regs
- # If x is a register and is in dst_locs, then oups, it needs to
- # be moved away:
- if x in dst_locs:
- src_locs.append(x)
- dst_locs.append(r10)
- x = r10
- remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
- if can_collect:
- # we push *now* the gcmap, describing the status of GC registers
- # after the rearrangements done just above, ignoring the return
- # value eax, if necessary
- noregs = self.cpu.gc_ll_descr.is_shadow_stack()
- gcmap = self._regalloc.get_gcmap([eax], noregs=noregs)
- self.push_gcmap(self.mc, gcmap, store=True)
- #
- self.mc.CALL(x)
- if align:
- self.mc.ADD_ri(esp.value, align * WORD)
- #
- if can_collect:
- self._reload_frame_if_necessary(self.mc)
- if align:
- self.set_extra_stack_depth(self.mc, 0)
- self.pop_gcmap(self.mc)
+ def simple_call_no_collect(self, fnloc, arglocs):
+ cb = callbuilder.CallBuilder(self, fnloc, arglocs)
+ cb.emit_no_collect()
def _reload_frame_if_necessary(self, mc, align_stack=False):
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -1198,10 +1036,6 @@
self._write_barrier_fastpath(mc, wbdescr, [ebp], array=False,
is_frame=True, align_stack=align_stack)
- def call(self, addr, args, res):
- self._emit_call(imm(addr), args)
- assert res is eax
-
genop_int_neg = _unaryop("NEG")
genop_int_invert = _unaryop("NOT")
genop_int_add = _binaryop_or_lea("ADD", True)
@@ -1446,7 +1280,7 @@
# ----------
def genop_call_malloc_gc(self, op, arglocs, result_loc):
- self.genop_call(op, arglocs, result_loc)
+ self._genop_call(op, arglocs, result_loc)
self.propagate_memoryerror_if_eax_is_null()
def propagate_memoryerror_if_eax_is_null(self):
@@ -1993,75 +1827,29 @@
self.pending_guard_tokens.append(guard_token)
def genop_call(self, op, arglocs, resloc):
- return self._genop_call(op, arglocs, resloc)
+ self._genop_call(op, arglocs, resloc)
def _genop_call(self, op, arglocs, resloc, is_call_release_gil=False):
from rpython.jit.backend.llsupport.descr import CallDescr
- sizeloc = arglocs[0]
- assert isinstance(sizeloc, ImmedLoc)
- size = sizeloc.value
- signloc = arglocs[1]
-
- x = arglocs[2] # the function address
- if x is eax:
- tmp = ecx
- else:
- tmp = eax
+ cb = callbuilder.CallBuilder(self, arglocs[2], arglocs[3:], resloc)
descr = op.getdescr()
assert isinstance(descr, CallDescr)
+ cb.callconv = descr.get_call_conv()
+ cb.argtypes = descr.get_arg_types()
+ cb.restype = descr.get_result_type()
+ sizeloc = arglocs[0]
+ assert isinstance(sizeloc, ImmedLoc)
+ cb.ressize = sizeloc.value
+ signloc = arglocs[1]
+ assert isinstance(signloc, ImmedLoc)
+ cb.ressign = signloc.value
- stack_max = PASS_ON_MY_FRAME
if is_call_release_gil:
- if self._is_asmgcc():
- from rpython.memory.gctransform import asmgcroot
- stack_max -= asmgcroot.JIT_USE_WORDS
- can_collect = False
+ cb.emit_call_release_gil()
else:
- can_collect = True
-
- self._emit_call(x, arglocs, 3, tmp=tmp,
- argtypes=descr.get_arg_types(),
- callconv=descr.get_call_conv(),
- can_collect=can_collect,
- stack_max=stack_max)
-
- if IS_X86_32 and isinstance(resloc, FrameLoc) and resloc.type == FLOAT:
- # a float or a long long return
- if descr.get_result_type() == 'L':
- self.mc.MOV_br(resloc.value, eax.value) # long long
- self.mc.MOV_br(resloc.value + 4, edx.value)
- # XXX should ideally not move the result on the stack,
- # but it's a mess to load eax/edx into a xmm register
- # and this way is simpler also because the result loc
- # can just be always a stack location
- else:
- self.mc.FSTPL_b(resloc.value) # float return
- elif descr.get_result_type() == 'S':
- # singlefloat return
- assert resloc is eax
- if IS_X86_32:
- # must convert ST(0) to a 32-bit singlefloat and load it into EAX
- # mess mess mess
- self.mc.SUB_ri(esp.value, 4)
- self.mc.FSTPS_s(0)
- self.mc.POP_r(eax.value)
- elif IS_X86_64:
- # must copy from the lower 32 bits of XMM0 into eax
- self.mc.MOVD_rx(eax.value, xmm0.value)
- elif size == WORD:
- assert resloc is eax or resloc is xmm0 # a full word
- elif size == 0:
- pass # void return
- else:
- # use the code in load_from_mem to do the zero- or sign-extension
- assert resloc is eax
- if size == 1:
- srcloc = eax.lowest8bits()
- else:
- srcloc = eax
- self.load_from_mem(eax, srcloc, sizeloc, signloc)
+ cb.emit()
def _store_force_index(self, guard_op):
faildescr = guard_op.getdescr()
@@ -2077,64 +1865,15 @@
def genop_guard_call_may_force(self, op, guard_op, guard_token,
arglocs, result_loc):
self._store_force_index(guard_op)
- self.genop_call(op, arglocs, result_loc)
+ self._genop_call(op, arglocs, result_loc)
self._emit_guard_not_forced(guard_token)
def genop_guard_call_release_gil(self, op, guard_op, guard_token,
arglocs, result_loc):
self._store_force_index(guard_op)
- # first, close the stack in the sense of the asmgcc GC root tracker
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap:
- # we put the gcmap now into the frame before releasing the GIL,
- # and pop it below after reacquiring the GIL. The assumption
- # is that this gcmap describes correctly the situation at any
- # point in-between: all values containing GC pointers should
- # be safely saved out of registers by now, and will not be
- # manipulated by any of the following CALLs.
- gcmap = self._regalloc.get_gcmap(noregs=True)
- self.push_gcmap(self.mc, gcmap, store=True)
- self.call_release_gil(gcrootmap, arglocs)
- # do the call
self._genop_call(op, arglocs, result_loc, is_call_release_gil=True)
- # then reopen the stack
- if gcrootmap:
- self.call_reacquire_gil(gcrootmap, result_loc)
- self.pop_gcmap(self.mc) # remove the gcmap saved above
- # finally, the guard_not_forced
self._emit_guard_not_forced(guard_token)
- def call_release_gil(self, gcrootmap, save_registers):
- if gcrootmap.is_shadow_stack:
- args = []
- else:
- from rpython.memory.gctransform import asmgcroot
- # build a 'css' structure on the stack: 2 words for the linkage,
- # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
- # total size of JIT_USE_WORDS. This structure is found at
- # [ESP+css].
- css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
- assert css >= 2
- # Save ebp
- index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
- self.mc.MOV_sr(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
- # Save the "return address": we pretend that it's css
- if IS_X86_32:
- reg = eax
- elif IS_X86_64:
- reg = edi
- self.mc.LEA_rs(reg.value, css) # LEA reg, [css]
- frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
- self.mc.MOV_sr(frame_ptr, reg.value) # MOV [css.frame], reg
- # Set up jf_extra_stack_depth to pretend that the return address
- # was at css, and so our stack frame is supposedly shorter by
- # (css+WORD) bytes
- self.set_extra_stack_depth(self.mc, -css-WORD)
- # Call the closestack() function (also releasing the GIL)
- args = [reg]
- #
- self._emit_call(imm(self.releasegil_addr), args, can_collect=False)
-
def call_reacquire_gil(self, gcrootmap, save_loc):
# save the previous result (eax/xmm0) into the stack temporarily.
# XXX like with call_release_gil(), we assume that we don't need
@@ -2186,11 +1925,11 @@
self.call_assembler(op, guard_op, argloc, vloc, result_loc, eax)
self._emit_guard_not_forced(guard_token)
- def _call_assembler_emit_call(self, addr, argloc, tmploc):
- self._emit_call(addr, [argloc], 0, tmp=tmploc)
+ def _call_assembler_emit_call(self, addr, argloc, _):
+ self.simple_call(addr, [argloc])
- def _call_assembler_emit_helper_call(self, addr, arglocs, _):
- self._emit_call(addr, arglocs, 0, tmp=self._second_tmp_reg)
+ def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
+ self.simple_call(addr, arglocs, result_loc)
def _call_assembler_check_descr(self, value, tmploc):
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -0,0 +1,577 @@
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.jit.metainterp.history import INT, FLOAT
+from rpython.jit.backend.x86.arch import (WORD, IS_X86_64, IS_X86_32,
+ PASS_ON_MY_FRAME)
+from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
+ xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
+ r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
+ RegLoc, RawEspLoc, RawEbpLoc, imm, ImmedLoc)
+from rpython.jit.backend.x86.jump import remap_frame_layout
+
+
+# darwin requires the stack to be 16 bytes aligned on calls.
+# Same for gcc 4.5.0, better safe than sorry
+CALL_ALIGN = 16 // WORD
+
+def align_stack_words(words):
+ return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
+
+
+
+class AbstractCallBuilder(object):
+
+ # max number of words we have room in esp; if we need more for
+ # arguments, we need to decrease esp temporarily
+ stack_max = PASS_ON_MY_FRAME
+
+ # this can be set to guide more complex calls: gives the detailed
+ # type of the arguments
+ argtypes = ""
+ ressign = False
+
+ # this is the calling convention (can be FFI_STDCALL on Windows)
+ callconv = FFI_DEFAULT_ABI
+
+ # is it for the main CALL of a call_release_gil?
+ is_call_release_gil = False
+
+ # set by save_result_value()
+ tmpresloc = None
+
+
+ def __init__(self, assembler, fnloc, arglocs,
+ resloc=eax, restype=INT, ressize=WORD):
+ # Avoid tons of issues with a non-immediate fnloc by sticking it
+ # as an extra argument if needed
+ self.fnloc_is_immediate = isinstance(fnloc, ImmedLoc)
+ if self.fnloc_is_immediate:
+ self.fnloc = fnloc
+ self.arglocs = arglocs
+ else:
+ self.arglocs = arglocs + [fnloc]
+ self.asm = assembler
+ self.mc = assembler.mc
+ self.resloc = resloc
+ self.restype = restype
+ self.ressize = ressize
+ self.current_esp = 0 # 0 or (usually) negative, counted in bytes
+
+ def emit_no_collect(self):
+ """Emit a call that cannot collect."""
+ self.prepare_arguments()
+ self.emit_raw_call()
+ self.restore_esp()
+ self.load_result()
+
+ def emit(self):
+ """Emit a regular call; not for CALL_RELEASE_GIL."""
+ self.prepare_arguments()
+ self.push_gcmap()
+ self.emit_raw_call()
+ self.restore_esp()
+ self.pop_gcmap()
+ self.load_result()
+
+ def emit_call_release_gil(self):
+ """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr
+ and reacqgil_addr."""
+ self.select_call_release_gil_mode()
+ self.prepare_arguments()
+ self.push_gcmap_for_call_release_gil()
+ self.call_releasegil_addr_and_move_real_arguments()
+ self.emit_raw_call()
+ self.restore_esp()
+ self.move_real_result_and_call_reacqgil_addr()
+ self.pop_gcmap()
+ self.load_result()
+
+ def select_call_release_gil_mode(self):
+ """Overridden in CallBuilder64"""
+ self.is_call_release_gil = True
+ if self.asm._is_asmgcc():
+ from rpython.memory.gctransform import asmgcroot
+ self.stack_max = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS
+ assert self.stack_max >= 3
+
+ def emit_raw_call(self):
+ self.mc.CALL(self.fnloc)
+ if self.callconv != FFI_DEFAULT_ABI:
+ self.current_esp += self._fix_stdcall(self.callconv)
+
+ def subtract_esp_aligned(self, count):
+ if count > 0:
+ align = align_stack_words(count)
+ self.current_esp -= align * WORD
+ self.mc.SUB_ri(esp.value, align * WORD)
+
+ def restore_esp(self, target_esp=0):
+ if self.current_esp != target_esp:
+ self.mc.ADD_ri(esp.value, target_esp - self.current_esp)
+ self.current_esp = target_esp
+
+ def load_result(self):
+ """Overridden in CallBuilder32 and CallBuilder64"""
+ if self.ressize == 0:
+ return # void result
+ # use the code in load_from_mem to do the zero- or sign-extension
+ srcloc = self.tmpresloc
+ if srcloc is None:
+ if self.restype == FLOAT:
+ srcloc = xmm0
+ else:
+ srcloc = eax
+ if self.ressize >= WORD and self.resloc is srcloc:
+ return # no need for any MOV
+ if self.ressize == 1 and isinstance(srcloc, RegLoc):
+ srcloc = srcloc.lowest8bits()
+ self.asm.load_from_mem(self.resloc, srcloc,
+ imm(self.ressize), imm(self.ressign))
+
+ def push_gcmap(self):
+ # we push *now* the gcmap, describing the status of GC registers
+ # after the rearrangements done just before, ignoring the return
+ # value eax, if necessary
+ assert not self.is_call_release_gil
+ self.change_extra_stack_depth = (self.current_esp != 0)
+ if self.change_extra_stack_depth:
+ self.asm.set_extra_stack_depth(self.mc, -self.current_esp)
+ noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+ gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
+ self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+ def push_gcmap_for_call_release_gil(self):
+ assert self.is_call_release_gil
+ # we put the gcmap now into the frame before releasing the GIL,
+ # and pop it after reacquiring the GIL. The assumption
+ # is that this gcmap describes correctly the situation at any
+ # point in-between: all values containing GC pointers should
+ # be safely saved out of registers by now, and will not be
+ # manipulated by any of the following CALLs.
+ gcmap = self.asm._regalloc.get_gcmap(noregs=True)
+ self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+ def pop_gcmap(self):
+ self.asm._reload_frame_if_necessary(self.mc)
+ if self.change_extra_stack_depth:
+ self.asm.set_extra_stack_depth(self.mc, 0)
+ self.asm.pop_gcmap(self.mc)
+
+ def call_releasegil_addr_and_move_real_arguments(self):
+ initial_esp = self.current_esp
+ self.save_register_arguments()
+ #
+ if not self.asm._is_asmgcc():
+ # the helper takes no argument
+ self.change_extra_stack_depth = False
+ else:
+ from rpython.memory.gctransform import asmgcroot
+ # build a 'css' structure on the stack: 2 words for the linkage,
+ # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
+ # total size of JIT_USE_WORDS. This structure is found at
+ # [ESP+css].
+ css = -self.current_esp + (
+ WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
+ assert css >= 2 * WORD
+ # Save ebp
+ index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+ self.mc.MOV_sr(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
+ # Save the "return address": we pretend that it's css
+ if IS_X86_32:
+ reg = eax
+ elif IS_X86_64:
+ reg = edi
+ self.mc.LEA_rs(reg.value, css) # LEA reg, [css]
+ frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+ self.mc.MOV_sr(frame_ptr, reg.value) # MOV [css.frame], reg
+ # Set up jf_extra_stack_depth to pretend that the return address
+ # was at css, and so our stack frame is supposedly shorter by
+ # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
+ delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
+ self.change_extra_stack_depth = True
+ self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
+ # Call the closestack() function (also releasing the GIL)
+ # with 'reg' as argument
+ if IS_X86_32:
+ self.subtract_esp_aligned(1)
+ self.mc.MOV_sr(0, reg.value)
+ #else:
+ # on x86_64, reg is edi so that it is already correct
+ #
+ self.mc.CALL(imm(self.asm.releasegil_addr))
+ #
+ if not we_are_translated(): # for testing: we should not access
+ self.mc.ADD(ebp, imm(1)) # ebp any more
+ #
+ self.restore_register_arguments()
+ self.restore_esp(initial_esp)
+
+ def save_register_arguments(self):
+ """Overridden in CallBuilder64"""
+
+ def restore_register_arguments(self):
+ """Overridden in CallBuilder64"""
+
+ def move_real_result_and_call_reacqgil_addr(self):
+ # save the result we just got (in eax/eax+edx/st(0)/xmm0)
+ self.save_result_value()
+ # call the reopenstack() function (also reacquiring the GIL)
+ if not self.asm._is_asmgcc():
+ css = 0 # the helper takes no argument
+ else:
+ from rpython.memory.gctransform import asmgcroot
+ css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
+ if IS_X86_32:
+ reg = eax
+ elif IS_X86_64:
+ reg = edi
+ self.mc.LEA_rs(reg.value, css)
+ if IS_X86_32:
+ self.mc.MOV_sr(0, reg.value)
+ #
+ self.mc.CALL(imm(self.asm.reacqgil_addr))
+ #
+ if not we_are_translated(): # for testing: now we can access
+ self.mc.SUB(ebp, imm(1)) # ebp again
+ #
+ # Now that we reacquired the GIL, we can reload a possibly modified ebp
+ if self.asm._is_asmgcc():
+ # special-case: reload ebp from the css
+ from rpython.memory.gctransform import asmgcroot
+ index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
+ self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp]
+ #else:
+ # for shadowstack, done for us by _reload_frame_if_necessary()
+
+ def save_result_value(self):
+ """Overridden in CallBuilder32 and CallBuilder64"""
+ raise NotImplementedError
+
+
+class CallBuilder32(AbstractCallBuilder):
+
+ def prepare_arguments(self):
+ arglocs = self.arglocs
+ stack_depth = 0
+ n = len(arglocs)
+ for i in range(n):
+ loc = arglocs[i]
+ stack_depth += loc.get_width() // WORD
+ self.subtract_esp_aligned(stack_depth - self.stack_max)
+ #
+ p = 0
+ for i in range(n):
+ loc = arglocs[i]
+ if isinstance(loc, RegLoc):
+ if loc.is_xmm:
+ self.mc.MOVSD_sx(p, loc.value)
+ else:
+ self.mc.MOV_sr(p, loc.value)
+ p += loc.get_width()
+ p = 0
+ for i in range(n):
+ loc = arglocs[i]
+ if not isinstance(loc, RegLoc):
+ if loc.get_width() == 8:
+ self.mc.MOVSD(xmm0, loc)
+ self.mc.MOVSD_sx(p, xmm0.value)
+ elif isinstance(loc, ImmedLoc):
+ self.mc.MOV_si(p, loc.value)
+ else:
+ self.mc.MOV(eax, loc)
+ self.mc.MOV_sr(p, eax.value)
+ p += loc.get_width()
+ self.total_stack_used_by_arguments = p
+ #
+ if not self.fnloc_is_immediate: # the last "argument" pushed above
+ self.fnloc = RawEspLoc(p - WORD, INT)
+
+
+ def _fix_stdcall(self, callconv):
+ from rpython.rlib.clibffi import FFI_STDCALL
+ assert callconv == FFI_STDCALL
+ return self.total_stack_used_by_arguments
+
+ def load_result(self):
+ resloc = self.resloc
+ if resloc is not None and resloc.is_float():
+ # a float or a long long return
+ if self.tmpresloc is None:
+ if self.restype == 'L': # long long
+ # move eax/edx -> xmm0
+ self.mc.MOVD_xr(resloc.value^1, edx.value)
+ self.mc.MOVD_xr(resloc.value, eax.value)
+ self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1)
+ else:
+ # float: we have to go via the stack
+ self.mc.FSTPL_s(0)
+ self.mc.MOVSD_xs(resloc.value, 0)
+ else:
+ self.mc.MOVSD(resloc, self.tmpresloc)
+ #
+ elif self.restype == 'S':
+ # singlefloat return: must convert ST(0) to a 32-bit singlefloat
+ # and load it into self.resloc. mess mess mess
+ if self.tmpresloc is None:
+ self.mc.FSTPS_s(0)
+ self.mc.MOV_rs(resloc.value, 0)
+ else:
+ self.mc.MOV(resloc, self.tmpresloc)
+ else:
+ AbstractCallBuilder.load_result(self)
+
+ def save_result_value(self):
+ # Temporarily save the result value into [ESP+4]. We use "+4"
+ # in order to leave the word at [ESP+0] free, in case it's needed
+ if self.ressize == 0: # void return
+ return
+ if self.resloc.is_float():
+ # a float or a long long return
+ self.tmpresloc = RawEspLoc(4, FLOAT)
+ if self.restype == 'L':
+ self.mc.MOV_sr(4, eax.value) # long long
+ self.mc.MOV_sr(8, edx.value)
+ else:
+ self.mc.FSTPL_s(4) # float return
+ else:
+ self.tmpresloc = RawEspLoc(4, INT)
+ if self.restype == 'S':
+ self.mc.FSTPS_s(4)
+ else:
+ assert self.restype == INT
+ assert self.ressize <= WORD
+ self.mc.MOV_sr(4, eax.value)
+
+
+class CallBuilder64(AbstractCallBuilder):
+
+ ARGUMENTS_GPR = [edi, esi, edx, ecx, r8, r9]
+ ARGUMENTS_XMM = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
+ DONT_MOVE_GPR = []
+ _ALL_CALLEE_SAVE_GPR = [ebx, r12, r13, r14, r15]
+
+ next_arg_gpr = 0
+ next_arg_xmm = 0
+
+ def _unused_gpr(self, hint):
+ i = self.next_arg_gpr
+ self.next_arg_gpr = i + 1
+ try:
+ res = self.ARGUMENTS_GPR[i]
+ except IndexError:
+ return None
+ if hint in self.DONT_MOVE_GPR:
+ self.ARGUMENTS_GPR[i] = hint
+ res = hint
+ return res
+
+ def _unused_xmm(self):
+ i = self.next_arg_xmm
+ self.next_arg_xmm = i + 1
+ try:
+ return self.ARGUMENTS_XMM[i]
+ except IndexError:
+ return None
+
+ def _permute_to_prefer_unused_registers(self, lst):
+ # permute 'lst' so that it starts with registers that are not
+ # in 'self.already_used', and ends with registers that are.
+ N = len(lst)
+ i = 0
+ while i < N:
+ reg = lst[i]
+ if reg in self.already_used:
+ # move this reg to the end, and decrement N
+ N -= 1
+ assert N >= i
+ lst[N], lst[i] = lst[i], lst[N]
+ else:
+ i += 1
+
+ def select_call_release_gil_mode(self):
+ AbstractCallBuilder.select_call_release_gil_mode(self)
+ # We have to copy the arguments around a bit more in this mode,
+ # but on the other hand we don't need prepare_arguments() moving
+ # them in precisely the final registers. Here we look around for
+ # unused registers that may be more likely usable.
+ from rpython.jit.backend.x86.regalloc import X86_64_RegisterManager
+ from rpython.jit.backend.x86.regalloc import X86_64_XMMRegisterManager
+ self.already_used = {}
+ for loc in self.arglocs:
+ self.already_used[loc] = None
+ #
+ lst = X86_64_RegisterManager.save_around_call_regs[:]
+ self._permute_to_prefer_unused_registers(lst)
+ # <optimization>
+ extra = []
+ for reg in self.asm._regalloc.rm.free_regs:
+ if (reg not in self.already_used and
+ reg in self._ALL_CALLEE_SAVE_GPR):
+ extra.append(reg)
+ self.free_callee_save_gprs = extra
+ lst = extra + lst
+ # </optimization>
+ self.ARGUMENTS_GPR = lst[:len(self.ARGUMENTS_GPR)]
+ self.DONT_MOVE_GPR = self._ALL_CALLEE_SAVE_GPR
+ #
+ lst = X86_64_XMMRegisterManager.save_around_call_regs[:]
+ self._permute_to_prefer_unused_registers(lst)
+ self.ARGUMENTS_XMM = lst[:len(self.ARGUMENTS_XMM)]
+
+ def prepare_arguments(self):
+ src_locs = []
+ dst_locs = []
+ xmm_src_locs = []
+ xmm_dst_locs = []
+ singlefloats = None
+
+ arglocs = self.arglocs
+ argtypes = self.argtypes
+
+ on_stack = 0
+ for i in range(len(arglocs)):
+ loc = arglocs[i]
+ if loc.is_float():
+ tgt = self._unused_xmm()
+ if tgt is None:
+ tgt = RawEspLoc(on_stack * WORD, FLOAT)
+ on_stack += 1
+ xmm_src_locs.append(loc)
+ xmm_dst_locs.append(tgt)
+ elif i < len(argtypes) and argtypes[i] == 'S':
+ # Singlefloat argument
+ if singlefloats is None:
+ singlefloats = []
+ tgt = self._unused_xmm()
+ if tgt is None:
+ tgt = RawEspLoc(on_stack * WORD, INT)
+ on_stack += 1
+ singlefloats.append((loc, tgt))
+ else:
+ tgt = self._unused_gpr(hint=loc)
+ if tgt is None:
+ tgt = RawEspLoc(on_stack * WORD, INT)
+ on_stack += 1
+ src_locs.append(loc)
+ dst_locs.append(tgt)
+
+ if not self.fnloc_is_immediate:
+ self.fnloc = dst_locs[-1] # the last "argument" prepared above
+
+ if not we_are_translated(): # assert that we got the right stack depth
+ floats = 0
+ for i in range(len(arglocs)):
+ arg = arglocs[i]
+ if arg.is_float() or (i < len(argtypes) and argtypes[i]=='S'):
+ floats += 1
+ all_args = len(arglocs)
+ stack_depth = (max(all_args - floats - len(self.ARGUMENTS_GPR), 0)
+ + max(floats - len(self.ARGUMENTS_XMM), 0))
+ assert stack_depth == on_stack
+
+ self.subtract_esp_aligned(on_stack - self.stack_max)
+
+ # Handle register arguments: first remap the xmm arguments
+ remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs,
+ X86_64_XMM_SCRATCH_REG)
+ # Load the singlefloat arguments from main regs or stack to xmm regs
+ if singlefloats is not None:
+ for src, dst in singlefloats:
+ if isinstance(dst, RawEspLoc):
+ # XXX too much special logic
+ if isinstance(src, RawEbpLoc):
+ self.mc.MOV32(X86_64_SCRATCH_REG, src)
+ self.mc.MOV32(dst, X86_64_SCRATCH_REG)
+ else:
+ self.mc.MOV32(dst, src)
+ continue
+ if isinstance(src, ImmedLoc):
+ self.mc.MOV(X86_64_SCRATCH_REG, src)
+ src = X86_64_SCRATCH_REG
+ self.mc.MOVD(dst, src)
+ # Finally remap the arguments in the main regs
+ remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG)
+
+
+ def _fix_stdcall(self, callconv):
+ assert 0 # should not occur on 64-bit
+
+ def load_result(self):
+ if self.restype == 'S' and self.tmpresloc is None:
+ # singlefloat return: use MOVD to load the target register
+ # from the lower 32 bits of XMM0
+ self.mc.MOVD(self.resloc, xmm0)
+ else:
+ AbstractCallBuilder.load_result(self)
+
+ def save_result_value(self):
+ # Temporarily save the result value into [ESP].
+ if self.ressize == 0: # void return
+ return
+ #
+ if self.restype == FLOAT: # and not 'S'
+ self.mc.MOVSD_sx(0, xmm0.value)
+ self.tmpresloc = RawEspLoc(0, FLOAT)
+ return
+ #
+ if len(self.free_callee_save_gprs) == 0:
+ self.tmpresloc = RawEspLoc(0, INT)
+ else:
+ self.tmpresloc = self.free_callee_save_gprs[0]
+ #
+ if self.restype == 'S':
+ # singlefloat return: use MOVD to store the lower 32 bits
+ # of XMM0 into the tmpresloc (register or [ESP])
+ self.mc.MOVD(self.tmpresloc, xmm0)
+ else:
+ assert self.restype == INT
+ self.mc.MOV(self.tmpresloc, eax)
+
+ def save_register_arguments(self):
+ # Save the argument registers, which are given by self.ARGUMENTS_xxx.
+ n_gpr = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR))
+ n_xmm = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM))
+ n_saved_regs = n_gpr + n_xmm
+ for i in range(n_gpr):
+ if self.ARGUMENTS_GPR[i] in self._ALL_CALLEE_SAVE_GPR:
+ n_saved_regs -= 1 # don't need to save it
+ self.subtract_esp_aligned(n_saved_regs)
+ #
+ n = 0
+ for i in range(n_gpr):
+ if self.ARGUMENTS_GPR[i] not in self._ALL_CALLEE_SAVE_GPR:
+ self.mc.MOV_sr(n * WORD, self.ARGUMENTS_GPR[i].value)
+ n += 1
+ for i in range(n_xmm):
+ self.mc.MOVSD_sx(n * WORD, self.ARGUMENTS_XMM[i].value)
+ n += 1
+ assert n == n_saved_regs
+ self.n_saved_regs = n_saved_regs
+
+ def restore_register_arguments(self):
+ # Restore the saved values into the *real* registers used for calls
+ # --- which are not self.ARGUMENTS_xxx!
+ n_gpr = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR))
+ n_xmm = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM))
+ #
+ n = 0
+ for i in range(n_gpr):
+ tgtvalue = CallBuilder64.ARGUMENTS_GPR[i].value
+ if self.ARGUMENTS_GPR[i] not in self._ALL_CALLEE_SAVE_GPR:
+ self.mc.MOV_rs(tgtvalue, n * WORD)
+ n += 1
+ else:
+ self.mc.MOV_rr(tgtvalue, self.ARGUMENTS_GPR[i].value)
+ for i in range(n_xmm):
+ self.mc.MOVSD_xs(CallBuilder64.ARGUMENTS_XMM[i].value, n * WORD)
+ n += 1
+ assert n == self.n_saved_regs
+ #
+ if isinstance(self.fnloc, RegLoc): # fix this register
+ self.fnloc = CallBuilder64.ARGUMENTS_GPR[n_gpr - 1]
+
+
+if IS_X86_32:
+ CallBuilder = CallBuilder32
+if IS_X86_64:
+ CallBuilder = CallBuilder64
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -79,26 +79,14 @@
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = y
return ConstFloatLoc(adr)
- def after_call(self, v):
- # the result is stored in st0, but we don't have this around,
- # so genop_call will move it to some frame location immediately
- # after the call
- return self.frame_manager.loc(v)
+ def call_result_location(self, v):
+ return xmm0
class X86_64_XMMRegisterManager(X86XMMRegisterManager):
# xmm15 reserved for scratch use
all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14]
save_around_call_regs = all_regs
- def call_result_location(self, v):
- return xmm0
-
- def after_call(self, v):
- # We use RegisterManager's implementation, since X86XMMRegisterManager
- # places the result on the stack, which we don't need to do when the
- # calling convention places the result in xmm0
- return RegisterManager.after_call(self, v)
-
class X86FrameManager(FrameManager):
def __init__(self, base_ofs):
FrameManager.__init__(self)
@@ -799,14 +787,6 @@
self._consider_call(op, guard_op)
def consider_call_release_gil(self, op, guard_op):
- # We spill the arguments to the stack, because we need to do 3 calls:
- # call_release_gil(), the_real_c_function(), and call_reacquire_gil().
- # The arguments are used on the second call only. XXX we assume
- # that the XMM arguments won't be modified by call_release_gil().
- for i in range(op.numargs()):
- loc = self.loc(op.getarg(i))
- if loc in self.rm.save_around_call_regs:
- self.rm.force_spill_var(op.getarg(i))
assert guard_op is not None
self._consider_call(op, guard_op)
@@ -1151,9 +1131,8 @@
# call memcpy()
self.rm.before_call()
self.xrm.before_call()
- self.assembler._emit_call(imm(self.assembler.memcpy_addr),
- [dstaddr_loc, srcaddr_loc, length_loc],
- can_collect=False)
+ self.assembler.simple_call_no_collect(imm(self.assembler.memcpy_addr),
+ [dstaddr_loc, srcaddr_loc, length_loc])
self.rm.possibly_free_var(length_box)
self.rm.possibly_free_var(dstaddr_box)
self.rm.possibly_free_var(srcaddr_box)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -553,6 +553,7 @@
CALL_l = insn('\xE8', relative(1))
CALL_r = insn(rex_nw, '\xFF', register(1), chr(0xC0 | (2<<3)))
CALL_b = insn('\xFF', orbyte(2<<3), stack_bp(1))
+ CALL_s = insn('\xFF', orbyte(2<<3), stack_sp(1))
# XXX: Only here for testing purposes..."as" happens the encode the
# registers in the opposite order that we would otherwise do in a
@@ -583,6 +584,7 @@
# x87 instructions
FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+ FSTPL_s = insn('\xDD', orbyte(3<<3), stack_sp(1)) # rffi.DOUBLE ('as' wants L??)
FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
# ------------------------------ Random mess -----------------------
More information about the pypy-commit
mailing list