[pypy-commit] pypy s390x-backend: reading level=2 cache for estimation size of nursery
plan_rich
pypy.commits at gmail.com
Tue Feb 2 11:18:36 EST 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: s390x-backend
Changeset: r82042:037ac225f6c1
Date: 2016-02-02 17:17 +0100
http://bitbucket.org/pypy/pypy/changeset/037ac225f6c1/
Log: read the level=2 cache size to estimate the size of the nursery
diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py
--- a/rpython/jit/backend/ppc/callbuilder.py
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -98,7 +98,7 @@
# We must also copy fnloc into FNREG
non_float_locs.append(self.fnloc)
- non_float_regs.append(self.mc.RAW_CALL_REG) # r2 or r12
+ non_float_regs.append(self.mc.RAW_CALL_REG)
if float_locs:
assert len(float_locs) <= len(self.FPR_ARGS)
diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -62,7 +62,6 @@
# called function will in turn call further functions (which must be passed the
# address of the new frame). This stack grows downwards from high addresses
# """
- self.subtracted_to_sp = 0
gpr_regs = 0
fpr_regs = 0
@@ -88,11 +87,6 @@
if self.is_call_release_gil:
self.subtracted_to_sp += 8*WORD
base += 8*WORD
- # one additional word for remap frame layout
- # regalloc_push will overwrite -8(r.SP) and destroy
- # a parameter if we would not reserve that space
- # base += WORD
- # TODO self.subtracted_to_sp += WORD
for idx,i in enumerate(stack_params):
loc = arglocs[i]
offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx
@@ -149,7 +143,7 @@
def emit_raw_call(self):
# always allocate a stack frame for the new function
# save the SP back chain
- #self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
+ self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
# move the frame pointer
if self.subtracted_to_sp != 0:
self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
@@ -194,8 +188,6 @@
#
pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
- # 6 registers, 1 for a floating point return value!
- # registered by prepare_arguments!
#
# Save this thread's shadowstack pointer into r8, for later comparison
gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
@@ -266,19 +258,17 @@
PARAM_SAVE_AREA_OFFSET = 0
if reg is not None:
# save 1 word below the stack pointer
- pos = STD_FRAME_SIZE_IN_BYTES
if reg.is_core_reg():
self.mc.LGR(RSAVEDRES, reg)
elif reg.is_fp_reg():
- self.mc.STD(reg, l.addr(pos-1*WORD, r.SP))
+ self.mc.STD(reg, l.addr(16*WORD, r.SP))
self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
self.mc.raw_call()
if reg is not None:
- pos = STD_FRAME_SIZE_IN_BYTES
if reg.is_core_reg():
self.mc.LGR(reg, RSAVEDRES)
elif reg.is_fp_reg():
- self.mc.LD(reg, l.addr(pos-1*WORD, r.SP))
+ self.mc.LD(reg, l.addr(16*WORD, r.SP))
# replace b1_location with BEQ(here)
pmc = OverwritingBuilder(self.mc, b1_location, 1)
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -189,11 +189,7 @@
return diff
def sync(self):
- # see sync. section of the zarch manual!
- # 0xf creates a checkpoint which is not needed.
- # we never want to restore the checkpoint, we only
- # want to create a memory fence (i.e. serialization)
- self.BCR_rr(0xe,0)
+ self.BCR_rr(0xf,0)
def raw_call(self, call_reg=r.RETURN):
"""Emit a call to the address stored in the register 'call_reg',
diff --git a/rpython/jit/backend/zarch/instruction_builder.py b/rpython/jit/backend/zarch/instruction_builder.py
--- a/rpython/jit/backend/zarch/instruction_builder.py
+++ b/rpython/jit/backend/zarch/instruction_builder.py
@@ -191,6 +191,13 @@
self.write_i32(imm32 & BIT_MASK_32)
return encode_ri
+def build_s(mnemonic, (opcode1,opcode2)):
+ @builder.arguments('bd')
+ def encode_s(self, base_displace):
+ self.writechar(opcode1)
+ self.writechar(opcode2)
+ encode_base_displace(self, base_displace)
+ return encode_s
def build_si(mnemonic, (opcode,)):
@builder.arguments('bd,u8')
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -288,6 +288,8 @@
'SVC': ('i', ['\x0A']),
'TRAP2': ('e', ['\x01','\xFF']),
+
+ 'STFLE': ('s', ['\xB2','\xB0']),
}
all_mnemonic_codes.update(arith_mnemonic_codes)
all_mnemonic_codes.update(logic_mnemonic_codes)
diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py
--- a/rpython/jit/backend/zarch/test/test_assembler.py
+++ b/rpython/jit/backend/zarch/test/test_assembler.py
@@ -144,6 +144,19 @@
assert self.mc.BRC_byte_count == 4
assert self.mc.LG_byte_count == 6
+ def test_facility(self):
+ adr = self.a.datablockwrapper.malloc_aligned(16, 16)
+ self.a.mc.load_imm(r.r2, adr)
+ self.a.mc.STFLE(loc.addr(0,r.r2))
+ self.a.mc.BCR(con.ANY, r.r14)
+ run_asm(self.a)
+ fac_data = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
+ f64 = bin(fac_data[0])[2:]
+ s64 = bin(fac_data[1])[2:]
+ print(f64)
+ print(s64)
+ assert f64[18] == '1' # long displacement facility
+
def test_load_small_int_to_reg(self):
self.a.mc.LGHI(r.r2, loc.imm(123))
self.a.jmpto(r.r14)
diff --git a/rpython/memory/gc/env.py b/rpython/memory/gc/env.py
--- a/rpython/memory/gc/env.py
+++ b/rpython/memory/gc/env.py
@@ -137,6 +137,8 @@
return get_L2cache_linux2_cpuinfo()
if arch in ('alpha', 'ppc'):
return get_L2cache_linux2_cpuinfo(label='L2 cache')
+ if arch in ('s390x'):
+ return get_L2cache_linux2_cpuinfo_s390x()
if arch == 'ia64':
return get_L2cache_linux2_ia64()
if arch in ('parisc', 'parisc64'):
@@ -208,6 +210,67 @@
"Warning: cannot find your CPU L2 cache size in /proc/cpuinfo")
return -1
+def get_L2cache_linux2_cpuinfo_s390x(filename="/proc/cpuinfo", label='cache3'):
+ debug_start("gc-hardware")
+ L2cache = sys.maxint
+ try:
+ fd = os.open(filename, os.O_RDONLY, 0644)
+ try:
+ data = []
+ while True:
+ buf = os.read(fd, 4096)
+ if not buf:
+ break
+ data.append(buf)
+ finally:
+ os.close(fd)
+ except OSError:
+ pass
+ else:
+ data = ''.join(data)
+ linepos = 0
+ while True:
+ start = _findend(data, '\n' + label, linepos)
+ if start < 0:
+ break # done
+ linepos = _findend(data, '\n', start)
+ if linepos < 0:
+ break # no end-of-line??
+ # *** data[start:linepos] == " : level=2 type=Instruction scope=Private size=2048K ..."
+ start = _skipspace(data, start)
+ if data[start] != ':':
+ continue
+ # *** data[start:linepos] == ": level=2 type=Instruction scope=Private size=2048K ..."
+ start = _skipspace(data, start + 1)
+ # *** data[start:linepos] == "level=2 type=Instruction scope=Private size=2048K ..."
+ start += 44
+ end = start
+ while '0' <= data[end] <= '9':
+ end += 1
+ # *** data[start:end] == "2048"
+ if start == end:
+ continue
+ number = int(data[start:end])
+ # *** data[end:linepos] == " KB\n"
+ end = _skipspace(data, end)
+ if data[end] not in ('K', 'k'): # assume kilobytes for now
+ continue
+ number = number * 1024
+ # for now we look for the smallest of the L2 caches of the CPUs
+ if number < L2cache:
+ L2cache = number
+
+ debug_print("L2cache =", L2cache)
+ debug_stop("gc-hardware")
+
+ if L2cache < sys.maxint:
+ return L2cache
+ else:
+ # Print a top-level warning even in non-debug builds
+ llop.debug_print(lltype.Void,
+ "Warning: cannot find your CPU L2 cache size in /proc/cpuinfo")
+ return -1
+
def get_L2cache_linux2_sparc():
debug_start("gc-hardware")
cpu = 0
diff --git a/rpython/memory/gc/test/test_env.py b/rpython/memory/gc/test/test_env.py
--- a/rpython/memory/gc/test/test_env.py
+++ b/rpython/memory/gc/test/test_env.py
@@ -161,3 +161,22 @@
""")
result = env.get_L2cache_linux2_cpuinfo(str(filepath))
assert result == 3072 * 1024
+
+def test_estimate_best_nursery_size_linux2_s390x():
+ filepath = udir.join('estimate_best_nursery_size_linux2')
+ filepath.write("""\
+vendor_id : IBM/S390
+# processors : 2
+bogomips per cpu: 20325.00
+features : esan3 zarch stfle msa ldisp eimm dfp etf3eh highgprs
+cache0 : level=1 type=Data scope=Private size=128K line_size=256 associativity=8
+cache1 : level=1 type=Instruction scope=Private size=96K line_size=256 associativity=6
+cache2 : level=2 type=Data scope=Private size=2048K line_size=256 associativity=8
+cache3 : level=2 type=Instruction scope=Private size=2048K line_size=256 associativity=8
+cache4 : level=3 type=Unified scope=Shared size=65536K line_size=256 associativity=16
+cache5 : level=4 type=Unified scope=Shared size=491520K line_size=256 associativity=30
+processor 0: version = FF, identification = 026A77, machine = 2964
+processor 1: version = FF, identification = 026A77, machine = 2964
+""")
+ result = env.get_L2cache_linux2_cpuinfo_s390x(str(filepath))
+ assert result == 2048 * 1024
More information about the pypy-commit mailing list