[pypy-commit] pypy default: hg merge gc_no_cleanup_nursery
arigo
noreply at buildbot.pypy.org
Sun Sep 28 20:47:40 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r73741:9e2f7a37c1e2
Date: 2014-09-28 20:46 +0200
http://bitbucket.org/pypy/pypy/changeset/9e2f7a37c1e2/
Log: hg merge gc_no_cleanup_nursery
Merge the 'gc_no_cleanup_nursery' branch, started by Wenzhu Man
(SoC'14) and then done by fijal. It removes the clearing of the
nursery. The drawback is that new objects are not automatically
filled with zeros any longer, which needs some care, mostly for GC
references (which the GC tries to follow, so they must not contain
garbage). The benefit is a quite large speed-up; I've heard about
10%, but we'll see more precisely on the benchmarks.
diff too long, truncating to 2000 out of 4325 lines
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -617,7 +617,7 @@
'raw_store': 1,
'same_as': 2,
'setarrayitem_gc': 8,
- 'setfield_gc': 21,
+ 'setfield_gc': 22,
})
def define_argsort():
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -382,12 +382,16 @@
...
p20 = force_token()
p22 = new_with_vtable(...)
- p24 = new_array(1, descr=<ArrayP .>)
+ p24 = new_array_clear(1, descr=<ArrayP .>)
p26 = new_with_vtable(ConstClass(W_ListObject))
{{{
setfield_gc(p0, p20, descr=<FieldP .*PyFrame.vable_token .*>)
+ setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keywords_w .*>)
+ setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keywords .*>)
setfield_gc(p22, 1, descr=<FieldU pypy.interpreter.argument.Arguments.inst__jit_few_keywords .*>)
+ setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keyword_names_w .*>)
setfield_gc(p26, ConstPtr(ptr22), descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_strategy .*>)
+ setfield_gc(p26, ConstPtr(null), descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_lstorage .*>)
setarrayitem_gc(p24, 0, p26, descr=<ArrayP .>)
setfield_gc(p22, p24, descr=<FieldP .*Arguments.inst_arguments_w .*>)
}}}
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_containers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -68,10 +68,13 @@
guard_no_exception(descr=...)
i12 = call(ConstClass(ll_strhash), p10, descr=<Calli . r EF=0>)
p13 = new(descr=...)
- p15 = new_array(8, descr=<ArrayX .*>)
+ p15 = new_array_clear(8, descr=<ArrayX .*>)
setfield_gc(p13, p15, descr=<FieldP dicttable.entries .*>)
i17 = call(ConstClass(ll_dict_lookup_trampoline), p13, p10, i12, descr=<Calli . rri EF=4 OS=4>)
+ {{{
setfield_gc(p13, 16, descr=<FieldS dicttable.resize_counter .*>)
+ setfield_gc(p13, 0, descr=<FieldS dicttable.num_items .+>)
+ }}}
guard_no_exception(descr=...)
p20 = new_with_vtable(ConstClass(W_IntObject))
call(ConstClass(_ll_dict_setitem_lookup_done_trampoline), p13, p10, p20, i12, i17, descr=<Callv 0 rrrii EF=4>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -110,9 +110,12 @@
i85 = strlen(p80)
p86 = new(descr=<SizeDescr .+>)
p88 = newstr(23)
- setfield_gc(..., descr=<Field. stringbuilder.+>)
- setfield_gc(..., descr=<Field. stringbuilder.+>)
- setfield_gc(..., descr=<Field. stringbuilder.+>)
+ {{{
+ setfield_gc(p86, 0, descr=<FieldS stringbuilder.current_pos .+>)
+ setfield_gc(p86, p88, descr=<FieldP stringbuilder.current_buf .+>)
+ setfield_gc(p86, 23, descr=<FieldS stringbuilder.current_end .+>)
+ setfield_gc(p86, 23, descr=<FieldS stringbuilder.total_size .+>)
+ }}}
call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=<Callv 0 rr EF=4>)
guard_no_exception(descr=...)
i89 = getfield_gc(p86, descr=<FieldS stringbuilder.current_pos .+>)
diff --git a/pypy/tool/pypyjit_child.py b/pypy/tool/pypyjit_child.py
--- a/pypy/tool/pypyjit_child.py
+++ b/pypy/tool/pypyjit_child.py
@@ -10,10 +10,6 @@
graph = loc['graph']
interp.malloc_check = False
- def returns_null(T, *args, **kwds):
- return lltype.nullptr(T)
- interp.heap.malloc_nonmovable = returns_null # XXX
-
from rpython.jit.backend.llgraph.runner import LLGraphCPU
#LLtypeCPU.supports_floats = False # for now
apply_jit(interp, graph, LLGraphCPU)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -25,7 +25,7 @@
from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale
-from rpython.jit.metainterp.history import (Box, AbstractFailDescr,
+from rpython.jit.metainterp.history import (Box, AbstractFailDescr, ConstInt,
INT, FLOAT, REF)
from rpython.jit.metainterp.history import TargetToken
from rpython.jit.metainterp.resoperation import rop
@@ -578,6 +578,7 @@
return fcond
emit_op_setfield_raw = emit_op_setfield_gc
+ emit_op_zero_ptr_field = emit_op_setfield_gc
def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond):
base_loc, ofs, res, size = arglocs
@@ -1174,3 +1175,84 @@
self.mc.VMOV_cs(r.svfp_ip.value, arg.value)
self.mc.VCVT_f32_f64(res.value, r.svfp_ip.value)
return fcond
+
+ #from ../x86/regalloc.py:1388
+ def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
+ from rpython.jit.backend.llsupport.descr import unpack_arraydescr
+ assert len(arglocs) == 0
+ itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ args = op.getarglist()
+ base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
+ sibox = args[1]
+ if isinstance(sibox, ConstInt):
+ startindex_loc = None
+ startindex = sibox.getint()
+ assert startindex >= 0
+ else:
+ startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
+ startindex = -1
+ length_box = op.getarg(2)
+
+ # base_loc and startindex_loc are in two regs here (or they are
+ # immediates). Compute the dstaddr_loc, which is the raw
+ # address that we will pass as first argument to memset().
+ # It can be in the same register as either one, but not in
+ # args[2], because we're still needing the latter.
+ dstaddr_box = TempBox()
+ dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, [args[2]])
+ if startindex >= 0: # a constant
+ ofs = baseofs + startindex * itemsize
+ reg = base_loc.value
+ else:
+ self.mc.gen_load_int(r.ip.value, itemsize)
+ self.mc.MLA(dstaddr_loc.value, r.ip.value,
+ startindex_loc.value, base_loc.value)
+ ofs = baseofs
+ reg = dstaddr_loc.value
+ if check_imm_arg(ofs):
+ self.mc.ADD_ri(dstaddr_loc.value, reg, imm=ofs)
+ else:
+ self.mc.gen_load_int(r.ip.value, ofs)
+ self.mc.ADD_rr(dstaddr_loc.value, reg, r.ip.value)
+
+ if (isinstance(length_box, ConstInt) and
+ length_box.getint() <= 14 and # same limit as GCC
+ itemsize in (4, 2, 1)):
+ # Inline a series of STR operations, starting at 'dstaddr_loc'.
+ # XXX we could optimize STRB/STRH into STR, but this needs care:
+ # XXX it only works if startindex_loc is a constant, otherwise
+ # XXX we'd be doing unaligned accesses
+ self.mc.gen_load_int(r.ip.value, 0)
+ for i in range(length_box.getint()):
+ if itemsize == 4:
+ self.mc.STR_ri(r.ip.value, dstaddr_loc.value, imm=i*4)
+ elif itemsize == 2:
+ self.mc.STRH_ri(r.ip.value, dstaddr_loc.value, imm=i*2)
+ else:
+ self.mc.STRB_ri(r.ip.value, dstaddr_loc.value, imm=i*1)
+
+ else:
+ if isinstance(length_box, ConstInt):
+ length_loc = imm(length_box.getint() * itemsize)
+ else:
+ # load length_loc in a register different than dstaddr_loc
+ length_loc = regalloc.rm.make_sure_var_in_reg(length_box,
+ [dstaddr_box])
+ if itemsize > 1:
+ # we need a register that is different from dstaddr_loc,
+ # but which can be identical to length_loc (as usual,
+ # only if the length_box is not used by future operations)
+ bytes_box = TempBox()
+ bytes_loc = regalloc.rm.force_allocate_reg(bytes_box,
+ [dstaddr_box])
+ self.mc.gen_load_int(r.ip.value, itemsize)
+ self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
+ length_box = bytes_box
+ length_loc = bytes_loc
+ #
+ # call memset()
+ regalloc.before_call()
+ self.simple_call_no_collect(imm(self.memset_addr),
+ [dstaddr_loc, imm(0), length_loc])
+ regalloc.rm.possibly_free_var(length_box)
+ regalloc.rm.possibly_free_var(dstaddr_box)
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -818,8 +818,11 @@
def prepare_op_setfield_gc(self, op, fcond):
boxes = op.getarglist()
+ ofs, size, sign = unpack_fielddescr(op.getdescr())
+ return self._prepare_op_setfield(boxes, ofs, size)
+
+ def _prepare_op_setfield(self, boxes, ofs, size):
a0, a1 = boxes
- ofs, size, sign = unpack_fielddescr(op.getdescr())
base_loc = self.make_sure_var_in_reg(a0, boxes)
value_loc = self.make_sure_var_in_reg(a1, boxes)
ofs_size = default_imm_size if size < 8 else VMEM_imm_size
@@ -832,6 +835,11 @@
prepare_op_setfield_raw = prepare_op_setfield_gc
+ def prepare_op_zero_ptr_field(self, op, fcond):
+ a0 = op.getarg(0)
+ ofs = op.getarg(1).getint()
+ return self._prepare_op_setfield([a0, ConstInt(0)], ofs, WORD)
+
def prepare_op_getfield_gc(self, op, fcond):
a0 = op.getarg(0)
ofs, size, sign = unpack_fielddescr(op.getdescr())
@@ -988,6 +996,7 @@
prepare_op_copystrcontent = void
prepare_op_copyunicodecontent = void
+ prepare_op_zero_array = void
def prepare_op_unicodelen(self, op, fcond):
l0 = self.make_sure_var_in_reg(op.getarg(0))
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -225,6 +225,7 @@
'i': 0,
'f': 0.0}
+
class LLGraphCPU(model.AbstractCPU):
from rpython.jit.metainterp.typesystem import llhelper as ts
supports_floats = True
@@ -641,6 +642,11 @@
def bh_new_array(self, length, arraydescr):
array = lltype.malloc(arraydescr.A, length, zero=True)
+ assert getkind(arraydescr.A.OF) != 'ref' # getkind crashes on structs
+ return lltype.cast_opaque_ptr(llmemory.GCREF, array)
+
+ def bh_new_array_clear(self, length, arraydescr):
+ array = lltype.malloc(arraydescr.A, length, zero=True)
return lltype.cast_opaque_ptr(llmemory.GCREF, array)
def bh_classof(self, struct):
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -1,5 +1,5 @@
from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.backend.llsupport.memcpy import memcpy_fn
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
from rpython.jit.backend.llsupport.symbolic import WORD
from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
ConstInt, BoxInt, AbstractFailDescr)
@@ -63,6 +63,7 @@
def __init__(self, cpu, translate_support_code=False):
self.cpu = cpu
self.memcpy_addr = 0
+ self.memset_addr = 0
self.rtyper = cpu.rtyper
self._debug = False
@@ -79,6 +80,7 @@
else:
self.gc_size_of_header = WORD # for tests
self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+ self.memset_addr = self.cpu.cast_ptr_to_int(memset_fn)
self._build_failure_recovery(False, withfloats=False)
self._build_failure_recovery(True, withfloats=False)
self._build_wb_slowpath(False)
diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py
--- a/rpython/jit/backend/llsupport/descr.py
+++ b/rpython/jit/backend/llsupport/descr.py
@@ -35,9 +35,11 @@
size = 0 # help translation
tid = llop.combine_ushort(lltype.Signed, 0, 0)
- def __init__(self, size, count_fields_if_immut=-1):
+ def __init__(self, size, count_fields_if_immut=-1,
+ gc_fielddescrs=None):
self.size = size
self.count_fields_if_immut = count_fields_if_immut
+ self.gc_fielddescrs = gc_fielddescrs
def count_fields_if_immutable(self):
return self.count_fields_if_immut
@@ -58,10 +60,13 @@
except KeyError:
size = symbolic.get_size(STRUCT, gccache.translate_support_code)
count_fields_if_immut = heaptracker.count_fields_if_immutable(STRUCT)
+ gc_fielddescrs = heaptracker.gc_fielddescrs(gccache, STRUCT)
if heaptracker.has_gcstruct_a_vtable(STRUCT):
- sizedescr = SizeDescrWithVTable(size, count_fields_if_immut)
+ sizedescr = SizeDescrWithVTable(size, count_fields_if_immut,
+ gc_fielddescrs)
else:
- sizedescr = SizeDescr(size, count_fields_if_immut)
+ sizedescr = SizeDescr(size, count_fields_if_immut,
+ gc_fielddescrs)
gccache.init_size_descr(STRUCT, sizedescr)
cache[STRUCT] = sizedescr
return sizedescr
@@ -95,6 +100,9 @@
self.field_size = field_size
self.flag = flag
+ def __repr__(self):
+ return 'FieldDescr<%s>' % (self.name,)
+
def is_pointer_field(self):
return self.flag == FLAG_POINTER
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -18,10 +18,12 @@
from rpython.jit.backend.llsupport.descr import get_call_descr
from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler
from rpython.memory.gctransform import asmgcroot
+from rpython.jit.codewriter.effectinfo import EffectInfo
# ____________________________________________________________
class GcLLDescription(GcCache):
+ malloc_zero_filled = True
def __init__(self, gcdescr, translator=None, rtyper=None):
GcCache.__init__(self, translator is not None, rtyper)
@@ -36,6 +38,8 @@
def _setup_str(self):
self.str_descr = get_array_descr(self, rstr.STR)
self.unicode_descr = get_array_descr(self, rstr.UNICODE)
+ self.str_hash_descr = get_field_descr(self, rstr.STR, 'hash')
+ self.unicode_hash_descr = get_field_descr(self, rstr.UNICODE, 'hash')
def generate_function(self, funcname, func, ARGS, RESULT=llmemory.GCREF):
"""Generates a variant of malloc with the given name and the given
@@ -118,7 +122,8 @@
descrs = JitFrameDescrs()
descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
- 'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth']:
+ 'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth',
+ 'jf_savedata', 'jf_forward']:
setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
descrs.jfi_frame_size = cpu.fielddescrof(jitframe.JITFRAMEINFO,
'jfi_frame_size')
@@ -377,6 +382,7 @@
from rpython.memory.gcheader import GCHeaderBuilder
self.GCClass = self.layoutbuilder.GCClass
self.moving_gc = self.GCClass.moving_gc
+ self.malloc_zero_filled = self.GCClass.malloc_zero_filled
self.HDRPTR = lltype.Ptr(self.GCClass.HDR)
self.gcheaderbuilder = GCHeaderBuilder(self.HDRPTR.TO)
self.max_size_of_young_obj = self.GCClass.JIT_max_size_of_young_obj()
@@ -410,9 +416,9 @@
if self.DEBUG:
self._random_usage_of_xmm_registers()
type_id = rffi.cast(llgroup.HALFWORD, 0) # missing here
- return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
- type_id, size,
- False, False, False)
+ return llop1.do_malloc_fixedsize(llmemory.GCREF,
+ type_id, size,
+ False, False, False)
self.generate_function('malloc_nursery', malloc_nursery_slowpath,
[lltype.Signed])
@@ -455,7 +461,7 @@
def malloc_str(length):
type_id = llop.extract_ushort(llgroup.HALFWORD, str_type_id)
- return llop1.do_malloc_varsize_clear(
+ return llop1.do_malloc_varsize(
llmemory.GCREF,
type_id, length, str_basesize, str_itemsize,
str_ofs_length)
@@ -464,7 +470,7 @@
def malloc_unicode(length):
type_id = llop.extract_ushort(llgroup.HALFWORD, unicode_type_id)
- return llop1.do_malloc_varsize_clear(
+ return llop1.do_malloc_varsize(
llmemory.GCREF,
type_id, length, unicode_basesize, unicode_itemsize,
unicode_ofs_length)
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -45,8 +45,9 @@
# detailed explanation how it is on your architecture
def jitframe_allocate(frame_info):
- frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
+ frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth)
frame.jf_frame_info = frame_info
+ frame.jf_extra_stack_depth = 0
return frame
def jitframe_resolve(frame):
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -14,6 +14,7 @@
get_call_descr, get_interiorfield_descr,
FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr,
FLAG_POINTER, FLAG_FLOAT)
+from rpython.jit.backend.llsupport.memcpy import memset_fn
from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
from rpython.rlib.unroll import unrolling_iterable
@@ -607,6 +608,7 @@
def bh_new_array(self, length, arraydescr):
return self.gc_ll_descr.gc_malloc_array(length, arraydescr)
+ bh_new_array_clear = bh_new_array
def bh_newstr(self, length):
return self.gc_ll_descr.gc_malloc_str(length)
diff --git a/rpython/jit/backend/llsupport/memcpy.py b/rpython/jit/backend/llsupport/memcpy.py
--- a/rpython/jit/backend/llsupport/memcpy.py
+++ b/rpython/jit/backend/llsupport/memcpy.py
@@ -3,3 +3,6 @@
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
rffi.SIZE_T], lltype.Void,
sandboxsafe=True, _nowrapper=True)
+memset_fn = rffi.llexternal('memset', [llmemory.Address, rffi.INT,
+ rffi.SIZE_T], lltype.Void,
+ sandboxsafe=True, _nowrapper=True)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -1,12 +1,13 @@
from rpython.rlib import rgc
from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.lltypesystem import llmemory
+from rpython.rtyper.lltypesystem import llmemory, lltype
from rpython.jit.metainterp import history
-from rpython.jit.metainterp.history import ConstInt, BoxPtr, ConstPtr
+from rpython.jit.metainterp.history import ConstInt, BoxPtr, ConstPtr, BoxInt
from rpython.jit.metainterp.resoperation import ResOperation, rop
from rpython.jit.codewriter import heaptracker
from rpython.jit.backend.llsupport.symbolic import WORD
-from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
+from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr,\
+ FLAG_POINTER
from rpython.jit.metainterp.history import JitCellToken
FLAG_ARRAY = 0
@@ -38,6 +39,7 @@
_op_malloc_nursery = None
_v_last_malloced_nursery = None
c_zero = ConstInt(0)
+ c_null = ConstPtr(lltype.nullptr(llmemory.GCREF.TO))
def __init__(self, gc_ll_descr, cpu):
self.gc_ll_descr = gc_ll_descr
@@ -45,6 +47,8 @@
self.newops = []
self.known_lengths = {}
self.write_barrier_applied = {}
+ self.delayed_zero_setfields = {}
+ self.delayed_zero_setarrayitems = {}
def rewrite(self, operations):
# we can only remember one malloc since the next malloc can possibly
@@ -60,6 +64,8 @@
if op.is_malloc():
self.handle_malloc_operation(op)
continue
+ if op.is_guard():
+ self.emit_pending_zeros()
elif op.can_malloc():
self.emitting_an_operation_that_can_collect()
elif op.getopnum() == rop.LABEL:
@@ -68,6 +74,7 @@
# ---------- write barriers ----------
if self.gc_ll_descr.write_barrier_descr is not None:
if op.getopnum() == rop.SETFIELD_GC:
+ self.consider_setfield_gc(op)
self.handle_write_barrier_setfield(op)
continue
if op.getopnum() == rop.SETINTERIORFIELD_GC:
@@ -76,10 +83,18 @@
if op.getopnum() == rop.SETARRAYITEM_GC:
self.handle_write_barrier_setarrayitem(op)
continue
+ else:
+ # this is dead code, but in case we have a gc that does
+ # not have a write barrier and does not zero memory, we would
+ # need to call it
+ if op.getopnum() == rop.SETFIELD_GC:
+ self.consider_setfield_gc(op)
# ---------- call assembler -----------
if op.getopnum() == rop.CALL_ASSEMBLER:
self.handle_call_assembler(op)
continue
+ if op.getopnum() == rop.JUMP or op.getopnum() == rop.FINISH:
+ self.emit_pending_zeros()
#
self.newops.append(op)
return self.newops
@@ -99,7 +114,7 @@
[op.result, ConstInt(classint)], None,
descr=self.gc_ll_descr.fielddescr_vtable)
self.newops.append(op)
- elif opnum == rop.NEW_ARRAY:
+ elif opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
descr = op.getdescr()
assert isinstance(descr, ArrayDescr)
self.handle_new_array(descr, op)
@@ -112,6 +127,44 @@
else:
raise NotImplementedError(op.getopname())
+ def clear_gc_fields(self, descr, result):
+ if self.gc_ll_descr.malloc_zero_filled:
+ return
+ try:
+ d = self.delayed_zero_setfields[result]
+ except KeyError:
+ d = {}
+ self.delayed_zero_setfields[result] = d
+ for fielddescr in descr.gc_fielddescrs:
+ ofs = self.cpu.unpack_fielddescr(fielddescr)
+ d[ofs] = None
+
+ def consider_setfield_gc(self, op):
+ offset = self.cpu.unpack_fielddescr(op.getdescr())
+ try:
+ del self.delayed_zero_setfields[op.getarg(0)][offset]
+ except KeyError:
+ pass
+
+ def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum):
+ if self.gc_ll_descr.malloc_zero_filled:
+ return
+ if kind == FLAG_ARRAY:
+ if descr.is_array_of_structs() or descr.is_array_of_pointers():
+ assert opnum == rop.NEW_ARRAY_CLEAR
+ if opnum == rop.NEW_ARRAY_CLEAR:
+ self.handle_clear_array_contents(descr, result, v_length)
+ return
+ if kind == FLAG_STR:
+ hash_descr = self.gc_ll_descr.str_hash_descr
+ elif kind == FLAG_UNICODE:
+ hash_descr = self.gc_ll_descr.unicode_hash_descr
+ else:
+ return
+ op = ResOperation(rop.SETFIELD_GC, [result, self.c_zero], None,
+ descr=hash_descr)
+ self.newops.append(op)
+
def handle_new_fixedsize(self, descr, op):
assert isinstance(descr, SizeDescr)
size = descr.size
@@ -119,6 +172,7 @@
self.gen_initialize_tid(op.result, descr.tid)
else:
self.gen_malloc_fixedsize(size, descr.tid, op.result)
+ self.clear_gc_fields(descr, op.result)
def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
v_length = op.getarg(0)
@@ -140,6 +194,8 @@
# might end up being allocated by malloc_external or some
# stuff that initializes GC header fields differently
self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+ self.clear_varsize_gc_fields(kind, op.getdescr(), op.result,
+ v_length, op.getopnum())
return
if (total_size >= 0 and
self.gen_malloc_nursery(total_size, op.result)):
@@ -149,7 +205,7 @@
self.gen_boehm_malloc_array(arraydescr, v_length, op.result)
else:
opnum = op.getopnum()
- if opnum == rop.NEW_ARRAY:
+ if opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
self.gen_malloc_array(arraydescr, v_length, op.result)
elif opnum == rop.NEWSTR:
self.gen_malloc_str(v_length, op.result)
@@ -157,6 +213,21 @@
self.gen_malloc_unicode(v_length, op.result)
else:
raise NotImplementedError(op.getopname())
+ self.clear_varsize_gc_fields(kind, op.getdescr(), op.result, v_length,
+ op.getopnum())
+
+ def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
+ # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
+ if v_length is None:
+ v_length = BoxInt()
+ o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
+ descr=arraydescr)
+ self.newops.append(o)
+ elif isinstance(v_length, ConstInt) and v_length.getint() == 0:
+ return
+ o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], None,
+ descr=arraydescr)
+ self.newops.append(o)
def gen_malloc_frame(self, frame_info, frame, size_box):
descrs = self.gc_ll_descr.getframedescrs(self.cpu)
@@ -177,10 +248,25 @@
self.gen_malloc_nursery_varsize_frame(size_box, frame)
self.gen_initialize_tid(frame, descrs.arraydescr.tid)
length_box = history.BoxInt()
- op1 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
- length_box,
- descr=descrs.jfi_frame_depth)
- self.newops.append(op1)
+ # we need to explicitly zero all the gc fields, because
+ # of the unusual malloc pattern
+ extra_ops = [
+ ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
+ length_box, descr=descrs.jfi_frame_depth),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_zero],
+ None, descr=descrs.jf_extra_stack_depth),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+ None, descr=descrs.jf_savedata),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+ None, descr=descrs.jf_force_descr),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+ None, descr=descrs.jf_descr),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+ None, descr=descrs.jf_guard_exc),
+ ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+ None, descr=descrs.jf_forward),
+ ]
+ self.newops += extra_ops
self.gen_initialize_len(frame, length_box,
descrs.arraydescr.lendescr)
@@ -225,8 +311,17 @@
# forgets the previous MALLOC_NURSERY, if any; and empty the
# set 'write_barrier_applied', so that future SETFIELDs will generate
# a write barrier as usual.
+ # it also writes down all the pending zero ptr fields
self._op_malloc_nursery = None
self.write_barrier_applied.clear()
+ self.emit_pending_zeros()
+
+ def emit_pending_zeros(self):
+ for v, d in self.delayed_zero_setfields.iteritems():
+ for ofs in d.iterkeys():
+ op = ResOperation(rop.ZERO_PTR_FIELD, [v, ConstInt(ofs)], None)
+ self.newops.append(op)
+ self.delayed_zero_setfields.clear()
def _gen_call_malloc_gc(self, args, v_result, descr):
"""Generate a CALL_MALLOC_GC with the given args."""
@@ -338,7 +433,8 @@
def gen_malloc_nursery(self, size, v_result):
"""Try to generate or update a CALL_MALLOC_NURSERY.
- If that fails, generate a plain CALL_MALLOC_GC instead.
+ If that succeeds, return True; you still need to write the tid.
+ If that fails, return False.
"""
size = self.round_up_for_allocation(size)
if not self.gc_ll_descr.can_use_nursery_malloc(size):
diff --git a/rpython/jit/backend/llsupport/test/test_descr.py b/rpython/jit/backend/llsupport/test/test_descr.py
--- a/rpython/jit/backend/llsupport/test/test_descr.py
+++ b/rpython/jit/backend/llsupport/test/test_descr.py
@@ -19,6 +19,8 @@
assert descr_t.size == symbolic.get_size(T, False)
assert descr_s.count_fields_if_immutable() == -1
assert descr_t.count_fields_if_immutable() == -1
+ assert descr_t.gc_fielddescrs == []
+ assert len(descr_s.gc_fielddescrs) == 1
assert descr_s == get_size_descr(c0, S)
assert descr_s != get_size_descr(c1, S)
#
@@ -26,6 +28,11 @@
assert isinstance(descr_s.size, Symbolic)
assert descr_s.count_fields_if_immutable() == -1
+ PARENT = lltype.Struct('P', ('x', lltype.Ptr(T)))
+ STRUCT = lltype.GcStruct('S', ('parent', PARENT), ('y', lltype.Ptr(T)))
+ descr_struct = get_size_descr(c0, STRUCT)
+ assert len(descr_struct.gc_fielddescrs) == 2
+
def test_get_size_descr_immut():
S = lltype.GcStruct('S', hints={'immutable': True})
T = lltype.GcStruct('T', ('parent', S),
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -59,7 +59,7 @@
x += self.gcheaderbuilder.size_gc_header
return x, tid
- def do_malloc_fixedsize_clear(self, RESTYPE, type_id, size,
+ def do_malloc_fixedsize(self, RESTYPE, type_id, size,
has_finalizer, has_light_finalizer,
contains_weakptr):
assert not contains_weakptr
@@ -70,7 +70,9 @@
self.record.append(("fixedsize", repr(size), tid, p))
return p
- def do_malloc_varsize_clear(self, RESTYPE, type_id, length, size,
+ do_malloc_fixedsize_clear = do_malloc_fixedsize
+
+ def do_malloc_varsize(self, RESTYPE, type_id, length, size,
itemsize, offset_to_length):
p, tid = self._malloc(type_id, size + itemsize * length)
(p + offset_to_length).signed[0] = length
@@ -80,6 +82,8 @@
repr(offset_to_length), p))
return p
+ do_malloc_varsize_clear = do_malloc_varsize
+
def _write_barrier_failing_case(self, adr_struct):
self.record.append(('barrier', adr_struct))
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -69,6 +69,8 @@
unicodedescr = self.gc_ll_descr.unicode_descr
strlendescr = strdescr.lendescr
unicodelendescr = unicodedescr.lendescr
+ strhashdescr = self.gc_ll_descr.str_hash_descr
+ unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
casmdescr = JitCellToken()
clt = FakeLoopToken()
@@ -82,10 +84,15 @@
jfi_frame_depth = framedescrs.jfi_frame_depth
jfi_frame_size = framedescrs.jfi_frame_size
jf_frame_info = framedescrs.jf_frame_info
+ jf_savedata = framedescrs.jf_savedata
+ jf_force_descr = framedescrs.jf_force_descr
+ jf_descr = framedescrs.jf_descr
+ jf_guard_exc = framedescrs.jf_guard_exc
+ jf_forward = framedescrs.jf_forward
+ jf_extra_stack_depth = framedescrs.jf_extra_stack_depth
signedframedescr = self.cpu.signedframedescr
floatframedescr = self.cpu.floatframedescr
casmdescr.compiled_loop_token = clt
- tzdescr = None # noone cares
#
namespace.update(locals())
#
@@ -123,6 +130,9 @@
def unpack_arraydescr_size(self, d):
return 0, d.itemsize, 0
+ def unpack_fielddescr(self, d):
+ return d.offset
+
def arraydescrof(self, ARRAY):
try:
return self._cache[ARRAY]
@@ -144,7 +154,7 @@
def setup_method(self, meth):
class FakeCPU(BaseFakeCPU):
def sizeof(self, STRUCT):
- return SizeDescrWithVTable(102)
+ return SizeDescrWithVTable(102, gc_fielddescrs=[])
self.cpu = FakeCPU()
self.gc_ll_descr = GcLLDescr_boehm(None, None, None)
@@ -277,10 +287,11 @@
really_not_translated=True)
self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
lambda cpu: True)
+ self.gc_ll_descr.malloc_zero_filled = False
#
class FakeCPU(BaseFakeCPU):
def sizeof(self, STRUCT):
- descr = SizeDescrWithVTable(104)
+ descr = SizeDescrWithVTable(104, gc_fielddescrs=[])
descr.tid = 9315
return descr
self.cpu = FakeCPU()
@@ -313,6 +324,7 @@
setfield_gc(p1, 5678, descr=tiddescr)
p2 = int_add(p1, %(tdescr.size)d)
setfield_gc(p2, 1234, descr=tiddescr)
+ zero_ptr_field(p1, %(tdescr.gc_fielddescrs[0].offset)s)
jump()
""")
@@ -422,6 +434,7 @@
[i0]
p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
setfield_gc(p0, i0, descr=strlendescr)
+ setfield_gc(p0, 0, descr=strhashdescr)
jump(i0)
""")
@@ -545,15 +558,19 @@
unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
setfield_gc(p0, %(strdescr.tid)d, descr=tiddescr)
setfield_gc(p0, 14, descr=strlendescr)
+ setfield_gc(p0, 0, descr=strhashdescr)
p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
setfield_gc(p1, 10, descr=unicodelendescr)
+ setfield_gc(p1, 0, descr=unicodehashdescr)
p2 = call_malloc_nursery_varsize(2, %(unicodedescr.itemsize)d, i2,\
descr=unicodedescr)
setfield_gc(p2, i2, descr=unicodelendescr)
+ setfield_gc(p2, 0, descr=unicodehashdescr)
p3 = call_malloc_nursery_varsize(1, 1, i2, \
descr=strdescr)
setfield_gc(p3, i2, descr=strlendescr)
+ setfield_gc(p3, 0, descr=strhashdescr)
jump()
""")
@@ -587,7 +604,7 @@
self.gc_ll_descr.max_size_of_young_obj = 2000
self.check_rewrite("""
[i2, p3]
- p1 = new_array(129, descr=cdescr)
+ p1 = new_array_clear(129, descr=cdescr)
call(123456)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
jump()
@@ -597,6 +614,7 @@
%(cdescr.basesize + 129 * cdescr.itemsize)d)
setfield_gc(p1, 8111, descr=tiddescr)
setfield_gc(p1, 129, descr=clendescr)
+ zero_array(p1, 0, 129, descr=cdescr)
call(123456)
cond_call_gc_wb(p1, descr=wbdescr)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -608,7 +626,7 @@
self.gc_ll_descr.max_size_of_young_obj = 2000
self.check_rewrite("""
[i2, p3]
- p1 = new_array(130, descr=cdescr)
+ p1 = new_array_clear(130, descr=cdescr)
call(123456)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
jump()
@@ -618,6 +636,7 @@
%(cdescr.basesize + 130 * cdescr.itemsize)d)
setfield_gc(p1, 8111, descr=tiddescr)
setfield_gc(p1, 130, descr=clendescr)
+ zero_array(p1, 0, 130, descr=cdescr)
call(123456)
cond_call_gc_wb_array(p1, i2, descr=wbdescr)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -639,7 +658,7 @@
def test_label_makes_size_unknown(self):
self.check_rewrite("""
[i2, p3]
- p1 = new_array(5, descr=cdescr)
+ p1 = new_array_clear(5, descr=cdescr)
label(p1, i2, p3)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
jump()
@@ -649,6 +668,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
setfield_gc(p1, 8111, descr=tiddescr)
setfield_gc(p1, 5, descr=clendescr)
+ zero_array(p1, 0, 5, descr=cdescr)
label(p1, i2, p3)
cond_call_gc_wb_array(p1, i2, descr=wbdescr)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -709,7 +729,7 @@
def test_initialization_store_array(self):
self.check_rewrite("""
[p1, i2]
- p0 = new_array(5, descr=cdescr)
+ p0 = new_array_clear(5, descr=cdescr)
setarrayitem_gc(p0, i2, p1, descr=cdescr)
jump()
""", """
@@ -718,6 +738,7 @@
%(cdescr.basesize + 5 * cdescr.itemsize)d)
setfield_gc(p0, 8111, descr=tiddescr)
setfield_gc(p0, 5, descr=clendescr)
+ zero_array(p0, 0, 5, descr=cdescr)
setarrayitem_gc(p0, i2, p1, descr=cdescr)
jump()
""")
@@ -751,9 +772,11 @@
[i0]
p0 = call_malloc_nursery(%(tdescr.size)d)
setfield_gc(p0, 5678, descr=tiddescr)
+ zero_ptr_field(p0, %(tdescr.gc_fielddescrs[0].offset)s)
p1 = call_malloc_nursery_varsize(1, 1, i0, \
descr=strdescr)
setfield_gc(p1, i0, descr=strlendescr)
+ setfield_gc(p1, 0, descr=strhashdescr)
cond_call_gc_wb(p0, descr=wbdescr)
setfield_gc(p0, p1, descr=tzdescr)
jump()
@@ -770,6 +793,7 @@
[p1]
p0 = call_malloc_nursery(%(tdescr.size)d)
setfield_gc(p0, 5678, descr=tiddescr)
+ zero_ptr_field(p0, %(tdescr.gc_fielddescrs[0].offset)s)
label(p0, p1)
cond_call_gc_wb(p0, descr=wbdescr)
setfield_gc(p0, p1, descr=tzdescr)
@@ -800,6 +824,12 @@
p1 = call_malloc_nursery_varsize_frame(i1)
setfield_gc(p1, 0, descr=tiddescr)
i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
+ setfield_gc(p1, 0, descr=jf_extra_stack_depth)
+ setfield_gc(p1, NULL, descr=jf_savedata)
+ setfield_gc(p1, NULL, descr=jf_force_descr)
+ setfield_gc(p1, NULL, descr=jf_descr)
+ setfield_gc(p1, NULL, descr=jf_guard_exc)
+ setfield_gc(p1, NULL, descr=jf_forward)
setfield_gc(p1, i2, descr=framelendescr)
setfield_gc(p1, ConstClass(frame_info), descr=jf_frame_info)
setarrayitem_gc(p1, 0, i0, descr=signedframedescr)
diff --git a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
--- a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
+++ b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
@@ -223,7 +223,7 @@
## return None, f, None
def define_compile_framework_1(cls):
- # a moving GC. Supports malloc_varsize_nonmovable. Simple test, works
+ # a moving GC. Simple test, works
# without write_barriers and root stack enumeration.
def f(n, x, *args):
y = X()
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2036,6 +2036,14 @@
'ref', descr=arraydescr)
assert r1.value != r2.value
a = lltype.cast_opaque_ptr(lltype.Ptr(A), r1.value)
+ assert len(a) == 342
+
+ def test_new_array_clear(self):
+ A = lltype.GcArray(lltype.Signed)
+ arraydescr = self.cpu.arraydescrof(A)
+ r1 = self.execute_operation(rop.NEW_ARRAY_CLEAR, [BoxInt(342)],
+ 'ref', descr=arraydescr)
+ a = lltype.cast_opaque_ptr(lltype.Ptr(A), r1.value)
assert a[0] == 0
assert len(a) == 342
@@ -4272,9 +4280,6 @@
fail = self.cpu.get_latest_descr(deadframe)
assert fail.identifier == 23
assert self.cpu.get_int_value(deadframe, 0) == 42
- # make sure that force reads the registers from a zeroed piece of
- # memory
- assert values[0] == 0
def test_compile_bridge_while_running(self):
def func():
@@ -4442,3 +4447,99 @@
res = self.execute_operation(rop.CAST_FLOAT_TO_SINGLEFLOAT,
[boxfloat(12.5)], 'int')
assert res.getint() == struct.unpack("I", struct.pack("f", 12.5))[0]
+
+ def test_zero_ptr_field(self):
+ from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+
+ if not isinstance(self.cpu, AbstractLLCPU):
+ py.test.skip("llgraph can't do zero_ptr_field")
+ T = lltype.GcStruct('T')
+ S = lltype.GcStruct('S', ('x', lltype.Ptr(T)))
+ tdescr = self.cpu.sizeof(T)
+ sdescr = self.cpu.sizeof(S)
+ fielddescr = self.cpu.fielddescrof(S, 'x')
+ loop = parse("""
+ []
+ p0 = new(descr=tdescr)
+ p1 = new(descr=sdescr)
+ setfield_gc(p1, p0, descr=fielddescr)
+ zero_ptr_field(p1, %d)
+ finish(p1)
+ """ % fielddescr.offset, namespace=locals())
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ deadframe = self.cpu.execute_token(looptoken)
+ ref = self.cpu.get_ref_value(deadframe, 0)
+ s = lltype.cast_opaque_ptr(lltype.Ptr(S), ref)
+ assert not s.x
+
+ def test_zero_ptr_field_2(self):
+ from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+
+ if not isinstance(self.cpu, AbstractLLCPU):
+ py.test.skip("llgraph does not do zero_ptr_field")
+
+ from rpython.jit.backend.llsupport import symbolic
+ S = lltype.GcStruct('S', ('x', lltype.Signed),
+ ('p', llmemory.GCREF),
+ ('y', lltype.Signed))
+ s = lltype.malloc(S)
+ s.x = -1296321
+ s.y = -4398176
+ s_ref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+ s.p = s_ref
+ ofs_p, _ = symbolic.get_field_token(S, 'p', False)
+ #
+ self.execute_operation(rop.ZERO_PTR_FIELD, [
+ BoxPtr(s_ref), ConstInt(ofs_p)], # OK for now to assume that the
+ 'void') # 2nd argument is a constant
+ #
+ assert s.x == -1296321
+ assert s.p == lltype.nullptr(llmemory.GCREF.TO)
+ assert s.y == -4398176
+
+ def test_zero_array(self):
+ from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+
+ if not isinstance(self.cpu, AbstractLLCPU):
+ py.test.skip("llgraph does not do zero_array")
+
+ PAIR = lltype.Struct('PAIR', ('a', lltype.Signed), ('b', lltype.Signed))
+ for OF in [lltype.Signed, rffi.INT, rffi.SHORT, rffi.UCHAR, PAIR]:
+ A = lltype.GcArray(OF)
+ arraydescr = self.cpu.arraydescrof(A)
+ a = lltype.malloc(A, 100)
+ addr = llmemory.cast_ptr_to_adr(a)
+ a_int = heaptracker.adr2int(addr)
+ a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a)
+ for (start, length) in [(0, 100), (49, 49), (1, 98),
+ (15, 9), (10, 10), (47, 0),
+ (0, 4)]:
+ for cls1 in [ConstInt, BoxInt]:
+ for cls2 in [ConstInt, BoxInt]:
+ print 'a_int:', a_int
+ print 'of:', OF
+ print 'start:', cls1.__name__, start
+ print 'length:', cls2.__name__, length
+ for i in range(100):
+ if OF == PAIR:
+ a[i].a = a[i].b = -123456789
+ else:
+ a[i] = rffi.cast(OF, -123456789)
+ startbox = cls1(start)
+ lengthbox = cls2(length)
+ if cls1 == cls2 and start == length:
+ lengthbox = startbox # same box!
+ self.execute_operation(rop.ZERO_ARRAY,
+ [BoxPtr(a_ref),
+ startbox,
+ lengthbox],
+ 'void', descr=arraydescr)
+ assert len(a) == 100
+ for i in range(100):
+ val = (0 if start <= i < start + length
+ else -123456789)
+ if OF == PAIR:
+ assert a[i].a == a[i].b == val
+ else:
+ assert a[i] == rffi.cast(OF, val)
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -95,7 +95,10 @@
fields.append(('parent', rclass.OBJECT))
kwds['hints'] = {'vtable': with_vtable._obj}
for i in range(r.randrange(1, 5)):
- TYPE = self.get_random_primitive_type(r)
+ if r.random() < 0.1:
+ TYPE = llmemory.GCREF
+ else:
+ TYPE = self.get_random_primitive_type(r)
fields.append(('f%d' % i, TYPE))
S = type('S%d' % self.counter, *fields, **kwds)
self.counter += 1
@@ -246,13 +249,43 @@
op = ResOperation(self.opnum, [v, c_vtable2], None)
return op, False
+class ZeroPtrFieldOperation(test_random.AbstractOperation):
+ def field_descr(self, builder, r):
+ if getattr(builder.cpu, 'is_llgraph', False):
+ raise test_random.CannotProduceOperation
+ v, S = builder.get_structptr_var(r, )
+ names = S._names
+ if names[0] == 'parent':
+ names = names[1:]
+ choice = []
+ for name in names:
+ FIELD = getattr(S, name)
+ if isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+ choice.append(name)
+ if not choice:
+ raise test_random.CannotProduceOperation
+ name = r.choice(choice)
+ descr = builder.cpu.fielddescrof(S, name)
+ return v, descr.offset
+
+ def produce_into(self, builder, r):
+ v, offset = self.field_descr(builder, r)
+ builder.do(self.opnum, [v, ConstInt(offset)], None)
+
class GetFieldOperation(test_random.AbstractOperation):
def field_descr(self, builder, r):
v, S = builder.get_structptr_var(r, )
names = S._names
if names[0] == 'parent':
names = names[1:]
- name = r.choice(names)
+ choice = []
+ for name in names:
+ FIELD = getattr(S, name)
+ if not isinstance(FIELD, lltype.Ptr):
+ choice.append(name)
+ if not choice:
+ raise test_random.CannotProduceOperation
+ name = r.choice(choice)
descr = builder.cpu.fielddescrof(S, name)
descr._random_info = 'cpu.fielddescrof(..., %r)' % (name,)
descr._random_type = S
@@ -274,7 +307,14 @@
array_of_structs=True)
array = v.getref(lltype.Ptr(A))
v_index = builder.get_index(len(array), r)
- name = r.choice(A.OF._names)
+ choice = []
+ for name in A.OF._names:
+ FIELD = getattr(A.OF, name)
+ if not isinstance(FIELD, lltype.Ptr):
+ choice.append(name)
+ if not choice:
+ raise test_random.CannotProduceOperation
+ name = r.choice(choice)
descr = builder.cpu.interiorfielddescrof(A, name)
descr._random_info = 'cpu.interiorfielddescrof(..., %r)' % (name,)
descr._random_type = A
@@ -682,6 +722,7 @@
OPERATIONS.append(GetFieldOperation(rop.GETFIELD_GC))
OPERATIONS.append(GetInteriorFieldOperation(rop.GETINTERIORFIELD_GC))
OPERATIONS.append(SetFieldOperation(rop.SETFIELD_GC))
+ OPERATIONS.append(ZeroPtrFieldOperation(rop.ZERO_PTR_FIELD))
OPERATIONS.append(SetInteriorFieldOperation(rop.SETINTERIORFIELD_GC))
OPERATIONS.append(NewOperation(rop.NEW))
OPERATIONS.append(NewOperation(rop.NEW_WITH_VTABLE))
@@ -689,7 +730,7 @@
OPERATIONS.append(GetArrayItemOperation(rop.GETARRAYITEM_GC))
OPERATIONS.append(GetArrayItemOperation(rop.GETARRAYITEM_GC))
OPERATIONS.append(SetArrayItemOperation(rop.SETARRAYITEM_GC))
- OPERATIONS.append(NewArrayOperation(rop.NEW_ARRAY))
+ OPERATIONS.append(NewArrayOperation(rop.NEW_ARRAY_CLEAR))
OPERATIONS.append(ArrayLenOperation(rop.ARRAYLEN_GC))
OPERATIONS.append(NewStrOperation(rop.NEWSTR))
OPERATIONS.append(NewUnicodeOperation(rop.NEWUNICODE))
diff --git a/rpython/jit/backend/test/test_random.py b/rpython/jit/backend/test/test_random.py
--- a/rpython/jit/backend/test/test_random.py
+++ b/rpython/jit/backend/test/test_random.py
@@ -52,10 +52,13 @@
def do(self, opnum, argboxes, descr=None):
self.fakemetainterp._got_exc = None
- v_result = execute_nonspec(self.cpu, self.fakemetainterp,
- opnum, argboxes, descr)
- if isinstance(v_result, Const):
- v_result = v_result.clonebox()
+ if opnum == rop.ZERO_PTR_FIELD:
+ v_result = None
+ else:
+ v_result = execute_nonspec(self.cpu, self.fakemetainterp,
+ opnum, argboxes, descr)
+ if isinstance(v_result, Const):
+ v_result = v_result.clonebox()
self.loop.operations.append(ResOperation(opnum, argboxes, v_result,
descr))
return v_result
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1486,6 +1486,8 @@
dest_addr = AddressLoc(base_loc, ofs_loc)
self.save_into_mem(dest_addr, value_loc, size_loc)
+ genop_discard_zero_ptr_field = genop_discard_setfield_gc
+
def genop_discard_setinteriorfield_gc(self, op, arglocs):
(base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
index_loc, temp_loc, value_loc) = arglocs
@@ -2361,6 +2363,43 @@
elif IS_X86_64:
mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend
+ def genop_discard_zero_array(self, op, arglocs):
+ (base_loc, startindex_loc, bytes_loc,
+ itemsize_loc, baseofs_loc, null_loc) = arglocs
+ assert isinstance(bytes_loc, ImmedLoc)
+ assert isinstance(itemsize_loc, ImmedLoc)
+ assert isinstance(baseofs_loc, ImmedLoc)
+ assert isinstance(null_loc, RegLoc) and null_loc.is_xmm
+ baseofs = baseofs_loc.value
+ nbytes = bytes_loc.value
+ if valid_addressing_size(itemsize_loc.value):
+ scale = get_scale(itemsize_loc.value)
+ else:
+ assert isinstance(startindex_loc, ImmedLoc)
+ baseofs += startindex_loc.value * itemsize_loc.value
+ startindex_loc = imm0
+ scale = 0
+ null_reg_cleared = False
+ i = 0
+ while i < nbytes:
+ addr = addr_add(base_loc, startindex_loc, baseofs + i, scale)
+ current = nbytes - i
+ if current >= 16:
+ current = 16
+ if not null_reg_cleared:
+ self.mc.XORPS_xx(null_loc.value, null_loc.value)
+ null_reg_cleared = True
+ self.mc.MOVUPS(addr, null_loc)
+ else:
+ if current >= WORD:
+ current = WORD
+ elif current >= 4:
+ current = 4
+ elif current >= 2:
+ current = 2
+ self.save_into_mem(addr, imm0, imm(current))
+ i += current
+
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -8,7 +8,8 @@
unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
- RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op)
+ RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op,
+ valid_addressing_size)
from rpython.jit.backend.x86 import rx86
from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
IS_X86_64)
@@ -958,6 +959,13 @@
need_lower_byte=need_lower_byte)
self.perform_discard(op, [base_loc, ofs_loc, size_loc, value_loc])
+ def consider_zero_ptr_field(self, op):
+ ofs_loc = imm(op.getarg(1).getint())
+ size_loc = imm(WORD)
+ base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), [])
+ value_loc = imm(0)
+ self.perform_discard(op, [base_loc, ofs_loc, size_loc, value_loc])
+
consider_setfield_raw = consider_setfield_gc
def consider_setinteriorfield_gc(self, op):
@@ -1376,6 +1384,70 @@
def consider_keepalive(self, op):
pass
+ def consider_zero_array(self, op):
+ itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ args = op.getarglist()
+ base_loc = self.rm.make_sure_var_in_reg(args[0], args)
+ startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
+ length_box = op.getarg(2)
+ if isinstance(length_box, ConstInt):
+ constbytes = length_box.getint() * itemsize
+ else:
+ constbytes = -1
+ if 0 <= constbytes <= 16 * 8 and (
+ valid_addressing_size(itemsize) or
+- isinstance(startindex_loc, ImmedLoc)):
+ if IS_X86_64:
+ null_loc = X86_64_XMM_SCRATCH_REG
+ else:
+ null_box = TempBox()
+ null_loc = self.xrm.force_allocate_reg(null_box)
+ self.xrm.possibly_free_var(null_box)
+ self.perform_discard(op, [base_loc, startindex_loc,
+ imm(constbytes), imm(itemsize),
+ imm(baseofs), null_loc])
+ else:
+ # base_loc and startindex_loc are in two regs here (or they are
+ # immediates). Compute the dstaddr_loc, which is the raw
+ # address that we will pass as first argument to memset().
+ # It can be in the same register as either one, but not in
+ # args[2], because we're still needing the latter.
+ dstaddr_box = TempBox()
+ dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]])
+ itemsize_loc = imm(itemsize)
+ dst_addr = self.assembler._get_interiorfield_addr(
+ dstaddr_loc, startindex_loc, itemsize_loc,
+ base_loc, imm(baseofs))
+ self.assembler.mc.LEA(dstaddr_loc, dst_addr)
+ #
+ if constbytes >= 0:
+ length_loc = imm(constbytes)
+ else:
+ # load length_loc in a register different than dstaddr_loc
+ length_loc = self.rm.make_sure_var_in_reg(length_box,
+ [dstaddr_box])
+ if itemsize > 1:
+ # we need a register that is different from dstaddr_loc,
+ # but which can be identical to length_loc (as usual,
+ # only if the length_box is not used by future operations)
+ bytes_box = TempBox()
+ bytes_loc = self.rm.force_allocate_reg(bytes_box,
+ [dstaddr_box])
+ b_adr = self.assembler._get_interiorfield_addr(
+ bytes_loc, length_loc, itemsize_loc, imm0, imm0)
+ self.assembler.mc.LEA(bytes_loc, b_adr)
+ length_box = bytes_box
+ length_loc = bytes_loc
+ #
+ # call memset()
+ self.rm.before_call()
+ self.xrm.before_call()
+ self.assembler.simple_call_no_collect(
+ imm(self.assembler.memset_addr),
+ [dstaddr_loc, imm0, length_loc])
+ self.rm.possibly_free_var(length_box)
+ self.rm.possibly_free_var(dstaddr_box)
+
def not_implemented_op(self, op):
not_implemented("not implemented operation: %s" % op.getopname())
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -664,6 +664,7 @@
MOVDQ = _binaryop('MOVDQ')
MOVD32 = _binaryop('MOVD32')
+ MOVUPS = _binaryop('MOVUPS')
CALL = _relative_unaryop('CALL')
JMP = _relative_unaryop('JMP')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -634,6 +634,9 @@
MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
+ MOVUPS_mx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_const(1))
+ MOVUPS_jx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), abs_(1))
+ MOVUPS_ax = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_scaled_reg_plus_const(1))
# ------------------------------------------------------------
@@ -764,6 +767,7 @@
define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8)], regtype='XMM')
define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
+define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
def define_pxmm_insn(insnname_template, insn_char):
diff --git a/rpython/jit/codewriter/assembler.py b/rpython/jit/codewriter/assembler.py
--- a/rpython/jit/codewriter/assembler.py
+++ b/rpython/jit/codewriter/assembler.py
@@ -291,6 +291,7 @@
'int_sub',
'jit_merge_point',
'new_array',
+ 'new_array_clear',
'newstr',
'setarrayitem_gc_i',
'setarrayitem_gc_r',
diff --git a/rpython/jit/codewriter/codewriter.py b/rpython/jit/codewriter/codewriter.py
--- a/rpython/jit/codewriter/codewriter.py
+++ b/rpython/jit/codewriter/codewriter.py
@@ -13,7 +13,7 @@
class CodeWriter(object):
callcontrol = None # for tests
- debug = False
+ debug = True
def __init__(self, cpu=None, jitdrivers_sd=[]):
self.cpu = cpu
diff --git a/rpython/jit/codewriter/heaptracker.py b/rpython/jit/codewriter/heaptracker.py
--- a/rpython/jit/codewriter/heaptracker.py
+++ b/rpython/jit/codewriter/heaptracker.py
@@ -125,3 +125,19 @@
vtable = descr.as_vtable_size_descr()._corresponding_vtable
vtable = llmemory.cast_ptr_to_adr(vtable)
return adr2int(vtable)
+
+def gc_fielddescrs(gccache, STRUCT, res=None):
+ from rpython.jit.backend.llsupport import descr
+
+ if res is None:
+ res = []
+ # order is not relevant, except for tests
+ for name in STRUCT._names:
+ FIELD = getattr(STRUCT, name)
+ if FIELD is lltype.Void:
+ continue
+ elif isinstance(FIELD, lltype.Struct):
+ gc_fielddescrs(gccache, FIELD, res)
+ elif isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+ res.append(descr.get_field_descr(gccache, STRUCT, name))
+ return res
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -612,8 +612,43 @@
# XXX only strings or simple arrays for now
ARRAY = op.args[0].value
arraydescr = self.cpu.arraydescrof(ARRAY)
- return SpaceOperation('new_array', [op.args[2], arraydescr],
- op.result)
+ if op.args[1].value.get('zero', False):
+ opname = 'new_array_clear'
+ elif ((isinstance(ARRAY.OF, lltype.Ptr) and ARRAY.OF._needsgc()) or
+ isinstance(ARRAY.OF, lltype.Struct)):
+ opname = 'new_array_clear'
+ else:
+ opname = 'new_array'
+ return SpaceOperation(opname, [op.args[2], arraydescr], op.result)
+
+ def zero_contents(self, ops, v, TYPE):
+ if isinstance(TYPE, lltype.Struct):
+ for name, FIELD in TYPE._flds.iteritems():
+ if isinstance(FIELD, lltype.Struct):
+ # substruct
+ self.zero_contents(ops, v, FIELD)
+ else:
+ c_name = Constant(name, lltype.Void)
+ c_null = Constant(FIELD._defl(), FIELD)
+ op = SpaceOperation('setfield', [v, c_name, c_null],
+ None)
+ self.extend_with(ops, self.rewrite_op_setfield(op,
+ override_type=TYPE))
+ elif isinstance(TYPE, lltype.Array):
+ assert False # this operation disappeared
+ else:
+ raise TypeError("Expected struct or array, got '%r'", (TYPE,))
+ if len(ops) == 1:
+ return ops[0]
+ return ops
+
+ def extend_with(self, l, ops):
+ if ops is None:
+ return
+ if isinstance(ops, list):
+ l.extend(ops)
+ else:
+ l.append(ops)
def rewrite_op_free(self, op):
d = op.args[1].value.copy()
@@ -759,13 +794,17 @@
op1]
return op1
- def rewrite_op_setfield(self, op):
+ def rewrite_op_setfield(self, op, override_type=None):
if self.is_typeptr_getset(op):
# ignore the operation completely -- instead, it's done by 'new'
return
# turn the flow graph 'setfield' operation into our own version
[v_inst, c_fieldname, v_value] = op.args
RESULT = v_value.concretetype
+ if override_type is not None:
+ TYPE = override_type
+ else:
+ TYPE = v_inst.concretetype.TO
if RESULT is lltype.Void:
return
# check for virtualizable
@@ -775,10 +814,12 @@
return [SpaceOperation('-live-', [], None),
SpaceOperation('setfield_vable_%s' % kind,
[v_inst, v_value, descr], None)]
- self.check_field_access(v_inst.concretetype.TO)
- argname = getattr(v_inst.concretetype.TO, '_gckind', 'gc')
- descr = self.cpu.fielddescrof(v_inst.concretetype.TO,
- c_fieldname.value)
+ self.check_field_access(TYPE)
+ if override_type:
+ argname = 'gc'
+ else:
+ argname = getattr(TYPE, '_gckind', 'gc')
+ descr = self.cpu.fielddescrof(TYPE, c_fieldname.value)
kind = getkind(RESULT)[0]
if argname == 'raw' and kind == 'r':
raise Exception("setfield_raw_r not supported")
@@ -860,7 +901,10 @@
if op.args[1].value['flavor'] == 'raw':
return self._rewrite_raw_malloc(op, 'raw_malloc_fixedsize', [])
#
- assert op.args[1].value == {'flavor': 'gc'}
+ if op.args[1].value.get('zero', False):
+ zero = True
+ else:
+ zero = False
STRUCT = op.args[0].value
vtable = heaptracker.get_vtable_for_gcstruct(self.cpu, STRUCT)
if vtable:
@@ -881,7 +925,25 @@
else:
opname = 'new'
sizedescr = self.cpu.sizeof(STRUCT)
- return SpaceOperation(opname, [sizedescr], op.result)
+ op1 = SpaceOperation(opname, [sizedescr], op.result)
+ if zero:
+ return self.zero_contents([op1], op.result, STRUCT)
+ return op1
+
+ def _has_gcptrs_in(self, STRUCT):
+ if isinstance(STRUCT, lltype.Array):
+ ITEM = STRUCT.OF
+ if isinstance(ITEM, lltype.Struct):
+ STRUCT = ITEM
+ else:
+ return isinstance(ITEM, lltype.Ptr) and ITEM._needsgc()
+ for FIELD in STRUCT._flds.values():
+ if isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+ return True
+ elif isinstance(FIELD, lltype.Struct):
+ if self._has_gcptrs_in(FIELD):
+ return True
+ return False
def rewrite_op_getinteriorarraysize(self, op):
# only supports strings and unicodes
@@ -1606,7 +1668,13 @@
v = Variable('new_length')
v.concretetype = lltype.Signed
ops.append(SpaceOperation('int_force_ge_zero', [v_length], v))
- ops.append(SpaceOperation('new_array', [v, arraydescr], op.result))
+ ARRAY = op.result.concretetype.TO
+ if ((isinstance(ARRAY.OF, lltype.Ptr) and ARRAY.OF._needsgc()) or
+ isinstance(ARRAY.OF, lltype.Struct)):
+ opname = 'new_array_clear'
+ else:
+ opname = 'new_array'
+ ops.append(SpaceOperation(opname, [v, arraydescr], op.result))
return ops
def do_fixed_list_len(self, op, args, arraydescr):
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -529,6 +529,35 @@
assert op1.opname == 'new'
assert op1.args == [('sizedescr', S)]
+def test_malloc_new_zero_2():
+ S = lltype.GcStruct('S', ('x', lltype.Signed))
+ v = varoftype(lltype.Ptr(S))
+ op = SpaceOperation('malloc', [Constant(S, lltype.Void),
+ Constant({'flavor': 'gc',
+ 'zero': True}, lltype.Void)], v)
+ op1, op2 = Transformer(FakeCPU()).rewrite_operation(op)
+ assert op1.opname == 'new'
+ assert op1.args == [('sizedescr', S)]
+ assert op2.opname == 'setfield_gc_i'
+ assert op2.args[0] == v
+
+def test_malloc_new_zero_nested():
+ S0 = lltype.GcStruct('S0')
+ S = lltype.Struct('S', ('x', lltype.Ptr(S0)))
+ S2 = lltype.GcStruct('S2', ('parent', S),
+ ('xx', lltype.Ptr(S0)))
+ v = varoftype(lltype.Ptr(S2))
+ op = SpaceOperation('malloc', [Constant(S2, lltype.Void),
+ Constant({'flavor': 'gc',
+ 'zero': True}, lltype.Void)], v)
+ op1, op2, op3 = Transformer(FakeCPU()).rewrite_operation(op)
+ assert op1.opname == 'new'
+ assert op1.args == [('sizedescr', S2)]
+ assert op2.opname == 'setfield_gc_r'
+ assert op2.args[0] == v
+ assert op3.opname == 'setfield_gc_r'
+ assert op3.args[0] == v
+
def test_malloc_new_with_vtable():
vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
S = lltype.GcStruct('S', ('parent', rclass.OBJECT))
@@ -1026,6 +1055,15 @@
assert op1.args == [v1]
assert op1.result == v2
+def test_malloc_varsize_zero():
+ c_A = Constant(lltype.GcArray(lltype.Signed), lltype.Void)
+ v1 = varoftype(lltype.Signed)
+ v2 = varoftype(c_A.value)
+ c_flags = Constant({"flavor": "gc", "zero": True}, lltype.Void)
+ op = SpaceOperation('malloc_varsize', [c_A, c_flags, v1], v2)
+ op1 = Transformer(FakeCPU()).rewrite_operation(op)
+ assert op1.opname == 'new_array_clear'
+
def test_str_concat():
# test that the oopspec is present and correctly transformed
PSTR = lltype.Ptr(rstr.STR)
diff --git a/rpython/jit/codewriter/test/test_list.py b/rpython/jit/codewriter/test/test_list.py
--- a/rpython/jit/codewriter/test/test_list.py
+++ b/rpython/jit/codewriter/test/test_list.py
@@ -11,6 +11,7 @@
# ____________________________________________________________
FIXEDLIST = lltype.Ptr(lltype.GcArray(lltype.Signed))
+FIXEDPTRLIST = lltype.Ptr(lltype.GcArray(FIXEDLIST))
VARLIST = lltype.Ptr(lltype.GcStruct('VARLIST',
('length', lltype.Signed),
('items', FIXEDLIST),
@@ -100,6 +101,8 @@
builtin_test('newlist', [Constant(5, lltype.Signed),
varoftype(lltype.Signed)], FIXEDLIST,
NotSupported)
+ builtin_test('newlist', [], FIXEDPTRLIST,
+ """new_array_clear $0, <ArrayDescr> -> %r0""")
def test_fixed_ll_arraycopy():
builtin_test('list.ll_arraycopy',
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1008,7 +1008,11 @@
itemsdescr, arraydescr):
result = cpu.bh_new(structdescr)
cpu.bh_setfield_gc_i(result, length, lengthdescr)
- items = cpu.bh_new_array(length, arraydescr)
+ if (arraydescr.is_array_of_structs() or
+ arraydescr.is_array_of_pointers()):
+ items = cpu.bh_new_array_clear(length, arraydescr)
+ else:
+ items = cpu.bh_new_array(length, arraydescr)
cpu.bh_setfield_gc_r(result, items, itemsdescr)
return result
@@ -1017,7 +1021,11 @@
itemsdescr, arraydescr):
result = cpu.bh_new(structdescr)
cpu.bh_setfield_gc_i(result, 0, lengthdescr)
- items = cpu.bh_new_array(lengthhint, arraydescr)
+ if (arraydescr.is_array_of_structs() or
+ arraydescr.is_array_of_pointers()):
+ items = cpu.bh_new_array_clear(lengthhint, arraydescr)
+ else:
+ items = cpu.bh_new_array(lengthhint, arraydescr)
cpu.bh_setfield_gc_r(result, items, itemsdescr)
return result
@@ -1153,6 +1161,10 @@
def bhimpl_new_array(cpu, length, arraydescr):
return cpu.bh_new_array(length, arraydescr)
+ @arguments("cpu", "i", "d", returns="r")
+ def bhimpl_new_array_clear(cpu, length, arraydescr):
+ return cpu.bh_new_array_clear(length, arraydescr)
+
@arguments("cpu", "r", "i", "d", returns="i")
def bhimpl_getarrayitem_gc_i(cpu, array, index, arraydescr):
return cpu.bh_getarrayitem_gc_i(array, index, arraydescr)
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -325,6 +325,8 @@
rop.INCREMENT_DEBUG_COUNTER,
rop.COND_CALL_GC_WB,
rop.COND_CALL_GC_WB_ARRAY,
+ rop.ZERO_PTR_FIELD,
+ rop.ZERO_ARRAY,
rop.DEBUG_MERGE_POINT,
rop.JIT_DEBUG,
rop.SETARRAYITEM_RAW,
diff --git a/rpython/jit/metainterp/gc.py b/rpython/jit/metainterp/gc.py
--- a/rpython/jit/metainterp/gc.py
+++ b/rpython/jit/metainterp/gc.py
@@ -26,7 +26,7 @@
malloc_zero_filled = True
class GC_incminimark(GcDescription):
- malloc_zero_filled = True
+ malloc_zero_filled = False
def get_description(config):
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -24,6 +24,8 @@
return (1 << ((byte_size << 3) - 1)) - 1
+IS_64_BIT = sys.maxint > 2**32
+
def next_pow2_m1(n):
"""Calculate next power of 2 greater than n minus one."""
n |= n >> 1
@@ -31,7 +33,8 @@
n |= n >> 4
n |= n >> 8
n |= n >> 16
- n |= n >> 32
+ if IS_64_BIT:
+ n |= n >> 32
return n
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -257,6 +257,9 @@
def setinteriorfield(self, index, ofs, value):
raise NotImplementedError
+ def get_missing_null_value(self):
+ raise NotImplementedError # only for VArrayValue
+
class ConstantValue(OptValue):
def __init__(self, box):
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -502,6 +502,8 @@
descr=arraydescr)
self.optimizer.send_extra_operation(newop)
val = self.getvalue(resbox)
+ if val is None:
+ continue
if dest_value.is_virtual():
dest_value.setitem(index + dest_start, val)
else:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py b/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -150,10 +150,10 @@
def test_nonmatching_arraystruct_1(self):
ops = """
[p1, f0]
- p2 = new_array(3, descr=complexarraydescr)
+ p2 = new_array_clear(3, descr=complexarraydescr)
setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
label(p2, f0)
- p4 = new_array(3, descr=complexarraydescr)
+ p4 = new_array_clear(3, descr=complexarraydescr)
setinteriorfield_gc(p4, 2, f0, descr=compleximagdescr)
jump(p4, f0)
"""
@@ -163,10 +163,10 @@
def test_nonmatching_arraystruct_2(self):
ops = """
[p1, f0]
- p2 = new_array(3, descr=complexarraydescr)
+ p2 = new_array_clear(3, descr=complexarraydescr)
setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
label(p2, f0)
- p4 = new_array(2, descr=complexarraydescr)
+ p4 = new_array_clear(2, descr=complexarraydescr)
setinteriorfield_gc(p4, 0, f0, descr=complexrealdescr)
jump(p4, f0)
"""
@@ -198,7 +198,7 @@
def test_not_virtual_arraystruct(self):
ops = """
[p1]
- p3 = new_array(3, descr=complexarraydescr)
+ p3 = new_array_clear(3, descr=complexarraydescr)
label(p3)
p4 = escape()
jump(p4)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -940,7 +940,7 @@
def test_virtual_array_of_struct(self):
ops = """
[f0, f1, f2, f3]
- p0 = new_array(2, descr=complexarraydescr)
+ p0 = new_array_clear(2, descr=complexarraydescr)
setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
setinteriorfield_gc(p0, 1, f3, descr=compleximagdescr)
@@ -966,7 +966,7 @@
def test_virtual_array_of_struct_forced(self):
ops = """
[f0, f1]
- p0 = new_array(1, descr=complexarraydescr)
+ p0 = new_array_clear(1, descr=complexarraydescr)
setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
f2 = getinteriorfield_gc(p0, 0, descr=complexrealdescr)
@@ -978,7 +978,7 @@
expected = """
[f0, f1]
f2 = float_mul(f0, f1)
- p0 = new_array(1, descr=complexarraydescr)
+ p0 = new_array_clear(1, descr=complexarraydescr)
setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
i0 = escape(f2, p0)
@@ -989,7 +989,7 @@
def test_virtual_array_of_struct_len(self):
ops = """
[]
- p0 = new_array(2, descr=complexarraydescr)
+ p0 = new_array_clear(2, descr=complexarraydescr)
i0 = arraylen_gc(p0)
finish(i0)
"""
@@ -1056,7 +1056,7 @@
"""
self.optimize_loop(ops, expected)
- def test_nonvirtual_dont_write_null_fields_on_force(self):
+ def test_nonvirtual_write_null_fields_on_force(self):
ops = """
[i]
p1 = new_with_vtable(ConstClass(node_vtable))
@@ -1070,6 +1070,7 @@
expected = """
[i]
p1 = new_with_vtable(ConstClass(node_vtable))
+ setfield_gc(p1, 0, descr=valuedescr)
escape(p1)
i2 = getfield_gc(p1, descr=valuedescr)
jump(i2)
@@ -1134,12 +1135,12 @@
[i1]
p1 = new_array(2, descr=arraydescr)
setarrayitem_gc(p1, 0, 25, descr=arraydescr)
- i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
+ i2 = getarrayitem_gc(p1, 0, descr=arraydescr)
jump(i2)
"""
expected = """
[i1]
- jump(0)
+ jump(25)
"""
self.optimize_loop(ops, expected)
@@ -1176,7 +1177,7 @@
"""
self.optimize_loop(ops, expected)
- def test_nonvirtual_array_dont_write_null_fields_on_force(self):
+ def test_nonvirtual_array_write_null_fields_on_force(self):
ops = """
[i1]
p1 = new_array(5, descr=arraydescr)
@@ -1189,6 +1190,7 @@
[i1]
p1 = new_array(5, descr=arraydescr)
setarrayitem_gc(p1, 0, i1, descr=arraydescr)
+ setarrayitem_gc(p1, 1, 0, descr=arraydescr)
escape(p1)
jump(i1)
"""
@@ -2975,6 +2977,7 @@
[p1]
p0 = force_token()
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p0, descr=virtualtokendescr)
escape(p2)
setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -3007,6 +3010,7 @@
p3 = force_token()
#
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
setfield_gc(p0, p2, descr=nextdescr)
#
@@ -3046,6 +3050,7 @@
p3 = force_token()
#
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
setfield_gc(p0, p2, descr=nextdescr)
#
@@ -3122,6 +3127,7 @@
[i1]
p3 = force_token()
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
escape(p2)
p1 = new_with_vtable(ConstClass(node_vtable))
@@ -3147,6 +3153,7 @@
[i1, p1]
p3 = force_token()
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
escape(p2)
setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -4784,15 +4791,18 @@
ops = """
[p0]
p1 = newstr(4)
+ strsetitem(p1, 2, 0)
setfield_gc(p0, p1, descr=valuedescr)
jump(p0)
"""
- # It used to be the case that this would have a series of
- # strsetitem(p1, idx, 0), which was silly because memory is 0 filled
- # when allocated.
+ # This test is slightly bogus: the string is not fully initialized.
+ # I *think* it is still right to not have a series of extra
+ # strsetitem(p1, idx, 0). We do preserve the single one from the
+ # source, though.
expected = """
[p0]
p1 = newstr(4)
+ strsetitem(p1, 2, 0)
setfield_gc(p0, p1, descr=valuedescr)
jump(p0)
"""
@@ -5108,6 +5118,9 @@
strsetitem(p1, 6, i0)
strsetitem(p1, 7, i0)
strsetitem(p1, 8, 3)
+ strsetitem(p1, 9, 0)
+ strsetitem(p1, 10, 0)
+ strsetitem(p1, 11, 0)
finish(p1)
"""
self.optimize_strunicode_loop(ops, expected)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1397,7 +1397,7 @@
"""
self.optimize_loop(ops, expected)
- def test_nonvirtual_dont_write_null_fields_on_force(self):
+ def test_nonvirtual_write_null_fields_on_force(self):
ops = """
[i]
p1 = new_with_vtable(ConstClass(node_vtable))
@@ -1411,6 +1411,7 @@
expected = """
[i]
p1 = new_with_vtable(ConstClass(node_vtable))
+ setfield_gc(p1, 0, descr=valuedescr)
escape(p1)
i2 = getfield_gc(p1, descr=valuedescr)
jump(i2)
@@ -1562,7 +1563,7 @@
[i1]
p1 = new_array(2, descr=arraydescr)
setarrayitem_gc(p1, 0, 25, descr=arraydescr)
- i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
+ i2 = getarrayitem_gc(p1, 0, descr=arraydescr)
jump(i2)
"""
preamble = """
@@ -1608,7 +1609,7 @@
"""
self.optimize_loop(ops, expected)
- def test_nonvirtual_array_dont_write_null_fields_on_force(self):
+ def test_nonvirtual_array_write_null_fields_on_force(self):
ops = """
[i1]
p1 = new_array(5, descr=arraydescr)
@@ -1621,6 +1622,7 @@
[i1]
p1 = new_array(5, descr=arraydescr)
setarrayitem_gc(p1, 0, i1, descr=arraydescr)
+ setarrayitem_gc(p1, 1, 0, descr=arraydescr)
escape(p1)
jump(i1)
"""
@@ -3749,6 +3751,7 @@
[p1]
p0 = force_token()
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p0, descr=virtualtokendescr)
escape(p2)
setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -3781,6 +3784,7 @@
p3 = force_token()
#
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
setfield_gc(p0, p2, descr=nextdescr)
#
@@ -3820,6 +3824,7 @@
p3 = force_token()
#
p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+ setfield_gc(p2, NULL, descr=virtualforceddescr)
setfield_gc(p2, p3, descr=virtualtokendescr)
setfield_gc(p0, p2, descr=nextdescr)
#
@@ -3907,6 +3912,7 @@
More information about the pypy-commit mailing list