[pypy-commit] pypy default: hg merge gc_no_cleanup_nursery

arigo noreply at buildbot.pypy.org
Sun Sep 28 20:47:40 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73741:9e2f7a37c1e2
Date: 2014-09-28 20:46 +0200
http://bitbucket.org/pypy/pypy/changeset/9e2f7a37c1e2/

Log:	hg merge gc_no_cleanup_nursery

	Merge the 'gc_no_cleanup_nursery' branch, started by Wenzhu Man
	(SoC'14) and then done by fijal. It removes the clearing of the
	nursery. The drawback is that new objects are not automatically
	filled with zeros any longer, which needs some care, mostly for GC
	references (which the GC tries to follow, so they must not contain
	garbage). The benefit is a quite large speed-up; I've heard about
	10%, but we'll see more precisely on the benchmarks.

diff too long, truncating to 2000 out of 4325 lines

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -617,7 +617,7 @@
             'raw_store': 1,
             'same_as': 2,
             'setarrayitem_gc': 8,
-            'setfield_gc': 21,
+            'setfield_gc': 22,
         })
 
     def define_argsort():
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -382,12 +382,16 @@
             ...
             p20 = force_token()
             p22 = new_with_vtable(...)
-            p24 = new_array(1, descr=<ArrayP .>)
+            p24 = new_array_clear(1, descr=<ArrayP .>)
             p26 = new_with_vtable(ConstClass(W_ListObject))
             {{{
             setfield_gc(p0, p20, descr=<FieldP .*PyFrame.vable_token .*>)
+            setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keywords_w .*>)
+            setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keywords .*>)
             setfield_gc(p22, 1, descr=<FieldU pypy.interpreter.argument.Arguments.inst__jit_few_keywords .*>)
+            setfield_gc(p22, ConstPtr(null), descr=<FieldP pypy.interpreter.argument.Arguments.inst_keyword_names_w .*>)
             setfield_gc(p26, ConstPtr(ptr22), descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_strategy .*>)
+            setfield_gc(p26, ConstPtr(null), descr=<FieldP pypy.objspace.std.listobject.W_ListObject.inst_lstorage .*>)
             setarrayitem_gc(p24, 0, p26, descr=<ArrayP .>)
             setfield_gc(p22, p24, descr=<FieldP .*Arguments.inst_arguments_w .*>)
             }}}
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_containers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -68,10 +68,13 @@
             guard_no_exception(descr=...)
             i12 = call(ConstClass(ll_strhash), p10, descr=<Calli . r EF=0>)
             p13 = new(descr=...)
-            p15 = new_array(8, descr=<ArrayX .*>)
+            p15 = new_array_clear(8, descr=<ArrayX .*>)
             setfield_gc(p13, p15, descr=<FieldP dicttable.entries .*>)
             i17 = call(ConstClass(ll_dict_lookup_trampoline), p13, p10, i12, descr=<Calli . rri EF=4 OS=4>)
+            {{{
             setfield_gc(p13, 16, descr=<FieldS dicttable.resize_counter .*>)
+            setfield_gc(p13, 0, descr=<FieldS dicttable.num_items .+>)
+            }}}
             guard_no_exception(descr=...)
             p20 = new_with_vtable(ConstClass(W_IntObject))
             call(ConstClass(_ll_dict_setitem_lookup_done_trampoline), p13, p10, p20, i12, i17, descr=<Callv 0 rrrii EF=4>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -110,9 +110,12 @@
             i85 = strlen(p80)
             p86 = new(descr=<SizeDescr .+>)
             p88 = newstr(23)
-            setfield_gc(..., descr=<Field. stringbuilder.+>)
-            setfield_gc(..., descr=<Field. stringbuilder.+>)
-            setfield_gc(..., descr=<Field. stringbuilder.+>)
+            {{{
+            setfield_gc(p86, 0, descr=<FieldS stringbuilder.current_pos .+>)
+            setfield_gc(p86, p88, descr=<FieldP stringbuilder.current_buf .+>)
+            setfield_gc(p86, 23, descr=<FieldS stringbuilder.current_end .+>)
+            setfield_gc(p86, 23, descr=<FieldS stringbuilder.total_size .+>)
+            }}}
             call(ConstClass(ll_append_res0__stringbuilderPtr_rpy_stringPtr), p86, p80, descr=<Callv 0 rr EF=4>)
             guard_no_exception(descr=...)
             i89 = getfield_gc(p86, descr=<FieldS stringbuilder.current_pos .+>)
diff --git a/pypy/tool/pypyjit_child.py b/pypy/tool/pypyjit_child.py
--- a/pypy/tool/pypyjit_child.py
+++ b/pypy/tool/pypyjit_child.py
@@ -10,10 +10,6 @@
     graph = loc['graph']
     interp.malloc_check = False
 
-    def returns_null(T, *args, **kwds):
-        return lltype.nullptr(T)
-    interp.heap.malloc_nonmovable = returns_null     # XXX
-
     from rpython.jit.backend.llgraph.runner import LLGraphCPU
     #LLtypeCPU.supports_floats = False     # for now
     apply_jit(interp, graph, LLGraphCPU)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -25,7 +25,7 @@
 from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
 from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
 from rpython.jit.backend.llsupport.regalloc import get_scale
-from rpython.jit.metainterp.history import (Box, AbstractFailDescr,
+from rpython.jit.metainterp.history import (Box, AbstractFailDescr, ConstInt,
                                             INT, FLOAT, REF)
 from rpython.jit.metainterp.history import TargetToken
 from rpython.jit.metainterp.resoperation import rop
@@ -578,6 +578,7 @@
         return fcond
 
     emit_op_setfield_raw = emit_op_setfield_gc
+    emit_op_zero_ptr_field = emit_op_setfield_gc
 
     def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond):
         base_loc, ofs, res, size = arglocs
@@ -1174,3 +1175,84 @@
         self.mc.VMOV_cs(r.svfp_ip.value, arg.value)
         self.mc.VCVT_f32_f64(res.value, r.svfp_ip.value)
         return fcond
+
+    #from ../x86/regalloc.py:1388
+    def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
+        from rpython.jit.backend.llsupport.descr import unpack_arraydescr
+        assert len(arglocs) == 0
+        itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+        args = op.getarglist()
+        base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
+        sibox = args[1]
+        if isinstance(sibox, ConstInt):
+            startindex_loc = None
+            startindex = sibox.getint()
+            assert startindex >= 0
+        else:
+            startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
+            startindex = -1
+        length_box = op.getarg(2)
+
+        # base_loc and startindex_loc are in two regs here (or they are
+        # immediates).  Compute the dstaddr_loc, which is the raw
+        # address that we will pass as first argument to memset().
+        # It can be in the same register as either one, but not in
+        # args[2], because we're still needing the latter.
+        dstaddr_box = TempBox()
+        dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, [args[2]])
+        if startindex >= 0:    # a constant
+            ofs = baseofs + startindex * itemsize
+            reg = base_loc.value
+        else:
+            self.mc.gen_load_int(r.ip.value, itemsize)
+            self.mc.MLA(dstaddr_loc.value, r.ip.value,
+                        startindex_loc.value, base_loc.value)
+            ofs = baseofs
+            reg = dstaddr_loc.value
+        if check_imm_arg(ofs):
+            self.mc.ADD_ri(dstaddr_loc.value, reg, imm=ofs)
+        else:
+            self.mc.gen_load_int(r.ip.value, ofs)
+            self.mc.ADD_rr(dstaddr_loc.value, reg, r.ip.value)
+
+        if (isinstance(length_box, ConstInt) and
+                length_box.getint() <= 14 and     # same limit as GCC
+                itemsize in (4, 2, 1)):
+            # Inline a series of STR operations, starting at 'dstaddr_loc'.
+            # XXX we could optimize STRB/STRH into STR, but this needs care:
+            # XXX it only works if startindex_loc is a constant, otherwise
+            # XXX we'd be doing unaligned accesses
+            self.mc.gen_load_int(r.ip.value, 0)
+            for i in range(length_box.getint()):
+                if itemsize == 4:
+                    self.mc.STR_ri(r.ip.value, dstaddr_loc.value, imm=i*4)
+                elif itemsize == 2:
+                    self.mc.STRH_ri(r.ip.value, dstaddr_loc.value, imm=i*2)
+                else:
+                    self.mc.STRB_ri(r.ip.value, dstaddr_loc.value, imm=i*1)
+
+        else:
+            if isinstance(length_box, ConstInt):
+                length_loc = imm(length_box.getint() * itemsize)
+            else:
+                # load length_loc in a register different than dstaddr_loc
+                length_loc = regalloc.rm.make_sure_var_in_reg(length_box,
+                                                              [dstaddr_box])
+                if itemsize > 1:
+                    # we need a register that is different from dstaddr_loc,
+                    # but which can be identical to length_loc (as usual,
+                    # only if the length_box is not used by future operations)
+                    bytes_box = TempBox()
+                    bytes_loc = regalloc.rm.force_allocate_reg(bytes_box,
+                                                               [dstaddr_box])
+                    self.mc.gen_load_int(r.ip.value, itemsize)
+                    self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
+                    length_box = bytes_box
+                    length_loc = bytes_loc
+            #
+            # call memset()
+            regalloc.before_call()
+            self.simple_call_no_collect(imm(self.memset_addr),
+                                        [dstaddr_loc, imm(0), length_loc])
+            regalloc.rm.possibly_free_var(length_box)
+        regalloc.rm.possibly_free_var(dstaddr_box)
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -818,8 +818,11 @@
 
     def prepare_op_setfield_gc(self, op, fcond):
         boxes = op.getarglist()
+        ofs, size, sign = unpack_fielddescr(op.getdescr())
+        return self._prepare_op_setfield(boxes, ofs, size)
+
+    def _prepare_op_setfield(self, boxes, ofs, size):
         a0, a1 = boxes
-        ofs, size, sign = unpack_fielddescr(op.getdescr())
         base_loc = self.make_sure_var_in_reg(a0, boxes)
         value_loc = self.make_sure_var_in_reg(a1, boxes)
         ofs_size = default_imm_size if size < 8 else VMEM_imm_size
@@ -832,6 +835,11 @@
 
     prepare_op_setfield_raw = prepare_op_setfield_gc
 
+    def prepare_op_zero_ptr_field(self, op, fcond):
+        a0 = op.getarg(0)
+        ofs = op.getarg(1).getint()
+        return self._prepare_op_setfield([a0, ConstInt(0)], ofs, WORD)
+
     def prepare_op_getfield_gc(self, op, fcond):
         a0 = op.getarg(0)
         ofs, size, sign = unpack_fielddescr(op.getdescr())
@@ -988,6 +996,7 @@
 
     prepare_op_copystrcontent = void
     prepare_op_copyunicodecontent = void
+    prepare_op_zero_array = void
 
     def prepare_op_unicodelen(self, op, fcond):
         l0 = self.make_sure_var_in_reg(op.getarg(0))
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -225,6 +225,7 @@
                 'i': 0,
                 'f': 0.0}
 
+
 class LLGraphCPU(model.AbstractCPU):
     from rpython.jit.metainterp.typesystem import llhelper as ts
     supports_floats = True
@@ -641,6 +642,11 @@
 
     def bh_new_array(self, length, arraydescr):
         array = lltype.malloc(arraydescr.A, length, zero=True)
+        assert getkind(arraydescr.A.OF) != 'ref' # getkind crashes on structs
+        return lltype.cast_opaque_ptr(llmemory.GCREF, array)
+
+    def bh_new_array_clear(self, length, arraydescr):
+        array = lltype.malloc(arraydescr.A, length, zero=True)
         return lltype.cast_opaque_ptr(llmemory.GCREF, array)
 
     def bh_classof(self, struct):
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -1,5 +1,5 @@
 from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.backend.llsupport.memcpy import memcpy_fn
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
 from rpython.jit.backend.llsupport.symbolic import WORD
 from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
     ConstInt, BoxInt, AbstractFailDescr)
@@ -63,6 +63,7 @@
     def __init__(self, cpu, translate_support_code=False):
         self.cpu = cpu
         self.memcpy_addr = 0
+        self.memset_addr = 0
         self.rtyper = cpu.rtyper
         self._debug = False
 
@@ -79,6 +80,7 @@
         else:
             self.gc_size_of_header = WORD # for tests
         self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+        self.memset_addr = self.cpu.cast_ptr_to_int(memset_fn)
         self._build_failure_recovery(False, withfloats=False)
         self._build_failure_recovery(True, withfloats=False)
         self._build_wb_slowpath(False)
diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py
--- a/rpython/jit/backend/llsupport/descr.py
+++ b/rpython/jit/backend/llsupport/descr.py
@@ -35,9 +35,11 @@
     size = 0      # help translation
     tid = llop.combine_ushort(lltype.Signed, 0, 0)
 
-    def __init__(self, size, count_fields_if_immut=-1):
+    def __init__(self, size, count_fields_if_immut=-1,
+                 gc_fielddescrs=None):
         self.size = size
         self.count_fields_if_immut = count_fields_if_immut
+        self.gc_fielddescrs = gc_fielddescrs
 
     def count_fields_if_immutable(self):
         return self.count_fields_if_immut
@@ -58,10 +60,13 @@
     except KeyError:
         size = symbolic.get_size(STRUCT, gccache.translate_support_code)
         count_fields_if_immut = heaptracker.count_fields_if_immutable(STRUCT)
+        gc_fielddescrs = heaptracker.gc_fielddescrs(gccache, STRUCT)
         if heaptracker.has_gcstruct_a_vtable(STRUCT):
-            sizedescr = SizeDescrWithVTable(size, count_fields_if_immut)
+            sizedescr = SizeDescrWithVTable(size, count_fields_if_immut,
+                                            gc_fielddescrs)
         else:
-            sizedescr = SizeDescr(size, count_fields_if_immut)
+            sizedescr = SizeDescr(size, count_fields_if_immut,
+                                  gc_fielddescrs)
         gccache.init_size_descr(STRUCT, sizedescr)
         cache[STRUCT] = sizedescr
         return sizedescr
@@ -95,6 +100,9 @@
         self.field_size = field_size
         self.flag = flag
 
+    def __repr__(self):
+        return 'FieldDescr<%s>' % (self.name,)
+
     def is_pointer_field(self):
         return self.flag == FLAG_POINTER
 
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -18,10 +18,12 @@
 from rpython.jit.backend.llsupport.descr import get_call_descr
 from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler
 from rpython.memory.gctransform import asmgcroot
+from rpython.jit.codewriter.effectinfo import EffectInfo
 
 # ____________________________________________________________
 
 class GcLLDescription(GcCache):
+    malloc_zero_filled = True
 
     def __init__(self, gcdescr, translator=None, rtyper=None):
         GcCache.__init__(self, translator is not None, rtyper)
@@ -36,6 +38,8 @@
     def _setup_str(self):
         self.str_descr     = get_array_descr(self, rstr.STR)
         self.unicode_descr = get_array_descr(self, rstr.UNICODE)
+        self.str_hash_descr     = get_field_descr(self, rstr.STR,     'hash')
+        self.unicode_hash_descr = get_field_descr(self, rstr.UNICODE, 'hash')
 
     def generate_function(self, funcname, func, ARGS, RESULT=llmemory.GCREF):
         """Generates a variant of malloc with the given name and the given
@@ -118,7 +122,8 @@
         descrs = JitFrameDescrs()
         descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
         for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
-                     'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth']:
+                     'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth',
+                     'jf_savedata', 'jf_forward']:
             setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
         descrs.jfi_frame_size = cpu.fielddescrof(jitframe.JITFRAMEINFO,
                                                   'jfi_frame_size')
@@ -377,6 +382,7 @@
         from rpython.memory.gcheader import GCHeaderBuilder
         self.GCClass = self.layoutbuilder.GCClass
         self.moving_gc = self.GCClass.moving_gc
+        self.malloc_zero_filled = self.GCClass.malloc_zero_filled
         self.HDRPTR = lltype.Ptr(self.GCClass.HDR)
         self.gcheaderbuilder = GCHeaderBuilder(self.HDRPTR.TO)
         self.max_size_of_young_obj = self.GCClass.JIT_max_size_of_young_obj()
@@ -410,9 +416,9 @@
             if self.DEBUG:
                 self._random_usage_of_xmm_registers()
             type_id = rffi.cast(llgroup.HALFWORD, 0)    # missing here
-            return llop1.do_malloc_fixedsize_clear(llmemory.GCREF,
-                                                   type_id, size,
-                                                   False, False, False)
+            return llop1.do_malloc_fixedsize(llmemory.GCREF,
+                                             type_id, size,
+                                             False, False, False)
 
         self.generate_function('malloc_nursery', malloc_nursery_slowpath,
                                [lltype.Signed])
@@ -455,7 +461,7 @@
 
         def malloc_str(length):
             type_id = llop.extract_ushort(llgroup.HALFWORD, str_type_id)
-            return llop1.do_malloc_varsize_clear(
+            return llop1.do_malloc_varsize(
                 llmemory.GCREF,
                 type_id, length, str_basesize, str_itemsize,
                 str_ofs_length)
@@ -464,7 +470,7 @@
 
         def malloc_unicode(length):
             type_id = llop.extract_ushort(llgroup.HALFWORD, unicode_type_id)
-            return llop1.do_malloc_varsize_clear(
+            return llop1.do_malloc_varsize(
                 llmemory.GCREF,
                 type_id, length, unicode_basesize, unicode_itemsize,
                 unicode_ofs_length)
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -45,8 +45,9 @@
 # detailed explanation how it is on your architecture
 
 def jitframe_allocate(frame_info):
-    frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth, zero=True)
+    frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth)
     frame.jf_frame_info = frame_info
+    frame.jf_extra_stack_depth = 0
     return frame
 
 def jitframe_resolve(frame):
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -14,6 +14,7 @@
     get_call_descr, get_interiorfield_descr,
     FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr,
     FLAG_POINTER, FLAG_FLOAT)
+from rpython.jit.backend.llsupport.memcpy import memset_fn
 from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
 from rpython.rlib.unroll import unrolling_iterable
 
@@ -607,6 +608,7 @@
 
     def bh_new_array(self, length, arraydescr):
         return self.gc_ll_descr.gc_malloc_array(length, arraydescr)
+    bh_new_array_clear = bh_new_array
 
     def bh_newstr(self, length):
         return self.gc_ll_descr.gc_malloc_str(length)
diff --git a/rpython/jit/backend/llsupport/memcpy.py b/rpython/jit/backend/llsupport/memcpy.py
--- a/rpython/jit/backend/llsupport/memcpy.py
+++ b/rpython/jit/backend/llsupport/memcpy.py
@@ -3,3 +3,6 @@
 memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
                                        rffi.SIZE_T], lltype.Void,
                             sandboxsafe=True, _nowrapper=True)
+memset_fn = rffi.llexternal('memset', [llmemory.Address, rffi.INT,
+                                       rffi.SIZE_T], lltype.Void,
+                            sandboxsafe=True, _nowrapper=True)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -1,12 +1,13 @@
 from rpython.rlib import rgc
 from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.lltypesystem import llmemory
+from rpython.rtyper.lltypesystem import llmemory, lltype
 from rpython.jit.metainterp import history
-from rpython.jit.metainterp.history import ConstInt, BoxPtr, ConstPtr
+from rpython.jit.metainterp.history import ConstInt, BoxPtr, ConstPtr, BoxInt
 from rpython.jit.metainterp.resoperation import ResOperation, rop
 from rpython.jit.codewriter import heaptracker
 from rpython.jit.backend.llsupport.symbolic import WORD
-from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr
+from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr,\
+     FLAG_POINTER
 from rpython.jit.metainterp.history import JitCellToken
 
 FLAG_ARRAY = 0
@@ -38,6 +39,7 @@
     _op_malloc_nursery = None
     _v_last_malloced_nursery = None
     c_zero = ConstInt(0)
+    c_null = ConstPtr(lltype.nullptr(llmemory.GCREF.TO))
 
     def __init__(self, gc_ll_descr, cpu):
         self.gc_ll_descr = gc_ll_descr
@@ -45,6 +47,8 @@
         self.newops = []
         self.known_lengths = {}
         self.write_barrier_applied = {}
+        self.delayed_zero_setfields = {}
+        self.delayed_zero_setarrayitems = {}
 
     def rewrite(self, operations):
         # we can only remember one malloc since the next malloc can possibly
@@ -60,6 +64,8 @@
             if op.is_malloc():
                 self.handle_malloc_operation(op)
                 continue
+            if op.is_guard():
+                self.emit_pending_zeros()
             elif op.can_malloc():
                 self.emitting_an_operation_that_can_collect()
             elif op.getopnum() == rop.LABEL:
@@ -68,6 +74,7 @@
             # ---------- write barriers ----------
             if self.gc_ll_descr.write_barrier_descr is not None:
                 if op.getopnum() == rop.SETFIELD_GC:
+                    self.consider_setfield_gc(op)
                     self.handle_write_barrier_setfield(op)
                     continue
                 if op.getopnum() == rop.SETINTERIORFIELD_GC:
@@ -76,10 +83,18 @@
                 if op.getopnum() == rop.SETARRAYITEM_GC:
                     self.handle_write_barrier_setarrayitem(op)
                     continue
+            else:
+                # this is dead code, but in case we have a gc that does
+                # not have a write barrier and does not zero memory, we would
+                # need to call it
+                if op.getopnum() == rop.SETFIELD_GC:
+                    self.consider_setfield_gc(op)
             # ---------- call assembler -----------
             if op.getopnum() == rop.CALL_ASSEMBLER:
                 self.handle_call_assembler(op)
                 continue
+            if op.getopnum() == rop.JUMP or op.getopnum() == rop.FINISH:
+                self.emit_pending_zeros()
             #
             self.newops.append(op)
         return self.newops
@@ -99,7 +114,7 @@
                                   [op.result, ConstInt(classint)], None,
                                   descr=self.gc_ll_descr.fielddescr_vtable)
                 self.newops.append(op)
-        elif opnum == rop.NEW_ARRAY:
+        elif opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
             descr = op.getdescr()
             assert isinstance(descr, ArrayDescr)
             self.handle_new_array(descr, op)
@@ -112,6 +127,44 @@
         else:
             raise NotImplementedError(op.getopname())
 
+    def clear_gc_fields(self, descr, result):
+        if self.gc_ll_descr.malloc_zero_filled:
+            return
+        try:
+            d = self.delayed_zero_setfields[result]
+        except KeyError:
+            d = {}
+            self.delayed_zero_setfields[result] = d
+        for fielddescr in descr.gc_fielddescrs:
+            ofs = self.cpu.unpack_fielddescr(fielddescr)
+            d[ofs] = None
+
+    def consider_setfield_gc(self, op):
+        offset = self.cpu.unpack_fielddescr(op.getdescr())
+        try:
+            del self.delayed_zero_setfields[op.getarg(0)][offset]
+        except KeyError:
+            pass
+
+    def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum):
+        if self.gc_ll_descr.malloc_zero_filled:
+            return
+        if kind == FLAG_ARRAY:
+            if descr.is_array_of_structs() or descr.is_array_of_pointers():
+                assert opnum == rop.NEW_ARRAY_CLEAR
+            if opnum == rop.NEW_ARRAY_CLEAR:
+                self.handle_clear_array_contents(descr, result, v_length)
+            return
+        if kind == FLAG_STR:
+            hash_descr = self.gc_ll_descr.str_hash_descr
+        elif kind == FLAG_UNICODE:
+            hash_descr = self.gc_ll_descr.unicode_hash_descr
+        else:
+            return
+        op = ResOperation(rop.SETFIELD_GC, [result, self.c_zero], None,
+                          descr=hash_descr)
+        self.newops.append(op)
+
     def handle_new_fixedsize(self, descr, op):
         assert isinstance(descr, SizeDescr)
         size = descr.size
@@ -119,6 +172,7 @@
             self.gen_initialize_tid(op.result, descr.tid)
         else:
             self.gen_malloc_fixedsize(size, descr.tid, op.result)
+        self.clear_gc_fields(descr, op.result)
 
     def handle_new_array(self, arraydescr, op, kind=FLAG_ARRAY):
         v_length = op.getarg(0)
@@ -140,6 +194,8 @@
             # might end up being allocated by malloc_external or some
             # stuff that initializes GC header fields differently
             self.gen_initialize_len(op.result, v_length, arraydescr.lendescr)
+            self.clear_varsize_gc_fields(kind, op.getdescr(), op.result,
+                                         v_length, op.getopnum())
             return
         if (total_size >= 0 and
                 self.gen_malloc_nursery(total_size, op.result)):
@@ -149,7 +205,7 @@
             self.gen_boehm_malloc_array(arraydescr, v_length, op.result)
         else:
             opnum = op.getopnum()
-            if opnum == rop.NEW_ARRAY:
+            if opnum == rop.NEW_ARRAY or opnum == rop.NEW_ARRAY_CLEAR:
                 self.gen_malloc_array(arraydescr, v_length, op.result)
             elif opnum == rop.NEWSTR:
                 self.gen_malloc_str(v_length, op.result)
@@ -157,6 +213,21 @@
                 self.gen_malloc_unicode(v_length, op.result)
             else:
                 raise NotImplementedError(op.getopname())
+        self.clear_varsize_gc_fields(kind, op.getdescr(), op.result, v_length,
+                                     op.getopnum())
+
+    def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None):
+        # XXX more work here to reduce or remove the ZERO_ARRAY in some cases
+        if v_length is None:
+            v_length = BoxInt()
+            o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length,
+                             descr=arraydescr)
+            self.newops.append(o)
+        elif isinstance(v_length, ConstInt) and v_length.getint() == 0:
+            return
+        o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], None,
+                         descr=arraydescr)
+        self.newops.append(o)
 
     def gen_malloc_frame(self, frame_info, frame, size_box):
         descrs = self.gc_ll_descr.getframedescrs(self.cpu)
@@ -177,10 +248,25 @@
             self.gen_malloc_nursery_varsize_frame(size_box, frame)
             self.gen_initialize_tid(frame, descrs.arraydescr.tid)
             length_box = history.BoxInt()
-            op1 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
-                               length_box,
-                               descr=descrs.jfi_frame_depth)
-            self.newops.append(op1)
+            # we need to explicitly zero all the gc fields, because
+            # of the unusual malloc pattern
+            extra_ops = [
+                ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
+                             length_box, descr=descrs.jfi_frame_depth),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_zero],
+                             None, descr=descrs.jf_extra_stack_depth),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+                             None, descr=descrs.jf_savedata),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+                             None, descr=descrs.jf_force_descr),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+                             None, descr=descrs.jf_descr),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+                             None, descr=descrs.jf_guard_exc),
+                ResOperation(rop.SETFIELD_GC, [frame, self.c_null],
+                             None, descr=descrs.jf_forward),
+            ]
+            self.newops += extra_ops
             self.gen_initialize_len(frame, length_box,
                                     descrs.arraydescr.lendescr)
 
@@ -225,8 +311,17 @@
         # forgets the previous MALLOC_NURSERY, if any; and empty the
         # set 'write_barrier_applied', so that future SETFIELDs will generate
         # a write barrier as usual.
+        # it also writes down all the pending zero ptr fields
         self._op_malloc_nursery = None
         self.write_barrier_applied.clear()
+        self.emit_pending_zeros()
+
+    def emit_pending_zeros(self):
+        for v, d in self.delayed_zero_setfields.iteritems():
+            for ofs in d.iterkeys():
+                op = ResOperation(rop.ZERO_PTR_FIELD, [v, ConstInt(ofs)], None)
+                self.newops.append(op)
+        self.delayed_zero_setfields.clear()
 
     def _gen_call_malloc_gc(self, args, v_result, descr):
         """Generate a CALL_MALLOC_GC with the given args."""
@@ -338,7 +433,8 @@
 
     def gen_malloc_nursery(self, size, v_result):
         """Try to generate or update a CALL_MALLOC_NURSERY.
-        If that fails, generate a plain CALL_MALLOC_GC instead.
+        If that succeeds, return True; you still need to write the tid.
+        If that fails, return False.
         """
         size = self.round_up_for_allocation(size)
         if not self.gc_ll_descr.can_use_nursery_malloc(size):
diff --git a/rpython/jit/backend/llsupport/test/test_descr.py b/rpython/jit/backend/llsupport/test/test_descr.py
--- a/rpython/jit/backend/llsupport/test/test_descr.py
+++ b/rpython/jit/backend/llsupport/test/test_descr.py
@@ -19,6 +19,8 @@
     assert descr_t.size == symbolic.get_size(T, False)
     assert descr_s.count_fields_if_immutable() == -1
     assert descr_t.count_fields_if_immutable() == -1
+    assert descr_t.gc_fielddescrs == []
+    assert len(descr_s.gc_fielddescrs) == 1
     assert descr_s == get_size_descr(c0, S)
     assert descr_s != get_size_descr(c1, S)
     #
@@ -26,6 +28,11 @@
     assert isinstance(descr_s.size, Symbolic)
     assert descr_s.count_fields_if_immutable() == -1
 
+    PARENT = lltype.Struct('P', ('x', lltype.Ptr(T)))
+    STRUCT = lltype.GcStruct('S', ('parent', PARENT), ('y', lltype.Ptr(T)))
+    descr_struct = get_size_descr(c0, STRUCT)
+    assert len(descr_struct.gc_fielddescrs) == 2
+
 def test_get_size_descr_immut():
     S = lltype.GcStruct('S', hints={'immutable': True})
     T = lltype.GcStruct('T', ('parent', S),
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -59,7 +59,7 @@
         x += self.gcheaderbuilder.size_gc_header
         return x, tid
 
-    def do_malloc_fixedsize_clear(self, RESTYPE, type_id, size,
+    def do_malloc_fixedsize(self, RESTYPE, type_id, size,
                                   has_finalizer, has_light_finalizer,
                                   contains_weakptr):
         assert not contains_weakptr
@@ -70,7 +70,9 @@
         self.record.append(("fixedsize", repr(size), tid, p))
         return p
 
-    def do_malloc_varsize_clear(self, RESTYPE, type_id, length, size,
+    do_malloc_fixedsize_clear = do_malloc_fixedsize
+
+    def do_malloc_varsize(self, RESTYPE, type_id, length, size,
                                 itemsize, offset_to_length):
         p, tid = self._malloc(type_id, size + itemsize * length)
         (p + offset_to_length).signed[0] = length
@@ -80,6 +82,8 @@
                             repr(offset_to_length), p))
         return p
 
+    do_malloc_varsize_clear = do_malloc_varsize
+
     def _write_barrier_failing_case(self, adr_struct):
         self.record.append(('barrier', adr_struct))
 
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -69,6 +69,8 @@
         unicodedescr = self.gc_ll_descr.unicode_descr
         strlendescr     = strdescr.lendescr
         unicodelendescr = unicodedescr.lendescr
+        strhashdescr     = self.gc_ll_descr.str_hash_descr
+        unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
 
         casmdescr = JitCellToken()
         clt = FakeLoopToken()
@@ -82,10 +84,15 @@
         jfi_frame_depth = framedescrs.jfi_frame_depth
         jfi_frame_size = framedescrs.jfi_frame_size
         jf_frame_info = framedescrs.jf_frame_info
+        jf_savedata = framedescrs.jf_savedata
+        jf_force_descr = framedescrs.jf_force_descr
+        jf_descr = framedescrs.jf_descr
+        jf_guard_exc = framedescrs.jf_guard_exc
+        jf_forward = framedescrs.jf_forward
+        jf_extra_stack_depth = framedescrs.jf_extra_stack_depth
         signedframedescr = self.cpu.signedframedescr
         floatframedescr = self.cpu.floatframedescr
         casmdescr.compiled_loop_token = clt
-        tzdescr = None # noone cares
         #
         namespace.update(locals())
         #
@@ -123,6 +130,9 @@
     def unpack_arraydescr_size(self, d):
         return 0, d.itemsize, 0
 
+    def unpack_fielddescr(self, d):
+        return d.offset
+
     def arraydescrof(self, ARRAY):
         try:
             return self._cache[ARRAY]
@@ -144,7 +154,7 @@
     def setup_method(self, meth):
         class FakeCPU(BaseFakeCPU):
             def sizeof(self, STRUCT):
-                return SizeDescrWithVTable(102)
+                return SizeDescrWithVTable(102, gc_fielddescrs=[])
         self.cpu = FakeCPU()
         self.gc_ll_descr = GcLLDescr_boehm(None, None, None)
 
@@ -277,10 +287,11 @@
                                                really_not_translated=True)
         self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
             lambda cpu: True)
+        self.gc_ll_descr.malloc_zero_filled = False
         #
         class FakeCPU(BaseFakeCPU):
             def sizeof(self, STRUCT):
-                descr = SizeDescrWithVTable(104)
+                descr = SizeDescrWithVTable(104, gc_fielddescrs=[])
                 descr.tid = 9315
                 return descr
         self.cpu = FakeCPU()
@@ -313,6 +324,7 @@
             setfield_gc(p1, 5678, descr=tiddescr)
             p2 = int_add(p1, %(tdescr.size)d)
             setfield_gc(p2, 1234, descr=tiddescr)
+            zero_ptr_field(p1, %(tdescr.gc_fielddescrs[0].offset)s)
             jump()
         """)
 
@@ -422,6 +434,7 @@
         [i0]
         p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
         setfield_gc(p0, i0, descr=strlendescr)
+        setfield_gc(p0, 0, descr=strhashdescr)
         jump(i0)
         """)
 
@@ -545,15 +558,19 @@
                         unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
             setfield_gc(p0, %(strdescr.tid)d, descr=tiddescr)
             setfield_gc(p0, 14, descr=strlendescr)
+            setfield_gc(p0, 0, descr=strhashdescr)
             p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
             setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
             setfield_gc(p1, 10, descr=unicodelendescr)
+            setfield_gc(p1, 0, descr=unicodehashdescr)
             p2 = call_malloc_nursery_varsize(2, %(unicodedescr.itemsize)d, i2,\
                                 descr=unicodedescr)
             setfield_gc(p2, i2, descr=unicodelendescr)
+            setfield_gc(p2, 0, descr=unicodehashdescr)
             p3 = call_malloc_nursery_varsize(1, 1, i2, \
                                 descr=strdescr)
             setfield_gc(p3, i2, descr=strlendescr)
+            setfield_gc(p3, 0, descr=strhashdescr)
             jump()
         """)
 
@@ -587,7 +604,7 @@
         self.gc_ll_descr.max_size_of_young_obj = 2000
         self.check_rewrite("""
             [i2, p3]
-            p1 = new_array(129, descr=cdescr)
+            p1 = new_array_clear(129, descr=cdescr)
             call(123456)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
             jump()
@@ -597,6 +614,7 @@
                                 %(cdescr.basesize + 129 * cdescr.itemsize)d)
             setfield_gc(p1, 8111, descr=tiddescr)
             setfield_gc(p1, 129, descr=clendescr)
+            zero_array(p1, 0, 129, descr=cdescr)
             call(123456)
             cond_call_gc_wb(p1, descr=wbdescr)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -608,7 +626,7 @@
         self.gc_ll_descr.max_size_of_young_obj = 2000
         self.check_rewrite("""
             [i2, p3]
-            p1 = new_array(130, descr=cdescr)
+            p1 = new_array_clear(130, descr=cdescr)
             call(123456)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
             jump()
@@ -618,6 +636,7 @@
                                 %(cdescr.basesize + 130 * cdescr.itemsize)d)
             setfield_gc(p1, 8111, descr=tiddescr)
             setfield_gc(p1, 130, descr=clendescr)
+            zero_array(p1, 0, 130, descr=cdescr)
             call(123456)
             cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -639,7 +658,7 @@
     def test_label_makes_size_unknown(self):
         self.check_rewrite("""
             [i2, p3]
-            p1 = new_array(5, descr=cdescr)
+            p1 = new_array_clear(5, descr=cdescr)
             label(p1, i2, p3)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
             jump()
@@ -649,6 +668,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             setfield_gc(p1, 8111, descr=tiddescr)
             setfield_gc(p1, 5, descr=clendescr)
+            zero_array(p1, 0, 5, descr=cdescr)
             label(p1, i2, p3)
             cond_call_gc_wb_array(p1, i2, descr=wbdescr)
             setarrayitem_gc(p1, i2, p3, descr=cdescr)
@@ -709,7 +729,7 @@
     def test_initialization_store_array(self):
         self.check_rewrite("""
             [p1, i2]
-            p0 = new_array(5, descr=cdescr)
+            p0 = new_array_clear(5, descr=cdescr)
             setarrayitem_gc(p0, i2, p1, descr=cdescr)
             jump()
         """, """
@@ -718,6 +738,7 @@
                                 %(cdescr.basesize + 5 * cdescr.itemsize)d)
             setfield_gc(p0, 8111, descr=tiddescr)
             setfield_gc(p0, 5, descr=clendescr)
+            zero_array(p0, 0, 5, descr=cdescr)
             setarrayitem_gc(p0, i2, p1, descr=cdescr)
             jump()
         """)
@@ -751,9 +772,11 @@
             [i0]
             p0 = call_malloc_nursery(%(tdescr.size)d)
             setfield_gc(p0, 5678, descr=tiddescr)
+            zero_ptr_field(p0, %(tdescr.gc_fielddescrs[0].offset)s)
             p1 = call_malloc_nursery_varsize(1, 1, i0, \
                                 descr=strdescr)
             setfield_gc(p1, i0, descr=strlendescr)
+            setfield_gc(p1, 0, descr=strhashdescr)
             cond_call_gc_wb(p0, descr=wbdescr)
             setfield_gc(p0, p1, descr=tzdescr)
             jump()
@@ -770,6 +793,7 @@
             [p1]
             p0 = call_malloc_nursery(%(tdescr.size)d)
             setfield_gc(p0, 5678, descr=tiddescr)
+            zero_ptr_field(p0, %(tdescr.gc_fielddescrs[0].offset)s)
             label(p0, p1)
             cond_call_gc_wb(p0, descr=wbdescr)
             setfield_gc(p0, p1, descr=tzdescr)
@@ -800,6 +824,12 @@
         p1 = call_malloc_nursery_varsize_frame(i1)
         setfield_gc(p1, 0, descr=tiddescr)
         i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
+        setfield_gc(p1, 0, descr=jf_extra_stack_depth)
+        setfield_gc(p1, NULL, descr=jf_savedata)
+        setfield_gc(p1, NULL, descr=jf_force_descr)
+        setfield_gc(p1, NULL, descr=jf_descr)
+        setfield_gc(p1, NULL, descr=jf_guard_exc)
+        setfield_gc(p1, NULL, descr=jf_forward)
         setfield_gc(p1, i2, descr=framelendescr)
         setfield_gc(p1, ConstClass(frame_info), descr=jf_frame_info)
         setarrayitem_gc(p1, 0, i0, descr=signedframedescr)
diff --git a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
--- a/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
+++ b/rpython/jit/backend/llsupport/test/zrpy_gc_test.py
@@ -223,7 +223,7 @@
 ##        return None, f, None
 
     def define_compile_framework_1(cls):
-        # a moving GC.  Supports malloc_varsize_nonmovable.  Simple test, works
+        # a moving GC.  Simple test, works
         # without write_barriers and root stack enumeration.
         def f(n, x, *args):
             y = X()
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2036,6 +2036,14 @@
                                     'ref', descr=arraydescr)
         assert r1.value != r2.value
         a = lltype.cast_opaque_ptr(lltype.Ptr(A), r1.value)
+        assert len(a) == 342
+
+    def test_new_array_clear(self):
+        A = lltype.GcArray(lltype.Signed)
+        arraydescr = self.cpu.arraydescrof(A)
+        r1 = self.execute_operation(rop.NEW_ARRAY_CLEAR, [BoxInt(342)],
+                                    'ref', descr=arraydescr)
+        a = lltype.cast_opaque_ptr(lltype.Ptr(A), r1.value)
         assert a[0] == 0
         assert len(a) == 342
 
@@ -4272,9 +4280,6 @@
         fail = self.cpu.get_latest_descr(deadframe)
         assert fail.identifier == 23
         assert self.cpu.get_int_value(deadframe, 0) == 42
-        # make sure that force reads the registers from a zeroed piece of
-        # memory
-        assert values[0] == 0
 
     def test_compile_bridge_while_running(self):
         def func():
@@ -4442,3 +4447,99 @@
         res = self.execute_operation(rop.CAST_FLOAT_TO_SINGLEFLOAT,
                                    [boxfloat(12.5)], 'int')
         assert res.getint() == struct.unpack("I", struct.pack("f", 12.5))[0]
+
+    def test_zero_ptr_field(self):
+        from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+        
+        if not isinstance(self.cpu, AbstractLLCPU):
+            py.test.skip("llgraph can't do zero_ptr_field")
+        T = lltype.GcStruct('T')
+        S = lltype.GcStruct('S', ('x', lltype.Ptr(T)))
+        tdescr = self.cpu.sizeof(T)
+        sdescr = self.cpu.sizeof(S)
+        fielddescr = self.cpu.fielddescrof(S, 'x')
+        loop = parse("""
+        []
+        p0 = new(descr=tdescr)
+        p1 = new(descr=sdescr)
+        setfield_gc(p1, p0, descr=fielddescr)
+        zero_ptr_field(p1, %d)
+        finish(p1)
+        """ % fielddescr.offset, namespace=locals())
+        looptoken = JitCellToken()
+        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        deadframe = self.cpu.execute_token(looptoken)
+        ref = self.cpu.get_ref_value(deadframe, 0)
+        s = lltype.cast_opaque_ptr(lltype.Ptr(S), ref)
+        assert not s.x
+
+    def test_zero_ptr_field_2(self):
+        from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+
+        if not isinstance(self.cpu, AbstractLLCPU):
+            py.test.skip("llgraph does not do zero_ptr_field")
+        
+        from rpython.jit.backend.llsupport import symbolic
+        S = lltype.GcStruct('S', ('x', lltype.Signed),
+                                 ('p', llmemory.GCREF),
+                                 ('y', lltype.Signed))
+        s = lltype.malloc(S)
+        s.x = -1296321
+        s.y = -4398176
+        s_ref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+        s.p = s_ref
+        ofs_p, _ = symbolic.get_field_token(S, 'p', False)
+        #
+        self.execute_operation(rop.ZERO_PTR_FIELD, [
+            BoxPtr(s_ref), ConstInt(ofs_p)],   # OK for now to assume that the
+            'void')                            # 2nd argument is a constant
+        #
+        assert s.x == -1296321
+        assert s.p == lltype.nullptr(llmemory.GCREF.TO)
+        assert s.y == -4398176
+
+    def test_zero_array(self):
+        from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+
+        if not isinstance(self.cpu, AbstractLLCPU):
+            py.test.skip("llgraph does not do zero_array")
+        
+        PAIR = lltype.Struct('PAIR', ('a', lltype.Signed), ('b', lltype.Signed))
+        for OF in [lltype.Signed, rffi.INT, rffi.SHORT, rffi.UCHAR, PAIR]:
+            A = lltype.GcArray(OF)
+            arraydescr = self.cpu.arraydescrof(A)
+            a = lltype.malloc(A, 100)
+            addr = llmemory.cast_ptr_to_adr(a)
+            a_int = heaptracker.adr2int(addr)
+            a_ref = lltype.cast_opaque_ptr(llmemory.GCREF, a)
+            for (start, length) in [(0, 100), (49, 49), (1, 98),
+                                    (15, 9), (10, 10), (47, 0),
+                                    (0, 4)]:
+                for cls1 in [ConstInt, BoxInt]:
+                    for cls2 in [ConstInt, BoxInt]:
+                        print 'a_int:', a_int
+                        print 'of:', OF
+                        print 'start:', cls1.__name__, start
+                        print 'length:', cls2.__name__, length
+                        for i in range(100):
+                            if OF == PAIR:
+                                a[i].a = a[i].b = -123456789
+                            else:
+                                a[i] = rffi.cast(OF, -123456789)
+                        startbox = cls1(start)
+                        lengthbox = cls2(length)
+                        if cls1 == cls2 and start == length:
+                            lengthbox = startbox    # same box!
+                        self.execute_operation(rop.ZERO_ARRAY,
+                                               [BoxPtr(a_ref),
+                                                startbox,
+                                                lengthbox],
+                                           'void', descr=arraydescr)
+                        assert len(a) == 100
+                        for i in range(100):
+                            val = (0 if start <= i < start + length
+                                     else -123456789)
+                            if OF == PAIR:
+                                assert a[i].a == a[i].b == val
+                            else:
+                                assert a[i] == rffi.cast(OF, val)
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -95,7 +95,10 @@
             fields.append(('parent', rclass.OBJECT))
             kwds['hints'] = {'vtable': with_vtable._obj}
         for i in range(r.randrange(1, 5)):
-            TYPE = self.get_random_primitive_type(r)
+            if r.random() < 0.1:
+                TYPE = llmemory.GCREF
+            else:
+                TYPE = self.get_random_primitive_type(r)
             fields.append(('f%d' % i, TYPE))
         S = type('S%d' % self.counter, *fields, **kwds)
         self.counter += 1
@@ -246,13 +249,43 @@
             op = ResOperation(self.opnum, [v, c_vtable2], None)
             return op, False
 
+class ZeroPtrFieldOperation(test_random.AbstractOperation):
+    def field_descr(self, builder, r):
+        if getattr(builder.cpu, 'is_llgraph', False):
+            raise test_random.CannotProduceOperation
+        v, S = builder.get_structptr_var(r, )
+        names = S._names
+        if names[0] == 'parent':
+            names = names[1:]
+        choice = []
+        for name in names:
+            FIELD = getattr(S, name)
+            if isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+                choice.append(name)
+        if not choice:
+            raise test_random.CannotProduceOperation
+        name = r.choice(choice)
+        descr = builder.cpu.fielddescrof(S, name)
+        return v, descr.offset
+
+    def produce_into(self, builder, r):
+        v, offset = self.field_descr(builder, r)
+        builder.do(self.opnum, [v, ConstInt(offset)], None)
+
 class GetFieldOperation(test_random.AbstractOperation):
     def field_descr(self, builder, r):
         v, S = builder.get_structptr_var(r, )
         names = S._names
         if names[0] == 'parent':
             names = names[1:]
-        name = r.choice(names)
+        choice = []
+        for name in names:
+            FIELD = getattr(S, name)
+            if not isinstance(FIELD, lltype.Ptr):
+                choice.append(name)
+        if not choice:
+            raise test_random.CannotProduceOperation
+        name = r.choice(choice)
         descr = builder.cpu.fielddescrof(S, name)
         descr._random_info = 'cpu.fielddescrof(..., %r)' % (name,)
         descr._random_type = S
@@ -274,7 +307,14 @@
                                          array_of_structs=True)
         array = v.getref(lltype.Ptr(A))
         v_index = builder.get_index(len(array), r)
-        name = r.choice(A.OF._names)
+        choice = []
+        for name in A.OF._names:
+            FIELD = getattr(A.OF, name)
+            if not isinstance(FIELD, lltype.Ptr):
+                choice.append(name)
+        if not choice:
+            raise test_random.CannotProduceOperation
+        name = r.choice(choice)
         descr = builder.cpu.interiorfielddescrof(A, name)
         descr._random_info = 'cpu.interiorfielddescrof(..., %r)' % (name,)
         descr._random_type = A
@@ -682,6 +722,7 @@
     OPERATIONS.append(GetFieldOperation(rop.GETFIELD_GC))
     OPERATIONS.append(GetInteriorFieldOperation(rop.GETINTERIORFIELD_GC))
     OPERATIONS.append(SetFieldOperation(rop.SETFIELD_GC))
+    OPERATIONS.append(ZeroPtrFieldOperation(rop.ZERO_PTR_FIELD))
     OPERATIONS.append(SetInteriorFieldOperation(rop.SETINTERIORFIELD_GC))
     OPERATIONS.append(NewOperation(rop.NEW))
     OPERATIONS.append(NewOperation(rop.NEW_WITH_VTABLE))
@@ -689,7 +730,7 @@
     OPERATIONS.append(GetArrayItemOperation(rop.GETARRAYITEM_GC))
     OPERATIONS.append(GetArrayItemOperation(rop.GETARRAYITEM_GC))
     OPERATIONS.append(SetArrayItemOperation(rop.SETARRAYITEM_GC))
-    OPERATIONS.append(NewArrayOperation(rop.NEW_ARRAY))
+    OPERATIONS.append(NewArrayOperation(rop.NEW_ARRAY_CLEAR))
     OPERATIONS.append(ArrayLenOperation(rop.ARRAYLEN_GC))
     OPERATIONS.append(NewStrOperation(rop.NEWSTR))
     OPERATIONS.append(NewUnicodeOperation(rop.NEWUNICODE))
diff --git a/rpython/jit/backend/test/test_random.py b/rpython/jit/backend/test/test_random.py
--- a/rpython/jit/backend/test/test_random.py
+++ b/rpython/jit/backend/test/test_random.py
@@ -52,10 +52,13 @@
 
     def do(self, opnum, argboxes, descr=None):
         self.fakemetainterp._got_exc = None
-        v_result = execute_nonspec(self.cpu, self.fakemetainterp,
-                                   opnum, argboxes, descr)
-        if isinstance(v_result, Const):
-            v_result = v_result.clonebox()
+        if opnum == rop.ZERO_PTR_FIELD:
+            v_result = None
+        else:
+            v_result = execute_nonspec(self.cpu, self.fakemetainterp,
+                                       opnum, argboxes, descr)
+            if isinstance(v_result, Const):
+                v_result = v_result.clonebox()
         self.loop.operations.append(ResOperation(opnum, argboxes, v_result,
                                                  descr))
         return v_result
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1486,6 +1486,8 @@
         dest_addr = AddressLoc(base_loc, ofs_loc)
         self.save_into_mem(dest_addr, value_loc, size_loc)
 
+    genop_discard_zero_ptr_field = genop_discard_setfield_gc
+
     def genop_discard_setinteriorfield_gc(self, op, arglocs):
         (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
             index_loc, temp_loc, value_loc) = arglocs
@@ -2361,6 +2363,43 @@
             elif IS_X86_64:
                 mc.MOVSX32_rj(loc.value, addr)     # memory read, sign-extend
 
+    def genop_discard_zero_array(self, op, arglocs):
+        (base_loc, startindex_loc, bytes_loc,
+         itemsize_loc, baseofs_loc, null_loc) = arglocs
+        assert isinstance(bytes_loc, ImmedLoc)
+        assert isinstance(itemsize_loc, ImmedLoc)
+        assert isinstance(baseofs_loc, ImmedLoc)
+        assert isinstance(null_loc, RegLoc) and null_loc.is_xmm
+        baseofs = baseofs_loc.value
+        nbytes = bytes_loc.value
+        if valid_addressing_size(itemsize_loc.value):
+            scale = get_scale(itemsize_loc.value)
+        else:
+            assert isinstance(startindex_loc, ImmedLoc)
+            baseofs += startindex_loc.value * itemsize_loc.value
+            startindex_loc = imm0
+            scale = 0
+        null_reg_cleared = False
+        i = 0
+        while i < nbytes:
+            addr = addr_add(base_loc, startindex_loc, baseofs + i, scale)
+            current = nbytes - i
+            if current >= 16:
+                current = 16
+                if not null_reg_cleared:
+                    self.mc.XORPS_xx(null_loc.value, null_loc.value)
+                    null_reg_cleared = True
+                self.mc.MOVUPS(addr, null_loc)
+            else:
+                if current >= WORD:
+                    current = WORD
+                elif current >= 4:
+                    current = 4
+                elif current >= 2:
+                    current = 2
+                self.save_into_mem(addr, imm0, imm(current))
+            i += current
+
 
 genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
 genop_list = [Assembler386.not_implemented_op] * rop._LAST
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -8,7 +8,8 @@
     unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.backend.llsupport.regalloc import (FrameManager, BaseRegalloc,
-     RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op)
+     RegisterManager, TempBox, compute_vars_longevity, is_comparison_or_ovf_op,
+     valid_addressing_size)
 from rpython.jit.backend.x86 import rx86
 from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
     IS_X86_64)
@@ -958,6 +959,13 @@
                                               need_lower_byte=need_lower_byte)
         self.perform_discard(op, [base_loc, ofs_loc, size_loc, value_loc])
 
+    def consider_zero_ptr_field(self, op):
+        ofs_loc = imm(op.getarg(1).getint())
+        size_loc = imm(WORD)
+        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), [])
+        value_loc = imm(0)
+        self.perform_discard(op, [base_loc, ofs_loc, size_loc, value_loc])
+
     consider_setfield_raw = consider_setfield_gc
 
     def consider_setinteriorfield_gc(self, op):
@@ -1376,6 +1384,70 @@
     def consider_keepalive(self, op):
         pass
 
+    def consider_zero_array(self, op):
+        itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+        args = op.getarglist()
+        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
+        startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
+        length_box = op.getarg(2)
+        if isinstance(length_box, ConstInt):
+            constbytes = length_box.getint() * itemsize
+        else:
+            constbytes = -1
+        if 0 <= constbytes <= 16 * 8 and (
+                valid_addressing_size(itemsize) or
+                isinstance(startindex_loc, ImmedLoc)):
+            if IS_X86_64:
+                null_loc = X86_64_XMM_SCRATCH_REG
+            else:
+                null_box = TempBox()
+                null_loc = self.xrm.force_allocate_reg(null_box)
+                self.xrm.possibly_free_var(null_box)
+            self.perform_discard(op, [base_loc, startindex_loc,
+                                      imm(constbytes), imm(itemsize),
+                                      imm(baseofs), null_loc])
+        else:
+            # base_loc and startindex_loc are in two regs here (or they are
+            # immediates).  Compute the dstaddr_loc, which is the raw
+            # address that we will pass as first argument to memset().
+            # It can be in the same register as either one, but not in
+            # args[2], because we still need the latter.
+            dstaddr_box = TempBox()
+            dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, [args[2]])
+            itemsize_loc = imm(itemsize)
+            dst_addr = self.assembler._get_interiorfield_addr(
+                dstaddr_loc, startindex_loc, itemsize_loc,
+                base_loc, imm(baseofs))
+            self.assembler.mc.LEA(dstaddr_loc, dst_addr)
+            #
+            if constbytes >= 0:
+                length_loc = imm(constbytes)
+            else:
+                # load length_loc in a register different than dstaddr_loc
+                length_loc = self.rm.make_sure_var_in_reg(length_box,
+                                                          [dstaddr_box])
+                if itemsize > 1:
+                    # we need a register that is different from dstaddr_loc,
+                    # but which can be identical to length_loc (as usual,
+                    # only if the length_box is not used by future operations)
+                    bytes_box = TempBox()
+                    bytes_loc = self.rm.force_allocate_reg(bytes_box,
+                                                           [dstaddr_box])
+                    b_adr = self.assembler._get_interiorfield_addr(
+                        bytes_loc, length_loc, itemsize_loc, imm0, imm0)
+                    self.assembler.mc.LEA(bytes_loc, b_adr)
+                    length_box = bytes_box
+                    length_loc = bytes_loc
+            #
+            # call memset()
+            self.rm.before_call()
+            self.xrm.before_call()
+            self.assembler.simple_call_no_collect(
+                imm(self.assembler.memset_addr),
+                [dstaddr_loc, imm0, length_loc])
+            self.rm.possibly_free_var(length_box)
+            self.rm.possibly_free_var(dstaddr_box)
+
     def not_implemented_op(self, op):
         not_implemented("not implemented operation: %s" % op.getopname())
 
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -664,6 +664,7 @@
 
     MOVDQ = _binaryop('MOVDQ')
     MOVD32 = _binaryop('MOVD32')
+    MOVUPS = _binaryop('MOVUPS')
 
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -634,6 +634,9 @@
     MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
+    MOVUPS_mx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_const(1))
+    MOVUPS_jx = xmminsn(rex_nw, '\x0F\x11', register(2, 8), abs_(1))
+    MOVUPS_ax = xmminsn(rex_nw, '\x0F\x11', register(2, 8), mem_reg_plus_scaled_reg_plus_const(1))
 
     # ------------------------------------------------------------
 
@@ -764,6 +767,7 @@
 define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
 define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8)], regtype='XMM')
 define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
+define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
 define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
 
 def define_pxmm_insn(insnname_template, insn_char):
diff --git a/rpython/jit/codewriter/assembler.py b/rpython/jit/codewriter/assembler.py
--- a/rpython/jit/codewriter/assembler.py
+++ b/rpython/jit/codewriter/assembler.py
@@ -291,6 +291,7 @@
     'int_sub',
     'jit_merge_point',
     'new_array',
+    'new_array_clear',
     'newstr',
     'setarrayitem_gc_i',
     'setarrayitem_gc_r',
diff --git a/rpython/jit/codewriter/codewriter.py b/rpython/jit/codewriter/codewriter.py
--- a/rpython/jit/codewriter/codewriter.py
+++ b/rpython/jit/codewriter/codewriter.py
@@ -13,7 +13,7 @@
 
 class CodeWriter(object):
     callcontrol = None    # for tests
-    debug = False
+    debug = True
 
     def __init__(self, cpu=None, jitdrivers_sd=[]):
         self.cpu = cpu
diff --git a/rpython/jit/codewriter/heaptracker.py b/rpython/jit/codewriter/heaptracker.py
--- a/rpython/jit/codewriter/heaptracker.py
+++ b/rpython/jit/codewriter/heaptracker.py
@@ -125,3 +125,19 @@
     vtable = descr.as_vtable_size_descr()._corresponding_vtable
     vtable = llmemory.cast_ptr_to_adr(vtable)
     return adr2int(vtable)
+    
+def gc_fielddescrs(gccache, STRUCT, res=None):
+    from rpython.jit.backend.llsupport import descr
+
+    if res is None:
+        res = []
+    # order is not relevant, except for tests
+    for name in STRUCT._names:
+        FIELD = getattr(STRUCT, name)
+        if FIELD is lltype.Void:
+            continue
+        elif isinstance(FIELD, lltype.Struct):
+            gc_fielddescrs(gccache, FIELD, res)
+        elif isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+            res.append(descr.get_field_descr(gccache, STRUCT, name))
+    return res
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -612,8 +612,43 @@
             # XXX only strings or simple arrays for now
             ARRAY = op.args[0].value
             arraydescr = self.cpu.arraydescrof(ARRAY)
-            return SpaceOperation('new_array', [op.args[2], arraydescr],
-                                  op.result)
+            if op.args[1].value.get('zero', False):
+                opname = 'new_array_clear'
+            elif ((isinstance(ARRAY.OF, lltype.Ptr) and ARRAY.OF._needsgc()) or
+                  isinstance(ARRAY.OF, lltype.Struct)):
+                opname = 'new_array_clear'
+            else:
+                opname = 'new_array'
+            return SpaceOperation(opname, [op.args[2], arraydescr], op.result)
+
+    def zero_contents(self, ops, v, TYPE):
+        """Extend 'ops' with setfield operations that store FIELD._defl()
+        into every field of the struct TYPE behind 'v' (substructs too).
+        Return ops[0] if 'ops' ends up with one operation, else 'ops'."""
+        if isinstance(TYPE, lltype.Struct):
+            for name, FIELD in TYPE._flds.iteritems():
+                if isinstance(FIELD, lltype.Struct):
+                    # substruct
+                    self.zero_contents(ops, v, FIELD)
+                else:
+                    c_name = Constant(name, lltype.Void)
+                    c_null = Constant(FIELD._defl(), FIELD)
+                    op = SpaceOperation('setfield', [v, c_name, c_null], None)
+                    self.extend_with(ops, self.rewrite_op_setfield(op,
+                                          override_type=TYPE))
+        elif isinstance(TYPE, lltype.Array):
+            assert False # this operation disappeared
+        else:
+            raise TypeError("Expected struct or array, got '%r'" % (TYPE,))
+        return ops[0] if len(ops) == 1 else ops
+
+    def extend_with(self, l, ops):
+        # 'ops' may be None (nothing to add), a list of operations
+        # (spliced into 'l'), or a single operation (appended to 'l').
+        if isinstance(ops, list):
+            l.extend(ops)
+        elif ops is not None:
+            l.append(ops)
 
     def rewrite_op_free(self, op):
         d = op.args[1].value.copy()
@@ -759,13 +794,17 @@
                    op1]
         return op1
 
-    def rewrite_op_setfield(self, op):
+    def rewrite_op_setfield(self, op, override_type=None):
         if self.is_typeptr_getset(op):
             # ignore the operation completely -- instead, it's done by 'new'
             return
         # turn the flow graph 'setfield' operation into our own version
         [v_inst, c_fieldname, v_value] = op.args
         RESULT = v_value.concretetype
+        if override_type is not None:
+            TYPE = override_type
+        else:
+            TYPE = v_inst.concretetype.TO
         if RESULT is lltype.Void:
             return
         # check for virtualizable
@@ -775,10 +814,12 @@
             return [SpaceOperation('-live-', [], None),
                     SpaceOperation('setfield_vable_%s' % kind,
                                    [v_inst, v_value, descr], None)]
-        self.check_field_access(v_inst.concretetype.TO)
-        argname = getattr(v_inst.concretetype.TO, '_gckind', 'gc')
-        descr = self.cpu.fielddescrof(v_inst.concretetype.TO,
-                                      c_fieldname.value)
+        self.check_field_access(TYPE)
+        if override_type:
+            argname = 'gc'
+        else:
+            argname = getattr(TYPE, '_gckind', 'gc')
+        descr = self.cpu.fielddescrof(TYPE, c_fieldname.value)
         kind = getkind(RESULT)[0]
         if argname == 'raw' and kind == 'r':
             raise Exception("setfield_raw_r not supported")
@@ -860,7 +901,10 @@
         if op.args[1].value['flavor'] == 'raw':
             return self._rewrite_raw_malloc(op, 'raw_malloc_fixedsize', [])
         #
-        assert op.args[1].value == {'flavor': 'gc'}
+        if op.args[1].value.get('zero', False):
+            zero = True
+        else:
+            zero = False
         STRUCT = op.args[0].value
         vtable = heaptracker.get_vtable_for_gcstruct(self.cpu, STRUCT)
         if vtable:
@@ -881,7 +925,25 @@
         else:
             opname = 'new'
         sizedescr = self.cpu.sizeof(STRUCT)
-        return SpaceOperation(opname, [sizedescr], op.result)
+        op1 = SpaceOperation(opname, [sizedescr], op.result)
+        if zero:
+            return self.zero_contents([op1], op.result, STRUCT)
+        return op1
+
+    def _has_gcptrs_in(self, STRUCT):
+        # Does STRUCT contain a GC pointer?  For an array the answer is
+        # decided by its item type; structs nested inside are searched
+        # recursively.
+        if isinstance(STRUCT, lltype.Array):
+            STRUCT = STRUCT.OF
+            if not isinstance(STRUCT, lltype.Struct):
+                return isinstance(STRUCT, lltype.Ptr) and STRUCT._needsgc()
+        for FIELD in STRUCT._flds.values():
+            if isinstance(FIELD, lltype.Ptr) and FIELD._needsgc():
+                return True
+            if isinstance(FIELD, lltype.Struct) and self._has_gcptrs_in(FIELD):
+                return True
+        return False
 
     def rewrite_op_getinteriorarraysize(self, op):
         # only supports strings and unicodes
@@ -1606,7 +1668,13 @@
             v = Variable('new_length')
             v.concretetype = lltype.Signed
             ops.append(SpaceOperation('int_force_ge_zero', [v_length], v))
-        ops.append(SpaceOperation('new_array', [v, arraydescr], op.result))
+        ARRAY = op.result.concretetype.TO
+        if ((isinstance(ARRAY.OF, lltype.Ptr) and ARRAY.OF._needsgc()) or
+               isinstance(ARRAY.OF, lltype.Struct)):
+            opname = 'new_array_clear'
+        else:
+            opname = 'new_array'
+        ops.append(SpaceOperation(opname, [v, arraydescr], op.result))
         return ops
 
     def do_fixed_list_len(self, op, args, arraydescr):
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -529,6 +529,35 @@
     assert op1.opname == 'new'
     assert op1.args == [('sizedescr', S)]
 
+def test_malloc_new_zero_2():
+    S = lltype.GcStruct('S', ('x', lltype.Signed))
+    v = varoftype(lltype.Ptr(S))
+    op = SpaceOperation('malloc', [Constant(S, lltype.Void),
+                                   Constant({'flavor': 'gc',
+                                             'zero': True}, lltype.Void)], v)
+    op1, op2 = Transformer(FakeCPU()).rewrite_operation(op)
+    assert op1.opname == 'new'
+    assert op1.args == [('sizedescr', S)]
+    assert op2.opname == 'setfield_gc_i'
+    assert op2.args[0] == v
+
+def test_malloc_new_zero_nested():
+    S0 = lltype.GcStruct('S0')
+    S = lltype.Struct('S', ('x', lltype.Ptr(S0)))
+    S2 = lltype.GcStruct('S2', ('parent', S),
+                         ('xx', lltype.Ptr(S0)))
+    v = varoftype(lltype.Ptr(S2))
+    op = SpaceOperation('malloc', [Constant(S2, lltype.Void),
+                                   Constant({'flavor': 'gc',
+                                             'zero': True}, lltype.Void)], v)
+    op1, op2, op3 = Transformer(FakeCPU()).rewrite_operation(op)
+    assert op1.opname == 'new'
+    assert op1.args == [('sizedescr', S2)]
+    assert op2.opname == 'setfield_gc_r'
+    assert op2.args[0] == v
+    assert op3.opname == 'setfield_gc_r'
+    assert op3.args[0] == v
+
 def test_malloc_new_with_vtable():
     vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
     S = lltype.GcStruct('S', ('parent', rclass.OBJECT))
@@ -1026,6 +1055,15 @@
     assert op1.args == [v1]
     assert op1.result == v2
 
+def test_malloc_varsize_zero():
+    c_A = Constant(lltype.GcArray(lltype.Signed), lltype.Void)
+    v1 = varoftype(lltype.Signed)
+    v2 = varoftype(c_A.value)
+    c_flags = Constant({"flavor": "gc", "zero": True}, lltype.Void)
+    op = SpaceOperation('malloc_varsize', [c_A, c_flags, v1], v2)
+    op1 = Transformer(FakeCPU()).rewrite_operation(op)
+    assert op1.opname == 'new_array_clear'
+
 def test_str_concat():
     # test that the oopspec is present and correctly transformed
     PSTR = lltype.Ptr(rstr.STR)
diff --git a/rpython/jit/codewriter/test/test_list.py b/rpython/jit/codewriter/test/test_list.py
--- a/rpython/jit/codewriter/test/test_list.py
+++ b/rpython/jit/codewriter/test/test_list.py
@@ -11,6 +11,7 @@
 # ____________________________________________________________
 
 FIXEDLIST = lltype.Ptr(lltype.GcArray(lltype.Signed))
+FIXEDPTRLIST = lltype.Ptr(lltype.GcArray(FIXEDLIST))
 VARLIST = lltype.Ptr(lltype.GcStruct('VARLIST',
                                      ('length', lltype.Signed),
                                      ('items', FIXEDLIST),
@@ -100,6 +101,8 @@
     builtin_test('newlist', [Constant(5, lltype.Signed),
                              varoftype(lltype.Signed)], FIXEDLIST,
                  NotSupported)
+    builtin_test('newlist', [], FIXEDPTRLIST,
+                 """new_array_clear $0, <ArrayDescr> -> %r0""")
 
 def test_fixed_ll_arraycopy():
     builtin_test('list.ll_arraycopy',
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1008,7 +1008,11 @@
                        itemsdescr, arraydescr):
         result = cpu.bh_new(structdescr)
         cpu.bh_setfield_gc_i(result, length, lengthdescr)
-        items = cpu.bh_new_array(length, arraydescr)
+        if (arraydescr.is_array_of_structs() or
+            arraydescr.is_array_of_pointers()):
+            items = cpu.bh_new_array_clear(length, arraydescr)
+        else:
+            items = cpu.bh_new_array(length, arraydescr)
         cpu.bh_setfield_gc_r(result, items, itemsdescr)
         return result
 
@@ -1017,7 +1021,11 @@
                             itemsdescr, arraydescr):
         result = cpu.bh_new(structdescr)
         cpu.bh_setfield_gc_i(result, 0, lengthdescr)
-        items = cpu.bh_new_array(lengthhint, arraydescr)
+        if (arraydescr.is_array_of_structs() or
+            arraydescr.is_array_of_pointers()):
+            items = cpu.bh_new_array_clear(lengthhint, arraydescr)
+        else:
+            items = cpu.bh_new_array(lengthhint, arraydescr)
         cpu.bh_setfield_gc_r(result, items, itemsdescr)
         return result
 
@@ -1153,6 +1161,10 @@
     def bhimpl_new_array(cpu, length, arraydescr):
         return cpu.bh_new_array(length, arraydescr)
 
+    @arguments("cpu", "i", "d", returns="r")
+    def bhimpl_new_array_clear(cpu, length, arraydescr):
+        return cpu.bh_new_array_clear(length, arraydescr)        
+
     @arguments("cpu", "r", "i", "d", returns="i")
     def bhimpl_getarrayitem_gc_i(cpu, array, index, arraydescr):
         return cpu.bh_getarrayitem_gc_i(array, index, arraydescr)
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -325,6 +325,8 @@
                          rop.INCREMENT_DEBUG_COUNTER,
                          rop.COND_CALL_GC_WB,
                          rop.COND_CALL_GC_WB_ARRAY,
+                         rop.ZERO_PTR_FIELD,
+                         rop.ZERO_ARRAY,
                          rop.DEBUG_MERGE_POINT,
                          rop.JIT_DEBUG,
                          rop.SETARRAYITEM_RAW,
diff --git a/rpython/jit/metainterp/gc.py b/rpython/jit/metainterp/gc.py
--- a/rpython/jit/metainterp/gc.py
+++ b/rpython/jit/metainterp/gc.py
@@ -26,7 +26,7 @@
     malloc_zero_filled = True
 
 class GC_incminimark(GcDescription):
-    malloc_zero_filled = True
+    malloc_zero_filled = False
 
 
 def get_description(config):
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -24,6 +24,8 @@
         return (1 << ((byte_size << 3) - 1)) - 1
 
 
+IS_64_BIT = sys.maxint > 2**32
+
 def next_pow2_m1(n):
     """Calculate next power of 2 greater than n minus one."""
     n |= n >> 1
@@ -31,7 +33,8 @@
     n |= n >> 4
     n |= n >> 8
     n |= n >> 16
-    n |= n >> 32
+    if IS_64_BIT:
+        n |= n >> 32
     return n
 
 
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -257,6 +257,9 @@
     def setinteriorfield(self, index, ofs, value):
         raise NotImplementedError
 
+    def get_missing_null_value(self):
+        raise NotImplementedError    # only for VArrayValue
+
 
 class ConstantValue(OptValue):
     def __init__(self, box):
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -502,6 +502,8 @@
                                        descr=arraydescr)
                     self.optimizer.send_extra_operation(newop)
                     val = self.getvalue(resbox)
+                if val is None:
+                    continue
                 if dest_value.is_virtual():
                     dest_value.setitem(index + dest_start, val)
                 else:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py b/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -150,10 +150,10 @@
     def test_nonmatching_arraystruct_1(self):
         ops = """
         [p1, f0]
-        p2 = new_array(3, descr=complexarraydescr)
+        p2 = new_array_clear(3, descr=complexarraydescr)
         setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
         label(p2, f0)
-        p4 = new_array(3, descr=complexarraydescr)
+        p4 = new_array_clear(3, descr=complexarraydescr)
         setinteriorfield_gc(p4, 2, f0, descr=compleximagdescr)
         jump(p4, f0)
         """
@@ -163,10 +163,10 @@
     def test_nonmatching_arraystruct_2(self):
         ops = """
         [p1, f0]
-        p2 = new_array(3, descr=complexarraydescr)
+        p2 = new_array_clear(3, descr=complexarraydescr)
         setinteriorfield_gc(p2, 2, f0, descr=complexrealdescr)
         label(p2, f0)
-        p4 = new_array(2, descr=complexarraydescr)
+        p4 = new_array_clear(2, descr=complexarraydescr)
         setinteriorfield_gc(p4, 0, f0, descr=complexrealdescr)
         jump(p4, f0)
         """
@@ -198,7 +198,7 @@
     def test_not_virtual_arraystruct(self):
         ops = """
         [p1]
-        p3 = new_array(3, descr=complexarraydescr)
+        p3 = new_array_clear(3, descr=complexarraydescr)
         label(p3)
         p4 = escape()
         jump(p4)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -940,7 +940,7 @@
     def test_virtual_array_of_struct(self):
         ops = """
         [f0, f1, f2, f3]
-        p0 = new_array(2, descr=complexarraydescr)
+        p0 = new_array_clear(2, descr=complexarraydescr)
         setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
         setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
         setinteriorfield_gc(p0, 1, f3, descr=compleximagdescr)
@@ -966,7 +966,7 @@
     def test_virtual_array_of_struct_forced(self):
         ops = """
         [f0, f1]
-        p0 = new_array(1, descr=complexarraydescr)
+        p0 = new_array_clear(1, descr=complexarraydescr)
         setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
         setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
         f2 = getinteriorfield_gc(p0, 0, descr=complexrealdescr)
@@ -978,7 +978,7 @@
         expected = """
         [f0, f1]
         f2 = float_mul(f0, f1)
-        p0 = new_array(1, descr=complexarraydescr)
+        p0 = new_array_clear(1, descr=complexarraydescr)
         setinteriorfield_gc(p0, 0, f1, descr=compleximagdescr)
         setinteriorfield_gc(p0, 0, f0, descr=complexrealdescr)
         i0 = escape(f2, p0)
@@ -989,7 +989,7 @@
     def test_virtual_array_of_struct_len(self):
         ops = """
         []
-        p0 = new_array(2, descr=complexarraydescr)
+        p0 = new_array_clear(2, descr=complexarraydescr)
         i0 = arraylen_gc(p0)
         finish(i0)
         """
@@ -1056,7 +1056,7 @@
         """
         self.optimize_loop(ops, expected)
 
-    def test_nonvirtual_dont_write_null_fields_on_force(self):
+    def test_nonvirtual_write_null_fields_on_force(self):
         ops = """
         [i]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -1070,6 +1070,7 @@
         expected = """
         [i]
         p1 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p1, 0, descr=valuedescr)
         escape(p1)
         i2 = getfield_gc(p1, descr=valuedescr)
         jump(i2)
@@ -1134,12 +1135,12 @@
         [i1]
         p1 = new_array(2, descr=arraydescr)
         setarrayitem_gc(p1, 0, 25, descr=arraydescr)
-        i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
+        i2 = getarrayitem_gc(p1, 0, descr=arraydescr)
         jump(i2)
         """
         expected = """
         [i1]
-        jump(0)
+        jump(25)
         """
         self.optimize_loop(ops, expected)
 
@@ -1176,7 +1177,7 @@
         """
         self.optimize_loop(ops, expected)
 
-    def test_nonvirtual_array_dont_write_null_fields_on_force(self):
+    def test_nonvirtual_array_write_null_fields_on_force(self):
         ops = """
         [i1]
         p1 = new_array(5, descr=arraydescr)
@@ -1189,6 +1190,7 @@
         [i1]
         p1 = new_array(5, descr=arraydescr)
         setarrayitem_gc(p1, 0, i1, descr=arraydescr)
+        setarrayitem_gc(p1, 1, 0, descr=arraydescr)
         escape(p1)
         jump(i1)
         """
@@ -2975,6 +2977,7 @@
         [p1]
         p0 = force_token()
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p0, descr=virtualtokendescr)
         escape(p2)
         setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -3007,6 +3010,7 @@
         p3 = force_token()
         #
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         setfield_gc(p0, p2, descr=nextdescr)
         #
@@ -3046,6 +3050,7 @@
         p3 = force_token()
         #
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         setfield_gc(p0, p2, descr=nextdescr)
         #
@@ -3122,6 +3127,7 @@
         [i1]
         p3 = force_token()
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         escape(p2)
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -3147,6 +3153,7 @@
         [i1, p1]
         p3 = force_token()
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         escape(p2)
         setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -4784,15 +4791,18 @@
         ops = """
         [p0]
         p1 = newstr(4)
+        strsetitem(p1, 2, 0)
         setfield_gc(p0, p1, descr=valuedescr)
         jump(p0)
         """
-        # It used to be the case that this would have a series of
-        # strsetitem(p1, idx, 0), which was silly because memory is 0 filled
-        # when allocated.
+        # This test is slightly bogus: the string is not fully initialized.
+        # I *think* it is still right to not have a series of extra
+        # strsetitem(p1, idx, 0).  We do preserve the single one from the
+        # source, though.
         expected = """
         [p0]
         p1 = newstr(4)
+        strsetitem(p1, 2, 0)
         setfield_gc(p0, p1, descr=valuedescr)
         jump(p0)
         """
@@ -5108,6 +5118,9 @@
         strsetitem(p1, 6, i0)
         strsetitem(p1, 7, i0)
         strsetitem(p1, 8, 3)
+        strsetitem(p1, 9, 0)
+        strsetitem(p1, 10, 0)
+        strsetitem(p1, 11, 0)
         finish(p1)
         """
         self.optimize_strunicode_loop(ops, expected)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1397,7 +1397,7 @@
         """
         self.optimize_loop(ops, expected)
 
-    def test_nonvirtual_dont_write_null_fields_on_force(self):
+    def test_nonvirtual_write_null_fields_on_force(self):
         ops = """
         [i]
         p1 = new_with_vtable(ConstClass(node_vtable))
@@ -1411,6 +1411,7 @@
         expected = """
         [i]
         p1 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(p1, 0, descr=valuedescr)
         escape(p1)
         i2 = getfield_gc(p1, descr=valuedescr)
         jump(i2)
@@ -1562,7 +1563,7 @@
         [i1]
         p1 = new_array(2, descr=arraydescr)
         setarrayitem_gc(p1, 0, 25, descr=arraydescr)
-        i2 = getarrayitem_gc(p1, 1, descr=arraydescr)
+        i2 = getarrayitem_gc(p1, 0, descr=arraydescr)
         jump(i2)
         """
         preamble = """
@@ -1608,7 +1609,7 @@
         """
         self.optimize_loop(ops, expected)
 
-    def test_nonvirtual_array_dont_write_null_fields_on_force(self):
+    def test_nonvirtual_array_write_null_fields_on_force(self):
         ops = """
         [i1]
         p1 = new_array(5, descr=arraydescr)
@@ -1621,6 +1622,7 @@
         [i1]
         p1 = new_array(5, descr=arraydescr)
         setarrayitem_gc(p1, 0, i1, descr=arraydescr)
+        setarrayitem_gc(p1, 1, 0, descr=arraydescr)
         escape(p1)
         jump(i1)
         """
@@ -3749,6 +3751,7 @@
         [p1]
         p0 = force_token()
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p0, descr=virtualtokendescr)
         escape(p2)
         setfield_gc(p2, p1, descr=virtualforceddescr)
@@ -3781,6 +3784,7 @@
         p3 = force_token()
         #
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         setfield_gc(p0, p2, descr=nextdescr)
         #
@@ -3820,6 +3824,7 @@
         p3 = force_token()
         #
         p2 = new_with_vtable(ConstClass(jit_virtual_ref_vtable))
+        setfield_gc(p2, NULL, descr=virtualforceddescr)
         setfield_gc(p2, p3, descr=virtualtokendescr)
         setfield_gc(p0, p2, descr=nextdescr)
         #
@@ -3907,6 +3912,7 @@


More information about the pypy-commit mailing list