[pypy-svn] r77403 - in pypy/trunk/pypy: jit/backend/llgraph jit/backend/llsupport jit/backend/llsupport/test jit/backend/test jit/backend/x86 jit/metainterp rlib rpython/lltypesystem rpython/memory rpython/memory/gc rpython/memory/gc/test rpython/memory/gctransform rpython/memory/test translator/c/test

arigo at codespeak.net
Mon Sep 27 13:43:34 CEST 2010


Author: arigo
Date: Mon Sep 27 13:43:31 2010
New Revision: 77403

Added:
   pypy/trunk/pypy/rpython/memory/gc/inspector.py
      - copied unchanged from r77400, pypy/branch/smaller-writebarrier/pypy/rpython/memory/gc/inspector.py
Removed:
   pypy/trunk/pypy/rpython/memory/gc/inspect.py
Modified:
   pypy/trunk/pypy/jit/backend/llgraph/llimpl.py
   pypy/trunk/pypy/jit/backend/llsupport/gc.py
   pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py
   pypy/trunk/pypy/jit/backend/test/runner_test.py
   pypy/trunk/pypy/jit/backend/x86/assembler.py
   pypy/trunk/pypy/jit/backend/x86/regalloc.py
   pypy/trunk/pypy/jit/metainterp/resoperation.py
   pypy/trunk/pypy/rlib/rstring.py
   pypy/trunk/pypy/rpython/lltypesystem/llarena.py
   pypy/trunk/pypy/rpython/memory/gc/base.py
   pypy/trunk/pypy/rpython/memory/gc/generation.py
   pypy/trunk/pypy/rpython/memory/gc/minimark.py
   pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
   pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py
   pypy/trunk/pypy/rpython/memory/gctransform/framework.py
   pypy/trunk/pypy/rpython/memory/gcwrapper.py
   pypy/trunk/pypy/rpython/memory/test/test_gc.py
   pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
   pypy/trunk/pypy/translator/c/test/test_lltyped.py
Log:
Merge branch/smaller-writebarrier.  Reduce the number of arguments
passed to the write barrier, and tweak the minimark GC a bit more.
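
For reference, the interface change at the heart of this merge, as a
minimal sketch using the names from the diffs below:

    # before: the barrier received both the object written into and the
    # value being stored
    gc.write_barrier(newvalue, addr_struct)

    # after: only the object written into is passed; the GC now
    # pessimistically assumes that the stored value may be young
    gc.write_barrier(addr_struct)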


Modified: pypy/trunk/pypy/jit/backend/llgraph/llimpl.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llgraph/llimpl.py	(original)
+++ pypy/trunk/pypy/jit/backend/llgraph/llimpl.py	Mon Sep 27 13:43:31 2010
@@ -129,7 +129,7 @@
     'arraylen_gc'     : (('ref',), 'int'),
     'call'            : (('ref', 'varargs'), 'intorptr'),
     'call_assembler'  : (('varargs',), 'intorptr'),
-    'cond_call_gc_wb' : (('ptr', 'ptr'), None),
+    'cond_call_gc_wb' : (('ptr',), None),
     'oosend'          : (('varargs',), 'intorptr'),
     'oosend_pure'     : (('varargs',), 'intorptr'),
     'guard_true'      : (('bool',), None),
@@ -810,7 +810,7 @@
                  FLOAT: 0.0}
             return d[calldescr.typeinfo]
 
-    def op_cond_call_gc_wb(self, descr, a, b):
+    def op_cond_call_gc_wb(self, descr, a):
         py.test.skip("cond_call_gc_wb not supported")
 
     def op_oosend(self, descr, obj, *args):

Modified: pypy/trunk/pypy/jit/backend/llsupport/gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/gc.py	(original)
+++ pypy/trunk/pypy/jit/backend/llsupport/gc.py	Mon Sep 27 13:43:31 2010
@@ -404,7 +404,7 @@
         self.GC_MALLOC_BASIC = lltype.Ptr(lltype.FuncType(
             [lltype.Signed, lltype.Signed], llmemory.GCREF))
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
-            [llmemory.Address, llmemory.Address], lltype.Void))
+            [llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
         #
         def malloc_array(itemsize, tid, num_elem):
@@ -550,8 +550,7 @@
             # the GC, and call it immediately
             llop1 = self.llop1
             funcptr = llop1.get_write_barrier_failing_case(self.WB_FUNCPTR)
-            funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
-                    llmemory.cast_ptr_to_adr(gcref_newptr))
+            funcptr(llmemory.cast_ptr_to_adr(gcref_struct))
 
     def rewrite_assembler(self, cpu, operations):
         # Perform two kinds of rewrites in parallel:
@@ -590,22 +589,24 @@
                 v = op.getarg(1)
                 if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                              bool(v.value)): # store a non-NULL
-                    self._gen_write_barrier(newops, op.getarg(0), v)
+                    self._gen_write_barrier(newops, op.getarg(0))
                     op = op.copy_and_change(rop.SETFIELD_RAW)
             # ---------- write barrier for SETARRAYITEM_GC ----------
             if op.getopnum() == rop.SETARRAYITEM_GC:
                 v = op.getarg(2)
                 if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
                                              bool(v.value)): # store a non-NULL
-                    self._gen_write_barrier(newops, op.getarg(0), v)
+                    # XXX detect when we should produce a
+                    # write_barrier_from_array
+                    self._gen_write_barrier(newops, op.getarg(0))
                     op = op.copy_and_change(rop.SETARRAYITEM_RAW)
             # ----------
             newops.append(op)
         del operations[:]
         operations.extend(newops)
 
-    def _gen_write_barrier(self, newops, v_base, v_value):
-        args = [v_base, v_value]
+    def _gen_write_barrier(self, newops, v_base):
+        args = [v_base]
         newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
                                    descr=self.write_barrier_descr))
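
To illustrate the rewriting performed here: a setfield_gc that stores a
non-NULL pointer now becomes the following pair of operations (p0, p1
and fielddescr are placeholders):

    #   before:  setfield_gc(p0, p1, descr=fielddescr)
    #   after:   cond_call_gc_wb(p0, descr=wbdescr)
    #            setfield_raw(p0, p1, descr=fielddescr)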
 

Modified: pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py	(original)
+++ pypy/trunk/pypy/jit/backend/llsupport/test/test_gc.py	Mon Sep 27 13:43:31 2010
@@ -141,8 +141,8 @@
                             repr(offset_to_length), p))
         return p
 
-    def _write_barrier_failing_case(self, adr_struct, adr_newptr):
-        self.record.append(('barrier', adr_struct, adr_newptr))
+    def _write_barrier_failing_case(self, adr_struct):
+        self.record.append(('barrier', adr_struct))
 
     def get_write_barrier_failing_case(self, FPTRTYPE):
         return llhelper(FPTRTYPE, self._write_barrier_failing_case)
@@ -238,7 +238,6 @@
         s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
         r_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, r)
         s_adr = llmemory.cast_ptr_to_adr(s)
-        r_adr = llmemory.cast_ptr_to_adr(r)
         #
         s_hdr.tid &= ~gc_ll_descr.GCClass.JIT_WB_IF_FLAG
         gc_ll_descr.do_write_barrier(s_gcref, r_gcref)
@@ -246,7 +245,7 @@
         #
         s_hdr.tid |= gc_ll_descr.GCClass.JIT_WB_IF_FLAG
         gc_ll_descr.do_write_barrier(s_gcref, r_gcref)
-        assert self.llop1.record == [('barrier', s_adr, r_adr)]
+        assert self.llop1.record == [('barrier', s_adr)]
 
     def test_gen_write_barrier(self):
         gc_ll_descr = self.gc_ll_descr
@@ -254,13 +253,11 @@
         #
         newops = []
         v_base = BoxPtr()
-        v_value = BoxPtr()
-        gc_ll_descr._gen_write_barrier(newops, v_base, v_value)
+        gc_ll_descr._gen_write_barrier(newops, v_base)
         assert llop1.record == []
         assert len(newops) == 1
         assert newops[0].getopnum() == rop.COND_CALL_GC_WB
         assert newops[0].getarg(0) == v_base
-        assert newops[0].getarg(1) == v_value
         assert newops[0].result is None
         wbdescr = newops[0].getdescr()
         assert isinstance(wbdescr.jit_wb_if_flag, int)
@@ -360,7 +357,6 @@
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
         assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
         assert operations[0].result is None
         #
         assert operations[1].getopnum() == rop.SETFIELD_RAW
@@ -384,7 +380,6 @@
         #
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
         assert operations[0].getarg(0) == v_base
-        assert operations[0].getarg(1) == v_value
         assert operations[0].result is None
         #
         assert operations[1].getopnum() == rop.SETARRAYITEM_RAW

Modified: pypy/trunk/pypy/jit/backend/test/runner_test.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/test/runner_test.py	(original)
+++ pypy/trunk/pypy/jit/backend/test/runner_test.py	Mon Sep 27 13:43:31 2010
@@ -1406,12 +1406,12 @@
         assert not excvalue
 
     def test_cond_call_gc_wb(self):
-        def func_void(a, b):
-            record.append((a, b))
+        def func_void(a):
+            record.append(a)
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1432,10 +1432,10 @@
             sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
             del record[:]
             self.execute_operation(rop.COND_CALL_GC_WB,
-                                   [BoxPtr(sgcref), ConstInt(-2121)],
+                                   [BoxPtr(sgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, -2121)]
+                assert record == [s]
             else:
                 assert record == []
 

Modified: pypy/trunk/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/assembler.py	Mon Sep 27 13:43:31 2010
@@ -1765,6 +1765,7 @@
         jz_location = self.mc.get_relative_pos()
         # the following is supposed to be the slow path, so whenever possible
         # we choose the most compact encoding over the most efficient one.
+        # XXX improve a bit, particularly for IS_X86_64.
         for i in range(len(arglocs)-1, -1, -1):
             loc = arglocs[i]
             if isinstance(loc, RegLoc):
@@ -1777,12 +1778,11 @@
                     self.mc.PUSH_i32(loc.getint())
         
         if IS_X86_64:
-            # We clobber these registers to pass the arguments, but that's
+            # We clobber this register to pass the argument, but that's
             # okay, because consider_cond_call_gc_wb makes sure that any
             # caller-save registers with values in them are present in arglocs,
             # so they are saved on the stack above and restored below 
             self.mc.MOV_rs(edi.value, 0)
-            self.mc.MOV_rs(esi.value, 8)
 
         # misaligned stack in the call, but it's ok because the write barrier
         # is not going to call anything more.  Also, this assumes that the

Modified: pypy/trunk/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/regalloc.py	Mon Sep 27 13:43:31 2010
@@ -696,13 +696,9 @@
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
-        loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
-        # ^^^ we force loc_newvalue in a reg (unless it's a Const),
-        # because it will be needed anyway by the following setfield_gc.
-        # It avoids loading it twice from the memory.
         loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args,
                                                 imm_fine=False)
-        arglocs = [loc_base, loc_newvalue]
+        arglocs = [loc_base]
         # add eax, ecx and edx as extra "arguments" to ensure they are
         # saved and restored.  Fish in self.rm to know which of these
         # registers really need to be saved (a bit of a hack).  Moreover,

Modified: pypy/trunk/pypy/jit/metainterp/resoperation.py
==============================================================================
--- pypy/trunk/pypy/jit/metainterp/resoperation.py	(original)
+++ pypy/trunk/pypy/jit/metainterp/resoperation.py	Mon Sep 27 13:43:31 2010
@@ -456,7 +456,7 @@
     'UNICODESETITEM/3',
     'NEWUNICODE/1',
     #'RUNTIMENEW/1',     # ootype operation    
-    'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
+    'COND_CALL_GC_WB/1d',  # [objptr]   (for the write barrier)
     'DEBUG_MERGE_POINT/1',      # debugging only
     'VIRTUAL_REF_FINISH/2',   # removed before it's passed to the backend
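
The suffix of each entry in this table encodes the operand count, with
a trailing 'd' when the operation carries a descr; a sketch of the
convention (the parsing below is illustrative, not the actual setup
code):

    entry = 'COND_CALL_GC_WB/1d'
    name, signature = entry.split('/')
    withdescr = signature.endswith('d')     # True: carries a descr
    arity = int(signature.rstrip('d'))      # now one argument: [objptr]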
 

Modified: pypy/trunk/pypy/rlib/rstring.py
==============================================================================
--- pypy/trunk/pypy/rlib/rstring.py	(original)
+++ pypy/trunk/pypy/rlib/rstring.py	Mon Sep 27 13:43:31 2010
@@ -46,9 +46,7 @@
 
 # -------------- public API ---------------------------------
 
-# the following number is the maximum size of an RPython unicode
-# string that goes into the nursery of the minimark GC.
-INIT_SIZE = 56
+INIT_SIZE = 100 # XXX tweak
 
 class AbstractStringBuilder(object):
     def __init__(self, init_size=INIT_SIZE):

Modified: pypy/trunk/pypy/rpython/lltypesystem/llarena.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llarena.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llarena.py	Mon Sep 27 13:43:31 2010
@@ -472,8 +472,13 @@
     clear_large_memory_chunk = llmemory.raw_memclear
 
 
+llimpl_malloc = rffi.llexternal('malloc', [lltype.Signed], llmemory.Address,
+                                sandboxsafe=True, _nowrapper=True)
+llimpl_free = rffi.llexternal('free', [llmemory.Address], lltype.Void,
+                              sandboxsafe=True, _nowrapper=True)
+
 def llimpl_arena_malloc(nbytes, zero):
-    addr = llmemory.raw_malloc(nbytes)
+    addr = llimpl_malloc(nbytes)
     if zero and bool(addr):
         clear_large_memory_chunk(addr, nbytes)
     return addr
@@ -483,11 +488,8 @@
                   llfakeimpl=arena_malloc,
                   sandboxsafe=True)
 
-def llimpl_arena_free(arena_addr):
-    # NB. minimark.py assumes that arena_free() is actually just a raw_free().
-    llmemory.raw_free(arena_addr)
 register_external(arena_free, [llmemory.Address], None, 'll_arena.arena_free',
-                  llimpl=llimpl_arena_free,
+                  llimpl=llimpl_free,
                   llfakeimpl=arena_free,
                   sandboxsafe=True)
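
A sketch of how the new raw externals behave, given the signatures
declared above (the remark about _nowrapper is an assumption about
rffi, not part of this diff):

    # _nowrapper=True calls straight into C, skipping the usual wrapper
    # logic around external calls; sandboxsafe=True allows the call when
    # translating a sandboxed interpreter.
    addr = llimpl_malloc(1024)     # lltype.Signed -> llmemory.Address
    if addr:                       # a null address signals failure
        llimpl_free(addr)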
 

Modified: pypy/trunk/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/base.py	Mon Sep 27 13:43:31 2010
@@ -76,7 +76,7 @@
     def set_root_walker(self, root_walker):
         self.root_walker = root_walker
 
-    def write_barrier(self, newvalue, addr_struct):
+    def write_barrier(self, addr_struct):
         pass
 
     def statistics(self, index):

Modified: pypy/trunk/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/generation.py	Mon Sep 27 13:43:31 2010
@@ -321,7 +321,7 @@
         addr = pointer.address[0]
         newaddr = self.copy(addr)
         pointer.address[0] = newaddr
-        self.write_into_last_generation_obj(obj, newaddr)
+        self.write_into_last_generation_obj(obj)
 
     # ____________________________________________________________
     # Implementation of nursery-only collections
@@ -452,11 +452,12 @@
     #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
     JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
 
-    def write_barrier(self, newvalue, addr_struct):
+    def write_barrier(self, addr_struct):
         if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
-            self.remember_young_pointer(addr_struct, newvalue)
+            self.remember_young_pointer(addr_struct)
 
     def _setup_wb(self):
+        DEBUG = self.DEBUG
         # The purpose of attaching remember_young_pointer to the instance
         # instead of keeping it as a regular method is to help the JIT call it.
         # Additionally, it makes the code in write_barrier() marginally smaller
@@ -464,33 +465,24 @@
         # For x86, there is also an extra requirement: when the JIT calls
         # remember_young_pointer(), it assumes that it will not touch the SSE
         # registers, so it does not save and restore them (that's a *hack*!).
-        def remember_young_pointer(addr_struct, addr):
+        def remember_young_pointer(addr_struct):
             #llop.debug_print(lltype.Void, "\tremember_young_pointer",
             #                 addr_struct, "<-", addr)
-            ll_assert(not self.is_in_nursery(addr_struct),
-                         "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            # if we have tagged pointers around, we first need to check whether
-            # we have valid pointer here, otherwise we can do it after the
-            # is_in_nursery check
-            if (self.config.taggedpointers and
-                not self.is_valid_gc_object(addr)):
-                return
-            if self.is_in_nursery(addr):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
-            elif (not self.config.taggedpointers and
-                  not self.is_valid_gc_object(addr)):
-                return
-            self.write_into_last_generation_obj(addr_struct, addr)
+            if DEBUG:
+                ll_assert(not self.is_in_nursery(addr_struct),
+                          "nursery object with GCFLAG_NO_YOUNG_PTRS")
+            self.old_objects_pointing_to_young.append(addr_struct)
+            self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
+            self.write_into_last_generation_obj(addr_struct)
         remember_young_pointer._dont_inline_ = True
         self.remember_young_pointer = remember_young_pointer
 
-    def write_into_last_generation_obj(self, addr_struct, addr):
+    def write_into_last_generation_obj(self, addr_struct):
         objhdr = self.header(addr_struct)
         if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
-            if not self.is_last_generation(addr):
-                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.last_generation_root_objects.append(addr_struct)
+            objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+            self.last_generation_root_objects.append(addr_struct)
+    write_into_last_generation_obj._always_inline_ = True
 
     def assume_young_pointers(self, addr_struct):
         objhdr = self.header(addr_struct)
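
A minimal pure-Python model of the barrier after this change (a sketch;
the header, flag and list objects stand in for the real GC structures):

    GCFLAG_NO_YOUNG_PTRS = 1 << 0      # illustrative bit values
    GCFLAG_NO_HEAP_PTRS  = 1 << 1

    def write_barrier(gc, addr_struct):
        # fast path: a single flag test; the stored value is not examined
        if gc.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
            gc.remember_young_pointer(addr_struct)

    def remember_young_pointer(gc, addr_struct):
        # pessimistically assume a young pointer was stored: record the
        # object and clear the flag, so this slow path runs at most once
        # per object between minor collections
        gc.old_objects_pointing_to_young.append(addr_struct)
        gc.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
        gc.write_into_last_generation_obj(addr_struct)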

Modified: pypy/trunk/pypy/rpython/memory/gc/minimark.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/minimark.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/minimark.py	Mon Sep 27 13:43:31 2010
@@ -1,6 +1,7 @@
 import sys
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup
 from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
 from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
 from pypy.rpython.memory.gc import minimarkpage, base, generation
 from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
@@ -92,7 +93,8 @@
         # PYPY_GC_NURSERY and fall back to half the size of
         # the L2 cache.  For 'major_collection_threshold' it will look
         # it up in the env var PYPY_GC_MAJOR_COLLECT.  It also sets
-        # 'max_heap_size' to PYPY_GC_MAX.
+        # 'max_heap_size' to PYPY_GC_MAX.  Finally, PYPY_GC_MIN sets
+        # the minimal value of 'next_major_collection_threshold'.
         "read_from_env": True,
 
         # The size of the nursery.  Note that this is only used as a
@@ -108,10 +110,10 @@
         "arena_size": 65536*WORD,
 
         # The maximum size of an object allocated compactly.  All objects
-        # that are larger are just allocated with raw_malloc().  The value
-        # chosen here is enough for a unicode string of length 56 (on 64-bits)
-        # or 60 (on 32-bits).  See rlib.rstring.INIT_SIZE.
-        "small_request_threshold": 256-WORD,
+        # that are larger are just allocated with raw_malloc().  Note that
+        # the size limit for being first allocated in the nursery is much
+        # larger; see below.
+        "small_request_threshold": 35*WORD,
 
         # Full collection threshold: after a major collection, we record
         # the total size consumed; and after every minor collection, if the
@@ -125,7 +127,16 @@
         # in regular arrays of pointers; more in arrays whose items are
         # larger.  A value of 0 disables card marking.
         "card_page_indices": 128,
-        "card_page_indices_min": 800,    # minimum number of indices for cards
+
+        # Objects whose total size is at least 'large_object' bytes are
+        # allocated out of the nursery immediately.  If the object
+        # has GC pointers in its varsized part, we use instead the
+        # higher limit 'large_object_gcptrs'.  The idea is that
+        # separately allocated objects are allocated immediately "old"
+        # and it's not good to have too many pointers from old to young
+        # objects.
+        "large_object": 1600*WORD,
+        "large_object_gcptrs": 8250*WORD,
         }
 
     def __init__(self, config, chunk_size=DEFAULT_CHUNK_SIZE,
@@ -136,7 +147,8 @@
                  small_request_threshold=5*WORD,
                  major_collection_threshold=2.5,
                  card_page_indices=0,
-                 card_page_indices_min=None,
+                 large_object=8*WORD,
+                 large_object_gcptrs=10*WORD,
                  ArenaCollectionClass=None):
         MovingGCBase.__init__(self, config, chunk_size)
         assert small_request_threshold % WORD == 0
@@ -145,16 +157,23 @@
         self.small_request_threshold = small_request_threshold
         self.major_collection_threshold = major_collection_threshold
         self.num_major_collects = 0
+        self.min_heap_size = 0.0
         self.max_heap_size = 0.0
         self.max_heap_size_already_raised = False
         #
         self.card_page_indices = card_page_indices
         if self.card_page_indices > 0:
-            self.card_page_indices_min = card_page_indices_min
             self.card_page_shift = 0
             while (1 << self.card_page_shift) < self.card_page_indices:
                 self.card_page_shift += 1
         #
+        # 'large_object' and 'large_object_gcptrs' limit how big objects
+        # can be in the nursery, so they give a lower bound on the allowed
+        # size of the nursery.
+        self.nonlarge_max = large_object - 1
+        self.nonlarge_gcptrs_max = large_object_gcptrs - 1
+        assert self.nonlarge_max <= self.nonlarge_gcptrs_max
+        #
         self.nursery      = NULL
         self.nursery_free = NULL
         self.nursery_top  = NULL
@@ -218,7 +237,7 @@
         else:
             #
             defaultsize = self.nursery_size
-            minsize = 18 * self.small_request_threshold
+            minsize = 2 * (self.nonlarge_gcptrs_max + 1)
             self.nursery_size = minsize
             self.allocate_nursery()
             #
@@ -229,28 +248,37 @@
                 newsize = generation.estimate_best_nursery_size()
                 if newsize <= 0:
                     newsize = defaultsize
+            newsize = max(newsize, minsize)
             #
             major_coll = base.read_float_from_env('PYPY_GC_MAJOR_COLLECT')
             if major_coll >= 1.0:
                 self.major_collection_threshold = major_coll
             #
+            min_heap_size = base.read_uint_from_env('PYPY_GC_MIN')
+            if min_heap_size > 0:
+                self.min_heap_size = float(min_heap_size)
+            else:
+                # defaults to 8 times the nursery
+                self.min_heap_size = newsize * 8
+            #
             max_heap_size = base.read_uint_from_env('PYPY_GC_MAX')
             if max_heap_size > 0:
                 self.max_heap_size = float(max_heap_size)
             #
             self.minor_collection()    # to empty the nursery
             llarena.arena_free(self.nursery)
-            self.nursery_size = max(newsize, minsize)
+            self.nursery_size = newsize
             self.allocate_nursery()
 
 
     def allocate_nursery(self):
         debug_start("gc-set-nursery-size")
         debug_print("nursery size:", self.nursery_size)
-        # the start of the nursery: we actually allocate a tiny bit more for
+        # the start of the nursery: we actually allocate a bit more for
         # the nursery than really needed, to simplify pointer arithmetic
-        # in malloc_fixedsize_clear().
-        extra = self.small_request_threshold
+        # in malloc_fixedsize_clear().  The few extra pages are never used
+        # anyway so it doesn't even count.
+        extra = self.nonlarge_gcptrs_max + 1
         self.nursery = llarena.arena_malloc(self.nursery_size + extra, True)
         if not self.nursery:
             raise MemoryError("cannot allocate nursery")
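
For quick reference, the tunables read by setup() above, per the
comments in this file:

    #   PYPY_GC_NURSERY        nursery size in bytes
    #   PYPY_GC_MAJOR_COLLECT  major collection threshold factor (>= 1.0)
    #   PYPY_GC_MIN            minimal next_major_collection_threshold
    #                          (defaults to 8 times the nursery size)
    #   PYPY_GC_MAX            upper bound on the heap; exceeding it
    #                          raises MemoryError
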
@@ -258,37 +286,54 @@
         self.nursery_free = self.nursery
         # the end of the nursery:
         self.nursery_top = self.nursery + self.nursery_size
-        # initialize the threshold, a bit arbitrarily
-        self.next_major_collection_threshold = (
-            self.nursery_size * self.major_collection_threshold)
+        # initialize the threshold
+        self.min_heap_size = max(self.min_heap_size, self.nursery_size *
+                                              self.major_collection_threshold)
+        self.set_major_threshold_from(0.0)
         debug_stop("gc-set-nursery-size")
 
+    def set_major_threshold_from(self, threshold):
+        # Set the next_major_collection_threshold.
+        if threshold < self.min_heap_size:
+            threshold = self.min_heap_size
+        #
+        if self.max_heap_size > 0.0 and threshold > self.max_heap_size:
+            threshold = self.max_heap_size
+            bounded = True
+        else:
+            bounded = False
+        #
+        self.next_major_collection_threshold = threshold
+        return bounded
+
 
     def malloc_fixedsize_clear(self, typeid, size, can_collect=True,
                                needs_finalizer=False, contains_weakptr=False):
         ll_assert(can_collect, "!can_collect")
         size_gc_header = self.gcheaderbuilder.size_gc_header
         totalsize = size_gc_header + size
-        rawtotalsize = llmemory.raw_malloc_usage(totalsize)
+        rawtotalsize = raw_malloc_usage(totalsize)
         #
         # If the object needs a finalizer, ask for a rawmalloc.
         # The following check should be constant-folded.
         if needs_finalizer:
             ll_assert(not contains_weakptr,
                      "'needs_finalizer' and 'contains_weakptr' both specified")
-            result = self.malloc_with_finalizer(typeid, totalsize)
+            obj = self.external_malloc(typeid, 0)
+            self.objects_with_finalizers.append(obj)
         #
-        # If totalsize is greater than small_request_threshold, ask for
-        # a rawmalloc.  The following check should be constant-folded.
-        elif rawtotalsize > self.small_request_threshold:
+        # If totalsize is greater than nonlarge_max (which should never be
+        # the case in practice), ask for a rawmalloc.  The following check
+        # should be constant-folded.
+        elif rawtotalsize > self.nonlarge_max:
             ll_assert(not contains_weakptr,
                       "'contains_weakptr' specified for a large object")
-            result = self._external_malloc(typeid, totalsize)
+            obj = self.external_malloc(typeid, 0)
             #
         else:
             # If totalsize is smaller than minimal_size_in_nursery, round it
             # up.  The following check should also be constant-folded.
-            min_size = llmemory.raw_malloc_usage(self.minimal_size_in_nursery)
+            min_size = raw_malloc_usage(self.minimal_size_in_nursery)
             if rawtotalsize < min_size:
                 totalsize = rawtotalsize = min_size
             #
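
The clamping logic of the new set_major_threshold_from(), as a
standalone pure-Python model:

    def set_major_threshold_from(threshold, min_heap_size, max_heap_size):
        threshold = max(threshold, min_heap_size)
        bounded = max_heap_size > 0.0 and threshold > max_heap_size
        if bounded:
            threshold = max_heap_size
        return threshold, bounded    # bounded=True: the max cap applied
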
@@ -306,8 +351,10 @@
             # If it is a weakref, record it (check constant-folded).
             if contains_weakptr:
                 self.young_objects_with_weakrefs.append(result+size_gc_header)
+            #
+            obj = result + size_gc_header
         #
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
+        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
 
 
     def malloc_varsize_clear(self, typeid, length, size, itemsize,
@@ -315,32 +362,41 @@
         ll_assert(can_collect, "!can_collect")
         size_gc_header = self.gcheaderbuilder.size_gc_header
         nonvarsize = size_gc_header + size
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise MemoryError
         #
-        # If totalsize is greater than small_request_threshold, ask for
-        # a rawmalloc.
-        if llmemory.raw_malloc_usage(totalsize) > self.small_request_threshold:
-            result = self._external_malloc_cardmark(typeid, totalsize, length)
+        # Compute the maximal length that makes the object still
+        # below 'nonlarge_max'.  All the following logic is usually
+        # constant-folded because self.nonlarge_max, size and itemsize
+        # are all constants (the arguments are constant due to
+        # inlining) and self.has_gcptr_in_varsize() is constant-folded.
+        if self.has_gcptr_in_varsize(typeid):
+            nonlarge_max = self.nonlarge_gcptrs_max
+        else:
+            nonlarge_max = self.nonlarge_max
+
+        if not raw_malloc_usage(itemsize):
+            too_many_items = raw_malloc_usage(nonvarsize) > nonlarge_max
+        else:
+            maxlength = nonlarge_max - raw_malloc_usage(nonvarsize)
+            maxlength = maxlength // raw_malloc_usage(itemsize)
+            too_many_items = length > maxlength
+
+        if too_many_items:
+            #
+            # If the total size of the object would be larger than
+            # 'nonlarge_max', then allocate it externally.
+            obj = self.external_malloc(typeid, length)
             #
         else:
-            # Round the size up to the next multiple of WORD.  Note that
-            # this is done only if totalsize <= self.small_request_threshold,
-            # i.e. it cannot overflow, and it keeps the property that
-            # totalsize <= self.small_request_threshold.
+            # With the above checks we know now that totalsize cannot be more
+            # than 'nonlarge_max'; in particular, the + and * cannot overflow.
+            totalsize = nonvarsize + itemsize * length
             totalsize = llarena.round_up_for_allocation(totalsize)
-            ll_assert(llmemory.raw_malloc_usage(totalsize) <=
-                      self.small_request_threshold,
-                      "round_up_for_allocation() rounded up too much?")
             #
             # 'totalsize' should contain at least the GC header and
             # the length word, so it should never be smaller than
             # 'minimal_size_in_nursery'
-            ll_assert(llmemory.raw_malloc_usage(totalsize) >=
-                      llmemory.raw_malloc_usage(self.minimal_size_in_nursery),
+            ll_assert(raw_malloc_usage(totalsize) >=
+                      raw_malloc_usage(self.minimal_size_in_nursery),
                       "malloc_varsize_clear(): totalsize < minimalsize")
             #
             # Get the memory from the nursery.  If there is not enough space
@@ -353,10 +409,12 @@
             # Build the object.
             llarena.arena_reserve(result, totalsize)
             self.init_gc_object(result, typeid, flags=0)
+            #
+            # Set the length and return the object.
+            obj = result + size_gc_header
+            (obj + offset_to_length).signed[0] = length
         #
-        # Set the length and return the object.
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
+        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
 
 
     def collect(self, gen=1):
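
A worked example of the constant-folded limit in malloc_varsize_clear()
above, assuming a 64-bit WORD and the TRANSLATION_PARAMS values (the
header and item sizes below are hypothetical):

    WORD = 8
    nonlarge_max = 1600 * WORD - 1     # from "large_object", no-gcptrs case
    nonvarsize = 2 * WORD + 2 * WORD   # hypothetical header + fixed part
    itemsize = WORD
    maxlength = (nonlarge_max - nonvarsize) // itemsize
    # any allocation with length > maxlength goes to external_malloc()
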
@@ -389,105 +447,112 @@
     collect_and_reserve._dont_inline_ = True
 
 
-    def _full_collect_if_needed(self, reserving_size):
-        reserving_size = llmemory.raw_malloc_usage(reserving_size)
-        if (float(self.get_total_memory_used()) + reserving_size >
-                self.next_major_collection_threshold):
-            self.minor_collection()
-            self.major_collection(reserving_size)
-
-    def _external_malloc(self, typeid, totalsize):
-        """Allocate a large object using raw_malloc()."""
-        return self._external_malloc_cardmark(typeid, totalsize, 0)
-
-
-    def _external_malloc_cardmark(self, typeid, totalsize, length):
-        """Allocate a large object using raw_malloc(), possibly as an
-        object with card marking enabled, if its length is large enough.
-        'length' can be specified as 0 if the object is not varsized."""
+    def external_malloc(self, typeid, length):
+        """Allocate a large object using the ArenaCollection or
+        raw_malloc(), possibly as an object with card marking enabled,
+        if it has gc pointers in its var-sized part.  'length' should be
+        specified as 0 if the object is not varsized.  The returned
+        object is fully initialized and zero-filled."""
+        #
+        # Compute the total size, carefully checking for overflows.
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        nonvarsize = size_gc_header + self.fixed_size(typeid)
+        if length == 0:
+            # this includes the case of fixed-size objects, for which we
+            # should not even ask for the varsize_item_sizes().
+            totalsize = nonvarsize
+        else:
+            itemsize = self.varsize_item_sizes(typeid)
+            try:
+                varsize = ovfcheck(itemsize * length)
+                totalsize = ovfcheck(nonvarsize + varsize)
+            except OverflowError:
+                raise MemoryError
         #
         # If somebody calls this function a lot, we must eventually
         # force a full collection.
-        self._full_collect_if_needed(totalsize)
+        if (float(self.get_total_memory_used()) + raw_malloc_usage(totalsize) >
+                self.next_major_collection_threshold):
+            self.minor_collection()
+            self.major_collection(raw_malloc_usage(totalsize))
         #
-        # Check if we need to introduce the card marker bits area.
-        if (self.card_page_indices <= 0     # <- this check is constant-folded
-            or length < self.card_page_indices_min   # <- must be large enough
-            or not self.has_gcptr_in_varsize(typeid)):  # <- must contain ptrs
+        # Check if the object would fit in the ArenaCollection.
+        if raw_malloc_usage(totalsize) <= self.small_request_threshold:
             #
-            # In these cases, we don't want a card marker bits area.
-            cardheadersize = 0
+            # Yes.  Round up 'totalsize' (it cannot overflow and it
+            # must remain <= self.small_request_threshold.)
+            totalsize = llarena.round_up_for_allocation(totalsize)
+            ll_assert(raw_malloc_usage(totalsize) <=
+                      self.small_request_threshold,
+                      "rounding up made totalsize > small_request_threshold")
+            #
+            # Allocate from the ArenaCollection and clear the memory returned.
+            result = self.ac.malloc(totalsize)
+            llmemory.raw_memclear(result, totalsize)
             extra_flags = 0
             #
         else:
-            # Reserve N extra words containing card bits before the object.
-            extra_words = self.card_marking_words_for_length(length)
-            cardheadersize = WORD * extra_words
-            extra_flags = GCFLAG_HAS_CARDS
-        #
-        allocsize = cardheadersize + llmemory.raw_malloc_usage(totalsize)
-        #
-        # Allocate the object using arena_malloc(), which we assume here
-        # is just the same as raw_malloc(), but allows the extra flexibility
-        # of saying that we have extra words in the header.
-        arena = llarena.arena_malloc(allocsize, False)
-        if not arena:
-            raise MemoryError("cannot allocate large object")
-        #
-        # Clear it using method 2 of llarena.arena_reset(), which is the
-        # same as just a raw_memclear().
-        llarena.arena_reset(arena, allocsize, 2)
-        #
-        # Reserve the card mark as a list of single bytes
-        # (the loop is empty in C).
-        i = 0
-        while i < cardheadersize:
-            llarena.arena_reserve(arena + i, llmemory.sizeof(lltype.Char))
-            i += 1
-        #
-        # Initialize the object.
-        result = arena + cardheadersize
-        llarena.arena_reserve(result, totalsize)
-        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS | extra_flags)
-        #
-        # Record the newly allocated object and its size.
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        self.rawmalloced_total_size += llmemory.raw_malloc_usage(totalsize)
-        self.rawmalloced_objects.append(result + size_gc_header)
-        return result
-    _external_malloc_cardmark._dont_inline_ = True
-
-
-    def _malloc_nonmovable(self, typeid, totalsize):
-        """Allocate an object non-movable."""
-        #
-        rawtotalsize = llmemory.raw_malloc_usage(totalsize)
-        if rawtotalsize > self.small_request_threshold:
+            # No, so proceed to allocate it externally with raw_malloc().
+            # Check if we need to introduce the card marker bits area.
+            if (self.card_page_indices <= 0  # <- this check is constant-folded
+                or not self.has_gcptr_in_varsize(typeid) or
+                raw_malloc_usage(totalsize) <= self.nonlarge_gcptrs_max):
+                #
+                # In these cases, we don't want a card marker bits area.
+                # This case also includes all fixed-size objects.
+                cardheadersize = 0
+                extra_flags = 0
+                #
+            else:
+                # Reserve N extra words containing card bits before the object.
+                extra_words = self.card_marking_words_for_length(length)
+                cardheadersize = WORD * extra_words
+                extra_flags = GCFLAG_HAS_CARDS
+            #
+            # Detect very rare cases of overflows
+            if raw_malloc_usage(totalsize) > (sys.maxint - (WORD-1)
+                                              - cardheadersize):
+                raise MemoryError("rare case of overflow")
+            #
+            # Now we know that the following computations cannot overflow.
+            # Note that round_up_for_allocation() is also needed to get the
+            # correct number added to 'rawmalloced_total_size'.
+            allocsize = (cardheadersize + raw_malloc_usage(
+                            llarena.round_up_for_allocation(totalsize)))
+            #
+            # Allocate the object using arena_malloc(), which we assume here
+            # is just the same as raw_malloc(), but allows the extra
+            # flexibility of saying that we have extra words in the header.
+            arena = llarena.arena_malloc(allocsize, False)
+            if not arena:
+                raise MemoryError("cannot allocate large object")
+            #
+            # Clear it using method 2 of llarena.arena_reset(), which is the
+            # same as just a raw_memclear().  This also clears the card mark
+            # bits, if any.
+            llarena.arena_reset(arena, allocsize, 2)
+            #
+            # Reserve the card mark bits as a list of single bytes
+            # (the loop is empty in C).
+            i = 0
+            while i < cardheadersize:
+                llarena.arena_reserve(arena + i, llmemory.sizeof(lltype.Char))
+                i += 1
             #
-            # The size asked for is too large for the ArenaCollection.
-            return self._external_malloc(typeid, totalsize)
-        #
-        totalsize = llarena.round_up_for_allocation(totalsize)
-        #
-        # If somebody calls _malloc_nonmovable() a lot, we must eventually
-        # force a full collection.
-        self._full_collect_if_needed(totalsize)
-        #
-        # Ask the ArenaCollection to do the malloc.
-        result = self.ac.malloc(totalsize)
-        llmemory.raw_memclear(result, totalsize)
-        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS)
-        return result
-
-
-    def malloc_with_finalizer(self, typeid, totalsize):
-        """Allocate an object with a finalizer."""
+            # Reserve the actual object.  (This is also a no-op in C).
+            result = arena + cardheadersize
+            llarena.arena_reserve(result, totalsize)
+            #
+            # Record the newly allocated object and its full malloced size.
+            self.rawmalloced_total_size += allocsize
+            self.rawmalloced_objects.append(result + size_gc_header)
         #
-        result = self._malloc_nonmovable(typeid, totalsize)
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        self.objects_with_finalizers.append(result + size_gc_header)
-        return result
-    malloc_with_finalizer._dont_inline_ = True
+        # Common code to fill the header and length of the object.
+        self.init_gc_object(result, typeid, GCFLAG_NO_YOUNG_PTRS | extra_flags)
+        if self.is_varsize(typeid):
+            offset_to_length = self.varsize_offset_to_length(typeid)
+            (result + size_gc_header + offset_to_length).signed[0] = length
+        return result + size_gc_header
 
 
     # ----------
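
The raw_malloc branch of external_malloc() above lays memory out as
follows, for a large array with card marking (a sketch):

    #   arena  -> +------------------------+
    #             | card mark bytes        |  cardheadersize = WORD*extra_words
    #   result -> +------------------------+
    #             | GC header              |  size_gc_header
    #   obj    -> +------------------------+
    #             | length word, items ... |
    #             +------------------------+
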
@@ -529,37 +594,16 @@
 
 
     def malloc_fixedsize_nonmovable(self, typeid):
-        """NOT_RPYTHON: not tested translated"""
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + self.fixed_size(typeid)
-        #
-        result = self._malloc_nonmovable(typeid, totalsize)
-        obj = result + size_gc_header
+        obj = self.external_malloc(typeid, 0)
         return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
 
     def malloc_varsize_nonmovable(self, typeid, length):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + self.fixed_size(typeid)
-        itemsize = self.varsize_item_sizes(typeid)
-        offset_to_length = self.varsize_offset_to_length(typeid)
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise MemoryError
-        #
-        result = self._malloc_nonmovable(typeid, totalsize)
-        obj = result + size_gc_header
-        (obj + offset_to_length).signed[0] = length
+        obj = self.external_malloc(typeid, length)
         return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
 
     def malloc_nonmovable(self, typeid, length, zero):
         # helper for testing, same as GCBase.malloc
-        if self.is_varsize(typeid):
-            gcref = self.malloc_varsize_nonmovable(typeid, length)
-        else:
-            gcref = self.malloc_fixedsize_nonmovable(typeid)
-        return llmemory.cast_ptr_to_adr(gcref)
+        return self.external_malloc(typeid, length or 0)    # None -> 0
 
 
     # ----------
@@ -675,19 +719,19 @@
     #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
     JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
 
-    def write_barrier(self, newvalue, addr_struct):
+    def write_barrier(self, addr_struct):
         if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
-            self.remember_young_pointer(addr_struct, newvalue)
+            self.remember_young_pointer(addr_struct)
 
-    def write_barrier_from_array(self, newvalue, addr_array, index):
+    def write_barrier_from_array(self, addr_array, index):
         if self.header(addr_array).tid & GCFLAG_NO_YOUNG_PTRS:
             if self.card_page_indices > 0:     # <- constant-folded
-                self.remember_young_pointer_from_array(addr_array, index,
-                                                       newvalue)
+                self.remember_young_pointer_from_array(addr_array, index)
             else:
-                self.remember_young_pointer(addr_array, newvalue)
+                self.remember_young_pointer(addr_array)
 
     def _init_writebarrier_logic(self):
+        DEBUG = self.DEBUG
         # The purpose of attaching remember_young_pointer to the instance
         # instead of keeping it as a regular method is to help the JIT call it.
         # Additionally, it makes the code in write_barrier() marginally smaller
@@ -695,30 +739,22 @@
         # For x86, there is also an extra requirement: when the JIT calls
         # remember_young_pointer(), it assumes that it will not touch the SSE
         # registers, so it does not save and restore them (that's a *hack*!).
-        def remember_young_pointer(addr_struct, addr):
-            # 'addr_struct' is the address of the object in which we write;
-            # 'addr' is the address that we write in 'addr_struct'.
-            ll_assert(not self.is_in_nursery(addr_struct),
-                      "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            # if we have tagged pointers around, we first need to check whether
-            # we have valid pointer here, otherwise we can do it after the
-            # is_in_nursery check
-            if (self.config.taggedpointers and
-                not self.is_valid_gc_object(addr)):
-                return
-            #
-            # Core logic: if the 'addr' is in the nursery, then we need
+        def remember_young_pointer(addr_struct):
+            # 'addr_struct' is the address of the object in which we write.
+            if DEBUG:
+                ll_assert(not self.is_in_nursery(addr_struct),
+                          "nursery object with GCFLAG_NO_YOUNG_PTRS")
+            #
+            # We assume that what we are writing is a pointer to the nursery
+            # (and don't care for the fact that this new pointer may not
+            # actually point to the nursery, which seems ok).  What we need is
             # to remove the flag GCFLAG_NO_YOUNG_PTRS and add the old object
             # to the list 'old_objects_pointing_to_young'.  We know that
             # 'addr_struct' cannot be in the nursery, because nursery objects
             # never have the flag GCFLAG_NO_YOUNG_PTRS to start with.
+            self.old_objects_pointing_to_young.append(addr_struct)
             objhdr = self.header(addr_struct)
-            if self.is_in_nursery(addr):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
-            elif (not self.config.taggedpointers and
-                  not self.is_valid_gc_object(addr)):
-                return
+            objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
             #
             # Second part: if 'addr_struct' is actually a prebuilt GC
             # object and it's the first time we see a write to it, we
@@ -737,17 +773,16 @@
 
 
     def _init_writebarrier_with_card_marker(self):
-        def remember_young_pointer_from_array(addr_array, index, addr):
+        def remember_young_pointer_from_array(addr_array, index):
             # 'addr_array' is the address of the object in which we write,
             # which must have an array part;  'index' is the index of the
-            # item that is (or contains) the pointer that we write;
-            # 'addr' is the address that we write in the array.
+            # item that is (or contains) the pointer that we write.
             objhdr = self.header(addr_array)
             if objhdr.tid & GCFLAG_HAS_CARDS == 0:
                 #
                 # no cards, use default logic.  The 'nocard_logic()' is just
                 # 'remember_young_pointer()', but forced to be inlined here.
-                nocard_logic(addr_array, addr)
+                nocard_logic(addr_array)
                 return
             #
             # 'addr_array' is a raw_malloc'ed array with card markers
@@ -764,22 +799,13 @@
             if byte & bitmask:
                 return
             #
-            # As in remember_young_pointer, check if 'addr' is a valid
-            # pointer, in case it can be a tagged integer
-            if (self.config.taggedpointers and
-                not self.is_valid_gc_object(addr)):
-                return
-            #
-            # If the 'addr' is in the nursery, then we need to set the flag.
-            # Note that the following check is done after the bit check
-            # above, because it is expected that the "bit already set"
-            # situation is the most common.
-            if self.is_in_nursery(addr):
-                addr_byte.char[0] = chr(byte | bitmask)
-                #
-                if objhdr.tid & GCFLAG_CARDS_SET == 0:
-                    self.old_objects_with_cards_set.append(addr_array)
-                    objhdr.tid |= GCFLAG_CARDS_SET
+            # We set the flag (even if the newly written address does not
+            # actually point to the nursery -- like remember_young_pointer()).
+            addr_byte.char[0] = chr(byte | bitmask)
+            #
+            if objhdr.tid & GCFLAG_CARDS_SET == 0:
+                self.old_objects_with_cards_set.append(addr_array)
+                objhdr.tid |= GCFLAG_CARDS_SET
 
         nocard_logic = func_with_new_name(self.remember_young_pointer,
                                           'remember_young_pointer_nocard')
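
The card granularity used above, with the default card_page_indices =
128 (so card_page_shift = 7): one card covers 128 array items.  The
byte/bit packing sketched below is an assumption for illustration; the
real computation sits in unchanged lines not shown in this hunk:

    card_index = index >> card_page_shift    # which card the store hits
    byte_ofs   = card_index >> 3             # 8 cards per mark byte
    bitmask    = 1 << (card_index & 7)
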
@@ -997,7 +1023,7 @@
         if self.header(obj).tid & GCFLAG_HAS_SHADOW == 0:
             #
             # Common case: allocate a new nonmovable location for it.
-            newhdr = self.ac.malloc(totalsize)
+            newhdr = self._malloc_out_of_nursery(totalsize)
             #
         else:
             # The object has already a shadow.
@@ -1035,6 +1061,33 @@
         self.old_objects_pointing_to_young.append(newobj)
 
 
+    def _malloc_out_of_nursery(self, totalsize):
+        """Allocate non-movable memory for an object of the given
+        'totalsize' that lives so far in the nursery."""
+        if raw_malloc_usage(totalsize) <= self.small_request_threshold:
+            # most common path
+            return self.ac.malloc(totalsize)
+        else:
+            # for nursery objects that are not small
+            return self._malloc_out_of_nursery_nonsmall(totalsize)
+    _malloc_out_of_nursery._always_inline_ = True
+
+    def _malloc_out_of_nursery_nonsmall(self, totalsize):
+        # 'totalsize' should be aligned.
+        ll_assert(raw_malloc_usage(totalsize) & (WORD-1) == 0,
+                  "misaligned totalsize in _malloc_out_of_nursery_nonsmall")
+        #
+        arena = llarena.arena_malloc(raw_malloc_usage(totalsize), False)
+        if not arena:
+            raise MemoryError("cannot allocate object")
+        llarena.arena_reserve(arena, totalsize)
+        #
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        self.rawmalloced_total_size += raw_malloc_usage(totalsize)
+        self.rawmalloced_objects.append(arena + size_gc_header)
+        return arena
+
+
     # ----------
     # Full collection
 
@@ -1104,30 +1157,26 @@
         # Set the threshold for the next major collection to be when we
         # have allocated 'major_collection_threshold' times more than
         # we currently have.
-        self.next_major_collection_threshold = (
+        bounded = self.set_major_threshold_from(
             (self.get_total_memory_used() * self.major_collection_threshold)
             + reserving_size)
         #
         # Max heap size: gives an upper bound on the threshold.  If we
         # already have at least this much allocated, raise MemoryError.
-        if (self.max_heap_size > 0.0 and
-                self.next_major_collection_threshold > self.max_heap_size):
+        if bounded and (float(self.get_total_memory_used()) + reserving_size >=
+                        self.next_major_collection_threshold):
             #
-            self.next_major_collection_threshold = self.max_heap_size
-            if (float(self.get_total_memory_used()) + reserving_size >=
-                    self.next_major_collection_threshold):
-                #
-                # First raise MemoryError, giving the program a chance to
-                # quit cleanly.  It might still allocate in the nursery,
-                # which might eventually be emptied, triggering another
-                # major collect and (possibly) reaching here again with an
-                # even higher memory consumption.  To prevent it, if it's
-                # the second time we are here, then abort the program.
-                if self.max_heap_size_already_raised:
-                    llop.debug_fatalerror(lltype.Void,
-                                          "Using too much memory, aborting")
-                self.max_heap_size_already_raised = True
-                raise MemoryError
+            # First raise MemoryError, giving the program a chance to
+            # quit cleanly.  It might still allocate in the nursery,
+            # which might eventually be emptied, triggering another
+            # major collect and (possibly) reaching here again with an
+            # even higher memory consumption.  To prevent it, if it's
+            # the second time we are here, then abort the program.
+            if self.max_heap_size_already_raised:
+                llop.debug_fatalerror(lltype.Void,
+                                      "Using too much memory, aborting")
+            self.max_heap_size_already_raised = True
+            raise MemoryError
         #
         # At the end, we can execute the finalizers of the objects
         # listed in 'run_finalizers'.  Note that this will typically do
@@ -1159,8 +1208,7 @@
                 self.rawmalloced_objects.append(obj)
             else:
                 totalsize = size_gc_header + self.get_size(obj)
-                rawtotalsize = llmemory.raw_malloc_usage(totalsize)
-                self.rawmalloced_total_size -= rawtotalsize
+                allocsize = raw_malloc_usage(totalsize)
                 arena = llarena.getfakearenaaddress(obj - size_gc_header)
                 #
                 # Must also include the card marker area, if any
@@ -1175,8 +1223,10 @@
                     length = (obj + offset_to_length).signed[0]
                     extra_words = self.card_marking_words_for_length(length)
                     arena -= extra_words * WORD
+                    allocsize += extra_words * WORD
                 #
                 llarena.arena_free(arena)
+                self.rawmalloced_total_size -= allocsize
         #
         list.delete()
 
@@ -1260,7 +1310,8 @@
                 else:
                     size_gc_header = self.gcheaderbuilder.size_gc_header
                     size = self.get_size(obj)
-                    shadowhdr = self.ac.malloc(size_gc_header + size)
+                    shadowhdr = self._malloc_out_of_nursery(size_gc_header +
+                                                            size)
                     # initialize to an invalid tid *without* GCFLAG_VISITED,
                     # so that if the object dies before the next minor
                     # collection, the shadow will stay around but be collected
@@ -1454,7 +1505,7 @@
         self.total_memory_used = 0
 
     def malloc(self, size):
-        nsize = llmemory.raw_malloc_usage(size)
+        nsize = raw_malloc_usage(size)
         ll_assert(nsize > 0, "malloc: size is null or negative")
         ll_assert(nsize <= self.small_request_threshold,"malloc: size too big")
         ll_assert((nsize & (WORD-1)) == 0, "malloc: size is not aligned")

Modified: pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/test/test_direct.py	Mon Sep 27 13:43:31 2010
@@ -86,19 +86,17 @@
 
     def write(self, p, fieldname, newvalue):
         if self.gc.needs_write_barrier:
-            newaddr = llmemory.cast_ptr_to_adr(newvalue)
             addr_struct = llmemory.cast_ptr_to_adr(p)
-            self.gc.write_barrier(newaddr, addr_struct)
+            self.gc.write_barrier(addr_struct)
         setattr(p, fieldname, newvalue)
 
     def writearray(self, p, index, newvalue):
         if self.gc.needs_write_barrier:
-            newaddr = llmemory.cast_ptr_to_adr(newvalue)
             addr_struct = llmemory.cast_ptr_to_adr(p)
             if hasattr(self.gc, 'write_barrier_from_array'):
-                self.gc.write_barrier_from_array(newaddr, addr_struct, index)
+                self.gc.write_barrier_from_array(addr_struct, index)
             else:
-                self.gc.write_barrier(newaddr, addr_struct)
+                self.gc.write_barrier(addr_struct)
         p[index] = newvalue
 
     def malloc(self, TYPE, n=None):
@@ -507,8 +505,7 @@
                 for index, expected_x in nums.items():
                     assert a[index].x == expected_x
             self.stackroots.pop()
-    test_card_marker.GC_PARAMS = {"card_page_indices": 4,
-                                  "card_page_indices_min": 7}
+    test_card_marker.GC_PARAMS = {"card_page_indices": 4}
 
 class TestMiniMarkGCFull(DirectGCTest):
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass
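The two test wrappers above show the new calling convention: the write barrier receives only the address of the object being written into, never the value being stored. A toy model of why the value can be dropped (the flag set and list are assumptions standing in for the GC's real header flags and support lists):

    class OneArgBarrierModel:
        def __init__(self):
            self.needs_barrier = set()   # old objects whose stores are watched
            self.to_rescan = []          # rescanned at the next minor collection

        def write_barrier(self, obj):
            # The barrier never inspects the stored value: it records
            # that 'obj' was mutated and disarms itself, so the slow
            # path runs at most once per object per collection cycle.
            if obj in self.needs_barrier:
                self.to_rescan.append(obj)
                self.needs_barrier.discard(obj)

Since the whole object is rescanned at the next minor collection anyway, any young pointer stored later is found then, and passing the new value to the barrier buys nothing.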

Modified: pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/test/test_minimark.py	Mon Sep 27 13:43:31 2010
@@ -5,26 +5,6 @@
 # Note that most tests are in test_direct.py.
 
 
-def test_stringbuilder_default_initsize_is_small():
-    # Check that pypy.rlib.rstring.INIT_SIZE is short enough to let
-    # the allocated object be considered as a "small" object.
-    # Otherwise it would not be allocated in the nursery at all,
-    # which is kind of bad (and also prevents shrink_array() from
-    # being useful).
-    from pypy.rlib.rstring import INIT_SIZE
-    from pypy.rpython.lltypesystem.rstr import STR, UNICODE
-    #
-    size_gc_header = llmemory.raw_malloc_usage(
-        llmemory.sizeof(llmemory.Address))
-    #
-    size1 = llmemory.raw_malloc_usage(llmemory.sizeof(STR, INIT_SIZE))
-    size1 = size_gc_header + size1
-    assert size1 <= MiniMarkGC.TRANSLATION_PARAMS["small_request_threshold"]
-    #
-    size2 = llmemory.raw_malloc_usage(llmemory.sizeof(UNICODE, INIT_SIZE))
-    size2 = size_gc_header + size2
-    assert size2 <= MiniMarkGC.TRANSLATION_PARAMS["small_request_threshold"]
-
 def test_card_marking_words_for_length():
     gc = MiniMarkGC(None, card_page_indices=128)
     assert gc.card_page_shift == 7
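A worked check of the numbers in this test, assuming card_page_indices must be a power of two:

    card_page_indices = 128
    card_page_shift = card_page_indices.bit_length() - 1
    assert card_page_shift == 7                       # 2**7 == 128
    assert 1 << card_page_shift == card_page_indices  # power-of-two check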

Modified: pypy/trunk/pypy/rpython/memory/gctransform/framework.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gctransform/framework.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gctransform/framework.py	Mon Sep 27 13:43:31 2010
@@ -139,7 +139,7 @@
     def __init__(self, translator):
         from pypy.rpython.memory.gc.base import choose_gc_from_config
         from pypy.rpython.memory.gc.base import ARRAY_TYPEID_MAP
-        from pypy.rpython.memory.gc import inspect
+        from pypy.rpython.memory.gc import inspector
 
         super(FrameworkGCTransformer, self).__init__(translator, inline=True)
         if hasattr(self, 'GC_PARAMS'):
@@ -391,27 +391,27 @@
         else:
             self.id_ptr = None
 
-        self.get_rpy_roots_ptr = getfn(inspect.get_rpy_roots,
+        self.get_rpy_roots_ptr = getfn(inspector.get_rpy_roots,
                                        [s_gc],
                                        rgc.s_list_of_gcrefs(),
                                        minimal_transform=False)
-        self.get_rpy_referents_ptr = getfn(inspect.get_rpy_referents,
+        self.get_rpy_referents_ptr = getfn(inspector.get_rpy_referents,
                                            [s_gc, s_gcref],
                                            rgc.s_list_of_gcrefs(),
                                            minimal_transform=False)
-        self.get_rpy_memory_usage_ptr = getfn(inspect.get_rpy_memory_usage,
+        self.get_rpy_memory_usage_ptr = getfn(inspector.get_rpy_memory_usage,
                                               [s_gc, s_gcref],
                                               annmodel.SomeInteger(),
                                               minimal_transform=False)
-        self.get_rpy_type_index_ptr = getfn(inspect.get_rpy_type_index,
+        self.get_rpy_type_index_ptr = getfn(inspector.get_rpy_type_index,
                                             [s_gc, s_gcref],
                                             annmodel.SomeInteger(),
                                             minimal_transform=False)
-        self.is_rpy_instance_ptr = getfn(inspect.is_rpy_instance,
+        self.is_rpy_instance_ptr = getfn(inspector.is_rpy_instance,
                                          [s_gc, s_gcref],
                                          annmodel.SomeBool(),
                                          minimal_transform=False)
-        self.dump_rpy_heap_ptr = getfn(inspect.dump_rpy_heap,
+        self.dump_rpy_heap_ptr = getfn(inspector.dump_rpy_heap,
                                        [s_gc, annmodel.SomeInteger()],
                                        annmodel.s_Bool,
                                        minimal_transform=False)
@@ -426,7 +426,6 @@
         if GCClass.needs_write_barrier:
             self.write_barrier_ptr = getfn(GCClass.write_barrier.im_func,
                                            [s_gc,
-                                            annmodel.SomeAddress(),
                                             annmodel.SomeAddress()],
                                            annmodel.s_None,
                                            inline=True)
@@ -435,15 +434,13 @@
                 # func should not be a bound method, but a real function
                 assert isinstance(func, types.FunctionType)
                 self.write_barrier_failing_case_ptr = getfn(func,
-                                               [annmodel.SomeAddress(),
-                                                annmodel.SomeAddress()],
+                                               [annmodel.SomeAddress()],
                                                annmodel.s_None)
             func = getattr(GCClass, 'write_barrier_from_array', None)
             if func is not None:
                 self.write_barrier_from_array_ptr = getfn(func.im_func,
                                            [s_gc,
                                             annmodel.SomeAddress(),
-                                            annmodel.SomeAddress(),
                                             annmodel.SomeInteger()],
                                            annmodel.s_None,
                                            inline=True)
@@ -455,8 +452,7 @@
                     self.write_barrier_from_array_failing_case_ptr = \
                                              getfn(func,
                                                    [annmodel.SomeAddress(),
-                                                    annmodel.SomeInteger(),
-                                                    annmodel.SomeAddress()],
+                                                    annmodel.SomeInteger()],
                                                    annmodel.s_None)
         self.statistics_ptr = getfn(GCClass.statistics.im_func,
                                     [s_gc, annmodel.SomeInteger()],
@@ -1023,8 +1019,6 @@
             and not isinstance(v_newvalue, Constant)
             and v_struct.concretetype.TO._gckind == "gc"
             and hop.spaceop not in self.clean_sets):
-            v_newvalue = hop.genop("cast_ptr_to_adr", [v_newvalue],
-                                   resulttype = llmemory.Address)
             v_structaddr = hop.genop("cast_ptr_to_adr", [v_struct],
                                      resulttype = llmemory.Address)
             if (self.write_barrier_from_array_ptr is not None and
@@ -1034,14 +1028,12 @@
                 assert v_index.concretetype == lltype.Signed
                 hop.genop("direct_call", [self.write_barrier_from_array_ptr,
                                           self.c_const_gc,
-                                          v_newvalue,
                                           v_structaddr,
                                           v_index])
             else:
                 self.write_barrier_calls += 1
                 hop.genop("direct_call", [self.write_barrier_ptr,
                                           self.c_const_gc,
-                                          v_newvalue,
                                           v_structaddr])
         hop.rename('bare_' + opname)
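After this change the transformer emits exactly one address cast and one call per barriered store. A self-contained sketch using a fake recorder in place of the rtyper's 'hop' object (all names are placeholders):

    class FakeHop:
        def __init__(self):
            self.emitted = []
        def genop(self, opname, args, resulttype=None):
            # Record the operation instead of really generating it.
            self.emitted.append((opname, list(args)))
            return opname + '_result'

    hop = FakeHop()
    v_structaddr = hop.genop("cast_ptr_to_adr", ["v_struct"],
                             resulttype="Address")
    hop.genop("direct_call",
              ["write_barrier_ptr", "c_const_gc", v_structaddr])
    # Before the merge there was a second cast_ptr_to_adr (of
    # v_newvalue) and a third argument in the direct_call.
    assert [op for op, _ in hop.emitted] == ["cast_ptr_to_adr",
                                             "direct_call"]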
 

Modified: pypy/trunk/pypy/rpython/memory/gcwrapper.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gcwrapper.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gcwrapper.py	Mon Sep 27 13:43:31 2010
@@ -94,7 +94,6 @@
                         assert (type(index) is int    # <- fast path
                                 or lltype.typeOf(index) == lltype.Signed)
                         self.gc.write_barrier_from_array(
-                            llmemory.cast_ptr_to_adr(newvalue),
                             llmemory.cast_ptr_to_adr(toplevelcontainer),
                             index)
                         wb = False
@@ -102,7 +101,6 @@
             #
             if wb:
                 self.gc.write_barrier(
-                    llmemory.cast_ptr_to_adr(newvalue),
                     llmemory.cast_ptr_to_adr(toplevelcontainer))
         llheap.setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue)
 

Modified: pypy/trunk/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/test/test_gc.py	(original)
+++ pypy/trunk/pypy/rpython/memory/test/test_gc.py	Mon Sep 27 13:43:31 2010
@@ -29,6 +29,7 @@
     GC_CAN_MALLOC_NONMOVABLE = True
     GC_CAN_SHRINK_ARRAY = False
     GC_CAN_SHRINK_BIG_ARRAY = False
+    BUT_HOW_BIG_IS_A_BIG_STRING = 12
 
     def setup_class(cls):
         cls._saved_logstate = py.log._getstate()
@@ -495,7 +496,8 @@
         # with larger numbers, it gets allocated outside the semispace
         # with some GCs.
         flag = self.GC_CAN_SHRINK_BIG_ARRAY
-        assert self.interpret(f, [12, 0, flag]) == 0x62024241
+        bigsize = self.BUT_HOW_BIG_IS_A_BIG_STRING
+        assert self.interpret(f, [bigsize, 0, flag]) == 0x62024241
 
     def test_tagged_simple(self):
         from pypy.rlib.objectmodel import UnboxedValue
@@ -770,7 +772,7 @@
     from pypy.rpython.memory.gc.minimark import MiniMarkGC as GCClass
     GC_CAN_SHRINK_BIG_ARRAY = False
     GC_CAN_MALLOC_NONMOVABLE = True
+    BUT_HOW_BIG_IS_A_BIG_STRING = 11*WORD
 
 class TestMiniMarkGCCardMarking(TestMiniMarkGC):
-    GC_PARAMS = {'card_page_indices': 4,
-                 'card_page_indices_min': 10}
+    GC_PARAMS = {'card_page_indices': 4}
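The new BUT_HOW_BIG_IS_A_BIG_STRING attribute turns the previously hard-coded size 12 into a per-GC knob. A condensed view of the pattern, with the values taken from the diff and WORD assumed to be the 64-bit word size:

    WORD = 8   # assumption: 64-bit platform

    class GCBaseTests:
        BUT_HOW_BIG_IS_A_BIG_STRING = 12         # default for most GCs

    class MiniMarkTests(GCBaseTests):
        BUT_HOW_BIG_IS_A_BIG_STRING = 11 * WORD  # minimark needs more

    def bigsize_for(testcls):
        # The generic shrink_array test reads the attribute instead
        # of hard-coding 12, so each GC defines what 'big' means.
        return testcls.BUT_HOW_BIG_IS_A_BIG_STRING

    assert bigsize_for(GCBaseTests) == 12
    assert bigsize_for(MiniMarkTests) == 88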

Modified: pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py	(original)
+++ pypy/trunk/pypy/rpython/memory/test/test_transformed_gc.py	Mon Sep 27 13:43:31 2010
@@ -1475,7 +1475,6 @@
                          'arena_size': 64*WORD,
                          'small_request_threshold': 5*WORD,
                          'card_page_indices': 4,
-                         'card_page_indices_min': 10,
                          }
             root_stack_depth = 200
 

Modified: pypy/trunk/pypy/translator/c/test/test_lltyped.py
==============================================================================
--- pypy/trunk/pypy/translator/c/test/test_lltyped.py	(original)
+++ pypy/trunk/pypy/translator/c/test/test_lltyped.py	Mon Sep 27 13:43:31 2010
@@ -783,6 +783,17 @@
         res = fn()
         assert res == 42
 
+    def test_llarena(self):
+        from pypy.rpython.lltypesystem import llmemory, llarena
+        #
+        def f():
+            a = llarena.arena_malloc(800, False)
+            llarena.arena_reset(a, 800, 2)
+            llarena.arena_free(a)
+        #
+        fn = self.getcompiled(f, [])
+        fn()
+
     def test_padding_in_prebuilt_struct(self):
         from pypy.rpython.lltypesystem import rffi
         from pypy.rpython.tool import rffi_platform
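A slightly fuller lifecycle around the calls exercised by the new test_llarena above; the struct and sizes are illustrative, and the value 2 selects one of arena_reset's clearing modes (see llarena.py for the exact semantics):

    from pypy.rpython.lltypesystem import lltype, llmemory, llarena

    S = lltype.Struct('S', ('x', lltype.Signed))

    def exercise_arena():
        a = llarena.arena_malloc(800, False)          # 800 bytes, not zeroed
        llarena.arena_reserve(a, llmemory.sizeof(S))  # room for one S
        llarena.arena_reset(a, 800, 2)                # free the whole range
        llarena.arena_free(a)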
