[pypy-svn] r79924 - in pypy/trunk/pypy: rlib rpython/lltypesystem rpython/lltypesystem/test rpython/memory/gc

arigo at codespeak.net arigo at codespeak.net
Thu Dec 9 14:14:00 CET 2010


Author: arigo
Date: Thu Dec  9 14:13:58 2010
New Revision: 79924

Modified:
   pypy/trunk/pypy/rlib/rmmap.py
   pypy/trunk/pypy/rpython/lltypesystem/llarena.py
   pypy/trunk/pypy/rpython/lltypesystem/lltype.py
   pypy/trunk/pypy/rpython/lltypesystem/test/test_llarena.py
   pypy/trunk/pypy/rpython/memory/gc/base.py
   pypy/trunk/pypy/rpython/memory/gc/minimark.py
Log:
Merge branch/gc-debug, adding PYPY_GC_DEBUG to the runtime environment
variables (setting gc.DEBUG) and tweaking PYPY_GC_NURSERY for small sizes.

Also new: in gc.DEBUG mode, rotate through 23 nurseries, marking the
unused 22 ones with PROT_NONE.  This should help noticing when the
code uses by mistake a pointer without having it listed by trackgcroot.



Modified: pypy/trunk/pypy/rlib/rmmap.py
==============================================================================
--- pypy/trunk/pypy/rlib/rmmap.py	(original)
+++ pypy/trunk/pypy/rlib/rmmap.py	Thu Dec  9 14:13:58 2010
@@ -67,7 +67,7 @@
     constant_names = ['PAGE_READONLY', 'PAGE_READWRITE', 'PAGE_WRITECOPY',
                       'FILE_MAP_READ', 'FILE_MAP_WRITE', 'FILE_MAP_COPY',
                       'DUPLICATE_SAME_ACCESS', 'MEM_COMMIT', 'MEM_RESERVE',
-                      'MEM_RELEASE', 'PAGE_EXECUTE_READWRITE']
+                      'MEM_RELEASE', 'PAGE_EXECUTE_READWRITE', 'PAGE_NOACCESS']
     for name in constant_names:
         setattr(CConfig, name, rffi_platform.ConstantInteger(name))
 

Modified: pypy/trunk/pypy/rpython/lltypesystem/llarena.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/llarena.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/llarena.py	Thu Dec  9 14:13:58 2010
@@ -26,6 +26,7 @@
         self.objectptrs = {}        # {offset: ptr-to-container}
         self.objectsizes = {}       # {offset: size}
         self.freed = False
+        self.protect_inaccessible = None
         self.reset(zero)
 
     def __repr__(self):
@@ -59,6 +60,8 @@
     def check(self):
         if self.freed:
             raise ArenaError("arena was already freed")
+        if self.protect_inaccessible is not None:
+            raise ArenaError("arena is currently arena_protect()ed")
 
     def _getid(self):
         address, length = self.usagemap.buffer_info()
@@ -127,6 +130,21 @@
     def mark_freed(self):
         self.freed = True    # this method is a hook for tests
 
+    def set_protect(self, inaccessible):
+        if inaccessible:
+            assert self.protect_inaccessible is None
+            saved = []
+            for ptr in self.objectptrs.values():
+                obj = ptr._obj
+                saved.append((obj, obj._protect()))
+            self.protect_inaccessible = saved
+        else:
+            assert self.protect_inaccessible is not None
+            saved = self.protect_inaccessible
+            for obj, storage in saved:
+                obj._unprotect(storage)
+            self.protect_inaccessible = None
+
 class fakearenaaddress(llmemory.fakeaddress):
 
     def __init__(self, arena, offset):
@@ -365,6 +383,16 @@
     """
     return Arena(ptr.arena.nbytes, False).getaddr(0)
 
+def arena_protect(arena_addr, size, inaccessible):
+    """For debugging, set or reset memory protection on an arena.
+    For now, the starting point and size should reference the whole arena.
+    The value of 'inaccessible' is a boolean.
+    """
+    arena_addr = getfakearenaaddress(arena_addr)
+    assert arena_addr.offset == 0
+    assert size == arena_addr.arena.nbytes
+    arena_addr.arena.set_protect(inaccessible)
+
 # ____________________________________________________________
 #
 # Translation support: the functions above turn into the code below.
@@ -475,6 +503,44 @@
     # them immediately.
     clear_large_memory_chunk = llmemory.raw_memclear
 
+if os.name == "posix":
+    from pypy.translator.tool.cbuild import ExternalCompilationInfo
+    _eci = ExternalCompilationInfo(includes=['sys/mman.h'])
+    raw_mprotect = rffi.llexternal('mprotect',
+                                   [llmemory.Address, rffi.SIZE_T, rffi.INT],
+                                   rffi.INT,
+                                   sandboxsafe=True, _nowrapper=True,
+                                   compilation_info=_eci)
+    def llimpl_protect(addr, size, inaccessible):
+        if inaccessible:
+            prot = 0
+        else:
+            from pypy.rlib.rmmap import PROT_READ, PROT_WRITE
+            prot = PROT_READ | PROT_WRITE
+        raw_mprotect(addr, rffi.cast(rffi.SIZE_T, size),
+                     rffi.cast(rffi.INT, prot))
+        # ignore potential errors
+    has_protect = True
+
+elif os.name == 'nt':
+    def llimpl_protect(addr, size, inaccessible):
+        from pypy.rlib.rmmap import VirtualProtect, LPDWORD
+        if inaccessible:
+            from pypy.rlib.rmmap import PAGE_NOACCESS as newprotect
+        else:
+            from pypy.rlib.rmmap import PAGE_READWRITE as newprotect
+        arg = lltype.malloc(LPDWORD.TO, 1, zero=True, flavor='raw')
+        VirtualProtect(rffi.cast(rffi.VOIDP, addr),
+                       rffi.cast(rffi.SIZE_T, size),
+                       newprotect,
+                       arg)
+        # ignore potential errors
+        lltype.free(arg, flavor='raw')
+    has_protect = True
+
+else:
+    has_protect = False
+
 
 llimpl_malloc = rffi.llexternal('malloc', [lltype.Signed], llmemory.Address,
                                 sandboxsafe=True, _nowrapper=True)
@@ -544,6 +610,21 @@
                   'll_arena.arena_new_view', llimpl=llimpl_arena_new_view,
                   llfakeimpl=arena_new_view, sandboxsafe=True)
 
+def llimpl_arena_protect(addr, size, inaccessible):
+    if has_protect:
+        # do some alignment
+        start = rffi.cast(lltype.Signed, addr)
+        end = start + size
+        start = (start + 4095) & ~ 4095
+        end = end & ~ 4095
+        if end > start:
+            llimpl_protect(rffi.cast(llmemory.Address, start), end-start,
+                           inaccessible)
+register_external(arena_protect, [llmemory.Address, lltype.Signed,
+                                  lltype.Bool], lltype.Void,
+                  'll_arena.arena_protect', llimpl=llimpl_arena_protect,
+                  llfakeimpl=arena_protect, sandboxsafe=True)
+
 def llimpl_getfakearenaaddress(addr):
     return addr
 register_external(getfakearenaaddress, [llmemory.Address], llmemory.Address,

Modified: pypy/trunk/pypy/rpython/lltypesystem/lltype.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/lltype.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/lltype.py	Thu Dec  9 14:13:58 2010
@@ -1381,6 +1381,15 @@
         self._check()   # no double-frees
         self._storage = None
 
+    def _protect(self):
+        result = self._storage
+        self._free()   # no double-frees or double-protects
+        return result
+
+    def _unprotect(self, saved_storage):
+        assert self._storage is None
+        self._storage = saved_storage
+
     def _was_freed(self):
         if self._storage is None:
             return True

Modified: pypy/trunk/pypy/rpython/lltypesystem/test/test_llarena.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/test/test_llarena.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/test/test_llarena.py	Thu Dec  9 14:13:58 2010
@@ -6,6 +6,8 @@
 from pypy.rpython.lltypesystem.llarena import round_up_for_allocation
 from pypy.rpython.lltypesystem.llarena import ArenaError, arena_new_view
 from pypy.rpython.lltypesystem.llarena import arena_shrink_obj
+from pypy.rpython.lltypesystem.llarena import arena_protect, has_protect
+from pypy.translator.c.test import test_genc, test_standalone
 
 def test_arena():
     S = lltype.Struct('S', ('x',lltype.Signed))
@@ -265,8 +267,7 @@
     assert res == 42
 
 def test_compiled():
-    from pypy.translator.c.test.test_genc import compile
-    fn = compile(test_look_inside_object, [])
+    fn = test_genc.compile(test_look_inside_object, [])
     res = fn()
     assert res == 42
 
@@ -282,3 +283,51 @@
     arena_reserve(a, size_gc_header + llmemory.sizeof(S, 10))
     arena_shrink_obj(a, size_gc_header + llmemory.sizeof(S, 5))
     arena_reset(a, size_gc_header + llmemory.sizeof(S, 5), False)
+
+def test_arena_protect():
+    a = arena_malloc(100, False)
+    S = lltype.Struct('S', ('x', lltype.Signed))
+    arena_reserve(a, llmemory.sizeof(S))
+    p = llmemory.cast_adr_to_ptr(a, lltype.Ptr(S))
+    p.x = 123
+    assert p.x == 123
+    arena_protect(a, 100, True)
+    py.test.raises(ArenaError, arena_reserve, a + 48, llmemory.sizeof(S))
+    py.test.raises(RuntimeError, "p.x")
+    py.test.raises(RuntimeError, "p.x = 124")
+    arena_protect(a, 100, False)
+    assert p.x == 123
+    p.x = 125
+    assert p.x == 125
+
+
+class TestStandalone(test_standalone.StandaloneTests):
+    def test_compiled_arena_protect(self):
+        import os
+        from pypy.translator.c.test.test_genc import compile
+        S = lltype.Struct('S', ('x', lltype.Signed))
+        #
+        def fn(argv):
+            testrun = int(argv[1])
+            a = arena_malloc(65536, False)
+            arena_reserve(a, llmemory.sizeof(S))
+            p = llmemory.cast_adr_to_ptr(a + 23432, lltype.Ptr(S))
+            p.x = 123
+            assert p.x == 123
+            arena_protect(a, 65536, True)
+            result = 0
+            if testrun == 1:
+                print p.x       # segfault
+            if testrun == 2:
+                p.x = 124       # segfault
+            arena_protect(a, 65536, False)
+            p.x += 10
+            print p.x
+            return 0
+        #
+        t, cbuilder = self.compile(fn)
+        data = cbuilder.cmdexec('0')
+        assert data == '133\n'
+        if has_protect:
+            cbuilder.cmdexec('1', expect_crash=True)
+            cbuilder.cmdexec('2', expect_crash=True)

Modified: pypy/trunk/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/base.py	Thu Dec  9 14:13:58 2010
@@ -40,8 +40,7 @@
         # More stuff that needs to be initialized when the GC is already
         # fully working.  (Only called by gctransform/framework for now.)
         from pypy.rpython.memory.gc import env
-        if env.read_from_env('PYPY_GC_DEBUG') > 0:
-            self.DEBUG = True
+        self.DEBUG = env.read_from_env('PYPY_GC_DEBUG')
 
     def _teardown(self):
         pass

Modified: pypy/trunk/pypy/rpython/memory/gc/minimark.py
==============================================================================
--- pypy/trunk/pypy/rpython/memory/gc/minimark.py	(original)
+++ pypy/trunk/pypy/rpython/memory/gc/minimark.py	Thu Dec  9 14:13:58 2010
@@ -3,7 +3,8 @@
 Environment variables can be used to fine-tune the following parameters:
     
  PYPY_GC_NURSERY        The nursery size.  Defaults to half the size of
-                        the L2 cache.  Try values like '1.2MB'.
+                        the L2 cache.  Try values like '1.2MB'.  Small values
+                        (like 1 or 1KB) are useful for debugging.
 
  PYPY_GC_MAJOR_COLLECT  Major collection memory factor.  Default is '1.82',
                         which means trigger a major collection when the
@@ -34,7 +35,9 @@
                         times the nursery.
 
  PYPY_GC_DEBUG          Enable extra checks around collections that are
-                        too slow for normal use.
+                        too slow for normal use.  Values are 0 (off),
+                        1 (on major collections) or 2 (also on minor
+                        collections).
 """
 # XXX Should find a way to bound the major collection threshold by the
 # XXX total addressable size.  Maybe by keeping some minimarkpage arenas
@@ -226,7 +229,8 @@
         self.nursery      = NULL
         self.nursery_free = NULL
         self.nursery_top  = NULL
-        self.debug_always_do_minor_collect = False
+        self.debug_tiny_nursery = -1
+        self.debug_rotating_nurseries = None
         #
         # The ArenaCollection() handles the nonmovable objects allocation.
         if ArenaCollectionClass is None:
@@ -294,15 +298,20 @@
             # From there on, the GC is fully initialized and the code
             # below can use it
             newsize = env.read_from_env('PYPY_GC_NURSERY')
-            # PYPY_GC_NURSERY=1 forces a minor collect for every malloc.
-            # Useful to debug external factors, like trackgcroot or the
-            # handling of the write barrier.
-            self.debug_always_do_minor_collect = newsize == 1
+            # PYPY_GC_NURSERY=smallvalue means that minor collects occur
+            # very frequently; the extreme case is PYPY_GC_NURSERY=1, which
+            # forces a minor collect for every malloc.  Useful to debug
+            # external factors, like trackgcroot or the handling of the write
+            # barrier.  Implemented by still using 'minsize' for the nursery
+            # size (needed to handle e.g. mallocs of 8249 words) but hacking
+            # at the current nursery position in collect_and_reserve().
             if newsize <= 0:
                 newsize = env.estimate_best_nursery_size()
                 if newsize <= 0:
                     newsize = defaultsize
-            newsize = max(newsize, minsize)
+            if newsize < minsize:
+                self.debug_tiny_nursery = newsize & ~(WORD-1)
+                newsize = minsize
             #
             major_coll = env.read_float_from_env('PYPY_GC_MAJOR_COLLECT')
             if major_coll > 1.0:
@@ -335,17 +344,24 @@
             self.allocate_nursery()
 
 
-    def allocate_nursery(self):
-        debug_start("gc-set-nursery-size")
-        debug_print("nursery size:", self.nursery_size)
+    def _nursery_memory_size(self):
+        extra = self.nonlarge_gcptrs_max + 1
+        return self.nursery_size + extra
+
+    def _alloc_nursery(self):
         # the start of the nursery: we actually allocate a bit more for
         # the nursery than really needed, to simplify pointer arithmetic
         # in malloc_fixedsize_clear().  The few extra pages are never used
         # anyway so it doesn't even count.
-        extra = self.nonlarge_gcptrs_max + 1
-        self.nursery = llarena.arena_malloc(self.nursery_size + extra, 2)
-        if not self.nursery:
+        nursery = llarena.arena_malloc(self._nursery_memory_size(), 2)
+        if not nursery:
             raise MemoryError("cannot allocate nursery")
+        return nursery
+
+    def allocate_nursery(self):
+        debug_start("gc-set-nursery-size")
+        debug_print("nursery size:", self.nursery_size)
+        self.nursery = self._alloc_nursery()
         # the current position in the nursery:
         self.nursery_free = self.nursery
         # the end of the nursery:
@@ -379,6 +395,39 @@
         return bounded
 
 
+    def post_setup(self):
+        # set up extra stuff for PYPY_GC_DEBUG.
+        MovingGCBase.post_setup(self)
+        if self.DEBUG and llarena.has_protect:
+            # gc debug mode: allocate 23 nurseries instead of just 1,
+            # and use them alternatively, while mprotect()ing the unused
+            # ones to detect invalid access.
+            debug_start("gc-debug")
+            self.debug_rotating_nurseries = []
+            for i in range(22):
+                nurs = self._alloc_nursery()
+                llarena.arena_protect(nurs, self._nursery_memory_size(), True)
+                self.debug_rotating_nurseries.append(nurs)
+            debug_print("allocated", len(self.debug_rotating_nurseries),
+                        "extra nurseries")
+            debug_stop("gc-debug")
+
+    def debug_rotate_nursery(self):
+        if self.debug_rotating_nurseries is not None:
+            debug_start("gc-debug")
+            oldnurs = self.nursery
+            llarena.arena_protect(oldnurs, self._nursery_memory_size(), True)
+            self.debug_rotating_nurseries.append(oldnurs)
+            #
+            newnurs = self.debug_rotating_nurseries.pop(0)
+            llarena.arena_protect(newnurs, self._nursery_memory_size(), False)
+            self.nursery = newnurs
+            self.nursery_top = self.nursery + self.nursery_size
+            debug_print("switching to nursery", self.nursery,
+                        "size", self.nursery_size)
+            debug_stop("gc-debug")
+
+
     def malloc_fixedsize_clear(self, typeid, size, can_collect=True,
                                needs_finalizer=False, contains_weakptr=False):
         ll_assert(can_collect, "!can_collect")
@@ -516,8 +565,9 @@
         self.nursery_free = result + totalsize
         ll_assert(self.nursery_free <= self.nursery_top, "nursery overflow")
         #
-        if self.debug_always_do_minor_collect:
-            self.nursery_free = self.nursery_top
+        if self.debug_tiny_nursery >= 0:   # for debugging
+            if self.nursery_top - self.nursery_free > self.debug_tiny_nursery:
+                self.nursery_free = self.nursery_top - self.debug_tiny_nursery
         #
         return result
     collect_and_reserve._dont_inline_ = True
@@ -1003,13 +1053,14 @@
         # All live nursery objects are out, and the rest dies.  Fill
         # the whole nursery with zero and reset the current nursery pointer.
         llarena.arena_reset(self.nursery, self.nursery_size, 2)
+        self.debug_rotate_nursery()
         self.nursery_free = self.nursery
         #
         debug_print("minor collect, total memory used:",
                     self.get_total_memory_used())
+        if self.DEBUG >= 2:
+            self.debug_check_consistency()     # expensive!
         debug_stop("gc-minor")
-        if 0:  # not we_are_translated():
-            self.debug_check_consistency()     # xxx expensive!
 
 
     def collect_roots_in_nursery(self):



More information about the Pypy-commit mailing list