[pypy-svn] r16240 - in pypy/dist/pypy/rpython/memory: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Aug 22 22:35:20 CEST 2005


Author: cfbolz
Date: Mon Aug 22 22:35:18 2005
New Revision: 16240

Modified:
   pypy/dist/pypy/rpython/memory/gc.py
   pypy/dist/pypy/rpython/memory/gclltype.py
   pypy/dist/pypy/rpython/memory/gcwrapper.py
   pypy/dist/pypy/rpython/memory/test/test_gc.py
Log:
first attempt at a semispace gc


Modified: pypy/dist/pypy/rpython/memory/gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc.py	(original)
+++ pypy/dist/pypy/rpython/memory/gc.py	Mon Aug 22 22:35:18 2005
@@ -1,4 +1,5 @@
-from pypy.rpython.memory.lladdress import raw_malloc, raw_free, NULL
+from pypy.rpython.memory.lladdress import raw_malloc, raw_free, raw_memcopy
+from pypy.rpython.memory.lladdress import NULL
 from pypy.rpython.memory.support import AddressLinkedList
 from pypy.rpython.memory import lltypesimulation
 from pypy.rpython import lltype
@@ -11,18 +12,18 @@
 
 
 class MarkSweepGC(object):
-    _alloc_flavor_ = ""
+    _alloc_flavor_ = "raw"
 
-    def __init__(self, objectmodel, collect_every_bytes):
+    def __init__(self, objectmodel, start_heap_size):
         self.bytes_malloced = 0
-        self.collect_every_bytes = collect_every_bytes
+        self.heap_size = start_heap_size
         #need to maintain a list of malloced objects, since we used the systems
         #allocator and can't walk the heap
         self.malloced_objects = AddressLinkedList()
         self.objectmodel = objectmodel
 
     def malloc(self, typeid, length=0):
-        if self.bytes_malloced > self.collect_every_bytes:
+        if self.bytes_malloced > self.heap_size:
             self.collect()
         size = self.objectmodel.fixed_size(typeid)
         if self.objectmodel.is_varsize(typeid):
@@ -64,7 +65,6 @@
             for i in range(len(offsets)):
                 pointer = curr + offsets[i]
                 objects.append(pointer.address[0])
-                i += 1
             if self.objectmodel.is_varsize(typeid):
                 offset = self.objectmodel.varsize_offset_to_variable_part(
                     typeid)
@@ -72,24 +72,35 @@
                 offsets = self.objectmodel.varsize_offsets_to_gcpointers_in_var_part(typeid)
                 itemlength = self.objectmodel.varsize_item_sizes(typeid)
                 curr += offset
-                i = 0
                 for i in range(length):
                     item = curr + itemlength * i
                     for j in range(len(offsets)):
                         objects.append((item + offsets[j]).address[0])
             gc_info.signed[0] = 1
         newmo = AddressLinkedList()
+        curr_heap_size = 0
+        freed_size = 0
         while 1:  #sweep
             curr = self.malloced_objects.pop()
             if curr == NULL:
                 break
+            typeid = curr.signed[1]
+            size = self.objectmodel.fixed_size(typeid)
+            if self.objectmodel.is_varsize(typeid):
+                length = (curr + self.size_gc_header() + self.objectmodel.varsize_offset_to_length(typeid)).signed[0]
+                size += length * self.objectmodel.varsize_item_sizes(typeid)
             if curr.signed[0] == 1:
                 curr.signed[0] = 0
                 newmo.append(curr)
+                curr_heap_size += size + self.size_gc_header()
             else:
+                freed_size += size + self.size_gc_header()
                 raw_free(curr)
+        print "free %s bytes. the heap is %s bytes." % (freed_size, curr_heap_size)
         free_non_gc_object(self.malloced_objects)
         self.malloced_objects = newmo
+        if curr_heap_size > self.heap_size:
+            self.heap_size = curr_heap_size
 
     def size_gc_header(self):
         return lltypesimulation.sizeof(lltype.Signed) * 2
@@ -98,3 +109,117 @@
         addr.signed[0] = 0
         addr.signed[1] = typeid
 
+
+class SemiSpaceGC(object):
+    _alloc_flavor_ = "raw"
+
+    def __init__(self, objectmodel, space_size):
+        self.bytes_malloced = 0  # NOTE(review): not updated by any SemiSpaceGC method in this patch
+        self.space_size = space_size
+        self.tospace = raw_malloc(space_size)  # space that malloc() allocates from
+        self.top_of_space = self.tospace + space_size  # end of tospace; malloc() collects before passing it
+        self.fromspace = raw_malloc(space_size)  # second space; swapped with tospace at the start of collect()
+        self.free = self.tospace  # bump pointer: address of the next allocation
+        self.objectmodel = objectmodel  # queried for per-typeid sizes/offsets and for the roots
+
+    def malloc(self, typeid, length=0):
+        size = self.objectmodel.fixed_size(typeid)
+        if self.objectmodel.is_varsize(typeid):
+            size += length * self.objectmodel.varsize_item_sizes(typeid)  # add the variable-sized part
+        totalsize = size + self.size_gc_header()  # the GC header precedes every object
+        if self.free + totalsize > self.top_of_space:
+            self.collect()  # NOTE(review): free space is not re-checked after collecting
+        result = self.free
+        self.init_gc_object(result, typeid)  # writes the header at the start of the new block
+        print "mallocing %s, size %s at %s" % (typeid, size, result)
+        self.free += totalsize
+        return result + self.size_gc_header()  # callers receive the address just past the header
+
+
+    def collect(self):
+        print "collecting"
+        self.fromspace, self.tospace = self.tospace, self.fromspace  # flip: the old tospace becomes fromspace
+        self.top_of_space = self.tospace + self.space_size
+        roots = self.objectmodel.get_roots()
+        scan = self.free = self.tospace  # both pointers start at the bottom of the new tospace
+        while 1:
+            root = roots.pop()
+            if root == NULL:  # pop() returning NULL ends the root loop
+                break
+            print "root", root, root.address[0]
+            root.address[0] = self.copy(root.address[0])  # NOTE(review): no NULL check on the root's target, unlike trace_and_copy()
+        while scan < self.free:  # objects copied but not yet traced lie between scan and free
+            curr = scan + self.size_gc_header()
+            self.trace_and_copy(curr)  # may copy further objects, which advances self.free
+            scan += self.get_size(curr) + self.size_gc_header()
+
+    def copy(self, obj):
+        if not self.fromspace <= obj < self.fromspace + self.space_size:  # obj lies outside fromspace
+            return self.copy_non_managed_obj(obj)
+        print "copying regularly", obj
+        if self.is_forwared(obj):  # already copied: reuse the recorded new address
+            return self.get_forwarding_address(obj)
+        else:
+            newaddr = self.free
+            totalsize = self.get_size(obj) + self.size_gc_header()
+            raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)  # copy header and body to the free pointer
+            self.free += totalsize
+            newobj = newaddr + self.size_gc_header()
+            self.set_forwarding_address(obj, newobj)  # overwrites the old header, so it happens after the copy
+            return newobj
+
+    def copy_non_managed_obj(self, obj): #umph, PBCs, not really copy
+        print "copying nonmanaged", obj
+        #we have to do the tracing here because PBCs are not moved to tospace
+        self.trace_and_copy(obj)  # NOTE(review): nothing marks obj as visited here; confirm this terminates if such objects are reachable cyclically
+        return obj  # the address is returned unchanged
+
+    def trace_and_copy(self, obj):
+        gc_info = obj - self.size_gc_header()  # the header sits directly before the object
+        typeid = gc_info.signed[1]
+        print "scanning", obj, typeid
+        offsets = self.objectmodel.offsets_to_gc_pointers(typeid)
+        for i in range(len(offsets)):
+            pointer = obj + offsets[i]
+            if pointer.address[0] != NULL:
+                pointer.address[0] = self.copy(pointer.address[0])  # store the address returned by copy() back into the field
+        if self.objectmodel.is_varsize(typeid):
+            offset = self.objectmodel.varsize_offset_to_variable_part(
+                typeid)
+            length = (obj + self.objectmodel.varsize_offset_to_length(typeid)).signed[0]  # item count is read from inside the object
+            offsets = self.objectmodel.varsize_offsets_to_gcpointers_in_var_part(typeid)
+            itemlength = self.objectmodel.varsize_item_sizes(typeid)
+            for i in range(length):
+                item = obj + offset + itemlength * i  # address of the i-th item of the variable part
+                for j in range(len(offsets)):
+                    pointer = item + offsets[j]
+                    if pointer.address[0] != NULL:
+                        pointer.address[0] = self.copy(pointer.address[0])
+
+    def is_forwared(self, obj):
+        return (obj - self.size_gc_header()).signed[1] == -1  # -1 in the typeid slot is the marker written by set_forwarding_address()
+
+    def get_forwarding_address(self, obj):
+        return (obj - self.size_gc_header()).address[0]  # header word 0, where set_forwarding_address() stores the new address
+
+    def set_forwarding_address(self, obj, newobj):
+        gc_info = obj - self.size_gc_header()
+        gc_info.signed[1] = -1  # replaces the typeid with the marker tested by is_forwared()
+        gc_info.address[0] = newobj  # header word 0 now holds the object's new address
+
+    def get_size(self, obj):
+        typeid = (obj - self.size_gc_header()).signed[1]  # typeid is header word 1
+        size = self.objectmodel.fixed_size(typeid)
+        if self.objectmodel.is_varsize(typeid):
+            lenaddr = obj + self.objectmodel.varsize_offset_to_length(typeid)
+            length = lenaddr.signed[0]  # item count is read from inside the object
+            size += length * self.objectmodel.varsize_item_sizes(typeid)
+        return size  # the GC header is not included
+
+
+    def size_gc_header(self):
+        return lltypesimulation.sizeof(lltype.Signed) * 2  # header is two Signed-sized words
+
+    def init_gc_object(self, addr, typeid):
+        addr.signed[0] = 0  # header word 0 starts out as zero
+        addr.signed[1] = typeid  # header word 1 holds the typeid

Modified: pypy/dist/pypy/rpython/memory/gclltype.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gclltype.py	(original)
+++ pypy/dist/pypy/rpython/memory/gclltype.py	Mon Aug 22 22:35:18 2005
@@ -35,11 +35,12 @@
     fgcc.convert()    
     return None
 
+from pypy.rpython.memory.gc import MarkSweepGC, SemiSpaceGC
+use_gc = MarkSweepGC
 def create_mark_sweep_gc(llinterp, flowgraphs):
     from pypy.rpython.memory.gcwrapper import GcWrapper, LLInterpObjectModel
-    from pypy.rpython.memory.gc import MarkSweepGC
     #XXX hackish: we need the gc before the object model is ready
-    gc = MarkSweepGC(None, 4096)
+    gc = use_gc(None, 4096)
     fgcc = FlowGraphConstantConverter(flowgraphs, gc)
     fgcc.convert()    
     om = LLInterpObjectModel(llinterp, fgcc.cvter.types,

Modified: pypy/dist/pypy/rpython/memory/gcwrapper.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gcwrapper.py	(original)
+++ pypy/dist/pypy/rpython/memory/gcwrapper.py	Mon Aug 22 22:35:18 2005
@@ -59,24 +59,31 @@
         return typeid
 
     def is_varsize(self, typeid):
+        assert typeid >= 0
         return self._is_varsize[typeid]
 
     def offsets_to_gc_pointers(self, typeid):
+        assert typeid >= 0
         return self._offsets_to_gc_pointers[typeid]
 
     def fixed_size(self, typeid):
+        assert typeid >= 0
         return self._fixed_size[typeid]
 
     def varsize_item_sizes(self, typeid):
+        assert typeid >= 0
         return self._varsize_item_sizes[typeid]
 
     def varsize_offset_to_variable_part(self, typeid):
+        assert typeid >= 0
         return self._varsize_offset_to_variable_part[typeid]
 
     def varsize_offset_to_length(self, typeid):
+        assert typeid >= 0
         return self._varsize_offset_to_length[typeid]
 
     def varsize_offsets_to_gcpointers_in_var_part(self, typeid):
+        assert typeid >= 0
         return self._varsize_offsets_to_gcpointers_in_var_part[typeid]
 
     def get_roots(self):
@@ -84,7 +91,9 @@
         if self.pseudo_root_pointers != NULL:
             raw_free(self.pseudo_root_pointers)
         self.roots = self.llinterp.find_roots() + self.constantroots
-        print "found:", self.roots
+        self.roots = [r for r in self.roots
+                          if isinstance(r._TYPE.TO,
+                                        (lltype.Struct, lltype.Array))]
         if len(self.roots) == 0:
             self.pseudo_root_pointers = NULL
         else:

Modified: pypy/dist/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/test/test_gc.py	(original)
+++ pypy/dist/pypy/rpython/memory/test/test_gc.py	Mon Aug 22 22:35:18 2005
@@ -3,7 +3,7 @@
 from pypy.annotation import model as annmodel
 from pypy.translator.annrpython import RPythonAnnotator
 from pypy.rpython.rtyper import RPythonTyper
-from pypy.rpython.memory.gc import GCError, MarkSweepGC
+from pypy.rpython.memory.gc import GCError, MarkSweepGC, SemiSpaceGC
 from pypy.rpython.memory.support import AddressLinkedList, INT_SIZE
 from pypy.rpython.memory.lladdress import raw_malloc, raw_free, NULL
 from pypy.rpython.memory.simulator import MemorySimulatorError
@@ -58,8 +58,16 @@
         variables.address[1] = gc.malloc(0)
         variables.address[2] = gc.malloc(0)
         variables.address[3] = gc.malloc(0)
+        variables.address[0].signed[0] = 0
+        variables.address[1].signed[0] = 1
+        variables.address[2].signed[0] = 2
+        variables.address[3].signed[0] = 3
         print "roots", roots
         gc.collect() #does not crash
+        assert variables.address[0].signed[0] == 0
+        assert variables.address[1].signed[0] == 1
+        assert variables.address[2].signed[0] == 2
+        assert variables.address[3].signed[0] == 3
         addr = gc.malloc(0)
         addr.signed[0] = 1
         print "roots", roots
@@ -70,6 +78,8 @@
         variables.address[0].address[1] = NULL
         print "roots", roots
         gc.collect() #does not crash
+        assert variables.address[0].address[0] == variables.address[1]
+        assert variables.address[0].address[1] == NULL        
         addr0 = gc.malloc(1)
         addr0.address[1] = NULL
         addr1 = gc.malloc(1)
@@ -83,7 +93,6 @@
         gc.collect()
         py.test.raises(MemorySimulatorError, "addr0.signed[0]")
         py.test.raises(MemorySimulatorError, "addr1.signed[0]")
-        py.test.raises(MemorySimulatorError, "addr2.signed[0]")
 
     def test_llinterp_lists(self):
         curr = simulator.current_size
@@ -137,3 +146,87 @@
         res = interpret(concat, [100])
         assert res == concat(100)
         assert simulator.current_size - curr < 16000
+
+
+class TestSemiSpaceGC(object):
+    def setup_class(cls):
+        cls.old = gclltype.use_gc  # save the previous collector BEFORE overriding it, so teardown_class can restore it
+        gclltype.use_gc = SemiSpaceGC  # gclltype instantiates use_gc, so later tests in this class run on the semispace GC
+    def teardown_class(cls):
+        gclltype.use_gc = cls.old  # put back whatever setup_class stored in cls.old
+
+    def test_simple(self):
+        variables = raw_malloc(4 * INT_SIZE)  # four slots that serve as the root set
+        roots = [variables + i * INT_SIZE for i in range(4)]
+        layout0 = [] #int
+        layout1 = [0, INT_SIZE] #(ptr, ptr)
+        om = PseudoObjectModel(roots, {0: layout0, 1: layout1}, {0: INT_SIZE, 1: 2 * INT_SIZE})
+        gc = SemiSpaceGC(om, 2 ** 16)  # 2 ** 16 is the space_size of each semispace
+        variables.address[0] = gc.malloc(0)
+        variables.address[1] = gc.malloc(0)
+        variables.address[2] = gc.malloc(0)
+        variables.address[3] = gc.malloc(0)
+        variables.address[0].signed[0] = 0
+        variables.address[1].signed[0] = 1
+        variables.address[2].signed[0] = 2
+        variables.address[3].signed[0] = 3
+        print "roots", roots
+        gc.collect() #does not crash
+        assert variables.address[0].signed[0] == 0  # contents must survive the collection, read through the updated roots
+        assert variables.address[1].signed[0] == 1
+        assert variables.address[2].signed[0] == 2
+        assert variables.address[3].signed[0] == 3
+        addr = gc.malloc(0)  # not stored in any root slot
+        addr.signed[0] = 1
+        print "roots", roots
+        gc.collect()
+##         py.test.raises(MemorySimulatorError, "addr.signed[0]")
+        variables.address[0] = gc.malloc(1)
+        variables.address[0].address[0] = variables.address[1]
+        variables.address[0].address[1] = NULL
+        print "roots", roots
+        gc.collect() #does not crash
+        assert variables.address[0].address[0] == variables.address[1]  # the interior pointer and the root must agree after collection
+        assert variables.address[0].address[1] == NULL        
+        addr0 = gc.malloc(1)
+        addr0.address[1] = NULL
+        addr1 = gc.malloc(1)
+        addr1.address[0] = addr1.address[1] = NULL
+        addr0.address[0] = addr1
+        addr2 = variables.address[1]
+        print "addr0, addr1, addr2 =", addr0, addr1, addr2
+        variables.address[1] == NULL  # NOTE(review): this is a comparison, not an assignment; the root slot is left unchanged
+        variables.address[0].address[0] = NULL
+        print "roots", roots
+        gc.collect()
+
+    def test_llinterp_lists(self):
+        curr = simulator.current_size  # simulator size before the run, used as the baseline below
+        def malloc_a_lot():
+            i = 0
+            while i < 10:
+                i += 1
+                a = [1] * 10
+                j = 0
+                while j < 20:
+                    j += 1
+                    a.append(j)
+        res = interpret(malloc_a_lot, [])  # res is not inspected; only the size growth is checked
+        assert simulator.current_size - curr < 16000
+        print "size before: %s, size after %s" % (curr, simulator.current_size)
+
+    def test_llinterp_tuples(self):
+        curr = simulator.current_size  # simulator size before the run, used as the baseline below
+        def malloc_a_lot():
+            i = 0
+            while i < 10:
+                i += 1
+                a = (1, 2, i)
+                b = [a] * 10
+                j = 0
+                while j < 20:
+                    j += 1
+                    b.append((1, j, i))
+        res = interpret(malloc_a_lot, [])  # res is not inspected; only the size growth is checked
+        assert simulator.current_size - curr < 16000
+        print "size before: %s, size after %s" % (curr, simulator.current_size)



More information about the Pypy-commit mailing list