[pypy-svn] r47255 - in pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem: . test

arigo at codespeak.net
Sun Oct 7 12:17:17 CEST 2007


Author: arigo
Date: Sun Oct  7 12:17:15 2007
New Revision: 47255

Added:
   pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/llarena.py   (contents, props changed)
   pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/test/test_llarena.py   (contents, props changed)
Log:
Experimental.  Arenas that allow objects to be put anywhere in them, in
any order.  When run directly (or via the llinterp), they do best-effort
checks against overflows and overlaps by using the raw_malloc_usage()
estimate.  The advantage is that address arithmetic can be used directly
to handle the arenas, as the existing GCs do.  A drawback is that the
implementation looks a little bit like the memory simulator, but at
least it tries to be reasonably integrated with the rest of lltype and
llmemory.
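
Roughly, the intended usage looks like this (a sketch only, not part of
the committed files; it relies on the interface added below and on the
existing llmemory helpers, and the struct S, the names p1/p2 and the
arena size 100 are made up for the example):

    from pypy.rpython.lltypesystem import lltype, llmemory
    from pypy.rpython.lltypesystem.llarena import arena_malloc

    S = lltype.Struct('S', ('x', lltype.Signed))
    ssize = llmemory.raw_malloc_usage(llmemory.sizeof(S))

    a = arena_malloc(100, True)            # address of the arena start
    p1 = llmemory.cast_adr_to_ptr(a, lltype.Ptr(S))          # object at offset 0
    p2 = llmemory.cast_adr_to_ptr(a + ssize, lltype.Ptr(S))  # object right after it
    p1.x = 1
    p2.x = 2
    assert (a + ssize) - a == ssize        # plain address arithmetic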

For now I'll make arena_malloc, arena_free and arena_reset new
lloperations, instead of using raw_malloc with a symbolic arena size.  I
can imagine situations where it's more reasonable for the backend to use
different implementations for allocating small objects and for allocating
whole arenas (e.g. mmap for the latter).  Similarly, arena_reset can
optionally fill the arena with
zero, which for very large amounts of memory can be implemented in more
efficient ways than just memset().
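
The three operations are then meant to be chained along these lines
(again just a sketch; the booleans are the 'zero' flags taken by the
functions below, and the size 4096 is arbitrary):

    from pypy.rpython.lltypesystem.llarena import (arena_malloc,
                                                   arena_reset, arena_free)

    a = arena_malloc(4096, False)   # new arena, not zero-filled
    # ... place objects in it via address arithmetic and casts ...
    arena_reset(a, True)            # drop all objects, zero-fill for reuse
    # ... reuse the memory for a new round of objects ...
    arena_free(a)                   # release the whole arena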



Added: pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/llarena.py
==============================================================================
--- (empty file)
+++ pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/llarena.py	Sun Oct  7 12:17:15 2007
@@ -0,0 +1,143 @@
+import array
+from pypy.rpython.lltypesystem import lltype, llmemory
+
+# An "arena" is a large area of memory which can hold a number of
+# objects, not necessarily all of the same type or size.  It's used by
+# some of our framework GCs.  Addresses that point inside arenas support
+# direct arithmetic: adding and subtracting integers, and taking the
+# difference of two addresses.  When not translated to C, the arena
+# keeps track of which bytes are used by what object to detect GC bugs;
+# it internally uses raw_malloc_usage() to estimate the number of bytes
+# it needs to reserve.
+
+class ArenaError(Exception):
+    pass
+
+class Arena(object):
+
+    def __init__(self, nbytes, zero):
+        self.nbytes = nbytes
+        self.usagemap = array.array('c')
+        self.objects = {}
+        self.freed = False
+        self.reset(zero)
+
+    def reset(self, zero):
+        self.check()
+        for obj in self.objects.itervalues():
+            obj._free()
+        self.objects.clear()
+        if zero:
+            initialbyte = "0"
+        else:
+            initialbyte = "#"
+        self.usagemap[:] = array.array('c', initialbyte * self.nbytes)
+
+    def check(self):
+        if self.freed:
+            raise ArenaError("arena was already freed")
+
+    def _getid(self):
+        address, length = self.usagemap.buffer_info()
+        return address
+
+    def getaddr(self, offset):
+        if not (0 <= offset <= self.nbytes):
+            raise ArenaError("Address offset is outside the arena")
+        return fakearenaaddress(self, offset)
+
+    def allocate_object(self, offset, TYPE):
+        self.check()
+        size = llmemory.raw_malloc_usage(llmemory.sizeof(TYPE))
+        if offset + size > self.nbytes:
+            raise ArenaError("object overflows beyond the end of the arena")
+        zero = True
+        for c in self.usagemap[offset:offset+size]:
+            if c == '0':
+                pass
+            elif c == '#':
+                zero = False
+            else:
+                raise ArenaError("new object overlaps a previous object")
+        p = lltype.malloc(TYPE, flavor='raw', zero=zero)
+        self.usagemap[offset:offset+size] = array.array('c', 'X' * size)
+        self.objects[offset] = p._obj
+
+class fakearenaaddress(llmemory.fakeaddress):
+
+    def __init__(self, arena, offset):
+        self.arena = arena
+        self.offset = offset
+
+    def _getptr(self):
+        try:
+            obj = self.arena.objects[self.offset]
+        except KeyError:
+            self.arena.check()
+            raise ArenaError("don't know yet what type of object "
+                             "is at offset %d" % (self.offset,))
+        return obj._as_ptr()
+    ptr = property(_getptr)
+
+    def __repr__(self):
+        return '<arenaaddr %s + %d>' % (self.arena, self.offset)
+
+    def __add__(self, other):
+        if isinstance(other, llmemory.AddressOffset):
+            other = llmemory.raw_malloc_usage(other)
+        if isinstance(other, (int, long)):
+            return self.arena.getaddr(self.offset + other)
+        return NotImplemented
+
+    def __sub__(self, other):
+        if isinstance(other, llmemory.AddressOffset):
+            other = llmemory.raw_malloc_usage(other)
+        if isinstance(other, (int, long)):
+            return self.arena.getaddr(self.offset - other)
+        if isinstance(other, fakearenaaddress):
+            if self.arena is not other.arena:
+                raise ArenaError("The two addresses are from different arenas")
+            return self.offset - other.offset
+        return NotImplemented
+
+    def __nonzero__(self):
+        return True
+
+    def __eq__(self, other):
+        if isinstance(other, fakearenaaddress):
+            return self.arena is other.arena and self.offset == other.offset
+        else:
+            return llmemory.fakeaddress.__eq__(self, other)
+
+    def _cast_to_ptr(self, EXPECTED_TYPE):
+        # the first cast determines what object type is at this address
+        if self.offset not in self.arena.objects:
+            self.arena.allocate_object(self.offset, EXPECTED_TYPE.TO)
+        return llmemory.fakeaddress._cast_to_ptr(self, EXPECTED_TYPE)
+
+    def _cast_to_int(self):
+        return self.arena._getid() + self.offset
+
+# ____________________________________________________________
+#
+# Public interface: arena_malloc(), arena_free() and arena_reset()
+# which directly correspond to lloperations.  Although the operations
+# are similar to raw_malloc(), raw_free() and raw_memclear(), the
+# backend can choose a different implementation for arenas, one that
+# is more suited to very large chunks of memory.
+
+def arena_malloc(nbytes, zero):
+    """Allocate and return a new arena, optionally zero-initialized."""
+    return Arena(nbytes, zero).getaddr(0)
+
+def arena_free(arena_addr):
+    """Release an arena."""
+    arena_reset(arena_addr, False)
+    arena_addr.arena.freed = True
+
+def arena_reset(arena_addr, zero):
+    """Free all objects in the arena, which can then be reused.
+    The arena is filled with zeroes if 'zero' is True."""
+    assert isinstance(arena_addr, fakearenaaddress)
+    assert arena_addr.offset == 0
+    arena_addr.arena.reset(zero)

Added: pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/test/test_llarena.py
==============================================================================
--- (empty file)
+++ pypy/branch/kill-keepalives-again/pypy/rpython/lltypesystem/test/test_llarena.py	Sun Oct  7 12:17:15 2007
@@ -0,0 +1,47 @@
+import py
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem.llmemory import cast_adr_to_ptr
+from pypy.rpython.lltypesystem.llarena import arena_malloc, arena_reset
+from pypy.rpython.lltypesystem.llarena import ArenaError
+
+def test_arena():
+    S = lltype.Struct('S', ('x',lltype.Signed))
+    SPTR = lltype.Ptr(S)
+    ssize = llmemory.raw_malloc_usage(llmemory.sizeof(S))
+    a = arena_malloc(2*ssize+1, False)
+
+    s1_ptr1 = cast_adr_to_ptr(a, SPTR)
+    s1_ptr1.x = 1
+    s1_ptr2 = cast_adr_to_ptr(a, SPTR)
+    assert s1_ptr2.x == 1
+    assert s1_ptr1 == s1_ptr2
+
+    s2_ptr1 = cast_adr_to_ptr(a + ssize + 1, SPTR)
+    py.test.raises(lltype.UninitializedMemoryAccess, 's2_ptr1.x')
+    s2_ptr1.x = 2
+    s2_ptr2 = cast_adr_to_ptr(a + ssize + 1, SPTR)
+    assert s2_ptr2.x == 2
+    assert s2_ptr1 == s2_ptr2
+    assert s1_ptr1 != s2_ptr1
+    assert not (s2_ptr2 == s1_ptr2)
+    assert s1_ptr1 == cast_adr_to_ptr(a, SPTR)
+
+    S2 = lltype.Struct('S2', ('y',lltype.Char))
+    S2PTR = lltype.Ptr(S2)
+    py.test.raises(TypeError, cast_adr_to_ptr, a, S2PTR)
+    py.test.raises(ArenaError, cast_adr_to_ptr, a+1, SPTR)
+    py.test.raises(ArenaError, cast_adr_to_ptr, a+ssize, SPTR)
+    py.test.raises(ArenaError, cast_adr_to_ptr, a+2*ssize, SPTR)
+    py.test.raises(ArenaError, cast_adr_to_ptr, a+2*ssize+1, SPTR)
+
+    arena_reset(a, True)
+    s1_ptr1 = cast_adr_to_ptr(a, SPTR)
+    assert s1_ptr1.x == 0
+    s1_ptr1.x = 5
+
+    s2_ptr1 = cast_adr_to_ptr(a + ssize, S2PTR)
+    assert s2_ptr1.y == '\x00'
+    s2_ptr1.y = 'X'
+
+    assert cast_adr_to_ptr(a + 0, SPTR).x == 5
+    assert cast_adr_to_ptr((a + ssize + 1) - 1, S2PTR).y == 'X'


