[pypy-commit] stmgc c7-refactor: Next test passes.

arigo noreply at buildbot.pypy.org
Mon Feb 10 20:30:31 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: c7-refactor
Changeset: r725:ef01288963ea
Date: 2014-02-10 20:30 +0100
http://bitbucket.org/pypy/stmgc/changeset/ef01288963ea/

Log:	Next test passes.

diff --git a/c7/list.c b/c7/list.c
deleted file mode 100644
--- a/c7/list.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include "list.h"
-
-
-#define SETSIZE(n)    (sizeof(struct stm_list_s) + ITEMSSIZE(n))
-#define ITEMSSIZE(n)  ((n) * sizeof(object_t*))
-#define OVERCNT(n)    (33 + ((((n) / 2) * 3) | 1))
-
-struct stm_list_s *stm_list_create(void)
-{
-    uintptr_t initial_allocation = 32;
-    struct stm_list_s *lst = malloc(SETSIZE(initial_allocation));
-    if (lst == NULL) {
-        perror("out of memory in stm_list_create");
-        abort();
-    }
-    lst->count = 0;
-    lst->last_allocated = initial_allocation - 1;
-    assert(lst->last_allocated & 1);
-    return lst;
-}
-
-struct stm_list_s *_stm_list_grow(struct stm_list_s *lst, uintptr_t nalloc)
-{
-    assert(lst->last_allocated & 1);
-    nalloc = OVERCNT(nalloc);
-    lst = realloc(lst, SETSIZE(nalloc));
-    if (lst == NULL) {
-        perror("out of memory in _stm_list_grow");
-        abort();
-    }
-    lst->last_allocated = nalloc - 1;
-    assert(lst->last_allocated & 1);
-    return lst;
-}
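
For reference, the growth policy being deleted here relies on OVERCNT
producing an even allocation count: (((n)/2)*3) | 1 is odd and 33 is odd,
so their sum is even and 'last_allocated = nalloc - 1' stays odd, which
the union in list.h depends on. A standalone sketch of the arithmetic
(not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define OVERCNT(n)    (33 + ((((n) / 2) * 3) | 1))

    int main(void)
    {
        uintptr_t n = 32;                   /* initial allocation in list.c */
        while (n < 100000) {
            uintptr_t nalloc = OVERCNT(n);  /* always even */
            printf("full at %lu -> grow to %lu (last_allocated=%lu, odd)\n",
                   (unsigned long)n, (unsigned long)nalloc,
                   (unsigned long)(nalloc - 1));
            n = nalloc;
        }
        return 0;
    }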
diff --git a/c7/list.h b/c7/list.h
deleted file mode 100644
--- a/c7/list.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _STM_LIST_H
-#define _STM_LIST_H
-
-#include "core.h"
-#include <stdlib.h>
-
-struct stm_list_s {
-    uintptr_t count;
-    union {
-        uintptr_t last_allocated;       /* always odd */
-        //struct stm_list_s *nextlist;    /* always even */
-    };
-    object_t *items[];
-};
-
-struct stm_list_s *stm_list_create(void);
-
-static inline void stm_list_free(struct stm_list_s *lst)
-{
-    free(lst);
-}
-
-
-struct stm_list_s *_stm_list_grow(struct stm_list_s *, uintptr_t);
-
-static inline struct stm_list_s *
-stm_list_append(struct stm_list_s *lst, object_t *item)
-{
-    uintptr_t index = lst->count++;
-    if (UNLIKELY(index > lst->last_allocated))
-        lst = _stm_list_grow(lst, index);
-    lst->items[index] = item;
-    return lst;
-}
-
-#define LIST_APPEND(lst, e) {                   \
-        lst = stm_list_append(lst, e);          \
-    }
-
-static inline void stm_list_clear(struct stm_list_s *lst)
-{
-    lst->count = 0;
-}
-
-static inline bool stm_list_is_empty(struct stm_list_s *lst)
-{
-    return (lst->count == 0);
-}
-
-static inline uintptr_t stm_list_count(struct stm_list_s *lst)
-{
-    return lst->count;
-}
-
-static inline object_t *stm_list_pop_item(struct stm_list_s *lst)
-{
-    return lst->items[--lst->count];
-}
-
-static inline object_t *stm_list_item(struct stm_list_s *lst, uintptr_t index)
-{
-    return lst->items[index];
-}
-
-#define STM_LIST_FOREACH(lst, CODE)             \
-    do {                                        \
-        struct stm_list_s *_lst = (lst);        \
-        uintptr_t _i;                           \
-        for (_i = _lst->count; _i--; ) {        \
-            object_t *item = _lst->items[_i];   \
-            CODE;                               \
-        }                                       \
-    } while (0)
-
-
-#endif
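
For anyone updating callers, here is a minimal sketch of how this
interface was used (not part of the patch; 'visit' and 'some_object'
are hypothetical):

    static void collect_roots(object_t *some_object,
                              void (*visit)(object_t *))
    {
        struct stm_list_s *lst = stm_list_create();

        LIST_APPEND(lst, some_object);  /* may realloc: the macro rebinds lst */

        STM_LIST_FOREACH(lst, ({        /* iterates from newest to oldest */
            visit(item);                /* 'item' is bound by the macro */
        }));

        stm_list_clear(lst);            /* count = 0, keeps the allocation */
        stm_list_free(lst);
    }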
diff --git a/c7/nursery.c b/c7/nursery.c
deleted file mode 100644
--- a/c7/nursery.c
+++ /dev/null
@@ -1,355 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <asm/prctl.h>
-#include <sys/prctl.h>
-#include <pthread.h>
-
-
-#include "core.h"
-#include "list.h"
-#include "nursery.h"
-#include "pages.h"
-#include "stmsync.h"
-#include "largemalloc.h"
-
-void stm_major_collection(void)
-{
-    assert(_STM_TL->active);
-    abort();
-}
-
-
-bool _stm_is_young(object_t *o)
-{
-    assert((uintptr_t)o >= FIRST_NURSERY_PAGE * 4096);
-    return (uintptr_t)o < FIRST_AFTER_NURSERY_PAGE * 4096;
-}
-
-
-object_t *_stm_allocate_old(size_t size)
-{
-    object_t* o = stm_large_malloc(size);
-    memset(real_address(o), 0, size);
-    o->stm_flags |= GCFLAG_WRITE_BARRIER;
-    return o;
-}
-
-object_t *stm_allocate_prebuilt(size_t size)
-{
-    object_t* res = _stm_allocate_old(size);  /* XXX */
-    return res;
-}
-
-localchar_t *_stm_alloc_next_page(size_t size_class)
-{
-    /* may return uninitialized pages */
-    
-    /* 'alloc->next' points to where the next allocation should go.  The
-       present function is called instead when this next allocation is
-       equal to 'alloc->stop'.  As we know that 'start', 'next' and
-       'stop' are always nearby pointers, we play tricks and only store
-       the lower 16 bits of 'start' and 'stop', so that the three
-       variables plus some flags fit in 16 bytes.
-    */
-    uintptr_t page;
-    localchar_t *result;
-    alloc_for_size_t *alloc = &_STM_TL->alloc[size_class];
-    size_t size = size_class * 8;
-
-    /* reserve a fresh new page (XXX: from the end!) */
-    page = stm_pages_reserve(1);
-
-    assert(memset(real_address((object_t*)(page * 4096)), 0xdd, 4096));
-    
-    result = (localchar_t *)(page * 4096UL);
-    alloc->start = (uintptr_t)result;
-    alloc->stop = alloc->start + (4096 / size) * size;
-    alloc->next = result + size;
-    alloc->flag_partial_page = false;
-    return result;
-}
-
-object_t *stm_big_small_alloc_old(size_t size, bool *is_small)
-{
-    /* may return uninitialized objects */
-    object_t *result;
-    size_t size_class = size / 8;
-    assert(size_class >= 2);
-    
-    if (size_class >= LARGE_OBJECT_WORDS) {
-        result = stm_large_malloc(size);
-        *is_small = 0;
-    } else {
-        *is_small = 1;
-        alloc_for_size_t *alloc = &_STM_TL->alloc[size_class];
-        
-        if ((uint16_t)((uintptr_t)alloc->next) == alloc->stop) {
-            result = (object_t *)_stm_alloc_next_page(size_class);
-        } else {
-            result = (object_t *)alloc->next;
-            alloc->next += size;
-        }
-    }
-    return result;
-}
-
-
-
-void trace_if_young(object_t **pobj)
-{
-    /* takes a normal pointer to a thread-local pointer to an object */
-    if (*pobj == NULL)
-        return;
-    if (!_stm_is_young(*pobj))
-        return;
-
-    /* the location the object moved to is at an 8-byte offset */
-    localchar_t *temp = ((localchar_t *)(*pobj)) + 8;
-    object_t * TLPREFIX *pforwarded = (object_t* TLPREFIX *)temp;
-    if ((*pobj)->stm_flags & GCFLAG_MOVED) {
-        *pobj = *pforwarded;
-        return;
-    }
-
-    /* move obj to somewhere else */
-    size_t size = stmcb_size(real_address(*pobj));
-    bool is_small;
-    object_t *moved = stm_big_small_alloc_old(size, &is_small);
-
-    memcpy((void*)real_address(moved),
-           (void*)real_address(*pobj),
-           size);
-
-    /* object is not committed yet */
-    moved->stm_flags |= GCFLAG_NOT_COMMITTED;
-    if (is_small)              /* means, not allocated by large-malloc */
-        moved->stm_flags |= GCFLAG_SMALL;
-    assert(size == _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), moved)));
-    LIST_APPEND(_STM_TL->uncommitted_objects, moved);
-    
-    (*pobj)->stm_flags |= GCFLAG_MOVED;
-    *pforwarded = moved;
-    *pobj = moved;
-    
-    LIST_APPEND(_STM_TL->old_objects_to_trace, moved);
-}
-
-void minor_collect()
-{
-    /* visit shadowstack & add to old_obj_to_trace */
-    object_t **current = _STM_TL->shadow_stack;
-    object_t **base = _STM_TL->shadow_stack_base;
-    while (current-- != base) {
-        trace_if_young(current);
-    }
-    
-    /* visit old_obj_to_trace until empty */
-    struct stm_list_s *old_objs = _STM_TL->old_objects_to_trace;
-    while (!stm_list_is_empty(old_objs)) {
-        object_t *item = stm_list_pop_item(old_objs);
-
-        assert(!_stm_is_young(item));
-        assert(!(item->stm_flags & GCFLAG_WRITE_BARRIER));
-        
-        /* re-add write-barrier */
-        item->stm_flags |= GCFLAG_WRITE_BARRIER;
-        
-        stmcb_trace(real_address(item), trace_if_young);
-        old_objs = _STM_TL->old_objects_to_trace;
-    }
-
-    /* clear nursery */
-    localchar_t *nursery_base = (localchar_t*)(FIRST_NURSERY_PAGE * 4096);
-    memset((void*)real_address((object_t*)nursery_base), 0x0,
-           NURSERY_CURRENT(_STM_TL) - nursery_base);
-    SET_NURSERY_CURRENT(_STM_TL, nursery_base);
-}
-
-void _stm_minor_collect()
-{
-    minor_collect();
-}
-
-localchar_t *collect_and_reserve(size_t size)
-{
-    localchar_t *new_current = _STM_TL->nursery_current;
-
-    while (((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096)
-           && _STM_TL->nursery_current_halfwords[LENDIAN]) {
-        
-        _STM_TL->nursery_current_halfwords[LENDIAN] = 0;
-        _stm_start_safe_point(0);
-        /* no collect, it would mess with nursery_current */
-        _stm_stop_safe_point(0);
-        
-        new_current = _STM_TL->nursery_current;
-    }
-
-    if (!((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096)) {
-        /* after safe-point, new_current is actually fine again */
-        return new_current - size;
-    }
-    
-    /* reset nursery_current (left invalid by the caller) */
-    SET_NURSERY_CURRENT(_STM_TL, new_current - size);
-
-    minor_collect();
-
-    /* XXX: if we_want_major_collect: acquire EXCLUSIVE & COLLECT lock
-       and do it */
-
-    localchar_t *current = NURSERY_CURRENT(_STM_TL);
-    assert((uintptr_t)current + size <= FIRST_AFTER_NURSERY_PAGE * 4096);
-    SET_NURSERY_CURRENT(_STM_TL, current + size);
-    return current;
-}
-
-
-object_t *stm_allocate(size_t size)
-{
-    object_t *result;
-    
-    assert(_STM_TL->active);
-    assert(size % 8 == 0);
-    assert(16 <= size);
-
-    /* XXX move out of fastpath */
-    if (UNLIKELY(size >= NURSERY_SECTION)) {
-        /* allocate large objects outside the nursery immediately,
-           otherwise they may trigger too many minor collections
-           and degrade performance */
-        bool is_small;
-        result = stm_big_small_alloc_old(size, &is_small);
-
-        memset((void*)real_address(result), 0, size);
-
-        /* object is not committed yet */
-        result->stm_flags |= GCFLAG_NOT_COMMITTED;
-        if (is_small)              /* means, not allocated by large-malloc */
-            result->stm_flags |= GCFLAG_SMALL;
-        assert(size == _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), result)));
-
-        LIST_APPEND(_STM_TL->uncommitted_objects, result);
-        LIST_APPEND(_STM_TL->old_objects_to_trace, result);
-        return result;
-    }
-    
-    localchar_t *current = _STM_TL->nursery_current;
-    localchar_t *new_current = current + size;
-    SET_NURSERY_CURRENT(_STM_TL, new_current);
-
-    if ((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096) {
-        current = collect_and_reserve(size);
-    }
-
-    result = (object_t *)current;
-    return result;
-}
-
-
-void push_uncommitted_to_other_threads()
-{
-    /* WE HAVE THE EXCLUSIVE LOCK HERE */
-    
-    struct stm_list_s *uncommitted = _STM_TL->uncommitted_objects;
-    char *local_base = _STM_TL->thread_base;
-    char *remote_base = get_thread_base(1 - _STM_TL->thread_num);
-    
-    STM_LIST_FOREACH(
-        uncommitted,
-        ({
-            /* write-lock always cleared for these objects */
-            uintptr_t lock_idx;
-            assert(lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START);
-            assert(!write_locks[lock_idx]);
-
-            /* remove the flag (they are now committed) */
-            item->stm_flags &= ~GCFLAG_NOT_COMMITTED;
-
-            _stm_move_object(item,
-                REAL_ADDRESS(local_base, item),
-                REAL_ADDRESS(remote_base, item));
-        }));
-}
-
-void nursery_on_start()
-{
-    assert(stm_list_is_empty(_STM_TL->old_objects_to_trace));
-
-    _STM_TL->old_shadow_stack = _STM_TL->shadow_stack;
-}
-
-void nursery_on_commit()
-{
-    /* DON'T do a minor_collect. This is already done in
-       the caller (optimization) */
-    /* minor_collect(); */
-    
-    /* uncommitted objects */
-    push_uncommitted_to_other_threads();
-    stm_list_clear(_STM_TL->uncommitted_objects);
-
-    /* for small alloc classes, set the partial flag */
-    long j;
-    for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
-        alloc_for_size_t *alloc = &_STM_TL->alloc[j];
-        uint16_t start = alloc->start;
-        uint16_t cur = (uintptr_t)alloc->next;
-        
-        if (start == cur)
-            continue;           /* page full -> will be replaced automatically */
-        
-        alloc->start = cur;     /* next transaction has different 'start' to
-                                   reset in case of an abort */
-        alloc->flag_partial_page = 1;
-    }
-}
-
-void nursery_on_abort()
-{
-    /* reset shadowstack */
-    _STM_TL->shadow_stack = _STM_TL->old_shadow_stack;
-
-    /* clear old_objects_to_trace (they will have the WRITE_BARRIER flag
-       set because the ones we care about are also in modified_objects) */
-    stm_list_clear(_STM_TL->old_objects_to_trace);
-
-    /* clear the nursery */
-    localchar_t *nursery_base = (localchar_t*)(FIRST_NURSERY_PAGE * 4096);
-    memset((void*)real_address((object_t*)nursery_base), 0x0,
-           NURSERY_CURRENT(_STM_TL) - nursery_base);
-    SET_NURSERY_CURRENT(_STM_TL, nursery_base);
-
-
-    /* reset the alloc-pages to the state at the start of the transaction */
-    long j;
-    for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
-        alloc_for_size_t *alloc = &_STM_TL->alloc[j];
-        uint16_t num_allocated = ((uintptr_t)alloc->next) - alloc->start;
-        
-        if (num_allocated) {
-            /* forget about all non-committed objects */
-            alloc->next -= num_allocated;
-        }
-    }
-    
-    /* free uncommitted objects */
-    struct stm_list_s *uncommitted = _STM_TL->uncommitted_objects;
-    
-    STM_LIST_FOREACH(
-        uncommitted,
-        ({
-            if (!(item->stm_flags & GCFLAG_SMALL))
-                stm_large_free(item);
-        }));
-    
-    stm_list_clear(uncommitted);
-}
-
-
-
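The low-16-bit comparison in stm_big_small_alloc_old above deserves a
note: 'next' and 'stop' always point into the same 4096-byte span, so
they differ by far less than 2^16 and comparing their low halfwords is
enough. A standalone sketch (not part of the patch):

    #include <stdint.h>
    #include <stdbool.h>

    /* 'next' is a full pointer; only the low 16 bits of 'stop' are kept
       in alloc_for_size_t.  Equal low halves <=> the page is exhausted. */
    static bool small_page_exhausted(uintptr_t next, uint16_t stop_low16)
    {
        return (uint16_t)next == stop_low16;
    }
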
diff --git a/c7/nursery.h b/c7/nursery.h
deleted file mode 100644
--- a/c7/nursery.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-object_t *stm_allocate_prebuilt(size_t size);
-object_t *_stm_allocate_old(size_t size);
-object_t *stm_allocate(size_t size);
-
-void _stm_minor_collect();
-bool _stm_is_young(object_t *o);
-
-void nursery_on_abort();
-void nursery_on_commit();
-void nursery_on_start();
-
-
-
-extern uintptr_t index_page_never_used;
-    
-
diff --git a/c7/pagecopy.c b/c7/pagecopy.c
deleted file mode 100644
--- a/c7/pagecopy.c
+++ /dev/null
@@ -1,57 +0,0 @@
-
-void pagecopy(void *dest, const void *src)
-{
-    unsigned long i;
-    for (i=0; i<4096/128; i++) {
-        asm volatile("movdqa (%0), %%xmm0\n"
-                     "movdqa 16(%0), %%xmm1\n"
-                     "movdqa 32(%0), %%xmm2\n"
-                     "movdqa 48(%0), %%xmm3\n"
-                     "movdqa %%xmm0, (%1)\n"
-                     "movdqa %%xmm1, 16(%1)\n"
-                     "movdqa %%xmm2, 32(%1)\n"
-                     "movdqa %%xmm3, 48(%1)\n"
-                     "movdqa 64(%0), %%xmm0\n"
-                     "movdqa 80(%0), %%xmm1\n"
-                     "movdqa 96(%0), %%xmm2\n"
-                     "movdqa 112(%0), %%xmm3\n"
-                     "movdqa %%xmm0, 64(%1)\n"
-                     "movdqa %%xmm1, 80(%1)\n"
-                     "movdqa %%xmm2, 96(%1)\n"
-                     "movdqa %%xmm3, 112(%1)\n"
-                     :
-                     : "r"(src + 128*i), "r"(dest + 128*i)
-                     : "xmm0", "xmm1", "xmm2", "xmm3", "memory");
-    }
-}
-
-#if 0   /* XXX enable if detected on the cpu */
-void pagecopy_ymm8(void *dest, const void *src)
-{
-    asm volatile("0:\n"
-                 "vmovdqa (%0), %%ymm0\n"
-                 "vmovdqa 32(%0), %%ymm1\n"
-                 "vmovdqa 64(%0), %%ymm2\n"
-                 "vmovdqa 96(%0), %%ymm3\n"
-                 "vmovdqa 128(%0), %%ymm4\n"
-                 "vmovdqa 160(%0), %%ymm5\n"
-                 "vmovdqa 192(%0), %%ymm6\n"
-                 "vmovdqa 224(%0), %%ymm7\n"
-                 "addq $256, %0\n"
-                 "vmovdqa %%ymm0, (%1)\n"
-                 "vmovdqa %%ymm1, 32(%1)\n"
-                 "vmovdqa %%ymm2, 64(%1)\n"
-                 "vmovdqa %%ymm3, 96(%1)\n"
-                 "vmovdqa %%ymm4, 128(%1)\n"
-                 "vmovdqa %%ymm5, 160(%1)\n"
-                 "vmovdqa %%ymm6, 192(%1)\n"
-                 "vmovdqa %%ymm7, 224(%1)\n"
-                 "addq $256, %1\n"
-                 "cmpq %2, %0\n"
-                 "jne 0b"
-                 : "=r"(src), "=r"(dest)
-                 : "r"((char *)src + 4096), "0"(src), "1"(dest)
-                 : "xmm0", "xmm1", "xmm2", "xmm3",
-                   "xmm4", "xmm5", "xmm6", "xmm7");
-}
-#endif
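
On targets without SSE2 inline assembly, an equivalent of the deleted
pagecopy() is a plain 4096-byte memcpy, which compilers vectorize well;
a sketch (not part of the patch):

    #include <string.h>

    /* Portable stand-in; assumes page-aligned dest/src like the movdqa
       version did (movdqa faults on unaligned operands). */
    static void pagecopy_portable(void *dest, const void *src)
    {
        memcpy(dest, src, 4096);
    }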
diff --git a/c7/pagecopy.h b/c7/pagecopy.h
deleted file mode 100644
--- a/c7/pagecopy.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-void pagecopy(void *dest, const void *src);
diff --git a/c7/reader_writer_lock.c b/c7/reader_writer_lock.c
deleted file mode 100644
--- a/c7/reader_writer_lock.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Taken from: http://locklessinc.com/articles/locks/
-   
-   Sticking to semi-portable C code, we can still do a little better.
-   There exists a form of the ticket lock that is designed for read-write
-   locks. An example written in assembly was posted to the Linux kernel
-   mailing list in 2002 by David Howells from RedHat. This was a highly
-   optimized version of a read-write ticket lock developed at IBM in the
-   early 90's by Joseph Seigh. Note that a similar (but not identical)
-   algorithm was published by John Mellor-Crummey and Michael Scott in
-   their landmark paper "Scalable Reader-Writer Synchronization for
-   Shared-Memory Multiprocessors". Converting the algorithm from
-   assembly language to C yields:
-*/
-#include <assert.h>
-#include "reader_writer_lock.h"
-
-
-#define EBUSY 1
-#define atomic_xadd(P, V) __sync_fetch_and_add((P), (V))
-#define cmpxchg(P, O, N) __sync_val_compare_and_swap((P), (O), (N))
-#define atomic_inc(P) __sync_add_and_fetch((P), 1)
-#define atomic_dec(P) __sync_add_and_fetch((P), -1) 
-#define atomic_add(P, V) __sync_add_and_fetch((P), (V))
-#define atomic_set_bit(P, V) __sync_or_and_fetch((P), 1<<(V))
-#define atomic_clear_bit(P, V) __sync_and_and_fetch((P), ~(1<<(V)))
-/* Compile read-write barrier */
-#define barrier() asm volatile("": : :"memory")
-
-/* Pause instruction to prevent excess processor bus usage */ 
-#define cpu_relax() asm volatile("pause\n": : :"memory")
-
-
-
-void rwticket_wrlock(rwticket *l)
-{
-	unsigned me = atomic_xadd(&l->u, (1<<16));
-	unsigned char val = me >> 16;
-	
-	while (val != l->s.write) cpu_relax();
-}
-
-int rwticket_wrunlock(rwticket *l)
-{
-	rwticket t = *l;
-	
-	barrier();
-
-	t.s.write++;
-	t.s.read++;
-	
-	*(unsigned short *) l = t.us;
-    return 0;
-}
-
-int rwticket_wrtrylock(rwticket *l)
-{
-    unsigned cmp = l->u;
-    
-	unsigned me = cmp & 0xff;//l->s.users;
-	unsigned char menew = me + 1;
-    //	unsigned read = (cmp & 0xffff) >> 8;//l->s.read << 8;
-	//unsigned cmp = (me << 16) + read + me;
-	unsigned cmpnew = (menew << 16) | (cmp & 0x0000ffff); //(menew << 16) + read + me;
-
-	if (cmpxchg(&l->u, cmp, cmpnew) == cmp) return 0;
-	
-	return EBUSY;
-}
-
-void rwticket_rdlock(rwticket *l)
-{
-	unsigned me = atomic_xadd(&l->u, (1<<16));
-	unsigned char val = me >> 16;
-	
-	while (val != l->s.read) cpu_relax();
-	l->s.read++;
-}
-
-void rwticket_rdunlock(rwticket *l)
-{
-	atomic_inc(&l->s.write);
-}
-
-int rwticket_rdtrylock(rwticket *l)
-{
-    assert(0);
-    /* XXX implement like wrtrylock */
-	unsigned me = l->s.users;
-	unsigned write = l->s.write;
-	unsigned char menew = me + 1;
-	unsigned cmp = (me << 16) + (me << 8) + write;
-	unsigned cmpnew = ((unsigned) menew << 16) + (menew << 8) + write;
-
-	if (cmpxchg(&l->u, cmp, cmpnew) == cmp) return 0;
-	
-	return EBUSY;
-}
diff --git a/c7/reader_writer_lock.h b/c7/reader_writer_lock.h
deleted file mode 100644
--- a/c7/reader_writer_lock.h
+++ /dev/null
@@ -1,22 +0,0 @@
-
-typedef union rwticket rwticket;
-union rwticket
-{
-	unsigned u;
-	unsigned short us;
-	struct
-	{
-		unsigned char write;
-		unsigned char read;
-		unsigned char users;
-	} s;
-};
-
-void rwticket_wrlock(rwticket *l);
-int rwticket_wrunlock(rwticket *l);
-int rwticket_wrtrylock(rwticket *l);
-void rwticket_rdlock(rwticket *l);
-void rwticket_rdunlock(rwticket *l);
-int rwticket_rdtrylock(rwticket *l);
-
-
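The union packs three byte-wide counters into one word: on a
little-endian machine s.write is bits 0-7, s.read bits 8-15 and s.users
bits 16-23, so atomic_xadd(&l->u, 1<<16) draws a ticket by bumping
'users' and returns my ticket in bits 16-23 of the old value. An
uncontended sequence, as a sketch (not part of the patch):

    #include "reader_writer_lock.h"

    int main(void)
    {
        rwticket l = {0};
        rwticket_rdlock(&l);    /* draws ticket 0; s.read advances to 1 */
        rwticket_rdunlock(&l);  /* s.write advances to 1                */
        rwticket_wrlock(&l);    /* draws ticket 1; s.write is already 1 */
        rwticket_wrunlock(&l);  /* s.write and s.read advance to 2      */
        return 0;
    }
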
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -3,7 +3,23 @@
 #endif
 
 
+static void setup_gcpage(void)
+{
+    largemalloc_init_arena(stm_object_pages + END_NURSERY_PAGE * 4096UL,
+                           (NB_PAGES - END_NURSERY_PAGE) * 4096UL);
+}
+
 object_t *stm_allocate_prebuilt(ssize_t size_rounded_up)
 {
     abort();
 }
+
+object_t *_stm_allocate_old(ssize_t size_rounded_up)
+{
+    char *addr = large_malloc(size_rounded_up);
+    memset(addr, 0, size_rounded_up);
+
+    object_t* o = (object_t *)(addr - stm_object_pages);
+    o->stm_flags = GCFLAG_WRITE_BARRIER;
+    return o;
+}
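
The subtraction in _stm_allocate_old is the usual segment-offset
conversion: an object_t* is stored as an offset from the segment base,
so every thread segment can map the same object at its own address.
Both directions, as a sketch (not part of the patch; 'segment_base'
stands for any segment's base address):

    static inline object_t *real_to_offset(char *addr)
    {
        return (object_t *)(addr - stm_object_pages);   /* segment 0 base */
    }

    static inline char *offset_to_real(char *segment_base, object_t *o)
    {
        return segment_base + (uintptr_t)o;
    }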
diff --git a/c7/largemalloc.c b/c7/stm/largemalloc.c
rename from c7/largemalloc.c
rename to c7/stm/largemalloc.c
--- a/c7/largemalloc.c
+++ b/c7/stm/largemalloc.c
@@ -1,3 +1,7 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
 /* This contains a lot of inspiration from malloc() in the GNU C Library.
    More precisely, this is (a subset of) the part that handles large
    blocks, which in our case means at least 288 bytes.  It is actually
@@ -5,16 +9,6 @@
    or medium-block support that are also present in the GNU C Library.
 */
 
-#include <string.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <assert.h>
-#include "largemalloc.h"
-#include "pages.h"
-#include "pagecopy.h"
-
-#define MMAP_LIMIT    (1280*1024)
-
 #define largebin_index(sz)                                      \
     (((sz) < (48 <<  6)) ?      ((sz) >>  6):  /*  0 - 47 */    \
      ((sz) < (24 <<  9)) ? 42 + ((sz) >>  9):  /* 48 - 65 */    \
@@ -82,7 +76,7 @@
    list are some unsorted chunks.  All unsorted chunks are after all
    sorted chunks.  The flag 'FLAG_SORTED' distinguishes them.
 
-   Note that if the user always calls stm_large_malloc() with a large
+   Note that if the user always calls large_malloc() with a large
    enough argument, then the few bins corresponding to smaller values
    will never be sorted at all.  They are still populated with the
    fragments of space between bigger allocations.
@@ -90,62 +84,7 @@
 
 static dlist_t largebins[N_BINS];
 static mchunk_t *first_chunk, *last_chunk;
-uint8_t alloc_lock = 0;
 
-void _stm_chunk_pages(struct object_s *data, uintptr_t *start, uintptr_t *num)
-{
-    /* expects object_s in thread0-space */
-    /* returns the start page and number of pages that the *payload*
-       spans over. the CHUNK_HEADER is not included in the calculations */
-    mchunk_t *chunk = data2chunk((char*)data);
-    *start = (((char*)data) - get_thread_base(0)) / 4096UL;
-    assert(*start < NB_PAGES);
-    size_t offset_into_page = ((uintptr_t)data) & 4095UL; // % 4096
-    *num = ((chunk->size & ~FLAG_SORTED) + offset_into_page + 4095) / 4096UL;
-    assert(*num < NB_PAGES);
-}
-
-size_t _stm_data_size(struct object_s *data)
-{
-    if (data->stm_flags & GCFLAG_SMALL)
-        return stmcb_size(data); /* XXX: inefficient */
-    
-    mchunk_t *chunk = data2chunk((char*)data);
-    return chunk->size & ~FLAG_SORTED;
-}
-
-void _stm_move_object(object_t* obj, char *src, char *dst)
-{
-    /* XXX: should be thread-safe... */
-    
-    /* only copies if page is PRIVATE
-       XXX: various optimizations for objects with
-       multiple pages. E.g. using pagecopy or
-       memcpy over multiple PRIVATE pages. */
-    char *end = src + _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), obj));
-    uintptr_t pagenum, num;
-    struct object_s *t0_obj = (struct object_s*)REAL_ADDRESS(get_thread_base(0), obj);
-
-    if (obj->stm_flags & GCFLAG_SMALL) {
-        pagenum = (uintptr_t)obj / 4096UL;
-        num = 1;
-    } else { 
-        _stm_chunk_pages(t0_obj, &pagenum, &num);
-    }
-
-    while (src < end) {
-        size_t to_copy = 4096UL - ((uintptr_t)src & 4095UL);
-        if (to_copy > end - src)
-            to_copy = end - src;
-        if (stm_get_page_flag(pagenum) == PRIVATE_PAGE) {
-            memcpy(dst, src, to_copy);
-        }
-        
-        pagenum++;
-        src += to_copy;
-        dst += to_copy;
-    }
-}
 
 static void insert_unsorted(mchunk_t *new)
 {
@@ -231,11 +170,8 @@
         really_sort_bin(index);
 }
 
-object_t *stm_large_malloc(size_t request_size)
+static char *large_malloc(size_t request_size)
 {
-    while (__sync_lock_test_and_set(&alloc_lock, 1))
-        spin_loop();
-    
     /* 'request_size' should already be a multiple of the word size here */
     assert((request_size & (sizeof(char *)-1)) == 0);
 
@@ -272,7 +208,7 @@
     }
 
     /* not enough memory. */
-    alloc_lock = 0;
+    fprintf(stderr, "not enough memory!\n");
     abort();
     return NULL;
 
@@ -303,19 +239,12 @@
     }
     mscan->size = request_size;
     mscan->prev_size = BOTH_CHUNKS_USED;
-    
-    alloc_lock = 0;
-    return (object_t *)(((char *)&mscan->d) - get_thread_base(0));
+
+    return (char *)&mscan->d;
 }
 
-void stm_large_free(object_t *tldata)
+static void large_free(char *data)
 {
-    assert(!(tldata->stm_flags & GCFLAG_SMALL));
-    
-    while (__sync_lock_test_and_set(&alloc_lock, 1))
-        spin_loop();
-    
-    char *data = _stm_real_address(tldata);
     mchunk_t *chunk = data2chunk(data);
     assert((chunk->size & (sizeof(char *) - 1)) == 0);
     assert(chunk->prev_size != THIS_CHUNK_FREE);
@@ -373,8 +302,6 @@
     }
 
     insert_unsorted(chunk);
-
-    alloc_lock = 0;
 }
 
 
@@ -414,16 +341,13 @@
     assert(data - 16 == (char *)last_chunk);
 }
 
-char *_stm_largemalloc_data_start()
-{
-    return (char*)first_chunk;
-}
-
-void stm_largemalloc_init(char *data_start, size_t data_size)
+static void largemalloc_init_arena(char *data_start, size_t data_size)
 {
     int i;
-    for (i = 0; i < N_BINS; i++)
-        largebins[i].prev = largebins[i].next = &largebins[i];
+    for (i = 0; i < N_BINS; i++) {
+        largebins[i].prev = &largebins[i];
+        largebins[i].next = &largebins[i];
+    }
 
     assert(data_size >= 2 * sizeof(struct malloc_chunk));
     assert((data_size & 31) == 0);
@@ -438,10 +362,8 @@
     insert_unsorted(first_chunk);
 }
 
-int stm_largemalloc_resize_arena(size_t new_size)
+static int largemalloc_resize_arena(size_t new_size)
 {
-    /* XXX not thread-safe regarding all functions here... */
-    
     assert(new_size >= 2 * sizeof(struct malloc_chunk));
     assert((new_size & 31) == 0);
 
@@ -490,7 +412,7 @@
         assert(last_chunk == next_chunk_u(old_last_chunk));
 
         /* then free the last_chunk (turn it from "used" to "free") */
-        stm_large_free((object_t *)(((char *)&old_last_chunk->d) - get_thread_base(0)));
+        large_free((char *)&old_last_chunk->d);
     }
     return 1;
 }
diff --git a/c7/largemalloc.h b/c7/stm/largemalloc.h
rename from c7/largemalloc.h
rename to c7/stm/largemalloc.h
--- a/c7/largemalloc.h
+++ b/c7/stm/largemalloc.h
@@ -1,17 +1,11 @@
-#include <stdlib.h>
-#include "core.h"
 
-void stm_largemalloc_init(char *data_start, size_t data_size);
-int stm_largemalloc_resize_arena(size_t new_size);
+/* all addresses passed to this interface should be "char *" pointers
+   in segment 0. */
+static void largemalloc_init_arena(char *data_start, size_t data_size);
+static int largemalloc_resize_arena(size_t new_size) __attribute__((unused));
 
-object_t *stm_large_malloc(size_t request_size);
-void stm_large_free(object_t *data);
-
-void _stm_large_dump(void);
-char *_stm_largemalloc_data_start(void);
-
-void _stm_move_object(object_t *obj, char *src, char *dst);
-size_t _stm_data_size(struct object_s *data);
-void _stm_chunk_pages(struct object_s *data, uintptr_t *start, uintptr_t *num);
-                
-
+/* large_malloc() and large_free() are not thread-safe.  This is fine
+   because they are meant to be called mostly during minor or major
+   collections, which have their own synchronization mechanisms. */
+static char *large_malloc(size_t request_size);
+static void large_free(char *data) __attribute__((unused));
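
Putting the renamed entry points together, the intended single-threaded
calling pattern looks like this sketch (not part of the patch; the 512
is a made-up request size, which must be a multiple of the word size):

    /* during setup, as in setup_gcpage() above: */
    largemalloc_init_arena(stm_object_pages + END_NURSERY_PAGE * 4096UL,
                           (NB_PAGES - END_NURSERY_PAGE) * 4096UL);

    /* later, from collection code only (no locking is done): */
    char *p = large_malloc(512);
    /* ... fill the 512 bytes ... */
    large_free(p);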
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -42,8 +42,8 @@
 
 bool _stm_in_nursery(object_t *obj)
 {
-    uint64_t p = (uint64_t)obj;
-    return (p - NURSERY_START) < NURSERY_SIZE;
+    assert((uintptr_t)obj >= NURSERY_START);
+    return (uintptr_t)obj < NURSERY_START + NURSERY_SIZE;
 }
 
 
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -61,6 +61,7 @@
 
     setup_sync();
     setup_nursery();
+    setup_gcpage();
 
 #if 0
     stm_largemalloc_init(heap, HEAP_PAGES * 4096UL);
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -3,10 +3,12 @@
 #include "stm/core.h"
 #include "stm/pages.h"
 #include "stm/sync.h"
+#include "stm/largemalloc.h"
 
 #include "stm/misc.c"
 #include "stm/pages.c"
 #include "stm/gcpage.c"
+#include "stm/largemalloc.c"
 #include "stm/nursery.c"
 #include "stm/sync.c"
 #include "stm/setup.c"
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -73,6 +73,8 @@
 char *_stm_real_address(object_t *o);
 object_t *_stm_segment_address(char *ptr);
 void _stm_test_switch(stm_thread_local_t *tl);
+object_t *_stm_allocate_old(ssize_t size_rounded_up);
+void _stm_large_dump(void);
 #endif
 
 #define _STM_GCFLAG_WRITE_BARRIER  0x01
diff --git a/c7/stmsync.c b/c7/stmsync.c
deleted file mode 100644
--- a/c7/stmsync.c
+++ /dev/null
@@ -1,306 +0,0 @@
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/syscall.h>
-#include <sys/prctl.h>
-#include <asm/prctl.h>
-#include <semaphore.h>
-
-#include "stmsync.h"
-#include "core.h"
-#include "reader_writer_lock.h"
-#include "list.h"
-
-#define INVALID_GS_VALUE  0x6D6D6D6D
-
-/* a multi-reader, single-writer lock: transactions normally take a reader
-   lock, so don't conflict with each other; when we need to do a global GC,
-   we take a writer lock to "stop the world". */
-
-rwticket rw_shared_lock __attribute__((aligned(64)));        /* the "GIL" */
-rwticket rw_collection_lock __attribute__((aligned(64)));    /* for major collections */
-
-sem_t static_thread_semaphore __attribute__((aligned(64)));
-uint8_t static_threads[NB_THREADS]; /* 1 if running a pthread */
-__thread struct _thread_local1_s *pthread_tl = NULL;
-
-
-
-
-void _stm_acquire_tl_segment();
-void _stm_release_tl_segment();
-
-static void set_gs_register(uint64_t value)
-{
-    int result = syscall(SYS_arch_prctl, ARCH_SET_GS, value);
-    assert(result == 0);
-}
-
-bool _stm_is_in_transaction(void)
-{
-    return pthread_tl->active;
-}
-
-
-char* _stm_restore_local_state(int thread_num)
-{
-    if (thread_num == -1) {     /* mostly for debugging */
-        set_gs_register(INVALID_GS_VALUE);
-        return (char*)1;
-    }
-    
-    char *thread_base = get_thread_base(thread_num);
-    set_gs_register((uintptr_t)thread_base);
-
-    assert(_STM_TL->thread_num == thread_num);
-    assert(_STM_TL->thread_base == thread_base);
-    return thread_base;
-}
-
-
-void _stm_yield_thread_segment()
-{
-    _stm_release_tl_segment();
-    
-    /* release our static thread: */
-    static_threads[_STM_TL->thread_num] = 0;
-    sem_post(&static_thread_semaphore);
-    
-    assert(_stm_restore_local_state(-1)); /* invalid */
-}
-
-void _stm_grab_thread_segment()
-{
-    /* acquire a static thread: */
-    sem_wait(&static_thread_semaphore);
-    int thread_num = 0;
-    while (1) {
-        if (!__sync_lock_test_and_set(&static_threads[thread_num], 1))
-            break;
-        thread_num = (thread_num + 1) % NB_THREADS;
-    }
-    
-    _stm_restore_local_state(thread_num);
-    _stm_acquire_tl_segment();
-}
-
-
-void _stm_assert_clean_tl()
-{
-    /* across a pthread switch, these are the things
-       that must be guaranteed */
-    
-    /* already set are
-       thread_num, thread_base: to the current static thread
-       nursery_current: nursery should be cleared
-       active, need_abort: no transaction running
-       modified_objects: empty
-       alloc: re-usable by this thread
-       uncommitted_objects: empty
-       old_objects_to_trace: empty
-       !!shadow_stack...: still belongs to previous thread
-    */
-    assert(stm_list_is_empty(_STM_TL->modified_objects));
-    assert(stm_list_is_empty(_STM_TL->uncommitted_objects));
-    assert(stm_list_is_empty(_STM_TL->old_objects_to_trace));
-
-    assert(!_STM_TL->active);
-    /* assert(!_STM_TL->need_abort); may happen, but will be cleared by
-       start_transaction() */ 
-    assert(NURSERY_CURRENT(_STM_TL) == (localchar_t*)(FIRST_NURSERY_PAGE * 4096));
-}
-
-void _stm_acquire_tl_segment()
-{
-    /* makes tl-segment ours! */
-    _stm_assert_clean_tl();
-
-    _STM_TL->shadow_stack = pthread_tl->shadow_stack;
-    _STM_TL->shadow_stack_base = pthread_tl->shadow_stack_base;
-    _STM_TL->old_shadow_stack = pthread_tl->old_shadow_stack;
-}
-
-void _stm_release_tl_segment()
-{
-    /* gives the tl-segment back */
-    _stm_assert_clean_tl();
-
-    pthread_tl->shadow_stack = _STM_TL->shadow_stack;
-    pthread_tl->shadow_stack_base = _STM_TL->shadow_stack_base;
-    pthread_tl->old_shadow_stack = _STM_TL->old_shadow_stack;
-}
-
-void stm_setup_pthread(void)
-{
-    struct _thread_local1_s* tl = malloc(sizeof(struct _thread_local1_s));
-    assert(!pthread_tl);
-    pthread_tl = tl;
-    
-    /* get us a clean thread segment */
-    _stm_grab_thread_segment();
-    _stm_assert_clean_tl();
-    
-    /* allocate shadow stack for this thread */
-    _STM_TL->shadow_stack = (object_t**)malloc(LENGTH_SHADOW_STACK * sizeof(void*));
-    _STM_TL->shadow_stack_base = _STM_TL->shadow_stack;
-
-    /* copy everything from _STM_TL */
-    memcpy(tl, REAL_ADDRESS(get_thread_base(_STM_TL->thread_num), _STM_TL),
-           sizeof(struct _thread_local1_s));
-
-    /* go into safe-point again: */
-    _stm_yield_thread_segment();
-}
-
-
-void stm_teardown_pthread(void)
-{
-    free(pthread_tl->shadow_stack_base);
-    
-    free(pthread_tl);
-    pthread_tl = NULL;
-}
-
-
-
-
-
-void _stm_reset_shared_lock()
-{
-    assert(!rwticket_wrtrylock(&rw_shared_lock));
-    assert(!rwticket_wrunlock(&rw_shared_lock));
-
-    memset(&rw_shared_lock, 0, sizeof(rwticket));
-
-    assert(!rwticket_wrtrylock(&rw_collection_lock));
-    assert(!rwticket_wrunlock(&rw_collection_lock));
-
-    memset(&rw_collection_lock, 0, sizeof(rwticket));
-
-    int i;
-    for (i = 0; i < NB_THREADS; i++)
-        assert(static_threads[i] == 0);
-    memset(static_threads, 0, sizeof(static_threads));
-    sem_init(&static_thread_semaphore, 0, NB_THREADS);
-}
-
-/* void stm_acquire_collection_lock() */
-/* { */
-/*     /\* we must have the exclusive lock here and */
-/*        not the collection lock!! *\/ */
-/*     /\* XXX: for more than 2 threads, need a way */
-/*        to signal other threads with need_major_collect */
-/*        so that they don't leave COLLECT-safe-points */
-/*        when this flag is set. Otherwise we simply */
-/*        wait arbitrarily long until all threads reach */
-/*        COLLECT-safe-points by chance at the same time. *\/ */
-/*     while (1) { */
-/*         if (!rwticket_wrtrylock(&rw_collection_lock)) */
-/*             break;              /\* acquired! *\/ */
-        
-/*         stm_stop_exclusive_lock(); */
-/*         usleep(1); */
-/*         stm_start_exclusive_lock(); */
-/*         if (_STM_TL->need_abort) { */
-/*             stm_stop_exclusive_lock(); */
-/*             stm_start_shared_lock(); */
-/*             stm_abort_transaction(); */
-/*         } */
-/*     } */
-/* } */
-
-void stm_start_shared_lock(void)
-{
-    rwticket_rdlock(&rw_shared_lock); 
-}
-
-void stm_stop_shared_lock()
-{
-    rwticket_rdunlock(&rw_shared_lock); 
-}
-
-void stm_start_exclusive_lock(void)
-{
-    rwticket_wrlock(&rw_shared_lock);
-}
-
-void stm_stop_exclusive_lock(void)
-{
-    rwticket_wrunlock(&rw_shared_lock);
-}
-
-/* _stm_start_safe_point(LOCK_EXCLUSIVE|LOCK_COLLECT)
-   -> release the exclusive lock and also the collect-read-lock
-
-   THREAD_YIELD: gives up its (current thread's) GS segment
-   so that other threads can grab it and run. This will
-   make _STM_TL and all thread-local addresses unusable
-   for the current thread. (requires LOCK_COLLECT)
-*/
-void _stm_start_safe_point(uint8_t flags)
-{
-    assert(IMPLY(flags & THREAD_YIELD, flags & LOCK_COLLECT));
-    
-    if (flags & LOCK_EXCLUSIVE)
-        stm_stop_exclusive_lock();
-    else
-        stm_stop_shared_lock();
-    
-    if (flags & LOCK_COLLECT) {
-        rwticket_rdunlock(&rw_collection_lock);
-        
-        if (flags & THREAD_YIELD) {
-            _stm_yield_thread_segment();
-        }
-    }
-}
-
-/*
-  _stm_stop_safe_point(LOCK_COLLECT|LOCK_EXCLUSIVE);
-  -> reacquire the collect-read-lock and the exclusive lock
-
-  THREAD_YIELD: wait until we get a GS segment assigned
-  and then continue (requires LOCK_COLLECT)
- */
-void _stm_stop_safe_point(uint8_t flags)
-{
-    assert(IMPLY(flags & THREAD_YIELD, flags & LOCK_COLLECT));
-    if (flags & THREAD_YIELD) {
-        _stm_grab_thread_segment();
-    }
-    
-    if (flags & LOCK_EXCLUSIVE) {
-        stm_request_safe_point(1 - _STM_TL->thread_num);
-        stm_start_exclusive_lock();
-    } else {
-        stm_start_shared_lock();
-    }
-    
-    if (flags & LOCK_COLLECT) { /* if we released the collection lock */
-        /* acquire read-collection. always succeeds because
-           if there was a write-collection holder we would
-           also not have gotten the shared_lock */
-        rwticket_rdlock(&rw_collection_lock);
-    }
-    
-    if (_STM_TL->active && _STM_TL->need_abort) {
-        if (flags & LOCK_EXCLUSIVE) {
-            /* restore to shared-mode with the collection lock */
-            stm_stop_exclusive_lock();
-            stm_start_shared_lock();
-            stm_abort_transaction();
-        } else {
-            stm_abort_transaction();
-        }
-    }
-}
-
-
-
-void stm_request_safe_point(int thread_num)
-{
-    struct _thread_local1_s* other_tl = _stm_dbg_get_tl(thread_num);
-    other_tl->nursery_current_halfwords[LENDIAN] = 1;    
-}
-
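The flag pairing in the removed safe-point API was symmetric: whatever
_stm_start_safe_point releases, the matching _stm_stop_safe_point
reacquires. A sketch of a synchronous pause (not part of the patch):

    /* release the shared lock and the collect-read-lock, let other
       threads run (and possibly collect), then reacquire both: */
    _stm_start_safe_point(LOCK_COLLECT);
    /* ... blocking operation ... */
    _stm_stop_safe_point(LOCK_COLLECT);
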
diff --git a/c7/stmsync.h b/c7/stmsync.h
deleted file mode 100644
--- a/c7/stmsync.h
+++ /dev/null
@@ -1,30 +0,0 @@
-
-#include <stdint.h>
-
-void stm_start_shared_lock(void);
-void stm_stop_shared_lock(void);
-void stm_stop_exclusive_lock(void);
-void stm_start_exclusive_lock(void);
-void _stm_start_safe_point(uint8_t flags);
-void _stm_stop_safe_point(uint8_t flags);
-void _stm_reset_shared_lock(void);
-void _stm_grab_thread_segment(void);
-void _stm_yield_thread_segment(void);
-
-enum {
-    LOCK_COLLECT = (1 << 0),
-    LOCK_EXCLUSIVE = (1 << 1),
-    THREAD_YIELD = (1 << 2),
-};
-
-
-void stm_request_safe_point(int thread_num);
-
-
-#define NURSERY_CURRENT(tls)                                            \
-            ((localchar_t *)(uintptr_t)(                                \
-                (tls)->nursery_current_halfwords[1-LENDIAN]))
-
-#define SET_NURSERY_CURRENT(tls, new_value)                             \
-            ((tls)->nursery_current_halfwords[1-LENDIAN] =              \
-                (uintptr_t)(new_value))
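
The halfword macros above encode a cross-thread signalling trick:
nursery addresses fit in the low 32 bits of the pointer, so the real
'current' lives in one halfword, and writing 1 into the other halfword
(as stm_request_safe_point does) makes the full value compare far above
the nursery end, pushing the owner into its slow path on the next
allocation. A sketch of the layout (not part of the patch; assumes
little-endian, as LENDIAN suggests):

    #include <stdint.h>

    union nursery_word {
        uint64_t whole;       /* compared by the allocation fast path */
        uint32_t halves[2];   /* halves[1] is the high half (little-endian) */
    };

    /* another thread requests a safe point: */
    static void request_safe_point(union nursery_word *w)
    {
        w->halves[1] = 1;     /* whole >= 2^32, above any nursery bound */
    }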
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -42,6 +42,7 @@
 /*void stm_write(object_t *obj); use _checked_stm_write() instead */
 object_t *stm_allocate(ssize_t size_rounded_up);
 object_t *stm_allocate_prebuilt(ssize_t size_rounded_up);
+object_t *_stm_allocate_old(ssize_t size_rounded_up);
 
 void stm_setup(void);
 void stm_teardown(void);

