[pypy-commit] stmgc c7-refactor: Next test passes.
arigo
noreply at buildbot.pypy.org
Mon Feb 10 20:30:31 CET 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: c7-refactor
Changeset: r725:ef01288963ea
Date: 2014-02-10 20:30 +0100
http://bitbucket.org/pypy/stmgc/changeset/ef01288963ea/
Log: Next test passes.
diff --git a/c7/list.c b/c7/list.c
deleted file mode 100644
--- a/c7/list.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include "list.h"
-
-
-#define SETSIZE(n) (sizeof(struct stm_list_s) + ITEMSSIZE(n))
-#define ITEMSSIZE(n) ((n) * sizeof(object_t*))
-#define OVERCNT(n) (33 + ((((n) / 2) * 3) | 1))
-
-struct stm_list_s *stm_list_create(void)
-{
- uintptr_t initial_allocation = 32;
- struct stm_list_s *lst = malloc(SETSIZE(initial_allocation));
- if (lst == NULL) {
- perror("out of memory in stm_list_create");
- abort();
- }
- lst->count = 0;
- lst->last_allocated = initial_allocation - 1;
- assert(lst->last_allocated & 1);
- return lst;
-}
-
-struct stm_list_s *_stm_list_grow(struct stm_list_s *lst, uintptr_t nalloc)
-{
- assert(lst->last_allocated & 1);
- nalloc = OVERCNT(nalloc);
- lst = realloc(lst, SETSIZE(nalloc));
- if (lst == NULL) {
- perror("out of memory in _stm_list_grow");
- abort();
- }
- lst->last_allocated = nalloc - 1;
- assert(lst->last_allocated & 1);
- return lst;
-}
diff --git a/c7/list.h b/c7/list.h
deleted file mode 100644
--- a/c7/list.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _STM_LIST_H
-#define _STM_LIST_H
-
-#include "core.h"
-#include <stdlib.h>
-
-struct stm_list_s {
- uintptr_t count;
- union {
- uintptr_t last_allocated; /* always odd */
- //struct stm_list_s *nextlist; /* always even */
- };
- object_t *items[];
-};
-
-struct stm_list_s *stm_list_create(void);
-
-static inline void stm_list_free(struct stm_list_s *lst)
-{
- free(lst);
-}
-
-
-struct stm_list_s *_stm_list_grow(struct stm_list_s *, uintptr_t);
-
-static inline struct stm_list_s *
-stm_list_append(struct stm_list_s *lst, object_t *item)
-{
- uintptr_t index = lst->count++;
- if (UNLIKELY(index > lst->last_allocated))
- lst = _stm_list_grow(lst, index);
- lst->items[index] = item;
- return lst;
-}
-
-#define LIST_APPEND(lst, e) { \
- lst = stm_list_append(lst, e); \
- }
-
-static inline void stm_list_clear(struct stm_list_s *lst)
-{
- lst->count = 0;
-}
-
-static inline bool stm_list_is_empty(struct stm_list_s *lst)
-{
- return (lst->count == 0);
-}
-
-static inline bool stm_list_count(struct stm_list_s *lst)
-{
- return lst->count;
-}
-
-static inline object_t *stm_list_pop_item(struct stm_list_s *lst)
-{
- return lst->items[--lst->count];
-}
-
-static inline object_t *stm_list_item(struct stm_list_s *lst, uintptr_t index)
-{
- return lst->items[index];
-}
-
-#define STM_LIST_FOREACH(lst, CODE) \
- do { \
- struct stm_list_s *_lst = (lst); \
- uintptr_t _i; \
- for (_i = _lst->count; _i--; ) { \
- object_t *item = _lst->items[_i]; \
- CODE; \
- } \
- } while (0)
-
-
-#endif
diff --git a/c7/nursery.c b/c7/nursery.c
deleted file mode 100644
--- a/c7/nursery.c
+++ /dev/null
@@ -1,355 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <asm/prctl.h>
-#include <sys/prctl.h>
-#include <pthread.h>
-
-
-#include "core.h"
-#include "list.h"
-#include "nursery.h"
-#include "pages.h"
-#include "stmsync.h"
-#include "largemalloc.h"
-
-void stm_major_collection(void)
-{
- assert(_STM_TL->active);
- abort();
-}
-
-
-bool _stm_is_young(object_t *o)
-{
- assert((uintptr_t)o >= FIRST_NURSERY_PAGE * 4096);
- return (uintptr_t)o < FIRST_AFTER_NURSERY_PAGE * 4096;
-}
-
-
-object_t *_stm_allocate_old(size_t size)
-{
- object_t* o = stm_large_malloc(size);
- memset(real_address(o), 0, size);
- o->stm_flags |= GCFLAG_WRITE_BARRIER;
- return o;
-}
-
-object_t *stm_allocate_prebuilt(size_t size)
-{
- object_t* res = _stm_allocate_old(size); /* XXX */
- return res;
-}
-
-localchar_t *_stm_alloc_next_page(size_t size_class)
-{
- /* may return uninitialized pages */
-
- /* 'alloc->next' points to where the next allocation should go. The
- present function is called instead when this next allocation is
- equal to 'alloc->stop'. As we know that 'start', 'next' and
- 'stop' are always nearby pointers, we play tricks and only store
- the lower 16 bits of 'start' and 'stop', so that the three
- variables plus some flags fit in 16 bytes.
- */
- uintptr_t page;
- localchar_t *result;
- alloc_for_size_t *alloc = &_STM_TL->alloc[size_class];
- size_t size = size_class * 8;
-
- /* reserve a fresh new page (XXX: from the end!) */
- page = stm_pages_reserve(1);
-
- assert(memset(real_address((object_t*)(page * 4096)), 0xdd, 4096));
-
- result = (localchar_t *)(page * 4096UL);
- alloc->start = (uintptr_t)result;
- alloc->stop = alloc->start + (4096 / size) * size;
- alloc->next = result + size;
- alloc->flag_partial_page = false;
- return result;
-}
-
-object_t *stm_big_small_alloc_old(size_t size, bool *is_small)
-{
- /* may return uninitialized objects */
- object_t *result;
- size_t size_class = size / 8;
- assert(size_class >= 2);
-
- if (size_class >= LARGE_OBJECT_WORDS) {
- result = stm_large_malloc(size);
- *is_small = 0;
- } else {
- *is_small = 1;
- alloc_for_size_t *alloc = &_STM_TL->alloc[size_class];
-
- if ((uint16_t)((uintptr_t)alloc->next) == alloc->stop) {
- result = (object_t *)_stm_alloc_next_page(size_class);
- } else {
- result = (object_t *)alloc->next;
- alloc->next += size;
- }
- }
- return result;
-}
-
-
-
-void trace_if_young(object_t **pobj)
-{
- /* takes a normal pointer to a thread-local pointer to an object */
- if (*pobj == NULL)
- return;
- if (!_stm_is_young(*pobj))
- return;
-
- /* the location the object moved to is at an 8b offset */
- localchar_t *temp = ((localchar_t *)(*pobj)) + 8;
- object_t * TLPREFIX *pforwarded = (object_t* TLPREFIX *)temp;
- if ((*pobj)->stm_flags & GCFLAG_MOVED) {
- *pobj = *pforwarded;
- return;
- }
-
- /* move obj to somewhere else */
- size_t size = stmcb_size(real_address(*pobj));
- bool is_small;
- object_t *moved = stm_big_small_alloc_old(size, &is_small);
-
- memcpy((void*)real_address(moved),
- (void*)real_address(*pobj),
- size);
-
- /* object is not committed yet */
- moved->stm_flags |= GCFLAG_NOT_COMMITTED;
- if (is_small) /* means, not allocated by large-malloc */
- moved->stm_flags |= GCFLAG_SMALL;
- assert(size == _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), moved)));
- LIST_APPEND(_STM_TL->uncommitted_objects, moved);
-
- (*pobj)->stm_flags |= GCFLAG_MOVED;
- *pforwarded = moved;
- *pobj = moved;
-
- LIST_APPEND(_STM_TL->old_objects_to_trace, moved);
-}
-
-void minor_collect()
-{
- /* visit shadowstack & add to old_obj_to_trace */
- object_t **current = _STM_TL->shadow_stack;
- object_t **base = _STM_TL->shadow_stack_base;
- while (current-- != base) {
- trace_if_young(current);
- }
-
- /* visit old_obj_to_trace until empty */
- struct stm_list_s *old_objs = _STM_TL->old_objects_to_trace;
- while (!stm_list_is_empty(old_objs)) {
- object_t *item = stm_list_pop_item(old_objs);
-
- assert(!_stm_is_young(item));
- assert(!(item->stm_flags & GCFLAG_WRITE_BARRIER));
-
- /* re-add write-barrier */
- item->stm_flags |= GCFLAG_WRITE_BARRIER;
-
- stmcb_trace(real_address(item), trace_if_young);
- old_objs = _STM_TL->old_objects_to_trace;
- }
-
- /* clear nursery */
- localchar_t *nursery_base = (localchar_t*)(FIRST_NURSERY_PAGE * 4096);
- memset((void*)real_address((object_t*)nursery_base), 0x0,
- NURSERY_CURRENT(_STM_TL) - nursery_base);
- SET_NURSERY_CURRENT(_STM_TL, nursery_base);
-}
-
-void _stm_minor_collect()
-{
- minor_collect();
-}
-
-localchar_t *collect_and_reserve(size_t size)
-{
- localchar_t *new_current = _STM_TL->nursery_current;
-
- while (((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096)
- && _STM_TL->nursery_current_halfwords[LENDIAN]) {
-
- _STM_TL->nursery_current_halfwords[LENDIAN] = 0;
- _stm_start_safe_point(0);
- /* no collect, it would mess with nursery_current */
- _stm_stop_safe_point(0);
-
- new_current = _STM_TL->nursery_current;
- }
-
- if (!((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096)) {
- /* after safe-point, new_current is actually fine again */
- return new_current - size;
- }
-
- /* reset nursery_current (left invalid by the caller) */
- SET_NURSERY_CURRENT(_STM_TL, new_current - size);
-
- minor_collect();
-
- /* XXX: if we_want_major_collect: acquire EXCLUSIVE & COLLECT lock
- and do it */
-
- localchar_t *current = NURSERY_CURRENT(_STM_TL);
- assert((uintptr_t)current + size <= FIRST_AFTER_NURSERY_PAGE * 4096);
- SET_NURSERY_CURRENT(_STM_TL, current + size);
- return current;
-}
-
-
-object_t *stm_allocate(size_t size)
-{
- object_t *result;
-
- assert(_STM_TL->active);
- assert(size % 8 == 0);
- assert(16 <= size);
-
- /* XXX move out of fastpath */
- if (UNLIKELY(size >= NURSERY_SECTION)) {
- /* allocate large objects outside the nursery immediately,
- otherwise they may trigger too many minor collections
- and degrade performance */
- bool is_small;
- result = stm_big_small_alloc_old(size, &is_small);
-
- memset((void*)real_address(result), 0, size);
-
- /* object is not committed yet */
- result->stm_flags |= GCFLAG_NOT_COMMITTED;
- if (is_small) /* means, not allocated by large-malloc */
- result->stm_flags |= GCFLAG_SMALL;
- assert(size == _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), result)));
-
- LIST_APPEND(_STM_TL->uncommitted_objects, result);
- LIST_APPEND(_STM_TL->old_objects_to_trace, result);
- return result;
- }
-
- localchar_t *current = _STM_TL->nursery_current;
- localchar_t *new_current = current + size;
- SET_NURSERY_CURRENT(_STM_TL, new_current);
-
- if ((uintptr_t)new_current > FIRST_AFTER_NURSERY_PAGE * 4096) {
- current = collect_and_reserve(size);
- }
-
- result = (object_t *)current;
- return result;
-}
-
-
-void push_uncommitted_to_other_threads()
-{
- /* WE HAVE THE EXCLUSIVE LOCK HERE */
-
- struct stm_list_s *uncommitted = _STM_TL->uncommitted_objects;
- char *local_base = _STM_TL->thread_base;
- char *remote_base = get_thread_base(1 - _STM_TL->thread_num);
-
- STM_LIST_FOREACH(
- uncommitted,
- ({
- /* write-lock always cleared for these objects */
- uintptr_t lock_idx;
- assert(lock_idx = (((uintptr_t)item) >> 4) - READMARKER_START);
- assert(!write_locks[lock_idx]);
-
- /* remove the flag (they are now committed) */
- item->stm_flags &= ~GCFLAG_NOT_COMMITTED;
-
- _stm_move_object(item,
- REAL_ADDRESS(local_base, item),
- REAL_ADDRESS(remote_base, item));
- }));
-}
-
-void nursery_on_start()
-{
- assert(stm_list_is_empty(_STM_TL->old_objects_to_trace));
-
- _STM_TL->old_shadow_stack = _STM_TL->shadow_stack;
-}
-
-void nursery_on_commit()
-{
- /* DON'T do a minor_collect. This is already done in
- the caller (optimization) */
- /* minor_collect(); */
-
- /* uncommitted objects */
- push_uncommitted_to_other_threads();
- stm_list_clear(_STM_TL->uncommitted_objects);
-
- /* for small alloc classes, set the partial flag */
- long j;
- for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
- alloc_for_size_t *alloc = &_STM_TL->alloc[j];
- uint16_t start = alloc->start;
- uint16_t cur = (uintptr_t)alloc->next;
-
- if (start == cur)
- continue; /* page full -> will be replaced automatically */
-
- alloc->start = cur; /* next transaction has different 'start' to
- reset in case of an abort */
- alloc->flag_partial_page = 1;
- }
-}
-
-void nursery_on_abort()
-{
- /* reset shadowstack */
- _STM_TL->shadow_stack = _STM_TL->old_shadow_stack;
-
- /* clear old_objects_to_trace (they will have the WRITE_BARRIER flag
- set because the ones we care about are also in modified_objects) */
- stm_list_clear(_STM_TL->old_objects_to_trace);
-
- /* clear the nursery */
- localchar_t *nursery_base = (localchar_t*)(FIRST_NURSERY_PAGE * 4096);
- memset((void*)real_address((object_t*)nursery_base), 0x0,
- NURSERY_CURRENT(_STM_TL) - nursery_base);
- SET_NURSERY_CURRENT(_STM_TL, nursery_base);
-
-
- /* reset the alloc-pages to the state at the start of the transaction */
- long j;
- for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
- alloc_for_size_t *alloc = &_STM_TL->alloc[j];
- uint16_t num_allocated = ((uintptr_t)alloc->next) - alloc->start;
-
- if (num_allocated) {
- /* forget about all non-committed objects */
- alloc->next -= num_allocated;
- }
- }
-
- /* free uncommitted objects */
- struct stm_list_s *uncommitted = _STM_TL->uncommitted_objects;
-
- STM_LIST_FOREACH(
- uncommitted,
- ({
- if (!(item->stm_flags & GCFLAG_SMALL))
- stm_large_free(item);
- }));
-
- stm_list_clear(uncommitted);
-}
-
-
-
diff --git a/c7/nursery.h b/c7/nursery.h
deleted file mode 100644
--- a/c7/nursery.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-object_t *stm_allocate_prebuilt(size_t size);
-object_t *_stm_allocate_old(size_t size);
-object_t *stm_allocate(size_t size);
-
-void _stm_minor_collect();
-bool _stm_is_young(object_t *o);
-
-void nursery_on_abort();
-void nursery_on_commit();
-void nursery_on_start();
-
-
-
-extern uintptr_t index_page_never_used;
-
-
diff --git a/c7/pagecopy.c b/c7/pagecopy.c
deleted file mode 100644
--- a/c7/pagecopy.c
+++ /dev/null
@@ -1,57 +0,0 @@
-
-void pagecopy(void *dest, const void *src)
-{
- unsigned long i;
- for (i=0; i<4096/128; i++) {
- asm volatile("movdqa (%0), %%xmm0\n"
- "movdqa 16(%0), %%xmm1\n"
- "movdqa 32(%0), %%xmm2\n"
- "movdqa 48(%0), %%xmm3\n"
- "movdqa %%xmm0, (%1)\n"
- "movdqa %%xmm1, 16(%1)\n"
- "movdqa %%xmm2, 32(%1)\n"
- "movdqa %%xmm3, 48(%1)\n"
- "movdqa 64(%0), %%xmm0\n"
- "movdqa 80(%0), %%xmm1\n"
- "movdqa 96(%0), %%xmm2\n"
- "movdqa 112(%0), %%xmm3\n"
- "movdqa %%xmm0, 64(%1)\n"
- "movdqa %%xmm1, 80(%1)\n"
- "movdqa %%xmm2, 96(%1)\n"
- "movdqa %%xmm3, 112(%1)\n"
- :
- : "r"(src + 128*i), "r"(dest + 128*i)
- : "xmm0", "xmm1", "xmm2", "xmm3", "memory");
- }
-}
-
-#if 0 /* XXX enable if detected on the cpu */
-void pagecopy_ymm8(void *dest, const void *src)
-{
- asm volatile("0:\n"
- "vmovdqa (%0), %%ymm0\n"
- "vmovdqa 32(%0), %%ymm1\n"
- "vmovdqa 64(%0), %%ymm2\n"
- "vmovdqa 96(%0), %%ymm3\n"
- "vmovdqa 128(%0), %%ymm4\n"
- "vmovdqa 160(%0), %%ymm5\n"
- "vmovdqa 192(%0), %%ymm6\n"
- "vmovdqa 224(%0), %%ymm7\n"
- "addq $256, %0\n"
- "vmovdqa %%ymm0, (%1)\n"
- "vmovdqa %%ymm1, 32(%1)\n"
- "vmovdqa %%ymm2, 64(%1)\n"
- "vmovdqa %%ymm3, 96(%1)\n"
- "vmovdqa %%ymm4, 128(%1)\n"
- "vmovdqa %%ymm5, 160(%1)\n"
- "vmovdqa %%ymm6, 192(%1)\n"
- "vmovdqa %%ymm7, 224(%1)\n"
- "addq $256, %1\n"
- "cmpq %2, %0\n"
- "jne 0b"
- : "=r"(src), "=r"(dest)
- : "r"((char *)src + 4096), "0"(src), "1"(dest)
- : "xmm0", "xmm1", "xmm2", "xmm3",
- "xmm4", "xmm5", "xmm6", "xmm7");
-}
-#endif
diff --git a/c7/pagecopy.h b/c7/pagecopy.h
deleted file mode 100644
--- a/c7/pagecopy.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-void pagecopy(void *dest, const void *src);
diff --git a/c7/reader_writer_lock.c b/c7/reader_writer_lock.c
deleted file mode 100644
--- a/c7/reader_writer_lock.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Taken from: http://locklessinc.com/articles/locks/
-
- Sticking to semi-portable C code, we can still do a little better.
- There exists a form of the ticket lock that is designed for read-write
- locks. An example written in assembly was posted to the Linux kernel
- mailing list in 2002 by David Howells from RedHat. This was a highly
- optimized version of a read-write ticket lock developed at IBM in the
- early 90's by Joseph Seigh. Note that a similar (but not identical)
- algorithm was published by John Mellor-Crummey and Michael Scott in
- their landmark paper "Scalable Reader-Writer Synchronization for
- Shared-Memory Multiprocessors". Converting the algorithm from
- assembly language to C yields:
-*/
-#include <assert.h>
-#include "reader_writer_lock.h"
-
-
-#define EBUSY 1
-#define atomic_xadd(P, V) __sync_fetch_and_add((P), (V))
-#define cmpxchg(P, O, N) __sync_val_compare_and_swap((P), (O), (N))
-#define atomic_inc(P) __sync_add_and_fetch((P), 1)
-#define atomic_dec(P) __sync_add_and_fetch((P), -1)
-#define atomic_add(P, V) __sync_add_and_fetch((P), (V))
-#define atomic_set_bit(P, V) __sync_or_and_fetch((P), 1<<(V))
-#define atomic_clear_bit(P, V) __sync_and_and_fetch((P), ~(1<<(V)))
-/* Compile read-write barrier */
-#define barrier() asm volatile("": : :"memory")
-
-/* Pause instruction to prevent excess processor bus usage */
-#define cpu_relax() asm volatile("pause\n": : :"memory")
-
-
-
-void rwticket_wrlock(rwticket *l)
-{
- unsigned me = atomic_xadd(&l->u, (1<<16));
- unsigned char val = me >> 16;
-
- while (val != l->s.write) cpu_relax();
-}
-
-int rwticket_wrunlock(rwticket *l)
-{
- rwticket t = *l;
-
- barrier();
-
- t.s.write++;
- t.s.read++;
-
- *(unsigned short *) l = t.us;
- return 0;
-}
-
-int rwticket_wrtrylock(rwticket *l)
-{
- unsigned cmp = l->u;
-
- unsigned me = cmp & 0xff;//l->s.users;
- unsigned char menew = me + 1;
- // unsigned read = (cmp & 0xffff) >> 8;//l->s.read << 8;
- //unsigned cmp = (me << 16) + read + me;
- unsigned cmpnew = (menew << 16) | (cmp & 0x0000ffff); //(menew << 16) + read + me;
-
- if (cmpxchg(&l->u, cmp, cmpnew) == cmp) return 0;
-
- return EBUSY;
-}
-
-void rwticket_rdlock(rwticket *l)
-{
- unsigned me = atomic_xadd(&l->u, (1<<16));
- unsigned char val = me >> 16;
-
- while (val != l->s.read) cpu_relax();
- l->s.read++;
-}
-
-void rwticket_rdunlock(rwticket *l)
-{
- atomic_inc(&l->s.write);
-}
-
-int rwticket_rdtrylock(rwticket *l)
-{
- assert(0);
- /* XXX implement like wrtrylock */
- unsigned me = l->s.users;
- unsigned write = l->s.write;
- unsigned char menew = me + 1;
- unsigned cmp = (me << 16) + (me << 8) + write;
- unsigned cmpnew = ((unsigned) menew << 16) + (menew << 8) + write;
-
- if (cmpxchg(&l->u, cmp, cmpnew) == cmp) return 0;
-
- return EBUSY;
-}
diff --git a/c7/reader_writer_lock.h b/c7/reader_writer_lock.h
deleted file mode 100644
--- a/c7/reader_writer_lock.h
+++ /dev/null
@@ -1,22 +0,0 @@
-
-typedef union rwticket rwticket;
-union rwticket
-{
- unsigned u;
- unsigned short us;
- struct
- {
- unsigned char write;
- unsigned char read;
- unsigned char users;
- } s;
-};
-
-void rwticket_wrlock(rwticket *l);
-int rwticket_wrunlock(rwticket *l);
-int rwticket_wrtrylock(rwticket *l);
-void rwticket_rdlock(rwticket *l);
-void rwticket_rdunlock(rwticket *l);
-int rwticket_rdtrylock(rwticket *l);
-
-
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -3,7 +3,23 @@
#endif
+static void setup_gcpage(void)
+{
+ largemalloc_init_arena(stm_object_pages + END_NURSERY_PAGE * 4096UL,
+ (NB_PAGES - END_NURSERY_PAGE) * 4096UL);
+}
+
object_t *stm_allocate_prebuilt(ssize_t size_rounded_up)
{
abort();
}
+
+object_t *_stm_allocate_old(ssize_t size_rounded_up)
+{
+ char *addr = large_malloc(size_rounded_up);
+ memset(addr, 0, size_rounded_up);
+
+ object_t* o = (object_t *)(addr - stm_object_pages);
+ o->stm_flags = GCFLAG_WRITE_BARRIER;
+ return o;
+}
diff --git a/c7/largemalloc.c b/c7/stm/largemalloc.c
rename from c7/largemalloc.c
rename to c7/stm/largemalloc.c
--- a/c7/largemalloc.c
+++ b/c7/stm/largemalloc.c
@@ -1,3 +1,7 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
/* This contains a lot of inspiration from malloc() in the GNU C Library.
More precisely, this is (a subset of) the part that handles large
blocks, which in our case means at least 288 bytes. It is actually
@@ -5,16 +9,6 @@
or medium-block support that are also present in the GNU C Library.
*/
-#include <string.h>
-#include <stdio.h>
-#include <stddef.h>
-#include <assert.h>
-#include "largemalloc.h"
-#include "pages.h"
-#include "pagecopy.h"
-
-#define MMAP_LIMIT (1280*1024)
-
#define largebin_index(sz) \
(((sz) < (48 << 6)) ? ((sz) >> 6): /* 0 - 47 */ \
((sz) < (24 << 9)) ? 42 + ((sz) >> 9): /* 48 - 65 */ \
@@ -82,7 +76,7 @@
list are some unsorted chunks. All unsorted chunks are after all
sorted chunks. The flag 'FLAG_SORTED' distinguishes them.
- Note that if the user always calls stm_large_malloc() with a large
+ Note that if the user always calls large_malloc() with a large
enough argument, then the few bins corresponding to smaller values
will never be sorted at all. They are still populated with the
fragments of space between bigger allocations.
@@ -90,62 +84,7 @@
static dlist_t largebins[N_BINS];
static mchunk_t *first_chunk, *last_chunk;
-uint8_t alloc_lock = 0;
-void _stm_chunk_pages(struct object_s *data, uintptr_t *start, uintptr_t *num)
-{
- /* expects object_s in thread0-space */
- /* returns the start page and number of pages that the *payload*
- spans over. the CHUNK_HEADER is not included in the calculations */
- mchunk_t *chunk = data2chunk((char*)data);
- *start = (((char*)data) - get_thread_base(0)) / 4096UL;
- assert(*start < NB_PAGES);
- size_t offset_into_page = ((uintptr_t)data) & 4095UL; // % 4096
- *num = ((chunk->size & ~FLAG_SORTED) + offset_into_page + 4095) / 4096UL;
- assert(*num < NB_PAGES);
-}
-
-size_t _stm_data_size(struct object_s *data)
-{
- if (data->stm_flags & GCFLAG_SMALL)
- return stmcb_size(data); /* XXX: inefficient */
-
- mchunk_t *chunk = data2chunk((char*)data);
- return chunk->size & ~FLAG_SORTED;
-}
-
-void _stm_move_object(object_t* obj, char *src, char *dst)
-{
- /* XXX: should be thread-safe... */
-
- /* only copies if page is PRIVATE
- XXX: various optimizations for objects with
- multiple pages. E.g. using pagecopy or
- memcpy over multiple PRIVATE pages. */
- char *end = src + _stm_data_size((struct object_s*)REAL_ADDRESS(get_thread_base(0), obj));
- uintptr_t pagenum, num;
- struct object_s *t0_obj = (struct object_s*)REAL_ADDRESS(get_thread_base(0), obj);
-
- if (obj->stm_flags & GCFLAG_SMALL) {
- pagenum = (uintptr_t)obj / 4096UL;
- num = 1;
- } else {
- _stm_chunk_pages(t0_obj, &pagenum, &num);
- }
-
- while (src < end) {
- size_t to_copy = 4096UL - ((uintptr_t)src & 4095UL);
- if (to_copy > end - src)
- to_copy = end - src;
- if (stm_get_page_flag(pagenum) == PRIVATE_PAGE) {
- memcpy(dst, src, to_copy);
- }
-
- pagenum++;
- src += to_copy;
- dst += to_copy;
- }
-}
static void insert_unsorted(mchunk_t *new)
{
@@ -231,11 +170,8 @@
really_sort_bin(index);
}
-object_t *stm_large_malloc(size_t request_size)
+static char *large_malloc(size_t request_size)
{
- while (__sync_lock_test_and_set(&alloc_lock, 1))
- spin_loop();
-
/* 'request_size' should already be a multiple of the word size here */
assert((request_size & (sizeof(char *)-1)) == 0);
@@ -272,7 +208,7 @@
}
/* not enough memory. */
- alloc_lock = 0;
+ fprintf(stderr, "not enough memory!\n");
abort();
return NULL;
@@ -303,19 +239,12 @@
}
mscan->size = request_size;
mscan->prev_size = BOTH_CHUNKS_USED;
-
- alloc_lock = 0;
- return (object_t *)(((char *)&mscan->d) - get_thread_base(0));
+
+ return (char *)&mscan->d;
}
-void stm_large_free(object_t *tldata)
+static void large_free(char *data)
{
- assert(!(tldata->stm_flags & GCFLAG_SMALL));
-
- while (__sync_lock_test_and_set(&alloc_lock, 1))
- spin_loop();
-
- char *data = _stm_real_address(tldata);
mchunk_t *chunk = data2chunk(data);
assert((chunk->size & (sizeof(char *) - 1)) == 0);
assert(chunk->prev_size != THIS_CHUNK_FREE);
@@ -373,8 +302,6 @@
}
insert_unsorted(chunk);
-
- alloc_lock = 0;
}
@@ -414,16 +341,13 @@
assert(data - 16 == (char *)last_chunk);
}
-char *_stm_largemalloc_data_start()
-{
- return (char*)first_chunk;
-}
-
-void stm_largemalloc_init(char *data_start, size_t data_size)
+static void largemalloc_init_arena(char *data_start, size_t data_size)
{
int i;
- for (i = 0; i < N_BINS; i++)
- largebins[i].prev = largebins[i].next = &largebins[i];
+ for (i = 0; i < N_BINS; i++) {
+ largebins[i].prev = &largebins[i];
+ largebins[i].next = &largebins[i];
+ }
assert(data_size >= 2 * sizeof(struct malloc_chunk));
assert((data_size & 31) == 0);
@@ -438,10 +362,8 @@
insert_unsorted(first_chunk);
}
-int stm_largemalloc_resize_arena(size_t new_size)
+static int largemalloc_resize_arena(size_t new_size)
{
- /* XXX not thread-safe regarding all functions here... */
-
assert(new_size >= 2 * sizeof(struct malloc_chunk));
assert((new_size & 31) == 0);
@@ -490,7 +412,7 @@
assert(last_chunk == next_chunk_u(old_last_chunk));
 /* then free the last_chunk (turn it from "used" to "free") */
- stm_large_free((object_t *)(((char *)&old_last_chunk->d) - get_thread_base(0)));
+ large_free((char *)&old_last_chunk->d);
}
return 1;
}
diff --git a/c7/largemalloc.h b/c7/stm/largemalloc.h
rename from c7/largemalloc.h
rename to c7/stm/largemalloc.h
--- a/c7/largemalloc.h
+++ b/c7/stm/largemalloc.h
@@ -1,17 +1,11 @@
-#include <stdlib.h>
-#include "core.h"
-void stm_largemalloc_init(char *data_start, size_t data_size);
-int stm_largemalloc_resize_arena(size_t new_size);
+/* all addresses passed to this interface should be "char *" pointers
+ in the segment 0. */
+static void largemalloc_init_arena(char *data_start, size_t data_size);
+static int largemalloc_resize_arena(size_t new_size) __attribute__((unused));
-object_t *stm_large_malloc(size_t request_size);
-void stm_large_free(object_t *data);
-
-void _stm_large_dump(void);
-char *_stm_largemalloc_data_start(void);
-
-void _stm_move_object(object_t *obj, char *src, char *dst);
-size_t _stm_data_size(struct object_s *data);
-void _stm_chunk_pages(struct object_s *data, uintptr_t *start, uintptr_t *num);
-
-
+/* large_malloc() and large_free() are not thread-safe. This is
+ due to the fact that they should be mostly called during minor or
+ major collections, which have their own synchronization mechanisms. */
+static char *large_malloc(size_t request_size);
+static void large_free(char *data) __attribute__((unused));
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -42,8 +42,8 @@
bool _stm_in_nursery(object_t *obj)
{
- uint64_t p = (uint64_t)obj;
- return (p - NURSERY_START) < NURSERY_SIZE;
+ assert((uintptr_t)obj >= NURSERY_START);
+ return (uintptr_t)obj < NURSERY_START + NURSERY_SIZE;
}
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -61,6 +61,7 @@
setup_sync();
setup_nursery();
+ setup_gcpage();
#if 0
stm_largemalloc_init(heap, HEAP_PAGES * 4096UL);
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -3,10 +3,12 @@
#include "stm/core.h"
#include "stm/pages.h"
#include "stm/sync.h"
+#include "stm/largemalloc.h"
#include "stm/misc.c"
#include "stm/pages.c"
#include "stm/gcpage.c"
+#include "stm/largemalloc.c"
#include "stm/nursery.c"
#include "stm/sync.c"
#include "stm/setup.c"
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -73,6 +73,8 @@
char *_stm_real_address(object_t *o);
object_t *_stm_segment_address(char *ptr);
void _stm_test_switch(stm_thread_local_t *tl);
+object_t *_stm_allocate_old(ssize_t size_rounded_up);
+void _stm_large_dump(void);
#endif
#define _STM_GCFLAG_WRITE_BARRIER 0x01
diff --git a/c7/stmsync.c b/c7/stmsync.c
deleted file mode 100644
--- a/c7/stmsync.c
+++ /dev/null
@@ -1,306 +0,0 @@
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/syscall.h>
-#include <sys/prctl.h>
-#include <asm/prctl.h>
-#include <semaphore.h>
-
-#include "stmsync.h"
-#include "core.h"
-#include "reader_writer_lock.h"
-#include "list.h"
-
-#define INVALID_GS_VALUE 0x6D6D6D6D
-
-/* a multi-reader, single-writer lock: transactions normally take a reader
- lock, so don't conflict with each other; when we need to do a global GC,
- we take a writer lock to "stop the world". */
-
-rwticket rw_shared_lock __attribute__((aligned(64))); /* the "GIL" */
-rwticket rw_collection_lock __attribute__((aligned(64))); /* for major collections */
-
-sem_t static_thread_semaphore __attribute__((aligned(64)));
-uint8_t static_threads[NB_THREADS]; /* 1 if running a pthread */
-__thread struct _thread_local1_s *pthread_tl = NULL;
-
-
-
-
-void _stm_acquire_tl_segment();
-void _stm_release_tl_segment();
-
-static void set_gs_register(uint64_t value)
-{
- int result = syscall(SYS_arch_prctl, ARCH_SET_GS, value);
- assert(result == 0);
-}
-
-bool _stm_is_in_transaction(void)
-{
- return pthread_tl->active;
-}
-
-
-char* _stm_restore_local_state(int thread_num)
-{
- if (thread_num == -1) { /* mostly for debugging */
- set_gs_register(INVALID_GS_VALUE);
- return (char*)1;
- }
-
- char *thread_base = get_thread_base(thread_num);
- set_gs_register((uintptr_t)thread_base);
-
- assert(_STM_TL->thread_num == thread_num);
- assert(_STM_TL->thread_base == thread_base);
- return thread_base;
-}
-
-
-void _stm_yield_thread_segment()
-{
- _stm_release_tl_segment();
-
- /* release our static thread: */
- static_threads[_STM_TL->thread_num] = 0;
- sem_post(&static_thread_semaphore);
-
- assert(_stm_restore_local_state(-1)); /* invalid */
-}
-
-void _stm_grab_thread_segment()
-{
- /* acquire a static thread: */
- sem_wait(&static_thread_semaphore);
- int thread_num = 0;
- while (1) {
- if (!__sync_lock_test_and_set(&static_threads[thread_num], 1))
- break;
- thread_num = (thread_num + 1) % NB_THREADS;
- }
-
- _stm_restore_local_state(thread_num);
- _stm_acquire_tl_segment();
-}
-
-
-void _stm_assert_clean_tl()
-{
- /* between a pthread switch, these are the things
- that must be guaranteed */
-
- /* already set are
- thread_num, thread_base: to the current static thread
- nursery_current: nursery should be cleared
- active, need_abort: no transaction running
- modified_objects: empty
- alloc: re-usable by this thread
- uncommitted_objects: empty
- old_objects_to_trace: empty
- !!shadow_stack...: still belongs to previous thread
- */
- assert(stm_list_is_empty(_STM_TL->modified_objects));
- assert(stm_list_is_empty(_STM_TL->uncommitted_objects));
- assert(stm_list_is_empty(_STM_TL->old_objects_to_trace));
-
- assert(!_STM_TL->active);
- /* assert(!_STM_TL->need_abort); may happen, but will be cleared by
- start_transaction() */
- assert(NURSERY_CURRENT(_STM_TL) == (localchar_t*)(FIRST_NURSERY_PAGE * 4096));
-}
-
-void _stm_acquire_tl_segment()
-{
- /* makes tl-segment ours! */
- _stm_assert_clean_tl();
-
- _STM_TL->shadow_stack = pthread_tl->shadow_stack;
- _STM_TL->shadow_stack_base = pthread_tl->shadow_stack_base;
- _STM_TL->old_shadow_stack = pthread_tl->old_shadow_stack;
-}
-
-void _stm_release_tl_segment()
-{
- /* makes tl-segment ours! */
- _stm_assert_clean_tl();
-
- pthread_tl->shadow_stack = _STM_TL->shadow_stack;
- pthread_tl->shadow_stack_base = _STM_TL->shadow_stack_base;
- pthread_tl->old_shadow_stack = _STM_TL->old_shadow_stack;
-}
-
-void stm_setup_pthread(void)
-{
- struct _thread_local1_s* tl = malloc(sizeof(struct _thread_local1_s));
- assert(!pthread_tl);
- pthread_tl = tl;
-
- /* get us a clean thread segment */
- _stm_grab_thread_segment();
- _stm_assert_clean_tl();
-
- /* allocate shadow stack for this thread */
- _STM_TL->shadow_stack = (object_t**)malloc(LENGTH_SHADOW_STACK * sizeof(void*));
- _STM_TL->shadow_stack_base = _STM_TL->shadow_stack;
-
- /* copy everything from _STM_TL */
- memcpy(tl, REAL_ADDRESS(get_thread_base(_STM_TL->thread_num), _STM_TL),
- sizeof(struct _thread_local1_s));
-
- /* go into safe-point again: */
- _stm_yield_thread_segment();
-}
-
-
-void stm_teardown_pthread(void)
-{
- free(pthread_tl->shadow_stack_base);
-
- free(pthread_tl);
- pthread_tl = NULL;
-}
-
-
-
-
-
-void _stm_reset_shared_lock()
-{
- assert(!rwticket_wrtrylock(&rw_shared_lock));
- assert(!rwticket_wrunlock(&rw_shared_lock));
-
- memset(&rw_shared_lock, 0, sizeof(rwticket));
-
- assert(!rwticket_wrtrylock(&rw_collection_lock));
- assert(!rwticket_wrunlock(&rw_collection_lock));
-
- memset(&rw_collection_lock, 0, sizeof(rwticket));
-
- int i;
- for (i = 0; i < NB_THREADS; i++)
- assert(static_threads[i] == 0);
- memset(static_threads, 0, sizeof(static_threads));
- sem_init(&static_thread_semaphore, 0, NB_THREADS);
-}
-
-/* void stm_acquire_collection_lock() */
-/* { */
-/* /\* we must have the exclusive lock here and */
-/* not the colletion lock!! *\/ */
-/* /\* XXX: for more than 2 threads, need a way */
-/* to signal other threads with need_major_collect */
-/* so that they don't leave COLLECT-safe-points */
-/* when this flag is set. Otherwise we simply */
-/* wait arbitrarily long until all threads reach */
-/* COLLECT-safe-points by chance at the same time. *\/ */
-/* while (1) { */
-/* if (!rwticket_wrtrylock(&rw_collection_lock)) */
-/* break; /\* acquired! *\/ */
-
-/* stm_stop_exclusive_lock(); */
-/* usleep(1); */
-/* stm_start_exclusive_lock(); */
-/* if (_STM_TL->need_abort) { */
-/* stm_stop_exclusive_lock(); */
-/* stm_start_shared_lock(); */
-/* stm_abort_transaction(); */
-/* } */
-/* } */
-/* } */
-
-void stm_start_shared_lock(void)
-{
- rwticket_rdlock(&rw_shared_lock);
-}
-
-void stm_stop_shared_lock()
-{
- rwticket_rdunlock(&rw_shared_lock);
-}
-
-void stm_start_exclusive_lock(void)
-{
- rwticket_wrlock(&rw_shared_lock);
-}
-
-void stm_stop_exclusive_lock(void)
-{
- rwticket_wrunlock(&rw_shared_lock);
-}
-
-/* _stm_start_safe_point(LOCK_EXCLUSIVE|LOCK_COLLECT)
- -> release the exclusive lock and also the collect-read-lock
-
- THREAD_YIELD: gives up its (current thread's) GS segment
- so that other threads can grab it and run. This will
- make _STM_TL and all thread-local addresses unusable
- for the current thread. (requires LOCK_COLLECT)
-*/
-void _stm_start_safe_point(uint8_t flags)
-{
- assert(IMPLY(flags & THREAD_YIELD, flags & LOCK_COLLECT));
-
- if (flags & LOCK_EXCLUSIVE)
- stm_stop_exclusive_lock();
- else
- stm_stop_shared_lock();
-
- if (flags & LOCK_COLLECT) {
- rwticket_rdunlock(&rw_collection_lock);
-
- if (flags & THREAD_YIELD) {
- _stm_yield_thread_segment();
- }
- }
-}
-
-/*
- _stm_stop_safe_point(LOCK_COLLECT|LOCK_EXCLUSIVE);
- -> reacquire the collect-read-lock and the exclusive lock
-
- THREAD_YIELD: wait until we get a GS segment assigned
- and then continue (requires LOCK_COLLECT)
- */
-void _stm_stop_safe_point(uint8_t flags)
-{
- assert(IMPLY(flags & THREAD_YIELD, flags & LOCK_COLLECT));
- if (flags & THREAD_YIELD) {
- _stm_grab_thread_segment();
- }
-
- if (flags & LOCK_EXCLUSIVE) {
- stm_request_safe_point(1 - _STM_TL->thread_num);
- stm_start_exclusive_lock();
- } else {
- stm_start_shared_lock();
- }
-
- if (flags & LOCK_COLLECT) { /* if we released the collection lock */
- /* acquire read-collection. always succeeds because
- if there was a write-collection holder we would
- also not have gotten the shared_lock */
- rwticket_rdlock(&rw_collection_lock);
- }
-
- if (_STM_TL->active && _STM_TL->need_abort) {
- if (flags & LOCK_EXCLUSIVE) {
- /* restore to shared-mode with the collection lock */
- stm_stop_exclusive_lock();
- stm_start_shared_lock();
- stm_abort_transaction();
- } else {
- stm_abort_transaction();
- }
- }
-}
-
-
-
-void stm_request_safe_point(int thread_num)
-{
- struct _thread_local1_s* other_tl = _stm_dbg_get_tl(thread_num);
- other_tl->nursery_current_halfwords[LENDIAN] = 1;
-}
-
diff --git a/c7/stmsync.h b/c7/stmsync.h
deleted file mode 100644
--- a/c7/stmsync.h
+++ /dev/null
@@ -1,30 +0,0 @@
-
-#include <stdint.h>
-
-void stm_start_shared_lock(void);
-void stm_stop_shared_lock(void);
-void stm_stop_exclusive_lock(void);
-void stm_start_exclusive_lock(void);
-void _stm_start_safe_point(uint8_t flags);
-void _stm_stop_safe_point(uint8_t flags);
-void _stm_reset_shared_lock(void);
-void _stm_grab_thread_segment(void);
-void _stm_yield_thread_segment(void);
-
-enum {
- LOCK_COLLECT = (1 << 0),
- LOCK_EXCLUSIVE = (1 << 1),
- THREAD_YIELD = (1 << 2),
-};
-
-
-void stm_request_safe_point(int thread_num);
-
-
-#define NURSERY_CURRENT(tls) \
- ((localchar_t *)(uintptr_t)( \
- (tls)->nursery_current_halfwords[1-LENDIAN]))
-
-#define SET_NURSERY_CURRENT(tls, new_value) \
- ((tls)->nursery_current_halfwords[1-LENDIAN] = \
- (uintptr_t)(new_value))
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -42,6 +42,7 @@
/*void stm_write(object_t *obj); use _checked_stm_write() instead */
object_t *stm_allocate(ssize_t size_rounded_up);
object_t *stm_allocate_prebuilt(ssize_t size_rounded_up);
+object_t *_stm_allocate_old(ssize_t size_rounded_up);
void stm_setup(void);
void stm_teardown(void);
More information about the pypy-commit
mailing list