[pypy-commit] stmgc gc-small-uniform: hg merge default
Author: Armin Rigo <arigo at tunes.org>
Branch: gc-small-uniform
Changeset: r1153:f4c49a88012e
Date: 2014-04-12 12:40 +0200
http://bitbucket.org/pypy/stmgc/changeset/f4c49a88012e/
Log: hg merge default
diff --git a/c7/demo/Makefile b/c7/demo/Makefile
--- a/c7/demo/Makefile
+++ b/c7/demo/Makefile
@@ -17,7 +17,7 @@
H_FILES = ../stmgc.h ../stm/*.h
C_FILES = ../stmgc.c ../stm/*.c
-COMMON = -I.. -pthread -lrt -g -Wall -Werror
+COMMON = -I.. -pthread -lrt -g -Wall -Werror -DSTM_LARGEMALLOC_TEST
# note that 'build' is partially optimized but still contains all asserts
diff --git a/c7/demo/demo_largemalloc.c b/c7/demo/demo_largemalloc.c
new file mode 100644
--- /dev/null
+++ b/c7/demo/demo_largemalloc.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <time.h>
+
+#include "stmgc.h"
+#include "../stm/largemalloc.h"
+
+static inline double get_stm_time(void)
+{
+ struct timespec tp;
+ clock_gettime(CLOCK_MONOTONIC, &tp);
+ return tp.tv_sec + tp.tv_nsec * 0.000000001;
+}
+
+ssize_t stmcb_size_rounded_up(struct object_s *ob)
+{
+ abort();
+}
+
+void stmcb_trace(struct object_s *obj, void visit(object_t **))
+{
+ abort();
+}
+
+/************************************************************/
+
+#define ARENA_SIZE (1024*1024*1024)
+
+static char *arena_data;
+extern bool (*_stm_largemalloc_keep)(char *data); /* a hook for tests */
+void _stm_mutex_pages_lock(void);
+
+
+static bool keep_me(char *data) {
+ static bool last_answer = false;
+ last_answer = !last_answer;
+ return last_answer;
+}
+
+void timing(int scale)
+{
+ long limit = 1L << scale;
+ _stm_largemalloc_init_arena(arena_data, ARENA_SIZE);
+ double start = get_stm_time();
+
+ long i;
+ for (i = 0; i < limit; i++) {
+ _stm_large_malloc(16 + 8 * (i % 4)); /* may return NULL */
+ }
+ _stm_largemalloc_keep = keep_me;
+ _stm_largemalloc_sweep();
+ for (i = 0; i < limit; i++) {
+ _stm_large_malloc(16 + 8 * (i % 4)); /* may return NULL */
+ }
+
+ double stop = get_stm_time();
+ printf("scale %2d: %.9f\n", scale, stop - start);
+}
+
+
+
+int main(void)
+{
+ int i;
+ arena_data = malloc(ARENA_SIZE);
+ assert(arena_data != NULL);
+ _stm_mutex_pages_lock();
+ for (i = 0; i < 25; i++)
+ timing(i);
+ return 0;
+}
diff --git a/c7/gdb/gdb_stm.py b/c7/gdb/gdb_stm.py
new file mode 100644
--- /dev/null
+++ b/c7/gdb/gdb_stm.py
@@ -0,0 +1,49 @@
+""" Adds two built-in functions: $rfs(p=0) and $rgs(p=0).
+
+Returns the number or the address 'p', offset by the value of
+the %fs or %gs register in the current thread.
+
+Usage: for example, add this line to your ~/.gdbinit:
+
+ python execfile('/path/to/gdb_stm.py')
+"""
+import gdb
+
+def gdb_function(func):
+ class Func(gdb.Function):
+ __doc__ = func.__doc__
+ invoke = staticmethod(func)
+ Func(func.__name__)
+
+# -------------------------------------------------------
+
+SEG_FS = 0x1003
+SEG_GS = 0x1004
+
+def get_segment_register(which):
+ v = gdb.parse_and_eval('(long*)malloc(8)')
+ L = gdb.lookup_type('long')
+ gdb.parse_and_eval('arch_prctl(%d, %d)' % (which, int(v.cast(L))))
+ result = int(v.dereference())
+ gdb.parse_and_eval('free(%d)' % (int(v.cast(L)),))
+ return result
+
+def rfsrgs(name, which):
+ seg = get_segment_register(which)
+ if name is None:
+ return seg
+ tp = name.type
+ if tp.code == gdb.TYPE_CODE_INT:
+ return name + seg
+ assert tp.code == gdb.TYPE_CODE_PTR
+ L = gdb.lookup_type('long')
+ return (name.cast(L) + seg).cast(tp)
+
+@gdb_function
+def rfs(name=None):
+ return rfsrgs(name, SEG_FS)
+
+@gdb_function
+def rgs(name=None):
+ return rfsrgs(name, SEG_GS)
+
diff --git a/c7/stm/atomic.h b/c7/stm/atomic.h
--- a/c7/stm/atomic.h
+++ b/c7/stm/atomic.h
@@ -36,4 +36,12 @@
#endif
+#define spinlock_acquire(lock) \
+ do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \
+ spin_loop(); } while (1)
+#define spinlock_release(lock) \
+ do { assert((lock) == 1); \
+ __sync_lock_release(&(lock)); } while (0)
+
+
#endif /* _STM_ATOMIC_H */
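A note on the new spinlock macros above: they pair GCC's
__sync_lock_test_and_set (an atomic exchange with acquire semantics,
returning the previous value) with __sync_lock_release (a
release-store of 0). A minimal self-contained sketch of the intended
usage; 'counter' and 'counter_lock' are hypothetical names, and
spin_loop() is an x86 stand-in for the one defined earlier in
atomic.h:

    #include <assert.h>

    #define LIKELY(x)   __builtin_expect(!!(x), 1)
    #define spin_loop() __asm__ __volatile__("pause" ::: "memory")

    #define spinlock_acquire(lock)                                         \
        do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \
             spin_loop(); } while (1)
    #define spinlock_release(lock)                                         \
        do { assert((lock) == 1);                                          \
             __sync_lock_release(&(lock)); } while (0)

    static char counter_lock = 0;         /* 0 = free, 1 = held */
    static long counter = 0;

    void increment_counter(void)
    {
        spinlock_acquire(counter_lock);   /* spins until the lock is free */
        counter++;                        /* critical section */
        spinlock_release(counter_lock);   /* publishes the update */
    }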
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -8,6 +8,23 @@
memset(write_locks, 0, sizeof(write_locks));
}
+#ifdef NDEBUG
+#define EVENTUALLY(condition) /* nothing */
+#else
+#define EVENTUALLY(condition) \
+ { \
+ if (!(condition)) { \
+ int _i; \
+ for (_i = 1; _i <= NB_SEGMENTS; _i++) \
+ spinlock_acquire(lock_pages_privatizing[_i]); \
+ if (!(condition)) \
+ stm_fatalerror("fails: " #condition); \
+ for (_i = 1; _i <= NB_SEGMENTS; _i++) \
+ spinlock_release(lock_pages_privatizing[_i]); \
+ } \
+ }
+#endif
+
static void check_flag_write_barrier(object_t *obj)
{
/* check that all copies of the object, apart from mine, have the
@@ -21,12 +38,7 @@
if (i == STM_SEGMENT->segment_num)
continue;
o1 = (struct object_s *)REAL_ADDRESS(get_segment_base(i), obj);
- if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER)) {
- mutex_pages_lock(); /* try again... */
- if (!(o1->stm_flags & GCFLAG_WRITE_BARRIER))
- stm_fatalerror("missing GCFLAG_WRITE_BARRIER");
- mutex_pages_unlock();
- }
+ EVENTUALLY(o1->stm_flags & GCFLAG_WRITE_BARRIER);
}
#endif
}
@@ -271,7 +283,6 @@
with synchronize_object_now() but I don't completely see how to
improve...
*/
- assert(_has_mutex_pages());
assert(!_is_young(obj));
uintptr_t start = (uintptr_t)obj;
@@ -326,10 +337,7 @@
/* Copy around the version of 'obj' that lives in our own segment.
It is first copied into the shared pages, and then into other
segments' own private pages.
-
- This must be called with the mutex_pages_lock!
*/
- assert(_has_mutex_pages());
assert(!_is_young(obj));
assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
@@ -406,7 +414,7 @@
memcpy(dst, src, copy_size);
}
else {
- assert(memcmp(dst, src, copy_size) == 0); /* same page */
+ EVENTUALLY(memcmp(dst, src, copy_size) == 0); /* same page */
}
for (i = 1; i <= NB_SEGMENTS; i++) {
@@ -425,7 +433,7 @@
memcpy(dst, src, copy_size);
}
else {
- assert(memcmp(dst, src, copy_size) == 0); /* same page */
+ EVENTUALLY(!memcmp(dst, src, copy_size)); /* same page */
}
}
@@ -518,12 +526,10 @@
major_collection_now_at_safe_point();
/* synchronize overflow objects living in privatized pages */
- mutex_pages_lock();
push_overflow_objects_from_privatized_pages();
/* synchronize modified old objects to other threads */
push_modified_to_other_segments();
- mutex_pages_unlock();
/* update 'overflow_number' if needed */
if (STM_PSEGMENT->overflow_number_has_been_used) {
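The EVENTUALLY() macro above deserves a word: the conditions it
checks can be transiently false while another segment is in the
middle of privatizing a page under its lock_pages_privatizing[]
spinlock. Acquiring every such lock waits out all in-flight updates;
if the condition still fails afterwards, it is a real bug. A
self-contained sketch of that reasoning, with hypothetical names:

    #include <stdbool.h>
    #include <stdlib.h>

    #define N_WRITERS 4
    static char writer_locks[N_WRITERS + 1];   /* indexed 1..N_WRITERS */
    static volatile int shared_flag = 1;       /* writers clear it only
                                                  briefly, lock held */

    static bool condition_holds(void) { return shared_flag != 0; }

    /* Debug-only check in the spirit of EVENTUALLY(): tolerate
       transient falsehood caused by writers inside a locked update. */
    void check_eventually(void)
    {
        if (condition_holds())
            return;
        int i;
        for (i = 1; i <= N_WRITERS; i++)       /* wait out every writer */
            while (__sync_lock_test_and_set(&writer_locks[i], 1) != 0)
                ;
        if (!condition_holds())
            abort();                  /* permanently false: a real bug */
        for (i = 1; i <= N_WRITERS; i++)
            __sync_lock_release(&writer_locks[i]);
    }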
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -35,8 +35,6 @@
#define WRITELOCK_START ((END_NURSERY_PAGE * 4096UL) >> 4)
#define WRITELOCK_END READMARKER_END
-#define SHADOW_STACK_SIZE 1000
-
enum /* stm_flags */ {
/* This flag is set on non-nursery objects. It forces stm_write()
to call _stm_write_slowpath().
diff --git a/c7/stm/forksupport.c b/c7/stm/forksupport.c
--- a/c7/stm/forksupport.c
+++ b/c7/stm/forksupport.c
@@ -70,7 +70,6 @@
s_mutex_lock();
synchronize_all_threads(STOP_OTHERS_UNTIL_MUTEX_UNLOCK);
- mutex_pages_lock();
/* Make a new mmap at some other address, but of the same size as
the standard mmap at stm_object_pages
@@ -166,7 +165,6 @@
fork_big_copy = NULL;
bool was_in_transaction = fork_was_in_transaction;
- mutex_pages_unlock();
s_mutex_unlock();
if (!was_in_transaction) {
@@ -203,7 +201,6 @@
/* this new process contains no other thread, so we can
just release these locks early */
- mutex_pages_unlock();
s_mutex_unlock();
/* Move the copy of the mmap over the old one, overwriting it
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -32,18 +32,23 @@
pages_initialize_shared((pages_addr - stm_object_pages) / 4096UL, num);
}
+
+static int lock_growth_large = 0;
+
static char *allocate_outside_nursery_large(uint64_t size)
{
- /* thread-safe: use the lock of pages.c to prevent any remapping
- from occurring under our feet */
- mutex_pages_lock();
- increment_total_allocated(size + LARGE_MALLOC_OVERHEAD);
-
/* Allocate the object with largemalloc.c from the lower addresses. */
char *addr = _stm_large_malloc(size);
if (addr == NULL)
stm_fatalerror("not enough memory!");
+ if (LIKELY(addr + size <= uninitialized_page_start)) {
+ return addr;
+ }
+
+ /* uncommon case: need to initialize some more pages */
+ spinlock_acquire(lock_growth_large);
+
if (addr + size > uninitialized_page_start) {
uintptr_t npages;
npages = (addr + size - uninitialized_page_start) / 4096UL;
@@ -53,11 +58,10 @@
stm_fatalerror("out of memory!"); /* XXX */
}
setup_N_pages(uninitialized_page_start, npages);
+ __sync_synchronize();
uninitialized_page_start += npages * 4096UL;
}
-
- mutex_pages_unlock();
-
+ spinlock_release(lock_growth_large);
return addr;
}
@@ -213,7 +217,6 @@
total_allocated by 4096. */
long i;
- mutex_pages_lock();
for (i = 1; i <= NB_SEGMENTS; i++) {
/* The 'modified_old_objects' list gives the list of objects
@@ -263,7 +266,6 @@
for (i = 1; i <= NB_SEGMENTS; i++) {
major_restore_private_bits_for_modified_objects(i);
}
- mutex_pages_unlock();
}
@@ -422,9 +424,7 @@
static void sweep_large_objects(void)
{
- mutex_pages_lock();
_stm_largemalloc_sweep();
- mutex_pages_unlock();
}
static void clean_write_locks(void)
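allocate_outside_nursery_large() now takes no lock on the common
path: only the rare arena-growth path acquires lock_growth_large,
re-checks the condition, and issues __sync_synchronize() so that the
freshly set-up pages become visible to other threads before the
published limit moves past them. A self-contained sketch of this
double-checked pattern; the names are hypothetical and memset()
stands in for setup_N_pages():

    #include <stddef.h>
    #include <string.h>

    #define PAGE_SIZE 4096UL

    static char arena[64 * PAGE_SIZE];
    static char *volatile initialized_limit = arena;  /* grows monotonically */
    static int growth_lock = 0;

    /* Ensure [addr, addr+size) lies inside the initialized region. */
    char *grow_if_needed(char *addr, size_t size)
    {
        if (addr + size <= initialized_limit)
            return addr;                        /* common case: lock-free */

        while (__sync_lock_test_and_set(&growth_lock, 1) != 0)
            ;                                   /* spin */
        if (addr + size > initialized_limit) {  /* re-check under the lock */
            char *new_limit = addr + size;
            memset(initialized_limit, 0, new_limit - initialized_limit);
            __sync_synchronize();               /* pages first, ...      */
            initialized_limit = new_limit;      /* ...then the new limit */
        }
        __sync_lock_release(&growth_lock);
        return addr;
    }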
diff --git a/c7/stm/largemalloc.c b/c7/stm/largemalloc.c
--- a/c7/stm/largemalloc.c
+++ b/c7/stm/largemalloc.c
@@ -20,19 +20,25 @@
#define LAST_BIN_INDEX(sz) ((sz) >= (3 << 18))
typedef struct dlist_s {
- struct dlist_s *next; /* a doubly-linked list */
+ struct dlist_s *next; /* a circular doubly-linked list */
struct dlist_s *prev;
} dlist_t;
+typedef struct ulist_s {
+ struct ulist_s *up; /* a non-circular doubly-linked list */
+ struct ulist_s *down;
+} ulist_t;
+
typedef struct malloc_chunk {
size_t prev_size; /* - if the previous chunk is free: size of its data
- otherwise, if this chunk is free: 1
- otherwise, 0. */
- size_t size; /* size of the data in this chunk,
- plus optionally the FLAG_SORTED */
+ size_t size; /* size of the data in this chunk */
- dlist_t d; /* if free: a doubly-linked list */
+ dlist_t d; /* if free: a doubly-linked list 'largebins' */
/* if not free: the user data starts here */
+ ulist_t u; /* if free, if unsorted: up==UU_UNSORTED
+ if free, if sorted: a doubly-linked list */
/* The chunk has a total size of 'size'. It is immediately followed
in memory by another chunk. This list ends with the last "chunk"
@@ -41,29 +47,22 @@
one are considered "not free". */
} mchunk_t;
-#define FLAG_SORTED 1
+#define UU_UNSORTED ((ulist_t *) 1)
#define THIS_CHUNK_FREE 1
#define BOTH_CHUNKS_USED 0
#define CHUNK_HEADER_SIZE offsetof(struct malloc_chunk, d)
#define END_MARKER 0xDEADBEEF
+#define MIN_ALLOC_SIZE (sizeof(struct malloc_chunk) - CHUNK_HEADER_SIZE)
#define chunk_at_offset(p, ofs) ((mchunk_t *)(((char *)(p)) + (ofs)))
#define data2chunk(p) chunk_at_offset(p, -CHUNK_HEADER_SIZE)
+#define updown2chunk(p) chunk_at_offset(p, \
+ -(CHUNK_HEADER_SIZE + sizeof(dlist_t)))
-static mchunk_t *next_chunk_s(mchunk_t *p)
+static mchunk_t *next_chunk(mchunk_t *p)
{
- assert(p->size & FLAG_SORTED);
- return chunk_at_offset(p, CHUNK_HEADER_SIZE + p->size - FLAG_SORTED);
-}
-static mchunk_t *next_chunk_u(mchunk_t *p)
-{
- assert(!(p->size & FLAG_SORTED));
return chunk_at_offset(p, CHUNK_HEADER_SIZE + p->size);
}
-static mchunk_t *next_chunk_a(mchunk_t *p)
-{
- return chunk_at_offset(p, CHUNK_HEADER_SIZE + (p->size & ~FLAG_SORTED));
-}
/* The free chunks are stored in "bins". Each bin is a doubly-linked
@@ -76,36 +75,73 @@
neighbors to ensure this.
In each bin's doubly-linked list, chunks are sorted by their size in
- decreasing order (if you start from 'd.next'). At the end of this
- list are some unsorted chunks. All unsorted chunks are after all
- sorted chunks. The flag 'FLAG_SORTED' distinguishes them.
+ decreasing order (if you follow 'largebins[n].next',
+ 'largebins[n].next->next', etc.). At the end of this list are some
+ unsorted chunks. All unsorted chunks are after all sorted chunks.
+ Unsorted chunks are distinguished by having 'u.up == UU_UNSORTED'.
Note that if the user always calls large_malloc() with a large
enough argument, then the few bins corresponding to smaller values
will never be sorted at all. They are still populated with the
fragments of space between bigger allocations.
+
+ Following the 'd' linked list, we get only one chunk of every size.
+ The additional chunks of a given size are linked "vertically" in
+ the secondary 'u' doubly-linked list.
+
+
+ +-----+
+ | 296 |
+ +-----+
+ ^ |
+ | v
+ +-----+ +-----+
+ | 296 | | 288 |
+ +-----+ +-----+
+ ^ | ^ | UU_UNSORTED
+ | v | v |
+ largebins +-----+ +-----+ +-----+ +-----+ largebins
+ [4].next <-> | 304 | <-> | 296 | <-> | 288 | <-> | 296 | <-> [4].prev
+ +-----+ +-----+ +-----+ +-----+
+
*/
-static dlist_t largebins[N_BINS];
-static mchunk_t *first_chunk, *last_chunk;
+
+static struct {
+ int lock;
+ mchunk_t *first_chunk, *last_chunk;
+ dlist_t largebins[N_BINS];
+} lm __attribute__((aligned(64)));
+
+
+static void lm_lock(void)
+{
+ spinlock_acquire(lm.lock);
+}
+
+static void lm_unlock(void)
+{
+ spinlock_release(lm.lock);
+}
static void insert_unsorted(mchunk_t *new)
{
size_t index = LAST_BIN_INDEX(new->size) ? N_BINS - 1
: largebin_index(new->size);
- new->d.next = &largebins[index];
- new->d.prev = largebins[index].prev;
+ new->d.next = &lm.largebins[index];
+ new->d.prev = lm.largebins[index].prev;
new->d.prev->next = &new->d;
- largebins[index].prev = &new->d;
- assert(!(new->size & FLAG_SORTED));
+ new->u.up = UU_UNSORTED;
+ new->u.down = NULL;
+ lm.largebins[index].prev = &new->d;
}
static int compare_chunks(const void *vchunk1, const void *vchunk2)
{
/* sort by size */
- const mchunk_t *chunk1 = (const mchunk_t *)vchunk1;
- const mchunk_t *chunk2 = (const mchunk_t *)vchunk2;
+ mchunk_t *chunk1 = *(mchunk_t *const *)vchunk1;
+ mchunk_t *chunk2 = *(mchunk_t *const *)vchunk2;
if (chunk1->size < chunk2->size)
return -1;
if (chunk1->size == chunk2->size)
@@ -114,13 +150,15 @@
return +1;
}
+#define MAX_STACK_COUNT 64
+
static void really_sort_bin(size_t index)
{
- dlist_t *unsorted = largebins[index].prev;
- dlist_t *end = &largebins[index];
+ dlist_t *unsorted = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
dlist_t *scan = unsorted->prev;
size_t count = 1;
- while (scan != end && !(data2chunk(scan)->size & FLAG_SORTED)) {
+ while (scan != end && data2chunk(scan)->u.up == UU_UNSORTED) {
scan = scan->prev;
++count;
}
@@ -128,12 +166,20 @@
scan->next = end;
mchunk_t *chunk1;
- mchunk_t *chunks[count]; /* dynamically-sized */
+ mchunk_t *chunk_array[MAX_STACK_COUNT];
+ mchunk_t **chunks = chunk_array;
+
if (count == 1) {
chunk1 = data2chunk(unsorted); /* common case */
count = 0;
}
else {
+ if (count > MAX_STACK_COUNT) {
+ chunks = malloc(count * sizeof(mchunk_t *));
+ if (chunks == NULL) {
+ stm_fatalerror("out of memory"); // XXX
+ }
+ }
size_t i;
for (i = 0; i < count; i++) {
chunks[i] = data2chunk(unsorted);
@@ -144,55 +190,111 @@
chunk1 = chunks[--count];
}
- chunk1->size |= FLAG_SORTED;
size_t search_size = chunk1->size;
- dlist_t *head = largebins[index].next;
+ dlist_t *head = lm.largebins[index].next;
while (1) {
- if (head == end || search_size >= data2chunk(head)->size) {
+ if (head == end || data2chunk(head)->size < search_size) {
/* insert 'chunk1' here, before the current head */
head->prev->next = &chunk1->d;
chunk1->d.prev = head->prev;
head->prev = &chunk1->d;
chunk1->d.next = head;
- if (count == 0)
- break; /* all done */
- chunk1 = chunks[--count];
- chunk1->size |= FLAG_SORTED;
- search_size = chunk1->size;
+ chunk1->u.up = NULL;
+ chunk1->u.down = NULL;
+ head = &chunk1->d;
+ }
+ else if (data2chunk(head)->size == search_size) {
+ /* insert 'chunk1' vertically in the 'u' list */
+ ulist_t *uhead = &data2chunk(head)->u;
+ chunk1->u.up = uhead->up;
+ chunk1->u.down = uhead;
+ if (uhead->up != NULL)
+ uhead->up->down = &chunk1->u;
+ uhead->up = &chunk1->u;
+#ifndef NDEBUG
+ chunk1->d.next = (dlist_t *)0x42; /* not used */
+ chunk1->d.prev = (dlist_t *)0x42;
+#endif
}
else {
head = head->next;
+ continue;
}
+ if (count == 0)
+ break; /* all done */
+ chunk1 = chunks[--count];
+ search_size = chunk1->size;
}
+
+ if (chunks != chunk_array)
+ free(chunks);
}
static void sort_bin(size_t index)
{
- dlist_t *last = largebins[index].prev;
- if (last != &largebins[index] && !(data2chunk(last)->size & FLAG_SORTED))
+ dlist_t *last = lm.largebins[index].prev;
+ if (last != &lm.largebins[index] && data2chunk(last)->u.up == UU_UNSORTED)
really_sort_bin(index);
}
+static void unlink_chunk(mchunk_t *mscan)
+{
+ if (mscan->u.down != NULL) {
+ /* unlink mscan from the vertical list 'u' */
+ ulist_t *up = mscan->u.up;
+ ulist_t *down = mscan->u.down;
+ down->up = up;
+ if (up != NULL) up->down = down;
+ }
+ else {
+ dlist_t *prev = mscan->d.prev;
+ dlist_t *next = mscan->d.next;
+ if (mscan->u.up == NULL || mscan->u.up == UU_UNSORTED) {
+ /* unlink mscan from the doubly-linked list 'd' */
+ next->prev = prev;
+ prev->next = next;
+ }
+ else {
+ /* relink in the 'd' list the item above me */
+ mchunk_t *above = updown2chunk(mscan->u.up);
+ next->prev = &above->d;
+ prev->next = &above->d;
+ above->d.next = next;
+ above->d.prev = prev;
+ above->u.down = NULL;
+ }
+ }
+}
+
char *_stm_large_malloc(size_t request_size)
{
/* 'request_size' should already be a multiple of the word size here */
assert((request_size & (sizeof(char *)-1)) == 0);
+ /* it can be very small, but we need to ensure a minimum size
+ (currently 32 bytes) */
+ if (request_size < MIN_ALLOC_SIZE)
+ request_size = MIN_ALLOC_SIZE;
+
+ lm_lock();
+
size_t index = largebin_index(request_size);
sort_bin(index);
/* scan through the chunks of current bin in reverse order
to find the smallest that fits. */
- dlist_t *scan = largebins[index].prev;
- dlist_t *end = &largebins[index];
+ dlist_t *scan = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
mchunk_t *mscan;
while (scan != end) {
mscan = data2chunk(scan);
assert(mscan->prev_size == THIS_CHUNK_FREE);
- assert(next_chunk_s(mscan)->prev_size == mscan->size - FLAG_SORTED);
+ assert(next_chunk(mscan)->prev_size == mscan->size);
+ assert(IMPLY(mscan->d.prev != end,
+ data2chunk(mscan->d.prev)->size > mscan->size));
- if (mscan->size > request_size)
+ if (mscan->size >= request_size)
goto found;
scan = mscan->d.prev;
}
@@ -201,31 +303,40 @@
smallest item of the first non-empty bin, as it will be large
enough. */
while (++index < N_BINS) {
- if (largebins[index].prev != &largebins[index]) {
+ if (lm.largebins[index].prev != &lm.largebins[index]) {
/* non-empty bin. */
sort_bin(index);
- scan = largebins[index].prev;
- end = &largebins[index];
+ scan = lm.largebins[index].prev;
mscan = data2chunk(scan);
goto found;
}
}
/* not enough memory. */
+ lm_unlock();
return NULL;
found:
- assert(mscan->size & FLAG_SORTED);
- assert(mscan->size > request_size);
+ assert(mscan->size >= request_size);
+ assert(mscan->u.up != UU_UNSORTED);
- /* unlink mscan from the doubly-linked list */
- mscan->d.next->prev = mscan->d.prev;
- mscan->d.prev->next = mscan->d.next;
+ if (mscan->u.up != NULL) {
+ /* fast path: grab the item that is just above, to avoid needing
+ to rearrange the 'd' list */
+ mchunk_t *above = updown2chunk(mscan->u.up);
+ ulist_t *two_above = above->u.up;
+ mscan->u.up = two_above;
+ if (two_above != NULL) two_above->down = &mscan->u;
+ mscan = above;
+ }
+ else {
+ unlink_chunk(mscan);
+ }
- size_t remaining_size_plus_1 = mscan->size - request_size;
- if (remaining_size_plus_1 <= sizeof(struct malloc_chunk)) {
- next_chunk_s(mscan)->prev_size = BOTH_CHUNKS_USED;
- request_size = mscan->size & ~FLAG_SORTED;
+ size_t remaining_size = mscan->size - request_size;
+ if (remaining_size < sizeof(struct malloc_chunk)) {
+ next_chunk(mscan)->prev_size = BOTH_CHUNKS_USED;
+ request_size = mscan->size;
}
else {
/* only part of the chunk is being used; reduce the size
@@ -234,27 +345,35 @@
mchunk_t *new = chunk_at_offset(mscan, CHUNK_HEADER_SIZE +
request_size);
new->prev_size = THIS_CHUNK_FREE;
- size_t remaining_size = remaining_size_plus_1 - 1 - CHUNK_HEADER_SIZE;
- new->size = remaining_size;
- next_chunk_u(new)->prev_size = remaining_size;
+ size_t remaining_data_size = remaining_size - CHUNK_HEADER_SIZE;
+ new->size = remaining_data_size;
+ next_chunk(new)->prev_size = remaining_data_size;
insert_unsorted(new);
}
mscan->size = request_size;
mscan->prev_size = BOTH_CHUNKS_USED;
+ increment_total_allocated(request_size + LARGE_MALLOC_OVERHEAD);
+
+ lm_unlock();
return (char *)&mscan->d;
}
-void _stm_large_free(char *data)
+static void _large_free(mchunk_t *chunk)
{
- mchunk_t *chunk = data2chunk(data);
assert((chunk->size & (sizeof(char *) - 1)) == 0);
assert(chunk->prev_size != THIS_CHUNK_FREE);
+ /* 'size' is at least MIN_ALLOC_SIZE */
+ increment_total_allocated(-(chunk->size + LARGE_MALLOC_OVERHEAD));
+
#ifndef NDEBUG
- assert(chunk->size >= sizeof(dlist_t));
- assert(chunk->size <= (((char *)last_chunk) - (char *)data));
- memset(data, 0xDE, chunk->size);
+ {
+ char *data = (char *)&chunk->d;
+ assert(chunk->size >= sizeof(dlist_t));
+ assert(chunk->size <= (((char *)lm.last_chunk) - data));
+ memset(data, 0xDE, chunk->size);
+ }
#endif
/* try to merge with the following chunk in memory */
@@ -262,17 +381,15 @@
mchunk_t *mscan = chunk_at_offset(chunk, msize);
if (mscan->prev_size == BOTH_CHUNKS_USED) {
- assert((mscan->size & ((sizeof(char *) - 1) & ~FLAG_SORTED)) == 0);
+ assert((mscan->size & (sizeof(char *) - 1)) == 0);
mscan->prev_size = chunk->size;
}
else {
- mscan->size &= ~FLAG_SORTED;
size_t fsize = mscan->size;
mchunk_t *fscan = chunk_at_offset(mscan, fsize + CHUNK_HEADER_SIZE);
/* unlink the following chunk */
- mscan->d.next->prev = mscan->d.prev;
- mscan->d.prev->next = mscan->d.next;
+ unlink_chunk(mscan);
#ifndef NDEBUG
mscan->prev_size = (size_t)-258; /* 0xfffffffffffffefe */
mscan->size = (size_t)-515; /* 0xfffffffffffffdfd */
@@ -296,15 +413,14 @@
msize = chunk->prev_size + CHUNK_HEADER_SIZE;
mscan = chunk_at_offset(chunk, -msize);
assert(mscan->prev_size == THIS_CHUNK_FREE);
- assert((mscan->size & ~FLAG_SORTED) == chunk->prev_size);
+ assert(mscan->size == chunk->prev_size);
/* unlink the previous chunk */
- mscan->d.next->prev = mscan->d.prev;
- mscan->d.prev->next = mscan->d.next;
+ unlink_chunk(mscan);
/* merge the two chunks */
mscan->size = msize + chunk->size;
- next_chunk_u(mscan)->prev_size = mscan->size;
+ next_chunk(mscan)->prev_size = mscan->size;
assert(chunk->prev_size = (size_t)-1);
assert(chunk->size = (size_t)-1);
@@ -314,18 +430,28 @@
insert_unsorted(chunk);
}
+void _stm_large_free(char *data)
+{
+ lm_lock();
+ _large_free(data2chunk(data));
+ lm_unlock();
+}
+
void _stm_large_dump(void)
{
- char *data = ((char *)first_chunk) + 16;
+ lm_lock();
+ char *data = ((char *)lm.first_chunk) + 16;
size_t prev_size_if_free = 0;
+ fprintf(stderr, "\n");
while (1) {
- fprintf(stderr, "[ %p: %zu\n", data - 16, *(size_t*)(data - 16));
+ assert((((uintptr_t)data) & 7) == 0); /* alignment */
+ fprintf(stderr, "[ %p: %zu", data - 16, *(size_t*)(data - 16));
if (prev_size_if_free == 0) {
assert(*(size_t*)(data - 16) == THIS_CHUNK_FREE ||
*(size_t*)(data - 16) == BOTH_CHUNKS_USED);
if (*(size_t*)(data - 16) == THIS_CHUNK_FREE)
- prev_size_if_free = (*(size_t*)(data - 8)) & ~FLAG_SORTED;
+ prev_size_if_free = (*(size_t*)(data - 8));
}
else {
assert(*(size_t*)(data - 16) == prev_size_if_free);
@@ -333,30 +459,33 @@
}
if (*(size_t*)(data - 8) == END_MARKER)
break;
- fprintf(stderr, " %p: %zu ]", data - 8, *(size_t*)(data - 8));
if (prev_size_if_free) {
- fprintf(stderr, " (free %p / %p)\n",
- *(void **)data, *(void **)(data + 8));
+ fprintf(stderr, " \t(up %p / down %p)",
+ *(void **)(data + 16), *(void **)(data + 24));
+ }
+ fprintf(stderr, "\n %p: %zu ]", data - 8, *(size_t*)(data - 8));
+ if (prev_size_if_free) {
+ fprintf(stderr, "\t(prev %p <-> next %p)\n",
+ *(void **)(data + 8), *(void **)data);
}
else {
fprintf(stderr, "\n");
}
- if (!prev_size_if_free)
- assert(!((*(size_t*)(data - 8)) & FLAG_SORTED));
assert(*(ssize_t*)(data - 8) >= 16);
- data += (*(size_t*)(data - 8)) & ~FLAG_SORTED;
+ data += *(size_t*)(data - 8);
data += 16;
}
- fprintf(stderr, " %p: end. ]\n\n", data - 8);
- assert(data - 16 == (char *)last_chunk);
+ fprintf(stderr, "\n %p: end. ]\n\n", data - 8);
+ assert(data - 16 == (char *)lm.last_chunk);
+ lm_unlock();
}
char *_stm_largemalloc_data_start(void)
{
- return (char *)first_chunk;
+ return (char *)lm.first_chunk;
}
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
bool (*_stm_largemalloc_keep)(char *data); /* a hook for tests */
#endif
@@ -364,87 +493,95 @@
{
int i;
for (i = 0; i < N_BINS; i++) {
- largebins[i].prev = &largebins[i];
- largebins[i].next = &largebins[i];
+ lm.largebins[i].prev = &lm.largebins[i];
+ lm.largebins[i].next = &lm.largebins[i];
}
assert(data_size >= 2 * sizeof(struct malloc_chunk));
assert((data_size & 31) == 0);
- first_chunk = (mchunk_t *)data_start;
- first_chunk->prev_size = THIS_CHUNK_FREE;
- first_chunk->size = data_size - 2 * CHUNK_HEADER_SIZE;
- last_chunk = chunk_at_offset(first_chunk, data_size - CHUNK_HEADER_SIZE);
- last_chunk->prev_size = first_chunk->size;
- last_chunk->size = END_MARKER;
- assert(last_chunk == next_chunk_u(first_chunk));
+ lm.first_chunk = (mchunk_t *)data_start;
+ lm.first_chunk->prev_size = THIS_CHUNK_FREE;
+ lm.first_chunk->size = data_size - 2 * CHUNK_HEADER_SIZE;
+ lm.last_chunk = chunk_at_offset(lm.first_chunk,
+ data_size - CHUNK_HEADER_SIZE);
+ lm.last_chunk->prev_size = lm.first_chunk->size;
+ lm.last_chunk->size = END_MARKER;
+ assert(lm.last_chunk == next_chunk(lm.first_chunk));
+ lm.lock = 0;
- insert_unsorted(first_chunk);
+ insert_unsorted(lm.first_chunk);
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
_stm_largemalloc_keep = NULL;
#endif
}
int _stm_largemalloc_resize_arena(size_t new_size)
{
+ int result = 0;
+ lm_lock();
+
if (new_size < 2 * sizeof(struct malloc_chunk))
- return 0;
+ goto fail;
OPT_ASSERT((new_size & 31) == 0);
new_size -= CHUNK_HEADER_SIZE;
- mchunk_t *new_last_chunk = chunk_at_offset(first_chunk, new_size);
- mchunk_t *old_last_chunk = last_chunk;
- size_t old_size = ((char *)old_last_chunk) - (char *)first_chunk;
+ mchunk_t *new_last_chunk = chunk_at_offset(lm.first_chunk, new_size);
+ mchunk_t *old_last_chunk = lm.last_chunk;
+ size_t old_size = ((char *)old_last_chunk) - (char *)lm.first_chunk;
if (new_size < old_size) {
/* check if there is enough free space at the end to allow
such a reduction */
- size_t lsize = last_chunk->prev_size;
+ size_t lsize = lm.last_chunk->prev_size;
assert(lsize != THIS_CHUNK_FREE);
if (lsize == BOTH_CHUNKS_USED)
- return 0;
+ goto fail;
lsize += CHUNK_HEADER_SIZE;
- mchunk_t *prev_chunk = chunk_at_offset(last_chunk, -lsize);
+ mchunk_t *prev_chunk = chunk_at_offset(lm.last_chunk, -lsize);
if (((char *)new_last_chunk) < ((char *)prev_chunk) +
sizeof(struct malloc_chunk))
- return 0;
+ goto fail;
/* unlink the prev_chunk from the doubly-linked list */
- prev_chunk->d.next->prev = prev_chunk->d.prev;
- prev_chunk->d.prev->next = prev_chunk->d.next;
+ unlink_chunk(prev_chunk);
/* reduce the prev_chunk */
- assert((prev_chunk->size & ~FLAG_SORTED) == last_chunk->prev_size);
+ assert(prev_chunk->size == lm.last_chunk->prev_size);
prev_chunk->size = ((char*)new_last_chunk) - (char *)prev_chunk
- CHUNK_HEADER_SIZE;
/* make a fresh-new last chunk */
new_last_chunk->prev_size = prev_chunk->size;
new_last_chunk->size = END_MARKER;
- last_chunk = new_last_chunk;
- assert(last_chunk == next_chunk_u(prev_chunk));
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(prev_chunk));
insert_unsorted(prev_chunk);
}
else if (new_size > old_size) {
/* make the new last chunk first, with only the extra size */
- mchunk_t *old_last_chunk = last_chunk;
+ mchunk_t *old_last_chunk = lm.last_chunk;
old_last_chunk->size = (new_size - old_size) - CHUNK_HEADER_SIZE;
new_last_chunk->prev_size = BOTH_CHUNKS_USED;
new_last_chunk->size = END_MARKER;
- last_chunk = new_last_chunk;
- assert(last_chunk == next_chunk_u(old_last_chunk));
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(old_last_chunk));
/* then free the last_chunk (turn it from "used" to "free") */
- _stm_large_free((char *)&old_last_chunk->d);
+ _large_free(old_last_chunk);
}
- return 1;
+
+ result = 1;
+ fail:
+ lm_unlock();
+ return result;
}
static inline bool _largemalloc_sweep_keep(mchunk_t *chunk)
{
-#ifdef STM_TESTS
+#ifdef STM_LARGEMALLOC_TEST
if (_stm_largemalloc_keep != NULL)
return _stm_largemalloc_keep((char *)&chunk->d);
#endif
@@ -453,31 +590,32 @@
void _stm_largemalloc_sweep(void)
{
- /* This may be slightly optimized by inlining _stm_large_free() and
+ lm_lock();
+
+ /* This may be slightly optimized by inlining _large_free() and
making cases, e.g. we might know already if the previous block
was free or not. It's probably not really worth it. */
- mchunk_t *mnext, *chunk = first_chunk;
+ mchunk_t *mnext, *chunk = lm.first_chunk;
if (chunk->prev_size == THIS_CHUNK_FREE)
- chunk = next_chunk_a(chunk); /* go to the first non-free chunk */
+ chunk = next_chunk(chunk); /* go to the first non-free chunk */
- while (chunk != last_chunk) {
-
+ while (chunk != lm.last_chunk) {
/* here, the chunk we're pointing to is not free */
assert(chunk->prev_size != THIS_CHUNK_FREE);
/* first figure out the next non-free chunk */
- mnext = next_chunk_u(chunk);
+ mnext = next_chunk(chunk);
if (mnext->prev_size == THIS_CHUNK_FREE)
- mnext = next_chunk_a(mnext);
+ mnext = next_chunk(mnext);
/* use the callback to know if 'chunk' contains an object that
survives or dies */
if (!_largemalloc_sweep_keep(chunk)) {
- size_t size = chunk->size;
- increment_total_allocated(-(size + LARGE_MALLOC_OVERHEAD));
- _stm_large_free((char *)&chunk->d); /* dies */
+ _large_free(chunk); /* dies */
}
chunk = mnext;
}
+
+ lm_unlock();
}
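One subtle fix in the hunk above: compare_chunks() is the qsort()
callback for the 'chunks' array, whose elements are pointers
(mchunk_t *). qsort() hands the callback the addresses of two array
elements, so each void * argument must be dereferenced once before
the chunks themselves can be compared. A minimal illustration of the
rule, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>

    /* Comparator for an array of 'long *': each argument points to an
       element of the array, i.e. it is really a 'long *const *'. */
    static int cmp_by_pointed_value(const void *a, const void *b)
    {
        const long *pa = *(const long *const *)a;
        const long *pb = *(const long *const *)b;
        return (*pa > *pb) - (*pa < *pb);
    }

    int main(void)
    {
        long x = 30, y = 10, z = 20;
        const long *items[] = { &x, &y, &z };
        qsort(items, 3, sizeof(items[0]), cmp_by_pointed_value);
        printf("%ld %ld %ld\n", *items[0], *items[1], *items[2]);
        return 0;                                 /* prints: 10 20 30 */
    }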
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -75,19 +75,6 @@
uint64_t _stm_total_allocated(void)
{
- mutex_pages_lock();
- uint64_t result = increment_total_allocated(0);
- mutex_pages_unlock();
- return result;
-}
-
-void _stm_mutex_pages_lock(void)
-{
- mutex_pages_lock();
-}
-
-void _stm_mutex_pages_unlock(void)
-{
- mutex_pages_unlock();
+ return increment_total_allocated(0);
}
#endif
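With the pages mutex gone, _stm_total_allocated() relies on
increment_total_allocated() being atomic (see the __sync_add_and_fetch
rewrite in pages.c below); reading the counter is just the degenerate
case of atomically adding zero. A tiny sketch of the primitive, with
hypothetical names:

    #include <stdint.h>
    #include <sys/types.h>              /* ssize_t */

    static uint64_t total_allocated = 0;

    /* Atomically adjust the counter and return the new value. */
    uint64_t adjust_total(ssize_t add_or_remove)
    {
        return __sync_add_and_fetch(&total_allocated, add_or_remove);
    }

    uint64_t read_total(void)
    {
        return adjust_total(0);         /* atomic read via "add 0" */
    }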
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
--- a/c7/stm/nursery.c
+++ b/c7/stm/nursery.c
@@ -193,9 +193,7 @@
content); or add the object to 'large_overflow_objects'.
*/
if (STM_PSEGMENT->minor_collect_will_commit_now) {
- mutex_pages_lock();
synchronize_object_now(obj);
- mutex_pages_unlock();
}
else
LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj);
@@ -231,23 +229,13 @@
/* free any object left from 'young_outside_nursery' */
if (!tree_is_cleared(pseg->young_outside_nursery)) {
- bool locked = false;
wlog_t *item;
+
TREE_LOOP_FORWARD(*pseg->young_outside_nursery, item) {
assert(!_is_in_nursery((object_t *)item->addr));
- if (!locked) {
- mutex_pages_lock();
- locked = true;
- }
- char *realobj = REAL_ADDRESS(pseg->pub.segment_base, item->addr);
- ssize_t size = stmcb_size_rounded_up((struct object_s *)realobj);
- increment_total_allocated(-(size + LARGE_MALLOC_OVERHEAD));
_stm_large_free(stm_object_pages + item->addr);
} TREE_LOOP_END;
- if (locked)
- mutex_pages_unlock();
-
tree_clear(pseg->young_outside_nursery);
}
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -5,16 +5,12 @@
/************************************************************/
-static union {
- struct {
- uint8_t mutex_pages;
- volatile bool major_collection_requested;
- uint64_t total_allocated; /* keep track of how much memory we're
- using, ignoring nurseries */
- uint64_t total_allocated_bound;
- };
- char reserved[64];
-} pages_ctl __attribute__((aligned(64)));
+struct {
+ volatile bool major_collection_requested;
+ uint64_t total_allocated; /* keep track of how much memory we're
+ using, ignoring nurseries */
+ uint64_t total_allocated_bound;
+} pages_ctl;
static void setup_pages(void)
@@ -28,37 +24,15 @@
memset(pages_privatized, 0, sizeof(pages_privatized));
}
-static void mutex_pages_lock(void)
-{
- if (__sync_lock_test_and_set(&pages_ctl.mutex_pages, 1) == 0)
- return;
-
- int previous = change_timing_state(STM_TIME_SPIN_LOOP);
- while (__sync_lock_test_and_set(&pages_ctl.mutex_pages, 1) != 0) {
- spin_loop();
- }
- change_timing_state(previous);
-}
-
-static void mutex_pages_unlock(void)
-{
- __sync_lock_release(&pages_ctl.mutex_pages);
-}
-
-static bool _has_mutex_pages(void)
-{
- return pages_ctl.mutex_pages != 0;
-}
-
static uint64_t increment_total_allocated(ssize_t add_or_remove)
{
- assert(_has_mutex_pages());
- pages_ctl.total_allocated += add_or_remove;
+ uint64_t ta = __sync_add_and_fetch(&pages_ctl.total_allocated,
+ add_or_remove);
- if (pages_ctl.total_allocated >= pages_ctl.total_allocated_bound)
+ if (ta >= pages_ctl.total_allocated_bound)
pages_ctl.major_collection_requested = true;
- return pages_ctl.total_allocated;
+ return ta;
}
static bool is_major_collection_requested(void)
@@ -95,6 +69,17 @@
(void *)((addr - stm_object_pages) % (4096UL * NB_PAGES)),
(long)pgoff / NB_PAGES,
(void *)((pgoff % NB_PAGES) * 4096UL)));
+ assert(size % 4096 == 0);
+ assert(size <= TOTAL_MEMORY);
+ assert(((uintptr_t)addr) % 4096 == 0);
+ assert(addr >= stm_object_pages);
+ assert(addr <= stm_object_pages + TOTAL_MEMORY - size);
+ assert(pgoff >= 0);
+ assert(pgoff <= (TOTAL_MEMORY - size) / 4096UL);
+
+ /* assert remappings follow the rule that page N in one segment
+ can only be remapped to page N in another segment */
+ assert(((addr - stm_object_pages) / 4096UL - pgoff) % NB_PAGES == 0);
int res = remap_file_pages(addr, size, 0, pgoff, 0);
if (UNLIKELY(res < 0))
@@ -106,10 +91,12 @@
/* call remap_file_pages() to make all pages in the range(pagenum,
pagenum+count) refer to the same physical range of pages from
segment 0. */
- uintptr_t i;
- assert(_has_mutex_pages());
+ dprintf(("pages_initialize_shared: 0x%ld - 0x%ld\n", pagenum,
+ pagenum + count));
+ assert(pagenum < NB_PAGES);
if (count == 0)
return;
+ uintptr_t i;
for (i = 1; i <= NB_SEGMENTS; i++) {
char *segment_base = get_segment_base(i);
d_remap_file_pages(segment_base + pagenum * 4096UL,
@@ -119,14 +106,20 @@
static void page_privatize(uintptr_t pagenum)
{
- if (is_private_page(STM_SEGMENT->segment_num, pagenum)) {
- /* the page is already privatized */
+ /* check this thread's 'pages_privatized' bit */
+ uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
+ struct page_shared_s *ps = &pages_privatized[pagenum - PAGE_FLAG_START];
+ if (ps->by_segment & bitmask) {
+ /* the page is already privatized; nothing to do */
return;
}
- /* lock, to prevent concurrent threads from looking up this thread's
- 'pages_privatized' bits in parallel */
- mutex_pages_lock();
+#ifndef NDEBUG
+ spinlock_acquire(lock_pages_privatizing[STM_SEGMENT->segment_num]);
+#endif
+
+ /* add this thread's 'pages_privatized' bit */
+ __sync_fetch_and_add(&ps->by_segment, bitmask);
/* "unmaps" the page to make the address space location correspond
again to its underlying file offset (XXX later we should again
@@ -140,11 +133,9 @@
/* copy the content from the shared (segment 0) source */
pagecopy(new_page, stm_object_pages + pagenum * 4096UL);
- /* add this thread's 'pages_privatized' bit */
- uint64_t bitmask = 1UL << (STM_SEGMENT->segment_num - 1);
- pages_privatized[pagenum - PAGE_FLAG_START].by_segment |= bitmask;
-
- mutex_pages_unlock();
+#ifndef NDEBUG
+ spinlock_release(lock_pages_privatizing[STM_SEGMENT->segment_num]);
+#endif
}
static void _page_do_reshare(long segnum, uintptr_t pagenum)
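Note the ordering in page_privatize() above: the segment's bit in
pages_privatized is published with an atomic add *before* the page is
remapped and copied. As the new comment in pages.h below explains, a
reader that sees the bit slightly early only errs in the safe
direction. The add acts as an atomic OR here because our own bit is
known to be zero. A simplified self-contained sketch, with a
hypothetical array size:

    #include <stdbool.h>
    #include <stdint.h>

    struct page_shared_s { uint64_t by_segment; };
    static struct page_shared_s pages_priv[1024];   /* hypothetical size */

    bool page_is_private(long segnum, uintptr_t pagenum)
    {
        uint64_t bitmask = 1UL << (segnum - 1);
        return (pages_priv[pagenum].by_segment & bitmask) != 0;
    }

    void privatize(long segnum, uintptr_t pagenum)
    {
        uint64_t bitmask = 1UL << (segnum - 1);
        if (pages_priv[pagenum].by_segment & bitmask)
            return;                 /* already privatized; nothing to do */

        /* publish our bit first; an add is an OR since the bit is 0 */
        __sync_fetch_and_add(&pages_priv[pagenum].by_segment, bitmask);

        /* ... only now remap the page and copy its contents, as
           page_privatize() does ... */
    }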
diff --git a/c7/stm/pages.h b/c7/stm/pages.h
--- a/c7/stm/pages.h
+++ b/c7/stm/pages.h
@@ -34,6 +34,20 @@
};
static struct page_shared_s pages_privatized[PAGE_FLAG_END - PAGE_FLAG_START];
+/* Rules for concurrent access to this array, possibly with is_private_page():
+
+ - we clear bits only during major collection, when all threads are
+ synchronized anyway
+
+ - we set only the bit corresponding to our segment number, using
+ an atomic addition; and we do it _before_ we actually make the
+ page private.
+
+ - concurrently, other threads checking the bits might (rarely)
+ get the answer 'true' to is_private_page() even though it is not
+ actually private yet. This inconsistency is in the direction
+ that we want for synchronize_object_now().
+*/
static void pages_initialize_shared(uintptr_t pagenum, uintptr_t count);
static void page_privatize(uintptr_t pagenum);
@@ -41,10 +55,6 @@
static void _page_do_reshare(long segnum, uintptr_t pagenum);
static void pages_setup_readmarkers_for_nursery(void);
-/* Note: don't ever do "mutex_pages_lock(); mutex_lock()" in that order */
-static void mutex_pages_lock(void);
-static void mutex_pages_unlock(void);
-static bool _has_mutex_pages(void) __attribute__((unused));
static uint64_t increment_total_allocated(ssize_t add_or_remove);
static bool is_major_collection_requested(void);
static void force_major_collection_request(void);
@@ -62,3 +72,7 @@
if (pages_privatized[pagenum - PAGE_FLAG_START].by_segment != 0)
page_reshare(pagenum);
}
+
+#ifndef NDEBUG
+static char lock_pages_privatizing[NB_SEGMENTS + 1] = { 0 };
+#endif
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -9,7 +9,7 @@
PROT_READ | PROT_WRITE,
MAP_PAGES_FLAGS, -1, 0);
if (result == MAP_FAILED)
- stm_fatalerror("%s failed: %m\n", reason);
+ stm_fatalerror("%s failed: %m", reason);
return result;
}
@@ -132,17 +132,37 @@
teardown_pages();
}
+static void _shadowstack_trap_page(char *start, int prot)
+{
+ size_t bsize = STM_SHADOW_STACK_DEPTH * sizeof(struct stm_shadowentry_s);
+ char *end = start + bsize + 4095;
+ end -= (((uintptr_t)end) & 4095);
+ mprotect(end, 4096, prot);
+}
+
static void _init_shadow_stack(stm_thread_local_t *tl)
{
- struct stm_shadowentry_s *s = (struct stm_shadowentry_s *)
- malloc(SHADOW_STACK_SIZE * sizeof(struct stm_shadowentry_s));
- assert(s);
+ size_t bsize = STM_SHADOW_STACK_DEPTH * sizeof(struct stm_shadowentry_s);
+ char *start = malloc(bsize + 8192); /* for the trap page, plus rounding */
+ if (!start)
+ stm_fatalerror("can't allocate shadow stack");
+
+ /* set up a trap page: if the shadowstack overflows, it will
+ crash in a clean segfault */
+ _shadowstack_trap_page(start, PROT_NONE);
+
+ struct stm_shadowentry_s *s = (struct stm_shadowentry_s *)start;
tl->shadowstack = s;
tl->shadowstack_base = s;
}
static void _done_shadow_stack(stm_thread_local_t *tl)
{
+ assert(tl->shadowstack >= tl->shadowstack_base);
+
+ char *start = (char *)tl->shadowstack_base;
+ _shadowstack_trap_page(start, PROT_READ | PROT_WRITE);
+
free(tl->shadowstack_base);
tl->shadowstack = NULL;
tl->shadowstack_base = NULL;
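The shadow stack now ends in a trap page: _init_shadow_stack() rounds
up to the first page boundary past the usable area and mprotect()s
one page to PROT_NONE, so an overflow segfaults immediately instead
of silently corrupting the heap. _done_shadow_stack() restores
PROT_READ|PROT_WRITE before free(), since malloc may recycle that
page. A self-contained sketch of the trick, with hypothetical sizes:

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    #define STACK_BYTES (163840 * sizeof(void *))  /* hypothetical depth */

    static char *trap_page(char *start)
    {
        char *end = start + STACK_BYTES + 4095;    /* past the usable area */
        end -= ((uintptr_t)end) & 4095;            /* round to page start */
        return end;
    }

    char *alloc_guarded_stack(void)
    {
        /* 8192 extra bytes: one trap page plus alignment slack */
        char *start = malloc(STACK_BYTES + 8192);
        if (start != NULL)
            mprotect(trap_page(start), 4096, PROT_NONE);
        return start;
    }

    void free_guarded_stack(char *start)
    {
        mprotect(trap_page(start), 4096, PROT_READ | PROT_WRITE);
        free(start);
    }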
diff --git a/c7/stm/timing.c b/c7/stm/timing.c
--- a/c7/stm/timing.c
+++ b/c7/stm/timing.c
@@ -55,7 +55,6 @@
"minor gc",
"major gc",
"sync pause",
- "spin loop",
};
void stm_flush_timing(stm_thread_local_t *tl, int verbose)
diff --git a/c7/stm/weakref.c b/c7/stm/weakref.c
--- a/c7/stm/weakref.c
+++ b/c7/stm/weakref.c
@@ -34,7 +34,7 @@
stm_char *point_to_loc = (stm_char*)WEAKREF_PTR(weakref, size);
long i;
- for (i = 1; i <= NB_SEGMENTS; i++) {
+ for (i = 0; i <= NB_SEGMENTS; i++) {
char *base = get_segment_base(i);
object_t ** ref_loc = (object_t **)REAL_ADDRESS(base, point_to_loc);
*ref_loc = value;
@@ -57,11 +57,14 @@
a young outside nursery object. */
assert(_is_in_nursery(item));
object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)item;
+ ssize_t size = 16;
- /* the following checks are done like in nursery.c: */
- if (!(item->stm_flags & GCFLAG_HAS_SHADOW)
- || (pforwarded_array[0] != GCWORD_MOVED)) {
- /* weakref dies */
+ /* check if the weakref object was moved out of the nursery */
+ if (pforwarded_array[0] != GCWORD_MOVED) {
+ /* no: weakref dies */
+#ifndef NDEBUG
+ *WEAKREF_PTR(item, size) = (object_t *)-99;
+#endif
continue;
}
@@ -69,15 +72,13 @@
assert(!_is_young(item));
- ssize_t size = 16;
object_t *pointing_to = *WEAKREF_PTR(item, size);
assert(pointing_to != NULL);
if (_is_in_nursery(pointing_to)) {
object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)pointing_to;
- /* the following checks are done like in nursery.c: */
- if (!(pointing_to->stm_flags & GCFLAG_HAS_SHADOW)
- || (pforwarded_array[0] != GCWORD_MOVED)) {
+ /* check if the target was moved out of the nursery */
+ if (pforwarded_array[0] != GCWORD_MOVED) {
/* pointing_to dies */
_set_weakref_in_all_segments(item, NULL);
continue; /* no need to remember in old_weakrefs */
@@ -96,7 +97,9 @@
_set_weakref_in_all_segments(item, NULL);
continue; /* no need to remember in old_weakrefs */
}
- /* pointing_to was already old */
+ /* pointing_to is either a surviving young object outside
+ the nursery, or it was already old; in both cases keeping
+ the currently stored pointer is what we need */
}
LIST_APPEND(STM_PSEGMENT->old_weakrefs, item);
}));
@@ -128,7 +131,7 @@
stm_char *wr = (stm_char *)WEAKREF_PTR(weakref, size);
char *real_wr = REAL_ADDRESS(pseg->pub.segment_base, wr);
object_t *pointing_to = *(object_t **)real_wr;
- assert(pointing_to != NULL);
+ assert((uintptr_t)pointing_to >= NURSERY_END);
if (!mark_visited_test(pointing_to)) {
//assert(flag_page_private[(uintptr_t)weakref / 4096UL] != PRIVATE_PAGE);
_set_weakref_in_all_segments(weakref, NULL);
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -70,7 +70,6 @@
STM_TIME_MINOR_GC,
STM_TIME_MAJOR_GC,
STM_TIME_SYNC_PAUSE,
- STM_TIME_SPIN_LOOP,
_STM_TIME_N
};
@@ -136,8 +135,6 @@
object_t *_stm_enum_modified_old_objects(long index);
object_t *_stm_enum_objects_pointing_to_nursery(long index);
uint64_t _stm_total_allocated(void);
-void _stm_mutex_pages_lock(void);
-void _stm_mutex_pages_unlock(void);
char *stm_object_pages;
#endif
@@ -262,6 +259,10 @@
void stm_setup(void);
void stm_teardown(void);
+/* The size of each shadow stack, in number of entries.
+ Must be big enough to accommodate all STM_PUSH_ROOTs! */
+#define STM_SHADOW_STACK_DEPTH 163840
+
/* Push and pop roots from/to the shadow stack. Only allowed inside
transaction. */
#define STM_PUSH_ROOT(tl, p) ((tl).shadowstack++->ss = (object_t *)(p))
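STM_SHADOW_STACK_DEPTH bounds how many STM_PUSH_ROOTs can be
outstanding at once. For context, STM_PUSH_ROOT stores the pointer on
the thread's shadow stack, and the matching STM_POP_ROOT (declared
next to it in stmgc.h) reloads the possibly-updated value. A hedged
usage sketch, assuming a registered thread inside a transaction:

    #include "stmgc.h"

    /* Allocate two objects; 'first' must be pushed as a root because
       the second allocation may trigger a minor collection that
       moves it. */
    void make_two_objects(stm_thread_local_t *tl)
    {
        object_t *first = stm_allocate(16);
        STM_PUSH_ROOT(*tl, first);
        object_t *second = stm_allocate(16);   /* may move 'first' */
        STM_POP_ROOT(*tl, first);              /* reload current address */
        (void)second;
    }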
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -96,8 +96,6 @@
void stm_collect(long level);
uint64_t _stm_total_allocated(void);
-void _stm_mutex_pages_lock(void);
-void _stm_mutex_pages_unlock(void);
long stm_identityhash(object_t *obj);
long stm_id(object_t *obj);
@@ -279,6 +277,7 @@
''', sources=source_files,
define_macros=[('STM_TESTS', '1'),
+ ('STM_LARGEMALLOC_TEST', '1'),
('STM_NO_COND_WAIT', '1'),
('STM_DEBUGPRINT', '1'),
('GC_N_SMALL_REQUESTS', str(GC_N_SMALL_REQUESTS)), #check
diff --git a/c7/test/test_largemalloc.py b/c7/test/test_largemalloc.py
--- a/c7/test/test_largemalloc.py
+++ b/c7/test/test_largemalloc.py
@@ -14,10 +14,12 @@
lib.memset(self.rawmem, 0xcd, self.size)
lib._stm_largemalloc_init_arena(self.rawmem, self.size)
- lib._stm_mutex_pages_lock() # for this file
def test_simple(self):
+ #
+ lib._stm_large_dump()
d1 = lib._stm_large_malloc(7000)
+ lib._stm_large_dump()
d2 = lib._stm_large_malloc(8000)
print d1
print d2
@@ -70,7 +72,7 @@
lib._stm_large_dump()
def test_resize_arena_reduce_2(self):
- lib._stm_large_malloc(self.size // 2 - 64)
+ lib._stm_large_malloc(self.size // 2 - 80)
r = lib._stm_largemalloc_resize_arena(self.size // 2)
assert r == 1
lib._stm_large_dump()
@@ -120,7 +122,7 @@
p.append((d, sz, content1, content2))
lib._stm_large_dump()
- def test_random_largemalloc_sweep(self):
+ def test_random_largemalloc_sweep(self, constrained_size_range=False):
@ffi.callback("bool(char *)")
def keep(data):
try:
@@ -138,7 +140,11 @@
r = random.Random(1000)
for j in range(500):
- sizes = [random.choice(range(104, 500, 8)) for i in range(20)]
+ if constrained_size_range:
+ max = 120
+ else:
+ max = 500
+ sizes = [random.choice(range(104, max, 8)) for i in range(20)]
all = [lib._stm_large_malloc(size) for size in sizes]
print all
@@ -170,3 +176,6 @@
assert all[i][50] == chr(65 + i)
else:
assert all_orig[i][50] == '\xDE'
+
+ def test_random_largemalloc_sweep_constrained_size_range(self):
+ self.test_random_largemalloc_sweep(constrained_size_range=True)
diff --git a/c7/test/test_weakref.py b/c7/test/test_weakref.py
--- a/c7/test/test_weakref.py
+++ b/c7/test/test_weakref.py
@@ -360,3 +360,40 @@
self.switch(1)
make_wr()
stm_major_collect()
+
+
+class TestManyThreads(BaseTest):
+ NB_THREADS = NB_SEGMENTS
+
+ def test_weakref_bug3(self):
+ # make an object
+ self.start_transaction()
+ lp0 = stm_allocate(16)
+ self.push_root(lp0)
+ self.commit_transaction()
+ lp0 = self.pop_root()
+ self.push_root(lp0)
+ #
+ # privatize the page in all segments
+ for i in range(NB_SEGMENTS-1, -1, -1):
+ self.switch(i)
+ self.start_transaction()
+ stm_set_char(lp0, 'A')
+ self.commit_transaction()
+ #
+ self.start_transaction()
+ lp2 = stm_allocate(16)
+ self.push_root(lp2)
+ lp1 = stm_allocate_weakref(lp2)
+ self.push_root(lp1)
+ self.commit_transaction()
+ lp1 = self.pop_root()
+ lp2 = self.pop_root()
+ self.push_root(lp2)
+ self.push_root(lp1)
+ # the commit copies the weakref to all segments, but misses
+ # segment #0
+ #
+ self.start_transaction()
+ stm_major_collect() # reshare all, keeping only segment #0
+ assert stm_get_weakref(lp1) == lp2