[pypy-commit] stmgc c8-private-pages: start with largemalloc support (WIP)
Raemi
noreply at buildbot.pypy.org
Wed Jan 14 14:08:42 CET 2015
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: c8-private-pages
Changeset: r1525:349d0e3910ea
Date: 2015-01-14 13:41 +0100
http://bitbucket.org/pypy/stmgc/changeset/349d0e3910ea/
Log: start with largemalloc support (WIP)
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -144,11 +144,18 @@
most_recent_rev = log_entry->rev_num;
}
}
- OPT_ASSERT(copy_from_segnum != -1 && copy_from_segnum != my_segnum);
+ OPT_ASSERT(copy_from_segnum != my_segnum);
- /* make our page private */
+ /* make our page write-ready */
page_mark_accessible(my_segnum, pagenum);
- assert(get_page_status_in(my_segnum, pagenum) == PAGE_ACCESSIBLE);
+
+ if (copy_from_segnum == -1) {
+ /* this page is so far only accessible in the sharing segment (it is
+ a new allocation). We can thus simply mark it accessible here and
+ need not care about its contents yet. */
+ release_all_privatization_locks();
+ return;
+ }
/* before copying anything, acquire modification locks from our and
the other segment */
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -2,27 +2,47 @@
# error "must be compiled via stmgc.c"
#endif
+static struct list_s *testing_prebuilt_objs = NULL;
+static struct tree_s *tree_prebuilt_objs = NULL; /* XXX refactor */
+
static void setup_gcpage(void)
{
+ char *base = stm_object_pages + END_NURSERY_PAGE * 4096UL;
+ uintptr_t length = (NB_PAGES - END_NURSERY_PAGE) * 4096UL;
+ _stm_largemalloc_init_arena(base, length);
+
uninitialized_page_start = stm_object_pages + END_NURSERY_PAGE * 4096UL;
uninitialized_page_stop = uninitialized_page_start + NB_SHARED_PAGES * 4096UL;
}
static void teardown_gcpage(void)
{
+ LIST_FREE(testing_prebuilt_objs);
+ if (tree_prebuilt_objs != NULL) {
+ tree_free(tree_prebuilt_objs);
+ tree_prebuilt_objs = NULL;
+ }
}
+
+
static void setup_N_pages(char *pages_addr, long num)
{
- /* initialize to |N|P|N|N| */
+ /* make the pages accessible in the sharing segment only (they are
+ already PROT_READ/WRITE, see setup.c, but not yet marked accessible
+ in the page status). */
+
+ /* acquiring the locks may not be necessary because the affected pages
+ don't need privatization protection (but there is an assert right
+ now to enforce that XXXXXX) */
acquire_all_privatization_locks();
uintptr_t p = (pages_addr - stm_object_pages) / 4096UL;
dprintf(("setup_N_pages(%p, %lu): pagenum %lu\n", pages_addr, num, p));
while (num-->0) {
/* XXX: page_range_mark_accessible() */
- page_mark_accessible(STM_SEGMENT->segment_num, p + num);
+ page_mark_accessible(0, p + num);
}
release_all_privatization_locks();
@@ -33,14 +53,23 @@
static stm_char *allocate_outside_nursery_large(uint64_t size)
{
- /* XXX: real allocation */
+ /* Allocate the object with largemalloc.c from the lower addresses. */
+ char *addr = _stm_large_malloc(size);
+ if (addr == NULL)
+ stm_fatalerror("not enough memory!");
+
+ if (LIKELY(addr + size <= uninitialized_page_start))
+ return (stm_char*)(addr - stm_object_pages);
+
+
+ /* uncommon case: need to initialize some more pages */
spinlock_acquire(lock_growth_large);
- char *addr = uninitialized_page_start;
char *start = uninitialized_page_start;
- if (addr + size > start) { /* XXX: always for now */
+ if (addr + size > start) {
uintptr_t npages;
- npages = (addr + size - start) / 4096UL + 1;
+ npages = (addr + size - start) / 4096UL;
+ npages += GCPAGE_NUM_PAGES;
if (uninitialized_page_stop - start < npages * 4096UL) {
stm_fatalerror("out of memory!"); /* XXX */
}
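(As a side note, a minimal sketch of the two address views involved here: _stm_large_malloc() hands back a real pointer into the sharing/segment-0 area, and the return statement above re-expresses it as an offset from stm_object_pages. The helper names below are hypothetical; the real conversion macros live elsewhere in core.h and are not part of this diff.)

    #include <stdint.h>

    typedef char object_offset_t;   /* hypothetical stand-in for stm_char */

    /* what the return statement above computes */
    static object_offset_t *to_offset(char *stm_object_pages_base, char *real_addr)
    {
        return (object_offset_t *)(real_addr - stm_object_pages_base);
    }

    /* the inverse mapping: a segment turns the offset back into an address */
    static char *to_real(char *segment_base, object_offset_t *offset)
    {
        return segment_base + (uintptr_t)offset;
    }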
diff --git a/c8/stm/largemalloc.c b/c8/stm/largemalloc.c
new file mode 100644
--- /dev/null
+++ b/c8/stm/largemalloc.c
@@ -0,0 +1,623 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+/* This contains a lot of inspiration from malloc() in the GNU C Library.
+ More precisely, this is (a subset of) the part that handles large
+ blocks, which in our case means at least 288 bytes. It is actually
+ a general allocator, although it doesn't contain any of the small-
+ or medium-block support that is also present in the GNU C Library.
+*/
+
+#define largebin_index(sz) \
+ (((sz) < (48 << 6)) ? ((sz) >> 6): /* 0 - 47 */ \
+ ((sz) < (24 << 9)) ? 42 + ((sz) >> 9): /* 48 - 65 */ \
+ ((sz) < (12 << 12)) ? 63 + ((sz) >> 12): /* 66 - 74 */ \
+ ((sz) < (6 << 15)) ? 74 + ((sz) >> 15): /* 75 - 79 */ \
+ ((sz) < (3 << 18)) ? 80 + ((sz) >> 18): /* 80 - 82 */ \
+ 83)
+#define N_BINS 84
+#define LAST_BIN_INDEX(sz) ((sz) >= (3 << 18))
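(A quick way to sanity-check the bin boundaries annotated in the comments above is to evaluate the same formula for a few sizes; this standalone sketch simply restates the macro as a function:)

    #include <stdio.h>

    /* same formula as the largebin_index() macro above */
    static unsigned bin_of(size_t sz)
    {
        return (sz < (48 << 6))  ? (unsigned)(sz >> 6) :
               (sz < (24 << 9))  ? (unsigned)(42 + (sz >> 9)) :
               (sz < (12 << 12)) ? (unsigned)(63 + (sz >> 12)) :
               (sz < (6 << 15))  ? (unsigned)(74 + (sz >> 15)) :
               (sz < (3 << 18))  ? (unsigned)(80 + (sz >> 18)) : 83;
    }

    int main(void)
    {
        printf("%u\n", bin_of(288));      /* 288 >> 6           = 4  */
        printf("%u\n", bin_of(3072));     /* 42 + (3072 >> 9)   = 48 */
        printf("%u\n", bin_of(65536));    /* 74 + (65536 >> 15) = 76 */
        printf("%u\n", bin_of(1 << 20));  /* >= 3 << 18, last bin: 83 */
        return 0;
    }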
+
+typedef struct dlist_s {
+ struct dlist_s *next; /* a circular doubly-linked list */
+ struct dlist_s *prev;
+} dlist_t;
+
+typedef struct ulist_s {
+ struct ulist_s *up; /* a non-circular doubly-linked list */
+ struct ulist_s *down;
+} ulist_t;
+
+typedef struct malloc_chunk {
+ size_t prev_size; /* - if the previous chunk is free: size of its data
+ - otherwise, if this chunk is free: 1
+ - otherwise, 0. */
+ size_t size; /* size of the data in this chunk */
+
+ dlist_t d; /* if free: a doubly-linked list 'largebins' */
+ /* if not free: the user data starts here */
+ ulist_t u; /* if free, if unsorted: up==UU_UNSORTED
+ if free, if sorted: a doubly-linked list */
+
+ /* The chunk has a total size of 'size'. It is immediately followed
+ in memory by another chunk. This list ends with the last "chunk"
+ being actually only two words long, with END_MARKER as 'size'.
+ Both this last chunk and the theoretical chunk before the first
+ one are considered "not free". */
+} mchunk_t;
+
+#define UU_UNSORTED ((ulist_t *) 1)
+#define THIS_CHUNK_FREE 1
+#define BOTH_CHUNKS_USED 0
+#define CHUNK_HEADER_SIZE offsetof(struct malloc_chunk, d)
+#define END_MARKER 0xDEADBEEF
+#define MIN_ALLOC_SIZE (sizeof(struct malloc_chunk) - CHUNK_HEADER_SIZE)
+
+#define chunk_at_offset(p, ofs) ((mchunk_t *)(((char *)(p)) + (ofs)))
+#define data2chunk(p) chunk_at_offset(p, -CHUNK_HEADER_SIZE)
+#define updown2chunk(p) chunk_at_offset(p, \
+ -(CHUNK_HEADER_SIZE + sizeof(dlist_t)))
+
+static mchunk_t *next_chunk(mchunk_t *p)
+{
+ return chunk_at_offset(p, CHUNK_HEADER_SIZE + p->size);
+}
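(For reference, on a 64-bit build the numbers implied by the definitions above work out as follows; a small sketch that would compile in this translation unit:)

    #include <assert.h>

    static void check_chunk_layout(void)
    {
        assert(sizeof(mchunk_t) == 48);      /* 8 + 8 + 16 + 16 bytes */
        assert(CHUNK_HEADER_SIZE == 16);     /* user data starts at 'd' */
        assert(MIN_ALLOC_SIZE == 32);        /* room for 'd' + 'u' while free */
    }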
+
+
+/* The free chunks are stored in "bins". Each bin is a doubly-linked
+ list of chunks. There are 84 bins, with largebin_index() giving the
+ correspondence between sizes and bin indices.
+
+ Each free chunk is preceded in memory by a non-free chunk (or no
+ chunk at all). Each free chunk is followed in memory by a non-free
+ chunk (or no chunk at all). Chunks are consolidated with their
+ neighbors to ensure this.
+
+ In each bin's doubly-linked list, chunks are sorted by their size in
+ decreasing order (if you follow 'largebins[n].next',
+ 'largebins[n].next->next', etc.). At the end of this list are some
+ unsorted chunks. All unsorted chunks are after all sorted chunks.
+ Unsorted chunks are distinguished by having 'u.up == UU_UNSORTED'.
+
+ Note that if the user always calls large_malloc() with a large
+ enough argument, then the few bins corresponding to smaller values
+ will never be sorted at all. They are still populated with the
+ fragments of space between bigger allocations.
+
+ Following the 'd' linked list, we get only one chunk of every size.
+ The additional chunks of a given size are linked "vertically" in
+ the secondary 'u' doubly-linked list.
+
+
+ +-----+
+ | 296 |
+ +-----+
+ ^ |
+ | v
+ +-----+ +-----+
+ | 296 | | 288 |
+ +-----+ +-----+
+ ^ | ^ | UU_UNSORTED
+ | v | v |
+ largebins +-----+ +-----+ +-----+ +-----+ largebins
+ [4].next <-> | 304 | <-> | 296 | <-> | 288 | <-> | 296 | <-> [4].prev
+ +-----+ +-----+ +-----+ +-----+
+
+*/
+
+
+static struct {
+ int lock;
+ mchunk_t *first_chunk, *last_chunk;
+ dlist_t largebins[N_BINS];
+} lm __attribute__((aligned(64)));
+
+
+static void lm_lock(void)
+{
+ spinlock_acquire(lm.lock);
+}
+
+static void lm_unlock(void)
+{
+ spinlock_release(lm.lock);
+}
+
+
+static void insert_unsorted(mchunk_t *new)
+{
+ size_t index = LAST_BIN_INDEX(new->size) ? N_BINS - 1
+ : largebin_index(new->size);
+ new->d.next = &lm.largebins[index];
+ new->d.prev = lm.largebins[index].prev;
+ new->d.prev->next = &new->d;
+ new->u.up = UU_UNSORTED;
+ new->u.down = NULL;
+ lm.largebins[index].prev = &new->d;
+}
+
+static int compare_chunks(const void *vchunk1, const void *vchunk2)
+{
+ /* sort by size */
+ mchunk_t *chunk1 = *(mchunk_t *const *)vchunk1;
+ mchunk_t *chunk2 = *(mchunk_t *const *)vchunk2;
+ if (chunk1->size < chunk2->size)
+ return -1;
+ if (chunk1->size == chunk2->size)
+ return 0;
+ else
+ return +1;
+}
+
+#define MAX_STACK_COUNT 64
+
+static void really_sort_bin(size_t index)
+{
+ dlist_t *unsorted = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
+ dlist_t *scan = unsorted->prev;
+ size_t count = 1;
+ while (scan != end && data2chunk(scan)->u.up == UU_UNSORTED) {
+ scan = scan->prev;
+ ++count;
+ }
+ end->prev = scan;
+ scan->next = end;
+
+ mchunk_t *chunk1;
+ mchunk_t *chunk_array[MAX_STACK_COUNT];
+ mchunk_t **chunks = chunk_array;
+
+ if (count == 1) {
+ chunk1 = data2chunk(unsorted); /* common case */
+ count = 0;
+ }
+ else {
+ if (count > MAX_STACK_COUNT) {
+ chunks = malloc(count * sizeof(mchunk_t *));
+ if (chunks == NULL) {
+ stm_fatalerror("out of memory"); // XXX
+ }
+ }
+ size_t i;
+ for (i = 0; i < count; i++) {
+ chunks[i] = data2chunk(unsorted);
+ unsorted = unsorted->prev;
+ }
+ assert(unsorted == scan);
+ qsort(chunks, count, sizeof(mchunk_t *), compare_chunks);
+
+ chunk1 = chunks[--count];
+ }
+ size_t search_size = chunk1->size;
+ dlist_t *head = lm.largebins[index].next;
+
+ while (1) {
+ if (head == end || data2chunk(head)->size < search_size) {
+ /* insert 'chunk1' here, before the current head */
+ head->prev->next = &chunk1->d;
+ chunk1->d.prev = head->prev;
+ head->prev = &chunk1->d;
+ chunk1->d.next = head;
+ chunk1->u.up = NULL;
+ chunk1->u.down = NULL;
+ head = &chunk1->d;
+ }
+ else if (data2chunk(head)->size == search_size) {
+ /* insert 'chunk1' vertically in the 'u' list */
+ ulist_t *uhead = &data2chunk(head)->u;
+ chunk1->u.up = uhead->up;
+ chunk1->u.down = uhead;
+ if (uhead->up != NULL)
+ uhead->up->down = &chunk1->u;
+ uhead->up = &chunk1->u;
+#ifndef NDEBUG
+ chunk1->d.next = (dlist_t *)0x42; /* not used */
+ chunk1->d.prev = (dlist_t *)0x42;
+#endif
+ }
+ else {
+ head = head->next;
+ continue;
+ }
+ if (count == 0)
+ break; /* all done */
+ chunk1 = chunks[--count];
+ search_size = chunk1->size;
+ }
+
+ if (chunks != chunk_array)
+ free(chunks);
+}
+
+static void sort_bin(size_t index)
+{
+ dlist_t *last = lm.largebins[index].prev;
+ if (last != &lm.largebins[index] && data2chunk(last)->u.up == UU_UNSORTED)
+ really_sort_bin(index);
+}
+
+static void unlink_chunk(mchunk_t *mscan)
+{
+ if (mscan->u.down != NULL) {
+ /* unlink mscan from the vertical list 'u' */
+ ulist_t *up = mscan->u.up;
+ ulist_t *down = mscan->u.down;
+ down->up = up;
+ if (up != NULL) up->down = down;
+ }
+ else {
+ dlist_t *prev = mscan->d.prev;
+ dlist_t *next = mscan->d.next;
+ if (mscan->u.up == NULL || mscan->u.up == UU_UNSORTED) {
+ /* unlink mscan from the doubly-linked list 'd' */
+ next->prev = prev;
+ prev->next = next;
+ }
+ else {
+ /* relink in the 'd' list the item above me */
+ mchunk_t *above = updown2chunk(mscan->u.up);
+ next->prev = &above->d;
+ prev->next = &above->d;
+ above->d.next = next;
+ above->d.prev = prev;
+ above->u.down = NULL;
+ }
+ }
+}
+
+char *_stm_large_malloc(size_t request_size)
+{
+ /* 'request_size' should already be a multiple of the word size here */
+ assert((request_size & (sizeof(char *)-1)) == 0);
+
+ /* it can be very small, but we need to ensure a minimal size
+ (currently 32 bytes) */
+ if (request_size < MIN_ALLOC_SIZE)
+ request_size = MIN_ALLOC_SIZE;
+
+ lm_lock();
+
+ size_t index = largebin_index(request_size);
+ sort_bin(index);
+
+ /* scan through the chunks of the current bin in reverse order
+ to find the smallest one that fits. */
+ dlist_t *scan = lm.largebins[index].prev;
+ dlist_t *end = &lm.largebins[index];
+ mchunk_t *mscan;
+ while (scan != end) {
+ mscan = data2chunk(scan);
+ assert(mscan->prev_size == THIS_CHUNK_FREE);
+ assert(next_chunk(mscan)->prev_size == mscan->size);
+ assert(IMPLY(mscan->d.prev != end,
+ data2chunk(mscan->d.prev)->size > mscan->size));
+
+ if (mscan->size >= request_size)
+ goto found;
+ scan = mscan->d.prev;
+ }
+
+ /* search now through all higher bins. We only need to take the
+ smallest item of the first non-empty bin, as it will be large
+ enough. */
+ while (++index < N_BINS) {
+ if (lm.largebins[index].prev != &lm.largebins[index]) {
+ /* non-empty bin. */
+ sort_bin(index);
+ scan = lm.largebins[index].prev;
+ mscan = data2chunk(scan);
+ goto found;
+ }
+ }
+
+ /* not enough memory. */
+ lm_unlock();
+ return NULL;
+
+ found:
+ assert(mscan->size >= request_size);
+ assert(mscan->u.up != UU_UNSORTED);
+
+ if (mscan->u.up != NULL) {
+ /* fast path: grab the item that is just above, to avoid needing
+ to rearrange the 'd' list */
+ mchunk_t *above = updown2chunk(mscan->u.up);
+ ulist_t *two_above = above->u.up;
+ mscan->u.up = two_above;
+ if (two_above != NULL) two_above->down = &mscan->u;
+ mscan = above;
+ }
+ else {
+ unlink_chunk(mscan);
+ }
+
+ size_t remaining_size = mscan->size - request_size;
+ if (remaining_size < sizeof(struct malloc_chunk)) {
+ next_chunk(mscan)->prev_size = BOTH_CHUNKS_USED;
+ request_size = mscan->size;
+ }
+ else {
+ /* only part of the chunk is being used; reduce the size
+ of 'mscan' down to 'request_size', and create a new
+ chunk of the 'remaining_size' afterwards */
+ mchunk_t *new = chunk_at_offset(mscan, CHUNK_HEADER_SIZE +
+ request_size);
+ new->prev_size = THIS_CHUNK_FREE;
+ size_t remaining_data_size = remaining_size - CHUNK_HEADER_SIZE;
+ new->size = remaining_data_size;
+ next_chunk(new)->prev_size = remaining_data_size;
+ insert_unsorted(new);
+ }
+ mscan->size = request_size;
+ mscan->prev_size = BOTH_CHUNKS_USED;
+#ifndef NDEBUG
+ memset((char *)&mscan->d, 0xda, request_size);
+#endif
+
+ lm_unlock();
+
+ return (char *)&mscan->d;
+}
+
+static void _large_free(mchunk_t *chunk)
+{
+ assert((chunk->size & (sizeof(char *) - 1)) == 0);
+ assert(chunk->prev_size != THIS_CHUNK_FREE);
+
+ /* 'size' is at least MIN_ALLOC_SIZE */
+
+#ifndef NDEBUG
+ {
+ char *data = (char *)&chunk->d;
+ assert(chunk->size >= sizeof(dlist_t));
+ assert(chunk->size <= (((char *)lm.last_chunk) - data));
+ memset(data, 0xDE, chunk->size);
+ }
+#endif
+
+ /* try to merge with the following chunk in memory */
+ size_t msize = chunk->size + CHUNK_HEADER_SIZE;
+ mchunk_t *mscan = chunk_at_offset(chunk, msize);
+
+ if (mscan->prev_size == BOTH_CHUNKS_USED) {
+ assert((mscan->size & (sizeof(char *) - 1)) == 0);
+ mscan->prev_size = chunk->size;
+ }
+ else {
+ size_t fsize = mscan->size;
+ mchunk_t *fscan = chunk_at_offset(mscan, fsize + CHUNK_HEADER_SIZE);
+
+ /* unlink the following chunk */
+ unlink_chunk(mscan);
+#ifndef NDEBUG
+ mscan->prev_size = (size_t)-258; /* 0xfffffffffffffefe */
+ mscan->size = (size_t)-515; /* 0xfffffffffffffdfd */
+#endif
+
+ /* merge the two chunks */
+ assert(fsize == fscan->prev_size);
+ fsize += msize;
+ fscan->prev_size = fsize;
+ chunk->size = fsize;
+ }
+
+ /* try to merge with the previous chunk in memory */
+ if (chunk->prev_size == BOTH_CHUNKS_USED) {
+ chunk->prev_size = THIS_CHUNK_FREE;
+ }
+ else {
+ assert((chunk->prev_size & (sizeof(char *) - 1)) == 0);
+
+ /* get at the previous chunk */
+ msize = chunk->prev_size + CHUNK_HEADER_SIZE;
+ mscan = chunk_at_offset(chunk, -msize);
+ assert(mscan->prev_size == THIS_CHUNK_FREE);
+ assert(mscan->size == chunk->prev_size);
+
+ /* unlink the previous chunk */
+ unlink_chunk(mscan);
+
+ /* merge the two chunks */
+ mscan->size = msize + chunk->size;
+ next_chunk(mscan)->prev_size = mscan->size;
+
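+ /* note: the '=' (not '==') in the next two asserts looks intentional:
+ in debug builds they poison the absorbed chunk's header, and with
+ NDEBUG they compile away entirely */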
+ assert(chunk->prev_size = (size_t)-1);
+ assert(chunk->size = (size_t)-1);
+ chunk = mscan;
+ }
+
+ insert_unsorted(chunk);
+}
+
+void _stm_large_free(char *data)
+{
+ lm_lock();
+ _large_free(data2chunk(data));
+ lm_unlock();
+}
+
+
+void _stm_large_dump(void)
+{
+ lm_lock();
+ char *data = ((char *)lm.first_chunk) + 16;
+ size_t prev_size_if_free = 0;
+ fprintf(stderr, "\n");
+ while (1) {
+ assert((((uintptr_t)data) & 7) == 0); /* alignment */
+ fprintf(stderr, "[ %p: %zu", data - 16, *(size_t*)(data - 16));
+ if (prev_size_if_free == 0) {
+ assert(*(size_t*)(data - 16) == THIS_CHUNK_FREE ||
+ *(size_t*)(data - 16) == BOTH_CHUNKS_USED);
+ if (*(size_t*)(data - 16) == THIS_CHUNK_FREE)
+ prev_size_if_free = (*(size_t*)(data - 8));
+ }
+ else {
+ assert(*(size_t*)(data - 16) == prev_size_if_free);
+ prev_size_if_free = 0;
+ }
+ if (*(size_t*)(data - 8) == END_MARKER)
+ break;
+ if (prev_size_if_free) {
+ fprintf(stderr, " \t(up %p / down %p)",
+ *(void **)(data + 16), *(void **)(data + 24));
+ }
+ fprintf(stderr, "\n %p: %zu ]", data - 8, *(size_t*)(data - 8));
+ if (prev_size_if_free) {
+ fprintf(stderr, "\t(prev %p <-> next %p)\n",
+ *(void **)(data + 8), *(void **)data);
+ }
+ else {
+ fprintf(stderr, "\n");
+ }
+ assert(*(ssize_t*)(data - 8) >= 16);
+ data += *(size_t*)(data - 8);
+ data += 16;
+ }
+ fprintf(stderr, "\n %p: end. ]\n\n", data - 8);
+ assert(data - 16 == (char *)lm.last_chunk);
+ lm_unlock();
+}
+
+char *_stm_largemalloc_data_start(void)
+{
+ return (char *)lm.first_chunk;
+}
+
+#ifdef STM_LARGEMALLOC_TEST
+bool (*_stm_largemalloc_keep)(char *data); /* a hook for tests */
+#endif
+
+void _stm_largemalloc_init_arena(char *data_start, size_t data_size)
+{
+ int i;
+ for (i = 0; i < N_BINS; i++) {
+ lm.largebins[i].prev = &lm.largebins[i];
+ lm.largebins[i].next = &lm.largebins[i];
+ }
+
+ assert(data_size >= 2 * sizeof(struct malloc_chunk));
+ assert((data_size & 31) == 0);
+ lm.first_chunk = (mchunk_t *)data_start;
+ lm.first_chunk->prev_size = THIS_CHUNK_FREE;
+ lm.first_chunk->size = data_size - 2 * CHUNK_HEADER_SIZE;
+ lm.last_chunk = chunk_at_offset(lm.first_chunk,
+ data_size - CHUNK_HEADER_SIZE);
+ lm.last_chunk->prev_size = lm.first_chunk->size;
+ lm.last_chunk->size = END_MARKER;
+ assert(lm.last_chunk == next_chunk(lm.first_chunk));
+ lm.lock = 0;
+
+ insert_unsorted(lm.first_chunk);
+
+#ifdef STM_LARGEMALLOC_TEST
+ _stm_largemalloc_keep = NULL;
+#endif
+}
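(A minimal sketch of what the initial layout set up here implies, mirroring test_overflow_1 and test_overflow_2 further down; the malloc'ed buffer merely stands in for the real arena:)

    #include <assert.h>
    #include <stdlib.h>

    static void check_initial_arena(void)
    {
        size_t data_size = 1024 * 1024;                 /* multiple of 32 */
        char *data_start = malloc(data_size);

        _stm_largemalloc_init_arena(data_start, data_size);

        /* the single free chunk holds data_size - 2*CHUNK_HEADER_SIZE bytes,
           so the largest possible allocation starts 16 bytes into the arena */
        char *d = _stm_large_malloc(data_size - 32);
        assert(d == data_start + 16);
        _stm_large_free(d);

        /* anything larger cannot fit */
        assert(_stm_large_malloc(data_size - 16) == NULL);

        free(data_start);
    }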
+
+int _stm_largemalloc_resize_arena(size_t new_size)
+{
+ int result = 0;
+ lm_lock();
+
+ if (new_size < 2 * sizeof(struct malloc_chunk))
+ goto fail;
+ OPT_ASSERT((new_size & 31) == 0);
+
+ new_size -= CHUNK_HEADER_SIZE;
+ mchunk_t *new_last_chunk = chunk_at_offset(lm.first_chunk, new_size);
+ mchunk_t *old_last_chunk = lm.last_chunk;
+ size_t old_size = ((char *)old_last_chunk) - (char *)lm.first_chunk;
+
+ if (new_size < old_size) {
+ /* check if there is enough free space at the end to allow
+ such a reduction */
+ size_t lsize = lm.last_chunk->prev_size;
+ assert(lsize != THIS_CHUNK_FREE);
+ if (lsize == BOTH_CHUNKS_USED)
+ goto fail;
+ lsize += CHUNK_HEADER_SIZE;
+ mchunk_t *prev_chunk = chunk_at_offset(lm.last_chunk, -lsize);
+ if (((char *)new_last_chunk) < ((char *)prev_chunk) +
+ sizeof(struct malloc_chunk))
+ goto fail;
+
+ /* unlink the prev_chunk from the doubly-linked list */
+ unlink_chunk(prev_chunk);
+
+ /* reduce the prev_chunk */
+ assert(prev_chunk->size == lm.last_chunk->prev_size);
+ prev_chunk->size = ((char*)new_last_chunk) - (char *)prev_chunk
+ - CHUNK_HEADER_SIZE;
+
+ /* make a fresh-new last chunk */
+ new_last_chunk->prev_size = prev_chunk->size;
+ new_last_chunk->size = END_MARKER;
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(prev_chunk));
+
+ insert_unsorted(prev_chunk);
+ }
+ else if (new_size > old_size) {
+ /* make the new last chunk first, with only the extra size */
+ mchunk_t *old_last_chunk = lm.last_chunk;
+ old_last_chunk->size = (new_size - old_size) - CHUNK_HEADER_SIZE;
+ new_last_chunk->prev_size = BOTH_CHUNKS_USED;
+ new_last_chunk->size = END_MARKER;
+ lm.last_chunk = new_last_chunk;
+ assert(lm.last_chunk == next_chunk(old_last_chunk));
+
+ /* then free the last_chunk (turn it from "used" to "free") */
+ _large_free(old_last_chunk);
+ }
+
+ result = 1;
+ fail:
+ lm_unlock();
+ return result;
+}
+
+
+static inline bool _largemalloc_sweep_keep(mchunk_t *chunk)
+{
+#ifdef STM_LARGEMALLOC_TEST
+ if (_stm_largemalloc_keep != NULL)
+ return _stm_largemalloc_keep((char *)&chunk->d);
+#endif
+ return true;
+ //XXX: return largemalloc_keep_object_at((char *)&chunk->d);
+}
+
+void _stm_largemalloc_sweep(void)
+{
+ lm_lock();
+
+ /* This may be slightly optimized by inlining _large_free() and
+ distinguishing cases, e.g. we might already know whether the previous
+ block was free or not. It's probably not really worth it. */
+ mchunk_t *mnext, *chunk = lm.first_chunk;
+
+ if (chunk->prev_size == THIS_CHUNK_FREE)
+ chunk = next_chunk(chunk); /* go to the first non-free chunk */
+
+ while (chunk != lm.last_chunk) {
+ /* here, the chunk we're pointing to is not free */
+ assert(chunk->prev_size != THIS_CHUNK_FREE);
+
+ /* first figure out the next non-free chunk */
+ mnext = next_chunk(chunk);
+ if (mnext->prev_size == THIS_CHUNK_FREE)
+ mnext = next_chunk(mnext);
+
+ /* use the callback to know if 'chunk' contains an object that
+ survives or dies */
+ if (!_largemalloc_sweep_keep(chunk)) {
+ _large_free(chunk); /* dies */
+ }
+ chunk = mnext;
+ }
+
+ lm_unlock();
+}
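(And a sketch of how the sweep hook above is meant to be driven under STM_LARGEMALLOC_TEST, in the spirit of test_random_largemalloc_sweep below; it assumes an arena initialized as in the previous sketch, and the keep policy here is purely illustrative:)

    #ifdef STM_LARGEMALLOC_TEST
    /* toy policy: keep a chunk if its first user byte is nonzero */
    static bool keep_if_marked(char *data)
    {
        return data[0] != 0;
    }

    static void sweep_example(void)
    {
        char *a = _stm_large_malloc(64);
        char *b = _stm_large_malloc(64);
        a[0] = 1;                         /* will survive the sweep */
        b[0] = 0;                         /* will be freed by the sweep */

        _stm_largemalloc_keep = keep_if_marked;
        _stm_largemalloc_sweep();         /* calls keep_if_marked() once per live chunk */
    }
    #endif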
diff --git a/c8/stm/largemalloc.h b/c8/stm/largemalloc.h
new file mode 100644
--- /dev/null
+++ b/c8/stm/largemalloc.h
@@ -0,0 +1,18 @@
+
+/* all addresses passed to this interface should be "char *" pointers
+ in segment 0. */
+void _stm_largemalloc_init_arena(char *data_start, size_t data_size);
+int _stm_largemalloc_resize_arena(size_t new_size);
+char *_stm_largemalloc_data_start(void);
+
+/* large_malloc() and large_free() are not thread-safe. This is
+ because they should mostly be called during minor or major
+ collections, which have their own synchronization mechanisms. */
+char *_stm_large_malloc(size_t request_size);
+void _stm_large_free(char *data);
+void _stm_largemalloc_sweep(void);
+
+void _stm_large_dump(void);
+
+
+#define LARGE_MALLOC_OVERHEAD (2 * sizeof(size_t)) /* estimate */
diff --git a/c8/stm/setup.c b/c8/stm/setup.c
--- a/c8/stm/setup.c
+++ b/c8/stm/setup.c
@@ -31,6 +31,11 @@
(NB_READMARKER_PAGES + NB_NURSERY_PAGES) * 4096,
PROT_READ | PROT_WRITE);
}
+
+ /* make the sharing segment writable for the memory allocator: */
+ mprotect(stm_object_pages + END_NURSERY_PAGE * 4096UL,
+ (NB_PAGES - END_NURSERY_PAGE) * 4096UL,
+ PROT_READ | PROT_WRITE);
}
diff --git a/c8/stm/smallmalloc.c b/c8/stm/smallmalloc.c
--- a/c8/stm/smallmalloc.c
+++ b/c8/stm/smallmalloc.c
@@ -67,23 +67,17 @@
/* if (!_stm_largemalloc_resize_arena(uninitialized_page_stop - base)) */
/* goto out_of_memory; */
- /* lock acquiring not necessary because the affected pages don't
- need privatization protection. (but there is an assert right
- now to enforce that XXXXXX) */
- acquire_all_privatization_locks();
+ /* make writable in sharing seg */
+ setup_N_pages(uninitialized_page_stop, GCPAGE_NUM_PAGES);
char *p = uninitialized_page_stop;
long i;
for (i = 0; i < GCPAGE_NUM_PAGES; i++) {
- /* accessible in seg0: */
- page_mark_accessible(0, (p - stm_object_pages) / 4096UL);
-
/* add to free_uniform_pages list */
((struct small_free_loc_s *)p)->nextpage = free_uniform_pages;
free_uniform_pages = (struct small_free_loc_s *)p;
p += 4096;
}
- release_all_privatization_locks();
}
spinlock_release(gmfp_lock);
@@ -128,17 +122,6 @@
smallpage->nextpage)))
goto retry;
-
-
- /* lock acquiring not necessary because the affected pages don't
- need privatization protection. (but there is an assert right
- now to enforce that XXXXXX) */
- acquire_all_privatization_locks();
- /* make page accessible in our segment too: */
- page_mark_accessible(STM_SEGMENT->segment_num,
- ((char*)smallpage - stm_object_pages) / 4096UL);
- release_all_privatization_locks();
-
/* Succeeded: we have a page in 'smallpage', which is not
initialized so far, apart from the 'nextpage' field read
above. Initialize it.
@@ -315,6 +298,7 @@
void _stm_smallmalloc_sweep(void)
{
+ acquire_all_privatization_locks(); /* should be done outside, but tests... */
long i, szword;
for (szword = 2; szword < GC_N_SMALL_REQUESTS; szword++) {
struct small_free_loc_s *page = small_page_lists[szword];
@@ -362,4 +346,5 @@
sweep_small_page(pageptr, NULL, sz);
}
}
+ release_all_privatization_locks();
}
diff --git a/c8/stmgc.c b/c8/stmgc.c
--- a/c8/stmgc.c
+++ b/c8/stmgc.c
@@ -6,6 +6,7 @@
#include "stm/core.h"
#include "stm/pagecopy.h"
#include "stm/pages.h"
+#include "stm/largemalloc.h"
#include "stm/gcpage.h"
#include "stm/sync.h"
#include "stm/setup.h"
@@ -20,6 +21,7 @@
#include "stm/pagecopy.c"
#include "stm/pages.c"
#include "stm/prebuilt.c"
+#include "stm/largemalloc.c"
#include "stm/gcpage.c"
#include "stm/nursery.c"
#include "stm/sync.c"
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -79,6 +79,16 @@
void _stm_test_switch_segment(int segnum);
void _push_obj_to_other_segments(object_t *obj);
+void _stm_largemalloc_init_arena(char *data_start, size_t data_size);
+int _stm_largemalloc_resize_arena(size_t new_size);
+char *_stm_largemalloc_data_start(void);
+char *_stm_large_malloc(size_t request_size);
+void _stm_large_free(char *data);
+void _stm_large_dump(void);
+bool (*_stm_largemalloc_keep)(char *data);
+void _stm_largemalloc_sweep(void);
+
+
char *stm_object_pages;
char *stm_file_pages;
object_t *_stm_allocate_old_small(ssize_t size_rounded_up);
diff --git a/c8/test/support.py b/c8/test/support.py
--- a/c8/test/support.py
+++ b/c8/test/support.py
@@ -81,6 +81,15 @@
void stm_collect(long level);
void _stm_set_nursery_free_count(uint64_t free_count);
+void _stm_largemalloc_init_arena(char *data_start, size_t data_size);
+int _stm_largemalloc_resize_arena(size_t new_size);
+char *_stm_largemalloc_data_start(void);
+char *_stm_large_malloc(size_t request_size);
+void _stm_large_free(char *data);
+void _stm_large_dump(void);
+bool (*_stm_largemalloc_keep)(char *data);
+void _stm_largemalloc_sweep(void);
+
long stm_identityhash(object_t *obj);
long stm_id(object_t *obj);
diff --git a/c8/test/test_largemalloc.py b/c8/test/test_largemalloc.py
new file mode 100644
--- /dev/null
+++ b/c8/test/test_largemalloc.py
@@ -0,0 +1,181 @@
+from support import *
+import sys, random
+
+ra = lambda x: x # backward compat.
+
+class TestLargeMalloc(BaseTest):
+ def setup_method(self, meth):
+ # initialize some big heap in stm_setup()
+ BaseTest.setup_method(self, meth)
+
+ # now re-initialize the heap to 1MB with 0xcd in it
+ self.size = 1024 * 1024 # 1MB
+ self.rawmem = lib._stm_largemalloc_data_start()
+
+ lib.memset(self.rawmem, 0xcd, self.size)
+ lib._stm_largemalloc_init_arena(self.rawmem, self.size)
+
+ def test_simple(self):
+ #
+ lib._stm_large_dump()
+ d1 = lib._stm_large_malloc(7000)
+ lib._stm_large_dump()
+ d2 = lib._stm_large_malloc(8000)
+ print d1
+ print d2
+ assert ra(d2) - ra(d1) == 7016
+ d3 = lib._stm_large_malloc(9000)
+ assert ra(d3) - ra(d2) == 8016
+ #
+ lib._stm_large_free(d1)
+ lib._stm_large_free(d2)
+ #
+ d4 = lib._stm_large_malloc(600)
+ assert d4 == d1
+ d5 = lib._stm_large_malloc(600)
+ assert ra(d5) == ra(d4) + 616
+ #
+ lib._stm_large_free(d5)
+ #
+ d6 = lib._stm_large_malloc(600)
+ assert d6 == d5
+ #
+ lib._stm_large_free(d4)
+ #
+ d7 = lib._stm_large_malloc(608)
+ assert ra(d7) == ra(d6) + 616
+ d8 = lib._stm_large_malloc(600)
+ assert d8 == d4
+ #
+ lib._stm_large_dump()
+
+ def test_overflow_1(self):
+ d = lib._stm_large_malloc(self.size - 32)
+ assert ra(d) == self.rawmem + 16
+ lib._stm_large_dump()
+
+ def test_overflow_2(self):
+ d = lib._stm_large_malloc(self.size - 16)
+ assert d == ffi.NULL
+ lib._stm_large_dump()
+
+ def test_overflow_3(self):
+ d = lib._stm_large_malloc(sys.maxint & ~7)
+ assert d == ffi.NULL
+ lib._stm_large_dump()
+
+ def test_resize_arena_reduce_1(self):
+ r = lib._stm_largemalloc_resize_arena(self.size - 32)
+ assert r == 1
+ d = lib._stm_large_malloc(self.size - 32)
+ assert d == ffi.NULL
+ lib._stm_large_dump()
+
+ def test_resize_arena_reduce_2(self):
+ lib._stm_large_malloc(self.size // 2 - 80)
+ r = lib._stm_largemalloc_resize_arena(self.size // 2)
+ assert r == 1
+ lib._stm_large_dump()
+
+ def test_resize_arena_reduce_3(self):
+ d1 = lib._stm_large_malloc(128)
+ r = lib._stm_largemalloc_resize_arena(self.size // 2)
+ assert r == 1
+ d2 = lib._stm_large_malloc(128)
+ assert ra(d1) == self.rawmem + 16
+ assert ra(d2) == ra(d1) + 128 + 16
+ lib._stm_large_dump()
+
+ def test_resize_arena_cannot_reduce_1(self):
+ lib._stm_large_malloc(self.size // 2)
+ r = lib._stm_largemalloc_resize_arena(self.size // 2)
+ assert r == 0
+ lib._stm_large_dump()
+
+ def test_resize_arena_cannot_reduce_2(self):
+ lib._stm_large_malloc(self.size // 2 - 56)
+ r = lib._stm_largemalloc_resize_arena(self.size // 2)
+ assert r == 0
+ lib._stm_large_dump()
+
+ def test_random(self):
+ r = random.Random(1007)
+ p = []
+ for i in range(100000):
+ if len(p) != 0 and (len(p) > 100 or r.randrange(0, 5) < 2):
+ index = r.randrange(0, len(p))
+ d, length, content1, content2 = p.pop(index)
+ print ' free %5d (%s)' % (length, d)
+ assert ra(d)[0] == content1
+ assert ra(d)[length - 1] == content2
+ lib._stm_large_free(d)
+ else:
+ sz = r.randrange(8, 160) * 8
+ d = lib._stm_large_malloc(sz)
+ print 'alloc %5d (%s)' % (sz, d)
+ assert d != ffi.NULL
+ lib.memset(ra(d), 0xdd, sz)
+ content1 = chr(r.randrange(0, 256))
+ content2 = chr(r.randrange(0, 256))
+ ra(d)[0] = content1
+ ra(d)[sz - 1] = content2
+ p.append((d, sz, content1, content2))
+ lib._stm_large_dump()
+
+ def test_random_largemalloc_sweep(self, constrained_size_range=False):
+ @ffi.callback("bool(char *)")
+ def keep(data):
+ try:
+ if data in from_before:
+ return False
+ index = all.index(data)
+ seen_for.add(index)
+ return index in keep_me
+ except Exception, e:
+ errors.append(e)
+ raise
+ lib._stm_largemalloc_keep = keep
+ errors = []
+ from_before = set()
+
+ r = random.Random(1000)
+ for j in range(500):
+ if constrained_size_range:
+ max = 120
+ else:
+ max = 500
+ sizes = [random.choice(range(104, max, 8)) for i in range(20)]
+ all = [lib._stm_large_malloc(size) for size in sizes]
+ print all
+
+ for i in range(len(all)):
+ all[i][50] = chr(65 + i)
+ all_orig = all[:]
+
+ keep_me = set()
+ for i in range(len(all)):
+ if r.random() < 0.5:
+ print 'free:', all[i]
+ lib._stm_large_free(all[i])
+ all[i] = None
+ elif r.random() < 0.5:
+ keep_me.add(i)
+
+ seen_for = set()
+ lib._stm_largemalloc_sweep()
+ if errors:
+ raise errors[0]
+ assert seen_for == set([i for i in range(len(all))
+ if all[i] is not None])
+ lib._stm_large_dump()
+
+ from_before = [all[i] for i in keep_me]
+
+ for i in range(len(all)):
+ if i in keep_me:
+ assert all[i][50] == chr(65 + i)
+ else:
+ assert all_orig[i][50] == '\xDE'
+
+ def test_random_largemalloc_sweep_constrained_size_range(self):
+ self.test_random_largemalloc_sweep(constrained_size_range=True)