[pypy-commit] pypy stm: Import from arigo/hack/stm/c.
Author: Armin Rigo <arigo at tunes.org>
Branch: stm
Changeset: r47621:29beb9a43433
Date: 2011-09-27 13:28 +0200
http://bitbucket.org/pypy/pypy/changeset/29beb9a43433/
Log: Import from arigo/hack/stm/c.
diff --git a/pypy/translator/stm/__init__.py b/pypy/translator/stm/__init__.py
new file mode 100644
diff --git a/pypy/translator/stm/_rffi_stm.py b/pypy/translator/stm/_rffi_stm.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/_rffi_stm.py
@@ -0,0 +1,40 @@
+import py
+import os
+from pypy.tool.autopath import pypydir
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+
+
+cdir = py.path.local(pypydir) / 'translator' / 'stm'
+
+
+eci = ExternalCompilationInfo(
+ include_dirs = [cdir],
+ includes = ['src_stm/et.h'],
+ post_include_bits = [
+ r'''#define stm_begin_transaction_inline() ; \
+ jmp_buf _jmpbuf; \
+ setjmp(_jmpbuf); \
+ stm_begin_transaction(&_jmpbuf);
+ '''],
+ separate_module_sources = ['#include "src_stm/et.c"\n'],
+)
+
+def llexternal(name, args, result, **kwds):
+ return rffi.llexternal(name, args, result, compilation_info=eci,
+ _nowrapper=True, **kwds)
+
+
+descriptor_init = llexternal('stm_descriptor_init', [], lltype.Void)
+descriptor_done = llexternal('stm_descriptor_done', [], lltype.Void)
+
+begin_transaction = llexternal('stm_begin_transaction_inline', [], lltype.Void)
+commit_transaction = llexternal('stm_commit_transaction', [], lltype.Signed)
+
+read_word = llexternal('stm_read_word', [rffi.VOIDPP], rffi.VOIDP)
+write_word = llexternal('stm_write_word', [rffi.VOIDPP, rffi.VOIDP],
+ lltype.Void)
+
+CALLBACK = lltype.Ptr(lltype.FuncType([rffi.VOIDP], rffi.VOIDP))
+perform_transaction = llexternal('stm_perform_transaction',
+ [CALLBACK, rffi.VOIDP], rffi.VOIDP)
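Note that 'stm_begin_transaction_inline' is not a real C function: the
post_include_bits above expand it in place, so that the jmp_buf lives in the
caller's frame. A minimal C sketch of what a call site looks like after
expansion (illustration only, not code from this changeset):

    void demo(void **addr, void *val)
    {
      stm_begin_transaction_inline();   /* declares a local jmp_buf,
                                           setjmp()s on it, then calls
                                           stm_begin_transaction() */
      /* tx_abort() longjmp()s back to that setjmp(), so the body
         below simply re-runs after an abort */
      stm_write_word(addr, val);
      stm_commit_transaction();
    }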
diff --git a/pypy/translator/stm/src_stm/atomic_ops.h b/pypy/translator/stm/src_stm/atomic_ops.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/src_stm/atomic_ops.h
@@ -0,0 +1,45 @@
+
+
+/* "compiler fence" for preventing reordering of loads/stores to
+ non-volatiles */
+#define CFENCE asm volatile ("":::"memory")
+
+
+#ifdef __llvm__
+# define HAS_SYNC_BOOL_COMPARE_AND_SWAP
+#endif
+
+#ifdef __GNUC__
+# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+# define HAS_SYNC_BOOL_COMPARE_AND_SWAP
+# endif
+#endif
+
+
+#ifdef HAS_SYNC_BOOL_COMPARE_AND_SWAP
+# define bool_cas __sync_bool_compare_and_swap
+#else
+/* x86 (32 bits and 64 bits) */
+static inline _Bool
+bool_cas(volatile unsigned long* ptr, unsigned long old, unsigned long _new)
+{
+ unsigned long prev;
+ asm volatile("lock;"
+#if defined(__amd64__)
+ "cmpxchgq %1, %2;"
+#else
+ "cmpxchgl %1, %2;"
+#endif
+ : "=a"(prev)
+ : "q"(_new), "m"(*ptr), "a"(old)
+ : "memory");
+ return prev == old;
+}
+/* end */
+#endif
+
+
+static inline void spinloop(void)
+{
+ asm volatile ("pause");
+}
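As a sketch of how these primitives are meant to combine (using a
hypothetical 'counter'; not part of this changeset): a lock-free increment
retries bool_cas until it publishes the update, pausing with spinloop()
between attempts:

    static volatile unsigned long counter = 0;

    static void counter_increment(void)
    {
      for (;;) {
        unsigned long old = counter;
        if (bool_cas(&counter, old, old + 1))
          break;        /* CAS succeeded: the increment is published */
        spinloop();     /* another thread won the race; try again */
      }
    }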
diff --git a/pypy/translator/stm/src_stm/et.c b/pypy/translator/stm/src_stm/et.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/src_stm/et.c
@@ -0,0 +1,745 @@
+/* -*- c-basic-offset: 2 -*- */
+
+/* XXX assumes that time never wraps around (in a 'long'), which may be
+ * correct on 64-bit machines but not on 32-bit machines if the process
+ * runs for long enough.
+ *
+ * XXX measure the overhead of the global_timestamp
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#define USE_PTHREAD_MUTEX /* optional */
+#ifdef USE_PTHREAD_MUTEX
+# include <pthread.h>
+#endif
+
+#include "src_stm/et.h"
+#include "src_stm/atomic_ops.h"
+
+/************************************************************/
+
+#define IS_LOCKED(num) ((num) < 0)
+#define IS_LOCKED_OR_NEWER(num, max_age) \
+ (((unsigned long)(num)) > ((unsigned long)(max_age)))
+typedef long owner_version_t;
+
+typedef struct {
+ owner_version_t v; // the current version number
+} orec_t;
+
+/*** Specify the number of orecs in the global array. */
+#define NUM_STRIPES 1048576
+
+/*** declare the table of orecs */
+static char orecs[NUM_STRIPES * sizeof(orec_t)];
+
+/*** map addresses to orec table entries */
+inline static volatile orec_t* get_orec(void* addr)
+{
+ unsigned long index = (unsigned long)addr;
+ char *p = orecs + (index & ((NUM_STRIPES-1) * sizeof(orec_t)));
+ return (volatile orec_t *)p;
+}
+
+#include "src_stm/lists.c"
+
+/************************************************************/
+
+/* Uncomment the next line to try this extra code. It doesn't work reliably so far. */
+/*#define COMMIT_OTHER_INEV*/
+
+#define ABORT_REASONS 7
+#define SPINLOOP_REASONS 10
+#define OTHERINEV_REASONS 5
+
+struct tx_descriptor {
+ jmp_buf *setjmp_buf;
+ owner_version_t start_time;
+ owner_version_t end_time;
+ unsigned long last_known_global_timestamp;
+ struct OrecList reads;
+ unsigned num_commits;
+ unsigned num_aborts[ABORT_REASONS];
+ unsigned num_spinloops[SPINLOOP_REASONS];
+#ifdef COMMIT_OTHER_INEV
+ unsigned num_otherinev[OTHERINEV_REASONS];
+#endif
+ unsigned int spinloop_counter;
+ owner_version_t my_lock_word;
+ struct RedoLog redolog; /* last item, because it's the biggest one */
+};
+
+/* global_timestamp contains in its lowest bit a flag equal to 1
+ if there is an inevitable transaction running */
+static volatile unsigned long global_timestamp = 2;
+static __thread struct tx_descriptor *thread_descriptor;
+#ifdef COMMIT_OTHER_INEV
+static struct tx_descriptor *volatile thread_descriptor_inev;
+static volatile unsigned long d_inev_checking = 0;
+#endif
+
+/************************************************************/
+
+static unsigned long get_global_timestamp(struct tx_descriptor *d)
+{
+ return (d->last_known_global_timestamp = global_timestamp);
+}
+
+static _Bool change_global_timestamp(struct tx_descriptor *d,
+ unsigned long old,
+ unsigned long new)
+{
+ if (bool_cas(&global_timestamp, old, new))
+ {
+ d->last_known_global_timestamp = new;
+ return 1;
+ }
+ return 0;
+}
+
+static void set_global_timestamp(struct tx_descriptor *d, unsigned long new)
+{
+ global_timestamp = new;
+ d->last_known_global_timestamp = new;
+}
+
+static void tx_abort(int);
+
+static void tx_spinloop(int num)
+{
+ unsigned int c;
+ int i;
+ struct tx_descriptor *d = thread_descriptor;
+ d->num_spinloops[num]++;
+
+ //printf("tx_spinloop(%d)\n", num);
+
+ c = d->spinloop_counter;
+ d->spinloop_counter = c * 9;
+ i = c & 0xff0000;
+ while (i >= 0) {
+ spinloop();
+ i -= 0x10000;
+ }
+}
+
+static _Bool is_inevitable(struct tx_descriptor *d)
+{
+ return d->setjmp_buf == NULL;
+}
+
+/*** run the redo log to commit a transaction, and release the locks */
+static void tx_redo(struct tx_descriptor *d)
+{
+ owner_version_t newver = d->end_time;
+ wlog_t *item;
+ /* loop in "forward" order: in this order, if there are duplicate orecs
+ then only the last one has p != -1. */
+ REDOLOG_LOOP_FORWARD(d->redolog, item)
+ {
+ *item->addr = item->val;
+ /* but we must only unlock the orec if it's the last time it
+ appears in the redolog list. If it's not, then p == -1. */
+ if (item->p != -1)
+ {
+ volatile orec_t* o = get_orec(item->addr);
+ CFENCE;
+ o->v = newver;
+ }
+ } REDOLOG_LOOP_END;
+}
+
+/*** on abort, release locks and restore the old version number. */
+static void releaseAndRevertLocks(struct tx_descriptor *d)
+{
+ wlog_t *item;
+ REDOLOG_LOOP_FORWARD(d->redolog, item)
+ {
+ if (item->p != -1)
+ {
+ volatile orec_t* o = get_orec(item->addr);
+ o->v = item->p;
+ }
+ } REDOLOG_LOOP_END;
+}
+
+/*** release locks and restore the old version number, ready to retry later */
+static void releaseLocksForRetry(struct tx_descriptor *d)
+{
+ wlog_t *item;
+ REDOLOG_LOOP_FORWARD(d->redolog, item)
+ {
+ if (item->p != -1)
+ {
+ volatile orec_t* o = get_orec(item->addr);
+ o->v = item->p;
+ item->p = -1;
+ }
+ } REDOLOG_LOOP_END;
+}
+
+/*** lock all locations */
+static void acquireLocks(struct tx_descriptor *d)
+{
+ wlog_t *item;
+ // try to lock every location in the write set
+ REDOLOG_LOOP_BACKWARD(d->redolog, item)
+ {
+ // get orec, read its version#
+ volatile orec_t* o = get_orec(item->addr);
+ owner_version_t ovt;
+
+ retry:
+ ovt = o->v;
+
+ // if orec not locked, lock it
+ //
+ // NB: if ovt > start time, we may introduce inconsistent
+ // reads. Since most writes are also reads, we'll just abort under this
+    // condition. This can introduce false conflicts.
+ if (!IS_LOCKED_OR_NEWER(ovt, d->start_time)) {
+ if (!bool_cas(&o->v, ovt, d->my_lock_word))
+ goto retry;
+ // save old version to item->p. Now we hold the lock.
+ // in case of duplicate orecs, only the last one has p != -1.
+ item->p = ovt;
+ }
+ // else if the location is too recent...
+ else if (!IS_LOCKED(ovt))
+ tx_abort(0);
+ // else it is locked: if we don't hold the lock...
+ else if (ovt != d->my_lock_word) {
+ // we can either abort or spinloop. Because we are at the end of
+ // the transaction we might try to spinloop, even though after the
+ // lock is released the ovt will be very recent, possibly
+ // > d->start_time. It is necessary to spinloop in case we are
+      // inevitable, so use that as the criterion. Another solution to avoid
+ // deadlocks would be to sort the order in which we take the locks.
+ if (is_inevitable(d))
+ tx_spinloop(8);
+ else
+ tx_abort(6);
+ goto retry;
+ }
+ } REDOLOG_LOOP_END;
+}
+
+static void common_cleanup(struct tx_descriptor *d)
+{
+ d->reads.size = 0;
+ redolog_clear(&d->redolog);
+}
+
+static void tx_cleanup(struct tx_descriptor *d)
+{
+ // release the locks and restore version numbers
+ releaseAndRevertLocks(d);
+ // reset all lists
+ common_cleanup(d);
+}
+
+static void tx_restart(struct tx_descriptor *d)
+{
+ tx_cleanup(d);
+ tx_spinloop(0);
+ longjmp(*d->setjmp_buf, 1);
+}
+
+/*** increase the abort count and restart the transaction */
+static void tx_abort(int reason)
+{
+ struct tx_descriptor *d = thread_descriptor;
+ assert(!is_inevitable(d));
+ d->num_aborts[reason]++;
+ tx_restart(d);
+}
+
+/**
+ * fast-path validation, assuming that I don't hold locks.
+ */
+static void validate_fast(struct tx_descriptor *d, int lognum)
+{
+ int i;
+ owner_version_t ovt;
+ assert(!is_inevitable(d));
+ for (i=0; i<d->reads.size; i++)
+ {
+ retry:
+ ovt = d->reads.items[i]->v;
+ if (IS_LOCKED_OR_NEWER(ovt, d->start_time))
+ {
+ // If locked, we wait until it becomes unlocked. The chances are
+ // that it will then have a very recent start_time, likely
+ // > d->start_time, but it might still be better than always aborting
+ if (IS_LOCKED(ovt))
+ {
+ tx_spinloop(lognum); /* tx_spinloop(1), tx_spinloop(2),
+ tx_spinloop(3) */
+ goto retry;
+ }
+ else
+ // abort if the timestamp is newer than my start time.
+ tx_abort(lognum); /* tx_abort(1), tx_abort(2), tx_abort(3) */
+ }
+ }
+}
+
+/**
+ * validate the read set by making sure that all orecs that we've read have
+ * timestamps at least as old as our start time, unless we locked those orecs.
+ */
+static void validate(struct tx_descriptor *d)
+{
+ int i;
+ owner_version_t ovt;
+ assert(!is_inevitable(d));
+ for (i=0; i<d->reads.size; i++)
+ {
+ ovt = d->reads.items[i]->v; // read this orec
+ if (IS_LOCKED_OR_NEWER(ovt, d->start_time))
+ {
+ if (!IS_LOCKED(ovt))
+ // if unlocked and newer than start time, abort
+ tx_abort(4);
+ else
+ {
+ // if locked and not by me, abort
+ if (ovt != d->my_lock_word)
+ tx_abort(5);
+ }
+ }
+ }
+}
+
+#ifdef USE_PTHREAD_MUTEX
+/* mutex: only to avoid busy-looping too much in tx_spinloop() below */
+static pthread_mutex_t mutex_inevitable = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+#ifdef COMMIT_OTHER_INEV
+unsigned long can_commit_with_other_inevitable(struct tx_descriptor *d,
+ unsigned long expected)
+{
+ int i;
+ owner_version_t ovt;
+ unsigned long result = 0;
+ struct tx_descriptor *d_inev;
+
+ // 'd_inev_checking' is 1 or 2 when an inevitable transaction is running
+  // and has not started committing yet; otherwise it is 0. It is normally 1
+ // except in this function.
+ if (!bool_cas(&d_inev_checking, 1, 2))
+ {
+ d->num_otherinev[4]++;
+ return 0;
+ }
+
+ // optimization only: did the inevitable thread 'd_inev' read any data
+ // that we are about to commit? If we are sure that the answer is
+ // negative, then commit anyway, because it cannot make the inevitable
+ // thread fail. We can safely check an approximation of this, because
+  // we hold a lock on all orecs that we would like to write. So if none
+  // of the orecs read by d_inev is locked now, there is no conflict. This
+ // function is allowed to "fail" and give up rather than spinloop
+ // waiting for a condition to be true, which is potentially dangerous
+ // here, because we acquired all the locks.
+
+ // Note that if the inevitable thread itself adds in parallel an extra
+ // orec to d_inev->reads, *and* if this new orec is locked, then we
+ // will miss it here; but the d_inev thread will spinloop waiting for
+ // us to be done. So even if we commit, the d_inev thread will just
+ // wait and load the new committed value.
+
+ // while we are in this function, the d_inev thread is prevented from
+ // going too far with the commitTransaction() code because d_inev_checking
+ // is greater than 1; it will just tx_spinloop(9). (And of course it
+ // cannot abort.)
+
+ d_inev = thread_descriptor_inev;
+ if (!bool_cas(&d_inev->reads.locked, 0, 1))
+ {
+ d->num_otherinev[1]++;
+ goto give_up_1;
+ }
+
+ for (i=d_inev->reads.size; i--; )
+ {
+ ovt = d_inev->reads.items[i]->v; // read this orec
+ if (ovt == d->my_lock_word)
+ {
+ d->num_otherinev[2]++;
+ goto give_up_2;
+ }
+ }
+ assert(expected & 1);
+ if (!change_global_timestamp(d, expected, expected + 2))
+ {
+ d->num_otherinev[3]++;
+ goto give_up_2;
+ }
+
+  /* success: scale d_inev forward */
+ d->num_otherinev[0]++;
+ result = expected + 1;
+ assert(d_inev->start_time == result - 2);
+ d_inev->start_time = result;
+ CFENCE;
+
+ give_up_2:
+ d_inev->reads.locked = 0;
+
+ give_up_1:
+ d_inev_checking = 1;
+ return result;
+}
+#endif
+
+void wait_end_inevitability(struct tx_descriptor *d)
+{
+ unsigned long curts;
+ releaseLocksForRetry(d);
+
+ // We are going to wait until the other inevitable transaction
+ // finishes. XXX We could do better here: we could check if
+ // committing 'd' would create a conflict for the other inevitable
+ // thread 'd_inev' or not. It requires peeking in 'd_inev' from this
+ // thread (which we never do so far) in order to do something like
+ // 'validate_fast(d_inev); d_inev->start_time = updated;'
+
+ while ((curts = get_global_timestamp(d)) & 1)
+ {
+ // while we're about to wait anyway, we can do a validate_fast
+ if (d->start_time < curts - 1)
+ {
+ validate_fast(d, 3);
+ d->start_time = curts - 1;
+ }
+ tx_spinloop(4);
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_lock(&mutex_inevitable);
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+ }
+ acquireLocks(d);
+}
+
+void commitInevitableTransaction(struct tx_descriptor *d)
+{
+ unsigned long ts;
+ _Bool ok;
+
+#ifdef COMMIT_OTHER_INEV
+ // reset d_inev_checking back from 1 to 0
+ while (!bool_cas(&d_inev_checking, 1, 0))
+ tx_spinloop(9);
+#endif
+ // no-one else can modify global_timestamp if I'm inevitable
+ // and d_inev_checking is 0
+ ts = get_global_timestamp(d);
+ assert(ts & 1);
+ set_global_timestamp(d, ts + 1);
+ d->end_time = ts + 1;
+ assert(d->end_time == (d->start_time + 2));
+
+ // run the redo log, and release the locks
+ tx_redo(d);
+
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+}
+
+/* lazy/lazy read instrumentation */
+void* stm_read_word(void** addr)
+{
+ struct tx_descriptor *d = thread_descriptor;
+
+ // check writeset first
+ wlog_t* found;
+ REDOLOG_FIND(d->redolog, addr, found, goto not_found);
+ return found->val;
+
+ not_found:;
+ // get the orec addr
+ volatile orec_t* o = get_orec((void*)addr);
+ owner_version_t ovt;
+
+#ifdef COMMIT_OTHER_INEV
+ // log orec BEFORE we spinloop waiting for the orec lock to be released,
+ // for can_commit_with_other_inevitable()
+ oreclist_insert(&d->reads, (orec_t*)o);
+#endif
+
+ retry:
+ // read the orec BEFORE we read anything else
+ ovt = o->v;
+ CFENCE;
+
+ // this tx doesn't hold any locks, so if the lock for this addr is held,
+  // there is contention. A lock is never held for too long, so spinloop
+ // until it is released.
+ if (IS_LOCKED_OR_NEWER(ovt, d->start_time))
+ {
+ if (IS_LOCKED(ovt)) {
+ tx_spinloop(7);
+ goto retry;
+ }
+ // else this location is too new, scale forward
+ owner_version_t newts = get_global_timestamp(d) & ~1;
+#ifdef COMMIT_OTHER_INEV
+ d->reads.size--; // ignore the newly logged orec
+#endif
+ validate_fast(d, 1);
+#ifdef COMMIT_OTHER_INEV
+ d->reads.size++;
+#endif
+ d->start_time = newts;
+ }
+
+ // orec is unlocked, with ts <= start_time. read the location
+ void* tmp = *addr;
+
+ // postvalidate AFTER reading addr:
+ CFENCE;
+ if (o->v != ovt)
+    goto retry; /* oops, try again */
+
+#ifndef COMMIT_OTHER_INEV
+ oreclist_insert(&d->reads, (orec_t*)o);
+#endif
+
+ return tmp;
+}
+
+void stm_write_word(void** addr, void* val)
+{
+ struct tx_descriptor *d = thread_descriptor;
+ redolog_insert(&d->redolog, addr, val);
+}
+
+
+void stm_descriptor_init(void)
+{
+ struct tx_descriptor *d = malloc(sizeof(struct tx_descriptor));
+ memset(d, 0, sizeof(struct tx_descriptor));
+
+ /* initialize 'my_lock_word' to be a unique negative number */
+ d->my_lock_word = (owner_version_t)d;
+ if (!IS_LOCKED(d->my_lock_word))
+ d->my_lock_word = ~d->my_lock_word;
+ assert(IS_LOCKED(d->my_lock_word));
+ d->spinloop_counter = (unsigned int)(d->my_lock_word | 1);
+
+ thread_descriptor = d;
+}
+
+void stm_descriptor_done(void)
+{
+ struct tx_descriptor *d = thread_descriptor;
+ thread_descriptor = NULL;
+
+ int num_aborts = 0, num_spinloops = 0;
+ int i, prevchar;
+ for (i=0; i<ABORT_REASONS; i++)
+ num_aborts += d->num_aborts[i];
+ for (i=0; i<SPINLOOP_REASONS; i++)
+ num_spinloops += d->num_spinloops[i];
+
+ fprintf(stderr, "thread %lx: %d commits, %d aborts ",
+ d->my_lock_word,
+ d->num_commits,
+ num_aborts);
+
+ for (i=0; i<ABORT_REASONS; i++)
+ fprintf(stderr, "%c%d", i == 0 ? '[' : ',',
+ d->num_aborts[i]);
+
+ for (i=1; i<SPINLOOP_REASONS; i++) /* num_spinloops[0] == num_aborts */
+ fprintf(stderr, "%c%d", i == 1 ? '|' : ',',
+ d->num_spinloops[i]);
+
+#ifdef COMMIT_OTHER_INEV
+ for (i=0; i<OTHERINEV_REASONS; i++)
+ fprintf(stderr, "%c%d", i == 0 ? '|' : ',',
+ d->num_otherinev[i]);
+#endif
+
+ fprintf(stderr, "]\n");
+ free(d);
+}
+
+void* stm_perform_transaction(void*(*callback)(void*), void *arg)
+{
+ void *result;
+ jmp_buf jmpbuf;
+ stm_begin_transaction(&jmpbuf);
+ result = callback(arg);
+ stm_commit_transaction();
+ return result;
+}
+
+void stm_begin_transaction(jmp_buf* buf)
+{
+ struct tx_descriptor *d = thread_descriptor;
+ d->setjmp_buf = buf;
+ d->start_time = d->last_known_global_timestamp & ~1;
+}
+
+long stm_commit_transaction(void)
+{
+ struct tx_descriptor *d = thread_descriptor;
+
+ // if I don't have writes, I'm committed
+ if (!redolog_any_entry(&d->redolog))
+ {
+ if (is_inevitable(d))
+ {
+ unsigned long ts = get_global_timestamp(d);
+ assert(ts & 1);
+ set_global_timestamp(d, ts - 1);
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+ }
+ d->num_commits++;
+ common_cleanup(d);
+ return d->start_time;
+ }
+
+  // reading this volatile brings it over to this CPU core (optimization, maybe)
+ global_timestamp;
+
+ // acquire locks
+ acquireLocks(d);
+
+ if (is_inevitable(d))
+ {
+ commitInevitableTransaction(d);
+ }
+ else
+ {
+ while (1)
+ {
+ unsigned long expected = get_global_timestamp(d);
+ if (expected & 1)
+ {
+#ifdef COMMIT_OTHER_INEV
+ // there is another inevitable transaction running.
+ expected = can_commit_with_other_inevitable(d, expected);
+ if (expected != 0)
+ {
+ d->end_time = expected;
+ break;
+ }
+#endif
+ // wait until it is done. hopefully we can then proceed
+ // without conflicts.
+ wait_end_inevitability(d);
+ continue;
+ }
+ if (change_global_timestamp(d, expected, expected + 2))
+ {
+ d->end_time = expected + 2;
+ break;
+ }
+ }
+
+ // validate (but skip validation if nobody else committed)
+ if (d->end_time != (d->start_time + 2))
+ validate(d);
+
+ // run the redo log, and release the locks
+ tx_redo(d);
+ }
+
+ // remember that this was a commit
+ d->num_commits++;
+
+ // reset all lists
+ common_cleanup(d);
+ return d->end_time;
+}
+
+void stm_try_inevitable(void)
+{
+ /* when a transaction is inevitable, its start_time is equal to
+ global_timestamp and global_timestamp cannot be incremented
+ by another thread. We set the lowest bit in global_timestamp
+ to 1. */
+ struct tx_descriptor *d = thread_descriptor;
+
+ if (is_inevitable(d))
+ return; /* I am already inevitable */
+
+ while (1)
+ {
+ unsigned long curtime = get_global_timestamp(d);
+ if (d->start_time != (curtime & ~1))
+ { /* scale forward */
+ validate_fast(d, 2);
+ d->start_time = curtime & ~1;
+ }
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_lock(&mutex_inevitable);
+#endif
+ if (curtime & 1) /* there is, or was, already an inevitable thread */
+ {
+ /* should we spinloop here, or abort (and likely come back
+ in try_inevitable() very soon)? unclear. For now
+ let's try to spinloop, after the waiting done by
+ acquiring the mutex */
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+ tx_spinloop(6);
+ continue;
+ }
+ if (change_global_timestamp(d, curtime, curtime + 1))
+ break;
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+ }
+ d->setjmp_buf = NULL; /* inevitable from now on */
+#ifdef COMMIT_OTHER_INEV
+ thread_descriptor_inev = d;
+ CFENCE;
+ d_inev_checking = 1;
+#endif
+}
+
+void stm_begin_inevitable_transaction(void)
+{
+ struct tx_descriptor *d = thread_descriptor;
+ unsigned long curtime;
+
+ retry:
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_lock(&mutex_inevitable); /* possibly waiting here */
+#endif
+
+ while (1)
+ {
+ curtime = global_timestamp;
+ if (curtime & 1)
+ {
+#ifdef USE_PTHREAD_MUTEX
+ pthread_mutex_unlock(&mutex_inevitable);
+#endif
+ tx_spinloop(5);
+ goto retry;
+ }
+ if (bool_cas(&global_timestamp, curtime, curtime + 1))
+ break;
+ }
+ d->setjmp_buf = NULL;
+ d->start_time = curtime;
+#ifdef COMMIT_OTHER_INEV
+ thread_descriptor_inev = d;
+ CFENCE;
+ d_inev_checking = 1;
+#endif
+}
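The protocol above hinges on one encoding trick: an orec's version word is
either an even commit timestamp or a negative my_lock_word, so the single
unsigned comparison in IS_LOCKED_OR_NEWER catches both "locked" and "too new"
at once. A small demo with illustrative values (not part of this changeset;
it assumes the definitions from et.c are in scope):

    static void orec_encoding_demo(void)
    {
      owner_version_t ts = 4;                    /* a commit timestamp */
      owner_version_t lock = ~(owner_version_t)0x1234;  /* < 0: locked */
      assert(!IS_LOCKED(ts) && IS_LOCKED(lock));
      assert(IS_LOCKED_OR_NEWER(ts, 2));   /* 4 > start_time 2: abort */
      assert(!IS_LOCKED_OR_NEWER(ts, 4));  /* ts <= start_time: read ok */
      assert(IS_LOCKED_OR_NEWER(lock, 4)); /* negative reads as "newer" */
    }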
diff --git a/pypy/translator/stm/src_stm/et.h b/pypy/translator/stm/src_stm/et.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/src_stm/et.h
@@ -0,0 +1,25 @@
+/*** Extendable Timestamps
+ *
+ * This is a C version of rstm_r5/stm/et.hpp.
+ * See http://www.cs.rochester.edu/research/synchronization/rstm/api.shtml
+ *
+ */
+
+#ifndef _ET_H
+#define _ET_H
+
+#include <setjmp.h>
+
+
+void stm_descriptor_init(void);
+void stm_descriptor_done(void);
+void* stm_perform_transaction(void*(*)(void*), void*);
+void stm_begin_transaction(jmp_buf* buf);
+long stm_commit_transaction(void);
+void* stm_read_word(void** addr);
+void stm_write_word(void** addr, void* val);
+void stm_try_inevitable(void);
+void stm_begin_inevitable_transaction(void);
+
+
+#endif /* _ET_H */
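A minimal end-to-end sketch of this API (illustration only, not part of this
changeset; 'counter' and 'increment' are made up): each thread sets up a
descriptor once, and stm_perform_transaction() wraps the setjmp/commit dance
around a callback:

    #include "src_stm/et.h"

    static void *counter;               /* one shared word-sized cell */

    static void *increment(void *arg)   /* runs inside a transaction */
    {
      void **cell = (void **)arg;
      long v = (long)stm_read_word(cell);
      stm_write_word(cell, (void *)(v + 1));
      return NULL;
    }

    int main(void)
    {
      stm_descriptor_init();            /* once per thread */
      stm_perform_transaction(increment, &counter);
      stm_descriptor_done();            /* frees it and dumps statistics */
      return 0;
    }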
diff --git a/pypy/translator/stm/src_stm/lists.c b/pypy/translator/stm/src_stm/lists.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/src_stm/lists.c
@@ -0,0 +1,241 @@
+/* -*- c-basic-offset: 2 -*- */
+
+#include <limits.h>
+
+/************************************************************/
+
+/* The redolog_xx functions are implemented as a tree, supporting
+ very high performance in REDOLOG_FIND in the common case where
+ there are no or few elements in the tree, but scaling correctly
+ if the number of items becomes large. */
+
+#define TREE_BITS 4
+#define TREE_ARITY (1 << TREE_BITS)
+
+#define TREE_DEPTH_MAX ((sizeof(void*)*8 - 2 + TREE_BITS-1) / TREE_BITS)
+/* sizeof(void*)*8 = total number of bits
+ 2 = bits that we ignore anyway (2 or 3, conservatively 2)
+ (x + TREE_BITS-1) / TREE_BITS = divide by TREE_BITS, rounding up
+*/
+
+#define TREE_MASK ((TREE_ARITY - 1) * sizeof(void*))
+
+typedef struct {
+ void** addr;
+ void* val;
+ owner_version_t p; // the previous version number (if locked)
+} wlog_t;
+
+typedef struct {
+ char *items[TREE_ARITY];
+} wlog_node_t;
+
+struct RedoLog {
+ char *raw_start, *raw_current, *raw_end;
+ wlog_node_t toplevel;
+};
+
+static void _redolog_clear_node(wlog_node_t *node)
+{
+ memset(node, 0, sizeof(wlog_node_t));
+}
+
+static void redolog_clear(struct RedoLog *redolog)
+{
+ if (redolog->raw_current != redolog->raw_start)
+ {
+ _redolog_clear_node(&redolog->toplevel);
+ redolog->raw_current = redolog->raw_start;
+ }
+}
+
+static int redolog_any_entry(struct RedoLog *redolog)
+{
+ return redolog->raw_current != redolog->raw_start;
+}
+
+#define _REDOLOG_LOOP(redolog, item, INITIAL, _PLUS_) \
+{ \
+ struct { char **next; char **end; } _stack[TREE_DEPTH_MAX], *_stackp; \
+ char **_next, **_end, *_entry; \
+ /* initialization */ \
+ _stackp = _stack; /* empty stack */ \
+ _next = (redolog).toplevel.items + INITIAL; \
+ _end = _next _PLUS_ TREE_ARITY; \
+ /* loop */ \
+ while (1) \
+ { \
+ if (_next == _end) \
+ { \
+ if (_stackp == _stack) \
+ break; /* done */ \
+ /* finished with this level, go to the next one */ \
+ _stackp--; \
+ _next = _stackp->next; \
+ _end = _stackp->end; \
+ continue; \
+ } \
+ _entry = *_next; \
+ _next = _next _PLUS_ 1; \
+ if (_entry == NULL) /* empty entry */ \
+ continue; \
+ if (((long)_entry) & 1) \
+ { /* points to a further level: enter it */ \
+ _stackp->next = _next; \
+ _stackp->end = _end; \
+ _stackp++; \
+ _next = ((wlog_node_t *)(_entry - 1))->items + INITIAL; \
+ _end = _next _PLUS_ TREE_ARITY; \
+ continue; \
+ } \
+ /* points to a wlog_t item */ \
+ item = (wlog_t *)_entry;
+
+#define REDOLOG_LOOP_FORWARD(redolog, item) \
+ _REDOLOG_LOOP(redolog, item, 0, +)
+#define REDOLOG_LOOP_BACKWARD(redolog, item) \
+ _REDOLOG_LOOP(redolog, item, (TREE_ARITY-1), -)
+#define REDOLOG_LOOP_END } }
+
+#define REDOLOG_FIND(redolog, addr1, result, goto_not_found) \
+{ \
+ unsigned long _key = (unsigned long)(addr1); \
+ char *_p = (char *)((redolog).toplevel.items); \
+ char *_entry = *(char **)(_p + (_key & TREE_MASK)); \
+ if (_entry == NULL) \
+ goto_not_found; /* common case, hopefully */ \
+ result = _redolog_find(_entry, addr1); \
+ if (result == NULL || result->addr != (addr1)) \
+ goto_not_found; \
+}
+
+static wlog_t *_redolog_find(char *entry, void** addr)
+{
+ unsigned long key = (unsigned long)addr;
+ while (((long)entry) & 1)
+ { /* points to a further level */
+ key >>= TREE_BITS;
+ entry = *(char **)((entry - 1) + (key & TREE_MASK));
+ }
+ return (wlog_t *)entry; /* may be NULL */
+}
+
+static void redolog_insert(struct RedoLog *redolog, void** addr, void* val);
+
+static void _redolog_grow(struct RedoLog *redolog, long extra)
+{
+ struct RedoLog newredolog;
+ wlog_t *item, *newitem;
+ long alloc = redolog->raw_end - redolog->raw_start;
+ long newalloc = (alloc + extra + (alloc >> 2) + 31) & ~15;
+ //fprintf(stderr, "growth: %ld\n", newalloc);
+ char *newitems = malloc(newalloc);
+ newredolog.raw_start = newitems;
+ newredolog.raw_current = newitems;
+ newredolog.raw_end = newitems + newalloc;
+ _redolog_clear_node(&newredolog.toplevel);
+ REDOLOG_LOOP_FORWARD(*redolog, item)
+ {
+ assert(item->p == -1);
+ redolog_insert(&newredolog, item->addr, item->val);
+ } REDOLOG_LOOP_END;
+ free(redolog->raw_start);
+ *redolog = newredolog;
+}
+
+static char *_redolog_grab(struct RedoLog *redolog, long size)
+{
+ char *result;
+ result = redolog->raw_current;
+ redolog->raw_current += size;
+ if (redolog->raw_current > redolog->raw_end)
+ {
+ _redolog_grow(redolog, size);
+ return NULL;
+ }
+ return result;
+}
+
+static void redolog_insert(struct RedoLog *redolog, void** addr, void* val)
+{
+ retry:;
+ wlog_t *wlog;
+ unsigned long key = (unsigned long)addr;
+ int shift = 0;
+ char *p = (char *)(redolog->toplevel.items);
+ char *entry;
+ while (1)
+ {
+ p += (key >> shift) & TREE_MASK;
+ shift += TREE_BITS;
+ entry = *(char **)p;
+ if (entry == NULL)
+ break;
+ else if (((long)entry) & 1)
+ { /* points to a further level */
+ p = entry - 1;
+ }
+ else
+ {
+ wlog_t *wlog1 = (wlog_t *)entry;
+ if (wlog1->addr == addr)
+ {
+ /* overwrite and that's it */
+ wlog1->val = val;
+ return;
+ }
+ /* collision: there is already a different wlog here */
+ wlog_node_t *node = (wlog_node_t *)
+ _redolog_grab(redolog, sizeof(wlog_node_t));
+ if (node == NULL) goto retry;
+ _redolog_clear_node(node);
+ unsigned long key1 = (unsigned long)(wlog1->addr);
+ char *p1 = (char *)(node->items);
+ *(wlog_t **)(p1 + ((key1 >> shift) & TREE_MASK)) = wlog1;
+ *(char **)p = ((char *)node) + 1;
+ p = p1;
+ }
+ }
+ wlog = (wlog_t *)_redolog_grab(redolog, sizeof(wlog_t));
+ if (wlog == NULL) goto retry;
+ wlog->addr = addr;
+ wlog->val = val;
+ wlog->p = -1;
+ *(char **)p = (char *)wlog;
+}
+
+/************************************************************/
+
+/* The oreclist_xx functions are implemented as an array that grows
+ as needed. */
+
+struct OrecList {
+ long size, alloc;
+ unsigned long locked;
+ orec_t **items;
+};
+
+static void _oreclist_grow(struct OrecList *oreclist)
+{
+ long newalloc = oreclist->alloc + (oreclist->alloc >> 1) + 16;
+ orec_t **newitems = malloc(newalloc * sizeof(orec_t *));
+ long i;
+ for (i=0; i<oreclist->size; i++)
+ newitems[i] = oreclist->items[i];
+ while (!bool_cas(&oreclist->locked, 0, 1))
+ /* rare case */ ;
+ free(oreclist->items);
+ oreclist->items = newitems;
+ oreclist->alloc = newalloc;
+ CFENCE;
+ oreclist->locked = 0;
+}
+
+static void oreclist_insert(struct OrecList *oreclist, orec_t *newitem)
+{
+ if (oreclist->size == oreclist->alloc)
+ _oreclist_grow(oreclist);
+ oreclist->items[oreclist->size++] = newitem;
+}
+
+/************************************************************/
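To make the redolog contract concrete, here is a sketch of the insert/find
protocol that stm_write_word() and stm_read_word() rely on (illustration
only, not part of this changeset; 'log' is assumed zero-initialized, which
redolog_insert handles by growing from empty):

    static void *redolog_demo(struct RedoLog *log, void **addr)
    {
      wlog_t *found;
      redolog_insert(log, addr, (void *)1);
      redolog_insert(log, addr, (void *)2);  /* same addr: overwrites */
      REDOLOG_FIND(*log, addr, found, goto not_found);
      return found->val;                     /* yields (void *)2 */
     not_found:
      return NULL;                           /* addr was never logged */
    }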
diff --git a/pypy/translator/stm/test/__init__.py b/pypy/translator/stm/test/__init__.py
new file mode 100644
diff --git a/pypy/translator/stm/test/test_basic.py b/pypy/translator/stm/test/test_basic.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/test/test_basic.py
@@ -0,0 +1,2 @@
+
+
diff --git a/pypy/translator/stm/test/test_rffi_stm.py b/pypy/translator/stm/test/test_rffi_stm.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/test/test_rffi_stm.py
@@ -0,0 +1,14 @@
+from pypy.translator.stm._rffi_stm import *
+from pypy.rpython.annlowlevel import llhelper
+
+def test_descriptor():
+ descriptor_init()
+ descriptor_done()
+
+def test_perform_transaction():
+ def callback1(x):
+ return lltype.nullptr(rffi.VOIDP.TO)
+ descriptor_init()
+ perform_transaction(llhelper(CALLBACK, callback1),
+ lltype.nullptr(rffi.VOIDP.TO))
+ descriptor_done()