[pypy-svn] pypy default: merge heads

arigo commits-noreply at bitbucket.org
Mon Apr 25 14:43:11 CEST 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r43581:3d96d6bdc2be
Date: 2011-04-25 14:42 +0200
http://bitbucket.org/pypy/pypy/changeset/3d96d6bdc2be/

Log:	merge heads

diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -532,7 +532,10 @@
             raise LLFatalError(msg, LLException(ll_exc_type, ll_exc))
 
     def op_debug_llinterpcall(self, pythonfunction, *args_ll):
-        return pythonfunction(*args_ll)
+        try:
+            return pythonfunction(*args_ll)
+        except:
+            self.make_llexception()
 
     def op_debug_start_traceback(self, *args):
         pass    # xxx write debugging code here?

diff --git a/pypy/translator/c/src/debug_print.h b/pypy/translator/c/src/debug_print.h
--- a/pypy/translator/c/src/debug_print.h
+++ b/pypy/translator/c/src/debug_print.h
@@ -20,7 +20,6 @@
    Note that 'fname' can be '-' to send the logging data to stderr.
 */
 
-
 /* macros used by the generated code */
 #define PYPY_HAVE_DEBUG_PRINTS    (pypy_have_debug_prints & 1 ? \
                                    (pypy_debug_ensure_opened(), 1) : 0)
@@ -40,174 +39,24 @@
 extern long pypy_have_debug_prints;
 extern FILE *pypy_debug_file;
 
+#define OP_LL_READ_TIMESTAMP(val) READ_TIMESTAMP(val)
 
-/* implementations */
+#include "src/asm.h"
 
-#ifndef PYPY_NOT_MAIN_FILE
-#include <string.h>
-
-#if defined(__GNUC__) && defined(__linux__)
-# include <sched.h>
-  static void pypy_setup_profiling()
-  {
-    cpu_set_t set;
-    CPU_ZERO(&set);
-    CPU_SET(0, &set);   /* restrict to a single cpu */
-    sched_setaffinity(0, sizeof(cpu_set_t), &set);
-  }
-#else
-static void pypy_setup_profiling() { }
-#endif
-
-long pypy_have_debug_prints = -1;
-FILE *pypy_debug_file = NULL;
-static bool_t debug_ready = 0;
-static bool_t debug_profile = 0;
-static char *debug_start_colors_1 = "";
-static char *debug_start_colors_2 = "";
-static char *debug_stop_colors = "";
-static char *debug_prefix = NULL;
-
-static void pypy_debug_open(void)
-{
-  char *filename = getenv("PYPYLOG");
-  if (filename)
-#ifndef MS_WINDOWS
-    unsetenv("PYPYLOG");   /* don't pass it to subprocesses */
-#else
-    putenv("PYPYLOG=");    /* don't pass it to subprocesses */
-#endif
-  if (filename && filename[0])
-    {
-      char *colon = strchr(filename, ':');
-      if (!colon)
-        {
-          /* PYPYLOG=filename --- profiling version */
-          debug_profile = 1;
-          pypy_setup_profiling();
-        }
-      else
-        {
-          /* PYPYLOG=prefix:filename --- conditional logging */
-          int n = colon - filename;
-          debug_prefix = malloc(n + 1);
-          memcpy(debug_prefix, filename, n);
-          debug_prefix[n] = '\0';
-          filename = colon + 1;
-        }
-      if (strcmp(filename, "-") != 0)
-        pypy_debug_file = fopen(filename, "w");
-    }
-  if (!pypy_debug_file)
-    {
-      pypy_debug_file = stderr;
-      if (isatty(2))
-        {
-          debug_start_colors_1 = "\033[1m\033[31m";
-          debug_start_colors_2 = "\033[31m";
-          debug_stop_colors = "\033[0m";
-        }
-    }
-  debug_ready = 1;
-}
-
-void pypy_debug_ensure_opened(void)
-{
-  if (!debug_ready)
-    pypy_debug_open();
-}
-
-
-#ifndef READ_TIMESTAMP
 /* asm_xxx.h may contain a specific implementation of READ_TIMESTAMP.
  * This is the default generic timestamp implementation.
  */
+#ifndef READ_TIMESTAMP
+
 #  ifdef _WIN32
 #    define READ_TIMESTAMP(val) QueryPerformanceCounter((LARGE_INTEGER*)&(val))
 #  else
 #    include <time.h>
 #    include <sys/time.h>
+
+long long pypy_read_timestamp();
+
 #    define READ_TIMESTAMP(val)  (val) = pypy_read_timestamp()
 
-     static long long pypy_read_timestamp(void)
-     {
-#    ifdef CLOCK_THREAD_CPUTIME_ID
-       struct timespec tspec;
-       clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);
-       return ((long long)tspec.tv_sec) * 1000000000LL + tspec.tv_nsec;
-#    else
-       /* argh, we don't seem to have clock_gettime().  Bad OS. */
-       struct timeval tv;
-       gettimeofday(&tv, NULL);
-       return ((long long)tv.tv_sec) * 1000000LL + tv.tv_usec;
-#    endif
-     }
 #  endif
 #endif
-
-
-static bool_t startswithoneof(const char *str, const char *substr)
-{
-  const char *p = str;
-  for (; *substr; substr++)
-    {
-      if (*substr != ',')
-        {
-          if (p && *p++ != *substr)
-            p = NULL;   /* mismatch */
-        }
-      else if (p != NULL)
-        return 1;   /* match */
-      else
-        p = str;    /* mismatched, retry with the next */
-    }
-  return p != NULL;
-}
-
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#define PYPY_LONG_LONG_PRINTF_FORMAT "I64"
-#else
-#define PYPY_LONG_LONG_PRINTF_FORMAT "ll"
-#endif
-
-static void display_startstop(const char *prefix, const char *postfix,
-                              const char *category, const char *colors)
-{
-  long long timestamp;
-  READ_TIMESTAMP(timestamp);
-  fprintf(pypy_debug_file, "%s[%"PYPY_LONG_LONG_PRINTF_FORMAT"x] %s%s%s\n%s",
-          colors,
-          timestamp, prefix, category, postfix,
-          debug_stop_colors);
-}
-
-void pypy_debug_start(const char *category)
-{
-  pypy_debug_ensure_opened();
-  /* Enter a nesting level.  Nested debug_prints are disabled by default
-     because the following left shift introduces a 0 in the last bit.
-     Note that this logic assumes that we are never going to nest
-     debug_starts more than 31 levels (63 on 64-bits). */
-  pypy_have_debug_prints <<= 1;
-  if (!debug_profile)
-    {
-      /* non-profiling version */
-      if (!debug_prefix || !startswithoneof(category, debug_prefix))
-        {
-          /* wrong section name, or no PYPYLOG at all, skip it */
-          return;
-        }
-      /* else make this subsection active */
-      pypy_have_debug_prints |= 1;
-    }
-  display_startstop("{", "", category, debug_start_colors_1);
-}
-
-void pypy_debug_stop(const char *category)
-{
-  if (debug_profile | (pypy_have_debug_prints & 1))
-    display_startstop("", "}", category, debug_start_colors_2);
-  pypy_have_debug_prints >>= 1;
-}
-
-#endif /* PYPY_NOT_MAIN_FILE */


diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2074,6 +2074,50 @@
             assert self.meta_interp(f, [bigval, 5]) == 0
             self.check_loops(int_rshift=3, everywhere=True)
 
+    def notest_overflowing_shift2(self):
+        myjitdriver = JitDriver(greens = [], reds = ['a', 'b', 'n', 'sa'])
+        def f(a, b):
+            n = sa = 0
+            while n < 10:
+                myjitdriver.jit_merge_point(a=a, b=b, n=n, sa=sa)
+                if 0 < a < hint(sys.maxint/2, promote=True): pass
+                if 0 < b < 100: pass
+                sa += (a << b) >> b
+                n += 1
+            return sa
+
+        assert self.meta_interp(f, [5, 5]) == 50
+        self.check_loops(int_rshift=0, everywhere=True)
+
+        assert self.meta_interp(f, [5, 10]) == 50
+        self.check_loops(int_rshift=1, everywhere=True)
+
+        assert self.meta_interp(f, [10, 5]) == 100
+        self.check_loops(int_rshift=1, everywhere=True)
+
+        assert self.meta_interp(f, [10, 10]) == 100
+        self.check_loops(int_rshift=1, everywhere=True)
+
+        assert self.meta_interp(f, [5, 100]) == 0
+        self.check_loops(int_rshift=1, everywhere=True)
+
+    def test_read_timestamp(self):
+        import time
+        from pypy.rlib.rtimer import read_timestamp
+        def busy_loop():
+            start = time.time()
+            while time.time() - start < 0.1:
+                # busy wait
+                pass
+
+        def f():
+            t1 = read_timestamp()
+            busy_loop()
+            t2 = read_timestamp()
+            return t2 - t1 > 1000
+        res = self.interp_operations(f, [])
+        assert res
+
 class TestOOtype(BasicTests, OOJitMixin):
 
     def test_oohash(self):

diff --git a/pypy/translator/c/src/debug_print.c b/pypy/translator/c/src/debug_print.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/debug_print.c
@@ -0,0 +1,150 @@
+
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include "src/profiling.h"
+#include "src/debug_print.h"
+
+long pypy_have_debug_prints = -1;
+FILE *pypy_debug_file = NULL;
+static unsigned char debug_ready = 0;
+static unsigned char debug_profile = 0;
+static char *debug_start_colors_1 = "";
+static char *debug_start_colors_2 = "";
+static char *debug_stop_colors = "";
+static char *debug_prefix = NULL;
+/* Parse $PYPYLOG: pick profiling or prefix-filtered mode and open the log. */
+static void pypy_debug_open(void)
+{
+  char *filename = getenv("PYPYLOG");
+  if (filename)
+#ifndef MS_WINDOWS
+    unsetenv("PYPYLOG");   /* don't pass it to subprocesses */
+#else
+    putenv("PYPYLOG=");    /* don't pass it to subprocesses */
+#endif
+  if (filename && filename[0])
+    {
+      char *colon = strchr(filename, ':');
+      if (!colon)
+        {
+          /* PYPYLOG=filename --- profiling version */
+          debug_profile = 1;
+          pypy_setup_profiling();
+        }
+      else
+        {
+          /* PYPYLOG=prefix:filename --- conditional logging */
+          size_t n = (size_t)(colon - filename);
+          debug_prefix = calloc(n + 1, 1);   /* zero-filled: stays terminated */
+          if (debug_prefix)    /* on OOM keep NULL: every section is skipped */
+            memcpy(debug_prefix, filename, n);
+          filename = colon + 1;
+        }
+      if (strcmp(filename, "-") != 0)
+        pypy_debug_file = fopen(filename, "w");
+    }
+  if (!pypy_debug_file)
+    {
+      pypy_debug_file = stderr;
+      if (isatty(2))
+        {
+          debug_start_colors_1 = "\033[1m\033[31m";
+          debug_start_colors_2 = "\033[31m";
+          debug_stop_colors = "\033[0m";
+        }
+    }
+  debug_ready = 1;
+}
+/* Run pypy_debug_open() on first use; later calls are no-ops. */
+void pypy_debug_ensure_opened(void)
+{
+  if (!debug_ready)
+    pypy_debug_open();
+}
+
+
+#ifndef _WIN32
+     /* Fallback clock for READ_TIMESTAMP; not static, to match debug_print.h. */
+     long long pypy_read_timestamp(void)
+     {
+#  ifdef CLOCK_THREAD_CPUTIME_ID
+       struct timespec tspec;
+       clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);
+       return ((long long)tspec.tv_sec) * 1000000000LL + tspec.tv_nsec;
+#  else
+       /* argh, we don't seem to have clock_gettime().  Bad OS. */
+       struct timeval tv;
+       gettimeofday(&tv, NULL);
+       return ((long long)tv.tv_sec) * 1000000LL + tv.tv_usec;
+#  endif
+     }
+#endif
+
+/* Return 1 if 'str' begins with any comma-separated item of 'substr', else 0. */
+static unsigned char startswithoneof(const char *str, const char *substr)
+{
+  const char *p = str;
+  for (; *substr; substr++)
+    {
+      if (*substr != ',')
+        {
+          if (p && *p++ != *substr)
+            p = NULL;   /* mismatch */
+        }
+      else if (p != NULL)
+        return 1;   /* match */
+      else
+        p = str;    /* mismatched, retry with the next */
+    }
+  return p != NULL;
+}
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#define PYPY_LONG_LONG_PRINTF_FORMAT "I64"
+#else
+#define PYPY_LONG_LONG_PRINTF_FORMAT "ll"
+#endif
+/* Print "[hex timestamp] prefix+category+postfix" in 'colors'; %x wants unsigned. */
+static void display_startstop(const char *prefix, const char *postfix,
+                              const char *category, const char *colors)
+{
+  long long timestamp;
+  READ_TIMESTAMP(timestamp);
+  fprintf(pypy_debug_file, "%s[%"PYPY_LONG_LONG_PRINTF_FORMAT"x] %s%s%s\n%s",
+          colors,
+          (unsigned long long)timestamp, prefix, category, postfix,
+          debug_stop_colors);
+}
+/* Enter section 'category': push a nesting level, print '{' marker if logged. */
+void pypy_debug_start(const char *category)
+{
+  pypy_debug_ensure_opened();
+  /* Enter a nesting level.  Nested debug_prints are disabled by default
+     because the following left shift introduces a 0 in the last bit.
+     Note that this logic assumes that we are never going to nest
+     debug_starts more than 31 levels (63 on 64-bits). */
+  pypy_have_debug_prints <<= 1;
+  if (!debug_profile)
+    {
+      /* non-profiling version */
+      if (!debug_prefix || !startswithoneof(category, debug_prefix))
+        {
+          /* wrong section name, or no PYPYLOG at all, skip it */
+          return;
+        }
+      /* else make this subsection active */
+      pypy_have_debug_prints |= 1;
+    }
+  display_startstop("{", "", category, debug_start_colors_1);
+}
+/* Leave section 'category': print '}' marker if logged, then pop the level. */
+void pypy_debug_stop(const char *category)
+{
+  if (debug_profile | (pypy_have_debug_prints & 1))
+    display_startstop("", "}", category, debug_start_colors_2);
+  pypy_have_debug_prints >>= 1;
+}

diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -16,6 +16,7 @@
 
     from pypy.rlib.objectmodel import compute_hash
 
+    is_64_bit = True
     supports_longlong = False
     r_float_storage = float
     FLOATSTORAGE = lltype.Float
@@ -32,6 +33,7 @@
 
     from pypy.rlib import rarithmetic, longlong2float
 
+    is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
     FLOATSTORAGE = lltype.SignedLongLong

diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -20,33 +20,33 @@
                             'fastlocals_w[*]',
                             'last_exception',
                             'lastblock',
+                            'is_being_profiled',
                             ]
 
 JUMP_ABSOLUTE = opmap['JUMP_ABSOLUTE']
 
-def get_printable_location(next_instr, bytecode):
+def get_printable_location(next_instr, is_being_profiled, bytecode):
     from pypy.tool.stdlib_opcode import opcode_method_names
     name = opcode_method_names[ord(bytecode.co_code[next_instr])]
     return '%s #%d %s' % (bytecode.get_repr(), next_instr, name)
 
-def get_jitcell_at(next_instr, bytecode):
-    return bytecode.jit_cells.get(next_instr, None)
+def get_jitcell_at(next_instr, is_being_profiled, bytecode):
+    return bytecode.jit_cells.get((next_instr, is_being_profiled), None)
 
-def set_jitcell_at(newcell, next_instr, bytecode):
-    bytecode.jit_cells[next_instr] = newcell
+def set_jitcell_at(newcell, next_instr, is_being_profiled, bytecode):
+    bytecode.jit_cells[next_instr, is_being_profiled] = newcell
 
-def confirm_enter_jit(next_instr, bytecode, frame, ec):
+def confirm_enter_jit(next_instr, is_being_profiled, bytecode, frame, ec):
     return (frame.w_f_trace is None and
-            ec.profilefunc is None and
             ec.w_tracefunc is None)
 
-def can_never_inline(next_instr, bytecode):
+def can_never_inline(next_instr, is_being_profiled, bytecode):
     return (bytecode.co_flags & CO_GENERATOR) != 0
 
 
 class PyPyJitDriver(JitDriver):
     reds = ['frame', 'ec']
-    greens = ['next_instr', 'pycode']
+    greens = ['next_instr', 'is_being_profiled', 'pycode']
     virtualizables = ['frame']
 
 ##    def compute_invariants(self, reds, next_instr, pycode):
@@ -68,13 +68,16 @@
     def dispatch(self, pycode, next_instr, ec):
         self = hint(self, access_directly=True)
         next_instr = r_uint(next_instr)
+        is_being_profiled = self.is_being_profiled
         try:
             while True:
                 pypyjitdriver.jit_merge_point(ec=ec,
-                    frame=self, next_instr=next_instr, pycode=pycode)
+                    frame=self, next_instr=next_instr, pycode=pycode,
+                    is_being_profiled=is_being_profiled)
                 co_code = pycode.co_code
                 self.valuestackdepth = hint(self.valuestackdepth, promote=True)
                 next_instr = self.handle_bytecode(co_code, next_instr, ec)
+                is_being_profiled = self.is_being_profiled
         except ExitFrame:
             return self.popvalue()
 
@@ -97,7 +100,8 @@
             jumpto = r_uint(self.last_instr)
         #
         pypyjitdriver.can_enter_jit(frame=self, ec=ec, next_instr=jumpto,
-                                    pycode=self.getcode())
+                                    pycode=self.getcode(),
+                                    is_being_profiled=self.is_being_profiled)
         return jumpto
 
 

diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -422,12 +422,12 @@
 
 
 # Method names take the form of
-# 
+#
 #     <instruction name>_<operand type codes>
 #
 # For example, the method name for "mov reg, immed" is MOV_ri. Operand order
 # is Intel-style, with the destination first.
-# 
+#
 # The operand type codes are:
 #     r - register
 #     b - ebp/rbp offset
@@ -565,6 +565,9 @@
     # x87 instructions
     FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
 
+    # ------------------------------ Random mess -----------------------
+    RDTSC = insn('\x0F\x31')
+
     # reserved as an illegal instruction
     UD2 = insn('\x0F\x0B')
 

diff --git a/pypy/jit/tl/pypyjit_demo.py b/pypy/jit/tl/pypyjit_demo.py
--- a/pypy/jit/tl/pypyjit_demo.py
+++ b/pypy/jit/tl/pypyjit_demo.py
@@ -1,17 +1,19 @@
 
 try:
-    def main(n):
-        def g(n):
-            return range(n)
-        s = 0
-        for i in range(n):  # ID: for
-            tmp = g(n)
-            s += tmp[i]     # ID: getitem
-            a = 0
-        return s
-    main(10)
-
+    def g(x):
+        return x - 1
+    def f(x):
+        while x:
+            x = g(x)
+    import cProfile
+    import time
+    t1 = time.time()
+    cProfile.run("f(10000000)")
+    t2 = time.time()
+    f(10000000)
+    t3 = time.time()
+    print t2 - t1, t3 - t2, (t3 - t2) / (t2 - t1)
 except Exception, e:
     print "Exception: ", type(e)
     print e
-    
+

diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -915,6 +915,14 @@
     from pypy.rlib.rarithmetic import LONG_BIT
     defines['PYPY_LONG_BIT'] = LONG_BIT
 
+def add_extra_files(eci):
+    srcdir = py.path.local(autopath.pypydir).join('translator', 'c', 'src')
+    files = [
+        srcdir / 'profiling.c',
+        srcdir / 'debug_print.c',
+    ]
+    return eci.merge(ExternalCompilationInfo(separate_module_files=files))
+
 def gen_source_standalone(database, modulename, targetdir, eci,
                           entrypointname, defines={}): 
     assert database.standalone
@@ -964,6 +972,7 @@
         print >>fi, "#define INSTRUMENT_NCOUNTER %d" % n
         fi.close()
 
+    eci = add_extra_files(eci)
     eci = eci.convert_sources_to_files(being_main=True)
     files, eci = eci.get_module_files()
     return eci, filename, sg.getextrafiles() + list(files)
@@ -1010,6 +1019,7 @@
     gen_startupcode(f, database)
     f.close()
 
+    eci = add_extra_files(eci)
     eci = eci.convert_sources_to_files(being_main=True)
     files, eci = eci.get_module_files()
     return eci, filename, sg.getextrafiles() + list(files)

diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -175,6 +175,7 @@
         c_pythonfunction = hop.inputconst(lltype.Void, pythonfunction)
         args_v = [hop.inputarg(hop.args_r[i], arg=i)
                   for i in range(2, hop.nb_args)]
+        hop.exception_is_here()
         return hop.genop('debug_llinterpcall', [c_pythonfunction] + args_v,
                          resulttype=RESTYPE)
 

diff --git a/pypy/rpython/test/test_llinterp.py b/pypy/rpython/test/test_llinterp.py
--- a/pypy/rpython/test/test_llinterp.py
+++ b/pypy/rpython/test/test_llinterp.py
@@ -658,3 +658,25 @@
         assert x == -42
 
     res = interpret(f, [])
+
+def test_raising_llimpl():
+    from pypy.rpython.extfunc import register_external
+
+    def external():
+        pass
+    
+    def raising():
+        raise OSError(15, "abcd")
+    
+    ext = register_external(external, [], llimpl=raising, llfakeimpl=raising)
+    
+    def f():
+        # this is a useful llfakeimpl that raises an exception
+        try:
+            external()
+            return True
+        except OSError:
+            return False
+
+    res = interpret(f, [])
+    assert not res

diff --git a/pypy/rlib/test/test_rtimer.py b/pypy/rlib/test/test_rtimer.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/test/test_rtimer.py
@@ -0,0 +1,28 @@
+import time
+
+from pypy.rlib.rtimer import read_timestamp
+from pypy.rpython.test.test_llinterp import interpret
+from pypy.translator.c.test.test_genc import compile
+
+def timer():
+    t1 = read_timestamp()
+    start = time.time()
+    while time.time() - start < 0.1:
+        # busy wait
+        pass
+    t2 = read_timestamp()
+    return t2 - t1
+
+def test_timer():
+    diff = timer()
+    # We're counting ticks, verify they look correct
+    assert diff > 1000
+
+def test_annotation():
+    diff = interpret(timer, [])
+    assert diff > 1000
+
+def test_compile_c():
+    function = compile(timer, [])
+    diff = function()
+    assert diff > 1000
\ No newline at end of file

diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -367,7 +367,7 @@
         self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
                                               result_loc,
                                               current_depths)
-        self.possibly_free_vars(guard_op.getfailargs())        
+        self.possibly_free_vars(guard_op.getfailargs())
 
     def PerformDiscard(self, op, arglocs):
         if not we_are_translated():
@@ -443,7 +443,7 @@
                     assert isinstance(arg, Box)
                     if arg not in last_used:
                         last_used[arg] = i
-                        
+
         longevity = {}
         for arg in produced:
             if arg in last_used:
@@ -837,7 +837,7 @@
         self._call(op, [imm(size), vable] +
                    [self.loc(op.getarg(i)) for i in range(op.numargs())],
                    guard_not_forced_op=guard_op)
-        
+
     def consider_cond_call_gc_wb(self, op):
         assert op.result is None
         args = op.getarglist()
@@ -1217,6 +1217,29 @@
         else:
             raise AssertionError("bad unicode item size")
 
+    def consider_read_timestamp(self, op):
+        tmpbox_high = TempBox()
+        self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax)
+        if longlong.is_64_bit:
+            # on 64-bit, use rax as temporary register and returns the
+            # result in rdx
+            result_loc = self.rm.force_allocate_reg(op.result,
+                                                    selected_reg=edx)
+            self.Perform(op, [], result_loc)
+        else:
+            # on 32-bit, use both eax and edx as temporary registers,
+            # use a temporary xmm register, and returns the result in
+            # another xmm register.
+            tmpbox_low = TempBox()
+            self.rm.force_allocate_reg(tmpbox_low, selected_reg=edx)
+            xmmtmpbox = TempBox()
+            xmmtmploc = self.xrm.force_allocate_reg(xmmtmpbox)
+            result_loc = self.xrm.force_allocate_reg(op.result)
+            self.Perform(op, [xmmtmploc], result_loc)
+            self.xrm.possibly_free_var(xmmtmpbox)
+            self.rm.possibly_free_var(tmpbox_low)
+        self.rm.possibly_free_var(tmpbox_high)
+
     def consider_jump(self, op):
         assembler = self.assembler
         assert self.jump_target_descr is None

diff --git a/pypy/translator/goal/translate.py b/pypy/translator/goal/translate.py
--- a/pypy/translator/goal/translate.py
+++ b/pypy/translator/goal/translate.py
@@ -221,12 +221,14 @@
 
     pdb_plus_show = PdbPlusShow(t) # need a translator to support extended commands
 
-    def debug(got_error):
+    def finish_profiling():
         if prof:
             prof.disable()
             statfilename = 'prof.dump'
             log.info('Dumping profiler stats to: %s' % statfilename)
-            prof.dump_stats(statfilename)
+            prof.dump_stats(statfilename)        
+
+    def debug(got_error):
         tb = None
         if got_error:
             import traceback
@@ -302,9 +304,11 @@
     except SystemExit:
         raise
     except:
+        finish_profiling()
         debug(True)
         raise SystemExit(1)
     else:
+        finish_profiling()
         if translateconfig.pdb:
             debug(False)
 

diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -5,7 +5,8 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rstr
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.rarithmetic import ovfcheck, r_uint, intmask
+from pypy.rlib.rarithmetic import ovfcheck, r_uint, intmask, r_longlong
+from pypy.rlib.rtimer import read_timestamp
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, check_descr
 from pypy.jit.metainterp.history import INT, REF, FLOAT, VOID, AbstractDescr
@@ -227,6 +228,15 @@
     length = lengthbox.getint()
     rstr.copy_unicode_contents(src, dst, srcstart, dststart, length)
 
+def do_read_timestamp(cpu, _):
+    x = read_timestamp()
+    if longlong.is_64_bit:
+        assert isinstance(x, int)         # 64-bit
+        return BoxInt(x)
+    else:
+        assert isinstance(x, r_longlong)  # 32-bit
+        return BoxFloat(x)
+
 # ____________________________________________________________
 
 ##def do_force_token(cpu):

diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -25,6 +25,7 @@
 from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
 from pypy.rlib.rarithmetic import ovfcheck
 from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
+from pypy.rlib.rtimer import read_timestamp
 
 import py
 from pypy.tool.ansi_print import ansi_log
@@ -506,7 +507,7 @@
                         ', '.join(map(str, args)),))
                 self.fail_args = args
                 return op.fail_index
- 
+
             else:
                 assert 0, "unknown final operation %d" % (op.opnum,)
 
@@ -856,6 +857,9 @@
         opaque_frame = _to_opaque(self)
         return llmemory.cast_ptr_to_adr(opaque_frame)
 
+    def op_read_timestamp(self, descr):
+        return read_timestamp()
+
     def op_call_may_force(self, calldescr, func, *args):
         assert not self._forced
         self._may_force = self.opindex
@@ -937,7 +941,7 @@
 class OOFrame(Frame):
 
     OPHANDLERS = [None] * (rop._LAST+1)
-    
+
     def op_new_with_vtable(self, descr, vtable):
         assert descr is None
         typedescr = get_class_size(self.memocast, vtable)
@@ -958,7 +962,7 @@
         return res
 
     op_getfield_gc_pure = op_getfield_gc
-    
+
     def op_setfield_gc(self, fielddescr, obj, newvalue):
         TYPE = fielddescr.TYPE
         fieldname = fielddescr.fieldname

diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -1,12 +1,39 @@
+import py
 
 from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.function import Method, Function
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
 from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
                                       interp_attrproperty)
-from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
-from pypy.interpreter.function import Method, Function
-from pypy.interpreter.error import OperationError
+from pypy.rlib import jit
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rtimer import read_timestamp, _is_64_bit
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.tool.autopath import pypydir
+from pypy.rlib.rarithmetic import r_longlong
+
 import time, sys
 
+# cpu affinity settings
+
+srcdir = py.path.local(pypydir).join('translator', 'c', 'src')
+eci = ExternalCompilationInfo(separate_module_files=
+                              [srcdir.join('profiling.c')])
+                                                     
+c_setup_profiling = rffi.llexternal('pypy_setup_profiling',
+                                  [], lltype.Void,
+                                  compilation_info = eci)
+c_teardown_profiling = rffi.llexternal('pypy_teardown_profiling',
+                                       [], lltype.Void,
+                                       compilation_info = eci)
+
+if _is_64_bit:
+    timer_size_int = int
+else:
+    timer_size_int = r_longlong
+
 class W_StatsEntry(Wrappable):
     def __init__(self, space, frame, callcount, reccallcount, tt, it,
                  w_sublist):
@@ -74,20 +101,43 @@
     l_w = []
     for v in values:
         if v.callcount != 0:
-            l_w.append(v.stats(space, factor))
+            l_w.append(v.stats(space, None, factor))
     return space.newlist(l_w)
 
-class ProfilerEntry(object):
+class ProfilerSubEntry(object):
     def __init__(self, frame):
         self.frame = frame
-        self.tt = 0
-        self.it = 0
+        self.ll_tt = r_longlong(0)
+        self.ll_it = r_longlong(0)
         self.callcount = 0
         self.recursivecallcount = 0
         self.recursionLevel = 0
+
+    def stats(self, space, parent, factor):
+        w_sse = W_StatsSubEntry(space, self.frame,
+                                self.callcount, self.recursivecallcount,
+                                factor * float(self.ll_tt),
+                                factor * float(self.ll_it))
+        return space.wrap(w_sse)
+
+    def _stop(self, tt, it):
+        if not we_are_translated():
+            assert type(tt) is timer_size_int
+            assert type(it) is timer_size_int
+        self.recursionLevel -= 1
+        if self.recursionLevel == 0:
+            self.ll_tt += tt
+        else:
+            self.recursivecallcount += 1
+        self.ll_it += it
+        self.callcount += 1
+
+class ProfilerEntry(ProfilerSubEntry):
+    def __init__(self, frame):
+        ProfilerSubEntry.__init__(self, frame)
         self.calls = {}
 
-    def stats(self, space, factor):
+    def stats(self, space, dummy, factor):
         if self.calls:
             w_sublist = space.newlist([sub_entry.stats(space, self, factor)
                                        for sub_entry in self.calls.values()])
@@ -95,67 +145,44 @@
             w_sublist = space.w_None
         w_se = W_StatsEntry(space, self.frame, self.callcount,
                             self.recursivecallcount,
-                            factor * self.tt, factor * self.it, w_sublist)
+                            factor * float(self.ll_tt),
+                            factor * float(self.ll_it), w_sublist)
         return space.wrap(w_se)
 
-class ProfilerSubEntry(object):
-    def __init__(self, frame):
-        self.frame = frame
-        self.tt = 0
-        self.it = 0
-        self.callcount = 0
-        self.recursivecallcount = 0
-        self.recursionLevel = 0
-
-    def stats(self, space, parent, factor):
-        w_sse = W_StatsSubEntry(space, self.frame,
-                                self.callcount, self.recursivecallcount,
-                                factor * self.tt, factor * self.it)
-        return space.wrap(w_sse)
+    @jit.purefunction
+    def _get_or_make_subentry(self, entry, make=True):
+        try:
+            return self.calls[entry]
+        except KeyError:
+            if make:
+                subentry = ProfilerSubEntry(entry.frame)
+                self.calls[entry] = subentry
+                return subentry
+            return None
 
 class ProfilerContext(object):
     def __init__(self, profobj, entry):
         self.entry = entry
-        self.subt = 0
+        self.ll_subt = timer_size_int(0)
         self.previous = profobj.current_context
         entry.recursionLevel += 1
         if profobj.subcalls and self.previous:
-            caller = self.previous.entry
-            try:
-                subentry = caller.calls[entry]
-            except KeyError:
-                subentry = ProfilerSubEntry(entry.frame)
-                caller.calls[entry] = subentry
+            caller = jit.hint(self.previous.entry, promote=True)
+            subentry = caller._get_or_make_subentry(entry)
             subentry.recursionLevel += 1
-        self.t0 = profobj.timer()
+        self.ll_t0 = profobj.ll_timer()
 
     def _stop(self, profobj, entry):
-        # XXX factor out two pieces of the same code
-        tt = profobj.timer() - self.t0
-        it = tt - self.subt
+        tt = profobj.ll_timer() - self.ll_t0
+        it = tt - self.ll_subt
         if self.previous:
-            self.previous.subt += tt
-        entry.recursionLevel -= 1
-        if entry.recursionLevel == 0:
-            entry.tt += tt
-        else:
-            entry.recursivecallcount += 1
-        entry.it += it
-        entry.callcount += 1
+            self.previous.ll_subt += tt
+        entry._stop(tt, it)
         if profobj.subcalls and self.previous:
-            caller = self.previous.entry
-            try:
-                subentry = caller.calls[entry]
-            except KeyError:
-                pass
-            else:
-                subentry.recursionLevel -= 1
-                if subentry.recursionLevel == 0:
-                    subentry.tt += tt
-                else:
-                    subentry.recursivecallcount += 1
-                subentry.it += it
-                subentry.callcount += 1
+            caller = jit.hint(self.previous.entry, promote=True)
+            subentry = caller._get_or_make_subentry(entry, False)
+            if subentry is not None:
+                subentry._stop(tt, it)
 
 def create_spec(space, w_arg):
     if isinstance(w_arg, Method):
@@ -187,7 +214,7 @@
     else:
         class_name = space.type(w_arg).getname(space, '?')
         return "{'%s' object}" % (class_name,)
-    
+
 def lsprof_call(space, w_self, frame, event, w_arg):
     assert isinstance(w_self, W_Profiler)
     if event == 'call':
@@ -209,6 +236,7 @@
         pass
 
 class W_Profiler(Wrappable):
+    
     def __init__(self, space, w_callable, time_unit, subcalls, builtins):
         self.subcalls = subcalls
         self.builtins = builtins
@@ -218,65 +246,94 @@
         self.data = {}
         self.builtin_data = {}
         self.space = space
+        self.is_enabled = False
+        self.total_timestamp = r_longlong(0)
+        self.total_real_time = 0.0
 
-    def timer(self):
+    def ll_timer(self):
         if self.w_callable:
             space = self.space
             try:
-                return space.float_w(space.call_function(self.w_callable))
+                if _is_64_bit:
+                    return space.int_w(space.call_function(self.w_callable))
+                else:
+                    return space.r_longlong_w(space.call_function(self.w_callable))
             except OperationError, e:
                 e.write_unraisable(space, "timer function ",
                                    self.w_callable)
-                return 0.0
-        return time.time()
+                return timer_size_int(0)
+        return read_timestamp()
 
     def enable(self, space, w_subcalls=NoneNotWrapped,
                w_builtins=NoneNotWrapped):
+        if self.is_enabled:
+            return      # ignored
         if w_subcalls is not None:
             self.subcalls = space.bool_w(w_subcalls)
         if w_builtins is not None:
             self.builtins = space.bool_w(w_builtins)
+        # We want total_real_time and total_timestamp to end up containing
+        # (endtime - starttime).  Now we are at the start, so we first
+        # have to subtract the current time.
+        self.is_enabled = True
+        self.total_real_time -= time.time()
+        self.total_timestamp -= read_timestamp()
         # set profiler hook
+        c_setup_profiling()
         space.getexecutioncontext().setllprofile(lsprof_call, space.wrap(self))
 
+    @jit.purefunction
+    def _get_or_make_entry(self, f_code, make=True):
+        try:
+            return self.data[f_code]
+        except KeyError:
+            if make:
+                entry = ProfilerEntry(f_code)
+                self.data[f_code] = entry
+                return entry
+            return None
+
+    @jit.purefunction
+    def _get_or_make_builtin_entry(self, key, make=True):
+        try:
+            return self.builtin_data[key]
+        except KeyError:
+            if make:
+                entry = ProfilerEntry(self.space.wrap(key))
+                self.builtin_data[key] = entry
+                return entry
+            return None
+
     def _enter_call(self, f_code):
         # we have a superb gc, no point in freelist :)
-        try:
-            entry = self.data[f_code]
-        except KeyError:
-            entry = ProfilerEntry(f_code)
-            self.data[f_code] = entry
+        self = jit.hint(self, promote=True)
+        entry = self._get_or_make_entry(f_code)
         self.current_context = ProfilerContext(self, entry)
 
     def _enter_return(self, f_code):
         context = self.current_context
         if context is None:
             return
-        try:
-            entry = self.data[f_code]
+        self = jit.hint(self, promote=True)
+        entry = self._get_or_make_entry(f_code, False)
+        if entry is not None:
             context._stop(self, entry)
-        except KeyError:
-            pass
         self.current_context = context.previous
 
     def _enter_builtin_call(self, key):
-        try:
-            entry = self.builtin_data[key]
-        except KeyError:
-            entry = ProfilerEntry(self.space.wrap(key))
-            self.builtin_data[key] = entry
-        self.current_context = ProfilerContext(self, entry)        
+        self = jit.hint(self, promote=True)
+        entry = self._get_or_make_builtin_entry(key)
+        self.current_context = ProfilerContext(self, entry)
 
     def _enter_builtin_return(self, key):
         context = self.current_context
         if context is None:
             return
-        try:
-            entry = self.builtin_data[key]
+        self = jit.hint(self, promote=True)
+        entry = self._get_or_make_builtin_entry(key, False)
+        if entry is not None:
             context._stop(self, entry)
-        except KeyError:
-            pass
-        self.current_context = context.previous        
+        self.current_context = context.previous
 
     def _flush_unmatched(self):
         context = self.current_context
@@ -288,13 +345,29 @@
         self.current_context = None
 
     def disable(self, space):
+        if not self.is_enabled:
+            return      # ignored
+        # We want total_real_time and total_timestamp to end up containing
+        # (endtime - starttime), or the sum of such intervals if
+        # enable() and disable() are called several times.
+        self.is_enabled = False
+        self.total_timestamp += read_timestamp()
+        self.total_real_time += time.time()
         # unset profiler hook
         space.getexecutioncontext().setllprofile(None, None)
+        c_teardown_profiling()
         self._flush_unmatched()
 
     def getstats(self, space):
         if self.w_callable is None:
-            factor = 1. # we measure time.time in floats
+            if self.is_enabled:
+                raise OperationError(space.w_RuntimeError,
+                    space.wrap("Profiler instance must be disabled "
+                               "before getting the stats"))
+            if self.total_timestamp:
+                factor = self.total_real_time / float(self.total_timestamp)
+            else:
+                factor = 1.0     # probably not used
         elif self.time_unit > 0.0:
             factor = self.time_unit
         else:

diff --git a/pypy/translator/c/src/g_include.h b/pypy/translator/c/src/g_include.h
--- a/pypy/translator/c/src/g_include.h
+++ b/pypy/translator/c/src/g_include.h
@@ -39,11 +39,13 @@
 #include "src/instrument.h"
 #include "src/asm.h"
 
+#include "src/profiling.h"
+
+#include "src/debug_print.h"
 
 /*** modules ***/
 #ifdef HAVE_RTYPER      /* only if we have an RTyper */
 #  include "src/rtyper.h"
-#  include "src/debug_print.h"
 #  include "src/debug_traceback.h"
 #  include "src/debug_alloc.h"
 #ifndef AVR

diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -14,7 +14,7 @@
             modname, _ = modname.split('.', 1)
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
-                       '_socket', '_sre']:
+                       '_socket', '_sre', '_lsprof']:
             return True
         return False
 

diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -1023,6 +1023,10 @@
             metainterp.history.record(rop.VIRTUAL_REF_FINISH,
                                       [vrefbox, lastbox], None)
 
+    @arguments()
+    def opimpl_ll_read_timestamp(self):
+        return self.metainterp.execute_and_record(rop.READ_TIMESTAMP, None)
+
     # ------------------------------
 
     def setup_call(self, argboxes):

diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -380,7 +380,7 @@
     return ord(b)
 
 def op_cast_int_to_unichar(b):
-    assert type(b) is int 
+    assert type(b) is int
     return unichr(b)
 
 def op_cast_int_to_uint(b):
@@ -578,6 +578,10 @@
 def op_shrink_array(array, smallersize):
     return False
 
+def op_ll_read_timestamp():
+    from pypy.rlib.rtimer import read_timestamp
+    return read_timestamp()
+
 # ____________________________________________________________
 
 def get_op_impl(opname):

diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1356,6 +1356,19 @@
         self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
                                                c_nest, c_nest], 'void')
 
+    def test_read_timestamp(self):
+        if longlong.is_64_bit:
+            got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
+            got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
+            res1 = got1.getint()
+            res2 = got2.getint()
+        else:
+            got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
+            got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
+            res1 = got1.getlonglong()
+            res2 = got2.getlonglong()
+        assert res1 < res2 < res1 + 2**32
+
 
 class LLtypeBackendTest(BaseBackendTest):
 

diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -32,7 +32,7 @@
         assert isinstance(canraise, tuple)
 
         assert not canraise or not canfold
-        
+
         # The operation manipulates PyObjects
         self.pyobj = pyobj
 
@@ -440,6 +440,7 @@
     'get_write_barrier_failing_case': LLOp(sideeffects=False),
     'get_write_barrier_from_array_failing_case': LLOp(sideeffects=False),
     'gc_get_type_info_group': LLOp(sideeffects=False),
+    'll_read_timestamp': LLOp(canrun=True),
 
     # __________ GC operations __________
 
@@ -482,7 +483,7 @@
     'gc_typeids_z'        : LLOp(),
 
     # ------- JIT & GC interaction, only for some GCs ----------
-    
+
     'gc_adr_of_nursery_free' : LLOp(),
     # ^^^ returns an address of nursery free pointer, for later modifications
     'gc_adr_of_nursery_top' : LLOp(),
@@ -554,7 +555,8 @@
     'debug_pdb':            LLOp(),
     'debug_assert':         LLOp(tryfold=True),
     'debug_fatalerror':     LLOp(),
-    'debug_llinterpcall':   LLOp(), # Python func call 'res=arg[0](*arg[1:])'
+    'debug_llinterpcall':   LLOp(canraise=(Exception,)),
+                                    # Python func call 'res=arg[0](*arg[1:])'
                                     # in backends, abort() or whatever is fine
     'debug_start_traceback':   LLOp(),
     'debug_record_traceback':  LLOp(),

diff --git a/pypy/jit/backend/x86/test/test_regloc.py b/pypy/jit/backend/x86/test/test_regloc.py
--- a/pypy/jit/backend/x86/test/test_regloc.py
+++ b/pypy/jit/backend/x86/test/test_regloc.py
@@ -21,10 +21,12 @@
     assert_encodes_as(cb32, "MOV16", (ecx, ImmedLoc(12345)), '\x66\xB9\x39\x30')
 
     # 64-bit
-    assert_encodes_as(cb64, "MOV16", (ecx, ebx), '\x66\x89\xD9')
+    assert_encodes_as(cb64, "MOV16", (r8, ebx), '\x66\x41\x89\xD8')  # 11 011 000
+    assert_encodes_as(cb64, "MOV16", (ebx, r8), '\x66\x44\x89\xC3')  # 11 000 011
+    assert_encodes_as(cb64, "MOV16", (ecx, ebx), '\x66\x40\x89\xD9')
     # XXX: What we are testing for here is actually not the most compact
     # encoding.
-    assert_encodes_as(cb64, "MOV16", (ecx, ImmedLoc(12345)), '\x66\xC7\xC1\x39\x30')
+    assert_encodes_as(cb64, "MOV16", (ecx, ImmedLoc(12345)), '\x66\x40\xC7\xC1\x39\x30')
     assert_encodes_as(cb64, "MOV16", (AddressLoc(r13, ImmedLoc(0), 0, 0), ImmedLoc(12345)), '\x66\x41\xC7\x45\x00\x39\x30')
 
 def test_cmp_16():
@@ -33,8 +35,10 @@
     assert_encodes_as(cb32, "CMP16", (ecx, ImmedLoc(12345)), '\x66\x81\xF9\x39\x30')
 
     # 64-bit
-    assert_encodes_as(cb64, "CMP16", (ecx, ebx), '\x66\x39\xD9')
-    assert_encodes_as(cb64, "CMP16", (ecx, ImmedLoc(12345)), '\x66\x81\xF9\x39\x30')
+    assert_encodes_as(cb64, "CMP16", (r8, ebx), '\x66\x41\x39\xD8')  # 11 011 000
+    assert_encodes_as(cb64, "CMP16", (ebx, r8), '\x66\x44\x39\xC3')  # 11 000 011
+    assert_encodes_as(cb64, "CMP16", (ecx, ebx), '\x66\x40\x39\xD9')
+    assert_encodes_as(cb64, "CMP16", (ecx, ImmedLoc(12345)), '\x66\x40\x81\xF9\x39\x30')
     assert_encodes_as(cb64, "CMP16", (AddressLoc(r13, ImmedLoc(0), 0, 0), ImmedLoc(12345)), '\x66\x41\x81\x7D\x00\x39\x30')
 
 def test_relocation():

diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -298,8 +298,11 @@
 
         # Profile cases
         if self.profilefunc is not None:
-            if event not in ['leaveframe', 'call', 'c_call',
-                             'c_return', 'c_exception']:
+            if not (event == 'leaveframe' or
+                    event == 'call' or
+                    event == 'c_call' or
+                    event == 'c_return' or
+                    event == 'c_exception'):
                 return False
 
             last_exception = frame.last_exception

diff --git a/pypy/rlib/rtimer.py b/pypy/rlib/rtimer.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/rtimer.py
@@ -0,0 +1,37 @@
+import time
+
+from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
+from pypy.rlib.rarithmetic import intmask, longlongmask
+from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.rpython.lltypesystem import lltype, rffi
+
+_is_64_bit = r_uint.BITS > 32
+
+
+def read_timestamp():
+    # Returns a longlong on 32-bit, and a regular int on 64-bit.
+    # When running on top of python, build the result a bit arbitrarily.
+    x = long(time.time() * 500000000)
+    if _is_64_bit:
+        return intmask(x)
+    else:
+        return longlongmask(x)
+
+
+class ReadTimestampEntry(ExtRegistryEntry):
+    _about_ = read_timestamp
+
+    def compute_result_annotation(self):
+        from pypy.annotation.model import SomeInteger
+        if _is_64_bit:
+            return SomeInteger()
+        else:
+            return SomeInteger(knowntype=r_longlong)
+
+    def specialize_call(self, hop):
+        hop.exception_cannot_occur()
+        if _is_64_bit:
+            resulttype = lltype.Signed
+        else:
+            resulttype = rffi.LONGLONG
+        return hop.genop("ll_read_timestamp", [], resulttype=resulttype)

diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -1,4 +1,5 @@
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.rtimer import read_timestamp
 from pypy.rlib.rarithmetic import intmask, LONG_BIT, r_uint, ovfcheck
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop
@@ -1212,6 +1213,10 @@
     def bhimpl_unicodesetitem(cpu, unicode, index, newchr):
         cpu.bh_unicodesetitem(unicode, index, newchr)
 
+    @arguments(returns=(longlong.is_64_bit and "i" or "f"))
+    def bhimpl_ll_read_timestamp():
+        return read_timestamp()
+
     # ----------
     # helpers to resume running in blackhole mode when a guard failed
 
@@ -1423,7 +1428,7 @@
 
     current_exc = blackholeinterp._prepare_resume_from_failure(
         resumedescr.guard_opnum, dont_change_position)
-        
+
     try:
         _run_forever(blackholeinterp, current_exc)
     finally:

diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -23,7 +23,7 @@
 
     # methods implemented by each concrete class
     # ------------------------------------------
-    
+
     def getopnum(self):
         raise NotImplementedError
 
@@ -234,7 +234,7 @@
 
     def getarg(self, i):
         raise IndexError
-    
+
     def setarg(self, i, box):
         raise IndexError
 
@@ -258,7 +258,7 @@
             return self._arg0
         else:
             raise IndexError
-    
+
     def setarg(self, i, box):
         if i == 0:
             self._arg0 = box
@@ -288,7 +288,7 @@
             return self._arg1
         else:
             raise IndexError
-    
+
     def setarg(self, i, box):
         if i == 0:
             self._arg0 = box
@@ -326,7 +326,7 @@
             return self._arg2
         else:
             raise IndexError
-    
+
     def setarg(self, i, box):
         if i == 0:
             self._arg0 = box
@@ -352,7 +352,7 @@
 
     def getarg(self, i):
         return self._args[i]
-    
+
     def setarg(self, i, box):
         self._args[i] = box
 
@@ -460,6 +460,7 @@
     '_MALLOC_LAST',
     'FORCE_TOKEN/0',
     'VIRTUAL_REF/2',         # removed before it's passed to the backend
+    'READ_TIMESTAMP/0',
     '_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations -----
 
     'SETARRAYITEM_GC/3d',
@@ -468,7 +469,7 @@
     'SETFIELD_RAW/2d',
     'STRSETITEM/3',
     'UNICODESETITEM/3',
-    #'RUNTIMENEW/1',     # ootype operation    
+    #'RUNTIMENEW/1',     # ootype operation
     'COND_CALL_GC_WB/2d', # [objptr, newvalue]   (for the write barrier)
     'DEBUG_MERGE_POINT/2',      # debugging only
     'JIT_DEBUG/*',              # debugging only
@@ -554,7 +555,7 @@
         2: BinaryOp,
         3: TernaryOp
         }
-    
+
     is_guard = name.startswith('GUARD')
     if is_guard:
         assert withdescr


diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -521,6 +521,7 @@
             ll_dict_insertclean(d, entry.key, entry.value, hash)
         i += 1
     old_entries.delete()
+ll_dict_resize.oopspec = 'dict.resize(d)'
 
 # ------- a port of CPython's dictobject.c's lookdict implementation -------
 PERTURB_SHIFT = 5

diff --git a/pypy/module/_lsprof/test/test_cprofile.py b/pypy/module/_lsprof/test/test_cprofile.py
--- a/pypy/module/_lsprof/test/test_cprofile.py
+++ b/pypy/module/_lsprof/test/test_cprofile.py
@@ -91,6 +91,30 @@
         assert spam2bar.inlinetime == 1.0
         assert spam2bar.totaltime == 1.0
 
+    def test_scale_of_result(self):
+        import _lsprof, time
+        prof = _lsprof.Profiler()
+        def foo(n):
+            t = time.time()
+            while abs(t - time.time()) < 1.0:
+                pass      # busy-wait for 1 second
+        def bar(n):
+            foo(n)
+        prof.enable()
+        bar(0)
+        prof.disable()
+        stats = prof.getstats()
+        entries = {}
+        for entry in stats:
+            entries[entry.code] = entry
+        efoo = entries[foo.func_code]
+        ebar = entries[bar.func_code]
+        assert 0.9 < efoo.totaltime < 2.9
+        assert 0.9 < efoo.inlinetime < 2.9
+        for subentry in ebar.calls:
+            assert 0.9 < subentry.totaltime < 2.9
+            assert 0.9 < subentry.inlinetime < 2.9
+
     def test_cprofile(self):
         import sys, os
         # XXX this is evil trickery to walk around the fact that we don't

diff --git a/pypy/translator/c/src/profiling.h b/pypy/translator/c/src/profiling.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/profiling.h
@@ -0,0 +1,8 @@
+
+#ifndef PROFILING_H
+#define PROFILING_H
+
+void pypy_setup_profiling();
+void pypy_teardown_profiling();
+
+#endif

diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -39,6 +39,7 @@
 config.objspace.usemodules.array = True
 config.objspace.usemodules._weakref = True
 config.objspace.usemodules._sre = False
+config.objspace.usemodules._lsprof = True
 #
 config.objspace.usemodules._ffi = True
 #
@@ -99,7 +100,7 @@
     from pypy.translator.goal.ann_override import PyPyAnnotatorPolicy
     from pypy.rpython.test.test_llinterp import get_interpreter
 
-    # first annotate, rtype, and backendoptimize PyPy
+    # first annotate and rtype
     try:
         interp, graph = get_interpreter(entry_point, [], backendopt=False,
                                         config=config,

diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -330,7 +330,7 @@
         if log:
             self._register_counter()
             operations = self._inject_debugging_code(looptoken, operations)
-        
+
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         arglocs = regalloc.prepare_loop(inputargs, operations, looptoken)
         looptoken._x86_arglocs = arglocs
@@ -339,7 +339,7 @@
         stackadjustpos = self._assemble_bootstrap_code(inputargs, arglocs)
         self.looppos = self.mc.get_relative_pos()
         looptoken._x86_frame_depth = -1     # temporarily
-        looptoken._x86_param_depth = -1     # temporarily        
+        looptoken._x86_param_depth = -1     # temporarily
         frame_depth, param_depth = self._assemble(regalloc, operations)
         looptoken._x86_frame_depth = frame_depth
         looptoken._x86_param_depth = param_depth
@@ -538,7 +538,7 @@
 
     def _assemble(self, regalloc, operations):
         self._regalloc = regalloc
-        regalloc.walk_operations(operations)        
+        regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
         frame_depth = regalloc.fm.frame_depth
@@ -1015,7 +1015,7 @@
                     dst_locs.append(unused_gpr.pop())
                 else:
                     pass_on_stack.append(loc)
-        
+
         # Emit instructions to pass the stack arguments
         # XXX: Would be nice to let remap_frame_layout take care of this, but
         # we'd need to create something like StackLoc, but relative to esp,
@@ -1441,6 +1441,17 @@
         else:
             assert 0, itemsize
 
+    def genop_read_timestamp(self, op, arglocs, resloc):
+        self.mc.RDTSC()
+        if longlong.is_64_bit:
+            self.mc.SHL_ri(edx.value, 32)
+            self.mc.OR_rr(edx.value, eax.value)
+        else:
+            loc1, = arglocs
+            self.mc.MOVD_xr(loc1.value, edx.value)
+            self.mc.MOVD_xr(resloc.value, eax.value)
+            self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
+
     def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
         loc = locs[0]
         self.mc.TEST(loc, loc)
@@ -2131,7 +2142,7 @@
         assert rx86.fits_in_32bits(tid)
         self.mc.MOV_mi((eax.value, 0), tid)
         self.mc.MOV(heap(nursery_free_adr), edx)
-        
+
 genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
 genop_list = [Assembler386.not_implemented_op] * rop._LAST
 genop_llong_list = {}
@@ -2142,7 +2153,7 @@
         opname = name[len('genop_discard_'):]
         num = getattr(rop, opname.upper())
         genop_discard_list[num] = value
-    elif name.startswith('genop_guard_') and name != 'genop_guard_exception': 
+    elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
         opname = name[len('genop_guard_'):]
         num = getattr(rop, opname.upper())
         genop_guard_list[num] = value

diff --git a/pypy/translator/c/src/align.h b/pypy/translator/c/src/align.h
--- a/pypy/translator/c/src/align.h
+++ b/pypy/translator/c/src/align.h
@@ -1,3 +1,6 @@
+
+#ifndef _PYPY_ALIGN_H
+#define _PYPY_ALIGN_H
 
 /* alignment for arena-based garbage collectors: the following line
    enforces an alignment that should be enough for any structure
@@ -14,3 +17,5 @@
 #define ROUND_UP_FOR_ALLOCATION(x, minsize)  \
   ((((x)>=(minsize)?(x):(minsize))           \
                + (MEMORY_ALIGNMENT-1)) & ~(MEMORY_ALIGNMENT-1))
+
+#endif //_PYPY_ALIGN_H

diff --git a/pypy/translator/c/src/profiling.c b/pypy/translator/c/src/profiling.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/profiling.c
@@ -0,0 +1,48 @@
+
+/* Restrict the calling thread to CPU 0 while profiling is set up, and
+   restore the previously saved affinity mask on teardown.  Only
+   implemented for GCC on Linux; elsewhere both functions do nothing. */
+
+/* cpu_set_t, CPU_ZERO(), CPU_SET() and sched_{get,set}affinity() are GNU
+   extensions of <sched.h>.  _GNU_SOURCE has to be defined before the first
+   system header is included, and <sched.h> has to be included even when
+   _GNU_SOURCE was already defined (e.g. with -D_GNU_SOURCE). */
+#if defined(__GNUC__) && defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include <stddef.h>
+#if defined(__GNUC__) && defined(__linux__)
+
+#include <sched.h>
+
+cpu_set_t base_cpu_set;     /* affinity mask saved by pypy_setup_profiling() */
+int profiling_setup = 0;    /* 1 between a setup and the matching teardown */
+
+/* Save the current affinity mask, then pin the caller (pid 0) to CPU 0.
+   Repeated calls without a teardown in between do nothing. */
+void pypy_setup_profiling()
+{
+  if (!profiling_setup) {
+    cpu_set_t set;
+    sched_getaffinity(0, sizeof(cpu_set_t), &base_cpu_set);
+    CPU_ZERO(&set);
+    CPU_SET(0, &set);   /* restrict to a single cpu */
+    sched_setaffinity(0, sizeof(cpu_set_t), &set);
+    profiling_setup = 1;
+  }
+}
+
+/* Restore the affinity mask saved by pypy_setup_profiling(); does nothing
+   if profiling is not currently set up. */
+void pypy_teardown_profiling()
+{
+  if (profiling_setup) {
+    sched_setaffinity(0, sizeof(cpu_set_t), &base_cpu_set);
+    profiling_setup = 0;
+  }
+}
+#else
+void pypy_setup_profiling() { }
+void pypy_teardown_profiling() { }
+#endif


More information about the Pypy-commit mailing list