[Python-checkins] r81122 - in python/branches/py3k-jit: Include/code.h JIT JIT/jit_notes.txt Lib/test/support.py Lib/test/test_code.py Lib/test/test_sys.py Objects/codeobject.c Python/ceval.c

Wed May 12 18:51:15 CEST 2010

Author: jeffrey.yasskin
Date: Wed May 12 18:51:15 2010
New Revision: 81122

Log:
Import the code hotness model to py3k-jit.  This will help test the background
thread even before we have JITting hooked up.



Added:
   python/branches/py3k-jit/JIT/
   python/branches/py3k-jit/JIT/jit_notes.txt
Modified:
   python/branches/py3k-jit/Include/code.h
   python/branches/py3k-jit/Lib/test/support.py
   python/branches/py3k-jit/Lib/test/test_code.py
   python/branches/py3k-jit/Lib/test/test_sys.py
   python/branches/py3k-jit/Objects/codeobject.c
   python/branches/py3k-jit/Python/ceval.c

Modified: python/branches/py3k-jit/Include/code.h
==============================================================================

--- python/branches/py3k-jit/Include/code.h	(original)
+++ python/branches/py3k-jit/Include/code.h	Wed May 12 18:51:15 2010
@@ -28,6 +28,11 @@
 				   Objects/lnotab_notes.txt for details. */
     void *co_zombieframe;     /* for optimization only (see frameobject.c) */
     PyObject *co_weakreflist;   /* to support weakrefs to code objects */
+#ifdef WITH_LLVM
+    /* Measure of how hot this code object is. This will be used to
+       decide which code objects are worth sending through LLVM. */
+    long co_hotness;
+#endif  /* WITH_LLVM */
 } PyCodeObject;
 
 /* Masks for co_flags above */

Added: python/branches/py3k-jit/JIT/jit_notes.txt
==============================================================================
--- (empty file)
+++ python/branches/py3k-jit/JIT/jit_notes.txt	Wed May 12 18:51:15 2010
@@ -0,0 +1,43 @@
+How CPython Uses LLVM to JIT
+============================
+
+This document tries to provide a high-level overview of how LLVM is used inside
+Python, including details of all the optimizations implemented for
+LLVM-generated Python machine code. This document should be as developer-centric
+as possible: it should be able to answer questions like, "how does Python
+determine function hotness" and also "where is that implemented?".
+
+Hotness model: finding critical functions
+-----------------------------------------
+
+TODO: Hotness is currently only measured, not acted on.
+
+We use an online model to estimate which functions are most critical to an
+application's performance. This model is as follows:
+
+- Each code object has a hotness level (the co_hotness field).
+    - For each function entry, add 10 to the hotness level.
+    - For each loop backedge, add 1 to the hotness level.
+- If the hotness level exceeds a given threshold (see ceval.c),
+  compile the code object to machine code via LLVM. This check is done on
+  function-entry and generator re-entry.
+
+There are several classes of functions we're trying to catch with this model:
+
+- Straight-line utility functions (lots of invocations, low running time).
+- Loop-heavy main functions (few invocations, high running time).
+- Long-running generators (few invocations, long lifetime).
+
+Miscellaneous notes:
+- JIT compilation is always disabled during startup by temporarily forcing `-j
+  never`. This improves startup time by disabling compilation and feedback
+  collection.
+
+Previous models:
+- Simple call count-based model (10000 calls == hot). This was implemented as
+  an obviously-deficient baseline to be improved upon.
+- Previously, we didn't check code hotness on generator re-entry, which we
+  changed to catch long-running generators that are called once.
+
+Relevant Files:
+- Python/ceval.c - definition, use of the hotness model.

Modified: python/branches/py3k-jit/Lib/test/support.py
==============================================================================
--- python/branches/py3k-jit/Lib/test/support.py	(original)
+++ python/branches/py3k-jit/Lib/test/support.py	Wed May 12 18:51:15 2010
@@ -38,7 +38,7 @@
     "set_memlimit", "bigmemtest", "bigaddrspacetest", "BasicTestRunner",
     "run_unittest", "run_doctest", "threading_setup", "threading_cleanup",
     "reap_children", "cpython_only", "check_impl_detail", "get_attribute",
-    "swap_item", "swap_attr",
+    "swap_item", "swap_attr", "WITH_LLVM"
     ]
 
 
@@ -1222,3 +1222,8 @@
             yield
         finally:
             del obj[item]
+
+# WITH_LLVM is true if Python was compiled with LLVM support.
+def foo(): pass
+WITH_LLVM = hasattr(foo.__code__, "co_hotness")
+del foo

Modified: python/branches/py3k-jit/Lib/test/test_code.py
==============================================================================
--- python/branches/py3k-jit/Lib/test/test_code.py	(original)
+++ python/branches/py3k-jit/Lib/test/test_code.py	Wed May 12 18:51:15 2010
@@ -124,6 +124,25 @@
     print("consts:", tuple(consts(co.co_consts)))
 
 
+def new_code(function_name, def_string, globals_dict=None):
+    """Compiles function_name, defined in def_string into a new code object.
+
+    Compiles and runs def_string in a temporary namespace, with the specified
+    globals dict if any, and returns the function named 'function_name' out of
+    that namespace.
+
+    This allows us to track things that change in a code object as it's called
+    repeatedly.  Simply defining a local function would re-use the same code
+    object for each function
+
+    """
+    namespace = {}
+    if globals_dict is None:
+        globals_dict = {}
+    exec(def_string, globals_dict, namespace)
+    return namespace[function_name]
+
+
 class CodeTest(unittest.TestCase):
 
     def test_newempty(self):
@@ -133,6 +152,121 @@
         self.assertEquals(co.co_firstlineno, 15)
 
 
+HOTNESS_CALL = 10
+HOTNESS_ITER = 1
+
+@unittest.skipUnless(hasattr(new_code.__code__, "co_hotness"),
+                     "Only applies with LLVM compiled in.")
+class HotnessTest(unittest.TestCase):
+
+    def setUp(self):
+        self.while_loop = new_code("while_loop", """
+def while_loop(x):
+  while x > 0:
+    x = x - 1
+""")
+
+    def test_new_code_has_0_hotness(self):
+        self.assertEquals(self.while_loop.__code__.co_hotness, 0)
+
+    def test_call_adds_10_hotness(self):
+        self.while_loop(0)
+        self.assertEquals(self.while_loop.__code__.co_hotness, HOTNESS_CALL)
+        self.while_loop(0)
+        self.assertEquals(self.while_loop.__code__.co_hotness, 2 * HOTNESS_CALL)
+
+        list(map(self.while_loop, [0]))  # Don't go through fast_function.
+        self.assertEquals(self.while_loop.__code__.co_hotness, 3 * HOTNESS_CALL)
+
+        kwargs = new_code("kwargs", """
+def kwargs(**kwa):
+  return kwa
+""")
+        self.assertEquals(kwargs.__code__.co_hotness, 0)
+        kwargs(a=3, b=4)  # Also doesn't go through fast_function.
+        self.assertEquals(kwargs.__code__.co_hotness, HOTNESS_CALL)
+
+    def test_iteration_adds_1_hotness(self):
+        self.while_loop(1)
+        self.assertEquals(self.while_loop.__code__.co_hotness,
+                          HOTNESS_CALL + HOTNESS_ITER)
+        self.while_loop(36)
+        self.assertEquals(self.while_loop.__code__.co_hotness,
+                          2 * HOTNESS_CALL + 37 * HOTNESS_ITER)
+
+        for_loop = new_code("for_loop", """
+def for_loop():
+  for x in range(17):
+    pass
+""")
+        self.assertEquals(for_loop.__code__.co_hotness, 0)
+        for_loop()
+        self.assertEquals(for_loop.__code__.co_hotness,
+                          HOTNESS_CALL + 17 * HOTNESS_ITER)
+
+    def test_nested_for_loop_hotness(self):
+        # Verify our understanding of how the hotness model deals with nested
+        # for loops. This can be confusing, and we don't want to change it
+        # accidentally.
+        foo = new_code("foo", """
+def foo():
+    for x in range(50):
+        for y in range(70):
+            pass
+""")
+        self.assertEqual(foo.__code__.co_hotness, 0)
+        foo()
+        self.assertEqual(foo.__code__.co_hotness,
+                         HOTNESS_CALL + HOTNESS_ITER * 3500 +
+                         HOTNESS_ITER * 50)
+
+    def test_for_loop_jump_threading_hotness(self):
+        # Regression test: the bytecode peephole optimizer does some limited
+        # jump threading, which caused problems for one earlier attempt at
+        # tuning the hotness model.
+        foo = new_code("foo", """
+def foo():
+    for x in range(30):
+        if x % 2:  # Alternate between the two branches
+            x = 8  # Nonsense
+""")
+        self.assertEqual(foo.__code__.co_hotness, 0)
+        foo()
+
+        hotness = HOTNESS_CALL + HOTNESS_ITER * 30
+
+    def test_early_for_loop_exit_hotness(self):
+        # Make sure we understand how the hotness model counts early exits from
+        # for loops.
+        foo = new_code("foo", """
+def foo():
+    for x in range(1000):
+        return True
+""")
+        self.assertEqual(foo.__code__.co_hotness, 0)
+        foo()
+
+        # Note that we don't count the loop in any way, since we never take
+        # a loop backedge.
+        self.assertEqual(foo.__code__.co_hotness, HOTNESS_CALL)
+
+    def test_generator_hotness(self):
+        foo = new_code("foo", """
+def foo():
+    yield 5
+    yield 6
+""")
+        # Generator object created, but not run yet.  This counts as the call.
+        l = foo()
+        self.assertEqual(foo.__code__.co_hotness, HOTNESS_CALL)
+
+        next(l)  # Enter the generator.  This is not a call.
+        self.assertEqual(foo.__code__.co_hotness, HOTNESS_CALL)
+        next(l)  # Neither is this.
+        self.assertEqual(foo.__code__.co_hotness, HOTNESS_CALL)
+
+
+
 class CodeWeakRefTest(unittest.TestCase):
 
     def test_basic(self):
@@ -162,7 +296,7 @@
     from test.support import run_doctest, run_unittest
     from test import test_code
     run_doctest(test_code, verbose)
-    run_unittest(CodeTest, CodeWeakRefTest)
+    run_unittest(CodeTest, HotnessTest, CodeWeakRefTest)
 
 
 if __name__ == "__main__":

Modified: python/branches/py3k-jit/Lib/test/test_sys.py
==============================================================================
--- python/branches/py3k-jit/Lib/test/test_sys.py	(original)
+++ python/branches/py3k-jit/Lib/test/test_sys.py	Wed May 12 18:51:15 2010
@@ -6,6 +6,7 @@
 import textwrap
 import warnings
 import operator
+from test.support import WITH_LLVM
 
 # count the number of test runs, used to create unique
 # strings to intern in test_intern()
@@ -593,7 +594,10 @@
             return inner
         check(get_cell().__closure__[0], size(h + 'P'))
         # code
-        check(get_cell().__code__, size(h + '5i8Pi3P'))
+        if WITH_LLVM:
+            check(get_cell().__code__, size(h + '5i8Pi3Pl'))
+        else:
+            check(get_cell().__code__, size(h + '5i8Pi3P'))
         # complex
         check(complex(0,1), size(h + '2d'))
         # method_descriptor (descriptor object)

Modified: python/branches/py3k-jit/Objects/codeobject.c
==============================================================================
--- python/branches/py3k-jit/Objects/codeobject.c	(original)
+++ python/branches/py3k-jit/Objects/codeobject.c	Wed May 12 18:51:15 2010
@@ -109,6 +109,9 @@
         co->co_lnotab = lnotab;
         co->co_zombieframe = NULL;
         co->co_weakreflist = NULL;
+#ifdef WITH_LLVM
+        co->co_hotness = 0;
+#endif  /* WITH_LLVM */
     }
     return co;
 }
@@ -179,6 +182,9 @@
     {"co_name",         T_OBJECT,       OFF(co_name),           READONLY},
     {"co_firstlineno", T_INT,           OFF(co_firstlineno),    READONLY},
     {"co_lnotab",       T_OBJECT,       OFF(co_lnotab),         READONLY},
+#ifdef WITH_LLVM
+    {"co_hotness",      T_INT,          OFF(co_hotness),        READONLY},
+#endif
     {NULL}      /* Sentinel */
 };
 

Modified: python/branches/py3k-jit/Python/ceval.c
==============================================================================
--- python/branches/py3k-jit/Python/ceval.c	(original)
+++ python/branches/py3k-jit/Python/ceval.c	Wed May 12 18:51:15 2010
@@ -116,6 +116,11 @@
                                       PyObject *);
 static PyObject * update_star_args(int, int, PyObject *, PyObject ***);
 static PyObject * load_args(PyObject ***, int);
+
+#ifdef WITH_LLVM
+static inline void mark_called(PyCodeObject *co);
+#endif  /* WITH_LLVM */
+
 #define CALL_FLAG_VAR 1
 #define CALL_FLAG_KW 2
 
@@ -963,6 +968,14 @@
 #define JUMPTO(x)       (next_instr = first_instr + (x))
 #define JUMPBY(x)       (next_instr += (x))
 
+/* Feedback-gathering macros */
+#ifdef WITH_LLVM
+#define UPDATE_HOTNESS_JABS() \
+    do { if (oparg <= f->f_lasti) ++co->co_hotness; } while (0)
+#else
+#define UPDATE_HOTNESS_JABS()
+#endif  /* WITH_LLVM */
+
 /* OpCode prediction macros
     Some opcodes tend to come in pairs thus making it possible to
     predict the second code when the first is run.  For example,
@@ -2373,6 +2386,7 @@
             }
             if (w == Py_False) {
                 Py_DECREF(w);
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
                 FAST_DISPATCH();
             }
@@ -2380,8 +2394,10 @@
             Py_DECREF(w);
             if (err > 0)
                 err = 0;
-            else if (err == 0)
+            else if (err == 0) {
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
+            }
             else
                 break;
             DISPATCH();
@@ -2395,6 +2411,7 @@
             }
             if (w == Py_True) {
                 Py_DECREF(w);
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
                 FAST_DISPATCH();
             }
@@ -2402,6 +2419,7 @@
             Py_DECREF(w);
             if (err > 0) {
                 err = 0;
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
             }
             else if (err == 0)
@@ -2418,6 +2436,7 @@
                 FAST_DISPATCH();
             }
             if (w == Py_False) {
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
                 FAST_DISPATCH();
             }
@@ -2427,8 +2446,10 @@
                 Py_DECREF(w);
                 err = 0;
             }
-            else if (err == 0)
+            else if (err == 0) {
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
+            }
             else
                 break;
             DISPATCH();
@@ -2441,12 +2462,14 @@
                 FAST_DISPATCH();
             }
             if (w == Py_True) {
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
                 FAST_DISPATCH();
             }
             err = PyObject_IsTrue(w);
             if (err > 0) {
                 err = 0;
+                UPDATE_HOTNESS_JABS();
                 JUMPTO(oparg);
             }
             else if (err == 0) {
@@ -2459,6 +2482,7 @@
 
         PREDICTED_WITH_ARG(JUMP_ABSOLUTE);
         TARGET(JUMP_ABSOLUTE)
+            UPDATE_HOTNESS_JABS();
             JUMPTO(oparg);
 #if FAST_LOOPS
             /* Enabling this path speeds-up all while and for-loops by bypassing
@@ -2514,6 +2538,9 @@
             goto fast_block_end;
 
         TARGET(CONTINUE_LOOP)
+#ifdef WITH_LLVM
+            ++co->co_hotness;
+#endif
             retval = PyLong_FromLong(oparg);
             if (!retval) {
                 x = NULL;
@@ -3080,6 +3107,13 @@
     if (f == NULL)
         return NULL;
 
+#ifdef WITH_LLVM
+    /* This is where a code object is considered "called". Doing it here
+     * instead of PyEval_EvalFrame() makes support for generators somewhat
+     * cleaner. */
+    mark_called(co);
+#endif  /* WITH_LLVM */
+
     fastlocals = f->f_localsplus;
     freevars = f->f_localsplus + co->co_nlocals;
 
@@ -3793,6 +3827,14 @@
                      nargs);
 }
 
+#ifdef WITH_LLVM
+static inline void
+mark_called(PyCodeObject *co)
+{
+    co->co_hotness += 10;
+}
+#endif  /* WITH_LLVM */
+
 #define C_TRACE(x, call) \
 if (tstate->use_tracing && tstate->c_profilefunc) { \
     if (call_trace(tstate->c_profilefunc, \
@@ -3947,6 +3989,9 @@
         f = PyFrame_New(tstate, co, globals, NULL);
         if (f == NULL)
             return NULL;
+#ifdef WITH_LLVM
+        mark_called(co);
+#endif
 
         fastlocals = f->f_localsplus;
         stack = (*pp_stack) - n;