[pypy-commit] pypy default: Document the JitCounter and BaseJitCell classes.

arigo noreply at buildbot.pypy.org
Sun Aug 31 08:59:35 CEST 2014

Author: Armin Rigo <arigo at tunes.org>
Changeset: r73216:cd1ea9c54b1b
Date: 2014-08-31 08:59 +0200

Log:	Document the JitCounter and BaseJitCell classes.

	These two classes have some usage patterns and invariants that are
	worth writing down precisely.

diff --git a/rpython/jit/metainterp/counter.py b/rpython/jit/metainterp/counter.py
--- a/rpython/jit/metainterp/counter.py
+++ b/rpython/jit/metainterp/counter.py
@@ -7,13 +7,78 @@
 assert r_uint32.BITS == 32
 UINT32MAX = 2 ** 32 - 1
-# keep in sync with the C code in pypy__decay_jit_counters
+# keep in sync with the C code in pypy__decay_jit_counters below
 ENTRY = lltype.Struct('timetable_entry',
                       ('times', lltype.FixedSizeArray(rffi.FLOAT, 5)),
                       ('subhashes', lltype.FixedSizeArray(rffi.USHORT, 5)))
 class JitCounter:
+    """A process translated with the JIT contains one prebuilt instance
+    of this class.  It is used for three things:
+    * It maps greenkey hashes to counters, to know when we have seen this
+      greenkey enough to reach the 'threshold' or 'function_threshold'
+      parameters.  This is done in a lossy way by a fixed-size 'timetable'.
+    * It handles the counters on the failing guards, for 'trace_eagerness'.
+      This is done in the same 'timetable'.
+    * It records the JitCell objects that are created when we compile
+      a loop, in a non-lossy dictionary-like structure.  This is done
+      in the 'celltable'.
+    The 'timetable' is a table of DEFAULT_SIZE entries, each of which
+    contains 5 entries.  From a hash value, we use the index number
+    '_get_index(hash)', and then we look in all five entries for a
+    matching '_get_subhash(hash)'.  The five entries are roughly kept
+    sorted by decreasing recorded time.  The hash value itself should be
+    computed accordingly: we only use bits 21:32 for _get_index and
+    bits 0:16 for _get_subhash.  (This organization is "probably good"
+    to get not-too-random behavior; another motivation for it was for
+    the STM branch, to avoid pointless conflicts between threads.)
+    The time value stored in the timetable is a (short-precision)
+    floating-point number.  The idea is that a value of 0.0 means
+    absent, and values go up to the maximum of 1.0.
+    'compute_threshold(threshold)' returns basically the fraction
+    1.0/threshold, corresponding to the 'increment' value for the
+    following APIs.
+    'tick(hash, increment)' adds 'increment' to the time value stored
+    with the 'hash'.  Remember that only bits 0:16,21:32 of the hash
+    are used; in case of collision between two hashes, they will grow
+    twice as fast, because each tick() call will contribute to the
+    colliding time value.
+    'fetch_next_hash()' returns a "random" hash value suitable for
+    using in tick() later.  Used when compiling guards; when the
+    guard actually fails, we'll tick() the guard's stored random hash.
+    'reset(hash)', 'change_current_fraction(hash, new_time_value)'
+    change the time value associated with a hash.  The former resets
+    it to zero, and the latter changes it to the given value (which
+    should be a value close to 1.0).
+    'set_decay(decay)', 'decay_all_counters()' are used to globally
+    reduce all the stored time values.  They all get multiplied by
+    a fraction close to (but smaller than) 1.0, computed from the
+    'decay' parameter.
+    'install_new_cell(hash, newcell)' adds the new JitCell to the
+    celltable, at the index given by 'hash' (bits 21:32).  Unlike
+    the timetable, the celltable stores a linked list of JitCells
+    for every entry, and so it is not lossy.
+    'lookup_chain(hash)' returns the first JitCell at 'hash'.  You can
+    then walk the chain by following the '.next' attributes until you
+    reach None.
+    'cleanup_chain(hash)' resets the timetable's 'hash' entry and
+    cleans up the celltable at 'hash'.  It removes those JitCells
+    for which 'cell.should_remove_jitcell()' returns True.
+    """
     DEFAULT_SIZE = 2048
     def __init__(self, size=DEFAULT_SIZE, translator=None):
diff --git a/rpython/jit/metainterp/warmstate.py b/rpython/jit/metainterp/warmstate.py
--- a/rpython/jit/metainterp/warmstate.py
+++ b/rpython/jit/metainterp/warmstate.py
@@ -129,6 +129,49 @@
 JC_TEMPORARY       = 0x04
 class BaseJitCell(object):
+    """Subclasses of BaseJitCell are used in tandem with the single
+    JitCounter instance to record places in the JIT-tracked user program
+    where something particular occurs with the JIT.  For some
+    'greenkeys' (e.g. Python bytecode position), we create one instance
+    of JitCell and attach it to that greenkey.  This is implemented
+    with jitcounter.install_new_cell(), but conceptually you can think
+    about JitCell instances as attached to some locations of the
+    app-level Python code.
+    We create subclasses of BaseJitCell --one per jitdriver-- so that
+    they can store greenkeys of different types.  
+    Note that we don't create a JitCell the first time we see a given
+    greenkey position in the interpreter.  At first, we only hash the
+    greenkey and use that in the JitCounter to record the number of
+    times we have seen it.  We only create a JitCell when the
+    JitCounter's total time value reaches 1.0 and we are starting to
+    JIT-compile.
+    A JitCell has a 'wref_procedure_token' that is non-None when we
+    actually have a compiled procedure for that greenkey.  (It is a
+    weakref, so that it could later be freed; in this case the JitCell
+    will likely be reclaimed a bit later by 'should_remove_jitcell()'.)
+    There are other less-common cases where we also create a JitCell: to
+    record some long-term flags about the greenkey.  In general, a
+    JitCell can have any combination of the following flags set:
+        JC_TRACING: we are now tracing the loop from this greenkey.
+        We'll likely end up with a wref_procedure_token, soonish.
+        JC_TEMPORARY: a "temporary" wref_procedure_token.
+        It's the procedure_token of a dummy loop that simply calls
+        back the interpreter.  Used for a CALL_ASSEMBLER where the
+        target was not compiled yet.  In this situation we are still
+        ticking the JitCounter for the same hash, until we reach the
+        threshold and start tracing the loop in earnest.
+        JC_DONT_TRACE_HERE: when tracing, don't inline calls to
+        this particular function.  (We only set this flag when aborting
+        due to a trace too long, so we use the same flag as a hint to
+        also mean "please trace from here as soon as possible".)
+    """
     flags = 0     # JC_xxx flags
     wref_procedure_token = None
     next = None

More information about the pypy-commit mailing list