[pypy-commit] pypy matrixmath-dot: merge from default

mattip noreply at buildbot.pypy.org
Sat Jan 14 22:35:56 CET 2012


Author: mattip
Branch: matrixmath-dot
Changeset: r51331:ba2aa2febde8
Date: 2012-01-14 22:17 +0200
http://bitbucket.org/pypy/pypy/changeset/ba2aa2febde8/

Log:	merge from default

diff too long, truncating to 10000 out of 12004 lines

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -27,7 +27,7 @@
     DEALINGS IN THE SOFTWARE.
 
 
-PyPy Copyright holders 2003-2011
+PyPy Copyright holders 2003-2012
 ----------------------------------- 
 
 Except when otherwise stated (look for LICENSE files or information at
@@ -37,43 +37,47 @@
     Armin Rigo
     Maciej Fijalkowski
     Carl Friedrich Bolz
+    Amaury Forgeot d'Arc
     Antonio Cuni
-    Amaury Forgeot d'Arc
     Samuele Pedroni
     Michael Hudson
     Holger Krekel
-    Benjamin Peterson
+    Alex Gaynor
     Christian Tismer
     Hakan Ardo
-    Alex Gaynor
+    Benjamin Peterson
+    David Schneider
     Eric van Riet Paap
     Anders Chrigstrom
-    David Schneider
     Richard Emslie
     Dan Villiom Podlaski Christiansen
     Alexander Schremmer
+    Lukas Diekmann
     Aurelien Campeas
     Anders Lehmann
     Camillo Bruni
     Niklaus Haldimann
+    Sven Hager
     Leonardo Santagada
     Toon Verwaest
     Seo Sanghyeon
+    Justin Peel
     Lawrence Oluyede
     Bartosz Skowron
     Jakub Gustak
     Guido Wesdorp
     Daniel Roberts
+    Laura Creighton
     Adrien Di Mascio
-    Laura Creighton
     Ludovic Aubry
     Niko Matsakis
+    Wim Lavrijsen
+    Matti Picus
     Jason Creighton
     Jacob Hallen
     Alex Martelli
     Anders Hammarquist
     Jan de Mooij
-    Wim Lavrijsen
     Stephan Diehl
     Michael Foord
     Stefan Schwarzer
@@ -84,34 +88,36 @@
     Alexandre Fayolle
     Marius Gedminas
     Simon Burton
-    Justin Peel
+    David Edelsohn
     Jean-Paul Calderone
     John Witulski
-    Lukas Diekmann
+    Timo Paulssen
     holger krekel
-    Wim Lavrijsen
     Dario Bertini
+    Mark Pearse
     Andreas Stührk
     Jean-Philippe St. Pierre
     Guido van Rossum
     Pavel Vinogradov
     Valentino Volonghi
     Paul deGrandis
+    Ilya Osadchiy
+    Ronny Pfannschmidt
     Adrian Kuhn
     tav
     Georg Brandl
+    Philip Jenvey
     Gerald Klix
     Wanja Saatkamp
-    Ronny Pfannschmidt
     Boris Feigin
     Oscar Nierstrasz
     David Malcolm
     Eugene Oden
     Henry Mason
-    Sven Hager
+    Jeff Terrace
     Lukas Renggli
-    Ilya Osadchiy
     Guenter Jantzen
+    Ned Batchelder
     Bert Freudenberg
     Amit Regmi
     Ben Young
@@ -142,7 +148,6 @@
     Anders Qvist
     Beatrice During
     Alexander Sedov
-    Timo Paulssen
     Corbin Simpson
     Vincent Legoll
     Romain Guillebert
@@ -165,9 +170,10 @@
     Lucio Torre
     Lene Wagner
     Miguel de Val Borro
+    Artur Lisiecki
+    Bruno Gola
     Ignas Mikalajunas
-    Artur Lisiecki
-    Philip Jenvey
+    Stefano Rivera
     Joshua Gilbert
     Godefroid Chappelle
     Yusei Tahara
@@ -179,17 +185,17 @@
     Kristjan Valur Jonsson
     Bobby Impollonia
     Michael Hudson-Doyle
+    Laurence Tratt
+    Yasir Suhail
     Andrew Thompson
     Anders Sigfridsson
     Floris Bruynooghe
     Jacek Generowicz
     Dan Colish
     Zooko Wilcox-O Hearn
-    Dan Villiom Podlaski Christiansen
-    Anders Hammarquist
+    Dan Loewenherz
     Chris Lambacher
     Dinu Gherman
-    Dan Colish
     Brett Cannon
     Daniel Neuhäuser
     Michael Chermside
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -73,8 +73,12 @@
 
 class Field(object):
     def __init__(self, name, offset, size, ctype, num, is_bitfield):
-        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
-            self.__dict__[k] = locals()[k]
+        self.__dict__['name'] = name
+        self.__dict__['offset'] = offset
+        self.__dict__['size'] = size
+        self.__dict__['ctype'] = ctype
+        self.__dict__['num'] = num
+        self.__dict__['is_bitfield'] = is_bitfield
 
     def __setattr__(self, name, value):
         raise AttributeError(name)
diff --git a/lib_pypy/numpypy/__init__.py b/lib_pypy/numpypy/__init__.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/__init__.py
@@ -0,0 +1,2 @@
+from _numpypy import *
+from .fromnumeric import *
diff --git a/lib_pypy/numpypy/fromnumeric.py b/lib_pypy/numpypy/fromnumeric.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/fromnumeric.py
@@ -0,0 +1,2400 @@
+######################################################################    
+# This is a copy of numpy/core/fromnumeric.py modified for numpypy
+######################################################################
+# Each name in __all__ was a function in  'numeric' that is now 
+# a method in 'numpy'.
+# When the corresponding method is added to numpypy BaseArray
+# each function should be added as a module function 
+# at the applevel 
+# This can be as simple as doing the following
+#
+# def func(a, ...):
+#     if not hasattr(a, 'func')
+#         a = numpypy.array(a)
+#     return a.func(...)
+#
+######################################################################
+
+import numpypy
+
+# Module containing non-deprecated functions borrowed from Numeric.
+__docformat__ = "restructuredtext en"
+
+# functions that are now methods
+__all__ = ['take', 'reshape', 'choose', 'repeat', 'put',
+           'swapaxes', 'transpose', 'sort', 'argsort', 'argmax', 'argmin',
+           'searchsorted', 'alen',
+           'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape',
+           'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue',
+           'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim',
+           'rank', 'size', 'around', 'round_', 'mean', 'std', 'var', 'squeeze',
+           'amax', 'amin',
+          ]
+          
+def take(a, indices, axis=None, out=None, mode='raise'):
+    """
+    Take elements from an array along an axis.
+
+    This function does the same thing as "fancy" indexing (indexing arrays
+    using arrays); however, it can be easier to use if you need elements
+    along a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        The source array.
+    indices : array_like
+        The indices of the values to extract.
+    axis : int, optional
+        The axis over which to select values. By default, the flattened
+        input array is used.
+    out : ndarray, optional
+        If provided, the result will be placed in this array. It should
+        be of the appropriate shape and dtype.
+    mode : {'raise', 'wrap', 'clip'}, optional
+        Specifies how out-of-bounds indices will behave.
+
+        * 'raise' -- raise an error (default)
+        * 'wrap' -- wrap around
+        * 'clip' -- clip to the range
+
+        'clip' mode means that all indices that are too large are replaced
+        by the index that addresses the last element along that axis. Note
+        that this disables indexing with negative numbers.
+
+    Returns
+    -------
+    subarray : ndarray
+        The returned array has the same type as `a`.
+
+    See Also
+    --------
+    ndarray.take : equivalent method
+
+    Examples
+    --------
+    >>> a = [4, 3, 5, 7, 6, 8]
+    >>> indices = [0, 1, 4]
+    >>> np.take(a, indices)
+    array([4, 3, 6])
+
+    In this example if `a` is an ndarray, "fancy" indexing can be used.
+
+    >>> a = np.array(a)
+    >>> a[indices]
+    array([4, 3, 6])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+# not deprecated --- copy if necessary, view otherwise
+def reshape(a, newshape, order='C'):
+    """
+    Gives a new shape to an array without changing its data.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to be reshaped.
+    newshape : int or tuple of ints
+        The new shape should be compatible with the original shape. If
+        an integer, then the result will be a 1-D array of that length.
+        One shape dimension can be -1. In this case, the value is inferred
+        from the length of the array and remaining dimensions.
+    order : {'C', 'F', 'A'}, optional
+        Determines whether the array data should be viewed as in C
+        (row-major) order, FORTRAN (column-major) order, or the C/FORTRAN
+        order should be preserved.
+
+    Returns
+    -------
+    reshaped_array : ndarray
+        This will be a new view object if possible; otherwise, it will
+        be a copy.
+
+
+    See Also
+    --------
+    ndarray.reshape : Equivalent method.
+
+    Notes
+    -----
+
+    It is not always possible to change the shape of an array without
+    copying the data. If you want an error to be raise if the data is copied,
+    you should assign the new shape to the shape attribute of the array::
+
+     >>> a = np.zeros((10, 2))
+     # A transpose make the array non-contiguous
+     >>> b = a.T
+     # Taking a view makes it possible to modify the shape without modiying the
+     # initial object.
+     >>> c = b.view()
+     >>> c.shape = (20)
+     AttributeError: incompatible shape for a non-contiguous array
+
+
+    Examples
+    --------
+    >>> a = np.array([[1,2,3], [4,5,6]])
+    >>> np.reshape(a, 6)
+    array([1, 2, 3, 4, 5, 6])
+    >>> np.reshape(a, 6, order='F')
+    array([1, 4, 2, 5, 3, 6])
+
+    >>> np.reshape(a, (3,-1))       # the unspecified value is inferred to be 2
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+
+    """
+    if not hasattr(a, 'reshape'):
+       a = numpypy.array(a)
+    return a.reshape(newshape)
+
+
+def choose(a, choices, out=None, mode='raise'):
+    """
+    Construct an array from an index array and a set of arrays to choose from.
+
+    First of all, if confused or uncertain, definitely look at the Examples -
+    in its full generality, this function is less simple than it might
+    seem from the following code description (below ndi =
+    `numpy.lib.index_tricks`):
+
+    ``np.choose(a,c) == np.array([c[a[I]][I] for I in ndi.ndindex(a.shape)])``.
+
+    But this omits some subtleties.  Here is a fully general summary:
+
+    Given an "index" array (`a`) of integers and a sequence of `n` arrays
+    (`choices`), `a` and each choice array are first broadcast, as necessary,
+    to arrays of a common shape; calling these *Ba* and *Bchoices[i], i =
+    0,...,n-1* we have that, necessarily, ``Ba.shape == Bchoices[i].shape``
+    for each `i`.  Then, a new array with shape ``Ba.shape`` is created as
+    follows:
+
+    * if ``mode=raise`` (the default), then, first of all, each element of
+      `a` (and thus `Ba`) must be in the range `[0, n-1]`; now, suppose that
+      `i` (in that range) is the value at the `(j0, j1, ..., jm)` position
+      in `Ba` - then the value at the same position in the new array is the
+      value in `Bchoices[i]` at that same position;
+
+    * if ``mode=wrap``, values in `a` (and thus `Ba`) may be any (signed)
+      integer; modular arithmetic is used to map integers outside the range
+      `[0, n-1]` back into that range; and then the new array is constructed
+      as above;
+
+    * if ``mode=clip``, values in `a` (and thus `Ba`) may be any (signed)
+      integer; negative integers are mapped to 0; values greater than `n-1`
+      are mapped to `n-1`; and then the new array is constructed as above.
+
+    Parameters
+    ----------
+    a : int array
+        This array must contain integers in `[0, n-1]`, where `n` is the number
+        of choices, unless ``mode=wrap`` or ``mode=clip``, in which cases any
+        integers are permissible.
+    choices : sequence of arrays
+        Choice arrays. `a` and all of the choices must be broadcastable to the
+        same shape.  If `choices` is itself an array (not recommended), then
+        its outermost dimension (i.e., the one corresponding to
+        ``choices.shape[0]``) is taken as defining the "sequence".
+    out : array, optional
+        If provided, the result will be inserted into this array. It should
+        be of the appropriate shape and dtype.
+    mode : {'raise' (default), 'wrap', 'clip'}, optional
+        Specifies how indices outside `[0, n-1]` will be treated:
+
+          * 'raise' : an exception is raised
+          * 'wrap' : value becomes value mod `n`
+          * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1
+
+    Returns
+    -------
+    merged_array : array
+        The merged result.
+
+    Raises
+    ------
+    ValueError: shape mismatch
+        If `a` and each choice array are not all broadcastable to the same
+        shape.
+
+    See Also
+    --------
+    ndarray.choose : equivalent method
+
+    Notes
+    -----
+    To reduce the chance of misinterpretation, even though the following
+    "abuse" is nominally supported, `choices` should neither be, nor be
+    thought of as, a single array, i.e., the outermost sequence-like container
+    should be either a list or a tuple.
+
+    Examples
+    --------
+
+    >>> choices = [[0, 1, 2, 3], [10, 11, 12, 13],
+    ...   [20, 21, 22, 23], [30, 31, 32, 33]]
+    >>> np.choose([2, 3, 1, 0], choices
+    ... # the first element of the result will be the first element of the
+    ... # third (2+1) "array" in choices, namely, 20; the second element
+    ... # will be the second element of the fourth (3+1) choice array, i.e.,
+    ... # 31, etc.
+    ... )
+    array([20, 31, 12,  3])
+    >>> np.choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1)
+    array([20, 31, 12,  3])
+    >>> # because there are 4 choice arrays
+    >>> np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4)
+    array([20,  1, 12,  3])
+    >>> # i.e., 0
+
+    A couple examples illustrating how choose broadcasts:
+
+    >>> a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
+    >>> choices = [-10, 10]
+    >>> np.choose(a, choices)
+    array([[ 10, -10,  10],
+           [-10,  10, -10],
+           [ 10, -10,  10]])
+
+    >>> # With thanks to Anne Archibald
+    >>> a = np.array([0, 1]).reshape((2,1,1))
+    >>> c1 = np.array([1, 2, 3]).reshape((1,3,1))
+    >>> c2 = np.array([-1, -2, -3, -4, -5]).reshape((1,1,5))
+    >>> np.choose(a, (c1, c2)) # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2
+    array([[[ 1,  1,  1,  1,  1],
+            [ 2,  2,  2,  2,  2],
+            [ 3,  3,  3,  3,  3]],
+           [[-1, -2, -3, -4, -5],
+            [-1, -2, -3, -4, -5],
+            [-1, -2, -3, -4, -5]]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def repeat(a, repeats, axis=None):
+    """
+    Repeat elements of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    repeats : {int, array of ints}
+        The number of repetitions for each element.  `repeats` is broadcasted
+        to fit the shape of the given axis.
+    axis : int, optional
+        The axis along which to repeat values.  By default, use the
+        flattened input array, and return a flat output array.
+
+    Returns
+    -------
+    repeated_array : ndarray
+        Output array which has the same shape as `a`, except along
+        the given axis.
+
+    See Also
+    --------
+    tile : Tile an array.
+
+    Examples
+    --------
+    >>> x = np.array([[1,2],[3,4]])
+    >>> np.repeat(x, 2)
+    array([1, 1, 2, 2, 3, 3, 4, 4])
+    >>> np.repeat(x, 3, axis=1)
+    array([[1, 1, 1, 2, 2, 2],
+           [3, 3, 3, 4, 4, 4]])
+    >>> np.repeat(x, [1, 2], axis=0)
+    array([[1, 2],
+           [3, 4],
+           [3, 4]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def put(a, ind, v, mode='raise'):
+    """
+    Replaces specified elements of an array with given values.
+
+    The indexing works on the flattened target array. `put` is roughly
+    equivalent to:
+
+    ::
+
+        a.flat[ind] = v
+
+    Parameters
+    ----------
+    a : ndarray
+        Target array.
+    ind : array_like
+        Target indices, interpreted as integers.
+    v : array_like
+        Values to place in `a` at target indices. If `v` is shorter than
+        `ind` it will be repeated as necessary.
+    mode : {'raise', 'wrap', 'clip'}, optional
+        Specifies how out-of-bounds indices will behave.
+
+        * 'raise' -- raise an error (default)
+        * 'wrap' -- wrap around
+        * 'clip' -- clip to the range
+
+        'clip' mode means that all indices that are too large are replaced
+        by the index that addresses the last element along that axis. Note
+        that this disables indexing with negative numbers.
+
+    See Also
+    --------
+    putmask, place
+
+    Examples
+    --------
+    >>> a = np.arange(5)
+    >>> np.put(a, [0, 2], [-44, -55])
+    >>> a
+    array([-44,   1, -55,   3,   4])
+
+    >>> a = np.arange(5)
+    >>> np.put(a, 22, -5, mode='clip')
+    >>> a
+    array([ 0,  1,  2,  3, -5])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def swapaxes(a, axis1, axis2):
+    """
+    Interchange two axes of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis1 : int
+        First axis.
+    axis2 : int
+        Second axis.
+
+    Returns
+    -------
+    a_swapped : ndarray
+        If `a` is an ndarray, then a view of `a` is returned; otherwise
+        a new array is created.
+
+    Examples
+    --------
+    >>> x = np.array([[1,2,3]])
+    >>> np.swapaxes(x,0,1)
+    array([[1],
+           [2],
+           [3]])
+
+    >>> x = np.array([[[0,1],[2,3]],[[4,5],[6,7]]])
+    >>> x
+    array([[[0, 1],
+            [2, 3]],
+           [[4, 5],
+            [6, 7]]])
+
+    >>> np.swapaxes(x,0,2)
+    array([[[0, 4],
+            [2, 6]],
+           [[1, 5],
+            [3, 7]]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def transpose(a, axes=None):
+    """
+    Permute the dimensions of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axes : list of ints, optional
+        By default, reverse the dimensions, otherwise permute the axes
+        according to the values given.
+
+    Returns
+    -------
+    p : ndarray
+        `a` with its axes permuted.  A view is returned whenever
+        possible.
+
+    See Also
+    --------
+    rollaxis
+
+    Examples
+    --------
+    >>> x = np.arange(4).reshape((2,2))
+    >>> x
+    array([[0, 1],
+           [2, 3]])
+
+    >>> np.transpose(x)
+    array([[0, 2],
+           [1, 3]])
+
+    >>> x = np.ones((1, 2, 3))
+    >>> np.transpose(x, (1, 0, 2)).shape
+    (2, 1, 3)
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def sort(a, axis=-1, kind='quicksort', order=None):
+    """
+    Return a sorted copy of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to be sorted.
+    axis : int or None, optional
+        Axis along which to sort. If None, the array is flattened before
+        sorting. The default is -1, which sorts along the last axis.
+    kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+        Sorting algorithm. Default is 'quicksort'.
+    order : list, optional
+        When `a` is a structured array, this argument specifies which fields
+        to compare first, second, and so on.  This list does not need to
+        include all of the fields.
+
+    Returns
+    -------
+    sorted_array : ndarray
+        Array of the same type and shape as `a`.
+
+    See Also
+    --------
+    ndarray.sort : Method to sort an array in-place.
+    argsort : Indirect sort.
+    lexsort : Indirect stable sort on multiple keys.
+    searchsorted : Find elements in a sorted array.
+
+    Notes
+    -----
+    The various sorting algorithms are characterized by their average speed,
+    worst case performance, work space size, and whether they are stable. A
+    stable sort keeps items with the same key in the same relative
+    order. The three available algorithms have the following
+    properties:
+
+    =========== ======= ============= ============ =======
+       kind      speed   worst case    work space  stable
+    =========== ======= ============= ============ =======
+    'quicksort'    1     O(n^2)            0          no
+    'mergesort'    2     O(n*log(n))      ~n/2        yes
+    'heapsort'     3     O(n*log(n))       0          no
+    =========== ======= ============= ============ =======
+
+    All the sort algorithms make temporary copies of the data when
+    sorting along any but the last axis.  Consequently, sorting along
+    the last axis is faster and uses less space than sorting along
+    any other axis.
+
+    The sort order for complex numbers is lexicographic. If both the real
+    and imaginary parts are non-nan then the order is determined by the
+    real parts except when they are equal, in which case the order is
+    determined by the imaginary parts.
+
+    Previous to numpy 1.4.0 sorting real and complex arrays containing nan
+    values led to undefined behaviour. In numpy versions >= 1.4.0 nan
+    values are sorted to the end. The extended sort order is:
+
+      * Real: [R, nan]
+      * Complex: [R + Rj, R + nanj, nan + Rj, nan + nanj]
+
+    where R is a non-nan real value. Complex values with the same nan
+    placements are sorted according to the non-nan part if it exists.
+    Non-nan values are sorted as before.
+
+    Examples
+    --------
+    >>> a = np.array([[1,4],[3,1]])
+    >>> np.sort(a)                # sort along the last axis
+    array([[1, 4],
+           [1, 3]])
+    >>> np.sort(a, axis=None)     # sort the flattened array
+    array([1, 1, 3, 4])
+    >>> np.sort(a, axis=0)        # sort along the first axis
+    array([[1, 1],
+           [3, 4]])
+
+    Use the `order` keyword to specify a field to use when sorting a
+    structured array:
+
+    >>> dtype = [('name', 'S10'), ('height', float), ('age', int)]
+    >>> values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
+    ...           ('Galahad', 1.7, 38)]
+    >>> a = np.array(values, dtype=dtype)       # create a structured array
+    >>> np.sort(a, order='height')                        # doctest: +SKIP
+    array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
+           ('Lancelot', 1.8999999999999999, 38)],
+          dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+    Sort by age, then height if ages are equal:
+
+    >>> np.sort(a, order=['age', 'height'])               # doctest: +SKIP
+    array([('Galahad', 1.7, 38), ('Lancelot', 1.8999999999999999, 38),
+           ('Arthur', 1.8, 41)],
+          dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def argsort(a, axis=-1, kind='quicksort', order=None):
+    """
+    Returns the indices that would sort an array.
+
+    Perform an indirect sort along the given axis using the algorithm specified
+    by the `kind` keyword. It returns an array of indices of the same shape as
+    `a` that index data along the given axis in sorted order.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to sort.
+    axis : int or None, optional
+        Axis along which to sort.  The default is -1 (the last axis). If None,
+        the flattened array is used.
+    kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+        Sorting algorithm.
+    order : list, optional
+        When `a` is an array with fields defined, this argument specifies
+        which fields to compare first, second, etc.  Not all fields need be
+        specified.
+
+    Returns
+    -------
+    index_array : ndarray, int
+        Array of indices that sort `a` along the specified axis.
+        In other words, ``a[index_array]`` yields a sorted `a`.
+
+    See Also
+    --------
+    sort : Describes sorting algorithms used.
+    lexsort : Indirect stable sort with multiple keys.
+    ndarray.sort : Inplace sort.
+
+    Notes
+    -----
+    See `sort` for notes on the different sorting algorithms.
+
+    As of NumPy 1.4.0 `argsort` works with real/complex arrays containing
+    nan values. The enhanced sort order is documented in `sort`.
+
+    Examples
+    --------
+    One dimensional array:
+
+    >>> x = np.array([3, 1, 2])
+    >>> np.argsort(x)
+    array([1, 2, 0])
+
+    Two-dimensional array:
+
+    >>> x = np.array([[0, 3], [2, 2]])
+    >>> x
+    array([[0, 3],
+           [2, 2]])
+
+    >>> np.argsort(x, axis=0)
+    array([[0, 1],
+           [1, 0]])
+
+    >>> np.argsort(x, axis=1)
+    array([[0, 1],
+           [0, 1]])
+
+    Sorting with keys:
+
+    >>> x = np.array([(1, 0), (0, 1)], dtype=[('x', '<i4'), ('y', '<i4')])
+    >>> x
+    array([(1, 0), (0, 1)],
+          dtype=[('x', '<i4'), ('y', '<i4')])
+
+    >>> np.argsort(x, order=('x','y'))
+    array([1, 0])
+
+    >>> np.argsort(x, order=('y','x'))
+    array([0, 1])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def argmax(a, axis=None):
+    """
+    Indices of the maximum values along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        By default, the index is into the flattened array, otherwise
+        along the specified axis.
+
+    Returns
+    -------
+    index_array : ndarray of ints
+        Array of indices into the array. It has the same shape as `a.shape`
+        with the dimension along `axis` removed.
+
+    See Also
+    --------
+    ndarray.argmax, argmin
+    amax : The maximum value along a given axis.
+    unravel_index : Convert a flat index into an index tuple.
+
+    Notes
+    -----
+    In case of multiple occurrences of the maximum values, the indices
+    corresponding to the first occurrence are returned.
+
+    Examples
+    --------
+    >>> a = np.arange(6).reshape(2,3)
+    >>> a
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    >>> np.argmax(a)
+    5
+    >>> np.argmax(a, axis=0)
+    array([1, 1, 1])
+    >>> np.argmax(a, axis=1)
+    array([2, 2])
+
+    >>> b = np.arange(6)
+    >>> b[1] = 5
+    >>> b
+    array([0, 5, 2, 3, 4, 5])
+    >>> np.argmax(b) # Only the first occurrence is returned.
+    1
+
+    """
+    if not hasattr(a, 'argmax'):
+        a = numpypy.array(a)
+    return a.argmax()
+
+
+def argmin(a, axis=None):
+    """
+    Return the indices of the minimum values along an axis.
+
+    See Also
+    --------
+    argmax : Similar function.  Please refer to `numpy.argmax` for detailed
+        documentation.
+
+    """
+    if not hasattr(a, 'argmin'):
+        a = numpypy.array(a)
+    return a.argmin()
+
+
+def searchsorted(a, v, side='left'):
+    """
+    Find indices where elements should be inserted to maintain order.
+
+    Find the indices into a sorted array `a` such that, if the corresponding
+    elements in `v` were inserted before the indices, the order of `a` would
+    be preserved.
+
+    Parameters
+    ----------
+    a : 1-D array_like
+        Input array, sorted in ascending order.
+    v : array_like
+        Values to insert into `a`.
+    side : {'left', 'right'}, optional
+        If 'left', the index of the first suitable location found is given.  If
+        'right', return the last such index.  If there is no suitable
+        index, return either 0 or N (where N is the length of `a`).
+
+    Returns
+    -------
+    indices : array of ints
+        Array of insertion points with the same shape as `v`.
+
+    See Also
+    --------
+    sort : Return a sorted copy of an array.
+    histogram : Produce histogram from 1-D data.
+
+    Notes
+    -----
+    Binary search is used to find the required insertion points.
+
+    As of Numpy 1.4.0 `searchsorted` works with real/complex arrays containing
+    `nan` values. The enhanced sort order is documented in `sort`.
+
+    Examples
+    --------
+    >>> np.searchsorted([1,2,3,4,5], 3)
+    2
+    >>> np.searchsorted([1,2,3,4,5], 3, side='right')
+    3
+    >>> np.searchsorted([1,2,3,4,5], [-10, 10, 2, 3])
+    array([0, 5, 1, 2])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def resize(a, new_shape):
+    """
+    Return a new array with the specified shape.
+
+    If the new array is larger than the original array, then the new
+    array is filled with repeated copies of `a`.  Note that this behavior
+    is different from a.resize(new_shape) which fills with zeros instead
+    of repeated copies of `a`.
+
+    Parameters
+    ----------
+    a : array_like
+        Array to be resized.
+
+    new_shape : int or tuple of int
+        Shape of resized array.
+
+    Returns
+    -------
+    reshaped_array : ndarray
+        The new array is formed from the data in the old array, repeated
+        if necessary to fill out the required number of elements.  The
+        data are repeated in the order that they are stored in memory.
+
+    See Also
+    --------
+    ndarray.resize : resize an array in-place.
+
+    Examples
+    --------
+    >>> a=np.array([[0,1],[2,3]])
+    >>> np.resize(a,(1,4))
+    array([[0, 1, 2, 3]])
+    >>> np.resize(a,(2,4))
+    array([[0, 1, 2, 3],
+           [0, 1, 2, 3]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def squeeze(a):
+    """
+    Remove single-dimensional entries from the shape of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+
+    Returns
+    -------
+    squeezed : ndarray
+        The input array, but with with all dimensions of length 1
+        removed.  Whenever possible, a view on `a` is returned.
+
+    Examples
+    --------
+    >>> x = np.array([[[0], [1], [2]]])
+    >>> x.shape
+    (1, 3, 1)
+    >>> np.squeeze(x).shape
+    (3,)
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def diagonal(a, offset=0, axis1=0, axis2=1):
+    """
+    Return specified diagonals.
+
+    If `a` is 2-D, returns the diagonal of `a` with the given offset,
+    i.e., the collection of elements of the form ``a[i, i+offset]``.  If
+    `a` has more than two dimensions, then the axes specified by `axis1`
+    and `axis2` are used to determine the 2-D sub-array whose diagonal is
+    returned.  The shape of the resulting array can be determined by
+    removing `axis1` and `axis2` and appending an index to the right equal
+    to the size of the resulting diagonals.
+
+    Parameters
+    ----------
+    a : array_like
+        Array from which the diagonals are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal.  Can be positive or
+        negative.  Defaults to main diagonal (0).
+    axis1 : int, optional
+        Axis to be used as the first axis of the 2-D sub-arrays from which
+        the diagonals should be taken.  Defaults to first axis (0).
+    axis2 : int, optional
+        Axis to be used as the second axis of the 2-D sub-arrays from
+        which the diagonals should be taken. Defaults to second axis (1).
+
+    Returns
+    -------
+    array_of_diagonals : ndarray
+        If `a` is 2-D, a 1-D array containing the diagonal is returned.
+        If the dimension of `a` is larger, then an array of diagonals is
+        returned, "packed" from left-most dimension to right-most (e.g.,
+        if `a` is 3-D, then the diagonals are "packed" along rows).
+
+    Raises
+    ------
+    ValueError
+        If the dimension of `a` is less than 2.
+
+    See Also
+    --------
+    diag : MATLAB work-a-like for 1-D and 2-D arrays.
+    diagflat : Create diagonal arrays.
+    trace : Sum along diagonals.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape(2,2)
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> a.diagonal()
+    array([0, 3])
+    >>> a.diagonal(1)
+    array([1])
+
+    A 3-D example:
+
+    >>> a = np.arange(8).reshape(2,2,2); a
+    array([[[0, 1],
+            [2, 3]],
+           [[4, 5],
+            [6, 7]]])
+    >>> a.diagonal(0, # Main diagonals of two arrays created by skipping
+    ...            0, # across the outer(left)-most axis last and
+    ...            1) # the "middle" (row) axis first.
+    array([[0, 6],
+           [1, 7]])
+
+    The sub-arrays whose main diagonals we just obtained; note that each
+    corresponds to fixing the right-most (column) axis, and that the
+    diagonals are "packed" in rows.
+
+    >>> a[:,:,0] # main diagonal is [0 6]
+    array([[0, 2],
+           [4, 6]])
+    >>> a[:,:,1] # main diagonal is [1 7]
+    array([[1, 3],
+           [5, 7]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None):
+    """
+    Return the sum along diagonals of the array.
+
+    If `a` is 2-D, the sum along its diagonal with the given offset
+    is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i.
+
+    If `a` has more than two dimensions, then the axes specified by axis1 and
+    axis2 are used to determine the 2-D sub-arrays whose traces are returned.
+    The shape of the resulting array is the same as that of `a` with `axis1`
+    and `axis2` removed.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array, from which the diagonals are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal. Can be both positive
+        and negative. Defaults to 0.
+    axis1, axis2 : int, optional
+        Axes to be used as the first and second axis of the 2-D sub-arrays
+        from which the diagonals should be taken. Defaults are the first two
+        axes of `a`.
+    dtype : dtype, optional
+        Determines the data-type of the returned array and of the accumulator
+        where the elements are summed. If dtype has the value None and `a` is
+        of integer type of precision less than the default integer
+        precision, then the default integer precision is used. Otherwise,
+        the precision is the same as that of `a`.
+    out : ndarray, optional
+        Array into which the output is placed. Its type is preserved and
+        it must be of the right shape to hold the output.
+
+    Returns
+    -------
+    sum_along_diagonals : ndarray
+        If `a` is 2-D, the sum along the diagonal is returned.  If `a` has
+        larger dimensions, then an array of sums along diagonals is returned.
+
+    See Also
+    --------
+    diag, diagonal, diagflat
+
+    Examples
+    --------
+    >>> np.trace(np.eye(3))
+    3.0
+    >>> a = np.arange(8).reshape((2,2,2))
+    >>> np.trace(a)
+    array([6, 8])
+
+    >>> a = np.arange(24).reshape((2,2,2,3))
+    >>> np.trace(a).shape
+    (2, 3)
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+def ravel(a, order='C'):
+    """
+    Return a flattened array.
+
+    A 1-D array, containing the elements of the input, is returned.  A copy is
+    made only if needed.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.  The elements in ``a`` are read in the order specified by
+        `order`, and packed as a 1-D array.
+    order : {'C','F', 'A', 'K'}, optional
+        The elements of ``a`` are read in this order. 'C' means to view
+        the elements in C (row-major) order. 'F' means to view the elements
+        in Fortran (column-major) order. 'A' means to view the elements
+        in 'F' order if a is Fortran contiguous, 'C' order otherwise.
+        'K' means to view the elements in the order they occur in memory,
+        except for reversing the data when strides are negative.
+        By default, 'C' order is used.
+
+    Returns
+    -------
+    1d_array : ndarray
+        Output of the same dtype as `a`, and of shape ``(a.size(),)``.
+
+    See Also
+    --------
+    ndarray.flat : 1-D iterator over an array.
+    ndarray.flatten : 1-D array copy of the elements of an array
+                      in row-major order.
+
+    Notes
+    -----
+    In row-major order, the row index varies the slowest, and the column
+    index the quickest.  This can be generalized to multiple dimensions,
+    where row-major order implies that the index along the first axis
+    varies slowest, and the index along the last quickest.  The opposite holds
+    for Fortran-, or column-major, mode.
+
+    Examples
+    --------
+    It is equivalent to ``reshape(-1, order=order)``.
+
+    >>> x = np.array([[1, 2, 3], [4, 5, 6]])
+    >>> print np.ravel(x)
+    [1 2 3 4 5 6]
+
+    >>> print x.reshape(-1)
+    [1 2 3 4 5 6]
+
+    >>> print np.ravel(x, order='F')
+    [1 4 2 5 3 6]
+
+    When ``order`` is 'A', it will preserve the array's 'C' or 'F' ordering:
+
+    >>> print np.ravel(x.T)
+    [1 4 2 5 3 6]
+    >>> print np.ravel(x.T, order='A')
+    [1 2 3 4 5 6]
+
+    When ``order`` is 'K', it will preserve orderings that are neither 'C'
+    nor 'F', but won't reverse axes:
+
+    >>> a = np.arange(3)[::-1]; a
+    array([2, 1, 0])
+    >>> a.ravel(order='C')
+    array([2, 1, 0])
+    >>> a.ravel(order='K')
+    array([2, 1, 0])
+
+    >>> a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a
+    array([[[ 0,  2,  4],
+            [ 1,  3,  5]],
+           [[ 6,  8, 10],
+            [ 7,  9, 11]]])
+    >>> a.ravel(order='C')
+    array([ 0,  2,  4,  1,  3,  5,  6,  8, 10,  7,  9, 11])
+    >>> a.ravel(order='K')
+    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def nonzero(a):
+    """
+    Return the indices of the elements that are non-zero.
+
+    Returns a tuple of arrays, one for each dimension of `a`, containing
+    the indices of the non-zero elements in that dimension. The
+    corresponding non-zero values can be obtained with::
+
+        a[nonzero(a)]
+
+    To group the indices by element, rather than dimension, use::
+
+        transpose(nonzero(a))
+
+    The result of this is always a 2-D array, with a row for
+    each non-zero element.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+
+    Returns
+    -------
+    tuple_of_arrays : tuple
+        Indices of elements that are non-zero.
+
+    See Also
+    --------
+    flatnonzero :
+        Return indices that are non-zero in the flattened version of the input
+        array.
+    ndarray.nonzero :
+        Equivalent ndarray method.
+    count_nonzero :
+        Counts the number of non-zero elements in the input array.
+
+    Examples
+    --------
+    >>> x = np.eye(3)
+    >>> x
+    array([[ 1.,  0.,  0.],
+           [ 0.,  1.,  0.],
+           [ 0.,  0.,  1.]])
+    >>> np.nonzero(x)
+    (array([0, 1, 2]), array([0, 1, 2]))
+
+    >>> x[np.nonzero(x)]
+    array([ 1.,  1.,  1.])
+    >>> np.transpose(np.nonzero(x))
+    array([[0, 0],
+           [1, 1],
+           [2, 2]])
+
+    A common use for ``nonzero`` is to find the indices of an array, where
+    a condition is True.  Given an array `a`, the condition `a` > 3 is a
+    boolean array and since False is interpreted as 0, np.nonzero(a > 3)
+    yields the indices of the `a` where the condition is true.
+
+    >>> a = np.array([[1,2,3],[4,5,6],[7,8,9]])
+    >>> a > 3
+    array([[False, False, False],
+           [ True,  True,  True],
+           [ True,  True,  True]], dtype=bool)
+    >>> np.nonzero(a > 3)
+    (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
+
+    The ``nonzero`` method of the boolean array can also be called.
+
+    >>> (a > 3).nonzero()
+    (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def shape(a):
+    """
+    Return the shape of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+
+    Returns
+    -------
+    shape : tuple of ints
+        The elements of the shape tuple give the lengths of the
+        corresponding array dimensions.
+
+    See Also
+    --------
+    alen
+    ndarray.shape : Equivalent array method.
+
+    Examples
+    --------
+    >>> np.shape(np.eye(3))
+    (3, 3)
+    >>> np.shape([[1, 2]])
+    (1, 2)
+    >>> np.shape([0])
+    (1,)
+    >>> np.shape(0)
+    ()
+
+    >>> a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+    >>> np.shape(a)
+    (2,)
+    >>> a.shape
+    (2,)
+
+    """
+    if not hasattr(a, 'shape'):
+        a = numpypy.array(a)
+    return a.shape
+
+
+def compress(condition, a, axis=None, out=None):
+    """
+    Return selected slices of an array along given axis.
+
+    When working along a given axis, a slice along that axis is returned in
+    `output` for each index where `condition` evaluates to True. When
+    working on a 1-D array, `compress` is equivalent to `extract`.
+
+    Parameters
+    ----------
+    condition : 1-D array of bools
+        Array that selects which entries to return. If len(condition)
+        is less than the size of `a` along the given axis, then output is
+        truncated to the length of the condition array.
+    a : array_like
+        Array from which to extract a part.
+    axis : int, optional
+        Axis along which to take slices. If None (default), work on the
+        flattened array.
+    out : ndarray, optional
+        Output array.  Its type is preserved and it must be of the right
+        shape to hold the output.
+
+    Returns
+    -------
+    compressed_array : ndarray
+        A copy of `a` without the slices along axis for which `condition`
+        is false.
+
+    See Also
+    --------
+    take, choose, diag, diagonal, select
+    ndarray.compress : Equivalent method.
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4], [5, 6]])
+    >>> a
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.compress([0, 1], a, axis=0)
+    array([[3, 4]])
+    >>> np.compress([False, True, True], a, axis=0)
+    array([[3, 4],
+           [5, 6]])
+    >>> np.compress([False, True], a, axis=1)
+    array([[2],
+           [4],
+           [6]])
+
+    Working on the flattened array does not return slices along an axis but
+    selects elements.
+
+    >>> np.compress([False, True], a)
+    array([2])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def clip(a, a_min, a_max, out=None):
+    """
+    Clip (limit) the values in an array.
+
+    Given an interval, values outside the interval are clipped to
+    the interval edges.  For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing elements to clip.
+    a_min : scalar or array_like
+        Minimum value.
+    a_max : scalar or array_like
+        Maximum value.  If `a_min` or `a_max` are array_like, then they will
+        be broadcasted to the shape of `a`.
+    out : ndarray, optional
+        The results will be placed in this array. It may be the input
+        array for in-place clipping.  `out` must be of the right shape
+        to hold the output.  Its type is preserved.
+
+    Returns
+    -------
+    clipped_array : ndarray
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+
+    See Also
+    --------
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> np.clip(a, 1, 8)
+    array([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.clip(a, 3, 6, out=a)
+    array([3, 3, 3, 3, 4, 5, 6, 6, 6, 6])
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.clip(a, [3,4,1,1,1,4,4,4,4,4], 8)
+    array([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def sum(a, axis=None, dtype=None, out=None):
+    """
+    Sum of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Elements to sum.
+    axis : integer, optional
+        Axis over which the sum is taken. By default `axis` is None,
+        and all elements are summed.
+    dtype : dtype, optional
+        The type of the returned array and of the accumulator in which
+        the elements are summed.  By default, the dtype of `a` is used.
+        An exception is when `a` has an integer type with less precision
+        than the default platform integer.  In that case, the default
+        platform integer is used instead.
+    out : ndarray, optional
+        Array into which the output is placed.  By default, a new array is
+        created.  If `out` is given, it must be of the appropriate shape
+        (the shape of `a` with `axis` removed, i.e.,
+        ``numpy.delete(a.shape, axis)``).  Its type is preserved. See
+        `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    sum_along_axis : ndarray
+        An array with the same shape as `a`, with the specified
+        axis removed.   If `a` is a 0-d array, or if `axis` is None, a scalar
+        is returned.  If an output array is specified, a reference to
+        `out` is returned.
+
+    See Also
+    --------
+    ndarray.sum : Equivalent method.
+
+    cumsum : Cumulative sum of array elements.
+
+    trapz : Integration of array values using the composite trapezoidal rule.
+
+    mean, average
+
+    Notes
+    -----
+    Arithmetic is modular when using integer types, and no error is
+    raised on overflow.
+
+    Examples
+    --------
+    >>> np.sum([0.5, 1.5])
+    2.0
+    >>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
+    1
+    >>> np.sum([[0, 1], [0, 5]])
+    6
+    >>> np.sum([[0, 1], [0, 5]], axis=0)
+    array([0, 6])
+    >>> np.sum([[0, 1], [0, 5]], axis=1)
+    array([1, 5])
+
+    If the accumulator is too small, overflow occurs:
+
+    >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
+    -128
+
+    """
+    if not hasattr(a, "sum"):
+        a = numpypy.array(a)
+    return a.sum()
+
+
+def product (a, axis=None, dtype=None, out=None):
+    """
+    Return the product of array elements over a given axis.
+
+    See Also
+    --------
+    prod : equivalent function; see for details.
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def sometrue(a, axis=None, out=None):
+    """
+    Check whether some values are true.
+
+    Refer to `any` for full documentation.
+
+    See Also
+    --------
+    any : equivalent function
+
+    """
+    if not hasattr(a, 'any'):
+        a = numpypy.array(a)
+    return a.any()
+
+
+def alltrue (a, axis=None, out=None):
+    """
+    Check if all elements of input array are true.
+
+    See Also
+    --------
+    numpy.all : Equivalent function; see for details.
+
+    """
+    if not hasattr(a, 'all'):
+        a = numpypy.array(a)
+    return a.all()
+
+def any(a,axis=None, out=None):
+    """
+    Test whether any array element along a given axis evaluates to True.
+
+    Returns single boolean unless `axis` is not ``None``
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    axis : int, optional
+        Axis along which a logical OR is performed.  The default
+        (`axis` = `None`) is to perform a logical OR over a flattened
+        input array. `axis` may be negative, in which case it counts
+        from the last to the first axis.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  It must have
+        the same shape as the expected output and its type is preserved
+        (e.g., if it is of type float, then it will remain so, returning
+        1.0 for True and 0.0 for False, regardless of the type of `a`).
+        See `doc.ufuncs` (Section "Output arguments") for details.
+
+    Returns
+    -------
+    any : bool or ndarray
+        A new boolean or `ndarray` is returned unless `out` is specified,
+        in which case a reference to `out` is returned.
+
+    See Also
+    --------
+    ndarray.any : equivalent method
+
+    all : Test whether all elements along a given axis evaluate to True.
+
+    Notes
+    -----
+    Not a Number (NaN), positive infinity and negative infinity evaluate
+    to `True` because these are not equal to zero.
+
+    Examples
+    --------
+    >>> np.any([[True, False], [True, True]])
+    True
+
+    >>> np.any([[True, False], [False, False]], axis=0)
+    array([ True, False], dtype=bool)
+
+    >>> np.any([-1, 0, 5])
+    True
+
+    >>> np.any(np.nan)
+    True
+
+    >>> o=np.array([False])
+    >>> z=np.any([-1, 4, 5], out=o)
+    >>> z, o
+    (array([ True], dtype=bool), array([ True], dtype=bool))
+    >>> # Check now that z is a reference to o
+    >>> z is o
+    True
+    >>> id(z), id(o) # identity of z and o              # doctest: +SKIP
+    (191614240, 191614240)
+
+    """
+    if not hasattr(a, 'any'):
+        a = numpypy.array(a)
+    return a.any()
+
+
+def all(a,axis=None, out=None):
+    """
+    Test whether all array elements along a given axis evaluate to True.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    axis : int, optional
+        Axis along which a logical AND is performed.
+        The default (`axis` = `None`) is to perform a logical AND
+        over a flattened input array.  `axis` may be negative, in which
+        case it counts from the last to the first axis.
+    out : ndarray, optional
+        Alternate output array in which to place the result.
+        It must have the same shape as the expected output and its
+        type is preserved (e.g., if ``dtype(out)`` is float, the result
+        will consist of 0.0's and 1.0's).  See `doc.ufuncs` (Section
+        "Output arguments") for more details.
+
+    Returns
+    -------
+    all : ndarray, bool
+        A new boolean or array is returned unless `out` is specified,
+        in which case a reference to `out` is returned.
+
+    See Also
+    --------
+    ndarray.all : equivalent method
+
+    any : Test whether any element along a given axis evaluates to True.
+
+    Notes
+    -----
+    Not a Number (NaN), positive infinity and negative infinity
+    evaluate to `True` because these are not equal to zero.
+
+    Examples
+    --------
+    >>> np.all([[True,False],[True,True]])
+    False
+
+    >>> np.all([[True,False],[True,True]], axis=0)
+    array([ True, False], dtype=bool)
+
+    >>> np.all([-1, 4, 5])
+    True
+
+    >>> np.all([1.0, np.nan])
+    True
+
+    >>> o=np.array([False])
+    >>> z=np.all([-1, 4, 5], out=o)
+    >>> id(z), id(o), z                             # doctest: +SKIP
+    (28293632, 28293632, array([ True], dtype=bool))
+
+    """
+    if not hasattr(a, 'all'):
+        a = numpypy.array(a)
+    return a.all()
+
+
+def cumsum (a, axis=None, dtype=None, out=None):
+    """
+    Return the cumulative sum of the elements along a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        Axis along which the cumulative sum is computed. The default
+        (None) is to compute the cumsum over the flattened array.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are summed.  If `dtype` is not specified, it defaults
+        to the dtype of `a`, unless `a` has an integer dtype with a
+        precision less than that of the default platform integer.  In
+        that case, the default platform integer is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output
+        but the type will be cast if necessary. See `doc.ufuncs`
+        (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    cumsum_along_axis : ndarray.
+        A new array holding the result is returned unless `out` is
+        specified, in which case a reference to `out` is returned. The
+        result has the same size as `a`, and the same shape as `a` if
+        `axis` is not None or `a` is a 1-d array.
+
+
+    See Also
+    --------
+    sum : Sum array elements.
+
+    trapz : Integration of array values using the composite trapezoidal rule.
+
+    Notes
+    -----
+    Arithmetic is modular when using integer types, and no error is
+    raised on overflow.
+
+    Examples
+    --------
+    >>> a = np.array([[1,2,3], [4,5,6]])
+    >>> a
+    array([[1, 2, 3],
+           [4, 5, 6]])
+    >>> np.cumsum(a)
+    array([ 1,  3,  6, 10, 15, 21])
+    >>> np.cumsum(a, dtype=float)     # specifies type of output value(s)
+    array([  1.,   3.,   6.,  10.,  15.,  21.])
+
+    >>> np.cumsum(a,axis=0)      # sum over rows for each of the 3 columns
+    array([[1, 2, 3],
+           [5, 7, 9]])
+    >>> np.cumsum(a,axis=1)      # sum over columns for each of the 2 rows
+    array([[ 1,  3,  6],
+           [ 4,  9, 15]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def cumproduct(a, axis=None, dtype=None, out=None):
+    """
+    Return the cumulative product over the given axis.
+
+
+    See Also
+    --------
+    cumprod : equivalent function; see for details.
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def ptp(a, axis=None, out=None):
+    """
+    Range of values (maximum - minimum) along an axis.
+
+    The name of the function comes from the acronym for 'peak to peak'.
+
+    Parameters
+    ----------
+    a : array_like
+        Input values.
+    axis : int, optional
+        Axis along which to find the peaks.  By default, flatten the
+        array.
+    out : array_like
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output,
+        but the type of the output values will be cast if necessary.
+
+    Returns
+    -------
+    ptp : ndarray
+        A new array holding the result, unless `out` was
+        specified, in which case a reference to `out` is returned.
+
+    Examples
+    --------
+    >>> x = np.arange(4).reshape((2,2))
+    >>> x
+    array([[0, 1],
+           [2, 3]])
+
+    >>> np.ptp(x, axis=0)
+    array([2, 2])
+
+    >>> np.ptp(x, axis=1)
+    array([1, 1])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def amax(a, axis=None, out=None):
+    """
+    Return the maximum of an array or maximum along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which to operate.  By default flattened input is used.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  Must be of
+        the same shape and buffer length as the expected output.  See
+        `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    amax : ndarray or scalar
+        Maximum of `a`. If `axis` is None, the result is a scalar value.
+        If `axis` is given, the result is an array of dimension
+        ``a.ndim - 1``.
+
+    See Also
+    --------
+    nanmax : NaN values are ignored instead of being propagated.
+    fmax : same behavior as the C99 fmax function.
+    argmax : indices of the maximum values.
+
+    Notes
+    -----
+    NaN values are propagated, that is if at least one item is NaN, the
+    corresponding max value will be NaN as well.  To ignore NaN values
+    (MATLAB behavior), please use nanmax.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape((2,2))
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.amax(a)
+    3
+    >>> np.amax(a, axis=0)
+    array([2, 3])
+    >>> np.amax(a, axis=1)
+    array([1, 3])
+
+    >>> b = np.arange(5, dtype=np.float)
+    >>> b[2] = np.NaN
+    >>> np.amax(b)
+    nan
+    >>> np.nanmax(b)
+    4.0
+
+    """
+    if not hasattr(a, "max"):
+        a = numpypy.array(a)
+    return a.max()
+
+
+def amin(a, axis=None, out=None):
+    """
+    Return the minimum of an array or minimum along an axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which to operate.  By default a flattened input is used.
+    out : ndarray, optional
+        Alternative output array in which to place the result.  Must
+        be of the same shape and buffer length as the expected output.
+        See `doc.ufuncs` (Section "Output arguments") for more details.
+
+    Returns
+    -------
+    amin : ndarray
+        A new array or a scalar array with the result.
+
+    See Also
+    --------
+    nanmin: nan values are ignored instead of being propagated
+    fmin: same behavior as the C99 fmin function
+    argmin: Return the indices of the minimum values.
+
+    amax, nanmax, fmax
+
+    Notes
+    -----
+    NaN values are propagated, that is if at least one item is nan, the
+    corresponding min value will be nan as well. To ignore NaN values (matlab
+    behavior), please use nanmin.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape((2,2))
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.amin(a)           # Minimum of the flattened array
+    0
+    >>> np.amin(a, axis=0)         # Minima along the first axis
+    array([0, 1])
+    >>> np.amin(a, axis=1)         # Minima along the second axis
+    array([0, 2])
+
+    >>> b = np.arange(5, dtype=np.float)
+    >>> b[2] = np.NaN
+    >>> np.amin(b)
+    nan
+    >>> np.nanmin(b)
+    0.0
+
+    """
+    # amin() is equivalent to min()
+    if not hasattr(a, 'min'):
+        a = numpypy.array(a)
+    return a.min()
+
+def alen(a):
+    """
+    Return the length of the first dimension of the input array.
+
+    Parameters
+    ----------
+    a : array_like
+       Input array.
+
+    Returns
+    -------
+    l : int
+       Length of the first dimension of `a`.
+
+    See Also
+    --------
+    shape, size
+
+    Examples
+    --------
+    >>> a = np.zeros((7,4,5))
+    >>> a.shape[0]
+    7
+    >>> np.alen(a)
+    7
+
+    """
+    if not hasattr(a, 'shape'):
+        a = numpypy.array(a)
+    return a.shape[0]
+
+
+def prod(a, axis=None, dtype=None, out=None):
+    """
+    Return the product of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis over which the product is taken.  By default, the product
+        of all elements is calculated.
+    dtype : data-type, optional
+        The data-type of the returned array, as well as of the accumulator
+        in which the elements are multiplied.  By default, if `a` is of
+        integer type, `dtype` is the default platform integer. (Note: if
+        the type of `a` is unsigned, then so is `dtype`.)  Otherwise,
+        the dtype is the same as that of `a`.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape as the expected output, but the type of the
+        output values will be cast if necessary.
+
+    Returns
+    -------
+    product_along_axis : ndarray, see `dtype` parameter above.
+        An array shaped as `a` but with the specified axis removed.
+        Returns a reference to `out` if specified.
+
+    See Also
+    --------
+    ndarray.prod : equivalent method
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Notes
+    -----
+    Arithmetic is modular when using integer types, and no error is
+    raised on overflow.  That means that, on a 32-bit platform:
+
+    >>> x = np.array([536870910, 536870910, 536870910, 536870910])
+    >>> np.prod(x) #random
+    16
+
+    Examples
+    --------
+    By default, calculate the product of all elements:
+
+    >>> np.prod([1.,2.])
+    2.0
+
+    Even when the input array is two-dimensional:
+
+    >>> np.prod([[1.,2.],[3.,4.]])
+    24.0
+
+    But we can also specify the axis over which to multiply:
+
+    >>> np.prod([[1.,2.],[3.,4.]], axis=1)
+    array([  2.,  12.])
+
+    If the type of `x` is unsigned, then the output type is
+    the unsigned platform integer:
+
+    >>> x = np.array([1, 2, 3], dtype=np.uint8)
+    >>> np.prod(x).dtype == np.uint
+    True
+
+    If `x` is of a signed integer type, then the output type
+    is the default platform integer:
+
+    >>> x = np.array([1, 2, 3], dtype=np.int8)
+    >>> np.prod(x).dtype == np.int
+    True
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def cumprod(a, axis=None, dtype=None, out=None):
+    """
+    Return the cumulative product of elements along a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int, optional
+        Axis along which the cumulative product is computed.  By default
+        the input is flattened.
+    dtype : dtype, optional
+        Type of the returned array, as well as of the accumulator in which
+        the elements are multiplied.  If *dtype* is not specified, it
+        defaults to the dtype of `a`, unless `a` has an integer dtype with
+        a precision less than that of the default platform integer.  In
+        that case, the default platform integer is used instead.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output
+        but the type of the resulting values will be cast if necessary.
+
+    Returns
+    -------
+    cumprod : ndarray
+        A new array holding the result is returned unless `out` is
+        specified, in which case a reference to out is returned.
+
+    See Also
+    --------
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Notes
+    -----
+    Arithmetic is modular when using integer types, and no error is
+    raised on overflow.
+
+    Examples
+    --------
+    >>> a = np.array([1,2,3])
+    >>> np.cumprod(a) # intermediate results 1, 1*2
+    ...               # total product 1*2*3 = 6
+    array([1, 2, 6])
+    >>> a = np.array([[1, 2, 3], [4, 5, 6]])
+    >>> np.cumprod(a, dtype=float) # specify type of output
+    array([   1.,    2.,    6.,   24.,  120.,  720.])
+
+    The cumulative product for each column (i.e., over the rows) of `a`:
+
+    >>> np.cumprod(a, axis=0)
+    array([[ 1,  2,  3],
+           [ 4, 10, 18]])
+
+    The cumulative product for each row (i.e. over the columns) of `a`:
+
+    >>> np.cumprod(a,axis=1)
+    array([[  1,   2,   6],
+           [  4,  20, 120]])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def ndim(a):
+    """
+    Return the number of dimensions of an array.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array.  If it is not already an ndarray, a conversion is
+        attempted.
+
+    Returns
+    -------
+    number_of_dimensions : int
+        The number of dimensions in `a`.  Scalars are zero-dimensional.
+
+    See Also
+    --------
+    ndarray.ndim : equivalent method
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+
+    Examples
+    --------
+    >>> np.ndim([[1,2,3],[4,5,6]])
+    2
+    >>> np.ndim(np.array([[1,2,3],[4,5,6]]))
+    2
+    >>> np.ndim(1)
+    0
+
+    """
+    if not hasattr(a, 'ndim'):
+        a = numpypy.array(a)
+    return a.ndim
+
+
+def rank(a):
+    """
+    Return the number of dimensions of an array.
+
+    If `a` is not already an array, a conversion is attempted.
+    Scalars are zero dimensional.
+
+    Parameters
+    ----------
+    a : array_like
+        Array whose number of dimensions is desired. If `a` is not an array,
+        a conversion is attempted.
+
+    Returns
+    -------
+    number_of_dimensions : int
+        The number of dimensions in the array.
+
+    See Also
+    --------
+    ndim : equivalent function
+    ndarray.ndim : equivalent property
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+
+    Notes
+    -----
+    In the old Numeric package, `rank` was the term used for the number of
+    dimensions, but in Numpy `ndim` is used instead.
+
+    Examples
+    --------
+    >>> np.rank([1,2,3])
+    1
+    >>> np.rank(np.array([[1,2,3],[4,5,6]]))
+    2
+    >>> np.rank(1)
+    0
+
+    """
+    if not hasattr(a, 'ndim'):
+        a = numpypy.array(a)
+    return a.ndim
+
+
+def size(a, axis=None):
+    """
+    Return the number of elements along a given axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which the elements are counted.  By default, give
+        the total number of elements.
+
+    Returns
+    -------
+    element_count : int
+        Number of elements along the specified axis.
+
+    See Also
+    --------
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+    ndarray.size : number of elements in array
+
+    Examples
+    --------
+    >>> a = np.array([[1,2,3],[4,5,6]])
+    >>> np.size(a)
+    6
+    >>> np.size(a,1)
+    3
+    >>> np.size(a,0)
+    2
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def around(a, decimals=0, out=None):
+    """
+    Evenly round to the given number of decimals.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    decimals : int, optional
+        Number of decimal places to round to (default: 0).  If
+        decimals is negative, it specifies the number of positions to
+        the left of the decimal point.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape as the expected output, but the type of the output
+        values will be cast if necessary. See `doc.ufuncs` (Section
+        "Output arguments") for details.
+
+    Returns
+    -------
+    rounded_array : ndarray
+        An array of the same type as `a`, containing the rounded values.
+        Unless `out` was specified, a new array is created.  A reference to
+        the result is returned.
+
+        The real and imaginary parts of complex numbers are rounded
+        separately.  The result of rounding a float is a float.
+
+    See Also
+    --------
+    ndarray.round : equivalent method
+
+    ceil, fix, floor, rint, trunc
+
+
+    Notes
+    -----
+    For values exactly halfway between rounded decimal values, Numpy
+    rounds to the nearest even value. Thus 1.5 and 2.5 round to 2.0,
+    -0.5 and 0.5 round to 0.0, etc. Results may also be surprising due
+    to the inexact representation of decimal fractions in the IEEE
+    floating point standard [1]_ and errors introduced when scaling
+    by powers of ten.
+
+    References
+    ----------
+    .. [1] "Lecture Notes on the Status of  IEEE 754", William Kahan,
+           http://www.cs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF
+    .. [2] "How Futile are Mindless Assessments of
+           Roundoff in Floating-Point Computation?", William Kahan,
+           http://www.cs.berkeley.edu/~wkahan/Mindless.pdf
+
+    Examples
+    --------
+    >>> np.around([0.37, 1.64])
+    array([ 0.,  2.])
+    >>> np.around([0.37, 1.64], decimals=1)
+    array([ 0.4,  1.6])
+    >>> np.around([.5, 1.5, 2.5, 3.5, 4.5]) # rounds to nearest even value
+    array([ 0.,  2.,  2.,  4.,  4.])
+    >>> np.around([1,2,3,11], decimals=1) # ndarray of ints is returned
+    array([ 1,  2,  3, 11])
+    >>> np.around([1,2,3,11], decimals=-1)
+    array([ 0,  0,  0, 10])
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def round_(a, decimals=0, out=None):
+    """
+    Round an array to the given number of decimals.
+
+    Refer to `around` for full documentation.
+
+    See Also
+    --------
+    around : equivalent function
+
+    """
+    raise NotImplemented('Waiting on interp level method')
+
+
+def mean(a, axis=None, dtype=None, out=None):
+    """
+    Compute the arithmetic mean along the specified axis.
+
+    Returns the average of the array elements.  The average is taken over
+    the flattened array by default, otherwise over the specified axis.
+    `float64` intermediate and return values are used for integer inputs.
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing numbers whose mean is desired. If `a` is not an
+        array, a conversion is attempted.
+    axis : int, optional
+        Axis along which the means are computed. The default is to compute
+        the mean of the flattened array.
+    dtype : data-type, optional
+        Type to use in computing the mean.  For integer inputs, the default
+        is `float64`; for floating point inputs, it is the same as the
+        input dtype.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  The default
+        is ``None``; if provided, it must have the same shape as the
+        expected output, but the type will be cast if necessary.
+        See `doc.ufuncs` for details.
+
+    Returns
+    -------
+    m : ndarray, see dtype parameter above
+        If `out=None`, returns a new array containing the mean values,
+        otherwise a reference to the output array is returned.
+
+    See Also
+    --------
+    average : Weighted average
+
+    Notes
+    -----
+    The arithmetic mean is the sum of the elements along the axis divided
+    by the number of elements.
+
+    Note that for floating-point input, the mean is computed using the
+    same precision the input has.  Depending on the input data, this can
+    cause the results to be inaccurate, especially for `float32` (see
+    example below).  Specifying a higher-precision accumulator using the
+    `dtype` keyword can alleviate this issue.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> np.mean(a)
+    2.5
+    >>> np.mean(a, axis=0)
+    array([ 2.,  3.])
+    >>> np.mean(a, axis=1)
+    array([ 1.5,  3.5])
+
+    In single precision, `mean` can be inaccurate:
+
+    >>> a = np.zeros((2, 512*512), dtype=np.float32)
+    >>> a[0, :] = 1.0
+    >>> a[1, :] = 0.1
+    >>> np.mean(a)
+    0.546875
+
+    Computing the mean in float64 is more accurate:
+
+    >>> np.mean(a, dtype=np.float64)
+    0.55000000074505806
+
+    """
+    if not hasattr(a, "mean"):
+        a = numpypy.array(a)
+    return a.mean()
+
+
+def std(a, axis=None, dtype=None, out=None, ddof=0):
+    """
+    Compute the standard deviation along the specified axis.
+
+    Returns the standard deviation, a measure of the spread of a distribution,
+    of the array elements. The standard deviation is computed for the
+    flattened array by default, otherwise over the specified axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Calculate the standard deviation of these values.
+    axis : int, optional
+        Axis along which the standard deviation is computed. The default is
+        to compute the standard deviation of the flattened array.
+    dtype : dtype, optional
+        Type to use in computing the standard deviation. For arrays of
+        integer type the default is float64, for arrays of float types it is
+        the same as the array type.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape as the expected output but the type (of the calculated
+        values) will be cast if necessary.
+    ddof : int, optional
+        Means Delta Degrees of Freedom.  The divisor used in calculations
+        is ``N - ddof``, where ``N`` represents the number of elements.
+        By default `ddof` is zero.
+
+    Returns
+    -------
+    standard_deviation : ndarray, see dtype parameter above.
+        If `out` is None, return a new array containing the standard deviation,
+        otherwise return a reference to the output array.
+
+    See Also
+    --------
+    var, mean
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Notes
+    -----
+    The standard deviation is the square root of the average of the squared
+    deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``.
+
+    The average squared deviation is normally calculated as ``x.sum() / N``, where
+    ``N = len(x)``.  If, however, `ddof` is specified, the divisor ``N - ddof``
+    is used instead. In standard statistical practice, ``ddof=1`` provides an
+    unbiased estimator of the variance of the infinite population. ``ddof=0``
+    provides a maximum likelihood estimate of the variance for normally
+    distributed variables. The standard deviation computed in this function
+    is the square root of the estimated variance, so even with ``ddof=1``, it
+    will not be an unbiased estimate of the standard deviation per se.
+
+    Note that, for complex numbers, `std` takes the absolute
+    value before squaring, so that the result is always real and nonnegative.
+
+    For floating-point input, the *std* is computed using the same
+    precision the input has. Depending on the input data, this can cause
+    the results to be inaccurate, especially for float32 (see example below).
+    Specifying a higher-accuracy accumulator using the `dtype` keyword can
+    alleviate this issue.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> np.std(a)
+    1.1180339887498949
+    >>> np.std(a, axis=0)
+    array([ 1.,  1.])
+    >>> np.std(a, axis=1)
+    array([ 0.5,  0.5])
+
+    In single precision, std() can be inaccurate:
+
+    >>> a = np.zeros((2,512*512), dtype=np.float32)
+    >>> a[0,:] = 1.0
+    >>> a[1,:] = 0.1
+    >>> np.std(a)
+    0.45172946707416706
+
+    Computing the standard deviation in float64 is more accurate:
+
+    >>> np.std(a, dtype=np.float64)
+    0.44999999925552653
+
+    """
+    if not hasattr(a, "std"):
+        a = numpypy.array(a)
+    return a.std()
+
+
+def var(a, axis=None, dtype=None, out=None, ddof=0):
+    """
+    Compute the variance along the specified axis.
+
+    Returns the variance of the array elements, a measure of the spread of a
+    distribution.  The variance is computed for the flattened array by
+    default, otherwise over the specified axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing numbers whose variance is desired.  If `a` is not an
+        array, a conversion is attempted.
+    axis : int, optional
+        Axis along which the variance is computed.  The default is to compute
+        the variance of the flattened array.
+    dtype : data-type, optional
+        Type to use in computing the variance.  For arrays of integer type
+        the default is `float32`; for arrays of float types it is the same as
+        the array type.
+    out : ndarray, optional
+        Alternate output array in which to place the result.  It must have
+        the same shape as the expected output, but the type is cast if
+        necessary.
+    ddof : int, optional
+        "Delta Degrees of Freedom": the divisor used in the calculation is
+        ``N - ddof``, where ``N`` represents the number of elements. By
+        default `ddof` is zero.
+
+    Returns
+    -------
+    variance : ndarray, see dtype parameter above
+        If ``out=None``, returns a new array containing the variance;
+        otherwise, a reference to the output array is returned.
+
+    See Also
+    --------
+    std : Standard deviation
+    mean : Average
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Notes
+    -----
+    The variance is the average of the squared deviations from the mean,
+    i.e.,  ``var = mean(abs(x - x.mean())**2)``.
+
+    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
+    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
+    instead.  In standard statistical practice, ``ddof=1`` provides an
+    unbiased estimator of the variance of a hypothetical infinite population.
+    ``ddof=0`` provides a maximum likelihood estimate of the variance for
+    normally distributed variables.
+
+    Note that for complex numbers, the absolute value is taken before
+    squaring, so that the result is always real and nonnegative.
+
+    For floating-point input, the variance is computed using the same
+    precision the input has.  Depending on the input data, this can cause
+    the results to be inaccurate, especially for `float32` (see example
+    below).  Specifying a higher-accuracy accumulator using the ``dtype``
+    keyword can alleviate this issue.
+
+    Examples
+    --------
+    >>> a = np.array([[1,2],[3,4]])
+    >>> np.var(a)
+    1.25
+    >>> np.var(a,0)
+    array([ 1.,  1.])
+    >>> np.var(a,1)
+    array([ 0.25,  0.25])
+
+    In single precision, var() can be inaccurate:
+
+    >>> a = np.zeros((2,512*512), dtype=np.float32)
+    >>> a[0,:] = 1.0
+    >>> a[1,:] = 0.1
+    >>> np.var(a)
+    0.20405951142311096
+
+    Computing the standard deviation in float64 is more accurate:
+
+    >>> np.var(a, dtype=np.float64)
+    0.20249999932997387
+    >>> ((1-0.55)**2 + (0.1-0.55)**2)/2
+    0.20250000000000001
+
+    """
+    if not hasattr(a, "var"):
+        a = numpypy.array(a)
+    return a.var()
diff --git a/lib_pypy/numpypy/test/test_fromnumeric.py b/lib_pypy/numpypy/test/test_fromnumeric.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/numpypy/test/test_fromnumeric.py
@@ -0,0 +1,109 @@
+
+from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+
+class AppTestFromNumeric(BaseNumpyAppTest):     
+    def test_argmax(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, arange, argmax
+        a = arange(6).reshape((2,3))
+        assert argmax(a) == 5
+        # assert (argmax(a, axis=0) == array([1, 1, 1])).all()
+        # assert (argmax(a, axis=1) == array([2, 2])).all()
+        b = arange(6)
+        b[1] = 5
+        assert argmax(b) == 1
+
+    def test_argmin(self):
+        # tests adapted from test_argmax
+        from numpypy import array, arange, argmin
+        a = arange(6).reshape((2,3))
+        assert argmin(a) == 0
+        # assert (argmax(a, axis=0) == array([0, 0, 0])).all()
+        # assert (argmax(a, axis=1) == array([0, 0])).all()
+        b = arange(6)
+        b[1] = 0
+        assert argmin(b) == 0
+   
+    def test_shape(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, identity, shape
+        assert shape(identity(3)) == (3, 3)
+        assert shape([[1, 2]]) == (1, 2)
+        assert shape([0]) ==  (1,)
+        assert shape(0) == ()
+        # a = array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+        # assert shape(a) == (2,)
+
+    def test_sum(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, sum, ones
+        assert sum([0.5, 1.5])== 2.0
+        assert sum([[0, 1], [0, 5]]) == 6
+        # assert sum([0.5, 0.7, 0.2, 1.5], dtype=int32) == 1
+        # assert (sum([[0, 1], [0, 5]], axis=0) == array([0, 6])).all()
+        # assert (sum([[0, 1], [0, 5]], axis=1) == array([1, 5])).all()
+        # If the accumulator is too small, overflow occurs:
+        # assert ones(128, dtype=int8).sum(dtype=int8) == -128
+                                 
+    def test_amin(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, arange, amin
+        a = arange(4).reshape((2,2))
+        assert amin(a) == 0
+        # # Minima along the first axis
+        # assert (amin(a, axis=0) == array([0, 1])).all()
+        # # Minima along the second axis
+        # assert (amin(a, axis=1) == array([0, 2])).all()
+        # # NaN behaviour
+        # b = arange(5, dtype=float)
+        # b[2] = NaN
+        # assert amin(b) == nan
+        # assert nanmin(b) == 0.0
+
+    def test_amax(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, arange, amax
+        a = arange(4).reshape((2,2))
+        assert amax(a) == 3
+        # assert (amax(a, axis=0) == array([2, 3])).all()
+        # assert (amax(a, axis=1) == array([1, 3])).all()
+        # # NaN behaviour
+        # b = arange(5, dtype=float)
+        # b[2] = NaN
+        # assert amax(b) == nan
+        # assert nanmax(b) == 4.0
+
+    def test_alen(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, zeros, alen
+        a = zeros((7,4,5))
+        assert a.shape[0] == 7
+        assert alen(a)    == 7
+
+    def test_ndim(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, ndim
+        assert ndim([[1,2,3],[4,5,6]]) == 2
+        assert ndim(array([[1,2,3],[4,5,6]])) == 2
+        assert ndim(1) == 0
+    
+    def test_rank(self):
+        # tests taken from numpy/core/fromnumeric.py docstring
+        from numpypy import array, rank
+        assert rank([[1,2,3],[4,5,6]]) == 2
+        assert rank(array([[1,2,3],[4,5,6]])) == 2
+        assert rank(1) == 0
+    
+    def test_var(self):
+        from numpypy import array, var
+        a = array([[1,2],[3,4]])
+        assert var(a) == 1.25
+        # assert (np.var(a,0) == array([ 1.,  1.])).all()
+        # assert (np.var(a,1) == array([ 0.25,  0.25])).all()
+
+    def test_std(self):
+        from numpypy import array, std
+        a = array([[1, 2], [3, 4]])
+        assert std(a) ==  1.1180339887498949
+        # assert (std(a, axis=0) == array([ 1.,  1.])).all()
+        # assert (std(a, axis=1) == array([ 0.5,  0.5]).all()
diff --git a/pypy/annotation/description.py b/pypy/annotation/description.py
--- a/pypy/annotation/description.py
+++ b/pypy/annotation/description.py
@@ -180,7 +180,12 @@
         if name is None:
             name = pyobj.func_name
         if signature is None:
-            signature = cpython_code_signature(pyobj.func_code)
+            if hasattr(pyobj, '_generator_next_method_of_'):
+                from pypy.interpreter.argument import Signature
+                signature = Signature(['entry'])     # haaaaaack
+                defaults = ()
+            else:
+                signature = cpython_code_signature(pyobj.func_code)
         if defaults is None:
             defaults = pyobj.func_defaults
         self.name = name
@@ -252,7 +257,8 @@
         try:
             inputcells = args.match_signature(signature, defs_s)
         except ArgErr, e:
-            raise TypeError, "signature mismatch: %s" % e.getmsg(self.name)
+            raise TypeError("signature mismatch: %s() %s" % 
+                            (self.name, e.getmsg()))
         return inputcells
 
     def specialize(self, inputcells, op=None):
diff --git a/pypy/doc/Makefile b/pypy/doc/Makefile
--- a/pypy/doc/Makefile
+++ b/pypy/doc/Makefile
@@ -12,7 +12,7 @@
 PAPEROPT_letter = -D latex_paper_size=letter
 ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex man changes linkcheck doctest
 
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
@@ -23,6 +23,7 @@
 	@echo "  htmlhelp  to make HTML files and a HTML help project"
 	@echo "  qthelp    to make HTML files and a qthelp project"
 	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  man       to make manual pages"
 	@echo "  changes   to make an overview of all changed/added/deprecated items"
 	@echo "  linkcheck to check all external links for integrity"
 	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
@@ -79,6 +80,11 @@
 	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
 	      "run these through (pdf)latex."
 
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man"
+
 changes:
 	python config/generate.py
 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -175,15 +175,15 @@
 RPython
 =================
 
-RPython Definition, not
------------------------
+RPython Definition
+------------------
 
-The list and exact details of the "RPython" restrictions are a somewhat
-evolving topic.  In particular, we have no formal language definition
-as we find it more practical to discuss and evolve the set of
-restrictions while working on the whole program analysis.  If you
-have any questions about the restrictions below then please feel
-free to mail us at pypy-dev at codespeak net.
+RPython is a restricted subset of Python that is amenable to static analysis.
+Although there are additions to the language and some things might surprisingly
+work, this is a rough list of restrictions that should be considered. Note
+that there are tons of special cased restrictions that you'll encounter
+as you go. The exact definition is "RPython is everything that our translation
+toolchain can accept" :)
 
 .. _`wrapped object`: coding-guide.html#wrapping-rules
 
@@ -198,7 +198,7 @@
   contain both a string and a int must be avoided.  It is allowed to
   mix None (basically with the role of a null pointer) with many other
   types: `wrapped objects`, class instances, lists, dicts, strings, etc.
-  but *not* with int and floats.
+  but *not* with int, floats or tuples.
 
 **constants**
 
@@ -209,9 +209,12 @@
   have this restriction, so if you need mutable global state, store it
   in the attributes of some prebuilt singleton instance.
 
+
+
 **control structures**
 
-  all allowed but yield, ``for`` loops restricted to builtin types
+  all allowed, ``for`` loops restricted to builtin types, generators
+  very restricted.
 
 **range**
 
@@ -226,7 +229,8 @@
 
 **generators**
 
-  generators are not supported.
+  generators are supported, but their exact scope is very limited. you can't
+  merge two different generator in one control point.
 
 **exceptions**
 
@@ -245,22 +249,27 @@
 
 **strings**
 
-  a lot of, but not all string methods are supported.  Indexes can be
+  a lot of, but not all string methods are supported and those that are
+  supported, not necesarilly accept all arguments.  Indexes can be
   negative.  In case they are not, then you get slightly more efficient
   code if the translator can prove that they are non-negative.  When
   slicing a string it is necessary to prove that the slice start and
-  stop indexes are non-negative.
+  stop indexes are non-negative. There is no implicit str-to-unicode cast
+  anywhere.
 
 **tuples**
 
   no variable-length tuples; use them to store or return pairs or n-tuples of
-  values. Each combination of types for elements and length constitute a separate
-  and not mixable type.
+  values. Each combination of types for elements and length constitute
+  a separate and not mixable type.
 
 **lists**
 
   lists are used as an allocated array.  Lists are over-allocated, so list.append()
-  is reasonably fast.  Negative or out-of-bound indexes are only allowed for the
+  is reasonably fast. However, if you use a fixed-size list, the code
+  is more efficient. Annotator can figure out most of the time that your
+  list is fixed-size, even when you use list comprehension.
+  Negative or out-of-bound indexes are only allowed for the
   most common operations, as follows:
 
   - *indexing*:
@@ -287,16 +296,14 @@
 
 **dicts**
 
-  dicts with a unique key type only, provided it is hashable. 
-  String keys have been the only allowed key types for a while, but this was generalized. 
-  After some re-optimization,
-  the implementation could safely decide that all string dict keys should be interned.
+  dicts with a unique key type only, provided it is hashable. Custom
+  hash functions and custom equality will not be honored.
+  Use ``pypy.rlib.objectmodel.r_dict`` for custom hash functions.
 
 
 **list comprehensions**
 
-  may be used to create allocated, initialized arrays.
-  After list over-allocation was introduced, there is no longer any restriction.
+  May be used to create allocated, initialized arrays.
 
 **functions**
 
@@ -334,9 +341,8 @@
 
 **objects**
 
-  in PyPy, wrapped objects are borrowed from the object space. Just like
-  in CPython, code that needs e.g. a dictionary can use a wrapped dict
-  and the object space operations on it.
+  Normal rules apply. Special methods are not honoured, except ``__init__`` and
+  ``__del__``.
 
 This layout makes the number of types to take care about quite limited.
 
diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py
--- a/pypy/doc/conf.py
+++ b/pypy/doc/conf.py
@@ -197,3 +197,10 @@
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {'http://docs.python.org/': None}
 
+# -- Options for manpage output-------------------------------------------------
+
+man_pages = [
+  ('man/pypy.1', 'pypy',
+   u'fast, compliant alternative implementation of the Python language',
+   u'The PyPy Project', 1)
+]
diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst
--- a/pypy/doc/extradoc.rst
+++ b/pypy/doc/extradoc.rst
@@ -8,6 +8,9 @@
 *Articles about PyPy published so far, most recent first:* (bibtex_ file)
 
 
+* `Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`_,
+  C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo
+
 * `Allocation Removal by Partial Evaluation in a Tracing JIT`_,
   C.F. Bolz, A. Cuni, M. Fijalkowski, M. Leuschel, S. Pedroni, A. Rigo
 
@@ -50,6 +53,9 @@
 
 *Other research using PyPy (as far as we know it):*
 
+* `Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution`_,
+  N. Riley and C. Zilles
+
 * `PyGirl: Generating Whole-System VMs from High-Level Prototypes using PyPy`_,
   C. Bruni and T. Verwaest
 
@@ -65,6 +71,7 @@
 
 
 .. _bibtex: https://bitbucket.org/pypy/extradoc/raw/tip/talk/bibtex.bib
+.. _`Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf
 .. _`Allocation Removal by Partial Evaluation in a Tracing JIT`: http://codespeak.net/svn/pypy/extradoc/talk/pepm2011/bolz-allocation-removal.pdf
 .. _`Towards a Jitting VM for Prolog Execution`: http://www.stups.uni-duesseldorf.de/publications/bolz-prolog-jit.pdf
 .. _`High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages`: http://buildbot.pypy.org/misc/antocuni-thesis.pdf
@@ -74,6 +81,7 @@
 .. _`Automatic JIT Compiler Generation with Runtime Partial Evaluation`:  http://www.stups.uni-duesseldorf.de/thesis/final-master.pdf
 .. _`RPython: A Step towards Reconciling Dynamically and Statically Typed OO Languages`: http://www.disi.unige.it/person/AnconaD/papers/Recent_abstracts.html#AACM-DLS07
 .. _`EU Reports`: index-report.html
+.. _`Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution`: http://sabi.net/nriley/pubs/dls6-riley.pdf
 .. _`PyGirl: Generating Whole-System VMs from High-Level Prototypes using PyPy`: http://scg.unibe.ch/archive/papers/Brun09cPyGirl.pdf
 .. _`Representation-Based Just-in-Time Specialization and the Psyco Prototype for Python`: http://psyco.sourceforge.net/psyco-pepm-a.ps.gz
 .. _`Back to the Future in One Week -- Implementing a Smalltalk VM in PyPy`: http://dx.doi.org/10.1007/978-3-540-89275-5_7
diff --git a/pypy/doc/man/pypy.1.rst b/pypy/doc/man/pypy.1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/man/pypy.1.rst
@@ -0,0 +1,90 @@
+======
+ pypy
+======
+
+SYNOPSIS
+========
+
+``pypy`` [*options*]
+[``-c`` *cmd*\ \|\ ``-m`` *mod*\ \|\ *file.py*\ \|\ ``-``\ ]
+[*arg*\ ...]
+
+OPTIONS
+=======
+
+-i
+    Inspect interactively after running script.
+
+-O
+    Dummy optimization flag for compatibility with C Python.
+
+-c *cmd*
+    Program passed in as CMD (terminates option list).
+
+-S
+    Do not ``import site`` on initialization.
+
+-u
+    Unbuffered binary ``stdout`` and ``stderr``.
+
+-h, --help
+    Show a help message and exit.
+
+-m *mod*
+    Library module to be run as a script (terminates option list).
+
+-W *arg*
+    Warning control (*arg* is *action*:*message*:*category*:*module*:*lineno*).
+
+-E
+    Ignore environment variables (such as ``PYTHONPATH``).
+
+--version
+    Print the PyPy version.
+
+--info
+    Print translation information about this PyPy executable.
+
+--jit *arg*
+    Low level JIT parameters. Format is
+    *arg*\ ``=``\ *value*\ [``,``\ *arg*\ ``=``\ *value*\ ...]
+
+    ``off``
+        Disable the JIT.
+
+    ``threshold=``\ *value*
+        Number of times a loop has to run for it to become hot.
+
+    ``function_threshold=``\ *value*
+        Number of times a function must run for it to become traced from
+        start.
+
+    ``inlining=``\ *value*
+        Inline python functions or not (``1``/``0``).
+
+    ``loop_longevity=``\ *value*
+        A parameter controlling how long loops will be kept before being
+        freed, an estimate.
+
+    ``max_retrace_guards=``\ *value*
+        Number of extra guards a retrace can cause.
+
+    ``retrace_limit=``\ *value*
+        How many times we can try retracing before giving up.
+
+    ``trace_eagerness=``\ *value*
+        Number of times a guard has to fail before we start compiling a
+        bridge.
+
+    ``trace_limit=``\ *value*
+        Number of recorded operations before we abort tracing with
+        ``ABORT_TRACE_TOO_LONG``.
+
+    ``enable_opts=``\ *value*
+        Optimizations to enabled or ``all``.
+        Warning, this option is dangerous, and should be avoided.
+
+SEE ALSO
+========
+
+**python**\ (1)
diff --git a/pypy/doc/tool/makecontributor.py b/pypy/doc/tool/makecontributor.py
deleted file mode 100644
--- a/pypy/doc/tool/makecontributor.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-
-generates a contributor list
-
-"""
-import py
-
-# this file is useless, use the following commandline instead:
-# hg churn -c -t "{author}" | sed -e 's/ <.*//'
-
-try: 
-    path = py.std.sys.argv[1]
-except IndexError: 
-    print "usage: %s ROOTPATH" %(py.std.sys.argv[0])
-    raise SystemExit, 1
-
-d = {}
-
-for logentry in py.path.svnwc(path).log(): 
-    a = logentry.author 
-    if a in d: 
-        d[a] += 1
-    else: 
-        d[a] = 1
-
-items = d.items()
-items.sort(lambda x,y: -cmp(x[1], y[1]))
-
-import uconf # http://codespeak.net/svn/uconf/dist/uconf 
-
-# Authors that don't want to be listed
-excluded = set("anna gintas ignas".split())
-cutoff = 5 # cutoff for authors in the LICENSE file
-mark = False
-for author, count in items: 
-    if author in excluded:
-        continue
-    user = uconf.system.User(author)
-    try:
-        realname = user.realname.strip()
-    except KeyError:
-        realname = author
-    if not mark and count < cutoff:
-        mark = True
-        print '-'*60
-    print "   ", realname
-    #print count, "   ", author 
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -428,8 +428,8 @@
             return self._match_signature(w_firstarg,
                                          scope_w, signature, defaults_w, 0)
         except ArgErr, e:
-            raise OperationError(self.space.w_TypeError,
-                                 self.space.wrap(e.getmsg(fnname)))
+            raise operationerrfmt(self.space.w_TypeError,
+                                  "%s() %s", fnname, e.getmsg())
 
     def _parse(self, w_firstarg, signature, defaults_w, blindargs=0):
         """Parse args and kwargs according to the signature of a code object,
@@ -450,8 +450,8 @@
         try:
             return self._parse(w_firstarg, signature, defaults_w, blindargs)
         except ArgErr, e:
-            raise OperationError(self.space.w_TypeError,
-                                 self.space.wrap(e.getmsg(fnname)))
+            raise operationerrfmt(self.space.w_TypeError,
+                                  "%s() %s", fnname, e.getmsg())
 
     @staticmethod
     def frompacked(space, w_args=None, w_kwds=None):
@@ -626,7 +626,7 @@
 
 class ArgErr(Exception):
 
-    def getmsg(self, fnname):
+    def getmsg(self):
         raise NotImplementedError
 
 class ArgErrCount(ArgErr):
@@ -642,11 +642,10 @@
         self.num_args = got_nargs
         self.num_kwds = nkwds
 
-    def getmsg(self, fnname):
+    def getmsg(self):
         n = self.expected_nargs
         if n == 0:
-            msg = "%s() takes no arguments (%d given)" % (
-                fnname,
+            msg = "takes no arguments (%d given)" % (
                 self.num_args + self.num_kwds)
         else:
             defcount = self.num_defaults
@@ -672,8 +671,7 @@
                 msg2 = " non-keyword"
             else:
                 msg2 = ""
-            msg = "%s() takes %s %d%s argument%s (%d given)" % (
-                fnname,
+            msg = "takes %s %d%s argument%s (%d given)" % (
                 msg1,
                 n,
                 msg2,
@@ -686,9 +684,8 @@
     def __init__(self, argname):
         self.argname = argname
 
-    def getmsg(self, fnname):
-        msg = "%s() got multiple values for keyword argument '%s'" % (
-            fnname,
+    def getmsg(self):
+        msg = "got multiple values for keyword argument '%s'" % (
             self.argname)
         return msg
 
@@ -722,13 +719,11 @@
                     break
         self.kwd_name = name
 
-    def getmsg(self, fnname):
+    def getmsg(self):
         if self.num_kwds == 1:
-            msg = "%s() got an unexpected keyword argument '%s'" % (
-                fnname,
+            msg = "got an unexpected keyword argument '%s'" % (
                 self.kwd_name)
         else:
-            msg = "%s() got %d unexpected keyword arguments" % (
-                fnname,
+            msg = "got %d unexpected keyword arguments" % (
                 self.num_kwds)
         return msg
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1591,12 +1591,15 @@
     'ArithmeticError',
     'AssertionError',
     'AttributeError',
+    'BaseException',
+    'DeprecationWarning',
     'EOFError',
     'EnvironmentError',
     'Exception',
     'FloatingPointError',
     'IOError',
     'ImportError',
+    'ImportWarning',
     'IndentationError',
     'IndexError',
     'KeyError',
@@ -1617,7 +1620,10 @@
     'TabError',
     'TypeError',
     'UnboundLocalError',
+    'UnicodeDecodeError',
     'UnicodeError',
+    'UnicodeEncodeError',
+    'UnicodeTranslateError',
     'ValueError',
     'ZeroDivisionError',
     'UnicodeEncodeError',
diff --git a/pypy/interpreter/eval.py b/pypy/interpreter/eval.py
--- a/pypy/interpreter/eval.py
+++ b/pypy/interpreter/eval.py
@@ -2,7 +2,6 @@
 This module defines the abstract base classes that support execution:
 Code and Frame.
 """
-from pypy.rlib import jit
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.baseobjspace import Wrappable
 
@@ -98,7 +97,6 @@
         "Abstract. Get the expected number of locals."
         raise TypeError, "abstract"
 
-    @jit.dont_look_inside
     def fast2locals(self):
         # Copy values from the fastlocals to self.w_locals
         if self.w_locals is None:
@@ -112,7 +110,6 @@
                 w_name = self.space.wrap(name)
                 self.space.setitem(self.w_locals, w_name, w_value)
 
-    @jit.dont_look_inside
     def locals2fast(self):
         # Copy values from self.w_locals to the fastlocals
         assert self.w_locals is not None
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -619,7 +619,8 @@
                                                   self.descr_reqcls,
                                                   args)
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -655,7 +656,8 @@
                                                   self.descr_reqcls,
                                                   args)
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -674,7 +676,8 @@
                                                   self.descr_reqcls,
                                                   args.prepend(w_obj))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -690,7 +693,8 @@
             raise OperationError(space.w_SystemError,
                                  space.wrap("unexpected DescrMismatch error"))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -708,7 +712,8 @@
                                            self.descr_reqcls,
                                            Arguments(space, [w1]))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -726,7 +731,8 @@
                                            self.descr_reqcls,
                                            Arguments(space, [w1, w2]))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -744,7 +750,8 @@
                                            self.descr_reqcls,
                                            Arguments(space, [w1, w2, w3]))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
@@ -763,7 +770,8 @@
                                            Arguments(space,
                                                      [w1, w2, w3, w4]))
         except Exception, e:
-            raise self.handle_exception(space, e)
+            self.handle_exception(space, e)
+            w_result = None
         if w_result is None:
             w_result = space.w_None
         return w_result
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -162,7 +162,8 @@
     # generate 2 versions of the function and 2 jit drivers.
     def _create_unpack_into():
         jitdriver = jit.JitDriver(greens=['pycode'],
-                                  reds=['self', 'frame', 'results'])
+                                  reds=['self', 'frame', 'results'],
+                                  name='unpack_into')
         def unpack_into(self, results):
             """This is a hack for performance: runs the generator and collects
             all produced items in a list."""
@@ -196,4 +197,4 @@
                 self.frame = None
         return unpack_into
     unpack_into = _create_unpack_into()
-    unpack_into_w = _create_unpack_into()
\ No newline at end of file
+    unpack_into_w = _create_unpack_into()
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -393,8 +393,8 @@
 
         class FakeArgErr(ArgErr):
 
-            def getmsg(self, fname):
-                return "msg "+fname
+            def getmsg(self):
+                return "msg"
 
         def _match_signature(*args):
             raise FakeArgErr()
@@ -404,7 +404,7 @@
         excinfo = py.test.raises(OperationError, args.parse_obj, "obj", "foo",
                        Signature(["a", "b"], None, None))
         assert excinfo.value.w_type is TypeError
-        assert excinfo.value._w_value == "msg foo"
+        assert excinfo.value.get_w_value(space) == "foo() msg"
 
 
     def test_args_parsing_into_scope(self):
@@ -448,8 +448,8 @@
 
         class FakeArgErr(ArgErr):
 
-            def getmsg(self, fname):
-                return "msg "+fname
+            def getmsg(self):
+                return "msg"
 
         def _match_signature(*args):
             raise FakeArgErr()
@@ -460,7 +460,7 @@
                                  "obj", [None, None], "foo",
                                  Signature(["a", "b"], None, None))
         assert excinfo.value.w_type is TypeError
-        assert excinfo.value._w_value == "msg foo"
+        assert excinfo.value.get_w_value(space) == "foo() msg"
 
     def test_topacked_frompacked(self):
         space = DummySpace()
@@ -493,35 +493,35 @@
         # got_nargs, nkwds, expected_nargs, has_vararg, has_kwarg,
         # defaults_w, missing_args
         err = ArgErrCount(1, 0, 0, False, False, None, 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes no arguments (1 given)"
+        s = err.getmsg()
+        assert s == "takes no arguments (1 given)"
         err = ArgErrCount(0, 0, 1, False, False, [], 1)
-        s = err.getmsg('foo')
-        assert s == "foo() takes exactly 1 argument (0 given)"
+        s = err.getmsg()
+        assert s == "takes exactly 1 argument (0 given)"
         err = ArgErrCount(3, 0, 2, False, False, [], 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes exactly 2 arguments (3 given)"
+        s = err.getmsg()
+        assert s == "takes exactly 2 arguments (3 given)"
         err = ArgErrCount(3, 0, 2, False, False, ['a'], 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at most 2 arguments (3 given)"
+        s = err.getmsg()
+        assert s == "takes at most 2 arguments (3 given)"
         err = ArgErrCount(1, 0, 2, True, False, [], 1)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at least 2 arguments (1 given)"
+        s = err.getmsg()
+        assert s == "takes at least 2 arguments (1 given)"
         err = ArgErrCount(0, 1, 2, True, False, ['a'], 1)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at least 1 non-keyword argument (0 given)"
+        s = err.getmsg()
+        assert s == "takes at least 1 non-keyword argument (0 given)"
         err = ArgErrCount(2, 1, 1, False, True, [], 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes exactly 1 non-keyword argument (2 given)"
+        s = err.getmsg()
+        assert s == "takes exactly 1 non-keyword argument (2 given)"
         err = ArgErrCount(0, 1, 1, False, True, [], 1)
-        s = err.getmsg('foo')
-        assert s == "foo() takes exactly 1 non-keyword argument (0 given)"
+        s = err.getmsg()
+        assert s == "takes exactly 1 non-keyword argument (0 given)"
         err = ArgErrCount(0, 1, 1, True, True, [], 1)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at least 1 non-keyword argument (0 given)"
+        s = err.getmsg()
+        assert s == "takes at least 1 non-keyword argument (0 given)"
         err = ArgErrCount(2, 1, 1, False, True, ['a'], 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at most 1 non-keyword argument (2 given)"
+        s = err.getmsg()
+        assert s == "takes at most 1 non-keyword argument (2 given)"
 
     def test_bad_type_for_star(self):
         space = self.space
@@ -543,12 +543,12 @@
     def test_unknown_keywords(self):
         space = DummySpace()
         err = ArgErrUnknownKwds(space, 1, ['a', 'b'], [True, False], None)
-        s = err.getmsg('foo')
-        assert s == "foo() got an unexpected keyword argument 'b'"
+        s = err.getmsg()
+        assert s == "got an unexpected keyword argument 'b'"
         err = ArgErrUnknownKwds(space, 2, ['a', 'b', 'c'],
                                 [True, False, False], None)
-        s = err.getmsg('foo')
-        assert s == "foo() got 2 unexpected keyword arguments"
+        s = err.getmsg()
+        assert s == "got 2 unexpected keyword arguments"
 
     def test_unknown_unicode_keyword(self):
         class DummySpaceUnicode(DummySpace):
@@ -558,13 +558,13 @@
         err = ArgErrUnknownKwds(space, 1, ['a', None, 'b', 'c'],
                                 [True, False, True, True],
                                 [unichr(0x1234), u'b', u'c'])
-        s = err.getmsg('foo')
-        assert s == "foo() got an unexpected keyword argument '\xe1\x88\xb4'"
+        s = err.getmsg()
+        assert s == "got an unexpected keyword argument '\xe1\x88\xb4'"
 
     def test_multiple_values(self):
         err = ArgErrMultipleValues('bla')
-        s = err.getmsg('foo')
-        assert s == "foo() got multiple values for keyword argument 'bla'"
+        s = err.getmsg()
+        assert s == "got multiple values for keyword argument 'bla'"
 
 class AppTestArgument:
     def test_error_message(self):
diff --git a/pypy/jit/backend/llsupport/test/test_runner.py b/pypy/jit/backend/llsupport/test/test_runner.py
--- a/pypy/jit/backend/llsupport/test/test_runner.py
+++ b/pypy/jit/backend/llsupport/test/test_runner.py
@@ -8,6 +8,12 @@
 
 class MyLLCPU(AbstractLLCPU):
     supports_floats = True
+
+    class assembler(object):
+        @staticmethod
+        def set_debug(flag):
+            pass
+    
     def compile_loop(self, inputargs, operations, looptoken):
         py.test.skip("llsupport test: cannot compile operations")
 
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -17,6 +17,7 @@
 from pypy.rpython.llinterp import LLException
 from pypy.jit.codewriter import heaptracker, longlong
 from pypy.rlib.rarithmetic import intmask
+from pypy.jit.backend.detect_cpu import autodetect_main_model_and_size
 
 def boxfloat(x):
     return BoxFloat(longlong.getfloatstorage(x))
@@ -27,6 +28,9 @@
 
 class Runner(object):
 
+    add_loop_instruction = ['overload for a specific cpu']
+    bridge_loop_instruction = ['overload for a specific cpu']
+
     def execute_operation(self, opname, valueboxes, result_type, descr=None):
         inputargs, operations = self._get_single_operation_list(opname,
                                                                 result_type,
@@ -2974,6 +2978,56 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == -10
 
+    def test_compile_asmlen(self):
+        from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
+        if not isinstance(self.cpu, AbstractLLCPU):
+            py.test.skip("pointless test on non-asm")
+        from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
+        import ctypes
+        ops = """
+        [i2]
+        i0 = same_as(i2)    # but forced to be in a register
+        label(i0, descr=1)
+        i1 = int_add(i0, i0)
+        guard_true(i1, descr=faildesr) [i1]
+        jump(i1, descr=1)
+        """
+        faildescr = BasicFailDescr(2)
+        loop = parse(ops, self.cpu, namespace=locals())
+        faildescr = loop.operations[-2].getdescr()
+        jumpdescr = loop.operations[-1].getdescr()
+        bridge_ops = """
+        [i0]
+        jump(i0, descr=jumpdescr)
+        """
+        bridge = parse(bridge_ops, self.cpu, namespace=locals())
+        looptoken = JitCellToken()
+        self.cpu.assembler.set_debug(False)
+        info = self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+        bridge_info = self.cpu.compile_bridge(faildescr, bridge.inputargs,
+                                              bridge.operations,
+                                              looptoken)
+        self.cpu.assembler.set_debug(True) # always on untranslated
+        assert info.asmlen != 0
+        cpuname = autodetect_main_model_and_size()
+        # XXX we have to check the precise assembler, otherwise
+        # we don't quite know if borders are correct
+
+        def checkops(mc, ops):
+            assert len(mc) == len(ops)
+            for i in range(len(mc)):
+                assert mc[i].split("\t")[-1].startswith(ops[i])
+            
+        data = ctypes.string_at(info.asmaddr, info.asmlen)
+        mc = list(machine_code_dump(data, info.asmaddr, cpuname))
+        lines = [line for line in mc if line.count('\t') == 2]
+        checkops(lines, self.add_loop_instructions)
+        data = ctypes.string_at(bridge_info.asmaddr, bridge_info.asmlen)
+        mc = list(machine_code_dump(data, bridge_info.asmaddr, cpuname))
+        lines = [line for line in mc if line.count('\t') == 2]
+        checkops(lines, self.bridge_loop_instructions)
+
+
     def test_compile_bridge_with_target(self):
         # This test creates a loopy piece of code in a bridge, and builds another
         # unrelated loop that ends in a jump directly to this loopy bit of code.
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -7,6 +7,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.annlowlevel import llhelper
+from pypy.rlib.jit import AsmInfo
 from pypy.jit.backend.model import CompiledLoopToken
 from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
     gpr_reg_mgr_cls, _valid_addressing_size)
@@ -39,6 +40,7 @@
 from pypy.jit.codewriter.effectinfo import EffectInfo
 from pypy.jit.codewriter import longlong
 from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.objectmodel import compute_unique_id
 
 # darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
 # better safe than sorry
@@ -58,7 +60,8 @@
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
 DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
-                              ('bridge', lltype.Signed), # 0 or 1
+                              ('type', lltype.Char), # 'b'ridge, 'l'abel or
+                                                     # 'e'ntry point
                               ('number', lltype.Signed))
 
 class Assembler386(object):
@@ -147,12 +150,15 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for struct in self.loop_run_counters:
-                if struct.bridge:
-                    prefix = 'bridge '
+            for i in range(len(self.loop_run_counters)):
+                struct = self.loop_run_counters[i]
+                if struct.type == 'l':
+                    prefix = 'TargetToken(%d)' % struct.number
+                elif struct.type == 'b':
+                    prefix = 'bridge ' + str(struct.number)
                 else:
-                    prefix = 'loop '
-                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
+                    prefix = 'entry ' + str(struct.number)
+                debug_print(prefix + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -406,6 +412,7 @@
         '''adds the following attributes to looptoken:
                _x86_function_addr   (address of the generated func, as an int)
                _x86_loop_code       (debug: addr of the start of the ResOps)
+               _x86_fullsize        (debug: full size including failure)
                _x86_debug_checksum
         '''
         # XXX this function is too longish and contains some code
@@ -422,8 +429,8 @@
 
         self.setup(looptoken)
         if log:
-            self._register_counter(False, looptoken.number)
-            operations = self._inject_debugging_code(looptoken, operations)
+            operations = self._inject_debugging_code(looptoken, operations,
+                                                     'e', looptoken.number)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         #
@@ -471,7 +478,8 @@
             name = "Loop # %s: %s" % (looptoken.number, loopname)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, full_size)
-        return ops_offset
+        return AsmInfo(ops_offset, rawstart + looppos,
+                       size_excluding_failure_stuff - looppos)
 
     def assemble_bridge(self, faildescr, inputargs, operations,
                         original_loop_token, log):
@@ -480,17 +488,12 @@
             assert len(set(inputargs)) == len(inputargs)
 
         descr_number = self.cpu.get_fail_descr_number(faildescr)
-        try:
-            failure_recovery = self._find_failure_recovery_bytecode(faildescr)
-        except ValueError:
-            debug_print("Bridge out of guard", descr_number,
-                        "was already compiled!")
-            return
+        failure_recovery = self._find_failure_recovery_bytecode(faildescr)
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter(True, descr_number)
-            operations = self._inject_debugging_code(faildescr, operations)
+            operations = self._inject_debugging_code(faildescr, operations,
+                                                     'b', descr_number)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
         if not we_are_translated():
@@ -498,6 +501,7 @@
                     [loc.assembler() for loc in faildescr._x86_debug_faillocs])
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
+        startpos = self.mc.get_relative_pos()
         operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
                                              operations,
                                              self.current_clt.allgcrefs)
@@ -532,7 +536,7 @@
             name = "Bridge # %s" % (descr_number,)
             self.cpu.profile_agent.native_code_written(name,
                                                        rawstart, fullsize)
-        return ops_offset
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
 
     def write_pending_failure_recoveries(self):
         # for each pending guard, generate the code of the recovery stub
@@ -597,22 +601,29 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self, bridge, number):
-        if self._debug:
-            # YYY very minor leak -- we need the counters to stay alive
-            # forever, just because we want to report them at the end
-            # of the process
-            struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
-                                   track_allocation=False)
-            struct.i = 0
-            struct.bridge = int(bridge)
+    def _register_counter(self, tp, number, token):
+        # YYY very minor leak -- we need the counters to stay alive
+        # forever, just because we want to report them at the end
+        # of the process
+        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
+                               track_allocation=False)
+        struct.i = 0
+        struct.type = tp
+        if tp == 'b' or tp == 'e':
             struct.number = number
-            self.loop_run_counters.append(struct)
+        else:
+            assert token
+            struct.number = compute_unique_id(token)
+        self.loop_run_counters.append(struct)            
+        return struct
 
     def _find_failure_recovery_bytecode(self, faildescr):
         adr_jump_offset = faildescr._x86_adr_jump_offset
         if adr_jump_offset == 0:
-            raise ValueError
+            # This case should be prevented by the logic in compile.py:
+            # look for CNT_BUSY_FLAG, which disables tracing from a guard
+            # when another tracing from the same guard is already in progress.
+            raise BridgeAlreadyCompiled
         # follow the JMP/Jcond
         p = rffi.cast(rffi.INTP, adr_jump_offset)
         adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
@@ -651,27 +662,36 @@
             targettoken._x86_loop_code += rawstart
         self.target_tokens_currently_compiling = None
 
+    def _append_debugging_code(self, operations, tp, number, token):
+        counter = self._register_counter(tp, number, token)
+        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
+        box = BoxInt()
+        box2 = BoxInt()
+        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
+                            box, descr=self.debug_counter_descr),
+               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
+               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
+                            None, descr=self.debug_counter_descr)]
+        operations.extend(ops)
+        
     @specialize.argtype(1)
-    def _inject_debugging_code(self, looptoken, operations):
+    def _inject_debugging_code(self, looptoken, operations, tp, number):
         if self._debug:
             # before doing anything, let's increase a counter
             s = 0
             for op in operations:
                 s += op.getopnum()
             looptoken._x86_debug_checksum = s
-            c_adr = ConstInt(rffi.cast(lltype.Signed,
-                                       self.loop_run_counters[-1]))
-            box = BoxInt()
-            box2 = BoxInt()
-            ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
-                                box, descr=self.debug_counter_descr),
-                   ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
-                   ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
-                                None, descr=self.debug_counter_descr)]
-            if operations[0].getopnum() == rop.LABEL:
-                operations = [operations[0]] + ops + operations[1:]
-            else:
-                operations =  ops + operations
+
+            newoperations = []
+            self._append_debugging_code(newoperations, tp, number,
+                                        None)
+            for op in operations:
+                newoperations.append(op)
+                if op.getopnum() == rop.LABEL:
+                    self._append_debugging_code(newoperations, 'l', number,
+                                                op.getdescr())
+            operations = newoperations
         return operations
 
     def _assemble(self, regalloc, operations):
@@ -792,7 +812,10 @@
         target = newlooptoken._x86_function_addr
         mc = codebuf.MachineCodeBlockWrapper()
         mc.JMP(imm(target))
-        assert mc.get_relative_pos() <= 13  # keep in sync with prepare_loop()
+        if WORD == 4:         # keep in sync with prepare_loop()
+            assert mc.get_relative_pos() == 5
+        else:
+            assert mc.get_relative_pos() <= 13
         mc.copy_to_raw_memory(oldadr)
 
     def dump(self, text):
@@ -2532,3 +2555,6 @@
 def not_implemented(msg):
     os.write(2, '[x86/asm] %s\n' % msg)
     raise NotImplementedError(msg)
+
+class BridgeAlreadyCompiled(Exception):
+    pass
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -188,7 +188,10 @@
         # note: we need to make a copy of inputargs because possibly_free_vars
         # is also used on op args, which is a non-resizable list
         self.possibly_free_vars(list(inputargs))
-        self.min_bytes_before_label = 13
+        if WORD == 4:       # see redirect_call_assembler()
+            self.min_bytes_before_label = 5
+        else:
+            self.min_bytes_before_label = 13
         return operations
 
     def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
@@ -741,7 +744,7 @@
         self.xrm.possibly_free_var(op.getarg(0))
 
     def consider_cast_int_to_float(self, op):
-        loc0 = self.rm.loc(op.getarg(0))
+        loc0 = self.rm.make_sure_var_in_reg(op.getarg(0))
         loc1 = self.xrm.force_allocate_reg(op.result)
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -33,6 +33,13 @@
     # for the individual tests see
     # ====> ../../test/runner_test.py
 
+    add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
+    if WORD == 4:
+        bridge_loop_instructions = ['lea', 'jmp']
+    else:
+        # the 'mov' is part of the 'jmp' so far
+        bridge_loop_instructions = ['lea', 'mov', 'jmp']
+
     def setup_method(self, meth):
         self.cpu = CPU(rtyper=None, stats=FakeStats())
         self.cpu.setup_once()
@@ -416,12 +423,13 @@
             ]
         inputargs = [i0]
         debug._log = dlog = debug.DebugLog()
-        ops_offset = self.cpu.compile_loop(inputargs, operations, looptoken)
+        info = self.cpu.compile_loop(inputargs, operations, looptoken)
+        ops_offset = info.ops_offset
         debug._log = None
         #
         assert ops_offset is looptoken._x86_ops_offset
-        # getfield_raw/int_add/setfield_raw + ops + None
-        assert len(ops_offset) == 3 + len(operations) + 1
+        # 2*(getfield_raw/int_add/setfield_raw) + ops + None
+        assert len(ops_offset) == 2*3 + len(operations) + 1
         assert (ops_offset[operations[0]] <=
                 ops_offset[operations[1]] <=
                 ops_offset[operations[2]] <=
@@ -519,6 +527,7 @@
         from pypy.tool.logparser import parse_log_file, extract_category
         from pypy.rlib import debug
 
+        targettoken, preambletoken = TargetToken(), TargetToken()
         loop = """
         [i0]
         label(i0, descr=preambletoken)
@@ -533,8 +542,8 @@
         guard_false(i12) []
         jump(i11, descr=targettoken)
         """
-        ops = parse(loop, namespace={'targettoken': TargetToken(),
-                                     'preambletoken': TargetToken()})
+        ops = parse(loop, namespace={'targettoken': targettoken,
+                                     'preambletoken': preambletoken})
         debug._log = dlog = debug.DebugLog()
         try:
             self.cpu.assembler.set_debug(True)
@@ -545,11 +554,16 @@
             struct = self.cpu.assembler.loop_run_counters[0]
             assert struct.i == 1
             struct = self.cpu.assembler.loop_run_counters[1]
-            assert struct.i == 10
+            assert struct.i == 1
+            struct = self.cpu.assembler.loop_run_counters[2]
+            assert struct.i == 9
             self.cpu.finish_once()
         finally:
             debug._log = None
-        assert ('jit-backend-counts', [('debug_print', 'loop -1:10')]) in dlog
+        l0 = ('debug_print', 'entry -1:1')
+        l1 = ('debug_print', preambletoken.repr_of_descr() + ':1')
+        l2 = ('debug_print', targettoken.repr_of_descr() + ':9')
+        assert ('jit-backend-counts', [l0, l1, l2]) in dlog
 
     def test_debugger_checksum(self):
         loop = """
diff --git a/pypy/jit/backend/x86/test/test_zrpy_platform.py b/pypy/jit/backend/x86/test/test_zrpy_platform.py
--- a/pypy/jit/backend/x86/test/test_zrpy_platform.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_platform.py
@@ -74,8 +74,8 @@
     myjitdriver = jit.JitDriver(greens = [], reds = ['n'])
 
     def entrypoint(argv):
-        myjitdriver.set_param('threshold', 2)
-        myjitdriver.set_param('trace_eagerness', 0)
+        jit.set_param(myjitdriver, 'threshold', 2)
+        jit.set_param(myjitdriver, 'trace_eagerness', 0)
         n = 16
         while n > 0:
             myjitdriver.can_enter_jit(n=n)
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -39,6 +39,7 @@
 def machine_code_dump(data, originaddr, backend_name, label_list=None):
     objdump_backend_option = {
         'x86': 'i386',
+        'x86_32': 'i386',
         'x86_64': 'x86-64',
         'i386': 'i386',
     }
diff --git a/pypy/jit/codewriter/call.py b/pypy/jit/codewriter/call.py
--- a/pypy/jit/codewriter/call.py
+++ b/pypy/jit/codewriter/call.py
@@ -42,8 +42,7 @@
         except AttributeError:
             pass
 
-        def is_candidate(graph):
-            return policy.look_inside_graph(graph)
+        is_candidate = policy.look_inside_graph
 
         assert len(self.jitdrivers_sd) > 0
         todo = [jd.portal_graph for jd in self.jitdrivers_sd]
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -8,11 +8,15 @@
 
 
 class JitPolicy(object):
-    def __init__(self):
+    def __init__(self, jithookiface=None):
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
         self.supports_singlefloats = False
+        if jithookiface is None:
+            from pypy.rlib.jit import JitHookInterface
+            jithookiface = JitHookInterface()
+        self.jithookiface = jithookiface
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
diff --git a/pypy/jit/codewriter/support.py b/pypy/jit/codewriter/support.py
--- a/pypy/jit/codewriter/support.py
+++ b/pypy/jit/codewriter/support.py
@@ -162,7 +162,6 @@
 _ll_4_list_setslice = rlist.ll_listsetslice
 _ll_2_list_delslice_startonly = rlist.ll_listdelslice_startonly
 _ll_3_list_delslice_startstop = rlist.ll_listdelslice_startstop
-_ll_1_list_list2fixed = lltypesystem_rlist.ll_list2fixed
 _ll_2_list_inplace_mul = rlist.ll_inplace_mul
 
 _ll_2_list_getitem_foldable = _ll_2_list_getitem
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -5,6 +5,7 @@
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib.debug import debug_start, debug_stop, debug_print
 from pypy.rlib import rstack
+from pypy.rlib.jit import JitDebugInfo
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
 
@@ -75,7 +76,7 @@
             if descr is not original_jitcell_token:
                 original_jitcell_token.record_jump_to(descr)
             descr.exported_state = None
-            op._descr = None    # clear reference, mostly for tests
+            op.cleardescr()    # clear reference, mostly for tests
         elif isinstance(descr, TargetToken):
             # for a JUMP: record it as a potential jump.
             # (the following test is not enough to prevent more complicated
@@ -90,8 +91,8 @@
             assert descr.exported_state is None 
             if not we_are_translated():
                 op._descr_wref = weakref.ref(op._descr)
-            op._descr = None    # clear reference to prevent the history.Stats
-                                # from keeping the loop alive during tests
+            op.cleardescr()    # clear reference to prevent the history.Stats
+                               # from keeping the loop alive during tests
     # record this looptoken on the QuasiImmut used in the code
     if loop.quasi_immutable_deps is not None:
         for qmut in loop.quasi_immutable_deps:
@@ -112,33 +113,26 @@
     """
     from pypy.jit.metainterp.optimizeopt import optimize_trace
 
-    history = metainterp.history
     metainterp_sd = metainterp.staticdata
     jitdriver_sd = metainterp.jitdriver_sd
+    history = metainterp.history
 
-    if False:
-        part = partial_trace
-        assert False
-        procedur_token = metainterp.get_procedure_token(greenkey)
-        assert procedure_token
-        all_target_tokens = []
-    else:
-        jitcell_token = make_jitcell_token(jitdriver_sd)
-        part = create_empty_loop(metainterp)
-        part.inputargs = inputargs[:]
-        h_ops = history.operations
-        part.resume_at_jump_descr = resume_at_jump_descr
-        part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
-                          [h_ops[i].clone() for i in range(start, len(h_ops))] + \
-                          [ResOperation(rop.LABEL, jumpargs, None, descr=jitcell_token)]
+    jitcell_token = make_jitcell_token(jitdriver_sd)
+    part = create_empty_loop(metainterp)
+    part.inputargs = inputargs[:]
+    h_ops = history.operations
+    part.resume_at_jump_descr = resume_at_jump_descr
+    part.operations = [ResOperation(rop.LABEL, inputargs, None, descr=TargetToken(jitcell_token))] + \
+                      [h_ops[i].clone() for i in range(start, len(h_ops))] + \
+                      [ResOperation(rop.LABEL, jumpargs, None, descr=jitcell_token)]
 
-        try:
-            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
-        except InvalidLoop:
-            return None
-        target_token = part.operations[0].getdescr()
-        assert isinstance(target_token, TargetToken)
-        all_target_tokens = [target_token]
+    try:
+        optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+    except InvalidLoop:
+        return None
+    target_token = part.operations[0].getdescr()
+    assert isinstance(target_token, TargetToken)
+    all_target_tokens = [target_token]
 
     loop = create_empty_loop(metainterp)        
     loop.inputargs = part.inputargs
@@ -176,10 +170,10 @@
     loop.original_jitcell_token = jitcell_token
     for label in all_target_tokens:
         assert isinstance(label, TargetToken)
-        label.original_jitcell_token = jitcell_token
         if label.virtual_state and label.short_preamble:
             metainterp_sd.logger_ops.log_short_preamble([], label.short_preamble)
     jitcell_token.target_tokens = all_target_tokens
+    propagate_original_jitcell_token(loop)
     send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, "loop")
     record_loop_or_bridge(metainterp_sd, loop)
     return all_target_tokens[0]
@@ -247,11 +241,11 @@
     for box in loop.inputargs:
         assert isinstance(box, Box)
 
-    target_token = loop.operations[-1].getdescr()
+    target_token = loop.operations[-1].getdescr()    
     resumekey.compile_and_attach(metainterp, loop)
+    
     target_token = label.getdescr()
     assert isinstance(target_token, TargetToken)
-    target_token.original_jitcell_token = loop.original_jitcell_token
     record_loop_or_bridge(metainterp_sd, loop)
     return target_token
 
@@ -288,14 +282,21 @@
     assert i == len(inputargs)
     loop.operations = extra_ops + loop.operations
 
+def propagate_original_jitcell_token(trace):
+    for op in trace.operations:
+        if op.getopnum() == rop.LABEL:
+            token = op.getdescr()
+            assert isinstance(token, TargetToken)
+            assert token.original_jitcell_token is None
+            token.original_jitcell_token = trace.original_jitcell_token
+            
+    
 def send_loop_to_backend(greenkey, jitdriver_sd, metainterp_sd, loop, type):
     vinfo = jitdriver_sd.virtualizable_info
     if vinfo is not None:
         patch_new_loop_to_load_virtualizable_fields(loop, jitdriver_sd)
 
     original_jitcell_token = loop.original_jitcell_token
-    jitdriver_sd.on_compile(metainterp_sd.logger_ops, original_jitcell_token,
-                            loop.operations, type, greenkey)
     loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
     globaldata = metainterp_sd.globaldata
     original_jitcell_token.number = n = globaldata.loopnumbering
@@ -305,21 +306,41 @@
         show_procedures(metainterp_sd, loop)
         loop.check_consistency()
 
+    if metainterp_sd.warmrunnerdesc is not None:
+        hooks = metainterp_sd.warmrunnerdesc.hooks
+        debug_info = JitDebugInfo(jitdriver_sd, metainterp_sd.logger_ops,
+                                  original_jitcell_token, loop.operations,
+                                  type, greenkey)
+        hooks.before_compile(debug_info)
+    else:
+        debug_info = None
+        hooks = None
     operations = get_deep_immutable_oplist(loop.operations)
     metainterp_sd.profiler.start_backend()
     debug_start("jit-backend")
     try:
-        ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
-                                                    original_jitcell_token, name=loopname)
+        asminfo = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
+                                                  original_jitcell_token,
+                                                  name=loopname)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
+    if hooks is not None:
+        debug_info.asminfo = asminfo
+        hooks.after_compile(debug_info)
     metainterp_sd.stats.add_new_loop(loop)
     if not we_are_translated():
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new " + type)
     #
-    metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type, ops_offset)
+    loopname = jitdriver_sd.warmstate.get_location_str(greenkey)
+    if asminfo is not None:
+        ops_offset = asminfo.ops_offset
+    else:
+        ops_offset = None
+    metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n,
+                                      type, ops_offset,
+                                      name=loopname)
     #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
         metainterp_sd.warmrunnerdesc.memory_manager.keep_loop_alive(original_jitcell_token)
@@ -327,25 +348,40 @@
 def send_bridge_to_backend(jitdriver_sd, metainterp_sd, faildescr, inputargs,
                            operations, original_loop_token):
     n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
-    jitdriver_sd.on_compile_bridge(metainterp_sd.logger_ops,
-                                   original_loop_token, operations, n)
     if not we_are_translated():
         show_procedures(metainterp_sd)
         seen = dict.fromkeys(inputargs)
         TreeLoop.check_consistency_of_branch(operations, seen)
+    if metainterp_sd.warmrunnerdesc is not None:
+        hooks = metainterp_sd.warmrunnerdesc.hooks
+        debug_info = JitDebugInfo(jitdriver_sd, metainterp_sd.logger_ops,
+                                  original_loop_token, operations, 'bridge',
+                                  fail_descr_no=n)
+        hooks.before_compile_bridge(debug_info)
+    else:
+        hooks = None
+        debug_info = None
+    operations = get_deep_immutable_oplist(operations)
     metainterp_sd.profiler.start_backend()
-    operations = get_deep_immutable_oplist(operations)
     debug_start("jit-backend")
     try:
-        ops_offset = metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
-                                                      original_loop_token)
+        asminfo = metainterp_sd.cpu.compile_bridge(faildescr, inputargs,
+                                                   operations,
+                                                   original_loop_token)
     finally:
         debug_stop("jit-backend")
     metainterp_sd.profiler.end_backend()
+    if hooks is not None:
+        debug_info.asminfo = asminfo
+        hooks.after_compile_bridge(debug_info)
     if not we_are_translated():
         metainterp_sd.stats.compiled()
     metainterp_sd.log("compiled new bridge")
     #
+    if asminfo is not None:
+        ops_offset = asminfo.ops_offset
+    else:
+        ops_offset = None
     metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
     #
     #if metainterp_sd.warmrunnerdesc is not None:    # for tests
@@ -558,6 +594,7 @@
         inputargs = metainterp.history.inputargs
         if not we_are_translated():
             self._debug_suboperations = new_loop.operations
+        propagate_original_jitcell_token(new_loop)
         send_bridge_to_backend(metainterp.jitdriver_sd, metainterp.staticdata,
                                self, inputargs, new_loop.operations,
                                new_loop.original_jitcell_token)
@@ -744,6 +781,7 @@
         jitdriver_sd = metainterp.jitdriver_sd
         redargs = new_loop.inputargs
         new_loop.original_jitcell_token = jitcell_token = make_jitcell_token(jitdriver_sd)
+        propagate_original_jitcell_token(new_loop)
         send_loop_to_backend(self.original_greenkey, metainterp.jitdriver_sd,
                              metainterp_sd, new_loop, "entry bridge")
         # send the new_loop to warmspot.py, to be called directly the next time
diff --git a/pypy/jit/metainterp/heapcache.py b/pypy/jit/metainterp/heapcache.py
--- a/pypy/jit/metainterp/heapcache.py
+++ b/pypy/jit/metainterp/heapcache.py
@@ -79,9 +79,9 @@
             opnum == rop.COPYSTRCONTENT or
             opnum == rop.COPYUNICODECONTENT):
             return
-        if rop._OVF_FIRST <= opnum <= rop._OVF_LAST:
-            return
-        if rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST:
+        if (rop._OVF_FIRST <= opnum <= rop._OVF_LAST or
+            rop._NOSIDEEFFECT_FIRST <= opnum <= rop._NOSIDEEFFECT_LAST or
+            rop._GUARD_FIRST <= opnum <= rop._GUARD_LAST):
             return
         if opnum == rop.CALL or opnum == rop.CALL_LOOPINVARIANT:
             effectinfo = descr.get_extra_info()
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -1003,35 +1003,16 @@
         return insns
 
     def check_simple_loop(self, expected=None, **check):
-        # Usefull in the simplest case when we have only one trace ending with
-        # a jump back to itself and possibly a few bridges ending with finnish.
-        # Only the operations within the loop formed by that single jump will
-        # be counted.
-
-        # XXX hacked version, ignore and remove me when jit-targets is merged.
-        loops = self.get_all_loops()
-        loops = [loop for loop in loops if 'Preamble' not in repr(loop)] #XXX
-        assert len(loops) == 1
-        loop, = loops
-        jumpop = loop.operations[-1]
-        assert jumpop.getopnum() == rop.JUMP
-        insns = {}
-        for op in loop.operations:
-            opname = op.getopname()
-            insns[opname] = insns.get(opname, 0) + 1
-        return self._check_insns(insns, expected, check)
-
-    def check_simple_loop(self, expected=None, **check):
-        # Usefull in the simplest case when we have only one trace ending with
-        # a jump back to itself and possibly a few bridges ending with finnish.
-        # Only the operations within the loop formed by that single jump will
-        # be counted.
+        """ Usefull in the simplest case when we have only one trace ending with
+        a jump back to itself and possibly a few bridges.
+        Only the operations within the loop formed by that single jump will
+        be counted.
+        """
         loops = self.get_all_loops()
         assert len(loops) == 1
         loop = loops[0]
         jumpop = loop.operations[-1]
         assert jumpop.getopnum() == rop.JUMP
-        assert self.check_resops(jump=1)
         labels = [op for op in loop.operations if op.getopnum() == rop.LABEL]
         targets = [op._descr_wref() for op in labels]
         assert None not in targets # TargetToken was freed, give up
diff --git a/pypy/jit/metainterp/jitdriver.py b/pypy/jit/metainterp/jitdriver.py
--- a/pypy/jit/metainterp/jitdriver.py
+++ b/pypy/jit/metainterp/jitdriver.py
@@ -21,7 +21,6 @@
     #    self.portal_finishtoken... pypy.jit.metainterp.pyjitpl
     #    self.index             ... pypy.jit.codewriter.call
     #    self.mainjitcode       ... pypy.jit.codewriter.call
-    #    self.on_compile        ... pypy.jit.metainterp.warmstate
 
     # These attributes are read by the backend in CALL_ASSEMBLER:
     #    self.assembler_helper_adr
diff --git a/pypy/jit/metainterp/jitprof.py b/pypy/jit/metainterp/jitprof.py
--- a/pypy/jit/metainterp/jitprof.py
+++ b/pypy/jit/metainterp/jitprof.py
@@ -18,8 +18,8 @@
 OPT_FORCINGS
 ABORT_TOO_LONG
 ABORT_BRIDGE
+ABORT_BAD_LOOP
 ABORT_ESCAPE
-ABORT_BAD_LOOP
 ABORT_FORCE_QUASIIMMUT
 NVIRTUALS
 NVHOLES
@@ -30,10 +30,13 @@
 TOTAL_FREED_BRIDGES
 """
 
+counter_names = []
+
 def _setup():
     names = counters.split()
     for i, name in enumerate(names):
         globals()[name] = i
+        counter_names.append(name)
     global ncounters
     ncounters = len(names)
 _setup()
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -13,14 +13,14 @@
         self.metainterp_sd = metainterp_sd
         self.guard_number = guard_number
 
-    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
+    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None, name=''):
         if type is None:
             debug_start("jit-log-noopt-loop")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-noopt-loop")
         else:
             debug_start("jit-log-opt-loop")
-            debug_print("# Loop", number, ":", type,
+            debug_print("# Loop", number, '(%s)' % name , ":", type,
                         "with", len(operations), "ops")
             logops = self._log_operations(inputargs, operations, ops_offset)
             debug_stop("jit-log-opt-loop")
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -234,11 +234,11 @@
             # longlongs are treated as floats, see
             # e.g. llsupport/descr.py:getDescrClass
             is_float = True
-        elif kind == 'u':
+        elif kind == 'u' or kind == 's':
             # they're all False
             pass
         else:
-            assert False, "unsupported ffitype or kind"
+            raise NotImplementedError("unsupported ffitype or kind: %s" % kind)
         #
         fieldsize = rffi.getintfield(ffitype, 'c_size')
         return self.optimizer.cpu.interiorfielddescrof_dynamic(
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_multilabel.py
@@ -1,10 +1,13 @@
 from __future__ import with_statement
 from pypy.jit.metainterp.optimizeopt.test.test_util import (
-    LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot)
+    LLtypeMixin, BaseTest, Storage, _sortboxes, FakeDescrWithSnapshot,
+    FakeMetaInterpStaticData)
 from pypy.jit.metainterp.history import TreeLoop, JitCellToken, TargetToken
 from pypy.jit.metainterp.resoperation import rop, opname, ResOperation
 from pypy.jit.metainterp.optimize import InvalidLoop
 from py.test import raises
+from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
+from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 
 class BaseTestMultiLabel(BaseTest):
     enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
@@ -84,6 +87,8 @@
         
         return optimized
 
+class OptimizeoptTestMultiLabel(BaseTestMultiLabel):
+
     def test_simple(self):
         ops = """
         [i1]
@@ -381,6 +386,82 @@
         """
         self.optimize_loop(ops, expected)
 
-class TestLLtype(BaseTestMultiLabel, LLtypeMixin):
+    def test_virtual_as_field_of_forced_box(self):
+        ops = """
+        [p0]
+        pv1 = new_with_vtable(ConstClass(node_vtable))
+        label(pv1, p0)
+        pv2 = new_with_vtable(ConstClass(node_vtable))
+        setfield_gc(pv2, pv1, descr=valuedescr)
+        jump(pv1, pv2)
+        """
+        with raises(InvalidLoop):
+            self.optimize_loop(ops, ops)
+
+class OptRenameStrlen(Optimization):
+    def propagate_forward(self, op):
+        dispatch_opt(self, op)
+
+    def optimize_STRLEN(self, op):
+        newop = op.clone()
+        newop.result = op.result.clonebox()
+        self.emit_operation(newop)
+        self.make_equal_to(op.result, self.getvalue(newop.result))
+    
+dispatch_opt = make_dispatcher_method(OptRenameStrlen, 'optimize_',
+                                      default=OptRenameStrlen.emit_operation)
+
+class BaseTestOptimizerRenamingBoxes(BaseTestMultiLabel):
+
+    def _do_optimize_loop(self, loop, call_pure_results):
+        from pypy.jit.metainterp.optimizeopt.unroll import optimize_unroll
+        from pypy.jit.metainterp.optimizeopt.util import args_dict
+        from pypy.jit.metainterp.optimizeopt.pure import OptPure
+
+        self.loop = loop
+        loop.call_pure_results = args_dict()
+        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+        optimize_unroll(metainterp_sd, loop, [OptRenameStrlen(), OptPure()], True)
+
+    def test_optimizer_renaming_boxes(self):
+        ops = """
+        [p1]
+        i1 = strlen(p1)
+        label(p1)
+        i2 = strlen(p1)
+        i3 = int_add(i2, 7)
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        i1 = strlen(p1)
+        label(p1, i1)
+        i11 = same_as(i1)
+        i2 = int_add(i11, 7)
+        jump(p1, i11)
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_optimizer_renaming_boxes_not_imported(self):
+        ops = """
+        [p1]
+        i1 = strlen(p1)
+        label(p1)
+        jump(p1)
+        """
+        expected = """
+        [p1]
+        i1 = strlen(p1)
+        label(p1, i1)
+        i11 = same_as(i1)
+        jump(p1, i11)
+        """
+        self.optimize_loop(ops, expected)
+
+        
+
+class TestLLtype(OptimizeoptTestMultiLabel, LLtypeMixin):
     pass
 
+class TestOptimizerRenamingBoxesLLtype(BaseTestOptimizerRenamingBoxes, LLtypeMixin):
+    pass
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -117,7 +117,7 @@
 
     def optimize_loop(self, ops, optops, call_pure_results=None):
         loop = self.parse(ops)
-        token = JitCellToken() 
+        token = JitCellToken()
         loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
                           loop.operations
         if loop.operations[-1].getopnum() == rop.JUMP:
diff --git a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -7759,7 +7759,7 @@
         jump(i0, p0, i2)
         """
         self.optimize_loop(ops, expected)
-
+        
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
     pass
 
diff --git a/pypy/jit/metainterp/optimizeopt/unroll.py b/pypy/jit/metainterp/optimizeopt/unroll.py
--- a/pypy/jit/metainterp/optimizeopt/unroll.py
+++ b/pypy/jit/metainterp/optimizeopt/unroll.py
@@ -265,7 +265,16 @@
                     self.optimizer.importable_values[value] = imp
                 newvalue = self.optimizer.getvalue(op.result)
                 newresult = newvalue.get_key_box()
-                assert newresult is op.result or newvalue.is_constant()
+                # note that emitting here SAME_AS should not happen, but
+                # in case it does, we would prefer to be suboptimal in asm
+                # to a fatal RPython exception.
+                if newresult is not op.result and not newvalue.is_constant():
+                    op = ResOperation(rop.SAME_AS, [op.result], newresult)
+                    self.optimizer._newoperations.append(op)
+                    if self.optimizer.loop.logops:
+                        debug_print('  Falling back to add extra: ' +
+                                    self.optimizer.loop.logops.repr_of_resop(op))
+                    
         self.optimizer.flush()
         self.optimizer.emitting_dissabled = False
 
@@ -430,7 +439,13 @@
             return
         for a in op.getarglist():
             if not isinstance(a, Const) and a not in seen:
-                self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer, seen)
+                self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
+                                             seen)
+
+        if self.optimizer.loop.logops:
+            debug_print('  Emitting short op: ' +
+                        self.optimizer.loop.logops.repr_of_resop(op))
+
         optimizer.send_extra_operation(op)
         seen[op.result] = True
         if op.is_ovf():
diff --git a/pypy/jit/metainterp/optimizeopt/virtualstate.py b/pypy/jit/metainterp/optimizeopt/virtualstate.py
--- a/pypy/jit/metainterp/optimizeopt/virtualstate.py
+++ b/pypy/jit/metainterp/optimizeopt/virtualstate.py
@@ -409,7 +409,13 @@
         if self.level == LEVEL_CONSTANT:
             return
         assert 0 <= self.position_in_notvirtuals
-        boxes[self.position_in_notvirtuals] = value.force_box(optimizer)
+        if optimizer:
+            box = value.force_box(optimizer)
+        else:
+            if value.is_virtual():
+                raise BadVirtualState
+            box = value.get_key_box()
+        boxes[self.position_in_notvirtuals] = box
 
     def _enum(self, virtual_state):
         if self.level == LEVEL_CONSTANT:
@@ -471,8 +477,14 @@
             optimizer = optimizer.optearlyforce
         assert len(values) == len(self.state)
         inputargs = [None] * len(self.notvirtuals)
+
+        # We try twice. The first time around we allow boxes to be forced
+        # which might change the virtual state if the box appear in more
+        # than one place among the inputargs.
         for i in range(len(values)):
             self.state[i].enum_forced_boxes(inputargs, values[i], optimizer)
+        for i in range(len(values)):
+            self.state[i].enum_forced_boxes(inputargs, values[i], None)
 
         if keyboxes:
             for i in range(len(values)):
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -976,10 +976,13 @@
         self.verify_green_args(jitdriver_sd, greenboxes)
         self.debug_merge_point(jitdriver_sd, jdindex, self.metainterp.in_recursion,
                                greenboxes)
-
+        
         if self.metainterp.seen_loop_header_for_jdindex < 0:
-            if not jitdriver_sd.no_loop_header or not any_operation:
+            if not any_operation:
                 return
+            if self.metainterp.in_recursion or not self.metainterp.get_procedure_token(greenboxes, True):
+                if not jitdriver_sd.no_loop_header:
+                    return
             # automatically add a loop_header if there is none
             self.metainterp.seen_loop_header_for_jdindex = jdindex
         #
@@ -1550,6 +1553,7 @@
 
 class MetaInterp(object):
     in_recursion = 0
+    cancel_count = 0
 
     def __init__(self, staticdata, jitdriver_sd):
         self.staticdata = staticdata
@@ -1790,6 +1794,15 @@
     def aborted_tracing(self, reason):
         self.staticdata.profiler.count(reason)
         debug_print('~~~ ABORTING TRACING')
+        jd_sd = self.jitdriver_sd
+        if not self.current_merge_points:
+            greenkey = None # we're in the bridge
+        else:
+            greenkey = self.current_merge_points[0][0][:jd_sd.num_green_args]
+            self.staticdata.warmrunnerdesc.hooks.on_abort(reason,
+                                                          jd_sd.jitdriver,
+                                                          greenkey,
+                                                          jd_sd.warmstate.get_location_str(greenkey))
         self.staticdata.stats.aborted()
 
     def blackhole_if_trace_too_long(self):
@@ -1963,9 +1976,14 @@
                         raise SwitchToBlackhole(ABORT_BAD_LOOP) # For now
                 self.compile_loop(original_boxes, live_arg_boxes, start, resumedescr)
                 # creation of the loop was cancelled!
+                self.cancel_count += 1
+                if self.staticdata.warmrunnerdesc:
+                    memmgr = self.staticdata.warmrunnerdesc.memory_manager
+                    if memmgr:
+                        if self.cancel_count > memmgr.max_unroll_loops:
+                            self.staticdata.log('cancelled too many times!')
+                            raise SwitchToBlackhole(ABORT_BAD_LOOP)
                 self.staticdata.log('cancelled, tracing more...')
-                #self.staticdata.log('cancelled, stopping tracing')
-                #raise SwitchToBlackhole(ABORT_BAD_LOOP)
 
         # Otherwise, no loop found so far, so continue tracing.
         start = len(self.history.operations)
@@ -2053,9 +2071,15 @@
             from pypy.jit.metainterp.resoperation import opname
             raise NotImplementedError(opname[opnum])
 
-    def get_procedure_token(self, greenkey):
+    def get_procedure_token(self, greenkey, with_compiled_targets=False):
         cell = self.jitdriver_sd.warmstate.jit_cell_at_key(greenkey)
-        return cell.get_procedure_token()
+        token = cell.get_procedure_token()
+        if with_compiled_targets:
+            if not token:
+                return None
+            if not token.target_tokens:
+                return None
+        return token
         
     def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
@@ -2088,11 +2112,9 @@
     def compile_trace(self, live_arg_boxes, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = live_arg_boxes[:num_green_args]
-        target_jitcell_token = self.get_procedure_token(greenkey)
+        target_jitcell_token = self.get_procedure_token(greenkey, True)
         if not target_jitcell_token:
             return
-        if not target_jitcell_token.target_tokens:
-            return
 
         self.history.record(rop.JUMP, live_arg_boxes[num_green_args:], None,
                             descr=target_jitcell_token)
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -16,15 +16,15 @@
     # debug
     name = ""
     pc = 0
+    opnum = 0
+
+    _attrs_ = ('result',)
 
     def __init__(self, result):
         self.result = result
 
-    # methods implemented by each concrete class
-    # ------------------------------------------
-
     def getopnum(self):
-        raise NotImplementedError
+        return self.opnum
 
     # methods implemented by the arity mixins
     # ---------------------------------------
@@ -64,6 +64,9 @@
     def setdescr(self, descr):
         raise NotImplementedError
 
+    def cleardescr(self):
+        pass
+
     # common methods
     # --------------
 
@@ -196,6 +199,9 @@
         self._check_descr(descr)
         self._descr = descr
 
+    def cleardescr(self):
+        self._descr = None
+
     def _check_descr(self, descr):
         if not we_are_translated() and getattr(descr, 'I_am_a_descr', False):
             return # needed for the mock case in oparser_model
@@ -590,12 +596,9 @@
         baseclass = PlainResOp
     mixin = arity2mixin.get(arity, N_aryOp)
 
-    def getopnum(self):
-        return opnum
-
     cls_name = '%s_OP' % name
     bases = (get_base_class(mixin, baseclass),)
-    dic = {'getopnum': getopnum}
+    dic = {'opnum': opnum}
     return type(cls_name, bases, dic)
 
 setup(__name__ == '__main__')   # print out the table when run directly
diff --git a/pypy/jit/metainterp/test/support.py b/pypy/jit/metainterp/test/support.py
--- a/pypy/jit/metainterp/test/support.py
+++ b/pypy/jit/metainterp/test/support.py
@@ -56,8 +56,6 @@
         greenfield_info = None
         result_type = result_kind
         portal_runner_ptr = "???"
-        on_compile = lambda *args: None
-        on_compile_bridge = lambda *args: None
 
     stats = history.Stats()
     cpu = CPUClass(rtyper, stats, None, False)
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2629,6 +2629,38 @@
         self.check_jitcell_token_count(1)
         self.check_target_token_count(5)
 
+    def test_max_unroll_loops(self):
+        from pypy.jit.metainterp.optimize import InvalidLoop
+        from pypy.jit.metainterp import optimizeopt
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i'])
+        #
+        def f(n, limit):
+            set_param(myjitdriver, 'threshold', 5)
+            set_param(myjitdriver, 'max_unroll_loops', limit)
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                print i
+                i += 1
+            return i
+        #
+        def my_optimize_trace(*args, **kwds):
+            raise InvalidLoop
+        old_optimize_trace = optimizeopt.optimize_trace
+        optimizeopt.optimize_trace = my_optimize_trace
+        try:
+            res = self.meta_interp(f, [23, 4])
+            assert res == 23
+            self.check_trace_count(0)
+            self.check_aborted_count(3)
+            #
+            res = self.meta_interp(f, [23, 20])
+            assert res == 23
+            self.check_trace_count(0)
+            self.check_aborted_count(2)
+        finally:
+            optimizeopt.optimize_trace = old_optimize_trace
+
     def test_retrace_limit_with_extra_guards(self):
         myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a',
                                                      'node'])
@@ -2697,7 +2729,7 @@
         # bridge back to the preamble of the first loop is produced. A guard in
         # this bridge is later traced resulting in a failed attempt of retracing
         # the second loop.
-        self.check_trace_count(8)
+        self.check_trace_count(9)
 
         # FIXME: Add a gloabl retrace counter and test that we are not trying more than 5 times.
 
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -18,7 +18,7 @@
         self.seen.append((inputargs, operations, token))
 
 class FakeLogger(object):
-    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
+    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None, name=''):
         pass
 
     def repr_of_resop(self, op):
@@ -53,8 +53,6 @@
     call_pure_results = {}
     class jitdriver_sd:
         warmstate = FakeState()
-        on_compile = staticmethod(lambda *args: None)
-        on_compile_bridge = staticmethod(lambda *args: None)
         virtualizable_info = None
 
 def test_compile_loop():
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -148,28 +148,38 @@
         self.check_resops({'jump': 1, 'int_lt': 2, 'setinteriorfield_raw': 4,
                            'getinteriorfield_raw': 8, 'int_add': 6, 'guard_true': 2})
 
-    def test_array_getitem_uint8(self):
+    def _test_getitem_type(self, TYPE, ffitype, COMPUTE_TYPE):
+        reds = ["n", "i", "s", "data"]
+        if COMPUTE_TYPE is lltype.Float:
+            # Move the float var to the back.
+            reds.remove("s")
+            reds.append("s")
         myjitdriver = JitDriver(
             greens = [],
-            reds = ["n", "i", "s", "data"],
+            reds = reds,
         )
         def f(data, n):
-            i = s = 0
+            i = 0
+            s = rffi.cast(COMPUTE_TYPE, 0)
             while i < n:
                 myjitdriver.jit_merge_point(n=n, i=i, s=s, data=data)
-                s += rffi.cast(lltype.Signed, array_getitem(types.uchar, 1, data, 0, 0))
+                s += rffi.cast(COMPUTE_TYPE, array_getitem(ffitype, rffi.sizeof(TYPE), data, 0, 0))
                 i += 1
             return s
+        def main(n):
+            with lltype.scoped_alloc(rffi.CArray(TYPE), 1) as data:
+                data[0] = rffi.cast(TYPE, 200)
+                return f(data, n)
+        assert self.meta_interp(main, [10]) == 2000
 
-        def main(n):
-            with lltype.scoped_alloc(rffi.CArray(rffi.UCHAR), 1) as data:
-                data[0] = rffi.cast(rffi.UCHAR, 200)
-                return f(data, n)
-
-        assert self.meta_interp(main, [10]) == 2000
+    def test_array_getitem_uint8(self):
+        self._test_getitem_type(rffi.UCHAR, types.uchar, lltype.Signed)
         self.check_resops({'jump': 1, 'int_lt': 2, 'getinteriorfield_raw': 2,
                            'guard_true': 2, 'int_add': 4})
 
+    def test_array_getitem_float(self):
+        self._test_getitem_type(rffi.FLOAT, types.float, lltype.Float)
+
 
 class TestFfiCall(FfiCallTests, LLJitMixin):
     supports_all = False
diff --git a/pypy/jit/metainterp/test/test_heapcache.py b/pypy/jit/metainterp/test/test_heapcache.py
--- a/pypy/jit/metainterp/test/test_heapcache.py
+++ b/pypy/jit/metainterp/test/test_heapcache.py
@@ -255,6 +255,11 @@
         assert h.getarrayitem(box1, descr1, index1) is box2
         assert h.getarrayitem(box1, descr1, index2) is box4
 
+        h.invalidate_caches(rop.GUARD_TRUE, None, [])
+        assert h.getfield(box1, descr1) is box2
+        assert h.getarrayitem(box1, descr1, index1) is box2
+        assert h.getarrayitem(box1, descr1, index2) is box4
+
         h.invalidate_caches(
             rop.CALL_LOOPINVARIANT, FakeCallDescr(FakeEffektinfo.EF_LOOPINVARIANT), [])
 
diff --git a/pypy/jit/metainterp/test/test_jitdriver.py b/pypy/jit/metainterp/test/test_jitdriver.py
--- a/pypy/jit/metainterp/test/test_jitdriver.py
+++ b/pypy/jit/metainterp/test/test_jitdriver.py
@@ -10,57 +10,6 @@
 def getloc2(g):
     return "in jitdriver2, with g=%d" % g
 
-class JitDriverTests(object):
-    def test_on_compile(self):
-        called = {}
-        
-        class MyJitDriver(JitDriver):
-            def on_compile(self, logger, looptoken, operations, type, n, m):
-                called[(m, n, type)] = looptoken
-
-        driver = MyJitDriver(greens = ['n', 'm'], reds = ['i'])
-
-        def loop(n, m):
-            i = 0
-            while i < n + m:
-                driver.can_enter_jit(n=n, m=m, i=i)
-                driver.jit_merge_point(n=n, m=m, i=i)
-                i += 1
-
-        self.meta_interp(loop, [1, 4])
-        assert sorted(called.keys()) == [(4, 1, "loop")]
-        self.meta_interp(loop, [2, 4])
-        assert sorted(called.keys()) == [(4, 1, "loop"),
-                                         (4, 2, "loop")]
-
-    def test_on_compile_bridge(self):
-        called = {}
-        
-        class MyJitDriver(JitDriver):
-            def on_compile(self, logger, looptoken, operations, type, n, m):
-                called[(m, n, type)] = loop
-            def on_compile_bridge(self, logger, orig_token, operations, n):
-                assert 'bridge' not in called
-                called['bridge'] = orig_token
-
-        driver = MyJitDriver(greens = ['n', 'm'], reds = ['i'])
-
-        def loop(n, m):
-            i = 0
-            while i < n + m:
-                driver.can_enter_jit(n=n, m=m, i=i)
-                driver.jit_merge_point(n=n, m=m, i=i)
-                if i >= 4:
-                    i += 2
-                i += 1
-
-        self.meta_interp(loop, [1, 10])
-        assert sorted(called.keys()) == ['bridge', (10, 1, "loop")]
-
-
-class TestLLtypeSingle(JitDriverTests, LLJitMixin):
-    pass
-
 class MultipleJitDriversTests(object):
 
     def test_simple(self):
diff --git a/pypy/jit/metainterp/test/test_jitiface.py b/pypy/jit/metainterp/test/test_jitiface.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/metainterp/test/test_jitiface.py
@@ -0,0 +1,148 @@
+
+from pypy.rlib.jit import JitDriver, JitHookInterface
+from pypy.rlib import jit_hooks
+from pypy.jit.metainterp.test.support import LLJitMixin
+from pypy.jit.codewriter.policy import JitPolicy
+from pypy.jit.metainterp.jitprof import ABORT_FORCE_QUASIIMMUT
+from pypy.jit.metainterp.resoperation import rop
+from pypy.rpython.annlowlevel import hlstr
+
+class TestJitHookInterface(LLJitMixin):
+    def test_abort_quasi_immut(self):
+        reasons = []
+        
+        class MyJitIface(JitHookInterface):
+            def on_abort(self, reason, jitdriver, greenkey, greenkey_repr):
+                assert jitdriver is myjitdriver
+                assert len(greenkey) == 1
+                reasons.append(reason)
+                assert greenkey_repr == 'blah'
+
+        iface = MyJitIface()
+
+        myjitdriver = JitDriver(greens=['foo'], reds=['x', 'total'],
+                                get_printable_location=lambda *args: 'blah')
+
+        class Foo:
+            _immutable_fields_ = ['a?']
+            def __init__(self, a):
+                self.a = a
+        def f(a, x):
+            foo = Foo(a)
+            total = 0
+            while x > 0:
+                myjitdriver.jit_merge_point(foo=foo, x=x, total=total)
+                # read a quasi-immutable field out of a Constant
+                total += foo.a
+                foo.a += 1
+                x -= 1
+            return total
+        #
+        assert f(100, 7) == 721
+        res = self.meta_interp(f, [100, 7], policy=JitPolicy(iface))
+        assert res == 721
+        assert reasons == [ABORT_FORCE_QUASIIMMUT] * 2
+
+    def test_on_compile(self):
+        called = []
+        
+        class MyJitIface(JitHookInterface):
+            def after_compile(self, di):
+                called.append(("compile", di.greenkey[1].getint(),
+                               di.greenkey[0].getint(), di.type))
+
+            def before_compile(self, di):
+                called.append(("optimize", di.greenkey[1].getint(),
+                               di.greenkey[0].getint(), di.type))
+
+            #def before_optimize(self, jitdriver, logger, looptoken, oeprations,
+            #                   type, greenkey):
+            #    called.append(("trace", greenkey[1].getint(),
+            #                   greenkey[0].getint(), type))
+
+        iface = MyJitIface()
+
+        driver = JitDriver(greens = ['n', 'm'], reds = ['i'])
+
+        def loop(n, m):
+            i = 0
+            while i < n + m:
+                driver.can_enter_jit(n=n, m=m, i=i)
+                driver.jit_merge_point(n=n, m=m, i=i)
+                i += 1
+
+        self.meta_interp(loop, [1, 4], policy=JitPolicy(iface))
+        assert called == [#("trace", 4, 1, "loop"),
+                          ("optimize", 4, 1, "loop"),
+                          ("compile", 4, 1, "loop")]
+        self.meta_interp(loop, [2, 4], policy=JitPolicy(iface))
+        assert called == [#("trace", 4, 1, "loop"),
+                          ("optimize", 4, 1, "loop"),
+                          ("compile", 4, 1, "loop"),
+                          #("trace", 4, 2, "loop"),
+                          ("optimize", 4, 2, "loop"),
+                          ("compile", 4, 2, "loop")]
+
+    def test_on_compile_bridge(self):
+        called = []
+        
+        class MyJitIface(JitHookInterface):
+            def after_compile(self, di):
+                called.append("compile")
+
+            def after_compile_bridge(self, di):
+                called.append("compile_bridge")
+
+            def before_compile_bridge(self, di):
+                called.append("before_compile_bridge")
+            
+        driver = JitDriver(greens = ['n', 'm'], reds = ['i'])
+
+        def loop(n, m):
+            i = 0
+            while i < n + m:
+                driver.can_enter_jit(n=n, m=m, i=i)
+                driver.jit_merge_point(n=n, m=m, i=i)
+                if i >= 4:
+                    i += 2
+                i += 1
+
+        self.meta_interp(loop, [1, 10], policy=JitPolicy(MyJitIface()))
+        assert called == ["compile", "before_compile_bridge", "compile_bridge"]
+
+    def test_resop_interface(self):
+        driver = JitDriver(greens = [], reds = ['i'])
+
+        def loop(i):
+            while i > 0:
+                driver.jit_merge_point(i=i)
+                i -= 1
+
+        def main():
+            loop(1)
+            op = jit_hooks.resop_new(rop.INT_ADD,
+                                     [jit_hooks.boxint_new(3),
+                                      jit_hooks.boxint_new(4)],
+                                     jit_hooks.boxint_new(1))
+            assert hlstr(jit_hooks.resop_getopname(op)) == 'int_add'
+            assert jit_hooks.resop_getopnum(op) == rop.INT_ADD
+            box = jit_hooks.resop_getarg(op, 0)
+            assert jit_hooks.box_getint(box) == 3
+            box2 = jit_hooks.box_clone(box)
+            assert box2 != box
+            assert jit_hooks.box_getint(box2) == 3
+            assert not jit_hooks.box_isconst(box2)
+            box3 = jit_hooks.box_constbox(box)
+            assert jit_hooks.box_getint(box) == 3
+            assert jit_hooks.box_isconst(box3)
+            box4 = jit_hooks.box_nonconstbox(box)
+            assert not jit_hooks.box_isconst(box4)
+            box5 = jit_hooks.boxint_new(18)
+            jit_hooks.resop_setarg(op, 0, box5)
+            assert jit_hooks.resop_getarg(op, 0) == box5
+            box6 = jit_hooks.resop_getresult(op)
+            assert jit_hooks.box_getint(box6) == 1
+            jit_hooks.resop_setresult(op, box5)
+            assert jit_hooks.resop_getresult(op) == box5
+
+        self.meta_interp(main, [])
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -180,7 +180,7 @@
     def test_intro_loop(self):
         bare_logger = logger.Logger(self.make_metainterp_sd())
         output = capturing(bare_logger.log_loop, [], [], 1, "foo")
-        assert output.splitlines()[0] == "# Loop 1 : foo with 0 ops"
+        assert output.splitlines()[0] == "# Loop 1 () : foo with 0 ops"
         pure_parse(output)
 
     def test_intro_bridge(self):
diff --git a/pypy/jit/metainterp/test/test_loop.py b/pypy/jit/metainterp/test/test_loop.py
--- a/pypy/jit/metainterp/test/test_loop.py
+++ b/pypy/jit/metainterp/test/test_loop.py
@@ -756,7 +756,7 @@
         res = self.meta_interp(interpret, [1])
         assert res == interpret(1)
         # XXX it's unsure how many loops should be there
-        self.check_trace_count(3)
+        self.check_trace_count(2)
 
     def test_path_with_operations_not_from_start(self):
         jitdriver = JitDriver(greens = ['k'], reds = ['n', 'z'])
diff --git a/pypy/jit/metainterp/test/test_resoperation.py b/pypy/jit/metainterp/test/test_resoperation.py
--- a/pypy/jit/metainterp/test/test_resoperation.py
+++ b/pypy/jit/metainterp/test/test_resoperation.py
@@ -30,17 +30,17 @@
     cls = rop.opclasses[rop.rop.INT_ADD]
     assert issubclass(cls, rop.PlainResOp)
     assert issubclass(cls, rop.BinaryOp)
-    assert cls.getopnum.im_func(None) == rop.rop.INT_ADD
+    assert cls.getopnum.im_func(cls) == rop.rop.INT_ADD
 
     cls = rop.opclasses[rop.rop.CALL]
     assert issubclass(cls, rop.ResOpWithDescr)
     assert issubclass(cls, rop.N_aryOp)
-    assert cls.getopnum.im_func(None) == rop.rop.CALL
+    assert cls.getopnum.im_func(cls) == rop.rop.CALL
 
     cls = rop.opclasses[rop.rop.GUARD_TRUE]
     assert issubclass(cls, rop.GuardResOp)
     assert issubclass(cls, rop.UnaryOp)
-    assert cls.getopnum.im_func(None) == rop.rop.GUARD_TRUE
+    assert cls.getopnum.im_func(cls) == rop.rop.GUARD_TRUE
 
 def test_mixins_in_common_base():
     INT_ADD = rop.opclasses[rop.rop.INT_ADD]
diff --git a/pypy/jit/metainterp/test/test_virtual.py b/pypy/jit/metainterp/test/test_virtual.py
--- a/pypy/jit/metainterp/test/test_virtual.py
+++ b/pypy/jit/metainterp/test/test_virtual.py
@@ -880,7 +880,7 @@
                 elif op == 'j':
                     j = Int(0)
                 elif op == '+':
-                    sa += i.val * j.val
+                    sa += (i.val + 2) * (j.val + 2)
                 elif op == 'a':
                     i = Int(i.val + 1)
                 elif op == 'b':
@@ -902,6 +902,7 @@
         assert res == f(10)
         self.check_aborted_count(0)
         self.check_target_token_count(3)
+        self.check_resops(int_mul=2)
 
     def test_nested_loops_bridge(self):
         class Int(object):
diff --git a/pypy/jit/metainterp/test/test_virtualstate.py b/pypy/jit/metainterp/test/test_virtualstate.py
--- a/pypy/jit/metainterp/test/test_virtualstate.py
+++ b/pypy/jit/metainterp/test/test_virtualstate.py
@@ -5,7 +5,7 @@
      VArrayStateInfo, NotVirtualStateInfo, VirtualState, ShortBoxes
 from pypy.jit.metainterp.optimizeopt.optimizer import OptValue
 from pypy.jit.metainterp.history import BoxInt, BoxFloat, BoxPtr, ConstInt, ConstPtr
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin, BaseTest, \
                                                            equaloplists, FakeDescrWithSnapshot
 from pypy.jit.metainterp.optimizeopt.intutils import IntBound
@@ -82,6 +82,13 @@
         assert isgeneral(value1, value2)
         assert not isgeneral(value2, value1)
 
+        assert isgeneral(OptValue(ConstInt(7)), OptValue(ConstInt(7)))
+        S = lltype.GcStruct('S')
+        foo = lltype.malloc(S)
+        fooref = lltype.cast_opaque_ptr(llmemory.GCREF, foo)
+        assert isgeneral(OptValue(ConstPtr(fooref)),
+                         OptValue(ConstPtr(fooref)))
+
     def test_field_matching_generalization(self):
         const1 = NotVirtualStateInfo(OptValue(ConstInt(1)))
         const2 = NotVirtualStateInfo(OptValue(ConstInt(2)))
diff --git a/pypy/jit/metainterp/test/test_ztranslation.py b/pypy/jit/metainterp/test/test_ztranslation.py
--- a/pypy/jit/metainterp/test/test_ztranslation.py
+++ b/pypy/jit/metainterp/test/test_ztranslation.py
@@ -3,7 +3,9 @@
 from pypy.jit.backend.llgraph import runner
 from pypy.rlib.jit import JitDriver, unroll_parameters, set_param
 from pypy.rlib.jit import PARAMETERS, dont_look_inside, hint
+from pypy.rlib.jit_hooks import boxint_new, resop_new, resop_getopnum
 from pypy.jit.metainterp.jitprof import Profiler
+from pypy.jit.metainterp.resoperation import rop
 from pypy.rpython.lltypesystem import lltype, llmemory
 
 class TranslationTest:
@@ -22,6 +24,7 @@
         # - jitdriver hooks
         # - two JITs
         # - string concatenation, slicing and comparison
+        # - jit hooks interface
 
         class Frame(object):
             _virtualizable2_ = ['l[*]']
@@ -91,7 +94,9 @@
             return f.i
         #
         def main(i, j):
-            return f(i) - f2(i+j, i, j)
+            op = resop_new(rop.INT_ADD, [boxint_new(3), boxint_new(5)],
+                           boxint_new(8))
+            return f(i) - f2(i+j, i, j) + resop_getopnum(op)
         res = ll_meta_interp(main, [40, 5], CPUClass=self.CPUClass,
                              type_system=self.type_system,
                              listops=True)
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -1,4 +1,5 @@
 import sys, py
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.annlowlevel import llhelper, MixLevelHelperAnnotator,\
      cast_base_ptr_to_instance, hlstr
@@ -112,7 +113,7 @@
     return ll_meta_interp(function, args, backendopt=backendopt,
                           translate_support_code=True, **kwds)
 
-def _find_jit_marker(graphs, marker_name):
+def _find_jit_marker(graphs, marker_name, check_driver=True):
     results = []
     for graph in graphs:
         for block in graph.iterblocks():
@@ -120,8 +121,8 @@
                 op = block.operations[i]
                 if (op.opname == 'jit_marker' and
                     op.args[0].value == marker_name and
-                    (op.args[1].value is None or
-                    op.args[1].value.active)):   # the jitdriver
+                    (not check_driver or op.args[1].value is None or
+                     op.args[1].value.active)):   # the jitdriver
                     results.append((graph, block, i))
     return results
 
@@ -140,6 +141,9 @@
         "found several jit_merge_points in the same graph")
     return results
 
+def find_access_helpers(graphs):
+    return _find_jit_marker(graphs, 'access_helper', False)
+
 def locate_jit_merge_point(graph):
     [(graph, block, pos)] = find_jit_merge_points([graph])
     return block, pos, block.operations[pos]
@@ -206,6 +210,7 @@
         vrefinfo = VirtualRefInfo(self)
         self.codewriter.setup_vrefinfo(vrefinfo)
         #
+        self.hooks = policy.jithookiface
         self.make_virtualizable_infos()
         self.make_exception_classes()
         self.make_driverhook_graphs()
@@ -213,6 +218,7 @@
         self.rewrite_jit_merge_points(policy)
 
         verbose = False # not self.cpu.translate_support_code
+        self.rewrite_access_helpers()
         self.codewriter.make_jitcodes(verbose=verbose)
         self.rewrite_can_enter_jits()
         self.rewrite_set_param()
@@ -619,6 +625,24 @@
         graph = self.annhelper.getgraph(func, args_s, s_result)
         return self.annhelper.graph2delayed(graph, FUNC)
 
+    def rewrite_access_helpers(self):
+        ah = find_access_helpers(self.translator.graphs)
+        for graph, block, index in ah:
+            op = block.operations[index]
+            self.rewrite_access_helper(op)
+
+    def rewrite_access_helper(self, op):
+        ARGS = [arg.concretetype for arg in op.args[2:]]
+        RESULT = op.result.concretetype
+        FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
+        # make sure we make a copy of function so it no longer belongs
+        # to extregistry
+        func = op.args[1].value
+        func = func_with_new_name(func, func.func_name + '_compiled')
+        ptr = self.helper_func(FUNCPTR, func)
+        op.opname = 'direct_call'
+        op.args = [Constant(ptr, FUNCPTR)] + op.args[2:]
+
     def rewrite_jit_merge_points(self, policy):
         for jd in self.jitdrivers_sd:
             self.rewrite_jit_merge_point(jd, policy)
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -244,6 +244,11 @@
             if self.warmrunnerdesc.memory_manager:
                 self.warmrunnerdesc.memory_manager.max_retrace_guards = value
 
+    def set_param_max_unroll_loops(self, value):
+        if self.warmrunnerdesc:
+            if self.warmrunnerdesc.memory_manager:
+                self.warmrunnerdesc.memory_manager.max_unroll_loops = value
+
     def disable_noninlinable_function(self, greenkey):
         cell = self.jit_cell_at_key(greenkey)
         cell.dont_trace_here = True
@@ -596,20 +601,6 @@
                 return fn(*greenargs)
         self.should_unroll_one_iteration = should_unroll_one_iteration
         
-        if hasattr(jd.jitdriver, 'on_compile'):
-            def on_compile(logger, token, operations, type, greenkey):
-                greenargs = unwrap_greenkey(greenkey)
-                return jd.jitdriver.on_compile(logger, token, operations, type,
-                                               *greenargs)
-            def on_compile_bridge(logger, orig_token, operations, n):
-                return jd.jitdriver.on_compile_bridge(logger, orig_token,
-                                                      operations, n)
-            jd.on_compile = on_compile
-            jd.on_compile_bridge = on_compile_bridge
-        else:
-            jd.on_compile = lambda *args: None
-            jd.on_compile_bridge = lambda *args: None
-
         redargtypes = ''.join([kind[0] for kind in jd.red_args_types])
 
         def get_assembler_token(greenkey):
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -89,11 +89,18 @@
                 assert typ == 'class'
                 return self.model.ConstObj(ootype.cast_to_object(obj))
 
-    def get_descr(self, poss_descr):
+    def get_descr(self, poss_descr, allow_invent):
         if poss_descr.startswith('<'):
             return None
-        else:
+        try:
             return self._consts[poss_descr]
+        except KeyError:
+            if allow_invent:
+                int(poss_descr)
+                token = self.model.JitCellToken()
+                tt = self.model.TargetToken(token)
+                self._consts[poss_descr] = tt
+                return tt
 
     def box_for_var(self, elem):
         try:
@@ -186,7 +193,8 @@
 
             poss_descr = allargs[-1].strip()
             if poss_descr.startswith('descr='):
-                descr = self.get_descr(poss_descr[len('descr='):])
+                descr = self.get_descr(poss_descr[len('descr='):],
+                                       opname == 'label')
                 allargs = allargs[:-1]
             for arg in allargs:
                 arg = arg.strip()
diff --git a/pypy/jit/tool/oparser_model.py b/pypy/jit/tool/oparser_model.py
--- a/pypy/jit/tool/oparser_model.py
+++ b/pypy/jit/tool/oparser_model.py
@@ -6,7 +6,7 @@
         from pypy.jit.metainterp.history import TreeLoop, JitCellToken
         from pypy.jit.metainterp.history import Box, BoxInt, BoxFloat
         from pypy.jit.metainterp.history import ConstInt, ConstObj, ConstPtr, ConstFloat
-        from pypy.jit.metainterp.history import BasicFailDescr
+        from pypy.jit.metainterp.history import BasicFailDescr, TargetToken
         from pypy.jit.metainterp.typesystem import llhelper
 
         from pypy.jit.metainterp.history import get_const_ptr_for_string
@@ -42,6 +42,10 @@
         class JitCellToken(object):
             I_am_a_descr = True
 
+        class TargetToken(object):
+            def __init__(self, jct):
+                pass
+
         class BasicFailDescr(object):
             I_am_a_descr = True
 
diff --git a/pypy/jit/tool/pypytrace.vim b/pypy/jit/tool/pypytrace.vim
--- a/pypy/jit/tool/pypytrace.vim
+++ b/pypy/jit/tool/pypytrace.vim
@@ -19,6 +19,7 @@
 syn match pypyLoopArgs    '^[[].*'
 syn match pypyLoopStart   '^#.*'
 syn match pypyDebugMergePoint  '^debug_merge_point(.\+)'
+syn match pypyLogBoundary '[[][0-9a-f]\+[]] \([{].\+\|.\+[}]\)$'
 
 hi def link pypyLoopStart   Structure
 "hi def link pypyLoopArgs    PreProc
@@ -29,3 +30,4 @@
 hi def link pypyNumber      Number
 hi def link pypyDescr       PreProc
 hi def link pypyDescrField  Label
+hi def link pypyLogBoundary Statement
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -4,7 +4,8 @@
 
 from pypy.jit.tool.oparser import parse, OpParser
 from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractDescr, BoxInt, JitCellToken
+from pypy.jit.metainterp.history import AbstractDescr, BoxInt, JitCellToken,\
+     TargetToken
 
 class BaseTestOparser(object):
 
@@ -243,6 +244,16 @@
         b = loop.getboxes()
         assert isinstance(b.sum0, BoxInt)
 
+    def test_label(self):
+        x = """
+        [i0]
+        label(i0, descr=1)
+        jump(i0, descr=1)
+        """
+        loop = self.parse(x)
+        assert loop.operations[0].getdescr() is loop.operations[1].getdescr()
+        assert isinstance(loop.operations[0].getdescr(), TargetToken)
+
 
 class ForbiddenModule(object):
     def __init__(self, name, old_mod):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -588,10 +588,18 @@
         raises(UnicodeDecodeError, '+3ADYAA-'.decode, 'utf-7')
 
     def test_utf_16_encode_decode(self):
-        import codecs
+        import codecs, sys
         x = u'123abc'
-        assert codecs.getencoder('utf-16')(x) == ('\xff\xfe1\x002\x003\x00a\x00b\x00c\x00', 6)
-        assert codecs.getdecoder('utf-16')('\xff\xfe1\x002\x003\x00a\x00b\x00c\x00') == (x, 14)
+        if sys.byteorder == 'big':
+            assert codecs.getencoder('utf-16')(x) == (
+                    '\xfe\xff\x001\x002\x003\x00a\x00b\x00c', 6)
+            assert codecs.getdecoder('utf-16')(
+                    '\xfe\xff\x001\x002\x003\x00a\x00b\x00c') == (x, 14)
+        else:
+            assert codecs.getencoder('utf-16')(x) == (
+                    '\xff\xfe1\x002\x003\x00a\x00b\x00c\x00', 6)
+            assert codecs.getdecoder('utf-16')(
+                    '\xff\xfe1\x002\x003\x00a\x00b\x00c\x00') == (x, 14)
 
     def test_unicode_escape(self):        
         assert u'\\'.encode('unicode-escape') == '\\\\'
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -19,8 +19,9 @@
 # cpu affinity settings
 
 srcdir = py.path.local(pypydir).join('translator', 'c', 'src')
-eci = ExternalCompilationInfo(separate_module_files=
-                              [srcdir.join('profiling.c')])
+eci = ExternalCompilationInfo(
+    separate_module_files=[srcdir.join('profiling.c')],
+    export_symbols=['pypy_setup_profiling', 'pypy_teardown_profiling'])
                                                      
 c_setup_profiling = rffi.llexternal('pypy_setup_profiling',
                                   [], lltype.Void,
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -23,6 +23,7 @@
 from pypy.interpreter.function import StaticMethod
 from pypy.objspace.std.sliceobject import W_SliceObject
 from pypy.module.__builtin__.descriptor import W_Property
+from pypy.module.__builtin__.interp_memoryview import W_MemoryView
 from pypy.rlib.entrypoint import entrypoint
 from pypy.rlib.unroll import unrolling_iterable
 from pypy.rlib.objectmodel import specialize
@@ -387,6 +388,8 @@
         "Float": "space.w_float",
         "Long": "space.w_long",
         "Complex": "space.w_complex",
+        "ByteArray": "space.w_bytearray",
+        "MemoryView": "space.gettypeobject(W_MemoryView.typedef)",
         "BaseObject": "space.w_object",
         'None': 'space.type(space.w_None)',
         'NotImplemented': 'space.type(space.w_NotImplemented)',
diff --git a/pypy/module/cpyext/buffer.py b/pypy/module/cpyext/buffer.py
--- a/pypy/module/cpyext/buffer.py
+++ b/pypy/module/cpyext/buffer.py
@@ -1,6 +1,36 @@
+from pypy.interpreter.error import OperationError
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, CANNOT_FAIL, Py_buffer)
+from pypy.module.cpyext.pyobject import PyObject
+
+ at cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
+def PyObject_CheckBuffer(space, w_obj):
+    """Return 1 if obj supports the buffer interface otherwise 0."""
+    return 0  # the bf_getbuffer field is never filled by cpyext
+
+ at cpython_api([PyObject, lltype.Ptr(Py_buffer), rffi.INT_real],
+             rffi.INT_real, error=-1)
+def PyObject_GetBuffer(space, w_obj, view, flags):
+    """Export obj into a Py_buffer, view.  These arguments must
+    never be NULL.  The flags argument is a bit field indicating what
+    kind of buffer the caller is prepared to deal with and therefore what
+    kind of buffer the exporter is allowed to return.  The buffer interface
+    allows for complicated memory sharing possibilities, but some caller may
+    not be able to handle all the complexity but may want to see if the
+    exporter will let them take a simpler view to its memory.
+
+    Some exporters may not be able to share memory in every possible way and
+    may need to raise errors to signal to some consumers that something is
+    just not possible. These errors should be a BufferError unless
+    there is another error that is actually causing the problem. The
+    exporter can use flags information to simplify how much of the
+    Py_buffer structure is filled in with non-default values and/or
+    raise an error if the object can't support a simpler view of its memory.
+
+    0 is returned on success and -1 on error."""
+    raise OperationError(space.w_TypeError, space.wrap(
+            'PyPy does not yet implement the new buffer interface'))
 
 @cpython_api([lltype.Ptr(Py_buffer), lltype.Char], rffi.INT_real, error=CANNOT_FAIL)
 def PyBuffer_IsContiguous(space, view, fortran):
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -123,10 +123,6 @@
 typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *);
 typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **);
 
-typedef int (*objobjproc)(PyObject *, PyObject *);
-typedef int (*visitproc)(PyObject *, void *);
-typedef int (*traverseproc)(PyObject *, visitproc, void *);
-
 /* Py3k buffer interface */
 typedef struct bufferinfo {
     void *buf;
@@ -153,6 +149,41 @@
 typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
 typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
 
+    /* Flags for getting buffers */
+#define PyBUF_SIMPLE 0
+#define PyBUF_WRITABLE 0x0001
+/*  we used to include an E, backwards compatible alias  */
+#define PyBUF_WRITEABLE PyBUF_WRITABLE
+#define PyBUF_FORMAT 0x0004
+#define PyBUF_ND 0x0008
+#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+
+#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE)
+#define PyBUF_CONTIG_RO (PyBUF_ND)
+
+#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE)
+#define PyBUF_STRIDED_RO (PyBUF_STRIDES)
+
+#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT)
+#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT)
+
+#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT)
+#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT)
+
+
+#define PyBUF_READ  0x100
+#define PyBUF_WRITE 0x200
+#define PyBUF_SHADOW 0x400
+/* end Py3k buffer interface */
+
+typedef int (*objobjproc)(PyObject *, PyObject *);
+typedef int (*visitproc)(PyObject *, void *);
+typedef int (*traverseproc)(PyObject *, visitproc, void *);
+
 typedef struct {
 	/* For numbers without flag bit Py_TPFLAGS_CHECKTYPES set, all
 	   arguments are guaranteed to be of the object's type (modulo
diff --git a/pypy/module/cpyext/include/pystate.h b/pypy/module/cpyext/include/pystate.h
--- a/pypy/module/cpyext/include/pystate.h
+++ b/pypy/module/cpyext/include/pystate.h
@@ -5,7 +5,7 @@
 struct _is; /* Forward */
 
 typedef struct _is {
-    int _foo;
+    struct _is *next;
 } PyInterpreterState;
 
 typedef struct _ts {
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -58,6 +58,7 @@
 class W_PyCFunctionObject(Wrappable):
     def __init__(self, space, ml, w_self, w_module=None):
         self.ml = ml
+        self.name = rffi.charp2str(self.ml.c_ml_name)
         self.w_self = w_self
         self.w_module = w_module
 
@@ -69,7 +70,7 @@
         flags &= ~(METH_CLASS | METH_STATIC | METH_COEXIST)
         if space.is_true(w_kw) and not flags & METH_KEYWORDS:
             raise OperationError(space.w_TypeError, space.wrap(
-                rffi.charp2str(self.ml.c_ml_name) + "() takes no keyword arguments"))
+                self.name + "() takes no keyword arguments"))
 
         func = rffi.cast(PyCFunction, self.ml.c_ml_meth)
         length = space.int_w(space.len(w_args))
@@ -80,13 +81,12 @@
             if length == 0:
                 return generic_cpy_call(space, func, w_self, None)
             raise OperationError(space.w_TypeError, space.wrap(
-                rffi.charp2str(self.ml.c_ml_name) + "() takes no arguments"))
+                self.name + "() takes no arguments"))
         elif flags & METH_O:
             if length != 1:
                 raise OperationError(space.w_TypeError,
                         space.wrap("%s() takes exactly one argument (%d given)" %  (
-                        rffi.charp2str(self.ml.c_ml_name), 
-                        length)))
+                        self.name, length)))
             w_arg = space.getitem(w_args, space.wrap(0))
             return generic_cpy_call(space, func, w_self, w_arg)
         elif flags & METH_VARARGS:
@@ -199,6 +199,7 @@
     __call__ = interp2app(cfunction_descr_call),
     __doc__ = GetSetProperty(W_PyCFunctionObject.get_doc),
     __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObject),
+    __name__ = interp_attrproperty('name', cls=W_PyCFunctionObject),
     )
 W_PyCFunctionObject.typedef.acceptable_as_base_class = False
 
diff --git a/pypy/module/cpyext/pystate.py b/pypy/module/cpyext/pystate.py
--- a/pypy/module/cpyext/pystate.py
+++ b/pypy/module/cpyext/pystate.py
@@ -2,7 +2,10 @@
     cpython_api, generic_cpy_call, CANNOT_FAIL, CConfig, cpython_struct)
 from pypy.rpython.lltypesystem import rffi, lltype
 
-PyInterpreterState = lltype.Ptr(cpython_struct("PyInterpreterState", ()))
+PyInterpreterStateStruct = lltype.ForwardReference()
+PyInterpreterState = lltype.Ptr(PyInterpreterStateStruct)
+cpython_struct(
+    "PyInterpreterState", [('next', PyInterpreterState)], PyInterpreterStateStruct)
 PyThreadState = lltype.Ptr(cpython_struct("PyThreadState", [('interp', PyInterpreterState)]))
 
 @cpython_api([], PyThreadState, error=CANNOT_FAIL)
@@ -54,7 +57,8 @@
 
 class InterpreterState(object):
     def __init__(self, space):
-        self.interpreter_state = lltype.malloc(PyInterpreterState.TO, flavor='raw', immortal=True)
+        self.interpreter_state = lltype.malloc(
+            PyInterpreterState.TO, flavor='raw', zero=True, immortal=True)
 
     def new_thread_state(self):
         capsule = ThreadStateCapsule()
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -34,141 +34,6 @@
 
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PyObject_CheckBuffer(space, obj):
-    """Return 1 if obj supports the buffer interface otherwise 0."""
-    raise NotImplementedError
-
- at cpython_api([PyObject, Py_buffer, rffi.INT_real], rffi.INT_real, error=-1)
-def PyObject_GetBuffer(space, obj, view, flags):
-    """Export obj into a Py_buffer, view.  These arguments must
-    never be NULL.  The flags argument is a bit field indicating what
-    kind of buffer the caller is prepared to deal with and therefore what
-    kind of buffer the exporter is allowed to return.  The buffer interface
-    allows for complicated memory sharing possibilities, but some caller may
-    not be able to handle all the complexity but may want to see if the
-    exporter will let them take a simpler view to its memory.
-
-    Some exporters may not be able to share memory in every possible way and
-    may need to raise errors to signal to some consumers that something is
-    just not possible. These errors should be a BufferError unless
-    there is another error that is actually causing the problem. The
-    exporter can use flags information to simplify how much of the
-    Py_buffer structure is filled in with non-default values and/or
-    raise an error if the object can't support a simpler view of its memory.
-
-    0 is returned on success and -1 on error.
-
-    The following table gives possible values to the flags arguments.
-
-    Flag
-
-    Description
-
-    PyBUF_SIMPLE
-
-    This is the default flag state.  The returned
-    buffer may or may not have writable memory.  The
-    format of the data will be assumed to be unsigned
-    bytes.  This is a "stand-alone" flag constant. It
-    never needs to be '|'d to the others. The exporter
-    will raise an error if it cannot provide such a
-    contiguous buffer of bytes.
-
-    PyBUF_WRITABLE
-
-    The returned buffer must be writable.  If it is
-    not writable, then raise an error.
-
-    PyBUF_STRIDES
-
-    This implies PyBUF_ND. The returned
-    buffer must provide strides information (i.e. the
-    strides cannot be NULL). This would be used when
-    the consumer can handle strided, discontiguous
-    arrays.  Handling strides automatically assumes
-    you can handle shape.  The exporter can raise an
-    error if a strided representation of the data is
-    not possible (i.e. without the suboffsets).
-
-    PyBUF_ND
-
-    The returned buffer must provide shape
-    information. The memory will be assumed C-style
-    contiguous (last dimension varies the
-    fastest). The exporter may raise an error if it
-    cannot provide this kind of contiguous buffer. If
-    this is not given then shape will be NULL.
-
-    PyBUF_C_CONTIGUOUS
-    PyBUF_F_CONTIGUOUS
-    PyBUF_ANY_CONTIGUOUS
-
-    These flags indicate that the contiguity returned
-    buffer must be respectively, C-contiguous (last
-    dimension varies the fastest), Fortran contiguous
-    (first dimension varies the fastest) or either
-    one.  All of these flags imply
-    PyBUF_STRIDES and guarantee that the
-    strides buffer info structure will be filled in
-    correctly.
-
-    PyBUF_INDIRECT
-
-    This flag indicates the returned buffer must have
-    suboffsets information (which can be NULL if no
-    suboffsets are needed).  This can be used when
-    the consumer can handle indirect array
-    referencing implied by these suboffsets. This
-    implies PyBUF_STRIDES.
-
-    PyBUF_FORMAT
-
-    The returned buffer must have true format
-    information if this flag is provided. This would
-    be used when the consumer is going to be checking
-    for what 'kind' of data is actually stored. An
-    exporter should always be able to provide this
-    information if requested. If format is not
-    explicitly requested then the format must be
-    returned as NULL (which means 'B', or
-    unsigned bytes)
-
-    PyBUF_STRIDED
-
-    This is equivalent to (PyBUF_STRIDES |
-    PyBUF_WRITABLE).
-
-    PyBUF_STRIDED_RO
-
-    This is equivalent to (PyBUF_STRIDES).
-
-    PyBUF_RECORDS
-
-    This is equivalent to (PyBUF_STRIDES |
-    PyBUF_FORMAT | PyBUF_WRITABLE).
-
-    PyBUF_RECORDS_RO
-
-    This is equivalent to (PyBUF_STRIDES |
-    PyBUF_FORMAT).
-
-    PyBUF_FULL
-
-    This is equivalent to (PyBUF_INDIRECT |
-    PyBUF_FORMAT | PyBUF_WRITABLE).
-
-    PyBUF_FULL_RO
-
-    This is equivalent to (PyBUF_INDIRECT |
-    PyBUF_FORMAT).
-
-    PyBUF_CONTIG
-
-    This is equivalent to (PyBUF_ND |
-    PyBUF_WRITABLE).
-
-    PyBUF_CONTIG_RO
-
-    This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
 
 @cpython_api([rffi.CCHARP], Py_ssize_t, error=CANNOT_FAIL)
diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py
--- a/pypy/module/cpyext/test/test_methodobject.py
+++ b/pypy/module/cpyext/test/test_methodobject.py
@@ -63,6 +63,7 @@
              ),
             ])
         assert mod.getarg_O(1) == 1
+        assert mod.getarg_O.__name__ == "getarg_O"
         raises(TypeError, mod.getarg_O)
         raises(TypeError, mod.getarg_O, 1, 1)
 
diff --git a/pypy/module/cpyext/test/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py
--- a/pypy/module/cpyext/test/test_pystate.py
+++ b/pypy/module/cpyext/test/test_pystate.py
@@ -37,6 +37,7 @@
     def test_thread_state_interp(self, space, api):
         ts = api.PyThreadState_Get()
         assert ts.c_interp == api.PyInterpreterState_Head()
+        assert ts.c_interp.c_next == nullptr(PyInterpreterState.TO)
 
     def test_basic_threadstate_dance(self, space, api):
         # Let extension modules call these functions,
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -4,11 +4,12 @@
 class PyPyModule(MixedModule):
     interpleveldefs = {
         'debug_repr': 'interp_extras.debug_repr',
+        'remove_invalidates': 'interp_extras.remove_invalidates',
     }
     appleveldefs = {}
 
 class Module(MixedModule):
-    applevel_name = 'numpypy'
+    applevel_name = '_numpypy'
 
     submodules = {
         'pypy': PyPyModule
@@ -47,6 +48,7 @@
         'int_': 'interp_boxes.W_LongBox',
         'inexact': 'interp_boxes.W_InexactBox',
         'floating': 'interp_boxes.W_FloatingBox',
+        'float_': 'interp_boxes.W_Float64Box',
         'float32': 'interp_boxes.W_Float32Box',
         'float64': 'interp_boxes.W_Float64Box',
     }
diff --git a/pypy/module/micronumpy/app_numpy.py b/pypy/module/micronumpy/app_numpy.py
--- a/pypy/module/micronumpy/app_numpy.py
+++ b/pypy/module/micronumpy/app_numpy.py
@@ -1,6 +1,6 @@
 import math
 
-import numpypy
+import _numpypy
 
 
 inf = float("inf")
@@ -14,30 +14,54 @@
     return mean(a)
 
 def identity(n, dtype=None):
-    a = numpypy.zeros((n,n), dtype=dtype)
+    a = _numpypy.zeros((n,n), dtype=dtype)
     for i in range(n):
         a[i][i] = 1
     return a
 
-def mean(a):
+def mean(a, axis=None):
     if not hasattr(a, "mean"):
-        a = numpypy.array(a)
-    return a.mean()
+        a = _numpypy.array(a)
+    return a.mean(axis)
 
-def sum(a):
+def sum(a,axis=None):
+    '''sum(a, axis=None)
+    Sum of array elements over a given axis.
+    
+    Parameters
+    ----------
+    a : array_like
+        Elements to sum.
+    axis : integer, optional
+        Axis over which the sum is taken. By default `axis` is None,
+        and all elements are summed.
+    
+    Returns
+    -------
+    sum_along_axis : ndarray
+        An array with the same shape as `a`, with the specified
+        axis removed.   If `a` is a 0-d array, or if `axis` is None, a scalar
+        is returned.  If an output array is specified, a reference to
+        `out` is returned.
+    
+    See Also
+    --------
+    ndarray.sum : Equivalent method.
+    '''
+    # TODO: add to doc (once it's implemented): cumsum : Cumulative sum of array elements.
     if not hasattr(a, "sum"):
-        a = numpypy.array(a)
-    return a.sum()
+        a = _numpypy.array(a)
+    return a.sum(axis)
 
-def min(a):
+def min(a, axis=None):
     if not hasattr(a, "min"):
-        a = numpypy.array(a)
-    return a.min()
+        a = _numpypy.array(a)
+    return a.min(axis)
 
-def max(a):
+def max(a, axis=None):
     if not hasattr(a, "max"):
-        a = numpypy.array(a)
-    return a.max()
+        a = _numpypy.array(a)
+    return a.max(axis)
 
 def arange(start, stop=None, step=1, dtype=None):
     '''arange([start], stop[, step], dtype=None)
@@ -47,9 +71,9 @@
         stop = start
         start = 0
     if dtype is None:
-        test = numpypy.array([start, stop, step, 0])
+        test = _numpypy.array([start, stop, step, 0])
         dtype = test.dtype
-    arr = numpypy.zeros(int(math.ceil((stop - start) / step)), dtype=dtype)
+    arr = _numpypy.zeros(int(math.ceil((stop - start) / step)), dtype=dtype)
     i = start
     for j in range(arr.size):
         arr[j] = i
@@ -90,5 +114,5 @@
     you should assign the new shape to the shape attribute of the array
 '''
     if not hasattr(a, 'reshape'):
-        a = numpypy.array(a)
+        a = _numpypy.array(a)
     return a.reshape(shape)
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -373,13 +373,17 @@
 
     def execute(self, interp):
         if self.name in SINGLE_ARG_FUNCTIONS:
-            if len(self.args) != 1:
+            if len(self.args) != 1 and self.name != 'sum':
                 raise ArgumentMismatch
             arr = self.args[0].execute(interp)
             if not isinstance(arr, BaseArray):
                 raise ArgumentNotAnArray
             if self.name == "sum":
-                w_res = arr.descr_sum(interp.space)
+                if len(self.args)>1:
+                    w_res = arr.descr_sum(interp.space,
+                                          self.args[1].execute(interp))
+                else:
+                    w_res = arr.descr_sum(interp.space)
             elif self.name == "prod":
                 w_res = arr.descr_prod(interp.space)
             elif self.name == "max":
@@ -430,7 +434,7 @@
     ('\]', 'array_right'),
     ('(->)|[\+\-\*\/]', 'operator'),
     ('=', 'assign'),
-    (',', 'coma'),
+    (',', 'comma'),
     ('\|', 'pipe'),
     ('\(', 'paren_left'),
     ('\)', 'paren_right'),
@@ -518,7 +522,7 @@
         return SliceConstant(start, stop, step)
 
 
-    def parse_expression(self, tokens):
+    def parse_expression(self, tokens, accept_comma=False):
         stack = []
         while tokens.remaining():
             token = tokens.pop()
@@ -538,9 +542,13 @@
                 stack.append(RangeConstant(tokens.pop().v))
                 end = tokens.pop()
                 assert end.name == 'pipe'
+            elif accept_comma and token.name == 'comma':
+                continue
             else:
                 tokens.push()
                 break
+        if accept_comma:
+            return stack
         stack.reverse()
         lhs = stack.pop()
         while stack:
@@ -554,10 +562,7 @@
         args = []
         tokens.pop() # lparen
         while tokens.get(0).name != 'paren_right':
-            if tokens.get(0).name == 'coma':
-                tokens.pop()
-                continue
-            args.append(self.parse_expression(tokens))
+            args += self.parse_expression(tokens, accept_comma=True)
         return FunctionCall(name, args)
 
     def parse_array_const(self, tokens):
@@ -573,7 +578,7 @@
             token = tokens.pop()
             if token.name == 'array_right':
                 return elems
-            assert token.name == 'coma'
+            assert token.name == 'comma'
 
     def parse_statement(self, tokens):
         if (tokens.get(0).name == 'identifier' and
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -78,6 +78,7 @@
     descr_sub = _binop_impl("subtract")
     descr_mul = _binop_impl("multiply")
     descr_div = _binop_impl("divide")
+    descr_pow = _binop_impl("power")
     descr_eq = _binop_impl("equal")
     descr_ne = _binop_impl("not_equal")
     descr_lt = _binop_impl("less")
@@ -170,6 +171,7 @@
     __sub__ = interp2app(W_GenericBox.descr_sub),
     __mul__ = interp2app(W_GenericBox.descr_mul),
     __div__ = interp2app(W_GenericBox.descr_div),
+    __pow__ = interp2app(W_GenericBox.descr_pow),
 
     __radd__ = interp2app(W_GenericBox.descr_radd),
     __rsub__ = interp2app(W_GenericBox.descr_rsub),
@@ -245,6 +247,7 @@
     long_name = "int64"
 W_LongBox.typedef = TypeDef(long_name, (W_SignedIntegerBox.typedef, int_typedef,),
     __module__ = "numpypy",
+   __new__ = interp2app(W_LongBox.descr__new__.im_func),
 )
 
 W_ULongBox.typedef = TypeDef("u" + long_name, W_UnsignedIntegerBox.typedef,
diff --git a/pypy/module/micronumpy/interp_extras.py b/pypy/module/micronumpy/interp_extras.py
--- a/pypy/module/micronumpy/interp_extras.py
+++ b/pypy/module/micronumpy/interp_extras.py
@@ -5,3 +5,11 @@
 @unwrap_spec(array=BaseArray)
 def debug_repr(space, array):
     return space.wrap(array.find_sig().debug_repr())
+
+ at unwrap_spec(array=BaseArray)
+def remove_invalidates(space, array):
+    """ Array modification will no longer invalidate any of it's
+    potential children. Use only for performance debugging
+    """
+    del array.invalidates[:]
+    return space.w_None
diff --git a/pypy/module/micronumpy/interp_iter.py b/pypy/module/micronumpy/interp_iter.py
--- a/pypy/module/micronumpy/interp_iter.py
+++ b/pypy/module/micronumpy/interp_iter.py
@@ -1,19 +1,20 @@
 
 from pypy.rlib import jit
 from pypy.rlib.objectmodel import instantiate
-from pypy.module.micronumpy.strides import calculate_broadcast_strides
+from pypy.module.micronumpy.strides import calculate_broadcast_strides,\
+     calculate_slice_strides
 
-# Iterators for arrays
-# --------------------
-# all those iterators with the exception of BroadcastIterator iterate over the
-# entire array in C order (the last index changes the fastest). This will
-# yield all elements. Views iterate over indices and look towards strides and
-# backstrides to find the correct position. Notably the offset between
-# x[..., i + 1] and x[..., i] will be strides[-1]. Offset between
-# x[..., k + 1, 0] and x[..., k, i_max] will be backstrides[-2] etc.
+class BaseTransform(object):
+    pass
 
-# BroadcastIterator works like that, but for indexes that don't change source
-# in the original array, strides[i] == backstrides[i] == 0
+class ViewTransform(BaseTransform):
+    def __init__(self, chunks):
+        # 4-tuple specifying slicing
+        self.chunks = chunks
+
+class BroadcastTransform(BaseTransform):
+    def __init__(self, res_shape):
+        self.res_shape = res_shape
 
 class BaseIterator(object):
     def next(self, shapelen):
@@ -22,6 +23,15 @@
     def done(self):
         raise NotImplementedError
 
+    def apply_transformations(self, arr, transformations):
+        v = self
+        for transform in transformations:
+            v = v.transform(arr, transform)
+        return v
+
+    def transform(self, arr, t):
+        raise NotImplementedError
+
 class ArrayIterator(BaseIterator):
     def __init__(self, size):
         self.offset = 0
@@ -36,6 +46,10 @@
     def done(self):
         return self.offset >= self.size
 
+    def transform(self, arr, t):
+        return ViewIterator(arr.start, arr.strides, arr.backstrides,
+                            arr.shape).transform(arr, t)
+
 class OneDimIterator(BaseIterator):
     def __init__(self, start, step, stop):
         self.offset = start
@@ -52,26 +66,29 @@
     def done(self):
         return self.offset == self.size
 
-def view_iter_from_arr(arr):
-    return ViewIterator(arr.start, arr.strides, arr.backstrides, arr.shape)
-
 class ViewIterator(BaseIterator):
-    def __init__(self, start, strides, backstrides, shape, res_shape=None):
+    def __init__(self, start, strides, backstrides, shape):
         self.offset  = start
         self._done   = False
-        if res_shape is not None and res_shape != shape:
-            r = calculate_broadcast_strides(strides, backstrides,
-                                            shape, res_shape)
-            self.strides, self.backstrides = r
-            self.res_shape = res_shape
-        else:
-            self.strides = strides
-            self.backstrides = backstrides
-            self.res_shape = shape
+        self.strides = strides
+        self.backstrides = backstrides
+        self.res_shape = shape
         self.indices = [0] * len(self.res_shape)
 
+    def transform(self, arr, t):
+        if isinstance(t, BroadcastTransform):
+            r = calculate_broadcast_strides(self.strides, self.backstrides,
+                                            self.res_shape, t.res_shape)
+            return ViewIterator(self.offset, r[0], r[1], t.res_shape)
+        elif isinstance(t, ViewTransform):
+            r = calculate_slice_strides(self.res_shape, self.offset,
+                                        self.strides,
+                                        self.backstrides, t.chunks)
+            return ViewIterator(r[1], r[2], r[3], r[0])
+
     @jit.unroll_safe
     def next(self, shapelen):
+        shapelen = jit.promote(len(self.res_shape))
         offset = self.offset
         indices = [0] * shapelen
         for i in range(shapelen):
@@ -96,6 +113,13 @@
         res._done = done
         return res
 
+    def apply_transformations(self, arr, transformations):
+        v = BaseIterator.apply_transformations(self, arr, transformations)
+        if len(arr.shape) == 1:
+            return OneDimIterator(self.offset, self.strides[0],
+                                  self.res_shape[0])
+        return v
+
     def done(self):
         return self._done
 
@@ -103,11 +127,57 @@
     def next(self, shapelen):
         return self
 
+    def transform(self, arr, t):
+        pass
+
+class AxisIterator(BaseIterator):
+    def __init__(self, start, dim, shape, strides, backstrides):
+        self.res_shape = shape[:]
+        self.strides = strides[:dim] + [0] + strides[dim:]
+        self.backstrides = backstrides[:dim] + [0] + backstrides[dim:]
+        self.first_line = True
+        self.indices = [0] * len(shape)
+        self._done = False
+        self.offset = start
+        self.dim = dim
+
+    @jit.unroll_safe
+    def next(self, shapelen):
+        offset = self.offset
+        first_line = self.first_line
+        indices = [0] * shapelen
+        for i in range(shapelen):
+            indices[i] = self.indices[i]
+        done = False
+        for i in range(shapelen - 1, -1, -1):
+            if indices[i] < self.res_shape[i] - 1:
+                if i == self.dim:
+                    first_line = False
+                indices[i] += 1
+                offset += self.strides[i]
+                break
+            else:
+                indices[i] = 0
+                offset -= self.backstrides[i]
+        else:
+            done = True
+        res = instantiate(AxisIterator)
+        res.offset = offset
+        res.indices = indices
+        res.strides = self.strides
+        res.backstrides = self.backstrides
+        res.res_shape = self.res_shape
+        res._done = done
+        res.first_line = first_line
+        res.dim = self.dim
+        return res        
+
+    def done(self):
+        return self._done
+
 # ------ other iterators that are not part of the computation frame ----------
-
-class AxisIterator(object):
-    """ This object will return offsets of each start of the last stride
-    """
+    
+class SkipLastAxisIterator(object):
     def __init__(self, arr):
         self.arr = arr
         self.indices = [0] * (len(arr.shape) - 1)
@@ -125,4 +195,3 @@
                 self.offset -= self.arr.backstrides[i]
         else:
             self.done = True
-        
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -1,6 +1,6 @@
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
+from pypy.interpreter.gateway import interp2app, NoneNotWrapped
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.module.micronumpy import interp_ufuncs, interp_dtype, signature
 from pypy.module.micronumpy.strides import calculate_slice_strides
@@ -8,30 +8,39 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.rstring import StringBuilder
-from pypy.module.micronumpy.interp_iter import ArrayIterator,\
-     view_iter_from_arr, OneDimIterator, AxisIterator
+from pypy.module.micronumpy.interp_iter import ArrayIterator, OneDimIterator,\
+     SkipLastAxisIterator
 
 numpy_driver = jit.JitDriver(
     greens=['shapelen', 'sig'],
     virtualizables=['frame'],
-    reds=['result_size', 'frame', 'ri', 'self', 'result']
+    reds=['result_size', 'frame', 'ri', 'self', 'result'],
+    get_printable_location=signature.new_printable_location('numpy'),
+    name='numpy',
 )
 all_driver = jit.JitDriver(
     greens=['shapelen', 'sig'],
     virtualizables=['frame'],
-    reds=['frame', 'self', 'dtype']
+    reds=['frame', 'self', 'dtype'],
+    get_printable_location=signature.new_printable_location('all'),
+    name='numpy_all',
 )
 any_driver = jit.JitDriver(
     greens=['shapelen', 'sig'],
     virtualizables=['frame'],
-    reds=['frame', 'self', 'dtype']
+    reds=['frame', 'self', 'dtype'],
+    get_printable_location=signature.new_printable_location('any'),
+    name='numpy_any',
 )
 slice_driver = jit.JitDriver(
     greens=['shapelen', 'sig'],
     virtualizables=['frame'],
-    reds=['self', 'frame', 'source', 'res_iter']
+    reds=['self', 'frame', 'arr'],
+    get_printable_location=signature.new_printable_location('slice'),
+    name='numpy_slice',
 )
 
+
 def _find_shape_and_elems(space, w_iterable):
     shape = [space.len_w(w_iterable)]
     batch = space.listview(w_iterable)
@@ -148,9 +157,6 @@
 # (meaning that the realignment of elements crosses from one step into another)
 # return None so that the caller can raise an exception.
 def calc_new_strides(new_shape, old_shape, old_strides):
-    # Return the proper strides for new_shape, or None if the mapping crosses
-    # stepping boundaries
-
     # Assumes that prod(old_shape) == prod(new_shape), len(old_shape) > 1, and
     # len(new_shape) > 0
     steps = []
@@ -158,6 +164,7 @@
     oldI = 0
     new_strides = []
     if old_strides[0] < old_strides[-1]:
+        #Start at old_shape[0], old_stides[0]
         for i in range(len(old_shape)):
             steps.append(old_strides[i] / last_step)
             last_step *= old_shape[i]
@@ -175,10 +182,11 @@
             if n_new_elems_used == n_old_elems_to_use:
                 oldI += 1
                 if oldI >= len(old_shape):
-                    break
+                    continue
                 cur_step = steps[oldI]
                 n_old_elems_to_use *= old_shape[oldI]
     else:
+        #Start at old_shape[-1], old_strides[-1]
         for i in range(len(old_shape) - 1, -1, -1):
             steps.insert(0, old_strides[i] / last_step)
             last_step *= old_shape[i]
@@ -197,7 +205,7 @@
             if n_new_elems_used == n_old_elems_to_use:
                 oldI -= 1
                 if oldI < -len(old_shape):
-                    break
+                    continue
                 cur_step = steps[oldI]
                 n_old_elems_to_use *= old_shape[oldI]
     return new_strides
@@ -277,21 +285,26 @@
     descr_rpow = _binop_right_impl("power")
     descr_rmod = _binop_right_impl("mod")
 
-    def _reduce_ufunc_impl(ufunc_name):
-        def impl(self, space):
+    def _reduce_ufunc_impl(ufunc_name, promote_to_largest=False):
+        def impl(self, space, w_dim=None):
+            if space.is_w(w_dim, space.w_None):
+                w_dim = space.wrap(-1)
             return getattr(interp_ufuncs.get(space), ufunc_name).reduce(space,
-                   self, multidim=True)
+                                        self, True, promote_to_largest, w_dim)
         return func_with_new_name(impl, "reduce_%s_impl" % ufunc_name)
 
     descr_sum = _reduce_ufunc_impl("add")
-    descr_prod = _reduce_ufunc_impl("multiply")
+    descr_sum_promote = _reduce_ufunc_impl("add", True)
+    descr_prod = _reduce_ufunc_impl("multiply", True)
     descr_max = _reduce_ufunc_impl("maximum")
     descr_min = _reduce_ufunc_impl("minimum")
 
     def _reduce_argmax_argmin_impl(op_name):
         reduce_driver = jit.JitDriver(
             greens=['shapelen', 'sig'],
-            reds=['result', 'idx', 'frame', 'self', 'cur_best', 'dtype']
+            reds=['result', 'idx', 'frame', 'self', 'cur_best', 'dtype'],
+            get_printable_location=signature.new_printable_location(op_name),
+            name='numpy_' + op_name,
         )
         def loop(self):
             sig = self.find_sig()
@@ -377,7 +390,7 @@
         elif len(self.shape) < 2 and len(w_other.shape) < 2:
             w_res = self.descr_mul(space, w_other)
             assert isinstance(w_res, BaseArray)
-            return w_res.descr_sum(space)
+            return w_res.descr_sum(space, space.wrap(-1))
         dtype = interp_ufuncs.find_binop_result_dtype(space,
                                      self.find_dtype(), w_other.find_dtype())
         if self.size < 1 and w_other.size < 1:
@@ -438,6 +451,10 @@
     def descr_get_dtype(self, space):
         return space.wrap(self.find_dtype())
 
+    def descr_get_ndim(self, space):
+        return space.wrap(len(self.shape))
+
+    @jit.unroll_safe
     def descr_get_shape(self, space):
         return space.newtuple([space.wrap(i) for i in self.shape])
 
@@ -466,7 +483,7 @@
     def descr_repr(self, space):
         res = StringBuilder()
         res.append("array(")
-        concrete = self.get_concrete()
+        concrete = self.get_concrete_or_scalar()
         dtype = concrete.find_dtype()
         if not concrete.size:
             res.append('[]')
@@ -479,8 +496,9 @@
         else:
             concrete.to_str(space, 1, res, indent='       ')
         if (dtype is not interp_dtype.get_dtype_cache(space).w_float64dtype and
-            dtype is not interp_dtype.get_dtype_cache(space).w_int64dtype) or \
-            not self.size:
+            not (dtype.kind == interp_dtype.SIGNEDLTR and
+            dtype.itemtype.get_element_size() == rffi.sizeof(lltype.Signed)) or
+            not self.size):
             res.append(", dtype=" + dtype.name)
         res.append(")")
         return space.wrap(res.build())
@@ -613,9 +631,26 @@
             )
         return w_result
 
-    def descr_mean(self, space):
-        return space.div(self.descr_sum(space), 
-                         space.wrap(self.size))
+    def descr_mean(self, space, w_dim=None):
+        if space.is_w(w_dim, space.w_None):
+            w_dim = space.wrap(-1)
+            w_denom = space.wrap(self.size)
+        else:
+            dim = space.int_w(w_dim)
+            w_denom = space.wrap(self.shape[dim])
+        return space.div(self.descr_sum_promote(space, w_dim), w_denom)
+
+    def descr_var(self, space):
+        # var = mean((values - mean(values)) ** 2)
+        w_res = self.descr_sub(space, self.descr_mean(space, space.w_None))
+        assert isinstance(w_res, BaseArray) 
+        w_res = w_res.descr_pow(space, space.wrap(2))
+        assert isinstance(w_res, BaseArray)
+        return w_res.descr_mean(space, space.w_None)
+
+    def descr_std(self, space):
+        # std(v) = sqrt(var(v))
+        return interp_ufuncs.get(space).sqrt.call(space, [self.descr_var(space)])
 
     def descr_nonzero(self, space):
         if self.size > 1:
@@ -641,8 +676,8 @@
             strides.append(concrete.strides[i])
             backstrides.append(concrete.backstrides[i])
             shape.append(concrete.shape[i])
-        return space.wrap(W_NDimSlice(concrete.start, strides[:],
-                                      backstrides[:], shape[:], concrete))
+        return space.wrap(W_NDimSlice(concrete.start, strides,
+                                      backstrides, shape, concrete))
 
     def descr_get_flatiter(self, space):
         return space.wrap(W_FlatIterator(self))
@@ -650,11 +685,12 @@
     def getitem(self, item):
         raise NotImplementedError
 
-    def find_sig(self, res_shape=None):
+    def find_sig(self, res_shape=None, arr=None):
         """ find a correct signature for the array
         """
         res_shape = res_shape or self.shape
-        return signature.find_sig(self.create_sig(res_shape), self)
+        arr = arr or self
+        return signature.find_sig(self.create_sig(), arr)
 
     def descr_array_iface(self, space):
         if not self.shape:
@@ -708,7 +744,7 @@
     def copy(self, space):
         return Scalar(self.dtype, self.value)
 
-    def create_sig(self, res_shape):
+    def create_sig(self):
         return signature.ScalarSignature(self.dtype)
 
     def get_concrete_or_scalar(self):
@@ -726,7 +762,8 @@
         self.name = name
 
     def _del_sources(self):
-        # Function for deleting references to source arrays, to allow garbage-collecting them
+        # Function for deleting references to source arrays,
+        # to allow garbage-collecting them
         raise NotImplementedError
 
     def compute(self):
@@ -778,11 +815,11 @@
         self.size = size
         VirtualArray.__init__(self, 'slice', shape, child.find_dtype())
 
-    def create_sig(self, res_shape):
+    def create_sig(self):
         if self.forced_result is not None:
-            return self.forced_result.create_sig(res_shape)
+            return self.forced_result.create_sig()
         return signature.VirtualSliceSignature(
-            self.child.create_sig(res_shape))
+            self.child.create_sig())
 
     def force_if_needed(self):
         if self.forced_result is None:
@@ -792,6 +829,7 @@
     def _del_sources(self):
         self.child = None
 
+
 class Call1(VirtualArray):
     def __init__(self, ufunc, name, shape, res_dtype, values):
         VirtualArray.__init__(self, name, shape, res_dtype)
@@ -802,16 +840,17 @@
     def _del_sources(self):
         self.values = None
 
-    def create_sig(self, res_shape):
+    def create_sig(self):
         if self.forced_result is not None:
-            return self.forced_result.create_sig(res_shape)
-        return signature.Call1(self.ufunc, self.name,
-                               self.values.create_sig(res_shape))
+            return self.forced_result.create_sig()
+        return signature.Call1(self.ufunc, self.name, self.values.create_sig())
 
 class Call2(VirtualArray):
     """
     Intermediate class for performing binary operations.
     """
+    _immutable_fields_ = ['left', 'right']
+    
     def __init__(self, ufunc, name, shape, calc_dtype, res_dtype, left, right):
         VirtualArray.__init__(self, name, shape, res_dtype)
         self.ufunc = ufunc
@@ -826,12 +865,55 @@
         self.left = None
         self.right = None
 
-    def create_sig(self, res_shape):
+    def create_sig(self):
         if self.forced_result is not None:
-            return self.forced_result.create_sig(res_shape)
+            return self.forced_result.create_sig()
+        if self.shape != self.left.shape and self.shape != self.right.shape:
+            return signature.BroadcastBoth(self.ufunc, self.name,
+                                           self.calc_dtype,
+                                           self.left.create_sig(),
+                                           self.right.create_sig())
+        elif self.shape != self.left.shape:
+            return signature.BroadcastLeft(self.ufunc, self.name,
+                                           self.calc_dtype,
+                                           self.left.create_sig(),
+                                           self.right.create_sig())
+        elif self.shape != self.right.shape:
+            return signature.BroadcastRight(self.ufunc, self.name,
+                                            self.calc_dtype,
+                                            self.left.create_sig(),
+                                            self.right.create_sig())
         return signature.Call2(self.ufunc, self.name, self.calc_dtype,
-                               self.left.create_sig(res_shape),
-                               self.right.create_sig(res_shape))
+                               self.left.create_sig(), self.right.create_sig())
+
+class SliceArray(Call2):
+    def __init__(self, shape, dtype, left, right):
+        Call2.__init__(self, None, 'sliceloop', shape, dtype, dtype, left,
+                       right)
+    
+    def create_sig(self):
+        lsig = self.left.create_sig()
+        rsig = self.right.create_sig()
+        if self.shape != self.right.shape:
+            return signature.SliceloopBroadcastSignature(self.ufunc,
+                                                         self.name,
+                                                         self.calc_dtype,
+                                                         lsig, rsig)
+        return signature.SliceloopSignature(self.ufunc, self.name,
+                                            self.calc_dtype,
+                                            lsig, rsig)
+
+class AxisReduce(Call2):
+    """ NOTE: this is only used as a container, you should never
+    encounter such things in the wild. Remove this comment
+    when we'll make AxisReduce lazy
+    """
+    _immutable_fields_ = ['left', 'right']
+    
+    def __init__(self, ufunc, name, shape, dtype, left, right, dim):
+        Call2.__init__(self, ufunc, name, shape, dtype, dtype,
+                       left, right)
+        self.dim = dim
 
 class ConcreteArray(BaseArray):
     """ An array that have actual storage, whether owned or not
@@ -883,13 +965,8 @@
         if self.order == 'C':
             strides.reverse()
             backstrides.reverse()
-        self.strides = strides[:]
-        self.backstrides = backstrides[:]
-
-    def array_sig(self, res_shape):
-        if res_shape is not None and self.shape != res_shape:
-            return signature.ViewSignature(self.dtype)
-        return signature.ArraySignature(self.dtype)
+        self.strides = strides
+        self.backstrides = backstrides
 
     def to_str(self, space, comma, builder, indent=' ', use_ellipsis=False):
         '''Modifies builder with a representation of the array/slice
@@ -898,76 +975,80 @@
         each line will begin with indent.
         '''
         size = self.size
+        ccomma = ',' * comma
+        ncomma = ',' * (1 - comma)
+        dtype = self.find_dtype()
         if size < 1:
             builder.append('[]')
             return
+        elif size == 1:
+            builder.append(dtype.itemtype.str_format(self.getitem(0)))
+            return
         if size > 1000:
             # Once this goes True it does not go back to False for recursive
             # calls
             use_ellipsis = True
-        dtype = self.find_dtype()
         ndims = len(self.shape)
         i = 0
-        start = True
         builder.append('[')
         if ndims > 1:
             if use_ellipsis:
-                for i in range(3):
-                    if start:
-                        start = False
-                    else:
-                        builder.append(',' * comma + '\n')
-                        if ndims == 3:
+                for i in range(min(3, self.shape[0])):
+                    if i > 0:
+                        builder.append(ccomma + '\n')
+                        if ndims >= 3:
                             builder.append('\n' + indent)
                         else:
                             builder.append(indent)
                     view = self.create_slice([(i, 0, 0, 1)]).get_concrete()
-                    view.to_str(space, comma, builder, indent=indent + ' ', use_ellipsis=use_ellipsis)
-                builder.append('\n' + indent + '..., ')
-                i = self.shape[0] - 3
+                    view.to_str(space, comma, builder, indent=indent + ' ',
+                                                    use_ellipsis=use_ellipsis)
+                if i < self.shape[0] - 1:
+                    builder.append(ccomma + '\n' + indent + '...' + ncomma)
+                    i = self.shape[0] - 3
+                else:
+                    i += 1
             while i < self.shape[0]:
-                if start:
-                    start = False
-                else:
-                    builder.append(',' * comma + '\n')
-                    if ndims == 3:
+                if i > 0:
+                    builder.append(ccomma + '\n')
+                    if ndims >= 3:
                         builder.append('\n' + indent)
                     else:
                         builder.append(indent)
+                # create_slice requires len(chunks) > 1 in order to reduce
+                # shape
                 view = self.create_slice([(i, 0, 0, 1)]).get_concrete()
-                view.to_str(space, comma, builder, indent=indent + ' ', use_ellipsis=use_ellipsis)
+                view.to_str(space, comma, builder, indent=indent + ' ',
+                                                    use_ellipsis=use_ellipsis)
                 i += 1
         elif ndims == 1:
-            spacer = ',' * comma + ' '
+            spacer = ccomma + ' '
             item = self.start
             # An iterator would be a nicer way to walk along the 1d array, but
             # how do I reset it if printing ellipsis? iterators have no
             # "set_offset()"
             i = 0
             if use_ellipsis:
-                for i in range(3):
-                    if start:
-                        start = False
-                    else:
+                for i in range(min(3, self.shape[0])):
+                    if i > 0:
                         builder.append(spacer)
                     builder.append(dtype.itemtype.str_format(self.getitem(item)))
                     item += self.strides[0]
-                # Add a comma only if comma is False - this prevents adding two
-                # commas
-                builder.append(spacer + '...' + ',' * (1 - comma))
-                # Ugly, but can this be done with an iterator?
-                item = self.start + self.backstrides[0] - 2 * self.strides[0]
-                i = self.shape[0] - 3
+                if i < self.shape[0] - 1:
+                    # Add a comma only if comma is False - this prevents adding
+                    # two commas
+                    builder.append(spacer + '...' + ncomma)
+                    # Ugly, but can this be done with an iterator?
+                    item = self.start + self.backstrides[0] - 2 * self.strides[0]
+                    i = self.shape[0] - 3
+                else:
+                    i += 1
             while i < self.shape[0]:
-                if start:
-                    start = False
-                else:
+                if i > 0:
                     builder.append(spacer)
                 builder.append(dtype.itemtype.str_format(self.getitem(item)))
                 item += self.strides[0]
                 i += 1
-        else:
-            builder.append('[')
         builder.append(']')
 
     @jit.unroll_safe
@@ -1001,20 +1082,22 @@
             self.dtype is w_value.find_dtype()):
             self._fast_setslice(space, w_value)
         else:
-            self._sliceloop(w_value, res_shape)
+            arr = SliceArray(self.shape, self.dtype, self, w_value)
+            self._sliceloop(arr)
 
     def _fast_setslice(self, space, w_value):
         assert isinstance(w_value, ConcreteArray)
         itemsize = self.dtype.itemtype.get_element_size()
-        if len(self.shape) == 1:
+        shapelen = len(self.shape)
+        if shapelen == 1:
             rffi.c_memcpy(
                 rffi.ptradd(self.storage, self.start * itemsize),
                 rffi.ptradd(w_value.storage, w_value.start * itemsize),
                 self.size * itemsize
             )
         else:
-            dest = AxisIterator(self)
-            source = AxisIterator(w_value)
+            dest = SkipLastAxisIterator(self)
+            source = SkipLastAxisIterator(w_value)
             while not dest.done:
                 rffi.c_memcpy(
                     rffi.ptradd(self.storage, dest.offset * itemsize),
@@ -1024,21 +1107,16 @@
                 source.next()
                 dest.next()
 
-    def _sliceloop(self, source, res_shape):
-        sig = source.find_sig(res_shape)
-        frame = sig.create_frame(source, res_shape)
-        res_iter = view_iter_from_arr(self)
-        shapelen = len(res_shape)
-        while not res_iter.done():
-            slice_driver.jit_merge_point(sig=sig,
-                                         frame=frame,
-                                         shapelen=shapelen,
-                                         self=self, source=source,
-                                         res_iter=res_iter)
-            self.setitem(res_iter.offset, sig.eval(frame, source).convert_to(
-                self.find_dtype()))
+    def _sliceloop(self, arr):
+        sig = arr.find_sig()
+        frame = sig.create_frame(arr)
+        shapelen = len(self.shape)
+        while not frame.done():
+            slice_driver.jit_merge_point(sig=sig, frame=frame, self=self,
+                                         arr=arr,
+                                         shapelen=shapelen)
+            sig.eval(frame, arr)
             frame.next(shapelen)
-            res_iter = res_iter.next(shapelen)
 
     def copy(self, space):
         array = W_NDimArray(self.size, self.shape[:], self.dtype, self.order)
@@ -1047,7 +1125,7 @@
 
 
 class ViewArray(ConcreteArray):
-    def create_sig(self, res_shape):
+    def create_sig(self):
         return signature.ViewSignature(self.dtype)
 
 
@@ -1084,9 +1162,9 @@
                 strides.reverse()
                 backstrides.reverse()
                 new_shape.reverse()
-            self.strides = strides[:]
-            self.backstrides = backstrides[:]
-            self.shape = new_shape[:]
+            self.strides = strides
+            self.backstrides = backstrides
+            self.shape = new_shape
             return
         new_strides = calc_new_strides(new_shape, self.shape, self.strides)
         if new_strides is None:
@@ -1096,7 +1174,7 @@
         for nd in range(len(new_shape)):
             new_backstrides[nd] = (new_shape[nd] - 1) * new_strides[nd]
         self.strides = new_strides[:]
-        self.backstrides = new_backstrides[:]
+        self.backstrides = new_backstrides
         self.shape = new_shape[:]
 
 class W_NDimArray(ConcreteArray):
@@ -1111,8 +1189,8 @@
         self.shape = new_shape
         self.calc_strides(new_shape)
 
-    def create_sig(self, res_shape):
-        return self.array_sig(res_shape)
+    def create_sig(self):
+        return signature.ArraySignature(self.dtype)
 
     def __del__(self):
         lltype.free(self.storage, flavor='raw', track_allocation=False)
@@ -1239,6 +1317,7 @@
     shape = GetSetProperty(BaseArray.descr_get_shape,
                            BaseArray.descr_set_shape),
     size = GetSetProperty(BaseArray.descr_get_size),
+    ndim = GetSetProperty(BaseArray.descr_get_ndim),
 
     T = GetSetProperty(BaseArray.descr_get_transpose),
     flat = GetSetProperty(BaseArray.descr_get_flatiter),
@@ -1253,6 +1332,8 @@
     all = interp2app(BaseArray.descr_all),
     any = interp2app(BaseArray.descr_any),
     dot = interp2app(BaseArray.descr_dot),
+    var = interp2app(BaseArray.descr_var),
+    std = interp2app(BaseArray.descr_std),
 
     copy = interp2app(BaseArray.descr_copy),
     reshape = interp2app(BaseArray.descr_reshape),
@@ -1279,7 +1360,7 @@
     def descr_next(self, space):
         if self.iter.done():
             raise OperationError(space.w_StopIteration, space.w_None)
-        result = self.eval(self.iter)
+        result = self.getitem(self.iter.offset)
         self.iter = self.iter.next(self.shapelen)
         return result
 
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -1,19 +1,31 @@
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError, operationerrfmt
-from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty
-from pypy.module.micronumpy import interp_boxes, interp_dtype, types
-from pypy.module.micronumpy.signature import ReduceSignature, ScalarSignature, find_sig
+from pypy.module.micronumpy import interp_boxes, interp_dtype
+from pypy.module.micronumpy.signature import ReduceSignature,\
+     find_sig, new_printable_location, AxisReduceSignature, ScalarSignature
 from pypy.rlib import jit
 from pypy.rlib.rarithmetic import LONG_BIT
 from pypy.tool.sourcetools import func_with_new_name
 
 reduce_driver = jit.JitDriver(
-    greens = ['shapelen', "sig"],
-    virtualizables = ["frame"],
-    reds = ["frame", "self", "dtype", "value", "obj"]
+    greens=['shapelen', "sig"],
+    virtualizables=["frame"],
+    reds=["frame", "self", "dtype", "value", "obj"],
+    get_printable_location=new_printable_location('reduce'),
+    name='numpy_reduce',
 )
 
+axisreduce_driver = jit.JitDriver(
+    greens=['shapelen', 'sig'],
+    virtualizables=['frame'],
+    reds=['self','arr', 'identity', 'frame'],
+    name='numpy_axisreduce',
+    get_printable_location=new_printable_location('axisreduce'),
+)
+
+
 class W_Ufunc(Wrappable):
     _attrs_ = ["name", "promote_to_float", "promote_bools", "identity"]
     _immutable_fields_ = ["promote_to_float", "promote_bools", "name"]
@@ -46,18 +58,72 @@
             )
         return self.call(space, __args__.arguments_w)
 
-    def descr_reduce(self, space, w_obj):
-        return self.reduce(space, w_obj, multidim=False)
+    def descr_reduce(self, space, w_obj, w_dim=0):
+        """reduce(...)
+        reduce(a, axis=0)
 
-    def reduce(self, space, w_obj, multidim):
-        from pypy.module.micronumpy.interp_numarray import convert_to_array, Scalar
-        
+        Reduces `a`'s dimension by one, by applying ufunc along one axis.
+
+        Let :math:`a.shape = (N_0, ..., N_i, ..., N_{M-1})`.  Then
+        :math:`ufunc.reduce(a, axis=i)[k_0, ..,k_{i-1}, k_{i+1}, .., k_{M-1}]` =
+        the result of iterating `j` over :math:`range(N_i)`, cumulatively applying
+        ufunc to each :math:`a[k_0, ..,k_{i-1}, j, k_{i+1}, .., k_{M-1}]`.
+        For a one-dimensional array, reduce produces results equivalent to:
+        ::
+
+         r = op.identity # op = ufunc
+         for i in xrange(len(A)):
+           r = op(r, A[i])
+         return r
+
+        For example, add.reduce() is equivalent to sum().
+
+        Parameters
+        ----------
+        a : array_like
+            The array to act on.
+        axis : int, optional
+            The axis along which to apply the reduction.
+
+        Examples
+        --------
+        >>> np.multiply.reduce([2,3,5])
+        30
+
+        A multi-dimensional array example:
+
+        >>> X = np.arange(8).reshape((2,2,2))
+        >>> X
+        array([[[0, 1],
+                [2, 3]],
+               [[4, 5],
+                [6, 7]]])
+        >>> np.add.reduce(X, 0)
+        array([[ 4,  6],
+               [ 8, 10]])
+        >>> np.add.reduce(X) # confirm: default axis value is 0
+        array([[ 4,  6],
+               [ 8, 10]])
+        >>> np.add.reduce(X, 1)
+        array([[ 2,  4],
+               [10, 12]])
+        >>> np.add.reduce(X, 2)
+        array([[ 1,  5],
+               [ 9, 13]])
+        """
+        return self.reduce(space, w_obj, False, False, w_dim)
+
+    def reduce(self, space, w_obj, multidim, promote_to_largest, w_dim):
+        from pypy.module.micronumpy.interp_numarray import convert_to_array, \
+                                                           Scalar
         if self.argcount != 2:
             raise OperationError(space.w_ValueError, space.wrap("reduce only "
                 "supported for binary functions"))
-
+        dim = space.int_w(w_dim)
         assert isinstance(self, W_Ufunc2)
         obj = convert_to_array(space, w_obj)
+        if dim >= len(obj.shape):
+            raise OperationError(space.w_ValueError, space.wrap("axis(=%d) out of bounds" % dim))
         if isinstance(obj, Scalar):
             raise OperationError(space.w_TypeError, space.wrap("cannot reduce "
                 "on a scalar"))
@@ -65,26 +131,80 @@
         size = obj.size
         dtype = find_unaryop_result_dtype(
             space, obj.find_dtype(),
-            promote_to_largest=True
+            promote_to_float=self.promote_to_float,
+            promote_to_largest=promote_to_largest,
+            promote_bools=True
         )
         shapelen = len(obj.shape)
+        if self.identity is None and size == 0:
+            raise operationerrfmt(space.w_ValueError, "zero-size array to "
+                    "%s.reduce without identity", self.name)
+        if shapelen > 1 and dim >= 0:
+            res = self.do_axis_reduce(obj, dtype, dim)
+            return space.wrap(res)
+        scalarsig = ScalarSignature(dtype)
         sig = find_sig(ReduceSignature(self.func, self.name, dtype,
-                                       ScalarSignature(dtype),
-                                       obj.create_sig(obj.shape)), obj)
+                                       scalarsig,
+                                       obj.create_sig()), obj)
         frame = sig.create_frame(obj)
-        if shapelen > 1 and not multidim:
-            raise OperationError(space.w_NotImplementedError,
-                space.wrap("not implemented yet"))
         if self.identity is None:
-            if size == 0:
-                raise operationerrfmt(space.w_ValueError, "zero-size array to "
-                    "%s.reduce without identity", self.name)
             value = sig.eval(frame, obj).convert_to(dtype)
             frame.next(shapelen)
         else:
             value = self.identity.convert_to(dtype)
         return self.reduce_loop(shapelen, sig, frame, value, obj, dtype)
 
+    def do_axis_reduce(self, obj, dtype, dim):
+        from pypy.module.micronumpy.interp_numarray import AxisReduce,\
+             W_NDimArray
+        
+        shape = obj.shape[0:dim] + obj.shape[dim + 1:len(obj.shape)]
+        size = 1
+        for s in shape:
+            size *= s
+        result = W_NDimArray(size, shape, dtype)
+        rightsig = obj.create_sig()
+        # note - this is just a wrapper so signature can fetch
+        #        both left and right, nothing more, especially
+        #        this is not a true virtual array, because shapes
+        #        don't quite match
+        arr = AxisReduce(self.func, self.name, obj.shape, dtype,
+                         result, obj, dim)
+        scalarsig = ScalarSignature(dtype)
+        sig = find_sig(AxisReduceSignature(self.func, self.name, dtype,
+                                           scalarsig, rightsig), arr)
+        assert isinstance(sig, AxisReduceSignature)
+        frame = sig.create_frame(arr)
+        shapelen = len(obj.shape)
+        if self.identity is not None:
+            identity = self.identity.convert_to(dtype)
+        else:
+            identity = None
+        self.reduce_axis_loop(frame, sig, shapelen, arr, identity)
+        return result
+
+    def reduce_axis_loop(self, frame, sig, shapelen, arr, identity):
+        # note - we can be advanterous here, depending on the exact field
+        # layout. For now let's say we iterate the original way and
+        # simply follow the original iteration order
+        while not frame.done():
+            axisreduce_driver.jit_merge_point(frame=frame, self=self,
+                                              sig=sig,
+                                              identity=identity,
+                                              shapelen=shapelen, arr=arr)
+            iter = frame.get_final_iter()
+            v = sig.eval(frame, arr).convert_to(sig.calc_dtype)
+            if iter.first_line:
+                if identity is not None:
+                    value = self.func(sig.calc_dtype, identity, v)
+                else:
+                    value = v
+            else:
+                cur = arr.left.getitem(iter.offset)
+                value = self.func(sig.calc_dtype, cur, v)
+            arr.left.setitem(iter.offset, value)
+            frame.next(shapelen)
+
     def reduce_loop(self, shapelen, sig, frame, value, obj, dtype):
         while not frame.done():
             reduce_driver.jit_merge_point(sig=sig,
@@ -92,10 +212,12 @@
                                           value=value, obj=obj, frame=frame,
                                           dtype=dtype)
             assert isinstance(sig, ReduceSignature)
-            value = sig.binfunc(dtype, value, sig.eval(frame, obj).convert_to(dtype))
+            value = sig.binfunc(dtype, value,
+                                sig.eval(frame, obj).convert_to(dtype))
             frame.next(shapelen)
         return value
 
+
 class W_Ufunc1(W_Ufunc):
     argcount = 1
 
@@ -180,6 +302,7 @@
     reduce = interp2app(W_Ufunc.descr_reduce),
 )
 
+
 def find_binop_result_dtype(space, dt1, dt2, promote_to_float=False,
     promote_bools=False):
     # dt1.num should be <= dt2.num
@@ -228,6 +351,7 @@
             dtypenum += 3
         return interp_dtype.get_dtype_cache(space).builtin_dtypes[dtypenum]
 
+
 def find_unaryop_result_dtype(space, dt, promote_to_float=False,
     promote_bools=False, promote_to_largest=False):
     if promote_bools and (dt.kind == interp_dtype.BOOLLTR):
@@ -252,6 +376,7 @@
             assert False
     return dt
 
+
 def find_dtype_for_scalar(space, w_obj, current_guess=None):
     bool_dtype = interp_dtype.get_dtype_cache(space).w_booldtype
     long_dtype = interp_dtype.get_dtype_cache(space).w_longdtype
@@ -345,7 +470,8 @@
 
         identity = extra_kwargs.get("identity")
         if identity is not None:
-            identity = interp_dtype.get_dtype_cache(space).w_longdtype.box(identity)
+            identity = \
+                 interp_dtype.get_dtype_cache(space).w_longdtype.box(identity)
         extra_kwargs["identity"] = identity
 
         func = ufunc_dtype_caller(space, ufunc_name, op_name, argcount,
diff --git a/pypy/module/micronumpy/signature.py b/pypy/module/micronumpy/signature.py
--- a/pypy/module/micronumpy/signature.py
+++ b/pypy/module/micronumpy/signature.py
@@ -1,10 +1,37 @@
 from pypy.rlib.objectmodel import r_dict, compute_identity_hash, compute_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.module.micronumpy.interp_iter import ViewIterator, ArrayIterator, \
-     OneDimIterator, ConstantIterator
-from pypy.module.micronumpy.strides import calculate_slice_strides
+     ConstantIterator, AxisIterator, ViewTransform,\
+     BroadcastTransform
 from pypy.rlib.jit import hint, unroll_safe, promote
 
+""" Signature specifies both the numpy expression that has been constructed
+and the assembler to be compiled. This is a very important observation -
+Two expressions will be using the same assembler if and only if they are
+compiled to the same signature.
+
+This is also a very convinient tool for specializations. For example
+a + a and a + b (where a != b) will compile to different assembler because
+we specialize on the same array access.
+
+When evaluating, signatures will create iterators per signature node,
+potentially sharing some of them. Iterators depend also on the actual
+expression, they're not only dependant on the array itself. For example
+a + b where a is dim 2 and b is dim 1 would create a broadcasted iterator for
+the array b.
+
+Such iterator changes are called Transformations. An actual iterator would
+be a combination of array and various transformation, like view, broadcast,
+dimension swapping etc.
+
+See interp_iter for transformations
+"""
+
+def new_printable_location(driver_name):
+    def get_printable_location(shapelen, sig):
+        return 'numpy ' + sig.debug_repr() + ' [%d dims,%s]' % (shapelen, driver_name)
+    return get_printable_location
+
 def sigeq(one, two):
     return one.eq(two)
 
@@ -28,7 +55,8 @@
         return sig
 
 class NumpyEvalFrame(object):
-    _virtualizable2_ = ['iterators[*]', 'final_iter', 'arraylist[*]']
+    _virtualizable2_ = ['iterators[*]', 'final_iter', 'arraylist[*]',
+                        'value', 'identity']
 
     @unroll_safe
     def __init__(self, iterators, arrays):
@@ -46,7 +74,7 @@
     def done(self):
         final_iter = promote(self.final_iter)
         if final_iter < 0:
-            return False
+            assert False
         return self.iterators[final_iter].done()
 
     @unroll_safe
@@ -54,6 +82,12 @@
         for i in range(len(self.iterators)):
             self.iterators[i] = self.iterators[i].next(shapelen)
 
+    def get_final_iter(self):
+        final_iter = promote(self.final_iter)
+        if final_iter < 0:
+            assert False
+        return self.iterators[final_iter]
+
 def _add_ptr_to_cache(ptr, cache):
     i = 0
     for p in cache:
@@ -65,6 +99,9 @@
         cache.append(ptr)
         return res
 
+def new_cache():
+    return r_dict(sigeq_no_numbering, sighash)
+
 class Signature(object):
     _attrs_ = ['iter_no', 'array_no']
     _immutable_fields_ = ['iter_no', 'array_no']
@@ -73,7 +110,7 @@
     iter_no = 0
 
     def invent_numbering(self):
-        cache = r_dict(sigeq_no_numbering, sighash)
+        cache = new_cache()
         allnumbers = []
         self._invent_numbering(cache, allnumbers)
 
@@ -90,13 +127,13 @@
             allnumbers.append(no)
         self.iter_no = no
 
-    def create_frame(self, arr, res_shape=None):
-        res_shape = res_shape or arr.shape
+    def create_frame(self, arr):
         iterlist = []
         arraylist = []
-        self._create_iter(iterlist, arraylist, arr, res_shape, [])
+        self._create_iter(iterlist, arraylist, arr, [])
         return NumpyEvalFrame(iterlist, arraylist)
 
+
 class ConcreteSignature(Signature):
     _immutable_fields_ = ['dtype']
 
@@ -115,16 +152,6 @@
     def hash(self):
         return compute_identity_hash(self.dtype)
 
-    def allocate_view_iter(self, arr, res_shape, chunklist):
-        r = arr.shape, arr.start, arr.strides, arr.backstrides
-        if chunklist:
-            for chunkelem in chunklist:
-                r = calculate_slice_strides(r[0], r[1], r[2], r[3], chunkelem)
-        shape, start, strides, backstrides = r
-        if len(res_shape) == 1:
-            return OneDimIterator(start, strides[0], res_shape[0])
-        return ViewIterator(start, strides, backstrides, shape, res_shape)
-
 class ArraySignature(ConcreteSignature):
     def debug_repr(self):
         return 'Array'
@@ -132,23 +159,25 @@
     def _invent_array_numbering(self, arr, cache):
         from pypy.module.micronumpy.interp_numarray import ConcreteArray
         concr = arr.get_concrete()
+        # this get_concrete never forces assembler. If we're here and array
+        # is not of a concrete class it means that we have a _forced_result,
+        # otherwise the signature would not match
         assert isinstance(concr, ConcreteArray)
+        assert concr.dtype is self.dtype
         self.array_no = _add_ptr_to_cache(concr.storage, cache)
 
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
         from pypy.module.micronumpy.interp_numarray import ConcreteArray
         concr = arr.get_concrete()
         assert isinstance(concr, ConcreteArray)
         storage = concr.storage
         if self.iter_no >= len(iterlist):
-            iterlist.append(self.allocate_iter(concr, res_shape, chunklist))
+            iterlist.append(self.allocate_iter(concr, transforms))
         if self.array_no >= len(arraylist):
             arraylist.append(storage)
 
-    def allocate_iter(self, arr, res_shape, chunklist):
-        if chunklist:
-            return self.allocate_view_iter(arr, res_shape, chunklist)
-        return ArrayIterator(arr.size)
+    def allocate_iter(self, arr, transforms):
+        return ArrayIterator(arr.size).apply_transformations(arr, transforms)
 
     def eval(self, frame, arr):
         iter = frame.iterators[self.iter_no]
@@ -161,7 +190,7 @@
     def _invent_array_numbering(self, arr, cache):
         pass
 
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
         if self.iter_no >= len(iterlist):
             iter = ConstantIterator()
             iterlist.append(iter)
@@ -181,8 +210,9 @@
         allnumbers.append(no)
         self.iter_no = no
 
-    def allocate_iter(self, arr, res_shape, chunklist):
-        return self.allocate_view_iter(arr, res_shape, chunklist)
+    def allocate_iter(self, arr, transforms):
+        return ViewIterator(arr.start, arr.strides, arr.backstrides,
+                            arr.shape).apply_transformations(arr, transforms)
 
 class VirtualSliceSignature(Signature):
     def __init__(self, child):
@@ -193,6 +223,9 @@
         assert isinstance(arr, VirtualSlice)
         self.child._invent_array_numbering(arr.child, cache)
 
+    def _invent_numbering(self, cache, allnumbers):
+        self.child._invent_numbering(new_cache(), allnumbers)
+
     def hash(self):
         return intmask(self.child.hash() ^ 1234)
 
@@ -202,12 +235,11 @@
         assert isinstance(other, VirtualSliceSignature)
         return self.child.eq(other.child, compare_array_no)
 
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
         from pypy.module.micronumpy.interp_numarray import VirtualSlice
         assert isinstance(arr, VirtualSlice)
-        chunklist.append(arr.chunks)
-        self.child._create_iter(iterlist, arraylist, arr.child, res_shape,
-                                chunklist)
+        transforms = transforms + [ViewTransform(arr.chunks)]
+        self.child._create_iter(iterlist, arraylist, arr.child, transforms)
 
     def eval(self, frame, arr):
         from pypy.module.micronumpy.interp_numarray import VirtualSlice
@@ -243,11 +275,10 @@
         assert isinstance(arr, Call1)
         self.child._invent_array_numbering(arr.values, cache)
 
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
         from pypy.module.micronumpy.interp_numarray import Call1
         assert isinstance(arr, Call1)
-        self.child._create_iter(iterlist, arraylist, arr.values, res_shape,
-                                chunklist)
+        self.child._create_iter(iterlist, arraylist, arr.values, transforms)
 
     def eval(self, frame, arr):
         from pypy.module.micronumpy.interp_numarray import Call1
@@ -288,29 +319,68 @@
         self.left._invent_numbering(cache, allnumbers)
         self.right._invent_numbering(cache, allnumbers)
 
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
         from pypy.module.micronumpy.interp_numarray import Call2
 
         assert isinstance(arr, Call2)
-        self.left._create_iter(iterlist, arraylist, arr.left, res_shape,
-                               chunklist)
-        self.right._create_iter(iterlist, arraylist, arr.right, res_shape,
-                                chunklist)
+        self.left._create_iter(iterlist, arraylist, arr.left, transforms)
+        self.right._create_iter(iterlist, arraylist, arr.right, transforms)
 
     def eval(self, frame, arr):
         from pypy.module.micronumpy.interp_numarray import Call2
         assert isinstance(arr, Call2)
         lhs = self.left.eval(frame, arr.left).convert_to(self.calc_dtype)
         rhs = self.right.eval(frame, arr.right).convert_to(self.calc_dtype)
+        
         return self.binfunc(self.calc_dtype, lhs, rhs)
 
     def debug_repr(self):
         return 'Call2(%s, %s, %s)' % (self.name, self.left.debug_repr(),
                                       self.right.debug_repr())
 
+class BroadcastLeft(Call2):
+    def _invent_numbering(self, cache, allnumbers):
+        self.left._invent_numbering(new_cache(), allnumbers)
+        self.right._invent_numbering(cache, allnumbers)
+    
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        from pypy.module.micronumpy.interp_numarray import Call2
+
+        assert isinstance(arr, Call2)
+        ltransforms = transforms + [BroadcastTransform(arr.shape)]
+        self.left._create_iter(iterlist, arraylist, arr.left, ltransforms)
+        self.right._create_iter(iterlist, arraylist, arr.right, transforms)
+
+class BroadcastRight(Call2):
+    def _invent_numbering(self, cache, allnumbers):
+        self.left._invent_numbering(cache, allnumbers)
+        self.right._invent_numbering(new_cache(), allnumbers)
+
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        from pypy.module.micronumpy.interp_numarray import Call2
+
+        assert isinstance(arr, Call2)
+        rtransforms = transforms + [BroadcastTransform(arr.shape)]
+        self.left._create_iter(iterlist, arraylist, arr.left, transforms)
+        self.right._create_iter(iterlist, arraylist, arr.right, rtransforms)
+
+class BroadcastBoth(Call2):
+    def _invent_numbering(self, cache, allnumbers):
+        self.left._invent_numbering(new_cache(), allnumbers)
+        self.right._invent_numbering(new_cache(), allnumbers)
+
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        from pypy.module.micronumpy.interp_numarray import Call2
+
+        assert isinstance(arr, Call2)
+        rtransforms = transforms + [BroadcastTransform(arr.shape)]
+        ltransforms = transforms + [BroadcastTransform(arr.shape)]
+        self.left._create_iter(iterlist, arraylist, arr.left, ltransforms)
+        self.right._create_iter(iterlist, arraylist, arr.right, rtransforms)
+
 class ReduceSignature(Call2):
-    def _create_iter(self, iterlist, arraylist, arr, res_shape, chunklist):
-        self.right._create_iter(iterlist, arraylist, arr, res_shape, chunklist)
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        self.right._create_iter(iterlist, arraylist, arr, transforms)
 
     def _invent_numbering(self, cache, allnumbers):
         self.right._invent_numbering(cache, allnumbers)
@@ -320,3 +390,63 @@
 
     def eval(self, frame, arr):
         return self.right.eval(frame, arr)
+
+    def debug_repr(self):
+        return 'ReduceSig(%s, %s)' % (self.name, self.right.debug_repr())
+
+class SliceloopSignature(Call2):
+    def eval(self, frame, arr):
+        from pypy.module.micronumpy.interp_numarray import Call2
+        
+        assert isinstance(arr, Call2)
+        ofs = frame.iterators[0].offset
+        arr.left.setitem(ofs, self.right.eval(frame, arr.right).convert_to(
+            self.calc_dtype))
+    
+    def debug_repr(self):
+        return 'SliceLoop(%s, %s, %s)' % (self.name, self.left.debug_repr(),
+                                          self.right.debug_repr())
+
+class SliceloopBroadcastSignature(SliceloopSignature):
+    def _invent_numbering(self, cache, allnumbers):
+        self.left._invent_numbering(new_cache(), allnumbers)
+        self.right._invent_numbering(cache, allnumbers)
+
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        from pypy.module.micronumpy.interp_numarray import SliceArray
+
+        assert isinstance(arr, SliceArray)
+        rtransforms = transforms + [BroadcastTransform(arr.shape)]
+        self.left._create_iter(iterlist, arraylist, arr.left, transforms)
+        self.right._create_iter(iterlist, arraylist, arr.right, rtransforms)
+
+class AxisReduceSignature(Call2):
+    def _create_iter(self, iterlist, arraylist, arr, transforms):
+        from pypy.module.micronumpy.interp_numarray import AxisReduce,\
+             ConcreteArray
+
+        assert isinstance(arr, AxisReduce)
+        left = arr.left
+        assert isinstance(left, ConcreteArray)
+        iterlist.append(AxisIterator(left.start, arr.dim, arr.shape,
+                                     left.strides, left.backstrides))
+        self.right._create_iter(iterlist, arraylist, arr.right, transforms)
+
+    def _invent_numbering(self, cache, allnumbers):
+        allnumbers.append(0)
+        self.right._invent_numbering(cache, allnumbers)
+
+    def _invent_array_numbering(self, arr, cache):
+        from pypy.module.micronumpy.interp_numarray import AxisReduce
+
+        assert isinstance(arr, AxisReduce)
+        self.right._invent_array_numbering(arr.right, cache)
+
+    def eval(self, frame, arr):
+        from pypy.module.micronumpy.interp_numarray import AxisReduce
+
+        assert isinstance(arr, AxisReduce)
+        return self.right.eval(frame, arr.right).convert_to(self.calc_dtype)
+    
+    def debug_repr(self):
+        return 'AxisReduceSig(%s, %s)' % (self.name, self.right.debug_repr())
diff --git a/pypy/module/micronumpy/strides.py b/pypy/module/micronumpy/strides.py
--- a/pypy/module/micronumpy/strides.py
+++ b/pypy/module/micronumpy/strides.py
@@ -1,4 +1,9 @@
+from pypy.rlib import jit
 
+
+ at jit.look_inside_iff(lambda shape, start, strides, backstrides, chunks:
+    jit.isconstant(len(chunks))
+)
 def calculate_slice_strides(shape, start, strides, backstrides, chunks):
     rstrides = []
     rbackstrides = []
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -3,7 +3,7 @@
 
 class AppTestDtypes(BaseNumpyAppTest):
     def test_dtype(self):
-        from numpypy import dtype
+        from _numpypy import dtype
 
         d = dtype('?')
         assert d.num == 0
@@ -14,7 +14,7 @@
         raises(TypeError, dtype, 1042)
 
     def test_dtype_with_types(self):
-        from numpypy import dtype
+        from _numpypy import dtype
 
         assert dtype(bool).num == 0
         assert dtype(int).num == 7
@@ -22,13 +22,13 @@
         assert dtype(float).num == 12
 
     def test_array_dtype_attr(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
 
         a = array(range(5), long)
         assert a.dtype is dtype(long)
 
     def test_repr_str(self):
-        from numpypy import dtype
+        from _numpypy import dtype
 
         assert repr(dtype) == "<type 'numpypy.dtype'>"
         d = dtype('?')
@@ -36,7 +36,7 @@
         assert str(d) == "bool"
 
     def test_bool_array(self):
-        from numpypy import array, False_, True_
+        from _numpypy import array, False_, True_
 
         a = array([0, 1, 2, 2.5], dtype='?')
         assert a[0] is False_
@@ -44,7 +44,7 @@
             assert a[i] is True_
 
     def test_copy_array_with_dtype(self):
-        from numpypy import array, False_, True_, int64
+        from _numpypy import array, False_, True_, int64
 
         a = array([0, 1, 2, 3], dtype=long)
         # int on 64-bit, long in 32-bit
@@ -58,35 +58,35 @@
         assert b[0] is False_
 
     def test_zeros_bool(self):
-        from numpypy import zeros, False_
+        from _numpypy import zeros, False_
 
         a = zeros(10, dtype=bool)
         for i in range(10):
             assert a[i] is False_
 
     def test_ones_bool(self):
-        from numpypy import ones, True_
+        from _numpypy import ones, True_
 
         a = ones(10, dtype=bool)
         for i in range(10):
             assert a[i] is True_
 
     def test_zeros_long(self):
-        from numpypy import zeros, int64
+        from _numpypy import zeros, int64
         a = zeros(10, dtype=long)
         for i in range(10):
             assert isinstance(a[i], int64)
             assert a[1] == 0
 
     def test_ones_long(self):
-        from numpypy import ones, int64
+        from _numpypy import ones, int64
         a = ones(10, dtype=long)
         for i in range(10):
             assert isinstance(a[i], int64)
             assert a[1] == 1
 
     def test_overflow(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
         assert array([128], 'b')[0] == -128
         assert array([256], 'B')[0] == 0
         assert array([32768], 'h')[0] == -32768
@@ -98,7 +98,7 @@
         raises(OverflowError, "array([2**64], 'Q')")
 
     def test_bool_binop_types(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
         types = [
             '?', 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'f', 'd'
         ]
@@ -107,7 +107,7 @@
             assert (a + array([0], t)).dtype is dtype(t)
 
     def test_binop_types(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
         tests = [('b','B','h'), ('b','h','h'), ('b','H','i'), ('b','i','i'),
                  ('b','l','l'), ('b','q','q'), ('b','Q','d'), ('B','h','h'),
                  ('B','H','H'), ('B','i','i'), ('B','I','I'), ('B','l','l'),
@@ -129,7 +129,7 @@
             assert (array([1], d1) + array([1], d2)).dtype is dtype(dout)
 
     def test_add_int8(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
 
         a = array(range(5), dtype="int8")
         b = a + a
@@ -138,7 +138,7 @@
             assert b[i] == i * 2
 
     def test_add_int16(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
 
         a = array(range(5), dtype="int16")
         b = a + a
@@ -147,7 +147,7 @@
             assert b[i] == i * 2
 
     def test_add_uint32(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
 
         a = array(range(5), dtype="I")
         b = a + a
@@ -156,19 +156,28 @@
             assert b[i] == i * 2
 
     def test_shape(self):
-        from numpypy import dtype
+        from _numpypy import dtype
 
         assert dtype(long).shape == ()
 
     def test_cant_subclass(self):
-        from numpypy import dtype
+        from _numpypy import dtype
 
         # You can't subclass dtype
         raises(TypeError, type, "Foo", (dtype,), {})
 
+    def test_new(self):
+        import _numpypy as np
+        assert np.int_(4) == 4
+        assert np.float_(3.4) == 3.4
+
+    def test_pow(self):
+        from _numpypy import int_
+        assert int_(4) ** 2 == 16
+
 class AppTestTypes(BaseNumpyAppTest):
     def test_abstract_types(self):
-        import numpypy as numpy
+        import _numpypy as numpy
         raises(TypeError, numpy.generic, 0)
         raises(TypeError, numpy.number, 0)
         raises(TypeError, numpy.integer, 0)
@@ -181,7 +190,7 @@
         raises(TypeError, numpy.inexact, 0)
 
     def test_bool(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.bool_.mro() == [numpy.bool_, numpy.generic, object]
         assert numpy.bool_(3) is numpy.True_
@@ -196,7 +205,7 @@
         assert numpy.bool_("False") is numpy.True_
 
     def test_int8(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.int8.mro() == [numpy.int8, numpy.signedinteger, numpy.integer, numpy.number, numpy.generic, object]
 
@@ -218,7 +227,7 @@
         assert numpy.int8('128') == -128
 
     def test_uint8(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.uint8.mro() == [numpy.uint8, numpy.unsignedinteger, numpy.integer, numpy.number, numpy.generic, object]
 
@@ -241,7 +250,7 @@
         assert numpy.uint8('256') == 0
 
     def test_int16(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         x = numpy.int16(3)
         assert x == 3
@@ -251,7 +260,7 @@
         assert numpy.int16('32768') == -32768
 
     def test_uint16(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.uint16(65535) == 65535
         assert numpy.uint16(65536) == 0
@@ -260,7 +269,7 @@
 
     def test_int32(self):
         import sys
-        import numpypy as numpy
+        import _numpypy as numpy
 
         x = numpy.int32(23)
         assert x == 23
@@ -275,7 +284,7 @@
 
     def test_uint32(self):
         import sys
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.uint32(10) == 10
 
@@ -286,14 +295,14 @@
             assert numpy.uint32('4294967296') == 0
 
     def test_int_(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.int_ is numpy.dtype(int).type
         assert numpy.int_.mro() == [numpy.int_, numpy.signedinteger, numpy.integer, numpy.number, numpy.generic, int, object]
 
     def test_int64(self):
         import sys
-        import numpypy as numpy
+        import _numpypy as numpy
 
         if sys.maxint == 2 ** 63 -1:
             assert numpy.int64.mro() == [numpy.int64, numpy.signedinteger, numpy.integer, numpy.number, numpy.generic, int, object]
@@ -315,7 +324,7 @@
 
     def test_uint64(self):
         import sys
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.uint64.mro() == [numpy.uint64, numpy.unsignedinteger, numpy.integer, numpy.number, numpy.generic, object]
 
@@ -330,7 +339,7 @@
         raises(OverflowError, numpy.uint64(18446744073709551616))
 
     def test_float32(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.float32.mro() == [numpy.float32, numpy.floating, numpy.inexact, numpy.number, numpy.generic, object]
 
@@ -339,7 +348,7 @@
         raises(ValueError, numpy.float32, '23.2df')
 
     def test_float64(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         assert numpy.float64.mro() == [numpy.float64, numpy.floating, numpy.inexact, numpy.number, numpy.generic, float, object]
 
@@ -352,7 +361,7 @@
         raises(ValueError, numpy.float64, '23.2df')
 
     def test_subclass_type(self):
-        import numpypy as numpy
+        import _numpypy as numpy
 
         class X(numpy.float64):
             def m(self):
diff --git a/pypy/module/micronumpy/test/test_module.py b/pypy/module/micronumpy/test/test_module.py
--- a/pypy/module/micronumpy/test/test_module.py
+++ b/pypy/module/micronumpy/test/test_module.py
@@ -3,33 +3,33 @@
 
 class AppTestNumPyModule(BaseNumpyAppTest):
     def test_mean(self):
-        from numpypy import array, mean
+        from _numpypy import array, mean
         assert mean(array(range(5))) == 2.0
         assert mean(range(5)) == 2.0
 
     def test_average(self):
-        from numpypy import array, average
+        from _numpypy import array, average
         assert average(range(10)) == 4.5
         assert average(array(range(10))) == 4.5
         
     def test_sum(self):
-        from numpypy import array, sum
+        from _numpypy import array, sum
         assert sum(range(10)) == 45
         assert sum(array(range(10))) == 45
 
     def test_min(self):
-        from numpypy import array, min
+        from _numpypy import array, min
         assert min(range(10)) == 0
         assert min(array(range(10))) == 0
         
     def test_max(self):
-        from numpypy import array, max
+        from _numpypy import array, max
         assert max(range(10)) == 9
         assert max(array(range(10))) == 9
 
     def test_constants(self):
         import math
-        from numpypy import inf, e, pi
+        from _numpypy import inf, e, pi
         assert type(inf) is float
         assert inf == float("inf")
         assert e == math.e
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -157,10 +157,13 @@
         assert calc_new_strides([2, 3, 4], [8, 3], [1, 16]) is None
         assert calc_new_strides([24], [2, 4, 3], [48, 6, 1]) is None
         assert calc_new_strides([24], [2, 4, 3], [24, 6, 2]) == [2]
+        assert calc_new_strides([105, 1], [3, 5, 7], [35, 7, 1]) == [1, 1]
+        assert calc_new_strides([1, 105], [3, 5, 7], [35, 7, 1]) == [105, 1]
+
 
 class AppTestNumArray(BaseNumpyAppTest):
     def test_ndarray(self):
-        from numpypy import ndarray, array, dtype
+        from _numpypy import ndarray, array, dtype
 
         assert type(ndarray) is type
         assert type(array) is not type
@@ -175,12 +178,26 @@
         assert a.dtype is dtype(int)
 
     def test_type(self):
-        from numpypy import array
+        from _numpypy import array
         ar = array(range(5))
         assert type(ar) is type(ar + ar)
 
+    def test_ndim(self):
+        from _numpypy import array
+        x = array(0.2)
+        assert x.ndim == 0
+        x = array([1, 2])
+        assert x.ndim == 1
+        x = array([[1, 2], [3, 4]])
+        assert x.ndim == 2
+        x = array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+        assert x.ndim == 3
+        # numpy actually raises an AttributeError, but _numpypy raises an
+        # TypeError
+        raises(TypeError, 'x.ndim = 3')
+
     def test_init(self):
-        from numpypy import zeros
+        from _numpypy import zeros
         a = zeros(15)
         # Check that storage was actually zero'd.
         assert a[10] == 0.0
@@ -189,7 +206,7 @@
         assert a[13] == 5.3
 
     def test_size(self):
-        from numpypy import array
+        from _numpypy import array
         assert array(3).size == 1
         a = array([1, 2, 3])
         assert a.size == 3
@@ -200,13 +217,13 @@
         Test that empty() works.
         """
 
-        from numpypy import empty
+        from _numpypy import empty
         a = empty(2)
         a[1] = 1.0
         assert a[1] == 1.0
 
     def test_ones(self):
-        from numpypy import ones
+        from _numpypy import ones
         a = ones(3)
         assert len(a) == 3
         assert a[0] == 1
@@ -215,7 +232,7 @@
         assert a[2] == 4
 
     def test_copy(self):
-        from numpypy import arange, array
+        from _numpypy import arange, array
         a = arange(5)
         b = a.copy()
         for i in xrange(5):
@@ -231,13 +248,17 @@
         c = b.copy()
         assert (c == b).all()
 
+        a = arange(15).reshape(5,3)
+        b = a.copy()
+        assert (b == a).all()
+
     def test_iterator_init(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         assert a[3] == 3
 
     def test_getitem(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         raises(IndexError, "a[5]")
         a = a + a
@@ -246,7 +267,7 @@
         raises(IndexError, "a[-6]")
 
     def test_getitem_tuple(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         raises(IndexError, "a[(1,2)]")
         for i in xrange(5):
@@ -256,7 +277,7 @@
             assert a[i] == b[i]
 
     def test_setitem(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         a[-1] = 5.0
         assert a[4] == 5.0
@@ -264,7 +285,7 @@
         raises(IndexError, "a[-6] = 3.0")
 
     def test_setitem_tuple(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         raises(IndexError, "a[(1,2)] = [0,1]")
         for i in xrange(5):
@@ -275,7 +296,7 @@
             assert a[i] == i
 
     def test_setslice_array(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = array(range(2))
         a[1:4:2] = b
@@ -286,7 +307,7 @@
         assert b[1] == 0.
 
     def test_setslice_of_slice_array(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = zeros(5)
         a[::2] = array([9., 10., 11.])
         assert a[0] == 9.
@@ -305,7 +326,7 @@
         assert a[0] == 3.
 
     def test_setslice_list(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5), float)
         b = [0., 1.]
         a[1:4:2] = b
@@ -313,14 +334,14 @@
         assert a[3] == 1.
 
     def test_setslice_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5), float)
         a[1:4:2] = 0.
         assert a[1] == 0.
         assert a[3] == 0.
 
     def test_scalar(self):
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
         a = array(3)
         raises(IndexError, "a[0]")
         raises(IndexError, "a[0] = 5")
@@ -329,13 +350,13 @@
         assert a.dtype is dtype(int)
 
     def test_len(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         assert len(a) == 5
         assert len(a + a) == 5
 
     def test_shape(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         assert a.shape == (5,)
         b = a + a
@@ -344,7 +365,7 @@
         assert c.shape == (3,)
 
     def test_set_shape(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array([])
         a.shape = []
         a = array(range(12))
@@ -364,7 +385,7 @@
         a.shape = (1,)
 
     def test_reshape(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array(range(12))
         exc = raises(ValueError, "b = a.reshape((3, 10))")
         assert str(exc.value) == "total size of new array must be unchanged"
@@ -377,7 +398,7 @@
         a.shape = (12, 2)
 
     def test_slice_reshape(self):
-        from numpypy import zeros, arange
+        from _numpypy import zeros, arange
         a = zeros((4, 2, 3))
         b = a[::2, :, :]
         b.shape = (2, 6)
@@ -413,13 +434,13 @@
         raises(ValueError, arange(10).reshape, (5, -1, -1))
 
     def test_reshape_varargs(self):
-        from numpypy import arange
+        from _numpypy import arange
         z = arange(96).reshape(12, -1)
         y = z.reshape(4, 3, 8)
         assert y.shape == (4, 3, 8)
 
     def test_add(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a + a
         for i in range(5):
@@ -432,7 +453,7 @@
             assert c[i] == bool(a[i] + b[i])
 
     def test_add_other(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = array([i for i in reversed(range(5))])
         c = a + b
@@ -440,20 +461,20 @@
             assert c[i] == 4
 
     def test_add_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a + 5
         for i in range(5):
             assert b[i] == i + 5
 
     def test_radd(self):
-        from numpypy import array
+        from _numpypy import array
         r = 3 + array(range(3))
         for i in range(3):
             assert r[i] == i + 3
 
     def test_add_list(self):
-        from numpypy import array, ndarray
+        from _numpypy import array, ndarray
         a = array(range(5))
         b = list(reversed(range(5)))
         c = a + b
@@ -462,14 +483,14 @@
             assert c[i] == 4
 
     def test_subtract(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a - a
         for i in range(5):
             assert b[i] == 0
 
     def test_subtract_other(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = array([1, 1, 1, 1, 1])
         c = a - b
@@ -477,35 +498,35 @@
             assert c[i] == i - 1
 
     def test_subtract_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a - 5
         for i in range(5):
             assert b[i] == i - 5
 
     def test_scalar_subtract(self):
-        from numpypy import int32
+        from _numpypy import int32
         assert int32(2) - 1 == 1
         assert 1 - int32(2) == -1
 
     def test_mul(self):
-        import numpypy
+        import _numpypy
 
-        a = numpypy.array(range(5))
+        a = _numpypy.array(range(5))
         b = a * a
         for i in range(5):
             assert b[i] == i * i
         assert b.dtype is numpypy.dtype(int)
 
-        a = numpypy.array(range(5), dtype=bool)
+        a = _numpypy.array(range(5), dtype=bool)
         b = a * a
-        assert b.dtype is numpypy.dtype(bool)
-        assert b[0] is numpypy.False_
+        assert b.dtype is _numpypy.dtype(bool)
+        assert b[0] is _numpypy.False_
         for i in range(1, 5):
-            assert b[i] is numpypy.True_
+            assert b[i] is _numpypy.True_
 
     def test_mul_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a * 5
         for i in range(5):
@@ -513,7 +534,7 @@
 
     def test_div(self):
         from math import isnan
-        from numpypy import array, dtype, inf
+        from _numpypy import array, dtype, inf
 
         a = array(range(1, 6))
         b = a / a
@@ -545,7 +566,7 @@
         assert c[2] == -inf
 
     def test_div_other(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = array([2, 2, 2, 2, 2], float)
         c = a / b
@@ -553,14 +574,14 @@
             assert c[i] == i / 2.0
 
     def test_div_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a / 5.0
         for i in range(5):
             assert b[i] == i / 5.0
 
     def test_pow(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5), float)
         b = a ** a
         for i in range(5):
@@ -570,7 +591,7 @@
         assert (a ** 2 == a * a).all()
 
     def test_pow_other(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5), float)
         b = array([2, 2, 2, 2, 2])
         c = a ** b
@@ -578,14 +599,14 @@
             assert c[i] == i ** 2
 
     def test_pow_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5), float)
         b = a ** 2
         for i in range(5):
             assert b[i] == i ** 2
 
     def test_mod(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(1, 6))
         b = a % a
         for i in range(5):
@@ -598,7 +619,7 @@
             assert b[i] == 1
 
     def test_mod_other(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = array([2, 2, 2, 2, 2])
         c = a % b
@@ -606,14 +627,14 @@
             assert c[i] == i % 2
 
     def test_mod_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a % 2
         for i in range(5):
             assert b[i] == i % 2
 
     def test_pos(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([1., -2., 3., -4., -5.])
         b = +a
         for i in range(5):
@@ -624,7 +645,7 @@
             assert a[i] == i
 
     def test_neg(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([1., -2., 3., -4., -5.])
         b = -a
         for i in range(5):
@@ -635,7 +656,7 @@
             assert a[i] == -i
 
     def test_abs(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([1., -2., 3., -4., -5.])
         b = abs(a)
         for i in range(5):
@@ -646,7 +667,7 @@
             assert a[i + 5] == abs(i)
 
     def test_auto_force(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a - 1
         a[2] = 3
@@ -660,7 +681,7 @@
         assert c[1] == 4
 
     def test_getslice(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         s = a[1:5]
         assert len(s) == 4
@@ -674,7 +695,7 @@
         assert s[0] == 5
 
     def test_getslice_step(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(10))
         s = a[1:9:2]
         assert len(s) == 4
@@ -682,7 +703,7 @@
             assert s[i] == a[2 * i + 1]
 
     def test_slice_update(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         s = a[0:3]
         s[1] = 10
@@ -692,7 +713,7 @@
 
     def test_slice_invaidate(self):
         # check that slice shares invalidation list with
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         s = a[0:2]
         b = array([10, 11])
@@ -706,13 +727,18 @@
         assert d[1] == 12
 
     def test_mean(self):
-        from numpypy import array
+        from _numpypy import array, mean
         a = array(range(5))
         assert a.mean() == 2.0
         assert a[:4].mean() == 1.5
+        a = array(range(105)).reshape(3, 5, 7)
+        b = mean(a, axis=0)
+        b[0,0]==35.
+        assert (b == array(range(35, 70), dtype=float).reshape(5, 7)).all()
+        assert (mean(a, 2) == array(range(0, 15), dtype=float).reshape(3, 5) * 7 + 3).all()
 
     def test_sum(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a.sum()
         assert b == 10
@@ -721,53 +747,78 @@
         a = array([True] * 5, bool)
         assert a.sum() == 5
 
+        raises(TypeError, 'a.sum(2, 3)')
+
+    def test_reduce_nd(self):
+        from numpypy import arange, array, multiply
+        a = arange(15).reshape(5, 3)
+        assert a.sum() == 105
+        assert a.max() == 14
+        assert array([]).sum() == 0.0
+        raises(ValueError, 'array([]).max()')
+        assert (a.sum(0) == [30, 35, 40]).all()
+        assert (a.sum(1) == [3, 12, 21, 30, 39]).all()
+        assert (a.max(0) == [12, 13, 14]).all()
+        assert (a.max(1) == [2, 5, 8, 11, 14]).all()
+        assert ((a + a).max() == 28)
+        assert ((a + a).max(0) == [24, 26, 28]).all()
+        assert ((a + a).sum(1) == [6, 24, 42, 60, 78]).all()
+        assert (multiply.reduce(a) == array([0, 3640, 12320])).all()
+        a = array(range(105)).reshape(3, 5, 7)
+        assert (a[:, 1, :].sum(0) == [126, 129, 132, 135, 138, 141, 144]).all()
+        assert (a[:, 1, :].sum(1) == [70, 315, 560]).all()
+        raises (ValueError, 'a[:, 1, :].sum(2)')
+        assert ((a + a).T.sum(2).T == (a + a).sum(0)).all()
+        assert (a.reshape(1,-1).sum(0) == range(105)).all()
+        assert (a.reshape(1,-1).sum(1) == 5460)
+
     def test_identity(self):
-        from numpypy import identity, array
-        from numpypy import int32, float64, dtype
+        from _numpypy import identity, array
+        from _numpypy import int32, float64, dtype
         a = identity(0)
         assert len(a) == 0
         assert a.dtype == dtype('float64')
-        assert a.shape == (0,0)
+        assert a.shape == (0, 0)
         b = identity(1, dtype=int32)
         assert len(b) == 1
         assert b[0][0] == 1
-        assert b.shape == (1,1)
+        assert b.shape == (1, 1)
         assert b.dtype == dtype('int32')
         c = identity(2)
-        assert c.shape == (2,2)
-        assert (c == [[1,0],[0,1]]).all()
+        assert c.shape == (2, 2)
+        assert (c == [[1, 0], [0, 1]]).all()
         d = identity(3, dtype='int32')
-        assert d.shape == (3,3)
+        assert d.shape == (3, 3)
         assert d.dtype == dtype('int32')
-        assert (d == [[1,0,0],[0,1,0],[0,0,1]]).all()
+        assert (d == [[1, 0, 0], [0, 1, 0], [0, 0, 1]]).all()
 
     def test_prod(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(1, 6))
         assert a.prod() == 120.0
         assert a[:4].prod() == 24.0
 
     def test_max(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
         assert a.max() == 5.7
         b = array([])
         raises(ValueError, "b.max()")
 
     def test_max_add(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
         assert (a + a).max() == 11.4
 
     def test_min(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
         assert a.min() == -3.0
         b = array([])
         raises(ValueError, "b.min()")
 
     def test_argmax(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
         r = a.argmax()
         assert r == 2
@@ -788,14 +839,14 @@
         assert a.argmax() == 2
 
     def test_argmin(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([-1.2, 3.4, 5.7, -3.0, 2.7])
         assert a.argmin() == 3
         b = array([])
         raises(ValueError, "b.argmin()")
 
     def test_all(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         assert a.all() == False
         a[0] = 3.0
@@ -804,7 +855,7 @@
         assert b.all() == True
 
     def test_any(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array(range(5))
         assert a.any() == True
         b = zeros(5)
@@ -813,7 +864,7 @@
         assert c.any() == False
 
     def test_dot(self):
-        from numpypy import array, dot
+        from _numpypy import array, dot
         a = array(range(5))
         assert a.dot(a) == 30.0
 
@@ -839,14 +890,14 @@
                    [[86, 302, 518], [110, 390, 670], [134, 478, 822]]]).all()
 
     def test_dot_constant(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         b = a.dot(2.5)
         for i in xrange(5):
             assert b[i] == 2.5 * a[i]
 
     def test_dtype_guessing(self):
-        from numpypy import array, dtype, float64, int8, bool_
+        from _numpypy import array, dtype, float64, int8, bool_
 
         assert array([True]).dtype is dtype(bool)
         assert array([True, False]).dtype is dtype(bool)
@@ -863,7 +914,7 @@
 
     def test_comparison(self):
         import operator
-        from numpypy import array, dtype
+        from _numpypy import array, dtype
 
         a = array(range(5))
         b = array(range(5), float)
@@ -882,7 +933,7 @@
                 assert c[i] == func(b[i], 3)
 
     def test_nonzero(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([1, 2])
         raises(ValueError, bool, a)
         raises(ValueError, bool, a == a)
@@ -892,7 +943,7 @@
         assert not bool(array([0]))
 
     def test_slice_assignment(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         a[::-1] = a
         assert (a == [0, 1, 2, 1, 0]).all()
@@ -902,8 +953,8 @@
         assert (a == [8, 6, 4, 2, 0]).all()
 
     def test_debug_repr(self):
-        from numpypy import zeros, sin
-        from numpypy.pypy import debug_repr
+        from _numpypy import zeros, sin
+        from _numpypy.pypy import debug_repr
         a = zeros(1)
         assert debug_repr(a) == 'Array'
         assert debug_repr(a + a) == 'Call2(add, Array, Array)'
@@ -916,8 +967,17 @@
         b[0] = 3
         assert debug_repr(b) == 'Array'
 
+    def test_remove_invalidates(self):
+        from _numpypy import array
+        from _numpypy.pypy import remove_invalidates
+        a = array([1, 2, 3])
+        b = a + a
+        remove_invalidates(a)
+        a[0] = 14
+        assert b[0] == 28
+
     def test_virtual_views(self):
-        from numpypy import arange
+        from _numpypy import arange
         a = arange(15)
         c = (a + a)
         d = c[::2]
@@ -935,7 +995,7 @@
         assert b[1] == 2
 
     def test_tolist_scalar(self):
-        from numpypy import int32, bool_
+        from _numpypy import int32, bool_
         x = int32(23)
         assert x.tolist() == 23
         assert type(x.tolist()) is int
@@ -943,13 +1003,13 @@
         assert y.tolist() is True
 
     def test_tolist_zerodim(self):
-        from numpypy import array
+        from _numpypy import array
         x = array(3)
         assert x.tolist() == 3
         assert type(x.tolist()) is int
 
     def test_tolist_singledim(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(range(5))
         assert a.tolist() == [0, 1, 2, 3, 4]
         assert type(a.tolist()[0]) is int
@@ -957,41 +1017,55 @@
         assert b.tolist() == [0.2, 0.4, 0.6]
 
     def test_tolist_multidim(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4]])
         assert a.tolist() == [[1, 2], [3, 4]]
 
     def test_tolist_view(self):
-        from numpypy import array
-        a = array([[1,2],[3,4]])
+        from _numpypy import array
+        a = array([[1, 2], [3, 4]])
         assert (a + a).tolist() == [[2, 4], [6, 8]]
 
     def test_tolist_slice(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[17.1, 27.2], [40.3, 50.3]])
-        assert a[:,0].tolist() == [17.1, 40.3]
+        assert a[:, 0].tolist() == [17.1, 40.3]
         assert a[0].tolist() == [17.1, 27.2]
 
+    def test_var(self):
+        from _numpypy import array
+        a = array(range(10))
+        assert a.var() == 8.25
+        a = array([5.0])
+        assert a.var() == 0.0
+
+    def test_std(self):
+        from _numpypy import array
+        a = array(range(10))
+        assert a.std() == 2.8722813232690143
+        a = array([5.0])
+        assert a.std() == 0.0
+
 
 class AppTestMultiDim(BaseNumpyAppTest):
     def test_init(self):
-        import numpypy
-        a = numpypy.zeros((2, 2))
+        import _numpypy
+        a = _numpypy.zeros((2, 2))
         assert len(a) == 2
 
     def test_shape(self):
-        import numpypy
-        assert numpypy.zeros(1).shape == (1,)
-        assert numpypy.zeros((2, 2)).shape == (2, 2)
-        assert numpypy.zeros((3, 1, 2)).shape == (3, 1, 2)
-        assert numpypy.array([[1], [2], [3]]).shape == (3, 1)
-        assert len(numpypy.zeros((3, 1, 2))) == 3
-        raises(TypeError, len, numpypy.zeros(()))
-        raises(ValueError, numpypy.array, [[1, 2], 3])
+        import _numpypy
+        assert _numpypy.zeros(1).shape == (1,)
+        assert _numpypy.zeros((2, 2)).shape == (2, 2)
+        assert _numpypy.zeros((3, 1, 2)).shape == (3, 1, 2)
+        assert _numpypy.array([[1], [2], [3]]).shape == (3, 1)
+        assert len(_numpypy.zeros((3, 1, 2))) == 3
+        raises(TypeError, len, _numpypy.zeros(()))
+        raises(ValueError, _numpypy.array, [[1, 2], 3])
 
     def test_getsetitem(self):
-        import numpypy
-        a = numpypy.zeros((2, 3, 1))
+        import _numpypy
+        a = _numpypy.zeros((2, 3, 1))
         raises(IndexError, a.__getitem__, (2, 0, 0))
         raises(IndexError, a.__getitem__, (0, 3, 0))
         raises(IndexError, a.__getitem__, (0, 0, 1))
@@ -1002,8 +1076,8 @@
         assert a[1, -1, 0] == 3
 
     def test_slices(self):
-        import numpypy
-        a = numpypy.zeros((4, 3, 2))
+        import _numpypy
+        a = _numpypy.zeros((4, 3, 2))
         raises(IndexError, a.__getitem__, (4,))
         raises(IndexError, a.__getitem__, (3, 3))
         raises(IndexError, a.__getitem__, (slice(None), 3))
@@ -1036,51 +1110,51 @@
         assert a[1][2][1] == 15
 
     def test_init_2(self):
-        import numpypy
-        raises(ValueError, numpypy.array, [[1], 2])
-        raises(ValueError, numpypy.array, [[1, 2], [3]])
-        raises(ValueError, numpypy.array, [[[1, 2], [3, 4], 5]])
-        raises(ValueError, numpypy.array, [[[1, 2], [3, 4], [5]]])
-        a = numpypy.array([[1, 2], [4, 5]])
+        import _numpypy
+        raises(ValueError, _numpypy.array, [[1], 2])
+        raises(ValueError, _numpypy.array, [[1, 2], [3]])
+        raises(ValueError, _numpypy.array, [[[1, 2], [3, 4], 5]])
+        raises(ValueError, _numpypy.array, [[[1, 2], [3, 4], [5]]])
+        a = _numpypy.array([[1, 2], [4, 5]])
         assert a[0, 1] == 2
         assert a[0][1] == 2
-        a = numpypy.array(([[[1, 2], [3, 4], [5, 6]]]))
+        a = _numpypy.array(([[[1, 2], [3, 4], [5, 6]]]))
         assert (a[0, 1] == [3, 4]).all()
 
     def test_setitem_slice(self):
-        import numpypy
-        a = numpypy.zeros((3, 4))
+        import _numpypy
+        a = _numpypy.zeros((3, 4))
         a[1] = [1, 2, 3, 4]
         assert a[1, 2] == 3
         raises(TypeError, a[1].__setitem__, [1, 2, 3])
-        a = numpypy.array([[1, 2], [3, 4]])
+        a = _numpypy.array([[1, 2], [3, 4]])
         assert (a == [[1, 2], [3, 4]]).all()
-        a[1] = numpypy.array([5, 6])
+        a[1] = _numpypy.array([5, 6])
         assert (a == [[1, 2], [5, 6]]).all()
-        a[:, 1] = numpypy.array([8, 10])
+        a[:, 1] = _numpypy.array([8, 10])
         assert (a == [[1, 8], [5, 10]]).all()
-        a[0, :: -1] = numpypy.array([11, 12])
+        a[0, :: -1] = _numpypy.array([11, 12])
         assert (a == [[12, 11], [5, 10]]).all()
 
     def test_ufunc(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4], [5, 6]])
         assert ((a + a) == \
             array([[1 + 1, 2 + 2], [3 + 3, 4 + 4], [5 + 5, 6 + 6]])).all()
 
     def test_getitem_add(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
         assert (a + a)[1, 1] == 8
 
     def test_ufunc_negative(self):
-        from numpypy import array, negative
+        from _numpypy import array, negative
         a = array([[1, 2], [3, 4]])
         b = negative(a + a)
         assert (b == [[-2, -4], [-6, -8]]).all()
 
     def test_getitem_3(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4], [5, 6], [7, 8],
                    [9, 10], [11, 12], [13, 14]])
         b = a[::2]
@@ -1091,37 +1165,37 @@
         assert c[1][1] == 12
 
     def test_multidim_ones(self):
-        from numpypy import ones
+        from _numpypy import ones
         a = ones((1, 2, 3))
         assert a[0, 1, 2] == 1.0
 
     def test_multidim_setslice(self):
-        from numpypy import zeros, ones
+        from _numpypy import zeros, ones
         a = zeros((3, 3))
         b = ones((3, 3))
-        a[:,1:3] = b[:,1:3]
+        a[:, 1:3] = b[:, 1:3]
         assert (a == [[0, 1, 1], [0, 1, 1], [0, 1, 1]]).all()
         a = zeros((3, 3))
         b = ones((3, 3))
-        a[:,::2] = b[:,::2]
+        a[:, ::2] = b[:, ::2]
         assert (a == [[1, 0, 1], [1, 0, 1], [1, 0, 1]]).all()
 
     def test_broadcast_ufunc(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4], [5, 6]])
         b = array([5, 6])
         c = ((a + b) == [[1 + 5, 2 + 6], [3 + 5, 4 + 6], [5 + 5, 6 + 6]])
         assert c.all()
 
     def test_broadcast_setslice(self):
-        from numpypy import zeros, ones
+        from _numpypy import zeros, ones
         a = zeros((10, 10))
         b = ones(10)
         a[:, :] = b
         assert a[3, 5] == 1
 
     def test_broadcast_shape_agreement(self):
-        from numpypy import zeros, array
+        from _numpypy import zeros, array
         a = zeros((3, 1, 3))
         b = array(((10, 11, 12), (20, 21, 22), (30, 31, 32)))
         c = ((a + b) == [b, b, b])
@@ -1135,7 +1209,7 @@
         assert c.all()
 
     def test_broadcast_scalar(self):
-        from numpypy import zeros
+        from _numpypy import zeros
         a = zeros((4, 5), 'd')
         a[:, 1] = 3
         assert a[2, 1] == 3
@@ -1146,14 +1220,14 @@
         assert a[3, 2] == 0
 
     def test_broadcast_call2(self):
-        from numpypy import zeros, ones
+        from _numpypy import zeros, ones
         a = zeros((4, 1, 5))
         b = ones((4, 3, 5))
         b[:] = (a + a)
         assert (b == zeros((4, 3, 5))).all()
 
     def test_broadcast_virtualview(self):
-        from numpypy import arange, zeros
+        from _numpypy import arange, zeros
         a = arange(8).reshape([2, 2, 2])
         b = (a + a)[1, 1]
         c = zeros((2, 2, 2))
@@ -1161,13 +1235,13 @@
         assert (c == [[[12, 14], [12, 14]], [[12, 14], [12, 14]]]).all()
 
     def test_argmax(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2], [3, 4], [5, 6]])
         assert a.argmax() == 5
         assert a[:2, ].argmax() == 3
 
     def test_broadcast_wrong_shapes(self):
-        from numpypy import zeros
+        from _numpypy import zeros
         a = zeros((4, 3, 2))
         b = zeros((4, 2))
         exc = raises(ValueError, lambda: a + b)
@@ -1175,7 +1249,7 @@
             " together with shapes (4,3,2) (4,2)"
 
     def test_reduce(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
         assert a.sum() == (13 * 12) / 2
         b = a[1:, 1::2]
@@ -1183,7 +1257,7 @@
         assert c.sum() == (6 + 8 + 10 + 12) * 2
 
     def test_transpose(self):
-        from numpypy import array
+        from _numpypy import array
         a = array(((range(3), range(3, 6)),
                    (range(6, 9), range(9, 12)),
                    (range(12, 15), range(15, 18)),
@@ -1202,7 +1276,7 @@
         assert(b[:, 0] == a[0, :]).all()
 
     def test_flatiter(self):
-        from numpypy import array, flatiter
+        from _numpypy import array, flatiter
         a = array([[10, 30], [40, 60]])
         f_iter = a.flat
         assert f_iter.next() == 10
@@ -1217,23 +1291,23 @@
         assert s == 140
 
     def test_flatiter_array_conv(self):
-        from numpypy import array, dot
+        from _numpypy import array, dot
         a = array([1, 2, 3])
         assert dot(a.flat, a.flat) == 14
 
     def test_flatiter_varray(self):
-        from numpypy import ones
+        from _numpypy import ones
         a = ones((2, 2))
         assert list(((a + a).flat)) == [2, 2, 2, 2]
 
     def test_slice_copy(self):
-        from numpypy import zeros
+        from _numpypy import zeros
         a = zeros((10, 10))
         b = a[0].copy()
         assert (b == zeros(10)).all()
 
     def test_array_interface(self):
-        from numpypy import array
+        from _numpypy import array
         a = array([1, 2, 3])
         i = a.__array_interface__
         assert isinstance(i['data'][0], int)
@@ -1242,6 +1316,7 @@
         assert isinstance(i['data'][0], int)
         raises(TypeError, getattr, array(3), '__array_interface__')
 
+
 class AppTestSupport(BaseNumpyAppTest):
     def setup_class(cls):
         import struct
@@ -1254,7 +1329,7 @@
 
     def test_fromstring(self):
         import sys
-        from numpypy import fromstring, array, uint8, float32, int32
+        from _numpypy import fromstring, array, uint8, float32, int32
 
         a = fromstring(self.data)
         for i in range(4):
@@ -1284,17 +1359,17 @@
         assert g[1] == 2
         assert g[2] == 3
         h = fromstring("1, , 2, 3", dtype=uint8, sep=",")
-        assert (h == [1,0,2,3]).all()
+        assert (h == [1, 0, 2, 3]).all()
         i = fromstring("1    2 3", dtype=uint8, sep=" ")
-        assert (i == [1,2,3]).all()
+        assert (i == [1, 2, 3]).all()
         j = fromstring("1\t\t\t\t2\t3", dtype=uint8, sep="\t")
-        assert (j == [1,2,3]).all()
+        assert (j == [1, 2, 3]).all()
         k = fromstring("1,x,2,3", dtype=uint8, sep=",")
-        assert (k == [1,0]).all()
+        assert (k == [1, 0]).all()
         l = fromstring("1,x,2,3", dtype='float32', sep=",")
-        assert (l == [1.0,-1.0]).all()
+        assert (l == [1.0, -1.0]).all()
         m = fromstring("1,,2,3", sep=",")
-        assert (m == [1.0,-1.0,2.0,3.0]).all()
+        assert (m == [1.0, -1.0, 2.0, 3.0]).all()
         n = fromstring("3.4 2.0 3.8 2.2", dtype=int32, sep=" ")
         assert (n == [3]).all()
         o = fromstring("1.0 2f.0f 3.8 2.2", dtype=float32, sep=" ")
@@ -1318,7 +1393,7 @@
             assert (u == [1, 0]).all()
 
     def test_fromstring_types(self):
-        from numpypy import (fromstring, int8, int16, int32, int64, uint8,
+        from _numpypy import (fromstring, int8, int16, int32, int64, uint8,
             uint16, uint32, float32, float64)
 
         a = fromstring('\xFF', dtype=int8)
@@ -1342,9 +1417,8 @@
         j = fromstring(self.ulongval, dtype='L')
         assert j[0] == 12
 
-
     def test_fromstring_invalid(self):
-        from numpypy import fromstring, uint16, uint8, int32
+        from _numpypy import fromstring, uint16, uint8, int32
         #default dtype is 64-bit float, so 3 bytes should fail
         raises(ValueError, fromstring, "\x01\x02\x03")
         #3 bytes is not modulo 2 bytes (int16)
@@ -1355,7 +1429,8 @@
 
 class AppTestRepr(BaseNumpyAppTest):
     def test_repr(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
+        int_size = array(5).dtype.itemsize
         a = array(range(5), float)
         assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
         a = array([], float)
@@ -1363,14 +1438,26 @@
         a = zeros(1001)
         assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
         a = array(range(5), long)
-        assert repr(a) == "array([0, 1, 2, 3, 4])"
+        if a.dtype.itemsize == int_size:
+            assert repr(a) == "array([0, 1, 2, 3, 4])"
+        else:
+            assert repr(a) == "array([0, 1, 2, 3, 4], dtype=int64)"
+        a = array(range(5), 'int32')
+        if a.dtype.itemsize == int_size:
+            assert repr(a) == "array([0, 1, 2, 3, 4])"
+        else:
+            assert repr(a) == "array([0, 1, 2, 3, 4], dtype=int32)"
         a = array([], long)
         assert repr(a) == "array([], dtype=int64)"
         a = array([True, False, True, False], "?")
         assert repr(a) == "array([True, False, True, False], dtype=bool)"
+        a = zeros([])
+        assert repr(a) == "array(0.0)"
+        a = array(0.2)
+        assert repr(a) == "array(0.2)"
 
     def test_repr_multi(self):
-        from numpypy import array, zeros
+        from _numpypy import arange, zeros
         a = zeros((3, 4))
         assert repr(a) == '''array([[0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
@@ -1383,9 +1470,19 @@
        [[0.0, 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, 0.0]]])'''
+        a = arange(1002).reshape((2, 501))
+        assert repr(a) == '''array([[0, 1, 2, ..., 498, 499, 500],
+       [501, 502, 503, ..., 999, 1000, 1001]])'''
+        assert repr(a.T) == '''array([[0, 501],
+       [1, 502],
+       [2, 503],
+       ...,
+       [498, 999],
+       [499, 1000],
+       [500, 1001]])'''
 
     def test_repr_slice(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array(range(5), float)
         b = a[1::2]
         assert repr(b) == "array([1.0, 3.0])"
@@ -1400,7 +1497,7 @@
         assert repr(b) == "array([], shape=(0, 5), dtype=int16)"
 
     def test_str(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array(range(5), float)
         assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
         assert str((2 * a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
@@ -1426,14 +1523,14 @@
 
         a = zeros((400, 400), dtype=int)
         assert str(a) == "[[0 0 0 ..., 0 0 0]\n [0 0 0 ..., 0 0 0]\n" \
-           " [0 0 0 ..., 0 0 0]\n ..., \n [0 0 0 ..., 0 0 0]\n" \
+           " [0 0 0 ..., 0 0 0]\n ...,\n [0 0 0 ..., 0 0 0]\n" \
            " [0 0 0 ..., 0 0 0]\n [0 0 0 ..., 0 0 0]]"
         a = zeros((2, 2, 2))
         r = str(a)
         assert r == '[[[0.0 0.0]\n  [0.0 0.0]]\n\n [[0.0 0.0]\n  [0.0 0.0]]]'
 
     def test_str_slice(self):
-        from numpypy import array, zeros
+        from _numpypy import array, zeros
         a = array(range(5), float)
         b = a[1::2]
         assert str(b) == "[1.0 3.0]"
@@ -1449,7 +1546,7 @@
 
 class AppTestRanges(BaseNumpyAppTest):
     def test_arange(self):
-        from numpypy import arange, array, dtype
+        from _numpypy import arange, array, dtype
         a = arange(3)
         assert (a == [0, 1, 2]).all()
         assert a.dtype is dtype(int)
@@ -1471,10 +1568,14 @@
 
 class AppTestRanges(BaseNumpyAppTest):
     def test_app_reshape(self):
-        from numpypy import arange, array, dtype, reshape
+        from _numpypy import arange, array, dtype, reshape
         a = arange(12)
         b = reshape(a, (3, 4))
         assert b.shape == (3, 4)
         a = range(12)
         b = reshape(a, (3, 4))
         assert b.shape == (3, 4)
+        a = array(range(105)).reshape(3, 5, 7)
+        assert a.reshape(1, -1).shape == (1, 105)
+        assert a.reshape(1, 1, -1).shape == (1, 1, 105)
+        assert a.reshape(-1, 1, 1).shape == (105, 1, 1)
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -4,14 +4,14 @@
 
 class AppTestUfuncs(BaseNumpyAppTest):
     def test_ufunc_instance(self):
-        from numpypy import add, ufunc
+        from _numpypy import add, ufunc
 
         assert isinstance(add, ufunc)
         assert repr(add) == "<ufunc 'add'>"
         assert repr(ufunc) == "<type 'numpypy.ufunc'>"
 
     def test_ufunc_attrs(self):
-        from numpypy import add, multiply, sin
+        from _numpypy import add, multiply, sin
 
         assert add.identity == 0
         assert multiply.identity == 1
@@ -22,7 +22,7 @@
         assert sin.nin == 1
 
     def test_wrong_arguments(self):
-        from numpypy import add, sin
+        from _numpypy import add, sin
 
         raises(ValueError, add, 1)
         raises(TypeError, add, 1, 2, 3)
@@ -30,14 +30,14 @@
         raises(ValueError, sin)
 
     def test_single_item(self):
-        from numpypy import negative, sign, minimum
+        from _numpypy import negative, sign, minimum
 
         assert negative(5.0) == -5.0
         assert sign(-0.0) == 0.0
         assert minimum(2.0, 3.0) == 2.0
 
     def test_sequence(self):
-        from numpypy import array, ndarray, negative, minimum
+        from _numpypy import array, ndarray, negative, minimum
         a = array(range(3))
         b = [2.0, 1.0, 0.0]
         c = 1.0
@@ -71,7 +71,7 @@
             assert min_c_b[i] == min(b[i], c)
 
     def test_negative(self):
-        from numpypy import array, negative
+        from _numpypy import array, negative
 
         a = array([-5.0, 0.0, 1.0])
         b = negative(a)
@@ -86,7 +86,7 @@
         assert negative(a + a)[3] == -6
 
     def test_abs(self):
-        from numpypy import array, absolute
+        from _numpypy import array, absolute
 
         a = array([-5.0, -0.0, 1.0])
         b = absolute(a)
@@ -94,7 +94,7 @@
             assert b[i] == abs(a[i])
 
     def test_add(self):
-        from numpypy import array, add
+        from _numpypy import array, add
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -103,7 +103,7 @@
             assert c[i] == a[i] + b[i]
 
     def test_divide(self):
-        from numpypy import array, divide
+        from _numpypy import array, divide
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -114,7 +114,7 @@
         assert (divide(array([-10]), array([2])) == array([-5])).all()
 
     def test_fabs(self):
-        from numpypy import array, fabs
+        from _numpypy import array, fabs
         from math import fabs as math_fabs
 
         a = array([-5.0, -0.0, 1.0])
@@ -123,7 +123,7 @@
             assert b[i] == math_fabs(a[i])
 
     def test_minimum(self):
-        from numpypy import array, minimum
+        from _numpypy import array, minimum
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -132,7 +132,7 @@
             assert c[i] == min(a[i], b[i])
 
     def test_maximum(self):
-        from numpypy import array, maximum
+        from _numpypy import array, maximum
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -145,7 +145,7 @@
         assert isinstance(x, (int, long))
 
     def test_multiply(self):
-        from numpypy import array, multiply
+        from _numpypy import array, multiply
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -154,7 +154,7 @@
             assert c[i] == a[i] * b[i]
 
     def test_sign(self):
-        from numpypy import array, sign, dtype
+        from _numpypy import array, sign, dtype
 
         reference = [-1.0, 0.0, 0.0, 1.0]
         a = array([-5.0, -0.0, 0.0, 6.0])
@@ -173,7 +173,7 @@
         assert a[1] == 0
 
     def test_reciporocal(self):
-        from numpypy import array, reciprocal
+        from _numpypy import array, reciprocal
 
         reference = [-0.2, float("inf"), float("-inf"), 2.0]
         a = array([-5.0, 0.0, -0.0, 0.5])
@@ -182,7 +182,7 @@
             assert b[i] == reference[i]
 
     def test_subtract(self):
-        from numpypy import array, subtract
+        from _numpypy import array, subtract
 
         a = array([-5.0, -0.0, 1.0])
         b = array([ 3.0, -2.0,-3.0])
@@ -191,7 +191,7 @@
             assert c[i] == a[i] - b[i]
 
     def test_floor(self):
-        from numpypy import array, floor
+        from _numpypy import array, floor
 
         reference = [-2.0, -1.0, 0.0, 1.0, 1.0]
         a = array([-1.4, -1.0, 0.0, 1.0, 1.4])
@@ -200,7 +200,7 @@
             assert b[i] == reference[i]
 
     def test_copysign(self):
-        from numpypy import array, copysign
+        from _numpypy import array, copysign
 
         reference = [5.0, -0.0, 0.0, -6.0]
         a = array([-5.0, 0.0, 0.0, 6.0])
@@ -216,7 +216,7 @@
 
     def test_exp(self):
         import math
-        from numpypy import array, exp
+        from _numpypy import array, exp
 
         a = array([-5.0, -0.0, 0.0, 12345678.0, float("inf"),
                    -float('inf'), -12343424.0])
@@ -230,7 +230,7 @@
 
     def test_sin(self):
         import math
-        from numpypy import array, sin
+        from _numpypy import array, sin
 
         a = array([0, 1, 2, 3, math.pi, math.pi*1.5, math.pi*2])
         b = sin(a)
@@ -243,7 +243,7 @@
 
     def test_cos(self):
         import math
-        from numpypy import array, cos
+        from _numpypy import array, cos
 
         a = array([0, 1, 2, 3, math.pi, math.pi*1.5, math.pi*2])
         b = cos(a)
@@ -252,7 +252,7 @@
 
     def test_tan(self):
         import math
-        from numpypy import array, tan
+        from _numpypy import array, tan
 
         a = array([0, 1, 2, 3, math.pi, math.pi*1.5, math.pi*2])
         b = tan(a)
@@ -262,7 +262,7 @@
 
     def test_arcsin(self):
         import math
-        from numpypy import array, arcsin
+        from _numpypy import array, arcsin
 
         a = array([-1, -0.5, -0.33, 0, 0.33, 0.5, 1])
         b = arcsin(a)
@@ -276,7 +276,7 @@
 
     def test_arccos(self):
         import math
-        from numpypy import array, arccos
+        from _numpypy import array, arccos
 
         a = array([-1, -0.5, -0.33, 0, 0.33, 0.5, 1])
         b = arccos(a)
@@ -291,20 +291,20 @@
 
     def test_arctan(self):
         import math
-        from numpypy import array, arctan
+        from _numpypy import array, arctan
 
         a = array([-3, -2, -1, 0, 1, 2, 3, float('inf'), float('-inf')])
         b = arctan(a)
         for i in range(len(a)):
             assert b[i] == math.atan(a[i])
 
-        a  = array([float('nan')])
+        a = array([float('nan')])
         b = arctan(a)
         assert math.isnan(b[0])
 
     def test_arcsinh(self):
         import math
-        from numpypy import arcsinh, inf
+        from _numpypy import arcsinh, inf
 
         for v in [inf, -inf, 1.0, math.e]:
             assert math.asinh(v) == arcsinh(v)
@@ -312,7 +312,7 @@
 
     def test_arctanh(self):
         import math
-        from numpypy import arctanh
+        from _numpypy import arctanh
 
         for v in [.99, .5, 0, -.5, -.99]:
             assert math.atanh(v) == arctanh(v)
@@ -323,7 +323,7 @@
 
     def test_sqrt(self):
         import math
-        from numpypy import sqrt
+        from _numpypy import sqrt
 
         nan, inf = float("nan"), float("inf")
         data = [1, 2, 3, inf]
@@ -333,22 +333,28 @@
         assert math.isnan(sqrt(nan))
 
     def test_reduce_errors(self):
-        from numpypy import sin, add
+        from _numpypy import sin, add
 
         raises(ValueError, sin.reduce, [1, 2, 3])
-        raises(TypeError, add.reduce, 1)
+        raises(ValueError, add.reduce, 1)
 
-    def test_reduce(self):
-        from numpypy import add, maximum
+    def test_reduce_1d(self):
+        from _numpypy import add, maximum
 
         assert add.reduce([1, 2, 3]) == 6
         assert maximum.reduce([1]) == 1
         assert maximum.reduce([1, 2, 3]) == 3
         raises(ValueError, maximum.reduce, [])
 
+    def test_reduceND(self):
+        from numpypy import add, arange
+        a = arange(12).reshape(3, 4)
+        assert (add.reduce(a, 0) == [12, 15, 18, 21]).all()
+        assert (add.reduce(a, 1) == [6.0, 22.0, 38.0]).all()
+
     def test_comparisons(self):
         import operator
-        from numpypy import equal, not_equal, less, less_equal, greater, greater_equal
+        from _numpypy import equal, not_equal, less, less_equal, greater, greater_equal
 
         for ufunc, func in [
             (equal, operator.eq),
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -47,6 +47,8 @@
         def f(i):
             interp = InterpreterState(codes[i])
             interp.run(space)
+            if not len(interp.results):
+                raise Exception("need results")
             w_res = interp.results[-1]
             if isinstance(w_res, BaseArray):
                 concr = w_res.get_concrete_or_scalar()
@@ -115,6 +117,28 @@
                                 "int_add": 1, "int_ge": 1, "guard_false": 1,
                                 "jump": 1, 'arraylen_gc': 1})
 
+    def define_axissum():
+        return """
+        a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+        b = sum(a,0)
+        b -> 1
+        """
+
+    def test_axissum(self):
+        result = self.run("axissum")
+        assert result == 30
+        # XXX note - the bridge here is fairly crucial and yet it's pretty
+        #            bogus. We need to improve the situation somehow.
+        self.check_simple_loop({'getinteriorfield_raw': 2,
+                                'setinteriorfield_raw': 1,
+                                'arraylen_gc': 1,
+                                'guard_true': 1,
+                                'int_lt': 1,
+                                'jump': 1,
+                                'float_add': 1,
+                                'int_add': 3,
+                                })
+
     def define_prod():
         return """
         a = |30|
@@ -193,9 +217,9 @@
         # This is the sum of the ops for both loops, however if you remove the
         # optimization then you end up with 2 float_adds, so we can still be
         # sure it was optimized correctly.
-        self.check_resops({'setinteriorfield_raw': 4, 'getfield_gc': 26,
+        self.check_resops({'setinteriorfield_raw': 4, 'getfield_gc': 22,
                            'getarrayitem_gc': 4, 'getarrayitem_gc_pure': 2,
-                           'getfield_gc_pure': 4,
+                           'getfield_gc_pure': 8,
                            'guard_class': 8, 'int_add': 8, 'float_mul': 2,
                            'jump': 2, 'int_ge': 4,
                            'getinteriorfield_raw': 4, 'float_add': 2,
@@ -212,7 +236,8 @@
     def test_ufunc(self):
         result = self.run("ufunc")
         assert result == -6
-        self.check_simple_loop({"getinteriorfield_raw": 2, "float_add": 1, "float_neg": 1,
+        self.check_simple_loop({"getinteriorfield_raw": 2, "float_add": 1,
+                                "float_neg": 1,
                                 "setinteriorfield_raw": 1, "int_add": 2,
                                 "int_ge": 1, "guard_false": 1, "jump": 1,
                                 'arraylen_gc': 1})
@@ -322,10 +347,9 @@
         result = self.run("setslice")
         assert result == 11.0
         self.check_trace_count(1)
-        self.check_simple_loop({'getinteriorfield_raw': 2, 'float_add' : 1,
-                                'setinteriorfield_raw': 1, 'int_add': 3,
-                                'int_lt': 1, 'guard_true': 1, 'jump': 1,
-                                'arraylen_gc': 3})
+        self.check_simple_loop({'getinteriorfield_raw': 2, 'float_add': 1,
+                                'setinteriorfield_raw': 1, 'int_add': 2,
+                                'int_eq': 1, 'guard_false': 1, 'jump': 1})
 
     def define_virtual_slice():
         return """
@@ -339,11 +363,12 @@
         result = self.run("virtual_slice")
         assert result == 4
         self.check_trace_count(1)
-        self.check_simple_loop({'getinteriorfield_raw': 2, 'float_add' : 1,
+        self.check_simple_loop({'getinteriorfield_raw': 2, 'float_add': 1,
                                 'setinteriorfield_raw': 1, 'int_add': 2,
                                 'int_ge': 1, 'guard_false': 1, 'jump': 1,
                                 'arraylen_gc': 1})
 
+
 class TestNumpyOld(LLJitMixin):
     def setup_class(cls):
         py.test.skip("old")
@@ -377,4 +402,3 @@
 
         result = self.meta_interp(f, [5], listops=True, backendopt=True)
         assert result == f(5)
-
diff --git a/pypy/module/pypyjit/__init__.py b/pypy/module/pypyjit/__init__.py
--- a/pypy/module/pypyjit/__init__.py
+++ b/pypy/module/pypyjit/__init__.py
@@ -7,16 +7,21 @@
     interpleveldefs = {
         'set_param':    'interp_jit.set_param',
         'residual_call': 'interp_jit.residual_call',
-        'set_compile_hook': 'interp_jit.set_compile_hook',
-        'DebugMergePoint': 'interp_resop.W_DebugMergePoint',
+        'set_compile_hook': 'interp_resop.set_compile_hook',
+        'set_optimize_hook': 'interp_resop.set_optimize_hook',
+        'set_abort_hook': 'interp_resop.set_abort_hook',
+        'ResOperation': 'interp_resop.WrappedOp',
+        'Box': 'interp_resop.WrappedBox',
     }
 
     def setup_after_space_initialization(self):
         # force the __extend__ hacks to occur early
         from pypy.module.pypyjit.interp_jit import pypyjitdriver
+        from pypy.module.pypyjit.policy import pypy_hooks
         # add the 'defaults' attribute
         from pypy.rlib.jit import PARAMETERS
         space = self.space
         pypyjitdriver.space = space
         w_obj = space.wrap(PARAMETERS)
         space.setattr(space.wrap(self), space.wrap('defaults'), w_obj)
+        pypy_hooks.space = space
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -13,11 +13,7 @@
 from pypy.interpreter.pycode import PyCode, CO_GENERATOR
 from pypy.interpreter.pyframe import PyFrame
 from pypy.interpreter.pyopcode import ExitFrame
-from pypy.interpreter.gateway import unwrap_spec
 from opcode import opmap
-from pypy.rlib.nonconst import NonConstant
-from pypy.jit.metainterp.resoperation import rop
-from pypy.module.pypyjit.interp_resop import debug_merge_point_from_boxes
 
 PyFrame._virtualizable2_ = ['last_instr', 'pycode',
                             'valuestackdepth', 'locals_stack_w[*]',
@@ -51,72 +47,19 @@
 def should_unroll_one_iteration(next_instr, is_being_profiled, bytecode):
     return (bytecode.co_flags & CO_GENERATOR) != 0
 
-def wrap_oplist(space, logops, operations):
-    list_w = []
-    for op in operations:
-        if op.getopnum() == rop.DEBUG_MERGE_POINT:
-            list_w.append(space.wrap(debug_merge_point_from_boxes(
-                op.getarglist())))
-        else:
-            list_w.append(space.wrap(logops.repr_of_resop(op)))
-    return list_w
-
 class PyPyJitDriver(JitDriver):
     reds = ['frame', 'ec']
     greens = ['next_instr', 'is_being_profiled', 'pycode']
     virtualizables = ['frame']
 
-    def on_compile(self, logger, looptoken, operations, type, next_instr,
-                   is_being_profiled, ll_pycode):
-        from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
-
-        space = self.space
-        cache = space.fromcache(Cache)
-        if cache.in_recursion:
-            return
-        if space.is_true(cache.w_compile_hook):
-            logops = logger._make_log_operations()
-            list_w = wrap_oplist(space, logops, operations)
-            pycode = cast_base_ptr_to_instance(PyCode, ll_pycode)
-            cache.in_recursion = True
-            try:
-                space.call_function(cache.w_compile_hook,
-                                    space.wrap('main'),
-                                    space.wrap(type),
-                                    space.newtuple([pycode,
-                                    space.wrap(next_instr),
-                                    space.wrap(is_being_profiled)]),
-                                    space.newlist(list_w))
-            except OperationError, e:
-                e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
-            cache.in_recursion = False
-
-    def on_compile_bridge(self, logger, orig_looptoken, operations, n):
-        space = self.space
-        cache = space.fromcache(Cache)
-        if cache.in_recursion:
-            return
-        if space.is_true(cache.w_compile_hook):
-            logops = logger._make_log_operations()
-            list_w = wrap_oplist(space, logops, operations)
-            cache.in_recursion = True
-            try:
-                space.call_function(cache.w_compile_hook,
-                                    space.wrap('main'),
-                                    space.wrap('bridge'),
-                                    space.wrap(n),
-                                    space.newlist(list_w))
-            except OperationError, e:
-                e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
-            cache.in_recursion = False
-
 pypyjitdriver = PyPyJitDriver(get_printable_location = get_printable_location,
                               get_jitcell_at = get_jitcell_at,
                               set_jitcell_at = set_jitcell_at,
                               confirm_enter_jit = confirm_enter_jit,
                               can_never_inline = can_never_inline,
                               should_unroll_one_iteration =
-                              should_unroll_one_iteration)
+                              should_unroll_one_iteration,
+                              name='pypyjit')
 
 class __extend__(PyFrame):
 
@@ -223,34 +166,3 @@
     '''For testing.  Invokes callable(...), but without letting
     the JIT follow the call.'''
     return space.call_args(w_callable, __args__)
-
-class Cache(object):
-    in_recursion = False
-
-    def __init__(self, space):
-        self.w_compile_hook = space.w_None
-
-def set_compile_hook(space, w_hook):
-    """ set_compile_hook(hook)
-
-    Set a compiling hook that will be called each time a loop is compiled.
-    The hook will be called with the following signature:
-    hook(merge_point_type, loop_type, greenkey or guard_number, operations)
-
-    for now merge point type is always `main`
-
-    loop_type can be either `loop` `entry_bridge` or `bridge`
-    in case loop is not `bridge`, greenkey will be a set of constants
-    for jit merge point. in case it's `main` it'll be a tuple
-    (code, offset, is_being_profiled)
-
-    Note that jit hook is not reentrant. It means that if the code
-    inside the jit hook is itself jitted, it will get compiled, but the
-    jit hook won't be called for that.
-
-    XXX write down what else
-    """
-    cache = space.fromcache(Cache)
-    cache.w_compile_hook = w_hook
-    cache.in_recursion = NonConstant(False)
-    return space.w_None
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
--- a/pypy/module/pypyjit/interp_resop.py
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -1,41 +1,197 @@
 
-from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.gateway import unwrap_spec, interp2app
+from pypy.interpreter.gateway import unwrap_spec, interp2app, NoneNotWrapped
 from pypy.interpreter.pycode import PyCode
-from pypy.rpython.lltypesystem import lltype
-from pypy.rpython.annlowlevel import cast_base_ptr_to_instance
+from pypy.interpreter.error import OperationError
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.annlowlevel import cast_base_ptr_to_instance, hlstr
 from pypy.rpython.lltypesystem.rclass import OBJECT
+from pypy.jit.metainterp.resoperation import rop, AbstractResOp
+from pypy.rlib.nonconst import NonConstant
+from pypy.rlib import jit_hooks
 
-class W_DebugMergePoint(Wrappable):
-    """ A class representing debug_merge_point JIT operation
+class Cache(object):
+    in_recursion = False
+
+    def __init__(self, space):
+        self.w_compile_hook = space.w_None
+        self.w_abort_hook = space.w_None
+        self.w_optimize_hook = space.w_None
+
+def wrap_greenkey(space, jitdriver, greenkey, greenkey_repr):
+    if greenkey is None:
+        return space.w_None
+    jitdriver_name = jitdriver.name
+    if jitdriver_name == 'pypyjit':
+        next_instr = greenkey[0].getint()
+        is_being_profiled = greenkey[1].getint()
+        ll_code = lltype.cast_opaque_ptr(lltype.Ptr(OBJECT),
+                                         greenkey[2].getref_base())
+        pycode = cast_base_ptr_to_instance(PyCode, ll_code)
+        return space.newtuple([space.wrap(pycode), space.wrap(next_instr),
+                               space.newbool(bool(is_being_profiled))])
+    else:
+        return space.wrap(greenkey_repr)
+
+def set_compile_hook(space, w_hook):
+    """ set_compile_hook(hook)
+
+    Set a compiling hook that will be called each time a loop is compiled.
+    The hook will be called with the following signature:
+    hook(jitdriver_name, loop_type, greenkey or guard_number, operations,
+         assembler_addr, assembler_length)
+
+    jitdriver_name is the name of this particular jitdriver, 'pypyjit' is
+    the main interpreter loop
+
+    loop_type can be either `loop` `entry_bridge` or `bridge`
+    in case loop is not `bridge`, greenkey will be a tuple of constants
+    or a string describing it.
+
+    for the interpreter loop` it'll be a tuple
+    (code, offset, is_being_profiled)
+
+    assembler_addr is an integer describing where assembler starts,
+    can be accessed via ctypes, assembler_lenght is the lenght of compiled
+    asm
+
+    Note that jit hook is not reentrant. It means that if the code
+    inside the jit hook is itself jitted, it will get compiled, but the
+    jit hook won't be called for that.
     """
+    cache = space.fromcache(Cache)
+    cache.w_compile_hook = w_hook
+    cache.in_recursion = NonConstant(False)
 
-    def __init__(self, mp_no, offset, pycode):
-        self.mp_no = mp_no
+def set_optimize_hook(space, w_hook):
+    """ set_optimize_hook(hook)
+
+    Set a compiling hook that will be called each time a loop is optimized,
+    but before assembler compilation. This allows to add additional
+    optimizations on Python level.
+    
+    The hook will be called with the following signature:
+    hook(jitdriver_name, loop_type, greenkey or guard_number, operations)
+
+    jitdriver_name is the name of this particular jitdriver, 'pypyjit' is
+    the main interpreter loop
+
+    loop_type can be either `loop` `entry_bridge` or `bridge`
+    in case loop is not `bridge`, greenkey will be a tuple of constants
+    or a string describing it.
+
+    for the interpreter loop` it'll be a tuple
+    (code, offset, is_being_profiled)
+
+    Note that jit hook is not reentrant. It means that if the code
+    inside the jit hook is itself jitted, it will get compiled, but the
+    jit hook won't be called for that.
+
+    Result value will be the resulting list of operations, or None
+    """
+    cache = space.fromcache(Cache)
+    cache.w_optimize_hook = w_hook
+    cache.in_recursion = NonConstant(False)
+
+def set_abort_hook(space, w_hook):
+    """ set_abort_hook(hook)
+
+    Set a hook (callable) that will be called each time there is tracing
+    aborted due to some reason.
+
+    The hook will be called as in: hook(jitdriver_name, greenkey, reason)
+
+    Where reason is the reason for abort, see documentation for set_compile_hook
+    for descriptions of other arguments.
+    """
+    cache = space.fromcache(Cache)
+    cache.w_abort_hook = w_hook
+    cache.in_recursion = NonConstant(False)
+
+def wrap_oplist(space, logops, operations, ops_offset=None):
+    l_w = []
+    for op in operations:
+        if ops_offset is None:
+            ofs = -1
+        else:
+            ofs = ops_offset.get(op, 0)
+        l_w.append(WrappedOp(jit_hooks._cast_to_gcref(op), ofs,
+                             logops.repr_of_resop(op)))
+    return l_w
+
+class WrappedBox(Wrappable):
+    """ A class representing a single box
+    """
+    def __init__(self, llbox):
+        self.llbox = llbox
+
+    def descr_getint(self, space):
+        return space.wrap(jit_hooks.box_getint(self.llbox))
+
+ at unwrap_spec(no=int)
+def descr_new_box(space, w_tp, no):
+    return WrappedBox(jit_hooks.boxint_new(no))
+
+WrappedBox.typedef = TypeDef(
+    'Box',
+    __new__ = interp2app(descr_new_box),
+    getint = interp2app(WrappedBox.descr_getint),
+)
+
+ at unwrap_spec(num=int, offset=int, repr=str, res=WrappedBox)
+def descr_new_resop(space, w_tp, num, w_args, res, offset=-1,
+                    repr=''):
+    args = [space.interp_w(WrappedBox, w_arg).llbox for w_arg in
+            space.listview(w_args)]
+    if res is None:
+        llres = jit_hooks.emptyval()
+    else:
+        llres = res.llbox
+    return WrappedOp(jit_hooks.resop_new(num, args, llres), offset, repr)
+
+class WrappedOp(Wrappable):
+    """ A class representing a single ResOperation, wrapped nicely
+    """
+    def __init__(self, op, offset, repr_of_resop):
+        self.op = op
         self.offset = offset
-        self.pycode = pycode
+        self.repr_of_resop = repr_of_resop
 
     def descr_repr(self, space):
-        return space.wrap('DebugMergePoint()')
+        return space.wrap(self.repr_of_resop)
 
- at unwrap_spec(mp_no=int, offset=int, pycode=PyCode)
-def new_debug_merge_point(space, w_tp, mp_no, offset, pycode):
-    return W_DebugMergePoint(mp_no, offset, pycode)
+    def descr_num(self, space):
+        return space.wrap(jit_hooks.resop_getopnum(self.op))
 
-def debug_merge_point_from_boxes(boxes):
-    mp_no = boxes[0].getint()
-    offset = boxes[2].getint()
-    llcode = lltype.cast_opaque_ptr(lltype.Ptr(OBJECT),
-                                    boxes[4].getref_base())
-    pycode = cast_base_ptr_to_instance(PyCode, llcode)
-    assert pycode is not None
-    return W_DebugMergePoint(mp_no, offset, pycode)
+    def descr_name(self, space):
+        return space.wrap(hlstr(jit_hooks.resop_getopname(self.op)))
 
-W_DebugMergePoint.typedef = TypeDef(
-    'DebugMergePoint',
-    __new__ = interp2app(new_debug_merge_point),
-    __doc__ = W_DebugMergePoint.__doc__,
-    __repr__ = interp2app(W_DebugMergePoint.descr_repr),
-    code = interp_attrproperty('pycode', W_DebugMergePoint),
+    @unwrap_spec(no=int)
+    def descr_getarg(self, space, no):
+        return WrappedBox(jit_hooks.resop_getarg(self.op, no))
+
+    @unwrap_spec(no=int, box=WrappedBox)
+    def descr_setarg(self, space, no, box):
+        jit_hooks.resop_setarg(self.op, no, box.llbox)
+
+    def descr_getresult(self, space):
+        return WrappedBox(jit_hooks.resop_getresult(self.op))
+
+    def descr_setresult(self, space, w_box):
+        box = space.interp_w(WrappedBox, w_box)
+        jit_hooks.resop_setresult(self.op, box.llbox)
+
+WrappedOp.typedef = TypeDef(
+    'ResOperation',
+    __doc__ = WrappedOp.__doc__,
+    __new__ = interp2app(descr_new_resop),
+    __repr__ = interp2app(WrappedOp.descr_repr),
+    num = GetSetProperty(WrappedOp.descr_num),
+    name = GetSetProperty(WrappedOp.descr_name),
+    getarg = interp2app(WrappedOp.descr_getarg),
+    setarg = interp2app(WrappedOp.descr_setarg),
+    result = GetSetProperty(WrappedOp.descr_getresult,
+                            WrappedOp.descr_setresult)
 )
+WrappedOp.acceptable_as_base_class = False
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -1,4 +1,112 @@
 from pypy.jit.codewriter.policy import JitPolicy
+from pypy.rlib.jit import JitHookInterface
+from pypy.rlib import jit_hooks
+from pypy.interpreter.error import OperationError
+from pypy.jit.metainterp.jitprof import counter_names
+from pypy.module.pypyjit.interp_resop import wrap_oplist, Cache, wrap_greenkey,\
+     WrappedOp
+
+class PyPyJitIface(JitHookInterface):
+    def on_abort(self, reason, jitdriver, greenkey, greenkey_repr):
+        space = self.space
+        cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
+        if space.is_true(cache.w_abort_hook):
+            cache.in_recursion = True
+            try:
+                try:
+                    space.call_function(cache.w_abort_hook,
+                                        space.wrap(jitdriver.name),
+                                        wrap_greenkey(space, jitdriver,
+                                                      greenkey, greenkey_repr),
+                                        space.wrap(counter_names[reason]))
+                except OperationError, e:
+                    e.write_unraisable(space, "jit hook ", cache.w_abort_hook)
+            finally:
+                cache.in_recursion = False
+
+    def after_compile(self, debug_info):
+        w_greenkey = wrap_greenkey(self.space, debug_info.get_jitdriver(),
+                                   debug_info.greenkey,
+                                   debug_info.get_greenkey_repr())
+        self._compile_hook(debug_info, w_greenkey)
+
+    def after_compile_bridge(self, debug_info):
+        self._compile_hook(debug_info,
+                           self.space.wrap(debug_info.fail_descr_no))
+
+    def before_compile(self, debug_info):
+        w_greenkey = wrap_greenkey(self.space, debug_info.get_jitdriver(),
+                                   debug_info.greenkey,
+                                   debug_info.get_greenkey_repr())
+        self._optimize_hook(debug_info, w_greenkey)
+
+    def before_compile_bridge(self, debug_info):
+        self._optimize_hook(debug_info,
+                            self.space.wrap(debug_info.fail_descr_no))
+
+    def _compile_hook(self, debug_info, w_arg):
+        space = self.space
+        cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
+        if space.is_true(cache.w_compile_hook):
+            logops = debug_info.logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, debug_info.operations,
+                                 debug_info.asminfo.ops_offset)
+            cache.in_recursion = True
+            try:
+                try:
+                    jd_name = debug_info.get_jitdriver().name
+                    asminfo = debug_info.asminfo
+                    space.call_function(cache.w_compile_hook,
+                                        space.wrap(jd_name),
+                                        space.wrap(debug_info.type),
+                                        w_arg,
+                                        space.newlist(list_w),
+                                        space.wrap(asminfo.asmaddr),
+                                        space.wrap(asminfo.asmlen))
+                except OperationError, e:
+                    e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            finally:
+                cache.in_recursion = False
+
+    def _optimize_hook(self, debug_info, w_arg):
+        space = self.space
+        cache = space.fromcache(Cache)
+        if cache.in_recursion:
+            return
+        if space.is_true(cache.w_optimize_hook):
+            logops = debug_info.logger._make_log_operations()
+            list_w = wrap_oplist(space, logops, debug_info.operations)
+            cache.in_recursion = True
+            try:
+                try:
+                    jd_name = debug_info.get_jitdriver().name
+                    w_res = space.call_function(cache.w_optimize_hook,
+                                                space.wrap(jd_name),
+                                                space.wrap(debug_info.type),
+                                                w_arg,
+                                                space.newlist(list_w))
+                    if space.is_w(w_res, space.w_None):
+                        return
+                    l = []
+                    for w_item in space.listview(w_res):
+                        item = space.interp_w(WrappedOp, w_item)
+                        l.append(jit_hooks._cast_to_resop(item.op))
+                    del debug_info.operations[:] # modifying operations above is
+                    # probably not a great idea since types may not work
+                    # and we'll end up with half-working list and
+                    # a segfault/fatal RPython error
+                    for elem in l:
+                        debug_info.operations.append(elem)
+                except OperationError, e:
+                    e.write_unraisable(space, "jit hook ", cache.w_compile_hook)
+            finally:
+                cache.in_recursion = False
+
+pypy_hooks = PyPyJitIface()
 
 class PyPyJitPolicy(JitPolicy):
 
@@ -12,12 +120,16 @@
                 modname == 'thread.os_thread'):
             return True
         if '.' in modname:
-            modname, _ = modname.split('.', 1)
+            modname, rest = modname.split('.', 1)
+        else:
+            rest = ''
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
                        'posix', '_socket', '_sre', '_lsprof', '_weakref',
                        '__pypy__', 'cStringIO', '_collections', 'struct',
                        'mmap', 'marshal']:
+            if modname == 'pypyjit' and 'interp_resop' in rest:
+                return False
             return True
         return False
 
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -1,22 +1,40 @@
 
 import py
 from pypy.conftest import gettestobjspace, option
+from pypy.interpreter.gateway import interp2app
 from pypy.interpreter.pycode import PyCode
-from pypy.interpreter.gateway import interp2app
-from pypy.jit.metainterp.history import JitCellToken
-from pypy.jit.metainterp.resoperation import ResOperation, rop
+from pypy.jit.metainterp.history import JitCellToken, ConstInt, ConstPtr
+from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.logger import Logger
 from pypy.rpython.annlowlevel import (cast_instance_to_base_ptr,
                                       cast_base_ptr_to_instance)
 from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem.rclass import OBJECT
 from pypy.module.pypyjit.interp_jit import pypyjitdriver
+from pypy.module.pypyjit.policy import pypy_hooks
 from pypy.jit.tool.oparser import parse
 from pypy.jit.metainterp.typesystem import llhelper
+from pypy.jit.metainterp.jitprof import ABORT_TOO_LONG
+from pypy.rlib.jit import JitDebugInfo, AsmInfo
+
+class MockJitDriverSD(object):
+    class warmstate(object):
+        @staticmethod
+        def get_location_str(boxes):
+            ll_code = lltype.cast_opaque_ptr(lltype.Ptr(OBJECT),
+                                             boxes[2].getref_base())
+            pycode = cast_base_ptr_to_instance(PyCode, ll_code)
+            return pycode.co_name
+
+    jitdriver = pypyjitdriver
+
 
 class MockSD(object):
     class cpu(object):
         ts = llhelper
 
+    jitdrivers_sd = [MockJitDriverSD]
+
 class AppTestJitHook(object):
     def setup_class(cls):
         if option.runappdirect:
@@ -24,9 +42,9 @@
         space = gettestobjspace(usemodules=('pypyjit',))
         cls.space = space
         w_f = space.appexec([], """():
-        def f():
+        def function():
             pass
-        return f
+        return function
         """)
         cls.w_f = w_f
         ll_code = cast_instance_to_base_ptr(w_f.code)
@@ -34,41 +52,73 @@
         logger = Logger(MockSD())
 
         oplist = parse("""
-        [i1, i2]
+        [i1, i2, p2]
         i3 = int_add(i1, i2)
         debug_merge_point(0, 0, 0, 0, ConstPtr(ptr0))
+        guard_nonnull(p2) []
         guard_true(i3) []
         """, namespace={'ptr0': code_gcref}).operations
+        greenkey = [ConstInt(0), ConstInt(0), ConstPtr(code_gcref)]
+        offset = {}
+        for i, op in enumerate(oplist):
+            if i != 1:
+               offset[op] = i
+
+        di_loop = JitDebugInfo(MockJitDriverSD, logger, JitCellToken(),
+                               oplist, 'loop', greenkey)
+        di_loop_optimize = JitDebugInfo(MockJitDriverSD, logger, JitCellToken(),
+                                        oplist, 'loop', greenkey)
+        di_loop.asminfo = AsmInfo(offset, 0, 0)
+        di_bridge = JitDebugInfo(MockJitDriverSD, logger, JitCellToken(),
+                                 oplist, 'bridge', fail_descr_no=0)
+        di_bridge.asminfo = AsmInfo(offset, 0, 0)
 
         def interp_on_compile():
-            pypyjitdriver.on_compile(logger, JitCellToken(), oplist, 'loop',
-                                     0, False, ll_code)
+            di_loop.oplist = cls.oplist
+            pypy_hooks.after_compile(di_loop)
 
         def interp_on_compile_bridge():
-            pypyjitdriver.on_compile_bridge(logger, JitCellToken(), oplist, 0)
+            pypy_hooks.after_compile_bridge(di_bridge)
+
+        def interp_on_optimize():
+            di_loop_optimize.oplist = cls.oplist
+            pypy_hooks.before_compile(di_loop_optimize)
+
+        def interp_on_abort():
+            pypy_hooks.on_abort(ABORT_TOO_LONG, pypyjitdriver, greenkey,
+                                'blah')
         
         cls.w_on_compile = space.wrap(interp2app(interp_on_compile))
         cls.w_on_compile_bridge = space.wrap(interp2app(interp_on_compile_bridge))
+        cls.w_on_abort = space.wrap(interp2app(interp_on_abort))
+        cls.w_int_add_num = space.wrap(rop.INT_ADD)
+        cls.w_on_optimize = space.wrap(interp2app(interp_on_optimize))
+        cls.orig_oplist = oplist
+
+    def setup_method(self, meth):
+        self.__class__.oplist = self.orig_oplist[:]
 
     def test_on_compile(self):
         import pypyjit
         all = []
 
-        def hook(*args):
-            assert args[0] == 'main'
-            assert args[1] in ['loop', 'bridge']
-            all.append(args[2:])
+        def hook(name, looptype, tuple_or_guard_no, ops, asmstart, asmlen):
+            all.append((name, looptype, tuple_or_guard_no, ops))


More information about the pypy-commit mailing list