[Numpy-discussion] [Numpy-svn] r8413 - trunk/numpy/lib - Author: oliphant - Add percentile function.

Thu Jul 8 15:27:30 EDT 2010

isn't this related to
http://projects.scipy.org/numpy/ticket/626
percentile() and clamp()

which was set to invalid

-Sebastian


On Sun, May 16, 2010 at 12:11 AM,  <numpy-svn at scipy.org> wrote:
> Author: oliphant
> Date: 2010-05-15 17:11:10 -0500 (Sat, 15 May 2010)
> New Revision: 8413
>
> Modified:
>   trunk/numpy/lib/function_base.py
> Log:
> Add percentile function.
>
> Modified: trunk/numpy/lib/function_base.py
> ===================================================================
> --- trunk/numpy/lib/function_base.py    2010-05-13 12:47:23 UTC (rev 8412)
> +++ trunk/numpy/lib/function_base.py    2010-05-15 22:11:10 UTC (rev 8413)
> @@ -1,6 +1,6 @@
>  __docformat__ = "restructuredtext en"
>  __all__ = ['select', 'piecewise', 'trim_zeros',
> -           'copy', 'iterable',
> +           'copy', 'iterable', 'percentile',
>            'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp',
>            'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
>            'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
> @@ -2804,7 +2804,7 @@
>
>     See Also
>     --------
> -    mean
> +    mean, percentile
>
>     Notes
>     -----
> @@ -2863,6 +2863,132 @@
>     # and check, use out array.
>     return mean(sorted[indexer], axis=axis, out=out)
>
> +def percentile(a, q, axis=None, out=None, overwrite_input=False):
> +    """
> +    Compute the qth percentile of the data along the specified axis.
> +
> +    Returns the qth percentile of the array elements.
> +
> +    Parameters
> +    ----------
> +    a : array_like
> +        Input array or object that can be converted to an array.
> +    q : float in range of [0,100] (or sequence of floats)
> +        percentile to compute which must be between 0 and 100 inclusive
> +    axis : {None, int}, optional
> +        Axis along which the percentiles are computed. The default (axis=None)
> +        is to compute the median along a flattened version of the array.
> +    out : ndarray, optional
> +        Alternative output array in which to place the result. It must
> +        have the same shape and buffer length as the expected output,
> +        but the type (of the output) will be cast if necessary.
> +    overwrite_input : {False, True}, optional
> +       If True, then allow use of memory of input array (a) for
> +       calculations. The input array will be modified by the call to
> +       median. This will save memory when you do not need to preserve
> +       the contents of the input array. Treat the input as undefined,
> +       but it will probably be fully or partially sorted. Default is
> +       False. Note that, if `overwrite_input` is True and the input
> +       is not already an ndarray, an error will be raised.
> +
> +    Returns
> +    -------
> +    pcntile : ndarray
> +        A new array holding the result (unless `out` is specified, in
> +        which case that array is returned instead).  If the input contains
> +        integers, or floats of smaller precision than 64, then the output
> +        data-type is float64.  Otherwise, the output data-type is the same
> +        as that of the input.
> +
> +    See Also
> +    --------
> +    mean, median
> +
> +    Notes
> +    -----
> +    Given a vector V of length N, the qth percentile of V is the qth ranked
> +    value in a sorted copy of V.  A weighted average of the two nearest neighbors
> +    is used if the normalized ranking does not match q exactly.
> +    The same as the median if q is 0.5; the same as the min if q is 0;
> +    and the same as the max if q is 1
> +
> +    Examples
> +    --------
> +    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
> +    >>> a
> +    array([[10,  7,  4],
> +           [ 3,  2,  1]])
> +    >>> np.percentile(a, 0.5)
> +    3.5
> +    >>> np.percentile(a, 0.5, axis=0)
> +    array([ 6.5,  4.5,  2.5])
> +    >>> np.percentile(a, 0.5, axis=1)
> +    array([ 7.,  2.])
> +    >>> m = np.percentile(a, 0.5, axis=0)
> +    >>> out = np.zeros_like(m)
> +    >>> np.percentile(a, 0.5, axis=0, out=m)
> +    array([ 6.5,  4.5,  2.5])
> +    >>> m
> +    array([ 6.5,  4.5,  2.5])
> +    >>> b = a.copy()
> +    >>> np.percentile(b, 0.5, axis=1, overwrite_input=True)
> +    array([ 7.,  2.])
> +    >>> assert not np.all(a==b)
> +    >>> b = a.copy()
> +    >>> np.percentile(b, 0.5, axis=None, overwrite_input=True)
> +    3.5
> +    >>> assert not np.all(a==b)
> +
> +    """
> +    if q == 0:
> +        return a.min(axis=axis, out=out)
> +    elif q == 100:
> +        return a.max(axis=axis, out=out)
> +
> +    if overwrite_input:
> +        if axis is None:
> +            sorted = a.ravel()
> +            sorted.sort()
> +        else:
> +            a.sort(axis=axis)
> +            sorted = a
> +    else:
> +        sorted = sort(a, axis=axis)
> +    if axis is None:
> +        axis = 0
> +
> +    return _compute_qth_percentile(sorted, q, axis, out)
> +
> +# handle sequence of q's without calling sort multiple times
> +def _compute_qth_percentile(sorted, q, axis, out):
> +    if not isscalar(q):
> +        return [_compute_qth_percentile(sorted, qi, axis, out)
> +                    for qi in q]
> +    q = q / 100.0
> +    if (q < 0) or (q > 1):
> +        raise ValueError, "percentile must be either in the range [0,100]"
> +
> +    indexer = [slice(None)] * sorted.ndim
> +    Nx = sorted.shape[axis]
> +    index = q*(Nx-1)
> +    i = int(index)
> +    if i == index:
> +        indexer[axis] = slice(i, i+1)
> +        weights = array(1)
> +        sumval = 1.0
> +    else:
> +        indexer[axis] = slice(i, i+2)
> +        j = i + 1
> +        weights = array([(j - index), (index - i)],float)
> +        wshape = [1]*sorted.ndim
> +        wshape[axis] = 2
> +        weights.shape = wshape
> +        sumval = weights.sum()
> +
> +    # Use add.reduce in both cases to coerce data type as well as
> +    #   check and use out array.
> +    return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval
> +
>  def trapz(y, x=None, dx=1.0, axis=-1):
>     """
>     Integrate along the given axis using the composite trapezoidal rule.
>
> _______________________________________________
> Numpy-svn mailing list
> Numpy-svn at scipy.org
> http://mail.scipy.org/mailman/listinfo/numpy-svn
>