[Numpy-svn] r3149 - in trunk/numpy: lib lib/tests oldnumeric
numpy-svn at scipy.org
numpy-svn at scipy.org
Wed Sep 13 21:49:26 EDT 2006
Author: oliphant
Date: 2006-09-13 20:49:10 -0500 (Wed, 13 Sep 2006)
New Revision: 3149
Modified:
trunk/numpy/lib/function_base.py
trunk/numpy/lib/tests/test_function_base.py
trunk/numpy/lib/tests/test_twodim_base.py
trunk/numpy/lib/twodim_base.py
trunk/numpy/oldnumeric/functions.py
Log:
Add histogramnd and fix histogram2d
Modified: trunk/numpy/lib/function_base.py
===================================================================
--- trunk/numpy/lib/function_base.py 2006-09-14 01:20:52 UTC (rev 3148)
+++ trunk/numpy/lib/function_base.py 2006-09-14 01:49:10 UTC (rev 3149)
@@ -4,23 +4,23 @@
'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp',
'unique', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
- 'histogram', 'bincount', 'digitize', 'cov', 'corrcoef', 'msort',
- 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman',
- 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', 'meshgrid',
- 'delete', 'insert', 'append'
+ 'histogram', 'histogramnd', 'bincount', 'digitize', 'cov',
+ 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning',
+ 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc',
+ 'add_docstring', 'meshgrid', 'delete', 'insert', 'append'
]
import types
import numpy.core.numeric as _nx
from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
- asarray, asanyarray, empty, empty_like, asanyarray, ndarray
+ asarray, asanyarray, empty, empty_like, asanyarray, ndarray, around
from numpy.core.numeric import ScalarType, dot, where, newaxis, intp, \
- integer
+ integer, isscalar
from numpy.core.umath import pi, multiply, add, arctan2, \
- frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp
+ frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp, log10
from numpy.core.fromnumeric import ravel, nonzero, choose, sort
from numpy.core.numerictypes import typecodes
-from numpy.lib.shape_base import atleast_1d
+from numpy.lib.shape_base import atleast_1d, atleast_2d
from numpy.lib.twodim_base import diag
from _compiled_base import _insert, add_docstring
from _compiled_base import digitize, bincount
@@ -75,8 +75,7 @@
----------
bins: Number of bins
range: Lower and upper bin edges (default: [sample.min(), sample.max()]).
- Does not really work, all values greater than range are stored in
- the last bin.
+ All values greater than range are stored in the last bin.
normed: If False (default), return the number of samples in each bin.
If True, return a frequency distribution.
@@ -104,6 +103,130 @@
else:
return n, bins
+def histogramnd(sample, bins=10, range=None, normed=False):
+ """histogramnd(sample, bins = 10, range = None, normed = False) -> H, edges
+
+ Return the N-dimensional histogram computed from sample.
+
+ Parameters
+ ----------
+ sample: A sequence of N arrays, or an KxN array.
+ bins: A sequence of edge arrays, or a sequence of the number of bins.
+ If a scalar is given, it is assumed to be the number of bins
+ for all dimensions.
+ range: A sequence of lower and upper bin edges (default: [min, max]).
+ normed: If False, returns the number of samples in each bin.
+ If True, returns the frequency distribution.
+
+
+ Output
+ ------
+ H: Histogram array.
+ edges: List of arrays defining the bin edges.
+
+ Example:
+ x = random.randn(100,3)
+ H, edges = histogramnd(x, bins = (5, 6, 7))
+
+ See also: histogram
+ """
+
+ try:
+ N, D = sample.shape
+ except (AttributeError, ValueError):
+ ss = atleast_2d(sample)
+ sample = ss.transpose()
+ N, D = sample.shape
+
+ nbin = empty(D, int)
+ edges = D*[None]
+ dedges = D*[None]
+
+ try:
+ M = len(bins)
+ if M != D:
+ raise AttributeError, 'The dimension of bins must be a equal to the dimension of the sample x.'
+ except TypeError:
+ bins = D*[bins]
+
+ if range is None:
+ smin = atleast_1d(sample.min(0))
+ smax = atleast_1d(sample.max(0))
+ else:
+ smin = zeros(D)
+ smax = zeros(D)
+ for i in arange(D):
+ smin[i], smax[i] = range[i]
+
+ for i in arange(D):
+ if isscalar(bins[i]):
+ nbin[i] = bins[i]
+ edges[i] = linspace(smin[i], smax[i], nbin[i]+1)
+ else:
+ edges[i] = asarray(bins[i], float)
+ nbin[i] = len(edges[i])-1
+
+
+
+ Ncount = {}
+ nbin = asarray(nbin)
+
+ for i in arange(D):
+ Ncount[i] = digitize(sample[:,i], edges[i])
+ dedges[i] = diff(edges[i])
+ # Remove values falling outside of bins
+ # Values that fall on an edge are put in the right bin.
+ # For the rightmost bin, we want values equal to the right
+ # edge to be counted in the last bin, and not as an outlier.
+ outliers = zeros(N, int)
+ for i in arange(D):
+ decimal = int(-log10(dedges[i].min())) +6
+ on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
+ Ncount[i][on_edge] -= 1
+ outliers += (Ncount[i] == 0) | (Ncount[i] == nbin[i]+1)
+ indices = where(outliers == 0)[0]
+ for i in arange(D):
+ Ncount[i] = Ncount[i][indices] - 1
+ N = len(indices)
+
+ # Flattened histogram matrix (1D)
+ hist = zeros(nbin.prod(), int)
+
+ # Compute the sample indices in the flattened histogram matrix.
+ ni = nbin.argsort()
+ shape = []
+ xy = zeros(N, int)
+ for i in arange(0, D-1):
+ xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
+
+ xy += Ncount[ni[-1]]
+
+ # Compute the number of repetitions in xy and assign it to the flattened histmat.
+ if len(xy) == 0:
+ return zeros(nbin, int)
+
+ flatcount = bincount(xy)
+ a = arange(len(flatcount))
+ hist[a] = flatcount
+
+ # Shape into a proper matrix
+ hist = hist.reshape(sort(nbin))
+ for i,j in enumerate(ni):
+ hist = hist.swapaxes(i,j)
+ if (hist.shape == nbin).all():
+ break
+
+ if normed:
+ s = hist.sum()
+ for i in arange(D):
+ shape = ones(D, int)
+ shape[i] = nbin[i]
+ hist = hist / dedges[i].reshape(shape)
+ hist /= s
+
+ return hist, edges
+
+
def average(a, axis=None, weights=None, returned=False):
"""average(a, axis=None weights=None, returned=False)
Modified: trunk/numpy/lib/tests/test_function_base.py
===================================================================
--- trunk/numpy/lib/tests/test_function_base.py 2006-09-14 01:20:52 UTC (rev 3148)
+++ trunk/numpy/lib/tests/test_function_base.py 2006-09-14 01:49:10 UTC (rev 3149)
@@ -353,6 +353,31 @@
(a,b)=histogram(linspace(0,10,100))
assert(all(a==10))
+class test_histogramnd(NumpyTestCase):
+ def check_simple(self):
+ x = array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], \
+ [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]])
+ H, edges = histogramnd(x, (2,3,3), range = [[-1,1], [0,3], [0,3]])
+ answer = asarray([[[0,1,0], [0,0,1], [1,0,0]], [[0,1,0], [0,0,1], [0,0,1]]])
+ assert(all(H == answer))
+ # Check normalization
+ ed = [[-2,0,2], [0,1,2,3], [0,1,2,3]]
+ H, edges = histogramnd(x, bins = ed, normed = True)
+ assert(all(H == answer/12.))
+ # Check that H has the correct shape.
+ H, edges = histogramnd(x, (2,3,4), range = [[-1,1], [0,3], [0,4]], normed=True)
+ answer = asarray([[[0,1,0,0], [0,0,1,0], [1,0,0,0]], [[0,1,0,0], [0,0,1,0], [0,0,1,0]]])
+ assert_array_almost_equal(H, answer/6., 4)
+ # Check that a sequence of arrays is accepted and H has the correct shape.
+ z = [squeeze(y) for y in split(x,3,axis=1)]
+ H, edges = histogramnd(z, bins=(4,3,2),range=[[-2,2], [0,3], [0,2]])
+ answer = asarray([[[0,0],[0,0],[0,0]],
+ [[0,1], [0,0], [1,0]],
+ [[0,1], [0,0],[0,0]],
+ [[0,0],[0,0],[0,0]]])
+ assert_array_equal(H, answer)
+
+
class test_unique(NumpyTestCase):
def check_simple(self):
x = array([4,3,2,1,1,2,3,4, 0])
Modified: trunk/numpy/lib/tests/test_twodim_base.py
===================================================================
--- trunk/numpy/lib/tests/test_twodim_base.py 2006-09-14 01:20:52 UTC (rev 3148)
+++ trunk/numpy/lib/tests/test_twodim_base.py 2006-09-14 01:49:10 UTC (rev 3149)
@@ -5,7 +5,8 @@
from numpy.testing import *
set_package_path()
from numpy import arange, rot90, add, fliplr, flipud, zeros, ones, eye, \
- array, diag
+ array, diag, histogram2d
+import numpy as np
restore_path()
##################################################
@@ -133,13 +134,12 @@
class test_histogram2d(NumpyTestCase):
def check_simple(self):
- import numpy as np
x = array([ 0.41702200, 0.72032449, 0.00011437481, 0.302332573, 0.146755891])
y = array([ 0.09233859, 0.18626021, 0.34556073, 0.39676747, 0.53881673])
xedges = np.linspace(0,1,10)
yedges = np.linspace(0,1,10)
- H = np.histogram2d(x, y, (xedges, yedges))[0]
- answer = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0],
+ H = histogram2d(x, y, (xedges, yedges))[0]
+ answer = array([[0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 0],
@@ -148,7 +148,26 @@
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0]])
- assert_equal(H, answer)
-
+ assert_array_equal(H.T, answer)
+ def check_asym(self):
+ x = array([1, 1, 2, 3, 4, 4, 4, 5])
+ y = array([1, 3, 2, 0, 1, 2, 3, 4])
+ H, xed, yed = histogram2d(x,y, (6, 5), range = [[0,6],[0,5]], normed=True)
+ answer = array([[0.,0,0,0,0],
+ [0,1,0,1,0],
+ [0,0,1,0,0],
+ [1,0,0,0,0],
+ [0,1,1,1,0],
+ [0,0,0,0,1]])
+ assert_array_almost_equal(H, answer/8., 3)
+ def check_norm(self):
+ x = array([1,2,3,1,2,3,1,2,3])
+ y = array([1,1,1,2,2,2,3,3,3])
+ H, xed, yed = histogram2d(x,y,[[1,2,3,5], [1,2,3,5]], normed=True)
+ answer=array([[1,1,.5],
+ [1,1,.5],
+ [.5,.5,.25]])/9.
+ assert_array_almost_equal(H, answer, 3)
+
if __name__ == "__main__":
NumpyTest().run()
Modified: trunk/numpy/lib/twodim_base.py
===================================================================
--- trunk/numpy/lib/twodim_base.py 2006-09-14 01:20:52 UTC (rev 3148)
+++ trunk/numpy/lib/twodim_base.py 2006-09-14 01:49:10 UTC (rev 3149)
@@ -2,8 +2,8 @@
"""
-__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu','tril',
- 'vander','histogram2d']
+__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu',
+ 'tril','vander','histogram2d']
from numpy.core.numeric import asanyarray, int_, equal, subtract, arange, \
zeros, arange, greater_equal, multiply, ones, asarray
@@ -143,14 +143,21 @@
X[:,i] = x**(N-i-1)
return X
-def histogram2d(x,y, bins, normed = False):
- """Compute the 2D histogram for a dataset (x,y) given the edges or
- the number of bins.
+def histogram2d(x,y, bins=10, range=None, normed=False):
+ """histogram2d(x,y, bins=10, range=None, normed=False) -> H, xedges, yedges
+
+ Compute the 2D histogram from samples x,y.
- Returns histogram, xedges, yedges.
- The histogram array is a count of the number of samples in each bin.
- The array is oriented such that H[i,j] is the number of samples falling
- into binx[j] and biny[i].
+ Parameters
+ ----------
+ x,y: 1D data series. Both arrays must have the same length.
+ bins: Number of bins -or- [nbin x, nbin y] -or-
+ [bin edges] -or- [x bin edges, y bin edges].
+ range: A sequence of lower and upper bin edges (default: [min, max]).
+ normed: True or False.
+
+ The histogram array is a count of the number of samples in each
+ two dimensional bin.
Setting normed to True returns a density rather than a bin count.
Data falling outside of the edges are not counted.
"""
@@ -160,33 +167,40 @@
except TypeError:
N = 1
bins = [bins]
+ x = asarray(x)
+ y = asarray(y)
+ if range is None:
+ xmin, xmax = x.min(), x.max()
+ ymin, ymax = y.min(), y.max()
+ else:
+ xmin, xmax = range[0]
+ ymin, ymax = range[1]
if N == 2:
if np.isscalar(bins[0]):
xnbin = bins[0]
- xedges = np.linspace(x.min(), x.max(), xnbin+1)
-
+ xedges = np.linspace(xmin, xmax, xnbin+1)
else:
xedges = asarray(bins[0], float)
xnbin = len(xedges)-1
-
if np.isscalar(bins[1]):
ynbin = bins[1]
- yedges = np.linspace(y.min(), y.max(), ynbin+1)
+ yedges = np.linspace(ymin, ymax, ynbin+1)
else:
yedges = asarray(bins[1], float)
ynbin = len(yedges)-1
elif N == 1:
ynbin = xnbin = bins[0]
- xedges = np.linspace(x.min(), x.max(), xnbin+1)
- yedges = np.linspace(y.max(), y.min(), ynbin+1)
- xedges[-1] *= 1.0001
- yedges[-1] *= 1.0001
+ xedges = np.linspace(xmin, xmax, xnbin+1)
+ yedges = np.linspace(ymin, ymax, ynbin+1)
else:
yedges = asarray(bins, float)
xedges = yedges.copy()
ynbin = len(yedges)-1
xnbin = len(xedges)-1
+ dxedges = np.diff(xedges)
+ dyedges = np.diff(yedges)
+
# Flattened histogram matrix (1D)
hist = np.zeros((xnbin)*(ynbin), int)
@@ -194,30 +208,41 @@
xbin = np.digitize(x,xedges)
ybin = np.digitize(y,yedges)
- # Remove the outliers
+ # Values that fall on an edge are put in the right bin.
+ # For the rightmost bin, we want values equal to the right
+ # edge to be counted in the last bin, and not as an outlier.
+ xdecimal = int(-np.log10(dxedges.min()))+6
+ ydecimal = int(-np.log10(dyedges.min()))+6
+ on_edge_x = np.where(np.around(x,xdecimal) == np.around(xedges[-1], xdecimal))[0]
+ on_edge_y = np.where(np.around(y,ydecimal) == np.around(yedges[-1], ydecimal))[0]
+ xbin[on_edge_x] -= 1
+ ybin[on_edge_y] -= 1
+ # Remove the true outliers
outliers = (xbin==0) | (xbin==xnbin+1) | (ybin==0) | (ybin == ynbin+1)
-
- xbin = xbin[outliers==False]
- ybin = ybin[outliers==False]
+ xbin = xbin[outliers==False] - 1
+ ybin = ybin[outliers==False] - 1
# Compute the sample indices in the flattened histogram matrix.
if xnbin >= ynbin:
xy = ybin*(xnbin) + xbin
- shift = xnbin + 1
+
else:
xy = xbin*(ynbin) + ybin
- shift = ynbin + 1
+
# Compute the number of repetitions in xy and assign it to the flattened
# histogram matrix.
+
flatcount = np.bincount(xy)
- indices = np.arange(len(flatcount)-shift)
- hist[indices] = flatcount[shift:]
+ indices = np.arange(len(flatcount))
+ hist[indices] = flatcount
+ shape = np.sort([xnbin, ynbin])
# Shape into a proper matrix
- histmat = hist.reshape(xnbin, ynbin)
-
+ histmat = hist.reshape(shape)
+ if (shape == (ynbin, xnbin)).all():
+ histmat = histmat.T
if normed:
- diff2 = np.outer(np.diff(yedges), np.diff(xedges))
+ diff2 = np.outer(dxedges, dyedges)
histmat = histmat / diff2 / histmat.sum()
return histmat, xedges, yedges
Modified: trunk/numpy/oldnumeric/functions.py
===================================================================
--- trunk/numpy/oldnumeric/functions.py 2006-09-14 01:20:52 UTC (rev 3148)
+++ trunk/numpy/oldnumeric/functions.py 2006-09-14 01:49:10 UTC (rev 3149)
@@ -119,3 +119,4 @@
def average(a, axis=0, weights=None, returned=False):
return N.average(a, axis, weights, returned)
+
More information about the Numpy-svn
mailing list