# Hello, I am having trouble with performance when trying to create a cross
# tabulation using numpy. Ideally, I would calculate each cell in the cross
# tabulation separately because this gives me the greatest amount of
# flexibility. I have included some sample code as a reference and am really
# looking for better approaches to the simpleLoop method. So far the
# histogram2d and histogramdd methods seem to outperform any code I write by
# a factor of about 100, at least. I chalk this up to I just don't understand
# enough about numpy, yet. Any help would be appreciated.
#
# Here is the test code:

import numpy as np
import time
import random


def simpleLoop(c):
    """Cross-tabulate the two columns of ``c`` one cell at a time.

    Basic cross tabulation (2-D histogram) of the data: every cell is
    computed on its own, which is the preferred approach here because the
    calculation may need to be customized per cell.

    Parameters
    ----------
    c : 2-D array
        Column 0 supplies the row categories and column 1 the column
        categories of the table.

    Returns
    -------
    numpy.ndarray
        Float array with one row per unique value of ``c[:, 0]`` and one
        column per unique value of ``c[:, 1]`` (both in ``np.unique``
        order); each entry is the number of rows of ``c`` holding that
        pair of values. Empty input yields a ``(0, 0)`` array.
    """
    col_a = c[:, 0]
    col_b = c[:, 1]

    # Compute the category lists once instead of on every loop iteration.
    a_values = np.unique(col_a)
    b_values = np.unique(col_b)

    # Allocate the table in its final 2-D shape; this avoids the flat
    # buffer plus ``resize(len(result) / b_cnt, ...)`` step, whose true
    # division yields a float on Python 3 and divides by zero when ``c``
    # is empty.
    result = np.zeros((len(a_values), len(b_values)))

    for row, a_value in enumerate(a_values):
        # Select the rows for this column-0 value once, so each cell below
        # only has to scan that subset rather than the whole array.
        b_subset = col_b[col_a == a_value]
        for col, b_value in enumerate(b_values):
            result[row, col] = np.sum(b_subset == b_value)

    return result


# Use numpy's histogram method to calculate the matrix of combinations and
# the number of cases in each one.
def simpleHistogram(c):
    """Cross-tabulate the two columns of ``c`` with one ``np.histogramdd`` call.

    The unique values of ``c[:, 0]`` are passed as the bin *edges* of the
    first dimension and the fixed edges 1..10 are used for the second.

    NOTE(review): because the unique values are edges, k unique values give
    only k - 1 bins, and since numpy's last bin is closed on the right the
    two largest values of ``c[:, 0]`` are counted together. Confirm whether
    that is intended before comparing against a per-cell tabulation.

    Parameters
    ----------
    c : 2-D array
        Column 0 and column 1 are the two variables being tabulated.

    Returns
    -------
    tuple
        The ``(counts, edges)`` pair returned by ``np.histogramdd``.
    """
    return np.histogramdd(
        (c[:, 0], c[:, 1]),
        bins=[np.unique(c[:, 0]), range(1, 11)]
    )


def simpleHistogram1(c):
    """Variation 1: one 1-D histogram of column 0 per value of column 1.

    For every unique value of ``c[:, 1]`` the matching entries of
    ``c[:, 0]`` are histogrammed using the unique values of ``c[:, 0]`` as
    bin edges; the per-value count vectors are then stacked side by side.

    NOTE(review): the same edge convention as in the single-call variant
    applies, so the two largest values of ``c[:, 0]`` share the last bin.

    Parameters
    ----------
    c : 2-D array
        Column 0 is histogrammed; column 1 selects the subsets.

    Returns
    -------
    numpy.ndarray
        2-D array with one column per unique value of ``c[:, 1]`` and one
        row per bin of ``c[:, 0]``.
    """
    col_a = c[:, 0]
    col_b = c[:, 1]

    # The edges do not depend on the subset, so compute them only once.
    a_edges = np.unique(col_a)

    # histogramdd returns (counts, edges); only the counts are kept. The
    # former ``or 0`` fallback was dropped: a non-empty tuple is always
    # truthy, so it could never take effect.
    columns = [
        np.histogramdd(col_a[col_b == b_value], bins=[a_edges])[0]
        for b_value in np.unique(col_b)
    ]
    return np.column_stack(columns)


if __name__ == '__main__':
    # Benchmark data: 200000 random (a, b) pairs.
    a = np.random.randint(1, 900, 200000)
    b = np.random.randint(1, 10, 200000)
    c = np.column_stack((a, b))

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('---- Simple Loop ----')
    start = time.time()
    results = simpleLoop(c)
    print(results[0])
    print(time.time() - start)

    print('---- Histogram dd no looping ----')
    start = time.time()
    results = simpleHistogram(c)
    print(results[0][0])
    print(time.time() - start)

    print('---- Histogram run 1 time for each item in column 1 (10 times) ----')
    start = time.time()
    results = simpleHistogram1(c)
    print(results[0])
    print(time.time() - start)