[MATRIX-SIG] Calculating correlation Matrix
Janne Sinkkonen
janne@avocado.pc.helsinki.fi
16 Jul 1997 11:43:25 +0300
hyoon@nyptsrv1.etsd.ml.com (Hoon Yoon - IPT Quant) writes:
> Also, if anyone knows of a good stat function library for NumPy, I would really
> appreciate the source.
> Thanks much in advance,
Here are some elementary functions including moments (and correlation
matrices) and something to help you with bootstrapping.
# This is from Konrad Hinsen.
import Numeric
def histogram(data, nbins, range = None):
    """Return an (nbins, 2) array of (bin center, count) rows.

    data  -- sequence of numbers to be binned
    nbins -- number of equal-width bins
    range -- optional (min, max) pair restricting the values considered;
             defaults to the data's own extremes.  (The parameter name
             shadows the builtin but is kept for caller compatibility.)

    When range is given, values outside [min, max] are discarded before
    binning.
    """
    data = Numeric.array(data)
    if range is None:
        lo = Numeric.minimum.reduce(data)
        hi = Numeric.maximum.reduce(data)
    else:
        lo, hi = range
        # Keep only the values inside the requested interval.
        data = Numeric.repeat(data,
                              Numeric.logical_and(Numeric.less_equal(data, hi),
                                                  Numeric.greater_equal(data, lo)))
    bin_width = (hi - lo) / nbins
    # Bin index of each value ('l' is the long-integer typecode).
    idx = Numeric.floor((data - lo) / bin_width).astype('l')
    # Bug fix: values exactly equal to the upper edge used to get bin
    # index nbins (out of range) and were silently dropped even though
    # the filter above keeps them; clip them into the last bin.
    idx = Numeric.minimum(idx, nbins - 1)
    # Count occurrences of each bin index by comparing against a column
    # of all possible indices and reducing along the data axis.
    histo = Numeric.add.reduce(Numeric.equal(
        Numeric.reshape(Numeric.arange(nbins), (nbins, 1)), idx), -1)
    bins = lo + bin_width * (Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
# I use these as well
from Numeric import *
def mean(X):
    """Average of X along its first axis."""
    n = X.shape[0]
    total = add.reduce(X)
    return total / n
def variance(X):
    """Population (biased, 1/N) variance of X along its first axis.

    Computed as the mean squared deviation from the mean; the previous
    E[X^2] - E[X]^2 form loses almost all precision (catastrophic
    cancellation) when the mean is large relative to the spread.
    """
    N = X.shape[0]
    m = sum(X) / N
    return sum((X - m)**2) / N
def sample_variance(X):
    """Sample (unbiased, 1/(N-1)) variance of X along its first axis.

    Uses the two-pass sum-of-squared-deviations form; the previous
    (sum(X**2) - sum(X)**2/N) form suffers catastrophic cancellation
    for data with a large mean.
    """
    N = X.shape[0]
    m = sum(X) / N
    return sum((X - m)**2) / (N - 1)
def std(X):
    """Population standard deviation: square root of variance(X)."""
    v = variance(X)
    return sqrt(v)
def sample_std(X):
    """Sample standard deviation: square root of sample_variance(X)."""
    v = sample_variance(X)
    return sqrt(v)
def covariance(X):
    """Covariance matrix of the rows of X (biased, 1/N normalization)."""
    n = X.shape[0]
    m = sum(X) / n
    # dot(Xt, X) equals sum(multiply.outer(...)) but is far less
    # memory-hungry.
    second_moment = dot(transpose(X), X) / n
    return second_moment - multiply.outer(m, m)
def covariance2(X, Y):
    """Cross-covariance matrix between X and Y (biased, 1/N).

    X and Y must have the same first dimension N — dot(Xt, Y) requires
    it — so a single N serves both normalizations.  (The unused local
    M = Y.shape[0] in the original has been removed.)
    """
    N = X.shape[0]
    Xt = transpose(X)
    mX, mY = sum(X)/N, sum(Y)/N
    return dot(Xt, Y)/N - multiply.outer(mX, mY)
def correlation(X):
    """Correlation matrix of the rows of X.

    Each covariance entry is divided by the product of the two
    corresponding standard deviations (taken from the diagonal).
    """
    cov = covariance(X)
    v = diagonal(cov)
    scale = sqrt(multiply.outer(v, v))
    return cov / scale
def correlation2(X, Y):
    """Cross-correlation matrix between X and Y, normalized by the
    per-variable (population) variances."""
    cov = covariance2(X, Y)
    scale = sqrt(multiply.outer(variance(X), variance(Y)))
    return cov / scale
def normalize(a):
    """Standardize a to zero mean and unit std.

    Returns (standardized a, mean, std) so callers can undo the
    transformation or apply it to other data.
    """
    center = mean(a)
    spread = std(a)
    return (a - center) / spread, center, spread
def normalize_from(a, b):
    """Standardize a using the mean and std of the reference sample b."""
    return (a - mean(b)) / std(b)
# Euclidean distances
def squared_distances(X, Y):
    """Matrix of squared Euclidean distances between rows of X and rows
    of Y.

    Uses |x - y|^2 = |x|^2 + |y|^2 - 2 x.y, avoiding an explicit
    pairwise-difference tensor.
    """
    xx = sum(X * X, -1)
    yy = sum(Y * Y, -1)
    cross = dot(X, transpose(Y))
    return add.outer(xx, yy) - 2 * cross
# Some functions to make bootstrapping easier.
from Numeric import *
from RandomArray import *
import string
import simpleStat
def bsIndices(n):
    """Return n bootstrap indices (draws with replacement) for a sample
    of size n."""
    draws = randint(0, n, n)
    # Workaround kept from the original: the modulus is said to be
    # needless in future NumPy versions and on (all?) non-Intel
    # platforms, but it is a cheap safeguard against randint returning
    # the upper bound itself.
    return draws % n
def bsVector(a):
    """Return the bootstrap weight vector generated from the bootstrap
    indices a.

    Element i is the fraction of draws in a equal to i, so the result
    sums to one.
    """
    n = len(a)
    # Occurrence counts of 0..n-1, read off from the insertion points
    # of each index into the sorted draw list.  Ugly but (relatively)
    # fast.
    edges = concatenate((searchsorted(sort(a), arange(n)), (n,)))
    counts = edges[1:] - edges[:-1]
    return counts.astype(Float) / n
def bsStd(sample, estimator, n):
    """Based on SAMPLE, estimate the distribution of ESTIMATOR using N
    bootstrap repetitions.

    estimator -- callable applied to each resampled copy of sample.
    Returns (mean, sample_std) over the N bootstrap estimates.

    (The original docstring was a plain-quoted string spanning two
    lines — a syntax error — now a proper triple-quoted docstring.)
    """
    estimates = []
    for k in xrange(n):
        ii = bsIndices(len(sample))
        estimates.append(estimator(take(sample, ii)))
    r = array(estimates)
    return (simpleStat.mean(r),
            simpleStat.sample_std(r))
def bsDistribution_w(sample, w_estimator, n):
    """Based on SAMPLE, return samples from the distribution of
    W_ESTIMATOR using N bootstrap repetitions.

    w_estimator -- callable taking the (resampled) sample and a weight
                   vector as its two arguments.
    Returns an array of the N estimates.

    (The original docstring was a plain-quoted string spanning several
    lines — a syntax error — now a proper triple-quoted docstring.)
    """
    estimates = []
    l = len(sample)
    even = ones(l, Float) / l    # uniform weights, one per data point
    for k in xrange(n):
        ii = bsIndices(l)
        estimates.append(w_estimator(take(sample, ii), even))
    return array(estimates)
def bsStdBias(sample, w_estimator, n):
    """Bootstrap summary statistics for W_ESTIMATOR on SAMPLE with N
    repetitions.

    Returns a 5-tuple:
      (mean of the estimates,
       sample std of the estimates,
       mean of the estimates minus the estimate taken at the averaged
         bootstrap weights (a bias estimate),
       the middle order statistic of the estimates,
       the 3/4 order statistic minus the 1/4 order statistic).
    """
    size = len(sample)
    uniform = ones(size, Float) / size
    weight_sum = zeros(size, Float)
    estimates = []
    for rep in xrange(n):
        idx = bsIndices(size)
        weight_sum = weight_sum + bsVector(idx)
        estimates.append(w_estimator(take(sample, idx), uniform))
    ordered = sort(array(estimates))
    count = len(ordered)
    return (simpleStat.mean(ordered),
            simpleStat.sample_std(ordered),
            simpleStat.mean(ordered) - w_estimator(sample, weight_sum / n),
            ordered[int(count / 2)],
            ordered[int(3 * count / 4)] - ordered[int(1 * count / 4)])
--
Janne Sinkkonen <janne@iki.fi> <URL: http://www.iki.fi/~janne/ >
_______________
MATRIX-SIG - SIG on Matrix Math for Python
send messages to: matrix-sig@python.org
administrivia to: matrix-sig-request@python.org
_______________