[Scipy-svn] r2453 - in trunk/Lib/sandbox/models: . family tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu Dec 21 19:02:54 EST 2006
Author: jarrod.millman
Date: 2006-12-21 18:02:48 -0600 (Thu, 21 Dec 2006)
New Revision: 2453
Modified:
trunk/Lib/sandbox/models/bspline.py
trunk/Lib/sandbox/models/family/family.py
trunk/Lib/sandbox/models/formula.py
trunk/Lib/sandbox/models/glm.py
trunk/Lib/sandbox/models/info.py
trunk/Lib/sandbox/models/model.py
trunk/Lib/sandbox/models/regression.py
trunk/Lib/sandbox/models/smoothers.py
trunk/Lib/sandbox/models/tests/test_formula.py
Log:
expanded tabs to spaces
Modified: trunk/Lib/sandbox/models/bspline.py
===================================================================
--- trunk/Lib/sandbox/models/bspline.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/bspline.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -50,11 +50,11 @@
"""
if lower:
- t = _zero_triband(a * b, lower=1)
- return t[0].sum() + 2 * t[1:].sum()
+ t = _zero_triband(a * b, lower=1)
+ return t[0].sum() + 2 * t[1:].sum()
else:
- t = _zero_triband(a * b, lower=0)
- return t[-1].sum() + 2 * t[:-1].sum()
+ t = _zero_triband(a * b, lower=0)
+ return t[-1].sum() + 2 * t[:-1].sum()
def _zero_triband(a, lower=0):
@@ -64,9 +64,9 @@
nrow, ncol = a.shape
if lower:
- for i in range(nrow): a[i,(ncol-i):] = 0.
+ for i in range(nrow): a[i,(ncol-i):] = 0.
else:
- for i in range(nrow): a[i,0:i] = 0.
+ for i in range(nrow): a[i,0:i] = 0.
return a
def _zerofunc(x):
@@ -137,17 +137,18 @@
upper = min(upper, self.tau.shape[0] - self.m)
lower = max(0, lower)
- d = N.asarray(d)
- if d.shape == ():
- v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper)
- else:
- if d.shape[0] != 2:
- raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \
- of derivative, second row coefficient in front."
+ d = N.asarray(d)
+ if d.shape == ():
+ v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper)
+ else:
+ if d.shape[0] != 2:
+ raise ValueError, "if d is not an integer, expecting a jx2 \
+ array with first row indicating order \
+ of derivative, second row coefficient in front."
- v = 0
- for i in range(d.shape[1]):
- v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper)
+ v = 0
+ for i in range(d.shape[1]):
+ v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper)
v.shape = (upper-lower,) + _shape
if upper == self.tau.shape[0] - self.m:
@@ -159,23 +160,24 @@
Compute Gram inner product matrix.
"""
- d = N.squeeze(d)
- if N.asarray(d).shape == ():
- self.g = _bspline.gram(self.tau, self.m, int(d), int(d))
- else:
- d = N.asarray(d)
- if d.shape[0] != 2:
- raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \
- of derivative, second row coefficient in front."
- if d.shape == (2,):
- d.shape = (2,1)
- self.g = 0
- for i in range(d.shape[1]):
+ d = N.squeeze(d)
+ if N.asarray(d).shape == ():
+ self.g = _bspline.gram(self.tau, self.m, int(d), int(d))
+ else:
+ d = N.asarray(d)
+ if d.shape[0] != 2:
+ raise ValueError, "if d is not an integer, expecting a jx2 \
+ array with first row indicating order \
+ of derivative, second row coefficient in front."
+ if d.shape == (2,):
+ d.shape = (2,1)
+ self.g = 0
+ for i in range(d.shape[1]):
for j in range(d.shape[1]):
- self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j]))
- self.g = self.g.T
- self.d = d
- return N.nan_to_num(self.g)
+ self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j]))
+ self.g = self.g.T
+ self.d = d
+ return N.nan_to_num(self.g)
class SmoothingSpline(BSpline):
@@ -185,10 +187,10 @@
default_pen = 1.0e-03
def smooth(self, y, x=None, weights=None):
- if self.method == "target_df":
- self.fit_target_df(y, x=x, weights=weights, df=self.target_df)
- elif self.method == "optimize_gcv":
- self.fit_optimize_gcv(y, x=x, weights=weights)
+ if self.method == "target_df":
+ self.fit_target_df(y, x=x, weights=weights, df=self.target_df)
+ elif self.method == "optimize_gcv":
+ self.fit_optimize_gcv(y, x=x, weights=weights)
def fit(self, y, x=None, weights=None, pen=0.):
banded = True
@@ -200,20 +202,21 @@
banded = False
if x.shape != y.shape:
- raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots'
+ raise ValueError, 'x and y shape do not agree, by default x are \
+ the Bspline\'s internal knots'
- bt = self.basis(x)
- if pen >= self.penmax:
- pen = self.penmax
+ bt = self.basis(x)
+ if pen >= self.penmax:
+ pen = self.penmax
if weights is not None:
- self.weights = weights
- else:
- self.weights = 1.
+ self.weights = weights
+ else:
+ self.weights = 1.
- _w = N.sqrt(self.weights)
- bt *= _w
+ _w = N.sqrt(self.weights)
+ bt *= _w
# throw out rows with zeros (this happens at boundary points!)
@@ -222,109 +225,110 @@
bt = bt[:,mask]
y = y[mask]
- self.df_total = y.shape[0]
+ self.df_total = y.shape[0]
bty = N.dot(bt, _w * y)
- self.N = y.shape[0]
+ self.N = y.shape[0]
if not banded:
self.btb = N.dot(bt, bt.T)
- _g = band2array(self.g, lower=1, symmetric=True)
+ _g = band2array(self.g, lower=1, symmetric=True)
self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3]
- self.rank = min(self.rank, self.btb.shape[0])
+ self.rank = min(self.rank, self.btb.shape[0])
else:
self.btb = N.zeros(self.g.shape, N.float64)
nband, nbasis = self.g.shape
for i in range(nbasis):
for k in range(min(nband, nbasis-i)):
- self.btb[k,i] = (bt[i] * bt[i+k]).sum()
+ self.btb[k,i] = (bt[i] * bt[i+k]).sum()
- bty.shape = (1,bty.shape[0])
+ bty.shape = (1,bty.shape[0])
self.chol, self.coef = solveh_banded(self.btb +
pen*self.g,
bty, lower=1)
- self.coef = N.squeeze(self.coef)
- self.resid = y * self.weights - N.dot(self.coef, bt)
- self.pen = pen
+ self.coef = N.squeeze(self.coef)
+ self.resid = y * self.weights - N.dot(self.coef, bt)
+ self.pen = pen
def gcv(self):
- """
- Generalized cross-validation score of current fit.
- """
+ """
+ Generalized cross-validation score of current fit.
+ """
- norm_resid = (self.resid**2).sum()
- return norm_resid / (self.df_total - self.trace())
+ norm_resid = (self.resid**2).sum()
+ return norm_resid / (self.df_total - self.trace())
def df_resid(self):
- """
- self.N - self.trace()
+ """
+ self.N - self.trace()
- where self.N is the number of observations of last fit.
- """
+ where self.N is the number of observations of last fit.
+ """
- return self.N - self.trace()
+ return self.N - self.trace()
def df_fit(self):
- """
- = self.trace()
+ """
+ = self.trace()
- How many degrees of freedom used in the fit?
- """
- return self.trace()
+ How many degrees of freedom used in the fit?
+ """
+ return self.trace()
def trace(self):
- """
- Trace of the smoothing matrix S(pen)
- """
+ """
+ Trace of the smoothing matrix S(pen)
+ """
- if self.pen > 0:
- _invband = _bspline.invband(self.chol.copy())
- tr = _trace_symbanded(_invband, self.btb, lower=1)
- return tr
- else:
- return self.rank
+ if self.pen > 0:
+ _invband = _bspline.invband(self.chol.copy())
+ tr = _trace_symbanded(_invband, self.btb, lower=1)
+ return tr
+ else:
+ return self.rank
def fit_target_df(self, y, x=None, df=None, weights=None, tol=1.0e-03):
- """
- Fit smoothing spline with approximately df degrees of freedom
+ """
+ Fit smoothing spline with approximately df degrees of freedom
used in the fit, i.e. so that self.trace() is approximately df.
- In general, df must be greater than the dimension of the null space
- of the Gram inner product. For cubic smoothing splines, this means
- that df > 2.
+ In general, df must be greater than the dimension of the null space
+ of the Gram inner product. For cubic smoothing splines, this means
+ that df > 2.
"""
- df = df or self.target_df
+ df = df or self.target_df
- apen, bpen = 0, 1.0e-03
- olddf = y.shape[0] - self.m
+ apen, bpen = 0, 1.0e-03
+ olddf = y.shape[0] - self.m
- if hasattr(self, "pen"):
- self.fit(y, x=x, weights=weights, pen=self.pen)
- curdf = self.trace()
- if N.fabs(curdf - df) / df < tol:
- return
- if curdf > df:
- apen, bpen = self.pen, 2 * self.pen
- else:
- apen, bpen = 0., self.pen
+ if hasattr(self, "pen"):
+ self.fit(y, x=x, weights=weights, pen=self.pen)
+ curdf = self.trace()
+ if N.fabs(curdf - df) / df < tol:
+ return
+ if curdf > df:
+ apen, bpen = self.pen, 2 * self.pen
+ else:
+ apen, bpen = 0., self.pen
- while True:
- curpen = 0.5 * (apen + bpen)
- self.fit(y, x=x, weights=weights, pen=curpen)
- curdf = self.trace()
- if curdf > df:
- apen, bpen = curpen, 2 * curpen
- else:
- apen, bpen = apen, curpen
- if apen >= self.penmax:
- raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
- if N.fabs(curdf - df) / df < tol:
- break
+ while True:
+ curpen = 0.5 * (apen + bpen)
+ self.fit(y, x=x, weights=weights, pen=curpen)
+ curdf = self.trace()
+ if curdf > df:
+ apen, bpen = curpen, 2 * curpen
+ else:
+ apen, bpen = apen, curpen
+ if apen >= self.penmax:
+ raise ValueError, "penalty too large, try setting penmax \
+ higher or decreasing df"
+ if N.fabs(curdf - df) / df < tol:
+ break
def fit_optimize_gcv(self, y, x=None, weights=None, tol=1.0e-03,
bracket=(0,1.0e-03)):
- """
+ """
Fit smoothing spline trying to optimize GCV.
Try to find a bracketing interval for scipy.optimize.golden
@@ -334,15 +338,15 @@
sometimes difficult to find a bracketing interval.
"""
-
- def _gcv(pen, y, x):
- self.fit(y, x=x, pen=N.exp(pen))
- a = self.gcv()
- return a
- a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
+ def _gcv(pen, y, x):
+ self.fit(y, x=x, pen=N.exp(pen))
+ a = self.gcv()
+ return a
+ a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
+
def band2array(a, lower=0, symmetric=False, hermitian=False):
"""
Take an upper or lower triangular banded matrix and return a matrix using
@@ -365,7 +369,7 @@
_a += _b
if symmetric and j > 0: _a += _b.T
elif hermitian and j > 0: _a += _b.conjugate().T
- _a = _a.T
+ _a = _a.T
return _a
Modified: trunk/Lib/sandbox/models/family/family.py
===================================================================
--- trunk/Lib/sandbox/models/family/family.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/family/family.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -14,7 +14,7 @@
def weights(self, mu):
- """
+ """
Weights for IRLS step.
"""
Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/formula.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -54,9 +54,9 @@
# Namespace in which self.name will be looked up in, if needed
def _get_namespace(self):
- if isinstance(self.__namespace, N.ndarray):
- return self.__namespace
- else: return self.__namespace or default_namespace
+ if isinstance(self.__namespace, N.ndarray):
+ return self.__namespace
+ else: return self.__namespace or default_namespace
def _set_namespace(self, value): self.__namespace = value
def _del_namespace(self): del self.__namespace
@@ -105,13 +105,13 @@
def __call__(self, *args, **kw):
"""
Return the columns associated to self in a design matrix.
- If the term has no 'func' attribute, it returns
+ If the term has no 'func' attribute, it returns
- self.namespace[self.termname]
+ self.namespace[self.termname]
- else, it returns
-
- self.func(*args, **kw)
+ else, it returns
+
+ self.func(*args, **kw)
"""
@@ -156,8 +156,8 @@
def get_columns(self, *args, **kw):
"""
- Calling function for factor instance.
- """
+ Calling function for factor instance.
+ """
v = self.namespace[self._name]
while True:
@@ -181,9 +181,9 @@
def values(self, *args, **kw):
"""
- Return the keys of the factor, rather than the columns of the design
- matrix.
- """
+ Return the keys of the factor, rather than the columns of the design
+ matrix.
+ """
del(self.func)
val = self(*args, **kw)
@@ -204,7 +204,7 @@
When adding \'intercept\' to a factor, this just returns
- formula(self, namespace=self.namespace)
+ formula(self, namespace=self.namespace)
"""
@@ -237,8 +237,8 @@
__names = self.names()
_names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
value = quantitative(_names, func=self,
- termname='%s:maineffect' % self.termname,
- transform=maineffect_func)
+ termname='%s:maineffect' % self.termname,
+ transform=maineffect_func)
value.namespace = self.namespace
return value
@@ -269,9 +269,9 @@
def __call__(self, *args, **kw):
"""
- A quantitative is just like term, except there is an additional
- transformation: self.transform.
- """
+ A quantitative is just like term, except there is an additional
+ transformation: self.transform.
+ """
return self.transform(term.__call__(self, *args, **kw))
class formula(object):
@@ -287,9 +287,9 @@
"""
def _get_namespace(self):
- if isinstance(self.__namespace, N.ndarray):
- return self.__namespace
- else: return self.__namespace or default_namespace
+ if isinstance(self.__namespace, N.ndarray):
+ return self.__namespace
+ else: return self.__namespace or default_namespace
def _set_namespace(self, value): self.__namespace = value
def _del_namespace(self): del self.__namespace
@@ -396,11 +396,11 @@
"""
if not isinstance(query_term, formula):
- if type(query_term) == type("name"):
- try: query = self[query_term]
- except: return False
- elif isinstance(query_term, term):
- return query_term.termname in self.termnames()
+ if type(query_term) == type("name"):
+ try: query = self[query_term]
+ except: return False
+ elif isinstance(query_term, term):
+ return query_term.termname in self.termnames()
elif len(query_term.terms) == 1:
query_term = query_term.terms[0]
return query_term.termname in self.termnames()
@@ -525,7 +525,7 @@
sumterms.namespace = self.namespace
_term = quantitative(names, func=sumterms, termname=termname,
- transform=product_func)
+ transform=product_func)
_term.namespace = self.namespace
Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/glm.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -29,12 +29,12 @@
return self.family.deviance(Y, results.mu) / scale
def next(self):
- results = self.results; Y = self.Y
+ results = self.results; Y = self.Y
self.weights = self.family.weights(results.mu)
self.initialize(self.design)
Z = results.predict + self.family.link.deriv(results.mu) * (Y - results.mu)
newresults = wls_model.fit(self, Z)
- newresults.Y = Y
+ newresults.Y = Y
newresults.mu = self.family.link.inverse(newresults.predict)
self.iter += 1
return newresults
Modified: trunk/Lib/sandbox/models/info.py
===================================================================
--- trunk/Lib/sandbox/models/info.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/info.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -2,6 +2,16 @@
Statistical models
==================
+This module contains several linear statistical models
+- model formulae as in R (to some extent)
+- OLS (ordinary least square regression)
+- WLS (weighted least square regression)
+- ARp regression
+- GLMS (generalized linear models)
+- robust linear models using M estimators (with a number of standard default robust norms as in R's rlm)
+- robust scale estimates (MAD, Huber's proposal 2).
+- mixed effects models
+- generalized additive models (gam)
"""
depends = ['weave',
Modified: trunk/Lib/sandbox/models/model.py
===================================================================
--- trunk/Lib/sandbox/models/model.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/model.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -1,6 +1,6 @@
import numpy as N
from numpy.linalg import inv
-import scipy.optimize
+#from scipy import optimize
from scipy.sandbox.models.contrast import ContrastResults
from scipy.sandbox.models.utils import recipr
@@ -64,10 +64,10 @@
raise NotImplementedError
def newton(self, theta):
- raise NotImplementedError
+ raise NotImplementedError
# def f(theta):
# return -self.logL(theta)
-# self.results = scipy.optimize.fmin(f, theta)
+# self.results = optimize.fmin(f, theta)
class LikelihoodModelResults:
Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/regression.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -60,10 +60,10 @@
self.initialize(design)
def initialize(self, design):
- """
- Set design for model, prewhitening design matrix and precomputing
- covariance of coefficients (up to scale factor in front).
- """
+ """
+ Set design for model, prewhitening design matrix and precomputing
+ covariance of coefficients (up to scale factor in front).
+ """
self.design = design
self.wdesign = self.whiten(design)
@@ -73,9 +73,9 @@
self.df_resid = self.wdesign.shape[0] - utils.rank(self.design)
def whiten(self, Y):
- """
- OLS model whitener does nothing: returns Y.
- """
+ """
+ OLS model whitener does nothing: returns Y.
+ """
return Y
@@ -95,7 +95,7 @@
def fit(self, Y):
"""
Full fit of the model including estimate of covariance matrix,
- (whitened) residuals and scale.
+ (whitened) residuals and scale.
"""
@@ -160,80 +160,80 @@
"""
def __init__(self, design, rho):
- if type(rho) is type(1):
- self.order = rho
- self.rho = N.zeros(self.order, N.float64)
- else:
- self.rho = N.squeeze(N.asarray(rho))
- if len(self.rho.shape) not in [0,1]:
- raise ValueError, "AR parameters must be a scalar or a vector"
- if self.rho.shape == ():
- self.rho.shape = (1,)
- self.order = self.rho.shape[0]
+ if type(rho) is type(1):
+ self.order = rho
+ self.rho = N.zeros(self.order, N.float64)
+ else:
+ self.rho = N.squeeze(N.asarray(rho))
+ if len(self.rho.shape) not in [0,1]:
+ raise ValueError, "AR parameters must be a scalar or a vector"
+ if self.rho.shape == ():
+ self.rho.shape = (1,)
+ self.order = self.rho.shape[0]
ols_model.__init__(self, design)
def iterative_fit(self, Y, niter=3):
- """
- Perform an iterative two-stage procedure to estimate AR(p)
- paramters and regression coefficients simultaneously.
- """
- for i in range(niter):
- self.initialize(self.design)
- results = self.fit(Y)
- self.rho, _ = self.yule_walker(Y - results.predict)
+ """
+ Perform an iterative two-stage procedure to estimate AR(p)
+ parameters and regression coefficients simultaneously.
+ """
+ for i in range(niter):
+ self.initialize(self.design)
+ results = self.fit(Y)
+ self.rho, _ = self.yule_walker(Y - results.predict)
def whiten(self, X):
- """
- Whiten a series of columns according to an AR(p)
- covariance structure.
+ """
+ Whiten a series of columns according to an AR(p)
+ covariance structure.
- """
- X = N.asarray(X, N.float64)
- _X = X.copy()
- for i in range(self.order):
- _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)]
- return _X
+ """
+ X = N.asarray(X, N.float64)
+ _X = X.copy()
+ for i in range(self.order):
+ _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)]
+ return _X
def yule_walker(self, X, method="unbiased", df=None):
- """
- Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
- Method can be "unbiased" or "MLE" and this determines
- denominator in estimate of ACF at lag k. If "MLE", the denominator is
- n=r.shape[0], if "unbiased" the denominator is n-k.
+ """
+ Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
+ Method can be "unbiased" or "MLE" and this determines
+ denominator in estimate of ACF at lag k. If "MLE", the denominator is
+ n=r.shape[0], if "unbiased" the denominator is n-k.
- If df is supplied, then it is assumed the X has df degrees of
- freedom rather than n.
+ If df is supplied, then it is assumed the X has df degrees of
+ freedom rather than n.
- See, for example:
+ See, for example:
http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
- """
-
- method = str(method).lower()
- if method not in ["unbiased", "mle"]:
- raise ValueError, "ACF estimation method must be 'unbiased' \
- or 'MLE'"
- X = N.asarray(X, N.float64)
- X -= X.mean()
- n = df or X.shape[0]
+ """
+
+ method = str(method).lower()
+ if method not in ["unbiased", "mle"]:
+ raise ValueError, "ACF estimation method must be 'unbiased' \
+ or 'MLE'"
+ X = N.asarray(X, N.float64)
+ X -= X.mean()
+ n = df or X.shape[0]
- if method == "unbiased":
- denom = lambda k: n - k
- else:
- denom = lambda k: n
+ if method == "unbiased":
+ denom = lambda k: n - k
+ else:
+ denom = lambda k: n
- if len(X.shape) != 1:
- raise ValueError, "expecting a vector to estimate AR parameters"
- r = N.zeros(self.order+1, N.float64)
- r[0] = (X**2).sum() / denom(0)
- for k in range(1,self.order+1):
- r[k] = (X[0:-k]*X[k:]).sum() / denom(k)
- R = toeplitz(r[:-1])
+ if len(X.shape) != 1:
+ raise ValueError, "expecting a vector to estimate AR parameters"
+ r = N.zeros(self.order+1, N.float64)
+ r[0] = (X**2).sum() / denom(0)
+ for k in range(1,self.order+1):
+ r[k] = (X[0:-k]*X[k:]).sum() / denom(k)
+ R = toeplitz(r[:-1])
- rho = L.solve(R, r[1:])
- sigmasq = r[0] - (r[1:]*rho).sum()
- return rho, N.sqrt(sigmasq)
+ rho = L.solve(R, r[1:])
+ sigmasq = r[0] - (r[1:]*rho).sum()
+ return rho, N.sqrt(sigmasq)
class wls_model(ols_model):
"""
@@ -272,11 +272,11 @@
ols_model.__init__(self, design)
def whiten(self, X):
- """
- Whitener for WLS model, multiplies by sqrt(self.weights)
- """
+ """
+ Whitener for WLS model, multiplies by sqrt(self.weights)
+ """
- X = N.asarray(X, N.float64)
+ X = N.asarray(X, N.float64)
if X.ndim == 1:
return X * N.sqrt(self.weights)
Modified: trunk/Lib/sandbox/models/smoothers.py
===================================================================
--- trunk/Lib/sandbox/models/smoothers.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/smoothers.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -26,44 +26,44 @@
"""
def df_fit(self):
- """
- Degrees of freedom used in the fit.
- """
- return self.order + 1
+ """
+ Degrees of freedom used in the fit.
+ """
+ return self.order + 1
def df_resid(self):
- """
- Residual degrees of freedom from last fit.
- """
- return self.N - self.order - 1
+ """
+ Residual degrees of freedom from last fit.
+ """
+ return self.N - self.order - 1
def __init__(self, order, x=None):
- self.order = order
- self.coef = N.zeros((order+1,), N.float64)
- if x is not None:
- self.X = N.array([x**i for i in range(order+1)]).T
+ self.order = order
+ self.coef = N.zeros((order+1,), N.float64)
+ if x is not None:
+ self.X = N.array([x**i for i in range(order+1)]).T
def __call__(self, x=None):
- if x is not None:
- X = N.array([(x**i) for i in range(self.order+1)])
- else: X = self.X
+ if x is not None:
+ X = N.array([(x**i) for i in range(self.order+1)])
+ else: X = self.X
return N.squeeze(N.dot(X.T, self.coef))
def fit(self, y, x=None, weights=None):
- self.N = y.shape[0]
- if weights is None:
- weights = 1
- _w = N.sqrt(weights)
- if x is None:
- if not hasattr(self, "X"):
- raise ValueError, "x needed to fit poly_smoother"
- else:
- self.X = N.array([(x**i) for i in range(self.order+1)])
+ self.N = y.shape[0]
+ if weights is None:
+ weights = 1
+ _w = N.sqrt(weights)
+ if x is None:
+ if not hasattr(self, "X"):
+ raise ValueError, "x needed to fit poly_smoother"
+ else:
+ self.X = N.array([(x**i) for i in range(self.order+1)])
- X = self.X * _w
+ X = self.X * _w
- _y = y * _w
- self.coef = N.dot(L.pinv(X).T, _y)
+ _y = y * _w
+ self.coef = N.dot(L.pinv(X).T, _y)
class smoothing_spline(bspline):
@@ -81,16 +81,16 @@
if x.shape != y.shape:
raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots'
- bt = self.basis(x)
- if pen >= self.penmax:
- pen = self.penmax
+ bt = self.basis(x)
+ if pen >= self.penmax:
+ pen = self.penmax
if weights is None:
- weights = N.array(1.)
+ weights = N.array(1.)
- wmean = weights.mean()
- _w = N.sqrt(weights / wmean)
- bt *= _w
+ wmean = weights.mean()
+ _w = N.sqrt(weights / wmean)
+ bt *= _w
# throw out rows with zeros (this happens at boundary points!)
@@ -99,69 +99,69 @@
bt = bt[:,mask]
y = y[mask]
- self.df_total = y.shape[0]
+ self.df_total = y.shape[0]
- if bt.shape[1] != y.shape[0]:
- raise ValueError, "some x values are outside range of B-spline knots"
+ if bt.shape[1] != y.shape[0]:
+ raise ValueError, "some x values are outside range of B-spline knots"
bty = N.dot(bt, _w * y)
- self.N = y.shape[0]
+ self.N = y.shape[0]
if not banded:
self.btb = N.dot(bt, bt.T)
- _g = band2array(self.g, lower=1, symmetric=True)
+ _g = band2array(self.g, lower=1, symmetric=True)
self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3]
- self.rank = min(self.rank, self.btb.shape[0])
+ self.rank = min(self.rank, self.btb.shape[0])
else:
self.btb = N.zeros(self.g.shape, N.float64)
nband, nbasis = self.g.shape
for i in range(nbasis):
for k in range(min(nband, nbasis-i)):
- self.btb[k,i] = (bt[i] * bt[i+k]).sum()
+ self.btb[k,i] = (bt[i] * bt[i+k]).sum()
- bty.shape = (1,bty.shape[0])
+ bty.shape = (1,bty.shape[0])
self.chol, self.coef = solveh_banded(self.btb +
- pen*self.g,
- bty, lower=1)
+ pen*self.g,
+ bty, lower=1)
- self.coef = N.squeeze(self.coef)
- self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt))
- self.pen = pen
+ self.coef = N.squeeze(self.coef)
+ self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt))
+ self.pen = pen
def gcv(self):
- """
- Generalized cross-validation score of current fit.
- """
+ """
+ Generalized cross-validation score of current fit.
+ """
- norm_resid = (self.resid**2).sum()
- return norm_resid / (self.df_total - self.trace())
+ norm_resid = (self.resid**2).sum()
+ return norm_resid / (self.df_total - self.trace())
def df_resid(self):
- """
- self.N - self.trace()
+ """
+ self.N - self.trace()
- where self.N is the number of observations of last fit.
- """
-
- return self.N - self.trace()
+ where self.N is the number of observations of last fit.
+ """
+
+ return self.N - self.trace()
def df_fit(self):
- """
- = self.trace()
+ """
+ = self.trace()
- How many degrees of freedom used in the fit?
- """
- return self.trace()
+ How many degrees of freedom used in the fit?
+ """
+ return self.trace()
def trace(self):
- """
- Trace of the smoothing matrix S(pen)
- """
+ """
+ Trace of the smoothing matrix S(pen)
+ """
- if self.pen > 0:
- _invband = _bspline.invband(self.chol.copy())
- tr = _trace_symbanded(_invband, self.btb, lower=1)
- return tr
- else:
- return self.rank
+ if self.pen > 0:
+ _invband = _bspline.invband(self.chol.copy())
+ tr = _trace_symbanded(_invband, self.btb, lower=1)
+ return tr
+ else:
+ return self.rank
class smoothing_spline_fixeddf(smoothing_spline):
@@ -178,34 +178,34 @@
target_df = 5
def __init__(self, knots, order=4, coef=None, M=None, target_df=None):
- if target_df is not None:
- self.target_df = target_df
- bspline.__init__(self, knots, order=order, coef=coef, M=M)
- self.target_reached = False
+ if target_df is not None:
+ self.target_df = target_df
+ bspline.__init__(self, knots, order=order, coef=coef, M=M)
+ self.target_reached = False
def fit(self, y, x=None, df=None, weights=None, tol=1.0e-03):
- df = df or self.target_df
+ df = df or self.target_df
- apen, bpen = 0, 1.0e-03
- olddf = y.shape[0] - self.m
+ apen, bpen = 0, 1.0e-03
+ olddf = y.shape[0] - self.m
- if not self.target_reached:
- while True:
- curpen = 0.5 * (apen + bpen)
- smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen)
- curdf = self.trace()
- if curdf > df:
- apen, bpen = curpen, 2 * curpen
- else:
- apen, bpen = apen, curpen
- if apen >= self.penmax:
- raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
- if N.fabs(curdf - df) / df < tol:
- self.target_reached = True
- break
- else:
- smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen)
+ if not self.target_reached:
+ while True:
+ curpen = 0.5 * (apen + bpen)
+ smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen)
+ curdf = self.trace()
+ if curdf > df:
+ apen, bpen = curpen, 2 * curpen
+ else:
+ apen, bpen = apen, curpen
+ if apen >= self.penmax:
+ raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
+ if N.fabs(curdf - df) / df < tol:
+ self.target_reached = True
+ break
+ else:
+ smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen)
class smoothing_spline_gcv(smoothing_spline):
@@ -221,14 +221,14 @@
"""
def fit(self, y, x=None, weights=None, tol=1.0e-03,
- bracket=(0,1.0e-03)):
+ bracket=(0,1.0e-03)):
- def _gcv(pen, y, x):
- smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights)
- a = self.gcv()
- return a
+ def _gcv(pen, y, x):
+ smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights)
+ a = self.gcv()
+ return a
- a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
+ a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
def _trace_symbanded(a,b, lower=0):
"""
@@ -236,11 +236,11 @@
"""
if lower:
- t = _zero_triband(a * b, lower=1)
- return t[0].sum() + 2 * t[1:].sum()
+ t = _zero_triband(a * b, lower=1)
+ return t[0].sum() + 2 * t[1:].sum()
else:
- t = _zero_triband(a * b, lower=0)
- return t[-1].sum() + 2 * t[:-1].sum()
+ t = _zero_triband(a * b, lower=0)
+ return t[-1].sum() + 2 * t[:-1].sum()
@@ -251,7 +251,7 @@
nrow, ncol = a.shape
if lower:
- for i in range(nrow): a[i,(ncol-i):] = 0.
+ for i in range(nrow): a[i,(ncol-i):] = 0.
else:
- for i in range(nrow): a[i,0:i] = 0.
+ for i in range(nrow): a[i,0:i] = 0.
return a
Modified: trunk/Lib/sandbox/models/tests/test_formula.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-12-22 00:02:48 UTC (rev 2453)
@@ -57,43 +57,43 @@
self.formula = self.terms[0]
for i in range(1, 10):
self.formula += self.terms[i]
- self.formula.namespace = self.namespace
+ self.formula.namespace = self.namespace
def test_namespace(self):
- space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
- space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
- X = formula.term('X')
- Y = formula.term('Y')
+ space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
+ space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
+ X = formula.term('X')
+ Y = formula.term('Y')
- X.namespace = space1
- assert_almost_equal(X(), N.arange(50))
+ X.namespace = space1
+ assert_almost_equal(X(), N.arange(50))
- Y.namespace = space2
- assert_almost_equal(Y(), N.arange(20)*2)
+ Y.namespace = space2
+ assert_almost_equal(Y(), N.arange(20)*2)
- f = X + Y
+ f = X + Y
- f.namespace = space1
- self.assertEqual(f().shape, (2,50))
- assert_almost_equal(Y(), N.arange(50)*2)
- assert_almost_equal(X(), N.arange(50))
+ f.namespace = space1
+ self.assertEqual(f().shape, (2,50))
+ assert_almost_equal(Y(), N.arange(50)*2)
+ assert_almost_equal(X(), N.arange(50))
- f.namespace = space2
- self.assertEqual(f().shape, (2,20))
- assert_almost_equal(Y(), N.arange(20)*2)
- assert_almost_equal(X(), N.arange(20))
+ f.namespace = space2
+ self.assertEqual(f().shape, (2,20))
+ assert_almost_equal(Y(), N.arange(20)*2)
+ assert_almost_equal(X(), N.arange(20))
def test_termcolumns(self):
t1 = formula.term("A")
t2 = formula.term("B")
f = t1 + t2 + t1 * t2
- def other(val):
- return N.array([3.2*val,4.342*val**2, 5.234*val**3])
- q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
- f += q
- q.namespace = f.namespace = self.formula.namespace
- assert_almost_equal(q(), f()[f.termcolumns(q)])
+ def other(val):
+ return N.array([3.2*val,4.342*val**2, 5.234*val**3])
+ q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
+ f += q
+ q.namespace = f.namespace = self.formula.namespace
+ assert_almost_equal(q(), f()[f.termcolumns(q)])
def test_str(self):
@@ -111,27 +111,27 @@
prod = self.terms[0] * self.terms[2]
self.formula += prod
x = self.formula.design()
- p = self.formula['A*C']
+ p = self.formula['A*C']
col = self.formula.termcolumns(prod, dict=False)
assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2])
assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2])
-
+
def test_intercept1(self):
prod = self.terms[0] * self.terms[2]
self.formula += formula.I
- icol = self.formula.names().index('intercept')
- assert_almost_equal(self.formula()[icol], N.ones((40,)))
+ icol = self.formula.names().index('intercept')
+ assert_almost_equal(self.formula()[icol], N.ones((40,)))
def test_intercept2(self):
prod = self.terms[0] * self.terms[2]
self.formula += formula.I
- icol = self.formula.names().index('intercept')
- assert_almost_equal(self.formula()[icol], N.ones((40,)))
+ icol = self.formula.names().index('intercept')
+ assert_almost_equal(self.formula()[icol], N.ones((40,)))
def test_intercept3(self):
prod = self.terms[0] * formula.I
- prod.namespace = self.formula.namespace
- assert_almost_equal(N.squeeze(prod()), self.terms[0]())
+ prod.namespace = self.formula.namespace
+ assert_almost_equal(N.squeeze(prod()), self.terms[0]())
@@ -163,53 +163,53 @@
resid = N.identity(40) - P
self.namespace['noise'] = N.transpose(N.dot(resid, R.standard_normal((40,5))))
terms = dummy + self.terms[2]
- terms.namespace = self.formula.namespace
+ terms.namespace = self.formula.namespace
c = contrast.Contrast(terms, self.formula)
c.getmatrix()
self.assertEquals(c.matrix.shape, (10,))
def test_power(self):
- t = self.terms[2]
- t2 = t**2
- t.namespace = t2.namespace = self.formula.namespace
- assert_almost_equal(t()**2, t2())
+ t = self.terms[2]
+ t2 = t**2
+ t.namespace = t2.namespace = self.formula.namespace
+ assert_almost_equal(t()**2, t2())
def test_quantitative(self):
- t = self.terms[2]
- sint = formula.quantitative('t', func=t, transform=N.sin)
- t.namespace = sint.namespace = self.formula.namespace
- assert_almost_equal(N.sin(t()), sint())
+ t = self.terms[2]
+ sint = formula.quantitative('t', func=t, transform=N.sin)
+ t.namespace = sint.namespace = self.formula.namespace
+ assert_almost_equal(N.sin(t()), sint())
def test_factor1(self):
- f = ['a','b','c']*10
- fac = formula.factor('ff', set(f))
- fac.namespace = {'ff':f}
- self.assertEquals(list(fac.values()), f)
+ f = ['a','b','c']*10
+ fac = formula.factor('ff', set(f))
+ fac.namespace = {'ff':f}
+ self.assertEquals(list(fac.values()), f)
def test_factor2(self):
- f = ['a','b','c']*10
- fac = formula.factor('ff', set(f))
- fac.namespace = {'ff':f}
- self.assertEquals(fac().shape, (3,30))
+ f = ['a','b','c']*10
+ fac = formula.factor('ff', set(f))
+ fac.namespace = {'ff':f}
+ self.assertEquals(fac().shape, (3,30))
def test_factor3(self):
- f = ['a','b','c']*10
- fac = formula.factor('ff', set(f))
- fac.namespace = {'ff':f}
- m = fac.main_effect(reference=1)
- self.assertEquals(m().shape, (2,30))
+ f = ['a','b','c']*10
+ fac = formula.factor('ff', set(f))
+ fac.namespace = {'ff':f}
+ m = fac.main_effect(reference=1)
+ self.assertEquals(m().shape, (2,30))
def test_factor4(self):
- f = ['a','b','c']*10
- fac = formula.factor('ff', set(f))
- fac.namespace = {'ff':f}
- m = fac.main_effect(reference=2)
- r = N.array([N.identity(3)]*10)
- r.shape = (30,3)
- r = r.T
- _m = N.array([r[0]-r[2],r[1]-r[2]])
- assert_almost_equal(_m, m())
+ f = ['a','b','c']*10
+ fac = formula.factor('ff', set(f))
+ fac.namespace = {'ff':f}
+ m = fac.main_effect(reference=2)
+ r = N.array([N.identity(3)]*10)
+ r.shape = (30,3)
+ r = r.T
+ _m = N.array([r[0]-r[2],r[1]-r[2]])
+ assert_almost_equal(_m, m())
def test_contrast4(self):
More information about the Scipy-svn
mailing list