[Scipy-svn] r2453 - in trunk/Lib/sandbox/models: . family tests

scipy-svn at scipy.org
Thu Dec 21 19:02:54 EST 2006


Author: jarrod.millman
Date: 2006-12-21 18:02:48 -0600 (Thu, 21 Dec 2006)
New Revision: 2453

Modified:
   trunk/Lib/sandbox/models/bspline.py
   trunk/Lib/sandbox/models/family/family.py
   trunk/Lib/sandbox/models/formula.py
   trunk/Lib/sandbox/models/glm.py
   trunk/Lib/sandbox/models/info.py
   trunk/Lib/sandbox/models/model.py
   trunk/Lib/sandbox/models/regression.py
   trunk/Lib/sandbox/models/smoothers.py
   trunk/Lib/sandbox/models/tests/test_formula.py
Log:
expanded tabs to spaces


Modified: trunk/Lib/sandbox/models/bspline.py
===================================================================
--- trunk/Lib/sandbox/models/bspline.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/bspline.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -50,11 +50,11 @@
     """
 
     if lower:
-	t = _zero_triband(a * b, lower=1)
-	return t[0].sum() + 2 * t[1:].sum()
+        t = _zero_triband(a * b, lower=1)
+        return t[0].sum() + 2 * t[1:].sum()
     else:
-	t = _zero_triband(a * b, lower=0)
-	return t[-1].sum() + 2 * t[:-1].sum()
+        t = _zero_triband(a * b, lower=0)
+        return t[-1].sum() + 2 * t[:-1].sum()
 
 
 def _zero_triband(a, lower=0):
@@ -64,9 +64,9 @@
 
     nrow, ncol = a.shape
     if lower: 
-	for i in range(nrow): a[i,(ncol-i):] = 0.
+        for i in range(nrow): a[i,(ncol-i):] = 0.
     else:
-	for i in range(nrow): a[i,0:i] = 0.
+        for i in range(nrow): a[i,0:i] = 0.
     return a
 
 def _zerofunc(x):
@@ -137,17 +137,18 @@
         upper = min(upper, self.tau.shape[0] - self.m)
         lower = max(0, lower)
         
-	d = N.asarray(d)
-	if d.shape == ():
-	    v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper)
-	else:
-	    if d.shape[0] != 2:
-		raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \
-		of derivative, second row coefficient in front."
+        d = N.asarray(d)
+        if d.shape == ():
+            v = _bspline.evaluate(x, self.tau, self.m, int(d), lower, upper)
+        else:
+            if d.shape[0] != 2:
+                raise ValueError, "if d is not an integer, expecting a jx2 \
+                   array with first row indicating order \
+                   of derivative, second row coefficient in front."
 
-	    v = 0
-	    for i in range(d.shape[1]):
-		v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper)
+            v = 0
+            for i in range(d.shape[1]):
+                v += d[1,i] * _bspline.evaluate(x, self.tau, self.m, d[0,i], lower, upper)
 
         v.shape = (upper-lower,) + _shape
         if upper == self.tau.shape[0] - self.m:
@@ -159,23 +160,24 @@
         Compute Gram inner product matrix.
         """
         
-	d = N.squeeze(d)
-	if N.asarray(d).shape == ():
-	    self.g = _bspline.gram(self.tau, self.m, int(d), int(d))
-	else:
-	    d = N.asarray(d)
-	    if d.shape[0] != 2:
-		raise ValueError, "if d is not an integer, expecting a jx2 array with first row indicating order \
-		of derivative, second row coefficient in front."
-	    if d.shape == (2,):
-		d.shape = (2,1)
-	    self.g = 0
-	    for i in range(d.shape[1]):
+        d = N.squeeze(d)
+        if N.asarray(d).shape == ():
+            self.g = _bspline.gram(self.tau, self.m, int(d), int(d))
+        else:
+            d = N.asarray(d)
+            if d.shape[0] != 2:
+                raise ValueError, "if d is not an integer, expecting a jx2 \
+                   array with first row indicating order \
+                   of derivative, second row coefficient in front."
+            if d.shape == (2,):
+                d.shape = (2,1)
+            self.g = 0
+            for i in range(d.shape[1]):
                 for j in range(d.shape[1]):
-		    self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j]))
-	self.g = self.g.T
-	self.d = d
-	return N.nan_to_num(self.g)
+                    self.g += d[1,i]* d[1,j] * _bspline.gram(self.tau, self.m, int(d[0,i]), int(d[0,j]))
+        self.g = self.g.T
+        self.d = d
+        return N.nan_to_num(self.g)
 
 class SmoothingSpline(BSpline):
 
@@ -185,10 +187,10 @@
     default_pen = 1.0e-03
 
     def smooth(self, y, x=None, weights=None):
-	if self.method == "target_df":
-	    self.fit_target_df(y, x=x, weights=weights, df=self.target_df)
-	elif self.method == "optimize_gcv":
-	    self.fit_optimize_gcv(y, x=x, weights=weights)
+        if self.method == "target_df":
+            self.fit_target_df(y, x=x, weights=weights, df=self.target_df)
+        elif self.method == "optimize_gcv":
+            self.fit_optimize_gcv(y, x=x, weights=weights)
 
     def fit(self, y, x=None, weights=None, pen=0.):
         banded = True
@@ -200,20 +202,21 @@
             banded = False
             
         if x.shape != y.shape:
-            raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots'
+            raise ValueError, 'x and y shape do not agree, by default x are \
+               the Bspline\'s internal knots'
         
-	bt = self.basis(x)
-	if pen >= self.penmax:
-	    pen = self.penmax
+        bt = self.basis(x)
+        if pen >= self.penmax:
+            pen = self.penmax
 
 
         if weights is not None:
-	    self.weights = weights 
-	else:
-	    self.weights = 1. 
+            self.weights = weights 
+        else:
+            self.weights = 1. 
 
-	_w = N.sqrt(self.weights)
-	bt *= _w
+        _w = N.sqrt(self.weights)
+        bt *= _w
 
         # throw out rows with zeros (this happens at boundary points!)
 
@@ -222,109 +225,110 @@
         bt = bt[:,mask]
         y = y[mask]
 
-	self.df_total = y.shape[0]
+        self.df_total = y.shape[0]
         bty = N.dot(bt, _w * y)
-	self.N = y.shape[0]
+        self.N = y.shape[0]
         if not banded:
             self.btb = N.dot(bt, bt.T)
-	    _g = band2array(self.g, lower=1, symmetric=True)
+            _g = band2array(self.g, lower=1, symmetric=True)
             self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3]
-	    self.rank = min(self.rank, self.btb.shape[0])
+            self.rank = min(self.rank, self.btb.shape[0])
         else:
             self.btb = N.zeros(self.g.shape, N.float64)
             nband, nbasis = self.g.shape
             for i in range(nbasis):
                 for k in range(min(nband, nbasis-i)):
-		    self.btb[k,i] = (bt[i] * bt[i+k]).sum()
+                    self.btb[k,i] = (bt[i] * bt[i+k]).sum()
 
-	    bty.shape = (1,bty.shape[0])
+            bty.shape = (1,bty.shape[0])
             self.chol, self.coef = solveh_banded(self.btb + 
 						 pen*self.g,
 						 bty, lower=1)
 
-	self.coef = N.squeeze(self.coef)
-	self.resid = y * self.weights - N.dot(self.coef, bt)	    
-	self.pen = pen
+        self.coef = N.squeeze(self.coef)
+        self.resid = y * self.weights - N.dot(self.coef, bt)	    
+        self.pen = pen
 
     def gcv(self):
-	"""
-	Generalized cross-validation score of current fit.
-	"""
+        """
+        Generalized cross-validation score of current fit.
+        """
 
-	norm_resid = (self.resid**2).sum()
-	return norm_resid / (self.df_total - self.trace())
+        norm_resid = (self.resid**2).sum()
+        return norm_resid / (self.df_total - self.trace())
 
     def df_resid(self):
-	"""
-	self.N - self.trace()
+        """
+        self.N - self.trace()
 
-	where self.N is the number of observations of last fit.
-	"""
+        where self.N is the number of observations of last fit.
+        """
 	
-	return self.N - self.trace()
+        return self.N - self.trace()
 
     def df_fit(self):
-	"""
-	= self.trace()
+        """
+        = self.trace()
 
-	How many degrees of freedom used in the fit? 
-	"""
-	return self.trace()
+        How many degrees of freedom used in the fit? 
+        """
+        return self.trace()
 
     def trace(self):
-	"""
-	Trace of the smoothing matrix S(pen)
-	"""
+        """
+        Trace of the smoothing matrix S(pen)
+        """
 
-	if self.pen > 0:
-	    _invband = _bspline.invband(self.chol.copy())
-	    tr = _trace_symbanded(_invband, self.btb, lower=1)
-	    return tr
-	else:
-	    return self.rank
+        if self.pen > 0:
+            _invband = _bspline.invband(self.chol.copy())
+            tr = _trace_symbanded(_invband, self.btb, lower=1)
+            return tr
+        else:
+            return self.rank
 
     def fit_target_df(self, y, x=None, df=None, weights=None, tol=1.0e-03):
-	"""
-	Fit smoothing spline with approximately df degrees of freedom
+        """
+        Fit smoothing spline with approximately df degrees of freedom
         used in the fit, i.e. so that self.trace() is approximately df.
 
-	In general, df must be greater than the dimension of the null space
-	of the Gram inner product. For cubic smoothing splines, this means
-	that df > 2.
+        In general, df must be greater than the dimension of the null space
+        of the Gram inner product. For cubic smoothing splines, this means
+        that df > 2.
 
         """
 
-	df = df or self.target_df
+        df = df or self.target_df
 
-	apen, bpen = 0, 1.0e-03
-	olddf = y.shape[0] - self.m
+        apen, bpen = 0, 1.0e-03
+        olddf = y.shape[0] - self.m
 
-	if hasattr(self, "pen"):
-	    self.fit(y, x=x, weights=weights, pen=self.pen)
-	    curdf = self.trace()
-	    if N.fabs(curdf - df) / df < tol: 
-		return
-	    if curdf > df:
-		apen, bpen = self.pen, 2 * self.pen
-	    else:
-		apen, bpen = 0., self.pen
+        if hasattr(self, "pen"):
+            self.fit(y, x=x, weights=weights, pen=self.pen)
+            curdf = self.trace()
+            if N.fabs(curdf - df) / df < tol:
+                return
+            if curdf > df:
+                apen, bpen = self.pen, 2 * self.pen
+            else:
+                apen, bpen = 0., self.pen
 
-	while True:
-	    curpen = 0.5 * (apen + bpen)
-	    self.fit(y, x=x, weights=weights, pen=curpen)
-	    curdf = self.trace()
-	    if curdf > df:
-		apen, bpen = curpen, 2 * curpen
-	    else:
-		apen, bpen = apen, curpen
-	    if apen >= self.penmax:
-		raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
-	    if N.fabs(curdf - df) / df < tol: 
-		break
+        while True:
+            curpen = 0.5 * (apen + bpen)
+            self.fit(y, x=x, weights=weights, pen=curpen)
+            curdf = self.trace()
+            if curdf > df:
+                apen, bpen = curpen, 2 * curpen
+            else:
+                apen, bpen = apen, curpen
+            if apen >= self.penmax:
+                raise ValueError, "penalty too large, try setting penmax \
+                   higher or decreasing df"
+            if N.fabs(curdf - df) / df < tol: 
+                break
 
     def fit_optimize_gcv(self, y, x=None, weights=None, tol=1.0e-03,
 		     bracket=(0,1.0e-03)):
-	"""
+        """
 	Fit smoothing spline trying to optimize GCV.
 
 	Try to find a bracketing interval for scipy.optimize.golden
@@ -334,15 +338,15 @@
 	sometimes difficult to find a bracketing interval.
 
         """
-    
-	def _gcv(pen, y, x):
-	    self.fit(y, x=x, pen=N.exp(pen))
-	    a = self.gcv()
-	    return a
 
-	a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
+        def _gcv(pen, y, x):
+            self.fit(y, x=x, pen=N.exp(pen))
+            a = self.gcv()
+            return a
 
+        a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
 
+
 def band2array(a, lower=0, symmetric=False, hermitian=False):
     """
     Take an upper or lower triangular banded matrix and return a matrix using
@@ -365,7 +369,7 @@
             _a += _b 
             if symmetric and j > 0: _a += _b.T
             elif hermitian and j > 0: _a += _b.conjugate().T
-	_a = _a.T
+        _a = _a.T
 
     return _a
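
The hunks above reindent a banded-trace computation (t[0].sum() + 2 * t[1:].sum(),
used by trace() through _trace_symbanded). It relies on the identity that, for
symmetric matrices stored in lower banded form, trace(A*B) is the sum of the
diagonal products plus twice the sum of the off-diagonal band products. A small
standalone NumPy check of that identity (illustrative names and data only,
independent of the sandbox code):

    import numpy as N

    n, nband = 6, 3     # matrix size and number of stored bands

    def random_banded(n, nband):
        # a[k, i] = A[i + k, i]; padding entries at the end of each row stay 0
        a = N.zeros((nband, n))
        A = N.zeros((n, n))
        for k in range(nband):
            a[k, :n - k] = N.random.standard_normal(n - k)
            for i in range(n - k):
                A[i + k, i] = A[i, i + k] = a[k, i]
        return a, A

    a, A = random_banded(n, nband)
    b, B = random_banded(n, nband)

    t = a * b                                  # elementwise band products
    print t[0].sum() + 2 * t[1:].sum()         # banded trace
    print N.trace(N.dot(A, B))                 # same value, up to rounding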
 

Modified: trunk/Lib/sandbox/models/family/family.py
===================================================================
--- trunk/Lib/sandbox/models/family/family.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/family/family.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -14,7 +14,7 @@
 
     def weights(self, mu):
         
-	"""
+        """
         Weights for IRLS step.
         """
 

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/formula.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -54,9 +54,9 @@
     # Namespace in which self.name will be looked up in, if needed
 
     def _get_namespace(self): 
-	if isinstance(self.__namespace, N.ndarray): 
-	    return self.__namespace 
-	else: return self.__namespace or default_namespace
+        if isinstance(self.__namespace, N.ndarray): 
+            return self.__namespace 
+        else: return self.__namespace or default_namespace
 
     def _set_namespace(self, value):  self.__namespace = value
     def _del_namespace(self): del self.__namespace
@@ -105,13 +105,13 @@
     def __call__(self, *args, **kw):
         """
         Return the columns associated to self in a design matrix.
-	If the term has no 'func' attribute, it returns
+        If the term has no 'func' attribute, it returns
         
-	self.namespace[self.termname]
+        self.namespace[self.termname]
 
-	else, it returns
-	
-	self.func(*args, **kw)
+        else, it returns
+        
+        self.func(*args, **kw)
 
         """
         
@@ -156,8 +156,8 @@
 
     def get_columns(self, *args, **kw):
         """
-	Calling function for factor instance.
-	"""
+        Calling function for factor instance.
+        """
 
         v = self.namespace[self._name]
         while True:
@@ -181,9 +181,9 @@
 
     def values(self, *args, **kw):
         """
-	Return the keys of the factor, rather than the columns of the design 
-	matrix.
-	"""
+        Return the keys of the factor, rather than the columns of the design 
+        matrix.
+        """
 
         del(self.func)
         val = self(*args, **kw)
@@ -204,7 +204,7 @@
 
         When adding \'intercept\' to a factor, this just returns 
 
-	formula(self, namespace=self.namespace)
+        formula(self, namespace=self.namespace)
 
         """
         
@@ -237,8 +237,8 @@
         __names = self.names()
         _names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
         value = quantitative(_names, func=self, 
-		     termname='%s:maineffect' % self.termname,
-		     transform=maineffect_func)
+                     termname='%s:maineffect' % self.termname,
+                     transform=maineffect_func)
         value.namespace = self.namespace
         return value
 
@@ -269,9 +269,9 @@
 
     def __call__(self, *args, **kw):
         """
-	A quantitative is just like term, except there is an additional
-	transformation: self.transform.
-	"""
+        A quantitative is just like term, except there is an additional
+        transformation: self.transform.
+        """
         return self.transform(term.__call__(self, *args, **kw))
 
 class formula(object):
@@ -287,9 +287,9 @@
     """
     
     def _get_namespace(self): 
-	if isinstance(self.__namespace, N.ndarray): 
-	    return self.__namespace 
-	else: return self.__namespace or default_namespace
+        if isinstance(self.__namespace, N.ndarray): 
+            return self.__namespace 
+        else: return self.__namespace or default_namespace
 
     def _set_namespace(self, value):  self.__namespace = value
     def _del_namespace(self): del self.__namespace
@@ -396,11 +396,11 @@
         """
 
         if not isinstance(query_term, formula):
-	    if type(query_term) == type("name"):
-		try: query = self[query_term]
-		except: return False
-	    elif isinstance(query_term, term):
-		return query_term.termname in self.termnames()
+            if type(query_term) == type("name"):
+                try: query = self[query_term]
+                except: return False
+            elif isinstance(query_term, term):
+                return query_term.termname in self.termnames()
         elif len(query_term.terms) == 1:
             query_term = query_term.terms[0]
             return query_term.termname in self.termnames()
@@ -525,7 +525,7 @@
                     sumterms.namespace = self.namespace
 
                     _term = quantitative(names, func=sumterms, termname=termname,
-					 transform=product_func)
+                                         transform=product_func)
                     _term.namespace = self.namespace
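
For orientation, the term/formula objects whose indentation is fixed above are
used as in the tests further down: a term looks its values up in a namespace,
and a formula stacks its terms into design-matrix rows. A short sketch along
the lines of test_formula.py (made-up data; assumes the sandbox package is
built and importable as scipy.sandbox.models):

    import numpy as N
    from scipy.sandbox.models import formula

    space = {'X': N.arange(50), 'Y': N.arange(50) * 2}
    X = formula.term('X')
    Y = formula.term('Y')
    X.namespace = Y.namespace = space

    f = X + Y
    f.namespace = space
    print f().shape          # (2, 50): one row per term
    print X()[:5]            # [0 1 2 3 4]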
 
 

Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/glm.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -29,12 +29,12 @@
         return self.family.deviance(Y, results.mu) / scale
 
     def next(self):
-	results = self.results; Y = self.Y
+        results = self.results; Y = self.Y
         self.weights = self.family.weights(results.mu)
         self.initialize(self.design)
         Z = results.predict + self.family.link.deriv(results.mu) * (Y - results.mu)
         newresults = wls_model.fit(self, Z)
-	newresults.Y = Y
+        newresults.Y = Y
         newresults.mu = self.family.link.inverse(newresults.predict)
         self.iter += 1
         return newresults
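
The next() method reindented above is one IRLS step: form the working response
Z = predict + link.deriv(mu) * (Y - mu), reweight, and refit by weighted least
squares. A self-contained sketch of the same loop for logistic regression,
using plain NumPy rather than the sandbox glm/family classes (data are made up):

    import numpy as N
    from numpy.linalg import lstsq

    N.random.seed(0)
    n = 200
    X = N.ones((n, 2)); X[:, 1] = N.random.standard_normal(n)   # design
    beta_true = N.array([-0.5, 1.0])
    p = 1. / (1. + N.exp(-N.dot(X, beta_true)))
    Y = (N.random.uniform(size=n) < p).astype(N.float64)

    beta = N.zeros(2)
    for it in range(10):
        eta = N.dot(X, beta)
        mu = 1. / (1. + N.exp(-eta))          # inverse logit link
        w = mu * (1. - mu)                    # IRLS weights
        Z = eta + (Y - mu) / w                # working response; link deriv = 1/w
        Xw = X * N.sqrt(w)[:, N.newaxis]
        beta = lstsq(Xw, Z * N.sqrt(w))[0]    # weighted least squares step

    print beta                                # roughly [-0.5, 1.0]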

Modified: trunk/Lib/sandbox/models/info.py
===================================================================
--- trunk/Lib/sandbox/models/info.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/info.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -2,6 +2,16 @@
 Statistical models
 ==================
 
+This module contains several linear statistical models and supporting tools:
+- model formulae as in R (to some extent)
+- OLS (ordinary least squares regression)
+- WLS (weighted least squares regression)
+- AR(p) regression
+- GLMs (generalized linear models)
+- robust linear models using M-estimators (with a number of standard default robust norms, as in R's rlm)
+- robust scale estimates (MAD, Huber's proposal 2)
+- mixed effects models
+- generalized additive models (GAM)
 """
 
 depends = ['weave',
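
As a pointer for the first item in the new docstring, ordinary least squares
amounts to applying the pseudo-inverse of the design matrix to the response.
A NumPy-only sketch with made-up data (it deliberately avoids the sandbox API,
whose details are in regression.py below):

    import numpy as N
    from numpy.linalg import pinv

    n = 40
    X = N.ones((n, 2)); X[:, 1] = N.arange(n)       # intercept + trend
    Y = 3. + 0.5 * N.arange(n) + N.random.standard_normal(n)

    beta = N.dot(pinv(X), Y)                        # least-squares coefficients
    resid = Y - N.dot(X, beta)
    scale = (resid ** 2).sum() / (n - X.shape[1])   # residual variance estimate
    print beta, scale                               # beta roughly [3.0, 0.5]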

Modified: trunk/Lib/sandbox/models/model.py
===================================================================
--- trunk/Lib/sandbox/models/model.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/model.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -1,6 +1,6 @@
 import numpy as N
 from numpy.linalg import inv
-import scipy.optimize
+#from scipy import optimize
 
 from scipy.sandbox.models.contrast import ContrastResults
 from scipy.sandbox.models.utils import recipr
@@ -64,10 +64,10 @@
         raise NotImplementedError
 
     def newton(self, theta):
-	raise NotImplementedError
+        raise NotImplementedError
 #         def f(theta):
 #             return -self.logL(theta)
-#         self.results = scipy.optimize.fmin(f, theta)
+#         self.results = optimize.fmin(f, theta)
         
 class LikelihoodModelResults:
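
The commented-out lines above sketch the intended newton() implementation:
hand the negative log-likelihood to a generic optimizer. A standalone version
of that idea for a toy model, a normal sample with unknown mean and log
standard deviation (illustrative only, not the sandbox likelihood classes):

    import numpy as N
    from scipy import optimize

    N.random.seed(1)
    x = 2.0 + 1.5 * N.random.standard_normal(100)

    def f(theta):
        # negative log-likelihood, dropping the constant term
        mu, logsigma = theta
        return 0.5 * ((x - mu) ** 2).sum() / N.exp(2 * logsigma) \
               + x.shape[0] * logsigma

    theta_hat = optimize.fmin(f, [0., 0.], disp=0)
    print theta_hat                 # roughly [2.0, log(1.5)]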
 

Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/regression.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -60,10 +60,10 @@
         self.initialize(design)
 
     def initialize(self, design):
-	"""
-	Set design for model, prewhitening design matrix and precomputing
-	covariance of coefficients (up to scale factor in front).
-	"""
+        """
+        Set design for model, prewhitening design matrix and precomputing
+        covariance of coefficients (up to scale factor in front).
+        """
 
         self.design = design
         self.wdesign = self.whiten(design)
@@ -73,9 +73,9 @@
         self.df_resid = self.wdesign.shape[0] - utils.rank(self.design)
 
     def whiten(self, Y):
-	"""
-	OLS model whitener does nothing: returns Y.
-	"""
+        """
+        OLS model whitener does nothing: returns Y.
+        """
 
         return Y
     
@@ -95,7 +95,7 @@
     def fit(self, Y):
         """
         Full fit of the model including estimate of covariance matrix, 
-	(whitened) residuals and scale. 
+        (whitened) residuals and scale. 
 
         """
     
@@ -160,80 +160,80 @@
     """
 
     def __init__(self, design, rho):
-	if type(rho) is type(1):
-	    self.order = rho
-	    self.rho = N.zeros(self.order, N.float64)
-	else:
-	    self.rho = N.squeeze(N.asarray(rho))
-	    if len(self.rho.shape) not in [0,1]:
-		raise ValueError, "AR parameters must be a scalar or a vector"
-	    if self.rho.shape == ():
-		self.rho.shape = (1,)
-	    self.order = self.rho.shape[0]
+        if type(rho) is type(1):
+            self.order = rho
+            self.rho = N.zeros(self.order, N.float64)
+        else:
+            self.rho = N.squeeze(N.asarray(rho))
+            if len(self.rho.shape) not in [0,1]:
+                raise ValueError, "AR parameters must be a scalar or a vector"
+            if self.rho.shape == ():
+                self.rho.shape = (1,)
+            self.order = self.rho.shape[0]
         ols_model.__init__(self, design)
 
     def iterative_fit(self, Y, niter=3):
-	"""
-	Perform an iterative two-stage procedure to estimate AR(p)
-	paramters and regression coefficients simultaneously.
-	"""
-	for i in range(niter):
-	    self.initialize(self.design)
-	    results = self.fit(Y)
-	    self.rho, _ = self.yule_walker(Y - results.predict)
+        """
+        Perform an iterative two-stage procedure to estimate AR(p)
+        parameters and regression coefficients simultaneously.
+        """
+        for i in range(niter):
+            self.initialize(self.design)
+            results = self.fit(Y)
+            self.rho, _ = self.yule_walker(Y - results.predict)
 
 
     def whiten(self, X):
-	"""
-	Whiten a series of columns according to an AR(p)
-	covariance structure.
+        """
+        Whiten a series of columns according to an AR(p)
+        covariance structure.
 
-	"""
-	X = N.asarray(X, N.float64)
-	_X = X.copy()
-	for i in range(self.order):
-	    _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)]
-	return _X
+        """
+        X = N.asarray(X, N.float64)
+        _X = X.copy()
+        for i in range(self.order):
+            _X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)]
+        return _X
 
     def yule_walker(self, X, method="unbiased", df=None):
-	"""
-	Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
-	Method can be "unbiased" or "MLE" and this determines
-	denominator in estimate of ACF at lag k. If "MLE", the denominator is 
-	n=r.shape[0], if "unbiased" the denominator is n-k.
+        """
+        Estimate AR(p) parameters from a sequence X using the Yule-Walker equations.
+        Method can be "unbiased" or "MLE" and this determines
+        denominator in estimate of ACF at lag k. If "MLE", the denominator is 
+        n=r.shape[0], if "unbiased" the denominator is n-k.
 
-	If df is supplied, then it is assumed the X has df degrees of
-	freedom rather than n.
+        If df is supplied, then it is assumed that X has df degrees of
+        freedom rather than n.
 
-	See, for example:
+        See, for example:
 
         http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
-	"""
-	
-	method = str(method).lower()
-	if method not in ["unbiased", "mle"]:
-	    raise ValueError, "ACF estimation method must be 'unbiased' \
-	    or 'MLE'"
-	X = N.asarray(X, N.float64)
-	X -= X.mean()
-	n = df or X.shape[0]
+        """
+        
+        method = str(method).lower()
+        if method not in ["unbiased", "mle"]:
+            raise ValueError, "ACF estimation method must be 'unbiased' \
+            or 'MLE'"
+        X = N.asarray(X, N.float64)
+        X -= X.mean()
+        n = df or X.shape[0]
 
-	if method == "unbiased":
-	    denom = lambda k: n - k
-	else:
-	    denom = lambda k: n
+        if method == "unbiased":
+            denom = lambda k: n - k
+        else:
+            denom = lambda k: n
 
-	if len(X.shape) != 1:
-	    raise ValueError, "expecting a vector to estimate AR parameters"
-	r = N.zeros(self.order+1, N.float64)
-	r[0] = (X**2).sum() / denom(0)
-	for k in range(1,self.order+1):
-	    r[k] = (X[0:-k]*X[k:]).sum() / denom(k)
-	R = toeplitz(r[:-1])
+        if len(X.shape) != 1:
+            raise ValueError, "expecting a vector to estimate AR parameters"
+        r = N.zeros(self.order+1, N.float64)
+        r[0] = (X**2).sum() / denom(0)
+        for k in range(1,self.order+1):
+            r[k] = (X[0:-k]*X[k:]).sum() / denom(k)
+        R = toeplitz(r[:-1])
 
-	rho = L.solve(R, r[1:])
-	sigmasq = r[0] - (r[1:]*rho).sum()
-	return rho, N.sqrt(sigmasq)
+        rho = L.solve(R, r[1:])
+        sigmasq = r[0] - (r[1:]*rho).sum()
+        return rho, N.sqrt(sigmasq)
 
 class wls_model(ols_model):
     """
@@ -272,11 +272,11 @@
         ols_model.__init__(self, design)
 
     def whiten(self, X):
-	"""
-	Whitener for WLS model, multiplies by sqrt(self.weights)
-	"""
+        """
+        Whitener for WLS model, multiplies by sqrt(self.weights)
+        """
 
-	X = N.asarray(X, N.float64)
+        X = N.asarray(X, N.float64)
 
         if X.ndim == 1:
             return X * N.sqrt(self.weights)
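
The yule_walker() method reindented above solves the Toeplitz system built
from sample autocovariances. A standalone NumPy/SciPy version of the same
computation with the "unbiased" denominator, run on a simulated AR(1) series
(illustrative data; it does not use the ar_model class):

    import numpy as N
    from numpy.linalg import solve
    from scipy.linalg import toeplitz

    N.random.seed(2)
    n, order, rho_true = 500, 1, 0.6
    X = N.zeros(n)
    for t in range(1, n):
        X[t] = rho_true * X[t - 1] + N.random.standard_normal()

    X = X - X.mean()
    r = N.zeros(order + 1, N.float64)
    r[0] = (X ** 2).sum() / n                     # lag-0 autocovariance
    for k in range(1, order + 1):
        r[k] = (X[:-k] * X[k:]).sum() / (n - k)   # "unbiased" denominator n - k

    R = toeplitz(r[:-1])
    rho = solve(R, r[1:])                         # AR coefficients
    sigma = N.sqrt(r[0] - (r[1:] * rho).sum())    # innovation standard deviation
    print rho, sigma                              # rho near 0.6, sigma near 1.0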

Modified: trunk/Lib/sandbox/models/smoothers.py
===================================================================
--- trunk/Lib/sandbox/models/smoothers.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/smoothers.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -26,44 +26,44 @@
     """
 
     def df_fit(self):
-	"""
-	Degrees of freedom used in the fit.
-	"""
-	return self.order + 1
+        """
+        Degrees of freedom used in the fit.
+        """
+        return self.order + 1
 
     def df_resid(self):
-	"""
-	Residual degrees of freedom from last fit.
-	"""
-	return self.N - self.order - 1
+        """
+        Residual degrees of freedom from last fit.
+        """
+        return self.N - self.order - 1
 
     def __init__(self, order, x=None):
-	self.order = order
-	self.coef = N.zeros((order+1,), N.float64)
-	if x is not None:
-	    self.X = N.array([x**i for i in range(order+1)]).T
+        self.order = order
+        self.coef = N.zeros((order+1,), N.float64)
+        if x is not None:
+            self.X = N.array([x**i for i in range(order+1)]).T
 
     def __call__(self, x=None):
-	if x is not None:
-	    X = N.array([(x**i) for i in range(self.order+1)])
-	else: X = self.X
+        if x is not None:
+            X = N.array([(x**i) for i in range(self.order+1)])
+        else: X = self.X
         return N.squeeze(N.dot(X.T, self.coef)) 
     
     def fit(self, y, x=None, weights=None):
-	self.N = y.shape[0]
-	if weights is None:
-	    weights = 1
-	_w = N.sqrt(weights)
-	if x is None:
-	    if not hasattr(self, "X"):
-		raise ValueError, "x needed to fit poly_smoother"
-	else:
-	    self.X = N.array([(x**i) for i in range(self.order+1)])
+        self.N = y.shape[0]
+        if weights is None:
+            weights = 1
+        _w = N.sqrt(weights)
+        if x is None:
+            if not hasattr(self, "X"):
+                raise ValueError, "x needed to fit poly_smoother"
+        else:
+            self.X = N.array([(x**i) for i in range(self.order+1)])
 
-	X = self.X * _w
+        X = self.X * _w
 
-	_y = y * _w
-	self.coef = N.dot(L.pinv(X).T, _y)
+        _y = y * _w
+        self.coef = N.dot(L.pinv(X).T, _y)
 
 class smoothing_spline(bspline):
 
@@ -81,16 +81,16 @@
         if x.shape != y.shape:
             raise ValueError, 'x and y shape do not agree, by default x are the Bspline\'s internal knots'
         
-	bt = self.basis(x)
-	if pen >= self.penmax:
-	    pen = self.penmax
+        bt = self.basis(x)
+        if pen >= self.penmax:
+            pen = self.penmax
 
         if weights is None:
-	    weights = N.array(1.)
+            weights = N.array(1.)
 
-	wmean = weights.mean()
-	_w = N.sqrt(weights / wmean)
-	bt *= _w
+        wmean = weights.mean()
+        _w = N.sqrt(weights / wmean)
+        bt *= _w
 
         # throw out rows with zeros (this happens at boundary points!)
 
@@ -99,69 +99,69 @@
         bt = bt[:,mask]
         y = y[mask]
 
-	self.df_total = y.shape[0]
+        self.df_total = y.shape[0]
 
-	if bt.shape[1] != y.shape[0]:
-	    raise ValueError, "some x values are outside range of B-spline knots"
+        if bt.shape[1] != y.shape[0]:
+            raise ValueError, "some x values are outside range of B-spline knots"
         bty = N.dot(bt, _w * y)
-	self.N = y.shape[0]
+        self.N = y.shape[0]
         if not banded:
             self.btb = N.dot(bt, bt.T)
-	    _g = band2array(self.g, lower=1, symmetric=True)
+            _g = band2array(self.g, lower=1, symmetric=True)
             self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3]
-	    self.rank = min(self.rank, self.btb.shape[0])
+            self.rank = min(self.rank, self.btb.shape[0])
         else:
             self.btb = N.zeros(self.g.shape, N.float64)
             nband, nbasis = self.g.shape
             for i in range(nbasis):
                 for k in range(min(nband, nbasis-i)):
-		    self.btb[k,i] = (bt[i] * bt[i+k]).sum()
+                    self.btb[k,i] = (bt[i] * bt[i+k]).sum()
 
-	    bty.shape = (1,bty.shape[0])
+            bty.shape = (1,bty.shape[0])
             self.chol, self.coef = solveh_banded(self.btb + 
-						 pen*self.g,
-						 bty, lower=1)
+                                                 pen*self.g,
+                                                 bty, lower=1)
 
-	self.coef = N.squeeze(self.coef)
-	self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt))
-	self.pen = pen
+        self.coef = N.squeeze(self.coef)
+        self.resid = N.sqrt(wmean) * (y * _w - N.dot(self.coef, bt))
+        self.pen = pen
 
     def gcv(self):
-	"""
-	Generalized cross-validation score of current fit.
-	"""
+        """
+        Generalized cross-validation score of current fit.
+        """
 
-	norm_resid = (self.resid**2).sum()
-	return norm_resid / (self.df_total - self.trace())
+        norm_resid = (self.resid**2).sum()
+        return norm_resid / (self.df_total - self.trace())
 
     def df_resid(self):
-	"""
-	self.N - self.trace()
+        """
+        self.N - self.trace()
 
-	where self.N is the number of observations of last fit.
-	"""
-	
-	return self.N - self.trace()
+        where self.N is the number of observations of last fit.
+        """
+        
+        return self.N - self.trace()
 
     def df_fit(self):
-	"""
-	= self.trace()
+        """
+        = self.trace()
 
-	How many degrees of freedom used in the fit? 
-	"""
-	return self.trace()
+        How many degrees of freedom used in the fit? 
+        """
+        return self.trace()
 
     def trace(self):
-	"""
-	Trace of the smoothing matrix S(pen)
-	"""
+        """
+        Trace of the smoothing matrix S(pen)
+        """
 
-	if self.pen > 0:
-	    _invband = _bspline.invband(self.chol.copy())
-	    tr = _trace_symbanded(_invband, self.btb, lower=1)
-	    return tr
-	else:
-	    return self.rank
+        if self.pen > 0:
+            _invband = _bspline.invband(self.chol.copy())
+            tr = _trace_symbanded(_invband, self.btb, lower=1)
+            return tr
+        else:
+            return self.rank
 
 class smoothing_spline_fixeddf(smoothing_spline):
 
@@ -178,34 +178,34 @@
     target_df = 5
 
     def __init__(self, knots, order=4, coef=None, M=None, target_df=None):
-	if target_df is not None:
-	    self.target_df = target_df
-	bspline.__init__(self, knots, order=order, coef=coef, M=M)
-	self.target_reached = False
+        if target_df is not None:
+            self.target_df = target_df
+        bspline.__init__(self, knots, order=order, coef=coef, M=M)
+        self.target_reached = False
 
     def fit(self, y, x=None, df=None, weights=None, tol=1.0e-03):
 
-	df = df or self.target_df
+        df = df or self.target_df
 
-	apen, bpen = 0, 1.0e-03
-	olddf = y.shape[0] - self.m
+        apen, bpen = 0, 1.0e-03
+        olddf = y.shape[0] - self.m
 
-	if not self.target_reached:
-	    while True:
-		curpen = 0.5 * (apen + bpen)
-		smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen)
-		curdf = self.trace()
-		if curdf > df:
-		    apen, bpen = curpen, 2 * curpen
-		else:
-		    apen, bpen = apen, curpen
-		    if apen >= self.penmax:
-			raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
-		if N.fabs(curdf - df) / df < tol: 
-		    self.target_reached = True
-		    break
-	else:
-	    smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen)
+        if not self.target_reached:
+            while True:
+                curpen = 0.5 * (apen + bpen)
+                smoothing_spline.fit(self, y, x=x, weights=weights, pen=curpen)
+                curdf = self.trace()
+                if curdf > df:
+                    apen, bpen = curpen, 2 * curpen
+                else:
+                    apen, bpen = apen, curpen
+                    if apen >= self.penmax:
+                        raise ValueError, "penalty too large, try setting penmax higher or decreasing df"
+                if N.fabs(curdf - df) / df < tol: 
+                    self.target_reached = True
+                    break
+        else:
+            smoothing_spline.fit(self, y, x=x, weights=weights, pen=self.pen)
 
 class smoothing_spline_gcv(smoothing_spline):
 
@@ -221,14 +221,14 @@
     """
 
     def fit(self, y, x=None, weights=None, tol=1.0e-03,
-	    bracket=(0,1.0e-03)):
+            bracket=(0,1.0e-03)):
     
-	def _gcv(pen, y, x):
-	    smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights)
-	    a = self.gcv()
-	    return a
+        def _gcv(pen, y, x):
+            smoothing_spline.fit(y, x=x, pen=N.exp(pen), weights=weights)
+            a = self.gcv()
+            return a
 
-	a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
+        a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
 
 def _trace_symbanded(a,b, lower=0):
     """
@@ -236,11 +236,11 @@
     """
 
     if lower:
-	t = _zero_triband(a * b, lower=1)
-	return t[0].sum() + 2 * t[1:].sum()
+        t = _zero_triband(a * b, lower=1)
+        return t[0].sum() + 2 * t[1:].sum()
     else:
-	t = _zero_triband(a * b, lower=0)
-	return t[-1].sum() + 2 * t[:-1].sum()
+        t = _zero_triband(a * b, lower=0)
+        return t[-1].sum() + 2 * t[:-1].sum()
 
 
 
@@ -251,7 +251,7 @@
 
     nrow, ncol = a.shape
     if lower: 
-	for i in range(nrow): a[i,(ncol-i):] = 0.
+        for i in range(nrow): a[i,(ncol-i):] = 0.
     else:
-	for i in range(nrow): a[i,0:i] = 0.
+        for i in range(nrow): a[i,0:i] = 0.
     return a
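
smoothing_spline_gcv.fit() above minimizes GCV over the log-penalty with
scipy.optimize.golden, and its counterpart fit_optimize_gcv in bspline.py
warns that finding a bracketing interval can be difficult. A self-contained
sketch of the same recipe, swapping the spline for ridge regression so no
sandbox code is needed (made-up data; same GCV form, residual sum of squares
over n minus the trace of the smoother matrix):

    import numpy as N
    from numpy.linalg import inv
    from scipy.optimize import golden

    N.random.seed(3)
    n, p = 60, 10
    X = N.random.standard_normal((n, p))
    y = N.dot(X, N.random.standard_normal(p)) + N.random.standard_normal(n)

    def gcv(logpen):
        pen = N.exp(logpen)
        S = N.dot(N.dot(X, inv(N.dot(X.T, X) + pen * N.identity(p))), X.T)
        resid = y - N.dot(S, y)
        return (resid ** 2).sum() / (n - N.trace(S))

    logpen = golden(gcv, brack=(-10, 10), tol=1.0e-03)
    print N.exp(logpen)         # penalty picked by GCV (the bracket search can
                                # fail for other data, as the docstring warns)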

Modified: trunk/Lib/sandbox/models/tests/test_formula.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_formula.py	2006-12-21 21:42:48 UTC (rev 2452)
+++ trunk/Lib/sandbox/models/tests/test_formula.py	2006-12-22 00:02:48 UTC (rev 2453)
@@ -57,43 +57,43 @@
         self.formula = self.terms[0]
         for i in range(1, 10):
             self.formula += self.terms[i]
-	self.formula.namespace = self.namespace
+        self.formula.namespace = self.namespace
 
     def test_namespace(self):
-	space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
-	space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
-	X = formula.term('X')
-	Y = formula.term('Y')
+        space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
+        space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
+        X = formula.term('X')
+        Y = formula.term('Y')
 
-	X.namespace = space1 
-	assert_almost_equal(X(), N.arange(50))
+        X.namespace = space1 
+        assert_almost_equal(X(), N.arange(50))
 
-	Y.namespace = space2
-	assert_almost_equal(Y(), N.arange(20)*2)
+        Y.namespace = space2
+        assert_almost_equal(Y(), N.arange(20)*2)
 
-	f = X + Y
+        f = X + Y
 
-	f.namespace = space1
-	self.assertEqual(f().shape, (2,50))
-	assert_almost_equal(Y(), N.arange(50)*2)
-	assert_almost_equal(X(), N.arange(50))
+        f.namespace = space1
+        self.assertEqual(f().shape, (2,50))
+        assert_almost_equal(Y(), N.arange(50)*2)
+        assert_almost_equal(X(), N.arange(50))
 
-	f.namespace = space2
-	self.assertEqual(f().shape, (2,20))
-	assert_almost_equal(Y(), N.arange(20)*2)
-	assert_almost_equal(X(), N.arange(20))
+        f.namespace = space2
+        self.assertEqual(f().shape, (2,20))
+        assert_almost_equal(Y(), N.arange(20)*2)
+        assert_almost_equal(X(), N.arange(20))
 
 
     def test_termcolumns(self):
         t1 = formula.term("A")
         t2 = formula.term("B")
         f = t1 + t2 + t1 * t2
-	def other(val):
-	    return N.array([3.2*val,4.342*val**2, 5.234*val**3])
-	q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
-	f += q
-	q.namespace = f.namespace = self.formula.namespace
-	assert_almost_equal(q(), f()[f.termcolumns(q)])
+        def other(val):
+            return N.array([3.2*val,4.342*val**2, 5.234*val**3])
+        q = formula.quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
+        f += q
+        q.namespace = f.namespace = self.formula.namespace
+        assert_almost_equal(q(), f()[f.termcolumns(q)])
 
 
     def test_str(self):
@@ -111,27 +111,27 @@
         prod = self.terms[0] * self.terms[2]
         self.formula += prod
         x = self.formula.design()
-	p = self.formula['A*C']
+        p = self.formula['A*C']
         col = self.formula.termcolumns(prod, dict=False)
         assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2])
         assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2])
-	
+        
     def test_intercept1(self):
         prod = self.terms[0] * self.terms[2]
         self.formula += formula.I
-	icol = self.formula.names().index('intercept')
-	assert_almost_equal(self.formula()[icol], N.ones((40,)))
+        icol = self.formula.names().index('intercept')
+        assert_almost_equal(self.formula()[icol], N.ones((40,)))
 
     def test_intercept2(self):
         prod = self.terms[0] * self.terms[2]
         self.formula += formula.I
-	icol = self.formula.names().index('intercept')
-	assert_almost_equal(self.formula()[icol], N.ones((40,)))
+        icol = self.formula.names().index('intercept')
+        assert_almost_equal(self.formula()[icol], N.ones((40,)))
 
     def test_intercept3(self):
         prod = self.terms[0] * formula.I
-	prod.namespace = self.formula.namespace
-	assert_almost_equal(N.squeeze(prod()), self.terms[0]())
+        prod.namespace = self.formula.namespace
+        assert_almost_equal(N.squeeze(prod()), self.terms[0]())
 
 
 
@@ -163,53 +163,53 @@
         resid = N.identity(40) - P
         self.namespace['noise'] = N.transpose(N.dot(resid, R.standard_normal((40,5))))
         terms = dummy + self.terms[2]
-	terms.namespace = self.formula.namespace
+        terms.namespace = self.formula.namespace
         c = contrast.Contrast(terms, self.formula)
         c.getmatrix()
         self.assertEquals(c.matrix.shape, (10,))
 
     def test_power(self):
     
-	t = self.terms[2]
-	t2 = t**2
-	t.namespace = t2.namespace = self.formula.namespace
-	assert_almost_equal(t()**2, t2())
+        t = self.terms[2]
+        t2 = t**2
+        t.namespace = t2.namespace = self.formula.namespace
+        assert_almost_equal(t()**2, t2())
 
     def test_quantitative(self):
-	t = self.terms[2]
-	sint = formula.quantitative('t', func=t, transform=N.sin)
-	t.namespace = sint.namespace = self.formula.namespace
-	assert_almost_equal(N.sin(t()), sint())
+        t = self.terms[2]
+        sint = formula.quantitative('t', func=t, transform=N.sin)
+        t.namespace = sint.namespace = self.formula.namespace
+        assert_almost_equal(N.sin(t()), sint())
 
     def test_factor1(self):
-	f = ['a','b','c']*10
-	fac = formula.factor('ff', set(f))
-	fac.namespace = {'ff':f}
-	self.assertEquals(list(fac.values()), f)
+        f = ['a','b','c']*10
+        fac = formula.factor('ff', set(f))
+        fac.namespace = {'ff':f}
+        self.assertEquals(list(fac.values()), f)
 
     def test_factor2(self):
-	f = ['a','b','c']*10
-	fac = formula.factor('ff', set(f))
-	fac.namespace = {'ff':f}
-	self.assertEquals(fac().shape, (3,30))
+        f = ['a','b','c']*10
+        fac = formula.factor('ff', set(f))
+        fac.namespace = {'ff':f}
+        self.assertEquals(fac().shape, (3,30))
 
     def test_factor3(self):
-	f = ['a','b','c']*10
-	fac = formula.factor('ff', set(f))
-	fac.namespace = {'ff':f}
-	m = fac.main_effect(reference=1)
-	self.assertEquals(m().shape, (2,30))
+        f = ['a','b','c']*10
+        fac = formula.factor('ff', set(f))
+        fac.namespace = {'ff':f}
+        m = fac.main_effect(reference=1)
+        self.assertEquals(m().shape, (2,30))
 
     def test_factor4(self):
-	f = ['a','b','c']*10
-	fac = formula.factor('ff', set(f))
-	fac.namespace = {'ff':f}
-	m = fac.main_effect(reference=2)
-	r = N.array([N.identity(3)]*10)
-	r.shape = (30,3)
-	r = r.T
-	_m = N.array([r[0]-r[2],r[1]-r[2]])
-	assert_almost_equal(_m, m())
+        f = ['a','b','c']*10
+        fac = formula.factor('ff', set(f))
+        fac.namespace = {'ff':f}
+        m = fac.main_effect(reference=2)
+        r = N.array([N.identity(3)]*10)
+        r.shape = (30,3)
+        r = r.T
+        _m = N.array([r[0]-r[2],r[1]-r[2]])
+        assert_almost_equal(_m, m())
 
     def test_contrast4(self):
     



