[Scipy-svn] r2703 - in trunk/Lib/sandbox/models: . family robust tests

Sun Feb 11 23:05:39 EST 2007

Author: jarrod.millman
Date: 2007-02-11 22:05:31 -0600 (Sun, 11 Feb 2007)
New Revision: 2703

Modified:
   trunk/Lib/sandbox/models/__init__.py
   trunk/Lib/sandbox/models/family/varfuncs.py
   trunk/Lib/sandbox/models/formula.py
   trunk/Lib/sandbox/models/gam.py
   trunk/Lib/sandbox/models/glm.py
   trunk/Lib/sandbox/models/info.py
   trunk/Lib/sandbox/models/mixed.py
   trunk/Lib/sandbox/models/regression.py
   trunk/Lib/sandbox/models/rlm.py
   trunk/Lib/sandbox/models/robust/__init__.py
   trunk/Lib/sandbox/models/robust/scale.py
   trunk/Lib/sandbox/models/smoothers.py
   trunk/Lib/sandbox/models/tests/test_formula.py
   trunk/Lib/sandbox/models/tests/test_utils.py
   trunk/Lib/sandbox/models/utils.py
Log:
some minor documentation improvements


Modified: trunk/Lib/sandbox/models/__init__.py
===================================================================

--- trunk/Lib/sandbox/models/__init__.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/__init__.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -2,6 +2,8 @@
 # models - Statistical Models
 #
 
+__docformat__ = 'restructuredtext'
+
 from scipy.sandbox.models.info import __doc__
 
 import scipy.sandbox.models.model

Modified: trunk/Lib/sandbox/models/family/varfuncs.py
===================================================================
--- trunk/Lib/sandbox/models/family/varfuncs.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/family/varfuncs.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,5 @@
+__docformat__ = 'restructuredtext'
+
 import numpy as N
 
 class VarianceFunction:
@@ -4,7 +6,6 @@
     """
     Variance function that relates the variance of a random variable
     to its mean. Defaults to 1.
-
     """
 
     def __call__(self, mu):
@@ -13,7 +14,6 @@
 constant = VarianceFunction()
 
 class Power:
-
     """
     Variance function:
 
@@ -27,7 +27,6 @@
         return N.power(N.fabs(mu), self.power)
 
 class Binomial:
-
     """
     Binomial variance function
 
@@ -50,4 +49,3 @@
 mu_squared = Power(power=2)
 mu_cubed = Power(power=3)
 binary = Binomial()
-    

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/formula.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Provides the basic classes needed to specify statistical models.
+"""
 import copy
 import types
 import numpy as N
@@ -2,6 +5,7 @@
 
+__docformat__ = 'restructuredtext'
+
 default_namespace = {}
 
 class term(object):
-
     """
@@ -13,7 +17,6 @@
     defaults to formula.default_namespace. 
     When called in an instance of formula, 
     the namespace used is that formula's namespace.
-    
     """
 
     def __pow__(self, power):
@@ -106,13 +109,9 @@
         """
         Return the columns associated to self in a design matrix.
         If the term has no 'func' attribute, it returns
-        
-        self.namespace[self.termname]
-
+        ``self.namespace[self.termname]``
         else, it returns
-        
-        self.func(*args, **kw)
-
+        ``self.func(*args, **kw)``
         """
         
         if not hasattr(self, 'func'):
@@ -243,7 +242,6 @@
         return value
 
 class quantitative(term):
-
     """
     A subclass of term that can be used to apply point transformations
     of another term, i.e. to take powers:
@@ -260,7 +258,6 @@
     >>> x3.namespace = x.namespace
     >>> print N.allclose(x()**2, x3())
     True
-
     """
 
     def __init__(self, name, func=None, termname=None, transform=lambda x: x):
@@ -275,9 +272,7 @@
         return self.transform(term.__call__(self, *args, **kw))
 
 class formula(object):
-
     """
-
     A formula object for manipulating design matrices in regression models,
     essentially consisting of a list of term instances.
 
@@ -302,11 +297,9 @@
     def __init__(self, termlist, namespace=default_namespace):
         """
         Create a formula from either:
-
-        i) a formula object
-        ii) a sequence of term instances
-        iii) one term
-
+         i. a `formula` object
+         ii. a sequence of `term` instances
+         iii. one `term`
         """
 
 
@@ -457,7 +450,7 @@
 
     def design(self, *args, **kw):
         """
-        transpose(self(*args, **kw))
+        ``transpose(self(*args, **kw))``
         """
         return self(*args, **kw).T
 
@@ -607,12 +600,11 @@
 only term in the formula, then a keywords argument
 \'nrow\' is needed.
 
->>> from formula import *
+>>> from scipy.sandbox.models.formula import formula, I
 >>> I()
-1
+array(1.0)
 >>> I(nrow=5)
-array([1, 1, 1, 1, 1])
-
+array([ 1.,  1.,  1.,  1.,  1.])
 >>> f=formula(I)
 >>> f(nrow=5)
 array([1, 1, 1, 1, 1])

Modified: trunk/Lib/sandbox/models/gam.py
===================================================================
--- trunk/Lib/sandbox/models/gam.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/gam.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Generalized additive models
+"""
 import numpy as N
 
 from scipy.sandbox.models import family

Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/glm.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+General linear model
+"""
 import numpy as N
 from scipy.sandbox.models import family
 from scipy.sandbox.models.regression import wls_model

Modified: trunk/Lib/sandbox/models/info.py
===================================================================
--- trunk/Lib/sandbox/models/info.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/info.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,18 +1,24 @@
 """
 Statistical models
-==================
 
-This module contains a several linear statistical models
-- model formulae as in R (to some extent)
-- OLS (ordinary least square regression)
-- WLS (weighted least square regression)
-- ARp regression
-- GLMS (generalized linear models)
-- robust linear models using M estimators (with a number of standard default robust norms as in R's rlm)
-- robust scale estimates (MAD, Huber's proposal 2).
-- mixed effects models
-- generalized additive models (gam)
+ - model `formula`
+ - standard `regression` models
+
+  - `ols_model` (ordinary least square regression)
+  - `wls_model` (weighted least square regression)
+  - `ar_model` (autoregression)
+
+ - `glm.model` (generalized linear models)
+ - robust statistical models
+
+  - `rlm.model` (robust linear models using M estimators)
+  - `robust.norms` estimates
+  - `robust.scale` estimates (MAD, Huber's proposal 2).
+
+ - `mixed` effects models
+ - `gam` (generalized additive models)
 """
+__docformat__ = 'restructuredtext en'
 
 depends = ['weave',
            'special.orthogonal',

Modified: trunk/Lib/sandbox/models/mixed.py
===================================================================
--- trunk/Lib/sandbox/models/mixed.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/mixed.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,7 @@
+"""
+Mixed effects models
+"""
+
 import numpy as N
 import numpy.linalg as L
 from scipy.sandbox.models.formula import formula, I
@@ -3,5 +7,4 @@
 
 class Unit:
-
     """
     Individual experimental unit for 

Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/regression.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -16,6 +16,8 @@
 
 """
 
+__docformat__ = 'restructuredtext en'
+
 import numpy as N
 import numpy.linalg as L
 from scipy.linalg import norm, toeplitz
@@ -24,10 +26,11 @@
 from scipy.sandbox.models import utils
 
 class ols_model(likelihood_model):
-    
     """
     A simple ordinary least squares model.
 
+    Examples
+    --------
     >>> import numpy as N
     >>> 
     >>> from scipy.sandbox.models.formula import term, I
@@ -46,16 +49,22 @@
     >>> results.t()
     array([ 0.98019606,  1.87867287])
     >>> print results.Tcontrast([0,1])
-    <T contrast: effect=2.142857, sd=1.140623, t=1.878673, df_denom=5, pval=0.042804>
+    <T contrast: effect=2.14285714286, sd=1.14062281591, t=1.87867287326, df_denom=5>
     >>> print results.Fcontrast(N.identity(2))
-    <F contrast: F=19.460784, df_denom=5, df_num=2, pval=0.123740>
->>> 
+    <F contrast: F=19.4607843137, df_denom=5, df_num=2>
     """
 
     def logL(self, b, Y):
         return -norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2.
 
     def __init__(self, design):
+        """
+        Create a `ols_model` from a design.
+
+        :Parameters:
+            design : TODO
+                TODO
+        """
         likelihood_model.__init__(self)
         self.initialize(design)
 
@@ -63,6 +72,10 @@
         """
         Set design for model, prewhitening design matrix and precomputing
         covariance of coefficients (up to scale factor in front).
+
+        :Parameters:
+            design : TODO
+                TODO
         """
 
         self.design = design
@@ -117,6 +130,11 @@
     """
     A regression model with an AR(p) covariance structure.
 
+    The linear autoregressive process of order p--AR(p)--is defined as:
+        TODO
+
+    Examples
+    --------
     >>> import numpy as N
     >>> import numpy.random as R
     >>> 
@@ -146,17 +164,14 @@
     >>> results.t()
     array([ 30.796394  ,  -2.66543144])
     >>> print results.Tcontrast([0,1])
-    <T contrast: effect=-0.561455, sd=0.210643, t=-2.665431, df_denom=5, pval=0.044592>
+    <T contrast: effect=-0.561454972239, sd=0.210643186553, t=-2.66543144085, df_denom=5>
     >>> print results.Fcontrast(N.identity(2))
-    <F contrast: F=2762.428127, df_denom=5, df_num=2, pval=0.000000>
-    >>> 
-
+    <F contrast: F=2762.42812716, df_denom=5, df_num=2>
+    >>>
     >>> model.rho = N.array([0,0])
     >>> model.iterative_fit(data['Y'], niter=3)
     >>> print model.rho
     [-0.61887622 -0.88137957]
-    >>> 
-
     """
 
     def __init__(self, design, rho):
@@ -175,7 +190,13 @@
     def iterative_fit(self, Y, niter=3):
         """
         Perform an iterative two-stage procedure to estimate AR(p)
-        paramters and regression coefficients simultaneously.
+        parameters and regression coefficients simultaneously.
+
+        :Parameters:
+            Y : TODO
+                TODO
+            niter : ``integer``
+                the number of iterations
         """
         for i in range(niter):
             self.initialize(self.design)
@@ -188,6 +209,9 @@
         Whiten a series of columns according to an AR(p)
         covariance structure.
 
+        :Parameters:
+            X : TODO
+                TODO
         """
         X = N.asarray(X, N.float64)
         _X = X.copy()
@@ -198,16 +222,25 @@
     def yule_walker(self, X, method="unbiased", df=None):
         """
         Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
-        Method can be "unbiased" or "MLE" and this determines
-        denominator in estimate of ACF at lag k. If "MLE", the denominator is 
-        n=r.shape[0], if "unbiased" the denominator is n-k.
 
-        If df is supplied, then it is assumed the X has df degrees of
-        freedom rather than n.
+        unbiased or maximum-likelihood estimator (mle)
 
         See, for example:
 
         http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+
+        :Parameters:
+            X : TODO
+                TODO
+            method : ``string``
+                Method can be "unbiased" or "mle" and this determines
+                denominator in estimate of autocorrelation function (ACF)
+                at lag k. If "mle", the denominator is n=r.shape[0], if
+                "unbiased" the denominator is n-k.
+            df : ``integer``
+                Specifies the degrees of freedom. If df is supplied,
+                then it is assumed the X has df degrees of
+                freedom rather than n.
         """
         
         method = str(method).lower()
@@ -237,7 +270,6 @@
 
 class wls_model(ols_model):
     """
-
     A regression model with diagonal but non-identity covariance
     structure. The weights are presumed to be
     (proportional to the) inverse of the
@@ -261,10 +293,9 @@
     >>> results.t()
     array([ 0.35684428,  2.0652652 ])
     >>> print results.Tcontrast([0,1])
-    <T contrast: effect=2.916667, sd=1.412248, t=2.065265, df_denom=5, pval=0.035345>
+    <T contrast: effect=2.91666666667, sd=1.41224801095, t=2.06526519708, df_denom=5>
     >>> print results.Fcontrast(N.identity(2))
-    <F contrast: F=26.998607, df_denom=5, df_num=2, pval=0.110763>
-
+    <F contrast: F=26.9986072423, df_denom=5, df_num=2>
     """
 
     def __init__(self, design, weights=1):

Modified: trunk/Lib/sandbox/models/rlm.py
===================================================================
--- trunk/Lib/sandbox/models/rlm.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/rlm.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Robust linear models
+"""
 import numpy as N
 
 from scipy.sandbox.models.regression import wls_model

Modified: trunk/Lib/sandbox/models/robust/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/robust/__init__.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/robust/__init__.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Robust statistical models
+"""
 import numpy as N
 import numpy.linalg as L
 
@@ -3,5 +6,2 @@
 from scipy.sandbox.models.robust import norms
 from scipy.sandbox.models.robust.scale import MAD
-
-
-

Modified: trunk/Lib/sandbox/models/robust/scale.py
===================================================================
--- trunk/Lib/sandbox/models/robust/scale.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/robust/scale.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,6 +1,6 @@
 import numpy as N
-import scipy
-import scipy.stats
+from scipy import median
+from scipy.stats import norm
 
 def MAD(a, c=0.6745):
     """
@@ -11,8 +11,8 @@
     """
 
     a = N.asarray(a, N.float64)
-    d = N.multiply.outer(scipy.median(a), N.ones(a.shape[1:]))
-    return scipy.median(N.fabs(a - d) / c)
+    d = N.multiply.outer(median(a), N.ones(a.shape[1:]))
+    return median(N.fabs(a - d) / c)
 
 class Huber:
     """
@@ -25,8 +25,8 @@
     c = 1.5
     tol = 1.0e-06
 
-    tmp = 2 * scipy.stats.norm.cdf(c) - 1
-    gamma = tmp + c**2 * (1 - tmp) - 2 * c * scipy.stats.norm.pdf(c)
+    tmp = 2 * norm.cdf(c) - 1
+    gamma = tmp + c**2 * (1 - tmp) - 2 * c * norm.pdf(c)
     del(tmp)
     
     niter = 10
@@ -41,7 +41,7 @@
         self.a = N.asarray(a, N.float64)
         if mu is None:
             self.n = self.a.shape[0] - 1
-            self.mu = N.multiply.outer(scipy.median(self.a), N.ones(self.a.shape[1:]))
+            self.mu = N.multiply.outer(median(self.a), N.ones(self.a.shape[1:]))
             self.est_mu = True
         else:
             self.n = self.a.shape[0]

Modified: trunk/Lib/sandbox/models/smoothers.py
===================================================================
--- trunk/Lib/sandbox/models/smoothers.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/smoothers.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,7 +1,6 @@
 """
 This module contains scatterplot smoothers, that is classes
 who generate a smooth fit of a set of (x,y) pairs.
-
 """
 
 import numpy as N
@@ -15,7 +14,6 @@
 
 
 class poly_smoother:
-
     """
     Polynomial smoother up to a given order.
     Fit based on weighted least squares.

Modified: trunk/Lib/sandbox/models/tests/test_formula.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_formula.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/tests/test_formula.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -8,7 +8,6 @@
 import numpy.random as R
 import numpy.linalg as L
 from numpy.testing import assert_almost_equal, NumpyTest, NumpyTestCase
-import scipy
 
 from scipy.sandbox.models import utils, formula, contrast
 

Modified: trunk/Lib/sandbox/models/tests/test_utils.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_utils.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/tests/test_utils.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -6,7 +6,6 @@
 import numpy.random as R
 from numpy.testing import assert_almost_equal, NumpyTest, NumpyTestCase
 
-import scipy
 from scipy.sandbox.models import utils
 
 class test_Utils(NumpyTestCase):

Modified: trunk/Lib/sandbox/models/utils.py
===================================================================
--- trunk/Lib/sandbox/models/utils.py	2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/utils.py	2007-02-12 04:05:31 UTC (rev 2703)
@@ -3,6 +3,8 @@
 import scipy.interpolate
 import scipy.linalg
 
+__docformat__ = 'restructuredtext'
+
 def recipr(X):
     """
     Return the reciprocal of an array, setting all entries less than or
@@ -72,25 +74,27 @@
     return N.asarray(N.transpose(value)).astype(N.float64)
 
 class StepFunction:
-    '''A basic step function: values at the ends are handled in the simplest way possible: everything to the left of x[0] is set to ival; everything to the right of x[-1] is set to y[-1].
-    
+    """
+    A basic step function: values at the ends are handled in the simplest
+    way possible: everything to the left of x[0] is set to ival; everything
+    to the right of x[-1] is set to y[-1].
+
+    Examples
+    --------
+    >>> from numpy import arange
+    >>> from scipy.sandbox.models.utils import StepFunction
     >>>
-    >>> from numpy import *
-    >>>
     >>> x = arange(20)
     >>> y = arange(20)
-    >>>
     >>> f = StepFunction(x, y)
     >>>
     >>> print f(3.2)
-    3
+    3.0
     >>> print f([[3.2,4.5],[24,-3.1]])
-    [[ 3  4]
-     [19  0]]
-    >>>
+    [[  3.   4.]
+     [ 19.   0.]]
+    """
 
-    '''
-
     def __init__(self, x, y, ival=0., sorted=False):
             
         _x = N.asarray(x)