[Scipy-svn] r2703 - in trunk/Lib/sandbox/models: . family robust tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Feb 11 23:05:39 EST 2007
Author: jarrod.millman
Date: 2007-02-11 22:05:31 -0600 (Sun, 11 Feb 2007)
New Revision: 2703
Modified:
trunk/Lib/sandbox/models/__init__.py
trunk/Lib/sandbox/models/family/varfuncs.py
trunk/Lib/sandbox/models/formula.py
trunk/Lib/sandbox/models/gam.py
trunk/Lib/sandbox/models/glm.py
trunk/Lib/sandbox/models/info.py
trunk/Lib/sandbox/models/mixed.py
trunk/Lib/sandbox/models/regression.py
trunk/Lib/sandbox/models/rlm.py
trunk/Lib/sandbox/models/robust/__init__.py
trunk/Lib/sandbox/models/robust/scale.py
trunk/Lib/sandbox/models/smoothers.py
trunk/Lib/sandbox/models/tests/test_formula.py
trunk/Lib/sandbox/models/tests/test_utils.py
trunk/Lib/sandbox/models/utils.py
Log:
some minor documentation improvements
Modified: trunk/Lib/sandbox/models/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/__init__.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/__init__.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -2,6 +2,8 @@
# models - Statistical Models
#
+__docformat__ = 'restructuredtext'
+
from scipy.sandbox.models.info import __doc__
import scipy.sandbox.models.model
Modified: trunk/Lib/sandbox/models/family/varfuncs.py
===================================================================
--- trunk/Lib/sandbox/models/family/varfuncs.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/family/varfuncs.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,5 @@
+__docformat__ = 'restructuredtext'
+
import numpy as N
class VarianceFunction:
@@ -4,7 +6,6 @@
"""
Variance function that relates the variance of a random variable
to its mean. Defaults to 1.
-
"""
def __call__(self, mu):
@@ -13,7 +14,6 @@
constant = VarianceFunction()
class Power:
-
"""
Variance function:
@@ -27,7 +27,6 @@
return N.power(N.fabs(mu), self.power)
class Binomial:
-
"""
Binomial variance function
@@ -50,4 +49,3 @@
mu_squared = Power(power=2)
mu_cubed = Power(power=3)
binary = Binomial()
-
Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/formula.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Provides the basic classes needed to specify statistical models.
+"""
import copy
import types
import numpy as N
@@ -2,6 +5,7 @@
+__docformat__ = 'restructuredtext'
+
default_namespace = {}
class term(object):
-
"""
@@ -13,7 +17,6 @@
defaults to formula.default_namespace.
When called in an instance of formula,
the namespace used is that formula's namespace.
-
"""
def __pow__(self, power):
@@ -106,13 +109,9 @@
"""
Return the columns associated to self in a design matrix.
If the term has no 'func' attribute, it returns
-
- self.namespace[self.termname]
-
+ ``self.namespace[self.termname]``
else, it returns
-
- self.func(*args, **kw)
-
+ ``self.func(*args, **kw)``
"""
if not hasattr(self, 'func'):
@@ -243,7 +242,6 @@
return value
class quantitative(term):
-
"""
A subclass of term that can be used to apply point transformations
of another term, i.e. to take powers:
@@ -260,7 +258,6 @@
>>> x3.namespace = x.namespace
>>> print N.allclose(x()**2, x3())
True
-
"""
def __init__(self, name, func=None, termname=None, transform=lambda x: x):
@@ -275,9 +272,7 @@
return self.transform(term.__call__(self, *args, **kw))
class formula(object):
-
"""
-
A formula object for manipulating design matrices in regression models,
essentially consisting of a list of term instances.
@@ -302,11 +297,9 @@
def __init__(self, termlist, namespace=default_namespace):
"""
Create a formula from either:
-
- i) a formula object
- ii) a sequence of term instances
- iii) one term
-
+ i. a `formula` object
+ ii. a sequence of `term` instances
+ iii. one `term`
"""
@@ -457,7 +450,7 @@
def design(self, *args, **kw):
"""
- transpose(self(*args, **kw))
+ ``transpose(self(*args, **kw))``
"""
return self(*args, **kw).T
@@ -607,12 +600,11 @@
only term in the formula, then a keywords argument
\'nrow\' is needed.
->>> from formula import *
+>>> from scipy.sandbox.models.formula import formula, I
>>> I()
-1
+array(1.0)
>>> I(nrow=5)
-array([1, 1, 1, 1, 1])
-
+array([ 1., 1., 1., 1., 1.])
>>> f=formula(I)
>>> f(nrow=5)
array([1, 1, 1, 1, 1])
Modified: trunk/Lib/sandbox/models/gam.py
===================================================================
--- trunk/Lib/sandbox/models/gam.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/gam.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Generalized additive models
+"""
import numpy as N
from scipy.sandbox.models import family
Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/glm.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+General linear model
+"""
import numpy as N
from scipy.sandbox.models import family
from scipy.sandbox.models.regression import wls_model
Modified: trunk/Lib/sandbox/models/info.py
===================================================================
--- trunk/Lib/sandbox/models/info.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/info.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,18 +1,24 @@
"""
Statistical models
-==================
-This module contains a several linear statistical models
-- model formulae as in R (to some extent)
-- OLS (ordinary least square regression)
-- WLS (weighted least square regression)
-- ARp regression
-- GLMS (generalized linear models)
-- robust linear models using M estimators (with a number of standard default robust norms as in R's rlm)
-- robust scale estimates (MAD, Huber's proposal 2).
-- mixed effects models
-- generalized additive models (gam)
+ - model `formula`
+ - standard `regression` models
+
+ - `ols_model` (ordinary least square regression)
+ - `wls_model` (weighted least square regression)
+ - `ar_model` (autoregression)
+
+ - `glm.model` (generalized linear models)
+ - robust statistical models
+
+ - `rlm.model` (robust linear models using M estimators)
+ - `robust.norms` estimates
+ - `robust.scale` estimates (MAD, Huber's proposal 2).
+
+ - `mixed` effects models
+ - `gam` (generalized additive models)
"""
+__docformat__ = 'restructuredtext en'
depends = ['weave',
'special.orthogonal',
Modified: trunk/Lib/sandbox/models/mixed.py
===================================================================
--- trunk/Lib/sandbox/models/mixed.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/mixed.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,7 @@
+"""
+Mixed effects models
+"""
+
import numpy as N
import numpy.linalg as L
from scipy.sandbox.models.formula import formula, I
@@ -3,5 +7,4 @@
class Unit:
-
"""
Individual experimental unit for
Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/regression.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -16,6 +16,8 @@
"""
+__docformat__ = 'restructuredtext en'
+
import numpy as N
import numpy.linalg as L
from scipy.linalg import norm, toeplitz
@@ -24,10 +26,11 @@
from scipy.sandbox.models import utils
class ols_model(likelihood_model):
-
"""
A simple ordinary least squares model.
+ Examples
+ --------
>>> import numpy as N
>>>
>>> from scipy.sandbox.models.formula import term, I
@@ -46,16 +49,22 @@
>>> results.t()
array([ 0.98019606, 1.87867287])
>>> print results.Tcontrast([0,1])
- <T contrast: effect=2.142857, sd=1.140623, t=1.878673, df_denom=5, pval=0.042804>
+ <T contrast: effect=2.14285714286, sd=1.14062281591, t=1.87867287326, df_denom=5>
>>> print results.Fcontrast(N.identity(2))
- <F contrast: F=19.460784, df_denom=5, df_num=2, pval=0.123740>
->>>
+ <F contrast: F=19.4607843137, df_denom=5, df_num=2>
"""
def logL(self, b, Y):
return -norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2.
def __init__(self, design):
+ """
+ Create a `ols_model` from a design.
+
+ :Parameters:
+ design : TODO
+ TODO
+ """
likelihood_model.__init__(self)
self.initialize(design)
@@ -63,6 +72,10 @@
"""
Set design for model, prewhitening design matrix and precomputing
covariance of coefficients (up to scale factor in front).
+
+ :Parameters:
+ design : TODO
+ TODO
"""
self.design = design
@@ -117,6 +130,11 @@
"""
A regression model with an AR(p) covariance structure.
+ The linear autoregressive process of order p--AR(p)--is defined as:
+ TODO
+
+ Examples
+ --------
>>> import numpy as N
>>> import numpy.random as R
>>>
@@ -146,17 +164,14 @@
>>> results.t()
array([ 30.796394 , -2.66543144])
>>> print results.Tcontrast([0,1])
- <T contrast: effect=-0.561455, sd=0.210643, t=-2.665431, df_denom=5, pval=0.044592>
+ <T contrast: effect=-0.561454972239, sd=0.210643186553, t=-2.66543144085, df_denom=5>
>>> print results.Fcontrast(N.identity(2))
- <F contrast: F=2762.428127, df_denom=5, df_num=2, pval=0.000000>
- >>>
-
+ <F contrast: F=2762.42812716, df_denom=5, df_num=2>
+ >>>
>>> model.rho = N.array([0,0])
>>> model.iterative_fit(data['Y'], niter=3)
>>> print model.rho
[-0.61887622 -0.88137957]
- >>>
-
"""
def __init__(self, design, rho):
@@ -175,7 +190,13 @@
def iterative_fit(self, Y, niter=3):
"""
Perform an iterative two-stage procedure to estimate AR(p)
- paramters and regression coefficients simultaneously.
+ parameters and regression coefficients simultaneously.
+
+ :Parameters:
+ Y : TODO
+ TODO
+ niter : ``integer``
+ the number of iterations
"""
for i in range(niter):
self.initialize(self.design)
@@ -188,6 +209,9 @@
Whiten a series of columns according to an AR(p)
covariance structure.
+ :Parameters:
+ X : TODO
+ TODO
"""
X = N.asarray(X, N.float64)
_X = X.copy()
@@ -198,16 +222,25 @@
def yule_walker(self, X, method="unbiased", df=None):
"""
Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
- Method can be "unbiased" or "MLE" and this determines
- denominator in estimate of ACF at lag k. If "MLE", the denominator is
- n=r.shape[0], if "unbiased" the denominator is n-k.
- If df is supplied, then it is assumed the X has df degrees of
- freedom rather than n.
+ unbiased or maximum-likelihood estimator (mle)
See, for example:
http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
+
+ :Parameters:
+ X : TODO
+ TODO
+ method : ``string``
+ Method can be "unbiased" or "mle" and this determines
+ denominator in estimate of autocorrelation function (ACF)
+ at lag k. If "mle", the denominator is n=r.shape[0], if
+ "unbiased" the denominator is n-k.
+ df : ``integer``
+ Specifies the degrees of freedom. If df is supplied,
+ then it is assumed the X has df degrees of
+ freedom rather than n.
"""
method = str(method).lower()
@@ -237,7 +270,6 @@
class wls_model(ols_model):
"""
-
A regression model with diagonal but non-identity covariance
structure. The weights are presumed to be
(proportional to the) inverse of the
@@ -261,10 +293,9 @@
>>> results.t()
array([ 0.35684428, 2.0652652 ])
>>> print results.Tcontrast([0,1])
- <T contrast: effect=2.916667, sd=1.412248, t=2.065265, df_denom=5, pval=0.035345>
+ <T contrast: effect=2.91666666667, sd=1.41224801095, t=2.06526519708, df_denom=5>
>>> print results.Fcontrast(N.identity(2))
- <F contrast: F=26.998607, df_denom=5, df_num=2, pval=0.110763>
-
+ <F contrast: F=26.9986072423, df_denom=5, df_num=2>
"""
def __init__(self, design, weights=1):
Modified: trunk/Lib/sandbox/models/rlm.py
===================================================================
--- trunk/Lib/sandbox/models/rlm.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/rlm.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Robust linear models
+"""
import numpy as N
from scipy.sandbox.models.regression import wls_model
Modified: trunk/Lib/sandbox/models/robust/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/robust/__init__.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/robust/__init__.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,3 +1,6 @@
+"""
+Robust statistical models
+"""
import numpy as N
import numpy.linalg as L
@@ -3,5 +6,2 @@
from scipy.sandbox.models.robust import norms
from scipy.sandbox.models.robust.scale import MAD
-
-
-
Modified: trunk/Lib/sandbox/models/robust/scale.py
===================================================================
--- trunk/Lib/sandbox/models/robust/scale.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/robust/scale.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,6 +1,6 @@
import numpy as N
-import scipy
-import scipy.stats
+from scipy import median
+from scipy.stats import norm
def MAD(a, c=0.6745):
"""
@@ -11,8 +11,8 @@
"""
a = N.asarray(a, N.float64)
- d = N.multiply.outer(scipy.median(a), N.ones(a.shape[1:]))
- return scipy.median(N.fabs(a - d) / c)
+ d = N.multiply.outer(median(a), N.ones(a.shape[1:]))
+ return median(N.fabs(a - d) / c)
class Huber:
"""
@@ -25,8 +25,8 @@
c = 1.5
tol = 1.0e-06
- tmp = 2 * scipy.stats.norm.cdf(c) - 1
- gamma = tmp + c**2 * (1 - tmp) - 2 * c * scipy.stats.norm.pdf(c)
+ tmp = 2 * norm.cdf(c) - 1
+ gamma = tmp + c**2 * (1 - tmp) - 2 * c * norm.pdf(c)
del(tmp)
niter = 10
@@ -41,7 +41,7 @@
self.a = N.asarray(a, N.float64)
if mu is None:
self.n = self.a.shape[0] - 1
- self.mu = N.multiply.outer(scipy.median(self.a), N.ones(self.a.shape[1:]))
+ self.mu = N.multiply.outer(median(self.a), N.ones(self.a.shape[1:]))
self.est_mu = True
else:
self.n = self.a.shape[0]
Modified: trunk/Lib/sandbox/models/smoothers.py
===================================================================
--- trunk/Lib/sandbox/models/smoothers.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/smoothers.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -1,7 +1,6 @@
"""
This module contains scatterplot smoothers, that is classes
who generate a smooth fit of a set of (x,y) pairs.
-
"""
import numpy as N
@@ -15,7 +14,6 @@
class poly_smoother:
-
"""
Polynomial smoother up to a given order.
Fit based on weighted least squares.
Modified: trunk/Lib/sandbox/models/tests/test_formula.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_formula.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/tests/test_formula.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -8,7 +8,6 @@
import numpy.random as R
import numpy.linalg as L
from numpy.testing import assert_almost_equal, NumpyTest, NumpyTestCase
-import scipy
from scipy.sandbox.models import utils, formula, contrast
Modified: trunk/Lib/sandbox/models/tests/test_utils.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_utils.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/tests/test_utils.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -6,7 +6,6 @@
import numpy.random as R
from numpy.testing import assert_almost_equal, NumpyTest, NumpyTestCase
-import scipy
from scipy.sandbox.models import utils
class test_Utils(NumpyTestCase):
Modified: trunk/Lib/sandbox/models/utils.py
===================================================================
--- trunk/Lib/sandbox/models/utils.py 2007-02-12 03:13:11 UTC (rev 2702)
+++ trunk/Lib/sandbox/models/utils.py 2007-02-12 04:05:31 UTC (rev 2703)
@@ -3,6 +3,8 @@
import scipy.interpolate
import scipy.linalg
+__docformat__ = 'restructuredtext'
+
def recipr(X):
"""
Return the reciprocal of an array, setting all entries less than or
@@ -72,25 +74,27 @@
return N.asarray(N.transpose(value)).astype(N.float64)
class StepFunction:
- '''A basic step function: values at the ends are handled in the simplest way possible: everything to the left of x[0] is set to ival; everything to the right of x[-1] is set to y[-1].
-
+ """
+ A basic step function: values at the ends are handled in the simplest
+ way possible: everything to the left of x[0] is set to ival; everything
+ to the right of x[-1] is set to y[-1].
+
+ Examples
+ --------
+ >>> from numpy import arange
+ >>> from scipy.sandbox.models.utils import StepFunction
>>>
- >>> from numpy import *
- >>>
>>> x = arange(20)
>>> y = arange(20)
- >>>
>>> f = StepFunction(x, y)
>>>
>>> print f(3.2)
- 3
+ 3.0
>>> print f([[3.2,4.5],[24,-3.1]])
- [[ 3 4]
- [19 0]]
- >>>
+ [[ 3. 4.]
+ [ 19. 0.]]
+ """
- '''
-
def __init__(self, x, y, ival=0., sorted=False):
_x = N.asarray(x)
More information about the Scipy-svn
mailing list