[Scipy-svn] r3933 - in trunk/scipy/stats/models: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Feb 13 00:04:06 EST 2008
Author: jonathan.taylor
Date: 2008-02-12 23:04:04 -0600 (Tue, 12 Feb 2008)
New Revision: 3933
Modified:
trunk/scipy/stats/models/contrast.py
trunk/scipy/stats/models/formula.py
trunk/scipy/stats/models/tests/test_formula.py
Log:
made namespace conventions less ambiguous
Modified: trunk/scipy/stats/models/contrast.py
===================================================================
--- trunk/scipy/stats/models/contrast.py 2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/contrast.py 2008-02-13 05:04:04 UTC (rev 3933)
@@ -1,3 +1,5 @@
+import copy
+
import numpy as N
from numpy.linalg import pinv
from scipy.stats.models import utils
@@ -78,8 +80,9 @@
then evaldesign can be set to False.
"""
- self.term.namespace = self.formula.namespace
- T = N.transpose(N.array(self.term(*args, **kw)))
+ t = copy.copy(self.term)
+ t.namespace = self.formula.namespace
+ T = N.transpose(N.array(t(*args, **kw)))
if T.ndim == 1:
T.shape = (T.shape[0], 1)
Modified: trunk/scipy/stats/models/formula.py
===================================================================
--- trunk/scipy/stats/models/formula.py 2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/formula.py 2008-02-13 05:04:04 UTC (rev 3933)
@@ -22,6 +22,31 @@
defaults to formula.default_namespace.
When called in an instance of formula,
the namespace used is that formula's namespace.
+
+ Inheritance of the namespace under +,*,- operations:
+ ----------------------------------------------------
+
+ By default, the namespace is empty, which means it must be
+ specified before evaluating the design matrix.
+
+ When it is unambiguous, the namespaces of objects are derived from the
+ context.
+
+ Rules:
+ ------
+
+ i) "X * I", "X + I", "X**i": these inherit X's namespace
+ ii) "F.main_effect()": this inherits the Factor F's namespace
+ iii) "A-B": this inherits A's namespace
+ iv) if A.namespace == B.namespace, then A+B inherits this namespace
+ v) if A.namespace == B.namespace, then A*B inherits this namespace
+
+ Equality of namespaces:
+ -----------------------
+
+ This is done by comparing the namespaces directly, if
+ an exception is raised in the check of equality, they are
+ assumed not to be equal.
"""
def __pow__(self, power):
@@ -80,9 +105,10 @@
"""
Formula(self) + Formula(other)
"""
- other = Formula(other, namespace=self.namespace)
+ other = Formula(other)
f = other + self
- f.namespace = self.namespace
+ if _namespace_equal(other.namespace, self.namespace):
+ f.namespace = self.namespace
return f
def __mul__(self, other):
@@ -95,9 +121,10 @@
elif self.name is 'intercept':
f = Formula(other, namespace=other.namespace)
else:
- other = Formula(other, namespace=self.namespace)
+ other = Formula(other, namespace=other.namespace)
f = other * self
- f.namespace = self.namespace
+ if _namespace_equal(other.namespace, self.namespace):
+ f.namespace = self.namespace
return f
def names(self):
@@ -124,7 +151,8 @@
else:
val = self.func
if callable(val):
- if hasattr(val, "namespace"):
+ if isinstance(val, (Term, Formula)):
+ val = copy.copy(val)
val.namespace = self.namespace
val = val(*args, **kw)
@@ -172,7 +200,8 @@
v = self.namespace[self._name]
while True:
if callable(v):
- if hasattr(v, "namespace"):
+ if isinstance(v, (Term, Formula)):
+ v = copy.copy(v)
v.namespace = self.namespace
v = v(*args, **kw)
else: break
@@ -376,7 +405,7 @@
intercept = False
iindex = 0
for t in self.terms:
-
+ t = copy.copy(t)
t.namespace = namespace
val = t(*args, **kw)
@@ -414,7 +443,7 @@
else:
allvals = I(nrow=nrow)
allvals.shape = (1,) + allvals.shape
- return allvals
+ return N.squeeze(allvals)
def hasterm(self, query_term):
"""
@@ -495,7 +524,7 @@
TO DO: check for nesting relationship. Should not be too difficult.
"""
- other = Formula(other, namespace=self.namespace)
+ other = Formula(other)
selftermnames = self.termnames()
othertermnames = other.termnames()
@@ -520,7 +549,6 @@
if self.terms[i].name is 'intercept':
_term = other.terms[j]
_term.namespace = other.namespace
-
elif other.terms[j].name is 'intercept':
_term = self.terms[i]
_term.namespace = self.namespace
@@ -548,16 +576,17 @@
sumterms = self + other
sumterms.terms = [self, other] # enforce the order we want
- sumterms.namespace = self.namespace
- _term = Quantitative(names, func=sumterms, termname=termname,
+ _term = Quantitative(names, func=sumterms,
+ termname=termname,
transform=product_func)
- _term.namespace = self.namespace
+ if _namespace_equal(self.namespace, other.namespace):
+ _term.namespace = self.namespace
terms.append(_term)
- return Formula(terms, namespace=self.namespace)
+ return Formula(terms)
def __add__(self, other):
@@ -568,12 +597,15 @@
terms in the formula are sorted alphabetically.
"""
- other = Formula(other, namespace=self.namespace)
+ other = Formula(other)
terms = self.terms + other.terms
pieces = [(term.name, term) for term in terms]
pieces.sort()
terms = [piece[1] for piece in pieces]
- return Formula(terms, namespace=self.namespace)
+ f = Formula(terms)
+ if _namespace_equal(self.namespace, other.namespace):
+ f.namespace = self.namespace
+ return f
def __sub__(self, other):
@@ -583,7 +615,7 @@
function does not raise an exception.
"""
- other = Formula(other, namespace=self.namespace)
+ other = Formula(other)
terms = copy.copy(self.terms)
for term in other.terms:
@@ -591,9 +623,11 @@
if terms[i].termname == term.termname:
terms.pop(i)
break
- return Formula(terms, namespace=self.namespace)
+ f = Formula(terms)
+ f.namespace = self.namespace
+ return f
-def isnested(A, B, namespace=globals()):
+def isnested(A, B, namespace=None):
"""
Is factor B nested within factor A or vice versa: a very crude test
which depends on the namespace.
@@ -603,9 +637,13 @@
"""
- a = A(namespace, values=True)[0]
- b = B(namespace, values=True)[0]
+ if namespace is not None:
+ A = copy.copy(A); A.namespace = namespace
+ B = copy.copy(B); B.namespace = namespace
+ a = A(values=True)[0]
+ b = B(values=True)[0]
+
if len(a) != len(b):
raise ValueError, 'A() and B() should be sequences of the same length'
@@ -645,17 +683,25 @@
"""
-def interactions(terms, order=2):
+def interactions(terms, order=[1,2]):
"""
- Output all pairwise interactions up to a given order of a
+ Output all pairwise interactions of given order of a
sequence of terms.
- If order is greater than len(terms), it is treated as len(terms).
+ The argument order is a sequence specifying which order
+ of interactions should be generated -- the default
+ creates main effects and two-way interactions. If order
+ is an integer, it is changed to range(1,order+1), so
+ order=3 is equivalent to order=[1,2,3], generating
+ all one, two and three-way interactions.
+
+ If any entry of order is greater than len(terms), it is
+ effectively treated as len(terms).
>>> print interactions([Term(l) for l in ['a', 'b', 'c']])
<formula: a*b + a*c + b*c + a + b + c>
>>>
- >>> print interactions([Term(l) for l in ['a', 'b', 'c']], order=5)
+ >>> print interactions([Term(l) for l in ['a', 'b', 'c']], order=range(5))
<formula: a*b + a*b*c + a*c + b*c + a + b + c>
>>>
@@ -664,10 +710,13 @@
values = {}
+ if N.asarray(order).shape == ():
+ order = range(1, int(order)+1)
+
# First order
- for o in range(order):
- I = N.indices((l,)*(o+1))
+ for o in order:
+ I = N.indices((l,)*(o))
I.shape = (I.shape[0], N.product(I.shape[1:]))
for m in range(I.shape[1]):
@@ -681,8 +730,15 @@
v *= ll[ii+1]
values[tuple(I[:,m])] = v
- value = values[(0,)]; del(values[(0,)])
+ key = values.keys()[0]
+ value = values[key]; del(values[key])
for v in values.values():
value += v
return value
+
+def _namespace_equal(space1, space2):
+ try:
+ return space1 == space2
+ except:
+ return False
Modified: trunk/scipy/stats/models/tests/test_formula.py
===================================================================
--- trunk/scipy/stats/models/tests/test_formula.py 2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/tests/test_formula.py 2008-02-13 05:04:04 UTC (rev 3933)
@@ -66,6 +66,7 @@
def test_namespace(self):
space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
+ space3 = {'X':N.arange(30), 'Y':N.arange(30)*2}
X = formula.Term('X')
Y = formula.Term('Y')
@@ -79,15 +80,44 @@
f.namespace = space1
self.assertEqual(f().shape, (2,50))
- assert_almost_equal(Y(), N.arange(50)*2)
+ assert_almost_equal(Y(), N.arange(20)*2)
assert_almost_equal(X(), N.arange(50))
f.namespace = space2
self.assertEqual(f().shape, (2,20))
assert_almost_equal(Y(), N.arange(20)*2)
- assert_almost_equal(X(), N.arange(20))
+ assert_almost_equal(X(), N.arange(50))
+ f.namespace = space3
+ self.assertEqual(f().shape, (2,30))
+ assert_almost_equal(Y(), N.arange(20)*2)
+ assert_almost_equal(X(), N.arange(50))
+ xx = X**2
+ self.assertEqual(xx().shape, (50,))
+
+ xx.namespace = space3
+ self.assertEqual(xx().shape, (30,))
+
+ xx = X * formula.I
+ self.assertEqual(xx().shape, (50,))
+ xx.namespace = space3
+ self.assertEqual(xx().shape, (30,))
+
+ xx = X * X
+ self.assertEqual(xx.namespace, X.namespace)
+
+ xx = X + Y
+ self.assertEqual(xx.namespace, {})
+
+ Y.namespace = {'X':N.arange(50), 'Y':N.arange(50)*2}
+ xx = X + Y
+ self.assertEqual(xx.namespace, {})
+
+ Y.namespace = X.namespace
+ xx = X+Y
+ self.assertEqual(xx.namespace, Y.namespace)
+
def test_termcolumns(self):
t1 = formula.Term("A")
t2 = formula.Term("B")
@@ -112,33 +142,30 @@
self.assertEquals(x.shape, (40, 10))
def test_product(self):
- prod = self.terms[0] * self.terms[2]
- self.formula += prod
- x = self.formula.design()
- p = self.formula['A*C']
- col = self.formula.termcolumns(prod, dict=False)
+ prod = self.formula['A'] * self.formula['C']
+ f = self.formula + prod
+ f.namespace = self.namespace
+ x = f.design()
+ p = f['A*C']
+ p.namespace = self.namespace
+ col = f.termcolumns(prod, dict=False)
assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2])
assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2])
def test_intercept1(self):
prod = self.terms[0] * self.terms[2]
- self.formula += formula.I
- icol = self.formula.names().index('intercept')
- assert_almost_equal(self.formula()[icol], N.ones((40,)))
+ f = self.formula + formula.I
+ icol = f.names().index('intercept')
+ f.namespace = self.namespace
+ assert_almost_equal(f()[icol], N.ones((40,)))
- def test_intercept2(self):
- prod = self.terms[0] * self.terms[2]
- self.formula += formula.I
- icol = self.formula.names().index('intercept')
- assert_almost_equal(self.formula()[icol], N.ones((40,)))
-
def test_intercept3(self):
- prod = self.terms[0] * formula.I
+ t = self.formula['A']
+ t.namespace = self.namespace
+ prod = t * formula.I
prod.namespace = self.formula.namespace
- assert_almost_equal(N.squeeze(prod()), self.terms[0]())
+ assert_almost_equal(N.squeeze(prod()), t())
-
-
def test_contrast1(self):
term = self.terms[0] + self.terms[2]
c = contrast.Contrast(term, self.formula)
@@ -202,6 +229,7 @@
fac = formula.Factor('ff', f)
fac.namespace = {'ff':f}
m = fac.main_effect(reference=1)
+ m.namespace = fac.namespace
self.assertEquals(m().shape, (2,30))
def test_factor4(self):
@@ -209,6 +237,7 @@
fac = formula.Factor('ff', f)
fac.namespace = {'ff':f}
m = fac.main_effect(reference=2)
+ m.namespace = fac.namespace
r = N.array([N.identity(3)]*10)
r.shape = (30,3)
r = r.T
@@ -251,7 +280,7 @@
def test_contrast4(self):
f = self.formula + self.terms[5] + self.terms[5]
-
+ f.namespace = self.namespace
estimable = False
c = contrast.Contrast(self.terms[5], f)
@@ -267,6 +296,12 @@
f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=3)
assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
+ f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=[1,2,3])
+ assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
+
+ f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=[3])
+ assert_equal(set(f.termnames()), set(['a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
+
def test_subtract(self):
f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c']])
ff = f - f['a*b']
More information about the Scipy-svn
mailing list