From scipy-svn at scipy.org  Fri Nov  3 11:31:01 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 3 Nov 2006 10:31:01 -0600 (CST)
Subject: [Scipy-svn] r2301 - trunk/Lib/sandbox/arraysetops
Message-ID: <20061103163101.873F539C03D@new.scipy.org>

Author: rkern
Date: 2006-11-03 10:30:58 -0600 (Fri, 03 Nov 2006)
New Revision: 2301

Modified:
   trunk/Lib/sandbox/arraysetops/arraysetops.py
Log:
Fix syntax error.

Modified: trunk/Lib/sandbox/arraysetops/arraysetops.py
===================================================================
--- trunk/Lib/sandbox/arraysetops/arraysetops.py	2006-10-30 23:12:34 UTC (rev 2300)
+++ trunk/Lib/sandbox/arraysetops/arraysetops.py	2006-11-03 16:30:58 UTC (rev 2301)
@@ -62,7 +62,7 @@
     ar = numpy.array( ar1 ).ravel()
     if retIndx:
         perm = numpy.argsort( ar )
-        aux = numpy.take( ar, perm 0,axis=0)
+        aux = numpy.take( ar, perm, axis=0)
         flag = ediff1d( aux, 1 ) != 0
         return numpy.compress( flag, perm ,axis=-1), numpy.compress( flag, aux ,axis=-1)
     else:
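For context, the patched line sits in the sort-based unique routine of arraysetops. A minimal standalone sketch of what that code path computes, in plain NumPy rather than the sandbox implementation itself:

    import numpy
    ar = numpy.array([1, 3, 1, 2, 3])
    perm = numpy.argsort(ar)                   # indices that sort ar
    aux = numpy.take(ar, perm, axis=0)         # the fixed call: ar in sorted order
    flag = numpy.concatenate(([True], aux[1:] != aux[:-1]))  # marks first occurrences
    print(numpy.compress(flag, aux, axis=-1))  # unique values: [1 2 3]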
From scipy-svn at scipy.org  Sun Nov  5 17:27:11 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sun, 5 Nov 2006 16:27:11 -0600 (CST)
Subject: [Scipy-svn] r2302 - in trunk/Lib/integrate: . tests
Message-ID: <20061105222711.31F6539C07E@new.scipy.org>

Author: stefan
Date: 2006-11-05 16:26:48 -0600 (Sun, 05 Nov 2006)
New Revision: 2302

Modified:
   trunk/Lib/integrate/quadrature.py
   trunk/Lib/integrate/tests/test_quadrature.py
Log:
Fix romb.

Modified: trunk/Lib/integrate/quadrature.py
===================================================================
--- trunk/Lib/integrate/quadrature.py	2006-11-03 16:30:58 UTC (rev 2301)
+++ trunk/Lib/integrate/quadrature.py	2006-11-05 22:26:48 UTC (rev 2302)
@@ -273,7 +273,7 @@
     The number of samples must be 1 + a non-negative power of two: N=2**k + 1

     See also:
-
+
       quad - adaptive quadrature using QUADPACK
       romberg - adaptive Romberg quadrature
       quadrature - adaptive Gaussian quadrature
@@ -306,7 +306,7 @@
     start = stop = step = Ninterv
     for i in range(2,k+1):
         start >>= 1
-        slice_R = tupleset(slice_R, slice(start,stop,step))
+        slice_R = tupleset(slice_R, axis, slice(start,stop,step))
        step >>= 1
        R[(i,1)] = 0.5*(R[(i-1,1)] + h*add.reduce(y[slice_R],axis))
        for j in range(2,i+1):

Modified: trunk/Lib/integrate/tests/test_quadrature.py
===================================================================
--- trunk/Lib/integrate/tests/test_quadrature.py	2006-11-03 16:30:58 UTC (rev 2301)
+++ trunk/Lib/integrate/tests/test_quadrature.py	2006-11-05 22:26:48 UTC (rev 2302)
@@ -4,7 +4,7 @@
 from numpy.testing import *

 set_package_path()
-from scipy.integrate import quadrature, romberg
+from scipy.integrate import quadrature, romberg, romb
 restore_path()

 class test_quadrature(ScipyTestCase):
@@ -27,5 +27,8 @@
         table_val = 0.30614353532540296487
         assert_almost_equal(val, table_val, decimal=7)

+    def check_romb(self):
+        assert_equal(romb(numpy.arange(17)),128)
+
 if __name__ == "__main__":
     ScipyTest().run()
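The new regression test is easy to verify by hand: romb integrates equally spaced samples, and for f(x) = x on [0, 16] (17 = 2**4 + 1 samples, dx = 1) the exact integral is 16**2/2 = 128. A quick check, assuming a SciPy build in which romb is exported from scipy.integrate:

    import numpy
    from scipy.integrate import romb
    y = numpy.arange(17)      # f(x) = x sampled at x = 0, 1, ..., 16
    print(romb(y))            # -> 128.0, matching check_romb above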
From scipy-svn at scipy.org  Mon Nov  6 20:03:59 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 6 Nov 2006 19:03:59 -0600 (CST)
Subject: [Scipy-svn] r2304 - trunk/Lib/sandbox/models
Message-ID: <20061107010359.3FDA139C00E@new.scipy.org>

Author: matthew.brett at gmail.com
Date: 2006-11-06 19:03:56 -0600 (Mon, 06 Nov 2006)
New Revision: 2304

Added:
   trunk/Lib/sandbox/models/info.py
Modified:
   trunk/Lib/sandbox/models/model.py
   trunk/Lib/sandbox/models/utils.py
Log:
Tiny cleanups, addition of skeleton info.py

Added: trunk/Lib/sandbox/models/info.py
===================================================================
--- trunk/Lib/sandbox/models/info.py	2006-11-06 20:50:54 UTC (rev 2303)
+++ trunk/Lib/sandbox/models/info.py	2006-11-07 01:03:56 UTC (rev 2304)
@@ -0,0 +1,14 @@
+"""
+Statistical models
+==================
+
+
+"""
+
+depends = ['weave',
+           'special.orthogonal',
+           'integrate',
+           'optimize',
+           'linalg']
+
+postpone_import = True

Modified: trunk/Lib/sandbox/models/model.py
===================================================================
--- trunk/Lib/sandbox/models/model.py	2006-11-06 20:50:54 UTC (rev 2303)
+++ trunk/Lib/sandbox/models/model.py	2006-11-07 01:03:56 UTC (rev 2304)
@@ -6,7 +6,6 @@
 from contrast import ContrastResults

 class Model:
-
     """
     A (predictive) statistical model. The class Model itself does nothing
     but lays out the methods expected of any subclass.

Modified: trunk/Lib/sandbox/models/utils.py
===================================================================
--- trunk/Lib/sandbox/models/utils.py	2006-11-06 20:50:54 UTC (rev 2303)
+++ trunk/Lib/sandbox/models/utils.py	2006-11-07 01:03:56 UTC (rev 2304)
@@ -1,6 +1,5 @@
 import numpy as N
 import numpy.linalg as L
-import scipy
 import scipy.interpolate
 import scipy.linalg

@@ -23,7 +22,7 @@
     _shape = a.shape
     a.shape = N.product(a.shape,axis=0)
-    m = scipy.median(N.fabs(a - scipy.median(a))) / c
+    m = N.median(N.fabs(a - N.median(a))) / c
     a.shape = _shape
     return m
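The utils.py change swaps scipy.median for numpy's median inside the MAD (median absolute deviation) scale estimator. A minimal sketch of that computation; the constant 0.6745 is the conventional normal-consistency factor and is assumed here for illustration, not taken from the diff:

    import numpy as N
    a = N.array([1., 2., 2., 3., 100.])
    c = 0.6745                                  # assumed consistency constant
    m = N.median(N.fabs(a - N.median(a))) / c   # robust scale, insensitive to the outlier
    print(m)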
From scipy-svn at scipy.org  Mon Nov  6 20:46:19 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 6 Nov 2006 19:46:19 -0600 (CST)
Subject: [Scipy-svn] r2305 - in trunk/Lib/sandbox/models: . family robust
Message-ID: <20061107014619.4446EC7C021@new.scipy.org>

Author: matthew.brett at gmail.com
Date: 2006-11-06 19:46:10 -0600 (Mon, 06 Nov 2006)
New Revision: 2305

Modified:
   trunk/Lib/sandbox/models/bsplines.py
   trunk/Lib/sandbox/models/contrast.py
   trunk/Lib/sandbox/models/cox.py
   trunk/Lib/sandbox/models/family/__init__.py
   trunk/Lib/sandbox/models/family/family.py
   trunk/Lib/sandbox/models/formula.py
   trunk/Lib/sandbox/models/glm.py
   trunk/Lib/sandbox/models/mixed.py
   trunk/Lib/sandbox/models/model.py
   trunk/Lib/sandbox/models/regression.py
   trunk/Lib/sandbox/models/rlm.py
   trunk/Lib/sandbox/models/robust/__init__.py
Log:
Changed relative to absolute imports according to PEP8 python style guide

Modified: trunk/Lib/sandbox/models/bsplines.py
===================================================================
--- trunk/Lib/sandbox/models/bsplines.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/bsplines.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,7 +1,7 @@
 import numpy as N
 import numpy.linalg as L
 import scipy.integrate
-import _bspline
+from scipy.sandbox.models import _bspline

 # note to self: check out eig_banded! in linalg.decomp?

Modified: trunk/Lib/sandbox/models/contrast.py
===================================================================
--- trunk/Lib/sandbox/models/contrast.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/contrast.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,6 +1,6 @@
 import numpy as N
 from numpy.linalg import pinv
-import utils
+from scipy.sandbox.models import utils

 class ContrastResults:
     """

Modified: trunk/Lib/sandbox/models/cox.py
===================================================================
--- trunk/Lib/sandbox/models/cox.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/cox.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,7 +1,7 @@
+import shutil
+import tempfile
 import numpy as N
-import survival
-import model
-import tempfile, shutil
+from scipy.sandbox.models import survival, model

 class DiscreteRV:

Modified: trunk/Lib/sandbox/models/family/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/family/__init__.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/family/__init__.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,4 +1,4 @@
+from scipy.sandbox.models.family.family import Gaussian, Family, \
+     Poisson, Gamma, InverseGaussian, Binomial

-from family import Gaussian, Family, Poisson, Gamma, InverseGaussian, Binomial
-

Modified: trunk/Lib/sandbox/models/family/family.py
===================================================================
--- trunk/Lib/sandbox/models/family/family.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/family/family.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,6 +1,6 @@
-import links as L
-import varfuncs as V
 import numpy as N
+from scipy.sandbox.models.family import links as L
+from scipy.sandbox.models.family import varfuncs as V

 class Family:

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/formula.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,4 +1,5 @@
-import types, copy
+import copy
+import types
 import numpy as N

 terms = {}

Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/glm.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,6 +1,6 @@
 import numpy as N
-import family
-from regression import WLSModel
+from scipy.sandbox.models import family
+from scipy.sandbox.models.regression import WLSModel

 class Model(WLSModel):

Modified: trunk/Lib/sandbox/models/mixed.py
===================================================================
--- trunk/Lib/sandbox/models/mixed.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/mixed.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,6 +1,6 @@
 import numpy as N
 import numpy.linalg as L
-from formula import Formula, I
+from scipy.sandbox.models.formula import Formula, I

 class Unit:

Modified: trunk/Lib/sandbox/models/model.py
===================================================================
--- trunk/Lib/sandbox/models/model.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/model.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,9 +1,9 @@
-import scipy.optimize
 import numpy as N
 from numpy.linalg import inv
+import scipy.optimize

-from utils import recipr
-from contrast import ContrastResults
+from scipy.sandbox.models.contrast import ContrastResults
+from scipy.sandbox.models.utils import recipr

 class Model:
     """

Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/regression.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,9 +1,8 @@
 import numpy as N
 import numpy.linalg as L
-from model import LikelihoodModel, LikelihoodModelResults
-
-import utils
 import scipy.linalg
+from scipy.sandbox.models.model import LikelihoodModel, LikelihoodModelResults
+from scipy.sandbox.models import utils

 class OLSModel(LikelihoodModel):

Modified: trunk/Lib/sandbox/models/rlm.py
===================================================================
--- trunk/Lib/sandbox/models/rlm.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/rlm.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,7 +1,7 @@
 import numpy as N

-from robust import norms, scale
-from regression import WLSModel
+from scipy.sandbox.models.regression import WLSModel
+from scipy.sandbox.models.robust import norms, scale

 class Model(WLSModel):

Modified: trunk/Lib/sandbox/models/robust/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/robust/__init__.py	2006-11-07 01:03:56 UTC (rev 2304)
+++ trunk/Lib/sandbox/models/robust/__init__.py	2006-11-07 01:46:10 UTC (rev 2305)
@@ -1,8 +1,8 @@
 import numpy as N
 import numpy.linalg as L

-import norms
-from scale import MAD
+from scipy.sandbox.models.robust import norms
+from scipy.sandbox.models.robust.scale import MAD
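The pattern applied throughout this commit replaces implicit relative imports with absolute ones; both lines below appear in the diff itself:

    # before: resolves relative to the current package, ambiguously
    import utils
    # after: unambiguous absolute import
    from scipy.sandbox.models import utils

Implicit relative imports were later removed entirely in Python 3, so the change also future-proofed the package.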
From scipy-svn at scipy.org  Tue Nov  7 04:05:54 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 7 Nov 2006 03:05:54 -0600 (CST)
Subject: [Scipy-svn] r2306 - trunk/Lib/sandbox/odr
Message-ID: <20061107090554.A875439C052@new.scipy.org>

Author: rkern
Date: 2006-11-07 03:05:47 -0600 (Tue, 07 Nov 2006)
New Revision: 2306

Modified:
   trunk/Lib/sandbox/odr/setup.py
Log:
Allow non-ATLAS optimized LAPACK libraries for odr.

Modified: trunk/Lib/sandbox/odr/setup.py
===================================================================
--- trunk/Lib/sandbox/odr/setup.py	2006-11-07 01:46:10 UTC (rev 2305)
+++ trunk/Lib/sandbox/odr/setup.py	2006-11-07 09:05:47 UTC (rev 2306)
@@ -19,32 +19,24 @@
                     'd_mprec.f', 'dlunoc.f']

-    atlas_info = get_info('atlas')
-    #atlas_info = {} # uncomment if ATLAS is available but want to use
-    # Fortran LAPACK/BLAS; useful for testing
-    blas_libs = []
-    if not atlas_info:
-        warnings.warn(AtlasNotFoundError.__doc__)
-        blas_info = get_info('blas')
-        if blas_info:
-            libodr_files.append('d_lpk.f')
-            blas_libs.extend(blas_info['libraries'])
-        else:
-            warnings.warn(BlasNotFoundError.__doc__)
-            libodr_files.append('d_lpkbls.f')
-    else:
+    blas_info = get_info('blas_opt')
+    if blas_info:
         libodr_files.append('d_lpk.f')
-        blas_libs.extend(atlas_info['libraries'])
+    else:
+        warnings.warn(BlasNotFoundError.__doc__)
+        libodr_files.append('d_lpkbls.f')

     libodr = [os.path.join('odrpack', x) for x in libodr_files]
     config.add_library('odrpack', sources=libodr)

     sources = ['__odrpack.c']
+    libraries = ['odrpack'] + blas_info.pop('libraries', [])
+    include_dirs = ['.'] + blas_info.pop('include_dirs', [])
     config.add_extension('__odrpack',
-        sources=sources,
-        libraries=['odrpack']+blas_libs,
-        include_dirs=['.'],
-        library_dirs=atlas_info['library_dirs'],
-        )
+        sources=sources,
+        libraries=libraries,
+        include_dirs=include_dirs,
+        **blas_info
+        )

     return config
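get_info('blas_opt') asks numpy.distutils for whatever optimized BLAS is available (ATLAS, a vendor BLAS, or the plain reference BLAS) and returns a dict of build keywords, which is why the extension call above can simply splat the remainder. A small probe, runnable wherever numpy.distutils is installed:

    from numpy.distutils.system_info import get_info
    blas_info = get_info('blas_opt')
    print(blas_info.get('libraries', []))   # e.g. ['f77blas', 'cblas', 'atlas'], or []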
From scipy-svn at scipy.org  Tue Nov  7 04:07:46 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 7 Nov 2006 03:07:46 -0600 (CST)
Subject: [Scipy-svn] r2307 - trunk
Message-ID: <20061107090746.3778539C052@new.scipy.org>

Author: rkern
Date: 2006-11-07 03:07:45 -0600 (Tue, 07 Nov 2006)
New Revision: 2307

Modified:
   trunk/
Log:
Ignore some files.

Property changes on: trunk
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
build
dist
scipy.egg-info
From scipy-svn at scipy.org  Tue Nov  7 16:51:34 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 7 Nov 2006 15:51:34 -0600 (CST)
Subject: [Scipy-svn] r2308 - in trunk/Lib/sandbox/models: . family tests
Message-ID: <20061107215134.1B1D939C01A@new.scipy.org>

Author: jonathan.taylor
Date: 2006-11-07 15:50:20 -0600 (Tue, 07 Nov 2006)
New Revision: 2308

Modified:
   trunk/Lib/sandbox/models/cox.py
   trunk/Lib/sandbox/models/family/links.py
   trunk/Lib/sandbox/models/formula.py
   trunk/Lib/sandbox/models/tests/test_regression.py
   trunk/Lib/sandbox/models/tests/test_robust.py
Log:
some name changes: changing CamelCase to lower_case

Modified: trunk/Lib/sandbox/models/cox.py
===================================================================
--- trunk/Lib/sandbox/models/cox.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/cox.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,7 +3,7 @@
 import numpy as N
 from scipy.sandbox.models import survival, model

-class DiscreteRV:
+class discrete:

     """
     A simple little class for working with discrete random vectors.
@@ -15,13 +15,13 @@
             self.x = N.array([self.x])
         self.n = self.x.shape[0]
         if w is None:
-            w = N.ones(self.n, N.float64)
+            w = N.ones(self.n, N.float64)
         else:
             if w.shape[0] != self.n:
                 raise ValueError, 'incompatible shape for weights w'
             if N.any(N.less(w, 0)):
                 raise ValueError, 'weights should be non-negative'
-        self.w = w / w.sum()
+        self.w = w / w.sum()

     def mean(self, f=None):
         if f is None:
@@ -31,11 +31,11 @@
         return (fx * self.w).sum()

     def cov(self):
-        mu = self.moment()
+        mu = self.mean()
         dx = self.x - N.multiply.outer(mu, self.x.shape[1])
         return N.dot(dx, N.transpose(dx))

-class Observation(survival.RightCensored):
+class observation(survival.right_censored):

     def __getitem__(self, item):
         if self.namespace is not None:
@@ -45,18 +45,17 @@

     def __init__(self, time, delta, namespace=None):
         self.namespace = namespace
-        survival.RightCensored.__init__(self, time, delta)
+        survival.right_censored.__init__(self, time, delta)

     def __call__(self, formula, time=None, **extra):
         return formula(namespace=self, time=time, **extra)

-class ProportionalHazards(model.LikelihoodModel):
+class coxph(model.likelihood_model):

     def __init__(self, subjects, formula, time_dependent=False):
         self.subjects, self.formula = subjects, formula
         self.time_dependent = time_dependent
         self.initialize(self.subjects)
-

     def initialize(self, subjects):

@@ -142,7 +141,7 @@
             if ties == 'breslow':
                 w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
                 score -= rv.mean() * d
             elif ties == 'efron':
                 w = N.exp(N.dot(Z, b))
@@ -150,7 +149,7 @@
                 for j in range(d):
                     efron_w = w
                     efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
                     score -= rv.mean()
             elif ties == 'cox':
                 raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -175,7 +174,7 @@
             if ties == 'breslow':
                 w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
                 info += rv.cov()
             elif ties == 'efron':
                 w = N.exp(N.dot(Z, b))
@@ -183,7 +182,7 @@
                 for j in range(d):
                     efron_w = w
                     efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
                     info += rv.cov()
             elif ties == 'cox':
                 raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -200,7 +199,7 @@
     Y = R.standard_exponential((2*n,)) / lin
     delta = R.binomial(1, 0.9, size=(2*n,))

-    subjects = [Observation(Y[i], delta[i]) for i in range(2*n)]
+    subjects = [observation(Y[i], delta[i]) for i in range(2*n)]
     for i in range(2*n):
         subjects[i].X = X[i]

@@ -208,7 +207,7 @@
     x = F.Quantitative('X')
     f = F.Formula(x)

-    c = ProportionalHazards(subjects, f)
+    c = coxph(subjects, f)

     c.cache()
     c.newton([0.4])

Modified: trunk/Lib/sandbox/models/family/links.py
===================================================================
--- trunk/Lib/sandbox/models/family/links.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/family/links.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,11 +3,9 @@

 class Link:

-    def __init__(self):
-        pass
+    def initialize(self, Y):
+        return N.asarray(Y).mean() * N.ones(Y.shape)

-    pass
-
 class Logit(Link):

     """
@@ -17,26 +15,16 @@
     """

     tol = 1.0e-10
-    init = 1.0e-03

-    def clean(self, p, inverse=False, initialize=False):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+    def clean(self, p):
+        return N.clip(p, Logit.tol, 1. - Logit.tol)

-        if not inverse:
-            return N.clip(p, tol, 1. - tol)
-        else:
-            l = self(tol); u = self(1 - tol)
-            return N.clip(p, l, u)
-
-    def __call__(self, p, initialize=True, **extra):
-        p = self.clean(p, **extra)
+    def __call__(self, p):
+        p = self.clean(p)
         return N.log(p / (1. - p))

     def inverse(self, z):
-        t = N.exp(self.clean(z, inverse=True))
+        t = N.exp(z)
         return t / (1. + t)

     def deriv(self, p):
@@ -57,7 +45,7 @@
     def __init__(self, power=1.):
         self.power = power

-    def __call__(self, x, **extra):
+    def __call__(self, x):
         return N.power(x, self.power)

     def inverse(self, x):
@@ -108,16 +96,10 @@
     """

     tol = 1.0e-10
-    init = 1.0e-03

     def clean(self, x):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+        return N.clip(x, Logit.tol, N.inf)

-        return N.clip(x, tol, N.inf)
-
-    def __call__(self, x, **extra):
+    def __call__(self, x):
         x = self.clean(x)
         return N.log(x)
@@ -143,7 +125,7 @@
     def __init__(self, dbn=scipy.stats.norm):
         self.dbn = dbn

-    def __call__(self, p, **extra):
+    def __call__(self, p):
         p = self.clean(p)
         return self.dbn.ppf(p)
@@ -181,7 +163,7 @@
     """

-    def __call__(self, p, **extra):
+    def __call__(self, p):
         p = self.clean(p)
         return N.log(-N.log(p))

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/formula.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -4,7 +4,7 @@

 terms = {}

-class Term:
+class term:

     """
     This class is very simple: it is just a named term in a model formula.
@@ -15,7 +15,7 @@

     """

-    def __init__(self, name, func=None, termname=None, namespace=terms):
+    def __init__(self, name, func=None, termname=None, namespace={}):

         self.name = name

@@ -39,22 +39,22 @@

     def __add__(self, other):
         """
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)
         """
-        other = Formula(other)
+        other = formula(other)
         return other + self

     def __mul__(self, other):
         """
-        Formula(self) * Formula(other)
+        formula(self) * formula(other)
         """

         if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
         elif self.name is 'intercept':
-            return Formula(other)
+            return formula(other)

-        other = Formula(other)
+        other = formula(other)
         return other * self

     def names(self):
@@ -67,28 +67,32 @@
         else:
             return list(self.name)

-    def __call__(self, namespace=terms, usefn=True, **extra):
+    def __call__(self, namespace=terms, usefn=True, args=(), kw={}):
         """
         Return the columns associated to self in a design matrix.
         The default behaviour is to return namespace[self.termname]
         where namespace defaults to globals().

-        If usefn, and self.func exists then return
-            self.func(namespace=namespace, **extra)
+        If usefn, and self.func exists then return
+
+        self.func(*args, **kw)
+
+        with kw['namespace'] = namespace.
         """

+        kw['namespace'] = namespace
         if not hasattr(self, 'func') or not usefn:
             val = namespace[self.termname]
-            if isinstance(val, Formula):
-                val = val(namespace=namespace, **extra)
+            if isinstance(val, formula):
+                val = val(*args, **kw)
             elif callable(val):
-                val = val(**extra)
+                val = val(*args, **kw)
         else:
-            val = self.func(namespace=namespace, **extra)
+            val = self.func(*args, **kw)
         val = N.asarray(val)
         return N.squeeze(val)

-class Factor(Term):
+class factor(term):

     """
     A categorical factor.
@@ -96,7 +100,7 @@

     def __init__(self, termname, keys, ordinal=False):
         """
-        Factor is initialized with keys, representing all valid
+        factor is initialized with keys, representing all valid
         levels of the factor.
         """
@@ -114,7 +118,7 @@
                 # FIXME: n is not defined here
                 col = [float(self.keys.index(v[i])) for i in range(n)]
                 return N.array(col)
-            Term.__init__(self, self.name, func=func)
+            term.__init__(self, self.name, func=func)

         else:
             def func(namespace=terms):
@@ -124,9 +128,9 @@
                     col = [float((v[i] == key)) for i in range(len(v))]
                     value.append(col)
                 return N.array(value)
-            Term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
+            term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)

-    def __call__(self, namespace=terms, values=False, **extra):
+    def __call__(self, namespace=terms, values=False, args=(), kw={}):
         """
         Return either the columns in the design matrix, or the
         actual values of the factor, if values==True.
@@ -135,9 +139,9 @@
         if namespace is None:
             namespace = globals()
         if not values:
-            return Term.__call__(self, namespace=namespace, usefn=True, **extra)
+            return term.__call__(self, namespace=namespace, usefn=True, args=args, kw=kw)
         else:
-            return Term.__call__(self, namespace=namespace, usefn=False, **extra)
+            return term.__call__(self, namespace=namespace, usefn=False, args=args, kw=kw)

     def verify(self, values):
         """
@@ -149,19 +153,19 @@

     def __add__(self, other):
         """
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)

-        When adding \'intercept\' to a Factor, this just returns self.
+        When adding \'intercept\' to a factor, this just returns self.
         """

         if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
         else:
-            return Term.__add__(self, other)
+            return term.__add__(self, other)

     def main_effect(self, reference=None):
         """
-        Return the 'main effect' columns of a Factor, choosing
+        Return the 'main effect' columns of a factor, choosing
         a reference column number to remove.
         """
@@ -181,12 +185,12 @@
         keep.pop(reference)
         __names = self.names()
         _names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
-        return Term(_names, func=func, termname='%s:maineffect' % self.termname)
+        return term(_names, func=func, termname='%s:maineffect' % self.termname)

-class Quantitative(Term):
+class quantitative(term):

     """
-    A subclass of Term that presumes namespace[self.termname] is
+    A subclass of term that presumes namespace[self.termname] is
     an ndarray.

     Basically used for __pow__ method and (looking forward) for splines.
@@ -195,7 +199,7 @@

     def __pow__(self, power):
         """
-        Raise the quantitative Term's values to an integer power, i.e.
+        Raise the quantitative term's values to an integer power, i.e.
         polynomial.
         """
         try:
@@ -211,13 +215,13 @@
         def func(obj=self, namespace=terms, power=power, **extra):
             x = N.asarray(obj(namespace=namespace, **extra))
             return N.power(x, power)
-        value = Term(name, func=func)
+        value = term(name, func=func)
         value.power = power
         return value

-class FuncQuant(Quantitative):
+class func_quant(quantitative):
     """
-    A Term for a quantitative function of a Term.
+    A term for a quantitative function of a term.
     """

     counter = 0
@@ -238,18 +242,18 @@
         except:
             termname = 'f%d(%s)' % (FuncQuant.counter, quant.name)
         FuncQuant.counter += 1
-        Term.__init__(self, termname, func=func)
+        term.__init__(self, termname, func=func)

-class Formula:
+class formula:

     """
-    A Formula object for manipulating design matrices in regression models,
-    essentially consisting of a list of Term instances.
+    A formula object for manipulating design matrices in regression models,
+    essentially consisting of a list of term instances.

     The object supports addition and multiplication which correspond
     to concatenation and pairwise multiplication, respectively,
-    of the columns of the two Formulas.
+    of the columns of the two formulas.
     """

     def _terms_changed(self):
@@ -258,19 +262,19 @@

     def __init__(self, terms):
         """
-        Create a Formula from either:
+        Create a formula from either:

-          i) a Formula object
-          ii) a sequence of Term instances
-          iii) one Term
+          i) a formula object
+          ii) a sequence of term instances
+          iii) one term

         """

-        if isinstance(terms, Formula):
+        if isinstance(terms, formula):
             self.terms = copy.copy(list(terms.terms))
         elif type(terms) is types.ListType:
             self.terms = terms
-        elif isinstance(terms, Term):
+        elif isinstance(terms, term):
             self.terms = [terms]
         else:
             raise ValueError
@@ -279,18 +283,18 @@

     def __str__(self):
         """
-        String representation of list of termnames of a Formula.
+        String representation of list of termnames of a formula.
         """
         value = []
         for term in self.terms:
             value += [term.termname]
         return '' % ' + '.join(value)

-    def __call__(self, namespace=terms, nrow=-1, **extra):
+    def __call__(self, namespace=terms, nrow=-1, args=(), kw={}):
         """
-        Create (transpose) of the design matrix of the Formula within
-        namespace. Extra arguments are passed to each Term instance. If
-        the Formula just contains an intercept, then the keyword
+        Create (transpose) of the design matrix of the formula within
+        namespace. Extra arguments are passed to each term instance. If
+        the formula just contains an intercept, then the keyword
         argument 'n' indicates the number of rows (observations).
         """

@@ -299,7 +303,7 @@
         allvals = []
         intercept = False
         for term in self.terms:
-            val = term(namespace=namespace, **extra)
+            val = term(namespace=namespace, args=args, kw=kw)
             if term.termname == 'intercept':
                 intercept = True
             elif val.ndim == 1:
@@ -330,7 +334,7 @@
         Determine whether a given term is in a formula.
         """

-        if not isinstance(term, Formula):
+        if not isinstance(term, formula):
             return term.termname in self.termnames()
         elif len(term.terms) == 1:
             term = term.terms[0]
@@ -365,7 +369,7 @@

     def names(self):
         """
-        Return a list of the names in the Formula. The order of the
+        Return a list of the names in the formula. The order of the
         names corresponds to the order of the columns when self
         is evaluated.
         """
@@ -378,7 +382,7 @@
     def termnames(self):
         """
         Return a list of the term names in the formula. These
-        are the names of each Term instance in self.
+        are the names of each term instance in self.
         """

         names = []
@@ -386,21 +390,21 @@
             names += [term.termname]
         return names

-    def design(self, namespace=terms, **keywords):
+    def design(self, namespace=terms, args=(), kw={}):
         """
         transpose(self(namespace=namespace, **keywords))
         """
-        return N.transpose(self(namespace=namespace, **keywords))
+        return self(namespace=namespace, args=args, kw=kw).T

     def __mul__(self, other, nested=False):
         """
-        This returns a Formula whose columns are the pairwise
+        This returns a formula whose columns are the pairwise
         product of the columns of self and other.

         TO DO: check for nesting relationship. Should not be too difficult.
         """

-        other = Formula(other)
+        other = formula(other)

         selftermnames = self.termnames()
         othertermnames = other.termnames()
@@ -423,9 +427,9 @@
                 othernames = other.terms[j].names()

                 if self.terms[i].name is 'intercept':
-                    term = other.terms[j]
+                    _term = other.terms[j]
                 elif other.terms[j].name is 'intercept':
-                    term = self.terms[i]
+                    _term = self.terms[i]

                 else:
                     names = []
@@ -451,36 +455,36 @@
                             value.append(selfval[r] * otherval[s])
                     return N.array(value)

-                    term = Term(names, func=func, termname=termname)
-            terms.append(term)
+                    _term = term(names, func=func, termname=termname)
+            terms.append(_term)

-        return Formula(terms)
+        return formula(terms)

     def __add__(self, other):

         """
-        Return a Formula whose columns are the
+        Return a formula whose columns are the
         concatenation of the columns of self and other.

-        Terms in the formula are sorted alphabetically.
+        terms in the formula are sorted alphabetically.
         """

-        other = Formula(other)
+        other = formula(other)
         terms = self.terms + other.terms
         pieces = [(term.name, term) for term in terms]
         pieces.sort()
         terms = [piece[1] for piece in pieces]
-        return Formula(terms)
+        return formula(terms)

     def __sub__(self, other):

         """
-        Return a Formula with all terms in other removed from self.
-        If other contains Term instances not in Formula, this
+        Return a formula with all terms in other removed from self.
+        If other contains term instances not in formula, this
         function does not raise an exception.
         """

-        other = Formula(other)
+        other = formula(other)
         terms = copy.copy(self.terms)

         for term in other.terms:
@@ -488,7 +492,7 @@
             if terms[i].termname == term.termname:
                 terms.pop(i)
                 break
-        return Formula(terms)
+        return formula(terms)

 def isnested(A, B, namespace=globals()):
     """
@@ -524,9 +528,9 @@
 def _intercept_fn(nrow=1, **extra):
     return N.ones((1,nrow))

-I = Term('intercept', func=_intercept_fn)
+I = term('intercept', func=_intercept_fn)
 I.__doc__ = """
-Intercept term in a Formula. If intercept is the
+Intercept term in a formula. If intercept is the
 only term in the formula, then a keywords argument
 \'nrow\' is needed.

@@ -536,7 +540,7 @@
 >>> I(nrow=5)
 array([1, 1, 1, 1, 1])

->>> f=Formula(I)
+>>> f=formula(I)
 >>> f(nrow=5)
 array([1, 1, 1, 1, 1])

Modified: trunk/Lib/sandbox/models/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,6 +1,6 @@
 import unittest
 from numpy.random import standard_normal
-from scipy.sandbox.models.regression import OLSModel, ARModel
+from scipy.sandbox.models.regression import ols_model, ar_model
 from numpy.testing import *

 W = standard_normal

@@ -10,14 +10,14 @@
     def testOLS(self):
         X = W((40,10))
         Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)

     def testAR(self):
         X = W((40,10))
         Y = W((40,))
-        model = ARModel(design=X, rho=0.4)
+        model = ar_model(design=X, rho=0.4)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)

@@ -25,7 +25,7 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)

@@ -33,14 +33,10 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = ARModel(design=X, rho=0.9)
+        model = ar_model(design=X, rho=0.9)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)

-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-
 if __name__ == '__main__':
     ScipyTest.run()

Modified: trunk/Lib/sandbox/models/tests/test_robust.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,4 +1,4 @@
-import models as S
+import scipy.sandbox.models as S
 import unittest
 import numpy.random as R
 import numpy as N

@@ -10,7 +10,7 @@
     def testRobust(self):
         X = W((40,10))
         Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)

@@ -18,15 +18,10 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)

-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-
-
 if __name__ == '__main__':
     unittest.main()
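After this commit the public names are lower_case throughout; a pre-rename script would be updated along these lines (a sketch assuming the sandbox models package is importable):

    from scipy.sandbox.models import formula
    x = formula.term('x')      # formerly formula.Term
    f = x + formula.I          # the result is now a formula.formula instance
    print(str(f))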
From scipy-svn at scipy.org  Tue Nov  7 17:07:29 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 7 Nov 2006 16:07:29 -0600 (CST)
Subject: [Scipy-svn] r2309 - in trunk/Lib/sandbox/models: . tests
Message-ID: <20061107220729.51EE039C04D@new.scipy.org>

Author: jonathan.taylor
Date: 2006-11-07 16:04:53 -0600 (Tue, 07 Nov 2006)
New Revision: 2309

Modified:
   trunk/Lib/sandbox/models/__init__.py
   trunk/Lib/sandbox/models/glm.py
   trunk/Lib/sandbox/models/regression.py
   trunk/Lib/sandbox/models/rlm.py
   trunk/Lib/sandbox/models/tests/test_formula.py
   trunk/Lib/sandbox/models/tests/test_glm.py
   trunk/Lib/sandbox/models/tests/test_regression.py
   trunk/Lib/sandbox/models/tests/test_utils.py
Log:
more name change fixes, changed names in tests, too

Modified: trunk/Lib/sandbox/models/__init__.py
===================================================================
--- trunk/Lib/sandbox/models/__init__.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/__init__.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -3,11 +3,10 @@
 import regression
 import robust
 import family
-from glm import Model as glm
-from rlm import Model as rlm
+from glm import model as glm
+from rlm import model as rlm

-
 import unittest
 def suite():
     return unittest.TestSuite([tests.suite()])

Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/glm.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -1,8 +1,8 @@
 import numpy as N
 from scipy.sandbox.models import family
-from scipy.sandbox.models.regression import WLSModel
+from scipy.sandbox.models.regression import wls_model

-class Model(WLSModel):
+class model(wls_model):

     niter = 10

@@ -20,7 +20,7 @@
         """
         Return (unnormalized) log-likelihood for glm.

-        Note that self.scale is interpreted as a variance in OLSModel, so
+        Note that self.scale is interpreted as a variance in old_model, so
         we divide the residuals by its sqrt.
         """
         if results is None:
@@ -32,7 +32,7 @@
         self.weights = self.family.weights(results.mu)
         self.initialize(self.design)
         Z = results.predict + self.family.link.deriv(results.mu) * (Y - results.mu)
-        newresults = WLSModel.fit(self, Z)
+        newresults = wls_model.fit(self, Z)
         newresults.mu = self.family.link.inverse(newresults.predict)
         self.iter += 1
         return newresults
@@ -41,7 +41,7 @@
         """
         Continue iterating, or has convergence been obtained?
         """
-        if self.iter >= Model.niter:
+        if self.iter >= model.niter:
             return False

         curdev = self.deviance(results=results)
@@ -67,7 +67,7 @@
         self.Y = N.asarray(Y, N.float64)
         iter(self)
-        self.results = WLSModel.fit(self, self.family.link(Y, initialize=True))
+        self.results = wls_model.fit(self, self.family.link.initialize(Y))
         self.results.mu = self.family.link.inverse(self.results.predict)
         self.scale = self.results.scale = self.estimate_scale()

Modified: trunk/Lib/sandbox/models/regression.py
===================================================================
--- trunk/Lib/sandbox/models/regression.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/regression.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -4,7 +4,7 @@
 from scipy.sandbox.models.model import LikelihoodModel, LikelihoodModelResults
 from scipy.sandbox.models import utils

-class OLSModel(LikelihoodModel):
+class ols_model(LikelihoodModel):

     """
     A simple ordinary least squares model.
@@ -62,7 +62,7 @@

         return lfit

-class ARModel(OLSModel):
+class ar_model(ols_model):

     """
     A regression model with an AR(1) covariance structure.
@@ -79,7 +79,7 @@
         factor = 1. / N.sqrt(1 - self.rho**2)
         return N.concatenate([[X[0]], (X[1:] - self.rho * X[0:-1]) * factor])

-class WLSModel(ARModel):
+class wls_model(ar_model):

     """
     A regression model with diagonal but non-identity covariance

Modified: trunk/Lib/sandbox/models/rlm.py
===================================================================
--- trunk/Lib/sandbox/models/rlm.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/rlm.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -1,9 +1,9 @@
 import numpy as N

-from scipy.sandbox.models.regression import WLSModel
+from scipy.sandbox.models.regression import wls_model
 from scipy.sandbox.models.robust import norms, scale

-class Model(WLSModel):
+class model(wls_model):

     niter = 20
     scale_est = 'MAD'
@@ -22,7 +22,7 @@
         """
         Return (unnormalized) log-likelihood from M estimator.

-        Note that self.scale is interpreted as a variance in OLSModel, so
+        Note that self.scale is interpreted as a variance in ols_model, so
         we divide the residuals by its sqrt.
         """
         if results is None:
@@ -32,7 +32,7 @@
     def next(self, results):
         self.weights = self.M.weights((results.Y - results.predict) / N.sqrt(results.scale))
         self.initialize(self.design)
-        results = WLSModel.fit(self, results.Y)
+        results = wls_model.fit(self, results.Y)
         self.scale = results.scale = self.estimate_scale(results)
         self.iter += 1
         return results
@@ -41,7 +41,7 @@
         """
         Continue iterating, or has convergence been obtained?
         """
-        if self.iter >= Model.niter:
+        if self.iter >= model.niter:
             return False

         curdev = self.deviance(results)
@@ -53,7 +53,7 @@
     def estimate_scale(self, results):
         """
-        Note that self.scale is interpreted as a variance in OLSModel, so
+        Note that self.scale is interpreted as a variance in ols_model, so
         we return MAD(resid)**2 by default.
         """
         resid = results.Y - results.predict
@@ -67,7 +67,7 @@
     def fit(self, Y, **keywords):
         iter(self)

-        self.results = WLSModel.fit(self, Y)
+        self.results = wls_model.fit(self, Y)
         self.scale = self.results.scale = self.estimate_scale(self.results)

         while self.cont(self.results):

Modified: trunk/Lib/sandbox/models/tests/test_formula.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_formula.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/tests/test_formula.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -7,42 +7,42 @@

 from scipy.sandbox.models import utils, formula, contrast

-class test_Term(ScipyTestCase):
+class test_term(unittest.TestCase):

     def test_init(self):
-        t1 = formula.Term("trivial")
+        t1 = formula.term("trivial")
         sqr = lambda x: x*x

-        t2 = formula.Term("not_so_trivial", sqr, "sqr")
+        t2 = formula.term("not_so_trivial", sqr, "sqr")

-        self.assertRaises(ValueError, formula.Term, "name", termname=0)
+        self.assertRaises(ValueError, formula.term, "name", termname=0)

     def test_str(self):
-        t = formula.Term("name")
+        t = formula.term("name")
         s = str(t)

     def test_add(self):
-        t1 = formula.Term("t1")
-        t2 = formula.Term("t2")
+        t1 = formula.term("t1")
+        t2 = formula.term("t2")
         f = t1 + t2
-        self.assert_(isinstance(f, formula.Formula))
+        self.assert_(isinstance(f, formula.formula))
         self.assert_(f.hasterm(t1))
         self.assert_(f.hasterm(t2))

     def test_mul(self):
-        t1 = formula.Term("t1")
-        t2 = formula.Term("t2")
+        t1 = formula.term("t1")
+        t2 = formula.term("t2")
         f = t1 * t2
-        self.assert_(isinstance(f, formula.Formula))
+        self.assert_(isinstance(f, formula.formula))

-        intercept = formula.Term("intercept")
+        intercept = formula.term("intercept")
         f = t1 * intercept
-        self.assertEqual(str(f), str(formula.Formula(t1)))
+        self.assertEqual(str(f), str(formula.formula(t1)))

         f = intercept * t1
-        self.assertEqual(str(f), str(formula.Formula(t1)))
+        self.assertEqual(str(f), str(formula.formula(t1)))

-class test_Formula(ScipyTestCase):
+class test_formula(ScipyTestCase):

     def setUp(self):
         self.X = R.standard_normal((40,10))
@@ -51,7 +51,7 @@
         for i in range(10):
             name = '%s' % string.uppercase[i]
             self.namespace[name] = self.X[:,i]
-            self.terms.append(formula.Term(name))
+            self.terms.append(formula.term(name))

         self.formula = self.terms[0]
         for i in range(1, 10):
@@ -86,7 +86,7 @@

     def test_contrast2(self):

-        dummy = formula.Term('zero')
+        dummy = formula.term('zero')
         self.namespace['zero'] = N.zeros((40,), N.float64)
         term = dummy + self.terms[2]
         c = contrast.Contrast(term, self.formula)
@@ -99,7 +99,7 @@
         X = self.formula.design(namespace=self.namespace)
         P = N.dot(X, L.pinv(X))

-        dummy = formula.Term('noise')
+        dummy = formula.term('noise')
         resid = N.identity(40) - P
         self.namespace['noise'] = N.transpose(N.dot(resid, R.standard_normal((40,5))))
         term = dummy + self.terms[2]
@@ -120,9 +120,9 @@
         self.assertEquals(estimable, False)

 def suite():
-    suite = unittest.makeSuite(FormulaTest)
+    suite = unittest.makeSuite(formulaTest)
     return suite

 if __name__ == '__main__':
-    ScipyTest.run()
+    unittest.main()

Modified: trunk/Lib/sandbox/models/tests/test_glm.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_glm.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/tests/test_glm.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -3,7 +3,7 @@
 import numpy.random as R
 import numpy as N
 from numpy.testing import *
-from scipy.sandbox.models.glm import Model
+from scipy.sandbox.models.glm import model

 W = R.standard_normal

@@ -14,8 +14,8 @@
         X = W((40,10))
         Y = N.greater(W((40,)), 0)
         family = S.family.Binomial()
-        model = Model(design=X, family=S.family.Binomial())
-        results = model.fit(Y)
+        cmodel = model(design=X, family=S.family.Binomial())
+        results = cmodel.fit(Y)
         self.assertEquals(results.df_resid, 30)

     def check_Logisticdegenerate(self):
@@ -23,8 +23,8 @@
         X[:,0] = X[:,1] + X[:,2]
         Y = N.greater(W((40,)), 0)
         family = S.family.Binomial()
-        model = Model(design=X, family=S.family.Binomial())
-        results = model.fit(Y)
+        cmodel = model(design=X, family=S.family.Binomial())
+        results = cmodel.fit(Y)
         self.assertEquals(results.df_resid, 31)

Modified: trunk/Lib/sandbox/models/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -5,7 +5,7 @@

 W = standard_normal

-class test_Regression(ScipyTestCase):
+class test_Regression(unittest.TestCase):

     def testOLS(self):
         X = W((40,10))
@@ -39,4 +39,4 @@

 if __name__ == '__main__':
-    ScipyTest.run()
+    unittest.main()

Modified: trunk/Lib/sandbox/models/tests/test_utils.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_utils.py	2006-11-07 21:50:20 UTC (rev 2308)
+++ trunk/Lib/sandbox/models/tests/test_utils.py	2006-11-07 22:04:53 UTC (rev 2309)
@@ -5,7 +5,7 @@
 from numpy.testing import *
 from scipy.sandbox.models import utils

-class test_Utils(ScipyTestCase):
+class test_Utils(unittest.TestCase):

     def test_recipr(self):
         X = N.array([[2,1],[-1,0]])
@@ -52,4 +52,4 @@
         self.assertRaises(ValueError, utils.StepFunction, x, y)

 if __name__ == '__main__':
-    ScipyTest.run()
+    unittest.main()
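With the renames in place, the test idiom for the linear models reads as below; degrees of freedom are observations minus design columns, which is what the assertions above check. A sketch assuming the sandbox package builds:

    import numpy.random as R
    from scipy.sandbox.models.regression import ols_model
    X = R.standard_normal((40,10))            # design: 40 observations, 10 regressors
    results = ols_model(design=X).fit(R.standard_normal((40,)))
    print(results.df_resid)                   # 40 - 10 = 30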
From scipy-svn at scipy.org  Tue Nov  7 19:27:40 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 7 Nov 2006 18:27:40 -0600 (CST)
Subject: [Scipy-svn] r2310 - trunk/Lib
Message-ID: <20061108002740.387C539C070@new.scipy.org>

Author: rkern
Date: 2006-11-07 18:20:45 -0600 (Tue, 07 Nov 2006)
New Revision: 2310

Modified:
   trunk/Lib/__init__.py
Log:
Delete the numpy linalg from the __init__.py so that scipy.linalg can be imported.

Modified: trunk/Lib/__init__.py
===================================================================
--- trunk/Lib/__init__.py	2006-11-07 22:04:53 UTC (rev 2309)
+++ trunk/Lib/__init__.py	2006-11-08 00:20:45 UTC (rev 2310)
@@ -47,6 +47,9 @@
 numpy name space
 """
 del _num
+# Remove the linalg imported from numpy so that the scipy.linalg package can be
+# imported.
+del linalg

 from __config__ import show as show_config
 from version import version as __version__
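The problem being fixed: scipy's __init__.py pulls the numpy namespace in wholesale, which binds the name linalg to numpy.linalg inside the scipy package and shadows the scipy.linalg subpackage. Deleting the stale binding lets the real subpackage import. The effect in miniature:

    import numpy
    linalg = numpy.linalg    # what the wildcard import effectively did
    del linalg               # after deletion, the name can resolve to scipy.linalg again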
""" @@ -43,27 +43,27 @@ class LikelihoodModel(Model): - def logL(self, theta, **extra): + def logL(self, theta): """ Log-likelihood of model. """ raise NotImplementedError - def score(self, theta, **extra): + def score(self, theta): """ Score function of model = gradient of logL with respect to theta. """ raise NotImplementedError - def information(self, theta, **extra): + def information(self, theta): """ Score function of model = - Hessian of logL with respect to theta. """ raise NotImplementedError - def newton(self, theta, **extra): + def newton(self, theta): def f(theta): return -self.logL(theta) self.results = scipy.optimize.fmin(f, theta) Modified: trunk/Lib/sandbox/models/rlm.py =================================================================== --- trunk/Lib/sandbox/models/rlm.py 2006-11-08 06:01:28 UTC (rev 2311) +++ trunk/Lib/sandbox/models/rlm.py 2006-11-08 06:02:34 UTC (rev 2312) @@ -8,7 +8,7 @@ niter = 20 scale_est = 'MAD' - def __init__(self, design, M=norms.Hampel(), **keywords): + def __init__(self, design, M=norms.Hampel()): self.M = M self.weights = 1 self.initialize(design) @@ -64,7 +64,7 @@ else: return scale.scale_est(self, resid)**2 - def fit(self, Y, **keywords): + def fit(self, Y): iter(self) self.results = wls_model.fit(self, Y) From scipy-svn at scipy.org Wed Nov 8 01:04:30 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 8 Nov 2006 00:04:30 -0600 (CST) Subject: [Scipy-svn] r2313 - trunk/Lib/sandbox/models Message-ID: <20061108060430.1218739C04B@new.scipy.org> Author: timl Date: 2006-11-08 00:04:22 -0600 (Wed, 08 Nov 2006) New Revision: 2313 Modified: trunk/Lib/sandbox/models/glm.py trunk/Lib/sandbox/models/mixed.py Log: remove unused/needed **keywords args Modified: trunk/Lib/sandbox/models/glm.py =================================================================== --- trunk/Lib/sandbox/models/glm.py 2006-11-08 06:02:34 UTC (rev 2312) +++ trunk/Lib/sandbox/models/glm.py 2006-11-08 06:04:22 UTC (rev 2313) @@ -6,7 +6,7 @@ niter = 10 - def __init__(self, design, family=family.Gaussian(), **keywords): + def __init__(self, design, family=family.Gaussian()): self.family = family self.weights = 1 self.initialize(design) @@ -59,12 +59,12 @@ if results is None: results = self.results - if Y is None: Y = self.Y + if Y is None: + Y = self.Y resid = Y - results.mu return (N.power(resid, 2) / self.family.variance(results.mu)).sum() / results.df_resid - def fit(self, Y, **keywords): - + def fit(self, Y): self.Y = N.asarray(Y, N.float64) iter(self) self.results = wls_model.fit(self, self.family.link.initialize(Y)) Modified: trunk/Lib/sandbox/models/mixed.py =================================================================== --- trunk/Lib/sandbox/models/mixed.py 2006-11-08 06:02:34 UTC (rev 2312) +++ trunk/Lib/sandbox/models/mixed.py 2006-11-08 06:04:22 UTC (rev 2313) @@ -144,7 +144,7 @@ Vol. 82, No. 397. (Mar., 1987), pp. 97-105. 
""" - def __init__(self, units, response, fixed=I, random=I, **extra): + def __init__(self, units, response, fixed=I, random=I): self.units = units self.m = len(self.units) From scipy-svn at scipy.org Wed Nov 8 01:08:12 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 8 Nov 2006 00:08:12 -0600 (CST) Subject: [Scipy-svn] r2314 - trunk/Lib/sandbox/models Message-ID: <20061108060812.634E839C04B@new.scipy.org> Author: timl Date: 2006-11-08 00:07:57 -0600 (Wed, 08 Nov 2006) New Revision: 2314 Modified: trunk/Lib/sandbox/models/regression.py Log: cleanup keyword args and fix class structure Modified: trunk/Lib/sandbox/models/regression.py =================================================================== --- trunk/Lib/sandbox/models/regression.py 2006-11-08 06:04:22 UTC (rev 2313) +++ trunk/Lib/sandbox/models/regression.py 2006-11-08 06:07:57 UTC (rev 2314) @@ -10,14 +10,14 @@ A simple ordinary least squares model. """ - def logL(self, b, Y, **extra): + def logL(self, b, Y): return -scipy.linalg.norm(self.whiten(Y) - N.dot(self.wdesign, b))**2 / 2. - def __init__(self, design, **keywords): - LikelihoodModel.__init__(self, **keywords) + def __init__(self, design): + LikelihoodModel.__init__(self) self.initialize(design) - def initialize(self, design, **keywords): + def initialize(self, design): self.design = design self.wdesign = self.whiten(design) self.calc_beta = L.pinv(self.wdesign) @@ -41,7 +41,7 @@ lfit.predict = N.dot(self.design, lfit.beta) - def fit(self, Y, **keywords): + def fit(self, Y): """ Full \'fit\' of the model including estimate of covariance matrix, (whitened) residuals and scale. @@ -70,16 +70,16 @@ determine the self.whiten method from AR(p) parameters. """ - def __init__(self, design, rho=0, **keywords): - LikelihoodModel.__init__(self, **keywords) + def __init__(self, design, rho=0): self.rho = rho - self.initialize(design) + ols_model.__init__(self, design) + def whiten(self, X): factor = 1. / N.sqrt(1 - self.rho**2) return N.concatenate([[X[0]], (X[1:] - self.rho * X[0:-1]) * factor]) -class wls_model(ar_model): +class wls_model(ols_model): """ A regression model with diagonal but non-identity covariance @@ -88,11 +88,11 @@ """ - def __init__(self, design, weights=1, **keywords): - LikelihoodModel.__init__(self, **keywords) + def __init__(self, design, weights=1): self.weights = weights - self.initialize(design) + ols_model.__init__(self, design) + def whiten(self, X): if X.ndim == 1: return X * N.sqrt(self.weights) @@ -125,14 +125,13 @@ raise ValueError, 'need normalized residuals to estimate standard deviation' sdd = utils.recipr(self.sd) / N.sqrt(self.df) - norm_resid = self.resid * N.multiply.outer(N.ones(self.Y.shape[0]), sdd) - return norm_resid + return self.resid * N.multiply.outer(N.ones(self.Y.shape[0]), sdd) + def predict(self, design): """ Return fitted values from a design matrix. """ - return N.dot(design, self.beta) def Rsq(self, adjusted=False): From scipy-svn at scipy.org Thu Nov 9 11:00:44 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 9 Nov 2006 10:00:44 -0600 (CST) Subject: [Scipy-svn] r2315 - in trunk/Lib/sandbox/models: . 
From scipy-svn at scipy.org  Wed Nov  8 01:04:30 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 8 Nov 2006 00:04:30 -0600 (CST)
Subject: [Scipy-svn] r2313 - trunk/Lib/sandbox/models
Message-ID: <20061108060430.1218739C04B@new.scipy.org>

Author: timl
Date: 2006-11-08 00:04:22 -0600 (Wed, 08 Nov 2006)
New Revision: 2313

Modified:
   trunk/Lib/sandbox/models/glm.py
   trunk/Lib/sandbox/models/mixed.py
Log:
remove unused/unneeded **keywords args

Modified: trunk/Lib/sandbox/models/glm.py
===================================================================
--- trunk/Lib/sandbox/models/glm.py	2006-11-08 06:02:34 UTC (rev 2312)
+++ trunk/Lib/sandbox/models/glm.py	2006-11-08 06:04:22 UTC (rev 2313)
@@ -6,7 +6,7 @@

     niter = 10

-    def __init__(self, design, family=family.Gaussian(), **keywords):
+    def __init__(self, design, family=family.Gaussian()):
         self.family = family
         self.weights = 1
         self.initialize(design)
@@ -59,12 +59,12 @@
         if results is None:
             results = self.results
-        if Y is None: Y = self.Y
+        if Y is None:
+            Y = self.Y
         resid = Y - results.mu
         return (N.power(resid, 2) / self.family.variance(results.mu)).sum() / results.df_resid

-    def fit(self, Y, **keywords):
-
+    def fit(self, Y):
         self.Y = N.asarray(Y, N.float64)
         iter(self)
         self.results = wls_model.fit(self, self.family.link.initialize(Y))

Modified: trunk/Lib/sandbox/models/mixed.py
===================================================================
--- trunk/Lib/sandbox/models/mixed.py	2006-11-08 06:02:34 UTC (rev 2312)
+++ trunk/Lib/sandbox/models/mixed.py	2006-11-08 06:04:22 UTC (rev 2313)
@@ -144,7 +144,7 @@
     Vol. 82, No. 397. (Mar., 1987), pp. 97-105.
     """

-    def __init__(self, units, response, fixed=I, random=I, **extra):
+    def __init__(self, units, response, fixed=I, random=I):
         self.units = units
         self.m = len(self.units)
+ """ + + try: + power = float(power) + except: + raise ValueError, 'expecting a float' + + if power == int(power): + name = '%s^%d' % (self.name, int(power)) + else: + name = '%s^%0.2f' % (self.name, power) + + value = quantitative(name, func=self, transform=lambda x: N.power(x, power)) + value.power = power + value.namespace = self.namespace + return value + + def __init__(self, name, func=None, termname=None): self.name = name - + self.__namespace = None if termname is None: self.termname = name else: @@ -29,8 +51,13 @@ if func: self.func = func - namespace[self.termname] = self + # Namespace in which self.name will be looked up in, if needed + def _get_namespace(self): return self.__namespace or default_namespace + def _set_namespace(self, value): self.__namespace = value + def _del_namespace(self): del self.__namespace + namespace = property(_get_namespace, _set_namespace, _del_namespace) + def __str__(self): """ '' % self.termname @@ -41,8 +68,10 @@ """ formula(self) + formula(other) """ - other = formula(other) - return other + self + other = formula(other, namespace=self.namespace) + f = other + self + f.namespace = self.namespace + return f def __mul__(self, other): """ @@ -50,16 +79,18 @@ """ if other.name is 'intercept': - return formula(self) + f = formula(self, namespace=self.namespace) elif self.name is 'intercept': - return formula(other) - - other = formula(other) - return other * self + f = formula(other, namespace=other.namespace) + else: + other = formula(other, namespace=self.namespace) + f = other * self + f.namespace = self.namespace + return f def names(self): """ - Return the names of the columns in design associated to self, + Return the names of the columns in design associated to the terms, i.e. len(self.names()) = self().shape[0]. """ if type(self.name) is types.StringType: @@ -67,28 +98,28 @@ else: return list(self.name) - def __call__(self, namespace=terms, usefn=True, args=(), kw={}): + def __call__(self, *args, **kw): """ Return the columns associated to self in a design matrix. - The default behaviour is to return namespace[self.termname] - where namespace defaults to globals(). + If the term has no 'func' attribute, it returns + + self.namespace[self.termname] - If usefn, and self.func exists then return + else, it returns + + self.func(*args, **kw) - self.func(*args, **kw) - - with kw['namespace'] = namespace. """ - kw['namespace'] = namespace - if not hasattr(self, 'func') or not usefn: - val = namespace[self.termname] - if isinstance(val, formula): - val = val(*args, **kw) - elif callable(val): - val = val(*args, **kw) - else: - val = self.func(*args, **kw) + if not hasattr(self, 'func'): + val = self.namespace[self.termname] + else: + val = self.func + if callable(val): + if hasattr(val, "namespace"): + val.namespace = self.namespace + val = val(*args, **kw) + val = N.asarray(val) return N.squeeze(val) @@ -98,6 +129,8 @@ A categorical factor. 
""" + + def __init__(self, termname, keys, ordinal=False): """ factor is initialized with keys, representing all valid @@ -111,38 +144,48 @@ self.ordinal = ordinal if self.ordinal: - self._sort = True + name = self.name + else: + name = ['(%s==%s)' % (self.termname, str(key)) for key in self.keys] - def func(namespace=terms, key=key): - v = namespace[self._name] - # FIXME: n is not defined here - col = [float(self.keys.index(v[i])) for i in range(n)] - return N.array(col) - term.__init__(self, self.name, func=func) + term.__init__(self, name, termname=self.termname, func=self.get_columns) - else: - def func(namespace=terms): - v = namespace[self._name] - value = [] - for key in self.keys: - col = [float((v[i] == key)) for i in range(len(v))] - value.append(col) - return N.array(value) - term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname) + def get_columns(self, *args, **kw): + """ + Calling function for factor instance. + """ - def __call__(self, namespace=terms, values=False, args=(), kw={}): - """ - Return either the columns in the design matrix, or the - actual values of the factor, if values==True. - """ + v = self.namespace[self._name] + while True: + if callable(v): + if hasattr(v, "namespace"): + v.namespace = self.namespace + v = v(*args, **kw) + else: break - if namespace is None: - namespace = globals() - if not values: - return term.__call__(self, namespace=namespace, usefn=True, args=args, kw=kw) - else: - return term.__call__(self, namespace=namespace, usefn=False, args=args, kw=kw) + if self.ordinal: + col = [float(self.keys.index(v[i])) for i in range(len(self.keys))] + return N.array(col) + else: + n = len(v) + value = [] + for key in self.keys: + col = [float((v[i] == key)) for i in range(n)] + value.append(col) + return N.array(value) + + def values(self, *args, **kw): + """ + Return the keys of the factor, rather than the columns of the design + matrix. + """ + + del(self.func) + val = self(*args, **kw) + self.func = self.get_columns + return val + def verify(self, values): """ Verify that all values correspond to valid keys in self. @@ -155,11 +198,14 @@ """ formula(self) + formula(other) - When adding \'intercept\' to a factor, this just returns self. + When adding \'intercept\' to a factor, this just returns + + formula(self, namespace=self.namespace) + """ if other.name is 'intercept': - return formula(self) + return formula(self, namespace=self.namespace) else: return term.__add__(self, other) @@ -172,78 +218,58 @@ if reference is None: reference = 0 - def func(namespace=terms, reference=reference, names=self.names(), **keywords): - value = N.asarray(self(namespace=namespace, **keywords)) + names = self.names() + + def maineffect_func(value, reference=reference): rvalue = [] keep = range(value.shape[0]) keep.pop(reference) for i in range(len(keep)): rvalue.append(value[keep[i]] - value[reference]) - return rvalue + return N.array(rvalue) keep = range(len(self.names())) keep.pop(reference) __names = self.names() _names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))] - return term(_names, func=func, termname='%s:maineffect' % self.termname) + value = quantitative(_names, func=self, + termname='%s:maineffect' % self.termname, + transform=maineffect_func) + value.namespace = self.namespace + return value class quantitative(term): """ - A subclass of term that presumes namespace[self.termname] is - an ndarray. 
+ A subclass of term that can be used to apply point transformations + of another term, i.e. to take powers: - Basically used for __pow__ method and (looking forward) for splines. + >>> import numpy as N + >>> from scipy.sandbox.models import formula + >>> X = N.linspace(0,10,101) + >>> x = formula.term('X') + >>> x.namespace={'X':X} + >>> x2 = x**2 + >>> print N.allclose(x()**2, x2()) + True + >>> x3 = formula.quantitative('x2', func=x, transform=lambda x: x**2) + >>> x3.namespace = x.namespace + >>> print N.allclose(x()**2, x3()) + True """ - def __pow__(self, power): - """ - Raise the quantitative term's values to an integer power, i.e. - polynomial. - """ - try: - power = float(power) - except: - raise ValueError, 'expecting a float' + def __init__(self, name, func=None, termname=None, transform=lambda x: x): + self.transform = transform + term.__init__(self, name, func=func, termname=termname) - if power == int(power): - name = '%s^%d' % (self.name, int(power)) - else: - name = '%s^%0.2f' % (self.name, power) + def __call__(self, *args, **kw): + """ + A quantitative is just like term, except there is an additional + transformation: self.transform. + """ + return self.transform(term.__call__(self, *args, **kw)) - def func(obj=self, namespace=terms, power=power, **extra): - x = N.asarray(obj(namespace=namespace, **extra)) - return N.power(x, power) - value = term(name, func=func) - value.power = power - return value - -class func_quant(quantitative): - """ - A term for a quantitative function of a term. - """ - - counter = 0 - - def __init__(self, x, f): - """ - Return a term whose values are f(x(namespace=namespace)). - """ - - self.f = f - self.x = x - def func(namespace=terms, f=self.f): - x = namespace[x.name] - return f(x) - # FIXME: quant is not defined here. - try: - termname = '%s(%s)' % (f.func_name, quant.name) - except: - termname = 'f%d(%s)' % (FuncQuant.counter, quant.name) - FuncQuant.counter += 1 - term.__init__(self, termname, func=func) - class formula: """ @@ -256,11 +282,16 @@ of the columns of the two formulas. """ + def _get_namespace(self): return self.__namespace or default_namespace + def _set_namespace(self, value): self.__namespace = value + def _del_namespace(self): del self.__namespace + namespace = property(_get_namespace, _set_namespace, _del_namespace) + def _terms_changed(self): self._names = self.names() self._termnames = self.termnames() - def __init__(self, terms): + def __init__(self, termlist, namespace=default_namespace): """ Create a formula from either: @@ -270,12 +301,13 @@ """ - if isinstance(terms, formula): - self.terms = copy.copy(list(terms.terms)) - elif type(terms) is types.ListType: - self.terms = terms - elif isinstance(terms, term): - self.terms = [terms] + self.__namespace = namespace + if isinstance(termlist, formula): + self.terms = copy.copy(list(termlist.terms)) + elif type(termlist) is types.ListType: + self.terms = termlist + elif isinstance(termlist, term): + self.terms = [termlist] else: raise ValueError @@ -290,28 +322,38 @@ value += [term.termname] return '<formula: %s>' % ' + '.join(value) - def __call__(self, namespace=terms, nrow=-1, args=(), kw={}): + def __call__(self, *args, **kw): + """ Create (transpose) of the design matrix of the formula within namespace. Extra arguments are passed to each term instance. If the formula just contains an intercept, then the keyword argument 'n' indicates the number of rows (observations).
""" - - if namespace is None: - namespace = globals() + allvals = [] intercept = False - for term in self.terms: - val = term(namespace=namespace, args=args, kw=kw) - if term.termname == 'intercept': - intercept = True - elif val.ndim == 1: + iindex = 0 + for t in self.terms: + + t.namespace = self.namespace + val = t(*args, **kw) + + isintercept = False + if hasattr(t, "termname"): + if t.termname == 'intercept': + intercept = True + isintercept = True + interceptindex = iindex + allvals.append(None) + + if val.ndim == 1 and not isintercept: val.shape = (1, val.shape[0]) allvals.append(val) - else: + elif not isintercept: allvals.append(val) - + iindex += 1 + if not intercept: try: allvals = N.concatenate(allvals) @@ -319,9 +361,12 @@ pass else: if allvals != []: + if interceptindex > 0: + n = allvals[0].shape[1] + else: + n = allvals[1].shape[1] + allvals[interceptindex] = N.ones((1,n), N.float64) allvals = N.concatenate(allvals) - n = allvals.shape[1] - allvals = N.concatenate([N.ones((1,n), N.float64), allvals]) elif nrow <= 1: raise ValueError, 'with only intercept in formula, keyword \'nrow\' argument needed' else: @@ -390,11 +435,11 @@ names += [term.termname] return names - def design(self, namespace=terms, args=(), kw={}): + def design(self, *args, **kw): """ - transpose(self(namespace=namespace, **keywords)) + transpose(self(*args, **kw)) """ - return self(namespace=namespace, args=args, kw=kw).T + return self(*args, **kw).T def __mul__(self, other, nested=False): """ @@ -404,7 +449,7 @@ TO DO: check for nesting relationship. Should not be too difficult. """ - other = formula(other) + other = formula(other, namespace=self.namespace) selftermnames = self.termnames() othertermnames = other.termnames() @@ -428,37 +473,45 @@ if self.terms[i].name is 'intercept': _term = other.terms[j] + _term.namespace = other.namespace + elif other.terms[j].name is 'intercept': _term = self.terms[i] - + _term.namespace = self.namespace else: names = [] - for r in range(len(selfnames)): - for s in range(len(othernames)): + + d1 = len(selfnames) + d2 = len(othernames) + + for r in range(d1): + for s in range(d2): name = '%s*%s' % (str(selfnames[r]), str(othernames[s])) pieces = name.split('*') pieces.sort() name = '*'.join(pieces) names.append(name) - def func(namespace=terms, selfterm=self.terms[i], otherterm=other.terms[j], **extra): - value = [] - selfval = N.array(selfterm(namespace=namespace, **extra)) - if len(selfval.shape) == 1: - selfval.shape = (1, selfval.shape[0]) - otherval = N.array(otherterm(namespace=namespace, **extra)) - if len(otherval.shape) == 1: - otherval.shape = (1, otherval.shape[0]) + def product_func(value, d1=d1, d2=d2): - for r in range(selfval.shape[0]): - for s in range(otherval.shape[0]): - value.append(selfval[r] * otherval[s]) + out = [] + for r in range(d1): + for s in range(d2): + out.append(value[r] * value[d1+s]) + return N.array(out) - return N.array(value) - _term = term(names, func=func, termname=termname) + sumterms = self + other + sumterms.terms = [self, other] # enforce the order we want + sumterms.namespace = self.namespace + + _term = quantitative(names, func=sumterms, termname=termname, + transform=product_func) + _term.namespace = self.namespace + + terms.append(_term) - return formula(terms) + return formula(terms, namespace=self.namespace) def __add__(self, other): @@ -469,12 +522,12 @@ terms in the formula are sorted alphabetically. 
""" - other = formula(other) + other = formula(other, namespace=self.namespace) terms = self.terms + other.terms pieces = [(term.name, term) for term in terms] pieces.sort() terms = [piece[1] for piece in pieces] - return formula(terms) + return formula(terms, namespace=self.namespace) def __sub__(self, other): @@ -484,7 +537,7 @@ function does not raise an exception. """ - other = formula(other) + other = formula(other, namespace=self.namespace) terms = copy.copy(self.terms) for term in other.terms: @@ -492,7 +545,7 @@ if terms[i].termname == term.termname: terms.pop(i) break - return formula(terms) + return formula(terms, namespace=self.namespace) def isnested(A, B, namespace=globals()): """ Modified: trunk/Lib/sandbox/models/tests/test_formula.py =================================================================== --- trunk/Lib/sandbox/models/tests/test_formula.py 2006-11-08 06:07:57 UTC (rev 2314) +++ trunk/Lib/sandbox/models/tests/test_formula.py 2006-11-09 16:00:41 UTC (rev 2315) @@ -56,29 +56,51 @@ self.formula = self.terms[0] for i in range(1, 10): self.formula += self.terms[i] + self.formula.namespace = self.namespace def test_str(self): s = str(self.formula) def test_call(self): - x = self.formula(namespace=self.namespace) + x = self.formula() self.assertEquals(N.array(x).shape, (10, 40)) def test_design(self): - x = self.formula.design(namespace=self.namespace) + x = self.formula.design() self.assertEquals(x.shape, (40, 10)) def test_product(self): prod = self.terms[0] * self.terms[2] self.formula += prod - x = self.formula.design(namespace=self.namespace) + x = self.formula.design() + p = self.formula['A*C'] col = self.formula.termcolumns(prod, dict=False) assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2]) + assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2]) + + def test_intercept1(self): + prod = self.terms[0] * self.terms[2] + self.formula += formula.I + icol = self.formula.names().index('intercept') + assert_almost_equal(self.formula()[icol], N.ones((40,))) + def test_intercept2(self): + prod = self.terms[0] * self.terms[2] + self.formula += formula.I + icol = self.formula.names().index('intercept') + assert_almost_equal(self.formula()[icol], N.ones((40,))) + + def test_intercept3(self): + prod = self.terms[0] * formula.I + prod.namespace = self.formula.namespace + assert_almost_equal(N.squeeze(prod()), self.terms[0]()) + + + def test_contrast1(self): term = self.terms[0] + self.terms[2] c = contrast.Contrast(term, self.formula) - c.getmatrix(namespace=self.namespace) + c.getmatrix() col1 = self.formula.termcolumns(self.terms[0], dict=False) col2 = self.formula.termcolumns(self.terms[1], dict=False) test = [[1] + [0]*9, [0]*2 + [1] + [0]*7] @@ -90,24 +112,67 @@ self.namespace['zero'] = N.zeros((40,), N.float64) term = dummy + self.terms[2] c = contrast.Contrast(term, self.formula) - c.getmatrix(namespace=self.namespace) + c.getmatrix() test = [0]*2 + [1] + [0]*7 assert_almost_equal(c.matrix, test) def test_contrast3(self): - X = self.formula.design(namespace=self.namespace) + X = self.formula.design() P = N.dot(X, L.pinv(X)) dummy = formula.term('noise') resid = N.identity(40) - P self.namespace['noise'] = N.transpose(N.dot(resid, R.standard_normal((40,5)))) - term = dummy + self.terms[2] - c = contrast.Contrast(term, self.formula) - y = term(namespace=self.namespace) - c.getmatrix(namespace=self.namespace) + terms = dummy + self.terms[2] + terms.namespace = self.formula.namespace + c = contrast.Contrast(terms, self.formula) + c.getmatrix() 
self.assertEquals(c.matrix.shape, (10,)) + def test_power(self): + + t = self.terms[2] + t2 = formula.quantitative('t', func=t)**2 + t.namespace = t2.namespace = self.formula.namespace + assert_almost_equal(t()**2, t2()) + + def test_quantitative(self): + t = self.terms[2] + sint = formula.quantitative('t', func=t, transform=N.sin) + t.namespace = sint.namespace = self.formula.namespace + assert_almost_equal(N.sin(t()), sint()) + + def test_factor1(self): + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + self.assertEquals(list(fac.values()), f) + + def test_factor2(self): + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + self.assertEquals(fac().shape, (3,30)) + + def test_factor3(self): + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + m = fac.main_effect(reference=1) + self.assertEquals(m().shape, (2,30)) + + def test_factor4(self): + f = ['a','b','c']*10 + fac = formula.factor('ff', set(f)) + fac.namespace = {'ff':f} + m = fac.main_effect(reference=2) + r = N.array([N.identity(3)]*10) + r.shape = (30,3) + r = r.T + _m = N.array([r[0]-r[2],r[1]-r[2]]) + assert_almost_equal(_m, m()) + def test_contrast4(self): f = self.formula + self.terms[5] + self.terms[5] @@ -115,7 +180,7 @@ estimable = False c = contrast.Contrast(self.terms[5], f) - c.getmatrix(namespace=self.namespace) + c.getmatrix() self.assertEquals(estimable, False) From scipy-svn at scipy.org Fri Nov 10 10:29:06 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 10 Nov 2006 09:29:06 -0600 (CST) Subject: [Scipy-svn] r2316 - trunk/Lib/io Message-ID: <20061110152906.7B2F439C09E@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-10 09:29:03 -0600 (Fri, 10 Nov 2006) New Revision: 2316 Modified: trunk/Lib/io/mio5.py Log: Fix numpy python 2.5 indexing oddness Modified: trunk/Lib/io/mio5.py =================================================================== --- trunk/Lib/io/mio5.py 2006-11-09 16:00:41 UTC (rev 2315) +++ trunk/Lib/io/mio5.py 2006-11-10 15:29:03 UTC (rev 2316) @@ -391,7 +391,7 @@ self.obj_template = mat_struct() def get_raw_array(self): - namelength = self.read_element() + namelength = self.read_element()[0] # get field names names = self.read_element() splitnames = [names[i:i+namelength] for i in \ From scipy-svn at scipy.org Thu Nov 16 00:41:54 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 15 Nov 2006 23:41:54 -0600 (CST) Subject: [Scipy-svn] r2317 - in trunk/Lib/sandbox/pyem: . 
profile_data tests Message-ID: <20061116054154.95C5C39C0EC@new.scipy.org> Author: cdavid Date: 2006-11-15 23:41:35 -0600 (Wed, 15 Nov 2006) New Revision: 2317 Added: trunk/Lib/sandbox/pyem/misc.py trunk/Lib/sandbox/pyem/test_reg.py Modified: trunk/Lib/sandbox/pyem/Changelog trunk/Lib/sandbox/pyem/TODO trunk/Lib/sandbox/pyem/_c_densities.py trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/info.py trunk/Lib/sandbox/pyem/profile_data/profile_densities.py trunk/Lib/sandbox/pyem/profile_data/profile_gmm.py trunk/Lib/sandbox/pyem/setup.py trunk/Lib/sandbox/pyem/tests/test_densities.py Log: * bump to 0.5.6 * various cosmetic changes Modified: trunk/Lib/sandbox/pyem/Changelog =================================================================== --- trunk/Lib/sandbox/pyem/Changelog 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/Changelog 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,3 +1,11 @@ +pyem (0.5.6) Thu, 16 Nov 2006 14:18:19 +0900 + + * bump to 0.5.6 + * Add __str__ and __repr__ for GM and GMM classes + * Add regularization method (but not used yet). + * Change 'f<8' to N.float64 for ctype enabled densities + * Move 'Magic numbers' into a separated python file, misc.py + pyem (0.5.5) Tue, 24 Oct 2006 18:30:54 +0900 * Fix a bug inmultiple_gaussian_den which prevents Modified: trunk/Lib/sandbox/pyem/TODO =================================================================== --- trunk/Lib/sandbox/pyem/TODO 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/TODO 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,12 +1,12 @@ -# Last Change: Fri Oct 20 12:00 PM 2006 J +# Last Change: Thu Nov 09 06:00 PM 2006 J Things which must be implemented for a 1.0 version (in importante order) - - A class for learning + a classifier - - test for various length and model size of gaussian densities/GM and GMM - - a small help/tutorial - - be complient with scipy module dev guidelines (DEVELOPERS.TXT) + - A classifier + - basic regularization Things which would be nice (after 1.0 version): + - Bayes prior (hard, suppose MCMC) + - variational Bayes (hard ? Not sure yet) - Integrate libem (libem should be modified so that it would be easy to package and distribute) - Other initialization schemes Modified: trunk/Lib/sandbox/pyem/_c_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/_c_densities.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/_c_densities.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,7 +1,7 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Thu Oct 19 06:00 PM 2006 J +# Last Change: Thu Nov 09 05:00 PM 2006 J # This module uses a C implementation through ctypes, for diagonal cases # TODO: @@ -26,12 +26,12 @@ # Requirements for diag gden _gden = load_library('c_gden.so', __file__) -arg1 = ndpointer(dtype=' MAX_DEV: + if N.fabs(N.sum(w, 0) - 1) > _MAX_DBL_DEV: raise GmParamError('weight does not sum to 1') if not len(w.shape) == 1: Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Tue Oct 24 06:00 PM 2006 J +# Last Change: Thu Nov 16 02:00 PM 2006 J # TODO: # - which methods to avoid va shrinking to 0 ? 
There are several options, @@ -15,6 +15,8 @@ from kmean import kmean from gauss_mix import GM +from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND + # Error classes class GmmError(Exception): """Base class for exceptions in this module.""" @@ -38,12 +40,9 @@ # sense to use inheritance for # interface specification in python, since its # dynamic type systeme. -# Anyway, a mixture class should encapsulates all details concerning a mixture model: -# - internal parameters for the pdfs -# - can compute sufficient statistics for EM -# - can sample a model -# - can generate random valid parameters for a new pdf (using class method) -class MixtureModel: +# Anyway, a mixture model class should encapsulates all details +# concerning getting sufficient statistics (SS), likelihood and bic. +class MixtureModel(object): pass class ExpMixtureModel(MixtureModel): @@ -90,7 +89,7 @@ """ Init the model at random.""" k = self.gm.k d = self.gm.d - if mode == 'diag': + if self.gm.mode == 'diag': w = N.ones(k) / k mu = randn(k, d) va = N.fabs(randn(k, d)) @@ -120,6 +119,7 @@ self.init = init_methods[init] self.isinit = False + self.initst = init def sufficient_statistics(self, data): """ Return normalized and non-normalized sufficient statistics @@ -168,7 +168,10 @@ elif self.gm.mode == 'full': # In full mode, this is the bottleneck: the triple loop # kills performances. This is pretty straightforward - # algebra, so computing it in C should not be too difficult + # algebra, so computing it in C should not be too difficult. The + # real problem is to have valid covariance matrices, and to keep + # them positive definite, maybe with special storage... Not sure + # it really worth the risk mu = N.zeros((k, d)) va = N.zeros((k*d, d)) @@ -239,6 +242,13 @@ n = N.shape(data)[0] return bic(lk, free_deg, n) + # syntactic sugar + def __repr__(self): + repre = "" + repre += "Gaussian Mixture Model\n" + repre += " -> initialized by %s\n" % str(self.initst) + repre += self.gm.__repr__() + return repre class EM: """An EM trainer. An EM trainer @@ -265,6 +275,8 @@ Returns: likelihood (one value per iteration). """ + if not isinstance(model, MixtureModel): + raise TypeError("expect a MixtureModel as a model") # Initialize the data (may do nothing depending on the model) model.init(data) @@ -282,9 +294,62 @@ model.update_em(data, g) if has_em_converged(like[i], like[i-1], thresh): return like[0:i] + # # Em computation, with computation of the likelihood + # g, tgd = model.sufficient_statistics(data) + # like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + # model.update_em(data, g) + # for i in range(1, maxiter): + # print "=== Iteration %d ===" % i + # isreg = False + # for j in range(model.gm.k): + # va = model.gm.va[j] + # if va.any() < _MIN_INV_COND: + # isreg = True + # print "\tregularization detected" + # print "\t" + str(va) + # model.gm.va[j] = regularize_diag(va) + # print "\t" + str(va) + ", " + str(model.gm.va[j]) + # print "\t" + str(gauss_den(data, model.gm.mu[j], model.gm.va[j])) + # print "\tend regularization detected" + # var = va + # + # g, tgd = model.sufficient_statistics(data) + # try: + # assert not( (N.isnan(tgd)).any() ) + # if isreg: + # print var + # except AssertionError: + # print "tgd is nan..." 
+ # print model.gm.va[13,:] + # print 1/model.gm.va[13,:] + # print densities.gauss_den(data, model.gm.mu[13], model.gm.va[13]) + # print N.isnan((multiple_gauss_den(data, model.gm.mu, model.gm.va))).any() + # print "Exiting" + # import sys + # sys.exit(-1) + # like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + # model.update_em(data, g) + # assert not( model.gm.va.any() < 1e-6) + # if has_em_converged(like[i], like[i-1], thresh): + # return like[0:i] return like +def regularize_diag(variance, alpha = _DEF_ALPHA): + delta = N.sum(variance) / variance.size + if delta > _MIN_DBL_DELTA: + return variance + alpha * delta + else: + return variance + alpha * _MIN_DBL_DELTA + +def regularize_full(variance, alpha = _DEF_ALPHA): + # Trace of a positive definite matrix is always > 0 + delta = N.trace(variance) / variance.shape[0] + if delta > _MIN_DBL_DELTA: + return variance + alpha * delta + else: + return variance + alpha * _MIN_DBL_DELTA + # Misc functions def bic(lk, deg, n): """ Expects lk to be log likelihood """ Modified: trunk/Lib/sandbox/pyem/info.py =================================================================== --- trunk/Lib/sandbox/pyem/info.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/info.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -60,7 +60,7 @@ Copyright: David Cournapeau 2006 License: BSD-style (see LICENSE.txt in main source directory) """ -version = '0.5.5' +version = '0.5.6' depends = ['linalg', 'stats'] ignore = False Added: trunk/Lib/sandbox/pyem/misc.py =================================================================== --- trunk/Lib/sandbox/pyem/misc.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/misc.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -0,0 +1,21 @@ +# Last Change: Fri Nov 10 10:00 AM 2006 J + +#===================================================================== +# "magic number", that is number used to control regularization and co +# Change them at your risk !
+#===================================================================== + +# max deviation allowed when comparing double (this is actually stupid, +# I should actually use a number of decimals) +_MAX_DBL_DEV = 1e-10 + +# max conditional number allowed +_MAX_COND = 1e8 +_MIN_INV_COND = 1/_MAX_COND + +# Default alpha for regularization +_DEF_ALPHA = 1e-1 + +# Default min delta for regularization +_MIN_DBL_DELTA = 1e-5 + Modified: trunk/Lib/sandbox/pyem/profile_data/profile_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/profile_densities.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/profile_data/profile_densities.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,7 +1,7 @@ import numpy as N from numpy.random import randn -from pyem import densities as D -from pyem import _c_densities as DC +from scipy.sandbox.pyem import densities as D +from scipy.sandbox.pyem import _c_densities as DC #import tables def bench(func, mode = 'diag'): Modified: trunk/Lib/sandbox/pyem/profile_data/profile_gmm.py =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/profile_gmm.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/profile_data/profile_gmm.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,16 +1,14 @@ import numpy as N -from pyem import GM, GMM +from scipy.sandbox.pyem import GM, GMM import copy -from pyem._c_densities import gauss_den - def bench1(mode = 'diag'): #=========================================== # GMM of 20 comp, 20 dimension, 1e4 frames #=========================================== d = 15 k = 30 - nframes = 1e4 + nframes = 1e5 niter = 10 mode = 'diag' Modified: trunk/Lib/sandbox/pyem/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/setup.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/setup.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Thu Oct 19 07:00 PM 2006 J +# Last Change: Thu Nov 09 06:00 PM 2006 J # TODO: # - check how to handle cmd line build options with distutils and use # it in the building process @@ -16,14 +16,14 @@ DESCRIPTION ='A python module for Expectation Maximization learning of mixtures pdf', AUTHOR ='David Cournapeau', AUTHOR_EMAIL='david at ar.media.kyoto-u.ac.jp', -URL ='http://ar.media.kyoto-u.ac.jp/members/david', +URL ='http://ar.media.kyoto-u.ac.jp/members/david/softwares/pyem', def configuration(parent_package='',top_path=None, package_name='pyem'): from numpy.distutils.misc_util import Configuration config = Configuration(package_name,parent_package,top_path, version = VERSION) config.add_data_dir('tests') - config.add_subpackage('profile_data') + config.add_data_dir('profile_data') config.add_extension('c_gden', #define_macros=[('LIBSVM_EXPORTS', None), # ('LIBSVM_DLL', None)], @@ -34,7 +34,8 @@ if __name__ == "__main__": from numpy.distutils.core import setup #setup(**configuration(top_path='').todict()) - setup(**configuration(top_path='',)) + #setup(**configuration(top_path='')) + setup(configuration=configuration) # from distutils.core import setup, Extension # from pyem import version as pyem_version # Added: trunk/Lib/sandbox/pyem/test_reg.py =================================================================== --- trunk/Lib/sandbox/pyem/test_reg.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/test_reg.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -0,0 +1,44 @@ +import numpy as N + +from gauss_mix import GM +from gmm_em import GMM, EM + +from numpy.random import seed + +def test_reg(): + seed(0) + # Generate data with a few components + d = 2 + k = 1 + n = 500 + + w, mu, va = GM.gen_param(d, k) + gm = GM.fromvalues(w, mu, va) + + data = gm.sample(n) + + # Try to learn with an insane number of components + gmm = GMM(GM(d, 30), 'random') + + em = EM() + like= em.train(data, gmm, 20, 1e-20) + + # import pylab as P + # P.subplot(2, 1, 1) + # P.plot(data[:, 0], data[:, 1], '.') + # gmm.gm.plot() + # P.subplot(2, 1, 2) + # P.plot(like) + # print like + # P.show() + +if __name__ == "__main__": + # import hotshot, hotshot.stats + # profile_file = 'manyk.prof' + # prof = hotshot.Profile(profile_file, lineevents=1) + # prof.runcall(test_reg) + # p = hotshot.stats.load(profile_file) + # print p.sort_stats('cumulative').print_stats(20) + # prof.close() + test_reg() + Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2006-11-10 15:29:03 UTC (rev 2316) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2006-11-16 05:41:35 UTC (rev 2317) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Thu Oct 19 07:00 PM 2006 J +# Last Change: Thu Nov 09 05:00 PM 2006 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are From scipy-svn at scipy.org Thu Nov 16 04:24:17 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 16 Nov 2006 03:24:17 -0600 (CST) Subject: [Scipy-svn] r2318 - in trunk/Lib/sandbox/numexpr: . 
tests Message-ID: <20061116092417.B4B1039C09E@new.scipy.org> Author: cookedm Date: 2006-11-16 03:24:11 -0600 (Thu, 16 Nov 2006) New Revision: 2318 Modified: trunk/Lib/sandbox/numexpr/compiler.py trunk/Lib/sandbox/numexpr/expressions.py trunk/Lib/sandbox/numexpr/interp_body.c trunk/Lib/sandbox/numexpr/interpreter.c trunk/Lib/sandbox/numexpr/tests/test_numexpr.py Log: [numexpr] whitespace cleanups Modified: trunk/Lib/sandbox/numexpr/compiler.py =================================================================== --- trunk/Lib/sandbox/numexpr/compiler.py 2006-11-16 05:41:35 UTC (rev 2317) +++ trunk/Lib/sandbox/numexpr/compiler.py 2006-11-16 09:24:11 UTC (rev 2318) @@ -1,11 +1,12 @@ - import sys import numpy import interpreter, expressions -typecode_to_kind = {'b': 'bool', 'i': 'int', 'f': 'float', 'c': 'complex', 'n' : 'none'} -kind_to_typecode = {'bool': 'b', 'int': 'i', 'float': 'f', 'complex': 'c', 'none' : 'n'} +typecode_to_kind = {'b': 'bool', 'i': 'int', 'f': 'float', + 'c': 'complex', 'n' : 'none'} +kind_to_typecode = {'bool': 'b', 'int': 'i', 'float': 'f', + 'complex': 'c', 'none' : 'n'} type_to_kind = expressions.type_to_kind kind_to_type = expressions.kind_to_type @@ -477,17 +478,17 @@ collapseDuplicateSubtrees(ast) input_order = getInputOrder(ast, input_order) - constants_order, constants = getConstants(ast) - - if isReduction(ast): - ast.reg.temporary = False - - optimizeTemporariesAllocation(ast) - + constants_order, constants = getConstants(ast) + + if isReduction(ast): + ast.reg.temporary = False + + optimizeTemporariesAllocation(ast) + ast.reg.temporary = False - r_output = 0 - ast.reg.n = 0 - + r_output = 0 + ast.reg.n = 0 + r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) r_temps = setOrderedRegisterNumbers(constants_order, r_constants) @@ -526,12 +527,12 @@ r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) def getArg(pc, offset): - arg = ord(nex.program[pc+offset]) - op = rev_opcodes.get(ord(nex.program[pc])) - code = op.split('_')[1][offset-1] - if arg == 255: - return None - if code != 'n': + arg = ord(nex.program[pc+offset]) + op = rev_opcodes.get(ord(nex.program[pc])) + code = op.split('_')[1][offset-1] + if arg == 255: + return None + if code != 'n': if arg == 0: return 'r0' elif arg < r_constants: @@ -539,8 +540,8 @@ elif arg < r_temps: return 'c%d[%s]' % (arg, nex.constants[arg - r_constants]) else: - return 't%d' % (arg,) - else: + return 't%d' % (arg,) + else: return arg source = [] for pc in range(0, len(nex.program), 4): @@ -624,5 +625,3 @@ compiled_ex = _numexpr_cache[numexpr_key] = \ numexpr(ex, signature, copy_args, **kwargs) return compiled_ex(*arguments) - - Modified: trunk/Lib/sandbox/numexpr/expressions.py =================================================================== --- trunk/Lib/sandbox/numexpr/expressions.py 2006-11-16 05:41:35 UTC (rev 2317) +++ trunk/Lib/sandbox/numexpr/expressions.py 2006-11-16 09:24:11 UTC (rev 2318) @@ -32,8 +32,8 @@ for i, x in enumerate(args): if isConstant(x): args[i] = x = ConstantNode(x) - elif not isinstance(x, ExpressionNode): - raise TypeError( "unsupported object type: %s" % type(x) ) + if not isinstance(x, ExpressionNode): + raise TypeError("unsupported object type: %s" % (type(x),)) return f(*args) func.__name__ = f.__name__ func.__doc__ = f.__doc__ @@ -213,7 +213,7 @@ 'where' : where_func, 'complex' : func(complex, 'complex'), - + 'sum' : sum_func, 'prod' : prod_func, } Modified: trunk/Lib/sandbox/numexpr/interp_body.c 
=================================================================== --- trunk/Lib/sandbox/numexpr/interp_body.c 2006-11-16 05:41:35 UTC (rev 2317) +++ trunk/Lib/sandbox/numexpr/interp_body.c 2006-11-16 09:24:11 UTC (rev 2318) @@ -92,8 +92,8 @@ #define reduce_ptr (dest + flat_index(&store_index, j)) #define i_reduce *(long *)reduce_ptr #define f_reduce *(double *)reduce_ptr - #define cr_reduce *(double *)ptr - #define ci_reduce *((double *)ptr+1) + #define cr_reduce *(double *)ptr + #define ci_reduce *((double *)ptr+1) #define b_dest ((char *)dest)[j] #define i_dest ((long *)dest)[j] #define f_dest ((double *)dest)[j] @@ -114,7 +114,7 @@ #define f3 ((double *)x3)[j*sf3] #define c3r ((double *)x3)[j*sf3] #define c3i ((double *)x3)[j*sf3+1] - + double fa, fb; cdouble ca, cb; char *ptr; @@ -231,14 +231,14 @@ case OP_SUM_CCN: VEC_ARG1(ptr = reduce_ptr; cr_reduce += c1r; ci_reduce += c1i); - + case OP_PROD_IIN: VEC_ARG1(i_reduce *= i1); case OP_PROD_FFN: VEC_ARG1(f_reduce *= f1); case OP_PROD_CCN: VEC_ARG1(ptr = reduce_ptr; fa = cr_reduce*c1r - ci_reduce*c1i; ci_reduce = cr_reduce*c1i + ci_reduce*c1r; cr_reduce = fa); - + default: *pc_error = pc; return -3; Modified: trunk/Lib/sandbox/numexpr/interpreter.c =================================================================== --- trunk/Lib/sandbox/numexpr/interpreter.c 2006-11-16 05:41:35 UTC (rev 2317) +++ trunk/Lib/sandbox/numexpr/interpreter.c 2006-11-16 09:24:11 UTC (rev 2318) @@ -87,14 +87,14 @@ OP_REAL_FC, OP_IMAG_FC, OP_COMPLEX_CFF, - + OP_REDUCTION, - + OP_SUM, OP_SUM_IIN, OP_SUM_FFN, OP_SUM_CCN, - + OP_PROD, OP_PROD_IIN, OP_PROD_FFN, @@ -488,7 +488,7 @@ PyString_AsStringAndSize(program_object, (char **)&program, &n); return program[n-4]; -} +} static int get_reduction_axis(PyObject* program) { @@ -545,10 +545,10 @@ continue; } if ((op >= OP_REDUCTION) && pc != prog_len-4) { - PyErr_Format(PyExc_RuntimeError, + PyErr_Format(PyExc_RuntimeError, "invalid program: reduction operations must occur last"); return -1; - } + } for (argno = 0; ; argno++) { sig = op_signature(op, argno); if (sig == -1) { @@ -839,7 +839,7 @@ struct index_data *index_data; }; -static inline unsigned int +static inline unsigned int flat_index(struct index_data *id, unsigned int j) { int i, k = id->count - 1; unsigned int findex = id->findex; @@ -848,7 +848,7 @@ findex = 0; for (i = 0; i < id->count; i++) findex += id->strides[i] * id->index[i]; - } + } id->index[k] += 1; if (id->index[k] >= id->shape[k]) { while (id->index[k] >= id->shape[k]) { @@ -956,7 +956,7 @@ int i, j, size, r, pc_error; char **inputs = NULL; intp strides[MAX_DIMS]; /* clean up XXX */ - + n_inputs = PyTuple_Size(args); if (PyString_Size(self->signature) != n_inputs) { return PyErr_Format(PyExc_ValueError, @@ -966,7 +966,7 @@ return PyErr_Format(PyExc_ValueError, "keyword arguments are not accepted"); } - + /* This is overkill - we shouldn't need to allocate all of this space, but this makes it easier figure out */ a_inputs = PyTuple_New(3*n_inputs); @@ -974,7 +974,7 @@ inputs = PyMem_New(char *, n_inputs); if (!inputs) goto cleanup_and_exit; - + inddata = PyMem_New(struct index_data, n_inputs+1); if (!inddata) goto cleanup_and_exit; for (i = 0; i < n_inputs+1; i++) @@ -982,7 +982,7 @@ /* First, make sure everything is some sort of array so that we can work with their shapes. Count dimensions concurrently. 
*/ - + for (i = 0; i < n_inputs; i++) { PyObject *o = PyTuple_GET_ITEM(args, i); /* borrowed ref */ PyObject *a; @@ -991,18 +991,18 @@ if (typecode == -1) goto cleanup_and_exit; /* Convert it just in case of a non-swapped array */ a = PyArray_FROM_OTF(o, typecode, NOTSWAPPED); - if (!a) goto cleanup_and_exit; + if (!a) goto cleanup_and_exit; PyTuple_SET_ITEM(a_inputs, i, a); /* steals reference */ if (PyArray_NDIM(a) > n_dimensions) n_dimensions = PyArray_NDIM(a); } - + /* Broadcast all of the inputs to determine the output shape (this will - require some modifications if we later allow a final reduction + require some modifications if we later allow a final reduction operation). If an array has too few dimensions it's shape is padded with ones fromthe left. All array dimensions must match, or be one. */ - - for (i = 0; i < n_dimensions; i++) + + for (i = 0; i < n_dimensions; i++) shape[i] = 1; for (i = 0; i < n_inputs; i++) { PyObject *a = PyTuple_GET_ITEM(a_inputs, i); @@ -1014,17 +1014,17 @@ if (shape[delta+j] == 1) shape[delta+j] = n; else { - PyErr_SetString(PyExc_ValueError, + PyErr_SetString(PyExc_ValueError, "cannot broadcast inputs to common shape"); goto cleanup_and_exit; } } } size = PyArray_MultiplyList(shape, n_dimensions); - + /* Broadcast indices of all of the arrays. We could improve efficiency by keeping track of what needs to be broadcast above */ - + for (i = 0; i < n_inputs; i++) { PyObject *a = PyTuple_GET_ITEM(a_inputs, i); PyObject *b; @@ -1032,12 +1032,12 @@ int delta = n_dimensions - PyArray_NDIM(a); if (PyArray_NDIM(a)) { for (j = 0; j < n_dimensions; j++) - strides[j] = (j < delta || PyArray_DIM(a, j-delta) == 1) ? + strides[j] = (j < delta || PyArray_DIM(a, j-delta) == 1) ? 0 : PyArray_STRIDE(a, j-delta); Py_INCREF(PyArray_DESCR(a)); - b = PyArray_NewFromDescr(a->ob_type, + b = PyArray_NewFromDescr(a->ob_type, PyArray_DESCR(a), - n_dimensions, shape, + n_dimensions, shape, strides, PyArray_DATA(a), 0, a); if (!b) goto cleanup_and_exit; } else { /* Leave scalars alone */ @@ -1045,10 +1045,10 @@ Py_INCREF(b); } /* Store b so that it stays alive till we're done */ - PyTuple_SET_ITEM(a_inputs, i+n_inputs, b); + PyTuple_SET_ITEM(a_inputs, i+n_inputs, b); } - - + + for (i = 0; i < n_inputs; i++) { PyObject *a = PyTuple_GET_ITEM(a_inputs, i+n_inputs); char c = PyString_AS_STRING(self->signature)[i]; @@ -1082,7 +1082,7 @@ } } else { PyObject *origA = a; - int inner_size = -1; + int inner_size = -1; /* Check array is contiguous */ for (j = PyArray_NDIM(a)-1; j >= 0; j--) { if ((inner_size == -1 && PyArray_STRIDE(a, j) % PyArray_ITEMSIZE(a)) || @@ -1109,7 +1109,6 @@ } } - if (last_opcode(self->program) > OP_REDUCTION) { char retsig = get_return_sig(self->program); @@ -1146,7 +1145,7 @@ } } - + } /* TODO optimize strides -- in this and other inddata cases, strides and shape can be tweaked to minimize the amount of looping */ @@ -1159,7 +1158,7 @@ inddata[0].index = PyMem_New(int, n_dimensions); for (j = 0; j < inddata[0].count; j++) inddata[0].index[j] = 0; - + if (last_opcode(self->program) >= OP_SUM && last_opcode(self->program) < OP_PROD) { PyObject *zero = PyInt_FromLong(0); @@ -1171,7 +1170,7 @@ Py_DECREF(one); } } - else { + else { char retsig = get_return_sig(self->program); self->memsteps[0] = size_from_char(retsig); output = PyArray_SimpleNew(n_dimensions, @@ -1179,10 +1178,10 @@ typecode_from_char(retsig)); if (!output) goto cleanup_and_exit; } - + r = run_interpreter(self, size, PyArray_DATA(output), inputs, inddata, &pc_error); - + if (r < 0) { 
Py_XDECREF(output); output = NULL; @@ -1409,8 +1408,8 @@ #undef add_func if (PyModule_AddObject(m, "funccodes", d) < 0) return; - + if (PyModule_AddObject(m, "allaxes", PyInt_FromLong(255)) < 0) return; if (PyModule_AddObject(m, "maxdims", PyInt_FromLong(MAX_DIMS)) < 0) return; - + } Modified: trunk/Lib/sandbox/numexpr/tests/test_numexpr.py =================================================================== --- trunk/Lib/sandbox/numexpr/tests/test_numexpr.py 2006-11-16 05:41:35 UTC (rev 2317) +++ trunk/Lib/sandbox/numexpr/tests/test_numexpr.py 2006-11-16 09:24:11 UTC (rev 2318) @@ -36,16 +36,16 @@ def check_reductions(self): # Check that they compile OK. assert_equal(disassemble(numexpr("sum(x**2+2, axis=None)", [('x', float)])), - [('mul_fff', 't3', 'r1[x]', 'r1[x]'), - ('add_fff', 't3', 't3', 'c2[2.0]'), + [('mul_fff', 't3', 'r1[x]', 'r1[x]'), + ('add_fff', 't3', 't3', 'c2[2.0]'), ('sum_ffn', 'r0', 't3', None)]) assert_equal(disassemble(numexpr("sum(x**2+2, axis=1)", [('x', float)])), - [('mul_fff', 't3', 'r1[x]', 'r1[x]'), - ('add_fff', 't3', 't3', 'c2[2.0]'), + [('mul_fff', 't3', 'r1[x]', 'r1[x]'), + ('add_fff', 't3', 't3', 'c2[2.0]'), ('sum_ffn', 'r0', 't3', 1)]) assert_equal(disassemble(numexpr("prod(x**2+2, axis=2)", [('x', float)])), - [('mul_fff', 't3', 'r1[x]', 'r1[x]'), - ('add_fff', 't3', 't3', 'c2[2.0]'), + [('mul_fff', 't3', 'r1[x]', 'r1[x]'), + ('add_fff', 't3', 't3', 'c2[2.0]'), ('prod_ffn', 'r0', 't3', 2)]) # Check that full reductions work. x = arange(10.0) @@ -71,7 +71,7 @@ x = (arange(10) % 2).astype(bool) assert_equal(evaluate("prod(x,axis=0)"), prod(x,axis=0)) assert_equal(evaluate("sum(x,axis=0)"), sum(x,axis=0)) - + def check_axis(self): y = arange(9.0).reshape(3,3) try: @@ -86,13 +86,13 @@ pass else: raise ValueError("should raise exception!") - - - + + + def check_r0_reuse(self): assert_equal(disassemble(numexpr("x**2+2", [('x', float)])), - [('mul_fff', 'r0', 'r1[x]', 'r1[x]'), + [('mul_fff', 'r0', 'r1[x]', 'r1[x]'), ('add_fff', 'r0', 'r0', 'c2[2.0]')]) class test_evaluate(NumpyTestCase): @@ -133,8 +133,8 @@ x = sin(complex(a, b)).real + z.imag y = evaluate("sin(complex(a, b)).real + z.imag") assert_array_almost_equal(x, y) - - + + def check_complex_strides(self): a = arange(100).reshape(10,10)[::2] b = arange(50).reshape(5,10) @@ -144,10 +144,10 @@ c['c2'].fill(0xaaaa) c1 = c['c1'] a0 = a[0] - assert_array_equal(evaluate("c1"), c1) + assert_array_equal(evaluate("c1"), c1) assert_array_equal(evaluate("a0+c1"), a0+c1) - - + + def check_broadcasting(self): a = arange(100).reshape(10,10)[::2] c = arange(10) @@ -156,20 +156,20 @@ assert_array_equal(evaluate("a+d"), a+d) expr = numexpr("2.0*a+3.0*c",[('a',float),('c', float)]) assert_array_equal(expr(a,c), 2.0*a+3.0*c) - + def check_all_scalar(self): a = 3. b = 4. 
assert_equal(evaluate("a+b"), a+b) expr = numexpr("2*a+3*b",[('a',float),('b', float)]) assert_equal(expr(a,b), 2*a+3*b) - + def check_run(self): a = arange(100).reshape(10,10)[::2] b = arange(10) expr = numexpr("2*a+3*b",[('a',float),('b', float)]) assert_array_equal(expr(a,b), expr.run(a,b)) - + def check_illegal_value(self): a = arange(3) try: @@ -178,8 +178,8 @@ pass else: self.fail() - + tests = [ ('MISC', ['b*c+d*e', '2*a+3*b', From scipy-svn at scipy.org Thu Nov 16 04:25:06 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 16 Nov 2006 03:25:06 -0600 (CST) Subject: [Scipy-svn] r2319 - trunk/Lib/sandbox/numexpr Message-ID: <20061116092506.4CFF539C09E@new.scipy.org> Author: cookedm Date: 2006-11-16 03:25:03 -0600 (Thu, 16 Nov 2006) New Revision: 2319 Modified: trunk/Lib/sandbox/numexpr/README trunk/Lib/sandbox/numexpr/complex_functions.inc Log: [numexpr] set svn:eol-style to native Property changes on: trunk/Lib/sandbox/numexpr/README ___________________________________________________________________ Name: svn:eol-style + native Property changes on: trunk/Lib/sandbox/numexpr/complex_functions.inc ___________________________________________________________________ Name: svn:eol-style + native From scipy-svn at scipy.org Thu Nov 16 07:27:09 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 16 Nov 2006 06:27:09 -0600 (CST) Subject: [Scipy-svn] r2320 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20061116122709.1AD4A39C01A@new.scipy.org> Author: cdavid Date: 2006-11-16 06:26:52 -0600 (Thu, 16 Nov 2006) New Revision: 2320 Added: trunk/Lib/sandbox/pyem/basic_example1.py trunk/Lib/sandbox/pyem/basic_example2.py trunk/Lib/sandbox/pyem/basic_example3.py trunk/Lib/sandbox/pyem/demo1.py trunk/Lib/sandbox/pyem/demo2.py trunk/Lib/sandbox/pyem/examples.py trunk/Lib/sandbox/pyem/tests/test_examples.py Removed: trunk/Lib/sandbox/pyem/example.py trunk/Lib/sandbox/pyem/example2.py Modified: trunk/Lib/sandbox/pyem/Changelog trunk/Lib/sandbox/pyem/__init__.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/tests/test_online_em.py Log: Various bug fixes (see Changelog) Modified: trunk/Lib/sandbox/pyem/Changelog =================================================================== --- trunk/Lib/sandbox/pyem/Changelog 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/Changelog 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,3 +1,11 @@ +pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900 + + * correct examples + * correct exceptions msg strings in gauss_mix, which + were buggy + * add examples from website to the package, so that above errors + do not appear again + pyem (0.5.6) Thu, 16 Nov 2006 14:18:19 +0900 * bump to 0.5.6 Modified: trunk/Lib/sandbox/pyem/__init__.py =================================================================== --- trunk/Lib/sandbox/pyem/__init__.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/__init__.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,11 +1,12 @@ #! 
/usr/bin/env python -# Last Change: Fri Oct 20 11:00 AM 2006 J +# Last Change: Thu Nov 16 09:00 PM 2006 J from info import __doc__ from gauss_mix import GmParamError, GM from gmm_em import GmmParamError, GMM, EM from online_em import OnGMM as _OnGMM +import examples as _examples __all__ = filter(lambda s:not s.startswith('_'),dir()) Added: trunk/Lib/sandbox/pyem/basic_example1.py =================================================================== --- trunk/Lib/sandbox/pyem/basic_example1.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/basic_example1.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -0,0 +1,48 @@ +import numpy as N +import pylab as P +from scipy.sandbox.pyem import GM + +#------------------------------ +# Hyper parameters: +# - K: number of clusters +# - d: dimension +k = 3 +d = 2 + +#------------------------------------------------------- +# Values for weights, mean and (diagonal) variances +# - the weights are an array of rank 1 +# - mean is expected to be rank 2 with one row for one component +# - variances are also expected to be rank 2. For diagonal, one row +# is one diagonal, for full, the first d rows are the first variance, +# etc... In this case, the variance matrix should be k*d rows and d +# columns +w = N.array([0.2, 0.45, 0.35]) +mu = N.array([[4.1, 3], [1, 5], [-2, -3]]) +va = N.array([[1, 1.5], [3, 4], [2, 3.5]]) + +#----------------------------------------- +# First method: directly from parameters: +# Both methods are equivalent. +gm = GM.fromvalues(w, mu, va) + +#------------------------------------- +# Second method to build a GM instance: +gm = GM(d, k, mode = 'diag') +# The set_param method checks that w, mu, and va correspond to k, d and m +gm.set_param(w, mu, va) + +# Once set_param is called, both methods are equivalent.
The second +# method is useful when using a GM object for learning (where +# the learner class will set the params), whereas the first one +# is useful when there is a need to quickly sample a model +# from existing values, without a need to give the hyper parameters + +# Create a Gaussian Mixture from the parameters, and sample +# 1000 items from it (one row = one 2 dimension sample) +data = gm.sample(1000) + +# Plot the samples +P.plot(data[:, 0], data[:, 1], '.') +# Plot the ellipsoids of confidence with a level of 75 % +gm.plot(level = 0.75) Added: trunk/Lib/sandbox/pyem/basic_example2.py =================================================================== --- trunk/Lib/sandbox/pyem/basic_example2.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/basic_example2.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -0,0 +1,45 @@ +from numpy.random import seed + +from scipy.sandbox.pyem import GM, GMM, EM +import copy + +# To reproduce results, fix the random seed +seed(1) + +#+++++++++++++++++++++++++++++ +# Meta parameters of the model +# - k: Number of components +# - d: dimension of each Gaussian +# - mode: Mode of covariance matrix: full or diag (string) +# - nframes: number of frames (frame = one data point = one +# row of d elements) +k = 2 +d = 2 +mode = 'diag' +nframes = 1e3 + +#+++++++++++++++++++++++++++++++++++++++++++ +# Create an artificial GM model, sample it +#+++++++++++++++++++++++++++++++++++++++++++ +w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) +gm = GM.fromvalues(w, mu, va) + +# Sample nframes frames from the model +data = gm.sample(nframes) + +#++++++++++++++++++++++++ +# Learn the model with EM +#++++++++++++++++++++++++ + +# Create a Model from a Gaussian mixture with kmean initialization +lgm = GM(d, k, mode) +gmm = GMM(lgm, 'kmean') + +# The actual EM, with likelihood computation. The threshold +# is compared to the (linearly approximated) derivative of the likelihood +em = EM() +like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) + +# The computed parameters are in gmm.gm, which is the same as lgm +# (remember, python does not copy most objects by default).
You can for example +# plot lgm against gm to compare Added: trunk/Lib/sandbox/pyem/basic_example3.py =================================================================== --- trunk/Lib/sandbox/pyem/basic_example3.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/basic_example3.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -0,0 +1,64 @@ +import numpy as N +from numpy.random import seed + +from scipy.sandbox.pyem import GM, GMM, EM +import copy + +seed(2) + +k = 4 +d = 2 +mode = 'diag' +nframes = 1e3 + +#+++++++++++++++++++++++++++++++++++++++++++ +# Create an artificial GMM model, samples it +#+++++++++++++++++++++++++++++++++++++++++++ +w, mu, va = GM.gen_param(d, k, mode, spread = 1.0) +gm = GM.fromvalues(w, mu, va) + +# Sample nframes frames from the model +data = gm.sample(nframes) + +#++++++++++++++++++++++++ +# Learn the model with EM +#++++++++++++++++++++++++ + +# List of learned mixtures lgm[i] is a mixture with i+1 components +lgm = [] +kmax = 6 +bics = N.zeros(kmax) +em = EM() +for i in range(kmax): + lgm.append(GM(d, i+1, mode)) + + gmm = GMM(lgm[i], 'kmean') + em.train(data, gmm, maxiter = 30, thresh = 1e-10) + bics[i] = gmm.bic(data) + +print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) + +#+++++++++++++++ +# Draw the model +#+++++++++++++++ +import pylab as P +P.subplot(3, 2, 1) + +for k in range(kmax): + P.subplot(3, 2, k+1) + level = 0.9 + P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') + + # h keeps the handles of the plot, so that you can modify + # its parameters like label or color + h = lgm[k].plot(level = level) + [i.set_color('r') for i in h] + h[0].set_label('EM confidence ellipsoides') + + h = gm.plot(level = level) + [i.set_color('g') for i in h] + h[0].set_label('Real confidence ellipsoides') + +P.legend(loc = 0) +# depending on your configuration, you may have to call P.show() +# to actually display the figure Copied: trunk/Lib/sandbox/pyem/demo1.py (from rev 2316, trunk/Lib/sandbox/pyem/example.py) Copied: trunk/Lib/sandbox/pyem/demo2.py (from rev 2316, trunk/Lib/sandbox/pyem/example2.py) Deleted: trunk/Lib/sandbox/pyem/example.py =================================================================== --- trunk/Lib/sandbox/pyem/example.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/example.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,109 +0,0 @@ -#! /usr/bin/env python - -# Example of use of pyem toolbox. Feel free to change parameters -# such as dimension, number of components, mode of covariance. 
-# -# You can also try less trivial things such as adding outliers, sampling -# a mixture with full covariance and estimating it with a mixture with diagonal -# gaussians (replace the mode of the learned model lgm) -# -# Later, I hope to add functions for number of component estimation using eg BIC - -import numpy as N -from numpy.random import seed - -from scipy.sandbox.pyem import GM, GMM, EM -import copy - -seed(1) -#+++++++++++++++++++++++++++++ -# Meta parameters of the model -# - k: Number of components -# - d: dimension of each Gaussian -# - mode: Mode of covariance matrix: full or diag (string) -# - nframes: number of frames (frame = one data point = one -# row of d elements) -k = 2 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -# Init the model -lgm = GM(d, k, mode) -gmm = GMM(lgm, 'kmean') -gmm.init(data) - -# Keep a copy for drawing later -gm0 = copy.copy(lgm) - -# The actual EM, with likelihood computation. The threshold -# is compared to the (linearly appromixated) derivative of the likelihood -em = EM() -like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) - -#+++++++++++++++ -# Draw the model -#+++++++++++++++ -import pylab as P -P.subplot(2, 1, 1) - -# Level is the confidence level for confidence ellipsoids: 1.0 means that -# all points will be (almost surely) inside the ellipsoid -level = 0.8 -if not d == 1: - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - - # h keeps the handles of the plot, so that you can modify - # its parameters like label or color - h = gm.plot(level = level) - [i.set_color('g') for i in h] - h[0].set_label('true confidence ellipsoides') - - # Initial confidence ellipses as found by kmean - h = gm0.plot(level = level) - [i.set_color('k') for i in h] - h[0].set_label('kmean confidence ellipsoides') - - # Values found by EM - h = lgm.plot(level = level) - [i.set_color('r') for i in h] - h[0].set_label('EM confidence ellipsoides') - - P.legend(loc = 0) -else: - # The 1d plotting function is quite elaborate: the confidence - # interval are represented by filled areas, the pdf of the mixture and - # the pdf of each component is drawn (optional) - h = gm.plot1d(level = level) - [i.set_color('g') for i in h['pdf']] - h['pdf'][0].set_label('true pdf') - - h0 = gm0.plot1d(level = level) - [i.set_color('k') for i in h0['pdf']] - h0['pdf'][0].set_label('initial pdf') - - hl = lgm.plot1d(fill = 1, level = level) - [i.set_color('r') for i in hl['pdf']] - hl['pdf'][0].set_label('pdf found by EM') - - P.legend(loc = 0) - -P.subplot(2, 1, 2) -P.plot(like) -P.title('log likelihood') - -P.show() -# P.save('2d diag.png') Deleted: trunk/Lib/sandbox/pyem/example2.py =================================================================== --- trunk/Lib/sandbox/pyem/example2.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/example2.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,104 +0,0 @@ -#! /usr/bin/env python - -# Example of use of pyem toolbox. Feel free to change parameters -# such as dimension, number of components, mode of covariance. 
-# -# You can also try less trivial things such as adding outliers, sampling -# a mixture with full covariance and estimating it with a mixture with diagonal -# gaussians (replace the mode of the learned model lgm) -# -# Later, I hope to add functions for number of component estimation using eg BIC - -import numpy as N -from numpy.random import seed - -from scipy.sandbox.pyem import GM, GMM, EM -import copy - -seed(2) -#+++++++++++++++++++++++++++++ -# Meta parameters of the model -# - k: Number of components -# - d: dimension of each Gaussian -# - mode: Mode of covariance matrix: full or diag (string) -# - nframes: number of frames (frame = one data point = one -# row of d elements) -k = 4 -d = 2 -mode = 'diag' -nframes = 1e3 - -#+++++++++++++++++++++++++++++++++++++++++++ -# Create an artificial GMM model, samples it -#+++++++++++++++++++++++++++++++++++++++++++ -w, mu, va = GM.gen_param(d, k, mode, spread = 1.0) -gm = GM.fromvalues(w, mu, va) - -# Sample nframes frames from the model -data = gm.sample(nframes) - -#++++++++++++++++++++++++ -# Learn the model with EM -#++++++++++++++++++++++++ - -lgm = [] -kmax = 6 -bics = N.zeros(kmax) -for i in range(kmax): - # Init the model with an empty Gaussian Mixture, and create a Gaussian - # Mixture Model from it - lgm.append(GM(d, i+1, mode)) - gmm = GMM(lgm[i], 'kmean') - - # The actual EM, with likelihood computation. The threshold - # is compared to the (linearly appromixated) derivative of the likelihood - em = EM() - em.train(data, gmm, maxiter = 30, thresh = 1e-10) - bics[i] = gmm.bic(data) - -print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) - -#+++++++++++++++ -# Draw the model -#+++++++++++++++ -import pylab as P -P.subplot(3, 2, 1) - -for k in range(kmax): - P.subplot(3, 2, k+1) - # Level is the confidence level for confidence ellipsoids: 1.0 means that - # all points will be (almost surely) inside the ellipsoid - level = 0.8 - if not d == 1: - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - - # h keeps the handles of the plot, so that you can modify - # its parameters like label or color - h = lgm[k].plot(level = level) - [i.set_color('r') for i in h] - h[0].set_label('EM confidence ellipsoides') - - h = gm.plot(level = level) - [i.set_color('g') for i in h] - h[0].set_label('Real confidence ellipsoides') - else: - # The 1d plotting function is quite elaborate: the confidence - # interval are represented by filled areas, the pdf of the mixture and - # the pdf of each component is drawn (optional) - h = gm.plot1d(level = level) - [i.set_color('g') for i in h['pdf']] - h['pdf'][0].set_label('true pdf') - - h0 = gm0.plot1d(level = level) - [i.set_color('k') for i in h0['pdf']] - h0['pdf'][0].set_label('initial pdf') - - hl = lgm.plot1d(fill = 1, level = level) - [i.set_color('r') for i in hl['pdf']] - hl['pdf'][0].set_label('pdf found by EM') - - P.legend(loc = 0) - -P.legend(loc = 0) -P.show() -# P.save('2d diag.png') Added: trunk/Lib/sandbox/pyem/examples.py =================================================================== --- trunk/Lib/sandbox/pyem/examples.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/examples.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -0,0 +1,14 @@ +def ex1(): + import basic_example1 + +def ex2(): + import basic_example2 + +def ex3(): + import basic_example3 + +if __name__ == '__main__': + ex1() + ex2() + ex3() + Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- 
trunk/Lib/sandbox/pyem/gauss_mix.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Thu Nov 09 06:00 PM 2006 J +# Last Change: Thu Nov 16 08:00 PM 2006 J # Module to implement GaussianMixture class. @@ -82,10 +82,10 @@ k, d, mode = check_gmm_param(weights, mu, sigma) if not k == self.k: raise GmParamError("Number of given components is %d, expected %d" - % (shape(k), shape(self.k))) + % (k, self.k)) if not d == self.d: raise GmParamError("Dimension of the given model is %d, expected %d" - % (shape(d), shape(self.d))) + % (d, self.d)) if not mode == self.mode and not d == 1: raise GmParamError("Given covariance mode is %s, expected %s" % (mode, self.mode)) Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -374,7 +374,7 @@ K = mu.shape[0] n = data.shape[0] - d = data.shape[1] + d = mu.shape[1] y = N.zeros((K, n)) if mu.size == va.size: Added: trunk/Lib/sandbox/pyem/tests/test_examples.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_examples.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/tests/test_examples.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -0,0 +1,26 @@ +#! /usr/bin/env python +# Last Change: Thu Nov 16 09:00 PM 2006 J + +from numpy.testing import * + +set_package_path() +from pyem.examples import ex1, ex2, ex3 +restore_path() + +# #Optional: +# set_local_path() +# # import modules that are located in the same directory as this file. +# restore_path() + +class test_examples(NumpyTestCase): + def check_ex1(self, level = 5): + ex1() + + def check_ex2(self, level = 5): + ex2() + + def check_ex3(self, level = 5): + ex3() + +if __name__ == "__main__": + NumpyTest().run() Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-11-16 09:25:03 UTC (rev 2319) +++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-11-16 12:26:52 UTC (rev 2320) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Mon Oct 23 07:00 PM 2006 J +# Last Change: Thu Nov 16 09:00 PM 2006 J import copy @@ -61,7 +61,7 @@ self._create_model(d, k, mode, nframes, emiter) self._check(d, k, mode, nframes, emiter) - def check_2d(self, level = 2): + def check_2d(self, level = 1): d = 2 k = 2 mode = 'diag' @@ -72,7 +72,7 @@ self._create_model(d, k, mode, nframes, emiter) self._check(d, k, mode, nframes, emiter) - def check_5d(self, level = 2): + def check_5d(self, level = 5): d = 5 k = 2 mode = 'diag' From scipy-svn at scipy.org Thu Nov 16 15:19:55 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 16 Nov 2006 14:19:55 -0600 (CST) Subject: [Scipy-svn] r2321 - trunk/Lib/sandbox/montecarlo Message-ID: <20061116201955.D840839C055@new.scipy.org> Author: edschofield Date: 2006-11-16 14:19:52 -0600 (Thu, 16 Nov 2006) New Revision: 2321 Modified: trunk/Lib/sandbox/montecarlo/setup.py Log: Modified montecarlo/setup.py to compile in randomkit.c statically. 
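The log above describes replacing the dynamic link against numpy's prebuilt randomkit library with a static compile of randomkit.c into the extension itself, which sidesteps the library_dirs/runtime_library_dirs machinery entirely. A minimal sketch of such a numpy.distutils configuration (file names as in the diff below; everything else is boilerplate):

    # Sketch: compile randomkit.c statically into _intsampler, assuming
    # randomkit.c has been copied into montecarlo/src/ beforehand.
    from os.path import join
    from numpy.distutils.misc_util import Configuration

    def configuration(parent_package='', top_path=None):
        config = Configuration('montecarlo', parent_package, top_path)
        config.add_extension('_intsampler',
                             sources=[join('src', f) for f in
                                      ['_intsamplermodule.c',
                                       'compact5table.c', 'randomkit.c']])
        return config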
Modified: trunk/Lib/sandbox/montecarlo/setup.py
===================================================================
--- trunk/Lib/sandbox/montecarlo/setup.py	2006-11-16 12:26:52 UTC (rev 2320)
+++ trunk/Lib/sandbox/montecarlo/setup.py	2006-11-16 20:19:52 UTC (rev 2321)
@@ -7,18 +7,12 @@
 
     config = Configuration('montecarlo', parent_package, top_path)
 
-    # This code requires 'randomkit' to have been built using 'add_extension' in
-    # numpy/random/setup.py.
+    # This code requires 'randomkit.c' and 'randomkit.h' to have been copied
+    # to (or symlinked to) montecarlo/src/.
 
-    random_lib_dir = dirname(numpy.random.__file__)
-
     config.add_extension('_intsampler',
-                  include_dirs = [numpy.get_numpy_include(), random_lib_dir],
-                  libraries=['randomkit'],
-                  library_dirs=[random_lib_dir],
-                  runtime_library_dirs=[random_lib_dir],
                   sources = [join('src', f) for f in
-                             ['_intsamplermodule.c', 'compact5table.c']])
+                             ['_intsamplermodule.c', 'compact5table.c', 'randomkit.c']])
 
     config.add_data_dir('tests')
     config.add_data_dir('examples')

From scipy-svn at scipy.org  Thu Nov 16 23:52:52 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 16 Nov 2006 22:52:52 -0600 (CST)
Subject: [Scipy-svn] r2322 - trunk/Lib/sandbox/numexpr
Message-ID: <20061117045252.992C639C090@new.scipy.org>

Author: cookedm
Date: 2006-11-16 22:52:49 -0600 (Thu, 16 Nov 2006)
New Revision: 2322

Modified:
   trunk/Lib/sandbox/numexpr/compiler.py
   trunk/Lib/sandbox/numexpr/interpreter.c
Log:
[numexpr] more whitespace, and fix compiler warnings with Python 2.5 (use Py_ssize_t)

Modified: trunk/Lib/sandbox/numexpr/compiler.py
===================================================================
--- trunk/Lib/sandbox/numexpr/compiler.py	2006-11-16 20:19:52 UTC (rev 2321)
+++ trunk/Lib/sandbox/numexpr/compiler.py	2006-11-17 04:52:49 UTC (rev 2322)
@@ -114,7 +114,7 @@
     basesig = ''.join(x.typecode() for x in list(ast.children))
     # Find some operation that will work on an acceptable casting of args.
     for sig in sigPerms(basesig):
-        value = ast.value + '_' + retsig + sig
+        value = ast.value + '_' + retsig + sig
         if value in interpreter.opcodes:
             break
     else:
@@ -215,9 +215,9 @@
     c = compile(s, '', 'eval')
     # make VariableNode's for the names
     names = {}
-    for name in c.co_names:
-        if name == "None":
-            names[name] = None
+    for name in c.co_names:
+        if name == "None":
+            names[name] = None
         else:
             t = types.get(name, float)
             names[name] = expr.VariableNode(name, type_to_kind[t])
@@ -228,10 +228,10 @@
         ex = expr.ConstantNode(ex, expressions.getKind(ex))
     return ex
 
-
-def isReduction(ast):
-    return ast.value.startswith('sum_') or ast.value.startswith('prod_')
+def isReduction(ast):
+    return ast.value.startswith('sum_') or ast.value.startswith('prod_')
+
 def getInputOrder(ast, input_order=None):
    """Derive the input order of the variables in an expression.
    """
@@ -315,12 +315,12 @@
 def optimizeTemporariesAllocation(ast):
     """Attempt to minimize the number of temporaries needed, by
     reusing old ones.
- """ + """ nodes = list(x for x in ast.postorderWalk() if x.reg.temporary) - users_of = dict((n.reg, set()) for n in nodes) - if nodes and nodes[-1] is not ast: - for c in ast.children: - if c.reg.temporary: + users_of = dict((n.reg, set()) for n in nodes) + if nodes and nodes[-1] is not ast: + for c in ast.children: + if c.reg.temporary: users_of[c.reg].add(ast) for n in reversed(nodes): for c in n.children: Modified: trunk/Lib/sandbox/numexpr/interpreter.c =================================================================== --- trunk/Lib/sandbox/numexpr/interpreter.c 2006-11-16 20:19:52 UTC (rev 2321) +++ trunk/Lib/sandbox/numexpr/interpreter.c 2006-11-17 04:52:49 UTC (rev 2322) @@ -483,7 +483,7 @@ static int last_opcode(PyObject *program_object) { - int n; + Py_ssize_t n; unsigned char *program; PyString_AsStringAndSize(program_object, (char **)&program, &n); return program[n-4]; @@ -506,7 +506,8 @@ check_program(NumExprObject *self) { unsigned char *program; - int prog_len, rno, pc, arg, argloc, argno, n_buffers, n_inputs; + Py_ssize_t prog_len, n_buffers, n_inputs; + int rno, pc, arg, argloc, argno; char sig, *fullsig, *signature; if (PyString_AsStringAndSize(self->program, (char **)&program, @@ -915,14 +916,16 @@ struct index_data *index_data, int *pc_error) { int r; + Py_ssize_t plen; unsigned int blen1, blen2; struct vm_params params; *pc_error = -1; if (PyString_AsStringAndSize(self->program, (char **)&(params.program), - &(params.prog_len)) < 0) { + &plen) < 0) { return -1; } + params.prog_len = plen; if ((params.n_inputs = PyObject_Length(self->signature)) == -1) return -1; params.output = output; @@ -952,7 +955,8 @@ { PyObject *output = NULL, *a_inputs = NULL; struct index_data *inddata = NULL; - unsigned int n_inputs, n_dimensions = 0, shape[MAX_DIMS]; + unsigned int n_inputs, n_dimensions = 0; + int shape[MAX_DIMS]; int i, j, size, r, pc_error; char **inputs = NULL; intp strides[MAX_DIMS]; /* clean up XXX */ @@ -1134,7 +1138,8 @@ j += 1; } } - output = PyArray_SimpleNew(n_dimensions-1, dims, typecode_from_char(retsig)); + output = PyArray_SimpleNew(n_dimensions-1, dims, + typecode_from_char(retsig)); if (!output) goto cleanup_and_exit; for (i = j = 0; i < n_dimensions; i++) { if (i != axis) { @@ -1162,11 +1167,11 @@ if (last_opcode(self->program) >= OP_SUM && last_opcode(self->program) < OP_PROD) { PyObject *zero = PyInt_FromLong(0); - PyArray_FillWithScalar(output, zero); + PyArray_FillWithScalar((PyArrayObject *)output, zero); Py_DECREF(zero); } else { PyObject *one = PyInt_FromLong(1); - PyArray_FillWithScalar(output, one); + PyArray_FillWithScalar((PyArrayObject *)output, one); Py_DECREF(one); } } From scipy-svn at scipy.org Mon Nov 20 15:29:04 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 20 Nov 2006 14:29:04 -0600 (CST) Subject: [Scipy-svn] r2324 - in trunk/Lib/sandbox/arpack: . 
tests Message-ID: <20061120202904.268FD39C051@new.scipy.org> Author: nmarais Date: 2006-11-20 14:27:36 -0600 (Mon, 20 Nov 2006) New Revision: 2324 Modified: trunk/Lib/sandbox/arpack/speigs.py trunk/Lib/sandbox/arpack/tests/test_speigs.py Log: Refactor lower-level ARPACK drivers and improve docstrings Modified: trunk/Lib/sandbox/arpack/speigs.py =================================================================== --- trunk/Lib/sandbox/arpack/speigs.py 2006-11-18 02:09:53 UTC (rev 2323) +++ trunk/Lib/sandbox/arpack/speigs.py 2006-11-20 20:27:36 UTC (rev 2324) @@ -1,49 +1,56 @@ import numpy as N import _arpack +import warnings -def eigvals(matvec, n, nev, ncv=None): - """ - Calculate eigenvalues for system with matrix-vector product matvec, dimension n +__all___=['ArpackException','ARPACK_eigs', 'ARPACK_gen_eigs'] - Arguments - ========= - matvec -- Function that provides matrix-vector product, i.e. matvec(x) -> A*x - n -- Matrix dimension of the problem - nev -- Number of eigenvalues to calculate - ncv -- Number of Arnoldi basisvectors to use. If None, default to 2*nev - - Return Values - ============= - Real array of nev eigenvalues, or complex array if values are complex - """ - +class ArpackException(RuntimeError): + ARPACKErrors = { 0: """Normal exit.""", + 3: """No shifts could be applied during a cycle of the + Implicitly restarted Arnoldi iteration. One possibility + is to increase the size of NCV relative to NEV.""", + -1: """N must be positive.""", + -2: """NEV must be positive.""", + -3: """NCV-NEV >= 2 and less than or equal to N.""", + -4: """The maximum number of Arnoldi update iteration + must be greater than zero.""", + -5: """WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'""", + -6: """BMAT must be one of 'I' or 'G'.""", + -7: """Length of private work array is not sufficient.""", + -8: """Error return from LAPACK eigenvalue calculation;""", + -9: """Starting vector is zero.""", + -10: """IPARAM(7) must be 1,2,3,4.""", + -11: """IPARAM(7) = 1 and BMAT = 'G' are incompatable.""", + -12: """IPARAM(1) must be equal to 0 or 1.""", + -9999: """Could not build an Arnoldi factorization. 
+ IPARAM(5) returns the size of the current Arnoldi + factorization.""", + } + def __init__(self, info): + self.info = info + def __str__(self): + try: return self.ARPACKErrors[self.info] + except KeyError: return "Unknown ARPACK error" + +def check_init(n, nev, ncv): assert(nev <= n-4) # ARPACK seems to cause a segfault otherwise if ncv is None: - ncv = min(2*nev, n-1) - iparam = N.zeros(11, N.int32) # Array with assorted extra paramters for F77 call - ishfts = 1 # Some random arpack parameter - maxitr = n*3 # Maximum number of iterations - # Some random arpack parameter (I think it tells ARPACK to solve the - # regular eigenproblem the "standard" way - mode1 = 1 - iparam[0] = ishfts - iparam[2] = maxitr - iparam[6] = mode1 + ncv = min(2*nev+1, n-1) + maxitr = max(n, 1000) # Maximum number of iterations + return ncv, maxitr + +def init_workspaces(n,nev,ncv): ipntr = N.zeros(14, N.int32) # Pointers into memory structure used by F77 calls d = N.zeros((ncv, 3), N.float64, order='FORTRAN') # Temp workspace # Temp workspace/error residuals upon iteration completion resid = N.zeros(n, N.float64) - workd = N.zeros(3*n, N.float64) # workspace workl = N.zeros(3*ncv*ncv+6*ncv, N.float64) # more workspace - tol = 1e-16 # Desired convergence tollerance - ido = 0 # Communication variable used by ARPACK to tell the user what to do - info = 0 # Used for error reporting - which = 'SM' # Request smallest magnitude eigenvalues, see dnaupd.f for more options - bmat = 'I' # Standard (not generalised) eigenproblem # Storage for the Arnoldi basis vectors - v = N.zeros((n, ncv), dtype=float, order='FORTRAN') - + v = N.zeros((n, ncv), dtype=N.float64, order='FORTRAN') + return (ipntr, d, resid, workd, workl, v) + +def init_debug(): # Causes various debug info to be printed by ARPACK _arpack.debug.ndigit = -3 _arpack.debug.logfil = 6 @@ -54,43 +61,70 @@ _arpack.debug.mneigh = 0 _arpack.debug.mneupd = 1 - - cnt = 0 - # Arnouldi iteration. - while True: - ido,resid,v,iparam,ipntr,info = _arpack.dnaupd( - ido, bmat, which, nev, tol, resid, v, iparam, ipntr, workd, workl, info) - # Exit if reverse communication flag does not request a matrix-vector - # product - if ido not in (-1, 1): break - cnt += 1 - x = workd[ipntr[0]-1:ipntr[0]+n-1] - workd[ipntr[1]-1:ipntr[1]+n-1] = matvec(x) # y = A*x - - - if info != 0: raise "Hell" # Indicates some error during the Arnouldi iterations - - dr = N.zeros(nev+1, N.float64) # Real part of eigenvalues - di = N.zeros(nev+1, N.float64) # Imaginary part of eigenvalues +def init_postproc_workspace(n, nev, ncv): # Used as workspace and to return eigenvectors if requested. 
Not touched if # eigenvectors are not requested - z = N.zeros((n, nev+1), N.float64, order='FORTRAN') workev = N.zeros(3*ncv, N.float64) # More workspace select = N.zeros(ncv, N.int32) # Used as internal workspace since dneupd # parameter HOWMNY == 'A' + return (workev, select) +def postproc(n, nev, ncv, sigmar, sigmai, bmat, which, + tol, resid, v, iparam, ipntr, workd, workl, info): + workev, select = init_postproc_workspace(n, nev, ncv) + ierr = 0 # Postprocess the Arnouldi vectors to extract eigenvalues/vectors # If dneupd's first paramter is 'True' the eigenvectors are also calculated, # 'False' only the eigenvalues dr,di,z,info = _arpack.dneupd( - True, 'A', select, 0., 0., workev, bmat, which, nev, tol, - resid, v, iparam, ipntr, workd, workl, info) + True, 'A', select, sigmar, sigmai, workev, bmat, which, nev, tol, resid, v, + iparam, ipntr, workd, workl, info) + if N.abs(di[:-1]).max() == 0: dr = dr[:-1] else: dr = dr[:-1] + 1j*di[:-1] return (dr, z[:,:-1]) -def geneigvals(matvec, sigma_solve, n, sigma, nev, ncv=None): + +def ARPACK_eigs(matvec, n, nev, which='SM', ncv=None, tol=1e-14): """ + Calculate eigenvalues for system with matrix-vector product matvec, dimension n + + Arguments + ========= + matvec -- Function that provides matrix-vector product, i.e. matvec(x) -> A*x + n -- Matrix dimension of the problem + nev -- Number of eigenvalues to calculate + which -- Spectrum selection. See details below. Defaults to 'SM' + ncv -- Number of Arnoldi basisvectors to use. If None, default to 2*nev+1 + tol -- Numerical tollerance for Arnouldi iteration convergence. Defaults to 1e-14 + + Spectrum Selection + ================== + which can take one of several values: + + 'LM' -> Request eigenvalues with largest magnitude. + 'SM' -> Request eigenvalues with smallest magnitude. + 'LR' -> Request eigenvalues with largest real part. + 'SR' -> Request eigenvalues with smallest real part. + 'LI' -> Request eigenvalues with largest imaginary part. + 'SI' -> Request eigenvalues with smallest imaginary part. + + Return Values + ============= + (eig_vals, eig_vecs) where eig_vals are the requested eigenvalues and + eig_vecs the corresponding eigenvectors. If all the eigenvalues are real, + eig_vals is a real array but if some eigenvalues are complex it is a + complex array. + + """ + bmat = 'I' # Standard eigenproblem + ncv, resid, iparam, ipntr, v, workd, workl, info = ARPACK_iteration( + matvec, lambda x: x, n, bmat, which, nev, tol, ncv, mode=1) + return postproc(n, nev, ncv, 0., 0., bmat, which, tol, + resid, v, iparam, ipntr, workd, workl, info) + +def ARPACK_gen_eigs(matvec, sigma_solve, n, sigma, nev, which='LR', ncv=None, tol=1e-14): + """ Calculate eigenvalues close to sigma for generalised eigen system Given a system [A]x = k_i*[M]x where [A] and [M] are matrices and k_i are @@ -104,53 +138,73 @@ n -- Matrix dimension of the problem sigma -- Eigenvalue spectral shift real value nev -- Number of eigenvalues to calculate - ncv -- Number of Arnoldi basisvectors to use. If None, default to 2*nev + which -- Spectrum selection. See details below. Defaults to 'LR' + ncv -- Number of Arnoldi basisvectors to use. If None, default to 2*nev+1 + tol -- Numerical tollerance for Arnouldi iteration convergence. Defaults to 1e-14 + Spectrum Shift + ============== + + The spectrum of the orignal system is shifted by sigma. This transforms the + original eigenvalues to be 1/(original_eig-sigma) in the shifted + system. 
ARPACK then operates on the shifted system, transforming it back to + the original system in a postprocessing step. + + The spectrum shift causes eigenvalues close to sigma to become very large + in the transformed system. This allows quick convergence for these + eigenvalues. This is particularly useful if a system has a number of + trivial zero-eigenvalues that are to be ignored. + + Spectrum Selection + ================== + which can take one of several values: + + 'LM' -> Request spectrum shifted eigenvalues with largest magnitude. + 'SM' -> Request spectrum shifted eigenvalues with smallest magnitude. + 'LR' -> Request spectrum shifted eigenvalues with largest real part. + 'SR' -> Request spectrum shifted eigenvalues with smallest real part. + 'LI' -> Request spectrum shifted eigenvalues with largest imaginary part. + 'SI' -> Request spectrum shifted eigenvalues with smallest imaginary part. + + The effect on the actual system is: + 'LM' -> Eigenvalues closest to sigma on the complex plane + 'LR' -> Eigenvalues with real part > sigma, provided they exist + + Return Values ============= - Real array of nev eigenvalues, or complex array if values are complex + (eig_vals, eig_vecs) where eig_vals are the requested eigenvalues and + eig_vecs the corresponding eigenvectors. If all the eigenvalues are real, + eig_vals is a real array but if some eigenvalues are complex it is a + complex array. The eigenvalues and vectors correspond to the original + system, not the shifted system. The shifted system is only used interally. + """ - if ncv is None: - ncv = min(2*nev, n-1) + bmat = 'G' # Generalised eigenproblem + ncv, resid, iparam, ipntr, v, workd, workl, info = ARPACK_iteration( + matvec, sigma_solve, n, bmat, which, nev, tol, ncv, mode=3) + sigmar = sigma + sigmai = 0. 
+ return postproc(n, nev, ncv, sigmar, sigmai, bmat, which, tol, + resid, v, iparam, ipntr, workd, workl, info) - iparam = N.zeros(11, N.int32) # Array with assorted extra paramters for F77 call +def ARPACK_iteration(matvec, sigma_solve, n, bmat, which, nev, tol, ncv, mode): + ncv, maxitr = check_init(n, nev, ncv) + ipntr, d, resid, workd, workl, v = init_workspaces(n,nev,ncv) + init_debug() ishfts = 1 # Some random arpack parameter - maxitr = n*3 # Maximum number of iterations # Some random arpack parameter (I think it tells ARPACK to solve the - # regular eigenproblem the "standard" way - mode = 3 - iparam[0] = ishfts - iparam[2] = maxitr - iparam[6] = mode - ipntr = N.zeros(14, N.int32) # Pointers into memory structure used by F77 calls - d = N.zeros((ncv, 3), N.float64, order='FORTRAN') # Temp workspace - # Temp workspace/error residuals upon iteration completion - resid = N.zeros(n, N.float64) - - workd = N.zeros(3*n, N.float64) # workspace - workl = N.zeros(3*ncv*ncv+6*ncv, N.float64) # more workspace - tol = 1e-7 # Desired convergence tollerance + # general eigenproblem using shift-invert + iparam = N.zeros(11, N.int32) # Array with assorted extra paramters for F77 call + iparam[[0,2,6]] = ishfts, maxitr, mode ido = 0 # Communication variable used by ARPACK to tell the user what to do info = 0 # Used for error reporting - which = 'LR' # Request largest magnitude eigenvalues, see dnaupd.f for more options - bmat = 'G' # Generalised eigenproblem - # Storage for the Arnoldi basis vectors - v = N.zeros((n, ncv), dtype=float, order='FORTRAN') - # Causes various debug info to be printed by ARPACK - _arpack.debug.ndigit = -3 - _arpack.debug.logfil = 6 - _arpack.debug.mnaitr = 0 - _arpack.debug.mnapps = 0 - _arpack.debug.mnaupd = 1 - _arpack.debug.mnaup2 = 0 - _arpack.debug.mneigh = 0 - _arpack.debug.mneupd = 1 - # Arnouldi iteration. while True: - ido,resid,v,iparam,ipntr,info = _arpack.dnaupd(ido, bmat, which, nev, tol, resid, v, - iparam, ipntr, workd, workl, info) - if ido == -1: # Perform y = inv[A - sigma*M]*M*x + ido,resid,v,iparam,ipntr,info = _arpack.dnaupd( + ido, bmat, which, nev, tol, resid, v, iparam, ipntr, workd, workl, info) + if ido == -1 or ido == 1 and mode not in (3,4): + # Perform y = inv[A - sigma*M]*M*x x = workd[ipntr[0]-1:ipntr[0]+n-1] Mx = matvec(x) # Mx = [M]*x workd[ipntr[1]-1:ipntr[1]+n-1] = sigma_solve(Mx) @@ -163,26 +217,9 @@ workd[ipntr[1]-1:ipntr[1]+n-1] = matvec(x) else: # Finished, or error break - if info != 0: raise "Hell" # Indicates some error during the Arnouldi iterations + if info == 1: + warn.warn("Maximum number of iterations taken: %s"%iparam[2]) + elif info != 0: + raise ArpackException(info) - # Used as workspace and to return eigenvectors if requested. Not touched if - # eigenvectors are not requested - z = N.zeros((n, nev+1), N.float64, order='FORTRAN') - workev = N.zeros(3*ncv, N.float64) # More workspace - ierr = 0 - select = N.zeros(ncv, N.int32) # Used as internal workspace since dneupd - # parameter HOWMNY == 'A' - sigmar = sigma - sigmai = 0. 
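The refactored drivers take a matrix-vector callable instead of an explicit matrix. A usage sketch for ARPACK_eigs as defined above (the dense test matrix and the import path are assumptions; the real tests wrap sparse matrices the same way):

    import numpy as N
    from scipy.sandbox.arpack.speigs import ARPACK_eigs  # assumed build path

    A = N.diag(N.arange(1., 11.))    # hypothetical 10x10 test matrix
    matvec = lambda x: N.dot(A, x)   # ARPACK only ever needs A*x
    w, v = ARPACK_eigs(matvec, A.shape[0], nev=3, which='SM')
    # w: the 3 smallest-magnitude eigenvalues (1., 2., 3. here)
    # v: the corresponding eigenvectors, one per column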
- # Postprocess the Arnouldi vectors to extract eigenvalues/vectors - # If dneupd's first paramter is 'True' the eigenvectors are also calculated, - # 'False' only the eigenvalues - dr,di,z,info = _arpack.dneupd( - True, 'A', select, sigmar, sigmai, workev, bmat, which, nev, tol, resid, v, - iparam, ipntr, workd, workl, info) - - if N.abs(di[:-1]).max() == 0: - return dr[:-1] - else: - return dr[:-1] + 1j*di[:-1] - + return (ncv, resid, iparam, ipntr, v, workd, workl, info) Modified: trunk/Lib/sandbox/arpack/tests/test_speigs.py =================================================================== --- trunk/Lib/sandbox/arpack/tests/test_speigs.py 2006-11-18 02:09:53 UTC (rev 2323) +++ trunk/Lib/sandbox/arpack/tests/test_speigs.py 2006-11-20 20:27:36 UTC (rev 2324) @@ -28,7 +28,7 @@ matvec = get_matvec(A) #= lambda x: N.asarray(A*x)[0] nev=4 - eigvs = eigvals(matvec, A.shape[0], nev=nev) + eigvs = ARPACK_eigs(matvec, A.shape[0], nev=nev) calc_vals = eigvs[0] # Ensure the calculate eigenvectors have the same sign as the refence values calc_vecs = eigvs[1] / [N.sign(x[0]) for x in eigvs[1].T] @@ -40,11 +40,14 @@ # def test(self): # import pickle # import scipy.linsolve -# A,B = pickle.load(file('/tmp/mats.pickle')) +# A,B = pickle.load(file('mats.pickle')) # sigma = 27. # sigma_solve = scipy.linsolve.splu(A - sigma*B).solve -# w = geneigvals(B.matvec, sigma_solve, B.shape[0], sigma, 10) - +# w = ARPACK_gen_eigs(B.matvec, sigma_solve, B.shape[0], sigma, 10)[0] +# assert_array_almost_equal(w, +# [27.346442255386375, 49.100299170945405, 56.508474856551544, 56.835800191692492, +# 65.944215785041365, 66.194792400328367, 78.003788872725238, 79.550811647295944, +# 94.646308846854879, 95.30841709116271], decimal=11) if __name__ == "__main__": ScipyTest().run() From scipy-svn at scipy.org Tue Nov 21 13:34:01 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 21 Nov 2006 12:34:01 -0600 (CST) Subject: [Scipy-svn] r2325 - in trunk/Lib/io: . tests Message-ID: <20061121183401.D3F5039C075@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-21 12:33:56 -0600 (Tue, 21 Nov 2006) New Revision: 2325 Modified: trunk/Lib/io/mio.py trunk/Lib/io/tests/test_mio.py Log: Allow file-like objects to be passed to loadmat and savemat Modified: trunk/Lib/io/mio.py =================================================================== --- trunk/Lib/io/mio.py 2006-11-20 20:27:36 UTC (rev 2324) +++ trunk/Lib/io/mio.py 2006-11-21 18:33:56 UTC (rev 2325) @@ -12,12 +12,12 @@ from mio4 import MatFile4Reader, MatFile4Writer from mio5 import MatFile5Reader, MatFile5Writer +def find_mat_file(file_name, appendmat=True): + ''' Try to find .mat file on system path -def mat_reader_factory(file_name, appendmat=True, **kwargs): - """Create reader for matlab (TM) .mat format files - - See docstring for loadmat for input options - """ + file_name - file name string + append_mat - If True, and file_name does not end in '.mat', appends it + ''' if appendmat and file_name[-4:] == ".mat": file_name = file_name[:-4] if os.sep in file_name: @@ -38,10 +38,25 @@ break except IOError: pass + return full_name + +def mat_reader_factory(file_name, appendmat=True, **kwargs): + """Create reader for matlab (TM) .mat format files + + See docstring for loadmat for input options + """ + if isinstance(file_name, basestring): + full_name = find_mat_file(file_name, appendmat) if full_name is None: raise IOError, "%s not found on the path." 
% file_name - - byte_stream = open(full_name, 'rb') + byte_stream = open(full_name, 'rb') + else: + try: + file_name.read(0) + except AttributeError: + raise IOError, 'Reader needs file name or open file-like object' + byte_stream = file_name + MR = MatFile4Reader(byte_stream, **kwargs) if MR.format_looks_right(): return MR @@ -55,9 +70,10 @@ If name not a full path name, search for the file on the sys.path list and use the first one found (the current directory is searched first). + Can also pass open file-like object m_dict - optional dictionary in which to insert matfile variables appendmat - True to append the .mat extension to the end of the - given filename. + given filename, if not already present base_name - base name for unnamed variables (unused in code) byte_order - byte order ('native', 'little', 'BIG') in ('native', '=') @@ -92,12 +108,18 @@ @appendmat - if true, appends '.mat' extension to filename, if not present """ - if appendmat and file_name[-4:] != ".mat": - file_name = file_name + ".mat" - file_stream = open(file_name, 'wb') + if isinstance(file_name, basestring): + if appendmat and file_name[-4:] != ".mat": + file_name = file_name + ".mat" + file_stream = open(file_name, 'wb') + else: + try: + file_name.write('') + except AttributeError: + raise IOError, 'Writer needs file name or writeable file-like object' + byte_stream = file_name + MW = MatFile4Writer(file_stream) MW.put_variables(mdict) file_stream.close() -if __name__ == '__main__': - D = savemat('test.mat', {'a': 1}) Modified: trunk/Lib/io/tests/test_mio.py =================================================================== --- trunk/Lib/io/tests/test_mio.py 2006-11-20 20:27:36 UTC (rev 2324) +++ trunk/Lib/io/tests/test_mio.py 2006-11-21 18:33:56 UTC (rev 2325) @@ -2,6 +2,7 @@ import os from glob import glob +from cStringIO import StringIO from tempfile import mkstemp from numpy.testing import set_package_path, restore_path, ScipyTestCase, ScipyTest from numpy.testing import assert_equal, assert_array_almost_equal @@ -88,13 +89,11 @@ # Add the round trip tests dynamically, with given parameters def _make_rt_check_case(name, expected): def cc(self): - (fd, fname) = mkstemp('.mat') - file_stream = os.fdopen(fd, 'wb') - MW = MatFile4Writer(file_stream) + mat_stream = StringIO() + MW = MatFile4Writer(mat_stream) MW.put_variables(expected) - file_stream.close() - self._check_case(name, [fname], expected) - os.remove(fname) + mat_stream.seek(0) + self._check_case(name, [mat_stream], expected) cc.__doc__ = "check loadmat case %s" % name return cc From scipy-svn at scipy.org Tue Nov 21 17:06:56 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 21 Nov 2006 16:06:56 -0600 (CST) Subject: [Scipy-svn] r2326 - trunk/Lib/io Message-ID: <20061121220656.CBBAA39C00F@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-21 16:06:52 -0600 (Tue, 21 Nov 2006) New Revision: 2326 Modified: trunk/Lib/io/mio.py Log: Fix bugs in file stream handling in savemat Modified: trunk/Lib/io/mio.py =================================================================== --- trunk/Lib/io/mio.py 2006-11-21 18:33:56 UTC (rev 2325) +++ trunk/Lib/io/mio.py 2006-11-21 22:06:52 UTC (rev 2326) @@ -108,7 +108,8 @@ @appendmat - if true, appends '.mat' extension to filename, if not present """ - if isinstance(file_name, basestring): + file_is_string = isinstance(file_name, basestring) + if file_is_string: if appendmat and file_name[-4:] != ".mat": file_name = file_name + ".mat" file_stream = open(file_name, 'wb') @@ -117,9 +118,10 
@@
             file_name.write('')
         except AttributeError:
             raise IOError, 'Writer needs file name or writeable file-like object'
-        byte_stream = file_name
+        file_stream = file_name
 
     MW = MatFile4Writer(file_stream)
     MW.put_variables(mdict)
-    file_stream.close()
+    if file_is_string:
+        file_stream.close()

From scipy-svn at scipy.org  Tue Nov 21 17:08:16 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 21 Nov 2006 16:08:16 -0600 (CST)
Subject: [Scipy-svn] r2327 - trunk/Lib/io/tests
Message-ID: <20061121220816.B8BF739C08F@new.scipy.org>

Author: matthew.brett at gmail.com
Date: 2006-11-21 16:08:13 -0600 (Tue, 21 Nov 2006)
New Revision: 2327

Modified:
   trunk/Lib/io/tests/test_mio.py
Log:
Use savemat for round trip tests

Modified: trunk/Lib/io/tests/test_mio.py
===================================================================
--- trunk/Lib/io/tests/test_mio.py	2006-11-21 22:06:52 UTC (rev 2326)
+++ trunk/Lib/io/tests/test_mio.py	2006-11-21 22:08:13 UTC (rev 2327)
@@ -90,8 +90,7 @@
 def _make_rt_check_case(name, expected):
     def cc(self):
         mat_stream = StringIO()
-        MW = MatFile4Writer(mat_stream)
-        MW.put_variables(expected)
+        savemat(mat_stream, expected)
         mat_stream.seek(0)
         self._check_case(name, [mat_stream], expected)
     cc.__doc__ = "check loadmat case %s" % name
     return cc

From scipy-svn at scipy.org  Wed Nov 22 13:31:08 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 22 Nov 2006 12:31:08 -0600 (CST)
Subject: [Scipy-svn] r2328 - trunk/Lib/sandbox/montecarlo
Message-ID: <20061122183108.C2A7E39C282@new.scipy.org>

Author: edschofield
Date: 2006-11-22 12:31:04 -0600 (Wed, 22 Nov 2006)
New Revision: 2328

Modified:
   trunk/Lib/sandbox/montecarlo/setup.py
Log:
Added test for existence of randomkit.{ch} in montecarlo/setup.py. Fixes bug #293.

Modified: trunk/Lib/sandbox/montecarlo/setup.py
===================================================================
--- trunk/Lib/sandbox/montecarlo/setup.py	2006-11-21 22:08:13 UTC (rev 2327)
+++ trunk/Lib/sandbox/montecarlo/setup.py	2006-11-22 18:31:04 UTC (rev 2328)
@@ -1,15 +1,15 @@
 import numpy
 from numpy.distutils.core import setup
 from numpy.distutils.misc_util import Configuration
-from os.path import join, dirname
+from os.path import join, dirname, exists
 
 def configuration(parent_package='', top_path=None):
 
     config = Configuration('montecarlo', parent_package, top_path)
-
-    # This code requires 'randomkit.c' and 'randomkit.h' to have been copied
-    # to (or symlinked to) montecarlo/src/.
-
+
+    if not (exists('src/randomkit.c') and exists('src/randomkit.h')):
+        raise OSError, "Please copy or symlink randomkit.c and randomkit.h to montecarlo/src/ from numpy/random/mtrand/ in the NumPy source tree!"
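Taken together, r2325 through r2327 above let the io round trip run entirely in memory, with no temporary file. A sketch of the pattern the new test uses (cStringIO as in test_mio.py; the module path is as in the diff):

    from cStringIO import StringIO
    from scipy.io.mio import savemat, loadmat

    s = StringIO()
    savemat(s, {'a': 1})   # write MATLAB 4 format to the in-memory stream
    s.seek(0)              # rewind before handing the stream to the reader
    d = loadmat(s)         # d['a'] comes back as an array holding 1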
+
     config.add_extension('_intsampler',
                          sources = [join('src', f) for f in
                                     ['_intsamplermodule.c', 'compact5table.c', 'randomkit.c']])

From scipy-svn at scipy.org  Wed Nov 22 14:01:02 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 22 Nov 2006 13:01:02 -0600 (CST)
Subject: [Scipy-svn] r2329 - trunk/Lib/sandbox/montecarlo
Message-ID: <20061122190102.AADD639C298@new.scipy.org>

Author: edschofield
Date: 2006-11-22 13:00:56 -0600 (Wed, 22 Nov 2006)
New Revision: 2329

Modified:
   trunk/Lib/sandbox/montecarlo/setup.py
Log:
Improved robustness of path selection in montecarlo/setup.py

Modified: trunk/Lib/sandbox/montecarlo/setup.py
===================================================================
--- trunk/Lib/sandbox/montecarlo/setup.py	2006-11-22 18:31:04 UTC (rev 2328)
+++ trunk/Lib/sandbox/montecarlo/setup.py	2006-11-22 19:00:56 UTC (rev 2329)
@@ -7,7 +7,7 @@
 
     config = Configuration('montecarlo', parent_package, top_path)
 
-    if not (exists('src/randomkit.c') and exists('src/randomkit.h')):
+    if not (exists(join(config.package_path, 'src', 'randomkit.c')) and exists(join(config.package_path, 'src', 'randomkit.h'))):
         raise OSError, "Please copy or symlink randomkit.c and randomkit.h to montecarlo/src/ from numpy/random/mtrand/ in the NumPy source tree!"
 
     config.add_extension('_intsampler',

From scipy-svn at scipy.org  Sun Nov 26 00:06:03 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat, 25 Nov 2006 23:06:03 -0600 (CST)
Subject: [Scipy-svn] r2330 - in trunk/Lib: . sandbox
Message-ID: <20061126050603.6E03939C079@new.scipy.org>

Author: rkern
Date: 2006-11-25 23:06:00 -0600 (Sat, 25 Nov 2006)
New Revision: 2330

Added:
   trunk/Lib/odr/
Removed:
   trunk/Lib/sandbox/odr/
Modified:
   trunk/Lib/setup.py
Log:
Move the odr package out of the sandbox and into the main package due to requests.

Copied: trunk/Lib/odr (from rev 2329, trunk/Lib/sandbox/odr)

Modified: trunk/Lib/setup.py
===================================================================
--- trunk/Lib/setup.py	2006-11-22 19:00:56 UTC (rev 2329)
+++ trunk/Lib/setup.py	2006-11-26 05:06:00 UTC (rev 2330)
@@ -12,7 +12,7 @@
     config.add_subpackage('linsolve')
     config.add_subpackage('maxentropy')
     config.add_subpackage('misc')
-    #config.add_subpackage('montecarlo')
+    config.add_subpackage('odr')
     config.add_subpackage('optimize')
     config.add_subpackage('sandbox')
     config.add_subpackage('signal')

From scipy-svn at scipy.org  Sun Nov 26 00:13:44 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat, 25 Nov 2006 23:13:44 -0600 (CST)
Subject: [Scipy-svn] r2331 - trunk
Message-ID: <20061126051344.0548F39C09B@new.scipy.org>

Author: rkern
Date: 2006-11-25 23:13:41 -0600 (Sat, 25 Nov 2006)
New Revision: 2331

Modified:
   trunk/MANIFEST.in
Log:
Update the manifest for source distributions.
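After r2330 above, odr is a regular scipy subpackage rather than a sandbox one. A quick sketch of the user-visible effect (assuming a build of this revision):

    # before r2330 one needed the sandbox build:
    #     from scipy.sandbox import odr
    # from r2330 on, odr is importable from the top level:
    from scipy import odr
    # e.g. odr.Data, odr.Model and odr.ODR for orthogonal distance regression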
Modified: trunk/MANIFEST.in
===================================================================
--- trunk/MANIFEST.in	2006-11-26 05:06:00 UTC (rev 2330)
+++ trunk/MANIFEST.in	2006-11-26 05:13:41 UTC (rev 2331)
@@ -11,6 +11,14 @@
 #
 include Lib/sandbox/ann/*
 include Lib/sandbox/ann/data/*
+include Lib/sandbox/ann/doc/*
+include Lib/sandbox/arpack/*
+include Lib/sandbox/arpack/ARPACK/*
+include Lib/sandbox/arpack/ARPACK/LAPACK/*
+include Lib/sandbox/arpack/ARPACK/SRC/*
+include Lib/sandbox/arpack/ARPACK/UTIL/*
+include Lib/sandbox/arpack/build/*
+include Lib/sandbox/arpack/tests/*
 include Lib/sandbox/arraysetops/*
 include Lib/sandbox/arraysetops/tests/*
 include Lib/sandbox/buildgrid/*
@@ -40,9 +48,12 @@
 include Lib/sandbox/newoptimize/*
 include Lib/sandbox/numexpr/*
 include Lib/sandbox/numexpr/tests/*
-include Lib/sandbox/odr/*
-include Lib/sandbox/odr/odrpack/*
+include Lib/sandbox/oliphant/*
 include Lib/sandbox/plt/*
+include Lib/sandbox/pyem/*
+include Lib/sandbox/pyem/profile_data/*
+include Lib/sandbox/pyem/src/*
+include Lib/sandbox/pyem/tests/*
 include Lib/sandbox/pysparse/*
 include Lib/sandbox/pysparse/Tools/*
 include Lib/sandbox/pysparse/amd/*
@@ -57,6 +68,9 @@
 include Lib/sandbox/pysparse/tests/*
 include Lib/sandbox/pysparse/umfpack/*
 include Lib/sandbox/rkern/*
+include Lib/sandbox/spline/*
+include Lib/sandbox/spline/fitpack/*
+include Lib/sandbox/spline/tests/*
 include Lib/sandbox/stats/*
 include Lib/sandbox/svm/*
 include Lib/sandbox/svm/libsvm-2.82/*

From scipy-svn at scipy.org  Tue Nov 28 03:05:13 2006
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 28 Nov 2006 02:05:13 -0600 (CST)
Subject: [Scipy-svn] r2332 - in trunk/Lib/sandbox: . cdavid cdavid/src cdavid/tests
Message-ID: <20061128080513.5E3A139C193@new.scipy.org>

Author: cdavid
Date: 2006-11-28 02:04:47 -0600 (Tue, 28 Nov 2006)
New Revision: 2332

Added:
   trunk/Lib/sandbox/cdavid/
   trunk/Lib/sandbox/cdavid/Changelog
   trunk/Lib/sandbox/cdavid/TODO
   trunk/Lib/sandbox/cdavid/__init__.py
   trunk/Lib/sandbox/cdavid/autocorr.py
   trunk/Lib/sandbox/cdavid/info.py
   trunk/Lib/sandbox/cdavid/lpc.py
   trunk/Lib/sandbox/cdavid/setup.py
   trunk/Lib/sandbox/cdavid/src/
   trunk/Lib/sandbox/cdavid/src/Makefile
   trunk/Lib/sandbox/cdavid/src/README
   trunk/Lib/sandbox/cdavid/src/autocorr_nofft.c
   trunk/Lib/sandbox/cdavid/src/autocorr_nofft.def
   trunk/Lib/sandbox/cdavid/src/autocorr_nofft.h
   trunk/Lib/sandbox/cdavid/src/autocorr_nofft.tpl
   trunk/Lib/sandbox/cdavid/src/common.h
   trunk/Lib/sandbox/cdavid/src/levinson.c
   trunk/Lib/sandbox/cdavid/src/levinson.def
   trunk/Lib/sandbox/cdavid/src/levinson.h
   trunk/Lib/sandbox/cdavid/src/levinson.tpl
   trunk/Lib/sandbox/cdavid/src/lpc.c
   trunk/Lib/sandbox/cdavid/src/lpc.def
   trunk/Lib/sandbox/cdavid/src/lpc.h
   trunk/Lib/sandbox/cdavid/src/lpc.tpl
   trunk/Lib/sandbox/cdavid/tests/
   trunk/Lib/sandbox/cdavid/tests/test_autocorr.py
   trunk/Lib/sandbox/cdavid/tests/test_lpc.py
Modified:
   trunk/Lib/sandbox/setup.py
Log:
Initial commit of cdavid, for lpc

Added: trunk/Lib/sandbox/cdavid/Changelog
===================================================================
--- trunk/Lib/sandbox/cdavid/Changelog	2006-11-26 05:13:41 UTC (rev 2331)
+++ trunk/Lib/sandbox/cdavid/Changelog	2006-11-28 08:04:47 UTC (rev 2332)
@@ -0,0 +1,5 @@
+pyem (0.1) Tue, 28 Nov 2006 16:56:35 +0900
+
+	* first release
+
+-- David Cournapeau

Added: trunk/Lib/sandbox/cdavid/TODO
===================================================================
--- trunk/Lib/sandbox/cdavid/TODO	2006-11-26 05:13:41 UTC (rev 2331)
+++ trunk/Lib/sandbox/cdavid/TODO	2006-11-28
08:04:47 UTC (rev 2332) @@ -0,0 +1,6 @@ +# Last Change: Tue Nov 28 05:00 PM 2006 J + + - there is no doc. + - the handling of non contiguous arrays is not really + elegant, and the code is difficult to maintain + - rank > 2: must code in C ? (yuk) Added: trunk/Lib/sandbox/cdavid/__init__.py =================================================================== --- trunk/Lib/sandbox/cdavid/__init__.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/__init__.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,9 @@ +# Last Change: Tue Nov 28 04:00 PM 2006 J +from info import __doc__ + +from lpc import lpc2 as lpc +from autocorr import autocorr_oneside_nofft as autocorr + +from numpy.testing import NumpyTest +test = NumpyTest().test + Added: trunk/Lib/sandbox/cdavid/autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/autocorr.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/autocorr.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,322 @@ +#! /usr/bin/env python +# Last Change: Tue Nov 28 03:00 PM 2006 J + +# TODO: - proper test +# TODO: - proper profiling + +from numpy.fft import fft, ifft +from numpy import correlate, log2, floor, conj, real, \ + concatenate, sum, max + +from warnings import warn + +# use ctype to have one sided c imp of autocorr +import ctypes +from ctypes import c_uint, c_int +from numpy.ctypeslib import ndpointer, load_library + +ctypes_major = int(ctypes.__version__.split('.')[0]) +if ctypes_major < 1: + msg = "version of ctypes is %s, expected at least %s" \ + % (ctypes.__version__, '1.0.1') + raise importerror(msg) + +import numpy as N + +# load autocorr lib +_autocorr = load_library('gabsig.so', __file__) + +#=============================== +# define the functions with args +#=============================== + +# contiguous 1d +arg1 = ndpointer(dtype = N.float64, flags='CONTIGUOUS,ALIGNED') +arg2 = c_uint +arg3 = ndpointer(dtype = N.float64, flags='CONTIGUOUS,ALIGNED') +arg4 = c_uint +_autocorr.dbl_xcorr_nofft_1d.argtypes = [arg1, arg2, arg3, arg4] +_autocorr.dbl_xcorr_nofft_1d.restype = c_int + +arg1 = ndpointer(dtype = N.float32, flags='CONTIGUOUS,ALIGNED') +arg2 = c_uint +arg3 = ndpointer(dtype = N.float32, flags='CONTIGUOUS,ALIGNED') +arg4 = c_uint +_autocorr.flt_xcorr_nofft_1d.argtypes = [arg1, arg2, arg3, arg4] +_autocorr.flt_xcorr_nofft_1d.restype = c_int + +# non contiguous 1d +arg1 = ndpointer(dtype = N.float64, flags = 'ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = N.float64, flags = 'ALIGNED') +arg5 = c_uint +arg6 = c_uint +_autocorr.dbl_xcorr_nofft_1d_noncontiguous.argtypes = [arg1, \ + arg2, arg3, arg4, arg5, arg6] +_autocorr.dbl_xcorr_nofft_1d_noncontiguous.restype = c_int + +arg1 = ndpointer(dtype = N.float32, flags = 'ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = N.float32, flags = 'ALIGNED') +arg5 = c_uint +arg6 = c_uint +_autocorr.flt_xcorr_nofft_1d_noncontiguous.argtypes = [arg1, \ + arg2, arg3, arg4, arg5, arg6] +_autocorr.flt_xcorr_nofft_1d_noncontiguous.restype = c_int + +# contiguous 2d +arg1 = ndpointer(dtype = N.float64, flags='ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = N.float64, flags='ALIGNED') +arg5 = c_uint +_autocorr.dbl_xcorr_nofft_2d.argtypes = [arg1, arg2, arg3, arg4, arg5] +_autocorr.dbl_xcorr_nofft_2d.restype = c_int + +arg1 = ndpointer(dtype = N.float32, flags='ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = N.float32, flags='ALIGNED') +arg5 = c_uint 
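The declarations above and below all follow one repeated ctypes pattern: load the shared library, then describe each C function's signature with ndpointer so NumPy arrays are type- and flag-checked at call time. Reduced to the 1-D double case, using only names taken from autocorr.py itself (the .so name is whatever the build produced):

    import numpy as N
    from ctypes import c_uint, c_int
    from numpy.ctypeslib import ndpointer, load_library

    _lib = load_library('gabsig.so', __file__)  # same library as above
    # describe dbl_xcorr_nofft_1d(in, size, out, lag) to ctypes
    _lib.dbl_xcorr_nofft_1d.argtypes = [
        ndpointer(dtype=N.float64, flags='CONTIGUOUS,ALIGNED'),  # in
        c_uint,                                                  # size
        ndpointer(dtype=N.float64, flags='CONTIGUOUS,ALIGNED'),  # out
        c_uint]                                                  # lag
    _lib.dbl_xcorr_nofft_1d.restype = c_int

    x = N.random.randn(128)
    out = N.zeros(25)                    # lag + 1 result slots
    _lib.dbl_xcorr_nofft_1d(x, x.size, out, 24)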
+_autocorr.flt_xcorr_nofft_2d.argtypes = [arg1, arg2, arg3, arg4, arg5] +_autocorr.flt_xcorr_nofft_2d.restype = c_int + +# non contiguous 2d +arg1 = ndpointer(dtype = N.float64, flags='ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = c_uint +arg5 = c_uint +arg6 = ndpointer(dtype = N.float64, flags='ALIGNED') +arg7 = c_uint +arg8 = c_uint +arg9 = c_uint +_autocorr.dbl_xcorr_nofft_2d_noncontiguous.argtypes = [arg1, arg2, \ + arg3, arg4, arg5, arg6, arg7, arg8, arg9] +_autocorr.dbl_xcorr_nofft_2d_noncontiguous.restype = c_int + +arg1 = ndpointer(dtype = N.float32, flags='ALIGNED') +arg2 = c_uint +arg3 = c_uint +arg4 = c_uint +arg5 = c_uint +arg6 = ndpointer(dtype = N.float32, flags='ALIGNED') +arg7 = c_uint +arg8 = c_uint +arg9 = c_uint +_autocorr.flt_xcorr_nofft_2d_noncontiguous.argtypes = [arg1, arg2, \ + arg3, arg4, arg5, arg6, arg7, arg8, arg9] +_autocorr.flt_xcorr_nofft_2d_noncontiguous.restype = c_int + +#====================================== +# Fonctions to be used for testing only +#====================================== +def _raw_autocorr_1d(signal, lag): + assert signal.ndim == 1 + assert signal.flags['CONTIGUOUS'] + + if lag >= signal.size: + raise RuntimeError("lag should be < to input size") + + if signal.dtype == N.float64: + res = N.zeros((lag+1), N.float64) + _autocorr.dbl_xcorr_nofft_1d(signal, signal.size, res, lag) + elif signal.dtype == N.float32: + res = N.zeros((lag+1), N.float32) + _autocorr.flt_xcorr_nofft_1d(signal, signal.size, res, lag) + else: + raise TypeError("only float 32 and 64 bits supported for now") + + return res + +def _raw_autocorr_1d_noncontiguous(signal, lag): + assert signal.ndim == 1 + + if lag >= signal.size: + raise RuntimeError("lag should be < to input size") + + if signal.dtype == N.float64: + res = N.zeros((lag+1), N.float64) + _autocorr.dbl_xcorr_nofft_1d_noncontiguous(signal, signal.size, + signal.strides[0], res, res.strides[0], lag) + elif signal.dtype == N.float32: + res = N.zeros((lag+1), N.float32) + _autocorr.flt_xcorr_nofft_1d_noncontiguous(signal, signal.size, + signal.strides[0], res, res.strides[0], lag) + else: + raise TypeError("only float 32 and 64 bits supported for now") + + return res + +# python implementation of autocorr for rank <= 2 +def _autocorr_oneside_nofft_py(signal, lag, axis = -1): + if signal.ndim > 2: + raise NotImplemented("only for rank <=2") + + if axis % 2 == 0: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype) + center = signal.shape[0] - 1 + for i in range(signal.shape[1]): + #print "compute corr of " + str(signal[:, i]) + res[:, i] = correlate(signal[:, i], signal[:, i], \ + 'full')[center:center+lag+1] + elif axis % 2 == 1: + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + center = signal.shape[1] - 1 + for i in range(signal.shape[0]): + #print "compute corr of " + str(signal[i]) + res[i] = correlate(signal[i], signal[i], \ + 'full')[center:center+lag+1] + else: + raise RuntimeError("this should bnot happen, please fill a bug") + + return res + +#============= +# Public API +#============= +def autocorr_oneside_nofft(signal, lag, axis = -1): + """Compute the righ side of autocorrelation along the axis, for lags up to lag. + + This implementation does NOT use FFT.""" + # TODO For rank < 2, the overhead of python code may be significant. 
Should + # TODO not be difficult to do in C anyway (we can still use ctypes) + + # rank 0, 1 + if signal.ndim < 2: + size = signal.shape[-1] + if lag >= size: + raise RuntimeError("lag should be < to input size") + + res = N.zeros((lag+1), signal.dtype) + + if signal.flags['CONTIGUOUS']: + if signal.dtype == N.float64: + _autocorr.dbl_xcorr_nofft_1d(signal, size, res, lag) + elif signal.dtype == N.float32: + _autocorr.flt_xcorr_nofft_1d(signal, size, res, lag) + else: + raise TypeError("only float 32 and 64 bits supported for now") + else: + istride = signal.strides[0] + ostride = signal.itemsize + if signal.dtype == N.float64: + _autocorr.dbl_xcorr_nofft_1d_noncontiguous(signal, size, istride, + res, ostride, lag) + elif signal.dtype == N.float32: + _autocorr.flt_xcorr_nofft_1d_noncontiguous(signal, size, istride, + res, ostride, lag) + else: + raise TypeError("only float 32 and 64 bits supported for now") + + # rank 2 case + elif signal.ndim == 2: + size = signal.shape[axis] + if lag >= size: + raise RuntimeError("lag should be < to input size") + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + else: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype) + + if signal.dtype == N.float64: + # contiguous case + if signal.flags['C'] and axis % 2 == 1: + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + _autocorr.dbl_xcorr_nofft_2d(signal, signal.shape[0], signal.shape[1], + res, lag) + # contiguous case + elif signal.flags['F'] and axis % 2 == 0: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype, order = 'F') + _autocorr.dbl_xcorr_nofft_2d(signal, signal.shape[1], signal.shape[0], + res, lag) + # non contiguous case + elif axis % 2 == 0: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype) + warn("non contiguous used, this will be slow") + _autocorr.dbl_xcorr_nofft_2d_noncontiguous(signal, + signal.shape[1], signal.shape[0], + signal.strides[1], signal.strides[0], + res, res.strides[1], res.strides[0], lag) + elif axis % 2 == 1: + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + warn("non contiguous used, this will be slow") + _autocorr.dbl_xcorr_nofft_2d_noncontiguous(signal, + signal.shape[0], signal.shape[1], + signal.strides[0], signal.strides[1], + res, res.strides[0], res.strides[1], lag) + elif signal.dtype == N.float32: + # contiguous case + if signal.flags['C'] and axis % 2 == 1: + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + _autocorr.flt_xcorr_nofft_2d(signal, signal.shape[0], signal.shape[1], + res, lag) + # contiguous case + elif signal.flags['F'] and axis % 2 == 0: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype, order = 'F') + _autocorr.flt_xcorr_nofft_2d(signal, signal.shape[1], signal.shape[0], + res, lag) + # non contiguous case + elif axis % 2 == 0: + res = N.zeros((lag+1, signal.shape[1]), signal.dtype) + warn("non contiguous used, this will be slow") + _autocorr.flt_xcorr_nofft_2d_noncontiguous(signal, + signal.shape[1], signal.shape[0], + signal.strides[1], signal.strides[0], + res, res.strides[1], res.strides[0], lag) + elif axis % 2 == 1: + res = N.zeros((signal.shape[0], lag+1), signal.dtype) + warn("non contiguous used, this will be slow") + _autocorr.flt_xcorr_nofft_2d_noncontiguous(signal, + signal.shape[0], signal.shape[1], + signal.strides[0], signal.strides[1], + res, res.strides[0], res.strides[1], lag) + else: + raise TypeError("only float 32 and 64 bits supported for now") + else: + raise RuntimeError("rank > 2 not supported yet") + + return res + +def bench(): + size = 256 + nframes = 4000 + lag = 24 + + X = 
N.random.randn(nframes, size) + X = N.require(X, requirements = 'C') + + niter = 10 + + # Contiguous + print "Running optimized with ctypes" + def contig(*args, **kargs): + return autocorr_oneside_nofft(*args, **kargs) + for i in range(niter): + Yt = contig(X, lag, axis = 1) + + Yr = _autocorr_oneside_nofft_py(X, lag, axis = 1) + N.testing.assert_array_almost_equal(Yt, Yr, 10) + + # Non contiguous + print "Running optimized with ctypes (non contiguous)" + def ncontig(*args, **kargs): + return autocorr_oneside_nofft(*args, **kargs) + X = N.require(X, requirements = 'F') + for i in range(niter): + Yt = ncontig(X, lag, axis = 1) + + Yr = _autocorr_oneside_nofft_py(X, lag, axis = 1) + N.testing.assert_array_almost_equal(Yt, Yr, 10) + + print "Benchmark func done" + +if __name__ == '__main__': + import hotshot, hotshot.stats + profile_file = 'autocorr.prof' + prof = hotshot.Profile(profile_file, lineevents=1) + prof.runcall(bench) + p = hotshot.stats.load(profile_file) + print p.sort_stats('cumulative').print_stats(20) + prof.close() Added: trunk/Lib/sandbox/cdavid/info.py =================================================================== --- trunk/Lib/sandbox/cdavid/info.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/info.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,10 @@ +""" +LPC Routines for speech processing + +Copyright: David Cournapeau 2006 +License: BSD-style (see LICENSE.txt in main source directory) +""" +version = '0.1' + +depends = ['linalg'] +ignore = False Added: trunk/Lib/sandbox/cdavid/lpc.py =================================================================== --- trunk/Lib/sandbox/cdavid/lpc.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/lpc.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,249 @@ +#! 
/usr/bin/env python + +# Last Change: Tue Nov 28 03:00 PM 2006 J + +# TODO: make lpc work for any rank +from warnings import warn + +import numpy as _N +import scipy.signal as _sig + +from scipy.linalg import toeplitz, inv + +from autocorr import autocorr_oneside_nofft + +# use ctype to have fast lpc computation +import ctypes +from ctypes import c_uint, c_int +from numpy.ctypeslib import ndpointer, load_library +ctypes_major = int(ctypes.__version__.split('.')[0]) +if ctypes_major < 1: + msg = "version of ctypes is %s, expected at least %s" \ + % (ctypes.__version__, '1.0.1') + raise importerror(msg) + +# load lpc +_lpc = load_library('gabsig.so', __file__) + +#=============================== +# define the functions with args +#=============================== +arg1 = ndpointer(dtype = _N.float64, flags = 'C') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = _N.float64, flags = 'C') +arg5 = ndpointer(dtype = _N.float64, flags = 'C') +arg6 = ndpointer(dtype = _N.float64, flags = 'C') + +_lpc.dbl_lpc.argtypes = [arg1, arg2, arg3, arg4, arg5, arg6] +_lpc.dbl_lpc.restype = c_int + +arg1 = ndpointer(dtype = _N.float32, flags = 'C') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = _N.float32, flags = 'C') +arg5 = ndpointer(dtype = _N.float32, flags = 'C') +arg6 = ndpointer(dtype = _N.float32, flags = 'C') + +_lpc.flt_lpc.argtypes = [arg1, arg2, arg3, arg4, arg5, arg6] +_lpc.flt_lpc.restype = c_int + +arg1 = ndpointer(dtype = _N.float64, flags = 'C') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = _N.float64, flags = 'C') +arg5 = ndpointer(dtype = _N.float64, flags = 'C') +arg6 = ndpointer(dtype = _N.float64, flags = 'C') + +_lpc.dbl_levinson2d.argtypes = [arg1, arg2, arg3, arg4, arg5, arg6] +_lpc.dbl_levinson2d.restype = c_int + +arg1 = ndpointer(dtype = _N.float32, flags = 'C') +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype = _N.float32, flags = 'C') +arg5 = ndpointer(dtype = _N.float32, flags = 'C') +arg6 = ndpointer(dtype = _N.float32, flags = 'C') + +_lpc.flt_levinson2d.argtypes = [arg1, arg2, arg3, arg4, arg5, arg6] +_lpc.flt_levinson2d.restype = c_int + +def lpc_ref(signal, order): + """ Return the order + 1 LPC coefficients + for the signal. 
This is just for reference, as it is using + the direct inversion of the toeplitz matrix, which is really slow""" + if signal.ndim > 1: + print "Warning, not tested for rank > 1" + p = order + 1 + r = autocorr_oneside_nofft(signal, order) / signal.shape[0] + return _N.concatenate(([1.], _N.dot(inv(toeplitz(r[:-1])), -r[1:]))) + +def lpc(signal, order): + """ Return the order + 1 LPC coefficients + for the signal using levinson durbin algorithm """ + if signal.ndim > 1: + warn("Warning, not tested for rank > 1") + if signal.size <= order: + raise RuntimeError("size is smaller than order !") + + if signal.dtype == _N.float64: + coeff = _N.zeros((order+1), _N.float64) + kcoeff = _N.zeros((order), _N.float64) + err = _N.zeros((1), _N.float64) + st = _lpc.dbl_lpc(signal, signal.size, order, coeff, kcoeff, err) + elif signal.dtype == _N.float32: + coeff = _N.zeros((order+1), _N.float32) + kcoeff = _N.zeros((order), _N.float32) + err = _N.zeros((1), _N.float32) + st = _lpc.flt_lpc(signal, signal.size, order, coeff, kcoeff, err) + else: + raise TypeError("Sorry, only float32 and float64 supported") + if not (st == 0): + raise RuntimeError("Error while using levinson algo, returns err is %d", st) + return coeff, err, kcoeff + +def lpcres_ref(signal, order, axis = 0): + return _sig.lfilter(lpc_ref(signal, order), 1., signal, axis = 0) + +def lpcres(signal, order, axis = 0): + return _sig.lfilter(lpc(signal, order)[0], 1., signal, axis = 0) + +def _lpc2_py(signal, order, axis = -1): + """python implementation of lpc for rank 2., Do not use, for testing purpose only""" + if signal.ndim > 2: + raise NotImplemented("only for rank <=2") + + if signal.ndim < 2: + return lpc(_N.require(signal, requirements = 'C'), order) + + # For each array of direction axis, compute levinson durbin + if axis % 2 == 0: + # Prepare output arrays + coeff = _N.zeros((order+1, signal.shape[1]), signal.dtype) + kcoeff = _N.zeros((order, signal.shape[1]), signal.dtype) + err = _N.zeros(signal.shape[1], signal.dtype) + for i in range(signal.shape[1]): + coeff[:, i], err[i], kcoeff[:, i] = \ + lpc(_N.require(signal[:, i], requirements = 'C'), order) + elif axis % 2 == 1: + # Prepare output arrays + coeff = _N.zeros((signal.shape[0], order+1), signal.dtype) + kcoeff = _N.zeros((signal.shape[0], order), signal.dtype) + err = _N.zeros(signal.shape[0], signal.dtype) + for i in range(signal.shape[0]): + coeff[i], err[i], kcoeff[i] = \ + lpc(_N.require(signal[i], requirements = 'C'), order) + else: + raise RuntimeError("this should not happen, please fill a bug") + + return coeff, err, kcoeff + +def lpc2(signal, order, axis = -1): + """ Returns ar coeff, err and k coeff""" + sz = signal.shape[axis] + if order >= sz: + raise RuntimeError("order should be strictly smaller than the length of signal") + + # rank 1 + if signal.ndim < 2: + if signal.dtype == _N.float64: + coeff = _N.zeros((order+1), _N.float64) + kcoeff = _N.zeros((order), _N.float64) + err = _N.zeros((1), _N.float64) + st = _lpc.dbl_lpc(signal, signal.size, order, coeff, kcoeff, err) + elif signal.dtype == _N.float32: + coeff = _N.zeros((order+1), _N.float32) + kcoeff = _N.zeros((order), _N.float32) + err = _N.zeros((1), _N.float32) + st = _lpc.flt_lpc(signal, signal.size, order, coeff, kcoeff, err) + else: + raise TypeError("Sorry, only float32 and float64 supported") + + if not (st == 0): + raise RuntimeError("Error while using levinson algo, returns err is %d", st) + + return coeff, err, kcoeff + # rank 2 + elif signal.ndim == 2: + # Compute biased autocorrelation up to 
lag = order + bias = 1. / sz + corr = bias * autocorr_oneside_nofft(signal, order, axis) + + if axis % 2 == 0: + # we transpose to have a major row order + icorr = corr.T + icorr = _N.require(icorr, requirements = 'C') + at = _N.zeros((icorr.shape[0], order+1), icorr.dtype) + kt = _N.zeros((icorr.shape[0], order), icorr.dtype) + et = _N.zeros(icorr.shape[0], icorr.dtype) + + if icorr.dtype == _N.float64: + _lpc.dbl_levinson2d(icorr, icorr.shape[0], icorr.shape[1], at, et, kt) + elif icorr.dtype == _N.float32: + _lpc.flt_levinson2d(icorr, icorr.shape[0], icorr.shape[1], at, et, kt) + else: + raise TypeError("Only float32 and float64 supported") + + return at.T, et.T, kt.T + elif axis % 2 == 1: + icorr = _N.require(corr, requirements = 'C') + at = _N.zeros((icorr.shape[0], order+1), icorr.dtype) + kt = _N.zeros((icorr.shape[0], order), icorr.dtype) + et = _N.zeros(icorr.shape[0], icorr.dtype) + + if icorr.dtype == _N.float64: + _lpc.dbl_levinson2d(icorr, icorr.shape[0], icorr.shape[1], at, et, kt) + elif icorr.dtype == _N.float32: + _lpc.flt_levinson2d(icorr, icorr.shape[0], icorr.shape[1], at, et, kt) + else: + raise TypeError("Only float32 and float64 supported") + + return at, et, kt + else: + raise RuntimeError("This should not happen, this is a bug") + else: + raise RuntimeError("Sorry, only rank <= 2 supported for now") + +def bench(): + size = 256 + nframes = 4000 + order = 24 + + X = _N.random.randn(nframes, size) + X = _N.require(X, requirements = 'C') + + niter = 10 + + # Contiguous + print "Running optimized with ctypes" + for i in range(niter): + at, et, kt = lpc2(X, order, axis = 1) + + a, e, k = _lpc2_py(X, order, axis = 1) + _N.testing.assert_array_almost_equal(a, at, 10) + _N.testing.assert_array_almost_equal(e, et, 10) + _N.testing.assert_array_almost_equal(k, kt, 10) + + ## Non contiguous + #print "Running optimized with ctypes (non contiguous)" + #def ncontig(*args, **kargs): + # return lpc2(*args, **kargs) + #X = _N.require(X, requirements = 'F') + #for i in range(niter): + # at, et, kt = ncontig(X, order, axis = 1) + + #a, e, k = _lpc2_py(X, order, axis = 1) + #_N.testing.assert_array_almost_equal(a, at, 10) + #_N.testing.assert_array_almost_equal(e, et, 10) + #_N.testing.assert_array_almost_equal(k, kt, 10) + + print "Benchmark func done" + +if __name__ == '__main__': + import hotshot, hotshot.stats + profile_file = 'lpc.prof' + prof = hotshot.Profile(profile_file, lineevents=1) + prof.runcall(bench) + p = hotshot.stats.load(profile_file) + print p.sort_stats('cumulative').print_stats(20) + prof.close() Added: trunk/Lib/sandbox/cdavid/setup.py =================================================================== --- trunk/Lib/sandbox/cdavid/setup.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/setup.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,41 @@ +#! /usr/bin/env python +# Last Change: Tue Nov 28 03:00 PM 2006 J + +""" toolbox of Cournapeau David. Implements various things such as +autocorrelation with lag argument, lpc coefficients computation. 
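The lpc/lpc2 functions above return the AR coefficients, the final prediction error and the reflection coefficients. A usage sketch on a synthetic AR(2) signal (the import path assumes the sandbox package builds as scipy.sandbox.cdavid, where lpc2 is exported as lpc; the filter coefficients are made up for the demonstration):

    import numpy as N
    import scipy.signal as sig
    from scipy.sandbox.cdavid import lpc   # assumed build path; lpc = lpc2

    # AR(2) process: x[n] = 0.5*x[n-1] - 0.25*x[n-2] + e[n]
    e = N.random.randn(4096)
    x = sig.lfilter([1.], [1., -0.5, 0.25], e)
    coeff, err, kcoeff = lpc(x, 2)
    # coeff should be close to [1., -0.5, 0.25] for a long enough signal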
+
+2006, David Cournapeau
+
+LICENSE: the license of pyaudio is the same as scipy's"""
+
+from os.path import join
+import os
+
+from info import version as cdavid_version
+
+DISTNAME         = 'cdavid'
+VERSION          = cdavid_version
+DESCRIPTION      = 'A scipy package for various speech processing tools (lpc, etc.)'
+MAINTAINER       = 'David Cournapeau'
+MAINTAINER_EMAIL = 'david at ar.media.kyoto-u.ac.jp'
+URL              = 'http://ar.media.kyoto-u.ac.jp/members/david'
+LICENSE          = 'BSD'
+
+def configuration(parent_package='',top_path=None, package_name=DISTNAME):
+    from numpy.distutils.misc_util import Configuration
+    config = Configuration(package_name,parent_package,top_path,
+             version = VERSION)
+    config.add_data_dir('tests')
+    #config.add_data_dir('profile_data')
+    config.add_extension('gabsig',
+                         #define_macros=[('LIBSVM_EXPORTS', None),
+                         #               ('LIBSVM_DLL', None)],
+                         sources=[join('src', 'levinson.c'),
+                                  join('src', 'autocorr_nofft.c'),
+                                  join('src', 'lpc.c')])
+
+    return config
+
+if __name__ == "__main__":
+    from numpy.distutils.core import setup
+    setup(configuration=configuration) Added: trunk/Lib/sandbox/cdavid/src/Makefile =================================================================== --- trunk/Lib/sandbox/cdavid/src/Makefile	2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/Makefile	2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,64 @@
+CC      = colorgcc
+LD      = gcc
+
+AUTOGEN = autogen
+
+PYTHONINC = -I/usr/include/python2.4
+NUMPYINC  = -I/home/david/local/lib/python2.4/site-packages/numpy/core/include
+
+DEBUG   = -g
+OPTIMS  = -O3 -funroll-all-loops -march=pentium4 -msse2
+#OPTIMS = $(DEBUG)
+WARN    = -W -Wall -Winline -Wstrict-prototypes -Wmissing-prototypes -Waggregate-return -Wcast-align -Wcast-qual -Wnested-externs -Wshadow -Wbad-function-cast -Wwrite-strings
+LANGSTD = -std=gnu9x
+
+CFLAGS  = $(PYTHONINC) $(NUMPYINC) $(OPTIMS) $(WARN) $(LANGSTD)
+
+OBJS    = autocorr_nofft.o levinson.o lpc.o
+
+SRC     = levinson.c lpc.c autocorr_nofft.c
+
+# targets
+lib: libgabsig.so
+
+src: $(SRC)
+
+#=================
+# Shared libraries
+#=================
+libgabsig.so: $(OBJS)
+	$(LD) -shared -o $@ $(OBJS) -Wl,-soname,$@
+
+#=============
+# Object files
+#=============
+lpc.o: lpc.c
+	$(CC) -c $(CFLAGS) -fPIC $<
+
+levinson.o: levinson.c
+	$(CC) -c $(CFLAGS) -fPIC $<
+
+autocorr_nofft.o: autocorr_nofft.c autocorr_nofft.h
+	$(CC) -c $(CFLAGS) -fPIC $<
+
+#=========================
+# Autogenerated c sources
+#=========================
+autocorr_nofft.c: autocorr_nofft.def autocorr_nofft.tpl
+	$(AUTOGEN) autocorr_nofft.def
+
+lpc.c: lpc.def lpc.tpl lpc.h
+	$(AUTOGEN) lpc.def
+
+levinson.c: levinson.def levinson.tpl levinson.h
+	$(AUTOGEN) levinson.def
+
+clean:
+	rm -f libautocorr.so
+	rm -f test_autocorr
+	rm -f *.o
+	rm -f *.so
+	rm -f *.pyc
+	rm -f autocorr_nofft.c
+	rm -f lpc.c
+	rm -f levinson.c Added: trunk/Lib/sandbox/cdavid/src/README =================================================================== --- trunk/Lib/sandbox/cdavid/src/README	2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/README	2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,6 @@
+C sources are generated by autogen. This enables support for float and double from the
+same sources (a la C++ templates).
+
+To generate the sources, just execute make src.
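+
+(autogen reads each .def file, which declares one float_type entry per
+supported precision -- float and double here -- and expands the matching
+.tpl template once per entry; that is how both precisions share one source.)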
+To modify a source, change the file .tpl instead of the .c, and regenerate the source +(using make src, for example) Added: trunk/Lib/sandbox/cdavid/src/autocorr_nofft.c =================================================================== --- trunk/Lib/sandbox/cdavid/src/autocorr_nofft.c 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/autocorr_nofft.c 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,299 @@ +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * vim:syntax=c + * + * TODO: is size_t 64 bits long on 64 bits machines ? + */ +#include /* for size_t */ +#include /* for size_t */ + +#include "autocorr_nofft.h" + +/* + * NOFFT auto correlation + */ + + +/* + * float version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int flt_xcorr_nofft_1d(const float *in, + const size_t size, float *out, const size_t lag) +{ + size_t i, j; + float acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < size; ++i) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag; ++i) { + acc = 0; + for (j = i; j < size; ++j) { + acc += in[j-i]*in[j]; + } + out[i] = acc; + } + + return 0; +} + +/* + * double version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int dbl_xcorr_nofft_1d(const double *in, + const size_t size, double *out, const size_t lag) +{ + size_t i, j; + double acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < size; ++i) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag; ++i) { + acc = 0; + for (j = i; j < size; ++j) { + acc += in[j-i]*in[j]; + } + out[i] = acc; + } + + return 0; +} + + + +/* + * float version for non contiguous arrays; the corresponding + * array should have at least in_size elements. + * + * Constraints: + * - lag should be < in_size + * - strides in bytes + * - TODO: check if should be aligned ? + * + * returns 0 is succesfull, other value otherwise. + */ +int flt_xcorr_nofft_1d_noncontiguous(const float *in, size_t in_size, + size_t in_stride, float *out, size_t out_stride, size_t lag) +{ + size_t i, j, clag; + size_t istride = in_stride / sizeof(float); + size_t ostride = out_stride / sizeof(float); + float acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < in_size * istride; i+= istride) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag ; ++i) { + acc = 0; + clag = i * istride; + for (j = clag; j < in_size * istride; j += istride) { + acc += in[j-clag]*in[j]; + } + out[i * ostride] = acc; + } + + return 0; +} + +/* + * double version for non contiguous arrays; the corresponding + * array should have at least in_size elements. + * + * Constraints: + * - lag should be < in_size + * - strides in bytes + * - TODO: check if should be aligned ? + * + * returns 0 is succesfull, other value otherwise. 
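+ *
+ * Worked example (hypothetical values): a column of a C-ordered 2d double
+ * array with 3 columns has in_stride = 3 * sizeof(double) = 24 bytes, so
+ * istride = 3 and the lag-0 loop reads in[0], in[3], in[6], ...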
+ */ +int dbl_xcorr_nofft_1d_noncontiguous(const double *in, size_t in_size, + size_t in_stride, double *out, size_t out_stride, size_t lag) +{ + size_t i, j, clag; + size_t istride = in_stride / sizeof(double); + size_t ostride = out_stride / sizeof(double); + double acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < in_size * istride; i+= istride) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag ; ++i) { + acc = 0; + clag = i * istride; + for (j = clag; j < in_size * istride; j += istride) { + acc += in[j-clag]*in[j]; + } + out[i * ostride] = acc; + } + + return 0; +} + + + +/* + * For rank 2 arrays, contiguous cases + * float version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int flt_xcorr_nofft_2d(const float *in, + size_t dim0, size_t dim1, float *out, const size_t lag) +{ + size_t i; + float *coaxis; + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d autocorr, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + coaxis = out + i * (lag + 1); + flt_xcorr_nofft_1d(in + i * dim1, dim1, coaxis, lag); + } + + return 0; +} + +/* + * For rank 2 arrays, contiguous cases + * double version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int dbl_xcorr_nofft_2d(const double *in, + size_t dim0, size_t dim1, double *out, const size_t lag) +{ + size_t i; + double *coaxis; + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d autocorr, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + coaxis = out + i * (lag + 1); + dbl_xcorr_nofft_1d(in + i * dim1, dim1, coaxis, lag); + } + + return 0; +} + + + +/* + * For rank 2 arrays, non contiguous cases + * float version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int flt_xcorr_nofft_2d_noncontiguous(const float *in, + size_t dim0, size_t dim1, size_t in_stride0, size_t in_stride1, + float *out, size_t out_stride0, size_t out_stride1, + const size_t lag) +{ + size_t i; + + size_t istride0 = in_stride0 / sizeof(float); + size_t ostride0 = out_stride0 / sizeof(float); + + float *coaxis; +#if 0 + fprintf(stdout, "%s: shape is (%d, %d)\n", __func__, dim0, dim1); + fprintf(stdout, "%s: istrides are (%d, %d)\n", __func__, istride0, istride1); + + fprintf(stdout, "%s: ostrides are (%d, %d)\n", __func__, ostride0, ostride1); + for(i = 0; i < dim0; ++i) { + ciaxis = in + i * istride0; + coaxis = out + i * istride0; + fprintf(stdout, "%d 1d autocorr, first element is %f, last is %f (%d el)\n", + i, ciaxis[0], ciaxis[(dim1-1) * istride1], dim1); + } +#endif + + for(i = 0; i < dim0; ++i) { + coaxis = out + i * ostride0; + flt_xcorr_nofft_1d_noncontiguous(in + i * istride0, dim1, in_stride1, + coaxis, out_stride1, lag); + } + return 0; +} + +/* + * For rank 2 arrays, non contiguous cases + * double version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. 
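+ *
+ * (applies the 1d non contiguous routine above to each of the dim0 rows,
+ * honoring the byte strides of both axes)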
+ */ +int dbl_xcorr_nofft_2d_noncontiguous(const double *in, + size_t dim0, size_t dim1, size_t in_stride0, size_t in_stride1, + double *out, size_t out_stride0, size_t out_stride1, + const size_t lag) +{ + size_t i; + + size_t istride0 = in_stride0 / sizeof(double); + size_t ostride0 = out_stride0 / sizeof(double); + + double *coaxis; +#if 0 + fprintf(stdout, "%s: shape is (%d, %d)\n", __func__, dim0, dim1); + fprintf(stdout, "%s: istrides are (%d, %d)\n", __func__, istride0, istride1); + + fprintf(stdout, "%s: ostrides are (%d, %d)\n", __func__, ostride0, ostride1); + for(i = 0; i < dim0; ++i) { + ciaxis = in + i * istride0; + coaxis = out + i * istride0; + fprintf(stdout, "%d 1d autocorr, first element is %f, last is %f (%d el)\n", + i, ciaxis[0], ciaxis[(dim1-1) * istride1], dim1); + } +#endif + + for(i = 0; i < dim0; ++i) { + coaxis = out + i * ostride0; + dbl_xcorr_nofft_1d_noncontiguous(in + i * istride0, dim1, in_stride1, + coaxis, out_stride1, lag); + } + return 0; +} + + Added: trunk/Lib/sandbox/cdavid/src/autocorr_nofft.def =================================================================== --- trunk/Lib/sandbox/cdavid/src/autocorr_nofft.def 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/autocorr_nofft.def 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,18 @@ +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * vim:syntax=c + */ +autogen definitions autocorr_nofft.tpl; + +float_type = { + type_name = "float" ; + short_name = "flt" ; + upper_name = "FLOAT" ; +} ; + +float_type = { + type_name = "double" ; + short_name = "dbl" ; + upper_name = "DOUBLE" ; +} ; + Added: trunk/Lib/sandbox/cdavid/src/autocorr_nofft.h =================================================================== --- trunk/Lib/sandbox/cdavid/src/autocorr_nofft.h 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/autocorr_nofft.h 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,47 @@ +/* + * Last Change: Mon Nov 27 07:00 PM 2006 J + */ + +#ifndef _C_AUTOCORR_NOFFT_H_ +#define _C_AUTOCORR_NOFFT_H_ + +/* + * Direct implementation of auto correlation (faster when a few lags only are + * necessary). One side only, out should have at least lag+1 elements allocated + * + * Expect in and out to be contiguous + */ +int flt_xcorr_nofft_1d(const float *in, size_t size, float *out, size_t lag); +int dbl_xcorr_nofft_1d(const double *in, size_t size, double *out, size_t lag); + +/* + * Direct implementation of auto correlation (faster when a few lags only are + * necessary). 
One side only, out should have at least lag+1 elements allocated + * + * Expect in and out need not to be contiguous + */ +int flt_xcorr_nofft_1d_noncontiguous(const float *in, size_t in_size, size_t in_stride, + float *out, size_t out_stride, size_t lag); +int dbl_xcorr_nofft_1d_noncontiguous(const double *in, size_t in_size, size_t in_stride, + double *out, size_t out_stride, size_t lag); + +/* + * 1d autocorrelation for rank 2 arrays + */ +int flt_xcorr_nofft_2d(const float *in, size_t dim0, size_t dim1, + float *out, size_t lag); +int dbl_xcorr_nofft_2d(const double *in, size_t dim0, size_t dim1, + double *out, size_t lag); + +/* + * 1d autocorrelation for rank 2 arrays, non contiguous cases + */ +int dbl_xcorr_nofft_2d_noncontiguous(const double *in, + size_t dim0, size_t dim1, size_t in_stride0, size_t in_stride1, + double *out, size_t out_stride0, size_t out_stride1, + const size_t lag); +int flt_xcorr_nofft_2d_noncontiguous(const float *in, + size_t dim0, size_t dim1, size_t in_stride0, size_t in_stride1, + float *out, size_t out_stride0, size_t out_stride1, + const size_t lag); +#endif Added: trunk/Lib/sandbox/cdavid/src/autocorr_nofft.tpl =================================================================== --- trunk/Lib/sandbox/cdavid/src/autocorr_nofft.tpl 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/autocorr_nofft.tpl 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,162 @@ +[+ AutoGen5 template c +] +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * vim:syntax=c + * + * TODO: is size_t 64 bits long on 64 bits machines ? + */ +#include /* for size_t */ +#include /* for size_t */ + +#include "autocorr_nofft.h" + +/* + * NOFFT auto correlation + */ + +[+ For float_type +] +/* + * [+ (get "type_name") +] version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int [+ (get "short_name") +]_xcorr_nofft_1d(const [+ (get "type_name") +] *in, + const size_t size, [+ (get "type_name") +] *out, const size_t lag) +{ + size_t i, j; + [+ (get "type_name") +] acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < size; ++i) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag; ++i) { + acc = 0; + for (j = i; j < size; ++j) { + acc += in[j-i]*in[j]; + } + out[i] = acc; + } + + return 0; +} +[+ ENDFOR float_type +] + +[+ For float_type +] +/* + * [+ (get "type_name") +] version for non contiguous arrays; the corresponding + * array should have at least in_size elements. + * + * Constraints: + * - lag should be < in_size + * - strides in bytes + * - TODO: check if should be aligned ? + * + * returns 0 is succesfull, other value otherwise. 
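+ *
+ * (expanded by autogen once per float_type entry of autocorr_nofft.def)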
+ */ +int [+ (get "short_name") +]_xcorr_nofft_1d_noncontiguous(const [+ (get "type_name") +] *in, size_t in_size, + size_t in_stride, [+ (get "type_name") +] *out, size_t out_stride, size_t lag) +{ + size_t i, j, clag; + size_t istride = in_stride / sizeof([+ (get "type_name") +]); + size_t ostride = out_stride / sizeof([+ (get "type_name") +]); + [+ (get "type_name") +] acc; + + /* lag 0 */ + acc = 0; + for (i = 0; i < in_size * istride; i+= istride) { + acc += in[i]*in[i]; + } + out[0] = acc; + + /* lag : 1 -> lag */ + for (i = 1; i <= lag ; ++i) { + acc = 0; + clag = i * istride; + for (j = clag; j < in_size * istride; j += istride) { + acc += in[j-clag]*in[j]; + } + out[i * ostride] = acc; + } + + return 0; +} +[+ ENDFOR float_type +] + +[+ For float_type +] +/* + * For rank 2 arrays, contiguous cases + * [+ (get "type_name") +] version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int [+ (get "short_name") +]_xcorr_nofft_2d(const [+ (get "type_name") +] *in, + size_t dim0, size_t dim1, [+ (get "type_name") +] *out, const size_t lag) +{ + size_t i; + [+ (get "type_name") +] *coaxis; + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d autocorr, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + coaxis = out + i * (lag + 1); + [+ (get "short_name") +]_xcorr_nofft_1d(in + i * dim1, dim1, coaxis, lag); + } + + return 0; +} +[+ ENDFOR float_type +] + +[+ For float_type +] +/* + * For rank 2 arrays, non contiguous cases + * [+ (get "type_name") +] version; out must have a size of lag+1, already pre allocated + * + * lag should be < size + * + * returns 0 is succesfull, other value otherwise. + */ +int [+ (get "short_name") +]_xcorr_nofft_2d_noncontiguous(const [+ (get "type_name") +] *in, + size_t dim0, size_t dim1, size_t in_stride0, size_t in_stride1, + [+ (get "type_name") +] *out, size_t out_stride0, size_t out_stride1, + const size_t lag) +{ + size_t i; + + size_t istride0 = in_stride0 / sizeof([+ (get "type_name") +]); + size_t ostride0 = out_stride0 / sizeof([+ (get "type_name") +]); + + [+ (get "type_name") +] *coaxis; +#if 0 + fprintf(stdout, "%s: shape is (%d, %d)\n", __func__, dim0, dim1); + fprintf(stdout, "%s: istrides are (%d, %d)\n", __func__, istride0, istride1); + + fprintf(stdout, "%s: ostrides are (%d, %d)\n", __func__, ostride0, ostride1); + for(i = 0; i < dim0; ++i) { + ciaxis = in + i * istride0; + coaxis = out + i * istride0; + fprintf(stdout, "%d 1d autocorr, first element is %f, last is %f (%d el)\n", + i, ciaxis[0], ciaxis[(dim1-1) * istride1], dim1); + } +#endif + + for(i = 0; i < dim0; ++i) { + coaxis = out + i * ostride0; + [+ (get "short_name") +]_xcorr_nofft_1d_noncontiguous(in + i * istride0, dim1, in_stride1, + coaxis, out_stride1, lag); + } + return 0; +} +[+ ENDFOR float_type +] + Added: trunk/Lib/sandbox/cdavid/src/common.h =================================================================== --- trunk/Lib/sandbox/cdavid/src/common.h 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/common.h 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,38 @@ +/* + * Last Change: Tue Nov 28 04:00 PM 2006 J + * + * Implements FP macros missing in C89 + */ +#ifndef _GABSIG_C_COMMON_H + #define _GABSIG_C_COMMON_H + +#include +#if defined(fpclassify) + + #if !defined(isnan) + #define isnan(x) (fpclassify((x)) == FP_NAN) + #endif + #if !defined(isinf) + #define isinf(x) (fpclassify((x)) == FP_INFINITE) + #endif + 
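+
+    /* C99 case: fpclassify is available, so the isnan/isinf above are reliable */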
+#else /* check to see if we already have functions like these */
+    #if !defined(HAVE_ISNAN)
+        #if !defined(isnan)
+            #define isnan(x) ((x) != (x))
+        #endif
+    #endif /* HAVE_ISNAN */
+
+    #if !defined(HAVE_ISINF)
+        #if !defined(isinf)
+            #define isinf(x) (!isnan((x)) && isnan((x)-(x)))
+        #endif
+    #endif /* HAVE_ISINF */
+
+#endif /* defined(fpclassify) */
+
+#if !defined(isfinite)
+    #define isfinite(x) (!isnan((x)) && !isinf((x)))
+#endif
+
+#endif /* end of recursive header inclusion protection */ Added: trunk/Lib/sandbox/cdavid/src/levinson.c =================================================================== --- trunk/Lib/sandbox/cdavid/src/levinson.c	2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/levinson.c	2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,313 @@
+/*
+ * Last Change: Tue Nov 28 04:00 PM 2006 J
+ *
+ * vim:syntax=c
+ */
+#include <stddef.h> /* for size_t */
+#include <math.h>   /* for isfinite */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+
+#include "levinson.h"
+
+
+/* The actual computation :
+ *      - in    : the input vector which defines the toeplitz matrix
+ *      - size  : size of in (ie number of elements)
+ *      - order : size of the system to solve. order must be < size - 1
+ *      - acoeff: solution (ie ar coefficients). Size must be at least order+1
+ *      - err   : *prediction* error (scalar)
+ *      - kcoeff: reflection coefficients. Size must be at least equal to order.
+ *      - tmp   : cache, must have at least order elements
+ */
+
+/*
+ * this function assumes all arrays are allocated with the
+ * right size, and that the parameters make sense. No checking
+ * is done here; it must be done before calling this function
+ *
+ * Returns 0 on success, -1 if a computation error happened (overflow, underflow
+ * for error calculation)
+ */
+
+int flt_levinson1d(const float* in,
+        size_t order, float* acoeff,
+        float* err, float* kcoeff,
+        float* tmp)
+{
+    /* TODO: check that the first element of corr is not 0 */
+
+    size_t i, j;
+    float acc;
+    int ret = 0;
+
+    /*
+     * order 0
+     */
+    acoeff[0] = (float)1.0;
+    *err = in[0];
+
+    /*
+     * order >= 1
+     */
+    for ( i = 1; i <= order; ++i) {
+        acc = in[i];
+        for ( j = 1; j <= i-1; ++j) {
+            acc += acoeff[j]*in[i-j];
+        }
+        kcoeff[i-1] = -acc/(*err);
+        acoeff[i] = kcoeff[i-1];
+
+        for ( j = 0; j < order; ++j) {
+            tmp[j] = acoeff[j];
+        }
+
+        for (j = 1; j < i; ++j) {
+            acoeff[j] += kcoeff[i-1]*tmp[i-j];
+        }
+        *err *= (1-kcoeff[i-1]*kcoeff[i-1]);
+    }
+
+    return ret;
+}
+
+int flt_levinson1d_check(const float* in,
+        size_t order, float* acoeff,
+        float* err, float* kcoeff,
+        float* tmp)
+{
+    /* TODO: check that the first element of corr is not 0 */
+
+    size_t i, j;
+    float acc;
+    int ret = 0;
+
+    /*
+     * order 0
+     */
+    acoeff[0] = (float)1.0;
+    *err = in[0];
+
+    /*
+     * order >= 1
+     */
+    for ( i = 1; i <= order; ++i) {
+        acc = in[i];
+        for ( j = 1; j <= i-1; ++j) {
+            acc += acoeff[j]*in[i-j];
+        }
+        kcoeff[i-1] = -acc/(*err);
+        if (!isfinite(kcoeff[i-1])) {
+            fprintf(stderr, "%s:%s, kcoeff is not finite, err is %e\n",
+                    __FILE__, __func__, *err);
+            ret = -1;
+        }
+        acoeff[i] = kcoeff[i-1];
+
+        for ( j = 0; j < order; ++j) {
+            tmp[j] = acoeff[j];
+        }
+
+        for (j = 1; j < i; ++j) {
+            acoeff[j] += kcoeff[i-1]*tmp[i-j];
+        }
+        *err *= (1-kcoeff[i-1]*kcoeff[i-1]);
+    }
+
+    return ret;
+}
+
+/*
+ * For rank 2 arrays, contiguous cases
+ * float version; out must have a size of dim0 * (lag + 1),
+ * already pre allocated
+ *
+ * order should be < dim1
+ *
+ * returns 0 if successful, other value otherwise.
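+ *
+ * (each row reuses the 1d recursion above: at step i it computes
+ *  k_i = -(r_i + sum_{j=1}^{i-1} a_j * r_{i-j}) / err, sets a_i = k_i,
+ *  updates a_1..a_{i-1}, and scales err by (1 - k_i^2))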
+ */ +int flt_levinson2d(const float *in, + size_t dim0, size_t dim1, + float *acoeff, + float *err, + float *kcoeff) +{ + size_t i; + size_t order = dim1 - 1; + float *caaxis, *ceaxis, *ckaxis, *buff; + + buff = malloc(sizeof(*buff) * order); + if (buff == NULL) { + goto fail_malloc; + } + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d levinson, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + caaxis = acoeff + i * (order + 1); + ckaxis = kcoeff + i * (order); + ceaxis = err + i; + flt_levinson1d(in + i * dim1, order, caaxis, ceaxis, ckaxis, buff); + } + + free(buff); + return 0; + +fail_malloc: + return -1; +} + +/* The actual computation : + * - in : the input vector which defines the toeplitz matrix + * - size : size of in (ie number of elements) + * - order : size of the system to solve. order must be < size -1 + * - acoeff: solution (ie ar coefficients). Size must be at last order+1 + * - err : *prediction* error (scalar) + * - kcoeff: reflexion coefficients. Size must be at last equal to equal to order. + * - tmp : cache, mnust have at least order elements + */ + +/* + * this function assume all arrays are allocated with the + * right size, and that the parameters make sense. No checking + * is done, must be done before calling this function + * + * Returns 0 on success, -1 if a compuation error happened (overflow, underflow + * for error calculation) + */ + +int dbl_levinson1d(const double* in, + size_t order, double* acoeff, + double* err, double* kcoeff, + double* tmp) +{ + /* TODO: to check if first element of corr is 0*/ + + size_t i, j; + double acc; + int ret = 0; + + /* + * order 0 + */ + acoeff[0] = (double)1.0; + *err = in[0]; + + /* + * order >= 1 + */ + for ( i = 1; i <= order; ++i) { + acc = in[i]; + for ( j = 1; j <= i-1; ++j) { + acc += acoeff[j]*in[i-j]; + } + kcoeff[i-1] = -acc/(*err); + acoeff[i] = kcoeff[i-1]; + + for ( j = 0; j < order; ++j) { + tmp[j] = acoeff[j]; + } + + for (j = 1; j < i; ++j) { + acoeff[j] += kcoeff[i-1]*tmp[i-j]; + } + *err *= (1-kcoeff[i-1]*kcoeff[i-1]); + } + + return ret; +} + +int dbl_levinson1d_check(const double* in, + size_t order, double* acoeff, + double* err, double* kcoeff, + double* tmp) +{ + /* TODO: to check if first element of corr is 0*/ + + size_t i, j; + double acc; + int ret = 0; + + /* + * order 0 + */ + acoeff[0] = (double)1.0; + *err = in[0]; + + /* + * order >= 1 + */ + for ( i = 1; i <= order; ++i) { + acc = in[i]; + for ( j = 1; j <= i-1; ++j) { + acc += acoeff[j]*in[i-j]; + } + kcoeff[i-1] = -acc/(*err); + if (!isfinite(kcoeff[i-1])) { + fprintf(stderr, "%s:%s, kcoeff is not finite, err is %e\n", + __FILE__, __func__, *err); + ret = -1; + } + acoeff[i] = kcoeff[i-1]; + + for ( j = 0; j < order; ++j) { + tmp[j] = acoeff[j]; + } + + for (j = 1; j < i; ++j) { + acoeff[j] += kcoeff[i-1]*tmp[i-j]; + } + *err *= (1-kcoeff[i-1]*kcoeff[i-1]); + } + + return ret; +} + +/* + * For rank 2 arrays, contiguous cases + * double version; out must have a size of dim0 * (lag + 1), + * already pre allocated + * + * order should be < dim1 + * + * returns 0 is succesfull, other value otherwise. 
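+ *
+ * (double counterpart of flt_levinson2d: a single scratch buffer of
+ *  order elements is allocated once and reused for every row)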
+ */ +int dbl_levinson2d(const double *in, + size_t dim0, size_t dim1, + double *acoeff, + double *err, + double *kcoeff) +{ + size_t i; + size_t order = dim1 - 1; + double *caaxis, *ceaxis, *ckaxis, *buff; + + buff = malloc(sizeof(*buff) * order); + if (buff == NULL) { + goto fail_malloc; + } + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d levinson, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + caaxis = acoeff + i * (order + 1); + ckaxis = kcoeff + i * (order); + ceaxis = err + i; + dbl_levinson1d(in + i * dim1, order, caaxis, ceaxis, ckaxis, buff); + } + + free(buff); + return 0; + +fail_malloc: + return -1; +} + Added: trunk/Lib/sandbox/cdavid/src/levinson.def =================================================================== --- trunk/Lib/sandbox/cdavid/src/levinson.def 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/levinson.def 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,18 @@ +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * vim:syntax=c + */ +autogen definitions levinson.tpl; + +float_type = { + type_name = "float" ; + short_name = "flt" ; + upper_name = "FLOAT" ; +} ; + +float_type = { + type_name = "double" ; + short_name = "dbl" ; + upper_name = "DOUBLE" ; +} ; + Added: trunk/Lib/sandbox/cdavid/src/levinson.h =================================================================== --- trunk/Lib/sandbox/cdavid/src/levinson.h 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/levinson.h 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,24 @@ +#ifndef LEVINSON1D_H +#define LEVINSON1D_H + +#include + +int dbl_levinson1d(const double* in, size_t order, + double* acoeff, double *err, double* kcoeff, double* tmp); + +int dbl_levinson1d_check(const double* in, size_t order, + double* acoeff, double *err, double* kcoeff, double* tmp); + +int flt_levinson1d(const float* in, size_t order, + float* acoeff, float *err, float* kcoeff, float* tmp); + +int flt_levinson1d_check(const float* in, size_t order, + float* acoeff, float *err, float* kcoeff, float* tmp); + +int dbl_levinson2d(const double* in, size_t dim0, size_t dim1, + double* acoeff, double *err, double* kcoeff); + +int flt_levinson2d(const float* in, size_t dim0, size_t dim1, + float* acoeff, float *err, float* kcoeff); + +#endif Added: trunk/Lib/sandbox/cdavid/src/levinson.tpl =================================================================== --- trunk/Lib/sandbox/cdavid/src/levinson.tpl 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/levinson.tpl 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,165 @@ +[+ AutoGen5 template c +] +/* + * Last Change: Tue Nov 28 04:00 PM 2006 J + * + * vim:syntax=c + */ +#include +#include /* for isfinite */ +#include +#include + +#include "common.h" + +#include "levinson.h" + +[+ For float_type +] +/* The actual computation : + * - in : the input vector which defines the toeplitz matrix + * - size : size of in (ie number of elements) + * - order : size of the system to solve. order must be < size -1 + * - acoeff: solution (ie ar coefficients). Size must be at last order+1 + * - err : *prediction* error (scalar) + * - kcoeff: reflexion coefficients. Size must be at last equal to equal to order. + * - tmp : cache, mnust have at least order elements + */ + +/* + * this function assume all arrays are allocated with the + * right size, and that the parameters make sense. 
No checking + * is done, must be done before calling this function + * + * Returns 0 on success, -1 if a compuation error happened (overflow, underflow + * for error calculation) + */ + +int [+ (get "short_name") +]_levinson1d(const [+ (get "type_name") +]* in, + size_t order, [+ (get "type_name") +]* acoeff, + [+ (get "type_name") +]* err, [+ (get "type_name") +]* kcoeff, + [+ (get "type_name") +]* tmp) +{ + /* TODO: to check if first element of corr is 0*/ + + size_t i, j; + [+ (get "type_name") +] acc; + int ret = 0; + + /* + * order 0 + */ + acoeff[0] = ([+ (get "type_name") +])1.0; + *err = in[0]; + + /* + * order >= 1 + */ + for ( i = 1; i <= order; ++i) { + acc = in[i]; + for ( j = 1; j <= i-1; ++j) { + acc += acoeff[j]*in[i-j]; + } + kcoeff[i-1] = -acc/(*err); + acoeff[i] = kcoeff[i-1]; + + for ( j = 0; j < order; ++j) { + tmp[j] = acoeff[j]; + } + + for (j = 1; j < i; ++j) { + acoeff[j] += kcoeff[i-1]*tmp[i-j]; + } + *err *= (1-kcoeff[i-1]*kcoeff[i-1]); + } + + return ret; +} + +int [+ (get "short_name") +]_levinson1d_check(const [+ (get "type_name") +]* in, + size_t order, [+ (get "type_name") +]* acoeff, + [+ (get "type_name") +]* err, [+ (get "type_name") +]* kcoeff, + [+ (get "type_name") +]* tmp) +{ + /* TODO: to check if first element of corr is 0*/ + + size_t i, j; + [+ (get "type_name") +] acc; + int ret = 0; + + /* + * order 0 + */ + acoeff[0] = ([+ (get "type_name") +])1.0; + *err = in[0]; + + /* + * order >= 1 + */ + for ( i = 1; i <= order; ++i) { + acc = in[i]; + for ( j = 1; j <= i-1; ++j) { + acc += acoeff[j]*in[i-j]; + } + kcoeff[i-1] = -acc/(*err); + if (!isfinite(kcoeff[i-1])) { + fprintf(stderr, "%s:%s, kcoeff is not finite, err is %e\n", + __FILE__, __func__, *err); + ret = -1; + } + acoeff[i] = kcoeff[i-1]; + + for ( j = 0; j < order; ++j) { + tmp[j] = acoeff[j]; + } + + for (j = 1; j < i; ++j) { + acoeff[j] += kcoeff[i-1]*tmp[i-j]; + } + *err *= (1-kcoeff[i-1]*kcoeff[i-1]); + } + + return ret; +} + +/* + * For rank 2 arrays, contiguous cases + * [+ (get "type_name") +] version; out must have a size of dim0 * (lag + 1), + * already pre allocated + * + * order should be < dim1 + * + * returns 0 is succesfull, other value otherwise. 
+ */ +int [+ (get "short_name") +]_levinson2d(const [+ (get "type_name") +] *in, + size_t dim0, size_t dim1, + [+ (get "type_name") +] *acoeff, + [+ (get "type_name") +] *err, + [+ (get "type_name") +] *kcoeff) +{ + size_t i; + size_t order = dim1 - 1; + [+ (get "type_name") +] *caaxis, *ceaxis, *ckaxis, *buff; + + buff = malloc(sizeof(*buff) * order); + if (buff == NULL) { + goto fail_malloc; + } + +#if 0 + for(i = 0; i < dim0; ++i) { + fprintf(stdout, "%d 1d levinson, first element is %f\n", i, in[i * dim1]); + } +#endif + for(i = 0; i < dim0; ++i) { + caaxis = acoeff + i * (order + 1); + ckaxis = kcoeff + i * (order); + ceaxis = err + i; + [+ (get "short_name") +]_levinson1d(in + i * dim1, order, caaxis, ceaxis, ckaxis, buff); + } + + free(buff); + return 0; + +fail_malloc: + return -1; +} +[+ ENDFOR float_type +] Added: trunk/Lib/sandbox/cdavid/src/lpc.c =================================================================== --- trunk/Lib/sandbox/cdavid/src/lpc.c 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/lpc.c 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,144 @@ +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * + * vim:syntax=c + */ +#include /* for malloc and co */ +#include + +#include "levinson.h" +#include "autocorr_nofft.h" + +#include "lpc.h" + + +/* + * (float version) Compute lpc coeff (order x coefficients) of a + * contiguous array + * + * err is a double, coeff must be able to contain order+1 elements, and kcoeff + * order elements + */ +int flt_lpc(const float* signal, + size_t size, size_t order, float* coeff, + float* kcoeff, float* err) +{ + size_t i, nbuff, ncache; + float *buff, *cache, biasnorm; + int status; + + biasnorm = 1.0/size; + nbuff = order + 1; + ncache = order; + + buff = malloc(sizeof(*buff) * nbuff); + if (buff == NULL) { + status = -2; + goto fail_buff_malloc; + } + + cache = malloc(sizeof(*cache) * ncache); + if (cache == NULL) { + status = -2; + goto fail_cache_malloc; + } + + /* + * Compute the autocorreleation up to lag order, normalized by the + * size of the signal + */ + flt_xcorr_nofft_1d(signal, size, buff, order); + for(i = 0; i < nbuff; ++i) { + buff[i] *= biasnorm; + } + + /* + * Compute the inverse coefficients using (simple) levinson recursive algo + */ + status = flt_levinson1d(buff, order, + coeff, err, kcoeff, cache); + if (status) { + status = -1; + goto fail_levinson; + } + + free(cache); + free(buff); + + return 0; + +fail_levinson: + free(cache); +fail_cache_malloc: + free(buff); +fail_buff_malloc: + fprintf(stderr, "Failure\n"); + return status; +} + + +/* + * (double version) Compute lpc coeff (order x coefficients) of a + * contiguous array + * + * err is a double, coeff must be able to contain order+1 elements, and kcoeff + * order elements + */ +int dbl_lpc(const double* signal, + size_t size, size_t order, double* coeff, + double* kcoeff, double* err) +{ + size_t i, nbuff, ncache; + double *buff, *cache, biasnorm; + int status; + + biasnorm = 1.0/size; + nbuff = order + 1; + ncache = order; + + buff = malloc(sizeof(*buff) * nbuff); + if (buff == NULL) { + status = -2; + goto fail_buff_malloc; + } + + cache = malloc(sizeof(*cache) * ncache); + if (cache == NULL) { + status = -2; + goto fail_cache_malloc; + } + + /* + * Compute the autocorreleation up to lag order, normalized by the + * size of the signal + */ + dbl_xcorr_nofft_1d(signal, size, buff, order); + for(i = 0; i < nbuff; ++i) { + buff[i] *= biasnorm; + } + + /* + * Compute the inverse coefficients using (simple) levinson recursive algo + */ + 
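/* buff holds the biased autocorrelation r[0..order]; cache is the levinson scratch space (order elements) */ +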
status = dbl_levinson1d(buff, order, + coeff, err, kcoeff, cache); + if (status) { + status = -1; + goto fail_levinson; + } + + free(cache); + free(buff); + + return 0; + +fail_levinson: + free(cache); +fail_cache_malloc: + free(buff); +fail_buff_malloc: + fprintf(stderr, "Failure\n"); + return status; +} + + Added: trunk/Lib/sandbox/cdavid/src/lpc.def =================================================================== --- trunk/Lib/sandbox/cdavid/src/lpc.def 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/lpc.def 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,18 @@ +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * vim:syntax=c + */ +autogen definitions lpc.tpl; + +float_type = { + type_name = "float" ; + short_name = "flt" ; + upper_name = "FLOAT" ; +} ; + +float_type = { + type_name = "double" ; + short_name = "dbl" ; + upper_name = "DOUBLE" ; +} ; + Added: trunk/Lib/sandbox/cdavid/src/lpc.h =================================================================== --- trunk/Lib/sandbox/cdavid/src/lpc.h 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/lpc.h 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,20 @@ +/* + * Last Change: Tue Nov 28 12:00 PM 2006 J + */ + +/* + * Functions to compute lpc coefficients with contiguous arrays. + * + * input is signal, output are coeff, kcoeff and err. + * + * requirements: + * - signal must have size elements + * - order < size + * - coeff must have order + 1 elements at least + * - kcoeff must have order elements at least + * - err must have at least one element + */ +int dbl_lpc(const double* signal, size_t size, size_t order, double* coeff, + double* kcoeff, double* err); +int flt_lpc(const float* signal, size_t size, size_t order, float* coeff, + float* kcoeff, float* err); Added: trunk/Lib/sandbox/cdavid/src/lpc.tpl =================================================================== --- trunk/Lib/sandbox/cdavid/src/lpc.tpl 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/src/lpc.tpl 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,80 @@ +[+ AutoGen5 template c +] +/* + * Last Change: Tue Nov 28 03:00 PM 2006 J + * + * vim:syntax=c + */ +#include /* for malloc and co */ +#include + +#include "levinson.h" +#include "autocorr_nofft.h" + +#include "lpc.h" + +[+ For float_type +] +/* + * ([+ (get "type_name") +] version) Compute lpc coeff (order x coefficients) of a + * contiguous array + * + * err is a double, coeff must be able to contain order+1 elements, and kcoeff + * order elements + */ +int [+ (get "short_name") +]_lpc(const [+ (get "type_name") +]* signal, + size_t size, size_t order, [+ (get "type_name") +]* coeff, + [+ (get "type_name") +]* kcoeff, [+ (get "type_name") +]* err) +{ + size_t i, nbuff, ncache; + [+ (get "type_name") +] *buff, *cache, biasnorm; + int status; + + biasnorm = 1.0/size; + nbuff = order + 1; + ncache = order; + + buff = malloc(sizeof(*buff) * nbuff); + if (buff == NULL) { + status = -2; + goto fail_buff_malloc; + } + + cache = malloc(sizeof(*cache) * ncache); + if (cache == NULL) { + status = -2; + goto fail_cache_malloc; + } + + /* + * Compute the autocorreleation up to lag order, normalized by the + * size of the signal + */ + [+ (get "short_name") +]_xcorr_nofft_1d(signal, size, buff, order); + for(i = 0; i < nbuff; ++i) { + buff[i] *= biasnorm; + } + + /* + * Compute the inverse coefficients using (simple) levinson recursive algo + */ + status = [+ (get "short_name") +]_levinson1d(buff, order, + coeff, err, kcoeff, cache); + if (status) { + status = -1; + goto 
fail_levinson; + } + + free(cache); + free(buff); + + return 0; + +fail_levinson: + free(cache); +fail_cache_malloc: + free(buff); +fail_buff_malloc: + fprintf(stderr, "Failure\n"); + return status; +} + +[+ ENDFOR float_type +] Added: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,385 @@ +#! /usr/bin/env python +# Last Change: Mon Nov 27 08:00 PM 2006 J + +from numpy.testing import * +from numpy.random import randn, seed +from numpy import correlate, array, concatenate, require + +from numpy.ctypeslib import ndpointer, load_library +from ctypes import c_uint + +set_package_path() +from autocorr import _raw_autocorr_1d, _raw_autocorr_1d_noncontiguous +from autocorr import autocorr_oneside_nofft as autocorr +from autocorr import _autocorr_oneside_nofft_py as autocorr_py +restore_path() + +import numpy + +# number of decimals to check +nd = 20 +# minimum number of correct decimals required +md = 12 + +a = array([1, 2, 3.]) +b = a + 3 + +x = concatenate((a, b)).reshape(2, 3) + +# float and double C order +xc = require(x, dtype = numpy.float64, requirements = 'C') +xcf = require(x, dtype = numpy.float32, requirements = 'C') +xc1 = xc[0] +xcf1 = xcf[0] + +# float and double F order +xf = require(x, dtype = numpy.float64, requirements = 'FORTRAN') +xff = require(x, dtype = numpy.float32, requirements = 'FORTRAN') +xf1 = xf[0] +xff1 = xff[0] + +# This class tests the C functions directly. This is more a debugging tool +# that a test case, as the tested functions are not part of the public API +class test_ctype_1d(NumpyTestCase): + def check_contiguous_double(self): + # double test + xt = xc1 + yt = _raw_autocorr_1d(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_contiguous_float(self): + # float test + xt = xcf1 + + yt = _raw_autocorr_1d(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_non_contiguous_double(self): + # double test + xt = xf1 + yt = _raw_autocorr_1d_noncontiguous(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_non_contiguous_float(self): + # float test + xt = xff1 + yt = _raw_autocorr_1d_noncontiguous(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + +# Test autocorrelation for rank 1 arrays +class test_autocorr_1d(NumpyTestCase): + def check_contiguous_double(self): + # double test + xt = xc1 + yt = autocorr(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_contiguous_float(self): + # float test + xt = xcf1 + + yt = autocorr(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_non_contiguous_double(self): + # double test + xt = xf1 + yt = autocorr(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + + def check_non_contiguous_float(self): + # float test + xt = xff1 + yt = autocorr(xt, xt.size - 1) + + yr = correlate(xt, xt, mode = 'full') + yr = yr[xt.size-1:] + + assert_array_equal(yt, yr) + +# This class is a pure python 
implementation of autocorrelation +# with rank 2 arrays. This will be used in the above test cases; +# this function implements the expected behaviour of the public +# autocorr function. +class test_autocorr_py(NumpyTestCase): + def check_full(self): + xt = xc + axis = -1 + lag = xt.shape[axis] - 1 + yt = autocorr_py(xt, lag, axis = axis) + + yr = yt.copy() + for i in range(xt.shape[(axis +1) % 2]): + tmp = correlate(xt[i], xt[i], 'full') + center = xt[i].size - 1 + assert_array_equal(tmp[center:center+1+lag], yt[i]) + + xt = xc + axis = 0 + lag = xt.shape[axis] - 1 + yt = autocorr_py(xt, lag, axis = axis) + + yr = yt.copy() + for i in range(xt.shape[(axis +1) % 2]): + tmp = correlate(xt[:, i], xt[:, i], 'full') + center = xt[:,i].size - 1 + assert_array_equal(tmp[center:center+1+lag], yt[:, i]) + + def check_partial(self): + xt = xc + axis = -1 + lag = 1 + yt = autocorr_py(xt, lag, axis = axis) + + yr = yt.copy() + for i in range(xt.shape[(axis +1) % 2]): + tmp = correlate(xt[i], xt[i], 'full') + center = xt[i].size - 1 + assert_array_equal(tmp[center:center+1+lag], yt[i]) + + xt = xc + axis = 0 + lag = 1 + yt = autocorr_py(xt, lag, axis = axis) + + yr = yt.copy() + for i in range(xt.shape[(axis +1) % 2]): + tmp = correlate(xt[:, i], xt[:, i], 'full') + center = xt[:,i].size - 1 + assert_array_equal(tmp[center:center+1+lag], yt[:, i]) + +# Test autocorrelation for rank 2 arrays +class test_autocorr_2d(NumpyTestCase): + def check_double_full(self): + # C, axis 1 test + xt = xc + axis = -1 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # C, axis 0 test + xt = xc + axis = 0 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 0 test + xt = xf + axis = 0 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 1 test + xt = xf + axis = -1 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + def check_float(self): + # C, axis 1 test + xt = xcf + axis = -1 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # C, axis 0 test + xt = xcf + axis = 0 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 0 test + xt = xff + axis = 0 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 1 test + xt = xff + axis = -1 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + def check_double_partial(self): + # C, axis 1 test + xt = xc + axis = -1 + lag = 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # C, axis 0 test + xt = xc + axis = 0 + lag = 0 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 0 test + xt = xf + axis = 1 + lag = xt.shape[axis] - 1 + yt = autocorr(xt, lag, axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + + # F, axis 1 test + xt = xf + axis = -1 + lag = 1 + yt = autocorr(xt, lag, 
axis = axis) + + yr = autocorr_py(xt, lag, axis = axis) + assert_array_equal(yt, yr) + +if __name__ == "__main__": + ScipyTest().run() + +#class test_autocorr_2d(NumpyTestCase): +# def check_double(self): +# # C, axis 1 test +# xt = xc +# axis = -1 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[i], xt[i], 'full') +# assert_array_equal(tmp[lag:], yt[i]) +# +# # C, axis 0 test +# xt = xc +# axis = 0 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[:, i], xt[:, i], 'full') +# assert_array_equal(tmp[lag:], yt[:, i]) +# +# # F, axis 0 test +# xt = xf +# axis = 0 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[:, i], xt[:, i], 'full') +# assert_array_equal(tmp[lag:], yt[:, i]) +# +# # F, axis 1 test +# xt = xf +# axis = -1 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[i], xt[i], 'full') +# assert_array_equal(tmp[lag:], yt[i]) +# +# def check_float(self): +# # C, axis 1 test +# xt = xcf +# axis = -1 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[i], xt[i], 'full') +# assert_array_equal(tmp[lag:], yt[i]) +# +# # C, axis 0 test +# xt = xcf +# axis = 0 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[:, i], xt[:, i], 'full') +# assert_array_equal(tmp[lag:], yt[:, i]) +# +# # F, axis 0 test +# xt = xff +# axis = 0 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[:, i], xt[:, i], 'full') +# assert_array_equal(tmp[lag:], yt[:, i]) +# +# # F, axis 1 test +# xt = xff +# axis = -1 +# lag = xt.shape[axis] - 1 +# yt = autocorr(xt, lag, axis = axis) +# +# yr = yt.copy() +# for i in range(xt.shape[(axis +1) % 2]): +# tmp = correlate(xt[i], xt[i], 'full') +# assert_array_equal(tmp[lag:], yt[i]) +# Added: trunk/Lib/sandbox/cdavid/tests/test_lpc.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_lpc.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/cdavid/tests/test_lpc.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -0,0 +1,187 @@ +#! 
/usr/bin/env python +# Last Change: Tue Nov 28 03:00 PM 2006 J + +from numpy.testing import * +from numpy.random import randn, seed +from numpy import correlate, array, concatenate, require + +from numpy.ctypeslib import ndpointer, load_library +from ctypes import c_uint + +set_package_path() +from lpc import _lpc2_py as lpc_py +from lpc import lpc_ref, lpc2 +from autocorr import autocorr_oneside_nofft +restore_path() + +import numpy + +# number of decimals to check +nd = 20 +# minimum number of correct decimals required +md = 12 + +a = array([1, 2, 3.]) +b = a + 3 + +x = concatenate((a, b)).reshape(2, 3) + +# float and double C order +xc = require(x, dtype = numpy.float64, requirements = 'C') +xcf = require(x, dtype = numpy.float32, requirements = 'C') +xc1 = xc[0] +xcf1 = xcf[0] + +# float and double F order +xf = require(x, dtype = numpy.float64, requirements = 'FORTRAN') +xff = require(x, dtype = numpy.float32, requirements = 'FORTRAN') +xf1 = xf[0] +xff1 = xff[0] + +# This class uses lpc in 1 dimension and loop on the axis. Is tested against +# a direct matrix inversion of the autocorrelation matrix (using matrix inverse +# instead of levinson durbin) +class test_lpc_py(NumpyTestCase): + def check_float(self): + # Axis -1 + xt = xcf + axis = -1 + order = 1 + + a, k, e = lpc_py(xt, order, axis) + assert a.dtype == k.dtype == e.dtype == numpy.float32 + + tmp = numpy.zeros((xt.shape[0], order+1), xt.dtype) + for i in range(xt.shape[0]): + tmp[i] = lpc_ref(xt[i], order) + + assert_array_almost_equal(tmp, a) + + # Axis 0 + xt = xcf + axis = 0 + order = 1 + + a, k, e = lpc_py(xt, order, axis) + assert a.dtype == k.dtype == e.dtype == numpy.float32 + + tmp = numpy.zeros((order + 1, xt.shape[1]), xt.dtype) + for i in range(xt.shape[1]): + tmp[:, i] = lpc_ref(xt[:, i], order) + + assert_array_almost_equal(tmp, a) + + def check_double(self): + # Axis -1 + xt = xc + axis = -1 + order = 1 + + a, e, k = lpc_py(xt, order, axis) + assert a.dtype == k.dtype == e.dtype == numpy.float64 + + tmp = numpy.zeros((xt.shape[0], order+1), xt.dtype) + for i in range(xt.shape[0]): + tmp[i] = lpc_ref(xt[i], order) + + assert_array_almost_equal(tmp, a) + + # Axis 0 + xt = xc + axis = 0 + order = 1 + + a, e, k = lpc_py(xt, order, axis) + assert a.dtype == k.dtype == e.dtype == numpy.float64 + + tmp = numpy.zeros((order + 1, xt.shape[1]), xt.dtype) + for i in range(xt.shape[1]): + tmp[:, i] = lpc_ref(xt[:, i], order) + + assert_array_almost_equal(tmp, a) + +class test_lpc(NumpyTestCase): + def check_float(self): + # Axis -1 + xt = xcf + axis = -1 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt = lpc_py(xt, order, axis) + + assert a.dtype == e.dtype == k.dtype == numpy.float32 + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + + # Axis 0 + xt = xcf + axis = 0 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt = lpc_py(xt, order, axis) + + assert a.dtype == e.dtype == k.dtype == numpy.float32 + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + + def check_float_rank1(self): + # test rank 1 + xt = xcf[0] + axis = 0 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt = lpc_py(xt, order, axis) + + assert a.dtype == e.dtype == k.dtype == numpy.float32 + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + + def check_double(self): + # Axis -1 + xt = xc + axis = -1 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt 
= lpc_py(xt, order, axis) + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + + # Axis 0 + xt = xc + axis = 0 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt = lpc_py(xt, order, axis) + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + + def check_double_rank1(self): + # test rank 1 + xt = xc[0] + axis = 0 + order = 1 + + a, e, k = lpc2(xt, order, axis) + at, et, kt = lpc_py(xt, order, axis) + + assert_array_almost_equal(a, at) + assert_array_almost_equal(e, et) + assert_array_almost_equal(k, kt) + +if __name__ == "__main__": + ScipyTest().run() Modified: trunk/Lib/sandbox/setup.py =================================================================== --- trunk/Lib/sandbox/setup.py 2006-11-26 05:13:41 UTC (rev 2331) +++ trunk/Lib/sandbox/setup.py 2006-11-28 08:04:47 UTC (rev 2332) @@ -76,6 +76,9 @@ # Package for Gaussian Mixture Models #config.add_subpackage('pyem') + # David Cournapeau's corner: autocorrelation, lpc, lpc residual + config.add_subpackage('cdavid') + # New spline package (based on scipy.interpolate) #config.add_subpackage('spline') From scipy-svn at scipy.org Tue Nov 28 03:07:22 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 28 Nov 2006 02:07:22 -0600 (CST) Subject: [Scipy-svn] r2333 - trunk/Lib/sandbox Message-ID: <20061128080722.428E439C18F@new.scipy.org> Author: cdavid Date: 2006-11-28 02:07:14 -0600 (Tue, 28 Nov 2006) New Revision: 2333 Modified: trunk/Lib/sandbox/setup.py Log: Remove accidently added packages in Lib/sandbox/setup.py Modified: trunk/Lib/sandbox/setup.py =================================================================== --- trunk/Lib/sandbox/setup.py 2006-11-28 08:04:47 UTC (rev 2332) +++ trunk/Lib/sandbox/setup.py 2006-11-28 08:07:14 UTC (rev 2333) @@ -77,7 +77,7 @@ #config.add_subpackage('pyem') # David Cournapeau's corner: autocorrelation, lpc, lpc residual - config.add_subpackage('cdavid') + #config.add_subpackage('cdavid') # New spline package (based on scipy.interpolate) #config.add_subpackage('spline') From scipy-svn at scipy.org Tue Nov 28 03:24:28 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 28 Nov 2006 02:24:28 -0600 (CST) Subject: [Scipy-svn] r2334 - trunk/Lib/sandbox/cdavid/tests Message-ID: <20061128082428.9EED939C18B@new.scipy.org> Author: cdavid Date: 2006-11-28 02:24:21 -0600 (Tue, 28 Nov 2006) New Revision: 2334 Modified: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py trunk/Lib/sandbox/cdavid/tests/test_lpc.py Log: Change packages inclusion in tests Modified: trunk/Lib/sandbox/cdavid/tests/test_autocorr.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-11-28 08:07:14 UTC (rev 2333) +++ trunk/Lib/sandbox/cdavid/tests/test_autocorr.py 2006-11-28 08:24:21 UTC (rev 2334) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon Nov 27 08:00 PM 2006 J +# Last Change: Tue Nov 28 05:00 PM 2006 J from numpy.testing import * from numpy.random import randn, seed @@ -9,9 +9,9 @@ from ctypes import c_uint set_package_path() -from autocorr import _raw_autocorr_1d, _raw_autocorr_1d_noncontiguous -from autocorr import autocorr_oneside_nofft as autocorr -from autocorr import _autocorr_oneside_nofft_py as autocorr_py +from cdavid.autocorr import _raw_autocorr_1d, _raw_autocorr_1d_noncontiguous +from cdavid.autocorr import autocorr_oneside_nofft as autocorr +from cdavid.autocorr import _autocorr_oneside_nofft_py as autocorr_py restore_path() import numpy Modified: trunk/Lib/sandbox/cdavid/tests/test_lpc.py =================================================================== --- trunk/Lib/sandbox/cdavid/tests/test_lpc.py 2006-11-28 08:07:14 UTC (rev 2333) +++ trunk/Lib/sandbox/cdavid/tests/test_lpc.py 2006-11-28 08:24:21 UTC (rev 2334) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Nov 28 03:00 PM 2006 J +# Last Change: Tue Nov 28 05:00 PM 2006 J from numpy.testing import * from numpy.random import randn, seed @@ -9,9 +9,9 @@ from ctypes import c_uint set_package_path() -from lpc import _lpc2_py as lpc_py -from lpc import lpc_ref, lpc2 -from autocorr import autocorr_oneside_nofft +from cdavid.lpc import _lpc2_py as lpc_py +from cdavid.lpc import lpc_ref, lpc2 +from cdavid.autocorr import autocorr_oneside_nofft restore_path() import numpy From scipy-svn at scipy.org Tue Nov 28 19:42:10 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 28 Nov 2006 18:42:10 -0600 (CST) Subject: [Scipy-svn] r2335 - trunk/Lib/sandbox/models Message-ID: <20061129004210.289F239C0BC@new.scipy.org> Author: timl Date: 2006-11-28 18:42:04 -0600 (Tue, 28 Nov 2006) New Revision: 2335 Modified: trunk/Lib/sandbox/models/contrast.py trunk/Lib/sandbox/models/formula.py Log: convert tabs to spaces Modified: trunk/Lib/sandbox/models/contrast.py =================================================================== --- trunk/Lib/sandbox/models/contrast.py 2006-11-28 08:24:21 UTC (rev 2334) +++ trunk/Lib/sandbox/models/contrast.py 2006-11-29 00:42:04 UTC (rev 2335) @@ -80,15 +80,15 @@ then evaldesign can be set to False. 
""" - self.term.namespace = self.formula.namespace + self.term.namespace = self.formula.namespace T = N.transpose(N.array(self.term(*args, **kw))) if T.ndim == 1: T.shape = (T.shape[0], 1) - self.T = utils.clean0(T) + self.T = utils.clean0(T) - self.D = self.formula.design(*args, **kw) + self.D = self.formula.design(*args, **kw) self.matrix = contrastfromcols(self.T, self.D) try: Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-11-28 08:24:21 UTC (rev 2334) +++ trunk/Lib/sandbox/models/formula.py 2006-11-29 00:42:04 UTC (rev 2335) @@ -32,15 +32,15 @@ else: name = '%s^%0.2f' % (self.name, power) - value = quantitative(name, func=self, transform=lambda x: N.power(x, power)) - value.power = power - value.namespace = self.namespace + value = quantitative(name, func=self, transform=lambda x: N.power(x, power)) + value.power = power + value.namespace = self.namespace return value def __init__(self, name, func=None, termname=None): self.name = name - self.__namespace = None + self.__namespace = None if termname is None: self.termname = name else: @@ -70,8 +70,8 @@ """ other = formula(other, namespace=self.namespace) f = other + self - f.namespace = self.namespace - return f + f.namespace = self.namespace + return f def __mul__(self, other): """ @@ -82,11 +82,11 @@ f = formula(self, namespace=self.namespace) elif self.name is 'intercept': f = formula(other, namespace=other.namespace) - else: - other = formula(other, namespace=self.namespace) - f = other * self - f.namespace = self.namespace - return f + else: + other = formula(other, namespace=self.namespace) + f = other * self + f.namespace = self.namespace + return f def names(self): """ @@ -113,12 +113,12 @@ if not hasattr(self, 'func'): val = self.namespace[self.termname] - else: - val = self.func - if callable(val): - if hasattr(val, "namespace"): - val.namespace = self.namespace - val = val(*args, **kw) + else: + val = self.func + if callable(val): + if hasattr(val, "namespace"): + val.namespace = self.namespace + val = val(*args, **kw) val = N.asarray(val) return N.squeeze(val) @@ -144,47 +144,47 @@ self.ordinal = ordinal if self.ordinal: - name = self.name + name = self.name else: - name = ['(%s==%s)' % (self.termname, str(key)) for key in self.keys] + name = ['(%s==%s)' % (self.termname, str(key)) for key in self.keys] - term.__init__(self, name, termname=self.termname, func=self.get_columns) + term.__init__(self, name, termname=self.termname, func=self.get_columns) def get_columns(self, *args, **kw): - """ + """ Calling function for factor instance. 
""" - v = self.namespace[self._name] - while True: - if callable(v): - if hasattr(v, "namespace"): - v.namespace = self.namespace - v = v(*args, **kw) - else: break + v = self.namespace[self._name] + while True: + if callable(v): + if hasattr(v, "namespace"): + v.namespace = self.namespace + v = v(*args, **kw) + else: break - if self.ordinal: - col = [float(self.keys.index(v[i])) for i in range(len(self.keys))] - return N.array(col) + if self.ordinal: + col = [float(self.keys.index(v[i])) for i in range(len(self.keys))] + return N.array(col) - else: - n = len(v) - value = [] - for key in self.keys: - col = [float((v[i] == key)) for i in range(n)] - value.append(col) - return N.array(value) + else: + n = len(v) + value = [] + for key in self.keys: + col = [float((v[i] == key)) for i in range(n)] + value.append(col) + return N.array(value) def values(self, *args, **kw): - """ + """ Return the keys of the factor, rather than the columns of the design matrix. """ - del(self.func) - val = self(*args, **kw) - self.func = self.get_columns - return val + del(self.func) + val = self(*args, **kw) + self.func = self.get_columns + return val def verify(self, values): """ @@ -218,7 +218,7 @@ if reference is None: reference = 0 - names = self.names() + names = self.names() def maineffect_func(value, reference=reference): rvalue = [] @@ -235,8 +235,8 @@ value = quantitative(_names, func=self, termname='%s:maineffect' % self.termname, transform=maineffect_func) - value.namespace = self.namespace - return value + value.namespace = self.namespace + return value class quantitative(term): @@ -260,15 +260,15 @@ """ def __init__(self, name, func=None, termname=None, transform=lambda x: x): - self.transform = transform - term.__init__(self, name, func=func, termname=termname) + self.transform = transform + term.__init__(self, name, func=func, termname=termname) def __call__(self, *args, **kw): - """ + """ A quantitative is just like term, except there is an additional transformation: self.transfrom. 
""" - return self.transform(term.__call__(self, *args, **kw)) + return self.transform(term.__call__(self, *args, **kw)) class formula: @@ -301,7 +301,7 @@ """ - self.__namespace = namespace + self.__namespace = namespace if isinstance(termlist, formula): self.terms = copy.copy(list(termlist.terms)) elif type(termlist) is types.ListType: @@ -333,26 +333,26 @@ allvals = [] intercept = False - iindex = 0 + iindex = 0 for t in self.terms: - t.namespace = self.namespace + t.namespace = self.namespace val = t(*args, **kw) - isintercept = False + isintercept = False if hasattr(t, "termname"): - if t.termname == 'intercept': - intercept = True - isintercept = True - interceptindex = iindex - allvals.append(None) + if t.termname == 'intercept': + intercept = True + isintercept = True + interceptindex = iindex + allvals.append(None) if val.ndim == 1 and not isintercept: val.shape = (1, val.shape[0]) allvals.append(val) elif not isintercept: allvals.append(val) - iindex += 1 + iindex += 1 if not intercept: try: @@ -361,11 +361,11 @@ pass else: if allvals != []: - if interceptindex > 0: - n = allvals[0].shape[1] - else: - n = allvals[1].shape[1] - allvals[interceptindex] = N.ones((1,n), N.float64) + if interceptindex > 0: + n = allvals[0].shape[1] + else: + n = allvals[1].shape[1] + allvals[interceptindex] = N.ones((1,n), N.float64) allvals = N.concatenate(allvals) elif nrow <= 1: raise ValueError, 'with only intercept in formula, keyword \'nrow\' argument needed' @@ -473,16 +473,16 @@ if self.terms[i].name is 'intercept': _term = other.terms[j] - _term.namespace = other.namespace + _term.namespace = other.namespace elif other.terms[j].name is 'intercept': _term = self.terms[i] - _term.namespace = self.namespace + _term.namespace = self.namespace else: names = [] - d1 = len(selfnames) - d2 = len(othernames) + d1 = len(selfnames) + d2 = len(othernames) for r in range(d1): for s in range(d2): @@ -494,19 +494,19 @@ def product_func(value, d1=d1, d2=d2): - out = [] - for r in range(d1): - for s in range(d2): - out.append(value[r] * value[d1+s]) - return N.array(out) + out = [] + for r in range(d1): + for s in range(d2): + out.append(value[r] * value[d1+s]) + return N.array(out) - sumterms = self + other - sumterms.terms = [self, other] # enforce the order we want - sumterms.namespace = self.namespace + sumterms = self + other + sumterms.terms = [self, other] # enforce the order we want + sumterms.namespace = self.namespace _term = quantitative(names, func=sumterms, termname=termname, transform=product_func) - _term.namespace = self.namespace + _term.namespace = self.namespace terms.append(_term) From scipy-svn at scipy.org Tue Nov 28 20:04:04 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 28 Nov 2006 19:04:04 -0600 (CST) Subject: [Scipy-svn] r2336 - trunk/Lib/sandbox/models Message-ID: <20061129010404.6B41139C020@new.scipy.org> Author: timl Date: 2006-11-28 19:03:53 -0600 (Tue, 28 Nov 2006) New Revision: 2336 Modified: trunk/Lib/sandbox/models/contrast.py trunk/Lib/sandbox/models/cox.py trunk/Lib/sandbox/models/formula.py trunk/Lib/sandbox/models/mixed.py Log: fix name errors Modified: trunk/Lib/sandbox/models/contrast.py =================================================================== --- trunk/Lib/sandbox/models/contrast.py 2006-11-29 00:42:04 UTC (rev 2335) +++ trunk/Lib/sandbox/models/contrast.py 2006-11-29 01:03:53 UTC (rev 2336) @@ -1,8 +1,6 @@ import numpy as N from numpy.linalg import pinv from scipy.sandbox.models import utils -from scipy.sandbox.models.formula import 
formula as formula_class -from scipy.sandbox.models.formula import term as term_class class ContrastResults: """ Modified: trunk/Lib/sandbox/models/cox.py =================================================================== --- trunk/Lib/sandbox/models/cox.py 2006-11-29 00:42:04 UTC (rev 2335) +++ trunk/Lib/sandbox/models/cox.py 2006-11-29 01:03:53 UTC (rev 2336) @@ -203,8 +203,8 @@ subjects[i].X = X[i] import formula as F - x = F.Quantitative('X') - f = F.Formula(x) + x = F.quantitative('X') + f = F.formula(x) c = coxph(subjects, f) Modified: trunk/Lib/sandbox/models/formula.py =================================================================== --- trunk/Lib/sandbox/models/formula.py 2006-11-29 00:42:04 UTC (rev 2335) +++ trunk/Lib/sandbox/models/formula.py 2006-11-29 01:03:53 UTC (rev 2336) @@ -270,7 +270,7 @@ """ return self.transform(term.__call__(self, *args, **kw)) -class formula: +class formula(object): """ @@ -367,10 +367,10 @@ n = allvals[1].shape[1] allvals[interceptindex] = N.ones((1,n), N.float64) allvals = N.concatenate(allvals) - elif nrow <= 1: + elif nrow <= 1: # FIXME: nrow is undefined here raise ValueError, 'with only intercept in formula, keyword \'nrow\' argument needed' else: - allvals = I(nrow=nrow) + allvals = I(nrow=nrow) # ... and here allvals.shape = (1,) + allvals.shape return allvals Modified: trunk/Lib/sandbox/models/mixed.py =================================================================== --- trunk/Lib/sandbox/models/mixed.py 2006-11-29 00:42:04 UTC (rev 2335) +++ trunk/Lib/sandbox/models/mixed.py 2006-11-29 01:03:53 UTC (rev 2336) @@ -1,6 +1,6 @@ import numpy as N import numpy.linalg as L -from scipy.sandbox.models.formula import Formula, I +from scipy.sandbox.models.formula import formula, I class Unit: @@ -148,9 +148,9 @@ self.units = units self.m = len(self.units) - self.fixed = Formula(fixed) - self.random = Formula(random) - self.response = Formula(response) + self.fixed = formula(fixed) + self.random = formula(random) + self.response = formula(response) self.N = 0 for unit in self.units: @@ -315,9 +315,9 @@ n = 3 import formula - fixed = formula.Term('f') - random = formula.Term('r') - response = formula.Term('y') + fixed = formula.term('f') + random = formula.term('r') + response = formula.term('y') for i in range(nsubj): d = R.standard_normal() From scipy-svn at scipy.org Wed Nov 29 05:15:21 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 29 Nov 2006 04:15:21 -0600 (CST) Subject: [Scipy-svn] r2337 - trunk/Lib/linsolve Message-ID: <20061129101521.03DE139C257@new.scipy.org> Author: rc Date: 2006-11-29 04:15:15 -0600 (Wed, 29 Nov 2006) New Revision: 2337 Modified: trunk/Lib/linsolve/linsolve.py Log: rhs shape check added Modified: trunk/Lib/linsolve/linsolve.py =================================================================== --- trunk/Lib/linsolve/linsolve.py 2006-11-29 01:03:53 UTC (rev 2336) +++ trunk/Lib/linsolve/linsolve.py 2006-11-29 10:15:15 UTC (rev 2337) @@ -43,6 +43,12 @@ return mat def spsolve(A, b, permc_spec=2): + if b.ndim > 1: + if max( b.shape ) == b.size: + b = b.squeeze() + else: + raise ValueError, "rhs must be a vector (has shape %s)" % (b.shape,) + if not hasattr(A, 'tocsr') and not hasattr(A, 'tocsc'): raise ValueError, "sparse matrix must be able to return CSC format--"\ "A.tocsc()--or CSR format--A.tocsr()" @@ -51,8 +57,11 @@ " (rows, cols) = A.shape" M, N = A.shape if (M != N): - raise ValueError, "matrix must be square" - + raise ValueError, "matrix must be square (has shape %s)" % (A.shape,) + if M != 
b.size: + raise ValueError, "matrix - rhs size mismatch (%s - %s)"\ + % (A.shape, b.shape) + if isUmfpack and useUmfpack: mat = _toCS_umfpack( A ) From scipy-svn at scipy.org Wed Nov 29 08:13:53 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 29 Nov 2006 07:13:53 -0600 (CST) Subject: [Scipy-svn] r2338 - trunk/Lib/io Message-ID: <20061129131353.A55CC39C04B@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-29 07:13:49 -0600 (Wed, 29 Nov 2006) New Revision: 2338 Modified: trunk/Lib/io/miobase.py Log: Working on downcasting class Modified: trunk/Lib/io/miobase.py =================================================================== --- trunk/Lib/io/miobase.py 2006-11-29 10:15:15 UTC (rev 2337) +++ trunk/Lib/io/miobase.py 2006-11-29 13:13:49 UTC (rev 2338) @@ -380,46 +380,67 @@ class DownCaster(object): - ''' Downcasts arrays ''' + ''' Downcast arrays to acceptable datatypes - def __init__(self, - type_list=None, - rtol=1.0000000000000001e-05, - atol=1e-08): - ''' Set types for which we are attempting to downcast ''' - def_dict = self.default_dt_dict() - if type_list is None: - self.dt_dict = def_dict - else: - dt_dict = {} - for T in type_list: - T = dtype(T).type - dt_dict[T] = def_dict[T] - self.dt_dict = dt_dict - self.rtol = rtol - self.atol = atol + Initialization specifies acceptable datatypes (ADs) - def eps(self, dt): - ''' Calculate machine precision for datatype + Implements downcast method - returns array that may be of + different storage type to the input array, where the new type is + one of the ADs. - Machine precision defined as difference between X and smallest - encodable number greater than X, where X is usually 1. + Also implements downcast_and_nearest method - returns downcast + array, and datatype within the ADs that is nearest to the input + datatype. - Input can be datatype, in which case X=1, or X. + The algorithm for the "nearest" is: return input datatype if in + ADs; else return a higher precision datatype in ADs of same type + if available; else return the next lower precision datatype in + ADs of same type; else raise an error. + + At its simplest, the downcast method can reject arrays that + are not in the list of ADs. 
+ + ''' + + _sctype_trans = {'complex': 'c', 'c': 'c', + 'float': 'f', 'f': 'f', + 'int': 'i', 'i': 'i', + 'uint': 'u', 'u': 'u'} + + def __init__(self, sctype_list=None, sctype_tols=None): + ''' Set types for which we are attempting to downcast + + Input + sctype_list - list of acceptable scalar types + If None defaults to all system types + sctype_tols - dictionary key datatype, values rtol, tol + to specify tolerances for checking near equality in downcasting + ''' - try: - dt = dtype(dt) - start = array(1, dt) - except TypeError: - start = array(dt) - dt = start.dtype - two = array(2, dt) - e = start.copy() - while (e / two + start) > start: - e = e / two - return e - - def default_dt_dict(self): + sys_dict = self.system_sctype_dict() + if sctype_list is None: + self.sctype_dict = sys_dict.copy() + sctype_list = self.sctype_dict.keys() + else: + D = {} + for k, v in sys_dict.items(): + if k in sctype_list: + D[k] = v + self.sctype_dict = D + self.sctype_list = sctype_list + self.sctype_tols = self.default_sctype_tols() + if sctype_tols is not None: + self.sctype_tols.merge(sctype_tols) + self.sized_sctypes = {} + for k in ('c', 'f', 'i', 'u'): + self.sized_sctypes[k] = self.sctypes_by_size(k) + self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')] + N = {} + for k in sys_dict: + N[k] = self._nearest_dtype(k) + self.nearest_dtypes = N + + def system_sctype_dict(self): d_dict = {} for sc_type in ('complex','float'): t_list = sctypes[sc_type] @@ -452,42 +473,131 @@ } return d_dict - def storage_criterion(self, maxstorage, kinds, cmp_func=lambda x, y: x <= y): - D = {} - for k, v in self.dt_dict.items(): - if v['kind'] in kinds: - sz = v['size'] - if cmp_func(sz, maxstorage): - D[k] = sz - I = D.items() - I.sort(lambda x, y: cmp(x[1], y[1])) - return I + def default_sctype_tols(self): + ''' Default allclose tolerance values for dtypes ''' + t_dict = {} + for sc_type in ('complex','float'): + t_list = sctypes[sc_type] + for T in t_list: + dt = dtype(T) + F = finfo(dt) + t_dict[T] = { + 'rtol': F.eps, + 'atol': F.tiny} + tiny = finfo(float64).tiny + for sc_type in ('int', 'uint'): + t_list = sctypes[sc_type] + for T in t_list: + dt = dtype(T) + t_dict[T] = { + 'rtol': 0, + 'atol': tiny} + return t_dict + def tols_from_sctype(self, sctype): + ''' Return rtol and atol for sctype ''' + tols = self.sctype_tols[sctype] + return tols['rtol'], tols['atol'] + + def sctypes_by_size(self, sctype): + ''' Returns storage size ordered list of entries of scalar type sctype + + Input + sctype - one of "complex" or "c", "float" or "f" , + "int" or "i", "uint" or "u" + ''' + try: + sctype = self._sctype_trans[sctype] + except KeyError: + raise TypeError, 'Did not recognize sctype %s' % sctype + D = [] + for t in self.sctype_list: + dt = dtype(t) + if dt.kind == sctype: + D.append([t, dt.itemsize]) + D.sort(lambda x, y: cmp(y[1], x[1])) + return D + + def _nearest_dtype(self, dt): + ''' Return dtype closest in size to that of dt + + Input + dt - dtype + + ID = input dtype. VD = valid dtype. Return ID if ID is + in VDs. If ID is smaller / larger than all VDs, return + smallest / largest VD. Otherwise return nearest VD larger than + ID. 
+ ''' + dt = dtype(dt) + if dt in self.sctype_list: + return dt + sctypes = self.sized_sctypes[dt.kind] + if not sctypes: + return None + dti = dt.itemsize + for i, t in enumerate(sctypes): + if t[1] < dti: + break + else: + return t[0] + if i: + i-=1 + return sctypes[i][0] + def smaller_same_kind(self, arr): - dts = self.storage_criterion(arr.dtype.itemsize, - (arr.dtype.kind,), - lambda x, y: x < y) + ''' Return arr maybe downcast to same kind, smaller storage + + If arr cannot be downcast within given tolerances, then return + arr if arr is in list of acceptable types, otherwise return + None + ''' + dt = arr.dtype + dti = dt.itemsize + sctypes = self.sized_sctypes[dt.kind] + scts = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] + rtol, atol = self.tols_from_sctype(dt.type) ret_arr = arr - for T in dts: + for T in scts: test_arr = arr.astype(T) - if allclose(test_arr, arr, self.rtol, self.atol): + if allclose(test_arr, arr, rtol, atol): ret_arr = test_arr else: break + else: # No downcasting withing tolerance + if dt not in self.sctype_list: + return None return ret_arr + + def smallest_int_dtype(self, mx, mn): + ''' Return integer type with smallest storage containing mx and mn - - def smallest_int_type(self, mx, mn): + Inputs + mx - maximum value + mn - minumum value + + Returns None if no integer can contain this range + ''' dt = None - for k, v in self.dt_dict.items(): - if v['kind'] in ('i', 'u'): - if v['max'] >= mx and v['min'] <= mn: - c_sz = v['size'] - if dt is None or c_sz < sz: - dt = k - sz = c_sz + for T in self.int_sctypes: + t_dict = self.sctype_dict[T] + if t_dict['max'] >= mx and t_dict['min'] <= mn: + c_sz = t_dict['size'] + if dt is None or c_sz < sz: + dt = T + sz = c_sz return dt + def recast(self, arr): + arr = self.downcast(arr) + if arr is not None: + return arr + # Could not downcast, arr dtype not in known list + dt = self.capable_dtype[arr.dtype.type] + if dt is not None: + return arr.astype(dt) + raise ValueError, 'Could not recast array within precision' + def downcast(self, arr): dtk = arr.dtype.kind if dtk == 'c': @@ -501,10 +611,13 @@ def downcast_complex(self, arr): # can we downcast to float? 
- fts = self.dt_arrs['float'] - flts = flts[flts['storage'] <= arr.dtype.itemsize] - test_arr = arr.astype(flt[0]['type']) - if allclose(arr, test_arr, self.rtol, self.atol): + dt = arr.dtype + dti = ceil(dt.itemsize / 2) + sctypes = self.sized_sctypes['f'] + flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] + test_arr = arr.astype(flts[0]) + rtol, atol = self.tols_from_sctype(dt.type) + if allclose(arr, test_arr, rtol, atol): return self.downcast_float(test_arr) # try downcasting to another complex type return self.smaller_same_kind(arr) @@ -512,13 +625,21 @@ def downcast_float(self, arr): # Try integer test_arr = self.downcast_integer(arr) - if allclose(arr, test_arr, self.rtol, self.atol): + rtol, atol = self.tols_from_sctype(arr.dtype.type) + if allclose(arr, test_arr, rtol, atol): return test_arr # Otherwise descend the float types return self.smaller_same_kind(arr) def downcast_integer(self, arr): + ''' Downcasts arr to integer + + Returns None if range of arr cannot be contained in acceptable + integer types + ''' mx = amax(arr) mn = amin(arr) - idt = self.smallest_int_type(mx, mn) - return arr.astype(idt) + idt = self.smallest_int_dtype(mx, mn) + if idt: + return arr.astype(idt) + return None From scipy-svn at scipy.org Wed Nov 29 13:40:29 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 29 Nov 2006 12:40:29 -0600 (CST) Subject: [Scipy-svn] r2339 - trunk/Lib/io Message-ID: <20061129184029.C93C439C201@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-29 12:40:23 -0600 (Wed, 29 Nov 2006) New Revision: 2339 Added: trunk/Lib/io/recaster.py Modified: trunk/Lib/io/miobase.py Log: More work on recasting, moved recasting into own module Modified: trunk/Lib/io/miobase.py =================================================================== --- trunk/Lib/io/miobase.py 2006-11-29 13:13:49 UTC (rev 2338) +++ trunk/Lib/io/miobase.py 2006-11-29 18:40:23 UTC (rev 2339) @@ -378,268 +378,3 @@ def __init__(self, file_stream): self.file_stream = file_stream - -class DownCaster(object): - ''' Downcast arrays to acceptable datatypes - - Initialization specifies acceptable datatypes (ADs) - - Implements downcast method - returns array that may be of - different storage type to the input array, where the new type is - one of the ADs. - - Also implements downcast_and_nearest method - returns downcast - array, and datatype within the ADs that is nearest to the input - datatype. - - The algorithm for the "nearest" is: return input datatype if in - ADs; else return a higher precision datatype in ADs of same type - if available; else return the next lower precision datatype in - ADs of same type; else raise an error. - - At its simplest, the downcast method can reject arrays that - are not in the list of ADs. 
- - ''' - - _sctype_trans = {'complex': 'c', 'c': 'c', - 'float': 'f', 'f': 'f', - 'int': 'i', 'i': 'i', - 'uint': 'u', 'u': 'u'} - - def __init__(self, sctype_list=None, sctype_tols=None): - ''' Set types for which we are attempting to downcast - - Input - sctype_list - list of acceptable scalar types - If None defaults to all system types - sctype_tols - dictionary key datatype, values rtol, tol - to specify tolerances for checking near equality in downcasting - - ''' - sys_dict = self.system_sctype_dict() - if sctype_list is None: - self.sctype_dict = sys_dict.copy() - sctype_list = self.sctype_dict.keys() - else: - D = {} - for k, v in sys_dict.items(): - if k in sctype_list: - D[k] = v - self.sctype_dict = D - self.sctype_list = sctype_list - self.sctype_tols = self.default_sctype_tols() - if sctype_tols is not None: - self.sctype_tols.merge(sctype_tols) - self.sized_sctypes = {} - for k in ('c', 'f', 'i', 'u'): - self.sized_sctypes[k] = self.sctypes_by_size(k) - self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')] - N = {} - for k in sys_dict: - N[k] = self._nearest_dtype(k) - self.nearest_dtypes = N - - def system_sctype_dict(self): - d_dict = {} - for sc_type in ('complex','float'): - t_list = sctypes[sc_type] - for T in t_list: - dt = dtype(T) - d_dict[T] = { - 'kind': dt.kind, - 'size': dt.itemsize} - for T in sctypes['int']: - dt = dtype(T) - sz = dt.itemsize - bits = sz*8-1 - end = 2**bits - d_dict[T] = { - 'kind': dt.kind, - 'size': sz, - 'min': -end, - 'max': end-1 - } - for T in sctypes['uint']: - dt = dtype(T) - sz = dt.itemsize - bits = sz*8 - end = 2**bits - d_dict[T] = { - 'kind': dt.kind, - 'size': sz, - 'min': 0, - 'max': end - } - return d_dict - - def default_sctype_tols(self): - ''' Default allclose tolerance values for dtypes ''' - t_dict = {} - for sc_type in ('complex','float'): - t_list = sctypes[sc_type] - for T in t_list: - dt = dtype(T) - F = finfo(dt) - t_dict[T] = { - 'rtol': F.eps, - 'atol': F.tiny} - tiny = finfo(float64).tiny - for sc_type in ('int', 'uint'): - t_list = sctypes[sc_type] - for T in t_list: - dt = dtype(T) - t_dict[T] = { - 'rtol': 0, - 'atol': tiny} - return t_dict - - def tols_from_sctype(self, sctype): - ''' Return rtol and atol for sctype ''' - tols = self.sctype_tols[sctype] - return tols['rtol'], tols['atol'] - - def sctypes_by_size(self, sctype): - ''' Returns storage size ordered list of entries of scalar type sctype - - Input - sctype - one of "complex" or "c", "float" or "f" , - "int" or "i", "uint" or "u" - ''' - try: - sctype = self._sctype_trans[sctype] - except KeyError: - raise TypeError, 'Did not recognize sctype %s' % sctype - D = [] - for t in self.sctype_list: - dt = dtype(t) - if dt.kind == sctype: - D.append([t, dt.itemsize]) - D.sort(lambda x, y: cmp(y[1], x[1])) - return D - - def _nearest_dtype(self, dt): - ''' Return dtype closest in size to that of dt - - Input - dt - dtype - - ID = input dtype. VD = valid dtype. Return ID if ID is - in VDs. If ID is smaller / larger than all VDs, return - smallest / largest VD. Otherwise return nearest VD larger than - ID. 
- ''' - dt = dtype(dt) - if dt in self.sctype_list: - return dt - sctypes = self.sized_sctypes[dt.kind] - if not sctypes: - return None - dti = dt.itemsize - for i, t in enumerate(sctypes): - if t[1] < dti: - break - else: - return t[0] - if i: - i-=1 - return sctypes[i][0] - - def smaller_same_kind(self, arr): - ''' Return arr maybe downcast to same kind, smaller storage - - If arr cannot be downcast within given tolerances, then return - arr if arr is in list of acceptable types, otherwise return - None - ''' - dt = arr.dtype - dti = dt.itemsize - sctypes = self.sized_sctypes[dt.kind] - scts = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] - rtol, atol = self.tols_from_sctype(dt.type) - ret_arr = arr - for T in scts: - test_arr = arr.astype(T) - if allclose(test_arr, arr, rtol, atol): - ret_arr = test_arr - else: - break - else: # No downcasting withing tolerance - if dt not in self.sctype_list: - return None - return ret_arr - - def smallest_int_dtype(self, mx, mn): - ''' Return integer type with smallest storage containing mx and mn - - Inputs - mx - maximum value - mn - minumum value - - Returns None if no integer can contain this range - ''' - dt = None - for T in self.int_sctypes: - t_dict = self.sctype_dict[T] - if t_dict['max'] >= mx and t_dict['min'] <= mn: - c_sz = t_dict['size'] - if dt is None or c_sz < sz: - dt = T - sz = c_sz - return dt - - def recast(self, arr): - arr = self.downcast(arr) - if arr is not None: - return arr - # Could not downcast, arr dtype not in known list - dt = self.capable_dtype[arr.dtype.type] - if dt is not None: - return arr.astype(dt) - raise ValueError, 'Could not recast array within precision' - - def downcast(self, arr): - dtk = arr.dtype.kind - if dtk == 'c': - return self.downcast_complex(arr) - elif dtk == 'f': - return self.downcast_float(arr) - elif dtk in ('u', 'i'): - return self.downcast_integer(arr) - else: - raise TypeError, 'Do not recognize array kind %s' % dtk - - def downcast_complex(self, arr): - # can we downcast to float? 
- dt = arr.dtype - dti = ceil(dt.itemsize / 2) - sctypes = self.sized_sctypes['f'] - flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] - test_arr = arr.astype(flts[0]) - rtol, atol = self.tols_from_sctype(dt.type) - if allclose(arr, test_arr, rtol, atol): - return self.downcast_float(test_arr) - # try downcasting to another complex type - return self.smaller_same_kind(arr) - - def downcast_float(self, arr): - # Try integer - test_arr = self.downcast_integer(arr) - rtol, atol = self.tols_from_sctype(arr.dtype.type) - if allclose(arr, test_arr, rtol, atol): - return test_arr - # Otherwise descend the float types - return self.smaller_same_kind(arr) - - def downcast_integer(self, arr): - ''' Downcasts arr to integer - - Returns None if range of arr cannot be contained in acceptable - integer types - ''' - mx = amax(arr) - mn = amin(arr) - idt = self.smallest_int_dtype(mx, mn) - if idt: - return arr.astype(idt) - return None Added: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-29 13:13:49 UTC (rev 2338) +++ trunk/Lib/io/recaster.py 2006-11-29 18:40:23 UTC (rev 2339) @@ -0,0 +1,342 @@ +# Author: Matthew Brett + +""" +Recaster class for recasting numeric arrays +""" + +from numpy import * + + +class Recaster(object): + ''' Class to recast arrays to one of acceptable scalar types + + Initialization specifies acceptable types (ATs) + + Implements downcast and recast method - returns array that may be + of different storage type to the input array, where the new type + is one of the ATs. Downcast forces return array to be same size or + smaller than the input. recast method will return a larger type + if no smaller type will contain the data without loss of + precision. + + At its simplest, the downcast method can reject arrays that + are not in the list of ATs. 
+ ''' + + _sctype_trans = {'complex': 'c', 'c': 'c', + 'float': 'f', 'f': 'f', + 'int': 'i', 'i': 'i', + 'uint': 'u', 'u': 'u'} + + def __init__(self, sctype_list=None, sctype_tols=None): + ''' Set types for which we are attempting to downcast + + Input + sctype_list - list of acceptable scalar types + If None defaults to all system types + sctype_tols - dictionary key datatype, values rtol, tol + to specify tolerances for checking near equality in downcasting + ''' + sys_dict = self.system_sctype_dict() + if sctype_list is None: + self.sctype_dict = sys_dict.copy() + sctype_list = self.sctype_dict.keys() + else: + D = {} + for k, v in sys_dict.items(): + if k in sctype_list: + D[k] = v + self.sctype_dict = D + self.sctype_list = sctype_list + self.sctype_tols = self.default_sctype_tols() + if sctype_tols is not None: + self.sctype_tols.merge(sctype_tols) + # Cache sctype sizes, + self.sized_sctypes = {} + for k in ('c', 'f', 'i', 'u'): + self.sized_sctypes[k] = self.sctypes_by_size(k) + self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')] + self.all_int_sized_sctypes = [] + for k, v in self.sized_sctypes.items(): + if k in ('u', 'i'): + self.all_int_sized_sctypes.append(v) + self.nearest_dtypes = {} + for k in sys_dict: + self.nearest_dtypes[k] = self._nearest_dtype(k) + self.capable_dtypes = {} + for k in sys_dict: + self.capable_dtypes[k] = self._capable_dtype(k) + + def system_sctype_dict(self): + d_dict = {} + for sc_type in ('complex','float'): + t_list = sctypes[sc_type] + for T in t_list: + dt = dtype(T) + d_dict[T] = { + 'kind': dt.kind, + 'size': dt.itemsize} + for T in sctypes['int']: + dt = dtype(T) + sz = dt.itemsize + bits = sz*8-1 + end = 2**bits + d_dict[T] = { + 'kind': dt.kind, + 'size': sz, + 'min': -end, + 'max': end-1 + } + for T in sctypes['uint']: + dt = dtype(T) + sz = dt.itemsize + bits = sz*8 + end = 2**bits + d_dict[T] = { + 'kind': dt.kind, + 'size': sz, + 'min': 0, + 'max': end + } + return d_dict + + def default_sctype_tols(self): + ''' Default allclose tolerance values for all dtypes ''' + t_dict = {} + for sc_type in ('complex','float'): + t_list = sctypes[sc_type] + for T in t_list: + dt = dtype(T) + F = finfo(dt) + t_dict[T] = { + 'rtol': F.eps, + 'atol': F.tiny} + F = finfo(float64) + for sc_type in ('int', 'uint'): + t_list = sctypes[sc_type] + for T in t_list: + dt = dtype(T) + t_dict[T] = { + 'rtol': F.eps, + 'atol': F.tiny} + return t_dict + + def tols_from_sctype(self, sctype): + ''' Return rtol and atol for sctype ''' + tols = self.sctype_tols[sctype] + return tols['rtol'], tols['atol'] + + def sctypes_by_size(self, sctype): + ''' Returns storage size ordered list of entries of scalar type sctype + + Input + sctype - one of "complex" or "c", "float" or "f" , + "int" or "i", "uint" or "u" + ''' + try: + sctype = self._sctype_trans[sctype] + except KeyError: + raise TypeError, 'Did not recognize sctype %s' % sctype + D = [] + for t in self.sctype_list: + dt = dtype(t) + if dt.kind == sctype: + D.append([t, dt.itemsize]) + D.sort(lambda x, y: cmp(y[1], x[1])) + return D + + def _nearest_sctype(self, sct): + ''' Return scalar type closest in size to that of sct + + Input + sct - sctype + + ID = input sctype. AT = acceptable sctype. Return ID if ID is + in ATs. If ID is smaller / larger than all ATs, return + smallest / largest AT. Otherwise return nearest AT larger than + ID. 
+ ''' + dt = dtype(sct) + if sct in self.sctype_list: + return sct + sctypes = self.sized_sctypes[dt.kind] + if not sctypes: + return None + dti = sct.itemsize + for i, t in enumerate(sctypes): + if t[1] < dti: + break + else: + return t[0] + if i: + i-=1 + return sctypes[i][0] + + def _capable_sctype(self, sct): + ''' Return smallest scalar type containing sct type without precision loss + + Input + sct - scalar type + + ID = input type. AT = acceptable type. Return ID if ID is + in ATs. Otherwise return smallest AT that is larger than or + same size as ID. + + If the desired sctype is an integer, returns the smallest + integer (int or uint) that can contain the range of the input + integer type + + If there is no type that can contain sct without loss of + precision, return None + ''' + if sct in self.sctype_list: + return sct + out_t = None + # Unsigned and signed integers + # Precision loss defined by max min outside datatype range + dt = dtype(sct) + if dt.kind in ('u', 'i'): + sctypes = self.all_int_sized_sctypes + if not sctypes: + return None + D = self.sctype_dict + mx = D[sct]['max'] + mn = D[sct]['min'] + for i, t in emumerate(sctypes): + this_sct = t[0] + this_d = D[dt] + if this_d['max'] >= mx and this_d['min'] <= mn: + out_t = this_sct + else: + # Complex and float types + # Precision loss defined by data size < sct + sctypes = self.sized_sctypes[sct] + if not sctypes: + return None + dti = dtype(sct).itemsize + out_t = None + for i, t in enumerate(sctypes): + if t[1] >= dti: + out_t = t[0] + return out_t + + def smaller_same_kind(self, arr): + ''' Return arr maybe downcast to same kind, smaller storage + + If arr cannot be downcast within given tolerances, then: + return arr if arr is in list of acceptable types, otherwise + return None + ''' + dtp = arr.dtype + dti = dt.itemsize + sctypes = self.sized_sctypes[dtp.kind] + sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] + return self.smaller_from_sctypes(arr, sctypes) + + def smallest_from_sctypes(self, arr, sctypes): + ''' Returns array recast to smallest possible type from list + + Inputs + arr - array to recast + sctypes - list of scalar types to try + + Returns None if no recast is within tolerance + ''' + dt = arr.dtype.type + rtol, atol = self.tols_from_sctype(dt) + ret_arr = arr + for T in sctypes: + test_arr = arr.astype(T) + if allclose(test_arr, arr, rtol, atol): + ret_arr = test_arr + else: + break + else: # No downcasting withing tolerance + if dt not in self.sctype_list: + return None + return ret_arr + + def smallest_int_dtype(self, mx, mn): + ''' Return integer type with smallest storage containing mx and mn + + Inputs + mx - maximum value + mn - minumum value + + Returns None if no integer can contain this range + ''' + dt = None + for T in self.int_sctypes: + t_dict = self.sctype_dict[T] + if t_dict['max'] >= mx and t_dict['min'] <= mn: + c_sz = t_dict['size'] + if dt is None or c_sz < sz: + dt = T + sz = c_sz + return dt + + def recast(self, arr): + ''' Try arr downcast, upcast if necesary to get compatible type ''' + dt = arr.dtype.type + ret_arr = self.downcast(arr) + if ret_arr is not None: + return ret_arr + # Could not downcast, arr dtype not in known list + # Try upcast to larger dtype of same kind + udt = self.capable_dtype[dt] + if udt is not None: + return arr.astype(udt) + # We are stuck for floats and complex now + # Can try casting integers to floats + if arr.dt.kind in ('i', 'u'): + sctypes = self.sized_sctypes['f'] + arr = self.smallest_from_sctypes(arr, sctypes) + if arr is not 
None: + return arr + raise ValueError, 'Could not recast array within precision' + + def downcast(self, arr): + dtk = arr.dtype.kind + if dtk == 'c': + return self.downcast_complex(arr) + elif dtk == 'f': + return self.downcast_float(arr) + elif dtk in ('u', 'i'): + return self.downcast_integer(arr) + else: + raise TypeError, 'Do not recognize array kind %s' % dtk + + def downcast_complex(self, arr): + # can we downcast to float? + dt = arr.dtype + dti = ceil(dt.itemsize / 2) + sctypes = self.sized_sctypes['f'] + flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] + test_arr = arr.astype(flts[0]) + rtol, atol = self.tols_from_sctype(dt.type) + if allclose(arr, test_arr, rtol, atol): + return self.downcast_float(test_arr) + # try downcasting to another complex type + return self.smaller_same_kind(arr) + + def downcast_float(self, arr): + # Try integer + test_arr = self.downcast_integer(arr) + rtol, atol = self.tols_from_sctype(arr.dtype.type) + if allclose(arr, test_arr, rtol, atol): + return test_arr + # Otherwise descend the float types + return self.smaller_same_kind(arr) + + def downcast_integer(self, arr): + ''' Downcasts arr to integer + + Returns None if range of arr cannot be contained in acceptable + integer types + ''' + mx = amax(arr) + mn = amin(arr) + idt = self.smallest_int_dtype(mx, mn) + if idt: + return arr.astype(idt) + return None From scipy-svn at scipy.org Thu Nov 30 01:16:38 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 30 Nov 2006 00:16:38 -0600 (CST) Subject: [Scipy-svn] r2340 - trunk/Lib/io Message-ID: <20061130061638.65EAD39C0E2@new.scipy.org> Author: timl Date: 2006-11-30 00:16:27 -0600 (Thu, 30 Nov 2006) New Revision: 2340 Modified: trunk/Lib/io/recaster.py Log: s/emumerate/enumerate/ typo Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-29 18:40:23 UTC (rev 2339) +++ trunk/Lib/io/recaster.py 2006-11-30 06:16:27 UTC (rev 2340) @@ -202,7 +202,7 @@ D = self.sctype_dict mx = D[sct]['max'] mn = D[sct]['min'] - for i, t in emumerate(sctypes): + for i, t in enumerate(sctypes): this_sct = t[0] this_d = D[dt] if this_d['max'] >= mx and this_d['min'] <= mn: From scipy-svn at scipy.org Thu Nov 30 08:40:40 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 30 Nov 2006 07:40:40 -0600 (CST) Subject: [Scipy-svn] r2341 - in trunk/Lib/io: . 
tests Message-ID: <20061130134040.36C7839C299@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-30 07:40:34 -0600 (Thu, 30 Nov 2006) New Revision: 2341 Added: trunk/Lib/io/tests/test_recaster.py Modified: trunk/Lib/io/__init__.py trunk/Lib/io/recaster.py Log: Further recaster debugging, with unit tests Modified: trunk/Lib/io/__init__.py =================================================================== --- trunk/Lib/io/__init__.py 2006-11-30 06:16:27 UTC (rev 2340) +++ trunk/Lib/io/__init__.py 2006-11-30 13:40:34 UTC (rev 2341) @@ -8,6 +8,7 @@ from numpyio import packbits, unpackbits, bswap, fread, fwrite, \ convert_objectarray from mio import * +from recaster import Recaster from array_import import * from data_store import * from pickler import * Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-30 06:16:27 UTC (rev 2340) +++ trunk/Lib/io/recaster.py 2006-11-30 13:40:34 UTC (rev 2341) @@ -6,6 +6,42 @@ from numpy import * +def sctype_attributes(): + ''' Return dictionary describing numpy scalar types ''' + d_dict = {} + for sc_type in ('complex','float'): + t_list = sctypes[sc_type] + for T in t_list: + F = finfo(T) + dt = dtype(T) + d_dict[T] = { + 'kind': dt.kind, + 'size': dt.itemsize, + 'max': F.max, + 'min': F.min} + for T in sctypes['int']: + dt = dtype(T) + sz = dt.itemsize + bits = sz*8-1 + end = 2**bits + d_dict[T] = { + 'kind': dt.kind, + 'size': sz, + 'min': -end, + 'max': end-1 + } + for T in sctypes['uint']: + dt = dtype(T) + sz = dt.itemsize + bits = sz*8 + end = 2**bits + d_dict[T] = { + 'kind': dt.kind, + 'size': sz, + 'min': 0, + 'max': end + } + return d_dict class Recaster(object): ''' Class to recast arrays to one of acceptable scalar types @@ -28,6 +64,8 @@ 'int': 'i', 'i': 'i', 'uint': 'u', 'u': 'u'} + _sctype_attributes = sctype_attributes() + def __init__(self, sctype_list=None, sctype_tols=None): ''' Set types for which we are attempting to downcast @@ -37,68 +75,28 @@ sctype_tols - dictionary key datatype, values rtol, tol to specify tolerances for checking near equality in downcasting ''' - sys_dict = self.system_sctype_dict() if sctype_list is None: - self.sctype_dict = sys_dict.copy() - sctype_list = self.sctype_dict.keys() - else: - D = {} - for k, v in sys_dict.items(): - if k in sctype_list: - D[k] = v - self.sctype_dict = D + sctype_list = self._sctype_attributes.keys() self.sctype_list = sctype_list self.sctype_tols = self.default_sctype_tols() if sctype_tols is not None: - self.sctype_tols.merge(sctype_tols) + self.sctype_tols.update(sctype_tols) # Cache sctype sizes, self.sized_sctypes = {} for k in ('c', 'f', 'i', 'u'): self.sized_sctypes[k] = self.sctypes_by_size(k) - self.int_sctypes = [T for T in self.sctype_list if dtype(T).kind in ('i', 'u')] - self.all_int_sized_sctypes = [] + # All integer sizes + self.ints_sized_sctypes = [] for k, v in self.sized_sctypes.items(): if k in ('u', 'i'): - self.all_int_sized_sctypes.append(v) - self.nearest_dtypes = {} - for k in sys_dict: - self.nearest_dtypes[k] = self._nearest_dtype(k) - self.capable_dtypes = {} - for k in sys_dict: - self.capable_dtypes[k] = self._capable_dtype(k) - - def system_sctype_dict(self): - d_dict = {} - for sc_type in ('complex','float'): - t_list = sctypes[sc_type] - for T in t_list: - dt = dtype(T) - d_dict[T] = { - 'kind': dt.kind, - 'size': dt.itemsize} - for T in sctypes['int']: - dt = dtype(T) - sz = dt.itemsize - bits = sz*8-1 - end = 2**bits - d_dict[T] = { - 'kind': 
dt.kind, - 'size': sz, - 'min': -end, - 'max': end-1 - } - for T in sctypes['uint']: - dt = dtype(T) - sz = dt.itemsize - bits = sz*8 - end = 2**bits - d_dict[T] = { - 'kind': dt.kind, - 'size': sz, - 'min': 0, - 'max': end - } - return d_dict + for e in v: + self.ints_sized_sctypes.append(e) + if self.ints_sized_sctypes: + self.ints_sized_sctypes.sort(lambda x, y: cmp(y[1], x[1])) + # Capable types list + self._capable_sctypes = {} + for k in self._sctype_attributes: + self._capable_sctypes[k] = self.get_capable_sctype(k) def default_sctype_tols(self): ''' Default allclose tolerance values for all dtypes ''' @@ -121,11 +119,6 @@ 'atol': F.tiny} return t_dict - def tols_from_sctype(self, sctype): - ''' Return rtol and atol for sctype ''' - tols = self.sctype_tols[sctype] - return tols['rtol'], tols['atol'] - def sctypes_by_size(self, sctype): ''' Returns storage size ordered list of entries of scalar type sctype @@ -144,35 +137,19 @@ D.append([t, dt.itemsize]) D.sort(lambda x, y: cmp(y[1], x[1])) return D - - def _nearest_sctype(self, sct): - ''' Return scalar type closest in size to that of sct - Input - sct - sctype + def capable_sctype(self, sct): + ''' Return smallest type containing sct type without precision loss - ID = input sctype. AT = acceptable sctype. Return ID if ID is - in ATs. If ID is smaller / larger than all ATs, return - smallest / largest AT. Otherwise return nearest AT larger than - ID. + Value pulled fron dictionary cached from init - see + get_capable_sctype method for algorithm ''' - dt = dtype(sct) - if sct in self.sctype_list: - return sct - sctypes = self.sized_sctypes[dt.kind] - if not sctypes: + try: + return self._capable_sctypes[sct] + except KeyError: return None - dti = sct.itemsize - for i, t in enumerate(sctypes): - if t[1] < dti: - break - else: - return t[0] - if i: - i-=1 - return sctypes[i][0] - def _capable_sctype(self, sct): + def get_capable_sctype(self, sct): ''' Return smallest scalar type containing sct type without precision loss Input @@ -194,33 +171,30 @@ out_t = None # Unsigned and signed integers # Precision loss defined by max min outside datatype range - dt = dtype(sct) - if dt.kind in ('u', 'i'): - sctypes = self.all_int_sized_sctypes - if not sctypes: - return None - D = self.sctype_dict - mx = D[sct]['max'] - mn = D[sct]['min'] - for i, t in enumerate(sctypes): - this_sct = t[0] - this_d = D[dt] - if this_d['max'] >= mx and this_d['min'] <= mn: - out_t = this_sct + D = self._sctype_attributes[sct] + if D['kind'] in ('u', 'i'): + out_t = self.smallest_int_sctype(D['max'], D['min']) else: # Complex and float types # Precision loss defined by data size < sct - sctypes = self.sized_sctypes[sct] + sctypes = self.sized_sctypes[D['kind']] if not sctypes: return None - dti = dtype(sct).itemsize + dti = D['size'] out_t = None for i, t in enumerate(sctypes): if t[1] >= dti: out_t = t[0] + else: + break return out_t - def smaller_same_kind(self, arr): + def tols_from_sctype(self, sctype): + ''' Return rtol and atol for sctype ''' + tols = self.sctype_tols[sctype] + return tols['rtol'], tols['atol'] + + def smallest_same_kind(self, arr): ''' Return arr maybe downcast to same kind, smaller storage If arr cannot be downcast within given tolerances, then: @@ -228,10 +202,10 @@ return None ''' dtp = arr.dtype - dti = dt.itemsize + dti = dtp.itemsize sctypes = self.sized_sctypes[dtp.kind] sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] - return self.smaller_from_sctypes(arr, sctypes) + return self.smallest_from_sctypes(arr, sctypes) def 
smallest_from_sctypes(self, arr, sctypes): ''' Returns array recast to smallest possible type from list @@ -249,14 +223,14 @@ test_arr = arr.astype(T) if allclose(test_arr, arr, rtol, atol): ret_arr = test_arr + can_downcast = True else: break - else: # No downcasting withing tolerance - if dt not in self.sctype_list: - return None + if ret_arr.dtype.type not in self.sctype_list: + return None return ret_arr - def smallest_int_dtype(self, mx, mn): + def smallest_int_sctype(self, mx, mn): ''' Return integer type with smallest storage containing mx and mn Inputs @@ -265,15 +239,14 @@ Returns None if no integer can contain this range ''' - dt = None - for T in self.int_sctypes: - t_dict = self.sctype_dict[T] + sct = None + for T, tsz in self.ints_sized_sctypes: + t_dict = self._sctype_attributes[T] if t_dict['max'] >= mx and t_dict['min'] <= mn: - c_sz = t_dict['size'] - if dt is None or c_sz < sz: - dt = T - sz = c_sz - return dt + if sct is None or tsz < sz: + sct = T + sz = tsz + return sct def recast(self, arr): ''' Try arr downcast, upcast if necesary to get compatible type ''' @@ -317,7 +290,7 @@ if allclose(arr, test_arr, rtol, atol): return self.downcast_float(test_arr) # try downcasting to another complex type - return self.smaller_same_kind(arr) + return self.smallest_same_kind(arr) def downcast_float(self, arr): # Try integer @@ -326,7 +299,7 @@ if allclose(arr, test_arr, rtol, atol): return test_arr # Otherwise descend the float types - return self.smaller_same_kind(arr) + return self.smallest_same_kind(arr) def downcast_integer(self, arr): ''' Downcasts arr to integer @@ -336,7 +309,7 @@ ''' mx = amax(arr) mn = amin(arr) - idt = self.smallest_int_dtype(mx, mn) + idt = self.smallest_int_sctype(mx, mn) if idt: return arr.astype(idt) return None Added: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-11-30 06:16:27 UTC (rev 2340) +++ trunk/Lib/io/tests/test_recaster.py 2006-11-30 13:40:34 UTC (rev 2341) @@ -0,0 +1,45 @@ +from numpy.testing import * +import numpy as N + +set_package_path() +from io.recaster import sctype_attributes, Recaster +restore_path() + +try: # Python 2.3 support + from sets import Set as set +except: + pass + +class test_recaster(ScipyTestCase): + def setUp(self): + self.recaster = Recaster([N.int32, N.complex64, N.float32]) + + def test_init(self): + # Setting sctype_list + R = Recaster() + assert set(R.sctype_list) == set(sctype_attributes().keys()), \ + 'Default recaster should include all system types' + T = N.float32 + R = Recaster([T]) + assert R.sctype_list == [T], 'Scalar type list not correctly set' + # Setting tolerances + tols = self.recaster.default_sctype_tols() + assert tols == self.recaster.sctype_tols, 'Unexpected tols dictionary' + F = N.finfo(T) + R = Recaster(sctype_tols={T: {'rtol': F.eps*2, 'atol': F.tiny*2, 'silly': 'silly text'}}) + assert tols != R.sctype_tols, 'Tols dictionary not set correctly' + r, a = R.tols_from_sctype(T) + assert r == F.eps*2, 'Rtol not correctly set' + assert a == F.tiny*2, 'Atol not correctly set' + # Sctype size lists + # Integer sizes + # Cabable types + + def test_methods(self): + A = N.array(1, N.float64) + B = A.astype(N.float32) + # smallest from sctypes + C = self.recaster.smallest_from_sctypes(A, [N.float32]) + # smaller same kind + C = self.recaster.smallest_same_kind(A) + assert C.dtype == N.dtype(N.float32), 'Dtype was not downcast' From scipy-svn at scipy.org Thu Nov 30 13:49:50 2006 From: scipy-svn at 
scipy.org (scipy-svn at scipy.org) Date: Thu, 30 Nov 2006 12:49:50 -0600 (CST) Subject: [Scipy-svn] r2342 - in trunk/Lib/io: . tests Message-ID: <20061130184950.B3EE539C018@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-30 12:49:45 -0600 (Thu, 30 Nov 2006) New Revision: 2342 Modified: trunk/Lib/io/__init__.py trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: Further debugging, tests on recaster Modified: trunk/Lib/io/__init__.py =================================================================== --- trunk/Lib/io/__init__.py 2006-11-30 13:40:34 UTC (rev 2341) +++ trunk/Lib/io/__init__.py 2006-11-30 18:49:45 UTC (rev 2342) @@ -8,7 +8,7 @@ from numpyio import packbits, unpackbits, bswap, fread, fwrite, \ convert_objectarray from mio import * -from recaster import Recaster +from recaster import sctype_attributes, Recaster from array_import import * from data_store import * from pickler import * Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-30 13:40:34 UTC (rev 2341) +++ trunk/Lib/io/recaster.py 2006-11-30 18:49:45 UTC (rev 2342) @@ -205,15 +205,20 @@ dti = dtp.itemsize sctypes = self.sized_sctypes[dtp.kind] sctypes = [t[0] for i, t in enumerate(sctypes) if t[1] < dti] - return self.smallest_from_sctypes(arr, sctypes) + return self._smallest_from_sctypes(arr, sctypes) - def smallest_from_sctypes(self, arr, sctypes): + def _smallest_from_sctypes(self, arr, sctypes): ''' Returns array recast to smallest possible type from list - + Inputs arr - array to recast sctypes - list of scalar types to try - + + sctypes is expected to be ordered by size with largest first, + and to all be of the same type. It would not usually be + sensible to use this routine for integers (see + smallest_int_sctype method) + Returns None if no recast is within tolerance ''' dt = arr.dtype.type @@ -248,26 +253,6 @@ sz = tsz return sct - def recast(self, arr): - ''' Try arr downcast, upcast if necesary to get compatible type ''' - dt = arr.dtype.type - ret_arr = self.downcast(arr) - if ret_arr is not None: - return ret_arr - # Could not downcast, arr dtype not in known list - # Try upcast to larger dtype of same kind - udt = self.capable_dtype[dt] - if udt is not None: - return arr.astype(udt) - # We are stuck for floats and complex now - # Can try casting integers to floats - if arr.dt.kind in ('i', 'u'): - sctypes = self.sized_sctypes['f'] - arr = self.smallest_from_sctypes(arr, sctypes) - if arr is not None: - return arr - raise ValueError, 'Could not recast array within precision' - def downcast(self, arr): dtk = arr.dtype.kind if dtk == 'c': @@ -313,3 +298,24 @@ if idt: return arr.astype(idt) return None + + def recast(self, arr): + ''' Try arr downcast, upcast if necesary to get compatible type ''' + dt = arr.dtype.type + ret_arr = self.downcast(arr) + if ret_arr is not None: + return ret_arr + # Could not downcast, arr dtype not in known list + # Try upcast to larger dtype of same kind + udt = self.capable_dtype[dt] + if udt is not None: + return arr.astype(udt) + # We are stuck for floats and complex now + # Can try casting integers to floats + if arr.dt.kind in ('i', 'u'): + sctypes = self.sized_sctypes['f'] + arr = self._smallest_from_sctypes(arr, sctypes) + if arr is not None: + return arr + raise ValueError, 'Could not recast array within precision' + Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- 
trunk/Lib/io/tests/test_recaster.py 2006-11-30 13:40:34 UTC (rev 2341) +++ trunk/Lib/io/tests/test_recaster.py 2006-11-30 18:49:45 UTC (rev 2342) @@ -12,7 +12,8 @@ class test_recaster(ScipyTestCase): def setUp(self): - self.recaster = Recaster([N.int32, N.complex64, N.float32]) + self.valid_types = [N.int32, N.complex128, N.float64] + self.recaster = Recaster(self.valid_types) def test_init(self): # Setting sctype_list @@ -35,11 +36,64 @@ # Integer sizes # Cabable types - def test_methods(self): - A = N.array(1, N.float64) - B = A.astype(N.float32) - # smallest from sctypes - C = self.recaster.smallest_from_sctypes(A, [N.float32]) - # smaller same kind - C = self.recaster.smallest_same_kind(A) - assert C.dtype == N.dtype(N.float32), 'Dtype was not downcast' + def test_smallest_same_kind(self): + R = self.recaster + value = 1 + # smallest same kind + # Define expected type output from same kind downcast of value + required_types = {'complex': N.complex128, + 'float': N.float64, + 'int': N.int32, + 'uint': None} + for kind, req_type in required_types.items(): + if req_type is not None: + rdtsz = N.dtype(req_type).itemsize + for T in N.sctypes[kind]: + tdtsz = N.dtype(T).itemsize + ok_T = T in R.sctype_list + expect_none = ((req_type is None) or + ((tdtsz < rdtsz) and not ok_T)) + A = N.array(value, T) + C = R.smallest_same_kind(A) + if expect_none: + assert C is None, 'Expecting None for %s' % T + else: + assert C.dtype.type == req_type, \ + 'Expected %s type, got %s type' % \ + (C.dtype.type, req_type) + + def test_smallest_int_sctype(self): + # Smallest int sctype with testing recaster + params = sctype_attributes() + mmax = params[N.int32]['max'] + mmin = params[N.int32]['min'] + for kind in ('int', 'uint'): + for T in N.sctypes[kind]: + mx = params[T]['max'] + mn = params[T]['min'] + rt = self.recaster.smallest_int_sctype(mx, mn) + if mx <= mmax and mn >= mmin: + assert rt == N.int32, 'Expected int32 type' + else: + assert rt is None, 'Expected None, got %s for %s' % (T, rt) + + # Smallest int sctype with full recaster + RF = Recaster() + test_triples = [(N.uint8, 0, 255), + (N.int8, -128, 0), + (N.uint16, 0, params[N.uint16]['max']), + (N.int16, params[N.int16]['min'], 0), + (N.uint32, 0, params[N.uint32]['max']), + (N.int32, params[N.int32]['min'], 0), + (N.uint64, 0, params[N.uint64]['max']), + (N.int64, params[N.int64]['min'], 0)] + for T, mn, mx in test_triples: + rt = RF.smallest_int_sctype(mx, mn) + assert rt == T, 'Expected %s, got %s type' % (T, rt) + + def test_downcasts(self): + value = 1 + R = self.recaster + A = N.array(value, N.complex128) + B = R.downcast_complex(A) + assert B.dtype.type == N.int32 From scipy-svn at scipy.org Thu Nov 30 14:14:36 2006 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 30 Nov 2006 13:14:36 -0600 (CST) Subject: [Scipy-svn] r2343 - in trunk/Lib/io: . 
tests Message-ID: <20061130191436.65FC239C21F@new.scipy.org> Author: matthew.brett at gmail.com Date: 2006-11-30 13:14:12 -0600 (Thu, 30 Nov 2006) New Revision: 2343 Modified: trunk/Lib/io/recaster.py trunk/Lib/io/tests/test_recaster.py Log: And more tests on recaster Modified: trunk/Lib/io/recaster.py =================================================================== --- trunk/Lib/io/recaster.py 2006-11-30 18:49:45 UTC (rev 2342) +++ trunk/Lib/io/recaster.py 2006-11-30 19:14:12 UTC (rev 2343) @@ -265,27 +265,32 @@ raise TypeError, 'Do not recognize array kind %s' % dtk def downcast_complex(self, arr): + ''' Downcasts complex array to smaller type if possible ''' # can we downcast to float? dt = arr.dtype dti = ceil(dt.itemsize / 2) sctypes = self.sized_sctypes['f'] flts = [t[0] for i, t in enumerate(sctypes) if t[1] <= dti] - test_arr = arr.astype(flts[0]) - rtol, atol = self.tols_from_sctype(dt.type) - if allclose(arr, test_arr, rtol, atol): - return self.downcast_float(test_arr) - # try downcasting to another complex type - return self.smallest_same_kind(arr) + if flts: # There are smaller floats to try + test_arr = arr.astype(flts[0]) + rtol, atol = self.tols_from_sctype(dt.type) + if allclose(arr, test_arr, rtol, atol): + arr = test_arr + # try downcasting to int or another complex type + return self.downcast_to_int_or_same(arr) - def downcast_float(self, arr): + def downcast_to_int_or_same(self, arr): + ''' Downcast to integer or smaller of same kind ''' # Try integer test_arr = self.downcast_integer(arr) rtol, atol = self.tols_from_sctype(arr.dtype.type) if allclose(arr, test_arr, rtol, atol): return test_arr - # Otherwise descend the float types + # Otherwise descend the types of same kind return self.smallest_same_kind(arr) + downcast_float = downcast_to_int_or_same + def downcast_integer(self, arr): ''' Downcasts arr to integer Modified: trunk/Lib/io/tests/test_recaster.py =================================================================== --- trunk/Lib/io/tests/test_recaster.py 2006-11-30 18:49:45 UTC (rev 2342) +++ trunk/Lib/io/tests/test_recaster.py 2006-11-30 19:14:12 UTC (rev 2343) @@ -92,8 +92,11 @@ assert rt == T, 'Expected %s, got %s type' % (T, rt) def test_downcasts(self): - value = 1 + value = 100 R = self.recaster - A = N.array(value, N.complex128) - B = R.downcast_complex(A) - assert B.dtype.type == N.int32 + for T in (N.complex128, N.complex64, + N.float64, N.uint64): + B = R.downcast(N.array(value, T)) + assert B is not None, 'Got None for %s' % T + assert B.dtype.type == N.int32 +
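Taken together, the r2336 fixes above come down to one convention: the classes in scipy.sandbox.models.formula are spelled in lowercase (term, quantitative, formula), and the capitalised names that cox.py and mixed.py had been using simply did not exist. A minimal sketch of the corrected spellings, assuming the sandbox models package is enabled and importable as of this revision:

    from scipy.sandbox.models import formula

    fixed = formula.term('f')        # was formula.Term     - a NameError
    x = formula.quantitative('X')    # was formula.Quantitative
    f = formula.formula(x)           # was formula.Formula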
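The rhs checks that r2337 adds to spsolve can be exercised as follows. This is a sketch rather than anything from the commit itself: it assumes the 2006-era scipy.sparse and scipy.linsolve APIs, in particular that csr_matrix accepts a dense array and that spsolve is importable from scipy.linsolve.

    import numpy as N
    from scipy.sparse import csr_matrix
    from scipy.linsolve import spsolve

    A = csr_matrix(N.array([[2., 0.], [0., 4.]]))
    x = spsolve(A, N.array([[1.], [2.]]))  # (2,1) rhs: squeezed to a vector
    try:
        spsolve(A, N.ones((2, 2)))         # max(b.shape) != b.size
    except ValueError, msg:
        print msg                          # "rhs must be a vector ..."
    try:
        spsolve(A, N.ones(3))              # vector, but wrong length for A
    except ValueError, msg:
        print msg                          # "matrix - rhs size mismatch ..."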
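Finally, the Recaster behaviour pinned down by test_recaster.py in r2341-r2343 can be summarised in a short sketch. It assumes the scipy.io.recaster module as of r2343, with Recaster and sctype_attributes re-exported from scipy.io as the __init__.py diff shows; the expected outputs in the comments follow the assertions in the tests rather than an independent run.

    import numpy as N
    from scipy.io.recaster import sctype_attributes, Recaster

    # Per-sctype kind/size/range table used throughout the class
    print sctype_attributes()[N.int16]     # kind 'i', size 2, min -32768, max 32767

    R = Recaster([N.int32, N.complex128, N.float64])
    B = R.downcast(N.array(100, N.complex128))
    print B.dtype.type                     # int32: 100 survives complex -> float -> int
    print R.smallest_int_sctype(2**40, 0)  # None: no acceptable integer holds 2**40
    print R.capable_sctype(N.int8)         # int32: smallest acceptable type holding int8's range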