From scipy-svn at scipy.org Fri Apr 2 06:47:13 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 2 Apr 2010 05:47:13 -0500 (CDT) Subject: [Scipy-svn] r6296 - trunk/scipy/special/tests Message-ID: <20100402104713.C8E6639C4B4@scipy.org> Author: ptvirtan Date: 2010-04-02 05:47:13 -0500 (Fri, 02 Apr 2010) New Revision: 6296 Modified: trunk/scipy/special/tests/test_mpmath.py Log: BUG: special: mpmath 0.14 has moved libhyper around, so don't try to use it Fixes #1145 Modified: trunk/scipy/special/tests/test_mpmath.py =================================================================== --- trunk/scipy/special/tests/test_mpmath.py 2010-03-31 16:31:03 UTC (rev 6295) +++ trunk/scipy/special/tests/test_mpmath.py 2010-04-02 10:47:13 UTC (rev 6296) @@ -114,7 +114,7 @@ for z in [-10, -1.01, -0.99, 0, 0.6, 0.95, 1.5, 10]: try: v = float(mpmath.hyp2f1(a, b, c, z)) - except (TypeError, mpmath.libhyper.NoConvergence): + except: continue dataset.append((a, b, c, z, v)) dataset = np.array(dataset, dtype=np.float_) From scipy-svn at scipy.org Fri Apr 2 06:47:21 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 2 Apr 2010 05:47:21 -0500 (CDT) Subject: [Scipy-svn] r6297 - trunk/scipy/special/tests Message-ID: <20100402104721.3B55A39C4B4@scipy.org> Author: ptvirtan Date: 2010-04-02 05:47:21 -0500 (Fri, 02 Apr 2010) New Revision: 6297 Modified: trunk/scipy/special/tests/test_mpmath.py Log: BUG: special: fix some mpmath-based tests by passing 'exact' fractions to mpmath so that its hypergeometric evaluation routines work correctly Modified: trunk/scipy/special/tests/test_mpmath.py =================================================================== --- trunk/scipy/special/tests/test_mpmath.py 2010-04-02 10:47:13 UTC (rev 6296) +++ trunk/scipy/special/tests/test_mpmath.py 2010-04-02 10:47:21 UTC (rev 6297) @@ -95,12 +95,18 @@ # Taken from mpmath unit tests -- this point failed for mpmath 0.13 but # was fixed in their SVN since then pts = [ - (112, 51./10, -9./10, -0.99999), - ## Mpmath currently (0.13) fails also for these: - #(10,-900,10.5,0.99), - #(10,-900,-10.5,0.99), + (112, (51,10), (-9,10), -0.99999), + (10,-900,10.5,0.99), + (10,-900,-10.5,0.99), ] - dataset = [p + (float(mpmath.hyp2f1(*p)),) for p in pts] + + def fev(x): + if isinstance(x, tuple): + return float(x[0]) / x[1] + else: + return x + + dataset = [tuple(map(fev, p)) + (float(mpmath.hyp2f1(*p)),) for p in pts] dataset = np.array(dataset, dtype=np.float_) FuncData(sc.hyp2f1, dataset, (0,1,2,3), 4, rtol=1e-10).check() From scipy-svn at scipy.org Fri Apr 2 18:35:55 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 2 Apr 2010 17:35:55 -0500 (CDT) Subject: [Scipy-svn] r6298 - trunk/scipy/sparse/linalg/isolve/tests Message-ID: <20100402223555.D7BDB39CAED@scipy.org> Author: warren.weckesser Date: 2010-04-02 17:35:55 -0500 (Fri, 02 Apr 2010) New Revision: 6298 Modified: trunk/scipy/sparse/linalg/isolve/tests/test_lgmres.py Log: Remove unused imports. Modified: trunk/scipy/sparse/linalg/isolve/tests/test_lgmres.py =================================================================== --- trunk/scipy/sparse/linalg/isolve/tests/test_lgmres.py 2010-04-02 10:47:21 UTC (rev 6297) +++ trunk/scipy/sparse/linalg/isolve/tests/test_lgmres.py 2010-04-02 22:35:55 UTC (rev 6298) @@ -4,13 +4,13 @@ from numpy.testing import * -from numpy import zeros, ones, arange, array, abs, max, allclose +from numpy import zeros, array, allclose from scipy.linalg import norm -from scipy.sparse import spdiags, csr_matrix, triu, tril +from scipy.sparse import csr_matrix from scipy.sparse.linalg.interface import LinearOperator from scipy.sparse.linalg import splu -from scipy.sparse.linalg.isolve import lgmres, gmres +from scipy.sparse.linalg.isolve import lgmres Am = csr_matrix(array([[-2,1,0,0,0,9], [1,-2,1,0,5,0], From scipy-svn at scipy.org Sat Apr 3 12:38:09 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 3 Apr 2010 11:38:09 -0500 (CDT) Subject: [Scipy-svn] r6299 - trunk/doc/source Message-ID: <20100403163809.A2FA439CAF5@scipy.org> Author: warren.weckesser Date: 2010-04-03 11:38:09 -0500 (Sat, 03 Apr 2010) New Revision: 6299 Modified: trunk/doc/source/index.rst Log: DOC: The stsci package was removed. Modified: trunk/doc/source/index.rst =================================================================== --- trunk/doc/source/index.rst 2010-04-02 22:35:55 UTC (rev 6298) +++ trunk/doc/source/index.rst 2010-04-03 16:38:09 UTC (rev 6299) @@ -41,5 +41,4 @@ spatial special stats - stsci weave From scipy-svn at scipy.org Sat Apr 3 13:29:21 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 3 Apr 2010 12:29:21 -0500 (CDT) Subject: [Scipy-svn] r6300 - in trunk: doc/source scipy/linalg Message-ID: <20100403172921.02CC139CAFE@scipy.org> Author: warren.weckesser Date: 2010-04-03 12:29:20 -0500 (Sat, 03 Apr 2010) New Revision: 6300 Removed: trunk/doc/source/stsci.rst Modified: trunk/doc/source/linalg.rst trunk/scipy/linalg/info.py Log: DOC: Update linalg module docs; remove stsci.rst. Modified: trunk/doc/source/linalg.rst =================================================================== --- trunk/doc/source/linalg.rst 2010-04-03 16:38:09 UTC (rev 6299) +++ trunk/doc/source/linalg.rst 2010-04-03 17:29:20 UTC (rev 6300) @@ -19,12 +19,6 @@ lstsq pinv pinv2 - kron - hankel - toeplitz - tri - tril - triu Eigenvalues and Decompositions ============================== @@ -74,17 +68,18 @@ sqrtm funm -Iterative linear systems solutions -================================== +Special Matrices +================ -These functions are deprecated - use scipy.sparse.linalg instead - .. autosummary:: :toctree: generated/ - cg - cgs - qmr - gmres - bicg - bicgstab + block_diag + circulant + hadamard + hankel + kron + toeplitz + tri + tril + triu Deleted: trunk/doc/source/stsci.rst =================================================================== --- trunk/doc/source/stsci.rst 2010-04-03 16:38:09 UTC (rev 6299) +++ trunk/doc/source/stsci.rst 2010-04-03 17:29:20 UTC (rev 6300) @@ -1,40 +0,0 @@ -============================================================= -Image Array Manipulation and Convolution (:mod:`scipy.stsci`) -============================================================= - -.. module:: scipy.stsci - -Image Array manipulation Functions (:mod:`scipy.stsci.image`) -============================================================= - -.. module:: scipy.stsci.image - -.. autosummary:: - :toctree: generated/ - - average - combine - median - minimum - threshhold - translate - - -Image Array Convolution Functions (:mod:`scipy.stsci.convolve`) -=============================================================== - -.. module:: scipy.stsci.convolve - -.. autosummary:: - :toctree: generated/ - - boxcar - convolution_modes - convolve - convolve2d - correlate - correlate2d - cross_correlate - dft - iraf_frame - pix_modes Modified: trunk/scipy/linalg/info.py =================================================================== --- trunk/scipy/linalg/info.py 2010-04-03 16:38:09 UTC (rev 6299) +++ trunk/scipy/linalg/info.py 2010-04-03 17:29:20 UTC (rev 6300) @@ -36,7 +36,7 @@ rsf2csf --- Real to complex schur form hessenberg --- Hessenberg form of a matrix -matrix Functions:: +Matrix Functions:: expm --- matrix exponential using Pade approx. expm2 --- matrix exponential using Eigenvalue decomp. @@ -52,15 +52,17 @@ sqrtm --- matrix square root funm --- Evaluating an arbitrary matrix function. -Iterative linear systems solutions:: +Special Matrices:: - cg --- Conjugate gradient (symmetric systems only) - cgs --- Conjugate gradient squared - qmr --- Quasi-minimal residual - gmres --- Generalized minimal residual - bicg --- Bi-conjugate gradient - bicgstab --- Bi-conjugate gradient stabilized - + block_diag --- Construct a block diagonal matrix from submatrices. + circulant --- Circulant matrix + hadamard --- Hadamard matrix of order 2^n + hankel --- Hankel matrix + kron --- Kronecker product of two arrays. + toeplitz --- Toeplitz matrix + tri --- Construct a matrix filled with ones at and below a given diagonal. + tril --- Construct a lower-triangular matrix from a given matrix. + triu --- Construct an upper-triangular matrix from a given matrix. """ postpone_import = 1 From scipy-svn at scipy.org Sat Apr 3 14:37:21 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 3 Apr 2010 13:37:21 -0500 (CDT) Subject: [Scipy-svn] r6301 - in trunk: doc/source scipy/signal Message-ID: <20100403183721.110E039CB07@scipy.org> Author: warren.weckesser Date: 2010-04-03 13:37:20 -0500 (Sat, 03 Apr 2010) New Revision: 6301 Modified: trunk/doc/source/signal.rst trunk/scipy/signal/info.py Log: DOC: Update signal module docs. Modified: trunk/doc/source/signal.rst =================================================================== --- trunk/doc/source/signal.rst 2010-04-03 17:29:20 UTC (rev 6300) +++ trunk/doc/source/signal.rst 2010-04-03 18:37:20 UTC (rev 6301) @@ -97,7 +97,9 @@ lti lsim + lsim2 impulse + impulse2 step LTI Reresentations @@ -119,10 +121,11 @@ .. autosummary:: :toctree: generated/ + chirp + gausspulse sawtooth square - gausspulse - chirp + sweep_poly Window functions ================ Modified: trunk/scipy/signal/info.py =================================================================== --- trunk/scipy/signal/info.py 2010-04-03 17:29:20 UTC (rev 6300) +++ trunk/scipy/signal/info.py 2010-04-03 18:37:20 UTC (rev 6301) @@ -64,10 +64,12 @@ Linear Systems: - lti -- linear time invariant system object. - lsim -- continuous-time simulation of output to linear system. - impulse -- impulse response of linear, time-invariant (LTI) system. - step -- step response of continous-time LTI system. + lti -- linear time invariant system object. + lsim -- continuous-time simulation of output to linear system. + lsim2 -- like lsim, but `scipy.integrate.odeint` is used. + impulse -- impulse response of linear, time-invariant (LTI) system. + impulse2 -- like impulse2, but `scipy.integrate.odeint` is used. + step -- step response of continous-time LTI system. LTI Reresentations: @@ -80,10 +82,11 @@ Waveforms: + chirp -- Frequency swept cosine signal, with several frequency functions. + gausspulse -- Gaussian modulated sinusoid sawtooth -- Periodic sawtooth square -- Square wave - gausspulse -- Gaussian modulated sinusoid - chirp -- Frequency swept cosine signal + sweep_poly -- Frequency swept cosine signal; frequency is arbitrary polynomial. Window functions: From scipy-svn at scipy.org Sat Apr 3 15:38:44 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 3 Apr 2010 14:38:44 -0500 (CDT) Subject: [Scipy-svn] r6302 - in trunk: doc/source scipy/signal Message-ID: <20100403193844.691A139CB04@scipy.org> Author: warren.weckesser Date: 2010-04-03 14:38:44 -0500 (Sat, 03 Apr 2010) New Revision: 6302 Modified: trunk/doc/source/signal.rst trunk/scipy/signal/info.py Log: DOC: Add references to a couple existing functions to module docstring of scipy.signal. Modified: trunk/doc/source/signal.rst =================================================================== --- trunk/doc/source/signal.rst 2010-04-03 18:37:20 UTC (rev 6301) +++ trunk/doc/source/signal.rst 2010-04-03 19:38:44 UTC (rev 6302) @@ -61,12 +61,14 @@ .. autosummary:: :toctree: generated/ - remez + bilinear firwin + freqs + freqz iirdesign iirfilter - freqs - freqz + kaiserord + remez unique_roots residue Modified: trunk/scipy/signal/info.py =================================================================== --- trunk/scipy/signal/info.py 2010-04-03 18:37:20 UTC (rev 6301) +++ trunk/scipy/signal/info.py 2010-04-03 19:38:44 UTC (rev 6302) @@ -42,12 +42,14 @@ Filter design: - remez -- Optimal FIR filter design. + bilinear -- Return a digital filter from an analog filter using the bilinear transform. firwin -- Windowed FIR filter design. + freqs -- Analog filter frequency response. + freqz -- Digital filter frequency response. iirdesign -- IIR filter design given bands and gains. iirfilter -- IIR filter design given order and critical frequencies. - freqs -- Analog filter frequency response. - freqz -- Digital filter frequency response. + kaiserord -- Design a Kaiser window to limit ripple and width of transition region. + remez -- Optimal FIR filter design. unique_roots -- Unique roots and their multiplicities. residue -- Partial fraction expansion of b(s) / a(s). From scipy-svn at scipy.org Sat Apr 3 17:00:13 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 3 Apr 2010 16:00:13 -0500 (CDT) Subject: [Scipy-svn] r6303 - trunk/scipy/signal/tests Message-ID: <20100403210013.6334239CB04@scipy.org> Author: warren.weckesser Date: 2010-04-03 16:00:13 -0500 (Sat, 03 Apr 2010) New Revision: 6303 Modified: trunk/scipy/signal/tests/test_filter_design.py Log: TEST: Add a test for scipy.signal.firwin. Modified: trunk/scipy/signal/tests/test_filter_design.py =================================================================== --- trunk/scipy/signal/tests/test_filter_design.py 2010-04-03 19:38:44 UTC (rev 6302) +++ trunk/scipy/signal/tests/test_filter_design.py 2010-04-03 21:00:13 UTC (rev 6303) @@ -3,8 +3,9 @@ import numpy as np from numpy.testing import TestCase, assert_array_almost_equal -from scipy.signal import tf2zpk, bessel, BadCoefficients +from scipy.signal import tf2zpk, bessel, BadCoefficients, kaiserord, firwin, freqz + class TestTf2zpk(TestCase): def test_simple(self): z_r = np.array([0.5, -0.5]) @@ -36,3 +37,14 @@ finally: warnings.simplefilter("always", BadCoefficients) + +class TestFirWin(TestCase): + + def test_lowpass(self): + width = 0.04 + ntaps, beta = kaiserord(120, width) + taps = firwin(ntaps, cutoff=0.5, window=('kaiser', beta)) + freq_samples = np.array([0.0, 0.25, 0.5-width/2, 0.5+width/2, 0.75, 1.0]) + freqs, response = freqz(taps, worN=np.pi*freq_samples) + assert_array_almost_equal(np.abs(response), + [1.0, 1.0, 1.0, 0.0, 0.0, 0.0], decimal=5) From scipy-svn at scipy.org Mon Apr 5 01:14:06 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 5 Apr 2010 00:14:06 -0500 (CDT) Subject: [Scipy-svn] r6304 - in trunk/scipy/linalg: . tests Message-ID: <20100405051406.047B739CAE6@scipy.org> Author: warren.weckesser Date: 2010-04-05 00:14:05 -0500 (Mon, 05 Apr 2010) New Revision: 6304 Modified: trunk/scipy/linalg/basic.py trunk/scipy/linalg/generic_flapack.pyf trunk/scipy/linalg/tests/test_basic.py Log: BUG: linalg (ticket #676): Fixed reversed declaration of the dimensions of the 'b' argument to the pbsv functions in generic_flapack.py. Also added shape validation for solveh_banded and solve_banded, and corrected the solve_banded docstring. Modified: trunk/scipy/linalg/basic.py =================================================================== --- trunk/scipy/linalg/basic.py 2010-04-03 21:00:13 UTC (rev 6303) +++ trunk/scipy/linalg/basic.py 2010-04-05 05:14:05 UTC (rev 6304) @@ -194,6 +194,14 @@ """ a1, b1 = map(asarray_chkfinite,(ab,b)) + + # Validate shapes. + if a1.shape[-1] != b1.shape[0]: + raise ValueError("shapes of ab and b are not compatible.") + if l+u+1 != a1.shape[0]: + raise ValueError("invalid values for the number of lower and upper diagonals:" + " l+u+1 (%d) does not equal ab.shape[0] (%d)" % (l+u+1, ab.shape[0])) + overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) gbsv, = get_lapack_funcs(('gbsv',),(a1,b1)) @@ -235,7 +243,7 @@ Parameters ---------- - ab : array, shape (M, u + 1) + ab : array, shape (u + 1, M) Banded matrix b : array, shape (M,) or (M, K) Right-hand side @@ -256,6 +264,10 @@ """ ab, b = map(asarray_chkfinite,(ab,b)) + # Validate shapes. + if ab.shape[-1] != b.shape[0]: + raise ValueError("shapes of ab and b are not compatible.") + pbsv, = get_lapack_funcs(('pbsv',),(ab,b)) c,x,info = pbsv(ab,b, lower=lower, Modified: trunk/scipy/linalg/generic_flapack.pyf =================================================================== --- trunk/scipy/linalg/generic_flapack.pyf 2010-04-03 21:00:13 UTC (rev 6303) +++ trunk/scipy/linalg/generic_flapack.pyf 2010-04-05 05:14:05 UTC (rev 6304) @@ -78,11 +78,11 @@ integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) integer intent(hide),depend(ab) :: n=shape(ab,1) integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 - integer intent(hide),depend(b) :: ldb=shape(b,1) - integer intent(hide),depend(b) :: nrhs=shape(b,0) + integer intent(hide),depend(b) :: ldb=shape(b,0) + integer intent(hide),depend(b) :: nrhs=shape(b,1) integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 - dimension(nrhs,ldb),intent(in,out,copy,out=x) :: b + dimension(ldb, nrhs),intent(in,out,copy,out=x) :: b dimension(ldab,n),intent(in,out,copy,out=c) :: ab integer intent(out) :: info @@ -110,11 +110,11 @@ integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) integer intent(hide),depend(ab) :: n=shape(ab,1) integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 - integer intent(hide),depend(b) :: ldb=shape(b,1) - integer intent(hide),depend(b) :: nrhs=shape(b,0) + integer intent(hide),depend(b) :: ldb=shape(b,0) + integer intent(hide),depend(b) :: nrhs=shape(b,1) integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 - dimension(nrhs,ldb),intent(in,out,copy,out=x) :: b + dimension(ldb, nrhs),intent(in,out,copy,out=x) :: b dimension(ldab,n),intent(in,out,copy,out=c) :: ab integer intent(out) :: info Modified: trunk/scipy/linalg/tests/test_basic.py =================================================================== --- trunk/scipy/linalg/tests/test_basic.py 2010-04-03 21:00:13 UTC (rev 6303) +++ trunk/scipy/linalg/tests/test_basic.py 2010-04-05 05:14:05 UTC (rev 6304) @@ -19,12 +19,14 @@ python tests/test_basic.py """ -from numpy import arange, array, dot, zeros, identity, conjugate, transpose +from numpy import arange, array, dot, zeros, identity, conjugate, transpose, \ + float32 import numpy.linalg as linalg from numpy.testing import * -from scipy.linalg import solve, inv, det, lstsq, pinv, pinv2, solve_banded, norm +from scipy.linalg import solve, inv, det, lstsq, pinv, pinv2, norm,\ + solve_banded, solveh_banded def random(size): @@ -33,19 +35,204 @@ class TestSolveBanded(TestCase): - def test_simple(self): + def test_real(self): + a = array([[ 1.0, 20, 0, 0], + [ -30, 4, 6, 0], + [ 2, 1, 20, 2], + [ 0, -1, 7, 14]]) + ab = array([[ 0.0, 20, 6, 2], + [ 1, 4, 20, 14], + [ -30, 1, 7, 0], + [ 2, -1, 0, 0]]) + l,u = 2,1 + b4 = array([10.0, 0.0, 2.0, 14.0]) + b4by1 = b4.reshape(-1,1) + b4by2 = array([[ 2, 1], + [-30, 4], + [ 2, 3], + [ 1, 3]]) + b4by4 = array([[1, 0, 0, 0], + [0, 0, 0, 1], + [0, 1, 0, 0], + [0, 1, 0, 0]]) + for b in [b4, b4by1, b4by2, b4by4]: + x = solve_banded((l, u), ab, b) + assert_array_almost_equal(dot(a, x), b) - a = [[1,20,0,0],[-30,4,6,0],[2,1,20,2],[0,-1,7,14]] - ab = [[0,20,6,2], - [1,4,20,14], - [-30,1,7,0], - [2,-1,0,0]] + def test_complex(self): + a = array([[ 1.0, 20, 0, 0], + [ -30, 4, 6, 0], + [ 2j, 1, 20, 2j], + [ 0, -1, 7, 14]]) + ab = array([[ 0.0, 20, 6, 2j], + [ 1, 4, 20, 14], + [ -30, 1, 7, 0], + [ 2j, -1, 0, 0]]) l,u = 2,1 - for b in ([[1,0,0,0],[0,0,0,1],[0,1,0,0],[0,1,0,0]], - [[2,1],[-30,4],[2,3],[1,3]]): - x = solve_banded((l,u),ab,b) - assert_array_almost_equal(dot(a,x),b) + b4 = array([10.0, 0.0, 2.0, 14.0j]) + b4by1 = b4.reshape(-1,1) + b4by2 = array([[ 2, 1], + [-30, 4], + [ 2, 3], + [ 1, 3]]) + b4by4 = array([[1, 0, 0, 0], + [0, 0, 0,1j], + [0, 1, 0, 0], + [0, 1, 0, 0]]) + for b in [b4, b4by1, b4by2, b4by4]: + x = solve_banded((l, u), ab, b) + assert_array_almost_equal(dot(a, x), b) + def test_bad_shape(self): + ab = array([[ 0.0, 20, 6, 2], + [ 1, 4, 20, 14], + [ -30, 1, 7, 0], + [ 2, -1, 0, 0]]) + l,u = 2,1 + bad = array([1.0, 2.0, 3.0, 4.0]).reshape(-1,4) + assert_raises(ValueError, solve_banded, (l, u), ab, bad) + assert_raises(ValueError, solve_banded, (l, u), ab, [1.0, 2.0]) + + # Values of (l,u) are not compatible with ab. + assert_raises(ValueError, solve_banded, (1, 1), ab, [1.0, 2.0]) + + +class TestSolveHBanded(TestCase): + + def test_01_upper(self): + # Solve + # [ 4 1 0] [1] + # [ 1 4 1] X = [4] + # [ 0 1 4] [1] + # with the RHS as a 1D array. + ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]]) + b = array([1.0, 4.0, 1.0]) + c, x = solveh_banded(ab, b) + assert_array_almost_equal(x, [0.0, 1.0, 0.0]) + + def test_02_upper(self): + # Solve + # [ 4 1 0] [1 4] + # [ 1 4 1] X = [4 2] + # [ 0 1 4] [1 4] + # + ab = array([[-99, 1.0, 1.0], + [4.0, 4.0, 4.0]]) + b = array([[1.0, 4.0], + [4.0, 2.0], + [1.0, 4.0]]) + c, x = solveh_banded(ab, b) + expected = array([[0.0, 1.0], + [1.0, 0.0], + [0.0, 1.0]]) + assert_array_almost_equal(x, expected) + + def test_03_upper(self): + # Solve + # [ 4 1 0] [1] + # [ 1 4 1] X = [4] + # [ 0 1 4] [1] + # with the RHS as a 2D array with shape (3,1). + ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]]) + b = array([1.0, 4.0, 1.0]).reshape(-1,1) + c, x = solveh_banded(ab, b) + assert_array_almost_equal(x, array([0.0, 1.0, 0.0]).reshape(-1,1)) + + def test_01_lower(self): + # Solve + # [ 4 1 0] [1] + # [ 1 4 1] X = [4] + # [ 0 1 4] [1] + # + ab = array([[4.0, 4.0, 4.0], + [1.0, 1.0, -99]]) + b = array([1.0, 4.0, 1.0]) + c, x = solveh_banded(ab, b, lower=True) + assert_array_almost_equal(x, [0.0, 1.0, 0.0]) + + def test_02_lower(self): + # Solve + # [ 4 1 0] [1 4] + # [ 1 4 1] X = [4 2] + # [ 0 1 4] [1 4] + # + ab = array([[4.0, 4.0, 4.0], + [1.0, 1.0, -99]]) + b = array([[1.0, 4.0], + [4.0, 2.0], + [1.0, 4.0]]) + c, x = solveh_banded(ab, b, lower=True) + expected = array([[0.0, 1.0], + [1.0, 0.0], + [0.0, 1.0]]) + assert_array_almost_equal(x, expected) + + def test_01_float32(self): + # Solve + # [ 4 1 0] [1] + # [ 1 4 1] X = [4] + # [ 0 1 4] [1] + # + ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]], dtype=float32) + b = array([1.0, 4.0, 1.0], dtype=float32) + c, x = solveh_banded(ab, b) + assert_array_almost_equal(x, [0.0, 1.0, 0.0]) + + def test_02_float32(self): + # Solve + # [ 4 1 0] [1 4] + # [ 1 4 1] X = [4 2] + # [ 0 1 4] [1 4] + # + ab = array([[-99, 1.0, 1.0], + [4.0, 4.0, 4.0]], dtype=float32) + b = array([[1.0, 4.0], + [4.0, 2.0], + [1.0, 4.0]], dtype=float32) + c, x = solveh_banded(ab, b) + expected = array([[0.0, 1.0], + [1.0, 0.0], + [0.0, 1.0]]) + assert_array_almost_equal(x, expected) + + def test_01_complex(self): + # Solve + # [ 4 -j 0] [ -j] + # [ j 4 -j] X = [4-j] + # [ 0 j 4] [4+j] + # + ab = array([[-99, -1.0j, -1.0j], [4.0, 4.0, 4.0]]) + b = array([-1.0j, 4.0-1j, 4+1j]) + c, x = solveh_banded(ab, b) + assert_array_almost_equal(x, [0.0, 1.0, 1.0]) + + def test_02_complex(self): + # Solve + # [ 4 -j 0] [ -j 4j] + # [ j 4 -j] X = [4-j -1-j] + # [ 0 j 4] [4+j 4 ] + # + ab = array([[-99, -1.0j, -1.0j], + [4.0, 4.0, 4.0]]) + b = array([[ -1j, 4.0j], + [4.0-1j, -1.0-1j], + [4.0+1j, 4.0]]) + c, x = solveh_banded(ab, b) + expected = array([[0.0, 1.0j], + [1.0, 0.0], + [1.0, 1.0]]) + assert_array_almost_equal(x, expected) + + def test_bad_shapes(self): + ab = array([[-99, 1.0, 1.0], + [4.0, 4.0, 4.0]]) + b = array([[1.0, 4.0], + [4.0, 2.0]]) + assert_raises(ValueError, solveh_banded, ab, b) + assert_raises(ValueError, solveh_banded, ab, [1.0, 2.0]) + assert_raises(ValueError, solveh_banded, ab, [1.0]) + + class TestSolve(TestCase): def test_20Feb04_bug(self): From scipy-svn at scipy.org Mon Apr 5 19:36:38 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 5 Apr 2010 18:36:38 -0500 (CDT) Subject: [Scipy-svn] r6305 - in trunk/scipy/linalg: . tests Message-ID: <20100405233638.2697E39CAEC@scipy.org> Author: warren.weckesser Date: 2010-04-05 18:36:38 -0500 (Mon, 05 Apr 2010) New Revision: 6305 Modified: trunk/scipy/linalg/basic.py trunk/scipy/linalg/tests/test_basic.py Log: BUG(DOC): linalg: Corrected the docstring for cholesky_banded, and added tests. Modified: trunk/scipy/linalg/basic.py =================================================================== --- trunk/scipy/linalg/basic.py 2010-04-05 05:14:05 UTC (rev 6304) +++ trunk/scipy/linalg/basic.py 2010-04-05 23:36:38 UTC (rev 6305) @@ -303,7 +303,7 @@ Parameters ---------- - ab : array, shape (M, u + 1) + ab : array, shape (u + 1, M) Banded matrix overwrite_ab : boolean Discard data in ab (may enhance performance) @@ -312,7 +312,7 @@ Returns ------- - c : array, shape (M, u+1) + c : array, shape (u+1, M) Cholesky factorization of a, in the same banded format as ab """ Modified: trunk/scipy/linalg/tests/test_basic.py =================================================================== --- trunk/scipy/linalg/tests/test_basic.py 2010-04-05 05:14:05 UTC (rev 6304) +++ trunk/scipy/linalg/tests/test_basic.py 2010-04-05 23:36:38 UTC (rev 6305) @@ -20,13 +20,13 @@ """ from numpy import arange, array, dot, zeros, identity, conjugate, transpose, \ - float32 + float32, zeros_like import numpy.linalg as linalg from numpy.testing import * from scipy.linalg import solve, inv, det, lstsq, pinv, pinv2, norm,\ - solve_banded, solveh_banded + solve_banded, solveh_banded, cholesky_banded def random(size): @@ -335,6 +335,69 @@ assert_array_almost_equal(dot(a,x),b) +class TestCholeskyBanded(TestCase): + + def test_upper_real(self): + # Symmetric positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, 0.2], + [0.0, 0.0, 0.2, 4.0]]) + # Banded storage form of `a`. + ab = array([[-1.0, 1.0, 0.5, 0.2], + [4.0, 4.0, 4.0, 4.0]]) + c = cholesky_banded(ab, lower=False) + ufac = zeros_like(a) + ufac[range(4),range(4)] = c[-1] + ufac[(0,1,2),(1,2,3)] = c[0,1:] + assert_array_almost_equal(a, dot(ufac.T, ufac)) + + def test_upper_complex(self): + # Hermitian positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, -0.2j], + [0.0, 0.0, 0.2j, 4.0]]) + # Banded storage form of `a`. + ab = array([[-1.0, 1.0, 0.5, -0.2j], + [4.0, 4.0, 4.0, 4.0]]) + c = cholesky_banded(ab, lower=False) + ufac = zeros_like(a) + ufac[range(4),range(4)] = c[-1] + ufac[(0,1,2),(1,2,3)] = c[0,1:] + assert_array_almost_equal(a, dot(ufac.conj().T, ufac)) + + def test_lower_real(self): + # Symmetric positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, 0.2], + [0.0, 0.0, 0.2, 4.0]]) + # Banded storage form of `a`. + ab = array([[4.0, 4.0, 4.0, 4.0], + [1.0, 0.5, 0.2, -1.0]]) + c = cholesky_banded(ab, lower=True) + lfac = zeros_like(a) + lfac[range(4),range(4)] = c[0] + lfac[(1,2,3),(0,1,2)] = c[1,:3] + assert_array_almost_equal(a, dot(lfac, lfac.T)) + + def test_lower_complex(self): + # Hermitian positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, -0.2j], + [0.0, 0.0, 0.2j, 4.0]]) + # Banded storage form of `a`. + ab = array([[4.0, 4.0, 4.0, 4.0], + [1.0, 0.5, 0.2j, -1.0]]) + c = cholesky_banded(ab, lower=True) + lfac = zeros_like(a) + lfac[range(4),range(4)] = c[0] + lfac[(1,2,3),(0,1,2)] = c[1,:3] + assert_array_almost_equal(a, dot(lfac, lfac.conj().T)) + + class TestInv(TestCase): def test_simple(self): From scipy-svn at scipy.org Mon Apr 5 20:14:20 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 5 Apr 2010 19:14:20 -0500 (CDT) Subject: [Scipy-svn] r6306 - trunk/scipy/linalg Message-ID: <20100406001420.3F38D39CAEB@scipy.org> Author: warren.weckesser Date: 2010-04-05 19:14:20 -0500 (Mon, 05 Apr 2010) New Revision: 6306 Modified: trunk/scipy/linalg/basic.py trunk/scipy/linalg/decomp.py trunk/scipy/linalg/matfuncs.py Log: STY: linalg: Use False and True instead of 0 and 1. Modified: trunk/scipy/linalg/basic.py =================================================================== --- trunk/scipy/linalg/basic.py 2010-04-05 23:36:38 UTC (rev 6305) +++ trunk/scipy/linalg/basic.py 2010-04-06 00:14:20 UTC (rev 6306) @@ -31,7 +31,7 @@ import decomp -def lu_solve((lu, piv), b, trans=0, overwrite_b=0): +def lu_solve((lu, piv), b, trans=0, overwrite_b=False): """Solve an equation system, a x = b, given the LU factorization of a Parameters @@ -72,7 +72,7 @@ raise ValueError,\ 'illegal value in %-th argument of internal gesv|posv'%(-info) -def cho_solve((c, lower), b, overwrite_b=0): +def cho_solve((c, lower), b, overwrite_b=False): """Solve an equation system, a x = b, given the Cholesky factorization of a Parameters @@ -104,8 +104,8 @@ 'illegal value in %-th argument of internal gesv|posv'%(-info) # Linear equations -def solve(a, b, sym_pos=0, lower=0, overwrite_a=0, overwrite_b=0, - debug = 0): +def solve(a, b, sym_pos=False, lower=False, overwrite_a=False, overwrite_b=False, + debug=False): """Solve the equation a x = b for x Parameters @@ -159,8 +159,8 @@ raise ValueError,\ 'illegal value in %-th argument of internal gesv|posv'%(-info) -def solve_banded((l,u), ab, b, overwrite_ab=0, overwrite_b=0, - debug = 0): +def solve_banded((l,u), ab, b, overwrite_ab=False, overwrite_b=False, + debug=False): """Solve the equation a x = b for x, assuming a is banded matrix. The matrix a is stored in ab using the matrix diagonal orded form:: @@ -217,8 +217,8 @@ raise ValueError,\ 'illegal value in %-th argument of internal gbsv'%(-info) -def solveh_banded(ab, b, overwrite_ab=0, overwrite_b=0, - lower=0): +def solveh_banded(ab, b, overwrite_ab=False, overwrite_b=False, + lower=False): """Solve equation a x = b. a is Hermitian positive-definite banded matrix. The matrix a is stored in ab either in lower diagonal or upper @@ -280,7 +280,7 @@ raise ValueError,\ 'illegal value in %d-th argument of internal pbsv'%(-info) -def cholesky_banded(ab, overwrite_ab=0, lower=0): +def cholesky_banded(ab, overwrite_ab=False, lower=False): """Cholesky decompose a banded Hermitian positive-definite matrix The matrix a is stored in ab either in lower diagonal or upper @@ -332,7 +332,7 @@ # matrix inversion -def inv(a, overwrite_a=0): +def inv(a, overwrite_a=False): """Compute the inverse of a matrix. Parameters @@ -406,7 +406,7 @@ ### Determinant -def det(a, overwrite_a=0): +def det(a, overwrite_a=False): """Compute the determinant of a matrix Parameters @@ -434,7 +434,7 @@ ### Linear Least Squares -def lstsq(a, b, cond=None, overwrite_a=0, overwrite_b=0): +def lstsq(a, b, cond=None, overwrite_a=False, overwrite_b=False): """Compute least-squares solution to equation :m:`a x = b` Compute a vector x such that the 2-norm :m:`|b - a x|` is minimised. Modified: trunk/scipy/linalg/decomp.py =================================================================== --- trunk/scipy/linalg/decomp.py 2010-04-05 23:36:38 UTC (rev 6305) +++ trunk/scipy/linalg/decomp.py 2010-04-06 00:14:20 UTC (rev 6306) @@ -396,7 +396,7 @@ " and no eigenvalues or eigenvectors were" " computed." % (info-b1.shape[0])) -def eig_banded(a_band, lower=0, eigvals_only=0, overwrite_a_band=0, +def eig_banded(a_band, lower=False, eigvals_only=False, overwrite_a_band=False, select='a', select_range=None, max_ev = 0): """Solve real symmetric or complex hermetian band matrix eigenvalue problem. @@ -544,7 +544,7 @@ return w return w, v -def eigvals(a,b=None,overwrite_a=0): +def eigvals(a, b=None, overwrite_a=False): """Compute eigenvalues from an ordinary or generalized eigenvalue problem. Find eigenvalues of a general matrix:: @@ -641,7 +641,7 @@ overwrite_a=overwrite_a, overwrite_b=overwrite_b, turbo=turbo, eigvals=eigvals, type=type) -def eigvals_banded(a_band,lower=0,overwrite_a_band=0, +def eigvals_banded(a_band, lower=False, overwrite_a_band=False, select='a', select_range=None): """Solve real symmetric or complex hermitian band matrix eigenvalue problem. @@ -711,7 +711,7 @@ overwrite_a_band=overwrite_a_band, select=select, select_range=select_range) -def lu_factor(a, overwrite_a=0): +def lu_factor(a, overwrite_a=False): """Compute pivoted LU decomposition of a matrix. The decomposition is:: @@ -758,7 +758,7 @@ RuntimeWarning) return lu, piv -def lu_solve(a_lu_pivots,b): +def lu_solve(a_lu_pivots, b): """Solve an equation system, a x = b, given the LU factorization of a Parameters @@ -795,7 +795,7 @@ return b -def lu(a,permute_l=0,overwrite_a=0): +def lu(a, permute_l=False, overwrite_a=False): """Compute pivoted LU decompostion of a matrix. The decomposition is:: @@ -849,7 +849,7 @@ return l,u return p,l,u -def svd(a,full_matrices=1,compute_uv=1,overwrite_a=0): +def svd(a, full_matrices=True, compute_uv=True, overwrite_a=False): """Singular Value Decomposition. Factorizes the matrix a into two unitary matrices U and Vh and @@ -929,7 +929,7 @@ else: return s -def svdvals(a,overwrite_a=0): +def svdvals(a, overwrite_a=False): """Compute singular values of a matrix. Parameters @@ -954,7 +954,7 @@ """ return svd(a,compute_uv=0,overwrite_a=overwrite_a) -def diagsvd(s,M,N): +def diagsvd(s, M, N): """Construct the sigma matrix in SVD from singular values and size M,N. Parameters @@ -981,7 +981,7 @@ else: raise ValueError, "Length of s must be M or N." -def cholesky(a,lower=0,overwrite_a=0): +def cholesky(a, lower=False, overwrite_a=False): """Compute the Cholesky decomposition of a matrix. Returns the Cholesky decomposition, :lm:`A = L L^*` or :lm:`A = U^* U` @@ -1028,7 +1028,7 @@ 'illegal value in %-th argument of internal potrf'%(-info) return c -def cho_factor(a, lower=0, overwrite_a=0): +def cho_factor(a, lower=False, overwrite_a=False): """Compute the Cholesky decomposition of a matrix, to use in cho_solve Returns a matrix containing the Cholesky decomposition, @@ -1115,7 +1115,7 @@ raise TypeError, msg return b -def qr(a, overwrite_a=0, lwork=None, econ=None, mode='qr'): +def qr(a, overwrite_a=False, lwork=None, econ=None, mode='qr'): """Compute QR decomposition of a matrix. Calculate the decomposition :lm:`A = Q R` where Q is unitary/orthogonal @@ -1237,7 +1237,7 @@ -def qr_old(a,overwrite_a=0,lwork=None): +def qr_old(a, overwrite_a=False, lwork=None): """Compute QR decomposition of a matrix. Calculate the decomposition :lm:`A = Q R` where Q is unitary/orthogonal @@ -1291,7 +1291,7 @@ -def rq(a,overwrite_a=0,lwork=None): +def rq(a, overwrite_a=False, lwork=None): """Compute RQ decomposition of a square real matrix. Calculate the decomposition :lm:`A = R Q` where Q is unitary/orthogonal @@ -1353,7 +1353,7 @@ _double_precision = ['i','l','d'] -def schur(a,output='real',lwork=None,overwrite_a=0): +def schur(a, output='real', lwork=None, overwrite_a=False): """Compute Schur decomposition of a matrix. The Schur decomposition is @@ -1421,6 +1421,7 @@ _array_kind = {'b':0, 'h':0, 'B': 0, 'i':0, 'l': 0, 'f': 0, 'd': 0, 'F': 1, 'D': 1} _array_precision = {'i': 1, 'l': 1, 'f': 0, 'd': 1, 'F': 0, 'D': 1} _array_type = [['f', 'd'], ['F', 'D']] + def _commonType(*arrays): kind = 0 precision = 0 @@ -1533,7 +1534,7 @@ Q = u[:,:num] return Q -def hessenberg(a,calc_q=0,overwrite_a=0): +def hessenberg(a, calc_q=False, overwrite_a=False): """Compute Hessenberg form of a matrix. The Hessenberg decomposition is Modified: trunk/scipy/linalg/matfuncs.py =================================================================== --- trunk/scipy/linalg/matfuncs.py 2010-04-05 23:36:38 UTC (rev 6305) +++ trunk/scipy/linalg/matfuncs.py 2010-04-06 00:14:20 UTC (rev 6306) @@ -19,7 +19,7 @@ eps = np.finfo(float).eps feps = np.finfo(single).eps -def expm(A,q=7): +def expm(A, q=7): """Compute the matrix exponential using Pade approximation. Parameters @@ -89,7 +89,7 @@ vri = inv(vr) return dot(dot(vr,diag(exp(s))),vri).astype(t) -def expm3(A,q=20): +def expm3(A, q=20): """Compute the matrix exponential using Taylor series. Parameters @@ -120,7 +120,8 @@ return eA _array_precision = {'i': 1, 'l': 1, 'f': 0, 'd': 1, 'F': 0, 'D': 1} -def toreal(arr,tol=None): + +def toreal(arr, tol=None): """Return as real array if imaginary part is small. Parameters @@ -267,7 +268,7 @@ else: return solve(coshm(A), sinhm(A)) -def funm(A,func,disp=1): +def funm(A, func, disp=True): """Evaluate a matrix function specified by a callable. Returns the value of matrix-valued function f at A. The function f @@ -342,7 +343,7 @@ else: return F, err -def logm(A,disp=1): +def logm(A, disp=True): """Compute matrix logarithm. The matrix logarithm is the inverse of expm: expm(logm(A)) == A @@ -393,7 +394,7 @@ else: return F, errest -def signm(a,disp=1): +def signm(a, disp=True): """Matrix sign function. Extension of the scalar sign(x) to matrices. @@ -470,7 +471,7 @@ else: return S0, errest -def sqrtm(A,disp=1): +def sqrtm(A, disp=True): """Matrix square root. Parameters From scipy-svn at scipy.org Mon Apr 5 22:52:03 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 5 Apr 2010 21:52:03 -0500 (CDT) Subject: [Scipy-svn] r6307 - in trunk: doc/source scipy/signal Message-ID: <20100406025203.60A3139CAF6@scipy.org> Author: warren.weckesser Date: 2010-04-05 21:52:03 -0500 (Mon, 05 Apr 2010) New Revision: 6307 Added: trunk/scipy/signal/windows.py Modified: trunk/doc/source/signal.rst trunk/scipy/signal/__init__.py trunk/scipy/signal/info.py trunk/scipy/signal/signaltools.py Log: REF: signal: Moved the windows to their own file. Modified: trunk/doc/source/signal.rst =================================================================== --- trunk/doc/source/signal.rst 2010-04-06 00:14:20 UTC (rev 6306) +++ trunk/doc/source/signal.rst 2010-04-06 02:52:03 UTC (rev 6307) @@ -135,22 +135,24 @@ .. autosummary:: :toctree: generated/ - boxcar - triang - parzen - bohman + get_window + barthann + bartlett blackman blackmanharris - nuttall + bohman + boxcar + chebwin flattop - bartlett + gaussian + general_gaussian + hamming hann - barthann - hamming kaiser - gaussian - general_gaussian + nuttall + parzen slepian + triang Wavelets ======== Modified: trunk/scipy/signal/__init__.py =================================================================== --- trunk/scipy/signal/__init__.py 2010-04-06 00:14:20 UTC (rev 6306) +++ trunk/scipy/signal/__init__.py 2010-04-06 02:52:03 UTC (rev 6307) @@ -9,6 +9,7 @@ from bsplines import * from filter_design import * from ltisys import * +from windows import * from signaltools import * from wavelets import * Modified: trunk/scipy/signal/info.py =================================================================== --- trunk/scipy/signal/info.py 2010-04-06 00:14:20 UTC (rev 6306) +++ trunk/scipy/signal/info.py 2010-04-06 02:52:03 UTC (rev 6307) @@ -92,22 +92,24 @@ Window functions: + get_window -- Return a window of a given length and type. + barthann -- Bartlett-Hann window + bartlett -- Bartlett window + blackman -- Blackman window + blackmanharris -- Minimum 4-term Blackman-Harris window + bohman -- Bohman window boxcar -- Boxcar window - triang -- Triangular window - parzen -- Parzen window - bohman -- Bohman window - blackman -- Blackman window - blackmanharris -- Minimum 4-term Blackman-Harris window - nuttall -- Nuttall's minimum 4-term Blackman-Harris window + chebwin -- Dolph-Chebyshev window flattop -- Flat top window - bartlett -- Bartlett window + gaussian -- Gaussian window + general_gaussian -- Generalized Gaussian window + hamming -- Hamming window hann -- Hann window - barthann -- Bartlett-Hann window - hamming -- Hamming window kaiser -- Kaiser window - gaussian -- Gaussian window - general_gaussian -- Generalized Gaussian window + nuttall -- Nuttall's minimum 4-term Blackman-Harris window + parzen -- Parzen window slepian -- Slepian window + triang -- Triangular window Wavelets: Modified: trunk/scipy/signal/signaltools.py =================================================================== --- trunk/scipy/signal/signaltools.py 2010-04-06 00:14:20 UTC (rev 6306) +++ trunk/scipy/signal/signaltools.py 2010-04-06 02:52:03 UTC (rev 6307) @@ -1,23 +1,21 @@ # Author: Travis Oliphant # 1999 -- 2002 -import types import warnings import sigtools -from scipy import special, linalg +from scipy import linalg from scipy.fftpack import fft, ifft, ifftshift, fft2, ifft2, fftn, \ - ifftn, fftfreq -from numpy import polyadd, polymul, polydiv, polysub, \ - roots, poly, polyval, polyder, cast, asarray, isscalar, atleast_1d, \ - ones, sin, linspace, real, extract, real_if_close, zeros, array, arange, \ - where, sqrt, rank, newaxis, argmax, product, cos, pi, exp, \ - ravel, size, less_equal, sum, r_, iscomplexobj, take, \ - argsort, allclose, expand_dims, unique, prod, sort, reshape, \ - transpose, dot, any, mean, cosh, arccosh, \ - arccos, concatenate, flipud, ndarray + ifftn, fftfreq +from numpy import polyadd, polymul, polydiv, polysub, roots, \ + poly, polyval, polyder, cast, asarray, isscalar, atleast_1d, \ + ones, real, real_if_close, zeros, array, arange, where, rank, \ + newaxis, product, ravel, sum, r_, iscomplexobj, take, \ + argsort, allclose, expand_dims, unique, prod, sort, reshape, \ + transpose, dot, any, mean, flipud, ndarray import numpy as np from scipy.misc import factorial +from .windows import get_window _modedict = {'valid':0, 'same':1, 'full':2} @@ -625,8 +623,8 @@ zi = {z_0[-1], z_1[-1], ..., z_K-1[-1]} where K=max(M,N). """ - N = size(a)-1 - M = size(b)-1 + N = np.size(a)-1 + M = np.size(b)-1 K = max(M,N) y = asarray(y) zi = zeros(K,y.dtype.char) @@ -634,10 +632,10 @@ x = zeros(M,y.dtype.char) else: x = asarray(x) - L = size(x) + L = np.size(x) if L < M: x = r_[x,zeros(M-L)] - L = size(y) + L = np.size(y) if L < N: y = r_[y,zeros(N-L)] @@ -668,366 +666,7 @@ return quot, rem -def boxcar(M,sym=1): - """The M-point boxcar window. - """ - return ones(M, float) - -def triang(M,sym=1): - """The M-point triangular window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M + 1 - n = arange(1,int((M+1)/2)+1) - if M % 2 == 0: - w = (2*n-1.0)/M - w = r_[w, w[::-1]] - else: - w = 2*n/(M+1.0) - w = r_[w, w[-2::-1]] - - if not sym and not odd: - w = w[:-1] - return w - -def parzen(M,sym=1): - """The M-point Parzen window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(-(M-1)/2.0,(M-1)/2.0+0.5,1.0) - na = extract(n < -(M-1)/4.0, n) - nb = extract(abs(n) <= (M-1)/4.0, n) - wa = 2*(1-abs(na)/(M/2.0))**3.0 - wb = 1-6*(abs(nb)/(M/2.0))**2.0 + 6*(abs(nb)/(M/2.0))**3.0 - w = r_[wa,wb,wa[::-1]] - if not sym and not odd: - w = w[:-1] - return w - -def bohman(M,sym=1): - """The M-point Bohman window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - fac = abs(linspace(-1,1,M)[1:-1]) - w = (1 - fac)* cos(pi*fac) + 1.0/pi*sin(pi*fac) - w = r_[0,w,0] - if not sym and not odd: - w = w[:-1] - return w - -def blackman(M,sym=1): - """The M-point Blackman window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - w = 0.42-0.5*cos(2.0*pi*n/(M-1)) + 0.08*cos(4.0*pi*n/(M-1)) - if not sym and not odd: - w = w[:-1] - return w - -def nuttall(M,sym=1): - """A minimum 4-term Blackman-Harris window according to Nuttall. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - a = [0.3635819, 0.4891775, 0.1365995, 0.0106411] - n = arange(0,M) - fac = n*2*pi/(M-1.0) - w = a[0] - a[1]*cos(fac) + a[2]*cos(2*fac) - a[3]*cos(3*fac) - if not sym and not odd: - w = w[:-1] - return w - -def blackmanharris(M,sym=1): - """The M-point minimum 4-term Blackman-Harris window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - a = [0.35875, 0.48829, 0.14128, 0.01168]; - n = arange(0,M) - fac = n*2*pi/(M-1.0) - w = a[0] - a[1]*cos(fac) + a[2]*cos(2*fac) - a[3]*cos(3*fac) - if not sym and not odd: - w = w[:-1] - return w - -def flattop(M,sym=1): - """The M-point Flat top window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - a = [0.2156, 0.4160, 0.2781, 0.0836, 0.0069] - n = arange(0,M) - fac = n*2*pi/(M-1.0) - w = a[0] - a[1]*cos(fac) + a[2]*cos(2*fac) - a[3]*cos(3*fac) + \ - a[4]*cos(4*fac) - if not sym and not odd: - w = w[:-1] - return w - - -def bartlett(M,sym=1): - """The M-point Bartlett window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - w = where(less_equal(n,(M-1)/2.0),2.0*n/(M-1),2.0-2.0*n/(M-1)) - if not sym and not odd: - w = w[:-1] - return w - -def hanning(M,sym=1): - """The M-point Hanning window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - w = 0.5-0.5*cos(2.0*pi*n/(M-1)) - if not sym and not odd: - w = w[:-1] - return w - -hann = hanning - -def barthann(M,sym=1): - """Return the M-point modified Bartlett-Hann window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - fac = abs(n/(M-1.0)-0.5) - w = 0.62 - 0.48*fac + 0.38*cos(2*pi*fac) - if not sym and not odd: - w = w[:-1] - return w - -def hamming(M,sym=1): - """The M-point Hamming window. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - w = 0.54-0.46*cos(2.0*pi*n/(M-1)) - if not sym and not odd: - w = w[:-1] - return w - - - -def kaiser(M,beta,sym=1): - """Return a Kaiser window of length M with shape parameter beta. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M) - alpha = (M-1)/2.0 - w = special.i0(beta * sqrt(1-((n-alpha)/alpha)**2.0))/special.i0(beta) - if not sym and not odd: - w = w[:-1] - return w - -def gaussian(M,std,sym=1): - """Return a Gaussian window of length M with standard-deviation std. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M + 1 - n = arange(0,M)-(M-1.0)/2.0 - sig2 = 2*std*std - w = exp(-n**2 / sig2) - if not sym and not odd: - w = w[:-1] - return w - -def general_gaussian(M,p,sig,sym=1): - """Return a window with a generalized Gaussian shape. - - exp(-0.5*(x/sig)**(2*p)) - - half power point is at (2*log(2)))**(1/(2*p))*sig - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - n = arange(0,M)-(M-1.0)/2.0 - w = exp(-0.5*(n/sig)**(2*p)) - if not sym and not odd: - w = w[:-1] - return w - - -# contributed by Kumar Appaiah. -def chebwin(M, at, sym=1): - """Dolph-Chebyshev window. - - INPUTS: - - M : int - Window size - at : float - Attenuation (in dB) - sym : bool - Generates symmetric window if True. - - """ - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - - odd = M % 2 - if not sym and not odd: - M = M+1 - - # compute the parameter beta - order = M - 1.0 - beta = cosh(1.0/order * arccosh(10**(abs(at)/20.))) - k = r_[0:M]*1.0 - x = beta*cos(pi*k/M) - #find the window's DFT coefficients - # Use analytic definition of Chebyshev polynomial instead of expansion - # from scipy.special. Using the expansion in scipy.special leads to errors. - p = zeros(x.shape) - p[x > 1] = cosh(order * arccosh(x[x > 1])) - p[x < -1] = (1 - 2*(order%2)) * cosh(order * arccosh(-x[x < -1])) - p[np.abs(x) <=1 ] = cos(order * arccos(x[np.abs(x) <= 1])) - - # Appropriate IDFT and filling up - # depending on even/odd M - if M % 2: - w = real(fft(p)) - n = (M + 1) / 2 - w = w[:n] / w[0] - w = concatenate((w[n - 1:0:-1], w)) - else: - p = p * exp(1.j*pi / M * r_[0:M]) - w = real(fft(p)) - n = M / 2 + 1 - w = w / w[1] - w = concatenate((w[n - 1:0:-1], w[1:n])) - if not sym and not odd: - w = w[:-1] - return w - - -def slepian(M,width,sym=1): - """Return the M-point slepian window. - - """ - if (M*width > 27.38): - raise ValueError, "Cannot reliably obtain slepian sequences for"\ - " M*width > 27.38." - if M < 1: - return array([]) - if M == 1: - return ones(1,'d') - odd = M % 2 - if not sym and not odd: - M = M+1 - - twoF = width/2.0 - alpha = (M-1)/2.0 - m = arange(0,M)-alpha - n = m[:,newaxis] - k = m[newaxis,:] - AF = twoF*special.sinc(twoF*(n-k)) - [lam,vec] = linalg.eig(AF) - ind = argmax(abs(lam),axis=-1) - w = abs(vec[:,ind]) - w = w / max(w) - - if not sym and not odd: - w = w[:-1] - return w - - def hilbert(x, N=None, axis=-1): """Compute the analytic signal. @@ -1423,88 +1062,6 @@ return b, a -def get_window(window,Nx,fftbins=1): - """Return a window of length Nx and type window. - - If fftbins is 1, create a "periodic" window ready to use with ifftshift - and be multiplied by the result of an fft (SEE ALSO fftfreq). - - Window types: boxcar, triang, blackman, hamming, hanning, bartlett, - parzen, bohman, blackmanharris, nuttall, barthann, - kaiser (needs beta), gaussian (needs std), - general_gaussian (needs power, width), - slepian (needs width) - - If the window requires no parameters, then it can be a string. - If the window requires parameters, the window argument should be a tuple - with the first argument the string name of the window, and the next - arguments the needed parameters. - If window is a floating point number, it is interpreted as the beta - parameter of the kaiser window. - """ - - sym = not fftbins - try: - beta = float(window) - except (TypeError, ValueError): - args = () - if isinstance(window, types.TupleType): - winstr = window[0] - if len(window) > 1: - args = window[1:] - elif isinstance(window, types.StringType): - if window in ['kaiser', 'ksr', 'gaussian', 'gauss', 'gss', - 'general gaussian', 'general_gaussian', - 'general gauss', 'general_gauss', 'ggs', - 'slepian', 'optimal', 'slep', 'dss']: - raise ValueError, "That window needs a parameter -- pass a tuple" - else: - winstr = window - - if winstr in ['blackman', 'black', 'blk']: - winfunc = blackman - elif winstr in ['triangle', 'triang', 'tri']: - winfunc = triang - elif winstr in ['hamming', 'hamm', 'ham']: - winfunc = hamming - elif winstr in ['bartlett', 'bart', 'brt']: - winfunc = bartlett - elif winstr in ['hanning', 'hann', 'han']: - winfunc = hanning - elif winstr in ['blackmanharris', 'blackharr','bkh']: - winfunc = blackmanharris - elif winstr in ['parzen', 'parz', 'par']: - winfunc = parzen - elif winstr in ['bohman', 'bman', 'bmn']: - winfunc = bohman - elif winstr in ['nuttall', 'nutl', 'nut']: - winfunc = nuttall - elif winstr in ['barthann', 'brthan', 'bth']: - winfunc = barthann - elif winstr in ['flattop', 'flat', 'flt']: - winfunc = flattop - elif winstr in ['kaiser', 'ksr']: - winfunc = kaiser - elif winstr in ['gaussian', 'gauss', 'gss']: - winfunc = gaussian - elif winstr in ['general gaussian', 'general_gaussian', - 'general gauss', 'general_gauss', 'ggs']: - winfunc = general_gaussian - elif winstr in ['boxcar', 'box', 'ones']: - winfunc = boxcar - elif winstr in ['slepian', 'slep', 'optimal', 'dss']: - winfunc = slepian - else: - raise ValueError, "Unknown window type." - - params = (Nx,)+args + (sym,) - else: - winfunc = kaiser - params = (Nx,beta,sym) - - return winfunc(*params) - - def resample(x,num,t=None,axis=0,window=None): """Resample to num samples using Fourier method along the given axis. Added: trunk/scipy/signal/windows.py =================================================================== --- trunk/scipy/signal/windows.py (rev 0) +++ trunk/scipy/signal/windows.py 2010-04-06 02:52:03 UTC (rev 6307) @@ -0,0 +1,449 @@ +"""The suite of window functions.""" + +import types + +import numpy as np +from scipy import special, linalg +from scipy.fftpack import fft + + +def boxcar(M, sym=True): + """The M-point boxcar window. + + """ + return np.ones(M, float) + +def triang(M, sym=True): + """The M-point triangular window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M + 1 + n = np.arange(1,int((M+1)/2)+1) + if M % 2 == 0: + w = (2*n-1.0)/M + w = np.r_[w, w[::-1]] + else: + w = 2*n/(M+1.0) + w = np.r_[w, w[-2::-1]] + + if not sym and not odd: + w = w[:-1] + return w + +def parzen(M, sym=True): + """The M-point Parzen window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(-(M-1)/2.0,(M-1)/2.0+0.5,1.0) + na = np.extract(n < -(M-1)/4.0, n) + nb = np.extract(abs(n) <= (M-1)/4.0, n) + wa = 2*(1-np.abs(na)/(M/2.0))**3.0 + wb = 1-6*(np.abs(nb)/(M/2.0))**2.0 + 6*(np.abs(nb)/(M/2.0))**3.0 + w = np.r_[wa,wb,wa[::-1]] + if not sym and not odd: + w = w[:-1] + return w + +def bohman(M, sym=True): + """The M-point Bohman window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + fac = np.abs(np.linspace(-1,1,M)[1:-1]) + w = (1 - fac) * np.cos(np.pi*fac) + 1.0/np.pi*np.sin(np.pi*fac) + w = np.r_[0,w,0] + if not sym and not odd: + w = w[:-1] + return w + +def blackman(M, sym=True): + """The M-point Blackman window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(0,M) + w = 0.42-0.5*np.cos(2.0*np.pi*n/(M-1)) + 0.08*np.cos(4.0*np.pi*n/(M-1)) + if not sym and not odd: + w = w[:-1] + return w + +def nuttall(M, sym=True): + """A minimum 4-term Blackman-Harris window according to Nuttall. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + a = [0.3635819, 0.4891775, 0.1365995, 0.0106411] + n = np.arange(0,M) + fac = n*2*np.pi/(M-1.0) + w = a[0] - a[1]*np.cos(fac) + a[2]*np.cos(2*fac) - a[3]*np.cos(3*fac) + if not sym and not odd: + w = w[:-1] + return w + +def blackmanharris(M, sym=True): + """The M-point minimum 4-term Blackman-Harris window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + a = [0.35875, 0.48829, 0.14128, 0.01168]; + n = np.arange(0,M) + fac = n*2*np.pi/(M-1.0) + w = a[0] - a[1]*np.cos(fac) + a[2]*np.cos(2*fac) - a[3]*np.cos(3*fac) + if not sym and not odd: + w = w[:-1] + return w + +def flattop(M, sym=True): + """The M-point Flat top window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + a = [0.2156, 0.4160, 0.2781, 0.0836, 0.0069] + n = np.arange(0,M) + fac = n*2*np.pi/(M-1.0) + w = a[0] - a[1]*np.cos(fac) + a[2]*np.cos(2*fac) - a[3]*np.cos(3*fac) + \ + a[4]*np.cos(4*fac) + if not sym and not odd: + w = w[:-1] + return w + + +def bartlett(M, sym=True): + """The M-point Bartlett window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(0,M) + w = np.where(np.less_equal(n,(M-1)/2.0),2.0*n/(M-1),2.0-2.0*n/(M-1)) + if not sym and not odd: + w = w[:-1] + return w + +def hanning(M, sym=True): + """The M-point Hanning window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(0,M) + w = 0.5-0.5*np.cos(2.0*np.pi*n/(M-1)) + if not sym and not odd: + w = w[:-1] + return w + +hann = hanning + +def barthann(M, sym=True): + """Return the M-point modified Bartlett-Hann window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(0,M) + fac = np.abs(n/(M-1.0)-0.5) + w = 0.62 - 0.48*fac + 0.38*np.cos(2*np.pi*fac) + if not sym and not odd: + w = w[:-1] + return w + +def hamming(M, sym=True): + """The M-point Hamming window. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + n = np.arange(0,M) + w = 0.54-0.46*np.cos(2.0*np.pi*n/(M-1)) + if not sym and not odd: + w = w[:-1] + return w + + +def kaiser(M, beta, sym=True): + """Return a Kaiser window of length M with shape parameter beta. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M + 1 + n = np.arange(0,M) + alpha = (M-1)/2.0 + w = special.i0(beta * np.sqrt(1-((n-alpha)/alpha)**2.0))/special.i0(beta) + if not sym and not odd: + w = w[:-1] + return w + +def gaussian(M, std, sym=True): + """Return a Gaussian window of length M with standard-deviation std. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M + 1 + n = np.arange(0,M) - (M-1.0)/2.0 + sig2 = 2*std*std + w = np.exp(-n**2 / sig2) + if not sym and not odd: + w = w[:-1] + return w + +def general_gaussian(M, p, sig, sym=True): + """Return a window with a generalized Gaussian shape. + + exp(-0.5*(x/sig)**(2*p)) + + half power point is at (2*log(2)))**(1/(2*p))*sig + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M + 1 + n = np.arange(0,M) - (M-1.0)/2.0 + w = np.exp(-0.5*(n/sig)**(2*p)) + if not sym and not odd: + w = w[:-1] + return w + + +# `chebwin` contributed by Kumar Appaiah. + +def chebwin(M, at, sym=True): + """Dolph-Chebyshev window. + + INPUTS: + + M : int + Window size + at : float + Attenuation (in dB) + sym : bool + Generates symmetric window if True. + + """ + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + + odd = M % 2 + if not sym and not odd: + M = M + 1 + + # compute the parameter beta + order = M - 1.0 + beta = np.cosh(1.0/order * np.arccosh(10**(np.abs(at)/20.))) + k = np.r_[0:M]*1.0 + x = beta * np.cos(np.pi*k/M) + #find the window's DFT coefficients + # Use analytic definition of Chebyshev polynomial instead of expansion + # from scipy.special. Using the expansion in scipy.special leads to errors. + p = np.zeros(x.shape) + p[x > 1] = np.cosh(order * np.arccosh(x[x > 1])) + p[x < -1] = (1 - 2*(order%2)) * np.cosh(order * np.arccosh(-x[x < -1])) + p[np.abs(x) <=1 ] = np.cos(order * np.arccos(x[np.abs(x) <= 1])) + + # Appropriate IDFT and filling up + # depending on even/odd M + if M % 2: + w = np.real(fft(p)) + n = (M + 1) / 2 + w = w[:n] / w[0] + w = np.concatenate((w[n - 1:0:-1], w)) + else: + p = p * np.exp(1.j*np.pi / M * np.r_[0:M]) + w = np.real(fft(p)) + n = M / 2 + 1 + w = w / w[1] + w = np.concatenate((w[n - 1:0:-1], w[1:n])) + if not sym and not odd: + w = w[:-1] + return w + + +def slepian(M, width, sym=True): + """Return the M-point slepian window. + + """ + if (M*width > 27.38): + raise ValueError, "Cannot reliably obtain slepian sequences for"\ + " M*width > 27.38." + if M < 1: + return np.array([]) + if M == 1: + return np.ones(1,'d') + odd = M % 2 + if not sym and not odd: + M = M+1 + + twoF = width/2.0 + alpha = (M-1)/2.0 + m = np.arange(0,M) - alpha + n = m[:,np.newaxis] + k = m[np.newaxis,:] + AF = twoF*special.sinc(twoF*(n-k)) + [lam,vec] = linalg.eig(AF) + ind = np.argmax(abs(lam),axis=-1) + w = np.abs(vec[:,ind]) + w = w / max(w) + + if not sym and not odd: + w = w[:-1] + return w + + +def get_window(window, Nx, fftbins=True): + """Return a window of length Nx and type window. + + If fftbins is True, create a "periodic" window ready to use with ifftshift + and be multiplied by the result of an fft (SEE ALSO fftfreq). + + Window types: boxcar, triang, blackman, hamming, hanning, bartlett, + parzen, bohman, blackmanharris, nuttall, barthann, + kaiser (needs beta), gaussian (needs std), + general_gaussian (needs power, width), + slepian (needs width) + + If the window requires no parameters, then it can be a string. + If the window requires parameters, the window argument should be a tuple + with the first argument the string name of the window, and the next + arguments the needed parameters. + If window is a floating point number, it is interpreted as the beta + parameter of the kaiser window. + """ + + sym = not fftbins + try: + beta = float(window) + except (TypeError, ValueError): + args = () + if isinstance(window, types.TupleType): + winstr = window[0] + if len(window) > 1: + args = window[1:] + elif isinstance(window, types.StringType): + if window in ['kaiser', 'ksr', 'gaussian', 'gauss', 'gss', + 'general gaussian', 'general_gaussian', + 'general gauss', 'general_gauss', 'ggs', + 'slepian', 'optimal', 'slep', 'dss']: + raise ValueError, "That window needs a parameter -- pass a tuple" + else: + winstr = window + + if winstr in ['blackman', 'black', 'blk']: + winfunc = blackman + elif winstr in ['triangle', 'triang', 'tri']: + winfunc = triang + elif winstr in ['hamming', 'hamm', 'ham']: + winfunc = hamming + elif winstr in ['bartlett', 'bart', 'brt']: + winfunc = bartlett + elif winstr in ['hanning', 'hann', 'han']: + winfunc = hanning + elif winstr in ['blackmanharris', 'blackharr','bkh']: + winfunc = blackmanharris + elif winstr in ['parzen', 'parz', 'par']: + winfunc = parzen + elif winstr in ['bohman', 'bman', 'bmn']: + winfunc = bohman + elif winstr in ['nuttall', 'nutl', 'nut']: + winfunc = nuttall + elif winstr in ['barthann', 'brthan', 'bth']: + winfunc = barthann + elif winstr in ['flattop', 'flat', 'flt']: + winfunc = flattop + elif winstr in ['kaiser', 'ksr']: + winfunc = kaiser + elif winstr in ['gaussian', 'gauss', 'gss']: + winfunc = gaussian + elif winstr in ['general gaussian', 'general_gaussian', + 'general gauss', 'general_gauss', 'ggs']: + winfunc = general_gaussian + elif winstr in ['boxcar', 'box', 'ones']: + winfunc = boxcar + elif winstr in ['slepian', 'slep', 'optimal', 'dss']: + winfunc = slepian + else: + raise ValueError, "Unknown window type." + + params = (Nx,)+args + (sym,) + else: + winfunc = kaiser + params = (Nx,beta,sym) + + return winfunc(*params) From scipy-svn at scipy.org Tue Apr 6 00:03:16 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 5 Apr 2010 23:03:16 -0500 (CDT) Subject: [Scipy-svn] r6308 - in trunk/scipy/signal: . tests Message-ID: <20100406040316.3871539CAF6@scipy.org> Author: warren.weckesser Date: 2010-04-05 23:03:16 -0500 (Mon, 05 Apr 2010) New Revision: 6308 Added: trunk/scipy/signal/tests/test_windows.py Modified: trunk/scipy/signal/tests/test_signaltools.py trunk/scipy/signal/windows.py Log: ENH: signal: Added 'chebwin' to the windows understood by get_window(); see ticket #831. Also moved tests of windows to their own file, and fixed one test in test_signaltools.py that was missing the actual comparison to complete the test. Modified: trunk/scipy/signal/tests/test_signaltools.py =================================================================== --- trunk/scipy/signal/tests/test_signaltools.py 2010-04-06 02:52:03 UTC (rev 6307) +++ trunk/scipy/signal/tests/test_signaltools.py 2010-04-06 04:03:16 UTC (rev 6308) @@ -139,7 +139,9 @@ d = array([[34,30,44, 62, 66],\ [52,48,62, 80, 84],\ [82,78,92,110,114]]) + assert_array_equal(c,d) + class OldTestConvolve2d(_TestConvolve2d): old_behavior = True @dec.deprecated() @@ -309,49 +311,7 @@ assert_array_equal(signal.order_filter([1,2,3],[1,0,1],1), [2,3,2]) -class TestChebWin: - def test_cheb_odd(self): - cheb_odd_true = array([0.200938, 0.107729, 0.134941, 0.165348, - 0.198891, 0.235450, 0.274846, 0.316836, - 0.361119, 0.407338, 0.455079, 0.503883, - 0.553248, 0.602637, 0.651489, 0.699227, - 0.745266, 0.789028, 0.829947, 0.867485, - 0.901138, 0.930448, 0.955010, 0.974482, - 0.988591, 0.997138, 1.000000, 0.997138, - 0.988591, 0.974482, 0.955010, 0.930448, - 0.901138, 0.867485, 0.829947, 0.789028, - 0.745266, 0.699227, 0.651489, 0.602637, - 0.553248, 0.503883, 0.455079, 0.407338, - 0.361119, 0.316836, 0.274846, 0.235450, - 0.198891, 0.165348, 0.134941, 0.107729, - 0.200938]) - cheb_odd = signal.chebwin(53, at=-40) - assert_array_almost_equal(cheb_odd, cheb_odd_true, decimal=4) - - def test_cheb_even(self): - cheb_even_true = array([0.203894, 0.107279, 0.133904, - 0.163608, 0.196338, 0.231986, - 0.270385, 0.311313, 0.354493, - 0.399594, 0.446233, 0.493983, - 0.542378, 0.590916, 0.639071, - 0.686302, 0.732055, 0.775783, - 0.816944, 0.855021, 0.889525, - 0.920006, 0.946060, 0.967339, - 0.983557, 0.994494, 1.000000, - 1.000000, 0.994494, 0.983557, - 0.967339, 0.946060, 0.920006, - 0.889525, 0.855021, 0.816944, - 0.775783, 0.732055, 0.686302, - 0.639071, 0.590916, 0.542378, - 0.493983, 0.446233, 0.399594, - 0.354493, 0.311313, 0.270385, - 0.231986, 0.196338, 0.163608, - 0.133904, 0.107279, 0.203894]) - - cheb_even = signal.chebwin(54, at=-40) - assert_array_almost_equal(cheb_even, cheb_even_true, decimal=4) - class _TestLinearFilter(TestCase): dt = None def test_rank1(self): Added: trunk/scipy/signal/tests/test_windows.py =================================================================== --- trunk/scipy/signal/tests/test_windows.py (rev 0) +++ trunk/scipy/signal/tests/test_windows.py 2010-04-06 04:03:16 UTC (rev 6308) @@ -0,0 +1,65 @@ + +from numpy import array, ones_like +from numpy.testing import assert_array_almost_equal, assert_array_equal +from scipy import signal + + +cheb_odd_true = array([0.200938, 0.107729, 0.134941, 0.165348, + 0.198891, 0.235450, 0.274846, 0.316836, + 0.361119, 0.407338, 0.455079, 0.503883, + 0.553248, 0.602637, 0.651489, 0.699227, + 0.745266, 0.789028, 0.829947, 0.867485, + 0.901138, 0.930448, 0.955010, 0.974482, + 0.988591, 0.997138, 1.000000, 0.997138, + 0.988591, 0.974482, 0.955010, 0.930448, + 0.901138, 0.867485, 0.829947, 0.789028, + 0.745266, 0.699227, 0.651489, 0.602637, + 0.553248, 0.503883, 0.455079, 0.407338, + 0.361119, 0.316836, 0.274846, 0.235450, + 0.198891, 0.165348, 0.134941, 0.107729, + 0.200938]) + +cheb_even_true = array([0.203894, 0.107279, 0.133904, + 0.163608, 0.196338, 0.231986, + 0.270385, 0.311313, 0.354493, + 0.399594, 0.446233, 0.493983, + 0.542378, 0.590916, 0.639071, + 0.686302, 0.732055, 0.775783, + 0.816944, 0.855021, 0.889525, + 0.920006, 0.946060, 0.967339, + 0.983557, 0.994494, 1.000000, + 1.000000, 0.994494, 0.983557, + 0.967339, 0.946060, 0.920006, + 0.889525, 0.855021, 0.816944, + 0.775783, 0.732055, 0.686302, + 0.639071, 0.590916, 0.542378, + 0.493983, 0.446233, 0.399594, + 0.354493, 0.311313, 0.270385, + 0.231986, 0.196338, 0.163608, + 0.133904, 0.107279, 0.203894]) + + +class TestChebWin(object): + + def test_cheb_odd(self): + cheb_odd = signal.chebwin(53, at=-40) + assert_array_almost_equal(cheb_odd, cheb_odd_true, decimal=4) + + def test_cheb_even(self): + cheb_even = signal.chebwin(54, at=-40) + assert_array_almost_equal(cheb_even, cheb_even_true, decimal=4) + + +class TestGetWindow(object): + + def test_boxcar(self): + w = signal.get_window('boxcar', 12) + assert_array_equal(w, ones_like(w)) + + def test_cheb_odd(self): + w = signal.get_window(('chebwin', -40), 53, fftbins=False) + assert_array_almost_equal(w, cheb_odd_true, decimal=4) + + def test_cheb_even(self): + w = signal.get_window(('chebwin', -40), 54, fftbins=False) + assert_array_almost_equal(w, cheb_even_true, decimal=4) Modified: trunk/scipy/signal/windows.py =================================================================== --- trunk/scipy/signal/windows.py 2010-04-06 02:52:03 UTC (rev 6307) +++ trunk/scipy/signal/windows.py 2010-04-06 04:03:16 UTC (rev 6308) @@ -377,7 +377,7 @@ parzen, bohman, blackmanharris, nuttall, barthann, kaiser (needs beta), gaussian (needs std), general_gaussian (needs power, width), - slepian (needs width) + slepian (needs width), chebwin (needs attenuation) If the window requires no parameters, then it can be a string. If the window requires parameters, the window argument should be a tuple @@ -400,8 +400,10 @@ if window in ['kaiser', 'ksr', 'gaussian', 'gauss', 'gss', 'general gaussian', 'general_gaussian', 'general gauss', 'general_gauss', 'ggs', - 'slepian', 'optimal', 'slep', 'dss']: - raise ValueError, "That window needs a parameter -- pass a tuple" + 'slepian', 'optimal', 'slep', 'dss', + 'chebwin', 'cheb']: + raise ValueError("The '" + window + "' window needs one or " + "more parameters -- pass a tuple.") else: winstr = window @@ -438,12 +440,14 @@ winfunc = boxcar elif winstr in ['slepian', 'slep', 'optimal', 'dss']: winfunc = slepian + elif winstr in ['chebwin', 'cheb']: + winfunc = chebwin else: raise ValueError, "Unknown window type." - params = (Nx,)+args + (sym,) + params = (Nx,) + args + (sym,) else: winfunc = kaiser - params = (Nx,beta,sym) + params = (Nx, beta, sym) return winfunc(*params) From scipy-svn at scipy.org Tue Apr 6 12:40:50 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 6 Apr 2010 11:40:50 -0500 (CDT) Subject: [Scipy-svn] r6309 - trunk/scipy/linalg Message-ID: <20100406164050.4E4D139CAF7@scipy.org> Author: warren.weckesser Date: 2010-04-06 11:40:50 -0500 (Tue, 06 Apr 2010) New Revision: 6309 Modified: trunk/scipy/linalg/generic_flapack.pyf Log: BUG: linalg: fixed typo in generic_flapack.pyf. Modified: trunk/scipy/linalg/generic_flapack.pyf =================================================================== --- trunk/scipy/linalg/generic_flapack.pyf 2010-04-06 04:03:16 UTC (rev 6308) +++ trunk/scipy/linalg/generic_flapack.pyf 2010-04-06 16:40:50 UTC (rev 6309) @@ -177,7 +177,7 @@ integer intent(in),optional :: lo = 0 integer intent(in),optional,depend(n) :: hi = n-1 dimension(n-1),intent(out),depend(n) :: tau - dimension(lwork),intent(cahce,hide),depend(lwork) :: work + dimension(lwork),intent(cache,hide),depend(lwork) :: work integer intent(in),optional,depend(n),check(lwork>=MAX(n,1)) :: lwork = MAX(n,1) integer intent(out) :: info From scipy-svn at scipy.org Tue Apr 6 12:55:57 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 6 Apr 2010 11:55:57 -0500 (CDT) Subject: [Scipy-svn] r6310 - trunk/doc/source Message-ID: <20100406165557.861C839CAF7@scipy.org> Author: warren.weckesser Date: 2010-04-06 11:55:57 -0500 (Tue, 06 Apr 2010) New Revision: 6310 Modified: trunk/doc/source/constants.rst Log: DOC: Fixed force units in the 'constants' doc. Modified: trunk/doc/source/constants.rst =================================================================== --- trunk/doc/source/constants.rst 2010-04-06 16:40:50 UTC (rev 6309) +++ trunk/doc/source/constants.rst 2010-04-06 16:55:57 UTC (rev 6310) @@ -567,9 +567,9 @@ ----- ==================== ======================================================= -``dyn`` one dyne in watts -``lbf`` one pound force in watts -``kgf`` one kilogram force in watts +``dyn`` one dyne in newtons +``lbf`` one pound force in newtons +``kgf`` one kilogram force in newtons ==================== ======================================================= Optics From scipy-svn at scipy.org Thu Apr 8 10:23:39 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 8 Apr 2010 09:23:39 -0500 (CDT) Subject: [Scipy-svn] r6311 - branches/0.7.x Message-ID: <20100408142339.B374239CAED@scipy.org> Author: stefan Date: 2010-04-08 09:23:39 -0500 (Thu, 08 Apr 2010) New Revision: 6311 Modified: branches/0.7.x/pavement.py Log: Correct WINE_PY26. Modified: branches/0.7.x/pavement.py =================================================================== --- branches/0.7.x/pavement.py 2010-04-06 16:55:57 UTC (rev 6310) +++ branches/0.7.x/pavement.py 2010-04-08 14:23:39 UTC (rev 6311) @@ -70,7 +70,7 @@ sys.path.pop(0) # Default python version -PYVER="2.5" +PYVER="2.6" DMG_DIR = "dmg-source" # Wine config for win32 builds @@ -78,10 +78,8 @@ WINE_PY25 = [r"C:\Python25\python.exe"] WINE_PY26 = [r"C:\Python26\python26.exe"] elif sys.platform == "darwin": - WINE_PY25 = ["/Applications/Darwine/Wine.bundle/Contents/bin/wine", - os.environ['HOME'] + '/.wine/drive_c/Python25/python.exe'] - WINE_PY26 = ["/Applications/Darwine/Wine.bundle/Contents/bin/wine", - os.environ['HOME'] + '/.wine/drive_c/Python26/python.exe'] + WINE_PY25 = ["wine", os.environ['HOME'] + "/.wine/drive_c/Python25/python.exe"] + WINE_PY26 = ["wine", os.environ['HOME'] + "/.wine/drive_c/Python26/python.exe"] else: WINE_PY25 = [os.environ['HOME'] + "/.wine/drive_c/Python25/python.exe"] WINE_PY26 = [os.environ['HOME'] + "/.wine/drive_c/Python26/python.exe"] From scipy-svn at scipy.org Thu Apr 8 10:24:23 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 8 Apr 2010 09:24:23 -0500 (CDT) Subject: [Scipy-svn] r6312 - branches/0.7.x Message-ID: <20100408142423.ABAA839CAED@scipy.org> Author: stefan Date: 2010-04-08 09:24:23 -0500 (Thu, 08 Apr 2010) New Revision: 6312 Modified: branches/0.7.x/pavement.py Log: Correct location of Atlas libs. Modified: branches/0.7.x/pavement.py =================================================================== --- branches/0.7.x/pavement.py 2010-04-08 14:23:39 UTC (rev 6311) +++ branches/0.7.x/pavement.py 2010-04-08 14:24:23 UTC (rev 6312) @@ -275,9 +275,9 @@ #------------------ # Wine-based builds #------------------ -SSE3_CFG = {'BLAS': 'None', 'LAPACK': 'None', 'ATLAS': r'C:\local\lib\yop\sse3'} -SSE2_CFG = {'BLAS': 'None', 'LAPACK': 'None', 'ATLAS': r'C:\local\lib\yop\sse2'} -NOSSE_CFG = {'ATLAS': 'None', 'BLAS': r'C:\local\lib\yop\nosse', 'LAPACK': r'C:\local\lib\yop\nosse'} +SSE3_CFG = {'BLAS': 'None', 'LAPACK': 'None', 'ATLAS': r'C:\local\lib\sse3'} +SSE2_CFG = {'BLAS': 'None', 'LAPACK': 'None', 'ATLAS': r'C:\local\lib\sse2'} +NOSSE_CFG = {'ATLAS': 'None', 'BLAS': r'C:\local\lib\nosse', 'LAPACK': r'C:\local\lib\nosse'} SITECFG = {"sse2" : SSE2_CFG, "sse3" : SSE3_CFG, "nosse" : NOSSE_CFG} From scipy-svn at scipy.org Thu Apr 8 10:25:03 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 8 Apr 2010 09:25:03 -0500 (CDT) Subject: [Scipy-svn] r6313 - branches/0.7.x Message-ID: <20100408142503.F062739CAED@scipy.org> Author: stefan Date: 2010-04-08 09:25:03 -0500 (Thu, 08 Apr 2010) New Revision: 6313 Modified: branches/0.7.x/pavement.py Log: Set python version in options to 2.6. Modified: branches/0.7.x/pavement.py =================================================================== --- branches/0.7.x/pavement.py 2010-04-08 14:24:23 UTC (rev 6312) +++ branches/0.7.x/pavement.py 2010-04-08 14:25:03 UTC (rev 6313) @@ -122,7 +122,7 @@ options(sphinx=Bunch(builddir="build", sourcedir="source", docroot='doc'), virtualenv=Bunch(script_name=BOOTSTRAP_SCRIPT, packages_to_install=["sphinx==0.6.5"]), - wininst=Bunch(pyver="2.5", scratch=True)) + wininst=Bunch(pyver="2.6", scratch=True)) def parse_numpy_version(pyexec): if isinstance(pyexec, str): From scipy-svn at scipy.org Thu Apr 8 10:25:40 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 8 Apr 2010 09:25:40 -0500 (CDT) Subject: [Scipy-svn] r6314 - branches/0.7.x Message-ID: <20100408142540.A86B539CAED@scipy.org> Author: stefan Date: 2010-04-08 09:25:40 -0500 (Thu, 08 Apr 2010) New Revision: 6314 Modified: branches/0.7.x/pavement.py Log: Fix MakeNSIS call when building superpack. Modified: branches/0.7.x/pavement.py =================================================================== --- branches/0.7.x/pavement.py 2010-04-08 14:25:03 UTC (rev 6313) +++ branches/0.7.x/pavement.py 2010-04-08 14:25:40 UTC (rev 6314) @@ -77,12 +77,15 @@ if sys.platform == "win32": WINE_PY25 = [r"C:\Python25\python.exe"] WINE_PY26 = [r"C:\Python26\python26.exe"] + MAKENSIS = ["makensis"] elif sys.platform == "darwin": WINE_PY25 = ["wine", os.environ['HOME'] + "/.wine/drive_c/Python25/python.exe"] WINE_PY26 = ["wine", os.environ['HOME'] + "/.wine/drive_c/Python26/python.exe"] + MAKENSIS = ["wine", "makensis"] else: WINE_PY25 = [os.environ['HOME'] + "/.wine/drive_c/Python25/python.exe"] WINE_PY26 = [os.environ['HOME'] + "/.wine/drive_c/Python26/python.exe"] + MAKENSIS = ["wine", "makensis"] WINE_PYS = {'2.6' : WINE_PY26, '2.5': WINE_PY25} SUPERPACK_BUILD = 'build-superpack' SUPERPACK_BINDIR = os.path.join(SUPERPACK_BUILD, 'binaries') @@ -359,8 +362,8 @@ def bdist_superpack(options): """Build all arch specific wininst installers.""" prepare_nsis_script(options.wininst.pyver, FULLVERSION) - subprocess.check_call(['makensis', 'scipy-superinstaller.nsi'], - cwd=SUPERPACK_BUILD) + subprocess.check_call(MAKENSIS + ['scipy-superinstaller.nsi'], + cwd=SUPERPACK_BUILD) pyver = options.wininst.pyver From scipy-svn at scipy.org Thu Apr 8 10:35:50 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 8 Apr 2010 09:35:50 -0500 (CDT) Subject: [Scipy-svn] r6315 - tags Message-ID: <20100408143550.E8EC639CAED@scipy.org> Author: stefan Date: 2010-04-08 09:35:50 -0500 (Thu, 08 Apr 2010) New Revision: 6315 Added: tags/0.7.2rc1/ Log: Tagging 0.7.2 release candidate 1. Copied: tags/0.7.2rc1 (from rev 6314, branches/0.7.x) From scipy-svn at scipy.org Fri Apr 9 03:45:21 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 02:45:21 -0500 (CDT) Subject: [Scipy-svn] r6316 - trunk/scipy/linalg Message-ID: <20100409074521.4E38239CAED@scipy.org> Author: cdavid Date: 2010-04-09 02:45:21 -0500 (Fri, 09 Apr 2010) New Revision: 6316 Modified: trunk/scipy/linalg/basic.py trunk/scipy/linalg/decomp.py Log: BUG: use correct string formatting in raising depending on info from lapack. Thanks to Yaroslav Halchenko for the fix. Modified: trunk/scipy/linalg/basic.py =================================================================== --- trunk/scipy/linalg/basic.py 2010-04-08 14:35:50 UTC (rev 6315) +++ trunk/scipy/linalg/basic.py 2010-04-09 07:45:21 UTC (rev 6316) @@ -69,8 +69,8 @@ x,info = getrs(lu,piv,b1,trans=trans,overwrite_b=overwrite_b) if info==0: return x - raise ValueError,\ - 'illegal value in %-th argument of internal gesv|posv'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal gesv|posv' % (-info)) def cho_solve((c, lower), b, overwrite_b=False): """Solve an equation system, a x = b, given the Cholesky factorization of a @@ -100,8 +100,8 @@ x,info = potrs(c,b1,lower=lower,overwrite_b=overwrite_b) if info==0: return x - raise ValueError,\ - 'illegal value in %-th argument of internal gesv|posv'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal gesv|posv' % (-info)) # Linear equations def solve(a, b, sym_pos=False, lower=False, overwrite_a=False, overwrite_b=False, @@ -156,8 +156,8 @@ return x if info>0: raise LinAlgError, "singular matrix" - raise ValueError,\ - 'illegal value in %-th argument of internal gesv|posv'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal gesv|posv' % (-info)) def solve_banded((l,u), ab, b, overwrite_ab=False, overwrite_b=False, debug=False): @@ -214,8 +214,8 @@ return x if info>0: raise LinAlgError, "singular matrix" - raise ValueError,\ - 'illegal value in %-th argument of internal gbsv'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal gbsv' % (-info)) def solveh_banded(ab, b, overwrite_ab=False, overwrite_b=False, lower=False): @@ -277,8 +277,8 @@ return c, x if info>0: raise LinAlgError, "%d-th leading minor not positive definite" % info - raise ValueError,\ - 'illegal value in %d-th argument of internal pbsv'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal pbsv' % (-info)) def cholesky_banded(ab, overwrite_ab=False, lower=False): """Cholesky decompose a banded Hermitian positive-definite matrix @@ -327,8 +327,8 @@ return c if info>0: raise LinAlgError, "%d-th leading minor not positive definite" % info - raise ValueError,\ - 'illegal value in %d-th argument of internal pbtrf'%(-info) + raise ValueError( + 'illegal value in %d-th argument of internal pbtrf' % (-info)) # matrix inversion @@ -370,7 +370,7 @@ ## return a_inv ## if info>0: raise LinAlgError, "singular matrix" ## if info<0: raise ValueError,\ -## 'illegal value in %-th argument of internal inv.getrf|getri'%(-info) +## 'illegal value in %d-th argument of internal inv.getrf|getri'%(-info) getrf,getri = get_lapack_funcs(('getrf','getri'),(a1,)) #XXX: C ATLAS versions of getrf/i have rowmajor=1, this could be # exploited for further optimization. But it will be probably @@ -399,8 +399,8 @@ else: # clapack inv_a,info = getri(lu,piv,overwrite_lu=1) if info>0: raise LinAlgError, "singular matrix" - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal getrf|getri'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal getrf|getri' % (-info)) return inv_a @@ -428,8 +428,8 @@ overwrite_a = overwrite_a or (a1 is not a and not hasattr(a,'__array__')) fdet, = get_flinalg_funcs(('det',),(a1,)) a_det,info = fdet(a1,overwrite_a=overwrite_a) - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal det.getrf'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal det.getrf' % (-info)) return a_det ### Linear Least Squares @@ -495,8 +495,8 @@ else: raise NotImplementedError,'calling gelss from %s' % (gelss.module_name) if info>0: raise LinAlgError, "SVD did not converge in Linear Least Squares" - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal gelss'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal gelss' % (-info)) resids = asarray([], dtype=x.dtype) if n0: raise LinAlgError,"generalized eig algorithm did not converge" only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) @@ -188,8 +188,8 @@ overwrite_a=overwrite_a) t = {'f':'F','d':'D'}[wr.dtype.char] w = wr+_I*wi - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal geev'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal geev' % (-info)) if info>0: raise LinAlgError,"eig algorithm did not converge" only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) @@ -536,8 +536,8 @@ w = w[:m] if not eigvals_only: v = v[:, :m] - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal %s'%(-info, internal_name) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal %s'%(-info, internal_name)) if info>0: raise LinAlgError,"eig algorithm did not converge" if eigvals_only: @@ -752,8 +752,8 @@ overwrite_a = overwrite_a or (_datanotshared(a1,a)) getrf, = get_lapack_funcs(('getrf',),(a1,)) lu, piv, info = getrf(a,overwrite_a=overwrite_a) - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal getrf (lu_factor)'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal getrf (lu_factor)' % (-info)) if info>0: warn("Diagonal number %d is exactly zero. Singular matrix." % info, RuntimeWarning) return lu, piv @@ -843,8 +843,8 @@ overwrite_a = overwrite_a or (_datanotshared(a1,a)) flu, = get_flinalg_funcs(('lu',),(a1,)) p,l,u,info = flu(a1,permute_l=permute_l,overwrite_a = overwrite_a) - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal lu.getrf'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal lu.getrf' % (-info)) if permute_l: return l,u return p,l,u @@ -922,8 +922,8 @@ else: # 'clapack' raise NotImplementedError,'calling gesdd from %s' % (gesdd.module_name) if info>0: raise LinAlgError, "SVD did not converge" - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal gesdd'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal gesdd' % (-info)) if compute_uv: return u,s,v else: @@ -1024,8 +1024,8 @@ potrf, = get_lapack_funcs(('potrf',),(a1,)) c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=1) if info>0: raise LinAlgError, "matrix not positive definite" - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal potrf'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal potrf' % (-info)) return c def cho_factor(a, lower=False, overwrite_a=False): @@ -1071,8 +1071,8 @@ potrf, = get_lapack_funcs(('potrf',),(a1,)) c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=0) if info>0: raise LinAlgError, "matrix not positive definite" - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal potrf'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal potrf' % (-info)) return c, lower def cho_solve(clow, b): @@ -1193,7 +1193,7 @@ qr,tau,work,info = geqrf(a1,lwork=lwork,overwrite_a=overwrite_a) if info<0: - raise ValueError("illegal value in %-th argument of internal geqrf" + raise ValueError("illegal value in %d-th argument of internal geqrf" % -info) if not econ or M0: raise LinAlgError, "Schur form not found. Possibly ill-conditioned." return result[0], result[-3] @@ -1569,13 +1569,13 @@ overwrite_a = overwrite_a or (_datanotshared(a1,a)) gehrd,gebal = get_lapack_funcs(('gehrd','gebal'),(a1,)) ba,lo,hi,pivscale,info = gebal(a,permute=1,overwrite_a = overwrite_a) - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal gebal (hessenberg)'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal gebal (hessenberg)' % (-info)) n = len(a1) lwork = calc_lwork.gehrd(gehrd.prefix,n,lo,hi) hq,tau,info = gehrd(ba,lo=lo,hi=hi,lwork=lwork,overwrite_a=1) - if info<0: raise ValueError,\ - 'illegal value in %-th argument of internal gehrd (hessenberg)'%(-info) + if info<0: raise ValueError( + 'illegal value in %d-th argument of internal gehrd (hessenberg)' % (-info)) if not calc_q: for i in range(lo,hi): From scipy-svn at scipy.org Fri Apr 9 03:45:32 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 02:45:32 -0500 (CDT) Subject: [Scipy-svn] r6317 - trunk/scipy/linalg Message-ID: <20100409074532.3059F39CAED@scipy.org> Author: cdavid Date: 2010-04-09 02:45:32 -0500 (Fri, 09 Apr 2010) New Revision: 6317 Modified: trunk/scipy/linalg/decomp.py Log: ENH: a bit more information upon failure in LinAlgError's from lapack Thanks to Yaroslav Halchenko for the fix Modified: trunk/scipy/linalg/decomp.py =================================================================== --- trunk/scipy/linalg/decomp.py 2010-04-09 07:45:21 UTC (rev 6316) +++ trunk/scipy/linalg/decomp.py 2010-04-09 07:45:32 UTC (rev 6317) @@ -82,7 +82,8 @@ w = (alphar+_I*alphai)/beta if info<0: raise ValueError( 'illegal value in %d-th argument of internal ggev' % (-info)) - if info>0: raise LinAlgError,"generalized eig algorithm did not converge" + if info>0: raise LinAlgError( + "generalized eig algorithm did not converge (info=%d)" % (info)) only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) if not (ggev.prefix in 'cz' or only_real): @@ -190,7 +191,9 @@ w = wr+_I*wi if info<0: raise ValueError( 'illegal value in %d-th argument of internal geev' % (-info)) - if info>0: raise LinAlgError,"eig algorithm did not converge" + if info>0: raise LinAlgError( + "eig algorithm did not converge (only eigenvalues "\ + "with order >=%d have converged)" % (info)) only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) if not (geev.prefix in 'cz' or only_real): @@ -1023,7 +1026,9 @@ overwrite_a = overwrite_a or _datanotshared(a1,a) potrf, = get_lapack_funcs(('potrf',),(a1,)) c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=1) - if info>0: raise LinAlgError, "matrix not positive definite" + if info>0: raise LinAlgError( + "matrix not positive definite (leading minor of order %d"\ + "is not positive definite)" % (info-1)) if info<0: raise ValueError( 'illegal value in %d-th argument of internal potrf' % (-info)) return c @@ -1070,7 +1075,9 @@ overwrite_a = overwrite_a or (_datanotshared(a1,a)) potrf, = get_lapack_funcs(('potrf',),(a1,)) c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=0) - if info>0: raise LinAlgError, "matrix not positive definite" + if info>0: raise LinAlgError( + "matrix not positive definite (leading minor of order %d"\ + "is not positive definite)"%(info-1)) if info<0: raise ValueError( 'illegal value in %d-th argument of internal potrf' % (-info)) return c, lower From scipy-svn at scipy.org Fri Apr 9 13:57:47 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 12:57:47 -0500 (CDT) Subject: [Scipy-svn] r6318 - trunk/scipy/linalg Message-ID: <20100409175747.0EF3839CAF2@scipy.org> Author: warren.weckesser Date: 2010-04-09 12:57:46 -0500 (Fri, 09 Apr 2010) New Revision: 6318 Modified: trunk/scipy/linalg/decomp.py Log: BUG: Fix missing parenthesis. Modified: trunk/scipy/linalg/decomp.py =================================================================== --- trunk/scipy/linalg/decomp.py 2010-04-09 07:45:32 UTC (rev 6317) +++ trunk/scipy/linalg/decomp.py 2010-04-09 17:57:46 UTC (rev 6318) @@ -1341,7 +1341,7 @@ lwork = work[0] rq,tau,work,info = gerqf(a1,lwork=lwork,overwrite_a=overwrite_a) if info<0: raise ValueError( - 'illegal value in %d-th argument of internal geqrf'%(-info) + 'illegal value in %d-th argument of internal geqrf' % (-info)) gemm, = get_blas_funcs(('gemm',),(rq,)) t = rq.dtype.char R = special_matrices.triu(rq) From scipy-svn at scipy.org Fri Apr 9 21:47:20 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 20:47:20 -0500 (CDT) Subject: [Scipy-svn] r6319 - in trunk: doc/source scipy/linalg scipy/linalg/tests Message-ID: <20100410014720.4E95239CAF2@scipy.org> Author: warren.weckesser Date: 2010-04-09 20:47:20 -0500 (Fri, 09 Apr 2010) New Revision: 6319 Added: trunk/scipy/linalg/decomp_cholesky.py trunk/scipy/linalg/decomp_lu.py trunk/scipy/linalg/decomp_qr.py trunk/scipy/linalg/decomp_schur.py trunk/scipy/linalg/decomp_svd.py Modified: trunk/doc/source/linalg.rst trunk/scipy/linalg/__init__.py trunk/scipy/linalg/basic.py trunk/scipy/linalg/decomp.py trunk/scipy/linalg/generic_flapack.pyf trunk/scipy/linalg/info.py trunk/scipy/linalg/matfuncs.py trunk/scipy/linalg/misc.py trunk/scipy/linalg/tests/test_basic.py trunk/scipy/linalg/tests/test_decomp.py Log: linalg: REF: Split basic.py and decomp.py into multiple modules. Removed duplicated versions of the functions cho_solve and lu_solve. Return value of solveh_banded no longer includes the Choleskey factor (ticket #676). ENH: Added cho_solve_banded. STY: Clean up python style in many places. Modified: trunk/doc/source/linalg.rst =================================================================== --- trunk/doc/source/linalg.rst 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/doc/source/linalg.rst 2010-04-10 01:47:20 UTC (rev 6319) @@ -20,8 +20,8 @@ pinv pinv2 -Eigenvalues and Decompositions -============================== +Eigenvalue Problem +================== .. autosummary:: :toctree: generated/ @@ -32,6 +32,13 @@ eigvalsh eig_banded eigvals_banded + +Decompositions +============== + +.. autosummary:: + :toctree: generated/ + lu lu_factor lu_solve @@ -43,6 +50,7 @@ cholesky_banded cho_factor cho_solve + cho_solve_banded qr schur rsf2csf Modified: trunk/scipy/linalg/__init__.py =================================================================== --- trunk/scipy/linalg/__init__.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/__init__.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -5,12 +5,19 @@ from info import __doc__ from linalg_version import linalg_version as __version__ +from misc import * from basic import * from decomp import * +from decomp_lu import * +from decomp_cholesky import * +from decomp_qr import * +from decomp_svd import * +from decomp_schur import * from matfuncs import * from blas import * +from special_matrices import * -__all__ = filter(lambda s:not s.startswith('_'),dir()) +__all__ = filter(lambda s: not s.startswith('_'), dir()) from numpy.dual import register_func for k in ['norm', 'inv', 'svd', 'solve', 'det', 'eig', 'eigh', 'eigvals', Modified: trunk/scipy/linalg/basic.py =================================================================== --- trunk/scipy/linalg/basic.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/basic.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -1,108 +1,22 @@ -## Automatically adapted for scipy Oct 18, 2005 by - -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Pearu Peterson, March 2002 # # w/ additions by Travis Oliphant, March 2002 -__all__ = ['solve', 'inv', 'det', 'lstsq', 'pinv', 'pinv2', - 'cholesky_banded', 'solveh_banded', 'lu_solve', 'cho_solve', - 'solve_banded', - # From special_matrices: - 'tri','tril', 'triu', 'toeplitz', 'circulant', 'hankel', 'kron', - 'hadamard', 'block_diag', 'all_mat', - # From misc: - 'LinAlgError', 'norm', - ] +__all__ = ['solve', 'solveh_banded', 'solve_banded', + 'inv', 'det', 'lstsq', 'pinv', 'pinv2'] from numpy import asarray, zeros, sum, conjugate, dot, transpose, \ asarray_chkfinite, single import numpy -#from blas import get_blas_funcs from flinalg import get_flinalg_funcs from lapack import get_lapack_funcs -from misc import LinAlgError, norm -from special_matrices import tri, tril, triu, toeplitz, circulant, hankel, \ - hadamard, kron, block_diag, all_mat +from misc import LinAlgError from scipy.linalg import calc_lwork -import decomp +import decomp_svd -def lu_solve((lu, piv), b, trans=0, overwrite_b=False): - """Solve an equation system, a x = b, given the LU factorization of a - - Parameters - ---------- - (lu, piv) - Factorization of the coefficient matrix a, as given by lu_factor - b : array - Right-hand side - trans : {0, 1, 2} - Type of system to solve: - - ===== ========= - trans system - ===== ========= - 0 a x = b - 1 a^T x = b - 2 a^H x = b - ===== ========= - - Returns - ------- - x : array - Solution to the system - - See also - -------- - lu_factor : LU factorize a matrix - - """ - b1 = asarray_chkfinite(b) - overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) - if lu.shape[0] != b1.shape[0]: - raise ValueError, "incompatible dimensions." - getrs, = get_lapack_funcs(('getrs',),(lu,b1)) - x,info = getrs(lu,piv,b1,trans=trans,overwrite_b=overwrite_b) - if info==0: - return x - raise ValueError( - 'illegal value in %d-th argument of internal gesv|posv' % (-info)) - -def cho_solve((c, lower), b, overwrite_b=False): - """Solve an equation system, a x = b, given the Cholesky factorization of a - - Parameters - ---------- - (c, lower) - Cholesky factorization of a, as given by cho_factor - b : array - Right-hand side - - Returns - ------- - x : array - The solution to the system a x = b - - See also - -------- - cho_factor : Cholesky factorization of a matrix - - """ - b1 = asarray_chkfinite(b) - overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) - if c.shape[0] != b1.shape[0]: - raise ValueError, "incompatible dimensions." - potrs, = get_lapack_funcs(('potrs',),(c,b1)) - x,info = potrs(c,b1,lower=lower,overwrite_b=overwrite_b) - if info==0: - return x - raise ValueError( - 'illegal value in %d-th argument of internal gesv|posv' % (-info)) - # Linear equations def solve(a, b, sym_pos=False, lower=False, overwrite_a=False, overwrite_b=False, debug=False): @@ -141,25 +55,23 @@ print 'solve:overwrite_a=',overwrite_a print 'solve:overwrite_b=',overwrite_b if sym_pos: - posv, = get_lapack_funcs(('posv',),(a1,b1)) - c,x,info = posv(a1,b1, - lower = lower, + posv, = get_lapack_funcs(('posv',), (a1,b1)) + c, x, info = posv(a1, b1, lower=lower, overwrite_a=overwrite_a, overwrite_b=overwrite_b) else: - gesv, = get_lapack_funcs(('gesv',),(a1,b1)) - lu,piv,x,info = gesv(a1,b1, - overwrite_a=overwrite_a, - overwrite_b=overwrite_b) + gesv, = get_lapack_funcs(('gesv',), (a1,b1)) + lu, piv, x, info = gesv(a1, b1, overwrite_a=overwrite_a, + overwrite_b=overwrite_b) - if info==0: + if info == 0: return x - if info>0: - raise LinAlgError, "singular matrix" - raise ValueError( - 'illegal value in %d-th argument of internal gesv|posv' % (-info)) + if info > 0: + raise LinAlgError("singular matrix") + raise ValueError('illegal value in %d-th argument of internal gesv|posv' + % -info) -def solve_banded((l,u), ab, b, overwrite_ab=False, overwrite_b=False, +def solve_banded((l, u), ab, b, overwrite_ab=False, overwrite_b=False, debug=False): """Solve the equation a x = b for x, assuming a is banded matrix. @@ -193,32 +105,29 @@ The solution to the system a x = b """ - a1, b1 = map(asarray_chkfinite,(ab,b)) + a1, b1 = map(asarray_chkfinite, (ab, b)) # Validate shapes. if a1.shape[-1] != b1.shape[0]: raise ValueError("shapes of ab and b are not compatible.") - if l+u+1 != a1.shape[0]: + if l + u + 1 != a1.shape[0]: raise ValueError("invalid values for the number of lower and upper diagonals:" " l+u+1 (%d) does not equal ab.shape[0] (%d)" % (l+u+1, ab.shape[0])) overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) - gbsv, = get_lapack_funcs(('gbsv',),(a1,b1)) - a2 = zeros((2*l+u+1,a1.shape[1]), dtype=gbsv.dtype) + gbsv, = get_lapack_funcs(('gbsv',), (a1, b1)) + a2 = zeros((2*l+u+1, a1.shape[1]), dtype=gbsv.dtype) a2[l:,:] = a1 - lu,piv,x,info = gbsv(l,u,a2,b1, - overwrite_ab=1, - overwrite_b=overwrite_b) - if info==0: + lu, piv, x, info = gbsv(l, u, a2, b1, overwrite_ab=True, + overwrite_b=overwrite_b) + if info == 0: return x - if info>0: - raise LinAlgError, "singular matrix" - raise ValueError( - 'illegal value in %d-th argument of internal gbsv' % (-info)) + if info > 0: + raise LinAlgError("singular matrix") + raise ValueError('illegal value in %d-th argument of internal gbsv' % -info) -def solveh_banded(ab, b, overwrite_ab=False, overwrite_b=False, - lower=False): +def solveh_banded(ab, b, overwrite_ab=False, overwrite_b=False, lower=False): """Solve equation a x = b. a is Hermitian positive-definite banded matrix. The matrix a is stored in ab either in lower diagonal or upper @@ -256,81 +165,27 @@ Returns ------- - c : array, shape (M, u+1) - Cholesky factorization of a, in the same banded format as ab x : array, shape (M,) or (M, K) The solution to the system a x = b """ - ab, b = map(asarray_chkfinite,(ab,b)) + ab, b = map(asarray_chkfinite, (ab, b)) # Validate shapes. if ab.shape[-1] != b.shape[0]: raise ValueError("shapes of ab and b are not compatible.") - pbsv, = get_lapack_funcs(('pbsv',),(ab,b)) - c,x,info = pbsv(ab,b, - lower=lower, - overwrite_ab=overwrite_ab, - overwrite_b=overwrite_b) - if info==0: - return c, x - if info>0: - raise LinAlgError, "%d-th leading minor not positive definite" % info - raise ValueError( - 'illegal value in %d-th argument of internal pbsv' % (-info)) + pbsv, = get_lapack_funcs(('pbsv',), (ab, b)) + c, x, info = pbsv(ab, b, lower=lower, overwrite_ab=overwrite_ab, + overwrite_b=overwrite_b) + if info > 0: + raise LinAlgError("%d-th leading minor not positive definite" % info) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal pbsv' + % -info) + return x -def cholesky_banded(ab, overwrite_ab=False, lower=False): - """Cholesky decompose a banded Hermitian positive-definite matrix - The matrix a is stored in ab either in lower diagonal or upper - diagonal ordered form: - - ab[u + i - j, j] == a[i,j] (if upper form; i <= j) - ab[ i - j, j] == a[i,j] (if lower form; i >= j) - - Example of ab (shape of a is (6,6), u=2):: - - upper form: - * * a02 a13 a24 a35 - * a01 a12 a23 a34 a45 - a00 a11 a22 a33 a44 a55 - - lower form: - a00 a11 a22 a33 a44 a55 - a10 a21 a32 a43 a54 * - a20 a31 a42 a53 * * - - Parameters - ---------- - ab : array, shape (u + 1, M) - Banded matrix - overwrite_ab : boolean - Discard data in ab (may enhance performance) - lower : boolean - Is the matrix in the lower form. (Default is upper form) - - Returns - ------- - c : array, shape (u+1, M) - Cholesky factorization of a, in the same banded format as ab - - """ - ab = asarray_chkfinite(ab) - - pbtrf, = get_lapack_funcs(('pbtrf',),(ab,)) - c,info = pbtrf(ab, - lower=lower, - overwrite_ab=overwrite_ab) - - if info==0: - return c - if info>0: - raise LinAlgError, "%d-th leading minor not positive definite" % info - raise ValueError( - 'illegal value in %d-th argument of internal pbtrf' % (-info)) - - # matrix inversion def inv(a, overwrite_a=False): """Compute the inverse of a matrix. @@ -360,7 +215,7 @@ """ a1 = asarray_chkfinite(a) if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: - raise ValueError, 'expected square matrix' + raise ValueError('expected square matrix') overwrite_a = overwrite_a or (a1 is not a and not hasattr(a,'__array__')) #XXX: I found no advantage or disadvantage of using finv. ## finv, = get_flinalg_funcs(('inv',),(a1,)) @@ -371,21 +226,21 @@ ## if info>0: raise LinAlgError, "singular matrix" ## if info<0: raise ValueError,\ ## 'illegal value in %d-th argument of internal inv.getrf|getri'%(-info) - getrf,getri = get_lapack_funcs(('getrf','getri'),(a1,)) + getrf, getri = get_lapack_funcs(('getrf','getri'), (a1,)) #XXX: C ATLAS versions of getrf/i have rowmajor=1, this could be # exploited for further optimization. But it will be probably # a mess. So, a good testing site is required before trying # to do that. - if getrf.module_name[:7]=='clapack'!=getri.module_name[:7]: + if getrf.module_name[:7] == 'clapack' != getri.module_name[:7]: # ATLAS 3.2.1 has getrf but not getri. - lu,piv,info = getrf(transpose(a1), - rowmajor=0,overwrite_a=overwrite_a) + lu, piv, info = getrf(transpose(a1), rowmajor=0, + overwrite_a=overwrite_a) lu = transpose(lu) else: - lu,piv,info = getrf(a1,overwrite_a=overwrite_a) - if info==0: + lu, piv, info = getrf(a1, overwrite_a=overwrite_a) + if info == 0: if getri.module_name[:7] == 'flapack': - lwork = calc_lwork.getri(getri.prefix,a1.shape[0]) + lwork = calc_lwork.getri(getri.prefix, a1.shape[0]) lwork = lwork[1] # XXX: the following line fixes curious SEGFAULT when # benchmarking 500x500 matrix inverse. This seems to @@ -393,14 +248,15 @@ # minimal (when using lwork[0] instead of lwork[1]) then # all tests pass. Further investigation is required if # more such SEGFAULTs occur. - lwork = int(1.01*lwork) - inv_a,info = getri(lu,piv, - lwork=lwork,overwrite_lu=1) + lwork = int(1.01 * lwork) + inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1) else: # clapack - inv_a,info = getri(lu,piv,overwrite_lu=1) - if info>0: raise LinAlgError, "singular matrix" - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal getrf|getri' % (-info)) + inv_a, info = getri(lu, piv, overwrite_lu=1) + if info > 0: + raise LinAlgError("singular matrix") + if info < 0: + raise ValueError('illegal value in %d-th argument of internal ' + 'getrf|getri' % -info) return inv_a @@ -424,12 +280,13 @@ """ a1 = asarray_chkfinite(a) if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: - raise ValueError, 'expected square matrix' + raise ValueError('expected square matrix') overwrite_a = overwrite_a or (a1 is not a and not hasattr(a,'__array__')) - fdet, = get_flinalg_funcs(('det',),(a1,)) - a_det,info = fdet(a1,overwrite_a=overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal det.getrf' % (-info)) + fdet, = get_flinalg_funcs(('det',), (a1,)) + a_det, info = fdet(a1, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal ' + 'det.getrf' % -info) return a_det ### Linear Least Squares @@ -468,41 +325,47 @@ Raises LinAlgError if computation does not converge """ - a1, b1 = map(asarray_chkfinite,(a,b)) + a1, b1 = map(asarray_chkfinite, (a, b)) if len(a1.shape) != 2: raise ValueError, 'expected matrix' - m,n = a1.shape - if len(b1.shape)==2: nrhs = b1.shape[1] - else: nrhs = 1 + m, n = a1.shape + if len(b1.shape) == 2: + nrhs = b1.shape[1] + else: + nrhs = 1 if m != b1.shape[0]: - raise ValueError, 'incompatible dimensions' - gelss, = get_lapack_funcs(('gelss',),(a1,b1)) - if n>m: + raise ValueError('incompatible dimensions') + gelss, = get_lapack_funcs(('gelss',), (a1, b1)) + if n > m: # need to extend b matrix as it will be filled with # a larger solution matrix - b2 = zeros((n,nrhs), dtype=gelss.dtype) - if len(b1.shape)==2: b2[:m,:] = b1 - else: b2[:m,0] = b1 + b2 = zeros((n, nrhs), dtype=gelss.dtype) + if len(b1.shape) == 2: + b2[:m,:] = b1 + else: + b2[:m,0] = b1 b1 = b2 overwrite_a = overwrite_a or (a1 is not a and not hasattr(a,'__array__')) overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) if gelss.module_name[:7] == 'flapack': - lwork = calc_lwork.gelss(gelss.prefix,m,n,nrhs)[1] - v,x,s,rank,info = gelss(a1,b1,cond = cond, - lwork = lwork, - overwrite_a = overwrite_a, - overwrite_b = overwrite_b) + lwork = calc_lwork.gelss(gelss.prefix, m, n, nrhs)[1] + v, x, s, rank, info = gelss(a1, b1, cond=cond, lwork=lwork, + overwrite_a=overwrite_a, + overwrite_b=overwrite_b) else: - raise NotImplementedError,'calling gelss from %s' % (gelss.module_name) - if info>0: raise LinAlgError, "SVD did not converge in Linear Least Squares" - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal gelss' % (-info)) + raise NotImplementedError('calling gelss from %s' % gelss.module_name) + if info > 0: + raise LinAlgError("SVD did not converge in Linear Least Squares") + if info < 0: + raise ValueError('illegal value in %d-th argument of internal gelss' + % -info) resids = asarray([], dtype=x.dtype) - if n cutoff: psigma[i,i] = 1.0/conjugate(s[i]) Modified: trunk/scipy/linalg/decomp.py =================================================================== --- trunk/scipy/linalg/decomp.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/decomp.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Pearu Peterson, March 2002 # @@ -9,83 +7,70 @@ # additions by Bart Vandereycken, June 2006 # additions by Andrew D Straw, May 2007 # additions by Tiziano Zito, November 2008 - +# +# April 2010: Functions for LU, QR, SVD, Schur and Cholesky decompositions were +# moved to their own files. Still in this file are functions for eigenstuff +# and for the Hessenberg form. + __all__ = ['eig','eigh','eig_banded','eigvals','eigvalsh', 'eigvals_banded', - 'lu','svd','svdvals','diagsvd','cholesky','qr','qr_old','rq', - 'schur','rsf2csf','lu_factor','cho_factor','cho_solve','orth', 'hessenberg'] -from misc import LinAlgError -import misc -import special_matrices - -from warnings import warn -from lapack import get_lapack_funcs, find_best_lapack_type -from blas import get_blas_funcs -from flinalg import get_flinalg_funcs -from scipy.linalg import calc_lwork import numpy from numpy import array, asarray_chkfinite, asarray, diag, zeros, ones, \ - single, isfinite, inexact, complexfloating, nonzero, iscomplexobj + isfinite, inexact, nonzero, iscomplexobj, cast -cast = numpy.cast -r_ = numpy.r_ +# Local imports +from scipy.linalg import calc_lwork +from misc import LinAlgError, _datanotshared +from lapack import get_lapack_funcs +from blas import get_blas_funcs + _I = cast['F'](1j) -def _make_complex_eigvecs(w,vin,cmplx_tcode): - v = numpy.array(vin,dtype=cmplx_tcode) + +def _make_complex_eigvecs(w, vin, cmplx_tcode): + v = numpy.array(vin, dtype=cmplx_tcode) #ind = numpy.flatnonzero(numpy.not_equal(w.imag,0.0)) - ind = numpy.flatnonzero(numpy.logical_and(numpy.not_equal(w.imag,0.0), + ind = numpy.flatnonzero(numpy.logical_and(numpy.not_equal(w.imag, 0.0), numpy.isfinite(w))) - vnew = numpy.zeros((v.shape[0],len(ind)>>1),cmplx_tcode) - vnew.real = numpy.take(vin,ind[::2],1) - vnew.imag = numpy.take(vin,ind[1::2],1) + vnew = numpy.zeros((v.shape[0], len(ind)>>1), cmplx_tcode) + vnew.real = numpy.take(vin, ind[::2],1) + vnew.imag = numpy.take(vin, ind[1::2],1) count = 0 conj = numpy.conjugate for i in range(len(ind)/2): - v[:,ind[2*i]] = vnew[:,count] - v[:,ind[2*i+1]] = conj(vnew[:,count]) + v[:, ind[2*i]] = vnew[:, count] + v[:, ind[2*i+1]] = conj(vnew[:, count]) count += 1 return v - - -def _datanotshared(a1,a): - if a1 is a: - return False - else: - #try comparing data pointers - try: - return a1.__array_interface__['data'][0] != a.__array_interface__['data'][0] - except: - return True - - -def _geneig(a1,b,left,right,overwrite_a,overwrite_b): +def _geneig(a1, b, left, right, overwrite_a, overwrite_b): b1 = asarray(b) - overwrite_b = overwrite_b or _datanotshared(b1,b) + overwrite_b = overwrite_b or _datanotshared(b1, b) if len(b1.shape) != 2 or b1.shape[0] != b1.shape[1]: - raise ValueError, 'expected square matrix' - ggev, = get_lapack_funcs(('ggev',),(a1,b1)) - cvl,cvr = left,right + raise ValueError('expected square matrix') + ggev, = get_lapack_funcs(('ggev',), (a1, b1)) + cvl, cvr = left, right if ggev.module_name[:7] == 'clapack': - raise NotImplementedError,'calling ggev from %s' % (ggev.module_name) - res = ggev(a1,b1,lwork=-1) + raise NotImplementedError('calling ggev from %s' % ggev.module_name) + res = ggev(a1, b1, lwork=-1) lwork = res[-2][0] if ggev.prefix in 'cz': - alpha,beta,vl,vr,work,info = ggev(a1,b1,cvl,cvr,lwork, - overwrite_a,overwrite_b) + alpha, beta, vl, vr, work, info = ggev(a1, b1, cvl, cvr, lwork, + overwrite_a, overwrite_b) w = alpha / beta else: - alphar,alphai,beta,vl,vr,work,info = ggev(a1,b1,cvl,cvr,lwork, - overwrite_a,overwrite_b) - w = (alphar+_I*alphai)/beta - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal ggev' % (-info)) - if info>0: raise LinAlgError( - "generalized eig algorithm did not converge (info=%d)" % (info)) + alphar, alphai, beta, vl, vr, work, info = ggev(a1, b1, cvl, cvr, lwork, + overwrite_a,overwrite_b) + w = (alphar + _I * alphai) / beta + if info < 0: + raise ValueError('illegal value in %d-th argument of internal ggev' + % -info) + if info > 0: + raise LinAlgError("generalized eig algorithm did not converge (info=%d)" + % info) - only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) + only_real = numpy.logical_and.reduce(numpy.equal(w.imag, 0.0)) if not (ggev.prefix in 'cz' or only_real): t = w.dtype.char if left: @@ -100,7 +85,7 @@ return w, vl return w, vr -def eig(a,b=None, left=False, right=True, overwrite_a=False, overwrite_b=False): +def eig(a, b=None, left=False, right=True, overwrite_a=False, overwrite_b=False): """Solve an ordinary or generalized eigenvalue problem of a square matrix. Find eigenvalues w and right or left eigenvectors of a general matrix:: @@ -153,49 +138,50 @@ a1 = asarray_chkfinite(a) if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: raise ValueError('expected square matrix') - overwrite_a = overwrite_a or (_datanotshared(a1,a)) + overwrite_a = overwrite_a or (_datanotshared(a1, a)) if b is not None: b = asarray_chkfinite(b) if b.shape != a1.shape: raise ValueError('a and b must have the same shape') - return _geneig(a1,b,left,right,overwrite_a,overwrite_b) - geev, = get_lapack_funcs(('geev',),(a1,)) - compute_vl,compute_vr=left,right + return _geneig(a1, b, left, right, overwrite_a, overwrite_b) + geev, = get_lapack_funcs(('geev',), (a1,)) + compute_vl, compute_vr = left, right if geev.module_name[:7] == 'flapack': - lwork = calc_lwork.geev(geev.prefix,a1.shape[0], - compute_vl,compute_vr)[1] + lwork = calc_lwork.geev(geev.prefix, a1.shape[0], + compute_vl, compute_vr)[1] if geev.prefix in 'cz': - w,vl,vr,info = geev(a1,lwork = lwork, - compute_vl=compute_vl, - compute_vr=compute_vr, - overwrite_a=overwrite_a) + w, vl, vr, info = geev(a1, lwork=lwork, + compute_vl=compute_vl, + compute_vr=compute_vr, + overwrite_a=overwrite_a) else: - wr,wi,vl,vr,info = geev(a1,lwork = lwork, - compute_vl=compute_vl, - compute_vr=compute_vr, - overwrite_a=overwrite_a) + wr, wi, vl, vr, info = geev(a1, lwork=lwork, + compute_vl=compute_vl, + compute_vr=compute_vr, + overwrite_a=overwrite_a) t = {'f':'F','d':'D'}[wr.dtype.char] - w = wr+_I*wi + w = wr + _I * wi else: # 'clapack' if geev.prefix in 'cz': - w,vl,vr,info = geev(a1, - compute_vl=compute_vl, - compute_vr=compute_vr, - overwrite_a=overwrite_a) - else: - wr,wi,vl,vr,info = geev(a1, + w, vl, vr, info = geev(a1, compute_vl=compute_vl, compute_vr=compute_vr, overwrite_a=overwrite_a) + else: + wr, wi, vl, vr, info = geev(a1, + compute_vl=compute_vl, + compute_vr=compute_vr, + overwrite_a=overwrite_a) t = {'f':'F','d':'D'}[wr.dtype.char] - w = wr+_I*wi - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal geev' % (-info)) - if info>0: raise LinAlgError( - "eig algorithm did not converge (only eigenvalues "\ - "with order >=%d have converged)" % (info)) + w = wr + _I * wi + if info < 0: + raise ValueError('illegal value in %d-th argument of internal geev' + % -info) + if info > 0: + raise LinAlgError("eig algorithm did not converge (only eigenvalues " + "with order >= %d have converged)" % info) - only_real = numpy.logical_and.reduce(numpy.equal(w.imag,0.0)) + only_real = numpy.logical_and.reduce(numpy.equal(w.imag, 0.0)) if not (geev.prefix in 'cz' or only_real): t = w.dtype.char if left: @@ -281,17 +267,17 @@ """ a1 = asarray_chkfinite(a) if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: - raise ValueError, 'expected square matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) + raise ValueError('expected square matrix') + overwrite_a = overwrite_a or (_datanotshared(a1, a)) if iscomplexobj(a1): cplx = True else: cplx = False if b is not None: b1 = asarray_chkfinite(b) - overwrite_b = overwrite_b or _datanotshared(b1,b) + overwrite_b = overwrite_b or _datanotshared(b1, b) if len(b1.shape) != 2 or b1.shape[0] != b1.shape[1]: - raise ValueError, 'expected square matrix' + raise ValueError('expected square matrix') if b1.shape != a1.shape: raise ValueError("wrong b dimensions %s, should " @@ -401,7 +387,7 @@ def eig_banded(a_band, lower=False, eigvals_only=False, overwrite_a_band=False, select='a', select_range=None, max_ev = 0): - """Solve real symmetric or complex hermetian band matrix eigenvalue problem. + """Solve real symmetric or complex hermitian band matrix eigenvalue problem. Find eigenvalues w and optionally right eigenvectors v of a:: @@ -472,34 +458,34 @@ """ if eigvals_only or overwrite_a_band: a1 = asarray_chkfinite(a_band) - overwrite_a_band = overwrite_a_band or (_datanotshared(a1,a_band)) + overwrite_a_band = overwrite_a_band or (_datanotshared(a1, a_band)) else: a1 = array(a_band) if issubclass(a1.dtype.type, inexact) and not isfinite(a1).all(): - raise ValueError, "array must not contain infs or NaNs" + raise ValueError("array must not contain infs or NaNs") overwrite_a_band = 1 if len(a1.shape) != 2: - raise ValueError, 'expected two-dimensional array' + raise ValueError('expected two-dimensional array') if select.lower() not in [0, 1, 2, 'a', 'v', 'i', 'all', 'value', 'index']: - raise ValueError, 'invalid argument for select' + raise ValueError('invalid argument for select') if select.lower() in [0, 'a', 'all']: if a1.dtype.char in 'GFD': - bevd, = get_lapack_funcs(('hbevd',),(a1,)) + bevd, = get_lapack_funcs(('hbevd',), (a1,)) # FIXME: implement this somewhen, for now go with builtin values # FIXME: calc optimal lwork by calling ?hbevd(lwork=-1) # or by using calc_lwork.f ??? # lwork = calc_lwork.hbevd(bevd.prefix, a1.shape[0], lower) internal_name = 'hbevd' else: # a1.dtype.char in 'fd': - bevd, = get_lapack_funcs(('sbevd',),(a1,)) + bevd, = get_lapack_funcs(('sbevd',), (a1,)) # FIXME: implement this somewhen, for now go with builtin values # see above # lwork = calc_lwork.sbevd(bevd.prefix, a1.shape[0], lower) internal_name = 'sbevd' - w,v,info = bevd(a1, compute_v = not eigvals_only, - lower = lower, - overwrite_ab = overwrite_a_band) + w,v,info = bevd(a1, compute_v=not eigvals_only, + lower=lower, + overwrite_ab=overwrite_a_band) if select.lower() in [1, 2, 'i', 'v', 'index', 'value']: # calculate certain range only if select.lower() in [2, 'i', 'index']: @@ -517,31 +503,33 @@ max_ev = 1 # calculate optimal abstol for dsbevx (see manpage) if a1.dtype.char in 'fF': # single precision - lamch, = get_lapack_funcs(('lamch',),(array(0, dtype='f'),)) + lamch, = get_lapack_funcs(('lamch',), (array(0, dtype='f'),)) else: - lamch, = get_lapack_funcs(('lamch',),(array(0, dtype='d'),)) + lamch, = get_lapack_funcs(('lamch',), (array(0, dtype='d'),)) abstol = 2 * lamch('s') if a1.dtype.char in 'GFD': - bevx, = get_lapack_funcs(('hbevx',),(a1,)) + bevx, = get_lapack_funcs(('hbevx',), (a1,)) internal_name = 'hbevx' else: # a1.dtype.char in 'gfd' - bevx, = get_lapack_funcs(('sbevx',),(a1,)) + bevx, = get_lapack_funcs(('sbevx',), (a1,)) internal_name = 'sbevx' # il+1, iu+1: translate python indexing (0 ... N-1) into Fortran # indexing (1 ... N) w, v, m, ifail, info = bevx(a1, vl, vu, il+1, iu+1, - compute_v = not eigvals_only, - mmax = max_ev, - range = select, lower = lower, - overwrite_ab = overwrite_a_band, + compute_v=not eigvals_only, + mmax=max_ev, + range=select, lower=lower, + overwrite_ab=overwrite_a_band, abstol=abstol) # crop off w and v w = w[:m] if not eigvals_only: v = v[:, :m] - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal %s'%(-info, internal_name)) - if info>0: raise LinAlgError,"eig algorithm did not converge" + if info < 0: + raise ValueError('illegal value in %d-th argument of internal %s' + % (-info, internal_name)) + if info > 0: + raise LinAlgError("eig algorithm did not converge") if eigvals_only: return w @@ -580,7 +568,7 @@ eigh : eigenvalues and eigenvectors of symmetric/Hermitean arrays. """ - return eig(a,b=b,left=0,right=0,overwrite_a=overwrite_a) + return eig(a, b=b, left=0, right=0, overwrite_a=overwrite_a) def eigvalsh(a, b=None, lower=True, overwrite_a=False, overwrite_b=False, turbo=True, eigvals=None, type=1): @@ -710,837 +698,13 @@ eig : eigenvalues and right eigenvectors for non-symmetric arrays """ - return eig_banded(a_band,lower=lower,eigvals_only=1, + return eig_banded(a_band, lower=lower, eigvals_only=1, overwrite_a_band=overwrite_a_band, select=select, select_range=select_range) -def lu_factor(a, overwrite_a=False): - """Compute pivoted LU decomposition of a matrix. - - The decomposition is:: - - A = P L U - - where P is a permutation matrix, L lower triangular with unit - diagonal elements, and U upper triangular. - - Parameters - ---------- - a : array, shape (M, M) - Matrix to decompose - overwrite_a : boolean - Whether to overwrite data in A (may increase performance) - - Returns - ------- - lu : array, shape (N, N) - Matrix containing U in its upper triangle, and L in its lower triangle. - The unit diagonal elements of L are not stored. - piv : array, shape (N,) - Pivot indices representing the permutation matrix P: - row i of matrix was interchanged with row piv[i]. - - See also - -------- - lu_solve : solve an equation system using the LU factorization of a matrix - - Notes - ----- - This is a wrapper to the *GETRF routines from LAPACK. - - """ - a1 = asarray(a) - if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]): - raise ValueError, 'expected square matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - getrf, = get_lapack_funcs(('getrf',),(a1,)) - lu, piv, info = getrf(a,overwrite_a=overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal getrf (lu_factor)' % (-info)) - if info>0: warn("Diagonal number %d is exactly zero. Singular matrix." % info, - RuntimeWarning) - return lu, piv - -def lu_solve(a_lu_pivots, b): - """Solve an equation system, a x = b, given the LU factorization of a - - Parameters - ---------- - (lu, piv) - Factorization of the coefficient matrix a, as given by lu_factor - b : array - Right-hand side - - Returns - ------- - x : array - Solution to the system - - See also - -------- - lu_factor : LU factorize a matrix - - """ - a_lu, pivots = a_lu_pivots - a_lu = asarray_chkfinite(a_lu) - pivots = asarray_chkfinite(pivots) - b = asarray_chkfinite(b) - _assert_squareness(a_lu) - - getrs, = get_lapack_funcs(('getrs',),(a_lu,)) - b, info = getrs(a_lu,pivots,b) - if info < 0: - msg = "Argument %d to lapack's ?getrs() has an illegal value." % info - raise TypeError, msg - if info > 0: - msg = "Unknown error occured int ?getrs(): error code = %d" % info - raise TypeError, msg - return b - - -def lu(a, permute_l=False, overwrite_a=False): - """Compute pivoted LU decompostion of a matrix. - - The decomposition is:: - - A = P L U - - where P is a permutation matrix, L lower triangular with unit - diagonal elements, and U upper triangular. - - Parameters - ---------- - a : array, shape (M, N) - Array to decompose - permute_l : boolean - Perform the multiplication P*L (Default: do not permute) - overwrite_a : boolean - Whether to overwrite data in a (may improve performance) - - Returns - ------- - (If permute_l == False) - p : array, shape (M, M) - Permutation matrix - l : array, shape (M, K) - Lower triangular or trapezoidal matrix with unit diagonal. - K = min(M, N) - u : array, shape (K, N) - Upper triangular or trapezoidal matrix - - (If permute_l == True) - pl : array, shape (M, K) - Permuted L matrix. - K = min(M, N) - u : array, shape (K, N) - Upper triangular or trapezoidal matrix - - Notes - ----- - This is a LU factorization routine written for Scipy. - - """ - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2: - raise ValueError, 'expected matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - flu, = get_flinalg_funcs(('lu',),(a1,)) - p,l,u,info = flu(a1,permute_l=permute_l,overwrite_a = overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal lu.getrf' % (-info)) - if permute_l: - return l,u - return p,l,u - -def svd(a, full_matrices=True, compute_uv=True, overwrite_a=False): - """Singular Value Decomposition. - - Factorizes the matrix a into two unitary matrices U and Vh and - an 1d-array s of singular values (real, non-negative) such that - a == U S Vh if S is an suitably shaped matrix of zeros whose - main diagonal is s. - - Parameters - ---------- - a : array, shape (M, N) - Matrix to decompose - full_matrices : boolean - If true, U, Vh are shaped (M,M), (N,N) - If false, the shapes are (M,K), (K,N) where K = min(M,N) - compute_uv : boolean - Whether to compute also U, Vh in addition to s (Default: true) - overwrite_a : boolean - Whether data in a is overwritten (may improve performance) - - Returns - ------- - U: array, shape (M,M) or (M,K) depending on full_matrices - s: array, shape (K,) - The singular values, sorted so that s[i] >= s[i+1]. K = min(M, N) - Vh: array, shape (N,N) or (K,N) depending on full_matrices - - For compute_uv = False, only s is returned. - - Raises LinAlgError if SVD computation does not converge - - Examples - -------- - >>> from scipy import random, linalg, allclose, dot - >>> a = random.randn(9, 6) + 1j*random.randn(9, 6) - >>> U, s, Vh = linalg.svd(a) - >>> U.shape, Vh.shape, s.shape - ((9, 9), (6, 6), (6,)) - - >>> U, s, Vh = linalg.svd(a, full_matrices=False) - >>> U.shape, Vh.shape, s.shape - ((9, 6), (6, 6), (6,)) - >>> S = linalg.diagsvd(s, 6, 6) - >>> allclose(a, dot(U, dot(S, Vh))) - True - - >>> s2 = linalg.svd(a, compute_uv=False) - >>> allclose(s, s2) - True - - See also - -------- - svdvals : return singular values of a matrix - diagsvd : return the Sigma matrix, given the vector s - - """ - # A hack until full_matrices == 0 support is fixed here. - if full_matrices == 0: - import numpy.linalg - return numpy.linalg.svd(a, full_matrices=0, compute_uv=compute_uv) - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2: - raise ValueError, 'expected matrix' - m,n = a1.shape - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - gesdd, = get_lapack_funcs(('gesdd',),(a1,)) - if gesdd.module_name[:7] == 'flapack': - lwork = calc_lwork.gesdd(gesdd.prefix,m,n,compute_uv)[1] - u,s,v,info = gesdd(a1,compute_uv = compute_uv, lwork = lwork, - overwrite_a = overwrite_a) - else: # 'clapack' - raise NotImplementedError,'calling gesdd from %s' % (gesdd.module_name) - if info>0: raise LinAlgError, "SVD did not converge" - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal gesdd' % (-info)) - if compute_uv: - return u,s,v - else: - return s - -def svdvals(a, overwrite_a=False): - """Compute singular values of a matrix. - - Parameters - ---------- - a : array, shape (M, N) - Matrix to decompose - overwrite_a : boolean - Whether data in a is overwritten (may improve performance) - - Returns - ------- - s: array, shape (K,) - The singular values, sorted so that s[i] >= s[i+1]. K = min(M, N) - - Raises LinAlgError if SVD computation does not converge - - See also - -------- - svd : return the full singular value decomposition of a matrix - diagsvd : return the Sigma matrix, given the vector s - - """ - return svd(a,compute_uv=0,overwrite_a=overwrite_a) - -def diagsvd(s, M, N): - """Construct the sigma matrix in SVD from singular values and size M,N. - - Parameters - ---------- - s : array, shape (M,) or (N,) - Singular values - M : integer - N : integer - Size of the matrix whose singular values are s - - Returns - ------- - S : array, shape (M, N) - The S-matrix in the singular value decomposition - - """ - part = diag(s) - typ = part.dtype.char - MorN = len(s) - if MorN == M: - return r_['-1',part,zeros((M,N-M),typ)] - elif MorN == N: - return r_[part,zeros((M-N,N),typ)] - else: - raise ValueError, "Length of s must be M or N." - -def cholesky(a, lower=False, overwrite_a=False): - """Compute the Cholesky decomposition of a matrix. - - Returns the Cholesky decomposition, :lm:`A = L L^*` or :lm:`A = U^* U` - of a Hermitian positive-definite matrix :lm:`A`. - - Parameters - ---------- - a : array, shape (M, M) - Matrix to be decomposed - lower : boolean - Whether to compute the upper or lower triangular Cholesky factorization - (Default: upper-triangular) - overwrite_a : boolean - Whether to overwrite data in a (may improve performance) - - Returns - ------- - B : array, shape (M, M) - Upper- or lower-triangular Cholesky factor of A - - Raises LinAlgError if decomposition fails - - Examples - -------- - >>> from scipy import array, linalg, dot - >>> a = array([[1,-2j],[2j,5]]) - >>> L = linalg.cholesky(a, lower=True) - >>> L - array([[ 1.+0.j, 0.+0.j], - [ 0.+2.j, 1.+0.j]]) - >>> dot(L, L.T.conj()) - array([[ 1.+0.j, 0.-2.j], - [ 0.+2.j, 5.+0.j]]) - - """ - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: - raise ValueError, 'expected square matrix' - overwrite_a = overwrite_a or _datanotshared(a1,a) - potrf, = get_lapack_funcs(('potrf',),(a1,)) - c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=1) - if info>0: raise LinAlgError( - "matrix not positive definite (leading minor of order %d"\ - "is not positive definite)" % (info-1)) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal potrf' % (-info)) - return c - -def cho_factor(a, lower=False, overwrite_a=False): - """Compute the Cholesky decomposition of a matrix, to use in cho_solve - - Returns a matrix containing the Cholesky decomposition, - ``A = L L*`` or ``A = U* U`` of a Hermitian positive-definite matrix `a`. - The return value can be directly used as the first parameter to cho_solve. - - .. warning:: - The returned matrix also contains random data in the entries not - used by the Cholesky decomposition. If you need to zero these - entries, use the function `cholesky` instead. - - Parameters - ---------- - a : array, shape (M, M) - Matrix to be decomposed - lower : boolean - Whether to compute the upper or lower triangular Cholesky factorization - (Default: upper-triangular) - overwrite_a : boolean - Whether to overwrite data in a (may improve performance) - - Returns - ------- - c : array, shape (M, M) - Matrix whose upper or lower triangle contains the Cholesky factor - of `a`. Other parts of the matrix contain random data. - lower : boolean - Flag indicating whether the factor is in the lower or upper triangle - - Raises - ------ - LinAlgError - Raised if decomposition fails. - - """ - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: - raise ValueError, 'expected square matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - potrf, = get_lapack_funcs(('potrf',),(a1,)) - c,info = potrf(a1,lower=lower,overwrite_a=overwrite_a,clean=0) - if info>0: raise LinAlgError( - "matrix not positive definite (leading minor of order %d"\ - "is not positive definite)"%(info-1)) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal potrf' % (-info)) - return c, lower - -def cho_solve(clow, b): - """Solve a previously factored symmetric system of equations. - - The equation system is - - A x = b, A = U^H U = L L^H - - and A is real symmetric or complex Hermitian. - - Parameters - ---------- - clow : tuple (c, lower) - Cholesky factor and a flag indicating whether it is lower triangular. - The return value from cho_factor can be used. - b : array - Right-hand side of the equation system - - First input is a tuple (LorU, lower) which is the output to cho_factor. - Second input is the right-hand side. - - Returns - ------- - x : array - Solution to the equation system - - """ - c, lower = clow - c = asarray_chkfinite(c) - _assert_squareness(c) - b = asarray_chkfinite(b) - potrs, = get_lapack_funcs(('potrs',),(c,)) - b, info = potrs(c,b,lower) - if info < 0: - msg = "Argument %d to lapack's ?potrs() has an illegal value." % info - raise TypeError, msg - if info > 0: - msg = "Unknown error occured int ?potrs(): error code = %d" % info - raise TypeError, msg - return b - -def qr(a, overwrite_a=False, lwork=None, econ=None, mode='qr'): - """Compute QR decomposition of a matrix. - - Calculate the decomposition :lm:`A = Q R` where Q is unitary/orthogonal - and R upper triangular. - - Parameters - ---------- - a : array, shape (M, N) - Matrix to be decomposed - overwrite_a : boolean - Whether data in a is overwritten (may improve performance) - lwork : integer - Work array size, lwork >= a.shape[1]. If None or -1, an optimal size - is computed. - econ : boolean - Whether to compute the economy-size QR decomposition, making shapes - of Q and R (M, K) and (K, N) instead of (M,M) and (M,N). K=min(M,N). - Default is False. - mode : {'qr', 'r'} - Determines what information is to be returned: either both Q and R - or only R. - - Returns - ------- - (if mode == 'qr') - Q : double or complex array, shape (M, M) or (M, K) for econ==True - - (for any mode) - R : double or complex array, shape (M, N) or (K, N) for econ==True - Size K = min(M, N) - - Raises LinAlgError if decomposition fails - - Notes - ----- - This is an interface to the LAPACK routines dgeqrf, zgeqrf, - dorgqr, and zungqr. - - Examples - -------- - >>> from scipy import random, linalg, dot - >>> a = random.randn(9, 6) - >>> q, r = linalg.qr(a) - >>> allclose(a, dot(q, r)) - True - >>> q.shape, r.shape - ((9, 9), (9, 6)) - - >>> r2 = linalg.qr(a, mode='r') - >>> allclose(r, r2) - - >>> q3, r3 = linalg.qr(a, econ=True) - >>> q3.shape, r3.shape - ((9, 6), (6, 6)) - - """ - if econ is None: - econ = False - else: - warn("qr econ argument will be removed after scipy 0.7. " - "The economy transform will then be available through " - "the mode='economic' argument.", DeprecationWarning) - - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2: - raise ValueError("expected 2D array") - M, N = a1.shape - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - - geqrf, = get_lapack_funcs(('geqrf',),(a1,)) - if lwork is None or lwork == -1: - # get optimal work array - qr,tau,work,info = geqrf(a1,lwork=-1,overwrite_a=1) - lwork = work[0] - - qr,tau,work,info = geqrf(a1,lwork=lwork,overwrite_a=overwrite_a) - if info<0: - raise ValueError("illegal value in %d-th argument of internal geqrf" - % -info) - - if not econ or M= a.shape[1]. If None or -1, an optimal size - is computed. - - Returns - ------- - Q : double or complex array, shape (M, M) - R : double or complex array, shape (M, N) - Size K = min(M, N) - - Raises LinAlgError if decomposition fails - - """ - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2: - raise ValueError, 'expected matrix' - M,N = a1.shape - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - geqrf, = get_lapack_funcs(('geqrf',),(a1,)) - if lwork is None or lwork == -1: - # get optimal work array - qr,tau,work,info = geqrf(a1,lwork=-1,overwrite_a=1) - lwork = work[0] - qr,tau,work,info = geqrf(a1,lwork=lwork,overwrite_a=overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal geqrf' % (-info)) - gemm, = get_blas_funcs(('gemm',),(qr,)) - t = qr.dtype.char - R = special_matrices.triu(qr) - Q = numpy.identity(M,dtype=t) - ident = numpy.identity(M,dtype=t) - zeros = numpy.zeros - for i in range(min(M,N)): - v = zeros((M,),t) - v[i] = 1 - v[i+1:M] = qr[i+1:M,i] - H = gemm(-tau[i],v,v,1+0j,ident,trans_b=2) - Q = gemm(1,Q,H) - return Q, R - - - -def rq(a, overwrite_a=False, lwork=None): - """Compute RQ decomposition of a square real matrix. - - Calculate the decomposition :lm:`A = R Q` where Q is unitary/orthogonal - and R upper triangular. - - Parameters - ---------- - a : array, shape (M, M) - Square real matrix to be decomposed - overwrite_a : boolean - Whether data in a is overwritten (may improve performance) - lwork : integer - Work array size, lwork >= a.shape[1]. If None or -1, an optimal size - is computed. - econ : boolean - - Returns - ------- - R : double array, shape (M, N) or (K, N) for econ==True - Size K = min(M, N) - Q : double or complex array, shape (M, M) or (M, K) for econ==True - - Raises LinAlgError if decomposition fails - - """ - # TODO: implement support for non-square and complex arrays - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2: - raise ValueError, 'expected matrix' - M,N = a1.shape - if M != N: - raise ValueError, 'expected square matrix' - if issubclass(a1.dtype.type,complexfloating): - raise ValueError, 'expected real (non-complex) matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - gerqf, = get_lapack_funcs(('gerqf',),(a1,)) - if lwork is None or lwork == -1: - # get optimal work array - rq,tau,work,info = gerqf(a1,lwork=-1,overwrite_a=1) - lwork = work[0] - rq,tau,work,info = gerqf(a1,lwork=lwork,overwrite_a=overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal geqrf' % (-info)) - gemm, = get_blas_funcs(('gemm',),(rq,)) - t = rq.dtype.char - R = special_matrices.triu(rq) - Q = numpy.identity(M,dtype=t) - ident = numpy.identity(M,dtype=t) - zeros = numpy.zeros - - k = min(M,N) - for i in range(k): - v = zeros((M,),t) - v[N-k+i] = 1 - v[0:N-k+i] = rq[M-k+i,0:N-k+i] - H = gemm(-tau[i],v,v,1+0j,ident,trans_b=2) - Q = gemm(1,Q,H) - return R, Q - _double_precision = ['i','l','d'] -def schur(a, output='real', lwork=None, overwrite_a=False): - """Compute Schur decomposition of a matrix. - The Schur decomposition is - - A = Z T Z^H - - where Z is unitary and T is either upper-triangular, or for real - Schur decomposition (output='real'), quasi-upper triangular. In - the quasi-triangular form, 2x2 blocks describing complex-valued - eigenvalue pairs may extrude from the diagonal. - - Parameters - ---------- - a : array, shape (M, M) - Matrix to decompose - output : {'real', 'complex'} - Construct the real or complex Schur decomposition (for real matrices). - lwork : integer - Work array size. If None or -1, it is automatically computed. - overwrite_a : boolean - Whether to overwrite data in a (may improve performance) - - Returns - ------- - T : array, shape (M, M) - Schur form of A. It is real-valued for the real Schur decomposition. - Z : array, shape (M, M) - An unitary Schur transformation matrix for A. - It is real-valued for the real Schur decomposition. - - See also - -------- - rsf2csf : Convert real Schur form to complex Schur form - - """ - if not output in ['real','complex','r','c']: - raise ValueError, "argument must be 'real', or 'complex'" - a1 = asarray_chkfinite(a) - if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]): - raise ValueError, 'expected square matrix' - typ = a1.dtype.char - if output in ['complex','c'] and typ not in ['F','D']: - if typ in _double_precision: - a1 = a1.astype('D') - typ = 'D' - else: - a1 = a1.astype('F') - typ = 'F' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - gees, = get_lapack_funcs(('gees',),(a1,)) - if lwork is None or lwork == -1: - # get optimal work array - result = gees(lambda x: None,a,lwork=-1) - lwork = result[-2][0] - result = gees(lambda x: None,a,lwork=result[-2][0],overwrite_a=overwrite_a) - info = result[-1] - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal gees' % (-info)) - elif info>0: raise LinAlgError, "Schur form not found. Possibly ill-conditioned." - return result[0], result[-3] - -eps = numpy.finfo(float).eps -feps = numpy.finfo(single).eps - -_array_kind = {'b':0, 'h':0, 'B': 0, 'i':0, 'l': 0, 'f': 0, 'd': 0, 'F': 1, 'D': 1} -_array_precision = {'i': 1, 'l': 1, 'f': 0, 'd': 1, 'F': 0, 'D': 1} -_array_type = [['f', 'd'], ['F', 'D']] - -def _commonType(*arrays): - kind = 0 - precision = 0 - for a in arrays: - t = a.dtype.char - kind = max(kind, _array_kind[t]) - precision = max(precision, _array_precision[t]) - return _array_type[kind][precision] - -def _castCopy(type, *arrays): - cast_arrays = () - for a in arrays: - if a.dtype.char == type: - cast_arrays = cast_arrays + (a.copy(),) - else: - cast_arrays = cast_arrays + (a.astype(type),) - if len(cast_arrays) == 1: - return cast_arrays[0] - else: - return cast_arrays - -def _assert_squareness(*arrays): - for a in arrays: - if max(a.shape) != min(a.shape): - raise LinAlgError, 'Array must be square' - -def rsf2csf(T, Z): - """Convert real Schur form to complex Schur form. - - Convert a quasi-diagonal real-valued Schur form to the upper triangular - complex-valued Schur form. - - Parameters - ---------- - T : array, shape (M, M) - Real Schur form of the original matrix - Z : array, shape (M, M) - Schur transformation matrix - - Returns - ------- - T : array, shape (M, M) - Complex Schur form of the original matrix - Z : array, shape (M, M) - Schur transformation matrix corresponding to the complex form - - See also - -------- - schur : Schur decompose a matrix - - """ - Z,T = map(asarray_chkfinite, (Z,T)) - if len(Z.shape) !=2 or Z.shape[0] != Z.shape[1]: - raise ValueError, "matrix must be square." - if len(T.shape) !=2 or T.shape[0] != T.shape[1]: - raise ValueError, "matrix must be square." - if T.shape[0] != Z.shape[0]: - raise ValueError, "matrices must be same dimension." - N = T.shape[0] - arr = numpy.array - t = _commonType(Z, T, arr([3.0],'F')) - Z, T = _castCopy(t, Z, T) - conj = numpy.conj - dot = numpy.dot - r_ = numpy.r_ - transp = numpy.transpose - for m in range(N-1,0,-1): - if abs(T[m,m-1]) > eps*(abs(T[m-1,m-1]) + abs(T[m,m])): - k = slice(m-1,m+1) - mu = eigvals(T[k,k]) - T[m,m] - r = misc.norm([mu[0], T[m,m-1]]) - c = mu[0] / r - s = T[m,m-1] / r - G = r_[arr([[conj(c),s]],dtype=t),arr([[-s,c]],dtype=t)] - Gc = conj(transp(G)) - j = slice(m-1,N) - T[k,j] = dot(G,T[k,j]) - i = slice(0,m+1) - T[i,k] = dot(T[i,k], Gc) - i = slice(0,N) - Z[i,k] = dot(Z[i,k], Gc) - T[m,m-1] = 0.0; - return T, Z - - -# Orthonormal decomposition - -def orth(A): - """Construct an orthonormal basis for the range of A using SVD - - Parameters - ---------- - A : array, shape (M, N) - - Returns - ------- - Q : array, shape (M, K) - Orthonormal basis for the range of A. - K = effective rank of A, as determined by automatic cutoff - - See also - -------- - svd : Singular value decomposition of a matrix - - """ - u,s,vh = svd(A) - M,N = A.shape - tol = max(M,N)*numpy.amax(s)*eps - num = numpy.sum(s > tol,dtype=int) - Q = u[:,:num] - return Q - def hessenberg(a, calc_q=False, overwrite_a=False): """Compute Hessenberg form of a matrix. @@ -1572,39 +736,41 @@ """ a1 = asarray(a) if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]): - raise ValueError, 'expected square matrix' - overwrite_a = overwrite_a or (_datanotshared(a1,a)) - gehrd,gebal = get_lapack_funcs(('gehrd','gebal'),(a1,)) - ba,lo,hi,pivscale,info = gebal(a,permute=1,overwrite_a = overwrite_a) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal gebal (hessenberg)' % (-info)) + raise ValueError('expected square matrix') + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + gehrd,gebal = get_lapack_funcs(('gehrd','gebal'), (a1,)) + ba, lo, hi, pivscale, info = gebal(a, permute=1, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal gebal ' + '(hessenberg)' % -info) n = len(a1) - lwork = calc_lwork.gehrd(gehrd.prefix,n,lo,hi) - hq,tau,info = gehrd(ba,lo=lo,hi=hi,lwork=lwork,overwrite_a=1) - if info<0: raise ValueError( - 'illegal value in %d-th argument of internal gehrd (hessenberg)' % (-info)) + lwork = calc_lwork.gehrd(gehrd.prefix, n, lo, hi) + hq, tau, info = gehrd(ba, lo=lo, hi=hi, lwork=lwork, overwrite_a=1) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal gehrd ' + '(hessenberg)' % -info) if not calc_q: - for i in range(lo,hi): - hq[i+2:hi+1,i] = 0.0 + for i in range(lo, hi): + hq[i+2:hi+1, i] = 0.0 return hq # XXX: Use ORGHR routines to compute q. - ger,gemm = get_blas_funcs(('ger','gemm'),(hq,)) + ger,gemm = get_blas_funcs(('ger','gemm'), (hq,)) typecode = hq.dtype.char q = None - for i in range(lo,hi): + for i in range(lo, hi): if tau[i]==0.0: continue - v = zeros(n,dtype=typecode) + v = zeros(n, dtype=typecode) v[i+1] = 1.0 - v[i+2:hi+1] = hq[i+2:hi+1,i] - hq[i+2:hi+1,i] = 0.0 - h = ger(-tau[i],v,v,a=diag(ones(n,dtype=typecode)),overwrite_a=1) + v[i+2:hi+1] = hq[i+2:hi+1, i] + hq[i+2:hi+1, i] = 0.0 + h = ger(-tau[i], v, v,a=diag(ones(n, dtype=typecode)), overwrite_a=1) if q is None: q = h else: - q = gemm(1.0,q,h) + q = gemm(1.0, q, h) if q is None: - q = diag(ones(n,dtype=typecode)) - return hq,q + q = diag(ones(n, dtype=typecode)) + return hq, q Added: trunk/scipy/linalg/decomp_cholesky.py =================================================================== --- trunk/scipy/linalg/decomp_cholesky.py (rev 0) +++ trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -0,0 +1,217 @@ +"""Cholesky decomposition functions.""" + +from numpy import asarray_chkfinite + +# Local imports +from misc import LinAlgError, _datanotshared +from lapack import get_lapack_funcs + +__all__ = ['cholesky', 'cho_factor', 'cho_solve', 'cholesky_banded', + 'cho_solve_banded'] + + +def _cholesky(a, lower=False, overwrite_a=False, clean=True): + """Common code for cholesky() and cho_factor().""" + + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]: + raise ValueError('expected square matrix') + + overwrite_a = overwrite_a or _datanotshared(a1, a) + potrf, = get_lapack_funcs(('potrf',), (a1,)) + c, info = potrf(a1, lower=lower, overwrite_a=overwrite_a, clean=clean) + if info > 0: + raise LinAlgError("%d-th leading minor not positive definite" % info) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal potrf' + % -info) + return c, lower + +def cholesky(a, lower=False, overwrite_a=False): + """Compute the Cholesky decomposition of a matrix. + + Returns the Cholesky decomposition, :lm:`A = L L^*` or :lm:`A = U^* U` + of a Hermitian positive-definite matrix :lm:`A`. + + Parameters + ---------- + a : array, shape (M, M) + Matrix to be decomposed + lower : boolean + Whether to compute the upper or lower triangular Cholesky factorization + (Default: upper-triangular) + overwrite_a : boolean + Whether to overwrite data in a (may improve performance) + + Returns + ------- + c : array, shape (M, M) + Upper- or lower-triangular Cholesky factor of A + + Raises LinAlgError if decomposition fails + + Examples + -------- + >>> from scipy import array, linalg, dot + >>> a = array([[1,-2j],[2j,5]]) + >>> L = linalg.cholesky(a, lower=True) + >>> L + array([[ 1.+0.j, 0.+0.j], + [ 0.+2.j, 1.+0.j]]) + >>> dot(L, L.T.conj()) + array([[ 1.+0.j, 0.-2.j], + [ 0.+2.j, 5.+0.j]]) + + """ + c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True) + return c + + +def cho_factor(a, lower=False, overwrite_a=False): + """Compute the Cholesky decomposition of a matrix, to use in cho_solve + + Returns a matrix containing the Cholesky decomposition, + ``A = L L*`` or ``A = U* U`` of a Hermitian positive-definite matrix `a`. + The return value can be directly used as the first parameter to cho_solve. + + .. warning:: + The returned matrix also contains random data in the entries not + used by the Cholesky decomposition. If you need to zero these + entries, use the function `cholesky` instead. + + Parameters + ---------- + a : array, shape (M, M) + Matrix to be decomposed + lower : boolean + Whether to compute the upper or lower triangular Cholesky factorization + (Default: upper-triangular) + overwrite_a : boolean + Whether to overwrite data in a (may improve performance) + + Returns + ------- + c : array, shape (M, M) + Matrix whose upper or lower triangle contains the Cholesky factor + of `a`. Other parts of the matrix contain random data. + lower : boolean + Flag indicating whether the factor is in the lower or upper triangle + + Raises + ------ + LinAlgError + Raised if decomposition fails. + + See also + -------- + cho_solve : Solve a linear set equations using the Cholesky factorization + of a matrix. + + """ + c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=False) + return c, lower + + +def cho_solve((c, lower), b, overwrite_b=False): + """Solve the linear equations A x = b, given the Cholesky factorization of A. + + Parameters + ---------- + (c, lower) : tuple, (array, bool) + Cholesky factorization of a, as given by cho_factor + b : array + Right-hand side + + Returns + ------- + x : array + The solution to the system A x = b + + See also + -------- + cho_factor : Cholesky factorization of a matrix + + """ + + b1 = asarray_chkfinite(b) + c = asarray_chkfinite(c) + if c.ndim != 2 or c.shape[0] != c.shape[1]: + raise ValueError("The factored matrix c is not square.") + if c.shape[1] != b1.shape[0]: + raise ValueError("incompatible dimensions.") + + overwrite_b = overwrite_b or (b1 is not b and not hasattr(b,'__array__')) + + potrs, = get_lapack_funcs(('potrs',), (c, b1)) + x, info = potrs(c, b1, lower=lower, overwrite_b=overwrite_b) + if info != 0: + raise ValueError('illegal value in %d-th argument of internal potrs' + % -info) + return x + +def cholesky_banded(ab, overwrite_ab=False, lower=False): + """Cholesky decompose a banded Hermitian positive-definite matrix + + The matrix a is stored in ab either in lower diagonal or upper + diagonal ordered form: + + ab[u + i - j, j] == a[i,j] (if upper form; i <= j) + ab[ i - j, j] == a[i,j] (if lower form; i >= j) + + Example of ab (shape of a is (6,6), u=2):: + + upper form: + * * a02 a13 a24 a35 + * a01 a12 a23 a34 a45 + a00 a11 a22 a33 a44 a55 + + lower form: + a00 a11 a22 a33 a44 a55 + a10 a21 a32 a43 a54 * + a20 a31 a42 a53 * * + + Parameters + ---------- + ab : array, shape (u + 1, M) + Banded matrix + overwrite_ab : boolean + Discard data in ab (may enhance performance) + lower : boolean + Is the matrix in the lower form. (Default is upper form) + + Returns + ------- + c : array, shape (u+1, M) + Cholesky factorization of a, in the same banded format as ab + + """ + ab = asarray_chkfinite(ab) + + pbtrf, = get_lapack_funcs(('pbtrf',), (ab,)) + c, info = pbtrf(ab, lower=lower, overwrite_ab=overwrite_ab) + if info > 0: + raise LinAlgError("%d-th leading minor not positive definite" % info) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal pbtrf' + % -info) + return c + +# my new function +def cho_solve_banded((ab, lower), b, overwrite_b=False): + """To be written...""" + + ab = asarray_chkfinite(ab) + b = asarray_chkfinite(b) + + # Validate shapes. + if ab.shape[-1] != b.shape[0]: + raise ValueError("shapes of ab and b are not compatible.") + + pbtrs, = get_lapack_funcs(('pbtrs',), (ab, b)) + x, info = pbtrs(ab, b, lower=lower, overwrite_b=overwrite_b) + if info > 0: + raise LinAlgError("%d-th leading minor not positive definite" % info) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal pbtrs' + % -info) + return x Added: trunk/scipy/linalg/decomp_lu.py =================================================================== --- trunk/scipy/linalg/decomp_lu.py (rev 0) +++ trunk/scipy/linalg/decomp_lu.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -0,0 +1,159 @@ +"""LU decomposition functions.""" + +from warnings import warn + +from numpy import asarray, asarray_chkfinite + +# Local imports +from misc import _datanotshared +from lapack import get_lapack_funcs +from flinalg import get_flinalg_funcs + + +def lu_factor(a, overwrite_a=False): + """Compute pivoted LU decomposition of a matrix. + + The decomposition is:: + + A = P L U + + where P is a permutation matrix, L lower triangular with unit + diagonal elements, and U upper triangular. + + Parameters + ---------- + a : array, shape (M, M) + Matrix to decompose + overwrite_a : boolean + Whether to overwrite data in A (may increase performance) + + Returns + ------- + lu : array, shape (N, N) + Matrix containing U in its upper triangle, and L in its lower triangle. + The unit diagonal elements of L are not stored. + piv : array, shape (N,) + Pivot indices representing the permutation matrix P: + row i of matrix was interchanged with row piv[i]. + + See also + -------- + lu_solve : solve an equation system using the LU factorization of a matrix + + Notes + ----- + This is a wrapper to the *GETRF routines from LAPACK. + + """ + a1 = asarray(a) + if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]): + raise ValueError, 'expected square matrix' + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + getrf, = get_lapack_funcs(('getrf',), (a1,)) + lu, piv, info = getrf(a, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of ' + 'internal getrf (lu_factor)' % -info) + if info > 0: + warn("Diagonal number %d is exactly zero. Singular matrix." % info, + RuntimeWarning) + return lu, piv + + +def lu_solve((lu, piv), b, trans=0, overwrite_b=False): + """Solve an equation system, a x = b, given the LU factorization of a + + Parameters + ---------- + (lu, piv) + Factorization of the coefficient matrix a, as given by lu_factor + b : array + Right-hand side + trans : {0, 1, 2} + Type of system to solve: + + ===== ========= + trans system + ===== ========= + 0 a x = b + 1 a^T x = b + 2 a^H x = b + ===== ========= + + Returns + ------- + x : array + Solution to the system + + See also + -------- + lu_factor : LU factorize a matrix + + """ + b1 = asarray_chkfinite(b) + overwrite_b = overwrite_b or (b1 is not b and not hasattr(b, '__array__')) + if lu.shape[0] != b1.shape[0]: + raise ValueError("incompatible dimensions.") + + getrs, = get_lapack_funcs(('getrs',), (lu, b1)) + x,info = getrs(lu, piv, b1, trans=trans, overwrite_b=overwrite_b) + if info == 0: + return x + raise ValueError('illegal value in %d-th argument of internal gesv|posv' + % -info) + + +def lu(a, permute_l=False, overwrite_a=False): + """Compute pivoted LU decompostion of a matrix. + + The decomposition is:: + + A = P L U + + where P is a permutation matrix, L lower triangular with unit + diagonal elements, and U upper triangular. + + Parameters + ---------- + a : array, shape (M, N) + Array to decompose + permute_l : boolean + Perform the multiplication P*L (Default: do not permute) + overwrite_a : boolean + Whether to overwrite data in a (may improve performance) + + Returns + ------- + (If permute_l == False) + p : array, shape (M, M) + Permutation matrix + l : array, shape (M, K) + Lower triangular or trapezoidal matrix with unit diagonal. + K = min(M, N) + u : array, shape (K, N) + Upper triangular or trapezoidal matrix + + (If permute_l == True) + pl : array, shape (M, K) + Permuted L matrix. + K = min(M, N) + u : array, shape (K, N) + Upper triangular or trapezoidal matrix + + Notes + ----- + This is a LU factorization routine written for Scipy. + + """ + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2: + raise ValueError('expected matrix') + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + flu, = get_flinalg_funcs(('lu',), (a1,)) + p, l, u, info = flu(a1, permute_l=permute_l, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of ' + 'internal lu.getrf' % -info) + if permute_l: + return l, u + return p, l, u Added: trunk/scipy/linalg/decomp_qr.py =================================================================== --- trunk/scipy/linalg/decomp_qr.py (rev 0) +++ trunk/scipy/linalg/decomp_qr.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -0,0 +1,248 @@ +"""QR decomposition functions.""" + +from warnings import warn + +import numpy +from numpy import asarray_chkfinite, complexfloating + +# Local imports +import special_matrices +from blas import get_blas_funcs +from lapack import get_lapack_funcs, find_best_lapack_type +from misc import _datanotshared + + +def qr(a, overwrite_a=False, lwork=None, econ=None, mode='qr'): + """Compute QR decomposition of a matrix. + + Calculate the decomposition :lm:`A = Q R` where Q is unitary/orthogonal + and R upper triangular. + + Parameters + ---------- + a : array, shape (M, N) + Matrix to be decomposed + overwrite_a : boolean + Whether data in a is overwritten (may improve performance) + lwork : integer + Work array size, lwork >= a.shape[1]. If None or -1, an optimal size + is computed. + econ : boolean + Whether to compute the economy-size QR decomposition, making shapes + of Q and R (M, K) and (K, N) instead of (M,M) and (M,N). K=min(M,N). + Default is False. + mode : {'qr', 'r'} + Determines what information is to be returned: either both Q and R + or only R. + + Returns + ------- + (if mode == 'qr') + Q : double or complex array, shape (M, M) or (M, K) for econ==True + + (for any mode) + R : double or complex array, shape (M, N) or (K, N) for econ==True + Size K = min(M, N) + + Raises LinAlgError if decomposition fails + + Notes + ----- + This is an interface to the LAPACK routines dgeqrf, zgeqrf, + dorgqr, and zungqr. + + Examples + -------- + >>> from scipy import random, linalg, dot + >>> a = random.randn(9, 6) + >>> q, r = linalg.qr(a) + >>> allclose(a, dot(q, r)) + True + >>> q.shape, r.shape + ((9, 9), (9, 6)) + + >>> r2 = linalg.qr(a, mode='r') + >>> allclose(r, r2) + + >>> q3, r3 = linalg.qr(a, econ=True) + >>> q3.shape, r3.shape + ((9, 6), (6, 6)) + + """ + if econ is None: + econ = False + else: + warn("qr econ argument will be removed after scipy 0.7. " + "The economy transform will then be available through " + "the mode='economic' argument.", DeprecationWarning) + + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2: + raise ValueError("expected 2D array") + M, N = a1.shape + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + + geqrf, = get_lapack_funcs(('geqrf',), (a1,)) + if lwork is None or lwork == -1: + # get optimal work array + qr, tau, work, info = geqrf(a1, lwork=-1, overwrite_a=1) + lwork = work[0] + + qr, tau, work, info = geqrf(a1, lwork=lwork, overwrite_a=overwrite_a) + if info < 0: + raise ValueError("illegal value in %d-th argument of internal geqrf" + % -info) + if not econ or M < N: + R = special_matrices.triu(qr) + else: + R = special_matrices.triu(qr[0:N, 0:N]) + + if mode == 'r': + return R + + if find_best_lapack_type((a1,))[0] == 's' or \ + find_best_lapack_type((a1,))[0] == 'd': + gor_un_gqr, = get_lapack_funcs(('orgqr',), (qr,)) + else: + gor_un_gqr, = get_lapack_funcs(('ungqr',), (qr,)) + + if M < N: + # get optimal work array + Q, work, info = gor_un_gqr(qr[:,0:M], tau, lwork=-1, overwrite_a=1) + lwork = work[0] + Q, work, info = gor_un_gqr(qr[:,0:M], tau, lwork=lwork, overwrite_a=1) + elif econ: + # get optimal work array + Q, work, info = gor_un_gqr(qr, tau, lwork=-1, overwrite_a=1) + lwork = work[0] + Q, work, info = gor_un_gqr(qr, tau, lwork=lwork, overwrite_a=1) + else: + t = qr.dtype.char + qqr = numpy.empty((M, M), dtype=t) + qqr[:,0:N] = qr + # get optimal work array + Q, work, info = gor_un_gqr(qqr, tau, lwork=-1, overwrite_a=1) + lwork = work[0] + Q, work, info = gor_un_gqr(qqr, tau, lwork=lwork, overwrite_a=1) + + if info < 0: + raise ValueError("illegal value in %d-th argument of internal gorgqr" + % -info) + return Q, R + + + +def qr_old(a, overwrite_a=False, lwork=None): + """Compute QR decomposition of a matrix. + + Calculate the decomposition :lm:`A = Q R` where Q is unitary/orthogonal + and R upper triangular. + + Parameters + ---------- + a : array, shape (M, N) + Matrix to be decomposed + overwrite_a : boolean + Whether data in a is overwritten (may improve performance) + lwork : integer + Work array size, lwork >= a.shape[1]. If None or -1, an optimal size + is computed. + + Returns + ------- + Q : double or complex array, shape (M, M) + R : double or complex array, shape (M, N) + Size K = min(M, N) + + Raises LinAlgError if decomposition fails + + """ + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2: + raise ValueError, 'expected matrix' + M,N = a1.shape + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + geqrf, = get_lapack_funcs(('geqrf',), (a1,)) + if lwork is None or lwork == -1: + # get optimal work array + qr, tau, work, info = geqrf(a1, lwork=-1, overwrite_a=1) + lwork = work[0] + qr, tau, work, info = geqrf(a1, lwork=lwork, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal geqrf' + % -info) + gemm, = get_blas_funcs(('gemm',), (qr,)) + t = qr.dtype.char + R = special_matrices.triu(qr) + Q = numpy.identity(M, dtype=t) + ident = numpy.identity(M, dtype=t) + zeros = numpy.zeros + for i in range(min(M, N)): + v = zeros((M,), t) + v[i] = 1 + v[i+1:M] = qr[i+1:M, i] + H = gemm(-tau[i], v, v, 1+0j, ident, trans_b=2) + Q = gemm(1, Q, H) + return Q, R + + +def rq(a, overwrite_a=False, lwork=None): + """Compute RQ decomposition of a square real matrix. + + Calculate the decomposition :lm:`A = R Q` where Q is unitary/orthogonal + and R upper triangular. + + Parameters + ---------- + a : array, shape (M, M) + Square real matrix to be decomposed + overwrite_a : boolean + Whether data in a is overwritten (may improve performance) + lwork : integer + Work array size, lwork >= a.shape[1]. If None or -1, an optimal size + is computed. + econ : boolean + + Returns + ------- + R : double array, shape (M, N) or (K, N) for econ==True + Size K = min(M, N) + Q : double or complex array, shape (M, M) or (M, K) for econ==True + + Raises LinAlgError if decomposition fails + + """ + # TODO: implement support for non-square and complex arrays + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2: + raise ValueError('expected matrix') + M,N = a1.shape + if M != N: + raise ValueError('expected square matrix') + if issubclass(a1.dtype.type, complexfloating): + raise ValueError('expected real (non-complex) matrix') + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + gerqf, = get_lapack_funcs(('gerqf',), (a1,)) + if lwork is None or lwork == -1: + # get optimal work array + rq, tau, work, info = gerqf(a1, lwork=-1, overwrite_a=1) + lwork = work[0] + rq, tau, work, info = gerqf(a1, lwork=lwork, overwrite_a=overwrite_a) + if info < 0: + raise ValueError('illegal value in %d-th argument of internal geqrf' + % -info) + gemm, = get_blas_funcs(('gemm',), (rq,)) + t = rq.dtype.char + R = special_matrices.triu(rq) + Q = numpy.identity(M, dtype=t) + ident = numpy.identity(M, dtype=t) + zeros = numpy.zeros + + k = min(M, N) + for i in range(k): + v = zeros((M,), t) + v[N-k+i] = 1 + v[0:N-k+i] = rq[M-k+i, 0:N-k+i] + H = gemm(-tau[i], v, v, 1+0j, ident, trans_b=2) + Q = gemm(1, Q, H) + return R, Q Added: trunk/scipy/linalg/decomp_schur.py =================================================================== --- trunk/scipy/linalg/decomp_schur.py (rev 0) +++ trunk/scipy/linalg/decomp_schur.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -0,0 +1,167 @@ +"""Schur decomposition functions.""" + +import numpy +from numpy import asarray_chkfinite, single + +# Local imports. +import misc +from misc import LinAlgError, _datanotshared +from lapack import get_lapack_funcs +from decomp import eigvals + + +__all__ = ['schur', 'rsf2csf'] + +_double_precision = ['i','l','d'] + +def schur(a, output='real', lwork=None, overwrite_a=False): + """Compute Schur decomposition of a matrix. + + The Schur decomposition is + + A = Z T Z^H + + where Z is unitary and T is either upper-triangular, or for real + Schur decomposition (output='real'), quasi-upper triangular. In + the quasi-triangular form, 2x2 blocks describing complex-valued + eigenvalue pairs may extrude from the diagonal. + + Parameters + ---------- + a : array, shape (M, M) + Matrix to decompose + output : {'real', 'complex'} + Construct the real or complex Schur decomposition (for real matrices). + lwork : integer + Work array size. If None or -1, it is automatically computed. + overwrite_a : boolean + Whether to overwrite data in a (may improve performance) + + Returns + ------- + T : array, shape (M, M) + Schur form of A. It is real-valued for the real Schur decomposition. + Z : array, shape (M, M) + An unitary Schur transformation matrix for A. + It is real-valued for the real Schur decomposition. + + See also + -------- + rsf2csf : Convert real Schur form to complex Schur form + + """ + if not output in ['real','complex','r','c']: + raise ValueError, "argument must be 'real', or 'complex'" + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2 or (a1.shape[0] != a1.shape[1]): + raise ValueError, 'expected square matrix' + typ = a1.dtype.char + if output in ['complex','c'] and typ not in ['F','D']: + if typ in _double_precision: + a1 = a1.astype('D') + typ = 'D' + else: + a1 = a1.astype('F') + typ = 'F' + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + gees, = get_lapack_funcs(('gees',), (a1,)) + if lwork is None or lwork == -1: + # get optimal work array + result = gees(lambda x: None, a, lwork=-1) + lwork = result[-2][0] + result = gees(lambda x: None, a, lwork=result[-2][0], overwrite_a=overwrite_a) + info = result[-1] + if info < 0: + raise ValueError('illegal value in %d-th argument of internal gees' + % -info) + elif info > 0: + raise LinAlgError("Schur form not found. Possibly ill-conditioned.") + return result[0], result[-3] + + +eps = numpy.finfo(float).eps +feps = numpy.finfo(single).eps + +_array_kind = {'b':0, 'h':0, 'B': 0, 'i':0, 'l': 0, 'f': 0, 'd': 0, 'F': 1, 'D': 1} +_array_precision = {'i': 1, 'l': 1, 'f': 0, 'd': 1, 'F': 0, 'D': 1} +_array_type = [['f', 'd'], ['F', 'D']] + +def _commonType(*arrays): + kind = 0 + precision = 0 + for a in arrays: + t = a.dtype.char + kind = max(kind, _array_kind[t]) + precision = max(precision, _array_precision[t]) + return _array_type[kind][precision] + +def _castCopy(type, *arrays): + cast_arrays = () + for a in arrays: + if a.dtype.char == type: + cast_arrays = cast_arrays + (a.copy(),) + else: + cast_arrays = cast_arrays + (a.astype(type),) + if len(cast_arrays) == 1: + return cast_arrays[0] + else: + return cast_arrays + + +def rsf2csf(T, Z): + """Convert real Schur form to complex Schur form. + + Convert a quasi-diagonal real-valued Schur form to the upper triangular + complex-valued Schur form. + + Parameters + ---------- + T : array, shape (M, M) + Real Schur form of the original matrix + Z : array, shape (M, M) + Schur transformation matrix + + Returns + ------- + T : array, shape (M, M) + Complex Schur form of the original matrix + Z : array, shape (M, M) + Schur transformation matrix corresponding to the complex form + + See also + -------- + schur : Schur decompose a matrix + + """ + Z, T = map(asarray_chkfinite, (Z, T)) + if len(Z.shape) != 2 or Z.shape[0] != Z.shape[1]: + raise ValueError("matrix must be square.") + if len(T.shape) != 2 or T.shape[0] != T.shape[1]: + raise ValueError("matrix must be square.") + if T.shape[0] != Z.shape[0]: + raise ValueError("matrices must be same dimension.") + N = T.shape[0] + arr = numpy.array + t = _commonType(Z, T, arr([3.0],'F')) + Z, T = _castCopy(t, Z, T) + conj = numpy.conj + dot = numpy.dot + r_ = numpy.r_ + transp = numpy.transpose + for m in range(N-1, 0, -1): + if abs(T[m,m-1]) > eps*(abs(T[m-1,m-1]) + abs(T[m,m])): + k = slice(m-1, m+1) + mu = eigvals(T[k,k]) - T[m,m] + r = misc.norm([mu[0], T[m,m-1]]) + c = mu[0] / r + s = T[m,m-1] / r + G = r_[arr([[conj(c), s]], dtype=t), arr([[-s, c]], dtype=t)] + Gc = conj(transp(G)) + j = slice(m-1, N) + T[k,j] = dot(G, T[k,j]) + i = slice(0, m+1) + T[i,k] = dot(T[i,k], Gc) + i = slice(0, N) + Z[i,k] = dot(Z[i,k], Gc) + T[m,m-1] = 0.0; + return T, Z Added: trunk/scipy/linalg/decomp_svd.py =================================================================== --- trunk/scipy/linalg/decomp_svd.py (rev 0) +++ trunk/scipy/linalg/decomp_svd.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -0,0 +1,173 @@ +"""SVD decomposition functions.""" + +import numpy +from numpy import asarray_chkfinite, zeros, r_, diag +from scipy.linalg import calc_lwork + +# Local imports. +from misc import LinAlgError, _datanotshared +from lapack import get_lapack_funcs + + +def svd(a, full_matrices=True, compute_uv=True, overwrite_a=False): + """Singular Value Decomposition. + + Factorizes the matrix a into two unitary matrices U and Vh and + an 1d-array s of singular values (real, non-negative) such that + a == U S Vh if S is an suitably shaped matrix of zeros whose + main diagonal is s. + + Parameters + ---------- + a : array, shape (M, N) + Matrix to decompose + full_matrices : boolean + If true, U, Vh are shaped (M,M), (N,N) + If false, the shapes are (M,K), (K,N) where K = min(M,N) + compute_uv : boolean + Whether to compute also U, Vh in addition to s (Default: true) + overwrite_a : boolean + Whether data in a is overwritten (may improve performance) + + Returns + ------- + U: array, shape (M,M) or (M,K) depending on full_matrices + s: array, shape (K,) + The singular values, sorted so that s[i] >= s[i+1]. K = min(M, N) + Vh: array, shape (N,N) or (K,N) depending on full_matrices + + For compute_uv = False, only s is returned. + + Raises LinAlgError if SVD computation does not converge + + Examples + -------- + >>> from scipy import random, linalg, allclose, dot + >>> a = random.randn(9, 6) + 1j*random.randn(9, 6) + >>> U, s, Vh = linalg.svd(a) + >>> U.shape, Vh.shape, s.shape + ((9, 9), (6, 6), (6,)) + + >>> U, s, Vh = linalg.svd(a, full_matrices=False) + >>> U.shape, Vh.shape, s.shape + ((9, 6), (6, 6), (6,)) + >>> S = linalg.diagsvd(s, 6, 6) + >>> allclose(a, dot(U, dot(S, Vh))) + True + + >>> s2 = linalg.svd(a, compute_uv=False) + >>> allclose(s, s2) + True + + See also + -------- + svdvals : return singular values of a matrix + diagsvd : return the Sigma matrix, given the vector s + + """ + # A hack until full_matrices == 0 support is fixed here. + if full_matrices == 0: + import numpy.linalg + return numpy.linalg.svd(a, full_matrices=0, compute_uv=compute_uv) + a1 = asarray_chkfinite(a) + if len(a1.shape) != 2: + raise ValueError('expected matrix') + m,n = a1.shape + overwrite_a = overwrite_a or (_datanotshared(a1, a)) + gesdd, = get_lapack_funcs(('gesdd',), (a1,)) + if gesdd.module_name[:7] == 'flapack': + lwork = calc_lwork.gesdd(gesdd.prefix, m, n, compute_uv)[1] + u,s,v,info = gesdd(a1,compute_uv = compute_uv, lwork = lwork, + overwrite_a = overwrite_a) + else: # 'clapack' + raise NotImplementedError('calling gesdd from %s' % gesdd.module_name) + if info > 0: + raise LinAlgError("SVD did not converge") + if info < 0: + raise ValueError('illegal value in %d-th argument of internal gesdd' + % -info) + if compute_uv: + return u, s, v + else: + return s + +def svdvals(a, overwrite_a=False): + """Compute singular values of a matrix. + + Parameters + ---------- + a : array, shape (M, N) + Matrix to decompose + overwrite_a : boolean + Whether data in a is overwritten (may improve performance) + + Returns + ------- + s: array, shape (K,) + The singular values, sorted so that s[i] >= s[i+1]. K = min(M, N) + + Raises LinAlgError if SVD computation does not converge + + See also + -------- + svd : return the full singular value decomposition of a matrix + diagsvd : return the Sigma matrix, given the vector s + + """ + return svd(a, compute_uv=0, overwrite_a=overwrite_a) + +def diagsvd(s, M, N): + """Construct the sigma matrix in SVD from singular values and size M,N. + + Parameters + ---------- + s : array, shape (M,) or (N,) + Singular values + M : integer + N : integer + Size of the matrix whose singular values are s + + Returns + ------- + S : array, shape (M, N) + The S-matrix in the singular value decomposition + + """ + part = diag(s) + typ = part.dtype.char + MorN = len(s) + if MorN == M: + return r_['-1', part, zeros((M, N-M), typ)] + elif MorN == N: + return r_[part, zeros((M-N,N), typ)] + else: + raise ValueError("Length of s must be M or N.") + + +# Orthonormal decomposition + +def orth(A): + """Construct an orthonormal basis for the range of A using SVD + + Parameters + ---------- + A : array, shape (M, N) + + Returns + ------- + Q : array, shape (M, K) + Orthonormal basis for the range of A. + K = effective rank of A, as determined by automatic cutoff + + See also + -------- + svd : Singular value decomposition of a matrix + + """ + u, s, vh = svd(A) + M, N = A.shape + eps = numpy.finfo(float).eps + tol = max(M,N) * numpy.amax(s) * eps + num = numpy.sum(s > tol, dtype=int) + Q = u[:,:num] + return Q Modified: trunk/scipy/linalg/generic_flapack.pyf =================================================================== --- trunk/scipy/linalg/generic_flapack.pyf 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/generic_flapack.pyf 2010-04-10 01:47:20 UTC (rev 6319) @@ -56,6 +56,57 @@ end subroutine pbtrf + + subroutine pbtrs(lower, n, kd, nrhs, ab, ldab, b, ldb, info) + + ! Solve a system of linear equations A*X = B with a symmetric + ! positive definite band matrix A using the Cholesky factorization. + ! AB is the triangular factur U or L from the Cholesky factorization + ! previously computed with *PBTRF. + ! A = U^T * U, AB = U if lower = 0 + ! A = L * L^T, AB = L if lower = 1 + + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,&nrhs,ab,&ldab,b,&ldb,&info); + callprotoargument char*,int*,int*,int*,*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer intent(hide),depend(b) :: ldb=shape(b,0) + integer intent(hide),depend(b) :: nrhs=shape(b,1) + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(ldb, nrhs),intent(in,out,copy,out=x) :: b + dimension(ldab,n),intent(in) :: ab + integer intent(out) :: info + + end subroutine pbtrs + + subroutine pbtrs(lower, n, kd, nrhs, ab, ldab, b, ldb, info) + + ! Solve a system of linear equations A*X = B with a symmetric + ! positive definite band matrix A using the Cholesky factorization. + ! AB is the triangular factur U or L from the Cholesky factorization + ! previously computed with *PBTRF. + ! A = U^T * U, AB = U if lower = 0 + ! A = L * L^T, AB = L if lower = 1 + + callstatement (*f2py_func)((lower?"L":"U"),&n,&kd,&nrhs,ab,&ldab,b,&ldb,&info); + callprotoargument char*,int*,int*,int*,*,int*,*,int*,int* + + integer optional,check(shape(ab,0)==ldab),depend(ab) :: ldab=shape(ab,0) + integer intent(hide),depend(ab) :: n=shape(ab,1) + integer intent(hide),depend(ab) :: kd=shape(ab,0)-1 + integer intent(hide),depend(b) :: ldb=shape(b,0) + integer intent(hide),depend(b) :: nrhs=shape(b,1) + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + dimension(ldb, nrhs),intent(in,out,copy,out=x) :: b + dimension(ldab,n),intent(in) :: ab + integer intent(out) :: info + + end subroutine pbtrs + subroutine pbsv(lower,n,kd,nrhs,ab,ldab,b,ldb,info) ! Modified: trunk/scipy/linalg/info.py =================================================================== --- trunk/scipy/linalg/info.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/info.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -14,12 +14,17 @@ pinv --- Pseudo-inverse (Moore-Penrose) using lstsq pinv2 --- Pseudo-inverse using svd -Eigenvalues and Decompositions:: +Eigenvalue Problem:: eig --- Find the eigenvalues and vectors of a square matrix eigvals --- Find the eigenvalues of a square matrix + eigh --- Find the eigenvalues and eigenvectors of a complex Hermitian or real symmetric matrix. + eigvalsh --- Find the eigenvalues of a complex Hermitian or real symmetric matrix. eig_banded --- Find the eigenvalues and vectors of a band matrix eigvals_banded --- Find the eigenvalues of a band matrix + +Decompositions:: + lu --- LU decomposition of a matrix lu_factor --- LU decomposition returning unordered matrix and pivots lu_solve --- solve Ax=b using back substitution with output of lu_factor Modified: trunk/scipy/linalg/matfuncs.py =================================================================== --- trunk/scipy/linalg/matfuncs.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/matfuncs.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Travis Oliphant, March 2002 # @@ -13,9 +11,15 @@ isfinite, sqrt, identity, single from numpy import matrix as mat import numpy as np -from basic import solve, inv, norm, triu, all_mat -from decomp import eig, schur, rsf2csf, orth, svd +# Local imports +from misc import norm +from basic import solve, inv +from special_matrices import triu, all_mat +from decomp import eig +from decomp_svd import orth, svd +from decomp_schur import schur, rsf2csf + eps = np.finfo(float).eps feps = np.finfo(single).eps Modified: trunk/scipy/linalg/misc.py =================================================================== --- trunk/scipy/linalg/misc.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/misc.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -1,10 +1,21 @@ import numpy as np from numpy.linalg import LinAlgError -### Norm +__all__ = ['LinAlgError', 'norm'] + def norm(a, ord=None): # Differs from numpy only in non-finite handling return np.linalg.norm(np.asarray_chkfinite(a), ord=ord) norm.__doc__ = np.linalg.norm.__doc__ + +def _datanotshared(a1,a): + if a1 is a: + return False + else: + #try comparing data pointers + try: + return a1.__array_interface__['data'][0] != a.__array_interface__['data'][0] + except: + return True \ No newline at end of file Modified: trunk/scipy/linalg/tests/test_basic.py =================================================================== --- trunk/scipy/linalg/tests/test_basic.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/tests/test_basic.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -107,7 +107,7 @@ # with the RHS as a 1D array. ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]]) b = array([1.0, 4.0, 1.0]) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) assert_array_almost_equal(x, [0.0, 1.0, 0.0]) def test_02_upper(self): @@ -121,7 +121,7 @@ b = array([[1.0, 4.0], [4.0, 2.0], [1.0, 4.0]]) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) expected = array([[0.0, 1.0], [1.0, 0.0], [0.0, 1.0]]) @@ -135,7 +135,7 @@ # with the RHS as a 2D array with shape (3,1). ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]]) b = array([1.0, 4.0, 1.0]).reshape(-1,1) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) assert_array_almost_equal(x, array([0.0, 1.0, 0.0]).reshape(-1,1)) def test_01_lower(self): @@ -147,7 +147,7 @@ ab = array([[4.0, 4.0, 4.0], [1.0, 1.0, -99]]) b = array([1.0, 4.0, 1.0]) - c, x = solveh_banded(ab, b, lower=True) + x = solveh_banded(ab, b, lower=True) assert_array_almost_equal(x, [0.0, 1.0, 0.0]) def test_02_lower(self): @@ -161,7 +161,7 @@ b = array([[1.0, 4.0], [4.0, 2.0], [1.0, 4.0]]) - c, x = solveh_banded(ab, b, lower=True) + x = solveh_banded(ab, b, lower=True) expected = array([[0.0, 1.0], [1.0, 0.0], [0.0, 1.0]]) @@ -175,7 +175,7 @@ # ab = array([[-99, 1.0, 1.0], [4.0, 4.0, 4.0]], dtype=float32) b = array([1.0, 4.0, 1.0], dtype=float32) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) assert_array_almost_equal(x, [0.0, 1.0, 0.0]) def test_02_float32(self): @@ -189,7 +189,7 @@ b = array([[1.0, 4.0], [4.0, 2.0], [1.0, 4.0]], dtype=float32) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) expected = array([[0.0, 1.0], [1.0, 0.0], [0.0, 1.0]]) @@ -203,7 +203,7 @@ # ab = array([[-99, -1.0j, -1.0j], [4.0, 4.0, 4.0]]) b = array([-1.0j, 4.0-1j, 4+1j]) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) assert_array_almost_equal(x, [0.0, 1.0, 1.0]) def test_02_complex(self): @@ -217,7 +217,7 @@ b = array([[ -1j, 4.0j], [4.0-1j, -1.0-1j], [4.0+1j, 4.0]]) - c, x = solveh_banded(ab, b) + x = solveh_banded(ab, b) expected = array([[0.0, 1.0j], [1.0, 0.0], [1.0, 1.0]]) Modified: trunk/scipy/linalg/tests/test_decomp.py =================================================================== --- trunk/scipy/linalg/tests/test_decomp.py 2010-04-09 17:57:46 UTC (rev 6318) +++ trunk/scipy/linalg/tests/test_decomp.py 2010-04-10 01:47:20 UTC (rev 6319) @@ -17,8 +17,8 @@ import numpy as np from numpy.testing import * -from scipy.linalg import eig,eigvals,lu,svd,svdvals,cholesky,qr, \ - schur,rsf2csf, lu_solve,lu_factor,solve,diagsvd,hessenberg,rq, \ +from scipy.linalg import eig, eigvals, lu, svd, svdvals, cholesky, qr, \ + schur, rsf2csf, lu_solve, lu_factor, solve, diagsvd, hessenberg, rq, \ eig_banded, eigvals_banded, eigh from scipy.linalg.flapack import dgbtrf, dgbtrs, zgbtrf, zgbtrs, \ dsbev, dsbevd, dsbevx, zhbevd, zhbevx @@ -837,8 +837,8 @@ c = transpose(c) a = dot(c,transpose(conjugate(c))) assert_array_almost_equal(cholesky(a,lower=1),c) + - class TestQR(TestCase): def test_simple(self): From scipy-svn at scipy.org Fri Apr 9 21:55:24 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 20:55:24 -0500 (CDT) Subject: [Scipy-svn] r6320 - trunk/scipy/linalg Message-ID: <20100410015524.1CF8339CAF2@scipy.org> Author: warren.weckesser Date: 2010-04-09 20:55:23 -0500 (Fri, 09 Apr 2010) New Revision: 6320 Modified: trunk/scipy/linalg/decomp_cholesky.py trunk/scipy/linalg/info.py Log: DOC: Add cho_solve_banded to info.py. (Also remove a temporary comment from decomp_cholesky.py.) Modified: trunk/scipy/linalg/decomp_cholesky.py =================================================================== --- trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 01:47:20 UTC (rev 6319) +++ trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 01:55:23 UTC (rev 6320) @@ -196,7 +196,7 @@ % -info) return c -# my new function + def cho_solve_banded((ab, lower), b, overwrite_b=False): """To be written...""" Modified: trunk/scipy/linalg/info.py =================================================================== --- trunk/scipy/linalg/info.py 2010-04-10 01:47:20 UTC (rev 6319) +++ trunk/scipy/linalg/info.py 2010-04-10 01:55:23 UTC (rev 6320) @@ -36,6 +36,7 @@ cholesky_banded --- Cholesky decomposition of a banded symmetric or Hermitian matrix cho_factor --- Cholesky decomposition for use in solving linear system cho_solve --- Solve previously factored linear system + cho_solve_banded --- Solve previously factored banded linear system. qr --- QR decomposition of a matrix schur --- Schur decomposition of a matrix rsf2csf --- Real to complex schur form From scipy-svn at scipy.org Fri Apr 9 22:54:35 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 21:54:35 -0500 (CDT) Subject: [Scipy-svn] r6321 - in trunk/scipy/linalg: . tests Message-ID: <20100410025435.D1A2039CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-09 21:54:35 -0500 (Fri, 09 Apr 2010) New Revision: 6321 Added: trunk/scipy/linalg/tests/test_decomp_cholesky.py Modified: trunk/scipy/linalg/decomp_cholesky.py trunk/scipy/linalg/tests/test_basic.py trunk/scipy/linalg/tests/test_decomp.py Log: DOC+TEST: linalg: Add a docstring and tests for cho_solve_banded. Modified: trunk/scipy/linalg/decomp_cholesky.py =================================================================== --- trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 01:55:23 UTC (rev 6320) +++ trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 02:54:35 UTC (rev 6321) @@ -197,18 +197,39 @@ return c -def cho_solve_banded((ab, lower), b, overwrite_b=False): - """To be written...""" +def cho_solve_banded((cb, lower), b, overwrite_b=False): + """Solve the linear equations A x = b, given the Cholesky factorization of A. - ab = asarray_chkfinite(ab) + Parameters + ---------- + (cb, lower) : tuple, (array, bool) + `cb` is the Cholesky factorization of A, as given by cholesky_banded. + `lower` must be the same value that was given to cholesky_banded. + b : array + Right-hand side + overwrite_b : bool + If True, the function will overwrite the values in `b`. + + Returns + ------- + x : array + The solution to the system A x = b + + See also + -------- + cholesky_banded : Cholesky factorization of a banded matrix + + """ + + cb = asarray_chkfinite(cb) b = asarray_chkfinite(b) # Validate shapes. - if ab.shape[-1] != b.shape[0]: - raise ValueError("shapes of ab and b are not compatible.") + if cb.shape[-1] != b.shape[0]: + raise ValueError("shapes of cb and b are not compatible.") - pbtrs, = get_lapack_funcs(('pbtrs',), (ab, b)) - x, info = pbtrs(ab, b, lower=lower, overwrite_b=overwrite_b) + pbtrs, = get_lapack_funcs(('pbtrs',), (cb, b)) + x, info = pbtrs(cb, b, lower=lower, overwrite_b=overwrite_b) if info > 0: raise LinAlgError("%d-th leading minor not positive definite" % info) if info < 0: Modified: trunk/scipy/linalg/tests/test_basic.py =================================================================== --- trunk/scipy/linalg/tests/test_basic.py 2010-04-10 01:55:23 UTC (rev 6320) +++ trunk/scipy/linalg/tests/test_basic.py 2010-04-10 02:54:35 UTC (rev 6321) @@ -335,69 +335,6 @@ assert_array_almost_equal(dot(a,x),b) -class TestCholeskyBanded(TestCase): - - def test_upper_real(self): - # Symmetric positive definite banded matrix `a` - a = array([[4.0, 1.0, 0.0, 0.0], - [1.0, 4.0, 0.5, 0.0], - [0.0, 0.5, 4.0, 0.2], - [0.0, 0.0, 0.2, 4.0]]) - # Banded storage form of `a`. - ab = array([[-1.0, 1.0, 0.5, 0.2], - [4.0, 4.0, 4.0, 4.0]]) - c = cholesky_banded(ab, lower=False) - ufac = zeros_like(a) - ufac[range(4),range(4)] = c[-1] - ufac[(0,1,2),(1,2,3)] = c[0,1:] - assert_array_almost_equal(a, dot(ufac.T, ufac)) - - def test_upper_complex(self): - # Hermitian positive definite banded matrix `a` - a = array([[4.0, 1.0, 0.0, 0.0], - [1.0, 4.0, 0.5, 0.0], - [0.0, 0.5, 4.0, -0.2j], - [0.0, 0.0, 0.2j, 4.0]]) - # Banded storage form of `a`. - ab = array([[-1.0, 1.0, 0.5, -0.2j], - [4.0, 4.0, 4.0, 4.0]]) - c = cholesky_banded(ab, lower=False) - ufac = zeros_like(a) - ufac[range(4),range(4)] = c[-1] - ufac[(0,1,2),(1,2,3)] = c[0,1:] - assert_array_almost_equal(a, dot(ufac.conj().T, ufac)) - - def test_lower_real(self): - # Symmetric positive definite banded matrix `a` - a = array([[4.0, 1.0, 0.0, 0.0], - [1.0, 4.0, 0.5, 0.0], - [0.0, 0.5, 4.0, 0.2], - [0.0, 0.0, 0.2, 4.0]]) - # Banded storage form of `a`. - ab = array([[4.0, 4.0, 4.0, 4.0], - [1.0, 0.5, 0.2, -1.0]]) - c = cholesky_banded(ab, lower=True) - lfac = zeros_like(a) - lfac[range(4),range(4)] = c[0] - lfac[(1,2,3),(0,1,2)] = c[1,:3] - assert_array_almost_equal(a, dot(lfac, lfac.T)) - - def test_lower_complex(self): - # Hermitian positive definite banded matrix `a` - a = array([[4.0, 1.0, 0.0, 0.0], - [1.0, 4.0, 0.5, 0.0], - [0.0, 0.5, 4.0, -0.2j], - [0.0, 0.0, 0.2j, 4.0]]) - # Banded storage form of `a`. - ab = array([[4.0, 4.0, 4.0, 4.0], - [1.0, 0.5, 0.2j, -1.0]]) - c = cholesky_banded(ab, lower=True) - lfac = zeros_like(a) - lfac[range(4),range(4)] = c[0] - lfac[(1,2,3),(0,1,2)] = c[1,:3] - assert_array_almost_equal(a, dot(lfac, lfac.conj().T)) - - class TestInv(TestCase): def test_simple(self): @@ -550,8 +487,6 @@ assert_array_almost_equal(x,direct_lstsq(a,b,1)) - - class TestPinv(TestCase): def test_simple(self): Modified: trunk/scipy/linalg/tests/test_decomp.py =================================================================== --- trunk/scipy/linalg/tests/test_decomp.py 2010-04-10 01:55:23 UTC (rev 6320) +++ trunk/scipy/linalg/tests/test_decomp.py 2010-04-10 02:54:35 UTC (rev 6321) @@ -15,7 +15,8 @@ """ import numpy as np -from numpy.testing import * +from numpy.testing import TestCase, assert_equal, assert_array_almost_equal, \ + assert_array_equal, assert_raises, run_module_suite from scipy.linalg import eig, eigvals, lu, svd, svdvals, cholesky, qr, \ schur, rsf2csf, lu_solve, lu_factor, solve, diagsvd, hessenberg, rq, \ @@ -790,55 +791,7 @@ def test_simple(self): assert_array_almost_equal(diagsvd([1,0,0],3,3),[[1,0,0],[0,0,0],[0,0,0]]) -class TestCholesky(TestCase): - def test_simple(self): - a = [[8,2,3],[2,9,3],[3,3,6]] - c = cholesky(a) - assert_array_almost_equal(dot(transpose(c),c),a) - c = transpose(c) - a = dot(c,transpose(c)) - assert_array_almost_equal(cholesky(a,lower=1),c) - - def test_simple_complex(self): - m = array([[3+1j,3+4j,5],[0,2+2j,2+7j],[0,0,7+4j]]) - a = dot(transpose(conjugate(m)),m) - c = cholesky(a) - a1 = dot(transpose(conjugate(c)),c) - assert_array_almost_equal(a,a1) - c = transpose(c) - a = dot(c,transpose(conjugate(c))) - assert_array_almost_equal(cholesky(a,lower=1),c) - - def test_random(self): - n = 20 - for k in range(2): - m = random([n,n]) - for i in range(n): - m[i,i] = 20*(.1+m[i,i]) - a = dot(transpose(m),m) - c = cholesky(a) - a1 = dot(transpose(c),c) - assert_array_almost_equal(a,a1) - c = transpose(c) - a = dot(c,transpose(c)) - assert_array_almost_equal(cholesky(a,lower=1),c) - - def test_random_complex(self): - n = 20 - for k in range(2): - m = random([n,n])+1j*random([n,n]) - for i in range(n): - m[i,i] = 20*(.1+abs(m[i,i])) - a = dot(transpose(conjugate(m)),m) - c = cholesky(a) - a1 = dot(transpose(conjugate(c)),c) - assert_array_almost_equal(a,a1) - c = transpose(c) - a = dot(c,transpose(conjugate(c))) - assert_array_almost_equal(cholesky(a,lower=1),c) - - class TestQR(TestCase): def test_simple(self): Added: trunk/scipy/linalg/tests/test_decomp_cholesky.py =================================================================== --- trunk/scipy/linalg/tests/test_decomp_cholesky.py (rev 0) +++ trunk/scipy/linalg/tests/test_decomp_cholesky.py 2010-04-10 02:54:35 UTC (rev 6321) @@ -0,0 +1,140 @@ + + +from numpy.testing import TestCase, assert_array_almost_equal + +from numpy import array, transpose, dot, conjugate, zeros_like +from numpy.random import rand +from scipy.linalg import cholesky, cholesky_banded, cho_solve_banded + + +def random(size): + return rand(*size) + + +class TestCholesky(TestCase): + + def test_simple(self): + a = [[8,2,3],[2,9,3],[3,3,6]] + c = cholesky(a) + assert_array_almost_equal(dot(transpose(c),c),a) + c = transpose(c) + a = dot(c,transpose(c)) + assert_array_almost_equal(cholesky(a,lower=1),c) + + def test_simple_complex(self): + m = array([[3+1j,3+4j,5],[0,2+2j,2+7j],[0,0,7+4j]]) + a = dot(transpose(conjugate(m)),m) + c = cholesky(a) + a1 = dot(transpose(conjugate(c)),c) + assert_array_almost_equal(a,a1) + c = transpose(c) + a = dot(c,transpose(conjugate(c))) + assert_array_almost_equal(cholesky(a,lower=1),c) + + def test_random(self): + n = 20 + for k in range(2): + m = random([n,n]) + for i in range(n): + m[i,i] = 20*(.1+m[i,i]) + a = dot(transpose(m),m) + c = cholesky(a) + a1 = dot(transpose(c),c) + assert_array_almost_equal(a,a1) + c = transpose(c) + a = dot(c,transpose(c)) + assert_array_almost_equal(cholesky(a,lower=1),c) + + def test_random_complex(self): + n = 20 + for k in range(2): + m = random([n,n])+1j*random([n,n]) + for i in range(n): + m[i,i] = 20*(.1+abs(m[i,i])) + a = dot(transpose(conjugate(m)),m) + c = cholesky(a) + a1 = dot(transpose(conjugate(c)),c) + assert_array_almost_equal(a,a1) + c = transpose(c) + a = dot(c,transpose(conjugate(c))) + assert_array_almost_equal(cholesky(a,lower=1),c) + + +class TestCholeskyBanded(TestCase): + """Tests for cholesky_banded() and cho_solve_banded.""" + + def test_upper_real(self): + # Symmetric positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, 0.2], + [0.0, 0.0, 0.2, 4.0]]) + # Banded storage form of `a`. + ab = array([[-1.0, 1.0, 0.5, 0.2], + [4.0, 4.0, 4.0, 4.0]]) + c = cholesky_banded(ab, lower=False) + ufac = zeros_like(a) + ufac[range(4),range(4)] = c[-1] + ufac[(0,1,2),(1,2,3)] = c[0,1:] + assert_array_almost_equal(a, dot(ufac.T, ufac)) + + b = array([0.0, 0.5, 4.2, 4.2]) + x = cho_solve_banded((c, False), b) + assert_array_almost_equal(x, [0.0, 0.0, 1.0, 1.0]) + + def test_upper_complex(self): + # Hermitian positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, -0.2j], + [0.0, 0.0, 0.2j, 4.0]]) + # Banded storage form of `a`. + ab = array([[-1.0, 1.0, 0.5, -0.2j], + [4.0, 4.0, 4.0, 4.0]]) + c = cholesky_banded(ab, lower=False) + ufac = zeros_like(a) + ufac[range(4),range(4)] = c[-1] + ufac[(0,1,2),(1,2,3)] = c[0,1:] + assert_array_almost_equal(a, dot(ufac.conj().T, ufac)) + + b = array([0.0, 0.5, 4.0-0.2j, 0.2j + 4.0]) + x = cho_solve_banded((c, False), b) + assert_array_almost_equal(x, [0.0, 0.0, 1.0, 1.0]) + + def test_lower_real(self): + # Symmetric positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, 0.2], + [0.0, 0.0, 0.2, 4.0]]) + # Banded storage form of `a`. + ab = array([[4.0, 4.0, 4.0, 4.0], + [1.0, 0.5, 0.2, -1.0]]) + c = cholesky_banded(ab, lower=True) + lfac = zeros_like(a) + lfac[range(4),range(4)] = c[0] + lfac[(1,2,3),(0,1,2)] = c[1,:3] + assert_array_almost_equal(a, dot(lfac, lfac.T)) + + b = array([0.0, 0.5, 4.2, 4.2]) + x = cho_solve_banded((c, True), b) + assert_array_almost_equal(x, [0.0, 0.0, 1.0, 1.0]) + + def test_lower_complex(self): + # Hermitian positive definite banded matrix `a` + a = array([[4.0, 1.0, 0.0, 0.0], + [1.0, 4.0, 0.5, 0.0], + [0.0, 0.5, 4.0, -0.2j], + [0.0, 0.0, 0.2j, 4.0]]) + # Banded storage form of `a`. + ab = array([[4.0, 4.0, 4.0, 4.0], + [1.0, 0.5, 0.2j, -1.0]]) + c = cholesky_banded(ab, lower=True) + lfac = zeros_like(a) + lfac[range(4),range(4)] = c[0] + lfac[(1,2,3),(0,1,2)] = c[1,:3] + assert_array_almost_equal(a, dot(lfac, lfac.conj().T)) + + b = array([0.0, 0.5j, 3.8j, 3.8]) + x = cho_solve_banded((c, True), b) + assert_array_almost_equal(x, [0.0, 0.0, 1.0j, 1.0]) From scipy-svn at scipy.org Fri Apr 9 23:07:21 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 22:07:21 -0500 (CDT) Subject: [Scipy-svn] r6322 - in trunk: doc/release scipy/linalg Message-ID: <20100410030721.50B7B39CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-09 22:07:21 -0500 (Fri, 09 Apr 2010) New Revision: 6322 Modified: trunk/doc/release/0.8.0-notes.rst trunk/scipy/linalg/decomp_cholesky.py Log: DOC: Add a 'version added' note to the docstring of cho_solve_banded, and update the release notes with recent changes to linalg. Modified: trunk/doc/release/0.8.0-notes.rst =================================================================== --- trunk/doc/release/0.8.0-notes.rst 2010-04-10 02:54:35 UTC (rev 6321) +++ trunk/doc/release/0.8.0-notes.rst 2010-04-10 03:07:21 UTC (rev 6322) @@ -106,13 +106,17 @@ A new function, `sweep_poly`, was added. -New special matrix functions (scipy.linalg) -------------------------------------------- -The functions `circulant` and `hadamard` were added to `scipy.linalg`. +New functions and others changes in scipy.linalg +------------------------------------------------ +The functions `circulant`, `hadamard` and `cho_solve_banded` were added +to `scipy.linalg`. The function `block_diag` was enhanced to accept scalar and 1D arguments, along with the usual 2D arguments. +The function `solveh_banded` no longer returns the Cholesky factorization. It +returns just the solution. + ARPACK-based sparse SVD ----------------------- Modified: trunk/scipy/linalg/decomp_cholesky.py =================================================================== --- trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 02:54:35 UTC (rev 6321) +++ trunk/scipy/linalg/decomp_cholesky.py 2010-04-10 03:07:21 UTC (rev 6322) @@ -219,6 +219,11 @@ -------- cholesky_banded : Cholesky factorization of a banded matrix + Notes + ----- + + .. versionadded:: 0.8.0 + """ cb = asarray_chkfinite(cb) From scipy-svn at scipy.org Fri Apr 9 23:11:47 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 9 Apr 2010 22:11:47 -0500 (CDT) Subject: [Scipy-svn] r6323 - trunk/doc/release Message-ID: <20100410031147.E8D1B39CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-09 22:11:47 -0500 (Fri, 09 Apr 2010) New Revision: 6323 Modified: trunk/doc/release/0.8.0-notes.rst Log: DOC: Fix typo. Modified: trunk/doc/release/0.8.0-notes.rst =================================================================== --- trunk/doc/release/0.8.0-notes.rst 2010-04-10 03:07:21 UTC (rev 6322) +++ trunk/doc/release/0.8.0-notes.rst 2010-04-10 03:11:47 UTC (rev 6323) @@ -106,8 +106,8 @@ A new function, `sweep_poly`, was added. -New functions and others changes in scipy.linalg ------------------------------------------------- +New functions and other changes in scipy.linalg +----------------------------------------------- The functions `circulant`, `hadamard` and `cho_solve_banded` were added to `scipy.linalg`. From scipy-svn at scipy.org Sat Apr 10 08:24:26 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 10 Apr 2010 07:24:26 -0500 (CDT) Subject: [Scipy-svn] scipy-svn@scipy.org, Private Invitation Message-ID: <20100410122426.8AAA539CB06@scipy.org> 78% OFF Pharmacy Online http://coyback.com 30 Day Money Back Satisfaction Guarantee! From scipy-svn at scipy.org Sun Apr 18 02:38:05 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 01:38:05 -0500 (CDT) Subject: [Scipy-svn] r6324 - branches/0.7.x Message-ID: <20100418063805.B5AAF39CAF1@scipy.org> Author: jarrod.millman Date: 2010-04-18 01:38:05 -0500 (Sun, 18 Apr 2010) New Revision: 6324 Modified: branches/0.7.x/THANKS.txt Log: test Modified: branches/0.7.x/THANKS.txt =================================================================== --- branches/0.7.x/THANKS.txt 2010-04-10 03:11:47 UTC (rev 6323) +++ branches/0.7.x/THANKS.txt 2010-04-18 06:38:05 UTC (rev 6324) @@ -73,4 +73,4 @@ Agilent which gave a genereous donation for support of SciPy. UC Berkeley for providing travel money and hosting numerous sprints. The University of Stellenbosch for funding the development of - the SciKits portal. +the SciKits portal. From scipy-svn at scipy.org Sun Apr 18 03:10:12 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 02:10:12 -0500 (CDT) Subject: [Scipy-svn] r6325 - branches/0.7.x Message-ID: <20100418071012.C601B39CAF1@scipy.org> Author: rgommers Date: 2010-04-18 02:10:12 -0500 (Sun, 18 Apr 2010) New Revision: 6325 Modified: branches/0.7.x/THANKS.txt Log: test Modified: branches/0.7.x/THANKS.txt =================================================================== --- branches/0.7.x/THANKS.txt 2010-04-18 06:38:05 UTC (rev 6324) +++ branches/0.7.x/THANKS.txt 2010-04-18 07:10:12 UTC (rev 6325) @@ -73,4 +73,4 @@ Agilent which gave a genereous donation for support of SciPy. UC Berkeley for providing travel money and hosting numerous sprints. The University of Stellenbosch for funding the development of -the SciKits portal. + the SciKits portal. From scipy-svn at scipy.org Sun Apr 18 03:11:23 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 02:11:23 -0500 (CDT) Subject: [Scipy-svn] r6326 - branches/0.7.x/scipy/weave Message-ID: <20100418071123.F2E7239CAF1@scipy.org> Author: rgommers Date: 2010-04-18 02:11:23 -0500 (Sun, 18 Apr 2010) New Revision: 6326 Modified: branches/0.7.x/scipy/weave/build_tools.py Log: Fix gcc_exists Windows warnings and function itself. Modified: branches/0.7.x/scipy/weave/build_tools.py =================================================================== --- branches/0.7.x/scipy/weave/build_tools.py 2010-04-18 07:10:12 UTC (rev 6325) +++ branches/0.7.x/scipy/weave/build_tools.py 2010-04-18 07:11:23 UTC (rev 6326) @@ -336,19 +336,19 @@ compiler_name = 'unix' return compiler_name -def gcc_exists(name = 'gcc'): - """ Test to make sure gcc is found - - Does this return correct value on win98??? - """ +def gcc_exists(name='gcc'): + """ Test to make sure gcc is found.""" result = 0 cmd = [str(name), '-v'] try: - p = subprocess.Popen(cmd, True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + if sys.platform == 'win32': + p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + else: + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) str_result = p.stdout.read() - #print str_result - if 'Reading specs' in str_result: + if 'specs' in str_result: result = 1 except: # This was needed because the msvc compiler messes with From scipy-svn at scipy.org Sun Apr 18 03:23:05 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 02:23:05 -0500 (CDT) Subject: [Scipy-svn] r6327 - branches/0.7.x/doc/source Message-ID: <20100418072305.5F85339CAF1@scipy.org> Author: rgommers Date: 2010-04-18 02:23:05 -0500 (Sun, 18 Apr 2010) New Revision: 6327 Modified: branches/0.7.x/doc/source/conf.py Log: Add micro version number in docs, and extend copyright to 2010. Modified: branches/0.7.x/doc/source/conf.py =================================================================== --- branches/0.7.x/doc/source/conf.py 2010-04-18 07:11:23 UTC (rev 6326) +++ branches/0.7.x/doc/source/conf.py 2010-04-18 07:23:05 UTC (rev 6327) @@ -34,7 +34,7 @@ # General substitutions. project = 'SciPy' -copyright = '2008-2009, The Scipy community' +copyright = '2008-2010, The Scipy community' # The default replacements for |version| and |release|, also used in various # other places throughout the built documents. @@ -42,7 +42,7 @@ # The short X.Y version. version = '0.7' # The full version, including alpha/beta/rc tags. -release = '0.7' +release = '0.7.2' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: From scipy-svn at scipy.org Sun Apr 18 03:23:30 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 02:23:30 -0500 (CDT) Subject: [Scipy-svn] r6328 - branches/0.7.x/scipy/special/tests Message-ID: <20100418072330.AADF039CAF1@scipy.org> Author: rgommers Date: 2010-04-18 02:23:30 -0500 (Sun, 18 Apr 2010) New Revision: 6328 Modified: branches/0.7.x/scipy/special/tests/test_basic.py Log: Fix failing test, -0.0 is not actually equal to 0.0 Modified: branches/0.7.x/scipy/special/tests/test_basic.py =================================================================== --- branches/0.7.x/scipy/special/tests/test_basic.py 2010-04-18 07:23:05 UTC (rev 6327) +++ branches/0.7.x/scipy/special/tests/test_basic.py 2010-04-18 07:23:30 UTC (rev 6328) @@ -346,7 +346,8 @@ def test_nrdtrimn(self): assert_approx_equal(cephes.nrdtrimn(0.5,1,1),1.0) def test_nrdtrisd(self): - assert_equal(cephes.nrdtrisd(0.5,0.5,0.5),0.0) + # abs() because nrdtrisd(0.5,0.5,0.5) returns -0.0, should be +0.0 + assert_equal(np.abs(cephes.nrdtrisd(0.5,0.5,0.5)), 0.0) def test_obl_ang1(self): cephes.obl_ang1(1,1,1,0) @@ -1409,7 +1410,7 @@ 123.70194191713507279, 129.02417238949092824, 134.00114761868422559]), rtol=1e-13) - + jn301 = jn_zeros(301,5) assert_tol_equal(jn301, array([313.59097866698830153, 323.21549776096288280, @@ -1422,7 +1423,7 @@ assert_tol_equal(jn0[260-1], 816.02884495068867280, rtol=1e-13) assert_tol_equal(jn0[280-1], 878.86068707124422606, rtol=1e-13) assert_tol_equal(jn0[300-1], 941.69253065317954064, rtol=1e-13) - + jn10 = jn_zeros(10, 300) assert_tol_equal(jn10[260-1], 831.67668514305631151, rtol=1e-13) assert_tol_equal(jn10[280-1], 894.51275095371316931, rtol=1e-13) @@ -1597,7 +1598,7 @@ an = yn_zeros(4,2) assert_array_almost_equal(an,array([ 5.64515, 9.36162]),5) an = yn_zeros(443,5) - assert_tol_equal(an, [450.13573091578090314, 463.05692376675001542, + assert_tol_equal(an, [450.13573091578090314, 463.05692376675001542, 472.80651546418663566, 481.27353184725625838, 488.98055964441374646], rtol=1e-15) @@ -1651,7 +1652,7 @@ for z in [-1300, -11, -10, -1, 1., 10., 200.5, 401., 600.5, 700.6, 1300, 10003]: yield v, z - + # check half-integers; these are problematic points at least # for cephes/iv for v in 0.5 + arange(-60, 60): @@ -1744,12 +1745,12 @@ assert_tol_equal(iv(-2, 1+0j), 0.1357476697670383) assert_tol_equal(kv(-1, 1+0j), 0.6019072301972347) assert_tol_equal(kv(-2, 1+0j), 1.624838898635178) - + assert_tol_equal(jv(-0.5, 1+0j), 0.43109886801837607952) assert_tol_equal(jv(-0.5, 1+1j), 0.2628946385649065-0.827050182040562j) assert_tol_equal(yv(-0.5, 1+0j), 0.6713967071418031) assert_tol_equal(yv(-0.5, 1+1j), 0.967901282890131+0.0602046062142816j) - + assert_tol_equal(iv(-0.5, 1+0j), 1.231200214592967) assert_tol_equal(iv(-0.5, 1+1j), 0.77070737376928+0.39891821043561j) assert_tol_equal(kv(-0.5, 1+0j), 0.4610685044478945) @@ -1876,8 +1877,8 @@ y=(iv(0,2)+iv(2,2))/2 x = ivp(1,2) assert_almost_equal(x,y,10) - + class TestLaguerre(TestCase): def test_laguerre(self): lag0 = laguerre(0) @@ -2048,7 +2049,7 @@ eps = 1e-7 + 1e-7*abs(x) dp = (pbdv(eta, x + eps)[0] - pbdv(eta, x - eps)[0]) / eps / 2. assert_tol_equal(p[1], dp, rtol=1e-6, atol=1e-6) - + def test_pbvv_gradient(self): x = np.linspace(-4, 4, 8)[:,None] eta = np.linspace(-10, 10, 5)[None,:] @@ -2057,8 +2058,8 @@ eps = 1e-7 + 1e-7*abs(x) dp = (pbvv(eta, x + eps)[0] - pbvv(eta, x - eps)[0]) / eps / 2. assert_tol_equal(p[1], dp, rtol=1e-6, atol=1e-6) - + class TestPolygamma(TestCase): # from Table 6.2 (pg. 271) of A&S def test_polygamma(self): From scipy-svn at scipy.org Sun Apr 18 03:23:44 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 02:23:44 -0500 (CDT) Subject: [Scipy-svn] r6329 - branches/0.7.x Message-ID: <20100418072344.39DEC39CAF1@scipy.org> Author: rgommers Date: 2010-04-18 02:23:44 -0500 (Sun, 18 Apr 2010) New Revision: 6329 Modified: branches/0.7.x/setup.py Log: Update version to 0.7.2rc2 Modified: branches/0.7.x/setup.py =================================================================== --- branches/0.7.x/setup.py 2010-04-18 07:23:30 UTC (rev 6328) +++ branches/0.7.x/setup.py 2010-04-18 07:23:44 UTC (rev 6329) @@ -44,7 +44,7 @@ MINOR = 7 MICRO = 2 ISRELEASED = True -VERSION = '%d.%d.%drc1' % (MAJOR, MINOR, MICRO) +VERSION = '%d.%d.%drc2' % (MAJOR, MINOR, MICRO) # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly # update it when the contents of directories change. From scipy-svn at scipy.org Sun Apr 18 04:11:06 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 03:11:06 -0500 (CDT) Subject: [Scipy-svn] r6330 - tags Message-ID: <20100418081106.2280339C4B4@scipy.org> Author: rgommers Date: 2010-04-18 03:11:05 -0500 (Sun, 18 Apr 2010) New Revision: 6330 Added: tags/0.7.2rc2/ Log: Create tag 0.7.2rc2 Copied: tags/0.7.2rc2 (from rev 6329, branches/0.7.x) From scipy-svn at scipy.org Sun Apr 18 04:13:45 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 03:13:45 -0500 (CDT) Subject: [Scipy-svn] r6331 - branches/0.7.x/scipy Message-ID: <20100418081345.3D15F39C4B4@scipy.org> Author: rgommers Date: 2010-04-18 03:13:45 -0500 (Sun, 18 Apr 2010) New Revision: 6331 Modified: branches/0.7.x/scipy/__init__.py Log: Suppress specific binary incompatibility warnings from Cython code. Modified: branches/0.7.x/scipy/__init__.py =================================================================== --- branches/0.7.x/scipy/__init__.py 2010-04-18 08:11:05 UTC (rev 6330) +++ branches/0.7.x/scipy/__init__.py 2010-04-18 08:13:45 UTC (rev 6331) @@ -29,6 +29,12 @@ "scipy (detected version %s)" % _num.version.version, UserWarning) +# Suppress warnings due to a known harmless change in numpy 1.4.1 +if maxver == 1 and minver >= 4: + import warnings + warnings.filterwarnings(action='ignore', message='.*numpy.dtype size changed.*') + warnings.filterwarnings(action='ignore', message='.*numpy.flatiter size changed.*') + __all__ += ['oldnumeric']+_num.__all__ __all__ += ['randn', 'rand', 'fft', 'ifft'] From scipy-svn at scipy.org Sun Apr 18 10:16:05 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 09:16:05 -0500 (CDT) Subject: [Scipy-svn] r6332 - branches/0.7.x/scipy Message-ID: <20100418141605.D58FE39CAF7@scipy.org> Author: rgommers Date: 2010-04-18 09:16:05 -0500 (Sun, 18 Apr 2010) New Revision: 6332 Modified: branches/0.7.x/scipy/__init__.py Log: BUG: fix typo that breaks import of main package. Modified: branches/0.7.x/scipy/__init__.py =================================================================== --- branches/0.7.x/scipy/__init__.py 2010-04-18 08:13:45 UTC (rev 6331) +++ branches/0.7.x/scipy/__init__.py 2010-04-18 14:16:05 UTC (rev 6332) @@ -30,7 +30,7 @@ UserWarning) # Suppress warnings due to a known harmless change in numpy 1.4.1 -if maxver == 1 and minver >= 4: +if majver == 1 and minver >= 4: import warnings warnings.filterwarnings(action='ignore', message='.*numpy.dtype size changed.*') warnings.filterwarnings(action='ignore', message='.*numpy.flatiter size changed.*') From scipy-svn at scipy.org Sun Apr 18 10:16:18 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 09:16:18 -0500 (CDT) Subject: [Scipy-svn] r6333 - branches/0.7.x Message-ID: <20100418141618.D4BC039CAF7@scipy.org> Author: rgommers Date: 2010-04-18 09:16:18 -0500 (Sun, 18 Apr 2010) New Revision: 6333 Modified: branches/0.7.x/pavement.py Log: REL: Update end point for Changelog. Modified: branches/0.7.x/pavement.py =================================================================== --- branches/0.7.x/pavement.py 2010-04-18 14:16:05 UTC (rev 6332) +++ branches/0.7.x/pavement.py 2010-04-18 14:16:18 UTC (rev 6333) @@ -110,7 +110,7 @@ # Start/end of the log (from git) LOG_START = 'svn/tags/0.7.1' -LOG_END = '0.7.x' +LOG_END = 'svn/0.7.x' # Virtualenv bootstrap stuff BOOTSTRAP_DIR = "bootstrap" From scipy-svn at scipy.org Sun Apr 18 11:07:00 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 18 Apr 2010 10:07:00 -0500 (CDT) Subject: [Scipy-svn] r6334 - branches/0.7.x Message-ID: <20100418150700.B2E6539CAF7@scipy.org> Author: rgommers Date: 2010-04-18 10:07:00 -0500 (Sun, 18 Apr 2010) New Revision: 6334 Modified: branches/0.7.x/setup.py Log: REL: set version to 0.7.2rc3. Modified: branches/0.7.x/setup.py =================================================================== --- branches/0.7.x/setup.py 2010-04-18 14:16:18 UTC (rev 6333) +++ branches/0.7.x/setup.py 2010-04-18 15:07:00 UTC (rev 6334) @@ -44,7 +44,7 @@ MINOR = 7 MICRO = 2 ISRELEASED = True -VERSION = '%d.%d.%drc2' % (MAJOR, MINOR, MICRO) +VERSION = '%d.%d.%drc3' % (MAJOR, MINOR, MICRO) # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly # update it when the contents of directories change. From scipy-svn at scipy.org Thu Apr 22 06:58:09 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 22 Apr 2010 05:58:09 -0500 (CDT) Subject: [Scipy-svn] r6335 - tags Message-ID: <20100422105809.0C0AD39CAE6@scipy.org> Author: rgommers Date: 2010-04-22 05:58:08 -0500 (Thu, 22 Apr 2010) New Revision: 6335 Added: tags/0.7.2rc3/ Log: Create tag 0.7.2rc3 Copied: tags/0.7.2rc3 (from rev 6334, branches/0.7.x) From scipy-svn at scipy.org Thu Apr 22 07:00:00 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 22 Apr 2010 06:00:00 -0500 (CDT) Subject: [Scipy-svn] r6336 - branches/0.7.x Message-ID: <20100422110000.6FB3B39CAE6@scipy.org> Author: rgommers Date: 2010-04-22 06:00:00 -0500 (Thu, 22 Apr 2010) New Revision: 6336 Modified: branches/0.7.x/setup.py Log: REL: Set version to 0.7.2. Modified: branches/0.7.x/setup.py =================================================================== --- branches/0.7.x/setup.py 2010-04-22 10:58:08 UTC (rev 6335) +++ branches/0.7.x/setup.py 2010-04-22 11:00:00 UTC (rev 6336) @@ -44,7 +44,7 @@ MINOR = 7 MICRO = 2 ISRELEASED = True -VERSION = '%d.%d.%drc3' % (MAJOR, MINOR, MICRO) +VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly # update it when the contents of directories change. From scipy-svn at scipy.org Thu Apr 22 09:41:25 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 22 Apr 2010 08:41:25 -0500 (CDT) Subject: [Scipy-svn] r6337 - tags Message-ID: <20100422134125.B818939CAE6@scipy.org> Author: rgommers Date: 2010-04-22 08:41:25 -0500 (Thu, 22 Apr 2010) New Revision: 6337 Added: tags/0.7.2/ Log: Create tag 0.7.2 Copied: tags/0.7.2 (from rev 6336, branches/0.7.x) From scipy-svn at scipy.org Thu Apr 22 20:21:58 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 22 Apr 2010 19:21:58 -0500 (CDT) Subject: [Scipy-svn] r6338 - in trunk/scipy/signal: . tests Message-ID: <20100423002158.26C2439CAE6@scipy.org> Author: warren.weckesser Date: 2010-04-22 19:21:57 -0500 (Thu, 22 Apr 2010) New Revision: 6338 Modified: trunk/scipy/signal/tests/test_waveforms.py trunk/scipy/signal/waveforms.py Log: BUG: signal: Handle integer arguments in chirp() and gausspulse() correctly (ticket #1156). Modified: trunk/scipy/signal/tests/test_waveforms.py =================================================================== --- trunk/scipy/signal/tests/test_waveforms.py 2010-04-22 13:41:25 UTC (rev 6337) +++ trunk/scipy/signal/tests/test_waveforms.py 2010-04-23 00:21:57 UTC (rev 6338) @@ -174,7 +174,49 @@ t = np.linspace(0, t1, 10) assert_raises(ValueError, waveforms.chirp, t, f0, t1, f1, method) + def test_integer_t1(self): + f0 = 10.0 + f1 = 20.0 + t = np.linspace(-1, 1, 11) + t1 = 3.0 + float_result = waveforms.chirp(t, f0, t1, f1) + t1 = 3 + int_result = waveforms.chirp(t, f0, t1, f1) + err_msg = "Integer input 't1=3' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + def test_integer_f0(self): + f1 = 20.0 + t1 = 3.0 + t = np.linspace(-1, 1, 11) + f0 = 10.0 + float_result = waveforms.chirp(t, f0, t1, f1) + f0 = 10 + int_result = waveforms.chirp(t, f0, t1, f1) + err_msg = "Integer input 'f0=10' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + def test_integer_f1(self): + f0 = 10.0 + t1 = 3.0 + t = np.linspace(-1, 1, 11) + f1 = 20.0 + float_result = waveforms.chirp(t, f0, t1, f1) + f1 = 20 + int_result = waveforms.chirp(t, f0, t1, f1) + err_msg = "Integer input 'f1=20' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + def test_integer_all(self): + f0 = 10 + t1 = 3 + f1 = 20 + t = np.linspace(-1, 1, 11) + float_result = waveforms.chirp(t, float(f0), float(t1), float(f1)) + int_result = waveforms.chirp(t, f0, t1, f1) + err_msg = "Integer input 'f0=10, t1=3, f1=20' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + class TestSweepPoly(TestCase): def test_sweep_poly_quad1(self): @@ -242,5 +284,33 @@ abserr = np.max(np.abs(f - expected)) assert_(abserr < 1e-6) + +class TestGaussPulse(TestCase): + + def test_integer_fc(self): + float_result = waveforms.gausspulse('cutoff', fc=1000.0) + int_result = waveforms.gausspulse('cutoff', fc=1000) + err_msg = "Integer input 'fc=1000' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + def test_integer_bw(self): + float_result = waveforms.gausspulse('cutoff', bw=1.0) + int_result = waveforms.gausspulse('cutoff', bw=1) + err_msg = "Integer input 'bw=1' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + def test_integer_bwr(self): + float_result = waveforms.gausspulse('cutoff', bwr=-6.0) + int_result = waveforms.gausspulse('cutoff', bwr=-6) + err_msg = "Integer input 'bwr=-6' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + def test_integer_tpr(self): + float_result = waveforms.gausspulse('cutoff', tpr=-60.0) + int_result = waveforms.gausspulse('cutoff', tpr=-60) + err_msg = "Integer input 'tpr=-60' gives wrong result" + assert_equal(int_result, float_result, err_msg=err_msg) + + if __name__ == "__main__": run_module_suite() Modified: trunk/scipy/signal/waveforms.py =================================================================== --- trunk/scipy/signal/waveforms.py 2010-04-22 13:41:25 UTC (rev 6337) +++ trunk/scipy/signal/waveforms.py 2010-04-23 00:21:57 UTC (rev 6338) @@ -116,18 +116,18 @@ # exp(-a t^2) <-> sqrt(pi/a) exp(-pi^2/a * f^2) = g(f) - ref = pow(10, bwr/ 20) + ref = pow(10.0, bwr / 20.0) # fdel = fc*bw/2: g(fdel) = ref --- solve this for a # # pi^2/a * fc^2 * bw^2 /4=-log(ref) - a = -(pi*fc*bw)**2 / (4*log(ref)) + a = -(pi*fc*bw)**2 / (4.0*log(ref)) if t == 'cutoff': # compute cut_off point # Solve exp(-a tc**2) = tref for tc # tc = sqrt(-log(tref) / a) where tref = 10^(tpr/20) if tpr >= 0: raise ValueError, "Reference level for time cutoff must be < 0 dB" - tref = pow(10, tpr / 20) + tref = pow(10.0, tpr / 20.0) return sqrt(-log(tref)/a) yenv = exp(-a*t*t) @@ -242,6 +242,9 @@ chirp_phase for a description of the arguments. """ + f0 = float(f0) + t1 = float(t1) + f1 = float(f1) if method in ['linear', 'lin', 'li']: beta = (f1 - f0) / t1 phase = 2*pi * (f0*t + 0.5*beta*t*t) From scipy-svn at scipy.org Sat Apr 24 16:59:32 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 24 Apr 2010 15:59:32 -0500 (CDT) Subject: [Scipy-svn] r6339 - in trunk/scipy/ndimage: . src tests Message-ID: <20100424205932.5EA4339CB04@scipy.org> Author: stefan Date: 2010-04-24 15:59:32 -0500 (Sat, 24 Apr 2010) New Revision: 6339 Modified: trunk/scipy/ndimage/measurements.py trunk/scipy/ndimage/setup.py trunk/scipy/ndimage/src/nd_image.c trunk/scipy/ndimage/tests/test_ndimage.py Log: Merge branch 'ndimage_measurements_rewrite' of http://broad.mit.edu/~thouis/scipy into measurements Modified: trunk/scipy/ndimage/measurements.py =================================================================== --- trunk/scipy/ndimage/measurements.py 2010-04-23 00:21:57 UTC (rev 6338) +++ trunk/scipy/ndimage/measurements.py 2010-04-24 20:59:32 UTC (rev 6339) @@ -34,6 +34,7 @@ import _ni_support import _nd_image import morphology +import time def label(input, structure = None, output = None): """ @@ -180,7 +181,160 @@ max_label = input.max() return _nd_image.find_objects(input, max_label) -def sum(input, labels=None, index=None): +def labeled_comprehension(input, labels, index, func, out_dtype, default, pass_positions=False): + '''Roughly equivalent to [func(input[labels == i]) for i in index]. + + Special cases: + - index a scalar: returns a single value + - index is None: returns func(inputs[labels > 0]) + + func will be called with linear indices as a second argument if + pass_positions is True. + ''' + + as_scalar = numpy.isscalar(index) + input = numpy.asarray(input) + + if pass_positions: + positions = numpy.arange(input.size).reshape(input.shape) + + if labels is None: + if index is not None: + raise ValueError, "index without defined labels" + if not pass_positions: + return func(input.ravel()) + else: + return func(input.ravel(), positions.ravel()) + + try: + input, labels = numpy.broadcast_arrays(input, labels) + except ValueError: + raise ValueError, "input and labels must have the same shape (excepting dimensions with width 1)" + + if index is None: + if not pass_positions: + return func(input[labels > 0]) + else: + return func(input[labels > 0], positions[labels > 0]) + + index = numpy.atleast_1d(index) + if any(index.astype(labels.dtype).astype(index.dtype) != index): + raise ValueError, "Cannot convert index values from <%s> to <%s> (labels' type) without loss of precision"%(index.dtype, labels.dtype) + index = index.astype(labels.dtype) + + # optimization: find min/max in index, and select those parts of labels, input, and positions + lo = index.min() + hi = index.max() + mask = (labels >= lo) & (labels <= hi) + + # this also ravels the arrays + labels = labels[mask] + input = input[mask] + if pass_positions: + positions = positions[mask] + + # sort everything by labels + label_order = labels.argsort() + labels = labels[label_order] + input = input[label_order] + if pass_positions: + positions = positions[label_order] + + index_order = index.argsort() + sorted_index = index[index_order] + + def do_map(inputs, output): + '''labels must be sorted''' + + nlabels = labels.size + nidx = sorted_index.size + + # Find boundaries for each stretch of constant labels + # This could be faster, but we already paid N log N to sort labels. + lo = numpy.searchsorted(labels, sorted_index, side='left') + hi = numpy.searchsorted(labels, sorted_index, side='right') + + for i, l, h in zip(range(nidx), lo, hi): + if l == h: + continue + idx = sorted_index[i] + output[i] = func(*[inp[l:h] for inp in inputs]) + + temp = numpy.empty(index.shape, out_dtype) + temp[:] = default + if not pass_positions: + do_map([input], temp) + else: + do_map([input, positions], temp) + output = numpy.zeros(index.shape, out_dtype) + output[index_order] = temp + + if as_scalar: + output = output[0] + + return output + +def _stats(input, labels = None, index = None, do_sum2=False): + '''returns count, sum, and optionally sum^2 by label''' + + def single_group(vals): + if do_sum2: + return vals.size, vals.sum(), (vals * vals.conjugate()).sum() + else: + return vals.size, vals.sum() + + if labels is None: + return single_group(input) + + # ensure input and labels match sizes + input, labels = numpy.broadcast_arrays(input, labels) + + if index is None: + return single_group(input[labels > 0]) + + if numpy.isscalar(index): + return single_group(input[labels == index]) + + # remap labels to unique integers if necessary, or if the largest + # label is larger than the number of values. + if ((not numpy.issubdtype(labels.dtype, numpy.int)) or + (labels.min() < 0) or (labels.max() > labels.size)): + unique_labels, new_labels = numpy.unique1d(labels, return_inverse=True) + + counts = numpy.bincount(new_labels) + sums = numpy.bincount(new_labels, weights=input.ravel()) + if do_sum2: + sums2 = numpy.bincount(new_labels, weights=(input * input.conjugate()).ravel()) + + idxs = numpy.searchsorted(unique_labels, index) + # make all of idxs valid + idxs[idxs >= unique_labels.size] = 0 + found = (unique_labels[idxs] == index) + else: + # labels are an integer type, and there aren't too many, so + # call bincount directly. + counts = numpy.bincount(labels.ravel()) + sums = numpy.bincount(labels.ravel(), weights=input.ravel()) + if do_sum2: + sums2 = numpy.bincount(labels.ravel(), weights=(input * input.conjugate()).ravel()) + + # make sure all index values are valid + idxs = numpy.asanyarray(index, numpy.int).copy() + found = (idxs >= 0) & (idxs < counts.size) + idxs[~ found] = 0 + + counts = counts[idxs] + counts[~ found] = 0 + sums = sums[idxs] + sums[~ found] = 0 + if not do_sum2: + return (counts, sums) + sums2 = sums2[idxs] + sums2[~ found] = 0 + return (counts, sums, sums2) + + +def sum(input, labels = None, index = None): """Calculate the sum of the values of the array. :Parameters: @@ -201,248 +355,253 @@ [1.0, 5.0] """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + count, sum = _stats(input, labels, index) + return sum - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - if index is not None: - T = getattr(index,'dtype',numpy.int32) - if T not in [numpy.int8, numpy.int16, numpy.int32, - numpy.uint8, numpy.uint16, numpy.bool]: - raise ValueError("Invalid index type") - index = numpy.asarray(index,dtype=T) - return _nd_image.statistics(input, labels, index, 0) +def mean(input, labels = None, index = None): + """Calculate the mean of the values of an array at labels. + Labels must be None or an array that can be broadcast to the input. -def mean(input, labels = None, index = None): - """Calculate the mean of the values of the array. - - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Index must be None, a single label or sequence of labels. If + None, the mean for all values where label is greater than 0 is + calculated. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - return _nd_image.statistics(input, labels, index, 1) + count, sum = _stats(input, labels, index) + return sum / numpy.asanyarray(count).astype(numpy.float) - def variance(input, labels = None, index = None): - """Calculate the variance of the values of the array. + """Calculate the variance of the values of an array at labels. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - return _nd_image.statistics(input, labels, index, 2) + count, sum, sum2 = _stats(input, labels, index, do_sum2=True) + mean = sum / numpy.asanyarray(count).astype(numpy.float) + mean2 = sum2 / numpy.asanyarray(count).astype(numpy.float) + return mean2 - (mean * mean.conjugate()) def standard_deviation(input, labels = None, index = None): - """Calculate the standard deviation of the values of the array. + """Calculate the standard deviation of the values of an array at labels. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - var = variance(input, labels, index) - if (isinstance(var, types.ListType)): - return [math.sqrt(x) for x in var] - else: - return math.sqrt(var) + return numpy.sqrt(variance(input, labels, index)) -def minimum(input, labels = None, index = None): - """Calculate the minimum of the values of the array. +def _select(input, labels = None, index = None, find_min=False, find_max=False, find_min_positions=False, find_max_positions=False): + '''returns min, max, or both, plus positions if requested''' - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. - """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - return _nd_image.statistics(input, labels, index, 3) + find_positions = find_min_positions or find_max_positions + positions = None + if find_positions: + positions = numpy.arange(input.size).reshape(input.shape) + def single_group(vals, positions): + result = [] + if find_min: + result += [vals.min()] + if find_min_positions: + result += [positions[vals == vals.min()][0]] + if find_max: + result += [vals.max()] + if find_max_positions: + result += [positions[vals == vals.max()][0]] + return result + + if labels is None: + return single_group(input, positions) -def maximum(input, labels=None, index=None): - """Return the maximum input value. + # ensure input and labels match sizes + input, labels = numpy.broadcast_arrays(input, labels) - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + if index is None: + mask = (labels > 0) + return single_group(input[mask], positions[mask] if find_positions else None) - """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + if numpy.isscalar(index): + mask = (labels == index) + return single_group(input[mask], positions[mask] if find_positions else None) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - return _nd_image.statistics(input, labels, index, 4) + order = input.ravel().argsort() + input = input.ravel()[order] + labels = labels.ravel()[order] + if find_positions: + positions = positions.ravel()[order] + # remap labels to unique integers if necessary, or if the largest + # label is larger than the number of values. + if ((not numpy.issubdtype(labels.dtype, numpy.int)) or + (labels.min() < 0) or (labels.max() > labels.size)): + # remap labels, and indexes + unique_labels, labels = numpy.unique1d(labels, return_inverse=True) + idxs = numpy.searchsorted(unique_labels, index) -def _index_to_position(index, shape): - """Convert a linear index to a position""" - if len(shape) > 0: - pos = [] - stride = numpy.multiply.reduce(shape) - for size in shape: - stride = stride // size - pos.append(index // stride) - index -= pos[-1] * stride - return tuple(pos) + # make all of idxs valid + idxs[idxs >= unique_labels.size] = 0 + found = (unique_labels[idxs] == index) else: - return 0 + # labels are an integer type, and there aren't too many. + idxs = numpy.asanyarray(index, numpy.int).copy() + found = (idxs >= 0) & (idxs <= labels.max()) + + idxs[~ found] = labels.max() + 1 + result = [] + if find_min: + mins = numpy.zeros(labels.max() + 2, input.dtype) + mins[labels[::-1]] = input[::-1] + result += [mins[idxs]] + if find_min_positions: + minpos = numpy.zeros(labels.max() + 2) + minpos[labels[::-1]] = positions[::-1] + result += [minpos[idxs]] + if find_max: + maxs = numpy.zeros(labels.max() + 2, input.dtype) + maxs[labels] = input + result += [maxs[idxs]] + if find_max_positions: + maxpos = numpy.zeros(labels.max() + 2) + maxpos[labels] = positions + result += [maxpos[idxs]] + return result +def minimum(input, labels = None, index = None): + """Calculate the minimum of the values of an array at labels. + + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. + """ + return _select(input, labels, index, find_min=True)[0] + +def maximum(input, labels = None, index = None): + """Calculate the maximum of the values of an array at labels. + + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. + """ + return _select(input, labels, index, find_max=True)[0] + def minimum_position(input, labels = None, index = None): - """Find the position of the minimum of the values of the array. + """Find the positions of the minimums of the values of an array at labels. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + + dims = numpy.array(numpy.asarray(input).shape) + # see numpy.unravel_index to understand this line. + dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - pos = _nd_image.statistics(input, labels, index, 5) - if (isinstance(pos, types.ListType)): - return [_index_to_position(x, input.shape) for x in pos] - else: - return _index_to_position(pos, input.shape) + result = _select(input, labels, index, find_min_positions=True)[0] + if numpy.isscalar(result): + return tuple((result // dim_prod) % dims) + return [tuple(v) for v in (result.reshape(-1, 1) // dim_prod) % dims] + def maximum_position(input, labels = None, index = None): - """Find the position of the maximum of the values of the array. + """Find the positions of the maximums of the values of an array at labels. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + + dims = numpy.array(numpy.asarray(input).shape) + # see numpy.unravel_index to understand this line. + dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - pos = _nd_image.statistics(input, labels, index, 6) - if (isinstance(pos, types.ListType)): - return [_index_to_position(x, input.shape) for x in pos] - else: - return _index_to_position(pos, input.shape) + result = _select(input, labels, index, find_max_positions=True)[0] + if numpy.isscalar(result): + return tuple((result // dim_prod) % dims) + return [tuple(v) for v in (result.reshape(-1, 1) // dim_prod) % dims] + def extrema(input, labels = None, index = None): - """Calculate the minimum, the maximum and their positions of the - values of the array. + """Calculate the minimums and maximums of the values of an array + at labels, along with their positions. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. + + Returns: minimums, maximums, min_positions, max_positions """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + + dims = numpy.array(numpy.asarray(input).shape) + # see numpy.unravel_index to understand this line. + dim_prod = numpy.cumprod([1] + list(dims[:0:-1]))[::-1] - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' + minimums, min_positions, maximums, max_positions = _select(input, labels, index, + find_min=True, find_max=True, + find_min_positions=True, find_max_positions=True) - min, max, minp, maxp = _nd_image.statistics(input, labels, index, 7) - if (isinstance(minp, types.ListType)): - minp = [_index_to_position(x, input.shape) for x in minp] - maxp = [_index_to_position(x, input.shape) for x in maxp] - else: - minp = _index_to_position(minp, input.shape) - maxp = _index_to_position(maxp, input.shape) - return min, max, minp, maxp + if numpy.isscalar(minimums): + return minimums, maximums, tuple((min_positions // dim_prod) % dims), tuple((max_positions // dim_prod) % dims) + min_positions = [tuple(v) for v in (min_positions.reshape(-1, 1) // dim_prod) % dims] + max_positions = [tuple(v) for v in (max_positions.reshape(-1, 1) // dim_prod) % dims] + return minimums, maximums, min_positions, max_positions + def center_of_mass(input, labels = None, index = None): - """Calculate the center of mass of of the array. + """Calculate the center of mass of the values of an array at labels. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Labels must be None or an array of the same dimensions as the input. + + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - return _nd_image.center_of_mass(input, labels, index) + normalizer = sum(input, labels, index) + grids = numpy.ogrid[[slice(0, i) for i in input.shape]] + results = [sum(input * grids[dir].astype(float), labels, index) / normalizer for dir in range(input.ndim)] + + if numpy.isscalar(results[0]): + return tuple(results) + return [tuple(v) for v in numpy.array(results).T] + def histogram(input, min, max, bins, labels = None, index = None): - """Calculate a histogram of of the array. + """Calculate the histogram of the values of an array at labels. - The histogram is defined by its minimum and maximum value and the + Labels must be None or an array of the same dimensions as the input. + + The histograms are defined by the minimum and maximum values and the number of bins. - The index parameter is a single label number or a sequence of - label numbers of the objects to be measured. If index is None, all - values are used where labels is larger than zero. + Index must be None, a single label or sequence of labels. If + none, all values where label is greater than zero are used. """ - input = numpy.asarray(input) - if numpy.iscomplexobj(input): - raise TypeError, 'Complex type not supported' - if labels is not None: - labels = numpy.asarray(labels) - labels = _broadcast(labels, input.shape) + + _bins = numpy.linspace(min, max, bins + 1) - if labels.shape != input.shape: - raise RuntimeError, 'input and labels shape are not equal' - if bins < 1: - raise RuntimeError, 'number of bins must be >= 1' - if min >= max: - raise RuntimeError, 'min must be < max' - return _nd_image.histogram(input, min, max, bins, labels, index) + def _hist(vals): + return numpy.histogram(vals, _bins)[0] + return labeled_comprehension(input, labels, index, _hist, object, None, pass_positions=False) + def watershed_ift(input, markers, structure = None, output = None): """Apply watershed from markers using a iterative forest transform algorithm. @@ -489,23 +648,3 @@ output, return_value = _ni_support._get_output(output, input) _nd_image.watershed_ift(input, markers, structure, output) return return_value - -def _broadcast(arr, sshape): - """Return broadcast view of arr, else return None.""" - ashape = arr.shape - return_value = numpy.zeros(sshape, arr.dtype) - # Just return arr if they have the same shape - if sshape == ashape: - return arr - srank = len(sshape) - arank = len(ashape) - - aslices = [] - sslices = [] - for i in range(arank): - aslices.append(slice(0, ashape[i], 1)) - - for i in range(srank): - sslices.append(slice(0, sshape[i], 1)) - return_value[sslices] = arr[aslices] - return return_value Modified: trunk/scipy/ndimage/setup.py =================================================================== --- trunk/scipy/ndimage/setup.py 2010-04-23 00:21:57 UTC (rev 6338) +++ trunk/scipy/ndimage/setup.py 2010-04-24 20:59:32 UTC (rev 6339) @@ -12,6 +12,7 @@ "src/ni_measure.c", "src/ni_morphology.c","src/ni_support.c"], include_dirs=['src']+[get_include()], + extra_compile_args=['-Wall'], ) config.add_data_dir('tests') Modified: trunk/scipy/ndimage/src/nd_image.c =================================================================== --- trunk/scipy/ndimage/src/nd_image.c 2010-04-23 00:21:57 UTC (rev 6338) +++ trunk/scipy/ndimage/src/nd_image.c 2010-04-24 20:59:32 UTC (rev 6339) @@ -731,399 +731,6 @@ return PyErr_Occurred() ? NULL : Py_BuildValue(""); } -static int _NI_GetIndices(PyObject* indices_object, - maybelong** result_indices, maybelong* min_label, - maybelong* max_label, maybelong* n_results) -{ - maybelong *indices = NULL, n_indices, ii; - - if (indices_object == Py_None) { - *min_label = -1; - *n_results = 1; - } else { - n_indices = NI_ObjectToLongSequenceAndLength(indices_object, &indices); - if (n_indices < 0) - goto exit; - if (n_indices < 1) { - PyErr_SetString(PyExc_RuntimeError, "no correct indices provided"); - goto exit; - } else { - *min_label = *max_label = indices[0]; - if (*min_label < 0) { - PyErr_SetString(PyExc_RuntimeError, - "negative indices not allowed"); - goto exit; - } - for(ii = 1; ii < n_indices; ii++) { - if (indices[ii] < 0) { - PyErr_SetString(PyExc_RuntimeError, - "negative indices not allowed"); - goto exit; - } - if (indices[ii] < *min_label) - *min_label = indices[ii]; - if (indices[ii] > *max_label) - *max_label = indices[ii]; - } - *result_indices = (maybelong*)malloc((*max_label - *min_label + 1) * - sizeof(maybelong)); - if (!*result_indices) { - PyErr_NoMemory(); - goto exit; - } - for(ii = 0; ii < *max_label - *min_label + 1; ii++) - (*result_indices)[ii] = -1; - *n_results = 0; - for(ii = 0; ii < n_indices; ii++) { - if ((*result_indices)[indices[ii] - *min_label] >= 0) { - PyErr_SetString(PyExc_RuntimeError, "duplicate index"); - goto exit; - } - (*result_indices)[indices[ii] - *min_label] = ii; - ++(*n_results); - } - } - } - exit: - if (indices) - free(indices); - return PyErr_Occurred() == NULL; -} - - -PyObject* _NI_BuildMeasurementResultArrayObject(maybelong n_results, - PyArrayObject** values) -{ - PyObject *result = NULL; - if (n_results > 1) { - result = PyList_New(n_results); - if (result) { - maybelong ii; - for(ii = 0; ii < n_results; ii++) { - PyList_SET_ITEM(result, ii, (PyObject*)values[ii]); - Py_XINCREF(values[ii]); - } - } - } else { - result = (PyObject*)values[0]; - Py_XINCREF(values[0]); - } - return result; -} - - -PyObject* _NI_BuildMeasurementResultDouble(maybelong n_results, - double* values) -{ - PyObject *result = NULL; - if (n_results > 1) { - result = PyList_New(n_results); - if (result) { - int ii; - for(ii = 0; ii < n_results; ii++) { - PyObject* val = PyFloat_FromDouble(values[ii]); - if (!val) { - Py_XDECREF(result); - return NULL; - } - PyList_SET_ITEM(result, ii, val); - } - } - } else { - result = Py_BuildValue("d", values[0]); - } - return result; -} - - -PyObject* _NI_BuildMeasurementResultDoubleTuple(maybelong n_results, - int tuple_size, double* values) -{ - PyObject *result = NULL; - maybelong ii; - int jj; - - if (n_results > 1) { - result = PyList_New(n_results); - if (result) { - for(ii = 0; ii < n_results; ii++) { - PyObject* val = PyTuple_New(tuple_size); - if (!val) { - Py_XDECREF(result); - return NULL; - } - for(jj = 0; jj < tuple_size; jj++) { - maybelong idx = jj + ii * tuple_size; - PyTuple_SetItem(val, jj, PyFloat_FromDouble(values[idx])); - if (PyErr_Occurred()) { - Py_XDECREF(result); - return NULL; - } - } - PyList_SET_ITEM(result, ii, val); - } - } - } else { - result = PyTuple_New(tuple_size); - if (result) { - for(ii = 0; ii < tuple_size; ii++) { - PyTuple_SetItem(result, ii, PyFloat_FromDouble(values[ii])); - if (PyErr_Occurred()) { - Py_XDECREF(result); - return NULL; - } - } - } - } - return result; -} - - -PyObject* _NI_BuildMeasurementResultInt(maybelong n_results, - maybelong* values) -{ - PyObject *result = NULL; - if (n_results > 1) { - result = PyList_New(n_results); - if (result) { - maybelong ii; - for(ii = 0; ii < n_results; ii++) { - PyObject* val = PyInt_FromLong(values[ii]); - if (!val) { - Py_XDECREF(result); - return NULL; - } - PyList_SET_ITEM(result, ii, val); - } - } - } else { - result = Py_BuildValue("l", values[0]); - } - return result; -} - - -static PyObject *Py_Statistics(PyObject *obj, PyObject *args) -{ - PyArrayObject *input = NULL, *labels = NULL; - PyObject *indices_object, *result = NULL; - PyObject *res1 = NULL, *res2 = NULL, *res3 = NULL, *res4 = NULL; - double *dresult1 = NULL, *dresult2 = NULL; - maybelong *lresult1 = NULL, *lresult2 = NULL; - maybelong min_label, max_label, *result_indices = NULL, n_results, ii; - int type; - - if (!PyArg_ParseTuple(args, "O&O&Oi", NI_ObjectToInputArray, &input, - NI_ObjectToOptionalInputArray, &labels, &indices_object, &type)) - goto exit; - - if (!_NI_GetIndices(indices_object, &result_indices, &min_label, - &max_label, &n_results)) - goto exit; - - if (type >= 0 && type <= 7) { - dresult1 = (double*)malloc(n_results * sizeof(double)); - if (!dresult1) { - PyErr_NoMemory(); - goto exit; - } - } - if (type == 2 || type == 7) { - dresult2 = (double*)malloc(n_results * sizeof(double)); - if (!dresult2) { - PyErr_NoMemory(); - goto exit; - } - } - if (type == 1 || type == 2 || (type >= 5 && type <= 7)) { - lresult1 = (maybelong*)malloc(n_results * sizeof(maybelong)); - if (!lresult1) { - PyErr_NoMemory(); - goto exit; - } - } - if (type == 7) { - lresult2 = (maybelong*)malloc(n_results * sizeof(maybelong)); - if (!lresult2) { - PyErr_NoMemory(); - goto exit; - } - } - switch(type) { - case 0: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, dresult1, NULL, NULL, NULL, NULL, NULL, NULL)) - goto exit; - result = _NI_BuildMeasurementResultDouble(n_results, dresult1); - break; - case 1: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, dresult1, lresult1, NULL, NULL, NULL, NULL, NULL)) - goto exit; - for(ii = 0; ii < n_results; ii++) - dresult1[ii] = lresult1[ii] > 0 ? dresult1[ii] / lresult1[ii] : 0.0; - - result = _NI_BuildMeasurementResultDouble(n_results, dresult1); - break; - case 2: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, dresult1, lresult1, dresult2, NULL, NULL, NULL, NULL)) - goto exit; - result = _NI_BuildMeasurementResultDouble(n_results, dresult2); - break; - case 3: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, NULL, NULL, NULL, dresult1, NULL, NULL, NULL)) - goto exit; - result = _NI_BuildMeasurementResultDouble(n_results, dresult1); - break; - case 4: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, NULL, NULL, NULL, NULL, dresult1, NULL, NULL)) - goto exit; - result = _NI_BuildMeasurementResultDouble(n_results, dresult1); - break; - case 5: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, NULL, NULL, NULL, dresult1, NULL, lresult1, NULL)) - goto exit; - result = _NI_BuildMeasurementResultInt(n_results, lresult1); - break; - case 6: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, NULL, NULL, NULL, NULL, dresult1, NULL, lresult1)) - goto exit; - result = _NI_BuildMeasurementResultInt(n_results, lresult1); - break; - case 7: - if (!NI_Statistics(input, labels, min_label, max_label, result_indices, - n_results, NULL, NULL, NULL, dresult1, dresult2, - lresult1, lresult2)) - goto exit; - res1 = _NI_BuildMeasurementResultDouble(n_results, dresult1); - res2 = _NI_BuildMeasurementResultDouble(n_results, dresult2); - res3 = _NI_BuildMeasurementResultInt(n_results, lresult1); - res4 = _NI_BuildMeasurementResultInt(n_results, lresult2); - if (!res1 || !res2 || !res3 || !res4) - goto exit; - result = Py_BuildValue("OOOO", res1, res2, res3, res4); - break; - default: - PyErr_SetString(PyExc_RuntimeError, "operation not supported"); - goto exit; - } - - exit: - Py_XDECREF(input); - Py_XDECREF(labels); - if (result_indices) - free(result_indices); - if (dresult1) - free(dresult1); - if (dresult2) - free(dresult2); - if (lresult1) - free(lresult1); - if (lresult2) - free(lresult2); - return result; -} - - -static PyObject *Py_CenterOfMass(PyObject *obj, PyObject *args) -{ - PyArrayObject *input = NULL, *labels = NULL; - PyObject *indices_object, *result = NULL; - double *center_of_mass = NULL; - maybelong min_label, max_label, *result_indices = NULL, n_results; - - if (!PyArg_ParseTuple(args, "O&O&O", NI_ObjectToInputArray, &input, - NI_ObjectToOptionalInputArray, &labels, &indices_object)) - goto exit; - - if (!_NI_GetIndices(indices_object, &result_indices, &min_label, - &max_label, &n_results)) - goto exit; - - center_of_mass = (double*)malloc(input->nd * n_results * - sizeof(double)); - if (!center_of_mass) { - PyErr_NoMemory(); - goto exit; - } - - if (!NI_CenterOfMass(input, labels, min_label, max_label, - result_indices, n_results, center_of_mass)) - goto exit; - - result = _NI_BuildMeasurementResultDoubleTuple(n_results, input->nd, - center_of_mass); - - exit: - Py_XDECREF(input); - Py_XDECREF(labels); - if (result_indices) - free(result_indices); - if (center_of_mass) - free(center_of_mass); - return result; -} - -static PyObject *Py_Histogram(PyObject *obj, PyObject *args) -{ - PyArrayObject *input = NULL, *labels = NULL, **histograms = NULL; - PyObject *indices_object, *result = NULL; - maybelong min_label, max_label, *result_indices = NULL, n_results; - maybelong jj, nbins; - long nbins_in; - double min, max; - - if (!PyArg_ParseTuple(args, "O&ddlO&O", NI_ObjectToInputArray, &input, - &min, &max, &nbins_in, NI_ObjectToOptionalInputArray, - &labels, &indices_object)) - goto exit; - nbins = nbins_in; - - if (!_NI_GetIndices(indices_object, &result_indices, &min_label, - &max_label, &n_results)) - goto exit; - - /* Set all pointers to NULL, so that freeing the memory */ - /* doesn't cause problems. */ - histograms = (PyArrayObject**)calloc(input->nd * n_results, - sizeof(PyArrayObject*)); - if (!histograms) { - PyErr_NoMemory(); - goto exit; - } - for(jj = 0; jj < n_results; jj++) { - histograms[jj] = NA_NewArray(NULL, tInt32, 1, &nbins); - if (!histograms[jj]) { - PyErr_NoMemory(); - goto exit; - } - } - - if (!NI_Histogram(input, labels, min_label, max_label, result_indices, - n_results, histograms, min, max, nbins)) - goto exit; - - result = _NI_BuildMeasurementResultArrayObject(n_results, histograms); - - exit: - Py_XDECREF(input); - Py_XDECREF(labels); - if (result_indices) - free(result_indices); - if (histograms) { - for(jj = 0; jj < n_results; jj++) { - Py_XDECREF(histograms[jj]); - } - free(histograms); - } - return result; -} - static PyObject *Py_DistanceTransformBruteForce(PyObject *obj, PyObject *args) { @@ -1293,12 +900,6 @@ METH_VARARGS, NULL}, {"watershed_ift", (PyCFunction)Py_WatershedIFT, METH_VARARGS, NULL}, - {"statistics", (PyCFunction)Py_Statistics, - METH_VARARGS, NULL}, - {"center_of_mass", (PyCFunction)Py_CenterOfMass, - METH_VARARGS, NULL}, - {"histogram", (PyCFunction)Py_Histogram, - METH_VARARGS, NULL}, {"distance_transform_bf", (PyCFunction)Py_DistanceTransformBruteForce, METH_VARARGS, NULL}, {"distance_transform_op", (PyCFunction)Py_DistanceTransformOnePass, Modified: trunk/scipy/ndimage/tests/test_ndimage.py =================================================================== --- trunk/scipy/ndimage/tests/test_ndimage.py 2010-04-23 00:21:57 UTC (rev 6338) +++ trunk/scipy/ndimage/tests/test_ndimage.py 2010-04-24 20:59:32 UTC (rev 6339) @@ -48,6 +48,8 @@ a = numpy.asarray(a, numpy.complex128) b = numpy.asarray(b, numpy.complex128) t = ((a.real - b.real)**2).sum() + ((a.imag - b.imag)**2).sum() + if (a.dtype == numpy.object or b.dtype == numpy.object): + t = sum([diff(c,d)**2 for c,d in zip(a,b)]) else: a = numpy.asarray(a) a = a.astype(numpy.float64) @@ -2777,15 +2779,8 @@ input = numpy.array([[1, 2], [3, 4]], type) output = ndimage.sum(input, labels = labels, index = [4, 8, 2]) - self.failUnless(output == [4.0, 0.0, 5.0]) + self.failUnless(numpy.all(output == [4.0, 0.0, 5.0])) - def test_sum13(self): - "sum 13" - input = numpy.array([1,2,3,4]) - labels = numpy.array([0,0,0,0]) - index = numpy.array([0],numpy.uint64) - self.failUnlessRaises(ValueError,ndimage.sum,input,labels,index) - def test_mean01(self): "mean 1" labels = numpy.array([1, 0], bool) @@ -2817,7 +2812,8 @@ input = numpy.array([[1, 2], [3, 4]], type) output = ndimage.mean(input, labels = labels, index = [4, 8, 2]) - self.failUnless(output == [4.0, 0.0, 2.5]) + self.failUnless(numpy.all(output[[0,2]] == [4.0, 2.5]) and + numpy.isnan(output[1])) def test_minimum01(self): "minimum 1" @@ -2850,7 +2846,7 @@ input = numpy.array([[1, 2], [3, 4]], type) output = ndimage.minimum(input, labels = labels, index = [2, 3, 8]) - self.failUnless(output == [2.0, 4.0, 0.0]) + self.failUnless(numpy.all(output == [2.0, 4.0, 0.0])) def test_maximum01(self): "maximum 1" @@ -2883,7 +2879,7 @@ input = numpy.array([[1, 2], [3, 4]], type) output = ndimage.maximum(input, labels = labels, index = [2, 3, 8]) - self.failUnless(output == [3.0, 4.0, 0.0]) + self.failUnless(numpy.all(output == [3.0, 4.0, 0.0])) def test_maximum05(self): "Ticket #501" @@ -2895,7 +2891,7 @@ for type in self.types: input = numpy.array([], type) output = ndimage.variance(input) - self.failUnless(float(output) == 0.0) + self.failUnless(numpy.isnan(output)) def test_variance02(self): "variance 2" @@ -2909,13 +2905,13 @@ for type in self.types: input = numpy.array([1, 3], type) output = ndimage.variance(input) - self.failUnless(output == 2.0) + self.failUnless(output == 1.0) def test_variance04(self): "variance 4" input = numpy.array([1, 0], bool) output = ndimage.variance(input) - self.failUnless(output == 0.5) + self.failUnless(output == 0.25) def test_variance05(self): "variance 5" @@ -2923,7 +2919,7 @@ for type in self.types: input = numpy.array([1, 3, 8], type) output = ndimage.variance(input, labels, 2) - self.failUnless(output == 2.0) + self.failUnless(output == 1.0) def test_variance06(self): "variance 6" @@ -2931,14 +2927,14 @@ for type in self.types: input = numpy.array([1, 3, 8, 10, 8], type) output = ndimage.variance(input, labels, [2, 3, 4]) - self.failUnless(output == [2.0, 2.0, 0.0]) + self.failUnless(numpy.all(output == [1.0, 1.0, 0.0])) def test_standard_deviation01(self): "standard deviation 1" for type in self.types: input = numpy.array([], type) output = ndimage.standard_deviation(input) - self.failUnless(float(output) == 0.0) + self.failUnless(numpy.isnan(output)) def test_standard_deviation02(self): "standard deviation 2" @@ -2952,13 +2948,13 @@ for type in self.types: input = numpy.array([1, 3], type) output = ndimage.standard_deviation(input) - self.failUnless(output == math.sqrt(2.0)) + self.failUnless(output == math.sqrt(1.0)) def test_standard_deviation04(self): "standard deviation 4" input = numpy.array([1, 0], bool) output = ndimage.standard_deviation(input) - self.failUnless(output == math.sqrt(0.5)) + self.failUnless(output == 0.5) def test_standard_deviation05(self): "standard deviation 5" @@ -2966,7 +2962,7 @@ for type in self.types: input = numpy.array([1, 3, 8], type) output = ndimage.standard_deviation(input, labels, 2) - self.failUnless(output == math.sqrt(2.0)) + self.failUnless(output == 1.0) def test_standard_deviation06(self): "standard deviation 6" @@ -2975,8 +2971,7 @@ input = numpy.array([1, 3, 8, 10, 8], type) output = ndimage.standard_deviation(input, labels, [2, 3, 4]) - self.failUnless(output == [math.sqrt(2.0), math.sqrt(2.0), - 0.0]) + self.failUnless(all(output == [1.0, 1.0, 0.0])) def test_minimum_position01(self): "minimum position 1" @@ -3041,7 +3036,7 @@ [1, 5, 1, 1]], type) output = ndimage.minimum_position(input, labels, [2, 3]) - self.failUnless(output == [(0, 1), (1, 2)]) + self.failUnless(output[0] == (0, 1) and output[1] == (1, 2)) def test_maximum_position01(self): "maximum position 1" @@ -3098,7 +3093,7 @@ [1, 5, 1, 1]], type) output = ndimage.maximum_position(input, labels, [1, 2]) - self.failUnless(output == [(0, 0), (1, 1)]) + self.failUnless(output[0] == (0, 0) and output[1] == (1, 1)) def test_extrema01(self): "extrema 1" @@ -3148,8 +3143,10 @@ labels = labels, index = [2, 3, 8]) output5 = ndimage.maximum_position(input, labels = labels, index = [2, 3, 8]) - self.failUnless(output1 == (output2, output3, output4, - output5)) + self.failUnless(numpy.all(output1[0] == output2)) + self.failUnless(numpy.all(output1[1] == output3)) + self.failUnless(numpy.all(output1[2] == output4)) + self.failUnless(numpy.all(output1[3] == output5)) def test_extrema04(self): "extrema 4" @@ -3165,8 +3162,10 @@ [1, 2]) output5 = ndimage.maximum_position(input, labels, [1, 2]) - self.failUnless(output1 == (output2, output3, output4, - output5)) + self.failUnless(numpy.all(output1[0] == output2)) + self.failUnless(numpy.all(output1[1] == output3)) + self.failUnless(numpy.all(output1[2] == output4)) + self.failUnless(numpy.all(output1[3] == output5)) def test_center_of_mass01(self): "center of mass 1" @@ -3260,7 +3259,7 @@ def test_histogram02(self): "histogram 2" labels = [1, 1, 1, 1, 2, 2, 2, 2] - true = [0, 2, 0, 1, 0] + true = [0, 2, 0, 1, 1] input = numpy.array([1, 1, 3, 4, 3, 3, 3, 3]) output = ndimage.histogram(input, 0, 4, 5, labels, 1) e = diff(true, output) @@ -3269,7 +3268,7 @@ def test_histogram03(self): "histogram 3" labels = [1, 0, 1, 1, 2, 2, 2, 2] - true1 = [0, 1, 0, 1, 0] + true1 = [0, 1, 0, 1, 1] true2 = [0, 0, 0, 3, 0] input = numpy.array([1, 1, 3, 4, 3, 5, 3, 3]) output = ndimage.histogram(input, 0, 4, 5, labels, (1,2)) From scipy-svn at scipy.org Sat Apr 24 17:15:35 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 24 Apr 2010 16:15:35 -0500 (CDT) Subject: [Scipy-svn] r6340 - in trunk/scipy/stats: . tests Message-ID: <20100424211535.C0C0539CB04@scipy.org> Author: stefan Date: 2010-04-24 16:15:35 -0500 (Sat, 24 Apr 2010) New Revision: 6340 Modified: trunk/scipy/stats/stats.py trunk/scipy/stats/tests/test_stats.py Log: BUG: Fix negative axis handling in nanstd (patch by Keith Goodman and Josef Perktold). Modified: trunk/scipy/stats/stats.py =================================================================== --- trunk/scipy/stats/stats.py 2010-04-24 20:59:32 UTC (rev 6339) +++ trunk/scipy/stats/stats.py 2010-04-24 21:15:35 UTC (rev 6340) @@ -302,17 +302,11 @@ x[np.isnan(x)] = 0. m1 = np.sum(x,axis)/n - # Kludge to subtract m1 from the correct axis - if axis!=0: - shape = np.arange(x.ndim).tolist() - shape.remove(axis) - shape.insert(0,axis) - x = x.transpose(tuple(shape)) - d = (x-m1)**2.0 - shape = tuple(array(shape).argsort()) - d = d.transpose(shape) + if axis: + d = (x - np.expand_dims(m1, axis))**2.0 else: - d = (x-m1)**2.0 + d = (x - m1)**2.0 + m2 = np.sum(d,axis)-(m1*m1)*Nnan if bias: m2c = m2 / n Modified: trunk/scipy/stats/tests/test_stats.py =================================================================== --- trunk/scipy/stats/tests/test_stats.py 2010-04-24 20:59:32 UTC (rev 6339) +++ trunk/scipy/stats/tests/test_stats.py 2010-04-24 21:15:35 UTC (rev 6340) @@ -225,6 +225,10 @@ s = stats.nanstd(self.Xall) assert np.isnan(s) + def test_nanstd_negative_axis(self): + x = np.array([1, 2, 3]) + assert_equal(stats.nanstd(x, -1), 1) + def test_nanmedian_none(self): """Check nanmedian when no values are nan.""" m = stats.nanmedian(self.X) From scipy-svn at scipy.org Tue Apr 27 11:12:43 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 10:12:43 -0500 (CDT) Subject: [Scipy-svn] r6341 - trunk/scipy/ndimage/tests Message-ID: <20100427151243.0957C39CB2A@scipy.org> Author: rgommers Date: 2010-04-27 10:12:42 -0500 (Tue, 27 Apr 2010) New Revision: 6341 Modified: trunk/scipy/ndimage/tests/test_io.py Log: TST: Skip imread test if PIL can not be imported. Modified: trunk/scipy/ndimage/tests/test_io.py =================================================================== --- trunk/scipy/ndimage/tests/test_io.py 2010-04-24 21:15:35 UTC (rev 6340) +++ trunk/scipy/ndimage/tests/test_io.py 2010-04-27 15:12:42 UTC (rev 6341) @@ -3,6 +3,13 @@ import os +try: + from PIL import Image + pil_missing = False +except ImportError: + pil_missing = True + + at dec.skipif(pil_missing, msg="The Python Image Library could not be found.") def test_imread(): lp = os.path.join(os.path.dirname(__file__), 'dots.png') img = ndi.imread(lp) From scipy-svn at scipy.org Tue Apr 27 12:52:51 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 11:52:51 -0500 (CDT) Subject: [Scipy-svn] r6342 - in trunk/scipy/sparse/linalg: isolve tests Message-ID: <20100427165251.9262839CB26@scipy.org> Author: stefan Date: 2010-04-27 11:52:51 -0500 (Tue, 27 Apr 2010) New Revision: 6342 Added: trunk/scipy/sparse/linalg/tests/test_iterative.py Modified: trunk/scipy/sparse/linalg/isolve/iterative.py Log: ENH: Rename GMRES's restrt parameter to restart. Remains compatible with old signature. Removed default restart of 20. Modified: trunk/scipy/sparse/linalg/isolve/iterative.py =================================================================== --- trunk/scipy/sparse/linalg/isolve/iterative.py 2010-04-27 15:12:42 UTC (rev 6341) +++ trunk/scipy/sparse/linalg/isolve/iterative.py 2010-04-27 16:52:51 UTC (rev 6342) @@ -303,7 +303,7 @@ return postprocess(x), info -def gmres(A, b, x0=None, tol=1e-5, restrt=20, maxiter=None, xtype=None, M=None, callback=None): +def gmres(A, b, x0=None, tol=1e-5, restart=None, maxiter=None, xtype=None, M=None, callback=None, restrt=None): """Use Generalized Minimal RESidual iteration to solve A x = b Parameters @@ -319,7 +319,7 @@ Starting guess for the solution. tol : float Relative tolerance to achieve before terminating. - restrt : integer + restart : integer Number of iterations between restarts. Larger values increase iteration cost, but may be necessary for convergence. maxiter : integer @@ -359,6 +359,14 @@ This parameter has been superceeded by LinearOperator. """ + + # Change 'restrt' keyword to 'restart' + if restrt is None: + restrt = restart + elif restart is not None: + raise ValueError("Cannot specify both restart and restrt keywords. " + "Preferably use 'restart' only.") + A,M,x,b,postprocess = make_system(A,M,x0,b,xtype) n = len(b) Added: trunk/scipy/sparse/linalg/tests/test_iterative.py =================================================================== --- trunk/scipy/sparse/linalg/tests/test_iterative.py (rev 0) +++ trunk/scipy/sparse/linalg/tests/test_iterative.py 2010-04-27 16:52:51 UTC (rev 6342) @@ -0,0 +1,18 @@ +import numpy as np +from numpy.testing import run_module_suite, assert_almost_equal + +import scipy.sparse as sp +import scipy.sparse.linalg as spla + +def test_gmres_basic(): + A = np.vander(np.arange(10) + 1)[:, ::-1] + b = np.zeros(10) + b[0] = 1 + x = np.linalg.solve(A, b) + + x_gm, err = spla.gmres(A, b, restart=5, maxiter=1) + + assert_almost_equal(x_gm[0], 0.359, decimal=2) + +if __name__ == "__main__": + run_module_suite() From scipy-svn at scipy.org Tue Apr 27 17:54:55 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:54:55 -0500 (CDT) Subject: [Scipy-svn] r6345 - trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC Message-ID: <20100427215455.AA20039CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:54:55 -0500 (Tue, 27 Apr 2010) New Revision: 6345 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c Log: ENH: sparse.linalg.dsolve/SuperLU: re-apply r4767, r4768, r5892 patches to SuperLU sources - Rename c_abs() and c_abs1() to slu_c_abs() and slu_c_abs1() to avoid conflict with Python headers. Hopefully resolves ticket #735 - BUG: fix SuperLU dubious format string. Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -95,7 +95,7 @@ { if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; nzval_new[j + fill] = nzval[j]; - s += c_abs1(&nzval_new[j + fill]); + s += slu_c_abs1(&nzval_new[j + fill]); } if (diag >= 0) { nzval_new[diag+fill].r = s * 3.0; @@ -122,7 +122,7 @@ for (j = colptr[i]; j < colptr[i + 1]; j++) { if (rowind[j] == i) diag = j; - s += c_abs1(&nzval[j]); + s += slu_c_abs1(&nzval[j]); } nzval[diag].r = s * 3.0; nzval[diag].i = 0.0; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -127,7 +127,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; - r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) ); + r[irow] = SUPERLU_MAX( r[irow], slu_c_abs1(&Aval[i]) ); } /* Find the maximum and minimum scale factors. */ @@ -162,7 +162,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; - c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] ); + c[j] = SUPERLU_MAX( c[j], slu_c_abs1(&Aval[i]) * r[irow] ); } /* Find the maximum and minimum scale factors. */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -375,7 +375,7 @@ amax[0] = 0.0; /* Scatter into SPA dense[*] */ for (k = xa_begin[icol]; k < xa_end[icol]; k++) { - register float tmp = c_abs1 (&a[k]); + register float tmp = slu_c_abs1 (&a[k]); if (tmp > amax[0]) amax[0] = tmp; dense[asub[k]] = a[k]; } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -287,21 +287,21 @@ than SAFE2, then SAFE1 is added to the i-th component of the numerator before dividing. */ - for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); + for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { - xk = c_abs1( &Xptr[k] ); + xk = slu_c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) - rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; + rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; - s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); + s += slu_c_abs1(&Aval[i]) * slu_c_abs1(&Xptr[irow]); } rwork[k] += s; } @@ -309,9 +309,9 @@ s = 0.; for (i = 0; i < A->nrow; ++i) { if (rwork[i] > safe2) { - s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); + s = SUPERLU_MAX( s, slu_c_abs1(&work[i]) / rwork[i] ); } else if ( rwork[i] != 0.0 ) { - s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); + s = SUPERLU_MAX( s, (slu_c_abs1(&work[i]) + safe1) / rwork[i] ); } /* If rwork[i] is exactly 0.0, then we know the true residual also must be exactly 0.0. */ @@ -364,22 +364,22 @@ inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ - for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); + for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { - xk = c_abs1( &Xptr[k] ); + xk = slu_c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) - rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; + rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; - xk = c_abs1( &Xptr[irow] ); - s += c_abs1(&Aval[i]) * xk; + xk = slu_c_abs1( &Xptr[irow] ); + s += slu_c_abs1(&Aval[i]) * xk; } rwork[k] += s; } @@ -387,9 +387,9 @@ for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) - rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; + rwork[i] = slu_c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else - rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; + rwork[i] = slu_c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { @@ -437,13 +437,13 @@ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, C[i] * slu_c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, R[i] * slu_c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, slu_c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -113,14 +113,14 @@ L20: if (*n == 1) { v[0] = x[0]; - *est = c_abs(&v[0]); + *est = slu_c_abs(&v[0]); /* ... QUIT */ goto L150; } *est = scsum1_(n, x, &c__1); for (i = 0; i < *n; ++i) { - d__1 = c_abs(&x[i]); + d__1 = slu_c_abs(&x[i]); if (d__1 > safmin) { d__1 = 1 / d__1; x[i].r *= d__1; @@ -165,7 +165,7 @@ if (*est <= estold) goto L120; for (i = 0; i < *n; ++i) { - d__1 = c_abs(&x[i]); + d__1 = slu_c_abs(&x[i]); if (d__1 > safmin) { d__1 = 1 / d__1; x[i].r *= d__1; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -79,7 +79,7 @@ value = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) - value = SUPERLU_MAX( value, c_abs( &Aval[i]) ); + value = SUPERLU_MAX( value, slu_c_abs( &Aval[i]) ); } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find norm1(A). */ @@ -87,7 +87,7 @@ for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) - sum += c_abs( &Aval[i] ); + sum += slu_c_abs( &Aval[i] ); value = SUPERLU_MAX(value,sum); } @@ -99,7 +99,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) { irow = Astore->rowind[i]; - rwork[irow] += c_abs( &Aval[i] ); + rwork[irow] += slu_c_abs( &Aval[i] ); } value = 0.; for (i = 0; i < A->nrow; ++i) Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -132,7 +132,7 @@ icntl[1] = -1; #endif - for (i = 0; i < nnz; ++i) nzval_d[i] = c_abs1(&nzval[i]); + for (i = 0; i < nnz; ++i) nzval_d[i] = slu_c_abs1(&nzval[i]); mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval_d, &num, perm, &liw, iw, &ldw, dw, icntl, info); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -113,7 +113,7 @@ diag = EMPTY; old_pivptr = nsupc; for (isub = nsupc; isub < nsupr; ++isub) { - rtemp = c_abs1 (&lu_col_ptr[isub]); + rtemp = slu_c_abs1 (&lu_col_ptr[isub]); if ( rtemp > pivmax ) { pivmax = rtemp; pivptr = isub; @@ -138,7 +138,7 @@ /* Choose appropriate pivotal element by our policy. */ if ( *usepr ) { - rtemp = c_abs1 (&lu_col_ptr[old_pivptr]); + rtemp = slu_c_abs1 (&lu_col_ptr[old_pivptr]); if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; else @@ -147,7 +147,7 @@ if ( *usepr == 0 ) { /* Use diagonal pivot? */ if ( diag >= 0 ) { /* diagonal exists */ - rtemp = c_abs1 (&lu_col_ptr[diag]); + rtemp = slu_c_abs1 (&lu_col_ptr[diag]); if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; } *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -88,15 +88,15 @@ maxaj = 0.; oldcol = inv_perm_c[j]; for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i) - maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) ); + maxaj = SUPERLU_MAX( maxaj, slu_c_abs1( &Aval[i]) ); maxuj = 0.; for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++) - maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) ); + maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &Uval[i]) ); /* Supernode */ for (i = 0; i < nz_in_U; ++i) - maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) ); + maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &luval[i]) ); ++nz_in_U; luval += nsupr; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -408,8 +408,8 @@ err = xnorm = 0.0; for (i = 0; i < X->nrow; i++) { c_sub(&temp, &soln_work[i], &xtrue[i]); - err = SUPERLU_MAX(err, c_abs(&temp)); - xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i])); + err = SUPERLU_MAX(err, slu_c_abs(&temp)); + xnorm = SUPERLU_MAX(xnorm, slu_c_abs(&soln_work[i])); } err = err / xnorm; printf("||X - Xtrue||/||X|| = %e\n", err); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -20,7 +20,7 @@ static int _compare_(const void *a, const void *b) { register int *x = (int *)a, *y = (int *)b; - register float xx = c_abs1(&A[*x]), yy = c_abs1(&A[*y]); + register float xx = slu_c_abs1(&A[*x]), yy = slu_c_abs1(&A[*y]); if (xx > yy) return -1; else if (xx < yy) return 1; else return 0; @@ -108,7 +108,7 @@ for (i = 0; i < segsze; i++) { irow = lsub[isub++]; - tmp = c_abs1(&dense[irow]); + tmp = slu_c_abs1(&dense[irow]); /* first dropping rule */ if (quota > 0 && tmp >= drop_tol) { @@ -164,7 +164,7 @@ } } for (i = xusub[jcol]; i <= m0; ) { - if (c_abs1(&ucol[i]) <= tol) { + if (slu_c_abs1(&ucol[i]) <= tol) { switch (milu) { case SMILU_1: case SMILU_2: @@ -192,7 +192,7 @@ } if (milu == SMILU_2) { - sum->r = c_abs1(sum); sum->i = 0.0; + sum->r = slu_c_abs1(sum); sum->i = 0.0; } if (milu == SMILU_3) sum->i = 0.0; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -103,7 +103,7 @@ case INF_NORM: default: k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1; - temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]); + temp[i] = slu_c_abs1(&lusup[xlusup_first + i + m * k]); break; } @@ -125,7 +125,7 @@ case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += - c_abs1(&lusup[xlusup_first + i + j * m]); + slu_c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: @@ -141,7 +141,7 @@ if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = - c_abs1(&lusup[xlusup_first + m1 + j * m]); + slu_c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } @@ -201,7 +201,7 @@ case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += - c_abs1(&lusup[xlusup_first + i + j * m]); + slu_c_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: @@ -217,7 +217,7 @@ if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = - c_abs1(&lusup[xlusup_first + m1 + j * m]); + slu_c_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } @@ -275,7 +275,7 @@ case SMILU_2: cs_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], - 1.0 + c_abs1(&t)); + 1.0 + slu_c_abs1(&t)); break; case SMILU_3: c_add(&t, &t, &one); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -108,7 +108,7 @@ /* For each nonz in A[*,jj] do dfs */ for (k = xa_begin[jj]; k < xa_end[jj]; k++) { krow = asub[k]; - tmp = c_abs1(&a[k]); + tmp = slu_c_abs1(&a[k]); if (tmp > *amax_col) *amax_col = tmp; dense_col[krow] = a[k]; kmark = marker[krow]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -114,16 +114,16 @@ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[isub], &drop_sum); - rtemp = c_abs1(&temp); + rtemp = slu_c_abs1(&temp); break; case SMILU_2: case SMILU_3: /* In this case, drop_sum contains the sum of the abs. value */ - rtemp = c_abs1(&lu_col_ptr[isub]); + rtemp = slu_c_abs1(&lu_col_ptr[isub]); break; case SILU: default: - rtemp = c_abs1(&lu_col_ptr[isub]); + rtemp = slu_c_abs1(&lu_col_ptr[isub]); break; } if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } @@ -180,15 +180,15 @@ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum); - rtemp = c_abs1(&temp); + rtemp = slu_c_abs1(&temp); break; case SMILU_2: case SMILU_3: - rtemp = c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; + rtemp = slu_c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; break; case SILU: default: - rtemp = c_abs1(&lu_col_ptr[old_pivptr]); + rtemp = slu_c_abs1(&lu_col_ptr[old_pivptr]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; @@ -200,15 +200,15 @@ switch (milu) { case SMILU_1: c_add(&temp, &lu_col_ptr[diag], &drop_sum); - rtemp = c_abs1(&temp); + rtemp = slu_c_abs1(&temp); break; case SMILU_2: case SMILU_3: - rtemp = c_abs1(&lu_col_ptr[diag]) + drop_sum.r; + rtemp = slu_c_abs1(&lu_col_ptr[diag]) + drop_sum.r; break; case SILU: default: - rtemp = c_abs1(&lu_col_ptr[diag]); + rtemp = slu_c_abs1(&lu_col_ptr[diag]); break; } if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -49,7 +49,7 @@ /*! \brief Returns sqrt(z.r^2 + z.i^2) */ -double c_abs(complex *z) +double slu_c_abs(complex *z) { float temp; float real = z->r; @@ -71,7 +71,7 @@ /*! \brief Approximates the abs. Returns abs(z.r) + abs(z.i) */ -double c_abs1(complex *z) +double slu_c_abs1(complex *z) { float real = z->r; float imag = z->i; @@ -109,7 +109,7 @@ /*! \brief SIGN functions for complex number. Returns z/abs(z) */ complex c_sgn(complex *z) { - register float t = c_abs(z); + register float t = slu_c_abs(z); register complex retval; if (t == 0.0) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -46,7 +46,7 @@ int i__1, i__2; float ret_val; /* Builtin functions */ - double c_abs(complex *); + double slu_c_abs(complex *); /* Local variables */ static int i, nincx; static float stemp; @@ -73,7 +73,7 @@ /* NEXT LINE MODIFIED. */ - stemp += c_abs(&CX(i)); + stemp += slu_c_abs(&CX(i)); /* L10: */ } ret_val = stemp; @@ -87,7 +87,7 @@ /* NEXT LINE MODIFIED. */ - stemp += c_abs(&CX(i)); + stemp += slu_c_abs(&CX(i)); /* L30: */ } ret_val = stemp; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h 2010-04-27 21:54:55 UTC (rev 6345) @@ -59,8 +59,8 @@ /* Prototypes for functions in scomplex.c */ void c_div(complex *, complex *, complex *); -double c_abs(complex *); /* exact */ -double c_abs1(complex *); /* approximate */ +double slu_c_abs(complex *); /* exact */ +double slu_c_abs1(complex *); /* approximate */ void c_exp(complex *, complex *); void r_cnjg(complex *, complex *); double r_imag(complex *); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c 2010-04-27 21:54:15 UTC (rev 6344) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c 2010-04-27 21:54:55 UTC (rev 6345) @@ -29,7 +29,7 @@ void superlu_abort_and_exit(char* msg) { - fprintf(stderr, msg); + fprintf(stderr, "%s\n", msg); exit (-1); } From scipy-svn at scipy.org Tue Apr 27 17:46:38 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:46:38 -0500 (CDT) Subject: [Scipy-svn] r6343 - trunk/scipy/sparse/linalg/isolve Message-ID: <20100427214638.6552439CB2B@scipy.org> Author: ptvirtan Date: 2010-04-27 16:46:38 -0500 (Tue, 27 Apr 2010) New Revision: 6343 Modified: trunk/scipy/sparse/linalg/isolve/iterative.py Log: BUG: sparse.linalg.isolve: fix bug introduced by the GMRES restrt parameter rename Modified: trunk/scipy/sparse/linalg/isolve/iterative.py =================================================================== --- trunk/scipy/sparse/linalg/isolve/iterative.py 2010-04-27 16:52:51 UTC (rev 6342) +++ trunk/scipy/sparse/linalg/isolve/iterative.py 2010-04-27 21:46:38 UTC (rev 6343) @@ -319,10 +319,11 @@ Starting guess for the solution. tol : float Relative tolerance to achieve before terminating. - restart : integer + restart : integer, optional Number of iterations between restarts. Larger values increase iteration cost, but may be necessary for convergence. - maxiter : integer + (Default: 20) + maxiter : integer, optional Maximum number of iterations. Iteration will stop after maxiter steps even if the specified tolerance has not been achieved. M : {sparse matrix, dense matrix, LinearOperator} @@ -373,6 +374,8 @@ if maxiter is None: maxiter = n*10 + if restrt is None: + restrt = 20 restrt = min(restrt, n) matvec = A.matvec From scipy-svn at scipy.org Tue Apr 27 17:55:09 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:55:09 -0500 (CDT) Subject: [Scipy-svn] r6346 - trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC Message-ID: <20100427215509.6AB7039CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:55:09 -0500 (Tue, 27 Apr 2010) New Revision: 6346 Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h Log: ENH: sparse.linalg.dsolve/SuperLU: patch SuperLU upstream sources to get some variables from Scipy Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h 2010-04-27 21:55:09 UTC (rev 6346) @@ -0,0 +1,34 @@ +#ifndef SCIPY_SLU_CONFIG_H +#define SCIPY_SLU_CONFIG_H + +#include + +/* + * Support routines + */ +void superlu_python_module_abort(char *msg); +void *superlu_python_module_malloc(size_t size); +void superlu_python_module_free(void *ptr); + +#define USER_ABORT superlu_python_module_abort +#define USER_MALLOC superlu_python_module_malloc +#define USER_FREE superlu_python_module_free + +/* + * Fortran configuration + */ +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define UpCase 1 +#else +#define NoChange 1 +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#error Uppercase and trailing slash in Fortran names not supported +#else +#define Add_ 1 +#endif +#endif + +#endif Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h 2010-04-27 21:54:55 UTC (rev 6345) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h 2010-04-27 21:55:09 UTC (rev 6346) @@ -19,6 +19,7 @@ #ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ #define __SUPERLU_CNAMES +#include "scipy_slu_config.h" #define ADD_ 0 #define ADD__ 1 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h 2010-04-27 21:54:55 UTC (rev 6345) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h 2010-04-27 21:55:09 UTC (rev 6346) @@ -21,6 +21,8 @@ */ #include +#include "scipy_slu_config.h" + /*********************************************************************** * Macros ***********************************************************************/ From scipy-svn at scipy.org Tue Apr 27 17:55:24 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:55:24 -0500 (CDT) Subject: [Scipy-svn] r6347 - trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC Message-ID: <20100427215524.8F5D639CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:55:24 -0500 (Tue, 27 Apr 2010) New Revision: 6347 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c Log: BUG: sparse.linalg.dsolve/SuperLU: sprinkle volatile into dlamc/slamc implementation to avoid an infinite loop Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c 2010-04-27 21:55:09 UTC (rev 6346) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c 2010-04-27 21:55:24 UTC (rev 6347) @@ -673,9 +673,13 @@ { /* >>Start of File<< System generated locals */ - double ret_val; + volatile double ret_val; + volatile double x; + volatile double y; - ret_val = *a + *b; + x = *a; + y = *b; + ret_val = x + y; return ret_val; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c 2010-04-27 21:55:09 UTC (rev 6346) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c 2010-04-27 21:55:24 UTC (rev 6347) @@ -684,12 +684,14 @@ /* >>Start of File<< System generated locals */ - float ret_val; + volatile float ret_val; + volatile float x; + volatile float y; + x = *a; + y = *b; + ret_val = x + y; - - ret_val = *a + *b; - return ret_val; /* End of SLAMC3 */ From scipy-svn at scipy.org Tue Apr 27 17:55:40 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:55:40 -0500 (CDT) Subject: [Scipy-svn] r6348 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100427215540.7262D39CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:55:40 -0500 (Tue, 27 Apr 2010) New Revision: 6348 Modified: trunk/scipy/sparse/linalg/dsolve/_superlu_utils.c Log: ENH: sparse.linalg.dsolve/SuperLU: add stubs for HSL routines that SuperLU tries to call Modified: trunk/scipy/sparse/linalg/dsolve/_superlu_utils.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlu_utils.c 2010-04-27 21:55:24 UTC (rev 6347) +++ trunk/scipy/sparse/linalg/dsolve/_superlu_utils.c 2010-04-27 21:55:40 UTC (rev 6348) @@ -67,3 +67,18 @@ return; } +/* + * Stubs for Harwell Subroutine Library functions that SuperLU tries to call. + */ + +void mc64id_(int *a) +{ + superlu_python_module_abort("chosen functionality not available"); +} + +void mc64ad_(int *a, int *b, int *c, int d[], int e[], double f[], + int *g, int h[], int *i, int j[], int *k, double l[], + int m[], int n[]) +{ + superlu_python_module_abort("chosen functionality not available"); +} From scipy-svn at scipy.org Tue Apr 27 17:56:00 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:56:00 -0500 (CDT) Subject: [Scipy-svn] r6349 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100427215600.B281B39CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:56:00 -0500 (Tue, 27 Apr 2010) New Revision: 6349 Added: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c Removed: trunk/scipy/sparse/linalg/dsolve/_csuperlumodule.c trunk/scipy/sparse/linalg/dsolve/_dsuperlumodule.c trunk/scipy/sparse/linalg/dsolve/_ssuperlumodule.c trunk/scipy/sparse/linalg/dsolve/_superlu.py trunk/scipy/sparse/linalg/dsolve/_zsuperlumodule.c Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c trunk/scipy/sparse/linalg/dsolve/_superluobject.h trunk/scipy/sparse/linalg/dsolve/linsolve.py trunk/scipy/sparse/linalg/dsolve/setup.py Log: ENH: sparse.linalg.dsolve: rewrite superlu Python wrappers Provide type polymorphism at run-time, rather than building separate modules for each data type. Deleted: trunk/scipy/sparse/linalg/dsolve/_csuperlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_csuperlumodule.c 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_csuperlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,203 +0,0 @@ - -/* Copyright 1999 Travis Oliphant - Permision to copy and modified this file is granted under the revised BSD license. - No warranty is expressed or IMPLIED -*/ - -/* - This file implements glue between the SuperLU library for - sparse matrix inversion and Python. -*/ - - -/* We want a low-level interface to: - xGSSV - - These will be done in separate files due to the include structure of - SuperLU. - - Define a user abort and a user malloc and free (to keep pointers - that will be released on errors) -*/ - -#include "Python.h" -#include "SuperLU/SRC/csp_defs.h" -#include "_superluobject.h" -#include - - -extern jmp_buf _superlu_py_jmpbuf; - - -static char doc_cgssv[] = "Direct inversion of sparse matrix.\n\nX = cgssv(A,B) solves A*X = B for X."; - -static PyObject *Py_cgssv (PyObject *self, PyObject *args, PyObject *kwdict) -{ - PyObject *Py_B=NULL, *Py_X=NULL; - PyArrayObject *nzvals=NULL; - PyArrayObject *colind=NULL, *rowptr=NULL; - int N, nnz; - int info; - int csc=0, permc_spec=2; - int *perm_r=NULL, *perm_c=NULL; - SuperMatrix A, B, L, U; - superlu_options_t options; - SuperLUStat_t stat; - - static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; - - /* Get input arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) - return NULL; - - if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { - PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); - return NULL; - } - - - /* Create Space for output */ - Py_X = PyArray_CopyFromObject(Py_B,PyArray_CFLOAT,1,2); - if (Py_X == NULL) return NULL; - if (csc) { - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_CFLOAT)) { - Py_DECREF(Py_X); - return NULL; - } - } - else { - if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_CFLOAT)) { - Py_DECREF(Py_X); - return NULL; - } - } - - if (DenseSuper_from_Numeric(&B, Py_X)) { - Destroy_SuperMatrix_Store(&A); - Py_DECREF(Py_X); - return NULL; - } - - /* Setup options */ - - if (setjmp(_superlu_py_jmpbuf)) goto fail; - else { - perm_c = intMalloc(N); - perm_r = intMalloc(N); - set_default_options(&options); - options.ColPerm=superlu_module_getpermc(permc_spec); - StatInit(&stat); - - /* Compute direct inverse of sparse Matrix */ - cgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); - } - - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - - return Py_BuildValue("Ni", Py_X, info); - - fail: - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - Py_XDECREF(Py_X); - return NULL; -} - -/*******************************Begin Code Adapted from PySparse *****************/ - - -static char doc_cgstrf[] = "cgstrf(A, ...)\n\ -\n\ -performs a factorization of the sparse matrix A=*(N,nnz,nzvals,rowind,colptr) and \n\ -returns a factored_lu object.\n\ -\n\ -see dgstrf for more information."; - -static PyObject * -Py_cgstrf(PyObject *self, PyObject *args, PyObject *keywds) { - - /* default value for SuperLU parameters*/ - double diag_pivot_thresh = 1.0; - double drop_tol = 0.0; - int relax = 1; - int panel_size = 10; - int permc_spec = 2; - int N, nnz; - PyArrayObject *rowind, *colptr, *nzvals; - SuperMatrix A; - PyObject *result; - - static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr","permc_spec","diag_pivot_thresh", "drop_tol", "relax", "panel_size", NULL}; - - int res = PyArg_ParseTupleAndKeywords(args, keywds, "iiO!O!O!|iddii", kwlist, - &N, &nnz, - &PyArray_Type, &nzvals, - &PyArray_Type, &rowind, - &PyArray_Type, &colptr, - &permc_spec, - &diag_pivot_thresh, - &drop_tol, - &relax, - &panel_size); - if (!res) - return NULL; - - if (!_CHECK_INTEGER(colptr) || !_CHECK_INTEGER(rowind)) { - PyErr_SetString(PyExc_TypeError, "colptr and rowind must be of type cint"); - return NULL; - } - - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, rowind, colptr, PyArray_CFLOAT)) goto fail; - - result = newSciPyLUObject(&A, diag_pivot_thresh, drop_tol, relax, panel_size,\ - permc_spec, PyArray_CFLOAT); - - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - - return result; - - fail: - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return NULL; -} - - -/*******************************End Code Adapted from PySparse *****************/ - - -static PyMethodDef cSuperLU_Methods[] = { - {"cgssv", (PyCFunction) Py_cgssv, METH_VARARGS|METH_KEYWORDS, doc_cgssv}, - {"cgstrf", (PyCFunction) Py_cgstrf, METH_VARARGS|METH_KEYWORDS, doc_cgstrf}, - /* {"_cgstrs", Py_cgstrs, METH_VARARGS, doc_cgstrs}, - {"_cgscon", Py_cgscon, METH_VARARGS, doc_cgscon}, - {"_cgsequ", Py_cgsequ, METH_VARARGS, doc_cgsequ}, - {"_claqgs", Py_claqgs, METH_VARARGS, doc_claqgs}, - {"_cgsrfs", Py_cgsrfs, METH_VARARGS, doc_cgsrfs}, */ - {NULL, NULL} -}; - - -PyMODINIT_FUNC -init_csuperlu(void) -{ - Py_InitModule("_csuperlu", cSuperLU_Methods); - import_array(); - -} - - - - Deleted: trunk/scipy/sparse/linalg/dsolve/_dsuperlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_dsuperlumodule.c 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_dsuperlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,251 +0,0 @@ - -/* Copyright 1999 Travis Oliphant - Permision to copy and modified this file is granted under the revised BSD license. - No warranty is expressed or IMPLIED -*/ - -/* - This file implements glue between the SuperLU library for - sparse matrix inversion and Python. -*/ - - -/* We want a low-level interface to: - xGSSV - - These will be done in separate files due to the include structure of - SuperLU. - - Define a user abort and a user malloc and free (to keep pointers - that will be released on errors) -*/ - -#include "Python.h" -#include "SuperLU/SRC/dsp_defs.h" -#include "_superluobject.h" -#include - -extern jmp_buf _superlu_py_jmpbuf; - - -static char doc_dgssv[] = "Direct inversion of sparse matrix.\n\nX = dgssv(A,B) solves A*X = B for X."; - -static PyObject *Py_dgssv (PyObject *self, PyObject *args, PyObject *kwdict) -{ - PyObject *Py_B=NULL, *Py_X=NULL; - PyArrayObject *nzvals=NULL; - PyArrayObject *colind=NULL, *rowptr=NULL; - int N, nnz; - int info; - int csc=0, permc_spec=2; - int *perm_r=NULL, *perm_c=NULL; - SuperMatrix A, B, L, U; - superlu_options_t options; - SuperLUStat_t stat; - - - static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; - - /* Get input arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) - return NULL; - - if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { - PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); - return NULL; - } - - /* Create Space for output */ - Py_X = PyArray_CopyFromObject(Py_B,PyArray_DOUBLE,1,2); - if (Py_X == NULL) return NULL; - - if (csc) { - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_DOUBLE)) { - Py_DECREF(Py_X); - return NULL; - } - } - else { - if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_DOUBLE)) { - Py_DECREF(Py_X); - return NULL; - } - } - - if (DenseSuper_from_Numeric(&B, Py_X)) { - Destroy_SuperMatrix_Store(&A); - Py_DECREF(Py_X); - return NULL; - } - - /* B and Py_X share same data now but Py_X "owns" it */ - - /* Setup options */ - - if (setjmp(_superlu_py_jmpbuf)) goto fail; - else { - perm_c = intMalloc(N); - perm_r = intMalloc(N); - set_default_options(&options); - options.ColPerm=superlu_module_getpermc(permc_spec); - StatInit(&stat); - - /* Compute direct inverse of sparse Matrix */ - dgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); - } - - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - return Py_BuildValue("Ni", Py_X, info); - - fail: - - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - Py_XDECREF(Py_X); - return NULL; -} - - -/*******************************Begin Code Adapted from PySparse *****************/ - - -static char doc_dgstrf[] = "dgstrf(A, ...)\n\ -\n\ -performs a factorization of the sparse matrix A=*(N,nnz,nzvals,rowind,colptr) and \n\ -returns a factored_lu object.\n\ -\n\ -arguments\n\ ----------\n\ -\n\ -Matrix to be factorized is represented as N,nnz,nzvals,rowind,colptr\n\ - as separate arguments. This is compressed sparse column representation.\n\ -\n\ -N number of rows and columns \n\ -nnz number of non-zero elements\n\ -nzvals non-zero values \n\ -rowind row-index for this column (same size as nzvals)\n\ -colptr index into rowind for first non-zero value in this column\n\ - size is (N+1). Last value should be nnz. \n\ -\n\ -additional keyword arguments:\n\ ------------------------------\n\ -permc_spec specifies the matrix ordering used for the factorization\n\ - 0: natural ordering\n\ - 1: MMD applied to the structure of A^T * A\n\ - 2: MMD applied to the structure of A^T + A\n\ - 3: COLAMD, approximate minimum degree column ordering\n\ - (default: 2)\n\ -\n\ -diag_pivot_thresh threshhold for partial pivoting.\n\ - 0.0 <= diag_pivot_thresh <= 1.0\n\ - 0.0 corresponds to no pivoting\n\ - 1.0 corresponds to partial pivoting\n\ - (default: 1.0)\n\ -\n\ -drop_tol drop tolerance parameter\n\ - 0.0 <= drop_tol <= 1.0\n\ - 0.0 corresponds to exact factorization\n\ - CAUTION: the drop_tol is not implemented in SuperLU 2.0\n\ - (default: 0.0)\n\ -\n\ -relax to control degree of relaxing supernodes\n\ - (default: 1)\n\ -\n\ -panel_size a panel consist of at most panel_size consecutive columns.\n\ - (default: 10)\n\ -"; - -static PyObject * -Py_dgstrf(PyObject *self, PyObject *args, PyObject *keywds) { - - /* default value for SuperLU parameters*/ - double diag_pivot_thresh = 1.0; - double drop_tol = 0.0; - int relax = 1; - int panel_size = 10; - int permc_spec = 2; - int N, nnz; - PyArrayObject *rowind, *colptr, *nzvals; - SuperMatrix A; - PyObject *result; - - static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr","permc_spec","diag_pivot_thresh", "drop_tol", "relax", "panel_size", NULL}; - - int res = PyArg_ParseTupleAndKeywords(args, keywds, "iiO!O!O!|iddii", kwlist, - &N, &nnz, - &PyArray_Type, &nzvals, - &PyArray_Type, &rowind, - &PyArray_Type, &colptr, - &permc_spec, - &diag_pivot_thresh, - &drop_tol, - &relax, - &panel_size); - if (!res) - return NULL; - - if (!_CHECK_INTEGER(colptr) || !_CHECK_INTEGER(rowind)) { - PyErr_SetString(PyExc_TypeError, "rowind and colptr must be of type cint"); - return NULL; - } - - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, rowind, colptr, PyArray_DOUBLE)) goto fail; - - result = newSciPyLUObject(&A, diag_pivot_thresh, drop_tol, relax, panel_size,\ - permc_spec, PyArray_DOUBLE); - if (result == NULL) goto fail; - - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return result; - - fail: - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return NULL; -} - - -/*******************************End Code Adapted from PySparse *****************/ - -static PyMethodDef dSuperLU_Methods[] = { - {"dgssv", (PyCFunction) Py_dgssv, METH_VARARGS|METH_KEYWORDS, doc_dgssv}, - {"dgstrf", (PyCFunction) Py_dgstrf, METH_VARARGS|METH_KEYWORDS, doc_dgstrf}, - /* - {"_dgstrs", Py_dgstrs, METH_VARARGS, doc_dgstrs}, - {"_dgscon", Py_dgscon, METH_VARARGS, doc_dgscon}, - {"_dgsequ", Py_dgsequ, METH_VARARGS, doc_dgsequ}, - {"_dlaqgs", Py_dlaqgs, METH_VARARGS, doc_dlaqgs}, - {"_dgsrfs", Py_dgsrfs, METH_VARARGS, doc_dgsrfs}, */ - {NULL, NULL} -}; - - -PyMODINIT_FUNC -init_dsuperlu(void) -{ - PyObject *m, *d; - - SciPySuperLUType.ob_type = &PyType_Type; - - m = Py_InitModule("_dsuperlu", dSuperLU_Methods); - d = PyModule_GetDict(m); - - PyDict_SetItemString(d, "SciPyLUType", (PyObject *)&SciPySuperLUType); - - import_array(); -} - - - - Deleted: trunk/scipy/sparse/linalg/dsolve/_ssuperlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_ssuperlumodule.c 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_ssuperlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,202 +0,0 @@ - -/* Copyright 1999 Travis Oliphant - Permision to copy and modified this file is granted under the revised BSD license. - No warranty is expressed or IMPLIED -*/ - -/* - This file implements glue between the SuperLU library for - sparse matrix inversion and Python. -*/ - - -/* We want a low-level interface to: - xGSSV - - These will be done in separate files due to the include structure of - SuperLU. - - Define a user abort and a user malloc and free (to keep pointers - that will be released on errors) -*/ - -#include "Python.h" -#include "SuperLU/SRC/ssp_defs.h" -#include "_superluobject.h" -#include - -extern jmp_buf _superlu_py_jmpbuf; - - -static char doc_sgssv[] = "Direct inversion of sparse matrix.\n\nX = sgssv(A,B) solves A*X = B for X."; - -static PyObject *Py_sgssv (PyObject *self, PyObject *args, PyObject *kwdict) -{ - PyObject *Py_B=NULL, *Py_X=NULL; - PyArrayObject *nzvals=NULL; - PyArrayObject *colind=NULL, *rowptr=NULL; - int N, nnz; - int info; - int csc=0, permc_spec=2; - int *perm_r=NULL, *perm_c=NULL; - SuperMatrix A, B, L, U; - superlu_options_t options; - SuperLUStat_t stat; - - static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; - - /* Get input arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) - return NULL; - - if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { - PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); - return NULL; - } - - /* Create Space for output */ - Py_X = PyArray_CopyFromObject(Py_B,PyArray_FLOAT,1,2); - - if (Py_X == NULL) return NULL; - - if (csc) { - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { - Py_DECREF(Py_X); - return NULL; - } - } - else { - if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { - Py_DECREF(Py_X); - return NULL; - } - } - - if (DenseSuper_from_Numeric(&B, Py_X)) { - Destroy_SuperMatrix_Store(&A); - Py_DECREF(Py_X); - return NULL; - } - /* B and Py_X share same data now but Py_X "owns" it */ - - /* Setup options */ - - if (setjmp(_superlu_py_jmpbuf)) goto fail; - else { - perm_c = intMalloc(N); - perm_r = intMalloc(N); - set_default_options(&options); - options.ColPerm=superlu_module_getpermc(permc_spec); - StatInit(&stat); - - /* Compute direct inverse of sparse Matrix */ - sgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); - } - - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - return Py_BuildValue("Ni", Py_X, info); - - fail: - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - Py_XDECREF(Py_X); - return NULL; -} - -/*******************************Begin Code Adapted from PySparse *****************/ - - -static char doc_sgstrf[] = "sgstrf(A, ...)\n\ -\n\ -performs a factorization of the sparse matrix A=*(N,nnz,nzvals,rowind,colptr) and \n\ -returns a factored_lu object.\n\ -\n\ -see dgstrf for more information."; - -static PyObject * -Py_sgstrf(PyObject *self, PyObject *args, PyObject *keywds) { - - /* default value for SuperLU parameters*/ - double diag_pivot_thresh = 1.0; - double drop_tol = 0.0; - int relax = 1; - int panel_size = 10; - int permc_spec = 2; - int N, nnz; - PyArrayObject *rowind, *colptr, *nzvals; - SuperMatrix A; - PyObject *result; - - static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr","permc_spec","diag_pivot_thresh", "drop_tol", "relax", "panel_size", NULL}; - - int res = PyArg_ParseTupleAndKeywords(args, keywds, "iiO!O!O!|iddii", kwlist, - &N, &nnz, - &PyArray_Type, &nzvals, - &PyArray_Type, &rowind, - &PyArray_Type, &colptr, - &permc_spec, - &diag_pivot_thresh, - &drop_tol, - &relax, - &panel_size); - if (!res) - return NULL; - - if (!_CHECK_INTEGER(colptr) || !_CHECK_INTEGER(rowind)) { - PyErr_SetString(PyExc_TypeError, "colptr and rowind must be of type cint"); - return NULL; - } - - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, rowind, colptr, PyArray_FLOAT)) goto fail; - - result = newSciPyLUObject(&A, diag_pivot_thresh, drop_tol, relax, panel_size,\ - permc_spec, PyArray_FLOAT); - if (result == NULL) goto fail; - - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return result; - - fail: - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return NULL; -} - - -/*******************************End Code Adapted from PySparse *****************/ - - -static PyMethodDef sSuperLU_Methods[] = { - {"sgssv", (PyCFunction) Py_sgssv, METH_VARARGS|METH_KEYWORDS, doc_sgssv}, - {"sgstrf", (PyCFunction) Py_sgstrf, METH_VARARGS|METH_KEYWORDS, doc_sgstrf}, - /* {"_sgstrs", Py_sgstrs, METH_VARARGS, doc_sgstrs}, - {"_sgscon", Py_sgscon, METH_VARARGS, doc_sgscon}, - {"_sgsequ", Py_sgsequ, METH_VARARGS, doc_sgsequ}, - {"_slaqgs", Py_slaqgs, METH_VARARGS, doc_slaqgs}, - {"_sgsrfs", Py_sgsrfs, METH_VARARGS, doc_sgsrfs}, */ - {NULL, NULL} -}; - -PyMODINIT_FUNC -init_ssuperlu(void) -{ - Py_InitModule("_ssuperlu", sSuperLU_Methods); - import_array(); - -} - - - - Deleted: trunk/scipy/sparse/linalg/dsolve/_superlu.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlu.py 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_superlu.py 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,4 +0,0 @@ -from _zsuperlu import * -from _ssuperlu import * -from _dsuperlu import * -from _csuperlu import * Added: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlumodule.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -0,0 +1,264 @@ +/* -*-c-*- */ +/* + * _superlu module + * + * Python interface to SuperLU decompositions. + */ + +/* Copyright 1999 Travis Oliphant + * + * Permision to copy and modified this file is granted under + * the revised BSD license. No warranty is expressed or IMPLIED + */ + +#include +#include + +#include "_superluobject.h" + +extern jmp_buf _superlu_py_jmpbuf; + +/* + * Data-type dependent implementations for Xgssv and Xgstrf; + * + * These have to included from separate files because of SuperLU include + * structure. + */ + +static PyObject * +Py_gssv(PyObject *self, PyObject *args, PyObject *kwdict) +{ + PyObject *Py_B=NULL, *Py_X=NULL; + PyArrayObject *nzvals=NULL; + PyArrayObject *colind=NULL, *rowptr=NULL; + int N, nnz; + int info; + int csc=0, permc_spec=2; + int *perm_r=NULL, *perm_c=NULL; + SuperMatrix A, B, L, U; + superlu_options_t options; + SuperLUStat_t stat; + int type; + + static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", + "permc_spec",NULL}; + + /* Get input arguments */ + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, + &N, &nnz, &PyArray_Type, &nzvals, + &PyArray_Type, &colind, &PyArray_Type, + &rowptr, &Py_B, &csc, &permc_spec)) { + return NULL; + } + + if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { + PyErr_SetString(PyExc_TypeError, + "colind and rowptr must be of type cint"); + return NULL; + } + + type = PyArray_TYPE(nzvals); + if (!CHECK_SLU_TYPE(type)) { + PyErr_SetString(PyExc_TypeError, + "nzvals is not of a type supported by SuperLU"); + return NULL; + } + + /* Create Space for output */ + Py_X = PyArray_CopyFromObject(Py_B, type, 1, 2); + if (Py_X == NULL) return NULL; + + if (csc) { + if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, + type)) { + Py_DECREF(Py_X); + return NULL; + } + } + else { + if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, + type)) { + Py_DECREF(Py_X); + return NULL; + } + } + + if (DenseSuper_from_Numeric(&B, Py_X)) { + Destroy_SuperMatrix_Store(&A); + Py_DECREF(Py_X); + return NULL; + } + + /* B and Py_X share same data now but Py_X "owns" it */ + + /* Setup options */ + + if (setjmp(_superlu_py_jmpbuf)) { + goto fail; + } + else { + perm_c = intMalloc(N); + perm_r = intMalloc(N); + set_default_options(&options); + options.ColPerm = superlu_module_getpermc(permc_spec); + StatInit(&stat); + + /* Compute direct inverse of sparse Matrix */ + gssv(type, &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); + } + + SUPERLU_FREE(perm_r); + SUPERLU_FREE(perm_c); + Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ + Destroy_SuperMatrix_Store(&B); + Destroy_SuperNode_Matrix(&L); + Destroy_CompCol_Matrix(&U); + StatFree(&stat); + + return Py_BuildValue("Ni", Py_X, info); + +fail: + SUPERLU_FREE(perm_r); + SUPERLU_FREE(perm_c); + Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ + Destroy_SuperMatrix_Store(&B); + Destroy_SuperNode_Matrix(&L); + Destroy_CompCol_Matrix(&U); + StatFree(&stat); + Py_XDECREF(Py_X); + return NULL; +} + +static PyObject * +Py_gstrf(PyObject *self, PyObject *args, PyObject *keywds) +{ + /* default value for SuperLU parameters*/ + double diag_pivot_thresh = 1.0; + int relax = 1; + int panel_size = 10; + int permc_spec = 2; + int N, nnz; + PyArrayObject *rowind, *colptr, *nzvals; + SuperMatrix A; + PyObject *result; + int type; + + static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr", + "permc_spec","diag_pivot_thresh", + "relax", "panel_size", NULL}; + + int res = PyArg_ParseTupleAndKeywords( + args, keywds, "iiO!O!O!|iddii", kwlist, + &N, &nnz, + &PyArray_Type, &nzvals, + &PyArray_Type, &rowind, + &PyArray_Type, &colptr, + &permc_spec, + &diag_pivot_thresh, + &relax, + &panel_size); + + if (!res) + return NULL; + + if (!_CHECK_INTEGER(colptr) || !_CHECK_INTEGER(rowind)) { + PyErr_SetString(PyExc_TypeError, + "rowind and colptr must be of type cint"); + return NULL; + } + + type = PyArray_TYPE(nzvals); + if (!CHECK_SLU_TYPE(type)) { + PyErr_SetString(PyExc_TypeError, + "nzvals is not of a type supported by SuperLU"); + return NULL; + } + + if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, rowind, colptr, + type)) { + goto fail; + } + + result = newSciPyLUObject(&A, diag_pivot_thresh, relax, + panel_size, permc_spec, type); + if (result == NULL) { + goto fail; + } + + /* arrays of input matrix will not be freed */ + Destroy_SuperMatrix_Store(&A); + return result; + +fail: + /* arrays of input matrix will not be freed */ + Destroy_SuperMatrix_Store(&A); + return NULL; +} + +static char gssv_doc[] = "Direct inversion of sparse matrix.\n\nX = gssv(A,B) solves A*X = B for X."; + +static char gstrf_doc[] = "gstrf(A, ...)\n\ +\n\ +performs a factorization of the sparse matrix A=*(N,nnz,nzvals,rowind,colptr) and \n\ +returns a factored_lu object.\n\ +\n\ +arguments\n\ +---------\n\ +\n\ +Matrix to be factorized is represented as N,nnz,nzvals,rowind,colptr\n\ + as separate arguments. This is compressed sparse column representation.\n\ +\n\ +N number of rows and columns \n\ +nnz number of non-zero elements\n\ +nzvals non-zero values \n\ +rowind row-index for this column (same size as nzvals)\n\ +colptr index into rowind for first non-zero value in this column\n\ + size is (N+1). Last value should be nnz. \n\ +\n\ +additional keyword arguments:\n\ +-----------------------------\n\ +permc_spec specifies the matrix ordering used for the factorization\n\ + 0: natural ordering\n\ + 1: MMD applied to the structure of A^T * A\n\ + 2: MMD applied to the structure of A^T + A\n\ + 3: COLAMD, approximate minimum degree column ordering\n\ + (default: 2)\n\ +\n\ +diag_pivot_thresh threshhold for partial pivoting.\n\ + 0.0 <= diag_pivot_thresh <= 1.0\n\ + 0.0 corresponds to no pivoting\n\ + 1.0 corresponds to partial pivoting\n\ + (default: 1.0)\n\ +\n\ +relax to control degree of relaxing supernodes\n\ + (default: 1)\n\ +\n\ +panel_size a panel consist of at most panel_size consecutive columns.\n\ + (default: 10)\n\ +"; + + +/* + * Main SuperLU module + */ + +static PyMethodDef SuperLU_Methods[] = { + {"gssv", (PyCFunction)Py_gssv, METH_VARARGS|METH_KEYWORDS, gssv_doc}, + {"gstrf", (PyCFunction)Py_gstrf, METH_VARARGS|METH_KEYWORDS, gstrf_doc}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +init_superlu(void) +{ + PyObject *m, *d; + + SciPySuperLUType.ob_type = &PyType_Type; + + m = Py_InitModule("_superlu", SuperLU_Methods); + d = PyModule_GetDict(m); + + PyDict_SetItemString(d, "SciPyLUType", (PyObject *)&SciPySuperLUType); + + import_array(); +} Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,6 +1,5 @@ +#include -#include "Python.h" -#include "SuperLU/SRC/zsp_defs.h" #define NO_IMPORT_ARRAY #include "_superluobject.h" #include @@ -37,12 +36,18 @@ static char *kwlist[] = {"rhs","trans",NULL}; + if (!CHECK_SLU_TYPE(self->type)) { + PyErr_SetString(PyExc_ValueError, "unsupported data type"); + return NULL; + } + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|c", kwlist, &PyArray_Type, &b, &itrans)) return NULL; - /* solve transposed system: matrix was passed row-wise instead of column-wise */ + /* solve transposed system: matrix was passed row-wise instead of + * column-wise */ if (itrans == 'n' || itrans == 'N') trans = NOTRANS; else if (itrans == 't' || itrans == 'T') @@ -67,26 +72,13 @@ StatInit(&stat); /* Solve the system, overwriting vector x. */ - switch(self->type) { - case PyArray_FLOAT: - sgstrs(trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); - break; - case PyArray_DOUBLE: - dgstrs(trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); - break; - case PyArray_CFLOAT: - cgstrs(trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); - break; - case PyArray_CDOUBLE: - zgstrs(trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); - break; - default: - PyErr_SetString(PyExc_TypeError, "Invalid type for array."); - goto fail; - } + gstrs(self->type, + trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, + &stat, &info); if (info) { - PyErr_SetString(PyExc_SystemError, "gstrs was called with invalid arguments"); + PyErr_SetString(PyExc_SystemError, + "gstrs was called with invalid arguments"); goto fail; } @@ -95,7 +87,7 @@ StatFree(&stat); return (PyObject *)x; - fail: +fail: Destroy_SuperMatrix_Store(&B); StatFree(&stat); Py_XDECREF(x); @@ -197,32 +189,25 @@ ldx = m; } - if (setjmp(_superlu_py_jmpbuf)) return -1; - else - switch (aX->descr->type_num) { - case PyArray_FLOAT: - sCreate_Dense_Matrix(X, m, n, (float *)aX->data, ldx, SLU_DN, SLU_S, SLU_GE); - break; - case PyArray_DOUBLE: - dCreate_Dense_Matrix(X, m, n, (double *)aX->data, ldx, SLU_DN, SLU_D, SLU_GE); - break; - case PyArray_CFLOAT: - cCreate_Dense_Matrix(X, m, n, (complex *)aX->data, ldx, SLU_DN, SLU_C, SLU_GE); - break; - case PyArray_CDOUBLE: - zCreate_Dense_Matrix(X, m, n, (doublecomplex *)aX->data, ldx, SLU_DN, SLU_Z, SLU_GE); - break; - default: - PyErr_SetString(PyExc_TypeError, "Invalid type for Numeric array."); - return -1; + if (setjmp(_superlu_py_jmpbuf)) + return -1; + else { + if (!CHECK_SLU_TYPE(aX->descr->type_num)) { + PyErr_SetString(PyExc_ValueError, "unsupported data type"); + return -1; } - + Create_Dense_Matrix(aX->descr->type_num, X, m, n, + aX->data, ldx, SLU_DN, + NPY_TYPECODE_TO_SLU(aX->descr->type_num), SLU_GE); + } return 0; } /* Natively handles Compressed Sparse Row and CSC */ -int NRFormat_from_spMatrix(SuperMatrix *A, int m, int n, int nnz, PyArrayObject *nzvals, PyArrayObject *colind, PyArrayObject *rowptr, int typenum) +int NRFormat_from_spMatrix(SuperMatrix *A, int m, int n, int nnz, + PyArrayObject *nzvals, PyArrayObject *colind, + PyArrayObject *rowptr, int typenum) { int err = 0; @@ -234,34 +219,26 @@ return -1; } - if (setjmp(_superlu_py_jmpbuf)) return -1; - else - switch (nzvals->descr->type_num) { - case PyArray_FLOAT: - sCreate_CompRow_Matrix(A, m, n, nnz, (float *)nzvals->data, (int *)colind->data, \ - (int *)rowptr->data, SLU_NR, SLU_S, SLU_GE); - break; - case PyArray_DOUBLE: - dCreate_CompRow_Matrix(A, m, n, nnz, (double *)nzvals->data, (int *)colind->data, \ - (int *)rowptr->data, SLU_NR, SLU_D, SLU_GE); - break; - case PyArray_CFLOAT: - cCreate_CompRow_Matrix(A, m, n, nnz, (complex *)nzvals->data, (int *)colind->data, \ - (int *)rowptr->data, SLU_NR, SLU_C, SLU_GE); - break; - case PyArray_CDOUBLE: - zCreate_CompRow_Matrix(A, m, n, nnz, (doublecomplex *)nzvals->data, (int *)colind->data, \ - (int *)rowptr->data, SLU_NR, SLU_Z, SLU_GE); - break; - default: + if (setjmp(_superlu_py_jmpbuf)) + return -1; + else { + if (!CHECK_SLU_TYPE(nzvals->descr->type_num)) { PyErr_SetString(PyExc_TypeError, "Invalid type for array."); return -1; } + Create_CompRow_Matrix(nzvals->descr->type_num, + A, m, n, nnz, nzvals->data, (int *)colind->data, + (int *)rowptr->data, SLU_NR, + NPY_TYPECODE_TO_SLU(nzvals->descr->type_num), + SLU_GE); + } return 0; } -int NCFormat_from_spMatrix(SuperMatrix *A, int m, int n, int nnz, PyArrayObject *nzvals, PyArrayObject *rowind, PyArrayObject *colptr, int typenum) +int NCFormat_from_spMatrix(SuperMatrix *A, int m, int n, int nnz, + PyArrayObject *nzvals, PyArrayObject *rowind, + PyArrayObject *colptr, int typenum) { int err=0; @@ -274,29 +251,19 @@ } - if (setjmp(_superlu_py_jmpbuf)) return -1; - else - switch (nzvals->descr->type_num) { - case PyArray_FLOAT: - sCreate_CompCol_Matrix(A, m, n, nnz, (float *)nzvals->data, (int *)rowind->data, \ - (int *)colptr->data, SLU_NC, SLU_S, SLU_GE); - break; - case PyArray_DOUBLE: - dCreate_CompCol_Matrix(A, m, n, nnz, (double *)nzvals->data, (int *)rowind->data, \ - (int *)colptr->data, SLU_NC, SLU_D, SLU_GE); - break; - case PyArray_CFLOAT: - cCreate_CompCol_Matrix(A, m, n, nnz, (complex *)nzvals->data, (int *)rowind->data, \ - (int *)colptr->data, SLU_NC, SLU_C, SLU_GE); - break; - case PyArray_CDOUBLE: - zCreate_CompCol_Matrix(A, m, n, nnz, (doublecomplex *)nzvals->data, (int *)rowind->data, \ - (int *)colptr->data, SLU_NC, SLU_Z, SLU_GE); - break; - default: + if (setjmp(_superlu_py_jmpbuf)) + return -1; + else { + if (!CHECK_SLU_TYPE(nzvals->descr->type_num)) { PyErr_SetString(PyExc_TypeError, "Invalid type for array."); return -1; } + Create_CompCol_Matrix(nzvals->descr->type_num, + A, m, n, nnz, nzvals->data, (int *)rowind->data, + (int *)colptr->data, SLU_NC, + NPY_TYPECODE_TO_SLU(nzvals->descr->type_num), + SLU_GE); + } return 0; } @@ -319,7 +286,7 @@ PyObject * newSciPyLUObject(SuperMatrix *A, double diag_pivot_thresh, - double drop_tol, int relax, int panel_size, int permc_spec, + int relax, int panel_size, int permc_spec, int intype) { @@ -351,46 +318,30 @@ etree = intMalloc(n); self->perm_r = intMalloc(n); self->perm_c = intMalloc(n); - + set_default_options(&options); options.ColPerm=superlu_module_getpermc(permc_spec); options.DiagPivotThresh = diag_pivot_thresh; StatInit(&stat); get_perm_c(permc_spec, A, self->perm_c); /* calc column permutation */ - sp_preorder(&options, A, self->perm_c, etree, &AC); /* apply column permutation */ - + sp_preorder(&options, A, self->perm_c, etree, &AC); /* apply column + * permutation */ /* Perform factorization */ - switch (A->Dtype) { - case SLU_S: - sgstrf(&options, &AC, (float) drop_tol, relax, panel_size, - etree, NULL, lwork, self->perm_c, self->perm_r, - &self->L, &self->U, &stat, &info); - break; - case SLU_D: - dgstrf(&options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, self->perm_c, self->perm_r, - &self->L, &self->U, &stat, &info); - break; - case SLU_C: - cgstrf(&options, &AC, (float) drop_tol, relax, panel_size, - etree, NULL, lwork, self->perm_c, self->perm_r, - &self->L, &self->U, &stat, &info); - break; - case SLU_Z: - zgstrf(&options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, self->perm_c, self->perm_r, - &self->L, &self->U, &stat, &info); - break; - default: + if (!CHECK_SLU_TYPE(SLU_TYPECODE_TO_NPY(A->Dtype))) { PyErr_SetString(PyExc_ValueError, "Invalid type in SuperMatrix."); goto fail; } - + gstrf(SLU_TYPECODE_TO_NPY(A->Dtype), + &options, &AC, relax, panel_size, + etree, NULL, lwork, self->perm_c, self->perm_r, + &self->L, &self->U, &stat, &info); + if (info) { if (info < 0) - PyErr_SetString(PyExc_SystemError, "dgstrf was called with invalid arguments"); + PyErr_SetString(PyExc_SystemError, + "gstrf was called with invalid arguments"); else { if (info <= n) PyErr_SetString(PyExc_RuntimeError, "Factor is exactly singular"); @@ -399,7 +350,7 @@ } goto fail; } - + /* free memory */ SUPERLU_FREE(etree); Destroy_CompCol_Permuted(&AC); @@ -407,7 +358,7 @@ return (PyObject *)self; - fail: +fail: SUPERLU_FREE(etree); Destroy_CompCol_Permuted(&AC); StatFree(&stat); Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,105 +1,118 @@ -#ifndef __SUPERLU_OBJECT /* allow multiple inclusions */ +#ifndef __SUPERLU_OBJECT #define __SUPERLU_OBJECT #include "Python.h" -#define PY_ARRAY_UNIQUE_SYMBOL scipy_superlu +#include "SuperLU/SRC/slu_zdefs.h" +#define PY_ARRAY_UNIQUE_SYMBOL _scipy_sparse_superlu_ARRAY_API #include "numpy/arrayobject.h" -#include "SuperLU/SRC/util.h" -#include "SuperLU/SRC/scomplex.h" -#include "SuperLU/SRC/dcomplex.h" +#include "SuperLU/SRC/slu_util.h" +#include "SuperLU/SRC/slu_dcomplex.h" +#include "SuperLU/SRC/slu_scomplex.h" #define _CHECK_INTEGER(x) (PyArray_ISINTEGER(x) && (x)->descr->elsize == sizeof(int)) -/*********************************************************************** +/* * SuperLUObject definition */ - -typedef struct SciPyLUObject { - PyObject_VAR_HEAD - int m,n; - SuperMatrix L; - SuperMatrix U; - int *perm_r; - int *perm_c; - int type; +typedef struct { + PyObject_VAR_HEAD + int m,n; + SuperMatrix L; + SuperMatrix U; + int *perm_r; + int *perm_c; + int type; } SciPyLUObject; extern PyTypeObject SciPySuperLUType; int DenseSuper_from_Numeric(SuperMatrix *, PyObject *); -int NRFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, PyArrayObject *, PyArrayObject *, int); -int NCFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, PyArrayObject *, PyArrayObject *, int); +int NRFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, + PyArrayObject *, PyArrayObject *, int); +int NCFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, + PyArrayObject *, PyArrayObject *, int); colperm_t superlu_module_getpermc(int); -PyObject *newSciPyLUObject(SuperMatrix *, double, double, int, int, int, int); +PyObject *newSciPyLUObject(SuperMatrix *, double, int, int, int, int); -void -dgstrf (superlu_options_t *, SuperMatrix *, double, - int, int, int *, void *, int, - int *, int *, SuperMatrix *, SuperMatrix *, - SuperLUStat_t *, int *); -void -sgstrf (superlu_options_t *, SuperMatrix *, float, - int, int, int *, void *, int, - int *, int *, SuperMatrix *, SuperMatrix *, - SuperLUStat_t *, int *); +/* + * Definitions for other SuperLU data types than Z, + * and type-generic definitions. + */ -void -cgstrf (superlu_options_t *, SuperMatrix *, float, - int, int, int *, void *, int, - int *, int *, SuperMatrix *, SuperMatrix *, - SuperLUStat_t *, int *); +#define CHECK_SLU_TYPE(type) \ + (type == NPY_FLOAT || type == NPY_DOUBLE || type == NPY_CFLOAT || type == NPY_CDOUBLE) -void -dgstrs (trans_t, SuperMatrix *, SuperMatrix *, - int *, int *, SuperMatrix *, - SuperLUStat_t *, int *); +#define TYPE_GENERIC_FUNC(name, returntype) \ + returntype s##name(name##_ARGS); \ + returntype d##name(name##_ARGS); \ + returntype c##name(name##_ARGS); \ + static returntype name(int type, name##_ARGS) \ + { \ + switch(type) { \ + case NPY_FLOAT: s##name(name##_ARGS_REF); break; \ + case NPY_DOUBLE: d##name(name##_ARGS_REF); break; \ + case NPY_CFLOAT: c##name(name##_ARGS_REF); break; \ + case NPY_CDOUBLE: z##name(name##_ARGS_REF); break; \ + default: return; \ + } \ + } -void -sgstrs (trans_t, SuperMatrix *, SuperMatrix *, - int *, int *, SuperMatrix *, - SuperLUStat_t *, int *); +#define SLU_TYPECODE_TO_NPY(s) \ + ( ((s) == SLU_S) ? NPY_FLOAT : \ + ((s) == SLU_D) ? NPY_DOUBLE : \ + ((s) == SLU_C) ? NPY_CFLOAT : \ + ((s) == SLU_Z) ? NPY_CDOUBLE : -1) -void -cgstrs (trans_t, SuperMatrix *, SuperMatrix *, - int *, int *, SuperMatrix *, - SuperLUStat_t *, int *); +#define NPY_TYPECODE_TO_SLU(s) \ + ( ((s) == NPY_FLOAT) ? SLU_S : \ + ((s) == NPY_DOUBLE) ? SLU_D : \ + ((s) == NPY_CFLOAT) ? SLU_C : \ + ((s) == NPY_CDOUBLE) ? SLU_Z : -1) -void -sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int, Stype_t, Dtype_t, Mtype_t); -void -dCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int, Stype_t, Dtype_t, Mtype_t); -void -cCreate_Dense_Matrix(SuperMatrix *, int, int, complex *, int, Stype_t, Dtype_t, Mtype_t); +#define gstrf_ARGS \ + superlu_options_t *a, SuperMatrix *b, \ + int c, int d, int *e, void *f, int g, \ + int *h, int *i, SuperMatrix *j, SuperMatrix *k, \ + SuperLUStat_t *l, int *m +#define gstrf_ARGS_REF a,b,c,d,e,f,g,h,i,j,k,l,m -void -sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, - float *, int *, int *, - Stype_t, Dtype_t, Mtype_t); +#define gsitrf_ARGS gstrf_ARGS +#define gsitrf_ARGS_REF gstrf_ARGS_REF -void -dCreate_CompRow_Matrix(SuperMatrix *, int, int, int, - double *, int *, int *, - Stype_t, Dtype_t, Mtype_t); +#define gstrs_ARGS \ + trans_t a, SuperMatrix *b, SuperMatrix *c, \ + int *d, int *e, SuperMatrix *f, \ + SuperLUStat_t *g, int *h +#define gstrs_ARGS_REF a,b,c,d,e,f,g,h -void -cCreate_CompRow_Matrix(SuperMatrix *, int, int, int, - complex *, int *, int *, - Stype_t, Dtype_t, Mtype_t); +#define gssv_ARGS \ + superlu_options_t *a, SuperMatrix *b, int *c, int *d, \ + SuperMatrix *e, SuperMatrix *f, SuperMatrix *g, \ + SuperLUStat_t *h, int *i +#define gssv_ARGS_REF a,b,c,d,e,f,g,h,i -void -sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, - float *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -void -dCreate_CompCol_Matrix(SuperMatrix *, int, int, int, - double *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -void -cCreate_CompCol_Matrix(SuperMatrix *, int, int, int, - complex *, int *, int *, - Stype_t, Dtype_t, Mtype_t); +#define Create_Dense_Matrix_ARGS \ + SuperMatrix *a, int b, int c, void *d, int e, \ + Stype_t f, Dtype_t g, Mtype_t h +#define Create_Dense_Matrix_ARGS_REF a,b,c,d,e,f,g,h +#define Create_CompRow_Matrix_ARGS \ + SuperMatrix *a, int b, int c, int d, \ + void *e, int *f, int *g, \ + Stype_t h, Dtype_t i, Mtype_t j +#define Create_CompRow_Matrix_ARGS_REF a,b,c,d,e,f,g,h,i,j +#define Create_CompCol_Matrix_ARGS Create_CompRow_Matrix_ARGS +#define Create_CompCol_Matrix_ARGS_REF Create_CompRow_Matrix_ARGS_REF + +TYPE_GENERIC_FUNC(gstrf, void); +TYPE_GENERIC_FUNC(gsitrf, void); +TYPE_GENERIC_FUNC(gstrs, void); +TYPE_GENERIC_FUNC(gssv, void); +TYPE_GENERIC_FUNC(Create_Dense_Matrix, void); +TYPE_GENERIC_FUNC(Create_CompRow_Matrix, void); +TYPE_GENERIC_FUNC(Create_CompCol_Matrix, void); + #endif /* __SUPERLU_OBJECT */ Deleted: trunk/scipy/sparse/linalg/dsolve/_zsuperlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_zsuperlumodule.c 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/_zsuperlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,210 +0,0 @@ - -/* Copyright 1999 Travis Oliphant - Permision to copy and modified this file is granted under the revised BSD license. - No warranty is expressed or IMPLIED - - Changes: 2004 converted to SuperLU_3.0 and added factor and solve routines for - more flexible handling. - - Also added NC (compressed sparse column handling -- best to use CSC) -*/ - -/* - This file implements glue between the SuperLU library for - sparse matrix inversion and Python. -*/ - - -/* We want a low-level interface to: - xGSSV - xgstrf -- factor - xgstrs -- solve - - These will be done in separate files due to the include structure of - SuperLU. - - Define a user abort and a user malloc and free (to keep pointers - that will be released on errors) -*/ - -#include "Python.h" -#include "SuperLU/SRC/zsp_defs.h" -#include "_superluobject.h" -#include - -extern jmp_buf _superlu_py_jmpbuf; - - -static char doc_zgssv[] = "Direct inversion of sparse matrix.\n\nX = zgssv(A,B) solves A*X = B for X."; - -static PyObject *Py_zgssv (PyObject *self, PyObject *args, PyObject *kwdict) -{ - PyObject *Py_B=NULL, *Py_X=NULL; - PyArrayObject *nzvals=NULL; - PyArrayObject *colind=NULL, *rowptr=NULL; - int N, nnz; - int csc=0, permc_spec=2; - int info; - int *perm_r=NULL, *perm_c=NULL; - SuperMatrix A, B, L, U; - superlu_options_t options; - SuperLUStat_t stat; - - static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; - - /* Get input arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) - return NULL; - - if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { - PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); - return NULL; - } - - /* Create Space for output */ - Py_X = PyArray_CopyFromObject(Py_B,PyArray_CDOUBLE,1,2); - if (Py_X == NULL) return NULL; - if (csc) { - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_CDOUBLE)) { - Py_DECREF(Py_X); return NULL; - } - } - else { - if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_CDOUBLE)) { - Py_DECREF(Py_X); return NULL; - } - } - if (DenseSuper_from_Numeric(&B, Py_X)) { - Destroy_SuperMatrix_Store(&A); - Py_DECREF(Py_X); - return NULL; - } - - /* B and Py_X share same data now but Py_X "owns" it */ - - /* Setup options */ - - if (setjmp(_superlu_py_jmpbuf)) goto fail; - else { - perm_c = intMalloc(N); - perm_r = intMalloc(N); - set_default_options(&options); - options.ColPerm=superlu_module_getpermc(permc_spec); - StatInit(&stat); - - /* Compute direct inverse of sparse Matrix */ - zgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); - } - - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - - return Py_BuildValue("Ni", Py_X, info); - - fail: - SUPERLU_FREE(perm_r); - SUPERLU_FREE(perm_c); - Destroy_SuperMatrix_Store(&A); - Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); - StatFree(&stat); - Py_XDECREF(Py_X); - return NULL; -} - -/*******************************Begin Code Adapted from PySparse *****************/ - -static char doc_zgstrf[] = "zgstrf(A, ...)\n\ -\n\ -performs a factorization of the sparse matrix A=*(N,nnz,nzvals,rowind,colptr) and \n\ -returns a factored_lu object.\n\ -\n\ -see dgstrf for more information."; - -static PyObject * -Py_zgstrf(PyObject *self, PyObject *args, PyObject *keywds) { - - /* default value for SuperLU parameters*/ - double diag_pivot_thresh = 1.0; - double drop_tol = 0.0; - int relax = 1; - int panel_size = 10; - int permc_spec = 2; - int N, nnz; - PyArrayObject *rowind, *colptr, *nzvals; - SuperMatrix A; - PyObject *result; - - static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr","permc_spec","diag_pivot_thresh", "drop_tol", "relax", "panel_size", NULL}; - - int res = PyArg_ParseTupleAndKeywords(args, keywds, "iiO!O!O!|iddii", kwlist, - &N, &nnz, - &PyArray_Type, &nzvals, - &PyArray_Type, &rowind, - &PyArray_Type, &colptr, - &permc_spec, - &diag_pivot_thresh, - &drop_tol, - &relax, - &panel_size); - if (!res) - return NULL; - - - if (!_CHECK_INTEGER(colptr) || !_CHECK_INTEGER(rowind)) { - PyErr_SetString(PyExc_TypeError, "colptr and rowind must be of type cint"); - return NULL; - } - - if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, rowind, colptr, PyArray_CDOUBLE)) goto fail; - - result = newSciPyLUObject(&A, diag_pivot_thresh, drop_tol, relax, panel_size,\ - permc_spec, PyArray_CDOUBLE); - if (result == NULL) goto fail; - - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return result; - - fail: - Destroy_SuperMatrix_Store(&A); /* arrays of input matrix will not be freed */ - return NULL; -} - - -/*******************************End Code Adapted from PySparse *****************/ - - -static PyMethodDef zSuperLU_Methods[] = { - {"zgssv", (PyCFunction) Py_zgssv, METH_VARARGS|METH_KEYWORDS, doc_zgssv}, - {"zgstrf", (PyCFunction) Py_zgstrf, METH_VARARGS|METH_KEYWORDS, doc_zgstrf}, - /* {"zgstrs", (PyCFunction) Py_zgstrs, METH_VARARGS|METH_KEYWORDS, doc_zgstrs}, - {"_zgscon", Py_zgscon, METH_VARARGS, doc_zgscon}, - {"_zgsequ", Py_zgsequ, METH_VARARGS, doc_zgsequ}, - {"_zlaqgs", Py_zlaqgs, METH_VARARGS, doc_zlaqgs}, - {"_zgsrfs", Py_zgsrfs, METH_VARARGS, doc_zgsrfs}, */ - {NULL, NULL} -}; - - -/* This should be imported first */ -PyMODINIT_FUNC -init_zsuperlu(void) -{ - - Py_InitModule("_zsuperlu", zSuperLU_Methods); - - import_array(); - - if (PyErr_Occurred()) - Py_FatalError("can't initialize module zsuperlu"); -} - - - - Modified: trunk/scipy/sparse/linalg/dsolve/linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:56:00 UTC (rev 6349) @@ -20,10 +20,6 @@ __all__ = [ 'use_solver', 'spsolve', 'splu', 'factorized' ] -#convert numpy char to superLU char -superLU_transtabl = {'f':'s', 'd':'d', 'F':'c', 'D':'z'} - - def use_solver( **kwargs ): """ Valid keyword arguments with defaults (other ignored): @@ -93,13 +89,9 @@ else: flag = 0 # CSR format - ftype = superLU_transtabl[A.dtype.char] - - gssv = eval('_superlu.' + ftype + 'gssv') b = asarray(b, dtype=A.dtype) + return _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag, permc_spec)[0] - return gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag, permc_spec)[0] - def splu(A, permc_spec=2, diag_pivot_thresh=1.0, drop_tol=0.0, relax=1, panel_size=10): """ @@ -122,12 +114,9 @@ if (M != N): raise ValueError, "can only factor square matrices" #is this true? - ftype = superLU_transtabl[A.dtype.char] + return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr, permc_spec, + diag_pivot_thresh, drop_tol, relax, panel_size) - gstrf = eval('_superlu.' + ftype + 'gstrf') - return gstrf(N, A.nnz, A.data, A.indices, A.indptr, permc_spec, - diag_pivot_thresh, drop_tol, relax, panel_size) - def factorized( A ): """ Return a fuction for solving a sparse linear system, with A pre-factorized. Modified: trunk/scipy/sparse/linalg/dsolve/setup.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/setup.py 2010-04-27 21:55:40 UTC (rev 6348) +++ trunk/scipy/sparse/linalg/dsolve/setup.py 2010-04-27 21:56:00 UTC (rev 6349) @@ -1,6 +1,7 @@ #!/usr/bin/env python -from os.path import join +from os.path import join, dirname import sys +import os def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration @@ -16,45 +17,23 @@ superlu_defs = [] superlu_defs.append(('USE_VENDOR_BLAS',1)) + superlu_src = os.path.join(dirname(__file__), 'SuperLU', 'SRC') + config.add_library('superlu_src', sources = [join(superlu_src,'*.c')], - macros = superlu_defs + macros = superlu_defs, + include_dirs=[superlu_src], ) - #SuperLU/SRC/util.h has been modifed to use these by default - #macs = [('USER_ABORT','superlu_python_module_abort'), - # ('USER_MALLOC','superlu_python_module_malloc'), - # ('USER_FREE','superlu_python_module_free')] - # Extension - config.add_extension('_zsuperlu', - sources = ['_zsuperlumodule.c','_superlu_utils.c', + config.add_extension('_superlu', + sources = ['_superlumodule.c', + '_superlu_utils.c', '_superluobject.c'], libraries = ['superlu_src'], - extra_info = lapack_opt + extra_info = lapack_opt, ) - config.add_extension('_dsuperlu', - sources = ['_dsuperlumodule.c','_superlu_utils.c', - '_superluobject.c'], - libraries = ['superlu_src'], - extra_info = lapack_opt - ) - - config.add_extension('_csuperlu', - sources = ['_csuperlumodule.c','_superlu_utils.c', - '_superluobject.c'], - libraries = ['superlu_src'], - extra_info = lapack_opt - ) - - config.add_extension('_ssuperlu', - sources = ['_ssuperlumodule.c','_superlu_utils.c', - '_superluobject.c'], - libraries = ['superlu_src'], - extra_info = lapack_opt - ) - config.add_subpackage('umfpack') return config From scipy-svn at scipy.org Tue Apr 27 17:56:20 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:56:20 -0500 (CDT) Subject: [Scipy-svn] r6350 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100427215620.4396239CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:56:19 -0500 (Tue, 27 Apr 2010) New Revision: 6350 Modified: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c trunk/scipy/sparse/linalg/dsolve/_superluobject.c trunk/scipy/sparse/linalg/dsolve/_superluobject.h trunk/scipy/sparse/linalg/dsolve/linsolve.py Log: ENH: sparse.linalg.dsolve: allow passing any SuperLU options to the factorized in the internal gstrf interface. Enable support for incomplete LU Modified: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:56:00 UTC (rev 6349) +++ trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:56:19 UTC (rev 6350) @@ -33,21 +33,22 @@ PyArrayObject *colind=NULL, *rowptr=NULL; int N, nnz; int info; - int csc=0, permc_spec=2; + int csc=0; int *perm_r=NULL, *perm_c=NULL; SuperMatrix A, B, L, U; superlu_options_t options; SuperLUStat_t stat; + PyObject *option_dict = NULL; int type; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", - "permc_spec",NULL}; + "options",NULL}; /* Get input arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|iO", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, - &rowptr, &Py_B, &csc, &permc_spec)) { + &rowptr, &Py_B, &csc, &option_dict)) { return NULL; } @@ -64,6 +65,10 @@ return NULL; } + if (!set_superlu_options_from_dict(&options, 0, option_dict)) { + return NULL; + } + /* Create Space for output */ Py_X = PyArray_CopyFromObject(Py_B, type, 1, 2); if (Py_X == NULL) return NULL; @@ -99,8 +104,6 @@ else { perm_c = intMalloc(N); perm_r = intMalloc(N); - set_default_options(&options); - options.ColPerm = superlu_module_getpermc(permc_spec); StatInit(&stat); /* Compute direct inverse of sparse Matrix */ @@ -133,30 +136,30 @@ Py_gstrf(PyObject *self, PyObject *args, PyObject *keywds) { /* default value for SuperLU parameters*/ - double diag_pivot_thresh = 1.0; int relax = 1; int panel_size = 10; - int permc_spec = 2; int N, nnz; PyArrayObject *rowind, *colptr, *nzvals; SuperMatrix A; PyObject *result; + PyObject *option_dict = NULL; int type; - + int ilu = 0; + static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr", - "permc_spec","diag_pivot_thresh", - "relax", "panel_size", NULL}; + "options", "relax", "panel_size", "ilu", + NULL}; int res = PyArg_ParseTupleAndKeywords( - args, keywds, "iiO!O!O!|iddii", kwlist, + args, keywds, "iiO!O!O!|Oiii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &rowind, &PyArray_Type, &colptr, - &permc_spec, - &diag_pivot_thresh, + &option_dict, &relax, - &panel_size); + &panel_size, + &ilu); if (!res) return NULL; @@ -179,8 +182,8 @@ goto fail; } - result = newSciPyLUObject(&A, diag_pivot_thresh, relax, - panel_size, permc_spec, type); + result = newSciPyLUObject(&A, relax, + panel_size, option_dict, type, ilu); if (result == NULL) { goto fail; } @@ -217,24 +220,17 @@ \n\ additional keyword arguments:\n\ -----------------------------\n\ -permc_spec specifies the matrix ordering used for the factorization\n\ - 0: natural ordering\n\ - 1: MMD applied to the structure of A^T * A\n\ - 2: MMD applied to the structure of A^T + A\n\ - 3: COLAMD, approximate minimum degree column ordering\n\ - (default: 2)\n\ +options specifies additional options for SuperLU\n\ + (same keys and values as in superlu_options_t C structure)\n\ \n\ -diag_pivot_thresh threshhold for partial pivoting.\n\ - 0.0 <= diag_pivot_thresh <= 1.0\n\ - 0.0 corresponds to no pivoting\n\ - 1.0 corresponds to partial pivoting\n\ - (default: 1.0)\n\ -\n\ relax to control degree of relaxing supernodes\n\ (default: 1)\n\ \n\ panel_size a panel consist of at most panel_size consecutive columns.\n\ (default: 10)\n\ +\n\ +ilu whether to perform an incomplete LU decomposition\n\ + (default: false)\n\ "; Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:56:00 UTC (rev 6349) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:56:19 UTC (rev 6350) @@ -6,6 +6,7 @@ extern jmp_buf _superlu_py_jmpbuf; + /*********************************************************************** * SciPyLUObject methods */ @@ -268,26 +269,9 @@ return 0; } -colperm_t superlu_module_getpermc(int permc_spec) -{ - switch(permc_spec) { - case 0: - return NATURAL; - case 1: - return MMD_ATA; - case 2: - return MMD_AT_PLUS_A; - case 3: - return COLAMD; - } - ABORT("Invalid input for permc_spec."); - return NATURAL; /* compiler complains... */ -} - PyObject * -newSciPyLUObject(SuperMatrix *A, double diag_pivot_thresh, - int relax, int panel_size, int permc_spec, - int intype) +newSciPyLUObject(SuperMatrix *A, int relax, int panel_size, + PyObject *option_dict, int intype, int ilu) { /* A must be in SLU_NC format used by the factorization routine. */ @@ -302,6 +286,10 @@ n = A->ncol; + if (!set_superlu_options_from_dict(&options, ilu, option_dict)) { + return NULL; + } + /* Create SciPyLUObject */ self = PyObject_New(SciPyLUObject, &SciPySuperLUType); if (self == NULL) @@ -318,13 +306,9 @@ etree = intMalloc(n); self->perm_r = intMalloc(n); self->perm_c = intMalloc(n); + StatInit(&stat); - set_default_options(&options); - options.ColPerm=superlu_module_getpermc(permc_spec); - options.DiagPivotThresh = diag_pivot_thresh; - StatInit(&stat); - - get_perm_c(permc_spec, A, self->perm_c); /* calc column permutation */ + get_perm_c(options.ColPerm, A, self->perm_c); /* calc column permutation */ sp_preorder(&options, A, self->perm_c, etree, &AC); /* apply column * permutation */ @@ -333,10 +317,18 @@ PyErr_SetString(PyExc_ValueError, "Invalid type in SuperMatrix."); goto fail; } - gstrf(SLU_TYPECODE_TO_NPY(A->Dtype), - &options, &AC, relax, panel_size, - etree, NULL, lwork, self->perm_c, self->perm_r, - &self->L, &self->U, &stat, &info); + if (ilu) { + gsitrf(SLU_TYPECODE_TO_NPY(A->Dtype), + &options, &AC, relax, panel_size, + etree, NULL, lwork, self->perm_c, self->perm_r, + &self->L, &self->U, &stat, &info); + } + else { + gstrf(SLU_TYPECODE_TO_NPY(A->Dtype), + &options, &AC, relax, panel_size, + etree, NULL, lwork, self->perm_c, self->perm_r, + &self->L, &self->U, &stat, &info); + } if (info) { if (info < 0) @@ -365,3 +357,161 @@ SciPyLU_dealloc(self); return NULL; } + + +/*********************************************************************** + * Preparing superlu_options_t + */ + +#define ENUM_CHECK_INIT \ + long i = -1; \ + char *s = ""; \ + if (PyString_Check(input)) { \ + s = PyString_AS_STRING(input); \ + } \ + if (PyInt_Check(input)) { \ + i = PyInt_AsLong(input); \ + } + +#define ENUM_CHECK_FINISH \ + PyErr_SetString(PyExc_ValueError, "unknown value"); \ + return 0; + +#define ENUM_CHECK(name) \ + if (strcmp(s, #name) == 0 || i == (long)name) { *value = name; return 1; } + +static int yes_no_cvt(PyObject *input, yes_no_t *value) +{ + if (input == Py_True) { + *value = YES; + } else if (input == Py_False) { + *value = NO; + } else { + PyErr_SetString(PyExc_ValueError, "value not a boolean"); + return 0; + } + return 1; +} + +static int fact_cvt(PyObject *input, fact_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(DOFACT); + ENUM_CHECK(SamePattern); + ENUM_CHECK(SamePattern_SameRowPerm); + ENUM_CHECK(FACTORED); + ENUM_CHECK_FINISH; +} + +static int rowperm_cvt(PyObject *input, rowperm_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(NOROWPERM); + ENUM_CHECK(LargeDiag); + ENUM_CHECK(MY_PERMR); + ENUM_CHECK_FINISH; +} + +static int colperm_cvt(PyObject *input, colperm_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(NATURAL); + ENUM_CHECK(MMD_ATA); + ENUM_CHECK(MMD_AT_PLUS_A); + ENUM_CHECK(COLAMD); + ENUM_CHECK(MY_PERMC); + ENUM_CHECK_FINISH; +} + +static int trans_cvt(PyObject *input, trans_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(NOTRANS); + ENUM_CHECK(TRANS); + ENUM_CHECK(CONJ); + ENUM_CHECK_FINISH; +} + +static int iterrefine_cvt(PyObject *input, IterRefine_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(NOREFINE); + ENUM_CHECK(SINGLE); + ENUM_CHECK(DOUBLE); + ENUM_CHECK(EXTRA); + ENUM_CHECK_FINISH; +} + +static int norm_cvt(PyObject *input, norm_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(ONE_NORM); + ENUM_CHECK(TWO_NORM); + ENUM_CHECK(INF_NORM); + ENUM_CHECK_FINISH; +} + +static int milu_cvt(PyObject *input, milu_t *value) +{ + ENUM_CHECK_INIT; + ENUM_CHECK(SILU); + ENUM_CHECK(SMILU_1); + ENUM_CHECK(SMILU_2); + ENUM_CHECK(SMILU_3); + ENUM_CHECK_FINISH; +} + +int set_superlu_options_from_dict(superlu_options_t *options, + int ilu, PyObject *option_dict) +{ + PyObject *args; + int ret; + + static char *kwlist[] = { + "Fact", "Equil", "ColPerm", "Trans", "IterRefine", + "DiagPivotThresh", "PivotGrowth", "ConditionNumber", + "RowPerm", "SymmetricMode", "PrintStat", "ReplaceTinyPivot", + "SolveInitialized", "RefineInitialized", "ILU_Norm", + "ILU_MILU", "ILU_DropTol", "ILU_FillTol", "ILU_FillFactor", + "ILU_DropRule", NULL + }; + + if (ilu) { + ilu_set_default_options(options); + } + else { + set_default_options(options); + } + + if (option_dict == NULL) { + return 0; + } + + args = PyTuple_New(0); + ret = PyArg_ParseTupleAndKeywords( + args, option_dict, + "|O&O&O&O&O&dO&O&O&O&O&O&O&O&O&O&dddi", kwlist, + fact_cvt, &options->Fact, + yes_no_cvt, &options->Equil, + colperm_cvt, &options->ColPerm, + trans_cvt, &options->Trans, + iterrefine_cvt, &options->IterRefine, + &options->DiagPivotThresh, + yes_no_cvt, &options->PivotGrowth, + yes_no_cvt, &options->ConditionNumber, + rowperm_cvt, &options->RowPerm, + yes_no_cvt, &options->SymmetricMode, + yes_no_cvt, &options->PrintStat, + yes_no_cvt, &options->ReplaceTinyPivot, + yes_no_cvt, &options->SolveInitialized, + yes_no_cvt, &options->RefineInitialized, + norm_cvt, &options->ILU_Norm, + milu_cvt, &options->ILU_MILU, + &options->ILU_DropTol, + &options->ILU_FillTol, + &options->ILU_FillFactor, + &options->ILU_DropRule + ); + Py_DECREF(args); + return ret; +} Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:56:00 UTC (rev 6349) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:56:19 UTC (rev 6350) @@ -33,9 +33,10 @@ int NCFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, PyArrayObject *, PyArrayObject *, int); colperm_t superlu_module_getpermc(int); -PyObject *newSciPyLUObject(SuperMatrix *, double, int, int, int, int); +PyObject *newSciPyLUObject(SuperMatrix *, int, int, PyObject*, int, int); +int set_superlu_options_from_dict(superlu_options_t *options, + int ilu, PyObject *option_dict); - /* * Definitions for other SuperLU data types than Z, * and type-generic definitions. @@ -93,7 +94,7 @@ SuperLUStat_t *h, int *i #define gssv_ARGS_REF a,b,c,d,e,f,g,h,i -#define Create_Dense_Matrix_ARGS \ +#define Create_Dense_Matrix_ARGS \ SuperMatrix *a, int b, int c, void *d, int e, \ Stype_t f, Dtype_t g, Mtype_t h #define Create_Dense_Matrix_ARGS_REF a,b,c,d,e,f,g,h Modified: trunk/scipy/sparse/linalg/dsolve/linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:56:00 UTC (rev 6349) +++ trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:56:19 UTC (rev 6350) @@ -90,7 +90,9 @@ flag = 0 # CSR format b = asarray(b, dtype=A.dtype) - return _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag, permc_spec)[0] + options = dict(ColPerm=permc_spec) + return _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag, + options=options)[0] def splu(A, permc_spec=2, diag_pivot_thresh=1.0, drop_tol=0.0, relax=1, panel_size=10): @@ -114,8 +116,12 @@ if (M != N): raise ValueError, "can only factor square matrices" #is this true? - return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr, permc_spec, - diag_pivot_thresh, drop_tol, relax, panel_size) + ilu = (drop_tol != 0) + options = dict(ILU_DropTol=drop_tol, DiagPivotThresh=diag_pivot_thresh, + ColPerm=permc_spec) + return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr, + relax=relax, panel_size=panel_size, ilu=ilu, + options=options) def factorized( A ): """ From scipy-svn at scipy.org Tue Apr 27 17:56:40 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:56:40 -0500 (CDT) Subject: [Scipy-svn] r6351 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100427215640.406D339CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:56:40 -0500 (Tue, 27 Apr 2010) New Revision: 6351 Modified: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c trunk/scipy/sparse/linalg/dsolve/_superluobject.c trunk/scipy/sparse/linalg/dsolve/_superluobject.h trunk/scipy/sparse/linalg/dsolve/linsolve.py Log: ENH: sparse.linalg.dsolve: clean up Python interface to SuperLU Modified: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:56:19 UTC (rev 6350) +++ trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:56:40 UTC (rev 6351) @@ -65,7 +65,7 @@ return NULL; } - if (!set_superlu_options_from_dict(&options, 0, option_dict)) { + if (!set_superlu_options_from_dict(&options, 0, option_dict, NULL, NULL)) { return NULL; } @@ -136,8 +136,6 @@ Py_gstrf(PyObject *self, PyObject *args, PyObject *keywds) { /* default value for SuperLU parameters*/ - int relax = 1; - int panel_size = 10; int N, nnz; PyArrayObject *rowind, *colptr, *nzvals; SuperMatrix A; @@ -146,19 +144,17 @@ int type; int ilu = 0; - static char *kwlist[] = {"N","nnz","nzvals","rowind","colptr", - "options", "relax", "panel_size", "ilu", + static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr", + "options", "ilu", NULL}; int res = PyArg_ParseTupleAndKeywords( - args, keywds, "iiO!O!O!|Oiii", kwlist, + args, keywds, "iiO!O!O!|Oi", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &rowind, &PyArray_Type, &colptr, &option_dict, - &relax, - &panel_size, &ilu); if (!res) @@ -182,8 +178,7 @@ goto fail; } - result = newSciPyLUObject(&A, relax, - panel_size, option_dict, type, ilu); + result = newSciPyLUObject(&A, option_dict, type, ilu); if (result == NULL) { goto fail; } @@ -221,14 +216,9 @@ additional keyword arguments:\n\ -----------------------------\n\ options specifies additional options for SuperLU\n\ - (same keys and values as in superlu_options_t C structure)\n\ + (same keys and values as in superlu_options_t C structure,\n\ + and additionally 'Relax' and 'PanelSize')\n\ \n\ -relax to control degree of relaxing supernodes\n\ - (default: 1)\n\ -\n\ -panel_size a panel consist of at most panel_size consecutive columns.\n\ - (default: 10)\n\ -\n\ ilu whether to perform an incomplete LU decomposition\n\ (default: false)\n\ "; Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:56:19 UTC (rev 6350) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:56:40 UTC (rev 6351) @@ -1,3 +1,10 @@ +/* -*-c-*- */ +/* + * _superlu object + * + * Python object representing SuperLU factorization + some utility functions. + */ + #include #define NO_IMPORT_ARRAY @@ -3,4 +10,5 @@ #include "_superluobject.h" #include +#include extern jmp_buf _superlu_py_jmpbuf; @@ -22,7 +30,7 @@ x array, solution vector(s)\n\ trans 'N': solve A * x == b\n\ 'T': solve A^T * x == b\n\ - 'H': solve A^H * x == b (not yet implemented)\n\ + 'H': solve A^H * x == b\n\ (optional, default value 'N')\n\ "; @@ -270,8 +278,7 @@ } PyObject * -newSciPyLUObject(SuperMatrix *A, int relax, int panel_size, - PyObject *option_dict, int intype, int ilu) +newSciPyLUObject(SuperMatrix *A, PyObject *option_dict, int intype, int ilu) { /* A must be in SLU_NC format used by the factorization routine. */ @@ -283,10 +290,12 @@ int n; superlu_options_t options; SuperLUStat_t stat; - + int panel_size, relax; + n = A->ncol; - if (!set_superlu_options_from_dict(&options, ilu, option_dict)) { + if (!set_superlu_options_from_dict(&options, ilu, option_dict, + &panel_size, &relax)) { return NULL; } @@ -366,6 +375,7 @@ #define ENUM_CHECK_INIT \ long i = -1; \ char *s = ""; \ + if (input == Py_None) return 1; \ if (PyString_Check(input)) { \ s = PyString_AS_STRING(input); \ } \ @@ -373,16 +383,38 @@ i = PyInt_AsLong(input); \ } -#define ENUM_CHECK_FINISH \ - PyErr_SetString(PyExc_ValueError, "unknown value"); \ +#define ENUM_CHECK_FINISH(message) \ + PyErr_SetString(PyExc_ValueError, message); \ return 0; #define ENUM_CHECK(name) \ - if (strcmp(s, #name) == 0 || i == (long)name) { *value = name; return 1; } + if (my_strxcmp(s, #name) == 0 || i == (long)name) { *value = name; return 1; } +/* + * Compare strings ignoring case, underscores and whitespace + */ +static int my_strxcmp(const char *a, const char *b) +{ + int c; + while (*a != '\0' && *b != '\0') { + while (*a == '_' || isspace(*a)) ++a; + while (*b == '_' || isspace(*b)) ++b; + c = (int)tolower(*a) - (int)tolower(*b); + if (c != 0) { + return c; + } + ++a; + ++b; + } + return (int)tolower(*a) - (int)tolower(*b); +} + static int yes_no_cvt(PyObject *input, yes_no_t *value) { - if (input == Py_True) { + if (input == Py_None) { + return 1; + } + else if (input == Py_True) { *value = YES; } else if (input == Py_False) { *value = NO; @@ -400,7 +432,7 @@ ENUM_CHECK(SamePattern); ENUM_CHECK(SamePattern_SameRowPerm); ENUM_CHECK(FACTORED); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'Fact' parameter"); } static int rowperm_cvt(PyObject *input, rowperm_t *value) @@ -409,7 +441,7 @@ ENUM_CHECK(NOROWPERM); ENUM_CHECK(LargeDiag); ENUM_CHECK(MY_PERMR); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'RowPerm' parameter"); } static int colperm_cvt(PyObject *input, colperm_t *value) @@ -420,7 +452,7 @@ ENUM_CHECK(MMD_AT_PLUS_A); ENUM_CHECK(COLAMD); ENUM_CHECK(MY_PERMC); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'ColPerm' parameter"); } static int trans_cvt(PyObject *input, trans_t *value) @@ -429,7 +461,10 @@ ENUM_CHECK(NOTRANS); ENUM_CHECK(TRANS); ENUM_CHECK(CONJ); - ENUM_CHECK_FINISH; + if (my_strxcmp(s, "N") == 0) { *value = NOTRANS; return 1; } + if (my_strxcmp(s, "T") == 0) { *value = TRANS; return 1; } + if (my_strxcmp(s, "H") == 0) { *value = CONJ; return 1; } + ENUM_CHECK_FINISH("invalid value for 'Trans' parameter"); } static int iterrefine_cvt(PyObject *input, IterRefine_t *value) @@ -439,7 +474,7 @@ ENUM_CHECK(SINGLE); ENUM_CHECK(DOUBLE); ENUM_CHECK(EXTRA); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'IterRefine' parameter"); } static int norm_cvt(PyObject *input, norm_t *value) @@ -448,7 +483,7 @@ ENUM_CHECK(ONE_NORM); ENUM_CHECK(TWO_NORM); ENUM_CHECK(INF_NORM); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'ILU_Norm' parameter"); } static int milu_cvt(PyObject *input, milu_t *value) @@ -458,14 +493,100 @@ ENUM_CHECK(SMILU_1); ENUM_CHECK(SMILU_2); ENUM_CHECK(SMILU_3); - ENUM_CHECK_FINISH; + ENUM_CHECK_FINISH("invalid value for 'ILU_MILU' parameter"); } +static int droprule_one_cvt(PyObject *input, int *value) +{ + ENUM_CHECK_INIT; + if (my_strxcmp(s, "BASIC") == 0) { *value = DROP_BASIC; return 1; } + if (my_strxcmp(s, "PROWS") == 0) { *value = DROP_PROWS; return 1; } + if (my_strxcmp(s, "COLUMN") == 0) { *value = DROP_COLUMN; return 1; } + if (my_strxcmp(s, "AREA") == 0) { *value = DROP_AREA; return 1; } + if (my_strxcmp(s, "SECONDARY") == 0) { *value = DROP_SECONDARY; return 1; } + if (my_strxcmp(s, "DYNAMIC") == 0) { *value = DROP_DYNAMIC; return 1; } + if (my_strxcmp(s, "INTERP") == 0) { *value = DROP_INTERP; return 1; } + ENUM_CHECK_FINISH("invalid value for 'ILU_DropRule' parameter"); +} + +static int droprule_cvt(PyObject *input, int *value) +{ + PyObject *seq = NULL; + int i; + int rule = 0; + + if (input == Py_None) { + /* Leave as default */ + return 1; + } + else if (PyInt_Check(input)) { + *value = PyInt_AsLong(input); + return 1; + } + else if (PyString_Check(input)) { + /* Comma-separated string */ + seq = PyObject_CallMethod(input, "split", "s", ","); + if (seq == NULL || !PySequence_Check(seq)) + goto fail; + } + else if (PySequence_Check(input)) { + /* Sequence of strings or integers */ + seq = input; + Py_INCREF(seq); + } + else { + PyErr_SetString(PyExc_ValueError, "invalid value for drop rule"); + goto fail; + } + + /* OR multiple values together */ + for (i = 0; i < PySequence_Size(seq); ++i) { + PyObject *item; + int one_value; + item = PySequence_ITEM(seq, i); + if (item == NULL) { + goto fail; + } + if (!droprule_one_cvt(item, &one_value)) { + Py_DECREF(item); + goto fail; + } + Py_DECREF(item); + rule |= one_value; + } + Py_DECREF(seq); + + *value = rule; + return 1; + +fail: + Py_XDECREF(seq); + return 0; +} + +static int double_cvt(PyObject *input, double *value) +{ + if (input == Py_None) return 1; + *value = PyFloat_AsDouble(input); + if (PyErr_Occurred()) return 0; + return 1; +} + +static int int_cvt(PyObject *input, int *value) +{ + if (input == Py_None) return 1; + *value = PyInt_AsLong(input); + if (PyErr_Occurred()) return 0; + return 1; +} + int set_superlu_options_from_dict(superlu_options_t *options, - int ilu, PyObject *option_dict) + int ilu, PyObject *option_dict, + int *panel_size, int *relax) { PyObject *args; int ret; + int _relax, _panel_size; static char *kwlist[] = { "Fact", "Equil", "ColPerm", "Trans", "IterRefine", @@ -473,7 +594,7 @@ "RowPerm", "SymmetricMode", "PrintStat", "ReplaceTinyPivot", "SolveInitialized", "RefineInitialized", "ILU_Norm", "ILU_MILU", "ILU_DropTol", "ILU_FillTol", "ILU_FillFactor", - "ILU_DropRule", NULL + "ILU_DropRule", "PanelSize", "Relax", NULL }; if (ilu) { @@ -483,6 +604,9 @@ set_default_options(options); } + _panel_size = sp_ienv(1); + _relax = sp_ienv(2); + if (option_dict == NULL) { return 0; } @@ -490,13 +614,13 @@ args = PyTuple_New(0); ret = PyArg_ParseTupleAndKeywords( args, option_dict, - "|O&O&O&O&O&dO&O&O&O&O&O&O&O&O&O&dddi", kwlist, + "|O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&O&", kwlist, fact_cvt, &options->Fact, yes_no_cvt, &options->Equil, colperm_cvt, &options->ColPerm, trans_cvt, &options->Trans, iterrefine_cvt, &options->IterRefine, - &options->DiagPivotThresh, + double_cvt, &options->DiagPivotThresh, yes_no_cvt, &options->PivotGrowth, yes_no_cvt, &options->ConditionNumber, rowperm_cvt, &options->RowPerm, @@ -507,11 +631,21 @@ yes_no_cvt, &options->RefineInitialized, norm_cvt, &options->ILU_Norm, milu_cvt, &options->ILU_MILU, - &options->ILU_DropTol, - &options->ILU_FillTol, - &options->ILU_FillFactor, - &options->ILU_DropRule + double_cvt, &options->ILU_DropTol, + double_cvt, &options->ILU_FillTol, + double_cvt, &options->ILU_FillFactor, + droprule_cvt, &options->ILU_DropRule, + int_cvt, &_panel_size, + int_cvt, &_relax ); Py_DECREF(args); + + if (panel_size != NULL) { + *panel_size = _panel_size; + } + if (relax != NULL) { + *relax = _relax; + } + return ret; } Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:56:19 UTC (rev 6350) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-27 21:56:40 UTC (rev 6351) @@ -1,3 +1,10 @@ +/* -*-c-*- */ +/* + * _superlu object + * + * Python object representing SuperLU factorization + some utility functions. + */ + #ifndef __SUPERLU_OBJECT #define __SUPERLU_OBJECT @@ -33,9 +40,10 @@ int NCFormat_from_spMatrix(SuperMatrix *, int, int, int, PyArrayObject *, PyArrayObject *, PyArrayObject *, int); colperm_t superlu_module_getpermc(int); -PyObject *newSciPyLUObject(SuperMatrix *, int, int, PyObject*, int, int); +PyObject *newSciPyLUObject(SuperMatrix *, PyObject*, int, int); int set_superlu_options_from_dict(superlu_options_t *options, - int ilu, PyObject *option_dict); + int ilu, PyObject *option_dict, + int *panel_size, int *relax); /* * Definitions for other SuperLU data types than Z, Modified: trunk/scipy/sparse/linalg/dsolve/linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:56:19 UTC (rev 6350) +++ trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:56:40 UTC (rev 6351) @@ -18,7 +18,7 @@ useUmfpack = True -__all__ = [ 'use_solver', 'spsolve', 'splu', 'factorized' ] +__all__ = [ 'use_solver', 'spsolve', 'splu', 'spilu', 'factorized' ] def use_solver( **kwargs ): """ @@ -41,7 +41,7 @@ umfpack.configure( **kwargs ) -def spsolve(A, b, permc_spec=2): +def spsolve(A, b, permc_spec=None): """Solve the sparse linear system Ax=b """ if isspmatrix( b ): @@ -86,23 +86,71 @@ else: if isspmatrix_csc(A): flag = 1 # CSC format + elif isspmatrix_csr(A): + flag = 0 # CSR format else: - flag = 0 # CSR format + A = csc_matrix(A) + flag = 1 b = asarray(b, dtype=A.dtype) options = dict(ColPerm=permc_spec) return _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag, options=options)[0] -def splu(A, permc_spec=2, diag_pivot_thresh=1.0, - drop_tol=0.0, relax=1, panel_size=10): +def splu(A, permc_spec=None, diag_pivot_thresh=None, + drop_tol=None, relax=None, panel_size=None, options=dict()): """ - A linear solver, for a sparse, square matrix A, using LU decomposition where - L is a lower triangular matrix and U is an upper triagular matrix. + Compute the LU decomposition of a sparse, square matrix. - Returns a factored_lu object. (scipy.sparse.linalg.dsolve._superlu.SciPyLUType) + Parameters + ---------- + A + Sparse matrix to factorize. Should be in CSR or CSC format. - See scipy.sparse.linalg.dsolve._superlu.dgstrf for more info. + permc_spec : str, optional + How to permute the columns of the matrix for sparsity preservation. + (default: 'COLAMD') + + - ``NATURAL``: natural ordering. + - ``MMD_ATA``: minimum degree ordering on the structure of A^T A. + - ``MMD_AT_PLUS_A``: minimum degree ordering on the structure of A^T+A. + - ``COLAMD``: approximate minimum degree column ordering + + diag_pivot_thresh : float, optional + Threshold used for a diagonal entry to be an acceptable pivot. + See SuperLU user's guide for details [SLU]_ + drop_tol : float, optional + (deprecated) No effect. + relax : int, optional + Expert option for customizing the degree of relaxing supernodes. + See SuperLU user's guide for details [SLU]_ + panel_size : int, optional + Expert option for customizing the panel size. + See SuperLU user's guide for details [SLU]_ + options : dict, optional + Dictionary containing additional expert options to SuperLU. + See SuperLU user guide [SLU]_ (section 2.4 on the 'Options' argument) + for more details. For example, you can specify + ``options=dict(Equil=False, IterRefine='SINGLE'))`` + to turn equilibration off and perform a single iterative refinement. + + Returns + ------- + invA : scipy.sparse.linalg.dsolve._superlu.SciPyLUType + Object, which has a ``solve`` method. + + See also + -------- + spilu : incomplete LU decomposition + + Notes + ----- + This function uses the SuperLU library. + + References + ---------- + .. [SLU] SuperLU http://crd.lbl.gov/~xiaoye/SuperLU/ + """ if not isspmatrix_csc(A): @@ -116,13 +164,85 @@ if (M != N): raise ValueError, "can only factor square matrices" #is this true? - ilu = (drop_tol != 0) - options = dict(ILU_DropTol=drop_tol, DiagPivotThresh=diag_pivot_thresh, - ColPerm=permc_spec) + _options = dict(DiagPivotThresh=diag_pivot_thresh, ColPerm=permc_spec, + PanelSize=panel_size, Relax=relax) + if options is not None: + _options.update(options) return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr, - relax=relax, panel_size=panel_size, ilu=ilu, - options=options) + ilu=False, options=_options) +def spilu(A, drop_tol=None, fill_factor=None, drop_rule=None, permc_spec=None, + diag_pivot_thresh=None, relax=None, panel_size=None, options=None): + """ + Compute an incomplete LU decomposition for a sparse, square matrix A. + + The resulting object is an approximation to the inverse of A. + + Parameters + ---------- + A + Sparse matrix to factorize + + drop_tol : float, optional + Drop tolerance (0 <= tol <= 1) for an incomplete LU decomposition. + (default: 1e-4) + fill_factor : float, optional + Specifies the fill ratio upper bound (>= 1.0) for ILU. (default: 10) + drop_rule : str, optional + Comma-separated string of drop rules to use. + Available rules: ``basic``, ``prows``, ``column``, ``area``, + ``secondary``, ``dynamic``, ``interp``. (Default: ``basic,area``) + + See SuperLU documentation for details. + milu : str, optional + Which version of modified ILU to use. (Choices: ``silu``, + ``smilu_1``, ``smilu_2`` (default), ``smilu_3``.) + + Remaining other options + Same as for `splu` + + Returns + ------- + invA_approx : scipy.sparse.linalg.dsolve._superlu.SciPyLUType + Object, which has a ``solve`` method. + + See also + -------- + splu : complete LU decomposition + + Notes + ----- + To improve the better approximation to the inverse, you may need to + increase ``fill_factor`` AND decrease ``drop_tol``. + + This function uses the SuperLU library. + + References + ---------- + .. [SLU] SuperLU http://crd.lbl.gov/~xiaoye/SuperLU/ + + """ + + if not isspmatrix_csc(A): + A = csc_matrix(A) + warn('splu requires CSC matrix format', SparseEfficiencyWarning) + + A.sort_indices() + A = A.asfptype() #upcast to a floating point format + + M, N = A.shape + if (M != N): + raise ValueError, "can only factor square matrices" #is this true? + + _options = dict(ILU_DropRule=drop_rule, ILU_DropTol=drop_tol, + ILU_FillFactor=fill_factor, + DiagPivotThresh=diag_pivot_thresh, ColPerm=permc_spec, + PanelSize=panel_size, Relax=relax) + if options is not None: + _options.update(options) + return _superlu.gstrf(N, A.nnz, A.data, A.indices, A.indptr, + ilu=True, options=_options) + def factorized( A ): """ Return a fuction for solving a sparse linear system, with A pre-factorized. From scipy-svn at scipy.org Tue Apr 27 17:56:53 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:56:53 -0500 (CDT) Subject: [Scipy-svn] r6352 - trunk/scipy/sparse/linalg/dsolve/tests Message-ID: <20100427215653.3763C39CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:56:53 -0500 (Tue, 27 Apr 2010) New Revision: 6352 Modified: trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py Log: ENH: sparse.linalg.dsolve: Add some tests for splu and spilu Modified: trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py 2010-04-27 21:56:40 UTC (rev 6351) +++ trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py 2010-04-27 21:56:53 UTC (rev 6352) @@ -1,11 +1,12 @@ import warnings -from numpy import array, finfo +from numpy import array, finfo, arange +import numpy.random as random from numpy.testing import * from scipy.linalg import norm, inv from scipy.sparse import spdiags, SparseEfficiencyWarning -from scipy.sparse.linalg.dsolve import spsolve, use_solver +from scipy.sparse.linalg.dsolve import spsolve, use_solver, splu, spilu warnings.simplefilter('ignore',SparseEfficiencyWarning) @@ -39,5 +40,26 @@ assert( norm(b - Asp*x) < 10 * cond_A * eps ) +class TestSplu(object): + def setUp(self): + n = 40 + d = arange(n) + 1 + self.n = n + self.A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), n, n) + random.seed(1234) + + def test_splu(self): + x = random.rand(self.n) + lu = splu(self.A) + r = self.A*lu.solve(x) + assert abs(x - r).max() < 1e-13 + + def test_spilu(self): + x = random.rand(self.n) + lu = spilu(self.A, drop_tol=1e-2, fill_factor=5) + r = self.A*lu.solve(x) + assert abs(x - r).max() < 1e-2 + assert abs(x - r).max() > 1e-5 + if __name__ == "__main__": run_module_suite() From scipy-svn at scipy.org Tue Apr 27 17:57:04 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:57:04 -0500 (CDT) Subject: [Scipy-svn] r6353 - trunk/scipy/sparse/linalg/dsolve/SuperLU Message-ID: <20100427215704.3D22639CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:57:04 -0500 (Tue, 27 Apr 2010) New Revision: 6353 Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt Log: ENH: sparse.linalg.dsolve: add an explanation of changes made to upstream SuperLU sources Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt 2010-04-27 21:57:04 UTC (rev 6353) @@ -0,0 +1,17 @@ +The following changes have been made to the original SuperLU 4.0 sources: + +1) ENH: scipy.sparse.dsolve/SuperLU: re-apply r4767, r4768, r5892 patches to SuperLU sources + + - Rename c_abs() and c_abs1() to slu_c_abs() and slu_c_abs1() to + avoid conflict with Python headers. Hopefully resolves ticket #735 + + - BUG: fix SuperLU dubious format string. + + - Drop ?myblas2.c routines, since we USE_VENDOR_BLAS + + - Do not include *.f Harwell Subroutine Library files because of + license issues. + +2) ENH: sparse.linalg.dsolve/SuperLU: patch SuperLU upstream sources to get some variables from Scipy + +3) BUG: scipy.sparse.dsolve/SuperLU: sprinkle volatile into dlamc/slamc implementation to avoid an infinite loop From scipy-svn at scipy.org Tue Apr 27 17:57:16 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:57:16 -0500 (CDT) Subject: [Scipy-svn] r6354 - in trunk/scipy/sparse/linalg/dsolve: . SuperLU SuperLU/SRC Message-ID: <20100427215716.1229039CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:57:15 -0500 (Tue, 27 Apr 2010) New Revision: 6354 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt trunk/scipy/sparse/linalg/dsolve/_superlumodule.c Log: BUG: sparse.linalg.dsolve/SuperLU: patch SuperLU sources to eliminate a crash for singular matrices for which pivoting fails Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) @@ -108,7 +108,11 @@ Also search for user-specified pivot, and diagonal element. */ if ( *usepr ) *pivrow = iperm_r[jcol]; diagind = iperm_c[jcol]; +#ifdef SCIPY_SPECIFIC_FIX + pivmax = -1.0; +#else pivmax = 0.0; +#endif pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; @@ -123,6 +127,13 @@ } /* Test for singularity */ +#ifdef SCIPY_SPECIFIC_FIX + if (pivmax < 0.0) { + perm_r[diagind] = jcol; + *usepr = 0; + return (jcol+1); + } +#endif if ( pivmax == 0.0 ) { #if 1 *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) @@ -107,7 +107,11 @@ Also search for user-specified pivot, and diagonal element. */ if ( *usepr ) *pivrow = iperm_r[jcol]; diagind = iperm_c[jcol]; +#ifdef SCIPY_SPECIFIC_FIX + pivmax = -1.0; +#else pivmax = 0.0; +#endif pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; @@ -122,6 +126,13 @@ } /* Test for singularity */ +#ifdef SCIPY_SPECIFIC_FIX + if (pivmax < 0.0) { + perm_r[diagind] = jcol; + *usepr = 0; + return (jcol+1); + } +#endif if ( pivmax == 0.0 ) { #if 1 *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h 2010-04-27 21:57:15 UTC (rev 6354) @@ -14,6 +14,8 @@ #define USER_MALLOC superlu_python_module_malloc #define USER_FREE superlu_python_module_free +#define SCIPY_SPECIFIC_FIX 1 + /* * Fortran configuration */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c 2010-04-27 21:57:15 UTC (rev 6354) @@ -107,7 +107,11 @@ Also search for user-specified pivot, and diagonal element. */ if ( *usepr ) *pivrow = iperm_r[jcol]; diagind = iperm_c[jcol]; +#ifdef SCIPY_SPECIFIC_FIX + pivmax = -1.0; +#else pivmax = 0.0; +#endif pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; @@ -122,6 +126,13 @@ } /* Test for singularity */ +#ifdef SCIPY_SPECIFIC_FIX + if (pivmax < 0.0) { + perm_r[diagind] = jcol; + *usepr = 0; + return (jcol+1); + } +#endif if ( pivmax == 0.0 ) { #if 1 *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) @@ -108,7 +108,11 @@ Also search for user-specified pivot, and diagonal element. */ if ( *usepr ) *pivrow = iperm_r[jcol]; diagind = iperm_c[jcol]; +#ifdef SCIPY_SPECIFIC_FIX + pivmax = -1.0; +#else pivmax = 0.0; +#endif pivptr = nsupc; diag = EMPTY; old_pivptr = nsupc; @@ -123,6 +127,13 @@ } /* Test for singularity */ +#ifdef SCIPY_SPECIFIC_FIX + if (pivmax < 0.0) { + perm_r[diagind] = jcol; + *usepr = 0; + return (jcol+1); + } +#endif if ( pivmax == 0.0 ) { #if 1 *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt 2010-04-27 21:57:15 UTC (rev 6354) @@ -15,3 +15,5 @@ 2) ENH: sparse.linalg.dsolve/SuperLU: patch SuperLU upstream sources to get some variables from Scipy 3) BUG: scipy.sparse.dsolve/SuperLU: sprinkle volatile into dlamc/slamc implementation to avoid an infinite loop + +4) BUG: sparse.linalg.dsolve/SuperLU: patch SuperLU sources to eliminate a crash for singular matrices for which pivoting fails Modified: trunk/scipy/sparse/linalg/dsolve/_superlumodule.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:57:04 UTC (rev 6353) +++ trunk/scipy/sparse/linalg/dsolve/_superlumodule.c 2010-04-27 21:57:15 UTC (rev 6354) @@ -40,6 +40,7 @@ SuperLUStat_t stat; PyObject *option_dict = NULL; int type; + int ssv_finished = 0; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "options",NULL}; @@ -109,7 +110,8 @@ /* Compute direct inverse of sparse Matrix */ gssv(type, &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); } - + ssv_finished = 1; + SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ @@ -125,8 +127,12 @@ SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ Destroy_SuperMatrix_Store(&B); - Destroy_SuperNode_Matrix(&L); - Destroy_CompCol_Matrix(&U); + if (ssv_finished) { + /* Avoid trying to free partially initialized matrices; + might leak some memory, but avoids a crash */ + Destroy_SuperNode_Matrix(&L); + Destroy_CompCol_Matrix(&U); + } StatFree(&stat); Py_XDECREF(Py_X); return NULL; From scipy-svn at scipy.org Tue Apr 27 17:57:39 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:57:39 -0500 (CDT) Subject: [Scipy-svn] r6355 - in trunk/scipy/sparse/linalg/dsolve: . SuperLU SuperLU/SRC Message-ID: <20100427215739.4962739CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:57:39 -0500 (Tue, 27 Apr 2010) New Revision: 6355 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt trunk/scipy/sparse/linalg/dsolve/_superluobject.c Log: BUG: sparse.linalg.dsolve/SuperLU: patch SuperLU sources to not exit(1) when ILU decomposition encounters singularity; instead, raise a Python exception Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c 2010-04-27 21:57:39 UTC (rev 6355) @@ -136,9 +136,13 @@ /* Test for singularity */ if (pivmax < 0.0) { +#if SCIPY_SPECIFIC_FIX + ABORT("[0]: matrix is singular"); +#else fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } if ( pivmax == 0.0 ) { if (diag != EMPTY) @@ -151,9 +155,13 @@ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { +#if SCIPY_SPECIFIC_FIX + ABORT("[1]: matrix is singular"); +#else fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } *pivrow = swap[icol]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c 2010-04-27 21:57:39 UTC (rev 6355) @@ -134,9 +134,13 @@ /* Test for singularity */ if (pivmax < 0.0) { +#if SCIPY_SPECIFIC_FIX + ABORT("[0]: matrix is singular"); +#else fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } if ( pivmax == 0.0 ) { if (diag != EMPTY) @@ -149,9 +153,13 @@ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { +#if SCIPY_SPECIFIC_FIX + ABORT("[1]: matrix is singular"); +#else fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } *pivrow = swap[icol]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c 2010-04-27 21:57:39 UTC (rev 6355) @@ -134,9 +134,13 @@ /* Test for singularity */ if (pivmax < 0.0) { +#if SCIPY_SPECIFIC_FIX + ABORT("[0]: matrix is singular"); +#else fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } if ( pivmax == 0.0 ) { if (diag != EMPTY) @@ -149,9 +153,13 @@ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { +#if SCIPY_SPECIFIC_FIX + ABORT("[1]: matrix is singular"); +#else fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } *pivrow = swap[icol]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c 2010-04-27 21:57:39 UTC (rev 6355) @@ -136,9 +136,13 @@ /* Test for singularity */ if (pivmax < 0.0) { +#if SCIPY_SPECIFIC_FIX + ABORT("[0]: matrix is singular"); +#else fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } if ( pivmax == 0.0 ) { if (diag != EMPTY) @@ -151,9 +155,13 @@ for (icol = jcol; icol < n; icol++) if (marker[swap[icol]] <= jcol) break; if (icol >= n) { +#if SCIPY_SPECIFIC_FIX + ABORT("[1]: matrix is singular"); +#else fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); fflush(stderr); exit(1); +#endif } *pivrow = swap[icol]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt 2010-04-27 21:57:39 UTC (rev 6355) @@ -17,3 +17,5 @@ 3) BUG: scipy.sparse.dsolve/SuperLU: sprinkle volatile into dlamc/slamc implementation to avoid an infinite loop 4) BUG: sparse.linalg.dsolve/SuperLU: patch SuperLU sources to eliminate a crash for singular matrices for which pivoting fails + +5) BUG: parse.linalg.dsolve/SuperLU: patch SuperLU sources to not exit(1) when ILU decomposition encounters singularity; instead, raise a Python exception Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:57:15 UTC (rev 6354) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:57:39 UTC (rev 6355) @@ -120,8 +120,12 @@ { SUPERLU_FREE(self->perm_r); SUPERLU_FREE(self->perm_c); - Destroy_SuperNode_Matrix(&self->L); - Destroy_CompCol_Matrix(&self->U); + if (self->L.Store != NULL) { + Destroy_SuperNode_Matrix(&self->L); + } + if (self->U.Store != NULL) { + Destroy_CompCol_Matrix(&self->U); + } PyObject_Del(self); } @@ -291,6 +295,7 @@ superlu_options_t options; SuperLUStat_t stat; int panel_size, relax; + int trf_finished = 0; n = A->ncol; @@ -338,6 +343,7 @@ etree, NULL, lwork, self->perm_c, self->perm_r, &self->L, &self->U, &stat, &info); } + trf_finished = 1; if (info) { if (info < 0) @@ -360,6 +366,12 @@ return (PyObject *)self; fail: + if (!trf_finished) { + /* Avoid trying to free partially initialized matrices; + might leak some memory, but avoids a crash */ + self->L.Store = NULL; + self->U.Store = NULL; + } SUPERLU_FREE(etree); Destroy_CompCol_Permuted(&AC); StatFree(&stat); From scipy-svn at scipy.org Tue Apr 27 17:57:54 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:57:54 -0500 (CDT) Subject: [Scipy-svn] r6356 - in trunk/scipy/sparse/linalg/dsolve: . tests Message-ID: <20100427215754.7CD7F39CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:57:54 -0500 (Tue, 27 Apr 2010) New Revision: 6356 Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py Log: ENH: sparse.linalg.dsolve: expose perm_* attributes of the splu object to Python side (patch from #937) Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:57:39 UTC (rev 6355) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-27 21:57:54 UTC (rev 6356) @@ -136,8 +136,22 @@ return Py_BuildValue("(i,i)", self->m, self->n); if (strcmp(name, "nnz") == 0) return Py_BuildValue("i", ((SCformat *)self->L.Store)->nnz + ((SCformat *)self->U.Store)->nnz); + if (strcmp(name, "perm_r") == 0) { + PyArrayObject* perm_r = PyArray_SimpleNewFromData(1, (npy_intp*) (&self->n), NPY_INT, (void*)self->perm_r); + /* For ref counting of the memory */ + PyArray_BASE(perm_r) = self; + Py_INCREF(self); + return perm_r ; + } + if (strcmp(name, "perm_c") == 0) { + PyArrayObject* perm_c = PyArray_SimpleNewFromData(1, (npy_intp*) (&self->n), NPY_INT, (void*)self->perm_c); + /* For ref counting of the memory */ + PyArray_BASE(perm_c) = self; + Py_INCREF(self); + return perm_c ; + } if (strcmp(name, "__members__") == 0) { - char *members[] = {"shape", "nnz"}; + char *members[] = {"shape", "nnz", "perm_r", "perm_c"}; int i; PyObject *list = PyList_New(sizeof(members)/sizeof(char *)); @@ -158,6 +172,27 @@ /*********************************************************************** * SciPySuperLUType structure */ +static char factored_lu_doc[] = "\ +Object resulting from a factorization of a sparse matrix\n\ +\n\ +Attributes\n\ +-----------\n\ +\n\ +shape : 2-tuple\n\ + the shape of the orginal matrix factored\n \ +nnz : int\n\ + the number of non zero coefficient of the matrix\n \ +perm_c\n\ + the permutation applied to the colums of the matrix for the LU factorization\n\ +perm_r\n\ + the permutation applied to the rows of the matrix for the LU factorization\n\ +\n\ +Methods\n\ +-------\n\ +solve\n\ + solves the system for a given right hand side vector\n \ +\n\ +"; PyTypeObject SciPySuperLUType = { PyObject_HEAD_INIT(NULL) @@ -175,6 +210,13 @@ 0, /* tp_as_sequence*/ 0, /* tp_as_mapping*/ 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + factored_lu_doc, /* tp_doc */ }; Modified: trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py 2010-04-27 21:57:39 UTC (rev 6355) +++ trunk/scipy/sparse/linalg/dsolve/tests/test_linsolve.py 2010-04-27 21:57:54 UTC (rev 6356) @@ -1,11 +1,11 @@ import warnings -from numpy import array, finfo, arange +from numpy import array, finfo, arange, eye, all, unique, ones, dot import numpy.random as random from numpy.testing import * from scipy.linalg import norm, inv -from scipy.sparse import spdiags, SparseEfficiencyWarning +from scipy.sparse import spdiags, SparseEfficiencyWarning, csc_matrix from scipy.sparse.linalg.dsolve import spsolve, use_solver, splu, spilu warnings.simplefilter('ignore',SparseEfficiencyWarning) @@ -14,11 +14,10 @@ use_solver( useUmfpack = False ) class TestLinsolve(TestCase): - ## this crashes SuperLU - #def test_singular(self): - # A = csc_matrix( (5,5), dtype='d' ) - # b = array([1, 2, 3, 4, 5],dtype='d') - # x = spsolve(A,b) + def test_singular(self): + A = csc_matrix( (5,5), dtype='d' ) + b = array([1, 2, 3, 4, 5],dtype='d') + x = spsolve(A, b, use_umfpack=False) def test_twodiags(self): A = spdiags([[1, 2, 3, 4, 5], [6, 5, 8, 9, 10]], [0, 1], 5, 5) @@ -48,18 +47,89 @@ self.A = spdiags((d, 2*d, d[::-1]), (-3, 0, 5), n, n) random.seed(1234) - def test_splu(self): + def test_splu_smoketest(self): + # Check that splu works at all x = random.rand(self.n) lu = splu(self.A) r = self.A*lu.solve(x) assert abs(x - r).max() < 1e-13 - def test_spilu(self): + def test_spilu_smoketest(self): + # Check that spilu works at all x = random.rand(self.n) lu = spilu(self.A, drop_tol=1e-2, fill_factor=5) r = self.A*lu.solve(x) assert abs(x - r).max() < 1e-2 assert abs(x - r).max() > 1e-5 + def test_splu_nnz0(self): + A = csc_matrix( (5,5), dtype='d' ) + assert_raises(RuntimeError, splu, A) + + def test_spilu_nnz0(self): + A = csc_matrix( (5,5), dtype='d' ) + assert_raises(RuntimeError, spilu, A) + + def test_splu_basic(self): + # Test basic splu functionality. + n = 30 + a = random.random((n, n)) + a[a < 0.95] = 0 + # First test with a singular matrix + a[:, 0] = 0 + a_ = csc_matrix(a) + # Matrix is exactly singular + assert_raises(RuntimeError, splu, a_) + + # Make a diagonal dominant, to make sure it is not singular + a += 4*eye(n) + a_ = csc_matrix(a) + lu = splu(a_) + b = ones(n) + x = lu.solve(b) + assert_almost_equal(dot(a, x), b) + + def test_splu_perm(self): + # Test the permutation vectors exposed by splu. + n = 30 + a = random.random((n, n)) + a[a < 0.95] = 0 + # Make a diagonal dominant, to make sure it is not singular + a += 4*eye(n) + a_ = csc_matrix(a) + lu = splu(a_) + # Check that the permutation indices do belong to [0, n-1]. + for perm in (lu.perm_r, lu.perm_c): + assert_(all(perm > -1)) + assert_(all(perm < n)) + assert_equal(len(unique(perm)), len(perm)) + + # Now make a symmetric, and test that the two permutation vectors are + # the same + a += a.T + a_ = csc_matrix(a) + lu = splu(a_) + assert_array_equal(lu.perm_r, lu.perm_c) + + def test_lu_refcount(self): + # Test that we are keeping track of the reference count with splu. + n = 30 + a = random.random((n, n)) + a[a < 0.95] = 0 + # Make a diagonal dominant, to make sure it is not singular + a += 4*eye(n) + a_ = csc_matrix(a) + lu = splu(a_) + + # And now test that we don't have a refcount bug + import gc, sys + rc = sys.getrefcount(lu) + for attr in ('perm_r', 'perm_c'): + perm = getattr(lu, attr) + assert_equal(sys.getrefcount(lu), rc + 1) + del perm + assert_equal(sys.getrefcount(lu), rc) + + if __name__ == "__main__": run_module_suite() From scipy-svn at scipy.org Tue Apr 27 17:58:02 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:58:02 -0500 (CDT) Subject: [Scipy-svn] r6357 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100427215802.BE29A39CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:58:02 -0500 (Tue, 27 Apr 2010) New Revision: 6357 Modified: trunk/scipy/sparse/linalg/dsolve/linsolve.py Log: ENH: sparse.linalg.dsolve: add an option to spsolve to specify if umfpack is used Modified: trunk/scipy/sparse/linalg/dsolve/linsolve.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:57:54 UTC (rev 6356) +++ trunk/scipy/sparse/linalg/dsolve/linsolve.py 2010-04-27 21:58:02 UTC (rev 6357) @@ -41,7 +41,7 @@ umfpack.configure( **kwargs ) -def spsolve(A, b, permc_spec=None): +def spsolve(A, b, permc_spec=None, use_umfpack=True): """Solve the sparse linear system Ax=b """ if isspmatrix( b ): @@ -67,8 +67,9 @@ raise ValueError, "matrix - rhs size mismatch (%s - %s)"\ % (A.shape, b.size) + use_umfpack = use_umfpack and useUmfpack - if isUmfpack and useUmfpack: + if isUmfpack and use_umfpack: if noScikit: warn( 'scipy.sparse.linalg.dsolve.umfpack will be removed,'\ ' install scikits.umfpack instead', DeprecationWarning ) From scipy-svn at scipy.org Tue Apr 27 18:01:22 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 17:01:22 -0500 (CDT) Subject: [Scipy-svn] r6358 - trunk/doc/release Message-ID: <20100427220122.F3F5339CB2B@scipy.org> Author: ptvirtan Date: 2010-04-27 17:01:22 -0500 (Tue, 27 Apr 2010) New Revision: 6358 Modified: trunk/doc/release/0.8.0-notes.rst Log: DOC: write something to release notes about SuperLU upgrade Modified: trunk/doc/release/0.8.0-notes.rst =================================================================== --- trunk/doc/release/0.8.0-notes.rst 2010-04-27 21:58:02 UTC (rev 6357) +++ trunk/doc/release/0.8.0-notes.rst 2010-04-27 22:01:22 UTC (rev 6358) @@ -129,7 +129,14 @@ The function `scipy.constants.find` was modified to return the list of keys that it finds, instead of printing them and returning None. +Incomplete sparse LU decompositions +----------------------------------- +Scipy now wraps SuperLU version 4.0, which supports incomplete sparse LU +decompositions. These can be accessed via `scipy.sparse.linalg.spilu`. +Upgrade to SuperLU 4.0 also fixes some known bugs. + + Removed features ================ From scipy-svn at scipy.org Tue Apr 27 20:32:04 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 19:32:04 -0500 (CDT) Subject: [Scipy-svn] r6359 - in trunk/scipy: fftpack integrate io Message-ID: <20100428003204.ED10A39CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-27 19:32:04 -0500 (Tue, 27 Apr 2010) New Revision: 6359 Modified: trunk/scipy/fftpack/basic.py trunk/scipy/integrate/__init__.py trunk/scipy/integrate/info.py trunk/scipy/integrate/odepack.py trunk/scipy/integrate/quadpack.py trunk/scipy/io/fopen.py Log: Remove defunct comments. Modified: trunk/scipy/fftpack/basic.py =================================================================== --- trunk/scipy/fftpack/basic.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/fftpack/basic.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by - """ Discrete Fourier Transforms - basic.py """ Modified: trunk/scipy/integrate/__init__.py =================================================================== --- trunk/scipy/integrate/__init__.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/integrate/__init__.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by - # # integrate - Integration routines # Modified: trunk/scipy/integrate/info.py =================================================================== --- trunk/scipy/integrate/info.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/integrate/info.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by - """ Integration routines ==================== Modified: trunk/scipy/integrate/odepack.py =================================================================== --- trunk/scipy/integrate/odepack.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/integrate/odepack.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by - # Author: Travis Oliphant __all__ = ['odeint'] Modified: trunk/scipy/integrate/quadpack.py =================================================================== --- trunk/scipy/integrate/quadpack.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/integrate/quadpack.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by - # Author: Travis Oliphant 2001 __all__ = ['quad', 'dblquad', 'tplquad', 'quad_explain', 'Inf','inf'] Modified: trunk/scipy/io/fopen.py =================================================================== --- trunk/scipy/io/fopen.py 2010-04-27 22:01:22 UTC (rev 6358) +++ trunk/scipy/io/fopen.py 2010-04-28 00:32:04 UTC (rev 6359) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 05, 2005 by convertcode.py - # Author: Travis Oliphant import struct From scipy-svn at scipy.org Tue Apr 27 20:35:17 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 19:35:17 -0500 (CDT) Subject: [Scipy-svn] r6360 - trunk/scipy/linalg Message-ID: <20100428003517.0EF9E39CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-27 19:35:16 -0500 (Tue, 27 Apr 2010) New Revision: 6360 Modified: trunk/scipy/linalg/blas.py trunk/scipy/linalg/flinalg.py trunk/scipy/linalg/interface_gen.py trunk/scipy/linalg/lapack.py trunk/scipy/linalg/setup.py trunk/scipy/linalg/setup_atlas_version.py Log: Remove defunct comments. Modified: trunk/scipy/linalg/blas.py =================================================================== --- trunk/scipy/linalg/blas.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/blas.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Pearu Peterson, March 2002 # Modified: trunk/scipy/linalg/flinalg.py =================================================================== --- trunk/scipy/linalg/flinalg.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/flinalg.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Pearu Peterson, March 2002 # Modified: trunk/scipy/linalg/interface_gen.py =================================================================== --- trunk/scipy/linalg/interface_gen.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/interface_gen.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - #!/usr/bin/env python import os Modified: trunk/scipy/linalg/lapack.py =================================================================== --- trunk/scipy/linalg/lapack.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/lapack.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - # # Author: Pearu Peterson, March 2002 # Modified: trunk/scipy/linalg/setup.py =================================================================== --- trunk/scipy/linalg/setup.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/setup.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - #!/usr/bin/env python import os Modified: trunk/scipy/linalg/setup_atlas_version.py =================================================================== --- trunk/scipy/linalg/setup_atlas_version.py 2010-04-28 00:32:04 UTC (rev 6359) +++ trunk/scipy/linalg/setup_atlas_version.py 2010-04-28 00:35:16 UTC (rev 6360) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 18, 2005 by - #!/usr/bin/env python import os From scipy-svn at scipy.org Tue Apr 27 20:38:59 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 19:38:59 -0500 (CDT) Subject: [Scipy-svn] r6361 - in trunk/scipy: optimize signal sparse/linalg/isolve special Message-ID: <20100428003859.BB52339CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-27 19:38:59 -0500 (Tue, 27 Apr 2010) New Revision: 6361 Modified: trunk/scipy/optimize/lbfgsb.py trunk/scipy/optimize/linesearch.py trunk/scipy/optimize/zeros.py trunk/scipy/signal/bsplines.py trunk/scipy/sparse/linalg/isolve/setup.py trunk/scipy/special/basic.py Log: Remove defunct comments. Modified: trunk/scipy/optimize/lbfgsb.py =================================================================== --- trunk/scipy/optimize/lbfgsb.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/optimize/lbfgsb.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,6 +1,4 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py - ## License for the Python wrapper ## ============================== Modified: trunk/scipy/optimize/linesearch.py =================================================================== --- trunk/scipy/optimize/linesearch.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/optimize/linesearch.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,4 +1,3 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py from scipy.optimize import minpack2 import numpy Modified: trunk/scipy/optimize/zeros.py =================================================================== --- trunk/scipy/optimize/zeros.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/optimize/zeros.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,4 +1,3 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py import _zeros from numpy import finfo Modified: trunk/scipy/signal/bsplines.py =================================================================== --- trunk/scipy/signal/bsplines.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/signal/bsplines.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,4 +1,3 @@ -## Automatically adapted for scipy Oct 21, 2005 by convertcode.py import scipy.special from numpy import logical_and, asarray, pi, zeros_like, \ Modified: trunk/scipy/sparse/linalg/isolve/setup.py =================================================================== --- trunk/scipy/sparse/linalg/isolve/setup.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/sparse/linalg/isolve/setup.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,5 +1,4 @@ #!/usr/bin/env python -## Automatically adapted for scipy Oct 18, 2005 by import os import sys Modified: trunk/scipy/special/basic.py =================================================================== --- trunk/scipy/special/basic.py 2010-04-28 00:35:16 UTC (rev 6360) +++ trunk/scipy/special/basic.py 2010-04-28 00:38:59 UTC (rev 6361) @@ -1,5 +1,3 @@ -## Automatically adapted for scipy Oct 05, 2005 by convertcode.py - # # Author: Travis Oliphant, 2002 # From scipy-svn at scipy.org Tue Apr 27 21:11:02 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 20:11:02 -0500 (CDT) Subject: [Scipy-svn] r6362 - trunk/scipy/linalg Message-ID: <20100428011102.039AC39CAE7@scipy.org> Author: warren.weckesser Date: 2010-04-27 20:11:01 -0500 (Tue, 27 Apr 2010) New Revision: 6362 Modified: trunk/scipy/linalg/interface_gen.py Log: Remove obsolete version check. Modified: trunk/scipy/linalg/interface_gen.py =================================================================== --- trunk/scipy/linalg/interface_gen.py 2010-04-28 00:38:59 UTC (rev 6361) +++ trunk/scipy/linalg/interface_gen.py 2010-04-28 01:11:01 UTC (rev 6362) @@ -1,12 +1,7 @@ #!/usr/bin/env python import os -import sys - -if sys.version[:3]>='2.3': - import re -else: - import pre as re +import re from distutils.dir_util import mkpath def all_subroutines(interface_in): From scipy-svn at scipy.org Thu Apr 29 09:50:35 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 29 Apr 2010 08:50:35 -0500 (CDT) Subject: [Scipy-svn] r6363 - trunk/doc/release Message-ID: <20100429135035.913F539CAEE@scipy.org> Author: ptvirtan Date: 2010-04-29 08:50:35 -0500 (Thu, 29 Apr 2010) New Revision: 6363 Modified: trunk/doc/release/0.8.0-notes.rst Log: DOC: update release notes regarding Python 3 Modified: trunk/doc/release/0.8.0-notes.rst =================================================================== --- trunk/doc/release/0.8.0-notes.rst 2010-04-28 01:11:01 UTC (rev 6362) +++ trunk/doc/release/0.8.0-notes.rst 2010-04-29 13:50:35 UTC (rev 6363) @@ -31,13 +31,13 @@ project - everything - from which algorithms we implement, to details about our function's call signatures. -Python 3.0 -========== +Python 3 +======== -Python 3.0 is not supported at all; it requires NumPy to be ported to -Python 3.0. This requires immense effort, since a lot of C code has -to be ported. The transition to 3.0 is still under consideration; -currently, we don't have any timeline or roadmap for this transition. +Python 3 compatibility is planned and is currently technically +feasible, since Numpy has been ported. However, since the Python 3 +compatible Numpy 2.0 has not been released yet, support for Python 3 +in Scipy might not yet be included in Scipy 0.8. Major documentation improvements ================================ From scipy-svn at scipy.org Fri Apr 30 12:38:37 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 30 Apr 2010 11:38:37 -0500 (CDT) Subject: [Scipy-svn] r6364 - trunk/scipy/sparse/linalg/dsolve Message-ID: <20100430163837.114B839CB4E@scipy.org> Author: ptvirtan Date: 2010-04-30 11:38:37 -0500 (Fri, 30 Apr 2010) New Revision: 6364 Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c trunk/scipy/sparse/linalg/dsolve/_superluobject.h Log: BUG: sparse.linalg.dsolve: fix a crash bug on 64-bit systems when accessing superluobject.perm_* Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-29 13:50:35 UTC (rev 6363) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.c 2010-04-30 16:38:37 UTC (rev 6364) @@ -179,9 +179,9 @@ -----------\n\ \n\ shape : 2-tuple\n\ - the shape of the orginal matrix factored\n \ + the shape of the orginal matrix factored\n\ nnz : int\n\ - the number of non zero coefficient of the matrix\n \ + the number of non-zero elements in the matrix\n\ perm_c\n\ the permutation applied to the colums of the matrix for the LU factorization\n\ perm_r\n\ Modified: trunk/scipy/sparse/linalg/dsolve/_superluobject.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-29 13:50:35 UTC (rev 6363) +++ trunk/scipy/sparse/linalg/dsolve/_superluobject.h 2010-04-30 16:38:37 UTC (rev 6364) @@ -24,7 +24,7 @@ */ typedef struct { PyObject_VAR_HEAD - int m,n; + npy_intp m,n; SuperMatrix L; SuperMatrix U; int *perm_r; From scipy-svn at scipy.org Tue Apr 27 17:54:15 2010 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 27 Apr 2010 16:54:15 -0500 (CDT) Subject: [Scipy-svn] r6344 - in trunk/scipy/sparse/linalg/dsolve: . SuperLU SuperLU/SRC Message-ID: <20100427215415.8045939CAE7@scipy.org> Author: ptvirtan Date: 2010-04-27 16:54:15 -0500 (Tue, 27 Apr 2010) New Revision: 6344 Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadrb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadtriple.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ddiagonal.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dldperm.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadrb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadtriple.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/html_mainpage.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_csnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ddrop_row.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dsnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_heap_relax_snode.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_relax_snode.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_sdrop_row.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ssnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zdrop_row.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zsnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/mark_relax.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sdiagonal.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsisx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsitrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sldperm.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_cdefs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_dcomplex.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_ddefs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_sdefs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_zdefs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadrb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadtriple.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zdiagonal.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsisx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsitrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zldperm.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadrb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadtriple.c Removed: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Cnames.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Makefile trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_defs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_defs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_defs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_defs.h trunk/scipy/sparse/linalg/dsolve/SuperLU/changes.txt Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/README trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgscon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssv.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpruneL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadhb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas2.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas3.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgscon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsequ.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssv.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlacon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlangs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlaqgs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dmemory.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotgrowth.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpruneL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadhb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas2.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas3.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dutil.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dzsum1.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/get_perm_c.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/heap_relax_snode.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/icmax1.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/izmax1.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/lsame.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/memory.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/relax_snode.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgscon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsequ.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsrfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssv.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssvx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slacon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slangs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slaqgs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/smemory.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_coletree.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_ienv.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_preorder.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotgrowth.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spruneL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadhb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas2.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas3.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_timer.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/supermatrix.h trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sutil.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/xerbla.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcopy_to_ucol.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgscon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsequ.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsrfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssv.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssvx.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrf.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlacon.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlangs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlaqgs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zmemory.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotgrowth.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpruneL.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadhb.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_bmod.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_dfs.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas2.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas3.c trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zutil.c trunk/scipy/sparse/linalg/dsolve/setup.py Log: ENH: sparse.linalg.dsolve/SuperLU: update SuperLU upstream sources to version 4.0 Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/README =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/README 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/README 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,4 +1,4 @@ - SuperLU (Version 3.0) + SuperLU (Version 4.0) ===================== Copyright (c) 2003, The Regents of the University of California, through @@ -52,6 +52,7 @@ SuperLU/README instructions on installation SuperLU/CBLAS/ needed BLAS routines in C, not necessarily fast + SuperLU/DOC/ Users' Guide and documentation of source code SuperLU/EXAMPLE/ example programs SuperLU/FORTRAN/ Fortran interface SuperLU/INSTALL/ test machine dependent parameters; the Users' Guide. @@ -100,14 +101,14 @@ In this case, you should do the following: 1) In SuperLU/make.inc, undefine (comment out) BLASDEF, and define: - BLASLIB = ../blas$(PLAT).a + BLASLIB = ../lib/blas$(PLAT).a 2) Go to the SuperLU/ directory, type: make blaslib to make the BLAS library from the routines in the CBLAS/ subdirectory. 3. C preprocessor definition CDEFS. - In the header file SRC/Cnames.h, we use macros to determine how + In the header file SRC/slu_Cnames.h, we use macros to determine how C routines should be named so that they are callable by Fortran. (Some vendor-supplied BLAS libraries do not have C interface. So the re-naming is needed in order for the SuperLU BLAS calls (in C) to @@ -144,14 +145,12 @@ TESTING/ztest.out - ------------------ -| RELEASE NOTES | ------------------ -* Version 3.0, 10-15-03 - - add "options" and "stat" argument for the driver routines - DGSSV/DGSSVX. This interface is more user-friendly and flexible. - - add more examples in EXAMPLE/ - - add a "symmetric mode" with better performance when the matrix is - symmetric, or diagonal dominant, or positive definite, or nearly so. - +-------------------- +| RELEASE VERSIONS | +-------------------- + February 4, 1997 Version 1.0 + November 15, 1997 Version 1.1 + September 1, 1999 Version 2.0 + October 15, 2003 Version 3.0 + August 1, 2008 Version 3.1 + June 30, 2009 Version 4.0 Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Cnames.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Cnames.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Cnames.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,278 +0,0 @@ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 1, 1997 - * - */ -#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ -#define __SUPERLU_CNAMES - -/* - * These macros define how C routines will be called. ADD_ assumes that - * they will be called by fortran, which expects C routines to have an - * underscore postfixed to the name (Suns, and the Intel expect this). - * NOCHANGE indicates that fortran will be calling, and that it expects - * the name called by fortran to be identical to that compiled by the C - * (RS6K's do this). UPCASE says it expects C routines called by fortran - * to be in all upcase (CRAY wants this). - */ - -#define ADD_ 0 -#define ADD__ 1 -#define NOCHANGE 2 -#define UPCASE 3 -#define C_CALL 4 - -#ifdef UpCase -#define F77_CALL_C UPCASE -#endif - -#ifdef NoChange -#define F77_CALL_C NOCHANGE -#endif - -#ifdef Add_ -#define F77_CALL_C ADD_ -#endif - -#ifdef Add__ -#define F77_CALL_C ADD__ -#endif - -/* Default */ -#ifndef F77_CALL_C -#define F77_CALL_C ADD_ -#endif - - -#if (F77_CALL_C == ADD_) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * No redefinition necessary to have following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm_(...) - * - * This is the default. - */ - -#endif - -#if (F77_CALL_C == ADD__) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * for following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm__(...) - */ -#define sasum_ sasum__ -#define isamax_ isamax__ -#define scopy_ scopy__ -#define sscal_ sscal__ -#define sger_ sger__ -#define snrm2_ snrm2__ -#define ssymv_ ssymv__ -#define sdot_ sdot__ -#define saxpy_ saxpy__ -#define ssyr2_ ssyr2__ -#define srot_ srot__ -#define sgemv_ sgemv__ -#define strsv_ strsv__ -#define sgemm_ sgemm__ -#define strsm_ strsm__ - -#define dasum_ dasum__ -#define idamax_ idamax__ -#define dcopy_ dcopy__ -#define dscal_ dscal__ -#define dger_ dger__ -#define dnrm2_ dnrm2__ -#define dsymv_ dsymv__ -#define ddot_ ddot__ -#define daxpy_ daxpy__ -#define dsyr2_ dsyr2__ -#define drot_ drot__ -#define dgemv_ dgemv__ -#define dtrsv_ dtrsv__ -#define dgemm_ dgemm__ -#define dtrsm_ dtrsm__ - -#define scasum_ scasum__ -#define icamax_ icamax__ -#define ccopy_ ccopy__ -#define cscal_ cscal__ -#define scnrm2_ scnrm2__ -#define caxpy_ caxpy__ -#define cgemv_ cgemv__ -#define ctrsv_ ctrsv__ -#define cgemm_ cgemm__ -#define ctrsm_ ctrsm__ -#define cgerc_ cgerc__ -#define chemv_ chemv__ -#define cher2_ cher2__ - -#define dzasum_ dzasum__ -#define izamax_ izamax__ -#define zcopy_ zcopy__ -#define zscal_ zscal__ -#define dznrm2_ dznrm2__ -#define zaxpy_ zaxpy__ -#define zgemv_ zgemv__ -#define ztrsv_ ztrsv__ -#define zgemm_ zgemm__ -#define ztrsm_ ztrsm__ -#define zgerc_ zgerc__ -#define zhemv_ zhemv__ -#define zher2_ zher2__ - -#define c_bridge_dgssv_ c_bridge_dgssv__ -#define c_fortran_dgssv_ c_fortran_dgssv__ -#endif - -#if (F77_CALL_C == UPCASE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void DGEMM(...) - */ -#define sasum_ SASUM -#define isamax_ ISAMAX -#define scopy_ SCOPY -#define sscal_ SSCAL -#define sger_ SGER -#define snrm2_ SNRM2 -#define ssymv_ SSYMV -#define sdot_ SDOT -#define saxpy_ SAXPY -#define ssyr2_ SSYR2 -#define srot_ SROT -#define sgemv_ SGEMV -#define strsv_ STRSV -#define sgemm_ SGEMM -#define strsm_ STRSM - -#define dasum_ SASUM -#define idamax_ ISAMAX -#define dcopy_ SCOPY -#define dscal_ SSCAL -#define dger_ SGER -#define dnrm2_ SNRM2 -#define dsymv_ SSYMV -#define ddot_ SDOT -#define daxpy_ SAXPY -#define dsyr2_ SSYR2 -#define drot_ SROT -#define dgemv_ SGEMV -#define dtrsv_ STRSV -#define dgemm_ SGEMM -#define dtrsm_ STRSM - -#define scasum_ SCASUM -#define icamax_ ICAMAX -#define ccopy_ CCOPY -#define cscal_ CSCAL -#define scnrm2_ SCNRM2 -#define caxpy_ CAXPY -#define cgemv_ CGEMV -#define ctrsv_ CTRSV -#define cgemm_ CGEMM -#define ctrsm_ CTRSM -#define cgerc_ CGERC -#define chemv_ CHEMV -#define cher2_ CHER2 - -#define dzasum_ SCASUM -#define izamax_ ICAMAX -#define zcopy_ CCOPY -#define zscal_ CSCAL -#define dznrm2_ SCNRM2 -#define zaxpy_ CAXPY -#define zgemv_ CGEMV -#define ztrsv_ CTRSV -#define zgemm_ CGEMM -#define ztrsm_ CTRSM -#define zgerc_ CGERC -#define zhemv_ CHEMV -#define zher2_ CHER2 - -#define c_bridge_dgssv_ C_BRIDGE_DGSSV -#define c_fortran_dgssv_ C_FORTRAN_DGSSV -#endif - -#if (F77_CALL_C == NOCHANGE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * for following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm(...) - */ -#define sasum_ sasum -#define isamax_ isamax -#define scopy_ scopy -#define sscal_ sscal -#define sger_ sger -#define snrm2_ snrm2 -#define ssymv_ ssymv -#define sdot_ sdot -#define saxpy_ saxpy -#define ssyr2_ ssyr2 -#define srot_ srot -#define sgemv_ sgemv -#define strsv_ strsv -#define sgemm_ sgemm -#define strsm_ strsm - -#define dasum_ dasum -#define idamax_ idamax -#define dcopy_ dcopy -#define dscal_ dscal -#define dger_ dger -#define dnrm2_ dnrm2 -#define dsymv_ dsymv -#define ddot_ ddot -#define daxpy_ daxpy -#define dsyr2_ dsyr2 -#define drot_ drot -#define dgemv_ dgemv -#define dtrsv_ dtrsv -#define dgemm_ dgemm -#define dtrsm_ dtrsm - -#define scasum_ scasum -#define icamax_ icamax -#define ccopy_ ccopy -#define cscal_ cscal -#define scnrm2_ scnrm2 -#define caxpy_ caxpy -#define cgemv_ cgemv -#define ctrsv_ ctrsv -#define cgemm_ cgemm -#define ctrsm_ ctrsm -#define cgerc_ cgerc -#define chemv_ chemv -#define cher2_ cher2 - -#define dzasum_ dzasum -#define izamax_ izamax -#define zcopy_ zcopy -#define zscal_ zscal -#define dznrm2_ dznrm2 -#define zaxpy_ zaxpy -#define zgemv_ zgemv -#define ztrsv_ ztrsv -#define zgemm_ zgemm -#define ztrsm_ ztrsm -#define zgerc_ zgerc -#define zhemv_ zhemv -#define zher2_ zher2 - -#define c_bridge_dgssv_ c_bridge_dgssv -#define c_fortran_dgssv_ c_fortran_dgssv -#endif - -#endif /* __SUPERLU_CNAMES */ Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Makefile =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Makefile 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Makefile 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,115 +0,0 @@ -# makefile for sparse supernodal LU, implemented in ANSI C -include ../make.inc - -####################################################################### -# This is the makefile to create a library for SuperLU. -# The files are organized as follows: -# -# ALLAUX -- Auxiliary routines called from all precisions -# SCLAUX -- Auxiliary routines called from both real and complex -# DZLAUX -- Auxiliary routines called from both double precision -# and complex*16 -# SLUSRC -- Single precision real SuperLU routines -# DLUSRC -- Double precision real SuperLU routines -# CLUSRC -- Single precision complex SuperLU routines -# ZLUSRC -- Double precision complex SuperLU routines -# -# The library can be set up to include routines for any combination -# of the four precisions. To create or add to the library, enter make -# followed by one or more of the precisions desired. Some examples: -# make single -# make single double -# make single double complex complex16 -# Alternatively, the command -# make -# without any arguments creates a library of all four precisions. -# The library is called -# superlu.a -# and is created at the next higher directory level. -# -# To remove the object files after the library is created, enter -# make clean -# -####################################################################### - -ALLAUX = superlu_timer.o lsame.o util.o memory.o get_perm_c.o mmd.o \ - sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o heap_relax_snode.o \ - xerbla.o colamd.o - -SCLAUX = slamch.o - -DZLAUX = dlamch.o - -SLUSRC = \ - sgssv.o sgssvx.o \ - ssp_blas2.o ssp_blas3.o sgscon.o slacon.o \ - slangs.o sgsequ.o slaqgs.o spivotgrowth.o \ - sgsrfs.o sgstrf.o sgstrs.o scopy_to_ucol.o \ - ssnode_dfs.o ssnode_bmod.o \ - spanel_dfs.o spanel_bmod.o sreadhb.o \ - scolumn_dfs.o scolumn_bmod.o spivotL.o spruneL.o \ - smemory.o sutil.o smyblas2.o - -DLUSRC = \ - dgssv.o dgssvx.o \ - dsp_blas2.o dsp_blas3.o dgscon.o dlacon.o \ - dlangs.o dgsequ.o dlaqgs.o dpivotgrowth.o \ - dgsrfs.o dgstrf.o dgstrs.o dcopy_to_ucol.o \ - dsnode_dfs.o dsnode_bmod.o \ - dpanel_dfs.o dpanel_bmod.o dreadhb.o \ - dcolumn_dfs.o dcolumn_bmod.o dpivotL.o dpruneL.o \ - dmemory.o dutil.o dmyblas2.o - -CLUSRC = \ - scomplex.o scsum1.o icmax1.o \ - cgssv.o cgssvx.o \ - csp_blas2.o csp_blas3.o cgscon.o clacon.o \ - clangs.o cgsequ.o claqgs.o cpivotgrowth.o \ - cgsrfs.o cgstrf.o cgstrs.o ccopy_to_ucol.o \ - csnode_dfs.o csnode_bmod.o \ - cpanel_dfs.o cpanel_bmod.o creadhb.o \ - ccolumn_dfs.o ccolumn_bmod.o cpivotL.o cpruneL.o \ - cmemory.o cutil.o cmyblas2.o - -ZLUSRC = \ - dcomplex.o dzsum1.o izmax1.o \ - zgssv.o zgssvx.o \ - zsp_blas2.o zsp_blas3.o zgscon.o zlacon.o \ - zlangs.o zgsequ.o zlaqgs.o zpivotgrowth.o \ - zgsrfs.o zgstrf.o zgstrs.o zcopy_to_ucol.o \ - zsnode_dfs.o zsnode_bmod.o \ - zpanel_dfs.o zpanel_bmod.o zreadhb.o \ - zcolumn_dfs.o zcolumn_bmod.o zpivotL.o zpruneL.o \ - zmemory.o zutil.o zmyblas2.o - -all: single double complex complex16 - -single: $(SLUSRC) $(ALLAUX) $(SCLAUX) - $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(SLUSRC) $(ALLAUX) $(SCLAUX) - $(RANLIB) ../$(SUPERLULIB) - -double: $(DLUSRC) $(ALLAUX) $(DZLAUX) - $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(DLUSRC) $(ALLAUX) $(DZLAUX) - $(RANLIB) ../$(SUPERLULIB) - -complex: $(CLUSRC) $(ALLAUX) $(SCLAUX) - $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(CLUSRC) $(ALLAUX) $(SCLAUX) - $(RANLIB) ../$(SUPERLULIB) - -complex16: $(ZLUSRC) $(ALLAUX) $(DZLAUX) - $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(ZLUSRC) $(ALLAUX) $(DZLAUX) - $(RANLIB) ../$(SUPERLULIB) - - -################################## -# Do not optimize these routines # -################################## -slamch.o: slamch.c ; $(CC) -c $(NOOPTS) $< -dlamch.o: dlamch.c ; $(CC) -c $(NOOPTS) $< -superlu_timer.o: superlu_timer.c ; $(CC) -c $(NOOPTS) $< - -.c.o: - $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c $< $(VERBOSE) - -clean: - rm -f *.o ../superlu$(PLAT).a Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,29 @@ -/* +/*! @file ccolumn_bmod.c + * \brief performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ *  Permission is hereby granted to use or copy this program for any
+ *  purpose, provided the above notices are retained on all copies.
+ *  Permission to modify the code and to distribute modified code is
+ *  granted, provided the above notices are retained, and a notice that
+ *  the code was modified is included with the above copyright notice.
+ * 
*/ #include #include -#include "csp_defs.h" +#include "slu_cdefs.h" /* * Function prototypes @@ -32,8 +34,17 @@ -/* Return value: 0 - successful return +/*! \brief + * + *
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int ccolumn_bmod ( @@ -48,14 +59,7 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose: - * ======== - * Performs numeric block updates (sup-col) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - */ + #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,50 +1,38 @@ - -/* +/*! @file ccolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -#include "csp_defs.h" +#include "slu_cdefs.h" -/* What type of supernodes we want */ +/*! \brief What type of supernodes we want */ #define T2_SUPER -int -ccolumn_dfs( - const int m, /* in - number of rows in the matrix */ - const int jcol, /* in */ - int *perm_r, /* in */ - int *nseg, /* modified - with new segments appended */ - int *lsub_col, /* in - defines the RHS vector to start the dfs */ - int *segrep, /* modified - with new segments appended */ - int *repfnz, /* modified */ - int *xprune, /* modified */ - int *marker, /* modified */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* + +/*! \brief + * + *
  * Purpose
  * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
+ *   CCOLUMN_DFS performs a symbolic factorization on column jcol, and
  *   decide the supernode boundary.
  *
  *   This routine does not use numeric values, but only use the RHS 
@@ -72,8 +60,25 @@
  * ============
  *     0  success;
  *   > 0  number of bytes allocated when run out of space.
- *
+ * 
*/ +int +ccolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *xprune, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + int jcolp1, jcolm1, jsuper, nsuper, nextl; int k, krep, krow, kmark, kperm; int *marker2; /* Used for small panel LU */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccopy_to_ucol.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ccopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,26 @@ - -/* +/*! @file ccopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" int ccopy_to_ucol( @@ -47,7 +46,6 @@ complex *ucol; int *usub, *xusub; int nzumax; - complex zero = {0.0, 0.0}; xsup = Glu->xsup; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,133 @@ + +/*! @file cdiagonal.c + * \brief Auxiliary routines to work with diagonal elements + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +int cfill_diag(int n, NCformat *Astore) +/* fill explicit zeros on the diagonal entries, so that the matrix is not + structurally singular. */ +{ + complex *nzval = (complex *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + complex *nzval_new; + complex zero = {1.0, 0.0}; + int *rowind_new; + int i, j, diag; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = complexMalloc(nnz + fill); + rowind_new = intMalloc(nnz + fill); + fill = 0; + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + } + if (diag < 0) + { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = zero; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + Astore->nnz += fill; + return fill; +} + +int cdominate(int n, NCformat *Astore) +/* make the matrix diagonally dominant */ +{ + complex *nzval = (complex *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + complex *nzval_new; + int *rowind_new; + int i, j, diag; + double s; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = complexMalloc(nnz + fill); + rowind_new = intMalloc(nnz+ fill); + fill = 0; + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + s += c_abs1(&nzval_new[j + fill]); + } + if (diag >= 0) { + nzval_new[diag+fill].r = s * 3.0; + nzval_new[diag+fill].i = 0.0; + } else { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill].r = s * 3.0; + nzval_new[colptr[i + 1] + fill].i = 0.0; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + else + { + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + { + if (rowind[j] == i) diag = j; + s += c_abs1(&nzval[j]); + } + nzval[diag].r = s * 3.0; + nzval[diag].i = 0.0; + } + } + Astore->nnz += fill; + return fill; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgscon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgscon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgscon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,70 +1,81 @@ -/* +/*! @file cgscon.c + * \brief Estimates reciprocal of the condition number of a general matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routines CGECON.
+ * 
*/ + /* * File name: cgscon.c * History: Modified from lapack routines CGECON. */ #include -#include "csp_defs.h" +#include "slu_cdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   CGSCON estimates the reciprocal of the condition number of a general 
+ *   real matrix A, in either the 1-norm or the infinity-norm, using   
+ *   the LU factorization computed by CGETRF.   *
+ *
+ *   An estimate is obtained for norm(inv(A)), and the reciprocal of the   
+ *   condition number is computed as   
+ *      RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ * 
+ *   Arguments   
+ *   =========   
+ *
+ *    NORM    (input) char*
+ *            Specifies whether the 1-norm condition number or the   
+ *            infinity-norm condition number is required:   
+ *            = '1' or 'O':  1-norm;   
+ *            = 'I':         Infinity-norm.
+ *	    
+ *    L       (input) SuperMatrix*
+ *            The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *            cgstrf(). Use compressed row subscripts storage for supernodes,
+ *            i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
+ * 
+ *    U       (input) SuperMatrix*
+ *            The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *            cgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *            Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
+ *	    
+ *    ANORM   (input) float
+ *            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
+ *            If NORM = 'I', the infinity-norm of the original matrix A.
+ *	    
+ *    RCOND   (output) float*
+ *           The reciprocal of the condition number of the matrix A,   
+ *           computed as RCOND = 1/(norm(A) * norm(inv(A))).
+ *	    
+ *    INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *
+ *    ===================================================================== 
+ * 
+ */ + void cgscon(char *norm, SuperMatrix *L, SuperMatrix *U, float anorm, float *rcond, SuperLUStat_t *stat, int *info) { -/* - Purpose - ======= - CGSCON estimates the reciprocal of the condition number of a general - real matrix A, in either the 1-norm or the infinity-norm, using - the LU factorization computed by CGETRF. - An estimate is obtained for norm(inv(A)), and the reciprocal of the - condition number is computed as - RCOND = 1 / ( norm(A) * norm(inv(A)) ). - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - NORM (input) char* - Specifies whether the 1-norm condition number or the - infinity-norm condition number is required: - = '1' or 'O': 1-norm; - = 'I': Infinity-norm. - - L (input) SuperMatrix* - The factor L from the factorization Pr*A*Pc=L*U as computed by - cgstrf(). Use compressed row subscripts storage for supernodes, - i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU. - - U (input) SuperMatrix* - The factor U from the factorization Pr*A*Pc=L*U as computed by - cgstrf(). Use column-wise storage scheme, i.e., U has types: - Stype = SLU_NC, Dtype = SLU_C, Mtype = TRU. - - ANORM (input) float - If NORM = '1' or 'O', the 1-norm of the original matrix A. - If NORM = 'I', the infinity-norm of the original matrix A. - - RCOND (output) float* - The reciprocal of the condition number of the matrix A, - computed as RCOND = 1/(norm(A) * norm(inv(A))). - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== -*/ - /* Local variables */ int kase, kase1, onenrm, i; float ainvnm; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,82 +1,91 @@ - -/* +/*! @file cgsequ.c + * \brief Computes row and column scalings + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from LAPACK routine CGEEQU
+ * 
*/ /* * File name: cgsequ.c * History: Modified from LAPACK routine CGEEQU */ #include -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" + + +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   CGSEQU computes row and column scalings intended to equilibrate an   
+ *   M-by-N sparse matrix A and reduce its condition number. R returns the row
+ *   scale factors and C the column scale factors, chosen to try to make   
+ *   the largest element in each row and column of the matrix B with   
+ *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
+ *
+ *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
+ *   number and BIGNUM = largest safe number.  Use of these scaling   
+ *   factors is not guaranteed to reduce the condition number of A but   
+ *   works well in practice.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input) SuperMatrix*
+ *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
+ *           factors are to be computed. The type of A can be:
+ *           Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
+ *	    
+ *   R       (output) float*, size A->nrow
+ *           If INFO = 0 or INFO > M, R contains the row scale factors   
+ *           for A.
+ *	    
+ *   C       (output) float*, size A->ncol
+ *           If INFO = 0,  C contains the column scale factors for A.
+ *	    
+ *   ROWCND  (output) float*
+ *           If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
+ *           smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
+ *           AMAX is neither too large nor too small, it is not worth   
+ *           scaling by R.
+ *	    
+ *   COLCND  (output) float*
+ *           If INFO = 0, COLCND contains the ratio of the smallest   
+ *           C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
+ *           worth scaling by C.
+ *	    
+ *   AMAX    (output) float*
+ *           Absolute value of largest matrix element.  If AMAX is very   
+ *           close to overflow or very close to underflow, the matrix   
+ *           should be scaled.
+ *	    
+ *   INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *           > 0:  if INFO = i,  and i is   
+ *                 <= A->nrow:  the i-th row of A is exactly zero   
+ *                 >  A->ncol:  the (i-M)-th column of A is exactly zero   
+ *
+ *   ===================================================================== 
+ * 
+ */ void cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd, float *colcnd, float *amax, int *info) { -/* - Purpose - ======= - CGSEQU computes row and column scalings intended to equilibrate an - M-by-N sparse matrix A and reduce its condition number. R returns the row - scale factors and C the column scale factors, chosen to try to make - the largest element in each row and column of the matrix B with - elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. - R(i) and C(j) are restricted to be between SMLNUM = smallest safe - number and BIGNUM = largest safe number. Use of these scaling - factors is not guaranteed to reduce the condition number of A but - works well in practice. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input) SuperMatrix* - The matrix of dimension (A->nrow, A->ncol) whose equilibration - factors are to be computed. The type of A can be: - Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE. - - R (output) float*, size A->nrow - If INFO = 0 or INFO > M, R contains the row scale factors - for A. - - C (output) float*, size A->ncol - If INFO = 0, C contains the column scale factors for A. - - ROWCND (output) float* - If INFO = 0 or INFO > M, ROWCND contains the ratio of the - smallest R(i) to the largest R(i). If ROWCND >= 0.1 and - AMAX is neither too large nor too small, it is not worth - scaling by R. - - COLCND (output) float* - If INFO = 0, COLCND contains the ratio of the smallest - C(i) to the largest C(i). If COLCND >= 0.1, it is not - worth scaling by C. - - AMAX (output) float* - Absolute value of largest matrix element. If AMAX is very - close to overflow or very close to underflow, the matrix - should be scaled. - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, and i is - <= A->nrow: the i-th row of A is exactly zero - > A->ncol: the (i-M)-th column of A is exactly zero - - ===================================================================== -*/ - /* Local variables */ NCformat *Astore; complex *Aval; @@ -118,7 +127,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; - r[irow] = SUPERLU_MAX( r[irow], slu_c_abs1(&Aval[i]) ); + r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) ); } /* Find the maximum and minimum scale factors. */ @@ -153,7 +162,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; - c[j] = SUPERLU_MAX( c[j], slu_c_abs1(&Aval[i]) * r[irow] ); + c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] ); } /* Find the maximum and minimum scale factors. */ Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,693 @@ + +/*! @file cgsisx.c + * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ +#include "slu_cdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * CGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
+ * using the ILU factorization from cgsitrf(). An estimation of
+ * the condition number is provided. It performs the following steps:
+ *
+ *   1. If A is stored column-wise (A->Stype = SLU_NC):
+ *  
+ *	1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A is
+ *	     overwritten by diag(R)*A*diag(C) and B by diag(R)*B
+ *	     (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
+ *	     = TRANS or CONJ).
+ *
+ *	1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
+ *	     matrix that usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	1.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the matrix A (after equilibration if options->Equil = YES)
+ *	     as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
+ *
+ *	1.4. Compute the reciprocal pivot growth factor.
+ *
+ *	1.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
+ *	     and info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	1.6. The system of equations is solved for X using the factored form
+ *	     of A.
+ *
+ *	1.7. options->IterRefine is not used
+ *
+ *	1.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *	1.9. options for ILU only
+ *	     1) If options->RowPerm = LargeDiag, MC64 is used to scale and
+ *		permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
+ *		entries of modulus 1 on the diagonal and off-diagonal entries
+ *		of modulus at most 1. If MC64 fails, dgsequ() is used to
+ *		equilibrate the system.
+ *	     2) options->ILU_DropTol = tau is the threshold for dropping.
+ *		For L, it is used directly (for the whole row in a supernode);
+ *		For U, ||A(:,i)||_oo * tau is used as the threshold
+ *	        for the	i-th column.
+ *		If a secondary dropping rule is required, tau will
+ *	        also be used to compute the second threshold.
+ *	     3) options->ILU_FillFactor = gamma, used as the initial guess
+ *		of memory growth.
+ *		If a secondary dropping rule is required, it will also
+ *              be used as an upper bound of the memory.
+ *	     4) options->ILU_DropRule specifies the dropping rule.
+ *		Option		Explanation
+ *		======		===========
+ *		DROP_BASIC:	Basic dropping rule, supernodal based ILU.
+ *		DROP_PROWS:	Supernodal based ILUTP, p = gamma * nnz(A) / n.
+ *		DROP_COLUMN:	Variation of ILUTP, for j-th column,
+ *				p = gamma * nnz(A(:,j)).
+ *		DROP_AREA;	Variation of ILUTP, for j-th column, use
+ *				nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
+ *				memory.
+ *		DROP_DYNAMIC:	Modify the threshold tau during the
+ *				factorizaion.
+ *				If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				Otherwise
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				tau_U(j) uses the similar rule.
+ *				NOTE: the thresholds used by L and U are
+ *				indenpendent.
+ *		DROP_INTERP:	Compute the second dropping threshold by
+ *				interpolation instead of sorting (default).
+ *				In this case, the actual fill ratio is not
+ *				guaranteed smaller than gamma.
+ *		DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
+ *		( The default option is DROP_BASIC | DROP_AREA. )
+ *	     5) options->ILU_Norm is the criterion of computing the average
+ *		value of a row in L.
+ *		options->ILU_Norm	average(x[1:n])
+ *		=================	===============
+ *		ONE_NORM		||x||_1 / n
+ *		TWO_NORM		||x||_2 / sqrt(n)
+ *		INF_NORM		max{|x[i]|}
+ *	     6) options->ILU_MILU specifies the type of MILU's variation.
+ *		= SILU (default): do not perform MILU;
+ *		= SMILU_1 (not recommended):
+ *		    U(i,i) := U(i,i) + sum(dropped entries);
+ *		= SMILU_2:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
+ *		= SMILU_3:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
+ *		NOTE: Even SMILU_1 does not preserve the column sum because of
+ *		late dropping.
+ *	     7) options->ILU_FillTol is used as the perturbation when
+ *		encountering zero pivots. If some U(i,i) = 0, so that U is
+ *		exactly singular, then
+ *		   U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ *
+ *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
+ *	to the transpose of A:
+ *
+ *	2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A' is
+ *	     overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
+ *	     (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
+ *
+ *	2.2. Permute columns of transpose(A) (rows of A),
+ *	     forming transpose(A)*Pc, where Pc is a permutation matrix that
+ *	     usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	2.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the transpose(A) (after equilibration if
+ *	     options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
+ *	     permutation Pr determined by partial pivoting.
+ *
+ *	2.4. Compute the reciprocal pivot growth factor.
+ *
+ *	2.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		 U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
+ *	     And info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	2.6. The system of equations is solved for X using the factored form
+ *	     of transpose(A).
+ *
+ *	2.7. If options->IterRefine is not used.
+ *
+ *	2.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the LU decomposition will be performed and how the
+ *	   system will be solved.
+ *
+ * A	   (input/output) SuperMatrix*
+ *	   Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *	   of the linear equations is A->nrow. Currently, the type of A can be:
+ *	   Stype = SLU_NC or SLU_NR, Dtype = SLU_C, Mtype = SLU_GE.
+ *	   In the future, more general A may be handled.
+ *
+ *	   On entry, If options->Fact = FACTORED and equed is not 'N',
+ *	   then A must have been equilibrated by the scaling factors in
+ *	   R and/or C.
+ *	   On exit, A is not modified if options->Equil = NO, or if
+ *	   options->Equil = YES but equed = 'N' on exit.
+ *	   Otherwise, if options->Equil = YES and equed is not 'N',
+ *	   A is scaled as follows:
+ *	   If A->Stype = SLU_NC:
+ *	     equed = 'R':  A := diag(R) * A
+ *	     equed = 'C':  A := A * diag(C)
+ *	     equed = 'B':  A := diag(R) * A * diag(C).
+ *	   If A->Stype = SLU_NR:
+ *	     equed = 'R':  transpose(A) := diag(R) * transpose(A)
+ *	     equed = 'C':  transpose(A) := transpose(A) * diag(C)
+ *	     equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
+ *
+ * perm_c  (input/output) int*
+ *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
+ *	   which defines the permutation matrix Pc; perm_c[i] = j means
+ *	   column i of A is in position j in A*Pc.
+ *	   On exit, perm_c may be overwritten by the product of the input
+ *	   perm_c and a permutation that postorders the elimination tree
+ *	   of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ *	   is already in postorder.
+ *
+ *	   If A->Stype = SLU_NR, column permutation vector of size A->nrow,
+ *	   which describes permutation of columns of transpose(A) 
+ *	   (rows of A) as described above.
+ *
+ * perm_r  (input/output) int*
+ *	   If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
+ *	   which defines the permutation matrix Pr, and is determined
+ *	   by partial pivoting.  perm_r[i] = j means row i of A is in 
+ *	   position j in Pr*A.
+ *
+ *	   If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ *	   determines permutation of rows of transpose(A)
+ *	   (columns of A) as described above.
+ *
+ *	   If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	   will try to use the input perm_r, unless a certain threshold
+ *	   criterion is violated. In that case, perm_r is overwritten by a
+ *	   new permutation determined by partial pivoting or diagonal
+ *	   threshold pivoting.
+ *	   Otherwise, perm_r is output argument.
+ *
+ * etree   (input/output) int*,  dimension (A->ncol)
+ *	   Elimination tree of Pc'*A'*A*Pc.
+ *	   If options->Fact != FACTORED and options->Fact != DOFACT,
+ *	   etree is an input argument, otherwise it is an output argument.
+ *	   Note: etree is a vector of parent pointers for a forest whose
+ *	   vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * equed   (input/output) char*
+ *	   Specifies the form of equilibration that was done.
+ *	   = 'N': No equilibration.
+ *	   = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
+ *	   = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
+ *	   = 'B': Both row and column equilibration, i.e., A was replaced 
+ *		  by diag(R)*A*diag(C).
+ *	   If options->Fact = FACTORED, equed is an input argument,
+ *	   otherwise it is an output argument.
+ *
+ * R	   (input/output) float*, dimension (A->nrow)
+ *	   The row scale factors for A or transpose(A).
+ *	   If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
+ *	   If equed = 'N' or 'C', R is not accessed.
+ *	   If options->Fact = FACTORED, R is an input argument,
+ *	       otherwise, R is output.
+ *	   If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ *	       of R must be positive.
+ *
+ * C	   (input/output) float*, dimension (A->ncol)
+ *	   The column scale factors for A or transpose(A).
+ *	   If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
+ *	   If equed = 'N' or 'R', C is not accessed.
+ *	   If options->Fact = FACTORED, C is an input argument,
+ *	       otherwise, C is output.
+ *	   If options->Fact = FACTORED and equed = 'C' or 'B', each element
+ *	       of C must be positive.
+ *
+ * L	   (output) SuperMatrix*
+ *	   The factor L from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype SLU_= NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses compressed row subscripts storage for supernodes, i.e.,
+ *	   L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
+ *
+ * U	   (output) SuperMatrix*
+ *	   The factor U from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses column-wise storage scheme, i.e., U has types:
+ *	   Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
+ *
+ * work    (workspace/output) void*, size (lwork) (in bytes)
+ *	   User supplied workspace, should be large enough
+ *	   to hold data structures for factors L and U.
+ *	   On exit, if fact is not 'F', L and U point to this array.
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 mem_usage->total_needed; no other side effects.
+ *
+ *	   See argument 'mem_usage' for memory usage statistics.
+ *
+ * B	   (input/output) SuperMatrix*
+ *	   B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
+ *	   On entry, the right hand side matrix.
+ *	   If B->ncol = 0, only LU decomposition is performed, the triangular
+ *			   solve is skipped.
+ *	   On exit,
+ *	      if equed = 'N', B is not modified; otherwise
+ *	      if A->Stype = SLU_NC:
+ *		 if options->Trans = NOTRANS and equed = 'R' or 'B',
+ *		    B is overwritten by diag(R)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
+ *		    B is overwritten by diag(C)*B;
+ *	      if A->Stype = SLU_NR:
+ *		 if options->Trans = NOTRANS and equed = 'C' or 'B',
+ *		    B is overwritten by diag(C)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
+ *		    B is overwritten by diag(R)*B.
+ *
+ * X	   (output) SuperMatrix*
+ *	   X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
+ *	   If info = 0 or info = A->ncol+1, X contains the solution matrix
+ *	   to the original system of equations. Note that A and B are modified
+ *	   on exit if equed is not 'N', and the solution to the equilibrated
+ *	   system is inv(diag(C))*X if options->Trans = NOTRANS and
+ *	   equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
+ *	   and equed = 'R' or 'B'.
+ *
+ * recip_pivot_growth (output) float*
+ *	   The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
+ *	   The infinity norm is used. If recip_pivot_growth is much less
+ *	   than 1, the stability of the LU factorization could be poor.
+ *
+ * rcond   (output) float*
+ *	   The estimate of the reciprocal condition number of the matrix A
+ *	   after equilibration (if done). If rcond is less than the machine
+ *	   precision (in particular, if rcond = 0), the matrix is singular
+ *	   to working precision. This condition is indicated by a return
+ *	   code of info > 0.
+ *
+ * mem_usage (output) mem_usage_t*
+ *	   Record the memory usage statistics, consisting of following fields:
+ *	   - for_lu (float)
+ *	     The amount of space used in bytes for L\U data structures.
+ *	   - total_needed (float)
+ *	     The amount of space needed in bytes to perform factorization.
+ *	   - expansions (int)
+ *	     The number of memory expansions during the LU factorization.
+ *
+ * stat   (output) SuperLUStat_t*
+ *	  Record the statistics on runtime and floating-point operation count.
+ *	  See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ *	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ *	   > 0: if info = i, and i is
+ *		<= A->ncol: number of zero pivots. They are replaced by small
+ *		      entries due to options->ILU_FillTol.
+ *		= A->ncol+1: U is nonsingular, but RCOND is less than machine
+ *		      precision, meaning that the matrix is singular to
+ *		      working precision. Nevertheless, the solution and
+ *		      error bounds are computed because there are a number
+ *		      of situations where the computed solution can be more
+ *		      accurate than the value of RCOND would suggest.
+ *		> A->ncol+1: number of bytes allocated when memory allocation
+ *		      failure occurred, plus A->ncol.
+ * 
+ */ + +void +cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, float *R, float *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, + float *recip_pivot_growth, float *rcond, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) +{ + + DNformat *Bstore, *Xstore; + complex *Bmat, *Xmat; + int ldb, ldx, nrhs; + SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ + SuperMatrix AC; /* Matrix postmultiplied by Pc */ + int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; + trans_t trant; + char norm[1]; + int i, j, info1; + float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; + int relax, panel_size; + float diag_pivot_thresh; + double t0; /* temporary time */ + double *utime; + + int *perm = NULL; + + /* External functions */ + extern float clangs(char *, SuperMatrix *); + + Bstore = B->Store; + Xstore = X->Store; + Bmat = Bstore->nzval; + Xmat = Xstore->nzval; + ldb = Bstore->lda; + ldx = Xstore->lda; + nrhs = B->ncol; + + *info = 0; + nofact = (options->Fact != FACTORED); + equil = (options->Equil == YES); + notran = (options->Trans == NOTRANS); + mc64 = (options->RowPerm == LargeDiag); + if ( nofact ) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE; + colequ = FALSE; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + smlnum = slamch_("Safe minimum"); + bignum = 1. / smlnum; + } + + /* Test the input parameters */ + if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && + options->Fact != SamePattern_SameRowPerm && + !notran && options->Trans != TRANS && options->Trans != CONJ && + !equil && options->Equil != NO) + *info = -1; + else if ( A->nrow != A->ncol || A->nrow < 0 || + (A->Stype != SLU_NC && A->Stype != SLU_NR) || + A->Dtype != SLU_C || A->Mtype != SLU_GE ) + *info = -2; + else if (options->Fact == FACTORED && + !(rowequ || colequ || lsame_(equed, "N"))) + *info = -6; + else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, R[j]); + rcmax = SUPERLU_MAX(rcmax, R[j]); + } + if (rcmin <= 0.) *info = -7; + else if ( A->nrow > 0) + rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else rowcnd = 1.; + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, C[j]); + rcmax = SUPERLU_MAX(rcmax, C[j]); + } + if (rcmin <= 0.) *info = -8; + else if (A->nrow > 0) + colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else colcnd = 1.; + } + if (*info == 0) { + if ( lwork < -1 ) *info = -12; + else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || + B->Stype != SLU_DN || B->Dtype != SLU_C || + B->Mtype != SLU_GE ) + *info = -13; + else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || + (B->ncol != 0 && B->ncol != X->ncol) || + X->Stype != SLU_DN || + X->Dtype != SLU_C || X->Mtype != SLU_GE ) + *info = -14; + } + } + if (*info != 0) { + i = -(*info); + xerbla_("cgsisx", &i); + return; + } + + /* Initialization for factor parameters */ + panel_size = sp_ienv(1); + relax = sp_ienv(2); + diag_pivot_thresh = options->DiagPivotThresh; + + utime = stat->utime; + + /* Convert A to SLU_NC format when necessary. */ + if ( A->Stype == SLU_NR ) { + NRformat *Astore = A->Store; + AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, + Astore->nzval, Astore->colind, Astore->rowptr, + SLU_NC, A->Dtype, A->Mtype); + if ( notran ) { /* Reverse the transpose argument. */ + trant = TRANS; + notran = 0; + } else { + trant = NOTRANS; + notran = 1; + } + } else { /* A->Stype == SLU_NC */ + trant = options->Trans; + AA = A; + } + + if ( nofact ) { + register int i, j; + NCformat *Astore = AA->Store; + int nnz = Astore->nnz; + int *colptr = Astore->colptr; + int *rowind = Astore->rowind; + complex *nzval = (complex *)Astore->nzval; + int n = AA->nrow; + + if ( mc64 ) { + *equed = 'B'; + rowequ = colequ = 1; + t0 = SuperLU_timer_(); + if ((perm = intMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for perm[]"); + + info1 = cldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); + + if (info1 > 0) { /* MC64 fails, call cgsequ() later */ + mc64 = 0; + SUPERLU_FREE(perm); + perm = NULL; + } else { + for (i = 0; i < n; i++) { + R[i] = exp(R[i]); + C[i] = exp(C[i]); + } + /* permute and scale the matrix */ + for (j = 0; j < n; j++) { + for (i = colptr[j]; i < colptr[j + 1]; i++) { + cs_mult(&nzval[i], &nzval[i], R[rowind[i]] * C[j]); + rowind[i] = perm[rowind[i]]; + } + } + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + if ( !mc64 & equil ) { + t0 = SuperLU_timer_(); + /* Compute row and column scalings to equilibrate the matrix A. */ + cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); + + if ( info1 == 0 ) { + /* Equilibrate matrix A. */ + claqgs(AA, R, C, rowcnd, colcnd, amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + } + + if ( nrhs > 0 ) { + /* Scale the right hand side if equilibration was performed. */ + if ( notran ) { + if ( rowequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]); + } + } + } else if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]); + } + } + } + + if ( nofact ) { + + t0 = SuperLU_timer_(); + /* + * Gnet column permutation vector perm_c[], according to permc_spec: + * permc_spec = NATURAL: natural ordering + * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A + * permc_spec = MMD_ATA: minimum degree on structure of A'*A + * permc_spec = COLAMD: approximate minimum degree column ordering + * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] + */ + permc_spec = options->ColPerm; + if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) + get_perm_c(permc_spec, AA, perm_c); + utime[COLPERM] = SuperLU_timer_() - t0; + + t0 = SuperLU_timer_(); + sp_preorder(options, AA, perm_c, etree, &AC); + utime[ETREE] = SuperLU_timer_() - t0; + + /* Compute the LU factorization of A*Pc. */ + t0 = SuperLU_timer_(); + cgsitrf(options, &AC, relax, panel_size, etree, work, lwork, + perm_c, perm_r, L, U, stat, info); + utime[FACT] = SuperLU_timer_() - t0; + + if ( lwork == -1 ) { + mem_usage->total_needed = *info - A->ncol; + return; + } + } + + if ( options->PivotGrowth ) { + if ( *info > 0 ) return; + + /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ + *recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U); + } + + if ( options->ConditionNumber ) { + /* Estimate the reciprocal of the condition number of A. */ + t0 = SuperLU_timer_(); + if ( notran ) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = clangs(norm, AA); + cgscon(norm, L, U, anorm, rcond, stat, &info1); + utime[RCOND] = SuperLU_timer_() - t0; + } + + if ( nrhs > 0 ) { + /* Compute the solution matrix X. */ + for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ + for (i = 0; i < B->nrow; i++) + Xmat[i + j*ldx] = Bmat[i + j*ldb]; + + t0 = SuperLU_timer_(); + cgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); + utime[SOLVE] = SuperLU_timer_() - t0; + + /* Transform the solution matrix X to a solution of the original + system. */ + if ( notran ) { + if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]); + } + } + } else { + if ( rowequ ) { + if (perm) { + complex *tmp; + int n = A->nrow; + + if ((tmp = complexMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for tmp[]"); + for (j = 0; j < nrhs; j++) { + for (i = 0; i < n; i++) + tmp[i] = Xmat[i + j * ldx]; /*dcopy*/ + for (i = 0; i < n; i++) + cs_mult(&Xmat[i+j*ldx], &tmp[perm[i]], R[i]); + } + SUPERLU_FREE(tmp); + } else { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]); + } + } + } + } + } /* end if nrhs > 0 */ + + if ( options->ConditionNumber ) { + /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ + if ( *rcond < slamch_("E") && *info == 0) *info = A->ncol + 1; + } + + if (perm) SUPERLU_FREE(perm); + + if ( nofact ) { + ilu_cQuerySpace(L, U, mem_usage); + Destroy_CompCol_Permuted(&AC); + } + if ( A->Stype == SLU_NR ) { + Destroy_SuperMatrix_Store(AA); + SUPERLU_FREE(AA); + } + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,628 @@ + +/*! @file cgsitf.c + * \brief Computes an ILU factorization of a general sparse matrix + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +#ifdef DEBUG +int num_drop_L; +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * CGSITRF computes an ILU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ *     Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the ILU decomposition will be performed.
+ *
+ * A	    (input) SuperMatrix*
+ *	    Original matrix A, permuted by columns, of dimension
+ *	    (A->nrow, A->ncol). The type of A can be:
+ *	    Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
+ *
+ * relax    (input) int
+ *	    To control degree of relaxing supernodes. If the number
+ *	    of nodes (columns) in a subtree of the elimination tree is less
+ *	    than relax, this subtree is considered as one supernode,
+ *	    regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ *	    A panel consists of at most panel_size consecutive columns.
+ *
+ * etree    (input) int*, dimension (A->ncol)
+ *	    Elimination tree of A'*A.
+ *	    Note: etree is a vector of parent pointers for a forest whose
+ *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *	    On input, the columns of A should be permuted so that the
+ *	    etree is in a certain postorder.
+ *
+ * work     (input/output) void*, size (lwork) (in bytes)
+ *	    User-supplied work space and space for the output data structures.
+ *	    Not referenced if lwork = 0;
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 *info; no other side effects.
+ *
+ * perm_c   (input) int*, dimension (A->ncol)
+ *	    Column permutation vector, which defines the
+ *	    permutation matrix Pc; perm_c[i] = j means column i of A is
+ *	    in position j in A*Pc.
+ *	    When searching for diagonal, perm_c[*] is applied to the
+ *	    row subscripts of A, so that diagonal threshold pivoting
+ *	    can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r   (input/output) int*, dimension (A->nrow)
+ *	    Row permutation vector which defines the permutation matrix Pr,
+ *	    perm_r[i] = j means row i of A is in position j in Pr*A.
+ *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	       will try to use the input perm_r, unless a certain threshold
+ *	       criterion is violated. In that case, perm_r is overwritten by
+ *	       a new permutation determined by partial pivoting or diagonal
+ *	       threshold pivoting.
+ *	    Otherwise, perm_r is output argument;
+ *
+ * L	    (output) SuperMatrix*
+ *	    The factor L from the factorization Pr*A=L*U; use compressed row
+ *	    subscripts storage for supernodes, i.e., L has type:
+ *	    Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
+ *
+ * U	    (output) SuperMatrix*
+ *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ *	    storage scheme, i.e., U has types: Stype = SLU_NC,
+ *	    Dtype = SLU_C, Mtype = SLU_TRU.
+ *
+ * stat     (output) SuperLUStat_t*
+ *	    Record the statistics on runtime and floating-point operation count.
+ *	    See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info     (output) int*
+ *	    = 0: successful exit
+ *	    < 0: if info = -i, the i-th argument had an illegal value
+ *	    > 0: if info = i, and i is
+ *	       <= A->ncol: number of zero pivots. They are replaced by small
+ *		  entries according to options->ILU_FillTol.
+ *	       > A->ncol: number of bytes allocated when memory allocation
+ *		  failure occurred, plus A->ncol. If lwork = -1, it is
+ *		  the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ *   m = number of rows in the matrix
+ *   n = number of columns in the matrix
+ *
+ *   marker[0:3*m-1]: marker[i] = j means that node i has been
+ *	reached when working on column j.
+ *	Storage: relative to original row subscripts
+ *	NOTE: There are 4 of them:
+ *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
+ *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
+ *	      marker_relax(has its own space) is used for relaxed supernodes.
+ *
+ *   parent[0:m-1]: parent vector used during dfs
+ *	Storage: relative to new row subscripts
+ *
+ *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ *	unexplored neighbor of i in lsub[*]
+ *
+ *   segrep[0:nseg-1]: contains the list of supernodal representatives
+ *	in topological order of the dfs. A supernode representative is the
+ *	last column of a supernode.
+ *	The maximum size of segrep[] is n.
+ *
+ *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ *	supernodal representative r, repfnz[r] is the location of the first
+ *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
+ *	indicates the supernode r has been explored.
+ *	NOTE: There are W of them, each used for one column of a panel.
+ *
+ *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ *	the panel diagonal. These are filled in during dpanel_dfs(), and are
+ *	used later in the inner LU factorization within the panel.
+ *	panel_lsub[]/dense[] pair forms the SPA data structure.
+ *	NOTE: There are W of them.
+ *
+ *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ *		   NOTE: there are W of them.
+ *
+ *   tempv[0:*]: real temporary used for dense numeric kernels;
+ *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
+ *	It is also used by the dropping routine ilu_ddrop_row().
+ * 
+ */ + +void +cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, + int *etree, void *work, int lwork, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) +{ + /* Local working arrays */ + NCPformat *Astore; + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ + int *iperm_c; /* inverse of perm_c */ + int *swap, *iswap; /* swap is used to store the row permutation + during the factorization. Initially, it is set + to iperm_c (row indeces of Pc*A*Pc'). + iswap is the inverse of swap. After the + factorization, it is equal to perm_r. */ + int *iwork; + complex *cwork; + int *segrep, *repfnz, *parent, *xplore; + int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ + int *marker, *marker_relax; + complex *dense, *tempv; + float *stempv; + int *relax_end, *relax_fsupc; + complex *a; + int *asub; + int *xa_begin, *xa_end; + int *xsup, *supno; + int *xlsub, *xlusup, *xusub; + int nzlumax; + float *amax; + complex drop_sum; + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + int *iwork2; /* used by the second dropping rule */ + + /* Local scalars */ + fact_t fact = options->Fact; + double diag_pivot_thresh = options->DiagPivotThresh; + double drop_tol = options->ILU_DropTol; /* tau */ + double fill_ini = options->ILU_FillTol; /* tau^hat */ + double gamma = options->ILU_FillFactor; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + double fill_tol; + int pivrow; /* pivotal row number in the original matrix A */ + int nseg1; /* no of segments in U-column above panel row jcol */ + int nseg; /* no of segments in each U-column */ + register int jcol; + register int kcol; /* end column of a relaxed snode */ + register int icol; + register int i, k, jj, new_next, iinfo; + int m, n, min_mn, jsupno, fsupc, nextlu, nextu; + int w_def; /* upper bound on panel width */ + int usepr, iperm_r_allocated = 0; + int nnzL, nnzU; + int *panel_histo = stat->panel_histo; + flops_t *ops = stat->ops; + + int last_drop;/* the last column which the dropping rules applied */ + int quota; + int nnzAj; /* number of nonzeros in A(:,1:j) */ + int nnzLj, nnzUj; + double tol_L = drop_tol, tol_U = drop_tol; + complex zero = {0.0, 0.0}; + + /* Executable */ + iinfo = 0; + m = A->nrow; + n = A->ncol; + min_mn = SUPERLU_MIN(m, n); + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + + /* Allocate storage common to the factor routines */ + *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, + gamma, L, U, &Glu, &iwork, &cwork); + if ( *info ) return; + + xsup = Glu.xsup; + supno = Glu.supno; + xlsub = Glu.xlsub; + xlusup = Glu.xlusup; + xusub = Glu.xusub; + + SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, + &repfnz, &panel_lsub, &marker_relax, &marker); + cSetRWork(m, panel_size, cwork, &dense, &tempv); + + usepr = (fact == SamePattern_SameRowPerm); + if ( usepr ) { + /* Compute the inverse of perm_r */ + iperm_r = (int *) intMalloc(m); + for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; + iperm_r_allocated = 1; + } + + iperm_c = (int *) intMalloc(n); + for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; + swap = (int *)intMalloc(n); + for (k = 0; k < n; k++) swap[k] = iperm_c[k]; + iswap = (int *)intMalloc(n); + for (k = 0; k < n; k++) iswap[k] = perm_c[k]; + amax = (float *) floatMalloc(panel_size); + if (drop_rule & DROP_SECONDARY) + iwork2 = (int *)intMalloc(n); + else + iwork2 = NULL; + + nnzAj = 0; + nnzLj = 0; + nnzUj = 0; + last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95)); + + /* Identify relaxed snodes */ + relax_end = (int *) intMalloc(n); + relax_fsupc = (int *) intMalloc(n); + if ( options->SymmetricMode == YES ) + ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + else + ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + + ifill (perm_r, m, EMPTY); + ifill (marker, m * NO_MARKER, EMPTY); + supno[0] = -1; + xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; + w_def = panel_size; + + /* Mark the rows used by relaxed supernodes */ + ifill (marker_relax, m, EMPTY); + i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, + asub, marker_relax); +#if ( PRNTlevel >= 1) + printf("%d relaxed supernodes.\n", i); +#endif + + /* + * Work on one "panel" at a time. A panel is one of the following: + * (a) a relaxed supernode at the bottom of the etree, or + * (b) panel_size contiguous columns, defined by the user + */ + for (jcol = 0; jcol < min_mn; ) { + + if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ + kcol = relax_end[jcol]; /* end of the relaxed snode */ + panel_histo[kcol-jcol+1]++; + + /* Drop small rows in the previous supernode. */ + if (jcol > 0 && jcol < last_drop) { + int first = xsup[supno[jcol - 1]]; + int last = jcol - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); + + /* Drop small rows */ + stempv = (float *) tempv; + i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj, + &fill_tol, &Glu, stempv, iwork2, 0); + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } + + /* -------------------------------------- + * Factorize the relaxed supernode(jcol:kcol) + * -------------------------------------- */ + /* Determine the union of the row structure of the snode */ + if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end, + marker, &Glu)) != 0 ) + return; + + nextu = xusub[jcol]; + nextlu = xlusup[jcol]; + jsupno = supno[jcol]; + fsupc = xsup[jsupno]; + new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); + nzlumax = Glu.nzlumax; + while ( new_next > nzlumax ) { + if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) + return; + } + + for (icol = jcol; icol <= kcol; icol++) { + xusub[icol+1] = nextu; + + amax[0] = 0.0; + /* Scatter into SPA dense[*] */ + for (k = xa_begin[icol]; k < xa_end[icol]; k++) { + register float tmp = c_abs1 (&a[k]); + if (tmp > amax[0]) amax[0] = tmp; + dense[asub[k]] = a[k]; + } + nnzAj += xa_end[icol] - xa_begin[icol]; + if (amax[0] == 0.0) { + amax[0] = fill_ini; +#if ( PRNTlevel >= 1) + printf("Column %d is entirely zero!\n", icol); + fflush(stdout); +#endif + } + + /* Numeric update within the snode */ + csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); + + if (usepr) pivrow = iperm_r[icol]; + fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); + if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr, + perm_r, iperm_c[icol], swap, iswap, + marker_relax, &pivrow, + amax[0] * fill_tol, milu, zero, + &Glu, stat)) ) { + iinfo++; + marker[pivrow] = kcol; + } + + } + + jcol = kcol + 1; + + } else { /* Work on one panel of panel_size columns */ + + /* Adjust panel_size so that a panel won't overlap with the next + * relaxed snode. + */ + panel_size = w_def; + for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) + if ( relax_end[k] != EMPTY ) { + panel_size = k - jcol; + break; + } + if ( k == min_mn ) panel_size = min_mn - jcol; + panel_histo[panel_size]++; + + /* symbolic factor on a panel of columns */ + ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, + dense, amax, panel_lsub, segrep, repfnz, + marker, parent, xplore, &Glu); + + /* numeric sup-panel updates in topological order */ + cpanel_bmod(m, panel_size, jcol, nseg1, dense, + tempv, segrep, repfnz, &Glu, stat); + + /* Sparse LU within the panel, and below panel diagonal */ + for (jj = jcol; jj < jcol + panel_size; jj++) { + + k = (jj - jcol) * m; /* column index for w-wide arrays */ + + nseg = nseg1; /* Begin after all the panel segments */ + + nnzAj += xa_end[jj] - xa_begin[jj]; + + if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg, + &panel_lsub[k], segrep, &repfnz[k], + marker, parent, xplore, &Glu))) + return; + + /* Numeric updates */ + if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k], + tempv, &segrep[nseg1], &repfnz[k], + jcol, &Glu, stat)) != 0) return; + + /* Make a fill-in position if the column is entirely zero */ + if (xlsub[jj + 1] == xlsub[jj]) { + register int i, row; + int nextl; + int nzlmax = Glu.nzlmax; + int *lsub = Glu.lsub; + int *marker2 = marker + 2 * m; + + /* Allocate memory */ + nextl = xlsub[jj] + 1; + if (nextl >= nzlmax) { + int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); + if (error) { *info = error; return; } + lsub = Glu.lsub; + } + xlsub[jj + 1]++; + assert(xlusup[jj]==xlusup[jj+1]); + xlusup[jj + 1]++; + Glu.lusup[xlusup[jj]] = zero; + + /* Choose a row index (pivrow) for fill-in */ + for (i = jj; i < n; i++) + if (marker_relax[swap[i]] <= jj) break; + row = swap[i]; + marker2[row] = jj; + lsub[xlsub[jj]] = row; +#ifdef DEBUG + printf("Fill col %d.\n", jj); + fflush(stdout); +#endif + } + + /* Computer the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * jj / m; + else if (drop_rule & DROP_COLUMN) + quota = gamma * (xa_end[jj] - xa_begin[jj]) * + (jj + 1) / m; + else if (drop_rule & DROP_AREA) + quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; + else + quota = m; + + /* Copy the U-segments to ucol[*] and drop small entries */ + if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k], + perm_r, &dense[k], drop_rule, + milu, amax[jj - jcol] * tol_U, + quota, &drop_sum, &nnzUj, &Glu, + iwork2)) != 0) + return; + + /* Reset the dropping threshold if required */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) + tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); + else + tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); + } + + cs_mult(&drop_sum, &drop_sum, MILU_ALPHA); + if (usepr) pivrow = iperm_r[jj]; + fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); + if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r, + iperm_c[jj], swap, iswap, + marker_relax, &pivrow, + amax[jj - jcol] * fill_tol, milu, + drop_sum, &Glu, stat)) ) { + iinfo++; + marker[m + pivrow] = jj; + marker[2 * m + pivrow] = jj; + } + + /* Reset repfnz[] for this column */ + resetrep_col (nseg, segrep, &repfnz[k]); + + /* Start a new supernode, drop the previous one */ + if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { + int first = xsup[supno[jj - 1]]; + int last = jj - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) + / m) - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / + (double)min_mn); + + /* Drop small rows */ + stempv = (float *) tempv; + i = ilu_cdrop_row(options, first, last, tol_L, quota, + &nnzLj, &fill_tol, &Glu, stempv, iwork2, + 1); + + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } /* if start a new supernode */ + + } /* for */ + + jcol += panel_size; /* Move to the next panel */ + + } /* else */ + + } /* for */ + + *info = iinfo; + + if ( m > n ) { + k = 0; + for (i = 0; i < m; ++i) + if ( perm_r[i] == EMPTY ) { + perm_r[i] = n + k; + ++k; + } + } + + ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); + fixupL(min_mn, perm_r, &Glu); + + cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */ + + if ( fact == SamePattern_SameRowPerm ) { + /* L and U structures may have changed due to possibly different + pivoting, even though the storage is available. + There could also be memory expansions, so the array locations + may have changed, */ + ((SCformat *)L->Store)->nnz = nnzL; + ((SCformat *)L->Store)->nsuper = Glu.supno[n]; + ((SCformat *)L->Store)->nzval = Glu.lusup; + ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; + ((SCformat *)L->Store)->rowind = Glu.lsub; + ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; + ((NCformat *)U->Store)->nnz = nnzU; + ((NCformat *)U->Store)->nzval = Glu.ucol; + ((NCformat *)U->Store)->rowind = Glu.usub; + ((NCformat *)U->Store)->colptr = Glu.xusub; + } else { + cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, + Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, + Glu.xsup, SLU_SC, SLU_C, SLU_TRLU); + cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, + Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU); + } + + ops[FACT] += ops[TRSV] + ops[GEMV]; + + if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); + SUPERLU_FREE (iperm_c); + SUPERLU_FREE (relax_end); + SUPERLU_FREE (swap); + SUPERLU_FREE (iswap); + SUPERLU_FREE (relax_fsupc); + SUPERLU_FREE (amax); + if ( iwork2 ) SUPERLU_FREE (iwork2); + +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,26 @@ -/* +/*! @file cgsrfs.c + * \brief Improves computed solution to a system of inear equations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routine CGERFS
+ * 
*/ /* * File name: cgsrfs.c * History: Modified from lapack routine CGERFS */ #include -#include "csp_defs.h" +#include "slu_cdefs.h" -void -cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, char *equed, float *R, float *C, - SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  *   Purpose   
  *   =======   
  *
@@ -123,8 +124,16 @@
  *
  *    ITMAX is the maximum number of steps of iterative refinement.   
  *
- */  
+ * 
+ */ +void +cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, char *equed, float *R, float *C, + SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, + SuperLUStat_t *stat, int *info) +{ + #define ITMAX 5 /* Table of constant values */ @@ -224,6 +233,8 @@ nz = A->ncol + 1; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); + /* Set SAFE1 essentially to be the underflow threshold times the + number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; @@ -274,34 +285,36 @@ where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the - numerator and denominator before dividing. */ + numerator before dividing. */ - for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] ); + for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { for (k = 0; k < A->ncol; ++k) { - xk = slu_c_abs1( &Xptr[k] ); + xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) - rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk; + rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; - s += slu_c_abs1(&Aval[i]) * slu_c_abs1(&Xptr[irow]); + s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]); } rwork[k] += s; } } s = 0.; for (i = 0; i < A->nrow; ++i) { - if (rwork[i] > safe2) - s = SUPERLU_MAX( s, slu_c_abs1(&work[i]) / rwork[i] ); - else - s = SUPERLU_MAX( s, (slu_c_abs1(&work[i]) + safe1) / - (rwork[i] + safe1) ); + if (rwork[i] > safe2) { + s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] ); + } else if ( rwork[i] != 0.0 ) { + s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] ); + } + /* If rwork[i] is exactly 0.0, then we know the true + residual also must be exactly 0.0. */ } berr[j] = s; @@ -351,22 +364,22 @@ inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ - for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] ); + for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] ); /* Compute abs(op(A))*abs(X) + abs(B). */ if ( notran ) { for (k = 0; k < A->ncol; ++k) { - xk = slu_c_abs1( &Xptr[k] ); + xk = c_abs1( &Xptr[k] ); for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) - rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk; + rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk; } } else { for (k = 0; k < A->ncol; ++k) { s = 0.; for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) { irow = Astore->rowind[i]; - xk = slu_c_abs1( &Xptr[irow] ); - s += slu_c_abs1(&Aval[i]) * xk; + xk = c_abs1( &Xptr[irow] ); + s += c_abs1(&Aval[i]) * xk; } rwork[k] += s; } @@ -374,9 +387,9 @@ for (i = 0; i < A->nrow; ++i) if (rwork[i] > safe2) - rwork[i] = slu_c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; + rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i]; else - rwork[i] = slu_c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; + rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1; kase = 0; do { @@ -424,13 +437,13 @@ lstres = 0.; if ( notran && colequ ) { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, C[i] * slu_c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) ); } else if ( !notran && rowequ ) { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, R[i] * slu_c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) ); } else { for (i = 0; i < A->nrow; ++i) - lstres = SUPERLU_MAX( lstres, slu_c_abs1( &Xptr[i]) ); + lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) ); } if ( lstres != 0. ) ferr[j] /= lstres; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssv.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssv.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssv.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,19 @@ - -/* +/*! @file cgssv.c + * \brief Solves the system of linear equations A*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "csp_defs.h" +#include "slu_cdefs.h" -void -cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, - SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -127,15 +126,21 @@
  *                so the solution could not be computed.
  *             > A->ncol: number of bytes allocated when memory allocation
  *                failure occurred, plus A->ncol.
- *   
+ * 
*/ + +void +cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, + SuperLUStat_t *stat, int *info ) +{ + DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ - float drop_tol = 0.; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; @@ -201,8 +206,8 @@ relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ - cgstrf(options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); + cgstrf(options, &AC, relax, panel_size, etree, + NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,22 +1,19 @@ -/* +/*! @file cgssvx.c + * \brief Solves the system of linear equations A*X=B or A'*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "csp_defs.h" +#include "slu_cdefs.h" -void -cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - int *etree, char *equed, float *R, float *C, - SuperMatrix *L, SuperMatrix *U, void *work, int lwork, - SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, - float *rcond, float *ferr, float *berr, - mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -314,7 +311,7 @@
  *
  * stat   (output) SuperLUStat_t*
  *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
+ *        See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info    (output) int*
  *         = 0: successful exit   
@@ -332,9 +329,19 @@
  *                    accurate than the value of RCOND would suggest.   
  *              > A->ncol+1: number of bytes allocated when memory allocation
  *                    failure occurred, plus A->ncol.
- *
+ * 
*/ +void +cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, float *R, float *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, + float *rcond, float *ferr, float *berr, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) +{ + + DNformat *Bstore, *Xstore; complex *Bmat, *Xmat; int ldb, ldx, nrhs; @@ -346,13 +353,12 @@ int i, j, info1; float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; - float diag_pivot_thresh, drop_tol; + float diag_pivot_thresh; double t0; /* temporary time */ double *utime; /* External functions */ extern float clangs(char *, SuperMatrix *); - extern double slamch_(char *); Bstore = B->Store; Xstore = X->Store; @@ -443,7 +449,6 @@ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; - drop_tol = 0.0; utime = stat->utime; @@ -455,7 +460,7 @@ Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ - trant = CONJ; + trant = TRANS; notran = 0; } else { trant = NOTRANS; @@ -523,8 +528,8 @@ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); - cgstrf(options, &AC, drop_tol, relax, panel_size, - etree, work, lwork, perm_c, perm_r, L, U, stat, info); + cgstrf(options, &AC, relax, panel_size, etree, + work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrf.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,33 +1,32 @@ -/* +/*! @file cgstrf.c + * \brief Computes an LU factorization of a general sparse matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
+ * 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" -void -cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol, - int relax, int panel_size, int *etree, void *work, int lwork, - int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, - SuperLUStat_t *stat, int *info) -{ -/* +#include "slu_cdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -53,11 +52,6 @@
  *          (A->nrow, A->ncol). The type of A can be:
  *          Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
  *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *	    Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
  * relax    (input) int
  *          To control degree of relaxing supernodes. If the number
  *          of nodes (columns) in a subtree of the elimination tree is less
@@ -117,7 +111,7 @@
  *
  * stat     (output) SuperLUStat_t*
  *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
+ *          See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info     (output) int*
  *          = 0: successful exit
@@ -177,13 +171,20 @@
  *	    	   NOTE: there are W of them.
  *
  *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in csp_defs.h.
- *
+ *	The size of this array is defined by NUM_TEMPV() in slu_cdefs.h.
+ * 
*/ + +void +cgstrf (superlu_options_t *options, SuperMatrix *A, + int relax, int panel_size, int *etree, void *work, int lwork, + int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, + SuperLUStat_t *stat, int *info) +{ /* Local working arrays */ NCPformat *Astore; - int *iperm_r; /* inverse of perm_r; - used when options->Fact == SamePattern_SameRowPerm */ + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; complex *cwork; @@ -199,7 +200,8 @@ int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + float fill_ratio = sp_ienv(6); /* estimated fill ratio */ + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; @@ -230,7 +232,7 @@ /* Allocate storage common to the factor routines */ *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, - panel_size, L, U, &Glu, &iwork, &cwork); + panel_size, fill_ratio, L, U, &Glu, &iwork, &cwork); if ( *info ) return; xsup = Glu.xsup; @@ -417,7 +419,7 @@ ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { - cCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, + cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_C, SLU_TRLU); cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, @@ -425,6 +427,7 @@ } ops[FACT] += ops[TRSV] + ops[GEMV]; + stat->expansions = --(Glu.num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cgstrs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,27 @@ -/* +/*! @file cgstrs.c + * \brief Solves a system using LU factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" +#include "slu_cdefs.h" /* @@ -29,13 +31,9 @@ void clsolve(int, int, complex*, complex*); void cmatvec(int, int, int, complex*, complex*, complex*); - -void -cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, SuperMatrix *B, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -85,8 +83,15 @@
  * info    (output) int*
  * 	   = 0: successful exit
  *	   < 0: if info = -i, the i-th argument had an illegal value
- *
+ * 
*/ + +void +cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, SuperMatrix *B, + SuperLUStat_t *stat, int *info) +{ + #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif @@ -293,7 +298,7 @@ stat->ops[SOLVE] = solve_ops; - } else { /* Solve A'*X=B */ + } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; @@ -302,28 +307,23 @@ } stat->ops[SOLVE] = 0; - if (trans == TRANS) { - - for (k = 0; k < nrhs; ++k) { + for (k = 0; k < nrhs; ++k) { + /* Multiply by inv(U'). */ + sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); - /* Multiply by inv(U'). */ - sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); - - /* Multiply by inv(L'). */ - sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); - } - } - else { + /* Multiply by inv(L'). */ + sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); + } + } else { /* trans == CONJ */ for (k = 0; k < nrhs; ++k) { /* Multiply by conj(inv(U')). */ sp_ctrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info); /* Multiply by conj(inv(L')). */ sp_ctrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info); - } - } - + } + } /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; @@ -331,7 +331,7 @@ for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } - } + } SUPERLU_FREE(work); SUPERLU_FREE(soln); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,68 +1,75 @@ - -/* +/*! @file clacon.c + * \brief Estimates the 1-norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "Cnames.h" -#include "scomplex.h" +#include "slu_Cnames.h" +#include "slu_scomplex.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   CLACON estimates the 1-norm of a square matrix A.   
+ *   Reverse communication is used for evaluating matrix-vector products. 
+ * 
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   N      (input) INT
+ *          The order of the matrix.  N >= 1.   
+ *
+ *   V      (workspace) COMPLEX PRECISION array, dimension (N)   
+ *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
+ *          (W is not returned).   
+ *
+ *   X      (input/output) COMPLEX PRECISION array, dimension (N)   
+ *          On an intermediate return, X should be overwritten by   
+ *                A * X,   if KASE=1,   
+ *                A' * X,  if KASE=2,
+ *          where A' is the conjugate transpose of A,
+ *         and CLACON must be re-called with all the other parameters   
+ *          unchanged.   
+ *
+ *
+ *   EST    (output) FLOAT PRECISION   
+ *          An estimate (a lower bound) for norm(A).   
+ *
+ *   KASE   (input/output) INT
+ *          On the initial call to CLACON, KASE should be 0.   
+ *          On an intermediate return, KASE will be 1 or 2, indicating   
+ *          whether X should be overwritten by A * X  or A' * X.   
+ *          On the final return from CLACON, KASE will again be 0.   
+ *
+ *   Further Details   
+ *   ======= =======   
+ *
+ *   Contributed by Nick Higham, University of Manchester.   
+ *   Originally named CONEST, dated March 16, 1988.   
+ *
+ *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
+ *   a real or complex matrix, with applications to condition estimation", 
+ *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
+ *   ===================================================================== 
+ * 
+ */ + int clacon_(int *n, complex *v, complex *x, float *est, int *kase) { -/* - Purpose - ======= - CLACON estimates the 1-norm of a square matrix A. - Reverse communication is used for evaluating matrix-vector products. - - Arguments - ========= - - N (input) INT - The order of the matrix. N >= 1. - - V (workspace) COMPLEX PRECISION array, dimension (N) - On the final return, V = A*W, where EST = norm(V)/norm(W) - (W is not returned). - - X (input/output) COMPLEX PRECISION array, dimension (N) - On an intermediate return, X should be overwritten by - A * X, if KASE=1, - A' * X, if KASE=2, - where A' is the conjugate transpose of A, - and CLACON must be re-called with all the other parameters - unchanged. - - - EST (output) FLOAT PRECISION - An estimate (a lower bound) for norm(A). - - KASE (input/output) INT - On the initial call to CLACON, KASE should be 0. - On an intermediate return, KASE will be 1 or 2, indicating - whether X should be overwritten by A * X or A' * X. - On the final return from CLACON, KASE will again be 0. - - Further Details - ======= ======= - - Contributed by Nick Higham, University of Manchester. - Originally named CONEST, dated March 16, 1988. - - Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - a real or complex matrix, with applications to condition estimation", - ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - ===================================================================== -*/ - /* Table of constant values */ int c__1 = 1; complex zero = {0.0, 0.0}; @@ -106,14 +113,14 @@ L20: if (*n == 1) { v[0] = x[0]; - *est = slu_c_abs(&v[0]); + *est = c_abs(&v[0]); /* ... QUIT */ goto L150; } *est = scsum1_(n, x, &c__1); for (i = 0; i < *n; ++i) { - d__1 = slu_c_abs(&x[i]); + d__1 = c_abs(&x[i]); if (d__1 > safmin) { d__1 = 1 / d__1; x[i].r *= d__1; @@ -158,7 +165,7 @@ if (*est <= estold) goto L120; for (i = 0; i < *n; ++i) { - d__1 = slu_c_abs(&x[i]); + d__1 = c_abs(&x[i]); if (d__1 > safmin) { d__1 = 1 / d__1; x[i].r *= d__1; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,58 +1,65 @@ - -/* +/*! @file clangs.c + * \brief Returns the value of the one norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from lapack routine CLANGE 
+ * 
*/ /* * File name: clangs.c * History: Modified from lapack routine CLANGE */ #include -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   CLANGS returns the value of the one norm, or the Frobenius norm, or 
+ *   the infinity norm, or the element of largest absolute value of a 
+ *   real matrix A.   
+ *
+ *   Description   
+ *   ===========   
+ *
+ *   CLANGE returns the value   
+ *
+ *      CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
+ *               (   
+ *               ( norm1(A),         NORM = '1', 'O' or 'o'   
+ *               (   
+ *               ( normI(A),         NORM = 'I' or 'i'   
+ *               (   
+ *               ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
+ *
+ *   where  norm1  denotes the  one norm of a matrix (maximum column sum), 
+ *   normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
+ *   normF  denotes the  Frobenius norm of a matrix (square root of sum of 
+ *   squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   NORM    (input) CHARACTER*1   
+ *           Specifies the value to be returned in CLANGE as described above.   
+ *   A       (input) SuperMatrix*
+ *           The M by N sparse matrix A. 
+ *
+ *  =====================================================================
+ * 
+ */ + float clangs(char *norm, SuperMatrix *A) { -/* - Purpose - ======= - - CLANGS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - CLANGE returns the value - - CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANGE as described above. - A (input) SuperMatrix* - The M by N sparse matrix A. - - ===================================================================== -*/ /* Local variables */ NCformat *Astore; @@ -72,7 +79,7 @@ value = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) - value = SUPERLU_MAX( value, slu_c_abs( &Aval[i]) ); + value = SUPERLU_MAX( value, c_abs( &Aval[i]) ); } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find norm1(A). */ @@ -80,7 +87,7 @@ for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) - sum += slu_c_abs( &Aval[i] ); + sum += c_abs( &Aval[i] ); value = SUPERLU_MAX(value,sum); } @@ -92,7 +99,7 @@ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) { irow = Astore->rowind[i]; - rwork[irow] += slu_c_abs( &Aval[i] ); + rwork[irow] += c_abs( &Aval[i] ); } value = 0.; for (i = 0; i < A->nrow; ++i) Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,81 +1,89 @@ - -/* +/*! @file claqgs.c + * \brief Equlibrates a general sprase matrix + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
+ * Modified from LAPACK routine CLAQGE
+ * 
*/ /* * File name: claqgs.c * History: Modified from LAPACK routine CLAQGE */ #include -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   CLAQGS equilibrates a general sparse M by N matrix A using the row and   
+ *   scaling factors in the vectors R and C.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input/output) SuperMatrix*
+ *           On exit, the equilibrated matrix.  See EQUED for the form of 
+ *           the equilibrated matrix. The type of A can be:
+ *	    Stype = NC; Dtype = SLU_C; Mtype = GE.
+ *	    
+ *   R       (input) float*, dimension (A->nrow)
+ *           The row scale factors for A.
+ *	    
+ *   C       (input) float*, dimension (A->ncol)
+ *           The column scale factors for A.
+ *	    
+ *   ROWCND  (input) float
+ *           Ratio of the smallest R(i) to the largest R(i).
+ *	    
+ *   COLCND  (input) float
+ *           Ratio of the smallest C(i) to the largest C(i).
+ *	    
+ *   AMAX    (input) float
+ *           Absolute value of largest matrix entry.
+ *	    
+ *   EQUED   (output) char*
+ *           Specifies the form of equilibration that was done.   
+ *           = 'N':  No equilibration   
+ *           = 'R':  Row equilibration, i.e., A has been premultiplied by  
+ *                   diag(R).   
+ *           = 'C':  Column equilibration, i.e., A has been postmultiplied  
+ *                   by diag(C).   
+ *           = 'B':  Both row and column equilibration, i.e., A has been
+ *                   replaced by diag(R) * A * diag(C).   
+ *
+ *   Internal Parameters   
+ *   ===================   
+ *
+ *   THRESH is a threshold value used to decide if row or column scaling   
+ *   should be done based on the ratio of the row or column scaling   
+ *   factors.  If ROWCND < THRESH, row scaling is done, and if   
+ *   COLCND < THRESH, column scaling is done.   
+ *
+ *   LARGE and SMALL are threshold values used to decide if row scaling   
+ *   should be done based on the absolute size of the largest matrix   
+ *   element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
+ *
+ *   ===================================================================== 
+ * 
+ */ + void claqgs(SuperMatrix *A, float *r, float *c, float rowcnd, float colcnd, float amax, char *equed) { -/* - Purpose - ======= - CLAQGS equilibrates a general sparse M by N matrix A using the row and - scaling factors in the vectors R and C. - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input/output) SuperMatrix* - On exit, the equilibrated matrix. See EQUED for the form of - the equilibrated matrix. The type of A can be: - Stype = NC; Dtype = SLU_C; Mtype = GE. - - R (input) float*, dimension (A->nrow) - The row scale factors for A. - - C (input) float*, dimension (A->ncol) - The column scale factors for A. - - ROWCND (input) float - Ratio of the smallest R(i) to the largest R(i). - - COLCND (input) float - Ratio of the smallest C(i) to the largest C(i). - - AMAX (input) float - Absolute value of largest matrix entry. - - EQUED (output) char* - Specifies the form of equilibration that was done. - = 'N': No equilibration - = 'R': Row equilibration, i.e., A has been premultiplied by - diag(R). - = 'C': Column equilibration, i.e., A has been postmultiplied - by diag(C). - = 'B': Both row and column equilibration, i.e., A has been - replaced by diag(R) * A * diag(C). - - Internal Parameters - =================== - - THRESH is a threshold value used to decide if row or column scaling - should be done based on the ratio of the row or column scaling - factors. If ROWCND < THRESH, row scaling is done, and if - COLCND < THRESH, column scaling is done. - - LARGE and SMALL are threshold values used to decide if row scaling - should be done based on the absolute size of the largest matrix - element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. - - ===================================================================== -*/ - #define THRESH (0.1) /* Local variables */ Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,168 @@ + +/*! @file + * \brief Finds a row permutation so that the matrix has large entries on the diagonal + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +extern void mc64id_(int_t*); +extern void mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [], + int_t*, int_t [], int_t*, int_t[], int_t*, double [], + int_t [], int_t []); + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   CLDPERM finds a row permutation so that the matrix has large
+ *   entries on the diagonal.
+ *
+ * Arguments
+ * =========
+ *
+ * job    (input) int
+ *        Control the action. Possible values for JOB are:
+ *        = 1 : Compute a row permutation of the matrix so that the
+ *              permuted matrix has as many entries on its diagonal as
+ *              possible. The values on the diagonal are of arbitrary size.
+ *              HSL subroutine MC21A/AD is used for this.
+ *        = 2 : Compute a row permutation of the matrix so that the smallest 
+ *              value on the diagonal of the permuted matrix is maximized.
+ *        = 3 : Compute a row permutation of the matrix so that the smallest
+ *              value on the diagonal of the permuted matrix is maximized.
+ *              The algorithm differs from the one used for JOB = 2 and may
+ *              have quite a different performance.
+ *        = 4 : Compute a row permutation of the matrix so that the sum
+ *              of the diagonal entries of the permuted matrix is maximized.
+ *        = 5 : Compute a row permutation of the matrix so that the product
+ *              of the diagonal entries of the permuted matrix is maximized
+ *              and vectors to scale the matrix so that the nonzero diagonal 
+ *              entries of the permuted matrix are one in absolute value and 
+ *              all the off-diagonal entries are less than or equal to one in 
+ *              absolute value.
+ *        Restriction: 1 <= JOB <= 5.
+ *
+ * n      (input) int
+ *        The order of the matrix.
+ *
+ * nnz    (input) int
+ *        The number of nonzeros in the matrix.
+ *
+ * adjncy (input) int*, of size nnz
+ *        The adjacency structure of the matrix, which contains the row
+ *        indices of the nonzeros.
+ *
+ * colptr (input) int*, of size n+1
+ *        The pointers to the beginning of each column in ADJNCY.
+ *
+ * nzval  (input) complex*, of size nnz
+ *        The nonzero values of the matrix. nzval[k] is the value of
+ *        the entry corresponding to adjncy[k].
+ *        It is not used if job = 1.
+ *
+ * perm   (output) int*, of size n
+ *        The permutation vector. perm[i] = j means row i in the
+ *        original matrix is in row j of the permuted matrix.
+ *
+ * u      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the row scaling factors. 
+ *
+ * v      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the column scaling factors. 
+ *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
+ * 
+ */ + +int +cldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], + complex nzval[], int_t *perm, float u[], float v[]) +{ + int_t i, liw, ldw, num; + int_t *iw, icntl[10], info[10]; + double *dw; + double *nzval_d = (double *) SUPERLU_MALLOC(nnz * sizeof(double)); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Enter cldperm()"); +#endif + liw = 5*n; + if ( job == 3 ) liw = 10*n + nnz; + if ( !(iw = intMalloc(liw)) ) ABORT("Malloc fails for iw[]"); + ldw = 3*n + nnz; + if ( !(dw = (double*) SUPERLU_MALLOC(ldw * sizeof(double))) ) + ABORT("Malloc fails for dw[]"); + + /* Increment one to get 1-based indexing. */ + for (i = 0; i <= n; ++i) ++colptr[i]; + for (i = 0; i < nnz; ++i) ++adjncy[i]; +#if ( DEBUGlevel>=2 ) + printf("LDPERM(): n %d, nnz %d\n", n, nnz); + slu_PrintInt10("colptr", n+1, colptr); + slu_PrintInt10("adjncy", nnz, adjncy); +#endif + + /* + * NOTE: + * ===== + * + * MC64AD assumes that column permutation vector is defined as: + * perm(i) = j means column i of permuted A is in column j of original A. + * + * Since a symmetric permutation preserves the diagonal entries. Then + * by the following relation: + * P'(A*P')P = P'A + * we can apply inverse(perm) to rows of A to get large diagonal entries. + * But, since 'perm' defined in MC64AD happens to be the reverse of + * SuperLU's definition of permutation vector, therefore, it is already + * an inverse for our purpose. We will thus use it directly. + * + */ + mc64id_(icntl); +#if 0 + /* Suppress error and warning messages. */ + icntl[0] = -1; + icntl[1] = -1; +#endif + + for (i = 0; i < nnz; ++i) nzval_d[i] = c_abs1(&nzval[i]); + mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval_d, &num, perm, + &liw, iw, &ldw, dw, icntl, info); + +#if ( DEBUGlevel>=2 ) + slu_PrintInt10("perm", n, perm); + printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); +#endif + if ( info[0] == 1 ) { /* Structurally singular */ + printf(".. The last %d permutations:\n", n-num); + slu_PrintInt10("perm", n-num, &perm[num]); + } + + /* Restore to 0-based indexing. */ + for (i = 0; i <= n; ++i) --colptr[i]; + for (i = 0; i < nnz; ++i) --adjncy[i]; + for (i = 0; i < n; ++i) --perm[i]; + + if ( job == 5 ) + for (i = 0; i < n; ++i) { + u[i] = dw[i]; + v[i] = dw[n+i]; + } + + SUPERLU_FREE(iw); + SUPERLU_FREE(dw); + SUPERLU_FREE(nzval_d); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Exit cldperm()"); +#endif + + return info[0]; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,54 +1,32 @@ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 +/*! @file cmemory.c + * \brief Memory details * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
*/ -#include "csp_defs.h" +#include "slu_cdefs.h" -/* Constants */ -#define NO_MEMTYPE 4 /* 0: lusup; - 1: ucol; - 2: lsub; - 3: usub */ -#define GluIntArray(n) (5 * (n) + 5) /* Internal prototypes */ void *cexpand (int *, MemType,int, int, GlobalLU_t *); -int cLUWorkInit (int, int, int, int **, complex **, LU_space_t); +int cLUWorkInit (int, int, int, int **, complex **, GlobalLU_t *); void copy_mem_complex (int, void *, void *); void cStackCompress (GlobalLU_t *); -void cSetupSpace (void *, int, LU_space_t *); -void *cuser_malloc (int, int); -void cuser_free (int, int); +void cSetupSpace (void *, int, GlobalLU_t *); +void *cuser_malloc (int, int, GlobalLU_t *); +void cuser_free (int, int, GlobalLU_t *); -/* External prototypes (in memory.c - prec-indep) */ +/* External prototypes (in memory.c - prec-independent) */ extern void copy_mem_int (int, void *, void *); extern void user_bcopy (char *, char *, int); -/* Headers for 4 types of dynamatically managed memory */ -typedef struct e_node { - int size; /* length of the memory that has been used */ - void *mem; /* pointer to the new malloc'd store */ -} ExpHeader; -typedef struct { - int size; - int used; - int top1; /* grow upward, relative to &array[0] */ - int top2; /* grow downward */ - void *array; -} LU_stack_t; - -/* Variables local to this file */ -static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */ -static LU_stack_t stack; -static int no_expand; - /* Macros to manipulate stack */ -#define StackFull(x) ( x + stack.used >= stack.size ) +#define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size ) #define NotDoubleAlign(addr) ( (long int)addr & 7 ) #define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L ) #define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ @@ -58,66 +36,67 @@ -/* - * Setup the memory model to be used for factorization. +/*! \brief Setup the memory model to be used for factorization. + * * lwork = 0: use system malloc; * lwork > 0: use user-supplied work[] space. */ -void cSetupSpace(void *work, int lwork, LU_space_t *MemModel) +void cSetupSpace(void *work, int lwork, GlobalLU_t *Glu) { if ( lwork == 0 ) { - *MemModel = SYSTEM; /* malloc/free */ + Glu->MemModel = SYSTEM; /* malloc/free */ } else if ( lwork > 0 ) { - *MemModel = USER; /* user provided space */ - stack.used = 0; - stack.top1 = 0; - stack.top2 = (lwork/4)*4; /* must be word addressable */ - stack.size = stack.top2; - stack.array = (void *) work; + Glu->MemModel = USER; /* user provided space */ + Glu->stack.used = 0; + Glu->stack.top1 = 0; + Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */ + Glu->stack.size = Glu->stack.top2; + Glu->stack.array = (void *) work; } } -void *cuser_malloc(int bytes, int which_end) +void *cuser_malloc(int bytes, int which_end, GlobalLU_t *Glu) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { - buf = (char*) stack.array + stack.top1; - stack.top1 += bytes; + buf = (char*) Glu->stack.array + Glu->stack.top1; + Glu->stack.top1 += bytes; } else { - stack.top2 -= bytes; - buf = (char*) stack.array + stack.top2; + Glu->stack.top2 -= bytes; + buf = (char*) Glu->stack.array + Glu->stack.top2; } - stack.used += bytes; + Glu->stack.used += bytes; return buf; } -void cuser_free(int bytes, int which_end) +void cuser_free(int bytes, int which_end, GlobalLU_t *Glu) { if ( which_end == HEAD ) { - stack.top1 -= bytes; + Glu->stack.top1 -= bytes; } else { - stack.top2 += bytes; + Glu->stack.top2 += bytes; } - stack.used -= bytes; + Glu->stack.used -= bytes; } -/* +/*! \brief + * + *
  * mem_usage consists of the following fields:
  *    - for_lu (float)
  *      The amount of space used in bytes for the L\U data structures.
  *    - total_needed (float)
  *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
+ * 
*/ int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) { @@ -132,33 +111,75 @@ dword = sizeof(complex); /* For LU factors */ - mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * - dword + Lstore->rowind_colptr[n] * iword ); - mem_usage->for_lu += (float)( (n + 1) * iword + + mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0) * iword + Ustore->colptr[n] * (dword + iword) ); /* Working storage to support factorization */ mem_usage->total_needed = mem_usage->for_lu + - (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword + - (panel_size + 1) * n * dword ); + (float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword + + (panel_size + 1.0) * n * dword ); - mem_usage->expansions = --no_expand; - return 0; } /* cQuerySpace */ -/* - * Allocate storage for the data structures common to all factor routines. - * For those unpredictable size, make a guess as FILL * nnz(A). + +/*! \brief + * + *
+ * mem_usage consists of the following fields:
+ *    - for_lu (float)
+ *      The amount of space used in bytes for the L\U data structures.
+ *    - total_needed (float)
+ *      The amount of space needed in bytes to perform factorization.
+ * 
+ */ +int ilu_cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) +{ + SCformat *Lstore; + NCformat *Ustore; + register int n, panel_size = sp_ienv(1); + register float iword, dword; + + Lstore = L->Store; + Ustore = U->Store; + n = L->ncol; + iword = sizeof(int); + dword = sizeof(double); + + /* For LU factors */ + mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0f) * iword + + Ustore->colptr[n] * (dword + iword) ); + + /* Working storage to support factorization. + ILU needs 5*n more integers than LU */ + mem_usage->total_needed = mem_usage->for_lu + + (float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword + + (panel_size + 1.0f) * n * dword ); + + return 0; +} /* ilu_cQuerySpace */ + + +/*! \brief Allocate storage for the data structures common to all factor routines. + * + *
+ * For those unpredictable size, estimate as fill_ratio * nnz(A).
  * Return value:
  *     If lwork = -1, return the estimated amount of space required, plus n;
  *     otherwise, return the amount of space actually allocated when
  *     memory allocation failure occurred.
+ * 
*/ int cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, - int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, - int **iwork, complex **dwork) + int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U, + GlobalLU_t *Glu, int **iwork, complex **dwork) { int info, iword, dword; SCformat *Lstore; @@ -170,32 +191,33 @@ complex *ucol; int *usub, *xusub; int nzlmax, nzumax, nzlumax; - int FILL = sp_ienv(6); - Glu->n = n; - no_expand = 0; iword = sizeof(int); dword = sizeof(complex); + Glu->n = n; + Glu->num_expansions = 0; - if ( !expanders ) - expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); - if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); + if ( !Glu->expanders ) + Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE * + sizeof(ExpHeader) ); + if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact != SamePattern_SameRowPerm ) { /* Guess for L\U factors */ - nzumax = nzlumax = FILL * annz; - nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; + nzumax = nzlumax = fill_ratio * annz; + nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else { - cSetupSpace(work, lwork, &Glu->MemModel); + cSetupSpace(work, lwork, Glu); } -#ifdef DEBUG - printf("cLUMemInit() called: annz %d, MemModel %d\n", - annz, Glu->MemModel); +#if ( PRNTlevel >= 1 ) + printf("cLUMemInit() called: fill_ratio %ld, nzlmax %ld, nzumax %ld\n", + fill_ratio, nzlmax, nzumax); + fflush(stdout); #endif /* Integer pointers for L\U factors */ @@ -206,11 +228,11 @@ xlusup = intMalloc(n+1); xusub = intMalloc(n+1); } else { - xsup = (int *)cuser_malloc((n+1) * iword, HEAD); - supno = (int *)cuser_malloc((n+1) * iword, HEAD); - xlsub = (int *)cuser_malloc((n+1) * iword, HEAD); - xlusup = (int *)cuser_malloc((n+1) * iword, HEAD); - xusub = (int *)cuser_malloc((n+1) * iword, HEAD); + xsup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu); + supno = (int *)cuser_malloc((n+1) * iword, HEAD, Glu); + xlsub = (int *)cuser_malloc((n+1) * iword, HEAD, Glu); + xlusup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu); + xusub = (int *)cuser_malloc((n+1) * iword, HEAD, Glu); } lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu ); @@ -225,7 +247,8 @@ SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { - cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); + cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, + HEAD, Glu); } nzlumax /= 2; nzumax /= 2; @@ -234,6 +257,11 @@ printf("Not enough memory to perform factorization.\n"); return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n); } +#if ( PRNTlevel >= 1) + printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", + nzlmax, nzumax); + fflush(stdout); +#endif lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) cexpand( &nzlmax, LSUB, 0, 0, Glu ); @@ -260,18 +288,18 @@ Glu->MemModel = SYSTEM; } else { Glu->MemModel = USER; - stack.top2 = (lwork/4)*4; /* must be word-addressable */ - stack.size = stack.top2; + Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */ + Glu->stack.size = Glu->stack.top2; } - lsub = expanders[LSUB].mem = Lstore->rowind; - lusup = expanders[LUSUP].mem = Lstore->nzval; - usub = expanders[USUB].mem = Ustore->rowind; - ucol = expanders[UCOL].mem = Ustore->nzval;; - expanders[LSUB].size = nzlmax; - expanders[LUSUP].size = nzlumax; - expanders[USUB].size = nzumax; - expanders[UCOL].size = nzumax; + lsub = Glu->expanders[LSUB].mem = Lstore->rowind; + lusup = Glu->expanders[LUSUP].mem = Lstore->nzval; + usub = Glu->expanders[USUB].mem = Ustore->rowind; + ucol = Glu->expanders[UCOL].mem = Ustore->nzval;; + Glu->expanders[LSUB].size = nzlmax; + Glu->expanders[LUSUP].size = nzlumax; + Glu->expanders[USUB].size = nzumax; + Glu->expanders[UCOL].size = nzumax; } Glu->xsup = xsup; @@ -287,20 +315,20 @@ Glu->nzumax = nzumax; Glu->nzlumax = nzlumax; - info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); + info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu); if ( info ) return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n); - ++no_expand; + ++Glu->num_expansions; return 0; } /* cLUMemInit */ -/* Allocate known working storage. Returns 0 if success, otherwise +/*! \brief Allocate known working storage. Returns 0 if success, otherwise returns the number of bytes allocated so far when failure occurred. */ int cLUWorkInit(int m, int n, int panel_size, int **iworkptr, - complex **dworkptr, LU_space_t MemModel) + complex **dworkptr, GlobalLU_t *Glu) { int isize, dsize, extra; complex *old_ptr; @@ -311,19 +339,19 @@ dsize = (m * panel_size + NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex); - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *iworkptr = (int *) intCalloc(isize/sizeof(int)); else - *iworkptr = (int *) cuser_malloc(isize, TAIL); + *iworkptr = (int *) cuser_malloc(isize, TAIL, Glu); if ( ! *iworkptr ) { fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n"); return (isize + n); } - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *dworkptr = (complex *) SUPERLU_MALLOC(dsize); else { - *dworkptr = (complex *) cuser_malloc(dsize, TAIL); + *dworkptr = (complex *) cuser_malloc(dsize, TAIL, Glu); if ( NotDoubleAlign(*dworkptr) ) { old_ptr = *dworkptr; *dworkptr = (complex*) DoubleAlign(*dworkptr); @@ -332,8 +360,8 @@ #ifdef DEBUG printf("cLUWorkInit: not aligned, extra %d\n", extra); #endif - stack.top2 -= extra; - stack.used += extra; + Glu->stack.top2 -= extra; + Glu->stack.used += extra; } } if ( ! *dworkptr ) { @@ -345,8 +373,7 @@ } -/* - * Set up pointers for real working arrays. +/*! \brief Set up pointers for real working arrays. */ void cSetRWork(int m, int panel_size, complex *dworkptr, @@ -362,8 +389,7 @@ cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero); } -/* - * Free the working storage used by factor routines. +/*! \brief Free the working storage used by factor routines. */ void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu) { @@ -371,18 +397,21 @@ SUPERLU_FREE (iwork); SUPERLU_FREE (dwork); } else { - stack.used -= (stack.size - stack.top2); - stack.top2 = stack.size; + Glu->stack.used -= (Glu->stack.size - Glu->stack.top2); + Glu->stack.top2 = Glu->stack.size; /* cStackCompress(Glu); */ } - SUPERLU_FREE (expanders); - expanders = 0; + SUPERLU_FREE (Glu->expanders); + Glu->expanders = NULL; } -/* Expand the data structures for L and U during the factorization. +/*! \brief Expand the data structures for L and U during the factorization. + * + *
  * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int cLUMemXpand(int jcol, @@ -446,8 +475,7 @@ for (i = 0; i < howmany; i++) dnew[i] = dold[i]; } -/* - * Expand the existing storage to accommodate more fill-ins. +/*! \brief Expand the existing storage to accommodate more fill-ins. */ void *cexpand ( @@ -463,12 +491,14 @@ float alpha; void *new_mem, *old_mem; int new_len, tries, lword, extra, bytes_to_copy; + ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */ alpha = EXPAND; - if ( no_expand == 0 || keep_prev ) /* First time allocate requested */ + if ( Glu->num_expansions == 0 || keep_prev ) { + /* First time allocate requested */ new_len = *prev_len; - else { + } else { new_len = alpha * *prev_len; } @@ -476,9 +506,8 @@ else lword = sizeof(complex); if ( Glu->MemModel == SYSTEM ) { - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ - if ( no_expand != 0 ) { + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); + if ( Glu->num_expansions != 0 ) { tries = 0; if ( keep_prev ) { if ( !new_mem ) return (NULL); @@ -487,8 +516,7 @@ if ( ++tries > 10 ) return (NULL); alpha = Reduce(alpha); new_len = alpha * *prev_len; - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); } } if ( type == LSUB || type == USUB ) { @@ -501,8 +529,8 @@ expanders[type].mem = (void *) new_mem; } else { /* MemModel == USER */ - if ( no_expand == 0 ) { - new_mem = cuser_malloc(new_len * lword, HEAD); + if ( Glu->num_expansions == 0 ) { + new_mem = cuser_malloc(new_len * lword, HEAD, Glu); if ( NotDoubleAlign(new_mem) && (type == LUSUP || type == UCOL) ) { old_mem = new_mem; @@ -511,12 +539,11 @@ #ifdef DEBUG printf("expand(): not aligned, extra %d\n", extra); #endif - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; } expanders[type].mem = (void *) new_mem; - } - else { + } else { tries = 0; extra = (new_len - *prev_len) * lword; if ( keep_prev ) { @@ -532,7 +559,7 @@ if ( type != USUB ) { new_mem = (void*)((char*)expanders[type + 1].mem + extra); - bytes_to_copy = (char*)stack.array + stack.top1 + bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1 - (char*)expanders[type + 1].mem; user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy); @@ -548,11 +575,11 @@ Glu->ucol = expanders[UCOL].mem = (void*)((char*)expanders[UCOL].mem + extra); } - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; if ( type == UCOL ) { - stack.top1 += extra; /* Add same amount for USUB */ - stack.used += extra; + Glu->stack.top1 += extra; /* Add same amount for USUB */ + Glu->stack.used += extra; } } /* if ... */ @@ -562,15 +589,14 @@ expanders[type].size = new_len; *prev_len = new_len; - if ( no_expand ) ++no_expand; + if ( Glu->num_expansions ) ++Glu->num_expansions; return (void *) expanders[type].mem; } /* cexpand */ -/* - * Compress the work[] array to remove fragmentation. +/*! \brief Compress the work[] array to remove fragmentation. */ void cStackCompress(GlobalLU_t *Glu) @@ -610,9 +636,9 @@ usub = ito; last = (char*)usub + xusub[ndim] * iword; - fragment = (char*) (((char*)stack.array + stack.top1) - last); - stack.used -= (long int) fragment; - stack.top1 -= (long int) fragment; + fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last); + Glu->stack.used -= (long int) fragment; + Glu->stack.top1 -= (long int) fragment; Glu->ucol = ucol; Glu->lsub = lsub; @@ -626,8 +652,7 @@ } -/* - * Allocate storage for original matrix A +/*! \brief Allocate storage for original matrix A */ void callocateA(int n, int nnz, complex **a, int **asub, int **xa) @@ -641,7 +666,7 @@ complex *complexMalloc(int n) { complex *buf; - buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex)); + buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n"); } @@ -653,7 +678,7 @@ complex *buf; register int i; complex zero = {0.0, 0.0}; - buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex)); + buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n"); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,10 +1,20 @@ -/* ========================================================================== */ -/* === colamd - a sparse matrix column ordering algorithm =================== */ -/* ========================================================================== */ +/*! @file colamd.c + *\brief A sparse matrix column ordering algorithm + +
+    ========================================================================== 
+    === colamd/symamd - a sparse matrix column ordering algorithm ============ 
+    ========================================================================== 
 
-/*
-    colamd:  An approximate minimum degree column ordering algorithm.
 
+    colamd:  an approximate minimum degree column ordering algorithm,
+    	for LU factorization of symmetric or unsymmetric matrices,
+	QR factorization, least squares, interior point methods for
+	linear programming problems, and other related problems.
+
+    symamd:  an approximate minimum degree ordering algorithm for Cholesky
+    	factorization of symmetric matrices.
+
     Purpose:
 
 	Colamd computes a permutation Q such that the Cholesky factorization of
@@ -14,13 +24,17 @@
 	factorization, and P is computed during numerical factorization via
 	conventional partial pivoting with row interchanges.  Colamd is the
 	column ordering method used in SuperLU, part of the ScaLAPACK library.
-	It is also available as user-contributed software for Matlab 5.2,
+	It is also available as built-in function in MATLAB Version 6,
 	available from MathWorks, Inc. (http://www.mathworks.com).  This
-	routine can be used in place of COLMMD in Matlab.  By default, the \
-	and / operators in Matlab perform a column ordering (using COLMMD)
-	prior to LU factorization using sparse partial pivoting, in the
-	built-in Matlab LU(A) routine.
+	routine can be used in place of colmmd in MATLAB.
 
+    	Symamd computes a permutation P of a symmetric matrix A such that the
+	Cholesky factorization of PAP' has less fill-in and requires fewer
+	floating point operations than A.  Symamd constructs a matrix M such
+	that M'M has the same nonzero pattern of A, and then orders the columns
+	of M using colmmd.  The column ordering of M is then returned as the
+	row and column ordering P of A. 
+
     Authors:
 
 	The authors of the code itself are Stefan I. Larimore and Timothy A.
@@ -30,112 +44,124 @@
 
     Date:
 
-	August 3, 1998.  Version 1.0.
+	September 8, 2003.  Version 2.3.
 
     Acknowledgements:
 
 	This work was supported by the National Science Foundation, under
 	grants DMS-9504974 and DMS-9803599.
 
-    Notice:
+    Copyright and License:
 
-	Copyright (c) 1998 by the University of Florida.  All Rights Reserved.
+	Copyright (c) 1998-2003 by the University of Florida.
+	All Rights Reserved.
 
 	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 
-	Permission is hereby granted to use or copy this program for any
-	purpose, provided the above notices are retained on all copies.
-	User documentation of any code that uses this code must cite the
-	Authors, the Copyright, and "Used by permission."  If this code is
-	accessible from within Matlab, then typing "help colamd" or "colamd"
-	(with no arguments) must cite the Authors.  Permission to modify the
-	code and to distribute modified code is granted, provided the above
-	notices are retained, and a notice that the code was modified is
-	included with the above copyright notice.  You must also retain the
-	Availability information below, of the original version.
+	Permission is hereby granted to use, copy, modify, and/or distribute
+	this program, provided that the Copyright, this License, and the
+	Availability of the original version is retained on all copies and made
+	accessible to the end-user of any code or package that includes COLAMD
+	or any modified version of COLAMD. 
 
-	This software is provided free of charge.
-
     Availability:
 
-	This file is located at
+	The colamd/symamd library is available at
 
-		http://www.cise.ufl.edu/~davis/colamd/colamd.c
+	    http://www.cise.ufl.edu/research/sparse/colamd/
 
-	The colamd.h file is required, located in the same directory.
-	The colamdmex.c file provides a Matlab interface for colamd.
-	The symamdmex.c file provides a Matlab interface for symamd, which is
-	a symmetric ordering based on this code, colamd.c.  All codes are
-	purely ANSI C compliant (they use no Unix-specific routines, include
-	files, etc.).
-*/
+	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c
+	file.  It requires the colamd.h file.  It is required by the colamdmex.c
+	and symamdmex.c files, for the MATLAB interface to colamd and symamd.
 
-/* ========================================================================== */
-/* === Description of user-callable routines ================================ */
-/* ========================================================================== */
+    See the ChangeLog file for changes since Version 1.0.
 
-/*
-    Each user-callable routine (declared as PUBLIC) is briefly described below.
-    Refer to the comments preceding each routine for more details.
+    ========================================================================== 
+    === Description of user-callable routines ================================ 
+    ========================================================================== 
 
+
     ----------------------------------------------------------------------------
     colamd_recommended:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    Alen = colamd_recommended (nnz, n_row, n_col) ;
+	    #include "colamd.h"
+	    int colamd_recommended (int nnz, int n_row, int n_col) ;
 
+	    or as a C macro
+
+	    #include "colamd.h"
+	    Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;
+
 	Purpose:
 
 	    Returns recommended value of Alen for use by colamd.  Returns -1
-	    if any input argument is negative.
+	    if any input argument is negative.  The use of this routine
+	    or macro is optional.  Note that the macro uses its arguments
+	    more than once, so be careful for side effects, if you pass
+	    expressions as arguments to COLAMD_RECOMMENDED.  Not needed for
+	    symamd, which dynamically allocates its own memory.
 
-	Arguments:
+	Arguments (all input arguments):
 
 	    int nnz ;		Number of nonzeros in the matrix A.  This must
 				be the same value as p [n_col] in the call to
 				colamd - otherwise you will get a wrong value
 				of the recommended memory to use.
+
 	    int n_row ;		Number of rows in the matrix A.
+
 	    int n_col ;		Number of columns in the matrix A.
 
     ----------------------------------------------------------------------------
     colamd_set_defaults:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    colamd_set_defaults (knobs) ;
+	    #include "colamd.h"
+	    colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
 
 	Purpose:
 
-	    Sets the default parameters.
+	    Sets the default parameters.  The use of this routine is optional.
 
 	Arguments:
 
 	    double knobs [COLAMD_KNOBS] ;	Output only.
 
-		Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
-		are removed prior to ordering.  Columns with more than
-		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed
-		prior to ordering, and placed last in the output column
-		ordering.  Default values of these two knobs are both 0.5.
-		Currently, only knobs [0] and knobs [1] are used, but future
-		versions may use more knobs.  If so, they will be properly set
-		to their defaults by the future version of colamd_set_defaults,
-		so that the code that calls colamd will not need to change,
-		assuming that you either use colamd_set_defaults, or pass a
-		(double *) NULL pointer as the knobs array to colamd.
+		Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
+		entries are removed prior to ordering.  Columns with more than
+		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
+		ordering, and placed last in the output column ordering. 
 
+		Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
+		Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
+		entries are removed prior to ordering, and placed last in the
+		output ordering.
+
+		COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
+		respectively, in colamd.h.  Default values of these two knobs
+		are both 0.5.  Currently, only knobs [0] and knobs [1] are
+		used, but future versions may use more knobs.  If so, they will
+		be properly set to their defaults by the future version of
+		colamd_set_defaults, so that the code that calls colamd will
+		not need to change, assuming that you either use
+		colamd_set_defaults, or pass a (double *) NULL pointer as the
+		knobs array to colamd or symamd.
+
     ----------------------------------------------------------------------------
     colamd:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    colamd (n_row, n_col, Alen, A, p, knobs) ;
+	    #include "colamd.h"
+	    int colamd (int n_row, int n_col, int Alen, int *A, int *p,
+	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
 
 	Purpose:
 
@@ -143,34 +169,44 @@
 	    (AQ)'AQ=LL' have less fill-in and require fewer floating point
 	    operations than factorizing the unpermuted matrix A or A'A,
 	    respectively.
+	    
+	Returns:
 
+	    TRUE (1) if successful, FALSE (0) otherwise.
+
 	Arguments:
 
-	    int n_row ;
+	    int n_row ;		Input argument.
 
 		Number of rows in the matrix A.
 		Restriction:  n_row >= 0.
 		Colamd returns FALSE if n_row is negative.
 
-	    int n_col ;
+	    int n_col ;		Input argument.
 
 		Number of columns in the matrix A.
 		Restriction:  n_col >= 0.
 		Colamd returns FALSE if n_col is negative.
 
-	    int Alen ;
+	    int Alen ;		Input argument.
 
 		Restriction (see note):
-		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
 		Colamd returns FALSE if these conditions are not met.
 
 		Note:  this restriction makes an modest assumption regarding
-		the size of the two typedef'd structures, below.  We do,
-		however, guarantee that
-		Alen >= colamd_recommended (nnz, n_row, n_col)
+		the size of the two typedef's structures in colamd.h.
+		We do, however, guarantee that
+
+			Alen >= colamd_recommended (nnz, n_row, n_col)
+		
+		or equivalently as a C preprocessor macro: 
+
+			Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)
+
 		will be sufficient.
 
-	    int A [Alen] ;	Input argument, stats on output.
+	    int A [Alen] ;	Input argument, undefined on output.
 
 		A is an integer array of size Alen.  Alen must be at least as
 		large as the bare minimum value given above, but this is very
@@ -191,21 +227,8 @@
 		n_row-1, and columns are in the range 0 to n_col-1.  Colamd
 		returns FALSE if any row index is out of range.
 
-		The contents of A are modified during ordering, and are thus
-		undefined on output with the exception of a few statistics
-		about the ordering (A [0..COLAMD_STATS-1]):
-		A [0]:  number of dense or empty rows ignored.
-		A [1]:  number of dense or empty columns ignored (and ordered
-			last in the output permutation p)
-		A [2]:  number of garbage collections performed.
-		A [3]:  0, if all row indices in each column were in sorted
-			  order, and no duplicates were present.
-			1, otherwise (in which case colamd had to do more work)
-		Note that a row can become "empty" if it contains only
-		"dense" and/or "empty" columns, and similarly a column can
-		become "empty" if it only contains "dense" and/or "empty" rows.
-		Future versions may return more statistics in A, but the usage
-		of these 4 entries in A will remain unchanged.
+		The contents of A are modified during ordering, and are
+		undefined on output.
 
 	    int p [n_col+1] ;	Both input and output argument.
 
@@ -227,26 +250,335 @@
 		If colamd returns FALSE, then no permutation is returned, and
 		p is undefined on output.
 
-	    double knobs [COLAMD_KNOBS] ;	Input only.
+	    double knobs [COLAMD_KNOBS] ;	Input argument.
 
-		See colamd_set_defaults for a description.  If the knobs array
-		is not present (that is, if a (double *) NULL pointer is passed
-		in its place), then the default values of the parameters are
-		used instead.
+		See colamd_set_defaults for a description.
 
-*/
+	    int stats [COLAMD_STATS] ;		Output argument.
 
+		Statistics on the ordering, and error status.
+		See colamd.h for related definitions.
+		Colamd returns FALSE if stats is not present.
 
-/* ========================================================================== */
-/* === Include files ======================================================== */
-/* ========================================================================== */
+		stats [0]:  number of dense or empty rows ignored.
 
-/* limits.h:  the largest positive integer (INT_MAX) */
-#include 
+		stats [1]:  number of dense or empty columns ignored (and
+				ordered last in the output permutation p)
+				Note that a row can become "empty" if it
+				contains only "dense" and/or "empty" columns,
+				and similarly a column can become "empty" if it
+				only contains "dense" and/or "empty" rows.
 
-/* colamd.h:  knob array size, stats output size, and global prototypes */
-#include "colamd.h"
+		stats [2]:  number of garbage collections performed.
+				This can be excessively high if Alen is close
+				to the minimum required value.
 
+		stats [3]:  status code.  < 0 is an error code.
+			    > 1 is a warning or notice.
+
+			0	OK.  Each column of the input matrix contained
+				row indices in increasing order, with no
+				duplicates.
+
+			1	OK, but columns of input matrix were jumbled
+				(unsorted columns or duplicate entries).  Colamd
+				had to do some extra work to sort the matrix
+				first and remove duplicate entries, but it
+				still was able to return a valid permutation
+				(return value of colamd was TRUE).
+
+					stats [4]: highest numbered column that
+						is unsorted or has duplicate
+						entries.
+					stats [5]: last seen duplicate or
+						unsorted row index.
+					stats [6]: number of duplicate or
+						unsorted row indices.
+
+			-1	A is a null pointer
+
+			-2	p is a null pointer
+
+			-3 	n_row is negative
+
+					stats [4]: n_row
+
+			-4	n_col is negative
+
+					stats [4]: n_col
+
+			-5	number of nonzeros in matrix is negative
+
+					stats [4]: number of nonzeros, p [n_col]
+
+			-6	p [0] is nonzero
+
+					stats [4]: p [0]
+
+			-7	A is too small
+
+					stats [4]: required size
+					stats [5]: actual size (Alen)
+
+			-8	a column has a negative number of entries
+
+					stats [4]: column with < 0 entries
+					stats [5]: number of entries in col
+
+			-9	a row index is out of bounds
+
+					stats [4]: column with bad row index
+					stats [5]: bad row index
+					stats [6]: n_row, # of rows of matrx
+
+			-10	(unused; see symamd.c)
+
+			-999	(unused; see symamd.c)
+
+		Future versions may return more statistics in the stats array.
+
+	Example:
+	
+	    See http://www.cise.ufl.edu/research/sparse/colamd/example.c
+	    for a complete example.
+
+	    To order the columns of a 5-by-4 matrix with 11 nonzero entries in
+	    the following nonzero pattern
+
+	    	x 0 x 0
+		x 0 x x
+		0 x x 0
+		0 0 x x
+		x x 0 0
+
+	    with default knobs and no output statistics, do the following:
+
+		#include "colamd.h"
+		#define ALEN COLAMD_RECOMMENDED (11, 5, 4)
+		int A [ALEN] = {1, 2, 5, 3, 5, 1, 2, 3, 4, 2, 4} ;
+		int p [ ] = {0, 3, 5, 9, 11} ;
+		int stats [COLAMD_STATS] ;
+		colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;
+
+	    The permutation is returned in the array p, and A is destroyed.
+
+    ----------------------------------------------------------------------------
+    symamd:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    int symamd (int n, int *A, int *p, int *perm,
+	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
+		void (*allocate) (size_t, size_t), void (*release) (void *)) ;
+
+	Purpose:
+
+    	    The symamd routine computes an ordering P of a symmetric sparse
+	    matrix A such that the Cholesky factorization PAP' = LL' remains
+	    sparse.  It is based on a column ordering of a matrix M constructed
+	    so that the nonzero pattern of M'M is the same as A.  The matrix A
+	    is assumed to be symmetric; only the strictly lower triangular part
+	    is accessed.  You must pass your selected memory allocator (usually
+	    calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
+	    memory for the temporary matrix M.
+
+	Returns:
+
+	    TRUE (1) if successful, FALSE (0) otherwise.
+
+	Arguments:
+
+	    int n ;		Input argument.
+
+	    	Number of rows and columns in the symmetrix matrix A.
+		Restriction:  n >= 0.
+		Symamd returns FALSE if n is negative.
+
+	    int A [nnz] ;	Input argument.
+
+	    	A is an integer array of size nnz, where nnz = p [n].
+		
+		The row indices of the entries in column c of the matrix are
+		held in A [(p [c]) ... (p [c+1]-1)].  The row indices in a
+		given column c need not be in ascending order, and duplicate
+		row indices may be present.  However, symamd will run faster
+		if the columns are in sorted order with no duplicate entries. 
+
+		The matrix is 0-based.  That is, rows are in the range 0 to
+		n-1, and columns are in the range 0 to n-1.  Symamd
+		returns FALSE if any row index is out of range.
+
+		The contents of A are not modified.
+
+	    int p [n+1] ;   	Input argument.
+
+		p is an integer array of size n+1.  On input, it holds the
+		"pointers" for the column form of the matrix A.  Column c of
+		the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
+		entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+		for all c in the range 0 to n-1.  The value p [n] is
+		thus the total number of entries in the pattern of the matrix A.
+		Symamd returns FALSE if these conditions are not met.
+
+		The contents of p are not modified.
+
+	    int perm [n+1] ;   	Output argument.
+
+		On output, if symamd returns TRUE, the array perm holds the
+		permutation P, where perm [0] is the first index in the new
+		ordering, and perm [n-1] is the last.  That is, perm [k] = j
+		means that row and column j of A is the kth column in PAP',
+		where k is in the range 0 to n-1 (perm [0] = j means
+		that row and column j of A are the first row and column in
+		PAP').  The array is used as a workspace during the ordering,
+		which is why it must be of length n+1, not just n.
+
+	    double knobs [COLAMD_KNOBS] ;	Input argument.
+
+		See colamd_set_defaults for a description.
+
+	    int stats [COLAMD_STATS] ;		Output argument.
+
+		Statistics on the ordering, and error status.
+		See colamd.h for related definitions.
+		Symamd returns FALSE if stats is not present.
+
+		stats [0]:  number of dense or empty row and columns ignored
+				(and ordered last in the output permutation 
+				perm).  Note that a row/column can become
+				"empty" if it contains only "dense" and/or
+				"empty" columns/rows.
+
+		stats [1]:  (same as stats [0])
+
+		stats [2]:  number of garbage collections performed.
+
+		stats [3]:  status code.  < 0 is an error code.
+			    > 1 is a warning or notice.
+
+			0	OK.  Each column of the input matrix contained
+				row indices in increasing order, with no
+				duplicates.
+
+			1	OK, but columns of input matrix were jumbled
+				(unsorted columns or duplicate entries).  Symamd
+				had to do some extra work to sort the matrix
+				first and remove duplicate entries, but it
+				still was able to return a valid permutation
+				(return value of symamd was TRUE).
+
+					stats [4]: highest numbered column that
+						is unsorted or has duplicate
+						entries.
+					stats [5]: last seen duplicate or
+						unsorted row index.
+					stats [6]: number of duplicate or
+						unsorted row indices.
+
+			-1	A is a null pointer
+
+			-2	p is a null pointer
+
+			-3	(unused, see colamd.c)
+
+			-4 	n is negative
+
+					stats [4]: n
+
+			-5	number of nonzeros in matrix is negative
+
+					stats [4]: # of nonzeros (p [n]).
+
+			-6	p [0] is nonzero
+
+					stats [4]: p [0]
+
+			-7	(unused)
+
+			-8	a column has a negative number of entries
+
+					stats [4]: column with < 0 entries
+					stats [5]: number of entries in col
+
+			-9	a row index is out of bounds
+
+					stats [4]: column with bad row index
+					stats [5]: bad row index
+					stats [6]: n_row, # of rows of matrx
+
+			-10	out of memory (unable to allocate temporary
+				workspace for M or count arrays using the
+				"allocate" routine passed into symamd).
+
+			-999	internal error.  colamd failed to order the
+				matrix M, when it should have succeeded.  This
+				indicates a bug.  If this (and *only* this)
+				error code occurs, please contact the authors.
+				Don't contact the authors if you get any other
+				error code.
+
+		Future versions may return more statistics in the stats array.
+
+	    void * (*allocate) (size_t, size_t)
+
+	    	A pointer to a function providing memory allocation.  The
+		allocated memory must be returned initialized to zero.  For a
+		C application, this argument should normally be a pointer to
+		calloc.  For a MATLAB mexFunction, the routine mxCalloc is
+		passed instead.
+
+	    void (*release) (size_t, size_t)
+
+	    	A pointer to a function that frees memory allocated by the
+		memory allocation routine above.  For a C application, this
+		argument should normally be a pointer to free.  For a MATLAB
+		mexFunction, the routine mxFree is passed instead.
+
+
+    ----------------------------------------------------------------------------
+    colamd_report:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    colamd_report (int stats [COLAMD_STATS]) ;
+
+	Purpose:
+
+	    Prints the error status and statistics recorded in the stats
+	    array on the standard error output (for a standard C routine)
+	    or on the MATLAB output (for a mexFunction).
+
+	Arguments:
+
+	    int stats [COLAMD_STATS] ;	Input only.  Statistics from colamd.
+
+
+    ----------------------------------------------------------------------------
+    symamd_report:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    symamd_report (int stats [COLAMD_STATS]) ;
+
+	Purpose:
+
+	    Prints the error status and statistics recorded in the stats
+	    array on the standard error output (for a standard C routine)
+	    or on the MATLAB output (for a mexFunction).
+
+	Arguments:
+
+	    int stats [COLAMD_STATS] ;	Input only.  Statistics from symamd.
+
+ 
+*/ + /* ========================================================================== */ /* === Scaffolding code definitions ======================================== */ /* ========================================================================== */ @@ -254,11 +586,8 @@ /* Ensure that debugging is turned off: */ #ifndef NDEBUG #define NDEBUG -#endif +#endif /* NDEBUG */ -/* assert.h: the assert macro (no debugging if NDEBUG is defined) */ -#include - /* Our "scaffolding code" philosophy: In our opinion, well-written library code should keep its "debugging" code, and just normally have it turned off @@ -276,77 +605,62 @@ (3) (gasp!) for actually finding bugs. This code has been heavily tested and "should" be fully functional and bug-free ... but you never know... - To enable debugging, comment out the "#define NDEBUG" above. The code will - become outrageously slow when debugging is enabled. To control the level of - debugging output, set an environment variable D to 0 (little), 1 (some), - 2, 3, or 4 (lots). + To enable debugging, comment out the "#define NDEBUG" above. For a MATLAB + mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG + definition. The code will become outrageously slow when debugging is + enabled. To control the level of debugging output, set an environment + variable D to 0 (little), 1 (some), 2, 3, or 4 (lots). When debugging, + you should see the following message on the standard output: + + colamd: debug version, D = 1 (THIS WILL BE SLOW!) + + or a similar message for symamd. If you don't, then debugging has not + been enabled. + */ /* ========================================================================== */ -/* === Row and Column structures ============================================ */ +/* === Include files ======================================================== */ /* ========================================================================== */ -typedef struct ColInfo_struct -{ - int start ; /* index for A of first row in this column, or DEAD */ - /* if column is dead */ - int length ; /* number of rows in this column */ - union - { - int thickness ; /* number of original columns represented by this */ - /* col, if the column is alive */ - int parent ; /* parent in parent tree super-column structure, if */ - /* the column is dead */ - } shared1 ; - union - { - int score ; /* the score used to maintain heap, if col is alive */ - int order ; /* pivot ordering of this column, if col is dead */ - } shared2 ; - union - { - int headhash ; /* head of a hash bucket, if col is at the head of */ - /* a degree list */ - int hash ; /* hash value, if col is not in a degree list */ - int prev ; /* previous column in degree list, if col is in a */ - /* degree list (but not at the head of a degree list) */ - } shared3 ; - union - { - int degree_next ; /* next column, if col is in a degree list */ - int hash_next ; /* next column, if col is in a hash list */ - } shared4 ; +#include "colamd.h" +#include -} ColInfo ; +#ifdef MATLAB_MEX_FILE +#include "mex.h" +#include "matrix.h" +#else +#include +#include +#endif /* MATLAB_MEX_FILE */ -typedef struct RowInfo_struct -{ - int start ; /* index for A of first col in this row */ - int length ; /* number of principal columns in this row */ - union - { - int degree ; /* number of principal & non-principal columns in row */ - int p ; /* used as a row pointer in init_rows_cols () */ - } shared1 ; - union - { - int mark ; /* for computing set differences and marking dead rows*/ - int first_column ;/* first column in row (used in garbage collection) */ - } shared2 ; - -} RowInfo ; - /* ========================================================================== */ /* === Definitions ========================================================== */ /* ========================================================================== */ +/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */ +#define PUBLIC +#define PRIVATE static + #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #define MIN(a,b) (((a) < (b)) ? (a) : (b)) #define ONES_COMPLEMENT(r) (-(r)-1) -#define TRUE (1) -#define FALSE (0) +/* -------------------------------------------------------------------------- */ +/* Change for version 2.1: define TRUE and FALSE only if not yet defined */ +/* -------------------------------------------------------------------------- */ + +#ifndef TRUE +#define TRUE (1) +#endif + +#ifndef FALSE +#define FALSE (0) +#endif + +/* -------------------------------------------------------------------------- */ + #define EMPTY (-1) /* Row and column status */ @@ -368,10 +682,30 @@ #define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } #define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } -/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */ -#define PUBLIC -#define PRIVATE static +/* ========================================================================== */ +/* === Colamd reporting mechanism =========================================== */ +/* ========================================================================== */ +#ifdef MATLAB_MEX_FILE + +/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */ +#define PRINTF mexPrintf + +/* In MATLAB, matrices are 1-based to the user, but 0-based internally */ +#define INDEX(i) ((i)+1) + +#else + +/* Use printf in standard C environment, for debugging and statistics output. */ +/* Output is generated only if debugging is enabled at compile time, or if */ +/* the caller explicitly calls colamd_report or symamd_report. */ +#define PRINTF printf + +/* In C, matrices are 0-based and indices are reported as such in *_report */ +#define INDEX(i) (i) + +#endif /* MATLAB_MEX_FILE */ + /* ========================================================================== */ /* === Prototypes of PRIVATE routines ======================================= */ /* ========================================================================== */ @@ -380,18 +714,19 @@ ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], - int p [] + int p [], + int stats [COLAMD_STATS] ) ; PRIVATE void init_scoring ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], int head [], double knobs [COLAMD_KNOBS], @@ -405,8 +740,8 @@ int n_row, int n_col, int Alen, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], int head [], int n_col2, @@ -417,17 +752,19 @@ PRIVATE void order_children ( int n_col, - ColInfo Col [], + Colamd_Col Col [], int p [] ) ; PRIVATE void detect_super_cols ( + #ifndef NDEBUG int n_col, - RowInfo Row [], -#endif - ColInfo Col [], + Colamd_Row Row [], +#endif /* NDEBUG */ + + Colamd_Col Col [], int A [], int head [], int row_start, @@ -438,8 +775,8 @@ ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], int *pfree ) ; @@ -447,29 +784,49 @@ PRIVATE int clear_mark ( int n_row, - RowInfo Row [] + Colamd_Row Row [] ) ; +PRIVATE void print_report +( + char *method, + int stats [COLAMD_STATS] +) ; + /* ========================================================================== */ -/* === Debugging definitions ================================================ */ +/* === Debugging prototypes and definitions ================================= */ /* ========================================================================== */ #ifndef NDEBUG -/* === With debugging ======================================================= */ +/* colamd_debug is the *ONLY* global variable, and is only */ +/* present when debugging */ -/* stdlib.h: for getenv and atoi, to get debugging level from environment */ -#include +PRIVATE int colamd_debug ; /* debug print level */ -/* stdio.h: for printf (no printing if debugging is turned off) */ -#include +#define DEBUG0(params) { (void) PRINTF params ; } +#define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; } +#define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; } +#define DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; } +#define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; } +#ifdef MATLAB_MEX_FILE +#define ASSERT(expression) (mxAssert ((expression), "")) +#else +#define ASSERT(expression) (assert (expression)) +#endif /* MATLAB_MEX_FILE */ + +PRIVATE void colamd_get_debug /* gets the debug print level from getenv */ +( + char *method +) ; + PRIVATE void debug_deg_lists ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int head [], int min_score, int should, @@ -479,7 +836,7 @@ PRIVATE void debug_mark ( int n_row, - RowInfo Row [], + Colamd_Row Row [], int tag_mark, int max_mark ) ; @@ -488,8 +845,8 @@ ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [] ) ; @@ -497,25 +854,14 @@ ( int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], int n_col2 ) ; -/* the following is the *ONLY* global variable in this file, and is only */ -/* present when debugging */ +#else /* NDEBUG */ -PRIVATE int debug_colamd ; /* debug print level */ - -#define DEBUG0(params) { (void) printf params ; } -#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; } -#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; } -#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; } -#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; } - -#else - /* === No debugging ========================================================= */ #define DEBUG0(params) ; @@ -524,11 +870,14 @@ #define DEBUG3(params) ; #define DEBUG4(params) ; -#endif +#define ASSERT(expression) ((void) 0) +#endif /* NDEBUG */ + /* ========================================================================== */ + /* ========================================================================== */ /* === USER-CALLABLE ROUTINES: ============================================== */ /* ========================================================================== */ @@ -541,7 +890,10 @@ /* The colamd_recommended routine returns the suggested size for Alen. This value has been determined to provide good balance between the number of - garbage collections and the memory requirements for colamd. + garbage collections and the memory requirements for colamd. If any + argument is negative, a -1 is returned as an error condition. This + function is also available as a macro defined in colamd.h, so that you + can use it for a statically-allocated array size. */ PUBLIC int colamd_recommended /* returns recommended value of Alen. */ @@ -553,31 +905,7 @@ int n_col /* number of columns in A */ ) { - /* === Local variables ================================================== */ - - int minimum ; /* bare minimum requirements */ - int recommended ; /* recommended value of Alen */ - - if (nnz < 0 || n_row < 0 || n_col < 0) - { - /* return -1 if any input argument is corrupted */ - DEBUG0 (("colamd_recommended error!")) ; - DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ; - return (-1) ; - } - - minimum = - 2 * (nnz) /* for A */ - + (((n_col) + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */ - + (((n_row) + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */ - + n_col /* minimum elbow room to guarrantee success */ - + COLAMD_STATS ; /* for output statistics */ - - /* recommended is equal to the minumum plus enough memory to keep the */ - /* number garbage collections low */ - recommended = minimum + nnz/5 ; - - return (recommended) ; + return (COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; } @@ -590,11 +918,14 @@ controllable parameters for colamd: knobs [0] rows with knobs[0]*n_col entries or more are removed - prior to ordering. + prior to ordering in colamd. Rows and columns with + knobs[0]*n_col entries or more are removed prior to + ordering in symamd and placed last in the output + ordering. knobs [1] columns with knobs[1]*n_row entries or more are removed - prior to ordering, and placed last in the column - permutation. + prior to ordering in colamd, and placed last in the + column permutation. Symamd ignores this knob. knobs [2..19] unused, but future versions might use this */ @@ -624,88 +955,355 @@ /* ========================================================================== */ -/* === colamd =============================================================== */ +/* === symamd =============================================================== */ /* ========================================================================== */ -/* - The colamd routine computes a column ordering Q of a sparse matrix - A such that the LU factorization P(AQ) = LU remains sparse, where P is - selected via partial pivoting. The routine can also be viewed as - providing a permutation Q such that the Cholesky factorization - (AQ)'(AQ) = LL' remains sparse. +PUBLIC int symamd /* return TRUE if OK, FALSE otherwise */ +( + /* === Parameters ======================================================= */ - On input, the nonzero patterns of the columns of A are stored in the - array A, in order 0 to n_col-1. A is held in 0-based form (rows in the - range 0 to n_row-1 and columns in the range 0 to n_col-1). Row indices - for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0, - and thus p [n_col] is the number of entries in A. The matrix is - destroyed on output. The row indices within each column do not have to - be sorted (from small to large row indices), and duplicate row indices - may be present. However, colamd will work a little faster if columns are - sorted and no duplicates are present. Matlab 5.2 always passes the matrix - with sorted columns, and no duplicates. + int n, /* number of rows and columns of A */ + int A [], /* row indices of A */ + int p [], /* column pointers of A */ + int perm [], /* output permutation, size n+1 */ + double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ + int stats [COLAMD_STATS], /* output statistics and error codes */ + void * (*allocate) (size_t, size_t), + /* pointer to calloc (ANSI C) or */ + /* mxCalloc (for MATLAB mexFunction) */ + void (*release) (void *) + /* pointer to free (ANSI C) or */ + /* mxFree (for MATLAB mexFunction) */ +) +{ + /* === Local variables ================================================== */ - The integer array A is of size Alen. Alen must be at least of size - (where nnz is the number of entries in A): + int *count ; /* length of each column of M, and col pointer*/ + int *mark ; /* mark array for finding duplicate entries */ + int *M ; /* row indices of matrix M */ + int Mlen ; /* length of M */ + int n_row ; /* number of rows in M */ + int nnz ; /* number of entries in A */ + int i ; /* row index of A */ + int j ; /* column index of A */ + int k ; /* row index of M */ + int mnz ; /* number of nonzeros in M */ + int pp ; /* index into a column of A */ + int last_row ; /* last row seen in the current column */ + int length ; /* number of nonzeros in a column */ - nnz for the input column form of A - + nnz for a row form of A that colamd generates - + 6*(n_col+1) for a ColInfo Col [0..n_col] array - (this assumes sizeof (ColInfo) is 6 int's). - + 4*(n_row+1) for a RowInfo Row [0..n_row] array - (this assumes sizeof (RowInfo) is 4 int's). - + elbow_room must be at least n_col. We recommend at least - nnz/5 in addition to that. If sufficient, - changes in the elbow room affect the ordering - time only, not the ordering itself. - + COLAMD_STATS for the output statistics + double cknobs [COLAMD_KNOBS] ; /* knobs for colamd */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs for colamd */ + int cstats [COLAMD_STATS] ; /* colamd stats */ - Colamd returns FALSE is memory is insufficient, or TRUE otherwise. +#ifndef NDEBUG + colamd_get_debug ("symamd") ; +#endif /* NDEBUG */ - On input, the caller must specify: + /* === Check the input arguments ======================================== */ - n_row the number of rows of A - n_col the number of columns of A - Alen the size of the array A - A [0 ... nnz-1] the row indices, where nnz = p [n_col] - A [nnz ... Alen-1] (need not be initialized by the user) - p [0 ... n_col] the column pointers, p [0] = 0, and p [n_col] - is the number of entries in A. Column c of A - is stored in A [p [c] ... p [c+1]-1]. - knobs [0 ... 19] a set of parameters that control the behavior - of colamd. If knobs is a NULL pointer the - defaults are used. The user-callable - colamd_set_defaults routine sets the default - parameters. See that routine for a description - of the user-controllable parameters. + if (!stats) + { + DEBUG0 (("symamd: stats not present\n")) ; + return (FALSE) ; + } + for (i = 0 ; i < COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; - If the return value of Colamd is TRUE, then on output: + if (!A) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + DEBUG0 (("symamd: A not present\n")) ; + return (FALSE) ; + } - p [0 ... n_col-1] the column permutation. p [0] is the first - column index, and p [n_col-1] is the last. - That is, p [k] = j means that column j of A - is the kth column of AQ. + if (!p) /* p is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + DEBUG0 (("symamd: p not present\n")) ; + return (FALSE) ; + } - A is undefined on output (the matrix pattern is - destroyed), except for the following statistics: + if (n < 0) /* n must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n ; + DEBUG0 (("symamd: n negative %d\n", n)) ; + return (FALSE) ; + } - A [0] the number of dense (or empty) rows ignored - A [1] the number of dense (or empty) columms. These - are ordered last, in their natural order. - A [2] the number of garbage collections performed. - If this is excessive, then you would have - gotten your results faster if Alen was larger. - A [3] 0, if all row indices in each column were in - sorted order and no duplicates were present. - 1, if there were unsorted or duplicate row - indices in the input. You would have gotten - your results faster if A [3] was returned as 0. + nnz = p [n] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ; + return (FALSE) ; + } - If the return value of Colamd is FALSE, then A and p are undefined on - output. + if (p [0] != 0) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ; + return (FALSE) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + colamd_set_defaults (default_knobs) ; + knobs = default_knobs ; + } + + /* === Allocate count and mark ========================================== */ + + count = (int *) ((*allocate) (n+1, sizeof (int))) ; + if (!count) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ; + return (FALSE) ; + } + + mark = (int *) ((*allocate) (n+1, sizeof (int))) ; + if (!mark) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + (*release) ((void *) count) ; + DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ; + return (FALSE) ; + } + + /* === Compute column counts of M, check if A is valid ================== */ + + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (i = 0 ; i < n ; i++) + { + mark [i] = -1 ; + } + + for (j = 0 ; j < n ; j++) + { + last_row = -1 ; + + length = p [j+1] - p [j] ; + if (length < 0) + { + /* column pointers must be non-decreasing */ + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = length ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ; + return (FALSE) ; + } + + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + if (i < 0 || i >= n) + { + /* row index i, in column j, is out of bounds */ + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = i ; + stats [COLAMD_INFO3] = n ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ; + return (FALSE) ; + } + + if (i <= last_row || mark [i] == j) + { + /* row index is unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = i ; + (stats [COLAMD_INFO3]) ++ ; + DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ; + } + + if (i > j && mark [i] != j) + { + /* row k of M will contain column indices i and j */ + count [i]++ ; + count [j]++ ; + } + + /* mark the row as having been seen in this column */ + mark [i] = j ; + + last_row = i ; + } + } + + if (stats [COLAMD_STATUS] == COLAMD_OK) + { + /* if there are no duplicate entries, then mark is no longer needed */ + (*release) ((void *) mark) ; + } + + /* === Compute column pointers of M ===================================== */ + + /* use output permutation, perm, for column pointers of M */ + perm [0] = 0 ; + for (j = 1 ; j <= n ; j++) + { + perm [j] = perm [j-1] + count [j-1] ; + } + for (j = 0 ; j < n ; j++) + { + count [j] = perm [j] ; + } + + /* === Construct M ====================================================== */ + + mnz = perm [n] ; + n_row = mnz / 2 ; + Mlen = colamd_recommended (mnz, n_row, n) ; + M = (int *) ((*allocate) (Mlen, sizeof (int))) ; + DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n", + n_row, n, mnz, Mlen)) ; + + if (!M) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ; + return (FALSE) ; + } + + k = 0 ; + + if (stats [COLAMD_STATUS] == COLAMD_OK) + { + /* Matrix is OK */ + for (j = 0 ; j < n ; j++) + { + ASSERT (p [j+1] - p [j] >= 0) ; + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + ASSERT (i >= 0 && i < n) ; + if (i > j) + { + /* row k of M contains column indices i and j */ + M [count [i]++] = k ; + M [count [j]++] = k ; + k++ ; + } + } + } + } + else + { + /* Matrix is jumbled. Do not add duplicates to M. Unsorted cols OK. */ + DEBUG0 (("symamd: Duplicates in A.\n")) ; + for (i = 0 ; i < n ; i++) + { + mark [i] = -1 ; + } + for (j = 0 ; j < n ; j++) + { + ASSERT (p [j+1] - p [j] >= 0) ; + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + ASSERT (i >= 0 && i < n) ; + if (i > j && mark [i] != j) + { + /* row k of M contains column indices i and j */ + M [count [i]++] = k ; + M [count [j]++] = k ; + k++ ; + mark [i] = j ; + } + } + } + (*release) ((void *) mark) ; + } + + /* count and mark no longer needed */ + (*release) ((void *) count) ; + ASSERT (k == n_row) ; + + /* === Adjust the knobs for M =========================================== */ + + for (i = 0 ; i < COLAMD_KNOBS ; i++) + { + cknobs [i] = knobs [i] ; + } + + /* there are no dense rows in M */ + cknobs [COLAMD_DENSE_ROW] = 1.0 ; + + if (n_row != 0 && n < n_row) + { + /* On input, the knob is a fraction of 1..n, the number of rows of A. */ + /* Convert it to a fraction of 1..n_row, of the number of rows of M. */ + cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ; + } + else + { + /* no dense columns in M */ + cknobs [COLAMD_DENSE_COL] = 1.0 ; + } + + DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) ; + + /* === Order the columns of M =========================================== */ + + if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats)) + { + /* This "cannot" happen, unless there is a bug in the code. */ + stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ; + (*release) ((void *) M) ; + DEBUG0 (("symamd: internal error!\n")) ; + return (FALSE) ; + } + + /* Note that the output permutation is now in perm */ + + /* === get the statistics for symamd from colamd ======================== */ + + /* note that a dense column in colamd means a dense row and col in symamd */ + stats [COLAMD_DENSE_ROW] = cstats [COLAMD_DENSE_COL] ; + stats [COLAMD_DENSE_COL] = cstats [COLAMD_DENSE_COL] ; + stats [COLAMD_DEFRAG_COUNT] = cstats [COLAMD_DEFRAG_COUNT] ; + + /* === Free M =========================================================== */ + + (*release) ((void *) M) ; + DEBUG0 (("symamd: done.\n")) ; + return (TRUE) ; + +} + +/* ========================================================================== */ +/* === colamd =============================================================== */ +/* ========================================================================== */ + +/* + The colamd routine computes a column ordering Q of a sparse matrix + A such that the LU factorization P(AQ) = LU remains sparse, where P is + selected via partial pivoting. The routine can also be viewed as + providing a permutation Q such that the Cholesky factorization + (AQ)'(AQ) = LL' remains sparse. */ -PUBLIC int colamd /* returns TRUE if successful */ +PUBLIC int colamd /* returns TRUE if successful, FALSE otherwise*/ ( /* === Parameters ======================================================= */ @@ -714,7 +1312,8 @@ int Alen, /* length of A */ int A [], /* row indices of A */ int p [], /* pointers to columns in A */ - double knobs [COLAMD_KNOBS] /* parameters (uses defaults if NULL) */ + double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */ + int stats [COLAMD_STATS] /* output statistics and error codes */ ) { /* === Local variables ================================================== */ @@ -723,69 +1322,115 @@ int nnz ; /* nonzeros in A */ int Row_size ; /* size of Row [], in integers */ int Col_size ; /* size of Col [], in integers */ - int elbow_room ; /* remaining free space */ - RowInfo *Row ; /* pointer into A of Row [0..n_row] array */ - ColInfo *Col ; /* pointer into A of Col [0..n_col] array */ + int need ; /* minimum required length of A */ + Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ int n_col2 ; /* number of non-dense, non-empty columns */ int n_row2 ; /* number of non-dense, non-empty rows */ int ngarbage ; /* number of garbage collections performed */ int max_deg ; /* maximum row degree */ - double default_knobs [COLAMD_KNOBS] ; /* default knobs knobs array */ - int init_result ; /* return code from initialization */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs array */ #ifndef NDEBUG - debug_colamd = 0 ; /* no debug printing */ - /* get "D" environment variable, which gives the debug printing level */ - if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ; - DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ; -#endif + colamd_get_debug ("colamd") ; +#endif /* NDEBUG */ /* === Check the input arguments ======================================== */ - if (n_row < 0 || n_col < 0 || !A || !p) + if (!stats) { - /* n_row and n_col must be non-negative, A and p must be present */ - DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ; + DEBUG0 (("colamd: stats not present\n")) ; return (FALSE) ; } + for (i = 0 ; i < COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + DEBUG0 (("colamd: A not present\n")) ; + return (FALSE) ; + } + + if (!p) /* p is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + DEBUG0 (("colamd: p not present\n")) ; + return (FALSE) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; + stats [COLAMD_INFO1] = n_row ; + DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; + return (FALSE) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n_col ; + DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; + return (FALSE) ; + } + nnz = p [n_col] ; - if (nnz < 0 || p [0] != 0) + if (nnz < 0) /* nnz must be >= 0 */ { - /* nnz must be non-negative, and p [0] must be zero */ - DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; return (FALSE) ; } - /* === If no knobs, set default parameters ============================== */ + if (p [0] != 0) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; + return (FALSE) ; + } + /* === If no knobs, set default knobs =================================== */ + if (!knobs) { + colamd_set_defaults (default_knobs) ; knobs = default_knobs ; - colamd_set_defaults (knobs) ; } /* === Allocate the Row and Col arrays from array A ===================== */ - Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ; - Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ; - elbow_room = Alen - (2*nnz + Col_size + Row_size) ; - if (elbow_room < n_col + COLAMD_STATS) + Col_size = COLAMD_C (n_col) ; + Row_size = COLAMD_R (n_row) ; + need = 2*nnz + n_col + Col_size + Row_size ; + + if (need > Alen) { /* not enough space in array A to perform the ordering */ - DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; + stats [COLAMD_INFO1] = need ; + stats [COLAMD_INFO2] = Alen ; + DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); return (FALSE) ; } - Alen = 2*nnz + elbow_room ; - Col = (ColInfo *) &A [Alen] ; - Row = (RowInfo *) &A [Alen + Col_size] ; + Alen -= Col_size + Row_size ; + Col = (Colamd_Col *) &A [Alen] ; + Row = (Colamd_Row *) &A [Alen + Col_size] ; + /* === Construct the row and column data structures ===================== */ - init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ; - if (init_result == -1) + if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) { /* input matrix is invalid */ - DEBUG0 (("colamd error! matrix invalid\n")) ; + DEBUG0 (("colamd: Matrix invalid\n")) ; return (FALSE) ; } @@ -803,22 +1448,44 @@ order_children (n_col, Col, p) ; - /* === Return statistics in A =========================================== */ + /* === Return statistics in stats ======================================= */ - for (i = 0 ; i < COLAMD_STATS ; i++) - { - A [i] = 0 ; - } - A [COLAMD_DENSE_ROW] = n_row - n_row2 ; - A [COLAMD_DENSE_COL] = n_col - n_col2 ; - A [COLAMD_DEFRAG_COUNT] = ngarbage ; - A [COLAMD_JUMBLED_COLS] = init_result ; - + stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [COLAMD_DEFRAG_COUNT] = ngarbage ; + DEBUG0 (("colamd: done.\n")) ; return (TRUE) ; } /* ========================================================================== */ +/* === colamd_report ======================================================== */ +/* ========================================================================== */ + +PUBLIC void colamd_report +( + int stats [COLAMD_STATS] +) +{ + print_report ("colamd", stats) ; +} + + +/* ========================================================================== */ +/* === symamd_report ======================================================== */ +/* ========================================================================== */ + +PUBLIC void symamd_report +( + int stats [COLAMD_STATS] +) +{ + print_report ("symamd", stats) ; +} + + + +/* ========================================================================== */ /* === NON-USER-CALLABLE ROUTINES: ========================================== */ /* ========================================================================== */ @@ -834,20 +1501,21 @@ matrix. Also, row and column attributes are stored in the Col and Row structs. If the columns are un-sorted or contain duplicate row indices, this routine will also sort and remove duplicate row indices from the - column form of the matrix. Returns -1 on error, 1 if columns jumbled, - or 0 if columns not jumbled. Not user-callable. + column form of the matrix. Returns FALSE if the matrix is invalid, + TRUE otherwise. Not user-callable. */ -PRIVATE int init_rows_cols /* returns status code */ +PRIVATE int init_rows_cols /* returns TRUE if OK, or FALSE otherwise */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ - RowInfo Row [], /* of size n_row+1 */ - ColInfo Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ int A [], /* row indices of A, of size Alen */ - int p [] /* pointers to columns in A, of size n_col+1 */ + int p [], /* pointers to columns in A, of size n_col+1 */ + int stats [COLAMD_STATS] /* colamd statistics */ ) { /* === Local variables ================================================== */ @@ -858,44 +1526,36 @@ int *cp_end ; /* a pointer to the end of a column */ int *rp ; /* a row pointer */ int *rp_end ; /* a pointer to the end of a row */ - int last_start ; /* start index of previous column in A */ - int start ; /* start index of column in A */ int last_row ; /* previous row */ - int jumbled_columns ; /* indicates if columns are jumbled */ /* === Initialize columns, and check column pointers ==================== */ - last_start = 0 ; for (col = 0 ; col < n_col ; col++) { - start = p [col] ; - if (start < last_start) + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) { /* column pointers must be non-decreasing */ - DEBUG0 (("colamd error! last p %d p [col] %d\n",last_start,start)); - return (-1) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = Col [col].length ; + DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (FALSE) ; } - Col [col].start = start ; - Col [col].length = p [col+1] - start ; + Col [col].shared1.thickness = 1 ; Col [col].shared2.score = 0 ; Col [col].shared3.prev = EMPTY ; Col [col].shared4.degree_next = EMPTY ; - last_start = start ; } - /* must check the end pointer for last column */ - if (p [n_col] < last_start) - { - /* column pointers must be non-decreasing */ - DEBUG0 (("colamd error! last p %d p [n_col] %d\n",p[col],last_start)) ; - return (-1) ; - } /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ /* === Scan columns, compute row degrees, and check row indices ========= */ - jumbled_columns = FALSE ; + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ for (row = 0 ; row < n_row ; row++) { @@ -917,22 +1577,28 @@ /* make sure row indices within range */ if (row < 0 || row >= n_row) { - DEBUG0 (("colamd error! col %d row %d last_row %d\n", - col, row, last_row)) ; - return (-1) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + stats [COLAMD_INFO3] = n_row ; + DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ; + return (FALSE) ; } - else if (row <= last_row) + + if (row <= last_row || Row [row].shared2.mark == col) { - /* row indices are not sorted or repeated, thus cols */ - /* are jumbled */ - jumbled_columns = TRUE ; + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + (stats [COLAMD_INFO3]) ++ ; + DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col)); } - /* prevent repeated row from being counted */ + if (Row [row].shared2.mark != col) { Row [row].length++ ; - Row [row].shared2.mark = col ; - last_row = row ; } else { @@ -940,6 +1606,11 @@ /* it will be removed */ Col [col].length-- ; } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; } } @@ -959,7 +1630,7 @@ /* === Create row form ================================================== */ - if (jumbled_columns) + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) { /* if cols jumbled, watch for repeated row indices */ for (col = 0 ; col < n_col ; col++) @@ -1001,8 +1672,9 @@ /* === See if we need to re-create columns ============================== */ - if (jumbled_columns) + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) { + DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ; #ifndef NDEBUG /* make sure column lengths are correct */ @@ -1021,10 +1693,10 @@ } for (col = 0 ; col < n_col ; col++) { - assert (p [col] == 0) ; + ASSERT (p [col] == 0) ; } /* now p is all zero (different than when debugging is turned off) */ -#endif +#endif /* NDEBUG */ /* === Compute col pointers ========================================= */ @@ -1053,13 +1725,11 @@ A [(p [*rp++])++] = row ; } } - return (1) ; } - else - { - /* no columns jumbled (this is faster) */ - return (0) ; - } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (TRUE) ; } @@ -1078,8 +1748,8 @@ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ - RowInfo Row [], /* of size n_row+1 */ - ColInfo Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ double knobs [COLAMD_KNOBS],/* parameters */ @@ -1093,7 +1763,7 @@ int c ; /* a column index */ int r, row ; /* a row index */ int *cp ; /* a column pointer */ - int deg ; /* degree (# entries) of a row or column */ + int deg ; /* degree of a row or column */ int *cp_end ; /* a pointer to the end of a column */ int *new_cp ; /* new column pointer */ int col_length ; /* length of pruned column */ @@ -1105,22 +1775,23 @@ int min_score ; /* smallest column score */ int max_deg ; /* maximum row degree */ int next_col ; /* Used to add to degree list.*/ + #ifndef NDEBUG int debug_count ; /* debug only. */ -#endif +#endif /* NDEBUG */ /* === Extract knobs ==================================================== */ dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; - DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ; + DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; max_deg = 0 ; n_col2 = n_col ; n_row2 = n_row ; /* === Kill empty columns =============================================== */ - /* Put the empty columns at the end in their natural, so that LU */ + /* Put the empty columns at the end in their natural order, so that LU */ /* factorization can proceed as far as possible. */ for (c = n_col-1 ; c >= 0 ; c--) { @@ -1132,7 +1803,7 @@ KILL_PRINCIPAL_COL (c) ; } } - DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ; + DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense columns =============================================== */ @@ -1159,14 +1830,14 @@ KILL_PRINCIPAL_COL (c) ; } } - DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ; + DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense and empty rows ======================================== */ for (r = 0 ; r < n_row ; r++) { deg = Row [r].shared1.degree ; - assert (deg >= 0 && deg <= n_col) ; + ASSERT (deg >= 0 && deg <= n_col) ; if (deg > dense_row_count || deg == 0) { /* kill a dense or empty row */ @@ -1179,7 +1850,7 @@ max_deg = MAX (max_deg, deg) ; } } - DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ; + DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; /* === Compute initial column scores ==================================== */ @@ -1222,20 +1893,21 @@ { /* a newly-made null column (all rows in this col are "dense" */ /* and have already been killed) */ - DEBUG0 (("Newly null killed: %d\n", c)) ; + DEBUG2 (("Newly null killed: %d\n", c)) ; Col [c].shared2.order = --n_col2 ; KILL_PRINCIPAL_COL (c) ; } else { /* set column length and set score */ - assert (score >= 0) ; - assert (score <= n_col) ; + ASSERT (score >= 0) ; + ASSERT (score <= n_col) ; Col [c].length = col_length ; Col [c].shared2.score = score ; } } - DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ; + DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; /* At this point, all empty rows and columns are dead. All live columns */ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ @@ -1244,13 +1916,13 @@ #ifndef NDEBUG debug_structures (n_row, n_col, Row, Col, A, n_col2) ; -#endif +#endif /* NDEBUG */ /* === Initialize degree lists ========================================== */ #ifndef NDEBUG debug_count = 0 ; -#endif +#endif /* NDEBUG */ /* clear the hash buckets */ for (c = 0 ; c <= n_col ; c++) @@ -1272,11 +1944,11 @@ score = Col [c].shared2.score ; - assert (min_score >= 0) ; - assert (min_score <= n_col) ; - assert (score >= 0) ; - assert (score <= n_col) ; - assert (head [score] >= EMPTY) ; + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (score >= 0) ; + ASSERT (score <= n_col) ; + ASSERT (head [score] >= EMPTY) ; /* now add this column to dList at proper score location */ next_col = head [score] ; @@ -1296,16 +1968,17 @@ #ifndef NDEBUG debug_count++ ; -#endif +#endif /* NDEBUG */ + } } #ifndef NDEBUG - DEBUG0 (("Live cols %d out of %d, non-princ: %d\n", + DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n", debug_count, n_col, n_col-debug_count)) ; - assert (debug_count == n_col2) ; + ASSERT (debug_count == n_col2) ; debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; -#endif +#endif /* NDEBUG */ /* === Return number of remaining columns, and max row degree =========== */ @@ -1331,9 +2004,9 @@ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ - int Alen, /* size of A, 2*nnz + elbow_room or larger */ - RowInfo Row [], /* of size n_row+1 */ - ColInfo Col [], /* of size n_col+1 */ + int Alen, /* size of A, 2*nnz + n_col or larger */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ int n_col2, /* Remaining columns to order */ @@ -1351,8 +2024,8 @@ int *new_cp ; /* modified column pointer */ int *new_rp ; /* modified row pointer */ int pivot_row_start ; /* pointer to start of pivot row */ - int pivot_row_degree ; /* # of columns in pivot row */ - int pivot_row_length ; /* # of supercolumns in pivot row */ + int pivot_row_degree ; /* number of columns in pivot row */ + int pivot_row_length ; /* number of supercolumns in pivot row */ int pivot_col_score ; /* score of pivot column */ int needed_memory ; /* free space needed for pivot row */ int *cp_end ; /* pointer to the end of a column */ @@ -1368,16 +2041,17 @@ int row_mark ; /* Row [row].shared2.mark */ int set_difference ; /* set difference size of row with pivot row */ int min_score ; /* smallest column score */ - int col_thickness ; /* "thickness" (# of columns in a supercol) */ + int col_thickness ; /* "thickness" (no. of columns in a supercol) */ int max_mark ; /* maximum value of tag_mark */ int pivot_col_thickness ; /* number of columns represented by pivot col */ int prev_col ; /* Used by Dlist operations. */ int next_col ; /* Used by Dlist operations. */ int ngarbage ; /* number of garbage collections performed */ + #ifndef NDEBUG int debug_d ; /* debug loop counter */ int debug_step = 0 ; /* debug loop counter */ -#endif +#endif /* NDEBUG */ /* === Initialization and clear mark ==================================== */ @@ -1385,7 +2059,7 @@ tag_mark = clear_mark (n_row, Row) ; min_score = 0 ; ngarbage = 0 ; - DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ; + DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ; /* === Order the columns ================================================ */ @@ -1395,31 +2069,31 @@ #ifndef NDEBUG if (debug_step % 100 == 0) { - DEBUG0 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ; + DEBUG2 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ; } else { - DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; + DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; } debug_step++ ; debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k, max_deg) ; debug_matrix (n_row, n_col, Row, Col, A) ; -#endif +#endif /* NDEBUG */ /* === Select pivot column, and order it ============================ */ /* make sure degree list isn't empty */ - assert (min_score >= 0) ; - assert (min_score <= n_col) ; - assert (head [min_score] >= EMPTY) ; + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (head [min_score] >= EMPTY) ; #ifndef NDEBUG for (debug_d = 0 ; debug_d < min_score ; debug_d++) { - assert (head [debug_d] == EMPTY) ; + ASSERT (head [debug_d] == EMPTY) ; } -#endif +#endif /* NDEBUG */ /* get pivot column from head of minimum degree list */ while (head [min_score] == EMPTY && min_score < n_col) @@ -1427,7 +2101,7 @@ min_score++ ; } pivot_col = head [min_score] ; - assert (pivot_col >= 0 && pivot_col <= n_col) ; + ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; next_col = Col [pivot_col].shared4.degree_next ; head [min_score] = next_col ; if (next_col != EMPTY) @@ -1435,7 +2109,7 @@ Col [next_col].shared3.prev = EMPTY ; } - assert (COL_IS_ALIVE (pivot_col)) ; + ASSERT (COL_IS_ALIVE (pivot_col)) ; DEBUG3 (("Pivot col: %d\n", pivot_col)) ; /* remember score for defrag check */ @@ -1447,7 +2121,7 @@ /* increment order count by column thickness */ pivot_col_thickness = Col [pivot_col].shared1.thickness ; k += pivot_col_thickness ; - assert (pivot_col_thickness > 0) ; + ASSERT (pivot_col_thickness > 0) ; /* === Garbage_collection, if necessary ============================= */ @@ -1457,12 +2131,13 @@ pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; ngarbage++ ; /* after garbage collection we will have enough */ - assert (pfree + needed_memory < Alen) ; + ASSERT (pfree + needed_memory < Alen) ; /* garbage collection has wiped out the Row[].shared2.mark array */ tag_mark = clear_mark (n_row, Row) ; + #ifndef NDEBUG debug_matrix (n_row, n_col, Row, Col, A) ; -#endif +#endif /* NDEBUG */ } /* === Compute pivot row pattern ==================================== */ @@ -1502,7 +2177,7 @@ { /* tag column in pivot row */ Col [col].shared1.thickness = -col_thickness ; - assert (pfree < Alen) ; + ASSERT (pfree < Alen) ; /* place column in pivot row */ A [pfree++] = col ; pivot_row_degree += col_thickness ; @@ -1517,7 +2192,7 @@ #ifndef NDEBUG DEBUG3 (("check2\n")) ; debug_mark (n_row, Row, tag_mark, max_mark) ; -#endif +#endif /* NDEBUG */ /* === Kill all rows used to construct pivot row ==================== */ @@ -1528,7 +2203,7 @@ { /* may be killing an already dead row */ row = *cp++ ; - DEBUG2 (("Kill row in pivot col: %d\n", row)) ; + DEBUG3 (("Kill row in pivot col: %d\n", row)) ; KILL_ROW (row) ; } @@ -1539,15 +2214,15 @@ { /* pick the "pivot" row arbitrarily (first row in col) */ pivot_row = A [Col [pivot_col].start] ; - DEBUG2 (("Pivotal row is %d\n", pivot_row)) ; + DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; } else { /* there is no pivot row, since it is of zero length */ pivot_row = EMPTY ; - assert (pivot_row_length == 0) ; + ASSERT (pivot_row_length == 0) ; } - assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; /* === Approximate degree computation =============================== */ @@ -1570,23 +2245,23 @@ /* === Compute set differences ====================================== */ - DEBUG1 (("** Computing set differences phase. **\n")) ; + DEBUG3 (("** Computing set differences phase. **\n")) ; /* pivot row is currently dead - it will be revived later. */ - DEBUG2 (("Pivot row: ")) ; + DEBUG3 (("Pivot row: ")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; rp_end = rp + pivot_row_length ; while (rp < rp_end) { col = *rp++ ; - assert (COL_IS_ALIVE (col) && col != pivot_col) ; - DEBUG2 (("Col: %d\n", col)) ; + ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; + DEBUG3 (("Col: %d\n", col)) ; /* clear tags used to construct pivot row pattern */ col_thickness = -Col [col].shared1.thickness ; - assert (col_thickness > 0) ; + ASSERT (col_thickness > 0) ; Col [col].shared1.thickness = col_thickness ; /* === Remove column from degree list =========================== */ @@ -1594,9 +2269,9 @@ cur_score = Col [col].shared2.score ; prev_col = Col [col].shared3.prev ; next_col = Col [col].shared4.degree_next ; - assert (cur_score >= 0) ; - assert (cur_score <= n_col) ; - assert (cur_score >= EMPTY) ; + ASSERT (cur_score >= 0) ; + ASSERT (cur_score <= n_col) ; + ASSERT (cur_score >= EMPTY) ; if (prev_col == EMPTY) { head [cur_score] = next_col ; @@ -1624,21 +2299,21 @@ { continue ; } - assert (row != pivot_row) ; + ASSERT (row != pivot_row) ; set_difference = row_mark - tag_mark ; /* check if the row has been seen yet */ if (set_difference < 0) { - assert (Row [row].shared1.degree <= max_deg) ; + ASSERT (Row [row].shared1.degree <= max_deg) ; set_difference = Row [row].shared1.degree ; } /* subtract column thickness from this row's set difference */ set_difference -= col_thickness ; - assert (set_difference >= 0) ; + ASSERT (set_difference >= 0) ; /* absorb this row if the set difference becomes zero */ if (set_difference == 0) { - DEBUG1 (("aggressive absorption. Row: %d\n", row)) ; + DEBUG3 (("aggressive absorption. Row: %d\n", row)) ; KILL_ROW (row) ; } else @@ -1652,11 +2327,11 @@ #ifndef NDEBUG debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k-pivot_row_degree, max_deg) ; -#endif +#endif /* NDEBUG */ /* === Add up set differences for each column ======================= */ - DEBUG1 (("** Adding set differences phase. **\n")) ; + DEBUG3 (("** Adding set differences phase. **\n")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; @@ -1665,7 +2340,7 @@ { /* get a column */ col = *rp++ ; - assert (COL_IS_ALIVE (col) && col != pivot_col) ; + ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; hash = 0 ; cur_score = 0 ; cp = &A [Col [col].start] ; @@ -1673,20 +2348,20 @@ new_cp = cp ; cp_end = cp + Col [col].length ; - DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ; + DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; while (cp < cp_end) { /* get a row */ row = *cp++ ; - assert(row >= 0 && row < n_row) ; + ASSERT(row >= 0 && row < n_row) ; row_mark = Row [row].shared2.mark ; /* skip if dead */ if (ROW_IS_MARKED_DEAD (row_mark)) { continue ; } - assert (row_mark > tag_mark) ; + ASSERT (row_mark > tag_mark) ; /* compact the column */ *new_cp++ = row ; /* compute hash function */ @@ -1704,11 +2379,11 @@ if (Col [col].length == 0) { - DEBUG1 (("further mass elimination. Col: %d\n", col)) ; + DEBUG4 (("further mass elimination. Col: %d\n", col)) ; /* nothing left but the pivot row in this column */ KILL_PRINCIPAL_COL (col) ; pivot_row_degree -= Col [col].shared1.thickness ; - assert (pivot_row_degree >= 0) ; + ASSERT (pivot_row_degree >= 0) ; /* order it */ Col [col].shared2.order = k ; /* increment order count by column thickness */ @@ -1718,7 +2393,7 @@ { /* === Prepare for supercolumn detection ==================== */ - DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ; + DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; /* save score so far */ Col [col].shared2.score = cur_score ; @@ -1726,8 +2401,8 @@ /* add column to hash table, for supercolumn detection */ hash %= n_col + 1 ; - DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; - assert (hash <= n_col) ; + DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + ASSERT (hash <= n_col) ; head_column = head [hash] ; if (head_column > EMPTY) @@ -1747,7 +2422,7 @@ /* save hash function in Col [col].shared3.hash */ Col [col].shared3.hash = (int) hash ; - assert (COL_IS_ALIVE (col)) ; + ASSERT (COL_IS_ALIVE (col)) ; } } @@ -1755,12 +2430,14 @@ /* === Supercolumn detection ======================================== */ - DEBUG1 (("** Supercolumn detection phase. **\n")) ; + DEBUG3 (("** Supercolumn detection phase. **\n")) ; detect_super_cols ( + #ifndef NDEBUG n_col, Row, -#endif +#endif /* NDEBUG */ + Col, A, head, pivot_row_start, pivot_row_length) ; /* === Kill the pivotal column ====================================== */ @@ -1772,17 +2449,18 @@ tag_mark += (max_deg + 1) ; if (tag_mark >= max_mark) { - DEBUG1 (("clearing tag_mark\n")) ; + DEBUG2 (("clearing tag_mark\n")) ; tag_mark = clear_mark (n_row, Row) ; } + #ifndef NDEBUG DEBUG3 (("check3\n")) ; debug_mark (n_row, Row, tag_mark, max_mark) ; -#endif +#endif /* NDEBUG */ /* === Finalize the new pivot row, and column scores ================ */ - DEBUG1 (("** Finalize scores phase. **\n")) ; + DEBUG3 (("** Finalize scores phase. **\n")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; @@ -1816,18 +2494,18 @@ /* make sure score is less or equal than the max score */ cur_score = MIN (cur_score, max_score) ; - assert (cur_score >= 0) ; + ASSERT (cur_score >= 0) ; /* store updated score */ Col [col].shared2.score = cur_score ; /* === Place column back in degree list ========================= */ - assert (min_score >= 0) ; - assert (min_score <= n_col) ; - assert (cur_score >= 0) ; - assert (cur_score <= n_col) ; - assert (head [cur_score] >= EMPTY) ; + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (cur_score >= 0) ; + ASSERT (cur_score <= n_col) ; + ASSERT (head [cur_score] >= EMPTY) ; next_col = head [cur_score] ; Col [col].shared4.degree_next = next_col ; Col [col].shared3.prev = EMPTY ; @@ -1845,7 +2523,7 @@ #ifndef NDEBUG debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k, max_deg) ; -#endif +#endif /* NDEBUG */ /* === Resurrect the new pivot row ================================== */ @@ -1889,7 +2567,7 @@ /* === Parameters ======================================================= */ int n_col, /* number of columns of A */ - ColInfo Col [], /* of size n_col+1 */ + Colamd_Col Col [], /* of size n_col+1 */ int p [] /* p [0 ... n_col-1] is the column permutation*/ ) { @@ -1905,7 +2583,7 @@ for (i = 0 ; i < n_col ; i++) { /* find an un-ordered non-principal column */ - assert (COL_IS_DEAD (i)) ; + ASSERT (COL_IS_DEAD (i)) ; if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY) { parent = i ; @@ -1923,7 +2601,7 @@ do { - assert (Col [c].shared2.order == EMPTY) ; + ASSERT (Col [c].shared2.order == EMPTY) ; /* order this column */ Col [c].shared2.order = order++ ; @@ -1992,9 +2670,10 @@ #ifndef NDEBUG /* these two parameters are only needed when debugging is enabled: */ int n_col, /* number of columns of A */ - RowInfo Row [], /* of size n_row+1 */ -#endif - ColInfo Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ +#endif /* NDEBUG */ + + Colamd_Col Col [], /* of size n_col+1 */ int A [], /* row indices of A */ int head [], /* head of degree lists and hash buckets */ int row_start, /* pointer to set of columns to check */ @@ -2003,7 +2682,7 @@ { /* === Local variables ================================================== */ - int hash ; /* hash # for a column */ + int hash ; /* hash value for a column */ int *rp ; /* pointer to a row */ int c ; /* a column index */ int super_c ; /* column index of the column to absorb into */ @@ -2031,7 +2710,7 @@ /* get hash number for this column */ hash = Col [col].shared3.hash ; - assert (hash <= n_col) ; + ASSERT (hash <= n_col) ; /* === Get the first column in this hash bucket ===================== */ @@ -2050,8 +2729,8 @@ for (super_c = first_col ; super_c != EMPTY ; super_c = Col [super_c].shared4.hash_next) { - assert (COL_IS_ALIVE (super_c)) ; - assert (Col [super_c].shared3.hash == hash) ; + ASSERT (COL_IS_ALIVE (super_c)) ; + ASSERT (Col [super_c].shared3.hash == hash) ; length = Col [super_c].length ; /* prev_c is the column preceding column c in the hash bucket */ @@ -2062,9 +2741,9 @@ for (c = Col [super_c].shared4.hash_next ; c != EMPTY ; c = Col [c].shared4.hash_next) { - assert (c != super_c) ; - assert (COL_IS_ALIVE (c)) ; - assert (Col [c].shared3.hash == hash) ; + ASSERT (c != super_c) ; + ASSERT (COL_IS_ALIVE (c)) ; + ASSERT (Col [c].shared3.hash == hash) ; /* not identical if lengths or scores are different */ if (Col [c].length != length || @@ -2081,8 +2760,8 @@ for (i = 0 ; i < length ; i++) { /* the columns are "clean" (no dead rows) */ - assert (ROW_IS_ALIVE (*cp1)) ; - assert (ROW_IS_ALIVE (*cp2)) ; + ASSERT (ROW_IS_ALIVE (*cp1)) ; + ASSERT (ROW_IS_ALIVE (*cp2)) ; /* row indices will same order for both supercols, */ /* no gather scatter nessasary */ if (*cp1++ != *cp2++) @@ -2100,7 +2779,7 @@ /* === Got it! two columns are identical =================== */ - assert (Col [c].shared2.score == Col [super_c].shared2.score) ; + ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; Col [super_c].shared1.thickness += Col [c].shared1.thickness ; Col [c].shared1.parent = super_c ; @@ -2147,8 +2826,8 @@ int n_row, /* number of rows */ int n_col, /* number of columns */ - RowInfo Row [], /* row info */ - ColInfo Col [], /* column info */ + Colamd_Row Row [], /* row info */ + Colamd_Col Col [], /* column info */ int A [], /* A [0 ... Alen-1] holds the matrix */ int *pfree /* &A [0] ... pfree is in use */ ) @@ -2164,10 +2843,10 @@ #ifndef NDEBUG int debug_rows ; - DEBUG0 (("Defrag..\n")) ; - for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ; + DEBUG2 (("Defrag..\n")) ; + for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ; debug_rows = 0 ; -#endif +#endif /* NDEBUG */ /* === Defragment the columns =========================================== */ @@ -2179,7 +2858,7 @@ psrc = &A [Col [c].start] ; /* move and compact the column */ - assert (pdest <= psrc) ; + ASSERT (pdest <= psrc) ; Col [c].start = (int) (pdest - &A [0]) ; length = Col [c].length ; for (j = 0 ; j < length ; j++) @@ -2203,7 +2882,7 @@ if (Row [r].length == 0) { /* this row is of zero length. cannot compact it, so kill it */ - DEBUG0 (("Defrag row kill\n")) ; + DEBUG3 (("Defrag row kill\n")) ; KILL_ROW (r) ; } else @@ -2211,12 +2890,14 @@ /* save first column index in Row [r].shared2.first_column */ psrc = &A [Row [r].start] ; Row [r].shared2.first_column = *psrc ; - assert (ROW_IS_ALIVE (r)) ; + ASSERT (ROW_IS_ALIVE (r)) ; /* flag the start of the row with the one's complement of row */ *psrc = ONES_COMPLEMENT (r) ; + #ifndef NDEBUG debug_rows++ ; -#endif +#endif /* NDEBUG */ + } } } @@ -2232,13 +2913,13 @@ psrc-- ; /* get the row index */ r = ONES_COMPLEMENT (*psrc) ; - assert (r >= 0 && r < n_row) ; + ASSERT (r >= 0 && r < n_row) ; /* restore first column index */ *psrc = Row [r].shared2.first_column ; - assert (ROW_IS_ALIVE (r)) ; + ASSERT (ROW_IS_ALIVE (r)) ; /* move and compact the row */ - assert (pdest <= psrc) ; + ASSERT (pdest <= psrc) ; Row [r].start = (int) (pdest - &A [0]) ; length = Row [r].length ; for (j = 0 ; j < length ; j++) @@ -2250,13 +2931,15 @@ } } Row [r].length = (int) (pdest - &A [Row [r].start]) ; + #ifndef NDEBUG debug_rows-- ; -#endif +#endif /* NDEBUG */ + } } /* ensure we found all the rows */ - assert (debug_rows == 0) ; + ASSERT (debug_rows == 0) ; /* === Return the new value of pfree ==================================== */ @@ -2278,14 +2961,13 @@ /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ - RowInfo Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ + Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ ) { /* === Local variables ================================================== */ int r ; - DEBUG0 (("Clear mark\n")) ; for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) @@ -2298,9 +2980,141 @@ /* ========================================================================== */ -/* === debugging routines =================================================== */ +/* === print_report ========================================================= */ /* ========================================================================== */ +PRIVATE void print_report +( + char *method, + int stats [COLAMD_STATS] +) +{ + + int i1, i2, i3 ; + + if (!stats) + { + PRINTF ("%s: No statistics available.\n", method) ; + return ; + } + + i1 = stats [COLAMD_INFO1] ; + i2 = stats [COLAMD_INFO2] ; + i3 = stats [COLAMD_INFO3] ; + + if (stats [COLAMD_STATUS] >= 0) + { + PRINTF ("%s: OK. ", method) ; + } + else + { + PRINTF ("%s: ERROR. ", method) ; + } + + switch (stats [COLAMD_STATUS]) + { + + case COLAMD_OK_BUT_JUMBLED: + + PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; + + PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", + method, i3) ; + + PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", + method, INDEX (i2)) ; + + PRINTF ("%s: last seen in column: %d", + method, INDEX (i1)) ; + + /* no break - fall through to next case instead */ + + case COLAMD_OK: + + PRINTF ("\n") ; + + PRINTF ("%s: number of dense or empty rows ignored: %d\n", + method, stats [COLAMD_DENSE_ROW]) ; + + PRINTF ("%s: number of dense or empty columns ignored: %d\n", + method, stats [COLAMD_DENSE_COL]) ; + + PRINTF ("%s: number of garbage collections performed: %d\n", + method, stats [COLAMD_DEFRAG_COUNT]) ; + break ; + + case COLAMD_ERROR_A_not_present: + + PRINTF ("Array A (row indices of matrix) not present.\n") ; + break ; + + case COLAMD_ERROR_p_not_present: + + PRINTF ("Array p (column pointers for matrix) not present.\n") ; + break ; + + case COLAMD_ERROR_nrow_negative: + + PRINTF ("Invalid number of rows (%d).\n", i1) ; + break ; + + case COLAMD_ERROR_ncol_negative: + + PRINTF ("Invalid number of columns (%d).\n", i1) ; + break ; + + case COLAMD_ERROR_nnz_negative: + + PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; + break ; + + case COLAMD_ERROR_p0_nonzero: + + PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; + break ; + + case COLAMD_ERROR_A_too_small: + + PRINTF ("Array A too small.\n") ; + PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", + i1, i2) ; + break ; + + case COLAMD_ERROR_col_length_negative: + + PRINTF + ("Column %d has a negative number of nonzero entries (%d).\n", + INDEX (i1), i2) ; + break ; + + case COLAMD_ERROR_row_index_out_of_bounds: + + PRINTF + ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", + INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; + break ; + + case COLAMD_ERROR_out_of_memory: + + PRINTF ("Out of memory.\n") ; + break ; + + case COLAMD_ERROR_internal_error: + + /* if this happens, there is a bug in the code */ + PRINTF + ("Internal error! Please contact authors (davis at cise.ufl.edu).\n") ; + break ; + } +} + + + + +/* ========================================================================== */ +/* === colamd debugging routines ============================================ */ +/* ========================================================================== */ + /* When debugging is disabled, the remainder of this file is ignored. */ #ifndef NDEBUG @@ -2323,8 +3137,8 @@ int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [], int n_col2 ) @@ -2351,21 +3165,21 @@ len = Col [c].length ; score = Col [c].shared2.score ; DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; - assert (len > 0) ; - assert (score >= 0) ; - assert (Col [c].shared1.thickness == 1) ; + ASSERT (len > 0) ; + ASSERT (score >= 0) ; + ASSERT (Col [c].shared1.thickness == 1) ; cp = &A [Col [c].start] ; cp_end = cp + len ; while (cp < cp_end) { r = *cp++ ; - assert (ROW_IS_ALIVE (r)) ; + ASSERT (ROW_IS_ALIVE (r)) ; } } else { i = Col [c].shared2.order ; - assert (i >= n_col2 && i < n_col) ; + ASSERT (i >= n_col2 && i < n_col) ; } } @@ -2376,8 +3190,8 @@ i = 0 ; len = Row [r].length ; deg = Row [r].shared1.degree ; - assert (len > 0) ; - assert (deg > 0) ; + ASSERT (len > 0) ; + ASSERT (deg > 0) ; rp = &A [Row [r].start] ; rp_end = rp + len ; while (rp < rp_end) @@ -2388,7 +3202,7 @@ i++ ; } } - assert (i > 0) ; + ASSERT (i > 0) ; } } } @@ -2410,8 +3224,8 @@ int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int head [], int min_score, int should, @@ -2427,7 +3241,7 @@ /* === Check the degree lists =========================================== */ - if (n_col > 10000 && debug_colamd <= 0) + if (n_col > 10000 && colamd_debug <= 0) { return ; } @@ -2445,17 +3259,17 @@ { DEBUG4 ((" %d", col)) ; have += Col [col].shared1.thickness ; - assert (COL_IS_ALIVE (col)) ; + ASSERT (COL_IS_ALIVE (col)) ; col = Col [col].shared4.degree_next ; } DEBUG4 (("\n")) ; } DEBUG4 (("should %d have %d\n", should, have)) ; - assert (should == have) ; + ASSERT (should == have) ; /* === Check the row degrees ============================================ */ - if (n_row > 10000 && debug_colamd <= 0) + if (n_row > 10000 && colamd_debug <= 0) { return ; } @@ -2463,7 +3277,7 @@ { if (ROW_IS_ALIVE (row)) { - assert (Row [row].shared1.degree <= max_deg) ; + ASSERT (Row [row].shared1.degree <= max_deg) ; } } } @@ -2483,7 +3297,7 @@ /* === Parameters ======================================================= */ int n_row, - RowInfo Row [], + Colamd_Row Row [], int tag_mark, int max_mark ) @@ -2494,14 +3308,14 @@ /* === Check the Row marks ============================================== */ - assert (tag_mark > 0 && tag_mark <= max_mark) ; - if (n_row > 10000 && debug_colamd <= 0) + ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; + if (n_row > 10000 && colamd_debug <= 0) { return ; } for (r = 0 ; r < n_row ; r++) { - assert (Row [r].shared2.mark < tag_mark) ; + ASSERT (Row [r].shared2.mark < tag_mark) ; } } @@ -2520,8 +3334,8 @@ int n_row, int n_col, - RowInfo Row [], - ColInfo Col [], + Colamd_Row Row [], + Colamd_Col Col [], int A [] ) { @@ -2536,7 +3350,7 @@ /* === Dump the rows and columns of the matrix ========================== */ - if (debug_colamd < 3) + if (colamd_debug < 3) { return ; } @@ -2555,7 +3369,7 @@ while (rp < rp_end) { c = *rp++ ; - DEBUG3 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ; + DEBUG4 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ; } } @@ -2574,10 +3388,27 @@ while (cp < cp_end) { r = *cp++ ; - DEBUG3 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ; + DEBUG4 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ; } } } -#endif +PRIVATE void colamd_get_debug +( + char *method +) +{ + colamd_debug = 0 ; /* no debug printing */ + /* get "D" environment variable, which gives the debug printing level */ + if (getenv ("D")) + { + colamd_debug = atoi (getenv ("D")) ; + } + + DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", + method, colamd_debug)) ; +} + +#endif /* NDEBUG */ + Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/colamd.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,49 +1,203 @@ -/* ========================================================================== */ -/* === colamd prototypes and definitions ==================================== */ -/* ========================================================================== */ +/*! @file colamd.h + \brief Colamd prototypes and definitions -/* - This is the colamd include file, +
 
+    ==========================================================================
+    === colamd/symamd prototypes and definitions =============================
+    ==========================================================================
 
-	http://www.cise.ufl.edu/~davis/colamd/colamd.h
+    You must include this file (colamd.h) in any routine that uses colamd,
+    symamd, or the related macros and definitions.
 
-    for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+    Authors:
 
-	http://www.cise.ufl.edu/~davis/colamd/
+	The authors of the code itself are Stefan I. Larimore and Timothy A.
+	Davis (davis at cise.ufl.edu), University of Florida.  The algorithm was
+	developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+	Ng, Oak Ridge National Laboratory.
 
-    See those files for a description of colamd and symamd, and for the
-    copyright notice, which also applies to this file.
+    Date:
 
-    August 3, 1998.  Version 1.0.
+	September 8, 2003.  Version 2.3.
+
+    Acknowledgements:
+
+	This work was supported by the National Science Foundation, under
+	grants DMS-9504974 and DMS-9803599.
+
+    Notice:
+
+	Copyright (c) 1998-2003 by the University of Florida.
+	All Rights Reserved.
+
+	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+
+	Permission is hereby granted to use, copy, modify, and/or distribute
+	this program, provided that the Copyright, this License, and the
+	Availability of the original version is retained on all copies and made
+	accessible to the end-user of any code or package that includes COLAMD
+	or any modified version of COLAMD. 
+
+    Availability:
+
+	The colamd/symamd library is available at
+
+	    http://www.cise.ufl.edu/research/sparse/colamd/
+
+	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
+	file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
+	files, and by any C code that calls the routines whose prototypes are
+	listed below, or that uses the colamd/symamd definitions listed below.
+ 
*/ +#ifndef COLAMD_H +#define COLAMD_H + /* ========================================================================== */ -/* === Definitions ========================================================== */ +/* === Include files ======================================================== */ /* ========================================================================== */ +#include + +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + /* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ #define COLAMD_KNOBS 20 -/* number of output statistics. Only A [0..2] are currently used. */ +/* number of output statistics. Only stats [0..6] are currently used. */ #define COLAMD_STATS 20 -/* knobs [0] and A [0]: dense row knob and output statistic. */ +/* knobs [0] and stats [0]: dense row knob and output statistic. */ #define COLAMD_DENSE_ROW 0 -/* knobs [1] and A [1]: dense column knob and output statistic. */ +/* knobs [1] and stats [1]: dense column knob and output statistic. */ #define COLAMD_DENSE_COL 1 -/* A [2]: memory defragmentation count output statistic */ +/* stats [2]: memory defragmentation count output statistic */ #define COLAMD_DEFRAG_COUNT 2 -/* A [3]: whether or not the input columns were jumbled or had duplicates */ -#define COLAMD_JUMBLED_COLS 3 +/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define COLAMD_STATUS 3 +/* stats [4..6]: error info, or info on jumbled columns */ +#define COLAMD_INFO1 4 +#define COLAMD_INFO2 5 +#define COLAMD_INFO3 6 + +/* error codes returned in stats [3]: */ +#define COLAMD_OK (0) +#define COLAMD_OK_BUT_JUMBLED (1) +#define COLAMD_ERROR_A_not_present (-1) +#define COLAMD_ERROR_p_not_present (-2) +#define COLAMD_ERROR_nrow_negative (-3) +#define COLAMD_ERROR_ncol_negative (-4) +#define COLAMD_ERROR_nnz_negative (-5) +#define COLAMD_ERROR_p0_nonzero (-6) +#define COLAMD_ERROR_A_too_small (-7) +#define COLAMD_ERROR_col_length_negative (-8) +#define COLAMD_ERROR_row_index_out_of_bounds (-9) +#define COLAMD_ERROR_out_of_memory (-10) +#define COLAMD_ERROR_internal_error (-999) + /* ========================================================================== */ +/* === Row and Column structures ============================================ */ +/* ========================================================================== */ + +/* User code that makes use of the colamd/symamd routines need not directly */ +/* reference these structures. They are used only for the COLAMD_RECOMMENDED */ +/* macro. */ + +typedef struct Colamd_Col_struct +{ + int start ; /* index for A of first row in this column, or DEAD */ + /* if column is dead */ + int length ; /* number of rows in this column */ + union + { + int thickness ; /* number of original columns represented by this */ + /* col, if the column is alive */ + int parent ; /* parent in parent tree super-column structure, if */ + /* the column is dead */ + } shared1 ; + union + { + int score ; /* the score used to maintain heap, if col is alive */ + int order ; /* pivot ordering of this column, if col is dead */ + } shared2 ; + union + { + int headhash ; /* head of a hash bucket, if col is at the head of */ + /* a degree list */ + int hash ; /* hash value, if col is not in a degree list */ + int prev ; /* previous column in degree list, if col is in a */ + /* degree list (but not at the head of a degree list) */ + } shared3 ; + union + { + int degree_next ; /* next column, if col is in a degree list */ + int hash_next ; /* next column, if col is in a hash list */ + } shared4 ; + +} Colamd_Col ; + +typedef struct Colamd_Row_struct +{ + int start ; /* index for A of first col in this row */ + int length ; /* number of principal columns in this row */ + union + { + int degree ; /* number of principal & non-principal columns in row */ + int p ; /* used as a row pointer in init_rows_cols () */ + } shared1 ; + union + { + int mark ; /* for computing set differences and marking dead rows*/ + int first_column ;/* first column in row (used in garbage collection) */ + } shared2 ; + +} Colamd_Row ; + +/* ========================================================================== */ +/* === Colamd recommended memory size ======================================= */ +/* ========================================================================== */ + +/* + The recommended length Alen of the array A passed to colamd is given by + the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any + argument is negative. 2*nnz space is required for the row and column + indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is + required for the Col and Row arrays, respectively, which are internal to + colamd. An additional n_col space is the minimal amount of "elbow room", + and nnz/5 more space is recommended for run time efficiency. + + This macro is not needed when using symamd. + + Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid + gcc -pedantic warning messages. +*/ + +#define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int))) +#define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int))) + +#define COLAMD_RECOMMENDED(nnz, n_row, n_col) \ +( \ +((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \ +? \ + (-1) \ +: \ + (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \ +) + +/* ========================================================================== */ /* === Prototypes of user-callable routines ================================= */ /* ========================================================================== */ -int colamd_recommended /* returns recommended value of Alen */ +int colamd_recommended /* returns recommended value of Alen, */ + /* or (-1) if input arguments are erroneous */ ( int nnz, /* nonzeros in A */ int n_row, /* number of rows in A */ @@ -55,13 +209,41 @@ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ ) ; -int colamd /* returns TRUE if successful, FALSE otherwise*/ +int colamd /* returns (1) if successful, (0) otherwise*/ ( /* A and p arguments are modified on output */ int n_row, /* number of rows in A */ int n_col, /* number of columns in A */ int Alen, /* size of the array A */ int A [], /* row indices of A, of size Alen */ int p [], /* column pointers of A, of size n_col+1 */ - double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ + double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ + int stats [COLAMD_STATS] /* colamd output statistics and error codes */ ) ; +int symamd /* return (1) if OK, (0) otherwise */ +( + int n, /* number of rows and columns of A */ + int A [], /* row indices of A */ + int p [], /* column pointers of A */ + int perm [], /* output permutation, size n_col+1 */ + double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ + int stats [COLAMD_STATS], /* output statistics and error codes */ + void * (*allocate) (size_t, size_t), + /* pointer to calloc (ANSI C) or */ + /* mxCalloc (for MATLAB mexFunction) */ + void (*release) (void *) + /* pointer to free (ANSI C) or */ + /* mxFree (for MATLAB mexFunction) */ +) ; + +void colamd_report +( + int stats [COLAMD_STATS] +) ; + +void symamd_report +( + int stats [COLAMD_STATS] +) ; + +#endif /* COLAMD_H */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,32 @@ -/* +/*! @file cpanel_bmod.c + * \brief Performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ /* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. + */ #include #include -#include "csp_defs.h" +#include "slu_cdefs.h" /* * Function prototypes @@ -30,6 +35,25 @@ void cmatvec(int, int, int, complex *, complex *, complex *); extern void ccheck_tempv(); +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *    Performs numeric block updates (sup-panel) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ *    Before entering this routine, the original nonzeros in the panel 
+ *    were already copied into the spa[m,w].
+ *
+ *    Updated/Output parameters-
+ *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
+ *    collectively in the m-by-w vector dense[*]. 
+ * 
+ */ + void cpanel_bmod ( const int m, /* in - number of rows in the matrix */ @@ -44,23 +68,8 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose - * ======= - * - * Performs numeric block updates (sup-panel) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - * Before entering this routine, the original nonzeros in the panel - * were already copied into the spa[m,w]. - * - * Updated/Output parameters- - * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned - * collectively in the m-by-w vector dense[*]. - * - */ + #ifdef USE_VENDOR_BLAS #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,48 +1,32 @@ - -/* +/*! @file cpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" -#include "util.h" -void -cpanel_dfs ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - SuperMatrix *A, /* in - original matrix */ - int *perm_r, /* in */ - int *nseg, /* out */ - complex *dense, /* out */ - int *panel_lsub, /* out */ - int *segrep, /* out */ - int *repfnz, /* out */ - int *xprune, /* out */ - int *marker, /* out */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* +#include "slu_cdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -68,8 +52,29 @@
  *   repfnz: SuperA-col --> PA-row
  *   parent: SuperA-col --> SuperA-col
  *   xplore: SuperA-col --> index to L-structure
- *
+ * 
*/ + +void +cpanel_dfs ( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + complex *dense, /* out */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *xprune, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + NCPformat *Astore; complex *a; int *asub; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,44 +1,36 @@ -/* +/*! @file cpivotL.c + * \brief Performs numerical pivoting + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include #include -#include "csp_defs.h" +#include "slu_cdefs.h" #undef DEBUG -int -cpivotL( - const int jcol, /* in */ - const float u, /* in - diagonal pivoting threshold */ - int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ - int *perm_r, /* may be modified */ - int *iperm_r, /* in - inverse of perm_r */ - int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ - int *pivrow, /* out */ - GlobalLU_t *Glu, /* modified - global LU data structures */ - SuperLUStat_t *stat /* output */ - ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *   Performs the numerical pivoting on the current column of L,
@@ -57,8 +49,23 @@
  *
  *   Return value: 0      success;
  *                 i > 0  U(i,i) is exactly zero.
- *
+ * 
*/ + +int +cpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int *iperm_r, /* in - inverse of perm_r */ + int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ + int *pivrow, /* out */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + complex one = {1.0, 0.0}; int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ @@ -106,7 +113,7 @@ diag = EMPTY; old_pivptr = nsupc; for (isub = nsupc; isub < nsupr; ++isub) { - rtemp = slu_c_abs1 (&lu_col_ptr[isub]); + rtemp = c_abs1 (&lu_col_ptr[isub]); if ( rtemp > pivmax ) { pivmax = rtemp; pivptr = isub; @@ -117,8 +124,12 @@ /* Test for singularity */ if ( pivmax == 0.0 ) { +#if 1 *pivrow = lsub_ptr[pivptr]; perm_r[*pivrow] = jcol; +#else + perm_r[diagind] = jcol; +#endif *usepr = 0; return (jcol+1); } @@ -127,7 +138,7 @@ /* Choose appropriate pivotal element by our policy. */ if ( *usepr ) { - rtemp = slu_c_abs1 (&lu_col_ptr[old_pivptr]); + rtemp = c_abs1 (&lu_col_ptr[old_pivptr]); if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; else @@ -136,7 +147,7 @@ if ( *usepr == 0 ) { /* Use diagonal pivot? */ if ( diag >= 0 ) { /* diagonal exists */ - rtemp = slu_c_abs1 (&lu_col_ptr[diag]); + rtemp = c_abs1 (&lu_col_ptr[diag]); if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; } *pivrow = lsub_ptr[pivptr]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,21 +1,20 @@ - -/* +/*! @file cpivotgrowth.c + * \brief Computes the reciprocal pivot growth factor + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" -float -cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, - SuperMatrix *L, SuperMatrix *U) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -43,8 +42,14 @@
  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
  *          storage scheme, i.e., U has types: Stype = NC;
  *          Dtype = SLU_C; Mtype = TRU.
- *
+ * 
*/ + +float +cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, + SuperMatrix *L, SuperMatrix *U) +{ + NCformat *Astore; SCformat *Lstore; NCformat *Ustore; @@ -83,15 +88,15 @@ maxaj = 0.; oldcol = inv_perm_c[j]; for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i) - maxaj = SUPERLU_MAX( maxaj, slu_c_abs1( &Aval[i]) ); + maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) ); maxuj = 0.; for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++) - maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &Uval[i]) ); + maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) ); /* Supernode */ for (i = 0; i < nz_in_U; ++i) - maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &luval[i]) ); + maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) ); ++nz_in_U; luval += nsupr; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpruneL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpruneL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpruneL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,39 @@ - -/* +/*! @file cpruneL.c + * \brief Prunes the L-structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ *
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Prunes the L-structure of supernodes whose L-structure
+ *   contains the current pivot row "pivrow"
+ * 
+ */ + void cpruneL( const int jcol, /* in */ @@ -35,13 +46,7 @@ GlobalLU_t *Glu /* modified - global LU data structures */ ) { -/* - * Purpose - * ======= - * Prunes the L-structure of supernodes whose L-structure - * contains the current pivot row "pivrow" - * - */ + complex utemp; int jsupno, irep, irep1, kmin, kmax, krow, movnum; int i, ktemp, minloc, maxloc; @@ -108,8 +113,8 @@ kmax--; else if ( perm_r[lsub[kmin]] != EMPTY ) kmin++; - else { /* kmin below pivrow, and kmax above pivrow: - * interchange the two subscripts + else { /* kmin below pivrow (not yet pivoted), and kmax + * above pivrow: interchange the two subscripts */ ktemp = lsub[kmin]; lsub[kmin] = lsub[kmax]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadhb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadhb.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadhb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,18 +1,85 @@ - -/* +/*! @file creadhb.c + * \brief Read a matrix stored in Harwell-Boeing format + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Purpose
+ * =======
+ * 
+ * Read a COMPLEX PRECISION matrix stored in Harwell-Boeing format 
+ * as described below.
+ * 
+ * Line 1 (A72,A8) 
+ *  	Col. 1 - 72   Title (TITLE) 
+ *	Col. 73 - 80  Key (KEY) 
+ * 
+ * Line 2 (5I14) 
+ * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
+ * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
+ * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
+ * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
+ *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
+ *                    (including starting guesses and solution vectors 
+ *		       if present) 
+ *           	      (zero indicates no right-hand side data is present) 
+ *
+ * Line 3 (A3, 11X, 4I14) 
+ *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
+ * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
+ * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
+ *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
+ *	              (equal to number of entries for assembled matrices) 
+ * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
+ *	              (zero in the case of assembled matrices) 
+ * Line 4 (2A16, 2A20) 
+ * 	Col. 1 - 16   Format for pointers (PTRFMT) 
+ *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
+ *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
+ * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
+ *
+ * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
+ *    	Col. 1 	      Right-hand side type: 
+ *	         	  F for full storage or M for same format as matrix 
+ *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
+ *    	Col. 3        X if an exact solution vector(s) is supplied. 
+ *	Col. 15 - 28  Number of right-hand sides (NRHS) 
+ *	Col. 29 - 42  Number of row indices (NRHSIX) 
+ *          	      (ignored in case of unassembled matrices) 
+ *
+ * The three character type field on line 3 describes the matrix type. 
+ * The following table lists the permitted values for each of the three 
+ * characters. As an example of the type field, RSA denotes that the matrix 
+ * is real, symmetric, and assembled. 
+ *
+ * First Character: 
+ *	R Real matrix 
+ *	C Complex matrix 
+ *	P Pattern only (no numerical values supplied) 
+ *
+ * Second Character: 
+ *	S Symmetric 
+ *	U Unsymmetric 
+ *	H Hermitian 
+ *	Z Skew symmetric 
+ *	R Rectangular 
+ *
+ * Third Character: 
+ *	A Assembled 
+ *	E Elemental matrices (unassembled) 
+ *
+ * 
*/ #include #include -#include "csp_defs.h" +#include "slu_cdefs.h" -/* Eat up the rest of the current line */ +/*! \brief Eat up the rest of the current line */ int cDumpLine(FILE *fp) { register int c; @@ -60,7 +127,7 @@ return 0; } -int cReadVector(FILE *fp, int n, int *where, int perline, int persize) +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) { register int i, j, item; char tmp, buf[100]; @@ -80,7 +147,7 @@ return 0; } -/* Read complex numbers as pairs of (real, imaginary) */ +/*! \brief Read complex numbers as pairs of (real, imaginary) */ int cReadValues(FILE *fp, int n, complex *destination, int perline, int persize) { register int i, j, k, s, pair; @@ -118,72 +185,6 @@ creadhb(int *nrow, int *ncol, int *nonz, complex **nzval, int **rowind, int **colptr) { -/* - * Purpose - * ======= - * - * Read a COMPLEX PRECISION matrix stored in Harwell-Boeing format - * as described below. - * - * Line 1 (A72,A8) - * Col. 1 - 72 Title (TITLE) - * Col. 73 - 80 Key (KEY) - * - * Line 2 (5I14) - * Col. 1 - 14 Total number of lines excluding header (TOTCRD) - * Col. 15 - 28 Number of lines for pointers (PTRCRD) - * Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - * Col. 43 - 56 Number of lines for numerical values (VALCRD) - * Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - * (including starting guesses and solution vectors - * if present) - * (zero indicates no right-hand side data is present) - * - * Line 3 (A3, 11X, 4I14) - * Col. 1 - 3 Matrix type (see below) (MXTYPE) - * Col. 15 - 28 Number of rows (or variables) (NROW) - * Col. 29 - 42 Number of columns (or elements) (NCOL) - * Col. 43 - 56 Number of row (or variable) indices (NNZERO) - * (equal to number of entries for assembled matrices) - * Col. 57 - 70 Number of elemental matrix entries (NELTVL) - * (zero in the case of assembled matrices) - * Line 4 (2A16, 2A20) - * Col. 1 - 16 Format for pointers (PTRFMT) - * Col. 17 - 32 Format for row (or variable) indices (INDFMT) - * Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - * Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - * - * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - * Col. 1 Right-hand side type: - * F for full storage or M for same format as matrix - * Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - * Col. 3 X if an exact solution vector(s) is supplied. - * Col. 15 - 28 Number of right-hand sides (NRHS) - * Col. 29 - 42 Number of row indices (NRHSIX) - * (ignored in case of unassembled matrices) - * - * The three character type field on line 3 describes the matrix type. - * The following table lists the permitted values for each of the three - * characters. As an example of the type field, RSA denotes that the matrix - * is real, symmetric, and assembled. - * - * First Character: - * R Real matrix - * C Complex matrix - * P Pattern only (no numerical values supplied) - * - * Second Character: - * S Symmetric - * U Unsymmetric - * H Hermitian - * Z Skew symmetric - * R Rectangular - * - * Third Character: - * A Assembled - * E Elemental matrices (unassembled) - * - */ register int i, numer_lines = 0, rhscrd = 0; int tmp, colnum, colsize, rownum, rowsize, valnum, valsize; @@ -254,8 +255,8 @@ printf("valnum %d, valsize %d\n", valnum, valsize); #endif - cReadVector(fp, *ncol+1, *colptr, colnum, colsize); - cReadVector(fp, *nonz, *rowind, rownum, rowsize); + ReadVector(fp, *ncol+1, *colptr, colnum, colsize); + ReadVector(fp, *nonz, *rowind, rownum, rowsize); if ( numer_lines ) { cReadValues(fp, *nonz, *nzval, valnum, valsize); } Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadrb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadrb.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/creadrb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,246 @@ + +/*! @file creadrb.c + * \brief Read a matrix stored in Rutherford-Boeing format + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ * + * Purpose + * ======= + * + * Read a COMPLEX PRECISION matrix stored in Rutherford-Boeing format + * as described below. + * + * Line 1 (A72, A8) + * Col. 1 - 72 Title (TITLE) + * Col. 73 - 80 Matrix name / identifier (MTRXID) + * + * Line 2 (I14, 3(1X, I13)) + * Col. 1 - 14 Total number of lines excluding header (TOTCRD) + * Col. 16 - 28 Number of lines for pointers (PTRCRD) + * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) + * Col. 44 - 56 Number of lines for numerical values (VALCRD) + * + * Line 3 (A3, 11X, 4(1X, I13)) + * Col. 1 - 3 Matrix type (see below) (MXTYPE) + * Col. 15 - 28 Compressed Column: Number of rows (NROW) + * Elemental: Largest integer used to index variable (MVAR) + * Col. 30 - 42 Compressed Column: Number of columns (NCOL) + * Elemental: Number of element matrices (NELT) + * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) + * Elemental: Number of variable indeces (NVARIX) + * Col. 58 - 70 Compressed Column: Unused, explicitly zero + * Elemental: Number of elemental matrix entries (NELTVL) + * + * Line 4 (2A16, A20) + * Col. 1 - 16 Fortran format for pointers (PTRFMT) + * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) + * Col. 33 - 52 Fortran format for numerical values of coefficient matrix + * (VALFMT) + * (blank in the case of matrix patterns) + * + * The three character type field on line 3 describes the matrix type. + * The following table lists the permitted values for each of the three + * characters. As an example of the type field, RSA denotes that the matrix + * is real, symmetric, and assembled. + * + * First Character: + * R Real matrix + * C Complex matrix + * I integer matrix + * P Pattern only (no numerical values supplied) + * Q Pattern only (numerical values supplied in associated auxiliary value + * file) + * + * Second Character: + * S Symmetric + * U Unsymmetric + * H Hermitian + * Z Skew symmetric + * R Rectangular + * + * Third Character: + * A Compressed column form + * E Elemental form + * + * + */ + +#include "slu_cdefs.h" + + +/*! \brief Eat up the rest of the current line */ +static int cDumpLine(FILE *fp) +{ + register int c; + while ((c = fgetc(fp)) != '\n') ; + return 0; +} + +static int cParseIntFormat(char *buf, int *num, int *size) +{ + char *tmp; + + tmp = buf; + while (*tmp++ != '(') ; + sscanf(tmp, "%d", num); + while (*tmp != 'I' && *tmp != 'i') ++tmp; + ++tmp; + sscanf(tmp, "%d", size); + return 0; +} + +static int cParseFloatFormat(char *buf, int *num, int *size) +{ + char *tmp, *period; + + tmp = buf; + while (*tmp++ != '(') ; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' + && *tmp != 'F' && *tmp != 'f') { + /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the + num picked up refers to P, which should be skipped. */ + if (*tmp=='p' || *tmp=='P') { + ++tmp; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + } else { + ++tmp; + } + } + ++tmp; + period = tmp; + while (*period != '.' && *period != ')') ++period ; + *period = '\0'; + *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/ + + return 0; +} + +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) +{ + register int i, j, item; + char tmp, buf[100]; + + i = 0; + while (i < n) { + fgets(buf, 100, fp); /* read a line at a time */ + for (j=0; j + * -- SuperLU routine (version 4.0) -- + * Lawrence Berkeley National Laboratory. + * June 30, 2009 + * + */ + +#include "slu_cdefs.h" + + +void +creadtriple(int *m, int *n, int *nonz, + complex **nzval, int **rowind, int **colptr) +{ +/* + * Output parameters + * ================= + * (a,asub,xa): asub[*] contains the row subscripts of nonzeros + * in columns of matrix A; a[*] the numerical values; + * row i of A is given by a[k],k=xa[i],...,xa[i+1]-1. + * + */ + int j, k, jsize, nnz, nz; + complex *a, *val; + int *asub, *xa, *row, *col; + int zero_base = 0; + + /* Matrix format: + * First line: #rows, #cols, #non-zero + * Triplet in the rest of lines: + * row, col, value + */ + + scanf("%d%d", n, nonz); + *m = *n; + printf("m %d, n %d, nonz %d\n", *m, *n, *nonz); + callocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */ + a = *nzval; + asub = *rowind; + xa = *colptr; + + val = (complex *) SUPERLU_MALLOC(*nonz * sizeof(complex)); + row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + + for (j = 0; j < *n; ++j) xa[j] = 0; + + /* Read into the triplet array from a file */ + for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { + scanf("%d%d%f%f\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); + + if ( nnz == 0 ) { /* first nonzero */ + if ( row[0] == 0 || col[0] == 0 ) { + zero_base = 1; + printf("triplet file: row/col indices are zero-based.\n"); + } else + printf("triplet file: row/col indices are one-based.\n"); + } + + if ( !zero_base ) { + /* Change to 0-based indexing. */ + --row[nz]; + --col[nz]; + } + + if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n + /*|| val[nz] == 0.*/) { + fprintf(stderr, "nz %d, (%d, %d) = (%e,%e) out of bound, removed\n", + nz, row[nz], col[nz], val[nz].r, val[nz].i); + exit(-1); + } else { + ++xa[col[nz]]; + ++nz; + } + } + + *nonz = nz; + + /* Initialize the array of column pointers */ + k = 0; + jsize = xa[0]; + xa[0] = 0; + for (j = 1; j < *n; ++j) { + k += jsize; + jsize = xa[j]; + xa[j] = k; + } + + /* Copy the triplets into the column oriented storage */ + for (nz = 0; nz < *nonz; ++nz) { + j = col[nz]; + k = xa[j]; + asub[k] = row[nz]; + a[k] = val[nz]; + ++xa[j]; + } + + /* Reset the column pointers to the beginning of each column */ + for (j = *n; j > 0; --j) + xa[j] = xa[j-1]; + xa[0] = 0; + + SUPERLU_FREE(val); + SUPERLU_FREE(row); + SUPERLU_FREE(col); + +#ifdef CHK_INPUT + { + int i; + for (i = 0; i < *n; i++) { + printf("Col %d, xa %d\n", i, xa[i]); + for (k = xa[i]; k < xa[i+1]; k++) + printf("%d\t%16.10f\n", asub[k], a[k]); + } + } +#endif + +} + + +void creadrhs(int m, complex *b) +{ + FILE *fp, *fopen(); + int i; + /*int j;*/ + + if ( !(fp = fopen("b.dat", "r")) ) { + fprintf(stderr, "dreadrhs: file does not exist\n"); + exit(-1); + } + for (i = 0; i < m; ++i) + fscanf(fp, "%f%f\n", &b[i].r, &b[i].i); + + /* readpair_(j, &b[i]);*/ + fclose(fp); +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,29 +1,31 @@ -/* +/*! @file csnode_bmod.c + * \brief Performs numeric block updates within the relaxed snode. + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" +#include "slu_cdefs.h" -/* - * Performs numeric block updates within the relaxed snode. + +/*! \brief Performs numeric block updates within the relaxed snode. */ int csnode_bmod ( Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,46 @@ - -/* +/*! @file csnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    csnode_dfs() - Determine the union of the row structures of those 
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + int csnode_dfs ( const int jcol, /* in - start of the supernode */ @@ -35,19 +53,7 @@ GlobalLU_t *Glu /* modified */ ) { -/* Purpose - * ======= - * csnode_dfs() - Determine the union of the row structures of those - * columns within the relaxed snode. - * Note: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * Return value - * ============ - * 0 success; - * >0 number of bytes allocated when run out of memory. - * - */ + register int i, k, ifrom, ito, nextl, new_next; int nsuper, krow, kmark, mem_error; int *xsup, *supno; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas2.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas2.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas2.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,17 +1,20 @@ -/* +/*! @file csp_blas2.c + * \brief Sparse BLAS 2, using some dense BLAS 2 operations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ /* * File name: csp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ -#include "csp_defs.h" +#include "slu_cdefs.h" /* * Function prototypes @@ -20,12 +23,9 @@ void clsolve(int, int, complex*, complex*); void cmatvec(int, int, int, complex*, complex*, complex*); - -int -sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, - SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief Solves one of the systems of equations A*x = b, or A'*x = b + * + *
  *   Purpose
  *   =======
  *
@@ -49,8 +49,8 @@
  *             On entry, trans specifies the equations to be solved as   
  *             follows:   
  *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A**H*x = b.   
+ *                trans = 'T' or 't'   A'*x = b.
+ *                trans = 'C' or 'c'   A^H*x = b.   
  *
  *   diag   - (input) char*
  *             On entry, diag specifies whether or not A is unit   
@@ -75,8 +75,12 @@
  *
  *   info    - (output) int*
  *             If *info = -i, the i-th argument had an illegal value.
- *
+ * 
*/ +int +sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, + SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info) +{ #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), @@ -98,7 +102,8 @@ /* Test the input parameters */ *info = 0; if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; - else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; + else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && + !lsame_(trans, "C")) *info = -2; else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; @@ -131,7 +136,8 @@ luptr = L_NZ_START(fsupc); nrow = nsupr - nsupc; - solve_ops += 4 * nsupc * (nsupc - 1); + /* 1 c_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc; solve_ops += 8 * nrow * nsupc; if ( nsupc == 1 ) { @@ -184,7 +190,8 @@ nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); - solve_ops += 4 * nsupc * (nsupc + 1); + /* 1 c_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { c_div(&x[fsupc], &x[fsupc], &Lval[luptr]); @@ -219,7 +226,7 @@ } /* for k ... */ } - } else if (lsame_(trans, "T")) { /* Form x := inv(A')*x */ + } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */ if ( lsame_(uplo, "L") ) { /* Form x := inv(L')*x */ @@ -249,13 +256,13 @@ solve_ops += 4 * nsupc * (nsupc - 1); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); - ftcs2 = _cptofcd(trans, strlen("T")); + ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else - ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); + ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); #endif } } @@ -278,20 +285,21 @@ } } - solve_ops += 4 * nsupc * (nsupc + 1); + /* 1 c_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { c_div(&x[fsupc], &x[fsupc], &Lval[luptr]); } else { #ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); - ftcs2 = _cptofcd(trans, strlen("T")); + ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else - ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); + ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); #endif } } /* for k ... */ @@ -321,9 +329,9 @@ c_sub(&x[jcol], &x[jcol], &comp_zero); iptr++; } - } - - if ( nsupc > 1 ) { + } + + if ( nsupc > 1 ) { solve_ops += 4 * nsupc * (nsupc - 1); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); @@ -357,8 +365,9 @@ } } - solve_ops += 4 * nsupc * (nsupc + 1); - + /* 1 c_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; + if ( nsupc == 1 ) { cc_conj(&temp, &Lval[luptr]); c_div(&x[fsupc], &x[fsupc], &temp); @@ -373,12 +382,11 @@ ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #endif - } - } /* for k ... */ - } + } + } /* for k ... */ + } } - stat->ops[SOLVE] += solve_ops; SUPERLU_FREE(work); return 0; @@ -386,65 +394,69 @@ +/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y + * + *
  
+ *   Purpose   
+ *   =======   
+ *
+ *   sp_cgemv()  performs one of the matrix-vector operations   
+ *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
+ *   where alpha and beta are scalars, x and y are vectors and A is a
+ *   sparse A->nrow by A->ncol matrix.   
+ *
+ *   Parameters   
+ *   ==========   
+ *
+ *   TRANS  - (input) char*
+ *            On entry, TRANS specifies the operation to be performed as   
+ *            follows:   
+ *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
+ *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
+ *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
+ *
+ *   ALPHA  - (input) complex
+ *            On entry, ALPHA specifies the scalar alpha.   
+ *
+ *   A      - (input) SuperMatrix*
+ *            Before entry, the leading m by n part of the array A must   
+ *            contain the matrix of coefficients.   
+ *
+ *   X      - (input) complex*, array of DIMENSION at least   
+ *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
+ *           and at least   
+ *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
+ *            Before entry, the incremented array X must contain the   
+ *            vector x.   
+ * 
+ *   INCX   - (input) int
+ *            On entry, INCX specifies the increment for the elements of   
+ *            X. INCX must not be zero.   
+ *
+ *   BETA   - (input) complex
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then Y need not be set on input.   
+ *
+ *   Y      - (output) complex*,  array of DIMENSION at least   
+ *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
+ *            Before entry with BETA non-zero, the incremented array Y   
+ *            must contain the vector y. On exit, Y is overwritten by the 
+ *            updated vector y.
+ *	      
+ *   INCY   - (input) int
+ *            On entry, INCY specifies the increment for the elements of   
+ *            Y. INCY must not be zero.   
+ *
+ *    ==== Sparse Level 2 Blas routine.   
+ * 
+*/ int sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, int incx, complex beta, complex *y, int incy) { -/* Purpose - ======= - sp_cgemv() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) complex - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - - X - (input) complex*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) complex - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) complex*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -*/ - /* Local variables */ NCformat *Astore; complex *Aval; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas3.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas3.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_blas3.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,116 +1,122 @@ - -/* +/*! @file csp_blas3.c + * \brief Sparse BLAS3, using some dense BLAS3 operations + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ -#include "csp_defs.h" -#include "util.h" +#include "slu_cdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ * 
+ *   sp_c performs one of the matrix-matrix operations   
+ * 
+ *      C := alpha*op( A )*op( B ) + beta*C,   
+ * 
+ *   where  op( X ) is one of 
+ * 
+ *      op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+ * 
+ *   alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
+ *   an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
+ *   
+ * 
+ *   Parameters   
+ *   ==========   
+ * 
+ *   TRANSA - (input) char*
+ *            On entry, TRANSA specifies the form of op( A ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSA = 'N' or 'n',  op( A ) = A.   
+ *               TRANSA = 'T' or 't',  op( A ) = A'.   
+ *               TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
+ *            Unchanged on exit.   
+ * 
+ *   TRANSB - (input) char*
+ *            On entry, TRANSB specifies the form of op( B ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSB = 'N' or 'n',  op( B ) = B.   
+ *               TRANSB = 'T' or 't',  op( B ) = B'.   
+ *               TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
+ *            Unchanged on exit.   
+ * 
+ *   M      - (input) int   
+ *            On entry,  M  specifies  the number of rows of the matrix 
+ *	     op( A ) and of the matrix C.  M must be at least zero. 
+ *	     Unchanged on exit.   
+ * 
+ *   N      - (input) int
+ *            On entry,  N specifies the number of columns of the matrix 
+ *	     op( B ) and the number of columns of the matrix C. N must be 
+ *	     at least zero.
+ *	     Unchanged on exit.   
+ * 
+ *   K      - (input) int
+ *            On entry, K specifies the number of columns of the matrix 
+ *	     op( A ) and the number of rows of the matrix op( B ). K must 
+ *	     be at least  zero.   
+ *           Unchanged on exit.
+ *      
+ *   ALPHA  - (input) complex
+ *            On entry, ALPHA specifies the scalar alpha.   
+ * 
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ * 
+ *   B      - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is 
+ *            n when TRANSB = 'N' or 'n',  and is  k otherwise.   
+ *            Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
+ *            part of the array B must contain the matrix B, otherwise 
+ *            the leading n by k part of the array B must contain the 
+ *            matrix B.   
+ *            Unchanged on exit.   
+ * 
+ *   LDB    - (input) int
+ *            On entry, LDB specifies the first dimension of B as declared 
+ *            in the calling (sub) program. LDB must be at least max( 1, n ).  
+ *            Unchanged on exit.   
+ * 
+ *   BETA   - (input) complex
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then C need not be set on input.   
+ *  
+ *   C      - COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
+ *            Before entry, the leading m by n part of the array C must 
+ *            contain the matrix C,  except when beta is zero, in which 
+ *            case C need not be set on entry.   
+ *            On exit, the array C is overwritten by the m by n matrix 
+ *	     ( alpha*op( A )*B + beta*C ).   
+ *  
+ *   LDC    - (input) int
+ *            On entry, LDC specifies the first dimension of C as declared 
+ *            in the calling (sub)program. LDC must be at least max(1,m).   
+ *            Unchanged on exit.   
+ *  
+ *   ==== Sparse Level 3 Blas routine.   
+ * 
+ */ + int sp_cgemm(char *transa, char *transb, int m, int n, int k, complex alpha, SuperMatrix *A, complex *b, int ldb, complex beta, complex *c, int ldc) { -/* Purpose - ======= - - sp_c performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) complex - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. - In the future, more general A can be handled. - - B - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) complex - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - COMPLEX PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -*/ int incx = 1, incy = 1; int j; Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_defs.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_defs.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_defs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,237 +0,0 @@ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#ifndef __SUPERLU_cSP_DEFS /* allow multiple inclusions */ -#define __SUPERLU_cSP_DEFS - -/* - * File name: csp_defs.h - * Purpose: Sparse matrix types and function prototypes - * History: - */ - -#ifdef _CRAY -#include -#include -#endif - -/* Define my integer type int_t */ -typedef int int_t; /* default */ - -#include "Cnames.h" -#include "supermatrix.h" -#include "util.h" -#include "scomplex.h" - - -/* - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. - * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ -typedef struct { - int *xsup; /* supernode and column mapping */ - int *supno; - int *lsub; /* compressed L subscripts */ - int *xlsub; - complex *lusup; /* L supernodes */ - int *xlusup; - complex *ucol; /* U columns */ - int *usub; - int *xusub; - int nzlmax; /* current max size of lsub */ - int nzumax; /* " " " ucol */ - int nzlumax; /* " " " lusup */ - int n; /* number of columns in the matrix */ - LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ -} GlobalLU_t; - -typedef struct { - float for_lu; - float total_needed; - int expansions; -} mem_usage_t; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Driver routines */ -extern void -cgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); -extern void -cgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, - char *, float *, float *, SuperMatrix *, SuperMatrix *, - void *, int, SuperMatrix *, SuperMatrix *, - float *, float *, float *, float *, - mem_usage_t *, SuperLUStat_t *, int *); - -/* Supernodal LU factor related */ -extern void -cCreate_CompCol_Matrix(SuperMatrix *, int, int, int, complex *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -cCreate_CompRow_Matrix(SuperMatrix *, int, int, int, complex *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -cCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); -extern void -cCreate_Dense_Matrix(SuperMatrix *, int, int, complex *, int, - Stype_t, Dtype_t, Mtype_t); -extern void -cCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, complex *, - int *, int *, int *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -extern void -cCopy_Dense_Matrix(int, int, complex *, int, complex *, int); - -extern void countnz (const int, int *, int *, int *, GlobalLU_t *); -extern void fixupL (const int, const int *, GlobalLU_t *); - -extern void callocateA (int, int, complex **, int **, int **); -extern void cgstrf (superlu_options_t*, SuperMatrix*, float, - int, int, int*, void *, int, int *, int *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); -extern int csnode_dfs (const int, const int, const int *, const int *, - const int *, int *, int *, GlobalLU_t *); -extern int csnode_bmod (const int, const int, const int, complex *, - complex *, GlobalLU_t *, SuperLUStat_t*); -extern void cpanel_dfs (const int, const int, const int, SuperMatrix *, - int *, int *, complex *, int *, int *, int *, - int *, int *, int *, int *, GlobalLU_t *); -extern void cpanel_bmod (const int, const int, const int, const int, - complex *, complex *, int *, int *, - GlobalLU_t *, SuperLUStat_t*); -extern int ccolumn_dfs (const int, const int, int *, int *, int *, int *, - int *, int *, int *, int *, int *, GlobalLU_t *); -extern int ccolumn_bmod (const int, const int, complex *, - complex *, int *, int *, int, - GlobalLU_t *, SuperLUStat_t*); -extern int ccopy_to_ucol (int, int, int *, int *, int *, - complex *, GlobalLU_t *); -extern int cpivotL (const int, const float, int *, int *, - int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); -extern void cpruneL (const int, const int *, const int, const int, - const int *, const int *, int *, GlobalLU_t *); -extern void creadmt (int *, int *, int *, complex **, int **, int **); -extern void cGenXtrue (int, int, complex *, int); -extern void cFillRHS (trans_t, int, complex *, int, SuperMatrix *, - SuperMatrix *); -extern void cgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, - SuperMatrix *, SuperLUStat_t*, int *); - - -/* Driver related */ - -extern void cgsequ (SuperMatrix *, float *, float *, float *, - float *, float *, int *); -extern void claqgs (SuperMatrix *, float *, float *, float, - float, float, char *); -extern void cgscon (char *, SuperMatrix *, SuperMatrix *, - float, float *, SuperLUStat_t*, int *); -extern float cPivotGrowth(int, SuperMatrix *, int *, - SuperMatrix *, SuperMatrix *); -extern void cgsrfs (trans_t, SuperMatrix *, SuperMatrix *, - SuperMatrix *, int *, int *, char *, float *, - float *, SuperMatrix *, SuperMatrix *, - float *, float *, SuperLUStat_t*, int *); - -extern int sp_ctrsv (char *, char *, char *, SuperMatrix *, - SuperMatrix *, complex *, SuperLUStat_t*, int *); -extern int sp_cgemv (char *, complex, SuperMatrix *, complex *, - int, complex, complex *, int); - -extern int sp_cgemm (char *, char *, int, int, int, complex, - SuperMatrix *, complex *, int, complex, - complex *, int); - -/* Memory-related */ -extern int cLUMemInit (fact_t, void *, int, int, int, int, int, - SuperMatrix *, SuperMatrix *, - GlobalLU_t *, int **, complex **); -extern void cSetRWork (int, int, complex *, complex **, complex **); -extern void cLUWorkFree (int *, complex *, GlobalLU_t *); -extern int cLUMemXpand (int, int, MemType, int *, GlobalLU_t *); - -extern complex *complexMalloc(int); -extern complex *complexCalloc(int); -extern float *floatMalloc(int); -extern float *floatCalloc(int); -extern int cmemory_usage(const int, const int, const int, const int); -extern int cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); - -/* Auxiliary routines */ -extern void creadhb(int *, int *, int *, complex **, int **, int **); -extern void cCompRow_to_CompCol(int, int, int, complex*, int*, int*, - complex **, int **, int **); -extern void cfill (complex *, int, complex); -extern void cinf_norm_error (int, SuperMatrix *, complex *); -extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, - complex, complex, complex *, complex *, char *); - -/* Routines for debugging */ -extern void cPrint_CompCol_Matrix(char *, SuperMatrix *); -extern void cPrint_SuperNode_Matrix(char *, SuperMatrix *); -extern void cPrint_Dense_Matrix(char *, SuperMatrix *); -extern void print_lu_col(char *, int, int, int *, GlobalLU_t *); -extern void check_tempv(int, complex *); - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_cSP_DEFS */ - Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,26 +1,29 @@ -/* - * -- SuperLU routine (version 3.0) -- +/*! @file cutil.c + * \brief Matrix utility functions + * + *
+ * -- SuperLU routine (version 3.1) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
- * October 15, 2003
+ * August 1, 2008
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include -#include "csp_defs.h" +#include "slu_cdefs.h" void cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, @@ -64,7 +67,7 @@ Astore->rowptr = rowptr; } -/* Copy matrix A into matrix B. */ +/*! \brief Copy matrix A into matrix B. */ void cCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) { @@ -108,12 +111,7 @@ cCopy_Dense_Matrix(int M, int N, complex *X, int ldx, complex *Y, int ldy) { -/* - * - * Purpose - * ======= - * - * Copies a two-dimensional matrix X to another matrix Y. +/*! \brief Copies a two-dimensional matrix X to another matrix Y. */ int i, j; @@ -150,8 +148,7 @@ } -/* - * Convert a row compressed storage into a column compressed storage. +/*! \brief Convert a row compressed storage into a column compressed storage. */ void cCompRow_to_CompCol(int m, int n, int nnz, @@ -240,7 +237,8 @@ for (j = c; j < c + nsup; ++j) { d = Astore->nzval_colptr[j]; for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) { - printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]); + printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]); + d += 2; } } } @@ -266,23 +264,24 @@ void cPrint_Dense_Matrix(char *what, SuperMatrix *A) { - DNformat *Astore; - register int i; + DNformat *Astore = (DNformat *) A->Store; + register int i, j, lda = Astore->lda; float *dp; printf("\nDense matrix %s:\n", what); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; dp = (float *) Astore->nzval; - printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); + printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda); printf("\nnzval: "); - for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i]); + for (j = 0; j < A->ncol; ++j) { + for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i + j*2*lda]); + printf("\n"); + } printf("\n"); fflush(stdout); } -/* - * Diagnostic print of column "jcol" in the U/L factor. +/*! \brief Diagnostic print of column "jcol" in the U/L factor. */ void cprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) @@ -324,9 +323,7 @@ } -/* - * Check whether tempv[] == 0. This should be true before and after - * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". +/*! \brief Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod". */ void ccheck_tempv(int n, complex *tempv) { @@ -353,8 +350,7 @@ } } -/* - * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's +/*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void cFillRHS(trans_t trans, int nrhs, complex *x, int ldx, @@ -383,8 +379,7 @@ } -/* - * Fills a complex precision array with a given value. +/*! \brief Fills a complex precision array with a given value. */ void cfill(complex *a, int alen, complex dval) @@ -395,8 +390,7 @@ -/* - * Check the inf-norm of the error vector +/*! \brief Check the inf-norm of the error vector */ void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue) { @@ -414,8 +408,8 @@ err = xnorm = 0.0; for (i = 0; i < X->nrow; i++) { c_sub(&temp, &soln_work[i], &xtrue[i]); - err = SUPERLU_MAX(err, slu_c_abs(&temp)); - xnorm = SUPERLU_MAX(xnorm, slu_c_abs(&soln_work[i])); + err = SUPERLU_MAX(err, c_abs(&temp)); + xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i])); } err = err / xnorm; printf("||X - Xtrue||/||X|| = %e\n", err); @@ -424,7 +418,7 @@ -/* Print performance of the code. */ +/*! \brief Print performance of the code. */ void cPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, float rpg, float rcond, float *ferr, @@ -452,9 +446,9 @@ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, - mem_usage->expansions); + printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + mem_usage->for_lu/1e6, mem_usage->total_needed/1e6); + printf("Number of memory expansions: %d\n", stat->expansions); printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,39 +1,38 @@ -/* +/*! @file dGetDiagU.c + * \brief Extracts main diagonal of matrix + * + *
 
  * -- Auxiliary routine in SuperLU (version 2.0) --
  * Lawrence Berkeley National Lab, Univ. of California Berkeley.
  * Xiaoye S. Li
  * September 11, 2003
  *
- */
+ *  Purpose
+ * =======
+ *
+ * GetDiagU extracts the main diagonal of matrix U of the LU factorization.
+ *  
+ * Arguments
+ * =========
+ *
+ * L      (input) SuperMatrix*
+ *        The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *        dgstrf(). Use compressed row subscripts storage for supernodes,
+ *        i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
+ *
+ * diagU  (output) double*, dimension (n)
+ *        The main diagonal of matrix U.
+ *
+ * Note
+ * ====
+ * The diagonal blocks of the L and U matrices are stored in the L
+ * data structures.
+ * 
+*/ +#include -#include "dsp_defs.h" - - void dGetDiagU(SuperMatrix *L, double *diagU) { - /* - * Purpose - * ======= - * - * GetDiagU extracts the main diagonal of matrix U of the LU factorization. - * - * Arguments - * ========= - * - * L (input) SuperMatrix* - * The factor L from the factorization Pr*A*Pc=L*U as computed by - * dgstrf(). Use compressed row subscripts storage for supernodes, - * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. - * - * diagU (output) double*, dimension (n) - * The main diagonal of matrix U. - * - * Note - * ==== - * The diagonal blocks of the L and U matrices are stored in the L - * data structures. - * - */ int_t i, k, nsupers; int_t fsupc, nsupr, nsupc, luptr; double *dblock, *Lval; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,29 @@ -/* +/*! @file dcolumn_bmod.c + * \brief performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ *  Permission is hereby granted to use or copy this program for any
+ *  purpose, provided the above notices are retained on all copies.
+ *  Permission to modify the code and to distribute modified code is
+ *  granted, provided the above notices are retained, and a notice that
+ *  the code was modified is included with the above copyright notice.
+ * 
*/ #include #include -#include "dsp_defs.h" +#include "slu_ddefs.h" /* * Function prototypes @@ -32,8 +34,17 @@ -/* Return value: 0 - successful return +/*! \brief + * + *
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int dcolumn_bmod ( @@ -48,14 +59,7 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose: - * ======== - * Performs numeric block updates (sup-col) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - */ + #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,50 +1,38 @@ - -/* +/*! @file dcolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" -/* What type of supernodes we want */ +/*! \brief What type of supernodes we want */ #define T2_SUPER -int -dcolumn_dfs( - const int m, /* in - number of rows in the matrix */ - const int jcol, /* in */ - int *perm_r, /* in */ - int *nseg, /* modified - with new segments appended */ - int *lsub_col, /* in - defines the RHS vector to start the dfs */ - int *segrep, /* modified - with new segments appended */ - int *repfnz, /* modified */ - int *xprune, /* modified */ - int *marker, /* modified */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* + +/*! \brief + * + *
  * Purpose
  * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
+ *   DCOLUMN_DFS performs a symbolic factorization on column jcol, and
  *   decide the supernode boundary.
  *
  *   This routine does not use numeric values, but only use the RHS 
@@ -72,8 +60,25 @@
  * ============
  *     0  success;
  *   > 0  number of bytes allocated when run out of space.
- *
+ * 
*/ +int +dcolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *xprune, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + int jcolp1, jcolm1, jsuper, nsuper, nextl; int k, krep, krow, kmark, kperm; int *marker2; /* Used for small panel LU */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,24 @@ -/* +/*! @file dcomplex.c + * \brief Common arithmetic for complex type + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
- */
-/*
  * This file defines common arithmetic operations for complex type.
+ * 
*/ + #include +#include #include -#include "dcomplex.h" +#include "slu_dcomplex.h" -/* Complex Division c = a/b */ +/*! \brief Complex Division c = a/b */ void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) { double ratio, den; @@ -26,8 +30,8 @@ abi = - abi; if( abr <= abi ) { if (abi == 0) { - fprintf(stderr, "z_div.c: division by zero"); - exit (-1); + fprintf(stderr, "z_div.c: division by zero\n"); + exit(-1); } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); @@ -43,7 +47,8 @@ c->i = ci; } -/* Returns sqrt(z.r^2 + z.i^2) */ + +/*! \brief Returns sqrt(z.r^2 + z.i^2) */ double z_abs(doublecomplex *z) { double temp; @@ -65,8 +70,7 @@ } -/* Approximates the abs */ -/* Returns abs(z.r) + abs(z.i) */ +/*! \brief Approximates the abs. Returns abs(z.r) + abs(z.i) */ double z_abs1(doublecomplex *z) { double real = z->r; @@ -78,7 +82,7 @@ return (real + imag); } -/* Return the exponentiation */ +/*! \brief Return the exponentiation */ void z_exp(doublecomplex *r, doublecomplex *z) { double expx; @@ -88,17 +92,56 @@ r->i = expx * sin(z->i); } -/* Return the complex conjugate */ +/*! \brief Return the complex conjugate */ void d_cnjg(doublecomplex *r, doublecomplex *z) { r->r = z->r; r->i = -z->i; } -/* Return the imaginary part */ +/*! \brief Return the imaginary part */ double d_imag(doublecomplex *z) { return (z->i); } +/*! \brief SIGN functions for complex number. Returns z/abs(z) */ +doublecomplex z_sgn(doublecomplex *z) +{ + register double t = z_abs(z); + register doublecomplex retval; + + if (t == 0.0) { + retval.r = 1.0, retval.i = 0.0; + } else { + retval.r = z->r / t, retval.i = z->i / t; + } + + return retval; +} + +/*! \brief Square-root of a complex number. */ +doublecomplex z_sqrt(doublecomplex *z) +{ + doublecomplex retval; + register double cr, ci, real, imag; + + real = z->r; + imag = z->i; + + if ( imag == 0.0 ) { + retval.r = sqrt(real); + retval.i = 0.0; + } else { + ci = (sqrt(real*real + imag*imag) - real) / 2.0; + ci = sqrt(ci); + cr = imag / (2.0 * ci); + retval.r = cr; + retval.i = ci; + } + + return retval; +} + + Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,73 +0,0 @@ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -#ifndef __SUPERLU_DCOMPLEX /* allow multiple inclusions */ -#define __SUPERLU_DCOMPLEX - -/* - * This header file is to be included in source files z*.c - */ -#ifndef DCOMPLEX_INCLUDE -#define DCOMPLEX_INCLUDE - -typedef struct { double r, i; } doublecomplex; - - -/* Macro definitions */ - -/* Complex Addition c = a + b */ -#define z_add(c, a, b) { (c)->r = (a)->r + (b)->r; \ - (c)->i = (a)->i + (b)->i; } - -/* Complex Subtraction c = a - b */ -#define z_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \ - (c)->i = (a)->i - (b)->i; } - -/* Complex-Double Multiplication */ -#define zd_mult(c, a, b) { (c)->r = (a)->r * (b); \ - (c)->i = (a)->i * (b); } - -/* Complex-Complex Multiplication */ -#define zz_mult(c, a, b) { \ - double cr, ci; \ - cr = (a)->r * (b)->r - (a)->i * (b)->i; \ - ci = (a)->i * (b)->r + (a)->r * (b)->i; \ - (c)->r = cr; \ - (c)->i = ci; \ - } - -#define zz_conj(a, b) { \ - (a)->r = (b)->r; \ - (a)->i = -((b)->i); \ - } - -/* Complex equality testing */ -#define z_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) - - -#ifdef __cplusplus -extern "C" { -#endif - -/* Prototypes for functions in dcomplex.c */ -void z_div(doublecomplex *, doublecomplex *, doublecomplex *); -double z_abs(doublecomplex *); /* exact */ -double z_abs1(doublecomplex *); /* approximate */ -void z_exp(doublecomplex *, doublecomplex *); -void d_cnjg(doublecomplex *r, doublecomplex *z); -double d_imag(doublecomplex *); - - -#ifdef __cplusplus - } -#endif - -#endif - -#endif /* __SUPERLU_DCOMPLEX */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcopy_to_ucol.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,26 @@ - -/* +/*! @file dcopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" int dcopy_to_ucol( @@ -47,7 +46,6 @@ double *ucol; int *usub, *xusub; int nzumax; - double zero = 0.0; xsup = Glu->xsup; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ddiagonal.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ddiagonal.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ddiagonal.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,129 @@ + +/*! @file ddiagonal.c + * \brief Auxiliary routines to work with diagonal elements + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +int dfill_diag(int n, NCformat *Astore) +/* fill explicit zeros on the diagonal entries, so that the matrix is not + structurally singular. */ +{ + double *nzval = (double *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + double *nzval_new; + double zero = 0.0; + int *rowind_new; + int i, j, diag; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = doubleMalloc(nnz + fill); + rowind_new = intMalloc(nnz + fill); + fill = 0; + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + } + if (diag < 0) + { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = zero; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + Astore->nnz += fill; + return fill; +} + +int ddominate(int n, NCformat *Astore) +/* make the matrix diagonally dominant */ +{ + double *nzval = (double *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + double *nzval_new; + int *rowind_new; + int i, j, diag; + double s; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = doubleMalloc(nnz + fill); + rowind_new = intMalloc(nnz+ fill); + fill = 0; + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + s += fabs(nzval_new[j + fill] = nzval[j]); + } + if (diag >= 0) { + nzval_new[diag+fill] = s * 3.0; + } else { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = s * 3.0; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + else + { + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + { + if (rowind[j] == i) diag = j; + s += fabs(nzval[j]); + } + nzval[diag] = s * 3.0; + } + } + Astore->nnz += fill; + return fill; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgscon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgscon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgscon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,70 +1,81 @@ -/* +/*! @file dgscon.c + * \brief Estimates reciprocal of the condition number of a general matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routines DGECON.
+ * 
*/ + /* * File name: dgscon.c * History: Modified from lapack routines DGECON. */ #include -#include "dsp_defs.h" +#include "slu_ddefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   DGSCON estimates the reciprocal of the condition number of a general 
+ *   real matrix A, in either the 1-norm or the infinity-norm, using   
+ *   the LU factorization computed by DGETRF.   *
+ *
+ *   An estimate is obtained for norm(inv(A)), and the reciprocal of the   
+ *   condition number is computed as   
+ *      RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ * 
+ *   Arguments   
+ *   =========   
+ *
+ *    NORM    (input) char*
+ *            Specifies whether the 1-norm condition number or the   
+ *            infinity-norm condition number is required:   
+ *            = '1' or 'O':  1-norm;   
+ *            = 'I':         Infinity-norm.
+ *	    
+ *    L       (input) SuperMatrix*
+ *            The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *            dgstrf(). Use compressed row subscripts storage for supernodes,
+ *            i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
+ * 
+ *    U       (input) SuperMatrix*
+ *            The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *            dgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *            Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
+ *	    
+ *    ANORM   (input) double
+ *            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
+ *            If NORM = 'I', the infinity-norm of the original matrix A.
+ *	    
+ *    RCOND   (output) double*
+ *           The reciprocal of the condition number of the matrix A,   
+ *           computed as RCOND = 1/(norm(A) * norm(inv(A))).
+ *	    
+ *    INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *
+ *    ===================================================================== 
+ * 
+ */ + void dgscon(char *norm, SuperMatrix *L, SuperMatrix *U, double anorm, double *rcond, SuperLUStat_t *stat, int *info) { -/* - Purpose - ======= - DGSCON estimates the reciprocal of the condition number of a general - real matrix A, in either the 1-norm or the infinity-norm, using - the LU factorization computed by DGETRF. - An estimate is obtained for norm(inv(A)), and the reciprocal of the - condition number is computed as - RCOND = 1 / ( norm(A) * norm(inv(A)) ). - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - NORM (input) char* - Specifies whether the 1-norm condition number or the - infinity-norm condition number is required: - = '1' or 'O': 1-norm; - = 'I': Infinity-norm. - - L (input) SuperMatrix* - The factor L from the factorization Pr*A*Pc=L*U as computed by - dgstrf(). Use compressed row subscripts storage for supernodes, - i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. - - U (input) SuperMatrix* - The factor U from the factorization Pr*A*Pc=L*U as computed by - dgstrf(). Use column-wise storage scheme, i.e., U has types: - Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU. - - ANORM (input) double - If NORM = '1' or 'O', the 1-norm of the original matrix A. - If NORM = 'I', the infinity-norm of the original matrix A. - - RCOND (output) double* - The reciprocal of the condition number of the matrix A, - computed as RCOND = 1/(norm(A) * norm(inv(A))). - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== -*/ - /* Local variables */ int kase, kase1, onenrm, i; double ainvnm; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsequ.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsequ.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsequ.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,82 +1,91 @@ - -/* +/*! @file dgsequ.c + * \brief Computes row and column scalings + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from LAPACK routine DGEEQU
+ * 
*/ /* * File name: dgsequ.c * History: Modified from LAPACK routine DGEEQU */ #include -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" + + +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   DGSEQU computes row and column scalings intended to equilibrate an   
+ *   M-by-N sparse matrix A and reduce its condition number. R returns the row
+ *   scale factors and C the column scale factors, chosen to try to make   
+ *   the largest element in each row and column of the matrix B with   
+ *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
+ *
+ *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
+ *   number and BIGNUM = largest safe number.  Use of these scaling   
+ *   factors is not guaranteed to reduce the condition number of A but   
+ *   works well in practice.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input) SuperMatrix*
+ *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
+ *           factors are to be computed. The type of A can be:
+ *           Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
+ *	    
+ *   R       (output) double*, size A->nrow
+ *           If INFO = 0 or INFO > M, R contains the row scale factors   
+ *           for A.
+ *	    
+ *   C       (output) double*, size A->ncol
+ *           If INFO = 0,  C contains the column scale factors for A.
+ *	    
+ *   ROWCND  (output) double*
+ *           If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
+ *           smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
+ *           AMAX is neither too large nor too small, it is not worth   
+ *           scaling by R.
+ *	    
+ *   COLCND  (output) double*
+ *           If INFO = 0, COLCND contains the ratio of the smallest   
+ *           C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
+ *           worth scaling by C.
+ *	    
+ *   AMAX    (output) double*
+ *           Absolute value of largest matrix element.  If AMAX is very   
+ *           close to overflow or very close to underflow, the matrix   
+ *           should be scaled.
+ *	    
+ *   INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *           > 0:  if INFO = i,  and i is   
+ *                 <= A->nrow:  the i-th row of A is exactly zero   
+ *                 >  A->ncol:  the (i-M)-th column of A is exactly zero   
+ *
+ *   ===================================================================== 
+ * 
+ */ void dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int *info) { -/* - Purpose - ======= - DGSEQU computes row and column scalings intended to equilibrate an - M-by-N sparse matrix A and reduce its condition number. R returns the row - scale factors and C the column scale factors, chosen to try to make - the largest element in each row and column of the matrix B with - elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. - R(i) and C(j) are restricted to be between SMLNUM = smallest safe - number and BIGNUM = largest safe number. Use of these scaling - factors is not guaranteed to reduce the condition number of A but - works well in practice. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input) SuperMatrix* - The matrix of dimension (A->nrow, A->ncol) whose equilibration - factors are to be computed. The type of A can be: - Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. - - R (output) double*, size A->nrow - If INFO = 0 or INFO > M, R contains the row scale factors - for A. - - C (output) double*, size A->ncol - If INFO = 0, C contains the column scale factors for A. - - ROWCND (output) double* - If INFO = 0 or INFO > M, ROWCND contains the ratio of the - smallest R(i) to the largest R(i). If ROWCND >= 0.1 and - AMAX is neither too large nor too small, it is not worth - scaling by R. - - COLCND (output) double* - If INFO = 0, COLCND contains the ratio of the smallest - C(i) to the largest C(i). If COLCND >= 0.1, it is not - worth scaling by C. - - AMAX (output) double* - Absolute value of largest matrix element. If AMAX is very - close to overflow or very close to underflow, the matrix - should be scaled. - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, and i is - <= A->nrow: the i-th row of A is exactly zero - > A->ncol: the (i-M)-th column of A is exactly zero - - ===================================================================== -*/ - /* Local variables */ NCformat *Astore; double *Aval; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,693 @@ + +/*! @file dgsisx.c + * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * DGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
+ * using the ILU factorization from dgsitrf(). An estimation of
+ * the condition number is provided. It performs the following steps:
+ *
+ *   1. If A is stored column-wise (A->Stype = SLU_NC):
+ *  
+ *	1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A is
+ *	     overwritten by diag(R)*A*diag(C) and B by diag(R)*B
+ *	     (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
+ *	     = TRANS or CONJ).
+ *
+ *	1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
+ *	     matrix that usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	1.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the matrix A (after equilibration if options->Equil = YES)
+ *	     as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
+ *
+ *	1.4. Compute the reciprocal pivot growth factor.
+ *
+ *	1.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
+ *	     and info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	1.6. The system of equations is solved for X using the factored form
+ *	     of A.
+ *
+ *	1.7. options->IterRefine is not used
+ *
+ *	1.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *	1.9. options for ILU only
+ *	     1) If options->RowPerm = LargeDiag, MC64 is used to scale and
+ *		permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
+ *		entries of modulus 1 on the diagonal and off-diagonal entries
+ *		of modulus at most 1. If MC64 fails, dgsequ() is used to
+ *		equilibrate the system.
+ *	     2) options->ILU_DropTol = tau is the threshold for dropping.
+ *		For L, it is used directly (for the whole row in a supernode);
+ *		For U, ||A(:,i)||_oo * tau is used as the threshold
+ *	        for the	i-th column.
+ *		If a secondary dropping rule is required, tau will
+ *	        also be used to compute the second threshold.
+ *	     3) options->ILU_FillFactor = gamma, used as the initial guess
+ *		of memory growth.
+ *		If a secondary dropping rule is required, it will also
+ *              be used as an upper bound of the memory.
+ *	     4) options->ILU_DropRule specifies the dropping rule.
+ *		Option		Explanation
+ *		======		===========
+ *		DROP_BASIC:	Basic dropping rule, supernodal based ILU.
+ *		DROP_PROWS:	Supernodal based ILUTP, p = gamma * nnz(A) / n.
+ *		DROP_COLUMN:	Variation of ILUTP, for j-th column,
+ *				p = gamma * nnz(A(:,j)).
+ *		DROP_AREA;	Variation of ILUTP, for j-th column, use
+ *				nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
+ *				memory.
+ *		DROP_DYNAMIC:	Modify the threshold tau during the
+ *				factorizaion.
+ *				If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				Otherwise
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				tau_U(j) uses the similar rule.
+ *				NOTE: the thresholds used by L and U are
+ *				indenpendent.
+ *		DROP_INTERP:	Compute the second dropping threshold by
+ *				interpolation instead of sorting (default).
+ *				In this case, the actual fill ratio is not
+ *				guaranteed smaller than gamma.
+ *		DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
+ *		( The default option is DROP_BASIC | DROP_AREA. )
+ *	     5) options->ILU_Norm is the criterion of computing the average
+ *		value of a row in L.
+ *		options->ILU_Norm	average(x[1:n])
+ *		=================	===============
+ *		ONE_NORM		||x||_1 / n
+ *		TWO_NORM		||x||_2 / sqrt(n)
+ *		INF_NORM		max{|x[i]|}
+ *	     6) options->ILU_MILU specifies the type of MILU's variation.
+ *		= SILU (default): do not perform MILU;
+ *		= SMILU_1 (not recommended):
+ *		    U(i,i) := U(i,i) + sum(dropped entries);
+ *		= SMILU_2:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
+ *		= SMILU_3:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
+ *		NOTE: Even SMILU_1 does not preserve the column sum because of
+ *		late dropping.
+ *	     7) options->ILU_FillTol is used as the perturbation when
+ *		encountering zero pivots. If some U(i,i) = 0, so that U is
+ *		exactly singular, then
+ *		   U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ *
+ *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
+ *	to the transpose of A:
+ *
+ *	2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A' is
+ *	     overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
+ *	     (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
+ *
+ *	2.2. Permute columns of transpose(A) (rows of A),
+ *	     forming transpose(A)*Pc, where Pc is a permutation matrix that
+ *	     usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	2.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the transpose(A) (after equilibration if
+ *	     options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
+ *	     permutation Pr determined by partial pivoting.
+ *
+ *	2.4. Compute the reciprocal pivot growth factor.
+ *
+ *	2.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		 U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
+ *	     And info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	2.6. The system of equations is solved for X using the factored form
+ *	     of transpose(A).
+ *
+ *	2.7. If options->IterRefine is not used.
+ *
+ *	2.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the LU decomposition will be performed and how the
+ *	   system will be solved.
+ *
+ * A	   (input/output) SuperMatrix*
+ *	   Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *	   of the linear equations is A->nrow. Currently, the type of A can be:
+ *	   Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
+ *	   In the future, more general A may be handled.
+ *
+ *	   On entry, If options->Fact = FACTORED and equed is not 'N',
+ *	   then A must have been equilibrated by the scaling factors in
+ *	   R and/or C.
+ *	   On exit, A is not modified if options->Equil = NO, or if
+ *	   options->Equil = YES but equed = 'N' on exit.
+ *	   Otherwise, if options->Equil = YES and equed is not 'N',
+ *	   A is scaled as follows:
+ *	   If A->Stype = SLU_NC:
+ *	     equed = 'R':  A := diag(R) * A
+ *	     equed = 'C':  A := A * diag(C)
+ *	     equed = 'B':  A := diag(R) * A * diag(C).
+ *	   If A->Stype = SLU_NR:
+ *	     equed = 'R':  transpose(A) := diag(R) * transpose(A)
+ *	     equed = 'C':  transpose(A) := transpose(A) * diag(C)
+ *	     equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
+ *
+ * perm_c  (input/output) int*
+ *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
+ *	   which defines the permutation matrix Pc; perm_c[i] = j means
+ *	   column i of A is in position j in A*Pc.
+ *	   On exit, perm_c may be overwritten by the product of the input
+ *	   perm_c and a permutation that postorders the elimination tree
+ *	   of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ *	   is already in postorder.
+ *
+ *	   If A->Stype = SLU_NR, column permutation vector of size A->nrow,
+ *	   which describes permutation of columns of transpose(A) 
+ *	   (rows of A) as described above.
+ *
+ * perm_r  (input/output) int*
+ *	   If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
+ *	   which defines the permutation matrix Pr, and is determined
+ *	   by partial pivoting.  perm_r[i] = j means row i of A is in 
+ *	   position j in Pr*A.
+ *
+ *	   If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ *	   determines permutation of rows of transpose(A)
+ *	   (columns of A) as described above.
+ *
+ *	   If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	   will try to use the input perm_r, unless a certain threshold
+ *	   criterion is violated. In that case, perm_r is overwritten by a
+ *	   new permutation determined by partial pivoting or diagonal
+ *	   threshold pivoting.
+ *	   Otherwise, perm_r is output argument.
+ *
+ * etree   (input/output) int*,  dimension (A->ncol)
+ *	   Elimination tree of Pc'*A'*A*Pc.
+ *	   If options->Fact != FACTORED and options->Fact != DOFACT,
+ *	   etree is an input argument, otherwise it is an output argument.
+ *	   Note: etree is a vector of parent pointers for a forest whose
+ *	   vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * equed   (input/output) char*
+ *	   Specifies the form of equilibration that was done.
+ *	   = 'N': No equilibration.
+ *	   = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
+ *	   = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
+ *	   = 'B': Both row and column equilibration, i.e., A was replaced 
+ *		  by diag(R)*A*diag(C).
+ *	   If options->Fact = FACTORED, equed is an input argument,
+ *	   otherwise it is an output argument.
+ *
+ * R	   (input/output) double*, dimension (A->nrow)
+ *	   The row scale factors for A or transpose(A).
+ *	   If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
+ *	   If equed = 'N' or 'C', R is not accessed.
+ *	   If options->Fact = FACTORED, R is an input argument,
+ *	       otherwise, R is output.
+ *	   If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ *	       of R must be positive.
+ *
+ * C	   (input/output) double*, dimension (A->ncol)
+ *	   The column scale factors for A or transpose(A).
+ *	   If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
+ *	   If equed = 'N' or 'R', C is not accessed.
+ *	   If options->Fact = FACTORED, C is an input argument,
+ *	       otherwise, C is output.
+ *	   If options->Fact = FACTORED and equed = 'C' or 'B', each element
+ *	       of C must be positive.
+ *
+ * L	   (output) SuperMatrix*
+ *	   The factor L from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype SLU_= NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses compressed row subscripts storage for supernodes, i.e.,
+ *	   L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
+ *
+ * U	   (output) SuperMatrix*
+ *	   The factor U from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses column-wise storage scheme, i.e., U has types:
+ *	   Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
+ *
+ * work    (workspace/output) void*, size (lwork) (in bytes)
+ *	   User supplied workspace, should be large enough
+ *	   to hold data structures for factors L and U.
+ *	   On exit, if fact is not 'F', L and U point to this array.
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 mem_usage->total_needed; no other side effects.
+ *
+ *	   See argument 'mem_usage' for memory usage statistics.
+ *
+ * B	   (input/output) SuperMatrix*
+ *	   B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
+ *	   On entry, the right hand side matrix.
+ *	   If B->ncol = 0, only LU decomposition is performed, the triangular
+ *			   solve is skipped.
+ *	   On exit,
+ *	      if equed = 'N', B is not modified; otherwise
+ *	      if A->Stype = SLU_NC:
+ *		 if options->Trans = NOTRANS and equed = 'R' or 'B',
+ *		    B is overwritten by diag(R)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
+ *		    B is overwritten by diag(C)*B;
+ *	      if A->Stype = SLU_NR:
+ *		 if options->Trans = NOTRANS and equed = 'C' or 'B',
+ *		    B is overwritten by diag(C)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
+ *		    B is overwritten by diag(R)*B.
+ *
+ * X	   (output) SuperMatrix*
+ *	   X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
+ *	   If info = 0 or info = A->ncol+1, X contains the solution matrix
+ *	   to the original system of equations. Note that A and B are modified
+ *	   on exit if equed is not 'N', and the solution to the equilibrated
+ *	   system is inv(diag(C))*X if options->Trans = NOTRANS and
+ *	   equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
+ *	   and equed = 'R' or 'B'.
+ *
+ * recip_pivot_growth (output) double*
+ *	   The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
+ *	   The infinity norm is used. If recip_pivot_growth is much less
+ *	   than 1, the stability of the LU factorization could be poor.
+ *
+ * rcond   (output) double*
+ *	   The estimate of the reciprocal condition number of the matrix A
+ *	   after equilibration (if done). If rcond is less than the machine
+ *	   precision (in particular, if rcond = 0), the matrix is singular
+ *	   to working precision. This condition is indicated by a return
+ *	   code of info > 0.
+ *
+ * mem_usage (output) mem_usage_t*
+ *	   Record the memory usage statistics, consisting of following fields:
+ *	   - for_lu (float)
+ *	     The amount of space used in bytes for L\U data structures.
+ *	   - total_needed (float)
+ *	     The amount of space needed in bytes to perform factorization.
+ *	   - expansions (int)
+ *	     The number of memory expansions during the LU factorization.
+ *
+ * stat   (output) SuperLUStat_t*
+ *	  Record the statistics on runtime and floating-point operation count.
+ *	  See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ *	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ *	   > 0: if info = i, and i is
+ *		<= A->ncol: number of zero pivots. They are replaced by small
+ *		      entries due to options->ILU_FillTol.
+ *		= A->ncol+1: U is nonsingular, but RCOND is less than machine
+ *		      precision, meaning that the matrix is singular to
+ *		      working precision. Nevertheless, the solution and
+ *		      error bounds are computed because there are a number
+ *		      of situations where the computed solution can be more
+ *		      accurate than the value of RCOND would suggest.
+ *		> A->ncol+1: number of bytes allocated when memory allocation
+ *		      failure occurred, plus A->ncol.
+ * 
+ */ + +void +dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, double *R, double *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, + double *recip_pivot_growth, double *rcond, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) +{ + + DNformat *Bstore, *Xstore; + double *Bmat, *Xmat; + int ldb, ldx, nrhs; + SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ + SuperMatrix AC; /* Matrix postmultiplied by Pc */ + int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; + trans_t trant; + char norm[1]; + int i, j, info1; + double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; + int relax, panel_size; + double diag_pivot_thresh; + double t0; /* temporary time */ + double *utime; + + int *perm = NULL; + + /* External functions */ + extern double dlangs(char *, SuperMatrix *); + + Bstore = B->Store; + Xstore = X->Store; + Bmat = Bstore->nzval; + Xmat = Xstore->nzval; + ldb = Bstore->lda; + ldx = Xstore->lda; + nrhs = B->ncol; + + *info = 0; + nofact = (options->Fact != FACTORED); + equil = (options->Equil == YES); + notran = (options->Trans == NOTRANS); + mc64 = (options->RowPerm == LargeDiag); + if ( nofact ) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE; + colequ = FALSE; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + + /* Test the input parameters */ + if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && + options->Fact != SamePattern_SameRowPerm && + !notran && options->Trans != TRANS && options->Trans != CONJ && + !equil && options->Equil != NO) + *info = -1; + else if ( A->nrow != A->ncol || A->nrow < 0 || + (A->Stype != SLU_NC && A->Stype != SLU_NR) || + A->Dtype != SLU_D || A->Mtype != SLU_GE ) + *info = -2; + else if (options->Fact == FACTORED && + !(rowequ || colequ || lsame_(equed, "N"))) + *info = -6; + else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, R[j]); + rcmax = SUPERLU_MAX(rcmax, R[j]); + } + if (rcmin <= 0.) *info = -7; + else if ( A->nrow > 0) + rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else rowcnd = 1.; + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, C[j]); + rcmax = SUPERLU_MAX(rcmax, C[j]); + } + if (rcmin <= 0.) *info = -8; + else if (A->nrow > 0) + colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else colcnd = 1.; + } + if (*info == 0) { + if ( lwork < -1 ) *info = -12; + else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || + B->Stype != SLU_DN || B->Dtype != SLU_D || + B->Mtype != SLU_GE ) + *info = -13; + else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || + (B->ncol != 0 && B->ncol != X->ncol) || + X->Stype != SLU_DN || + X->Dtype != SLU_D || X->Mtype != SLU_GE ) + *info = -14; + } + } + if (*info != 0) { + i = -(*info); + xerbla_("dgsisx", &i); + return; + } + + /* Initialization for factor parameters */ + panel_size = sp_ienv(1); + relax = sp_ienv(2); + diag_pivot_thresh = options->DiagPivotThresh; + + utime = stat->utime; + + /* Convert A to SLU_NC format when necessary. */ + if ( A->Stype == SLU_NR ) { + NRformat *Astore = A->Store; + AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, + Astore->nzval, Astore->colind, Astore->rowptr, + SLU_NC, A->Dtype, A->Mtype); + if ( notran ) { /* Reverse the transpose argument. */ + trant = TRANS; + notran = 0; + } else { + trant = NOTRANS; + notran = 1; + } + } else { /* A->Stype == SLU_NC */ + trant = options->Trans; + AA = A; + } + + if ( nofact ) { + register int i, j; + NCformat *Astore = AA->Store; + int nnz = Astore->nnz; + int *colptr = Astore->colptr; + int *rowind = Astore->rowind; + double *nzval = (double *)Astore->nzval; + int n = AA->nrow; + + if ( mc64 ) { + *equed = 'B'; + rowequ = colequ = 1; + t0 = SuperLU_timer_(); + if ((perm = intMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for perm[]"); + + info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); + + if (info1 > 0) { /* MC64 fails, call dgsequ() later */ + mc64 = 0; + SUPERLU_FREE(perm); + perm = NULL; + } else { + for (i = 0; i < n; i++) { + R[i] = exp(R[i]); + C[i] = exp(C[i]); + } + /* permute and scale the matrix */ + for (j = 0; j < n; j++) { + for (i = colptr[j]; i < colptr[j + 1]; i++) { + nzval[i] *= R[rowind[i]] * C[j]; + rowind[i] = perm[rowind[i]]; + } + } + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + if ( !mc64 & equil ) { + t0 = SuperLU_timer_(); + /* Compute row and column scalings to equilibrate the matrix A. */ + dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); + + if ( info1 == 0 ) { + /* Equilibrate matrix A. */ + dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + } + + if ( nrhs > 0 ) { + /* Scale the right hand side if equilibration was performed. */ + if ( notran ) { + if ( rowequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Bmat[i + j*ldb] *= R[i]; + } + } + } else if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Bmat[i + j*ldb] *= C[i]; + } + } + } + + if ( nofact ) { + + t0 = SuperLU_timer_(); + /* + * Gnet column permutation vector perm_c[], according to permc_spec: + * permc_spec = NATURAL: natural ordering + * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A + * permc_spec = MMD_ATA: minimum degree on structure of A'*A + * permc_spec = COLAMD: approximate minimum degree column ordering + * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] + */ + permc_spec = options->ColPerm; + if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) + get_perm_c(permc_spec, AA, perm_c); + utime[COLPERM] = SuperLU_timer_() - t0; + + t0 = SuperLU_timer_(); + sp_preorder(options, AA, perm_c, etree, &AC); + utime[ETREE] = SuperLU_timer_() - t0; + + /* Compute the LU factorization of A*Pc. */ + t0 = SuperLU_timer_(); + dgsitrf(options, &AC, relax, panel_size, etree, work, lwork, + perm_c, perm_r, L, U, stat, info); + utime[FACT] = SuperLU_timer_() - t0; + + if ( lwork == -1 ) { + mem_usage->total_needed = *info - A->ncol; + return; + } + } + + if ( options->PivotGrowth ) { + if ( *info > 0 ) return; + + /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ + *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); + } + + if ( options->ConditionNumber ) { + /* Estimate the reciprocal of the condition number of A. */ + t0 = SuperLU_timer_(); + if ( notran ) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = dlangs(norm, AA); + dgscon(norm, L, U, anorm, rcond, stat, &info1); + utime[RCOND] = SuperLU_timer_() - t0; + } + + if ( nrhs > 0 ) { + /* Compute the solution matrix X. */ + for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ + for (i = 0; i < B->nrow; i++) + Xmat[i + j*ldx] = Bmat[i + j*ldb]; + + t0 = SuperLU_timer_(); + dgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); + utime[SOLVE] = SuperLU_timer_() - t0; + + /* Transform the solution matrix X to a solution of the original + system. */ + if ( notran ) { + if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Xmat[i + j*ldx] *= C[i]; + } + } + } else { + if ( rowequ ) { + if (perm) { + double *tmp; + int n = A->nrow; + + if ((tmp = doubleMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for tmp[]"); + for (j = 0; j < nrhs; j++) { + for (i = 0; i < n; i++) + tmp[i] = Xmat[i + j * ldx]; /*dcopy*/ + for (i = 0; i < n; i++) + Xmat[i + j * ldx] = R[i] * tmp[perm[i]]; + } + SUPERLU_FREE(tmp); + } else { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Xmat[i + j*ldx] *= R[i]; + } + } + } + } + } /* end if nrhs > 0 */ + + if ( options->ConditionNumber ) { + /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ + if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1; + } + + if (perm) SUPERLU_FREE(perm); + + if ( nofact ) { + ilu_dQuerySpace(L, U, mem_usage); + Destroy_CompCol_Permuted(&AC); + } + if ( A->Stype == SLU_NR ) { + Destroy_SuperMatrix_Store(AA); + SUPERLU_FREE(AA); + } + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,625 @@ + +/*! @file dgsitf.c + * \brief Computes an ILU factorization of a general sparse matrix + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +#ifdef DEBUG +int num_drop_L; +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * DGSITRF computes an ILU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ *     Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the ILU decomposition will be performed.
+ *
+ * A	    (input) SuperMatrix*
+ *	    Original matrix A, permuted by columns, of dimension
+ *	    (A->nrow, A->ncol). The type of A can be:
+ *	    Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
+ *
+ * relax    (input) int
+ *	    To control degree of relaxing supernodes. If the number
+ *	    of nodes (columns) in a subtree of the elimination tree is less
+ *	    than relax, this subtree is considered as one supernode,
+ *	    regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ *	    A panel consists of at most panel_size consecutive columns.
+ *
+ * etree    (input) int*, dimension (A->ncol)
+ *	    Elimination tree of A'*A.
+ *	    Note: etree is a vector of parent pointers for a forest whose
+ *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *	    On input, the columns of A should be permuted so that the
+ *	    etree is in a certain postorder.
+ *
+ * work     (input/output) void*, size (lwork) (in bytes)
+ *	    User-supplied work space and space for the output data structures.
+ *	    Not referenced if lwork = 0;
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 *info; no other side effects.
+ *
+ * perm_c   (input) int*, dimension (A->ncol)
+ *	    Column permutation vector, which defines the
+ *	    permutation matrix Pc; perm_c[i] = j means column i of A is
+ *	    in position j in A*Pc.
+ *	    When searching for diagonal, perm_c[*] is applied to the
+ *	    row subscripts of A, so that diagonal threshold pivoting
+ *	    can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r   (input/output) int*, dimension (A->nrow)
+ *	    Row permutation vector which defines the permutation matrix Pr,
+ *	    perm_r[i] = j means row i of A is in position j in Pr*A.
+ *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	       will try to use the input perm_r, unless a certain threshold
+ *	       criterion is violated. In that case, perm_r is overwritten by
+ *	       a new permutation determined by partial pivoting or diagonal
+ *	       threshold pivoting.
+ *	    Otherwise, perm_r is output argument;
+ *
+ * L	    (output) SuperMatrix*
+ *	    The factor L from the factorization Pr*A=L*U; use compressed row
+ *	    subscripts storage for supernodes, i.e., L has type:
+ *	    Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
+ *
+ * U	    (output) SuperMatrix*
+ *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ *	    storage scheme, i.e., U has types: Stype = SLU_NC,
+ *	    Dtype = SLU_D, Mtype = SLU_TRU.
+ *
+ * stat     (output) SuperLUStat_t*
+ *	    Record the statistics on runtime and floating-point operation count.
+ *	    See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info     (output) int*
+ *	    = 0: successful exit
+ *	    < 0: if info = -i, the i-th argument had an illegal value
+ *	    > 0: if info = i, and i is
+ *	       <= A->ncol: number of zero pivots. They are replaced by small
+ *		  entries according to options->ILU_FillTol.
+ *	       > A->ncol: number of bytes allocated when memory allocation
+ *		  failure occurred, plus A->ncol. If lwork = -1, it is
+ *		  the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ *   m = number of rows in the matrix
+ *   n = number of columns in the matrix
+ *
+ *   marker[0:3*m-1]: marker[i] = j means that node i has been
+ *	reached when working on column j.
+ *	Storage: relative to original row subscripts
+ *	NOTE: There are 4 of them:
+ *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
+ *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
+ *	      marker_relax(has its own space) is used for relaxed supernodes.
+ *
+ *   parent[0:m-1]: parent vector used during dfs
+ *	Storage: relative to new row subscripts
+ *
+ *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ *	unexplored neighbor of i in lsub[*]
+ *
+ *   segrep[0:nseg-1]: contains the list of supernodal representatives
+ *	in topological order of the dfs. A supernode representative is the
+ *	last column of a supernode.
+ *	The maximum size of segrep[] is n.
+ *
+ *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ *	supernodal representative r, repfnz[r] is the location of the first
+ *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
+ *	indicates the supernode r has been explored.
+ *	NOTE: There are W of them, each used for one column of a panel.
+ *
+ *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ *	the panel diagonal. These are filled in during dpanel_dfs(), and are
+ *	used later in the inner LU factorization within the panel.
+ *	panel_lsub[]/dense[] pair forms the SPA data structure.
+ *	NOTE: There are W of them.
+ *
+ *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ *		   NOTE: there are W of them.
+ *
+ *   tempv[0:*]: real temporary used for dense numeric kernels;
+ *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
+ *	It is also used by the dropping routine ilu_ddrop_row().
+ * 
+ */ + +void +dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, + int *etree, void *work, int lwork, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) +{ + /* Local working arrays */ + NCPformat *Astore; + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ + int *iperm_c; /* inverse of perm_c */ + int *swap, *iswap; /* swap is used to store the row permutation + during the factorization. Initially, it is set + to iperm_c (row indeces of Pc*A*Pc'). + iswap is the inverse of swap. After the + factorization, it is equal to perm_r. */ + int *iwork; + double *dwork; + int *segrep, *repfnz, *parent, *xplore; + int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ + int *marker, *marker_relax; + double *dense, *tempv; + int *relax_end, *relax_fsupc; + double *a; + int *asub; + int *xa_begin, *xa_end; + int *xsup, *supno; + int *xlsub, *xlusup, *xusub; + int nzlumax; + double *amax; + double drop_sum; + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + int *iwork2; /* used by the second dropping rule */ + + /* Local scalars */ + fact_t fact = options->Fact; + double diag_pivot_thresh = options->DiagPivotThresh; + double drop_tol = options->ILU_DropTol; /* tau */ + double fill_ini = options->ILU_FillTol; /* tau^hat */ + double gamma = options->ILU_FillFactor; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + double fill_tol; + int pivrow; /* pivotal row number in the original matrix A */ + int nseg1; /* no of segments in U-column above panel row jcol */ + int nseg; /* no of segments in each U-column */ + register int jcol; + register int kcol; /* end column of a relaxed snode */ + register int icol; + register int i, k, jj, new_next, iinfo; + int m, n, min_mn, jsupno, fsupc, nextlu, nextu; + int w_def; /* upper bound on panel width */ + int usepr, iperm_r_allocated = 0; + int nnzL, nnzU; + int *panel_histo = stat->panel_histo; + flops_t *ops = stat->ops; + + int last_drop;/* the last column which the dropping rules applied */ + int quota; + int nnzAj; /* number of nonzeros in A(:,1:j) */ + int nnzLj, nnzUj; + double tol_L = drop_tol, tol_U = drop_tol; + double zero = 0.0; + + /* Executable */ + iinfo = 0; + m = A->nrow; + n = A->ncol; + min_mn = SUPERLU_MIN(m, n); + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + + /* Allocate storage common to the factor routines */ + *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, + gamma, L, U, &Glu, &iwork, &dwork); + if ( *info ) return; + + xsup = Glu.xsup; + supno = Glu.supno; + xlsub = Glu.xlsub; + xlusup = Glu.xlusup; + xusub = Glu.xusub; + + SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, + &repfnz, &panel_lsub, &marker_relax, &marker); + dSetRWork(m, panel_size, dwork, &dense, &tempv); + + usepr = (fact == SamePattern_SameRowPerm); + if ( usepr ) { + /* Compute the inverse of perm_r */ + iperm_r = (int *) intMalloc(m); + for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; + iperm_r_allocated = 1; + } + + iperm_c = (int *) intMalloc(n); + for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; + swap = (int *)intMalloc(n); + for (k = 0; k < n; k++) swap[k] = iperm_c[k]; + iswap = (int *)intMalloc(n); + for (k = 0; k < n; k++) iswap[k] = perm_c[k]; + amax = (double *) doubleMalloc(panel_size); + if (drop_rule & DROP_SECONDARY) + iwork2 = (int *)intMalloc(n); + else + iwork2 = NULL; + + nnzAj = 0; + nnzLj = 0; + nnzUj = 0; + last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95)); + + /* Identify relaxed snodes */ + relax_end = (int *) intMalloc(n); + relax_fsupc = (int *) intMalloc(n); + if ( options->SymmetricMode == YES ) + ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + else + ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + + ifill (perm_r, m, EMPTY); + ifill (marker, m * NO_MARKER, EMPTY); + supno[0] = -1; + xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; + w_def = panel_size; + + /* Mark the rows used by relaxed supernodes */ + ifill (marker_relax, m, EMPTY); + i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, + asub, marker_relax); +#if ( PRNTlevel >= 1) + printf("%d relaxed supernodes.\n", i); +#endif + + /* + * Work on one "panel" at a time. A panel is one of the following: + * (a) a relaxed supernode at the bottom of the etree, or + * (b) panel_size contiguous columns, defined by the user + */ + for (jcol = 0; jcol < min_mn; ) { + + if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ + kcol = relax_end[jcol]; /* end of the relaxed snode */ + panel_histo[kcol-jcol+1]++; + + /* Drop small rows in the previous supernode. */ + if (jcol > 0 && jcol < last_drop) { + int first = xsup[supno[jcol - 1]]; + int last = jcol - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); + + /* Drop small rows */ + i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj, + &fill_tol, &Glu, tempv, iwork2, 0); + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } + + /* -------------------------------------- + * Factorize the relaxed supernode(jcol:kcol) + * -------------------------------------- */ + /* Determine the union of the row structure of the snode */ + if ( (*info = ilu_dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, + marker, &Glu)) != 0 ) + return; + + nextu = xusub[jcol]; + nextlu = xlusup[jcol]; + jsupno = supno[jcol]; + fsupc = xsup[jsupno]; + new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); + nzlumax = Glu.nzlumax; + while ( new_next > nzlumax ) { + if ((*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) + return; + } + + for (icol = jcol; icol <= kcol; icol++) { + xusub[icol+1] = nextu; + + amax[0] = 0.0; + /* Scatter into SPA dense[*] */ + for (k = xa_begin[icol]; k < xa_end[icol]; k++) { + register double tmp = fabs(a[k]); + if (tmp > amax[0]) amax[0] = tmp; + dense[asub[k]] = a[k]; + } + nnzAj += xa_end[icol] - xa_begin[icol]; + if (amax[0] == 0.0) { + amax[0] = fill_ini; +#if ( PRNTlevel >= 1) + printf("Column %d is entirely zero!\n", icol); + fflush(stdout); +#endif + } + + /* Numeric update within the snode */ + dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); + + if (usepr) pivrow = iperm_r[icol]; + fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); + if ( (*info = ilu_dpivotL(icol, diag_pivot_thresh, &usepr, + perm_r, iperm_c[icol], swap, iswap, + marker_relax, &pivrow, + amax[0] * fill_tol, milu, zero, + &Glu, stat)) ) { + iinfo++; + marker[pivrow] = kcol; + } + + } + + jcol = kcol + 1; + + } else { /* Work on one panel of panel_size columns */ + + /* Adjust panel_size so that a panel won't overlap with the next + * relaxed snode. + */ + panel_size = w_def; + for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) + if ( relax_end[k] != EMPTY ) { + panel_size = k - jcol; + break; + } + if ( k == min_mn ) panel_size = min_mn - jcol; + panel_histo[panel_size]++; + + /* symbolic factor on a panel of columns */ + ilu_dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, + dense, amax, panel_lsub, segrep, repfnz, + marker, parent, xplore, &Glu); + + /* numeric sup-panel updates in topological order */ + dpanel_bmod(m, panel_size, jcol, nseg1, dense, + tempv, segrep, repfnz, &Glu, stat); + + /* Sparse LU within the panel, and below panel diagonal */ + for (jj = jcol; jj < jcol + panel_size; jj++) { + + k = (jj - jcol) * m; /* column index for w-wide arrays */ + + nseg = nseg1; /* Begin after all the panel segments */ + + nnzAj += xa_end[jj] - xa_begin[jj]; + + if ((*info = ilu_dcolumn_dfs(m, jj, perm_r, &nseg, + &panel_lsub[k], segrep, &repfnz[k], + marker, parent, xplore, &Glu))) + return; + + /* Numeric updates */ + if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k], + tempv, &segrep[nseg1], &repfnz[k], + jcol, &Glu, stat)) != 0) return; + + /* Make a fill-in position if the column is entirely zero */ + if (xlsub[jj + 1] == xlsub[jj]) { + register int i, row; + int nextl; + int nzlmax = Glu.nzlmax; + int *lsub = Glu.lsub; + int *marker2 = marker + 2 * m; + + /* Allocate memory */ + nextl = xlsub[jj] + 1; + if (nextl >= nzlmax) { + int error = dLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); + if (error) { *info = error; return; } + lsub = Glu.lsub; + } + xlsub[jj + 1]++; + assert(xlusup[jj]==xlusup[jj+1]); + xlusup[jj + 1]++; + Glu.lusup[xlusup[jj]] = zero; + + /* Choose a row index (pivrow) for fill-in */ + for (i = jj; i < n; i++) + if (marker_relax[swap[i]] <= jj) break; + row = swap[i]; + marker2[row] = jj; + lsub[xlsub[jj]] = row; +#ifdef DEBUG + printf("Fill col %d.\n", jj); + fflush(stdout); +#endif + } + + /* Computer the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * jj / m; + else if (drop_rule & DROP_COLUMN) + quota = gamma * (xa_end[jj] - xa_begin[jj]) * + (jj + 1) / m; + else if (drop_rule & DROP_AREA) + quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; + else + quota = m; + + /* Copy the U-segments to ucol[*] and drop small entries */ + if ((*info = ilu_dcopy_to_ucol(jj, nseg, segrep, &repfnz[k], + perm_r, &dense[k], drop_rule, + milu, amax[jj - jcol] * tol_U, + quota, &drop_sum, &nnzUj, &Glu, + iwork2)) != 0) + return; + + /* Reset the dropping threshold if required */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) + tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); + else + tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); + } + + drop_sum *= MILU_ALPHA; + if (usepr) pivrow = iperm_r[jj]; + fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); + if ( (*info = ilu_dpivotL(jj, diag_pivot_thresh, &usepr, perm_r, + iperm_c[jj], swap, iswap, + marker_relax, &pivrow, + amax[jj - jcol] * fill_tol, milu, + drop_sum, &Glu, stat)) ) { + iinfo++; + marker[m + pivrow] = jj; + marker[2 * m + pivrow] = jj; + } + + /* Reset repfnz[] for this column */ + resetrep_col (nseg, segrep, &repfnz[k]); + + /* Start a new supernode, drop the previous one */ + if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { + int first = xsup[supno[jj - 1]]; + int last = jj - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) + / m) - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / + (double)min_mn); + + /* Drop small rows */ + i = ilu_ddrop_row(options, first, last, tol_L, quota, + &nnzLj, &fill_tol, &Glu, tempv, iwork2, + 1); + + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } /* if start a new supernode */ + + } /* for */ + + jcol += panel_size; /* Move to the next panel */ + + } /* else */ + + } /* for */ + + *info = iinfo; + + if ( m > n ) { + k = 0; + for (i = 0; i < m; ++i) + if ( perm_r[i] == EMPTY ) { + perm_r[i] = n + k; + ++k; + } + } + + ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); + fixupL(min_mn, perm_r, &Glu); + + dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */ + + if ( fact == SamePattern_SameRowPerm ) { + /* L and U structures may have changed due to possibly different + pivoting, even though the storage is available. + There could also be memory expansions, so the array locations + may have changed, */ + ((SCformat *)L->Store)->nnz = nnzL; + ((SCformat *)L->Store)->nsuper = Glu.supno[n]; + ((SCformat *)L->Store)->nzval = Glu.lusup; + ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; + ((SCformat *)L->Store)->rowind = Glu.lsub; + ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; + ((NCformat *)U->Store)->nnz = nnzU; + ((NCformat *)U->Store)->nzval = Glu.ucol; + ((NCformat *)U->Store)->rowind = Glu.usub; + ((NCformat *)U->Store)->colptr = Glu.xusub; + } else { + dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, + Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, + Glu.xsup, SLU_SC, SLU_D, SLU_TRLU); + dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, + Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU); + } + + ops[FACT] += ops[TRSV] + ops[GEMV]; + + if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); + SUPERLU_FREE (iperm_c); + SUPERLU_FREE (relax_end); + SUPERLU_FREE (swap); + SUPERLU_FREE (iswap); + SUPERLU_FREE (relax_fsupc); + SUPERLU_FREE (amax); + if ( iwork2 ) SUPERLU_FREE (iwork2); + +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,26 @@ -/* +/*! @file dgsrfs.c + * \brief Improves computed solution to a system of inear equations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routine DGERFS
+ * 
*/ /* * File name: dgsrfs.c * History: Modified from lapack routine DGERFS */ #include -#include "dsp_defs.h" +#include "slu_ddefs.h" -void -dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, char *equed, double *R, double *C, - SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  *   Purpose   
  *   =======   
  *
@@ -123,8 +124,16 @@
  *
  *    ITMAX is the maximum number of steps of iterative refinement.   
  *
- */  
+ * 
+ */ +void +dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, char *equed, double *R, double *C, + SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr, + SuperLUStat_t *stat, int *info) +{ + #define ITMAX 5 /* Table of constant values */ @@ -224,6 +233,8 @@ nz = A->ncol + 1; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); + /* Set SAFE1 essentially to be the underflow threshold times the + number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; @@ -274,7 +285,7 @@ where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the - numerator and denominator before dividing. */ + numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] ); @@ -297,11 +308,15 @@ } s = 0.; for (i = 0; i < A->nrow; ++i) { - if (rwork[i] > safe2) + if (rwork[i] > safe2) { s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] ); - else - s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / - (rwork[i] + safe1) ); + } else if ( rwork[i] != 0.0 ) { + /* Adding SAFE1 to the numerator guards against + spuriously zero residuals (underflow). */ + s = SUPERLU_MAX( s, (safe1 + fabs(work[i])) / rwork[i] ); + } + /* If rwork[i] is exactly 0.0, then we know the true + residual also must be exactly 0.0. */ } berr[j] = s; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssv.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssv.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssv.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,19 @@ - -/* +/*! @file dgssv.c + * \brief Solves the system of linear equations A*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" -void -dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, - SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -127,15 +126,21 @@
  *                so the solution could not be computed.
  *             > A->ncol: number of bytes allocated when memory allocation
  *                failure occurred, plus A->ncol.
- *   
+ * 
*/ + +void +dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, + SuperLUStat_t *stat, int *info ) +{ + DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ - double drop_tol = 0.; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; @@ -201,8 +206,8 @@ relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ - dgstrf(options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); + dgstrf(options, &AC, relax, panel_size, etree, + NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,22 +1,19 @@ -/* +/*! @file dgssvx.c + * \brief Solves the system of linear equations A*X=B or A'*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" -void -dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - int *etree, char *equed, double *R, double *C, - SuperMatrix *L, SuperMatrix *U, void *work, int lwork, - SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, - double *rcond, double *ferr, double *berr, - mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -314,7 +311,7 @@
  *
  * stat   (output) SuperLUStat_t*
  *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
+ *        See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info    (output) int*
  *         = 0: successful exit   
@@ -332,9 +329,19 @@
  *                    accurate than the value of RCOND would suggest.   
  *              > A->ncol+1: number of bytes allocated when memory allocation
  *                    failure occurred, plus A->ncol.
- *
+ * 
*/ +void +dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, double *R, double *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, + double *rcond, double *ferr, double *berr, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) +{ + + DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; @@ -346,13 +353,12 @@ int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; - double diag_pivot_thresh, drop_tol; + double diag_pivot_thresh; double t0; /* temporary time */ double *utime; /* External functions */ extern double dlangs(char *, SuperMatrix *); - extern double dlamch_(char *); Bstore = B->Store; Xstore = X->Store; @@ -443,7 +449,6 @@ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; - drop_tol = 0.0; utime = stat->utime; @@ -523,8 +528,8 @@ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); - dgstrf(options, &AC, drop_tol, relax, panel_size, - etree, work, lwork, perm_c, perm_r, L, U, stat, info); + dgstrf(options, &AC, relax, panel_size, etree, + work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrf.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,33 +1,32 @@ -/* +/*! @file dgstrf.c + * \brief Computes an LU factorization of a general sparse matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
+ * 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -void -dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol, - int relax, int panel_size, int *etree, void *work, int lwork, - int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, - SuperLUStat_t *stat, int *info) -{ -/* +#include "slu_ddefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -53,11 +52,6 @@
  *          (A->nrow, A->ncol). The type of A can be:
  *          Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
  *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *	    Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
  * relax    (input) int
  *          To control degree of relaxing supernodes. If the number
  *          of nodes (columns) in a subtree of the elimination tree is less
@@ -117,7 +111,7 @@
  *
  * stat     (output) SuperLUStat_t*
  *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
+ *          See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info     (output) int*
  *          = 0: successful exit
@@ -177,13 +171,20 @@
  *	    	   NOTE: there are W of them.
  *
  *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in dsp_defs.h.
- *
+ *	The size of this array is defined by NUM_TEMPV() in slu_ddefs.h.
+ * 
*/ + +void +dgstrf (superlu_options_t *options, SuperMatrix *A, + int relax, int panel_size, int *etree, void *work, int lwork, + int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, + SuperLUStat_t *stat, int *info) +{ /* Local working arrays */ NCPformat *Astore; - int *iperm_r; /* inverse of perm_r; - used when options->Fact == SamePattern_SameRowPerm */ + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; double *dwork; @@ -199,7 +200,8 @@ int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + double fill_ratio = sp_ienv(6); /* estimated fill ratio */ + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; @@ -230,7 +232,7 @@ /* Allocate storage common to the factor routines */ *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, - panel_size, L, U, &Glu, &iwork, &dwork); + panel_size, fill_ratio, L, U, &Glu, &iwork, &dwork); if ( *info ) return; xsup = Glu.xsup; @@ -417,7 +419,7 @@ ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { - dCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, + dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_D, SLU_TRLU); dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, @@ -425,6 +427,7 @@ } ops[FACT] += ops[TRSV] + ops[GEMV]; + stat->expansions = --(Glu.num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,27 @@ -/* +/*! @file dgstrs.c + * \brief Solves a system using LU factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" /* @@ -29,13 +31,9 @@ void dlsolve(int, int, double*, double*); void dmatvec(int, int, int, double*, double*, double*); - -void -dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, SuperMatrix *B, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -85,8 +83,15 @@
  * info    (output) int*
  * 	   = 0: successful exit
  *	   < 0: if info = -i, the i-th argument had an illegal value
- *
+ * 
*/ + +void +dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, SuperMatrix *B, + SuperLUStat_t *stat, int *info) +{ + #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif @@ -288,7 +293,7 @@ stat->ops[SOLVE] = solve_ops; - } else { /* Solve A'*X=B */ + } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; @@ -297,7 +302,6 @@ } stat->ops[SOLVE] = 0; - for (k = 0; k < nrhs; ++k) { /* Multiply by inv(U'). */ @@ -307,7 +311,6 @@ sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); } - /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,27 @@ - - -/* +/*! @file dgstrsL.c + * \brief Performs the L-solve using the LU factorization computed by DGSTRF + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * September 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" +#include "slu_util.h" /* @@ -31,15 +31,13 @@ void dlsolve(int, int, double*, double*); void dmatvec(int, int, int, double*, double*, double*); - -void -dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
- * DGSTRSL only performs the L-solve using the LU factorization computed
+ * dgstrsL only performs the L-solve using the LU factorization computed
  * by DGSTRF.
  *
  * See supermatrix.h for the definition of 'SuperMatrix' structure.
@@ -75,8 +73,11 @@
  * info    (output) int*
  * 	   = 0: successful exit
  *	   < 0: if info = -i, the i-th argument had an illegal value
- *
+ * 
*/ +void +dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info) +{ #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,224 @@ +/*! @file dgstrsU.c + * \brief Performs the U-solve using the LU factorization computed by DGSTRF + * + *
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ * 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
+ */ + + +#include "slu_ddefs.h" + + +/* + * Function prototypes + */ +void dusolve(int, int, double*, double*); +void dlsolve(int, int, double*, double*); +void dmatvec(int, int, int, double*, double*, double*); + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * dgstrsU only performs the U-solve using the LU factorization computed
+ * by DGSTRF.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * trans   (input) trans_t
+ *          Specifies the form of the system of equations:
+ *          = NOTRANS: A * X = B  (No transpose)
+ *          = TRANS:   A'* X = B  (Transpose)
+ *          = CONJ:    A**H * X = B  (Conjugate transpose)
+ *
+ * L       (input) SuperMatrix*
+ *         The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *         dgstrf(). Use compressed row subscripts storage for supernodes,
+ *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
+ *
+ * U       (input) SuperMatrix*
+ *         The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
+ *
+ * perm_c  (input) int*, dimension (L->ncol)
+ *	   Column permutation vector, which defines the 
+ *         permutation matrix Pc; perm_c[i] = j means column i of A is 
+ *         in position j in A*Pc.
+ *
+ * perm_r  (input) int*, dimension (L->nrow)
+ *         Row permutation vector, which defines the permutation matrix Pr; 
+ *         perm_r[i] = j means row i of A is in position j in Pr*A.
+ *
+ * B       (input/output) SuperMatrix*
+ *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
+ *         On entry, the right hand side matrix.
+ *         On exit, the solution matrix if info = 0;
+ *
+ * stat     (output) SuperLUStat_t*
+ *          Record the statistics on runtime and floating-point operation count.
+ *          See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ * 	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ * 
+ */ +void +dgstrsU(trans_t trans, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, SuperMatrix *B, + SuperLUStat_t *stat, int *info) +{ +#ifdef _CRAY + _fcd ftcs1, ftcs2, ftcs3, ftcs4; +#endif + int incx = 1, incy = 1; +#ifdef USE_VENDOR_BLAS + double alpha = 1.0, beta = 1.0; + double *work_col; +#endif + DNformat *Bstore; + double *Bmat; + SCformat *Lstore; + NCformat *Ustore; + double *Lval, *Uval; + int fsupc, nrow, nsupr, nsupc, luptr, istart, irow; + int i, j, k, iptr, jcol, n, ldb, nrhs; + double *rhs_work, *soln; + flops_t solve_ops; + void dprint_soln(); + + /* Test input parameters ... */ + *info = 0; + Bstore = B->Store; + ldb = Bstore->lda; + nrhs = B->ncol; + if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1; + else if ( L->nrow != L->ncol || L->nrow < 0 || + L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU ) + *info = -2; + else if ( U->nrow != U->ncol || U->nrow < 0 || + U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU ) + *info = -3; + else if ( ldb < SUPERLU_MAX(0, L->nrow) || + B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) + *info = -6; + if ( *info ) { + i = -(*info); + xerbla_("dgstrs", &i); + return; + } + + n = L->nrow; + soln = doubleMalloc(n); + if ( !soln ) ABORT("Malloc fails for local soln[]."); + + Bmat = Bstore->nzval; + Lstore = L->Store; + Lval = Lstore->nzval; + Ustore = U->Store; + Uval = Ustore->nzval; + solve_ops = 0; + + if ( trans == NOTRANS ) { + /* + * Back solve Ux=y. + */ + for (k = Lstore->nsuper; k >= 0; k--) { + fsupc = L_FST_SUPC(k); + istart = L_SUB_START(fsupc); + nsupr = L_SUB_START(fsupc+1) - istart; + nsupc = L_FST_SUPC(k+1) - fsupc; + luptr = L_NZ_START(fsupc); + + solve_ops += nsupc * (nsupc + 1) * nrhs; + + if ( nsupc == 1 ) { + rhs_work = &Bmat[0]; + for (j = 0; j < nrhs; j++) { + rhs_work[fsupc] /= Lval[luptr]; + rhs_work += ldb; + } + } else { +#ifdef USE_VENDOR_BLAS +#ifdef _CRAY + ftcs1 = _cptofcd("L", strlen("L")); + ftcs2 = _cptofcd("U", strlen("U")); + ftcs3 = _cptofcd("N", strlen("N")); + STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha, + &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); +#else + dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha, + &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); +#endif +#else + for (j = 0; j < nrhs; j++) + dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] ); +#endif + } + + for (j = 0; j < nrhs; ++j) { + rhs_work = &Bmat[j*ldb]; + for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { + solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); + for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){ + irow = U_SUB(i); + rhs_work[irow] -= rhs_work[jcol] * Uval[i]; + } + } + } + + } /* for U-solve */ + +#ifdef DEBUG + printf("After U-solve: x=\n"); + dprint_soln(n, nrhs, Bmat); +#endif + + /* Compute the final solution X := Pc*X. */ + for (i = 0; i < nrhs; i++) { + rhs_work = &Bmat[i*ldb]; + for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]]; + for (k = 0; k < n; k++) rhs_work[k] = soln[k]; + } + + stat->ops[SOLVE] = solve_ops; + + } else { /* Solve U'x = b */ + /* Permute right hand sides to form Pc'*B. */ + for (i = 0; i < nrhs; i++) { + rhs_work = &Bmat[i*ldb]; + for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k]; + for (k = 0; k < n; k++) rhs_work[k] = soln[k]; + } + + for (k = 0; k < nrhs; ++k) { + /* Multiply by inv(U'). */ + sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); + } + + } + + SUPERLU_FREE(soln); +} + Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlacon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlacon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlacon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,67 +1,74 @@ - -/* +/*! @file dlacon.c + * \brief Estimates the 1-norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "Cnames.h" +#include "slu_Cnames.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   DLACON estimates the 1-norm of a square matrix A.   
+ *   Reverse communication is used for evaluating matrix-vector products. 
+ * 
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   N      (input) INT
+ *          The order of the matrix.  N >= 1.   
+ *
+ *   V      (workspace) DOUBLE PRECISION array, dimension (N)   
+ *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
+ *          (W is not returned).   
+ *
+ *   X      (input/output) DOUBLE PRECISION array, dimension (N)   
+ *          On an intermediate return, X should be overwritten by   
+ *                A * X,   if KASE=1,   
+ *                A' * X,  if KASE=2,
+ *         and DLACON must be re-called with all the other parameters   
+ *          unchanged.   
+ *
+ *   ISGN   (workspace) INT array, dimension (N)
+ *
+ *   EST    (output) DOUBLE PRECISION   
+ *          An estimate (a lower bound) for norm(A).   
+ *
+ *   KASE   (input/output) INT
+ *          On the initial call to DLACON, KASE should be 0.   
+ *          On an intermediate return, KASE will be 1 or 2, indicating   
+ *          whether X should be overwritten by A * X  or A' * X.   
+ *          On the final return from DLACON, KASE will again be 0.   
+ *
+ *   Further Details   
+ *   ======= =======   
+ *
+ *   Contributed by Nick Higham, University of Manchester.   
+ *   Originally named CONEST, dated March 16, 1988.   
+ *
+ *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
+ *   a real or complex matrix, with applications to condition estimation", 
+ *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
+ *   ===================================================================== 
+ * 
+ */ + int dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase) { -/* - Purpose - ======= - DLACON estimates the 1-norm of a square matrix A. - Reverse communication is used for evaluating matrix-vector products. - - Arguments - ========= - - N (input) INT - The order of the matrix. N >= 1. - - V (workspace) DOUBLE PRECISION array, dimension (N) - On the final return, V = A*W, where EST = norm(V)/norm(W) - (W is not returned). - - X (input/output) DOUBLE PRECISION array, dimension (N) - On an intermediate return, X should be overwritten by - A * X, if KASE=1, - A' * X, if KASE=2, - and DLACON must be re-called with all the other parameters - unchanged. - - ISGN (workspace) INT array, dimension (N) - - EST (output) DOUBLE PRECISION - An estimate (a lower bound) for norm(A). - - KASE (input/output) INT - On the initial call to DLACON, KASE should be 0. - On an intermediate return, KASE will be 1 or 2, indicating - whether X should be overwritten by A * X or A' * X. - On the final return from DLACON, KASE will again be 0. - - Further Details - ======= ======= - - Contributed by Nick Higham, University of Manchester. - Originally named CONEST, dated March 16, 1988. - - Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - a real or complex matrix, with applications to condition estimation", - ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - ===================================================================== -*/ - /* Table of constant values */ int c__1 = 1; double zero = 0.0; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlamch.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,17 +1,26 @@ +/*! @file dlamch.c + * \brief Determines double precision machine parameters + * + *
+ *       -- LAPACK auxiliary routine (version 2.0) --   
+ *       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *       Courant Institute, Argonne National Lab, and Rice University   
+ *       October 31, 1992   
+ * 
+ */ #include +#include "slu_Cnames.h" + #define TRUE_ (1) #define FALSE_ (0) #define abs(x) ((x) >= 0 ? (x) : -(x)) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) -double dlamch_(char *cmach) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/*! \brief + +
     Purpose   
     =======   
 
@@ -47,8 +56,12 @@
             rmax  = overflow threshold  - (base**emax)*(1-eps)   
 
    ===================================================================== 
+
*/ +double dlamch_(char *cmach) +{ + static int first = TRUE_; /* System generated locals */ @@ -125,18 +138,11 @@ /* End of DLAMCH */ } /* dlamch_ */ - - -/* Subroutine */ int dlamc1_(int *beta, int *t, int *rnd, int - *ieee1) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose +/* Subroutine */ +/*! \brief + +
+ Purpose   
     =======   
 
     DLAMC1 determines the machine parameters given by BETA, T, RND, and   
@@ -177,7 +183,11 @@
           Comms. of the ACM, 17, 276-277.   
 
    ===================================================================== 
+
*/ +int dlamc1_(int *beta, int *t, int *rnd, int + *ieee1) +{ /* Initialized data */ static int first = TRUE_; /* System generated locals */ @@ -337,16 +347,10 @@ } /* dlamc1_ */ -/* Subroutine */ int dlamc2_(int *beta, int *t, int *rnd, - double *eps, int *emin, double *rmin, int *emax, - double *rmax) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - +/* Subroutine */ +/*! \brief + +
     Purpose   
     =======   
 
@@ -402,7 +406,13 @@
     W. Kahan of the University of California at Berkeley.   
 
    ===================================================================== 
+
*/ +int dlamc2_(int *beta, int *t, int *rnd, + double *eps, int *emin, double *rmin, int *emax, + double *rmax) +{ + /* Table of constant values */ static int c__1 = 1; @@ -638,15 +648,9 @@ } /* dlamc2_ */ - -double dlamc3_(double *a, double *b) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - +/*! \brief + +
     Purpose   
     =======   
 
@@ -663,7 +667,10 @@
             The values A and B.   
 
    ===================================================================== 
+
*/ +double dlamc3_(double *a, double *b) +{ /* >>Start of File<< System generated locals */ double ret_val; @@ -677,14 +684,10 @@ } /* dlamc3_ */ -/* Subroutine */ int dlamc4_(int *emin, double *start, int *base) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/* Subroutine */ +/*! \brief - +
     Purpose   
     =======   
 
@@ -706,7 +709,11 @@
             The base of the machine.   
 
    ===================================================================== 
+
*/ + +int dlamc4_(int *emin, double *start, int *base) +{ /* System generated locals */ int i__1; double d__1; @@ -765,15 +772,10 @@ } /* dlamc4_ */ -/* Subroutine */ int dlamc5_(int *beta, int *p, int *emin, - int *ieee, int *emax, double *rmax) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - +/* Subroutine */ +/*! \brief + +
     Purpose   
     =======   
 
@@ -815,7 +817,13 @@
        First compute LEXP and UEXP, two powers of 2 that bound   
        abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
        approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
+       (EMAX is the exponent of the required number RMAX).
+
+*/ +int dlamc5_(int *beta, int *p, int *emin, + int *ieee, int *emax, double *rmax) +{ + /* Table of constant values */ static double c_b5 = 0.; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlangs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlangs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlangs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,58 +1,65 @@ - -/* +/*! @file dlangs.c + * \brief Returns the value of the one norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from lapack routine DLANGE 
+ * 
*/ /* * File name: dlangs.c * History: Modified from lapack routine DLANGE */ #include -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   DLANGS returns the value of the one norm, or the Frobenius norm, or 
+ *   the infinity norm, or the element of largest absolute value of a 
+ *   real matrix A.   
+ *
+ *   Description   
+ *   ===========   
+ *
+ *   DLANGE returns the value   
+ *
+ *      DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
+ *               (   
+ *               ( norm1(A),         NORM = '1', 'O' or 'o'   
+ *               (   
+ *               ( normI(A),         NORM = 'I' or 'i'   
+ *               (   
+ *               ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
+ *
+ *   where  norm1  denotes the  one norm of a matrix (maximum column sum), 
+ *   normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
+ *   normF  denotes the  Frobenius norm of a matrix (square root of sum of 
+ *   squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   NORM    (input) CHARACTER*1   
+ *           Specifies the value to be returned in DLANGE as described above.   
+ *   A       (input) SuperMatrix*
+ *           The M by N sparse matrix A. 
+ *
+ *  =====================================================================
+ * 
+ */ + double dlangs(char *norm, SuperMatrix *A) { -/* - Purpose - ======= - - DLANGS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - DLANGE returns the value - - DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANGE as described above. - A (input) SuperMatrix* - The M by N sparse matrix A. - - ===================================================================== -*/ /* Local variables */ NCformat *Astore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlaqgs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlaqgs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dlaqgs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,80 +1,89 @@ - -/* +/*! @file dlaqgs.c + * \brief Equlibrates a general sprase matrix + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
+ * Modified from LAPACK routine DLAQGE
+ * 
*/ /* * File name: dlaqgs.c * History: Modified from LAPACK routine DLAQGE */ #include -#include "dsp_defs.h" +#include "slu_ddefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   DLAQGS equilibrates a general sparse M by N matrix A using the row and   
+ *   scaling factors in the vectors R and C.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input/output) SuperMatrix*
+ *           On exit, the equilibrated matrix.  See EQUED for the form of 
+ *           the equilibrated matrix. The type of A can be:
+ *	    Stype = NC; Dtype = SLU_D; Mtype = GE.
+ *	    
+ *   R       (input) double*, dimension (A->nrow)
+ *           The row scale factors for A.
+ *	    
+ *   C       (input) double*, dimension (A->ncol)
+ *           The column scale factors for A.
+ *	    
+ *   ROWCND  (input) double
+ *           Ratio of the smallest R(i) to the largest R(i).
+ *	    
+ *   COLCND  (input) double
+ *           Ratio of the smallest C(i) to the largest C(i).
+ *	    
+ *   AMAX    (input) double
+ *           Absolute value of largest matrix entry.
+ *	    
+ *   EQUED   (output) char*
+ *           Specifies the form of equilibration that was done.   
+ *           = 'N':  No equilibration   
+ *           = 'R':  Row equilibration, i.e., A has been premultiplied by  
+ *                   diag(R).   
+ *           = 'C':  Column equilibration, i.e., A has been postmultiplied  
+ *                   by diag(C).   
+ *           = 'B':  Both row and column equilibration, i.e., A has been
+ *                   replaced by diag(R) * A * diag(C).   
+ *
+ *   Internal Parameters   
+ *   ===================   
+ *
+ *   THRESH is a threshold value used to decide if row or column scaling   
+ *   should be done based on the ratio of the row or column scaling   
+ *   factors.  If ROWCND < THRESH, row scaling is done, and if   
+ *   COLCND < THRESH, column scaling is done.   
+ *
+ *   LARGE and SMALL are threshold values used to decide if row scaling   
+ *   should be done based on the absolute size of the largest matrix   
+ *   element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
+ *
+ *   ===================================================================== 
+ * 
+ */ + void dlaqgs(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { -/* - Purpose - ======= - DLAQGS equilibrates a general sparse M by N matrix A using the row and - scaling factors in the vectors R and C. - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input/output) SuperMatrix* - On exit, the equilibrated matrix. See EQUED for the form of - the equilibrated matrix. The type of A can be: - Stype = NC; Dtype = SLU_D; Mtype = GE. - - R (input) double*, dimension (A->nrow) - The row scale factors for A. - - C (input) double*, dimension (A->ncol) - The column scale factors for A. - - ROWCND (input) double - Ratio of the smallest R(i) to the largest R(i). - - COLCND (input) double - Ratio of the smallest C(i) to the largest C(i). - - AMAX (input) double - Absolute value of largest matrix entry. - - EQUED (output) char* - Specifies the form of equilibration that was done. - = 'N': No equilibration - = 'R': Row equilibration, i.e., A has been premultiplied by - diag(R). - = 'C': Column equilibration, i.e., A has been postmultiplied - by diag(C). - = 'B': Both row and column equilibration, i.e., A has been - replaced by diag(R) * A * diag(C). - - Internal Parameters - =================== - - THRESH is a threshold value used to decide if row or column scaling - should be done based on the ratio of the row or column scaling - factors. If ROWCND < THRESH, row scaling is done, and if - COLCND < THRESH, column scaling is done. - - LARGE and SMALL are threshold values used to decide if row scaling - should be done based on the absolute size of the largest matrix - element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. - - ===================================================================== -*/ - #define THRESH (0.1) /* Local variables */ Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dldperm.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dldperm.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dldperm.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,165 @@ + +/*! @file + * \brief Finds a row permutation so that the matrix has large entries on the diagonal + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +extern void mc64id_(int_t*); +extern void mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [], + int_t*, int_t [], int_t*, int_t[], int_t*, double [], + int_t [], int_t []); + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   DLDPERM finds a row permutation so that the matrix has large
+ *   entries on the diagonal.
+ *
+ * Arguments
+ * =========
+ *
+ * job    (input) int
+ *        Control the action. Possible values for JOB are:
+ *        = 1 : Compute a row permutation of the matrix so that the
+ *              permuted matrix has as many entries on its diagonal as
+ *              possible. The values on the diagonal are of arbitrary size.
+ *              HSL subroutine MC21A/AD is used for this.
+ *        = 2 : Compute a row permutation of the matrix so that the smallest 
+ *              value on the diagonal of the permuted matrix is maximized.
+ *        = 3 : Compute a row permutation of the matrix so that the smallest
+ *              value on the diagonal of the permuted matrix is maximized.
+ *              The algorithm differs from the one used for JOB = 2 and may
+ *              have quite a different performance.
+ *        = 4 : Compute a row permutation of the matrix so that the sum
+ *              of the diagonal entries of the permuted matrix is maximized.
+ *        = 5 : Compute a row permutation of the matrix so that the product
+ *              of the diagonal entries of the permuted matrix is maximized
+ *              and vectors to scale the matrix so that the nonzero diagonal 
+ *              entries of the permuted matrix are one in absolute value and 
+ *              all the off-diagonal entries are less than or equal to one in 
+ *              absolute value.
+ *        Restriction: 1 <= JOB <= 5.
+ *
+ * n      (input) int
+ *        The order of the matrix.
+ *
+ * nnz    (input) int
+ *        The number of nonzeros in the matrix.
+ *
+ * adjncy (input) int*, of size nnz
+ *        The adjacency structure of the matrix, which contains the row
+ *        indices of the nonzeros.
+ *
+ * colptr (input) int*, of size n+1
+ *        The pointers to the beginning of each column in ADJNCY.
+ *
+ * nzval  (input) double*, of size nnz
+ *        The nonzero values of the matrix. nzval[k] is the value of
+ *        the entry corresponding to adjncy[k].
+ *        It is not used if job = 1.
+ *
+ * perm   (output) int*, of size n
+ *        The permutation vector. perm[i] = j means row i in the
+ *        original matrix is in row j of the permuted matrix.
+ *
+ * u      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the row scaling factors. 
+ *
+ * v      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the column scaling factors. 
+ *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
+ * 
+ */ + +int +dldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], + double nzval[], int_t *perm, double u[], double v[]) +{ + int_t i, liw, ldw, num; + int_t *iw, icntl[10], info[10]; + double *dw; + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Enter dldperm()"); +#endif + liw = 5*n; + if ( job == 3 ) liw = 10*n + nnz; + if ( !(iw = intMalloc(liw)) ) ABORT("Malloc fails for iw[]"); + ldw = 3*n + nnz; + if ( !(dw = (double*) SUPERLU_MALLOC(ldw * sizeof(double))) ) + ABORT("Malloc fails for dw[]"); + + /* Increment one to get 1-based indexing. */ + for (i = 0; i <= n; ++i) ++colptr[i]; + for (i = 0; i < nnz; ++i) ++adjncy[i]; +#if ( DEBUGlevel>=2 ) + printf("LDPERM(): n %d, nnz %d\n", n, nnz); + slu_PrintInt10("colptr", n+1, colptr); + slu_PrintInt10("adjncy", nnz, adjncy); +#endif + + /* + * NOTE: + * ===== + * + * MC64AD assumes that column permutation vector is defined as: + * perm(i) = j means column i of permuted A is in column j of original A. + * + * Since a symmetric permutation preserves the diagonal entries. Then + * by the following relation: + * P'(A*P')P = P'A + * we can apply inverse(perm) to rows of A to get large diagonal entries. + * But, since 'perm' defined in MC64AD happens to be the reverse of + * SuperLU's definition of permutation vector, therefore, it is already + * an inverse for our purpose. We will thus use it directly. + * + */ + mc64id_(icntl); +#if 0 + /* Suppress error and warning messages. */ + icntl[0] = -1; + icntl[1] = -1; +#endif + + mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval, &num, perm, + &liw, iw, &ldw, dw, icntl, info); + +#if ( DEBUGlevel>=2 ) + slu_PrintInt10("perm", n, perm); + printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); +#endif + if ( info[0] == 1 ) { /* Structurally singular */ + printf(".. The last %d permutations:\n", n-num); + slu_PrintInt10("perm", n-num, &perm[num]); + } + + /* Restore to 0-based indexing. */ + for (i = 0; i <= n; ++i) --colptr[i]; + for (i = 0; i < nnz; ++i) --adjncy[i]; + for (i = 0; i < n; ++i) --perm[i]; + + if ( job == 5 ) + for (i = 0; i < n; ++i) { + u[i] = dw[i]; + v[i] = dw[n+i]; + } + + SUPERLU_FREE(iw); + SUPERLU_FREE(dw); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Exit dldperm()"); +#endif + + return info[0]; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dmemory.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dmemory.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dmemory.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,54 +1,32 @@ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 +/*! @file dmemory.c + * \brief Memory details * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" -/* Constants */ -#define NO_MEMTYPE 4 /* 0: lusup; - 1: ucol; - 2: lsub; - 3: usub */ -#define GluIntArray(n) (5 * (n) + 5) /* Internal prototypes */ void *dexpand (int *, MemType,int, int, GlobalLU_t *); -int dLUWorkInit (int, int, int, int **, double **, LU_space_t); +int dLUWorkInit (int, int, int, int **, double **, GlobalLU_t *); void copy_mem_double (int, void *, void *); void dStackCompress (GlobalLU_t *); -void dSetupSpace (void *, int, LU_space_t *); -void *duser_malloc (int, int); -void duser_free (int, int); +void dSetupSpace (void *, int, GlobalLU_t *); +void *duser_malloc (int, int, GlobalLU_t *); +void duser_free (int, int, GlobalLU_t *); -/* External prototypes (in memory.c - prec-indep) */ +/* External prototypes (in memory.c - prec-independent) */ extern void copy_mem_int (int, void *, void *); extern void user_bcopy (char *, char *, int); -/* Headers for 4 types of dynamatically managed memory */ -typedef struct e_node { - int size; /* length of the memory that has been used */ - void *mem; /* pointer to the new malloc'd store */ -} ExpHeader; -typedef struct { - int size; - int used; - int top1; /* grow upward, relative to &array[0] */ - int top2; /* grow downward */ - void *array; -} LU_stack_t; - -/* Variables local to this file */ -static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */ -static LU_stack_t stack; -static int no_expand; - /* Macros to manipulate stack */ -#define StackFull(x) ( x + stack.used >= stack.size ) +#define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size ) #define NotDoubleAlign(addr) ( (long int)addr & 7 ) #define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L ) #define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ @@ -58,66 +36,67 @@ -/* - * Setup the memory model to be used for factorization. +/*! \brief Setup the memory model to be used for factorization. + * * lwork = 0: use system malloc; * lwork > 0: use user-supplied work[] space. */ -void dSetupSpace(void *work, int lwork, LU_space_t *MemModel) +void dSetupSpace(void *work, int lwork, GlobalLU_t *Glu) { if ( lwork == 0 ) { - *MemModel = SYSTEM; /* malloc/free */ + Glu->MemModel = SYSTEM; /* malloc/free */ } else if ( lwork > 0 ) { - *MemModel = USER; /* user provided space */ - stack.used = 0; - stack.top1 = 0; - stack.top2 = (lwork/4)*4; /* must be word addressable */ - stack.size = stack.top2; - stack.array = (void *) work; + Glu->MemModel = USER; /* user provided space */ + Glu->stack.used = 0; + Glu->stack.top1 = 0; + Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */ + Glu->stack.size = Glu->stack.top2; + Glu->stack.array = (void *) work; } } -void *duser_malloc(int bytes, int which_end) +void *duser_malloc(int bytes, int which_end, GlobalLU_t *Glu) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { - buf = (char*) stack.array + stack.top1; - stack.top1 += bytes; + buf = (char*) Glu->stack.array + Glu->stack.top1; + Glu->stack.top1 += bytes; } else { - stack.top2 -= bytes; - buf = (char*) stack.array + stack.top2; + Glu->stack.top2 -= bytes; + buf = (char*) Glu->stack.array + Glu->stack.top2; } - stack.used += bytes; + Glu->stack.used += bytes; return buf; } -void duser_free(int bytes, int which_end) +void duser_free(int bytes, int which_end, GlobalLU_t *Glu) { if ( which_end == HEAD ) { - stack.top1 -= bytes; + Glu->stack.top1 -= bytes; } else { - stack.top2 += bytes; + Glu->stack.top2 += bytes; } - stack.used -= bytes; + Glu->stack.used -= bytes; } -/* +/*! \brief + * + *
  * mem_usage consists of the following fields:
  *    - for_lu (float)
  *      The amount of space used in bytes for the L\U data structures.
  *    - total_needed (float)
  *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
+ * 
*/ int dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) { @@ -132,33 +111,75 @@ dword = sizeof(double); /* For LU factors */ - mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * - dword + Lstore->rowind_colptr[n] * iword ); - mem_usage->for_lu += (float)( (n + 1) * iword + + mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0) * iword + Ustore->colptr[n] * (dword + iword) ); /* Working storage to support factorization */ mem_usage->total_needed = mem_usage->for_lu + - (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword + - (panel_size + 1) * n * dword ); + (float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword + + (panel_size + 1.0) * n * dword ); - mem_usage->expansions = --no_expand; - return 0; } /* dQuerySpace */ -/* - * Allocate storage for the data structures common to all factor routines. - * For those unpredictable size, make a guess as FILL * nnz(A). + +/*! \brief + * + *
+ * mem_usage consists of the following fields:
+ *    - for_lu (float)
+ *      The amount of space used in bytes for the L\U data structures.
+ *    - total_needed (float)
+ *      The amount of space needed in bytes to perform factorization.
+ * 
+ */ +int ilu_dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) +{ + SCformat *Lstore; + NCformat *Ustore; + register int n, panel_size = sp_ienv(1); + register float iword, dword; + + Lstore = L->Store; + Ustore = U->Store; + n = L->ncol; + iword = sizeof(int); + dword = sizeof(double); + + /* For LU factors */ + mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0f) * iword + + Ustore->colptr[n] * (dword + iword) ); + + /* Working storage to support factorization. + ILU needs 5*n more integers than LU */ + mem_usage->total_needed = mem_usage->for_lu + + (float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword + + (panel_size + 1.0f) * n * dword ); + + return 0; +} /* ilu_dQuerySpace */ + + +/*! \brief Allocate storage for the data structures common to all factor routines. + * + *
+ * For those unpredictable size, estimate as fill_ratio * nnz(A).
  * Return value:
  *     If lwork = -1, return the estimated amount of space required, plus n;
  *     otherwise, return the amount of space actually allocated when
  *     memory allocation failure occurred.
+ * 
*/ int dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, - int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, - int **iwork, double **dwork) + int panel_size, double fill_ratio, SuperMatrix *L, SuperMatrix *U, + GlobalLU_t *Glu, int **iwork, double **dwork) { int info, iword, dword; SCformat *Lstore; @@ -170,32 +191,33 @@ double *ucol; int *usub, *xusub; int nzlmax, nzumax, nzlumax; - int FILL = sp_ienv(6); - Glu->n = n; - no_expand = 0; iword = sizeof(int); dword = sizeof(double); + Glu->n = n; + Glu->num_expansions = 0; - if ( !expanders ) - expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); - if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); + if ( !Glu->expanders ) + Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE * + sizeof(ExpHeader) ); + if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact != SamePattern_SameRowPerm ) { /* Guess for L\U factors */ - nzumax = nzlumax = FILL * annz; - nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; + nzumax = nzlumax = fill_ratio * annz; + nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else { - dSetupSpace(work, lwork, &Glu->MemModel); + dSetupSpace(work, lwork, Glu); } -#ifdef DEBUG - printf("dLUMemInit() called: annz %d, MemModel %d\n", - annz, Glu->MemModel); +#if ( PRNTlevel >= 1 ) + printf("dLUMemInit() called: fill_ratio %ld, nzlmax %ld, nzumax %ld\n", + fill_ratio, nzlmax, nzumax); + fflush(stdout); #endif /* Integer pointers for L\U factors */ @@ -206,11 +228,11 @@ xlusup = intMalloc(n+1); xusub = intMalloc(n+1); } else { - xsup = (int *)duser_malloc((n+1) * iword, HEAD); - supno = (int *)duser_malloc((n+1) * iword, HEAD); - xlsub = (int *)duser_malloc((n+1) * iword, HEAD); - xlusup = (int *)duser_malloc((n+1) * iword, HEAD); - xusub = (int *)duser_malloc((n+1) * iword, HEAD); + xsup = (int *)duser_malloc((n+1) * iword, HEAD, Glu); + supno = (int *)duser_malloc((n+1) * iword, HEAD, Glu); + xlsub = (int *)duser_malloc((n+1) * iword, HEAD, Glu); + xlusup = (int *)duser_malloc((n+1) * iword, HEAD, Glu); + xusub = (int *)duser_malloc((n+1) * iword, HEAD, Glu); } lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu ); @@ -225,7 +247,8 @@ SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { - duser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); + duser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, + HEAD, Glu); } nzlumax /= 2; nzumax /= 2; @@ -234,6 +257,11 @@ printf("Not enough memory to perform factorization.\n"); return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n); } +#if ( PRNTlevel >= 1) + printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", + nzlmax, nzumax); + fflush(stdout); +#endif lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) dexpand( &nzlmax, LSUB, 0, 0, Glu ); @@ -260,18 +288,18 @@ Glu->MemModel = SYSTEM; } else { Glu->MemModel = USER; - stack.top2 = (lwork/4)*4; /* must be word-addressable */ - stack.size = stack.top2; + Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */ + Glu->stack.size = Glu->stack.top2; } - lsub = expanders[LSUB].mem = Lstore->rowind; - lusup = expanders[LUSUP].mem = Lstore->nzval; - usub = expanders[USUB].mem = Ustore->rowind; - ucol = expanders[UCOL].mem = Ustore->nzval;; - expanders[LSUB].size = nzlmax; - expanders[LUSUP].size = nzlumax; - expanders[USUB].size = nzumax; - expanders[UCOL].size = nzumax; + lsub = Glu->expanders[LSUB].mem = Lstore->rowind; + lusup = Glu->expanders[LUSUP].mem = Lstore->nzval; + usub = Glu->expanders[USUB].mem = Ustore->rowind; + ucol = Glu->expanders[UCOL].mem = Ustore->nzval;; + Glu->expanders[LSUB].size = nzlmax; + Glu->expanders[LUSUP].size = nzlumax; + Glu->expanders[USUB].size = nzumax; + Glu->expanders[UCOL].size = nzumax; } Glu->xsup = xsup; @@ -287,20 +315,20 @@ Glu->nzumax = nzumax; Glu->nzlumax = nzlumax; - info = dLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); + info = dLUWorkInit(m, n, panel_size, iwork, dwork, Glu); if ( info ) return ( info + dmemory_usage(nzlmax, nzumax, nzlumax, n) + n); - ++no_expand; + ++Glu->num_expansions; return 0; } /* dLUMemInit */ -/* Allocate known working storage. Returns 0 if success, otherwise +/*! \brief Allocate known working storage. Returns 0 if success, otherwise returns the number of bytes allocated so far when failure occurred. */ int dLUWorkInit(int m, int n, int panel_size, int **iworkptr, - double **dworkptr, LU_space_t MemModel) + double **dworkptr, GlobalLU_t *Glu) { int isize, dsize, extra; double *old_ptr; @@ -311,19 +339,19 @@ dsize = (m * panel_size + NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double); - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *iworkptr = (int *) intCalloc(isize/sizeof(int)); else - *iworkptr = (int *) duser_malloc(isize, TAIL); + *iworkptr = (int *) duser_malloc(isize, TAIL, Glu); if ( ! *iworkptr ) { fprintf(stderr, "dLUWorkInit: malloc fails for local iworkptr[]\n"); return (isize + n); } - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *dworkptr = (double *) SUPERLU_MALLOC(dsize); else { - *dworkptr = (double *) duser_malloc(dsize, TAIL); + *dworkptr = (double *) duser_malloc(dsize, TAIL, Glu); if ( NotDoubleAlign(*dworkptr) ) { old_ptr = *dworkptr; *dworkptr = (double*) DoubleAlign(*dworkptr); @@ -332,8 +360,8 @@ #ifdef DEBUG printf("dLUWorkInit: not aligned, extra %d\n", extra); #endif - stack.top2 -= extra; - stack.used += extra; + Glu->stack.top2 -= extra; + Glu->stack.used += extra; } } if ( ! *dworkptr ) { @@ -345,8 +373,7 @@ } -/* - * Set up pointers for real working arrays. +/*! \brief Set up pointers for real working arrays. */ void dSetRWork(int m, int panel_size, double *dworkptr, @@ -362,8 +389,7 @@ dfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero); } -/* - * Free the working storage used by factor routines. +/*! \brief Free the working storage used by factor routines. */ void dLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu) { @@ -371,18 +397,21 @@ SUPERLU_FREE (iwork); SUPERLU_FREE (dwork); } else { - stack.used -= (stack.size - stack.top2); - stack.top2 = stack.size; + Glu->stack.used -= (Glu->stack.size - Glu->stack.top2); + Glu->stack.top2 = Glu->stack.size; /* dStackCompress(Glu); */ } - SUPERLU_FREE (expanders); - expanders = 0; + SUPERLU_FREE (Glu->expanders); + Glu->expanders = NULL; } -/* Expand the data structures for L and U during the factorization. +/*! \brief Expand the data structures for L and U during the factorization. + * + *
  * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int dLUMemXpand(int jcol, @@ -446,8 +475,7 @@ for (i = 0; i < howmany; i++) dnew[i] = dold[i]; } -/* - * Expand the existing storage to accommodate more fill-ins. +/*! \brief Expand the existing storage to accommodate more fill-ins. */ void *dexpand ( @@ -463,12 +491,14 @@ float alpha; void *new_mem, *old_mem; int new_len, tries, lword, extra, bytes_to_copy; + ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */ alpha = EXPAND; - if ( no_expand == 0 || keep_prev ) /* First time allocate requested */ + if ( Glu->num_expansions == 0 || keep_prev ) { + /* First time allocate requested */ new_len = *prev_len; - else { + } else { new_len = alpha * *prev_len; } @@ -476,9 +506,8 @@ else lword = sizeof(double); if ( Glu->MemModel == SYSTEM ) { - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ - if ( no_expand != 0 ) { + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); + if ( Glu->num_expansions != 0 ) { tries = 0; if ( keep_prev ) { if ( !new_mem ) return (NULL); @@ -487,8 +516,7 @@ if ( ++tries > 10 ) return (NULL); alpha = Reduce(alpha); new_len = alpha * *prev_len; - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); } } if ( type == LSUB || type == USUB ) { @@ -501,8 +529,8 @@ expanders[type].mem = (void *) new_mem; } else { /* MemModel == USER */ - if ( no_expand == 0 ) { - new_mem = duser_malloc(new_len * lword, HEAD); + if ( Glu->num_expansions == 0 ) { + new_mem = duser_malloc(new_len * lword, HEAD, Glu); if ( NotDoubleAlign(new_mem) && (type == LUSUP || type == UCOL) ) { old_mem = new_mem; @@ -511,12 +539,11 @@ #ifdef DEBUG printf("expand(): not aligned, extra %d\n", extra); #endif - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; } expanders[type].mem = (void *) new_mem; - } - else { + } else { tries = 0; extra = (new_len - *prev_len) * lword; if ( keep_prev ) { @@ -532,7 +559,7 @@ if ( type != USUB ) { new_mem = (void*)((char*)expanders[type + 1].mem + extra); - bytes_to_copy = (char*)stack.array + stack.top1 + bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1 - (char*)expanders[type + 1].mem; user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy); @@ -548,11 +575,11 @@ Glu->ucol = expanders[UCOL].mem = (void*)((char*)expanders[UCOL].mem + extra); } - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; if ( type == UCOL ) { - stack.top1 += extra; /* Add same amount for USUB */ - stack.used += extra; + Glu->stack.top1 += extra; /* Add same amount for USUB */ + Glu->stack.used += extra; } } /* if ... */ @@ -562,15 +589,14 @@ expanders[type].size = new_len; *prev_len = new_len; - if ( no_expand ) ++no_expand; + if ( Glu->num_expansions ) ++Glu->num_expansions; return (void *) expanders[type].mem; } /* dexpand */ -/* - * Compress the work[] array to remove fragmentation. +/*! \brief Compress the work[] array to remove fragmentation. */ void dStackCompress(GlobalLU_t *Glu) @@ -610,9 +636,9 @@ usub = ito; last = (char*)usub + xusub[ndim] * iword; - fragment = (char*) (((char*)stack.array + stack.top1) - last); - stack.used -= (long int) fragment; - stack.top1 -= (long int) fragment; + fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last); + Glu->stack.used -= (long int) fragment; + Glu->stack.top1 -= (long int) fragment; Glu->ucol = ucol; Glu->lsub = lsub; @@ -626,8 +652,7 @@ } -/* - * Allocate storage for original matrix A +/*! \brief Allocate storage for original matrix A */ void dallocateA(int n, int nnz, double **a, int **asub, int **xa) @@ -641,7 +666,7 @@ double *doubleMalloc(int n) { double *buf; - buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); + buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n"); } @@ -653,7 +678,7 @@ double *buf; register int i; double zero = 0.0; - buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); + buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n"); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,32 @@ -/* +/*! @file dpanel_bmod.c + * \brief Performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ /* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. + */ #include #include -#include "dsp_defs.h" +#include "slu_ddefs.h" /* * Function prototypes @@ -30,6 +35,25 @@ void dmatvec(int, int, int, double *, double *, double *); extern void dcheck_tempv(); +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *    Performs numeric block updates (sup-panel) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ *    Before entering this routine, the original nonzeros in the panel 
+ *    were already copied into the spa[m,w].
+ *
+ *    Updated/Output parameters-
+ *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
+ *    collectively in the m-by-w vector dense[*]. 
+ * 
+ */ + void dpanel_bmod ( const int m, /* in - number of rows in the matrix */ @@ -44,23 +68,8 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose - * ======= - * - * Performs numeric block updates (sup-panel) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - * Before entering this routine, the original nonzeros in the panel - * were already copied into the spa[m,w]. - * - * Updated/Output parameters- - * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned - * collectively in the m-by-w vector dense[*]. - * - */ + #ifdef USE_VENDOR_BLAS #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,48 +1,32 @@ - -/* +/*! @file dpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -#include "util.h" -void -dpanel_dfs ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - SuperMatrix *A, /* in - original matrix */ - int *perm_r, /* in */ - int *nseg, /* out */ - double *dense, /* out */ - int *panel_lsub, /* out */ - int *segrep, /* out */ - int *repfnz, /* out */ - int *xprune, /* out */ - int *marker, /* out */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* +#include "slu_ddefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -68,8 +52,29 @@
  *   repfnz: SuperA-col --> PA-row
  *   parent: SuperA-col --> SuperA-col
  *   xplore: SuperA-col --> index to L-structure
- *
+ * 
*/ + +void +dpanel_dfs ( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + double *dense, /* out */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *xprune, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + NCPformat *Astore; double *a; int *asub; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,44 +1,36 @@ -/* +/*! @file dpivotL.c + * \brief Performs numerical pivoting + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include #include -#include "dsp_defs.h" +#include "slu_ddefs.h" #undef DEBUG -int -dpivotL( - const int jcol, /* in */ - const double u, /* in - diagonal pivoting threshold */ - int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ - int *perm_r, /* may be modified */ - int *iperm_r, /* in - inverse of perm_r */ - int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ - int *pivrow, /* out */ - GlobalLU_t *Glu, /* modified - global LU data structures */ - SuperLUStat_t *stat /* output */ - ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *   Performs the numerical pivoting on the current column of L,
@@ -57,8 +49,23 @@
  *
  *   Return value: 0      success;
  *                 i > 0  U(i,i) is exactly zero.
- *
+ * 
*/ + +int +dpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int *iperm_r, /* in - inverse of perm_r */ + int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ + int *pivrow, /* out */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ int nsupr; /* no of rows in the supernode */ @@ -116,8 +123,12 @@ /* Test for singularity */ if ( pivmax == 0.0 ) { +#if 1 *pivrow = lsub_ptr[pivptr]; perm_r[*pivrow] = jcol; +#else + perm_r[diagind] = jcol; +#endif *usepr = 0; return (jcol+1); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotgrowth.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotgrowth.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotgrowth.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,21 +1,20 @@ - -/* +/*! @file dpivotgrowth.c + * \brief Computes the reciprocal pivot growth factor + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" -double -dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, - SuperMatrix *L, SuperMatrix *U) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -43,8 +42,14 @@
  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
  *          storage scheme, i.e., U has types: Stype = NC;
  *          Dtype = SLU_D; Mtype = TRU.
- *
+ * 
*/ + +double +dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, + SuperMatrix *L, SuperMatrix *U) +{ + NCformat *Astore; SCformat *Lstore; NCformat *Ustore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpruneL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpruneL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpruneL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,39 @@ - -/* +/*! @file dpruneL.c + * \brief Prunes the L-structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ *
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Prunes the L-structure of supernodes whose L-structure
+ *   contains the current pivot row "pivrow"
+ * 
+ */ + void dpruneL( const int jcol, /* in */ @@ -35,13 +46,7 @@ GlobalLU_t *Glu /* modified - global LU data structures */ ) { -/* - * Purpose - * ======= - * Prunes the L-structure of supernodes whose L-structure - * contains the current pivot row "pivrow" - * - */ + double utemp; int jsupno, irep, irep1, kmin, kmax, krow, movnum; int i, ktemp, minloc, maxloc; @@ -108,8 +113,8 @@ kmax--; else if ( perm_r[lsub[kmin]] != EMPTY ) kmin++; - else { /* kmin below pivrow, and kmax above pivrow: - * interchange the two subscripts + else { /* kmin below pivrow (not yet pivoted), and kmax + * above pivrow: interchange the two subscripts */ ktemp = lsub[kmin]; lsub[kmin] = lsub[kmax]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadhb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadhb.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadhb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,18 +1,85 @@ - -/* +/*! @file dreadhb.c + * \brief Read a matrix stored in Harwell-Boeing format + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Purpose
+ * =======
+ * 
+ * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
+ * as described below.
+ * 
+ * Line 1 (A72,A8) 
+ *  	Col. 1 - 72   Title (TITLE) 
+ *	Col. 73 - 80  Key (KEY) 
+ * 
+ * Line 2 (5I14) 
+ * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
+ * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
+ * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
+ * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
+ *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
+ *                    (including starting guesses and solution vectors 
+ *		       if present) 
+ *           	      (zero indicates no right-hand side data is present) 
+ *
+ * Line 3 (A3, 11X, 4I14) 
+ *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
+ * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
+ * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
+ *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
+ *	              (equal to number of entries for assembled matrices) 
+ * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
+ *	              (zero in the case of assembled matrices) 
+ * Line 4 (2A16, 2A20) 
+ * 	Col. 1 - 16   Format for pointers (PTRFMT) 
+ *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
+ *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
+ * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
+ *
+ * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
+ *    	Col. 1 	      Right-hand side type: 
+ *	         	  F for full storage or M for same format as matrix 
+ *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
+ *    	Col. 3        X if an exact solution vector(s) is supplied. 
+ *	Col. 15 - 28  Number of right-hand sides (NRHS) 
+ *	Col. 29 - 42  Number of row indices (NRHSIX) 
+ *          	      (ignored in case of unassembled matrices) 
+ *
+ * The three character type field on line 3 describes the matrix type. 
+ * The following table lists the permitted values for each of the three 
+ * characters. As an example of the type field, RSA denotes that the matrix 
+ * is real, symmetric, and assembled. 
+ *
+ * First Character: 
+ *	R Real matrix 
+ *	C Complex matrix 
+ *	P Pattern only (no numerical values supplied) 
+ *
+ * Second Character: 
+ *	S Symmetric 
+ *	U Unsymmetric 
+ *	H Hermitian 
+ *	Z Skew symmetric 
+ *	R Rectangular 
+ *
+ * Third Character: 
+ *	A Assembled 
+ *	E Elemental matrices (unassembled) 
+ *
+ * 
*/ #include #include -#include "dsp_defs.h" +#include "slu_ddefs.h" -/* Eat up the rest of the current line */ +/*! \brief Eat up the rest of the current line */ int dDumpLine(FILE *fp) { register int c; @@ -60,7 +127,7 @@ return 0; } -int dReadVector(FILE *fp, int n, int *where, int perline, int persize) +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) { register int i, j, item; char tmp, buf[100]; @@ -108,72 +175,6 @@ dreadhb(int *nrow, int *ncol, int *nonz, double **nzval, int **rowind, int **colptr) { -/* - * Purpose - * ======= - * - * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format - * as described below. - * - * Line 1 (A72,A8) - * Col. 1 - 72 Title (TITLE) - * Col. 73 - 80 Key (KEY) - * - * Line 2 (5I14) - * Col. 1 - 14 Total number of lines excluding header (TOTCRD) - * Col. 15 - 28 Number of lines for pointers (PTRCRD) - * Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - * Col. 43 - 56 Number of lines for numerical values (VALCRD) - * Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - * (including starting guesses and solution vectors - * if present) - * (zero indicates no right-hand side data is present) - * - * Line 3 (A3, 11X, 4I14) - * Col. 1 - 3 Matrix type (see below) (MXTYPE) - * Col. 15 - 28 Number of rows (or variables) (NROW) - * Col. 29 - 42 Number of columns (or elements) (NCOL) - * Col. 43 - 56 Number of row (or variable) indices (NNZERO) - * (equal to number of entries for assembled matrices) - * Col. 57 - 70 Number of elemental matrix entries (NELTVL) - * (zero in the case of assembled matrices) - * Line 4 (2A16, 2A20) - * Col. 1 - 16 Format for pointers (PTRFMT) - * Col. 17 - 32 Format for row (or variable) indices (INDFMT) - * Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - * Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - * - * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - * Col. 1 Right-hand side type: - * F for full storage or M for same format as matrix - * Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - * Col. 3 X if an exact solution vector(s) is supplied. - * Col. 15 - 28 Number of right-hand sides (NRHS) - * Col. 29 - 42 Number of row indices (NRHSIX) - * (ignored in case of unassembled matrices) - * - * The three character type field on line 3 describes the matrix type. - * The following table lists the permitted values for each of the three - * characters. As an example of the type field, RSA denotes that the matrix - * is real, symmetric, and assembled. - * - * First Character: - * R Real matrix - * C Complex matrix - * P Pattern only (no numerical values supplied) - * - * Second Character: - * S Symmetric - * U Unsymmetric - * H Hermitian - * Z Skew symmetric - * R Rectangular - * - * Third Character: - * A Assembled - * E Elemental matrices (unassembled) - * - */ register int i, numer_lines = 0, rhscrd = 0; int tmp, colnum, colsize, rownum, rowsize, valnum, valsize; @@ -244,8 +245,8 @@ printf("valnum %d, valsize %d\n", valnum, valsize); #endif - dReadVector(fp, *ncol+1, *colptr, colnum, colsize); - dReadVector(fp, *nonz, *rowind, rownum, rowsize); + ReadVector(fp, *ncol+1, *colptr, colnum, colsize); + ReadVector(fp, *nonz, *rowind, rownum, rowsize); if ( numer_lines ) { dReadValues(fp, *nonz, *nzval, valnum, valsize); } Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadrb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadrb.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadrb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,237 @@ + +/*! @file dreadrb.c + * \brief Read a matrix stored in Rutherford-Boeing format + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ * + * Purpose + * ======= + * + * Read a DOUBLE PRECISION matrix stored in Rutherford-Boeing format + * as described below. + * + * Line 1 (A72, A8) + * Col. 1 - 72 Title (TITLE) + * Col. 73 - 80 Matrix name / identifier (MTRXID) + * + * Line 2 (I14, 3(1X, I13)) + * Col. 1 - 14 Total number of lines excluding header (TOTCRD) + * Col. 16 - 28 Number of lines for pointers (PTRCRD) + * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) + * Col. 44 - 56 Number of lines for numerical values (VALCRD) + * + * Line 3 (A3, 11X, 4(1X, I13)) + * Col. 1 - 3 Matrix type (see below) (MXTYPE) + * Col. 15 - 28 Compressed Column: Number of rows (NROW) + * Elemental: Largest integer used to index variable (MVAR) + * Col. 30 - 42 Compressed Column: Number of columns (NCOL) + * Elemental: Number of element matrices (NELT) + * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) + * Elemental: Number of variable indeces (NVARIX) + * Col. 58 - 70 Compressed Column: Unused, explicitly zero + * Elemental: Number of elemental matrix entries (NELTVL) + * + * Line 4 (2A16, A20) + * Col. 1 - 16 Fortran format for pointers (PTRFMT) + * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) + * Col. 33 - 52 Fortran format for numerical values of coefficient matrix + * (VALFMT) + * (blank in the case of matrix patterns) + * + * The three character type field on line 3 describes the matrix type. + * The following table lists the permitted values for each of the three + * characters. As an example of the type field, RSA denotes that the matrix + * is real, symmetric, and assembled. + * + * First Character: + * R Real matrix + * C Complex matrix + * I integer matrix + * P Pattern only (no numerical values supplied) + * Q Pattern only (numerical values supplied in associated auxiliary value + * file) + * + * Second Character: + * S Symmetric + * U Unsymmetric + * H Hermitian + * Z Skew symmetric + * R Rectangular + * + * Third Character: + * A Compressed column form + * E Elemental form + * + * + */ + +#include "slu_ddefs.h" + + +/*! \brief Eat up the rest of the current line */ +static int dDumpLine(FILE *fp) +{ + register int c; + while ((c = fgetc(fp)) != '\n') ; + return 0; +} + +static int dParseIntFormat(char *buf, int *num, int *size) +{ + char *tmp; + + tmp = buf; + while (*tmp++ != '(') ; + sscanf(tmp, "%d", num); + while (*tmp != 'I' && *tmp != 'i') ++tmp; + ++tmp; + sscanf(tmp, "%d", size); + return 0; +} + +static int dParseFloatFormat(char *buf, int *num, int *size) +{ + char *tmp, *period; + + tmp = buf; + while (*tmp++ != '(') ; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' + && *tmp != 'F' && *tmp != 'f') { + /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the + num picked up refers to P, which should be skipped. */ + if (*tmp=='p' || *tmp=='P') { + ++tmp; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + } else { + ++tmp; + } + } + ++tmp; + period = tmp; + while (*period != '.' && *period != ')') ++period ; + *period = '\0'; + *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/ + + return 0; +} + +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) +{ + register int i, j, item; + char tmp, buf[100]; + + i = 0; + while (i < n) { + fgets(buf, 100, fp); /* read a line at a time */ + for (j=0; j + * -- SuperLU routine (version 4.0) -- + * Lawrence Berkeley National Laboratory. + * June 30, 2009 + * + */ + +#include "slu_ddefs.h" + + +void +dreadtriple(int *m, int *n, int *nonz, + double **nzval, int **rowind, int **colptr) +{ +/* + * Output parameters + * ================= + * (a,asub,xa): asub[*] contains the row subscripts of nonzeros + * in columns of matrix A; a[*] the numerical values; + * row i of A is given by a[k],k=xa[i],...,xa[i+1]-1. + * + */ + int j, k, jsize, nnz, nz; + double *a, *val; + int *asub, *xa, *row, *col; + int zero_base = 0; + + /* Matrix format: + * First line: #rows, #cols, #non-zero + * Triplet in the rest of lines: + * row, col, value + */ + + scanf("%d%d", n, nonz); + *m = *n; + printf("m %d, n %d, nonz %d\n", *m, *n, *nonz); + dallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */ + a = *nzval; + asub = *rowind; + xa = *colptr; + + val = (double *) SUPERLU_MALLOC(*nonz * sizeof(double)); + row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + + for (j = 0; j < *n; ++j) xa[j] = 0; + + /* Read into the triplet array from a file */ + for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { + scanf("%d%d%lf\n", &row[nz], &col[nz], &val[nz]); + + if ( nnz == 0 ) { /* first nonzero */ + if ( row[0] == 0 || col[0] == 0 ) { + zero_base = 1; + printf("triplet file: row/col indices are zero-based.\n"); + } else + printf("triplet file: row/col indices are one-based.\n"); + } + + if ( !zero_base ) { + /* Change to 0-based indexing. */ + --row[nz]; + --col[nz]; + } + + if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n + /*|| val[nz] == 0.*/) { + fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", + nz, row[nz], col[nz], val[nz]); + exit(-1); + } else { + ++xa[col[nz]]; + ++nz; + } + } + + *nonz = nz; + + /* Initialize the array of column pointers */ + k = 0; + jsize = xa[0]; + xa[0] = 0; + for (j = 1; j < *n; ++j) { + k += jsize; + jsize = xa[j]; + xa[j] = k; + } + + /* Copy the triplets into the column oriented storage */ + for (nz = 0; nz < *nonz; ++nz) { + j = col[nz]; + k = xa[j]; + asub[k] = row[nz]; + a[k] = val[nz]; + ++xa[j]; + } + + /* Reset the column pointers to the beginning of each column */ + for (j = *n; j > 0; --j) + xa[j] = xa[j-1]; + xa[0] = 0; + + SUPERLU_FREE(val); + SUPERLU_FREE(row); + SUPERLU_FREE(col); + +#ifdef CHK_INPUT + { + int i; + for (i = 0; i < *n; i++) { + printf("Col %d, xa %d\n", i, xa[i]); + for (k = xa[i]; k < xa[i+1]; k++) + printf("%d\t%16.10f\n", asub[k], a[k]); + } + } +#endif + +} + + +void dreadrhs(int m, double *b) +{ + FILE *fp, *fopen(); + int i; + /*int j;*/ + + if ( !(fp = fopen("b.dat", "r")) ) { + fprintf(stderr, "dreadrhs: file does not exist\n"); + exit(-1); + } + for (i = 0; i < m; ++i) + fscanf(fp, "%lf\n", &b[i]); + + /* readpair_(j, &b[i]);*/ + fclose(fp); +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,29 +1,31 @@ -/* +/*! @file dsnode_bmod.c + * \brief Performs numeric block updates within the relaxed snode. + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" -/* - * Performs numeric block updates within the relaxed snode. + +/*! \brief Performs numeric block updates within the relaxed snode. */ int dsnode_bmod ( Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,46 @@ - -/* +/*! @file dsnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    dsnode_dfs() - Determine the union of the row structures of those 
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + int dsnode_dfs ( const int jcol, /* in - start of the supernode */ @@ -35,19 +53,7 @@ GlobalLU_t *Glu /* modified */ ) { -/* Purpose - * ======= - * dsnode_dfs() - Determine the union of the row structures of those - * columns within the relaxed snode. - * Note: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * Return value - * ============ - * 0 success; - * >0 number of bytes allocated when run out of memory. - * - */ + register int i, k, ifrom, ito, nextl, new_next; int nsuper, krow, kmark, mem_error; int *xsup, *supno; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas2.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas2.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas2.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,17 +1,20 @@ -/* +/*! @file dsp_blas2.c + * \brief Sparse BLAS 2, using some dense BLAS 2 operations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ /* * File name: dsp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ -#include "dsp_defs.h" +#include "slu_ddefs.h" /* * Function prototypes @@ -20,12 +23,9 @@ void dlsolve(int, int, double*, double*); void dmatvec(int, int, int, double*, double*, double*); - -int -sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, - SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief Solves one of the systems of equations A*x = b, or A'*x = b + * + *
  *   Purpose
  *   =======
  *
@@ -49,7 +49,7 @@
  *             On entry, trans specifies the equations to be solved as   
  *             follows:   
  *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
+ *                trans = 'T' or 't'   A'*x = b.
  *                trans = 'C' or 'c'   A'*x = b.   
  *
  *   diag   - (input) char*
@@ -75,8 +75,12 @@
  *
  *   info    - (output) int*
  *             If *info = -i, the i-th argument had an illegal value.
- *
+ * 
*/ +int +sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, + SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info) +{ #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), @@ -96,7 +100,8 @@ /* Test the input parameters */ *info = 0; if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; - else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2; + else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && + !lsame_(trans, "C")) *info = -2; else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; @@ -298,68 +303,71 @@ +/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   sp_dgemv()  performs one of the matrix-vector operations   
+ *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
+ *   where alpha and beta are scalars, x and y are vectors and A is a
+ *   sparse A->nrow by A->ncol matrix.   
+ *
+ *   Parameters   
+ *   ==========   
+ *
+ *   TRANS  - (input) char*
+ *            On entry, TRANS specifies the operation to be performed as   
+ *            follows:   
+ *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
+ *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
+ *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
+ *
+ *   ALPHA  - (input) double
+ *            On entry, ALPHA specifies the scalar alpha.   
+ *
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ *
+ *   X      - (input) double*, array of DIMENSION at least   
+ *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
+ *            Before entry, the incremented array X must contain the   
+ *            vector x.   
+ *
+ *   INCX   - (input) int
+ *            On entry, INCX specifies the increment for the elements of   
+ *            X. INCX must not be zero.   
+ *
+ *   BETA   - (input) double
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then Y need not be set on input.   
+ *
+ *   Y      - (output) double*,  array of DIMENSION at least   
+ *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
+ *            Before entry with BETA non-zero, the incremented array Y   
+ *            must contain the vector y. On exit, Y is overwritten by the 
+ *            updated vector y.
+ *	     
+ *   INCY   - (input) int
+ *            On entry, INCY specifies the increment for the elements of   
+ *            Y. INCY must not be zero.   
+ *
+ *   ==== Sparse Level 2 Blas routine.   
+ * 
+ */ int sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, int incx, double beta, double *y, int incy) { -/* Purpose - ======= - - sp_dgemv() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. - In the future, more general A can be handled. - - X - (input) double*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) double*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -*/ - /* Local variables */ NCformat *Astore; double *Aval; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas3.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas3.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_blas3.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,116 +1,122 @@ - -/* +/*! @file dsp_blas3.c + * \brief Sparse BLAS3, using some dense BLAS3 operations + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ * 
+ *   sp_d performs one of the matrix-matrix operations   
+ * 
+ *      C := alpha*op( A )*op( B ) + beta*C,   
+ * 
+ *   where  op( X ) is one of 
+ * 
+ *      op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+ * 
+ *   alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
+ *   an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
+ *   
+ * 
+ *   Parameters   
+ *   ==========   
+ * 
+ *   TRANSA - (input) char*
+ *            On entry, TRANSA specifies the form of op( A ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSA = 'N' or 'n',  op( A ) = A.   
+ *               TRANSA = 'T' or 't',  op( A ) = A'.   
+ *               TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
+ *            Unchanged on exit.   
+ * 
+ *   TRANSB - (input) char*
+ *            On entry, TRANSB specifies the form of op( B ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSB = 'N' or 'n',  op( B ) = B.   
+ *               TRANSB = 'T' or 't',  op( B ) = B'.   
+ *               TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
+ *            Unchanged on exit.   
+ * 
+ *   M      - (input) int   
+ *            On entry,  M  specifies  the number of rows of the matrix 
+ *	     op( A ) and of the matrix C.  M must be at least zero. 
+ *	     Unchanged on exit.   
+ * 
+ *   N      - (input) int
+ *            On entry,  N specifies the number of columns of the matrix 
+ *	     op( B ) and the number of columns of the matrix C. N must be 
+ *	     at least zero.
+ *	     Unchanged on exit.   
+ * 
+ *   K      - (input) int
+ *            On entry, K specifies the number of columns of the matrix 
+ *	     op( A ) and the number of rows of the matrix op( B ). K must 
+ *	     be at least  zero.   
+ *           Unchanged on exit.
+ *      
+ *   ALPHA  - (input) double
+ *            On entry, ALPHA specifies the scalar alpha.   
+ * 
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ * 
+ *   B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 
+ *            n when TRANSB = 'N' or 'n',  and is  k otherwise.   
+ *            Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
+ *            part of the array B must contain the matrix B, otherwise 
+ *            the leading n by k part of the array B must contain the 
+ *            matrix B.   
+ *            Unchanged on exit.   
+ * 
+ *   LDB    - (input) int
+ *            On entry, LDB specifies the first dimension of B as declared 
+ *            in the calling (sub) program. LDB must be at least max( 1, n ).  
+ *            Unchanged on exit.   
+ * 
+ *   BETA   - (input) double
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then C need not be set on input.   
+ *  
+ *   C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
+ *            Before entry, the leading m by n part of the array C must 
+ *            contain the matrix C,  except when beta is zero, in which 
+ *            case C need not be set on entry.   
+ *            On exit, the array C is overwritten by the m by n matrix 
+ *	     ( alpha*op( A )*B + beta*C ).   
+ *  
+ *   LDC    - (input) int
+ *            On entry, LDC specifies the first dimension of C as declared 
+ *            in the calling (sub)program. LDC must be at least max(1,m).   
+ *            Unchanged on exit.   
+ *  
+ *   ==== Sparse Level 3 Blas routine.   
+ * 
+ */ + int sp_dgemm(char *transa, char *transb, int m, int n, int k, double alpha, SuperMatrix *A, double *b, int ldb, double beta, double *c, int ldc) { -/* Purpose - ======= - - sp_d performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. - In the future, more general A can be handled. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -*/ int incx = 1, incy = 1; int j; Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_defs.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_defs.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_defs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,234 +0,0 @@ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#ifndef __SUPERLU_dSP_DEFS /* allow multiple inclusions */ -#define __SUPERLU_dSP_DEFS - -/* - * File name: dsp_defs.h - * Purpose: Sparse matrix types and function prototypes - * History: - */ - -#ifdef _CRAY -#include -#include -#endif - -/* Define my integer type int_t */ -typedef int int_t; /* default */ - -#include "Cnames.h" -#include "supermatrix.h" -#include "util.h" - - -/* - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. - * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ -typedef struct { - int *xsup; /* supernode and column mapping */ - int *supno; - int *lsub; /* compressed L subscripts */ - int *xlsub; - double *lusup; /* L supernodes */ - int *xlusup; - double *ucol; /* U columns */ - int *usub; - int *xusub; - int nzlmax; /* current max size of lsub */ - int nzumax; /* " " " ucol */ - int nzlumax; /* " " " lusup */ - int n; /* number of columns in the matrix */ - LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ -} GlobalLU_t; - -typedef struct { - float for_lu; - float total_needed; - int expansions; -} mem_usage_t; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Driver routines */ -extern void -dgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); -extern void -dgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, - char *, double *, double *, SuperMatrix *, SuperMatrix *, - void *, int, SuperMatrix *, SuperMatrix *, - double *, double *, double *, double *, - mem_usage_t *, SuperLUStat_t *, int *); - -/* Supernodal LU factor related */ -extern void -dCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -dCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -dCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); -extern void -dCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int, - Stype_t, Dtype_t, Mtype_t); -extern void -dCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, int *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -extern void -dCopy_Dense_Matrix(int, int, double *, int, double *, int); - -extern void countnz (const int, int *, int *, int *, GlobalLU_t *); -extern void fixupL (const int, const int *, GlobalLU_t *); - -extern void dallocateA (int, int, double **, int **, int **); -extern void dgstrf (superlu_options_t*, SuperMatrix*, double, - int, int, int*, void *, int, int *, int *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); -extern int dsnode_dfs (const int, const int, const int *, const int *, - const int *, int *, int *, GlobalLU_t *); -extern int dsnode_bmod (const int, const int, const int, double *, - double *, GlobalLU_t *, SuperLUStat_t*); -extern void dpanel_dfs (const int, const int, const int, SuperMatrix *, - int *, int *, double *, int *, int *, int *, - int *, int *, int *, int *, GlobalLU_t *); -extern void dpanel_bmod (const int, const int, const int, const int, - double *, double *, int *, int *, - GlobalLU_t *, SuperLUStat_t*); -extern int dcolumn_dfs (const int, const int, int *, int *, int *, int *, - int *, int *, int *, int *, int *, GlobalLU_t *); -extern int dcolumn_bmod (const int, const int, double *, - double *, int *, int *, int, - GlobalLU_t *, SuperLUStat_t*); -extern int dcopy_to_ucol (int, int, int *, int *, int *, - double *, GlobalLU_t *); -extern int dpivotL (const int, const double, int *, int *, - int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); -extern void dpruneL (const int, const int *, const int, const int, - const int *, const int *, int *, GlobalLU_t *); -extern void dreadmt (int *, int *, int *, double **, int **, int **); -extern void dGenXtrue (int, int, double *, int); -extern void dFillRHS (trans_t, int, double *, int, SuperMatrix *, - SuperMatrix *); -extern void dgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, - SuperMatrix *, SuperLUStat_t*, int *); - - -/* Driver related */ - -extern void dgsequ (SuperMatrix *, double *, double *, double *, - double *, double *, int *); -extern void dlaqgs (SuperMatrix *, double *, double *, double, - double, double, char *); -extern void dgscon (char *, SuperMatrix *, SuperMatrix *, - double, double *, SuperLUStat_t*, int *); -extern double dPivotGrowth(int, SuperMatrix *, int *, - SuperMatrix *, SuperMatrix *); -extern void dgsrfs (trans_t, SuperMatrix *, SuperMatrix *, - SuperMatrix *, int *, int *, char *, double *, - double *, SuperMatrix *, SuperMatrix *, - double *, double *, SuperLUStat_t*, int *); - -extern int sp_dtrsv (char *, char *, char *, SuperMatrix *, - SuperMatrix *, double *, SuperLUStat_t*, int *); -extern int sp_dgemv (char *, double, SuperMatrix *, double *, - int, double, double *, int); - -extern int sp_dgemm (char *, char *, int, int, int, double, - SuperMatrix *, double *, int, double, - double *, int); - -/* Memory-related */ -extern int dLUMemInit (fact_t, void *, int, int, int, int, int, - SuperMatrix *, SuperMatrix *, - GlobalLU_t *, int **, double **); -extern void dSetRWork (int, int, double *, double **, double **); -extern void dLUWorkFree (int *, double *, GlobalLU_t *); -extern int dLUMemXpand (int, int, MemType, int *, GlobalLU_t *); - -extern double *doubleMalloc(int); -extern double *doubleCalloc(int); -extern int dmemory_usage(const int, const int, const int, const int); -extern int dQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); - -/* Auxiliary routines */ -extern void dreadhb(int *, int *, int *, double **, int **, int **); -extern void dCompRow_to_CompCol(int, int, int, double*, int*, int*, - double **, int **, int **); -extern void dfill (double *, int, double); -extern void dinf_norm_error (int, SuperMatrix *, double *); -extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, - double, double, double *, double *, char *); - -/* Routines for debugging */ -extern void dPrint_CompCol_Matrix(char *, SuperMatrix *); -extern void dPrint_SuperNode_Matrix(char *, SuperMatrix *); -extern void dPrint_Dense_Matrix(char *, SuperMatrix *); -extern void print_lu_col(char *, int, int, int *, GlobalLU_t *); -extern void check_tempv(int, double *); - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_dSP_DEFS */ - Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dutil.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dutil.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dutil.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,26 +1,29 @@ -/* - * -- SuperLU routine (version 3.0) -- +/*! @file dutil.c + * \brief Matrix utility functions + * + *
+ * -- SuperLU routine (version 3.1) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
- * October 15, 2003
+ * August 1, 2008
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include -#include "dsp_defs.h" +#include "slu_ddefs.h" void dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, @@ -64,7 +67,7 @@ Astore->rowptr = rowptr; } -/* Copy matrix A into matrix B. */ +/*! \brief Copy matrix A into matrix B. */ void dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) { @@ -108,12 +111,7 @@ dCopy_Dense_Matrix(int M, int N, double *X, int ldx, double *Y, int ldy) { -/* - * - * Purpose - * ======= - * - * Copies a two-dimensional matrix X to another matrix Y. +/*! \brief Copies a two-dimensional matrix X to another matrix Y. */ int i, j; @@ -150,8 +148,7 @@ } -/* - * Convert a row compressed storage into a column compressed storage. +/*! \brief Convert a row compressed storage into a column compressed storage. */ void dCompRow_to_CompCol(int m, int n, int nnz, @@ -266,23 +263,24 @@ void dPrint_Dense_Matrix(char *what, SuperMatrix *A) { - DNformat *Astore; - register int i; + DNformat *Astore = (DNformat *) A->Store; + register int i, j, lda = Astore->lda; double *dp; printf("\nDense matrix %s:\n", what); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; dp = (double *) Astore->nzval; - printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); + printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda); printf("\nnzval: "); - for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); + for (j = 0; j < A->ncol; ++j) { + for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i + j*lda]); + printf("\n"); + } printf("\n"); fflush(stdout); } -/* - * Diagnostic print of column "jcol" in the U/L factor. +/*! \brief Diagnostic print of column "jcol" in the U/L factor. */ void dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) @@ -324,9 +322,7 @@ } -/* - * Check whether tempv[] == 0. This should be true before and after - * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". +/*! \brief Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod". */ void dcheck_tempv(int n, double *tempv) { @@ -352,8 +348,7 @@ } } -/* - * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's +/*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void dFillRHS(trans_t trans, int nrhs, double *x, int ldx, @@ -382,8 +377,7 @@ } -/* - * Fills a double precision array with a given value. +/*! \brief Fills a double precision array with a given value. */ void dfill(double *a, int alen, double dval) @@ -394,8 +388,7 @@ -/* - * Check the inf-norm of the error vector +/*! \brief Check the inf-norm of the error vector */ void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue) { @@ -421,7 +414,7 @@ -/* Print performance of the code. */ +/*! \brief Print performance of the code. */ void dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, double rpg, double rcond, double *ferr, @@ -449,9 +442,9 @@ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, - mem_usage->expansions); + printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + mem_usage->for_lu/1e6, mem_usage->total_needed/1e6); + printf("Number of memory expansions: %d\n", stat->expansions); printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dzsum1.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dzsum1.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dzsum1.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,12 +1,20 @@ -#include "dcomplex.h" +/*! @file dzsum1.c + * \brief Takes sum of the absolute values of a complex vector and returns a double precision result + * + *
+ *     -- LAPACK auxiliary routine (version 2.0) --   
+ *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *     Courant Institute, Argonne National Lab, and Rice University   
+ *     October 31, 1992   
+ * 
+ */ -double dzsum1_(int *n, doublecomplex *cx, int *incx) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +#include "slu_dcomplex.h" +#include "slu_Cnames.h" +/*! \brief + +
     Purpose   
     =======   
 
@@ -31,7 +39,10 @@
             The spacing between successive values of CX.  INCX > 0.   
 
     ===================================================================== 
+
*/ +double dzsum1_(int *n, doublecomplex *cx, int *incx) +{ /* Builtin functions */ double z_abs(doublecomplex *); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/get_perm_c.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/get_perm_c.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/get_perm_c.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,11 +1,14 @@ -/* - * -- SuperLU routine (version 2.0) -- +/*! @file get_perm_c.c + * \brief Matrix permutation operations + * + *
+ * -- SuperLU routine (version 3.1) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
+ * August 1, 2008
+ * 
*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" #include "colamd.h" extern int genmmd_(int *, int *, int *, int *, int *, int *, int *, @@ -22,12 +25,11 @@ ) { int Alen, *A, i, info, *p; - double *knobs; + double knobs[COLAMD_KNOBS]; + int stats[COLAMD_STATS]; Alen = colamd_recommended(nnz, m, n); - if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) ) - ABORT("Malloc fails for knobs"); colamd_set_defaults(knobs); if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) ) @@ -36,29 +38,17 @@ ABORT("Malloc fails for p[]"); for (i = 0; i <= n; ++i) p[i] = colptr[i]; for (i = 0; i < nnz; ++i) A[i] = rowind[i]; - info = colamd(m, n, Alen, A, p, knobs); + info = colamd(m, n, Alen, A, p, knobs, stats); if ( info == FALSE ) ABORT("COLAMD failed"); for (i = 0; i < n; ++i) perm_c[p[i]] = i; - SUPERLU_FREE(knobs); SUPERLU_FREE(A); SUPERLU_FREE(p); } - -void -getata( - const int m, /* number of rows in matrix A. */ - const int n, /* number of columns in matrix A. */ - const int nz, /* number of nonzeros in matrix A */ - int *colptr, /* column pointer of size n+1 for matrix A. */ - int *rowind, /* row indices of size nz for matrix A. */ - int *atanz, /* out - on exit, returns the actual number of - nonzeros in matrix A'*A. */ - int **ata_colptr, /* out - size n+1 */ - int **ata_rowind /* out - size *atanz */ - ) -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -75,8 +65,20 @@
  * =========
  *     o  Do I need to withhold the *dense* rows?
  *     o  How do I know the number of nonzeros in A'*A?
- * 
+ * 
*/ +void +getata( + const int m, /* number of rows in matrix A. */ + const int n, /* number of columns in matrix A. */ + const int nz, /* number of nonzeros in matrix A */ + int *colptr, /* column pointer of size n+1 for matrix A. */ + int *rowind, /* row indices of size nz for matrix A. */ + int *atanz, /* out - on exit, returns the actual number of + nonzeros in matrix A'*A. */ + int **ata_colptr, /* out - size n+1 */ + int **ata_rowind /* out - size *atanz */ + ) { register int i, j, k, col, num_nz, ti, trow; int *marker, *b_colptr, *b_rowind; @@ -188,6 +190,18 @@ } +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'+A. A is an n-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'+A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (b_colptr, b_rowind).
+ * 
+ */ void at_plus_a( const int n, /* number of columns in matrix A. */ @@ -200,16 +214,6 @@ int **b_rowind /* out - size *bnz */ ) { -/* - * Purpose - * ======= - * - * Form the structure of A'+A. A is an n-by-n matrix in column oriented - * format represented by (colptr, rowind). The output A'+A is in column - * oriented format (symmetrically, also row oriented), represented by - * (b_colptr, b_rowind). - * - */ register int i, j, k, col, num_nz; int *t_colptr, *t_rowind; /* a column oriented form of T = A' */ int *marker; @@ -324,9 +328,9 @@ SUPERLU_FREE(t_rowind); } -void -get_perm_c(int ispec, SuperMatrix *A, int *perm_c) -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -356,11 +360,13 @@
  *	   Column permutation vector of size A->ncol, which defines the 
  *         permutation matrix Pc; perm_c[i] = j means column i of A is 
  *         in position j in A*Pc.
- *
+ * 
*/ +void +get_perm_c(int ispec, SuperMatrix *A, int *perm_c) { NCformat *Astore = A->Store; - int m, n, bnz, *b_colptr, i; + int m, n, bnz = 0, *b_colptr, i; int delta, maxint, nofsub, *invp; int *b_rowind, *dhead, *qsize, *llist, *marker; double t, SuperLU_timer_(); @@ -372,12 +378,16 @@ switch ( ispec ) { case 0: /* Natural ordering */ for (i = 0; i < n; ++i) perm_c[i] = i; - /*printf("Use natural column ordering.\n");*/ +#if ( PRNTlevel>=1 ) + printf("Use natural column ordering.\n"); +#endif return; case 1: /* Minimum degree ordering on A'*A */ getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); - /*printf("Use minimum degree ordering on A'*A.\n");*/ +#if ( PRNTlevel>=1 ) + printf("Use minimum degree ordering on A'*A.\n"); +#endif t = SuperLU_timer_() - t; /*printf("Form A'*A time = %8.3f\n", t);*/ break; @@ -385,14 +395,18 @@ if ( m != n ) ABORT("Matrix is not square"); at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); - /*printf("Use minimum degree ordering on A'+A.\n");*/ +#if ( PRNTlevel>=1 ) + printf("Use minimum degree ordering on A'+A.\n"); +#endif t = SuperLU_timer_() - t; /*printf("Form A'+A time = %8.3f\n", t);*/ break; case 3: /* Approximate minimum degree column ordering. */ get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind, perm_c); - /*printf(".. Use approximate minimum degree column ordering.\n");*/ +#if ( PRNTlevel>=1 ) + printf(".. Use approximate minimum degree column ordering.\n"); +#endif return; default: ABORT("Invalid ISPEC"); @@ -420,19 +434,18 @@ for (i = 0; i <= n; ++i) ++b_colptr[i]; for (i = 0; i < bnz; ++i) ++b_rowind[i]; - genmmd_(&n, b_colptr, b_rowind, invp, perm_c, &delta, dhead, + genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, qsize, llist, marker, &maxint, &nofsub); /* Transform perm_c into 0-based indexing. */ for (i = 0; i < n; ++i) --perm_c[i]; - SUPERLU_FREE(b_colptr); - SUPERLU_FREE(b_rowind); SUPERLU_FREE(invp); SUPERLU_FREE(dhead); SUPERLU_FREE(qsize); SUPERLU_FREE(llist); SUPERLU_FREE(marker); + SUPERLU_FREE(b_rowind); t = SuperLU_timer_() - t; /* printf("call GENMMD time = %8.3f\n", t);*/ @@ -441,4 +454,5 @@ for (i = 0; i < n; ++i) perm_c[i] = i; } + SUPERLU_FREE(b_colptr); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/heap_relax_snode.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/heap_relax_snode.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/heap_relax_snode.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,37 @@ -/* +/*! @file heap_relax_snode.c + * \brief Identify the initial relaxed supernodes + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" +#include "slu_ddefs.h" +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
+ *    the matrix has been reordered according to the postorder of the etree.
+ * 
+ */ + void heap_relax_snode ( const int n, @@ -31,13 +43,6 @@ int *relax_end /* last column in a supernode */ ) { -/* - * Purpose - * ======= - * relax_snode() - Identify the initial relaxed supernodes, assuming that - * the matrix has been reordered according to the postorder of the etree. - * - */ register int i, j, k, l, parent; register int snode_start; /* beginning of a snode */ int *et_save, *post, *inv_post, *iwork; @@ -91,7 +96,10 @@ } else { for (i = snode_start; i <= j; ++i) { l = inv_post[i]; - if ( descendants[i] == 0 ) relax_end[l] = l; + if ( descendants[i] == 0 ) { + relax_end[l] = l; + ++nsuper_et; + } } } j++; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/html_mainpage.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/html_mainpage.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/html_mainpage.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,9 @@ +/*! \mainpage SuperLU Documentation + + SuperLU is a sequential library for the direct solution of large, + sparse, nonsymmetric systems of linear equations on high performance + machines. It also provides threshold-based ILU factorization + preconditioner. The library is written in C and is callable from either + C or Fortran. + + */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/icmax1.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/icmax1.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/icmax1.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,14 +1,20 @@ +/*! @file icmax1.c + * \brief Finds the index of the element whose real part has maximum absolute value + * + *
+ *     -- LAPACK auxiliary routine (version 2.0) --   
+ *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *     Courant Institute, Argonne National Lab, and Rice University   
+ *     October 31, 1992   
+ * 
+ */ #include -#include "scomplex.h" +#include "slu_scomplex.h" +#include "slu_Cnames.h" -int icmax1_(int *n, complex *cx, int *incx) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 +/*! \brief - +
     Purpose   
     =======   
 
@@ -33,9 +39,11 @@
             The spacing between successive values of CX.  INCX >= 1.   
 
    ===================================================================== 
-  
-
-
+  
+*/ +int icmax1_(int *n, complex *cx, int *incx) +{ +/* NEXT LINE IS THE ONLY MODIFICATION. Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccolumn_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccolumn_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,258 @@ + +/*! @file ilu_ccolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+*/ + +#include "slu_cdefs.h" + + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   ILU_CCOLUMN_DFS performs a symbolic factorization on column jcol, and
+ *   decide the supernode boundary.
+ *
+ *   This routine does not use numeric values, but only use the RHS
+ *   row indices to start the dfs.
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives. The routine returns a list of such supernodal
+ *   representatives in topological order of the dfs that generates them.
+ *   The location of the first nonzero in each such supernodal segment
+ *   (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ *   nseg: no of segments in current U[*,j]
+ *   jsuper: jsuper=EMPTY if column j does not belong to the same
+ *	supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ *   marker2: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ *     0  success;
+ *   > 0  number of bytes allocated when run out of space.
+ * 
+ */ +int +ilu_ccolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the + dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + + int jcolp1, jcolm1, jsuper, nsuper, nextl; + int k, krep, krow, kmark, kperm; + int *marker2; /* Used for small panel LU */ + int fsupc; /* First column of a snode */ + int myfnz; /* First nonz column of a U-segment */ + int chperm, chmark, chrep, kchild; + int xdfs, maxdfs, kpar, oldrep; + int jptr, jm1ptr; + int ito, ifrom; /* Used to compress row subscripts */ + int mem_error; + int *xsup, *supno, *lsub, *xlsub; + int nzlmax; + static int first = 1, maxsuper; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + if ( first ) { + maxsuper = sp_ienv(3); + first = 0; + } + jcolp1 = jcol + 1; + jcolm1 = jcol - 1; + nsuper = supno[jcol]; + jsuper = nsuper; + nextl = xlsub[jcol]; + marker2 = &marker[2*m]; + + + /* For each nonzero in A[*,jcol] do dfs */ + for (k = 0; lsub_col[k] != EMPTY; k++) { + + krow = lsub_col[k]; + lsub_col[k] = EMPTY; + kmark = marker2[krow]; + + /* krow was visited before, go to the next nonzero */ + if ( kmark == jcol ) continue; + + /* For each unmarked nbr krow of jcol + * krow is in L: place it in structure of L[*,jcol] + */ + marker2[krow] = jcol; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + lsub[nextl++] = krow; /* krow is indexed into A */ + if ( nextl >= nzlmax ) { + if ((mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu))) + return (mem_error); + lsub = Glu->lsub; + } + if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */ + } else { + /* krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz[krep]; + + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > kperm ) repfnz[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker2[kchild]; + + if ( chmark != jcol ) { /* Not reached yet */ + marker2[kchild] = jcol; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,k] */ + if ( chperm == EMPTY ) { + lsub[nextl++] = kchild; + if ( nextl >= nzlmax ) { + if ( (mem_error = cLUMemXpand(jcol,nextl, + LSUB,&nzlmax,Glu)) ) + return (mem_error); + lsub = Glu->lsub; + } + if ( chmark != jcolm1 ) jsuper = EMPTY; + } else { + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz[chrep]; + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz[chrep] = chperm; + } else { + /* Continue dfs at super-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L^t) */ + parent[krep] = oldrep; + repfnz[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + } /* else */ + + } /* else */ + + } /* if */ + + } /* while */ + + /* krow has no more unexplored nbrs; + * place supernode-rep krep in postorder DFS. + * backtrack dfs to its parent + */ + segrep[*nseg] = krep; + ++(*nseg); + kpar = parent[krep]; /* Pop from stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + + } while ( kpar != EMPTY ); /* Until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonzero ... */ + + /* Check to see if j belongs in the same supernode as j-1 */ + if ( jcol == 0 ) { /* Do nothing for column 0 */ + nsuper = supno[0] = 0; + } else { + fsupc = xsup[nsuper]; + jptr = xlsub[jcol]; /* Not compressed yet */ + jm1ptr = xlsub[jcolm1]; + + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; + + /* Always start a new supernode for a singular column */ + if ( nextl == jptr ) jsuper = EMPTY; + + /* Make sure the number of columns in a supernode doesn't + exceed threshold. */ + if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * lsub from the previous supernode. Note we only store + * the subscript set of the first columns of the supernode. + */ + if ( jsuper == EMPTY ) { /* starts a new supernode */ + if ( (fsupc < jcolm1) ) { /* >= 2 columns in nsuper */ +#ifdef CHK_COMPRESS + printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); +#endif + ito = xlsub[fsupc+1]; + xlsub[jcolm1] = ito; + xlsub[jcol] = ito; + for (ifrom = jptr; ifrom < nextl; ++ifrom, ++ito) + lsub[ito] = lsub[ifrom]; + nextl = ito; + } + nsuper++; + supno[jcol] = nsuper; + } /* if a new supernode */ + + } /* else: jcol > 0 */ + + /* Tidy up the pointers before exit */ + xsup[nsuper+1] = jcolp1; + supno[jcolp1] = nsuper; + xlsub[jcolp1] = nextl; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,202 @@ + +/*! @file ilu_ccopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * and drop some small entries + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +#ifdef DEBUG +int num_drop_U; +#endif + +static complex *A; /* used in _compare_ only */ +static int _compare_(const void *a, const void *b) +{ + register int *x = (int *)a, *y = (int *)b; + register float xx = c_abs1(&A[*x]), yy = c_abs1(&A[*y]); + if (xx > yy) return -1; + else if (xx < yy) return 1; + else return 0; +} + + +int +ilu_ccopy_to_ucol( + int jcol, /* in */ + int nseg, /* in */ + int *segrep, /* in */ + int *repfnz, /* in */ + int *perm_r, /* in */ + complex *dense, /* modified - reset to zero on return */ + int drop_rule,/* in */ + milu_t milu, /* in */ + double drop_tol, /* in */ + int quota, /* maximum nonzero entries allowed */ + complex *sum, /* out - the sum of dropped entries */ + int *nnzUj, /* in - out */ + GlobalLU_t *Glu, /* modified */ + int *work /* working space with minimum size n, + * used by the second dropping rule */ + ) +{ +/* + * Gather from SPA dense[*] to global ucol[*]. + */ + int ksub, krep, ksupno; + int i, k, kfnz, segsze; + int fsupc, isub, irow; + int jsupno, nextu; + int new_next, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + complex *ucol; + int *usub, *xusub; + int nzumax; + int m; /* number of entries in the nonzero U-segments */ + register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); + register double tmp; + complex zero = {0.0, 0.0}; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + ucol = Glu->ucol; + usub = Glu->usub; + xusub = Glu->xusub; + nzumax = Glu->nzumax; + + *sum = zero; + if (drop_rule == NODROP) { + drop_tol = -1.0, quota = Glu->n; + } + + jsupno = supno[jcol]; + nextu = xusub[jcol]; + k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) { + krep = segrep[k--]; + ksupno = supno[krep]; + + if ( ksupno != jsupno ) { /* Should go into ucol[] */ + kfnz = repfnz[krep]; + if ( kfnz != EMPTY ) { /* Nonzero U-segment */ + + fsupc = xsup[ksupno]; + isub = xlsub[fsupc] + kfnz - fsupc; + segsze = krep - kfnz + 1; + + new_next = nextu + segsze; + while ( new_next > nzumax ) { + if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, + Glu)) != 0) + return (mem_error); + ucol = Glu->ucol; + if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, + Glu)) != 0) + return (mem_error); + usub = Glu->usub; + lsub = Glu->lsub; + } + + for (i = 0; i < segsze; i++) { + irow = lsub[isub++]; + tmp = c_abs1(&dense[irow]); + + /* first dropping rule */ + if (quota > 0 && tmp >= drop_tol) { + if (tmp > d_max) d_max = tmp; + if (tmp < d_min) d_min = tmp; + usub[nextu] = perm_r[irow]; + ucol[nextu] = dense[irow]; + nextu++; + } else { + switch (milu) { + case SMILU_1: + case SMILU_2: + c_add(sum, sum, &dense[irow]); + break; + case SMILU_3: + /* *sum += fabs(dense[irow]);*/ + sum->r += tmp; + break; + case SILU: + default: + break; + } +#ifdef DEBUG + num_drop_U++; +#endif + } + dense[irow] = zero; + } + + } + + } + + } /* for each segment... */ + + xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ + m = xusub[jcol + 1] - xusub[jcol]; + + /* second dropping rule */ + if (drop_rule & DROP_SECONDARY && m > quota) { + register double tol = d_max; + register int m0 = xusub[jcol] + m - 1; + + if (quota > 0) { + if (drop_rule & DROP_INTERP) { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / m); + } else { + A = &ucol[xusub[jcol]]; + for (i = 0; i < m; i++) work[i] = i; + qsort(work, m, sizeof(int), _compare_); + tol = fabs(usub[xusub[jcol] + work[quota]]); + } + } + for (i = xusub[jcol]; i <= m0; ) { + if (c_abs1(&ucol[i]) <= tol) { + switch (milu) { + case SMILU_1: + case SMILU_2: + c_add(sum, sum, &ucol[i]); + break; + case SMILU_3: + sum->r += tmp; + break; + case SILU: + default: + break; + } + ucol[i] = ucol[m0]; + usub[i] = usub[m0]; + m0--; + m--; +#ifdef DEBUG + num_drop_U++; +#endif + xusub[jcol + 1]--; + continue; + } + i++; + } + } + + if (milu == SMILU_2) { + sum->r = c_abs1(sum); sum->i = 0.0; + } + if (milu == SMILU_3) sum->i = 0.0; + + *nnzUj += m; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,321 @@ + +/*! @file ilu_cdrop_row.c + * \brief Drop small rows from L + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * <\pre>
+ */
+
+#include 
+#include 
+#include "slu_cdefs.h"
+
+extern void cswap_(int *, complex [], int *, complex [], int *);
+extern void caxpy_(int *, complex *, complex [], int *, complex [], int *);
+
+static float *A;  /* used in _compare_ only */
+static int _compare_(const void *a, const void *b)
+{
+    register int *x = (int *)a, *y = (int *)b;
+    if (A[*x] - A[*y] > 0.0) return -1;
+    else if (A[*x] - A[*y] < 0.0) return 1;
+    else return 0;
+}
+
+/*! \brief
+ * 
+ * Purpose
+ * =======
+ *    ilu_cdrop_row() - Drop some small rows from the previous 
+ *    supernode (L-part only).
+ * 
+ */ +int ilu_cdrop_row( + superlu_options_t *options, /* options */ + int first, /* index of the first column in the supernode */ + int last, /* index of the last column in the supernode */ + double drop_tol, /* dropping parameter */ + int quota, /* maximum nonzero entries allowed */ + int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ + double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, + * does not change if options->ILU_MILU != SMILU1 */ + GlobalLU_t *Glu, /* modified */ + float swork[], /* working space with minimum size last-first+1 */ + int iwork[], /* working space with minimum size m - n, + * used by the second dropping rule */ + int lastc /* if lastc == 0, there is nothing after the + * working supernode [first:last]; + * if lastc == 1, there is one more column after + * the working supernode. */ ) +{ + register int i, j, k, m1; + register int nzlc; /* number of nonzeros in column last+1 */ + register int xlusup_first, xlsub_first; + int m, n; /* m x n is the size of the supernode */ + int r = 0; /* number of dropped rows */ + register float *temp; + register complex *lusup = Glu->lusup; + register int *lsub = Glu->lsub; + register int *xlsub = Glu->xlsub; + register int *xlusup = Glu->xlusup; + register float d_max = 0.0, d_min = 1.0; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + norm_t nrm = options->ILU_Norm; + complex zero = {0.0, 0.0}; + complex one = {1.0, 0.0}; + complex none = {-1.0, 0.0}; + int inc_diag; /* inc_diag = m + 1 */ + int nzp = 0; /* number of zero pivots */ + + xlusup_first = xlusup[first]; + xlsub_first = xlsub[first]; + m = xlusup[first + 1] - xlusup_first; + n = last - first + 1; + m1 = m - 1; + inc_diag = m + 1; + nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; + temp = swork - n; + + /* Quick return if nothing to do. */ + if (m == 0 || m == n || drop_rule == NODROP) + { + *nnzLj += m * n; + return 0; + } + + /* basic dropping: ILU(tau) */ + for (i = n; i <= m1; ) + { + /* the average abs value of ith row */ + switch (nrm) + { + case ONE_NORM: + temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; + break; + case TWO_NORM: + temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m) + / sqrt((double)n); + break; + case INF_NORM: + default: + k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1; + temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]); + break; + } + + /* drop small entries due to drop_tol */ + if (drop_rule & DROP_BASIC && temp[i] < drop_tol) + { + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + caxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m].r += + c_abs1(&lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + ccopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + cswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m].r = + c_abs1(&lusup[xlusup_first + m1 + j * m]); + lusup[xlusup_first + m1 + j * m].i = 0.0; + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + continue; + } /* if dropping */ + else + { + if (temp[i] > d_max) d_max = temp[i]; + if (temp[i] < d_min) d_min = temp[i]; + } + i++; + } /* for */ + + /* Secondary dropping: drop more rows according to the quota. */ + quota = ceil((double)quota / (double)n); + if (drop_rule & DROP_SECONDARY && m - r > quota) + { + register double tol = d_max; + + /* Calculate the second dropping tolerance */ + if (quota > n) + { + if (drop_rule & DROP_INTERP) /* by interpolation */ + { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); + } + else /* by quick sort */ + { + register int *itemp = iwork - n; + A = temp; + for (i = n; i <= m1; i++) itemp[i] = i; + qsort(iwork, m1 - n + 1, sizeof(int), _compare_); + tol = temp[iwork[quota]]; + } + } + + for (i = n; i <= m1; ) + { + if (temp[i] <= tol) + { + register int j; + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + caxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m].r += + c_abs1(&lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + ccopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + cswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m].r = + c_abs1(&lusup[xlusup_first + m1 + j * m]); + lusup[xlusup_first + m1 + j * m].i = 0.0; + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + temp[i] = temp[m1]; + + continue; + } + i++; + + } /* for */ + + } /* if secondary dropping */ + + for (i = n; i < m; i++) temp[i] = 0.0; + + if (r == 0) + { + *nnzLj += m * n; + return 0; + } + + /* add dropped entries to the diagnal */ + if (milu != SILU) + { + register int j; + complex t; + for (j = 0; j < n; j++) + { + cs_mult(&t, &lusup[xlusup_first + (m - 1) + j * m], + MILU_ALPHA); + switch (milu) + { + case SMILU_1: + if ( !(c_eq(&t, &none)) ) { + c_add(&t, &t, &one); + cc_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + &t); + } + else + { + cs_mult( + &lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + *fill_tol); +#ifdef DEBUG + printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); + fflush(stdout); +#endif + nzp++; + } + break; + case SMILU_2: + cs_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + 1.0 + c_abs1(&t)); + break; + case SMILU_3: + c_add(&t, &t, &one); + cc_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + &t); + break; + case SILU: + default: + break; + } + } + if (nzp > 0) *fill_tol = -nzp; + } + + /* Remove dropped entries from the memory and fix the pointers. */ + m1 = m - r; + for (j = 1; j < n; j++) + { + register int tmp1, tmp2; + tmp1 = xlusup_first + j * m1; + tmp2 = xlusup_first + j * m; + for (i = 0; i < m1; i++) + lusup[i + tmp1] = lusup[i + tmp2]; + } + for (i = 0; i < nzlc; i++) + lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; + for (i = 0; i < nzlc; i++) + lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; + for (i = first + 1; i <= last + 1; i++) + { + xlusup[i] -= r * (i - first); + xlsub[i] -= r; + } + if (lastc) + { + xlusup[last + 2] -= r * n; + xlsub[last + 2] -= r; + } + + *nnzLj += (m - r) * n; + return r; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,248 @@ + +/*! @file ilu_cpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols and + * record the entries with maximum absolute value in each column + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives.
+ *
+ *   The routine returns one list of the supernodal representatives
+ *   in topological order of the dfs that generates them. This list is
+ *   a superset of the topological order of each individual column within
+ *   the panel.
+ *   The location of the first nonzero in each supernodal segment
+ *   (supernodal entry location) is also returned. Each column has a
+ *   separate list for this purpose.
+ *
+ *   Two marker arrays are used for dfs:
+ *     marker[i] == jj, if i was visited during dfs of current column jj;
+ *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ *   marker: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ * 
+ */ +void +ilu_cpanel_dfs( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + complex *dense, /* out */ + float *amax, /* out - max. abs. value of each column in panel */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ +) +{ + + NCPformat *Astore; + complex *a; + int *asub; + int *xa_begin, *xa_end; + int krep, chperm, chmark, chrep, oldrep, kchild, myfnz; + int k, krow, kmark, kperm; + int xdfs, maxdfs, kpar; + int jj; /* index through each column in the panel */ + int *marker1; /* marker1[jj] >= jcol if vertex jj was visited + by a previous column within this panel. */ + int *repfnz_col; /* start of each column in the panel */ + complex *dense_col; /* start of each column in the panel */ + int nextl_col; /* next available position in panel_lsub[*,jj] */ + int *xsup, *supno; + int *lsub, *xlsub; + float *amax_col; + register double tmp; + + /* Initialize pointers */ + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + marker1 = marker + m; + repfnz_col = repfnz; + dense_col = dense; + amax_col = amax; + *nseg = 0; + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + + /* For each column in the panel */ + for (jj = jcol; jj < jcol + w; jj++) { + nextl_col = (jj - jcol) * m; + +#ifdef CHK_DFS + printf("\npanel col %d: ", jj); +#endif + + *amax_col = 0.0; + /* For each nonz in A[*,jj] do dfs */ + for (k = xa_begin[jj]; k < xa_end[jj]; k++) { + krow = asub[k]; + tmp = c_abs1(&a[k]); + if (tmp > *amax_col) *amax_col = tmp; + dense_col[krow] = a[k]; + kmark = marker[krow]; + if ( kmark == jj ) + continue; /* krow visited before, go to the next nonzero */ + + /* For each unmarked nbr krow of jj + * krow is in L: place it in structure of L[*,jj] + */ + marker[krow] = jj; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + panel_lsub[nextl_col++] = krow; /* krow is indexed into A */ + } + /* + * krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + else { + + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz_col[krep]; + +#ifdef CHK_DFS + printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm); +#endif + if ( myfnz != EMPTY ) { /* Representative visited before */ + if ( myfnz > kperm ) repfnz_col[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz_col[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker[kchild]; + + if ( chmark != jj ) { /* Not reached yet */ + marker[kchild] = jj; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,j] */ + if ( chperm == EMPTY ) { + panel_lsub[nextl_col++] = kchild; + } + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + else { + + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz_col[chrep]; +#ifdef CHK_DFS + printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm); +#endif + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz_col[chrep] = chperm; + } + else { + /* Cont. dfs at snode-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L) */ + parent[krep] = oldrep; + repfnz_col[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } /* else */ + + } /* else */ + + } /* if... */ + + } /* while xdfs < maxdfs */ + + /* krow has no more unexplored nbrs: + * Place snode-rep krep in postorder DFS, if this + * segment is seen for the first time. (Note that + * "repfnz[krep]" may change later.) + * Backtrack dfs to its parent. + */ + if ( marker1[krep] < jcol ) { + segrep[*nseg] = krep; + ++(*nseg); + marker1[krep] = jj; + } + + kpar = parent[krep]; /* Pop stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } while ( kpar != EMPTY ); /* do-while - until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonz in A[*,jj] */ + + repfnz_col += m; /* Move to next column */ + dense_col += m; + amax_col++; + + } /* for jj ... */ + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,274 @@ + +/*! @file ilu_cpivotL.c + * \brief Performs numerical pivoting + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + + +#include +#include +#include "slu_cdefs.h" + +#ifndef SGN +#define SGN(x) ((x)>=0?1:-1) +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Performs the numerical pivoting on the current column of L,
+ *   and the CDIV operation.
+ *
+ *   Pivot policy:
+ *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ *	     pivot row = k;
+ *	 ELSE IF abs(A_jj) >= thresh THEN
+ *	     pivot row = j;
+ *	 ELSE
+ *	     pivot row = m;
+ *
+ *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ *   Return value: 0	  success;
+ *		   i > 0  U(i,i) is exactly zero.
+ * 
+ */ + +int +ilu_cpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by + * perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int diagind, /* diagonal of Pc*A*Pc' */ + int *swap, /* in/out record the row permutation */ + int *iswap, /* in/out inverse of swap, it is the same as + perm_r after the factorization */ + int *marker, /* in */ + int *pivrow, /* in/out, as an input if *usepr!=0 */ + double fill_tol, /* in - fill tolerance of current column + * used for a singular column */ + milu_t milu, /* in */ + complex drop_sum, /* in - computed in ilu_ccopy_to_ucol() + (MILU only) */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + + int n; /* number of columns */ + int fsupc; /* first column in the supernode */ + int nsupc; /* no of columns in the supernode */ + int nsupr; /* no of rows in the supernode */ + int lptr; /* points to the starting subscript of the supernode */ + register int pivptr; + int old_pivptr, diag, ptr0; + register float pivmax, rtemp; + float thresh; + complex temp; + complex *lu_sup_ptr; + complex *lu_col_ptr; + int *lsub_ptr; + register int isub, icol, k, itemp; + int *lsub, *xlsub; + complex *lusup; + int *xlusup; + flops_t *ops = stat->ops; + int info; + complex one = {1.0, 0.0}; + + /* Initialize pointers */ + n = Glu->n; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + lusup = Glu->lusup; + xlusup = Glu->xlusup; + fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; + nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ + lptr = xlsub[fsupc]; + nsupr = xlsub[fsupc+1] - lptr; + lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ + lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ + lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ + + /* Determine the largest abs numerical value for partial pivoting; + Also search for user-specified pivot, and diagonal element. */ + pivmax = -1.0; + pivptr = nsupc; + diag = EMPTY; + old_pivptr = nsupc; + ptr0 = EMPTY; + for (isub = nsupc; isub < nsupr; ++isub) { + if (marker[lsub_ptr[isub]] > jcol) + continue; /* do not overlap with a later relaxed supernode */ + + switch (milu) { + case SMILU_1: + c_add(&temp, &lu_col_ptr[isub], &drop_sum); + rtemp = c_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + /* In this case, drop_sum contains the sum of the abs. value */ + rtemp = c_abs1(&lu_col_ptr[isub]); + break; + case SILU: + default: + rtemp = c_abs1(&lu_col_ptr[isub]); + break; + } + if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } + if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; + if (lsub_ptr[isub] == diagind) diag = isub; + if (ptr0 == EMPTY) ptr0 = isub; + } + + if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r; + + /* Test for singularity */ + if (pivmax < 0.0) { + fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + if ( pivmax == 0.0 ) { + if (diag != EMPTY) + *pivrow = lsub_ptr[pivptr = diag]; + else if (ptr0 != EMPTY) + *pivrow = lsub_ptr[pivptr = ptr0]; + else { + /* look for the first row which does not + belong to any later supernodes */ + for (icol = jcol; icol < n; icol++) + if (marker[swap[icol]] <= jcol) break; + if (icol >= n) { + fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + + *pivrow = swap[icol]; + + /* pick up the pivot row */ + for (isub = nsupc; isub < nsupr; ++isub) + if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } + } + pivmax = fill_tol; + lu_col_ptr[pivptr].r = pivmax; + lu_col_ptr[pivptr].i = 0.0; + *usepr = 0; +#ifdef DEBUG + printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); + fflush(stdout); +#endif + info =jcol + 1; + } /* if (*pivrow == 0.0) */ + else { + thresh = u * pivmax; + + /* Choose appropriate pivotal element by our policy. */ + if ( *usepr ) { + switch (milu) { + case SMILU_1: + c_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum); + rtemp = c_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + rtemp = c_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; + break; + case SILU: + default: + rtemp = c_abs1(&lu_col_ptr[old_pivptr]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; + else *usepr = 0; + } + if ( *usepr == 0 ) { + /* Use diagonal pivot? */ + if ( diag >= 0 ) { /* diagonal exists */ + switch (milu) { + case SMILU_1: + c_add(&temp, &lu_col_ptr[diag], &drop_sum); + rtemp = c_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + rtemp = c_abs1(&lu_col_ptr[diag]) + drop_sum.r; + break; + case SILU: + default: + rtemp = c_abs1(&lu_col_ptr[diag]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; + } + *pivrow = lsub_ptr[pivptr]; + } + info = 0; + + /* Reset the diagonal */ + switch (milu) { + case SMILU_1: + c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); + break; + case SMILU_2: + case SMILU_3: + temp = c_sgn(&lu_col_ptr[pivptr]); + cc_mult(&temp, &temp, &drop_sum); + c_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); + break; + case SILU: + default: + break; + } + + } /* else */ + + /* Record pivot row */ + perm_r[*pivrow] = jcol; + if (jcol < n - 1) { + register int t1, t2, t; + t1 = iswap[*pivrow]; t2 = jcol; + if (t1 != t2) { + t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; + t1 = swap[t1]; t2 = t; + t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; + } + } /* if (jcol < n - 1) */ + + /* Interchange row subscripts */ + if ( pivptr != nsupc ) { + itemp = lsub_ptr[pivptr]; + lsub_ptr[pivptr] = lsub_ptr[nsupc]; + lsub_ptr[nsupc] = itemp; + + /* Interchange numerical values as well, for the whole snode, such + * that L is indexed the same way as A. + */ + for (icol = 0; icol <= nsupc; icol++) { + itemp = pivptr + icol * nsupr; + temp = lu_sup_ptr[itemp]; + lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; + lu_sup_ptr[nsupc + icol*nsupr] = temp; + } + } /* if */ + + /* cdiv operation */ + ops[FACT] += 10 * (nsupr - nsupc); + c_div(&temp, &one, &lu_col_ptr[nsupc]); + for (k = nsupc+1; k < nsupr; k++) + cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp); + + return info; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_csnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_csnode_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_csnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,90 @@ + +/*! @file ilu_csnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_cdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_csnode_dfs() - Determine the union of the row structures of those
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + +int +ilu_csnode_dfs( + const int jcol, /* in - start of the supernode */ + const int kcol, /* in - end of the supernode */ + const int *asub, /* in */ + const int *xa_begin, /* in */ + const int *xa_end, /* in */ + int *marker, /* modified */ + GlobalLU_t *Glu /* modified */ + ) +{ + + register int i, k, nextl; + int nsuper, krow, kmark, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + int nzlmax; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + nsuper = ++supno[jcol]; /* Next available supernode number */ + nextl = xlsub[jcol]; + + for (i = jcol; i <= kcol; i++) + { + /* For each nonzero in A[*,i] */ + for (k = xa_begin[i]; k < xa_end[i]; k++) + { + krow = asub[k]; + kmark = marker[krow]; + if ( kmark != kcol ) + { /* First time visit krow */ + marker[krow] = kcol; + lsub[nextl++] = krow; + if ( nextl >= nzlmax ) + { + if ( (mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, + Glu)) != 0) + return (mem_error); + lsub = Glu->lsub; + } + } + } + supno[i] = nsuper; + } + + /* Supernode > 1 */ + if ( jcol < kcol ) + for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; + + xsup[nsuper+1] = kcol + 1; + supno[kcol+1] = nsuper; + xlsub[kcol+1] = nextl; + + return 0; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcolumn_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcolumn_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,258 @@ + +/*! @file ilu_dcolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+*/ + +#include "slu_ddefs.h" + + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   ILU_DCOLUMN_DFS performs a symbolic factorization on column jcol, and
+ *   decide the supernode boundary.
+ *
+ *   This routine does not use numeric values, but only use the RHS
+ *   row indices to start the dfs.
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives. The routine returns a list of such supernodal
+ *   representatives in topological order of the dfs that generates them.
+ *   The location of the first nonzero in each such supernodal segment
+ *   (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ *   nseg: no of segments in current U[*,j]
+ *   jsuper: jsuper=EMPTY if column j does not belong to the same
+ *	supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ *   marker2: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ *     0  success;
+ *   > 0  number of bytes allocated when run out of space.
+ * 
+ */ +int +ilu_dcolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the + dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + + int jcolp1, jcolm1, jsuper, nsuper, nextl; + int k, krep, krow, kmark, kperm; + int *marker2; /* Used for small panel LU */ + int fsupc; /* First column of a snode */ + int myfnz; /* First nonz column of a U-segment */ + int chperm, chmark, chrep, kchild; + int xdfs, maxdfs, kpar, oldrep; + int jptr, jm1ptr; + int ito, ifrom; /* Used to compress row subscripts */ + int mem_error; + int *xsup, *supno, *lsub, *xlsub; + int nzlmax; + static int first = 1, maxsuper; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + if ( first ) { + maxsuper = sp_ienv(3); + first = 0; + } + jcolp1 = jcol + 1; + jcolm1 = jcol - 1; + nsuper = supno[jcol]; + jsuper = nsuper; + nextl = xlsub[jcol]; + marker2 = &marker[2*m]; + + + /* For each nonzero in A[*,jcol] do dfs */ + for (k = 0; lsub_col[k] != EMPTY; k++) { + + krow = lsub_col[k]; + lsub_col[k] = EMPTY; + kmark = marker2[krow]; + + /* krow was visited before, go to the next nonzero */ + if ( kmark == jcol ) continue; + + /* For each unmarked nbr krow of jcol + * krow is in L: place it in structure of L[*,jcol] + */ + marker2[krow] = jcol; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + lsub[nextl++] = krow; /* krow is indexed into A */ + if ( nextl >= nzlmax ) { + if ((mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu))) + return (mem_error); + lsub = Glu->lsub; + } + if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */ + } else { + /* krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz[krep]; + + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > kperm ) repfnz[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker2[kchild]; + + if ( chmark != jcol ) { /* Not reached yet */ + marker2[kchild] = jcol; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,k] */ + if ( chperm == EMPTY ) { + lsub[nextl++] = kchild; + if ( nextl >= nzlmax ) { + if ( (mem_error = dLUMemXpand(jcol,nextl, + LSUB,&nzlmax,Glu)) ) + return (mem_error); + lsub = Glu->lsub; + } + if ( chmark != jcolm1 ) jsuper = EMPTY; + } else { + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz[chrep]; + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz[chrep] = chperm; + } else { + /* Continue dfs at super-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L^t) */ + parent[krep] = oldrep; + repfnz[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + } /* else */ + + } /* else */ + + } /* if */ + + } /* while */ + + /* krow has no more unexplored nbrs; + * place supernode-rep krep in postorder DFS. + * backtrack dfs to its parent + */ + segrep[*nseg] = krep; + ++(*nseg); + kpar = parent[krep]; /* Pop from stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + + } while ( kpar != EMPTY ); /* Until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonzero ... */ + + /* Check to see if j belongs in the same supernode as j-1 */ + if ( jcol == 0 ) { /* Do nothing for column 0 */ + nsuper = supno[0] = 0; + } else { + fsupc = xsup[nsuper]; + jptr = xlsub[jcol]; /* Not compressed yet */ + jm1ptr = xlsub[jcolm1]; + + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; + + /* Always start a new supernode for a singular column */ + if ( nextl == jptr ) jsuper = EMPTY; + + /* Make sure the number of columns in a supernode doesn't + exceed threshold. */ + if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * lsub from the previous supernode. Note we only store + * the subscript set of the first columns of the supernode. + */ + if ( jsuper == EMPTY ) { /* starts a new supernode */ + if ( (fsupc < jcolm1) ) { /* >= 2 columns in nsuper */ +#ifdef CHK_COMPRESS + printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); +#endif + ito = xlsub[fsupc+1]; + xlsub[jcolm1] = ito; + xlsub[jcol] = ito; + for (ifrom = jptr; ifrom < nextl; ++ifrom, ++ito) + lsub[ito] = lsub[ifrom]; + nextl = ito; + } + nsuper++; + supno[jcol] = nsuper; + } /* if a new supernode */ + + } /* else: jcol > 0 */ + + /* Tidy up the pointers before exit */ + xsup[nsuper+1] = jcolp1; + supno[jcolp1] = nsuper; + xlsub[jcolp1] = nextl; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcopy_to_ucol.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,199 @@ + +/*! @file ilu_dcopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * and drop some small entries + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +#ifdef DEBUG +int num_drop_U; +#endif + +static double *A; /* used in _compare_ only */ +static int _compare_(const void *a, const void *b) +{ + register int *x = (int *)a, *y = (int *)b; + register double xx = fabs(A[*x]), yy = fabs(A[*y]); + if (xx > yy) return -1; + else if (xx < yy) return 1; + else return 0; +} + + +int +ilu_dcopy_to_ucol( + int jcol, /* in */ + int nseg, /* in */ + int *segrep, /* in */ + int *repfnz, /* in */ + int *perm_r, /* in */ + double *dense, /* modified - reset to zero on return */ + int drop_rule,/* in */ + milu_t milu, /* in */ + double drop_tol, /* in */ + int quota, /* maximum nonzero entries allowed */ + double *sum, /* out - the sum of dropped entries */ + int *nnzUj, /* in - out */ + GlobalLU_t *Glu, /* modified */ + int *work /* working space with minimum size n, + * used by the second dropping rule */ + ) +{ +/* + * Gather from SPA dense[*] to global ucol[*]. + */ + int ksub, krep, ksupno; + int i, k, kfnz, segsze; + int fsupc, isub, irow; + int jsupno, nextu; + int new_next, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + double *ucol; + int *usub, *xusub; + int nzumax; + int m; /* number of entries in the nonzero U-segments */ + register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); + register double tmp; + double zero = 0.0; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + ucol = Glu->ucol; + usub = Glu->usub; + xusub = Glu->xusub; + nzumax = Glu->nzumax; + + *sum = zero; + if (drop_rule == NODROP) { + drop_tol = -1.0, quota = Glu->n; + } + + jsupno = supno[jcol]; + nextu = xusub[jcol]; + k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) { + krep = segrep[k--]; + ksupno = supno[krep]; + + if ( ksupno != jsupno ) { /* Should go into ucol[] */ + kfnz = repfnz[krep]; + if ( kfnz != EMPTY ) { /* Nonzero U-segment */ + + fsupc = xsup[ksupno]; + isub = xlsub[fsupc] + kfnz - fsupc; + segsze = krep - kfnz + 1; + + new_next = nextu + segsze; + while ( new_next > nzumax ) { + if ((mem_error = dLUMemXpand(jcol, nextu, UCOL, &nzumax, + Glu)) != 0) + return (mem_error); + ucol = Glu->ucol; + if ((mem_error = dLUMemXpand(jcol, nextu, USUB, &nzumax, + Glu)) != 0) + return (mem_error); + usub = Glu->usub; + lsub = Glu->lsub; + } + + for (i = 0; i < segsze; i++) { + irow = lsub[isub++]; + tmp = fabs(dense[irow]); + + /* first dropping rule */ + if (quota > 0 && tmp >= drop_tol) { + if (tmp > d_max) d_max = tmp; + if (tmp < d_min) d_min = tmp; + usub[nextu] = perm_r[irow]; + ucol[nextu] = dense[irow]; + nextu++; + } else { + switch (milu) { + case SMILU_1: + case SMILU_2: + *sum += dense[irow]; + break; + case SMILU_3: + /* *sum += fabs(dense[irow]);*/ + *sum += tmp; + break; + case SILU: + default: + break; + } +#ifdef DEBUG + num_drop_U++; +#endif + } + dense[irow] = zero; + } + + } + + } + + } /* for each segment... */ + + xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ + m = xusub[jcol + 1] - xusub[jcol]; + + /* second dropping rule */ + if (drop_rule & DROP_SECONDARY && m > quota) { + register double tol = d_max; + register int m0 = xusub[jcol] + m - 1; + + if (quota > 0) { + if (drop_rule & DROP_INTERP) { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / m); + } else { + A = &ucol[xusub[jcol]]; + for (i = 0; i < m; i++) work[i] = i; + qsort(work, m, sizeof(int), _compare_); + tol = fabs(usub[xusub[jcol] + work[quota]]); + } + } + for (i = xusub[jcol]; i <= m0; ) { + if (fabs(ucol[i]) <= tol) { + switch (milu) { + case SMILU_1: + case SMILU_2: + *sum += ucol[i]; + break; + case SMILU_3: + *sum += fabs(ucol[i]); + break; + case SILU: + default: + break; + } + ucol[i] = ucol[m0]; + usub[i] = usub[m0]; + m0--; + m--; +#ifdef DEBUG + num_drop_U++; +#endif + xusub[jcol + 1]--; + continue; + } + i++; + } + } + + if (milu == SMILU_2) *sum = fabs(*sum); + + *nnzUj += m; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ddrop_row.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ddrop_row.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ddrop_row.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,307 @@ + +/*! @file ilu_ddrop_row.c + * \brief Drop small rows from L + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * <\pre>
+ */
+
+#include 
+#include 
+#include "slu_ddefs.h"
+
+extern void dswap_(int *, double [], int *, double [], int *);
+extern void daxpy_(int *, double *, double [], int *, double [], int *);
+
+static double *A;  /* used in _compare_ only */
+static int _compare_(const void *a, const void *b)
+{
+    register int *x = (int *)a, *y = (int *)b;
+    if (A[*x] - A[*y] > 0.0) return -1;
+    else if (A[*x] - A[*y] < 0.0) return 1;
+    else return 0;
+}
+
+/*! \brief
+ * 
+ * Purpose
+ * =======
+ *    ilu_ddrop_row() - Drop some small rows from the previous 
+ *    supernode (L-part only).
+ * 
+ */ +int ilu_ddrop_row( + superlu_options_t *options, /* options */ + int first, /* index of the first column in the supernode */ + int last, /* index of the last column in the supernode */ + double drop_tol, /* dropping parameter */ + int quota, /* maximum nonzero entries allowed */ + int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ + double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, + * does not change if options->ILU_MILU != SMILU1 */ + GlobalLU_t *Glu, /* modified */ + double dwork[], /* working space with minimum size last-first+1 */ + int iwork[], /* working space with minimum size m - n, + * used by the second dropping rule */ + int lastc /* if lastc == 0, there is nothing after the + * working supernode [first:last]; + * if lastc == 1, there is one more column after + * the working supernode. */ ) +{ + register int i, j, k, m1; + register int nzlc; /* number of nonzeros in column last+1 */ + register int xlusup_first, xlsub_first; + int m, n; /* m x n is the size of the supernode */ + int r = 0; /* number of dropped rows */ + register double *temp; + register double *lusup = Glu->lusup; + register int *lsub = Glu->lsub; + register int *xlsub = Glu->xlsub; + register int *xlusup = Glu->xlusup; + register double d_max = 0.0, d_min = 1.0; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + norm_t nrm = options->ILU_Norm; + double zero = 0.0; + double one = 1.0; + double none = -1.0; + int inc_diag; /* inc_diag = m + 1 */ + int nzp = 0; /* number of zero pivots */ + + xlusup_first = xlusup[first]; + xlsub_first = xlsub[first]; + m = xlusup[first + 1] - xlusup_first; + n = last - first + 1; + m1 = m - 1; + inc_diag = m + 1; + nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; + temp = dwork - n; + + /* Quick return if nothing to do. */ + if (m == 0 || m == n || drop_rule == NODROP) + { + *nnzLj += m * n; + return 0; + } + + /* basic dropping: ILU(tau) */ + for (i = n; i <= m1; ) + { + /* the average abs value of ith row */ + switch (nrm) + { + case ONE_NORM: + temp[i] = dasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; + break; + case TWO_NORM: + temp[i] = dnrm2_(&n, &lusup[xlusup_first + i], &m) + / sqrt((double)n); + break; + case INF_NORM: + default: + k = idamax_(&n, &lusup[xlusup_first + i], &m) - 1; + temp[i] = fabs(lusup[xlusup_first + i + m * k]); + break; + } + + /* drop small entries due to drop_tol */ + if (drop_rule & DROP_BASIC && temp[i] < drop_tol) + { + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + daxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m] += + fabs(lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + dcopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + dswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m] = + fabs(lusup[xlusup_first + m1 + j * m]); + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + continue; + } /* if dropping */ + else + { + if (temp[i] > d_max) d_max = temp[i]; + if (temp[i] < d_min) d_min = temp[i]; + } + i++; + } /* for */ + + /* Secondary dropping: drop more rows according to the quota. */ + quota = ceil((double)quota / (double)n); + if (drop_rule & DROP_SECONDARY && m - r > quota) + { + register double tol = d_max; + + /* Calculate the second dropping tolerance */ + if (quota > n) + { + if (drop_rule & DROP_INTERP) /* by interpolation */ + { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); + } + else /* by quick sort */ + { + register int *itemp = iwork - n; + A = temp; + for (i = n; i <= m1; i++) itemp[i] = i; + qsort(iwork, m1 - n + 1, sizeof(int), _compare_); + tol = temp[iwork[quota]]; + } + } + + for (i = n; i <= m1; ) + { + if (temp[i] <= tol) + { + register int j; + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + daxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m] += + fabs(lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + dcopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + dswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m] = + fabs(lusup[xlusup_first + m1 + j * m]); + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + temp[i] = temp[m1]; + + continue; + } + i++; + + } /* for */ + + } /* if secondary dropping */ + + for (i = n; i < m; i++) temp[i] = 0.0; + + if (r == 0) + { + *nnzLj += m * n; + return 0; + } + + /* add dropped entries to the diagnal */ + if (milu != SILU) + { + register int j; + double t; + for (j = 0; j < n; j++) + { + t = lusup[xlusup_first + (m - 1) + j * m] * MILU_ALPHA; + switch (milu) + { + case SMILU_1: + if (t != none) { + lusup[xlusup_first + j * inc_diag] *= (one + t); + } + else + { + lusup[xlusup_first + j * inc_diag] *= *fill_tol; +#ifdef DEBUG + printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); + fflush(stdout); +#endif + nzp++; + } + break; + case SMILU_2: + lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t)); + break; + case SMILU_3: + lusup[xlusup_first + j * inc_diag] *= (one + t); + break; + case SILU: + default: + break; + } + } + if (nzp > 0) *fill_tol = -nzp; + } + + /* Remove dropped entries from the memory and fix the pointers. */ + m1 = m - r; + for (j = 1; j < n; j++) + { + register int tmp1, tmp2; + tmp1 = xlusup_first + j * m1; + tmp2 = xlusup_first + j * m; + for (i = 0; i < m1; i++) + lusup[i + tmp1] = lusup[i + tmp2]; + } + for (i = 0; i < nzlc; i++) + lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; + for (i = 0; i < nzlc; i++) + lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; + for (i = first + 1; i <= last + 1; i++) + { + xlusup[i] -= r * (i - first); + xlsub[i] -= r; + } + if (lastc) + { + xlusup[last + 2] -= r * n; + xlsub[last + 2] -= r; + } + + *nnzLj += (m - r) * n; + return r; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpanel_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpanel_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,248 @@ + +/*! @file ilu_dpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols and + * record the entries with maximum absolute value in each column + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives.
+ *
+ *   The routine returns one list of the supernodal representatives
+ *   in topological order of the dfs that generates them. This list is
+ *   a superset of the topological order of each individual column within
+ *   the panel.
+ *   The location of the first nonzero in each supernodal segment
+ *   (supernodal entry location) is also returned. Each column has a
+ *   separate list for this purpose.
+ *
+ *   Two marker arrays are used for dfs:
+ *     marker[i] == jj, if i was visited during dfs of current column jj;
+ *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ *   marker: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ * 
+ */ +void +ilu_dpanel_dfs( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + double *dense, /* out */ + double *amax, /* out - max. abs. value of each column in panel */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ +) +{ + + NCPformat *Astore; + double *a; + int *asub; + int *xa_begin, *xa_end; + int krep, chperm, chmark, chrep, oldrep, kchild, myfnz; + int k, krow, kmark, kperm; + int xdfs, maxdfs, kpar; + int jj; /* index through each column in the panel */ + int *marker1; /* marker1[jj] >= jcol if vertex jj was visited + by a previous column within this panel. */ + int *repfnz_col; /* start of each column in the panel */ + double *dense_col; /* start of each column in the panel */ + int nextl_col; /* next available position in panel_lsub[*,jj] */ + int *xsup, *supno; + int *lsub, *xlsub; + double *amax_col; + register double tmp; + + /* Initialize pointers */ + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + marker1 = marker + m; + repfnz_col = repfnz; + dense_col = dense; + amax_col = amax; + *nseg = 0; + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + + /* For each column in the panel */ + for (jj = jcol; jj < jcol + w; jj++) { + nextl_col = (jj - jcol) * m; + +#ifdef CHK_DFS + printf("\npanel col %d: ", jj); +#endif + + *amax_col = 0.0; + /* For each nonz in A[*,jj] do dfs */ + for (k = xa_begin[jj]; k < xa_end[jj]; k++) { + krow = asub[k]; + tmp = fabs(a[k]); + if (tmp > *amax_col) *amax_col = tmp; + dense_col[krow] = a[k]; + kmark = marker[krow]; + if ( kmark == jj ) + continue; /* krow visited before, go to the next nonzero */ + + /* For each unmarked nbr krow of jj + * krow is in L: place it in structure of L[*,jj] + */ + marker[krow] = jj; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + panel_lsub[nextl_col++] = krow; /* krow is indexed into A */ + } + /* + * krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + else { + + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz_col[krep]; + +#ifdef CHK_DFS + printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm); +#endif + if ( myfnz != EMPTY ) { /* Representative visited before */ + if ( myfnz > kperm ) repfnz_col[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz_col[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker[kchild]; + + if ( chmark != jj ) { /* Not reached yet */ + marker[kchild] = jj; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,j] */ + if ( chperm == EMPTY ) { + panel_lsub[nextl_col++] = kchild; + } + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + else { + + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz_col[chrep]; +#ifdef CHK_DFS + printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm); +#endif + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz_col[chrep] = chperm; + } + else { + /* Cont. dfs at snode-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L) */ + parent[krep] = oldrep; + repfnz_col[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } /* else */ + + } /* else */ + + } /* if... */ + + } /* while xdfs < maxdfs */ + + /* krow has no more unexplored nbrs: + * Place snode-rep krep in postorder DFS, if this + * segment is seen for the first time. (Note that + * "repfnz[krep]" may change later.) + * Backtrack dfs to its parent. + */ + if ( marker1[krep] < jcol ) { + segrep[*nseg] = krep; + ++(*nseg); + marker1[krep] = jj; + } + + kpar = parent[krep]; /* Pop stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } while ( kpar != EMPTY ); /* do-while - until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonz in A[*,jj] */ + + repfnz_col += m; /* Move to next column */ + dense_col += m; + amax_col++; + + } /* for jj ... */ + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,266 @@ + +/*! @file ilu_dpivotL.c + * \brief Performs numerical pivoting + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + + +#include +#include +#include "slu_ddefs.h" + +#ifndef SGN +#define SGN(x) ((x)>=0?1:-1) +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Performs the numerical pivoting on the current column of L,
+ *   and the CDIV operation.
+ *
+ *   Pivot policy:
+ *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ *	     pivot row = k;
+ *	 ELSE IF abs(A_jj) >= thresh THEN
+ *	     pivot row = j;
+ *	 ELSE
+ *	     pivot row = m;
+ *
+ *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ *   Return value: 0	  success;
+ *		   i > 0  U(i,i) is exactly zero.
+ * 
+ */ + +int +ilu_dpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by + * perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int diagind, /* diagonal of Pc*A*Pc' */ + int *swap, /* in/out record the row permutation */ + int *iswap, /* in/out inverse of swap, it is the same as + perm_r after the factorization */ + int *marker, /* in */ + int *pivrow, /* in/out, as an input if *usepr!=0 */ + double fill_tol, /* in - fill tolerance of current column + * used for a singular column */ + milu_t milu, /* in */ + double drop_sum, /* in - computed in ilu_dcopy_to_ucol() + (MILU only) */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + + int n; /* number of columns */ + int fsupc; /* first column in the supernode */ + int nsupc; /* no of columns in the supernode */ + int nsupr; /* no of rows in the supernode */ + int lptr; /* points to the starting subscript of the supernode */ + register int pivptr; + int old_pivptr, diag, ptr0; + register double pivmax, rtemp; + double thresh; + double temp; + double *lu_sup_ptr; + double *lu_col_ptr; + int *lsub_ptr; + register int isub, icol, k, itemp; + int *lsub, *xlsub; + double *lusup; + int *xlusup; + flops_t *ops = stat->ops; + int info; + + /* Initialize pointers */ + n = Glu->n; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + lusup = Glu->lusup; + xlusup = Glu->xlusup; + fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; + nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ + lptr = xlsub[fsupc]; + nsupr = xlsub[fsupc+1] - lptr; + lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ + lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ + lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ + + /* Determine the largest abs numerical value for partial pivoting; + Also search for user-specified pivot, and diagonal element. */ + pivmax = -1.0; + pivptr = nsupc; + diag = EMPTY; + old_pivptr = nsupc; + ptr0 = EMPTY; + for (isub = nsupc; isub < nsupr; ++isub) { + if (marker[lsub_ptr[isub]] > jcol) + continue; /* do not overlap with a later relaxed supernode */ + + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[isub] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + /* In this case, drop_sum contains the sum of the abs. value */ + rtemp = fabs(lu_col_ptr[isub]); + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[isub]); + break; + } + if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } + if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; + if (lsub_ptr[isub] == diagind) diag = isub; + if (ptr0 == EMPTY) ptr0 = isub; + } + + if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum; + + /* Test for singularity */ + if (pivmax < 0.0) { + fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + if ( pivmax == 0.0 ) { + if (diag != EMPTY) + *pivrow = lsub_ptr[pivptr = diag]; + else if (ptr0 != EMPTY) + *pivrow = lsub_ptr[pivptr = ptr0]; + else { + /* look for the first row which does not + belong to any later supernodes */ + for (icol = jcol; icol < n; icol++) + if (marker[swap[icol]] <= jcol) break; + if (icol >= n) { + fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + + *pivrow = swap[icol]; + + /* pick up the pivot row */ + for (isub = nsupc; isub < nsupr; ++isub) + if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } + } + pivmax = fill_tol; + lu_col_ptr[pivptr] = pivmax; + *usepr = 0; +#ifdef DEBUG + printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); + fflush(stdout); +#endif + info =jcol + 1; + } /* if (*pivrow == 0.0) */ + else { + thresh = u * pivmax; + + /* Choose appropriate pivotal element by our policy. */ + if ( *usepr ) { + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[old_pivptr] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + rtemp = fabs(lu_col_ptr[old_pivptr]) + drop_sum; + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[old_pivptr]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; + else *usepr = 0; + } + if ( *usepr == 0 ) { + /* Use diagonal pivot? */ + if ( diag >= 0 ) { /* diagonal exists */ + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[diag] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + rtemp = fabs(lu_col_ptr[diag]) + drop_sum; + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[diag]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; + } + *pivrow = lsub_ptr[pivptr]; + } + info = 0; + + /* Reset the diagonal */ + switch (milu) { + case SMILU_1: + lu_col_ptr[pivptr] += drop_sum; + break; + case SMILU_2: + case SMILU_3: + lu_col_ptr[pivptr] += SGN(lu_col_ptr[pivptr]) * drop_sum; + break; + case SILU: + default: + break; + } + + } /* else */ + + /* Record pivot row */ + perm_r[*pivrow] = jcol; + if (jcol < n - 1) { + register int t1, t2, t; + t1 = iswap[*pivrow]; t2 = jcol; + if (t1 != t2) { + t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; + t1 = swap[t1]; t2 = t; + t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; + } + } /* if (jcol < n - 1) */ + + /* Interchange row subscripts */ + if ( pivptr != nsupc ) { + itemp = lsub_ptr[pivptr]; + lsub_ptr[pivptr] = lsub_ptr[nsupc]; + lsub_ptr[nsupc] = itemp; + + /* Interchange numerical values as well, for the whole snode, such + * that L is indexed the same way as A. + */ + for (icol = 0; icol <= nsupc; icol++) { + itemp = pivptr + icol * nsupr; + temp = lu_sup_ptr[itemp]; + lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; + lu_sup_ptr[nsupc + icol*nsupr] = temp; + } + } /* if */ + + /* cdiv operation */ + ops[FACT] += nsupr - nsupc; + temp = 1.0 / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) lu_col_ptr[k] *= temp; + + return info; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dsnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dsnode_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dsnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,90 @@ + +/*! @file ilu_dsnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_dsnode_dfs() - Determine the union of the row structures of those
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + +int +ilu_dsnode_dfs( + const int jcol, /* in - start of the supernode */ + const int kcol, /* in - end of the supernode */ + const int *asub, /* in */ + const int *xa_begin, /* in */ + const int *xa_end, /* in */ + int *marker, /* modified */ + GlobalLU_t *Glu /* modified */ + ) +{ + + register int i, k, nextl; + int nsuper, krow, kmark, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + int nzlmax; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + nsuper = ++supno[jcol]; /* Next available supernode number */ + nextl = xlsub[jcol]; + + for (i = jcol; i <= kcol; i++) + { + /* For each nonzero in A[*,i] */ + for (k = xa_begin[i]; k < xa_end[i]; k++) + { + krow = asub[k]; + kmark = marker[krow]; + if ( kmark != kcol ) + { /* First time visit krow */ + marker[krow] = kcol; + lsub[nextl++] = krow; + if ( nextl >= nzlmax ) + { + if ( (mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, + Glu)) != 0) + return (mem_error); + lsub = Glu->lsub; + } + } + } + supno[i] = nsuper; + } + + /* Supernode > 1 */ + if ( jcol < kcol ) + for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; + + xsup[nsuper+1] = kcol + 1; + supno[kcol+1] = nsuper; + xlsub[kcol+1] = nextl; + + return 0; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_heap_relax_snode.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/heap_relax_snode.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_heap_relax_snode.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_heap_relax_snode.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,120 @@ +/*! @file ilu_heap_relax_snode.c + * \brief Identify the initial relaxed supernodes + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 1, 2009
+ * 
+ */ + +#include "slu_ddefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_heap_relax_snode() - Identify the initial relaxed supernodes,
+ *    assuming that the matrix has been reordered according to the postorder
+ *    of the etree.
+ * 
+ */ + +void +ilu_heap_relax_snode ( + const int n, + int *et, /* column elimination tree */ + const int relax_columns, /* max no of columns allowed in a + relaxed snode */ + int *descendants, /* no of descendants of each node + in the etree */ + int *relax_end, /* last column in a supernode + * if j-th column starts a relaxed + * supernode, relax_end[j] represents + * the last column of this supernode */ + int *relax_fsupc /* first column in a supernode + * relax_fsupc[j] represents the first + * column of j-th supernode */ + ) +{ + register int i, j, k, l, f, parent; + register int snode_start; /* beginning of a snode */ + int *et_save, *post, *inv_post, *iwork; + int nsuper_et = 0, nsuper_et_post = 0; + + /* The etree may not be postordered, but is heap ordered. */ + + iwork = (int*) intMalloc(3*n+2); + if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]"); + inv_post = iwork + n+1; + et_save = inv_post + n+1; + + /* Post order etree */ + post = (int *) TreePostorder(n, et); + for (i = 0; i < n+1; ++i) inv_post[post[i]] = i; + + /* Renumber etree in postorder */ + for (i = 0; i < n; ++i) { + iwork[post[i]] = post[et[i]]; + et_save[i] = et[i]; /* Save the original etree */ + } + for (i = 0; i < n; ++i) et[i] = iwork[i]; + + /* Compute the number of descendants of each node in the etree */ + ifill (relax_end, n, EMPTY); + ifill (relax_fsupc, n, EMPTY); + for (j = 0; j < n; j++) descendants[j] = 0; + for (j = 0; j < n; j++) { + parent = et[j]; + if ( parent != n ) /* not the dummy root */ + descendants[parent] += descendants[j] + 1; + } + + /* Identify the relaxed supernodes by postorder traversal of the etree. */ + for ( f = j = 0; j < n; ) { + parent = et[j]; + snode_start = j; + while ( parent != n && descendants[parent] < relax_columns ) { + j = parent; + parent = et[j]; + } + /* Found a supernode in postordered etree; j is the last column. */ + ++nsuper_et_post; + k = n; + for (i = snode_start; i <= j; ++i) + k = SUPERLU_MIN(k, inv_post[i]); + l = inv_post[j]; + if ( (l - k) == (j - snode_start) ) { + /* It's also a supernode in the original etree */ + relax_end[k] = l; /* Last column is recorded */ + relax_fsupc[f++] = k; + ++nsuper_et; + } else { + for (i = snode_start; i <= j; ++i) { + l = inv_post[i]; + if ( descendants[i] == 0 ) { + relax_end[l] = l; + relax_fsupc[f++] = l; + ++nsuper_et; + } + } + } + j++; + /* Search for a new leaf */ + while ( descendants[j] != 0 && j < n ) j++; + } + +#if ( PRNTlevel>=1 ) + printf(".. heap_snode_relax:\n" + "\tNo of relaxed snodes in postordered etree:\t%d\n" + "\tNo of relaxed snodes in original etree:\t%d\n", + nsuper_et_post, nsuper_et); +#endif + + /* Recover the original etree */ + for (i = 0; i < n; ++i) et[i] = et_save[i]; + + SUPERLU_FREE(post); + SUPERLU_FREE(iwork); +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_relax_snode.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_relax_snode.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_relax_snode.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,69 @@ +/*! @file ilu_relax_snode.c + * \brief Identify initial relaxed supernodes + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 1, 2009
+ * 
+ */ + +#include "slu_ddefs.h" +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_relax_snode() - Identify the initial relaxed supernodes, assuming
+ *    that the matrix has been reordered according to the postorder of the
+ *    etree.
+ * 
+ */ +void +ilu_relax_snode ( + const int n, + int *et, /* column elimination tree */ + const int relax_columns, /* max no of columns allowed in a + relaxed snode */ + int *descendants, /* no of descendants of each node + in the etree */ + int *relax_end, /* last column in a supernode + * if j-th column starts a relaxed + * supernode, relax_end[j] represents + * the last column of this supernode */ + int *relax_fsupc /* first column in a supernode + * relax_fsupc[j] represents the first + * column of j-th supernode */ + ) +{ + + register int j, f, parent; + register int snode_start; /* beginning of a snode */ + + ifill (relax_end, n, EMPTY); + ifill (relax_fsupc, n, EMPTY); + for (j = 0; j < n; j++) descendants[j] = 0; + + /* Compute the number of descendants of each node in the etree */ + for (j = 0; j < n; j++) { + parent = et[j]; + if ( parent != n ) /* not the dummy root */ + descendants[parent] += descendants[j] + 1; + } + + /* Identify the relaxed supernodes by postorder traversal of the etree. */ + for (j = f = 0; j < n; ) { + parent = et[j]; + snode_start = j; + while ( parent != n && descendants[parent] < relax_columns ) { + j = parent; + parent = et[j]; + } + /* Found a supernode with j being the last column. */ + relax_end[snode_start] = j; /* Last column is recorded */ + j++; + relax_fsupc[f++] = snode_start; + /* Search for a new leaf */ + while ( descendants[j] != 0 && j < n ) j++; + } +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scolumn_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scolumn_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,258 @@ + +/*! @file ilu_scolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+*/ + +#include "slu_sdefs.h" + + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   ILU_SCOLUMN_DFS performs a symbolic factorization on column jcol, and
+ *   decide the supernode boundary.
+ *
+ *   This routine does not use numeric values, but only use the RHS
+ *   row indices to start the dfs.
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives. The routine returns a list of such supernodal
+ *   representatives in topological order of the dfs that generates them.
+ *   The location of the first nonzero in each such supernodal segment
+ *   (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ *   nseg: no of segments in current U[*,j]
+ *   jsuper: jsuper=EMPTY if column j does not belong to the same
+ *	supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ *   marker2: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ *     0  success;
+ *   > 0  number of bytes allocated when run out of space.
+ * 
+ */ +int +ilu_scolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the + dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + + int jcolp1, jcolm1, jsuper, nsuper, nextl; + int k, krep, krow, kmark, kperm; + int *marker2; /* Used for small panel LU */ + int fsupc; /* First column of a snode */ + int myfnz; /* First nonz column of a U-segment */ + int chperm, chmark, chrep, kchild; + int xdfs, maxdfs, kpar, oldrep; + int jptr, jm1ptr; + int ito, ifrom; /* Used to compress row subscripts */ + int mem_error; + int *xsup, *supno, *lsub, *xlsub; + int nzlmax; + static int first = 1, maxsuper; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + if ( first ) { + maxsuper = sp_ienv(3); + first = 0; + } + jcolp1 = jcol + 1; + jcolm1 = jcol - 1; + nsuper = supno[jcol]; + jsuper = nsuper; + nextl = xlsub[jcol]; + marker2 = &marker[2*m]; + + + /* For each nonzero in A[*,jcol] do dfs */ + for (k = 0; lsub_col[k] != EMPTY; k++) { + + krow = lsub_col[k]; + lsub_col[k] = EMPTY; + kmark = marker2[krow]; + + /* krow was visited before, go to the next nonzero */ + if ( kmark == jcol ) continue; + + /* For each unmarked nbr krow of jcol + * krow is in L: place it in structure of L[*,jcol] + */ + marker2[krow] = jcol; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + lsub[nextl++] = krow; /* krow is indexed into A */ + if ( nextl >= nzlmax ) { + if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu))) + return (mem_error); + lsub = Glu->lsub; + } + if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */ + } else { + /* krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz[krep]; + + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > kperm ) repfnz[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker2[kchild]; + + if ( chmark != jcol ) { /* Not reached yet */ + marker2[kchild] = jcol; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,k] */ + if ( chperm == EMPTY ) { + lsub[nextl++] = kchild; + if ( nextl >= nzlmax ) { + if ( (mem_error = sLUMemXpand(jcol,nextl, + LSUB,&nzlmax,Glu)) ) + return (mem_error); + lsub = Glu->lsub; + } + if ( chmark != jcolm1 ) jsuper = EMPTY; + } else { + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz[chrep]; + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz[chrep] = chperm; + } else { + /* Continue dfs at super-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L^t) */ + parent[krep] = oldrep; + repfnz[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + } /* else */ + + } /* else */ + + } /* if */ + + } /* while */ + + /* krow has no more unexplored nbrs; + * place supernode-rep krep in postorder DFS. + * backtrack dfs to its parent + */ + segrep[*nseg] = krep; + ++(*nseg); + kpar = parent[krep]; /* Pop from stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + + } while ( kpar != EMPTY ); /* Until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonzero ... */ + + /* Check to see if j belongs in the same supernode as j-1 */ + if ( jcol == 0 ) { /* Do nothing for column 0 */ + nsuper = supno[0] = 0; + } else { + fsupc = xsup[nsuper]; + jptr = xlsub[jcol]; /* Not compressed yet */ + jm1ptr = xlsub[jcolm1]; + + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; + + /* Always start a new supernode for a singular column */ + if ( nextl == jptr ) jsuper = EMPTY; + + /* Make sure the number of columns in a supernode doesn't + exceed threshold. */ + if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * lsub from the previous supernode. Note we only store + * the subscript set of the first columns of the supernode. + */ + if ( jsuper == EMPTY ) { /* starts a new supernode */ + if ( (fsupc < jcolm1) ) { /* >= 2 columns in nsuper */ +#ifdef CHK_COMPRESS + printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); +#endif + ito = xlsub[fsupc+1]; + xlsub[jcolm1] = ito; + xlsub[jcol] = ito; + for (ifrom = jptr; ifrom < nextl; ++ifrom, ++ito) + lsub[ito] = lsub[ifrom]; + nextl = ito; + } + nsuper++; + supno[jcol] = nsuper; + } /* if a new supernode */ + + } /* else: jcol > 0 */ + + /* Tidy up the pointers before exit */ + xsup[nsuper+1] = jcolp1; + supno[jcolp1] = nsuper; + xlsub[jcolp1] = nextl; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scopy_to_ucol.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,199 @@ + +/*! @file ilu_scopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * and drop some small entries + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +#ifdef DEBUG +int num_drop_U; +#endif + +static float *A; /* used in _compare_ only */ +static int _compare_(const void *a, const void *b) +{ + register int *x = (int *)a, *y = (int *)b; + register double xx = fabs(A[*x]), yy = fabs(A[*y]); + if (xx > yy) return -1; + else if (xx < yy) return 1; + else return 0; +} + + +int +ilu_scopy_to_ucol( + int jcol, /* in */ + int nseg, /* in */ + int *segrep, /* in */ + int *repfnz, /* in */ + int *perm_r, /* in */ + float *dense, /* modified - reset to zero on return */ + int drop_rule,/* in */ + milu_t milu, /* in */ + double drop_tol, /* in */ + int quota, /* maximum nonzero entries allowed */ + float *sum, /* out - the sum of dropped entries */ + int *nnzUj, /* in - out */ + GlobalLU_t *Glu, /* modified */ + int *work /* working space with minimum size n, + * used by the second dropping rule */ + ) +{ +/* + * Gather from SPA dense[*] to global ucol[*]. + */ + int ksub, krep, ksupno; + int i, k, kfnz, segsze; + int fsupc, isub, irow; + int jsupno, nextu; + int new_next, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + float *ucol; + int *usub, *xusub; + int nzumax; + int m; /* number of entries in the nonzero U-segments */ + register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); + register double tmp; + float zero = 0.0; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + ucol = Glu->ucol; + usub = Glu->usub; + xusub = Glu->xusub; + nzumax = Glu->nzumax; + + *sum = zero; + if (drop_rule == NODROP) { + drop_tol = -1.0, quota = Glu->n; + } + + jsupno = supno[jcol]; + nextu = xusub[jcol]; + k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) { + krep = segrep[k--]; + ksupno = supno[krep]; + + if ( ksupno != jsupno ) { /* Should go into ucol[] */ + kfnz = repfnz[krep]; + if ( kfnz != EMPTY ) { /* Nonzero U-segment */ + + fsupc = xsup[ksupno]; + isub = xlsub[fsupc] + kfnz - fsupc; + segsze = krep - kfnz + 1; + + new_next = nextu + segsze; + while ( new_next > nzumax ) { + if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, + Glu)) != 0) + return (mem_error); + ucol = Glu->ucol; + if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, + Glu)) != 0) + return (mem_error); + usub = Glu->usub; + lsub = Glu->lsub; + } + + for (i = 0; i < segsze; i++) { + irow = lsub[isub++]; + tmp = fabs(dense[irow]); + + /* first dropping rule */ + if (quota > 0 && tmp >= drop_tol) { + if (tmp > d_max) d_max = tmp; + if (tmp < d_min) d_min = tmp; + usub[nextu] = perm_r[irow]; + ucol[nextu] = dense[irow]; + nextu++; + } else { + switch (milu) { + case SMILU_1: + case SMILU_2: + *sum += dense[irow]; + break; + case SMILU_3: + /* *sum += fabs(dense[irow]);*/ + *sum += tmp; + break; + case SILU: + default: + break; + } +#ifdef DEBUG + num_drop_U++; +#endif + } + dense[irow] = zero; + } + + } + + } + + } /* for each segment... */ + + xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ + m = xusub[jcol + 1] - xusub[jcol]; + + /* second dropping rule */ + if (drop_rule & DROP_SECONDARY && m > quota) { + register double tol = d_max; + register int m0 = xusub[jcol] + m - 1; + + if (quota > 0) { + if (drop_rule & DROP_INTERP) { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / m); + } else { + A = &ucol[xusub[jcol]]; + for (i = 0; i < m; i++) work[i] = i; + qsort(work, m, sizeof(int), _compare_); + tol = fabs(usub[xusub[jcol] + work[quota]]); + } + } + for (i = xusub[jcol]; i <= m0; ) { + if (fabs(ucol[i]) <= tol) { + switch (milu) { + case SMILU_1: + case SMILU_2: + *sum += ucol[i]; + break; + case SMILU_3: + *sum += fabs(ucol[i]); + break; + case SILU: + default: + break; + } + ucol[i] = ucol[m0]; + usub[i] = usub[m0]; + m0--; + m--; +#ifdef DEBUG + num_drop_U++; +#endif + xusub[jcol + 1]--; + continue; + } + i++; + } + } + + if (milu == SMILU_2) *sum = fabs(*sum); + + *nnzUj += m; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_sdrop_row.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_sdrop_row.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_sdrop_row.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,307 @@ + +/*! @file ilu_sdrop_row.c + * \brief Drop small rows from L + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * <\pre>
+ */
+
+#include 
+#include 
+#include "slu_sdefs.h"
+
+extern void sswap_(int *, float [], int *, float [], int *);
+extern void saxpy_(int *, float *, float [], int *, float [], int *);
+
+static float *A;  /* used in _compare_ only */
+static int _compare_(const void *a, const void *b)
+{
+    register int *x = (int *)a, *y = (int *)b;
+    if (A[*x] - A[*y] > 0.0) return -1;
+    else if (A[*x] - A[*y] < 0.0) return 1;
+    else return 0;
+}
+
+/*! \brief
+ * 
+ * Purpose
+ * =======
+ *    ilu_sdrop_row() - Drop some small rows from the previous 
+ *    supernode (L-part only).
+ * 
+ */ +int ilu_sdrop_row( + superlu_options_t *options, /* options */ + int first, /* index of the first column in the supernode */ + int last, /* index of the last column in the supernode */ + double drop_tol, /* dropping parameter */ + int quota, /* maximum nonzero entries allowed */ + int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ + double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, + * does not change if options->ILU_MILU != SMILU1 */ + GlobalLU_t *Glu, /* modified */ + float swork[], /* working space with minimum size last-first+1 */ + int iwork[], /* working space with minimum size m - n, + * used by the second dropping rule */ + int lastc /* if lastc == 0, there is nothing after the + * working supernode [first:last]; + * if lastc == 1, there is one more column after + * the working supernode. */ ) +{ + register int i, j, k, m1; + register int nzlc; /* number of nonzeros in column last+1 */ + register int xlusup_first, xlsub_first; + int m, n; /* m x n is the size of the supernode */ + int r = 0; /* number of dropped rows */ + register float *temp; + register float *lusup = Glu->lusup; + register int *lsub = Glu->lsub; + register int *xlsub = Glu->xlsub; + register int *xlusup = Glu->xlusup; + register float d_max = 0.0, d_min = 1.0; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + norm_t nrm = options->ILU_Norm; + float zero = 0.0; + float one = 1.0; + float none = -1.0; + int inc_diag; /* inc_diag = m + 1 */ + int nzp = 0; /* number of zero pivots */ + + xlusup_first = xlusup[first]; + xlsub_first = xlsub[first]; + m = xlusup[first + 1] - xlusup_first; + n = last - first + 1; + m1 = m - 1; + inc_diag = m + 1; + nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; + temp = swork - n; + + /* Quick return if nothing to do. */ + if (m == 0 || m == n || drop_rule == NODROP) + { + *nnzLj += m * n; + return 0; + } + + /* basic dropping: ILU(tau) */ + for (i = n; i <= m1; ) + { + /* the average abs value of ith row */ + switch (nrm) + { + case ONE_NORM: + temp[i] = sasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; + break; + case TWO_NORM: + temp[i] = snrm2_(&n, &lusup[xlusup_first + i], &m) + / sqrt((double)n); + break; + case INF_NORM: + default: + k = isamax_(&n, &lusup[xlusup_first + i], &m) - 1; + temp[i] = fabs(lusup[xlusup_first + i + m * k]); + break; + } + + /* drop small entries due to drop_tol */ + if (drop_rule & DROP_BASIC && temp[i] < drop_tol) + { + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + saxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m] += + fabs(lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + scopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + sswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m] = + fabs(lusup[xlusup_first + m1 + j * m]); + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + continue; + } /* if dropping */ + else + { + if (temp[i] > d_max) d_max = temp[i]; + if (temp[i] < d_min) d_min = temp[i]; + } + i++; + } /* for */ + + /* Secondary dropping: drop more rows according to the quota. */ + quota = ceil((double)quota / (double)n); + if (drop_rule & DROP_SECONDARY && m - r > quota) + { + register double tol = d_max; + + /* Calculate the second dropping tolerance */ + if (quota > n) + { + if (drop_rule & DROP_INTERP) /* by interpolation */ + { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); + } + else /* by quick sort */ + { + register int *itemp = iwork - n; + A = temp; + for (i = n; i <= m1; i++) itemp[i] = i; + qsort(iwork, m1 - n + 1, sizeof(int), _compare_); + tol = temp[iwork[quota]]; + } + } + + for (i = n; i <= m1; ) + { + if (temp[i] <= tol) + { + register int j; + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + saxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m] += + fabs(lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + scopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + sswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m] = + fabs(lusup[xlusup_first + m1 + j * m]); + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + temp[i] = temp[m1]; + + continue; + } + i++; + + } /* for */ + + } /* if secondary dropping */ + + for (i = n; i < m; i++) temp[i] = 0.0; + + if (r == 0) + { + *nnzLj += m * n; + return 0; + } + + /* add dropped entries to the diagnal */ + if (milu != SILU) + { + register int j; + float t; + for (j = 0; j < n; j++) + { + t = lusup[xlusup_first + (m - 1) + j * m] * MILU_ALPHA; + switch (milu) + { + case SMILU_1: + if (t != none) { + lusup[xlusup_first + j * inc_diag] *= (one + t); + } + else + { + lusup[xlusup_first + j * inc_diag] *= *fill_tol; +#ifdef DEBUG + printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); + fflush(stdout); +#endif + nzp++; + } + break; + case SMILU_2: + lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t)); + break; + case SMILU_3: + lusup[xlusup_first + j * inc_diag] *= (one + t); + break; + case SILU: + default: + break; + } + } + if (nzp > 0) *fill_tol = -nzp; + } + + /* Remove dropped entries from the memory and fix the pointers. */ + m1 = m - r; + for (j = 1; j < n; j++) + { + register int tmp1, tmp2; + tmp1 = xlusup_first + j * m1; + tmp2 = xlusup_first + j * m; + for (i = 0; i < m1; i++) + lusup[i + tmp1] = lusup[i + tmp2]; + } + for (i = 0; i < nzlc; i++) + lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; + for (i = 0; i < nzlc; i++) + lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; + for (i = first + 1; i <= last + 1; i++) + { + xlusup[i] -= r * (i - first); + xlsub[i] -= r; + } + if (lastc) + { + xlusup[last + 2] -= r * n; + xlsub[last + 2] -= r; + } + + *nnzLj += (m - r) * n; + return r; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spanel_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spanel_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,248 @@ + +/*! @file ilu_spanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols and + * record the entries with maximum absolute value in each column + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives.
+ *
+ *   The routine returns one list of the supernodal representatives
+ *   in topological order of the dfs that generates them. This list is
+ *   a superset of the topological order of each individual column within
+ *   the panel.
+ *   The location of the first nonzero in each supernodal segment
+ *   (supernodal entry location) is also returned. Each column has a
+ *   separate list for this purpose.
+ *
+ *   Two marker arrays are used for dfs:
+ *     marker[i] == jj, if i was visited during dfs of current column jj;
+ *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ *   marker: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ * 
+ */ +void +ilu_spanel_dfs( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + float *dense, /* out */ + float *amax, /* out - max. abs. value of each column in panel */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ +) +{ + + NCPformat *Astore; + float *a; + int *asub; + int *xa_begin, *xa_end; + int krep, chperm, chmark, chrep, oldrep, kchild, myfnz; + int k, krow, kmark, kperm; + int xdfs, maxdfs, kpar; + int jj; /* index through each column in the panel */ + int *marker1; /* marker1[jj] >= jcol if vertex jj was visited + by a previous column within this panel. */ + int *repfnz_col; /* start of each column in the panel */ + float *dense_col; /* start of each column in the panel */ + int nextl_col; /* next available position in panel_lsub[*,jj] */ + int *xsup, *supno; + int *lsub, *xlsub; + float *amax_col; + register double tmp; + + /* Initialize pointers */ + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + marker1 = marker + m; + repfnz_col = repfnz; + dense_col = dense; + amax_col = amax; + *nseg = 0; + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + + /* For each column in the panel */ + for (jj = jcol; jj < jcol + w; jj++) { + nextl_col = (jj - jcol) * m; + +#ifdef CHK_DFS + printf("\npanel col %d: ", jj); +#endif + + *amax_col = 0.0; + /* For each nonz in A[*,jj] do dfs */ + for (k = xa_begin[jj]; k < xa_end[jj]; k++) { + krow = asub[k]; + tmp = fabs(a[k]); + if (tmp > *amax_col) *amax_col = tmp; + dense_col[krow] = a[k]; + kmark = marker[krow]; + if ( kmark == jj ) + continue; /* krow visited before, go to the next nonzero */ + + /* For each unmarked nbr krow of jj + * krow is in L: place it in structure of L[*,jj] + */ + marker[krow] = jj; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + panel_lsub[nextl_col++] = krow; /* krow is indexed into A */ + } + /* + * krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + else { + + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz_col[krep]; + +#ifdef CHK_DFS + printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm); +#endif + if ( myfnz != EMPTY ) { /* Representative visited before */ + if ( myfnz > kperm ) repfnz_col[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz_col[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker[kchild]; + + if ( chmark != jj ) { /* Not reached yet */ + marker[kchild] = jj; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,j] */ + if ( chperm == EMPTY ) { + panel_lsub[nextl_col++] = kchild; + } + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + else { + + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz_col[chrep]; +#ifdef CHK_DFS + printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm); +#endif + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz_col[chrep] = chperm; + } + else { + /* Cont. dfs at snode-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L) */ + parent[krep] = oldrep; + repfnz_col[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } /* else */ + + } /* else */ + + } /* if... */ + + } /* while xdfs < maxdfs */ + + /* krow has no more unexplored nbrs: + * Place snode-rep krep in postorder DFS, if this + * segment is seen for the first time. (Note that + * "repfnz[krep]" may change later.) + * Backtrack dfs to its parent. + */ + if ( marker1[krep] < jcol ) { + segrep[*nseg] = krep; + ++(*nseg); + marker1[krep] = jj; + } + + kpar = parent[krep]; /* Pop stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } while ( kpar != EMPTY ); /* do-while - until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonz in A[*,jj] */ + + repfnz_col += m; /* Move to next column */ + dense_col += m; + amax_col++; + + } /* for jj ... */ + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,266 @@ + +/*! @file ilu_spivotL.c + * \brief Performs numerical pivoting + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + + +#include +#include +#include "slu_sdefs.h" + +#ifndef SGN +#define SGN(x) ((x)>=0?1:-1) +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Performs the numerical pivoting on the current column of L,
+ *   and the CDIV operation.
+ *
+ *   Pivot policy:
+ *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ *	     pivot row = k;
+ *	 ELSE IF abs(A_jj) >= thresh THEN
+ *	     pivot row = j;
+ *	 ELSE
+ *	     pivot row = m;
+ *
+ *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ *   Return value: 0	  success;
+ *		   i > 0  U(i,i) is exactly zero.
+ * 
+ */ + +int +ilu_spivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by + * perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int diagind, /* diagonal of Pc*A*Pc' */ + int *swap, /* in/out record the row permutation */ + int *iswap, /* in/out inverse of swap, it is the same as + perm_r after the factorization */ + int *marker, /* in */ + int *pivrow, /* in/out, as an input if *usepr!=0 */ + double fill_tol, /* in - fill tolerance of current column + * used for a singular column */ + milu_t milu, /* in */ + float drop_sum, /* in - computed in ilu_scopy_to_ucol() + (MILU only) */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + + int n; /* number of columns */ + int fsupc; /* first column in the supernode */ + int nsupc; /* no of columns in the supernode */ + int nsupr; /* no of rows in the supernode */ + int lptr; /* points to the starting subscript of the supernode */ + register int pivptr; + int old_pivptr, diag, ptr0; + register float pivmax, rtemp; + float thresh; + float temp; + float *lu_sup_ptr; + float *lu_col_ptr; + int *lsub_ptr; + register int isub, icol, k, itemp; + int *lsub, *xlsub; + float *lusup; + int *xlusup; + flops_t *ops = stat->ops; + int info; + + /* Initialize pointers */ + n = Glu->n; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + lusup = Glu->lusup; + xlusup = Glu->xlusup; + fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; + nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ + lptr = xlsub[fsupc]; + nsupr = xlsub[fsupc+1] - lptr; + lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ + lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ + lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ + + /* Determine the largest abs numerical value for partial pivoting; + Also search for user-specified pivot, and diagonal element. */ + pivmax = -1.0; + pivptr = nsupc; + diag = EMPTY; + old_pivptr = nsupc; + ptr0 = EMPTY; + for (isub = nsupc; isub < nsupr; ++isub) { + if (marker[lsub_ptr[isub]] > jcol) + continue; /* do not overlap with a later relaxed supernode */ + + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[isub] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + /* In this case, drop_sum contains the sum of the abs. value */ + rtemp = fabs(lu_col_ptr[isub]); + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[isub]); + break; + } + if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } + if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; + if (lsub_ptr[isub] == diagind) diag = isub; + if (ptr0 == EMPTY) ptr0 = isub; + } + + if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum; + + /* Test for singularity */ + if (pivmax < 0.0) { + fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + if ( pivmax == 0.0 ) { + if (diag != EMPTY) + *pivrow = lsub_ptr[pivptr = diag]; + else if (ptr0 != EMPTY) + *pivrow = lsub_ptr[pivptr = ptr0]; + else { + /* look for the first row which does not + belong to any later supernodes */ + for (icol = jcol; icol < n; icol++) + if (marker[swap[icol]] <= jcol) break; + if (icol >= n) { + fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + + *pivrow = swap[icol]; + + /* pick up the pivot row */ + for (isub = nsupc; isub < nsupr; ++isub) + if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } + } + pivmax = fill_tol; + lu_col_ptr[pivptr] = pivmax; + *usepr = 0; +#ifdef DEBUG + printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); + fflush(stdout); +#endif + info =jcol + 1; + } /* if (*pivrow == 0.0) */ + else { + thresh = u * pivmax; + + /* Choose appropriate pivotal element by our policy. */ + if ( *usepr ) { + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[old_pivptr] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + rtemp = fabs(lu_col_ptr[old_pivptr]) + drop_sum; + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[old_pivptr]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; + else *usepr = 0; + } + if ( *usepr == 0 ) { + /* Use diagonal pivot? */ + if ( diag >= 0 ) { /* diagonal exists */ + switch (milu) { + case SMILU_1: + rtemp = fabs(lu_col_ptr[diag] + drop_sum); + break; + case SMILU_2: + case SMILU_3: + rtemp = fabs(lu_col_ptr[diag]) + drop_sum; + break; + case SILU: + default: + rtemp = fabs(lu_col_ptr[diag]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; + } + *pivrow = lsub_ptr[pivptr]; + } + info = 0; + + /* Reset the diagonal */ + switch (milu) { + case SMILU_1: + lu_col_ptr[pivptr] += drop_sum; + break; + case SMILU_2: + case SMILU_3: + lu_col_ptr[pivptr] += SGN(lu_col_ptr[pivptr]) * drop_sum; + break; + case SILU: + default: + break; + } + + } /* else */ + + /* Record pivot row */ + perm_r[*pivrow] = jcol; + if (jcol < n - 1) { + register int t1, t2, t; + t1 = iswap[*pivrow]; t2 = jcol; + if (t1 != t2) { + t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; + t1 = swap[t1]; t2 = t; + t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; + } + } /* if (jcol < n - 1) */ + + /* Interchange row subscripts */ + if ( pivptr != nsupc ) { + itemp = lsub_ptr[pivptr]; + lsub_ptr[pivptr] = lsub_ptr[nsupc]; + lsub_ptr[nsupc] = itemp; + + /* Interchange numerical values as well, for the whole snode, such + * that L is indexed the same way as A. + */ + for (icol = 0; icol <= nsupc; icol++) { + itemp = pivptr + icol * nsupr; + temp = lu_sup_ptr[itemp]; + lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; + lu_sup_ptr[nsupc + icol*nsupr] = temp; + } + } /* if */ + + /* cdiv operation */ + ops[FACT] += nsupr - nsupc; + temp = 1.0 / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) lu_col_ptr[k] *= temp; + + return info; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ssnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ssnode_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ssnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,90 @@ + +/*! @file ilu_ssnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_ssnode_dfs() - Determine the union of the row structures of those
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + +int +ilu_ssnode_dfs( + const int jcol, /* in - start of the supernode */ + const int kcol, /* in - end of the supernode */ + const int *asub, /* in */ + const int *xa_begin, /* in */ + const int *xa_end, /* in */ + int *marker, /* modified */ + GlobalLU_t *Glu /* modified */ + ) +{ + + register int i, k, nextl; + int nsuper, krow, kmark, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + int nzlmax; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + nsuper = ++supno[jcol]; /* Next available supernode number */ + nextl = xlsub[jcol]; + + for (i = jcol; i <= kcol; i++) + { + /* For each nonzero in A[*,i] */ + for (k = xa_begin[i]; k < xa_end[i]; k++) + { + krow = asub[k]; + kmark = marker[krow]; + if ( kmark != kcol ) + { /* First time visit krow */ + marker[krow] = kcol; + lsub[nextl++] = krow; + if ( nextl >= nzlmax ) + { + if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, + Glu)) != 0) + return (mem_error); + lsub = Glu->lsub; + } + } + } + supno[i] = nsuper; + } + + /* Supernode > 1 */ + if ( jcol < kcol ) + for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; + + xsup[nsuper+1] = kcol + 1; + supno[kcol+1] = nsuper; + xlsub[kcol+1] = nextl; + + return 0; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcolumn_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcolumn_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,258 @@ + +/*! @file ilu_zcolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+*/ + +#include "slu_zdefs.h" + + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   ILU_ZCOLUMN_DFS performs a symbolic factorization on column jcol, and
+ *   decide the supernode boundary.
+ *
+ *   This routine does not use numeric values, but only use the RHS
+ *   row indices to start the dfs.
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives. The routine returns a list of such supernodal
+ *   representatives in topological order of the dfs that generates them.
+ *   The location of the first nonzero in each such supernodal segment
+ *   (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ *   nseg: no of segments in current U[*,j]
+ *   jsuper: jsuper=EMPTY if column j does not belong to the same
+ *	supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ *   marker2: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ *     0  success;
+ *   > 0  number of bytes allocated when run out of space.
+ * 
+ */ +int +ilu_zcolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the + dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + + int jcolp1, jcolm1, jsuper, nsuper, nextl; + int k, krep, krow, kmark, kperm; + int *marker2; /* Used for small panel LU */ + int fsupc; /* First column of a snode */ + int myfnz; /* First nonz column of a U-segment */ + int chperm, chmark, chrep, kchild; + int xdfs, maxdfs, kpar, oldrep; + int jptr, jm1ptr; + int ito, ifrom; /* Used to compress row subscripts */ + int mem_error; + int *xsup, *supno, *lsub, *xlsub; + int nzlmax; + static int first = 1, maxsuper; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + if ( first ) { + maxsuper = sp_ienv(3); + first = 0; + } + jcolp1 = jcol + 1; + jcolm1 = jcol - 1; + nsuper = supno[jcol]; + jsuper = nsuper; + nextl = xlsub[jcol]; + marker2 = &marker[2*m]; + + + /* For each nonzero in A[*,jcol] do dfs */ + for (k = 0; lsub_col[k] != EMPTY; k++) { + + krow = lsub_col[k]; + lsub_col[k] = EMPTY; + kmark = marker2[krow]; + + /* krow was visited before, go to the next nonzero */ + if ( kmark == jcol ) continue; + + /* For each unmarked nbr krow of jcol + * krow is in L: place it in structure of L[*,jcol] + */ + marker2[krow] = jcol; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + lsub[nextl++] = krow; /* krow is indexed into A */ + if ( nextl >= nzlmax ) { + if ((mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu))) + return (mem_error); + lsub = Glu->lsub; + } + if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */ + } else { + /* krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz[krep]; + + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > kperm ) repfnz[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker2[kchild]; + + if ( chmark != jcol ) { /* Not reached yet */ + marker2[kchild] = jcol; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,k] */ + if ( chperm == EMPTY ) { + lsub[nextl++] = kchild; + if ( nextl >= nzlmax ) { + if ( (mem_error = zLUMemXpand(jcol,nextl, + LSUB,&nzlmax,Glu)) ) + return (mem_error); + lsub = Glu->lsub; + } + if ( chmark != jcolm1 ) jsuper = EMPTY; + } else { + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz[chrep]; + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz[chrep] = chperm; + } else { + /* Continue dfs at super-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L^t) */ + parent[krep] = oldrep; + repfnz[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + } /* else */ + + } /* else */ + + } /* if */ + + } /* while */ + + /* krow has no more unexplored nbrs; + * place supernode-rep krep in postorder DFS. + * backtrack dfs to its parent + */ + segrep[*nseg] = krep; + ++(*nseg); + kpar = parent[krep]; /* Pop from stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + + } while ( kpar != EMPTY ); /* Until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonzero ... */ + + /* Check to see if j belongs in the same supernode as j-1 */ + if ( jcol == 0 ) { /* Do nothing for column 0 */ + nsuper = supno[0] = 0; + } else { + fsupc = xsup[nsuper]; + jptr = xlsub[jcol]; /* Not compressed yet */ + jm1ptr = xlsub[jcolm1]; + + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; + + /* Always start a new supernode for a singular column */ + if ( nextl == jptr ) jsuper = EMPTY; + + /* Make sure the number of columns in a supernode doesn't + exceed threshold. */ + if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * lsub from the previous supernode. Note we only store + * the subscript set of the first columns of the supernode. + */ + if ( jsuper == EMPTY ) { /* starts a new supernode */ + if ( (fsupc < jcolm1) ) { /* >= 2 columns in nsuper */ +#ifdef CHK_COMPRESS + printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); +#endif + ito = xlsub[fsupc+1]; + xlsub[jcolm1] = ito; + xlsub[jcol] = ito; + for (ifrom = jptr; ifrom < nextl; ++ifrom, ++ito) + lsub[ito] = lsub[ifrom]; + nextl = ito; + } + nsuper++; + supno[jcol] = nsuper; + } /* if a new supernode */ + + } /* else: jcol > 0 */ + + /* Tidy up the pointers before exit */ + xsup[nsuper+1] = jcolp1; + supno[jcolp1] = nsuper; + xlsub[jcolp1] = nextl; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcopy_to_ucol.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,202 @@ + +/*! @file ilu_zcopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * and drop some small entries + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +#ifdef DEBUG +int num_drop_U; +#endif + +static doublecomplex *A; /* used in _compare_ only */ +static int _compare_(const void *a, const void *b) +{ + register int *x = (int *)a, *y = (int *)b; + register double xx = z_abs1(&A[*x]), yy = z_abs1(&A[*y]); + if (xx > yy) return -1; + else if (xx < yy) return 1; + else return 0; +} + + +int +ilu_zcopy_to_ucol( + int jcol, /* in */ + int nseg, /* in */ + int *segrep, /* in */ + int *repfnz, /* in */ + int *perm_r, /* in */ + doublecomplex *dense, /* modified - reset to zero on return */ + int drop_rule,/* in */ + milu_t milu, /* in */ + double drop_tol, /* in */ + int quota, /* maximum nonzero entries allowed */ + doublecomplex *sum, /* out - the sum of dropped entries */ + int *nnzUj, /* in - out */ + GlobalLU_t *Glu, /* modified */ + int *work /* working space with minimum size n, + * used by the second dropping rule */ + ) +{ +/* + * Gather from SPA dense[*] to global ucol[*]. + */ + int ksub, krep, ksupno; + int i, k, kfnz, segsze; + int fsupc, isub, irow; + int jsupno, nextu; + int new_next, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + doublecomplex *ucol; + int *usub, *xusub; + int nzumax; + int m; /* number of entries in the nonzero U-segments */ + register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); + register double tmp; + doublecomplex zero = {0.0, 0.0}; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + ucol = Glu->ucol; + usub = Glu->usub; + xusub = Glu->xusub; + nzumax = Glu->nzumax; + + *sum = zero; + if (drop_rule == NODROP) { + drop_tol = -1.0, quota = Glu->n; + } + + jsupno = supno[jcol]; + nextu = xusub[jcol]; + k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) { + krep = segrep[k--]; + ksupno = supno[krep]; + + if ( ksupno != jsupno ) { /* Should go into ucol[] */ + kfnz = repfnz[krep]; + if ( kfnz != EMPTY ) { /* Nonzero U-segment */ + + fsupc = xsup[ksupno]; + isub = xlsub[fsupc] + kfnz - fsupc; + segsze = krep - kfnz + 1; + + new_next = nextu + segsze; + while ( new_next > nzumax ) { + if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax, + Glu)) != 0) + return (mem_error); + ucol = Glu->ucol; + if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax, + Glu)) != 0) + return (mem_error); + usub = Glu->usub; + lsub = Glu->lsub; + } + + for (i = 0; i < segsze; i++) { + irow = lsub[isub++]; + tmp = z_abs1(&dense[irow]); + + /* first dropping rule */ + if (quota > 0 && tmp >= drop_tol) { + if (tmp > d_max) d_max = tmp; + if (tmp < d_min) d_min = tmp; + usub[nextu] = perm_r[irow]; + ucol[nextu] = dense[irow]; + nextu++; + } else { + switch (milu) { + case SMILU_1: + case SMILU_2: + z_add(sum, sum, &dense[irow]); + break; + case SMILU_3: + /* *sum += fabs(dense[irow]);*/ + sum->r += tmp; + break; + case SILU: + default: + break; + } +#ifdef DEBUG + num_drop_U++; +#endif + } + dense[irow] = zero; + } + + } + + } + + } /* for each segment... */ + + xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ + m = xusub[jcol + 1] - xusub[jcol]; + + /* second dropping rule */ + if (drop_rule & DROP_SECONDARY && m > quota) { + register double tol = d_max; + register int m0 = xusub[jcol] + m - 1; + + if (quota > 0) { + if (drop_rule & DROP_INTERP) { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / m); + } else { + A = &ucol[xusub[jcol]]; + for (i = 0; i < m; i++) work[i] = i; + qsort(work, m, sizeof(int), _compare_); + tol = fabs(usub[xusub[jcol] + work[quota]]); + } + } + for (i = xusub[jcol]; i <= m0; ) { + if (z_abs1(&ucol[i]) <= tol) { + switch (milu) { + case SMILU_1: + case SMILU_2: + z_add(sum, sum, &ucol[i]); + break; + case SMILU_3: + sum->r += tmp; + break; + case SILU: + default: + break; + } + ucol[i] = ucol[m0]; + usub[i] = usub[m0]; + m0--; + m--; +#ifdef DEBUG + num_drop_U++; +#endif + xusub[jcol + 1]--; + continue; + } + i++; + } + } + + if (milu == SMILU_2) { + sum->r = z_abs1(sum); sum->i = 0.0; + } + if (milu == SMILU_3) sum->i = 0.0; + + *nnzUj += m; + + return 0; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zdrop_row.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zdrop_row.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zdrop_row.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,321 @@ + +/*! @file ilu_zdrop_row.c + * \brief Drop small rows from L + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * <\pre>
+ */
+
+#include 
+#include 
+#include "slu_zdefs.h"
+
+extern void zswap_(int *, doublecomplex [], int *, doublecomplex [], int *);
+extern void zaxpy_(int *, doublecomplex *, doublecomplex [], int *, doublecomplex [], int *);
+
+static double *A;  /* used in _compare_ only */
+static int _compare_(const void *a, const void *b)
+{
+    register int *x = (int *)a, *y = (int *)b;
+    if (A[*x] - A[*y] > 0.0) return -1;
+    else if (A[*x] - A[*y] < 0.0) return 1;
+    else return 0;
+}
+
+/*! \brief
+ * 
+ * Purpose
+ * =======
+ *    ilu_zdrop_row() - Drop some small rows from the previous 
+ *    supernode (L-part only).
+ * 
+ */ +int ilu_zdrop_row( + superlu_options_t *options, /* options */ + int first, /* index of the first column in the supernode */ + int last, /* index of the last column in the supernode */ + double drop_tol, /* dropping parameter */ + int quota, /* maximum nonzero entries allowed */ + int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ + double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, + * does not change if options->ILU_MILU != SMILU1 */ + GlobalLU_t *Glu, /* modified */ + double dwork[], /* working space with minimum size last-first+1 */ + int iwork[], /* working space with minimum size m - n, + * used by the second dropping rule */ + int lastc /* if lastc == 0, there is nothing after the + * working supernode [first:last]; + * if lastc == 1, there is one more column after + * the working supernode. */ ) +{ + register int i, j, k, m1; + register int nzlc; /* number of nonzeros in column last+1 */ + register int xlusup_first, xlsub_first; + int m, n; /* m x n is the size of the supernode */ + int r = 0; /* number of dropped rows */ + register double *temp; + register doublecomplex *lusup = Glu->lusup; + register int *lsub = Glu->lsub; + register int *xlsub = Glu->xlsub; + register int *xlusup = Glu->xlusup; + register double d_max = 0.0, d_min = 1.0; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + norm_t nrm = options->ILU_Norm; + doublecomplex zero = {0.0, 0.0}; + doublecomplex one = {1.0, 0.0}; + doublecomplex none = {-1.0, 0.0}; + int inc_diag; /* inc_diag = m + 1 */ + int nzp = 0; /* number of zero pivots */ + + xlusup_first = xlusup[first]; + xlsub_first = xlsub[first]; + m = xlusup[first + 1] - xlusup_first; + n = last - first + 1; + m1 = m - 1; + inc_diag = m + 1; + nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; + temp = dwork - n; + + /* Quick return if nothing to do. */ + if (m == 0 || m == n || drop_rule == NODROP) + { + *nnzLj += m * n; + return 0; + } + + /* basic dropping: ILU(tau) */ + for (i = n; i <= m1; ) + { + /* the average abs value of ith row */ + switch (nrm) + { + case ONE_NORM: + temp[i] = dzasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; + break; + case TWO_NORM: + temp[i] = dznrm2_(&n, &lusup[xlusup_first + i], &m) + / sqrt((double)n); + break; + case INF_NORM: + default: + k = izamax_(&n, &lusup[xlusup_first + i], &m) - 1; + temp[i] = z_abs1(&lusup[xlusup_first + i + m * k]); + break; + } + + /* drop small entries due to drop_tol */ + if (drop_rule & DROP_BASIC && temp[i] < drop_tol) + { + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + zaxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m].r += + z_abs1(&lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + zcopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + zswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m].r = + z_abs1(&lusup[xlusup_first + m1 + j * m]); + lusup[xlusup_first + m1 + j * m].i = 0.0; + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + continue; + } /* if dropping */ + else + { + if (temp[i] > d_max) d_max = temp[i]; + if (temp[i] < d_min) d_min = temp[i]; + } + i++; + } /* for */ + + /* Secondary dropping: drop more rows according to the quota. */ + quota = ceil((double)quota / (double)n); + if (drop_rule & DROP_SECONDARY && m - r > quota) + { + register double tol = d_max; + + /* Calculate the second dropping tolerance */ + if (quota > n) + { + if (drop_rule & DROP_INTERP) /* by interpolation */ + { + d_max = 1.0 / d_max; d_min = 1.0 / d_min; + tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); + } + else /* by quick sort */ + { + register int *itemp = iwork - n; + A = temp; + for (i = n; i <= m1; i++) itemp[i] = i; + qsort(iwork, m1 - n + 1, sizeof(int), _compare_); + tol = temp[iwork[quota]]; + } + } + + for (i = n; i <= m1; ) + { + if (temp[i] <= tol) + { + register int j; + r++; + /* drop the current row and move the last undropped row here */ + if (r > 1) /* add to last row */ + { + /* accumulate the sum (for MILU) */ + switch (milu) + { + case SMILU_1: + case SMILU_2: + zaxpy_(&n, &one, &lusup[xlusup_first + i], &m, + &lusup[xlusup_first + m - 1], &m); + break; + case SMILU_3: + for (j = 0; j < n; j++) + lusup[xlusup_first + (m - 1) + j * m].r += + z_abs1(&lusup[xlusup_first + i + j * m]); + break; + case SILU: + default: + break; + } + zcopy_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + } /* if (r > 1) */ + else /* move to last row */ + { + zswap_(&n, &lusup[xlusup_first + m1], &m, + &lusup[xlusup_first + i], &m); + if (milu == SMILU_3) + for (j = 0; j < n; j++) { + lusup[xlusup_first + m1 + j * m].r = + z_abs1(&lusup[xlusup_first + m1 + j * m]); + lusup[xlusup_first + m1 + j * m].i = 0.0; + } + } + lsub[xlsub_first + i] = lsub[xlsub_first + m1]; + m1--; + temp[i] = temp[m1]; + + continue; + } + i++; + + } /* for */ + + } /* if secondary dropping */ + + for (i = n; i < m; i++) temp[i] = 0.0; + + if (r == 0) + { + *nnzLj += m * n; + return 0; + } + + /* add dropped entries to the diagnal */ + if (milu != SILU) + { + register int j; + doublecomplex t; + for (j = 0; j < n; j++) + { + zd_mult(&t, &lusup[xlusup_first + (m - 1) + j * m], + MILU_ALPHA); + switch (milu) + { + case SMILU_1: + if ( !(z_eq(&t, &none)) ) { + z_add(&t, &t, &one); + zz_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + &t); + } + else + { + zd_mult( + &lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + *fill_tol); +#ifdef DEBUG + printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); + fflush(stdout); +#endif + nzp++; + } + break; + case SMILU_2: + zd_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + 1.0 + z_abs1(&t)); + break; + case SMILU_3: + z_add(&t, &t, &one); + zz_mult(&lusup[xlusup_first + j * inc_diag], + &lusup[xlusup_first + j * inc_diag], + &t); + break; + case SILU: + default: + break; + } + } + if (nzp > 0) *fill_tol = -nzp; + } + + /* Remove dropped entries from the memory and fix the pointers. */ + m1 = m - r; + for (j = 1; j < n; j++) + { + register int tmp1, tmp2; + tmp1 = xlusup_first + j * m1; + tmp2 = xlusup_first + j * m; + for (i = 0; i < m1; i++) + lusup[i + tmp1] = lusup[i + tmp2]; + } + for (i = 0; i < nzlc; i++) + lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; + for (i = 0; i < nzlc; i++) + lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; + for (i = first + 1; i <= last + 1; i++) + { + xlusup[i] -= r * (i - first); + xlsub[i] -= r; + } + if (lastc) + { + xlusup[last + 2] -= r * n; + xlsub[last + 2] -= r; + } + + *nnzLj += (m - r) * n; + return r; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpanel_dfs.c (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_dfs.c) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpanel_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,248 @@ + +/*! @file ilu_zpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols and + * record the entries with maximum absolute value in each column + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives.
+ *
+ *   The routine returns one list of the supernodal representatives
+ *   in topological order of the dfs that generates them. This list is
+ *   a superset of the topological order of each individual column within
+ *   the panel.
+ *   The location of the first nonzero in each supernodal segment
+ *   (supernodal entry location) is also returned. Each column has a
+ *   separate list for this purpose.
+ *
+ *   Two marker arrays are used for dfs:
+ *     marker[i] == jj, if i was visited during dfs of current column jj;
+ *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ *   marker: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ * 
+ */ +void +ilu_zpanel_dfs( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + doublecomplex *dense, /* out */ + double *amax, /* out - max. abs. value of each column in panel */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ +) +{ + + NCPformat *Astore; + doublecomplex *a; + int *asub; + int *xa_begin, *xa_end; + int krep, chperm, chmark, chrep, oldrep, kchild, myfnz; + int k, krow, kmark, kperm; + int xdfs, maxdfs, kpar; + int jj; /* index through each column in the panel */ + int *marker1; /* marker1[jj] >= jcol if vertex jj was visited + by a previous column within this panel. */ + int *repfnz_col; /* start of each column in the panel */ + doublecomplex *dense_col; /* start of each column in the panel */ + int nextl_col; /* next available position in panel_lsub[*,jj] */ + int *xsup, *supno; + int *lsub, *xlsub; + double *amax_col; + register double tmp; + + /* Initialize pointers */ + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + marker1 = marker + m; + repfnz_col = repfnz; + dense_col = dense; + amax_col = amax; + *nseg = 0; + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + + /* For each column in the panel */ + for (jj = jcol; jj < jcol + w; jj++) { + nextl_col = (jj - jcol) * m; + +#ifdef CHK_DFS + printf("\npanel col %d: ", jj); +#endif + + *amax_col = 0.0; + /* For each nonz in A[*,jj] do dfs */ + for (k = xa_begin[jj]; k < xa_end[jj]; k++) { + krow = asub[k]; + tmp = z_abs1(&a[k]); + if (tmp > *amax_col) *amax_col = tmp; + dense_col[krow] = a[k]; + kmark = marker[krow]; + if ( kmark == jj ) + continue; /* krow visited before, go to the next nonzero */ + + /* For each unmarked nbr krow of jj + * krow is in L: place it in structure of L[*,jj] + */ + marker[krow] = jj; + kperm = perm_r[krow]; + + if ( kperm == EMPTY ) { + panel_lsub[nextl_col++] = krow; /* krow is indexed into A */ + } + /* + * krow is in U: if its supernode-rep krep + * has been explored, update repfnz[*] + */ + else { + + krep = xsup[supno[kperm]+1] - 1; + myfnz = repfnz_col[krep]; + +#ifdef CHK_DFS + printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm); +#endif + if ( myfnz != EMPTY ) { /* Representative visited before */ + if ( myfnz > kperm ) repfnz_col[krep] = kperm; + /* continue; */ + } + else { + /* Otherwise, perform dfs starting at krep */ + oldrep = EMPTY; + parent[krep] = oldrep; + repfnz_col[krep] = kperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + do { + /* + * For each unmarked kchild of krep + */ + while ( xdfs < maxdfs ) { + + kchild = lsub[xdfs]; + xdfs++; + chmark = marker[kchild]; + + if ( chmark != jj ) { /* Not reached yet */ + marker[kchild] = jj; + chperm = perm_r[kchild]; + + /* Case kchild is in L: place it in L[*,j] */ + if ( chperm == EMPTY ) { + panel_lsub[nextl_col++] = kchild; + } + /* Case kchild is in U: + * chrep = its supernode-rep. If its rep has + * been explored, update its repfnz[*] + */ + else { + + chrep = xsup[supno[chperm]+1] - 1; + myfnz = repfnz_col[chrep]; +#ifdef CHK_DFS + printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm); +#endif + if ( myfnz != EMPTY ) { /* Visited before */ + if ( myfnz > chperm ) + repfnz_col[chrep] = chperm; + } + else { + /* Cont. dfs at snode-rep of kchild */ + xplore[krep] = xdfs; + oldrep = krep; + krep = chrep; /* Go deeper down G(L) */ + parent[krep] = oldrep; + repfnz_col[krep] = chperm; + xdfs = xlsub[xsup[supno[krep]]]; + maxdfs = xlsub[krep + 1]; +#ifdef CHK_DFS + printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } /* else */ + + } /* else */ + + } /* if... */ + + } /* while xdfs < maxdfs */ + + /* krow has no more unexplored nbrs: + * Place snode-rep krep in postorder DFS, if this + * segment is seen for the first time. (Note that + * "repfnz[krep]" may change later.) + * Backtrack dfs to its parent. + */ + if ( marker1[krep] < jcol ) { + segrep[*nseg] = krep; + ++(*nseg); + marker1[krep] = jj; + } + + kpar = parent[krep]; /* Pop stack, mimic recursion */ + if ( kpar == EMPTY ) break; /* dfs done */ + krep = kpar; + xdfs = xplore[krep]; + maxdfs = xlsub[krep + 1]; + +#ifdef CHK_DFS + printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs); + for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); + printf("\n"); +#endif + } while ( kpar != EMPTY ); /* do-while - until empty stack */ + + } /* else */ + + } /* else */ + + } /* for each nonz in A[*,jj] */ + + repfnz_col += m; /* Move to next column */ + dense_col += m; + amax_col++; + + } /* for jj ... */ + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,274 @@ + +/*! @file ilu_zpivotL.c + * \brief Performs numerical pivoting + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + + +#include +#include +#include "slu_zdefs.h" + +#ifndef SGN +#define SGN(x) ((x)>=0?1:-1) +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Performs the numerical pivoting on the current column of L,
+ *   and the CDIV operation.
+ *
+ *   Pivot policy:
+ *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ *	     pivot row = k;
+ *	 ELSE IF abs(A_jj) >= thresh THEN
+ *	     pivot row = j;
+ *	 ELSE
+ *	     pivot row = m;
+ *
+ *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ *   Return value: 0	  success;
+ *		   i > 0  U(i,i) is exactly zero.
+ * 
+ */ + +int +ilu_zpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by + * perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int diagind, /* diagonal of Pc*A*Pc' */ + int *swap, /* in/out record the row permutation */ + int *iswap, /* in/out inverse of swap, it is the same as + perm_r after the factorization */ + int *marker, /* in */ + int *pivrow, /* in/out, as an input if *usepr!=0 */ + double fill_tol, /* in - fill tolerance of current column + * used for a singular column */ + milu_t milu, /* in */ + doublecomplex drop_sum, /* in - computed in ilu_zcopy_to_ucol() + (MILU only) */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + + int n; /* number of columns */ + int fsupc; /* first column in the supernode */ + int nsupc; /* no of columns in the supernode */ + int nsupr; /* no of rows in the supernode */ + int lptr; /* points to the starting subscript of the supernode */ + register int pivptr; + int old_pivptr, diag, ptr0; + register double pivmax, rtemp; + double thresh; + doublecomplex temp; + doublecomplex *lu_sup_ptr; + doublecomplex *lu_col_ptr; + int *lsub_ptr; + register int isub, icol, k, itemp; + int *lsub, *xlsub; + doublecomplex *lusup; + int *xlusup; + flops_t *ops = stat->ops; + int info; + doublecomplex one = {1.0, 0.0}; + + /* Initialize pointers */ + n = Glu->n; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + lusup = Glu->lusup; + xlusup = Glu->xlusup; + fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; + nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ + lptr = xlsub[fsupc]; + nsupr = xlsub[fsupc+1] - lptr; + lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ + lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ + lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ + + /* Determine the largest abs numerical value for partial pivoting; + Also search for user-specified pivot, and diagonal element. */ + pivmax = -1.0; + pivptr = nsupc; + diag = EMPTY; + old_pivptr = nsupc; + ptr0 = EMPTY; + for (isub = nsupc; isub < nsupr; ++isub) { + if (marker[lsub_ptr[isub]] > jcol) + continue; /* do not overlap with a later relaxed supernode */ + + switch (milu) { + case SMILU_1: + z_add(&temp, &lu_col_ptr[isub], &drop_sum); + rtemp = z_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + /* In this case, drop_sum contains the sum of the abs. value */ + rtemp = z_abs1(&lu_col_ptr[isub]); + break; + case SILU: + default: + rtemp = z_abs1(&lu_col_ptr[isub]); + break; + } + if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; } + if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub; + if (lsub_ptr[isub] == diagind) diag = isub; + if (ptr0 == EMPTY) ptr0 = isub; + } + + if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r; + + /* Test for singularity */ + if (pivmax < 0.0) { + fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + if ( pivmax == 0.0 ) { + if (diag != EMPTY) + *pivrow = lsub_ptr[pivptr = diag]; + else if (ptr0 != EMPTY) + *pivrow = lsub_ptr[pivptr = ptr0]; + else { + /* look for the first row which does not + belong to any later supernodes */ + for (icol = jcol; icol < n; icol++) + if (marker[swap[icol]] <= jcol) break; + if (icol >= n) { + fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol); + fflush(stderr); + exit(1); + } + + *pivrow = swap[icol]; + + /* pick up the pivot row */ + for (isub = nsupc; isub < nsupr; ++isub) + if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; } + } + pivmax = fill_tol; + lu_col_ptr[pivptr].r = pivmax; + lu_col_ptr[pivptr].i = 0.0; + *usepr = 0; +#ifdef DEBUG + printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol); + fflush(stdout); +#endif + info =jcol + 1; + } /* if (*pivrow == 0.0) */ + else { + thresh = u * pivmax; + + /* Choose appropriate pivotal element by our policy. */ + if ( *usepr ) { + switch (milu) { + case SMILU_1: + z_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum); + rtemp = z_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + rtemp = z_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r; + break; + case SILU: + default: + rtemp = z_abs1(&lu_col_ptr[old_pivptr]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr; + else *usepr = 0; + } + if ( *usepr == 0 ) { + /* Use diagonal pivot? */ + if ( diag >= 0 ) { /* diagonal exists */ + switch (milu) { + case SMILU_1: + z_add(&temp, &lu_col_ptr[diag], &drop_sum); + rtemp = z_abs1(&temp); + break; + case SMILU_2: + case SMILU_3: + rtemp = z_abs1(&lu_col_ptr[diag]) + drop_sum.r; + break; + case SILU: + default: + rtemp = z_abs1(&lu_col_ptr[diag]); + break; + } + if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; + } + *pivrow = lsub_ptr[pivptr]; + } + info = 0; + + /* Reset the diagonal */ + switch (milu) { + case SMILU_1: + z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); + break; + case SMILU_2: + case SMILU_3: + temp = z_sgn(&lu_col_ptr[pivptr]); + zz_mult(&temp, &temp, &drop_sum); + z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum); + break; + case SILU: + default: + break; + } + + } /* else */ + + /* Record pivot row */ + perm_r[*pivrow] = jcol; + if (jcol < n - 1) { + register int t1, t2, t; + t1 = iswap[*pivrow]; t2 = jcol; + if (t1 != t2) { + t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t; + t1 = swap[t1]; t2 = t; + t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t; + } + } /* if (jcol < n - 1) */ + + /* Interchange row subscripts */ + if ( pivptr != nsupc ) { + itemp = lsub_ptr[pivptr]; + lsub_ptr[pivptr] = lsub_ptr[nsupc]; + lsub_ptr[nsupc] = itemp; + + /* Interchange numerical values as well, for the whole snode, such + * that L is indexed the same way as A. + */ + for (icol = 0; icol <= nsupc; icol++) { + itemp = pivptr + icol * nsupr; + temp = lu_sup_ptr[itemp]; + lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; + lu_sup_ptr[nsupc + icol*nsupr] = temp; + } + } /* if */ + + /* cdiv operation */ + ops[FACT] += 10 * (nsupr - nsupc); + z_div(&temp, &one, &lu_col_ptr[nsupc]); + for (k = nsupc+1; k < nsupr; k++) + zz_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp); + + return info; +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zsnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zsnode_dfs.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zsnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,90 @@ + +/*! @file ilu_zsnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ilu_zsnode_dfs() - Determine the union of the row structures of those
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + +int +ilu_zsnode_dfs( + const int jcol, /* in - start of the supernode */ + const int kcol, /* in - end of the supernode */ + const int *asub, /* in */ + const int *xa_begin, /* in */ + const int *xa_end, /* in */ + int *marker, /* modified */ + GlobalLU_t *Glu /* modified */ + ) +{ + + register int i, k, nextl; + int nsuper, krow, kmark, mem_error; + int *xsup, *supno; + int *lsub, *xlsub; + int nzlmax; + + xsup = Glu->xsup; + supno = Glu->supno; + lsub = Glu->lsub; + xlsub = Glu->xlsub; + nzlmax = Glu->nzlmax; + + nsuper = ++supno[jcol]; /* Next available supernode number */ + nextl = xlsub[jcol]; + + for (i = jcol; i <= kcol; i++) + { + /* For each nonzero in A[*,i] */ + for (k = xa_begin[i]; k < xa_end[i]; k++) + { + krow = asub[k]; + kmark = marker[krow]; + if ( kmark != kcol ) + { /* First time visit krow */ + marker[krow] = kcol; + lsub[nextl++] = krow; + if ( nextl >= nzlmax ) + { + if ( (mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, + Glu)) != 0) + return (mem_error); + lsub = Glu->lsub; + } + } + } + supno[i] = nsuper; + } + + /* Supernode > 1 */ + if ( jcol < kcol ) + for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; + + xsup[nsuper+1] = kcol + 1; + supno[kcol+1] = nsuper; + xlsub[kcol+1] = nextl; + + return 0; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/izmax1.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/izmax1.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/izmax1.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,14 +1,20 @@ -#include "dcomplex.h" +/*! @file izmax1.c + * \brief Finds the index of the element whose real part has maximum absolute value + * + *
+ *     -- LAPACK auxiliary routine (version 2.0) --   
+ *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *     Courant Institute, Argonne National Lab, and Rice University   
+ *     October 31, 1992   
+ * 
+ */ +#include +#include "slu_dcomplex.h" +#include "slu_Cnames.h" -int -izmax1_(int *n, doublecomplex *cx, int *incx) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 +/*! \brief - +
     Purpose   
     =======   
 
@@ -33,8 +39,14 @@
             The spacing between successive values of CX.  INCX >= 1.   
 
    ===================================================================== 
+
*/ +int +izmax1_(int *n, doublecomplex *cx, int *incx) +{ + + /* System generated locals */ int ret_val, i__1, i__2; double d__1; @@ -60,17 +72,17 @@ /* CODE FOR INCREMENT NOT EQUAL TO 1 */ ix = 1; - smax = (d__1 = CX(1).r, abs(d__1)); + smax = (d__1 = CX(1).r, fabs(d__1)); ix += *incx; i__1 = *n; for (i = 2; i <= *n; ++i) { i__2 = ix; - if ((d__1 = CX(ix).r, abs(d__1)) <= smax) { + if ((d__1 = CX(ix).r, fabs(d__1)) <= smax) { goto L10; } ret_val = i; i__2 = ix; - smax = (d__1 = CX(ix).r, abs(d__1)); + smax = (d__1 = CX(ix).r, fabs(d__1)); L10: ix += *incx; /* L20: */ @@ -80,16 +92,16 @@ /* CODE FOR INCREMENT EQUAL TO 1 */ L30: - smax = (d__1 = CX(1).r, abs(d__1)); + smax = (d__1 = CX(1).r, fabs(d__1)); i__1 = *n; for (i = 2; i <= *n; ++i) { i__2 = i; - if ((d__1 = CX(i).r, abs(d__1)) <= smax) { + if ((d__1 = CX(i).r, fabs(d__1)) <= smax) { goto L40; } ret_val = i; i__2 = i; - smax = (d__1 = CX(i).r, abs(d__1)); + smax = (d__1 = CX(i).r, fabs(d__1)); L40: ; } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/lsame.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/lsame.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/lsame.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,10 +1,18 @@ -int lsame_(char *ca, char *cb) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 +/*! @file lsame.c + * \brief Check if CA is the same letter as CB regardless of case. + * + *
+ * -- LAPACK auxiliary routine (version 2.0) --   
+ *      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *      Courant Institute, Argonne National Lab, and Rice University   
+ *      September 30, 1994   
+ * 
+ */ +#include "slu_Cnames.h" +/*! \brief + +
     Purpose   
     =======   
 
@@ -18,8 +26,13 @@
             CA and CB specify the single characters to be compared.   
 
    ===================================================================== 
+
*/ +int lsame_(char *ca, char *cb) +{ + + /* System generated locals */ int ret_val; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/mark_relax.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/mark_relax.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/mark_relax.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,47 @@ +/*! @file mark_relax.c + * \brief Record the rows pivoted by the relaxed supernodes. + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 1, 2009
+ * <\pre>
+ */
+#include "slu_ddefs.h"
+
+/*! \brief
+ *
+ * 
+ * Purpose
+ * =======
+ *    mark_relax() - record the rows used by the relaxed supernodes.
+ * 
+ */ +int mark_relax( + int n, /* order of the matrix A */ + int *relax_end, /* last column in a relaxed supernode. + * if j-th column starts a relaxed supernode, + * relax_end[j] represents the last column of + * this supernode. */ + int *relax_fsupc, /* first column in a relaxed supernode. + * relax_fsupc[j] represents the first column of + * j-th supernode. */ + int *xa_begin, /* Astore->colbeg */ + int *xa_end, /* Astore->colend */ + int *asub, /* row index of A */ + int *marker /* marker[j] is the maximum column index if j-th + * row belongs to a relaxed supernode. */ ) +{ + register int jcol, kcol; + register int i, j, k; + + for (i = 0; i < n && relax_fsupc[i] != EMPTY; i++) + { + jcol = relax_fsupc[i]; /* first column */ + kcol = relax_end[jcol]; /* last column */ + for (j = jcol; j <= kcol; j++) + for (k = xa_begin[j]; k < xa_end[j]; k++) + marker[asub[k]] = jcol; + } + return i; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/memory.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/memory.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/memory.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,14 +1,17 @@ -/* +/*! @file memory.c + * \brief Precision-independent memory-related routines + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ /** Precision-independent memory-related routines. (Shared by [sdcz]memory.c) **/ -#include "dsp_defs.h" +#include "slu_ddefs.h" #if ( DEBUGlevel>=1 ) /* Debug malloc/free. */ @@ -16,6 +19,7 @@ #define PAD_FACTOR 2 #define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */ +/* size_t is usually defined as 'unsigned long' */ void *superlu_malloc(size_t size) { @@ -23,7 +27,7 @@ buf = (char *) malloc(size + DWORD); if ( !buf ) { - printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n", + printf("superlu_malloc fails: malloc_total %.0f MB, size %ld\n", superlu_malloc_total*1e-6, size); ABORT("superlu_malloc: out of memory"); } @@ -85,8 +89,7 @@ #endif -/* - * Set up pointers for integer working arrays. +/*! \brief Set up pointers for integer working arrays. */ void SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep, Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/relax_snode.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/relax_snode.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/relax_snode.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,35 @@ -/* +/*! @file relax_snode.c + * \brief Identify initial relaxed supernodes + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "dsp_defs.h" - +#include "slu_ddefs.h" +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
+ *    the matrix has been reordered according to the postorder of the etree.
+ * 
+ */ void relax_snode ( const int n, @@ -31,13 +41,7 @@ int *relax_end /* last column in a supernode */ ) { -/* - * Purpose - * ======= - * relax_snode() - Identify the initial relaxed supernodes, assuming that - * the matrix has been reordered according to the postorder of the etree. - * - */ + register int j, parent; register int snode_start; /* beginning of a snode */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,29 @@ -/* +/*! @file scolumn_bmod.c + * \brief performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ *  Permission is hereby granted to use or copy this program for any
+ *  purpose, provided the above notices are retained on all copies.
+ *  Permission to modify the code and to distribute modified code is
+ *  granted, provided the above notices are retained, and a notice that
+ *  the code was modified is included with the above copyright notice.
+ * 
*/ #include #include -#include "ssp_defs.h" +#include "slu_sdefs.h" /* * Function prototypes @@ -32,8 +34,17 @@ -/* Return value: 0 - successful return +/*! \brief + * + *
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int scolumn_bmod ( @@ -48,14 +59,7 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose: - * ======== - * Performs numeric block updates (sup-col) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - */ + #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,50 +1,38 @@ - -/* +/*! @file scolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" -/* What type of supernodes we want */ +/*! \brief What type of supernodes we want */ #define T2_SUPER -int -scolumn_dfs( - const int m, /* in - number of rows in the matrix */ - const int jcol, /* in */ - int *perm_r, /* in */ - int *nseg, /* modified - with new segments appended */ - int *lsub_col, /* in - defines the RHS vector to start the dfs */ - int *segrep, /* modified - with new segments appended */ - int *repfnz, /* modified */ - int *xprune, /* modified */ - int *marker, /* modified */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* + +/*! \brief + * + *
  * Purpose
  * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
+ *   SCOLUMN_DFS performs a symbolic factorization on column jcol, and
  *   decide the supernode boundary.
  *
  *   This routine does not use numeric values, but only use the RHS 
@@ -72,8 +60,25 @@
  * ============
  *     0  success;
  *   > 0  number of bytes allocated when run out of space.
- *
+ * 
*/ +int +scolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *xprune, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + int jcolp1, jcolm1, jsuper, nsuper, nextl; int k, krep, krow, kmark, kperm; int *marker2; /* Used for small panel LU */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,24 @@ -/* +/*! @file scomplex.c + * \brief Common arithmetic for complex type + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
- */
-/*
  * This file defines common arithmetic operations for complex type.
+ * 
*/ + #include +#include #include -#include "scomplex.h" +#include "slu_scomplex.h" -/* Complex Division c = a/b */ +/*! \brief Complex Division c = a/b */ void c_div(complex *c, complex *a, complex *b) { float ratio, den; @@ -26,8 +30,8 @@ abi = - abi; if( abr <= abi ) { if (abi == 0) { - fprintf(stderr, "z_div.c: division by zero"); - exit (-1); + fprintf(stderr, "z_div.c: division by zero\n"); + exit(-1); } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); @@ -44,8 +48,8 @@ } -/* Returns sqrt(z.r^2 + z.i^2) */ -double slu_c_abs(complex *z) +/*! \brief Returns sqrt(z.r^2 + z.i^2) */ +double c_abs(complex *z) { float temp; float real = z->r; @@ -66,9 +70,8 @@ } -/* Approximates the abs */ -/* Returns abs(z.r) + abs(z.i) */ -double slu_c_abs1(complex *z) +/*! \brief Approximates the abs. Returns abs(z.r) + abs(z.i) */ +double c_abs1(complex *z) { float real = z->r; float imag = z->i; @@ -79,7 +82,7 @@ return (real + imag); } -/* Return the exponentiation */ +/*! \brief Return the exponentiation */ void c_exp(complex *r, complex *z) { float expx; @@ -89,17 +92,56 @@ r->i = expx * sin(z->i); } -/* Return the complex conjugate */ +/*! \brief Return the complex conjugate */ void r_cnjg(complex *r, complex *z) { r->r = z->r; r->i = -z->i; } -/* Return the imaginary part */ +/*! \brief Return the imaginary part */ double r_imag(complex *z) { return (z->i); } +/*! \brief SIGN functions for complex number. Returns z/abs(z) */ +complex c_sgn(complex *z) +{ + register float t = c_abs(z); + register complex retval; + + if (t == 0.0) { + retval.r = 1.0, retval.i = 0.0; + } else { + retval.r = z->r / t, retval.i = z->i / t; + } + + return retval; +} + +/*! \brief Square-root of a complex number. */ +complex c_sqrt(complex *z) +{ + complex retval; + register float cr, ci, real, imag; + + real = z->r; + imag = z->i; + + if ( imag == 0.0 ) { + retval.r = sqrt(real); + retval.i = 0.0; + } else { + ci = (sqrt(real*real + imag*imag) - real) / 2.0; + ci = sqrt(ci); + cr = imag / (2.0 * ci); + retval.r = cr; + retval.i = ci; + } + + return retval; +} + + Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,73 +0,0 @@ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -#ifndef __SUPERLU_SCOMPLEX /* allow multiple inclusions */ -#define __SUPERLU_SCOMPLEX - -/* - * This header file is to be included in source files c*.c - */ -#ifndef SCOMPLEX_INCLUDE -#define SCOMPLEX_INCLUDE - -typedef struct { float r, i; } complex; - - -/* Macro definitions */ - -/* Complex Addition c = a + b */ -#define c_add(c, a, b) { (c)->r = (a)->r + (b)->r; \ - (c)->i = (a)->i + (b)->i; } - -/* Complex Subtraction c = a - b */ -#define c_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \ - (c)->i = (a)->i - (b)->i; } - -/* Complex-Double Multiplication */ -#define cs_mult(c, a, b) { (c)->r = (a)->r * (b); \ - (c)->i = (a)->i * (b); } - -/* Complex-Complex Multiplication */ -#define cc_mult(c, a, b) { \ - float cr, ci; \ - cr = (a)->r * (b)->r - (a)->i * (b)->i; \ - ci = (a)->i * (b)->r + (a)->r * (b)->i; \ - (c)->r = cr; \ - (c)->i = ci; \ - } - -#define cc_conj(a, b) { \ - (a)->r = (b)->r; \ - (a)->i = -((b)->i); \ - } - -/* Complex equality testing */ -#define c_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) - - -#ifdef __cplusplus -extern "C" { -#endif - -/* Prototypes for functions in scomplex.c */ -void c_div(complex *, complex *, complex *); -double slu_c_abs(complex *); /* exact */ -double slu_c_abs1(complex *); /* approximate */ -void c_exp(complex *, complex *); -void r_cnjg(complex *, complex *); -double r_imag(complex *); - - -#ifdef __cplusplus - } -#endif - -#endif - -#endif /* __SUPERLU_SCOMPLEX */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scopy_to_ucol.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,26 @@ - -/* +/*! @file scopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" int scopy_to_ucol( @@ -47,7 +46,6 @@ float *ucol; int *usub, *xusub; int nzumax; - float zero = 0.0; xsup = Glu->xsup; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,13 +1,19 @@ -#include "scomplex.h" +/*! @file scsum1.c + * \brief Takes sum of the absolute values of a complex vector and returns a single precision result + * + *
+ *     -- LAPACK auxiliary routine (version 2.0) --   
+ *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *     Courant Institute, Argonne National Lab, and Rice University   
+ *     October 31, 1992   
+ * 
+ */ +#include "slu_scomplex.h" +#include "slu_Cnames.h" -double scsum1_(int *n, complex *cx, int *incx) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/*! \brief - +
     Purpose   
     =======   
 
@@ -32,17 +38,15 @@
             The spacing between successive values of CX.  INCX > 0.   
 
     ===================================================================== 
-  
-
-
-    
-   Parameter adjustments   
-       Function Body */
+
+*/ +double scsum1_(int *n, complex *cx, int *incx) +{ /* System generated locals */ int i__1, i__2; float ret_val; /* Builtin functions */ - double slu_c_abs(complex *); + double c_abs(complex *); /* Local variables */ static int i, nincx; static float stemp; @@ -69,7 +73,7 @@ /* NEXT LINE MODIFIED. */ - stemp += slu_c_abs(&CX(i)); + stemp += c_abs(&CX(i)); /* L10: */ } ret_val = stemp; @@ -83,7 +87,7 @@ /* NEXT LINE MODIFIED. */ - stemp += slu_c_abs(&CX(i)); + stemp += c_abs(&CX(i)); /* L30: */ } ret_val = stemp; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sdiagonal.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sdiagonal.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sdiagonal.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,129 @@ + +/*! @file sdiagonal.c + * \brief Auxiliary routines to work with diagonal elements + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +int sfill_diag(int n, NCformat *Astore) +/* fill explicit zeros on the diagonal entries, so that the matrix is not + structurally singular. */ +{ + float *nzval = (float *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + float *nzval_new; + float zero = 0.0; + int *rowind_new; + int i, j, diag; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = floatMalloc(nnz + fill); + rowind_new = intMalloc(nnz + fill); + fill = 0; + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + } + if (diag < 0) + { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = zero; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + Astore->nnz += fill; + return fill; +} + +int sdominate(int n, NCformat *Astore) +/* make the matrix diagonally dominant */ +{ + float *nzval = (float *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + float *nzval_new; + int *rowind_new; + int i, j, diag; + double s; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = floatMalloc(nnz + fill); + rowind_new = intMalloc(nnz+ fill); + fill = 0; + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + s += fabs(nzval_new[j + fill] = nzval[j]); + } + if (diag >= 0) { + nzval_new[diag+fill] = s * 3.0; + } else { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = s * 3.0; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + else + { + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + { + if (rowind[j] == i) diag = j; + s += fabs(nzval[j]); + } + nzval[diag] = s * 3.0; + } + } + Astore->nnz += fill; + return fill; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgscon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgscon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgscon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,70 +1,81 @@ -/* +/*! @file sgscon.c + * \brief Estimates reciprocal of the condition number of a general matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routines SGECON.
+ * 
*/ + /* * File name: sgscon.c * History: Modified from lapack routines SGECON. */ #include -#include "ssp_defs.h" +#include "slu_sdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   SGSCON estimates the reciprocal of the condition number of a general 
+ *   real matrix A, in either the 1-norm or the infinity-norm, using   
+ *   the LU factorization computed by SGETRF.   *
+ *
+ *   An estimate is obtained for norm(inv(A)), and the reciprocal of the   
+ *   condition number is computed as   
+ *      RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ * 
+ *   Arguments   
+ *   =========   
+ *
+ *    NORM    (input) char*
+ *            Specifies whether the 1-norm condition number or the   
+ *            infinity-norm condition number is required:   
+ *            = '1' or 'O':  1-norm;   
+ *            = 'I':         Infinity-norm.
+ *	    
+ *    L       (input) SuperMatrix*
+ *            The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *            sgstrf(). Use compressed row subscripts storage for supernodes,
+ *            i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ * 
+ *    U       (input) SuperMatrix*
+ *            The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *            sgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *            Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *	    
+ *    ANORM   (input) float
+ *            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
+ *            If NORM = 'I', the infinity-norm of the original matrix A.
+ *	    
+ *    RCOND   (output) float*
+ *           The reciprocal of the condition number of the matrix A,   
+ *           computed as RCOND = 1/(norm(A) * norm(inv(A))).
+ *	    
+ *    INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *
+ *    ===================================================================== 
+ * 
+ */ + void sgscon(char *norm, SuperMatrix *L, SuperMatrix *U, float anorm, float *rcond, SuperLUStat_t *stat, int *info) { -/* - Purpose - ======= - SGSCON estimates the reciprocal of the condition number of a general - real matrix A, in either the 1-norm or the infinity-norm, using - the LU factorization computed by SGETRF. - An estimate is obtained for norm(inv(A)), and the reciprocal of the - condition number is computed as - RCOND = 1 / ( norm(A) * norm(inv(A)) ). - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - NORM (input) char* - Specifies whether the 1-norm condition number or the - infinity-norm condition number is required: - = '1' or 'O': 1-norm; - = 'I': Infinity-norm. - - L (input) SuperMatrix* - The factor L from the factorization Pr*A*Pc=L*U as computed by - sgstrf(). Use compressed row subscripts storage for supernodes, - i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. - - U (input) SuperMatrix* - The factor U from the factorization Pr*A*Pc=L*U as computed by - sgstrf(). Use column-wise storage scheme, i.e., U has types: - Stype = SLU_NC, Dtype = SLU_S, Mtype = TRU. - - ANORM (input) float - If NORM = '1' or 'O', the 1-norm of the original matrix A. - If NORM = 'I', the infinity-norm of the original matrix A. - - RCOND (output) float* - The reciprocal of the condition number of the matrix A, - computed as RCOND = 1/(norm(A) * norm(inv(A))). - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== -*/ - /* Local variables */ int kase, kase1, onenrm, i; float ainvnm; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsequ.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsequ.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsequ.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,82 +1,91 @@ - -/* +/*! @file sgsequ.c + * \brief Computes row and column scalings + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from LAPACK routine SGEEQU
+ * 
*/ /* * File name: sgsequ.c * History: Modified from LAPACK routine SGEEQU */ #include -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" + + +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   SGSEQU computes row and column scalings intended to equilibrate an   
+ *   M-by-N sparse matrix A and reduce its condition number. R returns the row
+ *   scale factors and C the column scale factors, chosen to try to make   
+ *   the largest element in each row and column of the matrix B with   
+ *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
+ *
+ *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
+ *   number and BIGNUM = largest safe number.  Use of these scaling   
+ *   factors is not guaranteed to reduce the condition number of A but   
+ *   works well in practice.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input) SuperMatrix*
+ *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
+ *           factors are to be computed. The type of A can be:
+ *           Stype = SLU_NC; Dtype = SLU_S; Mtype = SLU_GE.
+ *	    
+ *   R       (output) float*, size A->nrow
+ *           If INFO = 0 or INFO > M, R contains the row scale factors   
+ *           for A.
+ *	    
+ *   C       (output) float*, size A->ncol
+ *           If INFO = 0,  C contains the column scale factors for A.
+ *	    
+ *   ROWCND  (output) float*
+ *           If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
+ *           smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
+ *           AMAX is neither too large nor too small, it is not worth   
+ *           scaling by R.
+ *	    
+ *   COLCND  (output) float*
+ *           If INFO = 0, COLCND contains the ratio of the smallest   
+ *           C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
+ *           worth scaling by C.
+ *	    
+ *   AMAX    (output) float*
+ *           Absolute value of largest matrix element.  If AMAX is very   
+ *           close to overflow or very close to underflow, the matrix   
+ *           should be scaled.
+ *	    
+ *   INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *           > 0:  if INFO = i,  and i is   
+ *                 <= A->nrow:  the i-th row of A is exactly zero   
+ *                 >  A->ncol:  the (i-M)-th column of A is exactly zero   
+ *
+ *   ===================================================================== 
+ * 
+ */ void sgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd, float *colcnd, float *amax, int *info) { -/* - Purpose - ======= - SGSEQU computes row and column scalings intended to equilibrate an - M-by-N sparse matrix A and reduce its condition number. R returns the row - scale factors and C the column scale factors, chosen to try to make - the largest element in each row and column of the matrix B with - elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. - R(i) and C(j) are restricted to be between SMLNUM = smallest safe - number and BIGNUM = largest safe number. Use of these scaling - factors is not guaranteed to reduce the condition number of A but - works well in practice. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input) SuperMatrix* - The matrix of dimension (A->nrow, A->ncol) whose equilibration - factors are to be computed. The type of A can be: - Stype = SLU_NC; Dtype = SLU_S; Mtype = SLU_GE. - - R (output) float*, size A->nrow - If INFO = 0 or INFO > M, R contains the row scale factors - for A. - - C (output) float*, size A->ncol - If INFO = 0, C contains the column scale factors for A. - - ROWCND (output) float* - If INFO = 0 or INFO > M, ROWCND contains the ratio of the - smallest R(i) to the largest R(i). If ROWCND >= 0.1 and - AMAX is neither too large nor too small, it is not worth - scaling by R. - - COLCND (output) float* - If INFO = 0, COLCND contains the ratio of the smallest - C(i) to the largest C(i). If COLCND >= 0.1, it is not - worth scaling by C. - - AMAX (output) float* - Absolute value of largest matrix element. If AMAX is very - close to overflow or very close to underflow, the matrix - should be scaled. - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, and i is - <= A->nrow: the i-th row of A is exactly zero - > A->ncol: the (i-M)-th column of A is exactly zero - - ===================================================================== -*/ - /* Local variables */ NCformat *Astore; float *Aval; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsisx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsisx.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsisx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,693 @@ + +/*! @file sgsisx.c + * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ +#include "slu_sdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * SGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
+ * using the ILU factorization from sgsitrf(). An estimation of
+ * the condition number is provided. It performs the following steps:
+ *
+ *   1. If A is stored column-wise (A->Stype = SLU_NC):
+ *  
+ *	1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A is
+ *	     overwritten by diag(R)*A*diag(C) and B by diag(R)*B
+ *	     (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
+ *	     = TRANS or CONJ).
+ *
+ *	1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
+ *	     matrix that usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	1.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the matrix A (after equilibration if options->Equil = YES)
+ *	     as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
+ *
+ *	1.4. Compute the reciprocal pivot growth factor.
+ *
+ *	1.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
+ *	     and info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	1.6. The system of equations is solved for X using the factored form
+ *	     of A.
+ *
+ *	1.7. options->IterRefine is not used
+ *
+ *	1.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *	1.9. options for ILU only
+ *	     1) If options->RowPerm = LargeDiag, MC64 is used to scale and
+ *		permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
+ *		entries of modulus 1 on the diagonal and off-diagonal entries
+ *		of modulus at most 1. If MC64 fails, dgsequ() is used to
+ *		equilibrate the system.
+ *	     2) options->ILU_DropTol = tau is the threshold for dropping.
+ *		For L, it is used directly (for the whole row in a supernode);
+ *		For U, ||A(:,i)||_oo * tau is used as the threshold
+ *	        for the	i-th column.
+ *		If a secondary dropping rule is required, tau will
+ *	        also be used to compute the second threshold.
+ *	     3) options->ILU_FillFactor = gamma, used as the initial guess
+ *		of memory growth.
+ *		If a secondary dropping rule is required, it will also
+ *              be used as an upper bound of the memory.
+ *	     4) options->ILU_DropRule specifies the dropping rule.
+ *		Option		Explanation
+ *		======		===========
+ *		DROP_BASIC:	Basic dropping rule, supernodal based ILU.
+ *		DROP_PROWS:	Supernodal based ILUTP, p = gamma * nnz(A) / n.
+ *		DROP_COLUMN:	Variation of ILUTP, for j-th column,
+ *				p = gamma * nnz(A(:,j)).
+ *		DROP_AREA;	Variation of ILUTP, for j-th column, use
+ *				nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
+ *				memory.
+ *		DROP_DYNAMIC:	Modify the threshold tau during the
+ *				factorizaion.
+ *				If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				Otherwise
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				tau_U(j) uses the similar rule.
+ *				NOTE: the thresholds used by L and U are
+ *				indenpendent.
+ *		DROP_INTERP:	Compute the second dropping threshold by
+ *				interpolation instead of sorting (default).
+ *				In this case, the actual fill ratio is not
+ *				guaranteed smaller than gamma.
+ *		DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
+ *		( The default option is DROP_BASIC | DROP_AREA. )
+ *	     5) options->ILU_Norm is the criterion of computing the average
+ *		value of a row in L.
+ *		options->ILU_Norm	average(x[1:n])
+ *		=================	===============
+ *		ONE_NORM		||x||_1 / n
+ *		TWO_NORM		||x||_2 / sqrt(n)
+ *		INF_NORM		max{|x[i]|}
+ *	     6) options->ILU_MILU specifies the type of MILU's variation.
+ *		= SILU (default): do not perform MILU;
+ *		= SMILU_1 (not recommended):
+ *		    U(i,i) := U(i,i) + sum(dropped entries);
+ *		= SMILU_2:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
+ *		= SMILU_3:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
+ *		NOTE: Even SMILU_1 does not preserve the column sum because of
+ *		late dropping.
+ *	     7) options->ILU_FillTol is used as the perturbation when
+ *		encountering zero pivots. If some U(i,i) = 0, so that U is
+ *		exactly singular, then
+ *		   U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ *
+ *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
+ *	to the transpose of A:
+ *
+ *	2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A' is
+ *	     overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
+ *	     (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
+ *
+ *	2.2. Permute columns of transpose(A) (rows of A),
+ *	     forming transpose(A)*Pc, where Pc is a permutation matrix that
+ *	     usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	2.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the transpose(A) (after equilibration if
+ *	     options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
+ *	     permutation Pr determined by partial pivoting.
+ *
+ *	2.4. Compute the reciprocal pivot growth factor.
+ *
+ *	2.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		 U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
+ *	     And info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	2.6. The system of equations is solved for X using the factored form
+ *	     of transpose(A).
+ *
+ *	2.7. If options->IterRefine is not used.
+ *
+ *	2.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the LU decomposition will be performed and how the
+ *	   system will be solved.
+ *
+ * A	   (input/output) SuperMatrix*
+ *	   Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *	   of the linear equations is A->nrow. Currently, the type of A can be:
+ *	   Stype = SLU_NC or SLU_NR, Dtype = SLU_S, Mtype = SLU_GE.
+ *	   In the future, more general A may be handled.
+ *
+ *	   On entry, If options->Fact = FACTORED and equed is not 'N',
+ *	   then A must have been equilibrated by the scaling factors in
+ *	   R and/or C.
+ *	   On exit, A is not modified if options->Equil = NO, or if
+ *	   options->Equil = YES but equed = 'N' on exit.
+ *	   Otherwise, if options->Equil = YES and equed is not 'N',
+ *	   A is scaled as follows:
+ *	   If A->Stype = SLU_NC:
+ *	     equed = 'R':  A := diag(R) * A
+ *	     equed = 'C':  A := A * diag(C)
+ *	     equed = 'B':  A := diag(R) * A * diag(C).
+ *	   If A->Stype = SLU_NR:
+ *	     equed = 'R':  transpose(A) := diag(R) * transpose(A)
+ *	     equed = 'C':  transpose(A) := transpose(A) * diag(C)
+ *	     equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
+ *
+ * perm_c  (input/output) int*
+ *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
+ *	   which defines the permutation matrix Pc; perm_c[i] = j means
+ *	   column i of A is in position j in A*Pc.
+ *	   On exit, perm_c may be overwritten by the product of the input
+ *	   perm_c and a permutation that postorders the elimination tree
+ *	   of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ *	   is already in postorder.
+ *
+ *	   If A->Stype = SLU_NR, column permutation vector of size A->nrow,
+ *	   which describes permutation of columns of transpose(A) 
+ *	   (rows of A) as described above.
+ *
+ * perm_r  (input/output) int*
+ *	   If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
+ *	   which defines the permutation matrix Pr, and is determined
+ *	   by partial pivoting.  perm_r[i] = j means row i of A is in 
+ *	   position j in Pr*A.
+ *
+ *	   If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ *	   determines permutation of rows of transpose(A)
+ *	   (columns of A) as described above.
+ *
+ *	   If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	   will try to use the input perm_r, unless a certain threshold
+ *	   criterion is violated. In that case, perm_r is overwritten by a
+ *	   new permutation determined by partial pivoting or diagonal
+ *	   threshold pivoting.
+ *	   Otherwise, perm_r is output argument.
+ *
+ * etree   (input/output) int*,  dimension (A->ncol)
+ *	   Elimination tree of Pc'*A'*A*Pc.
+ *	   If options->Fact != FACTORED and options->Fact != DOFACT,
+ *	   etree is an input argument, otherwise it is an output argument.
+ *	   Note: etree is a vector of parent pointers for a forest whose
+ *	   vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * equed   (input/output) char*
+ *	   Specifies the form of equilibration that was done.
+ *	   = 'N': No equilibration.
+ *	   = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
+ *	   = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
+ *	   = 'B': Both row and column equilibration, i.e., A was replaced 
+ *		  by diag(R)*A*diag(C).
+ *	   If options->Fact = FACTORED, equed is an input argument,
+ *	   otherwise it is an output argument.
+ *
+ * R	   (input/output) float*, dimension (A->nrow)
+ *	   The row scale factors for A or transpose(A).
+ *	   If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
+ *	   If equed = 'N' or 'C', R is not accessed.
+ *	   If options->Fact = FACTORED, R is an input argument,
+ *	       otherwise, R is output.
+ *	   If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ *	       of R must be positive.
+ *
+ * C	   (input/output) float*, dimension (A->ncol)
+ *	   The column scale factors for A or transpose(A).
+ *	   If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
+ *	   If equed = 'N' or 'R', C is not accessed.
+ *	   If options->Fact = FACTORED, C is an input argument,
+ *	       otherwise, C is output.
+ *	   If options->Fact = FACTORED and equed = 'C' or 'B', each element
+ *	       of C must be positive.
+ *
+ * L	   (output) SuperMatrix*
+ *	   The factor L from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype SLU_= NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses compressed row subscripts storage for supernodes, i.e.,
+ *	   L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U	   (output) SuperMatrix*
+ *	   The factor U from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses column-wise storage scheme, i.e., U has types:
+ *	   Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * work    (workspace/output) void*, size (lwork) (in bytes)
+ *	   User supplied workspace, should be large enough
+ *	   to hold data structures for factors L and U.
+ *	   On exit, if fact is not 'F', L and U point to this array.
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 mem_usage->total_needed; no other side effects.
+ *
+ *	   See argument 'mem_usage' for memory usage statistics.
+ *
+ * B	   (input/output) SuperMatrix*
+ *	   B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ *	   On entry, the right hand side matrix.
+ *	   If B->ncol = 0, only LU decomposition is performed, the triangular
+ *			   solve is skipped.
+ *	   On exit,
+ *	      if equed = 'N', B is not modified; otherwise
+ *	      if A->Stype = SLU_NC:
+ *		 if options->Trans = NOTRANS and equed = 'R' or 'B',
+ *		    B is overwritten by diag(R)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
+ *		    B is overwritten by diag(C)*B;
+ *	      if A->Stype = SLU_NR:
+ *		 if options->Trans = NOTRANS and equed = 'C' or 'B',
+ *		    B is overwritten by diag(C)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
+ *		    B is overwritten by diag(R)*B.
+ *
+ * X	   (output) SuperMatrix*
+ *	   X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ *	   If info = 0 or info = A->ncol+1, X contains the solution matrix
+ *	   to the original system of equations. Note that A and B are modified
+ *	   on exit if equed is not 'N', and the solution to the equilibrated
+ *	   system is inv(diag(C))*X if options->Trans = NOTRANS and
+ *	   equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
+ *	   and equed = 'R' or 'B'.
+ *
+ * recip_pivot_growth (output) float*
+ *	   The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
+ *	   The infinity norm is used. If recip_pivot_growth is much less
+ *	   than 1, the stability of the LU factorization could be poor.
+ *
+ * rcond   (output) float*
+ *	   The estimate of the reciprocal condition number of the matrix A
+ *	   after equilibration (if done). If rcond is less than the machine
+ *	   precision (in particular, if rcond = 0), the matrix is singular
+ *	   to working precision. This condition is indicated by a return
+ *	   code of info > 0.
+ *
+ * mem_usage (output) mem_usage_t*
+ *	   Record the memory usage statistics, consisting of following fields:
+ *	   - for_lu (float)
+ *	     The amount of space used in bytes for L\U data structures.
+ *	   - total_needed (float)
+ *	     The amount of space needed in bytes to perform factorization.
+ *	   - expansions (int)
+ *	     The number of memory expansions during the LU factorization.
+ *
+ * stat   (output) SuperLUStat_t*
+ *	  Record the statistics on runtime and floating-point operation count.
+ *	  See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ *	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ *	   > 0: if info = i, and i is
+ *		<= A->ncol: number of zero pivots. They are replaced by small
+ *		      entries due to options->ILU_FillTol.
+ *		= A->ncol+1: U is nonsingular, but RCOND is less than machine
+ *		      precision, meaning that the matrix is singular to
+ *		      working precision. Nevertheless, the solution and
+ *		      error bounds are computed because there are a number
+ *		      of situations where the computed solution can be more
+ *		      accurate than the value of RCOND would suggest.
+ *		> A->ncol+1: number of bytes allocated when memory allocation
+ *		      failure occurred, plus A->ncol.
+ * 
+ */ + +void +sgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, float *R, float *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, + float *recip_pivot_growth, float *rcond, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) +{ + + DNformat *Bstore, *Xstore; + float *Bmat, *Xmat; + int ldb, ldx, nrhs; + SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ + SuperMatrix AC; /* Matrix postmultiplied by Pc */ + int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; + trans_t trant; + char norm[1]; + int i, j, info1; + float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; + int relax, panel_size; + float diag_pivot_thresh; + double t0; /* temporary time */ + double *utime; + + int *perm = NULL; + + /* External functions */ + extern float slangs(char *, SuperMatrix *); + + Bstore = B->Store; + Xstore = X->Store; + Bmat = Bstore->nzval; + Xmat = Xstore->nzval; + ldb = Bstore->lda; + ldx = Xstore->lda; + nrhs = B->ncol; + + *info = 0; + nofact = (options->Fact != FACTORED); + equil = (options->Equil == YES); + notran = (options->Trans == NOTRANS); + mc64 = (options->RowPerm == LargeDiag); + if ( nofact ) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE; + colequ = FALSE; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + smlnum = slamch_("Safe minimum"); + bignum = 1. / smlnum; + } + + /* Test the input parameters */ + if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && + options->Fact != SamePattern_SameRowPerm && + !notran && options->Trans != TRANS && options->Trans != CONJ && + !equil && options->Equil != NO) + *info = -1; + else if ( A->nrow != A->ncol || A->nrow < 0 || + (A->Stype != SLU_NC && A->Stype != SLU_NR) || + A->Dtype != SLU_S || A->Mtype != SLU_GE ) + *info = -2; + else if (options->Fact == FACTORED && + !(rowequ || colequ || lsame_(equed, "N"))) + *info = -6; + else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, R[j]); + rcmax = SUPERLU_MAX(rcmax, R[j]); + } + if (rcmin <= 0.) *info = -7; + else if ( A->nrow > 0) + rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else rowcnd = 1.; + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, C[j]); + rcmax = SUPERLU_MAX(rcmax, C[j]); + } + if (rcmin <= 0.) *info = -8; + else if (A->nrow > 0) + colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else colcnd = 1.; + } + if (*info == 0) { + if ( lwork < -1 ) *info = -12; + else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || + B->Stype != SLU_DN || B->Dtype != SLU_S || + B->Mtype != SLU_GE ) + *info = -13; + else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || + (B->ncol != 0 && B->ncol != X->ncol) || + X->Stype != SLU_DN || + X->Dtype != SLU_S || X->Mtype != SLU_GE ) + *info = -14; + } + } + if (*info != 0) { + i = -(*info); + xerbla_("sgsisx", &i); + return; + } + + /* Initialization for factor parameters */ + panel_size = sp_ienv(1); + relax = sp_ienv(2); + diag_pivot_thresh = options->DiagPivotThresh; + + utime = stat->utime; + + /* Convert A to SLU_NC format when necessary. */ + if ( A->Stype == SLU_NR ) { + NRformat *Astore = A->Store; + AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, + Astore->nzval, Astore->colind, Astore->rowptr, + SLU_NC, A->Dtype, A->Mtype); + if ( notran ) { /* Reverse the transpose argument. */ + trant = TRANS; + notran = 0; + } else { + trant = NOTRANS; + notran = 1; + } + } else { /* A->Stype == SLU_NC */ + trant = options->Trans; + AA = A; + } + + if ( nofact ) { + register int i, j; + NCformat *Astore = AA->Store; + int nnz = Astore->nnz; + int *colptr = Astore->colptr; + int *rowind = Astore->rowind; + float *nzval = (float *)Astore->nzval; + int n = AA->nrow; + + if ( mc64 ) { + *equed = 'B'; + rowequ = colequ = 1; + t0 = SuperLU_timer_(); + if ((perm = intMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for perm[]"); + + info1 = sldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); + + if (info1 > 0) { /* MC64 fails, call sgsequ() later */ + mc64 = 0; + SUPERLU_FREE(perm); + perm = NULL; + } else { + for (i = 0; i < n; i++) { + R[i] = exp(R[i]); + C[i] = exp(C[i]); + } + /* permute and scale the matrix */ + for (j = 0; j < n; j++) { + for (i = colptr[j]; i < colptr[j + 1]; i++) { + nzval[i] *= R[rowind[i]] * C[j]; + rowind[i] = perm[rowind[i]]; + } + } + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + if ( !mc64 & equil ) { + t0 = SuperLU_timer_(); + /* Compute row and column scalings to equilibrate the matrix A. */ + sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); + + if ( info1 == 0 ) { + /* Equilibrate matrix A. */ + slaqgs(AA, R, C, rowcnd, colcnd, amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + } + + if ( nrhs > 0 ) { + /* Scale the right hand side if equilibration was performed. */ + if ( notran ) { + if ( rowequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Bmat[i + j*ldb] *= R[i]; + } + } + } else if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Bmat[i + j*ldb] *= C[i]; + } + } + } + + if ( nofact ) { + + t0 = SuperLU_timer_(); + /* + * Gnet column permutation vector perm_c[], according to permc_spec: + * permc_spec = NATURAL: natural ordering + * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A + * permc_spec = MMD_ATA: minimum degree on structure of A'*A + * permc_spec = COLAMD: approximate minimum degree column ordering + * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] + */ + permc_spec = options->ColPerm; + if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) + get_perm_c(permc_spec, AA, perm_c); + utime[COLPERM] = SuperLU_timer_() - t0; + + t0 = SuperLU_timer_(); + sp_preorder(options, AA, perm_c, etree, &AC); + utime[ETREE] = SuperLU_timer_() - t0; + + /* Compute the LU factorization of A*Pc. */ + t0 = SuperLU_timer_(); + sgsitrf(options, &AC, relax, panel_size, etree, work, lwork, + perm_c, perm_r, L, U, stat, info); + utime[FACT] = SuperLU_timer_() - t0; + + if ( lwork == -1 ) { + mem_usage->total_needed = *info - A->ncol; + return; + } + } + + if ( options->PivotGrowth ) { + if ( *info > 0 ) return; + + /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ + *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U); + } + + if ( options->ConditionNumber ) { + /* Estimate the reciprocal of the condition number of A. */ + t0 = SuperLU_timer_(); + if ( notran ) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = slangs(norm, AA); + sgscon(norm, L, U, anorm, rcond, stat, &info1); + utime[RCOND] = SuperLU_timer_() - t0; + } + + if ( nrhs > 0 ) { + /* Compute the solution matrix X. */ + for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ + for (i = 0; i < B->nrow; i++) + Xmat[i + j*ldx] = Bmat[i + j*ldb]; + + t0 = SuperLU_timer_(); + sgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); + utime[SOLVE] = SuperLU_timer_() - t0; + + /* Transform the solution matrix X to a solution of the original + system. */ + if ( notran ) { + if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Xmat[i + j*ldx] *= C[i]; + } + } + } else { + if ( rowequ ) { + if (perm) { + float *tmp; + int n = A->nrow; + + if ((tmp = floatMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for tmp[]"); + for (j = 0; j < nrhs; j++) { + for (i = 0; i < n; i++) + tmp[i] = Xmat[i + j * ldx]; /*dcopy*/ + for (i = 0; i < n; i++) + Xmat[i + j * ldx] = R[i] * tmp[perm[i]]; + } + SUPERLU_FREE(tmp); + } else { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + Xmat[i + j*ldx] *= R[i]; + } + } + } + } + } /* end if nrhs > 0 */ + + if ( options->ConditionNumber ) { + /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ + if ( *rcond < slamch_("E") && *info == 0) *info = A->ncol + 1; + } + + if (perm) SUPERLU_FREE(perm); + + if ( nofact ) { + ilu_sQuerySpace(L, U, mem_usage); + Destroy_CompCol_Permuted(&AC); + } + if ( A->Stype == SLU_NR ) { + Destroy_SuperMatrix_Store(AA); + SUPERLU_FREE(AA); + } + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsitrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsitrf.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsitrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,625 @@ + +/*! @file sgsitf.c + * \brief Computes an ILU factorization of a general sparse matrix + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +#ifdef DEBUG +int num_drop_L; +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * SGSITRF computes an ILU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ *     Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the ILU decomposition will be performed.
+ *
+ * A	    (input) SuperMatrix*
+ *	    Original matrix A, permuted by columns, of dimension
+ *	    (A->nrow, A->ncol). The type of A can be:
+ *	    Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
+ *
+ * relax    (input) int
+ *	    To control degree of relaxing supernodes. If the number
+ *	    of nodes (columns) in a subtree of the elimination tree is less
+ *	    than relax, this subtree is considered as one supernode,
+ *	    regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ *	    A panel consists of at most panel_size consecutive columns.
+ *
+ * etree    (input) int*, dimension (A->ncol)
+ *	    Elimination tree of A'*A.
+ *	    Note: etree is a vector of parent pointers for a forest whose
+ *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *	    On input, the columns of A should be permuted so that the
+ *	    etree is in a certain postorder.
+ *
+ * work     (input/output) void*, size (lwork) (in bytes)
+ *	    User-supplied work space and space for the output data structures.
+ *	    Not referenced if lwork = 0;
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 *info; no other side effects.
+ *
+ * perm_c   (input) int*, dimension (A->ncol)
+ *	    Column permutation vector, which defines the
+ *	    permutation matrix Pc; perm_c[i] = j means column i of A is
+ *	    in position j in A*Pc.
+ *	    When searching for diagonal, perm_c[*] is applied to the
+ *	    row subscripts of A, so that diagonal threshold pivoting
+ *	    can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r   (input/output) int*, dimension (A->nrow)
+ *	    Row permutation vector which defines the permutation matrix Pr,
+ *	    perm_r[i] = j means row i of A is in position j in Pr*A.
+ *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	       will try to use the input perm_r, unless a certain threshold
+ *	       criterion is violated. In that case, perm_r is overwritten by
+ *	       a new permutation determined by partial pivoting or diagonal
+ *	       threshold pivoting.
+ *	    Otherwise, perm_r is output argument;
+ *
+ * L	    (output) SuperMatrix*
+ *	    The factor L from the factorization Pr*A=L*U; use compressed row
+ *	    subscripts storage for supernodes, i.e., L has type:
+ *	    Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U	    (output) SuperMatrix*
+ *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ *	    storage scheme, i.e., U has types: Stype = SLU_NC,
+ *	    Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * stat     (output) SuperLUStat_t*
+ *	    Record the statistics on runtime and floating-point operation count.
+ *	    See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info     (output) int*
+ *	    = 0: successful exit
+ *	    < 0: if info = -i, the i-th argument had an illegal value
+ *	    > 0: if info = i, and i is
+ *	       <= A->ncol: number of zero pivots. They are replaced by small
+ *		  entries according to options->ILU_FillTol.
+ *	       > A->ncol: number of bytes allocated when memory allocation
+ *		  failure occurred, plus A->ncol. If lwork = -1, it is
+ *		  the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ *   m = number of rows in the matrix
+ *   n = number of columns in the matrix
+ *
+ *   marker[0:3*m-1]: marker[i] = j means that node i has been
+ *	reached when working on column j.
+ *	Storage: relative to original row subscripts
+ *	NOTE: There are 4 of them:
+ *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
+ *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
+ *	      marker_relax(has its own space) is used for relaxed supernodes.
+ *
+ *   parent[0:m-1]: parent vector used during dfs
+ *	Storage: relative to new row subscripts
+ *
+ *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ *	unexplored neighbor of i in lsub[*]
+ *
+ *   segrep[0:nseg-1]: contains the list of supernodal representatives
+ *	in topological order of the dfs. A supernode representative is the
+ *	last column of a supernode.
+ *	The maximum size of segrep[] is n.
+ *
+ *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ *	supernodal representative r, repfnz[r] is the location of the first
+ *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
+ *	indicates the supernode r has been explored.
+ *	NOTE: There are W of them, each used for one column of a panel.
+ *
+ *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ *	the panel diagonal. These are filled in during dpanel_dfs(), and are
+ *	used later in the inner LU factorization within the panel.
+ *	panel_lsub[]/dense[] pair forms the SPA data structure.
+ *	NOTE: There are W of them.
+ *
+ *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ *		   NOTE: there are W of them.
+ *
+ *   tempv[0:*]: real temporary used for dense numeric kernels;
+ *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
+ *	It is also used by the dropping routine ilu_ddrop_row().
+ * 
+ */ + +void +sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, + int *etree, void *work, int lwork, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) +{ + /* Local working arrays */ + NCPformat *Astore; + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ + int *iperm_c; /* inverse of perm_c */ + int *swap, *iswap; /* swap is used to store the row permutation + during the factorization. Initially, it is set + to iperm_c (row indeces of Pc*A*Pc'). + iswap is the inverse of swap. After the + factorization, it is equal to perm_r. */ + int *iwork; + float *swork; + int *segrep, *repfnz, *parent, *xplore; + int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ + int *marker, *marker_relax; + float *dense, *tempv; + int *relax_end, *relax_fsupc; + float *a; + int *asub; + int *xa_begin, *xa_end; + int *xsup, *supno; + int *xlsub, *xlusup, *xusub; + int nzlumax; + float *amax; + float drop_sum; + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + int *iwork2; /* used by the second dropping rule */ + + /* Local scalars */ + fact_t fact = options->Fact; + double diag_pivot_thresh = options->DiagPivotThresh; + double drop_tol = options->ILU_DropTol; /* tau */ + double fill_ini = options->ILU_FillTol; /* tau^hat */ + double gamma = options->ILU_FillFactor; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + double fill_tol; + int pivrow; /* pivotal row number in the original matrix A */ + int nseg1; /* no of segments in U-column above panel row jcol */ + int nseg; /* no of segments in each U-column */ + register int jcol; + register int kcol; /* end column of a relaxed snode */ + register int icol; + register int i, k, jj, new_next, iinfo; + int m, n, min_mn, jsupno, fsupc, nextlu, nextu; + int w_def; /* upper bound on panel width */ + int usepr, iperm_r_allocated = 0; + int nnzL, nnzU; + int *panel_histo = stat->panel_histo; + flops_t *ops = stat->ops; + + int last_drop;/* the last column which the dropping rules applied */ + int quota; + int nnzAj; /* number of nonzeros in A(:,1:j) */ + int nnzLj, nnzUj; + double tol_L = drop_tol, tol_U = drop_tol; + float zero = 0.0; + + /* Executable */ + iinfo = 0; + m = A->nrow; + n = A->ncol; + min_mn = SUPERLU_MIN(m, n); + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + + /* Allocate storage common to the factor routines */ + *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, + gamma, L, U, &Glu, &iwork, &swork); + if ( *info ) return; + + xsup = Glu.xsup; + supno = Glu.supno; + xlsub = Glu.xlsub; + xlusup = Glu.xlusup; + xusub = Glu.xusub; + + SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, + &repfnz, &panel_lsub, &marker_relax, &marker); + sSetRWork(m, panel_size, swork, &dense, &tempv); + + usepr = (fact == SamePattern_SameRowPerm); + if ( usepr ) { + /* Compute the inverse of perm_r */ + iperm_r = (int *) intMalloc(m); + for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; + iperm_r_allocated = 1; + } + + iperm_c = (int *) intMalloc(n); + for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; + swap = (int *)intMalloc(n); + for (k = 0; k < n; k++) swap[k] = iperm_c[k]; + iswap = (int *)intMalloc(n); + for (k = 0; k < n; k++) iswap[k] = perm_c[k]; + amax = (float *) floatMalloc(panel_size); + if (drop_rule & DROP_SECONDARY) + iwork2 = (int *)intMalloc(n); + else + iwork2 = NULL; + + nnzAj = 0; + nnzLj = 0; + nnzUj = 0; + last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95)); + + /* Identify relaxed snodes */ + relax_end = (int *) intMalloc(n); + relax_fsupc = (int *) intMalloc(n); + if ( options->SymmetricMode == YES ) + ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + else + ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + + ifill (perm_r, m, EMPTY); + ifill (marker, m * NO_MARKER, EMPTY); + supno[0] = -1; + xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; + w_def = panel_size; + + /* Mark the rows used by relaxed supernodes */ + ifill (marker_relax, m, EMPTY); + i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, + asub, marker_relax); +#if ( PRNTlevel >= 1) + printf("%d relaxed supernodes.\n", i); +#endif + + /* + * Work on one "panel" at a time. A panel is one of the following: + * (a) a relaxed supernode at the bottom of the etree, or + * (b) panel_size contiguous columns, defined by the user + */ + for (jcol = 0; jcol < min_mn; ) { + + if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ + kcol = relax_end[jcol]; /* end of the relaxed snode */ + panel_histo[kcol-jcol+1]++; + + /* Drop small rows in the previous supernode. */ + if (jcol > 0 && jcol < last_drop) { + int first = xsup[supno[jcol - 1]]; + int last = jcol - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); + + /* Drop small rows */ + i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj, + &fill_tol, &Glu, tempv, iwork2, 0); + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } + + /* -------------------------------------- + * Factorize the relaxed supernode(jcol:kcol) + * -------------------------------------- */ + /* Determine the union of the row structure of the snode */ + if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end, + marker, &Glu)) != 0 ) + return; + + nextu = xusub[jcol]; + nextlu = xlusup[jcol]; + jsupno = supno[jcol]; + fsupc = xsup[jsupno]; + new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); + nzlumax = Glu.nzlumax; + while ( new_next > nzlumax ) { + if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) + return; + } + + for (icol = jcol; icol <= kcol; icol++) { + xusub[icol+1] = nextu; + + amax[0] = 0.0; + /* Scatter into SPA dense[*] */ + for (k = xa_begin[icol]; k < xa_end[icol]; k++) { + register float tmp = fabs(a[k]); + if (tmp > amax[0]) amax[0] = tmp; + dense[asub[k]] = a[k]; + } + nnzAj += xa_end[icol] - xa_begin[icol]; + if (amax[0] == 0.0) { + amax[0] = fill_ini; +#if ( PRNTlevel >= 1) + printf("Column %d is entirely zero!\n", icol); + fflush(stdout); +#endif + } + + /* Numeric update within the snode */ + ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); + + if (usepr) pivrow = iperm_r[icol]; + fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); + if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr, + perm_r, iperm_c[icol], swap, iswap, + marker_relax, &pivrow, + amax[0] * fill_tol, milu, zero, + &Glu, stat)) ) { + iinfo++; + marker[pivrow] = kcol; + } + + } + + jcol = kcol + 1; + + } else { /* Work on one panel of panel_size columns */ + + /* Adjust panel_size so that a panel won't overlap with the next + * relaxed snode. + */ + panel_size = w_def; + for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) + if ( relax_end[k] != EMPTY ) { + panel_size = k - jcol; + break; + } + if ( k == min_mn ) panel_size = min_mn - jcol; + panel_histo[panel_size]++; + + /* symbolic factor on a panel of columns */ + ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, + dense, amax, panel_lsub, segrep, repfnz, + marker, parent, xplore, &Glu); + + /* numeric sup-panel updates in topological order */ + spanel_bmod(m, panel_size, jcol, nseg1, dense, + tempv, segrep, repfnz, &Glu, stat); + + /* Sparse LU within the panel, and below panel diagonal */ + for (jj = jcol; jj < jcol + panel_size; jj++) { + + k = (jj - jcol) * m; /* column index for w-wide arrays */ + + nseg = nseg1; /* Begin after all the panel segments */ + + nnzAj += xa_end[jj] - xa_begin[jj]; + + if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg, + &panel_lsub[k], segrep, &repfnz[k], + marker, parent, xplore, &Glu))) + return; + + /* Numeric updates */ + if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k], + tempv, &segrep[nseg1], &repfnz[k], + jcol, &Glu, stat)) != 0) return; + + /* Make a fill-in position if the column is entirely zero */ + if (xlsub[jj + 1] == xlsub[jj]) { + register int i, row; + int nextl; + int nzlmax = Glu.nzlmax; + int *lsub = Glu.lsub; + int *marker2 = marker + 2 * m; + + /* Allocate memory */ + nextl = xlsub[jj] + 1; + if (nextl >= nzlmax) { + int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); + if (error) { *info = error; return; } + lsub = Glu.lsub; + } + xlsub[jj + 1]++; + assert(xlusup[jj]==xlusup[jj+1]); + xlusup[jj + 1]++; + Glu.lusup[xlusup[jj]] = zero; + + /* Choose a row index (pivrow) for fill-in */ + for (i = jj; i < n; i++) + if (marker_relax[swap[i]] <= jj) break; + row = swap[i]; + marker2[row] = jj; + lsub[xlsub[jj]] = row; +#ifdef DEBUG + printf("Fill col %d.\n", jj); + fflush(stdout); +#endif + } + + /* Computer the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * jj / m; + else if (drop_rule & DROP_COLUMN) + quota = gamma * (xa_end[jj] - xa_begin[jj]) * + (jj + 1) / m; + else if (drop_rule & DROP_AREA) + quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; + else + quota = m; + + /* Copy the U-segments to ucol[*] and drop small entries */ + if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k], + perm_r, &dense[k], drop_rule, + milu, amax[jj - jcol] * tol_U, + quota, &drop_sum, &nnzUj, &Glu, + iwork2)) != 0) + return; + + /* Reset the dropping threshold if required */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) + tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); + else + tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); + } + + drop_sum *= MILU_ALPHA; + if (usepr) pivrow = iperm_r[jj]; + fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); + if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r, + iperm_c[jj], swap, iswap, + marker_relax, &pivrow, + amax[jj - jcol] * fill_tol, milu, + drop_sum, &Glu, stat)) ) { + iinfo++; + marker[m + pivrow] = jj; + marker[2 * m + pivrow] = jj; + } + + /* Reset repfnz[] for this column */ + resetrep_col (nseg, segrep, &repfnz[k]); + + /* Start a new supernode, drop the previous one */ + if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { + int first = xsup[supno[jj - 1]]; + int last = jj - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) + / m) - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / + (double)min_mn); + + /* Drop small rows */ + i = ilu_sdrop_row(options, first, last, tol_L, quota, + &nnzLj, &fill_tol, &Glu, tempv, iwork2, + 1); + + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } /* if start a new supernode */ + + } /* for */ + + jcol += panel_size; /* Move to the next panel */ + + } /* else */ + + } /* for */ + + *info = iinfo; + + if ( m > n ) { + k = 0; + for (i = 0; i < m; ++i) + if ( perm_r[i] == EMPTY ) { + perm_r[i] = n + k; + ++k; + } + } + + ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); + fixupL(min_mn, perm_r, &Glu); + + sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */ + + if ( fact == SamePattern_SameRowPerm ) { + /* L and U structures may have changed due to possibly different + pivoting, even though the storage is available. + There could also be memory expansions, so the array locations + may have changed, */ + ((SCformat *)L->Store)->nnz = nnzL; + ((SCformat *)L->Store)->nsuper = Glu.supno[n]; + ((SCformat *)L->Store)->nzval = Glu.lusup; + ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; + ((SCformat *)L->Store)->rowind = Glu.lsub; + ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; + ((NCformat *)U->Store)->nnz = nnzU; + ((NCformat *)U->Store)->nzval = Glu.ucol; + ((NCformat *)U->Store)->rowind = Glu.usub; + ((NCformat *)U->Store)->colptr = Glu.xusub; + } else { + sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, + Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, + Glu.xsup, SLU_SC, SLU_S, SLU_TRLU); + sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, + Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU); + } + + ops[FACT] += ops[TRSV] + ops[GEMV]; + + if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); + SUPERLU_FREE (iperm_c); + SUPERLU_FREE (relax_end); + SUPERLU_FREE (swap); + SUPERLU_FREE (iswap); + SUPERLU_FREE (relax_fsupc); + SUPERLU_FREE (amax); + if ( iwork2 ) SUPERLU_FREE (iwork2); + +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsrfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsrfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsrfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,26 @@ -/* +/*! @file sgsrfs.c + * \brief Improves computed solution to a system of inear equations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routine SGERFS
+ * 
*/ /* * File name: sgsrfs.c * History: Modified from lapack routine SGERFS */ #include -#include "ssp_defs.h" +#include "slu_sdefs.h" -void -sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, char *equed, float *R, float *C, - SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  *   Purpose   
  *   =======   
  *
@@ -123,8 +124,16 @@
  *
  *    ITMAX is the maximum number of steps of iterative refinement.   
  *
- */  
+ * 
+ */ +void +sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, char *equed, float *R, float *C, + SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr, + SuperLUStat_t *stat, int *info) +{ + #define ITMAX 5 /* Table of constant values */ @@ -224,6 +233,8 @@ nz = A->ncol + 1; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); + /* Set SAFE1 essentially to be the underflow threshold times the + number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; @@ -274,7 +285,7 @@ where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the - numerator and denominator before dividing. */ + numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] ); @@ -297,11 +308,15 @@ } s = 0.; for (i = 0; i < A->nrow; ++i) { - if (rwork[i] > safe2) + if (rwork[i] > safe2) { s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] ); - else - s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / - (rwork[i] + safe1) ); + } else if ( rwork[i] != 0.0 ) { + /* Adding SAFE1 to the numerator guards against + spuriously zero residuals (underflow). */ + s = SUPERLU_MAX( s, (safe1 + fabs(work[i])) / rwork[i] ); + } + /* If rwork[i] is exactly 0.0, then we know the true + residual also must be exactly 0.0. */ } berr[j] = s; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssv.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssv.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssv.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,19 @@ - -/* +/*! @file sgssv.c + * \brief Solves the system of linear equations A*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" -void -sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, - SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -127,15 +126,21 @@
  *                so the solution could not be computed.
  *             > A->ncol: number of bytes allocated when memory allocation
  *                failure occurred, plus A->ncol.
- *   
+ * 
*/ + +void +sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, + SuperLUStat_t *stat, int *info ) +{ + DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ - float drop_tol = 0.; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; @@ -201,8 +206,8 @@ relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ - sgstrf(options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); + sgstrf(options, &AC, relax, panel_size, etree, + NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssvx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssvx.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssvx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,22 +1,19 @@ -/* +/*! @file sgssvx.c + * \brief Solves the system of linear equations A*X=B or A'*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" -void -sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - int *etree, char *equed, float *R, float *C, - SuperMatrix *L, SuperMatrix *U, void *work, int lwork, - SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, - float *rcond, float *ferr, float *berr, - mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -314,7 +311,7 @@
  *
  * stat   (output) SuperLUStat_t*
  *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
+ *        See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info    (output) int*
  *         = 0: successful exit   
@@ -332,9 +329,19 @@
  *                    accurate than the value of RCOND would suggest.   
  *              > A->ncol+1: number of bytes allocated when memory allocation
  *                    failure occurred, plus A->ncol.
- *
+ * 
*/ +void +sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, float *R, float *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, + float *rcond, float *ferr, float *berr, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) +{ + + DNformat *Bstore, *Xstore; float *Bmat, *Xmat; int ldb, ldx, nrhs; @@ -346,13 +353,12 @@ int i, j, info1; float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; - float diag_pivot_thresh, drop_tol; + float diag_pivot_thresh; double t0; /* temporary time */ double *utime; /* External functions */ extern float slangs(char *, SuperMatrix *); - extern double slamch_(char *); Bstore = B->Store; Xstore = X->Store; @@ -443,7 +449,6 @@ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; - drop_tol = 0.0; utime = stat->utime; @@ -523,8 +528,8 @@ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); - sgstrf(options, &AC, drop_tol, relax, panel_size, - etree, work, lwork, perm_c, perm_r, L, U, stat, info); + sgstrf(options, &AC, relax, panel_size, etree, + work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrf.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,33 +1,32 @@ -/* +/*! @file sgstrf.c + * \brief Computes an LU factorization of a general sparse matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
+ * 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" -void -sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol, - int relax, int panel_size, int *etree, void *work, int lwork, - int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, - SuperLUStat_t *stat, int *info) -{ -/* +#include "slu_sdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -53,11 +52,6 @@
  *          (A->nrow, A->ncol). The type of A can be:
  *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
  *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *	    Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
  * relax    (input) int
  *          To control degree of relaxing supernodes. If the number
  *          of nodes (columns) in a subtree of the elimination tree is less
@@ -117,7 +111,7 @@
  *
  * stat     (output) SuperLUStat_t*
  *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
+ *          See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info     (output) int*
  *          = 0: successful exit
@@ -177,13 +171,20 @@
  *	    	   NOTE: there are W of them.
  *
  *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
- *
+ *	The size of this array is defined by NUM_TEMPV() in slu_sdefs.h.
+ * 
*/ + +void +sgstrf (superlu_options_t *options, SuperMatrix *A, + int relax, int panel_size, int *etree, void *work, int lwork, + int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, + SuperLUStat_t *stat, int *info) +{ /* Local working arrays */ NCPformat *Astore; - int *iperm_r; /* inverse of perm_r; - used when options->Fact == SamePattern_SameRowPerm */ + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; float *swork; @@ -199,7 +200,8 @@ int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + float fill_ratio = sp_ienv(6); /* estimated fill ratio */ + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; @@ -230,7 +232,7 @@ /* Allocate storage common to the factor routines */ *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, - panel_size, L, U, &Glu, &iwork, &swork); + panel_size, fill_ratio, L, U, &Glu, &iwork, &swork); if ( *info ) return; xsup = Glu.xsup; @@ -417,7 +419,7 @@ ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { - sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, + sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_S, SLU_TRLU); sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, @@ -425,6 +427,7 @@ } ops[FACT] += ops[TRSV] + ops[GEMV]; + stat->expansions = --(Glu.num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sgstrs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,27 @@ -/* +/*! @file sgstrs.c + * \brief Solves a system using LU factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" /* @@ -29,13 +31,9 @@ void slsolve(int, int, float*, float*); void smatvec(int, int, int, float*, float*, float*); - -void -sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, SuperMatrix *B, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -85,8 +83,15 @@
  * info    (output) int*
  * 	   = 0: successful exit
  *	   < 0: if info = -i, the i-th argument had an illegal value
- *
+ * 
*/ + +void +sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, SuperMatrix *B, + SuperLUStat_t *stat, int *info) +{ + #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif @@ -288,7 +293,7 @@ stat->ops[SOLVE] = solve_ops; - } else { /* Solve A'*X=B */ + } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; @@ -297,7 +302,6 @@ } stat->ops[SOLVE] = 0; - for (k = 0; k < nrhs; ++k) { /* Multiply by inv(U'). */ @@ -307,7 +311,6 @@ sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); } - /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slacon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slacon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slacon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,67 +1,74 @@ - -/* +/*! @file slacon.c + * \brief Estimates the 1-norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "Cnames.h" +#include "slu_Cnames.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   SLACON estimates the 1-norm of a square matrix A.   
+ *   Reverse communication is used for evaluating matrix-vector products. 
+ * 
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   N      (input) INT
+ *          The order of the matrix.  N >= 1.   
+ *
+ *   V      (workspace) FLOAT PRECISION array, dimension (N)   
+ *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
+ *          (W is not returned).   
+ *
+ *   X      (input/output) FLOAT PRECISION array, dimension (N)   
+ *          On an intermediate return, X should be overwritten by   
+ *                A * X,   if KASE=1,   
+ *                A' * X,  if KASE=2,
+ *         and SLACON must be re-called with all the other parameters   
+ *          unchanged.   
+ *
+ *   ISGN   (workspace) INT array, dimension (N)
+ *
+ *   EST    (output) FLOAT PRECISION   
+ *          An estimate (a lower bound) for norm(A).   
+ *
+ *   KASE   (input/output) INT
+ *          On the initial call to SLACON, KASE should be 0.   
+ *          On an intermediate return, KASE will be 1 or 2, indicating   
+ *          whether X should be overwritten by A * X  or A' * X.   
+ *          On the final return from SLACON, KASE will again be 0.   
+ *
+ *   Further Details   
+ *   ======= =======   
+ *
+ *   Contributed by Nick Higham, University of Manchester.   
+ *   Originally named CONEST, dated March 16, 1988.   
+ *
+ *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
+ *   a real or complex matrix, with applications to condition estimation", 
+ *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
+ *   ===================================================================== 
+ * 
+ */ + int slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase) { -/* - Purpose - ======= - SLACON estimates the 1-norm of a square matrix A. - Reverse communication is used for evaluating matrix-vector products. - - Arguments - ========= - - N (input) INT - The order of the matrix. N >= 1. - - V (workspace) FLOAT PRECISION array, dimension (N) - On the final return, V = A*W, where EST = norm(V)/norm(W) - (W is not returned). - - X (input/output) FLOAT PRECISION array, dimension (N) - On an intermediate return, X should be overwritten by - A * X, if KASE=1, - A' * X, if KASE=2, - and SLACON must be re-called with all the other parameters - unchanged. - - ISGN (workspace) INT array, dimension (N) - - EST (output) FLOAT PRECISION - An estimate (a lower bound) for norm(A). - - KASE (input/output) INT - On the initial call to SLACON, KASE should be 0. - On an intermediate return, KASE will be 1 or 2, indicating - whether X should be overwritten by A * X or A' * X. - On the final return from SLACON, KASE will again be 0. - - Further Details - ======= ======= - - Contributed by Nick Higham, University of Manchester. - Originally named CONEST, dated March 16, 1988. - - Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - a real or complex matrix, with applications to condition estimation", - ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - ===================================================================== -*/ - /* Table of constant values */ int c__1 = 1; float zero = 0.0; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,4 +1,16 @@ +/*! @file slamch.c + * \brief Determines single precision machine parameters and other service routines + * + *
+ *   -- LAPACK auxiliary routine (version 2.0) --   
+ *      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+ *      Courant Institute, Argonne National Lab, and Rice University   
+ *      October 31, 1992   
+ * 
+ */ #include +#include "slu_Cnames.h" + #define TRUE_ (1) #define FALSE_ (0) #define min(a,b) ((a) <= (b) ? (a) : (b)) @@ -6,15 +18,10 @@ #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (double)abs(x) -double slamch_(char *cmach) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/*! \brief - - Purpose +
+ Purpose   
     =======   
 
     SLAMCH determines single precision machine parameters.   
@@ -49,7 +56,10 @@
             rmax  = overflow threshold  - (base**emax)*(1-eps)   
 
    ===================================================================== 
+
*/ +double slamch_(char *cmach) +{ /* >>Start of File<< Initialized data */ static int first = TRUE_; @@ -133,16 +143,11 @@ } /* slamch_ */ -/* Subroutine */ int slamc1_(int *beta, int *t, int *rnd, int - *ieee1) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/* Subroutine */ +/*! \brief - - Purpose +
+ Purpose   
     =======   
 
     SLAMC1 determines the machine parameters given by BETA, T, RND, and   
@@ -183,7 +188,12 @@
           Comms. of the ACM, 17, 276-277.   
 
    ===================================================================== 
+
*/ + +int slamc1_(int *beta, int *t, int *rnd, int + *ieee1) +{ /* Initialized data */ static int first = TRUE_; /* System generated locals */ @@ -345,15 +355,11 @@ } /* slamc1_ */ -/* Subroutine */ int slamc2_(int *beta, int *t, int *rnd, float * - eps, int *emin, float *rmin, int *emax, float *rmax) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/* Subroutine */ +/*! \brief +
     Purpose   
     =======   
 
@@ -409,7 +415,11 @@
     W. Kahan of the University of California at Berkeley.   
 
    ===================================================================== 
+
*/ +int slamc2_(int *beta, int *t, int *rnd, float * + eps, int *emin, float *rmin, int *emax, float *rmax) +{ /* Table of constant values */ static int c__1 = 1; @@ -647,15 +657,9 @@ } /* slamc2_ */ +/*! \brief -double slamc3_(float *a, float *b) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - +
     Purpose   
     =======   
 
@@ -672,7 +676,12 @@
             The values A and B.   
 
    ===================================================================== 
+
*/ + +double slamc3_(float *a, float *b) +{ + /* >>Start of File<< System generated locals */ float ret_val; @@ -688,14 +697,11 @@ } /* slamc3_ */ -/* Subroutine */ int slamc4_(int *emin, float *start, int *base) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/* Subroutine */ +/*! \brief +
     Purpose   
     =======   
 
@@ -717,7 +723,11 @@
             The base of the machine.   
 
    ===================================================================== 
+
*/ + +int slamc4_(int *emin, float *start, int *base) +{ /* System generated locals */ int i__1; float r__1; @@ -778,15 +788,10 @@ } /* slamc4_ */ -/* Subroutine */ int slamc5_(int *beta, int *p, int *emin, - int *ieee, int *emax, float *rmax) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 +/* Subroutine */ +/*! \brief - +
     Purpose   
     =======   
 
@@ -828,7 +833,13 @@
        First compute LEXP and UEXP, two powers of 2 that bound   
        abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
        approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
+       (EMAX is the exponent of the required number RMAX). 
+
+*/ + +int slamc5_(int *beta, int *p, int *emin, + int *ieee, int *emax, float *rmax) +{ /* Table of constant values */ static float c_b5 = 0.f; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slangs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slangs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slangs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,58 +1,65 @@ - -/* +/*! @file slangs.c + * \brief Returns the value of the one norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from lapack routine SLANGE 
+ * 
*/ /* * File name: slangs.c * History: Modified from lapack routine SLANGE */ #include -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   SLANGS returns the value of the one norm, or the Frobenius norm, or 
+ *   the infinity norm, or the element of largest absolute value of a 
+ *   real matrix A.   
+ *
+ *   Description   
+ *   ===========   
+ *
+ *   SLANGE returns the value   
+ *
+ *      SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
+ *               (   
+ *               ( norm1(A),         NORM = '1', 'O' or 'o'   
+ *               (   
+ *               ( normI(A),         NORM = 'I' or 'i'   
+ *               (   
+ *               ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
+ *
+ *   where  norm1  denotes the  one norm of a matrix (maximum column sum), 
+ *   normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
+ *   normF  denotes the  Frobenius norm of a matrix (square root of sum of 
+ *   squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   NORM    (input) CHARACTER*1   
+ *           Specifies the value to be returned in SLANGE as described above.   
+ *   A       (input) SuperMatrix*
+ *           The M by N sparse matrix A. 
+ *
+ *  =====================================================================
+ * 
+ */ + float slangs(char *norm, SuperMatrix *A) { -/* - Purpose - ======= - - SLANGS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - SLANGE returns the value - - SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANGE as described above. - A (input) SuperMatrix* - The M by N sparse matrix A. - - ===================================================================== -*/ /* Local variables */ NCformat *Astore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slaqgs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slaqgs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slaqgs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,81 +1,89 @@ - -/* +/*! @file slaqgs.c + * \brief Equlibrates a general sprase matrix + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
+ * Modified from LAPACK routine SLAQGE
+ * 
*/ /* * File name: slaqgs.c * History: Modified from LAPACK routine SLAQGE */ #include -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   SLAQGS equilibrates a general sparse M by N matrix A using the row and   
+ *   scaling factors in the vectors R and C.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input/output) SuperMatrix*
+ *           On exit, the equilibrated matrix.  See EQUED for the form of 
+ *           the equilibrated matrix. The type of A can be:
+ *	    Stype = NC; Dtype = SLU_S; Mtype = GE.
+ *	    
+ *   R       (input) float*, dimension (A->nrow)
+ *           The row scale factors for A.
+ *	    
+ *   C       (input) float*, dimension (A->ncol)
+ *           The column scale factors for A.
+ *	    
+ *   ROWCND  (input) float
+ *           Ratio of the smallest R(i) to the largest R(i).
+ *	    
+ *   COLCND  (input) float
+ *           Ratio of the smallest C(i) to the largest C(i).
+ *	    
+ *   AMAX    (input) float
+ *           Absolute value of largest matrix entry.
+ *	    
+ *   EQUED   (output) char*
+ *           Specifies the form of equilibration that was done.   
+ *           = 'N':  No equilibration   
+ *           = 'R':  Row equilibration, i.e., A has been premultiplied by  
+ *                   diag(R).   
+ *           = 'C':  Column equilibration, i.e., A has been postmultiplied  
+ *                   by diag(C).   
+ *           = 'B':  Both row and column equilibration, i.e., A has been
+ *                   replaced by diag(R) * A * diag(C).   
+ *
+ *   Internal Parameters   
+ *   ===================   
+ *
+ *   THRESH is a threshold value used to decide if row or column scaling   
+ *   should be done based on the ratio of the row or column scaling   
+ *   factors.  If ROWCND < THRESH, row scaling is done, and if   
+ *   COLCND < THRESH, column scaling is done.   
+ *
+ *   LARGE and SMALL are threshold values used to decide if row scaling   
+ *   should be done based on the absolute size of the largest matrix   
+ *   element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
+ *
+ *   ===================================================================== 
+ * 
+ */ + void slaqgs(SuperMatrix *A, float *r, float *c, float rowcnd, float colcnd, float amax, char *equed) { -/* - Purpose - ======= - SLAQGS equilibrates a general sparse M by N matrix A using the row and - scaling factors in the vectors R and C. - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input/output) SuperMatrix* - On exit, the equilibrated matrix. See EQUED for the form of - the equilibrated matrix. The type of A can be: - Stype = NC; Dtype = SLU_S; Mtype = GE. - - R (input) float*, dimension (A->nrow) - The row scale factors for A. - - C (input) float*, dimension (A->ncol) - The column scale factors for A. - - ROWCND (input) float - Ratio of the smallest R(i) to the largest R(i). - - COLCND (input) float - Ratio of the smallest C(i) to the largest C(i). - - AMAX (input) float - Absolute value of largest matrix entry. - - EQUED (output) char* - Specifies the form of equilibration that was done. - = 'N': No equilibration - = 'R': Row equilibration, i.e., A has been premultiplied by - diag(R). - = 'C': Column equilibration, i.e., A has been postmultiplied - by diag(C). - = 'B': Both row and column equilibration, i.e., A has been - replaced by diag(R) * A * diag(C). - - Internal Parameters - =================== - - THRESH is a threshold value used to decide if row or column scaling - should be done based on the ratio of the row or column scaling - factors. If ROWCND < THRESH, row scaling is done, and if - COLCND < THRESH, column scaling is done. - - LARGE and SMALL are threshold values used to decide if row scaling - should be done based on the absolute size of the largest matrix - element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. - - ===================================================================== -*/ - #define THRESH (0.1) /* Local variables */ Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sldperm.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sldperm.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sldperm.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,168 @@ + +/*! @file + * \brief Finds a row permutation so that the matrix has large entries on the diagonal + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_sdefs.h" + +extern void mc64id_(int_t*); +extern void mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [], + int_t*, int_t [], int_t*, int_t[], int_t*, double [], + int_t [], int_t []); + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   SLDPERM finds a row permutation so that the matrix has large
+ *   entries on the diagonal.
+ *
+ * Arguments
+ * =========
+ *
+ * job    (input) int
+ *        Control the action. Possible values for JOB are:
+ *        = 1 : Compute a row permutation of the matrix so that the
+ *              permuted matrix has as many entries on its diagonal as
+ *              possible. The values on the diagonal are of arbitrary size.
+ *              HSL subroutine MC21A/AD is used for this.
+ *        = 2 : Compute a row permutation of the matrix so that the smallest 
+ *              value on the diagonal of the permuted matrix is maximized.
+ *        = 3 : Compute a row permutation of the matrix so that the smallest
+ *              value on the diagonal of the permuted matrix is maximized.
+ *              The algorithm differs from the one used for JOB = 2 and may
+ *              have quite a different performance.
+ *        = 4 : Compute a row permutation of the matrix so that the sum
+ *              of the diagonal entries of the permuted matrix is maximized.
+ *        = 5 : Compute a row permutation of the matrix so that the product
+ *              of the diagonal entries of the permuted matrix is maximized
+ *              and vectors to scale the matrix so that the nonzero diagonal 
+ *              entries of the permuted matrix are one in absolute value and 
+ *              all the off-diagonal entries are less than or equal to one in 
+ *              absolute value.
+ *        Restriction: 1 <= JOB <= 5.
+ *
+ * n      (input) int
+ *        The order of the matrix.
+ *
+ * nnz    (input) int
+ *        The number of nonzeros in the matrix.
+ *
+ * adjncy (input) int*, of size nnz
+ *        The adjacency structure of the matrix, which contains the row
+ *        indices of the nonzeros.
+ *
+ * colptr (input) int*, of size n+1
+ *        The pointers to the beginning of each column in ADJNCY.
+ *
+ * nzval  (input) float*, of size nnz
+ *        The nonzero values of the matrix. nzval[k] is the value of
+ *        the entry corresponding to adjncy[k].
+ *        It is not used if job = 1.
+ *
+ * perm   (output) int*, of size n
+ *        The permutation vector. perm[i] = j means row i in the
+ *        original matrix is in row j of the permuted matrix.
+ *
+ * u      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the row scaling factors. 
+ *
+ * v      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the column scaling factors. 
+ *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
+ * 
+ */ + +int +sldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], + float nzval[], int_t *perm, float u[], float v[]) +{ + int_t i, liw, ldw, num; + int_t *iw, icntl[10], info[10]; + double *dw; + double *nzval_d = (double *) SUPERLU_MALLOC(nnz * sizeof(double)); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Enter sldperm()"); +#endif + liw = 5*n; + if ( job == 3 ) liw = 10*n + nnz; + if ( !(iw = intMalloc(liw)) ) ABORT("Malloc fails for iw[]"); + ldw = 3*n + nnz; + if ( !(dw = (double*) SUPERLU_MALLOC(ldw * sizeof(double))) ) + ABORT("Malloc fails for dw[]"); + + /* Increment one to get 1-based indexing. */ + for (i = 0; i <= n; ++i) ++colptr[i]; + for (i = 0; i < nnz; ++i) ++adjncy[i]; +#if ( DEBUGlevel>=2 ) + printf("LDPERM(): n %d, nnz %d\n", n, nnz); + slu_PrintInt10("colptr", n+1, colptr); + slu_PrintInt10("adjncy", nnz, adjncy); +#endif + + /* + * NOTE: + * ===== + * + * MC64AD assumes that column permutation vector is defined as: + * perm(i) = j means column i of permuted A is in column j of original A. + * + * Since a symmetric permutation preserves the diagonal entries. Then + * by the following relation: + * P'(A*P')P = P'A + * we can apply inverse(perm) to rows of A to get large diagonal entries. + * But, since 'perm' defined in MC64AD happens to be the reverse of + * SuperLU's definition of permutation vector, therefore, it is already + * an inverse for our purpose. We will thus use it directly. + * + */ + mc64id_(icntl); +#if 0 + /* Suppress error and warning messages. */ + icntl[0] = -1; + icntl[1] = -1; +#endif + + for (i = 0; i < nnz; ++i) nzval_d[i] = nzval[i]; + mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval_d, &num, perm, + &liw, iw, &ldw, dw, icntl, info); + +#if ( DEBUGlevel>=2 ) + slu_PrintInt10("perm", n, perm); + printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); +#endif + if ( info[0] == 1 ) { /* Structurally singular */ + printf(".. The last %d permutations:\n", n-num); + slu_PrintInt10("perm", n-num, &perm[num]); + } + + /* Restore to 0-based indexing. */ + for (i = 0; i <= n; ++i) --colptr[i]; + for (i = 0; i < nnz; ++i) --adjncy[i]; + for (i = 0; i < n; ++i) --perm[i]; + + if ( job == 5 ) + for (i = 0; i < n; ++i) { + u[i] = dw[i]; + v[i] = dw[n+i]; + } + + SUPERLU_FREE(iw); + SUPERLU_FREE(dw); + SUPERLU_FREE(nzval_d); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Exit sldperm()"); +#endif + + return info[0]; +} Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/Cnames.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,361 @@ +/*! @file slu_Cnames.h + * \brief Macros defining how C routines will be called + * + *
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 1, 1997
+ *
+ * These macros define how C routines will be called.  ADD_ assumes that
+ * they will be called by fortran, which expects C routines to have an
+ * underscore postfixed to the name (Suns, and the Intel expect this).
+ * NOCHANGE indicates that fortran will be calling, and that it expects
+ * the name called by fortran to be identical to that compiled by the C
+ * (RS6K's do this).  UPCASE says it expects C routines called by fortran
+ * to be in all upcase (CRAY wants this). 
+ * 
+ */ +#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ +#define __SUPERLU_CNAMES + + +#define ADD_ 0 +#define ADD__ 1 +#define NOCHANGE 2 +#define UPCASE 3 +#define C_CALL 4 + +#ifdef UpCase +#define F77_CALL_C UPCASE +#endif + +#ifdef NoChange +#define F77_CALL_C NOCHANGE +#endif + +#ifdef Add_ +#define F77_CALL_C ADD_ +#endif + +#ifdef Add__ +#define F77_CALL_C ADD__ +#endif + +/* Default */ +#ifndef F77_CALL_C +#define F77_CALL_C ADD_ +#endif + + +#if (F77_CALL_C == ADD_) +/* + * These defines set up the naming scheme required to have a fortran 77 + * routine call a C routine + * No redefinition necessary to have following Fortran to C interface: + * FORTRAN CALL C DECLARATION + * call dgemm(...) void dgemm_(...) + * + * This is the default. + */ + +#endif + +#if (F77_CALL_C == ADD__) +/* + * These defines set up the naming scheme required to have a fortran 77 + * routine call a C routine + * for following Fortran to C interface: + * FORTRAN CALL C DECLARATION + * call dgemm(...) void dgemm__(...) + */ +/* BLAS */ +#define sswap_ sswap__ +#define saxpy_ saxpy__ +#define sasum_ sasum__ +#define isamax_ isamax__ +#define scopy_ scopy__ +#define sscal_ sscal__ +#define sger_ sger__ +#define snrm2_ snrm2__ +#define ssymv_ ssymv__ +#define sdot_ sdot__ +#define saxpy_ saxpy__ +#define ssyr2_ ssyr2__ +#define srot_ srot__ +#define sgemv_ sgemv__ +#define strsv_ strsv__ +#define sgemm_ sgemm__ +#define strsm_ strsm__ + +#define dswap_ dswap__ +#define daxpy_ daxpy__ +#define dasum_ dasum__ +#define idamax_ idamax__ +#define dcopy_ dcopy__ +#define dscal_ dscal__ +#define dger_ dger__ +#define dnrm2_ dnrm2__ +#define dsymv_ dsymv__ +#define ddot_ ddot__ +#define daxpy_ daxpy__ +#define dsyr2_ dsyr2__ +#define drot_ drot__ +#define dgemv_ dgemv__ +#define dtrsv_ dtrsv__ +#define dgemm_ dgemm__ +#define dtrsm_ dtrsm__ + +#define cswap_ cswap__ +#define caxpy_ caxpy__ +#define scasum_ scasum__ +#define icamax_ icamax__ +#define ccopy_ ccopy__ +#define cscal_ cscal__ +#define scnrm2_ scnrm2__ +#define caxpy_ caxpy__ +#define cgemv_ cgemv__ +#define ctrsv_ ctrsv__ +#define cgemm_ cgemm__ +#define ctrsm_ ctrsm__ +#define cgerc_ cgerc__ +#define chemv_ chemv__ +#define cher2_ cher2__ + +#define zswap_ zswap__ +#define zaxpy_ zaxpy__ +#define dzasum_ dzasum__ +#define izamax_ izamax__ +#define zcopy_ zcopy__ +#define zscal_ zscal__ +#define dznrm2_ dznrm2__ +#define zaxpy_ zaxpy__ +#define zgemv_ zgemv__ +#define ztrsv_ ztrsv__ +#define zgemm_ zgemm__ +#define ztrsm_ ztrsm__ +#define zgerc_ zgerc__ +#define zhemv_ zhemv__ +#define zher2_ zher2__ + +/* LAPACK */ +#define dlamch_ dlamch__ +#define slamch_ slamch__ +#define xerbla_ xerbla__ +#define lsame_ lsame__ +#define dlacon_ dlacon__ +#define slacon_ slacon__ +#define icmax1_ icmax1__ +#define scsum1_ scsum1__ +#define clacon_ clacon__ +#define dzsum1_ dzsum1__ +#define izmax1_ izmax1__ +#define zlacon_ zlacon__ + +/* Fortran interface */ +#define c_bridge_dgssv_ c_bridge_dgssv__ +#define c_fortran_sgssv_ c_fortran_sgssv__ +#define c_fortran_dgssv_ c_fortran_dgssv__ +#define c_fortran_cgssv_ c_fortran_cgssv__ +#define c_fortran_zgssv_ c_fortran_zgssv__ +#endif + +#if (F77_CALL_C == UPCASE) +/* + * These defines set up the naming scheme required to have a fortran 77 + * routine call a C routine + * following Fortran to C interface: + * FORTRAN CALL C DECLARATION + * call dgemm(...) void DGEMM(...) + */ +/* BLAS */ +#define sswap_ SSWAP +#define saxpy_ SAXPY +#define sasum_ SASUM +#define isamax_ ISAMAX +#define scopy_ SCOPY +#define sscal_ SSCAL +#define sger_ SGER +#define snrm2_ SNRM2 +#define ssymv_ SSYMV +#define sdot_ SDOT +#define saxpy_ SAXPY +#define ssyr2_ SSYR2 +#define srot_ SROT +#define sgemv_ SGEMV +#define strsv_ STRSV +#define sgemm_ SGEMM +#define strsm_ STRSM + +#define dswap_ DSWAP +#define daxpy_ DAXPY +#define dasum_ SASUM +#define idamax_ ISAMAX +#define dcopy_ SCOPY +#define dscal_ SSCAL +#define dger_ SGER +#define dnrm2_ SNRM2 +#define dsymv_ SSYMV +#define ddot_ SDOT +#define daxpy_ SAXPY +#define dsyr2_ SSYR2 +#define drot_ SROT +#define dgemv_ SGEMV +#define dtrsv_ STRSV +#define dgemm_ SGEMM +#define dtrsm_ STRSM + +#define cswap_ CSWAP +#define caxpy_ CAXPY +#define scasum_ SCASUM +#define icamax_ ICAMAX +#define ccopy_ CCOPY +#define cscal_ CSCAL +#define scnrm2_ SCNRM2 +#define caxpy_ CAXPY +#define cgemv_ CGEMV +#define ctrsv_ CTRSV +#define cgemm_ CGEMM +#define ctrsm_ CTRSM +#define cgerc_ CGERC +#define chemv_ CHEMV +#define cher2_ CHER2 + +#define zswap_ ZSWAP +#define zaxpy_ ZAXPY +#define dzasum_ DZASUM +#define izamax_ IZAMAX +#define zcopy_ ZCOPY +#define zscal_ ZSCAL +#define dznrm2_ DZNRM2 +#define zaxpy_ ZAXPY +#define zgemv_ ZGEMV +#define ztrsv_ ZTRSV +#define zgemm_ ZGEMM +#define ztrsm_ ZTRSM +#define zgerc_ ZGERC +#define zhemv_ ZHEMV +#define zher2_ ZHER2 + +/* LAPACK */ +#define dlamch_ DLAMCH +#define slamch_ SLAMCH +#define xerbla_ XERBLA +#define lsame_ LSAME +#define dlacon_ DLACON +#define slacon_ SLACON +#define icmax1_ ICMAX1 +#define scsum1_ SCSUM1 +#define clacon_ CLACON +#define dzsum1_ DZSUM1 +#define izmax1_ IZMAX1 +#define zlacon_ ZLACON + +/* Fortran interface */ +#define c_bridge_dgssv_ C_BRIDGE_DGSSV +#define c_fortran_sgssv_ C_FORTRAN_SGSSV +#define c_fortran_dgssv_ C_FORTRAN_DGSSV +#define c_fortran_cgssv_ C_FORTRAN_CGSSV +#define c_fortran_zgssv_ C_FORTRAN_ZGSSV +#endif + +#if (F77_CALL_C == NOCHANGE) +/* + * These defines set up the naming scheme required to have a fortran 77 + * routine call a C routine + * for following Fortran to C interface: + * FORTRAN CALL C DECLARATION + * call dgemm(...) void dgemm(...) + */ +/* BLAS */ +#define sswap_ sswap +#define saxpy_ saxpy +#define sasum_ sasum +#define isamax_ isamax +#define scopy_ scopy +#define sscal_ sscal +#define sger_ sger +#define snrm2_ snrm2 +#define ssymv_ ssymv +#define sdot_ sdot +#define saxpy_ saxpy +#define ssyr2_ ssyr2 +#define srot_ srot +#define sgemv_ sgemv +#define strsv_ strsv +#define sgemm_ sgemm +#define strsm_ strsm + +#define dswap_ dswap +#define daxpy_ daxpy +#define dasum_ dasum +#define idamax_ idamax +#define dcopy_ dcopy +#define dscal_ dscal +#define dger_ dger +#define dnrm2_ dnrm2 +#define dsymv_ dsymv +#define ddot_ ddot +#define daxpy_ daxpy +#define dsyr2_ dsyr2 +#define drot_ drot +#define dgemv_ dgemv +#define dtrsv_ dtrsv +#define dgemm_ dgemm +#define dtrsm_ dtrsm + +#define cswap_ cswap +#define caxpy_ caxpy +#define scasum_ scasum +#define icamax_ icamax +#define ccopy_ ccopy +#define cscal_ cscal +#define scnrm2_ scnrm2 +#define caxpy_ caxpy +#define cgemv_ cgemv +#define ctrsv_ ctrsv +#define cgemm_ cgemm +#define ctrsm_ ctrsm +#define cgerc_ cgerc +#define chemv_ chemv +#define cher2_ cher2 + +#define zswap_ zswap +#define zaxpy_ zaxpy +#define dzasum_ dzasum +#define izamax_ izamax +#define zcopy_ zcopy +#define zscal_ zscal +#define dznrm2_ dznrm2 +#define zaxpy_ zaxpy +#define zgemv_ zgemv +#define ztrsv_ ztrsv +#define zgemm_ zgemm +#define ztrsm_ ztrsm +#define zgerc_ zgerc +#define zhemv_ zhemv +#define zher2_ zher2 + +/* LAPACK */ +#define dlamch_ dlamch +#define slamch_ slamch +#define xerbla_ xerbla +#define lsame_ lsame +#define dlacon_ dlacon +#define slacon_ slacon +#define icmax1_ icmax1 +#define scsum1_ scsum1 +#define clacon_ clacon +#define dzsum1_ dzsum1 +#define izmax1_ izmax1 +#define zlacon_ zlacon + +/* Fortran interface */ +#define c_bridge_dgssv_ c_bridge_dgssv +#define c_fortran_sgssv_ c_fortran_sgssv +#define c_fortran_dgssv_ c_fortran_dgssv +#define c_fortran_cgssv_ c_fortran_cgssv +#define c_fortran_zgssv_ c_fortran_zgssv +#endif + +#endif /* __SUPERLU_CNAMES */ Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_cdefs.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/csp_defs.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_cdefs.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_cdefs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,282 @@ + +/*! @file slu_cdefs.h + * \brief Header file for real operations + * + *
 
+ * -- SuperLU routine (version 4.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * June 30, 2009
+ * 
+ * Global data structures used in LU factorization -
+ * 
+ *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ *   (xsup,supno): supno[i] is the supernode no to which i belongs;
+ *	xsup(s) points to the beginning of the s-th supernode.
+ *	e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
+ *	        xsup 0 1 2 4 7 12
+ *	Note: dfs will be performed on supernode rep. relative to the new 
+ *	      row pivoting ordering
+ *
+ *   (xlsub,lsub): lsub[*] contains the compressed subscript of
+ *	rectangular supernodes; xlsub[j] points to the starting
+ *	location of the j-th column in lsub[*]. Note that xlsub 
+ *	is indexed by column.
+ *	Storage: original row subscripts
+ *
+ *      During the course of sparse LU factorization, we also use
+ *	(xlsub,lsub) for the purpose of symmetric pruning. For each
+ *	supernode {s,s+1,...,t=s+r} with first column s and last
+ *	column t, the subscript set
+ *		lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ *	is the structure of column s (i.e. structure of this supernode).
+ *	It is used for the storage of numerical values.
+ *	Furthermore,
+ *		lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ *	is the structure of the last column t of this supernode.
+ *	It is for the purpose of symmetric pruning. Therefore, the
+ *	structural subscripts can be rearranged without making physical
+ *	interchanges among the numerical values.
+ *
+ *	However, if the supernode has only one column, then we
+ *	only keep one set of subscripts. For any subscript interchange
+ *	performed, similar interchange must be done on the numerical
+ *	values.
+ *
+ *	The last column structures (for pruning) will be removed
+ *	after the numercial LU factorization phase.
+ *
+ *   (xlusup,lusup): lusup[*] contains the numerical values of the
+ *	rectangular supernodes; xlusup[j] points to the starting
+ *	location of the j-th column in storage vector lusup[*]
+ *	Note: xlusup is indexed by column.
+ *	Each rectangular supernode is stored by column-major
+ *	scheme, consistent with Fortran 2-dim array storage.
+ *
+ *   (xusub,ucol,usub): ucol[*] stores the numerical values of
+ *	U-columns outside the rectangular supernodes. The row
+ *	subscript of nonzero ucol[k] is stored in usub[k].
+ *	xusub[i] points to the starting location of column i in ucol.
+ *	Storage: new row subscripts; that is subscripts of PA.
+ * 
+ */ +#ifndef __SUPERLU_cSP_DEFS /* allow multiple inclusions */ +#define __SUPERLU_cSP_DEFS + +/* + * File name: csp_defs.h + * Purpose: Sparse matrix types and function prototypes + * History: + */ + +#ifdef _CRAY +#include +#include +#endif + +/* Define my integer type int_t */ +typedef int int_t; /* default */ + +#include +#include +#include "slu_Cnames.h" +#include "supermatrix.h" +#include "slu_util.h" +#include "slu_scomplex.h" + + + +typedef struct { + int *xsup; /* supernode and column mapping */ + int *supno; + int *lsub; /* compressed L subscripts */ + int *xlsub; + complex *lusup; /* L supernodes */ + int *xlusup; + complex *ucol; /* U columns */ + int *usub; + int *xusub; + int nzlmax; /* current max size of lsub */ + int nzumax; /* " " " ucol */ + int nzlumax; /* " " " lusup */ + int n; /* number of columns in the matrix */ + LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ + int num_expansions; + ExpHeader *expanders; /* Array of pointers to 4 types of memory */ + LU_stack_t stack; /* use user supplied memory */ +} GlobalLU_t; + + +/* -------- Prototypes -------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Driver routines */ +extern void +cgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +cgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, float *, float *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, + float *, float *, float *, float *, + mem_usage_t *, SuperLUStat_t *, int *); + /* ILU */ +extern void +cgsisv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +cgsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, float *, float *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, float *, float *, + mem_usage_t *, SuperLUStat_t *, int *); + + +/*! \brief Supernodal LU factor related */ +extern void +cCreate_CompCol_Matrix(SuperMatrix *, int, int, int, complex *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +cCreate_CompRow_Matrix(SuperMatrix *, int, int, int, complex *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +cCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); +extern void +cCreate_Dense_Matrix(SuperMatrix *, int, int, complex *, int, + Stype_t, Dtype_t, Mtype_t); +extern void +cCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, complex *, + int *, int *, int *, int *, int *, + Stype_t, Dtype_t, Mtype_t); +extern void +cCopy_Dense_Matrix(int, int, complex *, int, complex *, int); + +extern void countnz (const int, int *, int *, int *, GlobalLU_t *); +extern void ilu_countnz (const int, int *, int *, GlobalLU_t *); +extern void fixupL (const int, const int *, GlobalLU_t *); + +extern void callocateA (int, int, complex **, int **, int **); +extern void cgstrf (superlu_options_t*, SuperMatrix*, + int, int, int*, void *, int, int *, int *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); +extern int csnode_dfs (const int, const int, const int *, const int *, + const int *, int *, int *, GlobalLU_t *); +extern int csnode_bmod (const int, const int, const int, complex *, + complex *, GlobalLU_t *, SuperLUStat_t*); +extern void cpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, complex *, int *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern void cpanel_bmod (const int, const int, const int, const int, + complex *, complex *, int *, int *, + GlobalLU_t *, SuperLUStat_t*); +extern int ccolumn_dfs (const int, const int, int *, int *, int *, int *, + int *, int *, int *, int *, int *, GlobalLU_t *); +extern int ccolumn_bmod (const int, const int, complex *, + complex *, int *, int *, int, + GlobalLU_t *, SuperLUStat_t*); +extern int ccopy_to_ucol (int, int, int *, int *, int *, + complex *, GlobalLU_t *); +extern int cpivotL (const int, const double, int *, int *, + int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); +extern void cpruneL (const int, const int *, const int, const int, + const int *, const int *, int *, GlobalLU_t *); +extern void creadmt (int *, int *, int *, complex **, int **, int **); +extern void cGenXtrue (int, int, complex *, int); +extern void cFillRHS (trans_t, int, complex *, int, SuperMatrix *, + SuperMatrix *); +extern void cgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, + SuperMatrix *, SuperLUStat_t*, int *); +/* ILU */ +extern void cgsitrf (superlu_options_t*, SuperMatrix*, int, int, int*, + void *, int, int *, int *, SuperMatrix *, SuperMatrix *, + SuperLUStat_t*, int *); +extern int cldperm(int, int, int, int [], int [], complex [], + int [], float [], float []); +extern int ilu_csnode_dfs (const int, const int, const int *, const int *, + const int *, int *, GlobalLU_t *); +extern void ilu_cpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, complex *, float *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern int ilu_ccolumn_dfs (const int, const int, int *, int *, int *, + int *, int *, int *, int *, int *, + GlobalLU_t *); +extern int ilu_ccopy_to_ucol (int, int, int *, int *, int *, + complex *, int, milu_t, double, int, + complex *, int *, GlobalLU_t *, int *); +extern int ilu_cpivotL (const int, const double, int *, int *, int, int *, + int *, int *, int *, double, milu_t, + complex, GlobalLU_t *, SuperLUStat_t*); +extern int ilu_cdrop_row (superlu_options_t *, int, int, double, + int, int *, double *, GlobalLU_t *, + float *, int *, int); + + +/*! \brief Driver related */ + +extern void cgsequ (SuperMatrix *, float *, float *, float *, + float *, float *, int *); +extern void claqgs (SuperMatrix *, float *, float *, float, + float, float, char *); +extern void cgscon (char *, SuperMatrix *, SuperMatrix *, + float, float *, SuperLUStat_t*, int *); +extern float cPivotGrowth(int, SuperMatrix *, int *, + SuperMatrix *, SuperMatrix *); +extern void cgsrfs (trans_t, SuperMatrix *, SuperMatrix *, + SuperMatrix *, int *, int *, char *, float *, + float *, SuperMatrix *, SuperMatrix *, + float *, float *, SuperLUStat_t*, int *); + +extern int sp_ctrsv (char *, char *, char *, SuperMatrix *, + SuperMatrix *, complex *, SuperLUStat_t*, int *); +extern int sp_cgemv (char *, complex, SuperMatrix *, complex *, + int, complex, complex *, int); + +extern int sp_cgemm (char *, char *, int, int, int, complex, + SuperMatrix *, complex *, int, complex, + complex *, int); +extern double slamch_(char *); + + +/*! \brief Memory-related */ +extern int cLUMemInit (fact_t, void *, int, int, int, int, int, + float, SuperMatrix *, SuperMatrix *, + GlobalLU_t *, int **, complex **); +extern void cSetRWork (int, int, complex *, complex **, complex **); +extern void cLUWorkFree (int *, complex *, GlobalLU_t *); +extern int cLUMemXpand (int, int, MemType, int *, GlobalLU_t *); + +extern complex *complexMalloc(int); +extern complex *complexCalloc(int); +extern float *floatMalloc(int); +extern float *floatCalloc(int); +extern int cmemory_usage(const int, const int, const int, const int); +extern int cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); +extern int ilu_cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); + +/*! \brief Auxiliary routines */ +extern void creadhb(int *, int *, int *, complex **, int **, int **); +extern void creadrb(int *, int *, int *, complex **, int **, int **); +extern void creadtriple(int *, int *, int *, complex **, int **, int **); +extern void cCompRow_to_CompCol(int, int, int, complex*, int*, int*, + complex **, int **, int **); +extern void cfill (complex *, int, complex); +extern void cinf_norm_error (int, SuperMatrix *, complex *); +extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, + complex, complex, complex *, complex *, char *); + +/*! \brief Routines for debugging */ +extern void cPrint_CompCol_Matrix(char *, SuperMatrix *); +extern void cPrint_SuperNode_Matrix(char *, SuperMatrix *); +extern void cPrint_Dense_Matrix(char *, SuperMatrix *); +extern void cprint_lu_col(char *, int, int, int *, GlobalLU_t *); +extern int print_double_vec(char *, int, double *); +extern void check_tempv(int, complex *); + +#ifdef __cplusplus + } +#endif + +#endif /* __SUPERLU_cSP_DEFS */ + Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_dcomplex.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dcomplex.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_dcomplex.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_dcomplex.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,78 @@ + +/*! @file slu_dcomplex.h + * \brief Header file for complex operations + *
 
+ *  -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Contains definitions for various complex operations.
+ * This header file is to be included in source files z*.c
+ * 
+ */ +#ifndef __SUPERLU_DCOMPLEX /* allow multiple inclusions */ +#define __SUPERLU_DCOMPLEX + + +#ifndef DCOMPLEX_INCLUDE +#define DCOMPLEX_INCLUDE + +typedef struct { double r, i; } doublecomplex; + + +/* Macro definitions */ + +/*! \brief Complex Addition c = a + b */ +#define z_add(c, a, b) { (c)->r = (a)->r + (b)->r; \ + (c)->i = (a)->i + (b)->i; } + +/*! \brief Complex Subtraction c = a - b */ +#define z_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \ + (c)->i = (a)->i - (b)->i; } + +/*! \brief Complex-Double Multiplication */ +#define zd_mult(c, a, b) { (c)->r = (a)->r * (b); \ + (c)->i = (a)->i * (b); } + +/*! \brief Complex-Complex Multiplication */ +#define zz_mult(c, a, b) { \ + double cr, ci; \ + cr = (a)->r * (b)->r - (a)->i * (b)->i; \ + ci = (a)->i * (b)->r + (a)->r * (b)->i; \ + (c)->r = cr; \ + (c)->i = ci; \ + } + +#define zz_conj(a, b) { \ + (a)->r = (b)->r; \ + (a)->i = -((b)->i); \ + } + +/*! \brief Complex equality testing */ +#define z_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototypes for functions in dcomplex.c */ +void z_div(doublecomplex *, doublecomplex *, doublecomplex *); +double z_abs(doublecomplex *); /* exact */ +double z_abs1(doublecomplex *); /* approximate */ +void z_exp(doublecomplex *, doublecomplex *); +void d_cnjg(doublecomplex *r, doublecomplex *z); +double d_imag(doublecomplex *); +doublecomplex z_sgn(doublecomplex *); +doublecomplex z_sqrt(doublecomplex *); + + + +#ifdef __cplusplus + } +#endif + +#endif + +#endif /* __SUPERLU_DCOMPLEX */ Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_ddefs.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/dsp_defs.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_ddefs.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_ddefs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,279 @@ + +/*! @file slu_ddefs.h + * \brief Header file for real operations + * + *
 
+ * -- SuperLU routine (version 4.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * June 30, 2009
+ * 
+ * Global data structures used in LU factorization -
+ * 
+ *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ *   (xsup,supno): supno[i] is the supernode no to which i belongs;
+ *	xsup(s) points to the beginning of the s-th supernode.
+ *	e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
+ *	        xsup 0 1 2 4 7 12
+ *	Note: dfs will be performed on supernode rep. relative to the new 
+ *	      row pivoting ordering
+ *
+ *   (xlsub,lsub): lsub[*] contains the compressed subscript of
+ *	rectangular supernodes; xlsub[j] points to the starting
+ *	location of the j-th column in lsub[*]. Note that xlsub 
+ *	is indexed by column.
+ *	Storage: original row subscripts
+ *
+ *      During the course of sparse LU factorization, we also use
+ *	(xlsub,lsub) for the purpose of symmetric pruning. For each
+ *	supernode {s,s+1,...,t=s+r} with first column s and last
+ *	column t, the subscript set
+ *		lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ *	is the structure of column s (i.e. structure of this supernode).
+ *	It is used for the storage of numerical values.
+ *	Furthermore,
+ *		lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ *	is the structure of the last column t of this supernode.
+ *	It is for the purpose of symmetric pruning. Therefore, the
+ *	structural subscripts can be rearranged without making physical
+ *	interchanges among the numerical values.
+ *
+ *	However, if the supernode has only one column, then we
+ *	only keep one set of subscripts. For any subscript interchange
+ *	performed, similar interchange must be done on the numerical
+ *	values.
+ *
+ *	The last column structures (for pruning) will be removed
+ *	after the numercial LU factorization phase.
+ *
+ *   (xlusup,lusup): lusup[*] contains the numerical values of the
+ *	rectangular supernodes; xlusup[j] points to the starting
+ *	location of the j-th column in storage vector lusup[*]
+ *	Note: xlusup is indexed by column.
+ *	Each rectangular supernode is stored by column-major
+ *	scheme, consistent with Fortran 2-dim array storage.
+ *
+ *   (xusub,ucol,usub): ucol[*] stores the numerical values of
+ *	U-columns outside the rectangular supernodes. The row
+ *	subscript of nonzero ucol[k] is stored in usub[k].
+ *	xusub[i] points to the starting location of column i in ucol.
+ *	Storage: new row subscripts; that is subscripts of PA.
+ * 
+ */ +#ifndef __SUPERLU_dSP_DEFS /* allow multiple inclusions */ +#define __SUPERLU_dSP_DEFS + +/* + * File name: dsp_defs.h + * Purpose: Sparse matrix types and function prototypes + * History: + */ + +#ifdef _CRAY +#include +#include +#endif + +/* Define my integer type int_t */ +typedef int int_t; /* default */ + +#include +#include +#include "slu_Cnames.h" +#include "supermatrix.h" +#include "slu_util.h" + + + +typedef struct { + int *xsup; /* supernode and column mapping */ + int *supno; + int *lsub; /* compressed L subscripts */ + int *xlsub; + double *lusup; /* L supernodes */ + int *xlusup; + double *ucol; /* U columns */ + int *usub; + int *xusub; + int nzlmax; /* current max size of lsub */ + int nzumax; /* " " " ucol */ + int nzlumax; /* " " " lusup */ + int n; /* number of columns in the matrix */ + LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ + int num_expansions; + ExpHeader *expanders; /* Array of pointers to 4 types of memory */ + LU_stack_t stack; /* use user supplied memory */ +} GlobalLU_t; + + +/* -------- Prototypes -------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Driver routines */ +extern void +dgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +dgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, double *, double *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, + double *, double *, double *, double *, + mem_usage_t *, SuperLUStat_t *, int *); + /* ILU */ +extern void +dgsisv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +dgsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, double *, double *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, double *, double *, + mem_usage_t *, SuperLUStat_t *, int *); + + +/*! \brief Supernodal LU factor related */ +extern void +dCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +dCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +dCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); +extern void +dCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int, + Stype_t, Dtype_t, Mtype_t); +extern void +dCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, + int *, int *, int *, int *, int *, + Stype_t, Dtype_t, Mtype_t); +extern void +dCopy_Dense_Matrix(int, int, double *, int, double *, int); + +extern void countnz (const int, int *, int *, int *, GlobalLU_t *); +extern void ilu_countnz (const int, int *, int *, GlobalLU_t *); +extern void fixupL (const int, const int *, GlobalLU_t *); + +extern void dallocateA (int, int, double **, int **, int **); +extern void dgstrf (superlu_options_t*, SuperMatrix*, + int, int, int*, void *, int, int *, int *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); +extern int dsnode_dfs (const int, const int, const int *, const int *, + const int *, int *, int *, GlobalLU_t *); +extern int dsnode_bmod (const int, const int, const int, double *, + double *, GlobalLU_t *, SuperLUStat_t*); +extern void dpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, double *, int *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern void dpanel_bmod (const int, const int, const int, const int, + double *, double *, int *, int *, + GlobalLU_t *, SuperLUStat_t*); +extern int dcolumn_dfs (const int, const int, int *, int *, int *, int *, + int *, int *, int *, int *, int *, GlobalLU_t *); +extern int dcolumn_bmod (const int, const int, double *, + double *, int *, int *, int, + GlobalLU_t *, SuperLUStat_t*); +extern int dcopy_to_ucol (int, int, int *, int *, int *, + double *, GlobalLU_t *); +extern int dpivotL (const int, const double, int *, int *, + int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); +extern void dpruneL (const int, const int *, const int, const int, + const int *, const int *, int *, GlobalLU_t *); +extern void dreadmt (int *, int *, int *, double **, int **, int **); +extern void dGenXtrue (int, int, double *, int); +extern void dFillRHS (trans_t, int, double *, int, SuperMatrix *, + SuperMatrix *); +extern void dgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, + SuperMatrix *, SuperLUStat_t*, int *); +/* ILU */ +extern void dgsitrf (superlu_options_t*, SuperMatrix*, int, int, int*, + void *, int, int *, int *, SuperMatrix *, SuperMatrix *, + SuperLUStat_t*, int *); +extern int dldperm(int, int, int, int [], int [], double [], + int [], double [], double []); +extern int ilu_dsnode_dfs (const int, const int, const int *, const int *, + const int *, int *, GlobalLU_t *); +extern void ilu_dpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, double *, double *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern int ilu_dcolumn_dfs (const int, const int, int *, int *, int *, + int *, int *, int *, int *, int *, + GlobalLU_t *); +extern int ilu_dcopy_to_ucol (int, int, int *, int *, int *, + double *, int, milu_t, double, int, + double *, int *, GlobalLU_t *, int *); +extern int ilu_dpivotL (const int, const double, int *, int *, int, int *, + int *, int *, int *, double, milu_t, + double, GlobalLU_t *, SuperLUStat_t*); +extern int ilu_ddrop_row (superlu_options_t *, int, int, double, + int, int *, double *, GlobalLU_t *, + double *, int *, int); + + +/*! \brief Driver related */ + +extern void dgsequ (SuperMatrix *, double *, double *, double *, + double *, double *, int *); +extern void dlaqgs (SuperMatrix *, double *, double *, double, + double, double, char *); +extern void dgscon (char *, SuperMatrix *, SuperMatrix *, + double, double *, SuperLUStat_t*, int *); +extern double dPivotGrowth(int, SuperMatrix *, int *, + SuperMatrix *, SuperMatrix *); +extern void dgsrfs (trans_t, SuperMatrix *, SuperMatrix *, + SuperMatrix *, int *, int *, char *, double *, + double *, SuperMatrix *, SuperMatrix *, + double *, double *, SuperLUStat_t*, int *); + +extern int sp_dtrsv (char *, char *, char *, SuperMatrix *, + SuperMatrix *, double *, SuperLUStat_t*, int *); +extern int sp_dgemv (char *, double, SuperMatrix *, double *, + int, double, double *, int); + +extern int sp_dgemm (char *, char *, int, int, int, double, + SuperMatrix *, double *, int, double, + double *, int); +extern double dlamch_(char *); + + +/*! \brief Memory-related */ +extern int dLUMemInit (fact_t, void *, int, int, int, int, int, + double, SuperMatrix *, SuperMatrix *, + GlobalLU_t *, int **, double **); +extern void dSetRWork (int, int, double *, double **, double **); +extern void dLUWorkFree (int *, double *, GlobalLU_t *); +extern int dLUMemXpand (int, int, MemType, int *, GlobalLU_t *); + +extern double *doubleMalloc(int); +extern double *doubleCalloc(int); +extern int dmemory_usage(const int, const int, const int, const int); +extern int dQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); +extern int ilu_dQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); + +/*! \brief Auxiliary routines */ +extern void dreadhb(int *, int *, int *, double **, int **, int **); +extern void dreadrb(int *, int *, int *, double **, int **, int **); +extern void dreadtriple(int *, int *, int *, double **, int **, int **); +extern void dCompRow_to_CompCol(int, int, int, double*, int*, int*, + double **, int **, int **); +extern void dfill (double *, int, double); +extern void dinf_norm_error (int, SuperMatrix *, double *); +extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, + double, double, double *, double *, char *); + +/*! \brief Routines for debugging */ +extern void dPrint_CompCol_Matrix(char *, SuperMatrix *); +extern void dPrint_SuperNode_Matrix(char *, SuperMatrix *); +extern void dPrint_Dense_Matrix(char *, SuperMatrix *); +extern void dprint_lu_col(char *, int, int, int *, GlobalLU_t *); +extern int print_double_vec(char *, int, double *); +extern void check_tempv(int, double *); + +#ifdef __cplusplus + } +#endif + +#endif /* __SUPERLU_dSP_DEFS */ + Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,78 @@ + +/*! @file slu_scomplex.h + * \brief Header file for complex operations + *
 
+ *  -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Contains definitions for various complex operations.
+ * This header file is to be included in source files c*.c
+ * 
+ */ +#ifndef __SUPERLU_SCOMPLEX /* allow multiple inclusions */ +#define __SUPERLU_SCOMPLEX + + +#ifndef SCOMPLEX_INCLUDE +#define SCOMPLEX_INCLUDE + +typedef struct { float r, i; } complex; + + +/* Macro definitions */ + +/*! \brief Complex Addition c = a + b */ +#define c_add(c, a, b) { (c)->r = (a)->r + (b)->r; \ + (c)->i = (a)->i + (b)->i; } + +/*! \brief Complex Subtraction c = a - b */ +#define c_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \ + (c)->i = (a)->i - (b)->i; } + +/*! \brief Complex-Double Multiplication */ +#define cs_mult(c, a, b) { (c)->r = (a)->r * (b); \ + (c)->i = (a)->i * (b); } + +/*! \brief Complex-Complex Multiplication */ +#define cc_mult(c, a, b) { \ + float cr, ci; \ + cr = (a)->r * (b)->r - (a)->i * (b)->i; \ + ci = (a)->i * (b)->r + (a)->r * (b)->i; \ + (c)->r = cr; \ + (c)->i = ci; \ + } + +#define cc_conj(a, b) { \ + (a)->r = (b)->r; \ + (a)->i = -((b)->i); \ + } + +/*! \brief Complex equality testing */ +#define c_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototypes for functions in scomplex.c */ +void c_div(complex *, complex *, complex *); +double c_abs(complex *); /* exact */ +double c_abs1(complex *); /* approximate */ +void c_exp(complex *, complex *); +void r_cnjg(complex *, complex *); +double r_imag(complex *); +complex c_sgn(complex *); +complex c_sqrt(complex *); + + + +#ifdef __cplusplus + } +#endif + +#endif + +#endif /* __SUPERLU_SCOMPLEX */ Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_sdefs.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_defs.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_sdefs.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_sdefs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,279 @@ + +/*! @file slu_sdefs.h + * \brief Header file for real operations + * + *
 
+ * -- SuperLU routine (version 4.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * June 30, 2009
+ * 
+ * Global data structures used in LU factorization -
+ * 
+ *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ *   (xsup,supno): supno[i] is the supernode no to which i belongs;
+ *	xsup(s) points to the beginning of the s-th supernode.
+ *	e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
+ *	        xsup 0 1 2 4 7 12
+ *	Note: dfs will be performed on supernode rep. relative to the new 
+ *	      row pivoting ordering
+ *
+ *   (xlsub,lsub): lsub[*] contains the compressed subscript of
+ *	rectangular supernodes; xlsub[j] points to the starting
+ *	location of the j-th column in lsub[*]. Note that xlsub 
+ *	is indexed by column.
+ *	Storage: original row subscripts
+ *
+ *      During the course of sparse LU factorization, we also use
+ *	(xlsub,lsub) for the purpose of symmetric pruning. For each
+ *	supernode {s,s+1,...,t=s+r} with first column s and last
+ *	column t, the subscript set
+ *		lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ *	is the structure of column s (i.e. structure of this supernode).
+ *	It is used for the storage of numerical values.
+ *	Furthermore,
+ *		lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ *	is the structure of the last column t of this supernode.
+ *	It is for the purpose of symmetric pruning. Therefore, the
+ *	structural subscripts can be rearranged without making physical
+ *	interchanges among the numerical values.
+ *
+ *	However, if the supernode has only one column, then we
+ *	only keep one set of subscripts. For any subscript interchange
+ *	performed, similar interchange must be done on the numerical
+ *	values.
+ *
+ *	The last column structures (for pruning) will be removed
+ *	after the numercial LU factorization phase.
+ *
+ *   (xlusup,lusup): lusup[*] contains the numerical values of the
+ *	rectangular supernodes; xlusup[j] points to the starting
+ *	location of the j-th column in storage vector lusup[*]
+ *	Note: xlusup is indexed by column.
+ *	Each rectangular supernode is stored by column-major
+ *	scheme, consistent with Fortran 2-dim array storage.
+ *
+ *   (xusub,ucol,usub): ucol[*] stores the numerical values of
+ *	U-columns outside the rectangular supernodes. The row
+ *	subscript of nonzero ucol[k] is stored in usub[k].
+ *	xusub[i] points to the starting location of column i in ucol.
+ *	Storage: new row subscripts; that is subscripts of PA.
+ * 
+ */ +#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */ +#define __SUPERLU_sSP_DEFS + +/* + * File name: ssp_defs.h + * Purpose: Sparse matrix types and function prototypes + * History: + */ + +#ifdef _CRAY +#include +#include +#endif + +/* Define my integer type int_t */ +typedef int int_t; /* default */ + +#include +#include +#include "slu_Cnames.h" +#include "supermatrix.h" +#include "slu_util.h" + + + +typedef struct { + int *xsup; /* supernode and column mapping */ + int *supno; + int *lsub; /* compressed L subscripts */ + int *xlsub; + float *lusup; /* L supernodes */ + int *xlusup; + float *ucol; /* U columns */ + int *usub; + int *xusub; + int nzlmax; /* current max size of lsub */ + int nzumax; /* " " " ucol */ + int nzlumax; /* " " " lusup */ + int n; /* number of columns in the matrix */ + LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ + int num_expansions; + ExpHeader *expanders; /* Array of pointers to 4 types of memory */ + LU_stack_t stack; /* use user supplied memory */ +} GlobalLU_t; + + +/* -------- Prototypes -------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Driver routines */ +extern void +sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, float *, float *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, + float *, float *, float *, float *, + mem_usage_t *, SuperLUStat_t *, int *); + /* ILU */ +extern void +sgsisv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +sgsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, float *, float *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, float *, float *, + mem_usage_t *, SuperLUStat_t *, int *); + + +/*! \brief Supernodal LU factor related */ +extern void +sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); +extern void +sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int, + Stype_t, Dtype_t, Mtype_t); +extern void +sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, + int *, int *, int *, int *, int *, + Stype_t, Dtype_t, Mtype_t); +extern void +sCopy_Dense_Matrix(int, int, float *, int, float *, int); + +extern void countnz (const int, int *, int *, int *, GlobalLU_t *); +extern void ilu_countnz (const int, int *, int *, GlobalLU_t *); +extern void fixupL (const int, const int *, GlobalLU_t *); + +extern void sallocateA (int, int, float **, int **, int **); +extern void sgstrf (superlu_options_t*, SuperMatrix*, + int, int, int*, void *, int, int *, int *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); +extern int ssnode_dfs (const int, const int, const int *, const int *, + const int *, int *, int *, GlobalLU_t *); +extern int ssnode_bmod (const int, const int, const int, float *, + float *, GlobalLU_t *, SuperLUStat_t*); +extern void spanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, float *, int *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern void spanel_bmod (const int, const int, const int, const int, + float *, float *, int *, int *, + GlobalLU_t *, SuperLUStat_t*); +extern int scolumn_dfs (const int, const int, int *, int *, int *, int *, + int *, int *, int *, int *, int *, GlobalLU_t *); +extern int scolumn_bmod (const int, const int, float *, + float *, int *, int *, int, + GlobalLU_t *, SuperLUStat_t*); +extern int scopy_to_ucol (int, int, int *, int *, int *, + float *, GlobalLU_t *); +extern int spivotL (const int, const double, int *, int *, + int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); +extern void spruneL (const int, const int *, const int, const int, + const int *, const int *, int *, GlobalLU_t *); +extern void sreadmt (int *, int *, int *, float **, int **, int **); +extern void sGenXtrue (int, int, float *, int); +extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *, + SuperMatrix *); +extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, + SuperMatrix *, SuperLUStat_t*, int *); +/* ILU */ +extern void sgsitrf (superlu_options_t*, SuperMatrix*, int, int, int*, + void *, int, int *, int *, SuperMatrix *, SuperMatrix *, + SuperLUStat_t*, int *); +extern int sldperm(int, int, int, int [], int [], float [], + int [], float [], float []); +extern int ilu_ssnode_dfs (const int, const int, const int *, const int *, + const int *, int *, GlobalLU_t *); +extern void ilu_spanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, float *, float *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern int ilu_scolumn_dfs (const int, const int, int *, int *, int *, + int *, int *, int *, int *, int *, + GlobalLU_t *); +extern int ilu_scopy_to_ucol (int, int, int *, int *, int *, + float *, int, milu_t, double, int, + float *, int *, GlobalLU_t *, int *); +extern int ilu_spivotL (const int, const double, int *, int *, int, int *, + int *, int *, int *, double, milu_t, + float, GlobalLU_t *, SuperLUStat_t*); +extern int ilu_sdrop_row (superlu_options_t *, int, int, double, + int, int *, double *, GlobalLU_t *, + float *, int *, int); + + +/*! \brief Driver related */ + +extern void sgsequ (SuperMatrix *, float *, float *, float *, + float *, float *, int *); +extern void slaqgs (SuperMatrix *, float *, float *, float, + float, float, char *); +extern void sgscon (char *, SuperMatrix *, SuperMatrix *, + float, float *, SuperLUStat_t*, int *); +extern float sPivotGrowth(int, SuperMatrix *, int *, + SuperMatrix *, SuperMatrix *); +extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *, + SuperMatrix *, int *, int *, char *, float *, + float *, SuperMatrix *, SuperMatrix *, + float *, float *, SuperLUStat_t*, int *); + +extern int sp_strsv (char *, char *, char *, SuperMatrix *, + SuperMatrix *, float *, SuperLUStat_t*, int *); +extern int sp_sgemv (char *, float, SuperMatrix *, float *, + int, float, float *, int); + +extern int sp_sgemm (char *, char *, int, int, int, float, + SuperMatrix *, float *, int, float, + float *, int); +extern double slamch_(char *); + + +/*! \brief Memory-related */ +extern int sLUMemInit (fact_t, void *, int, int, int, int, int, + float, SuperMatrix *, SuperMatrix *, + GlobalLU_t *, int **, float **); +extern void sSetRWork (int, int, float *, float **, float **); +extern void sLUWorkFree (int *, float *, GlobalLU_t *); +extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *); + +extern float *floatMalloc(int); +extern float *floatCalloc(int); +extern int smemory_usage(const int, const int, const int, const int); +extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); +extern int ilu_sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); + +/*! \brief Auxiliary routines */ +extern void sreadhb(int *, int *, int *, float **, int **, int **); +extern void sreadrb(int *, int *, int *, float **, int **, int **); +extern void sreadtriple(int *, int *, int *, float **, int **, int **); +extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*, + float **, int **, int **); +extern void sfill (float *, int, float); +extern void sinf_norm_error (int, SuperMatrix *, float *); +extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, + float, float, float *, float *, char *); + +/*! \brief Routines for debugging */ +extern void sPrint_CompCol_Matrix(char *, SuperMatrix *); +extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *); +extern void sPrint_Dense_Matrix(char *, SuperMatrix *); +extern void sprint_lu_col(char *, int, int, int *, GlobalLU_t *); +extern int print_double_vec(char *, int, double *); +extern void check_tempv(int, float *); + +#ifdef __cplusplus + } +#endif + +#endif /* __SUPERLU_sSP_DEFS */ + Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,367 @@ +/** @file slu_util.h + * \brief Utility header file + * + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + */ + +#ifndef __SUPERLU_UTIL /* allow multiple inclusions */ +#define __SUPERLU_UTIL + +#include +#include +#include +/* +#ifndef __STDC__ +#include +#endif +*/ +#include + +/*********************************************************************** + * Macros + ***********************************************************************/ +#define FIRSTCOL_OF_SNODE(i) (xsup[i]) +/* No of marker arrays used in the symbolic factorization, + each of size n */ +#define NO_MARKER 3 +#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) ) + +#ifndef USER_ABORT +#define USER_ABORT(msg) superlu_abort_and_exit(msg) +#endif + +#define ABORT(err_msg) \ + { char msg[256];\ + sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ + USER_ABORT(msg); } + + +#ifndef USER_MALLOC +#if 1 +#define USER_MALLOC(size) superlu_malloc(size) +#else +/* The following may check out some uninitialized data */ +#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size) +#endif +#endif + +#define SUPERLU_MALLOC(size) USER_MALLOC(size) + +#ifndef USER_FREE +#define USER_FREE(addr) superlu_free(addr) +#endif + +#define SUPERLU_FREE(addr) USER_FREE(addr) + +#define CHECK_MALLOC(where) { \ + extern int superlu_malloc_total; \ + printf("%s: malloc_total %d Bytes\n", \ + where, superlu_malloc_total); \ +} + +#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) +#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) ) + +/********************************************************* + * Macros used for easy access of sparse matrix entries. * + *********************************************************/ +#define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) +#define L_SUB(ptr) ( Lstore->rowind[ptr] ) +#define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) +#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) +#define U_NZ_START(col) ( Ustore->colptr[col] ) +#define U_SUB(ptr) ( Ustore->rowind[ptr] ) + + +/*********************************************************************** + * Constants + ***********************************************************************/ +#define EMPTY (-1) +/*#define NO (-1)*/ +#define FALSE 0 +#define TRUE 1 + +#define NO_MEMTYPE 4 /* 0: lusup; + 1: ucol; + 2: lsub; + 3: usub */ + +#define GluIntArray(n) (5 * (n) + 5) + +/* Dropping rules */ +#define NODROP ( 0x0000 ) +#define DROP_BASIC ( 0x0001 ) /* ILU(tau) */ +#define DROP_PROWS ( 0x0002 ) /* ILUTP: keep p maximum rows */ +#define DROP_COLUMN ( 0x0004 ) /* ILUTP: for j-th column, + p = gamma * nnz(A(:,j)) */ +#define DROP_AREA ( 0x0008 ) /* ILUTP: for j-th column, use + nnz(F(:,1:j)) / nnz(A(:,1:j)) + to limit memory growth */ +#define DROP_SECONDARY ( 0x000E ) /* PROWS | COLUMN | AREA */ +#define DROP_DYNAMIC ( 0x0010 ) /* adaptive tau */ +#define DROP_INTERP ( 0x0100 ) /* use interpolation */ + + +#if 1 +#define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */ +#else +#define MILU_ALPHA 1.0 /* multiple of drop_sum to be added to diagonal */ +#endif + + +/*********************************************************************** + * Enumerate types + ***********************************************************************/ +typedef enum {NO, YES} yes_no_t; +typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t; +typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t; +typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t; +typedef enum {NOTRANS, TRANS, CONJ} trans_t; +typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t; +typedef enum {NOREFINE, SINGLE=1, DOUBLE, EXTRA} IterRefine_t; +typedef enum {LUSUP, UCOL, LSUB, USUB} MemType; +typedef enum {HEAD, TAIL} stack_end_t; +typedef enum {SYSTEM, USER} LU_space_t; +typedef enum {ONE_NORM, TWO_NORM, INF_NORM} norm_t; +typedef enum {SILU, SMILU_1, SMILU_2, SMILU_3} milu_t; +#if 0 +typedef enum {NODROP = 0x0000, + DROP_BASIC = 0x0001, /* ILU(tau) */ + DROP_PROWS = 0x0002, /* ILUTP: keep p maximum rows */ + DROP_COLUMN = 0x0004, /* ILUTP: for j-th column, + p = gamma * nnz(A(:,j)) */ + DROP_AREA = 0x0008, /* ILUTP: for j-th column, use + nnz(F(:,1:j)) / nnz(A(:,1:j)) + to limit memory growth */ + DROP_SECONDARY = 0x000E, /* PROWS | COLUMN | AREA */ + DROP_DYNAMIC = 0x0010, + DROP_INTERP = 0x0100} rule_t; +#endif + + +/* + * The following enumerate type is used by the statistics variable + * to keep track of flop count and time spent at various stages. + * + * Note that not all of the fields are disjoint. + */ +typedef enum { + COLPERM, /* find a column ordering that minimizes fills */ + RELAX, /* find artificial supernodes */ + ETREE, /* compute column etree */ + EQUIL, /* equilibrate the original matrix */ + FACT, /* perform LU factorization */ + RCOND, /* estimate reciprocal condition number */ + SOLVE, /* forward and back solves */ + REFINE, /* perform iterative refinement */ + TRSV, /* fraction of FACT spent in xTRSV */ + GEMV, /* fraction of FACT spent in xGEMV */ + FERR, /* estimate error bounds after iterative refinement */ + NPHASES /* total number of phases */ +} PhaseType; + + +/*********************************************************************** + * Type definitions + ***********************************************************************/ +typedef float flops_t; +typedef unsigned char Logical; + +/* + *-- This contains the options used to control the solve process. + * + * Fact (fact_t) + * Specifies whether or not the factored form of the matrix + * A is supplied on entry, and if not, how the matrix A should + * be factorizaed. + * = DOFACT: The matrix A will be factorized from scratch, and the + * factors will be stored in L and U. + * = SamePattern: The matrix A will be factorized assuming + * that a factorization of a matrix with the same sparsity + * pattern was performed prior to this one. Therefore, this + * factorization will reuse column permutation vector + * ScalePermstruct->perm_c and the column elimination tree + * LUstruct->etree. + * = SamePattern_SameRowPerm: The matrix A will be factorized + * assuming that a factorization of a matrix with the same + * sparsity pattern and similar numerical values was performed + * prior to this one. Therefore, this factorization will reuse + * both row and column scaling factors R and C, both row and + * column permutation vectors perm_r and perm_c, and the + * data structure set up from the previous symbolic factorization. + * = FACTORED: On entry, L, U, perm_r and perm_c contain the + * factored form of A. If DiagScale is not NOEQUIL, the matrix + * A has been equilibrated with scaling factors R and C. + * + * Equil (yes_no_t) + * Specifies whether to equilibrate the system (scale A's row and + * columns to have unit norm). + * + * ColPerm (colperm_t) + * Specifies what type of column permutation to use to reduce fill. + * = NATURAL: use the natural ordering + * = MMD_ATA: use minimum degree ordering on structure of A'*A + * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A + * = COLAMD: use approximate minimum degree column ordering + * = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[] + * + * Trans (trans_t) + * Specifies the form of the system of equations: + * = NOTRANS: A * X = B (No transpose) + * = TRANS: A**T * X = B (Transpose) + * = CONJ: A**H * X = B (Transpose) + * + * IterRefine (IterRefine_t) + * Specifies whether to perform iterative refinement. + * = NO: no iterative refinement + * = WorkingPrec: perform iterative refinement in working precision + * = ExtraPrec: perform iterative refinement in extra precision + * + * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU) + * Specifies the threshold used for a diagonal entry to be an + * acceptable pivot. + * + * PivotGrowth (yes_no_t) + * Specifies whether to compute the reciprocal pivot growth. + * + * ConditionNumber (ues_no_t) + * Specifies whether to compute the reciprocal condition number. + * + * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU) + * Specifies whether to permute rows of the original matrix. + * = NO: not to permute the rows + * = LargeDiag: make the diagonal large relative to the off-diagonal + * = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[] + * + * SymmetricMode (yest_no_t) + * Specifies whether to use symmetric mode. + * + * PrintStat (yes_no_t) + * Specifies whether to print the solver's statistics. + * + * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) + * Specifies whether to replace the tiny diagonals by + * sqrt(epsilon)*||A|| during LU factorization. + * + * SolveInitialized (yes_no_t) (only for SuperLU_DIST) + * Specifies whether the initialization has been performed to the + * triangular solve. + * + * RefineInitialized (yes_no_t) (only for SuperLU_DIST) + * Specifies whether the initialization has been performed to the + * sparse matrix-vector multiplication routine needed in iterative + * refinement. + */ +typedef struct { + fact_t Fact; + yes_no_t Equil; + colperm_t ColPerm; + trans_t Trans; + IterRefine_t IterRefine; + double DiagPivotThresh; + yes_no_t PivotGrowth; + yes_no_t ConditionNumber; + rowperm_t RowPerm; + yes_no_t SymmetricMode; + yes_no_t PrintStat; + yes_no_t ReplaceTinyPivot; + yes_no_t SolveInitialized; + yes_no_t RefineInitialized; + double ILU_DropTol; /* threshold for dropping */ + double ILU_FillTol; /* threshold for zero pivot perturbation */ + double ILU_FillFactor; /* gamma in the secondary dropping */ + int ILU_DropRule; + norm_t ILU_Norm; + milu_t ILU_MILU; +} superlu_options_t; + +/*! \brief Headers for 4 types of dynamatically managed memory */ +typedef struct e_node { + int size; /* length of the memory that has been used */ + void *mem; /* pointer to the new malloc'd store */ +} ExpHeader; + +typedef struct { + int size; + int used; + int top1; /* grow upward, relative to &array[0] */ + int top2; /* grow downward */ + void *array; +} LU_stack_t; + +typedef struct { + int *panel_histo; /* histogram of panel size distribution */ + double *utime; /* running time at various phases */ + flops_t *ops; /* operation count at various phases */ + int TinyPivots; /* number of tiny pivots */ + int RefineSteps; /* number of iterative refinement steps */ + int expansions; /* number of memory expansions */ +} SuperLUStat_t; + +typedef struct { + float for_lu; + float total_needed; +} mem_usage_t; + + +/*********************************************************************** + * Prototypes + ***********************************************************************/ +#ifdef __cplusplus +extern "C" { +#endif + +extern void Destroy_SuperMatrix_Store(SuperMatrix *); +extern void Destroy_CompCol_Matrix(SuperMatrix *); +extern void Destroy_CompRow_Matrix(SuperMatrix *); +extern void Destroy_SuperNode_Matrix(SuperMatrix *); +extern void Destroy_CompCol_Permuted(SuperMatrix *); +extern void Destroy_Dense_Matrix(SuperMatrix *); +extern void get_perm_c(int, SuperMatrix *, int *); +extern void set_default_options(superlu_options_t *options); +extern void ilu_set_default_options(superlu_options_t *options); +extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*, + SuperMatrix*); +extern void superlu_abort_and_exit(char*); +extern void *superlu_malloc (size_t); +extern int *intMalloc (int); +extern int *intCalloc (int); +extern void superlu_free (void*); +extern void SetIWork (int, int, int, int *, int **, int **, int **, + int **, int **, int **, int **); +extern int sp_coletree (int *, int *, int *, int, int, int *); +extern void relax_snode (const int, int *, const int, int *, int *); +extern void heap_relax_snode (const int, int *, const int, int *, int *); +extern int mark_relax(int, int *, int *, int *, int *, int *, int *); +extern void ilu_relax_snode (const int, int *, const int, int *, + int *, int *); +extern void ilu_heap_relax_snode (const int, int *, const int, int *, + int *, int*); +extern void resetrep_col (const int, const int *, int *); +extern int spcoletree (int *, int *, int *, int, int, int *); +extern int *TreePostorder (int, int *); +extern double SuperLU_timer_ (); +extern int sp_ienv (int); +extern int lsame_ (char *, char *); +extern int xerbla_ (char *, int *); +extern void ifill (int *, int, int); +extern void snode_profile (int, int *); +extern void super_stats (int, int *); +extern void check_repfnz(int, int, int, int *); +extern void PrintSumm (char *, int, int, int); +extern void StatInit(SuperLUStat_t *); +extern void StatPrint (SuperLUStat_t *); +extern void StatFree(SuperLUStat_t *); +extern void print_panel_seg(int, int, int, int, int *, int *); +extern int print_int_vec(char *,int, int *); +extern int slu_PrintInt10(char *, int, int *); + +#ifdef __cplusplus + } +#endif + +#endif /* __SUPERLU_UTIL */ Copied: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_zdefs.h (from rev 6343, trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_defs.h) =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_zdefs.h (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_zdefs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,282 @@ + +/*! @file slu_zdefs.h + * \brief Header file for real operations + * + *
 
+ * -- SuperLU routine (version 4.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * June 30, 2009
+ * 
+ * Global data structures used in LU factorization -
+ * 
+ *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ *   (xsup,supno): supno[i] is the supernode no to which i belongs;
+ *	xsup(s) points to the beginning of the s-th supernode.
+ *	e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
+ *	        xsup 0 1 2 4 7 12
+ *	Note: dfs will be performed on supernode rep. relative to the new 
+ *	      row pivoting ordering
+ *
+ *   (xlsub,lsub): lsub[*] contains the compressed subscript of
+ *	rectangular supernodes; xlsub[j] points to the starting
+ *	location of the j-th column in lsub[*]. Note that xlsub 
+ *	is indexed by column.
+ *	Storage: original row subscripts
+ *
+ *      During the course of sparse LU factorization, we also use
+ *	(xlsub,lsub) for the purpose of symmetric pruning. For each
+ *	supernode {s,s+1,...,t=s+r} with first column s and last
+ *	column t, the subscript set
+ *		lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ *	is the structure of column s (i.e. structure of this supernode).
+ *	It is used for the storage of numerical values.
+ *	Furthermore,
+ *		lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ *	is the structure of the last column t of this supernode.
+ *	It is for the purpose of symmetric pruning. Therefore, the
+ *	structural subscripts can be rearranged without making physical
+ *	interchanges among the numerical values.
+ *
+ *	However, if the supernode has only one column, then we
+ *	only keep one set of subscripts. For any subscript interchange
+ *	performed, similar interchange must be done on the numerical
+ *	values.
+ *
+ *	The last column structures (for pruning) will be removed
+ *	after the numercial LU factorization phase.
+ *
+ *   (xlusup,lusup): lusup[*] contains the numerical values of the
+ *	rectangular supernodes; xlusup[j] points to the starting
+ *	location of the j-th column in storage vector lusup[*]
+ *	Note: xlusup is indexed by column.
+ *	Each rectangular supernode is stored by column-major
+ *	scheme, consistent with Fortran 2-dim array storage.
+ *
+ *   (xusub,ucol,usub): ucol[*] stores the numerical values of
+ *	U-columns outside the rectangular supernodes. The row
+ *	subscript of nonzero ucol[k] is stored in usub[k].
+ *	xusub[i] points to the starting location of column i in ucol.
+ *	Storage: new row subscripts; that is subscripts of PA.
+ * 
+ */ +#ifndef __SUPERLU_zSP_DEFS /* allow multiple inclusions */ +#define __SUPERLU_zSP_DEFS + +/* + * File name: zsp_defs.h + * Purpose: Sparse matrix types and function prototypes + * History: + */ + +#ifdef _CRAY +#include +#include +#endif + +/* Define my integer type int_t */ +typedef int int_t; /* default */ + +#include +#include +#include "slu_Cnames.h" +#include "supermatrix.h" +#include "slu_util.h" +#include "slu_dcomplex.h" + + + +typedef struct { + int *xsup; /* supernode and column mapping */ + int *supno; + int *lsub; /* compressed L subscripts */ + int *xlsub; + doublecomplex *lusup; /* L supernodes */ + int *xlusup; + doublecomplex *ucol; /* U columns */ + int *usub; + int *xusub; + int nzlmax; /* current max size of lsub */ + int nzumax; /* " " " ucol */ + int nzlumax; /* " " " lusup */ + int n; /* number of columns in the matrix */ + LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ + int num_expansions; + ExpHeader *expanders; /* Array of pointers to 4 types of memory */ + LU_stack_t stack; /* use user supplied memory */ +} GlobalLU_t; + + +/* -------- Prototypes -------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Driver routines */ +extern void +zgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +zgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, double *, double *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, + double *, double *, double *, double *, + mem_usage_t *, SuperLUStat_t *, int *); + /* ILU */ +extern void +zgsisv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); +extern void +zgsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, + char *, double *, double *, SuperMatrix *, SuperMatrix *, + void *, int, SuperMatrix *, SuperMatrix *, double *, double *, + mem_usage_t *, SuperLUStat_t *, int *); + + +/*! \brief Supernodal LU factor related */ +extern void +zCreate_CompCol_Matrix(SuperMatrix *, int, int, int, doublecomplex *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +zCreate_CompRow_Matrix(SuperMatrix *, int, int, int, doublecomplex *, + int *, int *, Stype_t, Dtype_t, Mtype_t); +extern void +zCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); +extern void +zCreate_Dense_Matrix(SuperMatrix *, int, int, doublecomplex *, int, + Stype_t, Dtype_t, Mtype_t); +extern void +zCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, doublecomplex *, + int *, int *, int *, int *, int *, + Stype_t, Dtype_t, Mtype_t); +extern void +zCopy_Dense_Matrix(int, int, doublecomplex *, int, doublecomplex *, int); + +extern void countnz (const int, int *, int *, int *, GlobalLU_t *); +extern void ilu_countnz (const int, int *, int *, GlobalLU_t *); +extern void fixupL (const int, const int *, GlobalLU_t *); + +extern void zallocateA (int, int, doublecomplex **, int **, int **); +extern void zgstrf (superlu_options_t*, SuperMatrix*, + int, int, int*, void *, int, int *, int *, + SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); +extern int zsnode_dfs (const int, const int, const int *, const int *, + const int *, int *, int *, GlobalLU_t *); +extern int zsnode_bmod (const int, const int, const int, doublecomplex *, + doublecomplex *, GlobalLU_t *, SuperLUStat_t*); +extern void zpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, doublecomplex *, int *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern void zpanel_bmod (const int, const int, const int, const int, + doublecomplex *, doublecomplex *, int *, int *, + GlobalLU_t *, SuperLUStat_t*); +extern int zcolumn_dfs (const int, const int, int *, int *, int *, int *, + int *, int *, int *, int *, int *, GlobalLU_t *); +extern int zcolumn_bmod (const int, const int, doublecomplex *, + doublecomplex *, int *, int *, int, + GlobalLU_t *, SuperLUStat_t*); +extern int zcopy_to_ucol (int, int, int *, int *, int *, + doublecomplex *, GlobalLU_t *); +extern int zpivotL (const int, const double, int *, int *, + int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); +extern void zpruneL (const int, const int *, const int, const int, + const int *, const int *, int *, GlobalLU_t *); +extern void zreadmt (int *, int *, int *, doublecomplex **, int **, int **); +extern void zGenXtrue (int, int, doublecomplex *, int); +extern void zFillRHS (trans_t, int, doublecomplex *, int, SuperMatrix *, + SuperMatrix *); +extern void zgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, + SuperMatrix *, SuperLUStat_t*, int *); +/* ILU */ +extern void zgsitrf (superlu_options_t*, SuperMatrix*, int, int, int*, + void *, int, int *, int *, SuperMatrix *, SuperMatrix *, + SuperLUStat_t*, int *); +extern int zldperm(int, int, int, int [], int [], doublecomplex [], + int [], double [], double []); +extern int ilu_zsnode_dfs (const int, const int, const int *, const int *, + const int *, int *, GlobalLU_t *); +extern void ilu_zpanel_dfs (const int, const int, const int, SuperMatrix *, + int *, int *, doublecomplex *, double *, int *, int *, + int *, int *, int *, int *, GlobalLU_t *); +extern int ilu_zcolumn_dfs (const int, const int, int *, int *, int *, + int *, int *, int *, int *, int *, + GlobalLU_t *); +extern int ilu_zcopy_to_ucol (int, int, int *, int *, int *, + doublecomplex *, int, milu_t, double, int, + doublecomplex *, int *, GlobalLU_t *, int *); +extern int ilu_zpivotL (const int, const double, int *, int *, int, int *, + int *, int *, int *, double, milu_t, + doublecomplex, GlobalLU_t *, SuperLUStat_t*); +extern int ilu_zdrop_row (superlu_options_t *, int, int, double, + int, int *, double *, GlobalLU_t *, + double *, int *, int); + + +/*! \brief Driver related */ + +extern void zgsequ (SuperMatrix *, double *, double *, double *, + double *, double *, int *); +extern void zlaqgs (SuperMatrix *, double *, double *, double, + double, double, char *); +extern void zgscon (char *, SuperMatrix *, SuperMatrix *, + double, double *, SuperLUStat_t*, int *); +extern double zPivotGrowth(int, SuperMatrix *, int *, + SuperMatrix *, SuperMatrix *); +extern void zgsrfs (trans_t, SuperMatrix *, SuperMatrix *, + SuperMatrix *, int *, int *, char *, double *, + double *, SuperMatrix *, SuperMatrix *, + double *, double *, SuperLUStat_t*, int *); + +extern int sp_ztrsv (char *, char *, char *, SuperMatrix *, + SuperMatrix *, doublecomplex *, SuperLUStat_t*, int *); +extern int sp_zgemv (char *, doublecomplex, SuperMatrix *, doublecomplex *, + int, doublecomplex, doublecomplex *, int); + +extern int sp_zgemm (char *, char *, int, int, int, doublecomplex, + SuperMatrix *, doublecomplex *, int, doublecomplex, + doublecomplex *, int); +extern double dlamch_(char *); + + +/*! \brief Memory-related */ +extern int zLUMemInit (fact_t, void *, int, int, int, int, int, + double, SuperMatrix *, SuperMatrix *, + GlobalLU_t *, int **, doublecomplex **); +extern void zSetRWork (int, int, doublecomplex *, doublecomplex **, doublecomplex **); +extern void zLUWorkFree (int *, doublecomplex *, GlobalLU_t *); +extern int zLUMemXpand (int, int, MemType, int *, GlobalLU_t *); + +extern doublecomplex *doublecomplexMalloc(int); +extern doublecomplex *doublecomplexCalloc(int); +extern double *doubleMalloc(int); +extern double *doubleCalloc(int); +extern int zmemory_usage(const int, const int, const int, const int); +extern int zQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); +extern int ilu_zQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); + +/*! \brief Auxiliary routines */ +extern void zreadhb(int *, int *, int *, doublecomplex **, int **, int **); +extern void zreadrb(int *, int *, int *, doublecomplex **, int **, int **); +extern void zreadtriple(int *, int *, int *, doublecomplex **, int **, int **); +extern void zCompRow_to_CompCol(int, int, int, doublecomplex*, int*, int*, + doublecomplex **, int **, int **); +extern void zfill (doublecomplex *, int, doublecomplex); +extern void zinf_norm_error (int, SuperMatrix *, doublecomplex *); +extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, + doublecomplex, doublecomplex, doublecomplex *, doublecomplex *, char *); + +/*! \brief Routines for debugging */ +extern void zPrint_CompCol_Matrix(char *, SuperMatrix *); +extern void zPrint_SuperNode_Matrix(char *, SuperMatrix *); +extern void zPrint_Dense_Matrix(char *, SuperMatrix *); +extern void zprint_lu_col(char *, int, int, int *, GlobalLU_t *); +extern int print_double_vec(char *, int, double *); +extern void check_tempv(int, doublecomplex *); + +#ifdef __cplusplus + } +#endif + +#endif /* __SUPERLU_zSP_DEFS */ + Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/smemory.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/smemory.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/smemory.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,54 +1,32 @@ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 +/*! @file smemory.c + * \brief Memory details * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" -/* Constants */ -#define NO_MEMTYPE 4 /* 0: lusup; - 1: ucol; - 2: lsub; - 3: usub */ -#define GluIntArray(n) (5 * (n) + 5) /* Internal prototypes */ void *sexpand (int *, MemType,int, int, GlobalLU_t *); -int sLUWorkInit (int, int, int, int **, float **, LU_space_t); +int sLUWorkInit (int, int, int, int **, float **, GlobalLU_t *); void copy_mem_float (int, void *, void *); void sStackCompress (GlobalLU_t *); -void sSetupSpace (void *, int, LU_space_t *); -void *suser_malloc (int, int); -void suser_free (int, int); +void sSetupSpace (void *, int, GlobalLU_t *); +void *suser_malloc (int, int, GlobalLU_t *); +void suser_free (int, int, GlobalLU_t *); -/* External prototypes (in memory.c - prec-indep) */ +/* External prototypes (in memory.c - prec-independent) */ extern void copy_mem_int (int, void *, void *); extern void user_bcopy (char *, char *, int); -/* Headers for 4 types of dynamatically managed memory */ -typedef struct e_node { - int size; /* length of the memory that has been used */ - void *mem; /* pointer to the new malloc'd store */ -} ExpHeader; -typedef struct { - int size; - int used; - int top1; /* grow upward, relative to &array[0] */ - int top2; /* grow downward */ - void *array; -} LU_stack_t; - -/* Variables local to this file */ -static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */ -static LU_stack_t stack; -static int no_expand; - /* Macros to manipulate stack */ -#define StackFull(x) ( x + stack.used >= stack.size ) +#define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size ) #define NotDoubleAlign(addr) ( (long int)addr & 7 ) #define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L ) #define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ @@ -58,66 +36,67 @@ -/* - * Setup the memory model to be used for factorization. +/*! \brief Setup the memory model to be used for factorization. + * * lwork = 0: use system malloc; * lwork > 0: use user-supplied work[] space. */ -void sSetupSpace(void *work, int lwork, LU_space_t *MemModel) +void sSetupSpace(void *work, int lwork, GlobalLU_t *Glu) { if ( lwork == 0 ) { - *MemModel = SYSTEM; /* malloc/free */ + Glu->MemModel = SYSTEM; /* malloc/free */ } else if ( lwork > 0 ) { - *MemModel = USER; /* user provided space */ - stack.used = 0; - stack.top1 = 0; - stack.top2 = (lwork/4)*4; /* must be word addressable */ - stack.size = stack.top2; - stack.array = (void *) work; + Glu->MemModel = USER; /* user provided space */ + Glu->stack.used = 0; + Glu->stack.top1 = 0; + Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */ + Glu->stack.size = Glu->stack.top2; + Glu->stack.array = (void *) work; } } -void *suser_malloc(int bytes, int which_end) +void *suser_malloc(int bytes, int which_end, GlobalLU_t *Glu) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { - buf = (char*) stack.array + stack.top1; - stack.top1 += bytes; + buf = (char*) Glu->stack.array + Glu->stack.top1; + Glu->stack.top1 += bytes; } else { - stack.top2 -= bytes; - buf = (char*) stack.array + stack.top2; + Glu->stack.top2 -= bytes; + buf = (char*) Glu->stack.array + Glu->stack.top2; } - stack.used += bytes; + Glu->stack.used += bytes; return buf; } -void suser_free(int bytes, int which_end) +void suser_free(int bytes, int which_end, GlobalLU_t *Glu) { if ( which_end == HEAD ) { - stack.top1 -= bytes; + Glu->stack.top1 -= bytes; } else { - stack.top2 += bytes; + Glu->stack.top2 += bytes; } - stack.used -= bytes; + Glu->stack.used -= bytes; } -/* +/*! \brief + * + *
  * mem_usage consists of the following fields:
  *    - for_lu (float)
  *      The amount of space used in bytes for the L\U data structures.
  *    - total_needed (float)
  *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
+ * 
*/ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) { @@ -132,33 +111,75 @@ dword = sizeof(float); /* For LU factors */ - mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * - dword + Lstore->rowind_colptr[n] * iword ); - mem_usage->for_lu += (float)( (n + 1) * iword + + mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0) * iword + Ustore->colptr[n] * (dword + iword) ); /* Working storage to support factorization */ mem_usage->total_needed = mem_usage->for_lu + - (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword + - (panel_size + 1) * n * dword ); + (float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword + + (panel_size + 1.0) * n * dword ); - mem_usage->expansions = --no_expand; - return 0; } /* sQuerySpace */ -/* - * Allocate storage for the data structures common to all factor routines. - * For those unpredictable size, make a guess as FILL * nnz(A). + +/*! \brief + * + *
+ * mem_usage consists of the following fields:
+ *    - for_lu (float)
+ *      The amount of space used in bytes for the L\U data structures.
+ *    - total_needed (float)
+ *      The amount of space needed in bytes to perform factorization.
+ * 
+ */ +int ilu_sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) +{ + SCformat *Lstore; + NCformat *Ustore; + register int n, panel_size = sp_ienv(1); + register float iword, dword; + + Lstore = L->Store; + Ustore = U->Store; + n = L->ncol; + iword = sizeof(int); + dword = sizeof(double); + + /* For LU factors */ + mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0f) * iword + + Ustore->colptr[n] * (dword + iword) ); + + /* Working storage to support factorization. + ILU needs 5*n more integers than LU */ + mem_usage->total_needed = mem_usage->for_lu + + (float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword + + (panel_size + 1.0f) * n * dword ); + + return 0; +} /* ilu_sQuerySpace */ + + +/*! \brief Allocate storage for the data structures common to all factor routines. + * + *
+ * For those unpredictable size, estimate as fill_ratio * nnz(A).
  * Return value:
  *     If lwork = -1, return the estimated amount of space required, plus n;
  *     otherwise, return the amount of space actually allocated when
  *     memory allocation failure occurred.
+ * 
*/ int sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, - int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, - int **iwork, float **dwork) + int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U, + GlobalLU_t *Glu, int **iwork, float **dwork) { int info, iword, dword; SCformat *Lstore; @@ -170,32 +191,33 @@ float *ucol; int *usub, *xusub; int nzlmax, nzumax, nzlumax; - int FILL = sp_ienv(6); - Glu->n = n; - no_expand = 0; iword = sizeof(int); dword = sizeof(float); + Glu->n = n; + Glu->num_expansions = 0; - if ( !expanders ) - expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); - if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); + if ( !Glu->expanders ) + Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE * + sizeof(ExpHeader) ); + if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact != SamePattern_SameRowPerm ) { /* Guess for L\U factors */ - nzumax = nzlumax = FILL * annz; - nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; + nzumax = nzlumax = fill_ratio * annz; + nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else { - sSetupSpace(work, lwork, &Glu->MemModel); + sSetupSpace(work, lwork, Glu); } -#ifdef DEBUG - printf("sLUMemInit() called: annz %d, MemModel %d\n", - annz, Glu->MemModel); +#if ( PRNTlevel >= 1 ) + printf("sLUMemInit() called: fill_ratio %ld, nzlmax %ld, nzumax %ld\n", + fill_ratio, nzlmax, nzumax); + fflush(stdout); #endif /* Integer pointers for L\U factors */ @@ -206,11 +228,11 @@ xlusup = intMalloc(n+1); xusub = intMalloc(n+1); } else { - xsup = (int *)suser_malloc((n+1) * iword, HEAD); - supno = (int *)suser_malloc((n+1) * iword, HEAD); - xlsub = (int *)suser_malloc((n+1) * iword, HEAD); - xlusup = (int *)suser_malloc((n+1) * iword, HEAD); - xusub = (int *)suser_malloc((n+1) * iword, HEAD); + xsup = (int *)suser_malloc((n+1) * iword, HEAD, Glu); + supno = (int *)suser_malloc((n+1) * iword, HEAD, Glu); + xlsub = (int *)suser_malloc((n+1) * iword, HEAD, Glu); + xlusup = (int *)suser_malloc((n+1) * iword, HEAD, Glu); + xusub = (int *)suser_malloc((n+1) * iword, HEAD, Glu); } lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu ); @@ -225,7 +247,8 @@ SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { - suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); + suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, + HEAD, Glu); } nzlumax /= 2; nzumax /= 2; @@ -234,6 +257,11 @@ printf("Not enough memory to perform factorization.\n"); return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n); } +#if ( PRNTlevel >= 1) + printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", + nzlmax, nzumax); + fflush(stdout); +#endif lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu ); @@ -260,18 +288,18 @@ Glu->MemModel = SYSTEM; } else { Glu->MemModel = USER; - stack.top2 = (lwork/4)*4; /* must be word-addressable */ - stack.size = stack.top2; + Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */ + Glu->stack.size = Glu->stack.top2; } - lsub = expanders[LSUB].mem = Lstore->rowind; - lusup = expanders[LUSUP].mem = Lstore->nzval; - usub = expanders[USUB].mem = Ustore->rowind; - ucol = expanders[UCOL].mem = Ustore->nzval;; - expanders[LSUB].size = nzlmax; - expanders[LUSUP].size = nzlumax; - expanders[USUB].size = nzumax; - expanders[UCOL].size = nzumax; + lsub = Glu->expanders[LSUB].mem = Lstore->rowind; + lusup = Glu->expanders[LUSUP].mem = Lstore->nzval; + usub = Glu->expanders[USUB].mem = Ustore->rowind; + ucol = Glu->expanders[UCOL].mem = Ustore->nzval;; + Glu->expanders[LSUB].size = nzlmax; + Glu->expanders[LUSUP].size = nzlumax; + Glu->expanders[USUB].size = nzumax; + Glu->expanders[UCOL].size = nzumax; } Glu->xsup = xsup; @@ -287,20 +315,20 @@ Glu->nzumax = nzumax; Glu->nzlumax = nzlumax; - info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); + info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu); if ( info ) return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n); - ++no_expand; + ++Glu->num_expansions; return 0; } /* sLUMemInit */ -/* Allocate known working storage. Returns 0 if success, otherwise +/*! \brief Allocate known working storage. Returns 0 if success, otherwise returns the number of bytes allocated so far when failure occurred. */ int sLUWorkInit(int m, int n, int panel_size, int **iworkptr, - float **dworkptr, LU_space_t MemModel) + float **dworkptr, GlobalLU_t *Glu) { int isize, dsize, extra; float *old_ptr; @@ -311,19 +339,19 @@ dsize = (m * panel_size + NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float); - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *iworkptr = (int *) intCalloc(isize/sizeof(int)); else - *iworkptr = (int *) suser_malloc(isize, TAIL); + *iworkptr = (int *) suser_malloc(isize, TAIL, Glu); if ( ! *iworkptr ) { fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n"); return (isize + n); } - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *dworkptr = (float *) SUPERLU_MALLOC(dsize); else { - *dworkptr = (float *) suser_malloc(dsize, TAIL); + *dworkptr = (float *) suser_malloc(dsize, TAIL, Glu); if ( NotDoubleAlign(*dworkptr) ) { old_ptr = *dworkptr; *dworkptr = (float*) DoubleAlign(*dworkptr); @@ -332,8 +360,8 @@ #ifdef DEBUG printf("sLUWorkInit: not aligned, extra %d\n", extra); #endif - stack.top2 -= extra; - stack.used += extra; + Glu->stack.top2 -= extra; + Glu->stack.used += extra; } } if ( ! *dworkptr ) { @@ -345,8 +373,7 @@ } -/* - * Set up pointers for real working arrays. +/*! \brief Set up pointers for real working arrays. */ void sSetRWork(int m, int panel_size, float *dworkptr, @@ -362,8 +389,7 @@ sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero); } -/* - * Free the working storage used by factor routines. +/*! \brief Free the working storage used by factor routines. */ void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu) { @@ -371,18 +397,21 @@ SUPERLU_FREE (iwork); SUPERLU_FREE (dwork); } else { - stack.used -= (stack.size - stack.top2); - stack.top2 = stack.size; + Glu->stack.used -= (Glu->stack.size - Glu->stack.top2); + Glu->stack.top2 = Glu->stack.size; /* sStackCompress(Glu); */ } - SUPERLU_FREE (expanders); - expanders = 0; + SUPERLU_FREE (Glu->expanders); + Glu->expanders = NULL; } -/* Expand the data structures for L and U during the factorization. +/*! \brief Expand the data structures for L and U during the factorization. + * + *
  * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int sLUMemXpand(int jcol, @@ -446,8 +475,7 @@ for (i = 0; i < howmany; i++) dnew[i] = dold[i]; } -/* - * Expand the existing storage to accommodate more fill-ins. +/*! \brief Expand the existing storage to accommodate more fill-ins. */ void *sexpand ( @@ -463,12 +491,14 @@ float alpha; void *new_mem, *old_mem; int new_len, tries, lword, extra, bytes_to_copy; + ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */ alpha = EXPAND; - if ( no_expand == 0 || keep_prev ) /* First time allocate requested */ + if ( Glu->num_expansions == 0 || keep_prev ) { + /* First time allocate requested */ new_len = *prev_len; - else { + } else { new_len = alpha * *prev_len; } @@ -476,9 +506,8 @@ else lword = sizeof(float); if ( Glu->MemModel == SYSTEM ) { - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ - if ( no_expand != 0 ) { + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); + if ( Glu->num_expansions != 0 ) { tries = 0; if ( keep_prev ) { if ( !new_mem ) return (NULL); @@ -487,8 +516,7 @@ if ( ++tries > 10 ) return (NULL); alpha = Reduce(alpha); new_len = alpha * *prev_len; - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); } } if ( type == LSUB || type == USUB ) { @@ -501,8 +529,8 @@ expanders[type].mem = (void *) new_mem; } else { /* MemModel == USER */ - if ( no_expand == 0 ) { - new_mem = suser_malloc(new_len * lword, HEAD); + if ( Glu->num_expansions == 0 ) { + new_mem = suser_malloc(new_len * lword, HEAD, Glu); if ( NotDoubleAlign(new_mem) && (type == LUSUP || type == UCOL) ) { old_mem = new_mem; @@ -511,12 +539,11 @@ #ifdef DEBUG printf("expand(): not aligned, extra %d\n", extra); #endif - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; } expanders[type].mem = (void *) new_mem; - } - else { + } else { tries = 0; extra = (new_len - *prev_len) * lword; if ( keep_prev ) { @@ -532,7 +559,7 @@ if ( type != USUB ) { new_mem = (void*)((char*)expanders[type + 1].mem + extra); - bytes_to_copy = (char*)stack.array + stack.top1 + bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1 - (char*)expanders[type + 1].mem; user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy); @@ -548,11 +575,11 @@ Glu->ucol = expanders[UCOL].mem = (void*)((char*)expanders[UCOL].mem + extra); } - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; if ( type == UCOL ) { - stack.top1 += extra; /* Add same amount for USUB */ - stack.used += extra; + Glu->stack.top1 += extra; /* Add same amount for USUB */ + Glu->stack.used += extra; } } /* if ... */ @@ -562,15 +589,14 @@ expanders[type].size = new_len; *prev_len = new_len; - if ( no_expand ) ++no_expand; + if ( Glu->num_expansions ) ++Glu->num_expansions; return (void *) expanders[type].mem; } /* sexpand */ -/* - * Compress the work[] array to remove fragmentation. +/*! \brief Compress the work[] array to remove fragmentation. */ void sStackCompress(GlobalLU_t *Glu) @@ -610,9 +636,9 @@ usub = ito; last = (char*)usub + xusub[ndim] * iword; - fragment = (char*) (((char*)stack.array + stack.top1) - last); - stack.used -= (long int) fragment; - stack.top1 -= (long int) fragment; + fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last); + Glu->stack.used -= (long int) fragment; + Glu->stack.top1 -= (long int) fragment; Glu->ucol = ucol; Glu->lsub = lsub; @@ -626,8 +652,7 @@ } -/* - * Allocate storage for original matrix A +/*! \brief Allocate storage for original matrix A */ void sallocateA(int n, int nnz, float **a, int **asub, int **xa) @@ -641,7 +666,7 @@ float *floatMalloc(int n) { float *buf; - buf = (float *) SUPERLU_MALLOC(n * sizeof(float)); + buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n"); } @@ -653,7 +678,7 @@ float *buf; register int i; float zero = 0.0; - buf = (float *) SUPERLU_MALLOC(n * sizeof(float)); + buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n"); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_coletree.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_coletree.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_coletree.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,3 +1,24 @@ +/*! @file sp_coletree.c + * \brief Tree layout and computation routines + * + *
+ * -- SuperLU routine (version 3.1) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * August 1, 2008
+ *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
+*/ /* Elimination tree computation and layout routines */ @@ -3,5 +24,5 @@ #include #include -#include "dsp_defs.h" +#include "slu_ddefs.h" /* @@ -24,7 +45,6 @@ * Implemented path-halving by XSL 07/05/95. */ -static int *pp; /* parent array for sets */ static int *mxCallocInt(int n) @@ -42,17 +62,19 @@ static void initialize_disjoint_sets ( - int n - ) + int n, + int **pp + ) { - pp = mxCallocInt(n); + (*pp) = mxCallocInt(n); } static int make_set ( - int i - ) + int i, + int *pp + ) { pp[i] = i; return i; @@ -61,9 +83,10 @@ static int link ( - int s, - int t - ) + int s, + int t, + int *pp + ) { pp[s] = t; return t; @@ -72,7 +95,10 @@ /* PATH HALVING */ static -int find (int i) +int find ( + int i, + int *pp + ) { register int p, gp; @@ -102,8 +128,8 @@ static void finalize_disjoint_sets ( - void - ) + int *pp + ) { SUPERLU_FREE(pp); } @@ -143,9 +169,10 @@ int row, col; int rroot; int p; + int *pp; root = mxCallocInt (nc); - initialize_disjoint_sets (nc); + initialize_disjoint_sets (nc, &pp); /* Compute firstcol[row] = first nonzero column in row */ @@ -163,17 +190,17 @@ centered at its first vertex, which has the same fill. */ for (col = 0; col < nc; col++) { - cset = make_set (col); + cset = make_set (col, pp); root[cset] = col; parent[col] = nc; /* Matlab */ for (p = acolst[col]; p < acolend[col]; p++) { row = firstcol[arow[p]]; if (row >= col) continue; - rset = find (row); + rset = find (row, pp); rroot = root[rset]; if (rroot != col) { parent[rroot] = col; - cset = link (cset, rset); + cset = link (cset, rset, pp); root[cset] = col; } } @@ -181,7 +208,7 @@ SUPERLU_FREE (root); SUPERLU_FREE (firstcol); - finalize_disjoint_sets (); + finalize_disjoint_sets (pp); return 0; } @@ -209,35 +236,88 @@ * Based on code written by John Gilbert at CMI in 1987. */ -static int *first_kid, *next_kid; /* Linked list of children. */ -static int *post, postnum; - static /* * Depth-first search from vertex v. */ void etdfs ( - int v - ) + int v, + int first_kid[], + int next_kid[], + int post[], + int *postnum + ) { int w; for (w = first_kid[v]; w != -1; w = next_kid[w]) { - etdfs (w); + etdfs (w, first_kid, next_kid, post, postnum); } /* post[postnum++] = v; in Matlab */ - post[v] = postnum++; /* Modified by X.Li on 2/14/95 */ + post[v] = (*postnum)++; /* Modified by X. Li on 08/10/07 */ } +static /* + * Depth-first search from vertex n. No recursion. + * This routine was contributed by C?dric Doucet, CEDRAT Group, Meylan, France. + */ +void nr_etdfs (int n, int *parent, + int *first_kid, int *next_kid, + int *post, int postnum) +{ + int current = n, first, next; + + while (postnum != n){ + + /* no kid for the current node */ + first = first_kid[current]; + + /* no first kid for the current node */ + if (first == -1){ + + /* numbering this node because it has no kid */ + post[current] = postnum++; + + /* looking for the next kid */ + next = next_kid[current]; + + while (next == -1){ + + /* no more kids : back to the parent node */ + current = parent[current]; + + /* numbering the parent node */ + post[current] = postnum++; + + /* get the next kid */ + next = next_kid[current]; + } + + /* stopping criterion */ + if (postnum==n+1) return; + + /* updating current node */ + current = next; + } + /* updating current node */ + else { + current = first; + } + } +} + +/* * Post order a tree */ int *TreePostorder( - int n, - int *parent -) + int n, + int *parent + ) { + int *first_kid, *next_kid; /* Linked list of children. */ + int *post, postnum; int v, dad; /* Allocate storage for working arrays and results */ @@ -255,7 +335,13 @@ /* Depth-first search from dummy root vertex #n */ postnum = 0; - etdfs (n); +#if 0 + /* recursion */ + etdfs (n, first_kid, next_kid, post, &postnum); +#else + /* no recursion */ + nr_etdfs(n, parent, first_kid, next_kid, post, postnum); +#endif SUPERLU_FREE (first_kid); SUPERLU_FREE (next_kid); @@ -306,27 +392,28 @@ int row, col; int rroot; int p; + int *pp; root = mxCallocInt (n); - initialize_disjoint_sets (n); + initialize_disjoint_sets (n, &pp); for (col = 0; col < n; col++) { - cset = make_set (col); + cset = make_set (col, pp); root[cset] = col; parent[col] = n; /* Matlab */ for (p = acolst[col]; p < acolend[col]; p++) { row = arow[p]; if (row >= col) continue; - rset = find (row); + rset = find (row, pp); rroot = root[rset]; if (rroot != col) { parent[rroot] = col; - cset = link (cset, rset); + cset = link (cset, rset, pp); root[cset] = col; } } } SUPERLU_FREE (root); - finalize_disjoint_sets (); + finalize_disjoint_sets (pp); return 0; } /* SP_SYMETREE */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_ienv.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_ienv.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_ienv.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,3 +1,7 @@ +/*! @file sp_ienv.c + * \brief Chooses machine-dependent parameters for the local environment +*/ + /* * File name: sp_ienv.c * History: Modified from lapack routine ILAENV @@ -2,6 +6,7 @@ */ -int -sp_ienv(int ispec) -{ -/* +#include "slu_Cnames.h" + +/*! \brief + +
     Purpose   
@@ -40,7 +45,11 @@
             < 0:  if SP_IENV = -k, the k-th argument had an illegal value. 
   
     ===================================================================== 
+
*/ +int +sp_ienv(int ispec) +{ int i; switch (ispec) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_preorder.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_preorder.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_preorder.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,10 +1,12 @@ -#include "dsp_defs.h" +/*! @file sp_preorder.c + * \brief Permute and performs functions on columns of orginal matrix + */ +#include "slu_ddefs.h" -void -sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c, - int *etree, SuperMatrix *AC) -{ -/* + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -54,9 +56,12 @@
  *         The resulting matrix after applied the column permutation
  *         perm_c[] to matrix A. The type of AC can be:
  *         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
- *
+ * 
*/ - +void +sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c, + int *etree, SuperMatrix *AC) +{ NCformat *Astore; NCPformat *ACstore; int *iwork, *post; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,32 @@ -/* +/*! @file spanel_bmod.c + * \brief Performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ /* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. + */ #include #include -#include "ssp_defs.h" +#include "slu_sdefs.h" /* * Function prototypes @@ -30,6 +35,25 @@ void smatvec(int, int, int, float *, float *, float *); extern void scheck_tempv(); +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *    Performs numeric block updates (sup-panel) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ *    Before entering this routine, the original nonzeros in the panel 
+ *    were already copied into the spa[m,w].
+ *
+ *    Updated/Output parameters-
+ *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
+ *    collectively in the m-by-w vector dense[*]. 
+ * 
+ */ + void spanel_bmod ( const int m, /* in - number of rows in the matrix */ @@ -44,23 +68,8 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose - * ======= - * - * Performs numeric block updates (sup-panel) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - * Before entering this routine, the original nonzeros in the panel - * were already copied into the spa[m,w]. - * - * Updated/Output parameters- - * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned - * collectively in the m-by-w vector dense[*]. - * - */ + #ifdef USE_VENDOR_BLAS #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,48 +1,32 @@ - -/* +/*! @file spanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" -#include "util.h" -void -spanel_dfs ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - SuperMatrix *A, /* in - original matrix */ - int *perm_r, /* in */ - int *nseg, /* out */ - float *dense, /* out */ - int *panel_lsub, /* out */ - int *segrep, /* out */ - int *repfnz, /* out */ - int *xprune, /* out */ - int *marker, /* out */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* +#include "slu_sdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -68,8 +52,29 @@
  *   repfnz: SuperA-col --> PA-row
  *   parent: SuperA-col --> SuperA-col
  *   xplore: SuperA-col --> index to L-structure
- *
+ * 
*/ + +void +spanel_dfs ( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + float *dense, /* out */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *xprune, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + NCPformat *Astore; float *a; int *asub; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,44 +1,36 @@ -/* +/*! @file spivotL.c + * \brief Performs numerical pivoting + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include #include -#include "ssp_defs.h" +#include "slu_sdefs.h" #undef DEBUG -int -spivotL( - const int jcol, /* in */ - const float u, /* in - diagonal pivoting threshold */ - int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ - int *perm_r, /* may be modified */ - int *iperm_r, /* in - inverse of perm_r */ - int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ - int *pivrow, /* out */ - GlobalLU_t *Glu, /* modified - global LU data structures */ - SuperLUStat_t *stat /* output */ - ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *   Performs the numerical pivoting on the current column of L,
@@ -57,8 +49,23 @@
  *
  *   Return value: 0      success;
  *                 i > 0  U(i,i) is exactly zero.
- *
+ * 
*/ + +int +spivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int *iperm_r, /* in - inverse of perm_r */ + int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ + int *pivrow, /* out */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ int nsupr; /* no of rows in the supernode */ @@ -116,8 +123,12 @@ /* Test for singularity */ if ( pivmax == 0.0 ) { +#if 1 *pivrow = lsub_ptr[pivptr]; perm_r[*pivrow] = jcol; +#else + perm_r[diagind] = jcol; +#endif *usepr = 0; return (jcol+1); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotgrowth.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotgrowth.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotgrowth.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,21 +1,20 @@ - -/* +/*! @file spivotgrowth.c + * \brief Computes the reciprocal pivot growth factor + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" -float -sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, - SuperMatrix *L, SuperMatrix *U) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -43,8 +42,14 @@
  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
  *          storage scheme, i.e., U has types: Stype = NC;
  *          Dtype = SLU_S; Mtype = TRU.
- *
+ * 
*/ + +float +sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, + SuperMatrix *L, SuperMatrix *U) +{ + NCformat *Astore; SCformat *Lstore; NCformat *Ustore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spruneL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spruneL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/spruneL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,39 @@ - -/* +/*! @file spruneL.c + * \brief Prunes the L-structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ *
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Prunes the L-structure of supernodes whose L-structure
+ *   contains the current pivot row "pivrow"
+ * 
+ */ + void spruneL( const int jcol, /* in */ @@ -35,13 +46,7 @@ GlobalLU_t *Glu /* modified - global LU data structures */ ) { -/* - * Purpose - * ======= - * Prunes the L-structure of supernodes whose L-structure - * contains the current pivot row "pivrow" - * - */ + float utemp; int jsupno, irep, irep1, kmin, kmax, krow, movnum; int i, ktemp, minloc, maxloc; @@ -108,8 +113,8 @@ kmax--; else if ( perm_r[lsub[kmin]] != EMPTY ) kmin++; - else { /* kmin below pivrow, and kmax above pivrow: - * interchange the two subscripts + else { /* kmin below pivrow (not yet pivoted), and kmax + * above pivrow: interchange the two subscripts */ ktemp = lsub[kmin]; lsub[kmin] = lsub[kmax]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadhb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadhb.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadhb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,18 +1,85 @@ - -/* +/*! @file sreadhb.c + * \brief Read a matrix stored in Harwell-Boeing format + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Purpose
+ * =======
+ * 
+ * Read a FLOAT PRECISION matrix stored in Harwell-Boeing format 
+ * as described below.
+ * 
+ * Line 1 (A72,A8) 
+ *  	Col. 1 - 72   Title (TITLE) 
+ *	Col. 73 - 80  Key (KEY) 
+ * 
+ * Line 2 (5I14) 
+ * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
+ * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
+ * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
+ * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
+ *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
+ *                    (including starting guesses and solution vectors 
+ *		       if present) 
+ *           	      (zero indicates no right-hand side data is present) 
+ *
+ * Line 3 (A3, 11X, 4I14) 
+ *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
+ * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
+ * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
+ *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
+ *	              (equal to number of entries for assembled matrices) 
+ * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
+ *	              (zero in the case of assembled matrices) 
+ * Line 4 (2A16, 2A20) 
+ * 	Col. 1 - 16   Format for pointers (PTRFMT) 
+ *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
+ *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
+ * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
+ *
+ * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
+ *    	Col. 1 	      Right-hand side type: 
+ *	         	  F for full storage or M for same format as matrix 
+ *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
+ *    	Col. 3        X if an exact solution vector(s) is supplied. 
+ *	Col. 15 - 28  Number of right-hand sides (NRHS) 
+ *	Col. 29 - 42  Number of row indices (NRHSIX) 
+ *          	      (ignored in case of unassembled matrices) 
+ *
+ * The three character type field on line 3 describes the matrix type. 
+ * The following table lists the permitted values for each of the three 
+ * characters. As an example of the type field, RSA denotes that the matrix 
+ * is real, symmetric, and assembled. 
+ *
+ * First Character: 
+ *	R Real matrix 
+ *	C Complex matrix 
+ *	P Pattern only (no numerical values supplied) 
+ *
+ * Second Character: 
+ *	S Symmetric 
+ *	U Unsymmetric 
+ *	H Hermitian 
+ *	Z Skew symmetric 
+ *	R Rectangular 
+ *
+ * Third Character: 
+ *	A Assembled 
+ *	E Elemental matrices (unassembled) 
+ *
+ * 
*/ #include #include -#include "ssp_defs.h" +#include "slu_sdefs.h" -/* Eat up the rest of the current line */ +/*! \brief Eat up the rest of the current line */ int sDumpLine(FILE *fp) { register int c; @@ -60,7 +127,7 @@ return 0; } -int sReadVector(FILE *fp, int n, int *where, int perline, int persize) +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) { register int i, j, item; char tmp, buf[100]; @@ -108,72 +175,6 @@ sreadhb(int *nrow, int *ncol, int *nonz, float **nzval, int **rowind, int **colptr) { -/* - * Purpose - * ======= - * - * Read a FLOAT PRECISION matrix stored in Harwell-Boeing format - * as described below. - * - * Line 1 (A72,A8) - * Col. 1 - 72 Title (TITLE) - * Col. 73 - 80 Key (KEY) - * - * Line 2 (5I14) - * Col. 1 - 14 Total number of lines excluding header (TOTCRD) - * Col. 15 - 28 Number of lines for pointers (PTRCRD) - * Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - * Col. 43 - 56 Number of lines for numerical values (VALCRD) - * Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - * (including starting guesses and solution vectors - * if present) - * (zero indicates no right-hand side data is present) - * - * Line 3 (A3, 11X, 4I14) - * Col. 1 - 3 Matrix type (see below) (MXTYPE) - * Col. 15 - 28 Number of rows (or variables) (NROW) - * Col. 29 - 42 Number of columns (or elements) (NCOL) - * Col. 43 - 56 Number of row (or variable) indices (NNZERO) - * (equal to number of entries for assembled matrices) - * Col. 57 - 70 Number of elemental matrix entries (NELTVL) - * (zero in the case of assembled matrices) - * Line 4 (2A16, 2A20) - * Col. 1 - 16 Format for pointers (PTRFMT) - * Col. 17 - 32 Format for row (or variable) indices (INDFMT) - * Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - * Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - * - * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - * Col. 1 Right-hand side type: - * F for full storage or M for same format as matrix - * Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - * Col. 3 X if an exact solution vector(s) is supplied. - * Col. 15 - 28 Number of right-hand sides (NRHS) - * Col. 29 - 42 Number of row indices (NRHSIX) - * (ignored in case of unassembled matrices) - * - * The three character type field on line 3 describes the matrix type. - * The following table lists the permitted values for each of the three - * characters. As an example of the type field, RSA denotes that the matrix - * is real, symmetric, and assembled. - * - * First Character: - * R Real matrix - * C Complex matrix - * P Pattern only (no numerical values supplied) - * - * Second Character: - * S Symmetric - * U Unsymmetric - * H Hermitian - * Z Skew symmetric - * R Rectangular - * - * Third Character: - * A Assembled - * E Elemental matrices (unassembled) - * - */ register int i, numer_lines = 0, rhscrd = 0; int tmp, colnum, colsize, rownum, rowsize, valnum, valsize; @@ -244,8 +245,8 @@ printf("valnum %d, valsize %d\n", valnum, valsize); #endif - sReadVector(fp, *ncol+1, *colptr, colnum, colsize); - sReadVector(fp, *nonz, *rowind, rownum, rowsize); + ReadVector(fp, *ncol+1, *colptr, colnum, colsize); + ReadVector(fp, *nonz, *rowind, rownum, rowsize); if ( numer_lines ) { sReadValues(fp, *nonz, *nzval, valnum, valsize); } Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadrb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadrb.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadrb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,237 @@ + +/*! @file sreadrb.c + * \brief Read a matrix stored in Rutherford-Boeing format + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ * + * Purpose + * ======= + * + * Read a FLOAT PRECISION matrix stored in Rutherford-Boeing format + * as described below. + * + * Line 1 (A72, A8) + * Col. 1 - 72 Title (TITLE) + * Col. 73 - 80 Matrix name / identifier (MTRXID) + * + * Line 2 (I14, 3(1X, I13)) + * Col. 1 - 14 Total number of lines excluding header (TOTCRD) + * Col. 16 - 28 Number of lines for pointers (PTRCRD) + * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) + * Col. 44 - 56 Number of lines for numerical values (VALCRD) + * + * Line 3 (A3, 11X, 4(1X, I13)) + * Col. 1 - 3 Matrix type (see below) (MXTYPE) + * Col. 15 - 28 Compressed Column: Number of rows (NROW) + * Elemental: Largest integer used to index variable (MVAR) + * Col. 30 - 42 Compressed Column: Number of columns (NCOL) + * Elemental: Number of element matrices (NELT) + * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) + * Elemental: Number of variable indeces (NVARIX) + * Col. 58 - 70 Compressed Column: Unused, explicitly zero + * Elemental: Number of elemental matrix entries (NELTVL) + * + * Line 4 (2A16, A20) + * Col. 1 - 16 Fortran format for pointers (PTRFMT) + * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) + * Col. 33 - 52 Fortran format for numerical values of coefficient matrix + * (VALFMT) + * (blank in the case of matrix patterns) + * + * The three character type field on line 3 describes the matrix type. + * The following table lists the permitted values for each of the three + * characters. As an example of the type field, RSA denotes that the matrix + * is real, symmetric, and assembled. + * + * First Character: + * R Real matrix + * C Complex matrix + * I integer matrix + * P Pattern only (no numerical values supplied) + * Q Pattern only (numerical values supplied in associated auxiliary value + * file) + * + * Second Character: + * S Symmetric + * U Unsymmetric + * H Hermitian + * Z Skew symmetric + * R Rectangular + * + * Third Character: + * A Compressed column form + * E Elemental form + * + *
+ */ + +#include "slu_sdefs.h" + + +/*! \brief Eat up the rest of the current line */ +static int sDumpLine(FILE *fp) +{ + register int c; + while ((c = fgetc(fp)) != '\n') ; + return 0; +} + +static int sParseIntFormat(char *buf, int *num, int *size) +{ + char *tmp; + + tmp = buf; + while (*tmp++ != '(') ; + sscanf(tmp, "%d", num); + while (*tmp != 'I' && *tmp != 'i') ++tmp; + ++tmp; + sscanf(tmp, "%d", size); + return 0; +} + +static int sParseFloatFormat(char *buf, int *num, int *size) +{ + char *tmp, *period; + + tmp = buf; + while (*tmp++ != '(') ; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' + && *tmp != 'F' && *tmp != 'f') { + /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the + num picked up refers to P, which should be skipped. */ + if (*tmp=='p' || *tmp=='P') { + ++tmp; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + } else { + ++tmp; + } + } + ++tmp; + period = tmp; + while (*period != '.' && *period != ')') ++period ; + *period = '\0'; + *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/ + + return 0; +} + +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) +{ + register int i, j, item; + char tmp, buf[100]; + + i = 0; + while (i < n) { + fgets(buf, 100, fp); /* read a line at a time */ + for (j=0; j + * -- SuperLU routine (version 4.0) -- + * Lawrence Berkeley National Laboratory. + * June 30, 2009 + *
+ */ + +#include "slu_sdefs.h" + + +void +sreadtriple(int *m, int *n, int *nonz, + float **nzval, int **rowind, int **colptr) +{ +/* + * Output parameters + * ================= + * (a,asub,xa): asub[*] contains the row subscripts of nonzeros + * in columns of matrix A; a[*] the numerical values; + * row i of A is given by a[k],k=xa[i],...,xa[i+1]-1. + * + */ + int j, k, jsize, nnz, nz; + float *a, *val; + int *asub, *xa, *row, *col; + int zero_base = 0; + + /* Matrix format: + * First line: #rows, #cols, #non-zero + * Triplet in the rest of lines: + * row, col, value + */ + + scanf("%d%d", n, nonz); + *m = *n; + printf("m %d, n %d, nonz %d\n", *m, *n, *nonz); + sallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */ + a = *nzval; + asub = *rowind; + xa = *colptr; + + val = (float *) SUPERLU_MALLOC(*nonz * sizeof(float)); + row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + + for (j = 0; j < *n; ++j) xa[j] = 0; + + /* Read into the triplet array from a file */ + for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { + scanf("%d%d%f\n", &row[nz], &col[nz], &val[nz]); + + if ( nnz == 0 ) { /* first nonzero */ + if ( row[0] == 0 || col[0] == 0 ) { + zero_base = 1; + printf("triplet file: row/col indices are zero-based.\n"); + } else + printf("triplet file: row/col indices are one-based.\n"); + } + + if ( !zero_base ) { + /* Change to 0-based indexing. */ + --row[nz]; + --col[nz]; + } + + if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n + /*|| val[nz] == 0.*/) { + fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", + nz, row[nz], col[nz], val[nz]); + exit(-1); + } else { + ++xa[col[nz]]; + ++nz; + } + } + + *nonz = nz; + + /* Initialize the array of column pointers */ + k = 0; + jsize = xa[0]; + xa[0] = 0; + for (j = 1; j < *n; ++j) { + k += jsize; + jsize = xa[j]; + xa[j] = k; + } + + /* Copy the triplets into the column oriented storage */ + for (nz = 0; nz < *nonz; ++nz) { + j = col[nz]; + k = xa[j]; + asub[k] = row[nz]; + a[k] = val[nz]; + ++xa[j]; + } + + /* Reset the column pointers to the beginning of each column */ + for (j = *n; j > 0; --j) + xa[j] = xa[j-1]; + xa[0] = 0; + + SUPERLU_FREE(val); + SUPERLU_FREE(row); + SUPERLU_FREE(col); + +#ifdef CHK_INPUT + { + int i; + for (i = 0; i < *n; i++) { + printf("Col %d, xa %d\n", i, xa[i]); + for (k = xa[i]; k < xa[i+1]; k++) + printf("%d\t%16.10f\n", asub[k], a[k]); + } + } +#endif + +} + + +void sreadrhs(int m, float *b) +{ + FILE *fp, *fopen(); + int i; + /*int j;*/ + + if ( !(fp = fopen("b.dat", "r")) ) { + fprintf(stderr, "dreadrhs: file does not exist\n"); + exit(-1); + } + for (i = 0; i < m; ++i) + fscanf(fp, "%f\n", &b[i]); + + /* readpair_(j, &b[i]);*/ + fclose(fp); +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,29 +1,31 @@ -/* +/*! @file ssnode_bmod.c + * \brief Performs numeric block updates within the relaxed snode. + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" +#include "slu_sdefs.h" -/* - * Performs numeric block updates within the relaxed snode. + +/*! \brief Performs numeric block updates within the relaxed snode. */ int ssnode_bmod ( Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,46 @@ - -/* +/*! @file ssnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    ssnode_dfs() - Determine the union of the row structures of those 
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + int ssnode_dfs ( const int jcol, /* in - start of the supernode */ @@ -35,19 +53,7 @@ GlobalLU_t *Glu /* modified */ ) { -/* Purpose - * ======= - * ssnode_dfs() - Determine the union of the row structures of those - * columns within the relaxed snode. - * Note: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * Return value - * ============ - * 0 success; - * >0 number of bytes allocated when run out of memory. - * - */ + register int i, k, ifrom, ito, nextl, new_next; int nsuper, krow, kmark, mem_error; int *xsup, *supno; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas2.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas2.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas2.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,17 +1,20 @@ -/* +/*! @file ssp_blas2.c + * \brief Sparse BLAS 2, using some dense BLAS 2 operations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ /* * File name: ssp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ -#include "ssp_defs.h" +#include "slu_sdefs.h" /* * Function prototypes @@ -20,12 +23,9 @@ void slsolve(int, int, float*, float*); void smatvec(int, int, int, float*, float*, float*); - -int -sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, - SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief Solves one of the systems of equations A*x = b, or A'*x = b + * + *
  *   Purpose
  *   =======
  *
@@ -49,7 +49,7 @@
  *             On entry, trans specifies the equations to be solved as   
  *             follows:   
  *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
+ *                trans = 'T' or 't'   A'*x = b.
  *                trans = 'C' or 'c'   A'*x = b.   
  *
  *   diag   - (input) char*
@@ -75,8 +75,12 @@
  *
  *   info    - (output) int*
  *             If *info = -i, the i-th argument had an illegal value.
- *
+ * 
*/ +int +sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, + SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info) +{ #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), @@ -96,7 +100,8 @@ /* Test the input parameters */ *info = 0; if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; - else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2; + else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && + !lsame_(trans, "C")) *info = -2; else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; @@ -298,68 +303,71 @@ +/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   sp_sgemv()  performs one of the matrix-vector operations   
+ *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
+ *   where alpha and beta are scalars, x and y are vectors and A is a
+ *   sparse A->nrow by A->ncol matrix.   
+ *
+ *   Parameters   
+ *   ==========   
+ *
+ *   TRANS  - (input) char*
+ *            On entry, TRANS specifies the operation to be performed as   
+ *            follows:   
+ *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
+ *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
+ *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
+ *
+ *   ALPHA  - (input) float
+ *            On entry, ALPHA specifies the scalar alpha.   
+ *
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ *
+ *   X      - (input) float*, array of DIMENSION at least   
+ *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
+ *            Before entry, the incremented array X must contain the   
+ *            vector x.   
+ *
+ *   INCX   - (input) int
+ *            On entry, INCX specifies the increment for the elements of   
+ *            X. INCX must not be zero.   
+ *
+ *   BETA   - (input) float
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then Y need not be set on input.   
+ *
+ *   Y      - (output) float*,  array of DIMENSION at least   
+ *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
+ *            Before entry with BETA non-zero, the incremented array Y   
+ *            must contain the vector y. On exit, Y is overwritten by the 
+ *            updated vector y.
+ *	     
+ *   INCY   - (input) int
+ *            On entry, INCY specifies the increment for the elements of   
+ *            Y. INCY must not be zero.   
+ *
+ *   ==== Sparse Level 2 Blas routine.   
+ * 
+ */ int sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, int incx, float beta, float *y, int incy) { -/* Purpose - ======= - - sp_sgemv() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) float - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. - In the future, more general A can be handled. - - X - (input) float*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) float - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) float*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -*/ - /* Local variables */ NCformat *Astore; float *Aval; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas3.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas3.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_blas3.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,116 +1,122 @@ - -/* +/*! @file ssp_blas3.c + * \brief Sparse BLAS3, using some dense BLAS3 operations + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ -#include "ssp_defs.h" -#include "util.h" +#include "slu_sdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ * 
+ *   sp_s performs one of the matrix-matrix operations   
+ * 
+ *      C := alpha*op( A )*op( B ) + beta*C,   
+ * 
+ *   where  op( X ) is one of 
+ * 
+ *      op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+ * 
+ *   alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
+ *   an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
+ *   
+ * 
+ *   Parameters   
+ *   ==========   
+ * 
+ *   TRANSA - (input) char*
+ *            On entry, TRANSA specifies the form of op( A ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSA = 'N' or 'n',  op( A ) = A.   
+ *               TRANSA = 'T' or 't',  op( A ) = A'.   
+ *               TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
+ *            Unchanged on exit.   
+ * 
+ *   TRANSB - (input) char*
+ *            On entry, TRANSB specifies the form of op( B ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSB = 'N' or 'n',  op( B ) = B.   
+ *               TRANSB = 'T' or 't',  op( B ) = B'.   
+ *               TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
+ *            Unchanged on exit.   
+ * 
+ *   M      - (input) int   
+ *            On entry,  M  specifies  the number of rows of the matrix 
+ *	     op( A ) and of the matrix C.  M must be at least zero. 
+ *	     Unchanged on exit.   
+ * 
+ *   N      - (input) int
+ *            On entry,  N specifies the number of columns of the matrix 
+ *	     op( B ) and the number of columns of the matrix C. N must be 
+ *	     at least zero.
+ *	     Unchanged on exit.   
+ * 
+ *   K      - (input) int
+ *            On entry, K specifies the number of columns of the matrix 
+ *	     op( A ) and the number of rows of the matrix op( B ). K must 
+ *	     be at least  zero.   
+ *           Unchanged on exit.
+ *      
+ *   ALPHA  - (input) float
+ *            On entry, ALPHA specifies the scalar alpha.   
+ * 
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ * 
+ *   B      - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is 
+ *            n when TRANSB = 'N' or 'n',  and is  k otherwise.   
+ *            Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
+ *            part of the array B must contain the matrix B, otherwise 
+ *            the leading n by k part of the array B must contain the 
+ *            matrix B.   
+ *            Unchanged on exit.   
+ * 
+ *   LDB    - (input) int
+ *            On entry, LDB specifies the first dimension of B as declared 
+ *            in the calling (sub) program. LDB must be at least max( 1, n ).  
+ *            Unchanged on exit.   
+ * 
+ *   BETA   - (input) float
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then C need not be set on input.   
+ *  
+ *   C      - FLOAT PRECISION array of DIMENSION ( LDC, n ).   
+ *            Before entry, the leading m by n part of the array C must 
+ *            contain the matrix C,  except when beta is zero, in which 
+ *            case C need not be set on entry.   
+ *            On exit, the array C is overwritten by the m by n matrix 
+ *	     ( alpha*op( A )*B + beta*C ).   
+ *  
+ *   LDC    - (input) int
+ *            On entry, LDC specifies the first dimension of C as declared 
+ *            in the calling (sub)program. LDC must be at least max(1,m).   
+ *            Unchanged on exit.   
+ *  
+ *   ==== Sparse Level 3 Blas routine.   
+ * 
+ */ + int sp_sgemm(char *transa, char *transb, int m, int n, int k, float alpha, SuperMatrix *A, float *b, int ldb, float beta, float *c, int ldc) { -/* Purpose - ======= - - sp_s performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) float - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. - In the future, more general A can be handled. - - B - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) float - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - FLOAT PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -*/ int incx = 1, incy = 1; int j; Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_defs.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_defs.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/ssp_defs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,234 +0,0 @@ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */ -#define __SUPERLU_sSP_DEFS - -/* - * File name: ssp_defs.h - * Purpose: Sparse matrix types and function prototypes - * History: - */ - -#ifdef _CRAY -#include -#include -#endif - -/* Define my integer type int_t */ -typedef int int_t; /* default */ - -#include "Cnames.h" -#include "supermatrix.h" -#include "util.h" - - -/* - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. - * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ -typedef struct { - int *xsup; /* supernode and column mapping */ - int *supno; - int *lsub; /* compressed L subscripts */ - int *xlsub; - float *lusup; /* L supernodes */ - int *xlusup; - float *ucol; /* U columns */ - int *usub; - int *xusub; - int nzlmax; /* current max size of lsub */ - int nzumax; /* " " " ucol */ - int nzlumax; /* " " " lusup */ - int n; /* number of columns in the matrix */ - LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ -} GlobalLU_t; - -typedef struct { - float for_lu; - float total_needed; - int expansions; -} mem_usage_t; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Driver routines */ -extern void -sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); -extern void -sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, - char *, float *, float *, SuperMatrix *, SuperMatrix *, - void *, int, SuperMatrix *, SuperMatrix *, - float *, float *, float *, float *, - mem_usage_t *, SuperLUStat_t *, int *); - -/* Supernodal LU factor related */ -extern void -sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); -extern void -sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int, - Stype_t, Dtype_t, Mtype_t); -extern void -sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, - int *, int *, int *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -extern void -sCopy_Dense_Matrix(int, int, float *, int, float *, int); - -extern void countnz (const int, int *, int *, int *, GlobalLU_t *); -extern void fixupL (const int, const int *, GlobalLU_t *); - -extern void sallocateA (int, int, float **, int **, int **); -extern void sgstrf (superlu_options_t*, SuperMatrix*, float, - int, int, int*, void *, int, int *, int *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); -extern int ssnode_dfs (const int, const int, const int *, const int *, - const int *, int *, int *, GlobalLU_t *); -extern int ssnode_bmod (const int, const int, const int, float *, - float *, GlobalLU_t *, SuperLUStat_t*); -extern void spanel_dfs (const int, const int, const int, SuperMatrix *, - int *, int *, float *, int *, int *, int *, - int *, int *, int *, int *, GlobalLU_t *); -extern void spanel_bmod (const int, const int, const int, const int, - float *, float *, int *, int *, - GlobalLU_t *, SuperLUStat_t*); -extern int scolumn_dfs (const int, const int, int *, int *, int *, int *, - int *, int *, int *, int *, int *, GlobalLU_t *); -extern int scolumn_bmod (const int, const int, float *, - float *, int *, int *, int, - GlobalLU_t *, SuperLUStat_t*); -extern int scopy_to_ucol (int, int, int *, int *, int *, - float *, GlobalLU_t *); -extern int spivotL (const int, const float, int *, int *, - int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); -extern void spruneL (const int, const int *, const int, const int, - const int *, const int *, int *, GlobalLU_t *); -extern void sreadmt (int *, int *, int *, float **, int **, int **); -extern void sGenXtrue (int, int, float *, int); -extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *, - SuperMatrix *); -extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, - SuperMatrix *, SuperLUStat_t*, int *); - - -/* Driver related */ - -extern void sgsequ (SuperMatrix *, float *, float *, float *, - float *, float *, int *); -extern void slaqgs (SuperMatrix *, float *, float *, float, - float, float, char *); -extern void sgscon (char *, SuperMatrix *, SuperMatrix *, - float, float *, SuperLUStat_t*, int *); -extern float sPivotGrowth(int, SuperMatrix *, int *, - SuperMatrix *, SuperMatrix *); -extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *, - SuperMatrix *, int *, int *, char *, float *, - float *, SuperMatrix *, SuperMatrix *, - float *, float *, SuperLUStat_t*, int *); - -extern int sp_strsv (char *, char *, char *, SuperMatrix *, - SuperMatrix *, float *, SuperLUStat_t*, int *); -extern int sp_sgemv (char *, float, SuperMatrix *, float *, - int, float, float *, int); - -extern int sp_sgemm (char *, char *, int, int, int, float, - SuperMatrix *, float *, int, float, - float *, int); - -/* Memory-related */ -extern int sLUMemInit (fact_t, void *, int, int, int, int, int, - SuperMatrix *, SuperMatrix *, - GlobalLU_t *, int **, float **); -extern void sSetRWork (int, int, float *, float **, float **); -extern void sLUWorkFree (int *, float *, GlobalLU_t *); -extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *); - -extern float *floatMalloc(int); -extern float *floatCalloc(int); -extern int smemory_usage(const int, const int, const int, const int); -extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); - -/* Auxiliary routines */ -extern void sreadhb(int *, int *, int *, float **, int **, int **); -extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*, - float **, int **, int **); -extern void sfill (float *, int, float); -extern void sinf_norm_error (int, SuperMatrix *, float *); -extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, - float, float, float *, float *, char *); - -/* Routines for debugging */ -extern void sPrint_CompCol_Matrix(char *, SuperMatrix *); -extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *); -extern void sPrint_Dense_Matrix(char *, SuperMatrix *); -extern void print_lu_col(char *, int, int, int *, GlobalLU_t *); -extern void check_tempv(int, float *); - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_sSP_DEFS */ - Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_timer.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_timer.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_timer.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,11 +1,15 @@ -/* +/*! @file superlu_timer.c + * \brief Returns the time used + * + *
  * Purpose
  * ======= 
- *	Returns the time in seconds used by the process.
+ * 
+ * Returns the time in seconds used by the process.
  *
  * Note: the timer function call is machine dependent. Use conditional
  *       compilation to choose the appropriate function.
- *
+ * 
*/ @@ -15,11 +19,23 @@ * nanoseconds. */ #include - + double SuperLU_timer_() { return ( (double)gethrtime() / 1e9 ); } +#elif _WIN32 + +#include + +double SuperLU_timer_() +{ + clock_t t; + t=clock(); + + return ((double)t)/CLOCKS_PER_SEC; +} + #else #ifndef NO_TIMER @@ -32,13 +48,14 @@ #ifndef CLK_TCK #define CLK_TCK 60 #endif - +/*! \brief Timer function + */ double SuperLU_timer_() { #ifdef NO_TIMER - /* no sys/times.h on WIN32 */ - double tmp; - tmp = 0.0; + /* no sys/times.h on WIN32 */ + double tmp; + tmp = 0.0; #else struct tms use; double tmp; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/supermatrix.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/supermatrix.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/supermatrix.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,18 +1,24 @@ +/*! @file supermatrix.h + * \brief Defines matrix types + */ #ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */ #define __SUPERLU_SUPERMATRIX + /******************************************** * The matrix types are defined as follows. * ********************************************/ typedef enum { SLU_NC, /* column-wise, no supernode */ + SLU_NCP, /* column-wise, column-permuted, no supernode + (The consecutive columns of nonzeros, after permutation, + may not be stored contiguously.) */ SLU_NR, /* row-wize, no supernode */ SLU_SC, /* column-wise, supernode */ + SLU_SCP, /* supernode, column-wise, permuted */ SLU_SR, /* row-wise, supernode */ - SLU_NCP, /* column-wise, column-permuted, no supernode - (The consecutive columns of nonzeros, after permutation, - may not be stored contiguously.) */ - SLU_DN /* Fortran style column-wise storage for dense matrix */ + SLU_DN, /* Fortran style column-wise storage for dense matrix */ + SLU_NR_loc /* distributed compressed row format */ } Stype_t; typedef enum { @@ -49,10 +55,10 @@ * The storage schemes are defined as follows. * ***********************************************/ -/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */ +/* Stype == SLU_NC (Also known as Harwell-Boeing sparse matrix format) */ typedef struct { int_t nnz; /* number of nonzeros in the matrix */ - void *nzval; /* pointer to array of nonzero values, packed by column */ + void *nzval; /* pointer to array of nonzero values, packed by column */ int_t *rowind; /* pointer to array of row indices of the nonzeros */ int_t *colptr; /* pointer to array of beginning of columns in nzval[] and rowind[] */ @@ -62,21 +68,20 @@ beyond the last column, so that colptr[ncol] = nnz. */ } NCformat; -/* Stype == NR (Also known as row compressed storage (RCS). */ +/* Stype == SLU_NR */ typedef struct { - int_t nnz; /* number of nonzeros in the matrix */ - void *nzval; /* pointer to array of nonzero values, packed by row */ - int_t *colind; /* pointer to array of column indices of the nonzeros */ - int_t *rowptr; /* pointer to array of beginning of rows in nzval[] - and colind[] */ - /* Note: - Zero-based indexing is used; - nzval[] and colind[] are of the same length, nnz; - rowptr[] has nrow+1 entries, the last one pointing - beyond the last column, so that rowptr[nrow] = nnz. */ + int_t nnz; /* number of nonzeros in the matrix */ + void *nzval; /* pointer to array of nonzero values, packed by raw */ + int_t *colind; /* pointer to array of columns indices of the nonzeros */ + int_t *rowptr; /* pointer to array of beginning of rows in nzval[] + and colind[] */ + /* Note: + Zero-based indexing is used; + rowptr[] has nrow+1 entries, the last one pointing + beyond the last row, so that rowptr[nrow] = nnz. */ } NRformat; -/* Stype == SC */ +/* Stype == SLU_SC */ typedef struct { int_t nnz; /* number of nonzeros in the matrix */ int_t nsuper; /* number of supernodes, minus 1 */ @@ -85,9 +90,9 @@ int_t *rowind; /* pointer to array of compressed row indices of rectangular supernodes */ int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */ - int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column + int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column j belongs; mapping from column to supernode number. */ - int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th + int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th supernode; mapping from supernode number to column. e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12) sup_to_col: 0 1 2 4 7 12 (nsuper=4) */ @@ -101,8 +106,40 @@ entries are defined. */ } SCformat; -/* Stype == NCP */ +/* Stype == SLU_SCP */ typedef struct { + int_t nnz; /* number of nonzeros in the matrix */ + int_t nsuper; /* number of supernodes */ + void *nzval; /* pointer to array of nonzero values, packed by column */ + int_t *nzval_colbeg;/* nzval_colbeg[j] points to beginning of column j + in nzval[] */ + int_t *nzval_colend;/* nzval_colend[j] points to one past the last element + of column j in nzval[] */ + int_t *rowind; /* pointer to array of compressed row indices of + rectangular supernodes */ + int_t *rowind_colbeg;/* rowind_colbeg[j] points to beginning of column j + in rowind[] */ + int_t *rowind_colend;/* rowind_colend[j] points to one past the last element + of column j in rowind[] */ + int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column + j belongs; mapping from column to supernode. */ + int_t *sup_to_colbeg; /* sup_to_colbeg[s] points to the start of the s-th + supernode; mapping from supernode to column.*/ + int_t *sup_to_colend; /* sup_to_colend[s] points to one past the end of the + s-th supernode; mapping from supernode number to + column. + e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12) + sup_to_colbeg: 0 1 2 4 7 (nsuper=4) + sup_to_colend: 1 2 4 7 12 */ + /* Note: + Zero-based indexing is used; + nzval_colptr[], rowind_colptr[], col_to_sup and + sup_to_col[] have ncol+1 entries, the last one + pointing beyond the last column. */ +} SCPformat; + +/* Stype == SLU_NCP */ +typedef struct { int_t nnz; /* number of nonzeros in the matrix */ void *nzval; /* pointer to array of nonzero values, packed by column */ int_t *rowind;/* pointer to array of row indices of the nonzeros */ @@ -118,23 +155,26 @@ postmultiplied by a column permutation matrix. */ } NCPformat; -/* Stype == DN */ +/* Stype == SLU_DN */ typedef struct { int_t lda; /* leading dimension */ void *nzval; /* array of size lda*ncol to represent a dense matrix */ } DNformat; +/* Stype == SLU_NR_loc (Distributed Compressed Row Format) */ +typedef struct { + int_t nnz_loc; /* number of nonzeros in the local submatrix */ + int_t m_loc; /* number of rows local to this processor */ + int_t fst_row; /* global index of the first row */ + void *nzval; /* pointer to array of nonzero values, packed by row */ + int_t *rowptr; /* pointer to array of beginning of rows in nzval[] + and colind[] */ + int_t *colind; /* pointer to array of column indices of the nonzeros */ + /* Note: + Zero-based indexing is used; + rowptr[] has n_loc + 1 entries, the last one pointing + beyond the last row, so that rowptr[n_loc] = nnz_loc.*/ +} NRformat_loc; -/********************************************************* - * Macros used for easy access of sparse matrix entries. * - *********************************************************/ -#define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) -#define L_SUB(ptr) ( Lstore->rowind[ptr] ) -#define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) -#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) -#define U_NZ_START(col) ( Ustore->colptr[col] ) -#define U_SUB(ptr) ( Ustore->rowind[ptr] ) - - #endif /* __SUPERLU_SUPERMATRIX */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sutil.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sutil.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/sutil.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,26 +1,29 @@ -/* - * -- SuperLU routine (version 3.0) -- +/*! @file sutil.c + * \brief Matrix utility functions + * + *
+ * -- SuperLU routine (version 3.1) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
- * October 15, 2003
+ * August 1, 2008
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include -#include "ssp_defs.h" +#include "slu_sdefs.h" void sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, @@ -64,7 +67,7 @@ Astore->rowptr = rowptr; } -/* Copy matrix A into matrix B. */ +/*! \brief Copy matrix A into matrix B. */ void sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) { @@ -108,12 +111,7 @@ sCopy_Dense_Matrix(int M, int N, float *X, int ldx, float *Y, int ldy) { -/* - * - * Purpose - * ======= - * - * Copies a two-dimensional matrix X to another matrix Y. +/*! \brief Copies a two-dimensional matrix X to another matrix Y. */ int i, j; @@ -150,8 +148,7 @@ } -/* - * Convert a row compressed storage into a column compressed storage. +/*! \brief Convert a row compressed storage into a column compressed storage. */ void sCompRow_to_CompCol(int m, int n, int nnz, @@ -266,23 +263,24 @@ void sPrint_Dense_Matrix(char *what, SuperMatrix *A) { - DNformat *Astore; - register int i; + DNformat *Astore = (DNformat *) A->Store; + register int i, j, lda = Astore->lda; float *dp; printf("\nDense matrix %s:\n", what); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; dp = (float *) Astore->nzval; - printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); + printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda); printf("\nnzval: "); - for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); + for (j = 0; j < A->ncol; ++j) { + for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i + j*lda]); + printf("\n"); + } printf("\n"); fflush(stdout); } -/* - * Diagnostic print of column "jcol" in the U/L factor. +/*! \brief Diagnostic print of column "jcol" in the U/L factor. */ void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) @@ -324,9 +322,7 @@ } -/* - * Check whether tempv[] == 0. This should be true before and after - * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". +/*! \brief Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod". */ void scheck_tempv(int n, float *tempv) { @@ -352,8 +348,7 @@ } } -/* - * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's +/*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void sFillRHS(trans_t trans, int nrhs, float *x, int ldx, @@ -382,8 +377,7 @@ } -/* - * Fills a float precision array with a given value. +/*! \brief Fills a float precision array with a given value. */ void sfill(float *a, int alen, float dval) @@ -394,8 +388,7 @@ -/* - * Check the inf-norm of the error vector +/*! \brief Check the inf-norm of the error vector */ void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue) { @@ -421,7 +414,7 @@ -/* Print performance of the code. */ +/*! \brief Print performance of the code. */ void sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, float rpg, float rcond, float *ferr, @@ -449,9 +442,9 @@ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, - mem_usage->expansions); + printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + mem_usage->for_lu/1e6, mem_usage->total_needed/1e6); + printf("Number of memory expansions: %d\n", stat->expansions); printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,39 +1,39 @@ -/* +/*! @file util.c + * \brief Utility functions + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include -#include "dsp_defs.h" -#include "util.h" +#include "slu_ddefs.h" -/* - * Global statistics variale +/*! \brief Global statistics variale */ void superlu_abort_and_exit(char* msg) { - fprintf(stderr, "%s\n", msg); + fprintf(stderr, msg); exit (-1); } -/* - * Set the default values for the options argument. +/*! \brief Set the default values for the options argument. */ void set_default_options(superlu_options_t *options) { @@ -49,7 +49,57 @@ options->PrintStat = YES; } -/* Deallocate the structure pointing to the actual storage of the matrix. */ +/*! \brief Set the default values for the options argument for ILU. + */ +void ilu_set_default_options(superlu_options_t *options) +{ + set_default_options(options); + + /* further options for incomplete factorization */ + options->DiagPivotThresh = 0.1; + options->RowPerm = LargeDiag; + options->DiagPivotThresh = 0.1; + options->ILU_FillFactor = 10.0; + options->ILU_DropTol = 1e-4; + options->ILU_DropRule = DROP_BASIC | DROP_AREA; + options->ILU_Norm = INF_NORM; + options->ILU_MILU = SMILU_2; /* SILU */ + options->ILU_FillTol = 1e-2; +} + +/*! \brief Print the options setting. + */ +void print_options(superlu_options_t *options) +{ + printf(".. options:\n"); + printf("\tFact\t %8d\n", options->Fact); + printf("\tEquil\t %8d\n", options->Equil); + printf("\tColPerm\t %8d\n", options->ColPerm); + printf("\tDiagPivotThresh %8.4f\n", options->DiagPivotThresh); + printf("\tTrans\t %8d\n", options->Trans); + printf("\tIterRefine\t%4d\n", options->IterRefine); + printf("\tSymmetricMode\t%4d\n", options->SymmetricMode); + printf("\tPivotGrowth\t%4d\n", options->PivotGrowth); + printf("\tConditionNumber\t%4d\n", options->ConditionNumber); + printf("..\n"); +} + +/*! \brief Print the options setting. + */ +void print_ilu_options(superlu_options_t *options) +{ + printf(".. ILU options:\n"); + printf("\tDiagPivotThresh\t%6.2e\n", options->DiagPivotThresh); + printf("\ttau\t%6.2e\n", options->ILU_DropTol); + printf("\tgamma\t%6.2f\n", options->ILU_FillFactor); + printf("\tDropRule\t%0x\n", options->ILU_DropRule); + printf("\tMILU\t%d\n", options->ILU_MILU); + printf("\tMILU_ALPHA\t%6.2e\n", MILU_ALPHA); + printf("\tDiagFillTol\t%6.2e\n", options->ILU_FillTol); + printf("..\n"); +} + +/*! \brief Deallocate the structure pointing to the actual storage of the matrix. */ void Destroy_SuperMatrix_Store(SuperMatrix *A) { @@ -86,7 +136,7 @@ SUPERLU_FREE ( A->Store ); } -/* A is of type Stype==NCP */ +/*! \brief A is of type Stype==NCP */ void Destroy_CompCol_Permuted(SuperMatrix *A) { @@ -95,7 +145,7 @@ SUPERLU_FREE ( A->Store ); } -/* A is of type Stype==DN */ +/*! \brief A is of type Stype==DN */ void Destroy_Dense_Matrix(SuperMatrix *A) { @@ -104,8 +154,7 @@ SUPERLU_FREE ( A->Store ); } -/* - * Reset repfnz[] for the current column +/*! \brief Reset repfnz[] for the current column */ void resetrep_col (const int nseg, const int *segrep, int *repfnz) @@ -119,9 +168,7 @@ } -/* - * Count the total number of nonzeros in factors L and U, and in the - * symmetrically reduced L. +/*! \brief Count the total number of nonzeros in factors L and U, and in the symmetrically reduced L. */ void countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu) @@ -158,12 +205,41 @@ /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/ } +/*! \brief Count the total number of nonzeros in factors L and U. + */ +void +ilu_countnz(const int n, int *nnzL, int *nnzU, GlobalLU_t *Glu) +{ + int nsuper, fsupc, i, j; + int jlen, irep; + int *xsup, *xlsub; + xsup = Glu->xsup; + xlsub = Glu->xlsub; + *nnzL = 0; + *nnzU = (Glu->xusub)[n]; + nsuper = (Glu->supno)[n]; -/* - * Fix up the data storage lsub for L-subscripts. It removes the subscript - * sets for structural pruning, and applies permuation to the remaining - * subscripts. + if ( n <= 0 ) return; + + /* + * For each supernode + */ + for (i = 0; i <= nsuper; i++) { + fsupc = xsup[i]; + jlen = xlsub[fsupc+1] - xlsub[fsupc]; + + for (j = fsupc; j < xsup[i+1]; j++) { + *nnzL += jlen; + *nnzU += j - fsupc + 1; + jlen--; + } + irep = xsup[i+1] - 1; + } +} + + +/*! \brief Fix up the data storage lsub for L-subscripts. It removes the subscript sets for structural pruning, and applies permuation to the remaining subscripts. */ void fixupL(const int n, const int *perm_r, GlobalLU_t *Glu) @@ -199,8 +275,7 @@ } -/* - * Diagnostic print of segment info after panel_dfs(). +/*! \brief Diagnostic print of segment info after panel_dfs(). */ void print_panel_seg(int n, int w, int jcol, int nseg, int *segrep, int *repfnz) @@ -234,6 +309,9 @@ stat->utime[i] = 0.; stat->ops[i] = 0.; } + stat->TinyPivots = 0; + stat->RefineSteps = 0; + stat->expansions = 0; } @@ -255,6 +333,8 @@ printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE]); + printf("Number of memory expansions: %d\n", stat->expansions); + } @@ -283,8 +363,7 @@ -/* - * Fills an integer array with a given value. +/*! \brief Fills an integer array with a given value. */ void ifill(int *a, int alen, int ival) { @@ -294,8 +373,7 @@ -/* - * Get the statistics of the supernodes +/*! \brief Get the statistics of the supernodes */ #define NBUCKS 10 static int max_sup_size; @@ -350,8 +428,7 @@ -/* - * Check whether repfnz[] == EMPTY after reset. +/*! \brief Check whether repfnz[] == EMPTY after reset. */ void check_repfnz(int n, int w, int jcol, int *repfnz) { @@ -367,7 +444,7 @@ } -/* Print a summary of the testing results. */ +/*! \brief Print a summary of the testing results. */ void PrintSumm(char *type, int nfail, int nrun, int nerrs) { @@ -389,3 +466,19 @@ for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]); return 0; } + +int slu_PrintInt10(char *name, int len, int *x) +{ + register i; + + printf("%10s:", name); + for (i = 0; i < len; ++i) + { + if ( i % 10 == 0 ) printf("\n\t[%2d-%2d]", i, i + 9); + printf("%6d", x[i]); + } + printf("\n"); + return 0; +} + + Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/util.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,272 +0,0 @@ -#ifndef __SUPERLU_UTIL /* allow multiple inclusions */ -#define __SUPERLU_UTIL - -#include -#include -#include -#ifndef __STDC__ -#include -#endif -#include - -/*********************************************************************** - * Macros - ***********************************************************************/ -#define FIRSTCOL_OF_SNODE(i) (xsup[i]) -/* No of marker arrays used in the symbolic factorization, - each of size n */ -#define NO_MARKER 3 -#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) ) - -#ifndef USER_ABORT -#define USER_ABORT(msg) superlu_python_module_abort(msg) -#endif - -#define ABORT(err_msg) \ - { char msg[256];\ - sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ - USER_ABORT(msg); } - - -#ifndef USER_MALLOC -#if 1 -#define USER_MALLOC(size) superlu_python_module_malloc(size) -#else -/* The following may check out some uninitialized data */ -#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size) -#endif -#endif - -#define SUPERLU_MALLOC(size) USER_MALLOC(size) - -#ifndef USER_FREE -#define USER_FREE(addr) superlu_python_module_free(addr) -#endif - -#define SUPERLU_FREE(addr) USER_FREE(addr) - -#define CHECK_MALLOC(where) { \ - extern int superlu_malloc_total; \ - printf("%s: malloc_total %d Bytes\n", \ - where, superlu_malloc_total); \ -} - -#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) -#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) ) - -/*********************************************************************** - * Constants - ***********************************************************************/ -#define EMPTY (-1) -/*#define NO (-1)*/ -#define FALSE 0 -#define TRUE 1 - -/*********************************************************************** - * Enumerate types - ***********************************************************************/ -typedef enum {NO, YES} yes_no_t; -typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t; -typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t; -typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t; -typedef enum {NOTRANS, TRANS, CONJ} trans_t; -typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t; -typedef enum {NOREFINE, SINGLE=1, DOUBLE, EXTRA} IterRefine_t; -typedef enum {LUSUP, UCOL, LSUB, USUB} MemType; -typedef enum {HEAD, TAIL} stack_end_t; -typedef enum {SYSTEM, USER} LU_space_t; - -/* - * The following enumerate type is used by the statistics variable - * to keep track of flop count and time spent at various stages. - * - * Note that not all of the fields are disjoint. - */ -typedef enum { - COLPERM, /* find a column ordering that minimizes fills */ - RELAX, /* find artificial supernodes */ - ETREE, /* compute column etree */ - EQUIL, /* equilibrate the original matrix */ - FACT, /* perform LU factorization */ - RCOND, /* estimate reciprocal condition number */ - SOLVE, /* forward and back solves */ - REFINE, /* perform iterative refinement */ - FLOAT, /* time spent in floating-point operations */ - TRSV, /* fraction of FACT spent in xTRSV */ - GEMV, /* fraction of FACT spent in xGEMV */ - FERR, /* estimate error bounds after iterative refinement */ - NPHASES /* total number of phases */ -} PhaseType; - - -/*********************************************************************** - * Type definitions - ***********************************************************************/ -typedef float flops_t; -typedef unsigned char Logical; - -/* - *-- This contains the options used to control the solve process. - * - * Fact (fact_t) - * Specifies whether or not the factored form of the matrix - * A is supplied on entry, and if not, how the matrix A should - * be factorizaed. - * = DOFACT: The matrix A will be factorized from scratch, and the - * factors will be stored in L and U. - * = SamePattern: The matrix A will be factorized assuming - * that a factorization of a matrix with the same sparsity - * pattern was performed prior to this one. Therefore, this - * factorization will reuse column permutation vector - * ScalePermstruct->perm_c and the column elimination tree - * LUstruct->etree. - * = SamePattern_SameRowPerm: The matrix A will be factorized - * assuming that a factorization of a matrix with the same - * sparsity pattern and similar numerical values was performed - * prior to this one. Therefore, this factorization will reuse - * both row and column scaling factors R and C, and the - * both row and column permutation vectors perm_r and perm_c, - * distributed data structure set up from the previous symbolic - * factorization. - * = FACTORED: On entry, L, U, perm_r and perm_c contain the - * factored form of A. If DiagScale is not NOEQUIL, the matrix - * A has been equilibrated with scaling factors R and C. - * - * Equil (yes_no_t) - * Specifies whether to equilibrate the system (scale A's row and - * columns to have unit norm). - * - * ColPerm (colperm_t) - * Specifies what type of column permutation to use to reduce fill. - * = NATURAL: use the natural ordering - * = MMD_ATA: use minimum degree ordering on structure of A'*A - * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A - * = COLAMD: use approximate minimum degree column ordering - * = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[] - * - * Trans (trans_t) - * Specifies the form of the system of equations: - * = NOTRANS: A * X = B (No transpose) - * = TRANS: A**T * X = B (Transpose) - * = CONJ: A**H * X = B (Transpose) - * - * IterRefine (IterRefine_t) - * Specifies whether to perform iterative refinement. - * = NO: no iterative refinement - * = WorkingPrec: perform iterative refinement in working precision - * = ExtraPrec: perform iterative refinement in extra precision - * - * PrintStat (yes_no_t) - * Specifies whether to print the solver's statistics. - * - * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU) - * Specifies the threshold used for a diagonal entry to be an - * acceptable pivot. - * - * PivotGrowth (yes_no_t) - * Specifies whether to compute the reciprocal pivot growth. - * - * ConditionNumber (ues_no_t) - * Specifies whether to compute the reciprocal condition number. - * - * RowPerm (rowperm_t) (only for SuperLU_DIST) - * Specifies whether to permute rows of the original matrix. - * = NO: not to permute the rows - * = LargeDiag: make the diagonal large relative to the off-diagonal - * = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[] - * - * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) - * Specifies whether to replace the tiny diagonals by - * sqrt(epsilon)*||A|| during LU factorization. - * - * SolveInitialized (yes_no_t) (only for SuperLU_DIST) - * Specifies whether the initialization has been performed to the - * triangular solve. - * - * RefineInitialized (yes_no_t) (only for SuperLU_DIST) - * Specifies whether the initialization has been performed to the - * sparse matrix-vector multiplication routine needed in iterative - * refinement. - */ -typedef struct { - fact_t Fact; - yes_no_t Equil; - colperm_t ColPerm; - trans_t Trans; - IterRefine_t IterRefine; - yes_no_t PrintStat; - yes_no_t SymmetricMode; - double DiagPivotThresh; - yes_no_t PivotGrowth; - yes_no_t ConditionNumber; - rowperm_t RowPerm; - yes_no_t ReplaceTinyPivot; - yes_no_t SolveInitialized; - yes_no_t RefineInitialized; -} superlu_options_t; - -typedef struct { - int *panel_histo; /* histogram of panel size distribution */ - double *utime; /* running time at various phases */ - flops_t *ops; /* operation count at various phases */ - int TinyPivots; /* number of tiny pivots */ - int RefineSteps; /* number of iterative refinement steps */ -} SuperLUStat_t; - - -/*********************************************************************** - * Prototypes - ***********************************************************************/ -#ifdef __cplusplus -extern "C" { -#endif - - /* Added for SciPy */ -extern void superlu_python_module_abort(char *); -extern void *superlu_python_module_malloc (size_t); -extern void superlu_python_module_free (void *); - /* Added for SciPy */ - -extern void Destroy_SuperMatrix_Store(SuperMatrix *); -extern void Destroy_CompCol_Matrix(SuperMatrix *); -extern void Destroy_CompRow_Matrix(SuperMatrix *); -extern void Destroy_SuperNode_Matrix(SuperMatrix *); -extern void Destroy_CompCol_Permuted(SuperMatrix *); -extern void Destroy_Dense_Matrix(SuperMatrix *); -extern void get_perm_c(int, SuperMatrix *, int *); -extern void set_default_options(superlu_options_t *options); -extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*, - SuperMatrix*); -/* extern void superlu_abort_and_exit(char*); -extern void *superlu_malloc (size_t); */ -extern int *intMalloc (int); -extern int *intCalloc (int); -/* extern void superlu_free (void*); */ -extern void SetIWork (int, int, int, int *, int **, int **, int **, - int **, int **, int **, int **); -extern int sp_coletree (int *, int *, int *, int, int, int *); -extern void relax_snode (const int, int *, const int, int *, int *); -extern void heap_relax_snode (const int, int *, const int, int *, int *); -extern void resetrep_col (const int, const int *, int *); -extern int spcoletree (int *, int *, int *, int, int, int *); -extern int *TreePostorder (int, int *); -extern double SuperLU_timer_ (void); -extern int sp_ienv (int); -extern int lsame_ (char *, char *); -extern int xerbla_ (char *, int *); -extern void ifill (int *, int, int); -extern void snode_profile (int, int *); -extern void super_stats (int, int *); -extern void PrintSumm (char *, int, int, int); -extern void StatInit(SuperLUStat_t *); -extern void StatPrint (SuperLUStat_t *); -extern void StatFree(SuperLUStat_t *); -extern void print_panel_seg(int, int, int, int, int *, int *); -extern void check_repfnz(int, int, int, int *); - - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_UTIL */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/xerbla.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/xerbla.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/xerbla.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,3 +1,6 @@ +#include +#include "slu_Cnames.h" + /* Subroutine */ int xerbla_(char *srname, int *info) { /* -- LAPACK auxiliary routine (version 2.0) -- Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,29 @@ -/* +/*! @file zcolumn_bmod.c + * \brief performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ *  Permission is hereby granted to use or copy this program for any
+ *  purpose, provided the above notices are retained on all copies.
+ *  Permission to modify the code and to distribute modified code is
+ *  granted, provided the above notices are retained, and a notice that
+ *  the code was modified is included with the above copyright notice.
+ * 
*/ #include #include -#include "zsp_defs.h" +#include "slu_zdefs.h" /* * Function prototypes @@ -32,8 +34,17 @@ -/* Return value: 0 - successful return +/*! \brief + * + *
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int zcolumn_bmod ( @@ -48,14 +59,7 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose: - * ======== - * Performs numeric block updates (sup-col) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - */ + #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcolumn_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,50 +1,38 @@ - -/* +/*! @file zcolumn_dfs.c + * \brief Performs a symbolic factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" -/* What type of supernodes we want */ +/*! \brief What type of supernodes we want */ #define T2_SUPER -int -zcolumn_dfs( - const int m, /* in - number of rows in the matrix */ - const int jcol, /* in */ - int *perm_r, /* in */ - int *nseg, /* modified - with new segments appended */ - int *lsub_col, /* in - defines the RHS vector to start the dfs */ - int *segrep, /* modified - with new segments appended */ - int *repfnz, /* modified */ - int *xprune, /* modified */ - int *marker, /* modified */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* + +/*! \brief + * + *
  * Purpose
  * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
+ *   ZCOLUMN_DFS performs a symbolic factorization on column jcol, and
  *   decide the supernode boundary.
  *
  *   This routine does not use numeric values, but only use the RHS 
@@ -72,8 +60,25 @@
  * ============
  *     0  success;
  *   > 0  number of bytes allocated when run out of space.
- *
+ * 
*/ +int +zcolumn_dfs( + const int m, /* in - number of rows in the matrix */ + const int jcol, /* in */ + int *perm_r, /* in */ + int *nseg, /* modified - with new segments appended */ + int *lsub_col, /* in - defines the RHS vector to start the dfs */ + int *segrep, /* modified - with new segments appended */ + int *repfnz, /* modified */ + int *xprune, /* modified */ + int *marker, /* modified */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + int jcolp1, jcolm1, jsuper, nsuper, nextl; int k, krep, krow, kmark, kperm; int *marker2; /* Used for small panel LU */ Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcopy_to_ucol.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcopy_to_ucol.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zcopy_to_ucol.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,26 @@ - -/* +/*! @file zcopy_to_ucol.c + * \brief Copy a computed column of U to the compressed data structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" int zcopy_to_ucol( @@ -47,7 +46,6 @@ doublecomplex *ucol; int *usub, *xusub; int nzumax; - doublecomplex zero = {0.0, 0.0}; xsup = Glu->xsup; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zdiagonal.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zdiagonal.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zdiagonal.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,133 @@ + +/*! @file zdiagonal.c + * \brief Auxiliary routines to work with diagonal elements + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +int zfill_diag(int n, NCformat *Astore) +/* fill explicit zeros on the diagonal entries, so that the matrix is not + structurally singular. */ +{ + doublecomplex *nzval = (doublecomplex *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + doublecomplex *nzval_new; + doublecomplex zero = {1.0, 0.0}; + int *rowind_new; + int i, j, diag; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = doublecomplexMalloc(nnz + fill); + rowind_new = intMalloc(nnz + fill); + fill = 0; + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + } + if (diag < 0) + { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill] = zero; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + Astore->nnz += fill; + return fill; +} + +int zdominate(int n, NCformat *Astore) +/* make the matrix diagonally dominant */ +{ + doublecomplex *nzval = (doublecomplex *)Astore->nzval; + int *rowind = Astore->rowind; + int *colptr = Astore->colptr; + int nnz = colptr[n]; + int fill = 0; + doublecomplex *nzval_new; + int *rowind_new; + int i, j, diag; + double s; + + for (i = 0; i < n; i++) + { + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + if (rowind[j] == i) diag = j; + if (diag < 0) fill++; + } + if (fill) + { + nzval_new = doublecomplexMalloc(nnz + fill); + rowind_new = intMalloc(nnz+ fill); + fill = 0; + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i] - fill; j < colptr[i + 1]; j++) + { + if ((rowind_new[j + fill] = rowind[j]) == i) diag = j; + nzval_new[j + fill] = nzval[j]; + s += z_abs1(&nzval_new[j + fill]); + } + if (diag >= 0) { + nzval_new[diag+fill].r = s * 3.0; + nzval_new[diag+fill].i = 0.0; + } else { + rowind_new[colptr[i + 1] + fill] = i; + nzval_new[colptr[i + 1] + fill].r = s * 3.0; + nzval_new[colptr[i + 1] + fill].i = 0.0; + fill++; + } + colptr[i + 1] += fill; + } + Astore->nzval = nzval_new; + Astore->rowind = rowind_new; + SUPERLU_FREE(nzval); + SUPERLU_FREE(rowind); + } + else + { + for (i = 0; i < n; i++) + { + s = 1e-6; + diag = -1; + for (j = colptr[i]; j < colptr[i + 1]; j++) + { + if (rowind[j] == i) diag = j; + s += z_abs1(&nzval[j]); + } + nzval[diag].r = s * 3.0; + nzval[diag].i = 0.0; + } + } + Astore->nnz += fill; + return fill; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgscon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgscon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgscon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,70 +1,81 @@ -/* +/*! @file zgscon.c + * \brief Estimates reciprocal of the condition number of a general matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routines ZGECON.
+ * 
*/ + /* * File name: zgscon.c * History: Modified from lapack routines ZGECON. */ #include -#include "zsp_defs.h" +#include "slu_zdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   ZGSCON estimates the reciprocal of the condition number of a general 
+ *   real matrix A, in either the 1-norm or the infinity-norm, using   
+ *   the LU factorization computed by ZGETRF.   *
+ *
+ *   An estimate is obtained for norm(inv(A)), and the reciprocal of the   
+ *   condition number is computed as   
+ *      RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ * 
+ *   Arguments   
+ *   =========   
+ *
+ *    NORM    (input) char*
+ *            Specifies whether the 1-norm condition number or the   
+ *            infinity-norm condition number is required:   
+ *            = '1' or 'O':  1-norm;   
+ *            = 'I':         Infinity-norm.
+ *	    
+ *    L       (input) SuperMatrix*
+ *            The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *            zgstrf(). Use compressed row subscripts storage for supernodes,
+ *            i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
+ * 
+ *    U       (input) SuperMatrix*
+ *            The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *            zgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *            Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
+ *	    
+ *    ANORM   (input) double
+ *            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
+ *            If NORM = 'I', the infinity-norm of the original matrix A.
+ *	    
+ *    RCOND   (output) double*
+ *           The reciprocal of the condition number of the matrix A,   
+ *           computed as RCOND = 1/(norm(A) * norm(inv(A))).
+ *	    
+ *    INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *
+ *    ===================================================================== 
+ * 
+ */ + void zgscon(char *norm, SuperMatrix *L, SuperMatrix *U, double anorm, double *rcond, SuperLUStat_t *stat, int *info) { -/* - Purpose - ======= - ZGSCON estimates the reciprocal of the condition number of a general - real matrix A, in either the 1-norm or the infinity-norm, using - the LU factorization computed by ZGETRF. - An estimate is obtained for norm(inv(A)), and the reciprocal of the - condition number is computed as - RCOND = 1 / ( norm(A) * norm(inv(A)) ). - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - NORM (input) char* - Specifies whether the 1-norm condition number or the - infinity-norm condition number is required: - = '1' or 'O': 1-norm; - = 'I': Infinity-norm. - - L (input) SuperMatrix* - The factor L from the factorization Pr*A*Pc=L*U as computed by - zgstrf(). Use compressed row subscripts storage for supernodes, - i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU. - - U (input) SuperMatrix* - The factor U from the factorization Pr*A*Pc=L*U as computed by - zgstrf(). Use column-wise storage scheme, i.e., U has types: - Stype = SLU_NC, Dtype = SLU_Z, Mtype = TRU. - - ANORM (input) double - If NORM = '1' or 'O', the 1-norm of the original matrix A. - If NORM = 'I', the infinity-norm of the original matrix A. - - RCOND (output) double* - The reciprocal of the condition number of the matrix A, - computed as RCOND = 1/(norm(A) * norm(inv(A))). - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== -*/ - /* Local variables */ int kase, kase1, onenrm, i; double ainvnm; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsequ.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsequ.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsequ.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,82 +1,91 @@ - -/* +/*! @file zgsequ.c + * \brief Computes row and column scalings + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from LAPACK routine ZGEEQU
+ * 
*/ /* * File name: zgsequ.c * History: Modified from LAPACK routine ZGEEQU */ #include -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" + + +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   ZGSEQU computes row and column scalings intended to equilibrate an   
+ *   M-by-N sparse matrix A and reduce its condition number. R returns the row
+ *   scale factors and C the column scale factors, chosen to try to make   
+ *   the largest element in each row and column of the matrix B with   
+ *   elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
+ *
+ *   R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
+ *   number and BIGNUM = largest safe number.  Use of these scaling   
+ *   factors is not guaranteed to reduce the condition number of A but   
+ *   works well in practice.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input) SuperMatrix*
+ *           The matrix of dimension (A->nrow, A->ncol) whose equilibration
+ *           factors are to be computed. The type of A can be:
+ *           Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
+ *	    
+ *   R       (output) double*, size A->nrow
+ *           If INFO = 0 or INFO > M, R contains the row scale factors   
+ *           for A.
+ *	    
+ *   C       (output) double*, size A->ncol
+ *           If INFO = 0,  C contains the column scale factors for A.
+ *	    
+ *   ROWCND  (output) double*
+ *           If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
+ *           smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
+ *           AMAX is neither too large nor too small, it is not worth   
+ *           scaling by R.
+ *	    
+ *   COLCND  (output) double*
+ *           If INFO = 0, COLCND contains the ratio of the smallest   
+ *           C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
+ *           worth scaling by C.
+ *	    
+ *   AMAX    (output) double*
+ *           Absolute value of largest matrix element.  If AMAX is very   
+ *           close to overflow or very close to underflow, the matrix   
+ *           should be scaled.
+ *	    
+ *   INFO    (output) int*
+ *           = 0:  successful exit   
+ *           < 0:  if INFO = -i, the i-th argument had an illegal value   
+ *           > 0:  if INFO = i,  and i is   
+ *                 <= A->nrow:  the i-th row of A is exactly zero   
+ *                 >  A->ncol:  the (i-M)-th column of A is exactly zero   
+ *
+ *   ===================================================================== 
+ * 
+ */ void zgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int *info) { -/* - Purpose - ======= - ZGSEQU computes row and column scalings intended to equilibrate an - M-by-N sparse matrix A and reduce its condition number. R returns the row - scale factors and C the column scale factors, chosen to try to make - the largest element in each row and column of the matrix B with - elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. - R(i) and C(j) are restricted to be between SMLNUM = smallest safe - number and BIGNUM = largest safe number. Use of these scaling - factors is not guaranteed to reduce the condition number of A but - works well in practice. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input) SuperMatrix* - The matrix of dimension (A->nrow, A->ncol) whose equilibration - factors are to be computed. The type of A can be: - Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE. - - R (output) double*, size A->nrow - If INFO = 0 or INFO > M, R contains the row scale factors - for A. - - C (output) double*, size A->ncol - If INFO = 0, C contains the column scale factors for A. - - ROWCND (output) double* - If INFO = 0 or INFO > M, ROWCND contains the ratio of the - smallest R(i) to the largest R(i). If ROWCND >= 0.1 and - AMAX is neither too large nor too small, it is not worth - scaling by R. - - COLCND (output) double* - If INFO = 0, COLCND contains the ratio of the smallest - C(i) to the largest C(i). If COLCND >= 0.1, it is not - worth scaling by C. - - AMAX (output) double* - Absolute value of largest matrix element. If AMAX is very - close to overflow or very close to underflow, the matrix - should be scaled. - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, and i is - <= A->nrow: the i-th row of A is exactly zero - > A->ncol: the (i-M)-th column of A is exactly zero - - ===================================================================== -*/ - /* Local variables */ NCformat *Astore; doublecomplex *Aval; Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsisx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsisx.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsisx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,693 @@ + +/*! @file zgsisx.c + * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ +#include "slu_zdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * ZGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
+ * using the ILU factorization from zgsitrf(). An estimation of
+ * the condition number is provided. It performs the following steps:
+ *
+ *   1. If A is stored column-wise (A->Stype = SLU_NC):
+ *  
+ *	1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A is
+ *	     overwritten by diag(R)*A*diag(C) and B by diag(R)*B
+ *	     (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
+ *	     = TRANS or CONJ).
+ *
+ *	1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
+ *	     matrix that usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	1.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the matrix A (after equilibration if options->Equil = YES)
+ *	     as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
+ *
+ *	1.4. Compute the reciprocal pivot growth factor.
+ *
+ *	1.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
+ *	     and info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	1.6. The system of equations is solved for X using the factored form
+ *	     of A.
+ *
+ *	1.7. options->IterRefine is not used
+ *
+ *	1.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *	1.9. options for ILU only
+ *	     1) If options->RowPerm = LargeDiag, MC64 is used to scale and
+ *		permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
+ *		entries of modulus 1 on the diagonal and off-diagonal entries
+ *		of modulus at most 1. If MC64 fails, dgsequ() is used to
+ *		equilibrate the system.
+ *	     2) options->ILU_DropTol = tau is the threshold for dropping.
+ *		For L, it is used directly (for the whole row in a supernode);
+ *		For U, ||A(:,i)||_oo * tau is used as the threshold
+ *	        for the	i-th column.
+ *		If a secondary dropping rule is required, tau will
+ *	        also be used to compute the second threshold.
+ *	     3) options->ILU_FillFactor = gamma, used as the initial guess
+ *		of memory growth.
+ *		If a secondary dropping rule is required, it will also
+ *              be used as an upper bound of the memory.
+ *	     4) options->ILU_DropRule specifies the dropping rule.
+ *		Option		Explanation
+ *		======		===========
+ *		DROP_BASIC:	Basic dropping rule, supernodal based ILU.
+ *		DROP_PROWS:	Supernodal based ILUTP, p = gamma * nnz(A) / n.
+ *		DROP_COLUMN:	Variation of ILUTP, for j-th column,
+ *				p = gamma * nnz(A(:,j)).
+ *		DROP_AREA;	Variation of ILUTP, for j-th column, use
+ *				nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
+ *				memory.
+ *		DROP_DYNAMIC:	Modify the threshold tau during the
+ *				factorizaion.
+ *				If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				Otherwise
+ *				    tau_L(j) := MIN(1, tau_L(j-1) * 2);
+ *				tau_U(j) uses the similar rule.
+ *				NOTE: the thresholds used by L and U are
+ *				indenpendent.
+ *		DROP_INTERP:	Compute the second dropping threshold by
+ *				interpolation instead of sorting (default).
+ *				In this case, the actual fill ratio is not
+ *				guaranteed smaller than gamma.
+ *		DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
+ *		( The default option is DROP_BASIC | DROP_AREA. )
+ *	     5) options->ILU_Norm is the criterion of computing the average
+ *		value of a row in L.
+ *		options->ILU_Norm	average(x[1:n])
+ *		=================	===============
+ *		ONE_NORM		||x||_1 / n
+ *		TWO_NORM		||x||_2 / sqrt(n)
+ *		INF_NORM		max{|x[i]|}
+ *	     6) options->ILU_MILU specifies the type of MILU's variation.
+ *		= SILU (default): do not perform MILU;
+ *		= SMILU_1 (not recommended):
+ *		    U(i,i) := U(i,i) + sum(dropped entries);
+ *		= SMILU_2:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
+ *		= SMILU_3:
+ *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
+ *		NOTE: Even SMILU_1 does not preserve the column sum because of
+ *		late dropping.
+ *	     7) options->ILU_FillTol is used as the perturbation when
+ *		encountering zero pivots. If some U(i,i) = 0, so that U is
+ *		exactly singular, then
+ *		   U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ *
+ *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
+ *	to the transpose of A:
+ *
+ *	2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
+ *	     factors are computed to equilibrate the system:
+ *	     options->Trans = NOTRANS:
+ *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
+ *	     options->Trans = TRANS:
+ *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+ *	     options->Trans = CONJ:
+ *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+ *	     Whether or not the system will be equilibrated depends on the
+ *	     scaling of the matrix A, but if equilibration is used, A' is
+ *	     overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
+ *	     (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
+ *
+ *	2.2. Permute columns of transpose(A) (rows of A),
+ *	     forming transpose(A)*Pc, where Pc is a permutation matrix that
+ *	     usually preserves sparsity.
+ *	     For more details of this step, see sp_preorder.c.
+ *
+ *	2.3. If options->Fact != FACTORED, the LU decomposition is used to
+ *	     factor the transpose(A) (after equilibration if
+ *	     options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
+ *	     permutation Pr determined by partial pivoting.
+ *
+ *	2.4. Compute the reciprocal pivot growth factor.
+ *
+ *	2.5. If some U(i,i) = 0, so that U is exactly singular, then the
+ *	     routine fills a small number on the diagonal entry, that is
+ *		 U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
+ *	     And info will be increased by 1. The factored form of A is used
+ *	     to estimate the condition number of the preconditioner. If the
+ *	     reciprocal of the condition number is less than machine precision,
+ *	     info = A->ncol+1 is returned as a warning, but the routine still
+ *	     goes on to solve for X.
+ *
+ *	2.6. The system of equations is solved for X using the factored form
+ *	     of transpose(A).
+ *
+ *	2.7. If options->IterRefine is not used.
+ *
+ *	2.8. If equilibration was used, the matrix X is premultiplied by
+ *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
+ *	     (if options->Trans = TRANS or CONJ) so that it solves the
+ *	     original system before equilibration.
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the LU decomposition will be performed and how the
+ *	   system will be solved.
+ *
+ * A	   (input/output) SuperMatrix*
+ *	   Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *	   of the linear equations is A->nrow. Currently, the type of A can be:
+ *	   Stype = SLU_NC or SLU_NR, Dtype = SLU_Z, Mtype = SLU_GE.
+ *	   In the future, more general A may be handled.
+ *
+ *	   On entry, If options->Fact = FACTORED and equed is not 'N',
+ *	   then A must have been equilibrated by the scaling factors in
+ *	   R and/or C.
+ *	   On exit, A is not modified if options->Equil = NO, or if
+ *	   options->Equil = YES but equed = 'N' on exit.
+ *	   Otherwise, if options->Equil = YES and equed is not 'N',
+ *	   A is scaled as follows:
+ *	   If A->Stype = SLU_NC:
+ *	     equed = 'R':  A := diag(R) * A
+ *	     equed = 'C':  A := A * diag(C)
+ *	     equed = 'B':  A := diag(R) * A * diag(C).
+ *	   If A->Stype = SLU_NR:
+ *	     equed = 'R':  transpose(A) := diag(R) * transpose(A)
+ *	     equed = 'C':  transpose(A) := transpose(A) * diag(C)
+ *	     equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
+ *
+ * perm_c  (input/output) int*
+ *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
+ *	   which defines the permutation matrix Pc; perm_c[i] = j means
+ *	   column i of A is in position j in A*Pc.
+ *	   On exit, perm_c may be overwritten by the product of the input
+ *	   perm_c and a permutation that postorders the elimination tree
+ *	   of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ *	   is already in postorder.
+ *
+ *	   If A->Stype = SLU_NR, column permutation vector of size A->nrow,
+ *	   which describes permutation of columns of transpose(A) 
+ *	   (rows of A) as described above.
+ *
+ * perm_r  (input/output) int*
+ *	   If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
+ *	   which defines the permutation matrix Pr, and is determined
+ *	   by partial pivoting.  perm_r[i] = j means row i of A is in 
+ *	   position j in Pr*A.
+ *
+ *	   If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ *	   determines permutation of rows of transpose(A)
+ *	   (columns of A) as described above.
+ *
+ *	   If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	   will try to use the input perm_r, unless a certain threshold
+ *	   criterion is violated. In that case, perm_r is overwritten by a
+ *	   new permutation determined by partial pivoting or diagonal
+ *	   threshold pivoting.
+ *	   Otherwise, perm_r is output argument.
+ *
+ * etree   (input/output) int*,  dimension (A->ncol)
+ *	   Elimination tree of Pc'*A'*A*Pc.
+ *	   If options->Fact != FACTORED and options->Fact != DOFACT,
+ *	   etree is an input argument, otherwise it is an output argument.
+ *	   Note: etree is a vector of parent pointers for a forest whose
+ *	   vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * equed   (input/output) char*
+ *	   Specifies the form of equilibration that was done.
+ *	   = 'N': No equilibration.
+ *	   = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
+ *	   = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
+ *	   = 'B': Both row and column equilibration, i.e., A was replaced 
+ *		  by diag(R)*A*diag(C).
+ *	   If options->Fact = FACTORED, equed is an input argument,
+ *	   otherwise it is an output argument.
+ *
+ * R	   (input/output) double*, dimension (A->nrow)
+ *	   The row scale factors for A or transpose(A).
+ *	   If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
+ *	   If equed = 'N' or 'C', R is not accessed.
+ *	   If options->Fact = FACTORED, R is an input argument,
+ *	       otherwise, R is output.
+ *	   If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ *	       of R must be positive.
+ *
+ * C	   (input/output) double*, dimension (A->ncol)
+ *	   The column scale factors for A or transpose(A).
+ *	   If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
+ *	       (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
+ *	   If equed = 'N' or 'R', C is not accessed.
+ *	   If options->Fact = FACTORED, C is an input argument,
+ *	       otherwise, C is output.
+ *	   If options->Fact = FACTORED and equed = 'C' or 'B', each element
+ *	       of C must be positive.
+ *
+ * L	   (output) SuperMatrix*
+ *	   The factor L from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype SLU_= NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses compressed row subscripts storage for supernodes, i.e.,
+ *	   L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
+ *
+ * U	   (output) SuperMatrix*
+ *	   The factor U from the factorization
+ *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
+ *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
+ *	   Uses column-wise storage scheme, i.e., U has types:
+ *	   Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
+ *
+ * work    (workspace/output) void*, size (lwork) (in bytes)
+ *	   User supplied workspace, should be large enough
+ *	   to hold data structures for factors L and U.
+ *	   On exit, if fact is not 'F', L and U point to this array.
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 mem_usage->total_needed; no other side effects.
+ *
+ *	   See argument 'mem_usage' for memory usage statistics.
+ *
+ * B	   (input/output) SuperMatrix*
+ *	   B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
+ *	   On entry, the right hand side matrix.
+ *	   If B->ncol = 0, only LU decomposition is performed, the triangular
+ *			   solve is skipped.
+ *	   On exit,
+ *	      if equed = 'N', B is not modified; otherwise
+ *	      if A->Stype = SLU_NC:
+ *		 if options->Trans = NOTRANS and equed = 'R' or 'B',
+ *		    B is overwritten by diag(R)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
+ *		    B is overwritten by diag(C)*B;
+ *	      if A->Stype = SLU_NR:
+ *		 if options->Trans = NOTRANS and equed = 'C' or 'B',
+ *		    B is overwritten by diag(C)*B;
+ *		 if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
+ *		    B is overwritten by diag(R)*B.
+ *
+ * X	   (output) SuperMatrix*
+ *	   X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
+ *	   If info = 0 or info = A->ncol+1, X contains the solution matrix
+ *	   to the original system of equations. Note that A and B are modified
+ *	   on exit if equed is not 'N', and the solution to the equilibrated
+ *	   system is inv(diag(C))*X if options->Trans = NOTRANS and
+ *	   equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
+ *	   and equed = 'R' or 'B'.
+ *
+ * recip_pivot_growth (output) double*
+ *	   The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
+ *	   The infinity norm is used. If recip_pivot_growth is much less
+ *	   than 1, the stability of the LU factorization could be poor.
+ *
+ * rcond   (output) double*
+ *	   The estimate of the reciprocal condition number of the matrix A
+ *	   after equilibration (if done). If rcond is less than the machine
+ *	   precision (in particular, if rcond = 0), the matrix is singular
+ *	   to working precision. This condition is indicated by a return
+ *	   code of info > 0.
+ *
+ * mem_usage (output) mem_usage_t*
+ *	   Record the memory usage statistics, consisting of following fields:
+ *	   - for_lu (float)
+ *	     The amount of space used in bytes for L\U data structures.
+ *	   - total_needed (float)
+ *	     The amount of space needed in bytes to perform factorization.
+ *	   - expansions (int)
+ *	     The number of memory expansions during the LU factorization.
+ *
+ * stat   (output) SuperLUStat_t*
+ *	  Record the statistics on runtime and floating-point operation count.
+ *	  See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ *	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ *	   > 0: if info = i, and i is
+ *		<= A->ncol: number of zero pivots. They are replaced by small
+ *		      entries due to options->ILU_FillTol.
+ *		= A->ncol+1: U is nonsingular, but RCOND is less than machine
+ *		      precision, meaning that the matrix is singular to
+ *		      working precision. Nevertheless, the solution and
+ *		      error bounds are computed because there are a number
+ *		      of situations where the computed solution can be more
+ *		      accurate than the value of RCOND would suggest.
+ *		> A->ncol+1: number of bytes allocated when memory allocation
+ *		      failure occurred, plus A->ncol.
+ * 
+ */ + +void +zgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, double *R, double *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, + double *recip_pivot_growth, double *rcond, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) +{ + + DNformat *Bstore, *Xstore; + doublecomplex *Bmat, *Xmat; + int ldb, ldx, nrhs; + SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ + SuperMatrix AC; /* Matrix postmultiplied by Pc */ + int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; + trans_t trant; + char norm[1]; + int i, j, info1; + double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; + int relax, panel_size; + double diag_pivot_thresh; + double t0; /* temporary time */ + double *utime; + + int *perm = NULL; + + /* External functions */ + extern double zlangs(char *, SuperMatrix *); + + Bstore = B->Store; + Xstore = X->Store; + Bmat = Bstore->nzval; + Xmat = Xstore->nzval; + ldb = Bstore->lda; + ldx = Xstore->lda; + nrhs = B->ncol; + + *info = 0; + nofact = (options->Fact != FACTORED); + equil = (options->Equil == YES); + notran = (options->Trans == NOTRANS); + mc64 = (options->RowPerm == LargeDiag); + if ( nofact ) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE; + colequ = FALSE; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + + /* Test the input parameters */ + if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && + options->Fact != SamePattern_SameRowPerm && + !notran && options->Trans != TRANS && options->Trans != CONJ && + !equil && options->Equil != NO) + *info = -1; + else if ( A->nrow != A->ncol || A->nrow < 0 || + (A->Stype != SLU_NC && A->Stype != SLU_NR) || + A->Dtype != SLU_Z || A->Mtype != SLU_GE ) + *info = -2; + else if (options->Fact == FACTORED && + !(rowequ || colequ || lsame_(equed, "N"))) + *info = -6; + else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, R[j]); + rcmax = SUPERLU_MAX(rcmax, R[j]); + } + if (rcmin <= 0.) *info = -7; + else if ( A->nrow > 0) + rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else rowcnd = 1.; + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + for (j = 0; j < A->nrow; ++j) { + rcmin = SUPERLU_MIN(rcmin, C[j]); + rcmax = SUPERLU_MAX(rcmax, C[j]); + } + if (rcmin <= 0.) *info = -8; + else if (A->nrow > 0) + colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); + else colcnd = 1.; + } + if (*info == 0) { + if ( lwork < -1 ) *info = -12; + else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || + B->Stype != SLU_DN || B->Dtype != SLU_Z || + B->Mtype != SLU_GE ) + *info = -13; + else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || + (B->ncol != 0 && B->ncol != X->ncol) || + X->Stype != SLU_DN || + X->Dtype != SLU_Z || X->Mtype != SLU_GE ) + *info = -14; + } + } + if (*info != 0) { + i = -(*info); + xerbla_("zgsisx", &i); + return; + } + + /* Initialization for factor parameters */ + panel_size = sp_ienv(1); + relax = sp_ienv(2); + diag_pivot_thresh = options->DiagPivotThresh; + + utime = stat->utime; + + /* Convert A to SLU_NC format when necessary. */ + if ( A->Stype == SLU_NR ) { + NRformat *Astore = A->Store; + AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, + Astore->nzval, Astore->colind, Astore->rowptr, + SLU_NC, A->Dtype, A->Mtype); + if ( notran ) { /* Reverse the transpose argument. */ + trant = TRANS; + notran = 0; + } else { + trant = NOTRANS; + notran = 1; + } + } else { /* A->Stype == SLU_NC */ + trant = options->Trans; + AA = A; + } + + if ( nofact ) { + register int i, j; + NCformat *Astore = AA->Store; + int nnz = Astore->nnz; + int *colptr = Astore->colptr; + int *rowind = Astore->rowind; + doublecomplex *nzval = (doublecomplex *)Astore->nzval; + int n = AA->nrow; + + if ( mc64 ) { + *equed = 'B'; + rowequ = colequ = 1; + t0 = SuperLU_timer_(); + if ((perm = intMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for perm[]"); + + info1 = zldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); + + if (info1 > 0) { /* MC64 fails, call zgsequ() later */ + mc64 = 0; + SUPERLU_FREE(perm); + perm = NULL; + } else { + for (i = 0; i < n; i++) { + R[i] = exp(R[i]); + C[i] = exp(C[i]); + } + /* permute and scale the matrix */ + for (j = 0; j < n; j++) { + for (i = colptr[j]; i < colptr[j + 1]; i++) { + zd_mult(&nzval[i], &nzval[i], R[rowind[i]] * C[j]); + rowind[i] = perm[rowind[i]]; + } + } + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + if ( !mc64 & equil ) { + t0 = SuperLU_timer_(); + /* Compute row and column scalings to equilibrate the matrix A. */ + zgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); + + if ( info1 == 0 ) { + /* Equilibrate matrix A. */ + zlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + } + utime[EQUIL] = SuperLU_timer_() - t0; + } + } + + if ( nrhs > 0 ) { + /* Scale the right hand side if equilibration was performed. */ + if ( notran ) { + if ( rowequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]); + } + } + } else if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]); + } + } + } + + if ( nofact ) { + + t0 = SuperLU_timer_(); + /* + * Gnet column permutation vector perm_c[], according to permc_spec: + * permc_spec = NATURAL: natural ordering + * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A + * permc_spec = MMD_ATA: minimum degree on structure of A'*A + * permc_spec = COLAMD: approximate minimum degree column ordering + * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] + */ + permc_spec = options->ColPerm; + if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) + get_perm_c(permc_spec, AA, perm_c); + utime[COLPERM] = SuperLU_timer_() - t0; + + t0 = SuperLU_timer_(); + sp_preorder(options, AA, perm_c, etree, &AC); + utime[ETREE] = SuperLU_timer_() - t0; + + /* Compute the LU factorization of A*Pc. */ + t0 = SuperLU_timer_(); + zgsitrf(options, &AC, relax, panel_size, etree, work, lwork, + perm_c, perm_r, L, U, stat, info); + utime[FACT] = SuperLU_timer_() - t0; + + if ( lwork == -1 ) { + mem_usage->total_needed = *info - A->ncol; + return; + } + } + + if ( options->PivotGrowth ) { + if ( *info > 0 ) return; + + /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ + *recip_pivot_growth = zPivotGrowth(A->ncol, AA, perm_c, L, U); + } + + if ( options->ConditionNumber ) { + /* Estimate the reciprocal of the condition number of A. */ + t0 = SuperLU_timer_(); + if ( notran ) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = zlangs(norm, AA); + zgscon(norm, L, U, anorm, rcond, stat, &info1); + utime[RCOND] = SuperLU_timer_() - t0; + } + + if ( nrhs > 0 ) { + /* Compute the solution matrix X. */ + for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ + for (i = 0; i < B->nrow; i++) + Xmat[i + j*ldx] = Bmat[i + j*ldb]; + + t0 = SuperLU_timer_(); + zgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); + utime[SOLVE] = SuperLU_timer_() - t0; + + /* Transform the solution matrix X to a solution of the original + system. */ + if ( notran ) { + if ( colequ ) { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]); + } + } + } else { + if ( rowequ ) { + if (perm) { + doublecomplex *tmp; + int n = A->nrow; + + if ((tmp = doublecomplexMalloc(n)) == NULL) + ABORT("SUPERLU_MALLOC fails for tmp[]"); + for (j = 0; j < nrhs; j++) { + for (i = 0; i < n; i++) + tmp[i] = Xmat[i + j * ldx]; /*dcopy*/ + for (i = 0; i < n; i++) + zd_mult(&Xmat[i+j*ldx], &tmp[perm[i]], R[i]); + } + SUPERLU_FREE(tmp); + } else { + for (j = 0; j < nrhs; ++j) + for (i = 0; i < A->nrow; ++i) { + zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]); + } + } + } + } + } /* end if nrhs > 0 */ + + if ( options->ConditionNumber ) { + /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ + if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1; + } + + if (perm) SUPERLU_FREE(perm); + + if ( nofact ) { + ilu_zQuerySpace(L, U, mem_usage); + Destroy_CompCol_Permuted(&AC); + } + if ( A->Stype == SLU_NR ) { + Destroy_SuperMatrix_Store(AA); + SUPERLU_FREE(AA); + } + +} Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsitrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsitrf.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsitrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,628 @@ + +/*! @file zgsitf.c + * \brief Computes an ILU factorization of a general sparse matrix + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +#ifdef DEBUG +int num_drop_L; +#endif + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ * ZGSITRF computes an ILU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ *     Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *	   The structure defines the input parameters to control
+ *	   how the ILU decomposition will be performed.
+ *
+ * A	    (input) SuperMatrix*
+ *	    Original matrix A, permuted by columns, of dimension
+ *	    (A->nrow, A->ncol). The type of A can be:
+ *	    Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
+ *
+ * relax    (input) int
+ *	    To control degree of relaxing supernodes. If the number
+ *	    of nodes (columns) in a subtree of the elimination tree is less
+ *	    than relax, this subtree is considered as one supernode,
+ *	    regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ *	    A panel consists of at most panel_size consecutive columns.
+ *
+ * etree    (input) int*, dimension (A->ncol)
+ *	    Elimination tree of A'*A.
+ *	    Note: etree is a vector of parent pointers for a forest whose
+ *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *	    On input, the columns of A should be permuted so that the
+ *	    etree is in a certain postorder.
+ *
+ * work     (input/output) void*, size (lwork) (in bytes)
+ *	    User-supplied work space and space for the output data structures.
+ *	    Not referenced if lwork = 0;
+ *
+ * lwork   (input) int
+ *	   Specifies the size of work array in bytes.
+ *	   = 0:  allocate space internally by system malloc;
+ *	   > 0:  use user-supplied work array of length lwork in bytes,
+ *		 returns error if space runs out.
+ *	   = -1: the routine guesses the amount of space needed without
+ *		 performing the factorization, and returns it in
+ *		 *info; no other side effects.
+ *
+ * perm_c   (input) int*, dimension (A->ncol)
+ *	    Column permutation vector, which defines the
+ *	    permutation matrix Pc; perm_c[i] = j means column i of A is
+ *	    in position j in A*Pc.
+ *	    When searching for diagonal, perm_c[*] is applied to the
+ *	    row subscripts of A, so that diagonal threshold pivoting
+ *	    can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r   (input/output) int*, dimension (A->nrow)
+ *	    Row permutation vector which defines the permutation matrix Pr,
+ *	    perm_r[i] = j means row i of A is in position j in Pr*A.
+ *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *	       will try to use the input perm_r, unless a certain threshold
+ *	       criterion is violated. In that case, perm_r is overwritten by
+ *	       a new permutation determined by partial pivoting or diagonal
+ *	       threshold pivoting.
+ *	    Otherwise, perm_r is output argument;
+ *
+ * L	    (output) SuperMatrix*
+ *	    The factor L from the factorization Pr*A=L*U; use compressed row
+ *	    subscripts storage for supernodes, i.e., L has type:
+ *	    Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
+ *
+ * U	    (output) SuperMatrix*
+ *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ *	    storage scheme, i.e., U has types: Stype = SLU_NC,
+ *	    Dtype = SLU_Z, Mtype = SLU_TRU.
+ *
+ * stat     (output) SuperLUStat_t*
+ *	    Record the statistics on runtime and floating-point operation count.
+ *	    See slu_util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info     (output) int*
+ *	    = 0: successful exit
+ *	    < 0: if info = -i, the i-th argument had an illegal value
+ *	    > 0: if info = i, and i is
+ *	       <= A->ncol: number of zero pivots. They are replaced by small
+ *		  entries according to options->ILU_FillTol.
+ *	       > A->ncol: number of bytes allocated when memory allocation
+ *		  failure occurred, plus A->ncol. If lwork = -1, it is
+ *		  the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ *   m = number of rows in the matrix
+ *   n = number of columns in the matrix
+ *
+ *   marker[0:3*m-1]: marker[i] = j means that node i has been
+ *	reached when working on column j.
+ *	Storage: relative to original row subscripts
+ *	NOTE: There are 4 of them:
+ *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
+ *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
+ *	      marker_relax(has its own space) is used for relaxed supernodes.
+ *
+ *   parent[0:m-1]: parent vector used during dfs
+ *	Storage: relative to new row subscripts
+ *
+ *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ *	unexplored neighbor of i in lsub[*]
+ *
+ *   segrep[0:nseg-1]: contains the list of supernodal representatives
+ *	in topological order of the dfs. A supernode representative is the
+ *	last column of a supernode.
+ *	The maximum size of segrep[] is n.
+ *
+ *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ *	supernodal representative r, repfnz[r] is the location of the first
+ *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
+ *	indicates the supernode r has been explored.
+ *	NOTE: There are W of them, each used for one column of a panel.
+ *
+ *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ *	the panel diagonal. These are filled in during dpanel_dfs(), and are
+ *	used later in the inner LU factorization within the panel.
+ *	panel_lsub[]/dense[] pair forms the SPA data structure.
+ *	NOTE: There are W of them.
+ *
+ *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ *		   NOTE: there are W of them.
+ *
+ *   tempv[0:*]: real temporary used for dense numeric kernels;
+ *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
+ *	It is also used by the dropping routine ilu_ddrop_row().
+ * 
+ */ + +void +zgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, + int *etree, void *work, int lwork, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) +{ + /* Local working arrays */ + NCPformat *Astore; + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ + int *iperm_c; /* inverse of perm_c */ + int *swap, *iswap; /* swap is used to store the row permutation + during the factorization. Initially, it is set + to iperm_c (row indeces of Pc*A*Pc'). + iswap is the inverse of swap. After the + factorization, it is equal to perm_r. */ + int *iwork; + doublecomplex *zwork; + int *segrep, *repfnz, *parent, *xplore; + int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ + int *marker, *marker_relax; + doublecomplex *dense, *tempv; + double *dtempv; + int *relax_end, *relax_fsupc; + doublecomplex *a; + int *asub; + int *xa_begin, *xa_end; + int *xsup, *supno; + int *xlsub, *xlusup, *xusub; + int nzlumax; + double *amax; + doublecomplex drop_sum; + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + int *iwork2; /* used by the second dropping rule */ + + /* Local scalars */ + fact_t fact = options->Fact; + double diag_pivot_thresh = options->DiagPivotThresh; + double drop_tol = options->ILU_DropTol; /* tau */ + double fill_ini = options->ILU_FillTol; /* tau^hat */ + double gamma = options->ILU_FillFactor; + int drop_rule = options->ILU_DropRule; + milu_t milu = options->ILU_MILU; + double fill_tol; + int pivrow; /* pivotal row number in the original matrix A */ + int nseg1; /* no of segments in U-column above panel row jcol */ + int nseg; /* no of segments in each U-column */ + register int jcol; + register int kcol; /* end column of a relaxed snode */ + register int icol; + register int i, k, jj, new_next, iinfo; + int m, n, min_mn, jsupno, fsupc, nextlu, nextu; + int w_def; /* upper bound on panel width */ + int usepr, iperm_r_allocated = 0; + int nnzL, nnzU; + int *panel_histo = stat->panel_histo; + flops_t *ops = stat->ops; + + int last_drop;/* the last column which the dropping rules applied */ + int quota; + int nnzAj; /* number of nonzeros in A(:,1:j) */ + int nnzLj, nnzUj; + double tol_L = drop_tol, tol_U = drop_tol; + doublecomplex zero = {0.0, 0.0}; + + /* Executable */ + iinfo = 0; + m = A->nrow; + n = A->ncol; + min_mn = SUPERLU_MIN(m, n); + Astore = A->Store; + a = Astore->nzval; + asub = Astore->rowind; + xa_begin = Astore->colbeg; + xa_end = Astore->colend; + + /* Allocate storage common to the factor routines */ + *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, + gamma, L, U, &Glu, &iwork, &zwork); + if ( *info ) return; + + xsup = Glu.xsup; + supno = Glu.supno; + xlsub = Glu.xlsub; + xlusup = Glu.xlusup; + xusub = Glu.xusub; + + SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, + &repfnz, &panel_lsub, &marker_relax, &marker); + zSetRWork(m, panel_size, zwork, &dense, &tempv); + + usepr = (fact == SamePattern_SameRowPerm); + if ( usepr ) { + /* Compute the inverse of perm_r */ + iperm_r = (int *) intMalloc(m); + for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; + iperm_r_allocated = 1; + } + + iperm_c = (int *) intMalloc(n); + for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; + swap = (int *)intMalloc(n); + for (k = 0; k < n; k++) swap[k] = iperm_c[k]; + iswap = (int *)intMalloc(n); + for (k = 0; k < n; k++) iswap[k] = perm_c[k]; + amax = (double *) doubleMalloc(panel_size); + if (drop_rule & DROP_SECONDARY) + iwork2 = (int *)intMalloc(n); + else + iwork2 = NULL; + + nnzAj = 0; + nnzLj = 0; + nnzUj = 0; + last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95)); + + /* Identify relaxed snodes */ + relax_end = (int *) intMalloc(n); + relax_fsupc = (int *) intMalloc(n); + if ( options->SymmetricMode == YES ) + ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + else + ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); + + ifill (perm_r, m, EMPTY); + ifill (marker, m * NO_MARKER, EMPTY); + supno[0] = -1; + xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; + w_def = panel_size; + + /* Mark the rows used by relaxed supernodes */ + ifill (marker_relax, m, EMPTY); + i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, + asub, marker_relax); +#if ( PRNTlevel >= 1) + printf("%d relaxed supernodes.\n", i); +#endif + + /* + * Work on one "panel" at a time. A panel is one of the following: + * (a) a relaxed supernode at the bottom of the etree, or + * (b) panel_size contiguous columns, defined by the user + */ + for (jcol = 0; jcol < min_mn; ) { + + if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ + kcol = relax_end[jcol]; /* end of the relaxed snode */ + panel_histo[kcol-jcol+1]++; + + /* Drop small rows in the previous supernode. */ + if (jcol > 0 && jcol < last_drop) { + int first = xsup[supno[jcol - 1]]; + int last = jcol - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); + + /* Drop small rows */ + dtempv = (double *) tempv; + i = ilu_zdrop_row(options, first, last, tol_L, quota, &nnzLj, + &fill_tol, &Glu, dtempv, iwork2, 0); + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } + + /* -------------------------------------- + * Factorize the relaxed supernode(jcol:kcol) + * -------------------------------------- */ + /* Determine the union of the row structure of the snode */ + if ( (*info = ilu_zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, + marker, &Glu)) != 0 ) + return; + + nextu = xusub[jcol]; + nextlu = xlusup[jcol]; + jsupno = supno[jcol]; + fsupc = xsup[jsupno]; + new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); + nzlumax = Glu.nzlumax; + while ( new_next > nzlumax ) { + if ((*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) + return; + } + + for (icol = jcol; icol <= kcol; icol++) { + xusub[icol+1] = nextu; + + amax[0] = 0.0; + /* Scatter into SPA dense[*] */ + for (k = xa_begin[icol]; k < xa_end[icol]; k++) { + register double tmp = z_abs1 (&a[k]); + if (tmp > amax[0]) amax[0] = tmp; + dense[asub[k]] = a[k]; + } + nnzAj += xa_end[icol] - xa_begin[icol]; + if (amax[0] == 0.0) { + amax[0] = fill_ini; +#if ( PRNTlevel >= 1) + printf("Column %d is entirely zero!\n", icol); + fflush(stdout); +#endif + } + + /* Numeric update within the snode */ + zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); + + if (usepr) pivrow = iperm_r[icol]; + fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); + if ( (*info = ilu_zpivotL(icol, diag_pivot_thresh, &usepr, + perm_r, iperm_c[icol], swap, iswap, + marker_relax, &pivrow, + amax[0] * fill_tol, milu, zero, + &Glu, stat)) ) { + iinfo++; + marker[pivrow] = kcol; + } + + } + + jcol = kcol + 1; + + } else { /* Work on one panel of panel_size columns */ + + /* Adjust panel_size so that a panel won't overlap with the next + * relaxed snode. + */ + panel_size = w_def; + for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) + if ( relax_end[k] != EMPTY ) { + panel_size = k - jcol; + break; + } + if ( k == min_mn ) panel_size = min_mn - jcol; + panel_histo[panel_size]++; + + /* symbolic factor on a panel of columns */ + ilu_zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, + dense, amax, panel_lsub, segrep, repfnz, + marker, parent, xplore, &Glu); + + /* numeric sup-panel updates in topological order */ + zpanel_bmod(m, panel_size, jcol, nseg1, dense, + tempv, segrep, repfnz, &Glu, stat); + + /* Sparse LU within the panel, and below panel diagonal */ + for (jj = jcol; jj < jcol + panel_size; jj++) { + + k = (jj - jcol) * m; /* column index for w-wide arrays */ + + nseg = nseg1; /* Begin after all the panel segments */ + + nnzAj += xa_end[jj] - xa_begin[jj]; + + if ((*info = ilu_zcolumn_dfs(m, jj, perm_r, &nseg, + &panel_lsub[k], segrep, &repfnz[k], + marker, parent, xplore, &Glu))) + return; + + /* Numeric updates */ + if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k], + tempv, &segrep[nseg1], &repfnz[k], + jcol, &Glu, stat)) != 0) return; + + /* Make a fill-in position if the column is entirely zero */ + if (xlsub[jj + 1] == xlsub[jj]) { + register int i, row; + int nextl; + int nzlmax = Glu.nzlmax; + int *lsub = Glu.lsub; + int *marker2 = marker + 2 * m; + + /* Allocate memory */ + nextl = xlsub[jj] + 1; + if (nextl >= nzlmax) { + int error = zLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); + if (error) { *info = error; return; } + lsub = Glu.lsub; + } + xlsub[jj + 1]++; + assert(xlusup[jj]==xlusup[jj+1]); + xlusup[jj + 1]++; + Glu.lusup[xlusup[jj]] = zero; + + /* Choose a row index (pivrow) for fill-in */ + for (i = jj; i < n; i++) + if (marker_relax[swap[i]] <= jj) break; + row = swap[i]; + marker2[row] = jj; + lsub[xlsub[jj]] = row; +#ifdef DEBUG + printf("Fill col %d.\n", jj); + fflush(stdout); +#endif + } + + /* Computer the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * jj / m; + else if (drop_rule & DROP_COLUMN) + quota = gamma * (xa_end[jj] - xa_begin[jj]) * + (jj + 1) / m; + else if (drop_rule & DROP_AREA) + quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; + else + quota = m; + + /* Copy the U-segments to ucol[*] and drop small entries */ + if ((*info = ilu_zcopy_to_ucol(jj, nseg, segrep, &repfnz[k], + perm_r, &dense[k], drop_rule, + milu, amax[jj - jcol] * tol_U, + quota, &drop_sum, &nnzUj, &Glu, + iwork2)) != 0) + return; + + /* Reset the dropping threshold if required */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) + tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); + else + tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); + } + + zd_mult(&drop_sum, &drop_sum, MILU_ALPHA); + if (usepr) pivrow = iperm_r[jj]; + fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); + if ( (*info = ilu_zpivotL(jj, diag_pivot_thresh, &usepr, perm_r, + iperm_c[jj], swap, iswap, + marker_relax, &pivrow, + amax[jj - jcol] * fill_tol, milu, + drop_sum, &Glu, stat)) ) { + iinfo++; + marker[m + pivrow] = jj; + marker[2 * m + pivrow] = jj; + } + + /* Reset repfnz[] for this column */ + resetrep_col (nseg, segrep, &repfnz[k]); + + /* Start a new supernode, drop the previous one */ + if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { + int first = xsup[supno[jj - 1]]; + int last = jj - 1; + int quota; + + /* Compute the quota */ + if (drop_rule & DROP_PROWS) + quota = gamma * Astore->nnz / m * (m - first) / m + * (last - first + 1); + else if (drop_rule & DROP_COLUMN) { + int i; + quota = 0; + for (i = first; i <= last; i++) + quota += xa_end[i] - xa_begin[i]; + quota = gamma * quota * (m - first) / m; + } else if (drop_rule & DROP_AREA) + quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) + / m) - nnzLj; + else + quota = m * n; + fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / + (double)min_mn); + + /* Drop small rows */ + dtempv = (double *) tempv; + i = ilu_zdrop_row(options, first, last, tol_L, quota, + &nnzLj, &fill_tol, &Glu, dtempv, iwork2, + 1); + + /* Reset the parameters */ + if (drop_rule & DROP_DYNAMIC) { + if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) + < nnzLj) + tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); + else + tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); + } + if (fill_tol < 0) iinfo -= (int)fill_tol; +#ifdef DEBUG + num_drop_L += i * (last - first + 1); +#endif + } /* if start a new supernode */ + + } /* for */ + + jcol += panel_size; /* Move to the next panel */ + + } /* else */ + + } /* for */ + + *info = iinfo; + + if ( m > n ) { + k = 0; + for (i = 0; i < m; ++i) + if ( perm_r[i] == EMPTY ) { + perm_r[i] = n + k; + ++k; + } + } + + ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); + fixupL(min_mn, perm_r, &Glu); + + zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */ + + if ( fact == SamePattern_SameRowPerm ) { + /* L and U structures may have changed due to possibly different + pivoting, even though the storage is available. + There could also be memory expansions, so the array locations + may have changed, */ + ((SCformat *)L->Store)->nnz = nnzL; + ((SCformat *)L->Store)->nsuper = Glu.supno[n]; + ((SCformat *)L->Store)->nzval = Glu.lusup; + ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; + ((SCformat *)L->Store)->rowind = Glu.lsub; + ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; + ((NCformat *)U->Store)->nnz = nnzU; + ((NCformat *)U->Store)->nzval = Glu.ucol; + ((NCformat *)U->Store)->rowind = Glu.usub; + ((NCformat *)U->Store)->colptr = Glu.xusub; + } else { + zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, + Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, + Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU); + zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, + Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU); + } + + ops[FACT] += ops[TRSV] + ops[GEMV]; + + if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); + SUPERLU_FREE (iperm_c); + SUPERLU_FREE (relax_end); + SUPERLU_FREE (swap); + SUPERLU_FREE (iswap); + SUPERLU_FREE (relax_fsupc); + SUPERLU_FREE (amax); + if ( iwork2 ) SUPERLU_FREE (iwork2); + +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsrfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsrfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsrfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,26 @@ -/* +/*! @file zgsrfs.c + * \brief Improves computed solution to a system of inear equations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Modified from lapack routine ZGERFS
+ * 
*/ /* * File name: zgsrfs.c * History: Modified from lapack routine ZGERFS */ #include -#include "zsp_defs.h" +#include "slu_zdefs.h" -void -zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, char *equed, double *R, double *C, - SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  *   Purpose   
  *   =======   
  *
@@ -123,8 +124,16 @@
  *
  *    ITMAX is the maximum number of steps of iterative refinement.   
  *
- */  
+ * 
+ */ +void +zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, char *equed, double *R, double *C, + SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr, + SuperLUStat_t *stat, int *info) +{ + #define ITMAX 5 /* Table of constant values */ @@ -224,6 +233,8 @@ nz = A->ncol + 1; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); + /* Set SAFE1 essentially to be the underflow threshold times the + number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; @@ -274,7 +285,7 @@ where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th component of the - numerator and denominator before dividing. */ + numerator before dividing. */ for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] ); @@ -297,11 +308,13 @@ } s = 0.; for (i = 0; i < A->nrow; ++i) { - if (rwork[i] > safe2) + if (rwork[i] > safe2) { s = SUPERLU_MAX( s, z_abs1(&work[i]) / rwork[i] ); - else - s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / - (rwork[i] + safe1) ); + } else if ( rwork[i] != 0.0 ) { + s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / rwork[i] ); + } + /* If rwork[i] is exactly 0.0, then we know the true + residual also must be exactly 0.0. */ } berr[j] = s; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssv.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssv.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssv.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,20 +1,19 @@ - -/* +/*! @file zgssv.c + * \brief Solves the system of linear equations A*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" -void -zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, - SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -127,15 +126,21 @@
  *                so the solution could not be computed.
  *             > A->ncol: number of bytes allocated when memory allocation
  *                failure occurred, plus A->ncol.
- *   
+ * 
*/ + +void +zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, + SuperLUStat_t *stat, int *info ) +{ + DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ - double drop_tol = 0.; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; @@ -201,8 +206,8 @@ relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ - zgstrf(options, &AC, drop_tol, relax, panel_size, - etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); + zgstrf(options, &AC, relax, panel_size, etree, + NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssvx.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssvx.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssvx.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,22 +1,19 @@ -/* +/*! @file zgssvx.c + * \brief Solves the system of linear equations A*X=B or A'*X=B + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" -void -zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - int *etree, char *equed, double *R, double *C, - SuperMatrix *L, SuperMatrix *U, void *work, int lwork, - SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, - double *rcond, double *ferr, double *berr, - mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -314,7 +311,7 @@
  *
  * stat   (output) SuperLUStat_t*
  *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
+ *        See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info    (output) int*
  *         = 0: successful exit   
@@ -332,9 +329,19 @@
  *                    accurate than the value of RCOND would suggest.   
  *              > A->ncol+1: number of bytes allocated when memory allocation
  *                    failure occurred, plus A->ncol.
- *
+ * 
*/ +void +zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, + int *etree, char *equed, double *R, double *C, + SuperMatrix *L, SuperMatrix *U, void *work, int lwork, + SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, + double *rcond, double *ferr, double *berr, + mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) +{ + + DNformat *Bstore, *Xstore; doublecomplex *Bmat, *Xmat; int ldb, ldx, nrhs; @@ -346,13 +353,12 @@ int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; - double diag_pivot_thresh, drop_tol; + double diag_pivot_thresh; double t0; /* temporary time */ double *utime; /* External functions */ extern double zlangs(char *, SuperMatrix *); - extern double dlamch_(char *); Bstore = B->Store; Xstore = X->Store; @@ -443,7 +449,6 @@ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; - drop_tol = 0.0; utime = stat->utime; @@ -455,7 +460,7 @@ Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ - trant = CONJ; + trant = TRANS; notran = 0; } else { trant = NOTRANS; @@ -523,8 +528,8 @@ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); - zgstrf(options, &AC, drop_tol, relax, panel_size, - etree, work, lwork, perm_c, perm_r, L, U, stat, info); + zgstrf(options, &AC, relax, panel_size, etree, + work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrf.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrf.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrf.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,33 +1,32 @@ -/* +/*! @file zgstrf.c + * \brief Computes an LU factorization of a general sparse matrix + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
+ * 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
  *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" -void -zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol, - int relax, int panel_size, int *etree, void *work, int lwork, - int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, - SuperLUStat_t *stat, int *info) -{ -/* +#include "slu_zdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -53,11 +52,6 @@
  *          (A->nrow, A->ncol). The type of A can be:
  *          Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
  *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *	    Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
  * relax    (input) int
  *          To control degree of relaxing supernodes. If the number
  *          of nodes (columns) in a subtree of the elimination tree is less
@@ -117,7 +111,7 @@
  *
  * stat     (output) SuperLUStat_t*
  *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
+ *          See slu_util.h for the definition of 'SuperLUStat_t'.
  *
  * info     (output) int*
  *          = 0: successful exit
@@ -177,13 +171,20 @@
  *	    	   NOTE: there are W of them.
  *
  *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in zsp_defs.h.
- *
+ *	The size of this array is defined by NUM_TEMPV() in slu_zdefs.h.
+ * 
*/ + +void +zgstrf (superlu_options_t *options, SuperMatrix *A, + int relax, int panel_size, int *etree, void *work, int lwork, + int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, + SuperLUStat_t *stat, int *info) +{ /* Local working arrays */ NCPformat *Astore; - int *iperm_r; /* inverse of perm_r; - used when options->Fact == SamePattern_SameRowPerm */ + int *iperm_r = NULL; /* inverse of perm_r; used when + options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; doublecomplex *zwork; @@ -199,7 +200,8 @@ int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ + double fill_ratio = sp_ienv(6); /* estimated fill ratio */ + static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; @@ -230,7 +232,7 @@ /* Allocate storage common to the factor routines */ *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz, - panel_size, L, U, &Glu, &iwork, &zwork); + panel_size, fill_ratio, L, U, &Glu, &iwork, &zwork); if ( *info ) return; xsup = Glu.xsup; @@ -417,7 +419,7 @@ ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { - zCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, + zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU); zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, @@ -425,6 +427,7 @@ } ops[FACT] += ops[TRSV] + ops[GEMV]; + stat->expansions = --(Glu.num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zgstrs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,25 +1,27 @@ -/* +/*! @file zgstrs.c + * \brief Solves a system using LU factorization + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" /* @@ -29,13 +31,9 @@ void zlsolve(int, int, doublecomplex*, doublecomplex*); void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*); - -void -zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, SuperMatrix *B, - SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -85,8 +83,15 @@
  * info    (output) int*
  * 	   = 0: successful exit
  *	   < 0: if info = -i, the i-th argument had an illegal value
- *
+ * 
*/ + +void +zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, + int *perm_c, int *perm_r, SuperMatrix *B, + SuperLUStat_t *stat, int *info) +{ + #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif @@ -293,7 +298,7 @@ stat->ops[SOLVE] = solve_ops; - } else { /* Solve A'*X=B */ + } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; @@ -302,30 +307,23 @@ } stat->ops[SOLVE] = 0; - if (trans == TRANS) { - - for (k = 0; k < nrhs; ++k) { - - /* Multiply by inv(U'). */ - sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); - - /* Multiply by inv(L'). */ - sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); - - } - } - else { - for (k = 0; k < nrhs; ++k) { - /* Multiply by inv(U'). */ + for (k = 0; k < nrhs; ++k) { + /* Multiply by inv(U'). */ + sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); + + /* Multiply by inv(L'). */ + sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); + } + } else { /* trans == CONJ */ + for (k = 0; k < nrhs; ++k) { + /* Multiply by conj(inv(U')). */ sp_ztrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info); - /* Multiply by inv(L'). */ + /* Multiply by conj(inv(L')). */ sp_ztrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info); - - } - } - + } + } /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlacon.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlacon.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlacon.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,68 +1,75 @@ - -/* +/*! @file zlacon.c + * \brief Estimates the 1-norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "Cnames.h" -#include "dcomplex.h" +#include "slu_Cnames.h" +#include "slu_dcomplex.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   ZLACON estimates the 1-norm of a square matrix A.   
+ *   Reverse communication is used for evaluating matrix-vector products. 
+ * 
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   N      (input) INT
+ *          The order of the matrix.  N >= 1.   
+ *
+ *   V      (workspace) DOUBLE COMPLEX PRECISION array, dimension (N)   
+ *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
+ *          (W is not returned).   
+ *
+ *   X      (input/output) DOUBLE COMPLEX PRECISION array, dimension (N)   
+ *          On an intermediate return, X should be overwritten by   
+ *                A * X,   if KASE=1,   
+ *                A' * X,  if KASE=2,
+ *          where A' is the conjugate transpose of A,
+ *         and ZLACON must be re-called with all the other parameters   
+ *          unchanged.   
+ *
+ *
+ *   EST    (output) DOUBLE PRECISION   
+ *          An estimate (a lower bound) for norm(A).   
+ *
+ *   KASE   (input/output) INT
+ *          On the initial call to ZLACON, KASE should be 0.   
+ *          On an intermediate return, KASE will be 1 or 2, indicating   
+ *          whether X should be overwritten by A * X  or A' * X.   
+ *          On the final return from ZLACON, KASE will again be 0.   
+ *
+ *   Further Details   
+ *   ======= =======   
+ *
+ *   Contributed by Nick Higham, University of Manchester.   
+ *   Originally named CONEST, dated March 16, 1988.   
+ *
+ *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
+ *   a real or complex matrix, with applications to condition estimation", 
+ *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
+ *   ===================================================================== 
+ * 
+ */ + int zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase) { -/* - Purpose - ======= - ZLACON estimates the 1-norm of a square matrix A. - Reverse communication is used for evaluating matrix-vector products. - - Arguments - ========= - - N (input) INT - The order of the matrix. N >= 1. - - V (workspace) DOUBLE COMPLEX PRECISION array, dimension (N) - On the final return, V = A*W, where EST = norm(V)/norm(W) - (W is not returned). - - X (input/output) DOUBLE COMPLEX PRECISION array, dimension (N) - On an intermediate return, X should be overwritten by - A * X, if KASE=1, - A' * X, if KASE=2, - where A' is the conjugate transpose of A, - and ZLACON must be re-called with all the other parameters - unchanged. - - - EST (output) DOUBLE PRECISION - An estimate (a lower bound) for norm(A). - - KASE (input/output) INT - On the initial call to ZLACON, KASE should be 0. - On an intermediate return, KASE will be 1 or 2, indicating - whether X should be overwritten by A * X or A' * X. - On the final return from ZLACON, KASE will again be 0. - - Further Details - ======= ======= - - Contributed by Nick Higham, University of Manchester. - Originally named CONEST, dated March 16, 1988. - - Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - a real or complex matrix, with applications to condition estimation", - ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - ===================================================================== -*/ - /* Table of constant values */ int c__1 = 1; doublecomplex zero = {0.0, 0.0}; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlangs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlangs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlangs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,58 +1,65 @@ - -/* +/*! @file zlangs.c + * \brief Returns the value of the one norm + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Modified from lapack routine ZLANGE 
+ * 
*/ /* * File name: zlangs.c * History: Modified from lapack routine ZLANGE */ #include -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ *
+ *   ZLANGS returns the value of the one norm, or the Frobenius norm, or 
+ *   the infinity norm, or the element of largest absolute value of a 
+ *   real matrix A.   
+ *
+ *   Description   
+ *   ===========   
+ *
+ *   ZLANGE returns the value   
+ *
+ *      ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
+ *               (   
+ *               ( norm1(A),         NORM = '1', 'O' or 'o'   
+ *               (   
+ *               ( normI(A),         NORM = 'I' or 'i'   
+ *               (   
+ *               ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
+ *
+ *   where  norm1  denotes the  one norm of a matrix (maximum column sum), 
+ *   normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
+ *   normF  denotes the  Frobenius norm of a matrix (square root of sum of 
+ *   squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   NORM    (input) CHARACTER*1   
+ *           Specifies the value to be returned in ZLANGE as described above.   
+ *   A       (input) SuperMatrix*
+ *           The M by N sparse matrix A. 
+ *
+ *  =====================================================================
+ * 
+ */ + double zlangs(char *norm, SuperMatrix *A) { -/* - Purpose - ======= - - ZLANGS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - ZLANGE returns the value - - ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in ZLANGE as described above. - A (input) SuperMatrix* - The M by N sparse matrix A. - - ===================================================================== -*/ /* Local variables */ NCformat *Astore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlaqgs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlaqgs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zlaqgs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,81 +1,89 @@ - -/* +/*! @file zlaqgs.c + * \brief Equlibrates a general sprase matrix + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
+ * Modified from LAPACK routine ZLAQGE
+ * 
*/ /* * File name: zlaqgs.c * History: Modified from LAPACK routine ZLAQGE */ #include -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" +/*! \brief + * + *
+ *   Purpose   
+ *   =======   
+ *
+ *   ZLAQGS equilibrates a general sparse M by N matrix A using the row and   
+ *   scaling factors in the vectors R and C.   
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ *   Arguments   
+ *   =========   
+ *
+ *   A       (input/output) SuperMatrix*
+ *           On exit, the equilibrated matrix.  See EQUED for the form of 
+ *           the equilibrated matrix. The type of A can be:
+ *	    Stype = NC; Dtype = SLU_Z; Mtype = GE.
+ *	    
+ *   R       (input) double*, dimension (A->nrow)
+ *           The row scale factors for A.
+ *	    
+ *   C       (input) double*, dimension (A->ncol)
+ *           The column scale factors for A.
+ *	    
+ *   ROWCND  (input) double
+ *           Ratio of the smallest R(i) to the largest R(i).
+ *	    
+ *   COLCND  (input) double
+ *           Ratio of the smallest C(i) to the largest C(i).
+ *	    
+ *   AMAX    (input) double
+ *           Absolute value of largest matrix entry.
+ *	    
+ *   EQUED   (output) char*
+ *           Specifies the form of equilibration that was done.   
+ *           = 'N':  No equilibration   
+ *           = 'R':  Row equilibration, i.e., A has been premultiplied by  
+ *                   diag(R).   
+ *           = 'C':  Column equilibration, i.e., A has been postmultiplied  
+ *                   by diag(C).   
+ *           = 'B':  Both row and column equilibration, i.e., A has been
+ *                   replaced by diag(R) * A * diag(C).   
+ *
+ *   Internal Parameters   
+ *   ===================   
+ *
+ *   THRESH is a threshold value used to decide if row or column scaling   
+ *   should be done based on the ratio of the row or column scaling   
+ *   factors.  If ROWCND < THRESH, row scaling is done, and if   
+ *   COLCND < THRESH, column scaling is done.   
+ *
+ *   LARGE and SMALL are threshold values used to decide if row scaling   
+ *   should be done based on the absolute size of the largest matrix   
+ *   element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
+ *
+ *   ===================================================================== 
+ * 
+ */ + void zlaqgs(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { -/* - Purpose - ======= - ZLAQGS equilibrates a general sparse M by N matrix A using the row and - scaling factors in the vectors R and C. - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input/output) SuperMatrix* - On exit, the equilibrated matrix. See EQUED for the form of - the equilibrated matrix. The type of A can be: - Stype = NC; Dtype = SLU_Z; Mtype = GE. - - R (input) double*, dimension (A->nrow) - The row scale factors for A. - - C (input) double*, dimension (A->ncol) - The column scale factors for A. - - ROWCND (input) double - Ratio of the smallest R(i) to the largest R(i). - - COLCND (input) double - Ratio of the smallest C(i) to the largest C(i). - - AMAX (input) double - Absolute value of largest matrix entry. - - EQUED (output) char* - Specifies the form of equilibration that was done. - = 'N': No equilibration - = 'R': Row equilibration, i.e., A has been premultiplied by - diag(R). - = 'C': Column equilibration, i.e., A has been postmultiplied - by diag(C). - = 'B': Both row and column equilibration, i.e., A has been - replaced by diag(R) * A * diag(C). - - Internal Parameters - =================== - - THRESH is a threshold value used to decide if row or column scaling - should be done based on the ratio of the row or column scaling - factors. If ROWCND < THRESH, row scaling is done, and if - COLCND < THRESH, column scaling is done. - - LARGE and SMALL are threshold values used to decide if row scaling - should be done based on the absolute size of the largest matrix - element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. - - ===================================================================== -*/ - #define THRESH (0.1) /* Local variables */ Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zldperm.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zldperm.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zldperm.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,168 @@ + +/*! @file + * \brief Finds a row permutation so that the matrix has large entries on the diagonal + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ */ + +#include "slu_zdefs.h" + +extern void mc64id_(int_t*); +extern void mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [], + int_t*, int_t [], int_t*, int_t[], int_t*, double [], + int_t [], int_t []); + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *   ZLDPERM finds a row permutation so that the matrix has large
+ *   entries on the diagonal.
+ *
+ * Arguments
+ * =========
+ *
+ * job    (input) int
+ *        Control the action. Possible values for JOB are:
+ *        = 1 : Compute a row permutation of the matrix so that the
+ *              permuted matrix has as many entries on its diagonal as
+ *              possible. The values on the diagonal are of arbitrary size.
+ *              HSL subroutine MC21A/AD is used for this.
+ *        = 2 : Compute a row permutation of the matrix so that the smallest 
+ *              value on the diagonal of the permuted matrix is maximized.
+ *        = 3 : Compute a row permutation of the matrix so that the smallest
+ *              value on the diagonal of the permuted matrix is maximized.
+ *              The algorithm differs from the one used for JOB = 2 and may
+ *              have quite a different performance.
+ *        = 4 : Compute a row permutation of the matrix so that the sum
+ *              of the diagonal entries of the permuted matrix is maximized.
+ *        = 5 : Compute a row permutation of the matrix so that the product
+ *              of the diagonal entries of the permuted matrix is maximized
+ *              and vectors to scale the matrix so that the nonzero diagonal 
+ *              entries of the permuted matrix are one in absolute value and 
+ *              all the off-diagonal entries are less than or equal to one in 
+ *              absolute value.
+ *        Restriction: 1 <= JOB <= 5.
+ *
+ * n      (input) int
+ *        The order of the matrix.
+ *
+ * nnz    (input) int
+ *        The number of nonzeros in the matrix.
+ *
+ * adjncy (input) int*, of size nnz
+ *        The adjacency structure of the matrix, which contains the row
+ *        indices of the nonzeros.
+ *
+ * colptr (input) int*, of size n+1
+ *        The pointers to the beginning of each column in ADJNCY.
+ *
+ * nzval  (input) doublecomplex*, of size nnz
+ *        The nonzero values of the matrix. nzval[k] is the value of
+ *        the entry corresponding to adjncy[k].
+ *        It is not used if job = 1.
+ *
+ * perm   (output) int*, of size n
+ *        The permutation vector. perm[i] = j means row i in the
+ *        original matrix is in row j of the permuted matrix.
+ *
+ * u      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the row scaling factors. 
+ *
+ * v      (output) double*, of size n
+ *        If job = 5, the natural logarithms of the column scaling factors. 
+ *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
+ * 
+ */ + +int +zldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], + doublecomplex nzval[], int_t *perm, double u[], double v[]) +{ + int_t i, liw, ldw, num; + int_t *iw, icntl[10], info[10]; + double *dw; + double *nzval_d = (double *) SUPERLU_MALLOC(nnz * sizeof(double)); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Enter zldperm()"); +#endif + liw = 5*n; + if ( job == 3 ) liw = 10*n + nnz; + if ( !(iw = intMalloc(liw)) ) ABORT("Malloc fails for iw[]"); + ldw = 3*n + nnz; + if ( !(dw = (double*) SUPERLU_MALLOC(ldw * sizeof(double))) ) + ABORT("Malloc fails for dw[]"); + + /* Increment one to get 1-based indexing. */ + for (i = 0; i <= n; ++i) ++colptr[i]; + for (i = 0; i < nnz; ++i) ++adjncy[i]; +#if ( DEBUGlevel>=2 ) + printf("LDPERM(): n %d, nnz %d\n", n, nnz); + slu_PrintInt10("colptr", n+1, colptr); + slu_PrintInt10("adjncy", nnz, adjncy); +#endif + + /* + * NOTE: + * ===== + * + * MC64AD assumes that column permutation vector is defined as: + * perm(i) = j means column i of permuted A is in column j of original A. + * + * Since a symmetric permutation preserves the diagonal entries. Then + * by the following relation: + * P'(A*P')P = P'A + * we can apply inverse(perm) to rows of A to get large diagonal entries. + * But, since 'perm' defined in MC64AD happens to be the reverse of + * SuperLU's definition of permutation vector, therefore, it is already + * an inverse for our purpose. We will thus use it directly. + * + */ + mc64id_(icntl); +#if 0 + /* Suppress error and warning messages. */ + icntl[0] = -1; + icntl[1] = -1; +#endif + + for (i = 0; i < nnz; ++i) nzval_d[i] = z_abs1(&nzval[i]); + mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval_d, &num, perm, + &liw, iw, &ldw, dw, icntl, info); + +#if ( DEBUGlevel>=2 ) + slu_PrintInt10("perm", n, perm); + printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); +#endif + if ( info[0] == 1 ) { /* Structurally singular */ + printf(".. The last %d permutations:\n", n-num); + slu_PrintInt10("perm", n-num, &perm[num]); + } + + /* Restore to 0-based indexing. */ + for (i = 0; i <= n; ++i) --colptr[i]; + for (i = 0; i < nnz; ++i) --adjncy[i]; + for (i = 0; i < n; ++i) --perm[i]; + + if ( job == 5 ) + for (i = 0; i < n; ++i) { + u[i] = dw[i]; + v[i] = dw[n+i]; + } + + SUPERLU_FREE(iw); + SUPERLU_FREE(dw); + SUPERLU_FREE(nzval_d); + +#if ( DEBUGlevel>=1 ) + CHECK_MALLOC(0, "Exit zldperm()"); +#endif + + return info[0]; +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zmemory.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zmemory.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zmemory.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,54 +1,32 @@ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 +/*! @file zmemory.c + * \brief Memory details * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" -/* Constants */ -#define NO_MEMTYPE 4 /* 0: lusup; - 1: ucol; - 2: lsub; - 3: usub */ -#define GluIntArray(n) (5 * (n) + 5) /* Internal prototypes */ void *zexpand (int *, MemType,int, int, GlobalLU_t *); -int zLUWorkInit (int, int, int, int **, doublecomplex **, LU_space_t); +int zLUWorkInit (int, int, int, int **, doublecomplex **, GlobalLU_t *); void copy_mem_doublecomplex (int, void *, void *); void zStackCompress (GlobalLU_t *); -void zSetupSpace (void *, int, LU_space_t *); -void *zuser_malloc (int, int); -void zuser_free (int, int); +void zSetupSpace (void *, int, GlobalLU_t *); +void *zuser_malloc (int, int, GlobalLU_t *); +void zuser_free (int, int, GlobalLU_t *); -/* External prototypes (in memory.c - prec-indep) */ +/* External prototypes (in memory.c - prec-independent) */ extern void copy_mem_int (int, void *, void *); extern void user_bcopy (char *, char *, int); -/* Headers for 4 types of dynamatically managed memory */ -typedef struct e_node { - int size; /* length of the memory that has been used */ - void *mem; /* pointer to the new malloc'd store */ -} ExpHeader; -typedef struct { - int size; - int used; - int top1; /* grow upward, relative to &array[0] */ - int top2; /* grow downward */ - void *array; -} LU_stack_t; - -/* Variables local to this file */ -static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */ -static LU_stack_t stack; -static int no_expand; - /* Macros to manipulate stack */ -#define StackFull(x) ( x + stack.used >= stack.size ) +#define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size ) #define NotDoubleAlign(addr) ( (long int)addr & 7 ) #define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L ) #define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ @@ -58,66 +36,67 @@ -/* - * Setup the memory model to be used for factorization. +/*! \brief Setup the memory model to be used for factorization. + * * lwork = 0: use system malloc; * lwork > 0: use user-supplied work[] space. */ -void zSetupSpace(void *work, int lwork, LU_space_t *MemModel) +void zSetupSpace(void *work, int lwork, GlobalLU_t *Glu) { if ( lwork == 0 ) { - *MemModel = SYSTEM; /* malloc/free */ + Glu->MemModel = SYSTEM; /* malloc/free */ } else if ( lwork > 0 ) { - *MemModel = USER; /* user provided space */ - stack.used = 0; - stack.top1 = 0; - stack.top2 = (lwork/4)*4; /* must be word addressable */ - stack.size = stack.top2; - stack.array = (void *) work; + Glu->MemModel = USER; /* user provided space */ + Glu->stack.used = 0; + Glu->stack.top1 = 0; + Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */ + Glu->stack.size = Glu->stack.top2; + Glu->stack.array = (void *) work; } } -void *zuser_malloc(int bytes, int which_end) +void *zuser_malloc(int bytes, int which_end, GlobalLU_t *Glu) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { - buf = (char*) stack.array + stack.top1; - stack.top1 += bytes; + buf = (char*) Glu->stack.array + Glu->stack.top1; + Glu->stack.top1 += bytes; } else { - stack.top2 -= bytes; - buf = (char*) stack.array + stack.top2; + Glu->stack.top2 -= bytes; + buf = (char*) Glu->stack.array + Glu->stack.top2; } - stack.used += bytes; + Glu->stack.used += bytes; return buf; } -void zuser_free(int bytes, int which_end) +void zuser_free(int bytes, int which_end, GlobalLU_t *Glu) { if ( which_end == HEAD ) { - stack.top1 -= bytes; + Glu->stack.top1 -= bytes; } else { - stack.top2 += bytes; + Glu->stack.top2 += bytes; } - stack.used -= bytes; + Glu->stack.used -= bytes; } -/* +/*! \brief + * + *
  * mem_usage consists of the following fields:
  *    - for_lu (float)
  *      The amount of space used in bytes for the L\U data structures.
  *    - total_needed (float)
  *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
+ * 
*/ int zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) { @@ -132,33 +111,75 @@ dword = sizeof(doublecomplex); /* For LU factors */ - mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * - dword + Lstore->rowind_colptr[n] * iword ); - mem_usage->for_lu += (float)( (n + 1) * iword + + mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0) * iword + Ustore->colptr[n] * (dword + iword) ); /* Working storage to support factorization */ mem_usage->total_needed = mem_usage->for_lu + - (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword + - (panel_size + 1) * n * dword ); + (float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword + + (panel_size + 1.0) * n * dword ); - mem_usage->expansions = --no_expand; - return 0; } /* zQuerySpace */ -/* - * Allocate storage for the data structures common to all factor routines. - * For those unpredictable size, make a guess as FILL * nnz(A). + +/*! \brief + * + *
+ * mem_usage consists of the following fields:
+ *    - for_lu (float)
+ *      The amount of space used in bytes for the L\U data structures.
+ *    - total_needed (float)
+ *      The amount of space needed in bytes to perform factorization.
+ * 
+ */ +int ilu_zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) +{ + SCformat *Lstore; + NCformat *Ustore; + register int n, panel_size = sp_ienv(1); + register float iword, dword; + + Lstore = L->Store; + Ustore = U->Store; + n = L->ncol; + iword = sizeof(int); + dword = sizeof(double); + + /* For LU factors */ + mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword + + Lstore->nzval_colptr[n] * dword + + Lstore->rowind_colptr[n] * iword ); + mem_usage->for_lu += (float)( (n + 1.0f) * iword + + Ustore->colptr[n] * (dword + iword) ); + + /* Working storage to support factorization. + ILU needs 5*n more integers than LU */ + mem_usage->total_needed = mem_usage->for_lu + + (float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword + + (panel_size + 1.0f) * n * dword ); + + return 0; +} /* ilu_zQuerySpace */ + + +/*! \brief Allocate storage for the data structures common to all factor routines. + * + *
+ * For those unpredictable size, estimate as fill_ratio * nnz(A).
  * Return value:
  *     If lwork = -1, return the estimated amount of space required, plus n;
  *     otherwise, return the amount of space actually allocated when
  *     memory allocation failure occurred.
+ * 
*/ int zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, - int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, - int **iwork, doublecomplex **dwork) + int panel_size, double fill_ratio, SuperMatrix *L, SuperMatrix *U, + GlobalLU_t *Glu, int **iwork, doublecomplex **dwork) { int info, iword, dword; SCformat *Lstore; @@ -170,32 +191,33 @@ doublecomplex *ucol; int *usub, *xusub; int nzlmax, nzumax, nzlumax; - int FILL = sp_ienv(6); - Glu->n = n; - no_expand = 0; iword = sizeof(int); dword = sizeof(doublecomplex); + Glu->n = n; + Glu->num_expansions = 0; - if ( !expanders ) - expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); - if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); + if ( !Glu->expanders ) + Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE * + sizeof(ExpHeader) ); + if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact != SamePattern_SameRowPerm ) { /* Guess for L\U factors */ - nzumax = nzlumax = FILL * annz; - nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; + nzumax = nzlumax = fill_ratio * annz; + nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else { - zSetupSpace(work, lwork, &Glu->MemModel); + zSetupSpace(work, lwork, Glu); } -#ifdef DEBUG - printf("zLUMemInit() called: annz %d, MemModel %d\n", - annz, Glu->MemModel); +#if ( PRNTlevel >= 1 ) + printf("zLUMemInit() called: fill_ratio %ld, nzlmax %ld, nzumax %ld\n", + fill_ratio, nzlmax, nzumax); + fflush(stdout); #endif /* Integer pointers for L\U factors */ @@ -206,11 +228,11 @@ xlusup = intMalloc(n+1); xusub = intMalloc(n+1); } else { - xsup = (int *)zuser_malloc((n+1) * iword, HEAD); - supno = (int *)zuser_malloc((n+1) * iword, HEAD); - xlsub = (int *)zuser_malloc((n+1) * iword, HEAD); - xlusup = (int *)zuser_malloc((n+1) * iword, HEAD); - xusub = (int *)zuser_malloc((n+1) * iword, HEAD); + xsup = (int *)zuser_malloc((n+1) * iword, HEAD, Glu); + supno = (int *)zuser_malloc((n+1) * iword, HEAD, Glu); + xlsub = (int *)zuser_malloc((n+1) * iword, HEAD, Glu); + xlusup = (int *)zuser_malloc((n+1) * iword, HEAD, Glu); + xusub = (int *)zuser_malloc((n+1) * iword, HEAD, Glu); } lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu ); @@ -225,7 +247,8 @@ SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { - zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); + zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, + HEAD, Glu); } nzlumax /= 2; nzumax /= 2; @@ -234,6 +257,11 @@ printf("Not enough memory to perform factorization.\n"); return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n); } +#if ( PRNTlevel >= 1) + printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", + nzlmax, nzumax); + fflush(stdout); +#endif lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu ); @@ -260,18 +288,18 @@ Glu->MemModel = SYSTEM; } else { Glu->MemModel = USER; - stack.top2 = (lwork/4)*4; /* must be word-addressable */ - stack.size = stack.top2; + Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */ + Glu->stack.size = Glu->stack.top2; } - lsub = expanders[LSUB].mem = Lstore->rowind; - lusup = expanders[LUSUP].mem = Lstore->nzval; - usub = expanders[USUB].mem = Ustore->rowind; - ucol = expanders[UCOL].mem = Ustore->nzval;; - expanders[LSUB].size = nzlmax; - expanders[LUSUP].size = nzlumax; - expanders[USUB].size = nzumax; - expanders[UCOL].size = nzumax; + lsub = Glu->expanders[LSUB].mem = Lstore->rowind; + lusup = Glu->expanders[LUSUP].mem = Lstore->nzval; + usub = Glu->expanders[USUB].mem = Ustore->rowind; + ucol = Glu->expanders[UCOL].mem = Ustore->nzval;; + Glu->expanders[LSUB].size = nzlmax; + Glu->expanders[LUSUP].size = nzlumax; + Glu->expanders[USUB].size = nzumax; + Glu->expanders[UCOL].size = nzumax; } Glu->xsup = xsup; @@ -287,20 +315,20 @@ Glu->nzumax = nzumax; Glu->nzlumax = nzlumax; - info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); + info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu); if ( info ) return ( info + zmemory_usage(nzlmax, nzumax, nzlumax, n) + n); - ++no_expand; + ++Glu->num_expansions; return 0; } /* zLUMemInit */ -/* Allocate known working storage. Returns 0 if success, otherwise +/*! \brief Allocate known working storage. Returns 0 if success, otherwise returns the number of bytes allocated so far when failure occurred. */ int zLUWorkInit(int m, int n, int panel_size, int **iworkptr, - doublecomplex **dworkptr, LU_space_t MemModel) + doublecomplex **dworkptr, GlobalLU_t *Glu) { int isize, dsize, extra; doublecomplex *old_ptr; @@ -311,19 +339,19 @@ dsize = (m * panel_size + NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(doublecomplex); - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *iworkptr = (int *) intCalloc(isize/sizeof(int)); else - *iworkptr = (int *) zuser_malloc(isize, TAIL); + *iworkptr = (int *) zuser_malloc(isize, TAIL, Glu); if ( ! *iworkptr ) { fprintf(stderr, "zLUWorkInit: malloc fails for local iworkptr[]\n"); return (isize + n); } - if ( MemModel == SYSTEM ) + if ( Glu->MemModel == SYSTEM ) *dworkptr = (doublecomplex *) SUPERLU_MALLOC(dsize); else { - *dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL); + *dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL, Glu); if ( NotDoubleAlign(*dworkptr) ) { old_ptr = *dworkptr; *dworkptr = (doublecomplex*) DoubleAlign(*dworkptr); @@ -332,8 +360,8 @@ #ifdef DEBUG printf("zLUWorkInit: not aligned, extra %d\n", extra); #endif - stack.top2 -= extra; - stack.used += extra; + Glu->stack.top2 -= extra; + Glu->stack.used += extra; } } if ( ! *dworkptr ) { @@ -345,8 +373,7 @@ } -/* - * Set up pointers for real working arrays. +/*! \brief Set up pointers for real working arrays. */ void zSetRWork(int m, int panel_size, doublecomplex *dworkptr, @@ -362,8 +389,7 @@ zfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero); } -/* - * Free the working storage used by factor routines. +/*! \brief Free the working storage used by factor routines. */ void zLUWorkFree(int *iwork, doublecomplex *dwork, GlobalLU_t *Glu) { @@ -371,18 +397,21 @@ SUPERLU_FREE (iwork); SUPERLU_FREE (dwork); } else { - stack.used -= (stack.size - stack.top2); - stack.top2 = stack.size; + Glu->stack.used -= (Glu->stack.size - Glu->stack.top2); + Glu->stack.top2 = Glu->stack.size; /* zStackCompress(Glu); */ } - SUPERLU_FREE (expanders); - expanders = 0; + SUPERLU_FREE (Glu->expanders); + Glu->expanders = NULL; } -/* Expand the data structures for L and U during the factorization. +/*! \brief Expand the data structures for L and U during the factorization. + * + *
  * Return value:   0 - successful return
  *               > 0 - number of bytes allocated when run out of space
+ * 
*/ int zLUMemXpand(int jcol, @@ -446,8 +475,7 @@ for (i = 0; i < howmany; i++) dnew[i] = dold[i]; } -/* - * Expand the existing storage to accommodate more fill-ins. +/*! \brief Expand the existing storage to accommodate more fill-ins. */ void *zexpand ( @@ -463,12 +491,14 @@ float alpha; void *new_mem, *old_mem; int new_len, tries, lword, extra, bytes_to_copy; + ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */ alpha = EXPAND; - if ( no_expand == 0 || keep_prev ) /* First time allocate requested */ + if ( Glu->num_expansions == 0 || keep_prev ) { + /* First time allocate requested */ new_len = *prev_len; - else { + } else { new_len = alpha * *prev_len; } @@ -476,9 +506,8 @@ else lword = sizeof(doublecomplex); if ( Glu->MemModel == SYSTEM ) { - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ - if ( no_expand != 0 ) { + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); + if ( Glu->num_expansions != 0 ) { tries = 0; if ( keep_prev ) { if ( !new_mem ) return (NULL); @@ -487,8 +516,7 @@ if ( ++tries > 10 ) return (NULL); alpha = Reduce(alpha); new_len = alpha * *prev_len; - new_mem = (void *) SUPERLU_MALLOC(new_len * lword); -/* new_mem = (void *) calloc(new_len, lword); */ + new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword); } } if ( type == LSUB || type == USUB ) { @@ -501,8 +529,8 @@ expanders[type].mem = (void *) new_mem; } else { /* MemModel == USER */ - if ( no_expand == 0 ) { - new_mem = zuser_malloc(new_len * lword, HEAD); + if ( Glu->num_expansions == 0 ) { + new_mem = zuser_malloc(new_len * lword, HEAD, Glu); if ( NotDoubleAlign(new_mem) && (type == LUSUP || type == UCOL) ) { old_mem = new_mem; @@ -511,12 +539,11 @@ #ifdef DEBUG printf("expand(): not aligned, extra %d\n", extra); #endif - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; } expanders[type].mem = (void *) new_mem; - } - else { + } else { tries = 0; extra = (new_len - *prev_len) * lword; if ( keep_prev ) { @@ -532,7 +559,7 @@ if ( type != USUB ) { new_mem = (void*)((char*)expanders[type + 1].mem + extra); - bytes_to_copy = (char*)stack.array + stack.top1 + bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1 - (char*)expanders[type + 1].mem; user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy); @@ -548,11 +575,11 @@ Glu->ucol = expanders[UCOL].mem = (void*)((char*)expanders[UCOL].mem + extra); } - stack.top1 += extra; - stack.used += extra; + Glu->stack.top1 += extra; + Glu->stack.used += extra; if ( type == UCOL ) { - stack.top1 += extra; /* Add same amount for USUB */ - stack.used += extra; + Glu->stack.top1 += extra; /* Add same amount for USUB */ + Glu->stack.used += extra; } } /* if ... */ @@ -562,15 +589,14 @@ expanders[type].size = new_len; *prev_len = new_len; - if ( no_expand ) ++no_expand; + if ( Glu->num_expansions ) ++Glu->num_expansions; return (void *) expanders[type].mem; } /* zexpand */ -/* - * Compress the work[] array to remove fragmentation. +/*! \brief Compress the work[] array to remove fragmentation. */ void zStackCompress(GlobalLU_t *Glu) @@ -610,9 +636,9 @@ usub = ito; last = (char*)usub + xusub[ndim] * iword; - fragment = (char*) (((char*)stack.array + stack.top1) - last); - stack.used -= (long int) fragment; - stack.top1 -= (long int) fragment; + fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last); + Glu->stack.used -= (long int) fragment; + Glu->stack.top1 -= (long int) fragment; Glu->ucol = ucol; Glu->lsub = lsub; @@ -626,8 +652,7 @@ } -/* - * Allocate storage for original matrix A +/*! \brief Allocate storage for original matrix A */ void zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa) @@ -641,7 +666,7 @@ doublecomplex *doublecomplexMalloc(int n) { doublecomplex *buf; - buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex)); + buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n"); } @@ -653,7 +678,7 @@ doublecomplex *buf; register int i; doublecomplex zero = {0.0, 0.0}; - buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex)); + buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex)); if ( !buf ) { ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n"); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,27 +1,32 @@ -/* +/*! @file zpanel_bmod.c + * \brief Performs numeric block updates + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ /* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. + */ #include #include -#include "zsp_defs.h" +#include "slu_zdefs.h" /* * Function prototypes @@ -30,6 +35,25 @@ void zmatvec(int, int, int, doublecomplex *, doublecomplex *, doublecomplex *); extern void zcheck_tempv(); +/*! \brief + * + *
+ * Purpose
+ * =======
+ *
+ *    Performs numeric block updates (sup-panel) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ *    Before entering this routine, the original nonzeros in the panel 
+ *    were already copied into the spa[m,w].
+ *
+ *    Updated/Output parameters-
+ *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
+ *    collectively in the m-by-w vector dense[*]. 
+ * 
+ */ + void zpanel_bmod ( const int m, /* in - number of rows in the matrix */ @@ -44,23 +68,8 @@ SuperLUStat_t *stat /* output */ ) { -/* - * Purpose - * ======= - * - * Performs numeric block updates (sup-panel) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - * Before entering this routine, the original nonzeros in the panel - * were already copied into the spa[m,w]. - * - * Updated/Output parameters- - * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned - * collectively in the m-by-w vector dense[*]. - * - */ + #ifdef USE_VENDOR_BLAS #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,48 +1,32 @@ - -/* +/*! @file zpanel_dfs.c + * \brief Peforms a symbolic factorization on a panel of symbols + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" -#include "util.h" -void -zpanel_dfs ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - SuperMatrix *A, /* in - original matrix */ - int *perm_r, /* in */ - int *nseg, /* out */ - doublecomplex *dense, /* out */ - int *panel_lsub, /* out */ - int *segrep, /* out */ - int *repfnz, /* out */ - int *xprune, /* out */ - int *marker, /* out */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* +#include "slu_zdefs.h" + +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -68,8 +52,29 @@
  *   repfnz: SuperA-col --> PA-row
  *   parent: SuperA-col --> SuperA-col
  *   xplore: SuperA-col --> index to L-structure
- *
+ * 
*/ + +void +zpanel_dfs ( + const int m, /* in - number of rows in the matrix */ + const int w, /* in */ + const int jcol, /* in */ + SuperMatrix *A, /* in - original matrix */ + int *perm_r, /* in */ + int *nseg, /* out */ + doublecomplex *dense, /* out */ + int *panel_lsub, /* out */ + int *segrep, /* out */ + int *repfnz, /* out */ + int *xprune, /* out */ + int *marker, /* out */ + int *parent, /* working array */ + int *xplore, /* working array */ + GlobalLU_t *Glu /* modified */ + ) +{ + NCPformat *Astore; doublecomplex *a; int *asub; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,44 +1,36 @@ -/* +/*! @file zpivotL.c + * \brief Performs numerical pivoting + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include #include -#include "zsp_defs.h" +#include "slu_zdefs.h" #undef DEBUG -int -zpivotL( - const int jcol, /* in */ - const double u, /* in - diagonal pivoting threshold */ - int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ - int *perm_r, /* may be modified */ - int *iperm_r, /* in - inverse of perm_r */ - int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ - int *pivrow, /* out */ - GlobalLU_t *Glu, /* modified - global LU data structures */ - SuperLUStat_t *stat /* output */ - ) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *   Performs the numerical pivoting on the current column of L,
@@ -57,8 +49,23 @@
  *
  *   Return value: 0      success;
  *                 i > 0  U(i,i) is exactly zero.
- *
+ * 
*/ + +int +zpivotL( + const int jcol, /* in */ + const double u, /* in - diagonal pivoting threshold */ + int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ + int *perm_r, /* may be modified */ + int *iperm_r, /* in - inverse of perm_r */ + int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ + int *pivrow, /* out */ + GlobalLU_t *Glu, /* modified - global LU data structures */ + SuperLUStat_t *stat /* output */ + ) +{ + doublecomplex one = {1.0, 0.0}; int fsupc; /* first column in the supernode */ int nsupc; /* no of columns in the supernode */ @@ -117,8 +124,12 @@ /* Test for singularity */ if ( pivmax == 0.0 ) { +#if 1 *pivrow = lsub_ptr[pivptr]; perm_r[*pivrow] = jcol; +#else + perm_r[diagind] = jcol; +#endif *usepr = 0; return (jcol+1); } Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotgrowth.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotgrowth.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotgrowth.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,21 +1,20 @@ - -/* +/*! @file zpivotgrowth.c + * \brief Computes the reciprocal pivot growth factor + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ #include -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" -double -zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, - SuperMatrix *L, SuperMatrix *U) -{ -/* +/*! \brief + * + *
  * Purpose
  * =======
  *
@@ -43,8 +42,14 @@
  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
  *          storage scheme, i.e., U has types: Stype = NC;
  *          Dtype = SLU_Z; Mtype = TRU.
- *
+ * 
*/ + +double +zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, + SuperMatrix *L, SuperMatrix *U) +{ + NCformat *Astore; SCformat *Lstore; NCformat *Ustore; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpruneL.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpruneL.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zpruneL.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,39 @@ - -/* +/*! @file zpruneL.c + * \brief Prunes the L-structure + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ *
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *   Prunes the L-structure of supernodes whose L-structure
+ *   contains the current pivot row "pivrow"
+ * 
+ */ + void zpruneL( const int jcol, /* in */ @@ -35,13 +46,7 @@ GlobalLU_t *Glu /* modified - global LU data structures */ ) { -/* - * Purpose - * ======= - * Prunes the L-structure of supernodes whose L-structure - * contains the current pivot row "pivrow" - * - */ + doublecomplex utemp; int jsupno, irep, irep1, kmin, kmax, krow, movnum; int i, ktemp, minloc, maxloc; @@ -108,8 +113,8 @@ kmax--; else if ( perm_r[lsub[kmin]] != EMPTY ) kmin++; - else { /* kmin below pivrow, and kmax above pivrow: - * interchange the two subscripts + else { /* kmin below pivrow (not yet pivoted), and kmax + * above pivrow: interchange the two subscripts */ ktemp = lsub[kmin]; lsub[kmin] = lsub[kmax]; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadhb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadhb.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadhb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,18 +1,85 @@ - -/* +/*! @file zreadhb.c + * \brief Read a matrix stored in Harwell-Boeing format + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Purpose
+ * =======
+ * 
+ * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format 
+ * as described below.
+ * 
+ * Line 1 (A72,A8) 
+ *  	Col. 1 - 72   Title (TITLE) 
+ *	Col. 73 - 80  Key (KEY) 
+ * 
+ * Line 2 (5I14) 
+ * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
+ * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
+ * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
+ * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
+ *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
+ *                    (including starting guesses and solution vectors 
+ *		       if present) 
+ *           	      (zero indicates no right-hand side data is present) 
+ *
+ * Line 3 (A3, 11X, 4I14) 
+ *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
+ * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
+ * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
+ *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
+ *	              (equal to number of entries for assembled matrices) 
+ * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
+ *	              (zero in the case of assembled matrices) 
+ * Line 4 (2A16, 2A20) 
+ * 	Col. 1 - 16   Format for pointers (PTRFMT) 
+ *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
+ *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
+ * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
+ *
+ * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
+ *    	Col. 1 	      Right-hand side type: 
+ *	         	  F for full storage or M for same format as matrix 
+ *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
+ *    	Col. 3        X if an exact solution vector(s) is supplied. 
+ *	Col. 15 - 28  Number of right-hand sides (NRHS) 
+ *	Col. 29 - 42  Number of row indices (NRHSIX) 
+ *          	      (ignored in case of unassembled matrices) 
+ *
+ * The three character type field on line 3 describes the matrix type. 
+ * The following table lists the permitted values for each of the three 
+ * characters. As an example of the type field, RSA denotes that the matrix 
+ * is real, symmetric, and assembled. 
+ *
+ * First Character: 
+ *	R Real matrix 
+ *	C Complex matrix 
+ *	P Pattern only (no numerical values supplied) 
+ *
+ * Second Character: 
+ *	S Symmetric 
+ *	U Unsymmetric 
+ *	H Hermitian 
+ *	Z Skew symmetric 
+ *	R Rectangular 
+ *
+ * Third Character: 
+ *	A Assembled 
+ *	E Elemental matrices (unassembled) 
+ *
+ * 
*/ #include #include -#include "zsp_defs.h" +#include "slu_zdefs.h" -/* Eat up the rest of the current line */ +/*! \brief Eat up the rest of the current line */ int zDumpLine(FILE *fp) { register int c; @@ -60,7 +127,7 @@ return 0; } -int zReadVector(FILE *fp, int n, int *where, int perline, int persize) +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) { register int i, j, item; char tmp, buf[100]; @@ -80,7 +147,7 @@ return 0; } -/* Read complex numbers as pairs of (real, imaginary) */ +/*! \brief Read complex numbers as pairs of (real, imaginary) */ int zReadValues(FILE *fp, int n, doublecomplex *destination, int perline, int persize) { register int i, j, k, s, pair; @@ -118,72 +185,6 @@ zreadhb(int *nrow, int *ncol, int *nonz, doublecomplex **nzval, int **rowind, int **colptr) { -/* - * Purpose - * ======= - * - * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format - * as described below. - * - * Line 1 (A72,A8) - * Col. 1 - 72 Title (TITLE) - * Col. 73 - 80 Key (KEY) - * - * Line 2 (5I14) - * Col. 1 - 14 Total number of lines excluding header (TOTCRD) - * Col. 15 - 28 Number of lines for pointers (PTRCRD) - * Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - * Col. 43 - 56 Number of lines for numerical values (VALCRD) - * Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - * (including starting guesses and solution vectors - * if present) - * (zero indicates no right-hand side data is present) - * - * Line 3 (A3, 11X, 4I14) - * Col. 1 - 3 Matrix type (see below) (MXTYPE) - * Col. 15 - 28 Number of rows (or variables) (NROW) - * Col. 29 - 42 Number of columns (or elements) (NCOL) - * Col. 43 - 56 Number of row (or variable) indices (NNZERO) - * (equal to number of entries for assembled matrices) - * Col. 57 - 70 Number of elemental matrix entries (NELTVL) - * (zero in the case of assembled matrices) - * Line 4 (2A16, 2A20) - * Col. 1 - 16 Format for pointers (PTRFMT) - * Col. 17 - 32 Format for row (or variable) indices (INDFMT) - * Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - * Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - * - * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - * Col. 1 Right-hand side type: - * F for full storage or M for same format as matrix - * Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - * Col. 3 X if an exact solution vector(s) is supplied. - * Col. 15 - 28 Number of right-hand sides (NRHS) - * Col. 29 - 42 Number of row indices (NRHSIX) - * (ignored in case of unassembled matrices) - * - * The three character type field on line 3 describes the matrix type. - * The following table lists the permitted values for each of the three - * characters. As an example of the type field, RSA denotes that the matrix - * is real, symmetric, and assembled. - * - * First Character: - * R Real matrix - * C Complex matrix - * P Pattern only (no numerical values supplied) - * - * Second Character: - * S Symmetric - * U Unsymmetric - * H Hermitian - * Z Skew symmetric - * R Rectangular - * - * Third Character: - * A Assembled - * E Elemental matrices (unassembled) - * - */ register int i, numer_lines = 0, rhscrd = 0; int tmp, colnum, colsize, rownum, rowsize, valnum, valsize; @@ -254,8 +255,8 @@ printf("valnum %d, valsize %d\n", valnum, valsize); #endif - zReadVector(fp, *ncol+1, *colptr, colnum, colsize); - zReadVector(fp, *nonz, *rowind, rownum, rowsize); + ReadVector(fp, *ncol+1, *colptr, colnum, colsize); + ReadVector(fp, *nonz, *rowind, rownum, rowsize); if ( numer_lines ) { zReadValues(fp, *nonz, *nzval, valnum, valsize); } Added: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadrb.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadrb.c (rev 0) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadrb.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -0,0 +1,246 @@ + +/*! @file zreadrb.c + * \brief Read a matrix stored in Rutherford-Boeing format + * + *
+ * -- SuperLU routine (version 4.0) --
+ * Lawrence Berkeley National Laboratory.
+ * June 30, 2009
+ * 
+ * + * Purpose + * ======= + * + * Read a DOUBLE COMPLEX PRECISION matrix stored in Rutherford-Boeing format + * as described below. + * + * Line 1 (A72, A8) + * Col. 1 - 72 Title (TITLE) + * Col. 73 - 80 Matrix name / identifier (MTRXID) + * + * Line 2 (I14, 3(1X, I13)) + * Col. 1 - 14 Total number of lines excluding header (TOTCRD) + * Col. 16 - 28 Number of lines for pointers (PTRCRD) + * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) + * Col. 44 - 56 Number of lines for numerical values (VALCRD) + * + * Line 3 (A3, 11X, 4(1X, I13)) + * Col. 1 - 3 Matrix type (see below) (MXTYPE) + * Col. 15 - 28 Compressed Column: Number of rows (NROW) + * Elemental: Largest integer used to index variable (MVAR) + * Col. 30 - 42 Compressed Column: Number of columns (NCOL) + * Elemental: Number of element matrices (NELT) + * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) + * Elemental: Number of variable indeces (NVARIX) + * Col. 58 - 70 Compressed Column: Unused, explicitly zero + * Elemental: Number of elemental matrix entries (NELTVL) + * + * Line 4 (2A16, A20) + * Col. 1 - 16 Fortran format for pointers (PTRFMT) + * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) + * Col. 33 - 52 Fortran format for numerical values of coefficient matrix + * (VALFMT) + * (blank in the case of matrix patterns) + * + * The three character type field on line 3 describes the matrix type. + * The following table lists the permitted values for each of the three + * characters. As an example of the type field, RSA denotes that the matrix + * is real, symmetric, and assembled. + * + * First Character: + * R Real matrix + * C Complex matrix + * I integer matrix + * P Pattern only (no numerical values supplied) + * Q Pattern only (numerical values supplied in associated auxiliary value + * file) + * + * Second Character: + * S Symmetric + * U Unsymmetric + * H Hermitian + * Z Skew symmetric + * R Rectangular + * + * Third Character: + * A Compressed column form + * E Elemental form + * + *
+ */ + +#include "slu_zdefs.h" + + +/*! \brief Eat up the rest of the current line */ +static int zDumpLine(FILE *fp) +{ + register int c; + while ((c = fgetc(fp)) != '\n') ; + return 0; +} + +static int zParseIntFormat(char *buf, int *num, int *size) +{ + char *tmp; + + tmp = buf; + while (*tmp++ != '(') ; + sscanf(tmp, "%d", num); + while (*tmp != 'I' && *tmp != 'i') ++tmp; + ++tmp; + sscanf(tmp, "%d", size); + return 0; +} + +static int zParseFloatFormat(char *buf, int *num, int *size) +{ + char *tmp, *period; + + tmp = buf; + while (*tmp++ != '(') ; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' + && *tmp != 'F' && *tmp != 'f') { + /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the + num picked up refers to P, which should be skipped. */ + if (*tmp=='p' || *tmp=='P') { + ++tmp; + *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ + } else { + ++tmp; + } + } + ++tmp; + period = tmp; + while (*period != '.' && *period != ')') ++period ; + *period = '\0'; + *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/ + + return 0; +} + +static int ReadVector(FILE *fp, int n, int *where, int perline, int persize) +{ + register int i, j, item; + char tmp, buf[100]; + + i = 0; + while (i < n) { + fgets(buf, 100, fp); /* read a line at a time */ + for (j=0; j + * -- SuperLU routine (version 4.0) -- + * Lawrence Berkeley National Laboratory. + * June 30, 2009 + *
+ */ + +#include "slu_zdefs.h" + + +void +zreadtriple(int *m, int *n, int *nonz, + doublecomplex **nzval, int **rowind, int **colptr) +{ +/* + * Output parameters + * ================= + * (a,asub,xa): asub[*] contains the row subscripts of nonzeros + * in columns of matrix A; a[*] the numerical values; + * row i of A is given by a[k],k=xa[i],...,xa[i+1]-1. + * + */ + int j, k, jsize, nnz, nz; + doublecomplex *a, *val; + int *asub, *xa, *row, *col; + int zero_base = 0; + + /* Matrix format: + * First line: #rows, #cols, #non-zero + * Triplet in the rest of lines: + * row, col, value + */ + + scanf("%d%d", n, nonz); + *m = *n; + printf("m %d, n %d, nonz %d\n", *m, *n, *nonz); + zallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */ + a = *nzval; + asub = *rowind; + xa = *colptr; + + val = (doublecomplex *) SUPERLU_MALLOC(*nonz * sizeof(doublecomplex)); + row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int)); + + for (j = 0; j < *n; ++j) xa[j] = 0; + + /* Read into the triplet array from a file */ + for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { + scanf("%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); + + if ( nnz == 0 ) { /* first nonzero */ + if ( row[0] == 0 || col[0] == 0 ) { + zero_base = 1; + printf("triplet file: row/col indices are zero-based.\n"); + } else + printf("triplet file: row/col indices are one-based.\n"); + } + + if ( !zero_base ) { + /* Change to 0-based indexing. */ + --row[nz]; + --col[nz]; + } + + if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n + /*|| val[nz] == 0.*/) { + fprintf(stderr, "nz %d, (%d, %d) = (%e,%e) out of bound, removed\n", + nz, row[nz], col[nz], val[nz].r, val[nz].i); + exit(-1); + } else { + ++xa[col[nz]]; + ++nz; + } + } + + *nonz = nz; + + /* Initialize the array of column pointers */ + k = 0; + jsize = xa[0]; + xa[0] = 0; + for (j = 1; j < *n; ++j) { + k += jsize; + jsize = xa[j]; + xa[j] = k; + } + + /* Copy the triplets into the column oriented storage */ + for (nz = 0; nz < *nonz; ++nz) { + j = col[nz]; + k = xa[j]; + asub[k] = row[nz]; + a[k] = val[nz]; + ++xa[j]; + } + + /* Reset the column pointers to the beginning of each column */ + for (j = *n; j > 0; --j) + xa[j] = xa[j-1]; + xa[0] = 0; + + SUPERLU_FREE(val); + SUPERLU_FREE(row); + SUPERLU_FREE(col); + +#ifdef CHK_INPUT + { + int i; + for (i = 0; i < *n; i++) { + printf("Col %d, xa %d\n", i, xa[i]); + for (k = xa[i]; k < xa[i+1]; k++) + printf("%d\t%16.10f\n", asub[k], a[k]); + } + } +#endif + +} + + +void zreadrhs(int m, doublecomplex *b) +{ + FILE *fp, *fopen(); + int i; + /*int j;*/ + + if ( !(fp = fopen("b.dat", "r")) ) { + fprintf(stderr, "dreadrhs: file does not exist\n"); + exit(-1); + } + for (i = 0; i < m; ++i) + fscanf(fp, "%lf%lf\n", &b[i].r, &b[i].i); + + /* readpair_(j, &b[i]);*/ + fclose(fp); +} Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_bmod.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_bmod.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_bmod.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,29 +1,31 @@ -/* +/*! @file zsnode_bmod.c + * \brief Performs numeric block updates within the relaxed snode. + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" +#include "slu_zdefs.h" -/* - * Performs numeric block updates within the relaxed snode. + +/*! \brief Performs numeric block updates within the relaxed snode. */ int zsnode_bmod ( Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_dfs.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_dfs.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsnode_dfs.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,28 +1,46 @@ - -/* +/*! @file zsnode_dfs.c + * \brief Determines the union of row structures of columns within the relaxed node + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" + +/*! \brief + * + *
+ * Purpose
+ * =======
+ *    zsnode_dfs() - Determine the union of the row structures of those 
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ * 
+ */ + int zsnode_dfs ( const int jcol, /* in - start of the supernode */ @@ -35,19 +53,7 @@ GlobalLU_t *Glu /* modified */ ) { -/* Purpose - * ======= - * zsnode_dfs() - Determine the union of the row structures of those - * columns within the relaxed snode. - * Note: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * Return value - * ============ - * 0 success; - * >0 number of bytes allocated when run out of memory. - * - */ + register int i, k, ifrom, ito, nextl, new_next; int nsuper, krow, kmark, mem_error; int *xsup, *supno; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas2.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas2.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas2.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,17 +1,20 @@ -/* +/*! @file zsp_blas2.c + * \brief Sparse BLAS 2, using some dense BLAS 2 operations + * + *
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * October 15, 2003
- *
+ * 
*/ /* * File name: zsp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ -#include "zsp_defs.h" +#include "slu_zdefs.h" /* * Function prototypes @@ -20,12 +23,9 @@ void zlsolve(int, int, doublecomplex*, doublecomplex*); void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*); - -int -sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, - SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info) -{ -/* +/*! \brief Solves one of the systems of equations A*x = b, or A'*x = b + * + *
  *   Purpose
  *   =======
  *
@@ -49,8 +49,8 @@
  *             On entry, trans specifies the equations to be solved as   
  *             follows:   
  *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A'*x = b.   
+ *                trans = 'T' or 't'   A'*x = b.
+ *                trans = 'C' or 'c'   A^H*x = b.   
  *
  *   diag   - (input) char*
  *             On entry, diag specifies whether or not A is unit   
@@ -75,8 +75,12 @@
  *
  *   info    - (output) int*
  *             If *info = -i, the i-th argument had an illegal value.
- *
+ * 
*/ +int +sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, + SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info) +{ #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), @@ -85,8 +89,8 @@ SCformat *Lstore; NCformat *Ustore; doublecomplex *Lval, *Uval; + int incx = 1, incy = 1; doublecomplex temp; - int incx = 1, incy = 1; doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0}; doublecomplex comp_zero = {0.0, 0.0}; int nrow; @@ -98,7 +102,8 @@ /* Test the input parameters */ *info = 0; if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; - else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans,"C") ) *info = -2; + else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && + !lsame_(trans, "C")) *info = -2; else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; @@ -131,7 +136,8 @@ luptr = L_NZ_START(fsupc); nrow = nsupr - nsupc; - solve_ops += 4 * nsupc * (nsupc - 1); + /* 1 z_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc; solve_ops += 8 * nrow * nsupc; if ( nsupc == 1 ) { @@ -184,7 +190,8 @@ nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); - solve_ops += 4 * nsupc * (nsupc + 1); + /* 1 z_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); @@ -219,7 +226,7 @@ } /* for k ... */ } - } else if (lsame_(trans, "T") ) { /* Form x := inv(A')*x */ + } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */ if ( lsame_(uplo, "L") ) { /* Form x := inv(L')*x */ @@ -249,13 +256,13 @@ solve_ops += 4 * nsupc * (nsupc - 1); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); - ftcs2 = _cptofcd(trans, strlen("T")); + ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else - ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); + ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); #endif } } @@ -278,26 +285,27 @@ } } - solve_ops += 4 * nsupc * (nsupc + 1); + /* 1 z_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); } else { #ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); - ftcs2 = _cptofcd(trans, strlen("T")); + ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else - ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); + ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); #endif } } /* for k ... */ } - } else { /* Form x := conj(inv(A'))*x */ - + } else { /* Form x := conj(inv(A'))*x */ + if ( lsame_(uplo, "L") ) { /* Form x := conj(inv(L'))*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ @@ -321,19 +329,19 @@ z_sub(&x[jcol], &x[jcol], &comp_zero); iptr++; } - } - - if ( nsupc > 1 ) { + } + + if ( nsupc > 1 ) { solve_ops += 4 * nsupc * (nsupc - 1); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd(trans, strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); - CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, + ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); + &x[fsupc], &incx); #endif } } @@ -357,25 +365,26 @@ } } - solve_ops += 4 * nsupc * (nsupc + 1); - + /* 1 z_div costs 10 flops */ + solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; + if ( nsupc == 1 ) { - zz_conj(&temp, &Lval[luptr]) + zz_conj(&temp, &Lval[luptr]); z_div(&x[fsupc], &x[fsupc], &temp); } else { #ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); ftcs2 = _cptofcd(trans, strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); - CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, + ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #endif - } - } /* for k ... */ - } + } + } /* for k ... */ + } } stat->ops[SOLVE] += solve_ops; @@ -385,65 +394,69 @@ +/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y + * + *
  
+ *   Purpose   
+ *   =======   
+ *
+ *   sp_zgemv()  performs one of the matrix-vector operations   
+ *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
+ *   where alpha and beta are scalars, x and y are vectors and A is a
+ *   sparse A->nrow by A->ncol matrix.   
+ *
+ *   Parameters   
+ *   ==========   
+ *
+ *   TRANS  - (input) char*
+ *            On entry, TRANS specifies the operation to be performed as   
+ *            follows:   
+ *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
+ *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
+ *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
+ *
+ *   ALPHA  - (input) doublecomplex
+ *            On entry, ALPHA specifies the scalar alpha.   
+ *
+ *   A      - (input) SuperMatrix*
+ *            Before entry, the leading m by n part of the array A must   
+ *            contain the matrix of coefficients.   
+ *
+ *   X      - (input) doublecomplex*, array of DIMENSION at least   
+ *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
+ *           and at least   
+ *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
+ *            Before entry, the incremented array X must contain the   
+ *            vector x.   
+ * 
+ *   INCX   - (input) int
+ *            On entry, INCX specifies the increment for the elements of   
+ *            X. INCX must not be zero.   
+ *
+ *   BETA   - (input) doublecomplex
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then Y need not be set on input.   
+ *
+ *   Y      - (output) doublecomplex*,  array of DIMENSION at least   
+ *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
+ *            and at least   
+ *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
+ *            Before entry with BETA non-zero, the incremented array Y   
+ *            must contain the vector y. On exit, Y is overwritten by the 
+ *            updated vector y.
+ *	      
+ *   INCY   - (input) int
+ *            On entry, INCY specifies the increment for the elements of   
+ *            Y. INCY must not be zero.   
+ *
+ *    ==== Sparse Level 2 Blas routine.   
+ * 
+*/ int sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, int incx, doublecomplex beta, doublecomplex *y, int incy) { -/* Purpose - ======= - sp_zgemv() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) doublecomplex - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - - X - (input) doublecomplex*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) doublecomplex - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) doublecomplex*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -*/ - /* Local variables */ NCformat *Astore; doublecomplex *Aval; Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas3.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas3.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_blas3.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,116 +1,122 @@ - -/* +/*! @file zsp_blas3.c + * \brief Sparse BLAS3, using some dense BLAS3 operations + * + *
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
  * November 15, 1997
- *
+ * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ -#include "zsp_defs.h" -#include "util.h" +#include "slu_zdefs.h" +/*! \brief + * + *
+ * Purpose   
+ *   =======   
+ * 
+ *   sp_z performs one of the matrix-matrix operations   
+ * 
+ *      C := alpha*op( A )*op( B ) + beta*C,   
+ * 
+ *   where  op( X ) is one of 
+ * 
+ *      op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+ * 
+ *   alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
+ *   an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
+ *   
+ * 
+ *   Parameters   
+ *   ==========   
+ * 
+ *   TRANSA - (input) char*
+ *            On entry, TRANSA specifies the form of op( A ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSA = 'N' or 'n',  op( A ) = A.   
+ *               TRANSA = 'T' or 't',  op( A ) = A'.   
+ *               TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
+ *            Unchanged on exit.   
+ * 
+ *   TRANSB - (input) char*
+ *            On entry, TRANSB specifies the form of op( B ) to be used in 
+ *            the matrix multiplication as follows:   
+ *               TRANSB = 'N' or 'n',  op( B ) = B.   
+ *               TRANSB = 'T' or 't',  op( B ) = B'.   
+ *               TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
+ *            Unchanged on exit.   
+ * 
+ *   M      - (input) int   
+ *            On entry,  M  specifies  the number of rows of the matrix 
+ *	     op( A ) and of the matrix C.  M must be at least zero. 
+ *	     Unchanged on exit.   
+ * 
+ *   N      - (input) int
+ *            On entry,  N specifies the number of columns of the matrix 
+ *	     op( B ) and the number of columns of the matrix C. N must be 
+ *	     at least zero.
+ *	     Unchanged on exit.   
+ * 
+ *   K      - (input) int
+ *            On entry, K specifies the number of columns of the matrix 
+ *	     op( A ) and the number of rows of the matrix op( B ). K must 
+ *	     be at least  zero.   
+ *           Unchanged on exit.
+ *      
+ *   ALPHA  - (input) doublecomplex
+ *            On entry, ALPHA specifies the scalar alpha.   
+ * 
+ *   A      - (input) SuperMatrix*
+ *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ *            Currently, the type of A can be:
+ *                Stype = NC or NCP; Dtype = SLU_Z; Mtype = GE. 
+ *            In the future, more general A can be handled.
+ * 
+ *   B      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is 
+ *            n when TRANSB = 'N' or 'n',  and is  k otherwise.   
+ *            Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
+ *            part of the array B must contain the matrix B, otherwise 
+ *            the leading n by k part of the array B must contain the 
+ *            matrix B.   
+ *            Unchanged on exit.   
+ * 
+ *   LDB    - (input) int
+ *            On entry, LDB specifies the first dimension of B as declared 
+ *            in the calling (sub) program. LDB must be at least max( 1, n ).  
+ *            Unchanged on exit.   
+ * 
+ *   BETA   - (input) doublecomplex
+ *            On entry, BETA specifies the scalar beta. When BETA is   
+ *            supplied as zero then C need not be set on input.   
+ *  
+ *   C      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
+ *            Before entry, the leading m by n part of the array C must 
+ *            contain the matrix C,  except when beta is zero, in which 
+ *            case C need not be set on entry.   
+ *            On exit, the array C is overwritten by the m by n matrix 
+ *	     ( alpha*op( A )*B + beta*C ).   
+ *  
+ *   LDC    - (input) int
+ *            On entry, LDC specifies the first dimension of C as declared 
+ *            in the calling (sub)program. LDC must be at least max(1,m).   
+ *            Unchanged on exit.   
+ *  
+ *   ==== Sparse Level 3 Blas routine.   
+ * 
+ */ + int sp_zgemm(char *transa, char *transb, int m, int n, int k, doublecomplex alpha, SuperMatrix *A, doublecomplex *b, int ldb, doublecomplex beta, doublecomplex *c, int ldc) { -/* Purpose - ======= - - sp_z performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) doublecomplex - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_Z; Mtype = GE. - In the future, more general A can be handled. - - B - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) doublecomplex - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -*/ int incx = 1, incy = 1; int j; Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_defs.h =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_defs.h 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zsp_defs.h 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,237 +0,0 @@ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#ifndef __SUPERLU_zSP_DEFS /* allow multiple inclusions */ -#define __SUPERLU_zSP_DEFS - -/* - * File name: zsp_defs.h - * Purpose: Sparse matrix types and function prototypes - * History: - */ - -#ifdef _CRAY -#include -#include -#endif - -/* Define my integer type int_t */ -typedef int int_t; /* default */ - -#include "Cnames.h" -#include "supermatrix.h" -#include "util.h" -#include "dcomplex.h" - - -/* - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. - * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ -typedef struct { - int *xsup; /* supernode and column mapping */ - int *supno; - int *lsub; /* compressed L subscripts */ - int *xlsub; - doublecomplex *lusup; /* L supernodes */ - int *xlusup; - doublecomplex *ucol; /* U columns */ - int *usub; - int *xusub; - int nzlmax; /* current max size of lsub */ - int nzumax; /* " " " ucol */ - int nzlumax; /* " " " lusup */ - int n; /* number of columns in the matrix */ - LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ -} GlobalLU_t; - -typedef struct { - float for_lu; - float total_needed; - int expansions; -} mem_usage_t; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Driver routines */ -extern void -zgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); -extern void -zgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, - char *, double *, double *, SuperMatrix *, SuperMatrix *, - void *, int, SuperMatrix *, SuperMatrix *, - double *, double *, double *, double *, - mem_usage_t *, SuperLUStat_t *, int *); - -/* Supernodal LU factor related */ -extern void -zCreate_CompCol_Matrix(SuperMatrix *, int, int, int, doublecomplex *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -zCreate_CompRow_Matrix(SuperMatrix *, int, int, int, doublecomplex *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -zCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); -extern void -zCreate_Dense_Matrix(SuperMatrix *, int, int, doublecomplex *, int, - Stype_t, Dtype_t, Mtype_t); -extern void -zCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, doublecomplex *, - int *, int *, int *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -extern void -zCopy_Dense_Matrix(int, int, doublecomplex *, int, doublecomplex *, int); - -extern void countnz (const int, int *, int *, int *, GlobalLU_t *); -extern void fixupL (const int, const int *, GlobalLU_t *); - -extern void zallocateA (int, int, doublecomplex **, int **, int **); -extern void zgstrf (superlu_options_t*, SuperMatrix*, double, - int, int, int*, void *, int, int *, int *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); -extern int zsnode_dfs (const int, const int, const int *, const int *, - const int *, int *, int *, GlobalLU_t *); -extern int zsnode_bmod (const int, const int, const int, doublecomplex *, - doublecomplex *, GlobalLU_t *, SuperLUStat_t*); -extern void zpanel_dfs (const int, const int, const int, SuperMatrix *, - int *, int *, doublecomplex *, int *, int *, int *, - int *, int *, int *, int *, GlobalLU_t *); -extern void zpanel_bmod (const int, const int, const int, const int, - doublecomplex *, doublecomplex *, int *, int *, - GlobalLU_t *, SuperLUStat_t*); -extern int zcolumn_dfs (const int, const int, int *, int *, int *, int *, - int *, int *, int *, int *, int *, GlobalLU_t *); -extern int zcolumn_bmod (const int, const int, doublecomplex *, - doublecomplex *, int *, int *, int, - GlobalLU_t *, SuperLUStat_t*); -extern int zcopy_to_ucol (int, int, int *, int *, int *, - doublecomplex *, GlobalLU_t *); -extern int zpivotL (const int, const double, int *, int *, - int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); -extern void zpruneL (const int, const int *, const int, const int, - const int *, const int *, int *, GlobalLU_t *); -extern void zreadmt (int *, int *, int *, doublecomplex **, int **, int **); -extern void zGenXtrue (int, int, doublecomplex *, int); -extern void zFillRHS (trans_t, int, doublecomplex *, int, SuperMatrix *, - SuperMatrix *); -extern void zgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, - SuperMatrix *, SuperLUStat_t*, int *); - - -/* Driver related */ - -extern void zgsequ (SuperMatrix *, double *, double *, double *, - double *, double *, int *); -extern void zlaqgs (SuperMatrix *, double *, double *, double, - double, double, char *); -extern void zgscon (char *, SuperMatrix *, SuperMatrix *, - double, double *, SuperLUStat_t*, int *); -extern double zPivotGrowth(int, SuperMatrix *, int *, - SuperMatrix *, SuperMatrix *); -extern void zgsrfs (trans_t, SuperMatrix *, SuperMatrix *, - SuperMatrix *, int *, int *, char *, double *, - double *, SuperMatrix *, SuperMatrix *, - double *, double *, SuperLUStat_t*, int *); - -extern int sp_ztrsv (char *, char *, char *, SuperMatrix *, - SuperMatrix *, doublecomplex *, SuperLUStat_t*, int *); -extern int sp_zgemv (char *, doublecomplex, SuperMatrix *, doublecomplex *, - int, doublecomplex, doublecomplex *, int); - -extern int sp_zgemm (char *, char *, int, int, int, doublecomplex, - SuperMatrix *, doublecomplex *, int, doublecomplex, - doublecomplex *, int); - -/* Memory-related */ -extern int zLUMemInit (fact_t, void *, int, int, int, int, int, - SuperMatrix *, SuperMatrix *, - GlobalLU_t *, int **, doublecomplex **); -extern void zSetRWork (int, int, doublecomplex *, doublecomplex **, doublecomplex **); -extern void zLUWorkFree (int *, doublecomplex *, GlobalLU_t *); -extern int zLUMemXpand (int, int, MemType, int *, GlobalLU_t *); - -extern doublecomplex *doublecomplexMalloc(int); -extern doublecomplex *doublecomplexCalloc(int); -extern double *doubleMalloc(int); -extern double *doubleCalloc(int); -extern int zmemory_usage(const int, const int, const int, const int); -extern int zQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); - -/* Auxiliary routines */ -extern void zreadhb(int *, int *, int *, doublecomplex **, int **, int **); -extern void zCompRow_to_CompCol(int, int, int, doublecomplex*, int*, int*, - doublecomplex **, int **, int **); -extern void zfill (doublecomplex *, int, doublecomplex); -extern void zinf_norm_error (int, SuperMatrix *, doublecomplex *); -extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, - doublecomplex, doublecomplex, doublecomplex *, doublecomplex *, char *); - -/* Routines for debugging */ -extern void zPrint_CompCol_Matrix(char *, SuperMatrix *); -extern void zPrint_SuperNode_Matrix(char *, SuperMatrix *); -extern void zPrint_Dense_Matrix(char *, SuperMatrix *); -extern void print_lu_col(char *, int, int, int *, GlobalLU_t *); -extern void check_tempv(int, doublecomplex *); - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_zSP_DEFS */ - Modified: trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zutil.c =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zutil.c 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/SRC/zutil.c 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,26 +1,29 @@ -/* - * -- SuperLU routine (version 3.0) -- +/*! @file zutil.c + * \brief Matrix utility functions + * + *
+ * -- SuperLU routine (version 3.1) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  * and Lawrence Berkeley National Lab.
- * October 15, 2003
+ * August 1, 2008
  *
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ * 
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ * 
*/ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ + #include -#include "zsp_defs.h" +#include "slu_zdefs.h" void zCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, @@ -64,7 +67,7 @@ Astore->rowptr = rowptr; } -/* Copy matrix A into matrix B. */ +/*! \brief Copy matrix A into matrix B. */ void zCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) { @@ -108,12 +111,7 @@ zCopy_Dense_Matrix(int M, int N, doublecomplex *X, int ldx, doublecomplex *Y, int ldy) { -/* - * - * Purpose - * ======= - * - * Copies a two-dimensional matrix X to another matrix Y. +/*! \brief Copies a two-dimensional matrix X to another matrix Y. */ int i, j; @@ -150,8 +148,7 @@ } -/* - * Convert a row compressed storage into a column compressed storage. +/*! \brief Convert a row compressed storage into a column compressed storage. */ void zCompRow_to_CompCol(int m, int n, int nnz, @@ -240,7 +237,8 @@ for (j = c; j < c + nsup; ++j) { d = Astore->nzval_colptr[j]; for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) { - printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]); + printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]); + d += 2; } } } @@ -266,23 +264,24 @@ void zPrint_Dense_Matrix(char *what, SuperMatrix *A) { - DNformat *Astore; - register int i; + DNformat *Astore = (DNformat *) A->Store; + register int i, j, lda = Astore->lda; double *dp; printf("\nDense matrix %s:\n", what); printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; dp = (double *) Astore->nzval; - printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); + printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda); printf("\nnzval: "); - for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i]); + for (j = 0; j < A->ncol; ++j) { + for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i + j*2*lda]); + printf("\n"); + } printf("\n"); fflush(stdout); } -/* - * Diagnostic print of column "jcol" in the U/L factor. +/*! \brief Diagnostic print of column "jcol" in the U/L factor. */ void zprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) @@ -324,9 +323,7 @@ } -/* - * Check whether tempv[] == 0. This should be true before and after - * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". +/*! \brief Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod". */ void zcheck_tempv(int n, doublecomplex *tempv) { @@ -353,8 +350,7 @@ } } -/* - * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's +/*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void zFillRHS(trans_t trans, int nrhs, doublecomplex *x, int ldx, @@ -383,8 +379,7 @@ } -/* - * Fills a doublecomplex precision array with a given value. +/*! \brief Fills a doublecomplex precision array with a given value. */ void zfill(doublecomplex *a, int alen, doublecomplex dval) @@ -395,8 +390,7 @@ -/* - * Check the inf-norm of the error vector +/*! \brief Check the inf-norm of the error vector */ void zinf_norm_error(int nrhs, SuperMatrix *X, doublecomplex *xtrue) { @@ -424,7 +418,7 @@ -/* Print performance of the code. */ +/*! \brief Print performance of the code. */ void zPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, double rpg, double rcond, double *ferr, @@ -452,9 +446,9 @@ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, - mem_usage->expansions); + printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + mem_usage->for_lu/1e6, mem_usage->total_needed/1e6); + printf("Number of memory expansions: %d\n", stat->expansions); printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", Deleted: trunk/scipy/sparse/linalg/dsolve/SuperLU/changes.txt =================================================================== --- trunk/scipy/sparse/linalg/dsolve/SuperLU/changes.txt 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/SuperLU/changes.txt 2010-04-27 21:54:15 UTC (rev 6344) @@ -1,21 +0,0 @@ - -Changes made to the SuperLU3.0 tree -====================================== - -* Altered SRC/util to call special abort, malloc, and free routines -* Removed ?myblas2.c routines from source directory (we will use VENDOR_BLAS) - -* Altered superlu_timer.c to allow a NO_TIMER def to be defined so that the - library can compile on Win32 (actually used the superlu_timer.c file from pysparse) - -* Altered SRC/?sp_defs.h to add header for ?Create_CompRow_Matrix - -* Altered SRC/util.c SRC/util.h to add Destroy_CompRow_Matrix - (a nicety because Destroy_CompCol_Matrix would work due to binary compatibility, but...) - -* Altered zsp_blas2.c, csp_blas2.c, zgstrs.c, cgstrs.c, dcomplex.h, scomplex.h to add support - for solving A**H * x = b after factoring. - -* Tried to get rid of some compiler errors (added void to argument lists in headers of util.h). - - Modified: trunk/scipy/sparse/linalg/dsolve/setup.py =================================================================== --- trunk/scipy/sparse/linalg/dsolve/setup.py 2010-04-27 21:46:38 UTC (rev 6343) +++ trunk/scipy/sparse/linalg/dsolve/setup.py 2010-04-27 21:54:15 UTC (rev 6344) @@ -17,7 +17,7 @@ superlu_defs.append(('USE_VENDOR_BLAS',1)) config.add_library('superlu_src', - sources = [join('SuperLU','SRC','*.c')], + sources = [join(superlu_src,'*.c')], macros = superlu_defs )