[Scipy-svn] r6251 - in trunk/scipy/sparse/linalg/isolve: . tests

scipy-svn at scipy.org
Tue Mar 2 14:14:21 EST 2010


Author: stefan
Date: 2010-03-02 13:14:21 -0600 (Tue, 02 Mar 2010)
New Revision: 6251

Added:
   trunk/scipy/sparse/linalg/isolve/lsqr.py
   trunk/scipy/sparse/linalg/isolve/tests/test_lsqr.py
Modified:
   trunk/scipy/sparse/linalg/isolve/__init__.py
Log:
ENH: Add LSQR algorithm for solving sparse least squares problems to scipy.sparse.

Modified: trunk/scipy/sparse/linalg/isolve/__init__.py
===================================================================
--- trunk/scipy/sparse/linalg/isolve/__init__.py	2010-02-24 12:27:43 UTC (rev 6250)
+++ trunk/scipy/sparse/linalg/isolve/__init__.py	2010-03-02 19:14:21 UTC (rev 6251)
@@ -4,6 +4,7 @@
 from iterative import *
 from minres import minres
 from lgmres import lgmres
+from lsqr import lsqr
 
 __all__ = filter(lambda s:not s.startswith('_'),dir())
 from numpy.testing import Tester

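For reference, once a build includes this revision the new routine is reachable through the public package; a minimal sketch (not part of the commit):

    import numpy as np
    from scipy.sparse.linalg import lsqr   # re-exported via isolve/__init__.py

    A = np.eye(3)
    b = np.array([1.0, 2.0, 3.0])
    x = lsqr(A, b)[0]           # first element of the return tuple is the solution
    print(np.allclose(x, b))    # identity system, so x should match b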
Added: trunk/scipy/sparse/linalg/isolve/lsqr.py
===================================================================
--- trunk/scipy/sparse/linalg/isolve/lsqr.py	                        (rev 0)
+++ trunk/scipy/sparse/linalg/isolve/lsqr.py	2010-03-02 19:14:21 UTC (rev 6251)
@@ -0,0 +1,495 @@
+"""Sparse Equations and Least Squares.
+
+The original Fortran code was written by C. C. Paige and M. A. Saunders as
+described in
+
+C. C. Paige and M. A. Saunders, LSQR: An algorithm for sparse linear
+equations and sparse least squares, TOMS 8(1), 43--71 (1982).
+
+C. C. Paige and M. A. Saunders, Algorithm 583; LSQR: Sparse linear
+equations and least-squares problems, TOMS 8(2), 195--209 (1982).
+
+It is licensed under the following BSD license:
+
+Copyright (c) 2006, Systems Optimization Laboratory
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Stanford University nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The Fortran code was translated to Python for use in CVXOPT by Jeffery
+Kline with contributions by Mridul Aanjaneya and Bob Myhill.
+
+Adapted for SciPy by Stefan van der Walt.
+
+"""
+
+__all__ = ['lsqr']
+
+import numpy as np
+from math import sqrt
+from scipy.sparse.linalg.interface import aslinearoperator
+
+def _sym_ortho(a,b):
+    """
+    Jeffery Kline noted: I added the routine 'SymOrtho' for numerical
+    stability. This is recommended by S.-C. Choi in [1]_. It removes
+    the unpleasant potential of ``1/eps`` in some important places
+    (see, for example text following "Compute the next
+    plane rotation Qk" in minres_py).
+
+    References
+    ----------
+    .. [1] S.-C. Choi, "Iterative Methods for Singular Linear Equations
+           and Least-Squares Problems", Dissertation,
+           http://www.stanford.edu/group/SOL/dissertations/sou-cheng-choi-thesis.pdf
+
+    """
+    aa = abs(a)
+    ab = abs(b)
+    if b == 0.:
+        s = 0.
+        r = aa
+        if aa == 0.:
+            c = 1.
+        else:
+            c = a/aa
+    elif a == 0.:
+        c = 0.
+        s = b / ab
+        r = ab
+    elif ab >= aa:
+        sb = 1
+        if b < 0: sb=-1
+        tau = a/b
+        s = sb * (1 + tau**2)**-0.5
+        c = s * tau
+        r = b / s
+    elif aa > ab:
+        sa = 1
+        if a < 0: sa = -1
+        tau = b / a
+        c = sa * (1 + tau**2)**-0.5
+        s = c * tau
+        r = a / c
+
+    return c, s, r
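As a side note on the helper above: _sym_ortho returns a plane rotation/reflection (c, s, r) with r = hypot(a, b). A small illustrative check, not part of the commit, of the identities the routine is meant to satisfy:

    import numpy as np
    from scipy.sparse.linalg.isolve.lsqr import _sym_ortho

    for a, b in [(3.0, 4.0), (-3.0, 4.0), (0.0, -2.0), (5.0, 0.0)]:
        c, s, r = _sym_ortho(a, b)
        # the symmetric orthogonal matrix [[c, s], [s, -c]] maps (a, b) to (r, 0)
        assert np.allclose(r, np.hypot(a, b))
        assert np.allclose([c * a + s * b, s * a - c * b], [r, 0.0])
        assert np.allclose(c**2 + s**2, 1.0)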
+
+
+def lsqr(A, b, damp=0.0, atol=1e-8, btol=1e-8, conlim=1e8,
+         iter_lim=None, show=False, calc_var=False):
+    """Find the least-squares solution to a large, sparse, linear system
+    of equations.
+
+    The function solves ``Ax = b``  or  ``min ||b - Ax||^2`` or
+    ``min ||Ax - b||^2 + d^2 ||x||^2``.
+
+    The matrix A may be square or rectangular (over-determined or
+    under-determined), and may have any rank.
+
+    ::
+
+      1. Unsymmetric equations --    solve  A*x = b
+
+      2. Linear least squares  --    solve  A*x = b
+                                     in the least-squares sense
+
+      3. Damped least squares  --    solve  (   A    )*x = ( b )
+                                            ( damp*I )     ( 0 )
+                                     in the least-squares sense
+
+    Parameters
+    ----------
+    A : {sparse matrix, ndarray, LinearOperator}
+        Representation of an mxn matrix.  It is required that
+        the linear operator can produce ``Ax`` and ``A^T x``.
+    b : (m,) ndarray
+        Right-hand side vector ``b``.
+    damp : float
+        Damping coefficient.
+    atol, btol : float
+        Stopping tolerances. If both are 1.0e-9 (say), the final
+        residual norm should be accurate to about 9 digits.  (The
+        final x will usually have fewer correct digits, depending on
+        cond(A) and the size of damp.)
+    conlim : float
+        Another stopping tolerance.  lsqr terminates if an estimate of
+        ``cond(A)`` exceeds `conlim`.  For compatible systems ``Ax =
+        b``, `conlim` could be as large as 1.0e+12 (say).  For
+        least-squares problems, conlim should be less than 1.0e+8.
+        Maximum precision can be obtained by setting ``atol = btol =
+        conlim = zero``, but the number of iterations may then be
+        excessive.
+    iter_lim : int
+        Explicit limitation on number of iterations (for safety).
+    show : bool
+        Display an iteration log.
+    calc_var : bool
+        Whether to estimate diagonals of ``(A'A + damp^2*I)^{-1}``.
+
+    Returns
+    -------
+    x : ndarray of float
+        The final solution.
+    istop : int
+        Gives the reason for termination.
+        1 means x is an approximate solution to Ax = b.
+        2 means x approximately solves the least-squares problem.
+    itn : int
+        Iteration number upon termination.
+    r1norm : float
+        ``norm(r)``, where ``r = b - Ax``.
+    r2norm : float
+        ``sqrt( norm(r)^2  +  damp^2 * norm(x)^2 )``.  Equal to `r1norm` if
+        ``damp == 0``.
+    anorm : float
+        Estimate of Frobenius norm of ``Abar = [[A]; [damp*I]]``.
+    acond : float
+        Estimate of ``cond(Abar)``.
+    arnorm : float
+        Estimate of ``norm(A'*r - damp^2*x)``.
+    xnorm : float
+        ``norm(x)``
+    var : ndarray of float
+        If ``calc_var`` is True, estimates all diagonals of
+        ``(A'A)^{-1}`` (if ``damp == 0``) or more generally ``(A'A +
+        damp^2*I)^{-1}``.  This is well defined if A has full column
+        rank or ``damp > 0``.  (The interpretation of ``var`` is
+        unclear if ``rank(A) < n`` and ``damp == 0``.)
+
+    Notes
+    -----
+    LSQR uses an iterative method to approximate the solution.  The
+    number of iterations required to reach a certain accuracy depends
+    strongly on the scaling of the problem.  Poor scaling of the rows
+    or columns of A should therefore be avoided where possible.
+
+    For example, in problem 1 the solution is unaltered by
+    row-scaling.  If a row of A is very small or large compared to
+    the other rows of A, the corresponding row of ( A  b ) should be
+    scaled up or down.
+
+    In problems 1 and 2, the solution x is easily recovered
+    following column-scaling.  Unless better information is known,
+    the nonzero columns of A should be scaled so that they all have
+    the same Euclidean norm (e.g., 1.0).
+
+    In problem 3, there is no freedom to re-scale if damp is
+    nonzero.  However, the value of damp should be assigned only
+    after attention has been paid to the scaling of A.
+
+    The parameter damp is intended to help regularize
+    ill-conditioned systems, by preventing the true solution from
+    being very large.  Another aid to regularization is provided by
+    the parameter acond, which may be used to terminate iterations
+    before the computed solution becomes very large.
+
+    If some initial estimate ``x0`` is known and if ``damp == 0``,
+    one could proceed as follows:
+
+      1. Compute a residual vector ``r0 = b - A*x0``.
+      2. Use LSQR to solve the system  ``A*dx = r0``.
+      3. Add the correction dx to obtain a final solution ``x = x0 + dx``.
+
+    This requires that ``x0`` be available before and after the call
+    to LSQR.  To judge the benefits, suppose LSQR takes k1 iterations
+    to solve A*x = b and k2 iterations to solve A*dx = r0.
+    If x0 is "good", norm(r0) will be smaller than norm(b).
+    If the same stopping tolerances atol and btol are used for each
+    system, k1 and k2 will be similar, but the final solution x0 + dx
+    should be more accurate.  The only way to reduce the total work
+    is to use a larger stopping tolerance for the second system.
+    If some value btol is suitable for A*x = b, the larger value
+    btol*norm(b)/norm(r0)  should be suitable for A*dx = r0.
+
+    Preconditioning is another way to reduce the number of iterations.
+    If it is possible to solve a related system ``M*x = b``
+    efficiently, where M approximates A in some helpful way (e.g. M -
+    A has low rank or its elements are small relative to those of A),
+    LSQR may converge more rapidly on the system ``A*M(inverse)*z =
+    b``, after which x can be recovered by solving M*x = z.
+
+    If A is symmetric, LSQR should not be used!
+
+    Alternatives are the symmetric conjugate-gradient method (cg)
+    and/or SYMMLQ.  SYMMLQ is an implementation of symmetric cg that
+    applies to any symmetric A and will converge more rapidly than
+    LSQR.  If A is positive definite, there are other implementations
+    of symmetric cg that require slightly less work per iteration than
+    SYMMLQ (but will take the same number of iterations).
+
+    References
+    ----------
+    .. [1] C. C. Paige and M. A. Saunders (1982a).
+           "LSQR: An algorithm for sparse linear equations and
+           sparse least squares", ACM TOMS 8(1), 43-71.
+    .. [2] C. C. Paige and M. A. Saunders (1982b).
+           "Algorithm 583.  LSQR: Sparse linear equations and least
+           squares problems", ACM TOMS 8(2), 195-209.
+    .. [3] M. A. Saunders (1995).  "Solution of sparse rectangular
+           systems using LSQR and CRAIG", BIT 35, 588-604.
+
+    """
+    A = aslinearoperator(A)
+    b = b.squeeze()
+
+    m, n = A.shape
+    if iter_lim is None: iter_lim = 2 * n
+    var = np.zeros(n)
+
+    msg=('The exact solution is  x = 0                              ',
+         'Ax - b is small enough, given atol, btol                  ',
+         'The least-squares solution is good enough, given atol     ',
+         'The estimate of cond(Abar) has exceeded conlim            ',
+         'Ax - b is small enough for this machine                   ',
+         'The least-squares solution is good enough for this machine',
+         'Cond(Abar) seems to be too large for this machine         ',
+         'The iteration limit has been reached                      ');
+
+    if show:
+        print ' '
+        print 'LSQR            Least-squares solution of  Ax = b'
+        str1 = 'The matrix A has %8g rows  and %8g cols' % (m, n)
+        str2 = 'damp = %20.14e   calc_var = %8g' % (damp, calc_var)
+        str3 = 'atol = %8.2e                 conlim = %8.2e'%( atol, conlim)
+        str4 = 'btol = %8.2e               iter_lim = %8g'  %( btol, iter_lim)
+        print str1
+        print str2
+        print str3
+        print str4
+
+    itn = 0
+    istop = 0
+    nstop = 0
+    ctol = 0
+    if conlim > 0: ctol = 1/conlim
+    anorm = 0
+    acond = 0
+    dampsq = damp**2
+    ddnorm = 0
+    res2 = 0
+    xnorm = 0
+    xxnorm = 0
+    z = 0
+    cs2 = -1
+    sn2 = 0
+
+    """
+    Set up the first vectors u and v for the bidiagonalization.
+    These satisfy  beta*u = b,  alfa*v = A'u.
+    """
+    __xm = np.zeros(m) # a vector for temporary storage
+    __xn = np.zeros(n) # a vector for temporary storage
+    v = np.zeros(n)
+    u = b
+    x = np.zeros(n)
+    alfa = 0
+    beta = np.linalg.norm(u)
+    w = np.zeros(n)
+
+    if beta > 0:
+        u = (1/beta) * u
+        v = A.rmatvec(u)
+        alfa = np.linalg.norm(v)
+
+    if alfa > 0:
+        v = (1/alfa) * v
+        w = v.copy()
+
+    rhobar = alfa
+    phibar = beta
+    bnorm = beta
+    rnorm = beta
+    r1norm = rnorm
+    r2norm = rnorm
+
+    # Reverse the order here from the original matlab code because
+    # there was an error on return when arnorm==0
+    arnorm = alfa * beta
+    if arnorm == 0:
+        print msg[0];
+        return x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var
+
+    head1  = '   Itn      x[0]       r1norm     r2norm ';
+    head2  = ' Compatible    LS      Norm A   Cond A';
+
+    if show:
+        print ' '
+        print head1, head2
+        test1  = 1;             test2  = alfa / beta;
+        str1   = '%6g %12.5e'    %(    itn,   x[0] );
+        str2   = ' %10.3e %10.3e'%( r1norm, r2norm );
+        str3   = '  %8.1e %8.1e' %(  test1,  test2 );
+        print str1, str2, str3
+
+    # Main iteration loop.
+    while itn < iter_lim:
+        itn = itn + 1
+        """
+        %     Perform the next step of the bidiagonalization to obtain the
+        %     next  beta, u, alfa, v.  These satisfy the relations
+        %                beta*u  =  A*v   -  alfa*u,
+        %                alfa*v  =  A'*u  -  beta*v.
+        """
+        u = A.matvec(v) - alfa * u
+        beta = np.linalg.norm(u)
+
+        if beta > 0:
+            u = (1/beta) * u
+            anorm = sqrt(anorm**2 + alfa**2 + beta**2 + damp**2)
+            v = A.rmatvec(u) - beta * v
+            alfa  = np.linalg.norm(v)
+            if alfa > 0:
+                v = (1 / alfa) * v
+
+        # Use a plane rotation to eliminate the damping parameter.
+        # This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
+        rhobar1 = sqrt(rhobar**2 + damp**2)
+        cs1 = rhobar / rhobar1
+        sn1 = damp / rhobar1
+        psi = sn1 * phibar
+        phibar = cs1 * phibar
+
+        # Use a plane rotation to eliminate the subdiagonal element (beta)
+        # of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
+        cs, sn, rho = _sym_ortho(rhobar1, beta)
+
+        theta = sn * alfa
+        rhobar = -cs * alfa
+        phi = cs * phibar
+        phibar = sn * phibar
+        tau = sn * phi
+
+        # Update x and w.
+        t1 = phi / rho
+        t2 = -theta / rho
+        dk = (1 / rho) * w
+
+        x = x + t1 * w
+        w = v + t2 * w
+        ddnorm = ddnorm + np.linalg.norm(dk)**2
+
+        if calc_var:
+            var = var + dk**2
+
+        # Use a plane rotation on the right to eliminate the
+        # super-diagonal element (theta) of the upper-bidiagonal matrix.
+        # Then use the result to estimate norm(x).
+        delta = sn2 * rho
+        gambar = -cs2 * rho
+        rhs = phi - delta * z
+        zbar = rhs / gambar
+        xnorm = sqrt(xxnorm + zbar**2)
+        gamma = sqrt(gambar**2 +theta**2)
+        cs2 = gambar / gamma
+        sn2 = theta  / gamma
+        z = rhs / gamma
+        xxnorm = xxnorm  +  z**2
+
+        # Test for convergence.
+        # First, estimate the condition of the matrix  Abar,
+        # and the norms of  rbar  and  Abar'rbar.
+        acond = anorm * sqrt(ddnorm)
+        res1 = phibar**2
+        res2 = res2 + psi**2
+        rnorm = sqrt(res1 + res2)
+        arnorm = alfa * abs(tau)
+
+        # Distinguish between
+        #    r1norm = ||b - Ax|| and
+        #    r2norm = rnorm in current code
+        #           = sqrt(r1norm^2 + damp^2*||x||^2).
+        #    Estimate r1norm from
+        #    r1norm = sqrt(r2norm^2 - damp^2*||x||^2).
+        # Although there is cancellation, it might be accurate enough.
+        r1sq = rnorm**2 - dampsq * xxnorm
+        r1norm = sqrt(abs(r1sq))
+        if r1sq < 0:
+            r1norm = -r1norm
+        r2norm = rnorm
+
+        # Now use these norms to estimate certain other quantities,
+        # some of which will be small near a solution.
+        test1 = rnorm / bnorm
+        test2 = arnorm / (anorm * rnorm)
+        test3 = 1 / acond
+        t1 = test1 / (1 + anorm * xnorm / bnorm)
+        rtol = btol + atol *  anorm * xnorm / bnorm
+
+        # The following tests guard against extremely small values of
+        # atol, btol  or  ctol.  (The user may have set any or all of
+        # the parameters  atol, btol, conlim  to 0.)
+        # The effect is equivalent to the normal tests using
+        # atol = eps,  btol = eps,  conlim = 1/eps.
+        if itn >= iter_lim: istop = 7
+        if 1 + test3 <= 1: istop = 6
+        if 1 + test2 <= 1: istop = 5
+        if 1 + t1 <= 1: istop = 4
+
+        # Allow for tolerances set by the user.
+        if test3 <= ctol: istop = 3
+        if test2 <= atol: istop = 2
+        if test1 <= rtol: istop = 1
+
+        # See if it is time to print something.
+        prnt = False;
+        if n <= 40: prnt = True
+        if itn <= 10: prnt = True
+        if itn >= iter_lim-10: prnt = True
+        # if itn%10 == 0: prnt = True
+        if test3 <= 2*ctol: prnt = True
+        if test2 <= 10*atol: prnt = True
+        if test1 <= 10*rtol: prnt = True
+        if istop != 0: prnt = True
+
+        if prnt:
+            if show:
+                str1 = '%6g %12.5e' % (itn, x[0])
+                str2 = ' %10.3e %10.3e' % (r1norm, r2norm)
+                str3 = '  %8.1e %8.1e' % (test1, test2)
+                str4 = ' %8.1e %8.1e' % (anorm, acond)
+                print str1, str2, str3, str4
+
+        if istop != 0: break
+
+    # End of iteration loop.
+    # Print the stopping condition.
+    if show:
+        print ' '
+        print 'LSQR finished'
+        print msg[istop]
+        print ' '
+        str1 = 'istop =%8g   r1norm =%8.1e' % (istop, r1norm)
+        str2 = 'anorm =%8.1e   arnorm =%8.1e' % (anorm, arnorm)
+        str3 = 'itn   =%8g   r2norm =%8.1e' % (itn, r2norm)
+        str4 = 'acond =%8.1e   xnorm  =%8.1e' % (acond, xnorm)
+        print str1+ '   ' + str2
+        print str3+ '   ' + str4
+        print ' '
+
+    return x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var
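For context, a usage sketch (illustrative only, not part of the commit) of the call signature and ten-element return tuple documented above, including a damped solve and the warm-start recipe from the Notes:

    import numpy as np
    import scipy.sparse as sp
    from scipy.sparse.linalg import lsqr

    m, n = 100, 30
    M = np.random.randn(m, n)
    M[np.abs(M) < 1.0] = 0.0           # zero most entries to get a sparse pattern
    A = sp.csr_matrix(M)
    b = np.random.randn(m)

    # Damped least squares:  min ||Ax - b||^2 + damp^2 * ||x||^2
    x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var = \
        lsqr(A, b, damp=1e-3, atol=1e-8, btol=1e-8, calc_var=True)
    # istop == 1: Ax = b is satisfied to within btol;
    # istop == 2: the least-squares optimality condition holds to within atol.
    print("istop = %d, itn = %d, r1norm = %.3e" % (istop, itn, r1norm))

    # Warm start as described in the Notes (valid when damp == 0):
    x0 = x                             # any previously available estimate
    r0 = b - A * x0                    # 1. residual of the estimate
    dx = lsqr(A, r0)[0]                # 2. solve A*dx = r0
    x_refined = x0 + dx                # 3. corrected solution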

Added: trunk/scipy/sparse/linalg/isolve/tests/test_lsqr.py
===================================================================
--- trunk/scipy/sparse/linalg/isolve/tests/test_lsqr.py	                        (rev 0)
+++ trunk/scipy/sparse/linalg/isolve/tests/test_lsqr.py	2010-03-02 19:14:21 UTC (rev 6251)
@@ -0,0 +1,59 @@
+import numpy as np
+
+from scipy.sparse.linalg import lsqr
+from time import time
+
+# Set up a test problem
+n = 35
+G = np.eye(n)
+normal = np.random.normal
+norm = np.linalg.norm
+
+for jj in range(5):
+    gg = normal(size=n)
+    hh = gg * gg.T
+    G += (hh + hh.T) * 0.5
+    G += normal(size=n) * normal(size=n)
+
+b = normal(size=n)
+
+tol = 1e-10
+show = False
+maxit = None
+
+def test_basic():
+    svx = np.linalg.solve(G, b)
+    X = lsqr(G, b, show=show, atol=tol, btol=tol, iter_lim=maxit)
+    xo = X[0]
+    assert norm(svx - xo) < 1e-5
+
+if __name__ == "__main__":
+    svx = np.linalg.solve(G, b)
+
+    tic = time()
+    X = lsqr(G, b, show=show, atol=tol, btol=tol, iter_lim=maxit)
+    xo = X[0]
+    phio = X[3]
+    psio = X[7]
+    k = X[2]
+    chio = X[8]
+    mg = np.amax(np.abs(G - G.T))
+    if mg > 1e-14:
+        sym='No'
+    else:
+        sym='Yes'
+
+    print 'LSQR'
+    print "Is linear operator symmetric? " + sym
+    print "n: %3g  iterations:   %3g" % (n, k)
+    print "Norms computed in %.2fs by LSQR" % (time() - tic)
+    print " ||x||  %9.4e  ||r|| %9.4e  ||Ar||  %9.4e " %( chio, phio, psio)
+    print "Residual norms computed directly:"
+    print " ||x||  %9.4e  ||r|| %9.4e  ||Ar||  %9.4e" %  (norm(xo),
+                                                          norm(np.dot(G, xo) - b),
+                                                          norm(np.dot(G.T, np.dot(G, xo) - b)))
+    print "Direct solution norms:"
+    print " ||x||  %9.4e  ||r|| %9.4e " %  (norm(svx), norm(np.dot(G, svx) - b))
+    print ""
+    print " || x_{direct} - x_{LSQR}|| %9.4e " % norm(svx-xo)
+    print ""

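The test above drives lsqr with a dense ndarray; since the solver only needs matvec and rmatvec products, a LinearOperator can be passed as well. A sketch under that assumption (not part of the commit):

    import numpy as np
    from scipy.sparse.linalg import LinearOperator, lsqr

    m, n = 50, 20
    M = np.random.randn(m, n)
    A = LinearOperator((m, n),
                       matvec=lambda v: np.dot(M, v),     # computes A * v
                       rmatvec=lambda v: np.dot(M.T, v))  # computes A^T * v
    b = np.random.randn(m)

    x = lsqr(A, b, atol=1e-10, btol=1e-10)[0]
    x_ref = np.linalg.lstsq(M, b)[0]      # dense reference solution
    print(np.allclose(x, x_ref, atol=1e-6))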