[Scipy-svn] r6673 - in trunk/scipy/optimize: . tests

scipy-svn at scipy.org
Sat Sep 4 16:54:05 EDT 2010


Author: ptvirtan
Date: 2010-09-04 15:54:05 -0500 (Sat, 04 Sep 2010)
New Revision: 6673

Added:
   trunk/scipy/optimize/tests/test_linesearch.py
Modified:
   trunk/scipy/optimize/linesearch.py
   trunk/scipy/optimize/optimize.py
   trunk/scipy/optimize/tests/test_optimize.py
Log:
ENH: optimize: refactor line searches to scalar search + line wrapper; move to linesearch.py

Each of the existing line search functions

    f(x + s*p) -> suitable minimum

is split into a scalar search function

    phi(s) -> suitable minimum

and a wrapper

    phi_p(s) = f(x + s*p)

that makes it a line search.

This refactoring makes the logic in the functions slightly clearer, and
makes reusing them in large-scale non-linear equation solvers (which
only need 1-d searches) more straightforward.
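
For illustration, a minimal sketch of the wrapper pattern (the names
here are generic, not the exact new API):

    import numpy as np

    def as_scalar_search(f, fprime, xk, pk):
        # reduce an n-d line search along pk to a 1-d scalar search
        def phi(s):
            return f(xk + s*pk)                    # phi(s) = f(x + s*p)
        def derphi(s):
            return np.dot(fprime(xk + s*pk), pk)   # directional derivative
        return phi, derphi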

New tests for the line searches are also added. They check that each
search delivers what it promises (a step satisfying the Wolfe or Armijo
conditions, with correctly reported function values and evaluation
counts) for a couple of test problems.

Modified: trunk/scipy/optimize/linesearch.py
===================================================================
--- trunk/scipy/optimize/linesearch.py	2010-09-04 17:45:31 UTC (rev 6672)
+++ trunk/scipy/optimize/linesearch.py	2010-09-04 20:54:05 UTC (rev 6673)
@@ -1,53 +1,593 @@
-
 from scipy.optimize import minpack2
-import numpy
+import numpy as np
 
-import __builtin__
-pymin = __builtin__.min
+__all__ = ['line_search_wolfe1', 'line_search_wolfe2',
+           'scalar_search_wolfe1', 'scalar_search_wolfe2',
+           'line_search_armijo']
 
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
-                args=(), c1=1e-4, c2=0.9, amax=50):
+#------------------------------------------------------------------------------
+# Minpack's Wolfe line and scalar searches
+#------------------------------------------------------------------------------
 
-    fc = 0
-    gc = 0
-    phi0 = old_fval
-    derphi0 = numpy.dot(gfk,pk)
-    alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
+def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
+                       old_fval=None, old_old_fval=None,
+                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
+                       xtol=1e-14):
+    """
+    As `scalar_search_wolfe1` but do a line search along direction `pk`.
 
-    if isinstance(myfprime,type(())):
-        eps = myfprime[1]
-        fprime = myfprime[0]
-        newargs = (f,eps) + args
+    Parameters
+    ----------
+    f : callable
+        Function `f(x)`
+    fprime : callable
+        Gradient of `f`
+    xk : array-like
+        Current point
+    pk : array-like
+        Search direction
+
+    gfk : array-like, optional
+        Gradient of `f` at point `xk`
+    old_fval : float, optional
+        Value of `f` at point `xk`
+    old_old_fval : float, optional
+        Value of `f` at point preceding `xk`
+
+    The rest of the parameters are the same as for `scalar_search_wolfe1`.
+
+    Returns
+    -------
+    stp, f_count, g_count, fval, old_fval
+        As in `scalar_search_wolfe1`
+    gval : array
+        Gradient of `f` at the final point
+
+    """
+    if isinstance(fprime, tuple):
+        eps = fprime[1]
+        fprime = fprime[0]
+        newargs = (f, eps) + args
         gradient = False
     else:
-        fprime = myfprime
         newargs = args
         gradient = True
 
+    if gfk is None:
+        # unpack any (fprime, eps) tuple before recomputing the gradient,
+        # so approximate-gradient callers work too
+        gfk = fprime(xk, *newargs)
+
-    xtol = 1e-14
-    amin = 1e-8
-    isave = numpy.zeros((2,), numpy.intc)
-    dsave = numpy.zeros((13,), float)
+    gval = [gfk]
+    gc = [0]
+    fc = [0]
+
+    def phi(s):
+        fc[0] += 1
+        return f(xk + s*pk, *args)
+
+    def derphi(s):
+        gval[0] = fprime(xk + s*pk, *newargs)
+        if gradient:
+            gc[0] += 1
+        else:
+            fc[0] += len(xk) + 1
+        return np.dot(gval[0], pk)
+
+    derphi0 = np.dot(gfk, pk)
+
+    stp, fval, old_fval = scalar_search_wolfe1(
+            phi, derphi, old_fval, old_old_fval, derphi0,
+            c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
+
+    return stp, fc[0], gc[0], fval, old_fval, gval[0]
+
+
+def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
+                         c1=1e-4, c2=0.9,
+                         amax=50, amin=1e-8, xtol=1e-14):
+    """
+    Scalar function search for alpha that satisfies strong Wolfe conditions
+
+    A descent direction is assumed, i.e. ``derphi(0) < 0``.
+
+    Parameters
+    ----------
+    phi : callable phi(alpha)
+        Function value at point `alpha`
+    derphi : callable derphi(alpha)
+        Derivative ``d phi(alpha)/d alpha``. Returns a scalar.
+
+    phi0 : float, optional
+        Value of `phi` at 0
+    old_phi0 : float, optional
+        Value of `phi` at the previous point
+    derphi0 : float, optional
+        Value of `derphi` at 0
+    amax, amin : float, optional
+        Maximum and minimum step sizes
+    xtol : float, optional
+        Relative tolerance for an acceptable step
+    c1, c2 : float, optional
+        Wolfe parameters
+
+    Returns
+    -------
+    alpha : float
+        Step size, or None if no suitable step was found
+    phi : float
+        Value of `phi` at the new point `alpha`
+    phi0 : float
+        Value of `phi` at `alpha=0`
+
+    Notes
+    -----
+    Uses routine DCSRCH from MINPACK.
+
+    """
+
+    if phi0 is None:
+        phi0 = phi(0.)
+    if derphi0 is None:
+        derphi0 = derphi(0.)
+
+    if old_phi0 is not None:
+        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+        if alpha1 < 0:
+            alpha1 = 1.0
+    else:
+        alpha1 = 1.0
+
+    phi1 = phi0
+    derphi1 = derphi0
+    isave = np.zeros((2,), np.intc)
+    dsave = np.zeros((13,), float)
     task = 'START'
-    fval = old_fval
-    gval = gfk
 
     while 1:
-        stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2,
-                                               xtol, task, amin, amax,isave,dsave)
-
+        stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
+                                                   c1, c2, xtol, task,
+                                                   amin, amax, isave, dsave)
         if task[:2] == 'FG':
             alpha1 = stp
-            fval = f(xk+stp*pk,*args)
-            fc += 1
-            gval = fprime(xk+stp*pk,*newargs)
-            if gradient: gc += 1
-            else: fc += len(xk) + 1
-            phi0 = fval
-            derphi0 = numpy.dot(gval,pk)
+            phi1 = phi(stp)
+            derphi1 = derphi(stp)
         else:
             break
 
-    if task[:5] == 'ERROR' or task[1:4] == 'WARN':
+    if task[:5] == 'ERROR':
         stp = None  # failed
-    return stp, fc, gc, fval, old_fval, gval
+
+    return stp, phi1, phi0
+
+line_search = line_search_wolfe1
+
+#------------------------------------------------------------------------------
+# Pure-Python Wolfe line and scalar searches
+#------------------------------------------------------------------------------
+
+def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
+                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=50):
+    """Find alpha that satisfies strong Wolfe conditions.
+
+    Parameters
+    ----------
+    f : callable f(x,*args)
+        Objective function.
+    myfprime : callable f'(x,*args)
+        Objective function gradient (can be None).
+    xk : ndarray
+        Starting point.
+    pk : ndarray
+        Search direction.
+    gfk : ndarray, optional
+        Gradient value for x=xk (xk being the current parameter
+        estimate). Will be recomputed if omitted.
+    old_fval : float, optional
+        Function value for x=xk. Will be recomputed if omitted.
+    old_old_fval : float, optional
+        Function value for the point preceding x=xk
+    args : tuple, optional
+        Additional arguments passed to objective function.
+    c1 : float, optional
+        Parameter for Armijo condition rule.
+    c2 : float, optional
+        Parameter for curvature condition rule.
+
+    Returns
+    -------
+    alpha0 : float
+        Alpha for which ``x_new = x0 + alpha * pk``.
+    fc : int
+        Number of function evaluations made.
+    gc : int
+        Number of gradient evaluations made.
+
+    Notes
+    -----
+    Uses the line search algorithm to enforce strong Wolfe
+    conditions.  See Wright and Nocedal, 'Numerical Optimization',
+    1999, pg. 59-60.
+
+    For the zoom phase it uses an algorithm by [...].
+
+    """
+    fc = [0]
+    gc = [0]
+    gval = [None]
+    
+    def phi(alpha):
+        fc[0] += 1
+        return f(xk + alpha * pk, *args)
+
+    if isinstance(myfprime, tuple):
+        def derphi(alpha):
+            fc[0] += len(xk)+1
+            eps = myfprime[1]
+            fprime = myfprime[0]
+            newargs = (f,eps) + args
+            gval[0] = fprime(xk+alpha*pk, *newargs)  # store for later use
+            return np.dot(gval[0], pk)
+    else:
+        fprime = myfprime
+        def derphi(alpha):
+            gc[0] += 1
+            gval[0] = fprime(xk+alpha*pk, *args)  # store for later use
+            return np.dot(gval[0], pk)
+
+    if gfk is None:
+        # recompute the gradient, as promised in the docstring; unpack any
+        # (fprime, eps) tuple form first
+        if isinstance(myfprime, tuple):
+            gfk = myfprime[0](xk, f, myfprime[1], *args)
+        else:
+            gfk = myfprime(xk, *args)
+    derphi0 = np.dot(gfk, pk)
+
+    alpha_star, phi_star, old_fval, derphi_star = \
+                scalar_search_wolfe2(phi, derphi, old_fval, old_old_fval,
+                                     derphi0, c1, c2, amax)
+
+    if derphi_star is not None:
+        # derphi_star is a scalar (the directional derivative
+        # derphi = gfk*pk), so replace it with the most recently
+        # calculated gradient: that is the gradient at the accepted step,
+        # and the outer loop need not compute it again.
+        derphi_star = gval[0]
+
+    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
+
+
+def scalar_search_wolfe2(phi, derphi=None, phi0=None,
+                         old_phi0=None, derphi0=None,
+                         c1=1e-4, c2=0.9, amax=50):
+    """Find alpha that satisfies strong Wolfe conditions.
+
+    A descent direction is assumed, i.e. ``derphi(0) < 0``.
+
+    Parameters
+    ----------
+    phi : callable phi(alpha)
+        Objective scalar function.
+    derphi : callable derphi(alpha), optional
+        Objective function derivative (can be None)
+    phi0 : float, optional
+        Value of phi at alpha=0
+    old_phi0 : float, optional
+        Value of phi at the previous point
+    derphi0 : float, optional
+        Value of derphi at alpha=0
+    amax : float, optional
+        Maximum step size
+    c1 : float
+        Parameter for Armijo condition rule.
+    c2 : float
+        Parameter for curvature condition rule.
+
+    Returns
+    -------
+    alpha_star : float
+        Best alpha, or None if the line search did not converge
+    phi_star : float
+        phi at alpha_star
+    phi0 : float
+        phi at 0
+    derphi_star : float or None
+        derphi at alpha_star
+
+    Notes
+    -----
+    Uses the line search algorithm to enforce strong Wolfe
+    conditions.  See Wright and Nocedal, 'Numerical Optimization',
+    1999, pg. 59-60.
+
+    For the zoom phase it uses an algorithm by [...].
+
+    """
+
+    if phi0 is None:
+        phi0 = phi(0.)
+
+    if derphi0 is None and derphi is not None:
+        derphi0 = derphi(0.)
+
+    alpha0 = 0
+    if old_phi0 is not None:
+        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+    else:
+        alpha1 = 1.0
+
+    if alpha1 < 0:
+        alpha1 = 1.0
+
+    if alpha1 == 0:
+        # This shouldn't happen. Perhaps the increment has slipped below
+        machine precision?  For now, set the return variables, skip the
+        (useless) while loop, and let the caller raise warnflag=2 due to
+        possible imprecision.
+        alpha_star = None
+        phi_star = phi0
+        phi0 = old_phi0
+        derphi_star = None
+
+    phi_a1 = phi(alpha1)
+    #derphi_a1 = derphi(alpha1)  evaluated below
+
+    phi_a0 = phi0
+    derphi_a0 = derphi0
+
+    i = 1
+    maxiter = 10
+    while 1:         # bracketing phase
+        if alpha1 == 0:
+            break
+        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
+           ((phi_a1 >= phi_a0) and (i > 1)):
+            alpha_star, phi_star, derphi_star = \
+                        _zoom(alpha0, alpha1, phi_a0,
+                              phi_a1, derphi_a0, phi, derphi,
+                              phi0, derphi0, c1, c2)
+            break
+
+        derphi_a1 = derphi(alpha1)
+        if (abs(derphi_a1) <= -c2*derphi0):
+            alpha_star = alpha1
+            phi_star = phi_a1
+            derphi_star = derphi_a1
+            break
+
+        if (derphi_a1 >= 0):
+            alpha_star, phi_star, derphi_star = \
+                        _zoom(alpha1, alpha0, phi_a1,
+                              phi_a0, derphi_a1, phi, derphi,
+                              phi0, derphi0, c1, c2)
+            break
+
+        alpha2 = 2 * alpha1   # increase by factor of two on each iteration
+        i = i + 1
+        alpha0 = alpha1
+        alpha1 = alpha2
+        phi_a0 = phi_a1
+        phi_a1 = phi(alpha1)
+        derphi_a0 = derphi_a1
+
+        # stopping test: give up if a lower function value was not found
+        if i > maxiter:
+            alpha_star = alpha1
+            phi_star = phi_a1
+            derphi_star = None
+            break
+
+    return alpha_star, phi_star, phi0, derphi_star
+
+
+def _cubicmin(a,fa,fpa,b,fb,c,fc):
+    """
+    Finds the minimizer for a cubic polynomial that goes through the
+    points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
+    
+    If no minimizer can be found return None
+    
+    """
+    # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
+
+    C = fpa
+    D = fa
+    db = b-a
+    dc = c-a
+    if (db == 0) or (dc == 0) or (b==c): return None
+    denom = (db*dc)**2 * (db-dc)
+    d1 = np.empty((2,2))
+    d1[0,0] = dc**2
+    d1[0,1] = -db**2
+    d1[1,0] = -dc**3
+    d1[1,1] = db**3
+    [A,B] = np.dot(d1, np.asarray([fb-fa-C*db,fc-fa-C*dc]).flatten())
+    A /= denom
+    B /= denom
+    radical = B*B-3*A*C
+    if radical < 0:  return None
+    if (A == 0): return None
+    xmin = a + (-B + np.sqrt(radical))/(3*A)
+    return xmin
+
+
+def _quadmin(a,fa,fpa,b,fb):
+    """
+    Finds the minimizer for a quadratic polynomial that goes through
+    the points (a,fa), (b,fb) with derivative at a of fpa.
+
+    """
+    # f(x) = B*(x-a)^2 + C*(x-a) + D
+    D = fa
+    C = fpa
+    db = b-a*1.0
+    if (db==0): return None
+    B = (fb-D-C*db)/(db*db)
+    if (B <= 0): return None
+    xmin = a  - C / (2.0*B)
+    return xmin
+
+def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
+          phi, derphi, phi0, derphi0, c1, c2):
+    """
+    Part of the optimization algorithm in `scalar_search_wolfe2`.
+    """
+
+    maxiter = 10
+    i = 0
+    delta1 = 0.2  # cubic interpolant check
+    delta2 = 0.1  # quadratic interpolant check
+    phi_rec = phi0
+    a_rec = 0
+    while 1:
+        # interpolate to find a trial step length between a_lo and a_hi.
+        # Use cubic interpolation first; if the result is within
+        # delta * dalpha of an endpoint, or outside the interval bounded
+        # by a_lo and a_hi, fall back to quadratic interpolation; if that
+        # result is still too close, use bisection
+
+        dalpha = a_hi - a_lo
+        if dalpha < 0: a,b = a_hi,a_lo
+        else: a,b = a_lo, a_hi
+
+        # minimizer of cubic interpolant
+        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
+        #
+        # if the result is too close to the end points (or out of the
+        # interval) then use quadratic interpolation with phi_lo,
+        # derphi_lo and phi_hi; if the result is still too close to the
+        # end points (or out of the interval) then use bisection
+
+        if (i > 0):
+            cchk = delta1*dalpha
+            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
+        if (i==0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
+            qchk = delta2*dalpha
+            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
+            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
+                a_j = a_lo + 0.5*dalpha
+
+        # Check new value of a_j
+
+        phi_aj = phi(a_j)
+        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
+            phi_rec = phi_hi
+            a_rec = a_hi
+            a_hi = a_j
+            phi_hi = phi_aj
+        else:
+            derphi_aj = derphi(a_j)
+            if abs(derphi_aj) <= -c2*derphi0:
+                a_star = a_j
+                val_star = phi_aj
+                valprime_star = derphi_aj
+                break
+            if derphi_aj*(a_hi - a_lo) >= 0:
+                phi_rec = phi_hi
+                a_rec = a_hi
+                a_hi = a_lo
+                phi_hi = phi_lo
+            else:
+                phi_rec = phi_lo
+                a_rec = a_lo
+            a_lo = a_j
+            phi_lo = phi_aj
+            derphi_lo = derphi_aj
+        i += 1
+        if (i > maxiter):
+            a_star = a_j
+            val_star = phi_aj
+            valprime_star = None
+            break
+    return a_star, val_star, valprime_star
+
+
+#------------------------------------------------------------------------------
+# Armijo line and scalar searches
+#------------------------------------------------------------------------------
+
+def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+    """Minimize over alpha, the function ``f(xk+alpha pk)``.
+
+    Uses the interpolation algorithm (Armijo backtracking) as suggested by
+    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
+
+    Returns
+    -------
+    alpha
+    f_count
+    f_val_at_alpha
+
+    """
+    xk = np.atleast_1d(xk)
+    fc = [0]
+
+    def phi(alpha1):
+        fc[0] += 1
+        return f(xk + alpha1*pk, *args)
+
+    if old_fval is None:
+        phi0 = phi(0.)
+    else:
+        phi0 = old_fval  # f(xk) was computed in a previous iteration
+
+    derphi0 = np.dot(gfk, pk)
+    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, alpha0=alpha0)
+    return alpha, fc[0], phi1
+
+def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+    """
+    Compatibility wrapper for `line_search_armijo`
+    """
+    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
+                           alpha0=alpha0)
+    return r[0], r[1], 0, r[2]
+
+def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
+    """Minimize over alpha, the function ``phi(alpha)``.
+
+    Uses the interpolation algorithm (Armijo backtracking) as suggested by
+    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
+
+    A descent direction is assumed, i.e. ``derphi0 < 0``.
+
+    Returns
+    -------
+    alpha
+    phi1
+
+    """
+    phi_a0 = phi(alpha0)
+    if phi_a0 <= phi0 + c1*alpha0*derphi0:
+        return alpha0, phi_a0
+
+    # Otherwise compute the minimizer of a quadratic interpolant:
+
+    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
+    phi_a1 = phi(alpha1)
+
+    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
+        return alpha1, phi_a1
+
+    # Otherwise loop with cubic interpolation until we find an alpha which
+    # satisfies the first Wolfe condition (since we are backtracking, we will
+    # assume that the value of alpha is not too small and satisfies the second
+    # condition).
+
+    while alpha1 > amin:       # we are assuming derphi0 < 0 (descent direction)
+        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
+        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
+            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
+        a = a / factor
+        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
+            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
+        b = b / factor
+
+        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
+        phi_a2 = phi(alpha2)
+
+        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
+            return alpha2, phi_a2
+
+        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
+            alpha2 = alpha1 / 2.0
+
+        alpha0 = alpha1
+        alpha1 = alpha2
+        phi_a0 = phi_a1
+        phi_a1 = phi_a2
+
+    # Failed to find a suitable step length
+    return None, phi_a1
+

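As a quick, hedged illustration (not part of the patch itself), the new
scalar searches can be driven directly. On a strongly convex quadratic
the first trial step already satisfies the strong Wolfe conditions:

    import numpy as np
    from scipy.optimize import linesearch

    phi = lambda s: (s - 2.0)**2        # minimizer at s = 2
    derphi = lambda s: 2.0*(s - 2.0)    # derphi(0) = -4 < 0, descent

    stp, phi1, phi0 = linesearch.scalar_search_wolfe1(phi, derphi)
    # sufficient-decrease part of the strong Wolfe conditions:
    assert phi1 <= phi0 + 1e-4*stp*derphi(0.)

    # the Armijo backtracking search accepts the same trial step here
    s, phi_s = linesearch.scalar_search_armijo(phi, phi(0.), derphi(0.))
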
Modified: trunk/scipy/optimize/optimize.py
===================================================================
--- trunk/scipy/optimize/optimize.py	2010-09-04 17:45:31 UTC (rev 6672)
+++ trunk/scipy/optimize/optimize.py	2010-09-04 20:54:05 UTC (rev 6673)
@@ -25,7 +25,9 @@
 import numpy
 from numpy import atleast_1d, eye, mgrid, argmin, zeros, shape, empty, \
      squeeze, vectorize, asarray, absolute, sqrt, Inf, asfarray, isinf
-import linesearch
+from linesearch import \
+     line_search_BFGS, line_search_wolfe1, line_search_wolfe2, \
+     line_search_wolfe2 as line_search
 
 # These have been copied from Numeric's MLab.py
 # I don't think they made the transition to scipy_core
@@ -294,324 +296,6 @@
     return retlist
 
 
-
-def _cubicmin(a,fa,fpa,b,fb,c,fc):
-    # finds the minimizer for a cubic polynomial that goes through the
-    #  points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
-    #
-    # if no minimizer can be found return None
-    #
-    # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
-
-    C = fpa
-    D = fa
-    db = b-a
-    dc = c-a
-    if (db == 0) or (dc == 0) or (b==c): return None
-    denom = (db*dc)**2 * (db-dc)
-    d1 = empty((2,2))
-    d1[0,0] = dc**2
-    d1[0,1] = -db**2
-    d1[1,0] = -dc**3
-    d1[1,1] = db**3
-    [A,B] = numpy.dot(d1,asarray([fb-fa-C*db,fc-fa-C*dc]).flatten())
-    A /= denom
-    B /= denom
-    radical = B*B-3*A*C
-    if radical < 0:  return None
-    if (A == 0): return None
-    xmin = a + (-B + sqrt(radical))/(3*A)
-    return xmin
-
-
-def _quadmin(a,fa,fpa,b,fb):
-    # finds the minimizer for a quadratic polynomial that goes through
-    #  the points (a,fa), (b,fb) with derivative at a of fpa
-    # f(x) = B*(x-a)^2 + C*(x-a) + D
-    D = fa
-    C = fpa
-    db = b-a*1.0
-    if (db==0): return None
-    B = (fb-D-C*db)/(db*db)
-    if (B <= 0): return None
-    xmin = a  - C / (2.0*B)
-    return xmin
-
-def zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
-         phi, derphi, phi0, derphi0, c1, c2):
-    maxiter = 10
-    i = 0
-    delta1 = 0.2  # cubic interpolant check
-    delta2 = 0.1  # quadratic interpolant check
-    phi_rec = phi0
-    a_rec = 0
-    while 1:
-        # interpolate to find a trial step length between a_lo and
-        # a_hi Need to choose interpolation here.  Use cubic
-        # interpolation and then if the result is within delta *
-        # dalpha or outside of the interval bounded by a_lo or a_hi
-        # then use quadratic interpolation, if the result is still too
-        # close, then use bisection
-
-        dalpha = a_hi-a_lo;
-        if dalpha < 0: a,b = a_hi,a_lo
-        else: a,b = a_lo, a_hi
-
-        # minimizer of cubic interpolant
-        #    (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
-        #      if the result is too close to the end points (or out of
-        #        the interval) then use quadratic interpolation with
-        #        phi_lo, derphi_lo and phi_hi
-        #      if the result is stil too close to the end points (or
-        #        out of the interval) then use bisection
-
-        if (i > 0):
-            cchk = delta1*dalpha
-            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
-        if (i==0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
-            qchk = delta2*dalpha
-            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
-            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
-                a_j = a_lo + 0.5*dalpha
-#                print "Using bisection."
-#            else: print "Using quadratic."
-#        else: print "Using cubic."
-
-        # Check new value of a_j
-
-        phi_aj = phi(a_j)
-        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
-            phi_rec = phi_hi
-            a_rec = a_hi
-            a_hi = a_j
-            phi_hi = phi_aj
-        else:
-            derphi_aj = derphi(a_j)
-            if abs(derphi_aj) <= -c2*derphi0:
-                a_star = a_j
-                val_star = phi_aj
-                valprime_star = derphi_aj
-                break
-            if derphi_aj*(a_hi - a_lo) >= 0:
-                phi_rec = phi_hi
-                a_rec = a_hi
-                a_hi = a_lo
-                phi_hi = phi_lo
-            else:
-                phi_rec = phi_lo
-                a_rec = a_lo
-            a_lo = a_j
-            phi_lo = phi_aj
-            derphi_lo = derphi_aj
-        i += 1
-        if (i > maxiter):
-            a_star = a_j
-            val_star = phi_aj
-            valprime_star = None
-            break
-    return a_star, val_star, valprime_star
-
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
-                args=(), c1=1e-4, c2=0.9, amax=50):
-    """Find alpha that satisfies strong Wolfe conditions.
-
-    Parameters
-    ----------
-    f : callable f(x,*args)
-        Objective function.
-    myfprime : callable f'(x,*args)
-        Objective function gradient (can be None).
-    xk : ndarray
-        Starting point.
-    pk : ndarray
-        Search direction.
-    gfk : ndarray
-        Gradient value for x=xk (xk being the current parameter
-        estimate).
-    args : tuple
-        Additional arguments passed to objective function.
-    c1 : float
-        Parameter for Armijo condition rule.
-    c2 : float
-        Parameter for curvature condition rule.
-
-    Returns
-    -------
-    alpha0 : float
-        Alpha for which ``x_new = x0 + alpha * pk``.
-    fc : int
-        Number of function evaluations made.
-    gc : int
-        Number of gradient evaluations made.
-
-    Notes
-    -----
-    Uses the line search algorithm to enforce strong Wolfe
-    conditions.  See Wright and Nocedal, 'Numerical Optimization',
-    1999, pg. 59-60.
-
-    For the zoom phase it uses an algorithm by [...].
-
-    """
-
-    global _ls_fc, _ls_gc, _ls_ingfk
-    _ls_fc = 0
-    _ls_gc = 0
-    _ls_ingfk = None
-    def phi(alpha):
-        global _ls_fc
-        _ls_fc += 1
-        return f(xk+alpha*pk,*args)
-
-    if isinstance(myfprime,type(())):
-        def phiprime(alpha):
-            global _ls_fc, _ls_ingfk
-            _ls_fc += len(xk)+1
-            eps = myfprime[1]
-            fprime = myfprime[0]
-            newargs = (f,eps) + args
-            _ls_ingfk = fprime(xk+alpha*pk,*newargs)  # store for later use
-            return numpy.dot(_ls_ingfk,pk)
-    else:
-        fprime = myfprime
-        def phiprime(alpha):
-            global _ls_gc, _ls_ingfk
-            _ls_gc += 1
-            _ls_ingfk = fprime(xk+alpha*pk,*args)  # store for later use
-            return numpy.dot(_ls_ingfk,pk)
-
-    alpha0 = 0
-    phi0 = old_fval
-    derphi0 = numpy.dot(gfk,pk)
-
-    alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
-
-    if alpha1 == 0:
-        # This shouldn't happen. Perhaps the increment has slipped below
-        # machine precision?  For now, set the return variables skip the
-        # useless while loop, and raise warnflag=2 due to possible imprecision.
-        alpha_star = None
-        fval_star = old_fval
-        old_fval = old_old_fval
-        fprime_star = None
-
-    phi_a1 = phi(alpha1)
-    #derphi_a1 = phiprime(alpha1)  evaluated below
-
-    phi_a0 = phi0
-    derphi_a0 = derphi0
-
-    i = 1
-    maxiter = 10
-    while 1:         # bracketing phase
-        if alpha1 == 0:
-            break
-        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
-           ((phi_a1 >= phi_a0) and (i > 1)):
-            alpha_star, fval_star, fprime_star = \
-                        zoom(alpha0, alpha1, phi_a0,
-                             phi_a1, derphi_a0, phi, phiprime,
-                             phi0, derphi0, c1, c2)
-            break
-
-        derphi_a1 = phiprime(alpha1)
-        if (abs(derphi_a1) <= -c2*derphi0):
-            alpha_star = alpha1
-            fval_star = phi_a1
-            fprime_star = derphi_a1
-            break
-
-        if (derphi_a1 >= 0):
-            alpha_star, fval_star, fprime_star = \
-                        zoom(alpha1, alpha0, phi_a1,
-                             phi_a0, derphi_a1, phi, phiprime,
-                             phi0, derphi0, c1, c2)
-            break
-
-        alpha2 = 2 * alpha1   # increase by factor of two on each iteration
-        i = i + 1
-        alpha0 = alpha1
-        alpha1 = alpha2
-        phi_a0 = phi_a1
-        phi_a1 = phi(alpha1)
-        derphi_a0 = derphi_a1
-
-        # stopping test if lower function not found
-        if (i > maxiter):
-            alpha_star = alpha1
-            fval_star = phi_a1
-            fprime_star = None
-            break
-
-    if fprime_star is not None:
-        # fprime_star is a number (derphi) -- so use the most recently
-        # calculated gradient used in computing it derphi = gfk*pk
-        # this is the gradient at the next step no need to compute it
-        # again in the outer loop.
-        fprime_star = _ls_ingfk
-
-    return alpha_star, _ls_fc, _ls_gc, fval_star, old_fval, fprime_star
-
-
-def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
-    """Minimize over alpha, the function ``f(xk+alpha pk)``.
-
-    Uses the interpolation algorithm (Armiijo backtracking) as suggested by
-    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
-
-    :Returns: (alpha, fc, gc)
-
-    """
-
-    xk = atleast_1d(xk)
-    fc = 0
-    phi0 = old_fval # compute f(xk) -- done in past loop
-    phi_a0 = f(*((xk+alpha0*pk,)+args))
-    fc = fc + 1
-    derphi0 = numpy.dot(gfk,pk)
-
-    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
-        return alpha0, fc, 0, phi_a0
-
-    # Otherwise compute the minimizer of a quadratic interpolant:
-
-    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
-    phi_a1 = f(*((xk+alpha1*pk,)+args))
-    fc = fc + 1
-
-    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
-        return alpha1, fc, 0, phi_a1
-
-    # Otherwise loop with cubic interpolation until we find an alpha which
-    # satifies the first Wolfe condition (since we are backtracking, we will
-    # assume that the value of alpha is not too small and satisfies the second
-    # condition.
-
-    while 1:       # we are assuming pk is a descent direction
-        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
-        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
-            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
-        a = a / factor
-        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
-            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
-        b = b / factor
-
-        alpha2 = (-b + numpy.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
-        phi_a2 = f(*((xk+alpha2*pk,)+args))
-        fc = fc + 1
-
-        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
-            return alpha2, fc, 0, phi_a2
-
-        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
-            alpha2 = alpha1 / 2.0
-
-        alpha0 = alpha1
-        alpha1 = alpha2
-        phi_a0 = phi_a1
-        phi_a1 = phi_a2
-
-
 def approx_fprime(xk,f,epsilon,*args):
     f0 = f(*((xk,)+args))
     grad = numpy.zeros((len(xk),), float)
@@ -723,12 +407,12 @@
     while (gnorm > gtol) and (k < maxiter):
         pk = -numpy.dot(Hk,gfk)
         alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
-           linesearch.line_search(f,myfprime,xk,pk,gfk,
-                                  old_fval,old_old_fval)
+           line_search_wolfe1(f,myfprime,xk,pk,gfk,
+                              old_fval,old_old_fval)
         if alpha_k is None:  # line search failed try different one.
             alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
-                     line_search(f,myfprime,xk,pk,gfk,
-                                 old_fval,old_old_fval)
+                     line_search_wolfe2(f,myfprime,xk,pk,gfk,
+                                        old_fval,old_old_fval)
             if alpha_k is None:
                 # This line search also failed to find a better solution.
                 warnflag = 2
@@ -894,12 +578,12 @@
         old_old_fval_backup = old_old_fval
 
         alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
-           linesearch.line_search(f,myfprime,xk,pk,gfk,old_fval,
+                 line_search_wolfe1(f,myfprime,xk,pk,gfk,old_fval,
                                   old_old_fval,c2=0.4)
         if alpha_k is None:  # line search failed -- use different one.
             alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
-                     line_search(f,myfprime,xk,pk,gfk,
-                                 old_fval_backup,old_old_fval_backup)
+                     line_search_wolfe2(f,myfprime,xk,pk,gfk,
+                                        old_fval_backup,old_old_fval_backup)
             if alpha_k is None or alpha_k == 0:
                 # This line search also failed to find a better solution.
                 warnflag = 2

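The optimize.py hunks above also wire in a fallback: the MINPACK-based
wolfe1 search is tried first, and the pure-Python wolfe2 search is used
only if it fails. Schematically (the helper name below is illustrative,
not part of the patch):

    from scipy.optimize.linesearch import (line_search_wolfe1,
                                           line_search_wolfe2)

    def wolfe_search_with_fallback(f, fprime, xk, pk, gfk,
                                   old_fval, old_old_fval):
        # both searches return (stp, fc, gc, fval, old_fval, gval)
        ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                                 old_fval, old_old_fval)
        if ret[0] is None:
            # MINPACK search failed; retry with the pure-Python search
            ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval)
        return ret
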
Added: trunk/scipy/optimize/tests/test_linesearch.py
===================================================================
--- trunk/scipy/optimize/tests/test_linesearch.py	                        (rev 0)
+++ trunk/scipy/optimize/tests/test_linesearch.py	2010-09-04 20:54:05 UTC (rev 6673)
@@ -0,0 +1,234 @@
+"""
+Tests for line search routines
+"""
+
+from numpy.testing import *
+import scipy.optimize.linesearch as ls
+import numpy as np
+
+def assert_wolfe(s, phi, derphi, c1=1e-4, c2=0.9, err_msg=""):
+    """
+    Check that strong Wolfe conditions apply
+    """
+    phi1 = phi(s)
+    phi0 = phi(0)
+    derphi0 = derphi(0)
+    derphi1 = derphi(s)
+    msg = "s = %s; phi(0) = %s; phi(s) = %s; phi'(0) = %s; phi'(s) = %s; %s" % (
+        s, phi0, phi1, derphi0, derphi1, err_msg)
+
+    assert_(phi1 <= phi0 + c1*s*derphi0, "Wolfe 1 failed: "+ msg)
+    assert_(abs(derphi1) <= abs(c2*derphi0), "Wolfe 2 failed: "+ msg)
+
+def assert_armijo(s, phi, c1=1e-4, err_msg=""):
+    """
+    Check that Armijo condition applies
+    """
+    phi1 = phi(s)
+    phi0 = phi(0)
+    msg = "s = %s; phi(0) = %s; phi(s) = %s; %s" % (s, phi0, phi1, err_msg)
+    assert_(phi1 <= (1 - c1*s)*phi0, msg)
+
+def assert_line_wolfe(x, p, s, f, fprime, **kw):
+    assert_wolfe(s, phi=lambda sp: f(x + p*sp),
+                 derphi=lambda sp: np.dot(fprime(x + p*sp), p), **kw)
+
+def assert_line_armijo(x, p, s, f, **kw):
+    assert_armijo(s, phi=lambda sp: f(x + p*sp), **kw)
+
+
+class TestLineSearch(object):
+    # -- scalar functions; must have dphi(0.) < 0
+    def _scalar_func_1(self, s):
+        self.fcount += 1
+        p = -s - s**3 + s**4
+        dp = -1 - 3*s**2 + 4*s**3
+        return p, dp
+
+    def _scalar_func_2(self, s):
+        self.fcount += 1
+        p = np.exp(-4*s) + s**2
+        dp = -4*np.exp(-4*s) + 2*s
+        return p, dp
+
+    def _scalar_func_3(self, s):
+        self.fcount += 1
+        p = -np.sin(10*s)
+        dp = -10*np.cos(10*s)
+        return p, dp
+
+    # -- n-d functions
+
+    def _line_func_1(self, x):
+        self.fcount += 1
+        f = np.dot(x, x)
+        df = 2*x
+        return f, df
+
+    def _line_func_2(self, x):
+        self.fcount += 1
+        f = np.dot(x, np.dot(self.A, x)) + 1
+        df = np.dot(self.A + self.A.T, x)
+        return f, df
+
+    # --
+
+    def __init__(self):
+        self.scalar_funcs = []
+        self.line_funcs = []
+        self.N = 20
+        self.fcount = 0
+
+        def bind_index(func, idx):
+            # Remember Python's closure semantics!
+            return lambda *a, **kw: func(*a, **kw)[idx]
+        
+        for name in sorted(dir(self)):
+            if name.startswith('_scalar_func_'):
+                value = getattr(self, name)
+                self.scalar_funcs.append(
+                    (name, bind_index(value, 0), bind_index(value, 1)))
+            elif name.startswith('_line_func_'):
+                value = getattr(self, name)
+                self.line_funcs.append(
+                    (name, bind_index(value, 0), bind_index(value, 1)))
+
+    def setUp(self):
+        np.random.seed(1234)
+        self.A = np.random.randn(self.N, self.N)
+
+    def scalar_iter(self):
+        for name, phi, derphi in self.scalar_funcs:
+            for old_phi0 in np.random.randn(3):
+                yield name, phi, derphi, old_phi0
+
+    def line_iter(self):
+        for name, f, fprime in self.line_funcs:
+            k = 0
+            while k < 9:
+                x = np.random.randn(self.N)
+                p = np.random.randn(self.N)
+                if np.dot(p, fprime(x)) >= 0:
+                    # always pick a descent direction
+                    continue
+                k += 1
+                old_fv = float(np.random.randn())
+                yield name, f, fprime, x, p, old_fv
+
+    # -- Generic scalar searches
+
+    def test_scalar_search_wolfe1(self):
+        c = 0
+        for name, phi, derphi, old_phi0 in self.scalar_iter():
+            c += 1
+            s, phi1, phi0 = ls.scalar_search_wolfe1(phi, derphi, phi(0),
+                                                    old_phi0, derphi(0))
+            assert_equal(phi0, phi(0))
+            assert_equal(phi1, phi(s))
+            assert_wolfe(s, phi, derphi)
+
+        assert c > 3 # check that the iterator really works...
+
+    def test_scalar_search_wolfe2(self):
+        for name, phi, derphi, old_phi0 in self.scalar_iter():
+            s, phi1, phi0, derphi1 = ls.scalar_search_wolfe2(
+                phi, derphi, phi(0), old_phi0, derphi(0))
+            assert_equal(phi0, phi(0), name)
+            assert_equal(phi1, phi(s), name)
+            if derphi1 is not None:
+                assert_equal(derphi1, derphi(s), name)
+            assert_wolfe(s, phi, derphi, err_msg="%s %g" % (name, old_phi0))
+
+    def test_scalar_search_armijo(self):
+        for name, phi, derphi, old_phi0 in self.scalar_iter():
+            s, phi1 = ls.scalar_search_armijo(phi, phi(0), derphi(0))
+            assert_equal(phi1, phi(s), name)
+            assert_armijo(s, phi, err_msg="%s %g" % (name, old_phi0))
+
+    # -- Generic line searches
+
+    def test_line_search_wolfe1(self):
+        c = 0
+        smax = 100
+        for name, f, fprime, x, p, old_f in self.line_iter():
+            f0 = f(x)
+            g0 = fprime(x)
+            self.fcount = 0
+            s, fc, gc, fv, ofv, gv = ls.line_search_wolfe1(f, fprime, x, p,
+                                                           g0, f0, old_f,
+                                                           amax=smax)
+            assert_equal(self.fcount, fc+gc)
+            assert_equal(ofv, f(x))
+            assert_equal(fv, f(x + s*p))
+            assert_equal(gv, fprime(x + s*p))
+            if s < smax:
+                c += 1
+                assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
+
+        assert c > 3 # check that the iterator really works...
+
+    def test_line_search_wolfe2(self):
+        c = 0
+        smax = 100
+        for name, f, fprime, x, p, old_f in self.line_iter():
+            f0 = f(x)
+            g0 = fprime(x)
+            self.fcount = 0
+            s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
+                                                           g0, f0, old_f,
+                                                           amax=smax)
+            assert_equal(self.fcount, fc+gc)
+            assert_equal(ofv, f(x))
+            assert_equal(fv, f(x + s*p))
+            if gv is not None:
+                assert_equal(gv, fprime(x + s*p))
+            if s < smax:
+                c += 1
+                assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
+        assert c > 3 # check that the iterator really works...
+
+    def test_line_search_armijo(self):
+        c = 0
+        for name, f, fprime, x, p, old_f in self.line_iter():
+            f0 = f(x)
+            g0 = fprime(x)
+            self.fcount = 0
+            s, fc, fv = ls.line_search_armijo(f, x, p, g0, f0)
+            c += 1
+            assert_equal(self.fcount, fc)
+            assert_equal(fv, f(x + s*p))
+            assert_line_armijo(x, p, s, f, err_msg=name)
+        assert c >= 9
+
+    # -- More specific tests
+
+    def test_armijo_terminate_1(self):
+        # Armijo should evaluate the function only once if the trial step
+        # is already suitable
+        count = [0]
+        def phi(s):
+            count[0] += 1
+            return -s + 0.01*s**2
+        s, phi1 = ls.scalar_search_armijo(phi, phi(0), -1, alpha0=1)
+        assert_equal(s, 1)
+        assert_equal(count[0], 2)
+        assert_armijo(s, phi)
+
+    def test_wolfe_terminate(self):
+        # wolfe1 and wolfe2 should also evaluate the function only a few
+        # times if the trial step is already suitable
+
+        def phi(s):
+            count[0] += 1
+            return -s + 0.05*s**2
+
+        def derphi(s):
+            count[0] += 1
+            return -1 + 0.05*2*s
+
+        for func in [ls.scalar_search_wolfe1, ls.scalar_search_wolfe2]:
+            count = [0]
+            r = func(phi, derphi, phi(0), None, derphi(0))
+            assert r[0] is not None, (r, func)
+            assert count[0] <= 2 + 2, (count, func)
+            assert_wolfe(r[0], phi, derphi, err_msg=str(func))

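The new tests follow the nose conventions used by numpy.testing at this
time (a plain class with setUp and generator-driven cases). Assuming
nose is installed, one way to run just this file is:

    import nose
    nose.run(argv=['nosetests', '-v',
                   'scipy.optimize.tests.test_linesearch'])
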
Modified: trunk/scipy/optimize/tests/test_optimize.py
===================================================================
--- trunk/scipy/optimize/tests/test_optimize.py	2010-09-04 17:45:31 UTC (rev 6672)
+++ trunk/scipy/optimize/tests/test_optimize.py	2010-09-04 20:54:05 UTC (rev 6673)
@@ -177,18 +177,10 @@
         assert self.gradcalls == 18, self.gradcalls # 0.8.0
         #assert self.gradcalls == 22, self.gradcalls # 0.7.0
 
-        # Ensure that the function behaves the same;
-
-        # # This is from Scipy 0.7.0
-        #assert np.allclose(self.trace[3:5],
-        #                   [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
-        #                    [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
-        #                   atol=1e-14, rtol=1e-7), self.trace[3:5]
-
-        # This if from Scipy 0.8.0; with some fixes in ncg
+        # Ensure that the function behaves the same; this is from Scipy 0.7.0
         assert np.allclose(self.trace[3:5],
-                           [[-2.90334653e-07,-5.24869431e-01,4.87527470e-01],
-                            [-2.90334653e-07,-5.24869375e-01,4.87527680e-01]],
+                           [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
+                            [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
                            atol=1e-14, rtol=1e-7), self.trace[3:5]
 
 



