/*
 * Brent.java
 *
 * Created on November 5, 2004, 10:19 PM
 *
 *  Copyright 2004 Daniel Wachsstock
 *  The contents of this file are subject to the Sun Public License
 *  Version 1.0 (the License); you may not use this file except in
 *  compliance with the License. A copy of the License is available at
 *  http://www.sun.com/ or http://www.geocities.com/tenua4java/license.html
 */

package nr.minimizer;
import nr.*;

/** Minimizes a function of several variables using Brent's modification of Powell's method.
 *  Note: as of now, this algorithm does not work. Do not use it.
 *  The method is described in Brent, RP (1973) <i>Algorithms for Minimization
 *  Without Derivatives.</i>
 *  Prentice Hall: New Jersey. The discussion is unfortunately not available on the web, though the
 *  original Powell's method is discussed in
 *  <a href="http://www.library.cornell.edu/nr/bookcpdf/c10-5.pdf">Numerical Recipes</a>,
 *  and they mention Brent's modification.
 *  The original routine in FORTRAN is called PRAXIS, and is available from many sources
 *  (search the
 *  web for <code>brent praxis</code>). One such source is
 *  <a href ="http://www.psc.edu/~burkardt/src/praxis/praxis.html">
 *  www.psc.edu/~burkardt/src/praxis</a>.
 *  <br>
 *  The algorithm works well for smooth functions (ones where the second derivative exists).
 *  These sorts of functions look like a paraboloid near the minimum. This discussion assumes
 *  you understand that statement; if not, review your multivariate calculus and Numerical
 *  Recipes chapter 10.
 *  <br>
 *  The basic idea is that we need to have a one-dimensional minimization algorithm and
 *  a set of directions (vectors) that are linearly independent, so we can minimize
 *  along each one in turn and search all of the domain of the function. To 
 *  <a name="minimizeDirection"> "minimize
 *  along a direction <b>u</b> from point <b>x</b>" </a>
 *  means, if our function is <i>f</i>(<b>x</b>)
 *  with <b>x</b> a vector of size <i>n</i>, 
 *  <ol>
 *    <li>
 *      construct the function of one variable <i>g</i>(<i>b</i>) =
 *      <i>f</i>(<b>x</b> + <i>b</i>*<b>u</b>)
 *    </li>
 *    <li>
 *      Using our one-dimensional minimization algorithm, find <i>b'</i>
 *      that minimizes <i>g</i>(<i>b</i>)
 *    </li>
 *    <li>
 *      Replace <b>x</b> with <b>x</b> + <i>b'</i>*<b>u</b>
 *    </li>
 *  </ol>
 *  <br>
 *  The direction set <a name = "basicProcedure">basic procedure</a> is:
 *  <ol>
 *    <li>
 *      Start with an initial guess for the minimum, <b>x'</b>
 *    </li>
 *    <li>
 *      Start with a list of <i>n</i> linearly independent vectors,
 *      <b>u</b>[<i>j</i>]
 *    </li>
 *    <li>
 *      Minimize along each <b>u</b>[<i>j</i>] in turn, updating <b>x'</b>
 *      each time.
 *    </li>
 *  </ol>
 *  <br>
 *  Since the <b>u</b>s span all <i>n</i>-space, one would hope that this would
 *  bring us to the minimum. Unfortunately, each successive scalar minimization
 *  ruins the minimization from before. We could repeat the
 *  <a href="#basicProcedure">basic procedure</a> over and over again until it
 *  converges, but that could be very slow (see Numerical Recipes chapter 10 for
 *  more discussion of this).
 *  <br>
 *  What we want is for the <b>u</b>s to not only be linearly independent, but to be
 *  <i>conjugate</i>, which means that minimizing along one direction does not spoil
 *  the other directions. <a name = "powell">Powell's algorithm</a>
 *  finds a mutually conjugate set of
 *  directions for a paraboloid:
 *  <ol>
 *    <li>
 *      Set <b>oldX'</b> = <b>x'</b>
 *    </li>
 *    <li>
 *      Do the <a href="#basicProcedure">basic procedure</a>
 *    </li>
 *    <li>
 *      Create a new direction, <b>u'</b> = <b>x'</b> - <b>oldX'</b>. This is
 *      the average direction moved in the <a href="#basicProcedure">basic procedure</a>.
 *    </li>
 *    <li>
 *      Minimize along <b>u'</b>.
 *    </li>
 *    <li>
 *      Replace one of the original <b>u</b>[<i>j</i>] with <b>u'</b>
 *    </li>
 *  </ol>
 *  <br>
 *  Each time <a href = "#powell">Powell's algorithm</a> is run, the new <b>u'</b> is
 *  conjugate to all the previous <b>u'</b>s (for a paraboloid), so after <i>n</i>
 *  runs through the algorithm, the directions are all conjugate and the last
 *  <a href="#basicProcedure">basic procedure</a> got us to the minimum.
 *  <br>
 *  The algorithm doesn't specify which original <b>u</b>[<i>j</i>] to replace; it
 *  works no matter what you choose. The original algorithm just picked the first
 *  available, but empirically it seems to converge faster if we pick the one that
 *  made the best improvement in <i>f</i>(<b>x</b>) during the basic procedure.
 *  See the discussion in
 *  Numerical Recipes chapter 10.5, section "Discarding the Direction of Largest Decrease."
 *  <br>
 *  If the function is not a paraboloid, then we could repeat
 *  <a href = "#powell">Powell's algorithm</a> over and over until it converges,
 *  but replacing a vector by the difference of vectors tends to make the set linearly
 *  dependent, so the <b>u</b>[<i>j</i>]s don't span all of space and only minimize
 *  a subspace of the true domain, so we won't necessarily find the true minimum.
 *  <br>
 *  The answer is to reset the <b>u</b>[<i>j</i>]s to a set of orthogonal vectors after
 *  doing <a href = "#powell">Powell's algorithm</a> <i>n</i> times. Brent found an amazing
 *  result that, for a paraboloid, gives an orthogonal and conjugate set of directions, and
 *  even if the function is far from paraboloid, still gives an orthogonal set, so
 *  Powell's algorithm still works.
 *  <a name="resetDirections">Resetting the directions</a> is:
 *  <ol>
 *    <li>
 *      Create the <i>n</i>x<i>n</i> matrix <b>U</b> whose columns are the vectors
 *      <b>u</b>[<i>j</i>]
 *    </li>
 *    <li>
 *      Determine the approximate second derivative <b>d</b>[<i>j</i>] along each
 *      direction <b>u</b>[<i>j</i>]. If, as above, <i>g</i>(<i>b</i>) =
 *      <i>f</i>(<b>x</b> + <i>b</i>*<b>u</b>), then for any three values of <i>b</i>,
 *      <i>b<sub>1</sub></i>,<i>b<sub>2</sub></i>, and <i>b<sub>3</sub></i>,
 *      with values of <i>g</i> of <i>g<sub>1</sub></i>,<i>g<sub>2</sub></i>,
 *      and <i>g<sub>3</sub></i>, the first derivatives are
 *      <i>f'<sub>12</sub></i> =
 *      (<i>g<sub>1</sub></i>-<i>g<sub>2</sub></i>)/(<i>b<sub>1</sub></i>-<i>b<sub>2</sub></i>)
 *      and
 *      <i>f'<sub>23</sub></i> =
 *      (<i>g<sub>2</sub></i>-<i>g<sub>3</sub></i>)/(<i>b<sub>2</sub></i>-<i>b<sub>3</sub></i>)
 *      and the second derivative is
 *      <i>d</i> = 2 * 
 *      (<i>f'<sub>12</sub></i>-<i>f'<sub>23</sub></i>)/
 *      (<i>b<sub>1</sub></i>-<i>b<sub>3</sub></i>).
 *      If we have done a scalar minimization in direction <b>u</b>, we should have
 *      evaluated <i>g</i>(<i>b</i>) at a few points already, so finding <i>d</i> is easy.
 *      (We can even have the scalar minimization routine do it automatically).
 *    </li>
 *    <li>Determine the diagonal matrix <b>D</b> with
 *      <b>D</b>[<i>j</i>][<i>j</i>] = 1/sqrt(<b>d</b>[<i>j</i>]).
 *      If the function is anywhere close to a
 *      minimum of a paraboloid, the second derivative is > 0
 *      (if it is not, Brent just sets <b>d</b>[<i>j</i>] = an empiric small number).
 *    </li>
 *    <li>
 *      Set <b>U</b> = <b>U</b>*<b>D</b>
 *    </li>
 *    <li>
 *       Find the singular value decomposition of <b>U</b> and
 *       set <b>U</b> to the left-hand orthogonal matrix
 *       of that decomposition. For the algorithms in package nr, this is just
 *       <code>
 *       {@link SingularValueDecomposition} decomp = new SingularValueDecomposition (U);
 *       </code>
 *    </li>
 *    <li>
 *      Set the second derivative estimates to
 *      <b>d</b>[<i>j</i>] = 1/decomp.getSingularValues()[<i>j</i>]^2
 *    </li>
 *  </ol>
 *  The columns of <b>U</b> are now the desired new <b>u</b>[<i>j</i>], with no new function
 *  evaluations and only one order <i>n</i><sup>3</sup> method, the singular value
 *  decomposition.
 *  <br>
 *  Brent does a few other tricks:
 *  <ul>
 *    <li>
 *      <a name="lazyStep">Laziness</a>:
 *      Brent points out that the first run through <a href = "#powell">Powell's
 *      algorithm</a> there are no other "new" directions to be conjugate to, so there's
 *      no reason to replace any of the old directions. So he just minimizes along one
 *      direction, then does Powell's algorithm <i>n</i>-1 times to make the directions
 *      conjugate. There is no point in minimizing along successive non-conjugate directions,
 *      since each minimization will likely ruin the last one.
 *    </li>
 *    <li>
 *      <a name="randomJump">Random jump</a>:
 *      If the function has very narrow valleys (which we can tell if the second
 *      derivatives in different directions have vastly different orders of magnitude) then
 *      rounding errors may keep the algorithm from finding its way down the valley.
 *      Brent calls this an "ill-conditioned" problem. If it is ill-conditioned,
 *      before starting <a href = "#powell">Powell's algorithm</a>, move <b>x'</b>
 *      to a random point a little bit away ("little bit" is determined by the second
 *      derivatives and the machine precision).
 *    </li>
 *    <li>
 *      <a name="twisty">Twisty little passage</a>: If the valley is curved, then
 *      assuming a paraboloid won't turn the corners and convergence will be slow.
 *      Before resetting the directions, Brent uses the current value of
 *      <b>x'</b> and the previous 2 values at this point in the algorithm to
 *      approximate a curve and uses the scalar minimizer to minimize along that
 *      curve. If the algorithm has been done less than 3 times, just record
 *      the point for future times.
 *    </li>
 *    <li>
 *      <a name="scale">Automatic scaling</a>: For the algorithm to be
 *      efficient, all the components of <b>x</b> should be approximately on the same
 *      scale. Brent's original algorithm incorporates ways to try to automatically do
 *      this, but the version implemented here does not. The user must scale his
 *      variables prior to calling {@link #minimize(Vec)}.
 *    </li>
 *  </ul>
 *  So <a name="brent">Brent's algorithm</a> is:
 *  <ol>
 *    <li>
 *      Minimize along <b>u</b>[1].
 *    </li>
 *    <li>
 *      For <i>n</i>-1 times:
 *      <ul>
 *        <li>
 *          If the problem is ill-conditioned, do a <a href="#randomJump">Random jump</a>.
 *        </li>
 *        <li>
 *          Do <a href = "#powell">Powell's algorithm</a>.
 *        </li>
 *        <li>
 *          If <i>f</i>(<b>x'</b>) didn't improve much and the problem was not ill-conditioned,
 *          consider it ill-conditioned and restart the algorithm.
 *        </li>
 *        <li>
 *          If <b>x'</b> didn't change much, then exit with success. Note that if <b>x'</b> didn't change,
 *          then <i>f</i> didn't change either, so (by the previous step) we always end up
 *          considering the problem ill-conditioned and doing a Random jump before exiting.
 *        </li>
 *      </ul>
 *    </li>
 *    <li>
 *      Do a <a href="#twisty">Twisty little passage</a> step.
 *    </li>
 *    <li>
 *      <a href="#resetDirections">Reset the directions</a>. Check the values of
 *      <b>d</b>[<i>j</i>] to see if the problem is ill-conditioned
 *    </li>
 *    <li>
 *      Exit with failure. We did not converge with this execution of Brent's algorithm, but
 *      hopefully left us with an improved <b>x'</b>, a set of conjugate and orthogonal directions
 *      <b>u</b>, and the corresponding estimates of the second derivative <i>d</i>.
 *    </li>
 *  </ol>
 *  <br>
 *  So the final algorithm is:
 *  <ol>
 *    <li>
 *      Set the direction vectors to the unit vectors in each direction
 *      (<b>U</b>[<i>i</i>,<i>j</i>] = <i>i</i>=<i>j</i> ? 1 : 0).
 *    </li>
 *    <li>
 *      Set the second derivative estimates to 0 (<b>d</b>[<i>j</i>] = 0).
 *    </li>
 *    <li>
 *      Repeat <a href="#brent">Brent's algorithm</a> until it exits with success.
 *    </li>
 *    <li>
 *      If desired (if you are worried about twisty narrow valleys in which the algorithm might get
 *      stuck without being at the minimum), take a <a href="#randomJump">Random jump</a> and restart.
 *      Brent restarts at least once.
 *    </li>
 *  </ol>
 *  <br>
 *  <a name="linearMin">The linear minimization</a> is intentionally a quick but poor one.
 *  The idea is that there is no point in perfectly minimizing the function along one particular
 *  direction, since the algorithm
 *  will have to take many steps in many directions anyway. The minimizer goes to the exact
 *  minimum for a perfect parabola, and tries to find a point that is lower than the current one
 *  for any other function.
 *  <br>
 *  The linear minimization algorithm takes the value at the current point, evaluates the function
 *  one "stepsize" away, and uses these two points plus the estimate of the second derivative to
 *  extrapolate a parabola and determines where the minimum of that parabola is. It then evaluates the
 *  function at that point, and if the value is less than the previous ones, it updates the estimate
 *  of the second derivative using the three points evaluated and returns the location of the minimum.
 *  <br>
 *  If the value at the predicted minimum is <i>more</i> than the other ones, then the current function
 *  is not a good parabola. Try a few points closer to the original looking for a lower function value
 *  (the algorithm uses just 2 tries). Return the lowest point found (possibly the original
 *  point) and set the second derivative to zero.
 *  <br>
 *  If we start with no good estimate of the second derivative, evaluate the function another
 *  "stepsize" away and use those three points to extrapolate a parabola, then proceed as above.
 *  <br>
 *  Determining the stepsize is the hard part. If there is a good second derivative estimate, use that
 *  as an estimate of the width of the parabola and use that and the desired tolerance to get a step size.
 *  If we do not have a second derivative estimate, use the size of the last change in
 *  <b>x'</b> as the stepsize. If this is the first time, use the length of <b>x'</b> as
 *  a crude guess.
 *  <br>
 *  This is not the exact step size determining algorithm that Brent used, but it seems simpler and
 *  seems to work.
 *  <br>
 *  The algorithm converges when each component of the change in <b>x'</b> is less than
 *  the corresponding element of <b>minDeltaX</b>. <b>minDeltaX</b>[<i>i</i>] is
 *  defined as <i>epsilon</i> * abs (<b>original value of x' at the start of the
 *  algorithm</b>[<i>i</i>]), or just <i>epsilon</i> if that is zero. Thus, the
 *  initial guess for the minimum also sets the scale for the convergence.
 *  
 */
public class Brent extends VecMinimizerImp{
    private Mat _U; // the matrix of directions to search; each column is a direction vector
    private double[] _d; // the vector of second derivative estimates. Actually, it's one half
      // of the second derivative (because the formulas all have a 1/2 term in there; it just means
      // a bit less calculating). A value <= 0 is treated as "no usable estimate" by the
      // line-minimization and step-size routines.
    private boolean _illConditioned = false; // is the problem ill-conditioned (very narrow valleys
      // in some directions)? Set when a pass of Powell's algorithm leaves _fx unchanged and
      // recomputed from the rank of the decomposition whenever the directions are reset.
    private boolean[] _isConjugate; // array of flags that a given direction has been replaced with
      // a conjugate one in Powell's algorithm; cleared at the start of every call to brent()
    private double _lastStep; // the length of the last step taken. Used for
      // a crude step size when trying to find the minimum, in the absence of
      // any other information. Should always be >= 0.
    private Vec _twistyLittlePassagePoint1 = null;
    private Vec _twistyLittlePassagePoint2 = null; // two points used for the twisty little passage algorithm;
      // while point 1 is still null, a request to minimize along the curve (dir == -1) only records a point

    private final int ITERATIONS = 2; // number of times to run the algorithm before accepting
      // the result (every run after the first is preceded by a random jump)
    private final double SMALL = 1e-52; // a very small number, such that if a ratio
      // is less than this we should consider it zero in parts of the algorithm
    private final double FACTOR = 0.01; // empirical factor to estimate step size
      // as a fraction of _x (see estimateStep)
    private final double GOLDEN = 1.618; // The golden mean, for golden section search; used to
      // grow a step when looking for a bracket and to shrink the larger side of a bracket

    /** create a new instance of Brent.
     *  @param f the {@link ScalarFunction} to minimize
     */
    public Brent(ScalarFunction f) {
        // Nothing to do beyond handing the function to the superclass: the search
        // directions and second derivative estimates are created in doMinimize().
        super(f);
    }

    /**
     * Runs the whole minimization, improving <code>_x</code> and <code>_fx</code> in place.
     * <p>
     * The direction set starts as the identity matrix, the second derivative
     * estimates start at zero ("unknown"), and the initial step scale is the
     * length of the starting point. Brent's algorithm is then repeated until it
     * reports convergence; the whole thing is done <code>ITERATIONS</code> times,
     * with a random jump before every run except the first.
     * <p>
     * If the starting point is the zero vector its length gives no scale at all.
     * A step scale of zero would make the first line search probe the current
     * point itself, which the line search rejects with a
     * <code>DidNotConvergeException</code>, so a unit scale is used instead.
     */
    protected void doMinimize (){
        // the directions start with the unit vectors in each direction
        _U = Mat_array.identity (_n);
        _d = new double [_n]; // initialized to 0, i.e. no curvature information yet
        _isConjugate = new boolean [_n];
        _lastStep = MatMath.norm2(_x); // crude estimate of the scale of the problem
        if (_lastStep == 0d){
            // starting at the origin: there is no length to use as a scale, and
            // a zero step would make the very first line search fail. Fall back
            // to a unit scale.
            _lastStep = 1d;
        } // if

        for (int i = 0; i < ITERATIONS; i++){
            System.out.println("Starting iteration "+i);
            // do a random jump before each restart
            if (i != 0) randomJump();
            // keep trying Brent's algorithm until it succeeds.
            // the routine will improve _x even if it does not succeed (or at
            // least, it will not worsen _x)
            while (!brent());
        } // for
    } // doMinimize
    
    public static String name() {
        // short display name of this minimizer
        return "Brent";
    }

    // do brent's algorithm, returning true if it converged.
    // One subtle detail: SingularValueDecomposition orders the singular values from
    // highest to lowest, and _d is 1/singularValue^2, so they end up ordered from
    // lowest to highest, so the one with the largest step (which is the one we want to
    // minimize first) will be the last (number _n-1).
    private boolean brent(){
        // every pass starts with all directions flagged as not yet conjugate
        for (int dir = 0; dir < _n; dir++){
            _isConjugate [dir] = false;
        }

        // lazy step: one serious minimization along the last direction only
        Vec start = _x.copy();
        minimizeAlong (_n-1, true);
        if (converged (_x, start)){
            return true;
        }

        // _n-1 rounds of Powell's algorithm
        int round = 1;
        while (round < _n){
            Vec before = _x.copy();
            double fBefore = _fx;
            if (_illConditioned){
                randomJump();
            }

            powell();

            // no change at all in the function value: treat the problem as
            // ill-conditioned from now on and let the caller start over
            if (_fx == fBefore && !_illConditioned){
                _illConditioned = true;
                return false;
            }

            if (converged (_x, before)){
                return true;
            }
            round++;
        }

        // not converged yet: do the twisty little passage step, then build a
        // fresh set of directions for the next call
        minimizeAlong (-1, false);
        resetDirections();
        return false;
    }

    // does Powell's algorithm for finding a set of mutually conjugate directions
    private void powell(){
        // holds the starting point for now; becomes the displacement further down
        Vec displacement = _x.copy();
        double startF = _fx;
        int replaced = basicProcedure();

        // displacement = (where we are now) - (where we started)
        for (int row = 0; row < _n; row++){
            displacement.set (row, _x.get(row) - displacement.get(row));
        }
        double length = MatMath.norm2 (displacement);
        if (length == 0d){
            return; // the basic procedure did not move _x at all
        }
        _lastStep = length;
        if (replaced < 0){
            return; // no direction is eligible to be replaced
        }

        // the chosen direction becomes the displacement, scaled to unit length
        _U.getColumn(replaced).set(displacement);
        for (int row = 0; row < _n; row++){
            _U.set (row, replaced, _U.get (row, replaced) / length);
        }
        // nothing is known about the curvature along the new direction
        _d [replaced] = 0;
        // Seen from the current point, the starting point lies at -length along
        // the new direction with value startF; use it as the second sample and
        // minimize seriously this time.
        minimizeAlong (replaced, -length, startF, true);
        _isConjugate [replaced] = true;
    }

    // minimize along each direction in turn
    // returns the direction that produced the best improvement in _fx
    // of all the unconjugate directions, or -1 if nothing helped.
    // As noted in brent(), the vectors are ordered from lowest second derivative
    // to highest, so we minimize in reverse order.
    // The purpose of the minimizations is not to hit the true minimum, but to
    // get the set of conjugate directions, so we don't aggressively look for
    // the minimum (i.e. we call minimizeAlong(..., false)).
    private int basicProcedure(){
        int winner = -1;
        double largestDrop = 0;
        int dir = _n;
        while (--dir >= 0){
            double before = _fx;
            // a rough minimization is enough here
            minimizeAlong (dir, false);
            if (!_isConjugate [dir]){
                // only directions that are not yet conjugate may be replaced
                double drop = before - _fx;
                if (drop > largestDrop){
                    largestDrop = drop;
                    winner = dir;
                }
            }
        }
        return winner;
    }

    // replace _U with an orthogonal matrix
    /**
     * Rescales the direction matrix by the curvature estimates, decomposes it,
     * and refreshes the curvature estimates and the ill-conditioned flag.
     * <p>
     * Column <i>j</i> of <code>_U</code> is divided by sqrt(<code>_d[j]</code>),
     * which is the product U*D with D[j][j] = 1/sqrt(d[j]). Every entry of the
     * column is scaled, not just the diagonal one: the columns stop being
     * axis-aligned as soon as Powell's algorithm has replaced a direction, so
     * scaling only <code>_U[j][j]</code> would not compute U*D. When the square
     * root is zero or negligible compared with the column, <code>SMALL</code>
     * is used as the divisor so that we never divide by zero.
     * <p>
     * After the decomposition, <code>_d[j]</code> becomes 1/s[j]^2 for each
     * singular value s[j] (or 0 when s[j] is 0), and the problem is flagged as
     * ill-conditioned when the decomposition reports less than full rank.
     */
    private void resetDirections(){
        System.out.println("in reset");
        // determine U * 1/sqrt(D), without dividing by zero
        for (int col = 0; col < _n; col++){
            double sqrt_d = Math.sqrt (_d [col]);
            // the largest entry of the column decides whether sqrt_d is
            // negligible, so the whole column gets one and the same divisor
            double largest = 0d;
            for (int row = 0; row < _n; row++){
                largest = Math.max (largest, Math.abs (_U.get (row, col)));
            } // for
            // tooSmall is called as (divisor, value being divided), the same
            // argument order as at the other call sites in this class
            double divisor = (sqrt_d == 0d || tooSmall (sqrt_d, largest)) ? SMALL : sqrt_d;
            for (int row = 0; row < _n; row++){
                _U.set (row, col, _U.get (row, col) / divisor);
            } // for
        } // for
        // NOTE(review): _U is not reassigned below, so this relies on the
        // SingularValueDecomposition constructor leaving the orthogonal factor
        // in _U itself - confirm against that class.
        SingularValueDecomposition decomp = new SingularValueDecomposition (_U);
        double s[] = decomp.getSingularValues();
        for (int i = 0; i < _n; i++){
            if (s [i] == 0){
                _d [i] = 0;
            }else{
                _d [i] = 1d / (s [i] * s [i]);
            } // if
        } // for
        _illConditioned = (decomp.rank() < _n); // true if there are negligible singular values
        System.out.println("end of reset; _U = "+_U);
    } // resetDirections

    // take a random step, about 10 * _minDeltaX away from where we are.
    // Not as sophisticated as Brent's random jump in PRAXIS, but more in line with
    // the algorithm in section 5 of his book.
    private void randomJump() {
        // Each component is moved by its own _minDeltaX times a random number
        // in [-0.5, 0.5) times a common scale factor, and the function is then
        // re-evaluated at the new point.
        final double scale = 20/Math.sqrt(_n);
        int i = 0;
        while (i < _n) {
            double jitter = _minDeltaX.get(i)*(Math.random()-0.5)*scale;
            _x.set (i, _x.get(i) + jitter);
            i++;
        }
        _fx = eval (_x);
    }
    
    // Update _x by crudely minimizing along direction _U[*,dir]. If dir == -1,
    // minimize along Brent's curve.
    // If isSerious == false, then we try a few points looking for a point that
    // gives a lower value of f, but give up if we don't find one quickly.
    // If isSerious == true, then we keep trying until we converge (meaning we
    // are already at the minimum) or we find a new low point. Note that either
    // way, we are not looking for the absolute minimum, just a downhill point.
    // We rely on the higher-level routines to find the true minimum in all
    // dimensions.
    private void minimizeAlong (int dir, boolean isSerious){
        boolean curveWithoutHistory = (dir == -1) && (_twistyLittlePassagePoint1 == null);
        if (curveWithoutHistory){
            // the curve cannot be built yet; just remember this point
            addTwistyPassagePoint();
        }else{
            // sample the function one estimated step away and let the
            // two-point version take it from there
            double probe = estimateStep (dir);
            double probeF = evalLinear (dir, probe);
            minimizeAlong (dir, probe, probeF, isSerious);
        }
    }
    
    // minimize along direction dir using the current point (_x) as zero,
    // with known function value _fx, and a second point at x1 with function
    // value f1
    /**
     * Line minimization from two samples: the current point (distance 0, value
     * <code>_fx</code>) and a second point at distance <code>x1</code> with
     * value <code>f1</code>.
     * <p>
     * With a usable curvature estimate <code>_d[dir]</code> (the coefficient of
     * b^2, i.e. half the second derivative), the parabola
     * g(b) = _fx + a*b + d*b^2 through both samples has
     * a = (f1 - _fx)/x1 - d*x1 and its vertex at
     * -a/(2d) = (x1 + (_fx - f1)/(d*x1)) / 2. The function is evaluated there;
     * if that is no worse than both samples the point is accepted, otherwise the
     * three-sample version takes over. Without a usable estimate (or for the
     * curve, dir == -1) a third sample is taken at 2*x1 or -x1, whichever is
     * expected to be downhill, and the three-sample version is used directly.
     * <p>
     * The vertex was previously computed as x1 - (_fx - f1)/(d*x1), which is not
     * the minimum of that parabola (for g(b) = (b-1)^2, d = 1, x1 = 0.5 it gives
     * -1 instead of 1).
     *
     * @param dir       column of <code>_U</code> to move along, or -1 for the curve
     * @param x1        distance of the second sample from the current point; must not be 0
     * @param f1        function value at <code>x1</code>
     * @param isSerious passed on to the three-sample version, which uses it to
     *                  decide whether to keep searching when no lower point is found
     * @throws DidNotConvergeException if <code>x1</code> is 0
     */
    private void minimizeAlong (int dir, double x1, double f1, boolean isSerious){
        if (dir == -1 && _twistyLittlePassagePoint1 == null){
            // not enough points to do twistyLittlePassage
            addTwistyPassagePoint();
            return;
        } // if
        if (x1 == 0d) throw new DidNotConvergeException();

        if (dir == -1 || _d [dir] <= 0){
            // no valid second derivative; we need a 3d point to
            // estimate a minimum
            // The third (x2) will be at 2x or -x, whichever would be "downhill"
            double x2 = (_fx > f1) ? 2.0*x1 : -x1;
            minimizeAlong (dir, x1, f1, x2, evalLinear (dir, x2), isSerious);
            return;
        } // if

        double numerator = _fx - f1;
        double denominator = _d [dir] * x1;
        if (tooSmall (denominator, numerator)){
            // second derivative estimate is worthless
            _d [dir] = 0;
            minimizeAlong (dir, x1, f1, isSerious); // try again without the 2d derivative
            return;
        } // if
        // vertex of the parabola through (0, _fx) and (x1, f1) whose b^2
        // coefficient is _d[dir]: (x1 + (_fx - f1)/(_d[dir]*x1)) / 2
        double x2 = (x1 + numerator/denominator) / 2.0;
        x2 = wiggle (x2, x1, x1);
        double f2 = evalLinear (dir, x2);
        if (_fx < f2 || f1 < f2){
            // the predicted minimum is worse than the other points.
            // obviously it's not a good parabola with the given 2d derivative
            minimizeAlong (dir, x1, f1, x2, f2, isSerious);
            return;
        } // if
        // accept the predicted minimum. The curvature is refreshed first,
        // because update2Derivative still needs the old _fx as the value at 0.
        update2Derivative (dir, x1, f1, x2, f2);
        extrapolateAlong(dir, x2, _x);
        _fx = f2;
        addTwistyPassagePoint();
    } // minimizeAlong
 
    // minimize along direction dir using the current point (_x) as zero,
    // and 2 points (x1, f1) and (x2, f2)
    /**
     * Line minimization from three samples: the current point (distance 0, value
     * <code>_fx</code>) plus (<code>x1</code>, <code>f1</code>) and
     * (<code>x2</code>, <code>f2</code>).
     * <p>
     * The vertex of the parabola through the three samples is evaluated (and a
     * point halfway to it, if the vertex is worse than a sample). If any sampled
     * point is lower than the current one, the lowest is accepted. Otherwise a
     * non-serious call gives up, and a serious call continues with a bracketed
     * or not-bracketed search depending on whether the sampled points lie on
     * both sides of the current point.
     * <p>
     * The "all three on the same side" test compares signs with products
     * (<code>x1*x2 > 0 && x1*newX > 0</code>). It previously used
     * <code>x1+newX > 0</code>, which sent three negative points to the
     * bracketed search, whose precondition check then threw
     * <code>DidNotConvergeException</code>.
     *
     * @param dir       column of <code>_U</code> to move along, or -1 for the curve
     * @param x1        distance of the first sample from the current point
     * @param f1        function value at <code>x1</code>
     * @param x2        distance of the second sample from the current point
     * @param f2        function value at <code>x2</code>
     * @param isSerious if false, give up as soon as no sampled point is lower
     *                  than the current one
     */
    private void minimizeAlong (int dir, double x1, double f1,
      double x2, double f2, boolean isSerious){
        // find the predicted minimum of the parabola
        // This algorithm is from Jack Crenshaw,
        // www.embedded.com/showArticle.jhtml?articleID=9900284
        // though the formula is written wrong there; see the linked
        // program ftp://ftp.embedded.com/pub/2001/Mintest2.cpp
        double m0 = (f1 - _fx)/x1; // slope of the chord from 0 to x1
        double m1 = (f2 - _fx) /x2; // slope of the chord from 0 to x2
        if (tooSmall (m1 - m0, f2 - f1)){
            // m1 and m0 are almost the same, meaning the points are colinear.
            // Keep stepping downhill until we move back up again
            if (f2 >= _fx){
                // x2 should be downhill, if it is not, try a
                // larger step until the points are no longer colinear
                x2 = GOLDEN*x2;
                f2 = evalLinear (dir, x2);
            }
            minimizeAlong (dir, x2, f2, isSerious);
            return;
        } // if
        double newX = (x1 + x2 - (f2 - f1)/(m1 - m0)) / 2.0;
        // wiggle is defined further down the file; its header comment says it
        // keeps the point from landing too close to 0 or to the two limits
        newX = wiggle (newX, x1, x2);
        double newF = evalLinear (dir, newX);
        if (_fx < newF || f1 < newF || f2 < newF){
            // the predicted minimum is worse than the other points.
            // obviously we're going nowhere. Try a new point halfway in
            newX /= 2.0;
            newF = evalLinear (dir, newX);
        } // if
        // now find the minimum of all the points we have tried
        // there is some redundancy with the if test immediately above.
        double xmin = 0.0;
        double fmin = _fx;
        if (newF < fmin) {xmin = newX; fmin = newF;}
        if (f2   < fmin) {xmin = x2  ; fmin = f2  ;}
        if (f1   < fmin) {xmin = x1  ; fmin = f1  ;}
        if (xmin != 0.0){
            // the curvature is refreshed before _fx changes, because
            // update2Derivative uses _fx as the value at distance 0
            update2Derivative (dir, x1, f1, x2, f2);
            extrapolateAlong(dir, xmin, _x);
            _fx = fmin;
            addTwistyPassagePoint();
            return;
        } // if
        // if we reach here, then the current point (x == 0) has the lowest
        // function value so far. The other three points we have tested are
        // x1, x2, and newX
        if (!isSerious) return; // just give up at this point

        // if we reach here, then isSerious == true, but _fx=f(0) < f1,f2 and newF
        // Either we have the minimum bracketed (at least one of x1,x2,newX is < 0
        // and at least one is > 0) or all three are on the same side
        if (x1*x2 > 0 && x1*newX > 0){
            // all on the same side; find the closest one
            if (Math.abs (x1) > Math.abs (x2)){
                x1 = x2;
                f1 = f2;
            } // if
            if (Math.abs (x1) > Math.abs (newX)){
                x1 = newX;
                f1 = newF;
            } // if
            minimizeAlongNotBracketed (dir, x1, f1);
        }else{
            // we have a bracket. Now find the closest points
            if (x1*newX > 0 && Math.abs (x1) > Math.abs (newX)){
                x1 = newX;
                f1 = newF;
            }
            if (x2*newX > 0 && Math.abs (x2) > Math.abs (newX)){
                x2 = newX;
                f2 = newF;
            }
            if (x1*x2 < 0){
                // x1 and x2 are the brackets
                minimizeAlongBracketed (dir, x1, f1, x2, f2);
            }else{
                // newX forms one bracket. Find the closest of the others
                if (Math.abs (x1) > Math.abs (x2)){
                    x1 = x2;
                    f1 = f2;
                } // if
                minimizeAlongBracketed (dir, x1, f1, newX, newF);
            } // if
        } // if
    } // minimizeAlong

    // find a low point or a bracketing point from x1
    private void minimizeAlongNotBracketed (int dir, double x1, double f1){
        // probe the opposite side of the current point, a bit farther out than x1
        double probe = -GOLDEN * x1; // golden section search as in Numerical Recipes %%%
        double probeF = evalLinear (dir, probe);
        if (!(probeF < _fx)){
            // no improvement there, but x1 and the probe now lie on opposite
            // sides of the current point: hand over to the bracketed search
            minimizeAlongBracketed (dir, x1, f1, probe, probeF);
            return;
        }
        // the probe is lower than the current point: refresh the curvature
        // (while _fx is still the old value) and move there
        update2Derivative (dir, x1, f1, probe, probeF);
        extrapolateAlong(dir, probe, _x);
        _fx = probeF;
        addTwistyPassagePoint();
    }

    // find a low point between x1 and x2. We are guaranteed that x1*x2 < 0
    // and f1 >= _fx and f2 >= _fx, so a minimum must exist.
    // Use golden section search to find it. If we reached here, then
    // parabolic interpolation failed, so we will not bother using it here.
    /**
     * Searches a bracket around the current point for any point with a lower
     * function value, moving there as soon as one is found.
     * <p>
     * <code>x1</code> and <code>x2</code> must lie on opposite sides of the
     * current point (distance 0) and neither may have a value below
     * <code>_fx</code>. The larger side of the bracket is repeatedly shrunk by
     * the factor <code>GOLDEN</code> and sampled; the search stops without
     * moving once the bracket is narrower than the tolerance.
     * <p>
     * The tolerance along a direction is the sum of
     * |<code>_U[i][dir]</code> * <code>_minDeltaX[i]</code>|. It used to be the
     * plain dot product, which is negative or zero for directions with negative
     * components; a negative tolerance can never be reached by the bracket
     * width, so the loop never ended. For directions with no negative component
     * the two definitions agree.
     *
     * @param dir column of <code>_U</code> to move along, or -1 for the curve
     * @param x1  distance of one end of the bracket from the current point
     * @param f1  function value at <code>x1</code>
     * @param x2  distance of the other end of the bracket
     * @param f2  function value at <code>x2</code>
     * @throws DidNotConvergeException if the arguments do not form a bracket
     */
    private void minimizeAlongBracketed (int dir, double x1, double f1,
      double x2, double f2){
        if (!(x1*x2 < 0 && f1 >= _fx && f2 >= _fx)) throw new DidNotConvergeException();
        // calculate the tolerance along dir
        double tol;
        if (dir > -1){
            tol = 0d;
            for (int i = 0; i < _n; i++){
                tol += Math.abs (_U.get (i, dir) * _minDeltaX.get (i));
            } // for
        }else{
            tol = MatMath.norm2 (_minDeltaX);
        } // if
        // set x1 < 0 < x2
        if (x1 > 0){
            double swap = x1;
            x1 = x2;
            x2 = swap;
            swap = f1;
            f1 = f2;
            f2 = swap;
        } // if
        while (x2-x1 > tol){
            // divide the largest section
            if (x2 > -x1){
                x2 /= GOLDEN;
                f2 = evalLinear(dir, x2);
                if (f2 < _fx){
                    // refresh the curvature while _fx is still the value at 0
                    update2Derivative (dir, x1, f1, x2, f2);
                    extrapolateAlong(dir, x2, _x);
                    _fx = f2;
                    addTwistyPassagePoint();
                    return;
                } // if
            }else{
                x1 /= GOLDEN;
                f1 = evalLinear(dir, x1);
                if (f1 < _fx){
                    update2Derivative (dir, x1, f1, x2, f2);
                    extrapolateAlong(dir, x1, _x);
                    _fx = f1;
                    addTwistyPassagePoint();
                    return;
                } // if
            } // if
        } // while
        // if we reach here, we failed to improve on x=0
        return;
    } // minimizeAlongBracketed

    
    /**
     * Chooses the distance at which to take the first sample of a line search.
     * <p>
     * With a positive curvature estimate for the direction, the step is
     * <code>FACTOR * sqrt(|_fx / _d[dir]|)</code>, capped at
     * <code>_lastStep / FACTOR</code>. Without one (or for the curve,
     * dir == -1) it is <code>FACTOR * _lastStep</code>.
     * <p>
     * The curvature-based step is exactly zero whenever <code>_fx</code> is zero
     * (for example at the minimum of a sum of squares). A zero step makes the
     * line search sample the current point itself and fail, so in that case the
     * <code>_lastStep</code>-based step is returned instead.
     *
     * @param dir column of <code>_U</code> the search will follow, or -1 for the curve
     * @return the distance of the first sample from the current point
     */
    private double estimateStep (int dir){
        // use the second derivative (if available) to estimate the smallest
        // distance that will produce a significant change in the function
        if (dir != -1 && _d [dir] > 0){
            double step = FACTOR * Math.sqrt ( Math.abs (_fx/_d [dir]));
            if (step > _lastStep/FACTOR) step = _lastStep/FACTOR; // not too large
            if (step > 0d) return step;
            // step is zero (or not a number): fall through to the crude estimate
        } // if
        return FACTOR * _lastStep;
    } // estimateStep
    
    /**
     * Refreshes the stored second-derivative estimate _d[dir] from the three points
     * (0, _fx), (x1, f1) and (x2, f2), where the first coordinate is the distance
     * along the direction. The estimate is the difference between the slope from
     * x1 to x2 and the slope from 0 to x2, divided by x1.
     * The estimate is set to 0 when any of the divisors is negligible relative to
     * its numerator (as judged by tooSmall), or when the result is negative.
     * Nothing happens when dir is -1.
     *
     * @param dir index of the direction whose estimate is updated, or -1 for none
     * @param x1 distance of the first sample point
     * @param f1 function value at x1
     * @param x2 distance of the second sample point
     * @param f2 function value at x2
     */
    private void update2Derivative (int dir, double x1, double f1, double x2, double f2){
        if (dir == -1) return;

        // slope between the two sample points
        final double riseBetween = f2-f1;
        final double runBetween = x2-x1;
        if (tooSmall (runBetween, riseBetween)){
            _d [dir] = 0.0;
            return;
        } // if

        // slope between the current point (distance 0) and the second sample
        final double riseFromOrigin = f2-_fx;
        final double runFromOrigin = x2; /* really x2 - 0.0 */
        if (tooSmall (runFromOrigin, riseFromOrigin)){
            _d [dir] = 0.0;
            return;
        } // if

        // change in slope, spread over the distance x1 - 0.0
        final double slopeChange = riseBetween/runBetween - riseFromOrigin/runFromOrigin;
        if (tooSmall (x1, slopeChange)){
            _d [dir] = 0.0;
            return;
        } // if

        final double estimate = slopeChange/x1;
        // a negative curvature is of no use to the caller; store 0 instead
        _d [dir] = (estimate < 0.0) ? 0.0 : estimate;
    } // update2Derivative
    
    /**
     * Evaluates the function at the point a given distance from _x along a
     * direction. The point is built in a freshly allocated vector, so _x itself
     * is not modified. A trace line describing the move is written to standard output.
     *
     * @param dir index of the direction, as understood by extrapolateAlong
     * @param dist distance to move from _x
     * @return the function value at the extrapolated point
     */
    private double evalLinear (int dir, double dist){
        final Vec probe = new Vec_array (_n);
        extrapolateAlong (dir, dist, probe);
        final String trace = "extrapolated along "+dir+" by "+dist+" from "+_x+" to "+probe;
        System.out.println(trace);
        return eval (probe);
    } // evalLinear
    
    /**
     * Keeps a trial position from landing too far from, or too close to, the
     * points 0, limit1 and limit2. Taken from Crenshaw's algorithm
     * (www.embedded.com/showArticle.jhtml?articleID=9900357).
     * The span is the largest of |limit1|, |limit2| and |limit1-limit2|.
     * The position is first clamped to [-span/FACTOR, span/FACTOR], then pushed
     * at least span*FACTOR away from 0, limit1 and limit2 in that order.
     * The original author describes that minimum separation as 1% of the span.
     *
     * @param x the trial position
     * @param limit1 first position to stay clear of
     * @param limit2 second position to stay clear of
     * @return the adjusted position
     */
    private double wiggle (double x, double limit1, double limit2){
        final double extent = Math.max (
            Math.max (Math.abs (limit1), Math.abs (limit2)),
            Math.abs (limit1-limit2));

        // make sure it's not too far off
        final double farthest = extent/FACTOR;
        double adjusted = x;
        if (adjusted > farthest) adjusted = farthest;
        if (adjusted < -farthest) adjusted = -farthest;

        // make sure it's not too close to any of the three reference points
        final double clearance = extent * FACTOR;
        final double[] keepAwayFrom = {0, limit1, limit2};
        for (int k = 0; k < keepAwayFrom.length; k++){
            adjusted = wiggleWithSpan (adjusted, keepAwayFrom [k], clearance);
        } // for
        return adjusted;
    } // wiggle
    
    /**
     * Moves x away from limit so that the two are at least span apart.
     * A value below limit is pushed down to at most limit-span; any other value
     * is pushed up to at least limit+span. Values already far enough away are
     * returned unchanged.
     *
     * @param x the position to adjust
     * @param limit the position to stay clear of
     * @param span the minimum separation
     * @return the adjusted position
     */
    private double wiggleWithSpan (double x, double limit, double span){
        return (x < limit)
            ? Math.min (x, limit-span)
            : Math.max (x, limit+span);
    } // wiggleWithSpan
    

    /**
     * Records the current point _x as the newest of the two points kept for the
     * twisty little passage step. The previously newest point (point 2) is moved
     * into point 1 first; on the very first call only point 2 is created.
     * Copies are stored, so later changes to _x do not alter the recorded points.
     */
    private void addTwistyPassagePoint(){
        // first point ever recorded: nothing to shift yet
        if (_twistyLittlePassagePoint2 == null){
            _twistyLittlePassagePoint2 = _x.copy();
            return;
        } // if

        // shift the newest point into the older slot, allocating it if needed
        if (_twistyLittlePassagePoint1 != null){
            _twistyLittlePassagePoint1.set (_twistyLittlePassagePoint2);
        }else{
            _twistyLittlePassagePoint1 = _twistyLittlePassagePoint2.copy();
        } // if
        _twistyLittlePassagePoint2.set (_x);
    } // addTwistyPassagePoint

    /**
     * Computes the point at distance dist from _x along direction dir and stores
     * it in result. result may be _x itself, in which case _x is moved in place.
     * <br>
     * For dir greater than -1 the move is a straight line along column dir of _U.
     * Otherwise the point is taken on the curve through the two twisty little
     * passage points and _x, using the weighting formula from PRAXIS; a trace
     * line is written to standard output in that case.
     *
     * @param dir index of a column of _U, or a negative value for the curved move
     * @param dist distance to move from _x
     * @param result receives the new point; assumed not to be null
     */
    private void extrapolateAlong (int dir, double dist, Vec result){
        // start from the current point unless the output already is the current point
        if (result != _x) result.set (_x);

        if (dir > -1){
            // a real direction, along _U[dir]
            for (int k = 0; k < _n; k++){
                result.set (k, _x.get (k) + dist * _U.get (k, dir));
            } // for
            return;
        } // if

        // formula from PRAXIS. This is slightly inefficient, since it calculates
        // d1 and d0 each time even though the points may not change
        double sumSqToNewest = 0;   // squared distance from _x to point 2
        double sumSqBetween = 0;    // squared distance from point 2 to point 1
        for (int k = 0; k < _n; k++){
            double gap = (_twistyLittlePassagePoint2.get (k) - _x.get(k));
            sumSqToNewest += gap*gap;
            gap = (_twistyLittlePassagePoint1.get (k) - _twistyLittlePassagePoint2.get(k));
            sumSqBetween += gap*gap;
        } // for
        final double d1 = Math.sqrt (sumSqToNewest);
        final double d0 = Math.sqrt (sumSqBetween);

        // weights of point 1, the current point and point 2 respectively
        final double weightPoint1 = (dist * (dist - d1)) / (d0 * (d0 + d1));
        final double weightCurrent = ((dist + d0) * (d1 - dist)) / (d0 * d1);
        final double weightPoint2 = (dist * (dist + d0)) / (d1 * (d0 + d1));
        for (int k = 0; k < _n; k++){
            result.set (k, _twistyLittlePassagePoint1.get (k) * weightPoint1 +
                           _x.get (k) * weightCurrent +
                           _twistyLittlePassagePoint2.get (k) * weightPoint2);
        } // for
        System.out.println("twisty little extrapolate , result = "+result);
    } // extrapolateAlong

    /**
     * Tells whether a is negligible compared with b, i.e. whether the magnitude
     * of a/b is below SMALL. A zero a is always negligible; a non-zero a is never
     * negligible compared with a zero b.
     *
     * @param a the quantity being tested
     * @param b the quantity it is compared against
     * @return true if a is zero or |a/b| is less than SMALL
     */
    private boolean tooSmall (double a, double b){
        if (a == 0) return true;
        if (b == 0) return false;
        // the sign of the ratio is irrelevant, only its size matters
        return Math.abs (a/b) < SMALL;
    } // tooSmall 
     
    /**
     * Test suite: minimizes the sum of squared components in two dimensions,
     * starting from (3, -3) with epsilon set to 1e-3, and prints the starting
     * point, the ending point and the number of function evaluations.
     *
     * @param args ignored
     */
    public static void main (String[] args){
        ScalarFunction sumOfSquares = new ScalarFunction(){
            public double eval (Vec v){
                double total = 0.0;
                for (int k = 0; k < v.size(); k++){
                    double magnitude = Math.abs(v.get(k));
                    total += magnitude*magnitude;
                } // for
                return total;
            } // eval
        }; // new ScalarFunction
        VecMinimizer minimizer = new Brent (sumOfSquares);
        minimizer.setEpsilon (1e-3);

        Vec point = new Vec_array(2);
        point.set (0, 3.0);
        point.set (1, -3.0);

        System.out.println("Starting: " + point);
        minimizer.minimize (point);
        System.out.println("Ending: " + point);
        System.out.println("Function calls:" + minimizer.numFuncEvals());
    } // main
    
} // Brent 