/*
 * LevenbergMarquardtBroyden.java
 *
 * Created on August 30, 2005, 10:19 PM
 *
 *  Copyright 2004 Daniel Wachsstock
 *  The contents of this file are subject to the Sun Public License
 *  Version 1.0 (the License); you may not use this file except in
 *  compliance with the License. A copy of the License is available at
 *  http://www.sun.com/ or http://www.geocities.com/tenua4java/license.html
 */

package nr.minimizer;
import nr.*;

/** Minimizes a function that generates a Vec by
 *  nonlinear least squares minimization of the difference
 *  between a given Vec of data and the function results.
 *  Uses the method of Levenberg-Marquardt (Numerical
 *  Recipes section 15.5). Assumes the
 *  sigmas (standard deviations) on the data are identically 1.
 *  Since the sigmas are implied, the covariance matrix says
 *  nothing about the goodness-of-fit, and this class
 *  does not return it.
 *  Calculates the Jacobian by symmetric finite differencing
 *  the first time, then updates it by Broyden's "good method"
 *  (Numerical Recipes equation 9.7.17). After every BROYDEN_TRIES
 *  Broyden updates the Jacobian is recomputed by finite differencing.
 *  Converges when the computed sum-of-squares changes by
 *  no more than epsilon (scaled by the previous sum-of-squares
 *  when that scaled value is larger than epsilon itself).
 *  The iteration also stops, keeping the best parameters found so far,
 *  if the sum-of-squares is NaN or the damping factor lambda overflows.
 *  @author Daniel Wachsstock
 */
public class LevenbergMarquardtBroyden implements VecMinimizer{
    private final VecFunction _f;
    private final double[] _data;
    private final int _nData; // the number of data points
    private int _n; // the number of parameters
    private double _epsilon = 0.001;
    private int _numFuncEvals = 0;
    private double _lambda; // the L-M lambda factor
    private double _chisq; // sum-of-squares
    private Mat_wrapper _alpha; // the curvature matrix
    private Vec_wrapper _beta; // see the algorithm for these variables
    private final Vec_wrapper _y; // the result of _f(_params)
    private Vec_wrapper[] _dy; // the jacobian of _y (dy/dparam for each param)
    // the number of evaluations since the last finite-difference Jacobian
    // (i.e. how many times in a row we have used Broyden's good method)
    private int _broydenCount=0;
    private double[] _deltaX; // the change in the parameters, for Broyden's method
    private double[] _deltaY; // the change in y, for Broyden's method

    // the number of times to run the algorithm (in case it gets stuck the
    // first time).
    static private final int TRIES = 1;
    // the number of times we use Broyden's method before
    // reverting to the finite-difference Jacobian
    static private final int BROYDEN_TRIES=10;

    /** Create a new instance of LevenbergMarquardtBroyden.
     *  @param data the data to which to minimize the sum of squared
     *  differences.
     *  @param f the function that takes a vector of parameters
     *  and outputs a vector of simulated data
     */
    public LevenbergMarquardtBroyden (Vec data, VecFunction f){
        _data = data.asArray();
        _nData = _data.length;
        _f = f;
        _y = new Vec_wrapper (new double[_data.length]);
    } // constructor

    /** Minimizes the sum of squared differences between the data and
     *  the function output, starting from the given parameters.
     *  Works on a copy of <code>v</code> and writes the final parameters
     *  back into <code>v</code> when done.
     *  @param v the initial parameters; overwritten with the fitted ones
     *  @return the sum-of-squares at the returned parameters
     */
    public double minimize (Vec v){
        nr.Vec params = v.copy();
        _n = params.size();
        _alpha = new Mat_wrapper (new double[_n][_n]);
        _beta  = new Vec_wrapper (new double[_n]);
        _dy = new Vec_wrapper[_n];
        for (int param = 0; param < _n; ++param){
            _dy[param] = new Vec_wrapper (new double[_nData]);
        } // for
        for (int i = 0; i < TRIES; ++i){
            _lambda = 0.001;
            _broydenCount = 0; // force a finite-difference Jacobian first
            LMCoefficients(params);
            while (!doLM(params)){
                // doLM does all the work; loop until it reports completion
            } // while
        } // for
        v.set (params);
        return _chisq;
    } // minimize

    // one round of the Levenberg-Marquardt algorithm.
    // Adjusts _lambda appropriately, and returns true if
    // it converged (or if no further progress is possible: the
    // sum-of-squares is NaN or _lambda has overflowed).
    // On entry and on exit _alpha, _beta, _y and _chisq all describe
    // the point held in params.
    private boolean doLM (nr.Vec params){
        if (_chisq == 0) return true; // zero error; can't get any better
        // nothing compares <= NaN, so every step would be rejected forever
        if (Double.isNaN (_chisq)) return true;
        double oldChisq = _chisq;
        // remember y at the current parameters so a rejected trial step
        // can be undone
        double[] oldY = (double[]) _y.data.clone();
        for (int i = 0; i < _n; ++i){
            // augment the diagonal
            _alpha.data[i][i] *= (1d + _lambda);
        } // for
        // solve alpha*x = beta, with the result put into beta
        (new LUDecomposition (_alpha)).solve (_beta);
        // develop the new parameters
        Vec tryParams = params.copy();
        for (int i=0; i<_n; ++i) tryParams.set(i, tryParams.get(i)+_beta.get(i));
        LMCoefficients(tryParams);
        if (_chisq <= oldChisq){
            // improved it!
            _lambda *= 0.1;
            params.set (tryParams);
            return isConverged(oldChisq, _chisq);
        } // if
        // no good: increase the damping and go back to the old point.
        _lambda *= 10;
        // _alpha and _beta now describe the rejected trial point (and
        // were overwritten by the augmentation and the solve), so rebuild
        // them from y at the current parameters. The Jacobian keeps the
        // update it just received.
        System.arraycopy (oldY, 0, _y.data, 0, oldY.length);
        accumulateCoefficients();
        _chisq = oldChisq;
        // once lambda has overflowed no further damping is possible; give up
        // rather than loop forever
        return Double.isInfinite (_lambda);
    } // doLM

    // evaluate the function (and jacobian) at params, then
    // calculate alpha, beta and chisq there
    private void LMCoefficients (nr.Vec params){
        eval (params);
        accumulateCoefficients();
    } // LMCoefficients

    // calculate alpha, beta and chisq from the current _y and _dy
    private void accumulateCoefficients (){
        // clear beta and the lower half of alpha (it's symmetric;
        // we'll set the upper half later)
        for (int i = 0; i < _n; ++i){
            for (int j = 0; j <= i; ++j){
                _alpha.data[i][j] = 0d;
            } // for j
            _beta.data[i] = 0d;
        } // for
        _chisq = 0d;
        for (int datum = 0; datum < _nData; ++datum){
            double delta = _data[datum]-_y.data[datum];
            for (int k = 0; k < _n; ++k){
                // Eq. 15.5.6 and 15.5.8
                _beta.data [k] += _dy[k].data[datum]*delta;
                for (int L = 0; L <= k; ++L){
                    // Eq. 15.5.11
                    _alpha.data[k][L] += _dy[k].data[datum]*_dy[L].data[datum];
                } // for l
            } // for k
            _chisq += delta*delta;
        } // for datum
        // fill in the upper half of alpha
        for (int i = 0; i < _n; ++i){
            for (int j = 0; j < i; ++j){
                _alpha.data[j][i] = _alpha.data[i][j];
            } // for j
        } // for
    } // accumulateCoefficients

    static final double SMALL = 1e-5; // double-precision accuracy ^ 1/3
      // See NR eq. 5.7.8

    // calculate the function and the jacobian.
    // I can't use _f.jacobian, since that assumes a
    // square matrix, but we use the same algorithm here
    // (symmetric finite differencing, NR eq. 5.7.7) for the
    // initial evaluation, then Broyden's method
    // B += (deltaY - B * deltaX) outerProduct deltaX / (deltaX*deltaX)
    // we could probably do this more efficiently by updating a QR
    // decomposition of the Jacobian (Numerical Recipes sections 9.7 and 2.10)
    // but inverting an _n x _n is a small part of the evaluation (the big
    // thing is _f.eval)
    private void eval (nr.Vec params){
        _f.eval (params, _y);
        _numFuncEvals++;
        if (_broydenCount == 0){
            evalJacobian (params);
        }else{
            evalBroyden (params);
        } // if
        _broydenCount++;
        if (_broydenCount > BROYDEN_TRIES) _broydenCount=0;
        // remember this point for the next Broyden update.
        // NOTE(review): evalBroyden modifies these arrays in place, so this
        // relies on asArray() returning a copy rather than the backing
        // array -- confirm against the Vec implementations.
        _deltaX = params.asArray();
        _deltaY = _y.asArray();
    } // eval

    // fill _dy with the jacobian at params by symmetric finite
    // differencing, one parameter at a time. Costs 2*_n function
    // evaluations. params is temporarily perturbed but holds its
    // original values again on return.
    private void evalJacobian (nr.Vec params){
        Vec y1 = new Vec_wrapper(new double[_nData]);
        Vec y2 = new Vec_wrapper(new double[_nData]);
        for (int param = 0; param < _n; ++param){
            double oldParam = params.get(param);
            double h = oldParam*SMALL;
            if (h == 0) h = SMALL; // watch for underflow
            // find f(x-h)
            params.set (param, oldParam - h);
            // trick to avoid roundoff errors; see NR eq. 5.7.4
            double h1 = oldParam - params.get(param);
            _f.eval (params, y1);
            // find f (x+h)
            params.set (param, oldParam + h);
            double h2 = params.get(param) - oldParam;
            _f.eval (params, y2);
            // put the parameter back, so the other columns are
            // differentiated at the right point and the caller's
            // vector is left unchanged
            params.set (param, oldParam);
            // calculate the derivative
            for (int i = 0; i < _nData; ++i){
                _dy[param].data[i] = (y2.get(i)-y1.get(i))/(h1+h2);
            } // for i
        } // for param
        _numFuncEvals += 2*_n;
    } // evalJacobian

    // update _dy by Broyden's good method, using the previous point
    // (_deltaX, _deltaY as left by eval) and the new one (params, _y).
    // Note that the deltas are computed as old - new; the two sign
    // changes cancel in the outer product.
    private void evalBroyden (nr.Vec params){
        // find the deltas
        for (int i = 0; i < _n; ++i) _deltaX[i] -= params.get(i);
        double deltaXSquared = dot (_deltaX, _deltaX);
        // a zero-length (or NaN) step carries no secant information and
        // dividing by it would fill the jacobian with NaN; keep the old one
        if (!(deltaXSquared > 0)) return;
        double[] product = times (_dy, _deltaX);
        for (int i = 0; i < _nData; ++i){
            _deltaY[i] -= _y.get(i) + product[i];
            _deltaY[i] /= deltaXSquared;
        } // for
        // the outer product
        for (int i = 0; i < _n; ++i){
            for (int j = 0; j < _nData; ++j){
                _dy[i].data[j] += _deltaY[j]*_deltaX[i];
            } // for j
        } // for i
    } // evalBroyden

    // vector dot product (over the length of a)
    double dot (double[] a, double[] b){
        double result = 0;
        for (int i = 0; i < a.length; ++i) result += a[i]*b[i];
        return result;
    } // dot

    // matrix multiplication: a holds one Vec of length _nData per
    // parameter; the result has _nData elements
    double[] times (nr.Vec[] a, double[] b){
        double[] result = new double[_nData];
        for (int i = 0; i < _nData; ++i){
            result[i] = 0;
            for (int j = 0; j < _n; ++j){
                result[i] += a[j].get(i) * b[j];
            } // for j
        } // for i
        return result;
    } // times

    // true if a and b differ by no more than _epsilon*a, or by no more
    // than _epsilon when _epsilon*a is smaller than that
    private boolean isConverged (double a, double b){
        double eps = _epsilon*a;
        if (eps < _epsilon) eps = _epsilon; // watch for _eps too small or a==0
        return (Math.abs (a-b) <= eps);
    } // isConverged

    /** @return the number of function evaluations made so far */
    public int numFuncEvals() {return _numFuncEvals; }
    /** @param epsilon the new convergence tolerance */
    public void setEpsilon (double epsilon) { _epsilon = epsilon; }
    /** @return the convergence tolerance */
    public double getEpsilon () { return _epsilon; }

} // LevenbergMarquardtBroyden