/*
**  SimAnn
**
**  Purpose:
**    Implement Simulated Annealing in Ox following
**    Goffe, William L., Gary D. Ferrier, and John Rogers (1994).
**    Global Optimization of Statistical Functions with Simulated Annealing.
**    Journal of Econometrics, 60(1/2):65-99.
**    http://emlab.berkeley.edu/Software/abstracts/goffe895.html
**
**  Original description (referring to FORTRAN code)

      PROGRAM SIMANN

      This file is an example of the Corana et al. simulated annealing
      algorithm for multimodal and robust optimization as implemented and
      modified by Goffe, Ferrier and Rogers. Counting the above line
      ABSTRACT as 1, the routine itself (SA), with its supplementary
      routines, is on lines 232-990. A multimodal example from Judge et al.
      (FCN) is on lines 150-231. The rest of this file (lines 1-149) is a
      driver routine with values appropriate for the Judge example. Thus,
      this example is ready to run.

      To understand the algorithm, the documentation for SA on lines
      236-484 should be read along with the parts of the paper that
      describe simulated annealing. The following lines will then aid the
      user in becoming proficient with this implementation of simulated
      annealing.

      Learning to use SA:
      Use the sample function from Judge with the following suggestions to
      get a feel for how SA works. When you've done this, you should be
      ready to use it on almost any function with a fair amount of
      expertise.

      1. Run the program as is to make sure it runs okay. Take a look at
         the intermediate output and see how it optimizes as temperature
         (T) falls. Notice how the optimal point is reached and how
         falling T reduces VM.

      2. Look through the documentation to SA so the following makes a
         bit of sense. In line with the paper, it shouldn't be that hard
         to figure out. The core of the algorithm is described on
         pp. 68-70 and on pp. 94-95. Also see Corana et al. pp. 264-9.

      3. To see how it selects points and makes decisions about uphill
         and downhill moves, set IPRINT = 3 (very detailed intermediate
         output) and MAXEVL = 100 (only 100 function evaluations, to
         limit output).

      4. To see the importance of different temperatures, try starting
         with a very low one (say T = 10E-5). You'll see (i) it never
         escapes from the local optimum (in annealing terminology, it
         quenches) & (ii) the step length (VM) will be quite small. This
         is a key part of the algorithm: as temperature (T) falls, step
         length does too. As a minor point, note how quickly VM is reset
         from its initial value; thus, the input VM is not very
         important. This is all the more reason to examine VM once the
         algorithm is underway.

      5. To see the effect of different parameters on the speed of the
         algorithm, try RT = .95 & RT = .1. Notice the vastly different
         speed of optimization. Also try NT = 20. Note that this sample
         function is quite easy to optimize, so it will tolerate big
         changes in these parameters. RT and NT are the parameters one
         should adjust to modify the runtime of the algorithm and its
         robustness.

      6. Try constraining the algorithm with either LB or UB.

**
**  Date:
**    2/10/2002
**
**  Author of Ox version:
**    Charles Bos
*/
#include <oxstd.h>      // Include the Ox standard library header
#include <oxfloat.h>    // assumed: the original second header name was lost
#import <maximize>      // MaxBFGS, MaxConvergenceMsg
#import <maxsa>         // MaxSA, MaxSAControl, MaxSAControlStep
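/*
**  The "Learning to use SA" notes above use the FORTRAN parameter names.
**  A minimal sketch of how they map onto this Ox version: RT, NT, NS, VM
**  and C are the dRt, iNT, iNS, vM and vC arguments of MaxSAControlStep()
**  as used in main() below, and T is the dT argument of MaxSA() itself.
**  Reading MaxSAControl()'s two arguments as the evaluation limit (MAXEVL)
**  and the print level (IPRINT) is an assumption based on the commented-out
**  call in main(); the helper name below is hypothetical, and the function
**  is not called anywhere.
*/
VerboseShortRun()
{
    // Suggestion 3: detailed output, few evaluations (assumed argument order)
    MaxSAControl(100, 3);
    // Suggestion 5: slower cooling (RT = .95) and longer temperature stages
    // (NT = 20), keeping NS, VM and C at the values used in main()
    MaxSAControlStep(20, 20, 0.95, 1, 2);
}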
/*
**  This subroutine is from the example in Judge et al., The Theory and
**  Practice of Econometrics, 2nd ed., pp. 956-7. There are two optima:
**  F(.864,1.23) = 16.0817 (the global minimum) and F(2.35,-.319) = 20.9805.
*/
fnc(const vP, const adFunc, const avScore, const amHess)
{
    decl vY, vX2, vX3;

    vY= <4.284; 4.149; 3.877; 0.533; 2.211; 2.389; 2.145; 3.231; 1.998; 1.379;
         2.106; 1.428; 1.011; 2.179; 2.858; 1.388; 1.651; 1.593; 1.046; 2.152>;
    vX2= <.286; .973; .384; .276; .973; .543; .957; .948; .543; .797;
          .936; .889; .006; .828; .399; .617; .939; .784; .072; .889>;
    vX3= <.645; .585; .310; .058; .455; .779; .259; .202; .028; .099;
          .142; .296; .175; .180; .842; .039; .103; .620; .158; .704>;

    // Return the negative of the sum of squares, as MaxSA maximizes
    adFunc[0]= -sumsqrc(vP[0]+vP[1]*vX2+sqr(vP[1])*vX3-vY);

    return !ismissing(adFunc[0]);
}

main()
{
    decl ir, vP, dFunc, dT, dRt, iNS, iNT, iNEps, vLo, vHi, vC, vM;

    // Note: start at the local, but not global, optimum of the Judge function.
    vP= < 2.354471; -0.319186>;

    // Parameters used to compute step sizes
    dRt= .5;
    iNS= 20;
    iNT= 5;
    vM= 1;
    vC= 2;
    dT= 5;

    // Run a preliminary estimation using MaxBFGS, to show the trouble
    // with the local optimum
    println ("\nEstimating using MaxBFGS");
    ir= MaxBFGS(fnc, &vP, &dFunc, 0, TRUE);
    println (MaxConvergenceMsg(ir), " at parameters ", vP',
             "with function value ", double(dFunc));

    // Estimate using MaxSA, to find the global optimum
    println ("\nEstimating using MaxSA");
    // MaxSAControl(1e6, 1);
    MaxSAControlStep(iNS, iNT, dRt, vM, vC);
    ir= MaxSA(fnc, &vP, &dFunc, &dT);
    println (MaxSAConvergenceMsg(ir), " at parameters ", vP',
             "with function value ", double(dFunc),
             " and temperature ", double(dT));

    // Reestimate using MaxSA, now with bounds on the parameters (suggestion 6)
    println ("\nReestimating using MaxSA, with bounds");
    // Note: start again at the local, but not global, optimum.
    vP= < 2.354471; -0.319186>;
    dT= 5;
    vLo= <0; -1>;
    vHi= <5; 2>;
    // MaxSAControl(1e6, 1);
    MaxSAControlStep(iNS, iNT, dRt, vM, vC);
    ir= MaxSA(fnc, &vP, &dFunc, &dT, vLo, vHi);
    println (MaxSAConvergenceMsg(ir), " at parameters ", vP',
             "with function value ", double(dFunc),
             " and temperature ", double(dT));
}
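/*
**  A sketch of suggestion 4 from the header: rerun MaxSA from the same
**  starting point with a very low initial temperature (T = 10E-5) and
**  watch it quench in the local optimum near F(2.35,-.319) = 20.9805
**  instead of reaching the global minimum F(.864,1.23) = 16.0817 (the
**  reported values carry the opposite sign, since fnc() returns the
**  negative of the sum of squares). The function name is hypothetical
**  and it is not called from main(); add a call there to try it.
*/
QuenchDemo()
{
    decl ir, vP, dFunc, dT;

    // Start again at the local, but not global, optimum
    vP= < 2.354471; -0.319186>;
    // Very low starting temperature: the algorithm should fail to escape
    dT= 1e-5;
    ir= MaxSA(fnc, &vP, &dFunc, &dT);
    println (MaxSAConvergenceMsg(ir), " at parameters ", vP',
             "with function value ", double(dFunc),
             " and temperature ", double(dT));
}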