/*
**  MCStack
**
**  Purpose:
**    Run sampling on a model on Stackloss data
**
**  Date:
**    17/6/2004
**
**  Author:
**    Charles Bos
*/
#include <oxstd.h>      // Include the Ox standard library header
#include "include/oxprobig.ox"
#include <packages/gnudraw/gnudraw.h>
#include <packages/mc2pack/mc2pack.h>
#include <packages/oxutils/oxutils.h>

// Static declaration
static decl s_MX_mX, s_MX_vY, s_MX_vZ, s_MX_vSumZ, s_MX_iC;
                    
// Function declaration
InitData(const avY, const avMu, const bAug, const sDatafile);
AvgLnPriorStack(const vTheta, const adLnAvgP, const avScore, const amHessian);
AvgLnLiklStack(const vTheta, const adLnAvgP, const avScore, const amHessian);
AvgLnPostStack(const vTheta, const adLnAvgP, const avScore, const amHessian);
fnRanPriorStack(const iQ);
fnRanCondGG_Z(const vInd, const avP);
fnRanCondGG_Beta(const vInd, const avP);
                
main()
{
  decl ir, iP, iMethod, iSize, iSeed, bAug, bOpt, bNoSim, dMHf, dRW, bNoBeta, vMu, mS2, mBounds, vY, mTheta, vW, sVarnames,
       sOutbase,  mcmc, mX, mY, dTime, mRNE, vML;

  // Initialise
  dTime= timer();
  sVarnames= {"Air Flow", "Water Temp", "Acid Conc", "Sigma", "k",
              "Alpha", "zAug"};
  iMethod= MC_APS; iSize= 1000; bAug= bOpt= s_MX_vSumZ= s_MX_iC= 0;
  dMHf= 0.9, dRW= 0; iSeed= 1234; sOutbase= "mcs";
  mBounds= <-30, 30;  
            -30, 30;
            -30, 30;
              0.5, 10;
              1, 20;
              0.02, 1;
             -1, 2>;
  iP= rows(mBounds)-4; // Size of vector Beta

  setseed(iSeed);    
  println ("MCStack\n=======\nCommand line arguments:");
  println ("  nogr skip drawing graphs\n  m    method, 0=mh, 1=is, 2=aps, 3=apis, 4=gg");
  println ("  s    size of final sample, default= 1000");
  println ("  f    mahalanobis fraction, 0.9 is default");
  println ("  rw   use rw candidate, with fraction of covariance");
  println ("  nb   skip beta conditional density");
  println ("  ns   skip simulation");
  println ("  aug  use augmented Gibbs sampling");
  println ("  opt  optimize posterior at start");
  println ("  base output base");
  println ("  seed random seed");
  
  if (ReadArg(&ir, "nogr", 1))
    DrawAdjust(ADJ_SHOW, FALSE);
  ReadArg(&iMethod, "m", 1);
  ReadArg(&iSize, "s", 1);
  ReadArg(&iSeed, "seed", 1);
  ReadArg(&sOutbase, "base", -1);
  ReadArg(&dRW, "rw", 1);
  ReadArg(&dMHf, "f", 1);
  ReadArg(&bNoBeta, "nb", 0);
  ReadArg(&bNoSim, "ns", 0);
  if (ReadArg(&bAug, "aug", 0))  
    iMethod= MC_GG;  
  ReadArg(&bOpt, "opt", 0);
  sOutbase= sprint("excl/", sOutbase, "a", bAug, "o", bOpt, "s",
    round(log10(iSize)), "f", 10*dMHf, "m", iMethod, "r", 10*dRW, "b", 1-bNoBeta);
  println ("\nOutput into\n  ", sOutbase);

  InitData(&vY, &vMu, bAug, "data/stackloss.mat");
  mS2= unit(sizerc(vMu));

  // Use package
  mcmc= new MC2Pack();
  mcmc.SetMethod(iMethod);
  mcmc.SetParNames(sVarnames);
  if (dRW != 0)
    mcmc.SetCandidate(MC_CRW, dRW);
  else  
    mcmc.SetCandidate(MC_CSTUD, {1, 4});
  mcmc.SetInfo(.1);
  mcmc.SetIntegration(1, 1, TRUE, TRUE);
  mcmc.SetLimits(mBounds[:iP+2+bAug][]);    // Bounds on sampling region
  mcmc.SetSample(1000~iSize, .1*iSize, 0, 100, .1*iSize);
  mcmc.SetMahalanobisFraction(dMHf);
  mcmc.SetPosterior(AvgLnPostStack);
  if (bAug)
    {
      mcmc.SetConditional(iP+3, fnRanCondGG_Z);
      if (!bNoBeta)
        mcmc.SetConditional(range(0, iP-1), fnRanCondGG_Beta);
    }  
  mcmc.SetLocationScale(&vMu, &mS2, sizerc(vY), bOpt);
  mcmc.SetOutput(sOutbase, TRUE);
  if (!bNoSim)
    mcmc.Simulate();
  
  // Extract results
  ir= mcmc.GetDraws(&mTheta, &vW);

  // Plot results
  SetDrawWindow("");
  [mX, mY]= DrawDensity(0, mTheta[:iP+2][], mcmc.GetParNames(), 1, 0, 0, 0, 0, 0, 2, vW);
  DrawTitle(-1, sOutbase);
  SaveDrawWindow(sOutbase~"dens.plb");
  ShowDrawWindow();

  DrawBivDensity(0, mTheta[iP+1:iP+2][], mcmc.GetParNames()[iP+1:iP+2][], 1, 0, 0, 1, vW);         
  DrawBivDensity(1, mTheta[iP+1:iP+2][], mcmc.GetParNames()[iP+1:iP+2][], 1, 0, 0, 2, vW);
  if (vW == 1)
    DrawAcf(2, mTheta, mcmc.GetParNames(), 100, 1, 0, 0, 2, 2, 1);
  if (bAug)
    DrawBivDensity(3, mTheta[iP+2:iP+3][], mcmc.GetParNames()[iP+2:iP+3], 1, 0, 0, 2);
  DrawTitle(-1, sOutbase);
  SaveDrawWindow(sOutbase~"res.plb");
  ShowDrawWindow();

  if (bAug)
    {
      DrawMatrix(0, s_MX_vSumZ/s_MX_iC, "pZ", 1, 1, 1);
      DrawAdjust(ADJ_INDEX, 1);
      DrawTitle(-1, sOutbase);
      SaveDrawWindow(sOutbase~"pz.plb");
      ShowDrawWindow();
    }  

  // Perform RNE computations
  mcmc.SetTaper(.05);
  mRNE= mcmc.GetRNE();

  // Perform marginal likelihood computations
  [mX, mY]= DrawDensity(0, mTheta, mcmc.GetParNames(), 1, 0, 0, 0, 0, 0, 2, vW);
  CloseDrawWindow();  
  vMu= diagonal(mX[][limits(mY')[3][]'])';    // Extract the mode
  mcmc.SetRanPrior(fnRanPriorStack);
  mcmc.SetPrior(AvgLnPriorStack);
  mcmc.SetLikelihood(AvgLnLiklStack);
  
  vML= mcmc.Marglik(MC_MLKERN, vMu)~
       mcmc.Marglik(MC_MLHM, 1)~
       mcmc.Marglik(MC_MLGG, vMu, .1*iSize)~
       0~ // mcmc.Marglik(MC_MLINT, 20)~    // Takes enormous time
       mcmc.Marglik(MC_MLPR, .1*iSize)~
       (bAug == 0 ? mcmc.Marglik(MC_MLIS, vMu, M_NAN, .1*iSize) : 0);

  fopen("excl/mcstack.out", "la");
  println ("\nResults for ", sOutbase);
  print ("s= ", iSize, ", MHf= ", dMHf, ", aug= ", bAug, ", r= ", dRW,
         ", b= ", 1-bNoBeta, ", m= ", iMethod, ", seed= ", iSeed);
  print ("%c", {"Mean", "sDev", "Min", "Max", "Mode", "r100", "RNE"}, 
         "%r", sVarnames, "%cf", {"%10.3f"},
         meanisr(mTheta[:iP+2][], vW)~sqrt(varisr(mTheta[:iP+2][], vW))~
         limits(mTheta[:iP+2][]')[:1][]'~
         vMu[:iP+2]~
         (acfq(mTheta[:iP+2][]', 100))'~
         mRNE[:iP+2]);

  print ("%c", {"ML"}, 
         "%r", {"Kern", "HM", "GG", "Int", "Pr", "IS"}, "%cf",
         {"%10.3f"}, vML');

  println ("Time passed: ", timespan(dTime));         
  fclose("l");         
}

/*
**  Initialize()
**
*/
InitData(const avY, const avMu, const bAug, const sDatafile)
{
  decl vP, iN, mX, ir, dS, vBeta, dAlpha, dK;

  mX= loadmat(sDatafile);
  s_MX_mX= mX[:2][];
  avY[0]= s_MX_vY= mX[3][];
  iN= columns(s_MX_vY);

  ir= olsr(s_MX_vY, s_MX_mX, &vBeta);
  dS= sqrt(varr(s_MX_vY - vBeta*s_MX_mX));
  
  dAlpha= 0.1;
  dK= 1.5;

  avMu[0]= vBeta'|dS|dK|dAlpha;
  if (bAug)
    {
      s_MX_vZ= (ranu(1, iN) .< dAlpha);
      avMu[0]|= meanr(s_MX_vZ);
    }  
}

/*
**  AvgLnPriorStack(const vTheta, const adLnAvgPr, const avScore, const amHessian)
**
**  Purpose:
**    Compute prior 
*/
AvgLnPriorStack(const vTheta, const adLnAvgPr, const avScore, const amHessian)
{
  decl iP, iN, vBeta, dS, dK, dAlpha, dLnPr;
  
  iP= rows(s_MX_mX);
  iN= columns(s_MX_mX);
  vBeta= vTheta[:iP-1];
  dS= fabs(vTheta[iP]);
  dK= vTheta[iP+1];
  dAlpha= vTheta[iP+2];

  // Standard normal prior on Beta (also hardcoded into RanCond_Beta)
  dLnPr= -0.5*vBeta'vBeta - 0.5*iP*log(M_2PI);
  // IG(3, .05) on sigma
  dLnPr+= lndensigamma(dS, 3, .05);
  // Uniform on k, between the bounds as specified elsewhere
  dLnPr+= 0;
  // Beta(0.5, 1) prior on alpha
  dLnPr+= log(densbeta(dAlpha, 0.5, 1));
  if (sizerc(vTheta) > iP+3)  // Augmented model
    // Bernoulli prior on Z|alpha
    dLnPr+= sumr(s_MX_vZ*log(dAlpha) + (1-s_MX_vZ)*log(1-dAlpha));
    
  adLnAvgPr[0]= dLnPr/iN;
  
  return !ismissing(adLnAvgPr[0]);
}

/*
**  AvgLnLiklStack(const vTheta, const adLnAvgP, const avScore, const amHessian)
**
**  Purpose:
**    Compute the logarithm of a mixture density, divided by the size
**    of Y
**
**  Inputs:
**    vTheta    iK x 1 matrix with coefficients,
**              in order p, sigma, c1, c2, 
**    amHessian Not used
**
**  Outputs:
**    adLnAvgP  double, average logarithm of likelihood value at vTheta 
**    avScore   if !0 on input: iK x 1 matrix with first derivatives at
**              vTheta (not used?)
**  Return value:
**    1: successful, 0: function evaluation failed
*/
AvgLnLiklStack(const vTheta, const adLnAvgP, const avScore, const amHessian)
{
  decl ir, iP, iN, vBeta, dS, dK, dAlpha, vU, vS, vL, vU2s, 
       dL, dLnPost, dLnPr, bAug;

  ir= 0;
  adLnAvgP[0]= M_NAN;

  iP= rows(s_MX_mX);
  iN= columns(s_MX_mX);
  vBeta= vTheta[:iP-1];
  dS= fabs(vTheta[iP]);
  dK= vTheta[iP+1];
  dAlpha= vTheta[iP+2];
  bAug= (sizerc(vTheta) > iP+3);

  if ((dAlpha < 0) || (dAlpha > 1) || (dK < 1))
    return 0;
 
  if (bAug)
    {
      vS= dK .^ s_MX_vZ .* dS;
      vU= s_MX_vY - vBeta'*s_MX_mX;
      vL= -log(vS) - 0.5*sqr(vU./vS);

      dLnPost= meanr(vL)-0.5*log(M_2PI);
    }  
  else
    {
      vU= s_MX_vY - vBeta'*s_MX_mX;
      vU2s= 0.5*sqr(vU/dS);
      vL= (1-dAlpha)*exp(-vU2s)+dAlpha*exp(-vU2s/sqr(dK))/fabs(dK);
      dLnPost= meanr(log(vL))-0.5*log(M_2PI)-log(dS);
    }  

  adLnAvgP[0]= dLnPost;
  ir= (!isnan(adLnAvgP[0]) && !isdotinf(adLnAvgP[0]));
//   if (!ir)  
//     print (ir~adLnAvgP[0]~vTheta');

  return ir;
}  

/*
**  AvgLnPostStack(const vTheta, const adLnAvgP, const avScore, const amHessian)
**
**  Purpose:
**    Compute the logarithm of a mixture density, divided by the size
**    of Y
**
**  Inputs:
**    vTheta    iK x 1 matrix with coefficients,
**              in order p, sigma, c1, c2, 
**    amHessian Not used
**
**  Outputs:
**    adLnAvgP  double, average logarithm of posterior density value at vTheta 
**    avScore   if !0 on input: iK x 1 matrix with first derivatives at
**              vTheta (not used?)
**  Return value:
**    1: successful, 0: function evaluation failed
*/
AvgLnPostStack(const vTheta, const adLnAvgP, const avScore, const amHessian)
{
  decl ir, dLnPr, dLnLikl;

  ir= AvgLnPriorStack(vTheta, &dLnPr, 0, 0) && 
      AvgLnLiklStack(vTheta, &dLnLikl, 0, 0);
  adLnAvgP[0]= dLnPr + dLnLikl;

  return ir;
}  

/*
**  fnRanPriorStack(const iQ)
**
**  Purpose:
**    Sample from the prior density
*/
fnRanPriorStack(const iQ)
{
  decl mPrior, bAug, iP, iN;
  
  bAug= sizerc(s_MX_vZ) > 1;
  
  iP= rows(s_MX_mX);
  iN= columns(s_MX_mX);
  
  mPrior= new matrix [iP+3+bAug][iQ];
  mPrior[:iP-1][]= rann(3, iQ);
  mPrior[iP][]= ranigamma(1, iQ, 3, .05);
  mPrior[iP+1][]= ranu(1, iQ)*19+1;
  mPrior[iP+2][]= ranbeta(1, iQ, .05, 1);
  if (bAug)
    {
      if (iQ > 1)
        oxrunerror("Data augmentation in ranprior not implemented with iQ > 1...", 1);
      s_MX_vZ= (ranu(1, iN) .< mPrior[iP+2][0]);
    }  
              
  return mPrior;  
}

/*
**  fnRanCondGG_Z(const vInd, const avP)
**
**  Purpose:
**    Sample from the conditional density of the z-parameters
**
**  Input:
**    vInd      iP+3 by definition
**    avP[0]    iP+3 vector of parameters
**
**  Output:
**    avP       Element iP+3 changed to mean Z
**    s_MX_vZ   1 x iN vector with newly sampled indices
**
**  Return value:
**    ir        True if succeeded
**
**  Note:
**    Augmentation is implemented by handing over a single, extra
**    parameter to mc2pack, which is actually a dummy parameter. Every time
**    mc2pack tries to sample this one parameter, instead in a global
**    vector s_MX_vZ a full set of indices is generated from the conditional
**    density. This way, the package will not store the full parameter
**    vector of indices along with the other parameters, as that would take
**    too much memory/disk space.
*/
fnRanCondGG_Z(const vInd, const avP)
{
  decl iP, iN, vBeta, dS, dK, dAlpha, vP, vU;
  
  iP= rows(s_MX_mX);
  iN= columns(s_MX_mX);
  
  if (vInd != iP+3)
    oxrunerror("Incorrect index in fnRanCandGG_Z", 0);

  vBeta= avP[0][:iP-1];
  dS= fabs(avP[0][iP]);
  dK= avP[0][iP+1];
  dAlpha= avP[0][iP+2];
  
  vU= s_MX_vY - vBeta'*s_MX_mX;
  vP= dAlpha * densn(vU ./ (dK*dS)) ./
    (dAlpha * densn(vU ./ (dK*dS)) + dK*(1-dAlpha) * densn(vU ./ dS));
    
  s_MX_vZ= (ranu(1, iN) .< vP);
  s_MX_vSumZ+= s_MX_vZ;
  ++s_MX_iC;

  avP[0][vInd]= meanr(s_MX_vZ);
  
  return 1;
}

/*
**  fnRanCondGG_Beta(const vInd, const avP)
**
**  Purpose:
**    Sample from the conditional density of the beta-parameters
**
**  Input:
**    vInd      0:iP-1
**    avP[0]    iP+3 vector of parameters
**
**  Output:
**    avP       Element 0:iP-1 changed to new beta
**
**  Return value:
**    ir        True if succeeded
*/
fnRanCondGG_Beta(const vInd, const avP)
{
  decl iP, iN, dS, dK, dAlpha, mV, miXtVX, vBetaHat, mC, mS2Hat, mS2i,
       mS2, vBeta;
  
  iP= rows(s_MX_mX);
  iN= columns(s_MX_mX);
  if (vecr(vInd) != range(0, iP-1)')
    oxrunerror("Incorrect index in fnRanCandGG_Beta", 0);

  dS= fabs(avP[0][iP]);
  dK= avP[0][iP+1];
  dAlpha= avP[0][iP+2];
  
  mV= diag(dK .^ (-2*s_MX_vZ));
  miXtVX= invertsym(s_MX_mX*mV*s_MX_mX');
  vBetaHat= miXtVX*s_MX_mX*mV*s_MX_vY';
  mS2Hat= sqr(dS)*miXtVX;
  
  // Posterior variance, combination of ML variance and prior variance
  mS2i= invertsym(mS2Hat)+unit(iP);
  mS2= invertsym(mS2i);
  // Posterior expectation, combination of ML expectation and 0 prior expectation
  vBeta= mS2 * (mS2i*vBetaHat + 0);
  mC= choleski(mS2);
  avP[0][:iP-1]= vBeta + mC*rann(iP, 1);
 
  return 1;
}