/************************************************************************
*   U = mexchol(X)         X = U'*U 
*  Computes upper-triangular Cholesky factor U of full symmetric positive
*  definite matrix X. 
*
*  Uses loop unrolling, which makes it about twice as fast as MATLAB's
*  built-in "chol(X)", on Intel Pentium PRO 200 (this typically depends
*  on the system's architecture).
************************************************************************/

#include "mex.h"
#include <math.h>

#if !defined(SQR)
#define SQR(x) ((x)*(x))
#endif

#if !defined(MIN)
#define  MIN(A, B)   ((A) < (B) ? (A) : (B))
#endif
#if !defined(MAX)
#define  MAX(A, B)   ((A) > (B) ? (A) : (B))
#endif

/****************************************************************
   TIME-CRITICAL PROCEDURE -- r=realdot(x,y,n)
   Computes r=sum(x_i * y_i) using LEVEL 8 loop-unrolling.
*****************************************************************/
double realdot(const double *x, const double *y, const int n)
{
 int i;
 double r;

 r=0.0;
 for(r=0.0, i=0; i< n-7; i++){    /* LEVEL 8 */
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i];
 }
 if(i < n-3){                      /* LEVEL 4 */
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
 }
 if(i < n-1){                      /* LEVEL 2 */
   r+= x[i] * y[i]; i++;
   r+= x[i] * y[i]; i++;
 }
 if(i < n)                         /* LEVEL 1 */
   r+= x[i] * y[i];
 return r;
}
/******************************************************************
   PROCEDURE cholfacU  -  Computes triu matrix U s.t. U'*U = X
   INPUT:
     x - full n x n, should be symmetric
     n - order of u, x
   UPDATED:
     u - full n x n, zeros in tril on input, contains U on output.
   RETURNS:
     0 = "success", 1 = "X is NOT positive definite"
******************************************************************/
int cholfacU(double *x, double *diag, const int n)
{
  int i, j, jn;
  double uij, ujj, avediag, mindiag, eps, pertdiag;
  double *ui, *uj;

  if (n < 1)
     return 0;
  /*--------------------------------------------------------------
      add a small positive number to the diagonal elements
  ---------------------------------------------------------------*/
  avediag = 0; 
  for(j=0; j<n; j++) { avediag += x[j+j*n];  }
  avediag = avediag/n;   
  pertdiag = 2E-15*n*avediag;
  for (j=0; j<n; j++) { 
      jn = j*n; x[j+jn] += pertdiag;  diag[j] = x[j+jn]; }
  /*------------------------------------------------------------
     Let mindiag be a threshold on diagonal entries, under which we
     want to discard the corresponding row/column.
     Solve the columns of U, for j=0:n-1
  ---------------------------------------------------------------*/
  mindiag = 1E-16;
  for (uj=x, j=0; j<n; j++, uj+=n){
      /*------------------------------------------------------------
	 Solve "uij" from the identity
	   uii * uij = xij - u(1:i-1,i)'*u(1:i-1,j)
       -------------------------------------------------------------*/
      for (ui = x, i = 0, ujj = 0.0; i < j; i++, ui+=n){
           uij = x[i*n+j] - realdot(ui,uj,i);
           uij /= ui[i];
           uj[i] = uij;
           ujj += SQR(uij);
      }
      ujj = diag[j] - ujj;
    /*------------------------------------------------------------
       By now, "ujj" should contain the final u(j,j)^2. Check whether
       it is positive. 
     -------------------------------------------------------------*/
      if (ujj >= mindiag)
         uj[j] = sqrt(ujj);
      else {
        return 1; }
  }
  return 0;
}
/********************************************************************
  PROCEDURE mexFunction - Entry for Matlab
*********************************************************************/
void mexFunction(const int nlhs, mxArray *plhs[],
                 const int nrhs, const mxArray *prhs[])
{
  int n; 
  double *x;
  double *diag;
  double *indef;

  if(nrhs < 1)
    mexErrMsgTxt("mexchol: requires 1 input arguments.");
  if(nlhs > 1)
    mexErrMsgTxt("mexchol: requires 1 output argument.");

  x = mxGetPr(prhs[0]);
  if (mxIsSparse(prhs[0]))
     mexErrMsgTxt("Sparse X not supported by mexchol.");
  if ( (n = mxGetM(prhs[0])) != mxGetN(prhs[0]) )
     mexErrMsgTxt("X should be square.");
 
  plhs[0] = mxCreateDoubleMatrix(1,1,mxREAL); 
  indef = mxGetPr(plhs[0]); 
  diag = mxCalloc(n,sizeof(double));

  indef[0] = cholfacU(x,diag,n); 
}
/************************************************************************/




