disp('Running MATLAB script file MarronPerouList1FigC.m') ;
%
%    This makes Figure C for the Marron-Perou List paper:
%    a bar graph of the leave-one-out cross-validated correct
%    classification rate of the Cluster Index method, as a function
%    of the number of genes kept
%
%    Copied from GeneArray6.m
%    which was for analyzing Chuck Perou's two Gene Array data sets,
%    this was essentially ipart == 22, in that program
%
%    Needs on the path:  the preloaded data file named in filestr,
%    the row sort key file GA6RowSortKey.txt, and the helper vec2matSM
%



viscale = [3] ;     %  Index of scaling type
                    %  1 - raw ratios
                    %  2 - log2(raw ratios)
                    %  3 - row median normalized log2 ratios (Chuck's choice)
                    %  4 - row median & MAD standardized
                    %  5 - row median & max standardized



vidisc = [1] ;    %  Index of discrimination method (when needed)
                  %  1 - Cluster Index
                  %  (kept from GeneArray6.m, only method 1 is 
                  %   implemented in this script)




vntrim = [2; 5; 10; 20; 30; 50; 70; 100; 200; 459] ;
          %  numbers to trim to for discrimination
          %  (column vector, one bar is drawn for each entry)
nntrim = length(vntrim) ;




asciifilestr = 'GA6OnlyInputs.txt' ;
    %  name of the ASCII input file (kept from GeneArray6.m, 
    %  not read by this script)
filestr = 'GA6DataAll' ;
    %  name of the preloaded data file that is loaded below

%  set up class labels
%
classlabelcellstr = {'Basal', ...
                     'ERB', ...
                     'Normal', ...
                     'Lum A', ...
                     'Lum BC'} ;
nclust = size(classlabelcellstr,2) ;
    %  number of classes, sets the size of the results matrix
classcolorcellstr = {'m', ...
                     'c', ...
                     'g', ...
                     'b', ...
                     'r'} ;
    %  one color per class label



%   Use preloaded version of main data
%   This is expected to put mdata, flagintrinsic, flagfumi, 
%   flagclass2 and flagclass4 into the workspace 
%   (all are used below without being defined in this script)

load(filestr) ;
    %  function form of load, instead of eval of a command string,
    %  so the file name is passed as data, not parsed as a command


mdataint = mdata(:,flagintrinsic) ;
mdatafumi = mdata(:,flagfumi) ;
    %  columns (cases) of mdata, selected by each flag vector

flagclass2int = flagclass2(flagintrinsic) ;
flagclass2fumi = flagclass2(flagfumi) ;
    %  corresponding entries of flagclass2

d = size(mdata,1) ;
    %  number of rows (genes)
nint = size(mdataint,2) ;
nfumi = size(mdatafumi,2) ;
    %  numbers of columns (cases) in each subset

%  Limits of the image region, half a unit outside the 
%  row range 1..d and the column range 1..nint
%
left = -0.5 ;
rightint = nint + 0.5 ;
bottomi = -0.5 ;
topi = d + 0.5 ;


%  Open (or reuse) figure window 1, and erase its contents
%
figh = figure(1) ;
clf ;


%  Set up color map, in a single 65 x 3 matrix of RGB rows:
%      rows  1 - 32:   green fading from full to none
%      rows 33 - 64:   red growing from none to full
%      row  65:        gray
%
genecomap = [ zeros(32,1), linspace(1,0,32)', zeros(32,1) ; ...
              linspace(0,1,32)', zeros(32,2) ; ...
              0.5, 0.5, 0.5 ] ;
colormap(genecomap) ;




%  do scaling, then leave-one-out cross-validation and plotting,
%  once for each requested scaling type
%
%  NOTE(review): the output file name below does not depend on 
%  iscale, so with more than one entry in viscale each pass 
%  overwrites the files of the previous one
%
for iscale = viscale ;

  if iscale == 1 ;    %  raw ratios

    mdataintrs = mdataint ;
    mdatafumirs = mdatafumi ;

    ccenter = 1 ;
    cradius = (log(8) + 1) / 2 ;
        %  color scale, center and radius
        %  (not used later in this script)

    scaletitstr = 'Raw Ratios' ;
    scaleoutstr = 'RawR' ;

  elseif iscale == 2 ;    %  log2(raw ratios)

    mdataintrs = log(mdataint) / log(2) ;
    mdatafumirs = log(mdatafumi) / log(2) ;
        %  use log, not log2, for better handling of NaNs

    ccenter = 0 ;
    cradius = log2(8) ;
        %  color scale, center and radius

    scaletitstr = 'Log2 Ratios' ;
    scaleoutstr = 'Log2R' ;

  elseif iscale == 3 ;    %  row median normalized log2 ratios (Chuck's choice)

    mdataintrs = log(mdataint) / log(2) ;
    mdatafumirs = log(mdatafumi) / log(2) ;

    vmedian = (nanmedian(mdataintrs'))' ;
        %  double transpose, since nanmedian works down columns only
        %  nanmedian correctly handles NaNs

    mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
    mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;
        %  both data sets are centered at the row medians 
        %  of the intrinsic data

    ccenter = 0 ;
    cradius = log(8) ;
        %  color scale, center and radius
        %  NOTE(review): natural log here, while the iscale == 2 
        %  branch uses log2(8), value is not used later in this 
        %  script - confirm which was intended

    scaletitstr = 'Log2 Ratios - med' ;
    scaleoutstr = 'L2Rmm' ;

  elseif iscale == 4 ;    %  row median & MAD standardized

    mdataintrs = log(mdataint) / log(2) ;
    mdatafumirs = log(mdatafumi) / log(2) ;

    vmedian = (nanmedian(mdataintrs'))' ;
        %  double transpose, since nanmedian works down columns only
        %  nanmedian correctly handles NaNs

    mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
    mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;
    
    vmad = (nanmedian(abs(mdataintrs')))' ;
        %  median absolute deviation from the row median,
        %  computed from the median centered log2 data
        %  (not from the raw ratios, whose median absolute 
        %   value is not a measure of spread)
        %  double transpose, since nanmedian works down columns only
        %  nanmedian correctly handles NaNs
    vmad = vmad / norminv(0.75) ;
        %  adjust to scale of standard deviation:
        %  for Gaussian data, MAD = norminv(0.75) * sigma
        %  (norminv(0.75) - norminv(0.25) is the factor for the 
        %   interquartile range, which is twice as large)

    mdataintrs = mdataintrs ./ vec2matSM(vmad,nint) ;
    mdatafumirs = mdatafumirs ./ vec2matSM(vmad,nfumi) ;
    
    ccenter = 0 ;
    cradius = 2 ;
        %  color scale, center and radius

    scaletitstr = 'Log2(R - med) / mad' ;
    scaleoutstr = 'L2Rmmom' ;

  elseif iscale == 5 ;    %  row median & max standardized

    mdataintrs = log(mdataint) / log(2) ;
    mdatafumirs = log(mdatafumi) / log(2) ;

    vmedian = (nanmedian(mdataintrs'))' ;
        %  double transpose, since nanmedian works down columns only
        %  nanmedian correctly handles NaNs

    mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
    mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;
    
    vmax = (nanmax(abs(mdataintrs')))' ;
        %  double transpose, so the max is taken over each row
        %  nanmax correctly handles NaNs

    mdataintrs = mdataintrs ./ vec2matSM(vmax,nint) ;
    mdatafumirs = mdatafumirs ./ vec2matSM(vmax,nfumi) ;

    ccenter = 0 ;
    cradius = 2 / 3 ;
        %  color scale, center and radius

    scaletitstr = 'Log2(R - med) / max' ;
    scaleoutstr = 'L2Romax' ;

  end ;



  %  First appropriately reorder data
  %
  %  sort columns according to classes, row 4
  %
  flagclass4int = flagclass4(flagintrinsic) ;
  [sortflag4int,vind] = sort(flagclass4int) ;
  
  vdif = sortflag4int(2:nint) - sortflag4int(1:(nint-1)) ;
  vbdry = 1.5:1:nint ;
  vbdry = vbdry(vdif > 0.5) ;
      %  half integer positions where the sorted class label changes
      %  (not used later in this script)


  mdataintrsco = mdataintrs(:,vind) ;
        %  use "case ordering", depending on labels

  sort4flag2int = flagclass2int(vind) ;
      %  corresponding version of row 2 labels


  %  order rows using the modified Chuck's ordering
  %  (with 3 new genes at end)
  %
  sortkey = load('GA6RowSortKey.txt') ;
  [temp,vind] = sort(sortkey) ;
      %  vind is reused here, now as the row permutation

  mdataintro = mdataintrsco(vind,:) ;
  mdatafumiro = mdatafumirs(vind,:) ;
      %  re-ordered versions  of rescaled data




  methstr = 'ClustInd' ;
  methodstr = 'Cluster Index' ;
  ylabelstr = 'SSbetween / SStotal' ;





  %  Loop through trimming amounts
  %
  vncorr = [] ;
  vncorrsp = [] ;
      %  correct classification rates, one entry per trimming amount:
      %  exact class, and class "super groups" (see below)
  for ntrim = vntrim' ;

    disp(['      ' methodstr ' Classification CV, ntrim = ' ...
                    num2str(ntrim)]) ;



    %  Loop through training data, leaving one out
    %
    mresults = zeros(nclust,nclust) ;
    for iloo = 1:nint ;

      flagloo = ones(1,nint) ;
      flagloo(iloo) = 0 ;
          %  all ones, except at the point to be left out
      flagloo = logical(flagloo) ;
      
      mdataloo = mdataintro(:,flagloo) ;
          %  training data matrix, with case iloo left out
      sortflag4intloo = sortflag4int(flagloo) ;
          %  key to classes
      
      vdatatest = mdataintro(:,iloo) ;
          %  new test case, to classify
      
      testclass = sortflag4int(iloo) ;
          %  Row 4 classification of this vector
          %  (taken from sortflag4int, the "correct" class)



      %  Compute common quantities, for discrimination
      %  
      mdata1loo = mdataloo(:,sortflag4intloo == 1) ;
      mdata2loo = mdataloo(:,sortflag4intloo == 2) ;
      mdata3loo = mdataloo(:,sortflag4intloo == 3) ;
      mdata4loo = mdataloo(:,sortflag4intloo == 4) ;
      mdata5loo = mdataloo(:,sortflag4intloo == 5) ;

      vmeanoaloo = (nanmean(mdataloo'))' ;
      vmean1loo = (nanmean(mdata1loo'))' ;
      vmean2loo = (nanmean(mdata2loo'))' ;
      vmean3loo = (nanmean(mdata3loo'))' ;
      vmean4loo = (nanmean(mdata4loo'))' ;
      vmean5loo = (nanmean(mdata5loo'))' ;
          %  double transposes, since nanmean works down columns only
          %  nanmean correctly handles NaNs



      %  Compute Clustering Indices (for each gene)
      %
      sst = (mdataloo - vec2matSM(vmeanoaloo,nint-1)).^2 ;
      sst = (nansum((sst)'))' ;
          %  total sum of squares about the overall mean
      ssb = sum(~isnan(mdata1loo),2) .* (vmean1loo - vmeanoaloo).^2 + ...
            sum(~isnan(mdata2loo),2) .* (vmean2loo - vmeanoaloo).^2 + ...
            sum(~isnan(mdata3loo),2) .* (vmean3loo - vmeanoaloo).^2 + ...
            sum(~isnan(mdata4loo),2) .* (vmean4loo - vmeanoaloo).^2 + ...
            sum(~isnan(mdata5loo),2) .* (vmean5loo - vmeanoaloo).^2 ;
          %  between class sum of squares, each class weighted 
          %  by its number of non-missing values
      %ssw1 = (mdata1loo - vec2matSM(vmean1loo,size(mdata1loo,2))).^2 ;
      %ssw1 = (nansum(ssw1'))' ;
      %ssw2 = (mdata2loo - vec2matSM(vmean2loo,size(mdata2loo,2))).^2 ;
      %ssw2 = (nansum(ssw2'))' ;
      %ssw3 = (mdata3loo - vec2matSM(vmean3loo,size(mdata3loo,2))).^2 ;
      %ssw3 = (nansum(ssw3'))' ;
      %ssw4 = (mdata4loo - vec2matSM(vmean4loo,size(mdata4loo,2))).^2 ;
      %ssw4 = (nansum(ssw4'))' ;
      %ssw5 = (mdata5loo - vec2matSM(vmean5loo,size(mdata5loo,2))).^2 ;
      %ssw5 = (nansum(ssw5'))' ;
      %ssw = ssw1 + ssw2 + ssw3 + ssw4 + ssw5 ;


      %disp('  Check this is 0: ') ;
      %max(abs(sst - (ssb + ssw))) 
          %  lines to check calculations
      %pauseSM

      vindloo = ssb ./ sst ;
        %  vector of clustering indices
        %  (NaN for a gene whose sst is 0, or whose ssb is NaN)






      %  now do trimming
      %
      sortvind = sort(vindloo(~isnan(vindloo))) ;
          %  sorted in increasing order, with undefined (NaN) indices
          %  dropped first:  sort puts NaNs last, so after flipping 
          %  they would come first and make the cutoff NaN,
          %  which selects no genes at all
      sortvind = flipud(sortvind) ;
          %  sorted in decreasing order

      if isempty(sortvind) ;
        error(['MarronPerouList1FigC:  no gene has a defined ' ...
                   'Cluster Index, leaving out case ' num2str(iloo)]) ;
      end ;

      nkeep = min(ntrim,length(sortvind)) ;
          %  cannot keep more genes than have a defined index
      thresh = sortvind(nkeep) ;
          %  cutoff, for ntrim biggest values of ind
      threshflag =  vindloo >= thresh  ;
          %  ones for genes to keep  (NaN indices compare as false)
      nthresh = sum(threshflag) ;
          %  number after trimming (should be ntrim, 
          %  can be more when there are ties at the cutoff)
      %nthresh
      %pauseSM ;
          %  used these lines to check

      mtdint = mdataloo(threshflag,:) ;
          %  matrix of trimmed data, leave one out

      vmean1trim = vmean1loo(threshflag) ;
      vmean2trim = vmean2loo(threshflag) ;
      vmean3trim = vmean3loo(threshflag) ;
      vmean4trim = vmean4loo(threshflag) ;
      vmean5trim = vmean5loo(threshflag) ;
  
      vdatatesttrim = vdatatest(threshflag) ;



      vfnonmiss = ~isnan(vdatatesttrim) ;
          %  ones where trimmed test data is non-missing
      nnonmiss = sum(vfnonmiss) ;
          %  number of nonmissing genes

      if nnonmiss == 0 ;
        warning(['MarronPerouList1FigC:  case ' num2str(iloo) ...
                     ' is missing all selected genes, ' ...
                     'its distances are undefined']) ;
            %  all distances are then NaN, and min below 
            %  returns class 1 by default
      end ;

      d1 = nansum((vdatatesttrim - vmean1trim).^2) ;
      d2 = nansum((vdatatesttrim - vmean2trim).^2) ;
      d3 = nansum((vdatatesttrim - vmean3trim).^2) ;
      d4 = nansum((vdatatesttrim - vmean4trim).^2) ;
      d5 = nansum((vdatatesttrim - vmean5trim).^2) ;
          %  NaN accounted sum of square diff's, for distances

      d1 = sqrt(d1 / nnonmiss) ;
      d2 = sqrt(d2 / nnonmiss) ;
      d3 = sqrt(d3 / nnonmiss) ;
      d4 = sqrt(d4 / nnonmiss) ;
      d5 = sqrt(d5 / nnonmiss) ;
          %  Euclidean distances to each class mean
          %  (root mean square over the non-missing genes)


      [temp, dclass] = min([d1; d2; d3; d4; d5]) ;
          %  dclass is the index of the nearest class mean





      %  Fill classification matrix with results
      %
      mresults(dclass,testclass) = mresults(dclass,testclass) + 1 ;
          %  update results matrix, by one in:
          %  row of chosen class, col of correct class

    end ;    %  of iloo, leave one out, loop 



    numcorrect = sum(diag(mresults)) ;
        %  number of cases put in exactly the correct class
    numcorrectsp = sum(sum(mresults(1:2,1:2))) + ...
                           mresults(3,3) + ...
                   sum(sum(mresults(4:5,4:5))) ;
        %  number correct, when classes 1 & 2 are pooled,
        %  and classes 4 & 5 are pooled


    vncorr = [vncorr; numcorrect/nint] ;
    vncorrsp = [vncorrsp; numcorrectsp/nint] ;


  end ;    %  of ntrim loop through trimming amounts






  %  Make bar graph of correct class rate
  %
  barh1 = bar((1:length(vntrim))',vncorr) ;
  set(barh1,'FaceColor','r') ;
  axis([0.5, length(vntrim)+0.5, 0, 1]) ;
  set(gca,'XTick',1:length(vntrim)) ;
      %  one tick per bar, so that the labels below 
      %  cannot be matched to automatically chosen ticks
  set(gca,'XTickLabel',num2str(vntrim)) ;
  title(['5 Class CV Classification Rate, ' methodstr],'FontSize',15) ;
  xlabel('# genes used','FontSize',15) ;
  ylabel('Correct Class''n Rate','FontSize',15) ;



  %  Write the figure to files, in several formats
  %
  outstr = 'MarronPerouList1FigC' ;
  orient landscape ;
  print('-dpsc2',outstr) ;
  print('-depsc2',outstr) ;
  print('-djpeg90',outstr) ;
  print('-dtiff',outstr) ;



end ;    %  of iscale loop



