disp('Running MATLAB script file MarronPerouList1FigB.m') ;
%
%    This makes Figure B for the Marron-Perou List paper,
%    which shows the example graphic illustrating the Cluster Index
%
%    Copied from GeneArray6.m
%    which was for analyzing Chuck Perou's two Gene Array data sets,
%    this was essentially ipart == 14, in that program
%
%    This first section sets the run parameters, loads the data
%    and splits it into the "intrinsic" and "fumi" column subsets.
%



viscale = [3] ;     %  Index of scaling type
                    %  1 - raw ratios
                    %  2 - log2(raw ratios)
                    %  3 - row median normalized log2 ratios (Chuck's choice)
                    %  4 - row median & MAD standardized
                    %  5 - row median & max standardized
                    %
                    %  Keep this a scalar or ROW vector:  the main loop
                    %  is "for iscale = viscale", which steps through
                    %  the columns of viscale



vidisc = [1] ;    %  Index of discrimination method (when needed)
                  %  1 - Cluster Index




vntrim = [2; 5; 10; 20; 30; 50; 70; 100; 200; 459] ;
          %  numbers to trim to for discrimination
nntrim = length(vntrim) ;




asciifilestr = 'GA6OnlyInputs.txt' ;
filestr = 'GA6DataAll' ;
    %  filestr is the name of the preloaded data file read below

%  set up class labels
%
classlabelcellstr = {'Basal', ...
                     'ERB', ...
                     'Normal', ...
                     'Lum A', ...
                     'Lum BC'} ;
nclust = size(classlabelcellstr,2) ;
    %  number of classes = number of labels
classcolorcellstr = {'m', ...
                     'c', ...
                     'g', ...
                     'b', ...
                     'r'} ;



%   Use preloaded version of main data
%
%   The file is expected to provide the variables used below and
%   later in this script:  mdata, flagintrinsic, flagfumi,
%   flagclass2 and flagclass4
%
load(filestr) ;
    %  function form of load, instead of building a command string
    %  for eval (same result, and safe for names containing blanks)


%  Column subsets of the data matrix (rows of mdata are kept intact)
%
mdataint = mdata(:,flagintrinsic) ;
mdatafumi = mdata(:,flagfumi) ;

%  Matching subsets of the row 2 class labels
%
flagclass2int = flagclass2(flagintrinsic) ;
flagclass2fumi = flagclass2(flagfumi) ;

%  Dimensions:  d rows, and the number of columns in each subset
%
d = size(mdata,1) ;
nint = size(mdataint,2) ;
nfumi = size(mdatafumi,2) ;

figh = figure(1) ;
clf ;




%  Set up color map, 65 rows of [R G B]:
%      rows  1 - 32:  green fading linearly from full to black
%      rows 33 - 64:  red rising linearly from black to full
%      row  65:       gray
%
vzero = zeros(32,1) ;
vdown = linspace(1,0,32)' ;
vup = linspace(0,1,32)' ;
genecomap = [vzero, vdown, vzero ; ...
             vup, vzero, vzero ; ...
             0.5, 0.5, 0.5] ;
colormap(genecomap) ;


%  Plotting extents, each padded by half a cell:
%      horizontal, from left to rightint (nint columns)
%      vertical, from bottomi to topi (d rows)
%  bottomi and topi are the end points of the class boundary
%  lines drawn over the image later in this script
%
halfcell = 0.5 ;
left = -halfcell ;
bottomi = -halfcell ;
rightint = nint + halfcell ;
topi = d + halfcell ;




%  do scaling
%
for iscale = viscale ;

  %  Rescale both data sets according to iscale.
  %
  %  Each case sets:
  %      mdataintrs, mdatafumirs  -  rescaled data matrices
  %      ccenter, cradius         -  center and radius of the color scale
  %      scaletitstr, scaleoutstr -  strings describing the scaling
  %
  %  Row medians, MADs and maxima are always computed from the
  %  intrinsic data, and then applied to both data sets.
  %
  %  Logs are computed as log(x) / log(2), not log2(x),
  %  for better handling of NaNs.
  %
  switch iscale ;

    case 1 ;    %  raw ratios

      mdataintrs = mdataint ;
      mdatafumirs = mdatafumi ;

      ccenter = 1 ;
      cradius = (log(8) + 1) / 2 ;
          %  color scale, center and radius

      scaletitstr = 'Raw Ratios' ;
      scaleoutstr = 'RawR' ;

    case 2 ;    %  log2(raw ratios)

      mdataintrs = log(mdataint) / log(2) ;
      mdatafumirs = log(mdatafumi) / log(2) ;

      ccenter = 0 ;
      cradius = log2(8) ;
          %  color scale, center and radius

      scaletitstr = 'Log2 Ratios' ;
      scaleoutstr = 'Log2R' ;

    case 3 ;    %  row median normalized log2 ratios (Chuck's choice)

      mdataintrs = log(mdataint) / log(2) ;
      mdatafumirs = log(mdatafumi) / log(2) ;

      vmedian = (nanmedian(mdataintrs'))' ;
          %  double transpose, since nanmedian works down columns only
          %  nanmedian correctly handles NaNs

      mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
      mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;

      ccenter = 0 ;
      cradius = log(8) ;
          %  color scale, center and radius
          %  NOTE(review):  this is the natural log, while case 2 uses
          %  log2(8) for the same log2 scale.  Left unchanged, since it
          %  sets the colors of the figure - confirm it is intended.

      scaletitstr = 'Log2 Ratios - med' ;
      scaleoutstr = 'L2Rmm' ;

    case 4 ;    %  row median & MAD standardized

      mdataintrs = log(mdataint) / log(2) ;
      mdatafumirs = log(mdatafumi) / log(2) ;

      vmedian = (nanmedian(mdataintrs'))' ;
          %  double transpose, since nanmedian works down columns only
          %  nanmedian correctly handles NaNs

      mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
      mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;

      vmad = (nanmedian(abs(mdataintrs')))' ;
          %  Median Absolute Deviation from the row median:
          %  mdataintrs is already median centered at this point,
          %  so this is the median of |log2 ratio - row median|
          %  (previously taken from the raw ratios, mdataint)
          %  double transpose, since nanmedian works down columns only
          %  nanmedian correctly handles NaNs
      vmad = vmad / norminv(0.75) ;
          %  adjust to scale of standard deviation:
          %  for Gaussian data, MAD = norminv(0.75) * sigma
          %  (norminv(0.75) - norminv(0.25) is the constant for the
          %   interquartile range, which is twice too large here)

      mdataintrs = mdataintrs ./ vec2matSM(vmad,nint) ;
      mdatafumirs = mdatafumirs ./ vec2matSM(vmad,nfumi) ;

      ccenter = 0 ;
      cradius = 2 ;
          %  color scale, center and radius

      scaletitstr = 'Log2(R - med) / mad' ;
      scaleoutstr = 'L2Rmmom' ;

    case 5 ;    %  row median & max standardized

      mdataintrs = log(mdataint) / log(2) ;
      mdatafumirs = log(mdatafumi) / log(2) ;

      vmedian = (nanmedian(mdataintrs'))' ;
          %  double transpose, since nanmedian works down columns only
          %  nanmedian correctly handles NaNs

      mdataintrs = mdataintrs - vec2matSM(vmedian,nint) ;
      mdatafumirs = mdatafumirs - vec2matSM(vmedian,nfumi) ;

      vmax = (nanmax(abs(mdataintrs')))' ;
          %  double transpose, since nanmax works down columns only
          %  nanmax correctly handles NaNs

      mdataintrs = mdataintrs ./ vec2matSM(vmax,nint) ;
      mdatafumirs = mdatafumirs ./ vec2matSM(vmax,nfumi) ;

      ccenter = 0 ;
      cradius = 2 / 3 ;
          %  color scale, center and radius

      scaletitstr = 'Log2(R - med) / max' ;
      scaleoutstr = 'L2Romax' ;

    otherwise ;    %  fail here, instead of with undefined or stale data

      error(['MarronPerouList1FigB:  unrecognized scaling index, ' ...
             'iscale = ' num2str(iscale)]) ;

  end ;



  %  Put the rescaled data into display order
  %
  %  Columns first:  sort the cases by their row 4 class labels,
  %  so that each class forms one contiguous group of columns
  %
  flagclass4int = flagclass4(flagintrinsic) ;
  [sortflag4int,vindcol] = sort(flagclass4int) ;
      %  vindcol is the column ("case") permutation

  %  Class boundaries:  the half integer positions lying between
  %  neighboring sorted cases whose labels differ
  %
  vdif = diff(sortflag4int) ;
  vhalf = 1.5:1:nint ;
  vbdry = vhalf(vdif > 0.5) ;

  mdataintrsco = mdataintrs(:,vindcol) ;
      %  intrinsic data in "case ordering"

  sort4flag2int = flagclass2int(vindcol) ;
      %  row 2 labels, carried along in the same order


  %  Rows next:  use the modified Chuck's ordering
  %  (with 3 new genes at end), read as a sort key from file
  %
  sortkey = load('GA6RowSortKey.txt') ;
  [temp,vindrow] = sort(sortkey) ;
      %  vindrow is the row ("gene") permutation

  mdataintro = mdataintrsco(vindrow,:) ;
      %  intrinsic data:  rows and columns both re-ordered
  mdatafumiro = mdatafumirs(vindrow,:) ;
      %  fumi data:  rows re-ordered only



  %  Compute the Cluster Index of each gene (row of mdataintro):
  %
  %      Cluster Index = SSbetween / SStotal
  %
  %  where SStotal is the sum of squares about the overall row mean,
  %  and SSbetween sums, over the classes, the number of non-missing
  %  values in the class times the squared distance from the class
  %  mean to the overall mean.  Missing values (NaNs) are left out
  %  of all means, sums and counts.
  %
  methstr = 'ClustInd' ;
  methodstr = 'Cluster Index' ;
  ylabelstr = 'SSbetween / SStotal' ;

  vmeanoa = (nanmean(mdataintro'))' ;
      %  overall mean of each gene
      %  double transpose, since nanmean works down columns only
      %  nanmean correctly handles NaNs

  %  Total sum of squares
  %
  sst = (mdataintro - vec2matSM(vmeanoa,nint)).^2 ;
  sst = (nansum((sst)'))' ;

  %  Between class sum of squares, accumulated over classes 1,...,nclust
  %  (class labels are the values of sortflag4int)
  %
  ssb = zeros(size(mdataintro,1),1) ;
  for iclass = 1:nclust ;

    mdatac = mdataintro(:,sortflag4int == iclass) ;
        %  columns belonging to this class

    vnc = sum(~isnan(mdatac),2) ;
        %  number of non-missing values, for each gene
    vmeanc = (nanmean(mdatac'))' ;
        %  class mean, for each gene (NaN when the class has no values)

    vssbc = vnc .* (vmeanc - vmeanoa).^2 ;
    vssbc(vnc == 0) = 0 ;
        %  a class with no observed values for a gene contributes
        %  nothing (otherwise 0 * NaN = NaN would make the whole
        %  index of that gene NaN)

    ssb = ssb + vssbc ;

  end ;

  %  To check the calculations, start from ssw = zeros(size(ssb)),
  %  add inside the loop above:
  %      ssw = ssw + (nansum(((mdatac - ...
  %                      vec2matSM(vmeanc,size(mdatac,2))).^2)'))' ;
  %  and then check that this is 0 (up to rounding):
  %      max(abs(sst - (ssb + ssw)))

  vind = ssb ./ sst ;
    %  vector of clustering indices
    %  (NaN only where sst is 0 or NaN)


  maxthresh = 0.48 ;
  indtop = 1 ;





  %  Draw the two panels:  each shows the intrinsic data as a color
  %  image, with only the nthresh genes of largest Cluster Index kept
  %  (all other rows are set to the center of the color scale)
  %
  clf ;
  nframe = 2 ;
  vthresh = linspace(0,maxthresh,nframe) ;
      %  kept from the movie version, not used below

  vnthresh = [150, 30] ;
      %  number of genes kept in each frame


  %  Rank the genes by decreasing Cluster Index
  %  (same for every frame, so done once, outside the loop)
  %
  vindrank = vind(:,1) ;
  vindrank(isnan(vindrank)) = -Inf ;
      %  an undefined (NaN) index must rank last:  sort puts NaNs at
      %  the end of an increasing sort, so the flip below would
      %  otherwise move them to the top of the list
  mind = [vind, (1:d)'] ;
  [temp,sortind] = sort(vindrank) ;
      %  indices for putting the indices in increasing order
  mind = mind(sortind,:) ;
      %  reorders mind, so that first column increases
  mind = flipud(mind) ;
      %  reorders mind, so that first column decreases


  for iframe = 1:nframe ;    %  originally for movie frames

    subplot(1,2,iframe) ;

    nthresh = min(vnthresh(iframe),d) ;
        %  can't keep more genes than there are

    threshind = mind(1:nthresh,2) ;
        %  indices to keep
    threshflag = zeros(d,1) ;
    threshflag(threshind) = 1 ;
    threshflag = logical(threshflag) ;

    mtdata = ccenter * ones(d,nint) ;
    mtdata(threshflag,:) = mdataintro(threshflag,:) ;
        %  rows not kept stay at ccenter, the middle of the color scale


    %  Transform to image color scale
    %
    imdata = 32 + (32/cradius) * (mtdata - ccenter) ;
          %  map values [ccenter - cradius,ccenter + cradius]
          %                      to [0,64]

    %  keep big values from going beyond upper end
    %    (and don't want to be shown as missing)
    %
    flagbig = imdata > 64 ;
        %  ones where imdata above upper end of color range
    imdata(flagbig) = 64 ;

    %  turn the missings into value 65, to show as gray
    %
    flagmiss = isnan(imdata) ;
    imdata(flagmiss) = 65 ;

    image(imdata) ;
      title(['Gene Paring, ' methodstr ...
               ', # genes = ' num2str(nthresh)],'FontSize',15) ;
      xlabel('Case','FontSize',15) ;
      ylabel('Gene','FontSize',15) ;

      %  white vertical lines at the class boundaries
      %  (one line per entry of vbdry, however many there are)
      %
      if ~isempty(vbdry) ;
        hold on ;
          plot([vbdry(:)'; vbdry(:)'], ...
               [bottomi; topi] * ones(1,length(vbdry)), ...
               'w-') ;
        hold off ;
      end ;


  end ;    %  of iframe loop





    %  Save the current figure, in landscape orientation, once for
    %  each graphics format (print adds the file extension).
    %  The file name does not depend on iscale, so when viscale has
    %  more than one entry, each pass writes over the previous files.
    %
    outstr = 'MarronPerouList1FigB' ;
    orient('landscape') ;
    cdevice = {'-dpsc2', '-depsc2', '-djpeg90', '-dtiff'} ;
    for idevice = 1:length(cdevice) ;
      print(cdevice{idevice},outstr) ;
    end ;



end ;    %  of iscale loop



