%MATLAB SCRIPT FILE s322eg4.m
%
%    EXAMPLE 4 FOR STATISTICS 322
%    POSTED 8/29/96
%    INTRODUCES RANDOM NUMBER GENERATION AND SIMULATION

%    ipar SELECTS WHICH PART OF THIS EXAMPLE IS RUN BELOW:  1,...,7
ipar = 7 ;

%    SUPPRESS THE EXTRA BLANK LINES IN COMMAND WINDOW OUTPUT
format compact ;

%    BANNER, ECHOED TO THE SCREEN BECAUSE THERE IS NO SEMICOLON.
%    THE "\n" IN THE sprintf FORMAT IS THE SAME LINE FEED CHARACTER
%    THAT THE NUMBER 10 GIVES WHEN PLACED INSIDE [ ] WITH A STRING.
running = sprintf('\n%s%s\n','MATLAB script s322eg4.m, with ipar = ',num2str(ipar))


%    THE "MONTE CARLO METHOD" REFERS TO DOING PROBABILISTIC
%    CALCULATIONS (E.G. CALCULATION OF MOMENTS) BY GENERATING
%    A "SAMPLE" OF RANDOM VARIABLES WITH THE DESIRED DISTRIBUTION
%    AND USING THE CORRESPONDING EMPIRICAL QUANTITY (E.G. THE 
%    SAMPLE MOMENTS) AS AN APPROXIMATION.
%
%    THE STRENGTH OF THIS METHOD IS THAT MANY QUANTITIES WHICH ARE
%    INTRACTABLE TO ANALYTIC METHODS (THERE ARE ALL TOO MANY SUCH
%    IN REAL WORLD PROBLEMS), CAN BE EASILY CALCULATED.
%
%    THE MAIN DISADVANTAGES OF THE MONTE CARLO METHOD ARE:
%    1.  THE ANSWER DEPENDS ON THE PARTICULAR REALIZATION ONE HAS,
%          ALTHOUGH THIS EFFECT CAN BE QUANTIFIED, AND IT CAN BE
%          MITIGATED WITH A LARGE ENOUGH SAMPLE.
%    2.  FOR AN ACCURATE ANSWER, A LARGE SAMPLE IS REQUIRED.  THIS
%          CAN BE VERY SLOW FOR COMPLICATED PROBLEMS.


if ipar == 1 ;    %  THEN DO PART 1 OF "UNIFORM RANDOM NUMBER GENERATION"

  %    GENERATION OF "RANDOM SAMPLES" IS TYPICALLY DONE WITH A 
  %    "PSEUDO-RANDOM NUMBER GENERATOR".
  %
  %    THE TERM "PSEUDO-RANDOM" IS USED BECAUSE SUCH ALGORITHMS GIVE
  %    A "DETERMINISTIC SEQUENCE", WHICH IS NOT RANDOM IN THE USUAL
  %    PROBABILISTIC SENSE.  HOWEVER THESE SEQUENCES BEHAVE LIKE
  %    "TRULY RANDOM" SEQUENCES, IN THE SENSE THAT, FOR MOST PURPOSES,
  %    THEY EFFECTIVELY APPROXIMATE IMPORTANT PROPERTIES SUCH AS THE
  %    LAWS OF LARGE NUMBERS, THE CENTRAL LIMIT THEOREM,....
  %
  %    warning:  THE HISTORY OF PSEUDO-RANDOM GENERATION IS FILLED WITH
  %    PROPOSED METHODS THAT HAVE LATER BEEN SHOWN TO BE BAD IN SOME 
  %    SENSE.  THIS IS USUALLY DONE BY FINDING PROBLEMS WHICH THE METHOD
  %    SOLVES INCORRECTLY BY THE MONTE CARLO METHOD.  SINCE IT IS 
  %    IMPOSSIBLE TO TRY ALL PROBLEMS, THE QUALITY OF CURRENTLY USED
  %    METHODS IS STILL OPEN TO DEBATE.  THE ONLY COMFORT WE HAVE IS 
  %    THAT THESE METHODS HAVE BEEN TESTED QUITE CAREFULLY ON PROBLEMS
  %    THAT PEOPLE HAVE BEEN ABLE TO THINK UP.  HISTORY SUGGESTS THAT
  %    OTHER PROBLEMS WILL COME UP.
  %
  %    MATLAB USES A "LINEAR CONGRUENTIAL" ALGORITHM, DESCRIBED IN THE
  %    REFERENCE GUIDE, IN THE rand SECTION (AND A REFERENCE IS GIVEN,
  %    BUT BE AWARE THAT THIS LITERATURE IS HUGE).
  %
  %    THIS RANDOM NUMBER GENERATOR WORKS BY UPDATING A VARIABLE CALLED
  %    A "SEED", WHICH IS BETWEEN 1 AND 2^31.  SINCE THERE ARE ONLY 
  %    FINITELY MANY SUCH, THE SEQUENCE WILL EVENTUALLY CYCLE.  HOWEVER
  %    THE CYCLE TIME IS VERY LONG FOR MOST SEED VALUES.  BUT THIS IS
  %    ANOTHER SENSE IN WHICH THESE NUMBERS ARE "NOT REALLY RANDOM".
  %
  %    FOR SOME PURPOSES, YOU DO NOT CARE ABOUT THE VALUE OF THE SEED.
  %    THEN SIMPLY USE THE COMMAND rand TO GENERATE UNIF(0,1) 
  %    REALIZATIONS.

  %    EACH STATEMENT BELOW IS LEFT WITHOUT A SEMICOLON ON PURPOSE, SO
  %    THAT THE LABEL IN note, AND THEN THE MATRIX, ARE ECHOED TO THE
  %    SCREEN.  THE SEED IS NOT SET IN THIS PART.

  note = 'a 2x5 matrix of Unif(0,1) rvs:'
  usamp1 = rand(2,5)
          %  TWO ARGUMENTS MEAN "ROWS, COLUMNS"

  note = 'another 2x5 matrix of Unif(0,1) rvs:'
  usamp2 = rand(2,5)
          %  EACH SUCCESSIVE CALL GIVES AN "INDEPENDENT" REALIZATION

  note = 'another 2x5 matrix of Unif(0,1) rvs:'
  usamp2 = rand(size(usamp2))
          %  RECALL size RETURNS A MATRIX WITH THE NUMBERS OF ROWS
          %  AND COLUMNS, rand ACCEPTS SUCH AN ARGUMENT.
          %  NOTE THAT THIS OVERWRITES THE PREVIOUS usamp2.

  note = 'a bigger matrix of Unif(0,1) rvs:'
  usamp3 = rand(6)
          %  SINGLE ARGUMENT MEANS "nxn"


elseif ipar == 2 ;    %  THEN DO PART 2 OF "UNIFORM RANDOM NUMBER GENERATION"

  %    FOR SOME APPLICATIONS, IT IS IMPORTANT TO CONTROL THE SEED, E.G.
  %    WHEN YOU WANT TO GET THE SAME ANSWERS ON SUCCESSIVE RUNS OF A
  %    SIMULATION PROGRAM.  THERE IS NO SEED "VARIABLE" IN THE USUAL 
  %    SENSE, BUT YOU CAN WORK WITH THIS VALUE, USING THE COMMAND rand
  %    IN A RATHER DIFFERENT WAY.

  %    QUERY THE SEED, SET IT, DRAW ONE NUMBER, THEN SET IT AGAIN TO THE
  %    SAME VALUE AND DRAW AGAIN, TO SHOW THAT THE DRAW IS REPRODUCED.
  %    NOTE(review): THE rand('seed',...) FORM IS PRESUMABLY LEGACY
  %    SYNTAX IN LATER MATLAB RELEASES -- CONFIRM AGAINST THE VERSION
  %    IN USE.

  seed = rand('seed') 
          %  USING THIS "STRING" ARGUMENT TELLS rand TO RETURN THE
          %  VALUE OF THE SEED (ECHOED HERE, SINCE THERE IS NO
          %  SEMICOLON).  HERE I HAVE CREATED A VARIABLE 'seed',
          %  BUT THIS IS NOT WHAT MATLAB IS USING:  CHANGING THE
          %  VARIABLE DOES NOT CHANGE THE GENERATOR.

  rand('seed',53) ;
          %  THIS STRING, WITH A SECOND ARGUMENT SETS THE SEED TO THE
          %  SECOND ARGUMENT (USE AN INTEGER).
  note = 'After setting seed to 53, get:'
  seed = rand('seed') 

  note = 'This gives the next rv:'
  rand(1)
          %  NO ASSIGNMENT, SO THE RESULT IS ECHOED AS ans

  note = 'after generating that, the seed became:'
  seed = rand('seed') 

  note = 'Now reset the seed to 53, to give the next rv:'
  rand('seed',53) ;
  rand(1)
  note = 'Same as above'
          %  I.E. THE SAME VALUE AS THE FIRST rand(1) ABOVE, SINCE THE
          %  SEED WAS THE SAME BEFORE BOTH CALLS.

  %    I DON'T KNOW THIS RANDOM NUMBER GENERATOR WELL, BUT THERE ARE
  %    SOME THAT CAN GIVE SHORT CYCLES WHEN THE SEED IS SMALL, HENCE
  %    I SUGGEST USING A FAIRLY LARGE (BUT < 2^31) SEED WHEN SETTING
  %    THE SEED IN THIS WAY.
  %
  %    IF YOU ARE INTERESTED, FIND OUT BY EXPERIMENTATION WHAT HAPPENS
  %    WHEN YOU USE A SEED LARGER THAN 2^31.


elseif ipar == 3 ;    %  THEN DO PART 3 OF "UNIFORM RANDOM NUMBER GENERATION"

  %    DEPENDENCE OF RANDOM NUMBER GENERATORS ON THEIR "SEED" IS AN
  %    IMPORTANT CONCEPT TO KEEP IN MIND WHEN DOING SIMULATION.  HERE
  %    IS A SLIGHTLY DEEPER EXAMPLE:

  %    SIMULATION DIMENSIONS
  nobs = 100 ;        %  "NUMBER OF OBSERVATIONS" IN EACH DATA SET (ROWS)
  ndset = 5 ;         %  "NUMBER OF DATA SETS" (COLUMNS)

  %    FIX THE STARTING SEED, SO EVERY RUN GIVES THE SAME NUMBERS.
  %    I LIKE THE NAME sseED FOR "STARTING SEED", WRITTEN sseed, BECAUSE
  %    IT HELPS ME MENTALLY SEPARATE IT FROM THE STRING 'seed'.
  sseed = 38768728 ;
  rand('seed',sseed) ;

  %    ALL ndset SAMPLES OF SIZE nobs AT ONCE, ONE IN EACH COLUMN.
  %    DON'T USE LOOPS TO GENERATE THESE!
  data = rand([nobs ndset]) ;

  %    VECTOR OF AVERAGES, ONE FOR EACH COLUMN (ECHOED TO THE SCREEN)
  vavgs = mean(data,1) 

  note = sprintf('Recall the theoretical mean is: .5\n')

  note = sprintf('%s%s','The average of the averages is: ',num2str(mean(vavgs)))
  note = 'which is more accurate, but still subject to variabilty'

  %    SINCE SIMULATION ANSWERS CAN NEVER BE EXACT, FOR MEANINGFUL
  %    RESULTS, ACCURACY NEEDS TO BE ASSESSED IN SOME WAY.  IT IS
  %    USUALLY CONVENIENT TO USE STAT 11/23 STYLE IDEAS FOR
  %    QUANTIFYING THIS "RANDOM VARIABILITY".
  %
  %    ONE APPROACH TO PRESENTING ANSWERS, IS TO NOT GIVE A "POINT
  %    ESTIMATE" OF THE DESIRED VALUE, BUT INSTEAD GIVE AN
  %    "INTERVAL ESTIMATE" ALONG THE LINES OF A CONFIDENCE INTERVAL.

  %    VECTOR OF SAMPLE STANDARD DEVIATIONS, ONE FOR EACH COLUMN
  vsds = std(data,0,1) ;

  %    LEFT ENDPOINTS:  FIRST WITH THE POPULATION VARIANCE KNOWN
  %    (RECALL 1/12 FOR THE UNIF(0,1)), THEN THE USUAL CLT BASED C.I.
  %    WITH THE VARIANCE ESTIMATED SEPARATELY FOR EACH DATA SET.
  vkcil = vavgs - 1.96 * sqrt(1/12) / sqrt(nobs) ;
  vecil = vavgs - 1.96 * vsds / sqrt(nobs) ;

  %    MATCHING RIGHT ENDPOINTS
  vkcir = vavgs + 1.96 * sqrt(1/12) / sqrt(nobs) ;
  vecir = vavgs + 1.96 * vsds / sqrt(nobs) ;

  %    TABLE OF RESULTS.  sprintf "PRINTS" A MATRIX TO A STRING, BUT
  %    BE CAREFUL ABOUT THE STRUCTURE OF THE MATRIX outm:  sprintf
  %    READS "DOWN EACH COLUMN FIRST", SO outm (ONE ROW FOR EACH TYPE
  %    OF ENDPOINT, ONE COLUMN FOR EACH DATA SET) IS THE TRANSPOSE OF
  %    WHAT WE WANT TO SEE.  SPACES IN THE FORMAT STRING RESULT IN
  %    SPACES IN THE OUTPUT.
  outm = [vkcil; vkcir; vecil; vecir] ;
  frmtstr = '%8.4f %8.4f      %8.4f %8.4f\n' ;
  note = ['95% C. I.s for the mean of Unif(0,1)s have endpts:' 10 ...
          '     var known               var estd' 10 ...
          '   left     right         left     right' 10 ...
          sprintf(frmtstr,outm)] 

  %    ANOTHER COMMON APPROACH TO HANDLING THE VARIABILITY INHERENT TO
  %    MONTE CARLO SIMULATIONS IS TO REPORT "STANDARD ERRORS"
  %  
  %    IN ANY CASE, IT IS ESSENTIAL TO ALWAYS KEEP THE VARIABILITY
  %    INHERENT TO THIS METHOD IN MIND.  IT IS SOMETIMES USEFUL TO
  %    UNDERSTAND THIS METHOD VIA THE QUESTION:
  %    "IF I VARY THE SEED, THEN BY HOW MUCH WILL MY ANSWERS CHANGE?"


elseif ipar == 4 ;    %  THEN DO PART 1 OF "NONUNIF RANDOM NUMBERS"

  %    UNIFORM(0,1) R.V.'S ARE TYPICALLY USED AS A FOUNDATION FOR THE
  %    GENERATION OF REALIZATIONS FROM OTHER DISTRIBUTIONS.
  % 
  %    FOR DISCRETE R.V.'S, THE COMMAND find GIVES ONE APPROACH
  %
  %    FOR EXAMPLE, LET X HAVE THE DISTRIBUTION:
  %                /  1    W.P. 1/6
  %           X = <   2    W.P. 1/3
  %                \  4    W.P. 1/2
  %
  %    FIRST LET'S GENERATE A LARGE NUMBER OF THESE:

  nobs = 4096 ;
  sseed = 62937424 ;  rand('seed',sseed) ;
          %  FIXED STARTING SEED, SO THAT EVERY RUN IS THE SAME

  udata = rand(nobs,1) ;
          %  COLUMN VECTOR OF UNIF(0,1)'S

  values = [1 2 4] ;
  probs = [(1/6) (1/3) (1/2)] 
  cumprobs = cumsum(probs)
          %  cumsum GIVES "CUMULATIVE SUM".
          %  AS USUAL, EXPERIMENT IF YOU DON'T UNDERSTAND
          %  
          %  WRITE LAST TWO TO THE SCREEN AS A CHECK.
  cumprobs = [0 cumprobs] ;
          %  PUT 0 IN FRONT, SO THAT cumprobs(i) AND cumprobs(i+1) ARE
          %  THE LOWER AND UPPER CUTOFFS FOR values(i)

  xdata = zeros(nobs,1) ;
  for i = 1:1:length(values) ;
    flag = cumprobs(i) < udata ;
    flag = flag & (udata <= cumprobs(i+1)) ;
          %  1 WHERE LOWER PROB < U(0,1) <= UPPER PROB,
          %  WHICH HAPPENS WITH PROB = (UPPER PROB - LOWER PROB).
    vindex = find(flag) ;
          %  A VECTOR OF INDICES WHERE THIS VALUE OF THE R.V. WILL GO
    xdata(vindex) = values(i) * ones(length(vindex),1) ;
          %  INDICES ON THE LEFT OF AN ASSIGNMENT SAY: " ONLY CHANGE 
          %  THESE".  THE ones(length(vindex),1) PART MAKES THE
          %  NUMBER OF VALUES ON BOTH SIDES OF THE ASSIGNMENT THE
          %  SAME (A SINGLE SCALAR ON THE RIGHT ALSO WORKS, IT IS
          %  THEN ASSIGNED TO EVERY SELECTED ELEMENT).
%xdata
%note = 'any key to continue'
%pause ;
          %  I USED THE ABOVE LINES & nobs = 10 FOR DEVELOPING THIS CODE.
          %  ORDINARILY I WOULD JUST DELETE SUCH THINGS, AFTER THE 
          %  DEVELOPMENT, WHICH IS WHY THEY ARE NOT INDENTED.
  end ;

  %    THE LINES IN THE ABOVE for LOOP WERE WRITTEN IN AN EXPANDED FORM,
  %    SO THAT I COULD EASILY EXPLAIN EVERYTHING.  BUT THEY COULD HAVE 
  %    BEEN CONDENSED, E.G. TO:
  %  xdata(find((cumprobs(i) < udata)&(udata <= cumprobs(i+1)))) = ...
  %        values(i) ;
          %  RECALL THE ... IS FOR "CONTINUE ON THE NEXT LINE".
          %  THE CONDENSED FORM NEVER CREATES vindex, SO IT ASSIGNS
          %  THE SCALAR values(i) DIRECTLY.
  %    SOME PERSONAL TRADEOFF, IN TERMS OF READABILITY, NEEDS TO BE
  %    MADE BETWEEN THE EXPANDED AND COMPRESSED FORMS.

  %    NEXT STUDY THE APPROXIMATION OF "EMPIRICAL PROBS", TO THE 
  %    THEORETICAL PROBS, AND HOW IT IMPROVES WITH SAMPLE SIZE

  nn = 6 ;
          %  nn FOR "NUMBER OF N'S" 
  vn = logspace(log10(4),log10(nobs),nn) ;
          %  GIVES nn NUMBERS "LOGARITHMICALLY SPACED" FROM:
          %      4 = 10^log10(4)   TO   4096 = 10^log10(nobs)
          %  TRY IT WITHOUT THE SEMICOLON, TO SEE.
          %  logspace WORKS IN TERMS OF LOG10, FOR CONVENIENT
          %  USE WITH GRAPHICS (E.G. LOG-LOG PLOT).
          %  THE ENTRIES NEED NOT BE EXACT INTEGERS IN FLOATING POINT,
          %  SO THEY ARE ROUNDED BEFORE BEING USED AS SAMPLE SIZES.

  meprobs = [] ;
          %  THE "EMPTY" MATRIX, WILL FILL WITH EMPIRICAL PROBS
          %  EXPLICITLY SETTING THIS VALUE AVOIDS SURPRISES LATER!

  for in = 1:1:nn ;    %  LOOP THROUGH SOME DIFFERENT SAMPLE SIZES
    n = round(vn(in)) ;
          %  WITHOUT THIS I GOT TROUBLE WITH MATLAB ROUNDING 
          %  64 DOWN TO 63 (I DUNNO WHY, BUT TRY IT YOURSELF!)
          %  EVERYTHING BELOW USES THE INTEGER n, NOT vn(in), SO THAT
          %  THE SUBVECTOR, THE COMPARISON MATRIX AND THE DENOMINATOR
          %  ALL AGREE.
    sxdata = xdata(1:n) ;
          %  TAKE A SUBVECTOR OF SIZE n FROM THE DATA
    flag = ((sxdata * ones(1,length(values))) == (ones(n,1) * values)) ;
          %  n x length(values) MATRIX WITH ONES WHERE sxdata = value 
          %                                FOR THAT COLUMN
    eprobs = sum(flag,1) / n ;
          %  COUNTS THE NUMBER OF ONES IN EACH COLUMN (THE EXPLICIT
          %  DIMENSION 1 KEEPS THIS A COLUMN SUM EVEN WHEN n = 1), 
          %  THEN DIVIDES TO GET THE PROPORTION OF TIMES THAT VALUE
          %  IS TAKEN ON
    meprobs = [meprobs; eprobs] ;
          %  ONE ROW OF EMPIRICAL PROBS FOR EACH SAMPLE SIZE
  end ;

  note = ['Empirical Probs, based on given sample sizes are:' 10] ;
  note = [note '    n         p1     p2     p3 ' 10] ;
    outm = [round(vn); meprobs'] ;
          %  REPORT THE SAMPLE SIZES ACTUALLY USED.  AS BEFORE, sprintf
          %  READS DOWN COLUMNS, SO EACH COLUMN OF outm IS ONE ROW OF
          %  THE TABLE.
    frmtstr = '%6.0f     %6.3f %6.3f %6.3f\n' ;
  note = [note sprintf(frmtstr,outm)] ;
    frmtstr = 'infinity   %6.3f %6.3f %6.3f\n' ;
  note = [note sprintf(frmtstr,probs)] 
    
  
elseif ipar == 5 ;    %  THEN DO PART 2 OF "NONUNIF RANDOM NUMBERS"

  %    HERE IS A RATHER DIFFERENT WAY TO GENERATE DISCRETE VARIABLES
  %    WITH THE SAME DISTRIBUTION AS ABOVE

  nobs = 10 ;
  values = [1 2 2 4 4 4] ;
          %  VALUES ARE REPLICATED ACCORDING TO THEIR DESIRED 
          %  RELATIVE FREQUENCIES, SO THAT PICKING ONE ENTRY OF THIS
          %  VECTOR WITH EQUAL PROBABILITY GIVES 1, 2, 4 WITH 
          %  PROBABILITIES 1/6, 1/3, 1/2.

  vind = ceil(length(values) * rand(1,nobs)) ;
          %  ceil SAYS "ROUND TO THE NEXT LARGEST INTEGER" SO THIS
          %  IS AN nobs VECTOR OF RANDOM INTEGERS FROM 
          %  1,2,...,length(values)  (HERE 1,2,...,6).
          %  USING length(values), INSTEAD OF TYPING IN 6, KEEPS THE
          %  INDICES VALID IF THE values VECTOR IS CHANGED.

  note = 'Using the random indices'
  vind

  note = 'A vector of data from our discrete distribution is:'
  xdata = values(vind) 
          %  A VECTOR OF INDICES PICKS OUT THE CORRESPONDING ENTRIES

  %    SINCE THE SEED IS NOT RESET, YOU CAN RUN THIS PART SEVERAL
  %    TIMES TO SEE WHAT HAPPENS WITH DIFFERENT REALIZATIONS
  %
  %    THERE MAY BE MORE CLEVER (E.G. SIMPLER & EASIER TO PROGRAM,
  %    OR ELSE COMPUTATIONALLY FASTER) WAYS TO GENERATE DISCRETE
  %    RANDOM VARIABLES, CAN YOU THINK OF ANY?


elseif ipar == 6 ;    %  THEN DO PART 3 OF "NONUNIF RANDOM NUMBERS"

  %    A WELL KNOWN DEVICE FOR USING UNIF(0,1) R.V.'S TO GENERATE
  %    OTHER TYPES IS THE "PROBABILITY (INTEGRAL) TRANSFORM", 
  %    WHICH USES THE FACT THAT IF  U ~ UNIF(0,1),  AND  F  IS A 
  %    C.D.F.,  THEN  F^(-1)(U)  HAS THE DISTRIBUTION  F,  WHERE
  %    F^(-1)  IS THE USUAL INVERSE FUNCTION.
  % 
  %    THE DISCRETE GENERATION DONE IN ipar = 4 WAS ACTUALLY A
  %    SPECIAL CASE OF THIS.
  %
  %    AS ANOTHER EXAMPLE, LET'S GENERATE SOME EXPONENTIALS, AND DO
  %    SOME SIMPLE CHECKS THAT THE DISTRIBUTION IS RIGHT.

  nobs = 8000 ;
  data = rand(nobs,1) ;

  data = -log(1 - data) ; 
          %  RECALL C.D.F. OF EXPONENTIAL IS:  F(X) = 1 - exp(-x)
          %  SO THE INVERSE IS:  F^(-1)(U) = -log(1 - U)

  %  NOW CHECK THAT THESE BEHAVE LIKE EXPONENTIAL R.V.'S.
  
  x = [.5 1 2] ;
          %  SOME CUTOFF POINTS
  tprobs = 1 - exp(-x) ;
          %  C.D.F. AT CUTOFF POINTS, I.E. THEORETICAL PROBS
          %  THAT EXPON(1) ARE LESS THAN THE CUTOFF VALUES.

  eprobs = zeros(size(x)) ;
          %  START FROM A CLEAN VECTOR OF THE RIGHT SIZE.  WITHOUT THIS,
          %  AN eprobs LEFT IN THE WORKSPACE BY AN EARLIER RUN COULD
          %  KEEP STALE ENTRIES (OR THE WRONG SHAPE).
  for i = 1:length(x) ;
    eprobs(i) = length(find(data <= x(i))) / nobs ;
          %  find IS VECTOR OF INDICES WHERE data LESS THAN CUTOFF.
          %  length COUNTS HOW MANY OF THEM.
  end ;

  note = 'Do these empiricial and theoretical values look close?'
  tprobs
  eprobs

  %    HOW CLOSE SHOULD THESE BE?   GET MORE QUANTITATIVE WITH SOME 
  %    CONFIDENCE INTERVALS....

  vsds = sqrt(tprobs .* (1 - tprobs)) ;
          %  BINOMIAL S.D.'S (FOR A SINGLE OBSERVATION), USING THE
          %  THEORETICAL PROBS
  cil = eprobs - 1.96 * vsds / sqrt(nobs) ;
  cir = eprobs + 1.96 * vsds / sqrt(nobs) ;
          %  LEFT AND RIGHT ENDPOINTS, CENTERED AT THE EMPIRICAL PROBS

  result = [10 'To check goodness of fit of the simulated exponentials:' 10] ;
  result = [result '    x           cil    theory     cir' 10] ;
          %  APPEND THE COLUMN HEADINGS TO THE TITLE LINE (ASSIGNING
          %  WITHOUT result ON THE RIGHT WOULD THROW THE TITLE AWAY).
    outm = [x; cil; tprobs; cir] ;
          %  sprintf READS DOWN COLUMNS, SO EACH COLUMN OF outm (ONE
          %  FOR EACH CUTOFF) BECOMES ONE ROW OF THE TABLE.
    frmtstr = '%8.4f    %8.4f %8.4f %8.4f\n' ;
  result = [result sprintf(frmtstr,outm)] 

  %  YOU MAY WANT TO FIDDLE WITH SOME DIFFERENT VALUES OF nobs AND/OR
  %  JUST RUNNING THIS A FEW TIMES TO SEE HOW THE ANSWER CHANGES .


elseif ipar == 7 ;    %  THEN DO PART 4 OF "NONUNIF RANDOM NUMBERS"

  %    THE "PROBABILITY TRANSFORM" METHOD HAS AN IMPORTANT LIMITATION:
  %    YOU NEED TO KNOW THE INVERSE C.D.F.  F^(-1),  AND IT MUST BE
  %    CONVENIENT (AND FAST) TO COMPUTE.  SINCE THIS IS OFTEN NOT THE
  %    CASE, MUCH WORK HAS BEEN DONE TO DEVELOP ALTERNATIVE METHODS.
  %    THERE ARE MANY OTHER WAYS TO USE UNIF(0,1) R.V.'S TO GENERATE
  %    OTHER DISTRIBUTIONS.  
  %  
  %    AN EXAMPLE IS THE GENERATION OF POISSON R.V.'S.  NOTE THAT
  %    THE C.D.F. IS DISCRETE, BUT HAS INFINITELY MANY BRANCHES,
  %    WHICH ARE TRICKY TO WORK WITH, AND COULD INVOLVE SOME RATHER
  %    COMPLICATED LOOPING.  AN ALTERNATIVE IS TO USE THE FACT THAT
  %    THE POISSON PROCESS HAS EXPONENTIAL WAITING TIMES, AND IS 
  %    THUS STRAIGHTFORWARD TO SIMULATE.  THE "INCREMENTS" OF THIS
  %    PROCESS (THE COUNTS OF EVENTS IN DISJOINT INTERVALS OF LENGTH
  %    1) ARE THEN JUST INDEPENDENT POISSON VARIABLES.

  nobs = 1000 ;
          %  NUMBER OF UNDERLYING POISSONS DESIRED
  enobs = nobs ;
          %  SINCE THE EXPON(1) DISTRIBUTION HAS MEAN 1, THIS WILL
          %  BE THE RIGHT NUMBER OF EXPONENTIALS ON AVERAGE.  SINCE
          %  THE EXPON(1) DISTRIBUTION HAS VARIANCE 1, A BETTER
          %  CHOICE IN REAL APPLICATIONS MIGHT BE SOMETHING LIKE:
          %  enobs = ceil(nobs + 2 * sqrt(nobs)) ;
          %       I.E. MEAN + 2 STANDARD DEVIATIONS (ROUNDED UP, SINCE
          %       IT IS USED AS A MATRIX SIZE)
          %  BUT THE ABOVE GIVES YOU A GOOD CHANCE OF SEEING
          %  SOME "UPDATING" ACTUALLY HAPPEN          

  edata = rand(enobs,1) ;
  edata = -log(1 - edata) ; 
          %  I.I.D. EXPON(1)'S AS ABOVE
  ppevents = cumsum(edata) ;
          %  COMBINE EXPONENTIAL WAITING TIMES TO BECOME "EVENTS" OF
          %  A HOMOGENEOUS POISSON PROCESS

  %    IF THE DATA VECTOR OF "EVENTS" IS NOT LONG ENOUGH, THEN ADD 
  %    SOME MORE, WITH MORE EXPONENTIALS.
  while max(ppevents) < nobs ;
    note = '    adding more exponentials'
          %  GOOD TO SEE THIS ON THE SCREEN WHEN IT HAPPENS
    edata = rand(ceil(2*sqrt(enobs)),1) ;
          %  NUMBER OF OBSERVATIONS IS 2 STANDARD DEVIATIONS, ROUNDED
          %  UP BECAUSE 2*sqrt(enobs) IS GENERALLY NOT AN INTEGER AND
          %  A MATRIX SIZE MUST BE ONE.
    edata = -log(1 - edata) ; 
          %  GENERATE MORE EXPONENTIALS, AS ABOVE
    ppevents = [ppevents; ppevents(end) + cumsum(edata)] ;    
          %  UPDATE CUMULATIVE SUM:  THE NEW EVENTS CONTINUE FROM THE
          %  LAST EVENT SO FAR.  (ppevents IS INCREASING, SO ITS LAST
          %  ENTRY IS ITS MAX.)  ONLY THE NEW EVENTS ARE APPENDED;
          %  APPENDING cumsum([max(ppevents); edata]) WOULD PUT THE
          %  LAST OLD EVENT INTO THE VECTOR A SECOND TIME.
  end ;

  ppevents = ppevents(find(ppevents < nobs)) ;
          %  KEEP ONLY THOSE POISSON EVENTS SMALLER THAN nobs

%  temp = 5 * sort(rand(1,7)) 
%  [counts,bincents] = hist(temp,.5:1:3.5)
          %  I USED THESE LINES TO FIGURE OUT HOW TO USE hist TO GIVE
          %  A VECTOR OF COUNTS.  YOU MIGHT TRY RUNNING THESE A FEW
          %  TIMES.

  [pdata,bincents] = hist(ppevents,.5:1:(nobs-.5)) ;
          %  BIN CENTERS .5, 1.5, ..., nobs-.5 GIVE ONE BIN FOR EACH
          %  UNIT INTERVAL, SO pdata IS A VECTOR OF nobs COUNTS OF
          %  EVENTS PER UNIT INTERVAL, I.E. THE POISSON(1) DATA.

  %    FINALLY TEST EMPIRICAL VS. THEORETICAL PROBS USING A
  %    CHI SQUARE TEST OF GOODNESS OF FIT.

  tprobs = [1 1 (1/2) (1/6)] / exp(1) ;
          %  POISSON(1) PROBS OF 0,1,2,3
  tprobs = [tprobs (1 - sum(tprobs))] ;
          %  TACK ON POISSON(1) PROB OF  x > 3 ;  
  expcts = nobs * tprobs ;
          %  EXPECTED COUNTS FOR EACH "CELL"
  [obscts,bincents] = hist(pdata,0:1:4) ;
          %  OBSERVED COUNTS FOR EACH "CELL".  THE LAST BIN (CENTER 4)
          %  ALSO COLLECTS EVERYTHING LARGER, SO IT IS THE  x > 3  CELL.
  chisq = sum((obscts - expcts).^2 ./ expcts) ;
          %  PEARSON'S CHI SQUARE STATISTIC:  THE SQUARED DIFFERENCES
          %  ARE DIVIDED BY THE EXPECTED COUNTS (NOT THE OBSERVED ONES,
          %  WHICH COULD ALSO BE 0 FOR A SPARSE CELL).
  pvalue = 1 - gamcdf(chisq,4/2,1/2) ;
          %  5 CELLS, NO ESTIMATED PARAMETERS, SO 4 DEGREES OF FREEDOM.
          %  gamcdf IS A FUNCTION THAT I WROTE, TO BE DISCUSSED
          %  NEXT TIME.
          %  NOTE(review): THIS ASSUMES THE THIRD ARGUMENT OF THAT
          %  gamcdf IS A RATE (CHI SQUARE WITH 4 D.F. IS GAMMA WITH
          %  SHAPE 2 AND SCALE 2, I.E. RATE 1/2).  A gamcdf WHOSE
          %  THIRD ARGUMENT IS A SCALE WOULD NEED 2 HERE -- CONFIRM
          %  AGAINST THE gamcdf ACTUALLY ON THE PATH.

  note = 'The p-value for rejecting the Poisson(1) dist''n is:'
  pvalue 

  %    THIS OUTPUT SHOULD BEHAVE LIKE A UNIF(0,1) R.V.  RUN THIS
  %    A FEW TIMES AND SEE IF YOU AGREE.


end ;
