9-29-2006

SAS Syntax

 

/*Note that this code is a definite increase in complexity over what we have worked

  with in previous recitations --  It is useful though, so make an effort to

  understand it*/

 

 

/*Problem 3.72*/

 

/* A simple way to simulate 100 coin tosses*/

data binomial;

      p=ranbin(0,100,.5);

run;

/*tells you how many events occurred -- here events are heads --should expect half to be heads*/

proc print data=binomial;

run;

/* A simple way to simulate 100 sets of 100 coin tosses*/

data binomial;

      do i=1 to 100;

            p=ranbin(0,100,.5);

      output;

  end;

run;

/*Mean should be half the number of tosses*/

proc means data=binomial;

var p;

run;

/*Mean should be half the number of tosses -- remember, this gives you a stem plot*/

proc univariate data=binomial plot;

var p;

run;

 

/*Problem 4.6 -- this is actually a silly question -- remember, probability is a relative frequency*/

/*so the count is not exactly the prob -- prob= count / (total # of trials)*/

/*Given that information, you should know what to expect.*/

data binomial;

      p=ranbin(0,40,.5);

run;

proc print data=binomial;

run;

 

data binomial;

      p=ranbin(0,120,.5);

run;

proc print data=binomial;

run;

 

 

/* The triangular distribution problem -- 4.56*/

data uniform;

/*Remember folks, area of a triangle=1/2(base*height)*/

/*The triangular distribution of the sum is best approximated by a normal distribution*/

/*Note the variance of a uniform(0,1) variable is 1/12 */

/*The mean of this distribution is 1/2 of the base length of the triangle,while the variance is the sum of the 2 variances (.16)*/

  do i=1 to 10000;

            a=ranuni(0);

            b=ranuni(0);

            c=a+b;

            norm=1+rannor(0)*.4;

      output;

 end;

 

run;

proc univariate data=uniform plot;

title 'Distribution of Functions of Uniform random variables';

histogram /cfill=salmon  barwidth=6   midpoints=0 to 2 by 0.08;

var a b c norm;

run;

data cdf;

/* The CDF function gives you the cumulative frequency up to a point in a distribution -- helps with the last problem*/

/*The 1,1,.16 arguments in the CDF function give the CDF up to a value of 1 in the distribution that is N(1,.16)*/

/*Why would I evaluate a Normal distribution that is N(1,.16)?*/

z=cdf('normal',1,1,.16);

run;

proc print data=cdf;

var z;

run;

 

 

 

 

 

 

 

/*Proving that the Normal distribution approximates the binomial distribution with large N and large P -- and some CLT*/

%macro CLT(n=);

%do i=1 %to &n.;

 

ods listing close;

filename foo dummy 'foo';

proc printto log=foo;

RUN;

QUIT;

 

data binomial;

do i=1 to 1000;

binprob=ranbin(0,1000,.8);

normprob=800+(rannor(0)*12.649);

output;

end;

run;

 

proc means data=binomial;

var normprob;

run;

 

 

proc kde data=Binomial out=density_bin;

var binprob;

run;

 

data density_bin;

set density_bin;

densitybin=density;

id=_n_;

keep id binprob densitybin;

run;

 

proc kde data=Binomial out=density_norm;

var normprob;

run;

 

data density_norm;

set density_norm;

densitynorm=density;

id=_n_;

keep id normprob densitynorm;

run;

 

 

 

proc means data=Binomial;

var binprob;

output out=means mean=mu std=sigma;

run;

 

proc print data=means;

run;

 

 

%if &i=1 %then %do;

      data m_plot;

            set means;

            run;

      %end;

      %else %do;

            data m_plot;

                  set m_plot means;

                  run;

            %end;

 

%end;

 

 

 

proc kde data=m_plot out=density_mu;

var mu;

run;

 

data density_mu;

set density_mu;

id=_n_;

run;

 

 

data mgd;

merge density_bin density_norm;

by id;

run;

 

 

ods listing;

proc printto log=log;

run;

 

goptions;

symbol1 color=salmon interpol=join w=5;

symbol2 color=steel interpol=join w=5;

title 'Binomial (NP, NPQ) Distribution versus Normal(NP, NPQ) Distribution, N=1k, p=.8';

PROC GPLOT DATA=mgd;

     PLOT densitybin*binprob densitynorm*normprob /OVERLAY  NOFRAME                                        

 

;

RUN;

 

goptions;

symbol1 color=steel interpol=join w=5;

title 'CLT Results, N=1k, p=.8';

PROC GPLOT DATA=density_mu;

     PLOT density*mu/OVERLAY  NOFRAME                                        

 

;

RUN;

 

%mend;

%CLT(N=1000);

 

 

/*Proving that the Normal distribution DOES NOT approximate the binomial distribution with small N and small P -- and some CLT*/

 

%macro CLT(n=);

%do i=1 %to &n.;

 

ods listing close;

filename foo dummy 'foo';

proc printto log=foo;

RUN;

QUIT;

 

data binomial;

do i=1 to 1000;

binprob=ranbin(0,10,.1);

normprob=1+(rannor(0)*.94868);

output;

end;

run;

 

proc means data=binomial;

var normprob;

run;

 

 

proc kde data=Binomial out=density_bin;

var binprob;

run;

 

data density_bin;

set density_bin;

densitybin=density;

id=_n_;

keep id binprob densitybin;

run;

 

proc kde data=Binomial out=density_norm;

var normprob;

run;

 

data density_norm;

set density_norm;

densitynorm=density;

id=_n_;

keep id normprob densitynorm;

run;

 

 

 

proc means data=Binomial;

var binprob;

output out=means mean=mu std=sigma;

run;

 

proc print data=means;

run;

 

 

%if &i=1 %then %do;

      data m_plot;

            set means;

            run;

      %end;

      %else %do;

            data m_plot;

                  set m_plot means;

                  run;

            %end;

 

%end;

 

 

 

proc kde data=m_plot out=density_mu;

var mu;

run;

 

data density_mu;

set density_mu;

id=_n_;

run;

 

 

data mgd;

merge density_bin density_norm;

by id;

run;

 

 

ods listing;

proc printto log=log;

run;

 

goptions;

symbol1 color=salmon interpol=join w=5;

symbol2 color=steel interpol=join w=5;

title 'Binomial (NP, NPQ) Distribution versus Normal(NP, NPQ) Distribution, N=10, p=.1';

PROC GPLOT DATA=mgd;

     PLOT densitybin*binprob densitynorm*normprob /OVERLAY  NOFRAME                                        

 

;

RUN;

 

goptions;

symbol1 color=steel interpol=join w=5;

title 'CLT Results, N=10, p=.1';

PROC GPLOT DATA=density_mu;

     PLOT density*mu/OVERLAY  NOFRAME                                        

 

;

RUN;

 

%mend;

%CLT(N=1000);

 

 

/* Other distributions CLT examples -- Amazing! */

 

/*Poisson distribution --  a distribution for counts –

like the telephone call data on day1*/

%macro CLT(n=);

%do i=1 %to &n.;

 

ods listing close;

filename foo dummy 'foo';

proc printto log=foo;

RUN;

QUIT;

 

data Poisson;

do i=1 to 1000;

prob=ranpoi(0,.5);

output;

end;

run;

 

 

proc kde data=Poisson out=density_poi;

var prob;

run;

 

data density_poi;

set density_poi;

densitypoi=density;

id=_n_;

keep id prob densitypoi;

run;

 

 

 

proc means data=Poisson;

var prob;

output out=means mean=mu std=sigma;

run;

 

proc print data=means;

run;

 

 

%if &i=1 %then %do;

      data m_plot;

            set means;

            run;

      %end;

      %else %do;

            data m_plot;

                  set m_plot means;

                  run;

            %end;

 

%end;

 

 

 

proc kde data=m_plot out=density_mu;

var mu;

run;

 

data density_mu;