|
/*Note that this code is a definite increase in complexity over what
we have worked
with in previous recitations
-- It is useful though, so make an
effort to
understand it*/
/*Problem 3.72*/
/* A simple way to simulate 100 coin tosses*/
data binomial;
p=ranbin(0,100,.5);
run;
/*tells you how many events occurred -- here events are heads
--should expect half to be heads*/
proc print data=binomial;
run;
/* A simple way to simulate 100 sets of 100 coin tosses*/
data binomial;
do i=1 to 100;
p=ranbin(0,100,.5);
output;
end;
run;
/*Mean should be half the number of tosses*/
proc means data=binomial;
var p;
run;
/*Mean should be half the number of tosses -- remember, this gives
you a stem plot*/
proc univariate data=binomial plot;
var p;
run;
/*Problem 4.6 -- this is actually a silly question -- remember,
probability is a relative frequency*/
/*so the count is not exactly the prob -- prob= count / (total # of trials)*/
/*Given that information, you should know what to expect.*/
data binomial;
p=ranbin(0,40,.5);
run;
proc print data=binomial;
run;
data binomial;
p=ranbin(0,120,.5);
run;
proc print data=binomial;
run;
/* The triangular distribution problem -- 4.56*/
data uniform;
/*Remember folks, area of a triangle=1/2(base*height)*/
/*The triangular distribution of the sum is best approximated by a
normal distribution*/
/*Note the variance of a uniform(0,1) variable is 1/12 */
/*The mean of this distribution is 1/2 of the base length of the triangle,while the variance is the sum of the 2
variances (.16)*/
do i=1 to 10000;
a=ranuni(0);
b=ranuni(0);
c=a+b;
norm=1+rannor(0)*.4;
output;
end;
run;
proc univariate data=uniform plot;
title 'Distribution of
Functions of Uniform random variables';
histogram /cfill=salmon
barwidth=6 midpoints=0 to 2 by 0.08;
var a b c norm;
run;
data cdf;
/* The CDF function gives you the cumulative frequency up to a point
in a distribution -- helps with the last problem*/
/*The 1,1,.16 arguments in the CDF function give the CDF up to a
value of 1 in the distribution that is N(1,.16)*/
/*Why would I evaluate a Normal distribution that is N(1,.16)?*/
z=cdf('normal',1,1,.16);
run;
proc print data=cdf;
var z;
run;
/*Proving that the Normal distribution approximates the binomial
distribution with large N and large P -- and some CLT*/
%macro CLT(n=);
%do i=1 %to &n.;
ods listing close;
filename foo dummy 'foo';
proc printto log=foo;
RUN;
QUIT;
data binomial;
do i=1 to 1000;
binprob=ranbin(0,1000,.8);
normprob=800+(rannor(0)*12.649);
output;
end;
run;
proc means data=binomial;
var normprob;
run;
proc kde data=Binomial out=density_bin;
var binprob;
run;
data density_bin;
set density_bin;
densitybin=density;
id=_n_;
keep id binprob densitybin;
run;
proc kde data=Binomial out=density_norm;
var normprob;
run;
data density_norm;
set density_norm;
densitynorm=density;
id=_n_;
keep id normprob densitynorm;
run;
proc means data=Binomial;
var binprob;
output out=means mean=mu std=sigma;
run;
proc print data=means;
run;
%if &i=1 %then %do;
data m_plot;
set means;
run;
%end;
%else %do;
data m_plot;
set m_plot means;
run;
%end;
%end;
proc kde data=m_plot
out=density_mu;
var mu;
run;
data density_mu;
set density_mu;
id=_n_;
run;
data mgd;
merge density_bin density_norm;
by id;
run;
ods listing;
proc printto log=log;
run;
goptions;
symbol1 color=salmon interpol=join w=5;
symbol2 color=steel interpol=join w=5;
title 'Binomial (NP, NPQ) Distribution versus Normal(NP, NPQ) Distribution, N=1k, p=.8';
PROC GPLOT DATA=mgd;
PLOT densitybin*binprob densitynorm*normprob /OVERLAY
NOFRAME
;
RUN;
goptions;
symbol1 color=steel interpol=join w=5;
title 'CLT Results, N=1k, p=.8';
PROC GPLOT DATA=density_mu;
PLOT density*mu/OVERLAY
NOFRAME
;
RUN;
%mend;
%CLT(N=1000);
/*Proving that the Normal distribution DOES NOT approximate the
binomial distribution with small N and small P -- and some CLT*/
%macro CLT(n=);
%do i=1 %to &n.;
ods listing close;
filename foo dummy 'foo';
proc printto log=foo;
RUN;
QUIT;
data binomial;
do i=1 to 1000;
binprob=ranbin(0,10,.1);
normprob=1+(rannor(0)*.94868);
output;
end;
run;
proc means data=binomial;
var normprob;
run;
proc kde data=Binomial out=density_bin;
var binprob;
run;
data density_bin;
set density_bin;
densitybin=density;
id=_n_;
keep id binprob densitybin;
run;
proc kde data=Binomial out=density_norm;
var normprob;
run;
data density_norm;
set density_norm;
densitynorm=density;
id=_n_;
keep id normprob densitynorm;
run;
proc means data=Binomial;
var binprob;
output out=means mean=mu std=sigma;
run;
proc print data=means;
run;
%if &i=1 %then %do;
data m_plot;
set means;
run;
%end;
%else %do;
data m_plot;
set m_plot means;
run;
%end;
%end;
proc kde data=m_plot
out=density_mu;
var mu;
run;
data density_mu;
set density_mu;
id=_n_;
run;
data mgd;
merge density_bin density_norm;
by id;
run;
ods listing;
proc printto log=log;
run;
goptions;
symbol1 color=salmon interpol=join w=5;
symbol2 color=steel interpol=join w=5;
title 'Binomial (NP, NPQ) Distribution versus Normal(NP, NPQ) Distribution, N=10, p=.1';
PROC GPLOT DATA=mgd;
PLOT densitybin*binprob densitynorm*normprob /OVERLAY
NOFRAME
;
RUN;
goptions;
symbol1 color=steel interpol=join w=5;
title 'CLT Results, N=10, p=.1';
PROC GPLOT DATA=density_mu;
PLOT density*mu/OVERLAY
NOFRAME
;
RUN;
%mend;
%CLT(N=1000);
/* Other distributions CLT examples -- Amazing! */
/*Poisson distribution -- a
distribution for counts –
like the telephone call data on day1*/
%macro CLT(n=);
%do i=1 %to &n.;
ods listing close;
filename foo dummy 'foo';
proc printto log=foo;
RUN;
QUIT;
data Poisson;
do i=1 to 1000;
prob=ranpoi(0,.5);
output;
end;
run;
proc kde data=Poisson out=density_poi;
var prob;
run;
data density_poi;
set density_poi;
densitypoi=density;
id=_n_;
keep id prob densitypoi;
run;
proc means data=Poisson;
var prob;
output out=means mean=mu std=sigma;
run;
proc print data=means;
run;
%if &i=1 %then %do;
data m_plot;
set means;
run;
%end;
%else %do;
data m_plot;
set m_plot means;
run;
%end;
%end;
proc kde data=m_plot
out=density_mu;
var mu;
run;
data density_mu;
|