# probabilty #% What is the probability of getting a bulls eye? #% square target with side = r1 #% round bulls eye radius = r2 #% radii r1 = 44; r2 = 1.78; P = (pi*r2^2)/(r1*r1) ## to get a factorial in R use the gamma function # n! = gamma(n+1) gamma(n+1) ## to get members of the binomial distribution ## you need to calculate n choose k # use the R function choose: choose(n, k) lchoose(n, k) ############ PASSCAL's triangle choose(5, 2) for (n in 0:10) print(choose(n, k = 0:n)) in class we considered the situation: p = (choose(15,3)*choose(30,7))/choose(45,10) ## structures in R ## R-programming - Functions ## discuss how you sample distributions in R: help(rnorm) help(runif) help.search("distribution") Beta Beta Distribution Binomial Binomial Distribution Cauchy Cauchy Distribution Chisquare (non-central) Chi-Squared Distribution Exponential Exponential Distribution FDist F Distribution GammaDist Gamma Distribution Geometric Geometric Distribution Hypergeometric Hypergeometric Distribution Logistic Logistic Distribution Lognormal Log Normal Distribution Multinomial Multinomial Distribution NegBinomial Negative Binomial Distribution Normal Normal Distribution Poisson Poisson Distribution SignRank Distribution of the Wilcoxon Signed Rank Statistic TDist Student t Distribution Tukey Studentized Range Distribution Uniform Uniform Distribution Weibull Weibull Distribution Wilcoxon Distribution of the Wilcoxon Rank Sum Statistic help("poisson", package = "stats") ## dnorm(x, mean=0, sd=1, log = FALSE) ## pnorm(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) ## qnorm(p, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) ## rnorm(n, mean=0, sd=1) ## dunif(x, min=0, max=1, log = FALSE) ## punif(q, min=0, max=1, lower.tail = TRUE, log.p = FALSE) ## qunif(p, min=0, max=1, lower.tail = TRUE, log.p = FALSE) ## runif(n, min=0, max=1) x = rnorm(1000, mean=20, sd=5) ## view the data: plot(x) hist(x) qqnorm(x) #### ### how do we calculate the quantile points of set of data? z1=qnorm(.05, mean=20, sd=5) z2=qnorm(.95, mean=20, sd=5) ## plot the histogram and then mark the quantiles hist(x) abline(v=c(z1,z2)) ### these mark the 5% and 95% quantiles of the distribution ################################################### hist(x) y = locator(type='p') ## stop by hitting the stop button on the top of the page 100*pnorm(y$x, mean=20, sd=5) ## these are the percentiles of the distribution ### % statistics ### % mean, median, mode, std, var ## get 10 uniformly distributed numbers a = runif(10) plot(a) ## what is the mean? sum(a)/length(a) ## in R this is a function mean(a) ## what about the variance? v = var(a) ## what is the standard deviation? std = sqrt(v) ## what is the median? m = median(a) ## now lets make a function from these mystats<-function(a) { m = mean(a) v = var(a) s = sqrt(v) M = median(a) return(list(m=m, v=v, s=s, M=m)) } # now paste this function in R ## and execute: mystats(a) ####################### create a new file with the function above. save the file in R execute: source("your_file_name") this is how you can create the function in R and save it to a file at the same time. ############################ % Simple plots x = 1:25; y = x*x plot(x,y) plot(x,y,col=2, type='p') plot(x,y,col=2, type='l') #### plotting error bars on scatter plots ####################################### plotCI <- function (x, y = NULL, uiw, liw = uiw, ..., sfrac = 0.01) { if (is.list(x)) { y <- x$y x <- x$x } if (is.null(y)) { if (is.null(x)) stop("both x and y NULL") y <- as.numeric(x) x <- seq(along = x) } ui <- y + uiw li <- y - liw plot(x, y, ylim = range(c(y, ui, li)), ...) smidge <- diff(par("usr")[1:2]) * sfrac segments(x, li, x, ui) x2 <- c(x, x) ul <- c(li, ui) segments(x2 - smidge, ul, x2 + smidge, ul) invisible(list(x = x, y = y)) } ## %%%%%%%%%%%%%% plotCI(x,y,rep(1, length(y) ) plotCI(x,y, 20*rnorm(length(y) ) ) plotCI(x,y, 10+20*rnorm(length(y)), 10+20*rnorm(length(y)) ) up = 10+20*rnorm(length(y)); lo = 10+20*rnorm(length(y)); z = (y-lo) + (up-lo)*rnorm(length(y)) plot(x,z, type= 'l' , col=3) plotCI(x,z, lo, up) grid() ############################# x = rnorm(100000) dx = density(x, kernel ="gaussian") hist(x, freq=FALSE) lines(dx$x, dx$y, col="red") ############################# What is the "mode" of this distribution? max(dx$y) which.max(dx$y) gx = seq(from=-4, to=4, length=1000) mx = 0 sdx = 1 yscale = max(dx$y) gy = ( 1/(sdx*sqrt(2*pi) ))* exp((-.5*( ((gx-mx))/sdx)^2)) lines(gx,gy, col="blue") hist(x, freq=FALSE) lines(dx$x, dx$y, col="red") ############################# lines(gx,gy, col="blue") getgauss = function(n, mx, sdx) { gx = seq(from=-4, to=4, length=n) mx = 0 sdx = 1 yscale = max(dx$y) gy = ( 1/(sdx*sqrt(2*pi) ))* exp((-.5*( ((gx-mx))/sdx)^2)) return(list(x=gx, y=gy)) } ############################# ############################# ############################# ############# central limit theorem ############################# N = 1000 z = rep(0,N) for(i in 1:N) { y = rlnorm(N) z[i] = mean(y) } ### normalize z for plotting purposes z = (z-mean(z))/sd(z) hist(z) dz = density(z, kernel ="gaussian") hist(z, freq=FALSE) lines(dz$x, dz$y, col="red") G = getgauss(1000, mean(z), sqrt(var(z))) ### plot(G$x, G$y, type='l', col="purple") lines( G$x, G$y, col="purple")