############################################################################# # Bock & Jones Constant Method Saltiness Data: x <- c(.3, .2, .1, -.1, -.2, -.3) N <- c(49, 47, 50, 48, 48, 49) r <- c(48, 38, 31, 13, 3, 2) quartz(width=8, height=8) # note: quartz is specific to Mac OS X; omit on PCs par(cex.axis=2, cex.lab=2, mar=(c(5, 8, 4, 2)+0.1), lab=c(7,3,7)) plot(x, (r/N), xlab="Difference in Salt Concentration", ylab="P('more salty')", col="blue") # for R procedures that work from individual data... responses <- c(rep(1,r[1]), rep(0,(N[1] - r[1])), rep(1,r[2]), rep(0,(N[2] - r[2])), rep(1,r[3]), rep(0,(N[3] - r[3])), rep(1,r[4]), rep(0,(N[4] - r[4])), rep(1,r[5]), rep(0,(N[5] - r[5])), rep(1,r[6]), rep(0,(N[6] - r[6]))) SaltConc <- c(rep(x[1], N[1]), rep(x[2], N[2]), rep(x[3], N[3]), rep(x[4], N[4]), rep(x[5], N[5]), rep(x[6], N[6])) # for comparison-reference for MCMC # for derivative-free ML (R minimizes the negative loglikelihood): ll.probit <- function(params, x, N, r) { alpha <- params[1] beta <- params[2] P <- pnorm(alpha + (beta*x)) l <- (-1)*sum(r*log(P) + ((N-r)*log(1-P))) } ############################################################################# params <- c(0,5) system.time( result <- nlm(f=ll.probit,p=params,hessian=TRUE,x=x,N=N,r=r) ) print(result) SE <- sqrt(diag(solve(result$hessian))) print(SE) ############################################################################# # add the ogive (smoothed this time) to the plot alpha <- result$estimate[1] beta <- result$estimate[2] xplot <- seq(-0.3, 0.3, 0.01) lines(xplot, pnorm(alpha + (beta*xplot)), col="blue") ############################################################################# ### plot likelihood (as contours) for alpha-beta ### x.alpha <- seq((result$estimate[1]-3.5*SE[1]), (result$estimate[1]+3.5*SE[1]), length=100) y.beta <- seq((result$estimate[2]-3.5*SE[2]), (result$estimate[2]+3.5*SE[2]), length=100) z <- matrix(0, nrow=100, ncol=100) for (i in 1:100) { for (j in 1:100) { params <- c(x.alpha[i], y.beta[j]) z[i,j] <- ll.probit(params, x, N, r) } } NMLL<- result$minimum quartz(width=8, height=8) # note: quartz is specific to Mac OS X; omit on PCs par(cex.axis=2, cex.lab=2, cex.main=2, mar=(c(5, 8, 4, 2)+0.1), lab=c(7,7,7)) contour(x.alpha, y.beta, z, levels = c(NMLL + 0.5, NMLL + 1.0, NMLL + 2.0, NMLL + 3.0), labels = c("1", "2", "4", "6"), labcex = 1.5, main = "Difference -2ll, alpha & beta", xlab = "alpha", ylab = "beta") ############################################################################# # compute the mean by 2-dimensional rectangular quadrature MeanAlphaBeta <- c(0,0) SumWts <- 0 for (i in 1:100) { for (j in 1:100) { params <- c(x.alpha[i], y.beta[j]) MeanAlphaBeta <- MeanAlphaBeta + params*z[i,j] SumWts <- SumWts + z[i,j] } } MeanAlphaBeta <- MeanAlphaBeta/SumWts print(MeanAlphaBeta) ############################################################################# # Use the Wash. U. folks' MCMC from the library MCMCpack (downloadable) library(MCMCpack) posterior <- MCMCprobit(responses~SaltConc, thin=5) plot(posterior) MCMC.out <- summary(posterior) MCMC.out ############################################################################# ### plot likelihood (as contours) for alpha-beta, with 2000 points ### NMLL<- result$minimum quartz(width=8, height=8) # note: quartz is specific to Mac OS X; omit on PCs par(cex.axis=2, cex.lab=2, cex.main=2, mar=(c(5, 8, 4, 2)+0.1), lab=c(7,7,7)) contour(x.alpha, y.beta, z, levels = c(NMLL + 0.5, NMLL + 1.0, NMLL + 2.0, NMLL + 3.0), labels = c("1", "2", "4", "6"), labcex = 1.5, main = "Difference -2ll, alpha & beta", xlab = "alpha", ylab = "beta") # 2000 point version: points(as.matrix(posterior)) ############################################################################# ### roll-our-own DA Gibbs MCMC estiamtion for the probit model: DrawZ <- function(alpha, beta, responses, X) { # 1 -> Z ~ N(alpha+beta*X,1) truncated at the left by 0 # 0 -> Z ~ N(alpha+beta*X,1) truncated at the right by 0 randunif <- matrix(runif(length(responses)), ncol = 1, nrow = length(responses)) Z1 <- matrix(0, ncol = 1, nrow = length(responses)) person <- 0 for(p in 1:length(responses)) { person <- person + 1 mu <- alpha + beta*X[person] BB <- pnorm(-mu) if (responses[p] == 0) { tt <- BB*randunif[person] Z1[person] <- qnorm(tt) + mu } else { tt <- (1-BB)*randunif[person] + BB Z1[person] <- qnorm(tt) + mu } } return(Z1) } DrawAlphaBeta <- function(X1, Z) { p <- matrix(0,nrow=2,ncol=1) ones <- matrix(1,nrow=length(X1),ncol=1) X <- matrix(c(ones, X1), nrow=length(X1), ncol=2) XtXinv <- solve(t(X)%*%X) meanparams <- XtXinv %*% t(X) %*% Z newparams <- matrix(mvrnorm(1, mu=meanparams, XtXinv), ncol=1, nrow=2) return(newparams) } ##### NOTE: WITH niter = 10000 THIS WILL TAKE A VERY LONG TIME! ####### ##### use 100 for class, start with the ML estimates ###### alpha <- -0.1660201 beta <- 5.821918 niter <- 1000 MCMCdraws <- matrix(0, nrow=niter, ncol=2) for (iter in 1:niter) { Z <- DrawZ(alpha, beta, responses, SaltConc) AlphaBeta <- DrawAlphaBeta(SaltConc, Z) MCMCdraws[iter,] <- t(AlphaBeta) alpha <- AlphaBeta[1] beta <- AlphaBeta[2] } colnames(MCMCdraws) <- c("alpha", "beta") library(coda) skip <- 5 MCMCout <- mcmc(MCMCdraws, 1, niter, skip) plot(MCMCout) summary(MCMCout) ############################################################################# # plot of Z's (didactic purposes only) quartz(width=10, height=10) # note: quartz is specific to Mac OS X; omit on PCs par(cex.axis=2, cex.lab=2, mar=(c(5, 8, 4, 2)+0.1), lab=c(9,7,7)) plot(0,0,xlab="Difference in Salt Concentration", ylab="Z", xlim=c(-.3,.3), ylim=c(-4,4), col="white") for (i in 1:length(responses)) { if (responses[i] == 1) ptcolor <- "blue" else ptcolor <- "red" points(SaltConc[i], Z[i], col=ptcolor) } lines(x, alpha+beta*x) # hint for the homework: a version of DrawAlphaBeta # that does not use mvrnorm: DrawAlphaBeta <- function(X1, Z) { p <- matrix(0,nrow=2,ncol=1) ones <- matrix(1,nrow=length(X1),ncol=1) X <- matrix(c(ones, X1), nrow=length(X1), ncol=2) XtXinv <- solve(t(X)%*%X) amat <- chol(XtXinv) meanparams <- XtXinv %*% t(X) %*% Z newparams <- t(amat) %*% matrix(rnorm(2),nrow=2,ncol=1)+meanparams return(newparams) }