Lecture 15 (lab 4) —February, January 7, 2006

What was covered?

R functions and commands demonstrated

R function options

R packages used

Creating a negative binomial loglikelihood function

> #generate data
> num.stems<-c(6,8,9,6,6,2,5,3,1,4)
> aphids<-0:9
> aphid.data<-rep(aphids,num.stems)

> #negative binomial loglikelihood
> NBfunc1<-function(mu,theta) sum(log(dnbinom(aphid.data,mu=mu,size=theta)))

As you can see the only difference from the Poisson function is that we need to specify two arguments, the mean and the dispersion parameter.

> #alternative vector version
> NBvec.pos<-function(p) sum(log(dnbinom(aphid.data,mu=p[1],size=p[2])))

> #first function
> NBfunc1(3,4)
[1] -116.1379
> #incorrect use of 2nd function with two separate arguments
> NBvec.pos(3,4)
Error in NBvec.pos(3, 4) : unused argument(s) ( ...)
> #correct usage in which arguments are entered as a vector
> NBvec.pos(c(3,4))
[1] -116.1379

> #negative loglikelihood for nlm
> NBvec.neg<-function(p) -NBvec.pos(p)

Graphing the negative binomial loglikelihood

Two-dimensional profiles

Fig. 1 Profile likelihoods for θ

> plot(seq(0.1,10,.1),sapply(seq(0.1,10,.1), function(x) NBvec.pos(c(3.46,x))), xlab=expression(theta), type='l', ylab='loglikelihood')
> lines(seq(0.1,10,.1), sapply(seq(0.1,10,.1),function(x) NBvec.pos(c(4.5,x))), col=2)
> lines(seq(0.1,10,.1), sapply(seq(0.1,10,.1),function(x) NBvec.pos(c(3,x))), col=4)
> lines(seq(0.1,10,.1), sapply(seq(0.1,10,.1),function(x) NBvec.pos(c(2.5,x))), col=3)
> legend(6.5,-130, c(expression(mu==3.46), expression(mu==4.5), expression(mu==3), expression(mu==2.5)),
col=c(1,2,4,3),lty=rep(1,4),bty='n')

function(x) NBvec.pos(c(3.46,x)))

What this construction does is to temporarily turn what is a two-parameter function into a one-parameter function. It permits me to specify a value for one of the parameters, μ, while allowing the other parameter, θ, to remain free.

Three-dimensional surface plots

> #Generating points in the plane
> expand.grid(1:3,8:11)

   Var1 Var2
1     1    8
2     2    8
3     3    8
4     1    9
5     2    9
6     3    9
7     1   10
8     2   10
9     3   10
10    1   11
11    2   11
12    3   11

Observe that the elements of the first vector are cycled through first.

> #create the z-coordinate
> test2<-function(x,y) 2*x+y
> expand.grid(1:3,8:11)->yuk
> test2(yuk[,1],yuk[,2])
 [1] 10 12 14 11 13 15 12 14 16 13 15 17

> #another way of doing it with vector argument and apply
> apply(yuk,1,function(x) 2*x[1]+x[2])
 1  2  3  4  5  6  7  8  9 10 11 12
10 12 14 11 13 15 12 14 16 13 15 17

> #create x-y grid for plotting loglikelihood
> g<-expand.grid(mu=seq(2,5,.1),theta=seq(1,4,.1))
> #examine first ten values
> g[1:10,]

    mu theta
1  2.0     1
2  2.1     1
3  2.2     1
4  2.3     1
5  2.4     1
6  2.5     1
7  2.6     1
8  2.7     1
9  2.8     1
10 2.9     1

> #create and append the z-coordinate, the loglikelihood
g$z<-apply(g,1,NBvec.pos)
dim(g)
[1] 961   3

The dim function returns the dimensions of our matrix. We see that there are 961 rows and 3 columns.

Method 1: Using the persp function

> #3-dimensional surface graph. Look at help on persp
>?persp

Fig. 2 Portions of persp help screen

> #we construct a matrix of z-values to match the grid
> z.matrix<-matrix(g$z,nrow=length(seq(2,5,.1)))

Fig. 3  3-D plot using persp

> persp(seq(2,5,.1), seq(1,4,.1), z.matrix, xlab="mu", ylab="theta", zlab='loglikelihood')

> #add ticks using ticktype option
> persp(seq(2,5,.1), seq(1,4,.1), z.matrix, xlab="mu",
ylab="theta", zlab='loglikelihood', ticktype='detailed'

> #change viewing position
> persp(seq(2,5,.1),seq(1,4,.1), z.matrix,xlab="mu", ylab="theta", zlab='loglikelihood', ticktype='detailed', theta=30, phi=30)

Method 2: Using lattice graphics

> library(lattice)

> #wireframe just takes ordinary vectors
> wireframe(z~mu*theta,data=g)

> #change background to transparent for current plot only
> wireframe(z~mu*theta, g, par.settings=list(background=list(col="transparent")))

> #get current background settings
> back.color<-trellis.par.get("background")
> back.color
$alpha
[1] 1

$col
[1] "#909090"

> #change color to transparent
> back.color$col <-"transparent"
> #set new background settings
> trellis.par.set("background",back.color)

To see all the lattice graphic settings issue specify trellis.par.get(), i.e., the function with no arguments. Be warned that what's displayed is a very long list.

> #can also get different viewpoint
> wireframe(z~mu*theta, data=g, screen = list(z = 20, x = -70, y = 3)

> #add tick marks and nice labels
> wireframe(z~mu*theta, data=g, xlab=expression(mu), ylab=expression(theta), zlab='loglikelihood', scales = list(arrows = FALSE))

Fig. 4  3-D plot using wireframe

> #split z-axis name
> wireframe(z~mu*theta, data=g, xlab=expression(mu), ylab=expression(theta), zlab="log\nlikelihood", scales = list(arrows = FALSE))

> #add color with drape argument
> wireframe(z~mu*theta, data=g, xlab=expression(mu), ylab=expression(theta), zlab="log\nlikelihood", scales = list(arrows = FALSE), drape=TRUE)

> wireframe(z~mu*theta, data=g, xlab=expression(mu), ylab=expression(theta), zlab="log\nlikelihood", scales = list(arrows = FALSE), drape=TRUE, par.settings=list(par.zlab.text=list(cex=.75), axis.text=list(cex=.75)))

Method 3: Contour plots

Fig. 5 Contour plot

> contour(seq(2,5,.1),seq(1,4,.1), z.matrix, xlab=expression(mu), ylab=expression(theta))
> #increase the number of contours
> contour(seq(2,5,.1), seq(1,4,.1), z.matrix, xlab=expression(mu), ylab=expression(theta), nlevels=30)

Obtaining the MLEs

> nlm(NBvec.neg,c(3,4),hessian=TRUE)
$minimum
[1] 114.7009
$estimate
[1] 3.459995 2.645024
$gradient
[1] -2.252383e-05  1.319529e-05
$hessian
             [,1]         [,2]
[1,] 6.2589599224 0.0002248452
[2,] 0.0002248452 0.9535298199
$code
[1] 1
$iterations
[1] 8

Wald confidence intervals

> #save results and hessian
> nlm(NBvec.neg,c(3,4),hessian=TRUE)->out
> #invert the hessian to obtain the covariance matrix
> solve(out$hessian)
              [,1]          [,2]
[1,]  1.597710e-01 -3.767448e-05
[2,] -3.767448e-05  1.048735e+00

> diag(solve(out$hessian))
[1] 0.1597710 1.0487360

> out$estimate
[1] 3.459995 2.645024

> names(out$estimate)<-c('mu','theta')
> low<-out$estimate-qnorm(.975)*sqrt(diag(solve(out$hessian)))
> high<-out$estimate+qnorm(.975)*sqrt(diag(solve(out$hessian)))
> cbind(low,high)->wald
> wald
            low     high
   mu 2.6765704 4.243419
theta 0.6378678 4.652180

Profile likelihood confidence intervals

Fig. 6   95% profile confidence contours

> #extend graph and create new z-values
> g<-expand.grid(mu=seq(2,5,.1), theta=seq(1,9,.1))
> g$z<-apply(g,1,NBvec.pos)
> z.matrix<-matrix(g$z, nrow=length(seq(2,5,.1)))
> #limits for marginal profile likelihood CI
> lower.limit1<- -out$minimum-.5*qchisq(.95,1)
> #add limit for joint CI
> lower.limit2<- -out$minimum-.5*qchisq(.95,2)
> contour(seq(2,5,.1), seq(1,9,.1), z.matrix, xlab=expression(mu), ylab=expression(theta), levels=c(lower.limit2, lower.limit1), lty=c(3,1), col=c(4,1))
> points(out$estimate[1], out$estimate[2], pch=16, col=2, cex=1.5)
> legend(2, 8.5, c('95% joint CI', '95% marginal CI'), lty=c(3,1), col=c(4,1), bty='n', cex=c(.8,.8))

Fig. 7   95% profile confidence intervals

> library(Bhat)

> x.in<-list(label=c('mu', 'theta'), est=out$estimate, low=c(2.5,1), high=c(4.8,9))
> plkhci(x.in, NBvec.neg,'mu')->profile.mu
> plkhci(x.in, NBvec.neg,'theta')->profile.theta
> rbind(profile.mu, profile.theta)->profile.ci
> colnames(profile.ci)<-c('low', 'high')
> profile.ci
                   low     high
   profile.mu 2.748964 4.371168
profile.theta 1.321214 6.465232

> #show boundaries of marginal CI
> abline(v=c(profile.mu),col=3,lty=2)
> abline(h=c(profile.theta),col=2,lty=2)

Goodness of fit

> dnbinom(0:9,mu=out$estimate[1],size=out$estimate[2])
[1] 0.10943985 0.16405655 0.16945422 0.14869883 0.11893284 0.08958118
[7] 0.06468935 0.045278220.03093792 0.02073880

> dnbinom(0:9,mu=out$estimate[1],size=out$estimate[2])*50
[1] 5.471993 8.202827 8.472711 7.434941 5.946642 4.479059 3.234468 2.263911
[9] 1.546896 1.036940

> #anything left over?
> sum(dnbinom(0:9,mu=out$estimate[1], size=out$estimate[2])*50)
[1] 48.09039

> expprobs<-c(dnbinom(0:8,mu=out$estimate[1], size=out$estimate[2]), 1-pnbinom(8, mu=out$estimate[1], size=out$estimate[2]))
> expprobs*50->Ei
> table(aphid.data)->Oi

> names(Oi)[length(Oi)]<-"9+"

Fig. 8  Fit of Poisson and negative binomial

> coords<-barplot(Oi, ylim=c(0,11))
> points(coords,Ei,col=2, pch=16, cex=1.2)
> lines(coords,Ei,col=2)
> #add poisson
> poisprobs<-c(dpois(0:8, lambda=3.46), 1-ppois(8, 3.46))
> poisprobs*50->ei.pois
> ei.pois
> points(coords, ei.pois, col=4, pch=15, cex=1.2)
> lines(coords, ei.pois, col=4)
> legend(coords[7],11, c('Poisson', 'negative binomial'), col=c(4,2), lty=c(1,1), pch=c(15,16), bty='n', cex=c(.9,.9))
> box()

> Ei
[1] 5.471993 8.202827 8.472711 7.434941 5.946642 4.479059 3.234468
[8] 2.263911 1.546896 2.946552

> Ei.merge<-c(Ei[1:5],sum(Ei[6:7]),sum(Ei[8:10]))
> Ei.merge
[1] 5.471993 8.202827 8.472711 7.434941 5.946642 7.713526 6.757360

> Oi.merge<-c(Oi[1:5],sum(Oi[6:7]),sum(Oi[8:10]))
> Pearson<-sum((Oi.merge-Ei.merge)^2/Ei.merge)
> Pearson
[1] 0.6607193
> Pearson.p<-1-pchisq(Pearson,length(Oi.merge)-1-2)
> Pearson.p
[1] 0.9560832

> #G2 test
> G2<-2*sum(Oi.merge*log(Oi.merge/Ei.merge))
> G2
[1] 0.6675889

> qchisq(.95,length(Oi.merge)-1-2)
[1] 9.487729

> length(Oi.merge)-1-2
[1] 4

> Pearson<-sum((Oi-Ei)^2/Ei)
> Pearson.p<-1-pchisq(Pearson,length(Oi)-1-2)
> Pearson
[1] 3.511331
> Pearson.p
[1] 0.8340238

> chisq.test(Oi,p=expprobs,simulate.p.value=TRUE)

Chi-squared test for given probabilities with simulated p-value
(based on 2000 replicates)

data: Oi
X-squared = 3.5113, df = NA, p-value = 0.9455

> new.oi<-c(Oi,0)
> new.expprob<-c(dnbinom(0:9,mu=out$estimate[1],
size=out$estimate[2]),1-pnbinom(9,mu=out$estimate[1],
size=out$estimate[2]))
> new.expprob
[1] 0.10943985 0.16405655 0.16945422 0.14869883 0.11893284 0.08958118
[7] 0.06468935 0.04527822 0.03093792 0.02073880 0.03819224

> new.expprob*50
[1] 5.471993 8.202827 8.472711 7.434941 5.946642 4.479059 3.234468
[8] 2.263911 1.546896 1.036940 1.909612

> chisq.test(new.oi,p=new.expprob,simulate.p.value=TRUE,B=2000)

Chi-squared test for given probabilities with simulated p-value
(based on 2000 replicates)

data: new.oi
X-squared = 13.5113, df = NA, p-value = 0.1844

Cited References

Course Home Page


Jack Weiss
Phone: (919) 962-5930
E-Mail: jack_weiss@unc.edu
Address: Curriculum in Ecology, Box 3275, University of North Carolina, Chapel Hill, 27516
Copyright © 2006
Last Revised--Sept 21, 2007
URL: http://www.unc.edu/courses/2006spring/ecol/145/001/docs/lectures/lecture15.htm