Lecture 19 (lab 5) —February 14, 2006

What was covered?

R functions and commands demonstrated

R function options

The Crawley slug data set

> #obtain data
> slugs<-read.table( 'http://www.bio.ic.ac.uk/research/mjcraw/statcomp/data/slugsurvey.txt', header=TRUE)
> names(slugs)
[1] "slugs" "field"

> table(slugs$slugs,slugs$field)

  Nursery Rookery
0      25       9
1       5       9
2       2       8
3       2       5
4       2       2
5       1       4
6       1       1
7       1       0
8       0       1
9       0       1
10      1       0

> table(slugs$slugs,slugs$field)->out
> barplot(out)

> barplot(t(out), beside=TRUE, angle=c(45,135), density=c(20,20), col=c('black','red'), legend.text=TRUE, xlab='# of slugs', ylab='frequency')

Fig. 1a  Stacked bars
Fig. 1b  Side-by-side bar charts

> coords<-barplot(t(out), beside=TRUE, angle=c(45,135), density=c(20,20), ylim=c(0,27), col=c('black','red'), xlab='# of slugs', ylab='frequency')
> box()
> legend(coords[1,8], 26, c('nursery','rookery'), density=c(20,20), angle=c(45,135), fill=c('black','red'), cex=c(.8,.8),bty='n')

Poisson models

Common means model

> poi.1<-function(data,p) -sum(log(dpois(data$slugs,lambda=p)))

#initial estimate
> mean(slugs$slugs)
[1] 1.775

> nlm(function(p) poi.1(slugs,p),2)->out1
> out1
$minimum
[1] 176.8383

$estimate
[1] 1.774999

$gradient
[1] 1.07282e-06

$code
[1] 1

$iterations
[1] 5

Separate means Poisson model

> as.numeric(slugs$field)
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[34] 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[67] 2 2 2 2 2 2 2 2 2 2 2 2 2 2

> poi.2<-function(data,p) {
field.dummy<-as.numeric(data$field)-1
mylambda<-p[1]+p[2]*field.dummy
negloglike<- -sum(log(dpois(data$slugs,lambda=mylambda)))
negloglike
}

mylambda = p[1] + p[2]*0 = p[1]

mylambda = p[1] + p[2]*1 = p[1] + p[2]

> tapply(slugs$slugs,slugs$field,mean)
Nursery Rookery
  1.275   2.275

> nlm(function(p) poi.2(slugs,p),c(1.2,1))->out2

> out2
$minimum
[1] 171.1275

$estimate
[1] 1.2749997 0.9999997

$gradient
[1] 1.114577e-05 -1.477929e-06

$code
[1] 1

$iterations
[1] 8

> out1$minimum
[1] 176.8383
> out2$minimum
[1] 171.1275

> my.aic<-function(output) -2*(-output$minimum) + 2*length(output$estimate)

> my.aic(out1)
[1] 355.6766
> my.aic(out2)
[1] 346.2551

Negative binomial models

ZIP models

  1. We can let θ and λ be the same in the two field types.
  2. We can let λ differ in the two field types but estimate a common θ.
  3. We can let θ differ in the two field types but estimate a common λ.
  4. We can estimate separate values of λ and θ in the two field types.

I consider each of these in turn.

Common mean and zero fraction ZIP model

> #common lambda and theta
> zip1<-function(data,p) {
lambda<-p[1]
theta<-p[2]
zero.term<-sum(log(theta+(1-theta)*
  dpois(data$slugs[data$slugs==0], lambda)))
nonzero.term<-sum(log((1- theta)* dpois(data$slugs[data$slugs>0],
  lambda)))
negloglike<- -(zero.term+nonzero.term)
negloglike
}

> mean(slugs$slugs[slugs$slugs>0])
[1] 3.086957
> table(slugs$slugs)[1]/sum(table(slugs$slugs))
    0
0.425

nlm(function(p) zip1(slugs,p),c(3,.4))->out7

> out7
$minimum
[1] 150.4711

$estimate
[1] 2.920557 0.392239

$gradient
[1] 2.530218e-07 5.400125e-07

$code
[1] 1

$iterations
[1] 5

Both components are approximately zero as they should be. From the estimate of θ we see that most of the zeros, 0.392/0.425 or 92% are ascribed to come from a separate distribution and only a small percentage are estimated to come from the Poisson distribution.

> my.aic(out7)
[1] 304.9422

Separate means and common zero fraction ZIP model

> # different lambda, same theta
> zip2<-function(data,p) {
field.dummy<-as.numeric(slugs$field)-1
mylambda<-p[1]+p[3]*field.dummy
theta<-p[2]
zero.term<-sum(ifelse(data$slugs==0,log(theta+(1-theta)*
  dpois(data$slugs,lambda=mylambda)),0))
nonzero.term<-sum(ifelse(data$slugs>0,log((1-theta)*
  dpois(data$slugs,lambda=mylambda)),0))
negloglike<- -(zero.term+nonzero.term)
negloglike
}

  1. The first argument specifies a condition to test. This is a Boolean expression. In my first use of ifelse the condition I test is data$slugs==0. So I'm testing if the slug count is zero or not.
  2. The second argument is the calculation to carry out and return if the condition is TRUE. Here I specify the corresponding formula for the ZIP model to use when the count is zero.
  3. The third argument is the value to return when the condition is FALSE. I choose to return the value of 0 when the condition is FALSE. Since I end up summing the values returned by the ifelse function, a return value of zero will contribute nothing to the sum.

I then do a similar set of calculations for the nonzero terms.

negloglike<- -sum(ifelse(data$slugs==0,log(theta+(1-theta)*
dpois(data$slugs,lambda=mylambda)),log((1-theta)*
dpois(data$slugs,lambda=mylambda))))

Thus in this version if the slug count is zero we calculate the loglikelihood term for zero counts. If the count is not zero we calculate the loglikelihood term for nonzero counts. The full function would be the following.

zip2.alt<-function(data,p) {
field.dummy<-as.numeric(slugs$field)-1
mylambda<-p[1]+p[3]*field.dummy
theta<-p[2]
negloglike<- -sum(ifelse(data$slugs==0,log(theta+(1-theta)*
   dpois(data$slugs,lambda=mylambda)),log((1-theta)*
   dpois(data$slugs,lambda=mylambda))))
negloglike
}

> tapply(slugs$slugs[slugs$slugs>0],slugs$field[slugs$slugs>0],mean)
Nursery Rookery
3.400000 2.935484

Based on this output we should estimate p[1] to be 3.4 and p[3] to be about –0.4, the rookery mean minus the nursery mean. I fit the model using these initial values and 0.42 as the initial guess for the zero fraction.

> nlm(function(p) zip2(slugs,p),c(3.4,.42,-.4))->out8
> out8
$minimum
[1] 150.4209

$estimate
[1] 3.0578696 0.3950236 -0.1890004

$gradient
[1] 9.164487e-06 2.498268e-05 6.508571e-06

$code
[1] 1

$iterations
[1] 15

Separate zero fraction and common mean ZIP model

> #different theta
> zip3<-function(data,p)
{
field.dummy<-as.numeric(data$field)-1
mylambda<-p[1]
theta<-p[2]+p[3]*field.dummy
zero.term<-sum(ifelse(data$slugs==0,log(theta+(1-theta)*
   dpois(data$slugs,lambda=mylambda)),0))
nonzero.term<-sum(ifelse(data$slugs>0,log((1-theta)*
   dpois(data$slugs,lambda=mylambda)),0))
negloglike<- -(zero.term+nonzero.term)
negloglike
}

> table(slugs$slugs,slugs$field)[1,]/40
Nursery Rookery
0.625 0.225

Thus I can use 0.6 as an estimate of p[2] and –0.4 as an estimate of p[3].

> nlm(function(p) zip3(slugs,p),c(3,.6,-.4))->out9
> out9
$minimum
[1] 143.7118

$estimate
[1] 2.9205557 0.6036338 -0.4227899

$gradient
[1] -1.214505e-05 -9.038104e-06 2.643219e-06

$code
[1] 1

$iterations
[1] 8

From the output we can see that the estimates converged and roughly to the values we anticipated.

Separate zero fraction and separate means ZIP model

Log-transformed normal models

Common mean and variance log-transformed normal model

> norm.neglike<-function(data,p) {
t.y<-log(data$slugs+1)
mu<-p[1]
my.sd<-p[2]
negloglike<- -sum(log(dnorm(t.y,mean=mu,sd=my.sd)))
negloglike
}

> mean(log(slugs$slugs+1))
[1] 0.733247
> sd(log(slugs$slugs+1))
[1] 0.7414184

> nlm(function(p) norm.neglike(slugs,p),c(.73,.74))->out.norm
> out.norm
$minimum
[1] 89.07672

$estimate
[1] 0.7332465 0.7367695

$gradient
[1] -8.100187e-07 2.117417e-06

$code
[1] 1

$iterations
[1] 4

From the output we can conclude the algorithm converged.

In our case, c = 1. The function below calculates the negative loglikelihood using this formula. It takes two arguments: the name of the data set containing the counts and the name of output file from nlm that contains the maximum likelihood estimates.

#calculate negative loglikelihood for AIC
> norm.like<-function(data,out)
{
t.y<-log(data$slugs+1)
mu<-out$estimate[1]
my.sd<-out$estimate[2]
negloglike<- -sum(log(dnorm(t.y,mean=mu,
  sd=my.sd)*1/(data$slugs+1)))
out<-list(negloglike,out$estimate)
names(out)<-c("minimum","estimate")
out
}

The function returns the value of the negative loglikelihood at the MLEs as well as the values of the MLEs. The format of the output was constructed to agree with the output returned by nlm. Using the function I generate the correct negative loglikelihood from which I compute the AIC.

> norm.like(slugs,out.norm)->out20
> my.aic(out20)
[1] 299.4730

Separate mean and common variance log-transformed normal model

> tapply(log(slugs$slugs+1),slugs$field,mean)
Nursery Rookery
0.4967358 0.9697582

norm.neglike2<-function(data,p)
{
t.y<-log(data$slugs+1)
field.dummy<-as.numeric(data$field)-1
mu<-p[1]+field.dummy*p[3]
my.sd<-p[2]
negloglike<- -sum(log(dnorm(t.y,mean=mu,sd=my.sd)))
negloglike
}

> nlm(function(p) norm.neglike2(slugs,p),c(.5,.7,.5))->outnorm2
> outnorm2
$minimum
[1] 84.7266

$estimate
[1] 0.4967359 0.6977763 0.4730215

$gradient
[1] 2.432898e-05 1.456613e-05 -2.644640e-05

$code
[1] 1

$iterations
[1] 6

> norm.like2<-function(data,out)
{
t.y<-log(data$slugs+1)
field.dummy<-as.numeric(data$field)-1
mu<-out$estimate[1]+field.dummy*out$estimate[3]
my.sd<-out$estimate[2]
negloglike<- -sum(log(dnorm(t.y,mean=mu,
  sd=my.sd)*1/(data$slugs+1)))
out<-list(negloglike,out$estimate)
names(out)<-c("minimum","estimate")
out
}

> norm.like2(slugs,outnorm2)->out21
> my.aic(out21)
[1] 292.7727
> my.aic(out9)
[1] 293.4236

Square root-transformed normal models

Carrying out the Burnham & Anderson protocol of model comparison

> model.names<-c('Pois.common','Pois.mean','Zip.common',
'Zip.mean','Zip.theta','lognormal','lognormal.mean')

> models<-list(out1,out2,out7,out8,out9,out20,out21)

> AIC.func<-function(model.list,n,modelnames)
{
output<-NULL
for (i in 1:length(model.list)) {
cur.model<-model.list[[i]]
LL<- -cur.model$minimum
K<-length(cur.model$estimate)
AIC<- -2*LL + 2*K
AICc<-AIC + 2*K*(K+1)/(n-K-1)
output<-rbind(output,c(LL,K,AIC,AICc))
}
colnames(output)<-c('LogL','K','AIC','AICc')
minAICc<-min(output[,"AICc"])
deltai<-output[,"AICc"]-minAICc
rel.like<-exp(-deltai/2)
wi<-round(rel.like/sum(rel.like),3)
out<-data.frame(modelnames,output,deltai,wi)
out
}

> AIC.func(models,80,model.names)
      modelnames      LogL K      AIC     AICc     deltai    wi
1    Pois.common -176.8383 1 355.6766 355.7279 62.6393865 0.000
2      Pois.mean -171.1275 2 346.2551 346.4109 53.3223890 0.000
3     Zip.common -150.4711 2 304.9422 305.0980 12.0095342 0.001
4       Zip.mean -150.4209 3 306.8417 307.1575 14.0689934 0.000
5      Zip.theta -143.7118 3 293.4236 293.7394  0.6509085 0.410
6      lognormal -147.7365 2 299.4730 299.6288  6.5402901 0.022
7 lognormal.mean -143.3864 3 292.7727 293.0885  0.0000000 0.567

Cited References

Course Home Page


Jack Weiss
Phone: (919) 962-5930
E-Mail: jack_weiss@unc.edu
Address: Curriculum in Ecology, Box 3275, University of North Carolina, Chapel Hill, 27516
Copyright © 2006
Last Revised--Feb 18, 2006
URL: http://www.unc.edu/courses/2006spring/ecol/145/001/docs/lectures/lecture19.htm