Lecture 31 (lab)—Friday, March 30, 2007

What was covered?

R functions and commands demonstrated

R function options

R packages used

The parametric bootstrap

> corals<-read.table('http://www.unc.edu/courses/2007spring/enst/562/001/assignments/assign7/corals.txt', header=T,sep=',')
> library(MASS)
> model<-glm.nb(Prevalence~WSSTA+I(WSSTA^2)+Coral.Cover, data=corals)

Inferential uncertainty: uncertainty in the coefficient estimates

> vcov(model)
             (Intercept)          WSSTA    I(WSSTA^2)   Coral.Cover
(Intercept)  0.1602111601 -1.658806e-02  6.823475e-04 -2.774434e-03
WSSTA       -0.0165880645  6.877421e-03 -3.210644e-04  1.278561e-05
I(WSSTA^2)   0.0006823475 -3.210644e-04  1.652645e-05 -1.256437e-06
Coral.Cover -0.0027744338  1.278561e-05 -1.256437e-06  8.723762e-05

> coef(model)
(Intercept)      WSSTA  I(WSSTA^2) Coral.Cover
-0.74110690 0.47187658 -0.02387003  0.04742894

where the distribution shown is the multivariate normal distribution, the multivariate analog of the ordinary univariate normal distribution.

> model$theta
[1] 0.3339442
> model$SE.theta
[1] 0.03734786

> beta<-mvrnorm(1000, coef(model), vcov(model))
> theta<-rnorm(1000, model$theta, sd=model$SE.theta)

or as a single matrix equation

xmat<-cbind(rep(1, dim(corals)[1]) ,corals$WSSTA ,corals$WSSTA^2, corals$Coral.Cover)

#obtain negative binomial means for first simulation
> as.vector(exp(xmat%*%beta[1,]))->test
#display first ten means
> test[1:10]

[1] 0.04169032 1.74834239 3.57797963 6.16198930 10.95517412
[6] 0.10016315 1.58017517 1.74648264 1.87648033  3.98037768

Predictive uncertainty: uncertainty in the individual outcomes

> n<-length(corals$WSSTA)
> n.sims<-1000
#initialize the array with missing values
> y.rep<-array(NA,c(n.sims,n))

for(s in 1:n.sims) {
    mus<- exp(xmat%*%beta[s,])
    y.rep[s,]<-sapply(mus,function(x) rnbinom(1,mu=x, size=theta[s]))
}

Assessing model fit with the parametric bootstrap

> par(mfrow=c(2,2))
> sapply(1:3,function(x) hist(y.rep[x,], breaks=seq(-.5, max(y.rep[x,])+5,5)))
#now add the real thing
> hist(corals$Prevalence, breaks=seq(-.5, max(corals$Prevalence)+5,5))

Fig. 1 Histograms of three simulated data sets and the real data set

#reset the graphics window
> par(mfrow=c(1,1))
#maximum prevalence
> apply(y.rep, 1, max)->max.out
> boxplot(max.out, horizontal=T, xlab='Maximum prevalence')
#is the observed maximum unusual?
> points(max(corals$Prevalence), 1, pch=8, col=2, cex=1.5)

Fig. 2 Maximum disease prevalence for simulated and observed data (*)

#zero fraction
> apply(y.rep,1, function(x) sum(x==0)/ length(x))->zero.frac
> hist(zero.frac)
> points(sum(corals$Prevalence==0)/ length(corals$Prevalence), 0, pch=8, col=2, cex=1.5)

Fig. 3 Fraction of zero prevalences for simulated and observed data (*)

#look at maximum value at each unique value of WSSTA
> sapply(1:1000, function(x) tapply(y.rep[x,], corals$WSSTA, max))->max.x
> t(max.x[,1:2])

      0  1   2   3   4   5   6   7   8  9 10  11 13 14  15 16 17 18 19 20
[1,] 23 24 191  17  17  29 107 159  85 23 35 339 34 65 101  3 29 40 42  4
[2,] 83 40  19 331 183 246  54  18 328 95 69  27 66  8  38  6 21 19  2  3
     21 22 23 24 25 26 27 30
[1,]  0  3  3  0  0  0  0  0
[2,]  0  0  2  0  0  0  0  0

#first column are the WSSTA labels; the second column is the maximum prevalences
>
data.frame(rep(as.numeric(rownames(max.x)),1000), as.vector(max.x))->max.WSSTA
> colnames(max.WSSTA)<-c('WSSTA','max.prevalence')

>max.WSSTA$cats<-factor(max.WSSTA$WSSTA, levels=sort(unique(max.WSSTA$WSSTA)))

#save boxplot locations
> boxplot(max.WSSTA$max.prevalence~ max.WSSTA$WSSTA, ylab='WSSTA', xlab='Maximum prevalence', horizontal=T, axes=F)->hmm
> axis(2, cex.axis=.8, las=2, at=sort(unique(hmm$group)), labels=levels(max.WSSTA$cats))
> axis(1)
> box()

> points(tapply(corals$Prevalence, corals$WSSTA,max), sort(unique(hmm$group)), pch=8, col=2, cex=1.2)

Fig. 5 Maximum prevalence as a function of WSSTA, simulated and observed

A randomization test for a two-sample comparison

> slugs<-read.table( 'http://www.bio.ic.ac.uk/research/mjcraw/statcomp/data/slugsurvey.txt', header=TRUE)
> names(slugs)
[1] "slugs" "field"
> tapply(slugs$slugs,slugs$field,length)
Nursery Rookery
     40      40

> library(combinat)
> nCm(80,40)

[1] 1.075072e+23

> tapply(slugs$slugs, slugs$field, mean)->truemean
> truediff<-truemean[1]-truemean[2]
> truediff

Nursery
     -1

myperm<-function() {
#permute the response
newslugs<-sample(slugs$slugs)
#obtain the test statistic
tapply(newslugs,slugs$field,mean)->mymeans
mymeans[1]-mymeans[2]
}

> set.seed(10)
> out.diff<-replicate(999,myperm())

> alldiffs<-c(out.diff,truediff)
> onetail<-sum(alldiffs<=truediff)/length(alldiffs)
> onetail

[1] 0.028

#symmetric p-value
> 2*sum(alldiffs<=truediff)/1000

[1] 0.056
#asymmetric p-value
> sum(abs(alldiffs)>=abs(truediff))/1000

[1] 0.056

myperm<-function() {
newslugs<-sample(slugs$slugs)
tapply(newslugs,slugs$field,mean)->mymeans
t.test(newslugs~slugs$field,var.equal=T)->out
c(mymeans[1]-mymeans[2],sum(newslugs[slugs$field=='Nursery']), out$statistic)
}

> replicate(4999,myperm())->equiv.stat

> t.test(slugs$slugs~slugs$field, var.equal=T)->out
> cbind(equiv.stat,c(truediff, sum(slugs$slugs[slugs$field=='Nursery']), out$statistic))->full.set
> sapply(1:3,function(x) sum(full.set[x,]<=full.set[x,5000])/5000)

[1] 0.029 0.029 0.029

> library(coin)
> oneway_test(slugs~field,data=slugs)

Asymptotic 2-Sample Permutation Test

data: slugs by groups Nursery, Rookery
Z = -1.9397, p-value = 0.05242
alternative hypothesis: true mu is not equal to 0

> oneway_test(slugs~field,data=slugs,distribution='approximate')

Approximative 2-Sample Permutation Test

data: slugs by groups Nursery, Rookery
Z = -1.9397, p-value = 0.05
alternative hypothesis: true mu is not equal to 0

> oneway_test(slugs~field, data=slugs, distribution=approximate(B=4999))

Approximative 2-Sample Permutation Test

data: slugs by groups Nursery, Rookery
Z = -1.9397, p-value = 0.05441
alternative hypothesis: true mu is not equal to 0

> oneway_test(slugs~field, data=slugs, distribution='exact')

Exact 2-Sample Permutation Test

data: slugs by groups Nursery, Rookery
Z = -1.9397, p-value = 0.05755
alternative hypothesis: true mu is not equal to 0

> oneway_test(slugs~field,data=slugs, distribution='exact', alternative='less')

Exact 2-Sample Permutation Test

data: slugs by groups Nursery, Rookery
Z = -1.9397, p-value = 0.02878
alternative hypothesis: true mu is less than 0

A randomization test for one-way analysis of variance [not done in lecture]

> X.data <- c(13, 242, 105, 8, 59, 20, 2, 245, 515, 488, 88, 233, 50, 600, 82, 40, 52, 1889, 18, 44, 21, 5, 6, 0)
> Month <- c("Jn", "Jn", "Jn", "J", "J", "J", "J", "J", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "S", "S", "S", "S", "S", "S")
> anova(lm(X.data~factor(Month)))

Analysis of Variance Table

Response: X.data
              Df  Sum Sq Mean Sq F value Pr(>F)
factor(Month)  3  726695  242232  1.6439 0.2110
Residuals     20 2947024  147351

> boxplot(X.data~factor(Month))

Fig. 5 Box plots showing heteroscedastic data

> anova(lm(log(X.data+1)~factor(Month)))

Analysis of Variance Table

Response: log(X.data + 1)
Df Sum Sq Mean Sq F value Pr(>F)
factor(Month) 3 36.808 12.269 6.0804 0.004099 **
Residuals 20 40.357 2.018
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> oneway_test(log(X.data+1)~factor(Month), distribution='approximate')

Approximative K-Sample Permutation Test

data: log(X.data + 1) by groups A, J, Jn, S
maxT = 2.9187, p-value = 0.007

myperm<-function() {
newdata<-sample(X.data)
anova(lm(log(newdata+1)~factor(Month)))->out
out["F value"][1,1]
}

> replicate(4999,myperm())->results
> anova(lm(log(X.data+1)~factor(Month)))["F value"][1,1]->realguy
> results2<-c(results,realguy)
> sum(results2>=realguy)/length(results2)
[1] 0.0034

Cited References

Course Home Page


Jack Weiss
Phone: (919) 962-5930
E-Mail: jack_weiss@unc.edu
Address: Curriculum in Ecology, Box 3275, University of North Carolina, Chapel Hill, 27516
Copyright © 2007
Last Revised--April 3, 2007
URL: http://www.unc.edu/courses/2007spring/enst/562/001/docs/lectures/lecture31.htm