Assignment 8 — Solution

Question 1

Splitting the data

> clams<-read.table('http://www.unc.edu/courses/2007spring/enst/562/001/assignments/assign8/clams562.csv', header=T, sep=',')

#reorder season in correct order
> clams$season<-factor(clams$season, levels=c('SPR','FALL'))

> CO<-clams[clams$treatment=='CO',]
> COC<-clams[clams$treatment=='COC',]

> split(CO$height, list(CO$season, CO$year))->split.height.CO
> split(COC$height, list(COC$season, COC$year))->split.height.COC

> names(split.height.CO)
[1] "SPR.2001" "FALL.2001" "SPR.2002" "FALL.2002" "SPR.2003" "FALL.2003"

> rbind(sapply(split.height.CO,median),sapply(split.height.COC,median))->out.median
> rownames(out.median)<-c('CO','COC')
> out.median

    SPR.2001 FALL.2001 SPR.2002 FALL.2002 SPR.2003 FALL.2003
CO     56.95     44.85    46.85     51.00     57.0     60.25
COC    60.60     41.45    51.60     52.85     53.3     50.10

> rbind(sapply(split.height.CO,mad),sapply(split.height.COC,mad))->out.mad
> rownames(out.mad)<-c('CO','COC')
> out.mad

    SPR.2001 FALL.2001 SPR.2002 FALL.2002 SPR.2003 FALL.2003
CO  22.90617  15.56730 20.83053  21.86835 24.75942  30.83808
COC 19.71858  12.15732 18.08772  16.75338 13.93644  13.19514

Determining which bootstrap confidence interval to use

> median.func<-function(data,i) median(data[i])
> mad.func<-function(data,i) mad(data[i])
> cur.data<-split.height.CO[[1]]
> library(boot)
> my.boot1<-boot(cur.dat, median.func, 9999)
> my.boot1

ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = cur.dat, statistic = median.func, R = 9999)
Bootstrap Statistics :
    original      bias std. error
t1*    56.95 0.1863836   1.362823

> boot.ci(boot.out = my.boot1, conf = 0.95)->test
> test

BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 9999 bootstrap replicates

CALL :
boot.ci(boot.out = my.boot1, conf = 0.95)

Intervals :
Level          Normal           Basic
95%   (54.09, 59.43 ) (53.90, 59.70 )

Level      Percentile             BCa
95%   (54.20, 60.00 ) (53.90, 59.65 )
Calculations and Intervals on Original Scale

> my.boot2<-boot(cur.dat, mad.func, 9999)
> my.boot2

ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = cur.dat, statistic = mad.func, R = 9999)
Bootstrap Statistics :
    original       bias std. error
t1* 22.90617 -0.3885838    2.89791


> boot.ci(boot.out = my.boot2, conf = 0.95)->test1
> test1

BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 9999 bootstrap replicates

CALL :
boot.ci(boot.out = my.boot2, conf = 0.95)

Intervals :
Level          Normal           Basic
  95% (17.61, 28.97 ) (17.64, 29.06 )

Level      Percentile             BCa
  95% (16.75, 28.17 ) (17.20, 28.98 )
Calculations and Intervals on Original Scale

> hist(my.boot1$t, freq=F, breaks=seq(51,63,.25), main='Bootstrap distribution\nSpring 2001 CO', xlab='Median')
> lines(seq(51,63,.1), sapply(seq(51,63,.1), function(x) dnorm(x, mean(my.boot1$t), sd(my.boot1$t))), col=2)
> hist(my.boot2$t, freq=F, breaks=seq(12,34,1), main='Bootstrap distribution\nSpring 2001 CO', xlab='MAD')
> lines(12:34, sapply(12:34, function(x) dnorm(x, mean(my.boot2$t), sd(my.boot2$t))), col=2)

Fig. 1  Bootstrap distributions of median and MAD for CO, Spring 2001

Obtaining the bootstrap confidence intervals

#bootstrap median confidence interval function
my.median.func<-function(cur.dat,reps, ci='basic') {
median.func<-function(data,i) median(data[i])
my.boot1<-boot(cur.dat,median.func,reps)
boot.ci(boot.out = my.boot1, conf = 0.95, type=ci)->test1
if(ci=='norm') c(test1$t0, test1[[ci]][2:3]) else
c(test1$t0, test1[[ci]][4:5])
}

#obtain bootstrap confidence intervals of various types
> sapply(split.height.CO,function(x) my.median.func(x, 9999, 'basic'))->CO.basic
> sapply(split.height.CO,function(x) my.median.func(x, 9999, 'perc'))->CO.perc
> sapply(split.height.CO,function(x) my.median.func(x, 9999, 'norm'))->CO.norm
> sapply(split.height.COC,function(x) my.median.func(x, 9999, 'basic'))->COC.basic
> sapply(split.height.COC,function(x) my.median.func(x, 9999, 'perc'))->COC.perc
> sapply(split.height.COC,function(x) my.median.func(x, 9999, 'norm'))->COC.norm

> rownames(CO.basic)<-c('median','lower95','upper95')
> CO.basic

       SPR.2001 FALL.2001 SPR.2002 FALL.2002 SPR.2003 FALL.2003
median    56.95     44.85    46.85      51.0     57.0     60.25
lower95   53.80     39.85    39.10      46.4     51.4     50.10
upper95   59.70     48.50    51.50      53.8     59.7     69.60

> rownames(COC.basic)<-c('median','lower95','upper95')
> COC.basic

       SPR.2001 FALL.2001 SPR.2002 FALL.2002 SPR.2003 FALL.2003
median     60.6     41.45     51.6     52.85    53.30      50.1
lower95    57.4     36.90     48.1     49.90    50.30      46.7
upper95    63.6     43.50     56.0     54.65    55.25      53.1

mygraph<-function(CO.data,COC.data,ylabel='Median Height (mm)',title=NULL) {
plot(c(1,6), c(min(rbind(CO.data,COC.data)), max(rbind(CO.data,COC.data))), xlim=c(.8,6.2), type='n', xlab='', ylab=ylabel, axes=FALSE)
axis(1,at=1:6, labels=FALSE, cex.axis=.8)
axis(2)
box()
mtext(c('2001','2002','2003'),at=c(1.5,3.5,5.5), side=1, line=1.5, cex=.9)
mtext(rep(c('Spring','Fall'),3),at=1:6,side=1,line=.5, cex=.9)
points((1:6)-.1,COC.data[1,],pch=15,col=1)
lines((1:6)-.1,COC.data[1,],col=1)
arrows((1:6)-.1,COC.data[2,],(1:6)-.1,
COC.data[3,],code=3, angle=90, length=.1)
points((1:6)+.1,CO.data[1,],pch=16,col=2)
lines((1:6)+.1,CO.data[1,],col=2)
arrows((1:6)+.1,CO.data[2,],(1:6)+.1,
CO.data[3,],code=3, angle=90, length=.1,col=2)
abline(v=2.5,col=4,lty=2)
legend(3,max(rbind(CO.data,COC.data)),c('COC','CO'), pch=c(15,16), col=c(1,2),
lty=c(1,1),cex=.8,bty='n',ncol=2)
mtext(side=3,line=.5,text=title)
}

> mygraph(CO.basic,COC.basic,'Median(height)','Basic')
> mygraph(CO.perc,COC.perc,'Median(height)','Percentile')
> mygraph(CO.norm,COC.norm,'Median(height)','Normal')

Fig. 2   Comparing median height trajectories using different bootstrap confidence interval methods

my.mad.func<-function(cur.dat, reps, ci='basic') {
mad.func<-function(data,i) mad(data[i])
my.boot1<-boot(cur.dat, mad.func, reps)
boot.ci(boot.out = my.boot1, conf = 0.95, type=ci)->test1
if(ci=='norm') c(test1$t0, test1[[ci]][2:3]) else
c(test1$t0, test1[[ci]][4:5])
}
> sapply(split.height.CO, function(x) my.mad.func(x, 9999, 'basic'))->CO.mad.basic
> sapply(split.height.CO, function(x) my.mad.func(x, 9999, 'perc'))->CO.mad.perc
> sapply(split.height.CO, function(x) my.mad.func(x, 9999, 'norm'))->CO.mad.norm
> sapply(split.height.COC, function(x) my.mad.func(x, 9999, 'basic'))->COC.mad.basic
> sapply(split.height.COC, function(x) my.mad.func(x, 9999, 'perc'))->COC.mad.perc
> sapply(split.height.COC, function(x) my.mad.func(x, 9999, 'norm'))->COC.mad.norm
> mygraph(CO.mad.basic, COC.mad.basic, 'MAD(height)', 'Basic')
> mygraph(CO.mad.perc, COC.mad.perc, 'MAD(height)', 'Percentile')
> mygraph(CO.mad.norm, COC.mad.norm, 'MAD(height)', 'Normal')

Fig. 3  Comparing MAD height trajectories using different bootstrap confidence interval methods

Question 2

Obtaining the confidence intervals

> amy1<- read.table('http://www.unc.edu/courses/2007spring/enst/562/001/assignments/midterm/amy1.csv', header=T, sep=',')
> amy2<- read.table('http://www.unc.edu/courses/2007spring/enst/562/001/assignments/midterm/amy2.csv', header=T, sep=',')
> amy3<- read.table('http://www.unc.edu/courses/2007spring/enst/562/001/assignments/midterm/amy3a.csv', header=T, sep=',')
> amy4<-merge(amy1,amy2)
> amy5<-merge(amy4,amy3)
> library(survey)
> my.obj<-svydesign(id=~boma.id, strata=~SubVill, data=amy5, fpc=~BomaPop)

> svyby(~Tot.ha.01,~SubVill, subset(my.obj, !is.na(Tot.ha.01)), svyquantile, quantiles=.5, ci=T, alpha=.05)->q95
> svyby(~Tot.ha.01,~SubVill, subset(my.obj, !is.na(Tot.ha.01)), svyquantile, quantiles=.5, ci=T, alpha=.5)->q50

Produce the graph

> data.frame(1:9, unlist(q50$statistics.quantiles), matrix(unlist(q95$statistics.CIs), ncol=2, byrow=T), matrix(unlist(q50$statistics.CIs) ,ncol=2, byrow=T))->results
> colnames(results)<-c('village', 'median', 'lower95', 'upper95', 'lower50', 'upper50')
> rownames(results)<-q50$SubVill
> results

     village median  lower95  upper95   lower50   upper50
EngarkashA 1    1.5 0.800000 2.000000 0.9482144 2.0000000
EngarkashB 2    2.4 1.852469 2.800000 2.3362692 2.4637308
Lemooti    3    2.0 1.600000 2.400000 1.7952393 2.2047607
Loosasia   4    2.0 1.236034 4.363966 1.9308932 2.3455338
Madukani   5    0.8 0.800000 1.942424 0.8000000 0.9741525
Mbuko      6    3.4 2.000000 3.800000 2.8284261 3.6000000
Nyhorit    7    4.4 3.025420 5.849720 4.0000000 4.9182755
Olmotoo    8    2.4 2.000000 3.579572 2.4000000 2.4000000
Osilale    9    2.4 2.400000 8.000000 2.4000000 5.4840240

> oldmar<-par("mar")
> par(mar=c(5.1,7.1,2.1,1.1))
> par(lend=2)
> plot(range(results[,2:6]), c(1,9), type='n', axes=F, ylab='', xlab='Median Hectares Owned', ylim=c(.5,9.5),
xlim=c(0,max(results[,2:6])))
> axis(1,cex.axis=.8)
> axis(2,at=1:9, labels=rownames(results), cex.axis=.8, las=2)
#95% confidence interval
> apply(results,1, function(x) segments(x[3], x[1], x[4], x[1], lwd=3, col='grey65'))
#50% confidence interval
> apply(results,1, function(x) segments(x[5], x[1], x[6], x[1], lwd=5, col='grey50'))
> apply(results,1,function(x) points(x[2],x[1],pch=16, col='white', cex=1))
> apply(results,1,function(x) points(x[2],x[1],pch=1, cex=1.1))
> box()
#reset line ends back to the default value.
> par(lend=0)
> par(mar=oldmar)

Fig. 4  50% and 95% confidence intervals for the median hectares owned (by subvillage)

Course Home Page


Jack Weiss
Phone: (919) 962-5930
E-Mail: jack_weiss@unc.edu
Address: Curriculum in Ecology, Box 3275, University of North Carolina, Chapel Hill, 27516
Copyright © 2007
Last Revised--May 3, 2007
URL: http://www.unc.edu/courses/2007spring/enst/562/001/docs/solutions/assign8.htm