Lecture 35 (lab 9)—March 21, 2006

What was covered?

R functions and commands demonstrated

R function options

R libraries used

Grouped binary data

> #obtain data
> library(faraway)
> data(bliss)
> bliss
  dead alive conc
1    2    28    0
2    8    22    1
3   15    15    2
4   23     7    3
5   27     3    4

> out1<-glm(cbind(dead,alive)~conc,data=bliss,family=binomial)
> summary(out1,corr=FALSE)

Call:
glm(formula = cbind(dead, alive) ~ conc, family = binomial, data = bliss)

Deviance Residuals:
      1      2      3      4       5
-0.4510 0.3597 0.0000 0.0643 -0.2045

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept)  -2.3238     0.4179  -5.561 2.69e-08 ***
       conc   1.1619     0.1814   6.405 1.51e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 64.76327 on 4 degrees of freedom
Residual deviance:  0.37875 on 3 degrees of freedom
AIC: 20.854

Number of Fisher Scoring iterations: 4

> exp(coef(out1)[2])
conc
3.195984

> anova(out1,test='Chisq')
Analysis of Deviance Table
Model: binomial, link: logit
Response: cbind(dead, alive)
Terms added sequentially (first to last)

     Df Deviance Resid. Df Resid. Dev P(>|Chi|)
NULL                     4     64.763
conc  1   64.385         3      0.379 1.024e-15

Although the results are consistent with the Wald test, notice that the obtained significance level is very different.

> plot(bliss$conc, log((bliss$dead+1/2)/(bliss$alive+1/2)), xlab='concentration', ylab='logit')
> abline(coef(out1),col=2)

> plot(bliss$conc, bliss$dead/(bliss$dead+bliss$alive), xlab='Concentration',
ylab='probability')
> lines(seq(0,4,.1), 1/(1+exp(-coef(out1)[1]-coef(out1)[2]*seq(0,4,.1))), col=2)

        

Fig. 1  Estimated logit and probability functions for logistic regression model with grouped binary data

#obtain group sample sizes
> ni<-apply(bliss[,1:2],1,sum)
> ni

 1  2  3  4  5
30 30 30 30 30

 
#calculate predicted probabilities
> 1/(1+exp(-coef(out1)[1]-coef(out1)[2]*0:4))
[1] 0.08917177 0.23832314 0.50000000 0.76167686 0.91082823
> 1/(1+exp(-coef(out1)[1]-coef(out1)[2]*0:4))->phat
 
#calculate expected values
> cbind(ni*phat,ni-ni*phat)->Ei
> Ei

       [,1]      [,2]
1  2.675153 27.324847
2  7.149694 22.850306
3 15.000000 15.000000
4 22.850306  7.149694
5 27.324847  2.675153

 
#obtain Oi and calculate Pearson
> Oi<-bliss[,1:2]
> pearson<-sum((Oi-Ei)^2/Ei)
> pearson

[1] 0.3672674
> 1-pchisq(pearson,dim(bliss)[1]-2)
[1] 0.9469181

Fig. 2  It's not easy being green (or mottled)!

Ungrouped binary data—Habitat suitability modeling

> library(DAAG)
> data(frogs)
> names(frogs)

[1] "pres.abs" "northing" "easting" "altitude" "distance"
[6] "NoOfPools" "NoOfSites" "avrain" "meanmin" "meanmax"

> help(frogs)

http://www.kidcyber.com.au/topics/frog_corrob.htm
http://www.abc.net.au/science/scribblygum/june2004/
http://www.arkive.org/species/GES/amphibians/Pseudophryne_corroboree/GES005645.html?size=large

Fig. 3 Spatial distribution of the corroboree frog

> plot(frogs$easting, frogs$northing, pch=1, col=2*(frogs$pres.abs+1))

> plot(frogs$meanmin, frogs$pres.abs)
#perhaps quadratic?
> lines(lowess(frogs$pres.abs~frogs$meanmin), col=1,lty=3)

> dim(frogs)
[1] 212 10
> meanmin.cuts<-cut(frogs$meanmin, quantile(frogs$meanmin, seq(0,1,.1)), include.lowest=TRUE)
> table(meanmin.cuts)

meanmin.cuts
[2.03,2.4] (2.4,2.5] (2.5,2.73] (2.73,2.87] (2.87,3] (3,3.25]
        29        19         18          21       20       20
(3.25,3.47] (3.47,3.63] (3.63,4.13] (4.13,4.33]
         25          18          24          18

       

Fig. 4 Determining the correct structural form of the regressor: on the original scale (left) and the logit scale (right)

#fit linear model
> glm(pres.abs~meanmin,data=frogs,family=binomial)->out0
> summary(out0)

Call:
glm(formula = pres.abs ~ meanmin, family = binomial, data = frogs)

Deviance Residuals:
Min 1Q Median 3Q Max
-1.5683 -0.8739 -0.6552 1.1951 1.8758

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.3465 0.8257 -5.264 1.41e-07 ***
meanmin 1.2070 0.2531 4.769 1.85e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 279.99 on 211 degrees of freedom
Residual deviance: 254.40 on 210 degrees of freedom
AIC: 258.4

Number of Fisher Scoring iterations: 4

#fit quadratic model
> glm(pres.abs~meanmin+I(meanmin^2), data=frogs, family=binomial)->out0b
> summary(out0b)

Call:
glm(formula = pres.abs ~ meanmin + I(meanmin^2), family = binomial,
data = frogs)

Deviance Residuals:
Min 1Q Median 3Q Max
-1.3660 -1.0032 -0.4631 1.0583 2.3412

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -21.1754 5.2810 -4.010 6.08e-05 ***
meanmin 11.6649 3.1900 3.657 0.000255 ***
I(meanmin^2) -1.5743 0.4717 -3.337 0.000846 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 279.99 on 211 degrees of freedom
Residual deviance: 241.76 on 209 degrees of freedom
AIC: 247.76

Number of Fisher Scoring iterations: 5

Fig. 5 Comparison of logistic regression models

> plot(frogs$meanmin,frogs$pres.abs, xlab='meanmin', ylab='presence/absence')
> #add linear model
> phat<-function(x) 1/(1+exp(-coef(out0)[1] - coef(out0)[2]*x))
> lines(seq(2,4.5,.01), phat(seq(2,4.5,.01)), col=2)
> #add quadratic
> phat2<-function(x) 1/(1+exp(-coef(out0b)[1] - coef(out0b)[2]*x - coef(out0b)[3]*x^2))
> lines(seq(2,4.5,.01), phat2(seq(2,4.5,.01)), col=4)
> #add lowess
> lines(lowess(frogs$pres.abs ~ frogs$meanmin), col=1, lty=3)
> legend(2.2, .9, c('linear', 'quadratic', 'lowess'), col=c(2,4,1), lty=c(1,1,3), bty='n', cex=c(.9,.9,.9))

Goodness of fit tests for ungrouped binary data

Grouping by values of the predictor

> cut(frogs$meanmin, quantile(frogs$meanmin, seq(0,1,.2)), include.lowest=TRUE)->test.cuts2
> table(frogs$pres.abs,test.cuts2)

test.cuts2
  [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]
0         42         32          23          14          22
1          6          7          17          29          20

> sum((Oi-Ei)^2/Ei)
[1] 4.624012
> 1-pchisq(sum((Oi-Ei)^2/Ei),df=5-3)
[1] 0.09906236

> tapply(fitted(out0),test.cuts2,sum)->np2
> apply(table(frogs$pres.abs,test.cuts2),2,sum)-np2->fail2
> rbind(fail2,np2)->Ei2
> Ei2

      [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]
fail2  39.278011   28.83784    26.30426    23.55765    15.02225
  np2   8.721989   10.16216    13.69574    19.44235    26.97775

> sum((Oi-Ei2)^2/Ei2)
[1] 17.20310
> 1-pchisq(sum((Oi-Ei2)^2/Ei2),df=5-2)
[1] 0.0006419176

Hosmer-Lemeshow Test

> logit.groups<-cut(predict(out0b), quantile(predict(out0b), seq(0,1,.1)), include.lowest=TRUE)
> table(logit.groups)

logit.groups
 [-3.97,-2.25] (-2.25,-1.85] (-1.85,-1.05] (-1.05,-0.673]
           29            19            18             21
(-0.673,-0.349] (-0.349,-0.00651] (-0.00651,0.131] (0.131,0.21]
             20               27                17           18
(0.21,0.385] (0.385,0.433]
          21            22

> table(frogs$pres.abs,logit.groups)->Oi

> tapply(fitted(out0b),logit.groups,sum)->success
> tapply(fitted(out0b),logit.groups,length)->ni
> ni-success->failure
> rbind(failure,success)->Ei
> Ei

        [-3.97,-2.25] (-2.25,-1.85] (-1.85,-1.05] (-1.05,-0.673]
failure     27.205735     16.676574     14.378504      14.460148
success      1.794265      2.323426      3.621496       6.539852
        (-0.673,-0.349] (-0.349,-0.00651] (-0.00651,0.131] (0.131,0.21]
failure       12.096493          14.11400         8.170336     8.249696
success        7.903507          12.88600         8.829664     9.750304
        (0.21,0.385] (0.385,0.433]
failure      8.90155      8.746968
success     12.09845     13.253032

> sum((Oi-Ei)^2/Ei)
[1] 7.567172
> 1-pchisq(sum((Oi-Ei)^2/Ei),df=8)
[1] 0.4768487

The Hosmer-Lemeshow-Cressie test in the Design package

> library(Design)
> #create quadratic term for use in lrm function  
> meanmin2<-frogs$meanmin^2
> out.harrell<-lrm(pres.abs~meanmin+meanmin2, data=frogs, x=TRUE, y=TRUE)
> residuals.lrm(out.harrell,type='gof')

 
Sum of squared errors     Expected value|H0               SD               Z               P
           41.2834597            41.6627336        0.3486647      -1.0877900       0.2766878

Course Home Page


Jack Weiss
Phone: (919) 962-5930
E-Mail: jack_weiss@unc.edu
Address: Curriculum in Ecology, Box 3275, University of North Carolina, Chapel Hill, 27516
Copyright © 2006
Last Revised--March 31, 2006
URL: http://www.unc.edu/courses/2006spring/ecol/145/001/docs/lectures/lecture35.htm