Lab 9---Logistic Regression

 

#grouped binary data

 

> library(faraway)

> data(bliss)

> bliss

  dead alive conc

1    2    28    0

2    8    22    1

3   15    15    2

4   23     7    3

5   27     3    4

 

#fit model

> out1<-glm(cbind(dead,alive)~conc,data=bliss,family=binomial)

> summary(out1)

 

#analysis of deviance--need to specify the test

 

> anova(out1)

> anova(out1,type='Chi')

 

#interpret the coefficient

 

> coef(out1)

(Intercept)        conc

  -2.323790    1.161895

> exp(coef(out1)[2])

    conc

3.195984

 

#odds of death increased by a factor of 3.2 by each one unit increase in conc.

#graph results

 

#empirical logit

 

logit.p<-log((bliss$dead+1/2)/(bliss$alive+1/2))

plot(bliss$conc,logit.p,xlab='concentration',ylab='logit(p)')

abline(coef(out1),col=2)

 

 

#on probability scale

 

plot(bliss$conc,bliss$dead/(bliss$dead+bliss$alive),xlab='Concentration',

ylab='probability')

lines(seq(0,4,.1),1/(1+exp(-coef(out1)[1]-coef(out1)[2]*seq(0,4,.1))),col=2)

 

 

#check for lack of fit

 

> bliss

  dead alive conc

1    2    28    0

2    8    22    1

3   15    15    2

4   23     7    3

5   27     3    4

 

#obtain group sample sizes

> ni<-apply(bliss[,1:2],1,sum)

> ni

 1  2  3  4  5

30 30 30 30 30

 

#calculate predicted probabilities

> 1/(1+exp(-coef(out1)[1]-coef(out1)[2]*0:4))

[1] 0.08917177 0.23832314 0.50000000 0.76167686 0.91082823

> 1/(1+exp(-coef(out1)[1]-coef(out1)[2]*0:4))->phat

 

#calculate expected values

> cbind(ni*phat,ni-ni*phat)->Ei

> Ei

       [,1]      [,2]

1  2.675153 27.324847

2  7.149694 22.850306

3 15.000000 15.000000

4 22.850306  7.149694

5 27.324847  2.675153

 

#obtain Oi and calculate Pearson

> Oi<-bliss[,1:2]

> pearson<-sum((Oi-Ei)^2/Ei)

> pearson

[1] 0.3672674

> 1-pchisq(pearson,dim(bliss)[1]-2)

[1] 0.9469181

 

#compare to sum of pearson residuals squared

> residuals(out1,type='pearson')

          1           2           3           4           5

-0.43252342  0.36437292  0.00000000  0.06414687 -0.20810684

> sum(residuals(out1,type='pearson')^2)

[1] 0.3672674

 

#try G2 test

 

> sum(2*Oi*log(Oi/Ei))

[1] 0.3787483

 

#look at sum of deviance residuals squared

> sum(residuals(out1,type='deviance')^2)

[1] 0.3787483

 

#look at output--the residual deviance is the G2 goodness of fit test.

> summary(out1)

 

 

Ungrouped Binary Data--Habitat suitability modeling

 

http://www.kidcyber.com.au/topics/frog_corrob.htm

http://www.arkive.org/species/GES/amphibians/Pseudophryne_corroboree/GES005645.html?size=large

http://www.kidcyber.com.au/topics/frog_corrob.htm

http://www.abc.net.au/science/scribblygum/june2004/

 

     Hunter, D. (2000) The conservation and demography of  the southern

     corroboree frog (Pseudophryne corroboree). M.Sc. thesis,

     University of Canberra, Canberra.

 

library(DAAG)

data(frogs)

names(frogs)

> help(frogs)

 

#examine data

 

#data have a spatial component

> plot(frogs$easting,frogs$northing,pch=1,col=2*(frogs$pres.abs+1))

 

 

#let's look at one predictor meanmin

> plot(frogs$meanmin,frogs$pres.abs)

#perhaps quadratic?

> lines(lowess(frogs$pres.abs~frogs$meanmin),col=1,lty=3)

> lines(lowess(frogs$pres.abs~frogs$meanmin,f=.9),col=2)

 

 

#fit model

> glm(pres.abs~meanmin,data=frogs,family=binomial)->out0

 

#plot result

> plot(frogs$meanmin,frogs$pres.abs)

> lines(lowess(frogs$pres.abs~frogs$meanmin),col=1,lty=3)

#add model

> phat<-function(x) 1/(1+exp(-coef(out0)[1]-coef(out0)[2]*x))

> lines(seq(2,4.5,.01),phat(seq(2,4.5,.01)),col=2)

 

#add a quadratic

 

> glm(pres.abs~meanmin+I(meanmin^2),data=frogs,family=binomial)->out0b

> phat2<-function(x) 1/(1+exp(-coef(out0b)[1]-coef(out0b)[2]*x-coef(out0b)[3]*x^2))

 

#plot results

 

> plot(frogs$meanmin,frogs$pres.abs)

> lines(seq(2,4.5,.01),phat(seq(2,4.5,.01)),col=2)

> lines(seq(2,4.5,.01),phat2(seq(2,4.5,.01)),col=4)

> lines(lowess(frogs$pres.abs~frogs$meanmin),col=1,lty=3)

 

 

#goodness of fit--grouping x-values

 

#not many repeated values

>table(frogs$meanmin)

 

> sum(table(frogs$meanmin))

[1] 212

 

#perhaps ten categories?

> quantile(frogs$meanmin,seq(0,1,.2))

      0%      20%      40%      60%      80%     100%

2.033333 2.500000 2.866667 3.253333 3.633333 4.333333

 

#lets just approximate this

> cut(frogs$meanmin,seq(2,4.5,.5))->test.cuts

> table(test.cuts)

test.cuts

(2,2.5] (2.5,3] (3,3.5] (3.5,4] (4,4.5]

     48      59      49      24      32

> table(frogs$pres.abs,test.cuts)

   test.cuts

    (2,2.5] (2.5,3] (3,3.5] (3.5,4] (4,4.5]

  0      42      44      23       8      16

  1       6      15      26      16      16

 

#now use quantiles

> cut(frogs$meanmin,quantile(frogs$meanmin,seq(0,1,.2)),include.lowest=TRUE)->test.cuts2

> sum(table(test.cuts2))

[1] 212

> table(frogs$pres.abs,test.cuts2)

   test.cuts2

    [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

  0         42         32          23          14          22

  1          6          7          17          29          20

 

 

#calculate n*pi

 

> tapply(fitted(out0b),test.cuts2,sum)->np

 [2.03,2.5]  (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

   4.117692   10.161348   17.619857   24.697414   22.403690

 

#obtain cell counts, the ni

>apply(table(frogs$pres.abs,test.cuts2),2,sum)

 [2.03,2.5]  (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

         48          39          40          43          42

> apply(table(frogs$pres.abs,test.cuts2),2,sum)-np->fail

> table(frogs$pres.abs,test.cuts2)->Oi

 

> rbind(fail,np)->Ei

> Ei

     [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

fail  43.882308   28.83865    22.38014    18.30259    19.59631

np     4.117692   10.16135    17.61986    24.69741    22.40369

> Oi

   test.cuts2

    [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

  0         42         32          23          14          22

  1          6          7          17          29          20

> sum((Oi-Ei)^2/Ei)

[1] 4.624012

> 1-pchisq(sum((Oi-Ei)^2/Ei),df=5-3)

[1] 0.09906236

 

 

#was the quadratic term necessary?

 

#I go back and change the model

 

>  tapply(fitted(out0),test.cuts2,sum)->np2

> apply(table(frogs$pres.abs,test.cuts2),2,sum)-np2->fail2

> rbind(fail2,np2)->Ei2

> Ei2

      [2.03,2.5] (2.5,2.87] (2.87,3.25] (3.25,3.63] (3.63,4.33]

fail2  39.278011   28.83784    26.30426    23.55765    15.02225

np2     8.721989   10.16216    13.69574    19.44235    26.97775

> sum((Oi-Ei2)^2/Ei2)

[1] 17.20310

> 1-pchisq(sum((Oi-Ei2)^2/Ei2),df=3)

[1] 0.0006419176

 

#One problem: with the more than one predictor, this method becomes hopeless

 

#Hosmer-Lemeshow Test

 

#let cut decide

> p.groups<-cut(fitted(out0b),breaks=10)

> table(p.groups)

p.groups

(0.018,0.0769] (0.0769,0.136]  (0.136,0.195]  (0.195,0.254]  (0.254,0.313]  (0.313,0.372]   (0.372,0.43]

            19             29              8              6             13             14             18

  (0.43,0.489]  (0.489,0.548]  (0.548,0.607]

            17             45             43

 

#pick your own

> range(fitted(out0b))

[1] 0.01860714 0.60661375

> p.groups2<-cut(fitted(out0b),seq(0,0.66,.066))

> table(p.groups2)

p.groups2

    (0,0.066] (0.066,0.132] (0.132,0.198] (0.198,0.264]  (0.264,0.33]  (0.33,0.396] (0.396,0.462]

           18            24            14            10            13            22            11

(0.462,0.528] (0.528,0.594]  (0.594,0.66]

           33            45            22

 

#use quantiles

> p.groups3<-cut(fitted(out0b),quantile(fitted(out0b),seq(0,1,.1)),include.lowest=TRUE)

> table(p.groups3)

p.groups3

[0.0186,0.0956]  (0.0956,0.136]   (0.136,0.259]   (0.259,0.338]   (0.338,0.414]   (0.414,0.498]

             29              19              18              21              20              27

  (0.498,0.533]   (0.533,0.552]   (0.552,0.595]   (0.595,0.607]

             17              18              21              22

 

 

> table(p.groups3)

p.groups3

[0.0186,0.0956]  (0.0956,0.136]   (0.136,0.259]   (0.259,0.338]   (0.338,0.414]   (0.414,0.498]

             29              19              18              21              20              27

  (0.498,0.533]   (0.533,0.552]   (0.552,0.595]   (0.595,0.607]

             17              18              21              22

 

#obtain observed counts in logit deciles

 

 

> table(frogs$pres.abs,p.groups3)

   p.groups3

    [0.0186,0.0956] (0.0956,0.136] (0.136,0.259] (0.259,0.338] (0.338,0.414] (0.414,0.498] (0.498,0.533]

  0              27             15            15            17            12            17            10

  1               2              4             3             4             8            10             7

   p.groups3

    (0.533,0.552] (0.552,0.595] (0.595,0.607]

  0             7             6             7

  1            11            15            15

 

> table(frogs$pres.abs,p.groups3)->Oi

 

#obtain expected successes in each category

 

> tapply(fitted(out0b),p.groups3,sum)

[0.0186,0.0956]  (0.0956,0.136]   (0.136,0.259]   (0.259,0.338]   (0.338,0.414]   (0.414,0.498]

       1.794265        2.323426        3.621496        6.539852        7.903507       12.886003

  (0.498,0.533]   (0.533,0.552]   (0.552,0.595]   (0.595,0.607]

       8.829664        9.750304       12.098450       13.253032

 

#obtain total counts in each category

> apply(Oi,2,sum)->ni

> ni

[0.0186,0.0956]  (0.0956,0.136]   (0.136,0.259]   (0.259,0.338]   (0.338,0.414]   (0.414,0.498]

             29              19              18              21              20              27

  (0.498,0.533]   (0.533,0.552]   (0.552,0.595]   (0.595,0.607]

             17              18              21              22

 

 

#obtain expected failures and successes

> rbind(ni-tapply(fitted(out0b),p.groups3,sum),tapply(fitted(out0b),p.groups3,sum))->Ei

#carry out H-L Test

> sum((Oi-Ei)^2/Ei)

[1] 7.567172

 

#df is g-2 always, where g is the number of deciles

> 1-pchisq(sum((Oi-Ei)^2/Ei),df=8)

[1] 0.4768487

 

"The H-L test is largely obsolete, having been replaced by specific directed

tests of lack of fit and by the new test described in"

 

#an alternative test 

> library(Design)

 

> meanmin2<-frogs$meanmin^2

> out.harrell<-lrm(pres.abs~meanmin+meanmin2,data=frogs,x=TRUE,y=TRUE)

> residuals.lrm(out.harrell,type='gof')

 

Sum of squared errors     Expected value|H0                    SD                     Z                     P

           41.2834597            41.6627336             0.3486647            -1.0877900             0.2766878

>