#logistic regression #An Introduction to Generalized linear models (3rd ed.), Dobson and Barnett) #section 7.8 sen<-read.table('A:/senility.txt',header=T) > names(sen) [1] "WAIS" "senility" attach(sen) fit3<-glm(senility~WAIS, family=binomial) plot(predict(fit3), resid(fit3,type='pearson'), main="Residual plot for the logistic model") abline(0,0) -- this is common with continuous covariates ## use grouped data x<-unique(sort(WAIS)) length(x) #17 y<-NULL m<-NULL for (i in 1:17) { y<-c(y,sum(senility[WAIS==x[i]])) m<-c(m, sum(WAIS==x[i])) } cbind(x,y,m) fit4<-glm(cbind(y,m-y)~x,family=binomial) summary(fit4) #Residual deviance: 9.419 sum(resid(fit4,type='pearson')^2) #8.083029 (check p141) > qchisq(.95,df=15) [1] 24.99579 ## compare residual deviance and sum of squred pearson residuals to ## chisq with n-p degrees of freedom, where n=number diff x value combinations ## p=number of parameters (number of covariates +1) pihat<-round(predict(fit4,type='response'),3) p.res<-round(resid(fit4,type='pearson'),3) d.res<-round(resid(fit4,type='deviance'),3) cbind(x,y,m,pihat,p.res,d.res) > cbind(x,y,m,pihat,p.res,d.res) x y m pihat p.res d.res 1 4 1 2 0.752 -0.826 -0.766 2 5 1 1 0.687 0.675 0.866 3 6 1 2 0.614 -0.330 -0.326 4 7 2 3 0.535 0.458 0.464 5 8 2 2 0.454 1.551 1.777 6 9 2 6 0.376 -0.214 -0.216 ----------------------------- 7 10 1 6 0.303 -0.728 -0.771 8 11 1 6 0.240 -0.419 -0.436 9 12 0 2 0.186 -0.675 -0.906 10 13 1 6 0.142 0.176 0.172 ----------------------------- 11 14 2 7 0.107 1.535 1.306 12 15 0 3 0.080 -0.509 -0.705 13 16 0 4 0.059 -0.500 -0.696 14 17 0 1 0.043 -0.213 -0.297 15 18 0 1 0.032 -0.181 -0.254 16 19 0 1 0.023 -0.154 -0.216 17 20 0 1 0.017 -0.131 -0.184 #expected frequencies sum(m[1:6]*pihat[1:6]) # 8.188 sum(m[7:10]*pihat[7:10]) # 4.482 sum(m[11:17]*pihat[11:17]) # 1.34 #Hosmer-Lemeshow test: sum( (o-e)^2/e ) #1.15 compared to chisq(g-2) or chisq(1) #fit is OK