#Sta678 Handout #5: Cox proportional hazards model library(survival) pbc<-matrix(scan('http://www-rohan.sdsu.edu/~jjfan/sta678/pbcS.txt'), byrow=T,ncol=20) dim(pbc) #[1] 418 20 case<-pbc[,1] time<-pbc[,2] status<-pbc[,3] delta<-(status==2) trt<-pbc[,4] ## important covariates age<-pbc[,5] edema<-pbc[,10] bili<-pbc[,11] albumin<-pbc[,13] protime<-pbc[,19] hist(bili) hist(protime) hist(albumin) ################################# # Cox PH model and interpretation ################################# ageyr<-age/365.25 fit<-coxph(Surv(time,delta)~ageyr+edema+log(bili)+log(protime)+log(albumin)) # This is using all 418 patients. fit coef exp(coef) se(coef) z p ageyr 0.0396 1.0404 0.00767 5.16 2.4e-07 edema 0.8963 2.4505 0.27141 3.30 9.6e-04 log(bili) 0.8636 2.3716 0.08294 10.41 0.0e+00 log(protime) 2.3868 10.8791 0.76851 3.11 1.9e-03 log(albumin) -2.5069 0.0815 0.65292 -3.84 1.2e-04 Likelihood ratio test=231 on 5 df, p=0 n=416 (2 observations deleted due to missingness) ##interpret the parameter estimate for bili # Each 1 point increase in log(bilirubin) is associated with a 2.4-fold # increase in the risk of death from PBC, holding other covariates constant. ## According to this model, what is the relative risk for death comparing # a patient who was 43 years old at baseline, had a serum bilirubin level of # 0.8 gm/dl and had moderate edema to a patient who was 58 years old, had a # bili level of 3.4 gm/dl and had no edema, assuming the same baseline # values of prothrombin time and albumin? Does this relative risk change # over time? > exp(0.0396*(43-58)+0.8963*.5+0.8636*(log(.8)-log(3.4))) [1] 0.2477316 # The first patient's risk is about 1/4 of the risk of the second patient. ####################################################### # Wald, score and likelihood ratio tests for all beta's # i.e., H0: beta_1=...=beta_p=0 ####################################################### help('coxph.object') names(fit) ## var of beta estimates > fit$var [,1] [,2] [,3] [,4] [,5] [1,] 5.877855e-05 -0.0001016486 8.417268e-05 0.0002528214 0.0003681444 [2,] -1.016486e-04 0.0736633499 -3.146007e-03 -0.0403431161 0.0483906174 [3,] 8.417268e-05 -0.0031460069 6.879204e-03 -0.0054421872 0.0068123926 [4,] 2.528214e-04 -0.0403431161 -5.442187e-03 0.5906064877 0.0245296546 [5,] 3.681444e-04 0.0483906174 6.812393e-03 0.0245296546 0.4262988318 > round(sqrt(diag(fit$var)),5) [1] 0.00767 0.27141 0.08294 0.76851 0.65292 #se(coef) > fit$loglik [1] -866.9573 -751.4697 ## log likelihoods for null model and current model. Very useful for ## comparing nested models. > fit$score [1] 301.8424 > fit$wald.test [1] 234.1453 ## LRT=231 # all three tests are chi.square with 5 df, similar ## also see summary(fit) ###########confidence intervals can be calcuated based on wald test ################################################ # testing for individual beta, e.g. H0: beta_3=0 ################################################ #wald test: Z=10.41, so chisq=10.41^2 > 10.41^2 [1] 108.3681 # LRT: refit the model without log(bili) fit3<-coxph(Surv(time,delta)~ageyr+edema+log(protime)+log(albumin)) > fit3 coef exp(coef) se(coef) z p ageyr 0.0287 1.0291 0.00779 3.68 2.3e-04 edema 1.3304 3.7824 0.27259 4.88 1.1e-06 log(protime) 3.0735 21.6176 0.68568 4.48 7.4e-06 log(albumin) -3.4911 0.0305 0.59977 -5.82 5.9e-09 Likelihood ratio test=127 on 4 df, p=0 n=416 (2 observations deleted due to missing) > 231-127 [1] 104 # OR > fit$loglik [1] -866.9573 -751.4697 > fit3$loglik [1] -866.9573 -803.4200 > 2*(803.4200-751.4697) [1] 103.9006 ## Caution: ## Make sure the the full and reduced model fits have the same sample size ## 2 missing values in both full and reduced models in our case. # score test (Getting the score test is not as direct and it is rarely needed # in practice. You may skip this one.) fit4<-coxph(Surv(time,delta)~ageyr+edema+log(bili)+log(protime)+log(albumin), init=c(coef(fit)[1:2],0,coef(fit)[4:5]),iter=0) > fit4$score [1] 132.4735