#================================== #An assessment of the utility of a Bayesian framework to improve response propensity models in longitudinal data. #R codes for Waves 2 and 3 (waves 4 and 5 codes are replicated from wave 3) #================================================================================== rm(list=ls()) library(foreign) require(sp) require(INLA) inla.setOption(enable.inla.argument.weights=TRUE) require(fmsb) # for Negelkerke pseudo_R2 require(pscl) # for McFadden pseudo-R2 library(pROC) # for ROC library(colorspace) require(nnet) #multinomial logistic require(reshape) require(caret) require(epiR) #=============================================================================== setwd("") list.files() wave1_sub1<- read.csv("wave1_sub1.csv"); names(wave1_sub1);length(wave1_sub1$pidp) wave1_sub1$sub_sample<-rep(1.1,times=length(wave1_sub1$pidp)) wave1_sub2<- read.csv("wave1_sub2.csv") wave1_sub2$sub_sample<-rep(1.2,times=length(wave1_sub2$pidp)) wave3_sub1<- read.csv("wave3_sub1.csv") wave3_sub1$sub_sample<-rep(2.1,times=length(wave3_sub1$pidp)) wave3_sub2<- read.csv("wave3_sub2.csv") wave3_sub2$sub_sample<-rep(2.2,times=length(wave3_sub2$pidp)) wave4_sub1<- read.csv("wave4_sub1.csv") wave4_sub1$sub_sample<-rep(3.1,times=length(wave4_sub1$pidp)) wave4_sub2<- read.csv("wave4_sub2.csv") wave4_sub2$sub_sample<-rep(3.2,times=length(wave4_sub2$pidp)) wave5_sub1<- read.csv("wave5_sub1.csv") wave5_sub1$sub_sample<-rep(4.1,times=length(wave5_sub1$pidp)) wave5_sub2<- read.csv("wave5_sub2.csv") wave5_sub2$sub_sample<-rep(4.2,times=length(wave5_sub2$pidp)) #combining data files data_complete1<-rbind(wave1_sub1,wave1_sub2) data_complete2<-rbind(data_complete1,wave3_sub1) data_complete3<-rbind(data_complete2,wave3_sub2) data_complete4<-rbind(data_complete3,wave4_sub1) data_complete5<-rbind(data_complete4,wave4_sub2) data_complete6<-rbind(data_complete5,wave5_sub1) data_complete7<-rbind(data_complete6,wave5_sub2) data_complete<-data_complete7;names(data_complete);summary(data_complete) length(data_complete$pidp) #==================================================================================================================================== wave2_data<-subset(wave_data5,wave==2);length(wave2_data$pidp) summary(wave2_data$pw_gor_dv) summary(wave2_data$cw_call_interview_final) # Call Outcome interview_yes<-subset(wave2_data,cw_call_interview_final=="at least one interview" ) #at least one interview in a sequence interview_no<-subset(wave2_data,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave2_data))*100 (nrow(interview_no)/nrow(wave2_data))*100 #call length summary(wave2_data$cw_calllengthCat) sequence_short<-subset(wave2_data,cw_calllengthCat=="short sequence (1-6)") sequence_long<-subset(wave2_data,cw_calllengthCat=="long sequence (7-30)") (nrow(sequence_short)/nrow(wave2_data))*100 (nrow(sequence_long)/nrow(wave2_data))*100 #==================================================================================================================================== #Call Outcome #==================================================================================================================================== wave2_univariate<-subset(wave_data5,wave==1);length(wave2_univariate$pidp) wave2_univariate$cw_inter_relevel<-relevel(wave2_univariate$cw_call_interview_final, ref="no interview") #Relevel contrasts contrasts(wave2_univariate$cw_inter_relevel) length(wave2_univariate$cw_inter_relevel) wave2_univariate$resp_interview<- as.integer(ordered(wave2_univariate$cw_inter_relevel,levels = c("no interview","at least one interview" ),labels = c(0,1))) wave2_univariate$inter_respon<-wave2_univariate$resp_interview-1;summary(wave2_univariate$inter_respon) names(wave2_univariate) wave2_univariate$cw_seq_relevel<-relevel(wave2_univariate$cw_calllengthCat, ref="long sequence (7-30)") #Relevel contrasts contrasts(wave2_univariate$cw_seq_relevel) length(wave2_univariate$cw_seq_relevel) wave2_univariate$resp_seq<- as.integer(ordered(wave2_univariate$cw_seq_relevel,levels = c("long sequence (7-30)","short sequence (1-6)" ),labels = c(0,1))) wave2_univariate$resp_seq<-wave2_univariate$resp_seq-1 summary(wave2_univariate$resp_seq) #Final models for final call outcome model_outcome3<- inla(inter_respon ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength+pw_hhsizeCat, family='binomial',Ntrials = 1, control.compute = list(dic = TRUE, waic = TRUE),data=wave2_univariate,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(model_outcome3) model_glm_call5<- glm(cw_inter_relevel ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength+pw_hhsizeCat, family = binomial(link='logit'),data=wave2_univariate) summary(model_glm_call5) NagelkerkeR2(model_glm_call5) summary(as.factor(wave_data5$wave)) #======================================================================================================================= #Wave 2 data #======================================================================================================================= wave2_data<-subset(wave_data5,wave==4);length(wave2_data$pidp) summary(wave2_data$pw_gor_dv) summary(wave2_data$cw_call_interview_final) #weight<-rep(c(0.8),c(12225)) #wave2_data$weight<-weight # Call Outcome interview_yes<-subset(wave2_data,cw_call_interview_final=="at least one interview" ) #at least one interview in a sequence interview_no<-subset(wave2_data,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave2_data))*100 (nrow(interview_no)/nrow(wave2_data))*100 wave2_data$cw_inter_relevel<-relevel(wave2_data$cw_call_interview_final, ref="no interview") #Relevel contrasts contrasts(wave2_data$cw_inter_relevel) length(wave2_data$cw_inter_relevel) wave2_data$resp_interview<- as.integer(ordered(wave2_data$cw_inter_relevel,levels = c("no interview","at least one interview" ),labels = c(0,1))) wave2_data$inter_respon<-wave2_data$resp_interview-1;summary(wave2_data$inter_respon) names(wave2_data) #============================================================================================================================= # Maximum likelihood #============================================================================================================================== wave2_data<-subset(wave_data5,sub_sample==1.1);length(wave2_data$pidp) summary(wave2_data$pw_gor_dv) summary(wave2_data$cw_call_interview_final) weight<-rep(c(0.8),c(12225)) wave2_data$weight<-weight # Call Outcome interview_yes<-subset(wave2_data,cw_call_interview_final=="at least one interview" ) #at least one interview in a sequence interview_no<-subset(wave2_data,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave2_data))*100 (nrow(interview_no)/nrow(wave2_data))*100 wave2_data$cw_inter_relevel<-relevel(wave2_data$cw_call_interview_final, ref="no interview") #Relevel contrasts contrasts(wave2_data$cw_inter_relevel) length(wave2_data$cw_inter_relevel) wave2_data$resp_interview<- as.integer(ordered(wave2_data$cw_inter_relevel,levels = c("no interview","at least one interview" ),labels = c(0,1))) wave2_data$inter_respon<-wave2_data$resp_interview-1;summary(wave2_data$inter_respon) names(wave2_data) #call length summary(wave2_data$cw_calllengthCat) sequence_short<-subset(wave2_data,cw_calllengthCat=="short sequence (1-6)") sequence_long<-subset(wave2_data,cw_calllengthCat=="long sequence (7-30)") (nrow(sequence_short)/nrow(wave2_data))*100 (nrow(sequence_long)/nrow(wave2_data))*100 names(wave2_data) wave2_datab<-wave2_data wave2_sample<-subset(wave_data5,sub_sample==1.2);length(wave2_sample$pidp) #Out of Sample predicted probabilities weight<-rep(c(0.8),c(12225)) wave2_sample$weight<-weight #Out of Sample predicted probabilities weight<-rep(c(0.8),c(12225)) wave2_sample$weight<-weight # Call Outcome interview_yes<-subset(wave2_sample,cw_call_interview_final=="at least one interview" ) #at least one interview in a sequence interview_no<-subset(wave2_sample,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave2_sample))*100 (nrow(interview_no)/nrow(wave2_sample))*100 #call length summary(wave2_data$cw_calllengthCat) sequence_short<-subset(wave2_sample,cw_calllengthCat=="short sequence (1-6)") sequence_long<-subset(wave2_sample,cw_calllengthCat=="long sequence (7-30)") (nrow(sequence_short)/nrow(wave2_sample))*100 (nrow(sequence_long)/nrow(wave2_sample))*100 wave2_sample$cw_inter_relevel<-relevel(wave2_sample$cw_call_interview_final, ref="no interview") #Relevel contrasts wave2_sample$resp_interview<- as.integer(ordered(wave2_sample$cw_inter_relevel,levels = c("no interview","at least one interview" ),labels = c(0,1))) length(wave2_data$cw_inter_relevel) wave2_sampleb<-wave2_sample mod_glm1<-glm(cw_inter_relevel ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family = binomial(link='logit'),data=wave2_data) summary(mod_glm1) # Nagelkerke R squared NagelkerkeR2(mod_glm1) # Pseudo R squared Measures pR2(mod_glm1) pred_mod_glm1<-predict(mod_glm1,newdata=wave2_sample,type="response") ;length(pred_mod_glm1) # A default threshold of 0.5 is chosen for binary reponse thresh <- 0.5 pred_probsCat2 <- cut(pred_mod_glm1, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) # contingency table and marginal sums bTab <- table(wave2_sample$cw_inter_relevel, pred_probsCat2) addmargins(bTab) # percentage correct for training wave2_data sum(diag(bTab)) / sum(bTab) #Sensitivity, Specificity wave2_table1<- as.table(matrix(c(81,2672,74,9398), nrow = 2, byrow = TRUE)) epi.tests(wave2_table1) #ROC Curve wave2_sample$pred_mod_glm2<-pred_mod_glm1 # Smoothed ROC curve predlen_smooth<-roc(wave2_sample$cw_inter_relevel, wave2_sample$pred_mod_glm2,smooth=TRUE,ci=TRUE) plot(predlen_smooth, main="Frequentist ROC curve",col="blue") text(1.0, 0.8, paste("AUC =",0.643),pos=4) #============================================================================================================================== #uninformative #============================================================================================================================== wave2_datab$inter_respon<-wave2_datab$resp_interview-1;summary(wave2_datab$inter_respon) wave2_sampleb$inter_respon<-wave2_sampleb$resp_interview-1;summary(wave2_sampleb$inter_respon) wave2_datab$inter_respon2<-wave2_datab$inter_respon wave2_sampleb$inter_respon2<-wave2_sampleb$inter_respon wave2_sampleb$inter_respon2<-NA names(wave2_datab) names(wave2_sampleb) wave2_samp_data<-rbind(wave2_datab,wave2_sampleb) model1_u<- inla(inter_respon2~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.compute = list(dic = TRUE, waic = TRUE),data=wave2_samp_data,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(model1_u) #plot(model1_u, plot.prior=TRUE) linPred_u<-model1_u$summary.linear.predictor$mean prob_u<-exp(linPred_u)/(1+exp(linPred_u)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_cat_u <- cut(prob_u, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_cat_u2 <- cut(prob_u, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave2_samp_data$prob_cat_u2 <-prob_cat_u2 pred_names<-c("inter_respon","inter_respon2","prob_cat_u2") bayes2_pred<-wave2_samp_data[pred_names] names(bayes2_pred);head(bayes2_pred);tail(bayes2_pred) bayes2_pred_data<-subset(bayes2_pred, is.na(bayes2_pred$inter_respon2)) names(bayes2_pred_data);head(bayes2_pred_data);tail(bayes2_pred_data) # contingency table and marginal sums bayespred2_cTabB <- table(bayes2_pred_data$inter_respon ,bayes2_pred_data$prob_cat_u2) addmargins(bayespred2_cTabB) # Classification Table sum(diag(bayespred2_cTabB)) / sum(bayespred2_cTabB) wave2_table2<- as.table(matrix(c(81,2672,73,9399), nrow = 2, byrow = TRUE)) epi.tests(wave2_table2) #ROC Curve bayes2_pred$prob_u2<-prob_u bayes2_pred2<-subset(bayes2_pred, is.na(bayes2_pred$inter_respon2)) #Smoothed ROC curve bayes2_pred_model1smooth<-roc(bayes2_pred2$inter_respon ,bayes2_pred2$prob_u2, percent=TRUE,smooth=TRUE) plot(bayes2_pred_model1smooth) #========================================================================================================================================== #Wave 3 #========================================================================================================================================== wave3_data<-subset(wave_data5,sub_sample==1.1|sub_sample==2.1); length(wave3_data[,1]) summary(wave3_data$cw_call_interview_final) # Call Outcome interview_yes<-subset(wave3_data,cw_call_interview_final=="at least one interview") #at least one interview in a sequence interview_no<-subset(wave3_data,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave3_data))*100 (nrow(interview_no)/nrow(wave3_data))*100 #call length summary(wave3_data$cw_calllengthCat) sequence_short<-subset(wave3_data,cw_calllengthCat=="short sequence (1-6)") sequence_long<-subset(wave3_data,cw_calllengthCat=="long sequence (7-30)") (nrow(sequence_short)/nrow(wave3_data))*100 (nrow(sequence_long)/nrow(wave3_data))*100 #============================================================================================================================= # Maximum likelihood (model 1) #============================================================================================================================== wave3_data$cw_inter_relevel<-relevel(wave3_data$cw_call_interview_final, ref="no interview") #Relevel contrasts contrasts(wave3_data$cw_inter_relevel) length(wave3_data$cw_inter_relevel) wave3_data$resp_interview<- as.integer(ordered(wave3_data$cw_inter_relevel,levels = c("no interview","at least one interview"),labels = c(0,1))) wave3_datab<-wave3_data wave3_datainf1<-wave3_data wave3_sample<-subset(wave_data5,sub_sample==2.2);length(wave3_sample$pidp) #Out of Sample predicted probabilities # Call Outcome interview_yes<-subset(wave3_sample,cw_call_interview_final=="at least one interview" ) #at least one interview in a sequence interview_no<-subset(wave3_sample,cw_call_interview_final=="no interview") #subset of long sequence (nrow(interview_yes)/nrow(wave3_sample))*100 (nrow(interview_no)/nrow(wave3_sample))*100 #call length summary(wave3_data$cw_calllengthCat) sequence_short<-subset(wave3_sample,cw_calllengthCat=="short sequence (1-6)") sequence_long<-subset(wave3_sample,cw_calllengthCat=="long sequence (7-30)") (nrow(sequence_short)/nrow(wave3_sample))*100 (nrow(sequence_long)/nrow(wave3_sample))*100 wave3_sample$cw_inter_relevel<-relevel(wave3_sample$cw_call_interview_final, ref="no interview") #Relevel contrasts wave3_sample$resp_interview<- as.integer(ordered(wave3_sample$cw_inter_relevel,levels = c("no interview","at least one interview"),labels = c(0,1))) wave3_sampleb<-wave3_sample wave3_sampleinf1<-wave3_sample mod_glm3<-glm(cw_inter_relevel ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family = binomial(link='logit'),data=wave3_data) summary(mod_glm3) # Nagelkerke R squared NagelkerkeR2(mod_glm3) # Pseudo R squared Measures pR2(mod_glm3) #predicted probabilities pred_mod_glm3<-predict(mod_glm3,newdata=wave3_sample,type="response") summary(pred_mod_glm3) # A default threshold of 0.5 is chosen for binary reponse thresh <- 0.5 pred_probsCat3 <- cut(pred_mod_glm3, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) # contingency table and marginal sums bTab3 <- table(wave3_sample$cw_inter_relevel, pred_probsCat3) addmargins(bTab3) # percentage correct for training wave3_data sum(diag(bTab3)) / sum(bTab3) wave3_table1<- as.table(matrix(c(13,1738,25,7811), nrow = 2, byrow = TRUE)) epi.tests(wave3_table1) #ROC Curve wave3_sample$pred_mod_glm3<-pred_mod_glm3 # Smoothed ROC curve predlen_smooth3<-roc(wave3_sample$cw_inter_relevel, wave3_sample$pred_mod_glm3, smooth=TRUE) plot(predlen_smooth3, main="Frequentist ROC curve",col="blue") text(1.0, 0.8, paste("AUC =",0.625),pos=4) # #============================================================================================================================== #uninformative (model 2) #============================================================================================================================== wave3_datab$inter_respon<-wave3_datab$resp_interview-1;summary(wave3_datab$inter_respon) wave3_datab$inter_respon2<-wave3_datab$inter_respon wave3_sampleb$inter_respon<-wave3_sampleb$resp_interview-1;summary(wave3_sampleb$inter_respon) wave3_sampleb$inter_respon2<-wave3_sampleb$inter_respon wave3_sampleb$inter_respon2<-NA wave3_samp_data<-rbind(wave3_datab,wave3_sampleb) wave3_samp_data1<-wave3_samp_data wave3_samp_data2<-wave3_samp_data wave3_samp_data3<-wave3_samp_data wave3_samp_data4<-wave3_samp_data wave3_samp_data5<-wave3_samp_data wave3_samp_data6<-wave3_samp_data wave3_samp_data7<-wave3_samp_data uninf_model3<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(uninf_model3) #plot(uninf_model3, plot.prior=TRUE) linPred<-uninf_model3$summary.linear.predictor$mean prob3_unif<-exp(linPred)/(1+exp(linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob3_unif_cat <- cut(prob3_unif, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob3_unif_cat2 <- cut(prob3_unif, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data$prob3_unif_cat2 <-prob3_unif_cat2 pred_names<-c("inter_respon","inter_respon2","prob3_unif_cat2") bayes3_pred<-wave3_samp_data[pred_names] names(bayes3_pred);head(bayes3_pred);tail(bayes3_pred) bayes3_pred_data<-subset(bayes3_pred, is.na(bayes3_pred$inter_respon2)) names(bayes3_pred_data);head(bayes3_pred_data);tail(bayes3_pred_data) # contingency table and marginal sums bayespred3_cTabB <- table(bayes3_pred_data$inter_respon ,bayes3_pred_data$prob3_unif_cat2) addmargins(bayespred3_cTabB) # Classification Table sum(diag(bayespred3_cTabB)) / sum(bayespred3_cTabB) wave3_table2<- as.table(matrix(c(13,1738,25,7811), nrow = 2, byrow = TRUE)) epi.tests(wave3_table2) #ROC Curve bayes3_pred$prob3_unif_cat3<-prob3_unif bayes3_pred3<-subset(bayes3_pred, is.na(bayes3_pred$inter_respon2));head(bayes3_pred3) # Smoothed ROC curve bayes3_pred_model3smooth<-roc(bayes3_pred3$inter_respon ,bayes3_pred3$prob3_unif_cat3,smooth=TRUE) plot(bayes3_pred_model3smooth, main="Uninformative prior ROC curve",col="blue") text(1.0, 0.8, paste("AUC =",0.624),pos=4) #=============================================================================================================================== #Wave 2 data for informative priors (model 3) #wave2_inforP<-subset(wave_data5,wave==1);length(wave2_inforP$pidp) wave2_inforP<-subset(wave_data5,sub_sample==1.1);length(wave2_inforP$pidp) wave2_inforP$cw_inter_relevel<-relevel(wave2_inforP$cw_call_interview_final, ref="no interview") #Relevel contrasts contrasts(wave2_inforP$cw_inter_relevel) length(wave2_inforP$cw_inter_relevel) wave2_inforP$resp_interview<- as.integer(ordered(wave2_inforP$cw_inter_relevel,levels = c("no interview","at least one interview" ),labels = c(0,1))) wave2_inforP$inter_respon<-wave2_inforP$resp_interview-1;summary(wave2_inforP$inter_respon) names(wave2_inforP) wave2_inforP$cw_seq_relevel<-relevel(wave2_inforP$cw_calllengthCat, ref="long sequence (7-30)") #Relevel contrasts contrasts(wave2_inforP$cw_seq_relevel) length(wave2_inforP$cw_seq_relevel) wave2_inforP$resp_seq<- as.integer(ordered(wave2_inforP$cw_seq_relevel,levels = c("long sequence (7-30)","short sequence (1-6)" ),labels = c(0,1))) wave2_inforP$resp_seq<-wave2_inforP$resp_seq-1 summary(wave2_inforP$resp_seq) #Extract coefficient estimates from historical data models wave2_model_inf<- inla(inter_respon ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.compute = list(dic = TRUE, waic = TRUE),data=wave2_inforP,control.family=list(link='logit')) summary(wave2_model_inf) #======================================================================================== #Informative (model 3) mean_estimate_0.1<-wave2_model_inf$summary.fixed$mean #extract mean fixed_mean_est_0.1<-mean_estimate_0.1[-1] #exclude the mean intercept sd_estimate_0.1<-wave2_model_inf$summary.fixed$sd #extract sd fixed_sd_estimate_0.1<-sd_estimate_0.1[-1] #exclude the sd intercept fixed_var_est_0.1<-fixed_sd_estimate_0.1^2 # variance of the fixed effects fixed_preci_est_0.1<-1/fixed_var_est_0.1 #precision of the fixed effects intercept_mean_est_0.1<-mean_estimate_0.1[1] intercept_var_estimate_0.1<-sd_estimate_0.1[1]^2 #sd for intercept intercept_preci_est_0.1<-1/intercept_var_estimate_0.1 #precision for estimate wave3_model3<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.fixed=list(mean=fixed_mean_est_0.1,prec=fixed_preci_est_0.1,mean.intercept=0,prec.intercept=0.001), control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data1,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(wave3_model3) #plot(wave3_model3,plot.prior=T,col="red") wave3_model3_linPred<-wave3_model3$summary.linear.predictor$mean wave3_model3_prob<-exp(wave3_model3_linPred)/(1+exp(wave3_model3_linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_wave3_model3_cata <- cut(wave3_model3_prob, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_wave3_model3_cata2 <- cut(wave3_model3_prob, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data1$prob_wave3_model3_cata2 <-prob_wave3_model3_cata2 pred_names<-c("inter_respon","inter_respon2","prob_wave3_model3_cata2") wave3_model3_pred<-wave3_samp_data1[pred_names] names(wave3_model3_pred);head(wave3_model3_pred);tail(wave3_model3_pred) wave3_model3_pred_data<-subset(wave3_model3_pred, is.na(wave3_model3_pred$inter_respon2)) names(wave3_model3_pred_data);head(wave3_model3_pred_data);tail(wave3_model3_pred_data) # contingency table and marginal sums wave3_model3_cTabB <- table(wave3_model3_pred_data$inter_respon ,wave3_model3_pred_data$prob_wave3_model3_cata2) addmargins(wave3_model3_cTabB) # Classification Table sum(diag(wave3_model3_cTabB)) / sum(wave3_model3_cTabB) wave3_model3_table2<- as.table(matrix(c(2,1749,2,7834), nrow = 2, byrow = TRUE)) epi.tests(wave3_model3_table2) #ROC Curve wave3_model3_pred$wave3_model3_prob2<-wave3_model3_prob wave3_model3_predf<-subset(wave3_model3_pred, is.na(wave3_model3_pred$inter_respon2));head(wave3_model3_predf);tail(wave3_model3_predf) # Smoothed ROC curve wave3_pred_model3__smooth<-roc(wave3_model3_predf$inter_respon,wave3_model3_predf$wave3_model3_prob2,smooth=TRUE) plot(wave3_pred_model3__smooth,main="informative prior model 3 ROC curve",col="blue") text(1.0, 0.8, paste("AUC =",0.621),pos=4) #======================================================================================================================= #Informative (model 4) mean_estimate_2<-wave2_model_inf$summary.fixed$mean #extract mean fixed_mean_est_2<-mean_estimate_2[-1] #exclude the mean intercept sd_estimate_2<-wave2_model_inf$summary.fixed$sd #extract sd fixed_sd_estimate_2<-sd_estimate_2[-1] #exclude the sd intercept fixed_var_est_2<-fixed_sd_estimate_2^2 # variance of the fixed effects fixed_preci_est_2<-1/fixed_var_est_2 #precision of the fixed effects intercept_mean_est_2<-mean_estimate_2[1] intercept_var_estimate_2<-sd_estimate_2[1]^2 #sd for intercept intercept_preci_est_2<-1/intercept_var_estimate_2 #precision for estimate #Informative (model 3) wave3_model4<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.fixed=list(mean=fixed_mean_est_2,prec=fixed_preci_est_2,mean.intercept=0,prec.intercept=0.001), control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data1,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(wave3_model4) #plot(wave3_model4,plot.prior=T,col="red") wave3_model4_linPred<-wave3_model4$summary.linear.predictor$mean wave3_model4_prob<-exp(wave3_model4_linPred)/(1+exp(wave3_model4_linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_wave3_model4_cata <- cut(wave3_model4_prob, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_wave3_model4_cata2 <- cut(wave3_model4_prob, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data1$prob_wave3_model4_cata2 <-prob_wave3_model4_cata2 pred_names<-c("inter_respon","inter_respon2","prob_wave3_model4_cata2") wave3_model4_pred<-wave3_samp_data1[pred_names] names(wave3_model4_pred);head(wave3_model4_pred);tail(wave3_model4_pred) wave3_model4_pred_data<-subset(wave3_model4_pred, is.na(wave3_model4_pred$inter_respon2)) names(wave3_model4_pred_data);head(wave3_model4_pred_data);tail(wave3_model4_pred_data) # contingency table and marginal sums wave3_model4_cTabB <- table(wave3_model4_pred_data$inter_respon ,wave3_model4_pred_data$prob_wave3_model4_cata2) addmargins(wave3_model4_cTabB) # Classification Table sum(diag(wave3_model4_cTabB)) / sum(wave3_model4_cTabB) wave3_model4_table2<- as.table(matrix(c(2,1749,2,7834), nrow = 2, byrow = TRUE)) epi.tests(wave3_model4_table2) #ROC Curve wave3_model4_pred$wave3_model4_prob2<-wave3_model4_prob wave3_model4_predf<-subset(wave3_model4_pred, is.na(wave3_model4_pred$inter_respon2));head(wave3_model4_predf);tail(wave3_model4_predf) # Smoothed ROC curve wave3_pred_model4__smooth<-roc(wave3_model4_predf$inter_respon,wave3_model4_predf$wave3_model4_prob2,smooth=TRUE) plot(wave3_pred_model4__smooth,main="informative prior model 4 ROC curve",col="blue") #========================================================================================================= #informative (model 5) mean_estimate_5<-wave2_model_inf$summary.fixed$mean #extract mean fixed_mean_est_5<-mean_estimate_5[-1] #exclude the mean intercept sd_estimate_5<-wave2_model_inf$summary.fixed$sd #extract sd fixed_sd_estimate_5<-sd_estimate_5[-1] #exclude the sd intercept fixed_var_est_5<-fixed_sd_estimate_5^2 # variance of the fixed effects fixed_preci_est_5<-1/fixed_var_est_5 #precision of the fixed effects intercept_mean_est_5<-mean_estimate_5[1] intercept_var_estimate_5<-sd_estimate_5[1]^2 #sd for intercept intercept_preci_est_5<-1/intercept_var_estimate_5 #precision for estimate #Informative (model 3) wave3_model5<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.fixed=list(mean=fixed_mean_est_5,prec=fixed_preci_est_5,mean.intercept=0,prec.intercept=0.001), control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data1,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(wave3_model5) #plot(wave3_model5,plot.prior=T,col="red") wave3_model5_linPred<-wave3_model5$summary.linear.predictor$mean wave3_model5_prob<-exp(wave3_model5_linPred)/(1+exp(wave3_model5_linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_wave3_model5_cata <- cut(wave3_model5_prob, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_wave3_model5_cata2 <- cut(wave3_model5_prob, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data1$prob_wave3_model5_cata2 <-prob_wave3_model5_cata2 pred_names<-c("inter_respon","inter_respon2","prob_wave3_model5_cata2") wave3_model5_pred<-wave3_samp_data1[pred_names] names(wave3_model5_pred);head(wave3_model5_pred);tail(wave3_model5_pred) wave3_model5_pred_data<-subset(wave3_model5_pred, is.na(wave3_model5_pred$inter_respon2)) names(wave3_model5_pred_data);head(wave3_model5_pred_data);tail(wave3_model5_pred_data) # contingency table and marginal sums wave3_model5_cTabB <- table(wave3_model5_pred_data$inter_respon ,wave3_model5_pred_data$prob_wave3_model5_cata2) addmargins(wave3_model5_cTabB) # Classification Table sum(diag(wave3_model5_cTabB)) / sum(wave3_model5_cTabB) wave3_model5_table2<- as.table(matrix(c(2,1749,2,7834), nrow = 2, byrow = TRUE)) epi.tests(wave3_model5_table2) #ROC Curve wave3_model5_pred$wave3_model5_prob2<-wave3_model5_prob wave3_model5_predf<-subset(wave3_model5_pred, is.na(wave3_model5_pred$inter_respon2));head(wave3_model5_predf);tail(wave3_model5_predf) # Smoothed ROC curve wave3_pred_model5__smooth<-roc(wave3_model5_predf$inter_respon,wave3_model5_predf$wave3_model5_prob2,smooth=TRUE) plot(wave3_pred_model5__smooth,main="informative prior model 5 ROC curve",col="blue") #============================================================================================================================================== #Informative (model 6) mean_estimate_10<-wave2_model_inf$summary.fixed$mean #extract mean fixed_mean_est_10<-mean_estimate_10[-1] #exclude the mean intercept sd_estimate_10<-wave2_model_inf$summary.fixed$sd #extract sd fixed_sd_estimate_10<-sd_estimate_10[-1] #exclude the sd intercept fixed_var_est_10<-fixed_sd_estimate_10^2 # variance of the fixed effects fixed_preci_est_10<-1/fixed_var_est_10 #precision of the fixed effects intercept_mean_est_10<-mean_estimate_10[1] intercept_var_estimate_10<-sd_estimate_10[1]^2 #sd for intercept intercept_preci_est_10<-1/intercept_var_estimate_10 #precision for estimate #Informative (model 3) wave3_model6<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.fixed=list(mean=fixed_mean_est_10,prec=fixed_preci_est_10,mean.intercept=0,prec.intercept=0.001), control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data1,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(wave3_model6) #plot(wave3_model6,plot.prior=T,col="red") wave3_model6_linPred<-wave3_model6$summary.linear.predictor$mean wave3_model6_prob<-exp(wave3_model6_linPred)/(1+exp(wave3_model6_linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_wave3_model6_cata <- cut(wave3_model6_prob, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_wave3_model6_cata2 <- cut(wave3_model6_prob, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data1$prob_wave3_model6_cata2 <-prob_wave3_model6_cata2 pred_names<-c("inter_respon","inter_respon2","prob_wave3_model6_cata2") wave3_model6_pred<-wave3_samp_data1[pred_names] names(wave3_model6_pred);head(wave3_model6_pred);tail(wave3_model6_pred) wave3_model6_pred_data<-subset(wave3_model6_pred, is.na(wave3_model6_pred$inter_respon2)) names(wave3_model6_pred_data);head(wave3_model6_pred_data);tail(wave3_model6_pred_data) # contingency table and marginal sums wave3_model6_cTabB <- table(wave3_model6_pred_data$inter_respon ,wave3_model6_pred_data$prob_wave3_model6_cata2) addmargins(wave3_model6_cTabB) # Classification Table sum(diag(wave3_model6_cTabB)) / sum(wave3_model6_cTabB) wave3_model6_table2<- as.table(matrix(c(2,1749,2,7834), nrow = 2, byrow = TRUE)) epi.tests(wave3_model6_table2) #ROC Curve wave3_model6_pred$wave3_model6_prob2<-wave3_model6_prob wave3_model6_predf<-subset(wave3_model6_pred, is.na(wave3_model6_pred$inter_respon2));head(wave3_model6_predf);tail(wave3_model6_predf) # Smoothed ROC curve wave3_pred_model6__smooth<-roc(wave3_model6_predf$inter_respon,wave3_model6_predf$wave3_model6_prob2,smooth=TRUE) plot(wave3_pred_model6__smooth,main="informative prior model 6 ROC curve",col="blue") #========================================================================================================================== #Informative (model 7) mean_estimate_100<-wave2_model_inf$summary.fixed$mean #extract mean fixed_mean_est_100<-mean_estimate_100[-1] #exclude the mean intercept sd_estimate_100<-wave2_model_inf$summary.fixed$sd #extract sd fixed_sd_estimate_100<-sd_estimate_100[-1] #exclude the sd intercept fixed_var_est_100<-fixed_sd_estimate_100^2 # variance of the fixed effects fixed_preci_est_100<-1/fixed_var_est_100 #precision of the fixed effects intercept_mean_est_100<-mean_estimate_100[1] intercept_var_estimate_100<-sd_estimate_100[1]^2 #sd for intercept intercept_preci_est_100<-1/intercept_var_estimate_100 #precision for estimate #Informative (model 3) wave3_model7<- inla(inter_respon2 ~pw_gor_dv+pw_urban+pw_month2+pw_pensionage+pw_employed+pw_hiqual+pw_income_quartile+pw_tenure_dv+pw_dweltyp3+ pw_children+pw_number_cars+pw_hhsizeCat+pw_propaothstgr+pw_propcontactgr+pw_propappgr+pw_propnoncontactgr+pw_calllength, family='binomial',Ntrials = 1, control.fixed=list(mean=fixed_mean_est_100,prec=fixed_preci_est_100,mean.intercept=0,prec.intercept=0.001), control.compute = list(dic = TRUE, waic = TRUE),data=wave3_samp_data1,control.family=list(link='logit'),control.predictor = list(link = 1)) summary(wave3_model7) #plot(wave3_model7,plot.prior=T,col="red") wave3_model7_linPred<-wave3_model7$summary.linear.predictor$mean wave3_model7_prob<-exp(wave3_model7_linPred)/(1+exp(wave3_model7_linPred)) # choose a threshold for dichotomizing according to predicted probability thresh <- 0.5 prob_wave3_model7_cata <- cut(wave3_model7_prob, breaks=c(-Inf, thresh, Inf), labels=c("no interview","at least one interview")) prob_wave3_model7_cata2 <- cut(wave3_model7_prob, breaks=c(-Inf, thresh, Inf), labels=c(0,1)) wave3_samp_data1$prob_wave3_model7_cata2 <-prob_wave3_model7_cata2 pred_names<-c("inter_respon","inter_respon2","prob_wave3_model7_cata2") wave3_model7_pred<-wave3_samp_data1[pred_names] names(wave3_model7_pred);head(wave3_model7_pred);tail(wave3_model7_pred) wave3_model7_pred_data<-subset(wave3_model7_pred, is.na(wave3_model7_pred$inter_respon2)) names(wave3_model7_pred_data);head(wave3_model7_pred_data);tail(wave3_model7_pred_data) # contingency table and marginal sums wave3_model7_cTabB <- table(wave3_model7_pred_data$inter_respon ,wave3_model7_pred_data$prob_wave3_model7_cata2) addmargins(wave3_model7_cTabB) # Classification Table sum(diag(wave3_model7_cTabB)) / sum(wave3_model7_cTabB) wave3_model7_table2<- as.table(matrix(c(2,1749,2,7834), nrow = 2, byrow = TRUE)) epi.tests(wave3_model7_table2) #ROC Curve wave3_model7_pred$wave3_model7_prob2<-wave3_model7_prob wave3_model7_predf<-subset(wave3_model7_pred, is.na(wave3_model7_pred$inter_respon2));head(wave3_model7_predf);tail(wave3_model7_predf) # Smoothed ROC curve wave3_pred_model7__smooth<-roc(wave3_model7_predf$inter_respon,wave3_model7_predf$wave3_model7_prob2,smooth=TRUE) plot(wave3_pred_model7__smooth,main="informative prior model 7 ROC curve",col="blue") #==============================================================================================================================================================