library(mvtnorm)
library(MASS)
library(rms)
library(R2HTML)
library(glmmTMB)
library(lme4)
library(data.table)
library(plyr)
library(coin)
library(haven)

breaks10=c(0,50,60,70,80,120)
breaks10str=c('0-49','50-59','60-69','70-79','80+')
restriction_10=c('50-59','60-69','70-79')
namm='5079'

Release = 5 # SHARE release

# declare input and output directories
# data: 
REG_DIR='D:\\data\\rawdata\\706551\\Grunddata\\' #Register directory
SURVEY_DIR='D:\\data\\rawdata\\706551\\Eksterne data\\' #Survey directory

# directories for results and temporary and temporary files:
SAVE_DIR="./tmp/"
RES_DIR="./results/"
SOURCE_DIR="./code/"

# load set of usable functions
source(paste(SOURCE_DIR,"run_tools.r",sep=''))

# load and prepare survey data
source(paste(SOURCE_DIR,"run_mergeid2pnr.r",sep=''))

# functions to summarize data
source(paste(SOURCE_DIR,"run_summary.r",sep=''))

# read register data
SavedReg = paste(SAVE_DIR,'reg.full.dat',sep='')
SavedDead = paste(SAVE_DIR,'reg.death.dat',sep='')

force_reg_calc = FALSE #set it to TRUE if you are running first time
if (!file.exists(SavedReg) | !file.exists(SavedDead) | force_reg_calc) {
  source(paste(SOURCE_DIR,"run_readregister.r",sep='')) 
} else {
  load(file=SavedReg)
  load(file=SavedDead)
}

# First stage of anonymized filtering of the register data
pnrtorm=REGISTER$pnr[REGISTER$year==2004 & is.na(REGISTER$birth.raw)]
REGISTER=REGISTER[!REGISTER$pnr%in%pnrtorm,]

# functions for calculation of visits 
source(paste(SOURCE_DIR,"run_timeseries.r",sep=''))

# preparing data for analysis
source(paste(SOURCE_DIR,"run_preparedata.R",sep=''))

####################################################################################
# TABLE 3 - Fit negative binomial model to sample B, self-reported visits
####################################################################################

MOD1bDATf_$ageclass_10=cut(as.numeric(MOD1bDATf_$Age), breaks=breaks10, #2006 age
                           include.lowest=TRUE, right = FALSE, labels=breaks10str)

RES=MOD1bDATf_[MOD1bDATf_$ageclass_10%in% restriction_10,] 
RES$ageclass<-droplevels(RES$ageclass) 
RES$gr<-c('2004','2006')[RES$gr]
RES$has.children = factor(RES$has.children,levels=c('yes','no'))
RES$income3yB.cat<-factor(RES$income3yB.cat,levels=c('hi','med','lo'))
RES$civstat<-factor(RES$civstat,levels=c('partnered','divorced or separated', 'widowed','unpartnered'))

MRb_0_nb=glm.nb(reportedvisits~gr, data=RES)#
exp(coef(MRb_0_nb))
MRb_1_nb=update(MRb_0_nb,.~.+sex)
MRb_2_nb=update(MRb_1_nb,.~.+ageclass)
MRb_3_nb=update(MRb_2_nb,.~.+foreigner)
MRb_4_nb=update(MRb_3_nb,.~.+edusurv3lv)
MRb_5_nb=update(MRb_4_nb,.~.+income3yB.cat)
MRb_6_nb=update(MRb_5_nb,.~.+employ)
MRb_7_nb=update(MRb_6_nb,.~.+has.children)
MRb_8_nb=update(MRb_7_nb,.~.+civstat)
exp(coef(MRb_8_nb))

HTMLInitFile(outdir=RES_DIR,file=paste('TABLE3_',namm,'_nb',sep=''),Title='Table 3')
HTMLmod(MRb_0_nb)
HTMLmod(MRb_8_nb)
HTMLEndFile(file = HTMLGetFile())

##########################################
# TABLE 1 
##########################################

##############
# TAB 1 TAIL
##############

rraBIGcoo<-rraBIGco
rraBIGcoo$mvisits=rraBIGcoo$mvisits*12

write.csv2(make.table(group='coh',data=rraBIGcoo),paste('TABLE1_',namm,'_by_COH.csv',sep=''),row.names=FALSE)

##############
# TAB 1 HEAD - results copied manually into the TABLE1
##############

########
# Year before 2004 survey
########

rbind(c(round(12*mean(rraBIGco$XBefore/rraBIGco$XBefore.offs),2),
        round(12*mean((rraBIGco$XBefore/rraBIGco$XBefore.offs)[rraBIGco$coh=='Coh2004']),2),
        round(12*mean((rraBIGco$XBefore/rraBIGco$XBefore.offs)[rraBIGco$coh=='Coh2006']),2)),
      c(round(12*sd(rraBIGco$XBefore/rraBIGco$XBefore.offs),2),
        round(12*sd((rraBIGco$XBefore/rraBIGco$XBefore.offs)[rraBIGco$coh=='Coh2004']),2),
        round(12*sd((rraBIGco$XBefore/rraBIGco$XBefore.offs)[rraBIGco$coh=='Coh2006']),2)))

12*range(rraBIGco$XBefore/rraBIGco$XBefore.offs)
t.test(XBefore~coh, data=rraBIGco)

########
# Year after 2004 survey
########
rbind(c(round(12*mean(rraBIGco$XAfter/rraBIGco$XAfter.offs),2),
        round(12*mean((rraBIGco$XAfter/rraBIGco$XAfter.offs)[rraBIGco$coh=='Coh2004']),2),
        round(12*mean((rraBIGco$XAfter/rraBIGco$XAfter.offs)[rraBIGco$coh=='Coh2006']),2)),
      c(round(12*sd(rraBIGco$XAfter/rraBIGco$XAfter.offs),2),
        round(12*sd((rraBIGco$XAfter/rraBIGco$XAfter.offs)[rraBIGco$coh=='Coh2004']),2),
        round(12*sd((rraBIGco$XAfter/rraBIGco$XAfter.offs)[rraBIGco$coh=='Coh2006']),2)))

12*range(rraBIGco$XAfter/rraBIGco$XAfter.offs)
t.test(12*XAfter/XAfter.offs~coh, data=rraBIGco)

########
# 3 months before 2004 survey
########
rbind(c(round(3*mean(rraBIGco$XBefore3/rraBIGco$XBefore3.offs),2),
        round(3*mean((rraBIGco$XBefore3/rraBIGco$XBefore3.offs)[rraBIGco$coh=='Coh2004']),2),
        round(3*mean((rraBIGco$XBefore3/rraBIGco$XBefore3.offs)[rraBIGco$coh=='Coh2006']),2)),
      c(round(3*sd(rraBIGco$XBefore3/rraBIGco$XBefore3.offs),2),
        round(3*sd((rraBIGco$XBefore3/rraBIGco$XBefore3.offs)[rraBIGco$coh=='Coh2004']),2),
        round(3*sd((rraBIGco$XBefore3/rraBIGco$XBefore3.offs)[rraBIGco$coh=='Coh2006']),2)))

3*range(rraBIGco$XBefore3/rraBIGco$XBefore3.offs)
t.test(XBefore3~coh, data=rraBIGco)

########
# 3 months after 2004 survey
########
rbind(c(round(3*mean(rraBIGco$XAfter3/rraBIGco$XAfter3.offs),2),
        round(3*mean((rraBIGco$XAfter3/rraBIGco$XAfter3.offs)[rraBIGco$coh=='Coh2004']),2),
        round(3*mean((rraBIGco$XAfter3/rraBIGco$XAfter3.offs)[rraBIGco$coh=='Coh2006']),2)),
      c(round(3*sd(rraBIGco$XAfter3/rraBIGco$XAfter3.offs),2),
        round(3*sd((rraBIGco$XAfter3/rraBIGco$XAfter3.offs)[rraBIGco$coh=='Coh2004']),2),
        round(3*sd((rraBIGco$XAfter3/rraBIGco$XAfter3.offs)[rraBIGco$coh=='Coh2006']),2)))

3*range(rraBIGco$XAfter3/rraBIGco$XAfter3.offs)
t.test(3*XAfter3/XAfter3.offs~coh, data=rraBIGco)

########
# No register-recorded doctor's visits, Before & After
########
fishtest('XBeforeZero','coh',rraBIGco)
fishtest('XAfterZero','coh',rraBIGco)
100*table(rraBIGco$XBeforeZero)/sum(table(rraBIGco$XBeforeZero))
100*table(rraBIGco$XAfterZero)/sum(table(rraBIGco$XAfterZero))

##########################################
# RUNING MODELS FOR SAMPLE A 
##########################################

# Create long data format
v=seq_len(dim(rraBIGco)[1])
DATA_LONG<-lapply(v, function(k) data.frame(Visit=c(rraBIGco$XBefore[k],rraBIGco$XAfter[k]),
                                            offs=c(rraBIGco$XBefore.offs[k],rraBIGco$XAfter.offs[k]),
                                            Visit3=c(rraBIGco$XBefore3[k],rraBIGco$XAfter3[k]),
                                            offs3=c(rraBIGco$XBefore3.offs[k],rraBIGco$XAfter3.offs[k]),
                                            pnr=as.factor(rraBIGco$pnr[k]),
                                            Age2004=rraBIGco$Age2004[k],
                                            Age04=rraBIGco$ageclass2004[k],
                                            foreigner = rraBIGco$foreigner[k],
                                            edusurv3lv = rraBIGco$edusurv3lv[k],
                                            income3yB.cat = rraBIGco$income3yB.cat[k],
                                            employ = rraBIGco$employ[k],
                                            has.children = rraBIGco$has.children[k],
                                            civstat = rraBIGco$civstat[k],
                                            coh=rraBIGco$coh[k],
                                            sex=rraBIGco$sex[k],
                                            TIME=c('B','A')))

DATA_LONG = rbindlist(DATA_LONG)

# MODEL 1 - 12 months
formula <- Visit ~ TIME * coh + (1|pnr)
obj <- glmmTMB::glmmTMB(formula=formula,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs))
beta <- obj$fit$parfull[names(obj$fit$parfull)=='beta']
f0 <- update(formula,'. ~ 1 + (1|pnr)')
m0 <-lme4::glmer(formula=f0,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs))
MODEL1_12M =lme4::glmer(formula=formula,data=DATA_LONG,family=poisson(),
                        control=glmerControl(optimizer='bobyqa',optCtrl = list(maxfun=2e5)),
                        start=list(fixef=beta, theta=m0@theta),offset=log(DATA_LONG$offs))

# MODEL 1 - 3 months
formula <- Visit3 ~ TIME * coh + (1|pnr)
obj <- glmmTMB::glmmTMB(formula=formula,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs3))
beta <- obj$fit$parfull[names(obj$fit$parfull)=='beta']
f0 <- update(formula,'. ~ 1 + (1|pnr)')
m0 <-lme4::glmer(formula=f0,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs3))
MODEL1_3M =lme4::glmer(formula=formula,data=DATA_LONG,family=poisson(),
                       control=glmerControl(optimizer='bobyqa',optCtrl = list(maxfun=2e5)),
                       start=list(fixef=beta, theta=m0@theta),offset=log(DATA_LONG$offs3))

# MODEL 2 - 12 months
formula <- test_form<- Visit ~ TIME * coh + (1|pnr) +
  Age04 + sex  +
  foreigner + edusurv3lv +income3yB.cat + employ+has.children +civstat
obj <- glmmTMB::glmmTMB(formula=formula,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs))
beta <- obj$fit$parfull[names(obj$fit$parfull)=='beta']
f0 <- update(formula,'. ~ 1 + (1|pnr)')
m0 <-lme4::glmer(formula=f0,data=DATA_LONG,family=poisson(),offset=log(DATA_LONG$offs))
MODEL2_12M =lme4::glmer(formula=formula,data=DATA_LONG,family=poisson(),
                        control=glmerControl(optimizer='bobyqa',optCtrl = list(maxfun=2e5)),
                        start=list(fixef=beta, theta=m0@theta),offset=log(DATA_LONG$offs))

###########################################################
# TABLE 2
###########################################################
HTMLInitFile(outdir=RES_DIR,file=paste('TABLE2_',namm,'_Age2004',sep=''),Title='glmer test')
HTML(as.title('<b><font color="orange">M1:</b></font>'))

# MODEL 1 of TABLE 2
HTMLmod_rnd(MODEL1_12M)

HTML(as.title('<b><font color="orange">M3B, age int = 10 :</b></font>'))

# MODEL 2 of TABLE 2
HTMLmod_rnd(MODEL2_12M) 

HTMLEndFile(file = HTMLGetFile())

# deviance goodness of fit test shows that Poisson model fits well here (p<0.05)
# it it thus not necessary to use negative binomial model
testModel<-glm(update.formula(test_form,'.~.-+ (1|pnr)'), 
               family = poisson(),data=DATA_LONG,offset=log(DATA_LONG$offs))
(p<-1-pchisq(summary(testModel)$deviance, summary(testModel)$df.residual))


###########################################################
# BOOTSTRAPING CONFIDENCE INTERVALS
###########################################################
sim.glmer<-function(object, alpha=0.95, N.sim=1e3, return.boot=FALSE, DATA_LONG){
  get.random<-function (object) object@resp$eta-getME(object,'X')%*%fixef(object)-object@resp$offset
  length(get.random(object))
  length(DATA_LONG$TIME)
  R <- tapply(get.random(object),list(TIME=DATA_LONG$TIME, coh=DATA_LONG$coh),identity)
  rand1 <- R[1,1][[1]]
  rand2 <- R[2,1][[1]]
  rand3 <- R[1,2][[1]]
  rand4 <- R[2,2][[1]]
  FUN<-function(x , add.rnd.var=FALSE){
    if (add.rnd.var) {
      rand1_ <- sample(rand1,1)
      rand2_ <- sample(rand2,1)
      rand3_ <- sample(rand3,1)
      rand4_ <- sample(rand4,1)
    } else {
      rand1_ <- (rand1)
      rand2_ <- (rand2)
      rand3_ <- (rand3)
      rand4_ <- (rand4)
    }
    xint <-x['(Intercept)']
    list(c2004A=mean(exp(xint+rand1_)),
         c2004B=mean(exp(xint+x['TIMEB']+rand2_)),
         c2006A=mean(exp(xint+x['cohCoh2006']+rand3_)),
         c2006B=mean(exp(xint+x['TIMEB']+x['cohCoh2006']+x['TIMEB:cohCoh2006']+rand4_)),
         c2004Di=mean(exp(xint+rand1_))-mean(exp(xint+x['TIMEB']+rand2_)),
         c2006Di=mean(exp(xint+x['cohCoh2006']+rand3_))-
           mean(exp(xint+x['TIMEB']+x['cohCoh2006']+x['TIMEB:cohCoh2006']+rand4_)))
  }
  fixed <- fixef(object)
  varcov <- as.matrix(vcov(object))
  betas <- rmvnorm(n=N.sim, mean=fixed, sigma=varcov, method='chol')
  colnames(betas) <- names(fixed)
  vFUN <- apply(betas, 1, FUN)
  z <- names(vFUN[[1]])
  bootres<-lapply(z,function(k) as.matrix(sapply(seq_along(vFUN),function(j) vFUN[[j]][[k]])))
  names(bootres) <- z
  CIlo <- (1 - alpha)/2
  CIup <- 1 - CIlo
  CIres <- lapply(bootres, function(k) if (ncol(k)==1){
    c(mean=mean(k), quantile(as.vector(k), probs=c(CIlo,CIup)))
  } else {
    cbind(mean=apply(k,1,mean),t(apply(k,1,quantile,probs=c(CIlo,CIup))))
  })
  names(CIres) <- z
  if (return.boot) list(CI=CIres, boot=bootres) else list(CI=CIres)
}

nCI12<-  t(sapply(sim.glmer(object=MODEL1_12M, N.sim=1e5, DATA_LONG=DATA_LONG)$CI,'['))[c(2,1,4,3,5,6),]
nCI3 <-  t(sapply(sim.glmer(object=MODEL1_3M,  N.sim=1e5, DATA_LONG=DATA_LONG)$CI,'['))[c(2,1,4,3,5,6),]

nCI12<-nCI12*12
nCI3<-nCI3*3

v  <- 12 * c(tapply(exp(predict(MODEL1_12M))/DATA_LONG$offs,list(TIME=DATA_LONG$TIME, coh=DATA_LONG$coh),mean))[c(2,1,4,3)]
v_3<- 3 *  c(tapply(exp(predict(MODEL1_3M))/DATA_LONG$offs3,list(TIME=DATA_LONG$TIME, coh=DATA_LONG$coh),mean))[c(2,1,4,3)]

##########################################
# FIGURE 1 
##########################################

pdf(file=paste(RES_DIR,namm,'_FIGURE1','.pdf',sep=''),pointsize=11,width=8,height=6)
par(mar=c(5,4,1,1))
par(mfrow=c(1,2))
pos<-barplot(v, ylim=c(0,8.1),names.arg = rep(c('Before','After'),2),density=20,
             angle = c(45,45,-45,-45),col=c(2,2,4,4), ylab='Average number of visits per year')
for (j in 1:4) lines(c(pos[j],pos[j]),c(nCI12[j,3],nCI12[j,2]),lwd=3)
legend('topright',legend=c('Cohort 2004','Cohort 2006'),bty='n',angle=c(45,-45),density=20,fill=c(2,4))
pos<-barplot(c(v[2]-v[1],v[4]-v[3]), ylim=c(0,0.51),names.arg = rep(c('Cohort 2004','Cohort 2006')),
             density=20,angle = c(45,-45),col=c(2,4),ylab='Difference between after and before')
#abline(h=0)
for (j in 5:6) lines(c(pos[j-4],pos[j-4]),c(nCI12[j,3],nCI12[j,2]),lwd=3)
dev.off()

##########################################
# FIGURE 2
##########################################

pdf(file=paste(RES_DIR,namm,'_FIGURE2','.pdf',sep=''),pointsize=11,width=8,height=6)
par(mar=c(5,4,1,1))
par(mfrow=c(1,2))
pos<-barplot(v_3, ylim=c(0,8.1),names.arg = rep(c('Before','After'),2),density=20,
             angle = c(45,45,-45,-45),col=c(2,2,4,4), ylab='Average number of visits per 3 months')
for (j in 1:4) lines(c(pos[j],pos[j]),c(nCI3[j,3],nCI3[j,2]),lwd=3)
legend('topright',legend=c('Cohort 2004','Cohort 2006'),bty='n',angle=c(45,-45),density=20,fill=c(2,4))
pos<-barplot(c(v_3[2]-v_3[1],v_3[4]-v_3[3]), ylim=c(0,0.51),names.arg = rep(c('Cohort 2004','Cohort 2006')),
             density=20,angle = c(45,-45),col=c(2,4),ylab='Difference between after and before')
#abline(h=0)
for (j in 5:6) lines(c(pos[j-4],pos[j-4]),c(nCI3[j,3],nCI3[j,2]),lwd=3)
dev.off()


