#################################################################
# Constructing SURVEY data.frame
#################################################################

# Attach the visit tables to waves 1, 2, 4 and 5 (joined on mergeid).
W1 <- fastmerge(W1, W1.visits, by = 'mergeid')
W2 <- fastmerge(W2, W2.visits, by = 'mergeid')
W4 <- fastmerge(W4, W4.visits, by = 'mergeid')
W5 <- fastmerge(W5, W5.visits, by = 'mergeid')

# Label every wave table with its wave number.
W1$Wave <- 1
W2$Wave <- 2
W3$Wave <- 3
W4$Wave <- 4
W5$Wave <- 5

# W3 is not merged with a visits table: these columns are set to NA and the
# table is reordered to W1's column layout so that all waves can be stacked.
W3$nchild <- NA
W3$edusurv <- NA
W3$allvisits <- NA
W3$practitionervisits <- NA
W3 <- as.data.frame(W3)[, names(W1)]

# Stack the waves; keep only rows with a 12-character, non-missing pnr.
SURV <- rbindlist(list(W1, W2, W3, W4, W5))
SURV <- SURV[nchar(SURV$pnr) == 12, ]
SURV <- SURV[!is.na(SURV$pnr), ]

# Add the death date (DOD) and split it into year and month.
tmpSURVEY <- fastmerge(SURV, DOD, by = 'pnr')
tmpSURVEY$dead.year <- year(tmpSURVEY$dead.date)
tmpSURVEY$dead.month <- month(tmpSURVEY$dead.date)

# Keep rows that have a mergeid. A missing death date is replaced by the
# placeholder January 2020 (presumably "no death recorded" -- confirm).
SURVEY <- tmpSURVEY[!is.na(tmpSURVEY$mergeid), ]
SURVEY$dead.year[is.na(SURVEY$dead.year)] <- 2020
SURVEY$dead.month[is.na(SURVEY$dead.month)] <- 1
SURVEY$mergeid <- as.character(SURVEY$mergeid)
SURVEY$pnr <- as.character(SURVEY$pnr)

#####################################################################################
# CHECK and remove mistakes in the data; data for all 5 waves is needed to find them
#####################################################################################

# One row per distinct (pnr, mergeid) pair.
inter <- paste(SURVEY$pnr, SURVEY$mergeid, sep = '_')
tmp_i <- !duplicated(inter)
inter <- data.frame(inter, pnr2 = SURVEY$pnr, mer2 = SURVEY$mergeid, dup = tmp_i)
inter <- inter[tmp_i, ]

# A pnr (or mergeid) that still repeats among the distinct pairs is linked to
# more than one mergeid (or pnr).
list1 <- as.character(inter$pnr2[duplicated(inter$pnr2)])
list2 <- as.character(inter$mer2[duplicated(inter$mer2)])

# Show the affected survey rows, then drop them.
test_ind <- (SURVEY$pnr %in% list1) | (SURVEY$mergeid %in% list2)
Dtest <- SURVEY[test_ind, ]
Dtest

SURVEY <- SURVEY[!test_ind, ]

#####################################################################################

# Store the cleaned SURVEY table both as an R image and as a CSV file.
save('SURVEY', file = paste0(SAVE_DIR, 'reg.dates_2waves.dat'))
write.csv(SURVEY, file = paste0(SAVE_DIR, 'reg.dates_2waves.csv'))
########################################################################
# From now on operate only on SURVEY and REGc; there are only two waves
########################################################################

# Keep only register rows of persons present in SURVEY
REGc <- REGISTER
ind <- REGc$pnr %in% SURVEY$pnr
REGc <- REGc[ind, ]

# Drop people who died before the interview (interview year after death year)
SURVEY <- SURVEY[!(SURVEY$year > SURVEY$dead.year), ]

# pnr of the respondents of each wave
ind.w1 <- as.character(SURVEY$pnr)[SURVEY$Wave == 1]
ind.w2 <- as.character(SURVEY$pnr)[SURVEY$Wave == 2]
ind.w3 <- as.character(SURVEY$pnr)[SURVEY$Wave == 3]
ind.w4 <- as.character(SURVEY$pnr)[SURVEY$Wave == 4]
ind.w5 <- as.character(SURVEY$pnr)[SURVEY$Wave == 5]

# Load hospital data
source(paste0(SOURCE_DIR, "run_hospitalsrv.R"))

# Calculate the median interview month of wave 1
medmonth <- median(SURVEY$month[SURVEY$Wave == 1])

# G1:  interviewed in wave 1 and in wave 2
# G1o: interviewed in wave 1 (regardless of wave 2)
G1 <- REGc[(REGc$pnr %in% ind.w1) & (REGc$pnr %in% ind.w2), ]
G1o <- REGc[(REGc$pnr %in% ind.w1), ]
# G2:  not interviewed in wave 1, but interviewed in wave 2
# G23: as G2, and additionally present in W3
G2 <- REGc[(!(REGc$pnr %in% ind.w1)) & (REGc$pnr %in% ind.w2), ]
G2o <- G2
G23 <- REGc[(!(REGc$pnr %in% ind.w1)) & (REGc$pnr %in% ind.w2) & (REGc$pnr %in% as.character(W3$pnr)), ]

# Register-year windows around wave 1; Dir records the counting direction.
# The apply() calls count the NA values per column (interactive diagnostics).
G1.pre.W1 <- G1[G1$year %in% c(2003, 2004), ]
G1.pre.W1$Dir <- 'Backward'
apply(G1.pre.W1[G1.pre.W1$year %in% c(2004:2008), ], 2, function(k) sum(is.na(k)))

G1.post.W1 <- G1[G1$year %in% c(2004, 2005), ]
G1.post.W1$Dir <- 'Forward'
apply(G1.post.W1[G1.post.W1$year %in% c(2004:2008), ], 2, function(k) sum(is.na(k)))

G1o.post.W1 <- G1o[G1o$year %in% c(2004, 2005), ]
G1o.post.W1$Dir <- 'Forward'
apply(G1o.post.W1[G1o.post.W1$year %in% c(2004:2008), ], 2, function(k) sum(is.na(k)))

G2o.post.W1 <- G2o[G2o$year %in% c(2004, 2005), ]
G2o.post.W1$Dir <- 'Forward'
apply(G2o.post.W1[G2o.post.W1$year %in% c(2004:2008), ], 2, function(k) sum(is.na(k)))

# Register-year windows around wave 2.
G1.pre.W2 <- G1[G1$year %in% c(2005, 2006, 2007), ]
G1.pre.W2$Dir <- 'Backward'
G1.post.W2 <- G1[G1$year %in% c(2006, 2007, 2008), ]
G1.post.W2$Dir <- 'Forward'

G1o.pre.W2 <- G1o[G1o$year %in% c(2005, 2006, 2007), ]
G1o.pre.W2$Dir <- 'Backward'
G1o.post.W2 <- G1o[G1o$year %in% c(2006, 2007, 2008), ]
G1o.post.W2$Dir <- 'Forward'

# The same windows for G23.
G23.pre.W1 <- G23[G23$year %in% c(2003, 2004), ]
G23.pre.W1$Dir <- 'Backward'

G23.post.W1 <- G23[G23$year %in% c(2004, 2005), ]
G23.post.W1$Dir <- 'Forward'

G23.pre.W2 <- G23[G23$year %in% c(2005, 2006, 2007), ]
G23.pre.W2$Dir <- 'Backward'

G23.post.W2 <- G23[G23$year %in% c(2006, 2007, 2008), ]
G23.post.W2$Dir <- 'Forward'

# Build the wide "dates" table: one row per pnr holding the monthly visit
# columns of every year found in D, side by side.
#
# D        register subset with columns pnr, year and the 'visit.*' columns;
#          it must span exactly 2 or 3 distinct years, otherwise an error is
#          raised. The renaming below assumes 12 visit columns ordered
#          Jan..Dec -- confirm against the register layout.
# na2zero  if TRUE, every NA in the merged table is replaced by 0.
#
# Returns a data.frame with columns pnr, visit.<Mon>.<year>, ...; the sorted
# years are stored in attribute 'Y'.
builddates <- function(D, na2zero = TRUE) {
  month.names <- paste('visit',
                       c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
                       sep = '.')
  Y <- sort(unique(D$year))
  if (!(length(Y) %in% 2:3)) stop('Wrong number of years')

  visit.cols <- which(grepl('visit.', colnames(D), fixed = TRUE))
  subD <- data.frame(pnr = D$pnr, as.data.frame(D)[, visit.cols])

  # Join one year at a time; the columns are renamed after every join so the
  # next join starts from unambiguous names.
  out <- subD[D$year == Y[1], ]
  for (i in 2:length(Y)) {
    out <- fastmerge(out, subD[D$year == Y[i], ], by = 'pnr')
    colnames(out) <- c('pnr',
                       unlist(lapply(Y[1:i], function(y) paste(month.names, y, sep = '.'))))
  }

  if (na2zero) out[is.na(out)] <- 0
  out <- data.frame(out)
  attr(out, 'Y') <- Y
  out
}

# Reduce register data D to one row per person (the register row of the
# interview year of the chosen wave), joined with that wave's survey record.
#
# D      register subset; its 'visit.*' columns are dropped.
# wave   survey wave whose records are joined (reads the global SURVEY).
# Year, Month
#        if Year is not given, Age is computed at the 15th of the interview
#        month and AgeAt2004 at the 15th of the global `medmonth` in 2004;
#        if Year is given, Age is computed at the 15th of Month in Year and
#        AgeAt2004 is a copy of Age.
#
# Stops unless the result holds exactly one row for every pnr of D.
oneyear.at <- function(D, wave = 1, Year = NULL, Month = NULL) {
  testl <- uniqueN(D$pnr)
  visit.cols <- which(grepl('visit.', colnames(D), fixed = TRUE))
  subD <- as.data.frame(D)[, -visit.cols]

  if (length(Year) == 0) {
    # Survey records of the requested wave, as a plain data.frame.
    subSURVEY <- as.data.frame(SURVEY)[which(SURVEY$Wave == wave), ]
    subSURVEY <- subSURVEY[(subSURVEY$pnr %in% subD$pnr), ]
    subSURVEY$survey.year <- subSURVEY$year
    subSURVEY$survey.month <- subSURVEY$month
    subSURVEY$year <- NULL
    subSURVEY$month <- NULL

    n.found <- dim(subSURVEY)[1]
    if (n.found < testl) warning('Not every DATE found for D.')
    if (n.found > testl) stop('Error')

    # Keep the register row whose year equals the interview year.
    out <- as.data.frame(fastmerge(subD, subSURVEY, by = 'pnr'))
    yind <- which(NA2FALSE(out$year == out$survey.year))
    out <- out[yind, ]

    Age <- as.Date(paste0(out$survey.year, '-', out$survey.month, '-15')) - as.Date(out$birth.raw)
    out$Age <- as.numeric(Age / 365.25)
    if (testl != uniqueN(out$pnr)) stop('Something wrong')
    if (testl != length(out$pnr)) stop('Something wrong')

    # Zero-pad a one-character median month before building the 2004 date.
    medmonthf <- as.character(medmonth)
    if (nchar(medmonthf) < 2) medmonthf <- paste0('0', medmonthf)

    Age2 <- as.Date(paste0(2004, '-', medmonthf, '-15')) - as.Date(out$birth.raw)
    out$AgeAt2004 <- as.numeric(Age2 / 365.25)
  } else {
    # Survey records of the requested wave, subset directly from SURVEY.
    subSURVEY <- SURVEY[SURVEY$Wave == wave, ]
    subSURVEY <- subSURVEY[(subSURVEY$pnr %in% subD$pnr), ]
    subSURVEY$survey.year <- subSURVEY$year
    subSURVEY$survey.month <- subSURVEY$month
    subSURVEY <- subSURVEY[, -c('year', 'month')]
    if (dim(subSURVEY)[1] != testl) stop('Error in SURVEY.')

    out <- as.data.frame(fastmerge(subD, subSURVEY, by = 'pnr'))
    yind <- NA2FALSE(out$year == out$survey.year)
    out <- out[yind, ]

    # Age at the externally supplied reference date.
    Age <- as.Date(paste0(Year, '-', Month, '-15')) - as.Date(out$birth.raw)
    out$Age <- as.numeric(Age / 365.25)
    out$AgeAt2004 <- out$Age
    if (testl != uniqueN(out$pnr)) stop('Something wrong')
    if (testl != length(out$pnr)) stop('Something wrong')
  }

  out
}

# Reduce register data D to one row per person, joined with the survey record
# of wave 1 (wave == 1) or of wave 2 (any other value of `wave`).
#
# D      register subset; its 'visit.*' columns are dropped.
# wave   1 selects the wave-1 survey records, anything else the wave-2 ones.
# Year   NULL: keep the register row of the interview year and compute Age at
#        the 15th of the interview month. Otherwise keep the register row of
#        `Year` and compute Age at the 15th of `Month` in `Year`.
# Month  month used for the age calculation; required whenever Year is given.
#
# Reads the global SURVEY. Stops when wave 1 is requested but nobody in D was
# interviewed in wave 1, when the number of survey records differs from the
# number of persons in D, or when the result is not one row per person.
oneyear.at.O <- function(D, wave = 1, Year = NULL, Month = NULL) {
  has.wave1 <- any((SURVEY$Wave == 1) & (SURVEY$pnr %in% D$pnr), na.rm = TRUE)
  testl <- uniqueN(D$pnr)
  datesi <- which(grepl('visit.', colnames(D), fixed = TRUE))
  subD <- as.data.frame(D)[, -datesi]
  if (!has.wave1 && wave == 1) stop('Wrong wave.')

  # Only the survey records of the wave actually used are prepared.
  survey.wave <- if (wave == 1) 1 else 2
  subSURVEY <- as.data.frame(SURVEY)[which(SURVEY$Wave == survey.wave), ]
  subSURVEY <- subSURVEY[(subSURVEY$pnr %in% subD$pnr), ]
  subSURVEY$survey.year <- subSURVEY$year
  subSURVEY$survey.month <- subSURVEY$month
  subSURVEY$year <- NULL
  subSURVEY$month <- NULL

  if (dim(subSURVEY)[1] < testl) stop('Not every DATE found for D. Refreshment present in wave=2')
  if (dim(subSURVEY)[1] > testl) stop('Error')

  out <- as.data.frame(fastmerge(subD, subSURVEY, by = 'pnr'))
  if (!length(Year)) {
    # No register year given: take the register row of the interview year.
    yind <- which(NA2FALSE(out$year == out$survey.year))
    out <- out[yind, ]
    Age <- as.Date(paste(out$survey.year, '-', out$survey.month, '-15', sep = '')) - as.Date(out$birth.raw)
    out$Age <- as.numeric(Age / 365.25)
  } else {
    # Register year given: take the register row of that year.
    yind <- which(NA2FALSE(out$year == Year))
    out <- out[yind, ]
    # The argument `Month` is tested here. The previous code tested `month`,
    # which resolves to a function and therefore never triggered this check.
    if (!length(Month)) stop('Month must be given')
    Age <- as.Date(paste(Year, '-', Month, '-15', sep = '')) - as.Date(out$birth.raw)
    out$Age <- as.numeric(Age / 365.25)
  }

  # NOTE(review): `out$age` (lower case) is not the `Age` column computed
  # above. If the merged data has no `age` column, these assignments remove
  # AgeAt2004 instead of filling it -- confirm whether `out$Age` was intended.
  out$AgeAt2004 <- NA
  if (wave == 1) out$AgeAt2004 <- out$age
  if (wave == 2 && length(Year) && Year == 2004) out$AgeAt2004 <- out$age

  if (testl != uniqueN(out$pnr)) stop('Something wrong')
  if (testl != length(out$pnr)) stop('Something wrong')

  out
}

# Merge with hospitalization data
# Hospital data of the wave-2 individuals, measured in 2004 at the median month
H2.at.1 <- hospitalsurvey(Wave = 2, Month = medmonth, Year = 2004)

G1.pre.W2.at.2 <- oneyear.at(G1, 2)
Hosp <- H2[H2$pnr %in% G1.pre.W2.at.2$pnr, ]
G1.pre.W2.at.2 <- fastmerge(G1.pre.W2.at.2, Hosp, by = 'pnr')

G23.pre.W2.at.2 <- oneyear.at(G23, 2)
Hosp <- H2[H2$pnr %in% G23.pre.W2.at.2$pnr, ]
G23.pre.W2.at.2 <- fastmerge(G23.pre.W2.at.2, Hosp, by = 'pnr')

###########################
# MODEL 2o (visits measured at wave 1, education and parenthood in wave 2)

G1o.W1.at.R1 <- oneyear.at.O(G1o, wave = 1)
Hosp <- H1[H1$pnr %in% G1o.W1.at.R1$pnr, ]
G1o.W1.at.R1 <- fastmerge(G1o.W1.at.R1, Hosp, by = 'pnr')
dim(G1o.W1.at.R1)

G2o.W2.at.R1 <- oneyear.at(G2o, wave = 2, Year = 2004, Month = medmonth)
Hosp <- H2.at.1[H2.at.1$pnr %in% G2o.W2.at.R1$pnr, ]
G2o.W2.at.R1 <- fastmerge(G2o.W2.at.R1, Hosp, by = 'pnr')
dim(G2o.W2.at.R1)

# Print consistency diagnostics for a visits table V against the register
# subset G it was derived from: pnr coverage in both directions, NA counts of
# V$visits, and the years covered by builddates(G) compared with G$year.
# Year, Month and N are accepted but not used. Called for its printed output.
checkvisits <- function(V, G, Year = NULL, Month = NULL, N = 10) {
  cat('All pnr from G in V?: ')
  missing.in.V <- !(G$pnr %in% V$pnr)
  if (any(missing.in.V)) {
    cat('(', uniqueN(G$pnr), '), NO!\n')
  } else {
    cat('(', uniqueN(G$pnr), '), Yes\n')
  }

  # Rows of V whose pnr does not occur in G.
  unneeded <- !(V$pnr %in% G$pnr)
  cat('Number of unneeded pnr in V: ', sum(unneeded), '\n')
  if (any(unneeded)) print(as.data.table(V[unneeded, ]))
  cat('Number of NA visits among unneeded:', sum(is.na(V[unneeded, ]$visits)), 'of', sum(unneeded), '\n')
  cat('Total number of NA visits:', sum(is.na(V$visits)), '\n')

  cat('Are dates built correctly? #1: ')
  BD <- builddates(G)
  cnBD <- colnames(BD)[-1]
  year.suffix <- substr(cnBD, nchar(cnBD) - 3, nchar(cnBD))
  cat('N Years =', (dim(BD)[2] - 1) / 12, ', pnrs =', dim(BD)[1], '=?=', uniqueN(G$pnr), '\n')
  cat('Years BD:', unique(year.suffix), '\n')
  cat('Years  G:', unique(G$year), '\n')
}

# Count doctor visits in a window of `mspan` months before or after a
# reference month, one result row per person of G.
#
# G       register subset carrying the 'visit.*' columns, `year`, and a `Dir`
#         column whose first value ('Backward' / 'Forward') sets the direction
#         of the window relative to the reference month.
# Year, Month
#         when both are given, they are the common reference date for
#         everybody; otherwise each person's own interview year/month from the
#         global SURVEY is used.
# mspan   number of months in the window.
# offset  only used without Year/Month: SURVEY rows are matched to register
#         years shifted by `offset`, and the window is shifted accordingly.
#
# Months after the month of death are set to NA in the dates table, so they
# count neither as visits nor as exposure.
#
# Returns a data.table with pnr, visits (sum), exposures (number of non-NA
# months), mvisits (mean), d (first month position after death, NA if none),
# year, month, dir and the monthly values m1..m<mspan>.
#
# Fixes relative to the previous version:
#  * with Year/Month given, the person's row in the dates table is now located
#    by pnr; before, the death censoring used row positions computed in the
#    survey subset and the visits were read from row k of the dates table;
#  * without Year/Month, year/month of the output come from the person's own
#    survey row instead of the k-th row of the whole survey subset;
#  * an empty G no longer enters the loop (seq_along instead of 1:length).
calcvisits <- function(G, Year = NULL, Month = NULL, mspan = 12, offset = 0) {
  BD <- builddates(G)
  cnBD <- colnames(BD)[-1]
  cat('N Years =', (dim(BD)[2] - 1) / 12, ', pnrs =', dim(BD)[1], '=?=', uniqueN(G$pnr), '\n')
  cat('Years BD:', unique(substr(cnBD, nchar(cnBD) - 3, nchar(cnBD))), '\n')
  cat('Years  G:', unique(G$year), '\n')

  # Year of every monthly column of BD (taken from the column-name suffix).
  yea <- as.numeric(substr(cnBD, nchar(cnBD) - 3, nchar(cnBD)))
  ugpnr <- unique(G$pnr)
  fixed.date <- length(Year) > 0 && length(Month) > 0
  shift <- if (fixed.date) 0 else offset

  # Positions (1 = first monthly column of BD) of the mspan months before /
  # after month m of year y, and the position of that month itself.
  d2p <- function(y, m) sort((y + shift - yea[1]) * 12 + m - (1:mspan) * (G$Dir[1] == "Backward") + (1:mspan) * (G$Dir[1] == "Forward"))
  d2pr <- function(y, m) sort((y + shift - yea[1]) * 12 + m)

  # One output row for a person, given the vector v of monthly values.
  make.row <- function(pnr, v, d, year, month) {
    dfv <- data.frame(pnr = pnr,
                      visits = sum(v, na.rm = TRUE),
                      exposures = sum(!is.na(v)),
                      mvisits = mean(v, na.rm = TRUE),
                      d = d[1],
                      year = year,
                      month = month,
                      dir = G$Dir[1],
                      v = unname(as.list(v)))
    names(dfv)[9 + (0:(mspan - 1))] <- paste('m', 1:mspan, sep = '')
    dfv
  }

  out <- vector('list', length(ugpnr))

  if (fixed.date) { # no info for Wave 1 in SURVEY
    sdates <- SURVEY[SURVEY$pnr %in% G$pnr, ] # use some info from the next wave
    if (!NROW(sdates)) {
      # Nobody found in SURVEY: fall back to the death dates carried by G.
      sdates <- G[, c('pnr', 'dead.month', 'dead.year')]
      sdates$dead.month[is.na(sdates$dead.month)] <- 1
      sdates$dead.year[is.na(sdates$dead.year)] <- 2020
      sdates <- sdates[!duplicated(sdates$pnr), ]
    }
    om <- function(x) x[x <= uniqueN(yea) * 12]

    for (k in seq_along(ugpnr)) {
      sdates. <- sdates[sdates$pnr == ugpnr[k], ]
      if (NROW(sdates.) == 2) {
        cat(diff(sdates.$month), ' ')
      }
      sdates. <- sdates.[1, ]
      deathmonts <- which(d2pr(sdates.$dead.year, sdates.$dead.month) < seq_len(length(yea)))

      # Row of this person in BD (NA when builddates() returned no row for it).
      bdrow <- match(ugpnr[k], BD$pnr)
      if (length(deathmonts) && !is.na(bdrow)) BD[bdrow, om(deathmonts) + 1] <- NA
      d <- if (length(deathmonts)) deathmonts[1] else NA
      v <- as.numeric(BD[bdrow, 1 + d2p(Year, Month)])

      out[[k]] <- make.row(sdates.$pnr, v, d, Year, Month)
    }
    out <- rbindlist(out)
  } else {
    sdates <- SURVEY[SURVEY$pnr %in% G$pnr, ]
    sdates <- sdates[sdates$year %in% (G$year - offset), ] # instead of controlling waves

    for (k in seq_along(ugpnr)) {
      sdates. <- sdates[sdates$pnr == ugpnr[k], ]
      if (NROW(sdates.) == 2) {
        cat(diff(sdates.$month), ' ')
      }
      sdates. <- sdates.[1, ]

      deathmonts <- which(d2pr(sdates.$dead.year, sdates.$dead.month) < seq_len(length(yea)))
      if (length(deathmonts)) BD[BD$pnr == ugpnr[k], 1 + deathmonts] <- NA
      d <- if (length(deathmonts)) deathmonts[1] else NA
      v <- as.numeric(BD[BD$pnr == ugpnr[k], 1 + d2p(sdates.$year, sdates.$month)])

      out[[k]] <- make.row(sdates.$pnr, v, d, sdates.$year, sdates.$month)
    }
    out <- rbindlist(out)
    cat('\n')
  }
  out
}

# Windows around wave 1. Without Year/Month the individual interview date is
# the reference; with Year/Month the median wave-1 month of 2004 is used.
# G1 visits before interview in wave=1
V1.pre.W1 <- calcvisits(G1.pre.W1)
V23.pre.W1 <- calcvisits(G23.pre.W1, Year = 2004, Month = medmonth)
V1o.post.W1 <- calcvisits(G1o.post.W1)
V2o.post.W1 <- calcvisits(G2o.post.W1, Year = 2004, Month = medmonth)

# G1 visits after interview in wave=1
V1.post.W1 <- calcvisits(G1.post.W1)
# G23 visits after median interview month in wave=1
V23.post.W1 <- calcvisits(G23.post.W1, Year = 2004, Month = medmonth)

# G1 visits before interview in wave=2
V1.pre.W2 <- calcvisits(G1.pre.W2)
# G1 visits after interview in wave=2
V1.post.W2 <- calcvisits(G1.post.W2)

# G23 visits before interview in wave=2
V23.pre.W2 <- calcvisits(G23.pre.W2)
# G23 visits after interview in wave=2
V23.post.W2 <- calcvisits(G23.post.W2)

# Join a visits table V with person-level data G on pnr, using only the rows
# of V whose pnr occurs in G.
merge_G_V <- function(V, G) {
  in.G <- V$pnr %in% G$pnr
  fastmerge(V[in.G, ], G, by = 'pnr')
}

# MODEL 1b
# G1 vs. G23, both pre W2 and measured at W2
tmp1 <- merge_G_V(V1.pre.W2, G1.pre.W2.at.2)
tmp2 <- merge_G_V(V23.pre.W2, G23.pre.W2.at.2)
tmp1$gr <- 1
tmp2$gr <- 2
MOD1bDAT <- rbindlist(list(tmp1, tmp2))

# MODEL Opost
# G1o vs. G2o, both post W1
tmp1 <- merge_G_V(V1o.post.W1, G1o.W1.at.R1)
tmp2 <- merge_G_V(V2o.post.W1, G2o.W2.at.R1)
# Columns present in tmp1 but absent from tmp2 (interactive check)
names(tmp1)[!(names(tmp1) %in% names(tmp2))]
tmp1$gr <- 1
tmp2$gr <- 2
MODOpostDAT <- rbindlist(list(tmp1, tmp2))

# Drop incomplete cases from a model data set and derive the categorical
# variables used for modelling.
#
# data     model data (e.g. MODOpostDAT); column 'allvisits' is renamed to
#          'reportedvisits'.
# myDate   accepted but not used.
# include.reported.visits
#          if TRUE, rows with missing reportedvisits are dropped and only rows
#          with reportedvisits <= 31 are kept.
# extra    optional names of additional columns to return.
#
# NA counts per variable (overall and for gr == 1 / gr == 2) and the number of
# rows dropped per variable are printed along the way. Returns a data.frame
# restricted to the modelling columns.
filterdata <- function(data, myDate = NULL, include.reported.visits = TRUE, extra = NULL) {
  L <- NROW(data)
  cat('Initial nb. of cases', L, '\n')
  data <- as.data.frame(data)

  # Drop the rows with NA in column `nam`, reporting how many were dropped.
  filterone <- function(nam, data) {
    gg <- is.na(data[, nam])
    cat('Droping', sum(gg), 'NA for', nam, '\n')
    data[which(!gg), ]
  }
  # Apply filterone() for several columns, in the given order.
  filterall <- function(nams, data) {
    for (nam in nams) data <- filterone(nam, data)
    data
  }

  names(data)[names(data) == 'allvisits'] <- 'reportedvisits'

  # NA overview of the variables of interest, before any filtering.
  fields <- c('mvisits', 'visits', 'reportedvisits', 'exposures', 'employ', 'foreigner', 'hospvisits',
              'sex', 'civstat', 'edusurv', 'income3yB.cat', 'income3y.cat', 'nchild', 'birth.raw',
              'Age', 'employ.raw')
  fields <- unlist(lapply(fields, function(k) which(colnames(data) == k)))
  print(as.matrix(colSums(is.na(data[, fields]))))
  print(cbind(colSums(is.na(data[data$gr == 1, ][, fields])),
              colSums(is.na(data[data$gr == 2, ][, fields]))))

  data <- filterall(c('mvisits', 'visits'), data)
  data$zerovisits <- c('no', 'yes')[1 + (data$visits == 0)]

  if (include.reported.visits) {
    data <- filterone('reportedvisits', data)
    data <- data[data$reportedvisits <= 31, ] # FILTERING !!!!!!!!!!!!!!!!!
  }
  data$zeroreportedvisits <- c('no', 'yes')[1 + (data$reportedvisits == 0)]

  data <- filterall(c('exposures', 'employ', 'employ.raw', 'foreigner', 'hospvisits', 'sex', 'civstat'),
                    data)
  # Swap the first two levels of civstat.
  data$civstat <- factor(data$civstat, levels(data$civstat)[c(2, 1, 3, 4)])

  data <- filterall(c('edusurv', 'income3yB.cat', 'income3y.cat', 'nchild'), data)
  data$has.children <- c('no', 'yes')[1 + (data$nchild > 0)]
  data <- filterone('birth.raw', data)

  # Age classes [0,60), [60,70), [70,80), [80,120].
  data$ageclass <- cut(as.numeric(data$Age), breaks = c(0, 60, 70, 80, 120),
                       include.lowest = TRUE, right = FALSE,
                       labels = c('50-59', '60-69', '70-79', '80+'))
  # Education in three levels, stored in the order high, med, low.
  data$edusurv3lv <- cut(data$edusurv, breaks = c(0, 3, 4, 7), include.lowest = TRUE, right = FALSE,
                         labels = c('low', 'med', 'high'))
  data$edusurv3lv <- factor(data$edusurv3lv, levels(data$edusurv3lv)[c(3, 2, 1)])

  cat('total removed', NROW(data) - L, '\n')
  data$sex[data$sex == 1] <- 'men'
  data$sex[data$sex == 2] <- 'women'

  keep.cols <- c('pnr', 'gr', extra, 'Wave', 'dir', 'reportedvisits', 'zeroreportedvisits', 'visits',
                 'zerovisits', 'exposures', 'mvisits', 'sex', 'Age', 'ageclass', 'has.children',
                 'nchild', 'income3yB.cat', 'civstat', 'foreigner', 'hospvisits', 'employ',
                 'employ.raw', 'edusurv3lv', 'weight_i', 'psu', 'edusurv')
  data[, keep.cols]
}

MODOpostDATf_ <- filterdata(data = MODOpostDAT, include.reported.visits = FALSE)
MOD1bDATf_ <- filterdata(MOD1bDAT, include.reported.visits = TRUE)

##########################################
# Prepare data to run model for sample A
##########################################

# Re-calculate visits according to the more flexible new method
G1o.ts <- buildtimeseriesO(G = G1o, breaks10)
G2o.ts <- buildtimeseriesO(G = G2o, breaks10, Month = medmonth, Year = 2004)

G1o.ts$censored$coh <- 'Coh2004'
G2o.ts$censored$coh <- 'Coh2006'

# Positions of the columns whose name contains 'X'
Xind <- which(grepl('X', names(G1o.ts$censored), fixed = TRUE))

##########################################
# Clean missing data (keep only persons that passed filterdata), still
# without any age restriction
table(MODOpostDATf_$gr)
dim(G1o.ts$censored)
keep.gr1 <- G1o.ts$censored$pnr %in% MODOpostDATf_$pnr[MODOpostDATf_$gr == 1]
G1o.ts$censored <- G1o.ts$censored[keep.gr1, ]
dim(G1o.ts$censored)

table(MODOpostDATf_$gr)
dim(G2o.ts$censored)
keep.gr2 <- G2o.ts$censored$pnr %in% MODOpostDATf_$pnr[MODOpostDATf_$gr == 2]
G2o.ts$censored <- G2o.ts$censored[keep.gr2, ]
dim(G2o.ts$censored)

##########################################
# Calculate doctor visits after and before
# (X columns 1:12 are used as "before" and 14:25 as "after"; column 13 is
#  skipped -- presumably the reference month, confirm in buildtimeseriesO)

BIGco <- rbind.data.frame(G1o.ts$censored, G2o.ts$censored)

# Number of visits in the 12 months after / before
BIGco$XAfter <- rowSums(BIGco[, Xind][, 14:25], na.rm = TRUE)
BIGco$XBefore <- rowSums(BIGco[, Xind][, 1:12], na.rm = TRUE)

# TRUE when there was no visit at all in the 12 months after / before
BIGco$XAfterZero <- BIGco$XAfter == 0
BIGco$XBeforeZero <- BIGco$XBefore == 0

# 12-month exposures (number of non-missing months), all should be 12
BIGco$XAfter.offs <- rowSums(!is.na(BIGco[, Xind][, 14:25]))
BIGco$XBefore.offs <- rowSums(!is.na(BIGco[, Xind][, 1:12]))

# BIGco$XAfter6=rowSums(BIGco[,Xind][,14:19],na.rm=TRUE) #6 months visits
# BIGco$XBefore6=rowSums(BIGco[,Xind][,7:12],na.rm=TRUE) #6 months visits
#
# BIGco$XAfter6.offs=rowSums(!is.na(BIGco[,Xind][,14:19])) #6 months exposures, all should be 6
# BIGco$XBefore6.offs=rowSums(!is.na(BIGco[,Xind][,7:12])) #6 months exposures, all should be 6

# Number of visits in the 3 months after / before
BIGco$XAfter3 <- rowSums(BIGco[, Xind][, 14:16], na.rm = TRUE)
BIGco$XBefore3 <- rowSums(BIGco[, Xind][, 10:12], na.rm = TRUE)

# 3-month exposures, all should be 3
BIGco$XAfter3.offs <- rowSums(!is.na(BIGco[, Xind][, 14:16]))
BIGco$XBefore3.offs <- rowSums(!is.na(BIGco[, Xind][, 10:12]))

#########################
# Filter on age class at 2004:

table(BIGco$coh)
BIGco <- BIGco[BIGco$ageclass2004 %in% restriction_10, ]
table(BIGco$coh)

##########################################
# Merge the new (recalculated visits) and the old data frames
##########################################

BIGco <- as.data.frame(fastmerge(BIGco, MODOpostDATf_[MODOpostDATf_$pnr %in% BIGco$pnr, ], 'pnr'))
table(BIGco$coh)

# Filter rows without any exposure before or after; 1 individual died right
# after the interview, but it was removed earlier
ind1 <- (BIGco$XAfter.offs == 0) | (BIGco$XBefore.offs == 0)
BIGco[which(ind1), ]

BIGco <- BIGco[which(!ind1), ]

BIGco$ageclass2004 <- droplevels(BIGco$ageclass2004)

rraBIGco <- BIGco

table(rraBIGco$XAfter, useNA = 'always')
table(rraBIGco$XBefore, useNA = 'always')

# Fix the level order of the categorical covariates.
rraBIGco$has.children <- factor(rraBIGco$has.children, levels = c('yes', 'no'))
rraBIGco$income3yB.cat <- factor(rraBIGco$income3yB.cat, levels = c('hi', 'med', 'lo'))
rraBIGco$civstat <- factor(rraBIGco$civstat,
                           levels = c('partnered', 'divorced or separated', 'widowed', 'unpartnered'))
