mergeid2pnr<-function (wave=1, #Survey wave
                       missing.pnr=c('',NA)) 
                       
  #output: data.frame with columns
  #        mergeid
  #        pnr
  #        year - year of the interview  
  #        month - month of the interview  
  #        age - age at the interview  
  {
  
  missing.pnr=missing.pnr[1]
  
  #read files from the survey directory
  tmplix<-read_sas(paste(SURVEY_DIR,'Sharew',wave,'_rel',Release,'_0_0_dk_lix_no_relea.sas7bdat',sep=''))
  tmpcvr<-read_sas(paste(SURVEY_DIR,'Sharew',wave,'_rel',Release,'_0_0_dk_cv_r.sas7bdat',sep=''))
  tmpwgt<-read_sas(paste(SURVEY_DIR,'Sharew',wave,'_rel',Release,'_0_0_dk_gv_weights.sas7bdat',sep=''))
  if (wave != 3) {
    tmpimp<-read_sas(paste(SURVEY_DIR,'w',wave,'_rel',Release,'_0_0_dk_gv_imputations.sas7bdat',sep=''))
    tmpimpi=!duplicated(tmpimp$mergeid)
    tmpimp=tmpimp[tmpimpi,]
    if (NROW(tmpimp)!=NROW(tmplix)) stop('Problem with imputations file.')
  }
  
  #remove not interviewed
  tmpcvr<-tmpcvr[tmpcvr$mergeid %in% tmplix$mergeid,] 
  tmpwgt<-tmpwgt[tmpwgt$mergeid %in% tmplix$mergeid,] 
  
  #merge both data sets into idobj, leaving only needed columns
  idobj=fastmerge(DF1=data.frame(mergeid=tmplix$mergeid, pnr=tmplix$pnr, pnrp=tmplix$pnr_partner, hhid=unname(tmplix[paste('hhid',wave,sep='')])), 
                  DF2=data.frame(mergeid=tmpcvr$mergeid, year=tmpcvr$int_year, 
                                 month=tmpcvr$int_month, age=tmpcvr$age_int), 
                  by='mergeid', all.x = TRUE, all.y = TRUE)
  cciw=paste('cciw_w',wave,sep='')
  cchw=paste('cchw_w',wave,sep='')
  idobj=fastmerge(DF1=idobj, 
                  DF2=data.frame(mergeid=tmpwgt$mergeid, weight_i=unname(tmpwgt[,cciw]), weight_h=unname(tmpwgt[,cchw]),
                                 psu=tmpwgt$psu), 
                  by='mergeid', all.x = TRUE, all.y = TRUE)
  
  if (wave != 3) 
    idobj=fastmerge(DF1=idobj, DF2=data.frame(mergeid=tmpimp$mergeid, nchild=tmpimp$nchild, edusurv=tmpimp$isced),
                  by='mergeid', all.x = TRUE, all.y = TRUE)
    
  idobj$directpnr=nchr(idobj$pnr)>4
    
  #check how many interview years has a wave
  years=table(tmpcvr$int_year)
  years=names(years)[order(years,decreasing = TRUE)]
  if (any(years<1000)) warning('Missing interview years in the survey wave.')
  years=years[years>1000]
  cat('Wave',wave,'has',length(years),'year(s):',years,'\n')
  
  #read bef files for selected years
  tmpbef=lapply(years, function(k) read_sas(paste(REG_DIR,'bef',k,'.sas7bdat',sep=''))[,c('pnr','EFALLE')])
  names(tmpbef) = paste('y', years,sep='')
  
  z=lapply(seq_along(years), function(k) {
   
    #extract pnr(s) from idobj for specific year of interview
    y_idobj=idobj[years[k]==idobj$year,]
    
    #index for missing pnr having pnr.partner
    ind.p=nchr(y_idobj$pnrp)!=0 & nchr(y_idobj$pnr)==0 
    
    #reg indexes present int the survey in particular year
    ind.reg=tmpbef[[k]]$EFALLE %in% y_idobj$pnrp[ind.p]
    
    #extract just them
    res=tmpbef[[k]][ind.reg,]
    names(res)=c('pnr','pnrp')
    res
  })
  
  #make a dictionary
  dictionary=rbindlist(z)
 
  if (uniqueN(dictionary$pnr)!=uniqueN(dictionary$pnrp)) stop('non-unique dictionary')
  
  #extract missing pnrs
  ind.p=nchr(idobj$pnr)==0 & nchr(idobj$pnrp)!=0
  idobj$n=seq_len(NROW(idobj))
  sub.idobj=idobj[ind.p,-"pnr"]
  
  #translate pnrp to pnr
  sub.idobj=fastmerge(sub.idobj,dictionary,by='pnrp',all.x=TRUE,all.y=TRUE)
  sub.idobj=sub.idobj[order(sub.idobj$n),]
 
  if (dim(sub.idobj)[1]!=sum(ind.p)) stop('Something wrong.')
  if (sum(idobj$n[ind.p]!=sub.idobj$n)) stop('Unsorted')
  
  idobj$pnr=as.character(idobj$pnr)
  idobj$pnr[ind.p]=as.character(sub.idobj$pnr)
 
  #correct missing values
  idobj$pnr[is.na(idobj$pnr)]=missing.pnr
  idobj$pnr[nchr(idobj$pnr)<4]=missing.pnr
  
  #remove pnrp and check
  #idobj$pnrp=NULL
  idobj$n=NULL
  idmis=idobj$pnr!=missing.pnr
  cat('There is',sum(!idmis),'missing of',length(idmis),'pnrs \n')
  if (uniqueN(idobj$pnr[idmis])!=uniqueN(idobj$mergeid[idmis])) stop('Failed')
  if (uniqueN(idobj$pnr[idmis])!=NROW(idobj[idmis,])) stop('Failed')
  idobj
  
}

W1=mergeid2pnr(wave=1)
W2=mergeid2pnr(wave=2)
W3=mergeid2pnr(wave=3)
W4=mergeid2pnr(wave=4)
W5=mergeid2pnr(wave=5)

############################################################################################################
# Calculate self-reported visits
############################################################################################################

W1.hc<-read_sas(paste(SURVEY_DIR,'Sharew1_rel',Release,'_0_0_dk_hc.sas7bdat',sep=''))
W1.hc<-W1.hc[order(W1.hc$mergeid),]
W2.hc<-read_sas(paste(SURVEY_DIR,'Sharew2_rel',Release,'_0_0_dk_hc.sas7bdat',sep=''))
W2.hc<-W2.hc[order(W2.hc$mergeid),]
W3.hc<-read_sas(paste(SURVEY_DIR,'Sharew3_rel',Release,'_0_0_dk_hc.sas7bdat',sep=''))
W3.hc<-W3.hc[order(W3.hc$mergeid),]
W4.hc<-read_sas(paste(SURVEY_DIR,'Sharew4_rel',Release,'_0_0_dk_hc.sas7bdat',sep=''))
W4.hc<-W4.hc[order(W4.hc$mergeid),]
W5.hc<-read_sas(paste(SURVEY_DIR,'Sharew5_rel',Release,'_0_0_dk_hc.sas7bdat',sep=''))
W5.hc<-W5.hc[order(W5.hc$mergeid),]

W1.visits=data.frame(allvisits=absna(W1.hc$hc002_),
                     practitionervisits=absna(emptyna(W1.hc$hc003_)),
                     mergeid=W1.hc$mergeid)
W1.visits$practitionervisits[is.na(W1.visits$practitionervisits)&W1.visits$allvisits==0]=0

W2.visits=data.frame(allvisits=absna(W2.hc$hc002_),
                     practitionervisits=absna(emptyna(W2.hc$hc003_)),
                     mergeid=W2.hc$mergeid)
W2.visits$practitionervisits[is.na(W2.visits$practitionervisits)&W2.visits$allvisits==0]=0

W4.visits=data.frame(allvisits=absna(W4.hc$hc002_),
                     practitionervisits=absna(emptyna(W4.hc$hc003_)),
                     mergeid=W4.hc$mergeid)
W4.visits$practitionervisits[is.na(W4.visits$practitionervisits)&W4.visits$allvisits==0]=0

W5.visits=suppressWarnings( data.frame(allvisits=absna(W5.hc$hc002_),
                                       practitionervisits=absna(emptyna(W5.hc$hc003_)),
                                       mergeid=W5.hc$mergeid))
W5.visits$practitionervisits[is.na(W5.visits$practitionervisits)&W5.visits$allvisits==0]=0

rm(W1.hc,W2.hc,W3.hc,W4.hc,W5.hc)
gc()
