#Read register

options(scipen=3)
YEARS=2003:2014

##################################################################################################################
# Acquire death date from the register
##################################################################################################################

R.dod1<-read_sas(paste(REG_DIR,'dod',2013,'.sas7bdat',sep=''))
R.dod2<-read_sas(paste(REG_DIR,'dod',2016,'.sas7bdat',sep=''))
R.dod3<-read_sas(paste(REG_DIR,'dodsaasg',2015,'.sas7bdat',sep=''))
ress1=data.frame(pnr=R.dod1$pnr, dead.date=R.dod1$DODDATO)
ress2=data.frame(pnr=R.dod2$pnr, dead.date=R.dod2$DODDATO)
ress3=data.frame(pnr=R.dod3$pnr, dead.date=R.dod3$D_DODSDATO)
DOD=merge(merge(ress1,ress2,by='pnr',all.x=TRUE,all.y=TRUE),ress3,by='pnr',all.x=TRUE,all.y=TRUE)
dim(DOD)

myfst<-function(x) empty2NA(x[!is.na(x)][1])
DOD=data.frame(pnr=DOD$pnr, dead.date=as.Date(apply(DOD[,-1][c(2,1,3)],1,myfst)))
save('DOD',file=paste(SAVE_DIR,'reg.death.dat',sep=''))

rm(R.dod1,R.dod2,R.dod3,ress1,ress2,ress3)
gc()


##################################################################################################################
# Hospital records
##################################################################################################################

tmpy=YEARS
YEARS=2001:2014
LPRPOP=NULL
as.bin<-function(pnr,Year.in,Year.out,Month.in,Month.out){
  Month.in[Year.in<YEARS[1]]=1
  Year.in[Year.in<YEARS[1]]=YEARS[1]
  ind=Year.out>YEARS[length(YEARS)] | is.na(Year.out)
  Month.out[ind]=12
  Year.out[ind]=YEARS[length(YEARS)]
  pos.in= (Year.in-YEARS[1])*12 + Month.in
  pos.out= (Year.out-YEARS[1])*12 + Month.out
  patc =  matrix(0,length(Month.in),length(YEARS)*12)
  for (j in 1:length(Month.in)) patc[j,pos.in[j]:pos.out[j]]=1
  data.frame(pnr=pnr,h=patc)
}

for (y in YEARS){
   cat(y,'\n')
   R.lprpop<-read_sas(paste(REG_DIR,'lprpop',y,'.sas7bdat',sep=''))
   R.lprpop<-R.lprpop[R.lprpop$C_PATTYPE==0,]
   ress=data.frame(pnr=R.lprpop$pnr, 
                   year = y,
                   type = R.lprpop$C_PATTYPE,
                   inh = R.lprpop$D_INDDTO, 
                   outh = R.lprpop$D_UDDTO)
   LPRPOP = rbind(LPRPOP,ress)
}

res=as.bin(LPRPOP$pnr,year(LPRPOP$inh),year(LPRPOP$outh),month(LPRPOP$inh),month(LPRPOP$outh))
mycolSums<-function(mat) if(length(dim(mat))) colSums(mat) else mat
res2=sapply(unique(as.character(res$pnr)),function(k) mycolSums(res[which(res$pnr==k),-1]))
save(LPRPOP,file=paste(SAVE_DIR,'LPRPOP.dat',sep=''))
save(res,file=paste(SAVE_DIR,'hosp.dat',sep=''))
res2=t(res2)
res2=data.frame(pnr=row.names(res2),res2)
rm(res)
HOSPITAL=res2
rm(res2)
save(HOSPITAL,file=paste(SAVE_DIR,'hospU.dat',sep=''))
YEARS=tmpy

##################################################################################################################
# Doctor visits
##################################################################################################################

SSSY=NULL
SSSYw=NULL
SSSYALL=NULL
binmonth<-function(k) c(rep(0,k-1),1,rep(0,12-k))
monthvec<- function(m) if (length(m)) rowSums(sapply(m,binmonth)) else rep(0, 12)
z=NULL
for (y in YEARS){
  cat(y,'\n')
  if (y<2005) {
    R.sssy<-read_sas(paste(REG_DIR,'sysi',y,'.sas7bdat',sep=''))
  } else {
    R.sssy<-read_sas(paste(REG_DIR,'sssy',y,'.sas7bdat',sep=''))
  }
  
  ress=data.frame(pnr=R.sssy$pnr, 
                  #year = y,
                  year = 2000+as.numeric(substr(R.sssy$AFRPER,1,2)),
                  visitmonth = ti(R.sssy$AFRPER) %% 100, 
                  visittype = R.sssy$YDTYP,
                  specialist = as.numeric(substr(R.sssy$SPECIALE,1,2)))
  SSSYALL=rbind(SSSYALL,ress)
  
  print(table(ress$visittype))
  z=rbind(z,cbind(unique(as.character(ress$visittype)),y))
  exclude=sort(c(38:40,42,41,43:45,46,71,72:76,77:79,81,90:92,97:98,93,94,50,55,60,88))
  ress=ress[!(ress$visittype %in% exclude),]
  ress$visittype=droplevels(ress$visittype)
  ress$pnr=droplevels(ress$pnr)
  print(table(ress$visittype))
  
  #remove duplicated visits of the same type in the same month for the same patient
  ind=paste(ress$pnr,ress$visittype,ress$visitmonth,ress$year)
  ress=ress[!duplicated(ind),]
  if(uniqueN(ress$year)!=1) stop('More than one year!')
 
  tmp=tapply(ress$visitmonth,ress$pnr,monthvec)
  tmp2=t(sapply(tmp,c))
  colnames(tmp2)<-paste('visit',c('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'),sep='.')

  ress2<-data.frame(pnr=rownames(tmp2),
                    year=y,
                    tmp2)
  
  SSSY=rbind(SSSY,ress)
  SSSYw=rbind(SSSYw,ress2)
}
sort(unique(SSSYALL$visittype))

SSSY$visittype=as.character(SSSY$visittype)
SSSY$visittype[SSSY$visittype%in%c('05')]<-'alm. doctor'
SSSY$visittype[SSSY$visittype%in%c('08','10')]<-'doctor on call'
SSSY$visittype[SSSY$visittype%in%c('15')]<-'opthalmologist'
SSSY$visittype[SSSY$visittype%in%c('20')]<-'ear, nose, neck doctor'
SSSY$visittype[SSSY$visittype%in%c('25')]<-'second specialist'
SSSY$visittype[SSSY$visittype%in%c('50')]<-'chiropractor'
SSSY$visittype[SSSY$visittype%in%c('55')]<-'optician'
SSSY$visittype[SSSY$visittype%in%c('60')]<-'podiatrist'
SSSY$visittype[SSSY$visittype%in%c('66','70')]<-'labolatory'
SSSY$visittype[SSSY$visittype%in%c('80')]<-'house/contact doctor'
SSSY$visittype[SSSY$visittype%in%c('88')]<-'hearing aids'
SSSY$visittype[SSSY$visittype%in%c('93','94')]<-'psychologist'
SSSY$visittype[SSSY$visittype%in%c('46')]<-'Teddy O.'
SSSY$visittype[SSSY$visittype%in%c('97')]<-'interpreter'
SSSY$visittype[SSSY$visittype%in%c('43','44','45')]<-'physiotherapist'
SSSY$visittype[SSSY$visittype%in%c('40')]<-'dentist'

SSSYALL$visittype=as.character(SSSYALL$visittype)
SSSYALL$visittype[SSSYALL$visittype%in%c('05')]<-'alm. doctor'
SSSYALL$visittype[SSSYALL$visittype%in%c('08','10')]<-'doctor on call'
SSSYALL$visittype[SSSYALL$visittype%in%c('15')]<-'opthalmologist'
SSSYALL$visittype[SSSYALL$visittype%in%c('20')]<-'ear, nose, neck doctor'
SSSYALL$visittype[SSSYALL$visittype%in%c('25')]<-'second specialist'
SSSYALL$visittype[SSSYALL$visittype%in%c('50')]<-'chiropractor'
SSSYALL$visittype[SSSYALL$visittype%in%c('55')]<-'optician'
SSSYALL$visittype[SSSYALL$visittype%in%c('60')]<-'podiatrist'
SSSYALL$visittype[SSSYALL$visittype%in%c('66','70')]<-'labolatory'
SSSYALL$visittype[SSSYALL$visittype%in%c('80')]<-'house/contact doctor'
SSSYALL$visittype[SSSYALL$visittype%in%c('88')]<-'hearing aids'
SSSYALL$visittype[SSSYALL$visittype%in%c('93','94')]<-'psychologist'
SSSYALL$visittype[SSSYALL$visittype%in%c('46')]<-'Teddy O.'
SSSYALL$visittype[SSSYALL$visittype%in%c('97')]<-'interpreter'
SSSYALL$visittype[SSSYALL$visittype%in%c('43','44','45')]<-'physiotherapist'
SSSYALL$visittype[SSSYALL$visittype%in%c('40')]<-'dentist'

SSSYALL$visittype[SSSYALL$visittype=='physioterapist']='physiotherapist'

library(R2HTML)
HTMLInitFile(outdir=RES_DIR,file='VisitsByYear_WholeRegister',Title='YDTYP')
HTML(t(table(SSSYALL$year,SSSYALL$visittype)))
HTMLEndFile(file = HTMLGetFile())

save('SSSYALL',file=paste(SAVE_DIR,'reg.visits.all.raw.dat',sep=''))
save('SSSY',file=paste(SAVE_DIR,'reg.visits.raw.dat',sep=''))
save('SSSYw',file=paste(SAVE_DIR,'reg.visits.dat',sep=''))
rm(R.sssy,ress,ress2,tmp,tmp2,y,ind); gc();

##################################################################################################################
# Emplyment
##################################################################################################################

# AKM$SOCIO13 :
# 0 - NA, 110 -self-employed, 111-independent,
# 112 -self-employed, 113-independent,
# 114 -self-employed, 120 - employee spouse,
# 131 -top leader, 132 - high skills employee
# 133 -intermediate skills employee
# 134 - basic skills employee
# 135 - other employees
# 139 - employ job position not disclosed
# 210 - unemployed >6moths
# 220 - beneficiary of sickness/eductaion/holiday allowence benefits
# 310 - Pupils min 15y, under education
# 321 - early retires
# 322 - National pensioners
# 323 - Beneficiary
# 330 - Cash beneficiary
# 410 - others
# 420 - Children <15y

# AKM$BESKST02 (2002+) AKM$BESKST (2001) :
# 1- Self-employed
# 2- employ spouse 
# 3- employ owner of business
# 4- employ
# 5- employ with support
# 6- pensionist and owner of business
# 7- pensionist
# 8- other
# 9- beneficiary
# 10- unemployment >6m
# 11- beneficiary of unemployment benefit
# 12- cash beneficiary
# 99- not in AKM

AKM=NULL
for (y in YEARS){
  cat(y,'\n')
  R.akm<-read_sas(paste(REG_DIR,'akm',y,'.sas7bdat',sep=''))
  employ=ti(R.akm$BESKST13)
  employ[employ=99]=NA
  ress=data.frame(pnr=R.akm$pnr, 
                  year = y,
                  employ.raw = R.akm$BESKST13, 
                  employ = c('unemployed','employed')[1+(employ<=5)])
  AKM=rbind(AKM,ress)
}
save('AKM',file=paste(SAVE_DIR,'reg.employ.dat',sep=''))
rm(R.akm,ress,employ,y); gc();

##################################################################################################################
# Family
##################################################################################################################

#Number of children FAM$ANTBOERNF
#Family size FAM$ANTPERSF

FAM=NULL
for (y in YEARS){
  cat(y,'\n')
  R.fam<-read_sas(paste(REG_DIR,'fam',y,'.sas7bdat',sep=''))
  ress=data.frame(famid=R.fam$FAMILIE_ID, 
                  year = y,
                  children = R.fam$ANTBOERNF,
                  has.children = R.fam$ANTBOERNF>0,
                  fam.size =R.fam$ANTPERSF)
  FAM=rbind(FAM,ress)
}
save('FAM',file=paste(SAVE_DIR,'reg.family.dat',sep=''))
rm(R.fam,ress,y); gc();

##################################################################################################################
# Income
##################################################################################################################

IND=NULL

#the code could be optimized
for (y in c(YEARS)) {
  cat(y,'\n')
  R.ind0<-read_sas(paste(REG_DIR,'ind',y,'.sas7bdat',sep=''))
  R.ind1<-read_sas(paste(REG_DIR,'ind',y-1,'.sas7bdat',sep=''))
  R.ind2<-read_sas(paste(REG_DIR,'ind',y-2,'.sas7bdat',sep=''))
  
  if (y<2013) tmp0=R.ind0$DISPON_13 else tmp0=R.ind0$DISPON_13
  if ((y-1)<2013) tmp1=R.ind1$DISPON_13 else tmp1=R.ind1$DISPON_13
  if ((y-2)<2013) tmp2=R.ind2$DISPON_13 else tmp2=R.ind2$DISPON_13
  
  tmp0b <- R.ind0$AEKVIVADISP_13
  tmp1b <- R.ind1$AEKVIVADISP_13
  tmp2b <- R.ind2$AEKVIVADISP_13
  
  tmp0[tmp0<0]=NA
  tmp1[tmp1<0]=NA
  tmp2[tmp2<0]=NA
  
  tmp0b[tmp0b<0]=NA
  tmp1b[tmp1b<0]=NA
  tmp2b[tmp2b<0]=NA
  
  ress0=data.frame(pnr=R.ind0$pnr, year = y, income = tmp0)
  ress1=data.frame(pnr=R.ind1$pnr, year = y-1, income = tmp1)
  ress2=data.frame(pnr=R.ind2$pnr, year = y-2, income = tmp2)
  
  ress0b=data.frame(pnr=R.ind0$pnr, yearB = y, incomeB = tmp0b)
  ress1b=data.frame(pnr=R.ind1$pnr, yearB = y-1, incomeB = tmp1b)
  ress2b=data.frame(pnr=R.ind2$pnr, yearB = y-2, incomeB = tmp2b)
  
  ress.=fastmerge(fastmerge(ress0, ress1,by='pnr'),ress2,by='pnr')
  ress.b=fastmerge(fastmerge(ress0b, ress1b,by='pnr'),ress2b,by='pnr')
  ress.=fastmerge(ress., ress.b,by='pnr')
  
  ress.=ress.[order(ress.$pnr),]
  ress=data.frame(pnr=ress.$pnr,
                  year=y, 
                  income3y=rowMeans(ress.[,c(3,5,7)],na.rm=TRUE),
                  income2y=rowMeans(ress.[,c(3,5)],na.rm=TRUE),
                  income1y=unname(as.matrix(ress.[,3])),
                  income3yB=rowMeans(ress.[,c(9,11,13)],na.rm=TRUE),
                  income2yB=rowMeans(ress.[,c(9,11)],na.rm=TRUE),
                  income1yB=unname(as.matrix(ress.[,9])))
  
  IND=rbind(IND,ress)
}
save('IND',file=paste(SAVE_DIR,'reg.income.dat',sep=''))
rm(tmp0,tmp1,tmp2,ress.,ress1,ress2,ress0,ress,y,R.ind0,R.ind1,R.ind2); gc();

##################################################################################################################
# BEF
##################################################################################################################

BEF=NULL
for (y in YEARS){
  cat(y,'\n')
  R.bef<-read_sas(paste(REG_DIR,'bef',y,'.sas7bdat',sep=''))
  sex=ti(R.bef$KOEN)
  sex[sex>2 | sex<1] = NA
  country=ti(R.bef$OPR_LAND)
  country[country<5100]=NA
  civs=as.character(R.bef$CIVST)
  dead = civs == 'D' 
  
  civs.raw=civs
  civs[civs == 'G' | civs == 'P'] = 'partnered'
  civs[civs == 'F' | civs == 'O'] = 'divorced or separated'
  civs[civs == 'E' | civs == 'L'] = 'widowed'
  civs[civs == 'U' | civs == 'A'] = 'unpartnered'
  civs[civs == 'D' ] = NA
  table(civs,useNA='always')
  
  R.fam<-read_sas(paste(REG_DIR,'fam',y,'.sas7bdat',sep=''))
  A=uniqueN(R.fam$FAMILIE_ID)
  B=length(R.fam$FAMILIE_ID)
  if (A!=B) stop('wrong')
  A=uniqueN(R.bef$pnr)
  B=length(R.bef$pnr)
  if (A!=B) stop('wrong')
  
  children = tapply(R.bef$FAMILIE_ID, R.bef$pnr, function(X) empty2NA(R.fam$ANTBOERNF[R.fam$FAMILIE_ID%in%X]))
  has.children =  children>0
  
  ress=data.frame(pnr=R.bef$pnr, 
                  year = y,
                  sex = sex,
                  country.raw=country,
                  foreigner = country>5100,
                  children = children,
                  has.children = has.children,
                  birth.raw = R.bef$FOED_DAG,
                  civstat.raw=civs.raw,
                  civstat=civs,
                  dead=dead
  )
  BEF=rbind(BEF,ress)
}
save('BEF',file=paste(SAVE_DIR,'reg.general.dat',sep=''))
rm(R.bef, R.fam, A, B, children, has.children, y,  ress, sex, country, civs, civs.raw, dead)
gc()

##################################################################################################################
#Merge files
##################################################################################################################

SSSYw$pnry=interaction(SSSYw$pnr,SSSYw$year)

BEF$pnry=interaction(BEF$pnr,BEF$year)
AKM$pnry=interaction(AKM$pnr,AKM$year)
IND$pnry=interaction(IND$pnr,IND$year)

m2dt=fastmerge(BEF,SSSYw,by='pnry')
suppressWarnings({m2dt$pnr.x=NULL; m2dt$year.x=NULL;
m2dt$pnr.y=NULL; m2dt$year.y=NULL;})
m3dt=fastmerge(m2dt,IND,by='pnry')
suppressWarnings({m3dt$pnr.x=NULL; m3dt$year.x=NULL;
m3dt$pnr.y=NULL; m3dt$year.y=NULL;})
m4dt=fastmerge(m3dt,AKM,by='pnry')
suppressWarnings({m4dt$pnr.x=NULL; m4dt$year.x=NULL;
m4dt$pnr.y=NULL; m4dt$year.y=NULL;})
m4dt$oldpnr=m4dt$pnr
m4dt$pnr=as.factor(m4dt$pnr)
m4dt$pnr=substr(m4dt$pnry,1,12)
m4dt$year=ti(substr(m4dt$pnry,14,17))
m4dt[is.na(m4dt$oldpnr),]

#calculate income tertiles by year
Y=sort(unique(m4dt$year))
inc.tertiles.1y=sapply(Y, function (y) quantile(m4dt$income1y[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))
inc.tertiles.2y=sapply(Y, function (y) quantile(m4dt$income2y[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))
inc.tertiles.3y=sapply(Y, function (y) quantile(m4dt$income3y[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))

inc.tertiles.1yB=sapply(Y, function (y) quantile(m4dt$income1yB[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))
inc.tertiles.2yB=sapply(Y, function (y) quantile(m4dt$income2yB[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))
inc.tertiles.3yB=sapply(Y, function (y) quantile(m4dt$income3yB[m4dt$year==y],c(1/3,2/3),na.rm=TRUE))

colnames(inc.tertiles.1y)=Y
colnames(inc.tertiles.2y)=Y
colnames(inc.tertiles.3y)=Y

colnames(inc.tertiles.1yB)=Y
colnames(inc.tertiles.2yB)=Y
colnames(inc.tertiles.3yB)=Y

#barplot(inc.tertiles.3y)
m4dt$income1y.cat=m4dt$income1y
m4dt$income2y.cat=m4dt$income2y
m4dt$income3y.cat=m4dt$income3y
m4dt$income1yB.cat=m4dt$income1yB
m4dt$income2yB.cat=m4dt$income2yB
m4dt$income3yB.cat=m4dt$income3yB
for (y in Y){
  m4dt$income1y.cat[y==m4dt$year]=as.character(cut(m4dt$income1y[y==m4dt$year],c(0,inc.tertiles.1y[,which(Y==y)],Inf),c('lo','med','hi')))
  m4dt$income2y.cat[y==m4dt$year]=as.character(cut(m4dt$income2y[y==m4dt$year],c(0,inc.tertiles.2y[,which(Y==y)],Inf),c('lo','med','hi')))
  m4dt$income3y.cat[y==m4dt$year]=as.character(cut(m4dt$income3y[y==m4dt$year],c(0,inc.tertiles.3y[,which(Y==y)],Inf),c('lo','med','hi')))
  
  m4dt$income1yB.cat[y==m4dt$year]=as.character(cut(m4dt$income1yB[y==m4dt$year],c(0,inc.tertiles.1yB[,which(Y==y)],Inf),c('lo','med','hi')))
  m4dt$income2yB.cat[y==m4dt$year]=as.character(cut(m4dt$income2yB[y==m4dt$year],c(0,inc.tertiles.2yB[,which(Y==y)],Inf),c('lo','med','hi')))
  m4dt$income3yB.cat[y==m4dt$year]=as.character(cut(m4dt$income3yB[y==m4dt$year],c(0,inc.tertiles.3yB[,which(Y==y)],Inf),c('lo','med','hi')))
}

REGISTER=m4dt

save('REGISTER',file=paste(SAVE_DIR,'reg.full.dat',sep=''))
write.csv(REGISTER,file=paste(SAVE_DIR,'reg.full.csv',sep=''))
rm(m2dt,m3dt,m4dt,Y,y)
gc()
