	*** Data preparation SUF Data

	*********************************************************************************************************************************************
	* Merge
	*********************************************************************************************************************************************

	* Identify the HBV per household and wave from the register data:
	* the wide pnrzp columns are reshaped to one row per household-wave and
	* the resulting value serves as person ID (pnr) for the merges below.
	use hnr pnrzp* using $data\hh_register.dta, clear
	reshape long pnrzp, i(hnr) j(welle)
	rename pnrzp pnr

	* Household data (HHENDDAT): keep matched household-waves only
	merge 1:1 hnr welle using $data\HHENDDAT.dta, nogenerate keep(match)

	* Individual data (PENDDAT) of the HBV: keep matched person-waves only
	merge 1:1 pnr welle using $data\PENDDAT.dta, nogenerate keep(match)

	* Interviewer questionnaire: optional, so unmatched master rows are kept
	merge 1:1 pnr welle using $tmp\interviewernachbefragung_final.dta, nogenerate keep(master match)

	* Bring the four interviewer items to the 1/2 coding of the janein label
	local intv_items interesse verstaendnis zuverlaessig schwierigkeit
	recode `intv_items' (0 = 2)
	label values `intv_items' janein


	*****************
	* Variables
	*****************
	 
	* Using information from the first interview for time-constant variables

	* Rent / home ownership: code -3 becomes the new category 5
	label define HW0300_lb 5 "5. Shared apartment", modify
	recode HW0300 (-3 = 5)

	* Amount of debt: before wave 12 the value is taken from HEK1400;
	* code -3 is then set to a debt of zero
	replace HEK1420 = HEK1400 if welle < 12
	recode HEK1420 (-3 = 0)

	* Pupil/student/in education: missing codes count as "none of these" (4)
	recode PB0100 (-10 -4 -3 -2 . = 4)

	* Partner living in the household, married or not:
	* 1 if either partner flag is 1, 2 if both are 0, otherwise -2
	gen partner = cond(epartner == 1 | apartner == 1, 1, ///
	              cond(epartner == 0 & apartner == 0, 2, -2))
	label values partner janein

	* Marital status: partner filter epartner == 1 is treated as married (5),
	* afterwards -3 is set to missing and categories 7 and 6 are collapsed
	replace PD0500 = 5 if epartner == 1
	recode PD0500 (-3 = .) (7 = 5) (6 = 4)

	* Close friends: where the count is -3, use 0 if the filter PSK0100 says
	* "no friends" (2), otherwise take over the filter value itself
	replace PSK0200 = cond(PSK0100 == 2, 0, PSK0100) if PSK0200 == -3

	* Religious affiliation (PD0200): wave 3 first takes over the value of the
	* same person's preceding row
	sort pnr welle
	replace PD0200 = PD0200[_n-1] if welle == 3 & pnr == pnr[_n-1]

	* Consecutive observation counter per person, used as the time variable so
	* that L. refers to the person's previous row even if waves are skipped
	bysort pnr (welle): gen zaehler = _n
	xtset pnr zaehler

	* Stata treats missing as larger than any number, so "L.x > 0" alone is
	* also true when there is no previous row (L.x missing). All lag fills
	* below therefore require a nonmissing lag, otherwise valid negative codes
	* on a person's first row would be overwritten with system missing.
	* NOTE(review): L. values may be evaluated before the replace runs, in
	* which case runs of several consecutive codes are filled only one step
	* per pass - confirm whether a cascading fill is intended.
	replace PD0200 = L.PD0200 if L.PD0200 > 0 & !missing(L.PD0200) & PD0200 == -3
	recode PD0200 (-3 = .)

	* Linkage consent: maximum of RegP0100 over all waves of the person
	tab RegP0100
	bysort pnr (welle): egen zuspiel = max(RegP0100)
	recode zuspiel (-5 = -1)(-3 = -4)

	* Denomination (PD0300): same procedure, wave 3 first
	sort pnr welle
	replace PD0300 = PD0300[_n-1] if welle == 3 & pnr == pnr[_n-1]
	sort pnr zaehler
	replace PD0300 = L.PD0300 if L.PD0300 > 0 & !missing(L.PD0300) & PD0300 == -3
	replace PD0300 = 7 if PD0200 == 2 // Not religious
	recode PD0300 (-3 = .)
	lab def PD0300_lb 7 "7. No religious affiliation", modify

	* Indicator for refusal to answer the health questions
	gen keinGesund = PG0100 == -5

	* Health: filter code -3 caused by the refusal is set to missing
	replace PG0500 = . if PG0500 == -3 & PG0100 == -5

	* Other diseases: filter code -3 caused by the refusal is set to missing
	replace PG0800 = . if PG0800 == -3 & PG0100 == -5

	* Migration: negative codes take over the positive value of the previous row
	sort pnr zaehler
	replace PMI0100 = L.PMI0100 if L.PMI0100 > 0 & !missing(L.PMI0100) & PMI0100 < 0

	* Country of birth: carried forward where PMI0100 == 2; where
	* PMI0100 == 1 the new category 24 (Germany) is assigned
	sort pnr zaehler
	replace PMI0200 = L.PMI0200 if L.PMI0200 > 0 & !missing(L.PMI0200) & PMI0200 < 0 & PMI0100 == 2
	replace PMI0200 = 24 if PMI0100 == 1 & PMI0200 < 0
	lab def gebland_lb 24 "24. Germany", modify

	* Household income HEK0600: map the special codes to -1/-2 first, then cap
	* at 80.000. Missing values are excluded explicitly: Stata orders missing
	* above every number, so "HEK0600 > 80000" alone would turn a missing
	* income into 80000.
	recode HEK0600 (999998 = -1) (999999 = -2)
	replace HEK0600 = 80000 if HEK0600 > 80000 & !missing(HEK0600)

	* Currently registered unemployed: code -3 counts as category 2
	recode alakt (-3 = 2)

	* Currently employed: code -3 counts as category 2
	recode etakt (-3 = 2)

	* Adjust W13 school degrees
	recode schul2 (4=3) (5=4) (6 7=5) (8 9=6)

	* Adapt professional degrees of W13
	recode beruf2 (4 5 6 7=3) (8 9 10 11=4) (12 13=5)

*****
*****

* Keep only variables surveyed for everyone: a variable is dropped as soon as
* one observation in waves 4 to 12 carries code -9 (not surveyed in wave) or
* -3 (TNZ). The number of variables before the loop is stored for reporting.
local k_before = c(k)
foreach v of varlist _all {
  quietly count if (`v' == -9 | `v' == -3) & welle >= 4 & welle <= 12
  if r(N) > 0 {
    drop `v'
  }
}

display "Deleted variables:" `k_before' - c(k)
display "Contained variables: " c(k)


** Delete further variables (fixed list, one group of related names per line)
drop ///
  uhnr sample zaehler ///
  hintdat hintjahr hintmon hinttag hintnum ///
  pintdat pintjahr pintmon pinttag pintnum ///
  oecdinca oecdincn hhincome alg1abez nbgbezug ///
  blneualt bik hhtyp vhh mhh zpalthh anzbg ///
  kindu13 ekin1517 ekin614 ekind ekinu15 kindzihh ///
  depindug2 depindg2 HLS1000a ///
  PD0100 PET0100 PG1301 PMI0400 ///
  epartner apartner statakt gebhalbj ogebland ostaatan migration ///
  schul1 beruf1 casmin isced97 bilzeit ///
  mschul1 mschul2 mberuf1 mberuf2 mcasmin misced97 mbilzeit ///
  vschul1 vschul2 vberuf1 vberuf2 vcasmin visced97 vbilzeit

*********************************************************************************************************************************************
** Information on frequency of participation
*********************************************************************************************************************************************

* Possible waves of the household so far: distance of the current wave from
* the household's first wave in the data, counting that first wave as 1
bysort hnr (welle): gen poss_wave = welle - welle[1] + 1

* Realized waves of the household so far: running row count within the
* household (no "- 1", because the first interview counts as well)
by hnr (welle): gen real_wave = _n

* Share of realized among possible waves
gen prop_wave = real_wave / poss_wave

*********************************************************************************************************************************************
* Sample selection
*********************************************************************************************************************************************

* Mark the transition to the senior questionnaire: fb_vers is 1 in the
* current wave and 3 in the following wave of the same household
xtset hnr welle
gen senior = cond(fb_vers == 1 & F.fb_vers == 3, 1, .)

* Waves 1-3 are not analysed
keep if welle >= 4

* Senior questionnaire cases (fb_vers 3) are excluded
keep if fb_vers != 3

* Panel cases only: panel code 3 is removed, then the variable itself
keep if panel != 3
drop panel

* Check the remaining distributions
tabulate altbefr
tabulate fb_vers

*********************************************************************************************************************************************
* Merging contact data
*********************************************************************************************************************************************
* Contact data: hnr and welle must identify rows uniquely; only matched
* household-waves are kept
isid hnr welle
merge 1:1 hnr welle using $tmp\kontaktdaten_final.dta, keep(match) nogenerate

* Interview durations at household and person level; rows without a
* duration record stay in the data
merge m:1 hnr welle using $tmp\intDauernHHgen.dta, keep(master match) nogenerate
merge 1:1 pnr welle using $tmp\intDauernPgen.dta, keep(master match) nogenerate

* Within each wave 4-12, pull both durations in to the wave's 1st and 99th
* percentile. The r() results of summarize are reused by both replace lines.
forvalues w = 4/12 {
  foreach dur in dauerHH dauerP {
    summarize `dur' if welle == `w', detail
    replace `dur' = r(p1) if welle == `w' & `dur' != . & `dur' < r(p1)
    replace `dur' = r(p99) if welle == `w' & `dur' != . & `dur' > r(p99)
  }
}

*********************************************************************************************************************************************
** Merging information on interviewer
*********************************************************************************************************************************************

* Interviewer ID and interviewer characteristics from the SUF interviewer
* file; only household-waves with interviewer information are kept
sort hnr welle
merge m:1 hnr welle using $intnr\PASS_0618_v1_int_hnr.dta, keep(match) nogenerate ///
  keepusing(int_lfd sex geb_j int_erfahrung schbild bild)

* Recode interviewer school and vocational degree so that the respondent
* value labels (schul2_lb, beruf2_lb) can be attached
recode schbild (1=3) (2=4) (3=5)
label values schbild schul2_lb
recode bild (1=2) (4=3) (5=4) (6=5)
label values bild beruf2_lb

* Interviewer age: 2006 + wave number minus year of birth
gen int_alter = 2006 + welle - geb_j
drop geb_j

* Prefix the interviewer variables with int_
rename (schbild bild sex) (int_schul int_bild int_sex)


*********************************************************************************************************************************************
* Generating outcome variable
*********************************************************************************************************************************************
 
* Check whether all cases with a household interview are also marked as a
* realized interview in the contact data, then set the flag to 1 for every row
tabulate welle interview // Cases without interview in contact data
replace interview = 1

* Outcome autkam: 1 if the household has a row in the following wave,
* 0 otherwise (wave 12 is filled separately below)
xtset hnr welle
gen autkam = F.interview
replace autkam = 0 if missing(autkam) & welle != 12

* Transitions to the senior questionnaire count as participation
replace autkam = 1 if senior == 1

* For wave 12 the outcome is the interview flag from the wave 13 contact data
rename interview interview_old
merge m:1 hnr using $tmp\kontaktdaten_13.dta, keep(master match) nogenerate keepusing(interview)
replace autkam = interview if welle == 12
drop senior interview_old interview

*** Excluding persons older than 65
* NOTE(review): rows with missing palter are removed here as well, exactly
* as with "drop if palter >= 66" - confirm that this is intended.
keep if palter < 66


*********************************************************************************************************************************************
* Testing variables
*********************************************************************************************************************************************

* Drop every variable whose variance is exactly zero in the current sample;
* the variable count before the loop is kept for the report below
local k_start = c(k)
foreach v of varlist * {
  quietly summarize `v'
  if r(Var) == 0 {
    drop `v'
  }
}
display "deleted variables:" `k_start' - c(k)
display "contained variables: " c(k)

** Delete further variables
drop int_lfd int_bild rcl_neu altbefr pnr hnr

*********************************************************************************************************************************************
* Generating missing variables
*********************************************************************************************************************************************

* Per row: number of variables holding code -2 or -1
egen mis_pnr = anycount(_all), values(-2 -1)

* Only codes -2 and -1 count towards the threshold; other negative codes
* (filter missings, questions mistakenly not asked) do not. A variable with
* at least 200 such codes gets an indicator <var>_mis that is 1 for
* -2, -1 or system missing. Afterwards all negative codes are set to missing.
foreach v of varlist * {
  count if `v' == -2 | `v' == -1
  if r(N) >= 200 {
    gen `v'_mis = inlist(`v', -2, -1) | `v' == .
  }
  replace `v' = . if `v' < 0
}

* Label all missing indicators
label define val_mis 0 "no missing variable" 1 "missing"
label values *_mis val_mis

*** Highly correlated variables: inspect the pairwise correlations first
correlate hintmod pintmod
correlate hintmod konart
correlate poss_wave real_wave
correlate hsprache psprache

* Deprivation index: number of HLS??00a items with value 2; the single
* items are dropped afterwards
egen depind = anycount(HLS??00a), values(2)
drop HLS??00a

* Federal state: collapse the 16 codes into 4 groups
recode bundesld (1/4=1)(5 6 7 10 = 2)(8 9 = 3)(11/16=4)

* Variance inflation factors on a temporary copy of the data: regress the
* outcome on all remaining variables except the outcome itself, the wave
* and everything matching *mis*
preserve
drop hhgr keinGesund pintmod konart poss_wave psprache
unab excl : autkam welle *mis*
unab allvars : _all
display "`allvars'"
local rhs : list allvars - excl
display "`rhs'"
regress autkam `rhs'
estat vif
restore

* Final removal of the redundant variables
drop hhgr keinGesund pintmod konart poss_wave psprache int_schul_mis PD0300 HW0200

*********************************************************************************************************************************************
* saving data
*********************************************************************************************************************************************
* Store the analysis data set in its smallest storage types
compress
save $tmp\data.dta, replace

* Remove the intermediate files that are no longer needed
foreach f in $tmp\intDauernHHgen.dta $tmp\intDauernPgen.dta ///
             $tmp\kontaktdaten_final.dta $tmp\interviewernachbefragung_final.dta {
  erase `f'
}
