*** Data preparation: preliminary raw data

* Identify HBVs in first-interviewed households.
* Loads the wave-13 person file, multiplies befr_hh by 100, reports how many
* selected rows have lfd - befr_hh different from 1, and stores the selected
* rows (befr_hh plus lfd renamed to hbv) in the tempfile hbvs.
	use $data_13\PASS-W13_P-Sen_Quer_anonym.dta, clear
	replace befr_hh = 100 * befr_hh
	* Selection rule shared by the diagnostic count and the keep below
	local first_hh inlist(samplepost, 20, 21) & UNUM1_02 == 1
	count if lfd - befr_hh != 1 & `first_hh'
	keep if `first_hh'
	rename lfd hbv
	keep hbv befr_hh
	tempfile hbvs
	save `hbvs'

* Merge the household file with the HBV candidates saved above
	use $data_13\PASS-W13_HH_Quer_anonym.dta, clear
	merge 1:1 befr_hh using `hbvs', nogenerate

// Identify the HBV
* Rows with hbvpost == 0 are filled from the merged hbv first; rows that are
* still 0 afterwards take HH_PELFD_1.
	count if hbvpost == 0
	replace hbvpost = hbv if hbv != . & hbvpost == 0
	replace hbvpost = HH_PELFD_1 if hbvpost == 0
	drop hbv
 
// Preparation of household data

* Household income: recode the raw codes 999998/999999 to -1/-2, then cap at 80000.
* Stata treats missing values as larger than every number, so the cap must
* exclude them explicitly; without the guard a missing income would be set
* to 80000 and could no longer be flagged as missing later on.
	recode HEK0600 (999998 = -1) (999999 = -2)
	replace HEK0600 = 80000 if HEK0600 > 80000 & !missing(HEK0600)

* Child younger than 4: 1 if KINDU4post is 1, 0 if it is 2, missing otherwise
	gen kindu4 = cond(KINDU4post == 1, 1, cond(KINDU4post == 2, 0, .))

* Child younger than 15: same coding, based on KINDU15post
	gen kindu15 = cond(KINDU15post == 1, 1, cond(KINDU15post == 2, 0, .))

* Receiving welfare benefit: copy of ALG2Bez_Hpost with code 0 set to missing
	clonevar alg2abez = ALG2Bez_Hpost
	replace alg2abez = . if alg2abez == 0

* Interview mode (household interview)
	rename caticapi hintmod

* Language (household interview): code 0 is merged into code 1
	replace sprache = 1 if sprache == 0
	rename sprache hsprache
	label define SPRACHE 1 "German", modify

* Duration of the household interview; only computed for a positive,
* non-missing end stamp, and non-positive durations are discarded
	gen dauerHH = zsthhend - zstintro1 if zsthhend > 0 & zsthhend != .
	replace dauerHH = . if dauerHH <= 0
	summarize dauerHH, detail

* Rent / home ownership: missing HW0300 becomes category 5 when HW0200 <= 3,
* code 9 becomes missing
	replace HW0300 = 5 if HW0200 <=3 & HW0300 == .
	replace HW0300 = . if HW0300 == 9
	label define HW0300_lb 5 "5. Shared apartment", modify
	label define HW0300 5 "5. Shared apartment", modify

* Interviewer ID of the household interview
	rename internr internr_hh

* Keeping relevant variables on household level.
* hbvpost is retained because it is used afterwards as the key into the person file.
	keep hnr HW0200 HA0100 HLS0100a- HLS2500a HEK0100 HEK0120 HEK0200 HEK0400 HEK0600 HEK1200 ///
	HEK2000 HEK2200 kindu4 kindu15 alg2abez umzug hbvpost hintmod hsprache dauerHH HEK1420 HW0300 ///
	internr_hh
	* Wave identifier: constant 13 on every row; also used as a merge key below
	gen welle = 13 

* Federal state from SUF: only bundesld is taken from the using file;
* keep(1 3) keeps all master rows, matched or not, and drops using-only rows.
	merge 1:m hnr welle using $data_suf13\PASS-W13_HHENDDAT_4.dta, keepusing(bundesld) keep(1 3) nogen 

	
// Preparation of individual data


* Merge individual data: hbvpost serves as the person key lfd; only matched rows are kept
	rename hbvpost lfd
	merge 1:m lfd using $data_13\PASS-W13_P-Sen_Quer_anonym.dta, keep(match) nogenerate

* Duration of the personal interview; non-positive durations are set to missing
	gen dauerP = zstnachadress - zstbeg_person_int if welle==13
	replace dauerP = . if dauerP <= 0
	summarize dauerP, detail

* Mode (personal interview)
	rename caticapi pintmod

* Language (personal interview)
	rename sprache psprache

* Linkage consent: zuspiel is RegP0100 with code 6 recoded to -8
	tabulate RegP0100
	recode RegP0100 (6 = -8), generate(zuspiel)

* Marital status --> partner filter EPARTNER == 1 means married (code 5);
* code 6 is merged into code 4
	label define PD0500_lb 1"1. single" 2"2. divorced" 3"3. widowed" 4 "married, separated" ///
						5"5. married, living together" 6"6. living separately.", modify
	label values PD0500 PD0500_lb
	replace PD0500 = 5 if EPARTNER == 1
	replace PD0500 = 4 if PD0500 == 6

* Partner: code 0 is recoded to 2
	rename PARTNER partner
	replace partner = 2 if partner == 0

* Close friends --> if the filter question (PSK0100) says no friends, the number is set to zero
	label values PSK0100 janein
	replace PSK0200 = 0 if PSK0100 == 2
	recode PSK0200 (999 = -2) (998 = -1)

* Panel indicator: copy of Altbefr_ppost with 2 recoded to 0
	recode Altbefr_ppost (2=0), generate(altbefr)

* Sex
	rename GESCHLpost zpsex

* Age
	rename alter2 palter

* Size of household
	rename HHGRpost hhgr

* Satisfaction with health (PA0100), housing (PA0200), standard of living (PA0300),
* social participation (PA0800), social position (PA0900), life satisfaction (PA1000):
* the raw missing codes 98/99 are mapped to -1/-2.
	foreach item of varlist PA0100 PA0200 PA0300 PA0800 PA0900 PA1000 {
		recode `item' (98 = -1) (99 = -2)
	}

* Pupil/Student/Education --> if missing, then neither pupil, student nor in education (code 4)
	replace PB0100 = 4 if PB0100 == .

* Number of doctor visits: raw codes 997/998/999 become -5/-1/-2
	recode PG0100 (999 = -2) (998 = -1) (997 = -5)

* Child younger than 18
	rename E_KINDU18 ekinu18

* Currently registered unemployed: codes 0 and 3 become -5
	recode ALO_AKTpost (0 3 = -5), generate(alakt)
	label define alakt_lb  1"currently registered unemployed" 2 "currently not registered unemployed", modify
	label values alakt alakt_lb

* Currently employed: BeschStat_Aktpost codes 2 and 3 are merged into 1, code 0 becomes 2.
* The value label is defined under the name etakt_lb, which is the name attached
* to etakt below; defining it under the name etakt left the attached label
* undefined, so the variable showed no value labels.
	recode BeschStat_Aktpost (2 3 = 1) (0=2), gen(etakt)
	lab def etakt_lb  1"currently employed" 2 "currently not employed", modify
	lab val etakt etakt_lb
	
* Interviewer questionnaire: the five-point items inb01, inb02 and inb09 are
* collapsed to two categories (4-5 -> 1, 1-3 -> 2); inb08 is only renamed.
local inb_items inb01 inb02 inb09
local inb_names interesse verstaendnis zuverlaessig
forvalues i = 1/3 {
	local from : word `i' of `inb_items'
	local to : word `i' of `inb_names'
	recode `from' (4 5 = 1) (1 2 3 = 2), generate(`to')
}
rename inb08 schwierigkeit

	label define janein_inb 1 "yes" 2 "no"
	label values interesse verstaendnis zuverlaessig schwierigkeit janein_inb
	
	
* Keeping relevant variables only (household and person level combined).
* Note: PMI0100 and PMI0200 are listed twice in the keep list below.
* Several raw variables (e.g. schuelerstatus_aktpost, PB0400, PB1200, PB1300*, PB1600,
* Altbefr_ppost, PX10200, brpanel) are kept here because later steps still read them.
keep welle hnr HW0200 HA0100 HLS0100a HLS0200a HLS0300a HLS0400a HLS0600a HLS0700a ///
HLS0800a HLS0900a HLS1100a HLS1200a HLS1400a HLS1500a HLS1600a HLS1700a HLS1800a ///
HLS1900a HLS2000a HLS2100a HLS2200a HLS2300a HLS2400a HLS2500a HEK0100 HEK0120 HEK0200 ///
HEK0400 HEK0600 HEK1200 HEK2000 HEK2200 bundesld kindu4 kindu15 alg2abez umzug pnr altbefr ///
zpsex palter hhgr PA0100 PA0200 PA0300 PA0800 PA0900 PA1000 PB0100 PET0510 PEK1400 ///
PEK1450 PSK0400a PSK0400b PSK0400c PSK0400d PSK0400e PG0100 PG0500 PG0800 PG1300 ///
PP0110 PMI0100 PMI0200 ekinu18 PD0500 PSK0200 PD0200 PD0300 PMI0100 PMI0200  ///
schuelerstatus_aktpost PB0400 Altbefr_ppost PB1200 PB1300a PB1300b PB1300c PB1300d PB1300e ///
PB1300f PB1300g PB1300h PB1300i PB1300j PB1600 etakt alakt PSK0100 hintmod hsprache ///
partner zuspiel pintmod psprache dauerP interesse verstaendnis zuverlaessig schwierigkeit ///
dauerHH PX10200 brpanel HEK1420 HW0300 internr_hh

* Adjusting missings to SUF data: in the listed variables the raw codes 9 and 8
* are mapped to -2 and -1.
	foreach var of varlist(HW0200 HLS0100a- HLS2500a HEK0100-HEK0600 HEK1200 HEK2000 HEK2200 ///
							umzug PET0510 PEK1400 PEK1450 PSK0400a-PSK0400e ///
							zuspiel PG1300 PP0110 PMI0100 kindu15 kindu4 ekinu18 HEK1420 PD0500 ///
							PSK0100 PB0100 PG0500 PG0800) {
	  recode `var'  (9=-2) (8=-1)
	 }

* Flag every row present at this point (the wave-13 HBV rows); rows appended
* afterwards have hbv missing.
gen hbv = 1
	 

// Prewave info from SUF
* Appends the person-level SUF rows, restricted to the listed variables.
append using $data\PENDDAT.dta, keep(hnr schul2 beruf2 pnr welle PD0200 PD0300 PMI0100 PMI0200 RegP0100)

* Marking hbvs: hbv_m is the maximum of hbv within each pnr-hnr group, so it
* carries the flag to every row of that group.
bysort pnr hnr: egen hbv_m = max(hbv)

* Preparation: zaehler numbers each person's rows consecutively in wave order,
* so the lag operator L. refers to the person's preceding available row
* regardless of gaps in welle.
bysort pnr (welle): gen zaehler = _n
xtset pnr zaehler

* School degree: only asked of newly interviewed individuals; panel cases
* (Altbefr_ppost == 1) take the value of their preceding record instead.
	sort pnr zaehler
	* Missing values compare greater than any number in Stata, so the lag is
	* checked for missingness explicitly, as in the other carry-forward rules
	* of this file.
	replace schul2 = L.schul2 if L.schul2 > 0 & !missing(L.schul2) & Altbefr_ppost == 1
	recode schul2 (4=3) (5=4) (6 7=5) (8 9=6)
	* Newly asked individuals (Altbefr_ppost == 2): derive the category from
	* the current pupil status and PB0400
	replace schul2 = 1 if inrange(schuelerstatus_aktpost, 1, 4) & Altbefr_ppost == 2
	replace schul2 = 3 if inlist(PB0400, 1, 2, 8) & Altbefr_ppost == 2
	replace schul2 = 4 if inlist(PB0400, 3, 4, 5) & Altbefr_ppost == 2
	replace schul2 = 5 if inlist(PB0400, 6, 7) & Altbefr_ppost == 2
	replace schul2 = 6 if inlist(PB0400, 9, 10) & Altbefr_ppost == 2
	replace schul2 = -2 if (PB0400 == 99) & Altbefr_ppost == 2
	lab def schul2_lb 	-2 "-2. no specification" 1 "1. Pupil" ///
						 2 "2. no graduation" 3 "3. Special/elementary/secondary school" ///
						 4 "secondary school" 5 "university degree" ///
						 6 "other", modify
	lab val schul2 schul2_lb
	tab schul2

* Professional degree: only asked of newly interviewed individuals; panel cases
* take the value of their preceding record (same missing-value guard as above).
	sort pnr zaehler
	replace beruf2 = L.beruf2 if L.beruf2 > 0 & !missing(L.beruf2) & Altbefr_ppost == 1
	recode beruf2 (4 5 6 7=3) (8 9 10 11=4) (12 13=5)
	* Newly asked individuals (Altbefr_ppost == 2): derive the category from
	* the pupil status, PB1200, the PB1300 items and PB1600
	replace beruf2 = 1 if inrange(schuelerstatus_aktpost, 1, 4) & Altbefr_ppost == 2
	replace beruf2 = 2 if PB1200 == 2 & Altbefr_ppost == 2
	replace beruf2 = 3 if (PB1200 == 1 & Altbefr_ppost == 2) & (PB1300a == 1 | PB1300b == 1 | PB1300c == 1 | ///
															  PB1300d == 1 | PB1300e == 1 | inlist(PB1600, 1, 2))
	replace beruf2 = 4 if (PB1200 == 1 & Altbefr_ppost == 2) & (PB1300f == 1 | PB1300g == 1 | PB1300h == 1 | PB1600 == 3)
	replace beruf2 = 5 if (PB1200 == 1 & Altbefr_ppost == 2) & (PB1300i == 1 | PB1300j == 1 | inlist(PB1600, 4, 5))
	lab def beruf2_lb 	-2 "-2. no specification" 1 "1. Pupil" ///
						 2 "2. no vocational qualification" 3 "3. apprenticeship/apprenticeship/vocational college/master's degree" ///
						 4 "University of cooperative education/university of applied sciences" 5 "Other (foreign) qualification", modify
	lab val beruf2 beruf2_lb

* Linkage consent
	* Take the previous record's value when it is positive and the current value is -3 or missing
	replace RegP0100 = L.RegP0100 if (L.RegP0100 > 0 & L.RegP0100 ~= .) & (RegP0100 == -3 | RegP0100 == .)
	* Where RegP0100 is 2 or still missing, use zuspiel instead
	replace RegP0100 = zuspiel if RegP0100 == 2 | RegP0100 == .
	* Remaining negative codes become missing
	replace RegP0100 = . if RegP0100 < 0
	* The filled RegP0100 replaces the earlier zuspiel variable
	drop zuspiel 
	rename RegP0100 zuspiel

* Religious affiliation
	lab val PD0200 janein
	* Wave-3 rows take the value of the immediately preceding row of the same pnr;
	* this depends on the current sort order (NOTE(review): presumably pnr zaehler - confirm)
	replace PD0200 = PD0200[_n-1] if welle == 3 & pnr == pnr[_n-1]
	* Carry the previous record forward when it is positive and the current code is -3 ...
	replace PD0200 = L.PD0200 if (L.PD0200 > 0 & L.PD0200 ~= .) & PD0200 == -3
	* ... or when the row has Altbefr_ppost == 1
	replace PD0200 = L.PD0200 if (L.PD0200 > 0 & L.PD0200 ~= .) & Altbefr_ppost == 1
	recode PD0200 (-3 = .)

* PD0300 = Denomination, fill up wave 3 first (same rules as for PD0200)
	replace PD0300 = PD0300[_n-1] if welle == 3 & pnr == pnr[_n-1]
	replace PD0300 = L.PD0300 if (L.PD0300 > 0 & L.PD0300 ~= .) & PD0300 == -3
	replace PD0300 = L.PD0300 if (L.PD0300 > 0 & L.PD0300 ~= .) & Altbefr_ppost == 1
	replace PD0300 = 7 if PD0200 == 2 // not religious
	* Label text for code 7 is German for "no religious affiliation"
	lab def LABBM 7 "7. keine Religionszugehörigkeit", modify
	recode PD0300 (-3 = .)

* Migration --> Using information from the first interview 
	lab val PMI0100 janein
	* Carry the previous record forward when it is positive and the current value is negative ...
	replace PMI0100 = L.PMI0100 if (L.PMI0100 > 0 & L.PMI0100 ~= .) & PMI0100 < 0
	* ... or when the row has Altbefr_ppost == 1
	replace PMI0100 = L.PMI0100 if (L.PMI0100 > 0 & L.PMI0100 ~= .) & Altbefr_ppost == 1
	recode PMI0100 (-3 = .)
	
* Country of birth
	* The carry-forward is restricted to rows with PMI0100 == 2
	replace PMI0200 = L.PMI0200 if (L.PMI0200 > 0 & L.PMI0200 ~= .) & PMI0100 == 2 & PMI0200 < 0
	replace PMI0200 = L.PMI0200 if (L.PMI0200 > 0 & L.PMI0200 ~= .) & PMI0100 == 2 & Altbefr_ppost == 1
	* Rows with PMI0100 == 1 and no valid country get code 24 (labelled Germany below)
	replace PMI0200 = 24 if PMI0100 == 1 & (PMI0200 < 0 | PMI0200 ==.)
	lab def gebland_lb 24 "24. Germany", modify
	recode PMI0200 (-3 996 = .)

* Amount of debt: missing is set to zero (applies to every row in memory)
	replace HEK1420 = 0 if HEK1420 == .
	
	* Helper and raw source variables that are not used after this point
	drop schuelerstatus_aktpost altbefr zaehler PB0400 PB1200 PB1300a ///
	PB1300b PB1300c PB1300d PB1300e PB1600 PB1300f PB1300g PB1300h PB1300i PB1300j


* Adjusting missings to SUF data: raw codes 8 and 9 become -1 and -2
	foreach v of varlist PD0200 PD0300 {
		recode `v' (8 = -1) (9 = -2)
	}


* Delete a variable if it was not surveyed for everyone, i.e. if any row of
* waves 4 to 13 holds -9 (not surveyed in wave) or -3 (TNZ).
	local k_before = c(k)
	foreach v of varlist _all {
		count if inrange(welle, 4, 13) & inlist(`v', -9, -3)
		if r(N) > 0 {
			drop `v'
		}
	}

	* Report how many variables were removed and how many remain
	display "Deleted variables:" `k_before' - c(k)
	display "Contained variables: " c(k)



*********************************************************************************************************************************************
** Information on frequency of participation
*********************************************************************************************************************************************

* Possible preliminary waves of household: number of waves elapsed since the
* household's first observed wave (the first wave itself counts as 1).
	bysort hnr (welle): gen poss_wave = welle - (welle[1] - 1)

* Realized preliminary waves of household
	* Mark one row per household and wave (the first one in pnr order)
	bysort hnr welle (pnr): gen lfd_w = _n
	replace lfd_w = . if lfd_w ~= 1
	sort hnr lfd_w welle
	* Among the marked rows, number the household's observed waves in wave order
	bysort hnr lfd_w (welle): gen real_wave1 = _n if lfd_w == 1 
	* Copy that running number to all rows of the same household and wave
	bysort hnr welle: egen real_wave = max(real_wave1)
	drop real_wave1 lfd_w

* Share of realized waves among the possible waves
	gen prop_wave = real_wave/poss_wave


*********************************************************************************************************************************************
* Sample selection
*********************************************************************************************************************************************

* Keep HBV rows only (rows whose pnr-hnr group carries the hbv flag)
	drop if hbv_m != 1
	drop hbv hbv_m

** Keep wave 13 only
	drop if welle != 13

* Excluding seniors: only rows with PX10200 equal to 5 remain
	keep if PX10200 == 5

* Keep panel cases only: rows with brpanel equal to 0 are removed
	keep if brpanel != 0
* Check
	tabulate Altbefr_ppost


	drop PX10200 brpanel Altbefr_ppost

*********************************************************************************************************************************************
* Merging contact data
*********************************************************************************************************************************************
	* hnr must identify rows uniquely before the contact data are attached
	isid hnr
	merge m:1 hnr using $tmp\kontaktdaten_13.dta, nogenerate keep(match)
	rename sample_neu sample_kat
	drop sample

*********************************************************************************************************************************************
* Duration of interview
*********************************************************************************************************************************************
	* Both durations are clamped to their 1st and 99th percentiles; the
	* percentiles come from a single summarize, detail call per variable.
	foreach dur of varlist dauerHH dauerP {
		summarize `dur', detail
		replace `dur' = r(p1) if `dur' < r(p1) & `dur' != .
		replace `dur' = r(p99) if `dur' > r(p99) & `dur' != .
	}


******************************
** Merging information on interviewer
******************************
	* The household interviewer ID is the key into the interviewer file
	rename internr_hh INTNR
	merge m:1 INTNR using $data_int\PASSW13-Interviewermerkmale.dta, nogenerate keep(match)

* Age: 2019 minus year of birth
	generate int_alter = 2019-INT_GEBJ
	summarize int_alter
	drop INT_GEBJ

* Sex
	rename INT_SEX int_sex

* Education
* School degree, recoded and labelled with the schul2_lb value label
	rename INT_SAB int_schul
	recode int_schul (1=3) (2=4) (3 4 = 5) (7=2) (9=-2)
	label values int_schul schul2_lb

* Occupational degree, recoded and labelled with the beruf2_lb value label
	rename INT_BAB int_bild
	recode int_bild (1 2 = 3) (5=4) (6=5) (7=2) (9=-2)
	label values int_bild beruf2_lb

* Experience
	rename INT_ERF int_erfahrung
	rename INTNR int_lfd


* Check whether all cases with a household interview are also marked as a realized interview in the contact data
	tabulate welle interview // Cases without interview in contact data
	replace interview = 1 // set to 1 for every remaining case


*********************************************************************************************************************************************
* Testing variables
*********************************************************************************************************************************************

	* Drop every variable whose variance, as reported by summarize, is exactly zero
	local k_start = c(k)
	foreach v of varlist * {
		quietly summarize `v'
		if r(Var) == 0 {
			drop `v'
		}
	}
	display "deleted variables:" `k_start' - c(k)
	display "contained variables: " c(k)

** Delete further variables
	drop pnr rcl_neu int_bild INT_FELD PMI0200

*********************************************************************************************************************************************
* Generating missing variables
*********************************************************************************************************************************************

* Per row: number of variables holding the codes -2 or -1
	egen mis_pnr = anycount(_all), values(-2 -1)

* Missing indicators for selected variables: 1 if the value is -5, -2, -1 or
* system missing, 0 otherwise. Filter missings and questions mistakenly not asked
* are not evaluated as missings that can be used for estimation.
	foreach v of varlist HLS2300a HLS2400a HEK0600 HEK1200 HEK1420 PA0900 {
		gen `v'_mis = inlist(`v', -5, -2, -1) | `v' == .
	}

* All remaining negative codes become system missing
	foreach v of varlist * {
		replace `v' = . if `v' < 0
	}

* Labeling missings
	label define val_mis 0 "no missing variable" 1 "missing"
	label values *_mis val_mis

*** Remove highly correlated variables
	drop hhgr pintmod konart poss_wave psprache int_lfd PD0300 HW0200

* Deprivation index: number of HLS items equal to 2; the items are dropped afterwards
	egen depind = anycount(HLS??00a), values(2)
	drop HLS??00a

* Order of variables: the listed variables are moved to the front in this
* sequence; variables not listed keep their relative order behind them.
	order hintmod hsprache bundesld HW0300 HA0100 depind HEK0100 HEK0120 HEK0200 HEK0400 HEK0600 HEK1200 HEK2000 HEK2200 kindu4 ///
	kindu15 alg2abez umzug zpsex palter PD0200 PD0500 PA0100 PA0200 PA0300 ///
	PA0800 PA0900 PA1000 PB0100 PET0510 PEK1400 PEK1450 PSK0100 PSK0200 PSK0400a ///
	PSK0400b PSK0400c PSK0400d PSK0400e PG0100 PG0500 PG0800 PG1300 PP0110 PMI0100 ///
	ekinu18 schul2 beruf2 etakt alakt interesse verstaendnis zuverlaessig schwierigkeit ///
	partner zuspiel real_wave prop_wave sample_kat kon_nrh dauerHH dauerP 

* Intermediate file; it is merged with the wave-14 panel sample and erased afterwards
save $tmp\data_w13_tmp.dta, replace	

*********************************************************************************************************************************************
* Only keep panel cases
*********************************************************************************************************************************************

use "$data/PASS_W14_Panelstichprobe_HH_infas_7123_20200108.dta", clear

tab1 sample einsatza ta_group smeth ssprach

* Panel cases only: einsatza equal to 1 and language code other than "RU"
keep if einsatza == 1 & ssprach != "RU"

* Restrict the prepared wave-13 data to these households
keep hnr
merge 1:1 hnr using "$tmp\data_w13_tmp.dta", keep(match) nogenerate

*********************************************************************************************************************************************
* Saving data
*********************************************************************************************************************************************
compress
save $tmp\data_w13.dta, replace

* Delete intermediate files that are no longer needed
erase $tmp\kontaktdaten_13.dta
erase $tmp\data_w13_tmp.dta
