*Created June 8th 2015

* Start from an empty session and disable output paging.
clear all
set more off

* All relative file names below resolve against this project data folder.
cd "D:\SESRI\Studies\Halton sequence\Data"

* Load only the observations with resptype == 1 and verror equal to system missing.
use if resptype==1 & verror==. using "Halton0.dta", clear

* Sanity check on whether the sequence was loaded properly (original note: NOT PERFECT).
* Optional visual inspection:
*brow shura candgender rnd1 orating interimoutcome 
*brow smarket knowledge1a orating

* Count observations that have rnd1 but are missing each of these variables.
foreach v in orating knowledge1a smarket {
	count if rnd1!=. & `v'==.
}


*Create variable for name based on randomization
* Codes 1-6 enumerate the (candfirst_rnd, candfam_rnd) combinations, with
* candfirst_rnd varying fastest: (1,1)=1, (2,1)=2, (1,2)=3, (2,2)=4, (1,3)=5, (2,3)=6.
* Any other combination is left missing.
gen candname1 = .
local code = 0
forvalues fam = 1/3 {
	forvalues first = 1/2 {
		local ++code
		replace candname1 = `code' if candfirst_rnd == `first' & candfam_rnd == `fam'
	}
}


*Creating an age variable for respondents
* Age is computed relative to 2015. birth values of 0 or below are not used,
* so age is missing for them (and for missing birth).
gen age = 2015 - birth if birth > 0
sum age , d

* Six age bands. Stata ranks missing above every number, so the open-ended
* top band must exclude missing age explicitly; otherwise respondents with
* unknown age would be classified as "60 or older".
gen age_grp = .
replace age_grp = 1 if age < 25
replace age_grp = 2 if age >= 25 & age <30
replace age_grp = 3 if age >=30 & age < 40
replace age_grp = 4 if age >= 40 & age < 50
replace age_grp = 5 if age >= 50 & age < 60
replace age_grp = 6 if age >=60 & !missing(age)

label define age 1 "Younger than 25" 2 "Age 25 to 29" 3 "Age 30 to 39" 4 "Age 40 to 49" 5 "Age 50 to 59" 6 "60 or older"
label values age_grp age
tab age_grp

* Indicator for respondents younger than 30. "nonmissing" is used instead of
* "else" because "else" also matches missing values and would code
* respondents with unknown age as 0.
recode age_grp (1 2=1 "less than 30")(nonmissing=0),gen(young)
tab young


*Recoding education variable for respondents
* Collapse educ1 into four levels: 1 stays 1, 2-4 -> 2, 5-6 -> 3, 7-9 -> 4.
gen educ_r = educ1
recode educ_r (2/4=2) (5/6=3) (7/9=4)

* educ1 codes of 0 or below, and code 10, are treated as missing.
replace educ_r = . if educ1 <= 0 | educ1 == 10
tab educ_r educ1

label define ed1 ///
	1 "Never Joined School" ///
	2 "Primary School" ///
	3 "Secondary School" ///
	4 "Higher Education"
label values educ_r ed1
tab educ_r

* Indicator for the top education level (educ_r == 4) versus levels 1-3.
recode educ_r (4=1 "higher edu") (1 2 3=0), gen(highedu)
tab highedu
 

* gender
* Turn gender into a 0/1 variable in place: code 2 becomes 0, code 1 is kept.
* NOTE(review): presumably 1 = male and 2 = female - confirm against the codebook.
replace gender = 0 if gender == 2
tab gender

* marital
* Turn marital into a 0/1 indicator in place: 1 stays 1, every other valid
* answer becomes 0, and codes -8 / -9 become missing.
* "nonmissing" is used instead of "else" because "else" also matches values
* that are already missing and would recode them to 0.
recode marital (-8 -9=.)(1=1)(nonmissing=0)
tab marital




***********************************************
*Creating flags for random numbers from Blaise*
***********************************************

* there are 2 cases with missing in Blaise but not in Halton
* Fill rnd01-rnd05 with a uniform random draw wherever rnd0`i' is missing but
* the matching rnd`i' is not.
* NOTE(review): presumably rnd0`i' holds the Blaise number and rnd`i' the
* Halton number - confirm against the data documentation.
* The seed is fixed so that re-running this do-file reproduces the same draws
* and therefore the same saved dataset.
set seed 20150608
forvalues i = 1/5 {
	replace rnd0`i' = runiform() if rnd0`i' == . & rnd`i' != .
}


*Gender 
* Two categories from rnd01: 1 when rnd01 <= .5, otherwise 2.
* Values above 1 (which includes missing) are left missing.
gen candgender_blaise = cond(rnd01 <= .5, 1, 2) if rnd01 <= 1
*tab candgender_blaise candgender_rnd 

*Candidate Name
* Six categories from rnd02 with cut points at 1/6, 2/6, ..., 5/6.
* Start everyone with rnd02 <= 1 in category 1, then move an observation up
* one category for each cut point it reaches. Values above 1 (which includes
* missing) are left missing.
gen candname_blaise = 1 if rnd02 <= 1
forvalues k = 1/5 {
	replace candname_blaise = `k' + 1 if rnd02 >= `k'/6 & rnd02 <= 1
}
*tab candname_blaise candname1 

*Education
* Three categories from rnd03: 1 below 1/3, 2 from 1/3 up to (not including) 2/3,
* 3 from 2/3 up to 1. Each logical term adds 1 when its cut point is reached.
* Values above 1 (which includes missing) are left missing.
gen canedu_blaise = 1 + (rnd03 >= 1/3) + (rnd03 >= 2/3) if rnd03 <= 1
*tab canedu_blaise canedu_rnd 

*Experience
* Two categories from rnd04: 1 when rnd04 < 1/2, otherwise 2.
* Values above 1 (which includes missing) are left missing.
gen canexp_blaise = cond(rnd04 < 1/2, 1, 2) if rnd04 <= 1
*tab canexp_blaise canexp_rnd 

*Religious
* Two categories from rnd05: 1 when rnd05 < 1/2, otherwise 2.
* Values above 1 (which includes missing) are left missing.
gen canrel_blaise = cond(rnd05 < 1/2, 1, 2) if rnd05 <= 1
*tab canrel_blaise canrel_rnd 

*Labels
* Define all value labels first.
label define gender 1 "Male" 2 "Female"
label define name 1 "Name 1" 2 "Name 2" 3 "Name 3" 4 "Name 4" 5 "Name 5" 6 "Name 6"
label define edu 1 "Bachelor's" 2 "Master's" 3 "Not Mentioned"
label define exp 1 "Planning Coordinator" 2 "Not Mentioned"
label define rel 1 "Religious values" 2 "Not Mentioned"

* Attach each label to both the _rnd variable and its _blaise counterpart.
foreach sfx in rnd blaise {
	label values candgender_`sfx' gender
	label values canedu_`sfx' edu
	label values canexp_`sfx' exp
	label values canrel_`sfx' rel
}

* The name variables do not follow the _rnd naming pattern, so label them directly.
label values candname1 name
label values candname_blaise name


* Keep only respondents who reached the experiment, i.e. drop observations
* whose candgender_rnd is system missing.
keep if candgender_rnd != .

* Write the prepared data, overwriting any existing Halton1 file.
save "Halton1", replace
