//project: Comparability of web and telephone survey modes for the measurement of subjective well-being
//task: This dofile prepares the variables

//housekeeping
//The do-files use globals to manage lists of variables, chart options, etc.
//These globals are set in a separate do-file called globals.do, which is invoked by each main do-file.

// Run the shared do-file that sets the project globals (see header above).
// NOTE(review): "clear all" below also drops programs and Mata objects; if
// globals.do ever defines any of those they are discarded here -- confirm.
do globals.do


// Program setup: start from an empty session and fix the output line width.
clear all
set linesize 80


// Load the source dataset. The file name is quoted so that a dtapath global
// containing spaces is still parsed as a single path.
use "$dtapath/00-websurvey-V01.dta", clear

// For each occupation variable occu<suffix>, set values below 1 to missing.
// (Stata missing values compare as larger than any number, so they are untouched.)
foreach suffix in full part self rd stu home seek {
    replace occu`suffix' = . if occu`suffix' < 1
}

// Indicator variables for canton (Luxembourg is divided into several cantons):
// one regio<k> variable per distinct value of luregion.
capture drop regio1-regio5
tabulate luregion, generate(regio)

// Indicator variables for employment status, based on the GEM harmonized
// category (gemoccu): one emp<k> variable per distinct value.
capture drop emp*
tabulate gemoccu, generate(emp)

// Label emp1 ... emp7 in order.
// NOTE(review): assumes gemoccu has exactly these seven categories in this
// sort order -- confirm against the codebook.
local k = 0
foreach txt in "full-time" "part-time" "retired or disabled" "home-maker" "student" "not working and other" "self-employed" {
    local ++k
    label variable emp`k' "`txt'"
}

// Indicator variables for survey year: one anno<k> variable per distinct year.
capture drop anno*
tabulate year, generate(anno)
 
// Categorical variable for education: collapse the lureduc codes 0-8 into six
// levels. Any other lureduc value (including missing) leaves education missing.
capture drop education
generate education = 1 if inlist(lureduc, 0, 1)
replace education = 2 if inlist(lureduc, 2, 3)
replace education = 3 if lureduc == 4
replace education = 4 if inlist(lureduc, 5, 6)
replace education = 5 if lureduc == 7
replace education = 6 if lureduc == 8

label variable education "Education level"
label define educat 1 "Primary" 2 "Secondary" 3 "Master craftsman" 4 "Bachelor" 5 "Master" 6 "Doctoral", replace
label values education educat

// One indicator per education level (edc1, edc2, ...).
// The wildcard is required: a bare "drop edc" cannot remove edc1, edc2, ...
// left over from an earlier run, and tabulate's generate() would then fail
// because the variables already exist.
capture drop edc*
tabulate education, generate(edc)

// Income variable: map household-income bracket b = 1..6 (luhhinc) to
// 10000, 30000, ..., 110000, i.e. 20000*b - 10000.
// (presumably bracket midpoints -- confirm against the questionnaire)
// Any other luhhinc value leaves income missing.
capture drop income
generate income = .
forvalues b = 1/6 {
    replace income = 20000*`b' - 10000 if luhhinc == `b'
}

// Deflator variable: cpi = consumer price index.
// Source: http://www.statistiques.public.lu/stat/ReportFolders/ReportFolder.aspx?IF_Language=eng&MainTheme=5&FldrName=5&RFPath=109
// Series: National consumer price index (IPCN) 2011 - 2015,
// tab "EN_verso", IPCN - Retrospective Series, index base 100 in 2005.
// Annual averages used:
//   2013: 120.42
//   2014: 121.18
//   2015: 121.75
// Observations from any other year get a missing cpi.

capture drop cpi
generate cpi = cond(year == 2013, 120.42, cond(year == 2014, 121.18, cond(year == 2015, 121.75, .)))

// Deflate income by the CPI (index base 100) and take the natural logarithm.
// Both variables are dropped first so the block can be re-run on a dataset
// that already contains them.
capture drop realincome
capture drop log_income
generate realincome = (income / cpi) * 100
generate log_income = log(realincome)

// Indicator variables for income: one hhinc<k> variable per distinct value
// of luhhinc.
capture drop hhinc*
tabulate luhhinc, generate(hhinc)


// Age squared, divided by 100.
capture drop age2
generate age2 = (age^2)/100
label variable age2 "age squared / 100"

// Variable labels.
label variable age7c "Age"
label variable gender "Gender"
label variable luregion "Region"
label variable luhhinc "Income"
label variable iporgtype "Employer type"
label variable wlslf "I am satisfied with my life"


// Survey-mode indicator built from contact:
//   contact == 2 -> 0 (telephone), contact == 4 -> 1 (web-survey),
//   any other value -> missing.
capture drop tool
generate tool = cond(contact == 2, 0, cond(contact == 4, 1, .))
label variable tool "survey mode"
label define tool 0 "telephone" 1 "web-survey", modify
label values tool tool

// 0/1 recode of gender:
//   gender == 1 -> 0 (men), gender == 2 -> 1 (women), any other value -> missing.
capture drop sex
generate sex = cond(gender == 1, 0, cond(gender == 2, 1, .))
label variable sex "gender"
label define sex 0 "men" 1 "women", modify
label values sex sex

// Life satisfaction: copy wlslf, turning negative values into missing.
capture drop ls
generate ls = cond(wlslf < 0, ., wlslf)

// Categorize age and log income into quintiles.
// For each source variable x this creates dx (quintile number from xtile)
// and one indicator ddx_<k> per quintile.
foreach src in age log_income {
    capture drop d`src'
    xtile d`src' = `src', nq(5)

    capture drop dd`src'_*
    tabulate d`src', generate(dd`src'_)
}

// Value labels for the income-quintile variable.
label define dlog_income 1 "first quintile" 2 "second quintile" 3 "third quintile" 4 "fourth quintile" 5 "fifth quintile", modify
// Attach the label set defined on the line above. The original attached a
// label named "dinc", which this file never defines, so the quintile labels
// were never displayed.
label values dlog_income dlog_income

// Variable labels for the income-quintile indicators.
label variable ddlog_income_1 "first quintile"
label variable ddlog_income_2 "second quintile"
label variable ddlog_income_3 "third quintile"
label variable ddlog_income_4 "fourth quintile"
label variable ddlog_income_5 "fifth quintile"

// Value labels for the age-quintile variable.
// NOTE(review): the age ranges below are hard-coded, whereas the quintile
// cut-points come from xtile on the data -- confirm they still match.
label define dage 1 "18 - 29" 2 "30 - 39" 3 "40 - 47" 4 "48 - 55" 5 "56 - 64", modify
label values dage dage

// Variable labels for the age-quintile indicators.
label variable ddage_1 "age: 18 - 29"
label variable ddage_2 "age: 30 - 39"
label variable ddage_3 "age: 40 - 47"
label variable ddage_4 "age: 48 - 55"
label variable ddage_5 "age: 56 - 64"

 
// Drop the respondents who did not have a phone (doublecount == 2), so that
// only respondents who could have been selected for the alternative mode remain.
// NOTE(review): the age and income quintiles earlier in this file are computed
// before this restriction, i.e. on the unrestricted sample -- confirm intended.
keep if doublecount != 2


// Save the prepared dataset, overwriting any existing file. The file name is
// quoted so that a dtapath global containing spaces is parsed as a single path.
save "$dtapath/01-websurvey-V03.dta", replace
	

