* -------------------------------------------------------------------------
* Stata do-file accompanying the article
* -------------------------------------------------------------------------
* Wenz, A., Jäckle, A., Burton, J., Couper, M. P., and Read, B. 
* Quality of expenditure data collected with a mobile receipt scanning app
* in a probability household panel
* -------------------------------------------------------------------------

* Set working directory ---------------------------------------------------
// Point the PLUS ado directory at its local location, then move to the data folder
sysdir set PLUS "d:\home\user\ado\stbplus"
cd "I:\Research\Output quality\Data"

* Load Living Costs and Food Survey data ----------------------------------
use "UKDA-8351-stata\stata\stata13_se\2016_17_dv_set89%20ukanon.dta", clear

* Recode Living Costs and Food Survey data --------------------------------
// Convert every variable name to lower case
rename *, lower

// Restrict the data to adult respondents (perstyp2 equal to 1);
// the filter variable is not needed afterwards
drop if perstyp2 != 1
drop perstyp2

// Derive COICOP prefixes of increasing depth from coi_plus
// (needed to map purchases onto the Spending Study categories).
// regexs(0) returns the text matched by the regexm() call in the if-qualifier;
// rows where the pattern does not match are left empty.
generate coi_2digit = regexs(0) if regexm(coi_plus, "[0-9]+")
generate coi_3digit = regexs(0) if regexm(coi_plus, "[0-9]+\.[0-9]")
generate coi_4digit = regexs(0) if regexm(coi_plus, "[0-9]+\.[0-9]\.[0-9]")

label variable coi_2digit "Coicop 2 digits"
label variable coi_3digit "Coicop 3 digits"
label variable coi_4digit "Coicop 4 digits"

// Place the derived prefixes directly behind the source variable
order coi_2digit coi_3digit coi_4digit, after(coi_plus)

// Sum pdamount within each person (case x person) separately for every spending category.
// Each total is only filled in on the rows that satisfy the category condition.
// The variables must be created in this order: later steps address them as the
// range food_amount-other_amount.

// Food: COICOP division 1
egen food_amount = total(pdamount) if coi_2digit == "1", by(case person)

// Clothes: division 3, leaving out the items that are counted under child costs
egen clothes_amount = total(pdamount) if coi_2digit == "3" & ///
	!inlist(coi_plus, "3.1.2.3.1", "3.1.2.3.2", "3.1.2.3.3", "3.1.2.3.4", ///
	"3.1.3.1.3", "3.2.1.3.1"), by(case person)

// Transport: division 7, leaving out the single item counted under child costs
egen transport_amount = total(pdamount) if coi_2digit == "7" & coi_plus != "7.3.5.1.3", ///
	by(case person)

// Child costs: selected items from divisions 3, 7, 12 and 20 plus all of division 10
// (two inlist() calls because inlist() accepts only a limited number of string arguments)
egen child_amount = total(pdamount) if ///
	inlist(coi_plus, "3.1.2.3.1", "3.1.2.3.2", "3.1.2.3.3", "3.1.2.3.4", ///
	"3.1.3.1.3", "3.2.1.3.1", "7.3.5.1.3") | ///
	coi_2digit == "10" | ///
	inlist(coi_plus, "12.1.3.1.7", "12.3.2.2.2", "12.3.2.2.3", "12.4.1.2.1", ///
	"12.4.1.2.2", "20.5.2.1.3", "20.5.2.1.4"), by(case person)

// Home: group 4.3, division 5 and class 9.3.3
egen home_amount = total(pdamount) if coi_3digit == "4.3" | coi_2digit == "5" | ///
	coi_4digit == "9.3.3", by(case person)

// Health: division 6 plus two items from division 12
egen health_amount = total(pdamount) if coi_2digit == "6" | ///
	inlist(coi_plus, "12.4.1.1.1", "12.4.1.1.2"), by(case person)

// Socialising and hobbies: division 2 and groups 9.1, 9.2, 9.4, 11.1
egen social_amount = total(pdamount) if coi_2digit == "2" | ///
	inlist(coi_3digit, "9.1", "9.2", "9.4", "11.1"), by(case person)

// Other goods and services: division 8, classes 9.3.1/9.3.2/9.3.4/9.3.5, group 9.5,
// and groups 12.1 and 12.3 without the items counted under child costs
egen disc_amount = total(pdamount) if coi_2digit == "8" | ///
	inlist(coi_4digit, "9.3.1", "9.3.2", "9.3.4", "9.3.5") | ///
	coi_3digit == "9.5" | ///
	(coi_3digit == "12.1" & coi_plus != "12.1.3.1.7") | ///
	(coi_3digit == "12.3" & !inlist(coi_plus, "12.3.2.2.2", "12.3.2.2.3")), ///
	by(case person)

// Holidays: groups 9.6 and 11.2
egen holiday_amount = total(pdamount) if inlist(coi_3digit, "9.6", "11.2"), by(case person)

// Gifts: selected items from division 20
egen gift_amount = total(pdamount) if ///
	inlist(coi_plus, "20.5.2.1.1", "20.5.2.1.2", "20.5.2.1.5", "20.5.2.1.6", ///
	"20.5.2.2.1", "20.5.2.2.2", "20.5.2.2.3", "20.5.2.2.4"), by(case person)

// Other: groups 12.2 and 12.7
egen other_amount = total(pdamount) if inlist(coi_3digit, "12.2", "12.7"), by(case person)

// Reduce the purchase-level data to one row per person.
// Within a person each category total is constant where it is defined,
// so the mean over its non-missing rows returns that total.
collapse (mean) food_amount-other_amount, by(case person)

// A category with no qualifying purchases is missing after the collapse: set it to zero
foreach amt of varlist food_amount-other_amount {
	replace `amt' = 0 if missing(`amt')
}

// Overall spending = sum over all categories
egen tamount = rowtotal(food_amount-other_amount)

// Attach descriptive labels to the spending variables
label variable food_amount      "Food and groceries"
label variable clothes_amount   "Clothes and footwear"
label variable transport_amount "Transport costs"
label variable child_amount     "Child costs"
label variable home_amount      "Home improvements and household goods"
label variable health_amount    "Health expenses"
label variable social_amount    "Socialising and hobbies"
label variable disc_amount      "Other goods and services"
label variable holiday_amount   "Holidays"
label variable gift_amount      "Giving money or gifts to others"
label variable other_amount     "Other"

label variable tamount          "Total amount"

// Attach respondent characteristics from the derived and the raw person files.
// The person identifier is temporarily renamed to Person so that it can be used
// as a merge key together with case.
// keep(master match) discards records that exist only in the using file, and
// nogenerate suppresses the _merge indicator.
rename person Person

merge 1:1 Person case using "UKDA-8351-stata\stata\stata13_se\2016_17_dvper_ukanon.dta", ///
	keepusing(a005p A004 A200 P053p) keep(master match) nogenerate

merge 1:1 Person case using "UKDA-8351-stata\stata\stata13_se\2016_17_rawper_ukanon.dta", ///
	keepusing(HighEd1) keep(master match) nogenerate

rename Person person

// Recode respondent characteristics
// Sex indicator: A004 == 2 -> 1 (female), A004 == 1 -> 0 (male), any other value -> missing
generate female = (A004 == 2) if inlist(A004, 1, 2)
label define femalelb 1 "Female" 0 "Male"
label values female femalelb
label variable female "Female"
drop A004

// Age: copy of a005p, its square, and a seven-band grouping
gen age = a005p
label var age "Age"
drop a005p

gen age2 = age^2
label var age2 "Age-squared"
order age2, after(age)

// Age bands. inrange() is false for missing values, so persons with missing age
// (e.g. not matched in the person-file merge) stay missing in agegp.
gen agegp = 1 if inrange(age, 16, 25)
replace agegp = 2 if inrange(age, 26, 35)
replace agegp = 3 if inrange(age, 36, 45)
replace agegp = 4 if inrange(age, 46, 55)
replace agegp = 5 if inrange(age, 56, 65)
replace agegp = 6 if inrange(age, 66, 75)
// Stata treats missing as larger than any number, so "age >= 76" alone would
// put missing ages into the oldest band; exclude them explicitly.
replace agegp = 7 if age >= 76 & !missing(age)
lab var agegp "Age: categories"
lab def agegpl 1 "16to25" 2 "26to35" 3 "36to45" 4 "46to55" 5 "56to65" 6 "66to75" 7 "76andolder"
lab val agegp agegpl

// Employment indicator: A200 in {1, 2, 8} -> 1; A200 strictly between 2 and 8 -> 0;
// anything else (including missing) -> missing
generate empl = cond(inlist(A200, 1, 2, 8), 1, cond(A200 > 2 & A200 < 8, 0, .))
label define empllb 1 "Employed" 0 "Not employed"
label values empl empllb
label variable empl "Employed"
drop A200

// Degree indicator: 1 when HighEd1 == 1, otherwise 0.
// NOTE(review): respondents with missing or out-of-range HighEd1 are coded 0
// ("Has no degree"), not missing - confirm this is intended.
generate degree = (HighEd1 == 1)
order degree, after(age)
label define degreelb 1 "Has degree" 0 "Has no degree"
label values degree degreelb
label variable degree "Has degree"
drop HighEd1

// Income: P053p multiplied by 4 and labelled as a monthly amount.
// NOTE(review): presumably P053p is a weekly figure - verify against the data documentation.
generate income = 4 * P053p
label variable income "Personal monthly gross income"
drop P053p

// Attach household-level characteristics (several persons can share one case).
// Households that appear only in the using file are not kept; no _merge variable is created.
merge m:1 case using "UKDA-8351-stata\stata\stata13_se\2016_17_dvhh_ukanon.dta", ///
	keepusing(A049 A040 A041 A042 A099 A121 A1661 Gorx) keep(master match) nogenerate

// Recode household characteristics
// Tenure: A121 in {5, 6, 7} -> owned (1); A121 in {1, 2, 3, 4, 8} -> not owned (0); else missing
generate tenure = cond(inlist(A121, 5, 6, 7), 1, cond(inlist(A121, 1, 2, 3, 4, 8), 0, .))
label define tenurelb 1 "House owned" 0 "House not owned"
label values tenure tenurelb
label variable tenure "Housing tenure"
drop A121

// Household size is taken over unchanged from A049
rename A049 hhsize
label variable hhsize "Household size"

// Number of children: sum of the three counts A040, A041 and A042
generate nkids = A040 + A041 + A042
label variable nkids "Number of children in household"
drop A040 A041 A042

// Computer in household: A1661 == 1 -> yes (1), A1661 == 2 -> no (0), else missing
generate pc = cond(A1661 == 1, 1, cond(A1661 == 2, 0, .))
label define yesnolb 1 "Yes" 0 "No"
label values pc yesnolb
label variable pc "Computer in household"
drop A1661

rename Gorx region
label variable region "Government Office Region"

// London flag: 1 for region codes 7 and 8, 0 for every other value (including missing).
// NOTE(review): confirm that both codes 7 and 8 denote London in the Gorx coding.
generate london = inlist(region, 7, 8)
label variable london "London"

rename A099 quarter
label variable quarter "Sample quarter"

// Arrange the household variables directly behind income
order tenure hhsize nkids pc region quarter, after(income)

// Attach the urban/rural indicators (one for England and Wales, one for Scotland);
// households found only in the using file are not kept and no _merge variable is created
merge m:1 case using "UKDA-8351-stata\stata\stata13_se\2016_17_dvhh_urbanrural_ukanon.dta", ///
	keepusing(URGridEWp URGridSCp) keep(master match) nogenerate

// Recode area characteristics
// Either indicator equal to 2 -> rural (0); otherwise either equal to 1 -> urban (1); else missing
generate urban = cond(URGridEWp == 2 | URGridSCp == 2, 0, ///
	cond(URGridEWp == 1 | URGridSCp == 1, 1, .))
label define urbanlb 1 "Urban" 0 "Rural"
label values urban urbanlb
label variable urban "Urban or rural area"
drop URGridEWp URGridSCp
order urban, after(pc)

// Interaction of sex and household size
generate femaleXhhsize = hhsize * female
label variable femaleXhhsize "Female X Household size"
order femaleXhhsize, after(pc)

// Give the identifiers their final names and put the person id first
rename (case person) (i_hidp pidp)
order pidp, before(i_hidp)

// Mark every record as coming from the Living Costs and Food Survey (source = 1)
generate source = 1
label define sourcelb 1 "Living Costs and Food survey" 2 "Spending study", replace
label values source sourcelb
label variable source "Data source"

// Keep only respondents interviewed in the fieldwork period Oct-Dec 2016,
// i.e. remove sample quarters 1 to 3 (original note: 6,876 participants excluded)
drop if inlist(quarter, 1, 2, 3)
drop quarter

// Remove respondents resident in Northern Ireland, region code 12
// (original note: 183 participants excluded)
drop if region == 12
drop region

// Remove participants whose total amount is zero (original note: 4 participants excluded)
drop if tamount == 0

// Write the analysis file to the working directory
save "Living_Costs_and_Food_Survey", replace
