* -------------------------------------------------------------------------
* Stata do-file accompanying the article
* -------------------------------------------------------------------------
* Wenz, A., Jäckle, A., Burton, J., Couper, M. P., and Read, B. 
* Quality of expenditure data collected with a mobile receipt scanning app
* in a probability household panel
* -------------------------------------------------------------------------

* Environment setup -------------------------------------------------------
// Redirect Stata's PLUS system directory to the local ado folder.
local plus_dir "d:\home\user\ado\stbplus"
sysdir set PLUS "`plus_dir'"

// Make the data folder the working directory so that the dataset below can
// be opened by bare file name.
local data_dir "I:\Research\Output quality\Data"
cd "`data_dir'"

* Graph appearance --------------------------------------------------------
// Use the monochrome s2 scheme for all graphs drawn later in this script.
set scheme s2mono

* Analysis dataset --------------------------------------------------------
// Replace whatever is in memory with the analysis file.
local analysis_file "Spending_Study_LCFS_with_weights"
use "`analysis_file'", clear

* Winsorize outliers ------------------------------------------------------
// Total expenditure
// Each (variable, source) pair is capped at its own weighted 99th percentile:
//   tamount      for source == 1
//   tamount      for source == 0
//   tamountscan  for source == 0
// Stata treats missing values as larger than every number, so a bare
// "var > r(p99)" would also match missing observations and overwrite them
// with the cap. The !missing() guard keeps missing values missing.
local cap_vars    "tamount tamount tamountscan"
local cap_sources "1 0 0"
forvalues k = 1/3 {
	local v : word `k' of `cap_vars'
	local s : word `k' of `cap_sources'

	// summarize, detail leaves the weighted 99th percentile in r(p99)
	sum `v' if source == `s' [aw=ipw1], d
	replace `v' = r(p99) if `v' > r(p99) & !missing(`v') & source == `s'
}

// Category-level expenditure
// For every spending category, cap three series at their own weighted 99th
// percentile: <cat>_amount for source == 1, <cat>_amount for source == 0 and
// <cat>_scan for source == 0. The second summarize after each replace prints
// the distribution again so the effect of the cap can be checked in the log.
// Missing values compare as larger than any number in Stata, so the
// !missing() guard is required to stop them being replaced by the cap.
foreach cat in food clothes transport child home health social disc holiday gift {
	// each spec is "<variable suffix> <source value>"
	foreach spec in "amount 1" "amount 0" "scan 0" {
		local suffix : word 1 of `spec'
		local src    : word 2 of `spec'
		local v `cat'_`suffix'

		sum `v' if source == `src' [aw=ipw1], d
		replace `v' = r(p99) if `v' > r(p99) & !missing(`v') & source == `src'
		sum `v' if source == `src' [aw=ipw1], d
	}
}

* Recode scan variables ---------------------------------------------------
// For source == 1 observations, fill a system-missing scan amount with the
// corresponding overall amount, first for the total and then for every
// spending category.
replace tamountscan = tamount if source == 1 & tamountscan == .

foreach cat in food clothes transport child home health social disc holiday gift {
	replace `cat'_scan = `cat'_amount if source == 1 & `cat'_scan == .
}

* Recode respondent characteristics ---------------------------------------
// Age group: 0 = below, 1 = at or above the median age of the source == 1
// observations (the median is computed without weights).
// The !missing() condition keeps respondents with unknown age out of both
// groups; without it "age >= r(p50)" is true for missing age, because Stata
// orders missing values above every number.
// NOTE(review): the value labels hard-code the age ranges; confirm they still
// match the data if the sample changes.
sum age if source == 1, d
gen agegroup = (age >= r(p50)) if !missing(age)
lab var agegroup "Age"
lab def agegroup 0 "16-50" 1 "51-82"
lab val agegroup agegroup

// Income: 0 = income_cat 1-2, 1 = income_cat 3 or higher.
// Missing income_cat must stay missing: Stata orders missing values above
// every number, so "income_cat >= 3" alone would classify respondents with
// unknown income as above median.
gen incomehigh = 0 if income_cat <= 2
replace incomehigh = 1 if income_cat >= 3 & !missing(income_cat)
lab var incomehigh "Personal monthly gross income"
lab def incomehigh 0 "Below median" 1 "Above median"
lab val incomehigh incomehigh

// Household size: singlehh = 1 for one-person households (hhsize == 1),
// 0 for households with more than one person.
// Two corrections relative to the earlier version of this recode:
//  - missing hhsize stays missing ("hhsize > 1" is true for missing values
//    in Stata, which silently coded them as 0);
//  - the value labels now follow the coding (1 = single household).
gen singlehh = 0 if hhsize > 1 & !missing(hhsize)
replace singlehh = 1 if hhsize == 1
lab var singlehh "Household size"
lab def singlehh 0 "Non-single household" 1 "Single household"
lab val singlehh singlehh

* Table A7. Average non-zero and zero weekly expenditure by population subgroup.
// For each outcome (total, then scan-only total), each subgroup indicator and
// each indicator level (0, then 1):
//   1. weighted summary statistics of the outcome, separately by source;
//   2. weighted regression of the outcome on source within that subgroup level.
foreach outcome of var tamount tamountscan {
	foreach grp of var agegroup female degree incomehigh singlehh urban {
		forvalues lvl = 0/1 {
			bysort source: sum `outcome' if `grp' == `lvl' [aw=ipw1]
			reg `outcome' source if `grp' == `lvl' [aw=ipw1]
		}
	}
}

* Table A7. Median non-zero and zero weekly expenditure by population subgroup.
// NOTE(review): the section on averages carries the same table number; confirm
// which table each section belongs to.
// For each outcome (total, then scan-only total), each subgroup indicator and
// each indicator level (0, then 1):
//   1. weighted detailed summary (including percentiles), separately by source;
//   2. weighted quantile regression (qreg default quantile) of the outcome on
//      source within that subgroup level.
foreach outcome of var tamount tamountscan {
	foreach grp of var agegroup female degree incomehigh singlehh urban {
		forvalues lvl = 0/1 {
			bysort source: sum `outcome' if `grp' == `lvl' [aw=ipw1], d
			qreg `outcome' source if `grp' == `lvl' [pw=ipw1]
		}
	}
}

* Figures 5-6. Distribution of average weekly total expenditure by population subgroup.
// Non-zero and zero expenditure
// One density panel per subgroup level, stored in memory as
// Figure4_<indicator>_<level>; these names are what the later combine step
// refers to.
// Panel titles are listed in loop order: indicator by indicator, level 0 then
// level 1. singlehh is coded 1 for one-person households, so its level-0 panel
// is the non-single group and its level-1 panel the single group.
// The list uses compound double quotes so the embedded quoted titles are
// stored reliably as separate words.
local subgroups "agegroup female degree incomehigh singlehh urban"
local panel_titles `""Age: 16-50" "Age: 51-82" "Gender: Male" "Gender: Female" "Education: No degree" "Education: Degree" "Income: Below median" "Income: Above median" "Household size: Non-single" "Household size: Single" "Urbanicity: Rural" "Urbanicity: Urban""'
local panel = 0
foreach grp of local subgroups {
	forvalues lvl = 0/1 {
		local ++panel
		local ttl : word `panel' of `panel_titles'

		// The first call only sets up a common evaluation grid (x) from the
		// pooled subgroup; the three densities below are evaluated on that grid
		// so the curves share one x-axis.
		kdensity tamount if `grp' == `lvl' [aw=ipw1], nograph generate(x fx)
		kdensity tamount if `grp' == `lvl' & source == 1 [aw=ipw1], nograph generate(fx0) at(x)
		kdensity tamount if `grp' == `lvl' & source == 0 [aw=ipw1], nograph generate(fx1) at(x)
		kdensity tamountscan if `grp' == `lvl' & source == 0 [aw=ipw1], nograph generate(fx2) at(x)

		// Variable labels become the legend entries.
		lab var fx0 "Living Costs and Food Survey"
		lab var fx1 "Spending Study: Scan + Direct Entry"
		lab var fx2 "Spending Study: Scan Only"

		line fx0 fx1 fx2 x, sort ytitle("Density") xlab(, grid) xtitle("GBP") title("`ttl'") lwidth(medthick medthick medthick) legend(cols(1) size(*0.8) symysize(*0.8)) ///
			name(Figure4_`grp'_`lvl', replace)

		// Remove the helper variables so the next panel can recreate them.
		drop x fx fx0 fx1 fx2
	}
}

// Distribution comparison between sources within every subgroup level, first
// for total expenditure and then for the scan-only total.
// ksmirnov2 is not an official Stata command; presumably a user-written
// weighted two-sample Kolmogorov-Smirnov test - confirm it is installed.
foreach grp in agegroup female degree incomehigh singlehh urban {
	forvalues lvl = 0/1 {
		ksmirnov2 tamount if `grp' == `lvl' [aw=ipw1], by(source)
		ksmirnov2 tamountscan if `grp' == `lvl' [aw=ipw1], by(source)
	}
}

// Combine the stored density panels into two three-row figures that share a
// single legend, and write each one to an SVG file. Both figures use the same
// layout options.
local layout "rows(3) ysize(10) xsize(9) scale(0.8) legscale(2) symxsize(10)"

// Figure 5: age, gender and education panels
local fig5_panels "Figure4_agegroup_0 Figure4_agegroup_1 Figure4_female_0 Figure4_female_1 Figure4_degree_0 Figure4_degree_1"
grc1leg2 `fig5_panels', `layout'
graph export "I:\Research\Output quality\Figures\Figure5.svg", as(svg) replace

// Figure 6: income, household size and urbanicity panels
local fig6_panels "Figure4_incomehigh_0 Figure4_incomehigh_1 Figure4_singlehh_0 Figure4_singlehh_1 Figure4_urban_0 Figure4_urban_1"
grc1leg2 `fig6_panels', `layout'
graph export "I:\Research\Output quality\Figures\Figure6.svg", as(svg) replace
