* -------------------------------------------------------------------------
* Stata do-file accompanying the article
* -------------------------------------------------------------------------
* Wenz, A., Jäckle, A., Burton, J., Couper, M. P., and Read, B. 
* Quality of expenditure data collected with a mobile receipt scanning app
* in a probability household panel
* -------------------------------------------------------------------------

* Environment setup -------------------------------------------------------
* Machine-specific locations are collected here so they are easy to find
* and change when the do-file is run on a different computer.
local plusdir "d:\home\user\ado\stbplus"
local datadir "I:\Research\Output quality\Data"

* Point the PLUS ado directory at `plusdir' and make `datadir' the
* working directory, so the dataset below is opened relative to it.
sysdir set PLUS "`plusdir'"
cd "`datadir'"

* Graph scheme used for all figures ---------------------------------------
set scheme s2mono

* Open the analysis dataset, discarding any data currently in memory -------
use "Spending_Study_LCFS_with_weights", clear

* Winsorize outliers ------------------------------------------------------
* Each expenditure variable is top-coded at its weighted (aw=ipw1) 99th
* percentile, computed separately within each value of `source'.
*
* NOTE: Stata treats missing values as larger than any number, so a bare
* `x > r(p99)' is also true when x is missing and would overwrite missing
* expenditures with the 99th percentile. Every replace below therefore
* carries an explicit !missing() guard so that missing values stay missing.
*
* r(p99) is read directly from the preceding -summarize, detail-; -replace-
* is not an r-class command, so the stored result is still available.

// Total expenditure
foreach s in 1 0 {
	sum tamount if source == `s' [aw=ipw1], d
	replace tamount = r(p99) if tamount > r(p99) & !missing(tamount) & source == `s'
}

// Total scanned expenditure (only winsorized for source == 0)
sum tamountscan if source == 0 [aw=ipw1], d
replace tamountscan = r(p99) if tamountscan > r(p99) & !missing(tamountscan) & source == 0

// Category-level expenditure
local catname "food clothes transport child home health social disc"
foreach a of local catname {
	// <category>_amount: winsorize within source 1, then within source 0.
	// The second -summarize- after each replace only displays the
	// post-winsorization distribution in the log.
	foreach s in 1 0 {
		sum `a'_amount if source == `s' [aw=ipw1], d
		replace `a'_amount = r(p99) if `a'_amount > r(p99) & !missing(`a'_amount) & source == `s'
		sum `a'_amount if source == `s' [aw=ipw1], d
	}

	// <category>_scan: winsorize within source 0 only.
	sum `a'_scan if source == 0 [aw=ipw1], d
	replace `a'_scan = r(p99) if `a'_scan > r(p99) & !missing(`a'_scan) & source == 0
	sum `a'_scan if source == 0 [aw=ipw1], d
}

* Recode scan variables ---------------------------------------------------
* For observations with source == 1, a system-missing (.) scan variable is
* filled in with the value of the corresponding amount variable.

// Total expenditure
replace tamountscan = tamount if source == 1 & tamountscan == .

// Category-level expenditure (including holiday and gift)
foreach cat in food clothes transport child home health social disc holiday gift {
	replace `cat'_scan = `cat'_amount if source == 1 & `cat'_scan == .
}

* Recode respondent characteristics ---------------------------------------
* Builds three binary indicators. Because Stata orders missing values above
* every number, each `>=' / `>' comparison is paired with !missing() so that
* respondents with missing input stay missing on the derived indicator
* instead of being silently assigned to the upper category.

// Age: split at the median age among source == 1 observations.
// r(p50) from -summarize, detail- remains available after -generate-.
sum age if source == 1, d
gen agegroup = 0 if age < r(p50)
replace agegroup = 1 if age >= r(p50) & !missing(age)
lab var agegroup "Age"
lab def agegroup 0 "16-50" 1 "51-82"
lab val agegroup agegroup

// Income: categories 1-2 versus 3 and above.
gen incomehigh = 0 if income_cat <= 2
replace incomehigh = 1 if income_cat >= 3 & !missing(income_cat)
lab var incomehigh "Personal monthly gross income"
lab def incomehigh 0 "Below median" 1 "Above median"
lab val incomehigh incomehigh

// Household size: singlehh == 1 flags one-person households (hhsize == 1).
// The value labels follow that coding: 1 = single, 0 = non-single.
gen singlehh = 0 if hhsize > 1 & !missing(hhsize)
replace singlehh = 1 if hhsize == 1
lab var singlehh "Household size"
lab def singlehh 0 "Non-single household" 1 "Single household"
lab val singlehh singlehh

* Table 2. Mean budget shares.
* Denominators: total over the eight expenditure categories, once for the
* amount variables and once for the scan variables. The sum is missing
* whenever any one of its components is missing.
gen tamount_subset = food_amount + clothes_amount + transport_amount + child_amount ///
	+ home_amount + health_amount + social_amount + disc_amount
gen tamountscan_subset = food_scan + clothes_scan + transport_scan + child_scan ///
	+ home_scan + health_scan + social_scan + disc_scan

* Share of each category in the amount total; weighted means by source.
foreach cat in food clothes transport child home health social disc {
	gen `cat'_amount_share = `cat'_amount / tamount_subset
	bysort source: summarize `cat'_amount_share [aw=ipw1]
}

* Share of each category in the scan total; weighted mean for source == 0.
foreach cat in food clothes transport child home health social disc {
	gen `cat'_scan_share = `cat'_scan / tamountscan_subset
	summarize `cat'_scan_share if source == 0 [aw=ipw1]
}
