* -------------------------------------------------------------------------
* Stata do-file accompanying the article
* -------------------------------------------------------------------------
* Wenz, A., Jäckle, A., Burton, J., Couper, M. P., and Read, B. 
* Quality of expenditure data collected with a mobile receipt scanning app
* in a probability household panel
* -------------------------------------------------------------------------

* Set working directory ---------------------------------------------------
* Machine-specific absolute paths: the PLUS system directory and the
* folder that holds the analysis data set.
local plus_dir "d:\home\user\ado\stbplus"
local data_dir "I:\Research\Output quality\Data"
sysdir set PLUS "`plus_dir'"
cd "`data_dir'"

* Set colour scheme -------------------------------------------------------
* Graph scheme used by every figure produced below.
set scheme s2mono

* Load data ---------------------------------------------------------------
* Replaces whatever data set is currently in memory.
use "Spending_Study_LCFS_with_weights", clear

* Winsorize outliers ------------------------------------------------------
* Cap each expenditure variable at its ipw1-weighted 99th percentile,
* computed separately within each value of source.
*
* Stata orders missing values above every number, so `var > r(p99)' alone
* is also true when var is missing. The !missing() guard keeps missing
* values missing instead of overwriting them with the cap.

// Total expenditure
sum tamount if source == 1 [aw=ipw1], d
replace tamount = r(p99) if tamount > r(p99) & !missing(tamount) & source == 1

sum tamount if source == 0 [aw=ipw1], d
replace tamount = r(p99) if tamount > r(p99) & !missing(tamount) & source == 0

sum tamountscan if source == 0 [aw=ipw1], d
replace tamountscan = r(p99) if tamountscan > r(p99) & !missing(tamountscan) & source == 0

// Category-level expenditure
local catname "food clothes transport child home health social disc holiday gift"
foreach a of local catname {
	* <category>_amount is capped within source == 1 and within source == 0
	foreach s in 1 0 {
		sum `a'_amount if source == `s' [aw=ipw1], d
		replace `a'_amount = r(p99) if `a'_amount > r(p99) & !missing(`a'_amount) & source == `s'
		* Summarize again so the log shows the distribution after capping
		sum `a'_amount if source == `s' [aw=ipw1], d
	}

	* <category>_scan is capped within source == 0 only
	sum `a'_scan if source == 0 [aw=ipw1], d
	replace `a'_scan = r(p99) if `a'_scan > r(p99) & !missing(`a'_scan) & source == 0
	sum `a'_scan if source == 0 [aw=ipw1], d
}

* Recode scan variables ---------------------------------------------------
* For source == 1 observations whose scan variable is system-missing (.),
* copy the value of the matching amount variable into the scan variable.
replace tamountscan = tamount if source == 1 & tamountscan == .

foreach cat in food clothes transport child home health social disc holiday gift {
	replace `cat'_scan = `cat'_amount if source == 1 & `cat'_scan == .
}

* Table A5. Average non-zero and zero weekly expenditure.
* Weighted (ipw1) means by source, followed by a weighted regression of
* each expenditure variable on source.

// Total expenditure
foreach s in 1 0 {
	summarize tamount if source == `s' [aweight = ipw1], detail
	display r(mean)
}
summarize tamountscan if source == 0 [aweight = ipw1], detail
display r(mean)
regress tamount source [aweight = ipw1]
regress tamountscan source [aweight = ipw1]

// Category-level expenditure
local cats food clothes transport child home health social disc holiday gift

* <category>_amount: both sources
foreach c of local cats {
	foreach s in 1 0 {
		summarize `c'_amount if source == `s' [aweight = ipw1], detail
		display r(mean)
	}
	regress `c'_amount source [aweight = ipw1]
}

* <category>_scan: mean for source == 0 only
foreach c of local cats {
	summarize `c'_scan if source == 0 [aweight = ipw1], detail
	display r(mean)
	regress `c'_scan source [aweight = ipw1]
}

* Table A6. Average non-zero weekly expenditure.
// Total expenditure
* Share of observations reporting zero expenditure, by source.
* NOTE(review): the denominators 2209 and 262 are hard-coded; presumably
* they are the numbers of observations with source == 1 and source == 0.
* Confirm against the data set.
sum tamount if tamount == 0 & source == 1
di r(N)/2209
sum tamount if tamount == 0 & source == 0
di r(N)/262
sum tamountscan if tamountscan == 0 & source == 0
di r(N)/262

* Weighted (ipw1) means among strictly positive amounts, by source, and
* weighted regressions of the positive amounts on source.
sum tamount if tamount > 0 & source == 1 [aw=ipw1], d
di r(mean)
sum tamount if tamount > 0 & source == 0 [aw=ipw1], d
di r(mean)
sum tamountscan if tamountscan > 0 & source == 0 [aw=ipw1], d
di r(mean)
reg tamount source if tamount > 0 [aw=ipw1]
reg tamountscan source if tamountscan > 0 [aw=ipw1]

// Category-level expenditure
* Share of zero-expenditure observations per category (same hard-coded
* denominators as for total expenditure).
foreach x of var food_amount clothes_amount transport_amount child_amount home_amount health_amount social_amount disc_amount holiday_amount gift_amount {
	sum `x' if `x' == 0 & source == 1 [aw=ipw1]
	di r(N)/2209
	sum `x' if `x' == 0 & source == 0 [aw=ipw1]
	di r(N)/262
}

foreach x of var food_scan clothes_scan transport_scan child_scan home_scan health_scan social_scan disc_scan holiday_scan gift_scan {
	sum `x' if `x' == 0 & source == 0 [aw=ipw1]
	di r(N)/262
}

* Weighted means and regressions among strictly positive amounts.
foreach x of var food_amount clothes_amount transport_amount child_amount home_amount health_amount social_amount disc_amount holiday_amount gift_amount {
	sum `x' if `x' > 0 & source == 1 [aw=ipw1], d
	di r(mean)
	sum `x' if `x' > 0 & source == 0 [aw=ipw1], d
	di r(mean)
	reg `x' source if `x' > 0 [aw=ipw1]
}

foreach x of var food_scan clothes_scan transport_scan child_scan home_scan health_scan social_scan disc_scan holiday_scan gift_scan {
	sum `x' if `x' > 0 & source == 0 [aw=ipw1], d
	di r(mean)
	* Expenditure is the outcome and source the regressor, as in every
	* other regression in this file (dependent variable comes first).
	reg `x' source if `x' > 0 [aw=ipw1]
}

* Table A5. Median non-zero and zero weekly expenditure.
* Weighted (ipw1) medians by source, each followed by a weighted quantile
* regression of the expenditure variable on source.

// Total expenditure
foreach s in 1 0 {
	summarize tamount if source == `s' [aweight = ipw1], detail
	display r(p50)
}
qreg tamount source [pweight = ipw1]
summarize tamountscan if source == 0 [aweight = ipw1], detail
display r(p50)
qreg tamountscan source [pweight = ipw1]

// Category-level expenditure
local cats food clothes transport child home health social disc holiday gift

* <category>_amount: both sources
foreach c of local cats {
	foreach s in 1 0 {
		summarize `c'_amount if source == `s' [aweight = ipw1], detail
		display r(p50)
	}
	qreg `c'_amount source [pweight = ipw1]
}

* <category>_scan: median for source == 0 only
foreach c of local cats {
	summarize `c'_scan if source == 0 [aweight = ipw1], detail
	display r(p50)
	qreg `c'_scan source [pweight = ipw1]
}

* Table A6. Median non-zero weekly expenditure.
* Same statistics as the median table above, restricted to strictly
* positive amounts.

// Total expenditure
foreach s in 1 0 {
	summarize tamount if tamount > 0 & source == `s' [aweight = ipw1], detail
	display r(p50)
}
qreg tamount source if tamount > 0 [pweight = ipw1]
summarize tamountscan if tamountscan > 0 & source == 0 [aweight = ipw1], detail
display r(p50)
qreg tamountscan source if tamountscan > 0 [pweight = ipw1]

// Category-level expenditure
local cats food clothes transport child home health social disc holiday gift

* <category>_amount: both sources
foreach c of local cats {
	foreach s in 1 0 {
		summarize `c'_amount if `c'_amount > 0 & source == `s' [aweight = ipw1], detail
		display r(p50)
	}
	qreg `c'_amount source if `c'_amount > 0 [pweight = ipw1]
}

* <category>_scan: median for source == 0 only
foreach c of local cats {
	summarize `c'_scan if `c'_scan > 0 & source == 0 [aweight = ipw1], detail
	display r(p50)
	qreg `c'_scan source if `c'_scan > 0 [pweight = ipw1]
}

* Figure 2. Distribution of average weekly total expenditure.
* Two panels (all amounts / strictly positive amounts), each overlaying
* three weighted kernel densities evaluated on a common grid, combined
* into one figure with a shared legend.

// Non-zero and zero expenditure
* The pooled estimate supplies the evaluation grid x reused via at(x)
kdensity tamount [aweight = ipw1], nograph generate(x fx)
kdensity tamount if source == 1 [aweight = ipw1], nograph generate(fx0) at(x)
kdensity tamount if source == 0 [aweight = ipw1], nograph generate(fx1) at(x)
kdensity tamountscan if source == 0 [aweight = ipw1], nograph generate(fx2) at(x)
label variable fx0 "Living Costs and Food Survey"
label variable fx1 "Spending Study: Scan + Direct Entry"
label variable fx2 "Spending Study: Scan Only"
line fx0 fx1 fx2 x, sort ///
	ytitle("Density") xlab(, grid) xtitle("GBP") ///
	title("{bf:Non-zero and zero expenditure}" "Total") ///
	lwidth(medthick medthick medthick) ///
	legend(cols(1) size(tiny) symysize(*0.6)) ///
	name(total_zero, replace)
* Remove the temporary grid and density variables before the next panel
drop x fx fx0 fx1 fx2

ksmirnov2 tamount [aweight = ipw1], by(source)
ksmirnov2 tamountscan [aweight = ipw1], by(source)

// Non-zero expenditure
kdensity tamount if tamount > 0 [aweight = ipw1], nograph generate(x fx)
kdensity tamount if tamount > 0 & source == 1 [aweight = ipw1], nograph generate(fx0) at(x)
kdensity tamount if tamount > 0 & source == 0 [aweight = ipw1], nograph generate(fx1) at(x)
kdensity tamountscan if tamountscan > 0 & source == 0 [aweight = ipw1], nograph generate(fx2) at(x)
label variable fx0 "Living Costs and Food Survey"
label variable fx1 "Spending Study: Scan + Direct Entry"
label variable fx2 "Spending Study: Scan Only"
line fx0 fx1 fx2 x, sort ///
	ytitle("Density") xlab(, grid) xtitle("GBP") ///
	title("{bf:Non-zero expenditure}" "Total") ///
	lwidth(medthick medthick medthick) ///
	legend(cols(1) size(tiny) symysize(*0.6)) ///
	name(total_nonzero, replace)
drop x fx fx0 fx1 fx2

ksmirnov2 tamount if tamount > 0 [aweight = ipw1], by(source)
ksmirnov2 tamountscan if tamountscan > 0 [aweight = ipw1], by(source)

* Combine both panels side by side and export
grc1leg2 total_zero total_nonzero, ///
	rows(1) ysize(3) xsize(7) scale(1.5) legscale(2.5) symxsize(6)
graph export "I:\Research\Output quality\Figures\Figure2.svg", as(svg) replace

* Figures 3-4. Distribution of category-level weekly expenditure.
// Non-zero and zero expenditure
* One named graph (<category>_zero) per category, overlaying three weighted
* kernel densities evaluated on the grid of the pooled estimate.
* The transport and health panels carry the bold header line
* "Non-zero and zero expenditure" above the category name; all other
* panels show the category name only. The header is set inside the loop
* so that each graph is drawn exactly once.
local catname "food clothes transport child home health social disc"
local catdescription ""Food and groceries" "Clothes and footwear" "Transport" "Child costs" "Home improvements" "Health" "Socialising and hobbies" "Other goods and services""
forvalues i = 1/8 {
	local a : word `i' of `catname'
	local b : word `i' of `catdescription'

	* Optional first title line; empty for panels without a header
	local hdr
	if inlist("`a'", "transport", "health") {
		local hdr `""{bf:Non-zero and zero expenditure}""'
	}

	kdensity `a'_amount [aw=ipw1], nograph generate(x fx)
	kdensity `a'_amount if source == 1 [aw=ipw1], nograph generate(fx0) at(x)
	kdensity `a'_amount if source == 0 [aw=ipw1], nograph generate(fx1) at(x)
	kdensity `a'_scan if source == 0 [aw=ipw1], nograph generate(fx2) at(x)
	lab var fx0 "Living Costs and Food Survey"
	lab var fx1 "Spending Study: Scan + Direct Entry"
	lab var fx2 "Spending Study: Scan Only"
	line fx0 fx1 fx2 x, sort ytitle("Density") xlab(, grid) xtitle("GBP") title(`hdr' "`b'") lwidth(medthick medthick medthick) legend(cols(1) size(tiny) symysize(*0.6)) ///
		name(`a'_zero, replace)
	* Remove the temporary grid and density variables before the next category
	drop x fx fx0 fx1 fx2
}

* Weighted two-sample Kolmogorov-Smirnov tests by source for each category
* (ksmirnov2 is a user-written command).
foreach a of local catname {
	ksmirnov2 `a'_amount [aw=ipw1], by(source)
	ksmirnov2 `a'_scan [aw=ipw1], by(source)
}

// Non-zero expenditure
* One named graph (<category>_nonzero) per category, restricted to strictly
* positive amounts. The transport and health panels carry the bold header
* line "Non-zero expenditure" above the category name; all other panels
* show the category name only. The header is set inside the loop so that
* each graph is drawn exactly once.
local catname "food clothes transport child home health social disc"
local catdescription ""Food and groceries" "Clothes and footwear" "Transport" "Child costs" "Home improvements" "Health" "Socialising and hobbies" "Other goods and services""
forvalues i = 1/8 {
	local a : word `i' of `catname'
	local b : word `i' of `catdescription'

	* Optional first title line; empty for panels without a header
	local hdr
	if inlist("`a'", "transport", "health") {
		local hdr `""{bf:Non-zero expenditure}""'
	}

	kdensity `a'_amount if `a'_amount > 0 [aw=ipw1], nograph generate(x fx)
	kdensity `a'_amount if `a'_amount > 0 & source == 1 [aw=ipw1], nograph generate(fx0) at(x)
	kdensity `a'_amount if `a'_amount > 0 & source == 0 [aw=ipw1], nograph generate(fx1) at(x)
	kdensity `a'_scan if `a'_scan > 0 & source == 0 [aw=ipw1], nograph generate(fx2) at(x)
	lab var fx0 "Living Costs and Food Survey"
	lab var fx1 "Spending Study: Scan + Direct Entry"
	lab var fx2 "Spending Study: Scan Only"
	line fx0 fx1 fx2 x, sort ytitle("Density") xlab(, grid) xtitle("GBP") title(`hdr' "`b'") lwidth(medthick medthick medthick) legend(cols(1) size(tiny) symysize(*0.6)) ///
		name(`a'_nonzero, replace)
	* Remove the temporary grid and density variables before the next category
	drop x fx fx0 fx1 fx2
}

* Weighted two-sample Kolmogorov-Smirnov tests by source among strictly
* positive amounts (ksmirnov2 is a user-written command).
foreach a of local catname {
	ksmirnov2 `a'_amount if `a'_amount > 0 [aw=ipw1], by(source)
	ksmirnov2 `a'_scan if `a'_scan > 0 [aw=ipw1], by(source)
}

* Combine the zero / non-zero panels, four categories per figure, with a
* single shared legend, and export each figure as SVG.
grc1leg2 food_zero food_nonzero clothes_zero clothes_nonzero transport_zero transport_nonzero child_zero child_nonzero, rows(4) ysize(10) xsize(9) scale(0.8) legscale(2) symxsize(10)
graph export "I:\Research\Output quality\Figures\Figure3.svg", as(svg) replace

grc1leg2 home_zero home_nonzero health_zero health_nonzero social_zero social_nonzero disc_zero disc_nonzero, rows(4) ysize(10) xsize(9) scale(0.8) legscale(2) symxsize(10)
graph export "I:\Research\Output quality\Figures\Figure4.svg", as(svg) replace
