* -------------------------------------------------------------------------
* Stata do-file to article
* -------------------------------------------------------------------------
* Wenz, A., Jäckle, A., Burton, J., Couper, M. P., and Read, B. 
* Quality of expenditure data collected with a mobile receipt scanning app
* in a probability household panel
* -------------------------------------------------------------------------

* Set working directory ---------------------------------------------------
// Redirect Stata's PLUS system directory (the location searched for user-installed commands)
// NOTE(review): both paths below are machine-specific and must be adapted before running elsewhere
sysdir set PLUS "d:\home\user\ado\stbplus"
// All relative file paths used later in this do-file are resolved against this folder
cd "I:\Research\Output quality\Data"

* Load Spending Study App paradata  ---------------------------------------
use "UKDA-8749-stata\stata\stata13\apd.dta", clear
// Keep identifiers, the activity type, the direct-entry variables (nr_amount through nr_other) and the submission timestamps
keep pidp i_hidp entryid activitytype nr_amount-nr_other starttime_apd endtime_apd

* Recode Spending Study data ----------------------------------------------
// Generate day of study when submission took place
// Day 1 begins at 21 Oct 2016 00:00:00; each day spans 86,400,000 ms and 76 days are covered.
// Submissions whose end time falls outside that window keep a missing day_dv.
local msday = 86400000
local day1 = tc(21oct2016_00:00:00.000)
gen day_dv = .
forvalues d = 1/76 {
	local lo = `day1' + (`d' - 1) * `msday'
	local hi = `lo' + `msday' - 1
	replace day_dv = `d' if endtime_apd >= `lo' & endtime_apd <= `hi'
}
label variable day_dv "Day of study when submission took place"

// Generate week of study when submission took place
// Week 1 covers days 1-10; weeks 2-11 are consecutive 7-day blocks starting on day 11.
gen week_dv = 1 if day_dv < 11
forvalues w = 2/11 {
	local first = 11 + 7 * (`w' - 2)
	replace week_dv = `w' if inrange(day_dv, `first', `first' + 6)
}
label variable week_dv "Week of study when submission took place"

// Place the derived variables next to activitytype; the raw timestamps are no longer needed
order day_dv week_dv, after(activitytype)
drop starttime_apd endtime_apd

// Merge Spending Study Shopping receipt data
merge 1:1 entryid using "UKDA-8749-stata\stata\stata13\rct.dta"
tab _merge activitytype, mis // all 5,472 scanned receipts were matched
drop _merge

// Generate total amount per submission
// Codes used as "missing" in the raw data: -999 for totalspend, -9 for nr_amount.
gen amount = .
// Scanned receipt, total spending not missing, currency not EUR
replace amount = totalspend if activitytype == 1 & totalspend != -999 & currency != 2
// Scanned receipt, total spending not missing, currency EUR: convert EUR to GBP (exchange rate: 16 January 2018).
// The activitytype condition is stated explicitly so that this rule matches the non-EUR rule above
// and cannot touch rows that are not scanned receipts.
replace amount = totalspend * 0.89 if activitytype == 1 & totalspend != -999 & currency == 2
// Direct entry with a reported amount
replace amount = nr_amount if activitytype == 2 & nr_amount != -9
// Nothing bought
replace amount = 0 if activitytype == 3
label var amount "Total amount: scan + direct entry + nothing bought"

// Generate total amount per submission: scanned receipts only
gen amountscan = amount
replace amountscan = . if activitytype == 2 // blank out direct entries
label var amountscan "Total amount: scan + nothing bought"

// Generate # direct entry categories selected
// Counts how many of nr_food ... nr_other equal 1; only defined for direct entries with a reported amount
egen countdirect = anycount(nr_food-nr_other), values(1)
replace countdirect = . if activitytype != 2
replace countdirect = . if nr_amount == -9

// Recode category-level amount for scanned receipts
// For each raw receipt category create <name>_scan, treating the code -9 as missing.
// The two word lists are positionally paired: raw variable name -> stem of the new variable.
// The creation order matters: the range food_scan-other_scan is used below and later in the file.
local rawcat "food clothes transport child home health social miscgoods holidays gifts other alcoholtobacco"
local newcat "food clothes transport child home health social disc holiday gift other alcoholtobacco"
forvalues i = 1/12 {
	local r : word `i' of `rawcat'
	local n : word `i' of `newcat'
	gen `n'_scan = `r' if `r' != -9
}

// Recode alcohol and tobacco as socialising and hobbies.
// A plain "social_scan + alcoholtobacco_scan" evaluates to missing as soon as ONE of the two is
// missing, which would discard a valid amount (and the loop below would then turn it into 0).
// Therefore only add when alcohol/tobacco is observed, and treat a missing social amount as 0 in that sum.
replace social_scan = cond(missing(social_scan), 0, social_scan) + alcoholtobacco_scan if !missing(alcoholtobacco_scan)

// A receipt with a valid total but no amount in a category counts as zero spending in that category
foreach x of var food_scan-other_scan {
	replace `x' = 0 if `x' == . & activitytype == 1 & totalspend != -999
}
// Free the raw names; food, clothes, ... are re-created further down as combined amounts
drop `rawcat'

// Generate category-level amount for direct entries
// -- Single category selected
// The whole reported amount goes to the one category that was ticked (nr_<cat> == 1).
local cats "food clothes transport child home health social disc holiday gift other"
foreach c of local cats {
	gen `c'_direct = nr_amount if nr_`c' == 1 & nr_amount != -9 & countdirect == 1
}

// Categories not ticked in a single-category direct entry are zero rather than missing
foreach x of var food_direct-other_direct {
	replace `x' = 0 if `x' == . & activitytype == 2 & countdirect == 1
}

// -- Multiple categories selected: divide by ratio of spending reported in scan + direct entry
// ---- Generate category-level amount from 1) scanned receipts and 2) direct entry with single category
// rowtotal(..., missing) yields missing only when both components are missing
foreach c of local cats {
	egen `c' = rowtotal(`c'_scan `c'_direct), missing 
}

// "Nothing bought" submissions count as zero spending in every category
foreach x of var food-other {
	replace `x' = 0 if `x' == . & activitytype == 3
}

// ---- Examine category selection patterns 
// For direct entries with more than one category, the 1s are temporarily turned into missing values,
// mvpatterns is run separately for entries with 4, 3 and 2 categories, and the 1s are then restored.
// NOTE(review): mvpatterns is not an official Stata command; presumably it is found via the PLUS directory set at the top.
mvdecode nr_food-nr_other if countdirect > 1 & countdirect != ., mv(1)
forvalues k = 4(-1)2 {
	mvpatterns nr_food-nr_other if countdirect == `k', notable nodrop
}
mvencode nr_food-nr_other if countdirect > 1 & countdirect != ., mv(1)

// ---- Generate ratio of category-level amounts
// Direct entries that ticked several categories report only ONE amount. That amount is split
// across the ticked categories in proportion to the ratio estimates (Stata's -ratio- command) of
// each category's amount to the combined amount of the ticked categories. The category amounts
// used here (food, clothes, ...) were built above from scans plus single-category direct entries.
//
// split_direct <categories>, id(<suffix>)
//   <categories>  two or more category stems (e.g. food clothes); for each stem c the variables
//                 c, c_direct and nr_c must exist
//   id()          suffix naming the combination (category numbers joined by "_")
// Creates total_<suffix> (sum of the listed category amounts) and, per category c,
// c_total_<suffix> (the estimated ratio c / total, constant over observations). It then sets
// c_direct = nr_amount * ratio for direct entries that ticked exactly the listed categories
// (all nr_c == 1, nr_amount not -9, countdirect equal to the number of categories listed).
capture program drop split_direct
program define split_direct
	syntax namelist(min=2), id(string)
	local k : word count `namelist'
	local total "total_`id'"

	// Build "a + b + c" and "nr_a == 1 & nr_b == 1 & nr_c == 1 & " from the category list
	local sumexp
	local ticked
	foreach c of local namelist {
		if "`sumexp'" == "" {
			local sumexp "`c'"
		}
		else {
			local sumexp "`sumexp' + `c'"
		}
		local ticked "`ticked'nr_`c' == 1 & "
	}

	gen `total' = `sumexp'
	foreach c of local namelist {
		ratio `c'/`total'
		gen `c'_`total' = _b[_ratio_1]
		replace `c'_direct = nr_amount * `c'_`total' if `ticked'nr_amount != -9 & countdirect == `k'
	}
end

// Category numbering used in the suffixes:
// 1 food, 2 clothes, 3 transport, 4 child, 5 home, 6 health, 7 social, 8 disc, 9 holiday, 10 gift, 11 other

// 4 categories selected
split_direct food clothes social gift, id(1_2_7_10)

// 3 categories selected
split_direct food clothes social, id(1_2_7)
split_direct food clothes disc, id(1_2_8)
split_direct food transport gift, id(1_3_10)
split_direct food health gift, id(1_6_10)
split_direct clothes transport home, id(2_3_5)
split_direct clothes transport disc, id(2_3_8)
split_direct clothes home social, id(2_5_7)
split_direct clothes social disc, id(2_7_8)
split_direct clothes social other, id(2_7_11)

// 2 categories selected
split_direct food clothes, id(1_2)
split_direct food transport, id(1_3)
split_direct food child, id(1_4)
split_direct food home, id(1_5)
split_direct food health, id(1_6)
split_direct food social, id(1_7)
split_direct food disc, id(1_8)
split_direct food gift, id(1_10)
split_direct food other, id(1_11)
split_direct clothes home, id(2_5)
split_direct transport health, id(3_6)
split_direct transport social, id(3_7)
split_direct transport disc, id(3_8)
split_direct transport gift, id(3_10)
split_direct child social, id(4_7)
split_direct home social, id(5_7)
split_direct home disc, id(5_8)
split_direct home other, id(5_11)
split_direct health disc, id(6_8)
split_direct social disc, id(7_8)
split_direct social holiday, id(7_9)
split_direct social gift, id(7_10)
split_direct disc gift, id(8_10)

// Categories that were not ticked in a multi-category direct entry are zero rather than missing
foreach x of var food_direct-other_direct {
	replace `x' = 0 if `x' == . & activitytype == 2 & countdirect > 1
}

// Generate category-level amount from 1) scanned receipts and 2) direct entry
// <cat>_amount is the row sum of the scan and direct-entry amounts; it is missing only when both are missing.
// The creation order (food ... other) is relied on by the range food_amount-other_amount below and later.
foreach c in food clothes transport child home health social disc holiday gift other {
	egen `c'_amount = rowtotal(`c'_scan `c'_direct), missing 
}

// "Nothing bought" submissions count as zero spending in every category
foreach x of var food_amount-other_amount {
	replace `x' = 0 if `x' == . & activitytype == 3
}

// Generate total amount and category-level amount by week
// m<var> is the person-week sum of <var>; with the missing option the sum stays missing
// when every contributing submission is missing.
foreach x of var amount amountscan food_amount-other_amount food_scan-other_scan {
	bysort pidp week_dv: egen m`x' = total(`x'), missing
}
bysort pidp week_dv: gen seq = _n
keep if seq == 1 // keep one row per person-week
drop seq
keep pidp i_hidp week_dv mamount-mother_scan // keep variables on week level

// Keep first two weeks per person
// Sort explicitly by week within person so that "first two" is guaranteed to mean the two
// earliest weeks with submissions, instead of depending on whatever row order earlier commands
// left behind. Rows with a missing week_dv sort last within a person.
bysort pidp (week_dv): gen seq = _n
keep if seq <= 2

// Generate average weekly spending per person (average of two weeks)
// egen mean() ignores missing values, so a person with one valid week gets that week's value
foreach x of var mamount-mother_scan {
	bysort pidp: egen w`x' = mean(`x') 
}
keep if seq == 1 // keep one row per person
drop seq
drop week_dv mamount-mother_scan

// Rename variables
// Strip the "wm" prefix from the person-level averages, then mark the two totals with a leading "t"
rename wm* *
rename (amount amountscan) (tamount tamountscan)

// Label variables
label var tamount "Total amount: scan + direct entry + nothing bought"
label var tamountscan "Total amount: scan + nothing bought"

// One base label per category; the scan-only variable gets the same label plus a suffix
local lb_food "Food and groceries"
local lb_clothes "Clothes and footwear"
local lb_transport "Transport costs"
local lb_child "Child costs"
local lb_home "Home improvements and household goods"
local lb_health "Health expenses"
local lb_social "Socialising and hobbies"
local lb_disc "Other goods and services"
local lb_holiday "Holidays"
local lb_gift "Giving money or gifts to others"
local lb_other "Other"

foreach c in food clothes transport child home health social disc holiday gift other {
	la var `c'_amount "`lb_`c''"
	la var `c'_scan "`lb_`c'': scanned receipt only"
}

// Drop participants who did not scan/did not report any purchases (only selected nothing bought) (11 participants dropped out)
drop if tamount == 0

// Recode missing values in scan variables as zero
// NOTE(review): _all applies this to every variable in the data set (identifiers, tamount and the
// *_amount variables included), not only to the scan variables - confirm that this is intended.
recode _all (missing = 0)

// Merge respondent characteristics
merge 1:1 pidp i_hidp using "UKDA-6849-stata13_se\stata13_se\i_indresp_ip.dta", ///
	keepusing(i_pdvage i_sex i_hiqual_dv i_jbstat i_fimngrs_dv i_urban_dv i_gor_dv)
tab _merge // respondent characteristics missing for 2 participants 
drop if _merge == 2 // rows that exist only in the respondent file
drop _merge

// Impute respondent characteristics from previous waves (grouped by respondent)
// -- pidp 1722242886: all values taken from wave 8 (age at wave 8: 57)
local who "if pidp == 1722242886"
replace i_pdvage = 58 `who'
replace i_sex = 2 `who'
replace i_hiqual_dv = 2 `who'
replace i_jbstat = 2 `who'
replace i_fimngrs_dv = 15000 `who'
replace i_urban_dv = 1 `who'
replace i_gor_dv = 4 `who'

// -- pidp 1701495335: all values taken from wave 8 (age at wave 8: 17)
local who "if pidp == 1701495335"
replace i_pdvage = 18 `who'
replace i_sex = 2 `who'
replace i_hiqual_dv = 4 `who'
replace i_jbstat = 7 `who'
replace i_fimngrs_dv = 269.322 `who'
replace i_urban_dv = 1 `who'
replace i_gor_dv = 8 `who'

// -- pidp 1715823692: age only, and only where it is coded -7 (age at wave 8: 34)
replace i_pdvage = 35 if pidp == 1715823692 & i_pdvage == -7

// -- pidp 1700585487 and 1700585491: urban indicator and region from wave 5
replace i_urban_dv = 2 if inlist(pidp, 1700585487, 1700585491)
replace i_gor_dv = 8 if inlist(pidp, 1700585487, 1700585491)

// Remaining negative codes in qualification and income become missing
replace i_hiqual_dv = . if i_hiqual_dv < 0
replace i_fimngrs_dv = . if i_fimngrs_dv < 0

// Impute respondent characteristics with median
// tabstat displays the medians; the values below are typed in by hand
tabstat i_hiqual_dv i_fimngrs_dv, stats(median)

replace i_hiqual_dv = 3 if inlist(pidp, 1702196488, 1703883562)
replace i_fimngrs_dv = 1554.083 if pidp == 1715823692

// Recode respondent characteristics
// Stata treats a missing value as larger than any number, so open-ended conditions such as
// "x >= 76", "x > 2" or "x != 7" are TRUE for missing x. The open-ended recodes below therefore
// exclude missing values explicitly, so that a missing input gives a missing output instead of
// being put into a substantive category.
gen female = 1 if i_sex == 2
replace female = 0 if i_sex == 1
label var female "Female"
la def femalelb 1 "Female" 0 "Male", replace
la val female femalelb
drop i_sex

gen age = i_pdvage
label var age "Age"
drop i_pdvage

gen age2 = age^2
label var age2 "Age-squared"

gen agegp = 1 if inrange(age, 16, 25)
replace agegp = 2 if inrange(age, 26, 35)
replace agegp = 3 if inrange(age, 36, 45)
replace agegp = 4 if inrange(age, 46, 55)
replace agegp = 5 if inrange(age, 56, 65)
replace agegp = 6 if inrange(age, 66, 75)
replace agegp = 7 if age >= 76 & !missing(age) // missing age must not end up in the oldest group
lab var agegp "Age: categories"
lab def agegpl 1 "16to25" 2 "26to35" 3 "36to45" 4 "46to55" 5 "56to65" 6 "66to75" 7 "76andolder", replace
lab val agegp agegpl

gen empl = 1 if i_jbstat == 1 | i_jbstat == 2
replace empl = 0 if i_jbstat > 2 & !missing(i_jbstat) // missing job status must not count as "not employed"
label var empl "Employed"
label def empllb 1 "Employed" 0 "Not employed", replace
label val empl empllb
drop i_jbstat

gen degree = 1 if i_hiqual_dv == 1 | i_hiqual_dv == 2
replace degree = 0 if i_hiqual_dv >= 3 & i_hiqual_dv <= 9
label var degree "Has degree"
label def degreelb 1 "Has degree" 0 "Has no degree", replace
label val degree degreelb
drop i_hiqual_dv

gen income = i_fimngrs_dv
label var income "Personal monthly gross income"
drop i_fimngrs_dv

gen urban = 1 if i_urban_dv == 1
replace urban = 0 if i_urban_dv == 2
label var urban "Urban or rural area"
la def urbanlb 1 "Urban" 0 "Rural", replace
la val urban urbanlb
drop i_urban_dv

rename i_gor_dv region  

// Region codes 7 and 8 form the "London/South East" group; a missing region stays missing
gen london = 1 if region == 7 | region == 8
replace london = 0 if region != 7 & region != 8 & !missing(region)
label var london "London/South East"
la def london 1 "London/South East" 0 "Not London/South East", replace
la val london london

// Merge household characteristics
// Several respondents can share a household, hence m:1 on the household identifier
merge m:1 i_hidp using "UKDA-6849-stata13_se\stata13_se\i_hhresp_ip.dta", ///
	keepusing(i_hsownd i_hhsize i_nkids_dv i_hhpc1 i_hhpc2 i_hhpc3 i_hhpc4 i_hhpc5 i_hhpc96)
tab _merge // household characteristics missing for 2 participants
drop if _merge == 2 // rows that exist only in the household file
drop _merge

// Impute household characteristics from previous waves (based on pidp)
// -- pidp 1723222126: values from wave 5
local who "if pidp == 1723222126"
replace i_hsownd = 4 `who'
replace i_hhsize = 6 `who'
replace i_nkids_dv = 1 `who'
replace i_hhpc1 = 1 `who'

// Impute household characteristics with median
// tabstat displays the medians; the values below are typed in by hand for pidp 1716449372.
// The two lists are positionally paired: variable -> imputed value.
local hhvars "i_hsownd i_hhsize i_nkids_dv i_hhpc1 i_hhpc2 i_hhpc3 i_hhpc4 i_hhpc5 i_hhpc96"
tabstat `hhvars', stats(median)

local hhmeds "2 3 0 0 1 0 1 0 0"
forvalues i = 1/9 {
	local v : word `i' of `hhvars'
	local m : word `i' of `hhmeds'
	replace `v' = `m' if pidp == 1716449372
}

// Recode household characteristics
// Tenure: i_hsownd codes 1-3 -> owned (1), codes 4-5 -> not owned (0), anything else stays missing
gen tenure = 1 if inlist(i_hsownd, 1, 2, 3)
replace tenure = 0 if inlist(i_hsownd, 4, 5)
label var tenure "Housing tenure"
label def tenurelb 1 "House owned" 0 "House not owned", replace
label val tenure tenurelb
drop i_hsownd

rename (i_hhsize i_nkids_dv) (hhsize nkids)
label var hhsize "Household size"
label var nkids "Number of children in household"

// Computer in household: 1 if any of i_hhpc1-i_hhpc5 equals 1; set to 0 when i_hhpc96 equals 1
// (the second rule is applied last and therefore wins if both hold)
gen pc = 1 if inlist(1, i_hhpc1, i_hhpc2, i_hhpc3, i_hhpc4, i_hhpc5)
replace pc = 0 if i_hhpc96 == 1
label var pc "Computer in household"
label def yesnolb 1 "Yes" 0 "No", replace
label val pc yesnolb
drop i_hhpc1 i_hhpc2 i_hhpc3 i_hhpc4 i_hhpc5 i_hhpc96

// Interaction term
gen femaleXhhsize = female * hhsize
label var femaleXhhsize "Female X Household size"

// Generate indicator for data source
// Every row of this file gets source 0; the value label also defines code 1 for the other source
gen source = 0
la var source "Data source"
la def sourcelb 0 "Spending Study" 1 "Living Costs and Food Survey", replace
la val source sourcelb

// Save data
save "Spending_Study", replace
