/*====================================================================
cr_resp_burden : CREATES RESPONDENT BURDEN PAPER ANALYSIS DATASETS

Project:       Respondent Burden Paper
Author:        Brendan Read
Organisation:  ISER, University of Essex
----------------------------------------------------------------------
Creation Date:    2016/10/19
Modification Date: 2018/12/12
Do-file version:    09

Input:

Understanding Society Innovation Panel 9 (UK Data Archive SN6849)
  i_indsamp_ip.dta - Sample members file
  i_indresp_ip.dta - Individual adult survey response file
  i_hhresp_ip.dta - Household survey response file

Understanding Society Spending Study 1 (UK Data Archive SN8348)
  sct.dta - Sample characteristics data
  eop.dta -  End of project questionnaire responses
  eow.dta - End of week questionnaire responses
  apd.dta - Spending Study App Paradata file
  
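Understanding Society Mainstage Wave 7 (UK Data Archive SN6614)
  g_hhresp.dta - Household survey response file

ONS Consumer Prices Index
  ons_cpi.dta - CPI by year (2015 = 100), merged on year to deflate Wave 7 income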
  
Output:

  RQ1.dta - Analytical data set for RQ1
  RQ2_sub.dta - Analytical data set for RQ2 subjective analyses
  RQ2_obj.dta - Analytical data set for RQ2 objective analyses
  RQ3.dta - Analytical data set for RQ3
  RQ4_sub.dta - Analytical data set for RQ4 subjective analyses
  RQ4_obj.dta - Analytical data set for RQ4 objective analyses

====================================================================*/

*Set-up

* Root directory that holds every input and output dataset of this do-file.
* Replace the placeholder before running.
local data "[data directory]"  //Local macro for the directory containing the data
* The path is quoted because -cd- fails on an unquoted directory name that
* contains spaces.
cd "`data'"

/* RQ1 */

*Identify analytical sample from IP9 sample
* Paths use forward slashes throughout: Stata accepts them on every platform,
* whereas backslashes only resolve on Windows.

use pidp i_ivfio i_finloc using "`data'/i_indsamp_ip.dta", clear
drop if i_finloc==0 //Removes original addresses for movers
merge 1:1 pidp using "`data'/sct.dta", keepusing(pidp) 
keep if _merge==3 //Retain the issued Spending Study sample (2432)
drop _merge
keep if i_ivfio == 1  //Retain respondents who gave full interviews at IP9 (2114)
save "`data'/RQ1.dta", replace
  
*Merge in other IDs (hidp, appid, anon_id) to allow merging with the other Spending Study data sets

use pidp hidp appid anon_id using "`data'/sct.dta", clear
merge 1:1 pidp using "`data'/RQ1.dta"
keep if _merge == 3 //Retain only matched cases - respondents who gave full interviews at IP9 (2114)
drop _merge
save "`data'/RQ1.dta", replace

*Restrict sample to Spending Study participants
* One row per app use is loaded from the paradata; incomplete uses are removed.

use "`data'/apd.dta", clear
drop if activitystatus == 0 // Incomplete app uses

gen duration = endtime - starttime //Duration derived from start time and end time paradata
* endtime is compared with millisecond clock values later in this file, so
* dividing by 1000 presumably yields seconds - confirm against the paradata docs.
replace duration = duration / 1000

* Calculate outliers in terms of duration
* Fences are built on the log scale as
*   lower = Q1 - 1.5*IQR*exp(-3*MC),  upper = Q3 + 1.5*IQR*exp(4*MC)
* where MC is the value -medcouple- leaves in e(mc).
* NOTE(review): -medcouple- is not an official Stata command; it must be
* installed separately before this runs.
* NOTE(review): in the medcouple-adjusted boxplot these exponents (-3, 4) are
* the ones given for MC < 0; for MC >= 0 the published pair is (-4, 3). Confirm
* the sign of e(mc) for these data.
* The jnk_* quantities are stored as (float) variables and compared as such;
* keep them as variables so borderline cases are classified the same way.

gen ln_duration = ln(duration)
egen jnk_iqr = iqr(ln_duration)
quietly summarize ln_duration, detail
gen jnk_Q1 = r(p25)
gen jnk_Q3 = r(p75)
gen jnk_eq1 = 1.5*jnk_iqr
medcouple ln_duration
gen jnk_eq2 = -3 * e(mc)
gen jnk_eq3 = 4 * e(mc)
gen jnk_eq4 = exp(jnk_eq2)
gen jnk_eq5 = exp(jnk_eq3)
gen jnk_eq6 = jnk_eq1 * jnk_eq4
gen jnk_eq7 = jnk_eq1 * jnk_eq5 
gen jnk_lb = jnk_Q1 - jnk_eq6
gen jnk_ub = jnk_Q3 + jnk_eq7

* Report duration before and after trimming, then keep only app uses strictly
* inside the fences. A missing ln_duration fails the upper-bound test and is dropped.
summarize duration
summarize duration if ln_duration > jnk_lb & ln_duration < jnk_ub
keep if ln_duration > jnk_lb & ln_duration < jnk_ub

* Derive respondent level variables 
* Each statistic is computed within pidp over the app uses that survived the
* outlier trimming and is repeated on every row of that individual.

bysort pidp: egen avg_duration=mean(duration) // Individual level average duration across all app uses
bysort pidp: egen appuses=count(pidp) // Individual level number of app uses completed
bysort pidp: egen tot_duration = total(duration) // Individual level total duration of all app uses
bysort pidp: egen device = mode(devicetype) // Most frequent device type within individual

label var avg_duration "Average duration across all app uses"
label var appuses "Number of app uses completed"
label var tot_duration "Total duration of all app uses"
label var device "Type of device most used"

keep pidp appuses avg_duration tot_duration device
bysort pidp: keep if _n == 1 //Keep one observation per individual

* Attach the IDs and IP9 sample restriction saved earlier in RQ1.dta.
* Forward slashes keep the path usable outside Windows.
merge 1:1 pidp using "`data'/RQ1.dta"
keep if _merge ==3 //Retain only sample members who participated in the Spending Study (267)
drop _merge

 *Merge in eop data
* The End of Project file is keyed on anon_id, which is present in the data in
* memory. Forward slashes keep the paths usable outside Windows.

merge 1:1 anon_id using "`data'/eop.dta", keepusing(eop_likely eop_effort eop_interest mdifficulty)
keep if _merge == 3 //Keep only participants who completed the End of Project survey (238)
* This keep also discards _merge and the ID variables other than pidp.
keep pidp appuses avg_duration tot_duration eop_likely eop_effort eop_interest mdifficulty device

drop if eop_likely == -77 | eop_effort == -77 | /// Retain only participants who received the subjective burden questions (224)
eop_interest == -77 | mdifficulty == -77  

drop if eop_likely == -1 | eop_effort == -1 | /// Retain only participants who answered all subjective burden questions (223)
eop_interest == -1 | mdifficulty == -1

* Dichotomise each burden item: category 1 stays 1 and all higher categories
* collapse to 2 ...
recode eop_likely (2/4=2), gen(two_likely)
recode mdifficulty (2/4=2), gen(two_difficulty)
recode eop_effort (2/3=2), gen(two_effort)
recode eop_interest (2/3=2), gen(two_interest)

* ... then shift to 0/1 coding (original category 1 -> 0, anything higher -> 1).
foreach var of varlist two_likely two_difficulty two_effort two_interest{
  replace `var' = `var' - 1
}

*Save RQ1 dataset

save "`data'/RQ1.dta", replace

/* RQ2 */

**SUBJECTIVE BURDEN
  
* Load and sort end of week data
* Paths use forward slashes so the do-file also runs outside Windows.
  
use pidp kantarweek wkdifficulty using "`data'/eow.dta", clear
sort pidp kantarweek
keep if kantarweek <5 & kantarweek > 0 // Weeks 1-4 only
replace wkdifficulty = 0 if wkdifficulty == -77 // 0 is used as the "missing" state
reshape wide wkdifficulty, i(pidp) j(kantarweek)

*Merge in RQ1 data to get analytical sample  
* RQ1.dta holds the final analytical sample. Sample members with no end of
* week record (_merge == 2) are kept and filled with 0 below.
  
merge 1:1 pidp using "`data'/RQ1.dta", keepusing(pidp)
drop if _merge == 1 // Retain analytical sample (223)
drop _merge

*Replace missing values with zero  
  
foreach var of varlist wkdifficulty*{
  replace `var' = 0 if `var' == .
}

*Generate order variables for arranging sequence plot
* Two-digit key built from week 1 (tens) and week 4 (units), then reversed.

gen order = wkdifficulty1 * 10 + wkdifficulty4
replace order = 45 - order
la var order "Ordering variable for sequence analysis graph"

*Reshape data

reshape long wkdifficulty, i(pidp) j(week)

*Prepare reversed version for sequence plot (increased burden is high)
* The zero "missing" state maps to 5, which the value label names "Missing".

gen revdiff = 5 - wkdifficulty
la def diff 1"Very easy" 2"Somewhat easy" 3"Somewhat difficult" 4"Very difficult" 5"Missing"
la val revdiff diff
* Full variable name used here: the abbreviation -revdif- only resolves while
* variable abbreviation is switched on.
la var revdiff "Difficulty reversed for graph"

*Merge in PSU

merge m:1 pidp using "`data'/i_indresp_ip.dta", keepusing(i_psu)
keep if _merge==3 // Retain analytical sample (223)
* NOTE(review): _merge is left in the saved file (other sections drop it);
* kept as is so the output dataset is unchanged.
  
*Save RQ2 subjective dataset

save "`data'/RQ2_sub.dta", replace
  
**OBJECTIVE BURDEN

*Load app paradata
* Paths use forward slashes so the do-file also runs outside Windows.

use "`data'/apd.dta", clear
drop if activitystatus == 0 // Incomplete app uses
  
*Merge with RQ1 data to get analytical sample
* RQ1.dta holds one row per analytical sample member; every completed app use
* of those members is kept.

merge m:1 pidp using "`data'/RQ1.dta"
keep if _merge ==3 // Retain analytical sample (223)
  
*Generate duration  
gen duration = endtime - starttime
replace duration = duration / 1000
label var duration "Duration of app use"

* Calculate outliers in terms of duration
* Fences on the log scale:
*   lower = Q1 - 1.5*IQR*exp(-3*MC),  upper = Q3 + 1.5*IQR*exp(4*MC)
* with MC read from e(mc) after -medcouple- (not an official Stata command;
* it must be installed separately).
* NOTE(review): (-3, 4) are the medcouple-adjusted boxplot exponents for
* MC < 0; confirm the sign of e(mc), since (-4, 3) applies when MC >= 0.
* The fences are recomputed here on the analytical-sample app uses only.

gen ln_duration = ln(duration)
egen jnk_iqr = iqr(ln_duration)
quietly summarize ln_duration, detail
gen jnk_Q1 = r(p25)
gen jnk_Q3 = r(p75)
gen jnk_eq1 = 1.5*jnk_iqr
medcouple ln_duration
gen jnk_eq2 = -3 * e(mc)
gen jnk_eq3 = 4 * e(mc)
gen jnk_eq4 = exp(jnk_eq2)
gen jnk_eq5 = exp(jnk_eq3)
gen jnk_eq6 = jnk_eq1 * jnk_eq4
gen jnk_eq7 = jnk_eq1 * jnk_eq5 
gen jnk_lb = jnk_Q1 - jnk_eq6
gen jnk_ub = jnk_Q3 + jnk_eq7

summarize duration
summarize duration if ln_duration > jnk_lb & ln_duration < jnk_ub
keep if ln_duration > jnk_lb & ln_duration < jnk_ub

*Generate app use count and count split by activity type
* Running index of app uses in endtime order, overall and within activity type.

sort pidp endtime
by pidp: gen appuse_no=_n
la var appuse_no "Count of app uses within individual"
sort pidp activitytype endtime
by pidp activitytype: gen activity_no=_n
la var activity_no "By type: count of app uses within individual"
 
* This keep also discards _merge and the jnk_* helper variables.
keep pidp duration appuse_no activity_no activitytype
  
*Merge in PSU

merge m:1 pidp using "`data'/i_indresp_ip.dta", keepusing(i_psu)
keep if _merge==3 // Retain app uses by analytical sample members (10,179)
drop _merge

*Save RQ2 objective dataset
  
save "`data'/RQ2_obj.dta", replace

/* RQ3 */

*Load app paradata
* Paths use forward slashes so the do-file also runs outside Windows.
use "`data'/apd.dta", clear
drop if activitystatus == 0 //Remove incomplete app uses
  
*Merge with RQ1 data to get analytical sample
merge m:1 pidp using "`data'/RQ1.dta"
keep if _merge ==3 //Retain analytical sample (223)
  
*Generate duration  
gen duration = endtime - starttime
replace duration = duration / 1000

* Calculate outliers in terms of duration
* Fences on the log scale:
*   lower = Q1 - 1.5*IQR*exp(-3*MC),  upper = Q3 + 1.5*IQR*exp(4*MC)
* with MC read from e(mc) after -medcouple- (not an official Stata command;
* it must be installed separately).
* NOTE(review): (-3, 4) are the medcouple-adjusted boxplot exponents for
* MC < 0; confirm the sign of e(mc), since (-4, 3) applies when MC >= 0.

gen ln_duration = ln(duration)
egen jnk_iqr = iqr(ln_duration)
quietly summarize ln_duration, detail
gen jnk_Q1 = r(p25)
gen jnk_Q3 = r(p75)
gen jnk_eq1 = 1.5*jnk_iqr
medcouple ln_duration
gen jnk_eq2 = -3 * e(mc)
gen jnk_eq3 = 4 * e(mc)
gen jnk_eq4 = exp(jnk_eq2)
gen jnk_eq5 = exp(jnk_eq3)
gen jnk_eq6 = jnk_eq1 * jnk_eq4
gen jnk_eq7 = jnk_eq1 * jnk_eq5 
gen jnk_lb = jnk_Q1 - jnk_eq6
gen jnk_ub = jnk_Q3 + jnk_eq7

* Only rows with a defined log duration inside the fences survive, so every
* remaining duration is strictly positive.
summarize duration
summarize duration if ln_duration > jnk_lb & ln_duration < jnk_ub
keep if ln_duration > jnk_lb & ln_duration < jnk_ub

*Drop unnecessary variables
* This also discards _merge, the RQ1 variables and the jnk_* helpers.
keep pidp endtime activitytype duration

*Generate day of scan
* Study day d (1..76) covers the 86,400,000 ms window that starts (d-1) days
* after the origin timestamp; endtimes outside all 76 windows stay missing.
local origin = tc(21oct2016_00:00:00.000)
gen day_dv = .

forvalues d = 1/76 {
  local lo = `origin' + (`d' - 1) * 86400000
  local hi = `lo' + 86399999
  replace day_dv = `d' if endtime >= `lo' & endtime <= `hi'
}
label variable day_dv "Day of the study in which the the submission took place"

*Generate relative day of scan
* Day 1 is each individual's first study day with a retained app use.
bysort pidp (endtime): egen startday = min(day_dv)
gen relday = (day_dv - startday) + 1
drop day_dv startday
* Missing relday also satisfies > 28, so rows without a study day are dropped too.
drop if relday > 28
  
* ---------------------------------------------------------------------------
* Running (cumulative) quantities per individual, evaluated at each row's own
* relative day. Pattern used throughout this section:
*   1. For every day d = 1..28 a temporary variable holds the within-pidp
*      statistic over rows with relday <= d, defined only on those rows.
*   2. A row-wise egen function then picks, for each row, the value belonging
*      to the first day at which the row qualifies, i.e. its own relday.
* The -by pidp:- prefix relies on the data still being sorted by pidp.
* ---------------------------------------------------------------------------

*Calculate time spent participating up to that day  
foreach day of numlist 1/28{  
  by pidp: egen tempds`day' = total(duration) if relday <= `day'
}

* rowmin ignores the missing entries (days before the row's relday). Durations
* are positive, so the totals cannot shrink as the window widens and the
* minimum is the total up to the row's own relday.
egen durationsum = rowmin(tempds1 - tempds28)
drop tempds*

*Calculate app use count up to that day  
foreach day of numlist 1/28{  
  by pidp: egen tempauc`day' = count(pidp) if relday <= `day'
}
  
* Same device as above: the smallest count is the count up to the row's relday.
egen aucount = rowmin(tempauc1 - tempauc28)
drop tempauc*
  
*Calculate receipts scanned up to that day
* Counts are taken over activitytype==1 rows only and are defined only on
* those rows, so rsgap has gaps on rows of other activity types.
foreach day of numlist 1/28{  
  by pidp: egen temprsc`day' = count(pidp) if relday <= `day' & activitytype==1
}
  
egen rsgap = rowmin(temprsc1 - temprsc28)
drop temprsc*
* rsstrt = first relative day on which the individual has an activitytype==1
* row; missing if the individual has none.
by pidp: egen rsstrtgap = min(relday) if rsgap!=.
by pidp: egen rsstrt = min(rsstrtgap)
  
* Fill the gaps: for each day take the largest running count seen so far ...
foreach day of numlist 1/28{
  by pidp: egen rtrs`day' = max(rsgap) if relday <= `day'
}
* ... and read it off at the first non-missing day. This is restricted to rows
* on or after rsstrt because before then the value at the row's own day is
* missing and rowfirst would pick up a later day's count. Rows before the
* first such app use (or with rsstrt missing) get 0 instead.
egen rscount = rowfirst(rtrs1 - rtrs28) if relday >= rsstrt
replace rscount = 0 if relday < rsstrt
drop rtrs* rsgap rsstrt*
  
*Calculate purchases without receipts submitted up to that day
* Identical construction for activitytype==2.
foreach day of numlist 1/28{  
  by pidp: egen temppwc`day' = count(pidp) if relday <= `day' & activitytype==2
}
  
egen pwgap = rowmin(temppwc1 - temppwc28)
drop temppwc*
by pidp: egen pwstrtgap = min(relday) if pwgap!=.
by pidp: egen pwstrt = min(pwstrtgap)
  
foreach day of numlist 1/28{
  by pidp: egen rtpw`day' = max(pwgap) if relday <= `day'
}
egen pwcount = rowfirst(rtpw1 - rtpw28) if relday >= pwstrt
replace pwcount = 0 if relday < pwstrt
drop rtpw* pwgap pwstrt*
    
*Calculate nothing boughts submitted up to that day
* Identical construction for activitytype==3.
foreach day of numlist 1/28{  
  by pidp: egen tempnbc`day' = count(pidp) if relday <= `day' & activitytype==3
}
  
egen nbgap = rowmin(tempnbc1 - tempnbc28)
drop tempnbc*
by pidp: egen nbstrtgap = min(relday) if nbgap!=.
by pidp: egen nbstrt = min(nbstrtgap)
  
foreach day of numlist 1/28{
  by pidp: egen rtnb`day' = max(nbgap) if relday <= `day'
}
egen nbcount = rowfirst(rtnb1 - rtnb28) if relday >= nbstrt
replace nbcount = 0 if relday < nbstrt
drop rtnb* nbgap nbstrt*
  
*Average duration
* Mean duration per app use over everything submitted up to that day.
gen avgdur = durationsum/aucount
 
*App use types proportions
* Share of app uses to date that were receipts (rs), purchases without
* receipts (pw) and nothing-bought reports (nb).
foreach kind in rs pw nb {
  gen `kind'prop = `kind'count/aucount
}

*Remove variables no longer needed
* Once the row-level variables are gone, rows sharing pidp and relday are
* exact duplicates and collapse to one row per person-day.
drop endtime activitytype duration rscount pwcount nbcount
duplicates drop
  
*Reshape wide  
reshape wide aucount avgdur rsprop pwprop nbprop durationsum, i(pidp) j(relday)
  
*Fill in missing app use totals and derive whether they used the app each day
* A day with no app use has no row, hence a missing total: carry the previous
* day's total forward. The app counts as used on a day when the running total
* increased relative to the day before. Day 1 is a usage day by construction.
gen auday1 = 1
forvalues d = 2/28 {
  local prev = `d' - 1
  replace aucount`d' = aucount`prev' if aucount`d' == .
  gen auday`d' = aucount`d' > aucount`prev'
}
  
*Derive dropout indicator 
* Working backwards from day 28: a day counts as dropped out only if the app
* was unused that day and on every later day.
gen dropout1 = 0
gen dropout28 = 1 - auday28
  
forvalues d = 27(-1)2 {
  local next = `d' + 1
  gen dropout`d' = 1 - auday`d'
  replace dropout`d' = 0 if dropout`next' == 0
}
  
 *Fill in missing values for average duration and proportions   
* Same carry-forward for the remaining running quantities.
forvalues d = 2/28 {
  local prev = `d' - 1
  foreach stub in avgdur rsprop pwprop nbprop durationsum {
    replace `stub'`d' = `stub'`prev' if `stub'`d' == .
  }
}
  
*Reshape long  
reshape long aucount avgdur rsprop pwprop nbprop auday dropout durationsum, i(pidp) j(day)
  
*Derive day without scanning  
gen daywithout = 1 - auday
drop aucount auday durationsum
  
*Merge in PSU
* Paths use forward slashes so the do-file also runs outside Windows.
merge m:1 pidp using "`data'/i_indresp_ip.dta", keepusing(i_psu) //Retain analytical sample(6244 = 223 respondents * 28 days )
keep if _merge==3 
* NOTE(review): _merge is left in the saved file; kept as is so the output
* dataset is unchanged.
  
*Label variables
* The "Nothing bought" label belongs to nbprop. It was previously applied to
* rsprop a second time, overwriting the receipts label and leaving nbprop
* unlabelled.
la var day "Count of days since the onset of risk (beginning participating)"
la var avgdur "Average duration of app uses"
la var rsprop "Scanned receipts as proportion of app uses"
la var pwprop "Purchases without receipts as proportion of app uses"
la var nbprop "Nothing bought as proportion of app uses"
la var dropout "Participant has dropped out of the study"

*Recode day to subtract 1: count from onset of risk is the day their participation began (day = 0)
replace day = day-1  
  
*Save RQ3 dataset  
save "`data'/RQ3.dta", replace

/* RQ4 */

**SUBJECTIVE BURDEN

* This first part derives a gross-income poverty threshold from Wave 7
* household data. The result is left in the local macro grspovline, which the
* IP9 covariate derivation further down reads.
* Paths use forward slashes so the do-file also runs outside Windows.

*Load Understanding Society Wave 7 data
use g_fihhmnnet1_dv g_fihhmngrs_dv g_hhdenui_xw g_intdatey g_ieqmoecd_dv ///
using "`data'/g_hhresp.dta", clear

*Replace with =. if income < 0
replace g_fihhmnnet1_dv = . if g_fihhmnnet1_dv  < 0
replace g_fihhmngrs_dv= . if g_fihhmngrs_dv  < 0

*Replace with =. if g_ieqmoecd_dv<0
replace g_ieqmoecd_dv = . if g_ieqmoecd_dv  < 0

*Merge with ONS CPI data and deflate income
* ons_cpi.dta is keyed on year and supplies the variable cpi.
gen year = g_intdatey
merge m:1 year using "`data'/ons_cpi.dta" 
label var cpi "CPI base year 2015 (=100)"
keep if _merge == 3
drop _merge year

*Derive net income inflation adjusted
gen realnetinc = g_fihhmnnet1_dv/(cpi/100) //2015 is the base year for CPI Index (=100)
label var realnetinc "Real hh net income, 2015 base yr"

*Derive gross income inflation adjusted
gen realgrsinc = g_fihhmngrs_dv/(cpi/100) //2015 is the base year for CPI Index (=100)
label var realgrsinc "Real hh gross income, 2015 base yr"

*Derive equivalised net income inflation adjusted 
gen eqnetinc = realnetinc/g_ieqmoecd_dv
label var eqnetinc "Real equivalised hh net income"

*Derive equivalised gross income inflation adjusted 
gen eqgrsinc = realgrsinc/g_ieqmoecd_dv
label var eqgrsinc "Real equivalised hh gross income"

*Calculate poverty line
* 60% of the weighted median of real equivalised net household income.
_pctile eqnetinc [pweight = g_hhdenui_xw], p(50)
di r(r1)
local netpovline = 0.6*r(r1)
di `netpovline'

*Regress net income predicting gross 
* The fitted line translates the net-income threshold into a gross-income one.
regress eqgrsinc eqnetinc
local grspovline = _b[_cons] + (_b[eqnetinc]*`netpovline')
di `grspovline'

*Include IP9 covariates
* Paths use forward slashes so the do-file also runs outside Windows.
use pidp i_scworry* i_scwill* /// Merge in the analytical variables from IP9
i_scstream* i_scsmpost* i_scsmlook* i_scskill* ///
i_scphoto* i_scother* i_scinstal* i_scgps* i_scgames* i_scemail* i_scbuy* ///
i_scbrowse* i_scblutth* i_scbank* i_pdvage i_jbstat i_hidp i_health ///
i_scsmtphone i_sctablet i_finbud* i_bankbal i_finnow ///
i_accountnt i_finhelp i_sex i_hiqual_dv i_jbsemp i_jbhrs i_jbttwt i_jsttwtb ///
i_jshrs i_hidp i_carechk i_psu using "`data'/i_indresp_ip.dta", clear

*Merge in household level variables for poverty indicator
* Individuals whose household is not in the household file are kept.
merge m:1 i_hidp using "`data'/i_hhresp_ip.dta", keepusing(i_ieqmoecd_dv i_fihhmngrs_dv)
drop if _merge ==2
drop _merge
 
*Generate inflation weight for year of interviews - 2016
gen cpi = 101 //Inflation weight for 2016

*Replace with =. if income < 0
replace i_fihhmngrs_dv= . if i_fihhmngrs_dv  < 0

*Replace with =. if i_ieqmoecd_dv<0
replace i_ieqmoecd_dv = . if i_ieqmoecd_dv  < 0
 
*Derive gross income inflation adjusted
gen realgrsinc = i_fihhmngrs_dv/(cpi/100) //2015 is the base year for CPI Index (=100)
label var realgrsinc "Real hh gross income, 2015 base yr"

*Derive equivalised gross income inflation adjusted 
gen eqgrsinc = realgrsinc/i_ieqmoecd_dv
label var eqgrsinc "Real equivalised hh gross income"

*Derive poverty indicator
* The threshold is the local macro grspovline set by the Wave 7 regression
* earlier in this do-file; the two parts must run in the same session.
* NOTE(review): a missing eqgrsinc does not satisfy the inequality, so cases
* with unknown income are coded 0 (not in poverty) rather than missing -
* confirm this is intended.
gen poverty=1 if eqgrsinc < `grspovline' 
replace poverty = 0 if poverty == .
la var poverty "Indicator of whether individual falls below poverty threshold"

*Illness or disability
* Codes -2, -1 and 2 collapse to 0; the attached value label is updated to
* match (the entry for 2 is blanked) and the variable is renamed.
recode i_health (-2 -1 2 = 0)
local hlthlab: value label i_health
label define `hlthlab' 0"no illness or disability reported" 1"reported an illness or disability" 2"", modify
rename i_health disill

*Sex
* Recode 1/2 to a 0/1 indicator with female = 1.
recode i_sex (1=0) (2=1)
label define sex 0 "male" 1 "female"
label values i_sex sex

*Device use
* 1 if either device question equals 1, otherwise 0.
tab i_scsmtphone
tab i_sctablet
gen device = (i_scsmtphone==1 | i_sctablet == 1)
label variable device "Participant reported having a device at IP9"

*Activities variables
* Each activity has a smartphone (sp) and a tablet (tb) item. Values below 1
* become missing and the rest are flipped with 2 - x. The combined indicator
* takes the item of the only device held or, with both devices, is 1 when the
* activity is reported on at least one of them.
label define activitylb 0"No" 1"Yes"

local activities i_scbrowse i_scemail i_scphoto i_scsmlook i_scsmpost i_scbuy ///
i_scbank i_scinstal i_scgps i_scblutth i_scgames i_scstream i_scother

foreach act of local activities {
  foreach dev in sp tb {
    replace `act'`dev' = . if `act'`dev' <1
    replace `act'`dev' = 2-`act'`dev'
  }
  gen `act' = .
  replace `act' = `act'sp if i_scsmtphone==1 & i_sctablet==2
  replace `act' = `act'tb if i_scsmtphone==2 & i_sctablet==1
  replace `act' = `act'sp + `act'tb if i_scsmtphone==1 & i_sctablet==1
  replace `act' = 1 if `act' == 2
  label values `act' `act'sp `act'tb activitylb
}

label variable i_scphoto "Activities on smartphone: taking photos"
label variable i_scbank "activities on smartphone: online banking"
label variable i_scinstal "Activities on smartphone: installing new apps"

* Missing on the three analysis indicators is treated as "No".
recode i_scphoto i_scbank i_scinstal (.=0)

*Willingness
* Items 1, 2, 3, 5 and 8 each have a smartphone (sp) and tablet (tb) version.
* Values below 1 become missing and the scale is reversed with 5 - x so that
* 4 is the most willing. The combined item takes the only device held or,
* with both devices, the larger of the two answers.
label define willinglb 4"very willing" 3"somewhat willing" 2"a little willing" 1"not at all willing "

foreach item in 1 2 3 5 8 {
  local w i_scwill`item'
  foreach dev in sp tb {
    replace `w'`dev' = . if `w'`dev' <1
    replace `w'`dev' = 5-`w'`dev'
  }
  gen `w' = .
  replace `w' = `w'sp if i_scsmtphone==1 & i_sctablet==2
  replace `w' = `w'tb if i_scsmtphone==2 & i_sctablet==1
  replace `w' = max(`w'sp,`w'tb) if i_scsmtphone==1 & i_sctablet==1
  label values `w' `w'sp `w'tb willinglb
}

label variable i_scwill2 "willingness to participate (smartphone): complete an online via survey app"
label variable i_scwill5 "willingness to participate (smartphone): use camera for barcodes"

* Binary versions: 0 = not at all willing, 1 = any willingness.
recode i_scwill2 (1=0) (2/4=1), gen(two_app)
recode i_scwill5 (1=0) (2/4=1), gen(two_cam)

label variable two_app "willingness to participate (smartphone): complete an online via survey app"
label variable two_cam "willingness to participate (smartphone): use camera for barcodes"

* Missing on the binary versions is treated as 0.
recode two_app two_cam (.=0)

*Budgeting
* 1 if any of the five budgeting-method items equals 1; the source items are
* dropped once they have been folded in.
gen finbud = 0

forvalues k = 1/5 {
  replace finbud=1 if i_finbud`k'==1
  drop i_finbud`k'
}

label define finbudlb 0"doesn't keep a budget" 1"keeps a budget"
label values finbud finbudlb
label variable finbud "Participant keeps a budget"

*Bank balance
* Categories 1-3 become 1, all other codes (including -2 and -1) become 0;
* the attached value label is rewritten to match.
recode i_bankbal (1 2 3 = 1) (-2 -1 4 5 6 = 0)
local ballab: value label i_bankbal
label define `ballab' 0"less than once a week" 1 "once a week or more" 3"" 4"" 5"" 6"", modify
rename i_bankbal bankbal
label variable bankbal "Frequency of checking bank balane"

*Level of qualification
* Categories 1-2 are coded 1; every other category and all negative codes are 0.
recode i_hiqual_dv (1/2=1) (-9/-1 3/9=0)
label define i_hiqual_dv_lb 0"Less than degree"1"Degree or above"
label values i_hiqual_dv i_hiqual_dv_lb

*Long work or commute for time constraint var (@AW)
* longwork: more than 40 hours in the job (i_jbsemp == 1, i_jbhrs) or in
* self-employment (i_jbsemp == 2, i_jshrs); 0 for i_jbsemp == -8 or for
* positive hours up to 40; otherwise left missing.
gen longwork = 1 if (i_jbsemp == 1 & i_jbhrs > 40) | (i_jbsemp == 2 & i_jshrs > 40)
replace longwork = 0 if (i_jbsemp == -8) | (i_jbsemp == 1 & i_jbhrs <= 40 & i_jbhrs > 0) | (i_jbsemp == 2 & i_jshrs <= 40 & i_jshrs > 0)

* longcommute: either travel-to-work time above 60; the replace then resets
* it to 0 whenever either time lies within 0-60.
gen longcommute = 1 if (i_jbttwt > 60) | (i_jsttwtb > 60)
replace longcommute = 0 if (i_jbttwt >= 0 & i_jbttwt <= 60) | (i_jsttwtb >= 0 & i_jsttwtb <= 60)

gen busy = 0
replace busy = 1 if longwork == 1 | longcommute == 1
label var busy "Long working or commuting hours"

* Children or caring responsibilities for time constraint var (@AW)
* Forward slash keeps the path usable outside Windows.
merge m:1 i_hidp using "`data'/i_hhresp_ip", keepusing(i_nch02_dv i_nch34_dv)
keep if _merge == 1 | _merge == 3
drop _merge
gen caring = 0
* Both child counts are tested with "> 0" and guarded against system missing.
* The earlier condition used i_nch34_dv with no comparison, which is true for
* any nonzero value, including negative missing-data codes. Stata also ranks
* system missing above every number, so households that did not match
* (_merge == 1) were flagged as caring.
replace caring = 1 if (i_nch02_dv > 0 & i_nch02_dv < .) | (i_nch34_dv > 0 & i_nch34_dv < .) | i_carechk == 1
label var caring "Young children or caring responsibilities"

gen timeconstraint = 1 if busy == 1 | caring == 1
replace timeconstraint = 0 if busy == 0 & caring == 0
la var timeconstraint "Participant is considered time constrained"

*Keep variables for analysis
keep pidp device i_scphoto i_scbank i_scinstal i_scwill2 i_scwill5 two_app two_cam bankbal ///
finbud disill i_sex i_pdvage i_hiqual_dv timeconstraint i_psu poverty

* Merge in subjective burden indicators
* These variables are created and saved in RQ1.dta earlier in this do-file.
* No RQ2.dta is ever written by it, so the merge now reads RQ1.dta rather
* than depending on a file that may be absent or stale.
* Paths use forward slashes so the do-file also runs outside Windows.
merge 1:1 pidp using "`data'/RQ1.dta", keepusing(pidp eop_likely eop_effort ///
eop_interest mdifficulty two_likely two_difficulty two_effort two_interest)
keep if _merge ==3
drop _merge

* Safety net: negative codes on the burden items are excluded.
drop if eop_likely < 0 | eop_effort < 0 | eop_interest < 0 | mdifficulty <0

*Merge in incentive treatments

merge 1:1 pidp using "`data'/sct.dta", keepusing(incent unconditional)
keep if _merge ==3
drop _merge

*Initial incentive
* Recode the two incentive arms (values 2 and 6) to 0/1.

recode incent (2=0) (6=1)

*Additional incentive
* Anything other than 1 (including missing) becomes 0.

replace unconditional = 0 if unconditional != 1

save "`data'/RQ4_sub.dta", replace

**OBJECTIVE BURDEN

* Starts from the RQ4 subjective data still in memory (one row per
* participant) and attaches every app use of those participants.
* Paths use forward slashes so the do-file also runs outside Windows.
merge 1:m pidp using "`data'/apd.dta"
keep if _merge==3
drop _merge
drop if activitystatus == 0 // Incomplete app uses

gen duration = endtime - starttime
replace duration = duration / 1000
la var duration "Duration of app uses"
* Calculate outliers in terms of duration
* Fences on the log scale:
*   lower = Q1 - 1.5*IQR*exp(-3*MC),  upper = Q3 + 1.5*IQR*exp(4*MC)
* with MC read from e(mc) after -medcouple- (not an official Stata command;
* it must be installed separately).
* NOTE(review): (-3, 4) are the medcouple-adjusted boxplot exponents for
* MC < 0; confirm the sign of e(mc), since (-4, 3) applies when MC >= 0.

gen ln_duration = ln(duration)
egen jnk_iqr = iqr(ln_duration)
quietly summarize ln_duration, detail
gen jnk_Q1 = r(p25)
gen jnk_Q3 = r(p75)
gen jnk_eq1 = 1.5*jnk_iqr
medcouple ln_duration
gen jnk_eq2 = -3 * e(mc)
gen jnk_eq3 = 4 * e(mc)
gen jnk_eq4 = exp(jnk_eq2)
gen jnk_eq5 = exp(jnk_eq3)
gen jnk_eq6 = jnk_eq1 * jnk_eq4
gen jnk_eq7 = jnk_eq1 * jnk_eq5 
gen jnk_lb = jnk_Q1 - jnk_eq6
gen jnk_ub = jnk_Q3 + jnk_eq7

summarize duration
summarize duration if ln_duration > jnk_lb & ln_duration < jnk_ub
keep if ln_duration > jnk_lb & ln_duration < jnk_ub

* Generate number of app uses per participant
* Running index of app uses in endtime order, overall and within activity type.

sort pidp endtime
by pidp: gen appuse_no=_n
la var appuse_no "Count of app uses within individual"

sort pidp activitytype endtime
by pidp activitytype: gen activity_no=_n
la var activity_no "By type: count of app uses within individual"

* Keep covariates plus app-use level measures; the jnk_* helpers are discarded.
keep i_psu pidp duration appuse_no activitytype i_pdvage disill bankbal i_sex ///
i_hiqual_dv device i_scphoto i_scbank i_scinstal i_scwill2 i_scwill5 two_app two_cam finbud ///
timeconstraint incent unconditional activity_no poverty devicetype

save "`data'/RQ4_obj.dta", replace
