* Session setup: pin the interpreter version, empty the workspace,
* switch off output paging and close any log left open by an earlier run.
version 15
clear
set more off
capture log close

* Folder that holds the input data set; the exported tables and the log
* written further down in this file go to the same folder.
global j "temporaryfiles"

* Load the analysis data set
use $j/cr-3, clear


***********************************************
* Effect of mode on consent to data linkage
*-- Effect of mode on outcomes
*-- Effect of mode on respondent attitudes
*-- Effect of mode on consent decision process
*-- Does the wording of the consent question influence the effect of mode on outcomes
*-- Outcomes by device used to complete the web survey
***********************************************

* Distribution of the raw timing variable, separately for mode 0 and mode 1
forvalues m = 0/1 {
    summarize time if mode == `m', detail
}

* Number of observations with missing timing data
count if time == .

* Trim the upper tail: values of 275 and above are set to missing in time2.
* The original note describes this as the 1% of observations with the
* longest times; the cut-off itself is hard-coded.
recode time (275/max = .), generate(time2)
count if time2 == .

generate logtime2 = ln(time2)
label variable logtime2 "Log time"

* Distribution of the trimmed timing variable, again by mode
forvalues m = 0/1 {
    summarize time2 if mode == `m', detail
}

* Codes -9 to -1 on pdvage are turned into system missing
mvdecode pdvage, mv(-9/-1)

* Treatment groups
************************

**** Table 1 ****

* Row percentages of mode within each mode allocation and within each
* consent question wording (cdiff)
foreach grp in modealloc cdiff {
    tabulate `grp' mode, row
}

* Covariates used in both balance checks below
local shared_covs edu inwork nchresp hhsize2 gor_dv tenure

* check treatment groups are balanced - mode allocations
* NOTE(review): this list says "ageg" while the next one says "agegroup";
* presumably the same variable reached through abbreviation - confirm.
foreach w in female ageg `shared_covs' {
    tabulate `w' modealloc, column nofreq chi2
}

* check treatment groups are balanced - consent question wording
foreach w in female agegroup `shared_covs' {
    tabulate `w' cdiff, column nofreq chi2
}
	
	
	
* Summary statistics
****************************

**** Appendix Table 1 ****

* Frequencies and column percentages by mode, one table per variable, all
* written to the same file: the first table replaces the file, every later
* table is appended to it.
local write replace
foreach w in female ageg edu consent csundst objundpart ccnfdnc privacy dtascrty cdata csensitive trust_surv trust_uni trust_hmrc decisionb leaflet diagram info {
    tabout `w' mode using $j/sumstats.xls, c(freq col) ptotal(none) `write'
    local write append
}

tabulate objundpart

bysort mode: summarize time2, detail


* Cross-tabs by mode of interview
************************************

**** Tables 2 - 5 ****

* Outcomes tabulated against mode; the same list drives the export,
* the size of the p-value matrix and its row names.
local outcomes consent subj conf priv secur sens tsurv tuni thmrc decision leaflet diagram info
local n_outcomes : word count `outcomes'

* Export column percentages: first table replaces the file, the rest append
local write replace
foreach w of local outcomes {
    tabout `w' mode using $j/bymode.xls, c(col) ptotal(none) `write'
    local write append
}

* Collect the chi-squared p-value of each cross-tab in matrix A
matrix A = J(`n_outcomes', 1, .)
matrix rownames A = `outcomes'

local row = 0
foreach w of local outcomes {
    local ++row
    tabulate `w' mode, column nofreq chi2
    matrix A[`row', 1] = r(p)
}
matrix list A

* Mean of objundpart by mode and test of equality of the two means
mean objundpart, over(mode)
test [objundpart]FTF = [objundpart]Web

* Timing by mode: detailed summaries, then rank-sum test between modes
bysort mode: summarize time2, detail
ranksum time2, by(mode)

tabulate mode intcread
tabulate intcread if mode == 0   // 34% of FTF Rs skimmed the leaflet

* Same rank-sum test, restricted to leaflet==0 and diagram==0
* (those who read neither the leaflet nor the diagram)
ranksum time2 if leaflet == 0 & diagram == 0, by(mode)






* Cross-tabs by mode allocation
************************************

**** Tables 2 - 5 ****

* Outcomes tabulated against mode allocation; the same list drives the
* export, the size of the p-value matrix and its row names.
local outcomes consent subj conf priv secur sens tsurv tuni thmrc decision leaflet diagram info
local n_outcomes : word count `outcomes'

* Export column percentages: first table replaces the file, the rest append
local write replace
foreach w of local outcomes {
    tabout `w' modealloc using $j/bymodealloc.xls, c(col) ptotal(none) `write'
    local write append
}

* Collect the chi-squared p-value of each cross-tab in matrix B
matrix B = J(`n_outcomes', 1, .)
matrix rownames B = `outcomes'

local row = 0
foreach w of local outcomes {
    local ++row
    tabulate `w' modealloc, column nofreq chi2
    matrix B[`row', 1] = r(p)
}
matrix list B

* Mean of objundpart by mode allocation and test of equality of the means
mean objundpart, over(modealloc)
test [objundpart]_subpop_1 = [objundpart]_subpop_2

* Timing by mode allocation: detailed summaries, then rank-sum test
bysort modealloc: summarize time2, detail
ranksum time2, by(modealloc)

* Same rank-sum test, restricted to leaflet==0 and diagram==0
* (those who read neither the leaflet nor the diagram)
ranksum time2 if leaflet == 0 & diagram == 0, by(modealloc)


* IV regressions
************************************

* Every outcome is regressed on mode, with mode instrumented by mode
* allocation. Each fit is stored under the outcome's own name and all
* fits are exported together (coefficient, s.e., p-value; constant dropped).
local iv_outcomes consent subj objundpart conf priv secur sens tsurv tuni thmrc procon logtime2 leaflet diagram info
foreach w of local iv_outcomes {
    ivreg `w' (mode = modealloc)
    estimates store `w'
}
estout `iv_outcomes' using $j/ivs.xls, ///
    cells(b(fmt(3)) se(fmt(3)) p(fmt(3))) drop(_cons) replace


* The four models below have a log outcome, so exp(b) is the multiplicative
* effect and (exp(b)-1)*100 the percentage change; both are displayed.

* calculate % change in response time due to mode
ivreg logtime2 (mode = modealloc)
local ratio = exp(_b[mode])
display `ratio'
display (`ratio' - 1) * 100

* response time for those who neither read leaflet nor diagram
ivreg logtime2 (mode = modealloc) if leaflet == 0 & diagram == 0
local ratio = exp(_b[mode])
display `ratio'
display (`ratio' - 1) * 100


* log response time regressed on mode
regress logtime2 mode
local ratio = exp(_b[mode])
display `ratio'
display (`ratio' - 1) * 100

* log response time regressed on mode allocation
regress logtime2 modealloc
local ratio = exp(_b[modealloc])
display `ratio'
display (`ratio' - 1) * 100


* objund - dropping all observations with any missing test items
ivreg objund (mode = modealloc)



* Interaction of question wording and mode allocation
**********************************************************

* The same 2SLS specification is fitted for four outcomes: mode and its
* interaction with cdiff are instrumented by mode allocation and its
* interaction with cdiff. Fits are stored as r1-r4 in list order.
local k = 0
foreach y in consent subj objundpart conf {
    local ++k
    ivregress 2sls `y' i.cdiff (i.cdiff#i.mode i.mode = i.cdiff#i.modealloc i.modealloc)
    estimates store r`k'
}

estout r1 r2 r3 r4 using $j/interactions.xls, cells(b(fmt(3)) se(fmt(3)) p(fmt(3))) stats(r2) replace




* Does device used to complete the web survey relate to data security concerns, 
* how respondents processed the consent request, etc?
**********************************************************************

* Binary indicators derived from the categorical items info and decision.
* A bare logical expression such as info==1 evaluates to 0 when the source
* value is missing, which would classify item non-respondents as 0 and keep
* them in every model that uses these indicators. The "if !missing()"
* qualifier leaves the indicator missing whenever the source item is missing.
generate info2 = info == 1 if !missing(info)
label variable info2 "Amount of information too much"

generate gut = decision == 2 if !missing(decision)
label variable gut "Gut feeling"


* Sample composition by device group
* Variables compared across device groups; the same list drives the export,
* the size of the p-value matrix and its row names.
local demog female agegroup edu inwork gor_dv tenure hhsize
local n_demog : word count `demog'

* Export column percentages: first table replaces the file, the rest append
local write replace
foreach w of local demog {
    tabout `w' device using $j/devicecomp.xls, c(col) ptotal(none) `write'
    local write append
}


* Collect the chi-squared p-value of each cross-tab in matrix C
matrix C = J(`n_demog', 1, .)
matrix rownames C = `demog'

local row = 0
foreach w of local demog {
    local ++row
    tabulate `w' device, row nofreq chi2
    matrix C[`row', 1] = r(p)
}
matrix list C


* Outcomes regressed on device plus socio-demographic controls, to adjust
* for the characteristics of the users of each device type.
* Binary outcomes: logit, then average marginal effects posted as the
* active results. Continuous outcomes: OLS.
* For every model the p-value of the test 2.device = 3.device goes into D.
local controls female pdvage i.edu inwork i.gor_dv i.tenure hhsize
local binary_outcomes consent subj conf priv secur sens tsurv tuni thmrc procon gut leaflet diagram info2
local linear_outcomes objundpart logtime2
local n_models : word count `binary_outcomes' `linear_outcomes'

matrix D = J(`n_models', 1, .)
matrix rownames D = `binary_outcomes' `linear_outcomes'

* The row counter runs on across both loops so that rows follow the row names
local row = 0
foreach w of local binary_outcomes {
    local ++row
    logit `w' i.device `controls'
    margins, dydx(*) post
    estimates store `w'
    test 2.device = 3.device
    matrix D[`row', 1] = r(p)
}

foreach w of local linear_outcomes {
    local ++row
    regress `w' i.device `controls'
    estimates store `w'
    test 2.device = 3.device
    matrix D[`row', 1] = r(p)
}
matrix list D

* Export only the two device terms from every stored model
estout consent subj objundpart conf priv secur sens tsurv tuni thmrc procon gut logtime2 leaflet diagram info2 using $j/devicemodels.xls, ///
    cells(b(fmt(3)) se(fmt(3)) p(fmt(3))) keep(2.device 3.device) replace


* calculate % change in response time due to device
* The covariate set matches the logtime2 device model that is stored and
* exported earlier in this file (including hhsize), so that the percentage
* shown here is derived from the same coefficient as the exported table.
* With a log outcome, exp(b) is the multiplicative effect of 3.device
* relative to the base device category and (exp(b)-1)*100 the % change.
regress logtime2 i.device female pdvage i.edu inwork i.gor_dv i.tenure hhsize
display _b[3.device]
local ratio = exp(_b[3.device])
display `ratio'
display (`ratio' - 1) * 100
	

* Everything from here to the matching "log close" is written to its own log
log using $j/leebounds_objund.log, replace

*****************************************************************************
* Lee bounds: effect of mode allocation on the objective understanding score
* (item non-response is higher among web-first than ftf-first respondents)
* NOTE(review): leebounds does not appear to be an official Stata command;
* presumably a community-contributed package - confirm it is installed.
*****************************************************************************

* Baseline bounds, with a confidence interval for the treatment effect
leebounds objund modealloc, cieffect

/******************************************
Interpretation 
*******************************************
estimates bounds for difference in mean objund scores between treatment groups 
(mode allocation), when the rate of missingness in the outcome (objund) 
is unequal between the treatment groups
assumes all excess missing web-first 
cases would have been all high scorers or all low scorers
-------------------------------------------
2608 observations in the data
2358 with non-missing objund score
here: observations with any test items missing have a missing objund score
proportion missing is 0.0712 higher among those allocated to web-first than those allocated to ftf-first 
=> the bounds are calculated by dropping the 7.12% of ftf-first respondents with the highest/lowest objund scores respectively
i.e. assumes the excess missing cases among web-first respondents are all high scorers or all low scorers
the CI for the treatment effect is [-0.5861  0.0640]
=> if we assume that the missing web-first cases are all low scorers, the difference between treatment groups is b=-.47, p=0.000 
=> if we assume that the missing web-first cases are all high scorers, the difference is b=-.05, p=0.457 

Results below:
* can tighten the bounds a tiny bit, if we specify 90 instead of 95% CI
  but doesn't make much difference
* in principle can tighten bounds if include covariates that are predictive of missingness 
  (bounds calculated within cells defined by covariates, then weighted average bound calculated)
  have tried using education or agegroup but hardly shifts the bounds
* dropping only observations where all 8 test items are missing
  for others counting missing test items as incorrect
  reduces excess missingness from 0.0712 to 0.0661
  tightens the 90% CI bounds a bit to [-0.6260  0.0260] - but upper bound still n.s.
******************************************/


* 90% CI, bootstrap standard errors (250 replications)
* NOTE(review): no "set seed" is visible before this bootstrap, so the
* interval may differ between runs unless a seed is set elsewhere - confirm.
***********
leebounds objund modealloc, cieffect level(90) vce(boot, reps(250))


* Tightened bounds using one covariate at a time: education, then age group
***********************************
foreach cov in edu agegroup {
    leebounds objund modealloc, cieffect tight(`cov')
}


* Dropping only observations with all 8 test questions missing
**************************************************************
leebounds objundpart modealloc, cieffect

log close



* Further variants for objundpart, run after the log has been closed.
* Per the original note, none of them improves on the bounds above.
local boot90 level(90) vce(boot, reps(250))

leebounds objundpart modealloc, cieffect tight(agegroup)
leebounds objundpart modealloc, cieffect `boot90'
leebounds objundpart modealloc, cieffect `boot90' tight(agegroup)





exit



