*** Syntax file for "Answer refused: Exploring how item non-response on domestic abuse questions in a social survey affects analysis and results"
*** Author: Valeria Skafida valeria.skafida@ed.ac.uk
*** Syntax file for Stata 16.0 to be used with Growing Up in Scotland survey data available under Special Licence at UK Data Service beta.ukdataservice.ac.uk/datacatalogue/series/series?id=200020


** Syntax begins

* NOTE(review): "stop" is not a built-in Stata command, so running this file
* top-to-bottom presumably halts here with an "unrecognized command" error.
* This looks like a deliberate guard against running the whole file unattended;
* confirm, and run the sections below selectively.
stop
clear
* Working directory. The intermediate files temp2-temp6 saved below have no
* path of their own, so they are written to (and later read from) this folder.
cd "USER TO SET CENTRAL DIRECTORY - replace this text with your directory"

* Sweep 1 file: read only the ID plus the listed variables, sort on the merge
* key and save as temp2.
use Idnumber DaHGnp03 MaHGr31 DaHGnp04 DaHGmag5 MaZspe01 using "C:\gus_cohort1_sw1_b_v4_protect.dta"
sort Idnumber 
save temp2, replace

* Sweep 2 file -> temp3 (the wildcard picks up every variable starting MbMrel).
use Idnumber DbHGnp03 MbHGr31  MbHGrsp03 DbPartID DbHGnp04 MbMrel* DbHGmag5 MbZspe02 using "C:\gus_cohort1_sw2_b_nov11_protect.dta"
sort Idnumber 
save temp3, replace

* Sweep 3 file -> temp4.
use Idnumber  DcHGnp03 McHGr31  McHGrsp03 DcPartID DcHGnp04 DcHGmag5 using "C:\gus_cohort1_sw3_b_nov11_protect.dta"
sort Idnumber 
save temp4, replace

* Sweep 4 file -> temp5 (the wildcard picks up every variable starting MdMrel).
use Idnumber DdHGnp03 MdHGr31 MdHGrsp03 DdPartID DdHGnp04 MdMrel* DdHGmag5 using "C:\gus_cohort1_sw4_b_nov11_protect.dta"
sort Idnumber 
save temp5, replace

* Sweep 5 file -> temp6.
use Idnumber DeHGnp03 MeHGr31 MeHGrsp03 DePartID DeHGnp04 DeHGmag5 using "C:\gus_cohort1_sw5_b_protect.dta"
sort Idnumber 
save temp6, replace

* Sweep 6 file is loaded in full and becomes the master dataset.
* NOTE(review): later sections use DaMedu01, DbMedu01, DcMedu01, DeMedu01 and
* DaEthGpM, none of which appear in the sweep 1-5 varlists above, so they are
* presumably already present in GUS_Sweep6.dta - confirm.
use "C:\GUS_Sweep6.dta" 
sort Idnumber
* Old-style merge syntax (key variable before "using", several files in one
* call): the master and all five temp files must already be sorted on Idnumber.
merge Idnumber using temp2 temp3 temp4 temp5 temp6
sort Idnumber 

	set more off
* Convert the negative codes -9 ... -1 in every variable to the extended missing
* values .a ... .i (so -3 becomes .g and -1 becomes .i). Later sections test for
* .g and .i explicitly.
	mvdecode _all, mv(-9=.a\-8=.b\-7=.c\-6=.d\-5=.e\-4=.f\-3=.g\-2=.h\-1=.i)


** don't remember: 1 if any of the three "don't remember" items equals 1, else 0
tab1 MfHdv015 MfHdv067 MfHdv037
generate dvdontrem = inlist(1, MfHdv015, MfHdv067, MfHdv037)
tabulate dvdontrem

** don't wish to respond: 1 if any of the three "don't wish to respond" items equals 1, else 0
tab1 MfHdv016 MfHdv068 MfHdv038
generate dvnonres = inlist(1, MfHdv016, MfHdv068, MfHdv038)
tabulate dvnonres

* missing data *
* anydvmiss: 1 if any of the 13 DV items equals 1; otherwise 0 if any of them
* equals 0; otherwise left missing (no item coded 0 or 1).
generate anydvmiss = cond(                                           ///
    inlist(1, MfHdv011, MfHdv012, MfHdv013,                          ///
              MfHdv061, MfHdv062, MfHdv063, MfHdv064, MfHdv065,      ///
              MfHdv031, MfHdv032, MfHdv033, MfHdv034, MfHdv035), 1,  ///
    cond(                                                            ///
    inlist(0, MfHdv011, MfHdv012, MfHdv013,                          ///
              MfHdv061, MfHdv062, MfHdv063, MfHdv064, MfHdv065,      ///
              MfHdv031, MfHdv032, MfHdv033, MfHdv034, MfHdv035), 0, .))
tabulate anydvmiss, missing

** Any DV across all 13 items.
** Coding rule shared by every indicator in this section: 1 if any listed item
** equals 1; otherwise 0 when MfHdv05 is .i; otherwise missing.
generate anydv = cond(                                               ///
    inlist(1, MfHdv011, MfHdv012, MfHdv013,                          ///
              MfHdv061, MfHdv062, MfHdv063, MfHdv064, MfHdv065,      ///
              MfHdv031, MfHdv032, MfHdv033, MfHdv034, MfHdv035), 1,  ///
    cond(MfHdv05 == .i, 0, .))
tabulate anydv

* coercive control (items MfHdv011-MfHdv013)
tab1 MfHdv011 MfHdv012 MfHdv013 
generate anydv_g = cond(inlist(1, MfHdv011, MfHdv012, MfHdv013), 1, ///
    cond(MfHdv05 == .i, 0, .))
tabulate anydv_g

* physical (items MfHdv061-MfHdv065)
generate anydv_p = cond(                                             ///
    inlist(1, MfHdv061, MfHdv062, MfHdv063, MfHdv064, MfHdv065), 1,  ///
    cond(MfHdv05 == .i, 0, .))
tabulate anydv_p

* threats (items MfHdv031-MfHdv035)
generate anydv_t = cond(                                             ///
    inlist(1, MfHdv031, MfHdv032, MfHdv033, MfHdv034, MfHdv035), 1,  ///
    cond(MfHdv05 == .i, 0, .))
tabulate anydv_t


******************************************
** create missing income response category
** (.i on DfEqv5 is turned into its own valid category, coded 6)
tab DfEqv5,m
replace DfEqv5 = 6 if DfEqv5 == .i
tab DfEqv5,m

**** Education
** cases coded 5 on DfMedu01 are removed from the dataset
tab DfMedu01
drop if DfMedu01 ==5


** incqui1: 1 if DfEqv5 is category 1, 0 for the other non-missing categories.
** Stata orders every missing value above all numbers, so a bare "DfEqv5 > 1"
** is also true when DfEqv5 is missing and would code those cases 0. The
** !missing() guard leaves them missing instead.
** NOTE(review): category 6 (the missing-income category created above) is still
** coded 0 here, whereas incqui5 below leaves it missing - confirm which is intended.
tab DfEqv5 
gen incqui1 = 1 if DfEqv5 ==1
replace incqui1 = 0 if DfEqv5 > 1 & !missing(DfEqv5)
tab incqui1 DfEqv5 , m

** incqui5: 1 if DfEqv5 is category 5, 0 for categories below 5.
** "DfEqv5 < 5" is never true for missing values, so no guard is needed.
tab DfEqv5 
gen incqui5= 1 if DfEqv5 ==5
replace incqui5 = 0 if DfEqv5 < 5
tab incqui5 DfEqv5 , m

** create number of children var: copy of DfHGnmkd, top-coded so that 4 means "more than 3"
tab DfHGnmkd
gen numberchildren = DfHGnmkd
* Stata orders every missing value above all numbers, so a bare "DfHGnmkd > 3"
* is also true when DfHGnmkd is missing. Without the !missing() guard those
* cases would be recoded to 4, count as non-missing in the complete-case filter
* and enter the regressions as if they had 4+ children.
replace numberchildren = 4 if DfHGnmkd > 3 & !missing(DfHGnmkd)
tab numberchildren

** Creating complete case dataset for regression
** Filling in some missing age data by cross-checking other sweeps.
** Inspect missingness on the model variables first.
tab1 anydv DfMedu01 DfEqv5 DfMsec10 DaHGmag5 numberchildren MfHGsx1, m
* List DV-reporting cases whose age band is .g in sweep 1 (first list) or in
* sweep 6 (second list, shown next to every D*HGmag5 variable in the data).
list Idnumber DfHGmag5 numberchildren if DaHGmag5  ==.g & anydv ==1
list Idnumber D*HGmag5 numberchildren if DfHGmag5  ==.g & anydv ==1

tab DfMedu01 anydv, m

* Fill in both directions between sweep 1 and sweep 6. The order matters: the
* second line reads DaHGmag5 after the first line may already have changed it.
replace DaHGmag5 = DfHGmag5 if DaHGmag5 == .g & DfHGmag5 != .g
replace DfHGmag5 = DaHGmag5 if DfHGmag5 == .g & DaHGmag5 != .g


******* Cross-checking education across sweeps to complete the picture for missing cases
tab1 DaMedu01 DbMedu01 DcMedu01 DeMedu01 DfMedu01 , m
list Idnumber DaMedu01 DbMedu01 DcMedu01 DeMedu01 if DfMedu01 == .g

* Every .g case except Idnumber 1006672 takes its sweep 1 value.
replace DfMedu01 =  DaMedu01 if DfMedu01 == .g & Idnumber != 1006672

* Re-list to see which cases are still .g after the sweep 1 fill.
list Idnumber DaMedu01 DbMedu01 DcMedu01 DeMedu01 if DfMedu01 == .g

* Idnumber 1006672 is handled by hand and takes its sweep 5 value instead.
* NOTE(review): presumably its sweep 1 value is unusable - confirm against the list above.
replace DfMedu01 =  DeMedu01 if DfMedu01 == .g & Idnumber == 1006672

* Cases coded .i on DfMedu01 are only listed, not changed.
list Idnumber DaMedu01 DbMedu01 DcMedu01 DeMedu01 if DfMedu01 == .i


/* 
Generate a DV refusal variable (dvref) - is refusal evenly distributed?
It includes the category .i == refused the entire module.
To make sense it must be compared with how many reported anydv == 1,
because non-response has to be judged relative to response to the DV items.

Coding is order-dependent: each replace below can overwrite the value set by
an earlier one, so the statements must stay in this sequence.
(MfHdv015/067/037 are the "don't remember" items and MfHdv016/068/038 the
"don't wish to respond" items used earlier in this file.)
 */


tab anydv
gen dvref = .
replace dvref = 0 if anydv == 0 // answered no to all violence questions
replace dvref = 1 if MfHdv011 == .i // no response to module (overrides 0)
replace dvref = 2 if anydv == 1 & (MfHdv015 != 1 & MfHdv016 != 1 & MfHdv067 != 1 & MfHdv068 != 1 & MfHdv037 != 1 & MfHdv038 != 1) // DV and none of the six refused/forgot items set
replace dvref = 3 if anydv == 1 & (MfHdv015 == 1 | MfHdv016 == 1 | MfHdv067 == 1 | MfHdv068 == 1 | MfHdv037 == 1 | MfHdv038 == 1) // DV but refused or forgotten on any question
replace dvref = 4 if MfHdv011 != .i & (MfHdv015 == 1 | MfHdv016 == 1) & (MfHdv037 == 1 | MfHdv038 == 1) & (MfHdv067 == 1 | MfHdv068 == 1) // replied to module but refused/forgot in all three item groups (overrides 0, 2 and 3)
replace dvref = 3 if Idnumber == 1006154  // this case reported emotional violence but also refusal of each question, so it does not fit the scheme above; forced to 3 by hand
lab def dvref 0 "No DV" 1 "DV unkown - Skipped module" 2 "DV & no refused/forgot" 3 "DV & some refused" 4 "DV unknown - all refused", replace 		
lab val dvref dvref 
tab dvref

* Check the six refused/forgot items overall and for the "all refused" group.
tab1 MfHdv015 MfHdv016 MfHdv067 MfHdv068 MfHdv037 MfHdv038  
list MfHdv015 MfHdv016 MfHdv067 MfHdv068 MfHdv037 MfHdv038  if dvref ==4 

*** dvref2: dvref with the two "DV unknown" categories merged (4 folded into 1)
generate dvref2 = cond(dvref == 4, 1, dvref)
label define dvref2 0 "No DV" 1 "Skipped module/refused all" 2 "DV & no refused/forgot" 3 "DV & some refused", replace
label values dvref2 dvref2
tabulate dvref2 dvref
tabulate dvref2

*** dvref3: dvref2 with every kind of refusal in one category (3 folded into 1)
generate dvref3 = cond(dvref2 == 3, 1, dvref2)
label define dvref3 0 "No DV" 1 "Skipped module/refused all/refused some" 2 "DV & no refused/forgot", replace
label values dvref3 dvref3
tabulate dvref3 dvref
tabulate dvref3


*** dvref4: separates any reported DV from full refusals.
*** 2 when dvref2 is 2 or 3; else 1 when dvref2 is 1; else 0 when anydv is 0;
*** anything left keeps its dvref2 value.
generate dvref4 = cond(inlist(dvref2, 2, 3), 2, ///
    cond(dvref2 == 1, 1,                        ///
    cond(anydv == 0, 0, dvref2)))
label define dvref4 0 "No DV" 1 "Skipped module/refused all" 2 "DV incl.refused some", replace
label values dvref4 dvref4
tabulate dvref4 dvref
tabulate dvref4


* 41 mothers report having experienced any DV but also refused/forgot which partner for at least one DV type.
* These are the dvref == 3 cases ("DV & some refused"). The original filter used
* dvref == 1, which can never coincide with anydv == 1: the dvref coding moves
* every anydv == 1 case to 2, 3 or 4 after code 1 has been assigned, so that
* list was always empty.
list MfHdv015 MfHdv016  MfHdv067 MfHdv068 MfHdv037 MfHdv038 if anydv ==1 & dvref ==3


** create 'generous' measure of DV - classes all item non-response as DV
tab anydv 
tab dvref

* anydvALL: 0 when anydv is 0; then 1 (overriding 0) for reported DV, skipped
* module (dvref 1), partial refusal (dvref 3) or full refusal (dvref 4).
gen anydvALL = .
replace anydvALL = 0 if anydv ==0
replace anydvALL = 1 if anydv ==1 | dvref == 1 | dvref == 3 | dvref == 4
tab anydvALL 
 

* Relationship toxicity scale at age 2 and age 4: standardized alpha scale built
* from five MbMrel items and the five matching MdMrel items.
alpha MbMrel03 MbMrel04 MbMrel07 MbMrel08 MbMrel09 ///
      MdMrel03 MdMrel04 MdMrel07 MdMrel08 MdMrel09, ///
      generate(DbdHEscale) std


** Binary version of the scale: 1 below the cut-off -.3894436, 0 at or above it,
** missing where the scale score itself is missing.
tabulate DbdHEscale
generate DbdHEscaleB = cond(DbdHEscale == ., ., DbdHEscale < -.3894436)
tabulate DbdHEscaleB


* Prefix every value label with its numeric code
numlabel _all, add

*********************************************************************************
* Declare the survey design: PSU DfPSU, probability weight DfWTbrth, strata DfStrat

svyset, clear
svyset DfPSU [pweight = DfWTbrth], strata(DfStrat)


* Complete-case restriction: cond2 counts how many of the seven model variables
* are non-missing; only cases with all seven present are retained.
egen cond2 = rownonmiss(DfMedu01 DfEqv5 DfMsec10 DfHGmag5 DaEthGpM numberchildren MfHGsx1)
tabulate cond2 // 5 missing education data, 7 missing Age data. 
drop if cond2 != 7

**Table 2 contents
** Survey-weighted row percentages with confidence intervals and chi2 tests,
** appended table by table to domviprevalence.csv.
** Append order: the four socio-demographic variables by dvref4, the same four
** by dvref3, then the relationship-scale dummy by dvref3 and by dvref4.
foreach outcome in dvref4 dvref3 {
    foreach rowvar in DfMedu01 DfEqv5 DfMsec10 DfHGmag5 {
        tabout `rowvar' `outcome' using "domviprevalence.csv", app npos(col) nnoc c(row ci) f(1 1) svy style(csv) per cisep(-) stats(chi2)
    }
}

foreach outcome in dvref3 dvref4 {
    tabout DbdHEscaleB `outcome' using "domviprevalence.csv", app npos(col) nnoc c(row ci) f(1 1) svy style(csv) per cisep(-) stats(chi2)
}


**Table 3 contents
** Survey-weighted multinomial logit of dvref3 with base outcome 0, reported as
** relative-risk ratios: do those with full and partial non-response differ
** from each other?
eststo clear

* Model dv5: socio-demographic covariates only
svy: mlogit dvref3                                        ///
    ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5     ///
    i.DaEthGpM ib1.numberchildren ib2.MfHGsx1,            ///
    rrr baseoutcome(0)
eststo dv5

* Model dv6: dv5 plus the binary relationship scale
svy: mlogit dvref3                                        ///
    ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5     ///
    i.DaEthGpM ib1.numberchildren ib2.MfHGsx1             ///
    i.DbdHEscaleB,                                        ///
    rrr baseoutcome(0)
eststo dv6

* Export both models side by side (exponentiated coefficients with CIs)
esttab dv5 dv6 using "domviregressions.rtf", ///
    replace ci nogaps b(2) stats(N r2) wide eform


** Table 4 contents
** Comparing conservative (anydv) and generous (anydvALL) measure of any DV
** using survey-weighted logit models with identical covariates.
eststo clear
set more off
svy: logit anydv ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5 i.DaEthGpM ib1.numberchildren ib2.MfHGsx1   , or
eststo dv3

svy: logit anydvALL  ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5 i.DaEthGpM ib1.numberchildren ib2.MfHGsx1  , or
eststo dv4
* The Table 3 models are exported to this same file earlier in the script.
* Writing with "replace" here discarded that output, so Table 4 is appended
* instead. esttab creates the file if it does not exist yet.
esttab dv* using "domviregressions.rtf", append ci nogaps b(2) stats(N r2) wide eform

* Marginal effects at the means for both models. "post" replaces the logit
* results in memory with the margins results, so each model is re-estimated
* immediately before its margins call.
svy: logit anydv ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5 i.DaEthGpM ib1.numberchildren ib2.MfHGsx1   , or
margins, dydx(*) atmeans post 
est store margins3
svy: logit anydvALL  ib1.DfMedu01 ib5.DfEqv5 ib1.DfMsec10 ib4.DfHGmag5 i.DaEthGpM ib1.numberchildren ib2.MfHGsx1  , or
margins, dydx(*) atmeans post 
est store margins4
esttab margins* using "margins.rtf", replace ci nogaps b(2) stats(N r2) wide


** end of syntax file **
