********************************************************************************
********************************************************************************
* Replication-File for the research article: 
*			The blind spot: Studying the association between 
*			survey non-response and adherence to COVID-19 governmental 
*			regulations in a population-based German web-survey 
*
*			Authors: Martin Wetzel & Bettina Hünteler
*
*			Journal: Survey Research Methods
*
*			November 2022
********************************************************************************
********************************************************************************

* Project directories. The "~" prefixes are placeholders for the local project root.
* Forward slashes are used throughout: Stata accepts them on every operating system,
* whereas the backslash is Windows-only and is also Stata's macro-escape character.
global temp     "~/temp"            // intermediate data sets, figures, result tables
global do       "~/do"              // helper do-files called from this script
global data     "~/data"            // COVID-19 data
global data2    "~/Data/Stata"      // pairfam SUF w11
global data_w12 "~/Data/Stata"      // pairfam SUF w12

* NOTE(review): plotplainblind looks like a user-written graph scheme - confirm it is installed
set scheme plotplainblind
putexcel set "$temp/results.xlsx", sheet(corr) modify


**
** Load data sets
********************************************************************************
* COVID-19 web survey: every variable receives the suffix _c, the identifier stays "id"
use "$data/anchor12_covid-19.dta", clear
ds
rename * *_c
rename id_c id

* Wave-11 anchor variables that the merge below pulls in.
* The trailing words on each line name the scale that is built from the items.
global varlist1 ///
	sex_gen doby_gen incnet isced pisced misced lfs bula east enrol age ///
	nkids relstat cohort school mcs pcs per8i* migstatus gkpol hhsizemrd ///
	intcont intsex intage intedu intdur mmrd fmrd ///
	per1i2 per1i7 per1i13 ///                    self-esteem             selfesteem
	per2i1 per2i2 per2i3 per2i4 ///
	per2i5 per2i6 per2i7 per2i8 ///              depressiveness          depressive
	per2i9 per2i10 ///
	inc27i2 inc27i3 ///                          economic deprivation    ecodep2_hh
	/// Big 5
	per3i9 per3i4 per3i14 per3i19 ///            neuroticism             neurot
	per3i1 per3i11 per3i6 per3i16 ///            extraversion            extrav
	per3i2 per3i12 per3i17 per3i7 ///            agreeableness           agreeable
	per3i8 per3i3 per3i13 per3i18 ///            conscientiousness       conscient
	per3i21 per3i5 per3i10 per3i15 per3i20 ///   openness                openness
	/// Dark Triad
	/// NOTE(review): per8i2 is listed for two scales and per8i5 for none; the
	/// wildcard per8i* above already requests every per8 item, so the merge is unaffected
	per8i1 per8i2 per8i3 ///                     machiavellianism        machiav
	per8i2 per8i4 per8i6 ///                     psychopathy             psychop
	per8i7 per8i9 per8i12 ///                    narcissism: rivalry     riv
	per8i8 per8i10 per8i11 ///                   narcissism: admiration  adm
	/// Trust
	val7i1 val7i2                                // trust                trust

merge 1:1 id using "$data2/anchor11.dta", keepusing($varlist1)
drop if _merge == 1						// 35 cases in covid-19 survey but not in W11 (no refresher!)
rename _merge m11

		* im Release 11 fehlen im anchor11-Datensatz ein paar Variablen: 
		*	sdq1i* (SDQ), rta1/2/3*  (Risikoverhalten) und 
		*	lsr10/12* (Nutzung Internet/Nutzungsgrund Soz. Netzwerke).
		* Engl.: In Release 11.0 some variables have been forgotten: 
		*    Either use here release 12 or merge forgotten variables	
		
* Variables taken from the supplementary file (not contained in anchor11)
global varlist2 ///
	/// Internet use
	lsr10i1 lsr10i2 ///                              social networks / messenger
	///
	/// SDQ behaviour of the anchor
	sdq1i5 sdq1i7 sdq1i12 sdq1i18 sdq1i22 ///        conduct problems     conduct
	sdq1i3 sdq1i8 sdq1i13 sdq1i16 sdq1i24 ///        emotional symptoms   emotion
	sdq1i1 sdq1i4 sdq1i9 sdq1i17 sdq1i20 ///         prosocial behavior   prosoc
	sdq1i2 sdq1i10 sdq1i15 sdq1i21 sdq1i25 ///       hyperactivity        hyper
	sdq1i11 sdq1i14 sdq1i6 sdq1i19 sdq1i23           // peer problems     peerprob

* Keep every record already in memory; records found only in the supplementary file are discarded
merge 1:1 id using "$data2/anchor11_MissingVars.dta", keepusing($varlist2)
drop if _merge == 2
rename _merge m11b

* Partner participation in wave 11: 1 if the anchor's id is found in the partner file
merge 1:1 id using "$data2/partner11.dta", keepusing(id)
generate partn_part_w11 = (_merge == 3)
drop _merge

* Combine the ids of the two wave-12 files (CAPI and CATI) into one temporary data set
preserve
	use id wave using "$data_w12/anchor12_capi.dta", clear
	merge 1:1 id using "$data_w12/anchor12_cati.dta", keepusing(id wave)
	drop _merge
	save "$temp/anchor12.dta", replace
restore

* Wave-12 participation: 1 if the id appears in either wave-12 file;
* ids that exist only in wave 12 are removed again
merge 1:1 id using "$temp/anchor12.dta"
generate part_w12 = (_merge == 3)
keep if _merge != 2
drop _merge

* Turn the negative missing codes into extended missing values
mvdecode _all, mv(-7=.g \ -3=.c \ -2=.b \ -1=.a)

* Helper do-file; the scale names collected in varlist3 are expected to exist afterwards
do "$do/01_genvars.do"

rename selfesteem esteem
global varlist3 ///
	esteem depressive ecodep2_hh ///
	neurot extrav agreeable conscient openness machiav psychop riv adm trust ///
	conduct emotion prosoc hyper peerprob miss*

* Add the data set saved as missings.dta (the miss* variables are used from here on)
merge 1:1 id using "$temp/missings.dta"
drop _merge

* Second recode pass for the variables created or merged after the first one
mvdecode cor49* cor19i1 $varlist1 $varlist2 $varlist3, mv(-7=.g \ -3=.c \ -2=.b \ -1=.a)
mvdecode pcs mcs, mv(-77=.g)

* Restrict the sample to cohort 4
keep if cohort == 4

tabstat $varlist3, statistics(mean sd n)


*** Build dependent variable
********************************************************************************
describe cor49*
/*
twoway histogram cor49i1, frac legend(label(1 "I1: Household")) name(V_dplot, replace) color(ananas%70)  ///
		|| histogram cor49i1, frac legend(label(2 "")) color(ananas%70) recast(line) ///
		|| histogram cor49i2, frac legend(label(3 "I2: Older")) color(reddish%70)  ///
		|| histogram cor49i2, frac legend(label(4 "")) color(reddish%70) recast(line) ///
		|| histogram cor49i3, frac legend(label(5 "I3: Distance")) color(vermillion%70)  ///
		|| histogram cor49i3, frac legend(label(6 "")) color(vermillion%70) recast(line) ///
					legend(order(1 3 5)) xlabel(1 "1: never" 2 3 4 5 "5: always") 

tab cor49i1 cohort, m
pca cor49*
factor cor49*, pcf
alpha cor49*, item std 
*/


* Number of unanswered cor49 items per person
egen rowmi = rowmiss(cor49*)
tabulate rowmi, missing											// 15 item-nonresponses

* Mean score over the answered items (item sum divided by the number of answered items),
* computed only for persons present in both the COVID-19 survey and wave 11
egen rv = rowtotal(cor49*) if m11 == 3
replace rv = rv / (3 - rowmi)
label define rv 1 "Überhaupt nicht" 5 "Sehr oft", modify
label values rv rv
tabulate rv cohort, missing

* Remove matched persons who answered none of the items
tabulate rowmi m11
drop if rowmi == 3 & m11 == 3									// 4 full-item non-responses

* Reverse the scale (1 <-> 5)
replace rv = 6 - rv

* z-standardize so that effects read as standard deviations
quietly summarize rv
generate zrv = (rv - `r(mean)') / `r(sd)'
summarize rv zrv

/*
pwcorr av rv zrv												// factor method yields same findings as mean scores
twoway kdensity av, bwidth(0.50) || kdensity rv, bwidth(0.50) || kdensity zrv, bwidth(0.50)
tabstat av rv zrv, s(mean sd n)								// use of zrv because it uses also cases with item nonresponse
*/


**************************************************************************************************
** Preparing the predictor variables
**************************************************************************************************

** Socio-Demographics
* All indicators below are 0/1 dummies.
* Stata treats a missing value as larger than any number, so a bare "age >= 17"
* would code persons with unknown age as 1. The if-qualifier leaves them missing.
gen age_17plus = age >= 17 if !mi(age)
* NOTE(review): the == / inlist() dummies below code a missing source value as 0;
* confirm that this is intended for sex_gen, relstat, enrol and migstatus.
gen women = sex_gen == 2		
gen single = inlist(relstat,1,6,9)
* gen LAT = inlist(relstat,2,5,7,10)
* gen inschool = isced == 0
gen edu_low_enr = inlist(enrol,1,2,4,5)			// incl. comprehensive and special-needs schools
gen edu_hi_enr = inlist(enrol,3, 13, 14)
gen edu_voctr_enr = inlist(enrol, 11, 12)
gen edu_oth = inlist(enrol, 0, .g, 6, 7, 9, 15, 16) // + not enrolled (2.7%)
tab edu_oth

gen migr = inlist(migstatus,2,3) 				// 1st + 2nd generation
lab var hhsizemrd "HH_size"
		
* Socio-demographic predictor set (edu_low_enr is not in the list)
global sociodemo ///							
	women age_17plus single edu_hi_enr edu_oth edu_voctr_enr migr  
	// m_abi p_abi
	
tab1 $sociodemo, m


** Geographical context
* Urban = municipality with more than 20,000 inhabitants.
* The previous inlist(gkpol,4,7) matched only the two codes 4 and 7; inrange()
* covers the whole span 4 through 7.
* NOTE(review): assumes gkpol codes 4-7 are exactly the size classes above 20,000 -
* confirm against the codebook.
gen urban = inrange(gkpol,4,7)				// Municipality > 20.000 inhab
lab var east "East"

global geogr urban east
tab1 $geogr, m

** Experiences with COVID-19	- no information

** Situational factors			
	// Economic Deprivation Household		 ecodep2_hh

global sit ecodep2_hh 
tab1 $sit, m

** Personal characteristics 1
	// trust                    trust
	// depressiveness           depressive
	// conduct problems         conduct
	// emotional symptoms       emotion
	// prosocial behavior       prosoc
	// hyperactivity            hyper
	// self-esteem              esteem
	// peer problems            peerprob

* Copies of the health summary scores under descriptive names
generate mhealth = mcs
generate phealth = pcs

	// mhealth and esteem are left out because of collinearity

global pers phealth depressive trust conduct prosoc hyper peerprob

** Personal characteristics 2 - stronger personality
	// neuroticism              neurot          Big Five
	// extraversion             extrav
	// agreeableness            agreeable
	// conscientiousness        conscient
	// openness                 openness
	// machiavellianism         machiav         Dark Triad
	// psychopathy              psychop
	// narcissism: rivalry      riv
	// narcissism: admiration   adm

	// riv and adm are collinear --> combined into one narcissism scale
	// psychop and machiav are highly collinear --> machiav is kept
egen narc = rowmean(riv adm)

global pers2 neurot extrav agreeable conscient openness
global pers3 machiav narc

tab1 $pers $pers2 $pers3, missing

pwcorr $pers $pers2												// re-check the variable selection here
pwcorr $pers2 $pers3

** Predictors of survey non-response only	[[PREVIOUS SURVEY - with mode change]]
	// Total number of interviewer contacts with respondent 			intcont	
	// Interviewer's sex												intsex
	// Interviewer's age												intage
	// Interviewer's level of education									intedu
	// Interview duration in minutes									intdur	
gen intedu_low = inlist(intedu,1,2,3)
* Interviewer and respondent of opposite sex. Without the if-qualifier a missing
* interviewer or respondent sex would compare as "different" and be coded 1.
gen intsex_o = intsex != sex_gen if !mi(intsex, sex_gen)			// collinear with intsex + sex
replace intdur = intdur / 10
gen intdur2 = intdur*intdur
gen intage2 = intage*intage
gen hhsizemrd2 = hhsizemrd*hhsizemrd

	// Number of missings in W11 - strongly skewed, hence log 			lmiss_all
	// NOTE(review): log(0) is missing, so persons with zero missings would later be
	// mean-imputed; confirm that miss_all / miss_intim cannot be 0.
gen lmiss_all = log(miss_all)
	// Number of missings in W11 on sensitive questions				miss_intim
gen lmiss_intim = log(miss_intim)	
	// W12 participation											part_w12
	// Partner participation W11									partn_part_w11
	// Living together with a parent								coh_parents
* A bare "mmrd | fmrd" is true whenever either variable is missing, because missing
* counts as non-zero. Only non-missing, non-zero values count as living together here;
* the result is missing when both inputs are missing.
gen coh_parents = ((mmrd != 0 & !mi(mmrd)) | (fmrd != 0 & !mi(fmrd))) if !mi(mmrd) | !mi(fmrd)
tab coh_parents mmrd, m


* Interview-related predictors (lmiss_intim is not in the list)
glo interv intcont intsex_o intage intage2 intedu_low intdur intdur2 ///
	hhsizemrd hhsizemrd2 lmiss_all part_w12 partn_part_w11 coh_parents
tab1 $interv, m

* Response indicators: a person counts as respondent when the outcome zrv is observed
gen nonresp = mi(zrv)
gen resp 	= !mi(zrv)
tab nonresp

************
* Check for item nonresponse 
********************************************************************************
* Number of missing predictors per person
egen Nmiss = rowmiss($interv $geogr $sit $pers $pers2 $pers3)
tab Nmiss

* Missing count per predictor (foreach replaces the obsolete -for- command)
foreach var of varlist $interv $geogr $sit $pers $pers2 $pers3 {
	di as text "`var':" _c
	count if mi(`var')
	}

* 7 % mean imputation
* Each missing value is replaced by the variable's mean rounded to the nearest integer.
* The reported percentage is the share of missing values among ALL observations;
* before, it was divided by the number of non-missing values only.
foreach var of varlist $interv $geogr $sit $pers $pers2 $pers3 {
	qui count if mi(`var')
		if `r(N)' != 0 {
			loc n = `r(N)'
			qui sum `var'
			replace `var' = round(`r(mean)', 1) if mi(`var')
			di "# `var' = " _column(19)  %9.2f = (`n'*100/_N) _c "% --- missings: n=" _c `n' _n
			}
	}

do "$do/02_labels.do"
save "$temp/heck-19_1.dta", replace

***********
* Reviewer question:
* Do those without Internet participate
********************************************************************************
* internet = 1 for codes 1-5 and 0 for codes 6-7 of lsr10i1 (social networks),
* then set to 1 as well when lsr10i2 (messenger) lies in 1-5
recode lsr10i1 (1/5 = 1) (6 7 = 0), generate(internet)	// Uses internet at least at some days per week for soc networks
replace internet = 1 if inrange(lsr10i2, 1, 5)			// Uses internet at least at some days per week for messenger
label variable internet "Uses internet"
tabulate resp internet, row

**************************************************************************************************
*** Analyses
**************************************************************************************************
* Restart from the saved analysis file
use "$temp/heck-19_1.dta", clear

**
** First Step: We need a proper Selection model
********************************************************************************

* est clear
* All predictors of survey participation
glo vars_sel $interv $sociodemo $geogr $sit $pers $pers2 $pers3
tabstat $interv , s(mean sd n)

*** Figure 1
* Probit model of participation, shown as average marginal effects.
* Markers with p < .05 are flagged with "*". The axis title previously read "maginal".
probit resp $vars_sel  
margins , dydx($vars_sel) post
coefplot , drop(_cons) xline(0) ciopts(lcolor(gray)) mcolor(black)  ///
	groups(intcont coh_parents = `""{bf:Interview}" "{bf:situation}""' ///
			women migr 			= `""{bf:Socio-}" "{bf:demography}""'	///
			urban east ecodep2_hh = `""{bf:Context}" " ""' ///
			phealth peerprob	= "{bf:Personal}" ///
			neurot narc 		= "{bf:Personality}", angle(vertical) ) ///
	mlabel(cond(@pval<.05, "*", "")) ysize(10) xsize(8)  ///
	xtitle(Average marginal effects) mlabposition(2) mlabgap(0.5pt) /// 
	mlabcol(black) name(fig1, replace)
graph export "$temp/fig1.svg", replace	

/*
regr resp $vars_sel 
coefplot , drop(_cons) xline(0) ciopts(lcolor(blue)) mcolor(blue) mlabcol(blue)  ///
	groups(intcont coh_parents = `""{bf:Interview}" "{bf:situation}""' ///
			women migr 			= `""{bf:Socio-}" "{bf:demography}""'	///
			urban east ecodep2_hh = `""{bf:Context}" " ""' ///
			phealth peerprob	= "{bf:Personal}" ///
			neurot narc 		= "{bf:Personality}", angle(vertical) ) ///
	mlabel(cond(@pval<.05, "*", "")) ysize(10) xsize(8)  ///
	xtitle(Linear Predictions) mlabposition(2) mlabgap(0.5pt) /// 
	name(AME_linprob, replace)	

graph combine AME_logit AME_linprob,	xcomm

est clear
eststo: reg zrv $sociodemo $geogr $sit $pers $pers2 $pers3
eststo: reg resp $sociodemo $geogr $sit $pers $pers2 $pers3
coefplot (est1, drop(_cons) xline(0) ciopts(lcolor(blue)) mcolor(blue) mlabcol(blue)) ///
	(est2, drop(_cons)  ciopts(lcolor(green)) mcolor(green) mlabcol(green) ) ///			
			 ///
	 , groups(intcont coh_parents = `""{bf:Interview}" "{bf:situation}""' ///
			women migr 			= `""{bf:Socio-}" "{bf:demography}""'	///
			urban east ecodep2_hh = `""{bf:Context}" " ""' ///
			phealth peerprob	= "{bf:Personal}" ///
			neurot narc 		= "{bf:Personality}", angle(vertical) ) ///
	legend(order(2 "AV1: Survey participation" 4 "AV2: COVID-19") pos(6)) ///
	mlabel(cond(@pval<.05, "*", "")) ysize(10) xsize(8)  ///
	xtitle(Linear prediction) mlabposition(2) mlabgap(0.5pt)
*/

**
** Second Step: Heckman selection model
********************************************************************************
* Purpose: estimate the Heckman model and store its estimates as variables
* (pre1-pre4 for the outcome equation, sel1-sel4 for the selection equation),
* one row per predictor, so that Figure 3 can plot one set against the other.

		global excl $interv // Exclusion Restrictions
        global vars $sociodemo 	$geogr $sit $pers $pers2 $pers3		// Prediction $sociodemo $geogr $sit $pers $pers2	
		* Word counts of the lists: i = all predictors, iv = outcome predictors,
		* ie = exclusion restrictions. They drive the matrix indexing below.
		glob i  = wordcount("$vars $excl")
		glob iv = wordcount("$vars")
		glob ie = wordcount("$excl")		
		di "all: $i  ~  vars: $iv  ~ excl: $ie "
		
    *  	Obtain full FIML estimates
        eststo: heckman zrv $vars, select($vars $excl) 
		
		*		regress zrv $vars
		* Copy the coefficient table of the estimation into matrix R
		mat R = r(table)
		* Remove helper variables left over from an earlier run
		cap drop lambda 
		for any pre* sel* name name2 miss label: cap drop X
		predict lambda, mills

		** collin $vars lambda if part != 1;		
			
	* Pre: rows 1, 5-6 and 4 of R for the first $iv columns (outcome equation).
	* Later code uses column 1 as point estimate, columns 2-3 as interval bounds
	* and column 4 as the value compared with 0.05.
	mat Pre = R[1,1..$iv] \ R[5..6,1..$iv] \ R[4,1..$iv]
	mat Pre = Pre'
	mat li Pre		
	* Sel: the same rows for columns $iv+2 .. 2*$iv+1+$ie (selection equation);
	* column $iv+1 is skipped - presumably the outcome-equation constant
	mat Sel = R[1,$iv+2..$iv*2+1+$ie] \ R[5..6,$iv+2..$iv*2+1+$ie] \ R[4,$iv+2..$iv*2+1+$ie] 
	mat Sel = Sel'
	mat li Sel		
	
	for any sel pre: cap drop X
			*	for num 1/4: gen selX = 0
	svmat Sel, names(sel)			// selection variables	--> response
	svmat Pre, names(pre)			// prediction variables	--> compliance 

	* Row number, value-labelled with the predictor name of that row
	gen name = _n
	for any $vars $excl \ numlist 1 /$i: lab def name_lb Y "X", modify
	lab val name name_lb
	* NOTE(review): presumably creates the variable "label" used as marker label in Figure 3 - confirm
	do "$do/03_labels_for_plot.do"
	
	* miss = 1 for rows without an outcome-equation estimate
	gen miss = mi(pre1) 
	sum pre1
	* x-position of the dividing line in Figure 3 (fixed value; the data-driven rule is commented out)
	glob boarder = -0.4 // `r(min)' - `r(sd)'
	
	di "$boarder"
	* sort id
	for any type boarder n nid: cap drop X
	* type: 1 = selection estimate only (exclusion restriction), 0 = both estimates,
	* missing = row holds no estimate
	gen type = mi(pre1)  & !mi(sel1) in 1/$i
	replace type = . if mi(sel1)
	gen nid = _n
	* Order rows by type and selection coefficient; the statements below rely on this order
	sort type sel1
	gen n = type * (_n - $iv)
	sort type n
	gen boarder = .
	
	list name sel* pre* boarder type n in 1/$i	
	* Give the $ie exclusion-restriction rows evenly spaced x-positions to the left of $boarder
	foreach n of numlist 1/$ie {
		replace boarder = $boarder - (`n'* 0.4 / ($ie)) if mi(pre2)  & !mi(sel1) & _n == (`n'+$iv)
		}
	list name sel* pre* boarder type n in 1/$i		
	* Use these positions as x-values so the rows without outcome estimate can be plotted
	replace pre1 = boarder 					if mi(pre1)  & !mi(sel1)	

* Figure 3
* Scatter of selection-equation estimates (sel1, y-axis) against outcome-equation
* estimates (pre1, x-axis) with range caps from sel2/sel3 and pre2/pre3.
* Marker style depends on whether pre4 and sel4 are at or below 0.05:
*   gray dot = neither, hollow circle = pre4 only, square = sel4 only, X = both.
* Rows flagged by miss are drawn separately, with "+" when sel4 <= 0.05.
* A dashed vertical line is drawn at $boarder.
twoway rcap sel2 sel3 pre1, vert lcolor(gs14) plotregion(margin(t=2cm)) ///
	|| rcap pre2 pre3 sel1, hor lcolor(gs14) || ///
	scatter sel1 pre1 if pre4 > 0.05 & sel4 > 0.05 , ///
				ytit("-  Likelihood of survey participation  +", color(black) size(medium)) ///
				xtit("-  Adherence COVID-19 regulations  +", color(black) size(medium)) /// 
				mcolor(gs10%70)  mlabel("")  mlabangle(35)	 mlabpos(2)  || ///
	scatter sel1 pre1 if pre4 <= 0.05 & sel4 > 0.05, mcolor(black) msymbol(oh) /// mcolor(red)
				mlabangle(35)	mlabel(label) mlabpos(2) mlabsize(medsmall) msize(medium) || 	///		
	scatter sel1 pre1 if pre4 > 0.05 & sel4 <= 0.05, mcolor(black) msize(small) /// mcolor(red)
				mlabel(label) mlabpos(2) mlabangle(35) msymbol(square) mlabsize(medsmall)|| 	///		
	scatter sel1 pre1 if pre4 <= 0.05 & sel4 <= 0.05, mcolor(black) msymbol(X) /// mcolor(blue)
				msize(medium) mlabangle(35)	mlabel(label) mlabpos(2) mlabsize(medsmall)  || 	///					
	scatter sel1 pre1 if miss & sel4 > 0.05, mcolor(gs10%70) mfcolor(white) mlab("") || ///
	scatter sel1 pre1 if miss & sel4 <= 0.05, mcolor(black) msymbol(+)  /// mcolor(red)
				legend(off) xline($boarder, lpattern(- - -) lcolor(gs2%50)) ///
				xlabel($boarder "" -0.4 " " -0.2(0.2)0.6, labsize(medium)) ///
				ylabel(,labsize(medium)) ///
				xline(0, lpattern(...) lcolor(gray)) ///
				yline(0, lpattern(...) lcolor(gray)) ///
				scale(0.68) xsize(17cm) ysize(13cm)	name(fig3, replace)
graph export "$temp/fig3.svg", replace
				
	* Print the plotted values for checking
	list name label sel* pre* miss in 1/$i
		
**************************************************************************************************
* Compare corrected and uncorrected models showing the Deltas in prediction
**************************************************************************************************
**************************************************************************************************
est clear
* Only Socio-Demographics
* est1 = naive OLS, est2 = Heckman model in which all remaining predictors enter
* the selection equation only
global excl $interv $geogr $sit $pers $pers2 $pers3			// Exclusion Restrictions
global vars $sociodemo $geogr 
eststo: reg zrv $vars
eststo: heckman zrv $vars, select($vars $excl) 

coefplot (est1, drop(_cons) xline(0) ciopts(lcolor(eltgreen )) mcolor(eltgreen ) mlabcol(eltgreen )) ///
	(est2, drop(_cons)  ciopts(lcolor(green)) mcolor(green) mlabcol(green) ) ///			
			 ///
	 , groups(intcont coh_parents = `""{bf:Interview}" "{bf:situation}""' ///
			women migr 			= `""{bf:Socio-}" "{bf:demography}""'	///
			urban east ecodep2_hh = `""{bf:Context}" " ""' ///
			phealth peerprob	= "{bf:Personal}" ///
			neurot narc 		= "{bf:Personality}", angle(vertical) ) ///
	legend(order(2 "naive model" 4 "corrected model") pos(6)) ///
	mlabel(cond(@pval<.05, "*", "")) ysize(10) xsize(8)  ///
	xtitle(Linear prediction) mlabposition(2) mlabgap(0.5pt) name(GrA, replace)

* Difference test
* Tests for each predictor whether its coefficient differs between the two models
suest est1 est2

* Result matrix: one row per predictor, columns t (chi2 statistic) and tp (p-value).
* The size is taken from the variable list; the previous hard-coded J(10,3,.) did
* not match the number of row and column names supplied.
local k : word count $vars
mat B = J(`k',2,.)
mat colnames B = t tp
mat rownames B = $vars

local i = 1
foreach var of varlist $vars {
	test [est1_mean]`var' = [est2_zrv]`var'
    mat B[`i',1] = r(chi2)
    mat B[`i',2] = r(p)
	loc ++i
	}

esttab matrix(B, fmt(%9,2f)),  del(;)	
	
* All predictors 
* est3 = naive OLS with every predictor, est4 = Heckman model with the interview
* variables as exclusion restrictions
global excl $interv 			// Exclusion Restrictions
global vars $sociodemo $geogr $sit $pers $pers2 $pers3
eststo: reg zrv $sociodemo $geogr $sit $pers $pers2 $pers3
eststo: heckman zrv $vars, select($vars $excl) 

coefplot (est3, drop(_cons) xline(0) ciopts(lcolor(eltgreen )) mcolor(eltgreen ) mlabcol(eltgreen )) ///
	(est4, drop(_cons)  ciopts(lcolor(green)) mcolor(green) mlabcol(green) ) ///			
			 ///
	 , groups(intcont coh_parents = `""{bf:Interview}" "{bf:situation}""' ///
			women migr 			= `""{bf:Socio-}" "{bf:demography}""'	///
			urban east ecodep2_hh = `""{bf:Context}" " ""' ///
			phealth peerprob	= "{bf:Personal}" ///
			neurot narc 		= "{bf:Personality}", angle(vertical) ) ///
	legend(order(2 "naive model" 4 "corrected model") pos(6)) ///
	mlabel(cond(@pval<.05, "*", "")) ysize(10) xsize(8)  ///
	xtitle(Linear prediction) mlabposition(2) mlabgap(0.5pt) ///
	name(Gr1, replace)
	

* Difference test
* Tests for each predictor whether its coefficient differs between the two models
suest est3 est4

* Result matrix: one row per predictor, columns t (chi2 statistic) and tp (p-value).
* The size is taken from the variable list; the previous hard-coded J(36,3,.) did
* not match the number of row and column names supplied.
local k : word count $vars
mat C = J(`k',2,.)
mat colnames C = t tp
mat rownames C = $vars

local i = 1
foreach var of varlist $vars {
	test [est3_mean]`var' = [est4_zrv]`var'
    mat C[`i',1] = r(chi2)
    mat C[`i',2] = r(p)
	loc ++i
	}

esttab matrix(C, fmt(%9,2f)),  del(;)	

* Table of all stored models (est1-est4) with decimal commas and ";" as delimiter
estout est* , style(tab)  cells(b(star fmt(%9,2f)) ///
	se(par(`"="("' `")""') fmt(%9,2f))) title(AUTO-PREIS) stardetach delimiter(;) ///
    	replace leg numb numbers("Class " ")") dmarker(,) ///
		stats(rho sigma lambda chi2 r2 N , fmt(%9.2f) labels()) eqlab(R2, none)		
	
* Deltas in mean levels
* Model 1 is the unweighted mean of zrv (constant-only regression). Models 2-4
* re-estimate that mean with weights 1 / Pr(resp) from increasingly rich probit models.
quietly {

	preserve
	estimates clear

	* Model 1: unweighted
	mean zrv
	eststo: quietly regress zrv

	* Predictor sets of the probit models behind Models 2, 3 and 4
	local spec2 $sociodemo $geogr
	local spec3 $sociodemo $geogr $sit $pers $pers2 $pers3
	local spec4 $sociodemo $geogr $sit $pers $pers2 $pers3 $interv

	forvalues m = 2/4 {
		capture drop pr? w?
		probit resp `spec`m''
		predict pr1
		generate w1 = 1 / pr1
		eststo: regress zrv [aw=w1]
	}
}
esttab, p

*** Fig 2: "Mean levels in compliance with COVID-19 regulations")
* Plots the four stored mean estimates (est1-est4) side by side.
* The last legend entry previously read "characterists".
set scheme plotplain
**# Bookmark #1
global form msize(medlarge)
coefplot (est1, $form msymbol(o) label("Uncontrolled sample mean"))  ///
		 (est2, $form msymbol(+) label("Controlled by socio-demographics")) ///
		 (est3, $form msymbol(D) label("   ... and by personal variables")) ///
 		 (est4, $form msymbol(X) label("   ... and by interview characteristics") xline(0)), ///
		 name(fig2, replace) leg(pos(5) ring(0) size(medium)) ylabel("") xlabel(-.2(0.1)0.2, labsize(medium)) /// 
		 xtitle("Mean level in compliance with COVID-19 regulations (z-stand.)", size(medium)) 
graph export "$temp/fig2.svg", replace


**
** Appendix: We need a proper Selection model
********************************************************************************

** Twostep Heckman by hand
* Step 1: probit model of participation
probit resp $excl $sociodemo 

cap drop probitxb pdf cdf imr pr_incl
* -predict- after -probit- returns the predicted probability unless xb is requested.
* The inverse Mills ratio needs the linear index, so xb is requested explicitly;
* before, pdf and cdf were evaluated at a probability.
predict probitxb, xb
gen pdf = normalden(probitxb)
gen cdf = normal(probitxb)				// predicted probability of participation
gen imr = pdf/cdf						// inverse Mills ratio
* Inverse-probability weights, built from the predicted probability (cdf) as before
gen pr_incl = 1 / cdf
replace pr_incl = 1 /(1 - cdf) if resp == 0

sum imr
sum imr if resp == 1

* Step 2: outcome regression with the inverse Mills ratio as extra regressor,
* followed by the built-in estimator for comparison
reg zrv $sociodemo imr
heckman zrv $sociodemo , select($sociodemo  $excl)

* Weighted mean of zrv using the inverse-probability weights
eststo: reg zrv [aweight=pr_incl]


esttab



* Built-in two-step estimator; -first- also shows the first-step probit
heckman zrv $sociodemo , select($sociodemo  $excl) twostep first
/*
preserve
recode resp nonresp (1=.)
gsem (zrv <- cons L) ///
	 (resp <- $excl L@1, /// 
	 family(gaussian, udepvar(nonresp))), var(L@1 e.zrv@a e.resp@a)

eststo: heckman zrv cons, select($excl) 
*/	
	

		