/*------------------------------------------------------------------------------
Purpose: Prepare the SDCCP survey dataset, build discrete-time event-history
         (EHA) datasets for the accessibility of first results / data, and
         produce the figures (1-3) and mixed-model results (Table 2).
Input:   ${datadir}\SDCCP1_v1-0-0.dta (Gesis archive, https://doi.org/10.7802/2652)
Output:  analytical dataset, EHA snapshots, figure .emf/.eps files, Table 2 .doc
NOTE(review): requires the user-written -outreg2- package (section 4).
------------------------------------------------------------------------------*/
clear all
cap log close

********************************************************************************
**# 0) Set-up ******************************************************************
********************************************************************************

*** define paths
cd ""
global datadir =""
global dodir =""
global logdir =""
global resultsdir =""

*** load dataset (available from the Gesis archive under https://doi.org/10.7802/2652)
use ${datadir}\SDCCP1_v1-0-0.dta, clear

********************************************************************************
**# 1) Preparing variables / dataset for analysis ******************************
********************************************************************************

*** basic variables ************************************************************

*** mode (single-mode surveys coded 1-4, more than one mode = 5, none = -9)
egen modecount = anycount(v47_1 v47_2 v47_3 v47_4), values(1 2 3 4)
gen mode=.
replace mode=1 if v47_1==1 & v47_2==0 & v47_3==0 & v47_4==0
replace mode=2 if v47_1==0 & v47_2==1 & v47_3==0 & v47_4==0
replace mode=3 if v47_1==0 & v47_2==0 & v47_3==1 & v47_4==0
replace mode=4 if v47_1==0 & v47_2==0 & v47_3==0 & v47_4==1
replace mode=5 if modecount>1
recode mode (. = -9)
label variable mode "Survey mode"
label define mode 1 "CAPI" 2 "CATI" 3 "CAWI" 4 "PAPI" 5 "mixed-mode" -9 "no information", replace
label value mode mode
numlabel mode, add
tab mode, m
drop modecount

*** nonprob (1 = nonprobability sample; missing info on v28 recoded to -9)
gen nonprob = v28 == 2 if v28 != -9
recode nonprob (. = -9)
label variable nonprob "survey is nonprob."
label value nonprob yesno // NOTE(review): label -yesno- is not defined in this do-file; presumably shipped with the dataset - verify
tab nonprob, m

*** field_start_my (monthly date of fieldwork start)
gen field_start_my = ym(v63,v62)
recode field_start_my (720/721 = 722) //3 surveys started before March 2020 (special cases), fieldwork start date recoded to March for the analysis

*labels
#delimit ;
label define months
	722 "Mar. 20"  723 "Apr. 20"  724 "May 20"   725 "June 20"  726 "July 20"
	727 "Aug. 20"  728 "Sept. 20" 729 "Oct. 20"  730 "Nov. 20"  731 "Dec. 20"
	732 "Jan. 21"  733 "Feb. 21"  734 "Mar. 21"  735 "Apr. 21"  736 "May 21"
	737 "June 21"  738 "July 21"  739 "Aug. 21"  740 "Sept. 21" 741 "Oct. 21"
	742 "Nov. 21"  743 "Dec. 21" ;
#delimit cr
label values field_start_my months
tab field_start_my, m

*** field_start_my_num - numbered variable with 03/20 = 1
gen field_start_my_num = field_start_my - 721
label values field_start_my_num months
tab field_start_my_num, m

*** number of fieldwork starts per month
bysort field_start_my: egen fsmy_count=count(rowid)
label variable fsmy_count "Total"

*** survey design dimension ****************************************************

*** desgroups - combinations of survey design (sampling (prob./nonprob.) + mode)
gen desgroups =.
replace desgroups = 1 if nonprob == 0 & mode==2
replace desgroups = 2 if nonprob == 0 & mode==3
replace desgroups = 3 if nonprob == 0 & mode==5
replace desgroups = 4 if nonprob == 0 & mode==1 | nonprob == 0 & mode==4
replace desgroups = 5 if nonprob == 1 & mode==3
replace desgroups = 6 if nonprob == 1 & mode==5
label define desgroups_lbl 1 "Prob./CATI" 2 "Prob./CAWI" 3 "Prob./mixed" 4 "Prob./CAPI|PAPI" 5 "Nonprob./CAWI" 6 "Nonprob./mixed"
label values desgroups desgroups_lbl
numlabel desgroups_lbl, add
tab desgroups, m

*** interpretability dimension *************************************************

*** v25_int (information on the target population)
gen v25_int=0 if regexm(v25, "-9") == 1
replace v25_int=1 if v25_int!=0
replace v25_int=1 if rowid==46 | rowid==47 /* manual correction */
tab v25_int

*** v39_int (information on the concrete sampling procedure)
gen v39_int=0 if v39_1==-9 /* missing information on the concrete sampling procedure means -9 in v39_1 (and other categories v39_* as well) */
replace v39_int=1 if v39_int!=0
tab v39_int v39_1

*** v68_int (information on the sample size)
gen v68_int=0 if v68==-9
replace v68_int=1 if v68_int!=0
tab v68 v68_int

*** field_full_int (information on the date of data collection)
gen field_full_int=0 if v61==-9 | v62==-9 | v63==-9 | v64==-9 | v65==-9 | v66==-9
replace field_full_int=1 if field_full_int!=0
tab field_full_int

*** anyrate_int (information on any outcome rate (0 if no rates coded at all))
gen anyrate_int=.
replace anyrate_int=1 if v70>0 | v72>0 | v74>0 | v76>0 | v78>0
replace anyrate_int=0 if v70<0 & v72<0 & v74<0 & v76<0 & v78<0
tab anyrate_int, m

*** full interpretability index (5 variables)
gen ind_int=.
* v68_int spelled out in full (was "v68_", which relied on Stata's variable-name
* abbreviation and breaks under "set varabbrev off")
replace ind_int = v25_int+v39_int+v68_int+field_full_int+anyrate_int
label variable ind_int "Interpretability"
tab ind_int

*** restricted interpretability index (4 variables without any_rate, for robustness checks)
gen ind_int_4=.
replace ind_int_4 = v25_int+v39_int+v68_int+field_full_int
label variable ind_int_4 "Interpretability - without outcome rate"
tab ind_int_4

*** accessibility dimension ****************************************************

*** first results available
gen results_avail = v93 == 1

*** month/year of first results publication
gen results_my = ym(v96,v95)
tab results_my, m

*** time elapsed until results publication (in months); not published until coding = 999
gen results_timeelapse = results_my - field_start_my
replace results_timeelapse = 999 if results_avail == 0
tab results_timeelapse

*** data available
gen data_avail = v86 == 1

*** month/year of first data publication
gen data_my = ym(v92,v91)
tab data_my, m

*** time elapsed until data publication (in months); not published until coding = 999
gen data_timeelapse = data_my - field_start_my
replace data_timeelapse = 999 if data_avail == 0
tab data_timeelapse

*** drop missings and save dataset *********************************************
* observations with missing information on at least one of the following variables excluded: fieldwork start date, sampling procedure (prob.x nonprob.), mode
drop if field_start_my==. /* 4 obs. dropped */
drop if desgroups==. | desgroups==6 /* 27 obs. dropped */

*** save the dataset for analysis
save ${datadir}\SDCCP_AP1_analytical.dta, replace

********************************************************************************
**# 2) Building of further (EHA) datasets (accessibility of first results / data)
********************************************************************************

*** load and snapshot the analytical dataset ***********************************
use ${datadir}\SDCCP_AP1_analytical.dta
snapshot erase _all
snapshot save, label(SDCCP_AP1_analytical)

********************************************************************************
**# 2a) Building the EHA datasets for assessing accessibility of results********
********************************************************************************
* EHA = event history analysis

*** Restrict dataset to useable observations ***********************************
keep if results_my < . | results_avail == 0 //keep if A) date of results publication is known or B) no results were available at all.
tab v1 //N = 483

*** snapshot of this datafile **************************************************
snapshot save, label(analytical_access_results)

*** Building the EHA dataset ***************************************************

*** independent variables - Appendix Table A1
tabstat (field_start_my_num), statistics(n mean sd)
tab desgroups

*** Expand dataset (i.e. creation of a discrete event history dataset format)***
gen last_obs = 760
gen last_obs_str = string(last_obs, "%tm") //data right-censored from May 2023 onwards
gen mdate_diff = last_obs - field_start_my
* Duplicate each observation with the number of times stored in mdate_diff
replace mdate_diff = mdate_diff + 1
expand mdate_diff, generate(n_expanded)
sort rowid mdate_diff
* Creating a variable that counts through the number of lines per ID (rowid)
bysort rowid: gen seq_number = _n
* same variable minus 1 (as this variable is used to compute the monthly date-variable in a next step)
replace seq_number = seq_number - 1
* computation of monthly date-variable
gen mdate = field_start_my + seq_number
*> now every survey's first row represents the month of fieldwork start and its last row represents the month 760 (i.e. May 2023)
*compute event indicator
gen event = results_my == mdate
*censoring after event (drop all person-months after the event month)
bysort rowid (mdate): gen helpvar1 = sum(event)
bysort rowid (mdate): gen helpvar2 = sum(helpvar1)
tab n_expanded if helpvar2 >=2
drop if helpvar2 >=2
drop helpvar1 helpvar2

*** Creating Additional Variables **********************************************

*** month since fieldwork start (i.e. process time)
*month since fieldwork start - Various specifications
gen msfs = seq_number //basic
gen msfs_log = log(msfs+1) //log of
*mean-centering of msfs
summ msfs
gen msfs_c = msfs - `r(mean)'
*Dummy coding
gen msfs_k1 = msfs
* upper bound aligned with section 2b (max. msfs is 38, so identical to the former 24/99 coding)
recode msfs_k1 (24/999=24)

*** month of fieldwork start (i.e. period / calendar-time indicator)
*Transformation of field_start_my (1 = March 2020)
replace field_start_my = field_start_my - 721
*mean-centering of field_start_my
summ field_start_my
gen fs_myc = field_start_my - `r(mean)'

*** snapshot of this datafile **************************************************
snapshot save, label(EHA_access_results)

********************************************************************************
**# 2b) Building the EHA datasets for assessing accessibility of data***********
********************************************************************************
snapshot restore 1

*** Restrict dataset to useable observations ***********************************
keep if data_my < . | data_avail == 0 //keep if A) date of data publication is known or B) no data were available at all.
tab v1 //N = 590

*** snapshot of this datafile **************************************************
snapshot save, label(analytical_access_data)

*** Building the EHA dataset ***************************************************

*** independent variables - Appendix Table A1
tabstat (field_start_my_num), statistics(n mean sd)
tab desgroups

*** Expand dataset (i.e. creation of a discrete event history dataset format)***
gen last_obs = 760
gen last_obs_str = string(last_obs, "%tm") //data right-censored from May 2023 onwards
gen mdate_diff = last_obs - field_start_my
* Duplicate each observation with the number of times stored in mdate_diff
replace mdate_diff = mdate_diff + 1
expand mdate_diff, generate(n_expanded)
sort rowid mdate_diff
* Creating a variable that counts through the number of lines per ID (rowid)
bysort rowid: gen seq_number = _n
* same variable minus 1 (as this variable is used to compute the monthly date-variable in a next step)
replace seq_number = seq_number - 1
* computation of monthly date-variable
gen mdate = field_start_my + seq_number
*> now every survey's first row represents the month of fieldwork start and its last row represents the month 760 (i.e. May 2023)
*compute event indicator
gen event = data_my == mdate
*censoring after event (drop all person-months after the event month)
bysort rowid (mdate): gen helpvar1 = sum(event)
bysort rowid (mdate): gen helpvar2 = sum(helpvar1)
tab n_expanded if helpvar2 >=2
drop if helpvar2 >=2
drop helpvar1 helpvar2

*** Creating Additional Variables **********************************************

*** month since fieldwork start (i.e. process time)
*month since fieldwork start - Various specifications
gen msfs = seq_number //basic
gen msfs_log = log(msfs+1) //log of
*mean-centering of msfs
summ msfs
gen msfs_c = msfs - `r(mean)'
*Dummy coding
gen msfs_k1 = msfs
recode msfs_k1 (24/999=24)

*** month of fieldwork start (i.e. period / calendar-time indicator)
*Transformation of field_start_my (1 = March 2020)
replace field_start_my = field_start_my - 721
*mean-centering of field_start_my
summ field_start_my
gen fs_myc = field_start_my - `r(mean)'

*** snapshot of this datafile **************************************************
snapshot save, label(EHA_access_data)

********************************************************************************
**# 2c) Overview snapshots
********************************************************************************
snapshot list _all

********************************************************************************
**# 2d) Dataset description / overview
********************************************************************************

*** independent variables - Appendix Table A1
*DS for survey design and interpretability
snapshot restore 1
tabstat (field_start_my_num), statistics(n mean sd)
tab desgroups
*DS for accessibility first results
snapshot restore 2
tabstat (field_start_my_num), statistics(n mean sd)
tab desgroups
*DS for accessibility data
snapshot restore 4
tabstat (field_start_my_num), statistics(n mean sd)
tab desgroups

*** describe the main analytical sample - survey_per_program
preserve
snapshot restore 1
sort rowid v2
egen tag = tag(v2) // Tags only the first occurrence of each unique value of v2
count if tag == 1 // Counts the unique values in v2 - 183
bysort v2: gen survey_per_program = _N if _n == 1
tab survey_per_program
sum survey_per_program, detail
restore

********************************************************************************
**# 3) Results - Survey quality during the pandemic ****************************
********************************************************************************
snapshot restore 1

*** survey design **************************************************************
tab nonprob
tab desgroups

*** interpretability ***********************************************************
sum ind_int, detail
tab ind_int

**# Figure 1 - Accessibility of results and data after fieldwork start *********
/*Notes:
- Computation of fitted survival probability (surv_v1) based on fitted hazard (haz_v1)
  as described in: Singer, J. D., & Willett, J. B. (2003). Applied longitudinal data
  analysis: Modeling change and event occurrence. Oxford university press, from there:
  Chapter 12: Extending the Discrete-time Hazard Model | Stata Textbook Examples
  (Syntax for Figure 12.1)
- cumulative probability of event occurrence (kumprob) is computed as the inverse of
  surv_v1, 1-surv_v1, and plotted in Figure 1
- The cumulative probability of event occurrence is also known as the
  "cumulative distribution function" */

*** Figure 1 - results by desgroups ********************************************

*** number of results publications per design group per month
snapshot restore 3
sort msfs desgroups
by msfs desgroups: egen num_event=count(event) if event==1
table msfs desgroups, stat(count num_event)

*** fitted cumulative probability of results publication, overall (g = 0) and
*** per design group (g = 1-5); the loop replaces six near-identical
*** copy-paste blocks - behavior unchanged
foreach g of numlist 0/5 {
	snapshot restore 3
	if `g' > 0 {
		keep if desgroups==`g'
	}
	*Compute the fitted hazard rate
	capture drop ha*
	capture drop sur*
	sort msfs
	by msfs: egen haz_v1 = mean(event)
	*Compute the fitted survival probability
	sort rowid msfs
	order rowid, before (msfs)
	gen surv_v1 = 1
	replace surv_v1 = (1-haz_v1)*surv_v1 if msfs == 0
	replace surv_v1 = (1-haz_v1)*surv_v1[_n-1] if msfs > 0
	sort msfs
	*Compute the cumulative probability of event occurrence (shown in Figure 1)
	gen kumprob = 1-surv_v1
	*keep one row per msfs value
	sort msfs
	gen h1 = msfs != msfs[_n-1]
	keep if h1 == 1
	drop h1
	keep msfs kumprob
	rename kumprob kumprob_results_`g'
	if `g' == 0 {
		save ${datadir}\kp_results_desgroups.dta, replace
	}
	else {
		merge 1:1 msfs using ${datadir}\kp_results_desgroups.dta
		drop _merge
		save ${datadir}\kp_results_desgroups.dta, replace
	}
}

*** figure
use ${datadir}\kp_results_desgroups.dta
graph twoway (line kumprob_results_0 msfs, lpattern(solid) lwidth (medthick) color(black)) ///
	(line kumprob_results_1 msfs, lpattern(solid) lwidth (medthick) color(navy)) ///
	(line kumprob_results_2 msfs, lpattern(solid) lwidth (medthick) color(purple)) ///
	(line kumprob_results_3 msfs, lpattern(solid) lwidth (medthick) color(gold)) ///
	(line kumprob_results_4 msfs, lpattern(solid) lwidth (medthick) color(green)) ///
	(line kumprob_results_5 msfs, lpattern(solid) lwidth (medthick) color(orange)), ///
	xtitle("Months since fieldwork start") ///
	ytitle("Cumulative probability") ///
	xtick(0(1)38) ///
	xlabel(0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38, valuelabel labsize(small)) ///
	ylabel(0.1(0.1)1.0, grid format(%9.2f) gmin angle(horizontal)) ///
	scheme(s1mono) ///
	legend(lab(1 "Total") lab(2 "Prob./CATI") lab(3 "Prob./CAWI") lab(4 "Prob./mixed") lab(5 "Prob./CAPI|PAPI") ///
	lab(6 "Nonprob./CAWI") pos(6) ring(1) region(lstyle(none))) ///
	title("a) Accessibility of results (N = 483)") saving(figure1_results_bydesgroups_with_total)
graph export ${resultsdir}\figure1_results_bydesgroups_with_total.emf, replace
graph export ${resultsdir}\figure1_results_bydesgroups_with_total.eps, replace

*** Figure 1 - data by desgroups ***********************************************

*** fitted cumulative probability of data publication, overall (g = 0) and
*** per design group (g = 1-5)
foreach g of numlist 0/5 {
	snapshot restore 5
	if `g' > 0 {
		keep if desgroups==`g'
	}
	*Compute the fitted hazard rate
	capture drop ha*
	capture drop sur*
	sort msfs
	by msfs: egen haz_v1 = mean(event)
	*Compute the fitted survival probability
	sort rowid msfs
	order rowid, before (msfs)
	gen surv_v1 = 1
	replace surv_v1 = (1-haz_v1)*surv_v1 if msfs == 0
	replace surv_v1 = (1-haz_v1)*surv_v1[_n-1] if msfs > 0
	sort msfs
	*Compute the cumulative probability of event occurrence (shown in Figure 1)
	gen kumprob = 1-surv_v1
	*keep one row per msfs value
	sort msfs
	gen h1 = msfs != msfs[_n-1]
	keep if h1 == 1
	drop h1
	keep msfs kumprob
	rename kumprob kumprob_data_`g'
	if `g' == 0 {
		save ${datadir}\kp_data_desgroups.dta, replace
	}
	else {
		merge 1:1 msfs using ${datadir}\kp_data_desgroups.dta
		drop _merge
		save ${datadir}\kp_data_desgroups.dta, replace
	}
}

*** figure
use ${datadir}\kp_data_desgroups.dta
graph twoway (line kumprob_data_0 msfs, lpattern(solid) lwidth (medthick) color(black)) ///
	(line kumprob_data_1 msfs, lpattern(solid) lwidth (medthick) color(navy)) ///
	(line kumprob_data_2 msfs, lpattern(solid) lwidth (medthick) color(purple)) ///
	(line kumprob_data_3 msfs, lpattern(solid) lwidth (medthick) color(gold)) ///
	(line kumprob_data_4 msfs, lpattern(solid) lwidth (medthick) color(green)) ///
	(line kumprob_data_5 msfs, lpattern(solid) lwidth (medthick) color(orange)), ///
	xtitle("Months since fieldwork start") ///
	ytitle("Cumulative probability") ///
	xtick(0(1)38) ///
	xlabel(0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38, valuelabel labsize(small)) ///
	ylabel(0.1(0.1)1.0, grid format(%9.2f) gmin angle(horizontal)) ///
	scheme(s1mono) ///
	legend(lab(1 "Total") lab(2 "Prob./CATI") lab(3 "Prob./CAWI") lab(4 "Prob./mixed") lab(5 "Prob./CAPI|PAPI") ///
	lab(6 "Nonprob./CAWI") pos(6) ring(1) region(lstyle(none))) ///
	title("b) Accessibility of data (N = 590)") saving(figure1_data_bydesgroups_with_total)
graph export ${resultsdir}\figure1_data_bydesgroups_with_total.emf, replace
graph export ${resultsdir}\figure1_data_bydesgroups_with_total.eps, replace

*** Figure 1 - combined - results and data by desgroups ************************
graph combine figure1_results_bydesgroups_with_total.gph figure1_data_bydesgroups_with_total.gph, scale(0.8)
graph export ${resultsdir}\figure1_combo.emf, replace
graph export ${resultsdir}\figure1_combo.eps, replace

**# Figure 2 - Number of surveys over time *************************************
snapshot restore 1
preserve
levelsof desgroups, local(groups)
levelsof field_start_my, local(months)
foreach group in `groups' {
	gen fsmy_count_`group' = 0
	label variable fsmy_count_`group' "Count for `group'"
	foreach month in `months' {
		quietly count if desgroups == `group' & field_start_my == `month'
		replace fsmy_count_`group' = r(N) if field_start_my == `month'
	}
}
twoway (line fsmy_count field_start_my, lwidth(medthick) color(black)) ///
	(line fsmy_count_1 field_start_my, lwidth(medthick) color(navy)) ///
	(line fsmy_count_2 field_start_my, lwidth(medthick) color(purple)) ///
	(line fsmy_count_3 field_start_my, lwidth(medthick) color(gold)) ///
	(line fsmy_count_4 field_start_my, lwidth(medthick) color(green)) ///
	(line fsmy_count_5 field_start_my, lwidth(medthick) color(orange)), ///
	xlabel(722(3)743, valuelabel angle(45)) scheme(s1mono) ///
	xtitle("Month of fieldwork start") ytitle("") ylabel(0(10)80, angle(0)) ///
	legend(lab(1 "Total") lab(2 "Prob./CATI") lab(3 "Prob./CAWI") lab(4 "Prob./mixed") ///
	lab(5 "Prob./CAPI|PAPI") lab(6 "Nonprob./CAWI") pos(6) ring(1) row(2) region(lstyle(none)))
graph export ${resultsdir}\figure2.emf, replace
graph export ${resultsdir}\figure2.eps, replace
restore

**# Figure 3 - Quality of social science surveys over time *********************
snapshot restore 1

*** survey quality over time - survey design ***
*** additional variables for the figure
tab desgroups, gen(desgroup)
bysort field_start_my: egen desgroup1_count= sum(desgroup1)
bysort field_start_my: egen desgroup2_count= sum(desgroup2)
bysort field_start_my: egen desgroup3_count= sum(desgroup3)
bysort field_start_my: egen desgroup4_count= sum(desgroup4)
bysort field_start_my: egen desgroup5_count= sum(desgroup5)
label variable desgroup1_count "Prob./CATI"
label variable desgroup2_count "Prob./CAWI"
label variable desgroup3_count "Prob./mixed"
label variable desgroup4_count "Prob./CAPI|PAPI"
label variable desgroup5_count "Non-prob./CAWI"
gen desgroup_total=.
replace desgroup_total=desgroup1_count+desgroup2_count+desgroup3_count+desgroup4_count+desgroup5_count
gen desgroup1_pct=desgroup1_count/desgroup_total
gen desgroup2_pct=desgroup2_count/desgroup_total
gen desgroup3_pct=desgroup3_count/desgroup_total
gen desgroup4_pct=desgroup4_count/desgroup_total
gen desgroup5_pct=desgroup5_count/desgroup_total
* cumulative shares for the stacked area plot
gen zero = 0
gen pct1=desgroup1_count/desgroup_total
gen pct2=(desgroup1_count+desgroup2_count)/desgroup_total
gen pct3=(desgroup1_count+desgroup2_count+desgroup3_count)/desgroup_total
gen pct4=(desgroup1_count+desgroup2_count+desgroup3_count+desgroup4_count)/desgroup_total
gen pct5= 1

*** figure 3a)- distribution of survey designs over time
twoway (rarea zero pct1 field_start_my, fcolor (navy) lcolor(black) lwidth(vthin)) ///
	(rarea pct1 pct2 field_start_my, fcolor (purple) lcolor(black) lwidth(vthin)) ///
	(rarea pct2 pct3 field_start_my, fcolor (gold) lcolor(black) lwidth(vthin)) ///
	(rarea pct3 pct4 field_start_my, fcolor (green) lcolor(black) lwidth(vthin)) ///
	(rarea pct4 pct5 field_start_my, fcolor (orange) lcolor(black) lwidth(vthin)), ///
	legend(order (1 "Prob./CATI" 2 "Prob./CAWI" 3 "Prob./mixed" 4 "Prob./CAPI|PAPI" 5 "Non-prob./CAWI") position(6) rows(1) stack ///
	region(lstyle(foreground)) size(small)) ///
	graphregion(fcolor(white)) ///
	xlabel(722(3)743, valuelabel) xtitle("Month of fieldwork start") ytitle("Relative frequency (%)") ///
	title("a) Survey design distribution (N = 686)") ///
	saving(fig3a)
graph export ${resultsdir}\figure3a.emf, replace

*** survey quality over time - interpretability ***
*** additional variables for the figure
sort field_start_my
by field_start_my: egen mind_int = mean(ind_int) if field_start_my <.

*** figure 3b)- mean interpretability over time
twoway (line mind_int field_start_my, cmissing(no) lcolor(gs10) lpattern(solid)) ///
	(lowess mind_int field_start_my, cmissing(no) lcolor(gs0) lpattern(solid)), ///
	xtitle("Month of fieldwork start") ///
	ytitle("Mean interpretability") ///
	xlabel(722(3)743, valuelabel nogrid) ///
	ylabel(3.4(0.1)4.1, grid format(%9.1f) gmin angle(horizontal)) ///
	legend(lab(1 "observed") lab(2 "smoothed (LOWESS)") position(6) rows(1) region(lstyle(foreground))) ///
	graphregion(fcolor(white)) ///
	title("b) Interpretability (N = 686)") ///
	saving(fig3b)
graph export ${resultsdir}\figure3b.emf, replace

*** survey quality over time - accessibility of results ***
snapshot restore 2
*indicator: results publication within 2 months after fieldwork start
gen results_2m = results_timeelapse <= 2
sort field_start_my
by field_start_my: egen mresults_2m = mean(results_2m)
tab field_start_my results_2m, row

*** figure 3c)- accessibility of results (proportion of surveys with results publication within 2 months after fieldwork start) over time
twoway (line mresults_2m field_start_my, cmissing(no) lcolor(gs10) lpattern(solid)) ///
	(lowess mresults_2m field_start_my, cmissing(no) lcolor(gs0) lpattern(solid)), ///
	xtitle("Month of fieldwork start") ///
	ytitle("Relative frequency (%)") ///
	xlabel(722(3)743, valuelabel nogrid) ///
	ylabel(0.2(0.1)0.7, grid format(%9.1f) gmin angle(horizontal)) ///
	legend(lab(1 "observed") lab(2 "smoothed (LOWESS)") position (6) rows(1) region(lstyle(foreground))) ///
	graphregion(fcolor(white)) ///
	title("c) Accessibility of results (N = 483)") ///
	saving(fig3c)
graph export ${resultsdir}\figure3c.emf, replace

*** survey quality over time - accessibility of data ***
snapshot restore 4
*indicator: data publication within 12 months after fieldwork start
gen data_12m = data_timeelapse <= 12
sort field_start_my
by field_start_my: egen mdata_12m = mean(data_12m)
tab field_start_my data_12m, row

*** figure 3d)- accessibility of data (proportion of surveys with data publication within 12 months after fieldwork start) over time
capture erase fig3d.gph
twoway (line mdata_12m field_start_my, cmissing(no) lcolor(gs10) lpattern(solid)) ///
	(lowess mdata_12m field_start_my, cmissing(no) lcolor(gs0) lpattern(solid)), ///
	xtitle("Month of fieldwork start") ///
	ytitle("Relative frequency (%)") ///
	xlabel(722(3)743, valuelabel nogrid) ///
	ylabel(0.2(0.1)0.7, grid format(%9.1f) gmin angle(horizontal)) ///
	legend(lab(1 "observed") lab(2 "smoothed (LOWESS)") position (6) rows(1) region(lstyle(foreground))) ///
	graphregion(fcolor(white)) ///
	title("d) Accessibility of data (N = 590)") ///
	saving(fig3d)
graph export ${resultsdir}\figure3d.emf, replace

***Combine graph ***
graph combine fig3a.gph fig3b.gph fig3c.gph fig3d.gph, scale(0.8)
graph export ${resultsdir}\figure3_combo.emf, replace
* NOTE(review): .emf export is named figure3_combo but .eps is named figure3 - confirm this is intended
graph export ${resultsdir}\figure3.eps, replace

********************************************************************************
**# 4) Results - Associations between quality dimensions ***********************
********************************************************************************

**# 4.1) interpretability *****************************************************
snapshot restore 1

*** model 1 - Table 2
mixed ind_int c.field_start_my##c.field_start_my || v2:, vce(cluster v2)
estat icc
estat ic
mat s=r(S)
local ll = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
* back-transform the log-standard-deviation entries of e(b) into variances
matrix b = e(b)
local id_var_col = colnumb(b, "lns1_1_1:_cons")
local id_variance = exp(b[1, `id_var_col'])^2
local res_var_col = colnumb(b, "lnsig_e:_cons")
local res_variance = exp(b[1, `res_var_col'])^2
display `id_variance', `res_variance'
outreg2 using "${resultsdir}/Table2_multi_interpretability.doc", replace label ctitle(Model 1) dec(3) addstat(Var_constant,"`id_variance'", Var_res_variance, "`res_variance'", Log-Likelihood, `ll', aic, `aic', bic, `bic')
estimates store model1

*** model 2 - Table 2
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
estat icc
estat ic
mat s=r(S)
local ll = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
matrix b = e(b)
local id_var_col = colnumb(b, "lns1_1_1:_cons")
local id_variance = exp(b[1, `id_var_col'])^2
local res_var_col = colnumb(b, "lnsig_e:_cons")
local res_variance = exp(b[1, `res_var_col'])^2
display `id_variance', `res_variance'
outreg2 using "${resultsdir}/Table2_multi_interpretability.doc", label ctitle(Model 2) dec(3) append addstat(Var_constant,"`id_variance'", Var_res_variance, "`res_variance'", Log-Likelihood, `ll', aic, `aic', bic, `bic')
estimates store model2

*** pseudo R2
* Step 1: Fit the null model (no predictors) and extract variance components
mixed ind_int || v2:, vce(cluster v2)
matrix b = e(b)
local id_var_col_null = colnumb(b, "lns1_1_1:_cons")
local var_v2_null = exp(b[1, `id_var_col_null'])^2
local res_var_col_null = colnumb(b, "lnsig_e:_cons")
local var_residual_null = exp(b[1, `res_var_col_null'])^2
display `var_v2_null' `var_residual_null'
* Step 2: Fit the full model and extract variance components
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
matrix b = e(b)
local id_var_col_full = colnumb(b, "lns1_1_1:_cons")
local var_v2_full = exp(b[1, `id_var_col_full'])^2
local res_var_col_full = colnumb(b, "lnsig_e:_cons")
local var_residual_full = exp(b[1, `res_var_col_full'])^2
display `var_v2_full' `var_residual_full'
* Step 3: Calculate pseudo-R2 and display results
scalar R2_v2 = (`var_v2_null' - `var_v2_full') / `var_v2_null'
scalar R2_res = (`var_residual_null' - `var_residual_full') / `var_residual_null'
display "R2 for v2: " R2_v2
/* The random effect (v2) explains only about 11.8% of the variance in the dependent
variable (ind_int) after accounting for the predictors in the full model. This suggests
that adding the predictors (field_start_my and desgroups) does not significantly improve
the variance explained by the random effect, and the random effect plays a minor role in
explaining the outcome. */
display "R2 for residual: " R2_res
/* The residual variance is reduced by about 7.4% when the predictors are included in
the model. This suggests that the full model does a better job of explaining the
variation in ind_int, especially in terms of the residual variance, but still leaves a
significant amount of unexplained variance */

*** Wald test - desgroups
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
testparm ib5.desgroups
/* desgroups sign.
improve the model */

*** AIC/BIC - desgroups
mixed ind_int c.field_start_my##c.field_start_my || v2:, mle vce(cluster v2)
estimates store reduced
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, mle vce(cluster v2)
estimates store full
estimates stats full reduced /* the full model fits better - AIC and BIC lower */

*** AIC/BIC - quadratic vs. linear term for field_start_my
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
estimates store quadr
mixed ind_int c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
estimates store lin
estimates stats quadr lin

*** LR test of random effects (v2)
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, mle
estimates store re
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups, mle
estimates store nore
lrtest re nore /* RE justified - the random intercepts for v2 account for meaningful variability in the outcome ind_int */

*** robustness check - restricted interpretability index - Appendix Table A2
* restricted index
mixed ind_int_4 c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
outreg2 using "${resultsdir}/TableA2_multi_robustcheck_interpret.doc", replace label ctitle(Interpret_ind_4) dec(3)
* full index
mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
outreg2 using "${resultsdir}/TableA2_multi_robustcheck_interpret.doc", label ctitle(Interpret_ind_5) dec(3) append

*** calendar time effect - predicted values - Appendix Figure A1
* regression models
eststo clear
eststo: mixed ind_int c.field_start_my##c.field_start_my || v2:, vce(cluster v2)
margins, at(field_start_my=(722(1)743)) noestimcheck saving(file1, replace)
eststo: mixed ind_int c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
margins, at(field_start_my=(722(1)743)) noestimcheck saving(file2, replace)
esttab, not label r2 obslast star(+ 0.10 * 0.05 ** 0.01) nodepvars nonumbers mtitles("1" "2") title("Effect of sampling and mode on interpretability")

* figure A1
combomarginsplot file1 file2, ///
	labels("unadjusted" "adjusted by survey design") ///
	noci ///
	file1opts(recast(line) lcolor(gs10) lpattern(solid)) ///
	file2opts(recast(line) lcolor(gs0) lpattern(solid)) ///
	xtitle("Month of fieldwork start") ///
	ytitle("Predicted mean") ///
	xlabel(722(3)743, valuelabel) ///
	ylabel(3.4(0.1)3.7, grid format(%9.2f) gmin angle(horizontal)) ///
	legend(colfirst) ///
	title("", justification(left)) ///
	scheme(s1mono)
graph export ${resultsdir}\figurea1_multi_interpret_app.emf, replace
graph export ${resultsdir}\figurea1_multi_interpret_app.eps, replace

**# 4.2) accessibility first results *******************************************
snapshot restore 3

*** Choosing a specification for modelling process time ***
* comparison of AIC/BIC criterion
eststo clear
eststo: melogit event c.msfs_c || v2:, vce(cluster v2) //1) linear
eststo: melogit event c.msfs_c##c.msfs_c || v2:, vce(cluster v2) //2) quadratic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c || v2:, vce(cluster v2) //3) cubic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c || v2:, vce(cluster v2) //4) fourth order
eststo: melogit event c.msfs_log || v2:, vce(cluster v2) //5) analogous to Weibull model (https://www.stata.com/manuals13/stdiscrete.pdf)
eststo: melogit event c.msfs c.msfs_log || v2:, vce(cluster v2) //6) linear + log
eststo: melogit event c.msfs##c.msfs_log || v2:, vce(cluster v2) //7) linear ## log
eststo: melogit event c.msfs_log##c.msfs_log || v2:, vce(cluster v2) //8) log ## log
eststo: melogit event i.msfs_k1 || v2:, vce(cluster v2) //9) Dummy coding
* FIX: nine models are stored above, so nine model titles are needed (was "1"-"8")
esttab, aic bic not label pr2 obslast star(+ 0.10 * 0.05 ** 0.01) nodepvars nonumbers mtitles("1" "2" "3" "4" "5" "6" "7" "8" "9")

*** Choosing a specification for modelling calendar time dependency ***
* comparison of AIC/BIC (I)
eststo clear
eststo: melogit event c.msfs_log##c.msfs_log c.fs_myc || v2:, vce(cluster v2) //1) linear
eststo: melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc || v2:, vce(cluster v2) //2) quadratic
eststo: melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc##c.fs_myc || v2:, vce(cluster v2) //3) cubic
eststo: melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc##c.fs_myc##c.fs_myc || v2:, vce(cluster v2) //4) fourth order
esttab, aic bic not label pr2 obslast star(+ 0.10 * 0.05 ** 0.01) nodepvars nonumbers mtitles("1" "2" "3" "4")

*** Models ***

*** model 3 - Table 2
melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc##c.fs_myc || v2:, vce(cluster v2)
* FIX: was "estimates store model5", which section 4.3 later overwrites with its
* own model 5; store under the name matching the Table 2 numbering instead
estimates store model3
estat icc
estat ic
mat s = r(S)
local ll  = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
outreg2 using "${resultsdir}/Table2_multi_accessibility_results.doc", replace label ctitle(Model 3) dec(3) addstat(Log-Likelihood, `ll', aic, `aic', bic, `bic')

*** model 4 - Table 2
melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2)
* FIX: was "estimates store model6" (same collision as above)
estimates store model4
estat icc
estat ic
mat s = r(S)
local ll  = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
outreg2 using "${resultsdir}/Table2_multi_accessibility_results.doc", label ctitle(Model 4) dec(3) append addstat(Log-Likelihood, `ll', aic, `aic', bic, `bic')

*** Wald tests
melogit event c.msfs_log##c.msfs_log c.fs_myc##c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2)
testparm ib5.desgroups
testparm c.fs_myc##c.fs_myc##c.fs_myc

*** Deriving predicted probabilities for Figure A1 (Appendix) ***
* NOTE(review): margins evaluates field_start_my at 1-22, so this assumes
* field_start_my is on the rescaled 1-22 scale (Mar. 20 = 1) in snapshot 3,
* not the 722-743 monthly coding used earlier -- verify against the snapshot
melogit event c.msfs_log##c.msfs_log c.field_start_my##c.field_start_my##c.field_start_my || v2:, vce(cluster v2)
margins, at(field_start_my=(1(1)22)) noestimcheck saving(file1, replace)
melogit event c.msfs_log##c.msfs_log c.field_start_my##c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2)
margins, at(field_start_my=(1(1)22)) noestimcheck saving(file2, replace)
combomarginsplot file1 file2, ///
	labels("unadjusted" "adjusted by survey design") ///
	noci ///
	file1opts(recast(line) lcolor(gs10) lpattern(solid)) ///
	file2opts(recast(line) lcolor(gs0) lpattern(solid)) ///
	xtitle("Month of fieldwork start") ///
	ytitle("Probability") ///
	xtick(1(1)22) ///
	xlabel(1 "Mar. 20" 3 "May 20" 5 "July 20" 7 "Sept. 20" 9 "Nov. 20" 11 "Jan. 21" 13 "Mar. 21" 15 "May 21" 17 "July 21" 19 "Sept. 21" 21 "Nov. 21", labsize(small)) ///
	ylabel(0.1(0.05)0.4, grid format(%9.2f) gmin angle(horizontal)) ///
	title("") ///
	/*subtitle("unadjusted vs. adjusted by survey design", justification(left))*/ ///
	legend(colfirst) ///
	scheme(s1mono)
graph export ${resultsdir}\figurea1_multi_access_results_app.emf, replace
graph export ${resultsdir}\figurea1_multi_access_results_app.eps, replace

**# 4.3) accessibility data ****************************************************
snapshot restore 5

*** Choosing a specification for modelling process time ***
* comparison of AIC/BIC criterion
eststo clear
eststo: melogit event c.msfs_c || v2:, vce(cluster v2) difficult intmethod(laplace) //1) linear
eststo: melogit event c.msfs_c##c.msfs_c || v2:, vce(cluster v2) difficult intmethod(laplace) //2) quadratic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c || v2:, vce(cluster v2) difficult intmethod(laplace) //3) cubic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c || v2:, vce(cluster v2) difficult intmethod(laplace) //4) fourth order
eststo: melogit event c.msfs_log || v2:, vce(cluster v2) difficult intmethod(laplace) //5) analogous to Weibull model
eststo: melogit event c.msfs c.msfs_log || v2:, vce(cluster v2) difficult intmethod(laplace) //6) linear + log
eststo: melogit event c.msfs##c.msfs_log || v2:, vce(cluster v2) difficult intmethod(laplace) //7) linear ## log
eststo: melogit event c.msfs_log##c.msfs_log || v2:, vce(cluster v2) difficult intmethod(laplace) //8) log ## log
eststo: melogit event i.msfs_k1 || v2:, vce(cluster v2) difficult intmethod(laplace) //9) Dummy coding
esttab, aic bic not label pr2 obslast star(+ 0.10 * 0.05 ** 0.01) nodepvars nonumbers mtitles("1" "2" "3" "4" "5" "6" "7" "8" "9")

*** Choosing a specification for modelling calendar time dependency ***
* comparison of AIC/BIC
eststo clear
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc || v2:, vce(cluster v2) difficult intmethod(laplace) //1) linear
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc || v2:, vce(cluster v2) difficult intmethod(laplace) //2) quadratic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc##c.fs_myc || v2:, vce(cluster v2) difficult intmethod(laplace) //3) cubic
eststo: melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc##c.fs_myc##c.fs_myc || v2:, vce(cluster v2) difficult intmethod(laplace) //4) fourth order
* FIX: replaced leftover placeholder title("XXX") with a descriptive table title
esttab, aic bic not label pr2 obslast star(+ 0.10 * 0.05 ** 0.01) nodepvars nonumbers mtitles("1" "2" "3" "4") title("Calendar time specification - accessibility of data")

*** Models ***

*** model 5 - Table 2
melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc || v2:, vce(cluster v2) difficult intmethod(laplace)
estimates store model5
estat icc
estat ic
* r(S) from -estat ic-: col 3 = ll(model), col 5 = AIC, col 6 = BIC
mat s = r(S)
local ll  = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
outreg2 using "${resultsdir}/Table2_multi_accessibility_data.doc", replace label ctitle(Model 5) dec(3) addstat(Log-Likelihood, `ll', aic, `aic', bic, `bic')

*** model 6 - Table 2
melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2) difficult intmethod(laplace)
estimates store model6
estat icc
estat ic
mat s = r(S)
local ll  = s[1,3] // Log-Likelihood
local aic = s[1,5] // AIC
local bic = s[1,6] // BIC
display "Log-Likelihood: `ll'"
display "AIC: `aic'"
display "BIC: `bic'"
outreg2 using "${resultsdir}/Table2_multi_accessibility_data.doc", label ctitle(Model 6) dec(3) append addstat(Log-Likelihood, `ll', aic, `aic', bic, `bic')

*** Wald tests
quietly melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2) difficult intmethod(laplace)
testparm ib5.desgroups
testparm c.fs_myc##c.fs_myc

*** Deriving predicted probabilities for Figure A1 (Appendix) ******************
melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.field_start_my##c.field_start_my || v2:, vce(cluster v2) difficult intmethod(laplace) // field_start_my - uncentered
margins, at(field_start_my=(1(1)22)) noestimcheck saving(file1, replace)
melogit event c.msfs_c##c.msfs_c##c.msfs_c##c.msfs_c c.field_start_my##c.field_start_my ib5.desgroups || v2:, vce(cluster v2) difficult intmethod(laplace) // field_start_my - uncentered
margins, at(field_start_my=(1(1)22)) noestimcheck saving(file2, replace)
combomarginsplot file1 file2, ///
	labels("unadjusted" "adjusted by survey design") ///
	noci ///
	file1opts(recast(line) lcolor(gs10) lpattern(solid)) ///
	file2opts(recast(line) lcolor(gs0) lpattern(solid)) ///
	xtitle("Month of fieldwork start") ///
	ytitle("Probability") ///
	xtick(1(1)22) ///
	xlabel(1 "Mar. 20" 3 "May 20" 5 "July 20" 7 "Sept. 20" 9 "Nov. 20" 11 "Jan. 21" 13 "Mar. 21" 15 "May 21" 17 "July 21" 19 "Sept. 21" 21 "Nov. 21", labsize(small)) ///
	ylabel(0.1(0.01)0.17, grid format(%9.2f) gmin angle(horizontal)) ///
	title("") ///
	/*subtitle("unadjusted vs. adjusted by survey design", justification(left))*/ ///
	legend(colfirst) ///
	scheme(s1mono)
graph export ${resultsdir}\figurea1_multi_access_data_app.emf, replace
graph export ${resultsdir}\figurea1_multi_access_data_app.eps, replace

********************************************************************************
**# 4.4) Robustness check survey design-accessibility (missing publication date)
********************************************************************************
snapshot restore 1

*** prepare vars
* grand-mean-center fieldwork start month for the cubic/quadratic calendar terms
summ field_start_my
gen fs_myc = field_start_my - `r(mean)'

*** define category of known publication but an unknown date
gen res_3cat = results_avail
replace res_3cat = 2 if results_avail==1 & results_my==.
label var res_3cat "Results published"
label define res_3cat 0 "No" 1 "Yes" 2 "Yes, but an unknown date", replace
label value res_3cat res_3cat
tab res_3cat

gen data_3cat = data_avail
replace data_3cat = 2 if data_avail==1 & data_my==.
label var data_3cat "Data published"
label define data_3cat 0 "No" 1 "Yes" 2 "Yes, but an unknown date", replace
label value data_3cat data_3cat
tab data_3cat

*** robustness check - mixed logit models
* results: exclude vs. include surveys whose publication date is unknown
melogit results_avail c.fs_myc##c.fs_myc##c.fs_myc ib5.desgroups if res_3cat!=2 || v2:, vce(cluster v2) difficult intmethod(laplace)
outreg2 using "${resultsdir}/TableA3_robustcheck_multi_access_res.doc", replace label ctitle(Results - without unknown date) dec(3)
melogit results_avail c.fs_myc##c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2) difficult intmethod(laplace)
outreg2 using "${resultsdir}/TableA3_robustcheck_multi_access_res.doc", append label ctitle(Results - with unknown date) dec(3)
* data
melogit data_avail c.fs_myc##c.fs_myc ib5.desgroups if data_3cat!=2 || v2:, vce(cluster v2) difficult intmethod(laplace)
outreg2 using "${resultsdir}/TableA3_robustcheck_multi_access_data.doc", replace label ctitle(Results - without unknown date) dec(3)
melogit data_avail c.fs_myc##c.fs_myc ib5.desgroups || v2:, vce(cluster v2) difficult intmethod(laplace)
outreg2 using "${resultsdir}/TableA3_robustcheck_multi_access_data.doc", append label ctitle(Results - with unknown date) dec(3)