// Close any log that is still open; -capture- suppresses the error that
// -log close- raises when no log is open.
capture log close
// Record this run in a plain-text log, overwriting an existing log file.
log using srm-zack-descriptive, replace text
// Run the commands below under Stata 15.1 version control.
version 15.1
// Start from an empty session so nothing left in memory affects the run.
clear all
// Drop all macros so values defined before this script cannot leak in.
macro drop _all
// Wrap output at 80 columns.
set linesize 80
// Default graph scheme for the session.
set scheme s1manual

//  Descriptive Statistics

//  Replication script for:
//
//      Can Nonprobability Samples Be Used for Social Science Research?
//      Elizabeth S. Zack, John M. Kennedy, and J. Scott Long
//
//  For questions, contact: Elizabeth Zack elizabeth.sarah.zack@gmail.com

//  The analysis uses SPost13 commands. From the Stata command line, run
//  -search spost13- and install the package.

//  srm-zack-descriptive.do / Scott Long / 2019-06-26

//  ===============================================================
//  #1 load data, select cases, and choose variables

//  Open the analysis file and retain only cases with sampivnomiss equal to 1.
use "srm-zack1", clear
keep if sampivnomiss == 1

//  Outcome (left-hand-side) variables.
local lhsvars ///
    happy2 life2 helpful2 nextgen2 toofast2 advfront2 fechild2 sexeduc1 ///
    pillok2 abany1 helppoor2 helpnot2 getahead1 cappun1 courts1 wrkwayup2 ///
    kcore2 kradioact2 klasers2 kelectron2 kodds1 bible1

//  Predictors: continuous & binary variables first, then categorical ones;
//  rhsvars is the concatenation of the two lists.
local rhsconbin female white educ2 age2 region3
local rhscat    marital1 incmid polviews1 partyid1 relig3
local rhsvars   `rhsconbin' `rhscat'

//  ===============================================================
//  #2 Table 2 - Descriptive

//  means and percentages

//  Summary statistics for the continuous and binary predictors, by sample.
//  -bysort- sorts on dataset first, so the command does not depend on the
//  order in which the data file was saved, and the grouping variable is
//  spelled out in full rather than relying on variable-name abbreviation.
bysort dataset: summarize `rhsconbin'

//  Column percentages (within each sample) for the categorical predictors.
//  The list comes from the rhscat local so it is maintained in one place.
foreach var of local rhscat {
    tabulate `var' dataset, column nofreq
}

//  test gss vs mturk and qualtrics

//  Chi-square tests for the binary predictors; the -di- lines echo the
//  command into the log so each table can be identified.
foreach var in female white educ2 {
    di _new ". tab gssmturk `var'"
    tab gssmturk `var', chi2  nofreq
    di _new ". tab gssqual `var'"
    tab gssqual `var', chi2  nofreq
}

//  Age is compared with t-tests rather than chi-square tests.
ttest age2, by(gssmturk)
ttest age2, by(gssqual)

//  Chi-square tests for region3 and the categorical predictors.
foreach var in region3 `rhscat' {
    di _new ". tab gssmturk `var'"
    tab gssmturk `var', chi2 nofreq
    di _new ". tab gssqual `var'"
    tab gssqual `var', chi2 nofreq
}

//  ===============================================================
//  #3 Figure 1 - statistics corresponding to plot

//  Column percentages of every outcome within each sample.
foreach outcome of local lhsvars {
    tabulate `outcome' dataset, column nofreq
}

//  ===============================================================
//  #4 sample size for each outcome

//  The Obs column of -summarize- gives the number of nonmissing cases for
//  each outcome within each sample. -bysort- sorts on dataset first, so the
//  command does not fail with "not sorted" if the data order has changed.
bysort dataset: summarize `lhsvars'

//  ===============================================================
//  #5 GSS outcomes compared to mturk and qualtrics

//  One-column matrices that collect the chi-square p-value for each outcome:
//  gVm holds the gssmturk comparisons, gVq the gssqual comparisons. The
//  number of rows is taken from the outcome list itself so that the matrices
//  stay in step with lhsvars if outcomes are added or removed.
local nlhs : word count `lhsvars'
foreach m in gVm gVq {
    matrix `m' = J(`nlhs', 1, .)
    matrix rownames `m' = `lhsvars'
    matrix colnames `m' = pvalue
}

//  Row i of each matrix corresponds to the i-th outcome in lhsvars.
local i = 1
foreach var of local lhsvars {
    di _new ". tab gssmturk `var'"
    tab gssmturk `var', chi2  nofreq
    //  r(p) is the p-value of the Pearson chi-square just computed
    matrix gVm[`i',1] = r(p)

    di _new ". tab gssqual `var'"
    tab gssqual `var', chi2  nofreq
    matrix gVq[`i',1] = r(p)

    local ++i
}

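/* Copy of results from log file, with rows re-sorted by p-value (matlist
   itself lists the outcomes in the order of the lhsvars local). The #n
   annotations give the running row count at that row.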

. matlist gVm, format(%5.2f)

             | pva~e
-------------+-------
      happy2 |  0.00
       life2 |  0.00
    toofast2 |  0.00
   advfront2 |  0.00
    fechild2 |  0.00
     pillok2 |  0.00
      abany1 |  0.00
   helppoor2 |  0.00
   getahead1 |  0.00
     cappun1 |  0.00
     courts1 |  0.00
   wrkwayup2 |  0.00
      kcore2 |  0.00
  kradioact2 |  0.00
    klasers2 |  0.00
  kelectron2 |  0.00
      bible1 |  0.00
    helpnot2 |  0.01
    nextgen2 |  0.05 #19
    sexeduc1 |  0.17
      kodds1 |  0.18
    helpful2 |  0.20 #22

. matlist gVq, format(%5.2f)

             | pva~e
-------------+-------
      happy2 |  0.00
       life2 |  0.00
   advfront2 |  0.00
    fechild2 |  0.00
    sexeduc1 |  0.00
      abany1 |  0.00
   getahead1 |  0.00
  kelectron2 |  0.00 #8
     cappun1 |  0.06
    toofast2 |  0.13
      kodds1 |  0.16
    nextgen2 |  0.19
      bible1 |  0.22
   wrkwayup2 |  0.25
     pillok2 |  0.28
  kradioact2 |  0.31
    helpful2 |  0.54
     courts1 |  0.57
   helppoor2 |  0.60
    klasers2 |  0.66
    helpnot2 |  0.80
      kcore2 |  0.96
*/

//  Print both p-value matrices (gVm first, then gVq) with two decimals.
foreach pmat in gVm gVq {
    matlist `pmat', format(%5.2f)
}

//  ===============================================================
//  #6 age restricted sample

//  mturk 18-45

//  Percentage of the dataset==2 cases that are aged 18-45.
//  NOTE(review): dataset==2 is presumably the MTurk sample, per the section
//  heading -- confirm against the value labels of dataset.
count if dataset==2
local Nmt = r(N)
//  inrange() is false when age2 is missing, matching age2>=18 & age2<=45.
count if inrange(age2, 18, 45) & dataset==2
local Nmt1845 = r(N)
di 100*`Nmt1845'/`Nmt'

//  Ns in restricted sample

//  Keep ages 18-45 only; cases with missing age2 are dropped as well, just
//  as -drop if age2>45- drops them (missing sorts above every number).
keep if inrange(age2, 18, 45)
//  -bysort- sorts on dataset first, so no prior sort order is assumed.
bysort dataset: summarize `lhsvars'

log close
exit
