#####  ##############################################################################################  #####
#####  Is there an association between survey characteristics and representativeness? A meta-analysis  #####
#####  Carina Cornesse                                                                                 #####
#####  SFB 884 "Political Economy of Reforms", University of Mannheim                                  #####
#####  and GESIS - Leibniz Institute for the Social Sciences                                           ##### 
#####  and                                                                                             #####
#####  Michael Bosnjak                                                                                 #####
#####  GESIS - Leibniz Institute for the Social Sciences                                               ##### 
#####  and School of Social Sciences, University of Mannheim                                           #####
#####  ##############################################################################################  #####


## NOTE: the workspace is intentionally not cleared here. A script should not
## delete whatever objects the user happens to have in the global environment;
## run it in a fresh R session if a clean workspace is wanted.

## Directory that holds the tab-separated input files (adjust to your system)
data_dir <- "A:/A8/research/WORK/3_Metaanalyse/Representativeness/3_Data"

## load the R-Indicator dataset
Metaanalyse_R <- read.table(
  file.path(data_dir, "R_Indicators", "Meta_Analysis_R_20170307.txt"),
  header = TRUE, sep = "\t"
)

## load the Median Absolute Bias dataset
Metaanalyse_Benchmarks <- read.table(
  file.path(data_dir, "Benchmarks", "Meta_Analysis_MAB_20170623.txt"),
  header = TRUE, sep = "\t"
)

## load the Mean Absolute Bias dataset (used for the sensitivity analysis)
Metaanalyse_Benchmarks_mean <- read.table(
  file.path(data_dir, "Benchmarks", "Meta_Analysis_MeanAB_20170628.txt"),
  header = TRUE, sep = "\t"
)

## load the metafor package
# install.packages('metafor') when using metafor for the first time
library(metafor)

##### ########################## ######
#####     General findings       ######
##### (section 5.1 in the paper) ######
##### ########################## ######

#### R-Indicators ####

## Null model on all R-Indicator effect sizes (outliers still included)
res_imp <- rma(yi = es, vi = vi_imp, data = Metaanalyse_R, measure = "GEN")
res_imp

## Outlier screening via the influence diagnostics, plotted with an
## 8-by-1 panel layout; the margins are shrunk first and stay that way
inf <- influence(res_imp)
par(mar = c(1, 1, 1, 1))
plot(inf, layout = c(8, 1))
# identified outlier: study No. 43

## Null model refitted without study 43 (as presented in the paper)
R.without.outliers.re <- Metaanalyse_R[which(Metaanalyse_R$X_mi_id != 43), , drop = FALSE]
res_imp.no.outliers <- rma(
  yi = es, vi = vi_imp, data = R.without.outliers.re, measure = "GEN"
)
res_imp.no.outliers

#### Median Absolute Bias ####

## Null model on all Median Absolute Bias effect sizes (outliers included)
res <- rma(yi = es, vi = vi, data = Metaanalyse_Benchmarks, measure = "GEN")
res

## Outlier screening via the influence diagnostics
inf <- influence(res)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2 & 72
## NOTE(review): the MAB moderator analyses further down exclude studies
## 2 & 73 instead of 2 & 72 -- confirm which id is intended here.

## Null model refitted without the outliers (as presented in the paper)
Benchmarks.without.outliers <- Metaanalyse_Benchmarks[
  which(!(Metaanalyse_Benchmarks$id_h == 2 | Metaanalyse_Benchmarks$id_h == 72)),
  ,
  drop = FALSE
]

res_imp.without.outliers <- rma(
  yi = es, vi = vi, data = Benchmarks.without.outliers, measure = "GEN"
)
res_imp.without.outliers

##### ########################## ######
#####     Moderator analyses     ######
##### (Section 5.2 in the paper) ######
##### ########################## ######

#### Probability vs. nonprobability surveys: R-Indicator findings ####

## No mixed-effects models, because k(nonprob)=0


#### Probability vs. nonprobability surveys: MAB findings ####

## Meta-regression of MAB on the `sample` moderator, all studies included
res.metareg.sample <- rma(
  yi = es, vi = vi, mods = sample,
  data = Metaanalyse_Benchmarks, measure = "GEN"
)
res.metareg.sample

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.sample)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2 & 73

## Same meta-regression refitted without studies 2 and 73
Benchmarks.without.outliers.sample <- Metaanalyse_Benchmarks[
  which(!(Metaanalyse_Benchmarks$id_h == 2 | Metaanalyse_Benchmarks$id_h == 73)),
  ,
  drop = FALSE
]

res.metareg.mixed.no.outlier <- rma(
  yi = es, vi = vi, mods = sample,
  data = Benchmarks.without.outliers.sample, measure = "GEN"
)
res.metareg.mixed.no.outlier


#### Response rate: R-Indicator findings ####

## Meta-regression of the R-Indicators on the `rr` moderator, all studies included
res.metareg.rr.imp <- rma(
  yi = es, vi = vi_imp, mods = rr,
  data = Metaanalyse_R, measure = "GEN"
)
res.metareg.rr.imp

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.rr.imp)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 14, 35, 36, 37, 49

## Same meta-regression refitted without the five outlying studies
R.without.outliers.rr <- Metaanalyse_R[
  which(!(Metaanalyse_R$X_mi_id == 14 |
            Metaanalyse_R$X_mi_id == 35 |
            Metaanalyse_R$X_mi_id == 36 |
            Metaanalyse_R$X_mi_id == 37 |
            Metaanalyse_R$X_mi_id == 49)),
  ,
  drop = FALSE
]

res.metareg.rr.imp.no.outliers <- rma(
  yi = es, vi = vi_imp, mods = rr,
  data = R.without.outliers.rr, measure = "GEN"
)
res.metareg.rr.imp.no.outliers


#### Response rate: MAB findings ####

## Meta-regression of MAB on the `rr` moderator, all studies included
res.metareg.rr <- rma(
  yi = es, vi = vi, mods = rr,
  data = Metaanalyse_Benchmarks, measure = "GEN"
)
res.metareg.rr

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.rr)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2 & 73

## Same meta-regression refitted without studies 2 and 73
Benchmarks.without.outliers.rr <- Metaanalyse_Benchmarks[
  which(!(Metaanalyse_Benchmarks$id_h == 73 | Metaanalyse_Benchmarks$id_h == 2)),
  ,
  drop = FALSE
]

## The refit reuses the name of the model above and therefore replaces it
res.metareg.rr <- rma(
  yi = es, vi = vi, mods = rr,
  data = Benchmarks.without.outliers.rr, measure = "GEN"
)
res.metareg.rr


#### Mixed vs. single mode: R-Indicator findings ####

## Meta-regression of the R-Indicators on the `mixed` moderator, all studies included
res.metareg.mixed.imp <- rma(
  yi = es, vi = vi_imp, mods = mixed,
  data = Metaanalyse_R, measure = "GEN"
)
res.metareg.mixed.imp

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.mixed.imp)
plot(inf, layout = c(8, 1))
## identified outliers: study No. 49

## Same meta-regression refitted without study 49
R.without.outliers.mode <- Metaanalyse_R[which(Metaanalyse_R$X_mi_id != 49), , drop = FALSE]

res.metareg.mixed.imp.no.outliers <- rma(
  yi = es, vi = vi_imp, mods = mixed,
  data = R.without.outliers.mode, measure = "GEN"
)
res.metareg.mixed.imp.no.outliers

#### Mixed vs. single mode: MAB findings ####

## No mixed-effects models, because k(mixed)=2


#### Web vs. other single mode: R-Indicator findings ####

## No mixed-effects models, because k(web)=1


#### Web vs. other single mode: MAB findings ####

## Meta-regression of MAB on the `web` moderator, all studies included
res.metareg.web <- rma(
  yi = es, vi = vi, mods = web,
  data = Metaanalyse_Benchmarks, measure = "GEN"
)
res.metareg.web

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.web)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2 & 73

## Same meta-regression refitted without studies 2 and 73
Benchmarks.without.outliers.web <- Metaanalyse_Benchmarks[
  which(!(Metaanalyse_Benchmarks$id_h == 73 | Metaanalyse_Benchmarks$id_h == 2)),
  ,
  drop = FALSE
]

## The refit reuses the name of the model above and therefore replaces it
res.metareg.web <- rma(
  yi = es, vi = vi, mods = web,
  data = Benchmarks.without.outliers.web, measure = "GEN"
)
res.metareg.web


#### Auxiliary variables: R-Indicator findings #####

## Meta-regression of the R-Indicators on the `auxvars` moderator, all studies included
res.metareg.auxvars.imp <- rma(
  yi = es, vi = vi_imp, mods = auxvars,
  data = Metaanalyse_R, measure = "GEN"
)
res.metareg.auxvars.imp

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.auxvars.imp)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 107, 108, 109

## Same meta-regression refitted without studies 107, 108 and 109
R.without.outliers.auxvars <- Metaanalyse_R[
  which(!(Metaanalyse_R$X_mi_id == 107 |
            Metaanalyse_R$X_mi_id == 108 |
            Metaanalyse_R$X_mi_id == 109)),
  ,
  drop = FALSE
]

## The refit reuses the name of the model above and therefore replaces it
res.metareg.auxvars.imp <- rma(
  yi = es, vi = vi_imp, mods = auxvars,
  data = R.without.outliers.auxvars, measure = "GEN"
)
res.metareg.auxvars.imp


#### Auxiliary variables: MAB findings ####

## Meta-regression of MAB on the `numvars` moderator, all studies included
res.metareg.auxvars <- rma(
  yi = es, vi = vi, mods = numvars,
  data = Metaanalyse_Benchmarks, measure = "GEN"
)
res.metareg.auxvars

## Outlier screening via the influence diagnostics
inf <- influence(res.metareg.auxvars)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2, 31, 32, 68, 72, 73

## Same meta-regression refitted without the six outlying studies
Benchmarks.without.outliers.auxvars <- Metaanalyse_Benchmarks[
  which(!(Metaanalyse_Benchmarks$id_h == 2 |
            Metaanalyse_Benchmarks$id_h == 31 |
            Metaanalyse_Benchmarks$id_h == 32 |
            Metaanalyse_Benchmarks$id_h == 68 |
            Metaanalyse_Benchmarks$id_h == 72 |
            Metaanalyse_Benchmarks$id_h == 73)),
  ,
  drop = FALSE
]

## The refit reuses the name of the model above and therefore replaces it
res.metareg.auxvars <- rma(
  yi = es, vi = vi, mods = numvars,
  data = Benchmarks.without.outliers.auxvars, measure = "GEN"
)
res.metareg.auxvars


### ############################## ###
### Graphs of subgroup comparisons ###
### ############################## ###

#### Probability vs. nonprobability surveys: R-Indicator findings ####

## No descriptive comparison of subgroups: prob. vs. nonprob. survey, because k(nonprob)=0


#### Probability vs. nonprobability surveys: MAB findings ####

## ---------------------------------------------------------------------------
## Plot helpers shared by Figures 2-4
## ---------------------------------------------------------------------------

## Draw one panel of a subgroup-comparison figure. For every fitted model the
## estimate (element `b`) is drawn as a dot with a capped whisker that runs
## from `ci.lb` to `ci.ub`.
##
##   fits          list of rma fits, one per group; a NULL entry marks a group
##                 without a model, whose slot on the x axis stays empty
##   groups        group names; " (k = <k of the fit>)" is appended for every
##                 fitted group, so the label always matches the model shown
##   title         panel title
##   ylim          y-axis limits (pass them reversed to flip the axis)
##   cap           half-width of the whisker caps, in x-axis units
##   percent_axis  if TRUE, a "%" label is written at 0, 1, ..., 7 on the y axis
draw_subgroup_panel <- function(fits, groups, title, ylim,
                                cap = 0.05, percent_axis = FALSE) {
  stopifnot(length(fits) == length(groups))

  has_fit <- !vapply(fits, is.null, logical(1))
  # Pull one scalar per fitted model, addressed by name rather than position
  pick <- function(field) {
    vapply(fits[has_fit],
           function(fit) as.numeric(fit[[field]])[1],
           numeric(1))
  }

  labels <- as.character(groups)
  labels[has_fit] <- sprintf("%s (k = %d)", labels[has_fit],
                             as.integer(pick("k")))

  # An invisible (white) boxplot is only used to set up the plotting frame
  # with one labelled x position per group; empty groups contribute no data.
  frame_data <- lapply(fits, function(fit) {
    if (is.null(fit)) numeric(0) else as.numeric(fit[["b"]])[1]
  })
  boxplot(x = frame_data,
          horizontal = FALSE,
          main = title,
          names = labels,
          ylim = ylim,
          las = 1,
          pch = ".",
          width = NULL,
          border = "white",
          cex.axis = 0.8)

  at <- seq_along(fits)[has_fit]
  est <- pick("b")
  lower <- pick("ci.lb")
  upper <- pick("ci.ub")

  points(x = at, y = est, pch = 19, cex = 1)
  segments(at, lower, at, upper)              # whiskers
  segments(at - cap, upper, at + cap, upper)  # caps at ci.ub
  segments(at - cap, lower, at + cap, lower)  # caps at ci.lb

  if (percent_axis) {
    axis(side = 2, labels = rep("%", 8),
         tick = FALSE,
         line = NA,
         at = 0:7,
         padj = 0.49,
         hadj = 0.1,
         las = 1,
         cex = 0.3,
         col.ticks = "white")
  }
  invisible(NULL)
}

## Draw a two-panel figure. `left` and `right` are argument lists for
## draw_subgroup_panel(). The margins are set explicitly to R's defaults,
## because the par(mar = c(1, 1, 1, 1)) issued for the influence plots earlier
## in this script would otherwise leave no room for titles and axis labels;
## the previous margins are restored afterwards. As before, the 1-by-2 layout
## stays in effect after the figure.
draw_subgroup_figure <- function(left, right) {
  old_mar <- par("mar")
  on.exit(par(mar = old_mar), add = TRUE)
  par(mfrow = c(1, 2), mar = c(5, 4, 4, 2) + 0.1)
  do.call(draw_subgroup_panel, left)
  do.call(draw_subgroup_panel, right)
  invisible(NULL)
}


## Descriptive comparison of subgroups
res.subset.sample.non <- rma(measure = "GEN", yi = es, vi = vi,
                             data = Metaanalyse_Benchmarks, subset = (sample == "0"))
res.subset.sample.prob <- rma(measure = "GEN", yi = es, vi = vi,
                              data = Metaanalyse_Benchmarks, subset = (sample == "1"))
res.subset.sample.non
res.subset.sample.prob


#### Figure 2: Subgroup comparison results by probability versus nonprobability surveys as a moderator

## The R-Indicator panel shows the null model over all R-Indicator studies;
## there is no nonprobability model, so that slot stays empty.
res_imp <- rma(measure = "GEN", yi = es, vi = vi_imp, data = Metaanalyse_R)
res_imp

draw_subgroup_figure(
  left = list(fits = list(res_imp, NULL),
              groups = c("Prob.", "Nonprob."),
              title = "R-Indicators",
              ylim = c(0.6, 1)),
  right = list(fits = list(res.subset.sample.prob, res.subset.sample.non),
               groups = c("Prob.", "Nonprob."),
               title = "MAB",
               ylim = c(7, 0),
               cap = 0.1,
               percent_axis = TRUE)
)


#### Mixed vs. single mode: R-Indicator findings ####

## Descriptive comparison of subgroups
res.subset.mixed.imp <- rma(measure = "GEN", yi = es, vi = vi_imp,
                            data = Metaanalyse_R, subset = (mixed == "1"))
res.subset.single.imp <- rma(measure = "GEN", yi = es, vi = vi_imp,
                             data = Metaanalyse_R, subset = (mixed == "0"))
res.subset.mixed.imp
res.subset.single.imp


#### Mixed vs. single mode: MAB findings ####

## Descriptive comparison of subgroups
res.subset.mixed <- rma(measure = "GEN", yi = es, vi = vi,
                        data = Metaanalyse_Benchmarks, subset = (mixed == "1"))
res.subset.single <- rma(measure = "GEN", yi = es, vi = vi,
                         data = Metaanalyse_Benchmarks, subset = (mixed == "0"))
res.subset.mixed
res.subset.single


#### Figure 3: Subgroup comparison results by mixed-mode versus single-mode surveys as a moderator

draw_subgroup_figure(
  left = list(fits = list(res.subset.mixed.imp, res.subset.single.imp),
              groups = c("Mixed", "Single"),
              title = "R-Indicators",
              ylim = c(0.6, 1)),
  right = list(fits = list(res.subset.mixed, res.subset.single),
               groups = c("Mixed", "Single"),
               title = "MAB",
               ylim = c(7, 0),
               percent_axis = TRUE)
)


#### Web vs. other single mode: R-Indicator findings ####

## Descriptive comparison of subgroups
res.subset.web.imp <- rma(measure = "GEN", yi = es, vi = vi_imp,
                          data = Metaanalyse_R, subset = (web == "1"))
res.subset.other.imp <- rma(measure = "GEN", yi = es, vi = vi_imp,
                            data = Metaanalyse_R, subset = (web == "0"))
res.subset.web.imp
res.subset.other.imp

#### Web vs. other single mode: MAB findings ####

## Descriptive comparison of subgroups
res.subset.web <- rma(measure = "GEN", yi = es, vi = vi,
                      data = Metaanalyse_Benchmarks, subset = (web == "1"))
res.subset.other <- rma(measure = "GEN", yi = es, vi = vi,
                        data = Metaanalyse_Benchmarks, subset = (web == "0"))
res.subset.web
res.subset.other


#### Figure 4: Subgroup comparison results by web surveys versus other single-mode surveys as a moderator

draw_subgroup_figure(
  left = list(fits = list(res.subset.web.imp, res.subset.other.imp),
              groups = c("Web", "Other"),
              title = "R-Indicators",
              ylim = c(0.6, 1)),
  right = list(fits = list(res.subset.web, res.subset.other),
               groups = c("Web", "Other"),
               title = "MAB",
               ylim = c(7, 0),
               percent_axis = TRUE)
)

##### ########################### #####
#####      Further analyses       #####
##### (not reported in the paper) #####
##### ########################### #####

#### Sensitivity analysis: Trim & Fill funnel plot

## R-Indicator findings: trim-and-fill applied to whatever `res_imp` holds here
taf <- trimfill(res_imp)
funnel(taf)

## MAB findings: trim-and-fill applied to whatever `res` holds here
taf <- trimfill(res)
funnel(taf)

#### Weighted vs. unweighted data: MAB only

## Separate null models for weighted == "0" and weighted == "1"
res.subset.weighted.no <- rma(
  yi = es, vi = vi, data = Metaanalyse_Benchmarks,
  subset = (weighted == "0"), measure = "GEN"
)
res.subset.weighted.yes <- rma(
  yi = es, vi = vi, data = Metaanalyse_Benchmarks,
  subset = (weighted == "1"), measure = "GEN"
)
res.subset.weighted.no
res.subset.weighted.yes

## Meta-regression with `weighted` as the moderator
res.metareg.weighted <- rma(
  yi = es, vi = vi, mods = weighted,
  data = Metaanalyse_Benchmarks, measure = "GEN"
)
res.metareg.weighted

#### Business surveys vs. non-business surveys: R-Indicators only

## Separate null models for business == "0" and business == "1"
res.subset.business.no <- rma(
  yi = es, vi = vi_imp, data = Metaanalyse_R,
  subset = (business == "0"), measure = "GEN"
)
res.subset.business.yes <- rma(
  yi = es, vi = vi_imp, data = Metaanalyse_R,
  subset = (business == "1"), measure = "GEN"
)
res.subset.business.no
res.subset.business.yes

## Meta-regression with `business` as the moderator
res.metareg.business <- rma(
  yi = es, vi = vi_imp, mods = business,
  data = Metaanalyse_R, measure = "GEN"
)
res.metareg.business

#### Descriptive statistics
## Number of auxiliary variables: frequency tables of `auxvars` (R-Indicator
## data) and `numvars` (benchmark data)
table(Metaanalyse_R$auxvars)
table(Metaanalyse_Benchmarks$numvars)

## Mean difference in auxiliary variables across probability vs. nonprobability
## samples: t-test of `numvars` by `sample` in the benchmark data
t.test(numvars ~ sample, Metaanalyse_Benchmarks)

## Crosstable: sample type (rows) and mixed mode (columns)
table(Metaanalyse_Benchmarks$sample, Metaanalyse_Benchmarks$mixed)
# The benchmark data contain 47 nonprob. single-mode studies, 54 prob. single-mode studies, 2 nonprob. mixed-mode studies, and 6 prob. mixed-mode studies
table(Metaanalyse_R$sample, Metaanalyse_R$mixed)
# The R-Indicator data contain 0 nonprob. single-mode studies, 0 nonprob. mixed-mode studies, 51 prob. single-mode studies, and 46 prob. mixed-mode studies

## Crosstable: sample type (rows) and web mode (columns)
table(Metaanalyse_Benchmarks$sample, Metaanalyse_Benchmarks$web)
# The benchmark data contain 2 nonprob. non-web studies, 45 nonprob. web studies, 43 prob. non-web studies and 11 prob. web studies.
table(Metaanalyse_R$sample, Metaanalyse_R$web)
# The R-Indicator data contain 0 nonprob. studies, 50 prob. non-web studies and 1 prob. web study.

#### Sensitivity analysis: Mean Absolute Bias

## Null model on all Mean Absolute Bias effect sizes (outliers included);
## this replaces the Median Absolute Bias model previously stored in `res`
res <- rma(yi = es, vi = vi, data = Metaanalyse_Benchmarks_mean, measure = "GEN")
res

## Outlier screening via the influence diagnostics
inf <- influence(res)
plot(inf, layout = c(8, 1))
## identified outliers: studies No. 2, 84, 85

## Null model refitted without studies 2, 84 and 85
Benchmarks.without.outliers.mean <- Metaanalyse_Benchmarks_mean[
  which(!(Metaanalyse_Benchmarks_mean$id_h == 2 |
            Metaanalyse_Benchmarks_mean$id_h == 84 |
            Metaanalyse_Benchmarks_mean$id_h == 85)),
  ,
  drop = FALSE
]

res_mean.without.outliers <- rma(
  yi = es, vi = vi, data = Benchmarks.without.outliers.mean, measure = "GEN"
)
res_mean.without.outliers


