#--- 7751_analyses.R --- [R] code for reproducing results of "Investigating
#--- selection bias of online surveys on coronavirus-related behavioral
#--- outcomes"
# 
# Title: 
# Filename: 7751_analyses.R
# Description: 
# Author: Bernd Weiss, Ines Schaurer
# Maintainer: Bernd Weiss
# DOI: 
# Created: Fr Mai 29 08:15:59 2020 (+0200)
# Last-Updated: Fr Mai 29 11:35:45 2020 (+0200)
#           By: weissbd
#     Update #: 17
# Version: 
# Dependencies: see below
# Depends on: 7751_functions.R 
# Data source: GESIS Panel Standard Edition + Corona Special Survey (unpublished yet) 
# URL: https://search.gesis.org/research_data/ZA5665
# Keywords: 
# Licence: Attribution 4.0 International (CC BY 4.0) 
# Licence-URL: https://creativecommons.org/licenses/by/4.0/ 
# Compatibility: 
# 
################################################################################
# 
## Commentary: 
# 
# This is [R|Stata|...] code can give insights into statistical analyses 
# conducted in the following paper :
# 
# "Investigating selection bias of online surveys on coronavirus-related
# behavioral outcomes.
# 
# Other code files may need to be executed first or this code file will be 
# called by another code file that hopefully is listed under "Depends on:".
# 
# Due to [German|...] data protection laws we may be unable to provide the 
# data. In that case you may be unable to reproduce our statistical analyses
# yourself. However, you might be able to obtain the data from [http://].
# 
################################################################################
# 
# 
# 
################################################################################
# 
## Change Log:
# 
# 
################################################################################
# 
## Code:




## -------------------------------------------------------------------------- ##
## +++ Load packages and data                                                 ##
## -------------------------------------------------------------------------- ##


## Load R packages. 
library(here)
library(DT)
library(glue)
library(tidyverse)
library(haven)

citation()
sessionInfo()

## Define some constants, e.g., the location of some subfolders, utilizing the
## power of here(). For reproducibility purposes, some of these folders have to
## created manually.
rootpath <- here()
rootpath
datapath <- file.path(rootpath, "data")
## Adopt figpath for paper submission, used to be: file.path(rootpath, "fig")
figpath <- file.path(rootpath, "submission/1_revision-1") 
tabpath <- file.path(rootpath, "tables")
srcpath <- file.path(rootpath, "src/R")


## Source ("run") helper functions. 
source(file.path(srcpath, "7751_functions.R"))


## Load data from cohort a1, d1, f1, and the corona special survey.
dfx <- read_stata(file.path(datapath, "hz_plus_standard_edition.dta"))
## Check dimensions of data objects.
dim(dfx)


## Keep panelists that have been active in wave gf.
dfx %>% filter(activegf==1) -> dfx


## Declare missing values NA. And, be carfull, this has to be done after
## defining certain flag variables!
dfx[dfx < 0] <- NA


## The following variables have additional, non-negative "I don’t know" codes.
dfx %>% mutate_at(vars(hzcy044a:hzcy052a), ~na_if(., 98)) -> dfx




## -------------------------------------------------------------------------- ##
## +++ Create or recode variables                                             ##
## -------------------------------------------------------------------------- ##



## -++ Survey methodological variables, e.g., pariticipation etc.             ##
## -------------------------------------------------------------------------- ##


## Indicator variable for participation in special survey.
## All panelists participated in wave hz (basis: invited hz)
dfx %>% mutate(parthz = case_when(as.numeric(hzzp201a) %in% c(31,32) ~ 1,
                                  as.numeric(hzza001a) == 1  ~ 0,
                                  TRUE ~ NA_real_)) -> dfx


## All panelists participated in wave hz (basis: active gf).
dfx %>% mutate(parthzgf = case_when(activegf == 1 & is.na(parthz) ~ 0,
                                    TRUE ~ parthz)) -> dfx


## Generate categorical education variable. 
dfx %>% mutate(education_cat = case_when(education %in% c(1:4,6,10) ~ 1,
                                         education %in% c(5,7) ~ 2,
                                         education %in% c(8,9) ~ 3,
                                         TRUE ~ NA_real_)) -> dfx

## Brief check.
table(dfx$parthz)
table(dfx$parthzgf)
table(dfx$education_cat)



## -++ Substantial, sociological and psychological variables                  ##
## -------------------------------------------------------------------------- ##


## Marital status.
dfx$marstat <- as_factor(dfx$marstat)


## Female.
dfx$female <- dfx$sex == 2


## Inverted: "gdze001a", "gdze003a", "gdze005a", "gdze007a", "gdze009a"
## Big Five: being reserved (inverted!)
dfx$bfi_reserved <- dichotomiz_variable(dfx$gdze001a)
table(dfx$bfi_reserved)
table(dfx$gdze001a)


## Big five: easily trust people.
dfx$bfi_trustpeople  <-  dichotomiz_variable(dfx$gdze002a)
table(dfx$bfi_trustpeople)
table(dfx$gdze002a)


## Big five: rather lazy.
dfx$bfi_lazy  <-  dichotomiz_variable(dfx$gdze003a)
table(dfx$bfi_lazy)
table(dfx$gdze003a)


## Big five: relaxed.
dfx$bfi_relaxed  <-  dichotomiz_variable(dfx$gdze004a)
table(dfx$bfi_relaxed)
table(dfx$gdze004a)


## Big five: only little artistic interest.
dfx$bfi_noartist  <-  dichotomiz_variable(dfx$gdze005a)
table(dfx$bfi_noartist)
table(dfx$gdze005a)


## Big five: communicative and sociable.
dfx$bfi_sociable  <-  dichotomiz_variable(dfx$gdze006a)
table(dfx$bfi_sociable)
table(dfx$gdze006a)


## Big five: tend to criticize others.
dfx$bfi_criticize  <-  dichotomiz_variable(dfx$gdze007a)
table(dfx$bfi_criticize)
table(dfx$gdze007a)


## Big five: accomplish tasks thoroughly.
dfx$bfi_thorough  <-  dichotomiz_variable(dfx$gdze008a)
table(dfx$bfi_thorough)
table(dfx$gdze008a)


## Big five: easily become nervous.
dfx$bfi_nervous  <-  dichotomiz_variable(dfx$gdze009a)
table(dfx$bfi_nervous)
table(dfx$gdze009a)


## Big five: have active imagination.
dfx$bfi_image  <-  dichotomiz_variable(dfx$gdze010a)
table(dfx$bfi_image)
table(dfx$gdze010a)




## -------------------------------------------------------------------------- ##
## +++ Run supplementary regression models                                    ##
## -------------------------------------------------------------------------- ##

source("7751_supplement_regressions.R")




## -------------------------------------------------------------------------- ##
## +++ Create dotplot (Figure 2)                                              ##
## -------------------------------------------------------------------------- ##


measures_labels <- c("Measures taken: Avoided places",                     
                     "Measures taken: Kept distance",                      
                     "Measures taken: Washed hands more often",            
                     "Measures taken: Reduced social interactions")        


## Store results for online participation (to be printed at the bottom of the
## figure).
online_lazy <- calc_ci_prop(with(dfx, table(parthzgf, bfi_lazy)))
online_trustpeople <- calc_ci_prop(with(dfx, table(parthzgf, bfi_trustpeople)))
dfx_online <- data.frame(rbind(online_lazy, online_trustpeople))
dfx_online <- round(dfx_online, 2)
dfx_online$bfi <- c("I see myself as someone\nwho tends to be lazy",
                    "I see myself as someone\nwho is generally trusting")
dfx_online


## Calculate 2 x 2 tables for risk-minimizing measures by two of the BFI-10
## items.
## Item: "I see myself as someone who tends to be lazy".
places_lazy <- calc_ci_prop(with(dfx, table(hzcy006a, bfi_lazy)))
distance_lazy <- calc_ci_prop(with(dfx, table(hzcy007a, bfi_lazy)))
work_lazy <- calc_ci_prop(with(dfx, table(hzcy008a, bfi_lazy)))
hands_lazy <- calc_ci_prop(with(dfx, table(hzcy011a, bfi_lazy)))
contacts_lazy <- calc_ci_prop(with(dfx, table(hzcy014a, bfi_lazy)))
## Item: "I see myself as someone who is generally trusting".
places_trustpeople <- calc_ci_prop(with(dfx, table(hzcy006a, bfi_trustpeople)))
distance_trustpeople <- calc_ci_prop(with(dfx, table(hzcy007a, bfi_trustpeople)))
work_trustpeople <- calc_ci_prop(with(dfx, table(hzcy008a, bfi_trustpeople)))
hands_trustpeople <- calc_ci_prop(with(dfx, table(hzcy011a, bfi_trustpeople)))
contacts_trustpeople <- calc_ci_prop(with(dfx, table(hzcy014a, bfi_trustpeople)))


## Put everything in a data frame for plotting. 
dfx_plot <- as.data.frame(rbind(places_lazy,
                                distance_lazy,
                                hands_lazy,
                                contacts_lazy,
                                places_trustpeople,
                                distance_trustpeople,
                                hands_trustpeople,
                                contacts_trustpeople))
dfx_plot$bfi <- c(rep("I see myself as someone\nwho tends to be lazy", 4),
                  rep("I see myself as someone\nwho is generally trusting",
                      4))
dfx_plot$measures <- c(measures_labels, measures_labels)
dfx_plot
## Remove manually results without effect on online participation AND
## risk-minimizing measures.
dfx_plot$delta.TRUE[c(5, 6)] <- NA
dfx_plot$ll[c(5, 6)] <- NA
dfx_plot$ul[c(5, 6)] <- NA


## Create dot plot.
ggplot(aes(x = delta.TRUE, y = measures), data = dfx_plot) +
    geom_point() + geom_errorbarh(aes(xmin = ll, xmax = ul, height = .2)) +
    geom_vline(xintercept = 0, linetype = 2) + 
    facet_wrap(vars(bfi)) +
    labs(y = "", x = "Difference in percentage points") + xlim(-11,10) + 
    geom_text(aes(x = -0.5, y = 0.5,
                  label = paste0("Difference in online participation: ",
                                delta.TRUE, " [",ll, ", ", ul, "]")),
              data = dfx_online, size = 6.7, color = "black") +
    geom_hline(yintercept = .5, color = "gray", alpha = 0.5, size=14.5) +
    theme_bw() + 
    theme(text = element_text(size = 30), legend.position = "top")


## Save plot. 
ggsave(file.path(figpath, "f_deltasplot.pdf"), width = 20, height = 7.5)


################################################################################
#--- 7751_analyses.R ends here
