---
title: Experimental Evidence on Panel Conditioning Effects when Increasing the Surveying Frequency in a Probability-Based Online Panel
author: ' '
date: "`r format(Sys.time(), '%d %B %Y')`"
output:
  pdf_document:
    number_sections: yes
    fig_caption: yes
    keep_tex: yes
  word_document: default
header-includes: \usepackage{dcolumn}
keywords: panel conditioning, probability-based online panel, surveying frequency, large-scale experiment
bibliography: Conditioning.bib
bib-latex-options: style=apa
link-citations: yes
fig_caption: yes
nocite: |
  @blom21e, @blom21, @blom21a, @blom21b, @blom21c, @blom21d, @blom21g, @blom21h, @blom21i, @blom21j, @blom21k
---

**Abstract**

We investigate panel conditioning effects in a long-running probability-based online panel of the general population through a large-scale experiment conducted in 2020. Our experiment was specifically designed to study the effect of intensifying the surveying frequency for the treatment group (N = 5,598 panel members) during a 16-week corona study while keeping the control group (N = 799 panel members) at the usual bi-monthly surveying frequency. While many panel surveys have conducted add-on studies with increased surveying frequency during the COVID-19 pandemic, ours is among the very few which can provide experimental evidence regarding the potential impact of the add-on study on the underlying panel data infrastructure. Our results show that panel conditioning is only a minor issue when increasing the surveying frequency and that it only matters for survey questions directly related to the corona study. Our study therefore provides reassurance to researchers who have implemented or are planning to implement similar special-topic add-on studies.

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

## Load (and, if necessary, install) packages and set working directory --------
## MLS: Check all packages in the end whether we use 'em or not

## Clear working memory
rm(list = ls())

## Install packages if not installed
# install.packages('pacman')
library(pacman)
p_load(
  "haven", "data.table", "ggplot2", "tidyverse", "kableExtra", "stargazer",
  "htmltools", "digest", "rvest", "texreg", "dotwhisker", "tinytex",
  "stringr", "checkpoint", "magrittr", "viridis", "ggthemes", "readxl",
  "broom", "tidyr", "lubridate", "lme4", "labelled", "ggpubr", "MASS"
)

## Main Data -------------------------------------------------------------------
data <- read_dta("X:/team/Projekte/MCS_Conditioning/replications_w43-w56.dta") # change to your data path
data <- remove_var_label(data)
```

```{r Define functions, include = FALSE}
sim_function <- function(lm_obj, nsim = 1000, scenario){
  # Step 1: Get the regression coefficients
  beta_hat <- coef(lm_obj)
  # Step 2: Generate sampling distribution
  # Step 2.1: Get the variance-covariance matrix.
  V_hat <- vcov(lm_obj)
  # Step 2.2: Draw from the multivariate normal distribution.
  S <- mvrnorm(nsim, beta_hat, V_hat)
  # Step 3: Choose interesting covariate values.
  # Make sure the matrix multiplication also works for single scenarios
  if(is.null(nrow(scenario))){
    scenario <- matrix(scenario, nrow = 1)
  }
  # Print a message if the scenario does not fit the regression.
if(ncol(scenario) != length(lm_obj$coefficients)){ return(cat("The scenario has the wrong number of variables.")) } # Step 4: Calculate Quantities of Interest - # Expected Values EV <- S %*% t(scenario) c(mean = mean(EV), quants = quantile(EV, probs = c(0.025, 0.975))) } ``` ```{r Data processing, include = FALSE} ## with additional weight for probability of being included in sample by recruitment year N2018 <- 83019213 * 0.836 N2014 <- 81197537 * 0.839 N2012 <- 80523746 * 0.837 n2018 <- 13050 n2014 <- 9316*1.77 n2012 <- 4878*1.78 pi2018 <- n2018/N2018 pi2014 <- n2014/N2014 pi2012 <- n2012/N2012 data$weight[data$sample==3] <- 1/((pi2018+pi2014+pi2012)-(pi2014*pi2012)-(pi2018*pi2012)-(pi2018*pi2014)+(pi2018*pi2014*pi2012)) data$weight[data$sample==2] <- 1/((pi2018+pi2014)-(pi2014*pi2018)) data$weight[data$sample==1] <- 1/pi2018 df <- data %>% mutate( # Two treatment vars T1 = ifelse(invit_mcs == 1, 1, 0), # Treatment 1: Invitation MCS T2 = ifelse(invit_mcs == 1 & compl_mcs == 1, 1, 0), # Treatment 2: Actual participation MCS # Demographics id = id_g, citizenship = ifelse(AA43307 == 1, 1, ifelse(AA43307 == 2, 1, 0)), marital = factor(ifelse(AA43004==5, 'Single', ifelse(AA43004==1|AA43004==2|AA43004==6|AA43004==7,'Married', ifelse(AA43004==3|AA43004==4|AA43004==9, 'Other', NA)))), hhsize = factor(ifelse(AA43057==1|AA43057==2,AA43057, ifelse(AA43057==3|AA43057==4|AA43057==5,'3+',NA))), age = AA43413a, female = ifelse(AA43414a==-98, NA, ((AA43414a - 2) * -1)), edu = as.factor(ifelse( AA43303 == 2 |AA43303 == 3, 1, ifelse(AA43303 == 1 | AA43303 == 4 | AA43303 == 5, 2, ifelse(AA43303 == 6, 3, NA))) ), # Employment vars: Numbers indicate survey wave fulltime43=ifelse(AA43451 == 1, 1, 0), fulltime45=ifelse(AA45005 == 1, 1, 0), fulltime49=ifelse(AA49451 == 1, 1, 0), fulltime51=ifelse(AA51005 == 1, 1, 0), fulltime52=ifelse(AA52005 == 1, 1, 0), fulltime54=ifelse(AA54005 == 1, 1, 0), fulltime55=ifelse(AA55451 == 1, 1, 0), parttime43=ifelse(AA43451 == 2, 1, 0), parttime45=ifelse(AA45005 == 2, 1, 0), parttime49=ifelse(AA49451 == 2, 1, 0), parttime51=ifelse(AA51005 == 2, 1, 0), parttime52=ifelse(AA52005 == 2, 1, 0), parttime54=ifelse(AA54005 == 2, 1, 0), parttime55=ifelse(AA55451 == 2, 1, 0), unemp43=ifelse(AA43451 == 15, 1, 0), unemp45=ifelse(AA45005 == 15, 1, 0), unemp49=ifelse(AA49451 == 15, 1, 0), unemp51=ifelse(AA51005 == 15, 1, 0), unemp52=ifelse(AA52005 == 15, 1, 0), unemp54=ifelse(AA54005 == 15, 1, 0), unemp55=ifelse(AA55451 == 15, 1, 0), ## Substantive vars: Numbers indicate survey wave # Attitudes Covid-19 tradeoff50 = ifelse(ZJ50053>0,((ZJ50053-8)*-1),NA), # Measures' benefit .. tradeoff52 = ifelse(ZJ52053>0,((ZJ52053-8)*-1),NA), # .. greater than harm tradeoff54 = ifelse(ZJ54053>0,((ZJ54053-8)*-1),NA), threat50 = ifelse(ZJ50054>=0,(ZJ50054+1), NA), # Personal Threat fear51 = ifelse(ZJ51057>0,ZJ51057,NA), # Fear nervous51 = ifelse(ZJ51056>0,ZJ51056,NA), # Nervousness govhc45 = ifelse(AC45050>=0,AC45050,NA), # Gvt responsibility healthcare govhc51 = ifelse(AC51050>=0,AC51050,NA), govexp_hc45 = ifelse(AC45051>=0, ((AC45051-6)*-1), NA), govexp_hc51 = ifelse(AC51051>=0, ((AC51051-6)*-1), NA), health_perc45 = ifelse(AC45080>=0, ((AC45080-6)*-1), NA), health_perc51 = ifelse(AC51080>=0, ((AC51080-6)*-1), NA), govhc55 = ifelse(AC55050>=0,AC55050,NA), hoffice45 = ifelse(AC45220>0,((AC45220-5)*-1),NA), # Support home office hoffice55 = ifelse(AC55220>0,((AC55220-5)*-1),NA), satgovfd55 = ifelse(CE55499>0, CE55499, NA), prop_inf50 = ifelse(ZJ50055>=0, ZJ50055, NA), # Prob. infection pers. 
comp_fedgov55 = ifelse(CE55501>=0, CE55501, NA), comp_govstates55 = ifelse(CE55502>=0, CE55502, NA), comp_parl55 = ifelse(CE55503>=0, CE55503, NA), inf_gov55 = ifelse(CE55518>=0, CE55518, NA), inf_parl55 = ifelse(CE55519>=0, CE55519, NA), inf_parties55 = ifelse(CE55520>=0, CE55520, NA), inf_rki55 = ifelse(CE55521>=0, CE55521, NA), inf_tv55 = ifelse(CE55522>=0, CE55522, NA), inf_print55 = ifelse(CE55523>=0, CE55523, NA), inf_online55 = ifelse(CE55524>=0, CE55524, NA), inf_science55 = ifelse(CE55525>=0, CE55525, NA), free_speech_pand55 = ifelse(BI55003>=0, ((BI55003-2)*-1), NA), free_speech55 = ifelse(BI55004>=0, ((BI55004-2)*-1), NA), # Behaviour Covid-19 appinstalled49 = ifelse(ZJ49051==1,2, # App currently installed ifelse(ZJ49051==2,0, # App not installed ifelse(ZJ49051==3,1, # App previously installed & deleted ifelse(ZJ49051==4,NA,NA)))), appusage49 = ((ZJ49052-6)*-1), # App usage homeoffice49 = ifelse(AA49468>=0,AA49468,NA), homeoffice51 = ifelse(AA51468>=0,AA51468,NA), homeoffice52 = ifelse(AA52468>=0,AA52468,NA), homeoffice54 = ifelse(AA54468>=0,AA54468,NA), covid_inf49 = ifelse(ZJ49050>=0, ((ZJ49050-5)*-1), NA), # Unconfirmed Covid infection covid_inf53 = ifelse(ZJ53065>=0, ((ZJ53065-5)*-1), NA), # Attitudes General satdem50 = ifelse(ZJ50032>0, ZJ50032, NA), # Satisfaction w/ democracy concern_privacy44 = ifelse(AM44004>=0, AM44004, NA), # Concerned abt privacy concern_privacy54 = ifelse(AM54004>=0, AM54004, NA), attitude1_privacy44 = ifelse(AM44012>=0, ((AM44012-6)*-1), NA), # No problem to share data attitude1_privacy54 = ifelse(AM54012>=0, ((AM54012-6)*-1), NA), attitude2_privacy44 = ifelse(AM44013>=0, ((AM44013-6)*-1), NA), # Not possible not to share data attitude2_privacy54 = ifelse(AM54013>=0, ((AM54013-6)*-1), NA), attitude3_privacy44 = ifelse(AM44014>=0, ((AM44014-6)*-1), NA), # Don't know who will see data attitude3_privacy54 = ifelse(AM54014>=0, ((AM54014-6)*-1), NA), attitude4_privacy44 = ifelse(AM44015>=0, ((AM44015-6)*-1), NA), # No problem to exchange data for service attitude4_privacy54 = ifelse(AM54015>=0, ((AM54015-6)*-1), NA), fff_demo44 = ifelse(ZJ44038>=0, ((ZJ44038-2)*-1), NA), # Participated FfF demo fff_demo48 = ifelse(BG48024_c>=0, BG48024_c, NA), controldata44 = ifelse(AM44016>0,((AM44016-2)*-1),NA), # Feel to have control over data controldata54 = ifelse(AM54016>0,((AM54016-2)*-1),NA), govc45 = ifelse(AC45142>=0, AC45142, NA), # Gov. Resp. Care (45) govexp_c45 = ifelse(AC45144>=0, ((AC45144-6)*-1), NA), # Gov. Expenses Care (45) govpen45 = ifelse(AC45056>=0, AC45056, NA), # Gov. Resp. Pension (45) govexp_pen45 = ifelse(AC45058>=0, ((AC45058-6)*-1), NA), # Gov. Expenses Pension (45) retire_age45= ifelse(AC45088>=0, AC45088, NA), # Retirement age opinion (45) govunemp45 = ifelse(AC45065>=0, AC45065, NA), # Gov. resp. unemployment (45) govexp_unemp45 = ifelse(AC45067>=0, ((AC45067-6)*-1), NA), # Gov. exp unemployment (45) govc51 = ifelse(AC51142>=0, AC51142, NA), # Gov. Resp. Care (51) govexp_c51 = ifelse(AC51144>=0, ((AC51144-6)*-1), NA), # Gov. Expenses Care (51) govpen51 = ifelse(AC51056>=0, AC51056, NA), # Gov. Resp. Pension (51) govpen55 = ifelse(AC55056>=0, AC55056, NA), # Gov. Resp. Pension (55) govexp_pen51 = ifelse(AC51058>=0, ((AC51058-6)*-1), NA), # Gov. Expenses Pension (51) retire_age51= ifelse(AC51088>=0, AC51088, NA), # Retirement age opinion (51) govunemp51 = ifelse(AC51065>=0, AC51065, NA), # Gov. resp. unemployment (51) govunemp55 = ifelse(AC55065>=0, AC55065, NA), # Gov. resp. 
unemployment (55) govexp_unemp51 = ifelse(AC51067>=0, ((AC51067-6)*-1), NA), # Gov. exp unemployment (51) govfam45 = ifelse(AC45202>=0, AC45202, NA), # Gov. resp. family (45) govexp_fam45 = ifelse(AC45207>=0, ((AC45207-6)*-1), NA), # Gov. exp family (45) gov_policy45 = ifelse(AC45183==3, 0, ifelse(AC45183>=0, AC45183, NA)), # Fam policy (45) govfam51 = ifelse(AC51202>=0, AC51202, NA), # Gov. resp. family (51) govfam55 = ifelse(AC55202>=0, AC55202, NA), # Gov. resp. family (55) govexp_fam51 = ifelse(AC51207>=0, ((AC51207-6)*-1), NA), # Gov. exp family (51) gov_policy51 = ifelse(AC51183==3, 0, ifelse(AC51183>=0, AC51183, NA)), # Fam policy (51) child_well45 = ifelse(AC45231>=0, AC45231, NA), # Opinion childcare wellbeing (45) child_well55 = ifelse(AC55231>=0, AC55231, NA), # Opinion childcare wellbeing (55) activity_high51 = ifelse(ZJ51063_1>=0, ZJ51063_1, NA), # Hours high activity past week activity_mid51 = ifelse(ZJ51063_3>=0, ZJ51063_3, NA), # Hours mid activity past week activity_low51 = ifelse(ZJ51063_5>=0, ZJ51063_5, NA), # Hours low activity past week screentime_51 = ifelse(ZJ51061_1>=0, ZJ51061_1, NA), snacking51 = ifelse(ZJ51062>=0, ZJ51062, NA), # Snacking depression1_51 = ifelse(ZJ51058>=0, ZJ51058, NA), # Depression depression2_51 = ifelse(ZJ51059>=0, ZJ51059, NA), # Depression 2 lonely51 = ifelse(ZJ51060>=0, ZJ51060, NA), # Loneliness support_taxev53 = ifelse(CF53015>=0,((CF53015-7)*-1), NA), eu_opinion54 = ifelse(CE54145>=0, CE54145, NA), eu_authority53 = ifelse(RM53019>=0, ((RM53019-6)*-1), NA), reduce_ineq53 = ifelse(RM53016>=0, ((RM53016-6)*-1), NA), own_mobile48 = ifelse(AJ48030>=0, ((AJ48030-2)*-1), NA), own_computer48 = ifelse(AJ48031>=0, ((AJ48031-2)*-1), NA), use_fb48 = ifelse(AJ48038>=0, ((AJ48038-2)*-1), NA), use_insta48 = ifelse(AJ48039>=0, ((AJ48039-2)*-1), NA), use_linkedin48 = ifelse(AJ48040>=0, ((AJ48040-2)*-1), NA), use_twitter48 = ifelse(AJ48041>=0, ((AJ48041-2)*-1), NA), use_xing48 = ifelse(AJ48042>=0, ((AJ48042-2)*-1), NA), own_mobile52 = ifelse(AJ52030>=0, ((AJ52030-2)*-1), NA), own_computer52 = ifelse(AJ52031>=0, ((AJ52031-2)*-1), NA), use_fb52 = ifelse(AJ52038>=0, ((AJ52038-2)*-1), NA), use_insta52 = ifelse(AJ52039>=0, ((AJ52039-2)*-1), NA), use_linkedin52 = ifelse(AJ52040>=0, ((AJ52040-2)*-1), NA), use_twitter52 = ifelse(AJ52041>=0, ((AJ52041-2)*-1), NA), use_xing52 = ifelse(AJ52042>=0, ((AJ52042-2)*-1), NA), own_mobile54 = ifelse(AJ54126_b>=0, AJ54126_b, NA), own_computer54 = ifelse(AJ54126_c>=0, AJ54126_c, NA), trust_parl50 = ifelse(CE50451>=0, CE50451, NA), trust_parl55 = ifelse(CE55506>=0, CE55506, NA), labor_immig51 = ifelse(CF51094>=0, ((CF51094-4)*-1), NA), labor_immig52 = ifelse(CF52094>=0, ((CF52094-4)*-1), NA), cred_mig_stud51 = ifelse(CF51096>=0, ((CF51096-6)*-1), NA), cred_mig_stud52 = ifelse(CF52096>=0, ((CF52096-6)*-1), NA), trust_gov45 = ifelse(AK45034>=0, AK45034, NA), trust_gov55 = ifelse(CE55504>=0, CE55504, NA), trust_court45 = ifelse(AK45031>=0, AK45031, NA), trust_local55 = ifelse(CE55505>=0, CE55505, NA), trust_parties55 = ifelse(CE55507>=0, CE55507, NA), trust_ec55 = ifelse(CE55508>=0, CE55508, NA), trust_ep55 = ifelse(CE55509>=0, CE55509, NA), trust_tv55 = ifelse(CE55510>=0, CE55510, NA), trust_print55 = ifelse(CE55511>=0, CE55511, NA), trust_online55 = ifelse(CE55512>=0, CE55512, NA), trust_edu55 = ifelse(CE55513>=0, CE55513, NA), trust_police55 = ifelse(CE55514>=0, CE55514, NA), trust_health55 = ifelse(CE55515>=0, CE55515, NA), trust_court55 = ifelse(CE55516>=0, CE55516, NA), trust_jud55 = ifelse(CE55517>=0, CE55517, NA), 
satfam43 = ifelse(AA43023>=0,(AA43023+1),NA), # Satisfaction w/ family satfam49 = ifelse(AA49023>=0,(AA49023+1),NA), satfam52 = ifelse(AA52023>=0,(AA52023+1),NA), satfam54 = ifelse(AA54023>=0,(AA54023+1),NA), satfam55 = ifelse(AA55023>=0,(AA55023+1),NA), satfin43 = ifelse(AA43025>=0,(AA43025+1),NA), # Satisfaction w/ financial situation satfin49 = ifelse(AA49025>=0,(AA49025+1),NA), # Behaviour General usecomputer49 = AA49429, # Use computer for other than GIP study work49 = ifelse(AA49466>=0,AA49466,NA), work54 = ifelse(AA54466>=0,AA54466,NA), work55 = ifelse(AA55466>=0,AA55466,NA) ) %>% dplyr::select( # Two treatment vars T1, T2, # Demographics id, weight, sample, citizenship, marital, hhsize, age, female, edu, fulltime43:unemp55, dropout, tradeoff50, tradeoff52, tradeoff54, threat50, fear51, nervous51, govhc45, govhc51, govexp_hc45, govexp_hc51, health_perc45, health_perc51, govhc55, hoffice45, hoffice55, satgovfd55, prop_inf50, comp_fedgov55, comp_govstates55, comp_parl55, inf_gov55, inf_parl55, inf_parties55, inf_rki55, inf_tv55, inf_print55, inf_online55, inf_science55, free_speech_pand55, free_speech55, # Behaviour Covid-19 appinstalled49, appusage49, homeoffice49, homeoffice51, homeoffice52, homeoffice54, covid_inf49, covid_inf53, # Attitudes General satdem50, concern_privacy44, concern_privacy54, attitude1_privacy44, attitude1_privacy54, attitude2_privacy44, attitude2_privacy54, attitude3_privacy44, attitude3_privacy54, attitude4_privacy44, attitude4_privacy54, fff_demo44, fff_demo48, controldata44, controldata54, govc45, govexp_c45, govpen45, govexp_pen45, retire_age45, govunemp45, govexp_unemp45, govc51, govexp_c51, govpen51, govpen55, govexp_pen51, retire_age51, govunemp51, govunemp55, govexp_unemp51, govfam45, govexp_fam45, gov_policy45, govfam51, govfam55, govexp_fam51, gov_policy51, child_well45, child_well55, activity_high51, activity_mid51, activity_low51, screentime_51, snacking51, depression1_51, depression2_51, lonely51, support_taxev53, eu_opinion54, eu_authority53, reduce_ineq53, own_mobile48, own_computer48, use_fb48, use_insta48, use_linkedin48, use_twitter48, use_xing48, own_mobile52, own_computer52, use_fb52, use_insta52, use_linkedin52, use_twitter52, use_xing52, own_mobile54, own_computer54, trust_parl50, trust_parl55, labor_immig51, labor_immig52, cred_mig_stud51, cred_mig_stud52, trust_gov45, trust_gov55, trust_court45, trust_local55, trust_parties55, trust_ec55, trust_ep55, trust_tv55, trust_print55, trust_online55, trust_edu55, trust_police55, trust_health55, trust_court55, trust_jud55, satfam43, satfam49, satfam52, satfam54, satfam55, satfin43, satfin49, # Behaviour General usecomputer49, work49, work54, work55 ) ``` ```{r Basic descriptives, include = FALSE} ## Check for duplicates in the data length(unique(df$id)) == nrow(data) # returns TRUE if no duplicates exist in the df ## How many respondents in treatment and control group? table(df$T1) # 799 in control group, 5598 in treatment group table(df$T2) # 2010 in control group, 4387 in treatment group table(df$T1, df$T2) # Of those invited, 1211 people did not participate ``` ```{r Random Assignment Checks, include = FALSE} # Can we predict assignment to either treatment on the basis of demographics? # Or is assignment random (which we obvs prefer ;) )? 
r1 <- glm(T1 ~ female + age + hhsize + marital + edu + citizenship, data = df, family = "binomial") summary(r1) r2 <- glm(T2 ~ female + age + hhsize + marital + edu + citizenship, data = df, family = "binomial") summary(r2) ``` ```{r Estimate Conditioning Models, include = FALSE} dvs <- c( # Attitudes Covid-19 "tradeoff50", "tradeoff52", "tradeoff54", "threat50", "fear51", "nervous51", "govhc51", "govexp_hc51", "health_perc51", "govhc55", "hoffice55", "satgovfd55", "prop_inf50", "comp_fedgov55", "comp_govstates55", "comp_parl55", "inf_gov55", "inf_parl55", "inf_parties55", "inf_rki55", "inf_tv55", "inf_print55", "inf_online55", "inf_science55", "free_speech_pand55", "free_speech55", # Behaviour Covid-19 "appinstalled49", "appusage49", "homeoffice49", "homeoffice51", "homeoffice52", "homeoffice54", "covid_inf49", "covid_inf53", # Attitudes General "satdem50", "concern_privacy54", "attitude1_privacy54", "attitude2_privacy54", "attitude3_privacy54", "attitude4_privacy54", "fff_demo48", "controldata54", "govc51", "govexp_c51", "govpen51", "govpen55", "govexp_pen51", "retire_age51", "govunemp51", "govunemp55", "govexp_unemp51", "govfam51", "govfam55", "govexp_fam51", "gov_policy51", "child_well55", "activity_high51", "activity_mid51", "activity_low51", "screentime_51", "snacking51", "depression1_51", "depression2_51", "lonely51", "support_taxev53", "eu_opinion54", "eu_authority53", "reduce_ineq53", "own_mobile48", "own_computer48", "use_fb48", "use_insta48", "use_linkedin48", "use_twitter48", "use_xing48", "own_mobile52", "own_computer52", "use_fb52", "use_insta52", "use_linkedin52", "use_twitter52", "use_xing52", "own_mobile54", "own_computer54", "trust_parl50", "trust_parl55", "labor_immig51", "labor_immig52", "cred_mig_stud51", "cred_mig_stud52", "trust_gov55", "trust_local55", "trust_parties55", "trust_ec55", "trust_ep55", "trust_tv55", "trust_print55", "trust_online55", "trust_edu55", "trust_police55", "trust_health55", "trust_court55", "trust_jud55", "satfam49", "satfam52", "satfam54", "satfam55", "satfin49", # Behaviour General "usecomputer49", "work49", "work54", "work55" ) dv_names <- c( "COVID-measure damage v benefits (50)", "COVID-measure damage v benefits (52)", "COVID-measure damage v benefit (54)", "Personal Covid threat (50)", "General COVID concern (51)", "Nervousness (51)", "Government healthcare (51)", "Gov. expenses healthcare (51)", "Perceived personal health (51)", "Gov. responsibility healthcare (55)", "Work from home laws (55)", "Satisfaction fed. gvt. (55)", "COVID infection probability (50)", "Federal government in pandemic (55)", "State government in pandemic (55)", "More competencies parliament (55)", "Informed by gvt. 
(55)", "Informed by parliament (55)", "Informed by parties (55)", "Informed by RKI (55)", "Informed by TV (55)", "Informed by print (55)", "Informed by internet (55)", "Informed by scientists (55)", "Perceived freedom of speech: pandemic (55)", "Perceived freedom of speech: general (55)", # Behaviour Covid-19 "Contact tracing app installed (49)", "COVID tracing app use (49)", "Worked from home (49)", "Worked from home (51)", "Worked from home (52)", "Worked from home (54)", "Previous Covid-19 infection (49)", "Previous Covid-19 infection (53)", # Attitudes General "Satisfaction with Democracy (50)", "Data privacy concerns (54)", "Data sharing attitude 1 (54)", "Data sharing attitude 2 (54)", "Data sharing attitude 3 (54)", "Data sharing attitude 4 (54)", "Participated in Fridays for Future protest (48)", "Feeling of control over personal data (54)", "Gov. responsibility care (51)", "Gov. expenses care (51)", "Gov. responsibility pensions (51)", "govpen55", "govexp_pen51", "retire_age51", "Government care unemployed (51)", "Government care unemployed (55)", "govexp_unemp51", "govfam51", "govfam55", "govexp_fam51", "gov_policy51", "child_well55", "activity_high51", "Medium activity level (51)", "activity_low51", "screentime_51", "Snacking (51)", "Feeling depressed (51)", "Feeling depressed (51)", #2nd depression item "Feelings of loneliness (51)", "Tax evasion support (53)", "European integration (54)", "eu_authority53", "reduce_ineq53", "Owning a mobile phone (48)", "Owning a computer (48)", "use_fb48", "use_insta48", "use_linkedin48", "use_twitter48", "use_xing48", "Owning a mobile phone (52)", "Owning a computer (52)", "use_fb52", "use_insta52", "use_linkedin52", "use_twitter52", "use_xing52", "own_mobile54", "own_computer54", "trust_parl50", "trust_parl55", "labor_immig51", "labor_immig52", "cred_mig_stud51", "cred_mig_stud52", "trust_gov55", "Trust local government (55)", "trust_parties55", "trust_ec55", "Trust European Parliament (55)", "trust_tv55", "trust_print55", "Trust social media (55)", "trust_edu55", "trust_police55", "trust_health55", "trust_court55", "trust_jud55", "satfam49", "satfam52", "satfam54", "satfam55", "satfin49", # Behaviour General "Computer Usage (49)", "Short-time work (49)", "Short-time work (54)", "Short-time work (55)") names(dv_names) <- dvs covid_vars <- c( # Attitudes Covid-19 "tradeoff50", "tradeoff52", "tradeoff54", "threat50", "fear51", "nervous51", "govhc51", "govhc55", "hoffice55", "satgovfd55", "prop_inf50", "comp_fedgov55", "comp_govstates55", "comp_parl55", "inf_gov55", "inf_parl55", "inf_parties55", "inf_rki55", "inf_tv55", "inf_print55", "inf_online55", "inf_science55", "free_speech_pand55", # Behaviour Covid-19 "appinstalled49", "appusage49", "homeoffice49", "homeoffice51", "homeoffice52", "homeoffice54", "covid_inf49", "covid_inf53", "satdem50", "activity_high51", "activity_mid51", "activity_low51", "screentime_51", "snacking51", "depression1_51", "depression2_51", "lonely51", "support_taxev53", "eu_opinion54", "eu_authority53", "reduce_ineq53", "satfam49", "satfam52", "satfam54", "satfam55", "work49", "work54", "work55" ) noncovid_vars <- c( # Attitudes General "free_speech55", "concern_privacy54", "attitude1_privacy54", "attitude2_privacy54", "attitude3_privacy54", "attitude4_privacy54", "fff_demo48", "controldata54", "govc51", "govexp_c51", "govpen51", "govpen55", "govexp_pen51", "retire_age51", "govunemp51", "govunemp55", "govexp_unemp51", "govfam51", "govfam55", "govexp_fam51", "gov_policy51", "child_well55", "own_mobile48", 
"own_computer48", "use_fb48", "use_insta48", "use_linkedin48", "use_twitter48", "use_xing48", "own_mobile52", "own_computer52", "use_fb52", "use_insta52", "use_linkedin52", "use_twitter52", "use_xing52", "own_mobile54", "own_computer54", "trust_parl50", "trust_parl55", "labor_immig51", "labor_immig52", "cred_mig_stud51", "cred_mig_stud52", "trust_gov55", "trust_local55", "trust_parties55", "trust_ec55", "trust_ep55", "trust_tv55", "trust_print55", "trust_online55", "trust_edu55", "trust_police55", "trust_health55", "trust_court55", "trust_jud55", "satfin49", # Behaviour General "usecomputer49", "govexp_hc51", #? "health_perc51" #? ) subgroups <- c('T1', 'T2') models <- list() i <- 1 for(dv in dvs) { for(subgroup in subgroups) { if(subgroup == 'T1') { eq <- as.formula(paste(dv, 'T1', sep = '~')) } else if(subgroup == 'T2') { eq <- as.formula(paste(dv, 'T2', sep = '~')) } else{ cat('Error!') stop() } print(eq) print(paste(subgroup)) m <- lm(formula = eq, data = df, weights = weight) assign(x = paste('m', dv, subgroup, sep = '_'), value = m) models[[i]] <- m } i <- i + 1 } ``` ```{r Coefplot Participation Treatment, echo=FALSE, message=FALSE, results='hide', warning=FALSE} # Prep model results for plotting model_df <- data_frame() for(dv in dvs) { for(subgroup in subgroups) { model <- get(paste('m', dv, subgroup, sep = '_')) if(dv %in% covid_vars){ bonferroni_val <- (0.05/length(covid_vars)) bonferroni_val_c <- bonferroni_val } else if(dv %in% noncovid_vars) { bonferroni_val <- (0.05/length(noncovid_vars)) bonferroni_val_nc <- bonferroni_val } else{ cat('Error!') stop() } tmp <- tidy(model, conf.int = T, conf.level = .9) %>% filter(term == subgroup) %>% mutate(coefficient = term, dv = dv, term = factor(dv, levels = dvs, labels = dv_names), subgroup = subgroup, model = factor(subgroup, levels = c("T1", "T2"), labels = c("Invitation", "Participation")), significant = p.value <= 0.05, significant_bc = p.value <= bonferroni_val) %>% # Correction: 0.05/31 = 0.001612903 rename(ci90l = conf.low, ci90h = conf.high) model_df <- bind_rows(model_df, tmp) } } rm(tmp) # Visualise models in coefplot new_coef_p <- model_df %>% filter(significant == T & model == "Participation") %>% mutate(term = factor(term, levels = c("COVID−measure damage v benefits (50)", "COVID−measure damage v benefits (52)", "COVID−measure damage v benefit (54)", "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "State government in pandemic (55)", "COVID tracing app use (49)", "Government healthcare (51)", "Work from home laws (55)", "Medium activity level (51)", "Snacking (51)", "Feeling depressed (51)", "Tax evasion support (53)", "Government care unemployed (51)", "Data privacy concerns (54)", "Data sharing attitude 1 (54)", "European integration (54)", "Trust local government (55)", "Trust European Parliament (55)", "Trust social media (55)" ), labels = c("COVID−measure damage v benefits (50)", "COVID−measure damage v benefits (52)", "COVID−measure damage v benefit (54)", "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "State government in pandemic (55)", "COVID tracing app use (49)", "Government healthcare (51)", "Work from home laws (55)", "Medium activity level (51)", "Snacking (51)", "Feeling depressed (51)", "Tax evasion support (53)", "Government care unemployed (51)", "Data privacy concerns (54)", "Data sharing attitude 1 (54)", "European integration (54)", "Trust local government (55)", "Trust European Parliament (55)", "Trust 
social media (55)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) new_coef_pb <- model_df %>% filter(significant_bc == T & model == "Participation") %>% mutate(term = factor(term, levels = c( "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "Feeling depressed (51)" ), labels = c( "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "Feeling depressed (51)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure4.pdf', width = 10, height = 5, ) new_coef_p dev.off() pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure_a6.pdf', width = 10, height = 5, ) new_coef_pb dev.off() ``` ```{r Models with Demographics, echo=FALSE, message=FALSE, results='hide', warning=FALSE} models_control <- list() i <- 1 for(dv in dvs) { for(subgroup in subgroups) { if(subgroup == 'T1') { eq <- as.formula(paste(dv, '~T1+female+hhsize+marital+edu+citizenship', sep = '')) } else if(subgroup == 'T2') { eq <- as.formula(paste(dv, '~T2+female+hhsize+marital+edu+citizenship', sep = '')) } else{ cat('Error!') stop() } print(eq) print(paste(subgroup)) m <- lm(formula = eq, data = df, weights = weight) assign(x = paste('m', dv, subgroup, 'control', sep = '_'), value = m) models_control[[i]] <- m } i <- i + 1 } model_df_control <- data_frame() for(dv in dvs) { for(subgroup in subgroups) { model <- get(paste('m', dv, subgroup, 'control', sep = '_')) if(dv %in% covid_vars){ bonferroni_val <- (0.05/length(covid_vars)) bonferroni_val_c <- bonferroni_val } else if(dv %in% noncovid_vars) { bonferroni_val <- (0.05/length(noncovid_vars)) bonferroni_val_nc <- bonferroni_val } else{ cat('Error!') stop() } tmp <- tidy(model, conf.int = T, conf.level = .9) %>% filter(term == subgroup) %>% mutate(coefficient = term, dv = dv, term = factor(dv, levels = dvs, labels = dv_names), subgroup = subgroup, model = factor(subgroup, levels = c("T1", "T2"), labels = c("Invitation", "Participation")), significant = p.value <= 0.05, significant_bc = p.value <= bonferroni_val) %>% # Correction: 0.05/31 = 0.001612903 rename(ci90l = conf.low, ci90h = conf.high) model_df_control <- bind_rows(model_df_control, tmp) } } rm(tmp) new_coef_pbc <- 
model_df_control %>% filter(significant_bc == T & model == "Participation") %>% mutate(term = factor(term, levels = c( "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "State government in pandemic (55)", "Feeling depressed (51)" ), labels = c( "Personal Covid threat (50)", "General COVID concern (51)", "COVID infection probability (50)", "State government in pandemic (55)", "Feeling depressed (51)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure3.pdf', width = 10, height = 5, ) new_coef_pbc dev.off() ``` ```{r Predicted Change in Attitudes, include=FALSE} sig_dvs_bc <- model_df_control %>% filter(significant_bc == T & model == "Participation") %>% dplyr::pull(dv) scenario1 <- cbind(1, # Intercept 0, # Treatment 1, # Female 1, # HH Size 2 0, # HH Size 3+ 0, # Marital: Other 0, # Marital: Single 1, # Education: Medium 0, # Education: High 1 # Citizenship: German ) scenario2 <- cbind(1, # Intercept 1, # Treatment 1, # Female 1, # HH Size 2 0, # HH Size 3+ 0, # Marital: Other 0, # Marital: Single 1, # Education: Medium 0, # Education: High 1 # Citizenship: German ) for (dv in sig_dvs_bc) { model <- get(paste('m', dv, 'T2', 'control', sep = '_')) pred1 <- sim_function(model, nsim = 1000, scenario = scenario1) assign(paste('pred',dv,'1', sep = '_'), pred1) pred2 <- sim_function(model, nsim = 1000, scenario = scenario2) assign(paste('pred',dv,'2', sep = '_'), pred2) } ``` ```{r Recruitment Group Analysis, include = FALSE} recruitment_groups <- c(1:3) models_recruitment <- list() i <- 1 for(dv in dvs) { for (recruitment_group in recruitment_groups) { if(recruitment_group==1) { df_temp <- subset(df, df$sample==1) } else if(recruitment_group == 2) { df_temp <- subset(df, df$sample==2) } else if(recruitment_group == 3) { df_temp <- subset(df, df$sample==3) } else{ cat('Error!') stop() } eq <- as.formula(paste(dv, '~T2', sep = '')) print(eq) print(paste(recruitment_group)) m <- lm(formula = eq, data = df_temp, weights = weight) assign(x = paste('m', dv, recruitment_group, sep = '_'), value = m) models_recruitment[[i]] <- m } i <- i + 1 } model_df_recruitment_group <- data_frame() for(dv in dvs) { for(recruitment_group in recruitment_groups) { model <- get(paste('m', dv, recruitment_group, sep = '_')) if(dv %in% covid_vars){ bonferroni_val <- (0.05/length(covid_vars)) bonferroni_val_c <- bonferroni_val } else if(dv %in% noncovid_vars) { bonferroni_val <- (0.05/length(noncovid_vars)) bonferroni_val_nc <- bonferroni_val } else{ cat('Error!') stop() } tmp <- tidy(model, conf.int = T, conf.level = .95) %>% filter(term == "T2") %>% mutate(coefficient = term, dv = dv, term = factor(dv, levels = dvs, labels = dv_names), recruitment_group = recruitment_group, model = factor(recruitment_group, levels = c(1,2,3), labels = c("2012", "2014", "2018")), significant = p.value <= 
0.05, significant_bc = p.value <= bonferroni_val) %>% # Correction: 0.05/31 = 0.001612903 rename(ci95l = conf.low, ci95h = conf.high) model_df_recruitment_group <- bind_rows(model_df_recruitment_group, tmp) } } rm(tmp) ``` ```{r Plot Recruitment Group Results, include=FALSE} new_coef_pbr <- model_df_recruitment_group %>% filter( term %in% c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)" ) ) %>% mutate(term = factor(term, levels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)" ), labels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(aes(shape = model), size = 2), line_args = list(size = 0.5)) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .8, guide = guide_legend(reverse = TRUE), name="Recruitment\nyear") + scale_shape_discrete( name = "Recruitment\nyear", breaks = c("2012", "2014", "2018"), labels = c("2012", "2014", "2018") ) + theme(text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure_a7.pdf', width = 10, height = 5, ) new_coef_pbr dev.off() ``` ```{r Dropout Analysis, include = FALSE} df$dropout_d <- ifelse(df$dropout==1,1,ifelse(df$dropout==2,1,ifelse(is.na(df$dropout),0,0))) df$dropout_mcs <- ifelse(df$dropout==1,1,ifelse(is.na(df$dropout),0,0)) df$dropout_after <- ifelse(df$dropout==2,1,ifelse(is.na(df$dropout),0,0)) summary(lm(dropout_d~T2, data=df, weights = weight)) summary(lm(dropout_mcs~T2, data = df, weights = weight)) # sig. less likely during MCS... summary(lm(dropout_after~T2, data = df, weights = weight))# but more likely after?? 
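
# Interpretation (inferred from the variable construction above, not from a codebook):
# the three models are weighted linear probability models regressing dropout indicators
# on MCS participation (T2). 'dropout == 1' appears to flag panelists who dropped out
# during the MCS fieldwork period, 'dropout == 2' those who dropped out afterwards;
# dropout_d combines both.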
```

```{r Panel Response Rates Across Panel Waves, echo=FALSE, message=FALSE, results='hide'}
##############################################################
#load database
database <- "Y:/Bearbeitung/Panel_Management/forsa_Datenbank/"
path <- "W:/1 Feldarbeit GIP/COVID-19 Panel/"

# Connect database
source(file.path(database, "load_data_SQL.R"))

# List all names in relevant table
# dbListFields(con, "view_panel_member")
# List all table names of the database
# dbListTables(con)

# 16204 : September 2019
# 16518 : November 2019
df2 <- dbGetQuery(
  con,
  "SELECT ss.member_id, project_id, ss.status, ss.status_date,testuser
   FROM view_panel_member pm
   JOIN view_survey_sample ss ON pm.id = ss.member_id
   WHERE testuser = 0
   AND (project_id = 16204 OR project_id = 16518 OR project_id = 16811 OR
        project_id = 17378 OR project_id = 17965 OR project_id = 18388 OR
        project_id = 18115 OR project_id = 19306 OR project_id = 19687 OR
        project_id = 20013 OR project_id = 20472 OR project_id = 21012 OR
        project_id = 21666 OR project_id = 22133 OR project_id = 22556)"
)

# Disconnect database
password <- "0"
dbDisconnect(con)

##############################################################
# Prepare dataset w43-w57
# Glimpse(df2)
df2$wave <- NA
df2$wave[df2$project_id == 16204] <- "w43"
df2$wave[df2$project_id == 16518] <- "w44"
df2$wave[df2$project_id == 16811] <- "w45"
df2$wave[df2$project_id == 17378] <- "w46"
df2$wave[df2$project_id == 17965] <- "w47"
df2$wave[df2$project_id == 18388] <- "w48"
df2$wave[df2$project_id == 18115] <- "w49"
df2$wave[df2$project_id == 19306] <- "w50"
df2$wave[df2$project_id == 19687] <- "w51"
df2$wave[df2$project_id == 20013] <- "w52"
df2$wave[df2$project_id == 20472] <- "w53"
df2$wave[df2$project_id == 21012] <- "w54"
df2$wave[df2$project_id == 21666] <- "w55"
df2$wave[df2$project_id == 22133] <- "w56"
df2$wave[df2$project_id == 22556] <- "w57"

#recode status
df2$status_<-df2$status
df2$status <- ifelse(df2$status == 1, 1, 0)
table(df2$status_,df2$status)

# Set datetime
df2<-df2%>%
  mutate(date=ymd_hms(status_date))

# Split wave 46
df2$wave_<-df2$wave
df2<-df2%>%
  mutate(wave = ifelse(wave=="w46" & date >= ydm("2020-20-3"),"w46b", wave))
table(df2$wave,df2$wave_,useNA="ifany")

##############################################################
# Merge with Covid group id
group <- read_excel("Y:/Bearbeitung/GIP-s/Sample/Sample_Covid19_Gruppenzuordnung.xlsx",
                    sheet = "Sample_Covid19")
group <- group %>%
  transmute(member_id = key,
            group = Gruppe)

df1 <- group %>%
  left_join(df2, by = "member_id") #left join to keep the covid sample only

df_clean<-df1%>%
  dplyr::select(member_id,status,group, wave_)

df_wide <- df_clean %>%
  distinct() %>%
  spread(wave_, status)

dfw<-df_wide%>%
  dplyr::select(member_id,group,w43:w57)

# Group 1-7
dfw$group17 <- ifelse(dfw$group <= 7, 17, 8)
addmargins(table(dfw$group, dfw$group17,useNA="ifany"))

# Graph RRs ------------------------------------------------
prop.w43 <- with(dfw, table(group17, w43)) %>% prop.table(margin = 1)
prop.w44 <- with(dfw, table(group17, w44)) %>% prop.table(margin = 1)
prop.w45 <- with(dfw, table(group17, w45)) %>% prop.table(margin = 1)
prop.w46 <- with(dfw, table(group17, w46)) %>% prop.table(margin = 1)
prop.w47 <- with(dfw, table(group17, w47)) %>% prop.table(margin = 1)
prop.w48 <- with(dfw, table(group17, w48)) %>% prop.table(margin = 1)
prop.w49 <- with(dfw, table(group17, w49)) %>% prop.table(margin = 1)
prop.w50 <- with(dfw, table(group17, w50)) %>% prop.table(margin = 1)
prop.w51 <- with(dfw, table(group17, w51)) %>% prop.table(margin = 1)
prop.w52 <- with(dfw, table(group17, w52)) %>% prop.table(margin = 1) prop.w53 <- with(dfw, table(group17, w53)) %>% prop.table(margin = 1) prop.w54 <- with(dfw, table(group17, w54)) %>% prop.table(margin = 1) prop.w55 <- with(dfw, table(group17, w55)) %>% prop.table(margin = 1) prop.w56 <- with(dfw, table(group17, w56)) %>% prop.table(margin = 1) prop.w57 <- with(dfw, table(group17, w57)) %>% prop.table(margin = 1) RRs <- as.data.frame(prop.w45) RRs <- RRs %>% mutate(wave=45) %>% dplyr::rename(respond = w45) pre <- as.data.frame(prop.w46) %>% mutate(wave=46) %>% dplyr::rename(respond = w46) pre2 <- as.data.frame(prop.w47) %>% mutate(wave=47) %>% dplyr::rename(respond = w47) pre3 <- as.data.frame(prop.w48) %>% mutate(wave=48) %>% dplyr::rename(respond = w48) pre4 <- as.data.frame(prop.w49) %>% mutate(wave=49) %>% dplyr::rename(respond = w49) pre5 <- as.data.frame(prop.w50) %>% mutate(wave=50) %>% dplyr::rename(respond = w50) pre6 <- as.data.frame(prop.w51) %>% mutate(wave=51) %>% dplyr::rename(respond = w51) pre7 <- as.data.frame(prop.w52) %>% mutate(wave=52) %>% dplyr::rename(respond = w52) pre8 <- as.data.frame(prop.w53) %>% mutate(wave=53) %>% dplyr::rename(respond = w53) pre9 <- as.data.frame(prop.w54) %>% mutate(wave=54) %>% dplyr::rename(respond = w54) pre10 <- as.data.frame(prop.w55) %>% mutate(wave=55) %>% dplyr::rename(respond = w55) pre11 <- as.data.frame(prop.w56) %>% mutate(wave=56) %>% dplyr::rename(respond = w56) pre12 <- as.data.frame(prop.w57) %>% mutate(wave=57) %>% dplyr::rename(respond = w57) pre13 <- as.data.frame(prop.w43) %>% mutate(wave=43) %>% dplyr::rename(respond = w43) pre14 <- as.data.frame(prop.w44) %>% mutate(wave=44) %>% dplyr::rename(respond = w44) RRs <- rbind(pre13, pre14, RRs,pre, pre2, pre3, pre4, pre5, pre6, pre7, pre8, pre9, pre10, pre11, pre12) RRs$N[RRs$group17==17] <- 5598 RRs$N[RRs$group17==8] <- 799 RRs <- RRs %>% mutate(freq=Freq*100, se_p = abs((freq*(100-freq))/N), se = sqrt(se_p), lower=freq-1.96*se, upper=freq+1.96*se) %>% filter(respond==1) RRs$wave <- factor(RRs$wave, levels = c(43,44,45,46,47,48,49,50,51,52,53,54,55,56,57), labels = c("Sep '19", "Nov '19", "Jan '20", "Mar '20", "May '20", "Jul '20", "Sep '20","Nov '20", "Jan '21","Mar '21", "May '21", "Jul '21", "Sep '21","Nov '21", "Jan '22")) rrs_plot <- ggplot(RRs, aes(x=wave, y=freq, fill=group17)) + geom_bar(position=position_dodge(), stat="identity") + geom_errorbar(aes(ymin=lower, ymax=upper), size=.3, # Thinner lines width=.2, position=position_dodge(.9)) + xlab("") + ylab("Response Rate in %") + scale_fill_grey(name="", # Legend label, use darker colors breaks=c("8", "17"), labels=c("Control Group", "MCS"), start = 0.5, end = 0.8) + ylim(0,100)+ # geom_vline(xintercept = 3.5, linetype="dotted", # color = "grey70", size=1.5) + # geom_vline(xintercept = 6.5, linetype="dotted", # color = "grey70", size=1.5) + theme(panel.background = element_blank(), panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.caption = element_text(hjust = 0), axis.title = element_text(size = 10, color = "grey15"), axis.text = element_text(color = "grey15", size = 10), axis.text.x = element_text(angle = 45, hjust=0.7), axis.ticks = element_blank(), legend.position = "bottom", text=element_text(family="serif"), legend.text = element_text(size = 10), axis.title.y= element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)), axis.title.x= element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)), axis.title.y.right = element_text(margin = margin(t = 0, r 
= 0, b = 0, l = 10) )) ``` ```{r Multilevel model for change, include = FALSE} weight_mcs <- RRs$Freq[RRs$group17=='8'& RRs$wave=="Sep '19"]/RRs$Freq[RRs$group17=='17'& RRs$wave=="Sep '19"] weight_c <- 1 ids_part <- data %>% dplyr::select(compl_mcs, id_g) df_ml <- df_clean %>% mutate(wave_=sub('.', '', wave_)) %>% mutate(mcs = as.integer(group %in% 1:7), weight = NA) df_ml$weight[df_ml$mcs==1] <- weight_mcs df_ml$weight[df_ml$mcs==0] <- weight_c # left_join(ids_part, by("member_id"="id_g")) resp_models <- list() i <- 1 for (wave in 43:55) { m <- glm(status ~ mcs, data = subset(df_ml,wave_==paste(wave)), family = 'binomial') resp_models[[i]] <- m i <- i+1 } resp_model_df <- data_frame() wave <- 43 for (i in 1:13) { tmp <- tidy(resp_models[[i]], conf.int = T, conf.level = .95) %>% mutate(significant = p.value <= 0.05, significant_bc = p.value <= 0.003846154, model = wave) %>% # Correction: 0.05/13 = 0.003846154 filter(term=="mcs") %>% rename(ci95l = conf.low, ci95h = conf.high) resp_model_df <- bind_rows(resp_model_df, tmp) rm(tmp) wave <- wave+1 } resp_model_plot <- ggplot(resp_model_df, aes(x=estimate, y=as.factor(model), colour=significant)) + geom_vline(xintercept = 0, color = "black") + geom_point(position = position_dodge(width=.75)) + geom_errorbarh(aes(xmin=ci95l, xmax=ci95h), position=position_dodge(width=.75), height=0) + labs(x="Effect of MCS Participation", y="Wave", color ="Significance level") + theme_bw() + scale_color_grey(labels = c(expression("p ">="0.05"), "p < 0.05"), start=0.8, end=0.2) + scale_y_discrete(limits=rev)+ theme(text=element_text(family="serif")) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure2.pdf', width = 10, height = 5, ) resp_model_plot dev.off() #### merging key <- read_dta("Y:/Bearbeitung/Datenaufbereitung/key/key.dta") %>% mutate(hhid=as.numeric(hhid)) df_ml_ids <- df_ml %>% mutate(member_id=as.numeric(substr(member_id,1,nchar(member_id)-2))) df_merged_pre <- inner_join(key,df_ml_ids, by=c("hhid_l"= "member_id")) df$id <- as.numeric(substr(df$id,1,nchar(df$id)-2)) # , select = c(id, citizenship, marital, hhsize, age, female, edu) df_merged <- df_merged_pre %>% left_join(df, by=c("hhid_g"="id")) %>% dplyr::select(hhid_l, wave_, status, group, citizenship, marital, hhsize, age, female, edu) %>% rename(id=hhid_l) %>% mutate(mcs = as.integer(group %in% 1:7)) resp_models_dem <- list() i <- 1 for (wave in 43:55) { m <- glm(status ~ mcs + female + age + hhsize + marital + edu + citizenship, data = subset(df_merged,wave_==paste(wave)), family = 'binomial') resp_models_dem[[i]] <- m i <- i+1 } resp_model_dem_df <- data_frame() wave <- 43 for (i in 1:13) { tmp <- tidy(resp_models_dem[[i]], conf.int = T, conf.level = .95) %>% mutate(significant = p.value <= 0.05, significant_bc = p.value <= 0.003846154, model = wave) %>% # Correction: 0.05/13 = 0.003846154 rename(ci95l = conf.low, ci95h = conf.high) resp_model_dem_df <- bind_rows(resp_model_dem_df, tmp) rm(tmp) wave <- wave+1 } resp_model_dem_plot <- ggplot(resp_model_dem_df[resp_model_dem_df$significant==TRUE,], aes(x=estimate, y=as.factor(model), colour=significant)) + geom_vline(xintercept = 0, color = "black") + geom_point(position = position_dodge(width=.75)) + geom_errorbarh(aes(xmin=ci95l, xmax=ci95h), position=position_dodge(width=.75), height=0) + labs(x="Effect of MCS Participation", y="Wave", color ="p < 0.05") + theme_bw() + scale_colour_grey(start=0.8, end=0.2) + scale_y_discrete(limits=rev)+ theme(text=element_text(family="serif")) wave <- 43 for (i in 1:13) { print(paste("Model ", wave, 
sep = "")) print(summary(resp_models_dem[[i]])) wave <- wave+1 } ################ fe_model <- glm(status ~ mcs + female + age + hhsize + marital + edu + citizenship + as.factor(wave_), data = subset(df_merged, between(as.numeric(wave_),46,55)), family = 'binomial') summary(fe_model) ``` # Introduction Panel surveys consist of repeated measurements of the same individuals’ attitudes, behaviour, and living conditions through self-completion questionnaires or interviewer-mediated surveys [@kasprzyk89]. For this purpose, respondents are re-surveyed, typically at regular intervals, for example once a year or every other month [@lynn19]. The panel survey design combines many attractive measurement qualities. In contrast to cross-sectional surveys, which only allow studying inter-personal differences, the panel survey design also allows studying intra-personal changes over time [@willson16]. Furthermore, panel surveys can establish the temporal links between dependent and independent variables needed for detecting causal relationships [@hoffmann21]. Finally, long-running panel surveys collect large amounts of data from the same individuals over time. This provides researchers with a large selection of potential control variables, thus reducing the risk of unobserved heterogeneity and endogeneity in their analyses [@wooldridge05]. However, panel surveys may have a limitation which results from the very characteristic that makes this form of data collection so attractive: the re-surveying process. Researchers fear that by frequently re-surveying the same individuals the respondents may alter their actual and/or reported behaviour and attitudes. This phenomenon is called panel conditioning [@das11] and may have a lasting impact on panel data [@warren12]. Thus, the mere act of re-surveying may distort the panel’s findings [@sun19]. Moreover, any change in panel design, especially any increase in surveying frequency or questionnaire length and content, may increase panel conditioning effects. In this study, we explore a specific type of potential panel conditioning. A large-scale experiment in an existing probability-based online panel during the early phase of the COVID-19 pandemic in Germany allows us to investigate whether increasing the surveying frequency by a factor of nine, from bi-monthly to additional weekly surveys, generates panel conditioning effects and whether these effects have a lasting impact across panel data collection waves. Our data stem from an experiment in which 7/8th of an established panel sample was randomly allocated to additional weekly surveys for 16 consecutive weeks, whereas the other random 1/8th of the panel sample remained at the regular surveying frequency that respondents were already used to with surveys every other month. # Previous Research Panel conditioning is the learning effect that participating in a panel study has on the panel participants [@struminskaya21a].The learning effect can be advantageous for the data quality, for example panel participants learn to respond to the survey questions as intended by the researchers [e.g. no rounding in expenditure reports, @eckman21]. They can also be disadvantageous for the data quality, for example, if participants learn to avoid burdensome filter loops by providing less information (the so-called "motivated underreporting", see @eckman14). Moreover, the learning effect can affect panel participants' actual attitudes and behaviour rather than, or in addition to, their response behaviour. 
For example, repeatedly being asked about a certain survey topic may encourage panel participants to think about this topic more and to gather information about it. This may be beneficial for the panel respondent personally, but it threatens inference from the panel data, because it makes the panel participants different from the population they are supposed to represent [e.g., @toepoel09].

While it is widely acknowledged that panel conditioning effects may bias measurements in panel data collections, it remains unclear to what extent and under which conditions panel conditioning occurs. This may be because little of the research on this topic relies on experimental rather than observational [e.g., @muller20] or quasi-experimental designs [e.g., @binswanger13]. In one of the rare experimental studies on panel conditioning, @silber19 varied the frequency with which respondents were asked to provide information on their ego-centered social networks. Respondents received a social network assessment module with up to 49 questions either once or twice within little more than a month. Contrary to their expectations, the authors found that neither the reported network size and density nor the general data quality suffered from asking respondents to fill out the network module again. These findings suggest that conditioning is not as prevalent as some researchers might think, even when asking for relatively burdensome and time-consuming information. This is in line with a number of non-experimental studies that found no, inconsistent, or only very minor panel conditioning effects in a variety of research settings [e.g., @pennell92; @sun19; @bailar75].

Other experimental studies, however, found significant panel conditioning effects, although the direction of these effects remains unclear. For example, @halpern-manners14 find that repeatedly asking respondents about deviant behaviour (drunk driving, minor theft) increases their likelihood of reporting such behaviour. This suggests that positive experiences with previous panel waves increase people's willingness to provide sensitive information [i.e. potential advantageous panel conditioning effects, see also @struminskaya16; @kuhne18; @uhrig11]. However, @torche12 find the opposite: earlier questions about substance use (alcohol, cigarettes, marijuana and cocaine) decreased the respondents' likelihood of reporting substance use in later data collections [i.e. potential disadvantageous panel conditioning effects, see also @yan12; @struminskaya16; @battaglia96]. In addition, the persistence of conditioning effects over time remains unclear. @halpern-manners14 found conditioning effects only when the baseline and follow-up survey were one month apart but not when they were one year apart, suggesting that conditioning effects fade away with an increasing temporal distance between measurements. @torche12, however, found conditioning effects even when the survey waves were one year apart, thus suggesting long-lasting conditioning effects.

The literature also remains unclear about which types of survey questions may be particularly affected or unaffected by panel conditioning. Some studies indicate that behavioural questions and, in particular, those which are directly linked to the main topic of the panel are most affected. @bach19, for example, find that, over the course of an employment panel, unemployed persons increasingly report participating in governmental labour market programmes.
By linking the respondents’ data to administrative employment records, the authors find that the reported increase reflects not just a change in reporting but a change in actual behaviour. Furthermore, each additional exposure to the employment survey questions intensifies the effect on respondents' labour market programme participation. Through an instrumental variable approach, the authors establish that this increase in programme participation is disproportionate relative to that of comparable people not selected for the panel. This is in line with other research which suggests, for example, that participation in election studies stimulates respondents to become more interested and engaged in elections [e.g., @clausen68; @yalch76; @traugott79].

While most studies investigate panel conditioning effects on behavioural survey items, some studies indicate that attitudinal questions may be affected, too [for a theoretical framework see @bergmann18]. @sturgis09 find support for their hypothesis that attitudes strengthen over the course of panel studies. This is in line with research by @waterton89, who find circumstantial evidence suggesting people get “politicized” over the course of the panel. However, both of these studies are observational and thus preclude causal claims. In addition, some studies suggest that neither attitudinal nor behavioural survey questions are affected by conditioning, while other question types, such as knowledge questions, show panel conditioning biases [e.g., @das11]. Furthermore, researchers have suggested that some perceived conditioning effects may be an artifact of differential nonresponse and attrition between comparison groups [e.g., @struminskaya16; @wooden14].

In summary, the literature typically suspects panel conditioning effects but finds little consistent evidence. Most studies on panel conditioning either are not based on experiments, which hampers causal inference, or are conducted on a small scale or with nonprobability samples, which limits their external validity. As a consequence, few studies find panel conditioning effects and even fewer find effects consistently for the same types of variables, topics, or surveying frequencies. Therefore, little is still known about how re-surveying respondents in a panel survey affects the study findings.

In this article, we report the results of a large-scale experiment on increasing the surveying frequency in a long-running probability-based online panel from bi-monthly to additional weekly surveys. It should be noted that we apply a broad definition of panel conditioning effects, where panel conditioning is any learning effect caused by a panel treatment that is observable in substantive survey responses. The random allocation of panelists to either the treatment group, which received additional surveys, or the control group, which did not, allows us to examine the causal effect of the additional weekly surveys. Moreover, the underlying panel survey, which continued during and after the treatment, allows us to examine a multitude of survey questions which may be affected by the treatment. In this way, we aim to contribute to answering the following research questions: Does increasing the surveying frequency in a panel study lead to conditioning effects? And if so, which types of survey questions are affected?

# Data and Methods

At the outbreak of the COVID-19 pandemic in Germany, the German Internet Panel (GIP) added a high-frequency data collection to its usual bi-monthly surveys: the Mannheim Corona Study (MCS).
The goal of the MCS was to study the effect of the pandemic and the accompanying containment measures on individuals’ lives. It ran from 20 March until 10 July 2020 and surveyed GIP panel members by inviting them to complete weekly 10-minute questionnaires [see @blom20; @cornesse21b].

In a rotating panel design, the regular GIP sample of 6,397 panelists was divided into eight random sub-samples of approximately equal size.^[Note that all original panel members who had not actively de-registered were considered GIP panelists, regardless of when they last participated in a GIP survey.] Seven sub-samples were each assigned to a specific day of the week. On their assigned day, the MCS panelists received an invitation to the current MCS questionnaire and were given 48 hours to complete the week's survey. They received invitations to the MCS for 16 consecutive weeks, in addition to their regular GIP invitations every other month.

The seven GIP sub-samples that were invited to the MCS constitute our treatment sample (N = 5,598). The GIP and MCS data collections overlapped in March, May, and July (see Figure \ref{fig:fig1}). The eighth GIP sub-sample did not receive any invitation to the MCS but continued with the standard GIP survey intervals. This control group (N = 799) allows us to study panel conditioning effects experimentally in a probability-based sample of the general population.^[Note that an additional 149 GIP panel members were not part of the experiment but continued with the standard bi-monthly surveying intervals for practical fieldwork reasons. Most of these panelists live in previously offline households and receive equipment and technical support to participate in the regular GIP surveys. Since some of these offliners do not have an email address, they could not be invited to the MCS at short notice. They were excluded from all analyses.]

![Schematic timeline of GIP survey waves in relation to the MCS fieldwork time\label{fig:fig1}](figure1.jpg)

## German Internet Panel and Mannheim Corona Study

The GIP is a probability-based online panel of the general population in Germany aged 16-75 at the time of recruitment. The first GIP panel members were recruited in 2012 [see @blom15], with additional rounds of recruitment in 2014 and 2018. During the first two rounds of recruitment, interviewers conducted initial face-to-face interviews, and non-internet households were provided with the equipment and technical support needed to enable their participation in the GIP [see @blom17; @herzing19; @cornesse21]. In 2018, new panel members were recruited without interviewers and exclusively via postal mail [see @cornesse21a; @friedel22]. All panel members are invited to each survey wave. Consequently, by the start of the MCS, even the most recent GIP sample members had been invited to ten regular panel waves (in addition to the recruitment surveys), and the earliest recruits had received 46 GIP survey invitations over the course of almost 8 years. All panel members were thus well-acquainted with the GIP procedures and surveying intervals.

The GIP is a multi-purpose panel with a focus on social, political, and economic questionnaire content. Approximately two-thirds of its content is repeated at yearly intervals or more frequently, for example, for three consecutive waves. Thus, in contrast to some other probability-based online panels, the GIP is a longitudinal panel survey rather than a pool of respondents [see e.g. @blom16 for some alternative approaches in Europe].
In addition to general longitudinal survey items, respondents regularly receive questions that investigate their position regarding current events, such as election outcomes, terrorist attacks or natural disasters. Therefore, receiving questions about the COVID-19 pandemic would not have been surprising to GIP panelists. However, the MCS procedures, with their increased surveying frequency and short response window of 48 hours, were new to the panelists.

The MCS was run by the GIP team together with a group of substantive researchers from the same research center. The key operational difference between the two studies was the considerably higher pace of the MCS. Instead of compiling questionnaires every two months, the MCS implemented a new questionnaire each week [see @cornesse22 for more information on the MCS measurement instruments]. Approximately two-thirds of each MCS questionnaire consisted of questions that were repeated each week or that were only slightly adjusted to account for social, economic, political or epidemic developments [e.g., @naumann20]. The remaining third of the questionnaire contained questions that were asked only once during the MCS fieldwork (though possibly repeated during the ensuing GIP waves), for example on the introduction of the official contact tracing app [e.g., @blom21f], or questions that were repeated only once or twice during the 16 weeks of MCS fieldwork [e.g., @mata21]. To ease the workload and to allow long-term comparisons with GIP data, the MCS adopted previous GIP items wherever possible [e.g., @mohring21]. The MCS covered topics related to people’s social and economic situation, political attitudes, behaviours and psychological health.

## Random Allocation to Treatment and Control Group

The allocation of the 6,397 GIP panelists to one of the MCS samples or the control group was strictly random. We checked whether the allocation to treatment and control group was balanced with respect to key socio-demographic characteristics. Neither gender, age, household size, marital status, education, nor citizenship predicts the assignment of GIP panelists to the treatment or control group. Furthermore, comparing the socio-demographic characteristics of those in the treatment group who responded to the MCS invitation with those of the control group also shows no significant differences (see Table \ref{tab:tab1}).

```{r assignment-check, echo=FALSE, message=FALSE, results='asis'}
stargazer(
  r1, r2,
  header = FALSE,
  type = "latex",
  title = "Predicting assignment to the treatment",
  dep.var.labels = c("Invited to MCS", "Participated in MCS"),
  model.numbers = FALSE,
  dep.var.caption = "",
  covariate.labels = c("Female", "Age",
                       "Household Size: (ref=1)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}2",
                       "\\-\\hspace{0.3cm}3 or more",
                       "Marital Status: (ref=Married)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}Other",
                       "\\-\\hspace{0.3cm}Single",
                       "Education: (ref=Low)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}Medium",
                       "\\-\\hspace{0.3cm}High",
                       "Citizenship"),
  keep.stat = c("rsq", "f"),
  notes.align = "l",
  label = "tab:tab1"
)
```
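
The model objects `r1` and `r2` reported in Table \ref{tab:tab1} are estimated elsewhere in this document's code. For orientation, the non-evaluated chunk below gives a minimal sketch of how such balance checks could be specified, assuming linear probability models (consistent with the R-squared and F statistics requested from `stargazer`) and using the treatment indicators and covariates constructed in the data-processing chunk.

```{r assignment-check-sketch, eval=FALSE}
## Sketch only (not evaluated): one plausible specification of the balance checks.
## The linear-probability form is an assumption; T1 flags invitation to the MCS,
## T2 actual MCS participation (see the data-processing chunk above).
r1 <- lm(T1 ~ female + age + hhsize + marital + edu + citizenship, data = df)
r2 <- lm(T2 ~ female + age + hhsize + marital + edu + citizenship, data = df)
```
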

## GIP Survey Participation

In addition to checking for potential socio-demographic differences, we also examined whether the treatment and control group differed in terms of GIP survey participation. This could either be the case already before the MCS (i.e., as a result of the allocation procedure) or during and after the MCS (i.e., as a potential attrition effect of the MCS). Each of these could potentially impact our conclusions regarding conditioning effects in the GIP. For Figure \ref{fig:fig2}, we fitted bivariate logistic regression models with participation in GIP survey waves as the outcome variable and assignment to the MCS treatment versus control group as the predictor variable. We ran these analyses for the three GIP waves before the start of the MCS (waves 43 to 45), the three GIP waves during the MCS (waves 46 to 48), and all available GIP waves after the MCS (waves 49 to 55).

```{r display-resp_model-plot, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:fig2}Effects of MCS Participation on GIP Wave Participation"}
resp_model_plot
```

As Figure \ref{fig:fig2} shows, we find significant differences between the MCS treatment and control group only for GIP wave 46, which coincided with the start of the MCS, and waves 52 and 53, which were conducted more than half a year after the MCS. At each of these three waves, the treatment group is less likely to participate than the control group. We find no significant differences in response propensities before the MCS.

Moreover, examining a fixed-effects model for participation across GIP data collection waves (which disregards the wave-specific participation differences by experimental group shown in Figure \ref{fig:fig2}) shows that GIP survey participation generally declines across the observed data collection waves (see Table \ref{tab:tabA1} in the Appendix). This is typical for panel studies and therefore unsurprising. The model also shows systematic differences in participation by socio-demographic characteristics (gender, marital status, education, citizenship).
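
The plot object `resp_model_plot` behind Figure \ref{fig:fig2} and the model `fe_model` behind Table \ref{tab:tabA1} are likewise built elsewhere in this document's code. As a non-evaluated illustration, the chunk below sketches a single wave-specific check of the kind summarised in Figure \ref{fig:fig2}; the participation indicator `part_w46` is a hypothetical placeholder name, and treatment is defined here via the invitation indicator `T1`.

```{r resp-model-sketch, eval=FALSE}
## Sketch only (not evaluated): bivariate logistic regression of participation
## in one GIP wave on experimental group. `part_w46` is a hypothetical 0/1
## indicator for participation in GIP wave 46; T1 flags invitation to the MCS.
resp_w46 <- glm(part_w46 ~ T1, family = binomial(link = "logit"), data = df)
summary(resp_w46)
```
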

## Hypotheses

Our unique research setting allows us to assess the impact of increasing the surveying frequency in a panel study with regard to conditioning effects. Unlike many other studies, we can exploit our experimental design, in which some members of the GIP sample were exposed to the increased surveying frequency while others were not. Since the literature on conditioning effects is unclear about whether conditioning effects will occur at all, we first test the following general hypothesis:

| H1: Increasing the surveying frequency for the treatment group results in differences in their responses to
| GIP survey questions as compared to the control group.

If we find any indication of conditioning effects in our experiment, the next question is whether conditioning affects some survey questions more than others. Some of the literature suggests that conditioning effects occur on the dominant topic of the surveys respondents were exposed to. For example, respondents may change their reporting of substance use [@halpern-manners14] or employment programme participation [@bach19] if previously asked about these topics. The dominant topic of the MCS is COVID-19 and its impact on all domains of the respondents' lives. We, therefore, test the following hypothesis:

| H2: Differences in responses mainly occur on survey questions that are linked to the content of the MCS.

Some of the literature suggests that panel conditioning effects may fade away over time. For example, conditioning effects may be measurable if survey questions are repeated within little more than a month, but not if they are repeated after a year [@halpern-manners14]. This is in line with the general evidence on treatment effects in survey experiments, which have often been shown to diminish over time [e.g., @gaines07]. We, therefore, test the following hypothesis:

| H3: Differences in responses mainly occur during or directly after the MCS fieldwork period (i.e. GIP
| waves 46 to 49).

To get a full picture of when and on which survey questions conditioning effects occur, we need to test a larger number of GIP questions for potential differences between the treatment and control group. Testing many variables for the same hypothesis, however, means that the conducted tests are not independent of one another. If this fact is ignored in the analyses, we may find evidence in support of a hypothesis simply because we tested so many items that, just by chance, some of them show statistically significant effects [@savin80]. Indeed, not accounting for this multiple-comparison problem may be responsible for some of the rather inconsistent and hard-to-interpret findings on panel conditioning in the literature. We therefore test the following hypothesis:

| H4: Differences in responses between treatment and control group are an artifact of multiple comparison
| testing.

## Variable Selection and Analysis Strategy

Panel conditioning effects due to the additional weekly MCS surveys may occur in all GIP survey waves which were implemented either during the 16 weeks of MCS data collection (i.e. GIP waves 46 through 48; see Figure \ref{fig:fig1}) or afterwards (i.e. waves 49 and after). At the time of our analyses, GIP data are available until wave 55, which means that we can examine 9 data collection waves, spanning 1.5 years from the start of the MCS fieldwork (see https://paneldata.org/gip/ for documentation of all GIP survey questions by data collection wave).

We test all survey questions for potential conditioning effects, except for panel administrative questions (e.g. phone number verification), socio-demographic questions (e.g. age, gender, education), and questions that were not well-suited for our analyses for other reasons, for example because they contained their own experimental splits (e.g. for question wording experiments). Furthermore, we excluded some large item batteries on perceptions of political parties (e.g. left-right placement of each party in the German parliament), because we did not think they would meaningfully contribute to the analyses and would only add an unnecessary number of spurious effects.

All in all, we tested 86 question items for differences between the treatment and control group. 68 of them were implemented once in the GIP questionnaires during the time span we examine, 12 items were implemented twice, 4 items three times, and 2 items four times. Overall, this results in 112 tests for differences between treatment and control group across items and survey waves (see Figures A1 to A3 in the appendix). 51 tests for differences between treatment and control group were conducted on items that relate to the content of the MCS. 22 of those tests explore differences on questions which explicitly mention the COVID-19 pandemic. This includes, for example, people's fear of the SARS-CoV-2 virus, how they weigh the economic damages of the enforced COVID-19 counter-measures against their societal benefits, or whether they downloaded the official COVID-19 contact tracing app. In addition, 29 tests were conducted on items that do not specifically mention COVID-19, but that were replicated, sometimes with small adjustments, from the MCS questionnaires. This includes, for example, a questionnaire module on health behaviours (e.g., snacking, physical activity) and mental health (e.g.
feelings of depression and loneliness) which was included in the MCS and replicated in GIP wave 51. 61 tests were conducted on items unrelated to the MCS, including questions on topics such as climate change and data protection.

For each question in our analysis, we examine potential conditioning effects by running regression models with the survey responses as the outcome variables and the treatment versus control group as the central predictor variable.^[Note that in the results section, we define the treatment group as people who were both invited to the MCS and participated in it at least once, and the control group as anyone who was never invited to the MCS even though they could have been. For robustness analyses in which we define the treatment group as anyone who was invited to the MCS, regardless of whether they ever participated in it, see Figures \ref{fig:figA1} and \ref{fig:figA2} in the Appendix.] Generally, all question items on which we find a significant difference in responses at the 95% confidence level are considered to be potentially affected by conditioning. To examine the robustness of the findings, we run all models with and without accounting for multiple-comparison testing via Bonferroni corrections [see e.g., @haynes13]. For the Bonferroni correction, we divide the alpha-error of the initial estimates by the number of tests in the main comparison group an item belongs to (MCS-related vs. MCS-unrelated tests). We also run all models with and without controlling for the socio-demographic characteristics related to differential GIP participation over time (see Table \ref{tab:tabA1} in the Appendix).
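
The estimation code for these per-item tests is implemented elsewhere in this document's code. The non-evaluated chunk below sketches the general form of one such test and of the group-wise Bonferroni thresholds, using two COVID-19-related items constructed in the data-processing chunk as examples; the linear specification and the exact control set are simplifying assumptions for illustration.

```{r item-test-sketch, eval=FALSE}
## Sketch only (not evaluated): one per-item test of treatment vs. control.
## Restrict the comparison to MCS participants (T2 == 1) and the never-invited
## control group, in line with the treatment definition in the footnote above.
dat <- subset(df, T2 == 1 | invit_mcs == 0)

test_item <- function(item, data = dat) {
  # Regress the item on the treatment dummy plus socio-demographic controls
  # (linear specification assumed here for illustration)
  f <- reformulate(c("T2", "female", "age", "hhsize", "marital", "edu", "citizenship"),
                   response = item)
  summary(lm(f, data = data))$coefficients["T2", "Pr(>|t|)"]
}

# Two example items that explicitly mention COVID-19
p_vals <- sapply(c("tradeoff50", "threat50"), test_item)

# Bonferroni threshold: alpha divided by the number of tests in the item's
# comparison group (51 MCS-related tests; 61 MCS-unrelated tests)
p_vals < 0.05 / 51
```
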

# Results

When accounting for multiple-comparison testing and controlling for socio-demographics, only 5 out of the 112 tests performed for differences between the treatment and control group (i.e. 4% of all tests) suggest an impact of the MCS on responses to GIP survey questions (see Figure \ref{fig:fig3}).^[Note that with Bonferroni correction but without socio-demographic controls, only 4 of the initially statistically significant potential conditioning effects remain significant, as can be seen in Figure \ref{fig:figA3} in the Appendix.]

```{r display-coefplot-bc-sig-coefs, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:fig3}Significant conditioning effects with Bonferroni correction and inclusion of demographic controls (GIP survey wave of the tested item in parentheses)"}
new_coef_pbc
```

Four of the significant conditioning effects are directly related to the dominant topic of the MCS: feeling personally threatened by COVID-19, general concern about COVID-19, perceived likelihood of contracting SARS-CoV-2, and support for increasing the federal states' competencies in a pandemic. While robust to Bonferroni correction, these effects point in different directions as to how the MCS has impacted the respondents: MCS participants consider COVID-19 to be less of a threat to themselves (-0.81 points on an 11-point scale) and rate their likelihood of contracting the virus as lower than the control group does (-0.31 points on a 7-point scale). However, they also report greater general concern about COVID-19 (0.22 points on a 4-point scale) and higher support for increasing the German federal states' decision-making competencies during a pandemic (0.47 points on an 11-point scale). In addition to these four items that explicitly mention COVID-19 in the question text, one other conditioning effect emerges. It concerns feelings of depression, replicated from a questionnaire module on mental and physical health implemented in the MCS. The result suggests that MCS participants feel less depressed than the control group (-0.18 points on a 4-point scale). Four out of five robust conditioning effects concern question items measured within six months of the end of the MCS, but none of them are from GIP surveys conducted during (waves 46 to 48) or directly after (wave 49) the MCS. One robust finding is from GIP wave 55, which was conducted more than a year after the end of the MCS.

Since Bonferroni corrections are sometimes judged as too conservative, we additionally examine the results of uncorrected tests as a robustness analysis: only 21 (i.e. 19%) of all tests initially indicate a potential impact of the MCS on responses to GIP survey questions when neither accounting for multiple-comparison testing nor controlling for socio-demographics (see Figure \ref{fig:fig4}).

```{r display-coefplot-all-sig-coefs, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:fig4}Initially significant conditioning effects (GIP survey wave of the tested item in parentheses)"}
new_coef_p
```

Among the question items explicitly mentioning COVID-19, 8 tests out of 22 (i.e. 36%) showed significant differences between the treatment and control group. Three of those tests were on repeated measures of the same item (weighing economic damages against societal benefits of COVID-19 counter-measures) and showed significant differences at all three GIP waves in which it was implemented (waves 50, 52, and 54). The other 5 items (personal COVID threat, general COVID concern, perceived probability of infection, support for increasing state competencies in case of a pandemic, and COVID-19 contact tracing app usage) were only implemented once, so it is not possible to say whether effects would continue to surface if repeated.

Among the question items that do not directly mention COVID-19 but that were MCS replications, 6 out of 29 tests (i.e. 21%) showed significant differences between treatment and control group. One of those questions (government responsibility for public healthcare) was implemented twice since the MCS (waves 51 and 55), but only showed an effect at the first measurement time point, indicating a potential fade-away effect of conditioning after the MCS. All other tests with significant differences were on items only measured once. Three of them were part of an MCS questionnaire module on physical and mental health during the pandemic (i.e., engaging in medium levels of physical activity, snacking, feeling depressed). Two others relate to other MCS topics relevant to the pandemic debate in Germany at the time (working from home legislation, tax evasion).

Among the question items unrelated to COVID-19 and the MCS, 7 out of 61 tests (11%) showed significant differences between the treatment and control group. One of those question items (government responsibility for the unemployed) was implemented twice since the MCS (waves 51 and 55) in the same questionnaire module as the question on government responsibility for healthcare replicated from the MCS. Here again, significant differences only occur at the first measurement time point, slightly indicating potential fade-away effects.
All other tests with significant differences were on items only measured once (data privacy concerns, data sharing attitudes, opinions on European integration, trust in local government, trust in the European Parliament, trust in online social media). Robustness analyses do not indicate that the conditioning effects would be stronger for the newer GIP sample recruited in 2018 than for the older (more "trained") samples from 2012 and 2014 (see Figure A7 in the appendix).

The findings generally indicate that conditioning effects occur, but only on a small scale, and that survey questions relating to the dominant topic of the add-on study are more likely to be affected than unrelated question items. We find no clear evidence for a fade-away effect of conditioning, since 15 out of 18 tested repeated-measures question items (83%) never showed any significant differences in responses. Moreover, the question item on economic harms versus societal benefits of COVID-19 counter-measures displayed significant differences of approximately the same size at all its measurement time points. Most importantly, the vast majority of the effects are likely an artifact of multiple-comparison testing, as indicated by the results from the Bonferroni-corrected tests.

# Conclusion and Discussion

This article contributes to answering the following research questions: Does increasing the surveying frequency in a panel study lead to conditioning effects? And if so, which types of survey questions are affected? In this study, we apply a broad definition of panel conditioning effects, where panel conditioning is any learning effect caused by a panel treatment that is observable in substantive survey responses. From the literature, we derived four hypotheses relating to these questions and examined whether or not we find support for them in our experimental study.

Our experiment consisted of increasing the surveying frequency of a long-running probability-based online panel (i.e., the GIP), which had only ever exposed its panel members to bi-monthly data collection waves, by a factor of 9 during the early phase of the pandemic in Germany for seven eighths of the sample (i.e. the MCS, N = 5,598). The remaining, randomly selected eighth of the panel sample (N = 799) was never exposed to the high-frequency pandemic-related add-on study. By comparing survey responses of the treatment and control group across the regular GIP survey waves during and after the MCS, we could assess whether participating in the MCS had an impact on panel members' reported feelings, attitudes, and behaviour. In the following, we address and discuss our findings in relation to our hypotheses.

| H1: Increasing the surveying frequency for the treatment group results in differences in their responses to
| GIP survey questions as compared to the control group.

Overall, we find marginal support for this hypothesis. Not accounting for multiple-comparison testing, we find differences in survey responses between the groups in 21 out of 112 conducted tests. With Bonferroni correction and socio-demographic controls, only 5 robust effects remain, suggesting that conditioning effects of increasing the surveying frequency are only a minor issue.

| H2: Differences in responses mainly occur on survey questions that are linked to the content of the MCS.

Four out of five robust conditioning effects found in our study explicitly mention the COVID-19 pandemic in the question text.
The only other robust effect relates to a questionnaire module on mental and physical health, which was implemented in one of the MCS questionnaires. This suggests that question items unrelated to the dominant topic of the add-on study are unaffected by conditioning, while question items related to the dominant topic may be marginally affected.

| H3: Differences in responses mainly occur during or directly after the MCS fieldwork period (i.e. GIP
| waves 46 to 49).

We find no support for this hypothesis. In the initial analyses, without accounting for multiple-comparison testing or controlling for socio-demographic characteristics, we detect only one potential conditioning effect in this time span: COVID-19 tracing app use in GIP wave 49. The other 20 tests with significant differences by group concern later survey waves. Moreover, the five robust effects were nearly all on question items implemented in GIP waves 50 and 51. This is still within half a year of the end of the MCS, so it may be worthwhile to explore this hypothesis again in a study with more repeated-measures instruments.

| H4: Differences in responses between treatment and control group are an artifact of multiple comparison
| testing.

We find support for this hypothesis, as only 5 of the 21 initially statistically significant conditioning effects remained robust when applying Bonferroni corrections. We suspect that at least some of the relatively inconsistent findings in the literature may, like most of the results from our initial tests, be an artifact of multiple testing [@sedgwick14]. Moreover, we suspect that other studies which, like ours, looked into conditioning effects of changing the panel protocol may have found no, or no compelling, evidence of conditioning effects in their data and may therefore have gone unpublished, since publishing null (or marginal) findings is often difficult [@franco14]. This may also be partly responsible for the quite incoherent picture in the existing literature of whether and under which conditions conditioning effects occur. Future research should therefore look into identifying spurious correlations and addressing the potential publication bias in the conditioning literature.

In terms of practical implications, our study provides reassurance to researchers and survey companies that, like us, increased the surveying frequency of their existing panel study to feature additional survey content during the pandemic [for examples see @kapteyn20; @kuhne20; @burton20]. The negative impact of doing so seems to be marginal at most, at least in panel infrastructures similar to the GIP. The results are encouraging for conducting similar add-on studies in the future.

However, our study has important limitations in terms of generalizability. First, the MCS was conducted in a very special situation: a pandemic which affected everyone and confined people to their homes, where they had the time and willingness to provide additional data. Conditioning effects may have been even weaker in an add-on study on a topic which affects people only indirectly (e.g. government dissolution) or which keeps many people away from surveys or the internet (e.g. natural disasters). Second, the MCS was implemented in an existing panel study where all participants may already have generally altered their behaviour and/or attitudes as a result of being in the study. Conditioning effects may have been stronger in a fresh panel sample.
We may also have found more conditioning effects of increasing the surveying frequency if we had replicated more pre-pandemic GIP items frequently during and after the MCS; a true experiment on the impact of increasing the surveying frequency on panel conditioning would have required this. Third, the GIP is already a relatively high-frequency panel, so the impact of increasing the surveying frequency may have been stronger in a panel with less frequent data collection, because the intervention would have been more dramatic for the panel participants. Finally, the GIP and MCS collect data exclusively online. The impact of an interviewer-administered add-on study may have been stronger due to the potentially even higher salience of the study topic in people's minds after discussing it verbally with another person. All of these limitations of our study warrant future research. We therefore encourage researchers planning to conduct panel add-on studies on the urgent topics of the future to include and report the findings of similar experiments.

# References
\newpage # Appendix \setcounter{table}{0} \renewcommand{\thetable}{A\arabic{table}} \setcounter{figure}{0} \renewcommand{\thefigure}{A\arabic{figure}} ![Overview of all tested items\label{fig:fig_A1}](figure_a1.jpg) ![Overview of all tested items (continued)\label{fig:fig_A2}](figure_a2.jpg) ![Overview of all tested items (continued)\label{fig:fig_A3}](figure_a3.jpg) ```{r participation-models-demographics, echo=FALSE, message=FALSE, results='asis'} stargazer(fe_model, header = FALSE, type = "latex", title = "Predicting participation in the German Internet Panel", column.labels = c("Fixed effects"), colnames = FALSE, model.numbers = FALSE, dep.var.caption = "", dep.var.labels = "Participation in GIP", covariate.labels = c("MCS", "Female", "Age", "Household Size: (ref=1) \\\\ \\-\\hspace{0.3cm}2", "\\-\\hspace{0.3cm}3 or more", "Marital Status: (ref=Married) \\\\ \\-\\hspace{0.3cm}Other", "\\-\\hspace{0.3cm}Single", "Education: (ref=Low) \\\\ \\-\\hspace{0.3cm}Medium", "\\-\\hspace{0.3cm}High", "German Citizenship", "Wave: (ref=46) \\\\ \\-\\hspace{0.3cm} 47", "\\-\\hspace{0.3cm} 48", "\\-\\hspace{0.3cm} 49", "\\-\\hspace{0.3cm} 50", "\\-\\hspace{0.3cm} 51", "\\-\\hspace{0.3cm} 52", "\\-\\hspace{0.3cm} 53", "\\-\\hspace{0.3cm} 54", "\\-\\hspace{0.3cm} 55"), keep.stat = c("rsq", "f"), single.row = TRUE, notes.align = "l", label = "tab:tabA1") ``` ```{r coefplots-invitation-treatment, echo=FALSE, message=FALSE, results='hide', warning=FALSE} # Visualise models in coefplot new_coef_i <- model_df %>% filter(significant == T & model == "Invitation") %>% mutate(term = factor(term, levels = c("COVID−measure damage v benefits (50)", "COVID−measure damage v benefits (52)", "COVID−measure damage v benefit (54)", "Personal Covid threat (50)", "General COVID concern (51)", "Nervousness (51)", "COVID infection probability (50)", "State government in pandemic (55)", "COVID tracing app use (49)", "Government care unemployed (51)", "Work from home laws (55)", "Medium activity level (51)", "Snacking (51)", "Feeling depressed (51)", "Data privacy concerns (54)", "Data sharing attitude 1 (54)", "European integration (54)", "Trust local government (55)", "Trust European Parliament (55)", "Trust social media (55)" ), labels = c("COVID−measure damage v benefits (50)", "COVID−measure damage v benefits (52)", "COVID−measure damage v benefit (54)", "Personal Covid threat (50)", "General COVID concern (51)", "Nervousness (51)", "COVID infection probability (50)", "State government in pandemic (55)", "COVID tracing app use (49)", "Government care unemployed (51)", "Work from home laws (55)", "Medium activity level (51)", "Snacking (51)", "Feeling depressed (51)", "Data privacy concerns (54)", "Data sharing attitude 1 (54)", "European integration (54)", "Trust local government (55)", "Trust European Parliament (55)", "Trust social media (55)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 
'bold')) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure_a4.pdf', width = 10, height = 5, ) new_coef_i dev.off() new_coef_ibc <- model_df_control %>% filter(significant_bc == T & model == "Invitation") %>% mutate(term = factor(term, levels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)", "Data privacy concerns (54)", "Feeling depressed (51)" ), labels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)", "Data privacy concerns (54)", "Feeling depressed (51)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) pdf('C:/Users/masohniu/Documents/MCS_Conditioning/figure_a5.pdf', width = 10, height = 5, ) new_coef_ibc dev.off() new_coef_ib <- model_df %>% filter(significant_bc == T & model == "Invitation") %>% mutate(term = factor(term, levels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)", "Data privacy concerns (54)", "Feeling depressed (51)" ), labels = c( "Personal Covid threat (50)", "General COVID concern (51)", "State government in pandemic (55)", "Data privacy concerns (54)", "Feeling depressed (51)"))) %>% arrange(term) %>% dwplot( ., vline = geom_vline( xintercept = 0, colour = "grey60", linetype = 2 ), dot_args = list(size = 2), line_args = list(size = 0.5)) + geom_errorbarh(aes( y = term, xmin = ci90l, xmax = ci90h), height = 0, size = 1) + theme_bw(base_size = 10) + xlab("Coefficient Estimate") + ylab("") + scale_colour_grey(start = .1, end = .1, guide = guide_legend(reverse = TRUE)) + scale_x_continuous(breaks = c(-.8, 0, .2), labels = c('\u00AD0.8', '0.0', '0.2')) + theme(legend.position = 'none', legend.title = element_blank(), text=element_text(family="serif"), panel.grid.minor= element_blank(), strip.background = element_blank(), strip.text = element_text(face = 'bold')) ``` ```{r coefplots-invitation-treatment_display, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:figA1}Initially significant conditioning effects with treatment group defined as anyone invited to the MCS"} new_coef_i ``` ```{r coefplots-invitation-treatment-correct-control, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:figA2}Significant conditioning effects with Bonferroni correction and inclusion of demographic controls and treatment group defined as anyone invited to the MCS"} new_coef_ibc ``` ```{r coefplots-participation-treatment-correct, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:figA3}Significant conditioning effects with Bonferroni correction but without demographic controls"} new_coef_pb ``` ```{r recruitment_wave_analyses_plot, echo=FALSE, fig.width=6,fig.height=4,fig.cap="\\label{fig:figA4}Conditioning effects with Bonferroni correction that were significant in at least one recruitment wave subsample by itself"} new_coef_pbr ``` ```{r export tables, include=FALSE} 
stargazer( r1, r2, header = FALSE, type = "text", out = "table1.tex", title = "Predicting assignment to the treatment", dep.var.labels = c("Invited to MCS", "Participated in MCS"), model.numbers = FALSE, dep.var.caption = "", covariate.labels = c("Female", "Age", "Household Size: (ref=1)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}2", "\\-\\hspace{0.3cm}3 or more", "Marital Status: (ref=Married)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}Other", "\\-\\hspace{0.3cm}Single", "Education: (ref=Low)\\-\\vspace{0.3cm} \\\\ \\-\\hspace{0.3cm}Medium", "\\-\\hspace{0.3cm}High", "Citizenship"), keep.stat = c("rsq", "f"), notes.align = "l", label = "tab:tab1" ) stargazer(fe_model, header = FALSE, type = "text", out = "tablea1.tex", title = "Predicting participation in the German Internet Panel", column.labels = c("Fixed effects"), colnames = FALSE, model.numbers = FALSE, dep.var.caption = "", dep.var.labels = "Participation in GIP", covariate.labels = c("MCS", "Female", "Age", "Household Size: (ref=1) \\\\ \\-\\hspace{0.3cm}2", "\\-\\hspace{0.3cm}3 or more", "Marital Status: (ref=Married) \\\\ \\-\\hspace{0.3cm}Other", "\\-\\hspace{0.3cm}Single", "Education: (ref=Low) \\\\ \\-\\hspace{0.3cm}Medium", "\\-\\hspace{0.3cm}High", "German Citizenship", "Wave: (ref=46) \\\\ \\-\\hspace{0.3cm} 47", "\\-\\hspace{0.3cm} 48", "\\-\\hspace{0.3cm} 49", "\\-\\hspace{0.3cm} 50", "\\-\\hspace{0.3cm} 51", "\\-\\hspace{0.3cm} 52", "\\-\\hspace{0.3cm} 53", "\\-\\hspace{0.3cm} 54", "\\-\\hspace{0.3cm} 55"), keep.stat = c("rsq", "f"), single.row = TRUE, notes.align = "l", label = "tab:tabA1") ```