#################################
##### REPLICATION MATERIALS #####
#################################

# Paper: "Measurement quality of 67 common social sciences questions across
# countries and languages based on 28 Multitrait-Multimethod experiments
# implemented in the European Social Survey"
#
# Authors: Carlos Poses, Melanie Revilla, Marc Asensio, Hannah Schwarz, Wiebke
# Weber
#
# Data (Online Appendix 1) can be found at UPF Repository searching name of the
# article, or with Appendix DOI: https://doi.org/10.34810/data122

# Libraries ####
library(tidyverse)

# Load data ####
# (previously set working directory with setwd() or creating a project)
# read_csv2() reads a semicolon-separated file with "," as the decimal mark.
online_appendix <- read_csv2("online_appendix.csv")

# Results in Section 5.1 ####
# Compute mean quality, standard deviation of quality, reliability, validity
# across all question-country-language observations in the appendix.
online_appendix %>%
  summarize(meanquality = mean(measurement_quality),
            sdquality = sd(measurement_quality),
            meanreliability = mean(reliability),
            meanvalidity = mean(validity))

# Table 1 ####
# Mean measurement quality per question with a 95% normal-approximation CI.
# mean_cl_normal() yields the columns y (mean), ymin and ymax (CI bounds)
# that the rest of the pipeline refers to.
tab_1 <- online_appendix %>%
  group_by(round, experiment_name, trait_number, name_ess_data, trait,
           response_scale_main_questionnaire) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  ungroup() %>%
  # Main calculations are done with the above code. From here, all we do is
  # reshape the table to make it more visually pleasing for the paper.
  # Collapse each verbatim response-scale wording into a short display label
  # ("FL"/"PL" presumably fully/partially labelled scales — TODO confirm
  # against the paper's terminology).
  # NOTE(review): typos inside some short labels ("neve", "lilely",
  # "unsuccesful", "almost one of the time") are output strings reproduced
  # verbatim; changing them would change the published table text, so they
  # are deliberately left as-is here.
  mutate(response_scale_main_questionnaire = recode(response_scale_main_questionnaire,
    "[1] Agree strongly; [2] Agree; [3] Neither agree nor disagree; [4] Disagree; [5] Disagree strongly; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, Agree strongly/Disagree strongly, FL",
    "[0] No time at all; [1] Less than 0,5 hour; [2] 0,5 hour to 1 hour; [3] More than 1 hour, up to 1,5 hours; [4] More than 1,5 hours, up to 2 hours; [5] More than 2 hours, up to 2,5 hours; [6] More than 2,5 hours, up to 3 hours; [7] More than 3 hours; [NA] Refusal; [NA] Don't know; [NA] No answer" = "8-points, 0 No time at all/ 7 More than 3 hours, FL",
    "[1] Never; [2] Seldom; [3] Occasionally; [4] Regularly; [5] Frequently; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[1] Never or almost never; [2] Some of the time; [3] About half the time; [4] Most of the time; [5] Always or almost always; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, 1-Never or almost neve/5 Always or almost always, FL",
    "[0] Extremely dissatisfied; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely satisfied; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, 0-Extremely Dissatisfied/10 Extremely Satisfied, PL",
    "[1] Definitely not; [2] Probably not; [3] Not sure either way; [4] Probably; [5] Definitely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[1] Very difficult; [2] Difficult; [3] Neither difficult nor easy; [4] Easy; [5] Very easy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[0] No trust at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Complete trust; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, No trust at all/Complete trust, PL",
    "[0] You can't be too careful; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Most people can be trusted; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[0] Most people try to take advantage of me; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Most people try to be fair; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[0] People mostly look out for themselves; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] People mostly try to be helpful; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[1] Not at all true; [2] A little true; [3] Quite true; [4] Very true; [NA] Not applicable; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Not at all true/Very true, FL",
    "[0] Bad for the economy; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Good for the economy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[0] Cultural life undermined; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Cultural life enriched; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[0] Worse place to live; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Better place to live; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[1] Allow many to come and live here; [2] Allow some; [3] Allow a few; [4] Allow none; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Allow many to come and live here/Allow none, PL",
    "[1] Not at all likely; [2] Not very likely; [3] Likely; [4] Very likely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Not at all lilely/Very likely, FL",
    "[0] Extremely unsuccessful; [1] 01; [2] 02; [3] 03; [4] 04; [5] 05; [6] 06; [7] 07; [8] 08; [9] 09; [10] Extremely successful; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Extremely unsuccesful/Extremely succesful, PL",
    "[0] Extremely slowly; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely quickly; [55] Violent crimes never occur near to where I live; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Extremely unsuccesful/Extremely succesful, PL",
    "[0] Does not apply at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Applies completely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Does not apply at all/Applies completely, PL",
    "[0] None of the time; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] All of the time; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, None of the time/All of the time, PL",
    "[1] None or almost none of the time; [2] Some of the time; [3] Most of the time; [4] All or almost all of the time; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-point, None or almost one of the time/All or almost of the time, FL",
    "[0] Extremely unimportant; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely important; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Extremely unimportant/Extremely important, PL",
    "[0] Not at all able; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely able; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all confident; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely confident; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all easy; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely easy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Not at all/Completely, PL")) %>%
  # Format the CI as "[ymin,ymax]" and glue it onto the mean, then fuse
  # trait name and quality so the table can be widened in one step.
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(quality, c(y, conf_interval), sep = " ") %>%
  # Unite to make it easier to reshape. Later separated again.
  unite(question_qual, c(trait, quality), sep = " /// ") %>%
  select(-name_ess_data) %>%
  # One column per trait (T1/T2/T3) holding "trait /// quality".
  pivot_wider(names_from = "trait_number", values_from = c(question_qual)) %>%
  mutate(experiment_name = str_to_sentence(experiment_name)) %>%
  # Build the "R<round> - <Experiment>" row label used in the paper.
  unite(round_experiment, c(round, experiment_name), sep = " - ") %>%
  mutate(round_experiment = paste0("R", round_experiment),
         round_experiment = str_replace_all(round_experiment, "_", " ")) %>%
  relocate(response_scale_main_questionnaire, .after = T3) %>%
  # Split "trait /// quality" back into separate display columns.
  separate(T1, into = c("Trait 1", "quality_t1"), sep = "///") %>%
  separate(T2, into = c("Trait 2", "quality_t2"), sep = "///") %>%
  separate(T3, into = c("Trait 3", "quality_t3"), sep = "///") %>%
  rename("Round - Experiment" = round_experiment,
         "Response Scale" = response_scale_main_questionnaire,
         "Quality T1" = quality_t1,
         "Quality T2" = quality_t2,
         "Quality T3" = quality_t3)

# Absolute average difference in quality between questions within the same
# experiment (pairwise |T1-T2|, |T1-T3|, |T2-T3|, averaged over experiments;
# NAs arise for experiments without all three traits, hence na.rm = TRUE).
online_appendix %>%
  select(round, experiment_name, trait_number, measurement_quality) %>%
  group_by(round, experiment_name, trait_number) %>%
  summarize(meanquality = round(mean(measurement_quality), 2)) %>%
  pivot_wider(names_from = trait_number, values_from = meanquality) %>%
  mutate(dif12 = abs(T1-T2), dif13 = abs(T1-T3), dif23 = abs(T2-T3)) %>%
  select(dif12,dif13,dif23) %>%
  pivot_longer(c(dif12, dif13, dif23),
               names_to = "method substracted", values_to = "difference") %>%
  ungroup() %>%
  summarize(mean_difference = mean(difference, na.rm = TRUE))

# Average measurement quality for each question classified according to
# thresholds defined in Section 4.4.
online_appendix %>%
  group_by(round, experiment_name, trait_number, name_ess_data, trait,
           response_scale_main_questionnaire) %>%
  summarize(mean_cl_normal(measurement_quality, conf.int = .95)) %>%
  # Bins are contiguous and cover [0, 1]: every y gets exactly one label.
  mutate(classification = case_when(
    (y >= 0.9) ~ "excellent",
    (y >= 0.8 & y < 0.9) ~ "good",
    (y >= 0.7 & y < 0.8) ~ "acceptable",
    (y >= 0.6 & y < 0.7) ~ "questionable",
    (y >= 0.5 & y < 0.6) ~ "poor",
    y < 0.5 ~ "unacceptable")) %>%
  group_by(classification) %>%
  count() %>%
  ungroup() %>%
  mutate(sum = sum(n), perc = n/sum*100)

# Results in Section 5.2 ####
# Table 2 ####
# Four steps
# 1. Create the average quality by countrylanguage and round
tab2_cntry_round <- online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage, round) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  # Same "mean [ymin,ymax]" display format as Table 1.
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(meanquality, c(y, conf_interval), sep = " ") %>%
  pivot_wider(names_from = round, values_from = meanquality) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`) %>%
  relocate("Round 7", .after = "Round 6") %>%
  ungroup()

# 2. Create the average quality by country overall
tab2_cntry <- online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(meanquality, c(y, conf_interval), sep = " ") %>%
  ungroup()

# 3. Merge 1. and 2. to create table 2 as displayed in the paper
# NOTE(review): meanquality here is the "mean [CI]" string, so arrange()
# sorts lexicographically, not numerically — presumably intended since the
# values share a fixed "0.xx" format, but confirm against the published table.
tab_2 <- tab2_cntry_round %>%
  left_join(tab2_cntry, by = "cntrylanguage") %>%
  rename("Country-Language" = "cntrylanguage") %>%
  arrange(desc(meanquality))

# Main part of table 2 is done with previous code.
# Rows for country-language groups of Round 1 with "mixed" languages (see
# "appendix_codebook") are removed from the table, and their values are
# manually placed with the rest of values for its corresponding
# country-language groups.
# Maximum and minimum mean values, as well as difference between them,
# were added manually and computed with the following code:

# 4. Compute max, min and mean across rounds
# 4.1 Mean quality by round (one column per ESS round)
tab_2_mean_round <- online_appendix %>%
  group_by(round) %>%
  summarize(meanqual = mean(measurement_quality)) %>%
  pivot_wider(names_from = round, values_from = meanqual) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`)

# 4.2 Max and min within each round, and difference between max and min
# (fixed duplicate "4.1." step number in the original comment)
tab_2_max_min <- online_appendix %>%
  group_by(round, country, language) %>%
  summarize(meanqual = mean(measurement_quality)) %>%
  group_by(round) %>%
  summarize(maxqual = max(meanqual), minqual = min(meanqual)) %>%
  mutate(difference = maxqual - minqual) %>%
  # Long-then-wide reshape puts statistics in rows and rounds in columns.
  pivot_longer(c(maxqual:difference), names_to = "statistic", values_to = "value") %>%
  pivot_wider(names_from = round, values_from = value) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`)

# Table 2 - Classify quality estimates according to thresholds defined in
# Section 4.1
# NOTE(review): the analogous classification in the Section 5.1 code cites
# "Section 4.4" — confirm which section number is correct in the paper.
online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage, round) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  # FIX: the original first branch used (y > 0.9), so an estimate exactly
  # equal to 0.90 matched no branch (neither > 0.9 nor < 0.9) and was
  # classified NA. Because y is rounded to 2 decimals, 0.90 can occur.
  # Use >= 0.9, consistent with the identical classification in Section 5.1.
  mutate(classification = case_when(
    (y >= 0.9) ~ "excellent",
    (y >= 0.8 & y < 0.9) ~ "good",
    (y >= 0.7 & y < 0.8) ~ "acceptable",
    (y >= 0.6 & y < 0.7) ~ "questionable",
    (y >= 0.5 & y < 0.6) ~ "poor",
    y < 0.5 ~ "unacceptable")) %>%
  group_by(classification) %>%
  count() %>%
  ungroup() %>%
  mutate(sum = sum(n), perc = (n/sum)*100)