#################################
##### REPLICATION MATERIALS #####
#################################

# Paper: "Measurement quality of 67 common social sciences questions across
# countries and languages based on 28 Multitrait-Multimethod experiments
# implemented in the European Social Survey"
#
# Authors: Carlos Poses, Melanie Revilla, Marc Asensio, Hannah Schwarz, Wiebke
# Weber
#
# Data (Online Appendix 1) can be found at UPF Repository searching name of the
# article, or with Appendix DOI: https://doi.org/10.34810/data122

# Libraries ####
library(tidyverse)

# Load data ####
# (previously set working directory with setwd() or creating a project)
# read_csv2() reads a semicolon-separated file with "," as the decimal mark.
online_appendix <- read_csv2("online_appendix.csv")

# Results in Section 5.1 ####
# Compute mean quality, standard deviation of quality, reliability, validity
# across all question-country-language observations in the appendix.
online_appendix %>%
  summarize(meanquality = mean(measurement_quality),
            sdquality = sd(measurement_quality),
            meanreliability = mean(reliability),
            meanvalidity = mean(validity))

# Table 1 ####
# Mean measurement quality per question with a 95% normal-approximation CI.
# mean_cl_normal() yields the columns y (mean), ymin and ymax (CI bounds)
# that the rest of the pipeline refers to.
tab_1 <- online_appendix %>%
  group_by(round, experiment_name, trait_number, name_ess_data, trait,
           response_scale_main_questionnaire) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  ungroup() %>%
  # Main calculations are done with the above code. From here, all we do is
  # reshape the table to make it more visually pleasing for the paper.
  # Collapse each verbatim response-scale wording into a short display label
  # ("FL"/"PL" presumably fully/partially labelled scales — TODO confirm
  # against the paper's terminology).
  # NOTE(review): typos inside some short labels ("neve", "lilely",
  # "unsuccesful", "almost one of the time") are output strings reproduced
  # verbatim; changing them would change the published table text, so they
  # are deliberately left as-is here.
  mutate(response_scale_main_questionnaire = recode(response_scale_main_questionnaire,
    "[1] Agree strongly; [2] Agree; [3] Neither agree nor disagree; [4] Disagree; [5] Disagree strongly; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, Agree strongly/Disagree strongly, FL",
    "[0] No time at all; [1] Less than 0,5 hour; [2] 0,5 hour to 1 hour; [3] More than 1 hour, up to 1,5 hours; [4] More than 1,5 hours, up to 2 hours; [5] More than 2 hours, up to 2,5 hours; [6] More than 2,5 hours, up to 3 hours; [7] More than 3 hours; [NA] Refusal; [NA] Don't know; [NA] No answer" = "8-points, 0 No time at all/ 7 More than 3 hours, FL",
    "[1] Never; [2] Seldom; [3] Occasionally; [4] Regularly; [5] Frequently; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[1] Never or almost never; [2] Some of the time; [3] About half the time; [4] Most of the time; [5] Always or almost always; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, 1-Never or almost neve/5 Always or almost always, FL",
    "[0] Extremely dissatisfied; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely satisfied; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, 0-Extremely Dissatisfied/10 Extremely Satisfied, PL",
    "[1] Definitely not; [2] Probably not; [3] Not sure either way; [4] Probably; [5] Definitely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[1] Very difficult; [2] Difficult; [3] Neither difficult nor easy; [4] Easy; [5] Very easy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "5-points, *Never/Frequently, FL",
    "[0] No trust at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Complete trust; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, No trust at all/Complete trust, PL",
    "[0] You can't be too careful; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Most people can be trusted; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[0] Most people try to take advantage of me; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Most people try to be fair; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[0] People mostly look out for themselves; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] People mostly try to be helpful; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *You can't be too careful/Most people can be trusted, PL",
    "[1] Not at all true; [2] A little true; [3] Quite true; [4] Very true; [NA] Not applicable; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Not at all true/Very true, FL",
    "[0] Bad for the economy; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Good for the economy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[0] Cultural life undermined; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Cultural life enriched; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[0] Worse place to live; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Better place to live; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-points, *Bad for the economy/Good for the economy, PL",
    "[1] Allow many to come and live here; [2] Allow some; [3] Allow a few; [4] Allow none; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Allow many to come and live here/Allow none, PL",
    "[1] Not at all likely; [2] Not very likely; [3] Likely; [4] Very likely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-points, Not at all lilely/Very likely, FL",
    "[0] Extremely unsuccessful; [1] 01; [2] 02; [3] 03; [4] 04; [5] 05; [6] 06; [7] 07; [8] 08; [9] 09; [10] Extremely successful; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Extremely unsuccesful/Extremely succesful, PL",
    "[0] Extremely slowly; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely quickly; [55] Violent crimes never occur near to where I live; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Extremely unsuccesful/Extremely succesful, PL",
    "[0] Does not apply at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Applies completely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Does not apply at all/Applies completely, PL",
    "[0] None of the time; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] All of the time; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, None of the time/All of the time, PL",
    "[1] None or almost none of the time; [2] Some of the time; [3] Most of the time; [4] All or almost all of the time; [NA] Refusal; [NA] Don't know; [NA] No answer" = "4-point, None or almost one of the time/All or almost of the time, FL",
    "[0] Extremely unimportant; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely important; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Extremely unimportant/Extremely important, PL",
    "[0] Not at all able; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely able; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all confident; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely confident; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all easy; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Extremely easy; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, *Not at all able/Completely able, PL",
    "[0] Not at all; [1] 1; [2] 2; [3] 3; [4] 4; [5] 5; [6] 6; [7] 7; [8] 8; [9] 9; [10] Completely; [NA] Refusal; [NA] Don't know; [NA] No answer" = "11-point, Not at all/Completely, PL")) %>%
  # Format the CI as "[ymin,ymax]" and glue it onto the mean, then fuse
  # trait name and quality so the table can be widened in one step.
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(quality, c(y, conf_interval), sep = " ") %>%
  # Unite to make it easier to reshape. Later separated again.
  unite(question_qual, c(trait, quality), sep = " /// ") %>%
  select(-name_ess_data) %>%
  # One column per trait (T1/T2/T3) holding "trait /// quality".
  pivot_wider(names_from = "trait_number", values_from = c(question_qual)) %>%
  mutate(experiment_name = str_to_sentence(experiment_name)) %>%
  # Build the "R<round> - <Experiment>" row label used in the paper.
  unite(round_experiment, c(round, experiment_name), sep = " - ") %>%
  mutate(round_experiment = paste0("R", round_experiment),
         round_experiment = str_replace_all(round_experiment, "_", " ")) %>%
  relocate(response_scale_main_questionnaire, .after = T3) %>%
  # Split "trait /// quality" back into separate display columns.
  separate(T1, into = c("Trait 1", "quality_t1"), sep = "///") %>%
  separate(T2, into = c("Trait 2", "quality_t2"), sep = "///") %>%
  separate(T3, into = c("Trait 3", "quality_t3"), sep = "///") %>%
  rename("Round - Experiment" = round_experiment,
         "Response Scale" = response_scale_main_questionnaire,
         "Quality T1" = quality_t1,
         "Quality T2" = quality_t2,
         "Quality T3" = quality_t3)

# Absolute average difference in quality between questions within the same
# experiment (pairwise |T1-T2|, |T1-T3|, |T2-T3|, averaged over experiments;
# NAs arise for experiments without all three traits, hence na.rm = TRUE).
online_appendix %>%
  select(round, experiment_name, trait_number, measurement_quality) %>%
  group_by(round, experiment_name, trait_number) %>%
  summarize(meanquality = round(mean(measurement_quality), 2)) %>%
  pivot_wider(names_from = trait_number, values_from = meanquality) %>%
  mutate(dif12 = abs(T1-T2), dif13 = abs(T1-T3), dif23 = abs(T2-T3)) %>%
  select(dif12,dif13,dif23) %>%
  pivot_longer(c(dif12, dif13, dif23),
               names_to = "method substracted", values_to = "difference") %>%
  ungroup() %>%
  summarize(mean_difference = mean(difference, na.rm = TRUE))

# Average measurement quality for each question classified according to
# thresholds defined in Section 4.4.
online_appendix %>%
  group_by(round, experiment_name, trait_number, name_ess_data, trait,
           response_scale_main_questionnaire) %>%
  summarize(mean_cl_normal(measurement_quality, conf.int = .95)) %>%
  # Bins are contiguous and cover [0, 1]: every y gets exactly one label.
  mutate(classification = case_when(
    (y >= 0.9) ~ "excellent",
    (y >= 0.8 & y < 0.9) ~ "good",
    (y >= 0.7 & y < 0.8) ~ "acceptable",
    (y >= 0.6 & y < 0.7) ~ "questionable",
    (y >= 0.5 & y < 0.6) ~ "poor",
    y < 0.5 ~ "unacceptable")) %>%
  group_by(classification) %>%
  count() %>%
  ungroup() %>%
  mutate(sum = sum(n), perc = n/sum*100)

# Results in Section 5.2 ####
# Table 2 ####
# Four steps
# 1. Create the average quality by countrylanguage and round
tab2_cntry_round <- online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage, round) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  # Same "mean [ymin,ymax]" display format as Table 1.
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(meanquality, c(y, conf_interval), sep = " ") %>%
  pivot_wider(names_from = round, values_from = meanquality) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`) %>%
  relocate("Round 7", .after = "Round 6") %>%
  ungroup()

# 2. Create the average quality by country overall
tab2_cntry <- online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  unite(conf_interval, c(ymin, ymax), sep = ",") %>%
  mutate(conf_interval = paste0("[", conf_interval, "]")) %>%
  unite(meanquality, c(y, conf_interval), sep = " ") %>%
  ungroup()

# 3. Merge 1. and 2. to create table 2 as displayed in the paper
# NOTE(review): meanquality here is the "mean [CI]" string, so arrange()
# sorts lexicographically, not numerically — presumably intended since the
# values share a fixed "0.xx" format, but confirm against the published table.
tab_2 <- tab2_cntry_round %>%
  left_join(tab2_cntry, by = "cntrylanguage") %>%
  rename("Country-Language" = "cntrylanguage") %>%
  arrange(desc(meanquality))

# Main part of table 2 is done with previous code.
# Rows for country-language groups of Round 1 with "mixed" languages (see
# "appendix_codebook") are removed from the table, and their values are
# manually placed with the rest of values for its corresponding
# country-language groups.
# Maximum and minimum mean values, as well as difference between them,
# were added manually and computed with the following code:

# 4. Compute max, min and mean across rounds
# 4.1 Mean quality by round (one column per ESS round)
tab_2_mean_round <- online_appendix %>%
  group_by(round) %>%
  summarize(meanqual = mean(measurement_quality)) %>%
  pivot_wider(names_from = round, values_from = meanqual) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`)

# 4.2 Max and min within each round, and difference between max and min
# (fixed duplicate "4.1." step number in the original comment)
tab_2_max_min <- online_appendix %>%
  group_by(round, country, language) %>%
  summarize(meanqual = mean(measurement_quality)) %>%
  group_by(round) %>%
  summarize(maxqual = max(meanqual), minqual = min(meanqual)) %>%
  mutate(difference = maxqual - minqual) %>%
  # Long-then-wide reshape puts statistics in rows and rounds in columns.
  pivot_longer(c(maxqual:difference), names_to = "statistic", values_to = "value") %>%
  pivot_wider(names_from = round, values_from = value) %>%
  rename("Round 1" = `1`, "Round 2" = `2`, "Round 3" = `3`, "Round 4" = `4`,
         "Round 5" = `5`, "Round 6" = `6`, "Round 7" = `7`)

# Table 2 - Classify quality estimates according to thresholds defined in
# Section 4.1
# NOTE(review): the analogous classification in the Section 5.1 code cites
# "Section 4.4" — confirm which section number is correct in the paper.
online_appendix %>%
  unite(cntrylanguage, country, language, sep = "-") %>%
  group_by(cntrylanguage, round) %>%
  summarize(round(mean_cl_normal(measurement_quality, conf.int = .95), 2)) %>%
  # FIX: the original first branch used (y > 0.9), so an estimate exactly
  # equal to 0.90 matched no branch (neither > 0.9 nor < 0.9) and was
  # classified NA. Because y is rounded to 2 decimals, 0.90 can occur.
  # Use >= 0.9, consistent with the identical classification in Section 5.1.
  mutate(classification = case_when(
    (y >= 0.9) ~ "excellent",
    (y >= 0.8 & y < 0.9) ~ "good",
    (y >= 0.7 & y < 0.8) ~ "acceptable",
    (y >= 0.6 & y < 0.7) ~ "questionable",
    (y >= 0.5 & y < 0.6) ~ "poor",
    y < 0.5 ~ "unacceptable")) %>%
  group_by(classification) %>%
  count() %>%
  ungroup() %>%
  mutate(sum = sum(n), perc = (n/sum)*100)