Funding: This work was supported by a grant awarded by the National Science Centre, Poland (no. 2018/31/B/HS6/00403).
Auguie B (2022). ggflags: Plot flags of the world in ggplot2. R package version 0.0.2.
Bates D, Maechler M, Bolker B, Walker S (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
Gohel D (2022). flextable: Functions for Tabular Reporting. R package version 0.7.1, https://CRAN.R-project.org/package=flextable.
Larmarange J (2022). labelled: Manipulating Labelled Data. R package version 2.9.1, https://CRAN.R-project.org/package=labelled.
Lüdecke D (2021). sjPlot: Data Visualization for Statistics in Social Science. R package version 2.8.10, https://CRAN.R-project.org/package=sjPlot.
R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.
Rosseel Y (2012). lavaan: An R Package for Structural Equation Modeling. Journal of Statistical Software, 48(2), 1-36. https://doi.org/10.18637/jss.v048.i02.
Wickham H, Miller E, Smith D (2022). haven: Import and Export ‘SPSS’, ‘Stata’ and ‘SAS’ Files. R package version 2.5.0, https://CRAN.R-project.org/package=haven.
Wickham H, et al. (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686.
R codes for data manipulation and for producing all results are available after clicking “Show” buttons
library(haven)
library(tidyverse)
library(labelled)
library(flextable)
library(lme4)
library(sjPlot)
library(ggflags)
library(lavaan)
library(broom)
# Data manipulations on ESS cumulative data (individual-level data)
#Import ESS data round 4-9
# Download the ESS data files:
# * (1.1) Use ESS Cumulative Data Wizard (https://ess-search.nsd.no/CDW/ConceptVariables) to download ESS1-9 cumulative data set:
# - Select all Variables
# - Select all Rounds/Countries
# - Download data
# Load the ESS1-9 cumulative file; user_na = TRUE is passed so that SPSS
# user-defined missing codes remain available for the recodes further below
ESS_cumulative_data <- read_sav(file = "ESS1-9e01_1.zip", user_na = TRUE)
# Countries that took part in every ESS round 4-9; Sweden is excluded because
# the age of the interviewer was coded inconsistently
country_vector <- c(
  "BE", "CH", "CZ", "DE", "EE", "ES", "FI", "FR", "GB",
  "HU", "IE", "LT", "NL", "NO", "PL", "PT", "SI"
)
# Data manipulation on an individual-level cumulative dataset
#
# Builds `data1`: respondents from ESS rounds 4-9 in the countries listed in
# `country_vector`, with survey/respondent identifiers, income item-nonresponse
# recodes, household composition, socio-demographics, item-nonresponse flags,
# the conservation score and the social trust index.
#
# Changes relative to the earlier version of this block:
#  * education (round 9): code 413 is now assigned to ISCED 4-6; the list
#    previously repeated 213 (already assigned to ISCED 2), so 413 ended as NA
#  * education (rounds 4-8): a missing `edulvla` now stays NA instead of being
#    coded as the highest education category
#  * row-wise sums/means are computed with rowSums()/rowMeans() instead of
#    rowwise(), which gives the same values (up to floating-point rounding)
#    without iterating over every respondent
#  * repeated per-variable lines are generated with across(); the same columns
#    are created, with the same names and in the same order
data1 <- ESS_cumulative_data %>%
  filter(cntry %in% country_vector,
         essround >= 4) %>%
  rowid_to_column() %>%
  # These recodes run before the labels are removed, so the codes 77, 88 and 99
  # of the income questions can still be told apart
  mutate(
    T_SURVEY_ID = paste0("ESS", essround, "_", cntry),
    T_RESPONDENT_ID = paste0("ESS", essround, "_", cntry, "_", idno),
    hinctnta_item_nonresp = as.factor(
      ifelse(hinctnta == 88, "DK",
             ifelse(hinctnta == 77, "Refusal",
                    ifelse(hinctnta == 99, NA_character_, "Response")))),
    hinctnta_item_nonresp = fct_relevel(hinctnta_item_nonresp, "Response", after = 0),
    # Main source of household income, substantive answers only
    hincsrc_recode1 = case_when(
      hincsrca == 1 ~ "Wages or salaries",
      hincsrca == 2 ~ "Income from self-employement or farming",
      hincsrca == 3 ~ "Income from self-employement or farming",
      hincsrca == 4 ~ "Pensions",
      hincsrca == 5 ~ "Unemployement/refundancy benefits or any other social benefits/grants",
      hincsrca == 6 ~ "Unemployement/refundancy benefits or any other social benefits/grants",
      hincsrca == 7 ~ "Income from investments, savings or other sources",
      hincsrca == 8 ~ "Income from self-employement or farming",
      is.na(hincsrca) ~ NA_character_),
    hincsrc_recode1 = fct_relevel(
      hincsrc_recode1,
      c("Wages or salaries",
        "Pensions",
        "Unemployement/refundancy benefits or any other social benefits/grants",
        "Income from investments, savings or other sources",
        "Income from self-employement or farming")),
    # Same recode, but "Refusal" (77) and "DK" (88) are kept as own categories
    hincsrc_recode2 = case_when(
      hincsrca == 1 ~ "Wages or salaries",
      hincsrca == 2 ~ "Income from self-employement or farming",
      hincsrca == 3 ~ "Income from self-employement or farming",
      hincsrca == 4 ~ "Pensions",
      hincsrca == 5 ~ "Unemployement/refundancy benefits or any other social benefits/grants",
      hincsrca == 6 ~ "Unemployement/refundancy benefits or any other social benefits/grants",
      hincsrca == 7 ~ "Income from investments, savings or other sources",
      hincsrca == 8 ~ "Income from self-employement or farming",
      hincsrca == 77 ~ "Refusal",
      hincsrca == 88 ~ "DK",
      hincsrca == 99 ~ NA_character_),
    hincsrc_recode2 = fct_relevel(
      hincsrc_recode2,
      c("Wages or salaries",
        "Pensions",
        "Unemployement/refundancy benefits or any other social benefits/grants",
        "Income from investments, savings or other sources",
        "Income from self-employement or farming",
        "DK",
        "Refusal"))
  ) %>%
  labelled::remove_labels(user_na_to_na = TRUE) %>%
  # This part of the syntax uses the year of the interview and the year of birth
  # of each person in the HH and counts the number of children under 15.
  # ESS5, Estonia: data on the year of interview are not available; for further
  # details please see item 46 in the Country reports in the ESS5-2010
  # Documentation Report. As the fieldwork period for ESS round 5 in Estonia was
  # 10.10.2010 - 28.05.2011, we assumed the year of the interview was 2011; this
  # increased the age for some part of the Estonian sample.
  mutate(inwyye = ifelse(cntry == "EE" & essround == 5, 2011, inwyye)) %>%
  # agea2 ... agea18: age of HH members 2-18 (NA when either year is missing)
  mutate(across(all_of(paste0("yrbrn", 2:18)),
                ~ inwyye - .x,
                .names = "{sub('yrbrn', 'agea', .col)}")) %>%
  # hhmmb2_under15 ... hhmmb18_under15: 1 if that member is younger than 15,
  # 0 otherwise (including when the age is unknown)
  mutate(across(all_of(paste0("agea", 2:18)),
                ~ as.numeric(!is.na(.x) & .x < 15),
                .names = "{sub('agea', 'hhmmb', .col)}_under15")) %>%
  mutate(hhmmb_under15 = rowSums(across(all_of(paste0("hhmmb", 2:18, "_under15"))))) %>%
  # Below we use hhmmb_under15 to calculate the number of HH members aged 15+,
  # variable: hhmmb_15plus (the respondent is not counted)
  mutate(
    hhmmb_full = ifelse(!is.na(hhmmb), hhmmb - 1, NA_integer_),
    hhmmb_15plus = ifelse(!is.na(hhmmb), hhmmb - hhmmb_under15 - 1, NA_integer_),
    gndr_male = ifelse(gndr == 1, 1,
                       ifelse(gndr == 2, 0, NA_integer_)),
    # age_GMC is computed before `agea` is cleaned, so the grand mean is taken
    # over all non-missing ages
    age_GMC = ifelse(agea >= 15 & !is.na(agea), agea - mean(agea, na.rm = TRUE), NA_integer_),
    agea = ifelse(agea >= 15 & !is.na(agea), agea, NA_integer_),
    age_2 = ifelse(agea >= 15 & agea <= 30, "Up to 30",
                   ifelse(agea > 30 & agea <= 40, "31-40",
                          ifelse(agea > 40 & agea <= 50, "41-50",
                                 ifelse(agea > 50 & agea <= 60, "51-60",
                                        ifelse(agea > 60 & agea <= 123, "61 and older", NA_character_))))),
    age_2 = fct_relevel(age_2, "Up to 30", after = 0),
    # Round 9 is recoded from `edulvlb`, rounds 4-8 from `edulvla`; any value
    # not listed (including missing values) stays NA
    education = case_when(
      essround == 9 & edulvlb %in% c(0, 113, 129) ~
        "Less than lower secondary education (ISCED 0-1)",
      essround == 9 & edulvlb %in% c(212, 213, 222, 229) ~
        "Lower secondary education completed (ISCED 2)",
      essround == 9 & edulvlb %in% c(311, 312, 313, 321, 322, 323) ~
        "Upper secondary education completed (ISCED 3)",
      essround == 9 & edulvlb %in% c(412, 413, 421, 422, 423, 510, 520, 610, 620, 710, 720, 800) ~
        "Post-secondary non-teritary & teritary education completed (ISCED 4-6)",
      essround %in% 4:8 & edulvla == 1 ~
        "Less than lower secondary education (ISCED 0-1)",
      essround %in% 4:8 & edulvla == 2 ~
        "Lower secondary education completed (ISCED 2)",
      essround %in% 4:8 & edulvla == 3 ~
        "Upper secondary education completed (ISCED 3)",
      essround %in% 4:8 & edulvla %in% c(4, 5) ~
        "Post-secondary non-teritary & teritary education completed (ISCED 4-6)",
      TRUE ~ NA_character_),
    education = fct_relevel(education, "Upper secondary education completed (ISCED 3)", after = 0)
  ) %>%
  # Response style - number of item-nonresponse preceding income question:
  # one 0/1 flag `<item>_item_nonresp` per questionnaire item
  mutate(
    across(all_of(c("ppltrst", "pplfair", "pplhlp", "polintr", "trstprl", "trstlgl",
                    "trstplc", "trstplt", "trstprt", "trstep", "trstun", "vote",
                    "contplt", "wrkprty", "wrkorg", "badge", "sgnptit", "pbldmn",
                    "bctprd", "clsprty", "lrscale", "stflife", "stfeco", "stfgov",
                    "stfdem", "stfedu", "stfhlth", "gincdif", "freehms", "euftf",
                    "imsmetn", "imdfetn", "impcntr", "imbgeco", "imueclt", "imwbcnt",
                    "happy", "sclmeet", "sclact", "crmvct", "aesfdrk", "health",
                    "hlthhmp", "rlgblg", "rlgdgr", "rlgatnd", "pray", "dscrgrp")),
           ~ ifelse(is.na(.x), 1, 0),
           .names = "{.col}_item_nonresp"),
    hinctnta_inr = ifelse(is.na(hinctnta), 1, 0)
  ) %>%
  # Conservation as a proxy of social desirability (based on 21-PVQ Schwartz's
  # basic human values). `mrat` is the reversed mean of all 21 items; each
  # *center score is the reversed mean of its items minus `mrat`. Rows with no
  # valid item get NaN, exactly as mean(..., na.rm = TRUE) would give.
  mutate(
    mrat = 7 - rowMeans(cbind(ipcrtiv, imprich, ipeqopt, ipshabt, impsafe, impdiff, ipfrule,
                              ipudrst, ipmodst, ipgdtim, impfree, iphlppl, ipsuces, ipstrgv,
                              ipadvnt, ipbhprp, iprspot, iplylfr, impenv, imptrad, impfun),
                        na.rm = TRUE),
    SEcenter = 7 - rowMeans(cbind(impsafe, ipstrgv), na.rm = TRUE) - mrat,
    COcenter = 7 - rowMeans(cbind(ipfrule, ipbhprp), na.rm = TRUE) - mrat,
    TRcenter = 7 - rowMeans(cbind(ipmodst, imptrad), na.rm = TRUE) - mrat,
    BEcenter = 7 - rowMeans(cbind(iphlppl, iplylfr), na.rm = TRUE) - mrat,
    UNcenter = 7 - rowMeans(cbind(ipeqopt, ipudrst, impenv), na.rm = TRUE) - mrat,
    SDcenter = 7 - rowMeans(cbind(ipcrtiv, impfree), na.rm = TRUE) - mrat,
    STcenter = 7 - rowMeans(cbind(impdiff, ipadvnt), na.rm = TRUE) - mrat,
    HEcenter = 7 - rowMeans(cbind(ipgdtim, impfun), na.rm = TRUE) - mrat,
    ACcenter = 7 - rowMeans(cbind(ipshabt, ipsuces), na.rm = TRUE) - mrat,
    POcenter = 7 - rowMeans(cbind(imprich, iprspot), na.rm = TRUE) - mrat
  ) %>%
  mutate(
    STcenter_reversed = -1 * STcenter,
    HEcentre_reversed = -1 * HEcenter,
    conservation = rowMeans(cbind(COcenter, SEcenter, STcenter_reversed, HEcentre_reversed),
                            na.rm = TRUE),
    # Social trust index
    # NOTE(review): with na.rm = TRUE a respondent with all three items missing
    # gets 0 rather than NA, and partially missing answers lower the sum; the
    # behaviour is kept unchanged here - confirm that this is intended.
    social_trust_index = rowSums(cbind(ppltrst, pplfair, pplhlp), na.rm = TRUE)
  )
# Weighted z-scores (weights: anweight) of conservation and social_trust_index.
# Each helper table keeps only `rowid` plus the standardized score, computed on
# the rows where the raw score is not missing.
Zconservation <- data1 %>%
  filter(!is.na(conservation)) %>%
  transmute(
    rowid,
    Zconservation = as.vector(corpcor::wt.scale(conservation, w = anweight))
  )
Zsocial_trust_index <- data1 %>%
  filter(!is.na(social_trust_index)) %>%
  transmute(
    rowid,
    Zsocial_trust_index = as.vector(corpcor::wt.scale(social_trust_index, w = anweight))
  )
# Attach the standardized scores to the cumulative dataset (matched on rowid)
# and keep only the variables needed for the analysis
data2 <- data1 %>%
  left_join(Zconservation, by = "rowid") %>%
  left_join(Zsocial_trust_index, by = "rowid") %>%
  select(
    # identifiers and weights
    cntry, cname, cedition, cproddat, cseqno, name, essround, edition, idno,
    dweight, pspwght, pweight, anweight,
    T_SURVEY_ID, T_RESPONDENT_ID,
    # respondent-level variables
    hinctnta_item_nonresp, hincsrc_recode1, hincsrc_recode2,
    hhmmb_full, hhmmb_15plus, gndr_male,
    age_GMC, agea, age_2, education,
    Zconservation, Zsocial_trust_index,
    mnactic, pdjobev, pdwrk,
    # item-nonresponse flags, in the order in which data1 created them
    # (hinctnta_item_nonresp keeps the position given to it above)
    ends_with("_item_nonresp"),
    hinctnta_inr
  )
# Manipulations on data from the ESS Interviewer's questionnaire (interviewer-level data)
#
# The six rounds are processed by one helper instead of six copies of the same
# pipeline. `preintf_recode`, which the earlier version computed but never
# kept, is no longer created.

# Recode an interviewer age in years into the age groups used in the analysis;
# missing ages and ages outside 15-123 become NA.
interviewer_age_from_years <- function(age) {
  case_when(age >= 15 & age <= 30 ~ "Up to 30",
            age > 30 & age <= 40 ~ "31-40",
            age > 40 & age <= 50 ~ "41-50",
            age > 50 & age <= 60 ~ "51-60",
            age > 60 & age <= 123 ~ "61 and older")
}

# Recode the banded interviewer age (codes 1-6) into the same age groups;
# codes 5 and 6 are merged, any other value becomes NA.
interviewer_age_from_bands <- function(band) {
  case_when(band == 1 ~ "Up to 30",
            band == 2 ~ "31-40",
            band == 3 ~ "41-50",
            band == 4 ~ "51-60",
            band %in% c(5, 6) ~ "61 and older")
}

# Read one interviewer-questionnaire file and return, for the countries in
# `country_vector`, one row per interview with the respondent id, interviewer
# id, interviewer gender (1 = male, 0 = female), interviewer age group and
# workload (number of rows in the file that share the interviewer id).
#   file      - path to the .sav file
#   age_group - expression, evaluated inside the data, giving the age group
prepare_interviewer_data <- function(file, age_group) {
  read_sav(file, user_na = TRUE) %>%
    filter(cntry %in% country_vector) %>%
    labelled::remove_labels(user_na_to_na = TRUE) %>%
    mutate(T_RESPONDENT_ID = paste0("ESS", essround, "_", cntry, "_", idno),
           T_INTERVIEWER_ID = paste0(cntry, essround, "_", intnum),
           INTgndr_male = ifelse(intgndr == 1, 1,
                                 ifelse(intgndr == 2, 0, NA_integer_)),
           INTage = fct_relevel({{ age_group }}, "Up to 30", after = 0)) %>%
    add_count(T_INTERVIEWER_ID, name = "INTworkload") %>%
    select(T_RESPONDENT_ID, T_INTERVIEWER_ID, INTgndr_male, INTage, INTworkload)
}

# Part 1: ESS4 - data from the Interviewer's questionnaire, ed. 3.0
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS4INTe03&y=2008
# Download and unzip file; access to data: 05/05/2021
# The age of the interviewer is taken from the banded variable `intage`.
int_data_r4 <- prepare_interviewer_data(
  "ESS4INTe03.sav",
  interviewer_age_from_bands(intage)
)

# Part 2: ESS5 - data from the Interviewer's questionnaire, edition 3.0 (Austria not included)
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS5INTe03&y=2010
# Download and unzip file; access to data: 05/05/2021
# For France the banded `intage` is used, for all other countries `intagea`.
int_data_r5 <- prepare_interviewer_data(
  "ESS5INTe03.sav",
  ifelse(cntry == "FR",
         interviewer_age_from_bands(intage),
         interviewer_age_from_years(intagea))
)

# Part 3: ESS6 - data from the Interviewer's questionnaire, edition 2.1
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS6INTe02_1&y=2012
# Download and unzip file; access to data: 05/05/2021
int_data_r6 <- prepare_interviewer_data(
  "ESS6INTe02_1.sav",
  interviewer_age_from_years(intagea)
)

# Part 4: ESS7 - data from the Interviewer's questionnaire, edition 2.1
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS7INTe02_1&y=2014
# Download and unzip file; access to data: 05/05/2021
int_data_r7 <- prepare_interviewer_data(
  "ESS7INTe02_1.sav",
  interviewer_age_from_years(intagea)
)

# Part 5: ESS8 - data from the Interviewer's questionnaire, edition 2.0
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS8INTe02&y=2016
# Download and unzip file; access to data: 05/05/2021
int_data_r8 <- prepare_interviewer_data(
  "ESS8INTe02.sav",
  interviewer_age_from_years(intagea)
)

# Part 6: ESS9 - data from the Interviewer's questionnaire, edition 3.0
# Data retrieved from https://www.europeansocialsurvey.org/download.html?file=ESS9INTe03&y=2018
# Download and unzip file; access to data: 05/05/2021
int_data_r9 <- prepare_interviewer_data(
  "ESS9INTe03.sav",
  interviewer_age_from_years(intagea)
)

# Stack the six rounds into one interviewer-level table
data3 <- rbind(int_data_r4, int_data_r5, int_data_r6, int_data_r7, int_data_r8, int_data_r9)
# Add the interviewer-level variables to the ESS 4-9 cumulative dataset;
# rows are matched on the respondent identifier and every respondent is kept
data4 <- data2 %>%
  left_join(data3, by = "T_RESPONDENT_ID")
# Survey-level data
# Sample bias and absolute sample bias: strict approach (see Jabkowski,
# Cichocki, Kołczyńska 2021), i.e., the measure of unit-nonresponse bias based
# on the internal criterion of representativeness (Kohler 2007).
# The subsample is restricted to two-person households (hhmmb == 2) in which
# rshipa2 == 1 and the second member's gender differs from the respondent's
# (presumably heterosexual couples - confirm the rshipa2 coding in the ESS
# codebook). The bias is the standardized distance of the design-weighted
# share of women from 0.5.
Internal_criterion <- data1 %>%
  filter(hhmmb == 2, rshipa2 == 1, gndr != gndr2) %>%
  srvyr::as_survey(weights = dweight) %>%
  group_by(T_SURVEY_ID) %>%
  summarise(
    prop_women = srvyr::survey_mean(gndr == 2, na.rm = TRUE),
    n_subsample = n()
  ) %>%
  mutate(
    bias_z = (prop_women - 0.5) / sqrt(0.25 / n_subsample),
    Sample_bias = round(bias_z, digits = 2),
    Abs_sample_bias = round(abs(bias_z), digits = 2)
  ) %>%
  select(T_SURVEY_ID, Sample_bias, Abs_sample_bias)
# Survey-level information retrieved from the ESS Survey Documentation Reports:
# sample-selection type, change of fieldwork agency, interview mode and the
# shares of experienced / briefed interviewers (both capped at 1)
ESS_survey_reports <- read_sav("ESS_4_9_survey_documentation_reports.sav", user_na = TRUE) %>%
  labelled::remove_labels(user_na_to_na = TRUE) %>%
  mutate(
    S_sample_selection = case_when(
      S_Sample_type == 1 ~ "Interviewers not involved in the selction of induviduals",
      S_Sample_type %in% c(2, 3) ~ "Within household selection performed by the interviewers"
    ),
    S_Fieldwok_agency_changed = case_when(
      S_Fieldwok_agency_changed == 0 ~ "No",
      S_Fieldwok_agency_changed == 1 ~ "Yes"
    ),
    S_Mode = case_when(
      S_Mode == 0 ~ "PAPI",
      S_Mode == 1 ~ "CAPI"
    ),
    # Counts are divided by the number of interviewers; ratios above 1 are set to 1
    S_Experienced_interviewers = round(
      ifelse((S_Experienced_interviewers / S_Number_of_interviewers) > 1,
             1,
             S_Experienced_interviewers / S_Number_of_interviewers),
      digits = 3
    ),
    S_Trained_interviewers = round(
      ifelse((S_Interviewers_briefing / S_Number_of_interviewers) > 1,
             1,
             S_Interviewers_briefing / S_Number_of_interviewers),
      digits = 3
    )
  ) %>%
  select(
    T_SURVEY_ID, S_RR_value, S_sample_selection, S_Fieldwok_agency_changed,
    S_Mode, S_Experienced_interviewers, S_Trained_interviewers
  )
# Combine the documentation-report variables with the internal-criterion bias
# measures (one row per survey, matched on T_SURVEY_ID)
data6 <- ESS_survey_reports %>%
  left_join(Internal_criterion, by = "T_SURVEY_ID") %>%
  select(
    T_SURVEY_ID, S_RR_value, Sample_bias, Abs_sample_bias,
    S_sample_selection, S_Fieldwok_agency_changed, S_Mode,
    S_Experienced_interviewers, S_Trained_interviewers
  )
# Country-level data
#Geert Hofstede's Cultural Dimensions Theory Data
#Power distance index (PDI): The power distance index is defined as “the extent to which the less powerful members of organizations and institutions (like the family) accept and expect that power is distributed unequally.”
#Individualism vs. collectivism (IDV): This index explores the “degree to which people in a society are integrated into groups.”
#Uncertainty avoidance index (UAI): The uncertainty avoidance index is defined as “a society's tolerance for ambiguity,” in which people embrace or avert an event of something unexpected, unknown, or away from the status quo.
#Masculinity vs. femininity (MAS): In this dimension, masculinity is defined as “a preference in society for achievement, heroism, assertiveness and material rewards for success.”
#Long-term orientation vs. short-term orientation (LTO): This dimension associates the connection of the past with the current and future actions/challenges.
#Indulgence vs. restraint (IND): This dimension is essentially a measure of happiness; whether or not simple joys are fulfilled.
# Cumulative dataset for analysis (merged: individual, interviewer, and survey level data)
# Hofstede's six cultural dimensions for the countries in `country_vector`,
# plus grand-mean-centred versions (`*_GMC`, rounded to one decimal).
# Country names are converted to ISO2 codes; if several rows of the file map to
# the same code, only the first one is kept.
# The centring mean ignores missing scores, so a single value that cannot be
# parsed as a number no longer turns the whole centred column into NA.
Hofstede_data <- read_csv("6-dimensions-for-website-2015-12-08-0-100.csv") %>%
  mutate(cntry = countrycode::countrycode(country, origin = "country.name", destination = "iso2c")) %>%
  filter(cntry %in% country_vector) %>%
  distinct(cntry, .keep_all = TRUE) %>%
  select(cntry, pdi, idv, mas, uai, lto = ltowvs, ind = ivr) %>%
  mutate(across(pdi:ind, as.double)) %>%
  mutate(across(pdi:ind,
                ~ round(.x - mean(.x, na.rm = TRUE), digits = 1),
                .names = "{.col}_GMC"))
# Size of the Shadow Economy of the 27 EU-Countries + United Kingdom (up to 2020) over 2003 – 2022 (in % of official GDP)
# source: https://www.europarl.europa.eu/RegData/etudes/STUD/2022/734007/IPOL_STU(2022)734007_EN.pdf
#
# Result: one row per country and ESS round with the size of the shadow economy
# and its deviation from the mean of that round (`shadow_econ_size_GMC`).
# Changes relative to the earlier version of this block:
#  * the table is ungrouped before it is returned; `T_SURVEY` is now selected
#    explicitly (it used to be re-added implicitly as the grouping variable),
#    so the columns and their order are unchanged
#  * `year` holds the former column names (character), so it is compared with
#    strings
#  * the round mean ignores missing values
Shadow_economy <- read_csv("Sadow_economy_Europe.csv") %>%
  mutate(cntry = countrycode::countrycode(Cntry, origin = "country.name", destination = "iso2c")) %>%
  filter(cntry %in% country_vector) %>%
  select(cntry, `2008`, `2010`, `2012`, `2014`, `2016`, `2018`) %>%
  pivot_longer(cols = !cntry, names_to = "year", values_to = "shadow_econ_size") %>%
  mutate(T_SURVEY = case_when(year == "2008" ~ "ESS4",
                              year == "2010" ~ "ESS5",
                              year == "2012" ~ "ESS6",
                              year == "2014" ~ "ESS7",
                              year == "2016" ~ "ESS8",
                              year == "2018" ~ "ESS9"),
         T_SURVEY_ID = paste0(T_SURVEY, "_", cntry)) %>%
  group_by(T_SURVEY) %>%
  mutate(shadow_econ_size_GMC = shadow_econ_size - mean(shadow_econ_size, na.rm = TRUE)) %>%
  ungroup() %>%
  select(T_SURVEY, T_SURVEY_ID, cntry, shadow_econ_size, shadow_econ_size_GMC)
# Measure: PISA Mathematics Scale: Overall Mathematics
# NOTE: The Reading, Mathematics and Science scale ranges from 0 to 1000. Some apparent differences between estimates may not be statistically significant.
# Mathematical performance, for PISA, measures the mathematical literacy of a 15 year-old to formulate, employ and interpret mathematics in a variety of contexts to describe, predict and explain phenomena, recognising the role that mathematics plays in the world. The mean score is the measure. A mathematically literate student recognises the role that mathematics plays in the world in order to make well-founded judgments and decisions needed by constructive, engaged and reflective citizens.
# source: https://data.oecd.org/pisa/mathematics-performance-pisa.htm#indicator-chart
# Country-level PISA mathematics score: the mean of all values available for a
# country in the downloaded file, plus its deviation from the mean across the
# analysed countries (`PISA_math_mean_score_GMC`)
PISA_Mathematics_Scale <- read_csv("DP_LIVE_16082023192238961.csv") %>%
  transmute(
    cntry = countrycode::countrycode(LOCATION, origin = "iso3c", destination = "iso2c"),
    year = TIME,
    PISA_math = Value
  ) %>%
  filter(cntry %in% country_vector) %>%
  group_by(cntry) %>%
  summarise(PISA_math_mean_score = mean(PISA_math, na.rm = TRUE)) %>%
  mutate(
    PISA_math_mean_score_GMC = PISA_math_mean_score - mean(PISA_math_mean_score)
  )
# Final analysis dataset: respondent + interviewer data (data4) extended with
# survey-level (data6) and country-level (Hofstede, PISA, shadow economy) data.
# Shadow_economy also carries a `cntry` column, so the last join produces
# `cntry.x` / `cntry.y`; the left-hand one is renamed back to `cntry`.
Data_for_analysis <- data4 %>%
  left_join(data6, by = "T_SURVEY_ID") %>%
  left_join(Hofstede_data, by = "cntry") %>%
  left_join(PISA_Mathematics_Scale, by = "cntry") %>%
  left_join(Shadow_economy, by = "T_SURVEY_ID") %>%
  rename(cntry = cntry.x)
Figure B1. Net sample sizes in each of the individual national surveys encompassed by the analysis
# Figure B1. Net sample sizes in each of the individual national surveys
# encompassed by the analysis: one label per country and round showing the
# number of respondents, with the country flag drawn left of the first round.
# The three surveys excluded from the analysis are dropped from the counts and
# marked "excluded" in red at their country/year position.
Figure1 <- ESS_cumulative_data %>%
  filter(cntry %in% country_vector, essround >= 4) %>%
  mutate(
    Year = case_when(
      essround == 4 ~ 2008,
      essround == 5 ~ 2010,
      essround == 6 ~ 2012,
      essround == 7 ~ 2014,
      essround == 8 ~ 2016,
      essround == 9 ~ 2018
    ),
    T_SURVEY_ID = paste0("ESS", essround, "_", cntry)
  ) %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT"
  ) %>%
  group_by(cntry, Year) %>%
  count() %>%
  mutate(
    # lower-case ISO2 code for geom_flag(), full country name for the axis
    cntry_lower = tolower(cntry),
    cntry = countrycode::countrycode(cntry, origin = "iso2c", destination = "country.name")
  ) %>%
  ggplot(aes(x = cntry, y = Year, label = n)) +
  geom_label(color = "blue4", fill = "aliceblue") +
  geom_flag(aes(y = 2007.2, country = cntry_lower)) +
  scale_y_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018)) +
  coord_flip() +
  xlab("") +
  ylab("ESS round / year") +
  # x = position of the country on the (alphabetically ordered) country axis
  annotate(geom = "text", colour = "red", x = 9, y = 2008, label = "excluded",
           hjust = 0.5, vjust = 0.5, size = 3.5) +
  annotate(geom = "text", colour = "red", x = 13, y = 2010, label = "excluded",
           hjust = 0.5, vjust = 0.5, size = 3.5) +
  annotate(geom = "text", colour = "red", x = 3, y = 2014, label = "excluded",
           hjust = 0.5, vjust = 0.5, size = 3.5) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 10, face = "bold"),
    axis.text = element_text(color = "black", size = 10),
    axis.title = element_text(color = "black", size = 10),
    panel.grid.minor.x = element_blank()
  )
Figure1
# The raw cumulative file is not needed after this point
rm(ESS_cumulative_data)
Notes:
1) ESS4-Lithuania - missing data as no design weights were provided
2) ESS5-Portugal - missing data due to the inconsistent way of coding HH total net income
3) ESS7-Estonia - missing data as the income categories used in the questionnaire do not represent the Estonian income deciles
This R code replicates Table 1 from the paper
# Total number of respondents in the analysis: the three excluded surveys and
# respondents without a valid income item-nonresponse status are left out
Sample_size_total <- nrow(
  filter(
    Data_for_analysis,
    !(T_SURVEY_ID %in% c("ESS5_PT", "ESS7_EE", "ESS4_LT")),
    !is.na(hinctnta_item_nonresp)
  )
)
# Table 2 - Descriptive statistics for individual-level data
# Weighted (anweight) means / proportions, SD, minimum and maximum of the
# respondent-level variables, rendered as a flextable.
Table2 <- Data_for_analysis %>%
  filter(
    !(T_SURVEY_ID %in% c("ESS5_PT", "ESS7_EE", "ESS4_LT")),
    !is.na(hinctnta_item_nonresp)
  ) %>%
  mutate(
    # order the education categories from lowest to highest for the table
    education = fct_relevel(
      education,
      c("Less than lower secondary education (ISCED 0-1)",
        "Lower secondary education completed (ISCED 2)",
        "Upper secondary education completed (ISCED 3)",
        "Post-secondary non-teritary & teritary education completed (ISCED 4-6)")
    ),
    gndr_male = case_when(
      gndr_male == 1 ~ "Male",
      !is.na(gndr_male) ~ "Female"
    )
  ) %>%
  vtable::sumtable(
    vars = c("hinctnta_item_nonresp",
             "hincsrc_recode2",
             "hhmmb_15plus",
             "Zconservation",
             "Zsocial_trust_index",
             "gndr_male",
             "agea",
             "education"),
    labels = c("HH total net income:",
               "Main source of income:",
               "HH size (number of people 15+)",
               "Conservation (proxy of social desirability)",
               "Social trust index",
               "Gender of respondent",
               "Age of respondent",
               "Level of education:"),
    summ.names = c("Missing_cases", "Mean", "SD", "Min", "Q1", "Q3", "Max"),
    group.weights = "anweight",
    factor.counts = FALSE,
    factor.percent = FALSE,
    digits = 4,
    out = "return"
  ) %>%
  # keep only the statistics reported in the paper
  select(-"Missing_cases", -Q1, -Q3) %>%
  mutate(
    Mean = round(as.numeric(Mean), 3),
    SD = round(as.numeric(SD), 3),
    Min = round(as.numeric(Min), 2),
    Max = round(as.numeric(Max), 2)
  ) %>%
  rename("Mean /\nProportion" = "Mean") %>%
  qflextable() %>%
  align(j = 2:5, align = "center", part = "all")
Table2
Variable | Mean / Proportion | SD | Min | Max |
---|---|---|---|---|
HH total net income: | ||||
... Response | 0.812 | |||
... DK | 0.087 | |||
... Refusal | 0.101 | |||
Main source of income: | ||||
... Wages or salaries | 0.590 | |||
... Pensions | 0.244 | |||
... Unemployement/refundancy benefits or any other social benefits/grants | 0.054 | |||
... Income from investments, savings or other sources | 0.007 | |||
... Income from self-employement or farming | 0.088 | |||
... DK | 0.010 | |||
... Refusal | 0.007 | |||
HH size (number of people 15+) | 1.407 | 1.110 | 0.00 | 18.00 |
Conservation (proxy of social desirability) | 0.000 | 1.001 | -4.45 | 4.48 |
Social trust index | 0.003 | 0.999 | -2.92 | 2.73 |
Gender of respondent | ||||
... Female | 0.515 | |||
... Male | 0.485 | |||
Age of respondent | 47.280 | 18.840 | 15.00 | 123.00 |
Level of education: | ||||
... Less than lower secondary education (ISCED 0-1) | 0.134 | |||
... Lower secondary education completed (ISCED 2) | 0.207 | |||
... Upper secondary education completed (ISCED 3) | 0.342 | |||
... Post-secondary non-teritary & teritary education completed (ISCED 4-6) | 0.318 |
Note: N = 189,220 - total number of respondents
Table B1.1. MG-CFA analysis verifying cross-country measurement invariance of the latent variable measuring conservation
# Latent model for conservation ----
# Sample used for the measurement-invariance checks: the country-rounds
# ESS5_PT, ESS7_EE and ESS4_LT are dropped, as are respondents without an
# item-nonresponse status on household income.
data_for_measurement_invaraince_check <- data1 %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT",
    !is.na(hinctnta_item_nonresp)
  )

# lavaan syntax: four latent factors with two indicators each, followed by
# all six pairwise covariances between the factors.
Conservation_latent_model <- "SE_latent =~ impsafe + ipstrgv
CO_latent =~ ipfrule + ipbhprp
HE_latent =~ ipgdtim + impfun
ST_latent =~ impdiff + ipadvnt
SE_latent ~~ CO_latent
SE_latent ~~ HE_latent
SE_latent ~~ ST_latent
CO_latent ~~ HE_latent
CO_latent ~~ ST_latent
HE_latent ~~ ST_latent"
# Multi-group CFA of the conservation model, grouped by country and weighted
# by anweight. The three fits form a nested sequence of invariance levels.

# Configural invariance: the same factor structure is imposed on all groups
fit1a <- cfa(
  model = Conservation_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  sampling.weights = "anweight"
)

# Weak / metric invariance: factor loadings are constrained to be equal
# across groups
fit2a <- cfa(
  model = Conservation_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  group.equal = "loadings",
  sampling.weights = "anweight"
)

# Strong / scalar invariance: factor loadings and intercepts are constrained
# to be equal across groups
fit3a <- cfa(
  model = Conservation_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  group.equal = c("intercepts", "loadings"),
  sampling.weights = "anweight"
)
# Fit indices reported for every invariance level of the conservation model.
fit1_conservation <- fitmeasures(
  fit1a,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)
fit2_conservation <- fitmeasures(
  fit2a,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)
fit3_conservation <- fitmeasures(
  fit3a,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)

# One row per invariance level; every index except df is rounded to three
# decimals before the table is rendered.
Table3_1 <- rbind(fit1_conservation, fit2_conservation, fit3_conservation) %>%
  as_tibble() %>%
  add_column(
    invariance = c("Configural", "Metric / weak", "Scalar / strong"),
    .before = "chisq"
  ) %>%
  mutate(
    across(
      c(chisq, pvalue, cfi, rmsea, rmsea.ci.lower, rmsea.ci.upper),
      ~ round(.x, digits = 3)
    )
  ) %>%
  qflextable() %>%
  align(j = 2:8, align = "center", part = "all")

Table3_1
invariance | chisq | df | pvalue | cfi | rmsea | rmsea.ci.lower | rmsea.ci.upper |
---|---|---|---|---|---|---|---|
Configural | 12,241.64 | 238 | 0 | 0.957 | 0.069 | 0.068 | 0.070 |
Metric / weak | 13,955.19 | 302 | 0 | 0.951 | 0.065 | 0.064 | 0.066 |
Scalar / strong | 51,301.48 | 366 | 0 | 0.816 | 0.114 | 0.114 | 0.115 |
Table B1.2. MG-CFA analysis verifying cross-country measurement invariance of latent variable measuring social trust
# Latent model for the social trust index ----
# A single factor measured by three indicators.
Social_trust_latent_model <- "social_trust_latent =~ ppltrst + pplfair + pplhlp"

# Multi-group CFA grouped by country and weighted by anweight; the three fits
# form a nested sequence of invariance levels.

# Configural invariance: the same factor structure is imposed on all groups
fit1b <- cfa(
  model = Social_trust_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  sampling.weights = "anweight"
)

# Weak / metric invariance: factor loadings are constrained to be equal
# across groups
fit2b <- cfa(
  model = Social_trust_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  group.equal = "loadings",
  sampling.weights = "anweight"
)

# Strong / scalar invariance: factor loadings and intercepts are constrained
# to be equal across groups
fit3b <- cfa(
  model = Social_trust_latent_model,
  data = data_for_measurement_invaraince_check,
  group = "cntry",
  group.equal = c("intercepts", "loadings"),
  sampling.weights = "anweight"
)
# Fit indices reported for every invariance level of the social trust model.
fit1_social_trust <- fitmeasures(
  fit1b,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)
fit2_social_trust <- fitmeasures(
  fit2b,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)
fit3_social_trust <- fitmeasures(
  fit3b,
  fit.measures = c("chisq", "df", "pvalue", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper")
)

# One row per invariance level; every index except df is rounded to three
# decimals before the table is rendered.
Table3_2 <- rbind(fit1_social_trust, fit2_social_trust, fit3_social_trust) %>%
  as_tibble() %>%
  add_column(
    invariance = c("Configural", "Metric / weak", "Scalar / strong"),
    .before = "chisq"
  ) %>%
  mutate(
    across(
      c(chisq, pvalue, cfi, rmsea, rmsea.ci.lower, rmsea.ci.upper),
      ~ round(.x, digits = 3)
    )
  ) %>%
  qflextable() %>%
  align(j = 2:8, align = "center", part = "all")

Table3_2
invariance | chisq | df | pvalue | cfi | rmsea | rmsea.ci.lower | rmsea.ci.upper |
---|---|---|---|---|---|---|---|
Configural | 0.000 | 0 | | 1.000 | 0.000 | 0.000 | 0.000 |
Metric / weak | 466.536 | 32 | 0 | 0.996 | 0.035 | 0.032 | 0.038 |
Scalar / strong | 11,684.480 | 64 | 0 | 0.883 | 0.128 | 0.126 | 0.130 |
# Drop the invariance-check data and the fitted CFA objects from the
# workspace; the extracted fit-measure vectors and tables are kept.
rm(list = c(
  "data1",
  "data_for_measurement_invaraince_check",
  "fit1a", "fit1b",
  "fit2a", "fit2b",
  "fit3a", "fit3b"
))
Figure B2. Correlation between PIAAC numeracy scale score and PISA mathematics scale score
# PIAAC data on numeracy
# source: https://doi.org/10.1787/888934020863
#
# Country names from the PIAAC file are converted to ISO2 codes so they can
# be matched to the PISA mathematics scores; the plot shows one labelled
# point per country, a linear fit and the correlation coefficient.
PIAAC_PISA_corr_plot <- read_csv("PIAAC_numeracy.csv") %>%
  mutate(
    cntry = countrycode::countrycode(
      Country,
      origin = "country.name",
      destination = "iso2c"
    )
  ) %>%
  left_join(PISA_Mathematics_Scale, by = "cntry") %>%
  ggplot(aes(x = PIAAC_numeracy, y = PISA_math_mean_score)) +
  geom_smooth(method = "lm") +
  geom_label(aes(label = cntry)) +
  ggpubr::stat_cor(
    color = "grey10",
    p.accuracy = 0.001,
    r.accuracy = 0.01,
    label.x.npc = 0.05,
    label.y.npc = 0.95
  ) +
  labs(
    x = "PIAAC: numeracy scale score",
    y = "PISA: mathematics scale score"
  ) +
  theme_bw() +
  theme(
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(colour = "grey90"),
    panel.grid.minor.x = element_blank(),
    axis.text = element_text(color = "black", size = 9),
    axis.title = element_text(color = "black", size = 10),
    strip.background = element_rect(color = "black", fill = "aliceblue"),
    strip.text.x = element_text(colour = "black", size = 9)
  )

PIAAC_PISA_corr_plot
This R code replicates Table 2 from the paper
# Table 6 - Descriptive statistics for country-level data
#
# Masculinity vs. femininity (MAS): in this dimension, masculinity is defined
# as "a preference in society for achievement, heroism, assertiveness and
# material rewards for success."
# Uncertainty avoidance index (UAI): defined as "a society's tolerance for
# ambiguity," in which people embrace or avert an event of something
# unexpected, unknown, or away from the status quo.
#
# The shadow-economy series is first averaged within country, then joined
# (together with the PISA scores) onto the Hofstede data by country code.
Table6 <- Hofstede_data %>%
  left_join(
    Shadow_economy %>%
      group_by(cntry) %>%
      summarise(shadow_econ_size = mean(shadow_econ_size)),
    by = "cntry"
  ) %>%
  left_join(PISA_Mathematics_Scale, by = "cntry") %>%
  vtable::sumtable(
    vars = c("uai", "PISA_math_mean_score", "mas", "shadow_econ_size"),
    labels = c(
      "Uncertainty avoidance index (Hofstede's UAI)",
      "PISA's Mathematics Scale",
      "Masculinity vs. femininity (Hofstede's MAS)",
      "Size of the Shadow Economy by Eurostat (in % of official GDP)"
    ),
    summ = c("mean(x)", "sd(x)", "min(x)", "max(x)"),
    summ.names = c("Mean", "SD", "Min", "Max"),
    factor.counts = FALSE,
    factor.percent = FALSE,
    digits = 3,
    fixed.digits = TRUE,
    out = "return"
  ) %>%
  rename(`Country level factor` = Variable) %>%
  # sumtable() is asked to return its cells; convert them back to numbers
  # and print every statistic with exactly one decimal.
  mutate(
    across(
      Mean:Max,
      ~ format(round(as.double(.x), digits = 1), nsmall = 1)
    )
  ) %>%
  qflextable() %>%
  align(j = 2:5, align = "center", part = "all")

Table6
Country level factor | Mean | SD | Min | Max |
---|---|---|---|---|
Uncertainty avoidance index (Hofstede's UAI) | 69.5 | 20.1 | 35.0 | 99.0 |
PISA's Mathematics Scale | 502.0 | 15.0 | 481.0 | 526.0 |
Masculinity vs. femininity (Hofstede's MAS) | 45.0 | 23.6 | 8.0 | 88.0 |
Size of the Shadow Economy by Eurostat (in % of official GDP) | 16.5 | 6.4 | 7.1 | 27.0 |
Table B2. Descriptive statistics for interviewer-level data
# Table 4 - Descriptive statistics for interviewer-level data
#
# Both objects below start from one record per interviewer (the first row of
# each T_INTERVIEWER_ID) and then drop the country-rounds ESS5_PT, ESS7_EE
# and ESS4_LT as well as records without an income item-nonresponse status.
# NOTE(review): distinct() runs before filter(), so whether an interviewer is
# kept depends on the values found in that first record (including its
# hinctnta_item_nonresp) -- confirm this ordering is intended.

# Number of interviewers left after the filtering described above.
Number_of_interviewers <- Data_for_analysis %>%
  distinct(T_INTERVIEWER_ID, .keep_all = TRUE) %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT",
    !is.na(hinctnta_item_nonresp)
  ) %>%
  nrow()

Table4 <- Data_for_analysis %>%
  distinct(T_INTERVIEWER_ID, .keep_all = TRUE) %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT",
    !is.na(hinctnta_item_nonresp)
  ) %>%
  # Turn the 0/1 indicator into text labels; case_when() yields NA for any
  # value that matches neither condition, including missing values.
  mutate(
    INTgndr_male = case_when(
      INTgndr_male == 0 ~ "Female",
      INTgndr_male == 1 ~ "Male"
    )
  ) %>%
  vtable::sumtable(
    vars = c("INTgndr_male", "INTage"),
    labels = c("Gender:", "Age:"),
    summ = c("notNA(x)", "mean(x)", "sd(x)", "min(x)", "max(x)"),
    summ.names = c("Missing_cases", "Mean", "SD", "Min", "Max"),
    factor.counts = FALSE,
    factor.percent = FALSE,
    digits = 3,
    fixed.digits = FALSE,
    out = "return"
  ) %>%
  # Only the category shares are reported, under the header "Proportion".
  select("Variable", "Mean") %>%
  mutate(Mean = round(as.numeric(Mean), 3)) %>%
  rename("Proportion" = "Mean") %>%
  qflextable() %>%
  align(j = 2, align = "center", part = "all")

Table4
Variable | Proportion |
---|---|
Gender: | |
... Female | 0.652 |
... Male | 0.348 |
Age: | |
... Up to 30 | 0.073 |
... 31-40 | 0.119 |
... 41-50 | 0.211 |
... 51-60 | 0.320 |
... 61 and older | 0.277 |
Note: N = 13,541 - total number of interviewers
Table B3. Descriptive statistics for survey-level data
# Table 5 - Descriptive statistics for survey-level data
#
# One record per national survey (T_SURVEY_ID), without the country-rounds
# ESS5_PT, ESS7_EE and ESS4_LT. Removing those surveys before or after
# de-duplicating on the same key gives the same rows.
Table5 <- Data_for_analysis %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT"
  ) %>%
  distinct(T_SURVEY_ID, .keep_all = TRUE) %>%
  vtable::sumtable(
    vars = c("S_RR_value", "S_Experienced_interviewers", "S_sample_selection"),
    labels = c(
      "Response rate (RR2)",
      "Fraction of experienced interviewers",
      "Sampling of the target respondent:"
    ),
    # notNA(x) supplies the count shown in the "N of country surveys" column.
    summ = c("notNA(x)", "mean(x)", "sd(x)", "min(x)", "max(x)"),
    summ.names = c("N of country surveys", "Mean", "SD", "Min", "Max"),
    factor.counts = FALSE,
    factor.percent = FALSE,
    digits = 3,
    fixed.digits = TRUE,
    out = "return"
  ) %>%
  rename("Mean /\nProportion" = "Mean") %>%
  qflextable() %>%
  align(j = 2:6, align = "center", part = "all")

Table5
Variable | N of country surveys | Mean / Proportion | SD | Min | Max |
---|---|---|---|---|---|
Response rate (RR2) | 99 | 0.565 | 0.107 | 0.276 | 0.771 |
Fraction of experienced interviewers | 99 | 0.896 | 0.18 | 0 | 1 |
Sampling of the target respondent: | 99 | ||||
... Interviewers not involved in the selction of induviduals | 0.606 | ||||
... Within household selection performed by the interviewers | 0.394 |
Note: N = 99 - number of national surveys included in the analysis
This R code replicates Table 3 from the paper
# Table 6. The distribution of item non-response cases across level1 and
# level2 explanatory variables
#
# For each explanatory variable a 0/1 flag marks respondents with a missing
# value; inr11 flags respondents missing on at least one of them. The flags
# are summarised as counts and rounded shares, reshaped to one row per
# variable, and a final row carries the number of rows left after the filter.
Table6 <- Data_for_analysis %>%
  filter(
    T_SURVEY_ID != "ESS5_PT",
    T_SURVEY_ID != "ESS7_EE",
    T_SURVEY_ID != "ESS4_LT",
    !is.na(hinctnta_item_nonresp)
  ) %>%
  mutate(
    inr1 = as.numeric(is.na(hincsrc_recode2)),
    inr2 = as.numeric(is.na(education)),
    inr4 = as.numeric(is.na(hhmmb_15plus)),
    inr5 = as.numeric(is.na(Zconservation)),
    inr6 = as.numeric(is.na(Zsocial_trust_index)),
    inr7 = as.numeric(is.na(gndr_male)),
    inr8 = as.numeric(is.na(agea)),
    inr9 = as.numeric(is.na(INTgndr_male)),
    inr10 = as.numeric(is.na(INTage)),
    # Missing on any of the nine variables above.
    inr11 = as.numeric(
      inr1 + inr2 + inr4 + inr5 + inr6 + inr7 + inr8 + inr9 + inr10 > 0
    )
  ) %>%
  summarise(
    # Produces <flag>_n (count) and <flag>_mean (share) for every flag.
    across(
      c(inr1, inr2, inr4, inr5, inr6, inr7, inr8, inr9, inr10, inr11),
      list(n = sum, mean = ~ round(mean(.x), digits = 3))
    ),
    cases_n = n()
  ) %>%
  # Column names are split at "_" into the variable id and the statistic.
  pivot_longer(
    cols = 1:21,
    names_to = c("Variables", "summary"),
    names_sep = "_",
    values_to = "values"
  ) %>%
  pivot_wider(
    id_cols = "Variables",
    names_from = "summary",
    values_from = "values"
  ) %>%
  mutate(
    Variables = case_when(
      Variables == "inr1" ~ "Main source of income",
      Variables == "inr2" ~ "Level of education",
      Variables == "inr4" ~ "HH size (number of people 15+)",
      Variables == "inr5" ~ "Conservation (proxy of social desirability)",
      Variables == "inr6" ~ "Social trust index",
      Variables == "inr7" ~ "Gender of respondent",
      Variables == "inr8" ~ "Age of respondent",
      Variables == "inr9" ~ "Gender of interviewer",
      Variables == "inr10" ~ "Age of interviewer",
      Variables == "inr11" ~ "Total number of excluded respondents",
      Variables == "cases" ~ "Total number of respondents included in analysis"
    )
  ) %>%
  rename(
    "N of missing cases" = n,
    "Fraction of missing cases" = mean
  ) %>%
  qflextable() %>%
  align(j = 2:3, align = "center", part = "all") %>%
  # A rule separates the nine per-variable rows from the two total rows,
  # whose labels are right-aligned.
  hline(i = 9, part = "body") %>%
  align(j = 1, i = 10:11, align = "right")

Table6
Variables | N of missing cases | Fraction of missing cases |
---|---|---|
Main source of income | 413 | 0.002 |
Level of education | 782 | 0.004 |
HH size (number of people 15+) | 184 | 0.001 |
Conservation (proxy of social desirability) | 2,749 | 0.015 |
Social trust index | 0 | 0.000 |
Gender of respondent | 24 | 0.000 |
Age of respondent | 491 | 0.003 |
Gender of interviewer | 659 | 0.003 |
Age of interviewer | 910 | 0.005 |
Total number of excluded respondents | 5,390 | 0.028 |
Total number of respondents included in analysis | 189,220 |
# Multilevel cross-sectional logistic regression analysis
# Data for regression analysis
# Data manipulation on an individual-level cumulative dataset
# Builds the analysis data set for one binary contrast of the income
# item-nonresponse status.
#
# The same three steps were previously copy-pasted for every contrast:
#   1. drop the three country-rounds (ESS5_PT, ESS7_EE, ESS4_LT) with an
#      inconsistent way of measuring HH total net income;
#   2. drop cases with a missing outcome or a missing value on any of the
#      explanatory variables;
#   3. drop the category that is not part of the contrast and recode the
#      outcome to 0 / 1.
#
# Arguments:
#   data       - individual-level data frame (Data_for_analysis).
#   excluded   - category of hinctnta_item_nonresp removed from the data.
#   coded_zero - category recoded to 0.
#   coded_one  - category recoded to 1.
# Returns the filtered data with hinctnta_item_nonresp recoded to 0 / 1.
build_income_inr_contrast <- function(data, excluded, coded_zero, coded_one) {
  data %>%
    # Excluding three surveys with inconsistent way of measuring HH total net income
    filter(
      T_SURVEY_ID != "ESS5_PT" &
        T_SURVEY_ID != "ESS7_EE" &
        T_SURVEY_ID != "ESS4_LT"
    ) %>%
    # Excluding cases with missing values on any of the explanatory variables
    filter(
      !is.na(hinctnta_item_nonresp) &
        !is.na(hincsrc_recode2) &
        !is.na(education) &
        !is.na(hhmmb_15plus) &
        !is.na(Zconservation) &
        !is.na(Zsocial_trust_index) &
        !is.na(gndr_male) &
        !is.na(agea) &
        !is.na(INTgndr_male) &
        !is.na(INTage)
    ) %>%
    # `!!` injects the argument values so they can never be shadowed by a
    # data column that happens to share the argument's name.
    filter(hinctnta_item_nonresp != !!excluded) %>%
    mutate(
      hinctnta_item_nonresp = case_when(
        hinctnta_item_nonresp == !!coded_zero ~ 0,
        hinctnta_item_nonresp == !!coded_one ~ 1
      )
    )
}

# "Don't know" (1) vs. Response (0); refusals are excluded
Income_inr_DKvsResponse <- build_income_inr_contrast(
  Data_for_analysis,
  excluded = "Refusal",
  coded_zero = "Response",
  coded_one = "DK"
)

# Refusal (1) vs. Response (0); "Don't know" answers are excluded
Income_inr_RefusalvsResponse <- build_income_inr_contrast(
  Data_for_analysis,
  excluded = "DK",
  coded_zero = "Response",
  coded_one = "Refusal"
)

# Refusal (1) vs. "Don't know" (0); responses are excluded
Income_inr_RefusalvsDK <- build_income_inr_contrast(
  Data_for_analysis,
  excluded = "Response",
  coded_zero = "DK",
  coded_one = "Refusal"
)
# Multilevel modeling for DK
# Outcome: hinctnta_item_nonresp in Income_inr_DKvsResponse (DK = 1,
# Response = 0). All three models have crossed random intercepts for
# interviewer, country and ESS round and use anweight as weights.

# Null model: random intercepts only
m0_DK <- glmer(
  hinctnta_item_nonresp ~ 1 +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Fixed-slopes model: respondent, interviewer, survey and country predictors
m1_DK <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    uai_GMC + PISA_math_mean_score_GMC +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Random-slopes model: same fixed part; the effects of household size,
# conservation and social trust are additionally allowed to vary by country
m2_DK <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    uai_GMC + PISA_math_mean_score_GMC +
    (1 | T_INTERVIEWER_ID) +
    (hhmmb_15plus + Zconservation + Zsocial_trust_index | cntry) +
    (1 | essround),
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)
# Multilevel modeling for Refusals
# Outcome: hinctnta_item_nonresp in Income_inr_RefusalvsResponse (Refusal = 1,
# Response = 0). All three models have crossed random intercepts for
# interviewer, country and ESS round and use anweight as weights.

# Null model: random intercepts only
m0_Ref <- glmer(
  hinctnta_item_nonresp ~ 1 +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Fixed-slopes model: respondent, interviewer, survey and country predictors
m1_Ref <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    mas_GMC + shadow_econ_size_GMC +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Random-slopes model: same fixed part; the effects of household size,
# conservation and social trust are additionally allowed to vary by country
m2_Ref <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    mas_GMC + shadow_econ_size_GMC +
    (1 | T_INTERVIEWER_ID) +
    (hhmmb_15plus + Zconservation + Zsocial_trust_index | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)
# Multilevel modeling for Ref vs DK
# Outcome: hinctnta_item_nonresp in Income_inr_RefusalvsDK (Refusal = 1,
# DK = 0). All three models have crossed random intercepts for interviewer,
# country and ESS round and use anweight as weights.

# Null model: random intercepts only
m0_Ref_DK <- glmer(
  hinctnta_item_nonresp ~ 1 +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Fixed-slopes model: respondent, interviewer, survey and country predictors
m1_Ref_DK <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    uai_GMC + mas_GMC + shadow_econ_size_GMC +
    (1 | T_INTERVIEWER_ID) +
    (1 | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)

# Random-slopes model: same fixed part; the effects of household size,
# conservation and social trust are additionally allowed to vary by country
m2_Ref_DK <- glmer(
  hinctnta_item_nonresp ~
    hhmmb_15plus + hincsrc_recode2 + Zconservation + Zsocial_trust_index +
    education + gndr_male + agea +
    INTgndr_male + INTage +
    S_RR_value + S_Experienced_interviewers + S_sample_selection +
    uai_GMC + mas_GMC + shadow_econ_size_GMC +
    (1 | T_INTERVIEWER_ID) +
    (hhmmb_15plus + Zconservation + Zsocial_trust_index | cntry) +
    (1 | essround),
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  weights = anweight,
  nAGQ = 1
)
Table B4. Multilevel cross-classified logistic regression predicting the odds of “don’t know” responses (vs. response) in the income question
# Tab model for "Don't know" vs. Response
# Table 7 Models for DK
#
# The predictor labels are supplied as an unnamed vector, so their order has
# to follow the order of the fixed-effect terms in m1_DK / m2_DK.
# NOTE(review): S_RR_value is labelled "Response rate (RR2)" in the
# survey-level descriptive table but "(RR1)" here -- confirm which applies.
Table7 <- tab_model(
  m0_DK, m1_DK, m2_DK,
  dv.labels = c(
    "Model 1: null model",
    "Model 1: fixed slopes",
    "Model 1: random slopes"
  ),
  pred.labels = c(
    "Intercept",
    "HH size (excluding children up to 14 years old)",
    "Source of income: Pensions [vs. Wages or salaries]",
    "Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries]",
    "Source of income: Income from investments, savings or other sources [vs. Wages or salaries]",
    "Source of income: Income from self-employement or farming [vs. Wages or salaries]",
    "Source of income: 'Do not know' [vs. Wages or salaries]",
    "Source of income: Refusal [vs. Wages or salaries]",
    "Conservation (proxy of social desirability)",
    "Social trust index",
    "Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3]",
    "Lower secondary education completed (ISCED 2) [vs. ISCED 3]",
    "Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3]",
    "Gender of the respondent [Male = 1]",
    "Age of the respondent",
    "Gender of the interviewer [Male = 1]",
    "Age of the interviewer 31-40 [vs. up to 30]",
    "Age of the interviewer 41-50 [vs. up to 30]",
    "Age of the interviewer 51-60 [vs. up to 30]",
    "Age of the interviewer 61 and older [vs. up to 30]",
    "Response rate (RR1)",
    "Fraction of experienced interviewers",
    "Within household selection performed by the interviewers [vs. individual register sample]",
    "Uncertainty avoidance index (Hofstede's UAI)",
    "PISA's Mathematics Scale"
  ),
  # Significance stars instead of p-values
  p.style = "stars",
  p.threshold = c(0.05, 0.01, 0.001),
  digits = 3,
  digits.re = 3,
  # Standard errors are shown instead of confidence intervals
  show.se = TRUE,
  show.ci = FALSE,
  show.aic = TRUE,
  show.loglik = TRUE,
  show.re.var = FALSE,
  show.icc = FALSE,
  show.r2 = FALSE
)

Table7
Model 1: null model | Model 1: fixed slopes | Model 1: random slopes | ||||
---|---|---|---|---|---|---|
Predictors | Odds Ratios | std. Error | Odds Ratios | std. Error | Odds Ratios | std. Error |
Intercept | 0.062 *** | 0.009 | 0.030 *** | 0.009 | 0.021 *** | 0.006 |
HH size (excluding children up to 14 years old) | 1.729 *** | 0.018 | 1.755 *** | 0.043 | ||
Source of income: Pensions [vs. Wages or salaries] | 2.463 *** | 0.101 | 2.461 *** | 0.101 | ||
Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries] | 0.698 *** | 0.038 | 0.706 *** | 0.038 | ||
Source of income: Income from investments, savings or other sources [vs. Wages or salaries] | 3.875 *** | 0.523 | 3.900 *** | 0.525 | ||
Source of income: Income from self-employement or farming [vs. Wages or salaries] | 2.089 *** | 0.077 | 2.104 *** | 0.078 | ||
Source of income: ‘Do not know’ [vs. Wages or salaries] | 21.343 *** | 1.915 | 22.085 *** | 1.984 | ||
Source of income: Refusal [vs. Wages or salaries] | 7.943 *** | 2.047 | 7.916 *** | 2.049 | ||
Conservation (proxy of social desirability) | 0.970 * | 0.013 | 0.973 | 0.021 | ||
Social trust index | 1.016 | 0.013 | 1.028 | 0.016 | ||
Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3] | 3.103 *** | 0.118 | 3.145 *** | 0.121 | ||
Lower secondary education completed (ISCED 2) [vs. ISCED 3] | 1.957 *** | 0.060 | 1.982 *** | 0.061 | ||
Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3] | 0.654 *** | 0.022 | 0.665 *** | 0.022 | ||
Gender of the respondent [Male = 1] | 0.693 *** | 0.016 | 0.699 *** | 0.016 | ||
Age of the respondent | 0.953 *** | 0.001 | 0.954 *** | 0.001 | ||
Gender of the interviewer [Male = 1] | 0.806 *** | 0.038 | 0.821 *** | 0.038 | ||
Age of the interviewer 31-40 [vs. up to 30] | 1.007 | 0.121 | 1.042 | 0.126 | ||
Age of the interviewer 41-50 [vs. up to 30] | 1.111 | 0.124 | 1.168 | 0.131 | ||
Age of the interviewer 51-60 [vs. up to 30] | 1.166 | 0.126 | 1.236 | 0.135 | ||
Age of the interviewer 61 and older [vs. up to 30] | 1.046 | 0.116 | 1.106 | 0.123 | ||
Response rate (RR1) | 3.479 ** | 1.438 | 4.446 *** | 1.727 | ||
Fraction of experienced interviewers | 1.626 *** | 0.196 | 1.637 *** | 0.197 | ||
Within household selection performed by the interviewers [vs. individual register sample] | 1.459 *** | 0.160 | 1.739 *** | 0.179 | ||
Uncertainty avoidance index (Hofstede’s UAI) | 0.992 | 0.005 | 0.989 ** | 0.004 | ||
PISA’s Mathematics Scale | 0.999 | 0.007 | 1.004 | 0.006 | ||
N | 13285 T_INTERVIEWER_ID | 13285 T_INTERVIEWER_ID | 13285 T_INTERVIEWER_ID | |||
17 cntry | 17 cntry | 17 cntry | ||||
6 essround | 6 essround | 6 essround | ||||
Observations | 164406 | 164406 | 164406 | |||
AIC | 81999.605 | 66960.992 | 66882.261 | |||
log-Likelihood | -40995.803 | -33452.496 | -33404.131 | |||
|
Table B5. Multilevel cross-classified logistic regression predicting the odds of refusals (vs. response) in the income question
# Tab model for Refusal vs. Response
# Table 8 Models for Refusals
#
# The predictor labels are supplied as an unnamed vector, so their order has
# to follow the order of the fixed-effect terms in m1_Ref / m2_Ref.
# NOTE(review): S_RR_value is labelled "Response rate (RR2)" in the
# survey-level descriptive table but "(RR1)" here -- confirm which applies.
Table8 <- tab_model(
  m0_Ref, m1_Ref, m2_Ref,
  dv.labels = c(
    "Model 2: null model",
    "Model 2: fixed slopes",
    "Model 2: random slopes"
  ),
  pred.labels = c(
    "Intercept",
    "HH size (excluding children up to 14 years old)",
    "Source of income: Pensions [vs. Wages or salaries]",
    "Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries]",
    "Source of income: Income from investments, savings or other sources [vs. Wages or salaries]",
    "Source of income: Income from self-employement or farming [vs. Wages or salaries]",
    "Source of income: 'Do not know' [vs. Wages or salaries]",
    "Source of income: Refusal [vs. Wages or salaries]",
    "Conservation (proxy of social desirability)",
    "Social trust index",
    "Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3]",
    "Lower secondary education completed (ISCED 2) [vs. ISCED 3]",
    "Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3]",
    "Gender of the respondent [Male = 1]",
    "Age of the respondent",
    "Gender of the interviewer [Male = 1]",
    "Age of the interviewer 31-40 [vs. up to 30]",
    "Age of the interviewer 41-50 [vs. up to 30]",
    "Age of the interviewer 51-60 [vs. up to 30]",
    "Age of the interviewer 61 and older [vs. up to 30]",
    "Response rate (RR1)",
    "Fraction of experienced interviewers",
    "Within household selection performed by the interviewers [vs. individual register sample]",
    "Masculinity vs. femininity [Hofstede's MAS]",
    "Size of the Shadow Economy [in % of official GDP]"
  ),
  # Significance stars instead of p-values
  p.style = "stars",
  p.threshold = c(0.05, 0.01, 0.001),
  digits = 3,
  digits.re = 3,
  # Standard errors are shown instead of confidence intervals
  show.se = TRUE,
  show.ci = FALSE,
  show.aic = TRUE,
  show.loglik = TRUE,
  show.re.var = FALSE,
  show.icc = FALSE,
  show.r2 = FALSE
)

Table8
Model 2: null model | Model 2: fixed slopes | Model 2: random slopes | ||||
---|---|---|---|---|---|---|
Predictors | Odds Ratios | std. Error | Odds Ratios | std. Error | Odds Ratios | std. Error |
Intercept | 0.047 *** | 0.014 | 0.007 *** | 0.002 | 0.007 *** | 0.002 |
HH size (excluding children up to 14 years old) | 1.080 *** | 0.011 | 1.076 *** | 0.022 | ||
Source of income: Pensions [vs. Wages or salaries] | 0.888 *** | 0.030 | 0.881 *** | 0.030 | ||
Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries] | 0.597 *** | 0.035 | 0.582 *** | 0.035 | ||
Source of income: Income from investments, savings or other sources [vs. Wages or salaries] | 1.781 *** | 0.216 | 1.705 *** | 0.209 | ||
Source of income: Income from self-employement or farming [vs. Wages or salaries] | 1.506 *** | 0.055 | 1.475 *** | 0.054 | ||
Source of income: ‘Do not know’ [vs. Wages or salaries] | 2.028 *** | 0.254 | 1.971 *** | 0.247 | ||
Source of income: Refusal [vs. Wages or salaries] | 118.363 *** | 16.604 | 110.229 *** | 15.230 | ||
Conservation (proxy of social desirability) | 1.042 ** | 0.013 | 1.023 | 0.046 | ||
Social trust index | 0.852 *** | 0.010 | 0.848 *** | 0.016 | ||
Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3] | 1.167 *** | 0.045 | 1.114 ** | 0.044 | ||
Lower secondary education completed (ISCED 2) [vs. ISCED 3] | 0.994 | 0.030 | 0.994 | 0.031 | ||
Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3] | 1.095 *** | 0.030 | 1.096 *** | 0.030 | ||
Gender of the respondent [Male = 1] | 0.874 *** | 0.019 | 0.865 *** | 0.019 | ||
Age of the respondent | 1.013 *** | 0.001 | 1.014 *** | 0.001 | ||
Gender of the interviewer [Male = 1] | 0.848 *** | 0.040 | 0.827 *** | 0.039 | ||
Age of the interviewer 31-40 [vs. up to 30] | 1.341 * | 0.154 | 1.356 ** | 0.156 | ||
Age of the interviewer 41-50 [vs. up to 30] | 1.348 ** | 0.146 | 1.372 ** | 0.148 | ||
Age of the interviewer 51-60 [vs. up to 30] | 1.101 | 0.117 | 1.118 | 0.118 | ||
Age of the interviewer 61 and older [vs. up to 30] | 1.041 | 0.113 | 1.060 | 0.115 | ||
Response rate (RR1) | 2.486 * | 1.068 | 2.653 * | 1.140 | ||
Fraction of experienced interviewers | 1.491 ** | 0.188 | 1.458 ** | 0.183 | ||
Within household selection performed by the interviewers [vs. individual register sample] | 1.434 ** | 0.190 | 1.326 * | 0.178 | ||
Masculinity vs. femininity [Hofstede’s MAS] | 1.029 *** | 0.008 | 1.034 *** | 0.010 | ||
Size of the Shadow Economy [in % of official GDP] | 1.086 *** | 0.024 | 1.083 * | 0.034 | ||
N | 13427 T_INTERVIEWER_ID | 13427 T_INTERVIEWER_ID | 13427 T_INTERVIEWER_ID | |||
17 cntry | 17 cntry | 17 cntry | ||||
6 essround | 6 essround | 6 essround | ||||
Observations | 170522 | 170522 | 170522 | |||
AIC | 79916.920 | 76880.255 | 76734.449 | |||
log-Likelihood | -39954.460 | -38412.128 | -38330.225 | |||
|
Table B6. Multilevel cross-classified logistic regression predicting the odds of refusals (vs. ‘Do not know’) in the income question
# Tab model for Refusal vs. "Don't know"
# Table 9 Models for Refusals vs. "Don't know"
#
# The predictor labels are supplied as an unnamed vector, so their order has
# to follow the order of the fixed-effect terms in m1_Ref_DK / m2_Ref_DK.
# NOTE(review): S_RR_value is labelled "Response rate (RR2)" in the
# survey-level descriptive table but "(RR1)" here -- confirm which applies.
Table9 <- tab_model(
  m0_Ref_DK, m1_Ref_DK, m2_Ref_DK,
  dv.labels = c(
    "Model 3: null model",
    "Model 3: fixed slopes",
    "Model 3: random slopes"
  ),
  pred.labels = c(
    "Intercept",
    "HH size (excluding children up to 14 years old)",
    "Source of income: Pensions [vs. Wages or salaries]",
    "Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries]",
    "Source of income: Income from investments, savings or other sources [vs. Wages or salaries]",
    "Source of income: Income from self-employement or farming [vs. Wages or salaries]",
    "Source of income: 'Do not know' [vs. Wages or salaries]",
    "Source of income: Refusal [vs. Wages or salaries]",
    "Conservation (proxy of social desirability)",
    "Social trust index",
    "Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3]",
    "Lower secondary education completed (ISCED 2) [vs. ISCED 3]",
    "Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3]",
    "Gender of the respondent [Male = 1]",
    "Age of the respondent",
    "Gender of the interviewer [Male = 1]",
    "Age of the interviewer 31-40 [vs. up to 30]",
    "Age of the interviewer 41-50 [vs. up to 30]",
    "Age of the interviewer 51-60 [vs. up to 30]",
    "Age of the interviewer 61 and older [vs. up to 30]",
    "Response rate (RR1)",
    "Fraction of experienced interviewers",
    "Within household selection performed by the interviewers [vs. individual register sample]",
    "Uncertainty avoidance index (Hofstede's UAI)",
    "Masculinity vs. femininity [Hofstede's MAS]",
    "Size of the Shadow Economy [in % of official GDP]"
  ),
  # Significance stars instead of p-values
  p.style = "stars",
  p.threshold = c(0.05, 0.01, 0.001),
  digits = 3,
  digits.re = 3,
  # Standard errors are shown instead of confidence intervals
  show.se = TRUE,
  show.ci = FALSE,
  show.aic = TRUE,
  show.loglik = TRUE,
  show.re.var = FALSE,
  show.icc = FALSE,
  show.r2 = FALSE
)

Table9
Model 3: null model | Model 3: fixed slopes | Model 3: random slopes | ||||
---|---|---|---|---|---|---|
Predictors | Odds Ratios | std. Error | Odds Ratios | std. Error | Odds Ratios | std. Error |
Intercept | 0.837 | 0.225 | 0.265 ** | 0.117 | 0.271 ** | 0.119 |
HH size (excluding children up to 14 years old) | 0.571 *** | 0.011 | 0.565 *** | 0.015 | ||
Source of income: Pensions [vs. Wages or salaries] | 0.452 *** | 0.030 | 0.442 *** | 0.029 | ||
Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries] | 0.766 * | 0.080 | 0.787 * | 0.082 | ||
Source of income: Income from investments, savings or other sources [vs. Wages or salaries] | 0.509 ** | 0.119 | 0.510 ** | 0.119 | ||
Source of income: Income from self-employement or farming [vs. Wages or salaries] | 0.579 *** | 0.035 | 0.593 *** | 0.036 | ||
Source of income: ‘Do not know’ [vs. Wages or salaries] | 0.109 *** | 0.015 | 0.111 *** | 0.015 | ||
Source of income: Refusal [vs. Wages or salaries] | 33.449 *** | 6.396 | 31.712 *** | 5.970 | ||
Conservation (proxy of social desirability) | 1.075 ** | 0.024 | 1.087 * | 0.043 | ||
Social trust index | 0.857 *** | 0.017 | 0.880 ** | 0.036 | ||
Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3] | 0.410 *** | 0.027 | 0.395 *** | 0.027 | ||
Lower secondary education completed (ISCED 2) [vs. ISCED 3] | 0.503 *** | 0.026 | 0.522 *** | 0.027 | ||
Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3] | 1.789 *** | 0.096 | 1.828 *** | 0.099 | ||
Gender of the respondent [Male = 1] | 1.346 *** | 0.053 | 1.376 *** | 0.054 | ||
Age of the respondent | 1.060 *** | 0.002 | 1.060 *** | 0.002 | ||
Gender of the interviewer [Male = 1] | 1.090 | 0.073 | 1.142 * | 0.077 | ||
Age of the interviewer 31-40 [vs. up to 30] | 1.396 * | 0.231 | 1.421 * | 0.235 | ||
Age of the interviewer 41-50 [vs. up to 30] | 1.384 * | 0.214 | 1.367 * | 0.211 | ||
Age of the interviewer 51-60 [vs. up to 30] | 1.046 | 0.159 | 1.050 | 0.159 | ||
Age of the interviewer 61 and older [vs. up to 30] | 1.105 | 0.172 | 1.108 | 0.172 | ||
Response rate (RR1) | 0.808 | 0.451 | 0.747 | 0.425 | ||
Fraction of experienced interviewers | 0.966 | 0.164 | 0.939 | 0.159 | ||
Within household selection performed by the interviewers [vs. individual register sample] | 0.751 | 0.138 | 0.779 | 0.139 | ||
Uncertainty avoidance index (Hofstede’s UAI) | 1.010 | 0.013 | 1.005 | 0.010 | ||
Masculinity vs. femininity [Hofstede’s MAS] | 1.021 * | 0.010 | 1.024 ** | 0.009 | ||
Size of the Shadow Economy [in % of official GDP] | 1.082 * | 0.035 | 1.098 ** | 0.033 | ||
N | 8650 T_INTERVIEWER_ID | 8650 T_INTERVIEWER_ID | 8650 T_INTERVIEWER_ID | |||
17 cntry | 17 cntry | 17 cntry | ||||
6 essround | 6 essround | 6 essround | ||||
Observations | 32732 | 32732 | 32732 | |||
AIC | 34523.817 | 26883.397 | 26838.651 | |||
log-Likelihood | -17257.908 | -13412.699 | -13381.325 | |||
|
Table B7. Item nonresponse on ‘household’s total net income’ across countries in the ESS rounds 4-9
# Table B7 ("Table 1" in the code). Distribution of item nonresponse on
# 'household's total net income' in the ESS rounds 4-9: share of substantive
# responses, 'Don't know' answers and refusals per country and ESS round.
Table1 <- Data_for_analysis %>%
  filter(!is.na(hinctnta_item_nonresp)) %>%
  count(cntry, essround, hinctnta_item_nonresp) %>%
  # One column per answer type. An answer type that never occurs in a
  # country-round is filled with a zero count (not NA), so the shares below
  # remain defined for that row.
  pivot_wider(names_from = hinctnta_item_nonresp,
              values_from = n,
              values_fill = 0L) %>%
  # Plain column arithmetic is already row-wise; rowwise() is not needed.
  mutate(n = Response + DK + Refusal,
         Response = round(Response / n, digits = 3),
         DK = round(DK / n, digits = 3),
         Refusal = round(Refusal / n, digits = 3)) %>%
  select(-n) %>%
  qflextable() %>%
  align(j = 2:5, align = "center", part = "all")
Table1
cntry | essround | Response | DK | Refusal |
---|---|---|---|---|
BE | 4 | 0.890 | 0.068 | 0.041 |
BE | 5 | 0.856 | 0.086 | 0.058 |
BE | 6 | 0.912 | 0.086 | 0.002 |
BE | 7 | 0.915 | 0.083 | 0.002 |
BE | 8 | 0.951 | 0.039 | 0.010 |
BE | 9 | 0.922 | 0.059 | 0.019 |
CH | 4 | 0.759 | 0.066 | 0.175 |
CH | 5 | 0.819 | 0.100 | 0.081 |
CH | 6 | 0.829 | 0.084 | 0.088 |
CH | 7 | 0.847 | 0.079 | 0.074 |
CH | 8 | 0.820 | 0.082 | 0.098 |
CH | 9 | 0.780 | 0.101 | 0.119 |
CZ | 4 | 0.727 | 0.059 | 0.214 |
CZ | 5 | 0.721 | 0.066 | 0.213 |
CZ | 6 | 0.733 | 0.054 | 0.213 |
CZ | 7 | 0.723 | 0.060 | 0.217 |
CZ | 8 | 0.761 | 0.061 | 0.178 |
CZ | 9 | 0.674 | 0.078 | 0.248 |
DE | 4 | 0.831 | 0.046 | 0.123 |
DE | 5 | 0.791 | 0.069 | 0.140 |
DE | 6 | 0.863 | 0.058 | 0.079 |
DE | 7 | 0.888 | 0.042 | 0.070 |
DE | 8 | 0.893 | 0.036 | 0.070 |
DE | 9 | 0.885 | 0.043 | 0.071 |
EE | 4 | 0.858 | 0.074 | 0.068 |
EE | 5 | 0.848 | 0.075 | 0.077 |
EE | 6 | 0.828 | 0.079 | 0.093 |
EE | 8 | 0.971 | 0.020 | 0.009 |
EE | 9 | 0.975 | 0.015 | 0.010 |
ES | 4 | 0.630 | 0.153 | 0.217 |
ES | 5 | 0.777 | 0.100 | 0.123 |
ES | 6 | 0.835 | 0.069 | 0.096 |
ES | 7 | 0.790 | 0.082 | 0.128 |
ES | 8 | 0.775 | 0.093 | 0.132 |
ES | 9 | 0.722 | 0.120 | 0.158 |
FI | 4 | 0.918 | 0.067 | 0.015 |
FI | 5 | 0.916 | 0.069 | 0.015 |
FI | 6 | 0.938 | 0.051 | 0.011 |
FI | 7 | 0.930 | 0.057 | 0.012 |
FI | 8 | 0.948 | 0.041 | 0.011 |
FI | 9 | 0.931 | 0.052 | 0.017 |
FR | 4 | 0.900 | 0.045 | 0.055 |
FR | 5 | 0.918 | 0.028 | 0.054 |
FR | 6 | 0.907 | 0.032 | 0.061 |
FR | 7 | 0.938 | 0.023 | 0.038 |
FR | 8 | 0.910 | 0.029 | 0.061 |
FR | 9 | 0.891 | 0.034 | 0.075 |
GB | 4 | 0.854 | 0.080 | 0.066 |
GB | 5 | 0.787 | 0.161 | 0.052 |
GB | 6 | 0.786 | 0.167 | 0.047 |
GB | 7 | 0.840 | 0.078 | 0.082 |
GB | 8 | 0.838 | 0.072 | 0.090 |
GB | 9 | 0.841 | 0.070 | 0.089 |
HU | 4 | 0.737 | 0.054 | 0.209 |
HU | 5 | 0.777 | 0.058 | 0.165 |
HU | 6 | 0.706 | 0.054 | 0.240 |
HU | 7 | 0.711 | 0.055 | 0.234 |
HU | 8 | 0.636 | 0.055 | 0.309 |
HU | 9 | 0.598 | 0.050 | 0.352 |
IE | 4 | 0.883 | 0.061 | 0.057 |
IE | 5 | 0.676 | 0.114 | 0.210 |
IE | 6 | 0.735 | 0.104 | 0.161 |
IE | 7 | 0.803 | 0.062 | 0.135 |
IE | 8 | 0.745 | 0.100 | 0.155 |
IE | 9 | 0.722 | 0.117 | 0.162 |
LT | 5 | 0.785 | 0.066 | 0.149 |
LT | 6 | 0.833 | 0.047 | 0.120 |
LT | 7 | 0.811 | 0.049 | 0.140 |
LT | 8 | 0.843 | 0.056 | 0.101 |
LT | 9 | 0.862 | 0.035 | 0.103 |
NL | 4 | 0.881 | 0.058 | 0.060 |
NL | 5 | 0.810 | 0.091 | 0.098 |
NL | 6 | 0.849 | 0.079 | 0.072 |
NL | 7 | 0.901 | 0.075 | 0.024 |
NL | 8 | 0.884 | 0.086 | 0.030 |
NL | 9 | 0.831 | 0.122 | 0.047 |
NO | 4 | 0.957 | 0.032 | 0.012 |
NO | 5 | 0.954 | 0.034 | 0.012 |
NO | 6 | 0.958 | 0.033 | 0.009 |
NO | 7 | 0.955 | 0.036 | 0.009 |
NO | 8 | 0.948 | 0.047 | 0.005 |
NO | 9 | 0.921 | 0.065 | 0.014 |
PL | 4 | 0.808 | 0.088 | 0.104 |
PL | 5 | 0.755 | 0.111 | 0.134 |
PL | 6 | 0.785 | 0.090 | 0.125 |
PL | 7 | 0.747 | 0.097 | 0.156 |
PL | 8 | 0.755 | 0.084 | 0.161 |
PL | 9 | 0.613 | 0.116 | 0.271 |
PT | 4 | 0.420 | 0.226 | 0.354 |
PT | 6 | 0.471 | 0.157 | 0.371 |
PT | 7 | 0.845 | 0.059 | 0.096 |
PT | 8 | 0.893 | 0.059 | 0.048 |
PT | 9 | 0.804 | 0.078 | 0.118 |
SI | 4 | 0.795 | 0.107 | 0.098 |
SI | 5 | 0.771 | 0.105 | 0.123 |
SI | 6 | 0.746 | 0.118 | 0.136 |
SI | 7 | 0.819 | 0.098 | 0.083 |
SI | 8 | 0.877 | 0.055 | 0.068 |
SI | 9 | 0.874 | 0.057 | 0.069 |
Note: Countries are labelled according to ISO 3166-1 alpha-2 codes
Figure B3. Fraction of item nonresponses across core module items in the ESS 4-9
# Figure B3 ("Figure Z" in the code). Fraction of item nonresponses across
# core module items in the ESS 4-9. Each dot is one item in one round; the
# household income item is drawn in a different colour.
FigureZ <- Data_for_analysis %>%
  mutate(Year = case_when(essround == 4 ~ 2008,
                          essround == 5 ~ 2010,
                          essround == 6 ~ 2012,
                          essround == 7 ~ 2014,
                          essround == 8 ~ 2016,
                          essround == 9 ~ 2018)) %>%
  group_by(Year) %>%
  summarise(
    # Mean of every core-module nonresponse indicator, in the original
    # column order.
    across(
      all_of(paste0(
        c("ppltrst", "pplfair", "pplhlp", "polintr", "trstprl", "trstlgl",
          "trstplc", "trstplt", "trstprt", "trstep", "trstun", "vote",
          "contplt", "wrkprty", "wrkorg", "badge", "sgnptit", "pbldmn",
          "bctprd", "clsprty", "lrscale", "stflife", "stfeco", "stfgov",
          "stfdem", "stfedu", "stfhlth", "gincdif", "freehms", "euftf",
          "imsmetn", "imdfetn", "impcntr", "imbgeco", "imueclt", "imwbcnt",
          "happy", "sclmeet", "sclact", "crmvct", "aesfdrk", "health",
          "hlthhmp", "rlgblg", "rlgdgr", "rlgatnd", "pray", "dscrgrp"),
        "_item_nonresp"
      )),
      ~ mean(.x, na.rm = TRUE)
    ),
    # The income item is averaged from `hinctnta_inr`, not from the
    # `hinctnta_item_nonresp` column.
    # NOTE(review): presumably `hinctnta_inr` is the numeric 0/1 version of the
    # income nonresponse indicator - confirm where it is created.
    hinctnta_item_nonresp = mean(hinctnta_inr, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = !Year, names_to = "variable", values_to = "inr_rate") %>%
  mutate(color = ifelse(variable == "hinctnta_item_nonresp", "1", "0"),
         # A rate of exactly 1 is blanked out so it is not plotted.
         # NOTE(review): presumably these are items not fielded in a given
         # round - confirm.
         inr_rate = ifelse(inr_rate == 1, NA_real_, inr_rate)) %>%
  ggplot(aes(x = Year, y = inr_rate)) +
  geom_boxplot(aes(group = Year), width = 0.75, notch = TRUE,
               outlier.shape = NA) +
  geom_jitter(aes(colour = color, fill = color), alpha = 0.7, width = 0.15) +
  scale_color_manual(values = c("grey60", "blue4")) +
  scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018)) +
  ylab("Fraction of item nonresponses\nacross the ESS core module items") +
  xlab("ESS round / year") +
  labs(caption = "Note: Blue dot indicate 'HH total net income' question") +
  # "none" replaces the deprecated FALSE for suppressing a legend.
  guides(color = "none",
         fill = "none") +
  theme_bw() +
  theme(legend.position = "none",
        plot.title = element_text(size = 10, face = "bold"),
        axis.text = element_text(color = "black", size = 10),
        axis.title = element_text(color = "black", size = 10),
        panel.grid.minor.x = element_blank())
FigureZ
This R code replicates Figure 1 from the paper
# Fig2. Fraction of item-nonresponses in the ESS question on HH's total net
# income: 'Don't know' and refusal shares per country, drawn as alluvia over
# the ESS rounds with one facet per country.
Figure2 <- Data_for_analysis %>%
  filter(!is.na(hinctnta_item_nonresp)) %>%
  count(cntry, essround, hinctnta_item_nonresp) %>%
  # An answer type absent from a country-round becomes a zero count (not NA),
  # so the shares below remain defined for that row.
  pivot_wider(names_from = hinctnta_item_nonresp,
              values_from = n,
              values_fill = 0L) %>%
  # Vectorised column arithmetic; countrycode() is called once for the whole
  # column instead of once per row.
  mutate(n = Response + DK + Refusal,
         DK = round(DK / n, digits = 3),
         Refusal = round(Refusal / n, digits = 3),
         Year = case_when(essround == 4 ~ 2008,
                          essround == 5 ~ 2010,
                          essround == 6 ~ 2012,
                          essround == 7 ~ 2014,
                          essround == 8 ~ 2016,
                          essround == 9 ~ 2018),
         cntry = countrycode::countrycode(cntry,
                                          origin = "iso2c",
                                          destination = "country.name")) %>%
  select(cntry, essround, Year, DK, Refusal) %>%
  # Columns are selected by name rather than by position.
  pivot_longer(cols = c(DK, Refusal),
               names_to = "inr_type",
               values_to = "prop") %>%
  ggplot(aes(x = Year, y = prop, alluvium = inr_type)) +
  ggalluvial::geom_alluvium(aes(fill = inr_type, colour = inr_type),
                            alpha = .5, decreasing = FALSE,
                            curve_type = "sigmoid") +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  scale_color_manual(name = "Type of item nonresponse",
                     values = c("#1380A1", "#588300")) +
  scale_fill_manual(name = "Type of item nonresponse",
                    values = c("#1380A1", "#588300")) +
  ylab("Cumulative fraction of item nonresponse in the ESS question on HH's total net income") +
  ylim(0, 0.7) +
  xlab("ESS round") +
  theme_bw() +
  theme(legend.position = c(0.85, 0.07),
        panel.grid.minor.x = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(colour = "grey90"),
        axis.text = element_text(color = "black", size = 9),
        axis.title = element_text(color = "black", size = 11),
        strip.background = element_rect(color = "black", fill = "aliceblue"),
        strip.text.x = element_text(colour = "black", size = 9)) +
  facet_wrap("cntry", ncol = 3)
Figure2
Table B8. ICC values derived from null models
# Table B8 ("Table 10" in the code). ICC derived from null models.

# Intraclass correlations of a null model, one per grouping level.
#
# model:             a fitted null model whose summary()$varcor holds the
#                    random-intercept variances for T_INTERVIEWER_ID, cntry
#                    and essround.
# residual_variance: level-1 variance used in the denominator; 3.29 is the
#                    value used for all three models (approximately pi^2 / 3,
#                    the variance of the standard logistic distribution).
# Returns an unnamed character vector (interviewers, countries, ESS rounds),
# each ICC rounded and printed with three decimals.
icc_by_level <- function(model, residual_variance = 3.29) {
  varcor <- summary(model)$varcor
  level_variance <- vapply(
    c("T_INTERVIEWER_ID", "cntry", "essround"),
    # Each component is a 1 x 1 variance matrix; keep only the number.
    function(level) as.double(varcor[[level]]),
    numeric(1)
  )
  icc <- round(level_variance / (sum(level_variance) + residual_variance),
               digits = 3)
  # Format element by element so every value keeps exactly three decimals.
  unname(vapply(icc, format, character(1), nsmall = 3))
}

Table11 <- data.frame(Level = c("Interviewers", "Countries", "ESS rounds"),
                      model1 = icc_by_level(m0_DK),
                      model2 = icc_by_level(m0_Ref),
                      model3 = icc_by_level(m0_Ref_DK)) %>%
  rename(`Null model 1:\n'Don't know' vs. Response` = model1,
         `Null model 2:\nRefusal vs. Response` = model2,
         # Header previously read "Refusalvs." (missing space).
         `Null model 3:\nRefusal vs. 'Don't know'` = model3) %>%
  qflextable() %>%
  bold(bold = TRUE, part = "header") %>%
  align(j = c(2, 3, 4), align = "center", part = "all")
Table11
Level | Null model 1: | Null model 2: | Null model 3: |
---|---|---|---|
Interviewers | 0.313 | 0.330 | 0.317 |
Countries | 0.047 | 0.188 | 0.177 |
ESS rounds | 0.006 | 0.006 | 0.002 |
# Null (intercept-only) logistic mixed models fitted to
# Income_inr_DKvsResponse. The full model has random intercepts for
# interviewers, countries and ESS rounds; each reduced model leaves one of
# them out so that level can be tested against the full model.
m0_DK <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) +
    (1 | essround),
  weights = anweight,
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the ESS-round intercept.
m0_DK_remove_ESS_round <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry),
  weights = anweight,
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the interviewer intercept.
m0_DK_remove_interviewers <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround),
  weights = anweight,
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the country intercept.
m0_DK_remove_countries <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround),
  weights = anweight,
  data = Income_inr_DKvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Compare the model without the ESS-round intercept against the full model.
anova(m0_DK_remove_ESS_round, m0_DK, refit = FALSE)
## Data: Income_inr_DKvsResponse
## Models:
## m0_DK_remove_ESS_round: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry)
## m0_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_DK_remove_ESS_round 3 82073 82103 -41033 82067
## m0_DK 4 82000 82040 -40996 81992 75.38 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the interviewer intercept against the full model.
anova(m0_DK_remove_interviewers, m0_DK, refit = FALSE)
## Data: Income_inr_DKvsResponse
## Models:
## m0_DK_remove_interviewers: hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround)
## m0_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_DK_remove_interviewers 3 92773 92803 -46383 92767
## m0_DK 4 82000 82040 -40996 81992 10775 1 < 2.2e-16
##
## m0_DK_remove_interviewers
## m0_DK ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the country intercept against the full model.
anova(m0_DK_remove_countries, m0_DK, refit = FALSE)
## Data: Income_inr_DKvsResponse
## Models:
## m0_DK_remove_countries: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround)
## m0_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_DK_remove_countries 3 82478 82508 -41236 82472
## m0_DK 4 82000 82040 -40996 81992 480.33 1 < 2.2e-16
##
## m0_DK_remove_countries
## m0_DK ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Drop the four null models fitted to Income_inr_DKvsResponse.
rm(
  m0_DK,
  m0_DK_remove_countries,
  m0_DK_remove_ESS_round,
  m0_DK_remove_interviewers
)

# Null (intercept-only) logistic mixed models fitted to
# Income_inr_RefusalvsResponse: the full three-level specification, followed
# by three reduced models that each leave out one random intercept.
m0_Ref <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) +
    (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the ESS-round intercept.
m0_Ref_remove_ESS_round <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry),
  weights = anweight,
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the interviewer intercept.
m0_Ref_remove_interviewers <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the country intercept.
m0_Ref_remove_countries <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsResponse,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Compare the model without the ESS-round intercept against the full model.
anova(m0_Ref_remove_ESS_round, m0_Ref, refit = FALSE)
## Data: Income_inr_RefusalvsResponse
## Models:
## m0_Ref_remove_ESS_round: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry)
## m0_Ref: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_Ref_remove_ESS_round 3 79981 80011 -39988 79975
## m0_Ref 4 79917 79957 -39954 79909 66.408 1 3.666e-16
##
## m0_Ref_remove_ESS_round
## m0_Ref ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the interviewer intercept against the full model.
anova(m0_Ref_remove_interviewers, m0_Ref, refit = FALSE)
## Data: Income_inr_RefusalvsResponse
## Models:
## m0_Ref_remove_interviewers: hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround)
## m0_Ref: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_Ref_remove_interviewers 3 97222 97253 -48608 97216
## m0_Ref 4 79917 79957 -39954 79909 17308 1 < 2.2e-16
##
## m0_Ref_remove_interviewers
## m0_Ref ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the country intercept against the full model.
anova(m0_Ref_remove_countries, m0_Ref, refit = FALSE)
## Data: Income_inr_RefusalvsResponse
## Models:
## m0_Ref_remove_countries: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround)
## m0_Ref: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0_Ref_remove_countries 3 81312 81342 -40653 81306
## m0_Ref 4 79917 79957 -39954 79909 1396.7 1 < 2.2e-16
##
## m0_Ref_remove_countries
## m0_Ref ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Drop the four null models fitted to Income_inr_RefusalvsResponse.
rm(
  m0_Ref,
  m0_Ref_remove_countries,
  m0_Ref_remove_ESS_round,
  m0_Ref_remove_interviewers
)

# Null (intercept-only) logistic mixed models fitted to
# Income_inr_RefusalvsDK: the full three-level specification, followed by
# three reduced models that each leave out one random intercept.
m0_Ref_DK <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) +
    (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the ESS-round intercept.
m0_Ref_DK_remove_ESS_round <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry),
  weights = anweight,
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the interviewer intercept.
m0_Ref_DK_remove_interviewers <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Without the country intercept.
m0_Ref_DK_remove_countries <- glmer(
  hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround),
  weights = anweight,
  data = Income_inr_RefusalvsDK,
  family = binomial(link = "logit"),
  nAGQ = 1
)

# Compare the model without the ESS-round intercept against the full model.
anova(m0_Ref_DK_remove_ESS_round, m0_Ref_DK, refit = FALSE)
## Data: Income_inr_RefusalvsDK
## Models:
## m0_Ref_DK_remove_ESS_round: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry)
## m0_Ref_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df
## m0_Ref_DK_remove_ESS_round 3 34531 34556 -17262 34525
## m0_Ref_DK 4 34524 34557 -17258 34516 9.2366 1
## Pr(>Chisq)
## m0_Ref_DK_remove_ESS_round
## m0_Ref_DK 0.002372 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the interviewer intercept against the full model.
anova(m0_Ref_DK_remove_interviewers, m0_Ref_DK, refit = FALSE)
## Data: Income_inr_RefusalvsDK
## Models:
## m0_Ref_DK_remove_interviewers: hinctnta_item_nonresp ~ 1 + (1 | cntry) + (1 | essround)
## m0_Ref_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df
## m0_Ref_DK_remove_interviewers 3 39900 39925 -19947 39894
## m0_Ref_DK 4 34524 34557 -17258 34516 5378.5 1
## Pr(>Chisq)
## m0_Ref_DK_remove_interviewers
## m0_Ref_DK < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the model without the country intercept against the full model.
anova(m0_Ref_DK_remove_countries, m0_Ref_DK, refit = FALSE)
## Data: Income_inr_RefusalvsDK
## Models:
## m0_Ref_DK_remove_countries: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | essround)
## m0_Ref_DK: hinctnta_item_nonresp ~ 1 + (1 | T_INTERVIEWER_ID) + (1 | cntry) + (1 | essround)
## npar AIC BIC logLik deviance Chisq Df
## m0_Ref_DK_remove_countries 3 35528 35553 -17761 35522
## m0_Ref_DK 4 34524 34557 -17258 34516 1006.2 1
## Pr(>Chisq)
## m0_Ref_DK_remove_countries
## m0_Ref_DK < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Drop the four null models fitted to Income_inr_RefusalvsDK.
rm(
  m0_Ref_DK,
  m0_Ref_DK_remove_countries,
  m0_Ref_DK_remove_ESS_round,
  m0_Ref_DK_remove_interviewers
)
This R code replicates Table 4 from the paper
# Table 11 (object Table12). Summary of random slopes models: odds ratios and
# standard errors of m2_DK, m2_Ref and m2_Ref_DK side by side, with AIC and
# log-likelihood; random-effect variances, ICC and R2 are not shown.
#
# pred.labels is unnamed, so labels are attached to the table rows by
# position. In the previously rendered table, the row labelled
# "PISA's Mathematics Scale" held the Model 3 estimate (1.005, SE 0.010) that
# the preceding table reports for Model 3's uncertainty avoidance index, and
# that model has no PISA term. The UAI label is therefore placed before the
# PISA label.
# NOTE(review): confirm the row order against the coefficient names of
# m2_DK / m2_Ref; a named pred.labels vector (names = model terms) would make
# the labelling independent of row order.
Table12 <- tab_model(m2_DK, m2_Ref, m2_Ref_DK,
                     p.style = "stars",
                     p.threshold = c(0.05, 0.01, 0.001),
                     digits = 3,
                     digits.re = 3,
                     show.ci = FALSE,
                     show.se = TRUE,
                     show.aic = TRUE,
                     show.loglik = TRUE,
                     show.re.var = FALSE,
                     show.icc = FALSE,
                     show.r2 = FALSE,
                     pred.labels = c("Intercept",
                                     "HH size (excluding children up to 14 years old)",
                                     "Source of income: Pensions [vs. Wages or salaries]",
                                     "Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries]",
                                     "Source of income: Income from investments, savings or other sources [vs. Wages or salaries]",
                                     "Source of income: Income from self-employement or farming [vs. Wages or salaries]",
                                     "Source of income: 'Do not know' [vs. Wages or salaries]",
                                     "Source of income: Refusal [vs. Wages or salaries]",
                                     "Conservation (proxy of social desirability)",
                                     "Social trust index",
                                     "Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3]",
                                     "Lower secondary education completed (ISCED 2) [vs. ISCED 3]",
                                     "Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3]",
                                     "Gender of the respondent [Male = 1]",
                                     "Age of the respondent",
                                     "Gender of the interviewer [Male = 1]",
                                     "Age of the interviewer 31-40 [vs. up to 30]",
                                     "Age of the interviewer 41-50 [vs. up to 30]",
                                     "Age of the interviewer 51-60 [vs. up to 30]",
                                     "Age of the interviewer 61 and older [vs. up to 30]",
                                     "Response rate (RR1)",
                                     "Fraction of experienced interviewers",
                                     "Within household selection performed by the interviewers [vs. individual register sample]",
                                     # UAI before PISA: see the note above.
                                     "Uncertainty avoidance index (Hofstede's UAI)",
                                     "PISA's Mathematics Scale",
                                     "Masculinity vs. femininity [Hofstede's MAS]",
                                     "Size of the Shadow Economy [in % of official GDP]"),
                     dv.labels = c("Model 1: 'Don't know' vs. Response (random slopes)",
                                   "Model 2: Refusal vs. Response (random slopes)",
                                   "Model 3: Refusal vs. 'Don't know' (random slopes)"))
Table12
Model 1: ‘Don’t know’ vs. Response (random slopes) | Model 2: Refusal vs. Response (random slopes) | Model 3: Refusal vs. ‘Don’t know’ (random slopes) | ||||
---|---|---|---|---|---|---|
Predictors | Odds Ratios | std. Error | Odds Ratios | std. Error | Odds Ratios | std. Error |
Intercept | 0.021 *** | 0.006 | 0.007 *** | 0.002 | 0.271 ** | 0.119 |
HH size (excluding children up to 14 years old) | 1.755 *** | 0.043 | 1.076 *** | 0.022 | 0.565 *** | 0.015 |
Source of income: Pensions [vs. Wages or salaries] | 2.461 *** | 0.101 | 0.881 *** | 0.030 | 0.442 *** | 0.029 |
Source of income: Unemployement benefits or any other social benefits [vs. Wages or salaries] | 0.706 *** | 0.038 | 0.582 *** | 0.035 | 0.787 * | 0.082 |
Source of income: Income from investments, savings or other sources [vs. Wages or salaries] | 3.900 *** | 0.525 | 1.705 *** | 0.209 | 0.510 ** | 0.119 |
Source of income: Income from self-employement or farming [vs. Wages or salaries] | 2.104 *** | 0.078 | 1.475 *** | 0.054 | 0.593 *** | 0.036 |
Source of income: ‘Do not know’ [vs. Wages or salaries] | 22.085 *** | 1.984 | 1.971 *** | 0.247 | 0.111 *** | 0.015 |
Source of income: Refusal [vs. Wages or salaries] | 7.916 *** | 2.049 | 110.229 *** | 15.230 | 31.712 *** | 5.970 |
Conservation (proxy of social desirability) | 0.973 | 0.021 | 1.023 | 0.046 | 1.087 * | 0.043 |
Social trust index | 1.028 | 0.016 | 0.848 *** | 0.016 | 0.880 ** | 0.036 |
Education: Less than lower secondary education (ISCED 0-1) [vs. ISCED 3] | 3.145 *** | 0.121 | 1.114 ** | 0.044 | 0.395 *** | 0.027 |
Lower secondary education completed (ISCED 2) [vs. ISCED 3] | 1.982 *** | 0.061 | 0.994 | 0.031 | 0.522 *** | 0.027 |
Post-secondary non-teritary & teritary education completed (ISCED 4-6) [vs. ISCED 3] | 0.665 *** | 0.022 | 1.096 *** | 0.030 | 1.828 *** | 0.099 |
Gender of the respondent [Male = 1] | 0.699 *** | 0.016 | 0.865 *** | 0.019 | 1.376 *** | 0.054 |
Age of the respondent | 0.954 *** | 0.001 | 1.014 *** | 0.001 | 1.060 *** | 0.002 |
Gender of the interviewer [Male = 1] | 0.821 *** | 0.038 | 0.827 *** | 0.039 | 1.142 * | 0.077 |
Age of the interviewer 31-40 [vs. up to 30] | 1.042 | 0.126 | 1.356 ** | 0.156 | 1.421 * | 0.235 |
Age of the interviewer 41-50 [vs. up to 30] | 1.168 | 0.131 | 1.372 ** | 0.148 | 1.367 * | 0.211 |
Age of the interviewer 51-60 [vs. up to 30] | 1.236 | 0.135 | 1.118 | 0.118 | 1.050 | 0.159 |
Age of the interviewer 61 and older [vs. up to 30] | 1.106 | 0.123 | 1.060 | 0.115 | 1.108 | 0.172 |
Response rate (RR1) | 4.446 *** | 1.727 | 2.653 * | 1.140 | 0.747 | 0.425 |
Fraction of experienced interviewers | 1.637 *** | 0.197 | 1.458 ** | 0.183 | 0.939 | 0.159 |
Within household selection performed by the interviewers [vs. individual register sample] | 1.739 *** | 0.179 | 1.326 * | 0.178 | 0.779 | 0.139 |
PISA’s Mathematics Scale | 0.989 ** | 0.004 | 1.005 | 0.010 | ||
Uncertainty avoidance index (Hofstede’s UAI) | 1.004 | 0.006 | ||||
Masculinity vs. femininity [Hofstede’s MAS] | 1.034 *** | 0.010 | 1.024 ** | 0.009 | ||
Size of the Shadow Economy [in % of official GDP] | 1.083 * | 0.034 | 1.098 ** | 0.033 | ||
N | 13285 T_INTERVIEWER_ID | 13427 T_INTERVIEWER_ID | 8650 T_INTERVIEWER_ID | |||
17 cntry | 17 cntry | 17 cntry | ||||
6 essround | 6 essround | 6 essround | ||||
Observations | 164406 | 170522 | 32732 | |||
AIC | 66882.261 | 76734.449 | 26838.651 | |||
log-Likelihood | -33404.131 | -38330.225 | -13381.325 | |||
|