# R syntax for data preparation  
#
# "Accounting for cross-country-cross-time variations in measurement invariance testing. A case of political participation"





library(haven)
library(dplyr)
library(rio)
library(tidyr)


# Cumulative file
# Folder that holds the ESS .dta files. An empty path is not a valid
# directory and would make setwd() abort the script, so the working directory
# is only changed once a folder has been filled in here; otherwise the files
# are read from the current working directory.
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}

# Read the Stata file and convert labelled columns to factors.
E <- data.frame(as_factor(read_dta("ESS1-8e01.dta")))

# Quick look at the rounds and the variables present in the file.
table(E$essround)

names(E)

# Go through character so that, if agea came back as a factor, its printed
# values (not the internal level codes) are converted to numbers. Values that
# are not numeric text become NA.
E$agea <- as.numeric(as.character(E$agea))

# Keep respondents aged 18 or older (filter() also drops rows where agea is
# NA) and only the columns used further down the script.
ESS1 <- E %>%
  filter(agea >= 18) %>%
  select(cntry, essround, idno, vote:bctprd, gndr, agea, pspwght)

# The full import is no longer needed; free the memory.
remove(E)


# ESS9
# Import the Stata file and convert labelled columns to factors.
E9 <- read_dta("ESS9e02.dta") %>%
  as_factor() %>%
  data.frame()

# Inspect which rounds and which variables the file contains.
table(E9[["essround"]])

names(E9)

# Convert age through character so that factor labels, not level codes, end
# up as numbers; non-numeric text becomes NA.
E9[["agea"]] <- as.numeric(as.character(E9[["agea"]]))

# Respondents aged 18+ only (rows with NA age are dropped by filter()),
# restricted to the columns used further down the script.
ESS9 <- select(
  filter(E9, agea >= 18),
  cntry, essround, idno, vote, contplt:bctprd, gndr, agea, pspwght
)

rm(E9)

# Store the round identifier as a factor before the two files are stacked.
ESS9[["essround"]] <- as.factor(ESS9[["essround"]])

# Merging Cumulative File with ESS9
# rbind() on data frames matches columns by name, so ESS1 and ESS9 must carry
# the same set of columns.
ESS_M <- rbind(ESS1, ESS9)


# Recoding
# Response distributions of the items vote:bctprd before recoding.
sapply(select(ESS_M, vote:bctprd), table, useNA = "always")

# Items vote:bctprd: "Yes" -> 1, "No" -> 0; gndr: "Female" -> 1, "Male" -> 0.
# Every other response label becomes NaN via `.default`.
# Columns are addressed by name rather than by position (4:11 and 12) so the
# recode does not silently hit the wrong variables if the column order
# changes.
ESS_ALL <- ESS_M %>%
  mutate(
    across(vote:bctprd, ~ recode(.x, `Yes` = 1, `No` = 0, .default = NaN)),
    gndr = recode(gndr, `Female` = 1, `Male` = 0, .default = NaN)
  )


# Same tabulation after recoding, now including gndr, as a sanity check.
sapply(select(ESS_ALL, vote:gndr), table, useNA = "always")


# Descriptives for all countries
# Count helpers for the frequency table: number of 0s, of missing values and
# of 1s in a vector. Comparisons that evaluate to NA are not counted.
cnt0 <- function(x) sum(x == 0, na.rm = TRUE)
cntNA <- function(x) sum(is.na(x))
cnt1 <- function(x) sum(x == 1, na.rm = TRUE)


# One row per country-round holding, for every item from vote to bctprd, the
# number of 0s (suffix _fn1), 1s (_fn2) and missing values (_fn3).
# The fn1/fn2/fn3 names are spelled out so the column names stay the same as
# the automatic ones produced by an unnamed function list in summarise_at().
# `.groups = "drop"` returns an ungrouped table, and the final select() puts
# the two keys first followed by the count columns in alphabetical order.
Freq_01NA <- ESS_ALL %>%
  group_by(cntry, essround) %>%
  summarise(
    across(vote:bctprd, list(fn1 = cnt0, fn2 = cnt1, fn3 = cntNA)),
    .groups = "drop"
  ) %>%
  select(
    cntry, essround,
    all_of(sort(setdiff(names(.), c("cntry", "essround"))))
  )


# File for countries that took part in all editions of ESS since 2004
# Freq_01NA has one row per country-round, so count() yields the number of
# rounds in which each country appears.
# NOTE(review): n == 8 keeps countries with exactly eight rounds; that equals
# "all editions since 2004" only if the merged data span eight rounds - confirm
# against the table(essround) output.
Subset_all <- filter(count(Freq_01NA, cntry), n == 8)

cntries <- Subset_all[["cntry"]]

# Restrict the recoded respondent-level data to the selected countries.
ESS_RED <- filter(ESS_ALL, cntry %in% cntries)

# NOTE(review): the path starts at the filesystem root - presumably a
# placeholder left after removing a local folder; confirm the output location.
save(ESS_RED, file = "/ESS_RED.rda")
