# Scale direction exploration

# How does scale direction affect responses?
# What is the moderating effect of scale and question characteristics?

# Design:
# - positive to negative vs. low to high
# - attitudinal with agreement vs. behavior with frequency
# - 5 vs. 7 point scale
# - full labels vs. endpoints
# -  2 (scale direction)*2 (question type)*2 (scale length)*2 (scale labeling)
# - same design in 2 waves except
#   - half get the same scale direction and half change direction


# code focuses on data cleaning



# Admin ------------

# install packages
# install.packages("tidyverse", dep = T)
# install.packages("haven", dep = T)
# install.packages("lavaan", dep = T)
# install.packages("semTools", dep = T)


library(tidyverse)
library(haven)
library(lavaan)
library(semTools)
library(viridis)

# load data: one .sav file per wave
dfw1 <- "./data/raw/L_ScaleDirection_Wave1_EN_05p.sav" %>% read_sav()
dfw2 <- "./data/raw/L_ScaleDirection_Wave2_EN_05p.sav" %>% read_sav()


# data exploration --------

# print columns, types and first values of each wave
dfw1 %>% glimpse()
dfw2 %>% glimpse()

# experimental design: each factor on its own, then the full crossing
dfw1 %>% count(posNegWave1)
dfw1 %>% count(attBeh)
dfw1 %>% count(fivSev)
dfw1 %>% count(fulEnd)
dfw1 %>% count(posNegWave1, attBeh, fivSev, fulEnd)



# substantive questions

# `labels` attribute of every column whose name ends in "seven_posneg"
dfw1 %>%
  select(ends_with("seven_posneg")) %>%
  map(~ attributes(.x)$labels)


# `label` attribute of the same columns
dfw1 %>%
  select(ends_with("seven_posneg")) %>%
  map(~ attributes(.x)$label)

# distribution of two example items
dfw1 %>% count(att_01_seven_posneg)
dfw1 %>% count(att_15_seven_posneg)



# data cleaning ---------

## make clean experimental variables ----------

# wave 1
# Turn each raw condition indicator into a 0/1 dummy (1 when the raw code is
# 1, 0 otherwise) and add a labelled factor version of every dummy.
# `levels = c(0, 1)` ties the first label to dummy value 0 and the second to
# dummy value 1 explicitly, so building the factor does not depend on both
# conditions being present in the data.
dfw1 <- mutate(
  dfw1,
  exp_pos = ifelse(posNegWave1 == 1, 1, 0),
  exp_att = ifelse(attBeh == 1, 1, 0),
  exp_five = ifelse(fivSev == 1, 1, 0),
  exp_full = ifelse(fulEnd == 1, 1, 0),
  exp_pos_fct = factor(exp_pos, levels = c(0, 1),
                       labels = c("Ascending", "Descending")),
  exp_att_fct = factor(exp_att, levels = c(0, 1),
                       labels = c("Frequency", "Agreement")),
  exp_five_fct = factor(exp_five, levels = c(0, 1),
                        labels = c("Seven point", "Five point")),
  exp_full_fct = factor(exp_full, levels = c(0, 1),
                        labels = c("End-labeled", "Fully-labeled"))
)

# old terminology
# exp_pos_fct = factor(exp_pos,
# labels = c("Negative first", "Positive first")),
# exp_att_fct = factor(exp_att,
#                      labels = c("Behaviour", "Attitude")),
# exp_five_fct = factor(exp_five,
#                       labels = c("Seven point", "Five point")),
# exp_full_fct = factor(exp_full,
#                       labels = c("Labelled ends", "Fully labelled"))



# cross-check each recode against its raw indicator, then the full design
count(dfw1, posNegWave1, exp_pos, exp_pos_fct)
count(dfw1, attBeh, exp_att, exp_att_fct)
count(dfw1, fivSev, exp_five, exp_five_fct)
count(dfw1, fulEnd, exp_full, exp_full_fct)
count(dfw1, exp_pos, exp_att, exp_five, exp_full)


# wave 2
# Same recode as for wave 1, using the wave 2 direction indicator
# (`posNegWave2`): 0/1 dummies plus labelled factor versions.
# `levels = c(0, 1)` ties the first label to dummy value 0 and the second to
# dummy value 1 explicitly, so building the factor does not depend on both
# conditions being present in the data.
dfw2 <- mutate(
  dfw2,
  exp_pos = ifelse(posNegWave2 == 1, 1, 0),
  exp_att = ifelse(attBeh == 1, 1, 0),
  exp_five = ifelse(fivSev == 1, 1, 0),
  exp_full = ifelse(fulEnd == 1, 1, 0),
  exp_pos_fct = factor(exp_pos, levels = c(0, 1),
                       labels = c("Ascending", "Descending")),
  exp_att_fct = factor(exp_att, levels = c(0, 1),
                       labels = c("Frequency", "Agreement")),
  exp_five_fct = factor(exp_five, levels = c(0, 1),
                        labels = c("Seven point", "Five point")),
  exp_full_fct = factor(exp_full, levels = c(0, 1),
                        labels = c("End-labeled", "Fully-labeled"))
)

# cross-check each recode against its raw indicator, then the full design
count(dfw2, posNegWave2, exp_pos, exp_pos_fct)
count(dfw2, attBeh, exp_att, exp_att_fct)
count(dfw2, fivSev, exp_five, exp_five_fct)
count(dfw2, fulEnd, exp_full, exp_full_fct)
count(dfw2, exp_pos, exp_att, exp_five, exp_full)


### clean device variables
# device codes below 4 become "PC", every other code becomes "Other";
# the result is stored as a factor
dfw1 <- dfw1 %>%
  mutate(pc = factor(ifelse(device < 4, "PC", "Other")))

dfw2 <- dfw2 %>%
  mutate(pc = factor(ifelse(device < 4, "PC", "Other")))

# check the recode against the raw device codes
dfw1 %>% count(device, pc)
dfw2 %>% count(device, pc)




## put data together -------------------

# wave 1

### put labeled and non labeled data together --------------


# make smaller data only with substantive variables
df_att <- dfw1 %>%
  select(starts_with("att_") | starts_with("beh_"))

# columns ending in "_end" and the matching names without that suffix
nms_vars_end <- names(select(df_att, ends_with("_end")))
nms_vars <- str_remove(nms_vars_end, "_end")

df_att2 <- df_att

# where an item is missing, fill it in from its "_end" counterpart
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att2[[target]])
  df_att2[[target]][gaps] <- df_att2[[nms_vars_end[j]]][gaps]
}

# check the merge on one item
df_att2 %>% count(att_03_seven_posneg_end, att_03_seven_posneg)

# remove all the end variables
df_att2 <- df_att2 %>% select(-ends_with("_end"))

df_att2 %>% glimpse()

### put together attitudes and behaviours ---------

nms_vars_beh <- names(select(df_att2, starts_with("beh_")))
nms_vars <- names(select(df_att2, starts_with("att_")))


df_att3 <- df_att2

# where an "att_" item is missing, fill it in from the "beh_" item at the
# same position in `nms_vars_beh`
# NOTE(review): pairing is by position, so this assumes the "att_" and "beh_"
# columns come in the same order - confirm against the raw data
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att3[[target]])
  df_att3[[target]][gaps] <- df_att3[[nms_vars_beh[j]]][gaps]
}

# check the merge on one item
df_att3 %>% count(beh_03_seven_highlow, att_03_seven_posneg)

# remove all the behaviour variables
df_att3 <- df_att3 %>% select(-starts_with("beh"))

df_att3 %>% glimpse()


### put positive and negative data together ------------

# function for reverse coding
#
# Mirrors scores around the top of the scale: a score `s` becomes
# `scale_max + 1 - s`; missing values stay missing.
#
# var:       numeric vector of scores
# scale_max: highest scale point. Defaults to the largest value observed in
#            `var`; pass it explicitly (e.g. 5 or 7) when the top category
#            may not have been chosen by anyone, because the observed maximum
#            would then be too low.
# Returns a vector of reversed scores with the same length as `var`.
reverese_code <- function(var, scale_max = max(var, na.rm = TRUE)) {
  scale_max + 1 - var
}


# reverse code every "_posneg" item
df_att4 <- df_att3 %>%
  mutate_at(vars(matches("_posneg")), reverese_code)

df_att4 %>% count(att_03_seven_posneg, att_03_seven_negpos)


nms_vars_posneg <- names(select(df_att4, matches("posneg")))
nms_vars <- names(select(df_att4, matches("negpos")))

# where a "negpos" item is missing, fill it in from the reversed "posneg"
# item at the same position
# NOTE(review): pairing is by position, so this assumes both sets of columns
# come in the same order - confirm against the raw data
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att4[[target]])
  df_att4[[target]][gaps] <- df_att4[[nms_vars_posneg[j]]][gaps]
}

df_att4 %>% count(att_03_seven_posneg, att_03_seven_negpos)




# keep only the merged "negpos" columns and drop the direction tag from
# their names

df_att4 <- df_att4 %>%
  select(matches("negpos")) %>%
  rename_all(~ str_remove_all(.x, "_posneg|_negpos"))



### rescale and put 5 and 7 together -------------

df_att4 %>% glimpse()

# Rescale a numeric vector so that its smallest value maps to 0 and its
# largest value maps to 1.
#
# x:     numeric vector
# na.rm: drop missing values when looking up the minimum and maximum?
#        With the default TRUE, missing entries stay missing in the result;
#        with FALSE, any missing value makes the whole result missing.
# Returns a vector with the same length as `x`.
# Note: when all non-missing values are equal the range is 0 and the result
# is NaN (0 / 0).
normalize <- function(x, na.rm = TRUE) {
  lowest <- min(x, na.rm = na.rm)
  highest <- max(x, na.rm = na.rm)
  (x - lowest) / (highest - lowest)
}

# rescale every item with `normalize()`
df_att5 <- df_att4 %>% mutate_all(normalize)



nms_vars_seven <- names(select(df_att5, ends_with("_seven")))
nms_vars <- names(select(df_att5, ends_with("_five")))


df_att6 <- df_att5

# where a "_five" item is missing, fill it in from the "_seven" item at the
# same position
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att6[[target]])
  df_att6[[target]][gaps] <- df_att6[[nms_vars_seven[j]]][gaps]
}

df_att6 %>% count(att_01_seven, att_01_five)

# keep only the merged "five" columns and drop the scale-length tag from
# their names

df_att6 <- df_att6 %>%
  select(matches("five")) %>%
  rename_all(~ str_remove_all(.x, "_five|_seven"))

df_att6 %>% glimpse()
summary(df_att6)

# raw wave 1 data + direction-merged items + fully merged, rescaled items
dfw1_2 <- cbind(dfw1, df_att4, df_att6)

# check the different transformations
dfw1_2 %>% count(att_03_seven_posneg, att_03)
dfw1_2 %>% count(beh_03_seven_highlow, att_03)
dfw1_2 %>% count(beh_03_seven_highlow_end, att_03)
dfw1_2 %>% count(beh_03_five_highlow_end, att_03)



# wave 2

### put labeled and non labeled data together --------------


# make smaller data only with substantive variables
df_att <- dfw2 %>%
  select(starts_with("att_") | starts_with("beh_"))

# columns ending in "_end" and the matching names without that suffix
nms_vars_end <- names(select(df_att, ends_with("_end")))
nms_vars <- str_remove(nms_vars_end, "_end")

df_att2 <- df_att

# where an item is missing, fill it in from its "_end" counterpart
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att2[[target]])
  df_att2[[target]][gaps] <- df_att2[[nms_vars_end[j]]][gaps]
}

# check the merge on one item
df_att2 %>% count(att_03_seven_posneg_end, att_03_seven_posneg)

# remove all the end variables
df_att2 <- df_att2 %>% select(-ends_with("_end"))

df_att2 %>% glimpse()

### put together attitudes and behaviours ---------

nms_vars_beh <- names(select(df_att2, starts_with("beh_")))
nms_vars <- names(select(df_att2, starts_with("att_")))


df_att3 <- df_att2

# where an "att_" item is missing, fill it in from the "beh_" item at the
# same position in `nms_vars_beh`
# NOTE(review): pairing is by position, so this assumes the "att_" and "beh_"
# columns come in the same order - confirm against the raw data
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att3[[target]])
  df_att3[[target]][gaps] <- df_att3[[nms_vars_beh[j]]][gaps]
}

# check the merge on one item
df_att3 %>% count(beh_03_seven_highlow, att_03_seven_posneg)

# remove all the behaviour variables
df_att3 <- df_att3 %>% select(-starts_with("beh"))

df_att3 %>% glimpse()


### put positive and negative data together ------------

# function for reverse coding
#
# Mirrors scores around the top of the scale: a score `s` becomes
# `scale_max + 1 - s`; missing values stay missing.
#
# var:       numeric vector of scores
# scale_max: highest scale point. Defaults to the largest value observed in
#            `var`; pass it explicitly (e.g. 5 or 7) when the top category
#            may not have been chosen by anyone, because the observed maximum
#            would then be too low.
# Returns a vector of reversed scores with the same length as `var`.
reverese_code <- function(var, scale_max = max(var, na.rm = TRUE)) {
  scale_max + 1 - var
}


# reverse code every "_posneg" item
df_att4 <- df_att3 %>%
  mutate_at(vars(matches("_posneg")), reverese_code)

df_att4 %>% count(att_03_seven_posneg, att_03_seven_negpos)


nms_vars_posneg <- names(select(df_att4, matches("posneg")))
nms_vars <- names(select(df_att4, matches("negpos")))

# where a "negpos" item is missing, fill it in from the reversed "posneg"
# item at the same position
# NOTE(review): pairing is by position, so this assumes both sets of columns
# come in the same order - confirm against the raw data
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att4[[target]])
  df_att4[[target]][gaps] <- df_att4[[nms_vars_posneg[j]]][gaps]
}

df_att4 %>% count(att_03_seven_posneg, att_03_seven_negpos)

# keep only the merged "negpos" columns and drop the direction tag from
# their names

df_att4 <- df_att4 %>%
  select(matches("negpos")) %>%
  rename_all(~ str_remove_all(.x, "_posneg|_negpos"))



### rescale and put 5 and 7 together -------------

df_att4 %>% glimpse()

# Rescale a numeric vector so that its smallest value maps to 0 and its
# largest value maps to 1.
#
# x:     numeric vector
# na.rm: drop missing values when looking up the minimum and maximum?
#        With the default TRUE, missing entries stay missing in the result;
#        with FALSE, any missing value makes the whole result missing.
# Returns a vector with the same length as `x`.
# Note: when all non-missing values are equal the range is 0 and the result
# is NaN (0 / 0).
normalize <- function(x, na.rm = TRUE) {
  lowest <- min(x, na.rm = na.rm)
  highest <- max(x, na.rm = na.rm)
  (x - lowest) / (highest - lowest)
}

# rescale every item with `normalize()`
df_att5 <- df_att4 %>% mutate_all(normalize)



nms_vars_seven <- names(select(df_att5, ends_with("_seven")))
nms_vars <- names(select(df_att5, ends_with("_five")))


df_att6 <- df_att5

# where a "_five" item is missing, fill it in from the "_seven" item at the
# same position
for (j in seq_along(nms_vars)) {
  target <- nms_vars[j]
  gaps <- is.na(df_att6[[target]])
  df_att6[[target]][gaps] <- df_att6[[nms_vars_seven[j]]][gaps]
}

df_att6 %>% count(att_01_seven, att_01_five)

# keep only the merged "five" columns and drop the scale-length tag from
# their names

df_att6 <- df_att6 %>%
  select(matches("five")) %>%
  rename_all(~ str_remove_all(.x, "_five|_seven"))

df_att6 %>% glimpse()
summary(df_att6)

# raw wave 2 data + direction-merged items + fully merged, rescaled items
dfw2_2 <- cbind(dfw2, df_att4, df_att6)

# check the different transformations
dfw2_2 %>% count(att_03_seven_posneg, att_03)
dfw2_2 %>% count(beh_03_seven_highlow, att_03)
dfw2_2 %>% count(beh_03_seven_highlow_end, att_03)
dfw2_2 %>% count(beh_03_five_highlow_end, att_03)


# make vector of variables of interest
var_names <- names(df_att6)


## primacy -----------------

# `fun_primacy()` reads the scale-direction dummy from this global vector, so
# it has to hold the wave 1 experimental condition (`exp_pos`), not the
# substantive item `att_01`
exp_pos <- dfw1_2$exp_pos

# function to make primacy dummy
#
# Returns 1 where the answer sits at the scale end that matches the direction
# dummy - item value 1 when `direction` is 1, item value 0 when `direction`
# is 0 - and 0 otherwise (including when either input is missing).
#
# var:       numeric vector of item scores
# direction: numeric 0/1 vector of the same length as `var` (or length 1).
#            Defaults to the global `exp_pos`, which is what one-argument
#            calls rely on.
fun_primacy <- function(var, direction = exp_pos) {
  stopifnot(
    length(var) == length(direction) ||
      length(var) == 1L ||
      length(direction) == 1L
  )
  hit <- (direction == 1 & var == 1) | (direction == 0 & var == 0)
  # a comparison that cannot be decided because of NA counts as "no primacy"
  as.numeric(!is.na(hit) & hit)
}

# create primacy score wave 1: add one "_p" flag per item via
# `fun_primacy()`, then express the number of flagged items as a percentage
# of the 15 items
dfw1_2 <- dfw1_2 %>%
  mutate_at(var_names, list(p = fun_primacy)) %>%
  mutate(
    primacy = rowSums(cbind(
      att_01_p, att_02_p, att_03_p, att_04_p, att_05_p,
      att_06_p, att_07_p, att_08_p, att_09_p, att_10_p,
      att_11_p, att_12_p, att_13_p, att_14_p, att_15_p
    )) / 15 * 100
  )

# `fun_primacy()` reads the scale-direction dummy from this global vector, so
# it has to hold the wave 2 experimental condition (`exp_pos`), not the
# substantive item `att_01`
exp_pos <- dfw2_2$exp_pos

# create primacy score wave 2: add one "_p" flag per item via
# `fun_primacy()`, then express the number of flagged items as a percentage
# of the 15 items
dfw2_2 <- dfw2_2 %>%
  mutate_at(vars(all_of(var_names)),
            list("p" = ~fun_primacy(.))) %>%
  mutate(primacy = ((att_01_p + att_02_p + att_03_p + att_04_p + att_05_p +
                       att_06_p + att_07_p + att_08_p + att_09_p +
                       att_10_p + att_11_p + att_12_p + att_13_p +
                       att_14_p + att_15_p) / 15) * 100)


## acquiescence ------------------------------


# create acquiescence score wave 1: add one "_a" flag per item (1 when the
# rescaled answer equals 1, 0 otherwise, NA when missing), then take the
# percentage of the 15 items that are flagged
dfw1_2 <- dfw1_2 %>%
  mutate_at(var_names, list(a = ~ ifelse(.x == 1, 1, 0))) %>%
  mutate(
    acq = rowSums(cbind(
      att_01_a, att_02_a, att_03_a, att_04_a, att_05_a,
      att_06_a, att_07_a, att_08_a, att_09_a, att_10_a,
      att_11_a, att_12_a, att_13_a, att_14_a, att_15_a
    )) / 15 * 100
  )

dfw1_2 %>% count(exp_pos, att_01, att_01_a)

# create acquiescence score wave 2 (same steps as wave 1)
dfw2_2 <- dfw2_2 %>%
  mutate_at(var_names, list(a = ~ ifelse(.x == 1, 1, 0))) %>%
  mutate(
    acq = rowSums(cbind(
      att_01_a, att_02_a, att_03_a, att_04_a, att_05_a,
      att_06_a, att_07_a, att_08_a, att_09_a, att_10_a,
      att_11_a, att_12_a, att_13_a, att_14_a, att_15_a
    )) / 15 * 100
  )

dfw2_2 %>% count(exp_pos, att_01, att_01_a)



## straight-lining --------------------------------

# make function to find unique values
#
# Flags straight-lining row by row: TRUE when every item in the row was
# answered and all answers are identical, FALSE otherwise.
# Rows with any missing item are never flagged; in particular, a row that is
# missing on all items is not counted as straight-lining (it would otherwise
# have exactly one unique value, NA).
#
# df: data frame (or matrix) with one column per item
# Returns the per-row result of `apply()`: one logical value per row.
fun_straight <- function(df) {
  apply(df, 1, function(x) !anyNA(x) && length(unique(x)) == 1)
}

# wave 1 coding: straight-lining within items 1-6 (`same_1`), within items
# 7-15 (`same_2`), and a 0/1 indicator for either (`same_any`)
dfw1_2 <- mutate(
  dfw1_2,
  same_1 = fun_straight(dfw1_2[var_names[1:6]]),
  same_2 = fun_straight(dfw1_2[var_names[7:15]]),
  same_any = ifelse(same_1 | same_2, 1, 0)
)

dfw1_2 %>% count(same_1, same_2, same_any)

# wave 2 coding (same steps as wave 1)
dfw2_2 <- mutate(
  dfw2_2,
  same_1 = fun_straight(dfw2_2[var_names[1:6]]),
  same_2 = fun_straight(dfw2_2[var_names[7:15]]),
  same_any = ifelse(same_1 | same_2, 1, 0)
)

dfw2_2 %>% count(same_1, same_2, same_any)


# merge wave 1 and wave 2 in wide data

dfw1_2 %>% glimpse()
dfw2_2 %>% glimpse()

# respondent ids that occur more than once within a wave
dfw1_2 %>%
  count(nomem_encr) %>%
  filter(n > 1)

dfw2_2 %>%
  count(nomem_encr) %>%
  filter(n > 1)

dim(dfw1_2)
dim(dfw2_2)


# number of ids of one wave that also occur in the other wave
sum(dfw1_2$nomem_encr %in% dfw2_2$nomem_encr)
sum(dfw2_2$nomem_encr %in% dfw1_2$nomem_encr)

# per wave: keep the id, the experimental variables and the merged items, and
# tag every column from `exp_pos` through `att_15` with the wave number
dfw1_2_small <- dfw1_2 %>%
  select(nomem_encr, starts_with("exp"), all_of(var_names)) %>%
  rename_at(vars(exp_pos:att_15),
            ~str_c(., "_1"))

dfw2_2_small <- dfw2_2 %>%
  select(nomem_encr, starts_with("exp"), all_of(var_names)) %>%
  rename_at(vars(exp_pos:att_15),
            ~str_c(., "_2"))


# wide data: one row per respondent id, keeping ids found in only one wave
data_full <- full_join(dfw1_2_small, dfw2_2_small, by = "nomem_encr")

# make indicator if people changed direction
# (NA when the direction is missing for either wave)
# `levels = c(0, 1)` ties "No change" to 0 and "Change" to 1 explicitly, so
# building the factor does not depend on both values being present.
data_full <- data_full %>%
  mutate(exp_pos_change = ifelse(exp_pos_1 == exp_pos_2, 0, 1),
         exp_pos_change_fct = factor(exp_pos_change,
                                     levels = c(0, 1),
                                     labels = c("No change", "Change")))

count(data_full, exp_pos_1, exp_pos_2, exp_pos_change)
count(data_full, exp_full_1, exp_full_2)


## extreme and middle response styles --------------------


# create extreme score wave 1: add one "_e" flag per item (1 when the
# rescaled answer is 0 or 1, 0 otherwise - `%in%` also gives 0 for missing
# answers), then take the percentage of the 15 items that are flagged
dfw1_2 <- dfw1_2 %>%
  mutate_at(var_names, list(e = ~ ifelse(.x %in% c(0, 1), 1, 0))) %>%
  mutate(
    extreme = rowSums(cbind(
      att_01_e, att_02_e, att_03_e, att_04_e, att_05_e,
      att_06_e, att_07_e, att_08_e, att_09_e, att_10_e,
      att_11_e, att_12_e, att_13_e, att_14_e, att_15_e
    )) / 15 * 100
  )

dfw1_2 %>% count(exp_pos, att_01, att_01_e)


# create extreme score wave 2 (same steps as wave 1)
dfw2_2 <- dfw2_2 %>%
  mutate_at(var_names, list(e = ~ ifelse(.x %in% c(0, 1), 1, 0))) %>%
  mutate(
    extreme = rowSums(cbind(
      att_01_e, att_02_e, att_03_e, att_04_e, att_05_e,
      att_06_e, att_07_e, att_08_e, att_09_e, att_10_e,
      att_11_e, att_12_e, att_13_e, att_14_e, att_15_e
    )) / 15 * 100
  )

dfw2_2 %>% count(exp_pos, att_01, att_01_e)


# create middle score wave 1: add one "_m" flag per item (1 when the
# rescaled answer equals 0.5, 0 otherwise, NA when missing), then take the
# percentage of the 15 items that are flagged
dfw1_2 <- dfw1_2 %>%
  mutate_at(var_names, list(m = ~ ifelse(.x == 0.5, 1, 0))) %>%
  mutate(
    middle = rowSums(cbind(
      att_01_m, att_02_m, att_03_m, att_04_m, att_05_m,
      att_06_m, att_07_m, att_08_m, att_09_m, att_10_m,
      att_11_m, att_12_m, att_13_m, att_14_m, att_15_m
    )) / 15 * 100
  )

dfw1_2 %>% count(exp_pos, att_01, att_01_m)


# create middle score wave 2 (same steps as wave 1)
dfw2_2 <- dfw2_2 %>%
  mutate_at(var_names, list(m = ~ ifelse(.x == 0.5, 1, 0))) %>%
  mutate(
    middle = rowSums(cbind(
      att_01_m, att_02_m, att_03_m, att_04_m, att_05_m,
      att_06_m, att_07_m, att_08_m, att_09_m, att_10_m,
      att_11_m, att_12_m, att_13_m, att_14_m, att_15_m
    )) / 15 * 100
  )

dfw2_2 %>% count(exp_pos, att_01, att_01_m)






# make mtme data -------------
data_full %>% glimpse()

# move data in long format: drop rows without a change indicator, mark the
# wave suffix with a dot, then split every remaining column name at that dot
# into the variable name and the wave
data_long <- data_full %>%
  filter(!is.na(exp_pos_change)) %>%
  rename_all(~ str_replace(.x, "_2$", ".2")) %>%
  rename_all(~ str_replace(.x, "_1$", ".1")) %>%
  pivot_longer(
    cols = !c("nomem_encr", "exp_pos_change", "exp_pos_change_fct"),
    names_to = c(".value", "wave"),
    names_sep = "\\."
  ) %>%
  arrange(nomem_encr, wave)

# make positive and negative worded questions: "_p" copies keep the answer
# only where exp_pos is 1, "_n" copies only where exp_pos is 0 (NA elsewhere)
# NOTE(review): `exp_pos` inside these lambdas is meant to be the data_long
# column, not the global vector of the same name - confirm
data_long <- data_long %>%
  mutate_at(var_names, list(p = ~ ifelse(exp_pos == 1, ., NA))) %>%
  mutate_at(var_names, list(n = ~ ifelse(exp_pos == 0, ., NA)))


data_long %>% count(wave, exp_pos, att_01_p)
data_long %>% count(wave, exp_pos, att_01_n)
data_long %>% count(wave, exp_pos, exp_pos_change)

# make wide data: within each respondent, replace every "_p" and "_n" column
# by its mean over that respondent's rows (missing values ignored), then keep
# only the wave 1 row and drop the wave column

mtme_data <- data_long %>%
  group_by(nomem_encr) %>%
  mutate_at(vars(ends_with("_p"), ends_with("_n")), mean, na.rm = TRUE) %>%
  ungroup() %>%
  filter(wave == 1) %>%
  select(-wave)

# inspect the result for one item
mtme_data %>%
  select(nomem_encr, exp_pos_change, att_01, att_01_p, att_01_n) %>%
  print(n = 100)

# export data ----------------------------

save(
  dfw1_2, dfw2_2, data_full, data_long, mtme_data,
  file = "./data/clean/clean_data.RData"
)
