# -----------------------------------------------------------------
# Code for: Keusch, Florian & Yan, Ting. (2018). Is Satisficing 
# responsible for response order effects in rating scale questions?
# Survey Research Methods, 12(3).
# Date: July 27, 2018
# Author: Florian Keusch
# -----------------------------------------------------------------
#Load required packages
library(plyr)
library(dplyr)
library(tidyr)
library(car)
library(gmodels)
library(lme4)
library(sjstats)
library(ggplot2)
library(svglite)
library(purrr)

# Set working directory
setwd()

# Load data
scale <- read.csv("Keusch_Yan_2018_SRM_replication.csv")

# -----------------------------------------------------------------
#Inspect data
str(scale)
View(scale)

# -----------------------------------------------------------------
#Sample descriptives
prop.table(table(scale$gender))

summary(scale$age)

prop.table(table(scale$edu))

# -----------------------------------------------------------------
#Recode scale questions from numeric to binary variable for picking
#from low side vs high side  
recode <- function(x){
  y <- ifelse(scale$direction=="Descending scale", 7-x, x) 
  y <- ifelse(y==1, 1, ifelse(
    y==2, 1, ifelse(
      y==3,  1, 2)))
  y <- factor(y, levels = c(1, 2), labels = c("1"="low side", "2"="high side"))
  return(y)
}
names(scale)

scale[8:62] <- map(scale[8:62], recode)

# -----------------------------------------------------------------
#Recode age into binary variable   
scale$age_65 <- NA
scale$age_65 <- ifelse(scale$age < 65, "under 65", "65+")
scale$age_65 <- as.factor(scale$age_65)

# -----------------------------------------------------------------
#Create speeding indicators from 225ms/word to 375ms/word
reading_speed <- seq(from = 225, to = 375, by = 5)

create_speeder <- function(x){
  y <- factor(c("Non-Speeder", "Speeder"))
  y <- ifelse(scale$duration <= (x/1000*1230+5*5+1*43), "Speeder", "Non-Speeder")
  return(y)
}

z <- data.frame(map(reading_speed, create_speeder))

names(z) <- paste0("speeder_", seq(from = 225, to = 375, by = 5))

scale <- cbind(scale, z)

#Standard indicator: 300ms/word * 1230 words + 5sec*5 brands + 1sec*43 pages = 437
prop.table(table(scale$speeder_300))

#Speeding indicators for sensitivity analysis: 250ms/word and 350ms/word
prop.table(table(scale$speeder_250))
prop.table(table(scale$speeder_350))

# -----------------------------------------------------------------
#Proportion of responses from the low side of the scale by scale condition (Table 1)
cross <- function(x){
  CrossTable(x, scale$direction, chisq = T, format = "SPSS")
}

#Grid 1
map(scale[8:18], cross)

#Grid 2
map(scale[19:29], cross)

#Grid 3
map(scale[30:41], cross)

#Grid 4
map(scale[42:51], cross)

#Grid5
map(scale[52:62], cross)

# -----------------------------------------------------------------
#Multivariate analyses of selecting the low side of the scale
#Create long format data sets for multilevel analysis
scale$id <- factor(scale$id)

scale_g1 <- gather(scale, item, response, g1_1, g1_2, g1_3, g1_4, g1_5, g1_6, g1_7, g1_8, g1_9, g1_10, g1_11)
scale_g1$response <- factor(scale_g1$response)

scale_g2 <- gather(scale, item, response, g2_1, g2_2, g2_3, g2_4, g2_5, g2_6, g2_7, g2_8, g2_9, g2_10, g2_11)
scale_g2$response <- factor(scale_g2$response)

scale_g3 <- gather(scale, item, response, g3_1, g3_2, g3_3, g3_4, g3_5, g3_6, g3_7, g3_8, g3_9, g3_10, g3_11, g3_12)
scale_g3$response <- factor(scale_g3$response)

scale_g4 <- gather(scale, item, response, g4_1, g4_2, g4_3, g4_4, g4_5, g4_6, g4_7, g4_8, g4_9, g4_10)
scale_g4$response <- factor(scale_g4$response)

scale_g5 <- gather(scale, item, response, g5_1, g5_2, g5_3, g5_4, g5_5, g5_6, g5_7, g5_8, g5_9, g5_10, g5_11)
scale_g5$response <- factor(scale_g5$response)

long_data_sets <- list(scale_g1, scale_g2, scale_g3, scale_g4, scale_g5)

#Null models
null_model <- function(x){
  y <- glmer(response ~ 1 + (1|id) + (1|item), family = binomial, data = x)
  icc(y)
}

map(long_data_sets, null_model)

#Multivariate models (Table 2)
multi_models <- function(x){
  y <- glmer(response ~ direction + edu + age_65 + speeder_300 +
               direction:edu + direction:age_65 + direction:speeder_300 +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, multi_models)

#Multivariate models with different speeding threshold: 250ms/word (Table A1)
multi_models_250 <- function(x){
  y <- glmer(response ~ direction + edu + age_65 + speeder_250 +
               direction:edu + direction:age_65 + direction:speeder_250 +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, multi_models_250)

#Multivariate models with different speeding threshold: 350ms/word (Table A2)
multi_models_350 <- function(x){
  y <- glmer(response ~ direction + edu + age_65 + speeder_350 +
               direction:edu + direction:age_65 + direction:speeder_350 +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, multi_models_350)

# -----------------------------------------------------------------
#Test effect of numeric labels - No main or interaction effect of
#numeric labels (drop from analysis)
numeric_models <- function(x){
  y <- glmer(response ~ direction + numeric_labels + edu + age_65 + speeder_300 +
               direction:edu + direction:age_65 + direction:speeder_300 +
               direction:numeric_labels +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, numeric_models)

# -----------------------------------------------------------------
#Interaction effects between speeding and scale direction
#Calculate difference between ascending and descending scale in selecting
#low side of the scale for speeders and non-speeders across 31 speeding
#thresholds
#Grid 1
calculate_difference_speeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g1$direction[which(scale_g1[,x]=="Speeder")],
                                       scale_g1$response[which(scale_g1[,x]=="Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

speeder_var <- names(scale_g1[53:83])
g1_speeder <- map(speeder_var, calculate_difference_speeder)
g1_speeder <- do.call(rbind.data.frame, g1_speeder)

calculate_difference_nonspeeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g1$direction[which(scale_g1[,x]=="Non-Speeder")],
                                       scale_g1$response[which(scale_g1[,x]=="Non-Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g1_nonspeeder <- map(speeder_var, calculate_difference_nonspeeder)
g1_nonspeeder <- do.call(rbind.data.frame, g1_nonspeeder)

#Grid 2
calculate_difference_speeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g2$direction[which(scale_g2[,x]=="Speeder")],
                                       scale_g2$response[which(scale_g2[,x]=="Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g2_speeder <- map(speeder_var, calculate_difference_speeder)
g2_speeder <- do.call(rbind.data.frame, g2_speeder)

calculate_difference_nonspeeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g2$direction[which(scale_g2[,x]=="Non-Speeder")],
                                       scale_g2$response[which(scale_g2[,x]=="Non-Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g2_nonspeeder <- map(speeder_var, calculate_difference_nonspeeder)
g2_nonspeeder <- do.call(rbind.data.frame, g2_nonspeeder)

#Grid 3
calculate_difference_speeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g3$direction[which(scale_g3[,x]=="Speeder")],
                                       scale_g3$response[which(scale_g3[,x]=="Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g3_speeder <- map(speeder_var, calculate_difference_speeder)
g3_speeder <- do.call(rbind.data.frame, g3_speeder)

calculate_difference_nonspeeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g3$direction[which(scale_g3[,x]=="Non-Speeder")],
                                       scale_g3$response[which(scale_g3[,x]=="Non-Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g3_nonspeeder <- map(speeder_var, calculate_difference_nonspeeder)
g3_nonspeeder <- do.call(rbind.data.frame, g3_nonspeeder)

#Grid 4
calculate_difference_speeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g4$direction[which(scale_g4[,x]=="Speeder")],
                                       scale_g4$response[which(scale_g4[,x]=="Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g4_speeder <- map(speeder_var, calculate_difference_speeder)
g4_speeder <- do.call(rbind.data.frame, g4_speeder)

calculate_difference_nonspeeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g4$direction[which(scale_g4[,x]=="Non-Speeder")],
                                       scale_g4$response[which(scale_g4[,x]=="Non-Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g4_nonspeeder <- map(speeder_var, calculate_difference_nonspeeder)
g4_nonspeeder <- do.call(rbind.data.frame, g4_nonspeeder)

#Grid 5
calculate_difference_speeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g5$direction[which(scale_g5[,x]=="Speeder")],
                                       scale_g5$response[which(scale_g5[,x]=="Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g5_speeder <- map(speeder_var, calculate_difference_speeder)
g5_speeder <- do.call(rbind.data.frame, g5_speeder)

calculate_difference_nonspeeder <- function(x){
  df <- as.data.frame(prop.table(table(scale_g5$direction[which(scale_g5[,x]=="Non-Speeder")],
                                       scale_g5$response[which(scale_g5[,x]=="Non-Speeder")]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

g5_nonspeeder <- map(speeder_var, calculate_difference_nonspeeder)
g5_nonspeeder <- do.call(rbind.data.frame, g5_nonspeeder)

#Putting all differences into one data frame
diff <- list(g1_speeder, g1_nonspeeder,
             g2_speeder, g2_nonspeeder,
             g3_speeder, g3_nonspeeder,
             g4_speeder, g4_nonspeeder,
             g5_speeder, g5_nonspeeder)

diff <- map(diff, function(x){
  names(x) <- "difference"
  return(x)
}
)

diff <- data.frame(matrix(unlist(diff)))
names(diff) <- "difference"

#Add columns for speeding, grid, and threshold
speeder <- as.factor(rep(1:2, each=31, times=5))
speeder <- revalue(speeder, c("1"="Speeder", "2"="Non-Speeder"))

wpm <- as.factor(rep(1:31, times=10))
wpm <- revalue(wpm, c("1"="225", "2"="230", "3"="235", "4"="240", "5"="245",
                      "6"="250", "7"="255", "8"="260", "9"="265", "10"="270",
                      "11"="275", "12"="280", "13"="285", "14"="290", "15"="295",
                      "16"="300", "17"="305", "18"="310", "19"="315", "20"="320",
                      "21"="325", "22"="330", "23"="335", "24"="340", "25"="345",
                      "26"="350", "27"="355", "28"="360", "29"="365", "30"="370",
                      "31"="375"))

grid <- as.factor(rep(1:5, each=62))
grid <- revalue(grid, c("1"="Grid 1", "2"="Grid 2", "3"="Grid 3", "4"="Grid 4", "5"="Grid 5"))

diff <- data.frame(diff, speeder, wpm, grid)

#Difference in proportion of selecting low side of the scale between
#ascending and descending scale condition by speeding across different
#speeding thresholds (Figure 2)
ggplot(diff, aes(x=wpm, y=difference, colour=speeder, shape = grid, group=interaction(speeder, grid))) + 
  geom_line(aes(linetype=grid), size = 1) +
  labs(x = "\nms/word", y = "Difference ascending vs. descending scale\n(in percentage points)\n") +
  theme_classic() +
  scale_fill_manual("legend", values = c("Speeder" = "black", "Non-Speeder" = "grey")) +
  scale_color_manual("legend", values = c("Speeder" = "black", "Non-Speeder" = "grey")) +
  ylim(0,100) +
  theme(panel.grid = element_blank(),
        axis.text.y = element_text(size = 12, color = "black"),
        axis.text.x = element_text(size = 11, color = "black"),
        axis.title.y = element_text(size = 12, color = "black"),
        axis.title.x = element_text(size = 12, color = "black"),
        plot.title = element_blank(),
        legend.position="bottom",
        legend.text = element_text(size=12),
        legend.title = element_blank()
  )

#Safe figure
ggsave("Keusch_Yan_2018_Figure2.pdf", width = 12.5, height = 8, units = "in")

# -----------------------------------------------------------------
#Three-way interactions
#Models including interaction term speeding*scale direction*education 
threeway_models <- function(x){
  y <- glmer(response ~ direction + edu + age_65 + speeder_300 +
               direction:edu + direction:age_65 + direction:speeder_300 +
               speeder_300:direction:edu +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, threeway_models)

#Calculate difference between ascending and descending scale in selecting
#low side of the scale for speeders and non-speeders by speeding by
#education for Grid 3
calculate_difference_speeder_edu <- function(x, y){
  df <- as.data.frame(prop.table(table(scale_g3$direction[which(scale_g3$speeder_300==x & scale_g3$edu==y)],
                                       scale_g3$response[which(scale_g3$speeder_300==x & scale_g3$edu==y)]),
                                 margin = 1)*100)
  y <- df[2,3]-df[1,3]
  print(y)
}

x1 <- calculate_difference_speeder_edu("Speeder", "with hs")
x2 <- calculate_difference_speeder_edu("Speeder", "without hs")
x3 <- calculate_difference_speeder_edu("Non-Speeder", "with hs")
x4 <- calculate_difference_speeder_edu("Non-Speeder", "without hs")

diff3 <- rbind(x1, x2, x3, x4)
colnames(diff3) <- "difference"

#Add columns for speeding and education
speeder3 <- as.factor(rep(1:2, each=2))
speeder3 <- revalue(speeder3, c("1"="Speeder", "2"="Non-Speeder"))

edu3 <- as.factor(rep(1:2, 2))
edu3 <- revalue(edu3, c("1"="with high school degree", "2"="without high school degree"))

diff3 <- data.frame(diff3, speeder3, edu3)

#Difference in proportion of selecting low side of the scale between ascending and
#descending scale condition by speeding by education for Grid 3 (Figure 3)
ggplot(diff3, aes(x=edu3, y=difference, color=speeder3, fill=speeder3)) + 
  geom_bar(stat="identity", position="dodge") +
  ggtitle("Grid 3") +
  labs(x = "\nms/word", y = "Difference ascending vs. descending scale\n(in percentage points)\n") +
  theme_classic() +
  scale_fill_manual("legend", values = c("Speeder" = "black", "Non-Speeder" = "lightgrey")) +
  scale_color_manual("legend", values = c("Speeder" = "black", "Non-Speeder" = "lightgrey")) +
  theme(panel.grid = element_blank(),
        axis.text.y = element_text(size = 12, color = "black"),
        axis.text.x = element_text(size = 11, color = "black"),
        axis.title.y = element_text(size = 12, color = "black"),
        axis.title.x = element_text(size = 12, color = "black"),
        plot.title = element_text(face="bold", size = 16, hjust=0.5),
        legend.position="bottom",
        legend.text = element_text(size=12),
        legend.title = element_blank()
  )

#Safe figure
ggsave("Keusch_Yan_2018_Figure3.pdf", width = 12.5, height = 8, units = "in")

# -----------------------------------------------------------------
#Three-way interactions
#Models including interaction term speeding*scale direction*age
#Models do not converge
threeway2_models <- function(x){
  y <- glmer(response ~ direction + edu + age_65 + speeder_300 +
               direction:edu + direction:age_65 + direction:speeder_300 +
               speeder_300:direction:age_65 +
               (1|id) + (1|item),
             family = binomial, data = x, control = glmerControl(optimizer = "bobyqa"))
  summary(y)
}

map(long_data_sets, threeway2_models)
# -----------------------------------------------------------------
