#######################################################
# Replication Code for                                #
# "Retrieving True Preference Under Authoritarianism" #
# Survey Research Methods                             #
# Authors: Jongyoon BAIK and Xiaoxiao SHEN            #
# Date: March 19, 2025                                #
#######################################################

library(MASS)
library(writexl)
library(readxl)
library(tidyverse)
library(tidyLPA)
library(dplyr)
library(ggplot2)


############################
# Part 1. Data Simulation  #
############################

# 1-A. Data generation
# NOTE: The specific numbers generated by the following code may differ from those in sim.dat.xlsx

N = 1000 

pA = 0.6 # Proportion of True supporters
pB = 0.3 # Proportion of Preference falsifiers
pC = 0.1 # Proportion of Candid non-supporters

population = runif(N, min = 0, max = 1) 

for (i in 1:1000){
  if (population[i] < pA){
    population[i] = 1
  } else if (population[i] < pA + pB){
    population[i] = 2
  } else {
    population[i] = 3
  }
}

pAtype1 = c(0, 0.02, 0.08, 0.3, 0.6) # distribution of group A's answers to Type1 question 1
pBtype1 = c(0, 0.02, 0.08, 0.3, 0.6) # distribution of group B's answers to Type1 question 1
pCtype1 = c(0.6, 0.3, 0.08, 0.02, 0) # distribution of group C's answers to Type1 question 1
rhoAtype1 = 0.8
rhoBtype1 = 0.7
rhoCtype1 = 0.9

pAtype2 = c(0, 0.02, 0.08, 0.3, 0.6) # distribution of group A's answers to Type2 question 1
pBtype2 = c(0.6, 0.3, 0.08, 0.02, 0) # distribution of group B's answers to Type2 question 1
pCtype2 = c(0.6, 0.3, 0.08, 0.02, 0) # distribution of group C's answers to Type2 question 1

rhoAq12 = 0.6
rhoAq13 = 0.7
rhoAq14 = 0.8
rhoAq23 = 0.65
rhoAq24 = 0.7
rhoAq34 = 0.9

rhoBq12 = 0.61
rhoBq13 = 0.71
rhoBq14 = 0.81
rhoBq23 = 0.651
rhoBq24 = 0.71
rhoBq34 = 0.91

rhoCq12 = 0.61
rhoCq13 = 0.72
rhoCq14 = 0.82
rhoCq23 = 0.652
rhoCq24 = 0.72
rhoCq34 = 0.92


generateAnswer <- function(pList, cov){
  m = dim(cov)[1] # number of questions
  ans = matrix(0, 1, m)
  draw = mvrnorm(length(ans), mu = ans, Sigma = cov)
  for (i in 1:m){
    for (j in 1:m){
      print(i)
      print(j)
      if (draw[i,j] < qnorm(pList[1])){
        print(draw[i,j])
        ans[i,j] = 1
      } else if (draw[i,j] < qnorm(pList[1] + pList[2])){
        print(draw[i,j])
        ans[i,j] = 2
      } else if (draw[i,j] < qnorm(pList[1] + pList[2] + pList[3])){
        print(draw[i,j])
        ans[i,j] = 3
      } else if (draw[i,j] < qnorm(pList[1] + pList[2] + pList[3] + pList[4])){
        print(draw[i,j])
        ans[i,j] = 4
      } else{
        print(draw[i,j])
        ans[i,j] = 5
      }
      
    }
    return (ans)
    print(ans)
  }
}

covAtype1 = matrix(c(c(1,rhoAtype1),c(rhoAtype1,1)), nrow = 2, byrow = TRUE)
covBtype1 = matrix(c(c(1,rhoBtype1),c(rhoBtype1,1)), nrow = 2, byrow = TRUE)
covCtype1 = matrix(c(c(1,rhoCtype1),c(rhoCtype1,1)), nrow = 2, byrow = TRUE)

covAtype2 = matrix(c(c(1, rhoAq12, rhoAq13, rhoAq14),
                     c(rhoAq12, 1, rhoAq23, rhoAq24),
                     c(rhoAq13, rhoAq23, 1, rhoAq34),
                     c(rhoAq14, rhoAq24, rhoAq34, 1)), nrow = 4, byrow = TRUE)
covBtype2 = matrix(c(c(1, rhoBq12, rhoBq13, rhoBq14),
                     c(rhoBq12, 1, rhoBq23, rhoBq24),
                     c(rhoBq13, rhoBq23, 1, rhoBq34),
                     c(rhoBq14, rhoBq24, rhoBq34, 1)), nrow = 4, byrow = TRUE)
covCtype2 = matrix(c(c(1, rhoCq12, rhoCq13, rhoCq14),
                     c(rhoCq12, 1, rhoCq23, rhoCq24),
                     c(rhoCq13, rhoCq23, 1, rhoCq34),
                     c(rhoCq14, rhoCq24, rhoCq34, 1)), nrow = 4, byrow = TRUE)

survey_data = matrix(0, N, 2+4)

for (i in 1:N){
  if (population[i] == 1){
    survey_data[i,1:2] = generateAnswer(pAtype1, covAtype1)
    survey_data[i,3:6] = generateAnswer(pAtype2, covAtype2)
  }else if (population[i] == 2){
    survey_data[i,1:2] = generateAnswer(pBtype1, covBtype1)
    survey_data[i,3:6] = generateAnswer(pBtype2, covBtype2)
  }else if (population[i] == 3){
    survey_data[i,1:2] = generateAnswer(pCtype1, covCtype1)
    survey_data[i,3:6] = generateAnswer(pCtype2, covCtype2)
  }else{
    print('Group does not exist')
  }
}

out_total = matrix(0, N, 7)
out_total[,1] = population
out_total[,2:7] = survey_data


sim.dat = data.frame(`Group Type` = out_total[,1],
                       `Type1 Q1` = out_total[,2],
                       `Type1 Q2` = out_total[,3],
                       `Type2 Q1` = out_total[,4],
                       `Type2 Q2` = out_total[,5],
                       `Type2 Q3` = out_total[,6],
                       `Type2 Q4` = out_total[,7])

# 1-B. LPA on the simulated data. Figure A.3 and Table A.1

sim.dat <- read_excel("sim.dat.xlsx")

sim.dat.LPA <- dplyr::select(sim.dat, -(Group.Type))
est.sim <- sim.dat.LPA %>%
  estimate_profiles(1:10)%>%
  compare_solutions(statistics=c("AIC", "BIC"))

est.sim <- as.data.frame(est.sim$fits)
plot(est.sim$Classes, est.sim$AIC, type="l", col="blue", 
     ylab="AIC and BIC", xlab="Number of Profile")
lines(est.sim$Classes, est.sim$BIC)
legend("bottomleft", c("AIC", "BIC"), fill=c("blue", "black"))

# Figure A.3. and Table A.1.
sim.dat.LPA %>%
  estimate_profiles(6) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")

sim.result <- sim.dat.LPA %>%
  estimate_profiles(6)











#################################
# Part 2. LPA on CGSS 2006      #
#################################

Sys.setlocale("LC_ALL","Chinese")
shanghai <- read.csv("shanghai.csv")
                    #Only included the observations where province=='上海市'

shanghai_c <- filter(shanghai, chentreat==0)
shanghai_t <- filter(shanghai, chentreat==1)


shanghai_LPA_c <- shanghai_c %>%
  dplyr::select(DirectSupport_LPA, Rev_CourtGov_LPA, CourtGov2_LPA, 
                CourtGov3_LPA, Information, Happy,
                EconDemo_LPA, Conflict_LPA)
shanghai_LPA_t <- shanghai_t %>%
  dplyr::select(DirectSupport_LPA, Rev_CourtGov_LPA, CourtGov2_LPA, 
                CourtGov3_LPA, Information, Happy,
                EconDemo_LPA, Conflict_LPA)


                # Direct support: qe4711
                # Rev Court Gov: qe4710
                # Court Gov2: qe4712
                # Court Gov3: qe4736
                # Information: qe3913
                # Happy: qe49
                # Econ Demo: qe4732
                # Conflict: qe063


# 2-A. Control Group: Figure A.5 and Table A.4

sc <- shanghai_LPA_c %>%
  estimate_profiles(1:10) %>%
  compare_solutions(statistics=c("AIC", "BIC"))

sc <- as.data.frame(sc$fits)
plot(sc$Classes, sc$AIC, type="l", col="blue", 
     ylab="AIC and BIC", xlab="Number of Profiles",
     ylim=c(2200, 2900))
lines(sc$Classes, sc$BIC)
legend("bottomleft", c("AIC", "BIC"), fill=c("blue", "black"))

shanghai_LPA_c %>%
  estimate_profiles(4) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")

control.result <- shanghai_LPA_c %>%
  estimate_profiles(4)


# 2-B. Treatment Group: Figure A.6 and Table A.5

st <- shanghai_LPA_t %>%
  estimate_profiles(1:10) %>%
  compare_solutions(statistics=c("AIC", "BIC"))

st <- as.data.frame(st$fits)
plot(st$Classes, st$AIC, type="l", col="blue", 
     ylab="AIC and BIC", xlab="Number of Profile")
lines(st$Classes, st$BIC)
legend("bottomleft", c("AIC", "BIC"), fill=c("blue", "black"))


shanghai_LPA_t %>%
  single_imputation() %>%
  estimate_profiles(6) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")

treat.result <- shanghai_LPA_t %>%
  estimate_profiles(6)

# 2-C. All 400 respondents: Figures A.7 & A.8 and Table A.6

shanghai_LPA <- shanghai %>%
  dplyr::select(DirectSupport_LPA, Rev_CourtGov_LPA, CourtGov2_LPA, 
                CourtGov3_LPA, Information, Happy,
                EconDemo_LPA, Conflict_LPA)

s400 <- shanghai_LPA %>%
  estimate_profiles(1:12) %>%
  compare_solutions(statistics=c("AIC", "BIC"))

s400 <- as.data.frame(s400$fits)
plot(s400$Classes, s400$AIC, type="l", col="blue", 
     ylab="AIC and BIC", xlab="Number of Profiles",
     ylim=c(7900, 9300))
lines(s400$Classes, s400$BIC)
legend("bottomleft", c("AIC", "BIC"), fill=c("blue", "black"))


shanghai_LPA %>%
  estimate_profiles(8) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")


LPA_400 <- shanghai_LPA %>%
  estimate_profiles(8)













###################################
# Part 3. LPA on WVS 2017 (U.S.)  #
###################################

USA7 <- read.csv("USA7.csv")
        #Only included Republicans (84001 in Q223) and Democrats (84002 in Q223)
        #Reflected in column RD (2: Rpublican, 1: Democrat)


USA7LPA <- dplyr::select(USA7, LR, Conf.gov, job_over_immigrant, immigrant,
                  Conf.church, God, homosexual, abortion)

          # LR: Q240, divided by 2
          # Conf.gov: Q71
          # job_over_immigrant: Q34
          # immigrant: Q121
          # Conf.church: Q64
          # God: Q164, divided by 2
          # homosexual: Q36
          # abortion: Q184, divided by 2




# 3-A. Forced to have two subgroups, Figure 5-(a) and Table A.11

USA7LPA %>%
  estimate_profiles(2) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")

usaresult2 <- USA7LPA %>%
  estimate_profiles(2)



# 3-B. Figure A.12 and Table A.10

USA_np <- USA7LPA %>%
  estimate_profiles(1:9) %>%
  compare_solutions(statistics=c("AIC", "BIC"))

USA.fits <- as.data.frame(USA_np$fits)

plot(USA.fits$Classes, USA.fits$AIC, type="l", col="blue", 
     ylab="AIC and BIC", xlab="Number of Profile")
lines(USA.fits$Classes, USA.fits$BIC)
legend("bottomleft", c("AIC", "BIC"), fill=c("blue", "black"))

USA7LPA %>%
  #single_imputation() %>%
  #scale() %>%
  estimate_profiles(6) %>%
  plot_profiles(sd=FALSE,
                add_line = TRUE,
                alpha_range = c(0, 0.1),
                rawdata = FALSE)+
  labs(x="Survey Questions", y="Answers")

USA7_res <- USA7LPA5 %>%
  estimate_profiles(6)












