# Reset the workspace, then restore the objects stored in R-DQ.RData.
rm(list = ls())
load(file = "R-DQ.RData")

# The data set is...
# - di:      Quality indicators (per study), computed via {R-Indicators.R}, one data.frame per study
# - dq:      Original data (studies 1/2) or conditions (study 3/4) due to non-author data ownership
# - QInds:   Description of the quality indicator variables (includes some variables that have been tried but not reported)

# This script will write a file R-results.csv including all the data (and more) reported in the paper

## Quality per indicator and study
# R² (max. performance), Sensitivity (Hit Rate), Sensitivity (valids only), LR+, LR+ (valids)

# Dependencies ----
# Attach each required package; if it cannot be attached, install it first
# and attach it afterwards. Then run the indicator script.
for (pkg in c("rms", "pROC")) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}
rm(pkg)
source("R-Indicators.R")

# Stats ----
# For every study, print the study name followed by the condition counts,
# first from the raw data (dq), then from the indicator table (di).
for (dc in names(dq)) {
  cat(sprintf("\n\n %s \n", dc))
  print(table(dq[[dc]]$condition))
  print(table(di[[dc]]$condition))
}


# Indicator performance per study ----
# For every study in dq: run dcPerformance() on each indicator that is both
# described in QInds and present in the study's indicator table (di), and
# store the results in res[[study]] as a data.frame with one row per
# indicator (row names = indicator names).
# NOTE(review): dcPerformance() is not defined in this file (presumably it
# comes from R-Indicators.R); it is assumed to return one row of statistics.
res <- list()
for (dc in names(dq)) {
  cat("\n", dc, "\n")
  assn <- dq[[dc]]$condition
  indi <- di[[dc]]
  vars <- names(QInds)[names(QInds) %in% names(indi)]

  # One slot per indicator. Indicators without any data stay NULL for now and
  # are padded after the loop, once the width of a result row is known. (The
  # previous code derived the width from the rows collected so far, which is
  # not available when the very first indicator has no data.)
  rows <- vector("list", length(vars))
  names(rows) <- vars
  for (ind in vars) {
    if (all(is.na(indi[[ind]]))) {
      cat(paste("No data for indicator", ind, "\n"))
      next
    }
    # Original author's note: manipulation check in S2/S4 removed people in CG
    # who did not seriously complete the questionnaire.
    # The former S1/S2 vs. other-studies branches made the identical call,
    # so a single call is used here.
    rows[ind] <- list(dcPerformance(assn, indi[[ind]], QInds[[ind]], 0))
  }

  has_row <- !vapply(rows, is.null, logical(1))
  if (any(has_row)) {
    # Build an all-NA placeholder with the same shape/names as a real row
    na_row <- rows[[which(has_row)[1]]]
    na_row[] <- NA
    rows[!has_row] <- list(na_row)
    res[[dc]] <- as.data.frame(do.call(rbind, unname(rows)))
    row.names(res[[dc]]) <- vars
  } else {
    # No indicator produced a result: keep the indicator names, no columns
    res[[dc]] <- data.frame(row.names = vars)
  }
}
rm(list = intersect(
  c("dc", "ind", "vars", "indi", "assn", "rows", "has_row", "na_row"),
  ls()
))

# Write the per-study tables to the results file ----
# Each study gets a title line, a tab-separated header line ("Indicator" plus
# the column names) and the table itself. The first write creates/overwrites
# the file; everything after that is appended, with an empty line before each
# further title.
outfile <- "R-results.csv"
for (studyId in paste0("S", 1:5)) {
  isFirst <- (studyId == "S1")
  title <- paste("Study", studyId)
  studyTab <- res[[studyId]]
  write.table(
    if (isFirst) title else c("", title),
    col.names = FALSE, row.names = FALSE, file = outfile, append = !isFirst
  )
  write.table(
    t(c("Indicator", colnames(studyTab))),
    sep = "\t", col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(studyTab, file = outfile, sep = "\t", col.names = FALSE, append = TRUE)
}
rm(studyId, isFirst, title, studyTab)


# Study 1 separated by subconditions
# Compare the control group ("CG") against one experimental sub-condition
# (condition2) at a time. Results are stored in res$S1_<subcondition> as a
# data.frame with one row per indicator.
for (dc2 in c("careless", "faking", "rushing")) {
  # %in% yields FALSE (never NA) for missing values. With `==`, a missing
  # condition/condition2 could put NA into the row mask, and an NA in a
  # logical row index selects an all-NA phantom row.
  inSub <- dq$S1$condition2 %in% dc2
  cat("\n", dc2, " (n_EG = ", sum(inSub), ")\n", sep = "")
  caselist <- (dq$S1$condition %in% "CG") | inSub
  assn <- dq$S1[caselist, "condition"]
  indi <- di$S1[caselist, ]
  resName <- paste("S1", dc2, sep = "_")
  vars <- names(QInds)[names(QInds) %in% names(indi)]
  # Collect one result per indicator, then bind once instead of growing a
  # matrix inside the loop
  rows <- lapply(vars, function(ind) {
    dcPerformance(assn, indi[[ind]], QInds[[ind]], 0)
  })
  res[[resName]] <- as.data.frame(do.call(rbind, rows))
  row.names(res[[resName]]) <- vars
}
rm(dc2, vars, indi, assn, resName, caselist, inSub, rows)

# Append the three Study 1 sub-condition tables to the results file:
# empty line + title, tab-separated header line, then the table itself.
for (subCond in c("careless", "faking", "rushing")) {
  subTab <- res[[paste("S1", subCond, sep = "_")]]
  write.table(
    c("", paste0("Study S1 (", subCond, ")")),
    col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(
    t(c("Indicator", colnames(subTab))),
    sep = "\t", col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(subTab, file = outfile, sep = "\t", col.names = FALSE, append = TRUE)
}
rm(subCond, subTab)


# Study 2 separated by subconditions
# Compare the control group ("CG") against one experimental sub-condition
# (condition2) at a time. Results are stored in res$S2_<subcondition> as a
# data.frame with one row per indicator.
for (dc2 in c("careless", "faking", "rushing")) {
  # %in% yields FALSE (never NA) for missing values. With `==`, a missing
  # condition/condition2 could put NA into the row mask, and an NA in a
  # logical row index selects an all-NA phantom row.
  inSub <- dq$S2$condition2 %in% dc2
  cat("\n", dc2, " (n_EG = ", sum(inSub), ")\n", sep = "")
  caselist <- (dq$S2$condition %in% "CG") | inSub
  assn <- dq$S2[caselist, "condition"]
  indi <- di$S2[caselist, ]
  resName <- paste("S2", dc2, sep = "_")
  vars <- names(QInds)[names(QInds) %in% names(indi)]
  # Collect one result per indicator, then bind once instead of growing a
  # matrix inside the loop
  rows <- lapply(vars, function(ind) {
    dcPerformance(assn, indi[[ind]], QInds[[ind]], 0)
  })
  res[[resName]] <- as.data.frame(do.call(rbind, rows))
  row.names(res[[resName]]) <- vars
}
rm(dc2, vars, indi, assn, resName, caselist, inSub, rows)

# Append the three Study 2 sub-condition tables to the results file:
# empty line + title, tab-separated header line, then the table itself.
for (subCond in c("careless", "faking", "rushing")) {
  subTab <- res[[paste("S2", subCond, sep = "_")]]
  write.table(
    c("", paste0("Study S2 (", subCond, ")")),
    col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(
    t(c("Indicator", colnames(subTab))),
    sep = "\t", col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(subTab, file = outfile, sep = "\t", col.names = FALSE, append = TRUE)
}
rm(subCond, subTab)


# Study 3 showed an attention cue or not

table(dq$S3$DQ02, dq$S3$condition, useNA = "always")

#         CG  EG <NA>
#   1    480   0    0 <- no message
#   2    479   0    0 <- attentive message
#   <NA>   0 368    0
#

# In the interest of simplicity, only "attentive" v. "careless" is compared
# Remove those from the control group who did the questionnaire before in the experimental group
# Each pass keeps the whole experimental group plus the control cases with the
# given DQ02 value and stores the indicator results in res$S3_<DQ02 value>.
for (dc2 in c(1, 2)) {
  caselist <- (dq$S3$DQ02 == dc2) | (dq$S3$condition == "EG")
  # The counts in this line are totals over all of S3; the table printed
  # right after it shows the counts of the selected cases.
  cat(
    "\n", dc2,
    " (n_EG = ", sum(dq$S3$condition == "EG"),
    ", n_CG = ", sum(dq$S3$condition == "CG"),
    ")\n",
    sep = ""
  )
  print(table(dq$S3[caselist, "condition"]))
  assn <- dq$S3[caselist, "condition"]
  indi <- di$S3[caselist, ]
  resName <- paste0("S3_", dc2)
  vars <- names(QInds)[names(QInds) %in% names(indi)]
  perIndicator <- lapply(vars, function(ind) {
    dcPerformance(assn, indi[[ind]], QInds[[ind]], 0)
  })
  res[[resName]] <- as.data.frame(do.call(rbind, perIndicator))
  row.names(res[[resName]]) <- vars
}
rm(dc2, vars, indi, assn, resName, caselist, perIndicator)

# Append the two Study 3 comparison tables to the results file. The names of
# s3Labels are the DQ02 values used in the result names (res$S3_1, res$S3_2).
s3Labels <- c("1" = "v. simple", "2" = "v. attentive")
for (s3Key in names(s3Labels)) {
  s3Tab <- res[[paste0("S3_", s3Key)]]
  write.table(
    c("", paste0("Study S3 (", s3Labels[[s3Key]], ")")),
    col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(
    t(c("Indicator", colnames(s3Tab))),
    sep = "\t", col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(s3Tab, file = outfile, sep = "\t", col.names = FALSE, append = TRUE)
}
rm(s3Labels, s3Key, s3Tab)



# [S4] Cross-tabulate two thresholded indicators (qbogs, qfast) and report
# their correlation.
# NOTE(review): the cut-offs are taken over unchanged; their origin is not
# visible in this script.
table(
  bogus = (di$S4$qbogs < -2),
  speed = (di$S4$qfast < 1.48408449720374),
  useNA = "ifany"
)
cor(di$S4$qbogs, di$S4$qfast, use = "pairwise.complete.obs") # 0.8379092

# Print the size of each Study 1 sub-condition (dq$S1$condition2)
for (dc2 in c("careless", "faking", "rushing")) {
  cat(sprintf("\n%s (n_EG = %d)\n", dc2, sum(dq$S1$condition2 == dc2, na.rm = TRUE)))
}

# [S3] IMC summary result
table(dq$S3$imcP, dq$S3$condition, useNA = "ifany")
table(dq$S3$imcS, dq$S3$condition, useNA = "ifany")

# [S3] Thresholded qfast indicator by condition
table(di$S3$qfast >= 1.413, dq$S3$condition, useNA = "ifany")


# [S4] Sub-experiment
# Frequency tables of the four EX02_* variables in the S4 data, followed by a
# cross-tabulation of EX02_01 and EX02_03. The value labels below are the
# original author's notes (translated from German where necessary).

table(dq$S4$EX02_01) # 2 = bogus, 3 = instructed response, 4 = IMC
table(dq$S4$EX02_02) # 2 = "To ensure high data quality, we use various methods in this survey to determine whether the questions were answered carefully."
table(dq$S4$EX02_03) # 2 = EG data quality
table(dq$S4$EX02_04) # Verbal description

table(dq$S4$EX02_01, dq$S4$EX02_03) # No control group 1 in the EG



# Cross-correlation of indicators in Study 2
# Two Spearman correlation matrices (pairwise complete observations) are
# appended to the results file under the same title: first for the longer
# indicator list, then for the shorter one. tmp and tmpC keep the data and the
# matrix of the last list after the loop.
for (indicatorSet in list(
  c("qmis2", "qmisr", "qmidk", "qstlg", "qsdba", "qptal", "qptba",
    "qsdst", "qmdst", "qcons", "qrgrs", "qtime", "qtimo", "qfast"),
  c("qmis2", "qmisr", "qsdba", "qptba", "qsdst",
    "qmdst", "qcons", "qrgrs", "qtime", "qfast")
)) {
  tmp <- di$S2[indicatorSet]
  tmpC <- cor(tmp, method = "spearman", use = "pairwise.complete.obs")
  write.table(
    c("", "Study S2 Indicator Cross Correlations"),
    col.names = FALSE, row.names = FALSE, file = outfile, append = TRUE
  )
  write.table(tmpC, file = outfile, sep = "\t", col.names = FALSE, append = TRUE)
}
rm(indicatorSet)

nrow(di$S2) # N=10901
