### Name: Analysis for the UQMP follow up study
### Last modified: 16/03/2023
### Authors: Benedikt Iberl



### setting options

options(scipen = 999)
set.seed(123)


### data preparation

# set working directory
# setwd("...")

# read data
dat <- read.csv("appendix_data.csv", header = TRUE, sep = ";", skipNul = TRUE)

# exclude irrelevant columns
dat <- dat[-c(2, 3, 4, 5, 33)]

# rename columns
names(dat) <- c("id", "start", "attcheck", "eyecol.uqm", "eyecol.fnl",
		"pre.rsi", "part.conf", "del.code", "drivecheck", "alk.uqm",
		"anonym", "sensitive", "gender", "age", "education", 
		"bil.res.ID", "time1", "time2", "time3", "time4", "time5", 
		"time6", "time7", "time8", "time9", "time10", "time11", 
		"timesum", "lastdata", "finished", "qviewer", "lastpage",
		"maxpage", "missing", "missrel", "time.rsi",
		"deg.time")

# transforming "1 and 2"-variables into indicator variables (0 and 1)
qst <- c("eyecol.uqm", "eyecol.fnl", "alk.uqm")
for(i in qst){
	dat[i] <- dat[i]*(-1)+2
}

dat$eyecol.uqm[dat$eyecol.uqm == -1] <- NA
dat$eyecol.fnl[dat$eyecol.fnl == -1] <- NA

# transforming demographic variables into factors
trf.gender <- c("w", "m", "d")

for(i in 1:length(trf.gender)){
	dat$gender[dat$gender == i] <- trf.gender[i]
}
dat$gender <- as.factor(dat$gender)

trf.education <- c("none", "grundhaupt", "real", "abitur", "ausbildung", "uni")

for(i in 1:length(trf.education)){
	dat$education[dat$education == i] <- trf.education[i]
}
dat$education <- as.factor(dat$education)



### data exclusion

# exclude incomplete data
dat <- subset(dat, maxpage == 11)

# attention test: exclude anyone who didn't check the correct option ("London")
dat <- subset(dat, attcheck == 4)  # should be 0 because of the screening-out

# RSI (during sampling)
dat <- subset(dat, !is.na(eyecol.fnl))

# exclude 2 participants who participated after stopping rule
dat <- dat[1:(nrow(dat)-2),]



### sample description

## demographics
# age statistics
age.sum <- summary(dat$age)
age.sd  <- sd(dat$age, na.rm=TRUE)

# age quota
dat$quoteage <- character(nrow(dat))
dat$quoteage[dat$age < 30] <- "18-29"
dat$quoteage[dat$age >= 30 & dat$age < 40] <- "30-39"
dat$quoteage[dat$age >= 40 & dat$age < 50] <- "40-49"
dat$quoteage[dat$age >= 50 & dat$age < 60] <- "50-59"
dat$quoteage[dat$age >= 60] <- "60+"

age.perc <- round(table(dat$quoteage)/sum(table(dat$quoteage))*100, 1)


# sex statistics
sex.tab  <- table(dat$gender)
sex.perc <- round(table(dat$gender)/sum(table(dat$gender))*100, 1)

# education statistics
dat$education <- factor(dat$education,
			levels = c("none", "grundhaupt", "real", "abitur",
				   "ausbildung", "uni"),
			labels = c("none", "grundhaupt", "real",
				   "abitur", "ausbildung", "uni"))
edu.tab  <- table(dat$education)
edu.perc <- round(table(dat$education)/sum(table(dat$education))*100, 1)


## completion time
# completion time statistics for all participants
time.sum <- summary(dat$timesum)
time.sd  <- sd(dat$timesum)



### data analysis: questionnaire impression

# anonymity
anon.sum <- summary(dat$anonym)
anon.sd  <- sd(dat$anonym, na.rm=TRUE)

# sensitivity
sens.sum <- summary(dat$sensitive)
sens.sd  <- sd(dat$sensitive, na.rm=TRUE)



### data analysis: eye color

# N = 579
# H0: blue eye prev <= .387 (c_s = 265)
# H1: blue eye prev >= .489 (N - c_s + 1 = 315)

# UQM prevalence eye color
p <- 245.25/365.25		# prob. of getting sensitive question
q <- 181.25/365.25		# prob. of answering "yes" to neutral question
c <- 265
N <- nrow(dat)
gamma.eye <- (c - 1)/(N - 1)

UQM_fct  <- function(gamma, p, q) {
  pi_s   <- (gamma - (1-p)*q) / p
  print(pi_s)
}

UQM_var  <- function(gamma, p, n) {
  var_pi <- (gamma * (1-gamma)) / (n*p^2)
  print(var_pi)
}

UQM_ci   <- function(gamma, p, q, n) {
  pi_s   <- (gamma - (1-p)*q) / p
  var_pi <- (gamma * (1-gamma)) / (n*p^2)
  ci     <- c(pi_s - qnorm(0.975)*sqrt(var_pi),
	      pi_s + qnorm(0.975)*sqrt(var_pi))
  print(ci)
}

pi_rrt  <- UQM_fct(gamma.eye, p, q)
var_rrt <- UQM_var(gamma.eye, p, sum(table(dat$eyecol.uqm)))
ci_rrt  <- UQM_ci(gamma.eye, p, q, sum(table(dat$eyecol.uqm)))
