# Appendix: A priori simulations

### Study on UQM x Poisson Model
### Sample size simulation 
### Simulation code

# Assumptions:
# lambda = 1, PI = .3 (estimate based on Iberl (2021), assuming a lower
#	prevalence due to the more restrictive question in this study)
# time intervals: t1 = 0.25 months (1 week), t2 = 1 month, t3 = 6 months,
#	t4 = 12 months

library(bbmle)
# True parameter values used to generate the simulated data
lam_s <- 1	# lambda
PI_s  <- 0.3	# PI

# Design parameters of the unrelated question model (UQM)
p <- 0.671	# design parameter, prob. of answering the sensitive question
q <- 0.496	# design parameter, prob. for "yes"-answer to neutral question


# Predicted probability of a "yes"-answer for a time interval of length t.
#
# Uses the same expression as the likelihood function lL() further below:
#   p * PI * (1 - exp(-lambda * t)) + (1 - p) * q
# evaluated at the true values lam_s and PI_s. The sensitive-question term
# must be weighted by p; without that factor the curve would overstate the
# expected share of "yes"-answers.
#
# Argument:
#   t  numeric vector of time interval lengths
# Returns a numeric vector of the same length as t.
p_yes_pred <- function(t) {
  p * PI_s * (1 - exp(-lam_s * t)) + (1 - p) * q
}

# predicted proportion of "yes"-answers as a function of the time interval
curve(p_yes_pred(x), ylim = c(0, 1), xlim = c(0, 12))


# Negative log-likelihood of the UQM x Poisson model over the four groups.
#
# Arguments (the parameters supplied via `start` in the mle2() call):
#   lam  rate parameter lambda
#   PI   prevalence parameter PI
#
# Besides its arguments the function reads the following objects from the
# enclosing environment at call time:
#   p, q            design parameters of the UQM
#   t1, ..., t4     time points of the four groups
#   a1, ..., a4     number of "yes"-answers per group
#   b1, ..., b4     number of "no"-answers per group
#
# Returns a single number: the negative sum of the four group
# log-likelihoods a * log(P(yes)) + b * log(1 - P(yes)).
lL <- function(lam, PI) {
  time_pts <- c(t1, t2, t3, t4)
  n_yes <- c(a1, a2, a3, a4)
  n_no <- c(b1, b2, b3, b4)

  # P(yes | t) for every group at once
  p_yes <- p * PI * (1 - exp(-lam * time_pts)) + (1 - p) * q

  # log-likelihood contribution of each group
  ll_group <- n_yes * log(p_yes) + n_no * log(1 - p_yes)

  # negative log-likelihood; returned visibly as the last expression
  -sum(ll_group)
}

# --- Simulation settings ---------------------------------------------------

# time points of the four groups (see the assumptions in the file header)
t1 <- 0.25
t2 <- 1
t3 <- 6
t4 <- 12

# box constraints used for the optimisation
lim <- 1e-15        # lower bound, keeps the parameters strictly above 0
up_lim_lam <- 7     # upper bound for lambda

n <- 500            # sample size per group (varying)
m <- 300            # number of simulations

# --- Result containers (one entry per simulation run) ----------------------
# lam_est / lam_sd: estimate and SD of lambda
# PI_est  / PI_sd : estimate and SD of PI
# p_sig           : variable for significance test
lam_est <- lam_sd <- PI_est <- PI_sd <- p_sig <- numeric(m)


# Simulation loop: generate yes/no counts for the four groups, fit the model
# by maximum likelihood and store the estimates with their standard
# deviations.
#
# The counts a1..a4 and b1..b4 are deliberately assigned at top level because
# lL() reads them from the enclosing environment.

# Loop-invariant parts of the simulated "yes"-counts:
#   n_carrier      number of Poisson draws per group, round(PI_s * n * p)
#   n_neutral_yes  fixed contribution (1 - p) * q * n added to every group
# NOTE(review): only the Poisson counts are random; both quantities above are
# plugged in as fixed values, so the a's are generally non-integer. This
# presumably understates the sampling variability compared with drawing them
# from binomial distributions -- confirm that this is intended.
n_carrier <- round(PI_s * n * p)
n_neutral_yes <- (1 - p) * q * n

# seq_len() instead of 1:m so that m = 0 runs zero iterations
for (i in seq_len(m)) {
  # yes-answers for each group: number of Poisson draws > 0 plus the fixed
  # neutral-question contribution
  a1 <- sum(rpois(n_carrier, lam_s * t1) > 0) + n_neutral_yes
  a2 <- sum(rpois(n_carrier, lam_s * t2) > 0) + n_neutral_yes
  a3 <- sum(rpois(n_carrier, lam_s * t3) > 0) + n_neutral_yes
  a4 <- sum(rpois(n_carrier, lam_s * t4) > 0) + n_neutral_yes

  # no-answers for each group
  b1 <- n - a1
  b2 <- n - a2
  b3 <- n - a3
  b4 <- n - a4

  # ML estimation via own function lL(), box-constrained to
  # lam in [lim, up_lim_lam] and PI in [lim, 1 - lim]
  ML <- mle2(lL, start = list(lam = .1, PI = .1),
             method = "L-BFGS-B",
             lower = c(lim, lim), upper = c(up_lim_lam, 1 - lim))

  # point estimates and standard deviations, in the order given in `start`
  # (1 = lambda, 2 = PI); vcov() is evaluated only once per fit
  est <- coef(ML)
  se <- sqrt(diag(vcov(ML)))

  lam_est[i] <- est[1]			# estimate for lambda
  lam_sd[i] <- se[1]			# SD of lambda

  PI_est[i] <- est[2]			# estimate for PI
  PI_sd[i] <- se[2]			# SD of PI
}

# Print summary of results: number of runs, sample size, and for each
# parameter the mean estimate and the mean standard deviation (runs with a
# missing standard deviation are ignored in the latter).
summary_lines <- c(
  paste0("number of simulations: ", m),
  paste0("N: ", n),
  paste0("mean lambda: ", round(mean(lam_est), 3),
         ", mean standard deviation: ",
         round(mean(lam_sd, na.rm = TRUE), 3)),
  paste0("mean PI: ", round(mean(PI_est), 3),
         ", mean standard deviation: ",
         round(mean(PI_sd, na.rm = TRUE), 3))
)
res <- paste(summary_lines, collapse = "\n")
cat(res)


### Results for different sample sizes (N = sample size per group)
# N: 100
# mean lambda: 1.026, mean standard deviation: 0.641
# mean PI: 0.298, mean standard deviation: 0.052

# N: 200
# mean lambda: 1.009, mean standard deviation: 0.434
# mean PI: 0.298, mean standard deviation: 0.036

# N: 300
# mean lambda: 1.014, mean standard deviation: 0.353
# mean PI: 0.298, mean standard deviation: 0.03

# N: 400
# mean lambda: 1.006, mean standard deviation: 0.3
# mean PI: 0.302, mean standard deviation: 0.026

# N: 500
# mean lambda: 1.013, mean standard deviation: 0.271
# mean PI: 0.301, mean standard deviation: 0.023

# N: 600
# mean lambda: 1.001, mean standard deviation: 0.244
# mean PI: 0.301, mean standard deviation: 0.021

# N: 700
# mean lambda: 1.004, mean standard deviation: 0.227
# mean PI: 0.3, mean standard deviation: 0.019

# N: 800
# mean lambda: 1.005, mean standard deviation: 0.212
# mean PI: 0.3, mean standard deviation: 0.018
