####################################################################################################
# ML evaluation
####################################################################################################

# Load data
# load() restores the saved objects into the current environment. The code
# below uses `models` and `validation`.
# NOTE(review): presumably both come from this .RData file -- confirm.
load("DATA/AllModelsPaper.RData")

# Models as list
# Show the names of the available models.
names(models)

# Going through models individually
# Select the model to evaluate; replace the right-hand side with one of the
# alternatives listed in the trailing comment to evaluate a different model.
model_test <- models$log # models$glmnet # models$tree # models$knn # models$rf # models$gbm #  models$xgb 

# Prepare performance statistics at the ROC-optimal threshold.
# NOTE(review): roc()/coords() look like pROC functions and
# twoClassSummary()/confusionMatrix() like caret functions; this script does
# not attach either package itself, so they must already be loaded -- confirm.

# Score the validation set once and reuse the class probabilities below.
val_probs <- predict(model_test, validation, type = "prob")
roc_obj <- roc(validation$ausfall, val_probs[, "X1"])

# Coordinates of the "best" threshold: display them, then keep the threshold
# of the first row (coords() may return several rows when thresholds tie).
best_coords <- coords(roc_obj, "best", "threshold")
best_coords
optCut <- best_coords[1, 1]

# Class labels; twoClassSummary() expects probability columns named after them
# and a `pred` factor whose levels match `lev`.
classes <- c("X0", "X1")
test_set <- data.frame(obs = validation$ausfall, X1 = val_probs[, "X1"])
test_set$X0 <- 1 - test_set$X1
# Pin the levels: if every score falls on one side of the cut, an unpinned
# factor() would have a single level and twoClassSummary() would stop with
# "levels of observed and predicted data do not match".
# NOTE(review): the `>=` rule assumes higher X1 probability means class X1;
# roc() chooses its direction automatically -- confirm they agree.
test_set$pred <- factor(ifelse(test_set$X1 >= optCut, "X1", "X0"),
                        levels = classes)

## Performance statistics: at optimal threshold
twoClassSummary(test_set, lev = classes) # ROC AUC, Sens, Spec
confusionMatrix(test_set$pred, test_set$obs) # Balanced Accuracy

# Prepare performance statistics with the threshold at the median score.
# test_set$X1 already holds the predicted X1 probabilities for the validation
# set, so reuse it instead of scoring the validation set again.
MedCut <- median(test_set$X1)
# Pin the levels: with few distinct scores the median can equal the minimum,
# so `>= MedCut` labels every row "X1"; an unpinned factor() would then have
# one level and twoClassSummary()/prSummary() would stop on mismatched levels.
test_set$pred <- factor(ifelse(test_set$X1 >= MedCut, "X1", "X0"),
                        levels = classes)

## Performance statistics: threshold at median
twoClassSummary(test_set, lev = classes) # ROC AUC, Sens, Spec
prSummary(test_set, lev = classes) # PR AUC, Precision, Recall, F
confusionMatrix(test_set$pred, test_set$obs) # Balanced Accuracy
