################################################################################
# Code to process European census data and calculate Duncan Dissimilarity Index
#
# Variable: Marital status
#
#
# Author: Jan-Lucas Schanze
# Date:   07.10.2022
# Paper: (SRM submission #7830)
################################################################################

### Preparing the data preparation and analysis

# 1. Store raw data (csv files) in a folder     -> directory will be defined as input below
# 2. Create folder for data outputs and graphs  -> directory will be defined as output below

### OVERVIEW OF VARIABLES

# gender:	        0 (male)
#                 1 (female)

# housing	        1 (living in a private household)
#                 2 (institutional household)
#                 3 (primary homeless)
#                 4 (Not stated)

# education	      0 (no formal education) -> LOW
#                 1 (ISCED 1)             -> LOW
#                 2 (ISCED 2)             -> LOW
#                 3 (ISCED 3)             -> MEDIUM
#                 4 (ISCED 4)             -> MEDIUM
#                 5 (ISCED 5)             -> HIGH
#                 6 (ISCED 6)             -> HIGH  
#                 7 (Not stated)

### Source for classification of education: https://ec.europa.eu/eurostat/statistics-explained/index.php/International_Standard_Classification_of_Education_(ISCED)#Correspondence_between_ISCED_2011_and_ISCED_1997


######## Installing packages (required once)

#install.packages("foreign")
#install.packages("haven")
#install.packages("Rcmdr")
#install.packages("readr")
#install.packages("lattice")
#install.packages("ggplot2")
#install.packages("tidyverse")
#install.packages("beanplot")
#install.packages("DAAG")
#install.packages("visreg")
#install.packages("HSAUR")
#install.packages("lme4")
#install.packages("readxl")
#install.packages("dplyr")
#install.packages("reshape2")
#install.packages(sjPlot)
#install.packages(mlmRev)
#install.packages("ggmosaic")
#install.packages("gridExtra")

###### Loading packages
library(foreign)
library(haven)
library(Rcmdr)
library(readr)
library(lattice)
library(ggplot2)
library(tidyverse)
library(beanplot)
library(DAAG)
library(visreg)
library(HSAUR)
library(lme4)
library(readxl)
library(dplyr)
library(reshape2)
library(ggmosaic)
library(gridExtra)

# Setting paths for analysis
getwd()

main.path <- "C:\\Users\\schanzjs\\Desktop\\source files\\data"  # change accordingly
save.path <- "C:\\Users\\schanzjs\\Desktop\\source files\\output" # change accordingly 

setwd(main.path)

# Loading dataset
hyper_edu <- read_xlsx("censushub_HC02_age_gender_education.xlsx", sheet = "Data") # Loading dataset with labels
hyper_edu[,1:10] # Have a look at columns 1 through 11

## Dropping countries with mv on educaton
hyper_edu <- subset(hyper_edu, cntry!="Denmark" & cntry!="Finland" & cntry!="Ireland" & cntry!="Italy" & cntry!="Poland")
## Dropping age cohort without information (younger than 15)
hyper_edu <- hyper_edu[-c(5)]
             
             
# Recoding missing values for all variables
for(i in 1:ncol(hyper_edu)){
  hyper_edu[hyper_edu[,i]=="NA",i] <-  NA
}

# Define as numeric
hyper_edu$gender <- as.numeric(as.character(hyper_edu$gender))
hyper_edu$housing <- as.numeric(as.character(hyper_edu$housing))
hyper_edu$education <- as.numeric(as.character(hyper_edu$education))

hyper_edu$age1529 <- as.numeric(as.character(hyper_edu$age1529))
hyper_edu$age3049 <- as.numeric(as.character(hyper_edu$age3049))
hyper_edu$age5064 <- as.numeric(as.character(hyper_edu$age5064))
hyper_edu$age6584 <- as.numeric(as.character(hyper_edu$age6584))
hyper_edu$age85plus <- as.numeric(as.character(hyper_edu$age85plus))

# AGGREGATING EDUCATION CATEGORIES

# Low education (0 + 1 + 2)
    hyper_low <- subset(hyper_edu, education==0 | education==1 | education==2)
    
    hyper_low_agg <- aggregate(cbind(age1529,
                                      age3049,
                                      age5064,
                                      age6584,
                                      age85plus) ~ cntry + gender + housing, data = hyper_low, FUN = function(x) c(gama = sum(x)))
    
    hyper_low_agg$education <- c(1) # generate variable education == 1 (low)

# Medium education (3 + 4)
    hyper_medium <- subset(hyper_edu, education==3 | education==4)
    
    hyper_medium_agg <- aggregate(cbind(age1529,
                                     age3049,
                                     age5064,
                                     age6584,
                                     age85plus) ~ cntry + gender + housing, data = hyper_medium, FUN = function(x) c(gama = sum(x)))
    
    hyper_medium_agg$education <- c(2) # generate variable education == 2 (medium)

# High education (5 + 6)
    hyper_high <- subset(hyper_edu, education==5 | education==6)
    
    hyper_high_agg <- aggregate(cbind(age1529,
                                        age3049,
                                        age5064,
                                        age6584,
                                        age85plus) ~ cntry + gender + housing, data = hyper_high, FUN = function(x) c(gama = sum(x)))
    
    hyper_high_agg$education <- c(3) # generate variable education == 3 (high)

## APPENDING
hyper_edu <- subset(hyper_edu, education==7)
hyper_edu <- rbind(hyper_edu, hyper_low_agg, hyper_medium_agg, hyper_high_agg)
    

## AGGREGATING NUMBERS FOR THE TOTAL 

# Aggregating all countries 
hyper_edu_eu <- hyper_edu 
                       
### check whether list of countries is correct
prop.table(table(hyper_edu_eu$cntry, hyper_edu_eu$gender), 2)

hyper_edu_eu_agg <- aggregate(cbind(age1529,
                                    age3049,
                                    age5064,
                                    age6584,
                                    age85plus) ~ gender + housing + education, data = hyper_edu_eu, FUN = function(x) c(gama = sum(x)))

hyper_edu_eu_agg$cntry <- c("Europe")

## Appending aggregated data
hyper_edu <- rbind(hyper_edu, hyper_edu_eu_agg)

# cleaning up
rm("hyper_edu_eu", "hyper_edu_eu_agg")


### dropping housing == 3 and housing == 4
hyper_edu <- subset(hyper_edu, (housing == 1 | housing == 2)) # dropping housing types not required

### Totals for education (housing collapsed)
totals_edu <- aggregate(cbind(age1529,
                              age3049,
                              age5064,
                              age6584,
                              age85plus) ~ cntry + gender + education, data = hyper_edu, FUN = function(x) c(gama = sum(x)))

colnames(totals_edu)[4] <- "totedu1529"
colnames(totals_edu)[5] <- "totedu3049"
colnames(totals_edu)[6] <- "totedu5064"
colnames(totals_edu)[7] <- "totedu6584"
colnames(totals_edu)[8] <- "totedu85plus"

### TOTALS by gender 
totals <- aggregate(cbind(age1529,
                          age3049,
                          age5064,
                          age6584,
                          age85plus) ~ cntry + gender, data = hyper_edu, FUN = function(x) c(gama = sum(x)))

## renaming variables
colnames(totals)[3] <- "total1529"
colnames(totals)[4] <- "total3049"
colnames(totals)[5] <- "total5064"
colnames(totals)[6] <- "total6584"
colnames(totals)[7] <- "total85plus"

## Total for entire population (by gender)
totals$total_allage <- totals$total1529 + totals$total3049 + totals$total5064 + totals$total6584 + totals$total85plus

### TOTALS by gender and housing
totals_within <- aggregate(cbind(age1529,
                                 age3049,
                                 age5064,
                                 age6584,
                                 age85plus) ~ cntry + gender + housing, data = hyper_edu, FUN = function(x) c(gama = sum(x)))

## renaming variables
colnames(totals_within)[4] <- "totwithin1529"
colnames(totals_within)[5] <- "totwithin3049"
colnames(totals_within)[6] <- "totwithin5064"
colnames(totals_within)[7] <- "totwithin6584"
colnames(totals_within)[8] <- "totwithin85plus"

## Total for entire population (by gender and housing)
totals_within$totwithin_allage <- totals_within$totwithin1529 + totals_within$totwithin3049 + totals_within$totwithin5064 + totals_within$totwithin6584 + totals_within$totwithin85plus

# generating a data frame with information from the three data frames
edu_fin1 <- merge(hyper_edu, totals, by=c("cntry","gender"))                    # Step 1
edu_fin2 <- merge(edu_fin1, totals_edu, by=c("cntry","gender", "education"))    # Step 2
edu_final <- merge(edu_fin2, totals_within, by=c("cntry","gender", "housing"))  # Step 3


############################
## CALCULATING SHARES
############################

# calculation: Share within age & gender cohorts, across all types of housing 
edu_final$share1529 <- round((edu_final$age1529/edu_final$total1529), digits = 4)
edu_final$share3049 <- round((edu_final$age3049/edu_final$total3049), digits = 4)
edu_final$share5064 <- round((edu_final$age5064/edu_final$total5064), digits = 4)
edu_final$share6584 <- round((edu_final$age6584/edu_final$total6584), digits = 4)
edu_final$share85plus <- round((edu_final$age85plus/edu_final$total85plus), digits = 4)

# calculation: Share within age & gender cohorts, types of housing separated 
edu_final$sharewithin1529 <- round((edu_final$age1529/edu_final$totwithin1529), digits = 4)
edu_final$sharewithin3049 <- round((edu_final$age3049/edu_final$totwithin3049), digits = 4)
edu_final$sharewithin5064 <- round((edu_final$age5064/edu_final$totwithin5064), digits = 4)
edu_final$sharewithin6584 <- round((edu_final$age6584/edu_final$totwithin6584), digits = 4)
edu_final$sharewithin85plus <- round((edu_final$age85plus/edu_final$totwithin85plus), digits = 4)

# calculation: Total share within age & gender cohorts, types of housing collapsed (for DID) 
edu_final$shareedu1529 <- round((edu_final$totedu1529/edu_final$total1529), digits = 4)
edu_final$shareedu3049 <- round((edu_final$totedu3049/edu_final$total3049), digits = 4)
edu_final$shareedu5064 <- round((edu_final$totedu5064/edu_final$total5064), digits = 4)
edu_final$shareedu6584 <- round((edu_final$totedu6584/edu_final$total6584), digits = 4)
edu_final$shareedu85plus <- round((edu_final$totedu85plus/edu_final$total85plus), digits = 4)

# Collapsing GENDERS for joint DID
### 1a: collapsing genders while leaving housing and education separate 
edu_did_1a <- aggregate(cbind(age1529,
                              age3049,
                              age5064,
                              age6584,
                              age85plus) ~ cntry + education + housing, data = edu_final, FUN = function(x) c(gama = sum(x)))

### 1b: collapsing genders and education while leaving housing separate
edu_did_1b <- aggregate(cbind(age1529,
                              age3049,
                              age5064,
                              age6584,
                              age85plus) ~ cntry + housing, data = edu_final, FUN = function(x) c(gama = sum(x)))

colnames(edu_did_1b)[3] <- "totwithin1529_both"
colnames(edu_did_1b)[4] <- "totwithin3049_both"
colnames(edu_did_1b)[5] <- "totwithin5064_both"
colnames(edu_did_1b)[6] <- "totwithin6584_both"
colnames(edu_did_1b)[7] <- "totwithin85plus_both"

### 2a: Totals for education, housing collapsed 
edu_tot_both <- aggregate(cbind(age1529,
                                age3049,
                                age5064,
                                age6584,
                                age85plus) ~ cntry + education, data = edu_final, FUN = function(x) c(gama = sum(x)))

colnames(edu_tot_both)[3] <- "totedu1529_both"
colnames(edu_tot_both)[4] <- "totedu3049_both"
colnames(edu_tot_both)[5] <- "totedu5064_both"
colnames(edu_tot_both)[6] <- "totedu6584_both"
colnames(edu_tot_both)[7] <- "totedu85plus_both"

### 2b: Calculating the total value of residents in respective age cohorts
totals_both <- aggregate(cbind(total1529,
                               total3049,
                               total5064,
                               total6584,
                               total85plus) ~ cntry, data = totals, FUN = function(x) c(gama = sum(x)))

# generating a dataframe with information from the three data frames
edu_temp1 <- merge(edu_did_1a, edu_did_1b, by=c("cntry","housing"))     # Step 1
edu_temp2 <- merge(edu_temp1, edu_tot_both, by=c("cntry","education"))  # Step 2
edu_did_both <- merge(edu_temp2, totals_both, by=c("cntry"))            # Step 3

## calculating the share of education within age cohorts / types of housing
edu_did_both$share_edu1529 <- round((edu_did_both$age1529/edu_did_both$totwithin1529_both), digits = 4)
edu_did_both$share_edu3049 <- round((edu_did_both$age3049/edu_did_both$totwithin3049_both), digits = 4)
edu_did_both$share_edu5064 <- round((edu_did_both$age5064/edu_did_both$totwithin5064_both), digits = 4)
edu_did_both$share_edu6584 <- round((edu_did_both$age6584/edu_did_both$totwithin6584_both), digits = 4)
edu_did_both$share_edu85plus <- round((edu_did_both$age85plus/edu_did_both$totwithin85plus_both), digits = 4)

# calculation: Total share of education within age cohorts, gender & types of housing collapsed (for DID) 
edu_did_both$fullshare1529 <- round((edu_did_both$totedu1529_both/edu_did_both$total1529), digits = 4)
edu_did_both$fullshare3049 <- round((edu_did_both$totedu3049_both/edu_did_both$total3049), digits = 4)
edu_did_both$fullshare5064 <- round((edu_did_both$totedu5064_both/edu_did_both$total5064), digits = 4)
edu_did_both$fullshare6584 <- round((edu_did_both$totedu6584_both/edu_did_both$total6584), digits = 4)
edu_did_both$fullshare85plus <- round((edu_did_both$totedu85plus_both/edu_did_both$total85plus), digits = 4)

## cleaning up
rm("edu_fin1", "edu_fin2", "edu_did_1a", "edu_did_1b", "edu_temp1", "edu_temp2")


### Calculating Duncan's Index of Dissimilarity (DID) -> SEPARATELY for the 2 genders

# 1) Subtract share in private households from total population
edu_final$duncan_edu1529    <- abs(edu_final$shareedu1529-edu_final$sharewithin1529)
edu_final$duncan_edu3049    <- abs(edu_final$shareedu3049-edu_final$sharewithin3049)
edu_final$duncan_edu5064    <- abs(edu_final$shareedu5064-edu_final$sharewithin5064) 
edu_final$duncan_edu6584    <- abs(edu_final$shareedu6584-edu_final$sharewithin6584)   
edu_final$duncan_edu85plus  <- abs(edu_final$shareedu85plus-edu_final$sharewithin85plus)   

# 2) Calculate sum over all categories of the variable
duncan_edu <- aggregate(cbind(duncan_edu1529,
                              duncan_edu3049,
                              duncan_edu5064,
                              duncan_edu6584,
                              duncan_edu85plus) ~ cntry + gender + housing, data = edu_final, FUN = function(x) c(gama = sum(x)))

# 3) Divide by 2 
duncan_edu$duncan_edu1529   <- (duncan_edu$duncan_edu1529)/2
duncan_edu$duncan_edu3049   <- (duncan_edu$duncan_edu3049)/2
duncan_edu$duncan_edu5064   <- (duncan_edu$duncan_edu5064)/2
duncan_edu$duncan_edu6584   <- (duncan_edu$duncan_edu6584)/2
duncan_edu$duncan_edu85plus <- (duncan_edu$duncan_edu85plus)/2

### Calculating Duncan's Index of Dissimilarity (DID) ->for the 2 genders COMBINED

# 1) Subtract share in private households from total population
edu_did_both$duncboth_edu1529    <- abs(edu_did_both$share_edu1529 - edu_did_both$fullshare1529)
edu_did_both$duncboth_edu3049    <- abs(edu_did_both$share_edu3049 - edu_did_both$fullshare3049)
edu_did_both$duncboth_edu5064    <- abs(edu_did_both$share_edu5064 - edu_did_both$fullshare5064) 
edu_did_both$duncboth_edu6584    <- abs(edu_did_both$share_edu6584 - edu_did_both$fullshare6584)   
edu_did_both$duncboth_edu85plus  <- abs(edu_did_both$share_edu85plus - edu_did_both$fullshare85plus)   

# 2) Calculate sum over all categories of the variable
duncboth_edu <- aggregate(cbind(duncboth_edu1529,
                                  duncboth_edu3049,
                                  duncboth_edu5064,
                                  duncboth_edu6584,
                                  duncboth_edu85plus) ~ cntry + housing, data = edu_did_both, FUN = function(x) c(gama = sum(x)))

# 3) Divide by 2 
duncboth_edu$duncboth_edu1529   <- (duncboth_edu$duncboth_edu1529)/2
duncboth_edu$duncboth_edu3049   <- (duncboth_edu$duncboth_edu3049)/2
duncboth_edu$duncboth_edu5064   <- (duncboth_edu$duncboth_edu5064)/2
duncboth_edu$duncboth_edu6584   <- (duncboth_edu$duncboth_edu6584)/2
duncboth_edu$duncboth_edu85plus <- (duncboth_edu$duncboth_edu85plus)/2


##############################
### PREPARING VISUALIZATION 
##############################

duncan_edu_phh <- subset(duncboth_edu, housing == 1)
duncan_edu_phh <- duncan_edu_phh[-c(2)]

duncan_edu_phhboth <- melt(duncan_edu_phh, id.vars = "cntry")

duncan_edu_phhboth$age <- 0
duncan_edu_phhboth$age[duncan_edu_phhboth$variable == "duncboth_edu1529"] <- 22
duncan_edu_phhboth$age[duncan_edu_phhboth$variable == "duncboth_edu3049"] <- 39.5
duncan_edu_phhboth$age[duncan_edu_phhboth$variable == "duncboth_edu5064"] <-57
duncan_edu_phhboth$age[duncan_edu_phhboth$variable == "duncboth_edu6584"] <- 74.5
duncan_edu_phhboth$age[duncan_edu_phhboth$variable == "duncboth_edu85plus"] <- 92.5

colnames(duncan_edu_phhboth)[1] <- "Countries"

### create identifier of types of countries

duncan_edu_phhboth$type <- 2

duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Austria"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Belgium"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Cyprus"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Germany"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Spain"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Finland"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Croatia"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Ireland"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Iceland"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Italy"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Norway"] <- 1 
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Portugal"] <- 1  
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Slovakia"] <- 1 

duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Bulgaria"] <- 3
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Greece"] <- 3
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Latvia"] <- 3
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Lithuania"] <- 3
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Poland"] <- 3
duncan_edu_phhboth$type[duncan_edu_phhboth$Countries == "Romania"] <- 3

duncan_edu_phhboth$type <- as.factor(duncan_edu_phhboth$type)


### Preparing edu_final for visualization
edu_final$education <- as.character(edu_final$education)

edu_final$housing_gender <- c("Blank")
edu_final$housing_gender[edu_final$gender == 0 & edu_final$housing == 1] <- "M. p.hh."
edu_final$housing_gender[edu_final$gender == 1 & edu_final$housing == 1] <- "F. p.hh."
edu_final$housing_gender[edu_final$gender == 0 & edu_final$housing == 2] <- "M. inst."
edu_final$housing_gender[edu_final$gender == 1 & edu_final$housing == 2] <- "F. inst."


### Saving data frames for visualization 
setwd(save.path)

save(duncan_edu_phhboth, file = "duncan_edu_phhboth.Rda")
save(edu_final, file = "edu_final.Rda")


### Function to extract legend
get_legend <- function(myggplot){
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}


# Fixing error in ggmosiac
is.formula <- function (x) inherits(x, "formula")

is.discrete <- function(x) {
  is.factor(x) || is.character(x) || is.logical(x)
}

product_names <- function() {
  function(x) {
    #cat(" in product_breaks\n")
    #browser()
    unique(x)
  }
}

product_breaks <- function() {
  function(x) {
    #cat(" in product_breaks\n")
    #browser()
    unique(x)
  }
}

product_labels <- function() {
  function(x) {
    #cat(" in product_labels\n")
    #browser()
    
    unique(x)
  }
}

is.waive <- getFromNamespace("is.waive", "ggplot2")

#' Helper function for determining scales
#'
#' Used internally to determine class of variable x
#' @param x variable
#' @return character string "productlist"
#' @importFrom ggplot2 scale_type
#' @export
scale_type.productlist <- function(x) {
  #  cat("checking for type productlist\n")
  #browser()
  "productlist"
}

#' Determining scales for mosaics
#'
#' @param name set to pseudo waiver function `product_names` by default.
#' @inheritParams ggplot2::continuous_scale
#' @export
scale_x_productlist <- function(name = ggplot2::waiver(), breaks = product_breaks(),
                                minor_breaks = NULL, labels = product_labels(),
                                limits = NULL, expand = ggplot2::waiver(), oob = scales:::censor,
                                na.value = NA_real_, trans = "identity",
                                position = "bottom", sec.axis = ggplot2::waiver()) {
  #browser()
  sc <- ggplot2::continuous_scale(
    c("x", "xmin", "xmax", "xend", "xintercept", "xmin_final", "xmax_final", "xlower", "xmiddle", "xupper"),
    "position_c", identity, name = name, breaks = breaks,
    minor_breaks = minor_breaks, labels = labels, limits = limits,
    expand = expand, oob = oob, na.value = na.value, trans = trans,
    guide = ggplot2::waiver(), position = position, super = ScaleContinuousProduct
  )
  
  
  if (!is.waive(sec.axis)) {
    if (is.formula(sec.axis)) sec.axis <- ggplot2::sec_axis(sec.axis)
    is.sec_axis = getFromNamespace("is.sec_axis", "ggplot2")
    if (is.sec_axis(sec.axis)) stop("Secondary axes must be specified using 'sec_axis()'")
    sc$secondary.axis <- sec.axis
  }
  sc
}

#' @rdname scale_x_productlist
#' @param sec.axis specify a secondary axis
#' @export
scale_y_productlist <- function(name = ggplot2::waiver(), breaks = product_breaks(),
                                minor_breaks = NULL, labels = product_labels(),
                                limits = NULL, expand = ggplot2::waiver(), oob = scales:::censor,
                                na.value = NA_real_, trans = "identity",
                                position = "left", sec.axis = ggplot2::waiver()) {
  #browser()
  sc <- ggplot2::continuous_scale(
    c("y", "ymin", "ymax", "yend", "yintercept", "ymin_final", "ymax_final", "ylower", "ymiddle", "yupper"),
    "position_c", identity, name = name, breaks = breaks,
    minor_breaks = minor_breaks, labels = labels, limits = limits,
    expand = expand, oob = oob, na.value = na.value, trans = trans,
    guide = ggplot2::waiver(), position = position, super = ScaleContinuousProduct
  )
  
  if (!is.waive(sec.axis)) {
    if (is.formula(sec.axis)) sec.axis <- ggplot2::sec_axis(sec.axis)
    is.sec_axis = getFromNamespace("is.sec_axis", "ggplot2")
    if (is.sec_axis(sec.axis)) stop("Secondary axes must be specified using 'sec_axis()'")
    sc$secondary.axis <- sec.axis
  }
  sc
}

#' @rdname scale_x_productlist
#' @export
ScaleContinuousProduct <- ggproto(
  "ScaleContinuousProduct", ScaleContinuousPosition,
  train =function(self, x) {
    #cat("train in ScaleContinuousProduct\n")
    #cat("class of variable: ")
    #cat(class(x))
    #browser()
    if (is.list(x)) {
      x <- x[[1]]
      if ("Scale" %in% class(x)) {
        #browser()
        # re-assign the scale values now that we have the information - but only if necessary
        if (is.function(self$breaks)) self$breaks <- x$breaks
        if (is.function(self$labels)) self$labels <- x$labels
        if (is.waive(self$name)) {
          self$product_name <- gsub("x__alpha__", "", x$name)
          self$product_name <- gsub("x__fill__", "", self$product_name)
          self$product_name <- gsub("x__", "", self$product_name)
          self$product_name <- gsub("conds\\d__", "", self$product_name)
        }
        #cat("\n")
        return()
      }
    }
    if (is.discrete(x)) {
      self$range$train(x=c(0,1))
      #cat("\n")
      return()
    }
    self$range$train(x)
    #cat("\n")
  },
  map = function(self, x, limits = self$get_limits()) {
    #cat("map in ScaleContinuousProduct\n")
    #browser()
    if (is.discrete(x)) return(x)
    if (is.list(x)) return(0) # need a number
    scaled <- as.numeric(self$oob(x, limits))
    ifelse(!is.na(scaled), scaled, self$na.value)
  },
  dimension = function(self, expand = c(0, 0)) {
    #cat("dimension in ScaleContinuousProduct\n")
    c(-0.05,1.05)
  },
  make_title = function(title, self) {
    #browser()
    if(title %in% self$aesthetics){
      title <- self$product_name
    }
    else title
  }
)


#------------------------------------------------#
#      EXAMPLES FOR VISUALIZATION
#------------------------------------------------#

### 1. Duncan plots for both genders
theme_set(theme_light()) 
theme_replace(text = element_text(size=10), legend.position = "bottom")

### BOTH GENDERS              
fig_duncan_edu <- ggplot(duncan_edu_phhboth, aes(x = age, y = value, group = type)) + 
  geom_point(aes(shape=type), size=3) +
  scale_y_continuous(name="", limits=c(0, 0.075), labels = scales::percent_format(accuracy = 1L)) + 
  scale_x_continuous(breaks=c(7.5, 22, 39.5, 57, 74.5, 92.5), labels=c("<15", "15-29", "30-49", "50-64", "65-84", "85+"), name = NULL) +
  scale_shape_discrete(name = "Groups of countries by relative size of inst. pop.", labels = c("One-peak country", "Two-peak country", "Residual country")) +
  #geom_text(aes(label=ifelse(value>0.035,as.character(Countries),'')),hjust=1.2,vjust=0)
  ggtitle("Duncan's index of dissimilarity for level of education") + theme(plot.title = element_text(hjust = 0.5)) 

fig_duncan_edu

### 2. Mosaic plot for single country or specific age groups

Country_X <- "Europe"
edu_Country_X <- subset(edu_final, cntry==Country_X & (housing == 1 | housing == 2))  

### Extracting a legend for educaton
edu_1529_b <- ggplot(edu_Country_X, aes(y=housing_gender, x=sharewithin1529, fill=education)) +
  geom_col() +
  scale_fill_manual(values = alpha(c("darkslategray4", "firebrick2", "burlywood1", "gray49"), 0.85), name = "Education", labels = c("Low", "Medium", "High", "Not stated")) +
  scale_y_discrete(name="") +
  scale_x_continuous(name="", limits=c(0, 1.01), labels = scales::percent) +
  ggtitle("15 to 29 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light()

## generating and saving a legend for one plot
edu_1529_b <- edu_1529_b + theme(legend.position="bottom", legend.text = element_text(colour="black", size=8))
legend_edu <- get_legend(edu_1529_b)

# Selecting a single country & the two main types of housing
edu_PT <- subset(edu_final, cntry=="Portugal" & (housing == 1 | housing == 2))         ## PT, one peak
edu_IS <- subset(edu_final, cntry=="Iceland" & (housing == 1 | housing == 2))         ## IS, one peak
edu_SE <- subset(edu_final, cntry=="Sweden" & (housing == 1 | housing == 2))          ## SE, two peaks
edu_UK <- subset(edu_final, cntry=="United Kingdom" & (housing == 1 | housing == 2))  ## UK, one peaks 

## adding labels to gender -> not required atm
# eco_cntry$gender[eco_cntry$gender == 0] <- "Male"
# eco_cntry$gender[eco_cntry$gender == 1] <- "Female"

# Mosaic plots
label_axis <- list(housing_gender = c("M. ph", "F. ph", "M. inst", "F. inst"))

edu1529_PT <- ggplot(data = edu_PT) +
  geom_mosaic(aes(weight = age1529, x = product(housing_gender),  fill=education), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("darkslategray4", "firebrick2", "burlywood1", "gray49"), 0.85), name = "Education", labels = c("Low education", "Medium education", "High education", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("Portugal") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

edu1529_IS <- ggplot(data = edu_IS) +
  geom_mosaic(aes(weight = age1529, x = product(housing_gender),  fill=education), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("darkslategray4", "firebrick2", "burlywood1", "gray49"), 0.85), name = "Education", labels = c("Low education", "Medium education", "High education", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("Iceland") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

edu1529_SE <- ggplot(data = edu_SE) +
  geom_mosaic(aes(weight = age1529, x = product(housing_gender),  fill=education), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("darkslategray4", "firebrick2", "burlywood1", "gray49"), 0.85), name = "Education", labels = c("Low education", "Medium education", "High education", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("Sweden") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 


edu1529_UK <- ggplot(data = edu_UK) +
  geom_mosaic(aes(weight = age1529, x = product(housing_gender),  fill=education), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("darkslategray4", "firebrick2", "burlywood1", "gray49"), 0.85), name = "Education", labels = c("Low education", "Medium education", "High education", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("United Kingdom") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

## Not used: "divider=mosaic("v")"

fig_mosaic_edu <- grid.arrange(edu1529_PT, edu1529_IS, edu1529_SE, edu1529_UK, 
                               legend_edu, nrow = 3, ncol = 2,
                               layout_matrix = rbind(c(1,2), c(3,4), c(5,5)),
                               widths = c(5, 5), heights = c(3, 3, 0.5))

## Plotting the figure
fig_mosaic_edu

