################################################################################
# Code to process European census data and calculate Duncan Dissimilarity Index
#
# Variable: Labor force status
#
#
# Author: Jan-Lucas Schanze
# Date:   07.10.2022
# Paper: (SRM submission #7830)
################################################################################

### Preparing the data preparation and analysis

# 1. Store raw data (csv files) in a folder     -> directory will be defined as input below
# 2. Create folder for data outputs and graphs  -> directory will be defined as output below


### OVERVIEW OF VARIABLES

# gender:	        0 (male)
#                 1 (female)

# housing:	      1 (conventional dwelling)
#                 2 (collective living quarter)
#                 3 (other living quarter & homeless)
#                 4 (No information)

# labor force:    1 (employed)
#                 2 (unemployed)
#                 3 (currently not economically active)
#                 4 (Not stated)

# citizenship:	  will be aggregated and prepared in separate file



######## Installing packages (required once)

#install.packages("foreign")
#install.packages("haven")
#install.packages("Rcmdr")
#install.packages("readr")
#install.packages("lattice")
#install.packages("ggplot2")
#install.packages("tidyverse")
#install.packages("beanplot")
#install.packages("DAAG")
#install.packages("visreg")
#install.packages("HSAUR")
#install.packages("lme4")
#install.packages("readxl")
#install.packages("dplyr")
#install.packages("reshape2")
#install.packages(sjPlot)
#install.packages(mlmRev)
#install.packages("ggmosaic")
#install.packages("gridExtra")

###### Loading packages
library(foreign)
library(haven)
library(Rcmdr)
library(readr)
library(lattice)
library(ggplot2)
library(tidyverse)
library(beanplot)
library(DAAG)
library(visreg)
library(HSAUR)
library(lme4)
library(readxl)
library(dplyr)
library(reshape2)
library(ggmosaic)
library(gridExtra)

# Setting paths for analysis
getwd()

main.path <- "C:\\Users\\schanzjs\\Desktop\\source files\\data"  # change accordingly
save.path <- "C:\\Users\\schanzjs\\Desktop\\source files\\output" # change accordingly 

setwd(main.path)

# Loading dataset
hyper1 <- read_xlsx("censushub_HC39_age_gender_citizen_activity.xlsx", sheet = "Data") # Loading dataset with labels
hyper1[,1:11] # Have a look at columns 1 through 11

## Dropping countries not needed 
hyper1 <- subset(hyper1, hyper1$cntry!="Switzerland" & hyper1$cntry!="Poland") ## Poland only reporting missing values in institutions 

# Recoding missing values for all variables
for(i in 1:ncol(hyper1)){
  hyper1[hyper1[,i]=="NA",i] <-  NA
}

# Define as numeric
hyper1$gender <- as.numeric(as.character(hyper1$gender))
hyper1$housing <- as.numeric(as.character(hyper1$housing))
hyper1$activity <- as.numeric(as.character(hyper1$activity))
hyper1$citizenship <- as.numeric(as.character(hyper1$citizenship))

hyper1$ageupto15 <- as.numeric(as.character(hyper1$ageupto15))
hyper1$age1529 <- as.numeric(as.character(hyper1$age1529))
hyper1$age3049 <- as.numeric(as.character(hyper1$age3049))
hyper1$age5064 <- as.numeric(as.character(hyper1$age5064))
hyper1$age6584 <- as.numeric(as.character(hyper1$age6584))
hyper1$age85plus <- as.numeric(as.character(hyper1$age85plus))


## AGGREGATING NUMBERS FOR TOTAL

# Aggregate total
hyper1_eu <- hyper1
                    
## checking whether list of countries is correct
prop.table(table(hyper1_eu$cntry, hyper1_eu$gender), 2)

hyper1_eu_agg <- aggregate(cbind(hyper1_eu$ageupto15,
                                hyper1_eu$age1529,
                                hyper1_eu$age3049,
                                hyper1_eu$age5064,
                                hyper1_eu$age6584,
                                hyper1_eu$age85plus) ~ hyper1_eu$gender + hyper1_eu$housing + hyper1_eu$activity + hyper1_eu$citizenship, data = hyper1_eu, FUN = function(x) c(gama = sum(x)))

hyper1_eu_agg$cntry <- c("Europe")

colnames(hyper1_eu_agg)[1] <- "gender"
colnames(hyper1_eu_agg)[2] <- "housing"
colnames(hyper1_eu_agg)[3] <- "activity"
colnames(hyper1_eu_agg)[4] <- "citizenship"
colnames(hyper1_eu_agg)[5] <- "ageupto15"
colnames(hyper1_eu_agg)[6] <- "age1529"
colnames(hyper1_eu_agg)[7] <- "age3049"
colnames(hyper1_eu_agg)[8] <- "age5064"
colnames(hyper1_eu_agg)[9] <- "age6584"
colnames(hyper1_eu_agg)[10] <- "age85plus"

## Appending aggregate data
hyper1 <- rbind(hyper1, hyper1_eu_agg)

# cleaning up
rm("hyper1_eu", "hyper1_eu_agg")

### Aggregating citizenship for labor force status
hyper_eco <- aggregate(cbind(hyper1$ageupto15,
                                hyper1$age1529,
                                hyper1$age3049,
                                hyper1$age5064,
                                hyper1$age6584,
                                hyper1$age85plus) ~ hyper1$cntry + hyper1$gender + hyper1$housing + hyper1$activity, data = hyper1, FUN = function(x) c(gama = sum(x)))

## renaming variables
colnames(hyper_eco)[1] <- "cntry"
colnames(hyper_eco)[2] <- "gender"
colnames(hyper_eco)[3] <- "housing"
colnames(hyper_eco)[4] <- "activity"
colnames(hyper_eco)[5] <- "ageupto15"
colnames(hyper_eco)[6] <- "age1529"
colnames(hyper_eco)[7] <- "age3049"
colnames(hyper_eco)[8] <- "age5064"
colnames(hyper_eco)[9] <- "age6584"
colnames(hyper_eco)[10] <- "age85plus"

### dropping housing == 3 and housing == 4
hyper_eco <- subset(hyper_eco, hyper_eco$housing == 1 | hyper_eco$housing == 2) # dropping housing types not required

### Totals for labor force status (housing collapsed)
totals_eco <- aggregate(cbind(hyper_eco$ageupto15,
                              hyper_eco$age1529,
                              hyper_eco$age3049,
                              hyper_eco$age5064,
                              hyper_eco$age6584,
                              hyper_eco$age85plus) ~ hyper_eco$cntry + hyper_eco$gender + hyper_eco$activity, data = hyper_eco, FUN = function(x) c(gama = sum(x)))

# renaming variables
colnames(totals_eco)[1] <- "cntry"
colnames(totals_eco)[2] <- "gender"
colnames(totals_eco)[3] <- "activity"
colnames(totals_eco)[4] <- "totecoupto15"
colnames(totals_eco)[5] <- "toteco1529"
colnames(totals_eco)[6] <- "toteco3049"
colnames(totals_eco)[7] <- "toteco5064"
colnames(totals_eco)[8] <- "toteco6584"
colnames(totals_eco)[9] <- "toteco85plus"

### TOTALS by gender 
totals <- aggregate(cbind(hyper_eco$ageupto15,
                          hyper_eco$age1529,
                          hyper_eco$age3049,
                          hyper_eco$age5064,
                          hyper_eco$age6584,
                          hyper_eco$age85plus) ~ hyper_eco$cntry + hyper_eco$gender, data = hyper_eco, FUN = function(x) c(gama = sum(x)))

## renaming variables
colnames(totals)[1] <- "cntry"
colnames(totals)[2] <- "gender"
colnames(totals)[3] <- "totalupto15"
colnames(totals)[4] <- "total1529"
colnames(totals)[5] <- "total3049"
colnames(totals)[6] <- "total5064"
colnames(totals)[7] <- "total6584"
colnames(totals)[8] <- "total85plus"

## Total for entire population (by gender)
totals$total_allage <- totals$totalupto15 + totals$total1529 + totals$total3049 + totals$total5064 + totals$total6584 + totals$total85plus

### TOTALS by gender and housing (same for labor force status and citizen)
totals_within <- aggregate(cbind(hyper_eco$ageupto15,
                                 hyper_eco$age1529,
                                 hyper_eco$age3049,
                                 hyper_eco$age5064,
                                 hyper_eco$age6584,
                                 hyper_eco$age85plus) ~ hyper_eco$cntry + hyper_eco$gender + hyper_eco$housing, data = hyper_eco, FUN = function(x) c(gama = sum(x)))

## renaming variables
colnames(totals_within)[1] <- "cntry"
colnames(totals_within)[2] <- "gender"
colnames(totals_within)[3] <- "housing"
colnames(totals_within)[4] <- "totwithinupto15"
colnames(totals_within)[5] <- "totwithin1529"
colnames(totals_within)[6] <- "totwithin3049"
colnames(totals_within)[7] <- "totwithin5064"
colnames(totals_within)[8] <- "totwithin6584"
colnames(totals_within)[9] <- "totwithin85plus"

## Total for entire population (by gender and housing)
totals_within$totwithin_allage <- totals_within$totwithinupto15 + totals_within$totwithin1529 + totals_within$totwithin3049 + totals_within$totwithin5064 + totals_within$totwithin6584 + totals_within$totwithin85plus

## CALCULATING SHARES

# generating a data frame with information from the three data frames
eco_fin1 <- merge(hyper_eco, totals, by=c("cntry","gender"))                    # Step 1
eco_fin2 <- merge(eco_fin1, totals_eco, by=c("cntry","gender", "activity"))      # Step 2
eco_final <- merge(eco_fin2, totals_within, by=c("cntry","gender", "housing"))  # Step 3

# calculation: Share within age & gender cohorts, across all types of housing 
eco_final$shareupto15 <- round((eco_final$ageupto15/eco_final$totalupto15), digits = 4)
eco_final$share1529 <- round((eco_final$age1529/eco_final$total1529), digits = 4)
eco_final$share3049 <- round((eco_final$age3049/eco_final$total3049), digits = 4)
eco_final$share5064 <- round((eco_final$age5064/eco_final$total5064), digits = 4)
eco_final$share6584 <- round((eco_final$age6584/eco_final$total6584), digits = 4)
eco_final$share85plus <- round((eco_final$age85plus/eco_final$total85plus), digits = 4)

# calculation: Share within age & gender cohorts, types of housing separated 
eco_final$sharewithinupto15 <- round((eco_final$ageupto15/eco_final$totwithinupto15), digits = 4)
eco_final$sharewithin1529 <- round((eco_final$age1529/eco_final$totwithin1529), digits = 4)
eco_final$sharewithin3049 <- round((eco_final$age3049/eco_final$totwithin3049), digits = 4)
eco_final$sharewithin5064 <- round((eco_final$age5064/eco_final$totwithin5064), digits = 4)
eco_final$sharewithin6584 <- round((eco_final$age6584/eco_final$totwithin6584), digits = 4)
eco_final$sharewithin85plus <- round((eco_final$age85plus/eco_final$totwithin85plus), digits = 4)

# calculation: Total share within age & gender cohorts, types of housing collapsed (for DID) 
eco_final$shareecoupto15 <- round((eco_final$totecoupto15/eco_final$totalupto15), digits = 4)
eco_final$shareeco1529 <- round((eco_final$toteco1529/eco_final$total1529), digits = 4)
eco_final$shareeco3049 <- round((eco_final$toteco3049/eco_final$total3049), digits = 4)
eco_final$shareeco5064 <- round((eco_final$toteco5064/eco_final$total5064), digits = 4)
eco_final$shareeco6584 <- round((eco_final$toteco6584/eco_final$total6584), digits = 4)
eco_final$shareeco85plus <- round((eco_final$toteco85plus/eco_final$total85plus), digits = 4)

# Collapsing GENDERS for joint DID
### 1a: collapsing genders while leaving housing and activity separate 
eco_did_1a <- aggregate(cbind(ageupto15,
                                age1529,
                                age3049,
                                age5064,
                                age6584,
                                age85plus) ~ cntry + activity + housing, data = eco_final, FUN = function(x) c(gama = sum(x)))

### 1b: collapsing genders and activity while leaving housing separate
eco_did_1b <- aggregate(cbind(ageupto15,
                                age1529,
                                age3049,
                                age5064,
                                age6584,
                                age85plus) ~ cntry + housing, data = eco_final, FUN = function(x) c(gama = sum(x)))

colnames(eco_did_1b)[3] <- "totwithinupto15_both"
colnames(eco_did_1b)[4] <- "totwithin1529_both"
colnames(eco_did_1b)[5] <- "totwithin3049_both"
colnames(eco_did_1b)[6] <- "totwithin5064_both"
colnames(eco_did_1b)[7] <- "totwithin6584_both"
colnames(eco_did_1b)[8] <- "totwithin85plus_both"

### 2a: Totals for activity, housing collapsed 
eco_tot_both <- aggregate(cbind(ageupto15,
                                  age1529,
                                  age3049,
                                  age5064,
                                  age6584,
                                  age85plus) ~ cntry + activity, data = eco_final, FUN = function(x) c(gama = sum(x)))

colnames(eco_tot_both)[3] <- "totecoupto15_both"
colnames(eco_tot_both)[4] <- "toteco1529_both"
colnames(eco_tot_both)[5] <- "toteco3049_both"
colnames(eco_tot_both)[6] <- "toteco5064_both"
colnames(eco_tot_both)[7] <- "toteco6584_both"
colnames(eco_tot_both)[8] <- "toteco85plus_both"

### 2b: Calculating the total value of residents in respective age cohorts
totals_both <- aggregate(cbind(totalupto15,
                               total1529,
                               total3049,
                               total5064,
                               total6584,
                               total85plus) ~ cntry, data = totals, FUN = function(x) c(gama = sum(x)))

# generating a data frame with information from the three data frames
eco_temp1 <- merge(eco_did_1a, eco_did_1b, by=c("cntry","housing"))   # Step 1
eco_temp2 <- merge(eco_temp1, eco_tot_both, by=c("cntry","activity"))    # Step 2
eco_did_both <- merge(eco_temp2, totals_both, by=c("cntry"))            # Step 3


############################
## CALCULATING SHARES
############################

# calculation: Share within age & gender cohorts, across all types of housing 
eco_did_both$share_ecoupto15 <- round((eco_did_both$ageupto15/eco_did_both$totwithinupto15_both), digits = 4)
eco_did_both$share_eco1529 <- round((eco_did_both$age1529/eco_did_both$totwithin1529_both), digits = 4)
eco_did_both$share_eco3049 <- round((eco_did_both$age3049/eco_did_both$totwithin3049_both), digits = 4)
eco_did_both$share_eco5064 <- round((eco_did_both$age5064/eco_did_both$totwithin5064_both), digits = 4)
eco_did_both$share_eco6584 <- round((eco_did_both$age6584/eco_did_both$totwithin6584_both), digits = 4)
eco_did_both$share_eco85plus <- round((eco_did_both$age85plus/eco_did_both$totwithin85plus_both), digits = 4)

# calculation: Total share of activity within age cohorts, gender & types of housing collapsed (for DID) 
eco_did_both$fullshareupto15 <- round((eco_did_both$totecoupto15_both/eco_did_both$totalupto15), digits = 4)
eco_did_both$fullshare1529 <- round((eco_did_both$toteco1529_both/eco_did_both$total1529), digits = 4)
eco_did_both$fullshare3049 <- round((eco_did_both$toteco3049_both/eco_did_both$total3049), digits = 4)
eco_did_both$fullshare5064 <- round((eco_did_both$toteco5064_both/eco_did_both$total5064), digits = 4)
eco_did_both$fullshare6584 <- round((eco_did_both$toteco6584_both/eco_did_both$total6584), digits = 4)
eco_did_both$fullshare85plus <- round((eco_did_both$toteco85plus_both/eco_did_both$total85plus), digits = 4)

## cleaning up
rm("eco_fin1", "eco_fin2", "eco_did_1a", "eco_did_1b", "eco_temp1", "eco_temp2")


### Calculating Duncan's Index of Dissimilarity (DID) -> SEPARATELY for the 2 genders

# 1) Subtract share in private households from total population
eco_final$duncan_ecoupto15  <- abs(eco_final$shareecoupto15-eco_final$sharewithinupto15)
eco_final$duncan_eco1529    <- abs(eco_final$shareeco1529-eco_final$sharewithin1529)
eco_final$duncan_eco3049    <- abs(eco_final$shareeco3049-eco_final$sharewithin3049)
eco_final$duncan_eco5064    <- abs(eco_final$shareeco5064-eco_final$sharewithin5064) 
eco_final$duncan_eco6584    <- abs(eco_final$shareeco6584-eco_final$sharewithin6584)   
eco_final$duncan_eco85plus  <- abs(eco_final$shareeco85plus-eco_final$sharewithin85plus)   

# 2) Calculate sum over all categories of the variable
duncan_eco <- aggregate(cbind(duncan_ecoupto15,
                                duncan_eco1529,
                                duncan_eco3049,
                                duncan_eco5064,
                                duncan_eco6584,
                                duncan_eco85plus) ~ cntry + gender + housing, data = eco_final, FUN = function(x) c(gama = sum(x)))

# 3) Divide by 2 
duncan_eco$duncan_ecoupto15 <- (duncan_eco$duncan_ecoupto15)/2
duncan_eco$duncan_eco1529   <- (duncan_eco$duncan_eco1529)/2
duncan_eco$duncan_eco3049   <- (duncan_eco$duncan_eco3049)/2
duncan_eco$duncan_eco5064   <- (duncan_eco$duncan_eco5064)/2
duncan_eco$duncan_eco6584   <- (duncan_eco$duncan_eco6584)/2
duncan_eco$duncan_eco85plus <- (duncan_eco$duncan_eco85plus)/2

### Calculating Duncan's Index of Dissimilarity (DID) -> for the 2 genders COMBINED

# 1) Subtract share in private households from total population
eco_did_both$duncboth_ecoupto15  <- abs(eco_did_both$share_ecoupto15 - eco_did_both$fullshareupto15)
eco_did_both$duncboth_eco1529    <- abs(eco_did_both$share_eco1529 - eco_did_both$fullshare1529)
eco_did_both$duncboth_eco3049    <- abs(eco_did_both$share_eco3049 - eco_did_both$fullshare3049)
eco_did_both$duncboth_eco5064    <- abs(eco_did_both$share_eco5064 - eco_did_both$fullshare5064) 
eco_did_both$duncboth_eco6584    <- abs(eco_did_both$share_eco6584 - eco_did_both$fullshare6584)   
eco_did_both$duncboth_eco85plus  <- abs(eco_did_both$share_eco85plus - eco_did_both$fullshare85plus)   

# 2) Calculate sum over all categories of the variable
duncboth_eco <- aggregate(cbind(duncboth_ecoupto15,
                                  duncboth_eco1529,
                                  duncboth_eco3049,
                                  duncboth_eco5064,
                                  duncboth_eco6584,
                                  duncboth_eco85plus) ~ cntry + housing, data = eco_did_both, FUN = function(x) c(gama = sum(x)))

# 3) Divide by 2 
duncboth_eco$duncboth_ecoupto15 <- (duncboth_eco$duncboth_ecoupto15)/2
duncboth_eco$duncboth_eco1529   <- (duncboth_eco$duncboth_eco1529)/2
duncboth_eco$duncboth_eco3049   <- (duncboth_eco$duncboth_eco3049)/2
duncboth_eco$duncboth_eco5064   <- (duncboth_eco$duncboth_eco5064)/2
duncboth_eco$duncboth_eco6584   <- (duncboth_eco$duncboth_eco6584)/2
duncboth_eco$duncboth_eco85plus <- (duncboth_eco$duncboth_eco85plus)/2


##############################
### PREPARING VISUALIZATION 
##############################

### VISUAL 1a: DID across countries 
duncan_eco_phh <- subset(duncboth_eco, housing == 1)
duncan_eco_phh <- duncan_eco_phh[-c(2)]

duncan_eco_phhboth <- melt(duncan_eco_phh, id.vars = "cntry")

duncan_eco_phhboth$age <- 0
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_ecoupto15"] <- 7.5
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_eco1529"] <- 22
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_eco3049"] <- 39.5
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_eco5064"] <-57
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_eco6584"] <- 74.5
duncan_eco_phhboth$age[duncan_eco_phhboth$variable == "duncboth_eco85plus"] <- 92.5

colnames(duncan_eco_phhboth)[1] <- "Countries"


### create identifier of types of countries

duncan_eco_phhboth$type <- 2

duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Austria"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Belgium"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Cyprus"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Germany"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Spain"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Finland"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Croatia"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Ireland"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Iceland"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Italy"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Norway"] <- 1 
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Portugal"] <- 1  
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Slovakia"] <- 1 

duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Bulgaria"] <- 3
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Greece"] <- 3
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Latvia"] <- 3
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Lithuania"] <- 3
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Poland"] <- 3
duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Romania"] <- 3

#duncan_eco_phhboth$type[duncan_eco_phhboth$Countries == "Europe"] <- "Europe"

duncan_eco_phhboth$type <- as.factor(duncan_eco_phhboth$type)


# Preparing eco_final the visualization
eco_final$activity <- as.character(eco_final$activity)

eco_final$housing_gender <- c("Blank")
eco_final$housing_gender[eco_final$gender == 0 & eco_final$housing == 1] <- "M. p.hh."
eco_final$housing_gender[eco_final$gender == 1 & eco_final$housing == 1] <- "F. p.hh."
eco_final$housing_gender[eco_final$gender == 0 & eco_final$housing == 2] <- "M. inst."
eco_final$housing_gender[eco_final$gender == 1 & eco_final$housing == 2] <- "F. inst."

## Saving data frames
setwd(save.path)

save(eco_final, file = "eco_final.Rda")
save(duncan_eco_phhboth, file = "duncan_eco_phhboth.Rda")



### Function to extract legend
get_legend <- function(myggplot){
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}


# Fixing error in ggmosiac
is.formula <- function (x) inherits(x, "formula")

is.discrete <- function(x) {
  is.factor(x) || is.character(x) || is.logical(x)
}

product_names <- function() {
  function(x) {
    #cat(" in product_breaks\n")
    #browser()
    unique(x)
  }
}

product_breaks <- function() {
  function(x) {
    #cat(" in product_breaks\n")
    #browser()
    unique(x)
  }
}

product_labels <- function() {
  function(x) {
    #cat(" in product_labels\n")
    #browser()
    
    unique(x)
  }
}

is.waive <- getFromNamespace("is.waive", "ggplot2")

#' Helper function for determining scales
#'
#' Used internally to determine class of variable x
#' @param x variable
#' @return character string "productlist"
#' @importFrom ggplot2 scale_type
#' @export
scale_type.productlist <- function(x) {
  #  cat("checking for type productlist\n")
  #browser()
  "productlist"
}

#' Determining scales for mosaics
#'
#' @param name set to pseudo waiver function `product_names` by default.
#' @inheritParams ggplot2::continuous_scale
#' @export
scale_x_productlist <- function(name = ggplot2::waiver(), breaks = product_breaks(),
                                minor_breaks = NULL, labels = product_labels(),
                                limits = NULL, expand = ggplot2::waiver(), oob = scales:::censor,
                                na.value = NA_real_, trans = "identity",
                                position = "bottom", sec.axis = ggplot2::waiver()) {
  #browser()
  sc <- ggplot2::continuous_scale(
    c("x", "xmin", "xmax", "xend", "xintercept", "xmin_final", "xmax_final", "xlower", "xmiddle", "xupper"),
    "position_c", identity, name = name, breaks = breaks,
    minor_breaks = minor_breaks, labels = labels, limits = limits,
    expand = expand, oob = oob, na.value = na.value, trans = trans,
    guide = ggplot2::waiver(), position = position, super = ScaleContinuousProduct
  )
  
  
  if (!is.waive(sec.axis)) {
    if (is.formula(sec.axis)) sec.axis <- ggplot2::sec_axis(sec.axis)
    is.sec_axis = getFromNamespace("is.sec_axis", "ggplot2")
    if (is.sec_axis(sec.axis)) stop("Secondary axes must be specified using 'sec_axis()'")
    sc$secondary.axis <- sec.axis
  }
  sc
}

#' @rdname scale_x_productlist
#' @param sec.axis specify a secondary axis
#' @export
scale_y_productlist <- function(name = ggplot2::waiver(), breaks = product_breaks(),
                                minor_breaks = NULL, labels = product_labels(),
                                limits = NULL, expand = ggplot2::waiver(), oob = scales:::censor,
                                na.value = NA_real_, trans = "identity",
                                position = "left", sec.axis = ggplot2::waiver()) {
  #browser()
  sc <- ggplot2::continuous_scale(
    c("y", "ymin", "ymax", "yend", "yintercept", "ymin_final", "ymax_final", "ylower", "ymiddle", "yupper"),
    "position_c", identity, name = name, breaks = breaks,
    minor_breaks = minor_breaks, labels = labels, limits = limits,
    expand = expand, oob = oob, na.value = na.value, trans = trans,
    guide = ggplot2::waiver(), position = position, super = ScaleContinuousProduct
  )
  
  if (!is.waive(sec.axis)) {
    if (is.formula(sec.axis)) sec.axis <- ggplot2::sec_axis(sec.axis)
    is.sec_axis = getFromNamespace("is.sec_axis", "ggplot2")
    if (is.sec_axis(sec.axis)) stop("Secondary axes must be specified using 'sec_axis()'")
    sc$secondary.axis <- sec.axis
  }
  sc
}

#' @rdname scale_x_productlist
#' @export
ScaleContinuousProduct <- ggproto(
  "ScaleContinuousProduct", ScaleContinuousPosition,
  train =function(self, x) {
    #cat("train in ScaleContinuousProduct\n")
    #cat("class of variable: ")
    #cat(class(x))
    #browser()
    if (is.list(x)) {
      x <- x[[1]]
      if ("Scale" %in% class(x)) {
        #browser()
        # re-assign the scale values now that we have the information - but only if necessary
        if (is.function(self$breaks)) self$breaks <- x$breaks
        if (is.function(self$labels)) self$labels <- x$labels
        if (is.waive(self$name)) {
          self$product_name <- gsub("x__alpha__", "", x$name)
          self$product_name <- gsub("x__fill__", "", self$product_name)
          self$product_name <- gsub("x__", "", self$product_name)
          self$product_name <- gsub("conds\\d__", "", self$product_name)
        }
        #cat("\n")
        return()
      }
    }
    if (is.discrete(x)) {
      self$range$train(x=c(0,1))
      #cat("\n")
      return()
    }
    self$range$train(x)
    #cat("\n")
  },
  map = function(self, x, limits = self$get_limits()) {
    #cat("map in ScaleContinuousProduct\n")
    #browser()
    if (is.discrete(x)) return(x)
    if (is.list(x)) return(0) # need a number
    scaled <- as.numeric(self$oob(x, limits))
    ifelse(!is.na(scaled), scaled, self$na.value)
  },
  dimension = function(self, expand = c(0, 0)) {
    #cat("dimension in ScaleContinuousProduct\n")
    c(-0.05,1.05)
  },
  make_title = function(title, self) {
    #browser()
    if(title %in% self$aesthetics){
      title <- self$product_name
    }
    else title
  }
)


#------------------------------------------------#
#      EXAMPLES FOR VISUALIZATION
#------------------------------------------------#

### 1. Duncan plots for both genders
theme_set(theme_light()) 
theme_replace(text = element_text(size=10), legend.position = "bottom")

### BOTH GENDERS              
fig_duncan_eco <- ggplot(duncan_eco_phhboth, aes(x = age, y = value, group = type)) + 
  geom_point(aes(shape=type), size=3) +
  scale_y_continuous(name="", limits=c(0, 0.05), labels = scales::percent_format(accuracy = 1L)) + 
  scale_x_continuous(breaks=c(7.5, 22, 39.5, 57, 74.5, 92.5), labels=c("<15", "15-29", "30-49", "50-64", "65-84", "85+"), name = NULL) +
  scale_shape_discrete(name = "Groups of countries by relative size of inst. pop.", labels = c("One-peak country", "Two-peak country", "Residual country")) +
  #geom_text(aes(label=ifelse(value>0.02,as.character(Countries),'')),hjust=1.2,vjust=0)
  ggtitle("Duncan's index of dissimilarity for labor force status") + theme(plot.title = element_text(hjust = 0.5)) 

fig_duncan_eco


### 2. Mosaic plot for single country or specific age groups
# Selecting a single country & the two main types of housing
eco_EU <- subset(eco_final, cntry=="Europe" & (housing == 1 | housing == 2))  

## extracting a legend first
eco_1529_b <- ggplot(eco_EU, aes(y=housing_gender, x=sharewithin1529, fill=activity)) +
  geom_col() +
  scale_fill_manual(values = alpha(c("gray20", "gray50", "gray80", "black"), 0.85), name = "Labor force status", labels = c("Employed", "Unemployed", "Economically inactive", "Not stated")) +
  scale_y_discrete(name="") +
  scale_x_continuous(name="", limits=c(0, 1.01), labels = scales::percent) +
  ggtitle("15 to 29 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light()

## generating and saving a legend for one plot
eco_1529_b <- eco_1529_b + theme(legend.position="bottom", legend.text = element_text(colour="gray20", size=8))
legend_eco <- get_legend(eco_1529_b)

## suppressing the legend for this plot
#eco_1529 <- eco_1529 + theme(legend.position = "none")

# Mosaic plots
label_axis <- list(housing_gender = c("M. ph", "F. ph", "M. inst", "F. inst"))

eco1529_EU <- ggplot(data = eco_EU) +
  geom_mosaic(aes(weight = age1529, x = product(housing_gender),  fill=activity), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("gray20", "gray50", "gray80", "black"), 0.85), name = "Labor force status", labels = c("Employed", "Unemployed", "Economically inactive", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("15 to 29 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

eco3049_EU <- ggplot(data = eco_EU) +
  geom_mosaic(aes(weight = age3049, x = product(housing_gender),  fill=activity), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("gray20", "gray50", "gray80", "black"), 0.85), name = "Labor force status", labels = c("Employed", "Unemployed", "Economically inactive", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("30 to 49 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

eco5064_EU <- ggplot(data = eco_EU) +
  geom_mosaic(aes(weight = age5064, x = product(housing_gender), fill=activity), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("gray20", "gray50", "gray80", "black"), 0.85), name = "Labor force status", labels = c("Employed", "Unemployed", "Economically inactive", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("50 to 64 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 


eco6584_EU <- ggplot(data = eco_EU) +
  geom_mosaic(aes(weight = age6584, x = product(housing_gender),  fill=activity), na.rm=TRUE, show.legend = FALSE)  +
  scale_y_continuous(name="", limits=c(0, 1), labels = scales::percent) +
  scale_fill_manual(values = alpha(c("gray20", "gray50", "gray80", "black"), 0.85), name = "Labor force status", labels = c("Employed", "Unemployed", "Economically inactive", "Not stated")) +
  #facet_grid(gender~.) +
  labs(x ="") +
  ggtitle("65 to 84 years") + theme(plot.title = element_text(hjust = 0.5)) + 
  theme_light() 

## Not used: "divider=mosaic("v")"

fig_mosaic_eco <- grid.arrange(eco1529_EU, eco3049_EU, eco5064_EU, eco6584_EU, 
                               legend_eco, nrow = 3, ncol = 2,
                               layout_matrix = rbind(c(1,2), c(3,4), c(5,5)),
                               widths = c(5, 5), heights = c(3, 3, 0.5))

## Plotting the figure
fig_mosaic_eco