Packages

library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.1
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ───────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(xtable)
library(readxl)

variances

d2011 <- read.table("../data-raw/popyt-2011", header = F, sep  = ";", dec = ",", stringsAsFactors = F) %>%
  select(sekcja = V1, prec = V3) %>%
  mutate(rok = 2011)

d2013 <- read_excel("../data-raw/PW_popyt_na_prace_w_2013.xls", col_names = F, skip = 31) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3)%>%
  mutate(rok = 2013)
  
d2014 <- read_excel("../data-raw/popyt_na_prace_2014.xls", col_names = F, skip = 31) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3) %>%
  mutate(rok = 2014)

precs <- bind_rows(d2011,d2013, d2014)

precs %>% 
  spread(rok, prec) %>%
  add_row(sekcja = "overall", `2011` = 3.40, `2013` = 4.01, `2014` = 3.98, .before = 1) %>%
  xtable(caption = "Estimates on relative standard erros of estimators for vacancies of the demand for labour in IV quarter 2011, 2013 and 2014",
         label = "tab-rel-var") %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")

Read the data

res_cmcgreg1 <- readRDS(file = "../results/res_glm_kod2.rds")
Error: vector memory exhausted (limit reached?)
totals %>%
  count(b, rok, wt= hat_wolne) %>%
  group_by(rok) %>%
  summarise(m = round(mean(n))) %>%
  spread(rok, m) %>%
  xtable(digits = 0, caption = "Estimated total number of vacancies at the end of 1Q based on the DL survey") %>%
  print.xtable(include.rownames = F, caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Sat Jul 27 09:19:44 2019
\begin{table}[ht]
\centering
\caption{Estimated total number of vacancies at the end of 1Q based on the DL survey} 
\begin{tabular}{rrr}
  \hline
2011 & 2013 & 2014 \\ 
  \hline
71775 & 42889 & 52725 \\ 
   \hline
\end{tabular}
\end{table}
totals %>%
  count(b, rok, kod2, wt = hat_wolne) %>%
  add_count(b, rok, wt = n, name = "total") %>%
  mutate(p = n / total*100) %>%
  group_by(kod2) %>%
  summarise(pop = mean(p)) %>%
  left_join(
    final_data %>%
      count(rok, kod2) %>%
      add_count(rok, wt = n, name = "total") %>%
      mutate(p = n / total*100) %>%
      group_by(kod2) %>%
      summarise(bkl = mean(p))
  )  %>%
  xtable(digits = 2,
         caption = "Distribution of occupancy (2 digit codes) in Population and BKL data (average over 2011, 2013 and 2014)") %>%
  print.xtable(caption.placement = "top", include.rownames = F)
Joining, by = "kod2"
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 23:42:04 2019
\begin{table}[ht]
\centering
\caption{Distribution of occupancy (2 digit codes) in Population and BKL data (average over 2011, 2013 and 2014)} 
\begin{tabular}{lrr}
  \hline
kod2 & pop & bkl \\ 
  \hline
11 & 0.49 & 1.68 \\ 
  12 & 1.70 & 2.22 \\ 
  13 & 1.27 & 2.02 \\ 
  14 & 0.29 & 2.78 \\ 
  21 & 4.45 & 4.09 \\ 
  22 & 3.33 & 1.47 \\ 
  23 & 0.91 & 2.00 \\ 
  24 & 6.73 & 14.65 \\ 
  25 & 3.94 & 8.17 \\ 
  26 & 0.71 & 0.91 \\ 
  31 & 1.51 & 1.50 \\ 
  32 & 0.79 & 0.58 \\ 
  33 & 4.37 & 19.33 \\ 
  34 & 0.97 & 0.53 \\ 
  35 & 1.73 & 0.78 \\ 
  41 & 1.41 & 1.82 \\ 
  42 & 5.20 & 2.53 \\ 
  43 & 1.28 & 1.46 \\ 
  44 & 2.52 & 0.51 \\ 
  51 & 2.41 & 3.10 \\ 
  52 & 8.79 & 16.49 \\ 
  54 & 1.28 & 1.16 \\ 
  71 & 9.53 & 1.71 \\ 
  72 & 7.09 & 2.36 \\ 
  73 & 0.72 & 0.25 \\ 
  74 & 2.09 & 1.23 \\ 
  75 & 5.64 & 1.18 \\ 
  81 & 2.71 & 0.35 \\ 
  82 & 2.72 & 0.21 \\ 
  83 & 7.99 & 1.67 \\ 
  91 & 1.35 & 0.19 \\ 
  93 & 2.20 & 0.49 \\ 
  94 & 1.26 & 0.26 \\ 
  96 & 0.60 & 0.31 \\ 
   \hline
\end{tabular}
\end{table}
totals %>%
  group_by(rok, b, kod2) %>%
  summarise(N = sum(p*hat_wolne)) %>%
  group_by(rok, kod2) %>%
  summarise(cv = sd(N)/mean(N)*100,
            m = mean(N)) %>%
  group_by(kod2) %>%
  summarise(cv = mean(cv)) %>%
  xtable()
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:52:13 2019
\begin{table}[ht]
\centering
\begin{tabular}{rlr}
  \hline
 & kod2 & cv \\ 
  \hline
1 & 11 & 4.50 \\ 
  2 & 12 & 4.53 \\ 
  3 & 13 & 9.56 \\ 
  4 & 14 & 11.84 \\ 
  5 & 21 & 4.64 \\ 
  6 & 22 & 6.45 \\ 
  7 & 23 & 10.75 \\ 
  8 & 24 & 3.74 \\ 
  9 & 25 & 8.10 \\ 
  10 & 26 & 5.54 \\ 
  11 & 31 & 6.03 \\ 
  12 & 32 & 6.31 \\ 
  13 & 33 & 3.67 \\ 
  14 & 34 & 5.66 \\ 
  15 & 35 & 6.73 \\ 
  16 & 41 & 2.90 \\ 
  17 & 42 & 6.70 \\ 
  18 & 43 & 6.63 \\ 
  19 & 44 & 4.09 \\ 
  20 & 51 & 16.14 \\ 
  21 & 52 & 15.35 \\ 
  22 & 54 & 13.77 \\ 
  23 & 71 & 17.09 \\ 
  24 & 72 & 5.27 \\ 
  25 & 73 & 5.76 \\ 
  26 & 74 & 13.99 \\ 
  27 & 75 & 5.35 \\ 
  28 & 81 & 5.19 \\ 
  29 & 82 & 6.27 \\ 
  30 & 83 & 9.23 \\ 
  31 & 91 & 10.09 \\ 
  32 & 93 & 8.52 \\ 
  33 & 94 & 19.34 \\ 
  34 & 96 & 5.41 \\ 
   \hline
\end{tabular}
\end{table}

Change format of the data

res_calib_t <- transpose(res_calib)
res_lasso1_t <- transpose(res_lasso1)
res_lasso2_t <- transpose(res_lasso2)
res_alasso1_t <- transpose(res_alasso1)
res_cmcgreg1_t <- transpose(res_cmcgreg1)

res_cmcgreg1_t <- readRDS("../results/res_cmcgreg1_wynik_est.rds")
results <- bind_rows(wyn_naive,
                     wyn_calib_kod2, 
                     wyn_lasso1_kod2, 
                     wyn_lasso2_kod2,
                     wyn_alasso1_kod2,
                     wyn_ecmc_kod2) %>%
  mutate(komp = case_when(komp == "komp_kulturalne" ~ "Artistic",
                          komp == "komp_dyspozycyjne"~ "Availability",
                          komp == "komp_kognitywne" ~ "Cognitive",
                          komp == "komp_komputerowe" ~ "Computer",
                          komp == "komp_interpersonalne" ~ "Interpersonal",
                          komp == "komp_kierownicze" ~ "Managerial",
                          komp == "komp_matematyczne" ~ "Mathematical",
                          komp == "komp_biurowe" ~ "Office",
                          komp == "komp_fizyczne" ~ "Physical",
                          komp == "komp_indywidualne" ~ "Self-organization",
                          komp == "komp_techniczne"  ~ "Technical")) 
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m)*100) %>%
  spread(estim, m) %>%
  select(comp = komp, naive, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(digits = 1, 
         caption = "Point estimates of fraction of skills for the pooled sample for 2011, 2013 and 2014",
         label = "tab-results-pool")  %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:11:46 2019
\begin{table}[ht]
\centering
\caption{Point estimates of fraction of skills for the pooled sample for 2011, 2013 and 2014} 
\label{tab-results-pool}
\begin{tabular}{lrrrrrr}
  \hline
comp & naive & greg & ecmc & lasso1 & lasso2 & alasso1 \\ 
  \hline
Artistic & 15.8 & 12.3 & 12.4 & 12.5 & 13.0 & 12.5 \\ 
  Availability & 20.9 & 19.8 & 19.7 & 19.6 & 21.5 & 19.5 \\ 
  Cognitive & 20.9 & 14.3 & 14.3 & 14.6 & 14.0 & 14.6 \\ 
  Computer & 33.0 & 22.2 & 22.0 & 22.3 & 23.0 & 22.6 \\ 
  Interpersonal & 53.8 & 34.5 & 34.5 & 35.1 & 35.0 & 34.9 \\ 
  Managerial & 26.2 & 16.7 & 16.5 & 16.8 & 17.7 & 16.8 \\ 
  Mathematical & 0.4 & 0.4 & 0.4 & 0.4 & 0.4 & 0.4 \\ 
  Office & 3.9 & 3.1 & 3.1 & 3.2 & 3.4 & 3.2 \\ 
  Physical & 5.4 & 7.4 & 7.6 & 7.5 & 8.2 & 7.6 \\ 
  Self-organization & 58.6 & 43.8 & 43.5 & 43.9 & 46.2 & 43.8 \\ 
  Technical & 4.3 & 7.5 & 7.7 & 7.7 & 8.3 & 7.7 \\ 
   \hline
\end{tabular}
\end{table}
results %>%
  group_by(komp, estim) %>%
  summarise(m = sd(m)/mean(m)*100) %>%
  spread(estim, m) %>%
  select(comp = komp, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(digits = 1, 
         caption = "Average estimates of relative standard errors for skills for over 2011, 2013 and 2014",
         label = "tab-results-pool")  %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:12:57 2019
\begin{table}[ht]
\centering
\caption{Average estimates of relative standard errors for skills for over 2011, 2013 and 2014} 
\label{tab-results-pool}
\begin{tabular}{lrrrrr}
  \hline
comp & greg & ecmc & lasso1 & lasso2 & alasso1 \\ 
  \hline
Artistic & 11.1 & 3.5 & 3.4 & 3.4 & 3.4 \\ 
  Availability & 22.0 & 1.0 & 0.9 & 1.5 & 1.0 \\ 
  Cognitive & 25.4 & 8.5 & 8.1 & 9.3 & 8.2 \\ 
  Computer & 24.9 & 12.9 & 12.4 & 12.7 & 12.4 \\ 
  Interpersonal & 17.6 & 6.6 & 6.3 & 6.6 & 6.4 \\ 
  Managerial & 15.3 & 5.6 & 5.3 & 5.5 & 5.4 \\ 
  Mathematical & 15.6 & 4.1 & 4.0 & 3.2 & 4.1 \\ 
  Office & 33.5 & 4.7 & 4.4 & 4.4 & 4.6 \\ 
  Physical & 32.6 & 4.1 & 4.2 & 4.7 & 4.3 \\ 
  Self-organization & 16.7 & 3.8 & 3.6 & 3.5 & 3.6 \\ 
  Technical & 25.1 & 5.3 & 5.2 & 7.8 & 5.2 \\ 
   \hline
\end{tabular}
\end{table}
ggsave(plot = p, file = "../results/fig-estims.png", width = 13)
Saving 13 x 7 in image
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m)) %>%
  group_by(estim) %>%
  mutate(r = 12- rank(m, ties.method = "average")) %>%
  select(-m) %>%
  spread(estim, r) %>%
  arrange(naive) %>%
  select(komp, naive, greg:lasso2)

AUC

data.frame(lasso1 = do.call('cbind',auc_lasso1) %>% apply(., 2, mean),
           lasso2 = do.call('cbind',auc_lasso2) %>% apply(., 2, mean),
           alasso1 = do.call('cbind',auc_alasso1) %>% apply(., 2, mean),
           komp = komps)%>%
  mutate(komp = case_when(komp == "komp_kulturalne" ~ "Artistic",
                          komp == "komp_dyspozycyjne"~ "Availability",
                          komp == "komp_kognitywne" ~ "Cognitive",
                          komp == "komp_komputerowe" ~ "Computer",
                          komp == "komp_interpersonalne" ~ "Interpersonal",
                          komp == "komp_kierownicze" ~ "Managerial",
                          komp == "komp_matematyczne" ~ "Mathematical",
                          komp == "komp_biurowe" ~ "Office",
                          komp == "komp_fizyczne" ~ "Physical",
                          komp == "komp_indywidualne" ~ "Self-organization",
                          komp == "komp_techniczne"  ~ "Technical"))  %>%
  select(komp, lasso1, lasso2,alasso1) %>%
  xtable(digits = 3, 
         caption = "Quality of the model measured by Area Under Curve (AUC; average over 500 boostrap replicated)",
         label = "tab-estim-auc") %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:27:48 2019
\begin{table}[ht]
\centering
\caption{Quality of the model measured by Area Under Curve (AUC; average over 500 boostrap replicated)} 
\label{tab-estim-auc}
\begin{tabular}{lrrr}
  \hline
komp & lasso1 & lasso2 & alasso1 \\ 
  \hline
Technical & 0.829 & 0.846 & 0.829 \\ 
  Mathematical & 0.784 & 0.818 & 0.784 \\ 
  Artistic & 0.665 & 0.672 & 0.665 \\ 
  Computer & 0.748 & 0.755 & 0.748 \\ 
  Cognitive & 0.644 & 0.654 & 0.644 \\ 
  Managerial & 0.722 & 0.731 & 0.722 \\ 
  Interpersonal & 0.731 & 0.750 & 0.731 \\ 
  Self-organization & 0.695 & 0.708 & 0.695 \\ 
  Physical & 0.687 & 0.713 & 0.687 \\ 
  Availability & 0.605 & 0.635 & 0.604 \\ 
  Office & 0.671 & 0.681 & 0.670 \\ 
   \hline
\end{tabular}
\end{table}
---
title: "R Notebook"
output: html_notebook
---

Packages

```{r}
library(tidyverse)
library(xtable)
library(readxl)
```


# variances

```{r}
d2011 <- read.table("../data-raw/popyt-2011", header = F, sep  = ";", dec = ",", stringsAsFactors = F) %>%
  select(sekcja = V1, prec = V3) %>%
  mutate(rok = 2011)

d2013 <- read_excel("../data-raw/PW_popyt_na_prace_w_2013.xls", col_names = F, skip = 31) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3)%>%
  mutate(rok = 2013)
  
d2014 <- read_excel("../data-raw/popyt_na_prace_2014.xls", col_names = F, skip = 31) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3) %>%
  mutate(rok = 2014)

precs <- bind_rows(d2011,d2013, d2014)

precs %>% 
  spread(rok, prec) %>%
  add_row(sekcja = "overall", `2011` = 3.40, `2013` = 4.01, `2014` = 3.98, .before = 1) %>%
  xtable(caption = "Estimates on relative standard erros of estimators for vacancies of the demand for labour in IV quarter 2011, 2013 and 2014",
         label = "tab-rel-var") %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
```

Read the data

```{r}
res_calib <- readRDS(file = "../results/res_calib.rds")
res_lasso1 <- readRDS(file = "../results/res_lasso_kod2.rds")
res_lasso2 <- readRDS(file = "../results/res_lasso_kod2_nace.rds")
res_alasso1 <- readRDS(file = "../results/res_alasso_kod2.rds")
#res_cmcgreg1 <- readRDS(file = "../results/res_glm_kod2.rds")
```

```{r}


totals <- readRDS("../data/gus-woj-sek-boot-totals.rds") %>%
  filter(substr(kod2,1,1) != 6) %>%
  mutate(kod2 = ifelse(kod2 == 95, 96, kod2),
         kod2 = ifelse(kod2 == 92, 91, kod2),
         kod2 = ifelse(kod2 == 53, 54, kod2),
         #kod1 = as.character(kod1),
         kod2 = as.character(kod2),
         rok = as.character(rok)) 

final_data <- readRDS("../data/bkl-final.rds") %>%
  filter(zawod1 != 6, rok!=2012, nace %in% unique(totals$sekcja)) %>%
  mutate(zawod2 = ifelse(zawod2 == 95, 96, zawod2),
         zawod2 = ifelse(zawod2 == 92, 91, zawod2),
         zawod2 = ifelse(zawod2 == 99, 96, zawod2),
         zawod2 = ifelse(zawod2 == 53, 54, zawod2),
         zeros = str_extract(zawod6, "0+$"),
         zeros = str_count(zeros, "0"),
         zeros = ifelse(is.na(zeros), 6, 6-zeros),
         rok = as.character(rok)) %>%
  rename(kod1 = zawod1,
         kod2 = zawod2,
         kod6 = zawod6,
         sekcja = nace)  %>%
  filter(zeros > 1)  ## 138 records

```

```{r}
final_data %>%
  count(zeros)
```

```{r}
totals %>%
  count(b, rok, wt= hat_wolne) %>%
  group_by(rok) %>%
  summarise(m = round(mean(n))) %>%
  spread(rok, m) %>%
  xtable(digits = 0, caption = "Estimated total number of vacancies at the end of 1Q based on the DL survey") %>%
  print.xtable(include.rownames = F, caption.placement = "top")
```

```{r}
totals %>%
  count(b, rok, kod2, wt = hat_wolne) %>%
  add_count(b, rok, wt = n, name = "total") %>%
  mutate(p = n / total*100) %>%
  group_by(kod2) %>%
  summarise(pop = mean(p)) %>%
  left_join(
    final_data %>%
      count(rok, kod2) %>%
      add_count(rok, wt = n, name = "total") %>%
      mutate(p = n / total*100) %>%
      group_by(kod2) %>%
      summarise(bkl = mean(p))
  )  %>%
  xtable(digits = 2,
         caption = "Distribution of occupancy (2 digit codes) in Population and BKL data (average over 2011, 2013 and 2014)") %>%
  print.xtable(caption.placement = "top", include.rownames = F)
```


```{r}
totals %>%
  group_by(rok, b, kod2) %>%
  summarise(N = sum(p*hat_wolne)) %>%
  group_by(rok, kod2) %>%
  summarise(cv = sd(N)/mean(N)*100,
            m = mean(N)) %>%
  group_by(rok) %>%
  do(m = broom::tidy(summary(.$cv))) %>%
  unnest() %>%
  xtable(caption = "Estimates on relative standard erros of estimators for vacancies by 
         occupation (2 digit code) in IV quarter 2011, 2013 and 2014", 
         label = "tab-occup-var") %>%
  print.xtable(caption.placement = "top",
               include.rownames = F)

totals %>%
  group_by(rok, b, kod2) %>%
  summarise(N = sum(p*hat_wolne)) %>%
  group_by(rok, kod2) %>%
  summarise(cv = sd(N)/mean(N)*100,
            m = mean(N)) %>%
  group_by(kod2) %>%
  summarise(cv = mean(cv)) %>%
  xtable()
```

Change format of the data

```{r}
res_calib_t <- transpose(res_calib)
res_lasso1_t <- transpose(res_lasso1)
res_lasso2_t <- transpose(res_lasso2)
res_alasso1_t <- transpose(res_alasso1)
res_cmcgreg1_t <- transpose(res_cmcgreg1)

res_cmcgreg1_t <- readRDS("../results/res_cmcgreg1_wynik_est.rds")
```

```{r}
res_calib_t$calib_kod2 %>%
  bind_rows(.id = "boot") %>%
  gather(komp, vals, komp_techniczne:komp_biurowe) %>%
  arrange(komp, boot) %>%
  group_by(komp, rok) %>%
  summarise(m = mean(vals),
            bias = mean(vals - mean(vals)),
            sd = sd(vals),
            rmse = sd^2 + bias^2,
            cv = sqrt(rmse)/m*100,
            q025 = quantile(vals,0.025),
            q975 = quantile(vals,0.975),
            estim = "greg") -> wyn_calib_kod2


res_lasso1_t$wynik_est %>%
  bind_rows(.id = "boot") %>%
  gather(komp, vals, komp_techniczne:komp_biurowe) %>% 
  mutate(vals = as.numeric(vals)) %>%
  arrange(komp, boot) %>%
  group_by(komp, rok) %>%
  summarise(m = mean(vals),
            bias = mean(vals - mean(vals)),
            sd = sd(vals),
            rmse = sd^2 + bias^2,
            cv = sqrt(rmse)/m*100,
            q025 = quantile(vals,0.025),
            q975 = quantile(vals,0.975),
            estim = "lasso1") -> wyn_lasso1_kod2


res_lasso2_t$wynik_est %>%
  bind_rows(.id = "boot") %>%
  gather(komp, vals, komp_techniczne:komp_biurowe) %>%
  arrange(komp, boot) %>%
  group_by(komp, rok) %>%
  summarise(m = mean(vals),
            bias = mean(vals - mean(vals)),
            sd = sd(vals),
            rmse = sd^2 + bias^2,
            cv = sqrt(rmse)/m*100,
            q025 = quantile(vals,0.025),
            q975 = quantile(vals,0.975),
            estim = "lasso2") -> wyn_lasso2_kod2

res_alasso1_t$wynik_est %>%
  bind_rows(.id = "boot") %>%
  select(boot, rok, starts_with("komp")) %>%
  gather(komp, vals, komp_techniczne:komp_biurowe) %>%
  arrange(komp, boot) %>%
  group_by(komp, rok) %>%
  summarise(m = mean(vals),
            bias = mean(vals - mean(vals)),
            sd = sd(vals),
            rmse = sd^2 + bias^2,
            cv = sqrt(rmse)/m*100,
            q025 = quantile(vals,0.025),
            q975 = quantile(vals,0.975),
            estim = "alasso1") -> wyn_alasso1_kod2

res_cmcgreg1_t %>%
  bind_rows(.id = "boot") %>%
  select(boot, rok, starts_with("komp")) %>%
  gather(komp, vals, komp_techniczne:komp_biurowe) %>%
  arrange(komp, boot) %>%
  group_by(komp, rok) %>%
  summarise(m = mean(vals),
            bias = mean(vals - mean(vals)),
            sd = sd(vals),
            rmse = sd^2 + bias^2,
            cv = sqrt(rmse)/m*100,
            q025 = quantile(vals,0.025),
            q975 = quantile(vals,0.975),
            estim = "ecmc") -> wyn_ecmc_kod2

final_data %>%
  select(rok, komp_techniczne:komp_biurowe) %>%
  gather(komp, val, -rok) %>%
  group_by(rok,komp) %>%
  summarise(m = mean(val),
            estim = "naive") -> wyn_naive
```


```{r}
results <- bind_rows(wyn_naive,
                     wyn_calib_kod2, 
                     wyn_lasso1_kod2, 
                     wyn_lasso2_kod2,
                     wyn_alasso1_kod2,
                     wyn_ecmc_kod2) %>%
  mutate(komp = case_when(komp == "komp_kulturalne" ~ "Artistic",
                          komp == "komp_dyspozycyjne"~ "Availability",
                          komp == "komp_kognitywne" ~ "Cognitive",
                          komp == "komp_komputerowe" ~ "Computer",
                          komp == "komp_interpersonalne" ~ "Interpersonal",
                          komp == "komp_kierownicze" ~ "Managerial",
                          komp == "komp_matematyczne" ~ "Mathematical",
                          komp == "komp_biurowe" ~ "Office",
                          komp == "komp_fizyczne" ~ "Physical",
                          komp == "komp_indywidualne" ~ "Self-organization",
                          komp == "komp_techniczne"  ~ "Technical")) 
```

```{r}
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m)*100) %>%
  spread(estim, m) %>%
  select(comp = komp, naive, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(digits = 1, 
         caption = "Point estimates of fraction of skills for the pooled sample for 2011, 2013 and 2014",
         label = "tab-results-pool")  %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
``` 

```{r}
results %>%
  group_by(komp, estim) %>%
  summarise(m = sd(m)/mean(m)*100) %>%
  spread(estim, m) %>%
  select(comp = komp, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(digits = 1, 
         caption = "Average estimates of relative standard errors for skills for over 2011, 2013 and 2014",
         label = "tab-results-pool")  %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
```

```{r}
results %>%
  mutate(estim = toupper(estim),
         estim = factor(estim, c("NAIVE", "GREG","ECMC", "LASSO1","LASSO2","ALASSO1"),
                        c("HTSRS","MCGREG","ECMC", "ECLASSO1","ECLASSO2","ECALASSO1"))) %>%
  ggplot(data = ., aes(x = estim, y = m, ymin = q025, ymax = q975, color = estim, group = estim)) +
  geom_errorbar(position = position_dodge(width = 1)) +
  scale_color_brewer(type = "qual", palette = "Set1", name = "Estimators") +
  geom_point(position = position_dodge(width = 1)) +
  facet_grid(rok~komp) +
  theme_bw() +
  scale_y_continuous(label=scales::percent) +
  labs(x = "Estimators", y = "Point estimates")  +
  theme(axis.text.x = element_text(angle = 45, vjust =1, hjust= 1),
        legend.position = "bottom") -> p


ggsave(plot = p, file = "../results/fig-estims.png", width = 13)

```

```{r}
results %>%
  mutate(estim = toupper(estim),
         estim = factor(estim, c("NAIVE", "GREG","ECMC", "LASSO1","LASSO2","ALASSO1"),
                        c("HTSRS","MCGREG","ECMC", "ECLASSO1","ECLASSO2","ECALASSO1"))) %>%
  ggplot(data = ., aes(x = rok, y = m, ymin = q025, ymax = q975, color = estim, group = estim)) +
  geom_point() +
  geom_line() + 
  facet_wrap(~komp) +
  scale_color_brewer(type = "qual", palette = "Set1", name = "Estimators") +
  theme_bw() +
  scale_y_continuous(label=scales::percent) +
  labs(x = "Year", y = "Point estimate") -> p

ggsave(plot = p, file = "../results/fig-dists.png", height = 5)
```

```{r}
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m)) %>%
  group_by(estim) %>%
  mutate(r = 12- rank(m, ties.method = "average")) %>%
  select(-m) %>%
  spread(estim, r) %>%
  arrange(naive) %>%
  select(komp, naive, greg:lasso2)
```

AUC
```{r}

komps <- c("komp_techniczne", "komp_matematyczne", "komp_kulturalne", "komp_komputerowe", 
                             "komp_kognitywne", "komp_kierownicze", "komp_interpersonalne", 
                             "komp_indywidualne", "komp_fizyczne", "komp_dyspozycyjne", "komp_biurowe")

auc_lasso1 <- list()
auc_lasso2 <- list()
auc_alasso1 <- list()
for (i in 1:11) {
  auc_lasso1[[i]] <- res_lasso1_t$wynik_model %>%
    map_dbl(~.[[i]]$metrics[.[[i]]$best.metric.index,] %>% max())
  auc_lasso2[[i]] <- res_lasso2_t$wynik_model %>%
    map_dbl(~.[[i]]$metrics[.[[i]]$best.metric.index,] %>% max())
  auc_alasso1[[i]] <- res_alasso1_t$wynik_model %>%
    map_dbl(~.[[i]]$metrics[.[[i]]$best.metric.index,] %>% max())
} 


data.frame(lasso1 = do.call('cbind',auc_lasso1) %>% apply(., 2, mean),
           lasso2 = do.call('cbind',auc_lasso2) %>% apply(., 2, mean),
           alasso1 = do.call('cbind',auc_alasso1) %>% apply(., 2, mean),
           komp = komps)%>%
  mutate(komp = case_when(komp == "komp_kulturalne" ~ "Artistic",
                          komp == "komp_dyspozycyjne"~ "Availability",
                          komp == "komp_kognitywne" ~ "Cognitive",
                          komp == "komp_komputerowe" ~ "Computer",
                          komp == "komp_interpersonalne" ~ "Interpersonal",
                          komp == "komp_kierownicze" ~ "Managerial",
                          komp == "komp_matematyczne" ~ "Mathematical",
                          komp == "komp_biurowe" ~ "Office",
                          komp == "komp_fizyczne" ~ "Physical",
                          komp == "komp_indywidualne" ~ "Self-organization",
                          komp == "komp_techniczne"  ~ "Technical"))  %>%
  select(komp, lasso1, lasso2,alasso1) %>%
  xtable(digits = 3, 
         caption = "Quality of the model measured by Area Under Curve (AUC; average over 500 boostrap replicated)",
         label = "tab-estim-auc") %>%
  print.xtable(include.rownames = F,
               caption.placement = "top")
  



```

```{r}
final_data %>%
  group_by(rok) %>%
  summarise_at(vars(contains("komp")),sum)
```

