variances
# ---- Precision values (prec) by section (sekcja) and year (rok) ----
# 2011: semicolon-separated text file with decimal commas and no header row.
# V1 is kept as the section label and V3 as the precision value.
d2011 <- read.table(
  "../data-raw/popyt-2011",
  header = FALSE,
  sep = ";",
  dec = ",",
  stringsAsFactors = FALSE
) %>%
  select(sekcja = V1, prec = V3) %>%
  mutate(rok = 2011)

# 2013: Excel sheet read without column names, skipping the first 31 rows;
# rows containing any NA are dropped.
# NOTE(review): section labels are copied positionally from d2011, so this
# assumes the remaining rows come in the same order as d2011 -- confirm.
d2013 <- read_excel(
  "../data-raw/PW_popyt_na_prace_w_2013.xls",
  col_names = FALSE,
  skip = 31
) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3) %>% # prec is taken from the third column by position
  mutate(rok = 2013)

# 2014: same layout and processing as the 2013 sheet.
d2014 <- read_excel(
  "../data-raw/popyt_na_prace_2014.xls",
  col_names = FALSE,
  skip = 31
) %>%
  na.omit() %>%
  mutate(sekcja = d2011$sekcja) %>%
  select(sekcja, prec = 3) %>% # prec is taken from the third column by position
  mutate(rok = 2014)

# Stack the three years into one long table: sekcja, prec, rok.
precs <- bind_rows(d2011, d2013, d2014)
# LaTeX table of prec with one row per section and one column per year.
# The "overall" row is entered by hand and placed first.
precs %>%
  spread(rok, prec) %>%
  add_row(
    sekcja = "overall",
    `2011` = 3.40, `2013` = 4.01, `2014` = 3.98,
    .before = 1
  ) %>%
  xtable(
    caption = "Estimates on relative standard errors of estimators for vacancies of the demand for labour in IV quarter 2011, 2013 and 2014",
    label = "tab-rel-var"
  ) %>%
  # print() dispatches to the xtable method; row names are suppressed and the
  # caption is placed above the table.
  print(include.rownames = FALSE, caption.placement = "top")
Read the data
# Load the serialized results object from the results directory.
# NOTE(review): the recorded run of this statement failed with
# "vector memory exhausted"; the object may be too large for the default
# memory limit -- confirm before relying on res_cmcgreg1 downstream.
# NOTE(review): the variable is named res_cmcgreg1 but the file is
# res_glm_kod2.rds -- verify that this is the intended file.
res_cmcgreg1 <- readRDS("../results/res_glm_kod2.rds")
Error: vector memory exhausted (limit reached?)
# Estimated total number of vacancies per year (rok): hat_wolne is summed
# within each (b, rok) pair, the sums are averaged over b within each year,
# rounded, and printed as a one-row LaTeX table with one column per year.
# NOTE(review): b presumably indexes bootstrap replicates -- confirm.
totals %>%
  count(b, rok, wt = hat_wolne) %>%
  group_by(rok) %>%
  summarise(m = round(mean(n))) %>%
  spread(rok, m) %>%
  xtable(
    digits = 0,
    caption = "Estimated total number of vacancies at the end of 1Q based on the DL survey"
  ) %>%
  print(include.rownames = FALSE, caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Sat Jul 27 09:19:44 2019
\begin{table}[ht]
\centering
\caption{Estimated total number of vacancies at the end of 1Q based on the DL survey}
\begin{tabular}{rrr}
\hline
2011 & 2013 & 2014 \\
\hline
71775 & 42889 & 52725 \\
\hline
\end{tabular}
\end{table}
# Compare the distribution of kod2 (2-digit codes) between two sources.
#   pop: share of hat_wolne per kod2 within each (b, rok), in percent,
#        averaged per kod2.
#   bkl: share of rows per kod2 within each rok of final_data, in percent,
#        averaged per kod2.
# NOTE(review): "occupancy" in the caption probably should read
# "occupations" -- confirm with the author before changing the string.
totals %>%
  count(b, rok, kod2, wt = hat_wolne) %>%
  add_count(b, rok, wt = n, name = "total") %>%
  mutate(p = n / total * 100) %>%
  group_by(kod2) %>%
  summarise(pop = mean(p)) %>%
  left_join(
    final_data %>%
      count(rok, kod2) %>%
      add_count(rok, wt = n, name = "total") %>%
      mutate(p = n / total * 100) %>%
      group_by(kod2) %>%
      summarise(bkl = mean(p)),
    # kod2 is the only column shared by both sides; naming it makes the join
    # key explicit instead of relying on the natural-join default.
    by = "kod2"
  ) %>%
  xtable(
    digits = 2,
    caption = "Distribution of occupancy (2 digit codes) in Population and BKL data (average over 2011, 2013 and 2014)"
  ) %>%
  print(caption.placement = "top", include.rownames = FALSE)
Joining, by = "kod2"
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 23:42:04 2019
\begin{table}[ht]
\centering
\caption{Distribution of occupancy (2 digit codes) in Population and BKL data (average over 2011, 2013 and 2014)}
\begin{tabular}{lrr}
\hline
kod2 & pop & bkl \\
\hline
11 & 0.49 & 1.68 \\
12 & 1.70 & 2.22 \\
13 & 1.27 & 2.02 \\
14 & 0.29 & 2.78 \\
21 & 4.45 & 4.09 \\
22 & 3.33 & 1.47 \\
23 & 0.91 & 2.00 \\
24 & 6.73 & 14.65 \\
25 & 3.94 & 8.17 \\
26 & 0.71 & 0.91 \\
31 & 1.51 & 1.50 \\
32 & 0.79 & 0.58 \\
33 & 4.37 & 19.33 \\
34 & 0.97 & 0.53 \\
35 & 1.73 & 0.78 \\
41 & 1.41 & 1.82 \\
42 & 5.20 & 2.53 \\
43 & 1.28 & 1.46 \\
44 & 2.52 & 0.51 \\
51 & 2.41 & 3.10 \\
52 & 8.79 & 16.49 \\
54 & 1.28 & 1.16 \\
71 & 9.53 & 1.71 \\
72 & 7.09 & 2.36 \\
73 & 0.72 & 0.25 \\
74 & 2.09 & 1.23 \\
75 & 5.64 & 1.18 \\
81 & 2.71 & 0.35 \\
82 & 2.72 & 0.21 \\
83 & 7.99 & 1.67 \\
91 & 1.35 & 0.19 \\
93 & 2.20 & 0.49 \\
94 & 1.26 & 0.26 \\
96 & 0.60 & 0.31 \\
\hline
\end{tabular}
\end{table}
# Coefficient of variation (in percent) of the estimated totals per kod2.
#   1. N  = sum(p * hat_wolne) within each (rok, b, kod2).
#   2. cv = sd(N) / mean(N) * 100 across b within each (rok, kod2).
#   3. cv is averaged over rok for each kod2.
# The per-year mean of N was previously computed in step 2 but discarded by
# step 3, so it is no longer calculated.
totals %>%
  group_by(rok, b, kod2) %>%
  summarise(N = sum(p * hat_wolne)) %>%
  group_by(rok, kod2) %>%
  summarise(cv = sd(N) / mean(N) * 100) %>%
  group_by(kod2) %>%
  summarise(cv = mean(cv)) %>%
  xtable()
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:52:13 2019
\begin{table}[ht]
\centering
\begin{tabular}{rlr}
\hline
& kod2 & cv \\
\hline
1 & 11 & 4.50 \\
2 & 12 & 4.53 \\
3 & 13 & 9.56 \\
4 & 14 & 11.84 \\
5 & 21 & 4.64 \\
6 & 22 & 6.45 \\
7 & 23 & 10.75 \\
8 & 24 & 3.74 \\
9 & 25 & 8.10 \\
10 & 26 & 5.54 \\
11 & 31 & 6.03 \\
12 & 32 & 6.31 \\
13 & 33 & 3.67 \\
14 & 34 & 5.66 \\
15 & 35 & 6.73 \\
16 & 41 & 2.90 \\
17 & 42 & 6.70 \\
18 & 43 & 6.63 \\
19 & 44 & 4.09 \\
20 & 51 & 16.14 \\
21 & 52 & 15.35 \\
22 & 54 & 13.77 \\
23 & 71 & 17.09 \\
24 & 72 & 5.27 \\
25 & 73 & 5.76 \\
26 & 74 & 13.99 \\
27 & 75 & 5.35 \\
28 & 81 & 5.19 \\
29 & 82 & 6.27 \\
30 & 83 & 9.23 \\
31 & 91 & 10.09 \\
32 & 93 & 8.52 \\
33 & 94 & 19.34 \\
34 & 96 & 5.41 \\
\hline
\end{tabular}
\end{table}
Change format of the data
# Transpose each results object.
res_calib_t <- transpose(res_calib)
res_lasso1_t <- transpose(res_lasso1)
res_lasso2_t <- transpose(res_lasso2)
res_alasso1_t <- transpose(res_alasso1)

# res_cmcgreg1_t is read from a saved file rather than computed with
# transpose(res_cmcgreg1): the transposed value was overwritten by this
# readRDS() call on the very next line, and the recorded attempt to load
# res_cmcgreg1 failed with a memory error.
res_cmcgreg1_t <- readRDS("../results/res_cmcgreg1_wynik_est.rds")
# Stack the per-estimator result tables and replace the komp codes with
# English labels. Codes missing from the lookup become NA.
results <- local({
  # Lookup: komp code -> English label.
  komp_en <- c(
    komp_kulturalne = "Artistic",
    komp_dyspozycyjne = "Availability",
    komp_kognitywne = "Cognitive",
    komp_komputerowe = "Computer",
    komp_interpersonalne = "Interpersonal",
    komp_kierownicze = "Managerial",
    komp_matematyczne = "Mathematical",
    komp_biurowe = "Office",
    komp_fizyczne = "Physical",
    komp_indywidualne = "Self-organization",
    komp_techniczne = "Technical"
  )

  stacked <- bind_rows(
    wyn_naive,
    wyn_calib_kod2,
    wyn_lasso1_kod2,
    wyn_lasso2_kod2,
    wyn_alasso1_kod2,
    wyn_ecmc_kod2
  )

  # as.character() makes the lookup go by name even if komp is a factor;
  # unname() leaves a plain character column.
  stacked %>%
    mutate(komp = unname(komp_en[as.character(komp)]))
})
# Point estimates: mean of m (as a percentage) for every competence (komp)
# and estimator (estim), printed with one row per competence and one column
# per estimator.
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m) * 100) %>%
  spread(estim, m) %>%
  select(comp = komp, naive, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(
    digits = 1,
    caption = "Point estimates of fraction of skills for the pooled sample for 2011, 2013 and 2014",
    label = "tab-results-pool"
  ) %>%
  print(include.rownames = FALSE, caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:11:46 2019
\begin{table}[ht]
\centering
\caption{Point estimates of fraction of skills for the pooled sample for 2011, 2013 and 2014}
\label{tab-results-pool}
\begin{tabular}{lrrrrrr}
\hline
comp & naive & greg & ecmc & lasso1 & lasso2 & alasso1 \\
\hline
Artistic & 15.8 & 12.3 & 12.4 & 12.5 & 13.0 & 12.5 \\
Availability & 20.9 & 19.8 & 19.7 & 19.6 & 21.5 & 19.5 \\
Cognitive & 20.9 & 14.3 & 14.3 & 14.6 & 14.0 & 14.6 \\
Computer & 33.0 & 22.2 & 22.0 & 22.3 & 23.0 & 22.6 \\
Interpersonal & 53.8 & 34.5 & 34.5 & 35.1 & 35.0 & 34.9 \\
Managerial & 26.2 & 16.7 & 16.5 & 16.8 & 17.7 & 16.8 \\
Mathematical & 0.4 & 0.4 & 0.4 & 0.4 & 0.4 & 0.4 \\
Office & 3.9 & 3.1 & 3.1 & 3.2 & 3.4 & 3.2 \\
Physical & 5.4 & 7.4 & 7.6 & 7.5 & 8.2 & 7.6 \\
Self-organization & 58.6 & 43.8 & 43.5 & 43.9 & 46.2 & 43.8 \\
Technical & 4.3 & 7.5 & 7.7 & 7.7 & 8.3 & 7.7 \\
\hline
\end{tabular}
\end{table}
# Relative dispersion of the estimates: sd(m) / mean(m) * 100 for every
# competence (komp) and estimator (estim); the naive estimator is not shown.
results %>%
  group_by(komp, estim) %>%
  summarise(m = sd(m) / mean(m) * 100) %>%
  spread(estim, m) %>%
  select(comp = komp, greg, ecmc, lasso1, lasso2, alasso1) %>%
  xtable(
    digits = 1,
    caption = "Average estimates of relative standard errors for skills over 2011, 2013 and 2014",
    # This table needs its own LaTeX label: "tab-results-pool" is already
    # used by the point-estimates table, and a repeated \label makes
    # references ambiguous.
    label = "tab-results-rse"
  ) %>%
  print(include.rownames = FALSE, caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:12:57 2019
\begin{table}[ht]
\centering
\caption{Average estimates of relative standard errors for skills for over 2011, 2013 and 2014}
\label{tab-results-pool}
\begin{tabular}{lrrrrr}
\hline
comp & greg & ecmc & lasso1 & lasso2 & alasso1 \\
\hline
Artistic & 11.1 & 3.5 & 3.4 & 3.4 & 3.4 \\
Availability & 22.0 & 1.0 & 0.9 & 1.5 & 1.0 \\
Cognitive & 25.4 & 8.5 & 8.1 & 9.3 & 8.2 \\
Computer & 24.9 & 12.9 & 12.4 & 12.7 & 12.4 \\
Interpersonal & 17.6 & 6.6 & 6.3 & 6.6 & 6.4 \\
Managerial & 15.3 & 5.6 & 5.3 & 5.5 & 5.4 \\
Mathematical & 15.6 & 4.1 & 4.0 & 3.2 & 4.1 \\
Office & 33.5 & 4.7 & 4.4 & 4.4 & 4.6 \\
Physical & 32.6 & 4.1 & 4.2 & 4.7 & 4.3 \\
Self-organization & 16.7 & 3.8 & 3.6 & 3.5 & 3.6 \\
Technical & 25.1 & 5.3 & 5.2 & 7.8 & 5.2 \\
\hline
\end{tabular}
\end{table}
# Save plot p as a PNG, 13 inches wide. The argument is spelled out as
# `filename` rather than relying on partial matching of `file`.
# NOTE(review): height is not set, so it follows the current graphics device
# (the recorded run reported 13 x 7 in); pass height explicitly if the figure
# must be reproducible across sessions.
ggsave(filename = "../results/fig-estims.png", plot = p, width = 13)
Saving 13 x 7 in image
# Rank the competences within each estimator by their mean estimate and show
# the ranks side by side, ordered by the rank under the naive estimator.
results %>%
  group_by(komp, estim) %>%
  summarise(m = mean(m)) %>%
  group_by(estim) %>%
  # 12 - rank() reverses the ranking so the largest mean gets the smallest
  # number. NOTE(review): 12 presumably equals the number of competences
  # (11) plus one -- confirm if the set of competences changes.
  mutate(r = 12 - rank(m, ties.method = "average")) %>%
  select(-m) %>%
  spread(estim, r) %>%
  arrange(naive) %>%
  # NOTE(review): greg:lasso2 is a positional column range, so which
  # estimators are kept depends on the column order produced by spread()
  # -- verify that no estimator is dropped unintentionally.
  select(komp, naive, greg:lasso2)
AUC
# Average AUC per competence for the three lasso-type models.
# Each auc_* list is column-bound into a matrix and averaged column-wise;
# komps supplies the competence code for each row.
# NOTE(review): assumes the columns of each bound matrix are in the same
# order as komps -- confirm.
data.frame(
  lasso1 = apply(do.call("cbind", auc_lasso1), 2, mean),
  lasso2 = apply(do.call("cbind", auc_lasso2), 2, mean),
  alasso1 = apply(do.call("cbind", auc_alasso1), 2, mean),
  komp = komps
) %>%
  # Replace the komp codes with English labels; unmatched codes become NA.
  mutate(komp = case_when(
    komp == "komp_kulturalne" ~ "Artistic",
    komp == "komp_dyspozycyjne" ~ "Availability",
    komp == "komp_kognitywne" ~ "Cognitive",
    komp == "komp_komputerowe" ~ "Computer",
    komp == "komp_interpersonalne" ~ "Interpersonal",
    komp == "komp_kierownicze" ~ "Managerial",
    komp == "komp_matematyczne" ~ "Mathematical",
    komp == "komp_biurowe" ~ "Office",
    komp == "komp_fizyczne" ~ "Physical",
    komp == "komp_indywidualne" ~ "Self-organization",
    komp == "komp_techniczne" ~ "Technical"
  )) %>%
  select(komp, lasso1, lasso2, alasso1) %>%
  xtable(
    digits = 3,
    caption = "Quality of the model measured by Area Under Curve (AUC; average over 500 bootstrap replicates)",
    label = "tab-estim-auc"
  ) %>%
  print(include.rownames = FALSE, caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Fri Jul 26 22:27:48 2019
\begin{table}[ht]
\centering
\caption{Quality of the model measured by Area Under Curve (AUC; average over 500 boostrap replicated)}
\label{tab-estim-auc}
\begin{tabular}{lrrr}
\hline
komp & lasso1 & lasso2 & alasso1 \\
\hline
Technical & 0.829 & 0.846 & 0.829 \\
Mathematical & 0.784 & 0.818 & 0.784 \\
Artistic & 0.665 & 0.672 & 0.665 \\
Computer & 0.748 & 0.755 & 0.748 \\
Cognitive & 0.644 & 0.654 & 0.644 \\
Managerial & 0.722 & 0.731 & 0.722 \\
Interpersonal & 0.731 & 0.750 & 0.731 \\
Self-organization & 0.695 & 0.708 & 0.695 \\
Physical & 0.687 & 0.713 & 0.687 \\
Availability & 0.605 & 0.635 & 0.604 \\
Office & 0.671 & 0.681 & 0.670 \\
\hline
\end{tabular}
\end{table}
