library(tidyverse)
library(data.table)
data.table 1.12.2 using 4 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: ‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
library(readxl)
library(VIM)
Loading required package: colorspace
VIM is ready to use.
Since version 4.0.0 the GUI is in its own package VIMGUI.
Please use the package to use the new (and old) GUI.
Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
Attaching package: ‘VIM’
The following object is masked from ‘package:datasets’:
sleep
library(vcd)
Read raw data
gus_woj <- fread("../data-raw/POPYT_POZNAN_2011_2017.csv") %>%
rename( kwartal = 3, woj = 4) %>%
select(rok = ROK, kwartal, woj, ends_with("1"), -P6_011, -V1) %>%
filter(rok %in% 2011:2014, kwartal == 1, woj != "POLSKA") %>%
mutate(woj = str_pad(woj,2,"0",side = "left")) %>%
gather(zawody, liczba, -rok, -kwartal,-woj) %>%
mutate(zawody = str_replace(zawody, "P6_",""),
zawody = str_replace(zawody, "1$",""),
zawody = as.numeric(zawody),
zawod1 = case_when(zawody %in% 2:5 ~ 1,
zawody %in% 6:11 ~ 2,
zawody %in% 12:16 ~ 3,
zawody %in% 17:20 ~ 4,
zawody %in% 21:24 ~ 5,
zawody %in% 25:27 ~ 6,
zawody %in% 28:32 ~ 7,
zawody %in% 33:35 ~ 8,
zawody %in% 36:41 ~ 9)) %>%
group_by(rok, kwartal, woj, zawod1) %>%
mutate(zawod2 = paste0(zawod1, row_number())) %>%
ungroup()
Read data from excel spreadsheets
dir(path = "../data-raw", pattern = "*00", full.names = T) %>%
set_names(basename(.)) %>%
map_df(~read_excel(path = .x, sheet = "Arkusz5", skip = 9, col_names = F) %>%
filter(`...1` != "OGÓŁEM") %>%
set_names(nm = c("zawod","ogolem", LETTERS[1:19])) %>%
select(-ogolem) %>%
mutate(kod = str_extract(zawod, "\\d{1,2}\\."),
kod = str_replace(kod, "\\.", ""),
kod = as.numeric(kod)) %>%
filter(kod > 10) %>%
gather(sekcja, wolne, -kod, -zawod) %>%
filter(wolne > 0),
.id = "rok") %>%
mutate(rok = readr::parse_number(rok),
kod1 = substr(kod, 1,1)) %>%
rename(kod2 = kod) %>%
select(rok, kod1, kod2, zawod, sekcja, wolne) -> gus_sek
-
/
New names:
* `` -> ...1
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
* … and 16 more problems
-
/
New names:
* `` -> ...1
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
* … and 16 more problems
-
/
New names:
* `` -> ...1
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
* … and 16 more problems
-
/
New names:
* `` -> ...1
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
* … and 16 more problems
gus_sek %>%
count(rok, wt = wolne)
Joining two things together
gus_sek %>%
select(-zawod) %>%
mutate(kod1 = as.numeric(kod1)) %>%
bind_rows(gus_woj %>%
mutate(kod2 = as.numeric(zawod2)) %>%
select(rok, woj, kod1 = zawod1, kod2 , woj, wolne = liczba)) -> gus_dane
saveRDS(object = gus_dane, file = "../data/gus-woj-sek.rds")
Data with standard errors
precs %>%
spread(rok, prec) %>%
add_row(sekcja = "overall", `2011` = 3.40, `2013` = 4.01, `2014` = 3.98, .before = 1) %>%
xtable(caption = "Estimates on relative standard erros of estimators for vacancies of the demand for labour in IV quarter 2011, 2013 and 2014",
label = "tab-rel-var") %>%
print.xtable(include.rownames = F,
caption.placement = "top")
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Thu Jul 25 23:10:15 2019
\begin{table}[ht]
\centering
\caption{Estimates on relative standard erros of estimators for vacancies of the demand for labour in IV quarter 2011, 2013 and 2014}
\label{tab-rel-var}
\begin{tabular}{lrrr}
\hline
sekcja & 2011 & 2013 & 2014 \\
\hline
overall & 3.40 & 4.01 & 3.98 \\
C & 5.50 & 5.27 & 5.64 \\
F & 13.86 & 19.21 & 15.12 \\
G & 13.69 & 15.75 & 16.33 \\
H & 8.07 & 9.93 & 9.17 \\
I & 15.99 & 20.78 & 18.26 \\
J & 6.30 & 7.04 & 11.50 \\
K & 7.00 & 8.36 & 7.43 \\
M & 8.12 & 8.71 & 12.01 \\
N & 23.09 & 12.89 & 17.76 \\
O & 3.19 & 3.50 & 2.56 \\
P & 8.85 & 10.65 & 12.06 \\
Q & 5.53 & 6.88 & 6.00 \\
R & 7.08 & 8.68 & 9.28 \\
S & 18.09 & 21.29 & 20.77 \\
\hline
\end{tabular}
\end{table}
Add bootstrap population data
Bootstraping totals
set.seed(123)
pop_totals %>%
group_by(rok, r, b) %>%
do(res = rmultinom(n = 100, size = .$r[[1]], p = .$data[[1]]$p) %>% as.data.frame(),
kod2 = .$data[[1]]$kod2,
sekcja = .$data[[1]]$sekcja) %>%
group_by(rok, b) %>%
unnest(kod2, sekcja, res) %>%
ungroup() %>%
arrange(rok, r, b) %>%
gather(b2, wolne, V1:V100) %>%
mutate(b2 = gsub("V", "", b2),
b2 = as.numeric(b2)) %>%
arrange(rok, b, b2) -> pop_totals2
saveRDS(boot_pop, file = "../data/gus-woj-sek-boot-totals.rds")
nace_sek <- readRDS("../data/bkl-finalne-model.rds") %>%
ungroup() %>%
count(sekcja_pkd, nace) %>%
select(-n) %>%
filter(!is.na(sekcja_pkd))
bkl <- readRDS("../data/bkl-oferty.rds") %>%
filter(rok != 2010) %>%
mutate(woj = str_pad(woj, 2, "0", side = "left"),
sekcja_pkd = as.character(sekcja_pkd),
zawod_9grup = ifelse(is.na(zawod_9grup), substr(zawod,1,1), zawod_9grup)) %>%
rename(id = plik, zawod6 = zawod, zawod1 = zawod_9grup) %>%
left_join(nace_sek) %>%
select(id, rok, zrodlo, forma_ogl, zrodlo_dokladne, zawod1, zawod6,
woj, podregion, nace, sekcja_pkd2, branza, wyksztalcenie, jezyk_angielski:komp_biurowe)
saveRDS(bkl, "../data/bkl-before-imp.rds")