library(VIM)
Loading required package: colorspace
Loading required package: grid
Loading required package: data.table
data.table 1.12.2 using 4 threads (see ?getDTthreads). Latest news: r-datatable.com
VIM is ready to use.
Since version 4.0.0 the GUI is in its own package VIMGUI.
Please use the package to use the new (and old) GUI.
Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
Attaching package: ‘VIM’
The following object is masked from ‘package:datasets’:
sleep
library(tidyverse)
── Attaching packages ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.1
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between() masks data.table::between()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::first() masks data.table::first()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::last() masks data.table::last()
✖ purrr::transpose() masks data.table::transpose()
Read the data
# Summary statistics for every column of `bkl`, leaving out rows with rok == 2012.
bkl %>%
  filter(rok != 2012) %>%
  skimr::skim()
Skim summary statistics
n obs: 62171
n variables: 27
── Variable type:character ─────────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n min max empty n_unique
branza 21373 40798 62171 1 2 0 45
forma_ogl 0 62171 62171 1 1 0 3
nace 22211 39960 62171 1 1 0 17
podregion 5621 56550 62171 1 2 0 66
sekcja_pkd2 20750 41421 62171 3 3 0 331
woj 268 61903 62171 2 2 0 16
wyksztalcenie 24814 37357 62171 6 19 0 5
zawod1 254 61917 62171 1 1 0 9
zawod6 254 61917 62171 6 6 0 1357
zrodlo 0 62171 62171 1 1 0 2
── Variable type:logical ───────────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n mean count
jezyk_angielski 0 62171 62171 0.18 FAL: 50698, TRU: 11473, NA: 0
jezyk_niemiecki 0 62171 62171 0.05 FAL: 59071, TRU: 3100, NA: 0
komp_biurowe 0 62171 62171 0.031 FAL: 60241, TRU: 1930, NA: 0
komp_dyspozycyjne 0 62171 62171 0.15 FAL: 53041, TRU: 9130, NA: 0
komp_fizyczne 0 62171 62171 0.041 FAL: 59609, TRU: 2562, NA: 0
komp_indywidualne 0 62171 62171 0.42 FAL: 36298, TRU: 25873, NA: 0
komp_interpersonalne 0 62171 62171 0.38 FAL: 38447, TRU: 23724, NA: 0
komp_kierownicze 0 62171 62171 0.18 FAL: 51126, TRU: 11045, NA: 0
komp_kognitywne 0 62171 62171 0.14 FAL: 53526, TRU: 8645, NA: 0
komp_komputerowe 0 62171 62171 0.24 FAL: 47237, TRU: 14934, NA: 0
komp_kulturalne 0 62171 62171 0.11 FAL: 55138, TRU: 7033, NA: 0
komp_matematyczne 0 62171 62171 0.0026 FAL: 62008, TRU: 163, NA: 0
komp_techniczne 0 62171 62171 0.045 FAL: 59358, TRU: 2813, NA: 0
komp_zawodowe 26 62145 62171 0.26 FAL: 45846, TRU: 16299, NA: 26
── Variable type:numeric ───────────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
id 20081 42090 62171 211028.53 293297.03 1 5478 14308.5 610179.75 847595 ▇▁▁▁▁▃▁▁
rok 0 62171 62171 2012.68 1.25 2011 2011 2013 2014 2014 ▇▁▁▁▁▇▁▇
zrodlo_dokladne 2 62169 62171 255.12 170.08 1 135 262 353 723 ▅▃▃▇▁▁▂▁
# Print the column names of `bkl` as a copy-pasteable character vector.
bkl %>%
  names() %>%
  dput()
c("id", "rok", "zrodlo", "forma_ogl", "zrodlo_dokladne", "zawod1",
"zawod6", "woj", "podregion", "nace", "sekcja_pkd2", "branza",
"wyksztalcenie", "jezyk_angielski", "jezyk_niemiecki", "komp_zawodowe",
"komp_techniczne", "komp_matematyczne", "komp_kulturalne", "komp_komputerowe",
"komp_kognitywne", "komp_kierownicze", "komp_interpersonalne",
"komp_indywidualne", "komp_fizyczne", "komp_dyspozycyjne", "komp_biurowe"
)
Imputation of missing data
Detected as categorical variable:
zrodlo,forma_ogl,zawod1,zawod6,woj,podregion,nace,sekcja_pkd2,branza,wyksztalcenie
Detected as ordinal variable:
Detected as numerical variable:
id,rok,zrodlo_dokladne
382items ofvariable:zawod1 imputed
286items ofvariable:woj imputed
29887items ofvariable:nace imputed
Time difference of 1.577767 hours
After imputation
# Number of rows and columns of the imputed data set.
imputed %>%
  dim()
[1] 83765 30
# LaTeX table: percentage of rows in `imputed` in which each skill flag
# (komp_techniczne ... komp_biurowe) is TRUE, with one column per `zrodlo`
# value and a pooled column `p` computed over all rows.
# NOTE(review): the per-source columns keep the raw `zrodlo` codes (`1`, `2`)
# as headers; the correlation chunk further down refers to them as
# online / pup -- consider renaming them here as well.

# English labels for the skill indicator columns. Column names that are not
# listed map to NA, which is what an unmatched case_when() branch returns.
skill_labels <- c(
  komp_kulturalne      = "Artistic",
  komp_dyspozycyjne    = "Availability",
  komp_kognitywne      = "Cognitive",
  komp_komputerowe     = "Computer",
  komp_interpersonalne = "Interpersonal",
  komp_kierownicze     = "Managerial",
  komp_matematyczne    = "Mathematical",
  komp_biurowe         = "Office",
  komp_fizyczne        = "Physical",
  komp_indywidualne    = "Self-organization",
  komp_techniczne      = "Technical"
)

# Long format: one row per (record, skill) pair, skill names already
# translated. Built once and reused for both the per-source and pooled shares.
skills_long <- imputed %>%
  ungroup() %>%
  select(zrodlo, komp_techniczne:komp_biurowe) %>%
  gather(comp, vals, -zrodlo) %>%
  mutate(comp = unname(skill_labels[comp]))

# Share of TRUE values per skill within each source, one column per source.
share_by_source <- skills_long %>%
  count(zrodlo, comp, vals) %>%
  add_count(zrodlo, comp, wt = n, name = "total") %>%
  mutate(p = n / total * 100) %>%
  filter(vals) %>%
  select(zrodlo, comp, p) %>%
  spread(zrodlo, p)

# Share of TRUE values per skill over all rows, regardless of source.
share_pooled <- skills_long %>%
  count(comp, vals) %>%
  add_count(comp, wt = n, name = "total") %>%
  mutate(p = n / total * 100) %>%
  filter(vals) %>%
  select(comp, p)

share_by_source %>%
  # Explicit key: both tables carry the translated skill label in `comp`.
  left_join(share_pooled, by = "comp") %>%
  xtable::xtable(
    digits = 1,
    caption = paste(
      "Share of skills included in job offers by data source",
      "based on pooled data 2011, 2013 and 2014"
    )
  ) %>%
  # Use the generic so S3 dispatch picks the xtable method.
  print(include.rownames = FALSE, caption.placement = "top")
Joining, by = "comp"
% latex table generated in R 3.5.1 by xtable 1.8-4 package
% Sat Jul 20 23:33:26 2019
\begin{table}[ht]
\centering
\caption{Share of skills included in job offers by data source based on pooled data 2011, 2013 and 2014}
\begin{tabular}{lrrr}
\hline
comp & 1 & 2 & p \\
\hline
Artistic & 15.8 & 2.2 & 11.2 \\
Availability & 21.0 & 2.9 & 14.8 \\
Cognitive & 20.8 & 1.5 & 14.3 \\
Computer & 33.2 & 8.9 & 25.0 \\
Interpersonal & 55.9 & 6.9 & 39.3 \\
Managerial & 29.2 & 2.0 & 20.0 \\
Mathematical & 0.3 & 0.1 & 0.2 \\
Office & 3.8 & 1.5 & 3.0 \\
Physical & 6.0 & 2.0 & 4.7 \\
Self-organization & 59.1 & 7.6 & 41.6 \\
Technical & 4.3 & 5.1 & 4.6 \\
\hline
\end{tabular}
\end{table}
# Spearman rank correlation between the two sources' skill shares.
# For every skill flag (komp_techniczne ... komp_biurowe) the percentage of
# TRUE values is computed within each `zrodlo` value; the two resulting
# columns (`1` -> online, `2` -> pup) are then correlated across skills.
#
# The former relabelling of `comp` and the left_join() with the pooled shares
# were removed: both `comp` and the joined column `p` were dropped by the
# select() below, so they never influenced the correlation (and the join keys
# could not match, since only the left side had been relabelled).
imputed %>%
  ungroup() %>%
  select(zrodlo, komp_techniczne:komp_biurowe) %>%
  gather(comp, vals, -zrodlo) %>%
  count(zrodlo, comp, vals) %>%
  add_count(zrodlo, comp, wt = n, name = "total") %>%
  mutate(p = n / total * 100) %>%
  filter(vals) %>%
  select(zrodlo, comp, p) %>%
  spread(zrodlo, p) %>%
  select(online = `1`, pup = `2`) %>%
  # Spell out the argument and its value instead of relying on partial matching.
  cor(method = "spearman")
Joining, by = "comp"
online pup
online 1.0000000 0.7363636
pup 0.7363636 1.0000000