---
title: "SRM_replication_code"
max-width: 2000px
format:
html:
toc: true
toc-location: left
toc-depth: 2
self-contained: true
page-layout: article
editor: visual
code-tools: true
code-overflow: wrap
execute:
error: false
warning: false
column: page-inset-right
---
# Packages, Functions, and Data
## Packages
```{r}
library (haven)
library (broom)
library (knitr)
library (DT)
library (psych)
library (sjstats)
library (pwr)
library (equate)
library (tidyverse)
library (furrr)
library (viridis)
library (Cairo)
library (ggrepel)
library (kableExtra)
```
## Custom Functions
### Substantive Functions
```{r}
# -- MOC Interpolation -----------------------------------
# See section "Interval Quantity Questions" for the intervals_df structure
moc_interpolation <- function (intervals_df, alpha){
intervals_df %>%
mutate (
moc = case_when (
upper_bound != Inf ~ (lower_bound + upper_bound)/ 2 ,
upper_bound == Inf ~ lower_bound* (alpha/ (alpha-1 ))
)
) %>% select (moc) %>%
rename_with (~ paste0 ("alpha" ,alpha))
}
# -- OSE-RG ---------------------------------------------
# Wrapper function around the equate() function from the equate package
calculate_eq_equivalents <- function (freqtab_x, freqtab_y){
eq <- equate (freqtab_x, freqtab_y, type = "equipercentile" )
eq$ concordance$ yx
}
# -- Open-Ended Question Aggregation --------------------
# The function takes the vector of open-ended responses, selects values within a quantity interval, and calculates the mean quantity
# Optionally, it truncates the open-ended responses first, be removing the top x values
aggregate_open_interval <- function (lower_bound, upper_bound, v, trim_upper = 0 ){
v<- v[! is.na (v)]
v <- sort (v)
v <- v[1 : floor (length (v)* (1 - trim_upper))]
v[v>= lower_bound & v <= upper_bound] %>% mean ()
}
aggregate_open_interval_generalized <- function (lower_bound, upper_bound, v, f){
v<- v[! is.na (v)]
v[v>= lower_bound & v <= upper_bound] %>% f ()
}
# -- Assymetric Trimmed Mean -------------------------------------
# Only removes upper cases
# in contrast to the trim argument of base::mean()
mean_trim_upper <- function (x,trim_upper = 0.05 ) {
x <- x[! is.na (x)]
x <- sort (x)
mean (x[1 : floor (length (x)* (1 - trim_upper))])
}
```
### Utility Functions
```{r}
# Table styling shortcut
style_table <- function (df){
df %>%
kable () %>% kable_styling (full_width = FALSE , position = "left" ) }
# Extracts recoding information from the harmonized_equivalents_df
get_rec_vector <- function (condition, approach, rec_df = harmonized_equivalents_df){
rec_df %>%
filter (.data$ condition == .env$ condition) %>%
pull ({{approach}})
}
```
## Load data
```{r}
source ("SRM_replication_code_sub_population_analysis.R" )
full_pop_results <- sub_pop_format %>% filter (sub_pop == "Full sample" )
sub_pop_results <- sub_pop_format %>% filter (sub_pop != "Full sample" )
data<- read_sav ("SRM_replication_data.sav" )
# Dataframe only with valid answers to the quantity question:
valid_df <- data %>%
mutate (any_valid = coalesce (books_low_freq, books_med_freq, books_hi_freq, books_numeric)) %>%
drop_na (any_valid)
```
# Descriptives
## Sample size
```{r}
total_cases <- data %>% nrow ()
valid_cases <- valid_df %>% nrow ()
```
The dataset contains `r total_cases` cases in total. Of those, `r valid_cases` cases have valid answers to the quantity question.
## Demography
### Sex
```{r}
valid_df %>%
group_by (sex = D01 %>% as_factor) %>%
summarise (n = n ()) %>%
ungroup () %>%
mutate (percent = 100 * n/ sum (n),
percent = round (percent, 2 )) %>% style_table ()
```
### Age
```{r}
valid_df %>%
mutate (age = 2020 - D02) %>%
summarise (across (age, list (mean= mean, median= median, sd= sd,min= min,max= max))) %>%
t %>% style_table
```
### Education
```{r}
valid_df %>%
group_by (education %>% as_factor) %>%
summarise (n = n ()) %>%
ungroup () %>%
mutate (percent = 100 * n/ sum (n),
percent = round (percent, 2 )) %>%
style_table ()
```
# Preliminary analyses and instrument information
## Open ended quantity question percentiles
### Full sample result
```{r results='asis'}
full_pop_results %>% select (quantiles_table) %>%
pull () %>%
.[[1 ]] %>%
as_tibble () %>%
style_table () %>% print
```
### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (quantiles_table) %>%
pull () %>%
.[[1 ]] %>%
as_tibble () %>%
style_table () %>% print
cat (" \n\n " )
}
```
:::
## Interval quantity questions
### Full sample result
```{r}
full_pop_results %>%
select (histogram_question) %>%
pull () %>%
.[[1 ]] %>%
print
```
### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (histogram_question) %>%
pull () %>%
.[[1 ]] %>%
print
cat (" \n\n\n " )
}
```
:::
## Interval Quantity Question Boundaries
### Table
Note that the data structure `intervals_df` also severs as an input into the `moc_interpolation()` function. This is because the MOC interpolation requires information about the verbatim interval boundaries from the different quantity measurement instruments.
```{r}
intervals_df <- tribble (
~ condition, ~ lower_bound, ~ upper_bound,
"low quantity" , 0 , 10 ,
"low quantity" , 11 , 25 ,
"low quantity" , 26 , 50 ,
"low quantity" , 51 , Inf ,
"medium quantity" , 0 , 25 ,
"medium quantity" , 26 , 50 ,
"medium quantity" , 51 , 100 ,
"medium quantity" , 101 , Inf ,
"high quantity" , 0 , 50 ,
"high quantity" , 51 , 100 ,
"high quantity" , 101 , 250 ,
"high quantity" , 251 , Inf
) %>%
mutate (
condition = factor (condition, levels = c ("low quantity" , "medium quantity" , "high quantity" )) %>% fct_rev
)
intervals_df %>% style_table ()
```
### Plot
```{r}
intervals_df %>%
group_by (condition) %>%
mutate (row = row_number (),
condition_num = condition %>% as.numeric,
upper_bound_label = ifelse (upper_bound != Inf , upper_bound, NA ))%>%
ggplot ()+
annotate (geom = "segment" ,
x = 50 , xend = 50 , y = 0.65 , yend = 3.45 ,
size = 14 , color = "#aaaaaa" , lineend = "round" )+
annotate (geom = "segment" ,
x = 50 , xend = 50 , y = 0.65 , yend = 3.45 ,
size = 12 , color = "#eeeeee" , lineend = "round" )+
geom_rect (aes (ymin= condition_num-0.3 , ymax= condition_num+0.3 ,
xmin= lower_bound, xmax = upper_bound,
fill= row))+
geom_text (aes (0 , condition_num, label = condition), hjust = 1.2 , size = 4 )+
geom_text (aes (upper_bound_label, condition_num+0.4 , label = upper_bound_label), size = 4 )+
geom_text (aes (310 , condition_num+0.41 ), label = expression (phantom (0 )%->% ~~ infinity), size = 4 )+
scale_x_continuous (limits = c (- 100 , 310 ))+
scale_fill_viridis (option = "mako" , end = 0.7 , begin = 0.3 )+
theme_void ()+
theme (
legend.position = "none" ,
plot.background = element_rect (color = "white" , fill= "white" )
)
```
## Response bias demonstration
### Descriptive
Note how the share of respondents reporting to own 50 books or less depends on the way the interval question.
#### Full sample result
```{r}
valid_df %>%
mutate (
low_50_or_less = ifelse (books_low_freq <= 3 , TRUE , FALSE ),
med_50_or_less = ifelse (books_med_freq <= 2 , TRUE , FALSE ),
hi_50_or_less = ifelse (books_hi_freq <= 1 , TRUE , FALSE )
) %>%
select (contains ("_50_or_less" )) %>%
pivot_longer (everything (), names_to = "condition" , values_to = "50_or_less" ) %>%
drop_na (` 50_or_less ` ) %>%
group_by (condition) %>%
summarise (Percent_50_or_less = mean (` 50_or_less ` )* 100 ) %>%
mutate (condition = factor (condition,
levels = c ("low_50_or_less" ,
"med_50_or_less" ,
"hi_50_or_less" ),
labels = c ("low quantity" ,
"medium quantity" ,
"high quantity" ),
ordered = TRUE )
) %>% arrange (condition) %>% style_table ()
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (response_bias_table) %>%
pull () %>%
.[[1 ]] %>%
as_tibble () %>%
style_table () %>%
print
cat (" \n\n " )
}
```
:::
### Spearman Test
#### Full sample result
```{r}
spearman_df<- valid_df %>%
mutate (
low_50_or_less = ifelse (books_low_freq <= 3 , TRUE , FALSE ),
med_50_or_less = ifelse (books_low_freq <= 2 , TRUE , FALSE ),
hi_50_or_less = ifelse (books_low_freq <= 1 , TRUE , FALSE )
) %>%
select (contains ("_50_or_less" )) %>%
pivot_longer (everything (), names_to = "condition" , values_to = "50_or_less" ) %>%
drop_na (` 50_or_less ` ) %>%
mutate (more_than_50 = as.numeric (! ` 50_or_less ` ))%>%
mutate (condition = factor (condition, levels = c ("low_50_or_less" ,
"med_50_or_less" ,
"hi_50_or_less" ),
ordered = TRUE ),
condition_num = as.numeric (condition))
cor.test (spearman_df$ condition_num, spearman_df$ more_than_50, method = "spearman" ) %>% tidy %>%
mutate (across (where (is.numeric), round, 3 )) %>% t %>% style_table ()
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (response_bias_spearman) %>%
pull () %>%
.[[1 ]] %>% style_table () %>%
print
cat (" \n\n " )
}
```
:::
## Sensitivity of MOC to different Alpha values
```{r}
intervals_df %>%
add_column (
map_dfc (seq (1 ,3 ,length.out = 21 ), ~ moc_interpolation (intervals_df, .x))
) %>%
pivot_longer (starts_with ("alpha" ), names_to = "alpha" , values_to = "moc_equivalents" ) %>%
mutate (alpha = str_remove (alpha, "alpha" ) %>% as.numeric ()) %>%
filter (upper_bound== Inf ,
alpha != 1 ) %>%
mutate (response_label = paste0 ("More than " , lower_bound)) %>%
ggplot (aes (x= alpha, y= moc_equivalents))+
facet_grid (rows = vars (condition), scales = "free" )+
geom_point (aes (color= alpha), size = 4 )+
geom_text (aes (color= alpha, label = round (moc_equivalents, 0 )), angle = 45 , hjust = - 0.2 , size = 6 )+
geom_text (aes (x= Inf , y= Inf , label = response_label),
stat = "unique" , hjust = 1.05 , vjust = 1.3 , size = 6 )+
scale_y_continuous ("MOC interp. of highest interval" , limits = c (0 , 3600 ))+
scale_x_continuous (breaks = c (1.1 , 1.5 , 2.0 , 2.5 , 3.0 ))+
theme_bw (base_size = 14 )+
theme (
legend.position = "none" ,
plot.background = element_rect (colour = "white" , fill= "white" )
)+ coord_cartesian (clip = FALSE )
```
# Main Analyses
## All interval conditions harmonized towards the open ended format
```{r}
# Preparing Frequency tables for OSE-RG
freqtab_low <- freqtab (data$ books_low_freq)
freqtab_med <- freqtab (data$ books_med_freq)
freqtab_hi <- freqtab (data$ books_hi_freq)
freqtab_open <- freqtab (data$ books_numeric)
# Extracting the vector of open responses
open_vector <- data$ books_numeric
# Calculate a table with equivalent values for every interval response option
harmonized_equivalents_df <- intervals_df %>%
# Calculating MOC interpolated values
add_column (
moc_interpolation (intervals_df, alpha = 2 )
) %>%
# Calculating OSE-RG Equivalents
mutate (
equip_equating = c (
calculate_eq_equivalents (freqtab_low, freqtab_open),
calculate_eq_equivalents (freqtab_med, freqtab_open),
calculate_eq_equivalents (freqtab_hi, freqtab_open)
)) %>%
# Calculate average open-ended values within each interval
mutate (
# All open-ended responses
open_mean_quantity = map2_dbl (lower_bound, upper_bound,
~ aggregate_open_interval (.x, .y, open_vector)),
# Trimmed by removing the top 5% open-ended responses
open_mean_quantity_trimmed = map2_dbl (lower_bound, upper_bound,
~ aggregate_open_interval (.x, .y, open_vector, trim_upper = 0.05 )),
open_median_quantity = map2_dbl (lower_bound, upper_bound,
~ aggregate_open_interval_generalized (.x, .y, open_vector, median))
) %>%
# Adding numerical interval IDs and interval labels
group_by (condition) %>%
mutate (
interval_number = row_number (),
interval_label = paste0 ("[" ,lower_bound, ", " ,upper_bound,"]" )
)
```
### Plot equivalent values Interval -\> Open Ended
#### Full sample result
```{r fig.height= 8}
harmonized_equivalents_df %>%
mutate (interval_number = (- 1 * (interval_number-1 ))+ 4 ,
interval_label = paste0 (interval_number, " - " , interval_label),
interval_label = factor (interval_label) %>% fct_rev) %>%
pivot_longer (alpha2: open_median_quantity, values_to = "equivalents" , names_to = "approach" ) %>%
mutate (
condition = fct_rev (condition),
approach = factor (approach,
levels = c (
"alpha2" ,
"equip_equating" ,
"open_mean_quantity" ,
"open_mean_quantity_trimmed" ,
"open_median_quantity"
),
labels = c (
"MOC" ,
"OSE-RG" ,
"Open Q. (untrimmed)" ,
"Open Q. (trimmed)" ,
"Open Q. (median)"
))
) %>%
ggplot (aes (equivalents, interval_label, color= approach, shape = approach, linetype = approach))+
facet_wrap (vars (condition), ncol = 1 , scales = "free" )+
geom_line (aes (group = approach), size = 1 )+
geom_point (size = 4 , stroke = 2 )+
#geom_text(aes(0, label = interval_label, color = NA), hjust = 1, stat = "unique")+
scale_color_manual (values = c ("#497593" , "orange" , "#00ABA0" , "#00ABA0" , "#00ABA0" ))+
scale_linetype_manual (values = c (NA , NA , "solid" ,"dashed" , "dotted" ))+
scale_shape_manual (values = c (3 ,4 ,NA , NA , NA ))+
scale_x_continuous ("Reported books" )+
scale_y_discrete ("" )+
theme_bw (base_size = 20 )+
coord_flip ()+
#guides(color=guide_legend(ncol=2, byrow = TRUE))+
theme (
panel.grid.minor = element_blank (),
panel.grid.major.x = element_blank (),
strip.background = element_blank (),
#legend.position = "bottom",
legend.title = element_blank ()
)
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (numerical_equivalents_plot) %>%
pull () %>%
.[[1 ]] %>%
print
cat (" \n\n\n " )
}
```
:::
### Values underlying the plot
#### Full sample results
```{r}
harmonized_equivalents_df %>%
select (
condition,
interval = interval_label,
MOC = alpha2,
` OSE-RG ` = equip_equating,
` Open mean (untrimmed) ` = open_mean_quantity,
` Open mean (trimmed) ` = open_mean_quantity_trimmed,
) %>%
mutate (across (where (is.numeric), round, 1 )) %>%
style_table
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (numerical_equivalents_table) %>%
pull () %>%
.[[1 ]] %>%
style_table () %>%
print
cat (" \n\n\n " )
}
```
:::
## Harmonization Performance
### Aggregated Mean bias
#### Data preparation
```{r}
# Here, the average quantities are calculated for untrimmed data
aggregated_book_estimations_df <- data %>%
select (` low quantity ` = books_low_freq,
` medium quantity ` = books_med_freq,
` high quantity ` = books_hi_freq,
` open question ` = books_numeric) %>%
# Recode response data with the equivalent values calculated earlier
transmute (` open question ` = ` open question ` ,
# Recode all conditions via MOC
across (` low quantity ` : ` high quantity ` ,
~ get_rec_vector (cur_column (), "alpha2" )[.x],
.names = "{.col}_MOC" ),
# Recode all conditions via OSE-RG
across (` low quantity ` : ` high quantity ` ,
~ get_rec_vector (cur_column (), "equip_equating" )[.x],
.names = "{.col}_OSE-RG" ),
) %>%
# calculate mean book quantity for every harmonization approach and condition
summarise (across (everything (), mean, na.rm= TRUE )) %>%
# Wide to long format
pivot_longer (contains ("quantity" )) %>%
# Calculate differences from the open-ended question expectation
mutate (
delta_mean = value - ` open question `
) %>%
# tidy up table
separate (
name, into = c ("condition" , "approach" ), sep = "_"
) %>%
mutate (condition = factor (condition,
levels = c ("low quantity" , "medium quantity" , "high quantity" )),
trim = "untrimmed" )
# Here, the same is done as above, but this time with trimmed data
aggregated_book_estimations_trimmed_df <- data %>%
select (` low quantity ` = books_low_freq,
` medium quantity ` = books_med_freq,
` high quantity ` = books_hi_freq,
` open question ` = books_numeric) %>%
transmute (` open question ` = ` open question ` ,
across (` low quantity ` : ` high quantity ` ,
~ get_rec_vector (cur_column (), "alpha2" )[.x], .names = "{.col}_MOC" ),
across (` low quantity ` : ` high quantity ` ,
~ get_rec_vector (cur_column (), "equip_equating" )[.x], .names = "{.col}_OSE-RG" ),
) %>%
summarise (across (everything (), mean_trim_upper, trim_upper = 0.05 )) %>%
pivot_longer (contains ("quantity" )) %>%
mutate (
delta_mean = value - ` open question `
) %>%
separate (
name, into = c ("condition" , "approach" ), sep = "_"
) %>%
mutate (condition = factor (condition,
levels = c ("low quantity" , "medium quantity" , "high quantity" )),
trim = "trimmed 5%" )
```
#### Plot
##### Full sample result
```{r}
# Untrimmed and trimmed means are combined and then plotted
aggregated_book_estimations_df %>%
add_row (aggregated_book_estimations_trimmed_df) %>%
mutate (delta_mean_percent = 100 * (delta_mean / ` open question ` ),
trim = factor (trim,
levels = c ("untrimmed" , "trimmed 5%" )),
condition = fct_rev (condition),
label_hjust = ifelse (approach == "MOC" ,
delta_mean - lead (delta_mean, 3 ) < 1 ,
delta_mean - lag (delta_mean, 3 ) < 1 )
)%>%
ggplot (aes (delta_mean_percent, as.integer (condition), color = approach, group = approach))+
facet_grid (vars (trim))+
geom_vline (aes (xintercept= 0 ), color = "#666666" , size = 1 )+
geom_smooth (method = "lm" , se= FALSE , fullrange= TRUE , size = 2 )+
geom_smooth (method = "lm" , se= FALSE , fullrange= TRUE , color = "#ffffffaa" , size = 2 )+
geom_label (aes (label= paste0 (" " , round (delta_mean_percent), "% " ), hjust = label_hjust),
size = 4 , alpha = 0.7 , label.size = 1 , show.legend = FALSE )+
geom_point (size = 3 )+
scale_x_continuous ("Average quantity difference to open question (%)" , limits = c (- 500 , 100 ))+
scale_y_continuous ("" , labels = aggregated_book_estimations_df$ condition %>% fct_rev %>% levels (), breaks = c (1 ,2 ,3 ))+
coord_cartesian (ylim = c (0.5 ,3.5 ),
xlim = c (- 100 ,50 ))+
scale_color_manual (values = c ("#497593" , "orange" ))+
theme_bw (base_size = 12 )+
theme (
panel.grid.minor = element_blank ()
)
```
##### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (average_quantity_dif_plot) %>%
pull () %>%
.[[1 ]] %>%
print
cat (" \n\n\n " )
}
```
:::
## Interval to interval harmonization
```{r}
# Calculating the equivalent scores
interval_interval_equivalents <- harmonized_equivalents_df %>%
select (lower_bound, upper_bound, interval_label, interval_number, alpha2) %>%
ungroup () %>%
mutate (
equipercentile_towards_mid = c (
calculate_eq_equivalents (freqtab_low, freqtab_med),
calculate_eq_equivalents (freqtab_med, freqtab_med),
calculate_eq_equivalents (freqtab_hi, freqtab_med)
)
)
```
### Equivalents table
#### Full sample results
```{r}
interval_interval_equivalents %>%
filter (condition != "medium quantity" ) %>%
transmute (condition,
interval_label,
` original score ` = interval_number,
` OSE-RG towards medium quantity ` = equipercentile_towards_mid %>% round (2 )) %>%
style_table ()
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (harmonization_to_medium_quantity_equivalents_table) %>%
pull () %>%
.[[1 ]] %>%
style_table () %>%
print
cat (" \n\n\n " )
}
```
:::
### Equivalents plot
#### Full sample results
```{r}
interval_interval_equivalents %>%
mutate (condition = factor (condition),
cond_num = as.integer (condition),
interval_label = ifelse (str_detect (interval_label, "Inf" ),
paste0 (str_remove (interval_label, "Inf]" ), "\U221E)" ),
interval_label
))%>%
ggplot (aes (cond_num, equipercentile_towards_mid, color = interval_number))+
geom_segment (aes (x = cond_num-0.4 , xend = cond_num+0.4 ,
yend = equipercentile_towards_mid), size = 2 , lineend = "round" )+
geom_label (aes (label = interval_label), label.padding = unit (0.5 , "lines" ), size = 6 , label.size = 1 )+
scale_x_continuous ("" , labels = interval_interval_equivalents$ condition %>% levels, breaks = c (1 ,2 ,3 ))+
scale_y_continuous ("OSE-RG equivalents in score format" )+
scale_color_viridis (option = "mako" , end = 0.7 , begin = 0.3 )+
theme_minimal (base_size = 12 )+
theme (
legend.position = "none" ,
plot.background = element_rect (color = "white" , fill = "white" )
)
```
#### Subsample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (harmonization_to_medium_quantity_plot) %>%
pull () %>%
.[[1 ]] %>%
print
cat (" \n\n\n " )
}
```
:::
## Interval to interval harmonization performance
```{r}
# Calculating MOC equivalents
moc_low_med_hi <- intervals_df %>%
add_column (
moc_interpolation (intervals_df, 2 )
) %>%
pull (alpha2)
# Recoding data
interval_interval_data <- data %>%
transmute (
low_freq.raw = books_low_freq,
med_freq.raw = books_med_freq,
hi_freq.raw = books_hi_freq,
# Apply OSE-RG to harmonize towards medium frequency condition
low_freq.eq = calculate_eq_equivalents (freqtab_low, freqtab_med)[books_low_freq],
med_freq.eq = calculate_eq_equivalents (freqtab_med, freqtab_med)[books_med_freq],
hi_freq.eq = calculate_eq_equivalents (freqtab_hi, freqtab_med)[books_hi_freq],
# Apply MOC to interpolate all conditions
low_freq.moc = moc_low_med_hi[1 : 4 ][books_low_freq],
med_freq.moc = moc_low_med_hi[5 : 8 ][books_med_freq],
hi_freq.moc = moc_low_med_hi[9 : 12 ][books_hi_freq]
)
# Caluclate
interval_interval_aggregation <- interval_interval_data %>%
summarise (across (everything (), list (mean= mean, sd = sd), na.rm= TRUE , .names = "{.col}.{.fn}" )) %>%
pivot_longer (everything ()) %>%
separate (name, into = c ("condition" , "approach" , "statistic" ), sep = " \\ ." )
```
### table
#### Full sample results
```{r}
interval_interval_aggregation %>%
pivot_wider (names_from = statistic, values_from = value) %>%
mutate (
mid_mean = mean[c (2 ,2 ,2 ,5 ,5 ,5 ,8 ,8 ,8 )],
mid_sd = sd[c (2 ,2 ,2 ,5 ,5 ,5 ,8 ,8 ,8 )],
d = (mean- mid_mean)/ mid_sd,
condition = factor (condition, levels = c (
"low_freq" ,
"med_freq" ,
"hi_freq" ),
labels = c (
"low quantity" ,
"medium quantity" ,
"high quantity"
)) %>% fct_rev,
approach = factor (approach,
levels = c ("eq" , "moc" ),
labels = c ("OSE-RG" , "MOC" )) %>% fct_rev ()
) %>%
drop_na (approach) %>%
dplyr:: relocate (approach) %>%
mutate (across (where (is.numeric), round, 2 )) %>%
rename (
` medium condition mean ` = mid_mean,
` medium condition sd ` = mid_sd
) %>% kable () %>% kable_styling ()
```
#### Sub sample results
::: panel-tabset
```{r results="asis"}
for (cur_sub in sub_pop_results$ sub_pop){
cat ("## " ,cur_sub," \n\n\n " )
sub_pop_results %>%
filter (sub_pop == cur_sub) %>%
select (harmonization_to_medium_quantity_summary_table) %>%
pull () %>%
.[[1 ]] %>%
style_table () %>%
print
cat (" \n\n\n " )
}
```
:::