********************************************************************************
*** Replication Code: Invitation Messages for Business Surveys ***
*** A Multi-Armed Bandit Experiment ***

*** Authors: Johannes J. Gaul, Florian Keusch, Davud Rostam-Afschar, Thomas Simon ***
*** Section: Conjoint Analyses ***
********************************************************************************

* Installs *
* Community-contributed commands called by this script:
*   conjoint - used for all AMCE and marginal-mean estimates
*   coefplot - used for every figure
* Each one is installed from SSC only when it cannot be found on the adopath,
* so re-running the script neither fails on an existing installation nor
* replaces the installed version.
foreach pkg in conjoint coefplot {
    capture which `pkg'
    if _rc {
        ssc install `pkg'
    }
}

* Clear workspace *
clear all
set more off

* Define input and output directories *
* Replace YOUR PATH with the local project folder before running.
global inputdir "YOUR PATH\01_Inputs"
global outputdir "YOUR PATH\02_Outputs"

* Import dataset *
import delimited "${inputdir}\GKRS2025_business_survey_mab.csv", delimiter(",")

********************************************************************************
*** I. DATA PREPARATION ***
********************************************************************************

* Value labels for the five binary message characteristics *
* Step 1: define one label set per characteristic, named after its variable.
label define personalized   0 "No"            1 "Yes"
label define authority      0 "Low"           1 "High"
label define topurl         0 "Bottom"        1 "Top"
label define dataprotection 0 "Unaccentuated" 1 "Emphasized"
label define mode           0 "Offer"         1 "Plea"

* Step 2: attach each label set to the variable carrying the same name.
foreach attr in personalized authority topurl dataprotection mode {
    label values `attr' `attr'
}

********************************************************************************
*** II. Figure 4 ***
********************************************************************************

preserve

* Sample: opened invitations only (every other row is dropped) *
drop if opened != 1

* AMCEs of the five message characteristics on the started indicator, *
* estimated with conjoint and message passed to id(). The result matrix is *
* multiplied by 100 so that the plot is expressed in percentage points *
local attributes personalized authority topurl dataprotection mode
conjoint started `attributes', est(amce) id(message)
matrix overall_results = 100 * e(results)

* Coefficient plot: column 1 of the matrix supplies the point estimates and *
* columns 5 and 6 supply the confidence-interval bounds. The command is *
* written with a semicolon delimiter, so it ends at the lone semicolon *
#delimit ;
coefplot matrix(overall_results[,1]),
    bylabel(Overall)
    m(X)
    msize(4)
    color(cranberry) ||,
    ci(( 5 6))
    ciopts(color(navy) lwidth(0.6))
    keep(*:)
    xline(0.0, lpattern(-) lcolor(black))
    coeflabels(_0= "Off (Baseline)" _1= "On", labsize(medlarge))
    eqlabels("{bf:Personalization}" "{bf:Authority}" "{bf:URL}" "{bf:Data Protection}" "{bf:Message Frame}",
    asheadings labsize(medlarge))
    byopts(graphregion(color(white)) cols(3))
    subtitle(, fcolor(gs15) size(medlarge))
    scale(0.55)
    grid (glstyle(dot) glcolor(gs11))
    xtitle(" " "Average Marginal Component Effects (in pp.)", size(medlarge))
    xlabel(, format(%9.1f) labsize(medium) grid glstyle(dot) glcolor(gs11))
    ylabel(, labsize(medium))
    plotregion(color(white) fcolor(white))
    graphregion(color(white) fcolor(white))
    ysize(4) xsize(6)
;
#delimit cr

* Save the figure as PDF in the output directory *
graph export "${outputdir}\Figure_4.pdf", replace

restore

********************************************************************************
*** III. Figure 5 ***
********************************************************************************

preserve

* Sample: invitations whose survey was started (every other row is dropped) *
drop if started != 1

* AMCEs of the five message characteristics on the finished indicator, *
* estimated with conjoint and message passed to id(). The result matrix is *
* multiplied by 100 so that the plot is expressed in percentage points *
local attributes personalized authority topurl dataprotection mode
conjoint finished `attributes', est(amce) id(message)
matrix overall_results = 100 * e(results)

* Coefficient plot: column 1 of the matrix supplies the point estimates and *
* columns 5 and 6 supply the confidence-interval bounds. The command is *
* written with a semicolon delimiter, so it ends at the lone semicolon *
#delimit ;
coefplot matrix(overall_results[,1]),
    bylabel(Overall)
    m(X)
    msize(4)
    color(cranberry) ||,
    ci(( 5 6))
    ciopts(color(navy) lwidth(0.6))
    keep(*:)
    xline(0.0, lpattern(-) lcolor(black))
    coeflabels(_0= "Off (Baseline)" _1= "On", labsize(medlarge))
    eqlabels("{bf:Personalization}" "{bf:Authority}" "{bf:URL}" "{bf:Data Protection}" "{bf:Message Frame}",
    asheadings labsize(medlarge))
    byopts(graphregion(color(white)) cols(3))
    subtitle(, fcolor(gs15) size(medlarge))
    scale(0.55)
    grid (glstyle(dot) glcolor(gs11))
    xtitle(" " "Average Marginal Component Effects (in pp.)", size(medlarge))
    xlabel(, format(%9.1f) labsize(medium) grid glstyle(dot) glcolor(gs11))
    ylabel(, labsize(medium))
    plotregion(color(white) fcolor(white))
    graphregion(color(white) fcolor(white))
    ysize(4) xsize(6)
;
#delimit cr

* Save the figure as PDF in the output directory *
graph export "${outputdir}\Figure_5.pdf", replace

restore

********************************************************************************
*** IV. Figure 6 ***
********************************************************************************

preserve

* Retain only opened invitations *
keep if opened == 1

* Median number of employees among opened invitations (4 in the original data). *
* The split point is read from r(p50) right after summarize, so the two *
* subgroups follow the data instead of a hard-coded constant *
summarize employees, detail
local emp_median = r(p50)

* Subgroup conditions. missing() is used instead of a comparison with the *
* system missing value so that no kind of missing value can enter the *
* above-median group (missing values sort above every number in Stata) *
local if_low  "employees <= `emp_median' & !missing(employees)"
local if_high "employees > `emp_median' & !missing(employees)"

* low  = firms with median and below-median employees *
* high = firms with above-median employees *
foreach grp in low high {

    * AMCEs on the started indicator for this subgroup, scaled by 100 *
    conjoint started personalized authority topurl dataprotection mode if `if_`grp'', est(amce) id(message)
    matrix `grp'_results = e(results)*100

    * Keep only the even rows (2, 4, 6, 8, 10) and columns 1 to 6; these five *
    * rows receive the row names assigned further down *
    matrix `grp'_results_filtered = `grp'_results[2, 1..6]
    forvalues r = 4(2)10 {
        matrix `grp'_results_filtered = `grp'_results_filtered \ `grp'_results[`r', 1..6]
    }

    * Display the filtered matrix, then blank the equation names and set *
    * descriptive row names that coefplot shows on the y axis *
    matrix list `grp'_results_filtered
    matrix roweq `grp'_results_filtered = "" "" "" "" ""
    matrix rownames `grp'_results_filtered = "Personalization - Yes" "Authority - High" "URL - Top" "Data Protection - Emphasized" "Message Frame - Plea"
}

* Plot both subgroups in one panel: column 1 holds the point estimates and *
* columns 5 and 6 are used as confidence-interval bounds *
coefplot (matrix(low_results_filtered[,1]), m(X) ///
          ciopts(recast(r) lwidth(0.6) lcolor(navy)) ///
          mcolor(navy) lcolor(navy)) ///
         (matrix(high_results_filtered[,1]), m(X) ///
          ciopts(recast(r) lwidth(0.6) lcolor(cranberry)) ///
          mcolor(cranberry) lcolor(cranberry)), ///
         bylabel(Overall) ///
         msize(4) ///
         ci(( 5 6)) ///
         keep(*:) ///
         xline(0.0, lpattern(-) lcolor(black)) ///
         byopts(graphregion(color(white)) cols(3)) ///
         subtitle(, fcolor(gs15) size(medlarge)) ///
         scale(0.55) ///
         grid(glstyle(dot) glcolor(gs11)) ///
         xtitle(" " "Average Marginal Component Effects (in pp.)", size(medlarge)) ///
         xlabel(-0.8(0.2)0.8, format(%9.1f) labsize(medlarge) grid glstyle(dot) glcolor(gs11)) ///
         ylabel(, labsize(medium)) ///
         plotregion(color(white) fcolor(white)) ///
         graphregion(color(white) fcolor(white)) ///
         xscale(range(-0.8 0.8)) ///
         legend(order (2 "Median & below-median employees" 4 "Above-median employees"))

* Export graph *
graph export "${outputdir}\Figure_6.pdf", replace

restore

********************************************************************************
*** V. Figure 7 ***
********************************************************************************

preserve

* Sample: opened invitations with previous equal to 0, i.e. prior *
* participants are excluded (one combined condition, same rows kept) *
keep if opened == 1 & previous == 0

* AMCEs of the five message characteristics on the started indicator, *
* estimated with conjoint and message passed to id(). The result matrix is *
* multiplied by 100 so that the plot is expressed in percentage points *
local attributes personalized authority topurl dataprotection mode
conjoint started `attributes', est(amce) id(message)
matrix overall_results = 100 * e(results)

* Coefficient plot: column 1 of the matrix supplies the point estimates and *
* columns 5 and 6 supply the confidence-interval bounds. The command is *
* written with a semicolon delimiter, so it ends at the lone semicolon *
#delimit ;
coefplot matrix(overall_results[,1]),
    bylabel(Overall)
    m(X)
    msize(4)
    color(cranberry) ||,
    ci(( 5 6))
    ciopts(color(navy) lwidth(0.6))
    keep(*:)
    xline(0.0, lpattern(-) lcolor(black))
    coeflabels(_0= "Off (Baseline)" _1= "On", labsize(medlarge))
    eqlabels("{bf:Personalization}" "{bf:Authority}" "{bf:URL}" "{bf:Data Protection}" "{bf:Message Frame}",
    asheadings labsize(medlarge))
    byopts(graphregion(color(white)) cols(3))
    subtitle(, fcolor(gs15) size(medlarge))
    scale(0.55)
    grid (glstyle(dot) glcolor(gs11))
    xtitle(" " "Average Marginal Component Effects (in pp.)", size(medlarge))
    xlabel(, format(%9.1f) labsize(medlarge) grid glstyle(dot) glcolor(gs11))
    ylabel(, labsize(medium))
    plotregion(color(white) fcolor(white))
    graphregion(color(white) fcolor(white))
    ysize(4) xsize(6)
;
#delimit cr

* Save the figure as PDF in the output directory *
graph export "${outputdir}\Figure_7.pdf", replace

restore

********************************************************************************
*** V. Figure A.1(a) ***
********************************************************************************

preserve

* Retain observations with employee data *
drop if missing(employees)

* Log-transform employee numbers *
gen ln_employees = ln(employees)

* Mean, standard deviation and non-missing count of ln_employees by message type *
collapse (mean) mean_ln_employees = ln_employees (sd) sd_ln_employees = ln_employees ///
        (count) n_ln_employees = ln_employees, by(message)

* Standard error of the mean and 95% confidence bounds (mean +/- 1.96 SE) *
gen se_ln_employees = sd_ln_employees / sqrt(n_ln_employees)
gen lower_ln_employees = mean_ln_employees - 1.96 * se_ln_employees
gen upper_ln_employees = mean_ln_employees + 1.96 * se_ln_employees

* Matrix for plotting: column 1 = mean, columns 2 and 3 = CI bounds *
mkmat mean_ln_employees lower_ln_employees upper_ln_employees, matrix(ln_emp)

* Equally-weighted mean of the per-message means, used as the reference line *
* (1.910766 in the original data). It is stored immediately because the *
* levelsof call below overwrites the r() results *
summarize mean_ln_employees, detail
local ref_mean = r(mean)

* Assign row names to the matrix based on message type *
levelsof message, local(rownames_ln)
matrix rownames ln_emp = `rownames_ln'

* Plot the per-message means with their confidence intervals *
coefplot matrix(ln_emp[,1]), ///
    m(dot) ///
    msize(1) ///
    ci(( 2 3)) ///
    xline(`ref_mean', lpattern(-) lcolor(black)) ///
    drop(_cons) ///
    grid(glstyle(dot) glcolor(gs11)) ///
    xtitle(" " "ln(Employees)") ///
    xlabel(1(0.5)3, format(%9.1f) labsize(medium) grid glstyle(dot) glcolor(gs11)) ///
    ylabel(, labsize(medium)) ///
    ysize(20) xsize(8) ///
    plotregion(color(white) fcolor(white)) ///
    graphregion(color(white) fcolor(white))

* Export graph *
graph export "${outputdir}\Figure_A1(a).pdf", replace

restore

********************************************************************************
*** VI. Figure A.1(b) ***
********************************************************************************

preserve

* Retain observations with geographic information *
drop if missing(formereastgermany)

* Mean, standard deviation and non-missing count of formereastgermany by message type *
collapse (mean) mean_east_west = formereastgermany (sd) sd_east_west = formereastgermany ///
        (count) n_east_west = formereastgermany, by(message)

* Standard error of the mean and 95% confidence bounds (mean +/- 1.96 SE) *
gen se_east_west = sd_east_west / sqrt(n_east_west)
gen lower_east_west = mean_east_west - 1.96 * se_east_west
gen upper_east_west = mean_east_west + 1.96 * se_east_west

* Matrix for plotting: column 1 = mean, columns 2 and 3 = CI bounds *
mkmat mean_east_west lower_east_west upper_east_west, matrix(east_west)

* Equally-weighted mean of the per-message means, used as the reference line *
* (0.184925 in the original data). It is stored immediately because the *
* levelsof call below overwrites the r() results *
summarize mean_east_west, detail
local ref_mean = r(mean)

* Assign row names to the matrix based on message type *
levelsof message, local(rownames_east)
matrix rownames east_west = `rownames_east'

* Plot the per-message shares with their confidence intervals *
coefplot matrix(east_west[,1]), ///
    m(dot) ///
    msize(1) ///
    ci(( 2 3)) ///
    xline(`ref_mean', lpattern(-) lcolor(black)) ///
    drop(_cons) ///
    grid(glstyle(dot) glcolor(gs11)) ///
    xtitle(" " "Share Former East Germany") ///
    xlabel(0(0.1)0.40, format(%9.1f) labsize(medium) grid glstyle(dot) glcolor(gs11)) ///
    ylabel(, labsize(medium)) ///
    ysize(20) xsize(8) ///
    plotregion(color(white) fcolor(white)) ///
    graphregion(color(white) fcolor(white))

* Export graph *
graph export "${outputdir}\Figure_A1(b).pdf", replace

restore

********************************************************************************
*** VII. Figure A.3 ***
********************************************************************************

preserve

* Retain only burn-in phase and opened messages *
drop if batch >= 5
keep if opened == 1

* Total number of observations left, i.e. all opened burn-in messages *
local total_opened = _N

* Number of opened messages per message version and its share of the total *
collapse (count) n_opened = id, by(message)
gen share_opened = n_opened / `total_opened'

* Standard error and 95% confidence bounds for the share. *
* Each share is a proportion out of all total_opened messages, so the *
* binomial standard error divides by that total. NOTE(review): the earlier *
* version divided by the per-message count n_opened, which gives wider *
* intervals, so this figure will differ from output produced with it *
gen se_share_opened = sqrt(share_opened * (1 - share_opened) / `total_opened')
gen lower_share_opened = share_opened - 1.96 * se_share_opened
gen upper_share_opened = share_opened + 1.96 * se_share_opened

* Matrix for plotting: column 1 = share, columns 2 and 3 = CI bounds *
mkmat share_opened lower_share_opened upper_share_opened, matrix(openedmatrix)

* Equally-weighted mean of the shares, used as the reference line *
* (0.03125 in the original data). It is stored immediately because the *
* levelsof call below overwrites the r() results *
summarize share_opened, detail
local ref_mean = r(mean)

* Assign row names to the matrix based on message type *
levelsof message, local(rownames_op)
matrix rownames openedmatrix = `rownames_op'

* Plot the per-message shares with their confidence intervals *
coefplot matrix(openedmatrix[,1]), ///
    m(dot) ///
    msize(1) ///
    ci(( 2 3)) ///
    xline(`ref_mean', lpattern(-) lcolor(black)) ///
    drop(_cons) ///
    grid(glstyle(dot) glcolor(gs11)) ///
    xtitle(" " "Share of Opened Messages") ///
    xlabel(0(0.01)0.06, format(%9.2f) labsize(medium) grid glstyle(dot) glcolor(gs11)) ///
    ylabel(, labsize(medium)) ///
    ysize(20) xsize(8) ///
    plotregion(color(white) fcolor(white)) ///
    graphregion(color(white) fcolor(white))

* Export graph *
graph export "${outputdir}\Figure_A3.pdf", replace

restore

********************************************************************************
*** VIII. Figure A.4 ***
********************************************************************************

preserve

* Sample: opened invitations only (every other row is dropped) *
drop if opened != 1

* Install bbandits (Kemper and Rostam-Afschar, 2024) from the package site *
* and run it on started, message and batch *
// Python libraries required by bbandits: scipy, pandas, scikit-learn, statsmodels
net install bbandits.pkg, from(https://rostam-afschar.de/bbandits)
bbandits started message batch
// Plotting of the resulting estimates is done in Python (see supplemental replication material)

restore

********************************************************************************
*** IX. Table A.1 ***
********************************************************************************

preserve

* Sample: opened messages only (every other row is dropped) *
drop if opened != 1

* Recode the started dummy from 1 to 100 so estimates are on a 0-100 scale *
replace started = 100 if started == 1

* The five message characteristics entering every specification *
local attributes personalized authority topurl dataprotection mode

* AMCEs under three specifications: no id(), id(id) and id(message) *
conjoint started `attributes', est(amce)
conjoint started `attributes', est(amce) id(id)
conjoint started `attributes', est(amce) id(message)

* Marginal means (MMs) with id(message) *
conjoint started `attributes', est(mm) id(message)

restore

********************************************************************************
*** X. Table A.2 ***
********************************************************************************

* Constants used by the Table A.2 columns below *
* Number of opened messages during the optimization phase
global total_opened_messages "128364"
* First batch that counts as optimization phase; earlier batches are dropped
global optimization_batch_start "5"

********************************************************************************
*** Column (II): Mean Starting Rates ***
********************************************************************************

preserve

* Optimization-phase sample: opened messages whose batch is not below the *
* optimization start batch *
keep if opened == 1 & batch >= $optimization_batch_start

* Number of observations and mean of started for each message *
tabstat started, statistics(n mean) by(message)

restore

********************************************************************************
*** Column (IV): Predicted Starts ***
********************************************************************************

preserve

* Optimization-phase sample: opened messages whose batch is not below the *
* optimization start batch *
keep if opened == 1 & batch >= $optimization_batch_start

* Show the number of opened messages in this sample *
tabulate opened
// Expected total = $total_opened_messages

* Predicted starts: mean starting rate per message, times an equal weight *
* of 1/32, times the total number of opened messages *
collapse (mean) started, by(message)
generate weight = 1 / 32
generate pred_starts = started * weight * $total_opened_messages
tabstat pred_starts, by(message)

restore

********************************************************************************
*** Column (V): Distribution Weights ***
********************************************************************************

preserve

* Optimization-phase sample: opened messages whose batch is not below the *
* optimization start batch *
keep if opened == 1 & batch >= $optimization_batch_start

* Distribution weights: rows per message as a percentage of all opened *
* messages. opened equals 1 on every remaining row, so counting it gives *
* the number of rows per message *
collapse (count) N = opened, by(message)
replace N = N * 100 / $total_opened_messages
tabstat N, statistics(mean) by(message)

restore

********************************************************************************
*** Column (VI): Realized Starts ***
********************************************************************************

preserve

* Optimization-phase sample: opened messages whose batch is not below the *
* optimization start batch *
keep if opened == 1 & batch >= $optimization_batch_start

* Realized starts: sum of started within each message *
collapse (sum) started, by(message)
tabstat started, by(message)

restore

********************************************************************************
*** XI. Table A.3 ***
********************************************************************************

* Treatment indicators analysed one at a time *
local treatments personalized authority topurl dataprotection mode

preserve

* The log-transformed employee count and the restriction to opened *
* invitations are the same for every treatment, so both are set up once *
gen ln_employees = ln(employees)
keep if opened == 1

* For each treatment, restrict to rows where it equals 1 and run *
* unequal-variance t-tests of raw and log employees across groups of started *
foreach var of local treatments {
    ttest employees if `var' == 1, by(started) unequal
    ttest ln_employees if `var' == 1, by(started) unequal
}

restore











