*******************************************************
* compute expected accuracy
program accuracy, rclass
	version 13.0
	* Purpose:
	*   For a fixed grid of thresholds (0.9 0.8 0.7 0.6 0.5 0), treat every
	*   observation whose predicted probability is at or above the threshold as
	*   automatically classified and every other observation as manually coded
	*   with assumed accuracy p_man. For each threshold display (and optionally
	*   post) the fraction classified automatically, the expected accuracy, its
	*   1.96*se margin and, when true codes are supplied, the realised accuracy
	*   and kappa of the automatic classification.
	*
	* varlist (1 to 3 variables, in this order):
	*   1. predicted probability of the predicted category
	*   2. true category                  (optional)
	*   3. predicted category             (optional)
	* p_man        = assumed manual accuracy (e.g. 0.8)
	* postaccuracy = name of an open postfile handle; one row per threshold is posted
	* shrink, interaction, bag, seed = boosting parameters, passed through
	*   unchanged to the posted row (default -9999 when not given)
	* rater1, rater2 = categories assigned by individual raters (to compute kappas)
	*	// note true category is a combination of individual categories
	*
	* The data in memory are preserved and restored; only the if/in sample is used.
	syntax  varlist (min=1 max=3) [in] [if] ,p_man(real) ///
		[ postaccuracy(str) shrink(real -9999) interaction(int -9999) ///
		bag(real -9999) seed(int -9999) rater1(str) rater2(str) ]

	marksample touse, novarlist
	preserve
	qui drop if !`touse'

	local pred : word 1 of `varlist'  	// predicted probability of predicted category
	local code : word 2 of `varlist'    	// true category (empty if not supplied)
	local pred_cat : word 3 of `varlist'	// predicted category (empty if not supplied)

	// True accuracy and kappa need both the true and the predicted category.
	local has_truth = ("`code'"!="") & ("`pred_cat'"!="")

	// Only compare names that were actually supplied; otherwise two empty
	// macros compare equal and the program aborts for every call that omits
	// rater1()/rater2() or the optional variables.
	if (("`rater1'"!="") & ("`code'"=="`rater1'")) | ///
	   (("`rater2'"!="") & ("`pred_cat'"=="`rater2'")) {
		di as error "2 variables are identical (rater1 or rater2 and pred_cat)." ///
			" Kappas cannot be computed"
		exit 198
	}

	local n=_N   // only for the touse data
	di "(Test) Sample Size =`n'"

	// Loop-invariant check: warn once rather than once per threshold.
	qui count if missing(`pred')
	if (r(N)>0) di as error "Some predicted probabilities are missing"

	tempvar pq_auto accvar_true
	// needed for margin of error
	qui gen `pq_auto'=`pred'* (1-`pred')
	qui gen `accvar_true'=.

	foreach thres of numlist  0.9 0.8 0.7 0.6 0.5 0 {
		// Stata treats missing as larger than any number, so a bare
		// pred>=thres would count missing predictions as automatically
		// classified. Exclude them explicitly.
		local above "`pred'>=`thres' & !missing(`pred')"

		* determine n_auto
		qui count if `above'
		local n_auto=r(N)

		* compute p_auto_av (average prediction among predictions above the threshold)
		qui sum `pred' if `above'
		local p_auto_av=r(mean)

		* compute expected accuracy
		local fraction = `n_auto'/`n'
		if (`n_auto'==0 & `n'>0) {
			// nothing is classified automatically: p_auto_av is missing but
			// carries zero weight, so the expected accuracy is just p_man
			local acc= `p_man'
		}
		else {
			local acc= (1-`n_auto'/`n')* `p_man' + `n_auto'/`n' * `p_auto_av'
		}

		* margin of error for combined
		local temp_man =(`n'-`n_auto')*`p_man'*(1-`p_man')
		qui sum `pq_auto' if `above'
		local temp_auto= r(sum)
		local var= 1/(`n'^2) * (`temp_man' + `temp_auto')
		local margin = 1.96 * sqrt(`var')

		* margin for automatic only (missing when n_auto is 0)
		local var_auto= 1/(`n_auto'^2) * (`temp_auto')
		local margin_auto = 1.96* sqrt(`var_auto')

		* if test data were manually coded, compute actual accuracy and kappa
		local accuracy_true=.
		local kappa_true=.
		if (`has_truth') {
			qui replace `accvar_true'=.
			qui replace `accvar_true'= (`code'==`pred_cat') if `above'
			qui sum `accvar_true'
			local accuracy_true= r(mean)

			// need capture in case kappa fails because of too few observations;
			// only read r(kappa) when kap succeeded so stale r() is never used
			cap qui kap `code' `pred_cat' if `above'
			if (_rc==0) local kappa_true= r(kappa)
		}

		* rater kappas; reset every iteration so a failure or an omitted
		* option yields missing instead of an empty or stale value
		// NOTE(review): the output legend below describes kappa1/kappa2 as
		// rater vs automatic coding, but they are computed against the true
		// category. Behaviour kept as is; confirm which one is intended.
		local kappa1=.
		local kappa2=.
		if ("`rater1'"!="" & "`code'"!="") {
			cap qui kap `code' `rater1' if `above'
			if (_rc==0) local kappa1= r(kappa)
		}
		if ("`rater2'"!="" & "`code'"!="") {
			cap qui kap `code' `rater2' if `above'
			if (_rc==0) local kappa2= r(kappa)
		}

		di "Thres=`thres'; AutoFrac=" %4.2f `fraction'  ///
			";  E(AutoAcc)=" %4.2f `p_auto_av' ///
			"; E(Acc)=" %4.2f `acc' "; margin +-" %5.3f `margin' ///
			"; TrueAutoAcc=" %4.2f `accuracy_true' ///
			"; TrueAutoKap=" %4.2f `kappa_true' ///
			"; MarginAuto=" %5.3f `margin_auto'
		if ("`postaccuracy'"!="") {
			// output explanation:
			// thres:   	Threshold beyond which automatic classification is used
			// fraction: 	Fraction automatically classified
			// p_auto_av: 	average prediction among predictions above the threshold
			// acc:		expected accuracy (formula)
			// margin:	1.96 se for combined manual+automatic categorization
			// margin_auto: 1.96 se for automatic categorization only
			// kappa_true:  kappa between true category and automatic coding above threshold
			// kappa1: kappa between rater1 and automatic coding above threshold
			// kappa2: kappa between rater2 and automatic coding above threshold
			post `postaccuracy'  (`shrink')  (`interaction')  (`bag') (`seed') ///
			(`thres') (`fraction') (`p_auto_av') (`acc') (`margin') (`accuracy_true') ///
			(`margin_auto') (`kappa_true') (`kappa1') (`kappa2')
		}
	}
	restore
end
