/*  This do-file processes the analysis dataset and conducts the analysis for Study 1 in 
Gideon, Hsu, and McFall, "Heaping at Round Numbers on Financial Questions: The Role of Satisficing"
version submitted 2016.12.22 

do-file outline:
0. set directories and global macros for variable lists
1. code for the program that extracts raw rounding information from each response 
2. load in data
3. process data and save analysis dataset 
4. Analysis for paper
*/

/* *************** 0. set directories and global macros for variable lists */

/* Start from an empty session and switch off output paging. */
clear all
set more off
/* Raise the variable limit before loading the datasets. */
set maxvar 10000
/* Append a directory to the ado search path. */
adopath + /stata/ados
/* All file names below, and the output/ paths used in section 4, are relative to this working directory. */
cd "Projects/rounding/"
/* File names of the two input datasets loaded in section 2. */
global summarydata "scfb2013.dta"
global preprocesseddata "scf131.dta"

/* X-variable (question) numbers kept from the full preprocessed dataset.
   Section 3 loops over this list and expects variables x<number> and j<number> for each entry.
   NOTE(review): 5303 is in this list but gets no qorder value or question label in section 3 -- confirm this is intended. */
global surveyquestions "3506 3730 412 413 716 805 5303 5306 5311 3024 3029" 

/* Variables needed for processing for analysis
	xx1: respondent id
	x2: interviewer id
	x30006: disposition for identifying mode of survey completion 
	x6536: refer documents
   The remaining names (age hhsex agecl edcl married racecl fin) are used as, or to build,
   the right-hand-side variables in sections 3 and 4. */ 
global keeplist "xx1 x2 x30006 age hhsex agecl edcl married racecl fin x6536"

/* *************** 1. code for the program that extracts raw rounding information from each response */

		/** rounding2: record how "round" each response is.
		    Usage:  rounding2 exactvar roundvar
		      exactvar  numeric variable holding the reported amounts
		      roundvar  EXISTING numeric variable that is overwritten with the largest
		                power of ten (1, 10, ..., 1000000000) that divides the response
		                exactly. It is set to 1 for responses that are not a multiple
		                of 10 and for responses equal to 0, and is left untouched
		                where exactvar is missing.
		    Any variables listed after the first two are ignored. **/
		/* code copied from pgm/rounding2.ado */
		capture program drop rounding2
		program define rounding2
			version 13
			set more off
			syntax varlist(min=2 numeric)

			/* Take the names from the parsed varlist so abbreviations are expanded. */
			tokenize `varlist'
			local exct `1'
			local round `2'

			quietly {

				/* Baseline: every non-missing response is at least a multiple of 1. */
				replace `round' = 1 if !missing(`exct')

				/* Walk up the powers of ten; a later (larger) match overwrites an earlier one.
				   round() is evaluated inside the expression, i.e. in double precision.
				   Storing the rounded value in a float variable first would corrupt
				   values above 2^24 (e.g. 123456790 becomes 123456792) and make the
				   equality test fail for genuinely rounded large responses. */
				forvalues p = 1/9 {
					local unit = 10^`p'
					replace `round' = `unit' if `exct' == round(`exct', `unit') & !missing(`exct')
				}

				/* Zero is a multiple of every power of ten; treat it as unrounded. */
				replace `round' = 1 if `exct' == 0

			}
		end

		/*****************************************************************************/
		/*****************************************************************************/

/* *************** 2. LOAD IN DATA ****************  */
/* load Summary Extract Public Data with demographic and financial asset data: download from SCF website */
/* The reduced extract is parked in a Stata tempfile rather than a fixed "temp.dta" in the
   working directory: nothing there can be overwritten by accident, and Stata removes the
   file itself even if a later command fails. */
tempfile summary_first_implicate
use $summarydata, clear
/* imp is the last digit of x1 (x1 minus ten times xx1); keep only the rows where it equals 1,
   which leaves one row per xx1 for the 1:1 merge below. */
gen imp = x1 - (xx1*10)
keep if imp==1
save "`summary_first_implicate'"

/* read in full pre-production dataset -- this dataset does not have any imputed or edited values */
use $preprocesseddata, clear
/* Lower-case every variable name so the x#### / j#### references below match. */
rename *, lower

/* Attach the summary-extract variables by respondent id. The _merge indicator is left in
   memory and is discarded by the keep statement in section 3. */
merge 1:1 xx1 using "`summary_first_implicate'"

/* ***************** 3. PROCESS THE DATA AND CREATE ANALYSIS DATASET ********* */
/* For every financial question in $surveyquestions build, from x<q> and its j-code j<q>:
	r<q>          largest power of ten dividing the raw response (from rounding2)
	exctval<q>    the response itself, only where the j-code is missing
	roundcat<q>   number of trailing zeroes (0-7)
	mag<q>        lower bound of the power-of-ten bin containing exctval<q>
	magcat<q>     number of digits (1-8)
	sig<q>        magcat - roundcat
	sigovermag<q> sig / magcat
	jcode<q>      copy of the j-code
   Only responses whose j-code is missing are used: these are ONLY ORIGINAL VALUES --
   NOT IMPUTED VALUES, AND NOT RANGES. Everything derived is set to missing otherwise.
   NOTE: roundcat stops at 7 and magcat at 8, so responses of 100,000,000 or more
   (and responses that are multiples of 100,000,000) get missing categories and
   therefore a missing sig. */
foreach j in $surveyquestions {

	/* Run "rounding2" on the raw response. */
	gen r`j' = .
	rounding2 x`j' r`j'

	/* Stored as double so large dollar amounts are not perturbed by float rounding
	   before they are binned (a float turns 99,999,999 into 100,000,000). */
	gen double exctval`j' = x`j' if j`j' == .

	/* r = 10^z  <=>  z trailing zeroes */
	gen roundcat`j' = .
	forvalues z = 0/7 {
		replace roundcat`j' = `z' if r`j' == 10^`z'
	}

	/* Bin the exact value by order of magnitude. The variable is referenced by its
	   full name, so this does not depend on variable abbreviation being switched on. */
	egen mag`j' = cut(exctval`j'), at(0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000)

	/* The bin [0,10) is one digit; the bin starting at 10^(d-1) is d digits. */
	gen magcat`j' = 1 if mag`j' == 0
	forvalues d = 2/8 {
		replace magcat`j' = `d' if mag`j' == 10^(`d' - 1)
	}

	gen sig`j' = magcat`j' - roundcat`j'

	/* Blank out everything for responses that carry a j-code. */
	foreach v in sig exctval mag magcat roundcat {
		replace `v'`j' = . if j`j' < .
	}

	/* generate ratio of significant digits to magnitude */
	gen sigovermag`j' = sig`j'/magcat`j'

	clonevar jcode`j' = j`j'
}

/* ***************** */

/* Keep the identifiers/covariates plus the per-question sig*, sigovermag* and magcat* variables
   (the exctval* variables are deliberately not kept). */
keep $keeplist *sig*  *magcat* /* *exctval* */

/* process RHS variables */

/* Log of fin; missing wherever fin is zero or negative. */
gen lnasset = ln(fin)

/* assumes racecl is coded 1/2 so that this is a 0/1 indicator -- TODO confirm */
gen nonwhite = racecl - 1

/* 1 if the disposition code is 70, 72 or 74, else 0 (including when x30006 is missing).
   The original cond(1, <test>, 0) had its arguments misplaced and returned the right value
   only because a constant-true condition hands back the second argument. */
gen phonemode = inlist(x30006, 70, 72, 74)

/* Reverse the scale of x6536: 1 -> 3, 2 -> 2, 3 -> 1, 4 -> 0.
   NOTE(review): assumes x6536 only takes the values 1-4; other codes would fall outside the label range. */
gen referdocs = -1*x6536 + 4
label define referdocs 0 "Refer docs: never" 1 "Rarely" 2 "Sometimes" 3 "Frequently"
label values referdocs referdocs

/* Consecutive integer id for each interviewer (x2), used as i.fi in the regressions. */
egen fi = group(x2)
label variable fi "field interviewer"

/* The data hold one row per respondent with one column per question.
   Go long: one row per respondent (xx1) x question, where "question" takes the
   numeric suffix of the magcat / sigovermag / sig variables. exctval is not carried along. */
reshape long magcat sigovermag sig, i(xx1) j(question)

/* Un-indexed share of trailing zeroes: one minus significant digits over total digits. */
gen ftznoindex = 1 - sigovermag
label var ftznoindex  "fraction of response that's trailing zeroes, NO INDEX"

/* FTZ = rounding (main outcome for analysis): trailing zeroes (magcat - sig) relative to
   the largest number of trailing zeroes a magcat-digit response can have (magcat - 1). */
gen double ftz = (magcat - sig)/(magcat - 1)
label var ftz  "ROUNDING: fraction of response that's trailing zeroes, indexed to max possible ftz"

/* Single-digit values give 0/0 above -- technically undefined. Recode them to 0. */
replace ftz = 0 if missing(ftz) & ftznoindex == 0

/* Binary version of ftz (1 = every digit after the first is a zero) for the robustness
   checks in the Appendix Table.
   NOTE: this variable is not in the keep list used when the analysis dataset is saved;
   section 4 generates it again before the appendix models. */
gen round = (ftz == 1) if !missing(ftz)

/* label questions and topics */
/* question designates which survey question the observation refers to. Values are the question numbers on the survey (X-variable numbers) */
label define question 412 "CC new charges" 413 "CC still owed" 805 "Mort outstanding" 3024 "Food at home" 3506 "Checking acct 1" 3730 "Savings acct 1" 5306 "Soc Sec" 5311 "Soc Sec (sp)" 3029 "food away" 716 "home value"
/* The label was defined but never attached to the variable; attach it so tabulations show the names. */
label values question question
label var question "X-variable number"

/* Create a version of "question" whose numerical values reflect the order in which the
   questions appear on the survey. Questions not in this list keep qorder == . and so
   drop out of any model that includes i.qorder.
   NOTE(review): 5303 is in $surveyquestions but is not listed here -- confirm it is meant to be excluded. */
local survey_order 412 413 716 805 3024 3029 3506 3730 5306 5311
gen qorder = .
local position = 0
foreach q of local survey_order {
	local ++position
	replace qorder = `position' if question == `q'
}

label define qorder 1 "cc (new charges)" 2 "cc (still owed)" 3 "home value" 4 "mortgage outstanding" 5 "food at home" 6 "food away" 7 "checking 1" 8 "savings 1" 9 "Soc Sec (self)" 10 "Soc Sec (sp)"
label values qorder qorder

/* generate indicators for whether the question is a single-account question, or a knowable question */
gen qsingleacct = inlist(question, 3506, 3730, 5306, 5311, 805, 716)
gen qknowable = inlist(question, 412, 413, 805, 3506, 3730, 5306, 5311)
/* The single-account flag is only defined within knowable questions (this blanks it for 716, among others). */
replace qsingleacct = . if qknowable==0 

	/* ***** save final analysis dataset ***** */
	/* Variable names are spelled out in full (qsingleacct, not the abbreviation qsingle)
	   so this step also works under -set varabbrev off-. */
	keep ftz qorder qknowable qsingleacct referdocs magcat agecl hhsex edcl married lnasset nonwhite phonemode fi xx1
	/* Drop respondent-question rows with no usable response. */
	keep if ftz<.
	label variable ftz "rounding"
	save study1_analysis_dataset.dta, replace
	/* *************************************** */

/* *********************** 4. ANALYSIS FOR PAPER BEGINS HERE ***************************************** */
/* Covariates shared by every model below (interviewer effects i.fi are added per model). */
global RHSvar "i.referdocs i.qorder i.magcat i.agecl hhsex i.edcl married lnasset nonwhite phonemode"

/* Make sure the folder that receives logs and tables exists (no-op if it already does). */
capture mkdir output

/* Estimation sample: rows used by the main random-effects model (Swamy-Arora estimator).
   This has to run BEFORE Figure 2, which is restricted to esample==1; previously esample
   was created only after the Figure 2 block, so the do-file stopped there with
   "variable esample not found" and the log still open. */
capture drop esample
xtreg ftz $RHSvar i.fi,  i(xx1) sa
gen esample = e(sample)

/* figure 2 */
log using output/figure2.txt, text replace
disp "calculations for figure 2"
mean ftz if esample==1, over(qorder)
log close


/* TABLE 2 */
log using output/table2.txt, text replace
tabstat ftz if esample==1, by(qorder) statistics(mean sd n)
tabstat ftz if esample==1, by(qknowable) statistics(mean sd n)
tabstat ftz if esample==1, by(qsingleacct) statistics(mean sd n)
disp "not in table; results mentioned in text"
ttest ftz if esample==1, by(qknowable)
ttest ftz if esample==1, by(qsingleacct)
/* Rows that are not single-account questions (qsingleacct is 0 or missing), split by knowable. */
ttest ftz if esample==1 & qsingleacct~=1, by(qknowable)
log close

/* TABLE 3
   Five subsamples, two random-effects models each:
     reg<s>       full covariate set plus interviewer effects
     reg<s>nocov  intercept only, on exactly the rows used by reg<s>
   estout will not export rho's from the no-covariate specifications (reg*nocov);
   enter them in manually from the log file written below. */
set more off
estimates clear

/* Subsample restriction for each column; column 1 uses all rows. */
local sub1
local sub2 "qknowable==0"
local sub3 "qknowable==1"
local sub4 "qsingleacct==0"
local sub5 "qsingleacct==1"

forvalues s = 1/5 {
	/* Build the if-qualifiers for the full model and for its no-covariate twin. */
	local if_full
	local if_null "if e(sample)==1"
	if "`sub`s''" != "" {
		local if_full "if `sub`s''"
		local if_null "if `sub`s'' & e(sample)==1"
	}

	xtreg ftz $RHSvar i.fi `if_full',  i(xx1) sa
	est store reg`s'

	/* e(sample) here still refers to the model just fitted above. */
	xtreg ftz `if_null',  i(xx1) sa
	est store reg`s'nocov
}

estout reg1 reg2 reg3 reg4 reg5 using "output/srm_table3.txt", ///
	style(tab) replace label cells(b(star fmt(%9.3f)) se(par(`"="("'`")""') fmt(%9.4f))) numbers("(" ")") ///
	mlabels("all" "NOT knowable" "Knowable" "Aggregate q" "Single acct") collabels(none) ///
	drop(*fi*) starlevels(* .1 ** .05 *** .01)  stats( N N_g r2_o rho, fmt( 0 0 3 3) labels( "Observations" "Individuals" "Overall R2" "Rho" )) 

/* Replay the intercept-only models into a log so their rho values can be read off. */
log using output/srm_table3_rhonocovars_log.txt, text replace
disp "this reports the last line of table 3, the rho values from regressions without coefficients"
foreach m in reg1nocov reg2nocov reg3nocov reg4nocov reg5nocov {
	est replay `m'
}
log close

/* TABLE 6: number of digits (magcat) by question, within the estimation sample. */
log using output/table6.txt, text replace
tabstat magcat if esample==1, by(qorder) statistics(mean sd n)
log close


/* Appendix Table 1: robustness checks. "round" is binary form of ftz */
/* Statements in this block are terminated by semicolons. */
#delimit ;
set more off;

/* round = 1 when ftz equals 1, 0 for any other non-missing ftz.
   Dropped first so this section can be re-run in the same session. */
capture drop round;
gen round = (ftz == 1) if !missing(ftz);

/* Column 1: linear mixed model with random intercepts for interviewer and for respondent within interviewer. */
mixed ftz $RHSvar || fi: || xx1:;
est store robregmixed;

/* estout will not export the ICCs, so they are logged separately.
   The log gets its own file name: it used to be written to output/srm_apptable1.txt,
   the same file the estout call below replaces, so the ICC output was lost. */
log using output/srm_apptable1_icc_log.txt, text replace;
disp "icc for column 1";
estat icc;
log close;

/* Random-effects tobit with the upper limit at ftz = 1, plus its marginal effects. */
xttobit ftz $RHSvar, i(xx1) ul(1);
est store tobit;

margins, dydx(*) post;
est store tobitmfx;

/* Random-effects logit on the binary outcome, plus its marginal effects. */
xtlogit round $RHSvar, i(xx1) re;
est store logit;

margins, dydx(*) post;
est store logitmfx;

/* Linear probability model on the binary outcome (random effects, Swamy-Arora). */
xtreg round $RHSvar, i(xx1) sa;
est store lpm;

estout robregmixed tobitmfx logitmfx tobit logit lpm using "output/srm_apptable1.txt",
       style(tab) replace label cells(b(star fmt(%9.3f)) se(par(`"="("'`")""') fmt(%9.4f))) numbers("(" ")") 
       mlabels("Linear mixed RE" "Tobit mfx" "Logit mfx" "Tobit" "Logit" "LPM" ) collabels(none) 
       starlevels(* .1 ** .05 *** .01)  stats( N N_g r2_o rho, fmt( 0 0 3 3) 
	  labels( "Observations" "Individuals" "Overall R2" "Rho" ));