/*  This do-file processes the analysis dataset and conducts the analysis for Study 2 in 
Gideon, Helppie-McFall, Hsu, "Heaping at Round Numbers on Financial Questions: The Role of Satisficing"
version submitted 2016.12.22 

do-file outline:
0. set directories and global macros for variable lists
1. code for the program that extracts raw rounding information from each response 
2. load in data
3. process data and save analysis dataset 
4. Analysis for paper
*/

/* *************** 0. set directories and global macros for variable lists */

/* Session setup. The delimiter is switched to ";" first, so every command
   from here to the end of the do-file must end with a semicolon. */
#delimit ;
clear;
capture log close;              /* close a log left open by an earlier run; silent if none is open */
set mem 500M;
set more off;
set seed 1234567;
set type double, permanently;   /* new variables default to double; this setting persists across sessions */

/***** SET DIRECTORIES **********************************************************/

/**NEEDS TO BE CHANGED BY RESEARCHER**/
/* Working directory of the replication package. The data paths defined below
   and the "output/..." paths used later in this file are relative to it.
   The first line is a commented-out template; the second line is the path
   that is actually executed. */
***cd "/SET_BY_RESEARCHER/";
cd "/Users/michaelgideon/Dropbox/Projects-not dissertation/Rounding/CogEcon/SRM replication files/replication materials/";

/**DATA PATHS--THESE FILE NAMES MIGHT NEED TO BE SET BY THE RESEARCHER**/
/* ce2011data is the file opened with -use- in section 2; cognition is merged
   onto it by sampid and Qtime (question timestamps) by userid. */
global ce2011data "rawdata/CE2011pubV1.dta";
global cognition "rawdata/ce_mcardle_merged_small_20111209.dta";
global Qtime "rawdata/Qtimes_071112_cld.dta";


/********************************************************************************/
/********************************************************************************/	
/* ***** 1. code for the program that extracts raw rounding information from each response */

/** Code for creating the rounding variable: the first input is the variable whose responses we want to classify;
the second input is an existing variable that is overwritten with the rounding unit (1, 10, 100, ...) of each response **/
/* code copied from Rounding/CogEcon/rounding2.ado */
		
capture program drop rounding2;
program define rounding2;
/* rounding2 exctvar roundvar

   For every observation with a non-missing exctvar, stores in roundvar the
   largest power of ten (1, 10, ..., 1,000,000,000) of which exctvar is an
   exact multiple. Values that are not multiples of 10 (including non-integer
   values) and the value 0 get 1. Observations with missing exctvar are left
   untouched.

   exctvar  : numeric variable holding the reported amounts.
   roundvar : numeric variable that must already exist; it is overwritten
              with -replace-, not created.

   The comparison is done on the double-precision result of round() directly,
   so the classification does not depend on the session's default storage
   type (-set type-). Storing the rounded values in float variables would
   misclassify amounts above 2^24. */
version 13;
set more off;
syntax varlist(min=2 numeric);

/* Tokenize the parsed (unabbreviated) varlist: `1' = amounts, `2' = result */
tokenize `varlist';
local exct `1';
local round `2';

quietly {;

    /* baseline: any non-missing response is at least "rounded" to the unit */
    replace `round' = 1 if !missing(`exct');

    /* ascending order matters: each larger unit that divides the value
       overwrites the smaller one assigned before it */
    foreach unit in 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000 {;
        replace `round' = `unit' if `exct' == round(`exct', `unit') & !missing(`exct');
    };

    /* zero is a multiple of every unit; code it as unit 1 */
    replace `round' = 1 if `exct' == 0;

};
end;
	
/********************************************************************************/
/********************************************************************************/		
/* ***************** 2. LOAD IN DATA *****************************************  */


/* Open the CogEcon 2011 file and keep only the variables used below. */
use "${ce2011data}", clear;

/* reported value and its flag for each of the 19 dollar-amount questions */
local valvars;
foreach q in C2 C5 C11 C16 C21 C33 C42 D5 D7 D15 D16 D16a D31 D34 E3 E19 E20 E21 E22 {;
    local valvars `valvars' c3_`q'_val c3_`q'_val_flag;
};

keep sampid chhid
    `valvars'
    c3_H1tax_ret c3_H1*
    c3_modesubmit c3_age racem1 racem2 hispanic gender white c3_finrA
    hs_deg coll_deg educ c3_result c3_relstat c3_D2 c3_D3
    c3_D15a_val c3_D15b_val c3_D15c_val c3_D15d_val c3_D15e_val
    c3_D16a_val c3_D16b_val c3_D16c_val c3_D16d_val c3_D16e_val;

sort sampid;

**********************************************;
**********************************************;
/** MERGE IN COGNITION & TIME STAMPS **/

/**merge in 2008-2009 data which contains cognition scores and ID variable to link to timestamp data**/
mmerge sampid using  "${cognition}", ukeep(c2_login_1 c2_userid c2_pr_userid sampid 
w2_D_em_p w2_BF3_pscore w2_BF5_pscore w2_NS_wscore w2_RF_wscore w2_NC1pscore w2_NC2pscore);

/* Build the numeric link key userid from c2_login_1.
   The conversion is done in place: -destring, replace- is a no-op when the
   variable is already numeric, so the rename that follows works whether
   c2_login_1 arrives as a string or as a number. (Generating a separate
   userid first would make the rename fail with "already defined" whenever
   the conversion actually took place.) */
destring c2_login_1, replace;
rename c2_login_1 userid;
sort userid;

/* remove exact duplicate rows before linking to the timestamp file */
duplicates drop;


/**MERGE IN TIMESTAMP DATA**/
mmerge userid using "${Qtime}", ukeep(C2 C5 C11 C16 C21 C33 C42 D5 D7 D15val D16 D16a D31 D34 E3 E19 E20 E21 E22);

/* rows without a link key cannot be tied to a respondent */
drop if missing(userid);

/* give the D15 timestamp the same bare question code as the others ... */
rename D15val D15;

/* ... then prefix every timestamp with "time" (timeC2, timeC5, ...) so it
   lines up with the other per-question stubs when the data are reshaped */
foreach tvar in C2 C5 C11 C16 C21 C33 C42 D5 D7 D15 D16 D16a D31 D34 E3 E19 E20 E21 E22{;
    rename `tvar' time`tvar';
};

/**Only keep respondents from CogEcon 2011 **/
drop if missing(c3_modesubmit);

**********************************************;
**********************************************;

/* Value labels for dollar-amount ranges. Defined here; the code shown in this
   file does not attach them to any variable. */
label define exctrng
    0  "0"
    1  "1-2500"
    2  "2501-5000"
    3  "5001-10000"
    4  "10001-25000"
    5  "25001-50000"
    6  "50001-100000"
    7  "100001-250000"
    8  "250001-500000"
    9  "500001-1000000"
    10 "over 1mil"
    99 "missing";

/********************************************************************************/
/* ***************** 3. PROCESS DATA AND SAVE ANALYSIS DATASET **************** */
/********************************************************************************/

/* For each dollar-amount question build, in wide form:
     exct<q>       exact reported amount (flag==1 only; zeros set to missing)
     roundcat<q>   number of trailing zeros implied by the rounding unit (0-7)
     magcat<q>     number of digits of the amount (1-8)
     sig<q>        magcat - roundcat, i.e. number of significant digits
     sigovermag<q> sig / magcat
   roundcat and magcat are only defined up to 10,000,000; larger rounding
   units or amounts leave them (and everything derived from them) missing. */
local moneyq "C2 C5 C11 C16 C21 C33 C42 D5 D7 D15 D16 D16a D31 D34 E3 E19 E20 E21 E22";

foreach q of local moneyq {;

    /* working copies of the raw value and its flag */
    clonevar w3`q'_val  = c3_`q'_val;
    clonevar w3`q'_flag = c3_`q'_val_flag;

    /* exact amount: flagged responses only, reported zeros dropped */
    gen w3`q'_exct = w3`q'_val if w3`q'_flag==1 & w3`q'_val~=0;

    /* rounding unit of each response (1, 10, 100, ...); rounding2 fills an
       existing variable, so create it empty first */
    gen w3`q'_rounding = .;
    rounding2 w3`q'_exct w3`q'_rounding;

    /* rounding unit -> count of trailing zeros */
    gen w3`q'_roundcat = .;
    local zeros = 0;
    foreach unit in 1 10 100 1000 10000 100000 1000000 10000000 {;
        replace w3`q'_roundcat = `zeros' if w3`q'_rounding==`unit';
        local zeros = `zeros' + 1;
    };

    /* lower bound of the power-of-ten band the amount falls in */
    egen w3`q'_mag = cut(w3`q'_exct), at(0,10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000);

    /* band lower bound -> number of digits */
    gen w3`q'_magcat = .;
    local digits = 1;
    foreach lower in 0 10 100 1000 10000 100000 1000000 10000000 {;
        replace w3`q'_magcat = `digits' if w3`q'_mag==`lower';
        local digits = `digits' + 1;
    };

    gen w3`q'_sig = w3`q'_magcat - w3`q'_roundcat;

    /* stub-first copies (sigC2, exctC2, ...) */
    clonevar sig`q'      = w3`q'_sig;
    clonevar exct`q'     = w3`q'_exct;
    clonevar magcat`q'   = w3`q'_magcat;
    clonevar roundcat`q' = w3`q'_roundcat;

    gen sigovermag`q' = sig`q'/magcat`q';

    /* Responses that include cents (amount differs from its nearest integer)
       are treated as fully precise: sig = magcat and sigovermag = 1. */
    gen integerval`q' = round(exct`q');
    gen icent`q' = (integerval`q'~=exct`q');
    replace sig`q' = magcat`q' if icent`q'==1;
    replace sigovermag`q' = 1 if icent`q'==1;

};

**********************************************;

/* PROCESS RHS VARIABLES */

/**RACE/ETHNICITY: Non-white or hispanic  = 1 **/
/* NOTE(review): racem1~=1 is also true when racem1 is missing, so respondents
   with missing race are coded 1 and nonwhitehisp is never missing. Confirm
   that this is intended. */
gen double nonwhitehisp=0;
replace nonwhitehisp=1 if racem1~=1 | (racem2~=1 & racem2~=.) | hispanic==1;

/**FEMALE **/
/* missing when gender is neither 1 nor 2 */
gen female=1 if gender==2;
replace female=0 if gender==1;
label variable female "Female (=1)";

/** MARRIED OR PARTNERED **/
/* c3_relstat 1/2 -> 1, 3/4 -> 0, anything else stays missing */
gen married=1 if c3_relstat==1 | c3_relstat==2; 
replace married=0 if c3_relstat==3 | c3_relstat==4; 

/**AGE **/
/* ten-year bands; missing age stays missing (c3_age<. excludes missings) */
gen agecat=1 if c3_age<35;
replace agecat=2 if c3_age<45 & c3_age>=35 ;
replace agecat=3 if c3_age<55 & c3_age>=45 ;
replace agecat=4 if c3_age<65 & c3_age>=55 ;
replace agecat=5 if c3_age<75 & c3_age>=65 ;
replace agecat=6 if c3_age<. & c3_age>=75 ;

/* label for band 4 corrected to match its definition above (55 <= age < 65) */
label define agecat 
1 "age<35" 2 "35=<age<45" 3 "45=<age<55" 4 "55=<age<65" 5 "65=<age<75" 6 "75=<age";

label value agecat agecat;

/**STANDARDIZED COGNITIVE ABILITY**/
/* each score is standardized (egen std) over the observations in memory at this point */
egen NSnorm = std(w2_NS_wscore);
label variable NSnorm "W2 NS, standardized";

egen DEMnorm=std(w2_D_em_p);
label variable DEMnorm "W2 D_em, standardized";

/* creates NC1norm, NC2norm, BF3_norm and BF5_norm */
foreach x in NC1 NC2 BF3_ BF5_ {;
egen `x'norm = std(w2_`x'pscore);
label variable `x'norm "W2 `x', standardized";
};

/* row mean of the two NC scores, using whichever are non-missing
   (rowmean is the current name of the legacy egen function rmean) */
egen NCave = rowmean(w2_NC1pscore w2_NC2pscore);
/** I confirm that this is equivalent to what I constructed using the raw NC data!**/

egen NCnorm=std(NCave);
label variable NCnorm "NC, standardized";

**********************************************;

/* USED FINANCIAL RECORDS */
/* 1 if any of the four record-type items equals 1, else 0 (never missing) */
gen usedrecords = (c3_H1acct_stat==1 | c3_H1other==1 | c3_H1per_fin_sft==1 | c3_H1tax_ret==1);

/* 1 if the respondent reported asking someone, else 0 (never missing) */
gen askedsomeone = (c3_H1ask_smn==1);

/* HOUSEHOLD CFO
   not married: 1 unless c3_D2==4
   married:     0 if c3_D2 is 2 or 4 or c3_D3 is 2, then 1 if c3_D2 or c3_D3
                is 1 (the second rule is applied last and therefore wins)
   missing whenever married is missing */
gen homeCFO = .;
replace homeCFO = (c3_D2~=4) if married==0;
replace homeCFO = 0 if married==1 & (c3_D2==2 | c3_D2==4 | c3_D3==2);
replace homeCFO = 1 if married==1 & (c3_D2==1 | c3_D3==1);

/* 1 = home CFO, 2 = not CFO and did not ask, 3 = not CFO and asked */
gen CFOsomeone = 1 if homeCFO==1;
replace CFOsomeone = 2 + askedsomeone if homeCFO==0;

label define CFOsomeone 1 "HomeCFO" 2 "NotHomeCFO,NotAsk" 3 "NotHomeCFO,Ask" ;

label val CFOsomeone CFOsomeone;


/** LOG OF TOTAL FINANCIAL ASSETS **/
/* sum of D15 and D16 amounts; rowtotal treats a missing component as 0, so
   set the total back to missing when both components are missing */
egen finass = rowtotal(c3_D15_val c3_D16_val);
replace finass = . if c3_D15_val==. & c3_D16_val==.;

/**cases had been replaced with missing for data quality concerns **/
replace finass = . if inlist(sampid, "7011710010", "7050920010");

/* log of total assets, with zero assets coded as 0 instead of missing */
gen double ln_finasset = cond(finass==0, 0, log(finass));

**********************************************;
**********************************************;

/** RESTRICT TO WEB RESPONDENTS**/
/* Drops every observation with c3_modesubmit==1.
   NOTE(review): presumably 1 is the non-web submission mode - confirm against the codebook. */
drop if c3_modesubmit==1;

/* Keep the per-question wide variables (sig*, exct*, time*, magcat*,
   sigovermag*) plus the respondent-level covariates.
   - The wildcard sig* also matches the sigovermag* variables.
   - No roundcat* variable is kept, so the roundcat stub named in the reshape
     below has no wide variables left; roundcat is also absent from the final
     keep list before the analysis dataset is saved.
   NOTE(review): confirm that reshape handles both points as intended. */
keep sig* exct* time* magcat* sigovermag* sampid chhid gender female married agecat nonwhitehisp 
hs_deg coll_deg educ c3_result c3_modesubmit c3_D2 c3_D3 ln_finasset
NSnorm  NCnorm DEMnorm BF3_norm  BF5_norm
usedrecords homeCFO CFOsomeone askedsomeone
;

/** RESHAPE DATA **/

/* Wide -> long: one row per sampid x quest, where quest is the string suffix
   of the wide variable names (C2, C5, ...).
   NOTE(review): because sig is a prefix of sigovermag, suffixes that are not
   question codes may also appear in quest; rows whose quest is not one of the
   19 question codes are dropped later via -drop if missing(question)-. */
reshape long sig exct time magcat roundcat sigovermag, i(sampid) j(quest) string;

/* sampid is a string up to this point (it is compared with string literals
   earlier in this file); convert it to numeric for use as the panel id in xtreg */
destring sampid, replace;

/* log of the question timestamp variable */
gen logtime = log(time);

/* Numeric question id, 1-19, in questionnaire order. Rows whose quest value
   is not one of these codes keep question missing. */
gen question = .;
local qnum = 0;
foreach code in C2 C5 C11 C16 C21 C33 C42 D5 D7 D15 D16 D16a D31 D34 E3 E19 E20 E21 E22 {;
    local qnum = `qnum' + 1;
    replace question = `qnum' if quest=="`code'";
};

label define question
     1 "HH inc"
     2 "Soc Sec (self)"
     3 "Soc Sec (SP)"
     4 "Pension (self)"
     5 "Pension (SP)"
     6 "Earnings (self)"
     7 "Earnings (SP)"
     8 "Home val"
     9 "Mortgage"
    10 "Ret Assets"
    11 "Nonret Assets"
    12 "Check,Savings,CDs"
    13 "CC (still owed)"
    14 "other debt"
    15 "401k contrib"
    16 "food at home"
    17 "food away"
    18 "Health insurance"
    19 "Health out-of-pocket";

label val question question;


/** INDICATORS FOR QUESTION TYPE **/

/* home value, food at home, food away */
gen unknowable = inlist(quest, "D5", "E19", "E20");

/* Social Security, pension and mortgage questions; set to missing (not 0)
   for the unknowable questions */
gen singleaccount = inlist(quest, "C5", "C11", "C16", "C21", "D7");
replace singleaccount = . if unknowable==1;

/* everything that is not coded singleaccount==1, including the rows where
   singleaccount is missing */
gen aggregated = (singleaccount~=1);

/** Create variable for fraction of trailing zeros**/
/* (1 - sig/magcat) rescaled by magcat/(magcat-1). For one-digit amounts
   (magcat==1) the rescaling divides by zero and yields missing, which is
   recoded to 0. */
gen double ftz = (1-sigovermag)*(magcat/(magcat-1));
replace ftz = 0 if magcat==1 & missing(ftz);
label variable ftz "rounding";

**********************************************;

/* Drop rows whose quest is not one of the 19 question codes (question is
   missing for them). */
drop if missing(question);
codebook sampid;

/** Study 2: Procedure (WHO IS DROPPED BC OF MISSING DATA?) **/
/* codebook sampid after a step reports how many distinct respondents remain */

/* no exact amount reported for this question */
drop if missing(exct);
codebook sampid;

/* any of the five standardized cognition measures missing */
drop if missing(NSnorm, DEMnorm, NCnorm, BF3_norm, BF5_norm);
codebook sampid;

/* key regressors missing */
drop if missing(usedrecords, homeCFO);

/** Missing control variables**/
drop if missing(nonwhitehisp, female, married, agecat, coll_deg);

/**Missing data on total financial assets**/
/**9 obs dropped**/
drop if missing(ln_finasset);

/**Check final sample size-- 490 unique individuals **/
codebook sampid;

/**Keep Study 2 variables**/
keep sampid question ftz magcat logtime
    unknowable singleaccount
    NSnorm DEMnorm NCnorm BF3_norm BF5_norm
    CFOsomeone usedrecords ln_finasset
    nonwhitehisp female married coll_deg agecat;

/* ***** save final analysis dataset ***** */
save "output/study2_analysis_dataset.dta", replace;

/*****************************************************************************/
/*****************************************************************************/
/* *********** 4. ANALYSIS FOR PAPER BEGINS HERE *************************** */


/* Full covariate list shared by the main specification and the robustness checks */
global RHSvar "NSnorm DEMnorm NCnorm BF3_norm BF5_norm i.CFOsomeone ln_finasset nonwhitehisp female 
				married coll_deg i.agecat logtime usedrecords i.question i.magcat" ;

/* Main random-effects specification; sample1 marks its estimation sample and
   restricts all descriptive output below to the same observations */
xtreg ftz $RHSvar, i(sampid) sa;
gen sample1 = 1 if e(sample);

/* figure 2: mean ftz by question */
log using "output/figure2.txt", text replace;
disp "calculations for figure 2";
mean ftz if sample1==1, over(question);
log close;

/* TABLE 4: mean, sd and n of ftz by question and by question type */
log using "output/table4.txt", text replace;
foreach grp in question unknowable singleaccount {;
    tabstat ftz if sample1==1, by(`grp') statistics(mean sd n);
};
disp "not in table; results mentioned in text";
ttest ftz if sample1==1, by(unknowable);
ttest ftz if sample1==1, by(singleaccount);
ttest ftz if sample1==1 & singleaccount~=1, by(unknowable);
log close;
 
**********************************************;

/* TABLE 5: estout will not export rho's from specifications with no covariates (estimates stored as reg*nocov). Enter in manually from log file below. */

/* covariates common to every column; the question and magnitude dummies are
   added per model because the base question differs across subsamples */
local covars "NSnorm DEMnorm NCnorm BF3_norm BF5_norm i.CFOsomeone ln_finasset nonwhitehisp female married coll_deg i.agecat logtime usedrecords";

/* (1) all questions */
xtreg ftz $RHSvar, i(sampid) sa;
est store reg1;

/* (2) unknowable questions, base question 16 */
xtreg ftz `covars' ib16.question i.magcat if unknowable==1, i(sampid) sa;
est store reg2;

/* (3) knowable questions, base question 1 */
xtreg ftz `covars' ib1.question i.magcat if unknowable==0, i(sampid) sa;
est store reg3;

/* (4) singleaccount==0, base question 1 */
xtreg ftz `covars' ib1.question i.magcat if singleaccount==0, i(sampid) sa;
est store reg4;

/* (5) single-account questions */
xtreg ftz `covars' i.question i.magcat if singleaccount==1, i(sampid) sa;
est store reg5;
gen sample2 = 1 if e(sample);

/* same five samples with question dummies only (source of the rho row) */
local cond1 "";
local cond2 "if unknowable==1";
local cond3 "if unknowable==0";
local cond4 "if singleaccount==0";
local cond5 "if singleaccount==1";
forvalues m = 1/5 {;
    xtreg ftz i.question `cond`m'', i(sampid) sa;
    est store reg`m'nocov;
};

estout reg1 reg2 reg3 reg4 reg5 using "output/table5.xls",
    style(tab) replace label
    cells(b(star fmt(%9.3f)) se(par(`"="("'`")""')))
    numbers("(" ")")
    mlabels("all" "NOT knowable" "Knowable" "Aggregate q" "Single acct")
    collabels(none)
    starlevels(* .1 ** .05 *** .01)
    stats( N N_g r2_o rho, fmt( 0 0 3 3)
           labels( "Observations" "Individuals" "Overall R2" "Rho" ));

log using "output/srm_table5_rhonocovars_log.txt", text replace;
disp "this reports the last line of table 5, the rho values from regressions without coefficients";
forvalues m = 1/5 {;
    est replay reg`m'nocov;
};
log close;

**********************************************; 
 
/* TABLE 6: magnitude category by question, main estimation sample */
log using "output/table6.txt", text replace;
tabstat magcat if sample1==1, by(question) statistics(mean sd n);
log close;

/* Appendix Table 2: robustness checks. "round" is binary form of ftz */

/* 1 when ftz equals 1 exactly, 0 for any other non-missing ftz */
gen round = (ftz==1) if !missing(ftz);

/* random-effects tobit with upper limit 1, then marginal effects */
xttobit ftz $RHSvar, i(sampid) ul(1);
est store robtobit;
margins, dydx(*) post;
est store robtobitmfx;

/* random-effects logit on the binary outcome, then marginal effects */
xtlogit round $RHSvar, i(sampid) re;
est store roblogit;
margins, dydx(*) post;
est store roblogitmfx;

/* random-effects linear probability model */
xtreg round $RHSvar, i(sampid) sa;
est store roblpm;

estout robtobitmfx roblogitmfx robtobit roblogit roblpm using "output/appendtable2.xls",
    style(tab) replace label
    cells(b(star fmt(%9.3f)) se(par(`"="("'`")""')))
    numbers("(" ")")
    mlabels("Tobit mfx" "Logit mfx" "Tobit" "Logit" "LPM,RE" )
    collabels(none)
    starlevels(* .1 ** .05 *** .01)
    stats( N N_g r2_o rho, fmt( 0 0 3 3)
           labels( "Observations" "Individuals" "Overall R2" "Rho" ));
			
/********************************************************************************/
/********************************************************************************/
