************** bracket test: Data Quantiles **************************************************

* Bracket counts (ca) and top-bracket cut-offs (cut) that index the input files.
global ca "4 6 8 10 12 14 16 18 20 22 24 26"
global cut "90 91 92 93 94 95 96 97 98 99"

* Turn each wide input file (variables p1 ... p19999 plus ID) into long format.
* reshape does not work for >12000 variables, so the p-variables are processed
* in three index windows (1-7499, 7500-14999, 15000-19999) that are reshaped
* separately and stacked afterwards.
foreach t of global cut {
	foreach k of global ca {

		forvalues part = 1/3 {

			// index window of the p-variables retained in this pass
			if `part' == 1 {
				local first 1
				local last 7499
			}
			else if `part' == 2 {
				local first 7500
				local last 14999
			}
			else {
				local first 15000
				local last 19999
			}

			use "C:\Users/$cdpath\Seafile\Meine Bibliothek\InterVerm\wealth_1988\cd_path\brtest_w2017_hh_inc_`t'_`k'_q.dta", clear

			// remove every p-variable whose index lies outside the window
			forvalues d = 1/19999 {
				if !inrange(`d', `first', `last') drop p`d'
			}

			keep p* ID

			reshape long p, i(ID)

			save  "${data_temp}\brtest_mcib_`t'_`k'_reshape_`part'_q_hh_inc.dta", replace
		}

		// stack the three long pieces into one file per (cut, bracket count)
		clear
		forvalues part = 1/3 {
			append using "${data_temp}\brtest_mcib_`t'_`k'_reshape_`part'_q_hh_inc.dta"
		}

		save  "${data_temp}\brtest_mcib_`t'_`k'_reshape_q_hh_inc.dta", replace

	}
}


**** adjust reshaped data

* For every (cut, bracket count) file: sort the reshaped p values into brackets,
* then give each value a random rank (m_x) within its bracket. The resulting
* file is later merged onto the observed data by (bracket, m_x).
* NOTE(review): the scalars z1_*_gw_* / z2_*_gw_* are not created by any active
* code in this file; they must already be in memory when this section runs.
foreach t of global cut {

	foreach k of global ca {

		use  "${data_temp}\brtest_mcib_`t'_`k'_reshape_q_hh_inc.dta", clear

		rename p p_hh_inc_`t'_`k'

		// bracket variables for every bracket count p = 4, 6, ..., 26
		// (only hh_inc_cat_`t'_`k' is used further down in this section)
		forvalues b=3(2)25 {
			local p=`b'+1
			gen hh_inc_cat_`t'_`p'=.

			// top bracket: everything at or above the highest bound
			replace hh_inc_cat_`t'_`p' = `p' if p_hh_inc_`t'_`k' >= z1_`b'_gw_`t'_`p' & p_hh_inc_`t'_`k' != .

			// lower brackets: bracket 1 starts at 0, bracket i at the bound of i-1
			// NOTE(review): if z2 equals z1 minus 0.01 (as in the disabled code
			// further down), non-integer values between z2_i and z1_i fall into
			// no bracket and are dropped below - confirm this is intended.
			forvalues i=1/`b' {
				local a=`i'-1
				if `i'==1 				replace hh_inc_cat_`t'_`p' = `i' if inrange(p_hh_inc_`t'_`k', 0, z2_`i'_gw_`t'_`p')
				if `i'>=2 			 	replace hh_inc_cat_`t'_`p' = `i' if inrange(p_hh_inc_`t'_`k', z1_`a'_gw_`t'_`p', z2_`i'_gw_`t'_`p' )
			}
		}

		// random rank within each bracket of the k-bracket scheme
		forvalues v=1/`k' {

			preserve
				keep if hh_inc_cat_`t'_`k'==`v'

				// The seed is reset for every bracket, as before. The draw is
				// stored as double (a float copy of runiform() produces ties)
				// and the sort is stable, so that the order - and therefore
				// m_x - is reproducible from run to run.
				set seed 220520
				gen double ra=runiform()

				sort ra, stable
				gen m_x=_n

				drop ra _j
				save "$data_temp\brtest_rand_m_`t'_`k'_`v'_q_hh_inc", replace

			restore
		}

		// stack the per-bracket files; values without a bracket are not kept
		clear
		forvalues v=1/`k' {
			append using "$data_temp\brtest_rand_m_`t'_`k'_`v'_q_hh_inc"
		}

		save "$data_temp\brtest_rand_m_`t'_`k'_q_hh_inc", replace

	}

}
*


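	*********
	* The block below is commented out. It is the only place in this file that
	* defines the bracket-bound scalars z1_*_gw_* and z2_*_gw_* which the
	* "adjust reshaped data" loop above refers to.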
/*use hid cid syear hgi1hinc using  "${data_in}\hgen.dta", clear



	keep if syear==2017
	gen hh_inc = hgi1hinc  if hgi1hinc >0 & hgi1hinc !=.


	// calculate the median
	sum hh_inc, d

	tab hh_inc


	// brackets arranged by quantiles 
	/* top brackets starts at 95 percentile (check sensibility here later) */
	xtile x_hh_inc= hh_inc,  nquantiles(500) 

foreach t of global cut{ 

	forvalues b=3/25 {

				local d=`b'
				local p=`b'+1
				gen hh_inc_cat_`t'_`p' = .
				//(leaves room for 1 quantile)
				local s=`t'-1 
				scalar br_help_c=`t'/`b'*5
				scalar br_help=round(br_help_c,1)
				di br_help
			forvalues i=1/`b' {
				local a=`i'-1
				if `d'==`b' scalar br_`d'=`t'*5
				if `d'<`b' scalar br_`d'=`t'*5-`a'*br_help
				di br_`d'
				local d=`d'-1
			}

		forvalues i=1/`b' {
			//if quantiles	forvalues l {}
			qui sum hh_inc if x_hh_inc==br_`i'
			scalar z1_`i'_gw_`t'_`p'=r(max)
				if r(max)==. {  	//check for missing categories: in rare cases, the algorithm above picks empty quantiles. So I use one before or after
					forvalues c=1(1)10 {
						scalar br_miss=br_`i'+`c'
						qui sum hh_inc if x_hh_inc==br_miss 
						scalar z1_`i'_gw_`t'_`p'=r(max)
						di "z1_`i'_gw_`t'_`p' " z1_`i'_gw_`t'_`p'
						if r(max)!=. {
							di "solved +`c', cut of at " br_miss " instead of " br_`i'
						} 
						else {
							scalar br_miss=br_`i'-`c'
							qui sum hh_inc if x_hh_inc==br_miss
							scalar z1_`i'_gw_`t'_`p'=r(max)
							if r(max)!=. {
								di "solved -`c', cut of at " br_miss " instead of " br_`i'
							} 
						}
						// Found a quantile? Check!
						qui sum hh_inc if x_hh_inc==br_miss
						if r(max)!=. continue, break
					}
				}
			scalar z2_`i'_gw_`t'_`p'=z1_`i'_gw_`t'_`p'-0.01
			di "Ober z1_`i'_gw_`t'_`p' "  z1_`i'_gw_`t'_`p'  " Unter z2_`i'_gw_`t'_`p' " z2_`i'_gw_`t'_`p'
			scalar drop	br_`i'

		}


		/*forvalues i=1/`b' {	
			qui sum hh_inc if x_hh_inc==br_`i'
			scalar z1_`i'_gw_`t'_`p'=r(max)

		}*/

	}



	forvalues b=3/25 {
			local p=`b'+1
			replace hh_inc_cat_`t'_`p' = `p' if hh_inc >= z1_`b'_gw_`t'_`p' & hh_inc != .
			forvalues i=1/`b' {
			local a=`i'-1
			if `i'==1 				replace hh_inc_cat_`t'_`p' = `i' if inrange(hh_inc, 0, z2_`i'_gw_`t'_`p')
			if `i'>=2 			 	replace hh_inc_cat_`t'_`p' = `i' if inrange(hh_inc, z1_`a'_gw_`t'_`p', z2_`i'_gw_`t'_`p' )				
		}
	}

*/

* Attach the ranked p values to the observed data: within every bracket the
* observations get a random rank m_x and are matched 1:1 on (bracket, m_x).
use  "${data_temp}\hh_data_inc.dta", clear
foreach t of global cut {

	// One random key per cut-off. It is stored as double (a float copy of
	// runiform() produces ties) and kept until all bracket counts are done:
	// a plain "sort ra" followed by bysort does not guarantee that the random
	// order survives, because bysort re-sorts without preserving prior order.
	set seed 220520
	gen double ra=runiform()

	foreach k of global ca {

		// rank within bracket in the order of the random key
		bysort hh_inc_cat_`t'_`k' (ra): gen m_x=_n

		// keep all master observations; unmatched ones get no p value
		merge 1:1 hh_inc_cat_`t'_`k' m_x using  "$data_temp\brtest_rand_m_`t'_`k'_q_hh_inc", keep(1 3) nogen

		drop m_x

	}

	drop ra
}

	* winsorize observed and matched values at the 0.5th / 99.5th percentile,
	* separately by survey year (winsor2 adds variables with suffix _w)

	winsor2 hh_inc, cuts(0.5 99.5) by(syear)

foreach t of global cut{ 

	foreach k of global ca {  	
		winsor2 p_hh_inc_`t'_`k', cuts(0.5 99.5)  by(syear)
		/* by hand
		sort `k' 
		sum `k' 
		gen F_`k'=_n/r(N)*100 if `k'!=.
		sort p_`k' 
		sum p_`k' 
		gen F_p_`k'=_n/r(N)*100 if p_`k'!=. */
		

	}

}
*keep cid syear hid prime_prop* fin_assets* tan_assets* all_debt* hh_inc* busin_assets* p_*


* The file name previously ended in ".dta_heap". Stata only appends ".dta" when
* the name has no extension, so the file was written with the extension
* ".dta_heap" and the graph section, which reads "..._hh_inc_heap.dta", could
* not find it. Save under the name that is read later.
save  "${data_temp}\brtest_wave_all_comparison_q_final_hh_inc_heap.dta", replace 









* Descriptive statistics of the winsorized original (suffix o) and matched
* (suffix r) income for every bracket count, written to one table per cut-off.
use  "${data_temp}\brtest_wave_all_comparison_q_final_hh_inc.dta", clear

global ca "4 6 8 10 12 14 16 18 20 22 24 26"

global cut "84 86 88 90 92 94 96 98 99"

foreach up_cut of global cut {

	// ---- step 1: collect all statistics as scalars ----------------------
	foreach k of global ca {

		// NOTE(review): this reloads "..._q_final.dta", not the "_hh_inc"
		// file opened above - confirm this is the intended input.
		use  "${data_temp}\brtest_wave_all_comparison_q_final.dta", clear

		// quick fix  add capture
		*replace p_build_loan_w=p_busin_assets_w if syear==2002
		*replace build_loan_w=busin_assets_w if syear==2002
		******
		local t=2017

		// o = original variable, r = matched variable for this cut / bracket count
		foreach src in o r {
			if "`src'"=="o" local v hh_inc_w
			else local v p_hh_inc_`up_cut'_`k'_w

			summarize `v', detail
			foreach s in mean sd p5 p10 p25 p75 p90 p95 p99 N min max {
				scalar `s'_`k'_`t'`src'=r(`s')
			}
			scalar med_`k'_`t'`src'=r(p50)

			inequal7 `v', returnscalars
			scalar mld_`k'_`t'`src'=r(mld)
			scalar Theil_`k'_`t'`src'=r(theil)
			scalar Gini_`k'_`t'`src'=r(gini)
			scalar COV_`k'_`t'`src'=r(cov)
			scalar rmd_`k'_`t'`src'=r(rmd)
		}

		correlate p_hh_inc_`up_cut'_`k'_w hh_inc_w
		scalar corr_`k'_`t'r=r(rho)

	}

	// ---- step 2: build the 20-row result table --------------------------
	clear

	set obs 20
	local t=2017
	gen year=`t'
	gen n=_n

	// row labels, in row order 1..20 (row 14 is a heading without values)
	gen stat=""
	local row=0
	foreach lab in "mean" "median" "sd" "p5" "p10" "p25" "p75" "p90" "p95" "p99" ///
		"N" "min" "max" "Inequality Measures" "Mean Log Dev" "Theil" "Gini" "COV" ///
		"Rel. Mean Dev" "Correlaton" {
		local ++row
		replace stat="`lab'" if n==`row'
	}

	local f "o r"

	foreach k of global ca {
		foreach l of local f {
			gen gw_`k'_`l'=.

			// rows 1-13: summary statistics
			local row=0
			foreach s in mean med sd p5 p10 p25 p75 p90 p95 p99 N min max {
				local ++row
				replace gw_`k'_`l'=`s'_`k'_`t'`l' if n==`row'
			}

			// rows 15-19: inequality measures
			local row=14
			foreach s in mld Theil Gini COV rmd {
				local ++row
				replace gw_`k'_`l'=`s'_`k'_`t'`l' if n==`row'
			}

			// row 20: correlation, available for the matched variable only
			if "`l'"=="r" replace gw_`k'_`l'=corr_`k'_`t'`l' if n==20
		}

		// NOTE(review): local i is not set anywhere in this section, so this
		// rounding loop presumably never executes - confirm whether rounding
		// is wanted at all (it would also round Gini and the correlation).
		foreach k of local i {
			foreach l of local f {
				replace gw_`k'_`l'=round(gw_`k'_`l',1)
			}
		}

	}

	format *_r  %12.0g

	// NOTE(review): the export name ends in "_wealth" although the table
	// describes hh_inc - check that no other output is overwritten.
	save  "$data_temp/descr_fits_2017_`up_cut'", replace
	export excel using "${data_out}\bracket_test_stats_`up_cut'_wealth.xlsx", replace firstrow(variables)

}




*** Graph
* Correlation between matched and original (winsorized) income for every
* combination of cut-off and bracket count, shown as heat / hex plots.
*use  "${data_temp}\brtest_wave_all_comparison_q_final_hh_inc.dta", clear
use "${data_temp}\brtest_wave_all_comparison_q_final_hh_inc_heap.dta", clear
global ca "4 6 8 10 12 14 16 18 20 22 24 26"
global cut "90 91 92 93 94 95 96 97 98 99"


* step 1: one correlation scalar per (cut-off, bracket count)
foreach up_cut of global cut {

	foreach k of global ca {

		correlate p_hh_inc_`up_cut'_`k'_w hh_inc_w
		scalar corr_`up_cut'_`k'=r(rho)
	}
}

* step 2: move the scalars into a small data set, one row per combination
clear

set obs 200
gen cut_off=.
gen categories=.
gen correlation=.

gen n=_n
local row=1
foreach up_cut of global cut {
	foreach k of global ca {
		replace cut_off=`up_cut' if n==`row'
		replace categories=`k' if n==`row'
		replace correlation=corr_`up_cut'_`k' if n==`row'
		local row=`row'+1
	}
}

* step 3: correlation binned in steps of 0.05 (value = lower bound of the bin)
gen cor_b=.
forvalues w=70(5)95 {
	local upper=(`w'+5)/100
	local lower=`w'/100
	replace cor_b=`lower' if correlation>=`lower' & correlation<`upper'
}

* NOTE(review): 69 is on a different scale than the other bin values
* (0.70 ... 0.95) - confirm whether 0.69 or a similar value was meant.
replace cor_b=69 if correlation<0.7

drop if cut_off==90
* rows of the 200 that were never filled
drop if cut_off==.
heatplot correlation cut_off categories, backfill colors(plasma) ylabel(90(1)99) xlabel(4(1)26) discrete(2) 
graph export "$data_graph\corr_heat_inc_annual.pdf", replace

* The next line used to stand alone as "cuts(.7(.02)1)". That is an option
* fragment, not a command, and stopped the do-file with an "unrecognized
* command" error. Kept here for reference only:
* cuts(.7(.02)1)


heatplot cor_b cut_off categories, backfill colors(plasma) ylabel(90(1)99) xlabel(4(1)26) discrete(2)
graph export "$data_graph\corr_heat.pdf", replace




hexplot correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98) xlabel(4(2)26) discrete(2) 
graph export "$data_graph\corr_hex.pdf", replace



hexplot correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98)  xlabel(4(2)26) 

* round(x, 0) returns x unchanged, so the former round(correlation, 0.00) did
* not round at all; round to two decimals instead.
gen r_correlation=round(correlation, 0.01)
hexplot r_correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98)  xlabel(0(2)30)

//check winsor
// Flag observations whose value was changed by winsorizing. At this point the
// data in memory is the small correlation table built for the graphs, which
// has neither hh_inc nor hh_inc_w, so the winsorized data set is loaded first.
use "${data_temp}\brtest_wave_all_comparison_q_final_hh_inc_heap.dta", clear

gen d_diff=0
replace d_diff=1 if  hh_inc!=hh_inc_w
