******+ Category check for 2017 ***********************************************
***gb2 on income for Appendix (Simulation)
// Run the helper do-file first (presumably it defines the mciblg command used further down - confirm).
do "$do_path\Check_gbgfit_new_waves\02_c_mciblg.do"
set seed 220520

// Grid that is compared throughout the script:
//   ca  = total number of income brackets (including the open-ended top bracket)
//   cut = percentile at which the open-ended top bracket starts
global ca "4 6 8 10 12 14 16 18 20 22 24 26"
global cut "90 91 92 93 94 95 96 97 98 99"

// Bracket boundaries are kept in scalars; start from a clean slate.
scalar drop _all

*use hid cid syear hgi1hinc using  "${data_in}\hgen.dta", clear
// Load only the 2017 records instead of reading every wave and discarding most of it.
use hid cid syear i11102 if syear==2017 using  "${data_in}\pequiv.dta", clear

// Keep a single record per household.
duplicates drop hid, force


*gen hh_inc = hgi1hinc if hgi1hinc>0 & hgi1hinc!=.
// Household income, strictly positive values only. missing() also screens out
// extended missing values (.a-.z), which compare as larger than any number and
// would otherwise pass the >0 test and end up in the top bracket later on.
gen hh_inc = i11102 if i11102>0 & !missing(i11102)
drop if missing(hh_inc)
sum hh_inc

	// detailed summary (includes the median)
	sum hh_inc, d

	// 500 quantile bins of 0.2 percentage points each; percentile t therefore
	// corresponds to bin t*5, which is how the top cut-off is looked up below.
	xtile x_hh_inc= hh_inc,  nquantiles(500) 




// Build bracketed versions of hh_inc for every top cut-off percentile (global cut)
// and every bracket count from 4 to 26.
//   hh_inc_cat_T_P   bracket number (1..P) with top cut-off T and P brackets
//   d_cat_hh_inc_T_P 1 if a bracket was assigned, 0 otherwise
//   z1_I_gw_T_P_b    scalar: upper boundary of bracket I (= lower boundary of bracket I+1)
//   z2_I_gw_T_P_b    scalar: z1 minus 0.01 (kept for display and backward compatibility)
// The scalars stay in memory and are read again by the later stages of this script.
foreach t of global cut {

	// Lower bound of the open-ended top bracket: smallest income inside the
	// quantile bin that starts at percentile t (500 bins, so bin number = t*5).
	// It does not depend on the number of brackets, so look it up once per cut-off.
	local w = `t'*5
	sum hh_inc if x_hh_inc == `w'
	local min = r(min)
	if r(N) == 0 {
		// An earlier, now disabled, version searched neighbouring bins in this case.
		// Without it all boundaries become missing, so at least say so loudly.
		di as error "warning: quantile bin `w' (percentile `t') is empty - bracket boundaries for this cut-off are missing"
	}

	// ---- step 1: boundaries -------------------------------------------------
	forvalues b = 3/25 {

		local p = `b' + 1
		gen hh_inc_cat_`t'_`p' = .

		// b equally wide brackets below the cut-off, width rounded to whole units
		scalar br_help_c = `min'/`b'
		scalar br_help = round(br_help_c, 1)
		di br_help

		// br_b is the cut-off itself, each lower boundary lies one width further down
		forvalues d = `b'(-1)1 {
			if `d' == `b' scalar br_`d' = `min'
			if `d' <  `b' scalar br_`d' = `min' - (`b' - `d')*br_help
			di br_`d'
		}

		forvalues i = 1/`b' {
			scalar z1_`i'_gw_`t'_`p'_b = br_`i'
			scalar z2_`i'_gw_`t'_`p'_b = z1_`i'_gw_`t'_`p'_b - 0.01
			di "Ober z1_`i'_gw_`t'_`p' "  z1_`i'_gw_`t'_`p'_b " Unter z2_`i'_gw_`t'_`p' " z2_`i'_gw_`t'_`p'_b
			scalar drop br_`i'
		}
	}

	// ---- step 2: assign brackets ---------------------------------------------
	forvalues b = 3/25 {
		local p = `b' + 1

		// open-ended top bracket
		replace hh_inc_cat_`t'_`p' = `p' if hh_inc >= z1_`b'_gw_`t'_`p'_b & !missing(hh_inc) & hh_inc_cat_`t'_`p' == .

		// Lower brackets are half-open: lower bound included, upper bound excluded.
		// The previous version closed each bracket at z2 = z1 - 0.01, which left
		// non-integer incomes between z2 and z1 without any bracket.
		// inrange() is kept so that a missing boundary is treated as before.
		forvalues i = 1/`b' {
			local a = `i' - 1
			local lo 0
			if `i' >= 2 local lo z1_`a'_gw_`t'_`p'_b
			local hi z1_`i'_gw_`t'_`p'_b
			replace hh_inc_cat_`t'_`p' = `i' if inrange(hh_inc, `lo', `hi') & hh_inc < `hi' & hh_inc_cat_`t'_`p' == .
		}
	}

	// ---- step 3: indicator used later to count observations per bracket -------
	forvalues b = 3/25 {
		local p = `b' + 1
		gen d_cat_hh_inc_`t'_`p' = 0
		replace d_cat_hh_inc_`t'_`p' = 1 if hh_inc_cat_`t'_`p' >= 0 & hh_inc_cat_`t'_`p' != .
	}

}
*

// NOTE(review): the file name looks like a paste accident, but the fitting stage
// below reads exactly this name, so it is left unchanged here.
save   "${data_temp}\hh_data_inchh_data_inc_b.dta.dta", replace


*** MCIB, takes 1 1/2 weeks

		/*check 
		foreach t of global cut{ 
			forvalues p=5/26 {

				sum hh_inc_cat_`t'_`p'
				local i=r(mean)
				local b=`p'-1
				sum hh_inc_cat_`t'_`b'
				local k=`i'-r(mean)
				if `k'<0 global check "$check, `t'_`p'"
			}
			local i=0
			local k=0
		}
		di "$check"

		macro drop check
		*/

// For every (top cut-off, bracket count) pair: collapse the household data to one
// row per bracket with its observation count and its lower/upper bound, then pass
// that table to mciblg. The results are written by mciblg to
// brtest_w2017_hh_inc_T_P_q_b.dta (relative path, i.e. the current working directory).
// Relies on the z1_* scalars created by the bracket construction above.
use "${data_temp}\hh_data_inchh_data_inc_b.dta.dta", clear

foreach t of global cut {

	foreach p of global ca {
		local a = `p' - 1
		preserve

		collapse (count) obs=d_cat_hh_inc_`t'_`p', by(hh_inc_cat_`t'_`p')
		drop if hh_inc_cat_`t'_`p' == .

		// Bracket number of each row. The previous version used the row number _n,
		// which equals the bracket number only when no bracket is empty; with an
		// empty bracket every following row received the bounds of its neighbour.
		gen n = hh_inc_cat_`t'_`p'

		// z1 = lower bound (0 for the first bracket), z2 = upper bound
		// (left missing for the open-ended top bracket)
		gen z1 = 0
		gen z2 = .
		forvalues b = 1/`a' {
			local n = `b' + 1
			replace z1 = z1_`b'_gw_`t'_`p'_b if n == `n'
			replace z2 = z1_`b'_gw_`t'_`p'_b if n == `b'
		}

		mciblg obs z1 z2,  twopoint saving(brtest_w2017_hh_inc_`t'_`p'_q_b.dta) replace   

		di `p'
		restore
	}

}





	************** bracket test: Data Quantiles **************************************************
	***income*****
	global ca "4 6 8 10 12 14 16 18 20 22 24 26"
	global cut "90 91 92 93 94 95 96 97 98 99"

	// Turn the wide mciblg output (variables p1, p2, ...) into long format.
	// reshape does not work for more than 12000 variables, so each file is
	// processed in three slices of p-variables that are reshaped separately
	// and stacked afterwards.
	// NOTE(review): the input is read from a hard-coded directory, while the
	// fitting stage writes its files with a relative name - confirm both point
	// to the same folder.
	foreach t of global cut {
		foreach k of global ca {

			local wide_file "C:/Users/$cdpath/InterVerm/wealth_1988/cd_path/brtest_w2017_hh_inc_`t'_`k'_q_b.dta"

			forvalues slice = 1/3 {

				// first and last p-index that stays in this slice
				local first : word `slice' of 1 7500 15000
				local last  : word `slice' of 7499 14999 19999
				local below = `first' - 1
				local above = `last' + 1

				use "`wide_file'", clear

				// remove everything in front of the slice (no iterations for slice 1)
				forvalues d = 1(1)`below' {
					drop p`d'
				}
				// remove everything behind the slice up to p19999 (no iterations for slice 3)
				forvalues d = `above'(1)19999 {
					drop p`d'
				}

				keep p* ID
				reshape long p, i(ID)

				save  "${data_temp}/brtest_mcib_`t'_`k'_reshape_`slice'_q_hh_inc_b.dta", replace 
			}

			// stack the three long slices
			clear
			forvalues slice = 1/3 {
				append using "${data_temp}/brtest_mcib_`t'_`k'_reshape_`slice'_q_hh_inc_b.dta"
			}

			save  "${data_temp}/brtest_mcib_`t'_`k'_reshape_q_hh_inc_b.dta", replace 

		}
	}




	**** adjust reshaped data

	// For every (cut-off, bracket count) pair: sort the imputed values back into
	// the brackets, put the draws of each bracket into random order, number them
	// (m_x) and save one file per pair. m_x is the key that is later used to hand
	// one draw to each household of the same bracket.
	// Relies on the z1_* scalars created by the bracket construction earlier on.
	foreach t of global cut {

		foreach k of global ca {

			use  "${data_temp}/brtest_mcib_`t'_`k'_reshape_q_hh_inc_b.dta", clear

			rename p p_hh_inc_`t'_`k'

			// NOTE(review): brackets are rebuilt for every bracket count, although
			// only hh_inc_cat_T_K for the current K is used below. Left as is
			// because the saved files keep all of these variables.
			forvalues b = 3(2)25 {
				local p = `b' + 1
				gen hh_inc_cat_`t'_`p' = .

				// open-ended top bracket
				replace hh_inc_cat_`t'_`p' = `p' if p_hh_inc_`t'_`k' >= z1_`b'_gw_`t'_`p'_b & !missing(p_hh_inc_`t'_`k')

				// Lower brackets are half-open: lower bound included, upper bound
				// excluded. The imputed values are continuous, so closing a bracket
				// at z2 = z1 - 0.01 (as before) left draws between z2 and z1 without
				// a bracket and silently removed them from the pool.
				forvalues i = 1/`b' {
					local a = `i' - 1
					local lo 0
					if `i' >= 2 local lo z1_`a'_gw_`t'_`p'_b
					local hi z1_`i'_gw_`t'_`p'_b
					replace hh_inc_cat_`t'_`p' = `i' if inrange(p_hh_inc_`t'_`k', `lo', `hi') & p_hh_inc_`t'_`k' < `hi'
				}
			}

			// one file per bracket: draws in random order, numbered 1..N
			forvalues v = 1/`k' {

				preserve
					keep if hh_inc_cat_`t'_`k'==`v' 

					// seed is reset for every bracket, so each bracket uses the same random sequence
					set seed 220520
					gen ra=runiform()

					sort ra
					gen m_x=_n

					drop ra _j
					save "$data_temp/brtest_rand_m_`t'_`k'_`v'_q_hh_inc_b", replace

				restore
			}

			// stack the per-bracket files again
			clear 
			forvalues v = 1/`k' {
				append using "$data_temp/brtest_rand_m_`t'_`k'_`v'_q_hh_inc_b"
			}

			save "$data_temp/brtest_rand_m_`t'_`k'_q_hh_inc_b", replace

		}

	}
	*


	// Attach one imputed draw per household for every (cut-off, bracket count)
	// pair, winsorise observed and imputed income, and save the comparison file.
	// NOTE(review): this loads hh_data_inc.dta, whereas the bracketed data built
	// in this script is saved under a different (_b) name - confirm that this is
	// the intended input.
	use  "${data_temp}/hh_data_inc.dta", clear
	foreach t of global cut {

		// Random key that fixes the order of households inside each bracket.
		// Stored as double to avoid ties.
		set seed 220520
		gen double ra = runiform()

		foreach k of global ca {

			// Number the households within their bracket in the order of ra.
			// Sorting by ra beforehand and then calling bysort without it does
			// not guarantee that order, because sort is not stable and merge
			// re-sorts the data on its key variables.
			bysort hh_inc_cat_`t'_`k' (ra): gen m_x = _n

			// households without a matching draw are kept, unused draws are discarded
			merge 1:1 hh_inc_cat_`t'_`k' m_x using  "$data_temp/brtest_rand_m_`t'_`k'_q_hh_inc_b", keep(1 3) nogen

			drop m_x

		}

		drop ra
	}

		// winsorize; winsor2 stores the result in new variables with suffix _w

		winsor2 hh_inc, cuts(0.5 99.5) by(syear)

	foreach t of global cut {

		foreach k of global ca {
			winsor2 p_hh_inc_`t'_`k', cuts(0.5 99.5)  by(syear)
			/* by hand
			sort `k' 
			sum `k' 
			gen F_`k'=_n/r(N)*100 if `k'!=.
			sort p_`k' 
			sum p_`k' 
			gen F_p_`k'=_n/r(N)*100 if p_`k'!=. */

		}

	}
	*keep cid syear hid prime_prop* fin_assets* tan_assets* all_debt* hh_inc* busin_assets* p_*



	save  "${data_temp}/brtest_wave_all_comparison_q_final_hh_inc_b.dta", replace 





// Correlation between winsorised observed income and winsorised imputed income
// for every (cut-off, bracket count) pair, shown as a heat map.
// The former outer loop over r = 1..4 never used r and only repeated identical
// work four times, and a leftover interactive "help graph" call opened the help
// viewer on every pass; both are removed.

	*** Graph 

	use "${data_temp}/brtest_wave_all_comparison_q_final_hh_inc_b.dta", clear
	global ca "4 6 8 10 12 14 16 18 20 22 24 26"
	global cut "90 91 92 93 94 95 96 97 98 99"

	// one correlation per grid cell, kept in scalars because the data are replaced below
	foreach up_cut of global cut {
		foreach k of global ca {
			correlate p_hh_inc_`up_cut'_`k'_w hh_inc_w 
			scalar corr_`up_cut'_`k' = r(rho)
		}
	}

	// build a small table with one row per grid cell
	clear

	local n_cut : word count $cut
	local n_ca  : word count $ca
	local n_cells = `n_cut'*`n_ca'
	set obs `n_cells'

	gen cut_off=.
	gen categories=.
	gen correlation=.

	gen n=_n
	local i=1
	foreach up_cut of global cut {
		foreach k of global ca {
			replace cut_off=`up_cut' if n==`i'
			replace categories=`k' if n==`i'
			// scalar() makes sure the scalar is read, not a variable with a similar name
			replace correlation=scalar(corr_`up_cut'_`k') if n==`i'
			local i=`i'+1
		}
	}

	// correlation binned in steps of 0.05 (lower bin edge), from 0.70 upwards;
	// values below 0.70 and a correlation of exactly 1 stay missing
	gen cor_b=.
	forvalues w=70(5)95 {
		local i=(`w'+5)/100
		local c=`w'/100
		replace cor_b=`c' if correlation>=`c' & correlation<`i'
	}

	*replace cor_b=69 if correlation<0.7

	drop if cut_off==90
	drop if cut_off==.

	heatplot correlation cut_off categories, backfill colors(plasma, reverse)   ylabel(90(1)99) xlabel(4(2)26) discrete(2)  cuts(.7(.02)1) xtitle("brackets") ytitle("upper cut-off (percentile)") plotregion(fcolor(white)) graphregion(fcolor(white))
	graph export "$data_graph/corr_heat_inc_annual_b.pdf", replace


	// Additional views of the correlation table that is still in memory.
	heatplot cor_b cut_off categories, backfill colors(plasma) ylabel(90(1)99) xlabel(4(1)26) discrete(2)
	graph export "$data_graph/corr_heat_b.pdf", replace




	hexplot correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98) xlabel(4(2)26) discrete(2) 
	graph export "$data_graph/corr_hex_b.pdf", replace


	hexplot correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98)  xlabel(4(2)26) 

	// Round to two decimals. The previous unit of 0.00 made round() return the
	// value unchanged, so this plot was identical to the one above.
	// NOTE(review): 0.01 is assumed to be the intended precision - confirm.
	gen r_correlation=round(correlation, 0.01)
	hexplot r_correlation cut_off categories, backfill colors(plasma) ylabel(84(2)98)  xlabel(0(2)30)


	//check winsor
	// The correlation table in memory has no income variables, so the household
	// level comparison file has to be loaded again before comparing raw and
	// winsorised income.
	use "${data_temp}/brtest_wave_all_comparison_q_final_hh_inc_b.dta", clear

	gen d_diff=0
	replace d_diff=1 if  hh_inc!=hh_inc_w


	
	
