******+ Category check for 2017 ***********************************************
*** Extra analysis

// Fraction of the sample that is kept: 1/`sampleslit' of the 2017 observations.
local sampleslit = 5

***wealth*****
// Run the helper do-file first (presumably it defines the mciblg command used
// further down -- confirm); it is executed before the globals and scalars below are reset.
do "$do_path\Check_gbgfit_new_waves\02_b_mciblg.do"

// $ca  : bracket counts that are later passed on to mciblg
// $cut : percentile at which the top bracket starts
global ca "4 10"
global cut "98"

scalar drop _all

// Load household wealth and restrict to the 2017 wave.
use hid cid syear w010ha using "${data_in}\hwealth.dta", clear
drop if syear != 2017

// Only strictly positive wealth enters the analysis; everything else stays missing.
generate gro_wealth = w010ha if w010ha != . & w010ha > 0

// Random subsample: shuffle the rows, then keep the first N/`sampleslit' of them.
// NOTE(review): no -set seed- is visible here, so the subsample differs between
// runs unless a seed is set upstream -- confirm.
generate ra = runiform()
sort ra
generate n = _n
summarize n
scalar sc_r_N = r(N) / `sampleslit'
keep if n <= sc_r_N
drop ra n

// Brackets are arranged by quantiles (500 quantiles of positive wealth).
/* The top bracket starts at the percentile given in $cut, i.e. at quantile
   `t'*5 of 500 (check sensibility here later). */
xtile x_gro_wealth = gro_wealth, nquantiles(500)

foreach t of global cut {

	// Number of break points for which no populated quantile could be found.
	// (Previously `unsolved' was displayed but never defined, so failures went unnoticed.)
	local unsolved = 0

	// Algorithm which creates 4 to 26 brackets (3 to 25 break points) based on 500 quantiles.
	forvalues b = 3/25 {

		local p = `b' + 1
		gen gro_wealth_cat_`t'_`p' = .

		// Distance between two neighbouring break points, measured in quantiles (rounded).
		scalar br_help_c = `t'/`b'*5
		scalar br_help = round(br_help_c, 1)
		di br_help

		// br_b is the cut-off quantile itself; every lower break lies br_help quantiles
		// below its upper neighbour. Computed top-down so the log order stays the same.
		forvalues d = `b'(-1)1 {
			scalar br_`d' = `t'*5 - (`b' - `d')*br_help
			di br_`d'
		}

		forvalues i = 1/`b' {
			// Break value = largest wealth observed inside quantile br_i.
			qui sum gro_wealth if x_gro_wealth == br_`i'
			scalar z1_`i'_gw_`t'_`p' = r(max)

			// Check for missing categories: in some cases the algorithm above picks an
			// empty quantile. Then try c quantiles above and, failing that, c quantiles below.
			if r(max) == . {
				forvalues c = 1(1)5 {
					scalar br_miss = br_`i' + `c'
					qui sum gro_wealth if x_gro_wealth == br_miss
					scalar z1_`i'_gw_`t'_`p' = r(max)
					di "z1_`i'_gw_`t'_`p' " z1_`i'_gw_`t'_`p'
					if r(max) != . {
						di "solved +`c', cut of at " br_miss " instead of " br_`i'
					}
					else {
						// not solved above: check c quantile(s) before
						scalar br_miss = br_`i' - `c'
						qui sum gro_wealth if x_gro_wealth == br_miss
						scalar z1_`i'_gw_`t'_`p' = r(max)
						if r(max) != . {
							di "solved -`c', cut of at " br_miss " instead of " br_`i'
						}
					}
					// Found a quantile? If so, stop. The scalar already holds the result of
					// the last quantile tried, so no further summarize is needed.
					if !missing(scalar(z1_`i'_gw_`t'_`p')) continue, break
				}
			}

			// Still nothing found inside the search window: report it instead of
			// silently carrying a missing break into the category assignment below.
			if missing(scalar(z1_`i'_gw_`t'_`p')) {
				local ++unsolved
				di as error "unsolved: no populated quantile near " br_`i' " for break `i' (`p' brackets, cut `t')"
			}

			// Second break: upper bound of bracket i, just below the break value.
			scalar z2_`i'_gw_`t'_`p' = z1_`i'_gw_`t'_`p' - 0.01
			di "Ober z1_`i'_gw_`t'_`p' "  z1_`i'_gw_`t'_`p'  " Unter z2_`i'_gw_`t'_`p' " z2_`i'_gw_`t'_`p'
			scalar drop br_`i'
		}
	}

	di "unsolved break points (cut `t'): `unsolved'"

	// Transfer the break points into the wealth category variables.
	forvalues b = 3/25 {
		local p = `b' + 1

		// Top bracket: everything at or above the highest break.
		replace gro_wealth_cat_`t'_`p' = `p' if gro_wealth >= scalar(z1_`b'_gw_`t'_`p') & gro_wealth != . & gro_wealth_cat_`t'_`p' == .

		forvalues i = 1/`b' {
			local a = `i' - 1
			// Bracket 1 starts at 0, bracket i>=2 starts at the previous break value.
			if `i' == 1 replace gro_wealth_cat_`t'_`p' = `i' if inrange(gro_wealth, 0, scalar(z2_`i'_gw_`t'_`p')) & gro_wealth_cat_`t'_`p' == .
			if `i' >= 2 replace gro_wealth_cat_`t'_`p' = `i' if inrange(gro_wealth, scalar(z1_`a'_gw_`t'_`p'), scalar(z2_`i'_gw_`t'_`p')) & gro_wealth_cat_`t'_`p' == .
		}
	}

	// Create a dummy that is 1 for every observation that received a category.
	forvalues b = 3/25 {
		local p = `b' + 1
		gen d_cat_gro_wealth_`t'_`p' = 0
		replace d_cat_gro_wealth_`t'_`p' = 1 if gro_wealth_cat_`t'_`p' >= 0 & gro_wealth_cat_`t'_`p' != .
	}

}
*
/* //check if all categories have been generated
foreach t of global cut{ 
	forvalues p=5/26 {

		sum gro_wealth_cat_`t'_`p'
		local i=r(mean)
		local b=`p'-1
		sum gro_wealth_cat_`t'_`b'
		local k=`i'-r(mean)
		if `k'<0 global check "$check, `t'_`p'"
	}
	local i=0
	local k=0
}
di "$check"

macro drop check
*/

save   "${data_temp}\hh_wealth_reduced_`sampleslit'.dta", replace


// Get ready for mciblg: one row per bracket with its count (obs), lower bound (z1)
// and upper bound (z2). The top bracket keeps z2 missing.
foreach t of global cut {

	foreach p of global ca {
		local catvar gro_wealth_cat_`t'_`p'
		local nbreaks = `p' - 1

		preserve
		collapse (count) obs = d_cat_gro_wealth_`t'_`p', by(`catvar')

		// Row counter kept so the dataset handed to mciblg has the same columns as before.
		g n = _n
		drop if `catvar' == .

		gen z1 = 0
		gen z2 = .

		// Attach the bounds by bracket number, not by row position: if a bracket has
		// no observations, collapse yields no row for it and row numbers no longer
		// coincide with bracket numbers. scalar() makes sure the stored break values
		// are used and never a variable of a similar name.
		forvalues b = 1/`nbreaks' {
			local upper = `b' + 1
			replace z1 = scalar(z1_`b'_gw_`t'_`p') if `catvar' == `upper'
			replace z2 = scalar(z1_`b'_gw_`t'_`p') if `catvar' == `b'
		}

		// NOTE(review): saving() has no directory, so the file lands in the current
		// working directory -- confirm this is intended.
		mciblg obs z1 z2,  twopoint saving(brtest_w2017_gro_wealth_`t'_`p'_q_reduced_`sampleslit'.dta) replace
		di `p'
		restore
	}

}




******+ Category check for 2017 ***********************************************
*****income**********
// Run the helper do-file for the income part (presumably it defines the mciblg
// variant used further down -- confirm).
do "$do_path\Check_gbgfit_new_waves\02_c_mciblg.do"

// Fraction of the sample that is kept: 1/`sampleslit' of the 2017 households.
local sampleslit = 5

// Load household income, restrict to the 2017 wave and keep one row per household.
use hid cid syear hgi1hinc using "${data_in}\hgen.dta", clear
drop if syear != 2017
duplicates drop hid, force

// Only strictly positive income enters the analysis; everything else stays missing.
generate hh_inc = hgi1hinc if hgi1hinc != . & hgi1hinc > 0

// Random subsample: shuffle the rows, then keep the first N/`sampleslit' of them.
// NOTE(review): no -set seed- is visible here, so the subsample differs between
// runs unless a seed is set upstream -- confirm.
generate ra = runiform()
sort ra
generate n = _n
summarize n
scalar sc_r_N = r(N) / `sampleslit'
keep if n <= sc_r_N
drop ra n

// Detailed summary (shows, among others, the median) for the log.
summarize hh_inc, detail

// Brackets are arranged by quantiles (500 quantiles of positive household income).
/* The top bracket starts at the percentile given in $cut, i.e. at quantile
   `t'*5 of 500 (check sensibility here later). */
xtile x_hh_inc = hh_inc, nquantiles(500)



foreach t of global cut {

	// Number of break points for which no populated quantile could be found.
	// (Previously `unsolved' was displayed but never defined, so failures went unnoticed.)
	local unsolved = 0

	// Creates 4 to 26 brackets (3 to 25 break points) based on 500 quantiles.
	forvalues b = 3/25 {

		local p = `b' + 1
		gen hh_inc_cat_`t'_`p' = .

		// Distance between two neighbouring break points, measured in quantiles (rounded).
		scalar br_help_c = `t'/`b'*5
		scalar br_help = round(br_help_c, 1)
		di br_help

		// br_b is the cut-off quantile itself; every lower break lies br_help quantiles
		// below its upper neighbour. Computed top-down so the log order stays the same.
		forvalues d = `b'(-1)1 {
			scalar br_`d' = `t'*5 - (`b' - `d')*br_help
			di br_`d'
		}

		forvalues i = 1/`b' {
			// Break value = largest income observed inside quantile br_i.
			qui sum hh_inc if x_hh_inc == br_`i'
			scalar z1_`i'_inc_`t'_`p' = r(max)

			// Check for missing categories: in rare cases the algorithm above picks an
			// empty quantile. Then try c quantiles above and, failing that, c quantiles below.
			if r(max) == . {
				forvalues c = 1(1)10 {
					scalar br_miss = br_`i' + `c'
					qui sum hh_inc if x_hh_inc == br_miss
					scalar z1_`i'_inc_`t'_`p' = r(max)
					di "z1_`i'_inc_`t'_`p' " z1_`i'_inc_`t'_`p'
					if r(max) != . {
						di "solved +`c', cut of at " br_miss " instead of " br_`i'
					}
					else {
						// not solved above: check c quantile(s) before
						scalar br_miss = br_`i' - `c'
						qui sum hh_inc if x_hh_inc == br_miss
						scalar z1_`i'_inc_`t'_`p' = r(max)
						if r(max) != . {
							di "solved -`c', cut of at " br_miss " instead of " br_`i'
						}
					}
					// Found a quantile? If so, stop. The scalar already holds the result of
					// the last quantile tried, so no further summarize is needed.
					if !missing(scalar(z1_`i'_inc_`t'_`p')) continue, break
				}
			}

			// Still nothing found inside the search window: report it instead of
			// silently carrying a missing break into the category assignment below.
			if missing(scalar(z1_`i'_inc_`t'_`p')) {
				local ++unsolved
				di as error "unsolved: no populated quantile near " br_`i' " for break `i' (`p' brackets, cut `t')"
			}

			// Second break: upper bound of bracket i, just below the break value.
			scalar z2_`i'_inc_`t'_`p' = z1_`i'_inc_`t'_`p' - 0.01
			di "Ober z1_`i'_inc_`t'_`p' "  z1_`i'_inc_`t'_`p'  " Unter z2_`i'_inc_`t'_`p' " z2_`i'_inc_`t'_`p'
			scalar drop br_`i'
		}
	}

	di "unsolved break points (cut `t'): `unsolved'"

	// Transfer the break points into the income category variables.
	forvalues b = 3/25 {
		local p = `b' + 1

		// Top bracket: everything at or above the highest break.
		replace hh_inc_cat_`t'_`p' = `p' if hh_inc >= scalar(z1_`b'_inc_`t'_`p') & hh_inc != . & hh_inc_cat_`t'_`p' == .

		forvalues i = 1/`b' {
			local a = `i' - 1
			// Bracket 1 starts at 0, bracket i>=2 starts at the previous break value.
			if `i' == 1 replace hh_inc_cat_`t'_`p' = `i' if inrange(hh_inc, 0, scalar(z2_`i'_inc_`t'_`p')) & hh_inc_cat_`t'_`p' == .
			if `i' >= 2 replace hh_inc_cat_`t'_`p' = `i' if inrange(hh_inc, scalar(z1_`a'_inc_`t'_`p'), scalar(z2_`i'_inc_`t'_`p')) & hh_inc_cat_`t'_`p' == .
		}
	}

	// Create a dummy that is 1 for every observation that received a category.
	forvalues b = 3/25 {
		local p = `b' + 1
		gen d_cat_hh_inc_`t'_`p' = 0
		replace d_cat_hh_inc_`t'_`p' = 1 if hh_inc_cat_`t'_`p' >= 0 & hh_inc_cat_`t'_`p' != .
	}

}
*

save   "${data_temp}\hh_data_inc_reduced_`sampleslit'.dta", replace

/*check 
foreach t of global cut{ 
	forvalues p=5/26 {

		sum hh_inc_cat_`t'_`p'
		local i=r(mean)
		local b=`p'-1
		sum hh_inc_cat_`t'_`b'
		local k=`i'-r(mean)
		if `k'<0 global check "$check, `t'_`p'"
	}
	local i=0
	local k=0
}
di "$check"

macro drop check
*/


// Get ready for mciblg: one row per bracket with its count (obs), lower bound (z1)
// and upper bound (z2). The top bracket keeps z2 missing.
foreach t of global cut {

	foreach p of global ca {
		local catvar hh_inc_cat_`t'_`p'
		local nbreaks = `p' - 1

		preserve
		collapse (count) obs = d_cat_hh_inc_`t'_`p', by(`catvar')

		// Row counter kept so the dataset handed to mciblg has the same columns as before.
		g n = _n
		drop if `catvar' == .

		gen z1 = 0
		gen z2 = .

		// Attach the bounds by bracket number, not by row position: if a bracket has
		// no observations, collapse yields no row for it and row numbers no longer
		// coincide with bracket numbers. scalar() makes sure the stored break values
		// are used and never a variable of a similar name.
		forvalues b = 1/`nbreaks' {
			local upper = `b' + 1
			replace z1 = scalar(z1_`b'_inc_`t'_`p') if `catvar' == `upper'
			replace z2 = scalar(z1_`b'_inc_`t'_`p') if `catvar' == `b'
		}

		// NOTE(review): saving() has no directory, so the file lands in the current
		// working directory -- confirm this is intended.
		mciblg obs z1 z2,  twopoint saving(brtest_w2017_hh_inc_`t'_`p'_q_reduced_`sampleslit'.dta) replace
		di `p'
		restore
	}

}




