****This file prepares the administrative data and computes the county totals from the administrative data that provide the measure of truth for the MSE calculations in the paper. Unfortunately, the data are confidential and can only be accessed by writing a proposal to the U.S. Census Bureau.
*Run settings
*f_year and l_year: first and last year processed by the yearly loop and by the export step
local f_year 2007
local l_year 2012
*create_data: yes rebuilds the yearly recipient files from the raw administrative files,
*any other value makes the yearly loop load the previously saved data/recipients_YYYY files
local create_data no
*open the text log, capture lets the script continue if the log cannot be opened
capture log using logs/01_admin_data.txt, text replace

*Value label cty: maps the odd numeric county codes 1 to 123 to New York county names.
*This version still lists Hamilton (41) and Herkimer (43) separately. The yearly loop recodes 41 into 43,
*and the export step redefines cty with a pooled label for code 43.
*NOTE(review): value labels are stored with the dataset, so the use commands in the yearly loop may
*replace or discard this definition before label val is reached - confirm.
label define cty 1 "Albany" 3 "Allegany" 5 "Bronx" 7 "Broome" 9 "Cataraugus" 11 "Cayuga" 13 "Chautauqa" 15 "Chemung" 17 "Chenango" 19 "Clinton" 21 "Columbia" 23 "Cortland" 25 "Delaware" 27 "Dutchess" 29 "Erie" 31 "Essex" 33 "Franklin" 35 "Fulton" 37 "Genesee" 39 "Greene" 41 "Hamilton" 43 "Herkimer" 45 "Jefferson" 47 "Kings" 49 "Lewis" 51 "Livingston" 53 "Madison" 55 "Monroe" 57 "Montgomery" 59 "Nassau" 61 "NewYork" 63 "Niagara" 65 "Oneida" 67 "Onondaga" 69 "Ontario" 71 "Orange" 73 "Orleans" 75 "Oswego" 77 "Otsego" 79 "Putnam" 81 "Queens" 83 "Rensselaer" 85 "Richmond" 87 "Rockland" 89 "StLawrence" 91 "Saratoga" 93 "Schenectady" 95 "Schoharie" 97 "Schuyler" 99 "Seneca" 101 "Steuben" 103 "Suffolk" 105 "Sullivan" 107 "Tioga" 109 "Tompkins" 111 "Ulster" 113 "Warren" 115 "Washington" 117 "Wayne" 119 "Westchester" 121 "Wyoming" 123 "Yates"

***Yearly loop: build (or load) the person-level recipient file of each year and stack the county totals.
*For every year one column of counts per group is appended to the matrices tot_rec_all, tot_rec_female
*and tot_rec_eld. The rows follow the county order of tabulate, and the county codes of the tabulation
*are left in the matrix countyid, which the export step further down reuses for all years.

*start from empty accumulators: the stacking below uses nullmat(), so matrices left over from an
*aborted earlier run in the same session would otherwise be appended to
capture matrix drop tot_rec_all
capture matrix drop tot_rec_female
capture matrix drop tot_rec_eld
capture matrix drop countyid_ref

foreach year of numlist `f_year'/`l_year' {
	if "`create_data'"=="yes" {
		*load the monthly records with universe_cd F from the raw yearly file
		use pik maf_curcounty tgr_stand_orig_county report_month universe_cd birth_dt sex_cd maf_curstate tgr_stand_orig_state district_cd city_addr if universe_cd=="F" using ../data/ny_admin`year'.dta
		*the first two characters of report_month hold the month
		gen int month=real(substr(report_month,1,2))
		destring maf_curcounty tgr_stand_orig_county maf_curstate tgr_stand_orig_state district_cd, replace
		*convert the birth date string (month day year) into a Stata date
		ren birth_dt birth_dt_o
		gen birth_dt=date(birth_dt_o,"MDY")
		drop birth_dt_o
		*fill missing current state codes from the original state code
		replace maf_curstate=tgr_stand_orig_state if maf_curstate==. & tgr_stand_orig_state!=.
		*receipt marks one month of receipt per record and is counted in the collapse below
		gen receipt=1
		qui: count
		local n_orig=r(N)
		qui: count if pik==""
		di "removing observations without a pik, since we cannot infer the number of people from the number of months of receipt. Losing `r(N)' number of months out of `n_orig' in `year'"
		drop if pik==""
		*count exactly the records that are dropped: records with a missing state are kept,
		*so they must not be included in the reported number
		qui: count if maf_curstate!=36 & maf_curstate!=.
		di "removing months of receipt from out of state (cannot allocate them to counties and removing them makes sure people who are in NY in any month get allocated to NY). Losing `r(N)' number of months out of `n_orig' in `year'"
		drop if maf_curstate!=36 & maf_curstate!=.
		*combine county IDs (have a few missing values)
		replace maf_curcounty=tgr_stand_orig_county if maf_curcounty==. & tgr_stand_orig_county!=.
		*district to county crosswalk: keep master records only, otherwise districts that occur
		*only in the crosswalk are appended as rows with an empty pik and later counted as a person
		merge m:1 district_cd using data/district_cty_xwalk.dta, keep(master match)
		replace maf_curcounty=mode_cty if maf_curcounty==.
		*district 66 records that are still without a county: assign the county from the free-text city field.
		*The order of the statements matters because each one only fills records that are still missing.
		replace maf_curcounty=5 if maf_curcounty==. & district_cd==66 & (strmatch(city_addr,"*BRONX*") | strmatch(city_addr,"B*X") | strmatch(city_addr,"*NX") | strmatch(city_addr,"BNRON*"))
		replace maf_curcounty=47 if maf_curcounty==. & district_cd==66 & (strmatch(city_addr,"*BROOKLYN*") | strmatch(city_addr,"B*K*Y*") | strmatch(city_addr,"BR*Y*") | strmatch(city_addr,"B*Y*N*") | strmatch(city_addr,"BROOK**"))
		replace maf_curcounty=85 if maf_curcounty==. & district_cd==66 & (strmatch(city_addr,"*ST*ISL*") | strmatch(city_addr,"*S*SLAND*") | strmatch(city_addr,"*TEN*I*ND*") | strmatch(city_addr,"SI"))
		replace maf_curcounty=61 if maf_curcounty==. & district_cd==66 & (strmatch(city_addr,"N*Y*") | strmatch(city_addr,"MA*N"))
		replace maf_curcounty=81 if maf_curcounty==. & district_cd==66 & (strmatch(city_addr,"Q*N*"))
		*place names assigned to county 81 (patterns that were listed twice appear once here)
		replace maf_curcounty=81 if maf_curcounty==. & district_cd==66 & ( ///
			strmatch(city_addr,"AST*") | strmatch(city_addr,"AR*NE") | strmatch(city_addr,"BELL*R*") | ///
			strmatch(city_addr,"*BAYSIDE*") | strmatch(city_addr,"BRI*WOOD*") | strmatch(city_addr,"CAM*RIA*") | ///
			strmatch(city_addr,"COLL*P*") | strmatch(city_addr,"CO*NA*") | strmatch(city_addr,"*ELMHUR*") | ///
			strmatch(city_addr,"*R*C*K*WAY*") | strmatch(city_addr,"FAR*") | strmatch(city_addr,"*FLUSHIN*") | ///
			strmatch(city_addr,"FLUSH*G") | strmatch(city_addr,"FOREST*H*") | strmatch(city_addr,"FOR*HILL*") | ///
			strmatch(city_addr,"FRESH*") | strmatch(city_addr,"GLEND*") | strmatch(city_addr,"HOLLIS*") | ///
			strmatch(city_addr,"HOWARD*") | strmatch(city_addr,"JACKSON*") | strmatch(city_addr,"JAC*H*G*") | ///
			strmatch(city_addr,"JAM*CA*") | strmatch(city_addr,"KEW*") | strmatch(city_addr,"L*I*C") | ///
			strmatch(city_addr,"L*I*CITY") | strmatch(city_addr,"LONG*") | strmatch(city_addr,"LAUR*TON*") | ///
			strmatch(city_addr,"MAS*ETH") | strmatch(city_addr,"MIDDLE*") | strmatch(city_addr,"OZ*NE*") | ///
			strmatch(city_addr,"*OZONE*") | strmatch(city_addr,"*OZ*PARK") | strmatch(city_addr,"REGO*") | ///
			strmatch(city_addr,"*RICH*HILL*") | strmatch(city_addr,"RIDGEW*") | strmatch(city_addr,"R*WOOD") | ///
			strmatch(city_addr,"ROSE*LE*") | strmatch(city_addr,"*ALBANS*") | strmatch(city_addr,"*SPRINGF*L*D*") | ///
			strmatch(city_addr,"S*NY*SIDE*") | strmatch(city_addr,"*SUNNY*") | strmatch(city_addr,"WHIT*ST*") | ///
			strmatch(city_addr,"WOOD*VEN*") | strmatch(city_addr,"WOODS*") | strmatch(city_addr,"WO*SIDE"))
		*pool Herkimer and Hamilton County
		recode maf_curcounty (41=43)
		tab maf_curcounty, mis
		drop universe_cd report_month maf_curstate tgr_stand_orig_state tgr_stand_orig_county mode_cty _merge district_cd
		sort pik month
		*person-level value of county, birth date and sex: the most frequent monthly value,
		*ties resolved in favour of the largest value (maxmode). The monthly values are kept
		*with suffix _o for the min and max comparison below.
		foreach var of varlist maf_curcounty birth_dt sex_cd {
			ren `var' `var'_o
			bysort pik: egen `var'=mode(`var'_o), maxmode
		}
		*one row per pik: first and last month, number of months of receipt, modal characteristics
		collapse (min) first_month=month min_maf_curcounty=maf_curcounty_o min_birth_dt=birth_dt_o ///
			(max) max_maf_curcounty=maf_curcounty_o max_birth_dt=birth_dt_o last_month=month ///
			(count) nmonth=receipt ///
			(first) maf_curcounty birth_dt sex_cd receipt, by(pik) fast
		gen year=`year'
		compress
		**check discrepancy between min, max and mode measures and drop them
		foreach var in maf_curcounty birth_dt {
			qui: count if min_`var'!=max_`var'
			di "Number of individuals for whom `var' changes, `year': `r(N)'"
			drop min_`var' max_`var'
			qui: count if `var'==.
			di "Number of individuals for whom `var' is missing, `year': `r(N)'"
		}
		label val maf_curcounty cty

		*save yearly data file
		save data/recipients_`year'.dta, replace
	}
	else {
		*reuse the person-level file written by an earlier run
		use data/recipients_`year'
	}

	***get county totals from admin data: # of recipients, # of female recipients, # of elderly recipients
	tab maf_curcounty, matcell(number) matrow(countyid)
	*the export step labels the rows of every year with the county list of the last year,
	*so stop if the list of counties differs from the one of the first year
	if `year'==`f_year' {
		mat def countyid_ref=countyid
	}
	if rowsof(countyid)!=rowsof(countyid_ref) {
		di as error "number of counties in `year' differs from `f_year': county totals would be misaligned"
		error 459
	}
	if mreldif(countyid,countyid_ref)!=0 {
		di as error "county codes in `year' differ from `f_year': county totals would be misaligned"
		error 459
	}
	mat def tot_rec_all=(nullmat(tot_rec_all)\number)
	*female recipients
	tab maf_curcounty if sex_cd=="F", matcell(number)
	*tabulate omits counties without any case, which would shift all following rows
	if r(r)!=rowsof(countyid) {
		di as error "female recipients in `year': some county has no cases, rows would not line up with countyid"
		error 459
	}
	mat def tot_rec_female=(nullmat(tot_rec_female)\number)
	*elderly recipients: more than 60 years old on 31 December of the year.
	*A missing birth date has to be excluded explicitly, because a missing value compares as
	*larger than any number and would otherwise count as elderly.
	tab maf_curcounty if mdy(12,31,`year')-birth_dt>60*365.25 & !missing(birth_dt), matcell(number)
	if r(r)!=rowsof(countyid) {
		di as error "elderly recipients in `year': some county has no cases, rows would not line up with countyid"
		error 459
	}
	mat def tot_rec_eld=(nullmat(tot_rec_eld)\number)

	drop _all
}

**export yearly numbers to spreadsheet
*Builds a year by county frame from the matrices left by the yearly loop (countyid, tot_rec_all,
*tot_rec_female, tot_rec_eld), writes one sheet per group to results/small_area.xlsx and merges the
*admin totals into data/est_all.dta, data/est_female.dta and data/est_eld.dta.
*county names and ids (code 41 is pooled into 43 by the yearly loop, hence the combined label)
label define cty 1 "Albany" 3 "Allegany" 5 "Bronx" 7 "Broome" 9 "Cataraugus" 11 "Cayuga" 13 "Chautauqa" 15 "Chemung" 17 "Chenango" 19 "Clinton" 21 "Columbia" 23 "Cortland" 25 "Delaware" 27 "Dutchess" 29 "Erie" 31 "Essex" 33 "Franklin" 35 "Fulton" 37 "Genesee" 39 "Greene" 43 "Herkimer and Hamilton" 45 "Jefferson" 47 "Kings" 49 "Lewis" 51 "Livingston" 53 "Madison" 55 "Monroe" 57 "Montgomery" 59 "Nassau" 61 "NewYork" 63 "Niagara" 65 "Oneida" 67 "Onondaga" 69 "Ontario" 71 "Orange" 73 "Orleans" 75 "Oswego" 77 "Otsego" 79 "Putnam" 81 "Queens" 83 "Rensselaer" 85 "Richmond" 87 "Rockland" 89 "StLawrence" 91 "Saratoga" 93 "Schenectady" 95 "Schoharie" 97 "Schuyler" 99 "Seneca" 101 "Steuben" 103 "Suffolk" 105 "Sullivan" 107 "Tioga" 109 "Tompkins" 111 "Ulster" 113 "Warren" 115 "Washington" 117 "Wayne" 119 "Westchester" 121 "Wyoming" 123 "Yates", replace

*generate year and cty variable
*reset the accumulators first, the stacking uses nullmat() and would append to leftovers of an aborted run
capture matrix drop repcountyid
capture matrix drop years
*stack the county codes once per year, in the same year order in which the totals were stacked
foreach year of numlist `f_year'/`l_year' {
	mat def repcountyid=(nullmat(repcountyid)\countyid)
	mat def years=(nullmat(years)\J(rowsof(countyid),1,`year'))
}
svmat int repcountyid
label val repcountyid1 cty
ren repcountyid1 cty
decode cty, generate(county_name)
svmat int years
ren years1 year
*the frame is already in year-county order, svmat below fills the totals row by row in that order
sort year cty
foreach mtrx in all female eld {
	*svmat writes matrix row i into observation i, so the number of rows has to equal the number of observations
	if rowsof(tot_rec_`mtrx')!=_N {
		di as error "tot_rec_`mtrx' has " rowsof(tot_rec_`mtrx') " rows but the year-county frame has " _N " observations"
		error 459
	}
	svmat tot_rec_`mtrx', names(tot_rec_`mtrx'_ad)
	ren tot_rec_`mtrx'_ad1 tot_rec_`mtrx'_ad
	label var tot_rec_`mtrx'_ad "Admin Data, `mtrx'"
	*year is listed twice in the varlist, left unchanged so that the layout of the sheet stays the same
	export excel year cty county_name year tot_rec_`mtrx'* using results/small_area.xlsx, sh("`mtrx'_ad") sheetreplace firstrow(var) nol
	*add the admin totals to an existing estimates file, or create the file if the merge fails.
	*preserve and restore keep the frame in memory untouched: observations or a new sort order
	*introduced by the merge would otherwise misalign the svmat of the next group.
	preserve
	capture noisily merge 1:1 year cty using data/est_`mtrx'.dta, update
	capture drop _merge
	save data/est_`mtrx'.dta, replace
	restore
	keep cty county_name year
}
clear all
cap log close


