* ---------------------------------------------------------------------------
* Configuration: current wave, input/output locations and covariate lists.
* Everything below reads these globals; adapt the paths before running.
* ---------------------------------------------------------------------------
global cy 21 // current year of data MODIFY
global data C:\Other-Surveys\SHP // where original data is stored
global home "M:\SHP\Research\2017 Longitudinal income imputation in the SHP\" // home directory
global docs "${home}Docs\" // where xlsx tables are written to
*global temp c:\temp\ //  for home office
* $temp is used further below as the location of the testempyn_* working copies.
* If it has not been defined (e.g. by uncommenting the line above or in a
* profile.do), "$temp\file" would expand to "\file", i.e. the drive root.
* Fall back to the current working directory in that case.
if "$temp"=="" {
 global temp "."
}
global SILC "N:\Team-Ursina\imputation_fe\silc_2016\" // This data contains income variables from the population register and is not allowed to be made publicly available
cd M:\Projects\CNEF-Imputation

global soc nbadult nbkid urban NW_Europe_US_AUS mininc_endsmeet sathhfinances housesubsid computer car hhgetsintodebt hhfinanceseasy married swiss partner eduyears parent_high male paymentarrears noisy dentist pillar3 resto invfriend holiday rooms owner hhcansave hhspendswhatitearns hheatssavings unemp retired mini part full // socdemo variables for regression imputation (will be used together with job variables)
global job $soc treiman jobchange jobemployerch employerch supervision // job related variables for regression imputation

global indepSILC agegr male edu3 swiss NW_Europe_US_AUS nbadult nbkid married big urban owner holiday car computer noisy /*resto dentist*/ mininc_endsmeet referenceperson part full partner // variables included in both SHP and SILC to check missingness mechanism of unit-nonresponse (unr)
global indepSILC_ i.agegr male m.edu3 swiss NW_Europe_US_AUS nbadult nbkid married i.big i.urban owner holiday car computer noisy /*resto dentist*/ mininc_endsmeet referenceperson part full m.partner // for ice imputation

*****************************


* --- SILC: prepare covariates and test the nonresponse mechanisms -----------
* 1) Load the SILC extract and build covariates named like the entries of
*    $indepSILC (swiss, NW_Europe_US_AUS, agegr, urban, ...).
* 2) Fill missing covariate values with ice and save the result.
* 3) For unit nonresponse (unr) and item nonresponse (inr) separately, fit a
*    logit without and with income deciles and compare them with lrtest.
*    The stored estimates SILCunr and SILCinr are restored later in this file.
use ${SILC}tempSILC, clear // SILC data with  income from register (deciles), not made available!
tab unr inr, cell // frequency actual unr and inr
gen swiss=nationality==1 | nationality==2 | nationality==4 
gen NW_Europe_US_AUS=nationality==3 // proxy for EU, not swiss
cap drop agegr
recode rx010 (min/24=1 "<25")(25/44=2 "25-44")(45/64=3 "45-64")(65/max=4 ">64"), gen(agegr) lab(agegr)
recode urbanity (1/2=1), gen(urban)
rename (minendsmeet noise bigregion_) (mininc_endsmeet noisy bigregion)
su unr i.deciles $indepSILC
ice $indepSILC_, clear // impute few missing values from independent variables
compress
save tempSILC_ice, replace // includes ice imputed independent variables

* _mj==1 selects the first imputed copy produced by ice
use tempSILC_ice if _mj==1, clear // ice imputed dataset
rename bigregion bigreg
su inr unr i.deciles $indepSILC
logit unr $indepSILC // MAR unr (all independent variables except income)
est store SILCunrnoincome
logit unr i.deciles $indepSILC // NMAR unr, adding income
est store SILCunr
lrtest SILCunrnoincome SILCunr // UNR mechanism is NMAR (1%)

logit inr $indepSILC // MAR inr (all independent variables except income)
est store SILCinrnoincome
logit inr i.deciles $indepSILC // NMAR inr, adding income
est store SILCinr
lrtest SILCinrnoincome SILCinr // INR mechanism is MAR (11%)


* --- SILC: compare the inr model with the unr model -------------------------
* The imputed SILC file is stacked on itself: copy d==0 carries inr in the
* variable unr, copy d==1 carries the original unr. The same logit is then
* fitted on each copy and the two stored results are compared.
use tempSILC_ice, clear // ice imputed dataset
drop unr
gen byte d=0
append using tempSILC_ice, nolab
keep if _mj==1
recode d (.=1) // appended observations have d missing -> second copy
replace unr=inr if d==0 // first copy: outcome is item nonresponse
drop inr
logit unr i.deciles $indepSILC if d==0 // inr
est sto inr
logit unr i.deciles $indepSILC if d==1 // unr
est sto unr
suest inr unr
hausman inr unr // inr coefficients are different from unr coefficients


*********************************************************
* --- SHP dataset preparation, one file per wave -----------------------------
* For each wave `j' (two-digit year): load person-level variables, add
* individual income, household variables, last job, social origin and
* sample/sex, recode everything into wave-independent names, and save the
* result as shp`j'.dta in the working directory.
foreach j in 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 { // SHP dataset preparation, each wave
 use idpers idhous`j' relarp`j' age`j' tr1maj`j' p`j'* occupa`j' civsta`j' status`j' nat_1_`j' tr1maj`j' edyear`j' x`j'i04 x`j'i05 using $data\shp`j'_p_user, clear
 * NOTE(review): the merge key below includes year, which is not in the varlist loaded above - confirm it exists in the file
 merge 1:1 idpers year using $data\shp`j'_p_user, keep(1 3) nogen keepusing(i`j'ptotn i`j'empyn) // ind. income
 merge m:1 idhous`j' using $data\shp`j'_h_user, keepusing(nbpers`j' nbkid`j' h`j'* region`j' com2_`j') nogen keep(3)
 merge 1:1 idpers using $data\shp_lj, keepusing(tr1laj) nogen // adding variables: last job
 merge 1:1 idpers using $data\shp_so, keepusing(p__o17 p__o34) nogen // social origin
 merge 1:1 idpers using $data\shp_mp, keepusing(filter$cy sex) nogen // sample and sex
 keep if (filter<5 | filter==8) & idhous<. // samples kept: SHP I, II, III, IV

 gen byte male=sex==1 if sex>0 & sex<. // variable renaming/re-coding/dummy definition
 gen byte jobchangelastyear=p`j'w18==1 if p`j'w18>0 & p`j'w18<. // only job change
 gen byte employerchangelastyear=p`j'w18==2 if p`j'w18>0 & p`j'w18<. // only employer change
 gen byte jobemployerchangelastyear=p`j'w18==3 if p`j'w18>0 & p`j'w18<. // job and employer change
 gen byte supervision=p`j'w87==1 if p`j'w87<. & p`j'w87>=0 // job involves supervision
 gen byte full=occupa==1 if occupa<. & occupa>0 // full time occupation
 gen byte part=occupa==2 if occupa<. & occupa>0 // part time occupation
 gen byte mini=occupa==3 | occupa==5 | occupa==6 if occupa<. & occupa>0 // mini job
 gen byte retired=occupa==8 if occupa<. & occupa>0
 gen byte retired_aiy=occupa==9 if occupa<. & occupa>0 // early retirement due to health issues
 gen byte unemp=occupa==10 if occupa<. & occupa>0 // unemployed
 gen byte married=civsta==2 if civsta>-1 & civsta<.
 gen byte swiss=nat_1_`j'==8100 if nat_1_`j'<. & nat_1_`j'>0 // first nationality Swiss
 * NOTE(review): code 8439 is listed twice below - possibly a second code was intended; left unchanged
 gen byte NW_Europe_US_AUS=nat_1_`j'==8201 | nat_1_`j'==8204 | (nat_1_`j'>=8206 & nat_1_`j'<=8218) | (nat_1_`j'>=8222 & nat_1_`j'<=8229) | (nat_1_`j'>=8234 & nat_1_`j'<=8236) | nat_1_`j'==8439 | nat_1_`j'==8439 | (nat_1_`j'>=8601 & nat_1_`j'<=8607) if nat_1_`j'<. & nat_1_`j'>0 // first nationality  North-Western Europe, US,  Australia
 gen treiman=tr1maj`j' if tr1maj`j'>-1 & tr1maj`j'<. // prestige scale
 * Fall back to the prestige score of the last job where the current one is
 * missing. The former code tested treiman`j', a variable that is never
 * created, inside -capture-, so the fallback silently never ran. tr1laj is
 * referenced by the same name as in the merge above and the drop below.
 replace treiman=tr1laj if treiman==. & tr1laj>-1 & tr1laj<.
 gen byte satfinance=p`j'i01 if p`j'i01>-1 & p`j'i01!=. // personal satisfaction with finances
 gen byte partner=p`j'd29>=1 & p`j'd29<3 if p`j'd29>0 & p`j'd29<. // having a partner
 
 gen byte parent_high=1 if (p__o17>=12 & p__o17<=18) | (p__o34>=12 & p__o34<=18) // father or mother high edu 
 replace parent_high=0 if parent_high==. & ((p__o17>=1 & p__o17<=8) | (p__o34>=1 & p__o34<=8)) // father or mother low edu

 rename (nbkid`j' nbpers`j' age`j' idhous`j' status`j' i`j'ptotn relarp`j' i`j'empyn region`j' edyear`j') (nbkid nbpers age idhous status ptotn relHHhead empyn_orig bigregion eduyears)

 * proxy-reported income (x`j'i04) replaces missing/negative own reports
 replace ptotn=x`j'i04 if x`j'i04>0 & x`j'i04<. & (ptotn<0 | ptotn==.) // individual total net income
 replace empyn_orig=x`j'i04 if x`j'i05==1 & x`j'i04>0 & x`j'i04<. & (empyn_orig<0 | empyn_orig==.) // original: income from independent employment
 replace empyn_orig=-2 if x`j'i05==1 & x`j'i04==. & (empyn_orig<0 | empyn_orig==.) /* work */
 gen byte INRempyn=empyn_orig==-1 | empyn_orig==-2 // item nonresponse 
 gen byte UNRempyn=(status==1 | status==2) & (full==1 | part==1 | mini==1) & p`j'i50!=2 & (empyn==. | empyn<0) & empyn!=-4 // unit nonresponse
* br idp status age occupa empyn_orig p`j'i50 x* UNRempyn INRempyn

 gen byte urban=com2_`j'==1 if com2_`j'>=0 & com2_`j'<. // urban/city 
 gen byte nbadults=nbpers-nbkid if nbpers<. & nbkid<. // number adults in HH
 gen byte rooms=h`j'h20 if h`j'h20>0 & h`j'h20<. // number rooms
 gen byte holiday=h`j'i06==1 if h`j'i06>0 & h`j'i06<. // variables holiday-noisy: Deprivation variables
 gen byte invfriend=h`j'i08==1 if h`j'i08>0 & h`j'i08<. // can afford inviting friends
 gen byte resto=h`j'i10==1 if h`j'i10>0 & h`j'i10<. // can afford going in restaurants
 gen byte car=h`j'i12==1 if h`j'i12>0 & h`j'i12<. // can afford car
 * -capture-: these two are skipped silently in waves where the source variable is absent
 cap gen byte computer=h`j'i26==1 if h`j'i26>0 & h`j'i26<. // can afford computer
 cap gen byte savings=h`j'i20ac==1 if h`j'i20ac>0 & h`j'i20ac<. // can afford saving
 gen byte pillar3=h`j'i22==1 if h`j'i22>0 & h`j'i22<. // can afford third pillar (private retirement assurance)
 gen byte dentist=h`j'i24==1 if h`j'i24>0 & h`j'i24<. // can afford go to dentist
 gen byte noisy=h`j'h26==1 if h`j'h26>0 & h`j'h26<. // living environment is noisy
 gen byte owner=h`j'h29==2 if h`j'h29>-1 & h`j'h29<. // house owner
 gen byte housesubsid=h`j'h30==1 if h`j'h30>-1 & h`j'h30<. // household is subsidized
 gen byte sathhfinances=h`j'i30 if h`j'i30>-1 & h`j'i30<. // household satisfaction with finances
 gen byte paymentarrears=h`j'i31==1 if h`j'i31>-1 & h`j'i31<. // payment arrears
 gen mininc_endsmeet=h`j'i54 if h`j'i54>0 & h`j'i54<. // min income needed to make ends meet
 gen byte hhcansave=h`j'i50==1 if h`j'i50>-1 & h`j'i50<. // household is able to save
 gen byte hhspendswhatitearns=h`j'i50==2 if h`j'i50>-1 & h`j'i50<. // household spends all income
 gen byte hheatssavings=h`j'i50==3 if h`j'i50>-1 & h`j'i50<. // savings are reducing
 gen byte hhgetsintodebt=h`j'i50==4 if h`j'i50>-1 & h`j'i50<. // household has to make debts
 gen byte hhfinanceseasy=h`j'i51 if h`j'i51>-1 & h`j'i51<. // household finances are easy to manage
 gen year=20`j'
 recode age relHHhead nbkid (-8/-1=.)
 
 * drop the wave-specific source variables (names are abbreviated here)
 drop h`j'* p`j'* x`j'* civsta tr1maj tr1laj nat_1 occupa sex com2
 compress
 save shp`j', replace
}

* --- Stack the wave files into one long person-year file (longMAR1) ---------
* Start from wave 21 and append waves 20 down to 02 in that order.
use shp21, clear
forvalues w = 20(-1)2 {
 local ww : display %02.0f `w' // two-digit wave suffix, e.g. 09
 append using shp`ww', nolab // produce long file
}

* age groups, with one logical imputation taken from household information
recode age (min/24=1 "<25") (25/44=2 "25-44") (45/64=3 "45-64") (65/max=4 ">64"), gen(agegr) lab(agegr)
replace agegr=3 if agegr==. & idpers==23163101 // logical imputation from HH Info

* someone coded as relHHhead==2 is the partner of the reference person
replace partner=1 if partner==. & relHHhead==2 // logical imputation: partner of refpers
gen byte referenceperson=(relHHhead==1) // HH reference person

* negative codes become system missing
recode bigreg eduyears (-8/-1=.)
recode ptotn (-4=0) (-8/-5=.) (-3/-1=.) // set missing total income to sysmiss

* deciles of non-negative employment income
xtile deciles = empyn_orig if empyn_orig>=0, nquantiles(10) // deciles employment income

* running observation number per person in year order
bysort idp (year): gen byte wave=_n // no longer needed

compress
save longMAR1, replace // long file
***

* Frequencies of unit and item nonresponse on employment income in the SHP
* long file, and the number of distinct persons concerned.
use idpers year INRempyn UNRempyn empyn_orig using longMAR1, clear // distribution INR and UNR
fre UNRempyn if UNR==1 | (empyn_orig<. & empyn_orig>=0) // SHP: 22.6% UNR, SILC 2016: 11.2% UNR
fre INRempyn // SHP: 2.7% INR, SILC 2016: 3.1% INR
unique idp if empyn_orig>=0 | INRempyn==1 | UNRempyn==1 // sample size all
***

* --- "Gross" analysis dataset: person-years with validly reported income ----
* Keeps observations with status==0 and positive employment income, then
* temporarily merges back all person-years from longMAR1 so that the
* nonresponse status of the preceding observation can be looked up.
use idhous idpers year deciles filter$cy $job bigreg ptotn male agegr status INRempyn UNRempyn empyn_orig referenceperson using longMAR1 if status==0 & empyn>0 & empyn<., clear // validly given income from work
gen double empyn_=empyn_orig // for some people with valid empyn_, this variable will be set to .
recode eduyears (min/-1=.) (0/11.99=1) (11.991/12=2) (12.001/max=3), gen(edu3) // fit SILC education variable
su male i.agegr i.year i.filter i.bigreg i.referenceperson $job // distribution of indep variables to be imputed 
merge 1:1 idpers year using longMAR1, keepusing(INRempyn UNRempyn)

xtset idpers year
* [_n-1] is the preceding observation of the person, so the within-person
* order must be by year; state it explicitly instead of relying on the sort
* order left behind by merge/xtset.
bysort idpers (year): gen prevUNRempyn=UNRempyn[_n-1]==1 // UNR in previous wave
reg UNRempyn prevUNRempyn // regr. coeff lagged UNR 
gen reg_prevunr=_b[prevUNRempyn]*prevUNRempyn
bysort idpers (year): gen prevINRempyn=INRempyn[_n-1]==1 // INR in previous wave
reg INRempyn prevINRempyn // regr. coeff lagged INR 
gen reg_previnr=_b[prevINRempyn]*prevINRempyn
keep if _merge==3 // back to the person-years selected above
drop _merge INRempyn UNRempyn
compress
save indiv_initMAR21, replace // "gross" analysis dataset


************* impute missing independent variables
* Impute missing covariates in the SHP analysis file with ice and keep the
* first imputed copy (_mj==1) as the complete dataset.
use idp year $indepSILC empyn_ deciles filter empyn_orig reg_prev?nr prev?NR* using indiv_initMAR21, clear
ice year $indepSILC empyn_ i.filter prev?NR*, clear match(year $indepSILC empyn_ prev?NR*) persist
tabstat year $indepSILC empyn_ deciles filter prev?NR*, by(_mj) s(n mean min max)
keep if _mj==1 // complete dataset
save indiv_initMAR21_complete, replace // analysis dataset

*** missingness mechanisms from SILC for (complete case) SHP 
* --- Delete reported incomes according to the SILC missingness models -------
* For UNR and INR in turn: restore the stored SILC logit, predict a
* missingness probability, take the row maximum with the lagged-nonresponse
* term, and flag an observation when a uniform random draw falls below that
* probability. Flagged incomes are set to missing in empyn_, except for the
* first observation per person after a random reordering.
* NOTE(review): no -set seed- is issued in the visible code, so the draws
* depend on the state of the random-number generator when this runs.
use indiv_initMAR21_complete, clear
est rest SILCunr // coefficients determining NMAR mechanism for UNR in SILC
predict pmissu // unr miss like in SILC
egen probmissu=rowmax(reg_prevunr pmissu) // reg_prevunr for 3.3%  UNR in previous wave
gen randomu=runiform()
su probmissu, det // 16.7% UNR
gen byte TIMPnmaru=randomu<probmissu // to test predictive power UNR
gen byte TIMPnmar=randomu<probmissu // 16.4% UNR
est rest SILCinr // coefficients determining NMAR mechanism for INR in SILC
predict pmissi // INR as in SILC
egen probmissi=rowmax(reg_previnr pmissi)  // reg_previnr for 1.7% with INR in previous wave
gen randomi=runiform()
su probmissi, det // 3.4% INR
gen byte TIMPnmari=randomi<probmissi // to test predictive power INR 
replace TIMPnmar=1 if randomi<probmissi // + 3.4% INR 
su TIMPnmar // 19.4% NR overall
* random rank within person; the observation sorted first is protected below
bysort idpers: gen byte nottodrop=runiform(1,_N) // those with . have 1 observation
gsort idpers -nottodrop
bysort idpers: replace empyn_=. if TIMPnmar==1 & _n>1	// here we produce missing values: NR mechanism in data
replace TIMPnmar=0 if empyn_!=. // flag now marks exactly the deleted values
su TIMPnmar
scalar NRoverall=r(mean) // to adjust missingness from MAR for NMAR, 15.4%

* Carryover: fill each deleted value with the nearest remaining reported
* value of the same person, trying distance 1, 2, ... and, at each distance,
* the earlier observation before the later one.
sort idpers year
gen double empyn_carry=empyn_ // initialise carryover by reported values
forv j=1/20 { // max waves,  carryover of closest reported value
 bysort idpers: replace empyn_carry=empyn_[_n-`j'] if empyn_carry==. & empyn_[_n-`j']<.
 bysort idpers: replace empyn_carry=empyn_[_n+`j'] if empyn_carry==. & empyn_[_n+`j']<.
}
save carry_nmar, replace

* Wide version (one row per person, one column set per year) as input for
* the Little & Su step, followed by a check of how many non-missing incomes
* each person has left.
keep TIMPnmar empyn_orig empyn_ empyn_carry idp year filter
reshape wide TIMPnmar empyn_orig empyn_ empyn_carry, i(idp) j(year)
save temp_nmar, replace // wide data with missings in lnempyn_ according to NMAR mechanism

use temp_nmar, clear // check
egen presentempyn=rownonmiss(empyn_2*)
fre presentempyn // to check if all people have at least 1 nonmissing empyn
use idp year empyn_* TIMP* using carry_nmar, clear


****************
* Little & Su imputation: for each missingness variant, load the wide file,
* strip the variant name from the TIMP* variables, run the external do-file
* and save the result as SHP_<variant>.
* NOTE(review): what the called do-file creates is not visible here; later
* code reads empyn_LS* and row from its output.
foreach file in nmar { // now Little and Su imputation (loop, because original version used both nmar and mar)
 use temp_`file', clear
 renvars TIMP`file'*, subst(`file') // mar/nmar have different missing values
 do "M:\SHP\Research\2017 Longitudinal income imputation in the SHP\test LittleSu empyn_2019" 
 save SHP_`file', replace
}


*************************  imputation with/without lagged income
* Iterative-regression imputation with lagged income: log income, its
* one-year lag and the covariates are imputed jointly by ice; the result is
* saved as OLSlag<variant>.
foreach file in nmar { // Imputation: (log) income, lagged income, indepvars
use idp TIMP* empyn_orig* empyn_carry* empyn_LS* using SHP_`file', clear
qui reshape long TIMP empyn_orig empyn_LS empyn_carry, i(idpers) j(year)
keep if empyn_orig<.
merge 1:1 idpers year using longMAR1, keepusing(agegr year filter bigreg referenceperson $job) keep(3) nogen // incomplete
gen empyn_orig1=empyn_orig if TIMP==0 // listwise deleted
gen double lnempyn_orig1=ln(empyn_orig1)
* The l. operator needs the panel structure; declare it here rather than
* relying on settings carried over from an earlier dataset (the result
* blocks further below also xtset explicitly before using l.).
xtset idpers year
gen l_lnempyn_orig1=l.lnempyn_orig1
ice lnempyn_orig1 l_lnempyn_orig1 i.agegr i.year i.filter m.bigreg referenceperson $job, clear /*add(3)*/ match(lnempyn_orig1 l_lnempyn_orig1 bigreg referenceperson $job) cmd(referenceperson urban NW_Eu housesubsid computer car hhgetsint married swiss partner parent_high paymentar noisy dentist pillar3 resto invfriend holiday owner hhcansave hheatssav unemp retired mini part full jobchange jobemploy employerc supervision:regress) persist
save OLSlag`file', replace // for the predicted regression with lag (iterative regression)
}

* Same preparation as the lagged variant, but ice only imputes the
* covariates (log income is not part of the ice varlist); the result is
* saved as OLSnolag<variant>.
foreach file in nmar { // Imputation: indepvars
use idp TIMP* empyn_orig* empyn_carry* empyn_LS* using SHP_`file', clear
qui reshape long TIMP empyn_orig empyn_LS empyn_carry, i(idpers) j(year)
keep if empyn_orig<.
merge 1:1 idpers year using longMAR1, keepusing(agegr year filter bigreg referenceperson $job) keep(3) nogen // incomplete
gen empyn_orig1=empyn_orig if TIMP==0 // listwise deleted
gen double lnempyn_orig1=ln(empyn_orig1)
ice i.agegr i.year i.filter m.bigreg referenceperson $job, clear /*add(3)*/ match(bigreg referenceperson $job) cmd(referenceperson urban NW_Eu housesubsid computer car hhgetsint married swiss partner parent_high paymentar noisy dentist pillar3 resto invfriend holiday owner hhcansave hheatssav unemp retired mini part full jobchange jobemploy employerc supervision:regress) persist // we should have included lnempyn_orig1 in the imputation 
save OLSnolag`file', replace // imputed independent variables for our new approach
}

* Predicted log income from a pooled OLS and from a fixed-effects regression,
* both estimated on the non-deleted observations (TIMP==0) of the first
* imputed copy; predictions are saved as OLSpred<variant>.
foreach file in nmar { // predict OLS / FE : mean&within (also possible in 1 step)
use OLSnolag`file' if _mj==1, clear // predictions OLS no lag
* xtreg, fe needs the panel variable; declare it here rather than relying on
* settings carried over from an earlier dataset.
xtset idpers year
reg lnempyn_orig i.agegr i.year i.filter i.bigreg referenceperson $job if TIMP==0, vce(cl idpers)
predict double lnempyn_OLSpred
*replace lnempyn_OLSpred=lnempyn_orig if TIMP==0 // NEW 25.2.2023
xtreg lnempyn_orig i.agegr i.year i.filter i.bigreg referenceperson $job if TIMP==0, vce(cl idpers) fe
predict double lnempyn_FEpred
*replace lnempyn_FEpred=lnempyn_orig if TIMP==0 // NEW NEW 25.2.2023
keep idpers year lnempyn_OLSpred lnempyn_FEpred TIMP
save OLSpred`file', replace // no lags
}

***************************  construct deviation from individual mean
* --- Build all imputed income variants and the final test dataset -----------
* Combines the iterative-regression result, the Little & Su row effect and
* the OLS/FE predictions. For each of OLS and FE, the mean&within value of a
* deleted observation is the person's mean reported income multiplied by the
* ratio of that observation's predicted income to the person's mean
* predicted income. All variants are then divided by the price index ind10
* and saved under $home\Results.
cd M:\Projects\CNEF-Imputation // working directory
foreach file in nmar {
 use idp year empyn_LS* empyn_orig* empyn_carry lnempyn_orig1 _mj TIMP using OLSlag`file' if _mj==1, clear
 merge m:1 idp using shp_`file', keepusing(row) nogen // row effect L&S
 gen double empyn_OLSpredlag=exp(lnempyn_orig1)
 merge 1:1 idpers year using OLSpred`file', keepusing(lnempyn_OLSpred lnempyn_FEpred) nogen

 bysort idp: egen mindiv_rep=mean(empyn_orig) if TIMP==0 // mean individual reported value
 * non-missing values sort first, so [1] is the person's mean if one exists
 sort idp mindiv_rep
 bysort idp: replace mindiv_rep=mindiv_rep[1] // impute mean for TIMP==1
 
 foreach var in OLS FE {
  gen double `var'meanwithin=empyn_orig if TIMP==0 // reported if not to impute
  gen double empyn_`var'pred=exp(lnempyn_`var'pred)
  bysort idp: egen m`var'pred=mean(empyn_`var'pred) // mean individual predicted value
  gen corr`var'=empyn_`var'pred/m`var'pred // correction factor
  replace `var'meanwithin=mindiv_rep*corr`var' if TIMP==1 // mean&within imputed value
 }
 gen double OLSrowwithin=empyn_orig if TIMP==0 // similar but with L&S row instead of indiv mean
 replace OLSrowwithin=row*corrOLS if TIMP==1

xtile centile_empyn=empyn_orig, nq(100) // 100 income centiles

* the index is stored as a ratio (2010 = 1), one value per survey year
gen ind10=.   //  price index: dec 2010=100
qui replace ind10=0.936 if year==2002
qui replace ind10=0.943 if year==2003
qui replace ind10=0.955 if year==2004
qui replace ind10=0.965 if year==2005
qui replace ind10=0.972 if year==2006
qui replace ind10=0.995 if year==2007
qui replace ind10=0.99 if year==2008
qui replace ind10=0.997 if year==2009
qui replace ind10=1 if year==2010
qui replace ind10=0.993 if year==2011
qui replace ind10=0.991 if year==2012
qui replace ind10=0.99 if year==2013
qui replace ind10=0.979 if year==2014
qui replace ind10=0.975 if year==2015
qui replace ind10=0.98 if year==2016
qui replace ind10=0.989 if year==2017
qui replace ind10=0.993 if year==2018
qui replace ind10=0.986 if year==2019
qui replace ind10=0.991 if year==2020
qui replace ind10=0.991 if year==2021 // imputed from 2020
label var ind10 "price index 2010=100"

* strips the 6-character prefix "empyn_", e.g. empyn_orig -> orig, empyn_LS -> LS
renvars empyn_*, predrop(6) // shorten long name for income variable
foreach var in orig OLSpredlag LS OLSmeanwithin FEmeanwithin orig1 carry OLSrowwithin {
 qui replace `var'=`var'/ind10 // price index correction
}
drop _mj mindiv* ind10 ln*
compress
save "$home\Results\testempyn_`file'", replace
} // file (missingness mechanism)
***


* Copy the test dataset from $home\Results to $temp, from where most result
* blocks below read it. Requires the global temp to be defined.
cd "$docs"
foreach file in nmar {
 use "$home\Results\testempyn_`file'", clear
 save $temp\testempyn_`file', replace // reading from harddrive is faster ...
}



************************************ Results: Prepare xlsx file
* --- Sheet fig_1: descriptive statistics per income variant -----------------
* Writes mean, coefficient of variation, Gini, mean log deviation, the
* p99/p50 ratio and N for every income variant (rows 2-9, in the order of
* the loop list) and standard errors for the reported income in row 10.
* The workbook is named <variant><current date> and is created in $docs.
cd "$docs"
*cd "$temp" // alternative (faster on c drive)

foreach file in nmar { // Descriptive Statistics
 use $temp\testempyn_`file', clear
 d,s
 qui putexcel set "`file'`c(current_date)'", sheet(fig_1, replace) modify
 qui putexcel B1="Mean"
 qui putexcel C1="Coeff. of variation"
 qui putexcel D1="Gini Coeff."
 qui putexcel E1="Mean log deviation"
 qui putexcel F1="99/50 ratio of percentiles"
 qui putexcel G1="N"
 * row labels follow the order of the variable list in the loop below
 qui putexcel A2="Reported (target)" // ALL
 qui putexcel A3="Iterative regression"
 qui putexcel A4="L&S "
 qui putexcel A5="OLS mean&within"
 qui putexcel A6="Listwise deleted"
 qui putexcel A7="Carryover"
 qui putexcel A8="OLS row&within"
 * row 9 holds FEmeanwithin; it was mislabelled "FE row&within"
 qui putexcel A9="FE mean&within"
 qui putexcel A10="Standard errors (reported)"
local k=2
foreach var in orig OLSpredlag LS OLSmeanwithin orig1 carry OLSrowwithin FEmeanwithin { // descriptives
 cap drop `var'_p50_99
 cap drop `var'_cv
 cap drop `var'_gini
 cap drop `var'_mld
 qui su `var', det
 qui putexcel B`k'=`r(mean)', nformat(account)
 qui putexcel G`k'=`r(N)', nformat(number_sep)
 qui gen `var'_p50_99=`r(p99)'/`r(p50)'
 qui putexcel F`k'=`var'_p50_99, nformat(.000)
 * the egen functions below come from user-written packages
 qui egen `var'_cv=cov(`var')
 qui putexcel C`k'=`var'_cv, nformat(.000)
 qui egen `var'_gini=gini(`var')
 qui putexcel D`k'=`var'_gini, nformat(.000)
 qui egen `var'_mld=mld(`var')
 qui putexcel E`k'=`var'_mld, nformat(.000)
 local k=`k'+1
 }
 * standard errors for the reported income: sd/sqrt(N) of the rifvar() variables
 egen mean=rifvar(orig), mean
 egen gini=rifvar(orig), gini
 egen cv=rifvar(orig), cvar
 egen iqratio5099=rifvar(orig), iqratio(50 99)
 foreach v in mean gini cv iqratio5099 {
  qui su `v'
  qui scalar se_`v'=r(sd)/sqrt(r(N))
 }
 qui putexcel B10=se_mean, nformat(.000)
 qui putexcel C10=se_cv, nformat(.000)
 qui putexcel D10=se_gini, nformat(.000)
 qui putexcel F10=se_iqratio5099, nformat(.000)
}


* --- Sheet fig_2: correlation of each imputed variant with reported income --
* Columns B-F: full sample and four centile-trimmed samples. Row 10 receives
* the sample sizes of the last variant processed.
foreach file in nmar {
 use $temp\testempyn_`file', clear
 qui putexcel set "`file'`c(current_date)'", sheet(fig_2, replace) modify

 * column headers
 qui putexcel B1="Full_sample"
 qui putexcel C1="1-99 %tile"
 qui putexcel D1="1-95 %tile"
 qui putexcel E1="2-100 %tile"
 qui putexcel F1="5-100 %tile"

 * row labels, in the order of the method list below
 qui putexcel A2="Iterative regression"
 qui putexcel A3="L&S "
 qui putexcel A4="OLS mean&within"
 qui putexcel A5="Carryover"
 qui putexcel A6="OLS row&within"
 qui putexcel A7="FE mean&within"
 qui putexcel A10="N"

 local methods OLSpredlag LS OLSmeanwithin carry OLSrowwithin FEmeanwithin
 local row=2
 foreach m of local methods { // corr with original var.
  * full sample
  qui corr orig `m'
  local n_all=`r(N)'
  qui putexcel B`row'=`r(rho)', nformat(.000)
  * without the top 1 percent
  qui corr orig `m' if centile<=99
  local n_1_99=`r(N)'
  qui putexcel C`row'=`r(rho)', nformat(.000)
  * without the top 5 percent
  qui corr orig `m' if centile<=95
  local n_1_95=`r(N)'
  qui putexcel D`row'=`r(rho)', nformat(.000)
  * without the bottom 1 percent
  qui corr orig `m' if centile>=2
  local n_2_100=`r(N)'
  qui putexcel E`row'=`r(rho)', nformat(.000)
  * without the bottom 4 percent
  qui corr orig `m' if centile>=5
  local n_5_100=`r(N)'
  qui putexcel F`row'=`r(rho)', nformat(.000)
  local ++row
 }
}
* sample sizes (taken from the last method in the list)
qui putexcel B10=`n_all', nformat(number_sep)
qui putexcel C10=`n_1_99', nformat(number_sep)
qui putexcel D10=`n_1_95', nformat(number_sep)
qui putexcel E10=`n_2_100', nformat(number_sep)
qui putexcel F10=`n_5_100', nformat(number_sep)


* --- Sheet fig_3: summed relative absolute deviation from reported income ---
* For each imputed variant: sum over all observations of |orig - variant| / orig,
* written next to the number of observations in the dataset.
foreach file in nmar {
 use $temp\testempyn_`file', clear
 qui putexcel set "`file'`c(current_date)'", sheet(fig_3, replace) modify // simple and squared absolute deviations

 * headers and row labels (same order as the method list)
 qui putexcel B1="Sum_of_rel._Differences_of_abs._values"
 qui putexcel C1="N"
 qui putexcel A2="Iterative regression"
 qui putexcel A3="L&S "
 qui putexcel A4="OLS mean&within"
 qui putexcel A5="Carryover"
 qui putexcel A6="OLS row&within"
 qui putexcel A7="FE mean&within"

 local methods OLSpredlag LS OLSmeanwithin carry OLSrowwithin FEmeanwithin
 local row=2
 foreach m of local methods {
  * remove the helper variables of the previous method before re-creating them
  cap drop absdiff*
  gen absdiff`m'= abs(orig-`m')/orig
  cap drop sumabsdiff*
  egen sumabsdiff`m'=sum(absdiff`m')
  qui putexcel B`row'=sumabsdiff`m', nformat(number_sep)
  qui putexcel C`row'=_N
  local ++row
 }
}


* --- Sheet fig_4: Kolmogorov-Smirnov test, imputed variant vs. reported ------
* For each variant the reported income (renamed to the variant's name) is
* stacked on top of the variant itself; origvar marks the reported copy.
foreach file in nmar { // Significance Distributions: ksmirnov
 qui putexcel set "`file'`c(current_date)'", sheet(fig_4, replace) modify // distributional accuracy

 * headers and row labels (same order as the method list)
 qui putexcel B1="Combined_K-S"
 qui putexcel C1="p-value"
 qui putexcel D1="N"
 qui putexcel A2="Iterative regression"
 qui putexcel A3="L&S "
 qui putexcel A4="OLS mean&within"
 qui putexcel A5="Carryover"
 qui putexcel A6="OLS row&within"
 qui putexcel A7="FE mean&within"

 local methods OLSpredlag LS OLSmeanwithin carry OLSrowwithin FEmeanwithin
 local row=2
 foreach m of local methods {
  * first copy: reported income under the variant's name
  use idp year orig TIMP using $temp\testempyn_`file', clear
  gen byte origvar=1
  rename orig `m'
  * second copy: the variant itself
  append using $temp\testempyn_`file', keep(idp year `m' TIMP)
  replace origvar=0 if missing(origvar)
  ksmirnov `m', by(origvar)
  qui putexcel B`row'=`r(D)'
  qui putexcel C`row'=`r(p)'
  qui summarize if origvar==1, meanonly
  qui putexcel D`row'=`r(N)', nformat(number_sep)
  local ++row
 }
}


* --- Sheet fig_5: correlation of each income variant with its one-year lag --
* Columns B-F: full sample and four centile-trimmed samples. Row 10 receives
* the sample sizes of the last variant processed.
foreach file in nmar {
 use $temp\testempyn_`file', clear
 xtset idpers year
 qui putexcel set "`file'`c(current_date)'", sheet(fig_5, replace) modify // within changes (lagged correlations) 

 * column headers
 qui putexcel B1="Full_sample"
 qui putexcel C1="1-99 %tile"
 qui putexcel D1="1-95 %tile"
 qui putexcel E1="2-100 %tile"
 qui putexcel F1="5-100 %tile"

 * row labels, in the order of the series list below
 qui putexcel A2="Reported (target)"
 qui putexcel A3="Iterative regression"
 qui putexcel A4="L&S "
 qui putexcel A5="OLS mean&within"
 qui putexcel A6="Listwise deleted"
 qui putexcel A7="Carryover"
 qui putexcel A8="OLS row&within"
 qui putexcel A9="FE mean&within"
 qui putexcel A10="N"

 local series orig OLSpredlag LS OLSmeanwithin orig1 carry OLSrowwithin FEmeanwithin
 local row=2
 foreach s of local series {
  * full sample
  qui corr `s' l.`s' // new version
  local n_all=`r(N)'
  qui putexcel B`row'= `r(rho)', nformat(.000)
  * without the top 1 percent
  qui corr `s' l.`s' if centile<=99
  local n_1_99=`r(N)'
  qui putexcel C`row'= `r(rho)', nformat(.000)
  * without the top 5 percent
  qui corr `s' l.`s' if centile<=95
  local n_1_95=`r(N)'
  qui putexcel D`row'= `r(rho)', nformat(.000)
  * without the bottom 1 percent
  qui corr `s' l.`s' if centile>=2
  local n_2_100=`r(N)'
  qui putexcel E`row'= `r(rho)', nformat(.000)
  * without the bottom 4 percent
  qui corr `s' l.`s' if centile>=5
  local n_5_100=`r(N)'
  qui putexcel F`row'= `r(rho)', nformat(.000)
  local ++row
 }
}
* sample sizes (taken from the last series in the list)
qui putexcel B10=`n_all', nformat(number_sep)
qui putexcel C10=`n_1_99', nformat(number_sep)
qui putexcel D10=`n_1_95', nformat(number_sep)
qui putexcel E10=`n_2_100', nformat(number_sep)
qui putexcel F10=`n_5_100', nformat(number_sep)


* --- Sheet fig_6: Spearman correlation of income deciles with lagged deciles -
* Rows 2-9 use a one-year lag, rows 11-18 a five-year lag; both parts go
* through the same series in the same order.
foreach file in nmar {
 use $temp\testempyn_`file', clear
 xtset idp year
 qui putexcel set "`file'`c(current_date)'", sheet(fig_6, replace) modify // distribution of income mobility

 local series orig OLSpredlag LS OLSmeanwithin orig1 carry OLSrowwithin FEmeanwithin

 * headers and labels for the one-year-lag part
 qui putexcel B1="Spearman"
 qui putexcel C1="p-value"
 qui putexcel D1="N"
 qui putexcel A2="Reported (target) 1lag"
 qui putexcel A3="Iterative regression 1lag"
 qui putexcel A4="L&S 1lag"
 qui putexcel A5="OLS mean&within 1lag"
 qui putexcel A6="Listwise deleted 1lag"
 qui putexcel A7="Carryover 1lag"
 qui putexcel A8="OLS row&within 1lag"
 qui putexcel A9="FE mean&within 1lag"
 qui putexcel E2="1 year lag"
 qui putexcel E11="5 year lag"

 local row=2
 foreach s of local series { // 1 lag
  cap drop *dec_`s'
  xtile dec_`s'=`s', nq(10) 
  qui gen l_dec_`s'=l.dec_`s'
  qui spearman dec_`s' l_dec_`s'
  qui putexcel B`row'=`r(rho)', nformat(.000)
  qui putexcel C`row'=`r(p)', nformat(.000)
  qui putexcel D`row'=`r(N)', nformat(number_sep)
  local ++row
 }

 * labels for the five-year-lag part
 qui putexcel A11="Reported (target) 5lags"
 qui putexcel A12="Iterative regression 5lags"
 qui putexcel A13="L&S 5lags"
 qui putexcel A14="OLS mean&within 5lags"
 qui putexcel A15="Listwise deleted 5lags"
 qui putexcel A16="Carryover 5lags"
 qui putexcel A17="OLS row&within 5lags"
 qui putexcel A18="FE mean&within 5lags"

 local row=11
 foreach s of local series { // 5 lags
  * the wildcard also removes the one-year-lag helper created above
  cap drop *dec_`s'
  xtile dec_`s'=`s', nq(10) 
  qui gen l5_dec_`s'=l5.dec_`s'
  qui spearman dec_`s' l5_dec_`s'
  qui putexcel B`row'=`r(rho)', nformat(.000)
  qui putexcel C`row'=`r(p)', nformat(.000)
  qui putexcel D`row'=`r(N)', nformat(number_sep)
  local ++row
 }
}


* --- Sheet fig_7: within and between standard deviations (xtsum) ------------
* Rows 2-8: all observations; rows 10-16: only observations whose income was
* deleted (TIMP==1). Columns: within sd, between sd, observations, persons.
foreach file in nmar { 
 use "$home\Results\testempyn_`file'", clear
 * xtsum needs the panel declaration; the other sheets that use panel
 * commands on this dataset declare it explicitly as well.
 xtset idpers year
 qui putexcel set "`file'`c(current_date)'", sheet(fig_7, replace) modify
 qui putexcel B1="Within std"
 qui putexcel C1="Between std"
 qui putexcel D1="Nobs"
 qui putexcel E1="Nindiv"
 * row labels follow the order of the variable list in the loop below
 qui putexcel A2="Reported (target)" 
 qui putexcel A3="Iterative regression"
 qui putexcel A4="L&S"
 qui putexcel A5="OLS mean&within"
 qui putexcel A6="Listwise deleted"
 qui putexcel A7="OLS row&within"
 qui putexcel A8="FE mean&within"

local k=2
foreach var in orig OLSpredlag LS OLSmeanwithin orig1 OLSrowwithin FEmeanwithin { // xtsum results

 qui xtsum `var'
 qui putexcel B`k'=`r(sd_w)', nformat(.000)
 qui putexcel C`k'=`r(sd_b)', nformat(.000)
 qui putexcel D`k'=`r(N)', nformat(number_sep)
 qui putexcel E`k'=`r(n)', nformat(number_sep)
 local k=`k'+1
 }

 * second part: same statistics restricted to the deleted observations
 qui putexcel A10="Reported (target)" 
 qui putexcel A11="Iterative regression"
 qui putexcel A12="L&S"
 qui putexcel A13="OLS mean&within"
 qui putexcel A14="Listwise deleted"
 qui putexcel A15="OLS row&within"
 qui putexcel A16="FE mean&within"

 local k=10
foreach var in orig OLSpredlag LS OLSmeanwithin orig1 OLSrowwithin FEmeanwithin {
 qui xtsum `var' if TIMP==1
 qui putexcel B`k'=`r(sd_w)', nformat(.000)
 qui putexcel C`k'=`r(sd_b)', nformat(.000)
 qui putexcel D`k'=`r(N)', nformat(number_sep)
 qui putexcel E`k'=`r(n)', nformat(number_sep)
 local k=`k'+1
 }
} // nmar, mar



* modeling parameter accuracy: all obs
* Outcome variables for the regression comparison: life satisfaction,
* political interest and club membership, one temporary file per wave
* (2002-2021), then stacked into temp.dta.
forvalues w = 2/21 {
 local j : display %02.0f `w' // two-digit wave suffix, e.g. 02
 use idpers p`j'p01 p`j'n34 p`j'c44 using $data\shp`j'_p_user, clear
 gen year=20`j'
 * negative codes are treated as missing
 gen byte satlife=p`j'c44 if !missing(p`j'c44) & p`j'c44>=0
 gen byte politint=p`j'p01 if !missing(p`j'p01) & p`j'p01>=0
 gen byte club=(p`j'n34==1) if !missing(p`j'n34) & p`j'n34>=0
 drop p`j'p01 p`j'n34 p`j'c44
 save temp`j', replace
}

* stack the waves in ascending order; the per-wave files 03-21 are deleted
use temp02, clear
forvalues w = 3/21 {
 local j : display %02.0f `w'
 append using temp`j', nolab
 erase temp`j'.dta
}
save temp, replace // satlife, politint, club
**
use temp, clear

* --- Sheet fig_8<depvar>_full: regression coefficients, all observations -----
* Income variants are rescaled to units of 100'000 and top-coded at their
* 99th percentile. The dependent variable is regressed on each income
* variant plus age-group and year dummies, once in levels and once on
* person-centered variables (c_*). Each variant's income coefficient is
* compared with the one obtained from reported income (orig): relative
* deviation and a suest-based test of equality.
foreach file in nmar { 
use $temp\testempyn_`file', clear // full sample
foreach var in orig orig1 OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
 qui replace `var'=`var'/100000
 _pctile `var', p(99)
 *disp "99% percentile of `var' `r(r1)'"
 qui replace `var'=r(r1) if `var'<. & `var'>r(r1)
}

qui merge 1:1 idpers year using M:\Projects\CNEF-Imputation\indiv_initMAR21, keepusing(agegr) keep(3) nogen
qui merge 1:1 idpers year using temp, keepusing(satlife) keep(3) nogen
* xi creates the _I* dummies; center (user-written) creates the c_* variables by person
qui xi i.agegr i.year
qui by idpers: center satlife orig orig1 OLSpredlag OLSmeanwithin LS carry OLSrowwithin _I* FEmeanwithin

foreach depvars in satlife { // we had more dependent variables in the original version, therefore a loop
 * regressions in levels; results are kept as constant variables b_*, se_*, dev_*
 qui reg `depvars' orig i.agegr i.year
 est store orig
 gen b_orig=_b[orig] // target coefficient
 gen se_orig=_se[orig]
 foreach var in orig1 OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
  qui reg `depvars' `var' i.agegr i.year
  est store `var'
  gen b_`var'=_b[`var']
  gen dev_`var'=((_b[`var']-b_orig)/b_orig) // relative error to (between) target
 }

 * regressions on person-centered variables
 qui reg c_`depvars' c_orig c__I*
 est store FEorig
 gen b_FE_orig=_b[c_orig] // target coefficient
 gen se_FE_orig=_se[c_orig]
 gen n_FE_orig=`e(N)'
 foreach var in orig1 OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
  qui reg c_`depvars' c_`var' c__I*
  est store FE`var'
  gen b_FE_`var'=_b[c_`var']
  gen n_FE_`var'=`e(N)'
  gen dev_FE_`var'=((_b[c_`var']-b_FE_orig)/b_FE_orig) // relative error to (within) target
 }

qui putexcel set "`file'`c(current_date)'", sheet(fig_8`depvars'_full, replace) modify // OLS / FE regression coefficients
qui putexcel B1="Satisfaction life"
qui putexcel D1="Income in 100'000 Sfr."
qui putexcel A2="Model"
qui putexcel A3="Reported"
qui putexcel A4="Iterative regression"
qui putexcel A5="L&S "
qui putexcel A6="OLS mean&within"
qui putexcel A7="Listwise deleted"
qui putexcel A8="Carryover"
qui putexcel A9="OLS row&within"
qui putexcel A10="FE mean&within"
qui putexcel B2="cross-sectional rel. deviation"
qui putexcel C2="cross-sectional estimate"
qui putexcel D2="p(chi2: OLS estimate <> orig)"
qui putexcel E2="within rel. deviation"
qui putexcel F2="within estimate"
qui putexcel G2="p(chi2: FE estimate <> orig)"
qui putexcel H2="N"
qui putexcel I3="SE reported OLS (orig)"
qui putexcel J3="SE reported FE (orig)"
qui putexcel B3=0, nformat(.000)
qui putexcel C3=b_orig, nformat(.000)
qui putexcel E3=0, nformat(.000)
qui putexcel F3=b_FE_orig, nformat(.000)
qui putexcel H3=n_FE_orig
* rows 4-10, in the order of the list below (matches labels A4-A10)
local k=4 // was 5 with full sample
foreach var in OLSpredlag LS OLSmeanwithin orig1 carry OLSrowwithin FEmeanwithin {
 qui putexcel B`k'=dev_`var', nformat(.000)
 qui putexcel C`k'=b_`var', nformat(.000)
 qui putexcel E`k'=dev_FE_`var', nformat(.000)
 qui putexcel F`k'=b_FE_`var', nformat(.000)
 qui suest orig `var'
 qui test [orig_mean]orig=[`var'_mean]`var'
 qui putexcel D`k'=`r(p)', nformat(.000) // chi2 of unequal OLS regression coefficient
 qui suest FEorig FE`var'
 qui test [FEorig_mean]c_orig=[FE`var'_mean]c_`var'
 qui putexcel G`k'=`r(p)', nformat(.000) // chi2 of unequal FE regression coefficient
 qui putexcel H`k'= n_FE_`var'
 local k=`k'+1
 } // Model/Variable
 * the standard errors are written one row below their labels in I3/J3
 qui putexcel I4=se_orig, nformat(.000)
 qui putexcel J4=se_FE_orig, nformat(.000)
 } // depvar
} // file




* --- Sheet fig_9<depvar>_imputed: regression coefficients, deleted obs only --
* Same procedure as for the full-sample sheet, restricted to observations
* with TIMP==1. The listwise-deleted variant (orig1) is not part of this
* comparison, so the sheet has one row less.
foreach file in nmar { 
use $temp\testempyn_`file' if TIMP==1, clear // only imputed
foreach var in orig OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
 * rescale to units of 100'000 and top-code at the 99th percentile
 qui replace `var'=`var'/100000
 _pctile `var', p(99)
 qui replace `var'=r(r1) if `var'<. & `var'>r(r1)
}

qui merge 1:1 idpers year using M:\Projects\CNEF-Imputation\indiv_initMAR21, keepusing(agegr) keep(3) nogen
qui merge 1:1 idpers year using temp, keepusing(satlife politint club) keep(3) nogen
* xi creates the _I* dummies; center (user-written) creates the c_* variables by person
qui xi i.agegr i.year
qui by idpers: center satlife politint club orig OLSpredlag OLSmeanwithin LS carry _I* OLSrowwithin FEmeanwithin

foreach depvars in satlife { // we had more dependent variables in the original version, therefore a loop
 cap drop b_* se_* n_* dev_*
 * regressions in levels
 qui reg `depvars' orig i.agegr i.year
 est store orig
 gen b_orig=_b[orig] // target coefficient
 gen se_orig=_se[orig]
 foreach var in OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
  qui reg `depvars' `var' i.agegr i.year
  est store `var'
  gen b_`var'=_b[`var']
  gen dev_`var'=((_b[`var']-b_orig)/b_orig) // relative error to (between) target
 }

 * regressions on person-centered variables
 qui reg c_`depvars' c_orig c__I*
 est store FEorig
 gen b_FE_orig=_b[c_orig] // target coefficient
 gen se_FE_orig=_se[c_orig]
 gen n_FE_orig=`e(N)'
 foreach var in OLSpredlag OLSmeanwithin LS carry OLSrowwithin FEmeanwithin { 
  qui reg c_`depvars' c_`var' c__I*
  est store FE`var'
  gen b_FE_`var'=_b[c_`var']
  gen n_FE_`var'=`e(N)'
  gen dev_FE_`var'=((_b[c_`var']-b_FE_orig)/b_FE_orig) // relative error to (within) target
 }

qui putexcel set "`file'`c(current_date)'", sheet(fig_9`depvars'_imputed, replace) modify // OLS / FE regression coefficients
qui putexcel B1="`depvars'"
qui putexcel D1="Income in 100'000 Sfr."
qui putexcel A2="Model"
qui putexcel A3="Reported"
qui putexcel A4="Iterative regression"
qui putexcel A5="L&S "
qui putexcel A6="OLS mean&within"
qui putexcel A7="Carryover"
qui putexcel A8="OLS row&within"
qui putexcel A9="FE mean&within"
qui putexcel B2="cross-sectional rel. deviation"
qui putexcel C2="cross-sectional estimate"
qui putexcel D2="p(chi2: OLS estimate <> orig)"
qui putexcel E2="within rel. deviation"
qui putexcel F2="within estimate"
qui putexcel G2="p(chi2: FE estimate <> orig)"
qui putexcel H2="N"
qui putexcel I3="SE reported OLS (orig)"
qui putexcel J3="SE reported FE (orig)"
qui putexcel B3=0, nformat(.000)
qui putexcel C3=b_orig, nformat(.000)
qui putexcel E3=0, nformat(.000)
qui putexcel F3=b_FE_orig, nformat(.000)
qui putexcel H3=n_FE_orig
* rows 4-9, in the order of the list below (matches labels A4-A9)
local k=4 // was 5 with full sample
foreach var in OLSpredlag LS OLSmeanwithin carry OLSrowwithin FEmeanwithin {
 qui putexcel B`k'=dev_`var', nformat(.000)
 qui putexcel C`k'=b_`var', nformat(.000)
 qui putexcel E`k'=dev_FE_`var', nformat(.000)
 qui putexcel F`k'=b_FE_`var', nformat(.000)
 qui suest orig `var'
 qui test [orig_mean]orig=[`var'_mean]`var'
 qui putexcel D`k'=`r(p)', nformat(.000) // chi2 of unequal OLS regression coefficient
 qui suest FEorig FE`var'
 qui test [FEorig_mean]c_orig=[FE`var'_mean]c_`var'
 qui putexcel G`k'=`r(p)', nformat(.000) // chi2 of unequal FE regression coefficient
 qui putexcel H`k'= n_FE_`var'
 local k=`k'+1
 } // Model/Variable
 * the standard errors are written one row below their labels in I3/J3
 qui putexcel I4=se_orig, nformat(.000)
 qui putexcel J4=se_FE_orig, nformat(.000)
 } // depvar
} // file








