/*STATA 15*/

* User-written packages called further down in this do-file:
*   tsspell   - spell construction on tsset data
*   coefplot  - coefficient plots (fig7, figB6, fig9, figB7)
*   estout    - regression tables (tableA9.txt, tableA11.txt); it was used but never installed
*   grc1leg2  - combined graph with a single legend (figB4)
ssc install tsspell
ssc install coefplot
ssc install estout
net install grc1leg2.pkg, from(http://digital.cgdev.org/doc/stata/MO/Misc/)

**************preparing soep data

**set path to directory with original soep files
global path "[path_to_data]"
**set path to directory where datafile with external data on low earnings threshold is stored
global path_earnings "[path_to_data]"
**set path to directory where replication outputs are to be stored
global path_repl "[path to outputs]"
**create a directory to store generated files
* capture: the directory already exists when the do-file is run a second time,
* in which case a plain mkdir would abort the run.
* The path is quoted so that an output directory containing blanks is accepted.
capture mkdir "$path_repl\mygen"

/* Build the two person-year extracts soep_pfad and soep_pgen.
   Each extract keeps a fixed variable list, is sorted by pid syear
   and is written to the mygen directory (overwriting an earlier copy). */

* --- soep_pfad: selected variables from ppathl
local pfadvars pid syear sex gebjahr phrf sampreg migback pbleib psample
use $path\ppathl.dta, clear
keep `pfadvars'
sort pid syear
save $path_repl\mygen\soep_pfad, replace

* --- soep_pgen: selected variables from pgen
use $path\pgen.dta, clear
keep pid syear pgfamstd pgisei08 pgisei88 pgsiops88 pgsiops08 pgisced11 pgoeffd ///
	pgerwzeit pgnace pgnace2 pgexp* pgbilzeit pgcasmin pgpsbil pgpbbil01 pgpbbil02 ///
	pgpbbil03 pglabgro pgimpgro pgpartnr pgisco88 pgjobch pgallbet
sort pid syear
save $path_repl\mygen\soep_pgen, replace

* Load the pl file, keep the variables used below, restrict to syear 2003 onwards
* and attach the soep_pgen and soep_pfad extracts.
clear matrix
clear mata
use $path\pl.dta, clear
keep pid hid syear ///
	plb0186_h plc0065 plb0477_h plg0012 plb0282_v2 plb0022_h plb0037_h plb0568 ///
	plb0057_h plb0284_h plb0298 plb0299 plb0304_h pld0131 pld0132_h plb0443 ///
	plb0032 plb0033_v7 pmonin plc0016 plc0074 plb0474_h plc0013_h plc0014_h ///
	pab0002 plb0031_h plb0479 plb0478 plb0471_h pab0001_v3 pab0003 pab0004 ///
	pab0005 pab0006 pab0007 pab0008 pab0009 pab0010 pab0011 pab0012 pab0013 ///
	pab0014 pab0015 plh0182 plh0173 plb0586
drop if syear<2003
* person-years that exist only in the using file are not added to the data
merge 1:1 pid syear using $path_repl\mygen\soep_pgen, keep(master match) nogenerate
merge 1:1 pid syear using $path_repl\mygen\soep_pfad, keep(master match) nogenerate
* The merged pl data are set aside here; they are brought back by the restore
* that follows the calendar processing.
preserve

/****************** information on activities from calendar data
for obtaining n of job terminations and months in employment
in each year */
use $path\pkal.dta, clear
keep if syear>=2003
* gebjahr and sex are attached here but removed again by the keep right below
merge m:1 pid using $path\ppath.dta, keepusing(gebjahr sex) keep(master match) nogenerate

keep kal1* syear pid hid cid
drop kal1a*_v1
* one monthly kal1w series is assembled from the version-specific kal1h variables:
* _v2 up to 2011, _v3 for 2012-2014, _v4 for 2015-2017, _v5 for 2018
forvalues m=1/12 {
	local n : display %02.0f `m'
	gen kal1w0`n'=kal1h0`n'_v2
	replace kal1w0`n'=kal1h0`n'_v3 if inrange(syear,2012,2014)
	replace kal1w0`n'=kal1h0`n'_v4 if inrange(syear,2015,2017)
	replace kal1w0`n'=kal1h0`n'_v5 if syear==2018
}
rename kal1h01_v2 kal1w_if
replace kal1w_if=kal1h01_v3 if syear>2011
drop kal1h*
* bring the variable names into the stub+month form needed by reshape
rename kal1a0??_v2 kal1a0??
rename kal1?02 kal1?_N
rename kal1?01 kal1?_if
rename kal1?0?? kal1???
rename kal1?0? kal1??
*transforming yearly calendar data into monthly data for each year
* syearpid (survey year followed by pid) serves as the reshape identifier
tostring pid, gen(pidstring)
tostring syear, gen(syearstring)
gen syearpid=syearstring+pidstring
destring syearpid, replace
reshape long kal1a kal1b kal1c kal1d kal1e kal1f kal1g kal1i kal1j kal1k kal1m kal1n kal1w, i(syearpid) j(month)

* a month counts as employed when any of kal1a, kal1b, kal1k or kal1n equals 1
gen employed=(kal1a==1 | kal1b==1 | kal1k==1 | kal1n==1)
bysort syearpid: egen employed_nmths=total(employed)

/*
Variable njobterm - termination of any full time, part time or marginal employment spell.
Kurzarbeit prolongs full time and part time spells. If so, it is set to zero (so as not to duplicate jobs).
Kurzarbeit that is not preceded by either ft or pt is treated as a spell by itself.
If it ends and there is no employment in the next month, it generates a job termination.
*/
gen calmonth=month+(syear-2003)*12
xtset pid calmonth

* monthly state indicators: ft (kal1a), pt (kal1b), kurz (kal1k), marg (kal1n)
gen ft=(kal1a==1)
gen pt=(kal1b==1)
gen kurz=(kal1k==1)
gen marg=(kal1n==1)

replace ft=1 if ft==0 & kurz==1 & l.ft==1
replace pt=1 if pt==0 & kurz==1 & l.pt==1
replace kurz=0 if ft==1 | pt==1

* lastobs flags the final month of each uninterrupted run of observations of a person;
* a spell end in that month is not counted as a termination
tsset pid calmonth
tsspell pid
gen lastobs=1 if _end==1
drop _spell _seq _end

* ft, pt and marg: a termination is the last month of a spell in which the state equals 1
foreach form in ft pt marg {
	tsspell `form'
	gen `form'term=0
	replace _end=0 if lastobs==1
	replace `form'term=1 if _end==1 & `form'==1
	drop _spell _seq _end
}
* kurz: additionally requires that the following month is without any employment
tsspell kurz
gen kurzterm=0
replace _end=0 if lastobs==1
replace kurzterm=1 if _end==1 & kurz==1 & f.employed==0
egen jobterm=rowtotal(ftterm ptterm margterm kurzterm)
*a transition is marked also automatically after a gap, not a problem for subsequent analyses because we exclude sequences with gaps
bysort pid syear: egen njobterm=total(jobterm)
* back to one record per person and survey year (employed_nmths and njobterm are
* constant within pid syear, so the month 1 record carries them)
keep if month==1
keep pid hid cid syear kal*_if kal*N employed_nm njobterm
*adjusting to the year of reference (calendar data retrospective)
gen year=syear-1
drop syear
rename year syear
sort pid syear
* replace: like the other generated files, the calendar file is overwritten on a
* re-run; without it the second run of the do-file stops here
save $path_repl\mygen\calendar, replace

* return to the pl data set aside by the earlier preserve and attach the calendar file
restore
merge 1:1 pid syear using $path_repl\mygen\calendar
drop if _m==2
* the merge indicator is kept under the name mergecalendar
rename _m mergecalendar

*accounting for potential job changes without period of inactivity in between
sort pid syear
xtset pid syear
* plb0299 of the current wave or plb0298 of the following wave with a value of -1 or
* above marks a termination. System missing compares as larger than any number, so it
* is excluded explicitly in both conditions (previously only in the second one).
gen jobtermination=1 if plb0299>=-1 & plb0299!=.
replace jobtermination=1 if f.plb0298>=-1 & f.plb0298!=.
* years flagged here get at least one termination
replace njobterm=1 if jobtermination==1 & njobterm==0

*earnings from work gross
* all components are taken from the following wave (f.) because the questions are
* retrospective; each is the product of two positive items
* NOTE(review): presumably number of months times monthly amount - confirm against the SOEP codebook
gen depworkinc_retr=f.plb0471_h*f.plc0016 if f.plc0016>0 & f.plb0471_h>0
gen selfempinc_retr=f.plb0474_h*f.plc0074 if f.plb0474_h>0 & f.plc0074>0
gen secinc_retr=f.plb0477_h*f.plc0065 if f.plb0477_h>0 & f.plc0065>0
* option missing: the total is missing when all three components are missing
egen inc_retr=rowtotal(depworkinc_retr selfempinc_retr secinc_retr), missing

*********
* attach the external yearly low earnings threshold (variable poverty_des)
merge m:1 syear using $path_earnings\LowEarnThresGer, nogenerate

* earnings relative to the threshold; years without any month in employment are set to 0
gen perc_linc=inc_retr/poverty_des
replace perc_linc=0 if employed_nmths==0

/* keeping only observations with information on both employment and earnings */
keep if employed_nmths!=. & perc_linc!=.

/*truncating to the working age population (18 to 67)*/
gen age=syear-gebjahr
keep if inrange(age,18,67)

******************generating index values

* generating dummy variable for years affected by low earnings from work
gen liperc_low=0
replace liperc_low=1 if perc_linc<1

* generating total n of years with low earnings and total n of months at work in a sequence
*
* The former -macro drop _all- was removed from this section: it erases the global
* path macros (path, path_earnings, path_repl) defined at the top of the do-file, so
* that every later reference to them expands to nothing. All locals used below are
* assigned before they are read.
/*accounting for gaps*/
tsset pid syear
tsspell sex
*set sequence length: insert desired sequence length(s) in the following line, after local seqlength ... (default is 5)
local seqlength 5
foreach sl in `seqlength' {
	local range=`sl'
	gen totalemplR`range'=.
	gen totallipercR`range'=.
	*providing maximum observation scope (max individual trajectory length)
	sum _seq
	local longest=r(max)
	local bound=`longest'-`range'+1
	* slide a window of range years over every spell; the totals over positions
	* aa..last are stored in the record at position last
	forvalues aa=1/`bound'{
		local last=`aa'+`sl'-1
		bysort pid _spell: egen totalemplR=total(employed_nmths) if _seq>=`aa' & _seq<=`last'
		replace totalemplR`range'=totalemplR if _seq==`last'
		bysort pid _spell: egen totallipercR=total(liperc_low) if _seq>=`aa' & _seq<=`last'
		replace totallipercR`range'= totallipercR if _seq==`last'
		drop totalemplR totallipercR
	}
}
drop _spell _end _seq

* generating non-employment component naci_nw
*
* For each window of range consecutive years (stored in the record of the last window
* year) the code builds
*   nwincidenceR  - sum over all pairs of window years with fewer than 12 months in
*                   employment of 1/(distance in years), divided by the value this
*                   sum takes when every window year qualifies
*   nw_recencyR   - position-weighted (1..range) share of months not employed,
*                   divided by range*(range+1)/2
*   naci_nw1..9   - alpha*incidence + (1-alpha)*recency for alpha = .1, ..., .9
* licznik is Polish for numerator.
*
* The former -macro drop _all- was removed from this section: it erases the global
* path macros (path, path_earnings, path_repl) defined at the top of the do-file.
* All locals used below are assigned before they are read.
/*account for gaps*/
tsset pid syear
tsspell sex
sum _spell
local spell=r(max)
*set sequence length: insert desired sequence length(s) in the following line, after local seqlength ... (default is 5)
local seqlength 5
* EN: length of the spell the record belongs to
bysort pid _spell: egen EN=max(_seq)
foreach sl in `seqlength' {
	local range=`sl'
	gen nwincidenceR`range'=.
	gen nw_recencyR`range'=.
	gen nw_incidence_licznikR`range'=.
	forvalues z=1(1)9{
		gen naci_nw`z'R`range'=.
	}
	forvalues spl=1/`spell'{
		*providing maximum observation scope (max individual trajectory length)
		sum _seq if _spell==`spl'
		local longest=r(max)
		local bound=`longest'-`range'+1
		forvalues aa=1/`bound'{
			local last=`aa'+`sl'-1
			* persons with at least one year below 12 months of employment in this window
			quietly levelsof pid if employed_nmths<12 & _seq>=`aa' & _seq<=`last' & EN>=`last' & _spell==`spl', local(pidneet)
			foreach l of local pidneet {
				display `l'
				levelsof syear if pid==`l' & employed_nmths<12 & _seq>=`aa' & _seq<=`last' & _spell==`spl', local(year)
				local max=r(r)
				local maxlessone=`max'-1
				* the affected years become the positional macros 1, 2, ...
				tokenize "`year'"
				local sumpid=""
				* build the text " + 1/(later year - earlier year)" for every pair of affected years
				forvalues z=1/`maxlessone'{
					local start `z'
					local next=`z'+1
					local sum ""
					forvalues b=`next'/`max'{
						local d``b''_``start''=((``b''-``start'')^-1)
						local sum "`sum' + `d``b''_``start'''"
					}
					local sumpid="`sumpid' `sum'"
					display `l'
				}
				* the leading 0 makes the expression valid when there is a single affected year
				replace nw_incidence_licznikR`range'=0`sumpid' if pid==`l' & _seq==`last' & _spell==`spl'
			}
			/*recencynw*/
			gen lp`last'=.
			gen lpnw`last'=.
			local one=0
			* lp: position of the year inside the window (1 = first, range = last)
			forvalues k=1/`sl'{
				replace lp`last'=(1+ `one') if _seq==(`aa'+ `one') & _spell==`spl'
				local one=`one'+1
			}
			replace lpnw`last'=lp`last'*(12-employed_nmths)/12
			bysort pid: egen advnw_n=total(lpnw`last')
			gen advnw=advnw_n/(`range'*(`range'+1)/2)
			replace nw_recencyR`range'=advnw if _seq==`last' & _spell==`spl'
			drop advnw_n advnw
			drop lp`last' lpnw`last'
		}
	}
	* normalising constant of the incidence numerator: running sum of n/(range-n),
	* read from observation number range (the term for n==range is missing and adds
	* nothing). Explicit subscripting replaces the former "in range" suffix.
	gen denominator=sum(_n/(`range'-_n))
	local den=denominator[`range']
	replace nwincidenceR`range'=nw_incidence_licznikR`range'/`den'
	* employed in every month of the window (12 months in each of the range years):
	* incidence is 0. The bound was hard-coded as 60, which holds for range 5 only.
	replace nwincidenceR`range'=0 if totalemplR`range'==12*`range'
	drop denominator
	/*generating component vals for different alpha*/
	local i 1
	foreach fa in .1 .2 .3 .4 .5 .6 .7 .8 .9 {
		replace naci_nw`i'R`range'=`fa'*nwincidenceR`range'+(1-`fa')*nw_recencyR`range'
		local i=`i'+1
	}
}
drop _spell _end _seq EN

* generating low earnings component naci_liperc
*
* For each window of range consecutive years (stored in the record of the last window
* year) the code builds
*   nlipercincidenceR - sum over all pairs of window years with perc_linc below 1 of
*                       1/(distance in years), divided by the value this sum takes
*                       when every window year qualifies
*   liperc_recencyR   - position-weighted (1..range) shortfall 1-perc_linc (floored
*                       at 0), divided by range*(range+1)/2
*   naci_liperc1..9   - alpha*incidence + (1-alpha)*recency for alpha = .1, ..., .9
* licznik is Polish for numerator.
*
* The former -macro drop _all- was removed from this section: it erases the global
* path macros (path, path_earnings, path_repl) defined at the top of the do-file.
* All locals used below are assigned before they are read.
/*account for gaps*/
tsset pid syear
tsspell sex
sum _spell
local spell=r(max)
*set sequence length: insert desired sequence length(s) in the following line, after local seqlength ... (default is 5)
local seqlength 5
* EN: length of the spell the record belongs to
bysort pid _spell: egen EN=max(_seq)
foreach sl in `seqlength' {
	local range=`sl'
	* NOTE(review): lipercincidenceR is created but not filled anywhere in this section
	gen lipercincidenceR`range'=.
	gen liperc_recencyR`range'=.
	gen nlipercincidenceR`range'=.
	gen liperc_incidence_licznikR`range'=.
	forvalues z=1(1)9{
		gen naci_liperc`z'R`range'=.
	}
	forvalues spl=1/`spell'{
		*providing maximum observation scope (max individual trajectory length)
		sum _seq if _spell==`spl'
		local longest=r(max)
		local bound=`longest'-`range'+1
		forvalues aa=1/`bound'{
			local last=`aa'+`sl'-1
			* persons with at least one low earnings year in this window
			quietly levelsof pid if perc_linc<1 & _seq>=`aa' & _seq<=`last' & EN>=`last' & _spell==`spl', local(pidlinc)
			foreach l of local pidlinc {
				display `l'
				levelsof syear if pid==`l' & perc_linc<1 & _seq>=`aa' & _seq<=`last' & _spell==`spl', local(year)
				local max=r(r)
				local maxlessone=`max'-1
				* the affected years become the positional macros 1, 2, ...
				tokenize "`year'"
				local sumpid=""
				* build the text " + 1/(later year - earlier year)" for every pair of affected years
				forvalues z=1/`maxlessone'{
					local start `z'
					local next=`z'+1
					local sum ""
					forvalues b=`next'/`max'{
						local d``b''_``start''=(``b''-``start'')^-1
						local sum "`sum' + `d``b''_``start'''"
					}
					local sumpid="`sumpid' `sum'"
					display `l'
				}
				* the leading 0 makes the expression valid when there is a single affected year
				replace liperc_incidence_licznikR`range'=0`sumpid' if pid==`l' & _seq==`last' & _spell==`spl'
			}

			/*recency_earnings*/
			gen lp`last'=.
			gen lplincratio`last'=.
			local one=0
			* lp: position of the year inside the window (1 = first, range = last)
			forvalues k=1/`sl'{
				replace lp`last'=(1+ `one') if _seq==(`aa'+ `one') & _spell==`spl'
				local one=`one'+1
			}
			replace lplincratio`last'=lp`last'*(1-perc_linc)
			* years at or above the threshold contribute nothing
			replace lplincratio`last'=0 if lplincratio`last'<0
			bysort pid: egen advlinc_n=total(lplincratio`last')
			gen advlinc=advlinc_n/(`range'*(`range'+1)/2)
			replace liperc_recencyR`range'=advlinc if _seq==`last' & _spell==`spl'
			drop advlinc_n advlinc
			drop lp`last' lplincratio`last'
		}
	}
	* normalising constant of the incidence numerator: running sum of n/(range-n),
	* read from observation number range (the term for n==range is missing and adds
	* nothing). Explicit subscripting replaces the former "in range" suffix.
	gen denominator=sum(_n/(`range'-_n))
	local den=denominator[`range']
	replace nlipercincidenceR`range'=liperc_incidence_licznikR`range'/`den'
	* no low earnings year in the window: incidence is 0
	replace nlipercincidenceR`range'=0 if totallipercR`range'==0
	drop denominator
	/*generating component vals for different alpha*/
	local i 1
	foreach fa in .1 .2 .3 .4 .5 .6 .7 .8 .9 {
		replace naci_liperc`i'R`range'=`fa'*nlipercincidenceR`range'+(1-`fa')*liperc_recencyR`range'
		local i= `i'+1
	}
}
drop _spell _end _seq EN

* generating job terminations component aejt
/*
The code generates aejt for any desired max_jt. It can be set at the code line indicated by
"set maximum job terminations threshold (max_jt)". The default is 3 (job terminations per year).

For each window of range consecutive years (stored in the record of the last window year):
  totaljtR      - number of job terminations in the window
  jtpersistR    - sum over all pairs of window years with at least one termination of
                  1/(distance in years), divided by the value this sum takes when every
                  window year qualifies
  jt_recencyR   - position-weighted (1..range) yearly terminations, each capped at
                  max_jt and expressed as a share of max_jt, divided by 1+2+...+range
  aejt<max_jt><1..9>R<range> - alpha*persistence + (1-alpha)*recency for alpha = .1, ..., .9
licznik is Polish for numerator.

The former -macro drop _all- was removed from this section: it erases the global path
macros (path, path_earnings, path_repl) defined at the top of the do-file. All locals
used below are assigned before they are read.
*/
/*account for gaps*/
tsset pid syear
tsspell sex
sum _spell
local spell=r(max)
*set sequence length: insert desired sequence length(s) in the following line, after local seqlength ... (default is 5)
local seqlength 5
* EN: length of the spell the record belongs to
bysort pid _spell: egen EN=max(_seq)
foreach sl in `seqlength' {
	local range=`sl'
	gen jtpersistR`range'=.
	*set maximum job terminations threshold (max_jt): insert desired threshold in the following line, after local jtnorm ... (default is 3)
	local jtnorm 3
	foreach jtn in `jtnorm' {
		gen jt_recencyR`range'_`jtn'=.
		gen jt_recencyR`range'_`jtn'_licznik=.
	}
	gen jtpersist_licznikR`range'=.
	gen totaljtR`range'=.
	*gen jtincR`range'=.
	/*generating totaljt*/
	sum _seq
	local longest=r(max)
	local bound=`longest'-`range'+1
	forvalues aa=1/`bound'{
		local last=`aa'+`sl'-1
		bysort pid _spell: egen totaljtR=total(njobterm) if _seq>=`aa' & _seq<=`last'
		replace totaljtR`range'=totaljtR if _seq==`last'
		/*recencynew*/
		gen lp`last'=.
		local one=0
		* lp: position of the year inside the window (1 = first, range = last)
		forvalues k=1/`sl'{
			replace lp`last'=(1+ `one') if _seq==(`aa'+ `one')
			local one=`one'+1
		}
		foreach jtn in `jtnorm' {
			gen lpjt`last'=.
			* yearly terminations as a share of max_jt, capped at 1
			gen inc=njobterm/`jtn'
			replace inc=1 if inc>1 & inc!=.
			replace lpjt`last'=lp`last'*inc
			bysort pid _spell: egen licznik=total(lpjt`last')
			replace jt_recencyR`range'_`jtn'_licznik=licznik if _seq==`last'
			drop lpjt`last' inc licznik
		}
		drop lp`last' totaljtR
	}
	forvalues z=1(1)9{
		*gen aejt`z'R`range'=.
		foreach jtn in `jtnorm' {
			gen aejt`jtn'`z'R`range'=.
		}
	}

	forvalues spl=1/`spell'{
		*provide maximum observation scope (max individual trajectory length)
		sum _seq if _spell==`spl'
		local longest=r(max)
		local bound=`longest'-`range'+1
		forvalues aa=1/`bound'{
			local last=`aa'+`sl'-1
			* persons with at least one year with a job termination in this window
			quietly levelsof pid if njobterm>0 & _seq>=`aa' & _seq<=`last' & EN>=`last' & _spell==`spl', local(pidneet)
			foreach l of local pidneet {
				display `l'
				levelsof syear if pid==`l' & njobterm>0 & _seq>=`aa' & _seq<=`last' & _spell==`spl', local(year)
				local max=r(r)
				local maxlessone=`max'-1
				* the affected years become the positional macros 1, 2, ...
				tokenize "`year'"
				local sumpid=""
				* build the text " + 1/(later year - earlier year)" for every pair of affected years
				forvalues z=1/`maxlessone'{
					local start `z'
					local next=`z'+1
					local sum ""
					forvalues b=`next'/`max'{
						local d``b''_``start''=((``b''-``start'')^-1)
						local sum "`sum' + `d``b''_``start'''"
					}
					local sumpid="`sumpid' `sum'"
					display `l'
				}
				* the leading 0 makes the expression valid when there is a single affected year
				replace jtpersist_licznikR`range'=0`sumpid' if pid==`l' & _seq==`last' & _spell==`spl'
			}

		}
	}
	* normalising constant of the persistence numerator: running sum of n/(range-n),
	* read from observation number range (the term for n==range is missing and adds
	* nothing). Explicit subscripting replaces the former "in range" suffix.
	gen denominator=sum(_n/(`range'-_n))
	local den=denominator[`range']
	replace jtpersistR`range'=jtpersist_licznikR`range'/`den'
	replace jtpersistR`range'=0 if totaljtR`range'==0
	drop denominator
	/*denominator recencynew*/
	* 1+2+...+range, read from observation number range
	gen denominator_recency=sum(_n)
	local den=denominator_recency[`range']
	foreach jtn in `jtnorm' {
		replace jt_recencyR`range'_`jtn'=jt_recencyR`range'_`jtn'_licznik/`den'
		replace jt_recencyR`range'_`jtn'=0 if totaljtR`range'==0
	}
	drop denominator_recency

	/*generating component vals for different alpha*/
	local i 1
	foreach fa in .1 .2 .3 .4 .5 .6 .7 .8 .9 {
		*replace aejt`i'R`range'=`fa'*jtpersistR`range'+(1-`fa')*jt_recencyR`range'old
		foreach jtn in `jtnorm' {
			replace aejt`jtn'`i'R`range'=`fa'*jtpersistR`range'+(1-`fa')*jt_recencyR`range'_`jtn'
		}
		local i=`i'+1
	}
}
drop _spell _end _seq EN

///generating CNPI///
/*The researcher specifies components' weights and the alpha parameter
li_weight - low earnings weight
nw_weight - non-employment weight
jt_weight - job terminations weight
alpha - weight attributed to the persistence dimension (in range 1-9, corresponding to 0.1-0.9)
jtn - maximum job terminations threshold*/

* sequence length(s) for which the CNPI is assembled
local seqlength "5"

* specification-specific weights are supplied by small programs;
* programs left in memory by an earlier run are removed first
cap program drop _all

* default: writes the parameters of the default CNPI specification into the data
* as constant variables (component weights, alpha index, job terminations threshold)
* NOTE(review): the three weights add up to 0.99, not 1 - confirm that this is intended
program default
	generate li_weight = 0.33
	generate nw_weight = 0.33
	generate jt_weight = 0.33
	generate alpha = 5
	generate jtn = 3
end

* adjusted: writes the parameters of the adjusted CNPI specification into the data
* as constant variables; job terminations get weight 0.5, the other two components 0.25 each
program adjusted
	generate li_weight = 0.25
	generate nw_weight = 0.25
	generate jt_weight = 0.5
	generate alpha = 5
	generate jtn = 3
end

* names of the programs that supply the weights, one per specification
local specification default adjusted
macro list

* one CNPI variable per sequence length and specification: cnpi_<spec>R<length>
foreach sl of local seqlength {
	foreach spec of local specification {
		* running the program creates li_weight nw_weight jt_weight alpha jtn
		`spec'
		* alpha and jtn are constants; their value selects the component variables
		local alpha=alpha[1]
		local jtn=jtn[1]
		gen cnpi_`spec'R`sl'=li_weight*naci_liperc`alpha'R`sl'+nw_weight*naci_nw`alpha'R`sl'+jt_weight*aejt`jtn'`alpha'R`sl'
		drop li_weight nw_weight jt_weight alpha jtn
	}
}

*******************analyses
* capture: the directory already exists when the do-file is run a second time
capture mkdir $path_repl\output

lab var aejt35R5 "Job terminations" 
lab var naci_nw5R5 "Non-employment" 
lab var naci_liperc5R5 "Low earnings"

* a log left open by an aborted earlier run is closed first, and an existing log
* file is overwritten; without both a re-run would stop at this line
capture log close
log using $path_repl\output\log, replace

****corrs components (Table 1)

corr aejt35R5 naci_nw5R5 naci_liperc5R5 cnpi_defaultR5 if totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012 [aw=phrf]

log off
*******5-year Index distributions (Figure 2)
* zero flags observations whose default CNPI is exactly 0 (a missing CNPI is coded 0 as well)
gen zero=1 if cnpi_defaultR5==0
replace zero=0 if zero==.
sum zero if totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012 [aw=phrf]
* weighted share with CNPI equal to 0, in whole percent. Rounding is done after the
* multiplication by 100: rounding to .01 first and multiplying afterwards can leave
* floating point residue (for example 28.999999999999996) in the figure note.
local mean=round(100*r(mean))
drop zero
* the density is drawn for strictly positive CNPI only; the zero share goes into the note
kdensity cnpi_defaultR5 if totalemplR5>0 & totalemplR5!=. & cnpi_defaultR5>0 & age>=33 & age<=37 & syear>2012 ///
	[aw=phrf], saving($path_repl\output\fig2) note("CNPI=0 for `mean'% of the population") title("Germany") ///
	xtitle(CNPI) graphr(c(white))

********distributions components (Figure 3)
* box plots of the three components (alpha index 5) for ages 33-37 after 2012,
* restricted to sequences with a positive adjusted CNPI
graph box naci_nw5R5 naci_liperc5R5 aejt35R5 ///
	if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 ///
	[pw=phrf], noout ///
	box(1, col(white) lc(black)) box(2, col(gs10) lc(black)) box(3, col(gs5) lc(black)) ///
	graphr(c(white)) leg(c(3) region(lc(white))) saving($path_repl\output\fig3)

****clusters (Figure 4)
* analysis sample shared by the standardisation and the cluster analysis
local smp "totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012"

* standardise the three components within the analysis sample
foreach com of varlist naci_nw5R5 naci_liperc5R5 aejt35R5 {
	egen std`com'=std(`com') if `smp'
}
* Ward's linkage on squared Euclidean distances, then a five-group solution
cluster wardslinkage stdnaci_nw5R5 stdnaci_liperc5R5 stdaejt35R5 if `smp', measure(L2squared) name(wards_linkagestd)
cluster dendrogram wards_linkagestd, cutnumber(10) showcount
cluster generate AclusterAGE533_2012wzero= groups(5), name(wards_linkagestd)
tabulate AclusterAGE533_2012wzero [aw=phrf]
graph box naci_nw5R5 naci_liperc5R5 aejt35R5 cnpi_defaultR5 cnpi_adjustedR5, ///
	over(AclusterAGE533_2012wzero) noout saving($path_repl\output\fig4)
drop stdnaci_nw5R5 stdnaci_liperc5R5 stdaejt35R5

******weights persistence vs recency
* Each figure contrasts a component computed with alpha=0.1 and with alpha=0.9
* for ages 33-37 after 2012, among sequences with a positive adjusted CNPI.
*Figure 5
* Two corrections: the title read USA although all data and all other figure titles
* refer to Germany, and the alpha=0.9 density lacked the cnpi_adjustedR5>0 restriction
* that the alpha=0.1 density (and both densities of the same plot in Figure B4) carry,
* so the two curves were drawn from different samples.
twoway kdensity naci_liperc1R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(shortdash) lc(black) leg(lab(1 "alpha=0.1")) ///
	|| kdensity naci_liperc9R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(solid) lc(black) leg(lab(2 "alpha=0.9")) xtitle(Low earnings component) ytitle(Density) ///
	graphr(c(white)) title(Germany) leg(region(lc(white)) symx(7)) scale(1.5) saving($path_repl\output\fig5)
*Figure B4
* the three panels are kept in memory under the names jt, nw and li and then combined
graph drop _all
twoway kdensity aejt31R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(shortdash) ///
	|| kdensity aejt39R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(solid) xtitle(Job terminations) name(jt) graphr(c(white)) legend(order(1 "alpha=0.1" 2 "alpha=0.9") region(lc(white)))
twoway kdensity naci_nw1R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(shortdash) ///
	|| kdensity naci_nw9R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(solid) xtitle(Non-employment) name(nw) graphr(c(white))
twoway kdensity naci_liperc1R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(shortdash) ///
	|| kdensity naci_liperc9R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf], ///
	lp(solid) xtitle(Low earnings) name(li) graphr(c(white))
grc1leg2 jt li nw, graphr(c(white)) c(3) leg(jt) xsize(6) ysize(2) scale(1.5) saving($path_repl\output\figB4)

log on
******corrs weights (Table 2)
* per component: correlation between the alpha=0.1 and the alpha=0.9 version
foreach stub in aejt3 naci_nw naci_liperc {
	corr `stub'1R5 `stub'9R5 if totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5>0 & age>=33 & age<=37 & syear>2012 [aw=phrf]
}

log off
*********employment form in year 6 (Figure 6 & 7, B6, Table A5 & A9)

/*correlates*/
* workstatus: later replace statements override earlier ones, so self-employment
* (plb0057_h positive) has the last word
gen workstatus=.
lab def workstatus 0 "not working" 1 "fixed-term contract, w/o contract" 2 "permanent contract" 3 "self-employed"
lab val workstatus workstatus
replace workstatus=0 if plb0022_h==9
replace workstatus=1 if plb0037_h==2 | plb0037_h==3
replace workstatus=2 if plb0037_h==1
replace workstatus=3 if plb0057_h>0 & plb0057_h!=.

* school: system missing compares as larger than any number, so pgpsbil>5 and
* pgpbbil02>0 were also true for missing values and coded them as "no school" or
* "higher". Missing values are now excluded, as already done for plb0057_h above.
gen school=.
lab def school 0 "no school" 1 "haupt" 2 "real" 3 "FHr" 4 "Abitur" 5 "higher"
lab val school school
replace school=0 if pgpsbil>5 & pgpsbil!=.
replace school=1 if pgpsbil==1
replace school=2 if pgpsbil==2
replace school=3 if pgpsbil==3
replace school=4 if pgpsbil==4
replace school=5 if pgpbbil02>0 & pgpbbil02!=.

* marital: positive pgfamstd codes, with codes 6 and 7 folded into 1 and code 8 into 2
gen marital=pgfamstd if pgfamstd>0
replace marital=1 if marital==7
replace marital=2 if marital==8
replace marital=1 if marital==6
lab def marital 1 "married" 2 "married, living apart" 3 "single" 4 "divorced" 5 "widowed"
lab val marital marital

/*models (Fig6, Fig7, FigB6 & Table A9)*/
* analysis sample: sequences with some employment, ages 33-37, after 2012
local smp "totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012"

* outcome: work status in the year following the sequence
xtset pid syear
gen fworkstatus=f.workstatus

* Fig6: CNPI densities by next-year work status (0, 1, 2)
twoway kdensity cnpi_adjustedR5 if `smp' & fworkstatus==0 [aw=phrf], ///
	lc(black) lp(solid) leg(lab(1 "not working")) ///
	|| kdensity cnpi_adjustedR5 if `smp' & fworkstatus==1 [aw=phrf], ///
	lc(black) lp(shortdash) leg(lab(2 "fixed-term employment")) ///
	|| kdensity cnpi_adjustedR5 if `smp' & fworkstatus==2 [aw=phrf], ///
	lc(black) lp(dash) leg(lab(3 "permanent contract")) xtitle(CNPI) ytitle(Density) saving($path_repl\output\fig6)

* Table A9 and Fig7: multinomial logit on the adjusted CNPI, average marginal effects
quietly mlogit fworkstatus cnpi_adjustedR5 age i.marital i.school i.sex if `smp' [pw=phrf], cluster(pid)
estimates store mlogit
estout mlogit using $path_repl\output\tableA9.txt, cells(b(star fmt(%9.3f)) se(par)) stats(N, fmt(%9.3f %9.0g))
quietly margins, dydx(cnpi_adjustedR5) vce(unconditional) post
coefplot, xsize(4) ysize(2) scale(2.5) xline(0) graphr(c(white)) ///
	coeflabels(1._predict="Not employed" 2._predict="Fixed-term contract" 3._predict="Permanent contract") ///
	saving($path_repl\output\fig7)
*FigB6
* same model with the default CNPI
mlogit fworkstatus cnpi_defaultR5 age i.marital i.school i.sex if `smp' [pw=phrf], cluster(pid)
margins, dydx(cnpi_defaultR5) vce(unconditional) post
coefplot, xsize(4) ysize(2) scale(2) xline(0) graphr(c(white)) ///
	coeflabels(1._predict="not employed" 2._predict="fixed-term contract" 3._predict="permanent contract") ///
	title(Germany) saving($path_repl\output\figB6)

log on
*descriptives on Vars (Table A5)
* restricted to the estimation sample of the most recent estimation results
summarize i.fworkstatus cnpi_adjustedR5 age i.marital i.school i.sex if e(sample) [aw=phrf]

log off
*********employment form in year 1 (Figure 8 & 9, B7, Table A7 & A11)
* analysis sample: sequences with some employment, ages 33-37, after 2012
local smp "totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012"

* predictor: work status four years earlier, i.e. in the first year of the sequence
xtset pid syear
gen l4workstatus=l4.workstatus

* Fig8: CNPI densities by work status in year 1 (0, 1, 2)
twoway kdensity cnpi_adjustedR5 if `smp' & l4workstatus==0 [aw=phrf], ///
	lc(black) lp(solid) leg(lab(1 "not employed")) ///
	|| kdensity cnpi_adjustedR5 if `smp' & l4workstatus==1 [aw=phrf], ///
	lc(black) lp(shortdash) leg(lab(2 "fixed-term employment")) ///
	|| kdensity cnpi_adjustedR5 if `smp' & l4workstatus==2 [aw=phrf], ///
	lc(black) lp(dash) leg(lab(3 "permanent contract") c(3) region(lc(white)) symx(7)) ///
	xtitle(CNPI) ytitle("") graphr(c(white)) saving($path_repl\output\fig8)

* Table A11 and Fig9: linear regression of the adjusted CNPI, predictive margins by work status
regress cnpi_adjustedR5 age i.marital i.school i.sex i.l4workstatus if `smp' [pw=phrf], cluster(pid)
estimates store reg
margins i.l4workstatus, vce(unconditional) post
coefplot, keep(0.l4workstatus 1.l4workstatus 2.l4workstatus) recast(bar) ///
	ylabel(1 "not employed" 2 "fixed-term contract" 3 "permanent contract") ///
	col(black%20) xsize(5.5) ysize(2) scale(3) graphr(c(white)) cire(rcap) saving($path_repl\output\fig9)
estout reg using $path_repl\output\tableA11.txt, cells(b(star fmt(%9.3f)) se(par)) stats(r2_a N, fmt(%9.3f %9.0g) labels(R-squared))
*FigB7
* same model with the default CNPI
regress cnpi_defaultR5 age i.marital i.school i.sex i.l4workstatus if `smp' [pw=phrf], cluster(pid)
margins i.l4workstatus, vce(unconditional) post
coefplot, keep(0.l4workstatus 1.l4workstatus 2.l4workstatus) recast(bar) ///
	ylabel(1 "not employed" 2 "fixed-term contract" 3 "permanent contract") ///
	col(black%20) xsize(5.5) ysize(2) scale(2) title(Germany) graphr(c(white)) cire(rcap) saving($path_repl\output\figB7)

log on
*descriptives on IVs (Table A7)
* restricted to the estimation sample of the most recent estimation results
summarize i.l4workstatus cnpi_adjustedR5 age i.marital i.school i.sex if e(sample) [aw=phrf]

log off
*******CNPI across life course (Figure 10)
* mean adjusted CNPI by single year of age (25 to 67), shown as bars with confidence intervals
regress cnpi_adjustedR5 i.age ///
	if totalemplR5>0 & totalemplR5!=. & age>=25 & age<=67 [pw=phrf], cluster(pid)
margins i.age, vce(unconditional) atmeans
marginsplot, title(Germany) xtitle(Age) ytitle(CNPI) graphr(c(white)) recast(bar) ///
	plotop(col(gs10) lc(black)) xlabel(25(5)67) ciopts(lc(black)) saving($path_repl\output\fig10)

* analysis sample: sequences with some employment, ages 33-37, after 2012
local smp "totalemplR5>0 & totalemplR5!=. & age>=33 & age<=37 & syear>2012"

log on
*****Appendix
*N months in employment (Table A1)
tabstat totalemplR5 if `smp' [aw=phrf], st(N mean sd p25 median p75)

log off
*N years with low earnings (Figure B2)
* one indicator per observed count (sth1, sth2, ...), plotted as weighted shares
tabulate totallipercR5 if `smp', gen(sth)
graph bar sth* if `smp' [aw=phrf], ascat ///
	yvaroptions(relabel(1 "0" 2 "1" 3 "2" 4 "3" 5 "4" 6 "5")) title(Germany) saving($path_repl\output\figB2)
drop sth*

*N job terminations (Figure B1, Table A2)
* total over the five years of the sequence; missing when any of the five years is missing
xtset pid syear
gen totalnjobterm=l4.njobterm+l3.njobterm+l2.njobterm+l1.njobterm+njobterm
log on
*N job terminations (Table A2)
tabstat totalnjobterm if `smp' [aw=phrf], st(N mean sd p25 median p75)
log off
tabulate totalnjobterm if `smp', gen(sth)
graph bar sth* if age>=33 & age<=37 & syear>2012 [aw=phrf], ascat ///
	yvaroptions(relabel(1 "0" 2 "1" 3 "2" 4 "3" 5 "4" 6 "5" 7 "6" 8 "7" 9 "8")) saving($path_repl\output\figB1)
drop sth*

*yearly njobterm (Table A3)
* validseq marks the final year of every analysed sequence ...
local endyear "totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5!=. & age>=33 & age<=37 & syear>2012"
gen validseq=1 if `endyear'
xtset pid syear
* ... and is then extended to the four years preceding a marked year
replace validseq=1 if f1.validseq==1 | f2.validseq==1 | f3.validseq==1 | f4.validseq==1

log on
*yearly njobterm (Table A3)
tabulate njobterm if validseq==1 [aw=phrf]
summarize njobterm if validseq==1 [aw=phrf]
log off

drop validseq

*distribution of intensity of low earnings in years with low earnings in the sample of 
*sequences under analysis (Figure B3, Table A4)
* validseq marks the final year of every analysed sequence ...
local endyear "totalemplR5>0 & totalemplR5!=. & cnpi_adjustedR5!=. & age>=33 & age<=37 & syear>2012"
gen validseq=1 if `endyear'
xtset pid syear
* ... and is then extended to the four years preceding a marked year
replace validseq=1 if f1.validseq==1 | f2.validseq==1 | f3.validseq==1 | f4.validseq==1
* shortfall of earnings relative to the low earnings threshold
gen distance=1-perc_linc

log on
*Table A4
tabstat distance if validseq==1 & liperc_low==1 [aw=phrf], st(N mean sd p25 median p75)
log off

kdensity distance if validseq==1 & liperc_low==1 [aw=phrf], ///
	title(Germany) xtitle("Distance to low earnings threshold") note("") saving($path_repl\output\figB3)




