*******************************************************
** JJ Created 06 / 05 / 2014 **************************
** JJ updated 01 / 03 / 2016 ***************************
*******************************************************

****************************************************************************
*--- This do-file recodes the key FATHER variables used in the analysis ---*
****************************************************************************

*********************************************************************
*STEP 1 - OPEN THE DATA *********************************************
*********************************************************************
* Load the merged PSID extract, replacing any data currently in memory.
* NOTE(review): the path below is machine-specific; confirm it before running elsewhere.
use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA2\PSID_FINAL.dta", clear


*********************************************************************
*STEP 2 - KEEP MEN ONLY *********************************************
*********************************************************************
* Discard every observation whose ER32000 is not 1 (missing values are discarded too).
drop if ER32000 != 1

*********************************************************************
*STEP 3 - CREATE NEW LABOR FORCE VARIABLES **************************
*********************************************************************
*`1' = YEAR
*`2' = Variable indicating whether HH head or not
*`3' = Person's age
*`4' = Person's labour market income
*`5' = Sequence number

*See https://psidonline.isr.umich.edu/Guide/FAQ.aspx?Type=1
*See https://psidonline.isr.umich.edu/Guide/FileStructure.pdf

*A. Create new program that will create new variables
	*i. Drop any program already with this name
	* Remove any earlier definition so the do-file can be re-run in the same session
	capture program drop HHH_Recode

	*ii. HHH_Recode: build the father variables for ONE survey wave.
	*    Positional arguments:
	*      1 = survey year (used as the suffix of every variable created)
	*      2 = variable indicating whether the person is the household head
	*      3 = person's age
	*      4 = person's labour market income
	*      5 = sequence number
	*    Variables created: Father_Head_YEAR, Age_YEAR,
	*      Father_Labor_Income_YEAR and Father_Labor_Income_ALT_YEAR
	program HHH_Recode
		args year relhead age income seqnum

		*a. Household-head flag: 1 when the head code is 1 or 10 AND the
		*   sequence number is 1; left missing (not 0) for everyone else
		gen Father_Head_`year' = 1 ///
			if inlist(`relhead', 1, 10) & `seqnum' == 1

		*b. Age in this wave, kept only when strictly between 0 and 100
		gen Age_`year' = `age' ///
			if `age' > 0 & `age' < 100

		*c. Labour income of fathers who were household heads aged 30-60.
		*   The upper bounds also exclude missing values, because Stata
		*   treats missing as larger than any number.
		gen Father_Labor_Income_`year' = `income' ///
			if `income' > 0 & `income' < 9000000 ///
			& Father_Head_`year' == 1 ///
			& Age_`year' > 29 & Age_`year' < 61

		*d. DISABLED in the original file: set income of 99,999 to missing
		*   replace Father_Labor_Income_YEAR = . if it is > 99998 and < 100000

		*e. Alternative income measure: same rules as (c) but with NO age
		*   restriction. The final line carries no trailing continuation, so
		*   the command no longer depends on the next line being blank.
		gen Father_Labor_Income_ALT_`year' = `income' ///
			if `income' > 0 & `income' < 9000000 ///
			& Father_Head_`year' == 1

		*   DISABLED in the original file: the same 99,999 recode for the ALT variable
end

*B. Set sequence number equal to 1 for all observations
	* Constant 1 that stands in as the sequence-number argument for the 1968 wave
	gen SQ1968 = 1

*C. Run HHH_Recode once per wave.
*   Each quoted entry is: year, head indicator, age, labour income, sequence number
	foreach wave in ///
		"1968  ER30003  ER30004  V74      SQ1968 " ///
		"1969  ER30022  ER30023  V514     ER30021" ///
		"1970  ER30045  ER30046  V1196    ER30044" ///
		"1971  ER30069  ER30070  V1897    ER30068" ///
		"1972  ER30093  ER30094  V2498    ER30092" ///
		"1973  ER30119  ER30120  V3051    ER30118" ///
		"1974  ER30140  ER30141  V3463    ER30139" ///
		"1975  ER30162  ER30163  V3863    ER30161" ///
		"1976  ER30190  ER30191  V5031    ER30189" ///
		"1977  ER30219  ER30220  V5627    ER30218" ///
		"1978  ER30248  ER30249  V6174    ER30247" ///
		"1979  ER30285  ER30286  V6767    ER30284" ///
		"1980  ER30315  ER30316  V7413    ER30314" ///
		"1981  ER30345  ER30346  V8066    ER30344" ///
		"1982  ER30375  ER30376  V8690    ER30374" ///
		"1983  ER30401  ER30402  V9376    ER30400" ///
		"1984  ER30431  ER30432  V11023   ER30430" ///
		"1985  ER30465  ER30466  V12372   ER30464" ///
		"1986  ER30500  ER30501  V13624   ER30499" ///
		"1987  ER30537  ER30538  V14671   ER30536" ///
		"1988  ER30572  ER30573  V16145   ER30571" ///
		"1989  ER30608  ER30609  V17534   ER30607" ///
		"1990  ER30644  ER30645  V18878   ER30643" ///
		"1991  ER30691  ER30692  V20178   ER30690" ///
		"1992  ER30735  ER30736  V21484   ER30734" ///
		"1993  ER30808  ER30809  V23323   ER30807" ///
		"1994  ER33103  ER33104  ER4140   ER33102" ///
		"1995  ER33203  ER33204  ER6980   ER33202" ///
		"1996  ER33303  ER33304  ER9231   ER33302" ///
		"1997  ER33403  ER33404  ER12080  ER33402" ///
		"1999  ER33503  ER33504  ER16463  ER33502" ///
		"2001  ER33603  ER33604  ER20443  ER33602" ///
		"2003  ER33703  ER33704  ER24116  ER33702" ///
		"2005  ER33803  ER33804  ER27931  ER33802" ///
		"2007  ER33903  ER33904  ER40921  ER33902" ///
		"2009  ER34003  ER34004  ER46829  ER34002" ///
		"2011  ER34103  ER34104  ER52237  ER34102" {
		* The quoted entry expands into the five positional arguments
		HHH_Recode `wave'
	}


*********************************************************************
*STEP 4 - INFLATE ALL EARNINGS TO CURRENT PRICES ********************
*********************************************************************
*4A. Create new variable containing US inflation rate each year since 1968
	* Annual inflation factors, one per calendar year from 1968 to 2011,
	* listed strictly in year order (44 values in total).
	local annual_factors ///
		1.042 1.055 ///
		1.057 1.044 1.032 1.062 1.11  1.091 1.058 1.065 1.076 1.113 ///
		1.135 1.103 1.062 1.032 1.043 1.036 1.019 1.036 1.041 1.048 ///
		1.054 1.042 1.030 1.030 1.026 1.028 1.030 1.023 1.016 1.022 ///
		1.034 1.028 1.016 1.023 1.027 1.034 1.032 1.028 1.038 0.996 ///
		1.016 1.032

	* Create Inflation_YEAR as a constant variable holding that year's factor
	local pos = 0
	forvalues yr = 1968/2011 {
		local ++pos
		local factor : word `pos' of `annual_factors'
		gen Inflation_`yr' = `factor'
	}

*4B. Create a global macro containing all years where labor income is available
*NOTE: After 1997 the PSID became biennial (one wave every two years)
	* Every year 1968-1997, then every second year 1999-2011.
	* numlist expands the ranges and returns the full list in r(numlist).
	numlist "1968/1997 1999(2)2011"
	global psid_years `r(numlist)'
	
*C. Create new program that will adjust for inflation
*NOTE: Need to use a program to take account for PSID becoming bi-annual since 1997
*      `1' = Start year
*	   `2' = Interval
*	   `3' = End year

	*i. Drop any program that already exists with this name
	* Clear any previous definition before (re)defining the program
	capture program drop inflation_recode

	*ii. inflation_recode: uprate both father income variables of each wave.
	*    Positional arguments: 1 = first wave, 2 = step between waves, 3 = last wave.
	*    For every wave in that range, the wave's income is multiplied in turn by
	*    Inflation_Y for each calendar year Y from the wave year up to 2011.
	*    The inner loop always steps by one year, whatever the wave spacing.
	program inflation_recode
		args first_wave step last_wave

		forvalues wave = `first_wave'(`step')`last_wave' {
			forvalues cal_year = `wave'/2011 {
				* Main (age-restricted) measure first, then the ALT measure
				foreach stub in Father_Labor_Income Father_Labor_Income_ALT {
					replace `stub'_`wave' = `stub'_`wave' * Inflation_`cal_year'
				}
			}
		}
end

*D. Run program above for 1968 - 1997 waves where PSID annual
	inflation_recode 1968 1 1997													/*** Loop over every year between 1968 and 1997 (when psid annual) ***/

*E. Run program above for 1999 - 2011 waves where PSID bi-annual
	inflation_recode 1999 2 2011													/*** Loop over every other year between 1999 and 2011 (psid bi-annual) ***/
			
*********************************************************************
*STEP 5 - CREATE PERMANENT EARNINGS MEASURE *************************
*********************************************************************				
	*A. Create Permanent labour earnings 
	* Permanent earnings = mean of the age-restricted yearly incomes over all
	* waves where a value exists.
	* FIX: the varlist is restricted to the year-suffixed variables
	* (Father_Labor_Income_1968 ... Father_Labor_Income_2011). The previous
	* pattern ended in a bare wildcard and therefore also matched every
	* Father_Labor_Income_ALT_YEAR variable, mixing the unrestricted-age
	* incomes into the mean and making it disagree with the count in C below.
	egen Father_Perm_Labor_Earnings = ///
		rowmean(Father_Labor_Income_1* Father_Labor_Income_2*)

	*B. Take the log of this variable
	gen Log_Father_Perm_Labor_Earnings = log(Father_Perm_Labor_Earnings)


	*C. Number of yearly observations behind the permanent measure
	*   (same varlist as the mean above, so this is its denominator)
	egen N_Perm_Labor_Earnings = ///
		rownonmiss(Father_Labor_Income_1* Father_Labor_Income_2*)

		
*********************************************************************
*STEP 6 - CREATE SINGLE-SHOT MEASURE ********************************
*********************************************************************		
	*A. Create single-shot earnings measure when fathers age 40	
		*i. New variable. Set to missing by default
		* Single-shot earnings measure; missing until a wave supplies a value
		gen Father_Age40_Labor_Earnings = .

		*ii. Age at which the value above was actually observed.
		*NOTE: Some fathers have no usable income at exactly age 40, so the
		*      measure falls back on the closest available age; this variable
		*      records which age was used.
		gen Actual_Age_For_Age40_Measure = .

	*B. Create new program that will do the re-coding
		*i. Drop any program that already exists with this name
		capture program drop income_age40

		*ii. income_age40: fill the single-shot measure from one target age.
		*    Positional argument: 1 = the age to look for.
		*    For each wave listed in the global psid_years, a father who was
		*    that age in the wave, has income in that wave and has no value
		*    yet, receives that wave's income and the target age.
		*    FIX: the age is now recorded only when an income value is
		*    actually available. Previously it was overwritten whenever the
		*    age matched, so fathers who never obtained an earnings value
		*    still ended up with a non-missing "actual age".
		program income_age40
			args target_age

			foreach yrs of global psid_years {
				* Fathers eligible to be filled from this wave
				local take "Age_`yrs' == `target_age' & missing(Father_Age40_Labor_Earnings) & !missing(Father_Labor_Income_`yrs')"

				* Record the age first: the earnings replace below makes
				* the eligibility condition false for these fathers
				replace Actual_Age_For_Age40_Measure = `target_age' if `take'

				replace Father_Age40_Labor_Earnings = Father_Labor_Income_`yrs' if `take'
			}
end


	*C. Run program above.
	*NOTE - make sure keep in this order.
	*Want to replace with father earnings at age 40 where possible
	*Where not possible, use age 41 instead....
	*Where still not possible, use age 39 instead....
	*... ETC ETC
		*i. Replace with father's earnings at age 40 if possible.....
		* Target ages in order of preference: 40 first, then alternating
		* outwards (41, 39, 42, 38, ... 45, 35), then 46 up to 59.
		* The order matters because a father keeps the first value he receives.
		foreach target_age of numlist 40 41 39 42 38 43 37 44 36 45 35 46/59 {
			income_age40 `target_age'
		}

	*D. Take the log of this variable
	gen Log_Father_Age40_Labor_Earnings = ln(Father_Age40_Labor_Earnings)

/*
*********************************************************************
*STEP 7 - CREATE 5-YEAR MEASURE *************************************
*********************************************************************	
	*7A. Drop any program with this name
	capture ///
		program drop ///
		five_year
		
	*7B. Create new program to create the 5-year father earnings measure
	*`1' = Time point (early or mid in the decade)
	*`2' = Which decade (e.g. 70s)
	*`3' = Starting year of the 5 year period
	*`4' = End year of the 5 year period.
		*i. Create the new program
		program five_year
			
			*ii. Create new variable for father's earnings
			gen Father_Earnings_`1'_`2's = .
			
			*iii. Looping between the lower and upper years....
			forvalues i = `3'(1)`4' {												/* Looping betweent the values in `3' and `4' */
					replace Father_Earnings_`1'_`2's ///							/* Replace the father's earnings variables */
						= Father_Labor_Income_`i' ///								/* with the father's earnings in Year `i' */
							if ///
							Father_Earnings_`1'_`2's == .
		}
end

five_year "Early" 70 1968 1972
five_year "Mid" 70 1973 1977
five_year "Early" 80 1978 1982
five_year "Mid" 80 1983 1987
five_year "Early" 90  1988  1992 
five_year "Mid" 90 	1993  1997
*/

/*	
*A. Create new varibale. Will eventually include father earnings at a given age
* Set to missing by default
forvalues age = 35(1)55 {
	gen Labor_Earnings_Father_`age' = .												/*** Create new variabl. Set equal to missing by default ***/
	}

*B. Replace with father's earnings at a given age	
forvalues age = 35(1)55 {
	foreach yrs of global psid_years {												/*** Loop over global macro defined above ***/		
		replace Labor_Earnings_Father_`age' ///										/*** Replace the earnings variable at that age... ***/
			= ///
			Father_Labor_Income_`yrs' ///											/***... with father's earnings in a given year ***/
				if ///
				Age_`yrs' == `age' 													/*** If they are given `age' in given `yrs' (year)  ***/
			}
		}

*C. Create five year measure			
gen Father_5yr_Labor_Earnings = .
capture program drop income_five_yr
program income_five_yr
	egen ///
		`1'_`2'_`3'_`4'_`5'_`6' ///
		=  ///
		`1' ///
			( ///
			Labor_Earnings_Father_`2' ///
			Labor_Earnings_Father_`3' /// 
			Labor_Earnings_Father_`4' ///
			Labor_Earnings_Father_`5' ///
			Labor_Earnings_Father_`6') 		
			
	capture replace Father_5yr_Labor_Earnings = ///
	rowmean_35_36_`4'_`5'_`6' ///
		if ///
		rownonmiss_35_36_`4'_`5'_`6' ==5 ///
		& ///
		Father_5yr_Labor_Earnings ==.
		
	capture drop rowmean_35_36_`4'_`5'_`6'	
end

*forvalues age2 = 35(1) 55 {
*	forvalues age3 = 36(1) 55 {
		forvalues age4 = 37(1) 55 {
			forvalues age5 = 38(1) 55 {
				forvalues age6 = 39(1) 55 {
					income_five_yr  rownonmiss 35  36  `age4' `age5' `age6'  				
					income_five_yr  rowmean  35  36  `age4' `age5' `age6' 
					}
				}	
			}
*		}
*	}
	
	
*/	
	
*********************************************************************
*STEP 8 - EDUCATION  ************************************************
*********************************************************************
*SEE Data_4 download for more info on which variables to use.
*Create variable - father's reports of own education
*A. Global macro containing the information from different waves ....
* Education variables in wave order: 1968, then 1970 through 1997, then
* 1999 through 2011 (no variable is listed for 1969).
global education ///
	ER30010 ///
	ER30052 ER30076 ER30100 ER30126 ER30147 ER30169 ER30197 ER30226 ER30255 ER30296 ///
	ER30326 ER30356 ER30384 ER30413 ER30443 ER30478 ER30513 ER30549 ER30584 ER30620 ///
	ER30657 ER30703 ER30748 ER30820 ER33115 ER33215 ER33315 ER33415 ER33516 ///
	ER33616 ER33716 ER33817 ER33917 ER34020 ER34119

*B. Blank out illogical values IN PLACE: zero, or more than 35 years of schooling
foreach edvar of global education {
	replace `edvar' = . if `edvar' == 0 | `edvar' > 35
}

*C. Father's education = highest value reported in any wave.
* Start from the sentinel -9 and keep a running maximum; max() ignores
* missing arguments, so waves with no report leave the value unchanged.
gen Father_Ed = -9
foreach edvar of global education {
	replace Father_Ed = max(Father_Ed, `edvar')
}

* A father still at the sentinel never had a valid report
replace Father_Ed = . if Father_Ed == -9
tab Father_Ed
		
*D. Create new father education variable so categories are consistent
* with the child reported variable
	
	*i. Create new variable. Misssing by default.
	* Collapse years of schooling into eight categories; anything outside
	* the listed ranges (including missing) stays missing.
	* NOTE(review): zeros in the source education variables are set to missing
	* earlier in this file, so category 0 may never be populated - confirm.
	gen Father_Report_Father_Ed = ///
		cond(Father_Ed == 0,                    0, ///
		cond(Father_Ed >  0 & Father_Ed <= 5,   1, ///
		cond(Father_Ed >  5 & Father_Ed <= 8,   2, ///
		cond(Father_Ed >  8 & Father_Ed <  12,  3, ///
		cond(Father_Ed == 12,                   4, ///
		cond(Father_Ed > 12 & Father_Ed <  16,  5, ///
		cond(Father_Ed == 16,                   6, ///
		cond(Father_Ed == 17,                   7, .))))))))

*E. Value labels for the eight categories
label define Father_Report_Father_Ed ///
	0 "No Education" ///
	1 "Grades 1 - 5" ///
	2 "Grades 6 - 8" ///
	3 "Grades 9 - 11" ///
	4 "Grade 12 (HS completion)" ///
	5 "Some college / associates degree" ///
	6 "College degree" ///
	7 "Advanced college degree"

*F. Attach the label set (same name as the variable) to the variable
label values Father_Report_Father_Ed Father_Report_Father_Ed

*G. Check the recode: one-way table, then cross-tab against years of schooling
tab Father_Report_Father_Ed
tab Father_Ed Father_Report_Father_Ed
	
*********************************************************************
*STEP 9 - OCCUPATION AND INDUSTRY  **********************************
*********************************************************************
*Want to know father occ when child was growing up
*Use father occ when offspring age 15....		
*So below, Create new variable that records father occ in a given year
*After merging with child file, use this to identify the occupation father had
*when offspring was age 15......

*A. Create new program - generates new variables for father occ and industry in each year
* `1' = Year
* `2' = Variable identifying head's occupation
* `3' = Variable identifying head's industry
	
	*i. Drop any program that already exists with this name
	* Clear any previous definition before (re)defining the program
	capture program drop occ_recode

	*ii. occ_recode: copy one wave's occupation and industry codes into
	*    year-suffixed father variables.
	*    Positional arguments: 1 = year, 2 = occupation variable, 3 = industry variable
	program occ_recode
		args year occvar indvar
		gen Father_Report_Father_Occ_`year' = `occvar'
		gen Father_Report_Father_Ind_`year' = `indvar'
	end

	*iii. Run this program
	*Note: Don't run for 2003 to 2011 data. Uses a different occupational coding system
	*occ_recode  2011  ER47479  ER47480   												/*** Don't use this. Different occupational coding scheme used ***/
	*occ_recode  2009  ER42167  ER42168   
	*occ_recode  2007  ER36132  ER36133  
	*occ_recode  2005  ER25127  ER25128
	*occ_recode  2003  ER21145  ER21146  
		
	* One entry per wave, newest first: year, occupation variable, industry variable.
	* The 1981 entry deliberately reuses the 1982 industry variable (V8381)
	* because the 1981 industry variable is missing.
	foreach wave in ///
		"2001  ER17226  ER17227" ///
		"1999  ER13215  ER13216" ///
		"1997  ER12085  ER12086" ///
		"1996  ER9108   ER9109 " ///
		"1995  ER6857   ER6858 " ///
		"1994  ER4017   ER4018 " ///
		"1993  V22456   V22457 " ///
		"1992  V20701   V20702 " ///
		"1991  V19401   V19402 " ///
		"1990  V18101   V18102 " ///
		"1989  V16663   V16664 " ///
		"1988  V15162   V15163 " ///
		"1987  V14154   V14155 " ///
		"1986  V13054   V13055 " ///
		"1985  V11651   V11652 " ///
		"1984  V10460   V10461 " ///
		"1983  V9011    V9012  " ///
		"1982  V8380    V8381  " ///
		"1981  V7712    V8381  " ///
		"1980  V7100_A  V7101_A" ///
		"1979  V6497_A  V6498_A" ///
		"1978  V5873_A  V5874_A" ///
		"1977  V5374_A  V5375_A" ///
		"1976  V4459_A  V4460_A" ///
		"1975  V3968_A  V3969_A" ///
		"1974  V3530_A  V3531_A" ///
		"1973  V3115_A  V3116_A" ///
		"1972  V2582_A  V2583_A" ///
		"1971  V1984_A  V1985_A" ///
		"1970  V1279_A  V1279_B" ///
		"1969  V640_A   V640_B " ///
		"1968  V197_A   V197_B " {
		* The quoted entry expands into the three positional arguments
		occ_recode `wave'
	}

*********************************************************************
*STEP 10 - FATHER RACE **********************************************
*********************************************************************
	*10A. Create new global macro
	* Race variables in the order they are consulted (first entry has priority)
	global father_race ///
		ER51904 ER46543 ER40565 ER27393 ER23426 ///
		ER19989 ER15928 ER11848 ER9060  ER6814 ///
		ER3944  V23276  V21420  V20114  V18814 ///
		V17483  V16086  V14612  V13565  V11938

	*10B. Create new variable for father's race
	gen Father_Race = .

	*10C. Take the first INFORMATIVE report in the order listed above.
	* FIX: values that are missing, 0 or 9 are skipped instead of being
	* accepted. Previously such a value stopped the search and was later
	* recoded to 7, so a valid report in another listed variable (or the
	* 1984 fallback below) was never used. Fathers with no informative
	* report anywhere still end up as 7 in step viii.
	* NOTE(review): codes 6 and 8 are still accepted here and folded into 7
	* below, as before - confirm whether they should be skipped as well.
	foreach racevar of global father_race {
		replace Father_Race = `racevar' ///
			if missing(Father_Race) ///
			& !missing(`racevar') ///
			& !inlist(`racevar', 0, 9)
	}

	*10E. If father race is still unknown, fall back on V11055 (1984), whose
	*     coding is slightly different
		*i. Coded as 'White' in 1984
		replace Father_Race = 1 if missing(Father_Race) & V11055 == 1

		*ii. Coded as 'Black' in 1984
		replace Father_Race = 2 if missing(Father_Race) & V11055 == 2

		*iii-iv. Coded as 'Spanish' (3) or 'other' (7) in 1984 -> other / unknown
		replace Father_Race = 7 if missing(Father_Race) & inlist(V11055, 3, 7)

		*vi-vii. Codes 6 and 8 (8 = mentioned more than 2 different races)
		*        are folded into other / unknown
		replace Father_Race = 7 if inlist(Father_Race, 6, 8)

		*viii. Set to other / unknown if still missing
		replace Father_Race = 7 if missing(Father_Race)

	*10F. Define labels for variable
	label define Race ///
		1 "White" ///
		2 "Black" ///
		3 "American Indian" ///
		4 "Asian" ///
		5 "Pacific Islander" ///
		7 "Other / Unknown"

	*10G. Assign labels to variable
	label values Father_Race Race

	*10H. Take a look at the distribution of the father race variable
	tab Father_Race

*********************************************************************
*STEP 11 - FATHER AGE 2011 ******************************************
*********************************************************************
	*11A. Father's age in 2011: an unfiltered copy of ER34104, the same variable
	*     passed to HHH_Recode as the 2011 age. Unlike Age_2011, values outside
	*     the 0-100 plausibility window are NOT set to missing here.
	gen Father_Age_2011 = ER34104


	
	
*********************************************************************
*STEP 12 - FATHER CROSS-SECTIONAL INCOME ****************************
*********************************************************************
*Get father cross-sectional income in 1980 or, failing that, in the closest
*available year between 1975 and 1992.
*Age, occupation and industry are always taken from the same year as the wage.
*
*FIX: the fallback years used to be tried in ascending order (1975, 1976, ...),
*so a father with no 1980 income was given his EARLIEST available year rather
*than the year closest to 1980. Years are now tried in order of distance from
*1980. When two years are equally close the earlier one is tried first.
*missing() is used instead of comparisons with . so that extended missing
*values (.a to .z) are also treated as missing.
*NOTE: the local macro defined in part E is only visible if this whole section
*is run in one go (for example as part of the full do-file).

*A. New variable. Father age in the year the cross-sectional wage is taken from.
* Set to the 1980 value by default (if 1980 labour income is available)
gen Father_Cross_Section_Age = Age_1980 ///
	if !missing(Father_Labor_Income_ALT_1980)

*B. New variable. Cross-sectional wage of the father.
* Set to the 1980 value by default (if it is available)
gen Father_Cross_Section_Wage = Father_Labor_Income_ALT_1980 ///
	if !missing(Father_Labor_Income_ALT_1980)

*C. New variable. Cross-sectional OCC of the father.
* Set to the 1980 value by default (if 1980 labour income is available)
gen Father_Cross_Section_Occ = Father_Report_Father_Occ_1980 ///
	if !missing(Father_Labor_Income_ALT_1980)

*D. New variable. Cross-sectional IND of the father.
* Set to the 1980 value by default (if 1980 labour income is available)
gen Father_Cross_Section_Ind = Father_Report_Father_Ind_1980 ///
	if !missing(Father_Labor_Income_ALT_1980)

*E. If nothing was available for 1980, fall back on the closest year with labour income
	*i. Fallback years, ordered by distance from 1980 (earlier year first when tied)
	local Fallback_Years 1979 1981 1978 1982 1977 1983 1976 1984 1975 1985 1986 1987 1988 1989 1990 1991 1992

	*ii. Work through the years. Only fathers who still have no wage are touched.
	*    The wage itself is filled in LAST within each year, because the other
	*    three replacements rely on the wage still being missing.
foreach i of local Fallback_Years {
	replace Father_Cross_Section_Age = Age_`i' ///									/*** Age in year i ***/
		if missing(Father_Cross_Section_Wage) ///									/*** No cross-section wage found yet... ***/
		& !missing(Father_Labor_Income_ALT_`i')										/*** ...but labour income is available in year i ***/

	replace Father_Cross_Section_Occ = Father_Report_Father_Occ_`i' ///				/*** Occupation in year i ***/
		if missing(Father_Cross_Section_Wage) ///
		& !missing(Father_Labor_Income_ALT_`i')

	replace Father_Cross_Section_Ind = Father_Report_Father_Ind_`i' ///				/*** Industry in year i ***/
		if missing(Father_Cross_Section_Wage) ///
		& !missing(Father_Labor_Income_ALT_`i')

	replace Father_Cross_Section_Wage = Father_Labor_Income_ALT_`i' ///				/*** Labour income in year i ***/
		if missing(Father_Cross_Section_Wage) ///
		& !missing(Father_Labor_Income_ALT_`i')
}
		
*F. Natural log of the father's cross-sectional wage
generate Log_Father_Cross_Section_Wage = ln(Father_Cross_Section_Wage)


*G. Categorical (banded) version of the father's cross-sectional age
	*i. Start from an empty variable. The age bands are filled in further down.
	generate Father_Cross_Section_Age_Dummies = .

	*ii. Remove any earlier definition of agechange so that it can be defined afresh
	capture program drop agechange

	*iii. Program that fills in one age band of Father_Cross_Section_Age_Dummies
	*     Argument 1 = value the banded variable takes for this band
	*     Argument 2 = lower age limit (exclusive)
	*     Argument 3 = upper age limit (exclusive)
	*     Fathers with a missing cross-sectional age are never assigned a band,
	*     because a missing value is not below any upper limit.
	program agechange
		args Band_Value Age_Above Age_Below
		replace Father_Cross_Section_Age_Dummies = `Band_Value' ///
			if Father_Cross_Section_Age > `Age_Above' & Father_Cross_Section_Age < `Age_Below'
	end

	*iv. Assign the age bands. Both limits are exclusive, so "17 26" covers ages 18 to 25.
	agechange 1 17 26																/*** 18 to 25 ***/
	agechange 2 25 31																/*** 26 to 30 ***/
	agechange 3 30 36																/*** 31 to 35 ***/
	agechange 4 35 41																/*** 36 to 40 ***/
	agechange 5 40 46																/*** 41 to 45 ***/
	agechange 6 45 51																/*** 46 to 50 ***/
	agechange 7 50 56																/*** 51 to 55 ***/
	agechange 8 55 61																/*** 56 to 60 ***/
	agechange 9 60 66																/*** 61 to 65 ***/

	*v. Make the 41 to 45 year olds (band 5) the reference group by recoding them to 0
	recode Father_Cross_Section_Age_Dummies (5 = 0)

	*vi. Define the value labels (there is no code 5 because band 5 is now coded 0)
	label define Father_Cross_Section_Age_Dummies ///
		0 "41 to 45" ///
		1 "18 to 25" ///
		2 "26 to 30" ///
		3 "31 to 35" ///
		4 "36 to 40" ///
		6 "46 to 50" ///
		7 "51 to 55" ///
		8 "56 to 60" ///
		9 "61 to 65"

	*vii. Attach the value labels to the variable of the same name
	label values Father_Cross_Section_Age_Dummies Father_Cross_Section_Age_Dummies

	*viii. Frequency table of the banded variable
	tabulate Father_Cross_Section_Age_Dummies

	*ix. Check the banding: summary statistics of age within each band
	tabulate Father_Cross_Section_Age_Dummies, summarize(Father_Cross_Section_Age)

*H. Centre the father's cross-sectional age on 45 and build its square
	*i. Linear term. The age variable is overwritten in place, so from here on
	*   Father_Cross_Section_Age holds (age - 45) and 0 means age 45.
	replace Father_Cross_Section_Age = Father_Cross_Section_Age - 45

	*ii. Squared term, computed from the centred age created in step i
	generate Father_Cross_Section_Age_Squ = Father_Cross_Section_Age * Father_Cross_Section_Age
	
*********************************************************************
*STEP 13 - CREATE MERGE VARIABLES ***********************************
*********************************************************************
	*13A. 1968 family ID, copied from ER30001 (the 1968 interview number)
	generate MERGE_ID_1968 = ER30001

	*13B. 1968 person number, copied from ER30002
	generate MERGE_PN_1968 = ER30002

	*13C. Keep only the variables used in the analysis.
	*     Some names are also matched by the Father_* wildcard. They are listed
	*     explicitly as well so the command stops if one of them does not exist.
	keep Father_* Age_* Log_* ///
		N_Perm_Labor_Earnings Actual_Age_For_Age40_Measure ///
		Father_Ed Father_Report_Father_Ed ///
		Father_Cross_Section_Age_Dummies ///
		MERGE_ID_1968 MERGE_PN_1968

	*13D. Sort by the two merge keys, ready for the merge
	sort MERGE_ID_1968 MERGE_PN_1968

	*13E. Save the recoded father dataset, overwriting any earlier copy
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\Father" , replace









