*******************************************************
** JJ Created 14 / 05 / 2014 **************************
** JJ updated 03 / 03 / 2016 ***************************
*******************************************************
*
*********************************************************************
*--- This do-file creates the predicted earnings using TSTSLS ------*
*********************************************************************

*****************************************************************
**** STEP 1 - Open data and some basic recoding *****************
*****************************************************************
	*1A. Move to the data folder and load the analysis dataset
	*    (the folder path is machine-specific; edit it before running elsewhere)
	cd "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\"
	use ROBUSTNESS_DATA_FOR_ANALYSIS, clear


	*1B. Rename some variables to make them shorter
		*i. Fathers log permanent earnings
		rename Log_Father_Perm_Labor_Earnings Log_Fa_Perm_Earn

		*ii. Fathers log age 40 earnings
		rename Log_Father_Age40_Labor_Earnings Log_Fa_Age40_Earn

		*iii. Child log earnings in 2011
		rename Child_Log_Earnings_2011 Log_Child_Earnings

		*iv. Fathers cross-sectional earnings
		rename Log_Father_Cross_Section_Wage Log_Fa_XSec_Earn

*****************************************************************
**** STEP 2 - New program to create predictions *****************
*****************************************************************
* NOTE: ONLY INCLUDE `4' (father age dummies) when using father cross-sectional earnings

*`1' = Variable to impute (Father PERMANENT earnings or CURRENT earnings)
*`2' = Predictors (Z variables) of permanent income
*`3' = Model number
*`4' = Father age dummies to include into model


	*2A. Drop any program already defined with this name; capture suppresses
	*    the error raised when no such program exists yet
	capture program drop create_predictions

	*2B. Create new program to create TSTSLS predictions
	*
	*    Positional arguments:
	*      1 = variable to impute (dependent variable of the regression)
	*      2 = predictors (Z variables), passed as a single quoted string
	*      3 = model number, used as a suffix in the names of the new variables
	*      4 = (optional) father age dummy term, e.g. "i.Father_Age6_Dummies"
	*
	*    Adds two variables to the dataset in memory:
	*      Father_Age<3>_Dummies  (copy of the age dummies; equal to 0 on exit)
	*      TSTSLS_Predicted_M<3>  (predicted earnings from the model)
		program create_predictions
			*i. Give the positional arguments readable names
			args depvar predictors model agedummies

			*ii. Stop with a clear message if a required argument is missing
			if "`depvar'" == "" | "`model'" == "" {
				display as error "create_predictions: variable to impute and model number are required"
				exit 198
			}

			*iii. Stop before estimating anything if an output variable already exists
			confirm new variable Father_Age`model'_Dummies TSTSLS_Predicted_M`model'

			*iv. Create new variable with father age dummies in
			generate Father_Age`model'_Dummies = Father_Cross_Section_Age_Dummies

			*v. Model behind the TSTSLS predictions: OLS of the variable to impute
			*   on father race, the Z predictors and (when supplied) the age
			*   dummies, using the AUXILIARY dataset (DATASET == 1) only
			regress `depvar' i.Father_Race `predictors' `agedummies' ///
				if DATASET == 1

			/*
			vi. Output the model estimates to word (currently disabled)
			outreg2 using myreg`model'.doc, ctitle(Model M`model') label noaster
			(noaster = no significance stars in tables)
			*/

			*vii. Set the age dummy variable to 0 (age 41 to 45 reference group)
			*     before the predictions are created
			replace Father_Age`model'_Dummies = 0

			*viii. Create the TSTSLS predicted earnings; xb (the linear prediction)
			*      is what predict returns by default after regress, stated here
			*      explicitly. No if-condition, so every observation is predicted.
			predict TSTSLS_Predicted_M`model', xb

			*ix. Summary statistics for the TSTSLS predictions
			summarize TSTSLS_Predicted_M`model', detail
		end

*****************************************************************
**** STEP 3: PREDICTIONS: Father report. Permanent Earnings *****
*****************************************************************
*Z characteristics = PARENTAL REPORTS
*Imputation (stage 1) response variable = PERMANENT FATHER EARNINGS
*Arguments of each call: variable to impute, predictors, model number

	*M1 = Father report Ed
	create_predictions Log_Fa_Perm_Earn ///
		"i.Father_Report_Father_Ed" 1


	*M2 = Father report Ed and broad occ
	create_predictions Log_Fa_Perm_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Class_Age_15" 2


	*M3 = Father report Ed and broad occ and broad industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Class_Age_15 i.Father_Report_INDUSTRY" 3


	*M4 = Father report Ed and DETAILED occ and broad industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Occ_Age_15 i.Father_Report_INDUSTRY" 4


	*M5 = Father report Ed and DETAILED occ and DETAILED industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Occ_Age_15 i.Father_Report_Ind_Age_15" 5


******************************************************************************
**** STEP 4: PREDICTIONS: Father report. Cross-sectional age 40 Earnings *****
******************************************************************************
*Z characteristics = PARENTAL REPORTS
*Imputation (stage 1) response variable = AGE 40 FATHER EARNINGS
*Arguments of each call: variable to impute, predictors, model number,
*father age dummies (whose name carries the same model number)

	*M6 = Father report Ed
	create_predictions Log_Fa_XSec_Earn ///
		"i.Father_Report_Father_Ed" ///
		6 "i.Father_Age6_Dummies"


	*M7 = Father report Ed and broad occ
	create_predictions Log_Fa_XSec_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Class_Age_15" ///
		7 "i.Father_Age7_Dummies"


	*M8 = Father report Ed and broad occ and broad industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Class_Age_15 i.Father_Report_INDUSTRY" ///
		8 "i.Father_Age8_Dummies"


	*M9 = Father report Ed and DETAILED occ and broad industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Occ_Age_15 i.Father_Report_INDUSTRY" ///
		9 "i.Father_Age9_Dummies"


	*M10 = Father report Ed and DETAILED occ and DETAILED industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Father_Report_Father_Ed  i.Father_Report_Occ_Age_15 i.Father_Report_Ind_Age_15" ///
		10 "i.Father_Age10_Dummies"
	
	
*****************************************************************
**** STEP 5: PREDICTIONS: CHILD report. Permanent Earnings *****
*****************************************************************
*Z characteristics = CHILD REPORTS
*Imputation (stage 1) response variable = PERMANENT FATHER EARNINGS
*Arguments of each call: variable to impute, predictors, model number

	*M11 = Child report of father Ed
	create_predictions Log_Fa_Perm_Earn ///
		"i.Child_Report_Father_Ed" 11


	*M12 = Child report of father Ed and broad occ
	create_predictions Log_Fa_Perm_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Class_1970" 12


	*M13 = Child report of father Ed and broad occ and broad industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Class_1970 i.Child_Report_INDUSTRY" 13


	*M14 = Child report of father Ed and DETAILED occ and broad industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Occ_1970 i.Child_Report_INDUSTRY" 14


	*M15 = Child report of father Ed and DETAILED occ and DETAILED industry
	create_predictions Log_Fa_Perm_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Occ_1970 i.Child_Report_Father_Ind" 15

		
*****************************************************************
**** STEP 6: PREDICTIONS: CHILD report. Age 40 Earnings *****
*****************************************************************
*Z characteristics = CHILD REPORTS
*Imputation (stage 1) response variable = AGE 40 FATHER EARNINGS
*Arguments of each call: variable to impute, predictors, model number,
*father age dummies (whose name carries the same model number)

	*M16 = Child report of father Ed
	create_predictions Log_Fa_XSec_Earn ///
		"i.Child_Report_Father_Ed" ///
		16 "i.Father_Age16_Dummies"


	*M17 = Child report of father Ed and broad occ
	create_predictions Log_Fa_XSec_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Class_1970" ///
		17 "i.Father_Age17_Dummies"


	*M18 = Child report of father Ed and broad occ and broad industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Class_1970 i.Child_Report_INDUSTRY" ///
		18 "i.Father_Age18_Dummies"


	*M19 = Child report of father Ed and DETAILED occ and broad industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Occ_1970 i.Child_Report_INDUSTRY" ///
		19 "i.Father_Age19_Dummies"


	*M20 = Child report of father Ed and DETAILED occ and DETAILED industry
	create_predictions Log_Fa_XSec_Earn ///
		"i.Child_Report_Father_Ed  i.Child_Report_Father_Occ_1970 i.Child_Report_Father_Ind" ///
		20 "i.Father_Age20_Dummies"
	
*****************************************************************
**** STEP 7 - Look at correlation between predicted and actual **
*****************************************************************
	*A. Correlations between father's earnings at age 40, father's log permanent
	*   earnings and all of the TSTSLS predicted earnings, in the 'main' dataset
	*   (DATASET equal to 0)
	*   NOTE(review): Father_Age40_Labor_Earnings has no Log_ prefix and is not one
	*   of the variables renamed in STEP 1 (that one is now Log_Fa_Age40_Earn);
	*   confirm this is the variable intended here.
	correlate Father_Age40_Labor_Earnings Log_Fa_Perm_Earn TSTSLS_Predicted_* ///
		if DATASET == 0


*****************************************************************
**** STEP 8 - Save the data *************************************
*****************************************************************			
	*Write the data in memory (now holding the TSTSLS predictions) to the current
	*folder, overwriting any existing file of the same name
	save ROBUSTNESS_DATA_WITH_IMPUTED_EARNINGS, replace

		
		