*******************************************************
** JJ Created 14 / 05 / 2014 **************************
** JJ updated 03 / 03 / 2016 **************************
*******************************************************


*--- This do-file performs the analysis ----------------------------*


* Set the working directory; the relative file names used below (the input
* dataset, the RESULTS post-file and the QREGS Word tables) resolve against it.
cd "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\"
* Load the combined dataset, discarding any data currently in memory.
use DATA_WITH_IMPUTED_EARNINGS, clear												/*** Open up the combined dataset ***/



*****************************************************************
**** STEP 1 - Look at transition matrices ***********************
*****************************************************************
	*1A. Quartile group of each child's log earnings.
	*    Cut-points are computed within the 'main' dataset (DATASET == 0) only.
	xtile Child_Qs = Log_Child_Earnings if DATASET == 0, nq(4)

	*1B. Quartile group of each father's permanent log earnings.
	*    Again restricted to the 'main' dataset.
	xtile Perm_Qs = Log_Fa_Perm_Earn if DATASET == 0, nq(4)

	*1C. Cross-tabulate father's quartile (rows) against child's quartile (columns)
		*i. Number of observations in each cell
		tabulate Perm_Qs Child_Qs

		*ii. Row percentages only (cell counts suppressed)
		tabulate Perm_Qs Child_Qs, row nofreq

	*1D. Transition matrices based on the TSTSLS predictions from models M1 to M5
		forvalues m = 1/5 {
			*i. Quartile group of this model's TSTSLS prediction ('main' dataset only)
			xtile TSTSLS_M`m'_Qs = TSTSLS_Predicted_M`m' if DATASET == 0, nq(4)

			*ii. Predicted-father quartile by child quartile: cell counts
			tabulate TSTSLS_M`m'_Qs Child_Qs

			*iii. The same cross-tab as row percentages, counts suppressed
			tabulate TSTSLS_M`m'_Qs Child_Qs, row nofreq
		}

	
	
*****************************************************************
**** STEP 2 - Set-up post-file **********************************
*****************************************************************
	*2A. Close any post-file handle called 'results' left open by an earlier run
	*    (capture: it is not an error if no such handle exists)
	capture postclose results

	*2B. Build the quantile-regression column names: for each 5th percentile
	*    from 10 to 90, one estimate (Beta_Qxx) followed by its standard
	*    error (Beta_Qxx_SE)
	local qreg_columns
	forvalues pct = 10(5)90 {
		local qreg_columns `qreg_columns' Beta_Q`pct' Beta_Q`pct'_SE
	}

	*2C. Declare the post-file 'results', saved as the RESULTS dataset
	postfile results ///
		str20 MEASURE /// name of the father's earnings variable used
		str20 OUTCOME /// permanent or 1980 father's earnings
		str20 REPORT /// who reported the imputer variables (father or child)
		Beta Beta_SE /// intergenerational beta and its standard error
		SD_Father SD_Child /// standard deviations of father's and child's earnings
		Rho /// intergenerational correlation
		`qreg_columns' ///
		using "RESULTS" , replace
		
		
*****************************************************************
**** STEP 3 - CREATE PROGRAM TO RUN ANALYSIS ********************
*****************************************************************
*`1' = Parental earnings variable
	
	*3A. Drop any program already defined under this name
	*    (capture: it is not an error if the program does not exist yet)
	capture program drop do_analysis

	*3B. Program that runs the full analysis for one parental earnings measure
	*
	*    Positional arguments:
	*      1 = name of the parental (father's) earnings variable
	*      2 = label posted to OUTCOME (permanent or 1980 father's earnings)
	*      3 = label posted to REPORT (father or child reports of the imputer variables)
	*
	*    Requirements: the post-file handle 'results' must be open, and the
	*    data in memory must contain Log_Child_Earnings and DATASET.
	*
	*    Effects: posts exactly one row to 'results' and writes a Word table
	*    of the quantile regressions (QREGS + variable name + .doc) via the
	*    user-written command outreg2. No variables are added to the data.
	program define do_analysis
		args parent outcome report

		* Stop early with a clear message if the earnings measure is unusable
		confirm numeric variable `parent'

		* Estimates are held in temporary scalars and a temporary matrix, so
		* the dataset is never modified and nothing has to be dropped afterwards
		* (a wildcard drop could remove unrelated variables, and a failure
		* part-way through would leave variables that break the next call)
		tempname b b_se sd_parent sd_child rho Q

		*i. Intergenerational beta: OLS of child on parent, 'main' dataset only
		regress Log_Child_Earnings `parent' if DATASET == 0
		scalar `b'    = _b[`parent']
		scalar `b_se' = _se[`parent']

		*ii. Intergenerational rho = beta * SD(parent) / SD(child)
			*a. Standard deviation of the parental earnings measure
			summarize `parent' if DATASET == 0, detail
			scalar `sd_parent' = r(sd)

			*b. Standard deviation of the child's earnings. Restricted to the
			*   'main' dataset so that both standard deviations and beta
			*   refer to the same observations.
			summarize Log_Child_Earnings if DATASET == 0, detail
			scalar `sd_child' = r(sd)

			*c. The correlation itself
			scalar `rho' = `b' * (`sd_parent' / `sd_child')

		*iii. Quantile regressions at every 5th percentile from 10 to 90
		local row = 0
		local qpost
		forvalues pct = 10(5)90 {
			local ++row

			*a. Estimate the quantile regression ('main' dataset only)
			qreg Log_Child_Earnings `parent' if DATASET == 0, quantile(0.`pct')

			*b. Append (estimate, standard error) as a new row of the matrix
			*   and queue the two matching expressions for the post command
			matrix `Q' = nullmat(`Q') \ (_b[`parent'], _se[`parent'])
			local qpost `qpost' (`Q'[`row',1]) (`Q'[`row',2])

			*c. Output the estimates to a Word document. The first quantile
			*   starts a fresh file, so re-running the analysis does not keep
			*   appending duplicate columns to an old document.
			local doc_mode append
			if `pct' == 10 {
				local doc_mode replace
			}
			outreg2 ///
				using QREGS`parent'.doc ///
					, ///
					`doc_mode' ctitle(Q`pct') ///
					label ///
					noaster ///
					auto(2) ///
					noparen ///
					sdec(1)
		}

		*iv. Post one row of results; the order matches the postfile declaration
		post results ///
			("`parent'") ///
			("`outcome'") ///
			("`report'") ///
			(`b') (`b_se') ///
			(`sd_parent') (`sd_child') ///
			(`rho') ///
			`qpost'
	end

*****************************************************************
**** STEP 3 -  RUN PROGRAM **************************************
*****************************************************************
	*3A. Father's observed earnings; OUTCOME and REPORT are posted as "-"
		*i. Father's cross-sectional (1980) earnings
		do_analysis Log_Fa_XSec_Earn "-" "-"

		*ii. Father's permanent earnings
		do_analysis Log_Fa_Perm_Earn "-" "-"

	*3B. TSTSLS models M1 to M5: permanent earnings as dependent variable,
	*    father reports of the imputer variables
		global earnings_measures1 TSTSLS_Predicted_M1 TSTSLS_Predicted_M2 TSTSLS_Predicted_M3 TSTSLS_Predicted_M4 TSTSLS_Predicted_M5

		foreach measure of global earnings_measures1 {
			do_analysis `measure' "P" "F"
		}

	*3C. TSTSLS models M6 to M10: 1980 earnings as dependent variable,
	*    father reports of the imputer variables
		global earnings_measures2 TSTSLS_Predicted_M6 TSTSLS_Predicted_M7 TSTSLS_Predicted_M8 TSTSLS_Predicted_M9 TSTSLS_Predicted_M10

		foreach measure of global earnings_measures2 {
			do_analysis `measure' "1980" "F"
		}

	*3D. TSTSLS models M11 to M15: permanent earnings as dependent variable,
	*    child reports of the imputer variables
		global earnings_measures3 TSTSLS_Predicted_M11 TSTSLS_Predicted_M12 TSTSLS_Predicted_M13 TSTSLS_Predicted_M14 TSTSLS_Predicted_M15

		foreach measure of global earnings_measures3 {
			do_analysis `measure' "P" "C"
		}

	*3E. TSTSLS models M16 to M20: 1980 earnings as dependent variable,
	*    child reports of the imputer variables
		global earnings_measures4 TSTSLS_Predicted_M16 TSTSLS_Predicted_M17 TSTSLS_Predicted_M18 TSTSLS_Predicted_M19 TSTSLS_Predicted_M20

		foreach measure of global earnings_measures4 {
			do_analysis `measure' "1980" "C"
		}

	*3F. Close the post-file so that the RESULTS dataset is written to disk
	postclose results
	
*****************************************************************
**** STEP 4 -  OUTPUT THE RESULTS TO EXCEL **********************
*****************************************************************
* NOTE: keep slash-star sequences out of these banner lines. Stata treats
* one as the start of a block comment even on a line beginning with a star,
* which would silently disable every command below.
	*4A. Open up the results file written by the post-file
	use RESULTS , clear

	*4B. Export the results to Excel
	export excel ///
		using ///
		"C:\Users\john\OneDrive\Documents\TSTSLS\FINAL_MARCH_2016\Excel_Results_Files\RESULTS" ///
			, ///
			sheet("Results1") /// name of the results sheet
			firstrow(variables) /// first row holds the variable names
			sheetreplace
	* sheetreplace: overwrite the sheet if it already exists in the workbook

	
	
	
*****************************************************************************************	
***** NOTE: Another way to get the covariance between X and Y (without taking into	*****
***** consideration having age as a control) is:
*****************************************************************************************
*corr Log_Child_Earnings Log_Fa_Perm_Earn , cov
*corr Log_Child_Earnings TSTSLS_Predicted_M1 if DATASET==0, cov
*corr Log_Child_Earnings TSTSLS_Predicted_M5 if DATASET==0, cov
*Etc.

* Alternatively, using the formula for beta, one can get it via the components below:
*   beta * var(X) = cov(X,Y),   i.e.   beta = cov(X,Y) / var(X)
*****************************************************************************************
