*******************************************************
** JJ Created 14 / 05 / 2014 **************************
** JJ updated 07 / 04 / 2016 ***************************
*******************************************************

****************************************************************************
*--- This do-file merges father and offspring data together ---------------*
****************************************************************************


*********************************************************************
*STEP 1 - Merge the data together ***********************************
*********************************************************************	
	*A. Load the father file as the master dataset
	use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\Father", clear

	*B. Match-merge the offspring file on the 1968 family and person numbers
	*   NOTE(review): this is the old (pre-Stata 11) merge syntax, which expects
	*   both files to be sorted by the key variables already - confirm for both files
	merge MERGE_ID_1968 MERGE_PN_1968 ///
		using "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\Offspring_2011"

	*C. Inspect the match results (successful merges have _merge == 3)
	tabulate _merge


*********************************************************************
*STEP 2 - KEEP ONLY CERTAIN OBSERVATIONS ****************************
*********************************************************************
	*2A. Keep only observations where the merge was successful
	keep if _merge == 3

	*2B-2D. Require the child's report of father industry, occupation and education.
	*       missing() is used instead of a comparison with . so that extended
	*       missing values (.a to .z) are excluded along with system missing.
	keep if !missing(Child_Report_Father_Ind)
	keep if !missing(Child_Report_Father_Occ_1970)
	keep if !missing(Child_Report_Father_Ed)

	*2E. Drop observations where the father has fewer than 5 years of earnings data
	*    NOTE(review): a missing N_Perm_Labor_Earnings is not less than 5 in Stata,
	*    so such rows are kept by this line - confirm that is intended
	drop if N_Perm_Labor_Earnings < 5

	*2F. Drop if the child's 2011 log earnings are not available
	drop if missing(Child_Log_Earnings_2011)

	*2G. Keep only if the father's log earnings at age 40 are available
	keep if !missing(Log_Father_Age40_Labor_Earnings)

	*2H. Keep only if the father's 'one-shot' (cross-section) wage is available
	keep if !missing(Father_Cross_Section_Wage)

*******************************************************************************
*STEP 3 - CREATE NEW VARIABLE FOR FATHER OCCUPATION WHEN CHILD WAS 15 *********
*******************************************************************************
	*A. Seed the variable with the 1968 report for offspring aged 15 in 1968 or earlier
	generate Father_Report_Occ_Age_15 = Father_Report_Father_Occ_1968 ///
		if Child_Year_When_Age_15 < 1969

	*B. For each year 1968 to 1997, copy in the father's report from the year
	*   in which the child was aged 15
	forvalues yr = 1968/1997 {
		replace Father_Report_Occ_Age_15 = Father_Report_Father_Occ_`yr' ///
			if Child_Year_When_Age_15 == `yr'
	}

	*C. Inspect the distribution of the new variable
	tabulate Father_Report_Occ_Age_15

	*D. Create collapsed version of father occupation
	*Note: Based upon 1970 code: www.usa.ipums.org/usa/volii/97occup.shtmi
	*The code_class helper defined below takes three arguments:
	*	first  = value the new class variable should take
	*	second = occupation code must be strictly greater than this
	*	third  = occupation code must be strictly less than this

		*i. Start with the class variable missing for every observation
		generate Father_Report_Class_Age_15 = .

		*ii. Remove any earlier definition so the program can be defined again
		capture program drop code_class

		*iii. Helper: assign one class value to every occupation code that lies
		*     strictly between the two bounds
		program code_class
			args class_value lower_bound upper_bound
			replace Father_Report_Class_Age_15 = `class_value' ///
				if Father_Report_Occ_Age_15 > `lower_bound' ///
				& Father_Report_Occ_Age_15 < `upper_bound'
		end

		*iv. Run the program above once per band
		*    Each entry reads: class value, exclusive lower bound, exclusive upper bound
		*	 1 = Professional, Technical, and Kindred Workers
		*	 2 = Managers and Administrators, except Farm
		*	 3 = Sales workers
		*	 4 = Clerical and Kindred Workers
		*	 5 = Craftsman and Kindred Workers
		*	 6 = Operatives, except Transport
		*	 7 = Transport Equipment Operatives
		*	 8 = Laborers, except Farm
		*	 9 = Farmers and Farm Managers
		*	10 = Service Workers, except Private Household
		*	99 = Missing (two bands)
		foreach band in ///
			"1 0 196" "2 200 248" "3 259 286" "4 300 396" ///
			"5 400 601" "6 600 696" "7 700 716" "8 739 786" ///
			"9 800 825" "10 900 985" "99 -1 1" "99 986 1001" {
			code_class `band'
		}

		*v. Any observation still unclassified is coded 99
		replace Father_Report_Class_Age_15 = 99 if Father_Report_Class_Age_15 == .

	*E. Build the value label one class at a time
	label define Father_Report_Class_Age_15 1 "Professional / technical"
	label define Father_Report_Class_Age_15 2 "Managers and administrators", add
	label define Father_Report_Class_Age_15 3 "Sales workers", add
	label define Father_Report_Class_Age_15 4 "Clerical", add
	label define Father_Report_Class_Age_15 5 "Craftsmen", add
	label define Father_Report_Class_Age_15 6 "Operatives", add
	label define Father_Report_Class_Age_15 7 "Transport equipment", add
	label define Father_Report_Class_Age_15 8 "laborers", add
	label define Father_Report_Class_Age_15 9 "Farmers", add
	label define Father_Report_Class_Age_15 10 "Service workers", add
	label define Father_Report_Class_Age_15 99 "Unknown", add

	*F. Attach the value label to the variable of the same name
	label values Father_Report_Class_Age_15 Father_Report_Class_Age_15

	*G. Tabulate the father-reported class variable
	tabulate Father_Report_Class_Age_15
	
*******************************************************************************
*STEP 4 - CREATE NEW VARIABLE FOR FATHER INDUSTRY WHEN CHILD WAS 15 ***********
*******************************************************************************
	*A. Seed the variable with the 1968 industry report for offspring aged 15
	*   in 1968 or earlier
	generate Father_Report_Ind_Age_15 = Father_Report_Father_Ind_1968 ///
		if Child_Year_When_Age_15 < 1969

	*B. For each year 1968 to 1997, copy in the father's industry report from
	*   the year in which the child was aged 15
	forvalues yr = 1968/1997 {
		replace Father_Report_Ind_Age_15 = Father_Report_Father_Ind_`yr' ///
			if Child_Year_When_Age_15 == `yr'
	}

	*C. New collapsed version
	*Note see PSID documentation for what codes mean
	*The industry_coding helper defined below takes three arguments:
	*	first  = new code for the collapsed version
	*	second = industry code must be strictly greater than this
	*	third  = industry code must be strictly less than this
		*i. Start with the collapsed variable missing for every observation
		generate Father_Report_INDUSTRY = .

		*ii. Remove any earlier definition so the program can be defined again
		capture program drop industry_coding

		*iii. Helper: assign one collapsed code to every industry code that lies
		*     strictly between the two bounds
		program industry_coding
			args industry_value lower_bound upper_bound
			replace Father_Report_INDUSTRY = `industry_value' ///
				if Father_Report_Ind_Age_15 > `lower_bound' ///
				& Father_Report_Ind_Age_15 < `upper_bound'
		end

		*iv. Run the program once per band to build the collapsed industry code
		*    Each entry reads: new code, exclusive lower bound, exclusive upper bound
		*	 1 = Agriculture
		*	 2 = Mining
		*	 3 = Construction
		*	 4 = Manufacturing
		*	 5 = Transportation and communications
		*	 6 = Wholesale + retail
		*	 7 = Finance
		*	 8 = Business services
		*	 9 = Personal services
		*	10 = Entertainment + recreation
		*	11 = Professional services
		*	12 = Public admin
		*	13 = Missing / NA (two bands)
		foreach band in ///
			"1 16 29" "2 46 58" "3 66 78" "4 106 399" "5 406 480" ///
			"6 506 699" "7 706 719" "8 726 760" "9 768 799" "10 806 810" ///
			"11 827 898" "12 906 938" "13 950 1100" "13 -1 1" {
			industry_coding `band'
		}

	*D. Build the value label one industry at a time
	label define Father_Report_INDUSTRY 1 "Agriculture"
	label define Father_Report_INDUSTRY 2 "Mining", add
	label define Father_Report_INDUSTRY 3 "Construction", add
	label define Father_Report_INDUSTRY 4 "Manufacturing", add
	label define Father_Report_INDUSTRY 5 "Transport and communication", add
	label define Father_Report_INDUSTRY 6 "Wholesale and retail", add
	label define Father_Report_INDUSTRY 7 "Finance", add
	label define Father_Report_INDUSTRY 8 "Business services", add
	label define Father_Report_INDUSTRY 9 "Personal services", add
	label define Father_Report_INDUSTRY 10 "Entertainment", add
	label define Father_Report_INDUSTRY 11 "Professional services", add
	label define Father_Report_INDUSTRY 12 "Public administration", add
	label define Father_Report_INDUSTRY 13 "Unknown", add

	*E. Attach the value label to the variable of the same name
	label values Father_Report_INDUSTRY Father_Report_INDUSTRY

	*F. Tabulate the newly created industry variable
	tabulate Father_Report_INDUSTRY
		

*******************************************************************************
*STEP 5 - LOOK AT DESCRIPTIVES FOR SAMPLE *************************************
*******************************************************************************
	*A. Restrict to observations with a father report of occupation and industry
	*   at child age 15
		*i. Occupation
		drop if Father_Report_Occ_Age_15 == .

		*ii. Industry
		drop if Father_Report_Ind_Age_15 == .

	*B. Describe the remaining sample
		*i. Gender (the total gives the number of observations)
		tabulate Child_Gender

		*ii. Number of years of data behind the father permanent earnings measure
		tabulate N_Perm_Labor_Earnings

		*iii. Detailed summary of (log) father permanent earnings
		summarize Log_Father_Perm_Labor_Earnings, detail

		*iv. Detailed summary of child log earnings in 2011
		summarize Child_Log_Earnings_2011, detail

		*v. Father's age in 2011
		summarize Father_Age_2011

		*vi. Child's age in 2011
		summarize Child_Age_2011

		*vii. Child's race
		tabulate Child_Race

		*viii. Father report of 1 digit occupation (class)
		tabulate Father_Report_Class_Age_15

		*ix. Father report of own education
		tabulate Father_Report_Father_Ed

		*x. Basic intergenerational (beta) model - permanent earnings
		*   Outcome: child log earnings. Key covariate: log father permanent earnings.
		*   Controls: child age and age squared.
		regress Child_Log_Earnings_2011 Log_Father_Perm_Labor_Earnings ///
			Child_Age_2011 Child_Age_2011_Squ

		*xi. Basic intergenerational (beta) model - transitory earnings
		*    Outcome: child log earnings. Key covariate: log father earnings at age 40.
		*    Controls: child age and age squared.
		regress Child_Log_Earnings_2011 Log_Father_Age40_Labor_Earnings ///
			Child_Age_2011 Child_Age_2011_Squ

		*xii. Detailed summary of father permanent earnings (not logged)
		summarize Father_Perm_Labor_Earnings, detail

		*xiii. Detailed summary of father age 40 earnings (not logged)
		summarize Father_Age40_Labor_Earnings, detail


*******************************************************************************
*STEP 6 - Save dataset ********************************************************
*******************************************************************************
	*A. Flag every record with a dataset identifier (0 for all rows here)
	generate DATASET = 0
	label define DATASET 0 "Main" 1 "Auxilliary"
	label values DATASET DATASET

	*B. Write the dataset to disk, overwriting any earlier copy
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\MAIN_DATASET", replace
		
*******************************************************************************
*STEP 7 - Merge in father mode occupation *************************************
*******************************************************************************
	*7A. Re-open the data
	*    The use command accepts clear, not replace; with replace Stata rejects
	*    the option and the step stops here.
	use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\MAIN_DATASET.dta" , clear

	*7B. Set out-of-range 3 digit father occupation codes to missing
	*    Years without a numeric occupation variable are skipped explicitly,
	*    rather than hiding every possible error behind capture.
	forvalues i = 1968(1)2001 {
		capture confirm numeric variable Father_Report_Father_Occ_`i'
		if _rc == 0 {
			replace Father_Report_Father_Occ_`i' = . ///
				if Father_Report_Father_Occ_`i' < 1 ///
				| Father_Report_Father_Occ_`i' > 985
		}
	}

	*7C. Drop any merge variable that exists
	capture drop _merge

	*7D. Create a new ID variable (row number)
	gen id = _n

	*7E. Sort by this variable, so the file saved next is sorted by id for the merge in 7L
	sort id

	*7F. Save the full dataset; it is the using file for the merge in 7L
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\FATHER_MODE_OCC.dta" , replace

	*7G. Keep only the occupation variables and id
	keep Father_Report_Father_Occ_* id

	*7H. Reshape the data into long format
	*    No j() is given, so the suffix is stored under reshape's default name
	reshape long Father_Report_Father_Occ_, i(id)

	*7I. Create the modal father occupation for each id
	*    minmode takes the lowest value when several modes tie
	egen Father_Report_Occ_MODE = mode(Father_Report_Father_Occ_) , by(id) minmode

	*7J. Reshape back into wide format
	reshape wide

	*7K. Sort the data by id
	sort id

	*7L. Merge with the dataset saved in 7F
	merge id ///
		using ///
		"C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\FATHER_MODE_OCC.dta"

	*7M. Check the merge
	tab _merge
		
	*7N. Create new variable for father modal class, missing for every observation
	generate Father_Report_Class_MODE = .

	*7O. Replace the values in father modal class
		*i. Helper program: assign one class value to every modal occupation code
		*   that lies strictly between the two bounds
		*   Arguments: class value, exclusive lower bound, exclusive upper bound
		capture program drop code_class_mode
		program code_class_mode
			args class_value lower_bound upper_bound
			replace Father_Report_Class_MODE = `class_value' ///
				if Father_Report_Occ_MODE > `lower_bound' ///
				& Father_Report_Occ_MODE < `upper_bound'
		end

		*ii. Run the program above once per band
		*    Each entry reads: class value, exclusive lower bound, exclusive upper bound
		*	 1 = Professional, Technical, and Kindred Workers
		*	 2 = Managers and Administrators, except Farm
		*	 3 = Sales workers
		*	 4 = Clerical and Kindred Workers
		*	 5 = Craftsman and Kindred Workers
		*	 6 = Operatives, except Transport
		*	 7 = Transport Equipment Operatives
		*	 8 = Laborers, except Farm
		*	 9 = Farmers and Farm Managers
		*	10 = Service Workers, except Private Household
		*	99 = Missing (two bands)
		foreach band in ///
			"1 0 196" "2 200 248" "3 259 286" "4 300 396" ///
			"5 400 601" "6 600 696" "7 700 716" "8 739 786" ///
			"9 800 825" "10 900 985" "99 -1 1" "99 986 1001" {
			code_class_mode `band'
		}

		*iii. Any observation still unclassified is coded 99
		replace Father_Report_Class_MODE = 99 if Father_Report_Class_MODE == .

	*7P. Build the value label one class at a time
	label define Father_Report_Class_MODE 1 "Professional / technical"
	label define Father_Report_Class_MODE 2 "Managers and administrators", add
	label define Father_Report_Class_MODE 3 "Sales workers", add
	label define Father_Report_Class_MODE 4 "Clerical", add
	label define Father_Report_Class_MODE 5 "Craftsmen", add
	label define Father_Report_Class_MODE 6 "Operatives", add
	label define Father_Report_Class_MODE 7 "Transport equipment", add
	label define Father_Report_Class_MODE 8 "laborers", add
	label define Father_Report_Class_MODE 9 "Farmers", add
	label define Father_Report_Class_MODE 10 "Service workers", add
	label define Father_Report_Class_MODE 99 "Unknown", add

	*7Q. Attach the value label to the variable of the same name
	label values Father_Report_Class_MODE Father_Report_Class_MODE

	*7R. Tabulate the modal father class
	tabulate Father_Report_Class_MODE

	*7S. Save the dataset, overwriting the earlier copy
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\MAIN_DATASET", replace
		
		
*******************************************************************************
*STEP 8 - Merge in father mode industry *************************************
*******************************************************************************
	*8A. Re-open the data
	*    The use command accepts clear, not replace; with replace Stata rejects
	*    the option and the step stops here.
	use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\MAIN_DATASET.dta" , clear

	*8B. Set out-of-range 3 digit father industry codes to missing
	*    Years without a numeric industry variable are skipped explicitly,
	*    rather than hiding every possible error behind capture.
	forvalues i = 1968(1)2001 {
		capture confirm numeric variable Father_Report_Father_Ind_`i'
		if _rc == 0 {
			replace Father_Report_Father_Ind_`i' = . ///
				if Father_Report_Father_Ind_`i' < 15 ///
				| Father_Report_Father_Ind_`i' > 950
		}
	}

	*8C. Drop any merge variable that exists
	capture drop _merge

	*8D. Sort by id, so the file saved next is sorted by id for the merge in 8K
	*    (id is not created in this step; it must already be in the file opened in 8A)
	sort id

	*8E. Save the full dataset; it is the using file for the merge in 8K
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\FATHER_MODE_IND.dta" , replace

	*8F. Keep only the industry variables and id
	keep Father_Report_Father_Ind_* id

	*8G. Reshape the data into long format
	*    No j() is given, so the suffix is stored under reshape's default name
	reshape long Father_Report_Father_Ind_, i(id)

	*8H. Create the modal father industry for each id
	*    minmode takes the lowest value when several modes tie
	egen Father_Report_Ind_MODE = mode(Father_Report_Father_Ind_) , by(id) minmode

	*8I. Reshape back into wide format
	reshape wide

	*8J. Sort the data by id
	sort id

	*8K. Merge with the dataset saved in 8E
	merge id ///
		using ///
		"C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\FATHER_MODE_IND.dta"

	*8L. Check the merge
	tab _merge
		
	*8N. Create new variable for father modal industry
		*i. Start with the variable missing for every observation
		generate Father_Report_INDUSTRY_MODE = .

		*ii. Helper program: assign one collapsed code to every modal industry code
		*    that lies strictly between the two bounds
		*    Arguments: new code, exclusive lower bound, exclusive upper bound
		capture program drop industry_coding_mode
		program industry_coding_mode
			args industry_value lower_bound upper_bound
			replace Father_Report_INDUSTRY_MODE = `industry_value' ///
				if Father_Report_Ind_MODE > `lower_bound' ///
				& Father_Report_Ind_MODE < `upper_bound'
		end
	
	
	*8O. Replace values in variable, one band at a time
	*    Each entry reads: new code, exclusive lower bound, exclusive upper bound
	*	 1 = Agriculture
	*	 2 = Mining
	*	 3 = Construction
	*	 4 = Manufacturing
	*	 5 = Transportation and communications
	*	 6 = Wholesale + retail
	*	 7 = Finance
	*	 8 = Business services
	*	 9 = Personal services
	*	10 = Entertainment + recreation
	*	11 = Professional services
	*	12 = Public admin
	*	13 = Missing / NA (two bands)
	foreach band in ///
		"1 16 29" "2 46 58" "3 66 78" "4 106 399" "5 406 480" ///
		"6 506 699" "7 706 719" "8 726 760" "9 768 799" "10 806 810" ///
		"11 827 898" "12 906 938" "13 950 1100" "13 -1 1" {
		industry_coding_mode `band'
	}

	*8P. Define value labels for the collapsed modal industry
	label define ///
		Father_Report_INDUSTRY_MODE ///
		1 "Agriculture" ///
		2 "Mining" ///
		3 "Construction" ///
		4 "Manufacturing" ///
		5 "Transport and communication" ///
		6 "Wholesale and retail" ///
		7 "Finance" ///
		8 "Business services" ///
		9 "Personal services" ///
		10 "Entertainment" ///
		11 "Professional services" ///
		12 "Public administration" ///
		13 "Unknown"

	*8Q. Assign the value labels to the variable
	*    (this and the tabulation below were each written out twice; the
	*    repeated copies had no further effect and have been removed)
	label values ///
		Father_Report_INDUSTRY_MODE ///
		Father_Report_INDUSTRY_MODE

	*8R. Tabulate the newly created modal industry variable
	tab Father_Report_INDUSTRY_MODE

	*8S. Save the newly created dataset, overwriting the earlier copy
	save ///
		C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\MAIN_DATASET ///
			, ///
			replace
		
