*******************************************************
** JJ Created 06 / 05 / 2014 **************************
** JJ updated 02 / 03 / 2016 **************************
*******************************************************

*********************************************************************
*--- This do-file recodes the key variables used in the analysis ---*
*********************************************************************

*********************************************************************
*STEP 1 - MERGE ALL THE PSID DOWNLOADED TOGETHER ********************
*********************************************************************
*NOTE: Can merge in new variables downloaded using the following variables (ER30001 ER30002)
* Pre-sort each supplementary PSID extract (DATA2 to DATA4) on the person
* identifiers ER30001 ER30002 and save it back, ready to be merged.
forvalues k = 2/4 {
	use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA`k'\PSID_`k'.dta" , clear
	sort ER30001 ER30002
	save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA`k'\PSID_`k'.dta" , replace
}

* Open the first extract, sort it on the same identifiers, and match-merge
* the three supplementary extracts onto it one after another.
use C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\PSID, clear
sort ER30001 ER30002
forvalues k = 2/4 {
	merge ER30001 ER30002 using "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA`k'\PSID_`k'.dta"
	tab _merge				/* Inspect the result: expected to be 3 for all observations */
	capture drop _merge		/* Remove the indicator so the next merge can recreate it */
	sort ER30001 ER30002	/* Re-sort on the identifiers before the next step */
}

* Save the combined dataset
save "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA2\PSID_FINAL.dta" , replace


*********************************************************************
*STEP 2 - RECODE THE OFFSPRING DATA *********************************
*********************************************************************
* Load the merged PSID file produced in step 1
use "C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA2\PSID_FINAL.dta" , clear

	*A. Keep men and HHH only
	* Child_Gender is a copy of ER32000; only observations where it equals 1
	* (men, per the section heading) are retained, so it is constant afterwards.
	generate Child_Gender = ER32000
	keep if Child_Gender == 1
	* ER34103 == 10 identifies HHH (per the section heading)
	keep if ER34103 == 10

	*B. Child race
		*i. Inspect the raw variable, then copy it with code 9 folded into code 7
		*   (other and unknown categories combined)
		tab ER51904
		gen Child_Race = cond(ER51904 == 9, 7, ER51904)

		*ii. Value label shared under the name Race
		label define Race ///
			1 "White" 2 "Black" 3 "American Indian" ///
			4 "Asian" 5 "Pacific Islander" 7 "Other / Unknown"

		*iii. Attach the label to the new variable
		label values Child_Race Race

	*C. Keep only individuals between ages 30 and 60 in 2011
		*i. Discard anyone whose age (ER34104) is 29 or below, or 61 or above.
		*   Missing ages are discarded too, because missing compares as larger
		*   than any number in Stata.
		drop if ER34104 <= 29 | ER34104 >= 61

		*ii. Check the distribution of the remaining ages
		tab ER34104

		*iii. Child's age in 2011 and its square
		gen Child_Age_2011 = ER34104
		gen Child_Age_2011_Squ = Child_Age_2011^2

		*iv. Year in which the offspring turned 15:
		*    2011 minus age in 2011 gives (roughly) year of birth; add 15.
		gen Child_Year_When_Age_15 = 2011 + 15 - Child_Age_2011

		*v. Check the result. With ages 30 to 60 it should lie between 1966 and 1996.
		tab Child_Year_When_Age_15
				
				
*C. Earnings (create labor market earnings)
* For each survey year listed below this section creates
*   Child_Labor_Earnings_YYYY : labour earnings copied from the raw PSID variable,
*                               set to missing when equal to 0 or when the child
*                               was below age 30 in that year
*   Child_Log_Earnings_YYYY   : natural log of the above
* and then a count and an average across the five years.
*
* The age rule is applied through Child_Age_2011: being below 30 in year YYYY
* is the same as being below 30 + (2011 - YYYY) in 2011 (32 for 2009, 34 for
* 2007, 36 for 2005, 38 for 2003). For 2011 the cut-off is 30, which changes
* nothing when the sample has already been restricted to ages 30 to 60.
* The raw PSID variables themselves are left unmodified.

	*i. Survey years and the raw earnings variable for each (same order)
	local earn_years 2011    2009    2007    2005    2003
	local earn_raw   ER52237 ER46829 ER40921 ER27931 ER24116

	*ii. Build the earnings and log-earnings variables year by year
	forvalues k = 1/5 {
		local yr      : word `k' of `earn_years'
		local raw     : word `k' of `earn_raw'
		local min_age = 30 + (2011 - `yr')

		gen Child_Labor_Earnings_`yr' = `raw'
		replace Child_Labor_Earnings_`yr' = . if Child_Labor_Earnings_`yr' == 0	/* zero earnings treated as missing */
		replace Child_Labor_Earnings_`yr' = . if Child_Age_2011 < `min_age'		/* below age 30 in that year */
		gen Child_Log_Earnings_`yr' = log(Child_Labor_Earnings_`yr')
	}

	*iii. Create a 5-year average
	* The five earnings variables are listed explicitly. A wildcard such as
	* Child_Labor_Earnings_* would also match Child_Labor_Earnings_5year_N once
	* it exists, and the count would then be averaged in with the earnings.
	local earn_vars ///
		Child_Labor_Earnings_2011 ///
		Child_Labor_Earnings_2009 ///
		Child_Labor_Earnings_2007 ///
		Child_Labor_Earnings_2005 ///
		Child_Labor_Earnings_2003

		*a. Number of non-missing yearly observations
		egen Child_Labor_Earnings_5year_N = rownonmiss(`earn_vars')

		*b. Average of the non-missing yearly earnings
		egen Child_Labor_Earnings_5year = rowmean(`earn_vars')


*D. Create variables capturing children's reports of father occupation
* Only reports from 2001 and before are used: these are all coded on the 1970
* occupational classification, the same classification as the parents' reports.
* IMPORTANT NOTE: the 1970 at the end of the variable name refers to the 1970
* census code used in the coding. It does NOT correspond to the year of the
* father's job.

	*i. Report variables in order of preference: 2001, 1999, 1997
	local occ_report_vars ER19874 ER15813 ER12161

	*ii. New variable, missing by default
	gen Child_Report_Father_Occ_1970 = .

	*iii. Fill from the first report in the list holding a code strictly
	*     between 0 and 990; once filled, later reports are ignored
	foreach occ_var of local occ_report_vars {
		replace Child_Report_Father_Occ_1970 = `occ_var' ///
			if missing(Child_Report_Father_Occ_1970) ///
			& `occ_var' > 0 ///
			& `occ_var' < 990
	}

	*iv. Look at the distribution of the newly created variable
	tab Child_Report_Father_Occ_1970


*E. Develop program to create a collapsed version of father occupation
*Note: Based upon 1970 code: www.usa.ipums.org/usa/volii/97occup.shtmi
*
* code_class takes three positional arguments:
*   1st = value the collapsed variable should take
*   2nd = lower bound on the 3 digit occupation code (exclusive)
*   3rd = upper bound on the 3 digit occupation code (exclusive)

	*i. Create new variable. Set to missing by default.
	gen Child_Report_Father_Class_1970 = .

	*ii. Drop any program already named code_class (no error if there is none)
	capture program drop code_class

	*iii. Create the program
	program code_class
		args class_value lower_bound upper_bound

		* Both comparisons are strict, so the bounds themselves are excluded.
		* A missing occupation code never satisfies the upper-bound test.
		replace Child_Report_Father_Class_1970 = `class_value' ///
			if Child_Report_Father_Occ_1970 > `lower_bound' ///
			& Child_Report_Father_Occ_1970 < `upper_bound'
	end

	*iv. Assign the broad class from the 3 digit code.
	*    Arguments are: class value, lower bound, upper bound (bounds exclusive).
		code_class   1    0   196		/* a. Professional, Technical, and Kindred Workers */
		code_class   2  200   248		/* b. Managers and Administrators, except Farm */
		code_class   3  259   286		/* c. Sales Workers */
		code_class   4  300   396		/* d. Clerical and Kindred Workers */
		code_class   5  400   601		/* e. Craftsman and Kindred Workers */
		code_class   6  600   696		/* f. Operatives, except Transport */
		code_class   7  700   716		/* g. Transport Equipment Operatives */
		code_class   8  739   786		/* h. Laborers, except Farm */
		code_class   9  800   825		/* i. Farmers and Farm Managers */
		code_class  10  900   985		/* j. Service Workers, except Private Household */
		code_class  99   -1     1		/* k. Don't know / other / missing etc */
		code_class  99  986  1001		/* l. Don't know / other / missing etc */

	*v. Define the value label (named after the variable)
		label define Child_Report_Father_Class_1970 ///
			1 "Professional / technical" ///
			2 "Managers and administrators" ///
			3 "Sales workers" ///
			4 "Clerical" ///
			5 "Craftsmen" ///
			6 "Operatives" ///
			7 "Transport equipment" ///
			8 "laborers" ///
			9 "Farmers" ///
			10 "Service workers" ///
			99 "Unknown"

	*vi. Attach the label to the variable
	label values Child_Report_Father_Class_1970 Child_Report_Father_Class_1970

	*vii. Distribution of the collapsed variable
	tab Child_Report_Father_Class_1970

	*viii. Cross-tab of the 3 digit codes against the collapsed classes
	tab Child_Report_Father_Occ_1970 Child_Report_Father_Class_1970

		
*F. Create variables capturing father industry
* Other years are not used because the census codes used changed.
* NOTE(review): the original comment cited ER19960 as an example of a year not
* to use, yet ER19960 is the first variable in the list below - confirm.

	*i. Report variables in order of preference: 2001, 1999, 1997
	local ind_report_vars ER19960 ER15814 ER12153

	*ii. New variable, missing by default
	gen Child_Report_Father_Ind = .

	*iii. Fill from the first report in the list holding a code strictly
	*     between 0 and 990; once filled, later reports are ignored
	foreach ind_var of local ind_report_vars {
		replace Child_Report_Father_Ind = `ind_var' ///
			if missing(Child_Report_Father_Ind) ///
			& `ind_var' > 0 ///
			& `ind_var' < 990
	}

	*iv. Look at the distribution of the 3 digit variable
	tab Child_Report_Father_Ind
		
*F. New collapsed version of the industry code
*Note see PSID documentation for what codes mean
*
* industry_coding takes three positional arguments:
*   1st = new code for the collapsed version
*   2nd = lower bound on the 3 digit industry code (exclusive)
*   3rd = upper bound on the 3 digit industry code (exclusive)

	*i. Create new variable. Set to missing by default.
	gen Child_Report_INDUSTRY = .

	*ii. Drop any program that already has this name (no error if there is none)
	capture program drop industry_coding

	*iii. Create the program
	program industry_coding
		args industry_value lower_bound upper_bound

		* Both comparisons are strict, so the bounds themselves are excluded.
		* A missing industry code never satisfies the upper-bound test.
		replace Child_Report_INDUSTRY = `industry_value' ///
			if Child_Report_Father_Ind > `lower_bound' ///
			& Child_Report_Father_Ind < `upper_bound'
	end

	*iv. Assign the broad industry from the 3 digit code.
	*    Arguments are: industry value, lower bound, upper bound (bounds exclusive).
		industry_coding   1   16    29		/* a. Agriculture */
		industry_coding   2   46    58		/* b. Mining */
		industry_coding   3   66    78		/* c. Construction */
		industry_coding   4  106   399		/* d. Manufacturing */
		industry_coding   5  406   480		/* e. Transportation and communications */
		industry_coding   6  506   699		/* f. Wholesale + retail */
		industry_coding   7  706   719		/* g. Finance */
		industry_coding   8  726   760		/* h. Business services */
		industry_coding   9  768   799		/* i. Personal services */
		industry_coding  10  806   810		/* j. Entertainment + recreation */
		industry_coding  11  827   898		/* k. Professional services */
		industry_coding  12  906   938		/* l. Public admin */
		industry_coding  13  950  1100		/* m. Not available */
		industry_coding  13   -1     1		/* n. Not available */

	*v. Define the value label (named after the variable)
	label define Child_Report_INDUSTRY ///
		1 "Agriculture" ///
		2 "Mining" ///
		3 "Construction" ///
		4 "Manufacturing" ///
		5 "Transport and communication" ///
		6 "Wholesale and retail" ///
		7 "Finance" ///
		8 "Business services" ///
		9 "Personal services" ///
		10 "Entertainment" ///
		11 "Professional services" ///
		12 "Public administration" ///
		13 "Unknown"

	*vi. Attach the label to the variable
	label values Child_Report_INDUSTRY Child_Report_INDUSTRY

	*vii. Check the distribution of the variable
	tab Child_Report_INDUSTRY
		
*G. Create variables capturing father education
	*i. Father education variables, in order of preference
	local ed_report_vars ER51869 ER46508 ER40531 ER27356 ER23392

	*ii. New variable, missing by default
	gen Child_Report_Father_Ed = .

	*iii. Fill from the first variable in the list holding a code strictly
	*     between 0 and 10; once filled, later variables are ignored
	foreach ed_var of local ed_report_vars {
		replace Child_Report_Father_Ed = `ed_var' ///
			if missing(Child_Report_Father_Ed) ///
			& `ed_var' > 0 ///
			& `ed_var' < 10
	}

	*iv. Variables recording whether the father obtained his education in the US
	* NOTE(review): five education variables above but only four here - confirm
	* this is intentional.
	local ed_place_vars ER51868 ER46507 ER40530 ER27355

	*v. Where no education level has been filled in, use these variables instead:
	*     value 2 (father educated outside the US)  ->  category 10
	*     value 5 (reported no education)           ->  category 0
	foreach place_var of local ed_place_vars {
		replace Child_Report_Father_Ed = 10 ///
			if missing(Child_Report_Father_Ed) & `place_var' == 2

		replace Child_Report_Father_Ed = 0 ///
			if missing(Child_Report_Father_Ed) & `place_var' == 5
	}

	*vi. Look at the distribution of the variable
	tab Child_Report_Father_Ed

	
	*vii. Re-categorise the children's reports of their fathers' education so the
	*     categories are consistent with those used for the fathers' reports of
	*     their own education. Any value not listed below (including missing)
	*     is left as missing in the new variable.
		*a. Create new variable. Set to missing by default.
		gen Child_Report_Father_Ed_NEW = .

		*b. Map old category -> new category
		replace Child_Report_Father_Ed_NEW = 0  if Child_Report_Father_Ed == 0				/* No education */
		replace Child_Report_Father_Ed_NEW = 1  if Child_Report_Father_Ed == 1				/* Highest schooling grades 0 to 5 */
		replace Child_Report_Father_Ed_NEW = 2  if Child_Report_Father_Ed == 2				/* Grades 6 - 8 */
		replace Child_Report_Father_Ed_NEW = 3  if Child_Report_Father_Ed == 3				/* Grades 9 - 11 */
		replace Child_Report_Father_Ed_NEW = 4  if Child_Report_Father_Ed == 4				/* Grade 12 (completed high school) */
		replace Child_Report_Father_Ed_NEW = 5  if inlist(Child_Report_Father_Ed, 5, 6)	/* 12 grades plus nonacademic training; some college; associate's degree */
		replace Child_Report_Father_Ed_NEW = 6  if Child_Report_Father_Ed == 7				/* College degree */
		replace Child_Report_Father_Ed_NEW = 7  if Child_Report_Father_Ed == 8				/* Advanced degree */
		replace Child_Report_Father_Ed_NEW = 10 if Child_Report_Father_Ed == 10			/* Father educated outside of the US */

	*viii. Cross-tab of the re-categorised variable against the original,
	*      to check the mapping
	tab Child_Report_Father_Ed_NEW Child_Report_Father_Ed

	*ix. Drop the original variable ...
	drop Child_Report_Father_Ed

	*x. ... and give its name to the re-categorised one
	rename Child_Report_Father_Ed_NEW Child_Report_Father_Ed

	*xi. Define value labels. Category 10 is labelled as well, so that fathers
	*    educated outside the US do not appear as a bare number in tabulations.
	label define Child_Report_Father_Ed ///
		0 "No Education" ///
		1 "Grades 1 - 5" ///
		2 "Grades 6 - 8" ///
		3 "Grades 9 - 11" ///
		4 "Grade 12 (HS completion)" ///
		5 "Some college / associates degree" ///
		6 "College degree" ///
		7 "Advanced college degree" ///
		10 "Educated outside the US"

	*xii. Assign labels to the variable
	label values Child_Report_Father_Ed Child_Report_Father_Ed

	*xiii. Look at the distribution of the variable
	tab Child_Report_Father_Ed
	

*H. Sons occupation and social class in 2011
	*i. Son's occupation in 2011 (3 digit code), copied from ER47479
	generate Child_Occ_2011 = ER47479

	*ii. Son's social class in 2011; starts out missing and is filled in by the
	*    class_replace calls further down
	generate Child_Class_2011 = .

	*iii. Drop any program with this name (no error if there is none)
	capture program drop class_replace

	*iv. Program that fills in Child_Class_2011 from the occupation code ER47479.
	*    Positional arguments:
	*      1st = class value to assign
	*      2nd = lower bound on the occupation code (exclusive)
	*      3rd = upper bound on the occupation code (exclusive)
	program class_replace
		args class_value lower_bound upper_bound

		* Both comparisons are strict, so the bounds themselves are excluded.
		* A missing occupation code never satisfies the upper-bound test.
		replace Child_Class_2011 = `class_value' ///
			if ER47479 > `lower_bound' ///
			& ER47479 < `upper_bound'
	end

	*v. Assign the occupation group from the 3 digit occupation code.
	*   Arguments are: class value, lower bound, upper bound (bounds exclusive).
		class_replace   1    0    44		/* a. Management Occupations */
		class_replace   2   49    74		/* b. Business Operations Specialists */
		class_replace   3   79    96		/* c. Financial Specialists */
		class_replace   4   99   125		/* d. Computer and Mathematical Occupations */
		class_replace   5  129   157		/* e. Architecture and Engineering Occupations */
		class_replace   6  159   197		/* f. Life, Physical, and Social Science Occupations */
		class_replace   7  199   207		/* g. Community and Social Services Occupations */
		class_replace   8  209   216		/* h. Legal Occupations */
		class_replace   9  219   256		/* i. Education, Training, and Library Occupations */
		class_replace  10  259   297		/* j. Arts, Design, Entertainment, Sports, and Media Occupations */
		class_replace  11  299   355		/* k. Healthcare Practitioners and Technical Occupations */
		class_replace  12  359   366		/* l. Healthcare Support Occupations */
		class_replace  13  369   396		/* m. Protective Service Occupations */
		class_replace  14  399   417		/* n. Food Preparation and Serving Occupations */
		class_replace  15  419   426		/* o. Building and Grounds Cleaning and Maintenance Occupations */
		class_replace  16  429   466		/* p. Personal Care and Service Occupations */
		class_replace  17  469   497		/* q. Sales Occupations */
		class_replace  18  499   594		/* r. Office and Administrative Support Occupations */
		class_replace  19  599   614		/* s. Farming, Fishing, and Forestry Occupations */
		class_replace  20  619   677		/* t. Construction Trades */
		class_replace  21  679   695		/* u. Extraction Workers */
		class_replace  22  699   763		/* v. Installation, Maintenance, and Repair Workers */
		class_replace  23  769   897		/* w. Production Occupations */
		class_replace  24  899   976		/* x. Transportation and Material Moving Occupations */
		class_replace  25  979   984		/* y. Military */
		class_replace 100  985  1001		/* z. Missing (stored as 100) */

	*vi. Define value labels.
	*    The missing category is stored as 100 by the last call above, so the
	*    label is attached to 100. A label on 26 would never be displayed.
	label define Child_Class_2011 ///
		1 "Management Occupations" ///
		2 "Business Operations Specialists" ///
		3 "Financial Specialists" ///
		4 "Computer and Mathematical Occupations" ///
		5 "Architecture and Engineering Occupations" ///
		6 "Life, Physical, and Social Science Occupations" ///
		7 "Community and Social Services Occupations" ///
		8 "Legal Occupations" ///
		9 "Education, Training, and Library Occupations" ///
		10 "Arts, Design, Entertainment, Sports, and Media Occupations" ///
		11 "Healthcare Practitioners and Technical Occupations" ///
		12 "Healthcare Support Occupations" ///
		13 "Protective Service Occupations" ///
		14 "Food Preparation and Serving Occupations" ///
		15 "Building and Grounds Cleaning and Maintenance Occupations" ///
		16 "Personal Care and Service Occupations" ///
		17 "Sales Occupations" ///
		18 "Office and Administrative Support Occupations" ///
		19 "Farming, Fishing, and Forestry Occupations" ///
		20 "Construction Trades" ///
		21 "Extraction Workers" ///
		22 "Installation, Maintenance, and Repair Workers" ///
		23 "Production Occupations" ///
		24 "Transportation and Material Moving Occupations" ///
		25 "Military Specific Occupations" ///
		100 "Missing"

	*vii. Assign labels to variable
	label values Child_Class_2011 Child_Class_2011

*I. Sons INDUSTRY in 2011
	*i. Son's detailed industry code in 2011, copied from ER47480
	generate Child_Industry_2011 = ER47480

	*ii. Collapsed industry variable; starts out missing and is filled in by
	*    the ind_replace calls further down
	generate Child_IND_2011 = .

	*iii. Drop any program with this name already (no error if there is none)
	capture program drop ind_replace

	*iv. Program that fills in Child_IND_2011 from Child_Industry_2011.
	*    Positional arguments:
	*      1st = collapsed industry value to assign
	*      2nd = lower bound on the detailed industry code (exclusive)
	*      3rd = upper bound on the detailed industry code (exclusive)
	program ind_replace
		args industry_value lower_bound upper_bound

		* Both comparisons are strict, so the bounds themselves are excluded.
		* A missing industry code never satisfies the upper-bound test.
		replace Child_IND_2011 = `industry_value' ///
			if Child_Industry_2011 > `lower_bound' ///
			& Child_Industry_2011 < `upper_bound'
	end

	*v. Assign the collapsed industry from the detailed industry code.
	*   Arguments are: industry value, lower bound, upper bound (bounds exclusive).
		ind_replace   1   16     30		/* a. Agriculture, Forestry, Fishing, and Hunting */
		ind_replace   2   36     50		/* b. Mining */
		ind_replace   3   56     70		/* c. Utilities */
		ind_replace   4   76     78		/* d. Construction */
		ind_replace   5  106    400		/* e. Manufacturing */
		ind_replace   6  406    460		/* f. Wholesale Trade */
		ind_replace   7  466    580		/* g. Retail Trade */
		ind_replace   8  606    640		/* h. Transportation and Warehousing */
		ind_replace   9  646    680		/* i. Information */
		ind_replace  10  686    700		/* j. Finance and Insurance */
		ind_replace  11  706    720		/* k. Real Estate and Rental and Leasing */
		ind_replace  12  726    750		/* l. Professional, Scientific, and Technical Services */
		ind_replace  13  756    780		/* m. Management, Administrative and Support, and Waste Management */
		ind_replace  14  785    790		/* n. Educational Services */
		ind_replace  15  796    848		/* o. Health Care and Social Assistance */
		ind_replace  16  855    860		/* p. Arts, Entertainment, and Recreation */
		ind_replace  17  865    870		/* q. Accommodations and Food Services */
		ind_replace  18  876    930		/* r. Other Services (Except Public Administration) */
		ind_replace  19  936    999		/* s. Public Administration and Active Duty Military */
		ind_replace 100  998  10001		/* t. Missing (stored as 100) */

	*vi. Define the value label (named after the variable)
	label define Child_IND_2011 ///
		1 "Agriculture, Forestry, Fishing, and Hunting" ///
		2 "Mining" ///
		3 "Utilities" ///
		4 "Construction" ///
		5 "Manufacturing" ///
		6 "Wholesale Trade" ///
		7 "Retail Trade" ///
		8 "Transportation and Warehousing" ///
		9 "Information" ///
		10 "Finance and Insurance" ///
		11 "Real Estate and Rental and Leasing" ///
		12 "Professional, Scientific, and Technical Services" ///
		13 "Management, Administrative and Support, and Waste Management Services" ///
		14 "Educational Services" ///
		15 "Health Care and Social Assistance" ///
		16 "Arts, Entertainment, and Recreation" ///
		17 "Accommodations and Food Services" ///
		18 "Other Services (Except Public Administration)" ///
		19 "Public Administration and Active Duty Military" ///
		100 "MISSING"

	*vii. Attach the label to the variable
	label values Child_IND_2011 Child_IND_2011

*J. Sons EDUCATION in 2011
	*i. Create new variable for child's education in 2011. Missing by default.
	gen Child_Ed_2011 = .

	*ii. Band the number of completed grades (ER52405) into categories
	replace Child_Ed_2011 = 0   if ER52405 == 0						/* a. No schooling */
	replace Child_Ed_2011 = 1   if ER52405 > 0  & ER52405 < 6		/* b. 1 to 5 grades */
	replace Child_Ed_2011 = 2   if ER52405 > 5  & ER52405 < 9		/* c. 6 to 8 grades */
	replace Child_Ed_2011 = 3   if ER52405 > 8  & ER52405 < 12		/* d. 9 to 11 grades */
	replace Child_Ed_2011 = 4   if ER52405 == 12					/* e. 12 grades */
	replace Child_Ed_2011 = 5   if ER52405 > 12 & ER52405 < 16		/* f. 13 to 15 grades */
	replace Child_Ed_2011 = 6   if ER52405 == 16					/* g. 16 grades */
	replace Child_Ed_2011 = 7   if ER52405 == 17					/* h. 17 grades */

	* i. Missing: any value above 17. Stata treats a missing value as larger
	*    than any number, so observations where ER52405 is itself missing
	*    also receive 100 here.
	replace Child_Ed_2011 = 100 if ER52405 > 17

	*iii. Define value labels.
	*     The list ends on its last entry with no trailing continuation marker,
	*     and the missing category (100) is labelled too.
	label define Child_Ed_2011 ///
		0 "0 grades of schooling" ///
		1 "1 to 5 grades of schooling" ///
		2 "6 to 8 grades of schooling" ///
		3 "9 to 11 grades of schooling" ///
		4 "12 grades of schooling" ///
		5 "13 to 15 grades of schooling" ///
		6 "16 grades of schooling" ///
		7 "17 grades of schooling" ///
		100 "Missing"

	*iv. Assign labels to variable
	label values Child_Ed_2011 Child_Ed_2011
	
*K. RACE
	*i. Create new Race variable as a copy of ER51904
	gen Child_Race2 = ER51904

	*ii. Collapse every code above 5 into 5.
	*    The test is made on Child_Race2 itself, so this step does not depend
	*    on how any other race variable has been recoded.
	*    Stata treats a missing value as larger than any number, so missing
	*    values are set to 5 as well. A code of 5 already present in ER51904
	*    is left unchanged and therefore shares the label given to 5 below.
	replace Child_Race2 = 5 if Child_Race2 > 5

	*iii. Define value labels
	label define Child_Race2 ///
		1 "White" ///
		2 "Black" ///
		3 "American Indian" ///
		4 "Asian" ///
		5 "Other / missing"

	*iv. Assign labels to variable
	label values Child_Race2 Child_Race2

	*v. Look at distribution of the new variable
	tab Child_Race2
	
*G. Create variables to merge these offspring with their fathers
* NOTE: PN is short for "person number"

	*i. Keys used later to merge with the FATHERS data:
	*   the father's 1968 ID and the father's 1968 person number
	generate MERGE_ID_1968 = ER32016
	generate MERGE_PN_1968 = ER32017

	*ii. Drop offspring whose father cannot be identified (either key is 0)
	drop if MERGE_ID_1968 == 0
	drop if MERGE_PN_1968 == 0

	*iii. Drop offspring with no father data available (person number above 899).
	*     A missing person number also satisfies this test and is dropped.
	drop if MERGE_PN_1968 > 899

*H. Keep only offspring variables going to use in analysis
	*i. Copies of the offspring's own identifiers:
	*   family number in 1968 and person number in 1968
	generate Child_ER30001 = ER30001
	generate Child_ER30002 = ER30002

	*ii. Keep the identifiers plus every variable stored from Child_Gender
	*    through MERGE_PN_1968 in dataset order. The variables named
	*    individually after the range are listed explicitly as well.
	keep Child_ER30001 Child_ER30002 ///
		Child_Gender - MERGE_PN_1968 ///
		Child_Occ_2011 Child_Race2 Child_Ed_2011 ///
		Child_Labor_Earnings_5year Child_Labor_Earnings_5year_N ///
		Child_IND_2011 Child_Class_2011

*I. Sort and save the recoded dataset
	*i. Look at the number of observations (author's note: n = 1,630)
	tab Child_Gender

	*ii. Sort on the keys used to merge with the FATHERS data:
	*    father's ID in 1968, then father's person number in 1968
	sort MERGE_ID_1968 MERGE_PN_1968

	*iii. Save the recoded offspring data, overwriting any existing file
	save C:\Users\john\Pictures\STATA_VERSION_9\PSID\DATA1\Offspring_2011 , replace

