********************************************************************************
***************	META-ANALYSIS: basic data cleaning and preparation	************
********************************************************************************
* CC, 09/23/2016

* Load the dated raw meta-analysis file named by the globals path, data and
* c_date (all three are set outside this file).
* - Forward slashes are used as path separators: a backslash placed directly
*   in front of a macro reference stops Stata from expanding that macro, so the
*   original back-slashed path was not resolved. Forward slashes work on every
*   platform, Windows included.
* - The use command has no replace option; clear is the option that discards
*   the data currently in memory before loading.
use "${path}/${data}/Metaanalyse_${c_date}.dta", clear
********************************************************************************
* 1. sort papers
sort PaperNo

* 2. Change variable types from string to numeric where necessary.
*    real() yields missing for every string that cannot be read as a number.
*    The variables are created in the order es, es_b, rr, ngroup_s, ngroup_b.
generate es       = real(ESSurvey)
generate es_b     = real(ESBenchmark)
generate rr       = real(RR)
generate ngroup_s = real(NGroupSurvey)
generate ngroup_b = real(NGroupBenchmark)

label variable es       "Effect size (survey)"
label variable es_b     "Effect size (benchmark)"
label variable rr       "Response Rate Survey"
label variable ngroup_s "Survey: number of cases per subgroup"
label variable ngroup_b "Benchmark: number of cases per subgroup"

* Benchmark effect sizes are displayed with one decimal.
format es_b %9.1f

* Encode the string variables: each one gets a labelled numeric twin whose
* name carries the suffix _e. The ranges (a-b) follow the variable order of the
* raw file, so they cover whatever columns sit between the two end points.
foreach strvar of varlist PaperID Author1-Author3 BusinessSurvey      ///
                          WeightedESSurvey DocType Journal            ///
                          Indicator-BenchmarkQuality Mode             ///
                          TargetPop-SampleType Variable {
    encode `strvar', generate(`strvar'_e)
}

* 3. Drop useless vars: keep only what the later steps need.
*    Every variable is listed once, grouped by kind.
keep es es_b rr ngroup_s ngroup_b                                     ///
     PaperID_e Author1_e DocType_e Indicator_e BenchmarkQuality_e     ///
     Mode_e TargetPop_e SurveyDesign_e SampleType_e Variable_e        ///
     BusinessSurvey_e WeightedESSurvey_e                              ///
     Characteristic YearofPublication StudyID ESID CISurvey           ///
     TotalNSampleSizeSurvey TotalNRespondentsBenchmark                ///
     NoAuxVars-TypeLinkedData

* 4. Rename vars to short working names.

* Paper / study identification
rename PaperID_e                  paper
rename Author1_e                  author
rename YearofPublication          year
rename DocType_e                  doctype
rename StudyID                    id

* Survey design
rename SurveyDesign_e             design
rename SampleType_e               sample
rename TargetPop_e                target
rename Mode_e                     mode
rename BusinessSurvey_e           business
rename WeightedESSurvey_e         weighted

* Sizes and estimates
rename TotalNSampleSizeSurvey     n
rename TotalNRespondentsBenchmark N
rename CISurvey                   ci
rename BenchmarkQuality_e         quality_b
rename Indicator_e                indicator
rename Variable_e                 var
rename Characteristic             cat

* Auxiliary-variable information
rename NoAuxVars                  auxvars
rename TypeFrame                  framevar
rename TypeFieldwork              fieldworkvar
rename TypeSocDem                 socdemvar
rename TypeIntObs                 intobsvar
rename TypeLinkedData             linkedvar


* 5. Display formats
format rr %9.1f
format es %9.2f

* 6. Correct missing values: the numeric codes 888 / 999 become system missing
replace rr = . if inlist(rr, 888, 999)
replace n  = . if n == 999
replace N  = . if inlist(N, 888, 999)

* 7. Number of respondents = sample size times response rate (rr is divided
*    by 100 here, i.e. treated as a percentage).
*    NOTE(review): the original header also announced filling up missing
*    values, but nothing is filled in this step; resp is missing wherever n or
*    rr is missing at this point.
*    NOTE(review): the original author suspected that n, N and resp are
*    sometimes confused in the source data -- still to be checked.
generate resp = (rr / 100) * n
format resp %9.0f
label variable resp "Number of respondents"

* 8. Document type as 0/1 indicator: encoded category 2 becomes 1, every other
*    value becomes 0.
*    NOTE(review): observations with a missing doctype also end up as 0
*    ("other"), exactly as with the former else-rule -- confirm this is wanted.
replace doctype = (doctype == 2)
label define   doctype 0 "other" 1 "journal article"
label values   doctype doctype
label variable doctype "type of publication"

* 9. Recode mode var
* 9.1 Ordinal mode var: collapse the encoded categories into five groups.
*     Codes set to missing, with the open questions left by the author:
*       24 = business      --> find out which mode is applied
*       28 = simulation    --> find out which mode is applied
*       30 = self-enumeration?
*     NOTE(review): the numeric codes were assigned by encode, so they depend
*     on the set of mode strings in the raw file -- verify after data updates.
recode mode (1 = 5)               ///
            (2/23 25/27 = 4)      ///
            (31 = 1)              ///
            (32 33 = 2)           ///
            (34 = 3)              ///
            (24 28 29 30 = .)

label define mode 1 "Personal" 2 "Telephone" 3 "Web" 4 "Mixed mode" 5 "Mail"
label values mode mode

* 9.2 Mode dummies --> useful for mixed mode vs single mode hypothesis.
*     tabulate creates mode1, mode2, ... in ascending order of the observed
*     values; they are renamed in that order and each gets its own No/Yes label.
tabulate mode, generate(mode)

local k = 0
foreach dummy in personal telephone web mixed mail {
    local ++k
    rename mode`k' `dummy'
    label define `dummy' 0 "No" 1 "Yes"
    label values `dummy' `dummy'
}

* Web dummy is rebuilt as web vs. other single modes:
* 1 = web, 0 = personal / telephone / mail, missing for mixed mode and unknown.
drop web
generate web = cond(mode == 3, 1, cond(mode <= 2 | mode == 5, 0, .))
label values web web

* 10. Recode sample size: ordinal, three groups (for descriptives?)
*     TODO (original author): the 91 missing values must be filled in.
egen n_cat = cut(n), group(3)
label define   n_cat 0 "Small" 1 "Medium" 2 "Large"
label values   n_cat n_cat
label variable n_cat "Sample size in three groups"

* 11. Recode response rate: ordinal, three groups (for descriptives?)
*     Missing response rates are filled from n and N before the grouping.
*     Previously rr was only filled further down in the script, after rr_g had
*     been built, so observations with a filled rr were left without a group.
*     The later fill uses the same expression and therefore changes nothing
*     once this one has run.
*     TODO (original author): remaining missing values (40 before this fill)
*     must be filled in.
replace rr = n/N*100 if rr == .
egen rr_g = cut(rr), group(3)
label variable rr_g "Response rate in three groups"
label define rr_g 0 "Low" 1 "Medium" 2 "High"
label values rr_g rr_g

* 12. Recode target population: binary.
*     Encoded categories 1-4 count as general population, 5-17 as special
*     population; any other value is left untouched.
recode target (1/4 = 1) (5/17 = 0)
label define   target 0 "special population" 1 "general population"
label values   target target
label variable target "target population"

* Where rr is system missing it is set to n / N * 100; it stays missing when
* n or N is missing as well.
* NOTE(review): values filled at this point do not flow into variables that
* were derived from rr further up in the script (for example resp).
/// 13. fill missings in response rates
replace rr=n/N*100 if rr==.

* 14. Recode business surveys to No/Yes.
*     NOTE(review): business was produced by encode, which numbers the
*     categories 1, 2, 3, ...; a literal value of 888 is therefore not expected
*     and the 888 rule presumably never fires. Encoded category 1 is not
*     touched by any rule, so it stays 1 and is shown as "Yes" -- check which
*     raw string that category stands for.
recode business (3 4 = 0) (2 5 = 1) (888 = .)
label define business 0 "No" 1 "Yes"
label values business business

* 15. Recode weight: encoded category 1 becomes 0 (No), categories 2 and 3
*     become 1 (Yes).
*     The variable is called weighted (see the rename step); it is now
*     addressed by its full name instead of the abbreviation weight, which only
*     resolved through variable-name abbreviation and stops working when that
*     setting is switched off. The value label keeps its old name, weight.
recode weighted (1=0) (2 3=1)
label variable weighted "Nonresponse weight?"
label define weight 0 "No" 1 "Yes"
label values weighted weight

* Sample type: encoded categories 1 and 2 become 0 (Nonprob), category 3
* becomes 1 (Prob).
recode sample (1 2=0) (3=1)
label define sample1 0 "Nonprob" 1 "Prob"
label values sample sample1

* 16. Order variables in data set.
*     weighted is spelled out in full; the former entry weight only matched it
*     through variable-name abbreviation.
order paper author year doctype design sample id target resp n N rr mode personal ///
	  telephone web mixed mail indicator business weighted ESID es ci es_b quality_b var cat ///
	  auxvars framevar fieldworkvar socdemvar intobsvar linkedvar ngroup_s ngroup_b n_cat rr_g


* 17. Decode author variable: turn the labelled numeric author back into a
*     string variable of the same name.
*     NOTE(review): the decoded copy is created as the last variable, so author
*     ends up at the end of the data set rather than where the order step put it.
decode author, generate(author_str)
drop author
rename author_str author

*drop if business==1
*drop if weight==1

