#----------------------------------------------------#
#  Name-based measures of neighborhood composition:  #
#        how telling are neighbors' names?           #
#                                                    #
#                  Hanno Kruse                       #
#                 Joerg Dollmann                     #
#                                                    #
#                    Analysis                        #
#          using CILS4EU W1 Data (v.1.2.0)           #
#          and   R (v.3.2.3)                         #
#                                                    #
#   1) Theoretical scenarios                         #
#   2) Derive error rates                            #
#   3) Ethnic mix in German neighborhoods            #
#   4) Bias induced to compositional measures        #
#----------------------------------------------------#

	# load packages (make sure that you have installed the packages before running the code, using 'install.packages()')
	require(foreign)
	require(ggplot2)
    require(grid)
    require(useful)
	require(MASS)
	require(reshape2)
	require(weights)
	vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)

	# specify folder where datasets are stored
	data_folder   <-'xxx'
	# specify folder where results should be saved
	results_folder<-'xxx'
	# specify folder where r-files are stored
	rfiles_folder <-'xxx'

	# load additional functions
	setwd(paste(rfiles_folder))
	source("xtra_neighb_measure.R")	
	source("xtra_ethmix_namebased_sim.R")	

	# save figures as vector graph or not?
	vector<-TRUE

	# weighted or unweighted regression results? [in paper only weighted results reported]
	weighted<-TRUE

	
# ---------------------------------------------------------------------
# 1) Theoretical scenarios
# ---------------------------------------------------------------------	

	p_gen1_total<- c(rep(0,11))		# whats the proportion of 1st generation within each immigrant ethnic group? (natives=0) [option not used in paper]
	eth_low_ses <- c(rep(0,11))   	# which immigrant group is rather low-ses? (yes=1) [option not used in paper]
	steps		<- 5				# incremental steps for native proportion in neighborhoods
	N_neighb 	<- 200				# how many neighborhoods?
	N_agents 	<- 5000				# how many agents per neighborhood?
	movers 		<- NULL				# proportion of agents of an ethnic group in a neighborhood that moves into the direction as suggested by stylized fact [option not used in paper]

	### counterfactual scenarios
		p_eth_total <- c(50,rep(10,10))	# whats the overall distribution of agents over ethnic groups?
	    range       <- 50              	# how many %-pts is the native proportion in a neighborhood supposed to deviate from its overall mean value?
		p_correct 	<- c(rep(.5,21))	
   		region<-neighbs(p_eth_total,p_gen1_total,eth_low_ses,range,steps,N_neighb,N_agents,movers)
		apply(region,2,sum)/sum(region)
		data_sc1<-measure(region)

		p_eth_total <- c(50,rep(10,10))	# whats the overall distribution of agents over ethnic groups?
	    range       <- 50              	# how many %-pts is the native proportion in a neighborhood supposed to deviate from its overall mean value?
		p_correct 	<- c(1,rep(.5,20))	
   		region<-neighbs(p_eth_total,p_gen1_total,eth_low_ses,range,steps,N_neighb,N_agents,movers)
		apply(region,2,sum)/sum(region)
		data_sc2<-measure(region)

		p_eth_total <- c(50,rep(10,10))	# whats the overall distribution of agents over ethnic groups?
	    range       <- 50              	# how many %-pts is the native proportion in a neighborhood supposed to deviate from its overall mean value?
		p_correct 	<- c(rep(1,21))	
   		region<-neighbs(p_eth_total,p_gen1_total,eth_low_ses,range,steps,N_neighb,N_agents,movers)
		apply(region,2,sum)/sum(region)
		data_sc3<-measure(region)

		p_eth_total <- c(50,rep(10,10))	# whats the overall distribution of agents over ethnic groups?
	    range       <- 50              	# how many %-pts is the native proportion in a neighborhood supposed to deviate from its overall mean value?
		p_correct 	<- c(.5,rep(1,20))	
   		region<-neighbs(p_eth_total,p_gen1_total,eth_low_ses,range,steps,N_neighb,N_agents,movers)
		apply(region,2,sum)/sum(region)
		data_sc4<-measure(region)

		data_temp<-cbind(rbind(data_sc1,data_sc2,data_sc3,data_sc4),c(rep(1,nrow(data_sc1)),rep(2,nrow(data_sc2)),rep(3,nrow(data_sc3)),rep(4,nrow(data_sc4))))
		names(data_temp)[3]<-"type"

	### plot
		fig<-
	    ggplot(data=data_temp)+
	        #geom_point(fill="white",size=2,alpha=0.2,colour="black",pch=21) +
	        xlab("majority share in neighborhood \n(actual)") + 
	        ylab("majority share in neighborhood \n(name-based)") +
	        #coord_cartesian(ylim = c(0,.5)) +
	        geom_smooth(aes(x = imm_neighb_orig, y=imm_neighb_HS,lty=factor(type)),method="loess",size=2, colour="black", fullrange=F,span=1,se=F) +
	        scale_linetype_manual(name = "",values = c(1,2,3,4),labels = c("1" = expression(paste("scenario A (",p[italic(fp)], "=.5 ", p[italic(fn)], "=.5)",sep="")), 
	        	                                                           "2" = expression(paste("scenario C (",p[italic(fp)], "=.5 ", p[italic(fn)], "= 0)",sep="")),
	        	                                                           "3" = expression(paste("scenario B (",p[italic(fp)], "= 0 ", p[italic(fn)], "= 0)",sep="")),
	        	                                                           "4" = expression(paste("scenario D (",p[italic(fp)], "= 0 ", p[italic(fn)], "=.5)",sep=""))))+
	        theme_classic() +
	        guides(lty=guide_legend(ncol=2))+
	        theme(legend.position="bottom",legend.key.width=unit(2.4,"cm"),text = element_text(size=23),legend.key = element_blank() ) 
		setwd(paste(results_folder))
		if (vector)  {pdf(file="fig1.pdf", 
						  useDingbats=FALSE,
						  width=9, 
						  height=10, 
						  pointsize=12)}
		if (!vector) {png(filename="fig1.png", 
		    			  type="cairo",
		    			  units="in", 
		    			  width=9, 
		    			  height=10, 
		    			  pointsize=12, 
		    			  res=200)}		
		grid.newpage()
		pushViewport(viewport(layout = grid.layout(1,1)))
		print(fig, vp = vplayout(1, 1))
		dev.off()
		rm(fig)


# ---------------------------------------------------------------------
# 2) Derive error rates
# ---------------------------------------------------------------------

  	### Load cils4eu data
		setwd(paste(data_folder))
		data_cils<-read.dta("cils4eu_w1_ger.dta", convert.factors = FALSE) # Run Stata do-files first to create the dta file.
		dim(data_cils)
		data_cils<-data_cils[!is.na(data_cils$error),]
		dim(data_cils)


	### Descriptive table 1
		if ( weighted) {
			table<-matrix(NA,3,3)
			table[1:2,1]<-wpct(data_cils$HS_CODE,weight=data_cils$houwgt)
			table[1,2]  <-weighted.mean(data_cils$ethgroups[data_cils$HS_CODE==0]==0,w=data_cils$houwgt[data_cils$HS_CODE==0],na.rm=T)
			table[2,2]  <-weighted.mean(data_cils$ethgroups[data_cils$HS_CODE!=0]!=0,w=data_cils$houwgt[data_cils$HS_CODE!=0],na.rm=T)
			table[1:2,3]<-table(data_cils$HS_CODE)
			table[3,1]  <-sum(wpct(data_cils$HS_CODE,weight=data_cils$houwgt))
			table[3,2]  <-weighted.mean(data_cils$error==0,w=data_cils$houwgt,na.rm=T)
			table[3,3]  <-length(data_cils$HS_CODE)
		    setwd(paste(results_folder))   
		    write.table(table,file="tab1.csv",sep=",",row.names=F)
		    rm(table)
		}
		if (!weighted) {
			table<-matrix(NA,3,3)
			table[1:2,1]<-table(data_cils$HS_CODE)/length(data_cils$HS_CODE)
			table[1,2]  <-mean(data_cils$ethgroups[data_cils$HS_CODE==0]==0,na.rm=T)
			table[2,2]  <-mean(data_cils$ethgroups[data_cils$HS_CODE!=0]!=0,na.rm=T)
			table[1:2,3]<-table(data_cils$HS_CODE)
			table[3,1]  <-sum(table(data_cils$HS_CODE)/length(data_cils$HS_CODE))
			table[3,2]  <-mean(data_cils$error==0,na.rm=T)
			table[3,3]  <-length(data_cils$HS_CODE)
		    setwd(paste(results_folder))   
		    write.table(table,file="tab1.csv",sep=",",row.names=F)
		    rm(table)
		}

	### Descriptive table 2
		table<-matrix(NA,9,4)
		if ( weighted) {
			table[1:7,1]<-wpct(data_cils$ethgroups,weight=data_cils$houwgt)
			for (i in c(1:7)) {
				table[i,2]<-weighted.mean(data_cils$immgen[data_cils$ethgroups==(i-1)]==2,w=data_cils$houwgt[data_cils$ethgroups==(i-1)],na.rm=T)
				table[i,3]<-weighted.mean(data_cils$error[data_cils$ethgroups==(i-1)]    ,w=data_cils$houwgt[data_cils$ethgroups==(i-1)],na.rm=T)
			}
			table[1:7,4]<-table(data_cils$ethgroups)
			table[8,1]<-weighted.mean(data_cils$ethgroups!=0,w=data_cils$houwgt,na.rm=T)
			table[8,2]<-weighted.mean(data_cils$immgen[data_cils$ethgroups!=0]==2,w=data_cils$houwgt[data_cils$ethgroups!=0],na.rm=T)
			table[8,3]<-weighted.mean(data_cils$error[ data_cils$ethgroups!=0]   ,w=data_cils$houwgt[data_cils$ethgroups!=0],na.rm=T)
			table[8,4]<-sum(data_cils$ethgroups!=0)
			table[9,1]<-1
			table[9,2]<-weighted.mean(data_cils$immgen==2,w=data_cils$houwgt,na.rm=T)
			table[9,3]<-weighted.mean(data_cils$error    ,w=data_cils$houwgt,na.rm=T)
			table[9,4]<-length(data_cils$ethgroups)
		    setwd(paste(results_folder))   
		    write.table(table,file="tab2.csv",sep=",",row.names=F)
		    rm(table)
		}
		if (!weighted) {
			table[1:7,1]<-table(data_cils$ethgroups)/length(data_cils$ethgroups)
			for (i in c(1:7)) {
				table[i,2]<-mean(data_cils$immgen[data_cils$ethgroups==(i-1)]==2,na.rm=T)
				table[i,3]<-mean(data_cils$error[data_cils$ethgroups==(i-1)],na.rm=T)
			}
			table[1:7,4]<-table(data_cils$ethgroups)
			table[8,1]<-sum(data_cils$ethgroups!=0)/length(data_cils$ethgroups)
			table[8,2]<-mean(data_cils$immgen[data_cils$ethgroups!=0]==2,na.rm=T)
			table[8,3]<-mean(data_cils$error[data_cils$ethgroups!=0],na.rm=T)
			table[8,4]<-sum(data_cils$ethgroups!=0)
			table[9,1]<-1
			table[9,2]<-mean(data_cils$immgen==2,na.rm=T)
			table[9,3]<-mean(data_cils$error,na.rm=T)
			table[9,4]<-length(data_cils$ethgroups)
		    setwd(paste(results_folder))   
		    write.table(table,file="tab2.csv",sep=",",row.names=F)
		    rm(table)
		}

    ### Logistic models
    	if ( weighted) {
			model_nat<-glm(error~ ethgroups!=0                    ,data=data_cils                         ,weights=houwgt, family = "binomial")
			model_mig<-glm(error~ factor(ethgroups)+factor(immgen),data=data_cils[data_cils$ethgroups!=0,],weights=houwgt, family = "binomial")
		}
    	if (!weighted) {
			model_nat<-glm(error~ ethgroups!=0                    ,data=data_cils                                        , family = "binomial")
			model_mig<-glm(error~ factor(ethgroups)+factor(immgen),data=data_cils[data_cils$ethgroups!=0,]               , family = "binomial")
		}

    ### Save to file
		out_nat<-summary(model_nat)$coef
		out_nat<-rbind(out_nat,
			           c(summary(model_nat)$aic,rep(NA,3)),
			           c(model_nat$df.null+1   ,rep(NA,3)))
		out_mig<-summary(model_mig)$coef
		out_mig<-rbind(out_mig,
			           c(summary(model_mig)$aic,rep(NA,3)),
			           c(model_mig$df.null+1   ,rep(NA,3)))
        setwd(paste(results_folder))
       	if ( weighted) {
		    write.table(out_nat,file="tabA1.csv",sep=",",row.names=F)
		    write.table(out_mig,file="tabA2.csv",sep=",",row.names=F)
 		}
        if (!weighted) {
		    write.table(out_nat,file="tabA1.csv",sep=",",row.names=F)
		    write.table(out_mig,file="tabA2.csv",sep=",",row.names=F)
 		}
 		rm(out_nat,out_mig)

    ### Create predicted probabilities
		### output
			c_nat<-model_nat$coefficients
			c_mig<-model_mig$coefficients
            c_mig<-c_mig[!is.na(c_mig)]
			se_nat<-coef(summary(model_nat))[, 2]
			se_mig<-coef(summary(model_mig))[, 2]
			cov_nat<-vcov(model_nat)
			cov_mig<-vcov(model_mig)
		### simulate probabilities
		    nsim <- 1000
		    set.seed(1234)
		    S <- mvrnorm(nsim, mu = c_nat, Sigma = cov_nat) 
		    ### for ols
			    #S<-c()
			    #for (x in 1:nsim) {  
			    #  S <- rbind(S,rnorm(length(c_mig), mean = c_mig, sd = se_mig )) 
			    #}
		    X_c <- rbind( c(1,0),
		                  c(1,1) ) 
		    theta_c <- S %*% t(X_c)
		    phi     <- 1/(1+exp(-theta_c))
		    eff_nat <- t(apply(phi,2,quantile,c(0.025,0.5,.975)))
		    eff_nat[,2]<-t(apply(phi,2,mean,na.rm=T))

		    phi_nat <- apply(phi,2,mean)[1]

		    S <- mvrnorm(nsim, mu = c_mig, Sigma = cov_mig) 
		    X_c <- rbind( c(1,rep(0,5),1),
		    		      c(1,rep(0,5),0),
		                  c(1,1,rep(0,4),1),
		                  c(1,1,rep(0,4),0),         				
		                  c(1,0,1,rep(0,3),1),
		                  c(1,0,1,rep(0,3),0),
		                  c(1,0,0,1,rep(0,2),1),
		                  c(1,0,0,1,rep(0,2),0),
		                  c(1,0,0,0,1,0,1),
		                  c(1,0,0,0,1,0,0),
		                  c(1,0,0,0,0,1,1),
		                  c(1,0,0,0,0,1,0)
                  		) 
		    theta_c <- S %*% t(X_c)
		    phi     <- 1/(1+exp(-theta_c))
		    eff_mig <- t(apply(phi,2,quantile,c(0.025,0.5,.975)))
		    eff_mig[,2]<-t(apply(phi,2,mean,na.rm=T))

		    eff<-rbind(eff_nat,eff_mig)
			p_correct<-eff[,2]
			phi_mig<-apply(phi,2,mean)

			phi_1stgen<-c(phi_nat,phi_mig[seq(1,12,2)])
			phi_2ndgen<-c(phi_nat,phi_mig[seq(2,12,2)])
			rm(phi,phi_nat,phi_mig)

    ### plot probabilities
		eth<-c("Majority","Minority","Turkish","Turkish","FSU","FSU","Polish","Polish","FYR","FYR","Other Western","Other Western","Other Non-Western","Other Non-Western")
		eth<- factor(eth, levels = c("Polish","FSU","Other Western","Other Non-Western","FYR","Turkish","Minority","Majority"))
		shape<-c("1","1",rep("2",12))
		data <- data.frame(eth=eth,med=eff[,2],low=eff[,1],hi=eff[,3],gen=c("16","16",rep(c("23","22"),6)))
		fig <-
			ggplot(data, position = position_dodge(width = 1/2)) +
			geom_linerange(aes(x = eth, ymin = low,ymax = hi),lwd = .7) +			
			geom_point(aes(x = eth, y = med,shape=gen),size=5, fill = "WHITE") +
			coord_flip() +
			ylab("p(error)") +
  			xlab("") + 
			scale_shape_manual(values=c(16,22,23),name="",labels=c("16"="overall","22"="first generation \nimmigrants","23"="second generation \nimmigrants")) +
	        guides(shape=guide_legend(ncol=3,byrow=T))+
			theme_classic()+
			theme(legend.position="bottom",legend.key.width=unit(1,"cm"),text = element_text(size=20), axis.ticks= element_blank() )
		setwd(paste(results_folder))
		if (weighted) {
			if (vector)  {pdf(file="fig2.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="fig2.png", 
			        		  type="cairo",
			        		  units="in", 
			        		  width=9, 
			        		  height=9, 
			        		  pointsize=12, 
			        		  res=200)}
		}
		if (!weighted) {
			if (vector)  {pdf(file="fig2.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="fig2.png", 
			        		  type="cairo",
			        		  units="in", 
			        		  width=9, 
			        		  height=9, 
			        		  pointsize=12, 
			        		  res=200)}
		}
		grid.newpage()
		pushViewport(viewport(layout = grid.layout(1,1)))
		print(fig, vp = vplayout(1, 1))
		dev.off()
		rm(fig)



# ---------------------------------------------------------------------
# 3) Ethnic mix in German neighborhoods
# ---------------------------------------------------------------------

	### Bias assuming homogenous immigrant group
		eff[2,2]      # native proportion in immigrant only neighborhood 
		1-eff[1,2]    # native proportion in native    only neighborhood 
		correction1<-c((-eff[2,2]),(1/(1- eff[1,2]-eff[2,2])))

	### Bias assuming heterogenous immigrant group
		# Load Nuremberg data
		setwd(paste(data_folder))
		data_nrmbrg<-read.csv("nuremberg.csv",sep=";")
		nrmbrg     <-as.matrix(data_nrmbrg[,2:8])
		# Load Berlin data
		setwd(paste(data_folder))
		data_berlin<-read.csv("berlin.csv",sep=";")
		berlin     <-as.matrix(data_berlin[,5:11])
		# Combine Nuremberg and Berlin data 
		combined   <-rbind(nrmbrg,berlin)

	### Summary statistics
		out<-matrix(NA,11,4)
		for (i in 1:3) {
			if (i==1) {fuppes<-nrmbrg}
			if (i==2) {fuppes<-berlin}
			if (i==3) {fuppes<-combined}
			out[1,i]        <-nrow(fuppes)
			out[2,i]        <-mean(apply(fuppes,1,sum))
			out[3,i]        <-sd(apply(fuppes,1,sum))
			out[c(4,6:11),i]<-apply(fuppes,2,sum)/sum(fuppes)
			out[        5,i]<-1-out[4,i]

		}
		setwd(paste(results_folder))
		write.table(out,file="tab3.csv",sep=",",row.names=F)		
		rm(fuppes,i,out)

	### Data for graph
		data_c<-ethmix(combined)


	### Graph results
		plot_it<-function(data_plot,i,x_label=FALSE) {
			eth_vals<-levels(data_plot$eth)
			out<-
				ggplot(na.omit(data_plot[data_plot$eth==eth_vals[i],]), aes(x = p_nat, y = p)) +
			    geom_bar(stat='identity') +
			    ylab("rel freq.") +
			    xlab("") +
				coord_cartesian(ylim = c(0,max(data_plot$p)), xlim = c(0.5,5.5)) +
			    # scale_fill_manual(name = "ethnic group",values =colorRampPalette(c("black", "gray80"))(6))+
				scale_x_continuous(breaks=c(1:5),labels = levels(data_plot$p_nat_cut)) +
				theme_classic() +
			    ggtitle(paste(eth_vals[i])) +
			    theme(plot.title = element_text(hjust = 0.5),legend.position="right",legend.key.width=unit(1,"cm"),text = element_text(size=21),legend.key = element_blank() ) 
			if (x_label) {out <- out + xlab("majority share in neighborhood (actual, quintiles)") }
			return(out)
		}

		# Berlin & Nuremberg combined
		for (i in 1:6) {
			if (i< 6) {eval(parse(text=paste("fig_",i,"<- plot_it(data_c,",i,")",sep="")))}
			if (i==6) {eval(parse(text=paste("fig_",i,"<- plot_it(data_c,",i,",x_label=TRUE)",sep="")))}
		}
		setwd(paste(results_folder))
		if (vector)  {pdf(file="fig3.pdf", 
						  useDingbats=FALSE,
						  width=10, 
						  height=12, 
						  pointsize=12)}
		if (!vector) {png(filename="fig3.png", 
		    			  type="cairo",
		    			  units="in", 
		    			  width=10, 
		    			  height=12, 
		    			  pointsize=12, 
		    			  res=200)}
		grid.newpage()
		pushViewport(viewport(layout = grid.layout(6,1)))
		print(fig_1, vp = vplayout(1, 1))
		print(fig_2, vp = vplayout(2, 1))
		print(fig_3, vp = vplayout(3, 1))
		print(fig_4, vp = vplayout(4, 1))
		print(fig_5, vp = vplayout(5, 1))
		print(fig_6, vp = vplayout(6, 1))
		dev.off()
		rm(fig_1,fig_2,fig_3,fig_4,fig_5,fig_6,i)



# ---------------------------------------------------------------------
# 4) Bias induced to compositional measures
# ---------------------------------------------------------------------

		# Load additional IRB Data (for figure A1)
		setwd(paste(data_folder))
		data_irb<-read.csv("irb.csv",sep=";")
		irb     <-as.matrix(data_irb[,2:8])
		# Simulate data
		data_c<-namebased_sim(combined,phi_1stgen,phi_2ndgen)
		data_i<-namebased_sim(irb,phi_1stgen,phi_2ndgen)
		# Graph results
		data_c1<-as.data.frame(cbind(rep(data_c[,1],2),c(data_c[,2],data_c[,3])))
		data_i1<-as.data.frame(cbind(rep(data_i[,1],2),c(data_i[,2],data_i[,3])))
		names(data_c1)<-names(data_i1)<-c("actual","namebased")
		fig_c<-
	    ggplot(data_c1)+
	        xlab("majority share in neighborhood \n(actual)") + 
	        ylab("majority share in neighborhood \n(name-based)") +
	        coord_cartesian(xlim = c(0.2,1),ylim = c(0.2,1)) +
	        # geom_point( aes(x = actual, y=namebased),size=3,pch=1,colour="gray80") +
	        # geom_smooth(aes(x = actual, y=namebased),method="lm",formula =  y ~ poly(x, 1),size=2,lty=3,colour="gray0", fullrange=F,span=.4,se=F, alpha=.5) +
	        geom_segment(aes(x = 0.2, y = 0.2, xend = 1, yend = 1)  ,lty=3,size=1,colour="gray80") +
	        # geom_segment(aes(x = 0.2, y = 0.6, xend = 1, yend = 1)  ,lty=4,size=1,colour="gray80") +
	        # geom_segment(aes(x = 0.2, y = 0.1, xend = 1, yend = 0.5),lty=2,size=1,colour="gray80") +
	        geom_segment(aes(x = 0.2, y = eff[2,2]+0.2*(1-eff[1,2]-eff[2,2]), xend = .975, yend = eff[2,2]+0.975*(1-eff[1,2]-eff[2,2]),lty="1"),size=1.5,colour="gray0") +
	        geom_smooth(aes(x = actual, y=namebased,lty="2"),method="loess",size=1.5,colour="gray0", fullrange=F,span=.6,se=F, alpha=.5) +
	        scale_linetype_manual(name="",values=c(2,1),labels=c("2"="assuming \nheterogenous groups","1"="assuming \nhomogeneous groups"))+
	        theme_classic() +
	        theme(legend.position="bottom",legend.key.width=unit(3,"cm"),text = element_text(size=23),legend.key = element_blank() ) 
		fig_i<-
	    ggplot(data_i1)+
	        xlab("majority share in neighborhood \n(actual)") + 
	        ylab("majority share in neighborhood \n(name-based)") +
	        coord_cartesian(xlim = c(0.2,1),ylim = c(0.2,1)) +
	        # geom_point( aes(x = actual, y=namebased),size=3,pch=1,colour="gray80") +
	        # geom_smooth(aes(x = actual, y=namebased),method="lm",formula =  y ~ poly(x, 1),size=2,lty=3,colour="gray0", fullrange=F,span=.4,se=F, alpha=.5) +
	        geom_segment(aes(x = 0.2, y = 0.2, xend = 1, yend = 1)  ,lty=3,size=1,colour="gray80") +
	        # geom_segment(aes(x = 0.2, y = 0.6, xend = 1, yend = 1)  ,lty=4,size=1,colour="gray80") +
	        # geom_segment(aes(x = 0.2, y = 0.1, xend = 1, yend = 0.5),lty=2,size=1,colour="gray80") +
	        geom_segment(aes(x = 0.2, y = eff[2,2]+0.2*(1-eff[1,2]-eff[2,2]), xend = .975, yend = eff[2,2]+0.975*(1-eff[1,2]-eff[2,2]),lty="1"),size=1.5,colour="gray0") +
	        geom_smooth(aes(x = actual, y=namebased,lty="2"),method="loess",size=1.5,colour="gray0", fullrange=F,span=.6,se=F, alpha=.5) +
	        scale_linetype_manual(name="",values=c(2,1),labels=c("2"="assuming \nheterogenous groups","1"="assuming \nhomogeneous groups"))+
	        theme_classic() +
	        theme(legend.position="bottom",legend.key.width=unit(3,"cm"),text = element_text(size=23),legend.key = element_blank() ) 
		setwd(paste(results_folder))
		if ( weighted) {
			if (vector)  {pdf(file="fig4.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="fig4.png", 
							  type="cairo",
							  units="in", 
							  width=9, 
							  height=9, 
							  pointsize=12, 
							  res=200)}
		}
		if (!weighted) {
			if (vector)  {pdf(file="fig4.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="fig4.png", 
							  type="cairo",
							  units="in", 
							  width=9, 
							  height=9, 
							  pointsize=12, 
							  res=200)}
		}
		grid.newpage()
		pushViewport(viewport(layout = grid.layout(1,1)))
		print(fig_c, vp = vplayout(1, 1))
		dev.off()

		setwd(paste(results_folder))
		if ( weighted) {
			if (vector)  {pdf(file="figA1.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="figA1.png", 
							  type="cairo",
							  units="in", 
							  width=9, 
							  height=9, 
							  pointsize=12, 
							  res=200)}
		}
		if (!weighted) {
			if (vector)  {pdf(file="figA1.pdf", 
			    			  useDingbats=FALSE,
			    			  width=9, 
			    			  height=9, 
			    			  pointsize=12)}
			if (!vector) {png(filename="figA1.png", 
							  type="cairo",
							  units="in", 
							  width=9, 
							  height=9, 
							  pointsize=12, 
							  res=200)}
		}
		grid.newpage()
		pushViewport(viewport(layout = grid.layout(1,1)))
		print(fig_i, vp = vplayout(1, 1))
		dev.off()		
		rm(fig_c,fig_i)

