  	# Function generating a data frame that describes the ethnic mix of neighborhoods in a city
  	# -----------------------------------------------------------------------------------------
		ethmix <- function(city) {
			# Summarise the ethnic mix of a city's neighborhoods by native-share category.
			#
			# Args:
			#   city: numeric matrix (a data frame is coerced with as.matrix()), one row
			#         per neighborhood. Column 1 is used as the native count; columns
			#         2:7 must carry the names "Polish", "FSU", "Other.Western",
			#         "Other.Non.Western", "FYR" and "Turkish".
			#
			# Returns:
			#   A data frame with one row per (native-share category, ethnic group):
			#     p_nat     - category index (1 = lowest native share)
			#     p_nat_cut - interval label of the category, as produced by cut()
			#     eth       - ethnic group (factor, levels Turkish ... Polish)
			#     p         - the group's count divided by the summed counts of
			#                 columns 2:7 over all neighborhoods in the category
			#   Rows are in the reverse of aggregate()'s order.
			#
			# Stops with an error if no neighborhood has at least 1000 inhabitants or
			# if a native share falls outside the category breaks.
			city <- as.matrix(city)

			# neighborhood population; neighborhoods below 1000 inhabitants are dropped
			pop <- rowSums(city, na.rm = TRUE)
			keep <- pop >= 1000
			if (!any(keep)) {
				stop("no neighborhood has at least 1000 inhabitants", call. = FALSE)
			}
			# drop = FALSE keeps the matrix shape when a single neighborhood remains
			city <- city[keep, , drop = FALSE]
			# native share per remaining neighborhood
			p_nat <- city[, 1] / pop[keep]

			# category breaks: 0 plus the quintiles, rounded so the labels stay readable
			cuts_n <- c(0, quantile(p_nat, probs = seq(0, 1, 0.2))[2:6])
			cuts_n <- round(unname(cuts_n), 2)
			# Rounding can pull the top break below the largest share, which would
			# leave that neighborhood without a category; widen the top break by one
			# rounding step in that case.
			top <- length(cuts_n)
			if (cuts_n[top] < max(p_nat)) {
				cuts_n[top] <- round(cuts_n[top] + 0.01, 2)
			}
			p_nat_cut <- cut(p_nat, cuts_n)
			p_nat <- as.numeric(p_nat_cut)
			if (anyNA(p_nat)) {
				# intervals are open on the left, so e.g. a share of exactly 0 has no bin
				stop("a native share falls outside the category breaks", call. = FALSE)
			}

			# from wide to long format (column-major: all neighborhoods of the first
			# group, then the second, ...), built with base R so that no reshaping
			# package is needed
			groups <- city[, 2:7, drop = FALSE]
			n_groups <- ncol(groups)
			long <- data.frame(
				eth = rep(colnames(groups), each = nrow(groups)),
				n = as.vector(groups),
				p_nat = rep(p_nat, times = n_groups),
				p_nat_cut = rep(p_nat_cut, times = n_groups)
			)

			# relative size of each ethnic group within its neighborhood category;
			# totals are looked up by factor level, so an empty category cannot shift
			# the totals of the categories after it
			n_totals <- as.vector(tapply(long$n, long$p_nat_cut, sum, na.rm = TRUE))
			long$p <- long$n / n_totals[long$p_nat]

			# ethnic groups as a factor in display order with display labels
			long$eth <- factor(
				long$eth,
				levels = c("Turkish", "FYR", "Other.Non.Western", "Other.Western", "FSU", "Polish"),
				labels = c("Turkish", "FYR", "Other Non-Western", "Other Western", "FSU", "Polish")
			)

			# aggregate to ethnic group level
			out <- aggregate(
				long$p,
				list(p_nat = long$p_nat, p_nat_cut = long$p_nat_cut, eth = long$eth),
				sum
			)
			names(out)[4] <- "p"

			# reverse the row order
			out[nrow(out):1, ]
		}


  	# Function simulating the name-based classification in a city based on predefined error rates
  	# -------------------------------------------------------------------------------------------
		namebased_sim <- function(city, phi_1stgen, phi_2ndgen) {
			# Simulate the native share that a name-based classification would report
			# for each neighborhood, given classification error rates.
			#
			# Args:
			#   city:       numeric matrix or data frame, one row per neighborhood.
			#               Column 1 is used as the native count, columns 2:7 as the
			#               other groups.
			#   phi_1stgen: error rates applied in the first scenario; either a single
			#               value or one value per column of city.
			#   phi_2ndgen: error rates applied in the second scenario, same shape.
			#
			# Returns:
			#   A data frame with one row per neighborhood of at least 1000 inhabitants
			#   and the columns nat_neighb_orig (observed native share),
			#   nat_neighb_HS_1st and nat_neighb_HS_2nd (native share after applying
			#   phi_1stgen and phi_2ndgen respectively).
			#
			# Stops with an error if an error-rate vector has a length other than 1 or
			# ncol(city), because any other length would be recycled against the
			# wrong columns.

			# neighborhood population and the size filter applied to every result
			pop <- rowSums(city, na.rm = TRUE)
			keep <- pop >= 1000

			# native share as classified under one vector of error rates
			classified_native_share <- function(phi, label) {
				if (!length(phi) %in% c(1L, ncol(city))) {
					stop(label, " must have length 1 or one value per column of city",
						call. = FALSE)
				}
				# classification probability
				hit_rate <- 1 - phi
				# correctly and falsely classified counts; the double transpose
				# applies the rates column by column
				hits <- t(t(city) * hit_rate)
				misses <- t(t(city) * (1 - hit_rate))
				# persons classified as non-native: correctly classified members of
				# columns 2:7 plus falsely classified members of column 1
				flagged <- rowSums(hits[, 2:7, drop = FALSE], na.rm = TRUE) + misses[, 1]
				(1 - flagged / pop)[keep]
			}

			as.data.frame(cbind(
				nat_neighb_orig = (city[, 1] / pop)[keep],
				nat_neighb_HS_1st = classified_native_share(phi_1stgen, "phi_1stgen"),
				nat_neighb_HS_2nd = classified_native_share(phi_2ndgen, "phi_2ndgen")
			))
		}
