
#cost parameter handed to every svm() fit further down
cost.svm <- 100


library(e1071)

#raw input; the first three columns are left out of the feature set
mydata <- read.csv("joe_dutch_merged.csv")

data <- mydata[, -(1:3)]

#append the class label (column `category`) as the response column y
#NOTE(review): if `category` is not one of the three dropped columns it stays in
#`data` as a predictor next to y - confirm against the CSV layout
y <- mydata$category
data <- data.frame(data, y)


n <- nrow(data)
n.known <- 1000

#one fixed random split: n.known rows form the pool that gets (noisily) coded,
#the remaining rows are the evaluation set
set.seed(65.8)
sample.known <- sample(seq_len(n), size = n.known, replace = FALSE)
data.known <- data[sample.known, ]
data.unknown <- data[-sample.known, ]


#------------------------------------
#General case



#Multi-class flipping for generating simulated (noisy) codes.
#Instead of a single error rate, users need to specify the confusion matrix.
#  y      : vector of true class labels; each label is used as a row index into Matrix
#  Matrix : L*L matrix; row i holds the probabilities that true class i is coded
#           as class 1..L
#  L      : number of classes
#Returns a numeric vector with one simulated code per element of y. An element is
#NA when no cumulative probability in its row reaches the uniform draw.
flip.matrix <- function(y, Matrix, L) {
  #one uniform draw per instance, taken in a single call
  draws <- runif(length(y))
  #row-wise cumulative sums: row i becomes the CDF over codes for true class i
  cumsum.Matrix <- t(apply(Matrix, 1, cumsum))

  flip.to <- numeric(length(draws))
  #seq_along() so that an empty y gives an empty result instead of a stray NA
  for (k in seq_along(draws)) {
    #inverse-CDF sampling: first class whose cumulative probability reaches the draw
    flip.to[k] <- which(draws[k] <= cumsum.Matrix[y[k], ])[1]
  }

  flip.to
}




#Multi-class log loss of the prediction.
#  prob.pred : matrix of predicted class probabilities, one row per instance; a
#              column belongs to class i when its column name equals i
#  true      : vector of true class labels, one per row of prob.pred
#  L         : number of classes (labels 1..L are scored)
#Returns the summed (not averaged) negative log-probability given to the true
#class. A class without a column in prob.pred is scored with probability 0.000001.
logloss <- function(prob.pred, true, L) {
  floor.log <- log(0.000001)
  n.obs <- dim(prob.pred)[1]
  contrib <- matrix(0, nrow = n.obs, ncol = L)
  for (i in seq_len(L)) {
    col.match <- (colnames(prob.pred) == i)
    if (sum(col.match) != 0) {
      log.prob <- log(prob.pred[, col.match])
    } else {
      log.prob <- rep(floor.log, n.obs)
    }
    #select instead of multiplying by the indicator: 0 * log(0) is NaN, so one
    #zero probability for a class that is not the true one would turn the whole
    #sum into NaN
    contrib[, i] <- ifelse(true == i, log.prob, 0)
  }
  -sum(rowSums(contrib))
}


#Get predictions from single-coded data and double-coded data with different
#options, assuming a fixed number of instances: every strategy codes all rows of
#data.known.
#
#Strategies
#  sc  : single coding by one simulated annotator
#  dc1 : double coding, both codes kept as replicated rows
#  dc2 : double coding, instances with different double codes removed
#  dc3 : double coding, a 3rd annotator codes instances with different double
#        codes (assumed to choose only between the first two codes)
#  dc4 : double coding, an expert codes instances with different double codes
#        (the expert's code is the label stored in data.known)
#
#Arguments
#  times        : number of simulation replicates
#  data.known   : data frame to be coded; column y holds the true class, which is
#                 used as a row/column index into error.matrix
#  data.unknown : evaluation data frame with the true class in column y
#  margin.class : not used by this function
#  error.matrix : L*L annotator confusion matrix passed to flip.matrix
#  t            : the salary of an expert / the salary of an annotator; not used
#                 by this function
#  cost.svm     : cost argument for svm()
#  L            : number of classes
#
#Returns a data frame with one row per replicate and the columns accuracy.sc,
#logloss.sc, accuracy.dc1, logloss.dc1, ..., accuracy.dc4, logloss.dc4, where
#logloss is the mean log loss per evaluation instance.
SCvsDC.matrix.number <- function(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L) {
  #size of the evaluation set, taken from the argument rather than from globals
  n.test <- nrow(data.unknown)

  #one simulated annotator: replace the true labels of `truth` by noisy codes
  code.once <- function(truth) {
    truth$y <- flip.matrix(truth$y, error.matrix, L)
    truth
  }

  #3rd annotator for the disagreeing instances: picks the first or the second
  #code with probability proportional to the error.matrix entry of that code
  #given the true class
  third.coder <- function(truth, first, second) {
    resolved <- truth
    for (j in seq_len(nrow(truth))) {
      random.temp <- runif(1)
      prob1 <- error.matrix[truth$y[j], first$y[j]]
      prob2 <- error.matrix[truth$y[j], second$y[j]]
      pick <- (random.temp > (prob1 / (prob1 + prob2))) * 1 + 1
      resolved$y[j] <- c(first$y[j], second$y[j])[pick]
    }
    resolved
  }

  #fit an SVM on one training set and score it on data.unknown;
  #returns c(accuracy, mean log loss)
  evaluate <- function(train) {
    train$y <- as.factor(train$y)
    #the argument must be spelled `kernel`; a misspelled name is absorbed by
    #svm()'s `...` and the default kernel is fitted instead
    fit <- svm(y ~ ., data = train, scale = FALSE, kernel = "linear", probability = TRUE, cost = cost.svm)
    prob <- attr(predict(fit, newdata = data.unknown, probability = TRUE), "probabilities")
    response <- predict(fit, newdata = data.unknown)
    c(mean(response == data.unknown$y), logloss(prob, data.unknown$y, L) / n.test)
  }

  methods <- c("sc", "dc1", "dc2", "dc3", "dc4")
  results <- matrix(0, nrow = times, ncol = 2 * length(methods))
  #column order: accuracy.sc, logloss.sc, accuracy.dc1, logloss.dc1, ...
  colnames(results) <- as.vector(rbind(paste0("accuracy.", methods), paste0("logloss.", methods)))

  for (i in seq_len(times)) {
    print(i)

    #single-coding
    train.sc <- code.once(data.known)

    #option 1: treat double-coded instances as replicates or duplicates
    dc1.first <- code.once(data.known)
    dc1.second <- code.once(data.known)
    train.dc1 <- rbind(dc1.first, dc1.second)

    #option 2: removing instances with different double codes
    dc2.first <- code.once(data.known)
    dc2.second <- code.once(data.known)
    dc2.diff <- (dc2.first$y != dc2.second$y)
    train.dc2 <- dc2.first[!dc2.diff, ]

    #option 3: have a 3rd annotator to code instances with different double codes
    dc3.first <- code.once(data.known)
    dc3.second <- code.once(data.known)
    dc3.diff <- (dc3.first$y != dc3.second$y)
    dc3.resolved <- third.coder(data.known[dc3.diff, ], dc3.first[dc3.diff, ], dc3.second[dc3.diff, ])
    train.dc3 <- rbind(dc3.first[!dc3.diff, ], dc3.resolved)

    #option 4: have an expert to code instances with different double codes
    dc4.first <- code.once(data.known)
    dc4.second <- code.once(data.known)
    dc4.diff <- (dc4.first$y != dc4.second$y)
    train.dc4 <- rbind(dc4.first[!dc4.diff, ], data.known[dc4.diff, ])

    #fit a model per strategy and record prediction performance
    train.sets <- list(train.sc, train.dc1, train.dc2, train.dc3, train.dc4)
    for (m in seq_along(methods)) {
      results[i, c(2 * m - 1, 2 * m)] <- evaluate(train.sets[[m]])
    }
  }

  as.data.frame(results)
}





#Compare single coding with four double-coding strategies under a fixed expected
#budget: single coding codes all rows of data.known, and each double-coding
#strategy codes a random subset sized so that its expected number of paid codes
#equals the single-coding budget.
#
#Strategies
#  sc  : single coding by one simulated annotator
#  dc1 : double coding, both codes kept as replicated rows
#  dc2 : double coding, instances with different double codes removed
#  dc3 : double coding, a 3rd annotator codes instances with different double
#        codes (assumed to choose only between the first two codes)
#  dc4 : double coding, an expert codes instances with different double codes
#        (the expert's code is the label stored in data.known)
#
#Arguments
#  times        : number of simulation replicates
#  data.known   : data frame to be coded; column y holds the true class, which is
#                 used as a row/column index into error.matrix
#  data.unknown : evaluation data frame with the true class in column y
#  margin.class : weight per class used when averaging the per-class agreement
#                 probability (presumably the class proportions - confirm)
#  error.matrix : L*L annotator confusion matrix passed to flip.matrix
#  t            : the salary of an expert / the salary of an annotator
#  cost.svm     : cost argument for svm()
#  L            : number of classes
#
#Returns a data frame with one row per replicate and the columns accuracy.sc,
#logloss.sc, accuracy.dc1, logloss.dc1, ..., accuracy.dc4, logloss.dc4, where
#logloss is the mean log loss per evaluation instance.
SCvsDC.matrix.budget <- function(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L) {
  #size of the evaluation set, taken from the argument rather than from globals
  n.test <- nrow(data.unknown)

  n.sc <- nrow(data.known)
  #two codes per instance, so half as many instances
  n.dc1 <- round(n.sc / 2)
  n.dc2 <- round(n.sc / 2)

  #sum of squared row entries = probability that two independent draws from that
  #row coincide; weighted by margin.class this is the overall agreement rate
  agree.prob <- sum(margin.class * rowSums(error.matrix^2))
  #expected codes per instance: 2 plus one more for each disagreement
  n.dc3 <- round(n.sc / (3 - agree.prob))
  #expected cost per instance: 2 plus t for each disagreement
  n.dc4 <- round(n.sc / (2 + t - t * agree.prob))

  #random subset of data.known (true labels) of the given size
  draw.subset <- function(size) {
    data.known[sample(seq_len(n.sc), size, replace = FALSE), ]
  }

  #one simulated annotator: replace the true labels of `truth` by noisy codes
  code.once <- function(truth) {
    truth$y <- flip.matrix(truth$y, error.matrix, L)
    truth
  }

  #3rd annotator for the disagreeing instances: picks the first or the second
  #code with probability proportional to the error.matrix entry of that code
  #given the true class
  third.coder <- function(truth, first, second) {
    resolved <- truth
    for (j in seq_len(nrow(truth))) {
      random.temp <- runif(1)
      prob1 <- error.matrix[truth$y[j], first$y[j]]
      prob2 <- error.matrix[truth$y[j], second$y[j]]
      pick <- (random.temp > (prob1 / (prob1 + prob2))) * 1 + 1
      resolved$y[j] <- c(first$y[j], second$y[j])[pick]
    }
    resolved
  }

  #fit an SVM on one training set and score it on data.unknown;
  #returns c(accuracy, mean log loss)
  evaluate <- function(train) {
    train$y <- as.factor(train$y)
    #the argument must be spelled `kernel`; a misspelled name is absorbed by
    #svm()'s `...` and the default kernel is fitted instead
    fit <- svm(y ~ ., data = train, scale = FALSE, kernel = "linear", probability = TRUE, cost = cost.svm)
    prob <- attr(predict(fit, newdata = data.unknown, probability = TRUE), "probabilities")
    response <- predict(fit, newdata = data.unknown)
    c(mean(response == data.unknown$y), logloss(prob, data.unknown$y, L) / n.test)
  }

  methods <- c("sc", "dc1", "dc2", "dc3", "dc4")
  results <- matrix(0, nrow = times, ncol = 2 * length(methods))
  #column order: accuracy.sc, logloss.sc, accuracy.dc1, logloss.dc1, ...
  colnames(results) <- as.vector(rbind(paste0("accuracy.", methods), paste0("logloss.", methods)))

  for (i in seq_len(times)) {
    print(i)

    #single-coding
    train.sc <- code.once(data.known)

    #option 1: treat double-coded instances as replicates or duplicates
    truth.dc1 <- draw.subset(n.dc1)
    dc1.first <- code.once(truth.dc1)
    dc1.second <- code.once(truth.dc1)
    train.dc1 <- rbind(dc1.first, dc1.second)

    #option 2: removing instances with different double codes
    truth.dc2 <- draw.subset(n.dc2)
    dc2.first <- code.once(truth.dc2)
    dc2.second <- code.once(truth.dc2)
    dc2.diff <- (dc2.first$y != dc2.second$y)
    train.dc2 <- dc2.first[!dc2.diff, ]

    #option 3: have a 3rd annotator to code instances with different double codes
    truth.dc3 <- draw.subset(n.dc3)
    dc3.first <- code.once(truth.dc3)
    dc3.second <- code.once(truth.dc3)
    dc3.diff <- (dc3.first$y != dc3.second$y)
    #the 3rd annotator's choice is conditioned on the true labels (truth.dc3),
    #not on either annotator's noisy codes
    dc3.resolved <- third.coder(truth.dc3[dc3.diff, ], dc3.first[dc3.diff, ], dc3.second[dc3.diff, ])
    train.dc3 <- rbind(dc3.first[!dc3.diff, ], dc3.resolved)

    #option 4: have an expert to code instances with different double codes
    truth.dc4 <- draw.subset(n.dc4)
    dc4.first <- code.once(truth.dc4)
    dc4.second <- code.once(truth.dc4)
    dc4.diff <- (dc4.first$y != dc4.second$y)
    train.dc4 <- rbind(dc4.first[!dc4.diff, ], truth.dc4[dc4.diff, ])

    #fit a model per strategy and record prediction performance
    train.sets <- list(train.sc, train.dc1, train.dc2, train.dc3, train.dc4)
    for (m in seq_along(methods)) {
      results[i, c(2 * m - 1, 2 * m)] <- evaluate(train.sets[[m]])
    }
  }

  as.data.frame(results)
}




#---------------------
#General Case 1
#--------------------

#Build the L*L annotator confusion matrix for General Case 1.
#  p : probability that the assigned code differs from the true class
#  L : number of classes
#Row i is the distribution of the assigned code given true class i: the true
#class is kept with probability 1-p and the error mass p goes to the adjacent
#class(es) - all of it to the single neighbour for the first and last class,
#p/2 to each of the two neighbours otherwise. All other entries are 0.
Matrix.Generator <- function(p, L) {
  error.matrix <- matrix(0, nrow = L, ncol = L)
  diag(error.matrix) <- (1 - p)

  row.id <- row(error.matrix)
  #cells directly left or right of the diagonal
  adjacent <- abs(row.id - col(error.matrix)) == 1
  #first and last class have only one neighbour
  edge.row <- (row.id == 1) | (row.id == L)

  error.matrix[adjacent & edge.row] <- p
  error.matrix[adjacent & !edge.row] <- p / 2
  error.matrix
}

#simulation settings for General Case 1
error.rate <- seq(from = 0, to = 0.5, by = 0.025)
margin.class <- c(0.1, 0.3, 0.1, 0.5)
times <- 100
L <- 4
t <- 10


#---------------------
#Fixed number of observations

set.seed(81.8)

#one slot per error rate for every averaged metric
n.error.rate <- length(error.rate)
avg.accuracy.SC <- numeric(n.error.rate)
avg.logloss.SC <- numeric(n.error.rate)

avg.accuracy.DC1 <- numeric(n.error.rate)
avg.logloss.DC1 <- numeric(n.error.rate)

avg.accuracy.DC2 <- numeric(n.error.rate)
avg.logloss.DC2 <- numeric(n.error.rate)

avg.accuracy.DC3 <- numeric(n.error.rate)
avg.logloss.DC3 <- numeric(n.error.rate)

avg.accuracy.DC4 <- numeric(n.error.rate)
avg.logloss.DC4 <- numeric(n.error.rate)

for (k in seq_len(n.error.rate)) {
  print(k)

  error.matrix <- Matrix.Generator(error.rate[k], L)

  test <- SCvsDC.matrix.number(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

  #average every metric over the replicates of this error rate
  replicate.mean <- vapply(test, mean, numeric(1))

  avg.accuracy.SC[k] <- replicate.mean[["accuracy.sc"]]
  avg.logloss.SC[k] <- replicate.mean[["logloss.sc"]]

  avg.accuracy.DC1[k] <- replicate.mean[["accuracy.dc1"]]
  avg.logloss.DC1[k] <- replicate.mean[["logloss.dc1"]]

  avg.accuracy.DC2[k] <- replicate.mean[["accuracy.dc2"]]
  avg.logloss.DC2[k] <- replicate.mean[["logloss.dc2"]]

  avg.accuracy.DC3[k] <- replicate.mean[["accuracy.dc3"]]
  avg.logloss.DC3[k] <- replicate.mean[["logloss.dc3"]]

  avg.accuracy.DC4[k] <- replicate.mean[["accuracy.dc4"]]
  avg.logloss.DC4[k] <- replicate.mean[["logloss.dc4"]]
}

#one row per error rate, one column per averaged metric
comparison.number <- data.frame(
  avg.accuracy.SC, avg.logloss.SC,
  avg.accuracy.DC1, avg.logloss.DC1,
  avg.accuracy.DC2, avg.logloss.DC2,
  avg.accuracy.DC3, avg.logloss.DC3,
  avg.accuracy.DC4, avg.logloss.DC4
)





#-------------------

#Fixed budget

set.seed(81.8)

#one slot per error rate for every averaged metric
n.error.rate <- length(error.rate)
avg.accuracy.SC <- numeric(n.error.rate)
avg.logloss.SC <- numeric(n.error.rate)

avg.accuracy.DC1 <- numeric(n.error.rate)
avg.logloss.DC1 <- numeric(n.error.rate)

avg.accuracy.DC2 <- numeric(n.error.rate)
avg.logloss.DC2 <- numeric(n.error.rate)

avg.accuracy.DC3 <- numeric(n.error.rate)
avg.logloss.DC3 <- numeric(n.error.rate)

avg.accuracy.DC4 <- numeric(n.error.rate)
avg.logloss.DC4 <- numeric(n.error.rate)

for (k in seq_len(n.error.rate)) {
  print(k)

  error.matrix <- Matrix.Generator(error.rate[k], L)

  test <- SCvsDC.matrix.budget(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

  #average every metric over the replicates of this error rate
  replicate.mean <- vapply(test, mean, numeric(1))

  avg.accuracy.SC[k] <- replicate.mean[["accuracy.sc"]]
  avg.logloss.SC[k] <- replicate.mean[["logloss.sc"]]

  avg.accuracy.DC1[k] <- replicate.mean[["accuracy.dc1"]]
  avg.logloss.DC1[k] <- replicate.mean[["logloss.dc1"]]

  avg.accuracy.DC2[k] <- replicate.mean[["accuracy.dc2"]]
  avg.logloss.DC2[k] <- replicate.mean[["logloss.dc2"]]

  avg.accuracy.DC3[k] <- replicate.mean[["accuracy.dc3"]]
  avg.logloss.DC3[k] <- replicate.mean[["logloss.dc3"]]

  avg.accuracy.DC4[k] <- replicate.mean[["accuracy.dc4"]]
  avg.logloss.DC4[k] <- replicate.mean[["logloss.dc4"]]
}

#one row per error rate, one column per averaged metric
comparison.budget <- data.frame(
  avg.accuracy.SC, avg.logloss.SC,
  avg.accuracy.DC1, avg.logloss.DC1,
  avg.accuracy.DC2, avg.logloss.DC2,
  avg.accuracy.DC3, avg.logloss.DC3,
  avg.accuracy.DC4, avg.logloss.DC4
)


#legend entries and colours, in the order SC, DC1, DC2, DC3, DC4
method.names <- c("Single-coding", "Replicate", "Remove Differences", "Majority Vote", "Expert Resolves")
method.cols <- c("grey", "blue", "green", "red", "purple")
dc.series <- paste0("DC", 1:4)

#plot - fixed number of instances (accuracy on the left, log loss on the right)
par(mfrow = c(1, 2))

plot(error.rate, comparison.number$avg.accuracy.SC, ylim = c(0.4, 0.76), xlab = "p", ylab = "averaged accuracy", col = method.cols[1], type = "l", lty = 1, lwd = 2, cex.lab = 1.2)
for (s in seq_along(dc.series)) {
  lines(error.rate, comparison.number[[paste0("avg.accuracy.", dc.series[s])]], col = method.cols[s + 1], lty = s + 1, lwd = 2)
}
legend(0, 0.55, text.font = 2, lwd = 2, legend = method.names, cex = 1.1, lty = 1:5, col = method.cols)

plot(error.rate, comparison.number$avg.logloss.SC, ylim = c(0.6, 1.3), xlab = "p", ylab = "averaged logloss", col = method.cols[1], type = "l", lty = 1, lwd = 2, cex.lab = 1.2)
for (s in seq_along(dc.series)) {
  lines(error.rate, comparison.number[[paste0("avg.logloss.", dc.series[s])]], col = method.cols[s + 1], lty = s + 1, lwd = 2)
}
legend(0, 1.3, text.font = 2, lwd = 2, legend = method.names, cex = 1.1, lty = 1:5, col = method.cols)




#plot - fixed expected budget


par(mfrow = c(1, 2))

plot(error.rate, comparison.budget$avg.accuracy.SC, ylim = c(0.35, 0.76), xlab = "p", ylab = "averaged accuracy", col = method.cols[1], type = "l", lty = 1, lwd = 2, cex.lab = 1.2)
for (s in seq_along(dc.series)) {
  lines(error.rate, comparison.budget[[paste0("avg.accuracy.", dc.series[s])]], col = method.cols[s + 1], lty = s + 1, lwd = 2)
}
legend(0, 0.5, text.font = 2, lwd = 2, legend = method.names, cex = 1.1, lty = 1:5, col = method.cols)


plot(error.rate, comparison.budget$avg.logloss.SC, ylim = c(0.6, 1.4), xlab = "p", ylab = "averaged logloss", col = method.cols[1], type = "l", lty = 1, lwd = 2, cex.lab = 1.2)
for (s in seq_along(dc.series)) {
  lines(error.rate, comparison.budget[[paste0("avg.logloss.", dc.series[s])]], col = method.cols[s + 1], lty = s + 1, lwd = 2)
}
legend(0, 1.4, text.font = 2, lwd = 2, legend = method.names, cex = 1.1, lty = 1:5, col = method.cols)


#--------------------------
#General Case 2
#--------------------------



#Build the L*L annotator confusion matrix for General Case 2 (upper triangular:
#a code is never lower than the true class). This definition replaces the
#two-argument Matrix.Generator defined earlier in the file.
#  p  : probability that a class other than the last one is coded wrongly
#  g1 : given an error, probability of moving more than one class up
#  g2 : given a move of more than one class, probability of moving three up
#  L  : number of classes
#For true class i the code is i with probability 1-p, i+1 with p*(1-g1), i+2 with
#p*g1*(1-g2) and i+3 with p*g1*g2; when a step lands on the last class L it takes
#all the remaining error mass, and class L itself is always coded correctly.
#Only offsets up to 3 are filled in, so the rows sum to 1 only for L <= 4.
Matrix.Generator <- function(p, g1, g2, L) {
  error.matrix <- matrix(0, nrow = L, ncol = L)
  for (i in c(1:L)) {
    for (j in c(i:L)) {
      #distance above the true class, and whether the code is the last class
      offset <- j - i
      is.last <- (j == L)
      if (offset == 0) {
        error.matrix[i, j] <- if (is.last) 1 else (1 - p)
      } else if (offset == 1) {
        error.matrix[i, j] <- if (is.last) p else p * (1 - g1)
      } else if (offset == 2) {
        error.matrix[i, j] <- if (is.last) p * g1 else p * g1 * (1 - g2)
      } else if (offset == 3 && is.last) {
        error.matrix[i, j] <- p * g1 * g2
      }
    }
  }
  error.matrix
}


#simulation settings for General Case 2
error.rate <- seq(from = 0, to = 0.5, by = 0.025)
margin.class <- c(0.1, 0.3, 0.1, 0.5)
times <- 100
L <- 4
t <- 10

#(g1, g2) for case 2.1, 2.2, 2.3 and 2.4, in that order
case.g1 <- c(0.2, 0.2, 0.5, 0.5)
case.g2 <- c(0.2, 0.5, 0.2, 0.5)

#---------------------
#Fixed number of observations

n.error.rate <- length(error.rate)
comparison.number.cases <- vector("list", length(case.g1))

#run the whole simulation once per case so that each comparison.number.<case>
#holds the results of its own (g1, g2) setting
for (case.id in seq_along(case.g1)) {
  g1 <- case.g1[case.id]
  g2 <- case.g2[case.id]

  #every case starts from the same seed
  set.seed(81.8)

  avg.accuracy.SC <- rep(0, times = n.error.rate)
  avg.logloss.SC <- rep(0, times = n.error.rate)

  avg.accuracy.DC1 <- rep(0, times = n.error.rate)
  avg.logloss.DC1 <- rep(0, times = n.error.rate)

  avg.accuracy.DC2 <- rep(0, times = n.error.rate)
  avg.logloss.DC2 <- rep(0, times = n.error.rate)

  avg.accuracy.DC3 <- rep(0, times = n.error.rate)
  avg.logloss.DC3 <- rep(0, times = n.error.rate)

  avg.accuracy.DC4 <- rep(0, times = n.error.rate)
  avg.logloss.DC4 <- rep(0, times = n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    error.matrix <- Matrix.Generator(error.rate[k], g1, g2, L)

    test <- SCvsDC.matrix.number(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  comparison.number.cases[[case.id]] <- data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4)
}

comparison.number.1 <- comparison.number.cases[[1]]

comparison.number.2 <- comparison.number.cases[[2]]

comparison.number.3 <- comparison.number.cases[[3]]

comparison.number.4 <- comparison.number.cases[[4]]


#-------------------

#Fixed budget

#(g1, g2) for case 2.1, 2.2, 2.3 and 2.4, in that order
case.g1 <- c(0.2, 0.2, 0.5, 0.5)
case.g2 <- c(0.2, 0.5, 0.2, 0.5)

n.error.rate <- length(error.rate)
comparison.budget.cases <- vector("list", length(case.g1))

#run the whole simulation once per case so that each comparison.budget.<case>
#holds the results of its own (g1, g2) setting
for (case.id in seq_along(case.g1)) {
  g1 <- case.g1[case.id]
  g2 <- case.g2[case.id]

  #every case starts from the same seed
  set.seed(81.8)

  avg.accuracy.SC <- rep(0, times = n.error.rate)
  avg.logloss.SC <- rep(0, times = n.error.rate)

  avg.accuracy.DC1 <- rep(0, times = n.error.rate)
  avg.logloss.DC1 <- rep(0, times = n.error.rate)

  avg.accuracy.DC2 <- rep(0, times = n.error.rate)
  avg.logloss.DC2 <- rep(0, times = n.error.rate)

  avg.accuracy.DC3 <- rep(0, times = n.error.rate)
  avg.logloss.DC3 <- rep(0, times = n.error.rate)

  avg.accuracy.DC4 <- rep(0, times = n.error.rate)
  avg.logloss.DC4 <- rep(0, times = n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    error.matrix <- Matrix.Generator(error.rate[k], g1, g2, L)

    test <- SCvsDC.matrix.budget(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  comparison.budget.cases[[case.id]] <- data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4)
}

comparison.budget.1 <- comparison.budget.cases[[1]]

comparison.budget.2 <- comparison.budget.cases[[2]]

comparison.budget.3 <- comparison.budget.cases[[3]]

comparison.budget.4 <- comparison.budget.cases[[4]]



#-----------------
#plot

#Draw one panel: the averaged `metric` ("accuracy" or "logloss") of all five
#strategies against error.rate, with the legend's top-left corner at (0, legend.y).
#`results` is one of the comparison.* data frames.
draw.comparison.panel <- function(results, metric, ylim, legend.y) {
  method.names <- c("Single-coding", "Replicate", "Remove Differences", "Majority Vote", "Expert Resolves")
  method.cols <- c("grey", "blue", "green", "red", "purple")
  series <- c("SC", "DC1", "DC2", "DC3", "DC4")
  curve.of <- function(s) results[[paste0("avg.", metric, ".", series[s])]]

  plot(error.rate, curve.of(1), ylim = ylim, xlab = "p", ylab = paste("averaged", metric), col = method.cols[1], type = "l", lty = 1, lwd = 2, cex.lab = 1.2)
  for (s in 2:5) {
    lines(error.rate, curve.of(s), col = method.cols[s], lty = s, lwd = 2)
  }
  legend(0, legend.y, text.font = 2, lwd = 2, legend = method.names, cex = 1.1, lty = 1:5, col = method.cols)
}

#case 2.1

par(mfrow = c(1, 2))

draw.comparison.panel(comparison.number.1, "accuracy", ylim = c(0.55, 0.76), legend.y = 0.65)

draw.comparison.panel(comparison.number.1, "logloss", ylim = c(0.6, 1.1), legend.y = 1.1)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(comparison.budget.1, "accuracy", ylim = c(0.48, 0.76), legend.y = 0.6)


draw.comparison.panel(comparison.budget.1, "logloss", ylim = c(0.6, 1.25), legend.y = 1.25)


#case 2.2


par(mfrow = c(1, 2))

draw.comparison.panel(comparison.number.2, "accuracy", ylim = c(0.57, 0.77), legend.y = 0.65)

draw.comparison.panel(comparison.number.2, "logloss", ylim = c(0.6, 1.1), legend.y = 1.1)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(comparison.budget.2, "accuracy", ylim = c(0.5, 0.76), legend.y = 0.6)


draw.comparison.panel(comparison.budget.2, "logloss", ylim = c(0.6, 1.25), legend.y = 1.25)





#case 2.3


par(mfrow = c(1, 2))

draw.comparison.panel(comparison.number.3, "accuracy", ylim = c(0.55, 0.77), legend.y = 0.64)

draw.comparison.panel(comparison.number.3, "logloss", ylim = c(0.6, 1.2), legend.y = 1.2)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(comparison.budget.3, "accuracy", ylim = c(0.5, 0.76), legend.y = 0.6)


plot(ylim=c(0.6,1.3),error.rate,comparison.budget.3$avg.logloss.SC,xlab="p",ylab="averaged logloss",col="grey",type="l",lty=1,lwd=2,cex.lab=1.2)
lines(error.rate,comparison.budget.3$avg.logloss.DC1,col="blue",lty=2,lwd=2)
lines(error.rate,comparison.budget.3$avg.logloss.DC2,col="green",lty=3,lwd=2)
lines(error.rate,comparison.budget.3$avg.logloss.DC3,col="red",lty=4,lwd=2)
lines(error.rate,comparison.budget.3$avg.logloss.DC4,col="purple",lty=5,lwd=2)
legend(0,1.3,text.font=2,lwd=2,legend=c("Single-coding","Replicate","Remove Differences","Majority Vote","Expert Resolves"),cex=1.1,lty=c(1,2,3,4,5),col=c("grey","blue","green","red","purple"))



#case 2.4


par(mfrow=c(1,2))

plot(ylim=c(0.55,0.76),error.rate,comparison.number.4$avg.accuracy.SC,xlab="p",ylab="averaged accuracy",col="grey",type="l",lty=1,lwd=2,cex.lab=1.2)
lines(error.rate,comparison.number.4$avg.accuracy.DC1,col="blue",lty=2,lwd=2)
lines(error.rate,comparison.number.4$avg.accuracy.DC2,col="green",lty=3,lwd=2)
lines(error.rate,comparison.number.4$avg.accuracy.DC3,col="red",lty=4,lwd=2)
lines(error.rate,comparison.number.4$avg.accuracy.DC4,col="purple",lty=5,lwd=2)
legend(0,0.64,text.font=2,lwd=2,legend=c("Single-coding","Replicate","Remove Differences","Majority Vote","Expert Resolves"),cex=1.1,lty=c(1,2,3,4,5),col=c("grey","blue","green","red","purple"))

plot(ylim=c(0.6,1.2),error.rate,comparison.number.4$avg.logloss.SC,xlab="p",ylab="averaged logloss",col="grey",type="l",lty=1,lwd=2,cex.lab=1.2)
lines(error.rate,comparison.number.4$avg.logloss.DC1,col="blue",lty=2,lwd=2)
lines(error.rate,comparison.number.4$avg.logloss.DC2,col="green",lty=3,lwd=2)
lines(error.rate,comparison.number.4$avg.logloss.DC3,col="red",lty=4,lwd=2)
lines(error.rate,comparison.number.4$avg.logloss.DC4,col="purple",lty=5,lwd=2)
legend(0,1.2,text.font=2,lwd=2,legend=c("Single-coding","Replicate","Remove Differences","Majority Vote","Expert Resolves"),cex=1.1,lty=c(1,2,3,4,5),col=c("grey","blue","green","red","purple"))



#plot - fixed expected budget


par(mfrow=c(1,2))

plot(ylim=c(0.5,0.76),error.rate,comparison.budget.4$avg.accuracy.SC,xlab="p",ylab="averaged accuracy",col="grey",type="l",lty=1,lwd=2,cex.lab=1.2)
lines(error.rate,comparison.budget.4$avg.accuracy.DC1,col="blue",lty=2,lwd=2)
lines(error.rate,comparison.budget.4$avg.accuracy.DC2,col="green",lty=3,lwd=2)
lines(error.rate,comparison.budget.4$avg.accuracy.DC3,col="red",lty=4,lwd=2)
lines(error.rate,comparison.budget.4$avg.accuracy.DC4,col="purple",lty=5,lwd=2)
legend(0,0.6,text.font=2,lwd=2,legend=c("Single-coding","Replicate","Remove Differences","Majority Vote","Expert Resolves"),cex=1.1,lty=c(1,2,3,4,5),col=c("grey","blue","green","red","purple"))


plot(ylim=c(0.6,1.3),error.rate,comparison.budget.4$avg.logloss.SC,xlab="p",ylab="averaged logloss",col="grey",type="l",lty=1,lwd=2,cex.lab=1.2)
lines(error.rate,comparison.budget.4$avg.logloss.DC1,col="blue",lty=2,lwd=2)
lines(error.rate,comparison.budget.4$avg.logloss.DC2,col="green",lty=3,lwd=2)
lines(error.rate,comparison.budget.4$avg.logloss.DC3,col="red",lty=4,lwd=2)
lines(error.rate,comparison.budget.4$avg.logloss.DC4,col="purple",lty=5,lwd=2)
legend(0,1.3,text.font=2,lwd=2,legend=c("Single-coding","Replicate","Remove Differences","Majority Vote","Expert Resolves"),cex=1.1,lty=c(1,2,3,4,5),col=c("grey","blue","green","red","purple"))


#--------------------------
#General Case 3
#--------------------------



# Confusion matrix in which classes 1 and 2 are only ever mistaken for each
# other (with probability p) and classes 3 and 4 are only ever mistaken for
# each other (with probability g).
#   p : flip probability inside the class pair (1,2)
#   g : flip probability inside the class pair (3,4)
#   L : number of classes; the result is L x L. Only rows/columns 1..4 are
#       filled, so L below 4 fails and any rows beyond 4 stay all zero.
# Returns the L x L matrix.
Matrix.Generator <- function(p, g, L) {
  # 2 x 2 block for a pair of classes that swap with probability q
  swap.block <- function(q) {
    matrix(c(1 - q, q,
             q, 1 - q), nrow = 2, ncol = 2)
  }

  error.matrix <- matrix(0, nrow = L, ncol = L)
  error.matrix[1:2, 1:2] <- swap.block(p)
  error.matrix[3:4, 3:4] <- swap.block(g)
  error.matrix
}



# Settings for General Case 3.
error.rate <- seq(from = 0, to = 0.5, by = 0.025)
margin.class <- c(0.1, 0.3, 0.1, 0.5)
times <- 100
L <- 4
t <- 10

# Flip probability g1 of the class pair (3,4) for each sub-case. The name is
# the suffix of the result data frame: case 3.1 -> comparison.number.31, etc.
# Previously the three values were assigned to g1 one after another, so a
# plain run of the script only ever used the last one and stored that single
# sweep under all three result names.
case3.g1 <- c("31" = 0.1, "32" = 0.3, "33" = 0.5)

#---------------------
#Fixed number of observations

# One full sweep over error.rate per sub-case (three sweeps in total).
for (case.id in names(case3.g1)) {
  g1 <- case3.g1[[case.id]]

  # Every sub-case restarts from the same seed, as when each case is run by
  # hand. set.seed() interprets the non-integer value as an integer.
  set.seed(81.8)

  n.error.rate <- length(error.rate)
  avg.accuracy.SC <- numeric(n.error.rate)
  avg.logloss.SC <- numeric(n.error.rate)

  avg.accuracy.DC1 <- numeric(n.error.rate)
  avg.logloss.DC1 <- numeric(n.error.rate)

  avg.accuracy.DC2 <- numeric(n.error.rate)
  avg.logloss.DC2 <- numeric(n.error.rate)

  avg.accuracy.DC3 <- numeric(n.error.rate)
  avg.logloss.DC3 <- numeric(n.error.rate)

  avg.accuracy.DC4 <- numeric(n.error.rate)
  avg.logloss.DC4 <- numeric(n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    # The sub-case value is passed explicitly (the call used to reference `g`,
    # which is not the variable the case settings assign).
    error.matrix <- Matrix.Generator(error.rate[k], g1, L)

    test <- SCvsDC.matrix.number(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    # Average each metric over the values returned for this error rate.
    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  # Creates comparison.number.31 / comparison.number.32 / comparison.number.33.
  assign(paste0("comparison.number.", case.id),
         data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4))
}




#-------------------

#Fixed budget

# Flip probability g1 of the class pair (3,4) for each sub-case. The name is
# the suffix of the result data frame: case 3.1 -> comparison.budget.31, etc.
# Previously the three values were assigned to g1 one after another, so a
# plain run of the script only ever used the last one and stored that single
# sweep under all three result names.
case3.budget.g1 <- c("31" = 0.1, "32" = 0.3, "33" = 0.5)

# One full sweep over error.rate per sub-case (three sweeps in total).
for (case.id in names(case3.budget.g1)) {
  g1 <- case3.budget.g1[[case.id]]

  # Every sub-case restarts from the same seed, as when each case is run by
  # hand. set.seed() interprets the non-integer value as an integer.
  set.seed(81.8)

  n.error.rate <- length(error.rate)
  avg.accuracy.SC <- numeric(n.error.rate)
  avg.logloss.SC <- numeric(n.error.rate)

  avg.accuracy.DC1 <- numeric(n.error.rate)
  avg.logloss.DC1 <- numeric(n.error.rate)

  avg.accuracy.DC2 <- numeric(n.error.rate)
  avg.logloss.DC2 <- numeric(n.error.rate)

  avg.accuracy.DC3 <- numeric(n.error.rate)
  avg.logloss.DC3 <- numeric(n.error.rate)

  avg.accuracy.DC4 <- numeric(n.error.rate)
  avg.logloss.DC4 <- numeric(n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    # The sub-case value is passed explicitly (the call used to reference `g`,
    # which is not the variable the case settings assign).
    error.matrix <- Matrix.Generator(error.rate[k], g1, L)

    test <- SCvsDC.matrix.budget(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    # Average each metric over the values returned for this error rate.
    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  # Creates comparison.budget.31 / comparison.budget.32 / comparison.budget.33.
  assign(paste0("comparison.budget.", case.id),
         data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4))
}














#--------------------------------
#What if we change the marginal distribution 
#-------------------------------




#---------------------
#General Case 1
#--------------------

# Confusion matrix in which a class is only ever mistaken for a directly
# adjacent class.
#   p : total probability of leaving the true class
#   L : number of classes
# Each diagonal entry is 1-p. The first and last class have a single
# neighbour, which receives all of p; every other class splits p evenly
# between its two neighbours.
# Returns the L x L matrix.
Matrix.Generator <- function(p, L) {
  error.matrix <- matrix(0, nrow = L, ncol = L)
  diag(error.matrix) <- 1 - p

  from <- row(error.matrix)
  neighbour <- abs(from - col(error.matrix)) == 1
  boundary <- from == 1 | from == L

  error.matrix[neighbour & boundary] <- p
  error.matrix[neighbour & !boundary] <- p / 2
  error.matrix
}

# Settings for General Case 1 under the alternative class margins.
error.rate <- seq(from = 0, to = 0.5, by = 0.025)
margin.class <- c(0.3, 0.3, 0.2, 0.2)
times <- 100
L <- 4
t <- 10


#---------------------
#Fixed number of observations

# set.seed() interprets the non-integer value as an integer.
set.seed(81.8)

# One slot per simulated error rate for every (metric, strategy) pair.
n.error.rate <- length(error.rate)
avg.accuracy.SC <- numeric(n.error.rate)
avg.logloss.SC <- numeric(n.error.rate)

avg.accuracy.DC1 <- numeric(n.error.rate)
avg.logloss.DC1 <- numeric(n.error.rate)

avg.accuracy.DC2 <- numeric(n.error.rate)
avg.logloss.DC2 <- numeric(n.error.rate)

avg.accuracy.DC3 <- numeric(n.error.rate)
avg.logloss.DC3 <- numeric(n.error.rate)

avg.accuracy.DC4 <- numeric(n.error.rate)
avg.logloss.DC4 <- numeric(n.error.rate)

for (k in seq_len(n.error.rate)) {
  # progress indicator
  print(k)

  error.matrix <- Matrix.Generator(error.rate[k], L)

  test <- SCvsDC.matrix.number(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

  # Average each metric over the values returned for this error rate.
  avg.accuracy.SC[k] <- mean(test$accuracy.sc)
  avg.logloss.SC[k] <- mean(test$logloss.sc)

  avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
  avg.logloss.DC1[k] <- mean(test$logloss.dc1)

  avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
  avg.logloss.DC2[k] <- mean(test$logloss.dc2)

  avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
  avg.logloss.DC3[k] <- mean(test$logloss.dc3)

  avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
  avg.logloss.DC4[k] <- mean(test$logloss.dc4)
}

# One row per error rate, one column per (metric, strategy) pair.
comparison.number.margin2 <- data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4)





#-------------------

#Fixed budget

# set.seed() interprets the non-integer value as an integer.
set.seed(81.8)

# One slot per simulated error rate for every (metric, strategy) pair.
n.error.rate <- length(error.rate)
avg.accuracy.SC <- numeric(n.error.rate)
avg.logloss.SC <- numeric(n.error.rate)

avg.accuracy.DC1 <- numeric(n.error.rate)
avg.logloss.DC1 <- numeric(n.error.rate)

avg.accuracy.DC2 <- numeric(n.error.rate)
avg.logloss.DC2 <- numeric(n.error.rate)

avg.accuracy.DC3 <- numeric(n.error.rate)
avg.logloss.DC3 <- numeric(n.error.rate)

avg.accuracy.DC4 <- numeric(n.error.rate)
avg.logloss.DC4 <- numeric(n.error.rate)

for (k in seq_len(n.error.rate)) {
  # progress indicator
  print(k)

  error.matrix <- Matrix.Generator(error.rate[k], L)

  test <- SCvsDC.matrix.budget(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

  # Average each metric over the values returned for this error rate.
  avg.accuracy.SC[k] <- mean(test$accuracy.sc)
  avg.logloss.SC[k] <- mean(test$logloss.sc)

  avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
  avg.logloss.DC1[k] <- mean(test$logloss.dc1)

  avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
  avg.logloss.DC2[k] <- mean(test$logloss.dc2)

  avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
  avg.logloss.DC3[k] <- mean(test$logloss.dc3)

  avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
  avg.logloss.DC4[k] <- mean(test$logloss.dc4)
}

# One row per error rate, one column per (metric, strategy) pair.
comparison.budget.margin2 <- data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4)


# Draws one panel that compares the five coding strategies across error rates.
#   error.rate : x values (the simulated error rates p)
#   res        : data frame holding the columns avg.<metric>.SC and
#                avg.<metric>.DC1 ... avg.<metric>.DC4
#   metric     : "accuracy" or "logloss"; picks the columns and the y label
#   ylim       : y-axis limits of the panel
#   legend.y   : y coordinate of the legend anchor (the x coordinate is 0)
# Called for its side effect on the current graphics device.
draw.comparison.panel <- function(error.rate, res, metric, ylim, legend.y) {
  method.suffix <- c("SC", "DC1", "DC2", "DC3", "DC4")
  method.label <- c("Single-coding", "Replicate", "Remove Differences", "Majority Vote", "Expert Resolves")
  method.col <- c("grey", "blue", "green", "red", "purple")
  method.lty <- c(1, 2, 3, 4, 5)
  series <- paste0("avg.", metric, ".", method.suffix)

  # The single-coding curve opens the panel; the four double-coding curves
  # are layered on top of it.
  plot(ylim = ylim, error.rate, res[[series[1]]], xlab = "p", ylab = paste("averaged", metric),
       col = method.col[1], type = "l", lty = method.lty[1], lwd = 2, cex.lab = 1.2)
  for (m in seq_along(series)[-1]) {
    lines(error.rate, res[[series[m]]], col = method.col[m], lty = method.lty[m], lwd = 2)
  }
  legend(0, legend.y, text.font = 2, lwd = 2, legend = method.label, cex = 1.1,
         lty = method.lty, col = method.col)
}

#plot - fixed number of observations

par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.number.margin2, "accuracy", c(0.38, 0.76), 0.53)
draw.comparison.panel(error.rate, comparison.number.margin2, "logloss", c(0.6, 1.4), 1.4)




#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.budget.margin2, "accuracy", c(0.25, 0.76), 0.45)
draw.comparison.panel(error.rate, comparison.budget.margin2, "logloss", c(0.6, 1.52), 1.52)




#--------------------------
#General Case 2
#--------------------------



# Upper-triangular confusion matrix: a label either stays put or moves to a
# higher-numbered class, never to a lower one.
#   p  : probability of leaving the true class (the last class never moves)
#   g1 : given a move, probability of skipping past the next class
#   g2 : given a skip of one class, probability of skipping past a second one
#   L  : number of classes
# Whatever probability is left when the last class is reached is assigned to
# it. Offsets above 3, and an offset of 3 that does not land on the last
# class, are left at zero.
# Returns the L x L matrix.
Matrix.Generator <- function(p, g1, g2, L) {
  error.matrix <- matrix(0, nrow = L, ncol = L)

  for (i in seq_len(L)) {
    for (j in i:L) {
      offset <- j - i
      at.last <- (j == L)

      value <- NULL
      if (offset == 0) {
        value <- if (at.last) 1 else (1 - p)
      } else if (offset == 1) {
        value <- if (at.last) p else p * (1 - g1)
      } else if (offset == 2) {
        value <- if (at.last) p * g1 else p * g1 * (1 - g2)
      } else if (offset == 3 && at.last) {
        value <- p * g1 * g2
      }

      # cells that match no rule keep their initial zero
      if (!is.null(value)) {
        error.matrix[i, j] <- value
      }
    }
  }

  error.matrix
}


# Settings for General Case 2 under the alternative class margins.
error.rate <- seq(from = 0, to = 0.5, by = 0.025)
margin.class <- c(0.3, 0.3, 0.2, 0.2)
times <- 100
L <- 4
t <- 10

# (g1, g2) for each sub-case. `id` is the suffix of the result data frame:
# case 2.1 -> comparison.number.margin2.1, etc.
# Previously the four settings were assigned to g1/g2 one after another, so a
# plain run of the script only ever used the last pair and stored that single
# sweep under all four result names.
case2.params <- data.frame(id = c("2.1", "2.2", "2.3", "2.4"),
                           g1 = c(0.2, 0.2, 0.5, 0.5),
                           g2 = c(0.2, 0.5, 0.2, 0.5),
                           stringsAsFactors = FALSE)

#---------------------
#Fixed number of observations

# One full sweep over error.rate per sub-case (four sweeps in total).
for (case.row in seq_len(nrow(case2.params))) {
  g1 <- case2.params$g1[case.row]
  g2 <- case2.params$g2[case.row]

  # Every sub-case restarts from the same seed, as when each case is run by
  # hand. set.seed() interprets the non-integer value as an integer.
  set.seed(81.8)

  n.error.rate <- length(error.rate)
  avg.accuracy.SC <- numeric(n.error.rate)
  avg.logloss.SC <- numeric(n.error.rate)

  avg.accuracy.DC1 <- numeric(n.error.rate)
  avg.logloss.DC1 <- numeric(n.error.rate)

  avg.accuracy.DC2 <- numeric(n.error.rate)
  avg.logloss.DC2 <- numeric(n.error.rate)

  avg.accuracy.DC3 <- numeric(n.error.rate)
  avg.logloss.DC3 <- numeric(n.error.rate)

  avg.accuracy.DC4 <- numeric(n.error.rate)
  avg.logloss.DC4 <- numeric(n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    error.matrix <- Matrix.Generator(error.rate[k], g1, g2, L)

    test <- SCvsDC.matrix.number(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    # Average each metric over the values returned for this error rate.
    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  # Creates comparison.number.margin2.1 ... comparison.number.margin2.4.
  assign(paste0("comparison.number.margin", case2.params$id[case.row]),
         data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4))
}


#-------------------

#Fixed budget

# (g1, g2) for each sub-case. `id` is the suffix of the result data frame:
# case 2.1 -> comparison.budget.margin2.1, etc.
# Previously the four settings were assigned to g1/g2 one after another, so a
# plain run of the script only ever used the last pair and stored that single
# sweep under all four result names.
case2.budget.params <- data.frame(id = c("2.1", "2.2", "2.3", "2.4"),
                                  g1 = c(0.2, 0.2, 0.5, 0.5),
                                  g2 = c(0.2, 0.5, 0.2, 0.5),
                                  stringsAsFactors = FALSE)

# One full sweep over error.rate per sub-case (four sweeps in total).
for (case.row in seq_len(nrow(case2.budget.params))) {
  g1 <- case2.budget.params$g1[case.row]
  g2 <- case2.budget.params$g2[case.row]

  # Every sub-case restarts from the same seed, as when each case is run by
  # hand. set.seed() interprets the non-integer value as an integer.
  set.seed(81.8)

  n.error.rate <- length(error.rate)
  avg.accuracy.SC <- numeric(n.error.rate)
  avg.logloss.SC <- numeric(n.error.rate)

  avg.accuracy.DC1 <- numeric(n.error.rate)
  avg.logloss.DC1 <- numeric(n.error.rate)

  avg.accuracy.DC2 <- numeric(n.error.rate)
  avg.logloss.DC2 <- numeric(n.error.rate)

  avg.accuracy.DC3 <- numeric(n.error.rate)
  avg.logloss.DC3 <- numeric(n.error.rate)

  avg.accuracy.DC4 <- numeric(n.error.rate)
  avg.logloss.DC4 <- numeric(n.error.rate)

  for (k in seq_len(n.error.rate)) {
    print(k)

    error.matrix <- Matrix.Generator(error.rate[k], g1, g2, L)

    test <- SCvsDC.matrix.budget(times, data.known, data.unknown, margin.class, error.matrix, t, cost.svm, L)

    # Average each metric over the values returned for this error rate.
    avg.accuracy.SC[k] <- mean(test$accuracy.sc)
    avg.logloss.SC[k] <- mean(test$logloss.sc)

    avg.accuracy.DC1[k] <- mean(test$accuracy.dc1)
    avg.logloss.DC1[k] <- mean(test$logloss.dc1)

    avg.accuracy.DC2[k] <- mean(test$accuracy.dc2)
    avg.logloss.DC2[k] <- mean(test$logloss.dc2)

    avg.accuracy.DC3[k] <- mean(test$accuracy.dc3)
    avg.logloss.DC3[k] <- mean(test$logloss.dc3)

    avg.accuracy.DC4[k] <- mean(test$accuracy.dc4)
    avg.logloss.DC4[k] <- mean(test$logloss.dc4)
  }

  # Creates comparison.budget.margin2.1 ... comparison.budget.margin2.4.
  assign(paste0("comparison.budget.margin", case2.budget.params$id[case.row]),
         data.frame(avg.accuracy.SC, avg.logloss.SC, avg.accuracy.DC1, avg.logloss.DC1, avg.accuracy.DC2, avg.logloss.DC2, avg.accuracy.DC3, avg.logloss.DC3, avg.accuracy.DC4, avg.logloss.DC4))
}



#-----------------
#plot

#case 2.1

# Draws one panel that compares the five coding strategies across error rates.
#   error.rate : x values (the simulated error rates p)
#   res        : data frame holding the columns avg.<metric>.SC and
#                avg.<metric>.DC1 ... avg.<metric>.DC4
#   metric     : "accuracy" or "logloss"; picks the columns and the y label
#   ylim       : y-axis limits of the panel
#   legend.y   : y coordinate of the legend anchor (the x coordinate is 0)
# Called for its side effect on the current graphics device.
draw.comparison.panel <- function(error.rate, res, metric, ylim, legend.y) {
  method.suffix <- c("SC", "DC1", "DC2", "DC3", "DC4")
  method.label <- c("Single-coding", "Replicate", "Remove Differences", "Majority Vote", "Expert Resolves")
  method.col <- c("grey", "blue", "green", "red", "purple")
  method.lty <- c(1, 2, 3, 4, 5)
  series <- paste0("avg.", metric, ".", method.suffix)

  # The single-coding curve opens the panel; the four double-coding curves
  # are layered on top of it.
  plot(ylim = ylim, error.rate, res[[series[1]]], xlab = "p", ylab = paste("averaged", metric),
       col = method.col[1], type = "l", lty = method.lty[1], lwd = 2, cex.lab = 1.2)
  for (m in seq_along(series)[-1]) {
    lines(error.rate, res[[series[m]]], col = method.col[m], lty = method.lty[m], lwd = 2)
  }
  legend(0, legend.y, text.font = 2, lwd = 2, legend = method.label, cex = 1.1,
         lty = method.lty, col = method.col)
}

#plot - fixed number of observations (case 2.1)

par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.number.margin2.1, "accuracy", c(0.55, 0.76), 0.65)
draw.comparison.panel(error.rate, comparison.number.margin2.1, "logloss", c(0.6, 1.1), 1.1)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.budget.margin2.1, "accuracy", c(0.48, 0.76), 0.6)
draw.comparison.panel(error.rate, comparison.budget.margin2.1, "logloss", c(0.6, 1.25), 1.25)


#case 2.2


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.number.margin2.2, "accuracy", c(0.57, 0.77), 0.65)
draw.comparison.panel(error.rate, comparison.number.margin2.2, "logloss", c(0.6, 1.1), 1.1)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.budget.margin2.2, "accuracy", c(0.5, 0.76), 0.6)
draw.comparison.panel(error.rate, comparison.budget.margin2.2, "logloss", c(0.6, 1.25), 1.25)





#case 2.3


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.number.margin2.3, "accuracy", c(0.55, 0.77), 0.64)
draw.comparison.panel(error.rate, comparison.number.margin2.3, "logloss", c(0.6, 1.2), 1.2)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.budget.margin2.3, "accuracy", c(0.5, 0.76), 0.6)
draw.comparison.panel(error.rate, comparison.budget.margin2.3, "logloss", c(0.6, 1.3), 1.3)



#case 2.4


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.number.margin2.4, "accuracy", c(0.55, 0.76), 0.64)
draw.comparison.panel(error.rate, comparison.number.margin2.4, "logloss", c(0.6, 1.2), 1.2)



#plot - fixed expected budget


par(mfrow = c(1, 2))

draw.comparison.panel(error.rate, comparison.budget.margin2.4, "accuracy", c(0.5, 0.76), 0.6)
draw.comparison.panel(error.rate, comparison.budget.margin2.4, "logloss", c(0.6, 1.3), 1.3)

