### The replication code proceeds in 3 parts. Part A calculates basic statistics and applies the matching procedure to the raw datasets. Part B takes the stacked matched dataset as input, performs the direct comparisons between online and offline respondents, and calculates the matrix needed for the log-linear models. Part C loads the stacked file for the log-linear models and fits them.
## Part A

# installs and loads the necessary R packages
install.packages("mokken")
install.packages("MatchIt")
install.packages("weights")
install.packages("MASS")
install.packages("questionr")
library(mokken)
library(MatchIt)
library(weights)
library(MASS)
library(questionr)

# Load the combined online/offline file and recode both missing-value
# sentinels (-999 and -998) to NA in a single pass.
df_raw <- read.csv("1_combinedData_onlineOffline_Cyprus2013.csv")
df_raw[df_raw == -999 | df_raw == -998] <- NA

# Group sizes by mode_nom in the raw dataset
table(df_raw$mode_nom)

# Keep only the rows that pass the cleaning criteria (invalid == 0; see codebook)
df <- subset(df_raw, invalid == 0)

# Group sizes by mode_nom in the clean dataset
table(df$mode_nom)

# Baseline for judging the matching outcome: how do the groups defined by
# mode_nom differ in the clean dataset?

# Categorical covariates: chi-squared tests against mode_nom with the
# continuity correction switched off (correct = FALSE).
chisq.test(x = df$sex, y = df$mode_nom, correct = FALSE)
chisq.test(x = df$education_uniDegree, y = df$mode_nom, correct = FALSE)
chisq.test(x = df$partyId, y = df$mode_nom, correct = FALSE)
chisq.test(x = df$previousVote2011, y = df$mode_nom, correct = FALSE)
chisq.test(x = df$voteIntention, y = df$mode_nom, correct = FALSE)

# Continuous covariates: t-tests of each variable by mode_nom.
t.test(df$age ~ df$mode_nom)
t.test(df$selfPlace_LR ~ df$mode_nom)
t.test(df$selfPlace_ProgCons ~ df$mode_nom)

# Mokken Scale Analysis input: the six items used to probe the dimension
# underlying q1:q19, restricted to respondents who answered all six.
forMokken <- na.omit(df[c("q13", "q14", "q16", "q17", "q18", "q19")])

# Shift every item down by one, then reverse-score q13 and q14 (4 - x).
forMokken <- forMokken - 1
forMokken[c("q13", "q14")] <- 4 - forMokken[c("q13", "q14")]

# Plain matrix without dimnames; columns are reversed q13, reversed q14,
# q16, q17, q18, q19.
mok <- unname(as.matrix(forMokken))
coefH(mok)

# The temporary objects are not needed afterwards
rm(forMokken, mok)

# Matching: keep the id, the mode indicator and the matching covariates,
# complete cases only.
forMatching <- na.omit(
  df[c(
    "id", "mode", "sex", "age", "education_uniDegree", "partyId",
    "previousVote2011", "voteIntention", "selfPlace_LR", "selfPlace_ProgCons"
  )]
)

# mode and the categorical covariates are converted to factors
categorical <- c(
  "mode", "sex", "education_uniDegree", "partyId",
  "previousVote2011", "voteIntention"
)
forMatching[categorical] <- lapply(forMatching[categorical], factor)
rm(categorical)

# Fixed seed so that the matching run is reproducible
set.seed(500)

# Nearest-neighbour matching, ratio 3, without replacement, caliper 0.1,
# with exact matching on the categorical covariates.
m.out <- matchit(
  mode ~ sex + age + education_uniDegree + partyId + previousVote2011 +
    voteIntention + selfPlace_LR + selfPlace_ProgCons,
  data = forMatching,
  method = "nearest",
  model = "logit",
  replace = FALSE,
  discard = "both",
  ratio = 3,
  exact = c(
    "sex", "education_uniDegree", "partyId", "previousVote2011",
    "voteIntention"
  ),
  caliper = 0.1
)
m.out

# First and second components of the matchit object.
# NOTE(review): going by the variable names these are presumably the match
# matrix and the matching weights - confirm for the installed MatchIt version.
matches <- as.data.frame(m.out[1])
weights <- as.data.frame(m.out[2])
summary(weights)

## Part B - comparisons using matched data
# Load the matched dataset (stacked format) and recode both missing-value
# sentinels (-999 and -998) to NA in a single pass.
df_matched <- read.csv("2_matchedDatasets.csv")
df_matched[df_matched == -999 | df_matched == -998] <- NA

# Matching outcomes, categorical covariates: cross-tabulate the offline value
# (rows) against the online value (columns) of each matched pair.
table(df_matched$sex_off, df_matched$sex_on)
table(df_matched$education_uniDegree_off, df_matched$education_uniDegree_on)
# Fixed: this table previously crossed partyId_off with itself, so it could
# never reveal a party-identification mismatch within a pair.
table(df_matched$partyId_off, df_matched$partyId_on)
table(df_matched$previousVote2011_off, df_matched$previousVote2011_on)
table(df_matched$voteIntention_off, df_matched$voteIntention_on)

# Matching outcomes, continuous covariates: weighted t-test of the
# within-pair difference (offline minus online) against 0, using pairWeight
# as the weight.
wtd.t.test(
  df_matched$age_off - df_matched$age_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  df_matched$selfPlace_LR_off - df_matched$selfPlace_LR_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  df_matched$selfPlace_ProgCons_off - df_matched$selfPlace_ProgCons_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)

# Direct comparison of offline and online responding patterns.
# Each call tests the within-pair difference (offline minus online) of one
# indicator against 0, weighted by pairWeight.
wtd.t.test(
  x = df_matched$numOfCDs_first19only_off - df_matched$numOfCDs_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$numOfDs_first19only_off - df_matched$numOfDs_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$numOfNs_first19only_off - df_matched$numOfNs_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$numOfAs_first19only_off - df_matched$numOfAs_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$numOfCAs_first19only_off - df_matched$numOfCAs_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$Acquiescence_first19only_off - df_matched$Acquiescence_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$Extremeness_first19only_off - df_matched$Extremeness_first19only_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$scaleScore_off - df_matched$scaleScore_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)
wtd.t.test(
  x = df_matched$nonDiffIndex_first19_off - df_matched$nonDiffIndex_first19_on,
  y = 0, weight = df_matched$pairWeight, samedata = FALSE
)

# The longest run of identical consecutive answers is compared with a paired
# Wilcoxon test (no weights are passed to this call).
wilcox.test(
  x = df_matched$maxSameConsecAnswers_first19only_off,
  y = df_matched$maxSameConsecAnswers_first19only_on,
  paired = TRUE
)

# Creates the matrix for the log-linear models.
# For each of the 19 items (q1..q19) a weighted cross-table of the offline
# answer (x) by the online answer (y) is computed, with pairs weighted by
# pairWeight. The tables are built in one loop instead of 19 copy-pasted
# statements and are kept in a list named mat1 ... mat19.
item_tables <- lapply(1:19, function(i) {
  wtd.table(
    x = df_matched[[paste0("q", i, "_off")]],
    y = df_matched[[paste0("q", i, "_on")]],
    weights = df_matched$pairWeight
  )
})
names(item_tables) <- paste0("mat", 1:19)

# Backward compatibility: also expose mat1 ... mat19 as individual objects.
# invisible() stops the returned environment from being auto-printed.
invisible(list2env(item_tables, envir = environment()))

# The tables are added cell by cell, i.e. purely by position. If an item
# lacks a response category its table has different row/column labels and
# the sum would silently mix up categories, so stop in that case.
same_layout <- vapply(
  item_tables,
  function(tab) identical(dimnames(tab), dimnames(item_tables[[1L]])),
  logical(1)
)
if (!all(same_layout)) {
  stop(
    "Cross-tables differ in their row/column categories for: ",
    paste(names(item_tables)[!same_layout], collapse = ", "),
    call. = FALSE
  )
}

# Pooled table: sum over all items (added left to right, mat1 + mat2 + ...)
mat <- Reduce(`+`, item_tables)
mat

## Part C - LogLinear models
# Load the stacked file for the log-linear models. The offline and online
# answer codes are turned into factors with the five levels 1 to 5.
forLogLin <- read.csv("3_forLogLinear.csv")
forLogLin$offline <- factor(forLogLin$offline, levels = 1:5)
forLogLin$online <- factor(forLogLin$online, levels = 1:5)

# Poisson (log-link) models of the cell frequencies, each one extending the
# previous:
#   independence    - main effects of online and offline only
#   plusAgreement   - adds the five agree_* terms
#   plusAssociation - additionally adds linearByLinearAssociation
independence <- glm(
  freq_rounded ~ online + offline,
  data = forLogLin,
  family = poisson(log)
)
plusAgreement <- glm(
  freq_rounded ~ online + offline +
    agree_CD + agree_D + agree_N + agree_A + agree_CA,
  data = forLogLin,
  family = poisson(log)
)
plusAssociation <- glm(
  freq_rounded ~ online + offline +
    agree_CD + agree_D + agree_N + agree_A + agree_CA +
    linearByLinearAssociation,
  data = forLogLin,
  family = poisson(log)
)

summary(independence)
summary(plusAgreement)
summary(plusAssociation)

# Table of standardized residuals from the plusAssociation log-linear model:
# each Pearson residual is divided by sqrt(1 - h), where h is the hat value
# that lm.influence() reports for that observation.
std.res <- resid(plusAssociation, type = "pearson") /
  sqrt(1 - lm.influence(plusAssociation)$hat)

# Lay the residuals out row by row in a 5-row matrix. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
# NOTE(review): this presumes the rows of forLogLin are ordered so that each
# consecutive run of cells forms one table row - confirm against the file.
matrix(std.res, nrow = 5, byrow = TRUE)


