############################################################### # TITLE: ANALYSIS REPLICATION CODE # # FOR: Manuscript titled "Measuring Network Sizes in the # # Context of Respondent Driven Sampling: Evidence # # from Two Independent Surveys" accepted by # # Survey Research Methods # # BY: Sunghee Lee (sungheel@umich.edu) # # DATE: October 6, 2024 # ############################################################### ############################################################### # REQUIRED LIBRARIES # ############################################################### library(haven) library(tidyverse) library(doBy) library(RDS) library(ggplot2) library(ggpubr) library(gridExtra) library(grid) library(survey) library(dplyr) library(janitor) library(scales) ############################################################### # USER DEFINED FUNCTIONS # ############################################################### #Standard error se<-function(x){ sd(x,na.rm=T)/sqrt(length(x)-sum(is.na(x))) } # Finding Recruiter ID in HLSK # (See ANALYSIS STEP 4 section about its use) Find_Rec_ID = function(data){ id1 = match(data$UNIQUE_NUM, data$COUPON1_NUM, nomatch = 0) id2 = match(data$UNIQUE_NUM, data$COUPON2_NUM, nomatch = 0) id = `+`(id1, id2) id[id == 0] = NA return(data$UNIQUE_NUM[id]) } ############################################################### # DATA RETRIEVAL # ############################################################### # # 1. Project Positive Attitudes Towards Health (PATH) # Data retrievable from https://www.icpsr.umich.edu/web/ICPSR/studies/37957 # This manuscript uses both unrestricted and restricted data. # The restricted data requires an application. # # List of unrestricted variables used in the analyses: # M_IWCOMPLETE, M_AQ3_WHITE, M_AQ3_BLACK, M_NETSIZE, M_AQ1, IS_SC1 # # List of restricted variables used in the analyses: # M_AQ2, M_AQ3_AIAN, M_AQ3_ASIAN, M_AQ3_MENA, M_AQ3_NHPI, # M_AQ3_OTHER, M_SNQ4, M_SNQ1_1, M_SNQ1_2, M_NUMBER_PAT, M_NUMBER_INT, # M_NUMBER_CLS, LOCATION, M_HQ7, M_AQ6, M_HQ1, M_HQ3, M_HQ5, WAVE_NUM, # SEED, RECRUITERID, COUPON3_NUM, COUPON1_NUM, COUPON2_NUM, CPN_NUM # # 2. Health and Life Study of Koreans (HLSK) # Data retrievable from https://www.icpsr.umich.edu/web/ICPSR/studies/37635/ # This manuscript uses both unrestricted and restricted data. # The restricted data requires an application. # # List of unrestricted variables used in the analyses: # MAIN_COMP, CPN_NUM, AQ7, AQ8, AQ8_INDICATOR, NUMKR, CPN_NUM, AQ1_1, # BQ2, GQ9, LQ1, GQ6, INTVLANG, SITE # # List of restricted variables used in the analyses: # ZQ_EXP_APPROACH, ZQ_EXP_FAMILY, ZQ5_1, ZQ5_2, ZQ5_3, ZQ5_4, AQ1, LQ3, # UNIQUE_NUM, WAVE_NUM, COUPON1_NUM, COUPON2_NUM, SEED # # 3. American Community Survey (ACS) 2012-2016 Public Use Microdata Sample (PUMS) # To access ACS 2012-2016 PUMS, please visit # https://www2.census.gov/programs-surveys/acs/data/pums/2016/5-Year/ # Download csv_hca.zip (CA household), csv_pca.zip (CA person), # csv_hmi.zip (MI household), csv_pmi.zip (MI person) # # List of variables used: # From household files: SERIALNO, HINCP # From person files: SERIALNO, SPORDER, PUMA, ANC1P, ANC2P, # RAC2P, RAC3P, CIT, AGEP, SEX, SCHL, MAR, ESR # PWGTP, PWGTP1:PWGTP80 # ############################################################### ############################################################### # DATA LOADING # ############################################################### # # The loaded data sets are named as follows in the code: # PATH # HLSK # ACS_CA_H, ACS_CA_P, ACS_MI_H, ACS_MI_P # --> ACS household and person data sets are merged separately # for each site (LA and MI) # and stacked across both sites; this data set is named ACS # ############################################################### # PATH Data PATH<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% filter(!is.na(M_IWCOMPLETE)) # HLSK Data HLSK<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% filter(MAIN_COMP==1) # ACS CA Data ACS_CA_H<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% select(SERIALNO, HINCP) ACS_CA_P<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% select(SERIALNO, SPORDER, PUMA, ANC1P, ANC2P, RAC2P, RAC3P, CIT, AGEP, SEX, SCHL, MAR, ESR, PWGTP, PWGTP1:PWGTP80) #ACS_LA_KR includes only those are in LA and are non-US born Koreans ACS_LA_KR<-subset (ACS_CA_P, ((PUMA>=3701 & PUMA<=3769)) & (ANC1P==750|ANC2P==750| RAC2P==49|RAC3P==8|RAC3P==22| RAC3P==35|RAC3P==46| RAC3P==54|RAC3P==98) & CIT!=1 & AGEP>=18) #Merge with household data by SERIALNO ACS_LA<-merge(ACS_LA_KR, ACS_CA_H, by="SERIALNO") ACS_LA$Location<-"LA" # ACS MI Data ACS_MI_H<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% select(SERIALNO, HINCP) ACS_MI_P<-read.table("YOUR FILE LOCATION AND YOUR FILE")%>% select(SERIALNO, SPORDER, PUMA, ANC1P, ANC2P, RAC2P, RAC3P, CIT, AGEP, SEX, SCHL, MAR, ESR, PWGTP, PWGTP1:PWGTP80) #ACS_MI_KR includes only those are non-US born Koreans in MI ACS_MI_KR<-subset(ACS_MI_P, (ANC1P==750|ANC2P==750| RAC2P==49|RAC3P==8|RAC3P==22| RAC3P==35|RAC3P==46| RAC3P==54|RAC3P==98) & CIT!=1 & AGEP>=18) ACS_MI<-merge(ACS_MI_KR, ACS_MI_H, by="SERIALNO") ACS_MI$Location<-"MI" #Stacking ACS LA and MI Data ACS<-rbind(ACS_LA, ACS_MI) rm(ACS_CA_P,ACS_CA_H,ACS_LA_KR,ACS_LA, ACS_MI_P,ACS_MI_KR,ACS_MI_H,ACS_MI) ############################################################### # VARIABLE RECODING # ############################################################### # 1. PATH # Participant characteristics variables (Table 1) PATH<-PATH%>%mutate( Agecat=case_when( is.na(IS_SC1)~"NA", IS_SC1<30~"18-29", IS_SC1<40~"30s", IS_SC1<50~"40s", IS_SC1<60~"50s", TRUE~"60+"), Agecat3=case_when( Agecat=="18-29"| Agecat=="30s" ~ "<40", Agecat=="40s" | Agecat=="50s" ~ "<60s", TRUE~"61+"), Male=ifelse(M_AQ1==2,0,M_AQ1), Employed=case_when( is.na(M_HQ1)~NaN, M_HQ1<3~1, TRUE~0), IncomeLE20=case_when( is.na(M_HQ3)~NaN, M_HQ3<=20000~1, TRUE~0), IncomeLE20=ifelse(!is.na(IncomeLE20),IncomeLE20,ifelse( is.na(M_HQ5),NA,ifelse( M_HQ5%in% c(2,3),1,0))), NHWhite=case_when( M_AQ2==0 & M_AQ3_WHITE==1 & M_AQ3_AIAN!=1 & M_AQ3_ASIAN!=1 & M_AQ3_BLACK!=1 & M_AQ3_MENA!=1 & M_AQ3_NHPI!=1 & M_AQ3_OTHER!=1 ~ 1, is.na(M_AQ2)~NaN, TRUE~0), LivingAlone=case_when( is.na(M_HQ7)~NaN, M_HQ7==1~1, TRUE~0), Location2=case_when( LOCATION==1~"Det", TRUE~"Non-Det"), Educ=case_when( M_AQ6<4~"LT HS", M_AQ6<6~"HS", M_AQ6<=11~"MT HS", TRUE~"NA")) # Degree variables PATH<-PATH%>%mutate( D_TOTAL_STD=M_NETSIZE, D_TOTAL_PRIME=M_SNQ4, D_TOTAL_STD_RTYPE=case_when( is.na(D_TOTAL_STD)~"1.NA", D_TOTAL_STD==0~"2.0", TRUE~"3.Non0"), D_TOTAL_PRIME_RTYPE=case_when( is.na(D_TOTAL_PRIME)~"1.NA", D_TOTAL_PRIME==0~"2.0", TRUE~"3.Non0"), D_TOTAL_COMP=case_when( is.na(D_TOTAL_STD) | is.na(D_TOTAL_PRIME)~'NA', D_TOTAL_STD>D_TOTAL_PRIME~"1.STD>PRIME", D_TOTAL_STD==D_TOTAL_PRIME~"2.STD=PRIME", TRUE~"3.STD% select(D_TOTAL_STD, D_TOTAL_PRIME, D_TOTAL_PAT, D_PAT, D_INTERACT, D_CLOSE)%>% mutate_all(.,~log(.+1))%>% rename_all(.,~paste0("LOG_",.))%>% bind_cols(PATH) #Recruitment success PATH<-PATH%>%mutate( RECRUIT_NUM=CPN_USED_NUM, RECRUIT=case_when( is.na(CPN_USED_NUM)~NaN, CPN_USED_NUM==0~0, TRUE~1), SUCC=CPN_USED_NUM, FAIL=CPN_NUM-CPN_USED_NUM) # 2. HLSK # Participant characteristics variables (Table 1) HLSK<-HLSK%>% mutate( Agecat=ifelse(AQ1<30,"18-29", ifelse( AQ1<40,"30s", ifelse( AQ1<50,"40s", ifelse( AQ1<60,"50s", ifelse( AQ1>=60|AQ1_1==7,"60+",NA))))), Age18_29=ifelse(AQ1<30&AQ1>17,1,0), Male=BQ1_MALE, Employed=ifelse(is.na(LQ1),NA, ifelse( LQ1==1,1,0)), IncomeMT50=ifelse(is.na(LQ3),NA, ifelse( LQ3>50000,1,0)), Identity=ifelse(GQ6==1,"Korean", ifelse( GQ6==3,"Korean-Am","Other")), Married=ifelse(is.na(BQ2),NA, ifelse( BQ2==1,1,0)), Site=SITE, Educ=ifelse(is.na(GQ9),NA, ifelse( GQ9<9,"LT BA/S", ifelse( GQ9==9,"BA/S","MT BA/S"))), Edu_COLL=ifelse(GQ9==9,1,0), Language=INTVLANG ) # Degree variables HLSK<-HLSK%>% mutate( D_TOTAL= NUMKR, D_TOTAL_GT100=ifelse(is.na(D_TOTAL),NA,ifelse( D_TOTAL>100,1,0)), D_TOTAL_GT500=ifelse(is.na(D_TOTAL),NA,ifelse( D_TOTAL>500,1,0)), D_TOTAL_WOOUT=ifelse(is.na(D_TOTAL),NA, ifelse( D_TOTAL>=round(quantile(D_TOTAL, probs = .98, na.rm=T),0), round(quantile(D_TOTAL, probs = .98, na.rm=T),0), D_TOTAL)), D_TOTAL_RESPONSE_TYPE=ifelse(is.na(ZQ_EXP_APPROACH),NA, ifelse( is.na(D_TOTAL),"1.NR", ifelse( D_TOTAL==0, "2.0","3.Non0"))), D_OH=ZQ5_1, D_TOTAL_OH=ifelse(is.na(D_OH), NA, round(D_OH*(100/1.54),0)), D_OH_WOOUT=ifelse(is.na(D_OH),NA, ifelse(D_OH>=round(quantile(D_OH, probs = .98, na.rm=T),0), round(quantile(D_OH, probs = .98, na.rm=T),0), D_OH)), D_TOTAL_OH_WOOUT=ifelse(is.na(D_OH_WOOUT), NA, round(D_OH_WOOUT/(1.54/100),0)), D_FAM=ZQ5_2, D_FAM_WOOUT=ifelse(is.na(D_FAM),NA, ifelse(D_FAM>=round(quantile(D_FAM, probs = .98, na.rm=T),0), round(quantile(D_FAM, probs = .98, na.rm=T),0), D_FAM)), D_INTERACT=ZQ5_3, D_INTERACT_WOOUT=ifelse(is.na(D_INTERACT),NA, ifelse(D_INTERACT>=round(quantile(D_INTERACT, probs = .98, na.rm=T),0), round(quantile(D_INTERACT, probs = .98, na.rm=T),0), D_INTERACT)), D_CLOSE=ZQ5_4, D_CLOSE_WOOUT=ifelse(is.na(D_CLOSE),NA, ifelse(D_CLOSE>=round(quantile(D_CLOSE, probs = .98, na.rm=T),0), round(quantile(D_CLOSE, probs = .98, na.rm=T),0), D_CLOSE))) # Log transformed degrees variables HLSK<-HLSK%>% select(D_TOTAL,D_TOTAL_WOOUT, D_OH,D_OH_WOOUT, D_TOTAL_OH,D_TOTAL_OH_WOOUT, D_FAM, D_FAM_WOOUT, D_INTERACT, D_INTERACT_WOOUT, D_CLOSE, D_CLOSE_WOOUT)%>% mutate_all(.,~log(.+1))%>% rename_all(.,~paste0(.,"_LOG"))%>% bind_cols(HLSK) # Experimental variables HLSK<-HLSK%>% mutate( D_TOTAL_EXP_APPROACH=ifelse(is.na(ZQ_EXP_APPROACH),NA, ifelse( ZQ_EXP_APPROACH==1, "Sequential", ifelse( ZQ_EXP_APPROACH==2, "Direct", "Direct, Coupon"))), QuestionApproach=ifelse(D_TOTAL_EXP_APPROACH=="","",ifelse( D_TOTAL_EXP_APPROACH=="Direct", "a.Standard (n=211)", ifelse( D_TOTAL_EXP_APPROACH=="Direct, Coupon", "b.RecruitPrime (n=219)", "c.Sequential (n=207)"))), INTVLANG_BILINGUAL=ifelse(AQ7!=3, NA, ifelse( AQ7==3 & INTVLANG==1,"ENG","KOR")), InterviewLanguage=ifelse(is.na(INTVLANG_BILINGUAL), NA,ifelse( INTVLANG_BILINGUAL=="ENG","a.English (n=105)", "b.Korean (n=129)")) ) # Recruitment success HLSK<-HLSK%>%mutate( RECRUIT_NUM=CPN_USED_NUM, RECRUIT=ifelse(is.na(CPN_USED_NUM),NA, ifelse(CPN_USED_NUM==0,0,1)), SUCC=CPN_USED_NUM, FAIL=CPN_NUM-CPN_USED_NUM) # 3. ACS ACS<-ACS%>%mutate( # Age: 18-29 yrs Age18_29=ifelse(is.na(AGEP),NA, ifelse(AGEP<30,1,0)), # Sex: Male Male=ifelse(is.na(SEX),NA, ifelse(SEX==1,1,0)), # Marital status: Married Married=ifelse(is.na(MAR),NA, ifelse(MAR==1,1,0)), # Education: =4yr college degree Educ_COLL=ifelse(is.na(SCHL),NA, ifelse(SCHL==21,1,0)), # Income>$50K IncomeMT50=ifelse(is.na(HINCP), NA, ifelse(HINCP<=50000,0,1)), # Employed last week Employed=ifelse(is.na(ESR),NA, ifelse(ESR==3|ESR==6,0,1))) ############################################################### # PARTICIPANT CHARACTERISTICS (TABLE 1) # ############################################################### dim(PATH) table(PATH$Agecat3) table(PATH$Male) table(PATH$Employed) table(PATH$IncomeLE20) table(PATH$NHWhite) table(PATH$LivingAlone) table(PATH$Location2) table(PATH$Educ) dim(HLSK) table(HLSK$Agecat) table(HLSK$Male) table(HLSK$Employed) table(HLSK$IncomeMT50) table(HLSK$Identity) table(HLSK$Married) table(HLSK$Site) table(HLSK$Educ) table(HLSK$Language) table(HLSK$Agecat, HLSK$Site) table(HLSK$Male, HLSK$Site) table(HLSK$Employed, HLSK$Site) table(HLSK$IncomeMT50, HLSK$Site) table(HLSK$Identity, HLSK$Site) table(HLSK$Married, HLSK$Site) table(HLSK$Site, HLSK$Site) table(HLSK$Educ, HLSK$Site) table(HLSK$Language, HLSK$Site) ############################################################### # ANALYSIS STEP 1: # # Degree Distribution # ############################################################### round(prop.table(table(is.na(PATH$D_TOTAL_STD))),3) round(prop.table(table(is.na(PATH$D_TOTAL_PRIME))),3) round(prop.table(table(is.na(PATH$D_PAT))),3) round(prop.table(table(is.na(PATH$D_INTERACT))),3) round(prop.table(table(is.na(PATH$D_CLOSE))),3) round(prop.table(table(is.na(HLSK$D_TOTAL))),3) round(prop.table(table(is.na(HLSK$D_OH))),3) round(prop.table(table(is.na(HLSK$D_INTERACT))),3) round(prop.table(table(is.na(HLSK$D_CLOSE))),3) round(prop.table(table(is.na(HLSK$D_FAM))),3) summary(HLSK$D_TOTAL) summary(HLSK$D_OH) summary(HLSK$D_INTERACT) summary(HLSK$D_CLOSE) summary(HLSK$D_FAM) summary(HLSK$D_TOTAL_WOOUT) summary(HLSK$D_OH_WOOUT) summary(HLSK$D_INTERACT_WOOUT) summary(HLSK$D_CLOSE_WOOUT) summary(HLSK$D_FAM_WOOUT) ############################################################### # FIGURE 4A # ############################################################### wilcox.test(PATH$D_TOTAL_STD,PATH$D_TOTAL_PRIME,paired=TRUE) wilcox.test(PATH$D_TOTAL_STD,PATH$D_TOTAL_PAT,paired=TRUE) wilcox.test(PATH$D_TOTAL_PRIME,PATH$D_TOTAL_PAT,paired=TRUE) prop.table(table(PATH$D_TOTAL_STD>PATH$D_TOTAL_PRIME)) prop.table(table(PATH$D_TOTAL_STD% select(QuestionApproach, Degree=D_TOTAL_WOOUT) %>% mutate(DegreeType="1. Total (b,c)") Oh<-HLSK %>% select(QuestionApproach, Degree=D_OH_WOOUT) %>% mutate(DegreeType="2. Last Name Oh (b,c)") Interact<-HLSK %>% select(QuestionApproach, Degree=D_INTERACT_WOOUT) %>% mutate(DegreeType="3. Interact >1/Wk (b)") Close<-HLSK %>% select(QuestionApproach, Degree=D_CLOSE_WOOUT) %>% mutate(DegreeType="4. Close Relationship (b)") Fam<-HLSK %>% select(QuestionApproach, Degree=D_FAM_WOOUT) %>% mutate(DegreeType="5. Family (b,c)") Degree<-rbind(Total, Oh, Interact, Close, Fam) fig5a<-Degree%>% ggplot(aes(x=DegreeType, y=Degree, fill=QuestionApproach)) + geom_boxplot(outlier.size=1) + ylim(0,100)+ theme(legend.position=c(0.8, 0.7))+ scale_x_discrete(labels = c("Total \n(b,c)","Last Name Oh \n(b,c)", "Interact>Once/Wk\n(b)", "Close Relationship\n(b)", "Family \n(b,c)"))+ scale_fill_manual(values=c("white", "slateblue3", "orange")) + ylab("Degree\u2020")+xlab("Degree Type\u2021") fig5a #Saved as a pdf for a size6.6 x 4.4 table(Total$Degree>100) ############################################################### # FIGURE 5B: # # HLSK degrees by language -- Bilingual respondents # ############################################################### summary(glm(D_TOTAL_WOOUT_LOG~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage), family="Gamma"(link = "log")))) summary(glm(D_OH_WOOUT_LOG~INTVLANG_BILINGUAL, data=subset(HLSK, !is.na(InterviewLanguage), family="Gamma"(link = "log")))) summary(glm(D_INTERACT_WOOUT_LOG~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage), family="Gamma"(link = "log")))) summary(glm(D_CLOSE_WOOUT_LOG~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage), family="Gamma"(link = "log")))) summary(glm(D_FAM_WOOUT_LOG~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage), family="Gamma"(link = "log")))) aggregate(D_TOTAL_WOOUT~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage)), FUN=function(x){c(n=length(x), m=mean(x), sd=sd(x))}) aggregate(D_OH_WOOUT~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage)), FUN=function(x){c(n=length(x), m=mean(x), sd=sd(x))}) aggregate(D_INTERACT_WOOUT~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage)), FUN=function(x){c(n=length(x), m=mean(x), sd=sd(x))}) aggregate(D_CLOSE_WOOUT~InterviewLanguage, data=subset(HLSK, !is.na(InterviewLanguage)), FUN=function(x){c(n=length(x), m=mean(x), sd=sd(x))}) Total_b<-HLSK%>%filter(!is.na(InterviewLanguage))%>% select(InterviewLanguage, Degree=D_TOTAL_WOOUT)%>% mutate(DegreeType="1. Total") Oh_b<-HLSK%>% filter(!is.na(HLSK$InterviewLanguage))%>% select(InterviewLanguage, Degree=D_OH_WOOUT)%>% mutate(DegreeType="2. Last Name Oh (b)") Interact_b<-HLSK%>% filter(!is.na(HLSK$InterviewLanguage))%>% select(InterviewLanguage, Degree=D_INTERACT_WOOUT)%>% mutate(DegreeType="3. Interact>1/Wk") Close_b<-HLSK%>% filter(!is.na(HLSK$InterviewLanguage))%>% select(InterviewLanguage, Degree=D_CLOSE_WOOUT)%>% mutate(DegreeType="4. Close Relationship (b)") Fam_b<-HLSK%>% filter(!is.na(HLSK$InterviewLanguage))%>% select(InterviewLanguage, Degree=D_FAM_WOOUT)%>% mutate(DegreeType="5. Family") Degree_b<-rbind(Total_b, Oh_b, Interact_b, Close_b, Fam_b) fig5b<-Degree_b%>% #filter(!(DegreeType %in% c( "2. Total Oh_b Scale-Up"))) %>% ggplot(aes(x=DegreeType, y=Degree, fill=InterviewLanguage)) + geom_boxplot(outlier.size=1) + ylim(0,100)+ theme(legend.position=c(0.8, 0.7))+ scale_fill_manual(values=c("plum1","lightblue"))+ ylab("Degree\u2020")+xlab("Degree Type\u2021")+ scale_x_discrete(labels = c("Total","Last Name Oh \n(b)", "Interact>Once/Wk\n(b)", "Close Relationship\n(b)", "Family")) fig5b #Saved as a pdf for a size6.6 x 4.4 table(Total_b$Degree>100) ############################################################### # ANALYSIS STEP 2: # # Relationship across degrees # ############################################################### ############################################################### # FIGURE 6A # ############################################################### D_TOTAL_STD_PRIME_s<-ggscatter(PATH, x = "D_TOTAL_STD", y = "D_TOTAL_PRIME", color= "navy",fill="navy",size=1.25, add = "reg.line",add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0, 100)+ stat_cor(label.x = 90, label.y = 75, size=4, method="spearman", cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 10, height = 5,size=1, color="navy") + labs(x="Degree: Total Standard\n", y="\nDegree: Total Recruitment Primed") D_TOTAL_STD_PAT_s<-ggscatter(PATH, x = "D_TOTAL_STD", y = "D_TOTAL_PAT", color= "navy",fill="navy",size=1.25, add = "reg.line",add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0, 2000)+ stat_cor(label.x = 90, label.y = 1200, size=4, method="spearman", cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 50, size=1, color="navy") + labs(x="Degree: Total Standard\n", y="Degree: Total Scale-up\nFirst Name Pat") D_TOTAL_STD_INTERACT_s<-ggscatter(PATH, x = "D_TOTAL_STD", y = "D_INTERACT", color= "navy",fill="navy",size=1.25, add = "reg.line",add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0, 100)+ stat_cor(label.x = 90, label.y = 75, size=4, method="spearman", cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 50, size=1, color="navy") + labs(x="Degree: Total Standard\n", y="\nDegree: Interact >Once/Week") D_TOTAL_STD_CLOSE_s<-ggscatter(PATH, x = "D_TOTAL_STD", y = "D_CLOSE", color= "navy",fill="navy",size=1.25, add = "reg.line",add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0, 100)+ stat_cor(label.x = 90, label.y = 70, size=4, method="spearman", cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 50,size=1, color="navy") + labs(x="Degree: Total Standard\n", y="\nDegree: Close Relationship") fig6a<-ggarrange(D_TOTAL_STD_PRIME_s, D_TOTAL_STD_PAT_s, D_TOTAL_STD_INTERACT_s, D_TOTAL_STD_CLOSE_s, nrow=2,ncol=2) fig6a #Saved as a pdf for a size6.3x6.3 ############################################################### # FIGURE 6B # ############################################################### D_TOTAL_OH_s<-ggscatter(HLSK, x = "D_TOTAL_WOOUT", y = "D_TOTAL_OH_WOOUT", color= "navy", fill="navy", size=1.25, add = "reg.line", add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0,800)+ stat_cor(label.x = 220, label.y = 600, size=4, method="spearman",cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 50, size=1,color= "navy") + labs(x="Degree: Total\n", y="Degree: Total Scale-up\nLast Name Oh") D_TOTAL_FAM_s<-ggscatter(HLSK, x = "D_TOTAL_WOOUT", y = "D_FAM_WOOUT", color= "navy", fill="navy", size=1.25, add = "reg.line", add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0,20)+ stat_cor(label.x = 200, label.y = 10, size=4, method="spearman",cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 4, size=1,color= "navy") + labs(x="Degree: Total\n", y="Degree: Family") D_TOTAL_INTERACT_s<-ggscatter(HLSK, x = "D_TOTAL_WOOUT", y = "D_INTERACT_WOOUT", color= "navy", fill="navy", size=1.25, add = "reg.line", add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0,50)+ stat_cor(label.x = 220, label.y = 600, size=4, method="spearman",cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 5, size=1,color= "navy") + labs(x="Degree: Total\n", y="\nDegree: Interact >Once/Week") D_TOTAL_CLOSE_s<-ggscatter(HLSK, x = "D_TOTAL_WOOUT", y = "D_CLOSE_WOOUT", color= "navy", fill="navy", size=1.25, add = "reg.line", add.params = list(color = "red"), ggtheme=theme_gray()) + ylim(0,20)+ stat_cor(label.x = 220, label.y = 11, size=4, method="spearman",cor.coef.name="rho",p.accuracy = 0.001)+ geom_jitter(width = 12, height = 4, size=1,color= "navy") + labs(x="Degree: Total\n", y="\nDegree: Close Relationship") fig6b<-ggarrange(D_TOTAL_OH_s, D_TOTAL_INTERACT_s, D_TOTAL_CLOSE_s, D_TOTAL_FAM_s, nrow=2,ncol=2) fig6b #Saved as a pdf for a size6.3x6.3 ############################################################### # ANALYSIS STEP 3: # # Relationship between degrees and recruitment success # ############################################################### summary(PATH$RECRUIT_NUM/PATH$CPN_NUM) PATH %>% tabyl(CPN_NUM, RECRUIT_NUM)%>% adorn_totals(c("row","col")) summary(HLSK$RECRUIT_NUM/HLSK$CPN_NUM) HLSK %>% tabyl(CPN_NUM, RECRUIT_NUM)%>% adorn_totals(c("row","col")) ############################################################### # TABLE 2A # ############################################################### zero<-PATH[which(PATH$RECRUIT_NUM==0),] one<-PATH[which(PATH$RECRUIT_NUM==1),] two<-PATH[which(PATH$RECRUIT_NUM==2),] three<-PATH[which(PATH$RECRUIT_NUM==3),] table_2a<-cbind( rbind(mean(zero$D_TOTAL_STD, na.rm=T), mean(zero$D_TOTAL_PRIME, na.rm=T), mean(zero$D_TOTAL_PAT, na.rm=T), mean(zero$D_INTERACT, na.rm=T), mean(zero$D_CLOSE, na.rm=T)), rbind(se(zero$D_TOTAL_STD), se(zero$D_TOTAL_PRIME), se(zero$D_TOTAL_PAT), se(zero$D_INTERACT), se(zero$D_CLOSE)), rbind(mean(one$D_TOTAL_STD, na.rm=T), mean(one$D_TOTAL_PRIME, na.rm=T), mean(one$D_TOTAL_PAT, na.rm=T), mean(one$D_INTERACT, na.rm=T), mean(one$D_CLOSE, na.rm=T)), rbind(se(one$D_TOTAL_STD), se(one$D_TOTAL_PRIME), se(one$D_TOTAL_PAT), se(one$D_INTERACT), se(one$D_CLOSE)), rbind(mean(two$D_TOTAL_STD, na.rm=T), mean(two$D_TOTAL_PRIME, na.rm=T), mean(two$D_TOTAL_PAT, na.rm=T), mean(two$D_INTERACT, na.rm=T), mean(two$D_CLOSE, na.rm=T)), rbind(se(two$D_TOTAL_STD), se(two$D_TOTAL_PRIME), se(two$D_TOTAL_PAT), se(two$D_INTERACT), se(two$D_CLOSE)), rbind(mean(three$D_TOTAL_STD, na.rm=T), mean(three$D_TOTAL_PRIME, na.rm=T), mean(three$D_TOTAL_PAT, na.rm=T), mean(three$D_INTERACT, na.rm=T), mean(three$D_CLOSE, na.rm=T)), rbind(se(three$D_TOTAL_STD), se(three$D_TOTAL_PRIME), se(three$D_TOTAL_PAT), se(three$D_INTERACT), se(three$D_CLOSE))) round(table_2a,1) ############################################################### # TABLE 2B # ############################################################### zero<-HLSK[which(HLSK$RECRUIT_NUM==0),] one<-HLSK[which(HLSK$RECRUIT_NUM==1),] two<-HLSK[which(HLSK$RECRUIT_NUM==2),] table_2b<-cbind( #Mean of recruit counts rbind(mean(zero$D_TOTAL_WOOUT, na.rm=T), mean(zero$D_TOTAL_OH_WOOUT, na.rm=T), mean(zero$D_INTERACT_WOOUT, na.rm=T), mean(zero$D_CLOSE_WOOUT, na.rm=T), mean(zero$D_FAM_WOOUT, na.rm=T)), rbind(se(zero$D_TOTAL_WOOUT), se(zero$D_TOTAL_OH_WOOUT), se(zero$D_INTERACT_WOOUT), se(zero$D_CLOSE_WOOUT), se(zero$D_FAM_WOOUT)), rbind(mean(one$D_TOTAL_WOOUT, na.rm=T), mean(one$D_TOTAL_OH_WOOUT, na.rm=T), mean(one$D_INTERACT_WOOUT, na.rm=T), mean(one$D_CLOSE_WOOUT, na.rm=T), mean(one$D_FAM_WOOUT, na.rm=T)), rbind(se(one$D_TOTAL_WOOUT), se(one$D_TOTAL_OH_WOOUT), se(one$D_INTERACT_WOOUT), se(one$D_CLOSE_WOOUT), se(one$D_FAM_WOOUT)), rbind(mean(two$D_TOTAL_WOOUT, na.rm=T), mean(two$D_TOTAL_OH_WOOUT, na.rm=T), mean(two$D_INTERACT_WOOUT, na.rm=T), mean(two$D_CLOSE_WOOUT, na.rm=T), mean(two$D_FAM_WOOUT, na.rm=T)), rbind(se(two$D_TOTAL_WOOUT), se(two$D_TOTAL_OH_WOOUT), se(two$D_INTERACT_WOOUT), se(two$D_CLOSE_WOOUT), se(two$D_FAM_WOOUT))) round(table_2b,1) ############################################################### # TABLE 3A # ############################################################### PATH_m<-na.omit( PATH[,c("SUCC","FAIL","CPN_NUM", "Agecat3","Male","NHWhite", "Educ","Employed","IncomeLE20", "LivingAlone","Location2", "LOG_D_TOTAL_STD","LOG_D_TOTAL_PRIME","LOG_D_TOTAL_PAT", "LOG_D_INTERACT","LOG_D_CLOSE")]) m_null <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2), family="quasibinomial", data=PATH_m) m_D_TOTAL_STD <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2)+ scale(LOG_D_TOTAL_STD), family="quasibinomial", data=PATH_m) m_D_TOTAL_PRIME <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2)+ scale(LOG_D_TOTAL_PRIME), family="quasibinomial", data=PATH_m) m_D_TOTAL_PAT <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2)+ scale(LOG_D_TOTAL_PAT), family="quasibinomial", data=PATH_m) m_D_INTERACT <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2)+ scale(LOG_D_INTERACT), family="quasibinomial", data=PATH_m) m_D_CLOSE <- glm(cbind(SUCC,FAIL) ~ factor(CPN_NUM)+factor(Agecat3)+Male+NHWhite+ LivingAlone+factor(Educ)+Employed+factor(IncomeLE20)+ factor(Location2)+ scale(LOG_D_CLOSE), family="quasibinomial", data=PATH_m) summary(m_null) table_3a<-cbind( rbind(m_null$deviance, m_D_TOTAL_STD$deviance-m_null$deviance, m_D_TOTAL_PRIME$deviance-m_null$deviance, m_D_TOTAL_PAT$deviance-m_null$deviance, m_D_INTERACT$deviance-m_null$deviance, m_D_CLOSE$deviance-m_null$deviance), rbind(NA, round(coef(summary(m_D_TOTAL_STD))[14,1],3), round(coef(summary(m_D_TOTAL_PRIME))[14,1],3), round(coef(summary(m_D_TOTAL_PAT))[14,1],3), round(coef(summary(m_D_INTERACT))[14,1],3), round(coef(summary(m_D_CLOSE))[14,1],3)), rbind(NA, round(coef(summary(m_D_TOTAL_STD))[14,2],3), round(coef(summary(m_D_TOTAL_PRIME))[14,2],3), round(coef(summary(m_D_TOTAL_PAT))[14,2],3), round(coef(summary(m_D_INTERACT))[14,2],3), round(coef(summary(m_D_CLOSE))[14,2],3)), rbind(NA, round(coef(summary(m_D_TOTAL_STD))[14,4],3), round(coef(summary(m_D_TOTAL_PRIME))[14,4],3), round(coef(summary(m_D_TOTAL_PAT))[14,4],3), round(coef(summary(m_D_INTERACT))[14,4],3), round(coef(summary(m_D_CLOSE))[14,4],3))) round(table_3a,3) ############################################################### # TABLE 3B # ############################################################### HLSK_m<-na.omit( HLSK[,c("SUCC","FAIL","Agecat","Male","Married", "Educ","Employed","IncomeMT50","Language","Site","Identity", "D_TOTAL_LOG","D_TOTAL_OH_LOG","D_FAM_LOG", "D_INTERACT_LOG","D_CLOSE_LOG")]) m_null<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity), HLSK_m, family=quasibinomial) m_D_TOTAL_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity)+ scale(D_TOTAL_LOG), HLSK_m, family=quasibinomial) m_D_TOTAL_OH_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity)+ scale(D_TOTAL_OH_LOG), HLSK_m, family=quasibinomial) m_D_INTERACT_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity)+ scale(D_INTERACT_LOG), HLSK_m, family=quasibinomial) m_D_CLOSE_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity)+ scale(D_CLOSE_LOG), HLSK_m, family=quasibinomial) m_D_FAM_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Language)+factor(Site)+factor(Identity)+ scale(D_FAM_LOG), HLSK_m, family=quasibinomial) summary(m_null) table_3b<-cbind( rbind(m_null$deviance, m_D_TOTAL_LOG$deviance-m_null$deviance, m_D_TOTAL_OH_LOG$deviance-m_null$deviance, m_D_INTERACT_LOG$deviance-m_null$deviance, m_D_CLOSE_LOG$deviance-m_null$deviance, m_D_FAM_LOG$deviance-m_null$deviance), rbind(NA, round(coef(summary(m_D_TOTAL_LOG))[16,1],3), round(coef(summary(m_D_TOTAL_OH_LOG))[16,1],3), round(coef(summary(m_D_INTERACT_LOG))[16,1],3), round(coef(summary(m_D_CLOSE_LOG))[16,1],3), round(coef(summary(m_D_FAM_LOG))[16,1],3)), rbind(NA, round(coef(summary(m_D_TOTAL_LOG))[16,2],3), round(coef(summary(m_D_TOTAL_OH_LOG))[16,2],3), round(coef(summary(m_D_INTERACT_LOG))[16,2],3), round(coef(summary(m_D_CLOSE_LOG))[16,2],3), round(coef(summary(m_D_FAM_LOG))[16,2],3)), rbind(NA, round(coef(summary(m_D_TOTAL_LOG))[16,4],3), round(coef(summary(m_D_TOTAL_OH_LOG))[16,4],3), round(coef(summary(m_D_INTERACT_LOG))[16,4],3), round(coef(summary(m_D_CLOSE_LOG))[16,4],3), round(coef(summary(m_D_FAM_LOG))[16,4],3))) round(table_3a,3) ############################################################### # TABLE 3C # ############################################################### HLSK_bi<-na.omit( HLSK[,c("SUCC","FAIL","Agecat","Male","Married", "Educ","Employed","IncomeMT50","Language","Site","Identity", "D_TOTAL_LOG","D_TOTAL_OH_LOG","D_FAM_LOG", "D_INTERACT_LOG","D_CLOSE_LOG", "InterviewLanguage")]) # ENGLISH INTERVIEWS e_null<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity), subset(HLSK_bi, Language==1), family=quasibinomial) e_D_TOTAL_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_TOTAL_LOG), subset(HLSK_bi, Language==1), family=quasibinomial) e_D_FAM_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_FAM_LOG), subset(HLSK_bi, Language==1), family=quasibinomial) e_D_TOTAL_OH_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_TOTAL_OH_LOG), subset(HLSK_bi, Language==1), family=quasibinomial) e_D_INTERACT_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_INTERACT_LOG), subset(HLSK_bi, Language==1), family=quasibinomial) e_D_CLOSE_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_CLOSE_LOG), subset(HLSK_bi, Language==1), family=quasibinomial) summary(e_null) table_3c_EN<-cbind( rbind(e_null$deviance, e_D_TOTAL_LOG$deviance-e_null$deviance, e_D_TOTAL_OH_LOG$deviance-e_null$deviance, e_D_INTERACT_LOG$deviance-e_null$deviance, e_D_CLOSE_LOG$deviance-e_null$deviance, e_D_FAM_LOG$deviance-e_null$deviance), rbind(NA, round(coef(summary(e_D_TOTAL_LOG))[15,1],3), round(coef(summary(e_D_TOTAL_OH_LOG))[15,1],3), round(coef(summary(e_D_INTERACT_LOG))[15,1],3), round(coef(summary(e_D_CLOSE_LOG))[15,1],3), round(coef(summary(e_D_FAM_LOG))[15,1],3)), rbind(NA, round(coef(summary(e_D_TOTAL_LOG))[15,2],3), round(coef(summary(e_D_TOTAL_OH_LOG))[15,2],3), round(coef(summary(e_D_INTERACT_LOG))[15,2],3), round(coef(summary(e_D_CLOSE_LOG))[15,2],3), round(coef(summary(e_D_FAM_LOG))[15,2],3)), rbind(NA, round(coef(summary(e_D_TOTAL_LOG))[15,4],3), round(coef(summary(e_D_TOTAL_OH_LOG))[15,4],3), round(coef(summary(e_D_INTERACT_LOG))[15,4],3), round(coef(summary(e_D_CLOSE_LOG))[15,4],3), round(coef(summary(e_D_FAM_LOG))[15,4],3))) round(table_3c_EN,3) # KOREAN INTERVIEWS k_null<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity), subset(HLSK_bi, Language==0), family=quasibinomial) k_D_TOTAL_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_TOTAL_LOG), subset(HLSK_bi, Language==0), family=quasibinomial) k_D_FAM_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_FAM_LOG), subset(HLSK_bi, Language==0), family=quasibinomial) k_D_TOTAL_OH_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_TOTAL_OH_LOG), subset(HLSK_bi, Language==0), family=quasibinomial) k_D_INTERACT_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_INTERACT_LOG), subset(HLSK_bi, Language==0), family=quasibinomial) k_D_CLOSE_LOG<-glm (cbind(SUCC,FAIL)~factor(Agecat)+factor(Male)+factor(Married)+ factor(Educ)+factor(Employed)+factor(IncomeMT50)+factor(Site)+factor(Identity)+ scale(D_CLOSE_LOG), subset(HLSK_bi, Language==0), family=quasibinomial) summary(k_null) table_3c_KR<-cbind( rbind(k_null$deviance, k_D_TOTAL_LOG$deviance-k_null$deviance, k_D_TOTAL_OH_LOG$deviance-k_null$deviance, k_D_INTERACT_LOG$deviance-k_null$deviance, k_D_CLOSE_LOG$deviance-k_null$deviance, k_D_FAM_LOG$deviance-k_null$deviance), rbind(NA, round(coef(summary(k_D_TOTAL_LOG))[15,1],3), round(coef(summary(k_D_TOTAL_OH_LOG))[15,1],3), round(coef(summary(k_D_INTERACT_LOG))[15,1],3), round(coef(summary(k_D_CLOSE_LOG))[15,1],3), round(coef(summary(k_D_FAM_LOG))[15,1],3)), rbind(NA, round(coef(summary(k_D_TOTAL_LOG))[15,2],3), round(coef(summary(k_D_TOTAL_OH_LOG))[15,2],3), round(coef(summary(k_D_INTERACT_LOG))[15,2],3), round(coef(summary(k_D_CLOSE_LOG))[15,2],3), round(coef(summary(k_D_FAM_LOG))[15,2],3)), rbind(NA, round(coef(summary(k_D_TOTAL_LOG))[15,4],3), round(coef(summary(k_D_TOTAL_OH_LOG))[15,4],3), round(coef(summary(k_D_INTERACT_LOG))[15,4],3), round(coef(summary(k_D_CLOSE_LOG))[15,4],3), round(coef(summary(k_D_FAM_LOG))[15,4],3))) round(table_3c_KR,3) ############################################################### # ANALYSIS STEP 4: # # Population Inference # ############################################################### ############################################################### # TABLE 4 ############################################################### # ACS dim(ACS) dACS= svrepdesign(data=ACS, weights=ACS$PWGTP, repweights=ACS[,31:110], type="BRR", combined.weights=TRUE) table_4_acs<-round(svymean(~Age18_29+Male+Married+Educ_COLL+IncomeMT50+Employed, dACS,na.rm=T),3) table_4_acs # HLSK HLSK$RECRUITER_ID <- as.character(Find_Rec_ID(HLSK)) # Recode SEEDs RECRUITER number to 1 HLSK$RECRUITER_ID[HLSK$SEED==1]<--1 HLSK$RECRUITER_ID[is.na(HLSK$RECRUITER_ID)==T]<--1 HLSK_R_TOTAL <- as.rds.data.frame(HLSK, id = "UNIQUE_NUM", recruiter.id = "RECRUITER_ID", network.size = "D_TOTAL_WOOUT", population.size = 1000000, max.coupons = 2) HLSK_R_TOTAL_OH <- as.rds.data.frame(HLSK, id = "UNIQUE_NUM", recruiter.id = "RECRUITER_ID", network.size = "D_TOTAL_OH_WOOUT", population.size = 1000000, max.coupons = 2) HLSK_R_INTERACT <- as.rds.data.frame(HLSK, id = "UNIQUE_NUM", recruiter.id = "RECRUITER_ID", network.size = "D_INTERACT_WOOUT", population.size = 1000000, max.coupons = 2) HLSK_R_CLOSE <- as.rds.data.frame(HLSK, id = "UNIQUE_NUM", recruiter.id = "RECRUITER_ID", network.size = "D_CLOSE_WOOUT", population.size = 1000000, max.coupons = 2) HLSK_R_FAM <- as.rds.data.frame(HLSK, id = "UNIQUE_NUM", recruiter.id = "RECRUITER_ID", network.size = "D_FAM_WOOUT", population.size = 1000000, max.coupons = 2) varlist<-c("Age18_29","Male","Married","Edu_COLL","IncomeMT50","Employed") #Note to users: "RDS.bootstrap.intervals" takes a bit of time set.seed(1234) TOTAL_rds2_bs_mean = c() TOTAL_rds2_bs_se = c() for(v in 1:length(varlist)){ temp<-RDS.bootstrap.intervals(HLSK_R_TOTAL,outcome.variable=varlist[v], weight.type="RDS-II", number.of.bootstrap.samples=500, N=158242) TOTAL_rds2_bs_mean[v] = ifelse(is.null(temp),0,temp[[2]][1]) TOTAL_rds2_bs_se[v] = ifelse(is.null(temp),0,temp[[2]][5]) } TOTAL_OH_rds2_bs_mean = c() TOTAL_OH_rds2_bs_se = c() for(v in 1:length(varlist)){ temp<-RDS.bootstrap.intervals(HLSK_R_TOTAL_OH,outcome.variable=varlist[v], weight.type="RDS-II", number.of.bootstrap.samples=500, N=158242) TOTAL_OH_rds2_bs_mean[v] = ifelse(is.null(temp),0,temp[[2]][1]) TOTAL_OH_rds2_bs_se[v] = ifelse(is.null(temp),0,temp[[2]][5]) } INTERACT_rds2_bs_mean = c() INTERACT_rds2_bs_se = c() for(v in 1:length(varlist)){ temp<-RDS.bootstrap.intervals(HLSK_R_INTERACT,outcome.variable=varlist[v], weight.type="RDS-II", number.of.bootstrap.samples=500, N=158242) INTERACT_rds2_bs_mean[v] = ifelse(is.null(temp),0,temp[[2]][1]) INTERACT_rds2_bs_se[v] = ifelse(is.null(temp),0,temp[[2]][5]) } CLOSE_rds2_bs_mean = c() CLOSE_rds2_bs_se = c() for(v in 1:length(varlist)){ temp<-RDS.bootstrap.intervals(HLSK_R_CLOSE,outcome.variable=varlist[v], weight.type="RDS-II", number.of.bootstrap.samples=500, N=158242) CLOSE_rds2_bs_mean[v] = ifelse(is.null(temp),0,temp[[2]][1]) CLOSE_rds2_bs_se[v] = ifelse(is.null(temp),0,temp[[2]][5]) } FAM_rds2_bs_mean = c() FAM_rds2_bs_se = c() for(v in 1:length(varlist)){ temp<-RDS.bootstrap.intervals(HLSK_R_FAM,outcome.variable=varlist[v], weight.type="RDS-II", number.of.bootstrap.samples=500, N=158242) FAM_rds2_bs_mean[v] = ifelse(is.null(temp),0,temp[[2]][1]) FAM_rds2_bs_se[v] = ifelse(is.null(temp),0,temp[[2]][5]) } Uwt_mean<-HLSK%>% select(varlist)%>% summarise(across(where(is.numeric), ~ mn(.x))) Uwt_se<-HLSK%>% select(varlist)%>% summarise(across(where(is.numeric), ~ se(.x))) table_4_hlsk<-round(cbind(rbind(Uwt_mean, TOTAL_rds2_bs_mean, TOTAL_OH_rds2_bs_mean, INTERACT_rds2_bs_mean, CLOSE_rds2_bs_mean, FAM_rds2_bs_mean), rbind(Uwt_se, TOTAL_rds2_bs_se, TOTAL_OH_rds2_bs_se, INTERACT_rds2_bs_se, CLOSE_rds2_bs_se, FAM_rds2_bs_se)),3) table_4_hlsk ############################################################### # APPENDIX 1 # ############################################################### options(scipen = 999) D_TOTAL_h<-ggplot(HLSK, aes(x=D_TOTAL)) + geom_histogram(color="navy", fill="navy", binwidth=1000)+ ylim(0, 100)+ geom_vline(aes(xintercept=mean(D_TOTAL, na.rm=T)), color="red", linetype="dashed", size=1)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y =90, label = paste0("Mean: ", round(mean(D_TOTAL, na.rm=T), 2))), size = 4, color="black", fontface="italic", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y=80, label=paste0("SD: ", round(sd(D_TOTAL, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y=70, label=paste0("CV: ", round(sd(D_TOTAL, na.rm=T)/ mean(D_TOTAL, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y=60, label=paste0("Median: ", round(median(D_TOTAL, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y=50, label=paste0("Maximum: ", round(max(D_TOTAL, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL, na.rm=T)+40000, y=40, label="98th Percentile: 500", fontface ="italic"), size = 4, color="black", hjust = 0)+ labs(x="Degree: Total\nwith Outliers\n", y="Sample Size")+ scale_x_continuous(labels = scales::scientific) D_TOTAL_OH_h<-ggplot(HLSK, aes(x=D_TOTAL_OH)) + geom_histogram(color="navy", fill="navy", binwidth=3000)+ ylim(0, 100)+ geom_vline(aes(xintercept=mean(D_TOTAL_OH, na.rm=T)), color="red", linetype="dashed", size=1)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y =90, label = paste0("Mean: ", round(mean(D_TOTAL_OH, na.rm=T), 2))), size = 4, color="black", fontface="italic", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y=80, label=paste0("SD: ", round(sd(D_TOTAL_OH, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y=70, label=paste0("CV: ", round(sd(D_TOTAL_OH, na.rm=T)/ mean(D_TOTAL_OH, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y=60, label=paste0("Median: ", round(median(D_TOTAL_OH, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y=50, label=paste0("Maximum: ", round(max(D_TOTAL_OH, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_TOTAL_OH, na.rm=T)+400000, y=40, label="98th Percentile: 31", fontface ="italic"), size = 4, color="black", hjust = 0)+ labs(x="Degree: Total Scale-Up\nLast Name Oh\nwith Outliers", y="Sample Size")+ scale_x_continuous(labels = scales::scientific) D_INTERACT_h<-ggplot(HLSK, aes(x=D_INTERACT)) + geom_histogram(color="navy", fill="navy", binwidth=1000)+ ylim(0, 100)+ geom_vline(aes(xintercept=mean(D_INTERACT, na.rm=T)), color="red", linetype="dashed", size=1)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y =90, label = paste0("Mean: ", round(mean(D_INTERACT, na.rm=T), 2))), size = 4, color="black", fontface="italic", hjust = 0)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y=80, label=paste0("SD: ", round(sd(D_INTERACT, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y=70, label=paste0("CV: ", round(sd(D_INTERACT, na.rm=T)/ mean(D_INTERACT, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y=60, label=paste0("Median: ", round(median(D_INTERACT, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y=50, label=paste0("Maximum: ", round(max(D_INTERACT, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_INTERACT, na.rm=T)+25000, y=40, label="98th Percentile: 50", fontface ="italic"), size = 4, color="black", hjust = 0)+ labs(x="Degree: Interact\n>Once/Week\nwith Outliers", y="Sample Size")+ scale_x_continuous(labels = scales::scientific, breaks=seq(0,250000,100000)) D_CLOSE_h<-ggplot(HLSK, aes(x=D_CLOSE)) + geom_histogram(color="navy", fill="navy", binwidth=1)+ ylim(0, 100)+ geom_vline(aes(xintercept=mean(D_CLOSE, na.rm=T)), color="red", linetype="dashed", size=1)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y =90, label = paste0("Mean: ", round(mean(D_CLOSE, na.rm=T), 2))), size = 4, color="black", fontface="italic", hjust = 0)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y=80, label=paste0("SD: ", round(sd(D_CLOSE, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y=70, label=paste0("CV: ", round(sd(D_CLOSE, na.rm=T)/ mean(D_CLOSE, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y=60, label=paste0("Median: ", round(median(D_CLOSE, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y=50, label=paste0("Maximum: ", round(max(D_CLOSE, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_CLOSE, na.rm=T)+10, y=40, label="98th Percentile: 20", fontface ="italic"), size = 4, color="black", hjust = 0)+ labs(x="Degree: Close\nRelationship\nwith Outliers", y="Sample Size")+ scale_x_continuous() D_FAM_h<-ggplot(HLSK, aes(x=D_FAM)) + geom_histogram(color="navy", fill="navy", binwidth=1000)+ ylim(0, 100)+ geom_vline(aes(xintercept=mean(D_FAM, na.rm=T)), color="red", linetype="dashed", size=1)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y =90, label = paste0("Mean: ", round(mean(D_FAM, na.rm=T), 2))), size = 4, color="black", fontface="italic", hjust = 0)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y=80, label=paste0("SD: ", round(sd(D_FAM, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y=70, label=paste0("CV: ", round(sd(D_FAM, na.rm=T)/ mean(D_FAM, na.rm=T),2)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y=60, label=paste0("Median: ", round(median(D_FAM, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y=50, label=paste0("Maximum: ", round(max(D_FAM, na.rm=T),1)), fontface ="italic"), size = 4, color="black", hjust = 0)+ geom_text(aes(x = mean(D_FAM, na.rm=T)+20000, y=40, label="98th Percentile: 19", fontface ="italic"), size = 4, color="black", hjust = 0)+ labs(x="Degree: Family\nwith Outliers\n", y="Sample Size")+ scale_x_continuous(labels = scales::scientific, breaks=seq(0,100000,30000)) figAppendix<-ggarrange(D_TOTAL_h, D_TOTAL_OH_h, blank, D_INTERACT_h, D_CLOSE_h, D_FAM_h, ncol=3, nrow=2, common.legend = TRUE) figAppendix #Saved as a pdf for a size7.5 x 5