## Data import and cleaning

#' Import and clean the sweep 10 file, merged with sweep 9 language data
#'
#' Reads `UKDA-5760-tab/tab/gus_bc1sw10_protect.tab` and
#' `UKDA-5760-tab/tab/gus_cohort1_sw9_protect.tab` below `root`, keeps the
#' analysis variables, left-joins the sweep 9 language variable onto sweep 10
#' by case identifier, and derives the recoded predictors.
#'
#' Processing steps, in order:
#' 1. keep rows with `MjInterviewMode == 1` (pre March 2020 interviews);
#' 2. rename `IDNumber` to `Idnumber`;
#' 3. set every negative value to `NA`;
#' 4. set answers of 4 ("Don't know") to `NA` in every column whose name
#'    contains `MjSDQ`;
#' 5. rename the adolescent-reported SDQ items to `CjSDQ01` ... `CjSDQ25`;
#' 6. reverse-score items 07, 11, 14, 21 and 25 for both `Cj` and `Mj` sets;
#' 7. add `gender`, `family`, `tenancy`, `depress`, `lang` and `finance`;
#' 8. drop rows whose weight `Djwtbrthf` is missing.
#'
#' @param root Path of the directory that contains the `UKDA-5760-tab` folder.
#' @return A data frame with one row per retained case: the selected source
#'   columns followed by the six derived predictor columns.
sw10clean <- function(root) {
  stopifnot(is.character(root), length(root) == 1L)

  ## load required packages
  # Attached (rather than namespaced) because the body relies on `%>%` and
  # unqualified dplyr verbs.
  library(dplyr)

  # Swap 1 <-> 3 and keep 2; any other value (including NA) becomes NA,
  # because case_when() returns NA when no condition matches.
  reverse_score <- function(x) {
    case_when(x == 1 ~ 3,
              x == 2 ~ 2,
              x == 3 ~ 1)
  }

  ### Data import
  sw10 <- read.delim(
    file.path(root, "UKDA-5760-tab/tab/gus_bc1sw10_protect.tab")
  )
  sw10 <- sw10 %>%
    dplyr::select(IDNumber,
                  MjInterviewMode,
                  contains("SDQ"),
                  CjGenId,
                  Djwtbrthf,
                  Djpsu,
                  Djstrata,
                  DjMedu02,
                  MjWmnf01,
                  MjZhou05,
                  MjRCidDp,
                  MjMorg01,
                  Djhgrsp04,
                  MjZspe01)

  sw9 <- read.delim(
    file.path(root, "UKDA-5760-tab/tab/gus_cohort1_sw9_protect.tab")
  )
  sw9 <- sw9 %>%
    dplyr::select(IDNumber,
                  MiZspe01)

  ## Merge sweep 9 biographical information (keep every sweep 10 case)
  data <- merge(sw10, sw9, by = "IDNumber", all.x = TRUE)

  ## remove unnecessary data frames
  rm(sw9, sw10)

  # Subset to pre March 2020
  data <- subset(data, MjInterviewMode == 1)

  # Rename case identifier
  data <- rename(data, "Idnumber" = IDNumber)

  # setting missing data to NA (negative codes are treated as missing)
  data[data < 0] <- NA

  # setting NA for answers of "4" (Don't know) for maternal SDQ responses
  for (col in grep("MjSDQ", colnames(data), value = TRUE)) {
    # which() drops NA comparisons, so already-missing cells are untouched
    data[[col]][which(data[[col]] == 4)] <- NA
  }

  # renaming adolescent reported data so it matches parental reported
  data <- rename(
    data,
    "CjSDQ25" = CjSDQwk, # "Finish my work"
    "CjSDQ24" = CjSDQfe, # "Have many fears"
    "CjSDQ23" = CjSDQgo, # "I get on better with adults than people my own age"
    "CjSDQ22" = CjSDQst, # "I take things that are not mine"
    "CjSDQ21" = CjSDQth, # "I think before I do things"
    "CjSDQ20" = CjSDQvo, # "I often volunteer to help others"
    "CjSDQ19" = CjSDQpb, # "Other children or young people pick on me"
    "CjSDQ18" = CjSDQly, # "I am often accused of lying or cheating"
    "CjSDQ17" = CjSDQki, # "I am kind to younger children"
    "CjSDQ16" = CjSDQne, # "I am nervous in new situations"
    "CjSDQ15" = CjSDQdi, # "I am easily distracted"
    "CjSDQ14" = CjSDQli, # "Other people my age generally like me"
    "CjSDQ13" = CjSDQun, # "I am often unhappy, down-hearted or tearful"
    "CjSDQ12" = CjSDQfg, # "I fight a lot. Make other people do what I want"
    "CjSDQ11" = CjSDQfr, # "I have one good friend or more"
    "CjSDQ10" = CjSDQfi, # "I fidget or squirm"
    "CjSDQ09" = CjSDQhe, # "I am helpful if someone is hurt, upset"
    "CjSDQ08" = CjSDQwo, # "I worry a lot"
    "CjSDQ07" = CjSDQto, # "I usually do as I am told"
    "CjSDQ06" = CjSDQal, # "I am usually on my own"
    "CjSDQ05" = CjSDQan, # "I get very angry and often lose my temper"
    "CjSDQ04" = CjSDQsh, # "I usually share with others"
    "CjSDQ03" = CjSDQac, # "I get a lot of headaches, stomach-aches"
    "CjSDQ02" = CjSDQrt, # "I am restless, I cannot stay still for long"
    "CjSDQ01" = CjSDQni  # "I try to be nice to people."
  )

  # Correcting reverse-scored items (same five item numbers for the
  # adolescent `Cj` and maternal `Mj` sets); columns are replaced in place,
  # so their position in the data frame does not change.
  reverse_items <- paste0(
    rep(c("CjSDQ", "MjSDQ"), each = 5L),
    c("07", "11", "14", "21", "25")
  )
  data[reverse_items] <- lapply(data[reverse_items], reverse_score)

  # Predictive variable recodes
  # Missing inputs need no explicit branch: comparisons with NA are never
  # TRUE, so case_when() falls through to its NA default. Codes that are
  # deliberately discarded are listed with a typed NA for readability.
  data <- data %>%
    mutate(
      # Gender
      gender = case_when(CjGenId == 1 ~ 0,
                         CjGenId == 2 ~ 1,
                         CjGenId == 3 ~ NA_real_),
      # Family composition
      family = case_when(Djhgrsp04 == 0 ~ 0,
                         Djhgrsp04 == 1 ~ 1),
      # Tenure
      tenancy = case_when(MjZhou05 <= 2 ~ 1,
                          MjZhou05 > 2 & MjZhou05 <= 8 ~ 0,
                          MjZhou05 == 9 ~ NA_real_),
      # Parent ever experienced several days feeling depressed
      depress = case_when(MjRCidDp == 1 ~ 0,
                          MjRCidDp == 2 ~ 1),
      # Language spoken in the home (combining sweep 9 + 10 for consistent
      # report): the sweep 10 answer wins, sweep 9 is the fallback
      lang = case_when(MjZspe01 == 1 ~ 1,
                       MjZspe01 == 2 ~ 0,
                       MjZspe01 == 3 ~ 0,
                       MiZspe01 == 1 ~ 1,
                       MiZspe01 == 2 ~ 0,
                       MiZspe01 == 3 ~ 0,
                       TRUE ~ NA_real_),
      # Subjective household financial stability
      finance = case_when(MjWmnf01 <= 2 ~ 1,
                          MjWmnf01 >= 3 ~ 0)
    )

  ## Setting data frame for weighted analysis
  # removes observations where weight is missing
  data <- data[!is.na(data[["Djwtbrthf"]]), ]

  ## return cleaned data frame
  data
}