# Import libraries
library(tidyverse)
library(emmeans)

# Change working directory !!!
setwd("C:/Users/Sam/Documents/Rstats");

# Read the CSV data file
Dataframe01 <- read_csv("ApoE_simul_dataset_v1.csv")

# View Dataframe01
head(Dataframe01)

# z-score Age (zAge) and add it to the end of Dataframe01 as a new variable
# I am being verbose here as there is you can just use the "scale" function
Dataframe01$zAge <- 
  (Dataframe01$age - mean(Dataframe01$age, na.rm = TRUE)) / 
  sd(Dataframe01$age, na.rm = TRUE)

# Dummy code Sex (capital "S") and add it to the end of Dataframe01
Dataframe01$Sex <- Dataframe01$sex - 1

# Delete unused variables (age + sex) so that we don't make any mistakes
Dataframe01 = subset(Dataframe01, select=-c(age,sex))

# Rename variables for consistency
names(Dataframe01)[names(Dataframe01) == 'apoe'] <- 'ApoE'
names(Dataframe01)[names(Dataframe01) == 'score'] <- 'Score'

# Re-order the variables in Dataframe01
Dataframe01 <- Dataframe01[,c(1,4,5,2,3)]

# View Dataframe01 again.
head(Dataframe01)

# Run a simple linear regression model ("Score ~ 1 + zAge*Sex*ApoE")
# We will call this model "Glme01".
# Note that we cannot construct a mixed-effects model in this case because ...
# each participant (unique ID) only contributes one measurement to the ...
# data-set. In other words, a mixed-effects model is not needed as the study ...
# design is between-subjects.
Glme01 <- lm(formula = Score ~ 1 + zAge*Sex*ApoE)

# Display a summary of the model parameters.
summary(Glme01)

# As the "Sex" and "ApoE" variables are dummy coded ...
# we need to run extra contrasts to test the main effect of each ...
# (this situation can be avoided by z-scoring all the variables before ...
# estimating the model).
# First, we must pull out the model parameters ("B") and the parameter ...
# covariance matrix ("C").
B <- matrix(as.numeric(coefficients(Glme01)))
C <- matrix(as.numeric(vcov(Glme01)),8,8)

# Then we can compute a parameter contrast matrix ("H"), t-value ("t"), ...
# and p-value ("p") for each main effect.
H.Sex <- t((c(0,0,2,0,0,0,1,0)))
t.Sex <- (H.Sex %*% B) / sqrt(H.Sex %*% C %*% t(H.Sex))
p.Sex <- 2*pt(-abs(t.Sex), df = Glme01$df.residual)

H.ApoE <- t((c(0,0,0,2,0,0,1,0)))
t.ApoE <- (H.ApoE %*% B) / sqrt(H.ApoE %*% C %*% t(H.ApoE))
p.ApoE <- 2*pt(-abs(t.ApoE), df = Glme01$df.residual)
# The residual degrees of freedom are given by "Glme01$df.residual".

# If you wish, you can use the model to estimate "marginal means" for ...
# each group.
Means.Sex <- emmeans(Glme01, "Sex")
summary(Means.Sex)

Means.ApoE <- emmeans(Glme01, "ApoE")
summary(Means.ApoE)