A skeletal response to a twitter question:

“ANOVA (time point x group) or ANCOVA (group with time point as a covariate) for intervention designs? Discuss.”

follow-up “Only 2 time points in this case (pre- and post-intervention), and would wanna basically answer the question of whether out of the 3 intervention groups, some improve on measure X more than others after the intervention”

Here I compare five methods using fake pre-post data, including

1. lm-cov. A linear model with baseline as a covariate. This is “ANCOVA” in the ANOVA-world.
2. clda. constrained longitudinal data analysis (cLDA). A linear mixed model in which the intercept is constrained to be equal (no treatment effect at time 0).
3. rmanova. Repeated measures ANOVA.
4. lmm. A linear mixed model version of rmanova. A difference is the rmanova will use a sphericity correction.
5. two-way. Linear model with Treatment and Time and their interaction as fixed factors. Equivalent to a Two-way fixed effects ANOVA in ANOVA-world.

I use a simulation to compute Type I error rate and Power. This is far from a comprehensive simulation. Like I said, its skeletal.

TL;DR. At the risk of concluding anything from a very skeletal simulation:

• lm-cov has best performance in since of combining control of Type I error and relative power
• smaller SE in clda comes at a cost of relatively high Type I error but increased powers. Do with this what you will.
• rmanova has smaller power than lm-cov

Not included: I didn’t save the results necessary to plot type I error conditional on baseline difference, which is well controlled by lm-cov (and clda) but not by lmm or rmanova.

# libraries

library(data.table)
library(nlme)
library(emmeans)
library(afex)
library(ggpubr)
library(ggsci)
library(cowplot)

# simulation functions

## pre-post fake data generator

My general algorithm for creating pre-post data with a single post time point differs from how I create pre-post data with multiplee post time points.

generate_pre_post <- function(
n = 6, # if vector then sample per group
sigma_0 = 1, # error at time 0
sigma_1 = 1, # error at time 1
rho = 0.6, # correlation between time 0 and 1
beta_0 = 10, # control mean at time 0
beta_1 = 1, # treatment effect at time 1
beta_2 = 5 # time effect at time 1
){

if(length(n) == 1){n <- rep(n,2)} # set n = n for all groups
N <- sum(n)

fd <- data.table(
treatment = factor(rep(c("cn", "tr"), n))
)
X <- model.matrix(~treatment, data=fd)

z <- rnorm(N) # z is the common factor giving correlation at time 0 and 1. This
# is easier for me than making glucose_1 = glucose_0 + treatment + noise
fd[, glucose_0 := beta_0 + sqrt(rho)*z + sqrt(1 - rho)*rnorm(N, sd=sigma_0)]
fd[, glucose_1 := beta_0 + sqrt(rho)*z*sigma_1 + sqrt(1 - rho)*rnorm(N, sd=sigma_1) +
X[,2]*beta_1 + # add treatment effect
]
fd[, change := glucose_1 - glucose_0] # change score
fd[, id := factor(1:.N)]
return(fd)
}

## test FDG

fd <- generate_pre_post(
n = c(10^5, 10^5),
sigma_0 = 1,
sigma_1 = 2,
rho = 0.6,
beta_0 = 10,
beta_1 = 1
)

m1 <- lm(glucose_1 ~ treatment, data = fd)
fd[, glucose_1_cond := coef(m1)[1] + residuals(m1)]
coef(summary(m1))
##               Estimate  Std. Error   t value Pr(>|t|)
## (Intercept) 15.0086453 0.006318270 2375.4359        0
## treatmenttr  0.9873514 0.008935383  110.4991        0
cov(fd[, .SD, .SDcols=c("glucose_0", "glucose_1_cond")])
##                glucose_0 glucose_1_cond
## glucose_0        1.00213       1.197850
## glucose_1_cond   1.19785       3.992034
cor(fd[, .SD, .SDcols=c("glucose_0", "glucose_1_cond")])
##                glucose_0 glucose_1_cond
## glucose_0       1.000000       0.598885
## glucose_1_cond  0.598885       1.000000
coef(summary(lm(glucose_1 ~ glucose_0 + treatment, data=fd)))
##              Estimate  Std. Error   t value Pr(>|t|)
## (Intercept) 3.0529788 0.036105062  84.55819        0
## glucose_0   1.1953072 0.003574099 334.43596        0
## treatmenttr 0.9911134 0.007155790 138.50511        0

## fake data wide to long

wide_to_long <- function(
dt, #data.table,
measure_prefix = "glucose"
){
# measure _prefix is the prefix of the measurement columns that will be melted. A "_" between the prefix and a time value is assumed. So with measure vars like glucose_0, glucose_15, glucose_20, the measure prefix is "glucose"
dt <- data.table(dt)
dt_long <- melt(dt,
id.vars=c("treatment", "id"),
measure.vars=patterns(paste0(measure_prefix, "_")),
variable.name = "time",
value.name = measure_prefix)
return(dt_long)
}

# Fit model functions

## two-way anova

two_way <- function(dt_long){
fit <- lm(glucose ~ time*treatment, data=dt_long)
return(fit)
}

## repeated measures anova

rmanova <- function(dt_long){
fit <- aov_4(glucose ~ time*treatment + (time|id),
data=dt_long)
return(fit)
}

## linear model with baseline as covariate

lm_cov <- function(dt){
fit <- lm(glucose_1 ~ glucose_0 + treatment, data = dt)
return(fit)
}

## linear mixed model

lmm <- function(dt_long){
fit <- lme(glucose ~ time*treatment,
random = ~1|id,
data = dt_long,
weights = varIdent(form= ~ 1 | time),
correlation= corSymm(form=~ 1 | id))
return(fit)
}

## cLDA functions

Two equivalent functions for CLDA

### clda-1

clda1 <- function(dt_long){
design <- model.matrix( ~ time + treatment:time, data = dt_long)
# remove intercept column and effect of tr at time 0
X <- design[, -c(1, which(colnames(design) == "timeglucose_0:treatmenttr"))]
colnames(X)[2] <- "timeglucose_1_treatmenttr"
dt_long_x <- cbind(dt_long, X)
form <- formula(paste0("glucose ~ ", paste(colnames(X), collapse = " + ")))
fit <- gls(form,
data = dt_long_x,
weights = varIdent(form= ~ 1 | time),
correlation= corSymm(form=~ 1| id))
return(fit)
}

### clda-2

clda2 <- function(dt_long){
dt_clda <- copy(dt_long)
dt_clda[, time.treatment := ifelse(time != "glucose_0" & treatment=="tr", paste0(time, ":tr"), "cont")]
dt_clda[, time.treatment := factor(time.treatment, c("cont","glucose_1:tr"))]
fit <- gls(glucose ~ time + time.treatment,
data = dt_clda,
weights = varIdent(form= ~ 1 | time),
correlation= corSymm(form=~ 1| id))
return(fit)
}

## test functions

Some equivalents

1. cLDA is equivalent estimate to the linear model with baseline covariate but has a smaller standard error.
2. the two-way (fixed) ANOVA and lmm have equivalent estimates but SE is larger
3. the lmm and rmanova have the same p-value (assuming sphericity)
fd <- generate_pre_post(
n = 6,
sigma_0 = 1,
sigma_1 = 1,
rho = 0.6,
beta_0 = 10,
beta_1 = 1
)

fd_long <- wide_to_long(fd,
measure_prefix = "glucose"
)

m1 <- lm_cov(fd)
m2 <- clda1(fd_long)
m3 <- clda2(fd_long)
m4 <- lmm(fd_long)
m5 <- two_way(fd_long)
m6 <- rmanova(fd_long)

linear model with baseline covariate

coef(summary(m1))
##              Estimate Std. Error  t value   Pr(>|t|)
## (Intercept) 9.4423880  3.3526784 2.816372 0.02016613
## glucose_0   0.5799923  0.3490826 1.661476 0.13098439
## treatmenttr 0.7087883  0.4448445 1.593340 0.14554812

cLDA (both R methods)

coef(summary(m2))
##                               Value Std.Error   t-value      p-value
## (Intercept)               9.6663085 0.1921116 50.316108 2.223746e-23
## timeglucose_1             5.3824640 0.3053176 17.629063 4.601669e-14
## timeglucose_1_treatmenttr 0.7087883 0.4164331  1.702046 1.035060e-01
coef(summary(m3))
##                                Value Std.Error   t-value      p-value
## (Intercept)                9.6663085 0.1921116 50.316108 2.223746e-23
## timeglucose_1              5.3824640 0.3053176 17.629063 4.601669e-14
## time.treatmentglucose_1:tr 0.7087883 0.4164331  1.702046 1.035060e-01

two-way (fixed effect) linear model

coef(summary(m5))
##                            Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)               9.5630065  0.3101326 30.8352174 2.449397e-18
## timeglucose_1             5.4258517  0.4385937 12.3710192 7.923275e-11
## treatmenttr               0.2066040  0.4385937  0.4710601 6.426952e-01
## timeglucose_1:treatmenttr 0.6220131  0.6202652  1.0028179 3.279282e-01

linear mixed model

coef(summary(m4))
##                               Value Std.Error DF    t-value      p-value
## (Intercept)               9.5630065 0.2811776 10 34.0105547 1.142426e-11
## timeglucose_1             5.4258517 0.3172619 10 17.1021224 9.855274e-09
## treatmenttr               0.2066040 0.3976452 10  0.5195688 6.146692e-01
## timeglucose_1:treatmenttr 0.6220131 0.4486761 10  1.3863299 1.957756e-01

repeated measures ANOVA

summary(m6)
##
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
##
##                Sum Sq num Df Error SS den Df   F value    Pr(>F)
## (Intercept)    3770.9      1   8.5222     10 4424.7551 1.436e-14 ***
## treatment         1.6      1   8.5222     10    1.8863    0.1996
## time            197.5      1   3.0197     10  653.9468 1.919e-10 ***
## treatment:time    0.6      1   3.0197     10    1.9219    0.1958
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# simulations

## single post time point (“pre-post”)

simulate_pre_post <- function(
niter = 1000, # number of iterations
n = 6, # if vector then sample per group
sigma_0 = 1, # error at time 0
sigma_1 = 1, # error at time 1
rho = 0.6, # correlation between time 0 and 1
beta_0 = 10, # control mean at time 0
beta_1 = 1, # treatment effect at time 1
beta_2 = 5 # time effect at time 1
){

if(length(n) == 1){n <- rep(n,2)} # set n = n for all groups
method_list <- c("lm_cov", "clda", "lmm", "rmanova")
prob_wide <- data.table(matrix(-9999, nrow=niter, ncol=length(method_list)))
colnames(prob_wide) <- method_list
for(iter in 1:niter){
fd <- generate_pre_post(
n = n,
sigma_0 = sigma_0,
sigma_1 = sigma_1,
rho = rho,
beta_0 = beta_0,
beta_1 = beta_1
)

fd_long <- wide_to_long(fd,
measure_prefix = "glucose"
)

m1 <- lm_cov(fd)
m2 <- clda1(fd_long)
m3 <- clda2(fd_long)
m4 <- lmm(fd_long)
m5 <- two_way(fd_long)
m6 <- rmanova(fd_long)

prob_wide[iter, lm_cov := coef(summary(m1))["treatmenttr", "Pr(>|t|)"]]
prob_wide[iter, clda := coef(summary(m2))["timeglucose_1_treatmenttr", "p-value"]]
prob_wide[iter, lmm := coef(summary(m4))["timeglucose_1:treatmenttr", "p-value"]]
prob_wide[iter, rmanova := summary(m6)\$univariate.tests["treatment:time", "Pr(>F)"]]

}

prob <- melt(prob_wide,
id.vars = NULL,
measure.vars = method_list,
variable.name = "method",
value.name = "p_value")
return(prob)
}

## Summary statistics

p_summary <- function(prob, niter){
prob_summary <- prob[, .(freq_lt_05 = sum(p_value < 0.05)/max(niter)),
by = .(method)]
return(prob_summary)
}

## Plot:

plot_it <- function(prob){
jco_pal <- pal_jco()(6)
group_colors <- jco_pal[c(2,5,4,6)]
group_shapes <- c(15, 16, 17,  18)

pd <- position_dodge(0.8)
gg1 <- ggplot(data = prob,
aes(x=method, y=freq_lt_05, color=method, shape=method)) +
geom_point(position = pd) +
scale_color_manual(values = group_colors) +
scale_shape_manual(values = group_shapes) +
ylab("Frequency p < 0.05") +
xlab("") +
theme_pubr() +
theme(legend.position="none") +
guides(col = guide_legend(ncol = 5, byrow = FALSE)) +
NULL
return(gg1)
}

## Type I error

# parameters of simulation
niter = 1000 # number of iterations
n = 6 # if vector then sample per group
sigma_0 = 1 # error at time 0
sigma_1 = 1 # error at time 1
rho = 0.6 # correlation between time 0 and 1
beta_0 = 10 # control mean at time 0
beta_1 = 0 # treatment effect at time 1
beta_2 = 5 # time effect at time 1

prob_type1 <- simulate_pre_post(niter = niter,
n = n,
sigma_0 = sigma_0,
sigma_1 = sigma_1,
rho = rho,
beta_0 = beta_0,
beta_1 = beta_1,
beta_2 = beta_2)

prob_type1_sum <- p_summary(prob_type1, niter)

## Power

# parameters of simulation
niter = 1000 # number of iterations
n = 6 # if vector then sample per group
sigma_0 = 1 # error at time 0
sigma_1 = 1 # error at time 1
rho = 0.6 # correlation between time 0 and 1
beta_0 = 10 # control mean at time 0
beta_1 = 1 # treatment effect at time 1
beta_2 = 5 # time effect at time 1

prob_power <- simulate_pre_post(niter = niter,
n = n,
sigma_0 = sigma_0,
sigma_1 = sigma_1,
rho = rho,
beta_0 = beta_0,
beta_1 = beta_1,
beta_2 = beta_2)

prob_power_sum <- p_summary(prob_power, niter)

# Tables

## Type I error rates

table1 <- prob_type1_sum
colnames(table1)[2] <- "Type I error rate"
knitr::kable(table1)
method Type I error rate
lm_cov 0.055
clda 0.099
lmm 0.040
rmanova 0.040

## Power

table2 <- prob_power_sum
colnames(table2)[2] <- "Power"
knitr::kable(table2, digits = 2)
method Power
lm_cov 0.43
clda 0.57
lmm 0.41
rmanova 0.41

# Plots

gg_a <- plot_it(prob_type1_sum)
gg_b <- plot_it(prob_power_sum)
plot_grid(gg_a, gg_b, ncol=2)