Paired t-test as a special case of linear model and hierarchical (linear mixed) model
Update – Fig. 2A is an analysis of the maximum endurance over three trials. This has consequences.
Figure 2A of the paper Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism uses a paired t-test to compare endurance performance in mice treated with a control microbe (Lactobacillus bulgaricus) and a test microbe (Veillonella atypica) in a cross-over design (so each mouse was treated with both bacteria). The data are in the “Supplementary Tables” Excel file, which includes the raw data for the main paper.
Setup
library(ggplot2)
library(ggpubr)
library(readxl)
library(data.table)
library(lmerTest)
library(emmeans)
bookdown_it <- TRUE
if(bookdown_it==TRUE){
data_path <- "../data"
source("../../../R/clean_labels.R")
}else{
data_path <- "../content/data"
source("../../R/clean_labels.R")
}
Import
folder <- "Data from Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism"
file_i <- "41591_2019_485_MOESM2_ESM.xlsx"
file_path <- paste(data_path, folder, file_i, sep="/")
sheet_i <- "ST3 Veillonella run times"
range_vec <- c("a5:d21", "f5:i21", "a24:d40", "f24:i40")
fig2a <- data.table(NULL)
poop <- c("Lb", "Va")
for(week_i in 1:4){
range_i <- range_vec[week_i]
if(week_i %in% c(1, 3)){
treatment <- rep(c(poop[1], poop[2]), each=8)
}else{
treatment <- rep(c(poop[2], poop[1]), each=8)
}
fig2a <- rbind(fig2a, data.table(week=week_i, treatment=treatment, data.table(
read_excel(file_path, sheet=sheet_i, range=range_i))))
}
# clean column names
setnames(fig2a, old="...1", new="ID")
setnames(fig2a, old=colnames(fig2a), new=clean_label(colnames(fig2a)))
# wide to long
fig2a <- melt(fig2a, id.vars=c("week", "ID", "treatment"),
variable.name="day",
value.name="time")
Because of the cross-over design, the ID is not in order within each level of treatment. Make a wide data table with new treatment columns matched by ID.
# get max for each week x ID x treatment combo
fig2a.max <- fig2a[, .(time_max=max(time)), by=.(week, ID, treatment)]
# match the two treatments applied to each ID
va <- fig2a.max[treatment=="Va", ]
lb <- fig2a.max[treatment=="Lb", ]
fig2a_wide <- merge(va, lb, by="ID")
Comparison
paired t-test
# paired t
res.t <- t.test(fig2a_wide[, time_max.x], fig2a_wide[, time_max.y], paired=TRUE)
res.t
##
## Paired t-test
##
## data: fig2a_wide[, time_max.x] and fig2a_wide[, time_max.y]
## t = 2.4117, df = 31, p-value = 0.02199
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 20.47859 244.89641
## sample estimates:
## mean of the differences
## 132.6875
linear model
# as a linear model
y <- fig2a_wide[, time_max.x] - fig2a_wide[, time_max.y] # Va - Lb
res.lm <- lm(y ~ 1)
coef(summary(res.lm))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.6875 55.01749 2.411733 0.02198912
hierarchical (linear mixed) model with random intercept
# as multi-level model with random intercept
res.lmer <- lmer(time_max ~ treatment + (1|ID), data=fig2a.max)
coef(summary(res.lmer))
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1023.7187 43.37421 59.71689 23.602014 5.703668e-32
## treatmentVa 132.6875 55.01752 30.99995 2.411732 2.198920e-02