Update – Fig. 2A is an analysis of the maximum endurance over three trials. This has consequences.

Figure 2A of the paper Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism uses a paired t-test to compare endurance performance in mice treated with a control microbe (Lactobacillus bulgaricus) and a test microbe (Veillonella atypica) in a cross-over design (so each mouse was treated with both bacteria). The data are in the “Supplementary Tables” Excel file, which includes the raw data for the main paper.



bookdown_it <- TRUE
  data_path <- "../data"
  data_path <- "../content/data"


folder <- "Data from Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism"
file_i <- "41591_2019_485_MOESM2_ESM.xlsx"
file_path <- paste(data_path, folder, file_i, sep="/")
sheet_i <- "ST3 Veillonella run times"
range_vec <- c("a5:d21", "f5:i21", "a24:d40", "f24:i40")
fig2a <- data.table(NULL)
poop <- c("Lb", "Va")
for(week_i in 1:4){
  range_i <- range_vec[week_i]
  if(week_i %in% c(1, 3)){
    treatment <- rep(c(poop[1], poop[2]), each=8)
    treatment <- rep(c(poop[2], poop[1]), each=8)
  fig2a <- rbind(fig2a, data.table(week=week_i, treatment=treatment, data.table(
    read_excel(file_path, sheet=sheet_i, range=range_i))))

# clean column names
setnames(fig2a, old="...1", new="ID")
setnames(fig2a, old=colnames(fig2a), new=clean_label(colnames(fig2a)))

# wide to long
fig2a <- melt(fig2a, id.vars=c("week", "ID", "treatment"),

Because of the cross-over design, the ID is not in order within each level of treatment. Make a wide data table with new treatment columns matched by ID.

# get max for each week x ID x treatment combo
fig2a.max <- fig2a[, .(time_max=max(time)), by=.(week, ID, treatment)]

# match the two treatments applied to each ID
va <- fig2a.max[treatment=="Va", ]
lb <- fig2a.max[treatment=="Lb", ]
fig2a_wide <- merge(va, lb, by="ID")


paired t-test

# paired t
res.t <- t.test(fig2a_wide[, time_max.x], fig2a_wide[, time_max.y], paired=TRUE)
##  Paired t-test
## data:  fig2a_wide[, time_max.x] and fig2a_wide[, time_max.y]
## t = 2.4117, df = 31, p-value = 0.02199
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   20.47859 244.89641
## sample estimates:
## mean of the differences 
##                132.6875

linear model

# as a linear model
y <- fig2a_wide[, time_max.x] - fig2a_wide[, time_max.y] # Va - Lb
res.lm <- lm(y ~ 1)
##             Estimate Std. Error  t value   Pr(>|t|)
## (Intercept) 132.6875   55.01749 2.411733 0.02198912

hierarchical (linear mixed) model with random intercept

# as multi-level model with random intercept
res.lmer <- lmer(time_max ~ treatment + (1|ID), data=fig2a.max)
##              Estimate Std. Error       df   t value     Pr(>|t|)
## (Intercept) 1023.7187   43.37421 59.71689 23.602014 5.703668e-32
## treatmentVa  132.6875   55.01752 30.99995  2.411732 2.198920e-02