Update – Fig. 2A is an analysis of the maximum endurance over three trials. This has consequences.

Figure 2A of the paper Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism uses a paired t-test to compare endurance performance in mice treated with a control microbe (Lactobacillus bulgaricus) and a test microbe (Veillonella atypica) in a cross-over design (so each mouse was treated with both bacteria). The data are in the “Supplementary Tables” Excel file, which includes the raw data for the main paper.

Setup

library(ggplot2)
library(ggpubr)
library(readxl)
library(data.table)
library(lmerTest)
library(emmeans)

bookdown_it <- TRUE
if(bookdown_it==TRUE){
  data_path <- "../data"
  source("../../../R/clean_labels.R")
}else{
  data_path <- "../content/data"
  source("../../R/clean_labels.R")
}

Import

folder <- "Data from Meta-omics analysis of elite athletes identifies a performance-enhancing microbe that functions via lactate metabolism"
file_i <- "41591_2019_485_MOESM2_ESM.xlsx"
file_path <- paste(data_path, folder, file_i, sep="/")
sheet_i <- "ST3 Veillonella run times"
range_vec <- c("a5:d21", "f5:i21", "a24:d40", "f24:i40")
fig2a <- data.table(NULL)
poop <- c("Lb", "Va")
for(week_i in 1:4){
  range_i <- range_vec[week_i]
  if(week_i %in% c(1, 3)){
    treatment <- rep(c(poop[1], poop[2]), each=8)
  }else{
    treatment <- rep(c(poop[2], poop[1]), each=8)
  }
  fig2a <- rbind(fig2a, data.table(week=week_i, treatment=treatment, data.table(
    read_excel(file_path, sheet=sheet_i, range=range_i))))
}

# clean column names
setnames(fig2a, old="...1", new="ID")
setnames(fig2a, old=colnames(fig2a), new=clean_label(colnames(fig2a)))

# wide to long
fig2a <- melt(fig2a, id.vars=c("week", "ID", "treatment"),
              variable.name="day",
              value.name="time")

Because of the cross-over design, the ID is not in order within each level of treatment. Make a wide data table with new treatment columns matched by ID.

# get max for each week x ID x treatment combo
fig2a.max <- fig2a[, .(time_max=max(time)), by=.(week, ID, treatment)]

# match the two treatments applied to each ID
va <- fig2a.max[treatment=="Va", ]
lb <- fig2a.max[treatment=="Lb", ]
fig2a_wide <- merge(va, lb, by="ID")

Comparison

paired t-test

# paired t
res.t <- t.test(fig2a_wide[, time_max.x], fig2a_wide[, time_max.y], paired=TRUE)
res.t
## 
##  Paired t-test
## 
## data:  fig2a_wide[, time_max.x] and fig2a_wide[, time_max.y]
## t = 2.4117, df = 31, p-value = 0.02199
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   20.47859 244.89641
## sample estimates:
## mean of the differences 
##                132.6875

linear model

# as a linear model
y <- fig2a_wide[, time_max.x] - fig2a_wide[, time_max.y] # Va - Lb
res.lm <- lm(y ~ 1)
coef(summary(res.lm))
##             Estimate Std. Error  t value   Pr(>|t|)
## (Intercept) 132.6875   55.01749 2.411733 0.02198912

hierarchical (linear mixed) model with random intercept

# as multi-level model with random intercept
res.lmer <- lmer(time_max ~ treatment + (1|ID), data=fig2a.max)
coef(summary(res.lmer))
##              Estimate Std. Error       df   t value     Pr(>|t|)
## (Intercept) 1023.7187   43.37421 59.71689 23.602014 5.703668e-32
## treatmentVa  132.6875   55.01752 30.99995  2.411732 2.198920e-02