If I’m going to evaluate the widespread use of t-tests/ANOVAs on count data in bench biology, then I’d like to know what these data look like – specifically, the size of the shape (“overdispersion”) parameter \(\theta\) estimated from a negative binomial fit.

Set up

library(ggplot2)
library(readxl)
library(ggpubr)
library(cowplot)
library(plyr) #mapvalues
library(data.table)

# glm packages
library(MASS)
library(pscl) #zeroinfl
library(DHARMa)
library(mvabund)

  # Root folder of the data sets, relative to the working directory
  # (presumably the notebook's own folder when run from the notebook or console).
  data_path <- "../data" # notebook, console
  # Run the project script clean_labels.R so whatever it defines is available here.
  source("../../../R/clean_labels.R") # notebook, console

Data from The enteric nervous system promotes intestinal health by constraining microbiota composition

Import

#' Read a block of count data from one range of an Excel sheet.
#'
#' Every column in the selected range is treated as a measured variable.
#' In long format the column names become the levels of `treatment` and
#' the cell values become `count`.
#'
#' @param sheet_i Sheet name, passed to `read_excel(sheet = )`.
#' @param range_i Cell range (e.g. `"a2:b11"`), passed to `read_excel(range = )`.
#' @param file_path Path to the Excel workbook.
#' @param wide_2_long If `TRUE` (the default), reshape to long format and drop
#'   rows containing `NA`. If `FALSE`, return the wide table exactly as read.
#' @return A `data.table`: long (`treatment`, `count`) or wide, per `wide_2_long`.
read_enteric <- function(sheet_i, range_i, file_path, wide_2_long = TRUE) {
  dt_wide <- data.table(read_excel(file_path, sheet = sheet_i, range = range_i))

  # Honour the flag: previously it was accepted but silently ignored.
  if (!isTRUE(wide_2_long)) {
    return(dt_wide)
  }

  dt_long <- melt(
    dt_wide,
    measure.vars = colnames(dt_wide),
    variable.name = "treatment",
    value.name = "count"
  )
  # Drop NA rows (presumably padding where columns in the range have
  # unequal numbers of observations).
  na.omit(dt_long)
}

# Locate the supplementary workbook for the enteric nervous system paper.
folder <- "Data from The enteric nervous system promotes intestinal health by constraining microbiota composition"
fn <- "journal.pbio.2000689.s008.xlsx"
file_path <- file.path(data_path, folder, fn)

# One long-format table per figure panel; each range is one panel's block of
# treatment columns on the named sheet.
fig1c <- read_enteric(file_path = file_path, sheet_i = "Figure 1", range_i = "a2:b11")
fig1e <- read_enteric(file_path = file_path, sheet_i = "Figure 1", range_i = "d2:g31")
fig1f <- read_enteric(file_path = file_path, sheet_i = "Figure 1", range_i = "i2:l53")
fig2a <- read_enteric(file_path = file_path, sheet_i = "Figure 2", range_i = "a2:d33")
fig2d <- read_enteric(file_path = file_path, sheet_i = "Figure 2", range_i = "F2:I24")
fig3a <- read_enteric(file_path = file_path, sheet_i = "Figure 3", range_i = "a2:c24")
fig3b <- read_enteric(file_path = file_path, sheet_i = "Figure 3", range_i = "e2:g12")
fig4a <- read_enteric(file_path = file_path, sheet_i = "Figure 4", range_i = "a2:b125")
fig5c <- read_enteric(file_path = file_path, sheet_i = "Figure 5", range_i = "i2:l205")
fig6d <- read_enteric(file_path = file_path, sheet_i = "Figure 6", range_i = "I2:L16")

Estimates of the shape parameter

#' Dot plot of counts by treatment, titled with the estimated theta.
#'
#' Fits a negative binomial GLM (`glm.nb(count ~ treatment)`) to the data and
#' puts the fitted `theta`, rounded to one decimal, in the plot title.
#'
#' @param fig_i Data with a `treatment` column and a `count` column.
#' @param fig_num Label used in the title. If `NULL`, the expression supplied
#'   for `fig_i` is deparsed and used instead; this gives a useful label when
#'   a named object is passed, but not when a list element is passed.
#' @return The plot object built by `ggdotplot()`.
plot_enteric <- function(fig_i, fig_num = NULL) {
  if (is.null(fig_num)) {
    fig_num <- deparse(substitute(fig_i))
  }

  fit <- glm.nb(count ~ treatment, data = fig_i)
  theta <- fit$theta
  fit_title <- paste0(fig_num, " (theta = ", round(theta, 1), ")")

  ggdotplot(
    fig_i,
    x = "treatment",
    y = "count",
    color = "treatment",
    palette = "jco", # was misspelled "pallete", so the palette was never applied
    add = "mean"
  ) +
    ggtitle(fit_title) +
    rremove("legend")
}

#' Index-based variant of the theta dot plot, for use with `lapply()`.
#'
#' Fits `glm.nb(count ~ treatment)` to the `i`-th data set and titles the dot
#' plot with the `i`-th label and the fitted `theta` (rounded to one decimal).
#'
#' @param fig_i List of data sets, each with `treatment` and `count` columns.
#' @param fig_num Labels, indexable by `i`, one per element of `fig_i`.
#' @param i Index of the data set and label to plot.
#' @return The plot object built by `ggdotplot()`.
plot_enteric2 <- function(fig_i, fig_num, i) {
  dat <- fig_i[[i]]

  fit <- glm.nb(count ~ treatment, data = dat)
  theta <- fit$theta
  fit_title <- paste0(fig_num[[i]], " (theta = ", round(theta, 1), ")")

  ggdotplot(
    dat,
    x = "treatment",
    y = "count",
    color = "treatment",
    palette = "jco", # was misspelled "pallete", so the palette was never applied
    add = "mean"
  ) +
    ggtitle(fit_title) +
    rremove("legend")
}

fig_list_names <- c("fig1c", "fig1e", "fig1f", "fig2a", "fig2d", "fig3a", "fig3b", "fig4a", "fig5c", "fig6d")
# Name each data set by its figure label so the label can be used as the plot title.
fig_list <- setNames(
  list(fig1c, fig1e, fig1f, fig2a, fig2d, fig3a, fig3b, fig4a, fig5c, fig6d),
  fig_list_names
)

# lapply(fig_list, plot_enteric, names(fig_list)) does not work because lapply()
# passes the whole names vector to every call. The index-based alternative,
#   lapply(seq_along(fig_list), plot_enteric2, fig_i=fig_list, fig_num=names(fig_list)),
# works but requires i in the function, which is unsatisfying.
# Map() pairs each data set with its own name, so neither workaround is needed.
# unname() keeps gg_list an unnamed list, as the original loop produced.
gg_list <- unname(Map(plot_enteric, fig_list, names(fig_list)))

plot_grid(plotlist = gg_list, ncol = 3)

Data from Organic cation transporter 3 (Oct3) is a distinct catecholamines clearance route in adipocytes mediating the beiging of white adipose tissue

Import

# Locate the supplementary workbook for the Oct3 paper.
folder <- "Data from Organic cation transporter 3 (Oct3) is a distinct catecholamines clearance route in adipocytes mediating the beiging of white adipose tissue"
fn <- "journal.pbio.2006571.s012.xlsx"
file_path <- file.path(data_path, folder, fn)

# Read the Fig 5B block and plot it. No label is passed, so the title is
# taken from the name of the object supplied to plot_enteric().
fig5b <- read_enteric(file_path = file_path, sheet_i = "Fig 5B", range_i = "b2:c12")
plot_enteric(fig5b)

Estimates of the shape parameter

Plots of simulated samples that differ in \(\mu\) and \(\theta\)