Copyright © Jeffrey A. Walker
knitr::opts_chunk$set(echo = TRUE,
message=FALSE,
warning=FALSE)
library(here)
library(janitor)
library(readxl)
library(data.table)
# graphics and tables
library(ggplot2)
library(ggpubr)
library(ggsci)
library(ggforce)
library(ggthemes)
library(cowplot)
library(kableExtra)
library(lazyWeave) #pvalstring
# analysis packages
library(emmeans)
pal_okabe_ito <- colorblind_pal()(8)[2:8] # ggthemes
pal_okabe_ito_blue <- pal_okabe_ito[c(5,6,1,2,3,7,4)]
pal_okabe_ito_red <- pal_okabe_ito[c(6,5,3,1,2,7,4)]
pal_okabe_ito_4 <- pal_okabe_ito[c(5,6,7,2)]
minus <- "\u2013"
here <- here::here
data_folder <- "content/data"
output_folder <- "content/output"
The motivation for this post was to create a pipeline for generating publication-ready plots entirely within ggplot and avoid post-generation touch-ups in Illustrator or Inkscape. These scripts are a start. The ideal modification would be turning the chunks into functions with personalized detail so that a research team could quickly and efficiently generate multiple plots. I might try to turn the scripts into a very-general-but-not-ready-for-r-package function for my students.
The example data are from Figure 4b of
The design is \(2 \times 2\) factorial – there are two factor variables each with two levels. The goal is to create a \(2 \times 4\) grid of the factor levels below the plot. The rows of the grid contain the values (levels) for each of the two factor variables. The columns of the grid contain the values (levels) of each factor for each of the four groups. Communicating treatments this way has the advantage that treatment combinations are easy to see even if the plot were black-and-white (but why would anyone use black and white in 2020?).
plot_grid()
from the cowplot package. The top plot is the plot of the data and modeled means and 95% CIs. The bottom plot contains only the grid of factor levels.My preference – Each method has pros and cons. Method 3/variant 1 has the most upside and effectively zero downsides, at least if you’re comfortable with making the plot with a continuous x-axis. This is easy if you understand the numeric values underneath a ggplot discrete x-axis.
data_from <- "Adipsin preserves beta cells in diabetic mice and associates with protection from type 2 diabetes in humans"
fig_4_file <- "41591_2019_610_MOESM6_ESM.xlsx"
file_path <- here(data_folder, data_from, fig_4_file)
fig4 <- read_excel(file_path,
range = "A5:S5",
sheet = "Fig 4b",
col_names = FALSE) %>%
data.table() %>%
transpose()
colnames(fig4)[1] <- "tunel"
# populate factor columns
nsc_levels <- c("nsc_neg", "nsc_pos")
fig4[, nsc := c(rep(nsc_levels, c(5,5)),
rep(nsc_levels, c(5,4)))]
pa_levels <- c("pa_neg", "pa_pos")
fig4[, palmitate := rep(pa_levels, c(10, 9))]
# convert to factor
fig4[, nsc := factor(nsc,
levels = nsc_levels)]
fig4[, palmitate := factor(palmitate,
levels = pa_levels)]
treatment_levels <- c("vehicle", "nsc87877", "palmitate", "nsc87877+palmitate")
fig4[, treatment := rep(treatment_levels, c(5,5,5,4))]
# convert to factor
fig4[, treatment := factor(treatment,
levels = treatment_levels)]
# View(fig4)
m1 <- lm(tunel ~ nsc * palmitate, data = fig4)
# replicate
# t.test(tunel ~ palmitate,
# data = fig4[nsc == "nsc_neg"],
# var.equal = FALSE)
# data for plots
m1_coef <- cbind(coef(summary(m1)),
confint(m1))
m1_emm <- emmeans(m1, specs = c("nsc", "palmitate"))
m1_simple <- contrast(m1_emm,
method = "revpairwise",
simple = "each",
combine = TRUE,
adjust = "none") %>%
summary(infer = TRUE)
fig4_base <- ggplot(data = fig4,
aes(x = treatment,
y = tunel,
color = nsc)) +
# show the data
geom_sina() +
# geom_dotplot(binaxis='y',
# stackdir='center',
# alpha = 0.5) +
# x and y axis labels
ylab("Percent TUNEL+ cells") +
theme_pubr() +
theme(legend.position="none",
axis.title.x=element_blank()) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
fig4_base
# need mean and ci from emmeans table
m1_emm_dt <- m1_emm %>%
summary() %>%
data.table()
# add treatment column for plot
m1_emm_dt[, treatment := treatment_levels]
# m1_emm_dt # check
fig4_response <- fig4_base +
# show the model
geom_point(data = m1_emm_dt,
aes(y = emmean),
size = 3) +
geom_errorbar(data = m1_emm_dt,
aes(y = emmean,
ymin = lower.CL,
ymax = upper.CL),
width = 0)
fig4_response
# p-values come from contrast table
m1_simple_dt <- m1_simple %>%
data.table()
# m1_emm_dt[, treatment]
# "vehicle", "nsc87877", "palmitate", "nsc87877+palmitate"
m1_simple_dt[, group1 := c("nsc87877",
"nsc87877+palmitate",
"palmitate",
"nsc87877+palmitate")]
m1_simple_dt[, group2 := c("vehicle",
"palmitate",
"vehicle",
"nsc87877")]
m1_simple_dt[, p_pretty := pvalString(p.value)]
rows <- c(2, 3)
y_start <- 20.5
y_inc <- 1
y_position <- c(y_start, y_start)
fig4_response <- fig4_response +
stat_pvalue_manual(m1_simple_dt[rows],
label = "p_pretty",
y.position = y_position,
tip.length = 0.01,
bracket.shorten = -0.05)
fig4_response
create the plot of the grid of treatments
nsc_values <- c(minus, "+", minus, "+")
pa_values <- c(minus, minus, "+", "+")
v_base <- 1
v_sep <- 0.5
v_breaks <- c(v_base + v_sep, v_base)
v_lims <- c(v_base, v_base + v_sep) + c(-v_sep/2, v_sep/2)
txt_size = 5
# starts with fig4_base to automate position of treatment levels
# but hides everything in fig_4_base by plotting below all y-values
# if overlap with y-values, just make v_base above more negative
# alternative is to get x positions using
# x_pos <- ggplot_build(gg)$data[[2]][, "x"]
fig4_table_1 <- fig4_base +
annotate(geom = "text",
label = nsc_values,
x = 1:4,
y = v_breaks[1],
size = txt_size) +
annotate(geom = "text",
label = pa_values,
x = 1:4,
y = v_breaks[2],
size = txt_size) +
coord_cartesian(ylim = v_lims) +
theme(axis.title = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.line = element_blank(),
panel.background = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
scale_y_continuous(breaks = v_breaks,
labels = c("NSC-87877",
"palmitate")) +
NULL
# remove stuff from fig4_response
fig4_response_mod <- fig4_response +
theme(axis.text.x = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
NULL
fig4_method1_v1 <- plot_grid(fig4_response_mod,
fig4_table_1,
nrow = 2,
rel_heights = c(1, 0.15),
align = "v",
axis = "lr")
fig4_method1_v1
nsc_values <- c(paste("NSC-87877", minus),
paste("NSC-87877", "+"),
paste("NSC-87877", minus),
paste("NSC-87877", "+"))
pa_values <- c(paste("Pa", minus), "Pa +")
v_base <- 1
v_sep <- 0.5
v_breaks <- c(v_base + v_sep, v_base)
v_lims <- c(v_base, v_base + v_sep) + c(-v_sep/2, v_sep/2)
txt_size = 4
fudge <- 0.3
line_size <- 0.8
fig4_table_2 <- fig4_base +
annotate(geom = "text",
label = nsc_values,
x = 1:4,
y = v_breaks[1],
size = txt_size) +
annotate(geom = "text",
label = pa_values,
x = c(1.5, 3.5),
y = v_breaks[2],
size = txt_size) +
geom_segment(aes(x = 1-fudge,
y = sum(v_breaks)/2,
xend = 2+fudge,
yend = sum(v_breaks)/2),
color = "black",
size = line_size) +
geom_segment(aes(x = 3-fudge,
y = sum(v_breaks)/2,
xend = 4+fudge,
yend = sum(v_breaks)/2),
color = "black",
size = line_size) +
coord_cartesian(ylim = v_lims) +
theme(axis.title = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.line = element_blank(),
panel.background = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
scale_y_continuous(breaks = v_breaks,
labels = c("",
"")) +
NULL
# remove stuff from fig4_response
fig4_response_mod <- fig4_response +
theme(axis.text.x = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
NULL
fig4_method1_v1 <- plot_grid(fig4_response_mod,
fig4_table_2,
nrow = 2,
rel_heights = c(1, 0.2),
align = "v",
axis = "lr")
fig4_method1_v1
x_levels <- rbind(
"NSC-87877" = c(minus, "+", minus, "+"),
"Pa" = c(minus, minus, "+", "+")
)
x_levels_text <- apply(x_levels, 2, paste0, collapse="\n")
x_levels_title <- paste(row.names(x_levels), collapse="\n")
x_axis_min <- 0.4
x_axis_max <- 4.5
y_plot_min <- 0
y_axis_min <- 1.5
y_axis_max <- 21
y_breaks <- seq(5, 20, by = 5)
y_labels <- as.character(y_breaks)
fig4_method2 <- fig4_response +
coord_cartesian(ylim = c(y_plot_min, y_axis_max)) +
scale_y_continuous(breaks = y_breaks) +
theme(
axis.line = element_blank(), # remove both x & y axis lines
axis.text.x = element_blank(), # remove x-axis labels
axis.ticks.x = element_blank() # remove x-axis ticks
) +
# add shortened y-axis line that doesn't extend to treatments
geom_segment(aes(x = x_axis_min + 0.01,
y = y_axis_min,
xend = x_axis_min + 0.01,
yend = y_axis_max),
size = 0.5,
color = "black") +
# add x-axis line above treatments
geom_segment(aes(x = x_axis_min,
y = y_axis_min,
xend = x_axis_max,
yend = y_axis_min),
size = 0.5,
color = "black") +
# add treatment combinations
annotate(geom = "text",
x = 1:4,
y = y_plot_min,
label = x_levels_text) +
# add factor names
annotate(geom = "text",
x = x_axis_min,
y = y_plot_min,
label = x_levels_title,
hjust = 0) +
NULL
fig4_method2
build
?# need m1_emm_dt and m1_simple_dt from previous chunks
set.seed(1) # maintain jitter
N <- nrow(fig4)
jitter_width <- 0.4
fig4[, treatment_i := as.integer(treatment)]
fig4[, treatment_x := treatment_i +
runif(N, -jitter_width/2, jitter_width/2)]
# treatment_x needs to be added to m1_emm_dt
m1_emm_dt[, treatment_x := 1:4]
# m1_simple_dt needs numeric positions for p-value bracket
m1_simple_dt[, group1_i := c(2,4,3,4)]
m1_simple_dt[, group2_i := c(1,3,1,2)]
# this requires objects created in earlier chunks
fig4_method3_base <- ggplot(data = fig4,
aes(x = treatment_x,
y = tunel,
color = nsc)) +
# show the data
geom_point() +
# add modeled mean
geom_point(data = m1_emm_dt,
aes(y = emmean),
size = 3) +
# add modeled CI
geom_errorbar(data = m1_emm_dt,
aes(y = emmean,
ymin = lower.CL,
ymax = upper.CL),
width = 0) +
# x and y axis labels
ylab("Percent TUNEL+ cells") +
stat_pvalue_manual(m1_simple_dt[rows],
label = "p_pretty",
xmax = "group2_i",
xmin = "group1_i",
y.position = y_position,
tip.length = 0.01,
bracket.shorten = -0.05) +
theme_pubr() +
theme(legend.position="none",
axis.title.x=element_blank()) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
show_the_base_plot <- FALSE
if(show_the_base_plot == TRUE){fig4_method3_base}
x_levels <- rbind(
c("NSC-87877", minus, "+", minus, "+"),
c("Pa", minus, minus, "+", "+")
)
x_breaks_text <- apply(x_levels, 2, paste0, collapse="\n")
x_breaks <- c(0.5, 1:4)
fig4_method3 <- fig4_method3_base +
coord_cartesian(xlim = c(0.5, 4.5)) +
scale_x_continuous(breaks = x_breaks,
labels = x_breaks_text,
expand = c(0, 0)) +
theme(axis.ticks.x = element_blank()) + # remove x-axis ticks
NULL
fig4_method3
# modify data
new_levels <- c("fake", levels(fig4$treatment))
fig4_mod <- rbind(fig4[1,], fig4)
fig4_mod[1, treatment := "fake"]
fig4_mod[, treatment := factor(treatment,
levels = new_levels)]
fig4_mod[1, tunel := as.numeric(NA)]
# modify emm
m1_emm_dt_mod <- rbind(m1_emm_dt[1,], m1_emm_dt)
m1_emm_dt_mod[1, treatment := "fake"]
m1_emm_dt_mod[1, emmean := as.numeric(NA)]
m1_emm_dt_mod[1, lower.CL := as.numeric(NA)]
m1_emm_dt_mod[1, upper.CL := as.numeric(NA)]
fig4_method3_base <- ggplot(data = fig4_mod,
aes(x = treatment,
y = tunel,
color = nsc)) +
# show the data
geom_sina() +
# add modeled mean
geom_point(data = m1_emm_dt_mod,
aes(y = emmean),
size = 3) +
# add modeled CI
geom_errorbar(data = m1_emm_dt_mod,
aes(y = emmean,
ymin = lower.CL,
ymax = upper.CL),
width = 0) +
# x and y axis labels
ylab("Percent TUNEL+ cells") +
theme_pubr() +
theme(legend.position="none",
axis.title.x=element_blank()) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
# add p-values
rows <- c(2, 3)
y_start <- 20.5
y_inc <- 1
y_position <- c(y_start, y_start)
fig4_method3_base <- fig4_method3_base +
stat_pvalue_manual(m1_simple_dt[rows],
label = "p_pretty",
y.position = y_position,
tip.length = 0.01,
bracket.shorten = -0.05)
show_the_base_plot <- FALSE
if(show_the_base_plot == TRUE){fig4_method3_base}
x_levels <- rbind(
c("NSC-87877", minus, "+", minus, "+"),
c("Pa", minus, minus, "+", "+")
)
x_breaks_text <- apply(x_levels, 2, paste0, collapse="\n")
fig4_method3_v2 <- fig4_method3_base +
scale_x_discrete(labels = x_breaks_text,
expand = c(-0.6, .6)) +
theme(axis.ticks.x = element_blank()) + # remove x-axis ticks
NULL
fig4_method3_v2
pd_width <- 0.6
pd <- position_dodge(pd_width)
fig4_factorial_m1_base <- ggplot(data = fig4,
aes(x = palmitate,
y = tunel,
color = nsc)) +
# show the data
geom_sina(position = pd) +
# geom_dotplot(binaxis='y',
# stackdir='center',
# alpha = 0.5) +
# x and y axis labels
ylab("Percent TUNEL+ cells") +
# show the model
geom_point(data = m1_emm_dt,
aes(y = emmean),
size = 3,
position = pd) +
geom_errorbar(data = m1_emm_dt,
aes(y = emmean,
ymin = lower.CL,
ymax = upper.CL),
width = 0,
position = pd) +
theme_pubr() +
theme(legend.position="top",
axis.title.x=element_blank()) +
scale_color_manual(values = pal_okabe_ito_blue,
labels = c(paste("NSC-87877", minus),
"NSC-87877 +")) +
scale_x_discrete(labels = c(paste("Palmitate", minus),
"Palmitate +")) +
NULL
# needs m1_simple_dt from earlier chunk
# the treatments are now organized 2 x 2 instead of flattened
# this complicates the use of stat_pvalue_manual
# "vehicle", "nsc87877", "palmitate", "nsc87877+palmitate"
m1_simple_dt[, x_min_col := c(1 + pd_width/4,
2 + pd_width/4,
2 - pd_width/4,
2 + pd_width/4)]
m1_simple_dt[, x_max_col := c(1 - pd_width/4,
2 - pd_width/4,
1 - pd_width/4,
1 + pd_width/4)]
y_position <- c(y_start, y_start)
fig4_factorial_m1_p <- fig4_factorial_m1_base +
stat_pvalue_manual(m1_simple_dt[rows],
label = "p_pretty",
y.position = y_position,
xmax = "x_max_col",
xmin = "x_min_col",
tip.length = 0.01,
bracket.shorten = -0.05)
fig4_factorial_m1_p
# remove stuff from base plot
fig4_factorial_m1_p <- fig4_factorial_m1_p +
theme(legend.position="none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
NULL
nsc_values <- c(minus, "+", minus, "+")
pa_values <- c(minus, minus, "+", "+")
v_base <- 1
v_sep <- 0.5
v_breaks <- c(v_base + v_sep, v_base)
v_lims <- c(v_base, v_base + v_sep) + c(-v_sep/2, v_sep/2)
txt_size = 5
# Again -- starts with fig4_factorial_m1_base to automate position of
# treatment levels
# but hides everything in fig_4_base by plotting below all y-values
# if overlap with y-values, just make v_base above more negative
# alternative is to get x positions using
# x_pos <- ggplot_build(gg)$data[[2]][, "x"]
fig4_factorial_m1_table <- fig4_factorial_m1_base +
annotate(geom = "text",
label = nsc_values,
x = c(1 - pd_width/4,
1 + pd_width/4,
2 - pd_width/4,
2 + pd_width/4),
y = v_breaks[1],
size = txt_size) +
annotate(geom = "text",
label = pa_values,
x = c(1 - pd_width/4,
1 + pd_width/4,
2 - pd_width/4,
2 + pd_width/4),
y = v_breaks[2],
size = txt_size) +
coord_cartesian(ylim = v_lims) +
theme(legend.position="none",
axis.title = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.line = element_blank(),
panel.background = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "pt")) +
scale_y_continuous(breaks = v_breaks,
labels = c("NSC-87877",
"palmitate")) +
NULL
fig4_factorial_method1 <- plot_grid(fig4_factorial_m1_p,
fig4_factorial_m1_table,
nrow = 2,
rel_heights = c(1, 0.15),
align = "v",
axis = "lr")
fig4_factorial_method1
Add dodged group assignment and jittered x to data
# need m1_emm_dt and m1_simple_dt from previous chunks
set.seed(1) # maintain jitter
N <- nrow(fig4)
dodge_width <- 0.3
jitter_width <- 0.2
fig4[, treatment_i := as.integer(treatment)]
# x position of group means
x_pos <- c(1 - dodge_width/2,
1 + dodge_width/2,
2 - dodge_width/2,
2 + dodge_width/2)
fig4[, treatment_x_mean := x_pos[treatment_i]]
# x position of jittered points
fig4[, treatment_x := treatment_x_mean + runif(N, -jitter_width/2, jitter_width/2)]
# treatment_x needs to be added to m1_emm_dt
m1_emm_dt[, treatment_x := x_pos]
# m1_simple_dt needs numeric positions for p-value bracket
# use x positions in m1_emm_dt to set these
m1_simple_dt[, group1_i := c(x_pos[2],
x_pos[4],
x_pos[3],
x_pos[4])]
m1_simple_dt[, group2_i := c(x_pos[1],
x_pos[3],
x_pos[1],
x_pos[2])]
Create the base plot with a continuous x-axis
# this requires objects created in earlier chunks
fig4_factorial_method3_base <- ggplot(data = fig4,
aes(x = treatment_x,
y = tunel,
color = nsc)) +
# show the data
geom_point() +
# add modeled mean
geom_point(data = m1_emm_dt,
aes(y = emmean),
size = 3) +
# add modeled CI
geom_errorbar(data = m1_emm_dt,
aes(y = emmean,
ymin = lower.CL,
ymax = upper.CL),
width = 0) +
# x and y axis labels
ylab("Percent TUNEL+ cells") +
stat_pvalue_manual(m1_simple_dt[rows],
label = "p_pretty",
xmax = "group2_i",
xmin = "group1_i",
y.position = y_position,
tip.length = 0.01,
bracket.shorten = -0.05) +
theme_pubr() +
theme(legend.position="none",
axis.title.x=element_blank()) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
show_the_base_plot <- FALSE
if(show_the_base_plot == TRUE){fig4_factorial_method3_base}
Add the treatment grid
x_levels <- rbind(
c("NSC-87877", minus, "+", minus, "+"),
c("Pa", minus, minus, "+", "+")
)
x_min <- 0.5
x_max <- 2 + dodge_width
x_breaks_text <- apply(x_levels, 2, paste0, collapse="\n")
x_breaks <- c(x_min, x_pos)
fig4_factorial_method3 <- fig4_factorial_method3_base +
coord_cartesian(xlim = c(x_min, x_max)) +
scale_x_continuous(breaks = x_breaks,
labels = x_breaks_text) +
theme(axis.ticks.x = element_blank()) + # remove x-axis ticks
NULL
fig4_factorial_method3