+-------------------------------+-------------------------------+
| A PSU2022 field phenotypes | B PSU2024 significant |
| (6-trait, 2 x 3 grid) | (4-trait 2 x 2 grid) |
+-------------------------------+-------------------------------+
| C SAM quantification | D SAM micrograph (DIC) |
| (4 morphometric panels) | (placeholder — Overleaf) |
+-------------------------------+-------------------------------+
Panel A and C are imported as PNGs already saved by their producer
notebooks. Panel B is rebuilt fresh from the PSU2024
corrected-phenotypes CSV restricted to the four traits significant at
FDR < 0.05 in the marginal contrast (GWE, TGW, TGN, PH). Panel D
currently shows a placeholder; the DIC micrograph PNG lives on Overleaf
and would need to be added under data/ (or pointed at via
an absolute path) to be embedded here.
library(tidyverse)
library(ggtext)
library(ggfx)
library(ggpubr)
library(ggbeeswarm)
library(cowplot)
library(magick)
All boxplot panels (A, B, C) share this theme.
base_size = 14 matches the effective rendered size of the
legacy 6-trait Panel A image at the 12-inch canvas width, so panel
typography is internally consistent.
pheno_theme_panel <- theme_classic2(base_size = 14) +
theme(
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
plot.margin = margin(t = 30, r = 5, b = 5, l = 5, unit = "pt"),
axis.title.y = element_text(size = 12),
axis.title.x = element_blank(),
axis.text.x = element_markdown(size = 12, face = "bold",
color = "black"),
axis.text.y = element_text(size = 10),
legend.position = "none"
)
# Single helper: trait_data has columns (genotype, trait, value);
# label_lookup maps trait -> y-axis label; star_lookup maps trait -> sig star;
# title_lookup (optional) maps trait -> displayed panel title (else uses trait)
make_boxplot <- function(tr, trait_data, label_lookup, star_lookup,
title_lookup = NULL) {
d <- trait_data %>% filter(trait == tr, !is.na(value))
if (!nrow(d)) return(NULL)
ymax <- max(d$value, na.rm = TRUE)
yrng <- diff(range(d$value, na.rm = TRUE))
star <- if (tr %in% names(star_lookup)) star_lookup[[tr]] else ""
bracket_df <- tibble(group1 = "CTRL", group2 = "Inv4m",
y.position = ymax + 0.08 * yrng,
label = star)
y_lab <- if (tr %in% names(label_lookup)) label_lookup[[tr]] else tr
panel_title <- if (!is.null(title_lookup) && tr %in% names(title_lookup))
title_lookup[[tr]] else tr
y_scale <- if (tr %in% c("DTA", "DTS")) {
scale_y_continuous(limits = c(70, 78), breaks = 70:78,
expand = expansion(mult = c(0.02, 0.10)))
} else {
scale_y_continuous(expand = expansion(mult = c(0.08, 0.20)))
}
ggplot(d, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.0, alpha = 0) +
geom_quasirandom(size = 1.4) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
stat_pvalue_manual(bracket_df, tip.length = 0.01, size = 5,
bracket.size = 0.6, inherit.aes = FALSE) +
labs(title = panel_title, y = y_lab, x = NULL) +
y_scale +
pheno_theme_panel
}
psu2022 <- read_csv(
file.path(paths$intermediate,
"PSU2022_spatially_corrected_phenotypes.csv"),
show_col_types = FALSE) %>%
filter(genotype %in% c("CTRL", "Inv4m")) %>%
mutate(genotype = factor(genotype, levels = c("CTRL", "Inv4m")))
psu2022_marg <- read_csv(
file.path(paths$intermediate, "PSU2022_focal_contrast_marginal.csv"),
show_col_types = FALSE)
A_traits <- c("PH", "DTA", "DTS") # NIL 3-trait per Rubén
A_labels <- c(PH = "Plant height (cm)", DTA = "Days to anthesis",
DTS = "Days to silking")
A_titles <- NULL
A_stars <- setNames(psu2022_marg$signif, psu2022_marg$trait)
psu2022_long <- psu2022 %>%
pivot_longer(cols = any_of(A_traits), names_to = "trait",
values_to = "value") %>%
mutate(trait = factor(trait, levels = A_traits))
A_plots <- lapply(A_traits, make_boxplot,
trait_data = psu2022_long,
label_lookup = A_labels,
star_lookup = A_stars,
title_lookup = A_titles)
panel_A <- cowplot::plot_grid(plotlist = A_plots, nrow = 1, ncol = 3,
align = "hv", axis = "tblr")
panel_A
sp_corrected <- read_csv(
file.path(paths$intermediate,
"PSU2024_spatially_corrected_phenotypes.csv"),
show_col_types = FALSE)
all_contrasts <- read_csv(
file.path(paths$intermediate, "PSU2024_focal_contrast_all_27.csv"),
show_col_types = FALSE)
marginal <- all_contrasts %>% filter(contrast_type == "marginal") %>%
arrange(p_adj)
B_traits <- c("PH", "TGW", "TGN") # hybrid 3-trait per Rubén
B_labels <- c(PH = "Plant height (cm)",
TGW = "Total grain weight (g)",
TGN = "Total grain number")
B_stars <- setNames(marginal$signif, marginal$trait)
focal_long <- sp_corrected %>%
filter(geno_label %in% c("CTRL", "Inv4m")) %>%
mutate(genotype = factor(geno_label, levels = c("CTRL", "Inv4m"))) %>%
pivot_longer(cols = any_of(B_traits),
names_to = "trait", values_to = "value") %>%
filter(trait %in% B_traits) %>%
mutate(trait = factor(trait, levels = B_traits))
B_plots <- lapply(B_traits, make_boxplot,
trait_data = focal_long,
label_lookup = B_labels,
star_lookup = B_stars)
panel_B <- cowplot::plot_grid(plotlist = B_plots, nrow = 1, ncol = 3,
align = "hv", axis = "tblr")
panel_B
panel_C <- ggdraw() +
draw_image(image_read(here("data", "SAM_DIC_micrograph.png")), scale = 1)
panel_C
sam_raw <- read.csv(
here("data", "MeristemMesurements_B73-INV_December2024.csv")) %>%
dplyr::select(-starts_with("X")) %>%
mutate(
radius = Width / 2,
hr_ratio = Height / radius,
shape_factor = 1000 * Height / (radius^2),
genotype = factor(case_when(Genotype == "INV" ~ "Inv4m",
Genotype == "B73" ~ "CTRL",
TRUE ~ as.character(Genotype)),
levels = c("CTRL", "Inv4m"))
)
## FDR pool: the three elongation-related SAM dimensions (Height, h/r, h/r^2).
## Radius is excluded because the directional hypothesis is meristem
## elongation (Inv4m > CTRL on height-derived metrics); radius is the null
## anchor in Alex's original notebook, not part of the pertinent test family.
sam_all_traits <- c("Height", "hr_ratio", "shape_factor")
D_traits <- c("Height", "hr_ratio", "shape_factor")
D_labels <- c(Height = "Height (μm)",
hr_ratio = "h / r",
shape_factor = expression(h / r^2 ~ "(nm"^{-1}*")"))
D_titles <- c(Height = "Height",
hr_ratio = "h/r ratio",
shape_factor = "Shape Factor")
## One-tailed Welch t-tests (Inv4m > CTRL, matching the directional
## hypothesis of meristem elongation), BH-FDR over all four SAM tests
sam_one_tailed <- sam_raw %>%
pivot_longer(cols = all_of(sam_all_traits), names_to = "trait",
values_to = "value") %>%
filter(!is.na(value)) %>%
group_by(trait) %>%
summarise(
p = t.test(value ~ genotype, alternative = "less")$p.value,
.groups = "drop"
) %>%
mutate(p_adj = p.adjust(p, method = "fdr"))
star_from_p <- function(p) {
if (is.na(p)) return("ns")
if (p < 0.001) return("***")
if (p < 0.01) return("**")
if (p < 0.05) return("*")
"ns"
}
D_stars <- setNames(vapply(D_traits,
function(tr) star_from_p(
sam_one_tailed$p_adj[sam_one_tailed$trait == tr]),
character(1)),
D_traits)
print(sam_one_tailed)
## # A tibble: 3 × 3
## trait p p_adj
## <chr> <dbl> <dbl>
## 1 Height 0.0447 0.0447
## 2 hr_ratio 0.0184 0.0276
## 3 shape_factor 0.0167 0.0276
sam_long <- sam_raw %>%
pivot_longer(cols = all_of(D_traits),
names_to = "trait", values_to = "value") %>%
mutate(trait = factor(trait, levels = D_traits))
make_sam_panel <- function(tr) {
d <- sam_long %>% filter(trait == tr, !is.na(value))
ymax <- max(d$value); yrng <- diff(range(d$value))
bracket_df <- tibble(group1 = "CTRL", group2 = "Inv4m",
y.position = ymax + 0.08 * yrng,
label = D_stars[[tr]])
y_lab <- D_labels[[tr]]
p <- ggplot(d, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.0, alpha = 0) +
geom_quasirandom(size = 1.4) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
stat_pvalue_manual(bracket_df, tip.length = 0.01, size = 5,
bracket.size = 0.6, inherit.aes = FALSE) +
labs(title = D_titles[[tr]], x = NULL) +
scale_y_continuous(expand = expansion(mult = c(0.08, 0.20))) +
pheno_theme_panel
if (is.expression(y_lab)) p + ylab(y_lab) else p + ylab(y_lab)
}
D_plots <- lapply(D_traits, make_sam_panel)
panel_D <- cowplot::plot_grid(plotlist = D_plots, nrow = 1, ncol = 3,
align = "hv", axis = "tblr")
panel_D
## Layout:
## A (NIL 3-trait, top-left) | C (micrograph, top-right)
## B (hybrid 3-trait, bot-l) | D (SAM 2-trait, bot-right)
## One plot_grid call so align="hv" aligns axes across the full grid.
fig2_grid <- cowplot::plot_grid(
panel_A, panel_C,
panel_B, panel_D,
nrow = 2, ncol = 2,
align = "hv",
axis = "tblr",
labels = c("A", "C", "B", "D"),
label_size = 18, label_fontface = "bold",
rel_widths = c(0.5, 0.5),
rel_heights = c(1, 1)
)
## Overlay descriptive labels next to A/B/C/D panel letters
fig2 <- ggdraw(fig2_grid) +
draw_label("NILs", x = 0.03, y = 0.975, hjust = 0,
fontface = "plain", size = 16) +
draw_label("NIL Seedling SAMs", x = 0.53, y = 0.975, hjust = 0,
fontface = "plain", size = 16) +
draw_label("Hybrids", x = 0.03, y = 0.475, hjust = 0,
fontface = "plain", size = 16) +
draw_label("NIL Seedling SAMs", x = 0.53, y = 0.475, hjust = 0,
fontface = "plain", size = 16)
print(fig2)
ggsave(file.path(paths$figures, "Figure_2_draft.png"),
fig2, width = 12, height = 7, dpi = 300, bg = "white")
cat("Saved:",
file.path(paths$figures, "Figure_2_draft.png"), "\n")
## Saved: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/Figure_2_draft.png
pheno_theme_panel (base_size = 14, linewidth = 1.0, dot
size = 1.4) so typography and stroke weight match.repId as fixed
factor (Treatment NOT in SpATS — kept in Stage 2)lm(corrected ~ Genotype * Treatment) per trait
+ emmeans contrasts (marginal Genotype pooled over P, plus conditional
within each P_LEVEL). Single FDR pool across (trait × contrast)
rows.PSU2022_focal_contrast_marginal.csv; Panel B from the
equivalent PSU2024_focal_contrast_marginal.csv).SAM_morphology_analysis.Rmd. No FDR
pool here — single-trait comparisons.