Macro layout (850 x 570 px draft)

+-------------------------------+-------------------------------+
|  A  PSU2022 field phenotypes  |  B  PSU2024 significant       |
|     (6-trait, 2 x 3 grid)     |     (4-trait 2 x 2 grid)      |
+-------------------------------+-------------------------------+
|  C  SAM quantification        |  D  SAM micrograph (DIC)      |
|     (4 morphometric panels)   |     (placeholder — Overleaf)  |
+-------------------------------+-------------------------------+

Panel A and C are imported as PNGs already saved by their producer notebooks. Panel B is rebuilt fresh from the PSU2024 corrected-phenotypes CSV restricted to the four traits significant at FDR < 0.05 in the marginal contrast (GWE, TGW, TGN, PH). Panel D currently shows a placeholder; the DIC micrograph PNG lives on Overleaf and would need to be added under data/ (or pointed at via an absolute path) to be embedded here.

1. Libraries

library(tidyverse)
library(ggtext)
library(ggfx)
library(ggpubr)
library(ggbeeswarm)
library(cowplot)
library(magick)

2. Harmonized theme + boxplot helper

All boxplot panels (A, B, C) share this theme. base_size = 14 matches the effective rendered size of the legacy 6-trait Panel A image at the 12-inch canvas width, so panel typography is internally consistent.

pheno_theme_panel <- theme_classic2(base_size = 14) +
  theme(
    plot.title       = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.margin      = margin(t = 30, r = 5, b = 5, l = 5, unit = "pt"),
    axis.title.y     = element_text(size = 12),
    axis.title.x     = element_blank(),
    axis.text.x      = element_markdown(size = 12, face = "bold",
                                        color = "black"),
    axis.text.y      = element_text(size = 10),
    legend.position  = "none"
  )

# Single helper: trait_data has columns (genotype, trait, value);
# label_lookup maps trait -> y-axis label; star_lookup maps trait -> sig star;
# title_lookup (optional) maps trait -> displayed panel title (else uses trait)
make_boxplot <- function(tr, trait_data, label_lookup, star_lookup,
                         title_lookup = NULL) {
  d <- trait_data %>% filter(trait == tr, !is.na(value))
  if (!nrow(d)) return(NULL)
  ymax <- max(d$value, na.rm = TRUE)
  yrng <- diff(range(d$value, na.rm = TRUE))
  star <- if (tr %in% names(star_lookup)) star_lookup[[tr]] else ""
  bracket_df <- tibble(group1 = "CTRL", group2 = "Inv4m",
                       y.position = ymax + 0.08 * yrng,
                       label = star)
  y_lab <- if (tr %in% names(label_lookup)) label_lookup[[tr]] else tr
  panel_title <- if (!is.null(title_lookup) && tr %in% names(title_lookup))
                   title_lookup[[tr]] else tr
  y_scale <- if (tr %in% c("DTA", "DTS")) {
    scale_y_continuous(limits = c(70, 78), breaks = 70:78,
                       expand = expansion(mult = c(0.02, 0.10)))
  } else {
    scale_y_continuous(expand = expansion(mult = c(0.08, 0.20)))
  }
  ggplot(d, aes(x = genotype, y = value, color = genotype)) +
    geom_boxplot(width = 0.25, linewidth = 1.0, alpha = 0) +
    geom_quasirandom(size = 1.4) +
    scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
    scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
    stat_pvalue_manual(bracket_df, tip.length = 0.01, size = 5,
                       bracket.size = 0.6, inherit.aes = FALSE) +
    labs(title = panel_title, y = y_lab, x = NULL) +
    y_scale +
    pheno_theme_panel
}

3. Panel A — PSU2022 6-trait phenotypes (rebuilt native)

psu2022 <- read_csv(
  file.path(paths$intermediate,
            "PSU2022_spatially_corrected_phenotypes.csv"),
  show_col_types = FALSE) %>%
  filter(genotype %in% c("CTRL", "Inv4m")) %>%
  mutate(genotype = factor(genotype, levels = c("CTRL", "Inv4m")))

psu2022_marg <- read_csv(
  file.path(paths$intermediate, "PSU2022_focal_contrast_marginal.csv"),
  show_col_types = FALSE)

A_traits <- c("PH", "DTA", "DTS")  # NIL 3-trait per Rubén
A_labels <- c(PH = "Plant height (cm)", DTA = "Days to anthesis",
              DTS = "Days to silking")
A_titles <- NULL
A_stars  <- setNames(psu2022_marg$signif, psu2022_marg$trait)

psu2022_long <- psu2022 %>%
  pivot_longer(cols = any_of(A_traits), names_to = "trait",
               values_to = "value") %>%
  mutate(trait = factor(trait, levels = A_traits))

A_plots <- lapply(A_traits, make_boxplot,
                  trait_data = psu2022_long,
                  label_lookup = A_labels,
                  star_lookup = A_stars,
                  title_lookup = A_titles)
panel_A <- cowplot::plot_grid(plotlist = A_plots, nrow = 1, ncol = 3,
                              align = "hv", axis = "tblr")
panel_A

4. Panel B — PSU2024 4 significant traits (rebuilt native)

sp_corrected <- read_csv(
  file.path(paths$intermediate,
            "PSU2024_spatially_corrected_phenotypes.csv"),
  show_col_types = FALSE)
all_contrasts <- read_csv(
  file.path(paths$intermediate, "PSU2024_focal_contrast_all_27.csv"),
  show_col_types = FALSE)
marginal <- all_contrasts %>% filter(contrast_type == "marginal") %>%
  arrange(p_adj)

B_traits <- c("PH", "TGW", "TGN")  # hybrid 3-trait per Rubén
B_labels <- c(PH = "Plant height (cm)",
              TGW = "Total grain weight (g)",
              TGN = "Total grain number")
B_stars  <- setNames(marginal$signif, marginal$trait)

focal_long <- sp_corrected %>%
  filter(geno_label %in% c("CTRL", "Inv4m")) %>%
  mutate(genotype = factor(geno_label, levels = c("CTRL", "Inv4m"))) %>%
  pivot_longer(cols = any_of(B_traits),
               names_to = "trait", values_to = "value") %>%
  filter(trait %in% B_traits) %>%
  mutate(trait = factor(trait, levels = B_traits))

B_plots <- lapply(B_traits, make_boxplot,
                  trait_data = focal_long,
                  label_lookup = B_labels,
                  star_lookup = B_stars)
panel_B <- cowplot::plot_grid(plotlist = B_plots, nrow = 1, ncol = 3,
                              align = "hv", axis = "tblr")
panel_B

5. Panel C — SAM DIC micrograph (top-left per Rubén)

panel_C <- ggdraw() +
  draw_image(image_read(here("data", "SAM_DIC_micrograph.png")), scale = 1)
panel_C

6. Panel D — SAM 2-trait morphometry (bottom-left; two-sided per Rubén)

sam_raw <- read.csv(
  here("data", "MeristemMesurements_B73-INV_December2024.csv")) %>%
  dplyr::select(-starts_with("X")) %>%
  mutate(
    radius       = Width / 2,
    hr_ratio     = Height / radius,
    shape_factor = 1000 * Height / (radius^2),
    genotype = factor(case_when(Genotype == "INV" ~ "Inv4m",
                                Genotype == "B73" ~ "CTRL",
                                TRUE              ~ as.character(Genotype)),
                      levels = c("CTRL", "Inv4m"))
  )

## FDR pool: the three elongation-related SAM dimensions (Height, h/r, h/r^2).
## Radius is excluded because the directional hypothesis is meristem
## elongation (Inv4m > CTRL on height-derived metrics); radius is the null
## anchor in Alex's original notebook, not part of the pertinent test family.
sam_all_traits <- c("Height", "hr_ratio", "shape_factor")
D_traits       <- c("Height", "hr_ratio", "shape_factor")
D_labels <- c(Height       = "Height (μm)",
              hr_ratio     = "h / r",
              shape_factor = expression(h / r^2 ~ "(nm"^{-1}*")"))
D_titles <- c(Height       = "Height",
              hr_ratio     = "h/r ratio",
              shape_factor = "Shape Factor")

## One-tailed Welch t-tests (Inv4m > CTRL, matching the directional
## hypothesis of meristem elongation), BH-FDR over all four SAM tests
sam_one_tailed <- sam_raw %>%
  pivot_longer(cols = all_of(sam_all_traits), names_to = "trait",
               values_to = "value") %>%
  filter(!is.na(value)) %>%
  group_by(trait) %>%
  summarise(
    p = t.test(value ~ genotype, alternative = "less")$p.value,
    .groups = "drop"
  ) %>%
  mutate(p_adj = p.adjust(p, method = "fdr"))

star_from_p <- function(p) {
  if (is.na(p)) return("ns")
  if (p < 0.001) return("***")
  if (p < 0.01)  return("**")
  if (p < 0.05)  return("*")
  "ns"
}
D_stars <- setNames(vapply(D_traits,
                           function(tr) star_from_p(
                             sam_one_tailed$p_adj[sam_one_tailed$trait == tr]),
                           character(1)),
                    D_traits)
print(sam_one_tailed)
## # A tibble: 3 × 3
##   trait             p  p_adj
##   <chr>         <dbl>  <dbl>
## 1 Height       0.0447 0.0447
## 2 hr_ratio     0.0184 0.0276
## 3 shape_factor 0.0167 0.0276
sam_long <- sam_raw %>%
  pivot_longer(cols = all_of(D_traits),
               names_to = "trait", values_to = "value") %>%
  mutate(trait = factor(trait, levels = D_traits))

make_sam_panel <- function(tr) {
  d <- sam_long %>% filter(trait == tr, !is.na(value))
  ymax <- max(d$value); yrng <- diff(range(d$value))
  bracket_df <- tibble(group1 = "CTRL", group2 = "Inv4m",
                       y.position = ymax + 0.08 * yrng,
                       label = D_stars[[tr]])
  y_lab <- D_labels[[tr]]
  p <- ggplot(d, aes(x = genotype, y = value, color = genotype)) +
    geom_boxplot(width = 0.25, linewidth = 1.0, alpha = 0) +
    geom_quasirandom(size = 1.4) +
    scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
    scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
    stat_pvalue_manual(bracket_df, tip.length = 0.01, size = 5,
                       bracket.size = 0.6, inherit.aes = FALSE) +
    labs(title = D_titles[[tr]], x = NULL) +
    scale_y_continuous(expand = expansion(mult = c(0.08, 0.20))) +
    pheno_theme_panel
  if (is.expression(y_lab)) p + ylab(y_lab) else p + ylab(y_lab)
}

D_plots <- lapply(D_traits, make_sam_panel)
panel_D <- cowplot::plot_grid(plotlist = D_plots, nrow = 1, ncol = 3,
                              align = "hv", axis = "tblr")
panel_D

6. Assemble 2x2 macro layout at 850 x 570 px

## Layout:
##   A (NIL 3-trait, top-left)  |  C (micrograph, top-right)
##   B (hybrid 3-trait, bot-l)  |  D (SAM 2-trait, bot-right)
## One plot_grid call so align="hv" aligns axes across the full grid.
fig2_grid <- cowplot::plot_grid(
  panel_A, panel_C,
  panel_B, panel_D,
  nrow = 2, ncol = 2,
  align = "hv",
  axis = "tblr",
  labels = c("A", "C", "B", "D"),
  label_size = 18, label_fontface = "bold",
  rel_widths  = c(0.5, 0.5),
  rel_heights = c(1, 1)
)

## Overlay descriptive labels next to A/B/C/D panel letters
fig2 <- ggdraw(fig2_grid) +
  draw_label("NILs",                    x = 0.03, y = 0.975, hjust = 0,
             fontface = "plain", size = 16) +
  draw_label("NIL Seedling SAMs",       x = 0.53, y = 0.975, hjust = 0,
             fontface = "plain", size = 16) +
  draw_label("Hybrids",                 x = 0.03, y = 0.475, hjust = 0,
             fontface = "plain", size = 16) +
  draw_label("NIL Seedling SAMs",       x = 0.53, y = 0.475, hjust = 0,
             fontface = "plain", size = 16)

print(fig2)

ggsave(file.path(paths$figures, "Figure_2_draft.png"),
       fig2, width = 12, height = 7, dpi = 300, bg = "white")

cat("Saved:",
    file.path(paths$figures, "Figure_2_draft.png"), "\n")
## Saved: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/Figure_2_draft.png

7. Notes

  • All boxplot panels (A, B, C) rendered native with a shared pheno_theme_panel (base_size = 14, linewidth = 1.0, dot size = 1.4) so typography and stroke weight match.
  • Stats pipeline (Panels A and B share it):
    • Stage 1: SpATS spatial correction with repId as fixed factor (Treatment NOT in SpATS — kept in Stage 2)
    • Stage 2: lm(corrected ~ Genotype * Treatment) per trait + emmeans contrasts (marginal Genotype pooled over P, plus conditional within each P_LEVEL). Single FDR pool across (trait × contrast) rows.
    • Marginal-Genotype p_adj drives the panel star (Panel A from PSU2022_focal_contrast_marginal.csv; Panel B from the equivalent PSU2024_focal_contrast_marginal.csv).
  • Panel C uses one-tailed Welch t-tests (Inv4m > CTRL), matching the convention used in SAM_morphology_analysis.Rmd. No FDR pool here — single-trait comparisons.
  • Output dimensions: 12 x 8.05 in @ 300 dpi (3600 x 2415 px), aspect ≈ 1.49.