Overview

This notebook fits a SpATS PSANOVA surface per phenotype and returns spatially-corrected plot-level values for each phenotype, including GDD-based traits (GDDA and GDDS). The approach uses statgenHTP::fitModels() (which wraps SpATS when provided a TimePoints object) and getCorrected() to extract corrected plot-level observations. The workflow:

  1. Load GDD lookup table for PA2022.
  2. Load and clean data (as in prior notebook).
  3. Create plot_data (plot-level means) and merge GDD values.
  4. Build a TimePoints object for statgenHTP.
  5. Loop over usable phenotypes (including GDDA, GDDS) and apply fitModels() → getCorrected().
  6. Merge corrected values into a wide table sp_corrected.
  7. Save corrected table for downstream analyses.

Warning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.

1. Load libraries

library(tidyverse)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme)
library(gstat)
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS)
library(statgenHTP)
library(ggbeeswarm)
library(cowplot)
library(emmeans)

1b. Configuration and load GDD lookup table

environment_name <- "PSU2022"
planting_date <- mdy("5/26/2022")
gdd_lookup_file <- file.path(paths$intermediate, "gdd_lookup_table.csv")

gdd_at_phenology <- read_csv(gdd_lookup_file) %>%
  filter(environment == environment_name)

cat("Loaded GDD lookup for", environment_name, "with", 
    nrow(gdd_at_phenology), "dates\n")
## Loaded GDD lookup for PSU2022 with 101 dates
cat("GDD range:", min(gdd_at_phenology$cumulative_gdd_from_planting), "to", 
    max(gdd_at_phenology$cumulative_gdd_from_planting), "°C\n")
## GDD range: 6.28 to 1203.705 °C

2. Load and prepare data

DATA_DIR <- paths$data
plant_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
ear_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field_ear_pheno.csv")

field_data_raw <- read.csv(plant_csv, na.strings = c("", "#N/A", "NA"))
ear_data_raw <- read.csv(ear_csv, na.strings = c("", "n/a", "NA"), skip = 1)

field_data <- field_data_raw %>%
  filter(P22. >= 3004, P22. <= 4192) %>%
  rename(
    rowid = P22.,
    Genotype = Who.What,
    PH = Height_Anthesis,
    STW40 = X40_DAP_dw,
    STW50 = X50_DAP_dw, 
    STW60 = X60_DAP_dw,
    STWHV = harvest_dw
  ) %>%
  mutate(
    Treatment = factor(Treatment, levels = c("HighP", "LowP")),
    Genotype = factor(Genotype),
    Rep = as.factor(Rep)
  ) %>%
  mutate(
    Treatment = fct_recode(Treatment, "+P" = "HighP", "-P" = "LowP"),
    Genotype = fct_recode(Genotype, "Inv4m" = "INV4M")
  )

ear_data <- ear_data_raw %>%
  select(-description, -RK, -CC, -NIR, ear_rep = rep) %>%
  rename(rowid = row) %>%
  arrange(rowid) %>%
  group_by(rowid) %>%
  select(-ear_rep) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  droplevels()

field_data_complete <- field_data %>%
  left_join(ear_data, by = "rowid") %>%
  mutate(HI = TKW / STWHV) %>%
  mutate(
    Plot_Column = case_when(
      Plot == "PlotVIII" ~ Plot_Column + 10,
      Plot == "PlotVI" ~ Plot_Column,
      TRUE ~ Plot_Column
    )
  ) %>%
  filter(!is.na(PH)) %>%
  droplevels()

base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")
all_phenotypes <- unique(c(base_phenotypes, ear_phenotypes))

cat("Field data dimensions:", dim(field_data_complete), "\n")
## Field data dimensions: 127 39
cat("Phenotypes available:", paste(all_phenotypes, collapse = ", "), "\n")
## Phenotypes available: PH, DTA, DTS, STW40, STW50, STW60, STWHV, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

3. Create plot-level data with GDD integration

plot_data <- field_data_complete %>%
  group_by(rowid, Rep, Plot_Row, Plot_Column, Genotype, Treatment) %>%
  summarise(
    across(all_of(all_phenotypes), ~ mean(.x, na.rm = TRUE)),
    n_plants = n(),
    .groups = 'drop'
  ) %>%
  mutate(
    across(all_of(all_phenotypes), ~ ifelse(is.nan(.x), NA, .x)),
    environment = environment_name,
    anthesis_date = planting_date + days(round(DTA)),
    silking_date = planting_date + days(round(DTS))
  ) %>%
  left_join(
    gdd_at_phenology %>% 
      rename(anthesis_date = date, GDDA = cumulative_gdd_from_planting),
    by = c("environment", "anthesis_date")
  ) %>%
  left_join(
    gdd_at_phenology %>% 
      rename(silking_date = date, GDDS = cumulative_gdd_from_planting),
    by = c("environment", "silking_date")
  ) %>%
  select(-environment, -anthesis_date, -silking_date)

plot_data$Plot_Row <- as.numeric(plot_data$Plot_Row)
plot_data$Plot_Column <- as.numeric(plot_data$Plot_Column)
plot_data$Rep <- as.factor(plot_data$Rep)

cat("Plot-level summary dimensions:", dim(plot_data), "\n")
## Plot-level summary dimensions: 127 25
cat("Average plants per plot:", round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")
## Average plants per plot: 1
cat("GDD columns added: GDDA (anthesis), GDDS (silking)\n")
## GDD columns added: GDDA (anthesis), GDDS (silking)
cat("Plots with GDDA:", sum(!is.na(plot_data$GDDA)), "\n")
## Plots with GDDA: 127
cat("Plots with GDDS:", sum(!is.na(plot_data$GDDS)), "\n")
## Plots with GDDS: 127

4. Identify usable phenotypes (<50% missing)

all_traits <- c(all_phenotypes, "GDDA", "GDDS")

missing_summary <- plot_data %>%
  select(all_of(all_traits)) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
  mutate(
    total_obs = nrow(plot_data),
    missing_pct = round(missing_count / total_obs * 100, 1),
    available_n = total_obs - missing_count
  ) %>%
  arrange(missing_pct)

print(missing_summary)
## # A tibble: 18 × 5
##    phenotype missing_count total_obs missing_pct available_n
##    <chr>             <int>     <int>       <dbl>       <int>
##  1 PH                    0       127         0           127
##  2 DTA                   0       127         0           127
##  3 DTS                   0       127         0           127
##  4 GDDA                  0       127         0           127
##  5 GDDS                  0       127         0           127
##  6 STW40                 2       127         1.6         125
##  7 STWHV                 2       127         1.6         125
##  8 STW50                15       127        11.8         112
##  9 STW60                20       127        15.7         107
## 10 EW                   22       127        17.3         105
## 11 EL                   22       127        17.3         105
## 12 ED                   22       127        17.3         105
## 13 KRN                  22       127        17.3         105
## 14 CD                   22       127        17.3         105
## 15 TKW                  22       127        17.3         105
## 16 KW50                 22       127        17.3         105
## 17 TKN                  22       127        17.3         105
## 18 HI                   22       127        17.3         105
usable_phenotypes <- missing_summary %>%
  filter(missing_pct < 50) %>%
  pull(phenotype)

cat("Usable phenotypes (<50% missing):", paste(usable_phenotypes, collapse = ", "), "\n")
## Usable phenotypes (<50% missing): PH, DTA, DTS, GDDA, GDDS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

5. Build TimePoints object for statgenHTP

single_time <- plot_data %>%
  mutate(
    Genotype = as.character(Genotype),
    Treatment = as.character(Treatment),
    Rep = as.character(Rep),
    Plot_Row = as.numeric(Plot_Row),
    Plot_Column = as.numeric(Plot_Column),
    time = as.POSIXct("2022-05-22 10:00:00")
  )

tp_data <- createTimePoints(
  dat = single_time,
  experimentName = environment_name,
  genotype = "Genotype",
  timePoint = "time",
  plotId = "rowid",
  repId = "Rep",
  rowNum = "Plot_Row",
  colNum = "Plot_Column",
  checkGenotypes = c("NUE", "B73"),
  addCheck = TRUE
)

summary(tp_data)
## tp_data contains data for experiment PSU2022.
## 
## It contains 1 time points.
## First time point: 2022-05-22 10:00:00 
## Last time point: 2022-05-22 10:00:00 
## 
## The following genotypes are defined as check genotypes: NUE, B73.

6. Fit SpATS per phenotype and extract corrected values

corrected_list <- list()
fit_info <- list()
all_models <- list()

for (phen in usable_phenotypes) {
  message("Fitting SpATS model for: ", phen)
  
  no_nas <- single_time %>%
    filter(!is.na(.data[[phen]]))
  
  if (nrow(no_nas) < 10) {
    message("Insufficient data for ", phen, " (n=", nrow(no_nas), ")")
    next
  }
  
  fit_try <- try(
    fitModels(
      TP = tp_data,
      trait = phen,
      extraFixedFactors = c("repId"),  # Treatment lives in Stage-2 lm
      timePoints = 1,
      what = "fixed"
    ),
    silent = TRUE
  )
  
  if (inherits(fit_try, "try-error")) {
    message("Fit failed for ", phen, ": ", fit_try)
    next
  }
  
  all_models[[phen]] <- fit_try
  
  corr <- try(getCorrected(fit_try), silent = TRUE)
  if (inherits(corr, "try-error") || is.null(corr)) {
    message("getCorrected() failed for ", phen)
    next
  }
  
  corr <- corr %>%
    select(-phen)
  
  corrected_list[[phen]] <- corr
  fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "success")
}

sp_corrected <- lapply(
  corrected_list,
  function(corr_df) {
    corr_df %>%
      select(timeNumber, timePoint, genotype, repId,
             rowId, colId, plotId)
  }
) %>%
  bind_rows() %>%
  arrange(plotId) %>%
  distinct()

## Treatment is no longer in SpATS extraFixedFactors (Stage-2 lm handles it)
## — re-attach it from single_time so downstream lm + emmeans has access.
sp_corrected <- sp_corrected %>%
  left_join(single_time %>%
              mutate(plotId = as.character(rowid)) %>%
              select(plotId, Treatment) %>%
              distinct(),
            by = "plotId")

for (phen in names(corrected_list)) {
  phen_corr <- paste0(phen, "_corr")
  corr_df <- corrected_list[[phen]] %>%
    select(plotId, all_of(phen_corr))
  sp_corrected <- left_join(sp_corrected, corr_df, by = "plotId")
}

colnames(sp_corrected) <- gsub("_corr", "", colnames(sp_corrected))

cat("Final sp_corrected dimensions:", dim(sp_corrected), "\n")
cat("Corrected traits:", 
    paste(intersect(usable_phenotypes, colnames(sp_corrected)), 
          collapse = ", "), "\n")
print(head(sp_corrected))

7. Spatial correction visualization (example for PH and HI)

if ("PH" %in% names(all_models)) {
  plot(all_models$PH, timePoints = 1)
  plot(all_models$PH, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}

if ("HI" %in% names(all_models)) {
  plot(all_models$HI, timePoints = 1)
  plot(all_models$HI, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}

if ("GDDA" %in% names(all_models)) {
  cat("\n### Spatial plots for GDDA\n")
  plot(all_models$GDDA, timePoints = 1)
  plot(all_models$GDDA, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
## 
## ### Spatial plots for GDDA

if ("GDDS" %in% names(all_models)) {
  cat("\n### Spatial plots for GDDS\n")
  plot(all_models$GDDS, timePoints = 1)
  plot(all_models$GDDS, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
## 
## ### Spatial plots for GDDS

8. Stage-2 contrasts: lm + emmeans (matches PSU2024 §8)

NIL_corrected <- sp_corrected %>%
  filter(genotype %in% c("Inv4m", "CTRL")) %>%
  mutate(genotype = factor(genotype, levels = c("CTRL", "Inv4m")),
         Treatment = factor(Treatment)) %>%
  droplevels()

## Per-trait lm + emmeans: marginal Genotype contrast (pooled over P) and
## conditional Genotype contrasts within each P_LEVEL. G x T interaction p
## stored as diagnostic (NOT in FDR pool).
fit_one_trait <- function(trait_name, data) {
  d <- data %>%
    pivot_longer(cols = all_of(trait_name),
                 names_to = "trait", values_to = "value") %>%
    filter(!is.na(value))
  if (nrow(d) < 4 || length(unique(d$genotype)) < 2 ||
      length(unique(d$Treatment)) < 2) return(NULL)

  m <- tryCatch(lm(value ~ genotype * Treatment, data = d),
                error = function(e) NULL)
  if (is.null(m)) return(NULL)

  anov  <- anova(m)
  p_int <- if ("genotype:Treatment" %in% rownames(anov))
             anov["genotype:Treatment", "Pr(>F)"] else NA_real_

  marg <- pairs(emmeans(m, ~ genotype), reverse = TRUE) %>%
    as_tibble() %>%
    transmute(trait = trait_name,
              contrast_type = "marginal",
              contrast_level = "marginal",
              estimate, SE, df, t = t.ratio, p = p.value,
              p_int_GxT = p_int)

  cond <- pairs(emmeans(m, ~ genotype | Treatment), reverse = TRUE) %>%
    as_tibble() %>%
    transmute(trait = trait_name,
              contrast_type = "conditional",
              contrast_level = paste0("Genotype|", Treatment),
              estimate, SE, df, t = t.ratio, p = p.value,
              p_int_GxT = p_int)

  bind_rows(marg, cond)
}

fitted_traits <- intersect(usable_phenotypes, names(NIL_corrected))

all_contrasts <- lapply(fitted_traits, fit_one_trait,
                        data = NIL_corrected) %>%
  bind_rows() %>%
  mutate(p_adj = p.adjust(p, method = "fdr"),
         signif = case_when(
           p_adj < 0.001 ~ "***",
           p_adj < 0.01  ~ "**",
           p_adj < 0.05  ~ "*",
           TRUE          ~ "ns"
         ))

cat("FDR pool size:", nrow(all_contrasts), "contrasts\n")
## FDR pool size: 54 contrasts
cat("Significant (FDR < 0.05):",
    sum(all_contrasts$p_adj < 0.05), "rows\n\n")
## Significant (FDR < 0.05): 19 rows
knitr::kable(all_contrasts %>% arrange(p_adj) %>%
               select(trait, contrast_type, contrast_level,
                      estimate, SE, df, t, p, p_adj, signif, p_int_GxT),
             digits = 4,
             caption = paste("All Stage-2 contrasts. p = nominal,",
                             "p_adj = BH-FDR across the pool."))
All Stage-2 contrasts. p = nominal, p_adj = BH-FDR across the pool.
trait contrast_type contrast_level estimate SE df t p p_adj signif p_int_GxT
PH marginal marginal 6.4849 1.1154 60 5.8138 0.0000 0.0000 *** 0.4200
PH conditional Genotype|-P 7.3906 1.5775 60 4.6851 0.0000 0.0002 *** 0.4200
DTA marginal marginal -1.2923 0.2648 60 -4.8811 0.0000 0.0002 *** 0.6078
DTS marginal marginal -1.0546 0.2221 60 -4.7475 0.0000 0.0002 *** 0.1578
GDDA marginal marginal -18.7391 3.9079 60 -4.7951 0.0000 0.0002 *** 0.4584
GDDS marginal marginal -14.9196 3.2005 60 -4.6617 0.0000 0.0002 *** 0.0869
GDDS conditional Genotype|+P -20.4898 4.5261 60 -4.5270 0.0000 0.0002 *** 0.0869
DTS conditional Genotype|+P -1.3724 0.3142 60 -4.3685 0.0001 0.0003 *** 0.1578
GDDA conditional Genotype|+P -21.6557 5.5267 60 -3.9184 0.0002 0.0014 ** 0.4584
DTA conditional Genotype|+P -1.4290 0.3744 60 -3.8163 0.0003 0.0017 ** 0.6078
PH conditional Genotype|+P 5.5792 1.5775 60 3.5368 0.0008 0.0039 ** 0.4200
CD conditional Genotype|-P -2.2160 0.6250 49 -3.5454 0.0009 0.0039 ** 0.0156
DTA conditional Genotype|-P -1.1557 0.3744 60 -3.0866 0.0031 0.0127 * 0.6078
STW40 conditional Genotype|+P -3.1330 1.0726 59 -2.9209 0.0049 0.0190 * 0.0455
GDDA conditional Genotype|-P -15.8226 5.5267 60 -2.8630 0.0058 0.0208 * 0.4584
STW50 conditional Genotype|+P -4.9892 1.8034 53 -2.7666 0.0078 0.0263 * 0.0330
HI marginal marginal 0.1151 0.0422 49 2.7241 0.0089 0.0283 * 0.5071
STWHV conditional Genotype|+P -13.3954 5.0233 58 -2.6667 0.0099 0.0297 * 0.1784
STWHV marginal marginal -8.6387 3.4923 58 -2.4737 0.0163 0.0464 * 0.1784
DTS conditional Genotype|-P -0.7368 0.3142 60 -2.3454 0.0223 0.0585 ns 0.1578
CD marginal marginal -1.0727 0.4562 49 -2.3514 0.0228 0.0585 ns 0.0156
HI conditional Genotype|+P 0.1433 0.0616 49 2.3285 0.0241 0.0590 ns 0.5071
ED conditional Genotype|-P -1.9903 0.9195 49 -2.1645 0.0353 0.0771 ns 0.2100
KRN conditional Genotype|-P -0.6956 0.3208 49 -2.1681 0.0350 0.0771 ns 0.3742
TKW marginal marginal 8.0148 3.7108 49 2.1599 0.0357 0.0771 ns 0.5907
STW40 marginal marginal -1.5960 0.7523 59 -2.1215 0.0381 0.0791 ns 0.0455
GDDS conditional Genotype|-P -9.3495 4.5261 60 -2.0657 0.0432 0.0836 ns 0.0869
KRN marginal marginal -0.4856 0.2341 49 -2.0740 0.0434 0.0836 ns 0.3742
EL conditional Genotype|+P 0.8117 0.3980 49 2.0395 0.0468 0.0872 ns 0.1108
EW marginal marginal 8.6107 4.4427 49 1.9382 0.0584 0.1051 ns 0.7230
STW50 marginal marginal -2.3120 1.2230 53 -1.8905 0.0642 0.1118 ns 0.0330
TKW conditional Genotype|+P 10.0238 5.4063 49 1.8541 0.0697 0.1177 ns 0.5907
ED marginal marginal -1.1378 0.6711 49 -1.6955 0.0963 0.1576 ns 0.2100
EW conditional Genotype|+P 10.1946 6.4726 49 1.5751 0.1217 0.1933 ns 0.7230
STW60 conditional Genotype|-P 7.1062 4.6379 50 1.5322 0.1318 0.2033 ns 0.1120
HI conditional Genotype|-P 0.0869 0.0579 49 1.5004 0.1399 0.2099 ns 0.5071
EL marginal marginal 0.3680 0.2732 49 1.3472 0.1841 0.2687 ns 0.1108
TKN marginal marginal 33.9659 26.6347 49 1.2753 0.2082 0.2959 ns 0.9310
TKW conditional Genotype|-P 6.0059 5.0845 49 1.1812 0.2432 0.3368 ns 0.5907
EW conditional Genotype|-P 7.0268 6.0874 49 1.1543 0.2540 0.3428 ns 0.7230
TKN conditional Genotype|+P 36.2834 38.8041 49 0.9350 0.3544 0.4667 ns 0.9310
TKN conditional Genotype|-P 31.6483 36.4947 49 0.8672 0.3901 0.5015 ns 0.9310
STWHV conditional Genotype|-P -3.8821 4.8529 58 -0.7999 0.4270 0.5241 ns 0.1784
KRN conditional Genotype|+P -0.2756 0.3411 49 -0.8081 0.4230 0.5241 ns 0.3742
STW60 conditional Genotype|+P -3.8955 4.9724 50 -0.7834 0.4371 0.5245 ns 0.1120
STW60 marginal marginal 1.6054 3.3998 50 0.4722 0.6388 0.7340 ns 0.1120
KW50 conditional Genotype|+P 0.2554 0.5378 49 0.4749 0.6370 0.7340 ns 0.7560
KW50 marginal marginal 0.1400 0.3691 49 0.3793 0.7061 0.7943 ns 0.7560
ED conditional Genotype|+P -0.2853 0.9777 49 -0.2918 0.7716 0.8504 ns 0.2100
STW50 conditional Genotype|-P 0.3652 1.6524 53 0.2210 0.8259 0.8901 ns 0.0330
EL conditional Genotype|-P -0.0756 0.3743 49 -0.2021 0.8407 0.8901 ns 0.1108
CD conditional Genotype|+P 0.0707 0.6646 49 0.1064 0.9157 0.9509 ns 0.0156
STW40 conditional Genotype|-P -0.0591 1.0552 59 -0.0560 0.9555 0.9613 ns 0.0455
KW50 conditional Genotype|-P 0.0247 0.5058 49 0.0488 0.9613 0.9613 ns 0.7560
## y.position per trait for downstream stat_pvalue_manual
y_positions <- NIL_corrected %>%
  pivot_longer(cols = any_of(fitted_traits),
               names_to = "trait", values_to = "value") %>%
  filter(!is.na(value)) %>%
  group_by(trait) %>%
  summarise(y.position = max(value, na.rm = TRUE) +
                          0.10 * diff(range(value, na.rm = TRUE)),
            .groups = "drop")

## contrast_df = marginal row per trait; drives the panel star
contrast_df <- all_contrasts %>%
  filter(contrast_type == "marginal") %>%
  left_join(y_positions, by = "trait") %>%
  mutate(group1 = "CTRL", group2 = "Inv4m",
         p.adj = p_adj,
         p.adj.signif = signif)

## Back-compat alias: downstream boxplot code references `htest` with rstatix
## column names (group1/group2/y.position/p.adj.signif).
htest <- contrast_df %>%
  select(trait, group1, group2, y.position, p.adj, p.adj.signif,
         signif, p_int_GxT,
         effect = estimate, se_diff = SE, t, p, df)

alpha <- 0.05
sig_traits <- contrast_df %>% filter(p_adj < alpha) %>%
  arrange(p_adj) %>% pull(trait)

cat("Significant traits — marginal Inv4m - CTRL (FDR < 0.05):",
    if (length(sig_traits)) paste(sig_traits, collapse = ", ") else "none",
    "\n")
## Significant traits — marginal Inv4m - CTRL (FDR < 0.05): PH, DTA, DTS, GDDA, GDDS, HI, STWHV
pheno_theme2 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 25, face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 20),
    legend.position = "none"
  )

if (length(sig_traits) == 0) {
  warning("No traits significant after FDR correction.")
} else {
  # --- Main Figure 2 Panel A: 6 traits with units ---
  main_traits <- c("PH", "DTA", "DTS", "HI", "STWHV", "CD")
  trait_units <- c(PH = "cm", DTA = "Days", DTS = "Days", HI = "", STWHV = "g", CD = "mm")

  plot_data_main <- NIL_corrected %>%
    pivot_longer(
      cols = all_of(usable_phenotypes),
      names_to = "trait",
      values_to = "value"
    ) %>%
    filter(trait %in% main_traits, !is.na(value)) %>%
    mutate(
      trait = factor(trait, levels = main_traits),
      genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
    )

  test_main <- htest %>%
    filter(trait %in% main_traits)

  # Theme with bold strip text, no title, rotated x-axis with ggtext
 pheno_theme_panel <- theme_classic2(base_size = 20) +
    theme(
      plot.title = element_blank(),
      axis.title.y = element_text(size = 18),
      axis.title.x = element_blank(),
      axis.text.x = element_markdown(size = 18, face = "bold", color = "black"),
      axis.text.y = element_text(size = 16),
      strip.background = element_blank(),
      strip.text = element_text(size = 20, face = "bold"),
      legend.position = "none"
    )

  # Create individual plots with custom y-axis labels
  make_trait_plot <- function(trait_name, data, tests, units) {
    d <- filter(data, trait == trait_name)
    t <- filter(tests, trait == trait_name)
    y_label <- units[trait_name]

    p <- ggplot(d, aes(x = genotype, y = value, color = genotype)) +
      geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      geom_quasirandom(size = 2) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
      scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
      labs(title = trait_name, y = y_label, x = NULL) +
      stat_pvalue_manual(t, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
      scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
      pheno_theme_panel +
      theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5))

    # Custom 1-day breaks for DTA/DTS
    if (trait_name %in% c("DTA", "DTS")) {
      y_range <- range(d$value, na.rm = TRUE)
      p <- p + scale_y_continuous(
        breaks = seq(floor(y_range[1]), ceiling(y_range[2]), by = 1),
        expand = expansion(mult = c(0.1, 0.15))
      )
    }
    return(p)
  }

  trait_plots <- lapply(main_traits, make_trait_plot, data = plot_data_main, tests = test_main, units = trait_units)

  main_panel <- cowplot::plot_grid(
    plotlist = trait_plots,
    nrow = 2,
    ncol = 3,
    align = "hv",
    axis = "tblr"
  )

  print(main_panel)

  # Save main panel for Figure 2 (2 rows x 3 cols)
  ggsave(
    file.path(paths$figures, "field_phenotypes_panel.pdf"),
    main_panel,
    width = 10, height = 7, dpi = 300
  )
  ggsave(
    file.path(paths$figures, "field_phenotypes_panel.png"),
    main_panel,
    width = 10, height = 7, dpi = 300
  )
  cat("Saved field phenotypes panel to:", file.path(paths$figures, "field_phenotypes_panel.png"), "\n")

  # --- Supplementary: GDDA, GDDS panel ---
  gdd_traits <- c("GDDA", "GDDS")
  gdd_traits_present <- intersect(gdd_traits, sig_traits)

  if (length(gdd_traits_present) > 0) {
    plot_data_gdd <- NIL_corrected %>%
      pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
      filter(trait %in% gdd_traits_present, !is.na(value)) %>%
      mutate(trait = factor(trait, levels = gdd_traits_present))

    test_gdd <- htest %>% filter(trait %in% gdd_traits_present)

    gdd_plot <- ggplot(plot_data_gdd, aes(x = genotype, y = value, color = genotype)) +
      geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      geom_quasirandom(size = 2) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
      labs(y = "GDD (°C·days)", x = NULL) +
      stat_pvalue_manual(test_gdd, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
      scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
      facet_wrap(~ trait, scales = "free_y", ncol = 2) +
      pheno_theme_panel

    print(gdd_plot)

    ggsave(file.path(paths$figures, "gdd_phenotypes_panel.pdf"), gdd_plot, width = 6, height = 4, dpi = 300)
    ggsave(file.path(paths$figures, "gdd_phenotypes_panel.png"), gdd_plot, width = 6, height = 4, dpi = 300)
    cat("Saved GDD phenotypes panel to:", file.path(paths$figures, "gdd_phenotypes_panel.png"), "\n")
  }

  # --- All significant traits plot (for reference) ---
  plot_data_sig <- NIL_corrected %>%
    pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
    filter(trait %in% sig_traits, !is.na(value)) %>%
    mutate(trait = as.character(trait))

  test_to_plot <- htest %>% filter(trait %in% sig_traits)

  diff_plot <- ggplot(plot_data_sig, aes(x = genotype, y = value, color = genotype)) +
    geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    geom_quasirandom(size = 2) %>%
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
    labs(title = "Inv4m vs CTRL: Spatially Corrected Phenotypes (FDR-Adjusted)", y = "Corrected Value", x = "Genotype") +
    stat_pvalue_manual(test_to_plot, tip.length = 0.01, step.height = 0.08, size = 10, bracket.size = 0.8) +
    scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
    facet_wrap(~ factor(trait, levels = sig_traits), scales = "free_y", ncol = 4) +
    pheno_theme_panel

  print(diff_plot)
}

## Saved field phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/field_phenotypes_panel.png

## Saved GDD phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/gdd_phenotypes_panel.png

8b. Save Stage-2 contrasts (matches PSU2024 CSV naming)

write_csv(contrast_df %>% select(-y.position, -group1, -group2, -p.adj,
                                  -p.adj.signif),
          file.path(paths$intermediate,
                    paste0(environment_name,
                           "_focal_contrast_marginal.csv")))

write_csv(all_contrasts,
          file.path(paths$intermediate,
                    paste0(environment_name,
                           "_focal_contrast_all.csv")))

cat("Outputs:\n  ",
    file.path(paths$intermediate,
              paste0(environment_name, "_focal_contrast_marginal.csv")),
    "\n  ",
    file.path(paths$intermediate,
              paste0(environment_name, "_focal_contrast_all.csv")), "\n")
## Outputs:
##    /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_focal_contrast_marginal.csv 
##    /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_focal_contrast_all.csv

9. Summary and export

cat("\n=== Analysis Summary ===\n")
## 
## === Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_traits), "\n")
## Total phenotypes evaluated: 18
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 18
cat("Successfully fitted models:", length(all_models), "\n")
## Successfully fitted models: 18
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
  cat("Plots with corrected data:", nrow(sp_corrected), "\n")

  output_file <- file.path(paths$intermediate, paste0(environment_name, "_spatially_corrected_phenotypes.csv"))
  write_csv(sp_corrected, output_file)
  cat("Spatially corrected data exported to:", output_file, "\n")
} else {
  cat("No corrected data to export\n")
}
## Plots with corrected data: 127 
## Spatially corrected data exported to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_spatially_corrected_phenotypes.csv
if (length(fit_info) > 0) {
  cat("\n=== Model Fit Details ===\n")
  for (trait in names(fit_info)) {
    info <- fit_info[[trait]]
    cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
  }
}
## 
## === Model Fit Details ===
## PH: n=127, status=success
## DTA: n=127, status=success
## DTS: n=127, status=success
## GDDA: n=127, status=success
## GDDS: n=127, status=success
## STW40: n=125, status=success
## STWHV: n=125, status=success
## STW50: n=112, status=success
## STW60: n=107, status=success
## EW: n=105, status=success
## EL: n=105, status=success
## ED: n=105, status=success
## KRN: n=105, status=success
## CD: n=105, status=success
## TKW: n=105, status=success
## KW50: n=105, status=success
## TKN: n=105, status=success
## HI: n=105, status=success