Overview

This notebook fits a SpATS PSANOVA surface per phenotype and returns spatially-corrected plot-level values for each phenotype, including GDD-based traits (GDDA and GDDS). The approach uses statgenHTP::fitModels() (which wraps SpATS when provided a TimePoints object) and getCorrected() to extract corrected plot-level observations. The workflow:

  1. Load GDD lookup table for PA2022.
  2. Load and clean data (as in prior notebook).
  3. Create plot_data (plot-level means) and merge GDD values.
  4. Build a TimePoints object for statgenHTP.
  5. Loop over usable phenotypes (including GDDA, GDDS) and apply fitModels()getCorrected().
  6. Merge corrected values into a wide table sp_corrected.
  7. Save corrected table for downstream analyses.

Warning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.

1. Load libraries

library(tidyverse)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme)
library(gstat)
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS)
library(statgenHTP)
library(ggbeeswarm)
library(cowplot)

1b. Configuration and load GDD lookup table

environment_name <- "PSU2022"
planting_date <- mdy("5/26/2022")
gdd_lookup_file <- file.path(paths$intermediate, "gdd_lookup_table.csv")

gdd_at_phenology <- read_csv(gdd_lookup_file) %>%
  filter(environment == environment_name)

cat("Loaded GDD lookup for", environment_name, "with", 
    nrow(gdd_at_phenology), "dates\n")
## Loaded GDD lookup for PSU2022 with 101 dates
cat("GDD range:", min(gdd_at_phenology$cumulative_gdd_from_planting), "to", 
    max(gdd_at_phenology$cumulative_gdd_from_planting), "°C\n")
## GDD range: 6.28 to 1203.705 °C

2. Load and prepare data

DATA_DIR <- paths$data
plant_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
ear_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field_ear_pheno.csv")

field_data_raw <- read.csv(plant_csv, na.strings = c("", "#N/A", "NA"))
ear_data_raw <- read.csv(ear_csv, na.strings = c("", "n/a", "NA"), skip = 1)

field_data <- field_data_raw %>%
  filter(P22. >= 3004, P22. <= 4192) %>%
  rename(
    rowid = P22.,
    Genotype = Who.What,
    PH = Height_Anthesis,
    STW40 = X40_DAP_dw,
    STW50 = X50_DAP_dw, 
    STW60 = X60_DAP_dw,
    STWHV = harvest_dw
  ) %>%
  mutate(
    Treatment = factor(Treatment, levels = c("HighP", "LowP")),
    Genotype = factor(Genotype),
    Rep = as.factor(Rep)
  ) %>%
  mutate(
    Treatment = fct_recode(Treatment, "+P" = "HighP", "-P" = "LowP"),
    Genotype = fct_recode(Genotype, "Inv4m" = "INV4M")
  )

ear_data <- ear_data_raw %>%
  select(-description, -RK, -CC, -NIR, ear_rep = rep) %>%
  rename(rowid = row) %>%
  arrange(rowid) %>%
  group_by(rowid) %>%
  select(-ear_rep) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  droplevels()

field_data_complete <- field_data %>%
  left_join(ear_data, by = "rowid") %>%
  mutate(HI = TKW / STWHV) %>%
  mutate(
    Plot_Column = case_when(
      Plot == "PlotVIII" ~ Plot_Column + 10,
      Plot == "PlotVI" ~ Plot_Column,
      TRUE ~ Plot_Column
    )
  ) %>%
  filter(!is.na(PH)) %>%
  droplevels()

base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")
all_phenotypes <- unique(c(base_phenotypes, ear_phenotypes))

cat("Field data dimensions:", dim(field_data_complete), "\n")
## Field data dimensions: 127 39
cat("Phenotypes available:", paste(all_phenotypes, collapse = ", "), "\n")
## Phenotypes available: PH, DTA, DTS, STW40, STW50, STW60, STWHV, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

3. Create plot-level data with GDD integration

plot_data <- field_data_complete %>%
  group_by(rowid, Rep, Plot_Row, Plot_Column, Genotype, Treatment) %>%
  summarise(
    across(all_of(all_phenotypes), ~ mean(.x, na.rm = TRUE)),
    n_plants = n(),
    .groups = 'drop'
  ) %>%
  mutate(
    across(all_of(all_phenotypes), ~ ifelse(is.nan(.x), NA, .x)),
    environment = environment_name,
    anthesis_date = planting_date + days(round(DTA)),
    silking_date = planting_date + days(round(DTS))
  ) %>%
  left_join(
    gdd_at_phenology %>% 
      rename(anthesis_date = date, GDDA = cumulative_gdd_from_planting),
    by = c("environment", "anthesis_date")
  ) %>%
  left_join(
    gdd_at_phenology %>% 
      rename(silking_date = date, GDDS = cumulative_gdd_from_planting),
    by = c("environment", "silking_date")
  ) %>%
  select(-environment, -anthesis_date, -silking_date)

plot_data$Plot_Row <- as.numeric(plot_data$Plot_Row)
plot_data$Plot_Column <- as.numeric(plot_data$Plot_Column)
plot_data$Rep <- as.factor(plot_data$Rep)

cat("Plot-level summary dimensions:", dim(plot_data), "\n")
## Plot-level summary dimensions: 127 25
cat("Average plants per plot:", round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")
## Average plants per plot: 1
cat("GDD columns added: GDDA (anthesis), GDDS (silking)\n")
## GDD columns added: GDDA (anthesis), GDDS (silking)
cat("Plots with GDDA:", sum(!is.na(plot_data$GDDA)), "\n")
## Plots with GDDA: 127
cat("Plots with GDDS:", sum(!is.na(plot_data$GDDS)), "\n")
## Plots with GDDS: 127

4. Identify usable phenotypes (<50% missing)

all_traits <- c(all_phenotypes, "GDDA", "GDDS")

missing_summary <- plot_data %>%
  select(all_of(all_traits)) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
  mutate(
    total_obs = nrow(plot_data),
    missing_pct = round(missing_count / total_obs * 100, 1),
    available_n = total_obs - missing_count
  ) %>%
  arrange(missing_pct)

print(missing_summary)
## # A tibble: 18 × 5
##    phenotype missing_count total_obs missing_pct available_n
##    <chr>             <int>     <int>       <dbl>       <int>
##  1 PH                    0       127         0           127
##  2 DTA                   0       127         0           127
##  3 DTS                   0       127         0           127
##  4 GDDA                  0       127         0           127
##  5 GDDS                  0       127         0           127
##  6 STW40                 2       127         1.6         125
##  7 STWHV                 2       127         1.6         125
##  8 STW50                15       127        11.8         112
##  9 STW60                20       127        15.7         107
## 10 EW                   22       127        17.3         105
## 11 EL                   22       127        17.3         105
## 12 ED                   22       127        17.3         105
## 13 KRN                  22       127        17.3         105
## 14 CD                   22       127        17.3         105
## 15 TKW                  22       127        17.3         105
## 16 KW50                 22       127        17.3         105
## 17 TKN                  22       127        17.3         105
## 18 HI                   22       127        17.3         105
usable_phenotypes <- missing_summary %>%
  filter(missing_pct < 50) %>%
  pull(phenotype)

cat("Usable phenotypes (<50% missing):", paste(usable_phenotypes, collapse = ", "), "\n")
## Usable phenotypes (<50% missing): PH, DTA, DTS, GDDA, GDDS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

5. Build TimePoints object for statgenHTP

single_time <- plot_data %>%
  mutate(
    Genotype = as.character(Genotype),
    Treatment = as.character(Treatment),
    Rep = as.character(Rep),
    Plot_Row = as.numeric(Plot_Row),
    Plot_Column = as.numeric(Plot_Column),
    time = as.POSIXct("2022-05-22 10:00:00")
  )

tp_data <- createTimePoints(
  dat = single_time,
  experimentName = environment_name,
  genotype = "Genotype",
  timePoint = "time",
  plotId = "rowid",
  repId = "Rep",
  rowNum = "Plot_Row",
  colNum = "Plot_Column",
  checkGenotypes = c("NUE", "B73"),
  addCheck = TRUE
)

summary(tp_data)
## tp_data contains data for experiment PSU2022.
## 
## It contains 1 time points.
## First time point: 2022-05-22 10:00:00 
## Last time point: 2022-05-22 10:00:00 
## 
## The following genotypes are defined as check genotypes: NUE, B73.

6. Fit SpATS per phenotype and extract corrected values

corrected_list <- list()
fit_info <- list()
all_models <- list()

for (phen in usable_phenotypes) {
  message("Fitting SpATS model for: ", phen)
  
  no_nas <- single_time %>%
    filter(!is.na(.data[[phen]]))
  
  if (nrow(no_nas) < 10) {
    message("Insufficient data for ", phen, " (n=", nrow(no_nas), ")")
    next
  }
  
  fit_try <- try(
    fitModels(
      TP = tp_data,
      trait = phen,
      extraFixedFactors = c("repId", "Treatment"),
      timePoints = 1,
      what = "fixed"
    ),
    silent = TRUE
  )
  
  if (inherits(fit_try, "try-error")) {
    message("Fit failed for ", phen, ": ", fit_try)
    next
  }
  
  all_models[[phen]] <- fit_try
  
  corr <- try(getCorrected(fit_try), silent = TRUE)
  if (inherits(corr, "try-error") || is.null(corr)) {
    message("getCorrected() failed for ", phen)
    next
  }
  
  corr <- corr %>%
    select(-phen)
  
  corrected_list[[phen]] <- corr
  fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "success")
}

sp_corrected <- lapply(
  corrected_list,
  function(corr_df) {
    corr_df %>%
      select(timeNumber, timePoint, genotype, repId, Treatment, 
             rowId, colId, plotId)
  }
) %>%
  bind_rows() %>%
  arrange(plotId) %>%
  distinct()

for (phen in names(corrected_list)) {
  phen_corr <- paste0(phen, "_corr")
  corr_df <- corrected_list[[phen]] %>%
    select(plotId, all_of(phen_corr))
  sp_corrected <- left_join(sp_corrected, corr_df, by = "plotId")
}

colnames(sp_corrected) <- gsub("_corr", "", colnames(sp_corrected))

cat("Final sp_corrected dimensions:", dim(sp_corrected), "\n")
cat("Corrected traits:", 
    paste(intersect(usable_phenotypes, colnames(sp_corrected)), 
          collapse = ", "), "\n")
print(head(sp_corrected))

7. Spatial correction visualization (example for PH and HI)

if ("PH" %in% names(all_models)) {
  plot(all_models$PH, timePoints = 1)
  plot(all_models$PH, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}

if ("HI" %in% names(all_models)) {
  plot(all_models$HI, timePoints = 1)
  plot(all_models$HI, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}

if ("GDDA" %in% names(all_models)) {
  cat("\n### Spatial plots for GDDA\n")
  plot(all_models$GDDA, timePoints = 1)
  plot(all_models$GDDA, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
## 
## ### Spatial plots for GDDA

if ("GDDS" %in% names(all_models)) {
  cat("\n### Spatial plots for GDDS\n")
  plot(all_models$GDDS, timePoints = 1)
  plot(all_models$GDDS, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
## 
## ### Spatial plots for GDDS

8. Spatially Corrected Inv4m Effects from Combined P Treatments

NIL_corrected <- sp_corrected %>%
  filter(genotype %in% c("Inv4m", "CTRL")) %>%
  droplevels()

htest <- NIL_corrected %>%
  pivot_longer(
    cols = all_of(usable_phenotypes), 
    names_to = "trait", 
    values_to = "value"
  ) %>%
  filter(!is.na(value)) %>%
  group_by(trait) %>%
  t_test(value ~ genotype) %>%
  adjust_pvalue(method = "fdr") %>%
  add_y_position(scales = "free") %>%
  add_significance() %>%
  arrange(p.adj)

print(htest)
## # A tibble: 18 × 13
##    trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##    <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
##  1 PH    value CTRL   Inv4m     32    32    -5.82   61.3 0.000000232 0.00000418
##  2 DTA   value CTRL   Inv4m     32    32     4.89   60.4 0.00000793  0.0000714 
##  3 DTS   value CTRL   Inv4m     32    32     4.73   58.9 0.0000143   0.0000738 
##  4 GDDA  value CTRL   Inv4m     32    32     4.69   60.1 0.0000164   0.0000738 
##  5 GDDS  value CTRL   Inv4m     32    32     4.60   55.7 0.0000247   0.0000889 
##  6 HI    value CTRL   Inv4m     26    27    -2.65   48.6 0.0107      0.0321    
##  7 CD    value CTRL   Inv4m     26    27     2.41   35.6 0.0211      0.0475    
##  8 STWHV value CTRL   Inv4m     30    32     2.41   58.6 0.0191      0.0475    
##  9 TKW   value CTRL   Inv4m     26    27    -2.12   43.6 0.0396      0.0792    
## 10 KRN   value CTRL   Inv4m     26    27     2.00   43.7 0.0515      0.0854    
## 11 STW40 value CTRL   Inv4m     31    32     1.98   59.9 0.0522      0.0854    
## 12 EW    value CTRL   Inv4m     26    27    -1.95   43.6 0.0582      0.0873    
## 13 ED    value CTRL   Inv4m     26    27     1.74   34.0 0.0913      0.126     
## 14 STW50 value CTRL   Inv4m     28    29     1.57   51.0 0.123       0.158     
## 15 EL    value CTRL   Inv4m     26    27    -1.31   48.6 0.195       0.234     
## 16 TKN   value CTRL   Inv4m     26    27    -1.27   42.8 0.21        0.236     
## 17 STW60 value CTRL   Inv4m     26    28    -0.549  48.7 0.586       0.620     
## 18 KW50  value CTRL   Inv4m     26    27    -0.332  37.4 0.742       0.742     
## # ℹ 3 more variables: y.position <dbl>, groups <named list>, p.adj.signif <chr>
alpha <- 0.05
sig_traits <- htest %>%
  filter(p.adj < alpha) %>%
  arrange(p.adj) %>%
  pull(trait)

cat("Significant traits (FDR < 0.05):", paste(sig_traits, collapse = ", "), "\n")
## Significant traits (FDR < 0.05): PH, DTA, DTS, GDDA, GDDS, HI, CD, STWHV
pheno_theme2 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 25, face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 20),
    legend.position = "none"
  )

if (length(sig_traits) == 0) {
  warning("No traits significant after FDR correction.")
} else {
  # --- Main Figure 2 Panel A: 6 traits with units ---
  main_traits <- c("PH", "DTA", "DTS", "HI", "STWHV", "CD")
  trait_units <- c(PH = "cm", DTA = "Days", DTS = "Days", HI = "", STWHV = "g", CD = "mm")

  plot_data_main <- NIL_corrected %>%
    pivot_longer(
      cols = all_of(usable_phenotypes),
      names_to = "trait",
      values_to = "value"
    ) %>%
    filter(trait %in% main_traits, !is.na(value)) %>%
    mutate(
      trait = factor(trait, levels = main_traits),
      genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
    )

  test_main <- htest %>%
    filter(trait %in% main_traits)

  # Theme with bold strip text, no title, rotated x-axis with ggtext
 pheno_theme_panel <- theme_classic2(base_size = 20) +
    theme(
      plot.title = element_blank(),
      axis.title.y = element_text(size = 18),
      axis.title.x = element_blank(),
      axis.text.x = element_markdown(size = 18, face = "bold", color = "black"),
      axis.text.y = element_text(size = 16),
      strip.background = element_blank(),
      strip.text = element_text(size = 20, face = "bold"),
      legend.position = "none"
    )

  # Create individual plots with custom y-axis labels
  make_trait_plot <- function(trait_name, data, tests, units) {
    d <- filter(data, trait == trait_name)
    t <- filter(tests, trait == trait_name)
    y_label <- units[trait_name]

    p <- ggplot(d, aes(x = genotype, y = value, color = genotype)) +
      geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      geom_quasirandom(size = 2) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
      scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
      labs(title = trait_name, y = y_label, x = NULL) +
      stat_pvalue_manual(t, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
      scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
      pheno_theme_panel +
      theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5))

    # Custom 1-day breaks for DTA/DTS
    if (trait_name %in% c("DTA", "DTS")) {
      y_range <- range(d$value, na.rm = TRUE)
      p <- p + scale_y_continuous(
        breaks = seq(floor(y_range[1]), ceiling(y_range[2]), by = 1),
        expand = expansion(mult = c(0.1, 0.15))
      )
    }
    return(p)
  }

  trait_plots <- lapply(main_traits, make_trait_plot, data = plot_data_main, tests = test_main, units = trait_units)

  main_panel <- cowplot::plot_grid(
    plotlist = trait_plots,
    nrow = 2,
    ncol = 3,
    align = "hv",
    axis = "tblr"
  )

  print(main_panel)

  # Save main panel for Figure 2 (2 rows x 3 cols)
  ggsave(
    file.path(paths$figures, "field_phenotypes_panel.pdf"),
    main_panel,
    width = 10, height = 7, dpi = 300
  )
  ggsave(
    file.path(paths$figures, "field_phenotypes_panel.png"),
    main_panel,
    width = 10, height = 7, dpi = 300
  )
  cat("Saved field phenotypes panel to:", file.path(paths$figures, "field_phenotypes_panel.png"), "\n")

  # --- Supplementary: GDDA, GDDS panel ---
  gdd_traits <- c("GDDA", "GDDS")
  gdd_traits_present <- intersect(gdd_traits, sig_traits)

  if (length(gdd_traits_present) > 0) {
    plot_data_gdd <- NIL_corrected %>%
      pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
      filter(trait %in% gdd_traits_present, !is.na(value)) %>%
      mutate(trait = factor(trait, levels = gdd_traits_present))

    test_gdd <- htest %>% filter(trait %in% gdd_traits_present)

    gdd_plot <- ggplot(plot_data_gdd, aes(x = genotype, y = value, color = genotype)) +
      geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      geom_quasirandom(size = 2) %>%
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
      scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
      labs(y = "GDD (°C·days)", x = NULL) +
      stat_pvalue_manual(test_gdd, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
      scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
      facet_wrap(~ trait, scales = "free_y", ncol = 2) +
      pheno_theme_panel

    print(gdd_plot)

    ggsave(file.path(paths$figures, "gdd_phenotypes_panel.pdf"), gdd_plot, width = 6, height = 4, dpi = 300)
    ggsave(file.path(paths$figures, "gdd_phenotypes_panel.png"), gdd_plot, width = 6, height = 4, dpi = 300)
    cat("Saved GDD phenotypes panel to:", file.path(paths$figures, "gdd_phenotypes_panel.png"), "\n")
  }

  # --- All significant traits plot (for reference) ---
  plot_data_sig <- NIL_corrected %>%
    pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
    filter(trait %in% sig_traits, !is.na(value)) %>%
    mutate(trait = as.character(trait))

  test_to_plot <- htest %>% filter(trait %in% sig_traits)

  diff_plot <- ggplot(plot_data_sig, aes(x = genotype, y = value, color = genotype)) +
    geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    geom_quasirandom(size = 2) %>%
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
    labs(title = "Inv4m vs CTRL: Spatially Corrected Phenotypes (FDR-Adjusted)", y = "Corrected Value", x = "Genotype") +
    stat_pvalue_manual(test_to_plot, tip.length = 0.01, step.height = 0.08, size = 10, bracket.size = 0.8) +
    scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
    facet_wrap(~ factor(trait, levels = sig_traits), scales = "free_y", ncol = 4) +
    pheno_theme_panel

  print(diff_plot)
}

## Saved field phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/field_phenotypes_panel.png

## Saved GDD phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/gdd_phenotypes_panel.png

8b. Save effect sizes for reproducibility

# Compute mean differences (Inv4m - CTRL) with SE for manuscript reporting
effect_sizes <- NIL_corrected %>%
  pivot_longer(
    cols = all_of(usable_phenotypes),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(!is.na(value)) %>%
  group_by(trait, genotype) %>%
  summarise(
    mean = mean(value),
    sd = sd(value),
    n = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = genotype,
    values_from = c(mean, sd, n)
  ) %>%
  mutate(
    effect = mean_Inv4m - mean_CTRL,
    se_diff = sqrt(sd_CTRL^2 / n_CTRL + sd_Inv4m^2 / n_Inv4m)
  ) %>%
  left_join(
    htest %>% select(trait, statistic, df, p, p.adj, p.adj.signif),
    by = "trait"
  ) %>%
  arrange(p.adj)

print(effect_sizes)
## # A tibble: 18 × 14
##    trait mean_CTRL mean_Inv4m sd_CTRL sd_Inv4m n_CTRL n_Inv4m  effect se_diff
##    <chr>     <dbl>      <dbl>   <dbl>    <dbl>  <int>   <int>   <dbl>   <dbl>
##  1 PH      163.       169.      4.21     4.69      32      32   6.48   1.11  
##  2 DTA      73.9       72.5     0.975    1.15      32      32  -1.30   0.266 
##  3 DTS      75.2       74.2     0.781    0.986     32      32  -1.05   0.222 
##  4 GDDA    886.       867.     15.1     18.1       32      32 -19.5    4.17  
##  5 GDDS    905.       891.     10.6     15.0       32      32 -14.9    3.24  
##  6 HI        0.725      0.832   0.160    0.133     26      27   0.107  0.0405
##  7 CD       26.0       24.9     2.21     1.06      26      27  -1.16   0.479 
##  8 STWHV    95.9       87.6    14.2     13.0       30      32  -8.32   3.46  
##  9 TKW      64.5       72.3    15.5     10.5       26      27   7.75   3.65  
## 10 KRN      16.7       16.3     0.997    0.674     26      27  -0.470  0.235 
## 11 STW40     8.90       7.36    3.25     2.92      31      32  -1.54   0.778 
## 12 EW       83.8       92.3    18.5     12.4       26      27   8.45   4.34  
## 13 ED       38.9       37.7     3.18     1.40      26      27  -1.18   0.680 
## 14 STW50    26.5       24.5     5.31     4.13      28      29  -1.98   1.26  
## 15 EL       15.1       15.5     1.06     0.874     26      27   0.350  0.267 
## 16 TKN     367.       400.    109.      71.6       26      27  32.5   25.5   
## 17 STW60    58.0       59.8    13.5     11.2       26      28   1.86   3.38  
## 18 KW50      9.49       9.61    1.62     0.848     26      27   0.118  0.357 
## # ℹ 5 more variables: statistic <dbl>, df <dbl>, p <dbl>, p.adj <dbl>,
## #   p.adj.signif <chr>
write_csv(
  effect_sizes,
  file.path(paths$intermediate, paste0(environment_name, "_inv4m_effect_sizes.csv"))
)
cat("Effect sizes saved to:",
    file.path(paths$intermediate, paste0(environment_name, "_inv4m_effect_sizes.csv")), "\n")
## Effect sizes saved to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_inv4m_effect_sizes.csv

9. Summary and export

cat("\n=== Analysis Summary ===\n")
## 
## === Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_traits), "\n")
## Total phenotypes evaluated: 18
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 18
cat("Successfully fitted models:", length(all_models), "\n")
## Successfully fitted models: 18
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
  cat("Plots with corrected data:", nrow(sp_corrected), "\n")

  output_file <- file.path(paths$intermediate, paste0(environment_name, "_spatially_corrected_phenotypes.csv"))
  write_csv(sp_corrected, output_file)
  cat("Spatially corrected data exported to:", output_file, "\n")
} else {
  cat("No corrected data to export\n")
}
## Plots with corrected data: 127 
## Spatially corrected data exported to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_spatially_corrected_phenotypes.csv
if (length(fit_info) > 0) {
  cat("\n=== Model Fit Details ===\n")
  for (trait in names(fit_info)) {
    info <- fit_info[[trait]]
    cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
  }
}
## 
## === Model Fit Details ===
## PH: n=127, status=success
## DTA: n=127, status=success
## DTS: n=127, status=success
## GDDA: n=127, status=success
## GDDS: n=127, status=success
## STW40: n=125, status=success
## STWHV: n=125, status=success
## STW50: n=112, status=success
## STW60: n=107, status=success
## EW: n=105, status=success
## EL: n=105, status=success
## ED: n=105, status=success
## KRN: n=105, status=success
## CD: n=105, status=success
## TKW: n=105, status=success
## KW50: n=105, status=success
## TKN: n=105, status=success
## HI: n=105, status=success