This notebook fits a SpATS PSANOVA surface per
phenotype and returns spatially-corrected plot-level values for
each phenotype, including GDD-based traits (GDDA and GDDS). The approach
uses statgenHTP::fitModels() (which wraps SpATS when
provided a TimePoints object) and getCorrected() to extract
corrected plot-level observations. The workflow:
plot_data (plot-level means) and merge GDD
values.TimePoints object for
statgenHTP.fitModels() → getCorrected().sp_corrected.Warning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.
library(tidyverse)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme)
library(gstat)
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS)
library(statgenHTP)
library(ggbeeswarm)
library(cowplot)
library(emmeans)
environment_name <- "PSU2022"
planting_date <- mdy("5/26/2022")
gdd_lookup_file <- file.path(paths$intermediate, "gdd_lookup_table.csv")
gdd_at_phenology <- read_csv(gdd_lookup_file) %>%
filter(environment == environment_name)
cat("Loaded GDD lookup for", environment_name, "with",
nrow(gdd_at_phenology), "dates\n")
## Loaded GDD lookup for PSU2022 with 101 dates
cat("GDD range:", min(gdd_at_phenology$cumulative_gdd_from_planting), "to",
max(gdd_at_phenology$cumulative_gdd_from_planting), "°C\n")
## GDD range: 6.28 to 1203.705 °C
DATA_DIR <- paths$data
plant_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
ear_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field_ear_pheno.csv")
field_data_raw <- read.csv(plant_csv, na.strings = c("", "#N/A", "NA"))
ear_data_raw <- read.csv(ear_csv, na.strings = c("", "n/a", "NA"), skip = 1)
field_data <- field_data_raw %>%
filter(P22. >= 3004, P22. <= 4192) %>%
rename(
rowid = P22.,
Genotype = Who.What,
PH = Height_Anthesis,
STW40 = X40_DAP_dw,
STW50 = X50_DAP_dw,
STW60 = X60_DAP_dw,
STWHV = harvest_dw
) %>%
mutate(
Treatment = factor(Treatment, levels = c("HighP", "LowP")),
Genotype = factor(Genotype),
Rep = as.factor(Rep)
) %>%
mutate(
Treatment = fct_recode(Treatment, "+P" = "HighP", "-P" = "LowP"),
Genotype = fct_recode(Genotype, "Inv4m" = "INV4M")
)
ear_data <- ear_data_raw %>%
select(-description, -RK, -CC, -NIR, ear_rep = rep) %>%
rename(rowid = row) %>%
arrange(rowid) %>%
group_by(rowid) %>%
select(-ear_rep) %>%
summarise_all(mean, na.rm = TRUE) %>%
droplevels()
field_data_complete <- field_data %>%
left_join(ear_data, by = "rowid") %>%
mutate(HI = TKW / STWHV) %>%
mutate(
Plot_Column = case_when(
Plot == "PlotVIII" ~ Plot_Column + 10,
Plot == "PlotVI" ~ Plot_Column,
TRUE ~ Plot_Column
)
) %>%
filter(!is.na(PH)) %>%
droplevels()
base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")
all_phenotypes <- unique(c(base_phenotypes, ear_phenotypes))
cat("Field data dimensions:", dim(field_data_complete), "\n")
## Field data dimensions: 127 39
cat("Phenotypes available:", paste(all_phenotypes, collapse = ", "), "\n")
## Phenotypes available: PH, DTA, DTS, STW40, STW50, STW60, STWHV, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI
plot_data <- field_data_complete %>%
group_by(rowid, Rep, Plot_Row, Plot_Column, Genotype, Treatment) %>%
summarise(
across(all_of(all_phenotypes), ~ mean(.x, na.rm = TRUE)),
n_plants = n(),
.groups = 'drop'
) %>%
mutate(
across(all_of(all_phenotypes), ~ ifelse(is.nan(.x), NA, .x)),
environment = environment_name,
anthesis_date = planting_date + days(round(DTA)),
silking_date = planting_date + days(round(DTS))
) %>%
left_join(
gdd_at_phenology %>%
rename(anthesis_date = date, GDDA = cumulative_gdd_from_planting),
by = c("environment", "anthesis_date")
) %>%
left_join(
gdd_at_phenology %>%
rename(silking_date = date, GDDS = cumulative_gdd_from_planting),
by = c("environment", "silking_date")
) %>%
select(-environment, -anthesis_date, -silking_date)
plot_data$Plot_Row <- as.numeric(plot_data$Plot_Row)
plot_data$Plot_Column <- as.numeric(plot_data$Plot_Column)
plot_data$Rep <- as.factor(plot_data$Rep)
cat("Plot-level summary dimensions:", dim(plot_data), "\n")
## Plot-level summary dimensions: 127 25
cat("Average plants per plot:", round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")
## Average plants per plot: 1
cat("GDD columns added: GDDA (anthesis), GDDS (silking)\n")
## GDD columns added: GDDA (anthesis), GDDS (silking)
cat("Plots with GDDA:", sum(!is.na(plot_data$GDDA)), "\n")
## Plots with GDDA: 127
cat("Plots with GDDS:", sum(!is.na(plot_data$GDDS)), "\n")
## Plots with GDDS: 127
all_traits <- c(all_phenotypes, "GDDA", "GDDS")
missing_summary <- plot_data %>%
select(all_of(all_traits)) %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
mutate(
total_obs = nrow(plot_data),
missing_pct = round(missing_count / total_obs * 100, 1),
available_n = total_obs - missing_count
) %>%
arrange(missing_pct)
print(missing_summary)
## # A tibble: 18 × 5
## phenotype missing_count total_obs missing_pct available_n
## <chr> <int> <int> <dbl> <int>
## 1 PH 0 127 0 127
## 2 DTA 0 127 0 127
## 3 DTS 0 127 0 127
## 4 GDDA 0 127 0 127
## 5 GDDS 0 127 0 127
## 6 STW40 2 127 1.6 125
## 7 STWHV 2 127 1.6 125
## 8 STW50 15 127 11.8 112
## 9 STW60 20 127 15.7 107
## 10 EW 22 127 17.3 105
## 11 EL 22 127 17.3 105
## 12 ED 22 127 17.3 105
## 13 KRN 22 127 17.3 105
## 14 CD 22 127 17.3 105
## 15 TKW 22 127 17.3 105
## 16 KW50 22 127 17.3 105
## 17 TKN 22 127 17.3 105
## 18 HI 22 127 17.3 105
usable_phenotypes <- missing_summary %>%
filter(missing_pct < 50) %>%
pull(phenotype)
cat("Usable phenotypes (<50% missing):", paste(usable_phenotypes, collapse = ", "), "\n")
## Usable phenotypes (<50% missing): PH, DTA, DTS, GDDA, GDDS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI
single_time <- plot_data %>%
mutate(
Genotype = as.character(Genotype),
Treatment = as.character(Treatment),
Rep = as.character(Rep),
Plot_Row = as.numeric(Plot_Row),
Plot_Column = as.numeric(Plot_Column),
time = as.POSIXct("2022-05-22 10:00:00")
)
tp_data <- createTimePoints(
dat = single_time,
experimentName = environment_name,
genotype = "Genotype",
timePoint = "time",
plotId = "rowid",
repId = "Rep",
rowNum = "Plot_Row",
colNum = "Plot_Column",
checkGenotypes = c("NUE", "B73"),
addCheck = TRUE
)
summary(tp_data)
## tp_data contains data for experiment PSU2022.
##
## It contains 1 time points.
## First time point: 2022-05-22 10:00:00
## Last time point: 2022-05-22 10:00:00
##
## The following genotypes are defined as check genotypes: NUE, B73.
corrected_list <- list()
fit_info <- list()
all_models <- list()
for (phen in usable_phenotypes) {
message("Fitting SpATS model for: ", phen)
no_nas <- single_time %>%
filter(!is.na(.data[[phen]]))
if (nrow(no_nas) < 10) {
message("Insufficient data for ", phen, " (n=", nrow(no_nas), ")")
next
}
fit_try <- try(
fitModels(
TP = tp_data,
trait = phen,
extraFixedFactors = c("repId"), # Treatment lives in Stage-2 lm
timePoints = 1,
what = "fixed"
),
silent = TRUE
)
if (inherits(fit_try, "try-error")) {
message("Fit failed for ", phen, ": ", fit_try)
next
}
all_models[[phen]] <- fit_try
corr <- try(getCorrected(fit_try), silent = TRUE)
if (inherits(corr, "try-error") || is.null(corr)) {
message("getCorrected() failed for ", phen)
next
}
corr <- corr %>%
select(-phen)
corrected_list[[phen]] <- corr
fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "success")
}
sp_corrected <- lapply(
corrected_list,
function(corr_df) {
corr_df %>%
select(timeNumber, timePoint, genotype, repId,
rowId, colId, plotId)
}
) %>%
bind_rows() %>%
arrange(plotId) %>%
distinct()
## Treatment is no longer in SpATS extraFixedFactors (Stage-2 lm handles it)
## — re-attach it from single_time so downstream lm + emmeans has access.
sp_corrected <- sp_corrected %>%
left_join(single_time %>%
mutate(plotId = as.character(rowid)) %>%
select(plotId, Treatment) %>%
distinct(),
by = "plotId")
for (phen in names(corrected_list)) {
phen_corr <- paste0(phen, "_corr")
corr_df <- corrected_list[[phen]] %>%
select(plotId, all_of(phen_corr))
sp_corrected <- left_join(sp_corrected, corr_df, by = "plotId")
}
colnames(sp_corrected) <- gsub("_corr", "", colnames(sp_corrected))
cat("Final sp_corrected dimensions:", dim(sp_corrected), "\n")
cat("Corrected traits:",
paste(intersect(usable_phenotypes, colnames(sp_corrected)),
collapse = ", "), "\n")
print(head(sp_corrected))
if ("PH" %in% names(all_models)) {
plot(all_models$PH, timePoints = 1)
plot(all_models$PH, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
if ("HI" %in% names(all_models)) {
plot(all_models$HI, timePoints = 1)
plot(all_models$HI, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
if ("GDDA" %in% names(all_models)) {
cat("\n### Spatial plots for GDDA\n")
plot(all_models$GDDA, timePoints = 1)
plot(all_models$GDDA, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
##
## ### Spatial plots for GDDA
if ("GDDS" %in% names(all_models)) {
cat("\n### Spatial plots for GDDS\n")
plot(all_models$GDDS, timePoints = 1)
plot(all_models$GDDS, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
##
## ### Spatial plots for GDDS
NIL_corrected <- sp_corrected %>%
filter(genotype %in% c("Inv4m", "CTRL")) %>%
mutate(genotype = factor(genotype, levels = c("CTRL", "Inv4m")),
Treatment = factor(Treatment)) %>%
droplevels()
## Per-trait lm + emmeans: marginal Genotype contrast (pooled over P) and
## conditional Genotype contrasts within each P_LEVEL. G x T interaction p
## stored as diagnostic (NOT in FDR pool).
fit_one_trait <- function(trait_name, data) {
d <- data %>%
pivot_longer(cols = all_of(trait_name),
names_to = "trait", values_to = "value") %>%
filter(!is.na(value))
if (nrow(d) < 4 || length(unique(d$genotype)) < 2 ||
length(unique(d$Treatment)) < 2) return(NULL)
m <- tryCatch(lm(value ~ genotype * Treatment, data = d),
error = function(e) NULL)
if (is.null(m)) return(NULL)
anov <- anova(m)
p_int <- if ("genotype:Treatment" %in% rownames(anov))
anov["genotype:Treatment", "Pr(>F)"] else NA_real_
marg <- pairs(emmeans(m, ~ genotype), reverse = TRUE) %>%
as_tibble() %>%
transmute(trait = trait_name,
contrast_type = "marginal",
contrast_level = "marginal",
estimate, SE, df, t = t.ratio, p = p.value,
p_int_GxT = p_int)
cond <- pairs(emmeans(m, ~ genotype | Treatment), reverse = TRUE) %>%
as_tibble() %>%
transmute(trait = trait_name,
contrast_type = "conditional",
contrast_level = paste0("Genotype|", Treatment),
estimate, SE, df, t = t.ratio, p = p.value,
p_int_GxT = p_int)
bind_rows(marg, cond)
}
fitted_traits <- intersect(usable_phenotypes, names(NIL_corrected))
all_contrasts <- lapply(fitted_traits, fit_one_trait,
data = NIL_corrected) %>%
bind_rows() %>%
mutate(p_adj = p.adjust(p, method = "fdr"),
signif = case_when(
p_adj < 0.001 ~ "***",
p_adj < 0.01 ~ "**",
p_adj < 0.05 ~ "*",
TRUE ~ "ns"
))
cat("FDR pool size:", nrow(all_contrasts), "contrasts\n")
## FDR pool size: 54 contrasts
cat("Significant (FDR < 0.05):",
sum(all_contrasts$p_adj < 0.05), "rows\n\n")
## Significant (FDR < 0.05): 19 rows
knitr::kable(all_contrasts %>% arrange(p_adj) %>%
select(trait, contrast_type, contrast_level,
estimate, SE, df, t, p, p_adj, signif, p_int_GxT),
digits = 4,
caption = paste("All Stage-2 contrasts. p = nominal,",
"p_adj = BH-FDR across the pool."))
| trait | contrast_type | contrast_level | estimate | SE | df | t | p | p_adj | signif | p_int_GxT |
|---|---|---|---|---|---|---|---|---|---|---|
| PH | marginal | marginal | 6.4849 | 1.1154 | 60 | 5.8138 | 0.0000 | 0.0000 | *** | 0.4200 |
| PH | conditional | Genotype|-P | 7.3906 | 1.5775 | 60 | 4.6851 | 0.0000 | 0.0002 | *** | 0.4200 |
| DTA | marginal | marginal | -1.2923 | 0.2648 | 60 | -4.8811 | 0.0000 | 0.0002 | *** | 0.6078 |
| DTS | marginal | marginal | -1.0546 | 0.2221 | 60 | -4.7475 | 0.0000 | 0.0002 | *** | 0.1578 |
| GDDA | marginal | marginal | -18.7391 | 3.9079 | 60 | -4.7951 | 0.0000 | 0.0002 | *** | 0.4584 |
| GDDS | marginal | marginal | -14.9196 | 3.2005 | 60 | -4.6617 | 0.0000 | 0.0002 | *** | 0.0869 |
| GDDS | conditional | Genotype|+P | -20.4898 | 4.5261 | 60 | -4.5270 | 0.0000 | 0.0002 | *** | 0.0869 |
| DTS | conditional | Genotype|+P | -1.3724 | 0.3142 | 60 | -4.3685 | 0.0001 | 0.0003 | *** | 0.1578 |
| GDDA | conditional | Genotype|+P | -21.6557 | 5.5267 | 60 | -3.9184 | 0.0002 | 0.0014 | ** | 0.4584 |
| DTA | conditional | Genotype|+P | -1.4290 | 0.3744 | 60 | -3.8163 | 0.0003 | 0.0017 | ** | 0.6078 |
| PH | conditional | Genotype|+P | 5.5792 | 1.5775 | 60 | 3.5368 | 0.0008 | 0.0039 | ** | 0.4200 |
| CD | conditional | Genotype|-P | -2.2160 | 0.6250 | 49 | -3.5454 | 0.0009 | 0.0039 | ** | 0.0156 |
| DTA | conditional | Genotype|-P | -1.1557 | 0.3744 | 60 | -3.0866 | 0.0031 | 0.0127 | * | 0.6078 |
| STW40 | conditional | Genotype|+P | -3.1330 | 1.0726 | 59 | -2.9209 | 0.0049 | 0.0190 | * | 0.0455 |
| GDDA | conditional | Genotype|-P | -15.8226 | 5.5267 | 60 | -2.8630 | 0.0058 | 0.0208 | * | 0.4584 |
| STW50 | conditional | Genotype|+P | -4.9892 | 1.8034 | 53 | -2.7666 | 0.0078 | 0.0263 | * | 0.0330 |
| HI | marginal | marginal | 0.1151 | 0.0422 | 49 | 2.7241 | 0.0089 | 0.0283 | * | 0.5071 |
| STWHV | conditional | Genotype|+P | -13.3954 | 5.0233 | 58 | -2.6667 | 0.0099 | 0.0297 | * | 0.1784 |
| STWHV | marginal | marginal | -8.6387 | 3.4923 | 58 | -2.4737 | 0.0163 | 0.0464 | * | 0.1784 |
| DTS | conditional | Genotype|-P | -0.7368 | 0.3142 | 60 | -2.3454 | 0.0223 | 0.0585 | ns | 0.1578 |
| CD | marginal | marginal | -1.0727 | 0.4562 | 49 | -2.3514 | 0.0228 | 0.0585 | ns | 0.0156 |
| HI | conditional | Genotype|+P | 0.1433 | 0.0616 | 49 | 2.3285 | 0.0241 | 0.0590 | ns | 0.5071 |
| ED | conditional | Genotype|-P | -1.9903 | 0.9195 | 49 | -2.1645 | 0.0353 | 0.0771 | ns | 0.2100 |
| KRN | conditional | Genotype|-P | -0.6956 | 0.3208 | 49 | -2.1681 | 0.0350 | 0.0771 | ns | 0.3742 |
| TKW | marginal | marginal | 8.0148 | 3.7108 | 49 | 2.1599 | 0.0357 | 0.0771 | ns | 0.5907 |
| STW40 | marginal | marginal | -1.5960 | 0.7523 | 59 | -2.1215 | 0.0381 | 0.0791 | ns | 0.0455 |
| GDDS | conditional | Genotype|-P | -9.3495 | 4.5261 | 60 | -2.0657 | 0.0432 | 0.0836 | ns | 0.0869 |
| KRN | marginal | marginal | -0.4856 | 0.2341 | 49 | -2.0740 | 0.0434 | 0.0836 | ns | 0.3742 |
| EL | conditional | Genotype|+P | 0.8117 | 0.3980 | 49 | 2.0395 | 0.0468 | 0.0872 | ns | 0.1108 |
| EW | marginal | marginal | 8.6107 | 4.4427 | 49 | 1.9382 | 0.0584 | 0.1051 | ns | 0.7230 |
| STW50 | marginal | marginal | -2.3120 | 1.2230 | 53 | -1.8905 | 0.0642 | 0.1118 | ns | 0.0330 |
| TKW | conditional | Genotype|+P | 10.0238 | 5.4063 | 49 | 1.8541 | 0.0697 | 0.1177 | ns | 0.5907 |
| ED | marginal | marginal | -1.1378 | 0.6711 | 49 | -1.6955 | 0.0963 | 0.1576 | ns | 0.2100 |
| EW | conditional | Genotype|+P | 10.1946 | 6.4726 | 49 | 1.5751 | 0.1217 | 0.1933 | ns | 0.7230 |
| STW60 | conditional | Genotype|-P | 7.1062 | 4.6379 | 50 | 1.5322 | 0.1318 | 0.2033 | ns | 0.1120 |
| HI | conditional | Genotype|-P | 0.0869 | 0.0579 | 49 | 1.5004 | 0.1399 | 0.2099 | ns | 0.5071 |
| EL | marginal | marginal | 0.3680 | 0.2732 | 49 | 1.3472 | 0.1841 | 0.2687 | ns | 0.1108 |
| TKN | marginal | marginal | 33.9659 | 26.6347 | 49 | 1.2753 | 0.2082 | 0.2959 | ns | 0.9310 |
| TKW | conditional | Genotype|-P | 6.0059 | 5.0845 | 49 | 1.1812 | 0.2432 | 0.3368 | ns | 0.5907 |
| EW | conditional | Genotype|-P | 7.0268 | 6.0874 | 49 | 1.1543 | 0.2540 | 0.3428 | ns | 0.7230 |
| TKN | conditional | Genotype|+P | 36.2834 | 38.8041 | 49 | 0.9350 | 0.3544 | 0.4667 | ns | 0.9310 |
| TKN | conditional | Genotype|-P | 31.6483 | 36.4947 | 49 | 0.8672 | 0.3901 | 0.5015 | ns | 0.9310 |
| STWHV | conditional | Genotype|-P | -3.8821 | 4.8529 | 58 | -0.7999 | 0.4270 | 0.5241 | ns | 0.1784 |
| KRN | conditional | Genotype|+P | -0.2756 | 0.3411 | 49 | -0.8081 | 0.4230 | 0.5241 | ns | 0.3742 |
| STW60 | conditional | Genotype|+P | -3.8955 | 4.9724 | 50 | -0.7834 | 0.4371 | 0.5245 | ns | 0.1120 |
| STW60 | marginal | marginal | 1.6054 | 3.3998 | 50 | 0.4722 | 0.6388 | 0.7340 | ns | 0.1120 |
| KW50 | conditional | Genotype|+P | 0.2554 | 0.5378 | 49 | 0.4749 | 0.6370 | 0.7340 | ns | 0.7560 |
| KW50 | marginal | marginal | 0.1400 | 0.3691 | 49 | 0.3793 | 0.7061 | 0.7943 | ns | 0.7560 |
| ED | conditional | Genotype|+P | -0.2853 | 0.9777 | 49 | -0.2918 | 0.7716 | 0.8504 | ns | 0.2100 |
| STW50 | conditional | Genotype|-P | 0.3652 | 1.6524 | 53 | 0.2210 | 0.8259 | 0.8901 | ns | 0.0330 |
| EL | conditional | Genotype|-P | -0.0756 | 0.3743 | 49 | -0.2021 | 0.8407 | 0.8901 | ns | 0.1108 |
| CD | conditional | Genotype|+P | 0.0707 | 0.6646 | 49 | 0.1064 | 0.9157 | 0.9509 | ns | 0.0156 |
| STW40 | conditional | Genotype|-P | -0.0591 | 1.0552 | 59 | -0.0560 | 0.9555 | 0.9613 | ns | 0.0455 |
| KW50 | conditional | Genotype|-P | 0.0247 | 0.5058 | 49 | 0.0488 | 0.9613 | 0.9613 | ns | 0.7560 |
## y.position per trait for downstream stat_pvalue_manual
y_positions <- NIL_corrected %>%
pivot_longer(cols = any_of(fitted_traits),
names_to = "trait", values_to = "value") %>%
filter(!is.na(value)) %>%
group_by(trait) %>%
summarise(y.position = max(value, na.rm = TRUE) +
0.10 * diff(range(value, na.rm = TRUE)),
.groups = "drop")
## contrast_df = marginal row per trait; drives the panel star
contrast_df <- all_contrasts %>%
filter(contrast_type == "marginal") %>%
left_join(y_positions, by = "trait") %>%
mutate(group1 = "CTRL", group2 = "Inv4m",
p.adj = p_adj,
p.adj.signif = signif)
## Back-compat alias: downstream boxplot code references `htest` with rstatix
## column names (group1/group2/y.position/p.adj.signif).
htest <- contrast_df %>%
select(trait, group1, group2, y.position, p.adj, p.adj.signif,
signif, p_int_GxT,
effect = estimate, se_diff = SE, t, p, df)
alpha <- 0.05
sig_traits <- contrast_df %>% filter(p_adj < alpha) %>%
arrange(p_adj) %>% pull(trait)
cat("Significant traits — marginal Inv4m - CTRL (FDR < 0.05):",
if (length(sig_traits)) paste(sig_traits, collapse = ", ") else "none",
"\n")
## Significant traits — marginal Inv4m - CTRL (FDR < 0.05): PH, DTA, DTS, GDDA, GDDS, HI, STWHV
pheno_theme2 <- theme_classic2(base_size = 20) +
theme(
plot.title = element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = element_markdown(),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 25, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_markdown(size = 20),
legend.position = "none"
)
if (length(sig_traits) == 0) {
warning("No traits significant after FDR correction.")
} else {
# --- Main Figure 2 Panel A: 6 traits with units ---
main_traits <- c("PH", "DTA", "DTS", "HI", "STWHV", "CD")
trait_units <- c(PH = "cm", DTA = "Days", DTS = "Days", HI = "", STWHV = "g", CD = "mm")
plot_data_main <- NIL_corrected %>%
pivot_longer(
cols = all_of(usable_phenotypes),
names_to = "trait",
values_to = "value"
) %>%
filter(trait %in% main_traits, !is.na(value)) %>%
mutate(
trait = factor(trait, levels = main_traits),
genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
)
test_main <- htest %>%
filter(trait %in% main_traits)
# Theme with bold strip text, no title, rotated x-axis with ggtext
pheno_theme_panel <- theme_classic2(base_size = 20) +
theme(
plot.title = element_blank(),
axis.title.y = element_text(size = 18),
axis.title.x = element_blank(),
axis.text.x = element_markdown(size = 18, face = "bold", color = "black"),
axis.text.y = element_text(size = 16),
strip.background = element_blank(),
strip.text = element_text(size = 20, face = "bold"),
legend.position = "none"
)
# Create individual plots with custom y-axis labels
make_trait_plot <- function(trait_name, data, tests, units) {
d <- filter(data, trait == trait_name)
t <- filter(tests, trait == trait_name)
y_label <- units[trait_name]
p <- ggplot(d, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
geom_quasirandom(size = 2) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
labs(title = trait_name, y = y_label, x = NULL) +
stat_pvalue_manual(t, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
pheno_theme_panel +
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5))
# Custom 1-day breaks for DTA/DTS
if (trait_name %in% c("DTA", "DTS")) {
y_range <- range(d$value, na.rm = TRUE)
p <- p + scale_y_continuous(
breaks = seq(floor(y_range[1]), ceiling(y_range[2]), by = 1),
expand = expansion(mult = c(0.1, 0.15))
)
}
return(p)
}
trait_plots <- lapply(main_traits, make_trait_plot, data = plot_data_main, tests = test_main, units = trait_units)
main_panel <- cowplot::plot_grid(
plotlist = trait_plots,
nrow = 2,
ncol = 3,
align = "hv",
axis = "tblr"
)
print(main_panel)
# Save main panel for Figure 2 (2 rows x 3 cols)
ggsave(
file.path(paths$figures, "field_phenotypes_panel.pdf"),
main_panel,
width = 10, height = 7, dpi = 300
)
ggsave(
file.path(paths$figures, "field_phenotypes_panel.png"),
main_panel,
width = 10, height = 7, dpi = 300
)
cat("Saved field phenotypes panel to:", file.path(paths$figures, "field_phenotypes_panel.png"), "\n")
# --- Supplementary: GDDA, GDDS panel ---
gdd_traits <- c("GDDA", "GDDS")
gdd_traits_present <- intersect(gdd_traits, sig_traits)
if (length(gdd_traits_present) > 0) {
plot_data_gdd <- NIL_corrected %>%
pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
filter(trait %in% gdd_traits_present, !is.na(value)) %>%
mutate(trait = factor(trait, levels = gdd_traits_present))
test_gdd <- htest %>% filter(trait %in% gdd_traits_present)
gdd_plot <- ggplot(plot_data_gdd, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
geom_quasirandom(size = 2) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
labs(y = "GDD (°C·days)", x = NULL) +
stat_pvalue_manual(test_gdd, tip.length = 0.01, step.height = 0.08, size = 8, bracket.size = 0.8) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
facet_wrap(~ trait, scales = "free_y", ncol = 2) +
pheno_theme_panel
print(gdd_plot)
ggsave(file.path(paths$figures, "gdd_phenotypes_panel.pdf"), gdd_plot, width = 6, height = 4, dpi = 300)
ggsave(file.path(paths$figures, "gdd_phenotypes_panel.png"), gdd_plot, width = 6, height = 4, dpi = 300)
cat("Saved GDD phenotypes panel to:", file.path(paths$figures, "gdd_phenotypes_panel.png"), "\n")
}
# --- All significant traits plot (for reference) ---
plot_data_sig <- NIL_corrected %>%
pivot_longer(cols = all_of(usable_phenotypes), names_to = "trait", values_to = "value") %>%
filter(trait %in% sig_traits, !is.na(value)) %>%
mutate(trait = as.character(trait))
test_to_plot <- htest %>% filter(trait %in% sig_traits)
diff_plot <- ggplot(plot_data_sig, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
geom_quasirandom(size = 2) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
labs(title = "Inv4m vs CTRL: Spatially Corrected Phenotypes (FDR-Adjusted)", y = "Corrected Value", x = "Genotype") +
stat_pvalue_manual(test_to_plot, tip.length = 0.01, step.height = 0.08, size = 10, bracket.size = 0.8) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
facet_wrap(~ factor(trait, levels = sig_traits), scales = "free_y", ncol = 4) +
pheno_theme_panel
print(diff_plot)
}
## Saved field phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/field_phenotypes_panel.png
## Saved GDD phenotypes panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/gdd_phenotypes_panel.png
write_csv(contrast_df %>% select(-y.position, -group1, -group2, -p.adj,
-p.adj.signif),
file.path(paths$intermediate,
paste0(environment_name,
"_focal_contrast_marginal.csv")))
write_csv(all_contrasts,
file.path(paths$intermediate,
paste0(environment_name,
"_focal_contrast_all.csv")))
cat("Outputs:\n ",
file.path(paths$intermediate,
paste0(environment_name, "_focal_contrast_marginal.csv")),
"\n ",
file.path(paths$intermediate,
paste0(environment_name, "_focal_contrast_all.csv")), "\n")
## Outputs:
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_focal_contrast_marginal.csv
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_focal_contrast_all.csv
cat("\n=== Analysis Summary ===\n")
##
## === Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_traits), "\n")
## Total phenotypes evaluated: 18
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 18
cat("Successfully fitted models:", length(all_models), "\n")
## Successfully fitted models: 18
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
cat("Plots with corrected data:", nrow(sp_corrected), "\n")
output_file <- file.path(paths$intermediate, paste0(environment_name, "_spatially_corrected_phenotypes.csv"))
write_csv(sp_corrected, output_file)
cat("Spatially corrected data exported to:", output_file, "\n")
} else {
cat("No corrected data to export\n")
}
## Plots with corrected data: 127
## Spatially corrected data exported to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2022_spatially_corrected_phenotypes.csv
if (length(fit_info) > 0) {
cat("\n=== Model Fit Details ===\n")
for (trait in names(fit_info)) {
info <- fit_info[[trait]]
cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
}
}
##
## === Model Fit Details ===
## PH: n=127, status=success
## DTA: n=127, status=success
## DTS: n=127, status=success
## GDDA: n=127, status=success
## GDDS: n=127, status=success
## STW40: n=125, status=success
## STWHV: n=125, status=success
## STW50: n=112, status=success
## STW60: n=107, status=success
## EW: n=105, status=success
## EL: n=105, status=success
## ED: n=105, status=success
## KRN: n=105, status=success
## CD: n=105, status=success
## TKW: n=105, status=success
## KW50: n=105, status=success
## TKN: n=105, status=success
## HI: n=105, status=success