Purpose: Characterize the greenyellow WGCNA module, which contains the plant height GWAS candidate sec6 and the cell proliferation marker pcna2. This module has the highest bootstrap support (LFP = 0.80) among all WGCNA modules.
Key findings:
suppressPackageStartupMessages({
library(tidyverse)
library(here)
library(knitr)
})
# Load project paths
source(here("scripts", "utils", "setup_paths.R"))
paths <- setup_project_paths("inversion_paper")
# Field perturbation run directory
run_dir <- here("results/inversion_paper/field_perturbation/run_20251231_201332")
# Input files
CONSENSUS_FILE <- file.path(run_dir, "04_consensus_networks/ctrl/consensus_modules.csv")
GO_BP_FILE <- file.path(run_dir, "07_module_annotation/all_modules_GO_BP.csv")
LABELS_FILE <- here("data", "locus_labels_master.csv")
SUPPORT_FILE <- file.path(run_dir, "05_bootstrap_support/ctrl/module_support.csv")
# Verify files exist
stopifnot(file.exists(CONSENSUS_FILE))
stopifnot(file.exists(GO_BP_FILE))
stopifnot(file.exists(LABELS_FILE))
stopifnot(file.exists(SUPPORT_FILE))
cat("All input files found.\n")
## All input files found.
# Load CTRL consensus module assignments
ctrl_modules <- read_csv(CONSENSUS_FILE, show_col_types = FALSE)
# Load GO BP enrichment results
go_bp <- read_csv(GO_BP_FILE, show_col_types = FALSE)
# Load gene annotations with expression data
labels <- read_csv(LABELS_FILE, show_col_types = FALSE)
# Load module support statistics
support <- read_csv(SUPPORT_FILE, show_col_types = FALSE)
cat("Loaded", nrow(ctrl_modules), "genes with module assignments\n")
## Loaded 465 genes with module assignments
cat("Loaded", nrow(go_bp), "GO BP enrichment terms\n")
## Loaded 343 GO BP enrichment terms
cat("Loaded", nrow(labels), "gene annotations\n")
## Loaded 471 gene annotations
The greenyellow module has the highest bootstrap support among all modules.
# Extract greenyellow support stats
gy_support <- support %>%
filter(color == "greenyellow")
cat("Greenyellow module bootstrap support:\n")
## Greenyellow module bootstrap support:
cat(" Mean LFP:", round(gy_support$mean_lfp, 3), "\n")
## Mean LFP: 0.798
cat(" Stability:", gy_support$stability, "\n")
## Stability: Stable
cat(" N genes:", gy_support$n_genes, "\n")
## N genes: 16
# Compare to other modules
cat("\nTop 5 modules by LFP:\n")
##
## Top 5 modules by LFP:
support %>%
arrange(desc(mean_lfp)) %>%
head(5) %>%
select(color, mean_lfp, n_genes, stability) %>%
kable()
| color | mean_lfp | n_genes | stability |
|---|---|---|---|
| greenyellow | 0.7975625 | 16 | Stable |
| blue | 0.7945882 | 51 | Stable |
| purple | 0.7404091 | 22 | Stable |
| pink | 0.7151333 | 30 | Stable |
| magenta | 0.7127917 | 24 | Stable |
The greenyellow module contains sec6 (plant height GWAS candidate) and pcna2 (proliferating cell nuclear antigen), both upregulated in Inv4m plants.
# Extract greenyellow module genes
gy_genes <- ctrl_modules %>%
filter(color == "greenyellow") %>%
pull(gene)
cat("Greenyellow module contains", length(gy_genes), "genes\n")
## Greenyellow module contains 16 genes
# Verify key genes are present
key_genes <- c(
"Zm00001eb194380", # sec6
"Zm00001eb192050", # pcna2
"Zm00001eb188240" # ropgef14
)
cat("\nsec6 in greenyellow module:", key_genes[1] %in% gy_genes, "\n")
##
## sec6 in greenyellow module: TRUE
cat("pcna2 in greenyellow module:", key_genes[2] %in% gy_genes, "\n")
## pcna2 in greenyellow module: TRUE
cat("ropgef14 in greenyellow module:", key_genes[3] %in% gy_genes, "\n")
## ropgef14 in greenyellow module: TRUE
Table sorted by statistical significance (FDR, ascending = most significant first).
# Join greenyellow genes with annotations
gy_annotated <- ctrl_modules %>%
filter(color == "greenyellow") %>%
left_join(labels, by = "gene") %>%
mutate(
# Use locus_label if available, else locus_symbol
symbol = case_when(
!is.na(locus_label) & locus_label != "" ~ locus_label,
!is.na(locus_symbol) & locus_symbol != "" ~ locus_symbol,
TRUE ~ "---"
),
# Clean up description - use desc_merged or desc_pannzer
description = case_when(
!is.na(desc_merged) & desc_merged != "" ~ desc_merged,
!is.na(desc_pannzer) & desc_pannzer != "" ~ desc_pannzer,
TRUE ~ "---"
),
# Format log2FC and FDR
log2FC = round(logFC, 2),
FDR_val = adj.P.Val
) %>%
# Sort by FDR (most significant first)
arrange(FDR_val) %>%
select(gene, symbol, description, log2FC, FDR_val)
# Display count
cat("Annotated", nrow(gy_annotated), "greenyellow module genes\n")
## Annotated 16 greenyellow module genes
# Create formatted table
gy_table <- gy_annotated %>%
mutate(
FDR = formatC(FDR_val, format = "e", digits = 1),
# Truncate long descriptions
description = str_trunc(description, 50)
) %>%
select(gene, symbol, description, log2FC, FDR)
# Display with kable
kable(
gy_table,
col.names = c("Gene ID", "Label", "Description", "log2FC", "FDR"),
caption = "Greenyellow module DEGs sorted by statistical significance (FDR)"
)
| Gene ID | Label | Description | log2FC | FDR |
|---|---|---|---|---|
| Zm00001eb194380 | sec6 | Exocyst complex component Sec6 | 2.87 | 2.9e-17 |
| Zm00001eb188240 | ropgef14 | Rop guanine nucleotide exchange factor 14 | -2.98 | 2.6e-14 |
| Zm00001eb112470 | engd2 | — | 6.14 | 4.9e-13 |
| Zm00001eb191260 | pbl19 | probable serine/threonine-protein kinase PBL19 | -1.39 | 3.5e-09 |
| Zm00001eb195290 | pap1 | PAP-associated domain-containing protein | 1.45 | 4.7e-08 |
| Zm00001eb386670 | rglg2 | E3 ubiquitin-protein ligase RGLG2 | -1.01 | 5.2e-04 |
| Zm00001eb187450 | mybr58 | HTH myb-type domain-containing protein | 1.74 | 3.1e-03 |
| Zm00001eb192050 | pcna2 | Proliferating cell nuclear antigen | 2.99 | 9.3e-03 |
| Zm00001eb290800 | pk8 | Protein kinase domain-containing protein | -0.37 | 2.5e-02 |
| Zm00001eb392440 | iqm1 | IQ calmodulin-binding motif family protein, exp… | -0.85 | 2.6e-02 |
| Zm00001eb080160 | hipp16 | Heavy metal-associated isoprenylated plant prot… | -0.99 | 3.4e-02 |
| Zm00001eb404820 | esk1 | Protein ESKIMO like | -0.65 | 3.4e-02 |
| Zm00001eb064190 | parg | Poly(ADP-ribose) glycohydrolase | -0.29 | 4.2e-02 |
| Zm00001eb190580 | pla2 | probable phospholipase A2 homolog 1 | -0.69 | 4.8e-02 |
| Zm00001eb375460 | pk3 | Protein kinase domain-containing protein | -0.74 | 4.8e-02 |
| Zm00001eb279370 | phi1b | Phi-1-like phosphate-induced protein | 0.72 | 5.0e-02 |
# Filter GO BP results for greenyellow module
gy_go <- go_bp %>%
filter(module_color == "greenyellow") %>%
arrange(pvalue) %>%
select(ID, Description, GeneRatio, pvalue, Count, geneID) %>%
mutate(
pvalue_fmt = formatC(pvalue, format = "e", digits = 2),
Description = str_trunc(Description, 60)
)
cat("Greenyellow module has", nrow(gy_go), "enriched GO BP terms\n")
## Greenyellow module has 54 enriched GO BP terms
# Display top GO terms
kable(
gy_go %>%
head(20) %>%
select(ID, Description, GeneRatio, pvalue_fmt, Count),
col.names = c("GO ID", "Description", "Gene Ratio", "p-value", "Count"),
caption = "Top 20 GO Biological Process terms for greenyellow module"
)
| GO ID | Description | Gene Ratio | p-value | Count |
|---|---|---|---|---|
| GO:0009856 | pollination | 3/9 | 3.66e-05 | 3 |
| GO:0044706 | multi-multicellular organism process | 3/9 | 3.66e-05 | 3 |
| GO:0006282 | regulation of DNA repair | 2/9 | 7.95e-05 | 2 |
| GO:0009860 | pollen tube growth | 2/9 | 4.19e-04 | 2 |
| GO:0009932 | cell tip growth | 2/9 | 5.96e-04 | 2 |
| GO:0080135 | regulation of cellular response to stress | 2/9 | 5.96e-04 | 2 |
| GO:0048588 | developmental cell growth | 2/9 | 8.45e-04 | 2 |
| GO:0048868 | pollen tube development | 2/9 | 9.52e-04 | 2 |
| GO:0051052 | regulation of DNA metabolic process | 2/9 | 1.14e-03 | 2 |
| GO:0046903 | secretion | 2/9 | 1.39e-03 | 2 |
| GO:0048468 | cell development | 2/9 | 1.69e-03 | 2 |
| GO:0009826 | unidimensional cell growth | 2/9 | 4.00e-03 | 2 |
| GO:0060560 | developmental growth involved in morphogenesis | 2/9 | 4.97e-03 | 2 |
| GO:0045739 | positive regulation of DNA repair | 1/9 | 5.47e-03 | 1 |
| GO:0060321 | acceptance of pollen | 1/9 | 5.47e-03 | 1 |
| GO:0070206 | protein trimerization | 1/9 | 6.56e-03 | 1 |
| GO:0015908 | fatty acid transport | 1/9 | 7.10e-03 | 1 |
| GO:0000902 | cell morphogenesis | 2/9 | 8.04e-03 | 2 |
| GO:0016049 | cell growth | 2/9 | 8.35e-03 | 2 |
| GO:0031503 | protein-containing complex localization | 1/9 | 8.73e-03 | 1 |
# Manually categorize key genes
categories <- tribble(
~category, ~genes, ~description,
"Cell proliferation", "pcna2", "DNA replication licensing - cell cycle S phase",
"Exocytosis/Cytokinesis", "sec6", "Exocyst complex - vesicle trafficking, cell plate formation",
"Cell polarity/Growth", "ropgef14, pla2", "Rho GTPase signaling - polarized cell growth",
"DNA repair", "parg, pcna2", "Poly(ADP-ribose) metabolism, DNA damage response",
"Transcription factor", "mybr58", "MYB-related TF - potentially growth-related targets",
"Signaling kinases", "pbl19, pk3, pk8", "Protein kinases - signal transduction"
)
kable(
categories,
col.names = c("Category", "Genes", "Biological Role"),
caption = "Functional categories in the greenyellow module"
)
| Category | Genes | Biological Role |
|---|---|---|
| Cell proliferation | pcna2 | DNA replication licensing - cell cycle S phase |
| Exocytosis/Cytokinesis | sec6 | Exocyst complex - vesicle trafficking, cell plate formation |
| Cell polarity/Growth | ropgef14, pla2 | Rho GTPase signaling - polarized cell growth |
| DNA repair | parg, pcna2 | Poly(ADP-ribose) metabolism, DNA damage response |
| Transcription factor | mybr58 | MYB-related TF - potentially growth-related targets |
| Signaling kinases | pbl19, pk3, pk8 | Protein kinases - signal transduction |
The greenyellow module represents a coherent cell growth/division network with the highest bootstrap support (LFP = 0.80) among all WGCNA modules:
The GO enrichment confirms:
The presence of both sec6 (GWAS plant height candidate) and pcna2 (cell proliferation marker) in this highly stable module supports the hypothesis that Inv4m introgression perturbs a coordinated cell growth/division program, consistent with the altered SAM morphology and plant height phenotypes.
# Save annotated table (CSV)
write_csv(
gy_annotated %>%
mutate(FDR = formatC(FDR_val, format = "e", digits = 2)) %>%
select(gene, symbol, description, log2FC, FDR),
file.path(paths$intermediate, "greenyellow_module_annotated_DEGs.csv")
)
# Save GO enrichment
write_csv(
gy_go %>% select(-pvalue_fmt),
file.path(paths$intermediate, "greenyellow_module_GO_BP.csv")
)
cat("\nSaved outputs:\n")
##
## Saved outputs:
cat(" - greenyellow_module_annotated_DEGs.csv\n")
## - greenyellow_module_annotated_DEGs.csv
cat(" - greenyellow_module_GO_BP.csv\n")
## - greenyellow_module_GO_BP.csv
# Generate LaTeX table for main.tex
latex_header <- '\\begin{table}[!ht]
\\caption{\\textbf{Greenyellow module DEGs containing growth and cell division genes.} Genes assigned to the greenyellow WGCNA consensus module, which has the highest bootstrap support (LFP = 0.80) among all modules. Contains the plant height GWAS candidate \\textit{sec6} and the cell proliferation marker \\textit{pcna2}. Sorted by statistical significance (FDR). GO enrichment includes ``developmental cell growth\'\' ($p = 8.5 \\times 10^{-4}$) and ``regulation of DNA replication\'\' ($p = 0.019$).}
\\label{tab::greenyellow_module}
\\centering
\\small
\\begin{tabular}{@{}lllrr@{}}
\\toprule
\\textbf{Gene ID} & \\textbf{Label} & \\textbf{Description} & \\textbf{log$_2$FC} & \\textbf{FDR} \\\\
\\midrule'
latex_footer <- '\\bottomrule
\\end{tabular}
\\end{table}'
# Format each row
format_latex_row <- function(gene, symbol, desc, log2fc, fdr) {
# Format label
label <- if (is.na(symbol) || symbol == "" || symbol == "---") {
"---"
} else {
paste0("\\textit{", symbol, "}")
}
# Format description
desc_clean <- if (is.na(desc) || desc == "" || desc == "---") {
"---"
} else {
str_trunc(desc, 40) %>%
str_replace_all("_", "\\\\_") %>%
str_replace_all("&", "\\\\&") %>%
str_replace_all("%", "\\\\%")
}
# Format log2FC with sign
log2fc_fmt <- if (log2fc >= 0) {
paste0("$+", sprintf("%.2f", log2fc), "$")
} else {
paste0("$", sprintf("%.2f", log2fc), "$")
}
# Format FDR in scientific notation
fdr_exp <- floor(log10(fdr))
fdr_mant <- fdr / 10^fdr_exp
fdr_fmt <- paste0("$", sprintf("%.1f", fdr_mant), " \\times 10^{", fdr_exp, "}$")
paste(gene, label, desc_clean, log2fc_fmt, fdr_fmt, sep = " & ", collapse = "") %>%
paste0(" \\\\")
}
# Generate all rows
latex_rows <- gy_annotated %>%
rowwise() %>%
mutate(
latex_row = format_latex_row(gene, symbol, description, log2FC, FDR_val)
) %>%
pull(latex_row)
# Combine into full table
latex_table <- paste(
latex_header,
paste(latex_rows, collapse = "\n"),
latex_footer,
sep = "\n"
)
# Save to tables folder
writeLines(latex_table, file.path(paths$tables, "greenyellow_module_DEGs.tex"))
cat("\nLaTeX table saved:\n")
##
## LaTeX table saved:
cat(" - tables/greenyellow_module_DEGs.tex\n")
## - tables/greenyellow_module_DEGs.tex
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.3.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.51 here_1.0.2 lubridate_1.9.4 forcats_1.0.1
## [5] stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0 readr_2.1.6
## [9] tidyr_1.3.2 tibble_3.3.0 ggplot2_4.0.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.6.0 gtable_0.3.6 jsonlite_2.0.0 crayon_1.5.3
## [5] compiler_4.5.2 tidyselect_1.2.1 parallel_4.5.2 dichromat_2.0-0.1
## [9] jquerylib_0.1.4 scales_1.4.0 yaml_2.3.12 fastmap_1.2.0
## [13] R6_2.6.1 generics_0.1.4 rprojroot_2.1.1 tzdb_0.5.0
## [17] bslib_0.9.0 pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.6
## [21] stringi_1.8.7 cachem_1.1.0 xfun_0.55 sass_0.4.10
## [25] S7_0.2.1 bit64_4.6.0-1 otel_0.2.0 timechange_0.3.0
## [29] cli_3.6.5 withr_3.0.2 magrittr_2.0.4 digest_0.6.39
## [33] grid_4.5.2 vroom_1.6.7 hms_1.1.4 lifecycle_1.0.4
## [37] vctrs_0.6.5 evaluate_1.0.5 glue_1.8.0 farver_2.1.2
## [41] rmarkdown_2.30 tools_4.5.2 pkgconfig_2.0.3 htmltools_0.5.9