Overview

Purpose: Characterize the greenyellow WGCNA module, which contains the plant height GWAS candidate sec6 and the cell proliferation marker pcna2. This module has the highest bootstrap support (LFP = 0.80) among all WGCNA modules.

Key findings:

  • The greenyellow module contains 16 DEGs including sec6 and pcna2
  • GO enrichment reveals “developmental cell growth” (p = 8.5e-4) driven by ropgef14/pla2
  • Module includes DNA replication (pcna2), exocytosis (sec6), and cell polarity genes
  • Supports the hypothesis that Inv4m perturbs a growth/cell division network

Setup

suppressPackageStartupMessages({
  library(tidyverse)
  library(here)
  library(knitr)
})

# Load project paths
source(here("scripts", "utils", "setup_paths.R"))
paths <- setup_project_paths("inversion_paper")
# Field perturbation run directory
run_dir <- here("results/inversion_paper/field_perturbation/run_20251231_201332")

# Input files
CONSENSUS_FILE <- file.path(run_dir, "04_consensus_networks/ctrl/consensus_modules.csv")
GO_BP_FILE <- file.path(run_dir, "07_module_annotation/all_modules_GO_BP.csv")
LABELS_FILE <- here("data", "locus_labels_master.csv")
SUPPORT_FILE <- file.path(run_dir, "05_bootstrap_support/ctrl/module_support.csv")

# Verify files exist
stopifnot(file.exists(CONSENSUS_FILE))
stopifnot(file.exists(GO_BP_FILE))
stopifnot(file.exists(LABELS_FILE))
stopifnot(file.exists(SUPPORT_FILE))

cat("All input files found.\n")
## All input files found.

Load Data

# Load CTRL consensus module assignments
ctrl_modules <- read_csv(CONSENSUS_FILE, show_col_types = FALSE)

# Load GO BP enrichment results
go_bp <- read_csv(GO_BP_FILE, show_col_types = FALSE)

# Load gene annotations with expression data
labels <- read_csv(LABELS_FILE, show_col_types = FALSE)

# Load module support statistics
support <- read_csv(SUPPORT_FILE, show_col_types = FALSE)

cat("Loaded", nrow(ctrl_modules), "genes with module assignments\n")
## Loaded 465 genes with module assignments
cat("Loaded", nrow(go_bp), "GO BP enrichment terms\n")
## Loaded 343 GO BP enrichment terms
cat("Loaded", nrow(labels), "gene annotations\n")
## Loaded 471 gene annotations

Module Bootstrap Support

The greenyellow module has the highest bootstrap support among all modules.

# Extract greenyellow support stats
gy_support <- support %>%
  filter(color == "greenyellow")

cat("Greenyellow module bootstrap support:\n")
## Greenyellow module bootstrap support:
cat("  Mean LFP:", round(gy_support$mean_lfp, 3), "\n")
##   Mean LFP: 0.798
cat("  Stability:", gy_support$stability, "\n")
##   Stability: Stable
cat("  N genes:", gy_support$n_genes, "\n")
##   N genes: 16
# Compare to other modules
cat("\nTop 5 modules by LFP:\n")
## 
## Top 5 modules by LFP:
support %>%
  arrange(desc(mean_lfp)) %>%
  head(5) %>%
  select(color, mean_lfp, n_genes, stability) %>%
  kable()
color mean_lfp n_genes stability
greenyellow 0.7975625 16 Stable
blue 0.7945882 51 Stable
purple 0.7404091 22 Stable
pink 0.7151333 30 Stable
magenta 0.7127917 24 Stable

Greenyellow Module Genes

The greenyellow module contains sec6 (plant height GWAS candidate) and pcna2 (proliferating cell nuclear antigen), both upregulated in Inv4m plants.

# Extract greenyellow module genes
gy_genes <- ctrl_modules %>%
  filter(color == "greenyellow") %>%
  pull(gene)

cat("Greenyellow module contains", length(gy_genes), "genes\n")
## Greenyellow module contains 16 genes
# Verify key genes are present
key_genes <- c(
  "Zm00001eb194380",  # sec6
  "Zm00001eb192050",  # pcna2
  "Zm00001eb188240"   # ropgef14
)
cat("\nsec6 in greenyellow module:", key_genes[1] %in% gy_genes, "\n")
## 
## sec6 in greenyellow module: TRUE
cat("pcna2 in greenyellow module:", key_genes[2] %in% gy_genes, "\n")
## pcna2 in greenyellow module: TRUE
cat("ropgef14 in greenyellow module:", key_genes[3] %in% gy_genes, "\n")
## ropgef14 in greenyellow module: TRUE

Annotated DEG Table

Table sorted by statistical significance (FDR, ascending = most significant first).

# Join greenyellow genes with annotations
gy_annotated <- ctrl_modules %>%
  filter(color == "greenyellow") %>%
  left_join(labels, by = "gene") %>%
  mutate(
    # Use locus_label if available, else locus_symbol
    symbol = case_when(
      !is.na(locus_label) & locus_label != "" ~ locus_label,
      !is.na(locus_symbol) & locus_symbol != "" ~ locus_symbol,
      TRUE ~ "---"
    ),
    # Clean up description - use desc_merged or desc_pannzer
    description = case_when(
      !is.na(desc_merged) & desc_merged != "" ~ desc_merged,
      !is.na(desc_pannzer) & desc_pannzer != "" ~ desc_pannzer,
      TRUE ~ "---"
    ),
    # Format log2FC and FDR
    log2FC = round(logFC, 2),
    FDR_val = adj.P.Val
  ) %>%
  # Sort by FDR (most significant first)
  arrange(FDR_val) %>%
  select(gene, symbol, description, log2FC, FDR_val)

# Display count
cat("Annotated", nrow(gy_annotated), "greenyellow module genes\n")
## Annotated 16 greenyellow module genes
# Create formatted table
gy_table <- gy_annotated %>%
  mutate(
    FDR = formatC(FDR_val, format = "e", digits = 1),
    # Truncate long descriptions
    description = str_trunc(description, 50)
  ) %>%
  select(gene, symbol, description, log2FC, FDR)

# Display with kable
kable(
  gy_table,
  col.names = c("Gene ID", "Label", "Description", "log2FC", "FDR"),
  caption = "Greenyellow module DEGs sorted by statistical significance (FDR)"
)
Greenyellow module DEGs sorted by statistical significance (FDR)
Gene ID Label Description log2FC FDR
Zm00001eb194380 sec6 Exocyst complex component Sec6 2.87 2.9e-17
Zm00001eb188240 ropgef14 Rop guanine nucleotide exchange factor 14 -2.98 2.6e-14
Zm00001eb112470 engd2 6.14 4.9e-13
Zm00001eb191260 pbl19 probable serine/threonine-protein kinase PBL19 -1.39 3.5e-09
Zm00001eb195290 pap1 PAP-associated domain-containing protein 1.45 4.7e-08
Zm00001eb386670 rglg2 E3 ubiquitin-protein ligase RGLG2 -1.01 5.2e-04
Zm00001eb187450 mybr58 HTH myb-type domain-containing protein 1.74 3.1e-03
Zm00001eb192050 pcna2 Proliferating cell nuclear antigen 2.99 9.3e-03
Zm00001eb290800 pk8 Protein kinase domain-containing protein -0.37 2.5e-02
Zm00001eb392440 iqm1 IQ calmodulin-binding motif family protein, exp… -0.85 2.6e-02
Zm00001eb080160 hipp16 Heavy metal-associated isoprenylated plant prot… -0.99 3.4e-02
Zm00001eb404820 esk1 Protein ESKIMO like -0.65 3.4e-02
Zm00001eb064190 parg Poly(ADP-ribose) glycohydrolase -0.29 4.2e-02
Zm00001eb190580 pla2 probable phospholipase A2 homolog 1 -0.69 4.8e-02
Zm00001eb375460 pk3 Protein kinase domain-containing protein -0.74 4.8e-02
Zm00001eb279370 phi1b Phi-1-like phosphate-induced protein 0.72 5.0e-02

GO Enrichment for Greenyellow Module

# Filter GO BP results for greenyellow module
gy_go <- go_bp %>%
  filter(module_color == "greenyellow") %>%
  arrange(pvalue) %>%
  select(ID, Description, GeneRatio, pvalue, Count, geneID) %>%
  mutate(
    pvalue_fmt = formatC(pvalue, format = "e", digits = 2),
    Description = str_trunc(Description, 60)
  )

cat("Greenyellow module has", nrow(gy_go), "enriched GO BP terms\n")
## Greenyellow module has 54 enriched GO BP terms
# Display top GO terms
kable(
  gy_go %>%
    head(20) %>%
    select(ID, Description, GeneRatio, pvalue_fmt, Count),
  col.names = c("GO ID", "Description", "Gene Ratio", "p-value", "Count"),
  caption = "Top 20 GO Biological Process terms for greenyellow module"
)
Top 20 GO Biological Process terms for greenyellow module
GO ID Description Gene Ratio p-value Count
GO:0009856 pollination 3/9 3.66e-05 3
GO:0044706 multi-multicellular organism process 3/9 3.66e-05 3
GO:0006282 regulation of DNA repair 2/9 7.95e-05 2
GO:0009860 pollen tube growth 2/9 4.19e-04 2
GO:0009932 cell tip growth 2/9 5.96e-04 2
GO:0080135 regulation of cellular response to stress 2/9 5.96e-04 2
GO:0048588 developmental cell growth 2/9 8.45e-04 2
GO:0048868 pollen tube development 2/9 9.52e-04 2
GO:0051052 regulation of DNA metabolic process 2/9 1.14e-03 2
GO:0046903 secretion 2/9 1.39e-03 2
GO:0048468 cell development 2/9 1.69e-03 2
GO:0009826 unidimensional cell growth 2/9 4.00e-03 2
GO:0060560 developmental growth involved in morphogenesis 2/9 4.97e-03 2
GO:0045739 positive regulation of DNA repair 1/9 5.47e-03 1
GO:0060321 acceptance of pollen 1/9 5.47e-03 1
GO:0070206 protein trimerization 1/9 6.56e-03 1
GO:0015908 fatty acid transport 1/9 7.10e-03 1
GO:0000902 cell morphogenesis 2/9 8.04e-03 2
GO:0016049 cell growth 2/9 8.35e-03 2
GO:0031503 protein-containing complex localization 1/9 8.73e-03 1

Functional Categories

# Manually categorize key genes
categories <- tribble(
  ~category, ~genes, ~description,
  "Cell proliferation", "pcna2", "DNA replication licensing - cell cycle S phase",
  "Exocytosis/Cytokinesis", "sec6", "Exocyst complex - vesicle trafficking, cell plate formation",
  "Cell polarity/Growth", "ropgef14, pla2", "Rho GTPase signaling - polarized cell growth",
  "DNA repair", "parg, pcna2", "Poly(ADP-ribose) metabolism, DNA damage response",
  "Transcription factor", "mybr58", "MYB-related TF - potentially growth-related targets",
  "Signaling kinases", "pbl19, pk3, pk8", "Protein kinases - signal transduction"
)

kable(
  categories,
  col.names = c("Category", "Genes", "Biological Role"),
  caption = "Functional categories in the greenyellow module"
)
Functional categories in the greenyellow module
Category Genes Biological Role
Cell proliferation pcna2 DNA replication licensing - cell cycle S phase
Exocytosis/Cytokinesis sec6 Exocyst complex - vesicle trafficking, cell plate formation
Cell polarity/Growth ropgef14, pla2 Rho GTPase signaling - polarized cell growth
DNA repair parg, pcna2 Poly(ADP-ribose) metabolism, DNA damage response
Transcription factor mybr58 MYB-related TF - potentially growth-related targets
Signaling kinases pbl19, pk3, pk8 Protein kinases - signal transduction

Key Genes: sec6 and pcna2

sec6 (Exocyst complex component)

  • Gene ID: Zm00001eb194380
  • log2FC: +2.87 (upregulated in Inv4m)
  • FDR: 2.9e-17
  • GWAS support: Plant height candidate (Peiffer et al. 2014)
  • Function: Exocyst complex component involved in vesicle trafficking and cell plate formation during cytokinesis

pcna2 (Proliferating cell nuclear antigen 2)

  • Gene ID: Zm00001eb192050
  • log2FC: +2.99 (upregulated in Inv4m)
  • FDR: 9.3e-03
  • Function: DNA replication processivity factor, marker of cell proliferation, essential for S phase progression

ropgef14 (Rop guanine nucleotide exchange factor 14)

  • Gene ID: Zm00001eb188240
  • log2FC: -2.98 (downregulated in Inv4m)
  • FDR: 2.6e-14
  • Function: Activator of Rho-type GTPases, required for polarized cell growth including pollen tube elongation

Summary

The greenyellow module represents a coherent cell growth/division network with the highest bootstrap support (LFP = 0.80) among all WGCNA modules:

  1. Cell proliferation: pcna2 (DNA replication), parg (DNA repair)
  2. Cytokinesis: sec6 (exocyst complex, cell plate formation)
  3. Polarized growth: ropgef14, pla2 (Rho GTPase signaling, tip growth)
  4. Signaling: Multiple protein kinases (pbl19, pk3, pk8)

The GO enrichment confirms:

  • “Developmental cell growth” (p = 8.5e-4) - polarized growth genes
  • “Cell tip growth” (p = 6.0e-4) - pollen tube elongation genes
  • “Regulation of DNA replication” (p = 0.019) - pcna2

The presence of both sec6 (GWAS plant height candidate) and pcna2 (cell proliferation marker) in this highly stable module supports the hypothesis that Inv4m introgression perturbs a coordinated cell growth/division program, consistent with the altered SAM morphology and plant height phenotypes.


Save Outputs

# Save annotated table (CSV)
write_csv(
  gy_annotated %>%
    mutate(FDR = formatC(FDR_val, format = "e", digits = 2)) %>%
    select(gene, symbol, description, log2FC, FDR),
  file.path(paths$intermediate, "greenyellow_module_annotated_DEGs.csv")
)

# Save GO enrichment
write_csv(
  gy_go %>% select(-pvalue_fmt),
  file.path(paths$intermediate, "greenyellow_module_GO_BP.csv")
)

cat("\nSaved outputs:\n")
## 
## Saved outputs:
cat("  - greenyellow_module_annotated_DEGs.csv\n")
##   - greenyellow_module_annotated_DEGs.csv
cat("  - greenyellow_module_GO_BP.csv\n")
##   - greenyellow_module_GO_BP.csv
# Generate LaTeX table for main.tex
latex_header <- '\\begin{table}[!ht]
\\caption{\\textbf{Greenyellow module DEGs containing growth and cell division genes.} Genes assigned to the greenyellow WGCNA consensus module, which has the highest bootstrap support (LFP = 0.80) among all modules. Contains the plant height GWAS candidate \\textit{sec6} and the cell proliferation marker \\textit{pcna2}. Sorted by statistical significance (FDR). GO enrichment includes ``developmental cell growth\'\' ($p = 8.5 \\times 10^{-4}$) and ``regulation of DNA replication\'\' ($p = 0.019$).}
\\label{tab::greenyellow_module}
\\centering
\\small
\\begin{tabular}{@{}lllrr@{}}
\\toprule
\\textbf{Gene ID} & \\textbf{Label} & \\textbf{Description} & \\textbf{log$_2$FC} & \\textbf{FDR} \\\\
\\midrule'

latex_footer <- '\\bottomrule
\\end{tabular}
\\end{table}'

# Format each row
format_latex_row <- function(gene, symbol, desc, log2fc, fdr) {
  # Format label
  label <- if (is.na(symbol) || symbol == "" || symbol == "---") {
    "---"
  } else {
    paste0("\\textit{", symbol, "}")
  }

  # Format description
  desc_clean <- if (is.na(desc) || desc == "" || desc == "---") {
    "---"
  } else {
    str_trunc(desc, 40) %>%
      str_replace_all("_", "\\\\_") %>%
      str_replace_all("&", "\\\\&") %>%
      str_replace_all("%", "\\\\%")
  }

  # Format log2FC with sign
  log2fc_fmt <- if (log2fc >= 0) {
    paste0("$+", sprintf("%.2f", log2fc), "$")
  } else {
    paste0("$", sprintf("%.2f", log2fc), "$")
  }

  # Format FDR in scientific notation
  fdr_exp <- floor(log10(fdr))
  fdr_mant <- fdr / 10^fdr_exp
  fdr_fmt <- paste0("$", sprintf("%.1f", fdr_mant), " \\times 10^{", fdr_exp, "}$")

  paste(gene, label, desc_clean, log2fc_fmt, fdr_fmt, sep = " & ", collapse = "") %>%
    paste0(" \\\\")
}

# Generate all rows
latex_rows <- gy_annotated %>%
  rowwise() %>%
  mutate(
    latex_row = format_latex_row(gene, symbol, description, log2FC, FDR_val)
  ) %>%
  pull(latex_row)

# Combine into full table
latex_table <- paste(
  latex_header,
  paste(latex_rows, collapse = "\n"),
  latex_footer,
  sep = "\n"
)

# Save to tables folder
writeLines(latex_table, file.path(paths$tables, "greenyellow_module_DEGs.tex"))

cat("\nLaTeX table saved:\n")
## 
## LaTeX table saved:
cat("  - tables/greenyellow_module_DEGs.tex\n")
##   - tables/greenyellow_module_DEGs.tex

Session Info

sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.3.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.51      here_1.0.2      lubridate_1.9.4 forcats_1.0.1  
##  [5] stringr_1.6.0   dplyr_1.1.4     purrr_1.2.0     readr_2.1.6    
##  [9] tidyr_1.3.2     tibble_3.3.0    ggplot2_4.0.1   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] bit_4.6.0          gtable_0.3.6       jsonlite_2.0.0     crayon_1.5.3      
##  [5] compiler_4.5.2     tidyselect_1.2.1   parallel_4.5.2     dichromat_2.0-0.1 
##  [9] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.12        fastmap_1.2.0     
## [13] R6_2.6.1           generics_0.1.4     rprojroot_2.1.1    tzdb_0.5.0        
## [17] bslib_0.9.0        pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.1.6       
## [21] stringi_1.8.7      cachem_1.1.0       xfun_0.55          sass_0.4.10       
## [25] S7_0.2.1           bit64_4.6.0-1      otel_0.2.0         timechange_0.3.0  
## [29] cli_3.6.5          withr_3.0.2        magrittr_2.0.4     digest_0.6.39     
## [33] grid_4.5.2         vroom_1.6.7        hms_1.1.4          lifecycle_1.0.4   
## [37] vctrs_0.6.5        evaluate_1.0.5     glue_1.8.0         farver_2.1.2      
## [41] rmarkdown_2.30     tools_4.5.2        pkgconfig_2.0.3    htmltools_0.5.9