Purpose: Characterize the pink WGCNA module, which contains the JMJ2/JMJ4 histone demethylase cluster. This post-hoc analysis explores the functional coherence of genes co-expressed with JMJ2/JMJ4 in CTRL samples.
Key findings:
suppressPackageStartupMessages({
library(tidyverse)
library(here)
library(knitr)
})
# Load project paths
source(here("scripts", "utils", "setup_paths.R"))
paths <- setup_project_paths("inversion_paper")
# Field perturbation run directory
run_dir <- here("results/inversion_paper/field_perturbation/run_20251231_201332")
# Input files
CONSENSUS_FILE <- file.path(run_dir, "04_consensus_networks/ctrl/consensus_modules.csv")
GO_BP_FILE <- file.path(run_dir, "07_module_annotation/all_modules_GO_BP.csv")
LABELS_FILE <- here("data", "locus_labels_master.csv")
# Verify files exist
stopifnot(file.exists(CONSENSUS_FILE))
stopifnot(file.exists(GO_BP_FILE))
stopifnot(file.exists(LABELS_FILE))
cat("All input files found.\n")
## All input files found.
# Load CTRL consensus module assignments
ctrl_modules <- read_csv(CONSENSUS_FILE, show_col_types = FALSE)
# Load GO BP enrichment results
go_bp <- read_csv(GO_BP_FILE, show_col_types = FALSE)
# Load gene annotations with expression data
labels <- read_csv(LABELS_FILE, show_col_types = FALSE)
cat("Loaded", nrow(ctrl_modules), "genes with module assignments\n")
## Loaded 465 genes with module assignments
cat("Loaded", nrow(go_bp), "GO BP enrichment terms\n")
## Loaded 343 GO BP enrichment terms
cat("Loaded", nrow(labels), "gene annotations\n")
## Loaded 465 gene annotations
The pink module contains jmj2 and jmj4, the JUMONJI histone demethylases that are consistently downregulated in Inv4m plants.
# Extract pink module genes
pink_genes <- ctrl_modules %>%
filter(color == "pink") %>%
pull(gene)
cat("Pink module contains", length(pink_genes), "genes\n")
## Pink module contains 30 genes
# Verify jmj2/jmj4 are present
jmj_genes <- c("Zm00001eb191790", "Zm00001eb191820")
cat("jmj2 in pink module:", jmj_genes[1] %in% pink_genes, "\n")
## jmj2 in pink module: TRUE
cat("jmj4 in pink module:", jmj_genes[2] %in% pink_genes, "\n")
## jmj4 in pink module: TRUE
Table sorted by statistical significance (FDR, ascending = most significant first).
# Join pink genes with annotations
pink_annotated <- ctrl_modules %>%
filter(color == "pink") %>%
left_join(labels, by = "gene") %>%
mutate(
# Use locus_label if available, else locus_symbol
symbol = case_when(
!is.na(locus_label) & locus_label != "" ~ locus_label,
!is.na(locus_symbol) & locus_symbol != "" ~ locus_symbol,
TRUE ~ "—"
),
# Clean up description - use desc_merged or desc_pannzer
description = case_when(
!is.na(desc_merged) & desc_merged != "" ~ desc_merged,
!is.na(desc_pannzer) & desc_pannzer != "" ~ desc_pannzer,
TRUE ~ "—"
),
# Format log2FC and FDR
log2FC = round(logFC, 2),
FDR_val = adj.P.Val
) %>%
# Sort by FDR (most significant first)
arrange(FDR_val) %>%
select(gene, symbol, description, log2FC, FDR_val)
# Display count
cat("Annotated", nrow(pink_annotated), "pink module genes\n")
## Annotated 30 pink module genes
# Create formatted table
pink_table <- pink_annotated %>%
mutate(
FDR = formatC(FDR_val, format = "e", digits = 1),
# Truncate long descriptions
description = str_trunc(description, 50)
) %>%
select(gene, symbol, description, log2FC, FDR)
# Display with kable
kable(
pink_table,
col.names = c("Gene ID", "Label", "Description", "log2FC", "FDR"),
caption = "Pink module DEGs sorted by statistical significance (FDR)"
)
| Gene ID | Label | Description | log2FC | FDR |
|---|---|---|---|---|
| Zm00001eb191790 | jmj2 | JUMONJI-transcription factor 2 | -3.49 | 1.2e-22 |
| Zm00001eb191820 | jmj4 | JUMONJI-transcription factor 4 | -2.84 | 1.9e-20 |
| Zm00001eb188750 | hsp70-17 | Heat shock 70 kDa protein 17 | -6.48 | 6.9e-15 |
| Zm00001eb196760 | — | — | -8.29 | 9.2e-14 |
| Zm00001eb195120 | swc6 | SWR1 complex subunit 6 | -1.15 | 6.4e-13 |
| Zm00001eb196830 | pan1 | PAN domain-containing protein | -5.35 | 3.9e-11 |
| Zm00001eb249540 | rpl29 | 50S ribosomal protein L29, chloroplastic | 4.41 | 6.9e-10 |
| Zm00001eb192350 | pkwd1 | Protein kinase family protein / WD-40 repeat fa… | -2.20 | 1.9e-09 |
| Zm00001eb195790 | traf1 | TRAF-type domain-containing protein | 3.15 | 3.4e-09 |
| Zm00001eb192360 | ppr5 | Pentatricopeptide repeat-containing protein | -0.89 | 6.9e-09 |
| Zm00001eb195560 | lurp8 | Protein LURP-one-related 8 | -5.49 | 5.3e-08 |
| Zm00001eb360380 | — | — | 0.60 | 1.5e-04 |
| Zm00001eb188350 | rfc3 | Replication factor C subunit 3 | -0.83 | 1.8e-04 |
| Zm00001eb194610 | yae1 | Essential protein Yae1 N-terminal domain-contai… | -2.54 | 4.1e-04 |
| Zm00001eb193640 | ycge | HTH-type transcriptional repressor YcgE | 0.95 | 5.8e-04 |
| Zm00001eb195550 | eif2b2 | Eukaryotic translation initiation factor 2B fam… | 0.98 | 1.7e-03 |
| Zm00001eb196080 | bpg2 | GTP-binding protein BRASSINAZOLE INSENSITIVE PA… | 0.45 | 2.0e-03 |
| Zm00001eb122880 | ddx14 | DEAD-box ATP-dependent RNA helicase 14 | -0.23 | 2.0e-02 |
| Zm00001eb220970 | noc2 | Nucleolar complex protein 2-like protein | -0.35 | 2.0e-02 |
| Zm00001eb017010 | sec | Secreted protein | -1.47 | 2.0e-02 |
| Zm00001eb353990 | cchh1 | Transcription factor IIIA | -0.33 | 2.4e-02 |
| Zm00001eb432550 | bcd135b | Ribosomal RNA processing protein A | -0.43 | 2.7e-02 |
| Zm00001eb330750 | eif2b | Eukaryotic translation initiation factor 2 subu… | -0.27 | 3.3e-02 |
| Zm00001eb194670 | degp9 | Protease Do-like protein 9 | -0.29 | 3.9e-02 |
| Zm00001eb205670 | rps8 | 40S ribosomal protein S8 | -0.34 | 4.4e-02 |
| Zm00001eb080580 | — | — | -0.34 | 4.4e-02 |
| Zm00001eb012770 | rps18 | Ribosomal protein S18 | -0.26 | 4.4e-02 |
| Zm00001eb273610 | cchh40 | Zinc finger CCCH domain-containing protein 40 | -0.22 | 4.5e-02 |
| Zm00001eb126770 | cl59140_1 | — | -0.35 | 4.6e-02 |
| Zm00001eb160040 | rpc3 | DNA-directed RNA polymerase III subunit RPC3 | -0.28 | 4.8e-02 |
# Filter GO BP results for pink module
pink_go <- go_bp %>%
filter(module_color == "pink") %>%
arrange(pvalue) %>%
select(ID, Description, GeneRatio, pvalue, Count, geneID) %>%
mutate(
pvalue_fmt = formatC(pvalue, format = "e", digits = 2),
Description = str_trunc(Description, 60)
)
cat("Pink module has", nrow(pink_go), "enriched GO BP terms\n")
## Pink module has 28 enriched GO BP terms
# Display top GO terms
kable(
pink_go %>%
head(15) %>%
select(ID, Description, GeneRatio, pvalue_fmt, Count),
col.names = c("GO ID", "Description", "Gene Ratio", "p-value", "Count"),
caption = "Top 15 GO Biological Process terms for pink module"
)
| GO ID | Description | Gene Ratio | p-value | Count |
|---|---|---|---|---|
| GO:0042254 | ribosome biogenesis | 5/21 | 2.61e-04 | 5 |
| GO:0045814 | negative regulation of gene expression, epigenetic | 2/21 | 2.92e-04 | 2 |
| GO:0006364 | rRNA processing | 4/21 | 4.85e-04 | 4 |
| GO:0016072 | rRNA metabolic process | 4/21 | 8.34e-04 | 4 |
| GO:0040029 | regulation of gene expression, epigenetic | 2/21 | 1.30e-03 | 2 |
| GO:0045927 | positive regulation of growth | 2/21 | 1.69e-03 | 2 |
| GO:0040008 | regulation of growth | 3/21 | 1.90e-03 | 3 |
| GO:0006413 | translational initiation | 3/21 | 4.40e-03 | 3 |
| GO:0010629 | negative regulation of gene expression | 3/21 | 5.83e-03 | 3 |
| GO:0006338 | chromatin remodeling | 2/21 | 1.25e-02 | 2 |
| GO:0010558 | negative regulation of macromolecule biosynthetic process | 3/21 | 1.43e-02 | 3 |
| GO:0043066 | negative regulation of apoptotic process | 1/21 | 1.52e-02 | 1 |
| GO:0009890 | negative regulation of biosynthetic process | 3/21 | 1.63e-02 | 3 |
| GO:0040007 | growth | 3/21 | 1.69e-02 | 3 |
| GO:0071456 | cellular response to hypoxia | 1/21 | 2.03e-02 | 1 |
# Manually categorize key genes
categories <- tribble(
~category, ~genes, ~description,
"Epigenetic regulators", "jmj2, jmj4, swc6", "Histone demethylases and chromatin remodeling",
"DNA replication", "rfc3", "Replication factor C - cell cycle progression",
"Ribosome biogenesis", "rpl29, rps8, rps18, ddx14, noc2, bcd135b", "Protein synthesis capacity for growth",
"Translation initiation", "eif2b, eif2b2, lurp8", "Translation machinery",
"Growth signaling", "bpg2", "Brassinosteroid pathway - growth regulation"
)
kable(
categories,
col.names = c("Category", "Genes", "Biological Role"),
caption = "Functional categories in the pink module"
)
| Category | Genes | Biological Role |
|---|---|---|
| Epigenetic regulators | jmj2, jmj4, swc6 | Histone demethylases and chromatin remodeling |
| DNA replication | rfc3 | Replication factor C - cell cycle progression |
| Ribosome biogenesis | rpl29, rps8, rps18, ddx14, noc2, bcd135b | Protein synthesis capacity for growth |
| Translation initiation | eif2b, eif2b2, lurp8 | Translation machinery |
| Growth signaling | bpg2 | Brassinosteroid pathway - growth regulation |
The pink module represents a coherent growth/chromatin regulatory network centered on the JMJ2/JMJ4 histone demethylases:
The GO enrichment confirms:
This supports the hypothesis that JMJ2/JMJ4 downregulation in Inv4m disrupts a coordinated program of epigenetic regulation and growth-related gene expression.
# Save annotated table (CSV)
write_csv(
pink_annotated %>%
mutate(FDR = formatC(FDR_val, format = "e", digits = 2)) %>%
select(gene, symbol, description, log2FC, FDR),
file.path(paths$intermediate, "pink_module_annotated_DEGs.csv")
)
# Save GO enrichment
write_csv(
pink_go %>% select(-pvalue_fmt),
file.path(paths$intermediate, "pink_module_GO_BP.csv")
)
cat("\nSaved outputs:\n")
##
## Saved outputs:
cat(" - pink_module_annotated_DEGs.csv\n")
## - pink_module_annotated_DEGs.csv
cat(" - pink_module_GO_BP.csv\n")
## - pink_module_GO_BP.csv
# Generate LaTeX table for main.tex
latex_header <- '\\begin{table}[!ht]
\\caption{\\textbf{Pink module DEGs co-expressed with \\jmjii/\\jmjiv.} Genes assigned to the pink WGCNA consensus module, which contains the JMJ2/JMJ4 histone demethylase cluster. Sorted by statistical significance (FDR). GO enrichment for this module includes ``positive regulation of growth\'\' ($p = 0.0017$) and ``ribosome biogenesis\'\' ($p = 0.0003$).}
\\label{tab::pink_module}
\\centering
\\small
\\begin{tabular}{@{}lllrr@{}}
\\toprule
\\textbf{Gene ID} & \\textbf{Label} & \\textbf{Description} & \\textbf{log$_2$FC} & \\textbf{FDR} \\\\
\\midrule'
latex_footer <- '\\bottomrule
\\end{tabular}
\\end{table}'
# Format each row
format_latex_row <- function(gene, symbol, desc, log2fc, fdr) {
# Format label
label <- if (is.na(symbol) || symbol == "" || symbol == "—") {
"---"
} else {
paste0("\\textit{", symbol, "}")
}
# Format description
desc_clean <- if (is.na(desc) || desc == "" || desc == "—") {
"---"
} else {
str_trunc(desc, 40) %>%
str_replace_all("_", "\\\\_") %>%
str_replace_all("&", "\\\\&") %>%
str_replace_all("%", "\\\\%")
}
# Format log2FC with sign
log2fc_fmt <- if (log2fc >= 0) {
paste0("$+", sprintf("%.2f", log2fc), "$")
} else {
paste0("$", sprintf("%.2f", log2fc), "$")
}
# Format FDR in scientific notation
fdr_exp <- floor(log10(fdr))
fdr_mant <- fdr / 10^fdr_exp
fdr_fmt <- paste0("$", sprintf("%.1f", fdr_mant), " \\times 10^{", fdr_exp, "}$")
paste(gene, label, desc_clean, log2fc_fmt, fdr_fmt, sep = " & ", collapse = "") %>%
paste0(" \\\\")
}
# Generate all rows
latex_rows <- pink_annotated %>%
rowwise() %>%
mutate(
latex_row = format_latex_row(gene, symbol, description, log2FC, FDR_val)
) %>%
pull(latex_row)
# Combine into full table
latex_table <- paste(
latex_header,
paste(latex_rows, collapse = "\n"),
latex_footer,
sep = "\n"
)
# Save to tables folder
writeLines(latex_table, file.path(paths$tables, "pink_module_DEGs.tex"))
cat("\nLaTeX table saved:\n")
##
## LaTeX table saved:
cat(" - tables/pink_module_DEGs.tex\n")
## - tables/pink_module_DEGs.tex
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.51 here_1.0.2 lubridate_1.9.4 forcats_1.0.1
## [5] stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0 readr_2.1.6
## [9] tidyr_1.3.2 tibble_3.3.0 ggplot2_4.0.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.6.0 gtable_0.3.6 jsonlite_2.0.0 crayon_1.5.3
## [5] compiler_4.5.2 tidyselect_1.2.1 parallel_4.5.2 dichromat_2.0-0.1
## [9] jquerylib_0.1.4 scales_1.4.0 yaml_2.3.12 fastmap_1.2.0
## [13] R6_2.6.1 generics_0.1.4 rprojroot_2.1.1 tzdb_0.5.0
## [17] bslib_0.9.0 pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.6
## [21] stringi_1.8.7 cachem_1.1.0 xfun_0.55 sass_0.4.10
## [25] S7_0.2.1 bit64_4.6.0-1 otel_0.2.0 timechange_0.3.0
## [29] cli_3.6.5 withr_3.0.2 magrittr_2.0.4 digest_0.6.39
## [33] grid_4.5.2 vroom_1.6.7 hms_1.1.4 lifecycle_1.0.4
## [37] vctrs_0.6.5 evaluate_1.0.5 glue_1.8.0 farver_2.1.2
## [41] rmarkdown_2.30 tools_4.5.2 pkgconfig_2.0.3 htmltools_0.5.9