Overview

Purpose: Characterize the pink WGCNA module, which contains the JMJ2/JMJ4 histone demethylase cluster. This post-hoc analysis explores the functional coherence of genes co-expressed with JMJ2/JMJ4 in CTRL samples.

Key findings:

The pink module contains 30 DEGs including jmj2 and jmj4
GO enrichment reveals “positive regulation of growth” driven by jmj2/jmj4
Module includes DNA replication (rfc3), ribosome biogenesis, and chromatin genes
Supports the hypothesis that JMJ2/JMJ4 are epigenetic regulators of growth

Setup

suppressPackageStartupMessages({
  library(tidyverse)
  library(here)
  library(knitr)
})

# Load project paths
source(here("scripts", "utils", "setup_paths.R"))
paths <- setup_project_paths("inversion_paper")

# Field perturbation run directory
run_dir <- here("results/inversion_paper/field_perturbation/run_20251231_201332")

# Input files
CONSENSUS_FILE <- file.path(run_dir, "04_consensus_networks/ctrl/consensus_modules.csv")
GO_BP_FILE <- file.path(run_dir, "07_module_annotation/all_modules_GO_BP.csv")
LABELS_FILE <- here("data", "locus_labels_master.csv")

# Verify files exist
stopifnot(file.exists(CONSENSUS_FILE))
stopifnot(file.exists(GO_BP_FILE))
stopifnot(file.exists(LABELS_FILE))

cat("All input files found.\n")

## All input files found.

Load Data

# Load CTRL consensus module assignments
ctrl_modules <- read_csv(CONSENSUS_FILE, show_col_types = FALSE)

# Load GO BP enrichment results
go_bp <- read_csv(GO_BP_FILE, show_col_types = FALSE)

# Load gene annotations with expression data
labels <- read_csv(LABELS_FILE, show_col_types = FALSE)

cat("Loaded", nrow(ctrl_modules), "genes with module assignments\n")

## Loaded 465 genes with module assignments

cat("Loaded", nrow(go_bp), "GO BP enrichment terms\n")

## Loaded 343 GO BP enrichment terms

cat("Loaded", nrow(labels), "gene annotations\n")

## Loaded 465 gene annotations

Pink Module Genes

The pink module contains jmj2 and jmj4, the JUMONJI histone demethylases that are consistently downregulated in Inv4m plants.

# Extract pink module genes
pink_genes <- ctrl_modules %>%
  filter(color == "pink") %>%
  pull(gene)

cat("Pink module contains", length(pink_genes), "genes\n")

## Pink module contains 30 genes

# Verify jmj2/jmj4 are present
jmj_genes <- c("Zm00001eb191790", "Zm00001eb191820")
cat("jmj2 in pink module:", jmj_genes[1] %in% pink_genes, "\n")

## jmj2 in pink module: TRUE

cat("jmj4 in pink module:", jmj_genes[2] %in% pink_genes, "\n")

## jmj4 in pink module: TRUE

Annotated DEG Table

Table sorted by statistical significance (FDR, ascending = most significant first).

# Join pink genes with annotations
pink_annotated <- ctrl_modules %>%
  filter(color == "pink") %>%
  left_join(labels, by = "gene") %>%
  mutate(
    # Use locus_label if available, else locus_symbol
    symbol = case_when(
      !is.na(locus_label) & locus_label != "" ~ locus_label,
      !is.na(locus_symbol) & locus_symbol != "" ~ locus_symbol,
      TRUE ~ "—"
    ),
    # Clean up description - use desc_merged or desc_pannzer
    description = case_when(
      !is.na(desc_merged) & desc_merged != "" ~ desc_merged,
      !is.na(desc_pannzer) & desc_pannzer != "" ~ desc_pannzer,
      TRUE ~ "—"
    ),
    # Format log2FC and FDR
    log2FC = round(logFC, 2),
    FDR_val = adj.P.Val
  ) %>%
  # Sort by FDR (most significant first)
  arrange(FDR_val) %>%
  select(gene, symbol, description, log2FC, FDR_val)

# Display count
cat("Annotated", nrow(pink_annotated), "pink module genes\n")

## Annotated 30 pink module genes

# Create formatted table
pink_table <- pink_annotated %>%
  mutate(
    FDR = formatC(FDR_val, format = "e", digits = 1),
    # Truncate long descriptions
    description = str_trunc(description, 50)
  ) %>%
  select(gene, symbol, description, log2FC, FDR)

# Display with kable
kable(
  pink_table,
  col.names = c("Gene ID", "Label", "Description", "log2FC", "FDR"),
  caption = "Pink module DEGs sorted by statistical significance (FDR)"
)

Pink module DEGs sorted by statistical significance (FDR)
Gene ID	Label	Description	log2FC	FDR
Zm00001eb191790	jmj2	JUMONJI-transcription factor 2	-3.49	1.2e-22
Zm00001eb191820	jmj4	JUMONJI-transcription factor 4	-2.84	1.9e-20
Zm00001eb188750	hsp70-17	Heat shock 70 kDa protein 17	-6.48	6.9e-15
Zm00001eb196760	—	—	-8.29	9.2e-14
Zm00001eb195120	swc6	SWR1 complex subunit 6	-1.15	6.4e-13
Zm00001eb196830	pan1	PAN domain-containing protein	-5.35	3.9e-11
Zm00001eb249540	rpl29	50S ribosomal protein L29, chloroplastic	4.41	6.9e-10
Zm00001eb192350	pkwd1	Protein kinase family protein / WD-40 repeat fa…	-2.20	1.9e-09
Zm00001eb195790	traf1	TRAF-type domain-containing protein	3.15	3.4e-09
Zm00001eb192360	ppr5	Pentatricopeptide repeat-containing protein	-0.89	6.9e-09
Zm00001eb195560	lurp8	Protein LURP-one-related 8	-5.49	5.3e-08
Zm00001eb360380	—	—	0.60	1.5e-04
Zm00001eb188350	rfc3	Replication factor C subunit 3	-0.83	1.8e-04
Zm00001eb194610	yae1	Essential protein Yae1 N-terminal domain-contai…	-2.54	4.1e-04
Zm00001eb193640	ycge	HTH-type transcriptional repressor YcgE	0.95	5.8e-04
Zm00001eb195550	eif2b2	Eukaryotic translation initiation factor 2B fam…	0.98	1.7e-03
Zm00001eb196080	bpg2	GTP-binding protein BRASSINAZOLE INSENSITIVE PA…	0.45	2.0e-03
Zm00001eb122880	ddx14	DEAD-box ATP-dependent RNA helicase 14	-0.23	2.0e-02
Zm00001eb220970	noc2	Nucleolar complex protein 2-like protein	-0.35	2.0e-02
Zm00001eb017010	sec	Secreted protein	-1.47	2.0e-02
Zm00001eb353990	cchh1	Transcription factor IIIA	-0.33	2.4e-02
Zm00001eb432550	bcd135b	Ribosomal RNA processing protein A	-0.43	2.7e-02
Zm00001eb330750	eif2b	Eukaryotic translation initiation factor 2 subu…	-0.27	3.3e-02
Zm00001eb194670	degp9	Protease Do-like protein 9	-0.29	3.9e-02
Zm00001eb205670	rps8	40S ribosomal protein S8	-0.34	4.4e-02
Zm00001eb080580	—	—	-0.34	4.4e-02
Zm00001eb012770	rps18	Ribosomal protein S18	-0.26	4.4e-02
Zm00001eb273610	cchh40	Zinc finger CCCH domain-containing protein 40	-0.22	4.5e-02
Zm00001eb126770	cl59140_1	—	-0.35	4.6e-02
Zm00001eb160040	rpc3	DNA-directed RNA polymerase III subunit RPC3	-0.28	4.8e-02

GO Enrichment for Pink Module

# Filter GO BP results for pink module
pink_go <- go_bp %>%
  filter(module_color == "pink") %>%
  arrange(pvalue) %>%
  select(ID, Description, GeneRatio, pvalue, Count, geneID) %>%
  mutate(
    pvalue_fmt = formatC(pvalue, format = "e", digits = 2),
    Description = str_trunc(Description, 60)
  )

cat("Pink module has", nrow(pink_go), "enriched GO BP terms\n")

## Pink module has 28 enriched GO BP terms

# Display top GO terms
kable(
  pink_go %>%
    head(15) %>%
    select(ID, Description, GeneRatio, pvalue_fmt, Count),
  col.names = c("GO ID", "Description", "Gene Ratio", "p-value", "Count"),
  caption = "Top 15 GO Biological Process terms for pink module"
)

Top 15 GO Biological Process terms for pink module
GO ID	Description	Gene Ratio	p-value	Count
GO:0042254	ribosome biogenesis	5/21	2.61e-04	5
GO:0045814	negative regulation of gene expression, epigenetic	2/21	2.92e-04	2
GO:0006364	rRNA processing	4/21	4.85e-04	4
GO:0016072	rRNA metabolic process	4/21	8.34e-04	4
GO:0040029	regulation of gene expression, epigenetic	2/21	1.30e-03	2
GO:0045927	positive regulation of growth	2/21	1.69e-03	2
GO:0040008	regulation of growth	3/21	1.90e-03	3
GO:0006413	translational initiation	3/21	4.40e-03	3
GO:0010629	negative regulation of gene expression	3/21	5.83e-03	3
GO:0006338	chromatin remodeling	2/21	1.25e-02	2
GO:0010558	negative regulation of macromolecule biosynthetic process	3/21	1.43e-02	3
GO:0043066	negative regulation of apoptotic process	1/21	1.52e-02	1
GO:0009890	negative regulation of biosynthetic process	3/21	1.63e-02	3
GO:0040007	growth	3/21	1.69e-02	3
GO:0071456	cellular response to hypoxia	1/21	2.03e-02	1

Growth-Related Genes

Highlight genes related to growth, cell cycle, and cell proliferation.

# Define growth-related GO terms
growth_go_ids <- pink_go %>%
  filter(grepl("growth|proliferation|cell cycle|chromatin|epigenetic|ribosom",
               Description, ignore.case = TRUE)) %>%
  pull(ID)

# Extract genes from these terms
growth_gene_string <- pink_go %>%
  filter(ID %in% growth_go_ids) %>%
  pull(geneID) %>%
  paste(collapse = "/")

growth_genes <- unique(unlist(str_split(growth_gene_string, "/")))

cat("Growth-related genes in pink module:", length(growth_genes), "\n")

## Growth-related genes in pink module: 8

# Annotate growth genes
growth_annotated <- pink_annotated %>%
  filter(gene %in% growth_genes) %>%
  mutate(
    FDR = formatC(FDR_val, format = "e", digits = 1)
  ) %>%
  select(gene, symbol, description, log2FC, FDR)

kable(
  growth_annotated,
  col.names = c("Gene ID", "Symbol", "Description", "log2FC", "FDR"),
  caption = "Pink module genes in growth-related GO terms"
)

Pink module genes in growth-related GO terms
Gene ID	Symbol	Description	log2FC	FDR
Zm00001eb191790	jmj2	JUMONJI-transcription factor 2	-3.49	1.2e-22
Zm00001eb191820	jmj4	JUMONJI-transcription factor 4	-2.84	1.9e-20
Zm00001eb195120	swc6	SWR1 complex subunit 6	-1.15	6.4e-13
Zm00001eb196080	bpg2	GTP-binding protein BRASSINAZOLE INSENSITIVE PALE GREEN 2, chloroplastic	0.45	2.0e-03
Zm00001eb122880	ddx14	DEAD-box ATP-dependent RNA helicase 14	-0.23	2.0e-02
Zm00001eb220970	noc2	Nucleolar complex protein 2-like protein	-0.35	2.0e-02
Zm00001eb432550	bcd135b	Ribosomal RNA processing protein A	-0.43	2.7e-02
Zm00001eb205670	rps8	40S ribosomal protein S8	-0.34	4.4e-02

Functional Categories

# Manually categorize key genes
categories <- tribble(
  ~category, ~genes, ~description,
  "Epigenetic regulators", "jmj2, jmj4, swc6", "Histone demethylases and chromatin remodeling",
  "DNA replication", "rfc3", "Replication factor C - cell cycle progression",
  "Ribosome biogenesis", "rpl29, rps8, rps18, ddx14, noc2, bcd135b", "Protein synthesis capacity for growth",
  "Translation initiation", "eif2b, eif2b2, lurp8", "Translation machinery",
  "Growth signaling", "bpg2", "Brassinosteroid pathway - growth regulation"
)

kable(
  categories,
  col.names = c("Category", "Genes", "Biological Role"),
  caption = "Functional categories in the pink module"
)

Functional categories in the pink module
Category	Genes	Biological Role
Epigenetic regulators	jmj2, jmj4, swc6	Histone demethylases and chromatin remodeling
DNA replication	rfc3	Replication factor C - cell cycle progression
Ribosome biogenesis	rpl29, rps8, rps18, ddx14, noc2, bcd135b	Protein synthesis capacity for growth
Translation initiation	eif2b, eif2b2, lurp8	Translation machinery
Growth signaling	bpg2	Brassinosteroid pathway - growth regulation

Summary

The pink module represents a coherent growth/chromatin regulatory network centered on the JMJ2/JMJ4 histone demethylases:

Epigenetic control: jmj2/jmj4 (histone demethylation) and swc6 (chromatin remodeling)
Cell proliferation machinery: rfc3 (DNA replication), ribosomal proteins
Translation capacity: Multiple translation initiation factors
Growth signaling: bpg2 (brassinosteroid pathway)

The GO enrichment confirms:

“Positive regulation of growth” (p = 0.0017) - driven by jmj2/jmj4
“Ribosome biogenesis” (p = 0.0003) - protein synthesis capacity
“Chromatin remodeling” (p = 0.013) - epigenetic regulation

This supports the hypothesis that JMJ2/JMJ4 downregulation in Inv4m disrupts a coordinated program of epigenetic regulation and growth-related gene expression.

Save Outputs

# Save annotated table (CSV)
write_csv(
  pink_annotated %>%
    mutate(FDR = formatC(FDR_val, format = "e", digits = 2)) %>%
    select(gene, symbol, description, log2FC, FDR),
  file.path(paths$intermediate, "pink_module_annotated_DEGs.csv")
)

# Save GO enrichment
write_csv(
  pink_go %>% select(-pvalue_fmt),
  file.path(paths$intermediate, "pink_module_GO_BP.csv")
)

cat("\nSaved outputs:\n")

## 
## Saved outputs:

cat("  - pink_module_annotated_DEGs.csv\n")

##   - pink_module_annotated_DEGs.csv

cat("  - pink_module_GO_BP.csv\n")

##   - pink_module_GO_BP.csv

# Generate LaTeX table for main.tex
latex_header <- '\\begin{table}[!ht]
\\caption{\\textbf{Pink module DEGs co-expressed with \\jmjii/\\jmjiv.} Genes assigned to the pink WGCNA consensus module, which contains the JMJ2/JMJ4 histone demethylase cluster. Sorted by statistical significance (FDR). GO enrichment for this module includes ``positive regulation of growth\'\' ($p = 0.0017$) and ``ribosome biogenesis\'\' ($p = 0.0003$).}
\\label{tab::pink_module}
\\centering
\\small
\\begin{tabular}{@{}lllrr@{}}
\\toprule
\\textbf{Gene ID} & \\textbf{Label} & \\textbf{Description} & \\textbf{log$_2$FC} & \\textbf{FDR} \\\\
\\midrule'

latex_footer <- '\\bottomrule
\\end{tabular}
\\end{table}'

# Format each row
format_latex_row <- function(gene, symbol, desc, log2fc, fdr) {
  # Format label
  label <- if (is.na(symbol) || symbol == "" || symbol == "—") {
    "---"
  } else {
    paste0("\\textit{", symbol, "}")
  }

  # Format description
  desc_clean <- if (is.na(desc) || desc == "" || desc == "—") {
    "---"
  } else {
    str_trunc(desc, 40) %>%
      str_replace_all("_", "\\\\_") %>%
      str_replace_all("&", "\\\\&") %>%
      str_replace_all("%", "\\\\%")
  }

  # Format log2FC with sign
  log2fc_fmt <- if (log2fc >= 0) {
    paste0("$+", sprintf("%.2f", log2fc), "$")
  } else {
    paste0("$", sprintf("%.2f", log2fc), "$")
  }

  # Format FDR in scientific notation
  fdr_exp <- floor(log10(fdr))
  fdr_mant <- fdr / 10^fdr_exp
  fdr_fmt <- paste0("$", sprintf("%.1f", fdr_mant), " \\times 10^{", fdr_exp, "}$")

  paste(gene, label, desc_clean, log2fc_fmt, fdr_fmt, sep = " & ", collapse = "") %>%
    paste0(" \\\\")
}

# Generate all rows
latex_rows <- pink_annotated %>%
  rowwise() %>%
  mutate(
    latex_row = format_latex_row(gene, symbol, description, log2FC, FDR_val)
  ) %>%
  pull(latex_row)

# Combine into full table
latex_table <- paste(
  latex_header,
  paste(latex_rows, collapse = "\n"),
  latex_footer,
  sep = "\n"
)

# Save to tables folder
writeLines(latex_table, file.path(paths$tables, "pink_module_DEGs.tex"))

cat("\nLaTeX table saved:\n")

## 
## LaTeX table saved:

cat("  - tables/pink_module_DEGs.tex\n")

##   - tables/pink_module_DEGs.tex

Session Info

sessionInfo()

## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.51      here_1.0.2      lubridate_1.9.4 forcats_1.0.1  
##  [5] stringr_1.6.0   dplyr_1.1.4     purrr_1.2.0     readr_2.1.6    
##  [9] tidyr_1.3.2     tibble_3.3.0    ggplot2_4.0.1   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] bit_4.6.0          gtable_0.3.6       jsonlite_2.0.0     crayon_1.5.3      
##  [5] compiler_4.5.2     tidyselect_1.2.1   parallel_4.5.2     dichromat_2.0-0.1 
##  [9] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.12        fastmap_1.2.0     
## [13] R6_2.6.1           generics_0.1.4     rprojroot_2.1.1    tzdb_0.5.0        
## [17] bslib_0.9.0        pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.1.6       
## [21] stringi_1.8.7      cachem_1.1.0       xfun_0.55          sass_0.4.10       
## [25] S7_0.2.1           bit64_4.6.0-1      otel_0.2.0         timechange_0.3.0  
## [29] cli_3.6.5          withr_3.0.2        magrittr_2.0.4     digest_0.6.39     
## [33] grid_4.5.2         vroom_1.6.7        hms_1.1.4          lifecycle_1.0.4   
## [37] vctrs_0.6.5        evaluate_1.0.5     glue_1.8.0         farver_2.1.2      
## [41] rmarkdown_2.30     tools_4.5.2        pkgconfig_2.0.3    htmltools_0.5.9

JMJ Pink Module Characterization

Analysis Pipeline

2026-01-28