Purpose: Identify and characterize trans-regulatory networks mediated by genes within the Inv4m introgression region. This analysis integrates differential expression results with co-expression data from MaizeNetome to construct regulatory networks.
Approach: 1. Define genomic regions (Inv4m inversion, flanking introgression, background) 2. Test for enrichment of DEGs within cis vs trans regions 3. Query MaizeNetome for reference co-expression edges involving introgressed genes 4. Calculate bipartite cis-trans co-expression network from expression data 5. Classify edges as Reference (in MaizeNetome) or Novel (not in MaizeNetome) 6. Visualize networks with igraph/visNetwork
Expected outcomes: - Identification of trans-regulatory hubs within Inv4m - Network visualization showing regulatory relationships - Statistical enrichment of DEGs by genomic region (cis vs trans) - Output files for downstream analysis and manuscript figures
library(dplyr)
library(tidyr)
library(rtracklayer)
library(GenomicRanges)
library(igraph)
library(visNetwork)
#' Format nodes for network visualization
#'
#' Creates standardized node data frame with region colors, labels, and styling.
#'
#' @param node_ids Character vector of gene IDs
#' @param effects_df Data frame with gene expression data
#' @param custom_labels Named vector of custom gene labels
#'
#' @return Data frame with formatted node attributes
format_network_nodes <- function(node_ids, effects_df, custom_labels = NULL) {
nodes <- data.frame(id = node_ids) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, logFC),
by = c("id" = "gene")
) %>%
mutate(
region = case_when(
id %in% inv4m_regulators ~ "inv4m",
id %in% flanking_regulators ~ "flank",
TRUE ~ "out"
),
upregulated = logFC > 0,
color.border = case_when(
region == "inv4m" ~ "#551A8B",
region == "flank" ~ "#FF1493",
region == "out" ~ "#FFD700"
),
color.background = case_when(
upregulated & region == "inv4m" ~ "#551A8B",
upregulated & region == "flank" ~ "#FF1493",
upregulated & region == "out" ~ "#FFD700",
!upregulated ~ "white"
),
borderWidth = 4,
label = coalesce(locus_label, id)
)
# Check for duplicate locus_label values (excluding NA)
dup_labels <- nodes %>%
filter(!is.na(locus_label), locus_label != "") %>%
group_by(locus_label) %>%
filter(n() > 1) %>%
pull(id)
if (length(dup_labels) > 0) {
nodes <- nodes %>%
mutate(label = ifelse(
id %in% dup_labels,
paste0(locus_label, " (", id, ")"),
label
))
}
if (!is.null(custom_labels) && any(names(custom_labels) %in% nodes$id)) {
cat("Custom labels found for nodes:",
sum(names(custom_labels) %in% nodes$id), "\n")
for (gene_id in names(custom_labels)) {
if (gene_id %in% nodes$id) {
nodes$label[nodes$id == gene_id] <- custom_labels[gene_id]
}
}
}
nodes$label <- paste0("<i>", nodes$label, "</i>")
nodes
}
#' Create MST visualization from edge list
#'
#' Builds minimum spanning tree and creates interactive visNetwork plot.
#'
#' @param edges Data frame with from, to, distance, r, color, weight columns
#' @param nodes Data frame with formatted node attributes
#' @param effects_df Data frame with logFC for arrow type calculation
#' @param layout igraph layout algorithm name
#' @param font_size Node label font size
#'
#' @return visNetwork htmlwidget
visualize_mst <- function(edges, nodes, effects_df,
layout = "layout_nicely", font_size = 20) {
# DIAGNOSTIC: Check for duplicate labels
cat("\n=== DIAGNOSTIC: Checking node labels ===\n")
cat("Total nodes:", nrow(nodes), "\n")
cat("Unique node IDs:", length(unique(nodes$id)), "\n")
cat("Unique labels:", length(unique(nodes$label)), "\n")
if (any(duplicated(nodes$label))) {
dup_labels_vec <- nodes$label[duplicated(nodes$label)]
cat("DUPLICATE LABELS FOUND:", length(dup_labels_vec), "\n")
cat("Duplicates:\n")
dup_info <- nodes %>%
filter(label %in% dup_labels_vec) %>%
dplyr::select(id, locus_label, label) %>%
arrange(label)
for (i in 1:nrow(dup_info)) {
cat(sprintf(" %s | locus_label: %s | label: %s\n",
dup_info$id[i],
ifelse(is.na(dup_info$locus_label[i]), "NA", dup_info$locus_label[i]),
dup_info$label[i]))
}
cat("\n")
stop("Cannot proceed with duplicate labels")
}
g <- graph_from_data_frame(
d = edges %>% dplyr::select(from, to, distance, r, color, weight),
directed = TRUE,
vertices = nodes
)
g_mst <- mst(g, weights = E(g)$distance)
cat("MST - Edges:", length(E(g_mst)), "| Nodes:", length(V(g_mst)), "\n")
cat("Sample labels:", head(V(g_mst)$label, 10), "\n")
mst_edges <- as_data_frame(g_mst, what = "edges") %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_from = logFC),
by = c("from" = "gene")
) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_to = logFC),
by = c("to" = "gene")
) %>%
mutate(
concordant = sign(logFC_from) == sign(logFC_to),
color = ifelse(is.na(color), "lightgrey", color)
)
g_mst_vis <- g_mst %>%
set_edge_attr(
"arrows.to.type",
value = ifelse(mst_edges$concordant, "arrow", "bar")
) %>%
set_edge_attr("width", value = 3) %>%
set_edge_attr("color", value = mst_edges$color)
V(g_mst_vis)$name <- V(g_mst_vis)$label
visIgraph(g_mst_vis) %>%
visNodes(font = list(size = font_size, multi = "html")) %>%
visIgraphLayout(layout = layout) %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
}
#' Visualize full network (non-MST)
#'
#' Creates interactive network visualization for smaller networks.
#'
#' @param edges Data frame with from, to, r, color columns
#' @param nodes Data frame with formatted node attributes
#' @param effects_df Data frame with logFC for arrow type calculation
#' @param layout igraph layout algorithm name
#' @param font_size Node label font size
#'
#' @return visNetwork htmlwidget
visualize_full_network <- function(edges, nodes, effects_df,
layout = "layout_nicely", font_size = 20) {
g <- graph_from_data_frame(
d = edges %>% dplyr::select(from, to, r, color, weight = distance),
directed = TRUE,
vertices = nodes
)
cat("Full Network - Edges:", length(E(g)), "| Nodes:", length(V(g)), "\n")
cat("Sample labels:", head(V(g)$label, 10), "\n")
edge_df <- as_data_frame(g, what = "edges") %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_from = logFC),
by = c("from" = "gene")
) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_to = logFC),
by = c("to" = "gene")
) %>%
mutate(
concordant = sign(logFC_from) == sign(logFC_to),
color = ifelse(is.na(color), "lightgrey", color)
)
g_vis <- g %>%
set_edge_attr(
"arrows.to.type",
value = ifelse(edge_df$concordant, "arrow", "bar")
) %>%
set_edge_attr("width", value = 3) %>%
set_edge_attr("color", value = edge_df$color)
V(g_vis)$name <- V(g_vis)$label
visIgraph(g_vis) %>%
visNodes(font = list(size = font_size, multi = "html")) %>%
visIgraphLayout(layout = layout) %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
}
#' Main network visualization wrapper
#'
#' Filters edges by variant, prepares nodes, and calls appropriate visualization.
#'
#' @param edges Data frame with from, to, r columns (bipartite: cis → trans)
#' @param effects_df Data frame with DEG statistics
#' @param variant Character: "full", "flanking", or "inv4m"
#' @param module_name Character: network title prefix
#' @param custom_labels Named vector of custom gene labels
#' @param edge_threshold Threshold for switching to MST
#' @param font_size Font size for labels
#'
#' @return visNetwork htmlwidget
visualize_trans_network <- function(edges,
effects_df,
variant = c("full", "flanking", "inv4m"),
module_name = "Network",
custom_labels = NULL,
edge_threshold = 200,
font_size = 20) {
variant <- match.arg(variant)
filtered_edges <- switch(
variant,
"full" = edges,
"flanking" = edges %>% filter(from %in% flanking_regulators),
"inv4m" = edges %>% filter(from %in% inv4m_regulators)
)
variant_label <- switch(
variant,
"full" = "Full Trans Co-expression",
"flanking" = "Flanking Trans Co-expression",
"inv4m" = "Inv4m Trans Co-expression"
)
title <- paste(module_name, "-", variant_label, "Network")
cat("\n===", title, "===\n")
cat("Edges:", nrow(filtered_edges), "\n")
if (nrow(filtered_edges) == 0) {
cat("No edges for this variant\n")
return(NULL)
}
node_ids <- unique(c(filtered_edges$from, filtered_edges$to))
nodes <- format_network_nodes(node_ids, effects_df, custom_labels)
cat("Nodes:", nrow(nodes), "\n")
prepared_edges <- filtered_edges %>%
mutate(
distance = 1 - abs(r),
color = ifelse(r > 0, "#4DAF4A", "#E41A1C"),
weight = abs(r)
)
if (nrow(filtered_edges) >= edge_threshold) {
cat("Using MST (edges >=", edge_threshold, ")\n")
visualize_mst(prepared_edges, nodes, effects_df, font_size = font_size)
} else {
cat("Showing full network\n")
visualize_full_network(prepared_edges, nodes, effects_df,
font_size = font_size)
}
}
v5_gff <- file.path(paths$data, "Zea_mays.Zm-B73-REFERENCE-NAM-5.0.59.chr.gff3")
v5 <- rtracklayer::import(v5_gff) %>%
subset(type == "gene" & seqnames %in% 1:10)
genes <- as.data.frame(v5)
genes$ID <- gsub("gene:", "", genes$ID)
cat("Total genes loaded:", nrow(genes), "\n")
## Total genes loaded: 43459
inv4m_start <- genes[genes$ID == "Zm00001eb190470", "start"]
inv4m_end <- genes[genes$ID == "Zm00001eb194800", "end"]
cat("Inv4m region:", inv4m_start, "-", inv4m_end, "\n")
## Inv4m region: 172883675 - 188132113
inv4m_gene_ids <- genes %>%
filter(
seqnames == 4,
start >= inv4m_start,
end <= inv4m_end
) %>%
pull(ID)
cat("Genes in Inv4m:", length(inv4m_gene_ids), "\n")
## Genes in Inv4m: 434
introgression_start <- 157012149
introgression_end <- 195900523
cis_introgression_gene_ids <- genes %>%
filter(
seqnames == 4,
start >= introgression_start,
end <= introgression_end
) %>%
pull(ID)
cat("Genes in cis introgression:", length(cis_introgression_gene_ids), "\n")
## Genes in cis introgression: 1099
flanking_introgression_gene_ids <- cis_introgression_gene_ids[
!cis_introgression_gene_ids %in% inv4m_gene_ids
]
cat("Genes in flanking regions:", length(flanking_introgression_gene_ids), "\n")
## Genes in flanking regions: 665
effects_df <- read.csv(file.path(paths$intermediate, "predictor_effects_leaf_interaction_model.csv"),
na.strings = c("NA", NA, "")) %>%
filter(predictor == "Inv4m")
curated <- read.csv(file.path(paths$data, "selected_DEGs_curated_locus_label_2.csv"),
na.strings = c("NA", NA, "")) %>%
dplyr::select(gene, locus_label)
effects_df$locus_label[effects_df$locus_label == "A0A1D6NG77"] <- NA
effects_df <- effects_df %>%
left_join(curated, by = "gene", suffix = c("", "_curated")) %>%
mutate(locus_label = coalesce(locus_label_curated, locus_label)) %>%
dplyr::select(-locus_label_curated)
curated_updates <- tibble::tribble(
~gene, ~locus_label,
"Zm00001eb384770", "zcn26",
"Zm00001eb192330", "roc4-1",
"Zm00001eb193260", "trxl2",
"Zm00001eb194300", "paspa",
"Zm00001eb213980", "dywl",
"Zm00001eb194080", "ugt85a2",
"Zm00001eb112470", "engd2",
"Zm00001eb332450", "roc4-2",
"Zm00001eb193270", "trxl5",
"Zm00001eb066620", "tipin",
"Zm00001eb241410", "etfa",
"Zm00001eb013800", "mrlk",
"Zm00001eb249540", "prpl29",
"Zm00001eb000540", "prd3",
"Zm00001eb060540", "actin-1",
"Zm00001eb192580", "map4k8",
"Zm00001eb193120", "o3l1",
"Zm00001eb043750", "cdrk9",
"Zm00001eb034810", "mgd2",
"Zm00001eb241920", "gpx1",
"Zm00001eb162710", "spx4",
"Zm00001eb038730", "pht7",
"Zm00001eb370610", "rfk1",
"Zm00001eb386270", "spx6",
"Zm00001eb335670", "sqd3",
"Zm00001eb151650", "pap1",
"Zm00001eb154650", "ppa1",
"Zm00001eb280120", "pfk1",
"Zm00001eb036910", "gpx3",
"Zm00001eb406610", "glk4",
"Zm00001eb048730", "spx2",
"Zm00001eb361620", "ppa2",
"Zm00001eb297970", "sqd2",
"Zm00001eb347070", "sqd1",
"Zm00001eb144680", "rns1",
"Zm00001eb430590", "nrx3",
"Zm00001eb247580", "ppck3",
"Zm00001eb351780", "ugp3",
"Zm00001eb222510", "pht1",
"Zm00001eb126380", "phos1",
"Zm00001eb047070", "pht2",
"Zm00001eb041390", "rns3",
"Zm00001eb116580", "spd1",
"Zm00001eb069630", "oct4",
"Zm00001eb083520", "dgd1",
"Zm00001eb130570", "sag21",
"Zm00001eb239700", "ppa3",
"Zm00001eb258130", "mgd3",
"Zm00001eb202100", "pap14",
"Zm00001eb144670", "rns2",
"Zm00001eb087720", "pht13",
"Zm00001eb277280", "gst19",
"Zm00001eb339870", "pld16",
"Zm00001eb289800", "pah1",
"Zm00001eb369590", "nrx1",
"Zm00001eb238670", "pep2"
)
effects_df <- effects_df %>%
left_join(curated_updates, by = "gene", suffix = c("", "_new")) %>%
mutate(locus_label = coalesce(locus_label_new, locus_label)) %>%
dplyr::select(-locus_label_new)
field_degs <- effects_df %>% filter(is_hiconf_DEG) %>% pull(gene) %>% unique()
cat("Total field DEGs:", length(field_degs), "\n")
## Total field DEGs: 165
cis_trans_table <- with(effects_df, table(in_cis, is_DEG))
cis_trans_test <- fisher.test(cis_trans_table)
cat("=== CIS VS TRANS ENRICHMENT ===\n")
## === CIS VS TRANS ENRICHMENT ===
print(cis_trans_table)
## is_DEG
## in_cis FALSE TRUE
## FALSE 22932 475
## TRUE 307 303
cat("\nFisher's p-value:", format(cis_trans_test$p.value, scientific = TRUE), "\n")
##
## Fisher's p-value: 1.688983e-301
cat("Odds ratio:", round(cis_trans_test$estimate, 3), "\n\n")
## Odds ratio: 47.613
inv4m_trans_table <- with(
effects_df %>% filter(!in_flank),
table(in_Inv4m, is_DEG)
)
inv4m_trans_test <- fisher.test(inv4m_trans_table)
cat("=== INV4M VS TRANS ENRICHMENT ===\n")
## === INV4M VS TRANS ENRICHMENT ===
print(inv4m_trans_table)
## is_DEG
## in_Inv4m FALSE TRUE
## FALSE 22932 475
## TRUE 125 130
cat("\nFisher's p-value:", format(inv4m_trans_test$p.value, scientific = TRUE), "\n")
##
## Fisher's p-value: 9.83221e-140
cat("Odds ratio:", round(inv4m_trans_test$estimate, 3), "\n\n")
## Odds ratio: 50.163
flanking_trans_table <- with(
effects_df %>% filter(in_flank | in_trans),
table(in_flank, is_DEG)
)
flanking_trans_test <- fisher.test(flanking_trans_table)
cat("=== FLANKING VS TRANS ENRICHMENT ===\n")
## === FLANKING VS TRANS ENRICHMENT ===
print(flanking_trans_table)
## is_DEG
## in_flank FALSE TRUE
## FALSE 22932 475
## TRUE 182 173
cat("\nFisher's p-value:", format(flanking_trans_test$p.value, scientific = TRUE), "\n")
##
## Fisher's p-value: 3.080185e-178
cat("Odds ratio:", round(flanking_trans_test$estimate, 3), "\n\n")
## Odds ratio: 45.853
inv4m_flanking_table <- with(
effects_df %>% filter(in_cis),
table(in_Inv4m, is_DEG)
)
inv4m_flanking_test <- fisher.test(inv4m_flanking_table)
cat("=== INV4M VS FLANKING ENRICHMENT ===\n")
## === INV4M VS FLANKING ENRICHMENT ===
print(inv4m_flanking_table)
## is_DEG
## in_Inv4m FALSE TRUE
## FALSE 182 173
## TRUE 125 130
cat("\nFisher's p-value:", format(inv4m_flanking_test$p.value, scientific = TRUE), "\n")
##
## Fisher's p-value: 6.225037e-01
cat("Odds ratio:", round(inv4m_flanking_test$estimate, 3), "\n\n")
## Odds ratio: 1.094
left_flanking_maizenetome <- read.csv(
file.path(paths$data, "left_MaizeNetome_coexpression_edges.csv")
)
Inv4m_maizenetome <- read.csv(
file.path(paths$data, "inv4m_MaizeNetome_coexpression_edges.csv")
)
right_flanking_maizenetome <- read.csv(
file.path(paths$data, "right_MaizeNetome_coexpression_edges.csv")
)
MaizeNetome <- rbind(
left_flanking_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression"),
Inv4m_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression"),
right_flanking_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression")
) %>%
dplyr::rename(Direct.Connection = Direct.Connetation.Node)
cat("Total MaizeNetome edges:", nrow(MaizeNetome), "\n")
## Total MaizeNetome edges: 640599
v4_v5 <- read.table(
file = file.path(paths$data, "B73v4_to_B73v5.tsv"),
sep = "\t",
header = FALSE
) %>%
dplyr::rename(v4 = V1, v5 = V2) %>%
separate_longer_delim(cols = v5, delim = ",")
cat("v4 to v5 mappings:", nrow(v4_v5), "\n")
## v4 to v5 mappings: 43680
MaizeNetome_v5 <- MaizeNetome %>%
left_join(v4_v5, by = c("Element.ID" = "v4")) %>%
dplyr::rename(Element.ID.v5 = v5) %>%
filter(!is.na(Element.ID.v5))
MaizeNetome_v5 <- MaizeNetome_v5 %>%
left_join(v4_v5, by = c("Direct.Connection" = "v4")) %>%
dplyr::rename(Direct.Connection.v5 = v5) %>%
filter(!is.na(Direct.Connection.v5))
cat("Edges after v4->v5 conversion:", nrow(MaizeNetome_v5), "\n")
## Edges after v4->v5 conversion: 767533
cis_degs <- effects_df %>%
filter(in_cis == TRUE, is_hiconf_DEG == TRUE) %>%
arrange(adj.P.Val) %>%
dplyr::select(gene:desc_merged, logFC, adj.P.Val) %>%
inner_join(v4_v5, by = c(gene = "v5")) %>%
group_by(gene) %>%
arrange(adj.P.Val)
trans_degs <- effects_df %>%
filter(in_trans == TRUE, is_hiconf_DEG == TRUE) %>%
arrange(adj.P.Val) %>%
dplyr::select(gene:desc_merged, logFC, adj.P.Val) %>%
inner_join(v4_v5, by = c(gene = "v5")) %>%
group_by(gene) %>%
arrange(adj.P.Val)
cat("Cis genes:", length(unique(cis_degs$gene)), "\n")
## Cis genes: 107
cat("Trans genes:", length(unique(trans_degs$gene)), "\n")
## Trans genes: 39
cis_genes_all <- cis_degs$gene %>% sort() %>% unique()
trans_genes_all <- trans_degs$gene %>% sort() %>% unique()
inv4m_regulators <- cis_genes_all[cis_genes_all %in% inv4m_gene_ids]
flanking_regulators <- cis_genes_all[cis_genes_all %in% flanking_introgression_gene_ids]
cat("Inv4m regulators:", length(inv4m_regulators), "\n")
## Inv4m regulators: 37
cat("Flanking regulators:", length(flanking_regulators), "\n")
## Flanking regulators: 70
reference_edges_maizenetome <- MaizeNetome_v5 %>%
filter(
Element.ID.v5 %in% cis_genes_all,
Direct.Connection.v5 %in% trans_genes_all
) %>%
dplyr::rename(
cis_gene = Element.ID.v5,
trans_gene = Direct.Connection.v5
) %>%
dplyr::select(cis_gene, trans_gene, network)
cat("Reference edges from MaizeNetome:", nrow(reference_edges_maizenetome), "\n")
## Reference edges from MaizeNetome: 103
voomR <- readRDS(file.path(paths$intermediate, "normalized_expression_voom_object_leaf_trt.rds"))
cat("Expression matrix:", nrow(voomR$E), "genes x", ncol(voomR$E), "samples\n")
## Expression matrix: 24249 genes x 43 samples
linked <- read.table(file.path(paths$intermediate, "inv4m_linked_genes_to_filter.txt"), header = TRUE)
cat("LD-linked genes loaded:", nrow(linked), "\n")
## LD-linked genes loaded: 58
trans_genes_filtered <- trans_genes_all[!trans_genes_all %in% linked$gene_id]
cat("Trans genes before LD filtering:", length(trans_genes_all), "\n")
## Trans genes before LD filtering: 39
cat("Trans genes after LD filtering:", length(trans_genes_filtered), "\n")
## Trans genes after LD filtering: 37
reference_edges_maizenetome <- reference_edges_maizenetome %>%
filter(trans_gene %in% trans_genes_filtered)
cat("Reference edges after LD filtering:", nrow(reference_edges_maizenetome), "\n")
## Reference edges after LD filtering: 103
cis_genes <- cis_genes_all[cis_genes_all %in% rownames(voomR$E)]
trans_genes <- trans_genes_filtered[trans_genes_filtered %in% rownames(voomR$E)]
cat("Cis genes in expression data:", length(cis_genes), "\n")
## Cis genes in expression data: 107
cat("Trans genes in expression data:", length(trans_genes), "\n")
## Trans genes in expression data: 37
expr_cis <- t(voomR$E)[, cis_genes]
expr_trans <- t(voomR$E)[, trans_genes]
corr_bipartite <- cor(expr_cis, expr_trans, use = "pairwise.complete.obs")
n_samples <- nrow(expr_cis)
p_values <- matrix(NA, nrow = nrow(corr_bipartite), ncol = ncol(corr_bipartite))
for (i in 1:nrow(corr_bipartite)) {
for (j in 1:ncol(corr_bipartite)) {
r <- corr_bipartite[i, j]
t_stat <- r * sqrt((n_samples - 2) / (1 - r^2))
p_values[i, j] <- 2 * pt(-abs(t_stat), df = n_samples - 2)
}
}
p_adj <- matrix(p.adjust(c(p_values), method = "fdr"), nrow = nrow(p_values))
corr_bipartite_filtered <- corr_bipartite
corr_bipartite_filtered[p_adj > 0.05] <- 0
cat("Significant pairs (FDR < 0.05):", sum(corr_bipartite_filtered != 0), "\n")
## Significant pairs (FDR < 0.05): 3766
edges_idx <- which(corr_bipartite_filtered != 0, arr.ind = TRUE)
bipartite_edges <- data.frame(
from = rownames(corr_bipartite_filtered)[edges_idx[, 1]],
to = colnames(corr_bipartite_filtered)[edges_idx[, 2]],
r = corr_bipartite_filtered[edges_idx]
)
cat("Bipartite network edges:", nrow(bipartite_edges), "\n")
## Bipartite network edges: 3766
# Export bipartite edges for downstream analysis
write.csv(bipartite_edges,
file.path(paths$intermediate, "inv4m_bipartite_edges.csv"),
row.names = FALSE)
cat("Exported bipartite edges to intermediate/\n")
## Exported bipartite edges to intermediate/
bipartite_edges$in_maizenetome <- mapply(
function(f, t) {
any(reference_edges_maizenetome$cis_gene == f &
reference_edges_maizenetome$trans_gene == t)
},
bipartite_edges$from,
bipartite_edges$to
)
bipartite_edges$network_module <- ifelse(
bipartite_edges$in_maizenetome,
"Reference",
"Novel"
)
cat("Edge classification:\n")
## Edge classification:
print(table(bipartite_edges$network_module))
##
## Novel Reference
## 3669 97
reference_edges <- bipartite_edges %>% filter(network_module == "Reference")
novel_edges <- bipartite_edges %>% filter(network_module == "Novel")
cat("\nReference module edges:", nrow(reference_edges), "\n")
##
## Reference module edges: 97
cat("Novel module edges:", nrow(novel_edges), "\n")
## Novel module edges: 3669
visualize_trans_network(
edges = novel_edges,
effects_df = effects_df,
variant = "full",
module_name = "NOVEL"
)
##
## === NOVEL - Full Trans Co-expression Network ===
## Edges: 3669
## Nodes: 144
## Using MST (edges >= 200 )
##
## === DIAGNOSTIC: Checking node labels ===
## Total nodes: 144
## Unique node IDs: 144
## Unique labels: 144
## MST - Edges: 143 | Nodes: 144
## Sample labels: <i>Zm00001eb186400</i> <i>Zm00001eb186410</i> <i>Zm00001eb186610</i> <i>Zm00001eb186670</i> <i>adf7</i> <i>Zm00001eb186840</i> <i>Zm00001eb186860</i> <i>msl4</i> <i>Zm00001eb187030</i> <i>gpm458</i>
visualize_trans_network(
edges = novel_edges,
effects_df = effects_df,
variant = "flanking",
module_name = "NOVEL"
)
##
## === NOVEL - Flanking Trans Co-expression Network ===
## Edges: 2399
## Nodes: 107
## Using MST (edges >= 200 )
##
## === DIAGNOSTIC: Checking node labels ===
## Total nodes: 107
## Unique node IDs: 107
## Unique labels: 107
## MST - Edges: 106 | Nodes: 107
## Sample labels: <i>Zm00001eb186400</i> <i>Zm00001eb186410</i> <i>Zm00001eb186610</i> <i>Zm00001eb186670</i> <i>adf7</i> <i>Zm00001eb186840</i> <i>Zm00001eb186860</i> <i>msl4</i> <i>Zm00001eb187030</i> <i>gpm458</i>
visualize_trans_network(
edges = novel_edges,
effects_df = effects_df,
variant = "inv4m",
module_name = "NOVEL"
)
##
## === NOVEL - Inv4m Trans Co-expression Network ===
## Edges: 1270
## Nodes: 74
## Using MST (edges >= 200 )
##
## === DIAGNOSTIC: Checking node labels ===
## Total nodes: 74
## Unique node IDs: 74
## Unique labels: 74
## MST - Edges: 73 | Nodes: 74
## Sample labels: <i>zfrvt</i> <i>Zm00001eb191000</i> <i>nactf51</i> <i>jmj2</i> <i>jmj4</i> <i>pig3</i> <i>pcna2</i> <i>Zm00001eb192120</i> <i>Zm00001eb192170</i> <i>Zm00001eb192180</i>
visualize_trans_network(
edges = reference_edges,
effects_df = effects_df,
variant = "full",
module_name = "REFERENCE"
)
##
## === REFERENCE - Full Trans Co-expression Network ===
## Edges: 97
## Nodes: 49
## Showing full network
## Full Network - Edges: 97 | Nodes: 49
## Sample labels: <i>Zm00001eb187400</i> <i>Zm00001eb187440</i> <i>smn</i> <i>Zm00001eb188030</i> <i>his2b5</i> <i>jmj21</i> <i>roc4-1</i> <i>Zm00001eb192850</i> <i>trxl2</i> <i>Zm00001eb195080</i>
visualize_trans_network(
edges = reference_edges,
effects_df = effects_df,
variant = "flanking",
module_name = "REFERENCE"
)
##
## === REFERENCE - Flanking Trans Co-expression Network ===
## Edges: 60
## Nodes: 32
## Showing full network
## Full Network - Edges: 60 | Nodes: 32
## Sample labels: <i>Zm00001eb187400</i> <i>Zm00001eb187440</i> <i>smn</i> <i>Zm00001eb188030</i> <i>his2b5</i> <i>Zm00001eb195080</i> <i>zhd18</i> <i>pin4</i> <i>his402</i> <i>Zm00001eb197300</i>
visualize_trans_network(
edges = reference_edges,
effects_df = effects_df,
variant = "inv4m",
module_name = "REFERENCE"
)
##
## === REFERENCE - Inv4m Trans Co-expression Network ===
## Edges: 37
## Nodes: 25
## Showing full network
## Full Network - Edges: 37 | Nodes: 25
## Sample labels: <i>jmj21</i> <i>roc4-1</i> <i>Zm00001eb192850</i> <i>trxl2</i> <i>pcna2</i> <i>map4k8</i> <i>ugt85a2</i> <i>jmj2</i> <i>Zm00001eb192630</i> <i>o3l1</i>
reference_genes <- unique(c(reference_edges$from, reference_edges$to))
novel_genes <- unique(c(novel_edges$from, novel_edges$to))
reference_gene_info <- effects_df %>%
filter(gene %in% reference_genes, predictor == "Inv4m") %>%
dplyr::select(gene, locus_label, desc_merged, logFC, adj.P.Val) %>%
distinct() %>%
mutate(gene_location = ifelse(gene %in% cis_genes, "cis", "trans"))
write.csv(reference_gene_info,
file.path(paths$intermediate, "reference_trans_network_genes.csv"),
row.names = FALSE)
write.csv(reference_edges,
file.path(paths$intermediate, "reference_trans_network_edges.csv"),
row.names = FALSE)
novel_gene_info <- effects_df %>%
filter(gene %in% novel_genes, predictor == "Inv4m") %>%
dplyr::select(gene, locus_label, desc_merged, logFC, adj.P.Val) %>%
distinct() %>%
mutate(gene_location = ifelse(gene %in% cis_genes, "cis", "trans"))
write.csv(novel_gene_info,
file.path(paths$intermediate, "novel_trans_network_genes.csv"),
row.names = FALSE)
write.csv(novel_edges,
file.path(paths$intermediate, "novel_trans_network_edges.csv"),
row.names = FALSE)
cat("Network modules exported to paths$intermediate\n")
## Network modules exported to paths$intermediate
Purpose: Create combined network files for downstream GO enrichment analysis.
# Combine novel and reference edges with annotations
all_edges <- bind_rows(
novel_edges %>% mutate(network_module = "Novel"),
reference_edges %>% mutate(network_module = "Reference")
) %>%
mutate(
regulator_region = case_when(
from %in% inv4m_regulators ~ "inv4m",
from %in% flanking_regulators ~ "flanking",
TRUE ~ "other"
)
)
# Extract unique regulators (cis genes) with region info
trans_regulators <- data.frame(
gene = unique(all_edges$from)
) %>%
left_join(
effects_df %>% select(gene, locus_label, locus_symbol, desc_merged, logFC, adj.P.Val),
by = "gene"
) %>%
mutate(
node_type = "regulator",
region = case_when(
gene %in% inv4m_regulators ~ "inv4m",
gene %in% flanking_regulators ~ "flanking",
TRUE ~ "other"
)
) %>%
distinct()
# Extract unique targets (trans genes)
trans_targets <- data.frame(
gene = unique(all_edges$to)
) %>%
left_join(
effects_df %>% select(gene, locus_label, locus_symbol, desc_merged, logFC, adj.P.Val),
by = "gene"
) %>%
mutate(
node_type = "target",
region = "trans"
) %>%
distinct()
# All genes in network with node type and region
trans_network_all <- bind_rows(
trans_regulators,
trans_targets
) %>%
distinct()
# Export combined files
write.csv(trans_regulators,
file.path(paths$intermediate, "trans_network_regulators.csv"),
row.names = FALSE)
write.csv(trans_targets,
file.path(paths$intermediate, "trans_network_targets.csv"),
row.names = FALSE)
write.csv(trans_network_all,
file.path(paths$intermediate, "trans_network_all_genes.csv"),
row.names = FALSE)
write.csv(all_edges,
file.path(paths$intermediate, "trans_network_edges_annotated.csv"),
row.names = FALSE)
cat("\nCombined network files exported:\n")
##
## Combined network files exported:
cat(" Regulators (cis):", nrow(trans_regulators), "\n")
## Regulators (cis): 109
cat(" Targets (trans):", nrow(trans_targets), "\n")
## Targets (trans): 37
cat(" All genes:", nrow(trans_network_all), "\n")
## All genes: 146
cat(" All edges:", nrow(all_edges), "\n")
## All edges: 3766
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] visNetwork_2.1.4 igraph_2.2.1 rtracklayer_1.70.0
## [4] GenomicRanges_1.62.1 Seqinfo_1.0.0 IRanges_2.44.0
## [7] S4Vectors_0.48.0 BiocGenerics_0.56.0 generics_0.1.4
## [10] tidyr_1.3.1 dplyr_1.1.4 here_1.0.2
##
## loaded via a namespace (and not attached):
## [1] SummarizedExperiment_1.40.0 rjson_0.2.23
## [3] xfun_0.55 bslib_0.9.0
## [5] htmlwidgets_1.6.4 Biobase_2.70.0
## [7] lattice_0.22-7 vctrs_0.6.5
## [9] tools_4.5.2 bitops_1.0-9
## [11] curl_7.0.0 parallel_4.5.2
## [13] tibble_3.3.0 pkgconfig_2.0.3
## [15] Matrix_1.7-4 cigarillo_1.0.0
## [17] lifecycle_1.0.4 stringr_1.6.0
## [19] compiler_4.5.2 Rsamtools_2.26.0
## [21] Biostrings_2.78.0 statmod_1.5.1
## [23] codetools_0.2-20 htmltools_0.5.9
## [25] sass_0.4.10 RCurl_1.98-1.17
## [27] yaml_2.3.12 pillar_1.11.1
## [29] crayon_1.5.3 jquerylib_0.1.4
## [31] BiocParallel_1.44.0 limma_3.66.0
## [33] cachem_1.1.0 DelayedArray_0.36.0
## [35] abind_1.4-8 tidyselect_1.2.1
## [37] digest_0.6.39 stringi_1.8.7
## [39] purrr_1.2.0 restfulr_0.0.16
## [41] rprojroot_2.1.1 fastmap_1.2.0
## [43] grid_4.5.2 cli_3.6.5
## [45] SparseArray_1.10.6 magrittr_2.0.4
## [47] S4Arrays_1.10.1 XML_3.99-0.20
## [49] withr_3.0.2 rmarkdown_2.30
## [51] XVector_0.50.0 httr_1.4.7
## [53] matrixStats_1.5.0 evaluate_1.0.5
## [55] knitr_1.50 BiocIO_1.20.0
## [57] rlang_1.1.6 glue_1.8.0
## [59] jsonlite_2.0.0 R6_2.6.1
## [61] MatrixGenerics_1.22.0 GenomicAlignments_1.46.0