Purpose: Identify and visualize trans-regulatory networks controlled by genes within the Inv4m inversion region. We focus on the Inv4m-specific network (genes whose strongest regulator is within Inv4m, not flanking) and split it into Reference (previously reported in MaizeNetome) and Novel (Inv4m-specific) edges. Approach:
Key visualizations:
Output files (for GO_Enrichment_Trans_Network.Rmd):
trans_network_edges_annotated.csvtrans_network_regulators.csvtrans_network_targets.csvtrans_network_all_genes.csvlibrary(dplyr)
library(tidyr)
library(rtracklayer)
library(GenomicRanges)
library(igraph)
library(visNetwork)
#' Calculate mutual information from correlation coefficient
mutual_info <- function(r) {
0.5 * log2(1 / (1 - r^2))
}
#' Calculate standardized euclidean distance for MST construction
std_dist <- function(r_scaled, n) {
sqrt(2 * n * (1 - abs(r_scaled)))
}
#' Format nodes for network visualization
format_network_nodes <- function(node_ids, effects_df,
inv4m_regulators, flanking_regulators,
custom_labels = NULL) {
nodes <- data.frame(id = node_ids) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, logFC),
by = c("id" = "gene")
) %>%
mutate(
region = case_when(
id %in% inv4m_regulators ~ "inv4m",
id %in% flanking_regulators ~ "flank",
TRUE ~ "out"
),
upregulated = logFC > 0,
color.border = case_when(
region == "inv4m" ~ "#551A8B",
region == "flank" ~ "#FF1493",
region == "out" ~ "#FFD700"
),
color.background = case_when(
upregulated & region == "inv4m" ~ "#551A8B",
upregulated & region == "flank" ~ "#FF1493",
upregulated & region == "out" ~ "#FFD700",
!upregulated ~ "white"
),
borderWidth = 4,
label = coalesce(locus_label, id)
)
# Handle duplicate labels
dup_labels <- nodes %>%
filter(!is.na(locus_label), locus_label != "") %>%
group_by(locus_label) %>%
filter(n() > 1) %>%
pull(id)
if (length(dup_labels) > 0) {
nodes <- nodes %>%
mutate(label = ifelse(
id %in% dup_labels,
paste0(locus_label, " (", id, ")"),
label
))
}
# Apply custom labels
if (!is.null(custom_labels) && any(names(custom_labels) %in% nodes$id)) {
for (gene_id in names(custom_labels)) {
if (gene_id %in% nodes$id) {
nodes$label[nodes$id == gene_id] <- custom_labels[gene_id]
}
}
}
nodes$label <- paste0("<i>", nodes$label, "</i>")
nodes
}
#' Create MST visualization from edge list
visualize_mst <- function(edges, nodes, effects_df, font_size = 20) {
if (any(duplicated(nodes$label))) {
stop("Cannot proceed with duplicate labels")
}
g <- graph_from_data_frame(
d = edges %>% dplyr::select(from, to, r, corr_std, std_dist, mi),
directed = TRUE,
vertices = nodes
)
g_mst <- mst(g, weights = E(g)$std_dist)
cat("MST - Edges:", length(E(g_mst)), "| Nodes:", length(V(g_mst)), "\n")
mst_edges <- igraph::as_data_frame(g_mst, what = "edges") %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_from = logFC),
by = c("from" = "gene")
) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_to = logFC),
by = c("to" = "gene")
) %>%
mutate(
is_concordant = sign(logFC_from) == sign(logFC_to),
width = mi
)
# Filter nodes to only those in MST
mst_node_ids <- unique(c(mst_edges$from, mst_edges$to))
mst_nodes <- nodes %>% filter(id %in% mst_node_ids)
mst_edges_vis <- mst_edges %>%
mutate(
arrows.to.type = ifelse(is_concordant, "arrow", "bar"),
color = "DarkGray"
) %>%
dplyr::select(from, to, width, arrows.to.type, color)
visNetwork(mst_nodes, mst_edges_vis) %>%
visNodes(font = list(size = font_size, multi = "html")) %>%
visEdges(arrows = "to") %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
}
#' Visualize full network (non-MST) for smaller networks
visualize_full_network <- function(edges, nodes, effects_df, font_size = 20) {
edge_df <- edges %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_from = logFC),
by = c("from" = "gene")
) %>%
left_join(
effects_df %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, logFC_to = logFC),
by = c("to" = "gene")
) %>%
mutate(
is_concordant = sign(logFC_from) == sign(logFC_to),
arrows.to.type = ifelse(is_concordant, "arrow", "bar"),
color = "DarkGray",
width = mi
) %>%
dplyr::select(from, to, width, arrows.to.type, color)
cat("Full Network - Edges:", nrow(edge_df), "| Nodes:", nrow(nodes), "\n")
visNetwork(nodes, edge_df) %>%
visNodes(font = list(size = font_size, multi = "html")) %>%
visEdges(arrows = "to") %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
}
v5_gff <- file.path(paths$data, "Zea_mays.Zm-B73-REFERENCE-NAM-5.0.59.chr.gff3")
v5 <- rtracklayer::import(v5_gff) %>%
subset(type == "gene" & seqnames %in% 1:10)
genes <- as.data.frame(v5)
genes$ID <- gsub("gene:", "", genes$ID)
cat("Total genes loaded:", nrow(genes), "\n")
## Total genes loaded: 43459
inv4m_start <- genes[genes$ID == "Zm00001eb190470", "start"]
inv4m_end <- genes[genes$ID == "Zm00001eb194800", "end"]
cat("Inv4m region:", inv4m_start, "-", inv4m_end, "\n")
## Inv4m region: 172883675 - 188132113
inv4m_gene_ids <- genes %>%
filter(
seqnames == 4,
start >= inv4m_start,
end <= inv4m_end
) %>%
pull(ID)
cat("Genes in Inv4m:", length(inv4m_gene_ids), "\n")
## Genes in Inv4m: 434
introgression_start <- 157012149
introgression_end <- 195900523
cis_introgression_gene_ids <- genes %>%
filter(
seqnames == 4,
start >= introgression_start,
end <= introgression_end
) %>%
pull(ID)
flanking_introgression_gene_ids <- cis_introgression_gene_ids[
!cis_introgression_gene_ids %in% inv4m_gene_ids
]
cat("Genes in cis introgression:", length(cis_introgression_gene_ids), "\n")
## Genes in cis introgression: 1099
cat("Genes in flanking regions:", length(flanking_introgression_gene_ids), "\n")
## Genes in flanking regions: 665
effects_df <- read.csv(
file.path(paths$intermediate, "predictor_effects_leaf_interaction_model.csv"),
na.strings = c("NA", NA, "")
) %>%
filter(predictor == "Inv4m")
# Load consolidated labels from master file (includes curated + symbol + LLM-proposed)
master_labels <- read.csv(
file.path(paths$data, "locus_labels_master.csv"),
na.strings = c("NA", NA, "")
) %>%
dplyr::select(gene, locus_label)
effects_df <- effects_df %>%
dplyr::select(-any_of("locus_label")) %>%
left_join(master_labels, by = "gene")
field_degs <- effects_df %>%
filter(is_hiconf_DEG) %>%
pull(gene) %>%
unique()
cat("Total high-confidence DEGs:", length(field_degs), "\n")
## Total high-confidence DEGs: 155
left_flanking_maizenetome <- read.csv(
file.path(paths$data, "left_MaizeNetome_coexpression_edges.csv")
)
Inv4m_maizenetome <- read.csv(
file.path(paths$data, "inv4m_MaizeNetome_coexpression_edges.csv")
)
right_flanking_maizenetome <- read.csv(
file.path(paths$data, "right_MaizeNetome_coexpression_edges.csv")
)
MaizeNetome <- rbind(
left_flanking_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression"),
Inv4m_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression"),
right_flanking_maizenetome %>%
separate_longer_delim(cols = Direct.Connetation.Node, delim = "\n") %>%
mutate(network = "coexpression")
) %>%
dplyr::rename(Direct.Connection = Direct.Connetation.Node)
cat("Total MaizeNetome edges:", nrow(MaizeNetome), "\n")
## Total MaizeNetome edges: 640599
v4_v5 <- read.table(
file = file.path(paths$data, "B73v4_to_B73v5.tsv"),
sep = "\t",
header = FALSE
) %>%
dplyr::rename(v4 = V1, v5 = V2) %>%
separate_longer_delim(cols = v5, delim = ",")
MaizeNetome_v5 <- MaizeNetome %>%
left_join(v4_v5, by = c("Element.ID" = "v4")) %>%
dplyr::rename(Element.ID.v5 = v5) %>%
filter(!is.na(Element.ID.v5)) %>%
left_join(v4_v5, by = c("Direct.Connection" = "v4")) %>%
dplyr::rename(Direct.Connection.v5 = v5) %>%
filter(!is.na(Direct.Connection.v5))
cat("Edges after v4->v5 conversion:", nrow(MaizeNetome_v5), "\n")
## Edges after v4->v5 conversion: 767533
cis_degs <- effects_df %>%
filter(in_cis == TRUE, is_hiconf_DEG == TRUE) %>%
inner_join(v4_v5, by = c(gene = "v5")) %>%
group_by(gene) %>%
arrange(adj.P.Val) %>%
dplyr::slice(1) %>%
ungroup()
trans_degs <- effects_df %>%
filter(in_trans == TRUE, is_hiconf_DEG == TRUE) %>%
inner_join(v4_v5, by = c(gene = "v5")) %>%
group_by(gene) %>%
arrange(adj.P.Val) %>%
dplyr::slice(1) %>%
ungroup()
cis_genes_all <- cis_degs$gene %>% sort() %>% unique()
trans_genes_all <- trans_degs$gene %>% sort() %>% unique()
inv4m_regulators <- cis_genes_all[cis_genes_all %in% inv4m_gene_ids]
flanking_regulators <- cis_genes_all[cis_genes_all %in% flanking_introgression_gene_ids]
cat("Cis DEGs:", length(cis_genes_all), "\n")
## Cis DEGs: 103
cat(" Inv4m regulators:", length(inv4m_regulators), "\n")
## Inv4m regulators: 33
cat(" Flanking regulators:", length(flanking_regulators), "\n")
## Flanking regulators: 70
cat("Trans DEGs:", length(trans_genes_all), "\n")
## Trans DEGs: 34
reference_edges_maizenetome <- MaizeNetome_v5 %>%
filter(
Element.ID.v5 %in% cis_genes_all,
Direct.Connection.v5 %in% trans_genes_all
) %>%
dplyr::rename(
cis_gene = Element.ID.v5,
trans_gene = Direct.Connection.v5
) %>%
dplyr::select(cis_gene, trans_gene, network)
cat("Reference edges from MaizeNetome:", nrow(reference_edges_maizenetome), "\n")
## Reference edges from MaizeNetome: 90
voomR <- readRDS(
file.path(paths$intermediate, "normalized_expression_voom_object_leaf_trt.rds")
)
n_samples <- ncol(voomR$E)
cat("Expression matrix:", nrow(voomR$E), "genes x", n_samples, "samples\n")
## Expression matrix: 24249 genes x 43 samples
linked <- read.table(
file.path(paths$intermediate, "inv4m_linked_genes_to_filter.txt"),
header = TRUE
)
trans_genes_filtered <- trans_genes_all[!trans_genes_all %in% linked$gene_id]
cat("Trans genes after LD filtering:",
length(trans_genes_all), "->", length(trans_genes_filtered), "\n")
## Trans genes after LD filtering: 34 -> 32
reference_edges_maizenetome <- reference_edges_maizenetome %>%
filter(trans_gene %in% trans_genes_filtered)
cis_genes <- cis_genes_all[cis_genes_all %in% rownames(voomR$E)]
trans_genes <- trans_genes_filtered[trans_genes_filtered %in% rownames(voomR$E)]
cat("Genes in expression data:\n")
## Genes in expression data:
cat(" Cis:", length(cis_genes), "\n")
## Cis: 103
cat(" Trans:", length(trans_genes), "\n")
## Trans: 32
# Calculate correlations
expr_cis <- t(voomR$E)[, cis_genes]
expr_trans <- t(voomR$E)[, trans_genes]
corr_bipartite <- cor(expr_cis, expr_trans, use = "pairwise.complete.obs")
# Scaled correlations for MST
expr_cis_scaled <- scale(expr_cis)
expr_trans_scaled <- scale(expr_trans)
corr_bipartite_scaled <- cor(expr_cis_scaled, expr_trans_scaled,
use = "pairwise.complete.obs")
# Calculate p-values
p_values <- matrix(NA, nrow = nrow(corr_bipartite), ncol = ncol(corr_bipartite))
for (i in 1:nrow(corr_bipartite)) {
for (j in 1:ncol(corr_bipartite)) {
r <- corr_bipartite[i, j]
t_stat <- r * sqrt((n_samples - 2) / (1 - r^2))
p_values[i, j] <- 2 * pt(-abs(t_stat), df = n_samples - 2)
}
}
p_adj <- matrix(p.adjust(c(p_values), method = "fdr"), nrow = nrow(p_values))
# Filter by FDR
corr_bipartite_filtered <- corr_bipartite
corr_bipartite_filtered[p_adj > 0.05] <- 0
cat("Significant pairs (FDR < 0.05):", sum(corr_bipartite_filtered != 0), "\n")
## Significant pairs (FDR < 0.05): 3262
# Create edge list
edges_idx <- which(corr_bipartite_filtered != 0, arr.ind = TRUE)
bipartite_edges <- data.frame(
from = rownames(corr_bipartite_filtered)[edges_idx[, 1]],
to = colnames(corr_bipartite_filtered)[edges_idx[, 2]],
r = corr_bipartite_filtered[edges_idx],
corr_std = corr_bipartite_scaled[edges_idx]
)
cat("Bipartite network edges:", nrow(bipartite_edges), "\n")
## Bipartite network edges: 3262
Purpose: Filter to targets where the Inv4m regulator has the strongest correlation, ensuring we capture Inv4m-specific effects rather than shared flanking regulation.
# For each target, find the strongest correlated regulator
strongest_edge_per_target <- bipartite_edges %>%
mutate(abs_r = abs(r)) %>%
group_by(to) %>%
slice_max(abs_r, n = 1, with_ties = FALSE) %>%
ungroup()
# Targets whose strongest regulator is NOT in flanking region
targets_strongest_nonflanking <- strongest_edge_per_target %>%
filter(!from %in% flanking_regulators) %>%
pull(to)
# Inv4m edges to these targets
inv4m_trans_edges <- bipartite_edges %>%
filter(
to %in% targets_strongest_nonflanking,
from %in% inv4m_regulators
)
cat("\n=== Inv4m Network (Strongest Regulator Criterion) ===\n")
##
## === Inv4m Network (Strongest Regulator Criterion) ===
cat("Targets with non-flanking as strongest:", length(targets_strongest_nonflanking), "\n")
## Targets with non-flanking as strongest: 17
cat("Inv4m trans edges:", nrow(inv4m_trans_edges), "\n")
## Inv4m trans edges: 552
cat("Unique genes:", length(unique(c(inv4m_trans_edges$from, inv4m_trans_edges$to))), "\n")
## Unique genes: 50
inv4m_trans_edges$in_maizenetome <- mapply(
function(f, t) {
any(reference_edges_maizenetome$cis_gene == f &
reference_edges_maizenetome$trans_gene == t)
},
inv4m_trans_edges$from,
inv4m_trans_edges$to
)
inv4m_trans_edges$network_module <- ifelse(
inv4m_trans_edges$in_maizenetome,
"Reference",
"Novel"
)
cat("\n=== Edge Classification ===\n")
##
## === Edge Classification ===
print(table(inv4m_trans_edges$network_module))
##
## Novel Reference
## 536 16
reference_edges <- inv4m_trans_edges %>% filter(network_module == "Reference")
novel_edges <- inv4m_trans_edges %>% filter(network_module == "Novel")
cat("\nReference edges:", nrow(reference_edges), "\n")
##
## Reference edges: 16
cat("Novel edges:", nrow(novel_edges), "\n")
## Novel edges: 536
Purpose: Overview of all Inv4m trans-regulatory connections before splitting into Reference and Novel components.
# Prepare edges with distance metrics
whole_edges <- inv4m_trans_edges %>%
mutate(
std_dist = std_dist(corr_std, n_samples),
mi = mutual_info(r)
)
# Format nodes
whole_node_ids <- unique(c(whole_edges$from, whole_edges$to))
whole_nodes <- format_network_nodes(
whole_node_ids, effects_df,
inv4m_regulators, flanking_regulators
)
cat("\n=== Whole Inv4m Trans Network ===\n")
##
## === Whole Inv4m Trans Network ===
cat("Edges:", nrow(whole_edges), "\n")
## Edges: 552
cat("Nodes:", nrow(whole_nodes), "\n")
## Nodes: 50
visualize_mst(whole_edges, whole_nodes, effects_df, font_size = 20)
## MST - Edges: 49 | Nodes: 50
Purpose: Edges previously reported in MaizeNetome database. These represent conserved co-expression relationships. Key genes: jmj2, pcna2, mrlk, engd2.
if (nrow(reference_edges) > 0) {
ref_edges <- reference_edges %>%
mutate(
std_dist = std_dist(corr_std, n_samples),
mi = mutual_info(r)
)
ref_node_ids <- unique(c(ref_edges$from, ref_edges$to))
ref_nodes <- format_network_nodes(
ref_node_ids, effects_df,
inv4m_regulators, flanking_regulators
)
cat("\n=== Reference Network (MaizeNetome) ===\n")
cat("Edges:", nrow(ref_edges), "\n")
cat("Nodes:", nrow(ref_nodes), "\n")
visualize_mst(ref_edges, ref_nodes, effects_df, font_size = 25)
} else {
cat("No Reference edges found\n")
}
##
## === Reference Network (MaizeNetome) ===
## Edges: 16
## Nodes: 16
## MST - Edges: 15 | Nodes: 16
Purpose: New co-expression relationships not previously reported in MaizeNetome. These represent Inv4m-specific regulatory effects. Key genes: jmj4 neighborhood including pcna2, yhcf.
if (nrow(novel_edges) > 0) {
novel_edges_prep <- novel_edges %>%
mutate(
std_dist = std_dist(corr_std, n_samples),
mi = mutual_info(r)
)
novel_node_ids <- unique(c(novel_edges_prep$from, novel_edges_prep$to))
novel_nodes <- format_network_nodes(
novel_node_ids, effects_df,
inv4m_regulators, flanking_regulators
)
cat("\n=== Novel Network (Inv4m-Specific) ===\n")
cat("Edges:", nrow(novel_edges_prep), "\n")
cat("Nodes:", nrow(novel_nodes), "\n")
visualize_mst(novel_edges_prep, novel_nodes, effects_df, font_size = 22)
} else {
cat("No Novel edges found\n")
}
##
## === Novel Network (Inv4m-Specific) ===
## Edges: 536
## Nodes: 50
## MST - Edges: 49 | Nodes: 50
Purpose: Extract the jmj4-centered subgraph from the novel network MST. This preserves the MST structure while focusing on jmj4, engd2, and their targets (kan1, sec6, nii2, roc4-2, etc.).
# Define the jmj4 neighborhood genes (from manual selection of novel MST)
jmj4_neighborhood_labels <- c(
"jmj4", "engd2", "roc4-2", "kan1", "sec6", "nii2", "tet2", "flz22", "plat1"
)
# Genes with only assembly IDs (no locus labels)
jmj4_neighborhood_ids <- c(
"Zm00001eb132370",
"Zm00001eb193130",
"Zm00001eb192970"
)
# Get gene IDs matching labels
jmj4_genes_by_label <- effects_df %>%
filter(locus_label %in% jmj4_neighborhood_labels) %>%
pull(gene) %>%
unique()
# Combine both sources
jmj4_all_genes <- unique(c(jmj4_genes_by_label, jmj4_neighborhood_ids))
cat("jmj4 neighborhood genes:", length(jmj4_all_genes), "\n")
## jmj4 neighborhood genes: 12
if (nrow(novel_edges_prep) > 0 && length(jmj4_all_genes) > 0) {
# Build the full novel MST first
g_novel <- graph_from_data_frame(
d = novel_edges_prep %>% dplyr::select(from, to, r, corr_std, std_dist, mi),
directed = TRUE,
vertices = novel_nodes
)
g_novel_mst <- mst(g_novel, weights = E(g_novel)$std_dist)
# Extract subgraph containing jmj4 neighborhood nodes
jmj4_nodes_in_mst <- V(g_novel_mst)$name[V(g_novel_mst)$name %in% jmj4_all_genes]
if (length(jmj4_nodes_in_mst) > 0) {
g_jmj4_subgraph <- induced_subgraph(g_novel_mst, jmj4_nodes_in_mst)
# Get edges and nodes from subgraph
jmj4_mst_edges <- igraph::as_data_frame(g_jmj4_subgraph, what = "edges")
jmj4_node_ids <- V(g_jmj4_subgraph)$name
jmj4_nodes <- format_network_nodes(
jmj4_node_ids, effects_df,
inv4m_regulators, flanking_regulators
)
cat("\n=== jmj4 Neighborhood (Novel MST Subgraph) ===\n")
cat("MST Edges:", nrow(jmj4_mst_edges), "\n")
cat("Nodes:", length(jmj4_node_ids), "\n")
# Prepare edges for visualization
jmj4_vis_edges <- jmj4_mst_edges %>%
left_join(
effects_df %>% group_by(gene) %>% dplyr::slice(1) %>% ungroup() %>%
dplyr::select(gene, logFC_from = logFC),
by = c("from" = "gene")
) %>%
left_join(
effects_df %>% group_by(gene) %>% dplyr::slice(1) %>% ungroup() %>%
dplyr::select(gene, logFC_to = logFC),
by = c("to" = "gene")
) %>%
mutate(
is_concordant = sign(logFC_from) == sign(logFC_to),
arrows.to.type = ifelse(is_concordant, "arrow", "bar"),
color = "DarkGray",
width = mi
) %>%
dplyr::select(from, to, width, arrows.to.type, color)
visNetwork(jmj4_nodes, jmj4_vis_edges) %>%
visNodes(font = list(size = 25, multi = "html")) %>%
visEdges(arrows = "to") %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
} else {
cat("No jmj4 neighborhood genes found in novel MST\n")
}
} else {
cat("No novel edges or jmj4 genes to process\n")
}
##
## === jmj4 Neighborhood (Novel MST Subgraph) ===
## MST Edges: 11
## Nodes: 12
# Add regulator_region to full bipartite edges for GO enrichment
trans_network_edges_annotated <- bipartite_edges %>%
mutate(
regulator_region = case_when(
from %in% inv4m_regulators ~ "inv4m",
from %in% flanking_regulators ~ "flanking",
TRUE ~ "other"
)
) %>%
dplyr::select(from, to, r, regulator_region)
write.csv(
trans_network_edges_annotated,
file.path(paths$intermediate, "trans_network_edges_annotated.csv"),
row.names = FALSE
)
cat("=== trans_network_edges_annotated.csv ===\n")
## === trans_network_edges_annotated.csv ===
cat("Total edges:", nrow(trans_network_edges_annotated), "\n")
## Total edges: 3262
cat("By regulator_region:\n")
## By regulator_region:
print(table(trans_network_edges_annotated$regulator_region))
##
## flanking inv4m
## 2221 1041
# Regulators
trans_network_regulators <- effects_df %>%
filter(gene %in% unique(bipartite_edges$from)) %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, locus_symbol, desc_merged, logFC, adj.P.Val) %>%
mutate(
node_type = "regulator",
region = case_when(
gene %in% inv4m_regulators ~ "inv4m",
gene %in% flanking_regulators ~ "flanking",
TRUE ~ "other"
)
)
write.csv(
trans_network_regulators,
file.path(paths$intermediate, "trans_network_regulators.csv"),
row.names = FALSE
)
cat("\n=== trans_network_regulators.csv ===\n")
##
## === trans_network_regulators.csv ===
cat("Total:", nrow(trans_network_regulators), "\n")
## Total: 103
print(table(trans_network_regulators$region))
##
## flanking inv4m
## 70 33
# Targets
trans_network_targets <- effects_df %>%
filter(gene %in% unique(bipartite_edges$to)) %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, locus_symbol, desc_merged, logFC, adj.P.Val) %>%
mutate(node_type = "target", region = "trans")
write.csv(
trans_network_targets,
file.path(paths$intermediate, "trans_network_targets.csv"),
row.names = FALSE
)
cat("\n=== trans_network_targets.csv ===\n")
##
## === trans_network_targets.csv ===
cat("Total:", nrow(trans_network_targets), "\n")
## Total: 32
# All genes
trans_network_all_genes <- bind_rows(
trans_network_regulators,
trans_network_targets
) %>%
distinct(gene, .keep_all = TRUE)
write.csv(
trans_network_all_genes,
file.path(paths$intermediate, "trans_network_all_genes.csv"),
row.names = FALSE
)
cat("\n=== trans_network_all_genes.csv ===\n")
##
## === trans_network_all_genes.csv ===
cat("Total unique genes:", nrow(trans_network_all_genes), "\n")
## Total unique genes: 135
# Reference network genes and edges
reference_genes <- unique(c(reference_edges$from, reference_edges$to))
reference_gene_info <- effects_df %>%
filter(gene %in% reference_genes) %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, desc_merged, logFC, adj.P.Val) %>%
mutate(gene_location = ifelse(gene %in% cis_genes, "cis", "trans"))
write.csv(
reference_gene_info,
file.path(paths$intermediate, "reference_trans_network_genes.csv"),
row.names = FALSE
)
write.csv(
reference_edges,
file.path(paths$intermediate, "reference_trans_network_edges.csv"),
row.names = FALSE
)
cat("\n=== Reference Network ===\n")
##
## === Reference Network ===
cat("Genes:", nrow(reference_gene_info), "\n")
## Genes: 16
cat("Edges:", nrow(reference_edges), "\n")
## Edges: 16
# Novel network genes and edges
novel_genes <- unique(c(novel_edges$from, novel_edges$to))
novel_gene_info <- effects_df %>%
filter(gene %in% novel_genes) %>%
group_by(gene) %>%
dplyr::slice(1) %>%
ungroup() %>%
dplyr::select(gene, locus_label, desc_merged, logFC, adj.P.Val) %>%
mutate(gene_location = ifelse(gene %in% cis_genes, "cis", "trans"))
write.csv(
novel_gene_info,
file.path(paths$intermediate, "novel_trans_network_genes.csv"),
row.names = FALSE
)
write.csv(
novel_edges,
file.path(paths$intermediate, "novel_trans_network_edges.csv"),
row.names = FALSE
)
cat("\n=== Novel Network ===\n")
##
## === Novel Network ===
cat("Genes:", nrow(novel_gene_info), "\n")
## Genes: 50
cat("Edges:", nrow(novel_edges), "\n")
## Edges: 536
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] visNetwork_2.1.4 igraph_2.2.1 rtracklayer_1.70.1
## [4] GenomicRanges_1.62.1 Seqinfo_1.0.0 IRanges_2.44.0
## [7] S4Vectors_0.48.0 BiocGenerics_0.56.0 generics_0.1.4
## [10] tidyr_1.3.2 dplyr_1.1.4 here_1.0.2
##
## loaded via a namespace (and not attached):
## [1] SummarizedExperiment_1.40.0 rjson_0.2.23
## [3] xfun_0.55 bslib_0.9.0
## [5] htmlwidgets_1.6.4 Biobase_2.70.0
## [7] lattice_0.22-7 vctrs_0.6.5
## [9] tools_4.5.2 bitops_1.0-9
## [11] curl_7.0.0 parallel_4.5.2
## [13] tibble_3.3.0 pkgconfig_2.0.3
## [15] Matrix_1.7-4 cigarillo_1.0.0
## [17] lifecycle_1.0.4 stringr_1.6.0
## [19] compiler_4.5.2 Rsamtools_2.26.0
## [21] Biostrings_2.78.0 statmod_1.5.1
## [23] codetools_0.2-20 htmltools_0.5.9
## [25] sass_0.4.10 RCurl_1.98-1.17
## [27] yaml_2.3.12 pillar_1.11.1
## [29] crayon_1.5.3 jquerylib_0.1.4
## [31] BiocParallel_1.44.0 limma_3.66.0
## [33] cachem_1.1.0 DelayedArray_0.36.0
## [35] abind_1.4-8 tidyselect_1.2.1
## [37] digest_0.6.39 stringi_1.8.7
## [39] purrr_1.2.0 restfulr_0.0.16
## [41] rprojroot_2.1.1 fastmap_1.2.0
## [43] grid_4.5.2 cli_3.6.5
## [45] SparseArray_1.10.8 magrittr_2.0.4
## [47] S4Arrays_1.10.1 XML_3.99-0.20
## [49] withr_3.0.2 rmarkdown_2.30
## [51] XVector_0.50.0 httr_1.4.7
## [53] matrixStats_1.5.0 otel_0.2.0
## [55] evaluate_1.0.5 knitr_1.51
## [57] BiocIO_1.20.0 rlang_1.1.6
## [59] glue_1.8.0 jsonlite_2.0.0
## [61] R6_2.6.1 MatrixGenerics_1.22.0
## [63] GenomicAlignments_1.46.0