BLAST+ megablast hits of chromosomal knob repeats (knob180 M32524.1 and TR-1 AY083942.1) plotted along chromosome 4 of four Zea genomes. Vertical lines mark inversion breakpoint boundaries from AnchorWave genome alignments.
This generates Figure 1 Panel B (repeat annotation) of the inversion paper in two variants:
BLAST searches were run with
scripts/02_genomics_foundation/run_repeat_annotation_4genomes.sh.
library(dplyr)
library(ggplot2)
library(scales)
library(cowplot)
blastcols <- c("qseqid", "sseqid", "pident", "length", "mismatch",
"gapopen", "qstart", "qend", "sstart", "send",
"evalue", "bitscore")
genomes <- c("TIL18", "PT", "Mi21", "B73")
read_blast <- function(rep_label, prefix) {
files <- setNames(
file.path(paths$intermediate, paste0(prefix, "_", genomes, ".blast")),
genomes)
lapply(genomes, function(g) {
blast <- read.table(files[g], sep = "\t", header = FALSE,
col.names = blastcols,
colClasses = c(qseqid = "character", sseqid = "character"))
blast$genome <- g
blast
}) %>% bind_rows() %>%
mutate(bitscaled = rescale(bitscore, to = c(0, 100)), rep = rep_label)
}
to_plot <- bind_rows(
read_blast("knob180", "knob180"),
read_blast("TR-1", "TR-1")
) %>% mutate(genome = factor(genome, levels = genomes), rep = factor(rep))
to_plot %>%
group_by(genome, rep) %>%
summarise(n_hits = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = rep, values_from = n_hits)
## # A tibble: 4 × 3
## genome knob180 `TR-1`
## <fct> <int> <int>
## 1 TIL18 158444 38277
## 2 PT 783 207
## 3 Mi21 790 5286
## 4 B73 1303 5268
All genomes are aligned so that the upstream breakpoint maps to the B73 upstream coordinate (172,882,309). This allows visual comparison of inversion extent across genomes.
bp_all <- data.frame(
genome = c("TIL18","TIL18","PT","PT","B73","B73","Mi21","Mi21"),
side = rep(c("upstream","downstream"), 4),
xpos = c(180365316, 193570651, 173486186, 186925483,
172882309, 188131461, 156752504, 167621035),
stringsAsFactors = FALSE)
b73_upstream <- 172882309
offsets <- bp_all %>% filter(side == "upstream") %>%
mutate(offset = b73_upstream - xpos) %>% select(genome, offset)
bp_aligned <- bp_all %>% left_join(offsets, by = "genome") %>%
mutate(xpos_aligned = xpos + offset)
to_plot <- to_plot %>% left_join(offsets, by = "genome") %>%
mutate(sstart_aligned = sstart + offset)
# Midlines: mean repeat position between breakpoints per genome
midlines <- lapply(unique(bp_all$genome), function(g) {
bps <- bp_all$xpos[bp_all$genome == g]
hits <- to_plot %>% filter(genome == g,
sstart >= min(bps) + 1e6, sstart <= max(bps) - 1e6)
if (nrow(hits) == 0) return(NULL)
data.frame(genome = g,
xmid = mean(hits$sstart),
xmid_aligned = mean(hits$sstart) + offsets$offset[offsets$genome == g])
}) %>% bind_rows()
bp_aligned %>% select(genome, side, xpos, xpos_aligned) %>%
tidyr::pivot_wider(names_from = side, values_from = c(xpos, xpos_aligned))
## # A tibble: 4 × 5
## genome xpos_upstream xpos_downstream xpos_aligned_upstream
## <chr> <dbl> <dbl> <dbl>
## 1 TIL18 180365316 193570651 172882309
## 2 PT 173486186 186925483 172882309
## 3 B73 172882309 188131461 172882309
## 4 Mi21 156752504 167621035 172882309
## # ℹ 1 more variable: xpos_aligned_downstream <dbl>
Faceted plot of all four genomes at native coordinates with
free_x to show each genome’s breakpoint distance from its
chromosome origin.
genome_order <- c("TIL18", "PT", "Mi21", "B73")
annotations <- bp_all %>%
mutate(genome = factor(genome, levels = genome_order))
midlines_native <- midlines %>%
mutate(genome = factor(genome, levels = genome_order))
# Chr4 lengths (bp) — from FASTA indices
# B73: Zm-B73-REFERENCE-NAM-5.0.fa.fai
# PT: Zm-PT-REFERENCE-HiLo-1.0.fa.fai (PT04)
# Mi21: ragtag.merge.fasta.fai (scf00000010_RagTag)
# TIL18: Zx-TIL18-REFERENCE-PanAnd-1.0.fa.fai
chr4_lengths <- c(TIL18 = 274445545, PT = 248418599, Mi21 = 229108085, B73 = 250330460)
chr4_ends <- data.frame(
genome = factor(names(chr4_lengths), levels = genome_order),
chr_end = chr4_lengths)
p_faceted <- to_plot %>%
mutate(genome = factor(genome, levels = genome_order)) %>%
ggplot(aes(x = sstart, y = bitscaled, color = rep)) +
ylab("Normalized Repeat Match Score") +
geom_vline(data = midlines_native, aes(xintercept = xmid),
color = "gray55", linewidth = 1.2) +
geom_vline(data = annotations, aes(xintercept = xpos),
color = "purple4", linewidth = 1.2) +
geom_vline(data = chr4_ends, aes(xintercept = chr_end),
color = "black", linewidth = 0.8) +
geom_point(size = 0.8) +
facet_wrap(~ genome, nrow = 4, ncol = 1, scales = "free_x") +
scale_color_manual(
values = c("knob180" = "gray50", "TR-1" = "gray80"),
labels = c("knob180" = "knob 180", "TR-1" = "TR-1")) +
guides(color = guide_legend(override.aes = list(size = 5))) +
scale_x_continuous(labels = unit_format(unit = "", scale = 1e-6),
breaks = seq(0, 275e6, by = 25e6)) +
annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
theme_minimal(base_size = 20) +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(hjust = 0.8),
strip.background = element_blank(),
legend.position = "top",
legend.title = element_blank())
p_faceted
Color palette, axis config, and SVG canvas dimensions. Layout helper
functions are in fig1_panel_helpers.R.
# --- Color palette ---
col_inv4m <- "purple4"
col_knob <- "#00954A"
col_gold <- "gold"
col_gray <- "gray50"
col_ancestry <- "gold"
lw <- 1.2
cfg <- list(
xlim = c(150e6, 235e6),
xbreaks = seq(150e6, 225e6, by = 25e6))
ENABLE_CONNECTORS <- TRUE
# --- SVG canvas dimensions ---
svg_standalone <- list(W = 648, H = 432, lw = 2.56) # 9x6 inch standalone
svg_combined <- list(W = 725.76, H = 576, lw = 2.56) # Panel B viewport in 18x8
# Active svg/xtick_x — set by build_repeat_panel() per variant
svg <- svg_standalone
xtick_x <- c("175" = 160.11, "200" = 306.61, "225" = 453.10, "250" = 599.59)
# --- Load panel helpers (source into current environment so helpers see our variables) ---
source(here("scripts", "inversion_paper", "fig1_panel_helpers.R"), local = TRUE)
variant_pt <- list(
genomes = c("TIL18", "PT", "B73"),
svg = svg_standalone,
xtick_x = c("175" = 160.11, "200" = 306.61, "225" = 453.10, "250" = 599.59),
vlines = list(
TIL18 = list(up_x = 147.70, dn_x = 225.08, top_y = 57.75, bot_y = 140.45),
PT = list(up_x = 147.70, dn_x = 226.45, top_y = 170.33, bot_y = 253.03),
B73 = list(up_x = 147.70, dn_x = 237.06, top_y = 282.91, bot_y = 351.81)),
mid_x = list(TIL18 = 168.76, PT = 170.64, B73 = 214.67),
pairs = list(
list(top = "TIL18", bot = "PT", inverted = FALSE),
list(top = "PT", bot = "B73", inverted = TRUE)),
genome_labels = list(
list(name = "TIL18", y = 0.835, desc_prefix = "teosinte", desc_italic = "mexicana", italic_offset = 0.125),
list(name = "PT", y = 0.574, desc = "highland maize"),
list(name = "B73", y = 0.314, desc = NULL)),
ancestry = c("mexicana", "mexicana", "b73"))
make_pt <- build_repeat_panel(variant_pt)
make_pt(TRUE)
ggsave(file.path(paths$figures, "fig1_panel_B_skeleton.svg"),
plot = make_pt(FALSE), width = 9, height = 6, device = "svg",
fix_text_size = TRUE)
ggsave(file.path(paths$figures, "fig1_panel_B_skeleton.png"),
plot = make_pt(TRUE), width = 9, height = 6, dpi = 300, bg = "white")
variant_mi21 <- list(
genomes = c("TIL18", "Mi21", "B73"),
svg = svg_combined,
xtick_x = c("150" = 95.49, "175" = 265.18, "200" = 434.87, "225" = 604.56),
vlines = list(
TIL18 = list(up_x = 250.80, dn_x = 340.44, top_y = 103.13, bot_y = 194.99),
Mi21 = list(up_x = 250.80, dn_x = 324.58, top_y = 253.24, bot_y = 345.09),
B73 = list(up_x = 250.80, dn_x = 354.31, top_y = 403.35, bot_y = 495.20)),
mid_x = list(TIL18 = 275.19, Mi21 = 277.22, B73 = 328.37),
pairs = list(
list(top = "TIL18", bot = "Mi21", inverted = FALSE),
list(top = "Mi21", bot = "B73", inverted = TRUE)),
genome_labels = list(
list(name = "TIL18", y = 0.835, desc_prefix = "teosinte", desc_italic = "mexicana", italic_offset = 0.125),
list(name = "Mi21 NIL", y = 0.574, desc = "highland maize"),
list(name = "B73", y = 0.314, desc = NULL)),
introgression = list(mi21_left = 155.45, mi21_right = 190.79,
b73_left = 157.01, b73_right = 195.90),
ancestry = c("mexicana", "nil", "b73"))
make_mi21 <- build_repeat_panel(variant_mi21)
make_mi21(TRUE)
ggsave(file.path(paths$figures, "fig1_panel_B_mi21_skeleton.svg"),
plot = make_mi21(FALSE), width = 9, height = 6, device = "svg",
fix_text_size = TRUE)
ggsave(file.path(paths$figures, "fig1_panel_B_mi21_skeleton.png"),
plot = make_mi21(TRUE), width = 9, height = 6, dpi = 300, bg = "white")
LAST self-vs-self dotplots at upstream and downstream Inv4m
breakpoint segments. Dotplots were generated by
scripts/02_genomics_foundation/run_breakpoint_dotplots_4genomes.sh.
The figure uses three genomes matching the Panel B repeat annotation variant: TIL18, Mi21 NIL, B73.
library(png)
panel_c_genomes <- c("TIL18", "Mi21", "B73")
panel_c_coords <- list(
TIL18 = list(up = "180269950-180365316", down = "193570651-193734606"),
Mi21 = list(up = "156519809-156752504", down = "167621035-167839092"),
B73 = list(up = "172561959-172882309", down = "188131461-188220418"))
panel_c_labels <- c(TIL18 = "TIL18", Mi21 = "Mi21 NIL", B73 = "B73")
seg_size_kb <- function(range_str) {
parts <- as.numeric(strsplit(range_str, "-")[[1]])
round((parts[2] - parts[1]) / 1e3)
}
# Rotate PNG array 90 degrees counter-clockwise
rotate_ccw <- function(img) {
aperm(img[, rev(seq_len(ncol(img))), , drop = FALSE], c(2, 1, 3))
}
read_dotplot <- function(genome, direction) {
fname <- sprintf("%s_brkpt_%s.png", genome, direction)
fpath <- file.path(paths$intermediate, fname)
if (!file.exists(fpath)) stop("Missing: ", fpath)
rotate_ccw(readPNG(fpath))
}
make_dotplot_cell <- function(img, size_kb) {
ggdraw() +
draw_image(img, x = 0, y = 0.1, width = 1, height = 0.9) +
draw_label(paste0(size_kb, " kb"), x = 0.5, y = 0.06, size = 14, color = "gray30")
}
# Mi21 PNGs are named by B73-sense orientation but the inversion flips
# upstream/downstream, so swap which file goes in which column
png_dir <- list(
TIL18 = c(up = "up", down = "down"),
Mi21 = c(up = "down", down = "up"),
B73 = c(up = "up", down = "down"))
cells <- list()
for (g in panel_c_genomes) {
for (dir in c("up", "down")) {
img <- read_dotplot(g, png_dir[[g]][dir])
coord_key <- dir
size <- seg_size_kb(panel_c_coords[[g]][[coord_key]])
cells[[paste0(g, "_", dir)]] <- make_dotplot_cell(img, size)
}
}
# Assemble 3x2 grid with genome labels
rows <- lapply(panel_c_genomes, function(g) {
label <- ggdraw() + draw_label(panel_c_labels[g], size = 24, fontface = "bold", angle = 90)
plot_grid(label, cells[[paste0(g, "_up")]], cells[[paste0(g, "_down")]],
nrow = 1, rel_widths = c(0.08, 0.46, 0.46))
})
col_headers <- plot_grid(
ggdraw(),
ggdraw() + draw_label("Upstream", size = 24, fontface = "bold", y = 0.5),
ggdraw() + draw_label("Downstream", size = 24, fontface = "bold", y = 0.5),
nrow = 1, rel_widths = c(0.08, 0.46, 0.46))
title_row <- ggdraw() +
draw_label(expression(italic("Inv4m") ~ "Breakpoints"), x = 0.50, y = 0.5, size = 24, fontface = "bold")
p_panelc_brkpt <- plot_grid(
ggdraw(), # top spacer — align with Panel B repeat header
title_row,
rows[[1]], rows[[2]], rows[[3]],
col_headers,
ggdraw(), # bottom spacer — align with Panel B repeat x-axis
ncol = 1, rel_heights = c(0.03, 0.07, 0.26, 0.26, 0.26, 0.07, 0.06))
p_panelc_brkpt
ggsave(file.path(paths$figures, "Fig1_panel_D.png"),
plot = p_panelc_brkpt, width = 5.5, height = 8, dpi = 300, bg = "white")
ggsave(file.path(paths$figures, "Fig1_panel_D.svg"),
plot = p_panelc_brkpt, width = 5.5, height = 8, device = "svg",
fix_text_size = TRUE)
AnchorWave whole-genome alignment dotplots zoomed to chromosome 4, showing Mi21 NIL (x-axis) vs reference genomes (y-axis). The inversion appears as a reversed diagonal in B73 (standard karyotype) but not in TIL18 or PT (inverted karyotype).
Data from
nilhifimi21/anchorwave/vs_{B73,PT}/dotplot.tab, filtered to
chr4 vs scf00000010_RagTag (Mi21 chr4).
# AnchorWave dotplot format: ref_chr ref_pos query_chr query_pos strand
read_dotplot_tab <- function(file, ref_label) {
d <- read.table(file, header = FALSE, sep = "\t",
col.names = c("ref_chr", "ref_pos", "query_chr", "query_pos", "strand"))
d$ref <- ref_label
d
}
panel_a_files <- list(
B73 = file.path(paths$intermediate, "Mi21_vs_B73_chr4.tab"),
PT = file.path(paths$intermediate, "Mi21_vs_PT_chr4.tab"))
# TIL18: use real data if available, otherwise duplicate PT as placeholder
# (both carry the inversion so the dotplot pattern is similar)
til18_file <- file.path(paths$intermediate, "Mi21_vs_TIL18_chr4.tab")
if (file.exists(til18_file)) {
panel_a_files[["TIL18"]] <- til18_file
}
dotplot_data <- do.call(rbind, lapply(names(panel_a_files), function(ref) {
read_dotplot_tab(panel_a_files[[ref]], ref)
}))
# If TIL18 not available, duplicate PT data as TIL18 placeholder
if (!"TIL18" %in% dotplot_data$ref) {
til18_placeholder <- dotplot_data[dotplot_data$ref == "PT", ]
til18_placeholder$ref <- "TIL18"
dotplot_data <- rbind(til18_placeholder, dotplot_data)
}
# Order top to bottom: TIL18, PT, B73
dotplot_data$ref <- factor(dotplot_data$ref, levels = c("TIL18", "PT", "B73"))
cat("References available:", paste(levels(dotplot_data$ref), collapse = ", "), "\n")
## References available: TIL18, PT, B73
cat("Anchor points per reference:\n")
## Anchor points per reference:
print(table(dotplot_data$ref))
##
## TIL18 PT B73
## 6100 3131 2641
# Color by strand: + = blue (collinear), - = red (inverted)
# Zoom: 25 Mb window per genome, centered on each genome's Inv4m midpoint
zoom_half <- 20
# Introgression boundaries
# B73 coords from RNAseq genotype manual inspection (Section 3.2)
# Mi21 coords from AnchorWave coordinate conversion
introg_mi21 <- c(139.32, 174.66) # Mi21 native Mb (x-axis)
introg_b73 <- c(157.01, 195.90) # B73 Mb (y-axis)
# Mi21 introgression center (x-axis, shared)
mi21_center <- mean(introg_mi21) # ~155.5
# Reference centers (y-axis, per genome)
# B73: center on introgression; TIL18/PT: center on inversion
ref_centers <- c(
TIL18 = (180365316 + 193570651) / 2 / 1e6, # 187.0
PT = (173486186 + 186925483) / 2 / 1e6, # 180.2
B73 = mean(introg_b73)) # 174.5
# Build one dotplot per genome with coord_fixed and proper zoom
make_dotplot_facet <- function(genome, show_xlab = FALSE) {
d <- dotplot_data[dotplot_data$ref == genome, ]
yc <- ref_centers[genome]
p <- ggplot(d, aes(x = query_pos / 1e6, y = ref_pos / 1e6, color = strand)) +
{ if (genome == "B73")
annotate("rect",
xmin = introg_mi21[1], xmax = introg_mi21[2],
ymin = introg_b73[1], ymax = introg_b73[2],
fill = NA, color = "black", linewidth = 0.8, linetype = "dashed") } +
geom_point(size = 0.5) +
scale_color_manual(values = c("+" = "blue", "-" = "red"), guide = "none") +
coord_fixed(ratio = 1,
xlim = c(mi21_center - zoom_half, mi21_center + zoom_half),
ylim = c(yc - zoom_half, yc + zoom_half)) +
labs(x = NULL, y = NULL) +
theme_minimal(base_size = 14) +
theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.margin = margin(4, 2, 4, 2))
p
}
# Stack with genome labels on the left — flush left to align with "C" label
panel_a_plots <- lapply(c("TIL18", "PT", "B73"), function(g) {
label <- ggdraw() + draw_label(g, size = 24, fontface = "bold", angle = 90, x = 0.6)
plot_grid(label, make_dotplot_facet(g, show_xlab = (g == "B73")),
nrow = 1, rel_widths = c(0.08, 0.92))
})
p_panela <- plot_grid(
ggdraw(), # top spacer — align with Panel B repeat header
panel_a_plots[[1]], panel_a_plots[[2]], panel_a_plots[[3]],
ggdraw() + draw_label("Mi21 NIL", size = 24, fontface = "bold", x = 0.55),
ggdraw(), # bottom spacer — align with Panel B repeat x-axis
ncol = 1, rel_heights = c(0.25, 0.70, 0.70, 0.70, 0.15, 0.20))
p_panela
ggsave(file.path(paths$figures, "Fig1_panel_C.png"),
plot = p_panela, width = 5.5, height = 8, dpi = 300, bg = "white")
ggsave(file.path(paths$figures, "Fig1_panel_C.svg"),
plot = p_panela, width = 5.5, height = 8, device = "svg",
fix_text_size = TRUE)
Assemble the three panels side by side. Panel B (repeat annotation) is wide, Panel C (AnchorWave dotplots) is narrow, Panel D (breakpoint dotplots) is medium — placing the two dotplot panels adjacent.
# Build combined figure helper (skeleton vs points for SVG vs PNG)
# Sigmoids are calibrated at the combined 18x7 canvas, not the standalone 9x6.
build_fig1_top <- function(show_points, w = 18, h = 7) {
# Panel B WITHOUT sigmoids (pass ENABLE_CONNECTORS=FALSE via target dims trick)
p_repeat <- make_mi21(show_points, target_width = 0, target_height = 0)
fig1_grid <- plot_grid(
p_repeat,
p_panela,
p_panelc_brkpt,
nrow = 1,
rel_widths = c(0.62, 0.13, 0.25))
label_y <- 0.965
base <- ggdraw(fig1_grid) +
draw_label("C", x = 0.62, y = label_y, size = 45, fontface = "bold",
hjust = 0.2, vjust = 1) +
draw_label("D", x = 0.755, y = label_y, size = 45, fontface = "bold",
hjust = 0.2, vjust = 1)
# Calibrate x-axis break labels at the combined canvas size
xbreaks <- calibrate_xbreaks(base, w, h,
variant_mi21$genomes[3])
for (b in xbreaks) base <- base + b
# Calibrate sigmoids at the combined canvas size
if (ENABLE_CONNECTORS) {
sigs <- calibrate_sigmoids(base, w, h,
variant_mi21$pairs, variant_mi21$genomes,
variant_mi21$introgression)
for (s in sigs) base <- base + s
}
base
}
build_fig1_top(TRUE)
ggsave(file.path(paths$figures, "Fig1_top_BCD.png"),
plot = build_fig1_top(TRUE), width = 18, height = 7, dpi = 300, bg = "white")
ggsave(file.path(paths$figures, "Fig1_top_BCD.svg"),
plot = build_fig1_top(FALSE), width = 18, height = 7, device = "svg",
fix_text_size = TRUE)
saveRDS(build_fig1_top(TRUE), file.path(paths$intermediate, "fig1_panel_BCD.rds"))
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.3
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] png_0.1-8 cowplot_1.2.0 scales_1.4.0 ggplot2_4.0.1 dplyr_1.1.4
## [6] here_1.0.2
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 compiler_4.5.2 Rcpp_1.1.0
## [5] tidyselect_1.2.1 magick_2.9.0 dichromat_2.0-0.1 tidyr_1.3.2
## [9] jquerylib_0.1.4 textshaping_1.0.4 systemfonts_1.3.1 yaml_2.3.12
## [13] fastmap_1.2.0 R6_2.6.1 labeling_0.4.3 generics_0.1.4
## [17] knitr_1.51 tibble_3.3.0 rprojroot_2.1.1 svglite_2.2.2
## [21] bslib_0.9.0 pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.6
## [25] utf8_1.2.6 cachem_1.1.0 xfun_0.55 sass_0.4.10
## [29] S7_0.2.1 otel_0.2.0 cli_3.6.5 withr_3.0.2
## [33] magrittr_2.0.4 digest_0.6.39 grid_4.5.2 lifecycle_1.0.4
## [37] vctrs_0.6.5 evaluate_1.0.5 glue_1.8.0 farver_2.1.2
## [41] ragg_1.5.0 purrr_1.2.0 rmarkdown_2.30 tools_4.5.2
## [45] pkgconfig_2.0.3 htmltools_0.5.9