This notebook examines how expression correlation and fold change magnitude vary along the Inv4m introgression on chromosome 4. Three questions:
library(here)
library(dplyr)
library(ggplot2)
library(zoo)
library(tidyr)
source(here("scripts", "utils", "setup_paths.R"))
paths <- setup_project_paths("inversion_paper")
effects_df <- read.csv(
file.path(paths$intermediate, "predictor_effects_leaf_interaction_model.csv")
) %>%
filter(predictor == "Inv4m")
nrow(effects_df)
## [1] 24011
effects_df$P <- effects_df$P.Value
effects_df$neglogP <- -log10(effects_df$P)
xp <- readRDS(file.path(paths$intermediate, "normalized_expression_logCPM_leaf_trt.rds"))
# Remap column names to match the original analysis
# DE script uses: in_Inv4m, in_cis (shared introgression), in_flank, in_trans
# Add convenience columns
effects_df <- effects_df %>%
mutate(
in_inv4m = in_Inv4m,
in_flanking = in_flank,
in_shared = in_cis,
region = case_when(
in_Inv4m ~ "inv4m",
in_flank ~ "flanking",
TRUE ~ "outside"
),
is_significant = adj.P.Val < 0.05
)
bonferroni_threshold <- 0.05 / nrow(effects_df)
Identify Bonferroni-significant DEGs in each region, removing duplicated gene names (e.g., JMJ paralogs mapped to the same locus label).
inv4_inv4 <- effects_df$gene[effects_df$P < bonferroni_threshold & effects_df$in_inv4m] %>%
sort() %>% unique()
inv4_flanking <- effects_df$gene[effects_df$P < bonferroni_threshold & effects_df$in_flanking] %>%
sort() %>% unique()
inv4_outside <- effects_df$gene[effects_df$P < bonferroni_threshold & !effects_df$in_shared] %>%
sort() %>% unique()
inv4_introgression <- effects_df$gene[effects_df$P < bonferroni_threshold & effects_df$in_shared] %>%
sort() %>% unique()
cat("DEGs by region (Bonferroni):\n")
## DEGs by region (Bonferroni):
cat(" Inv4m:", length(inv4_inv4), "\n")
## Inv4m: 63
cat(" Flanking:", length(inv4_flanking), "\n")
## Flanking: 91
cat(" Outside:", length(inv4_outside), "\n")
## Outside: 36
cat(" Total introgression:", length(inv4_introgression), "\n")
## Total introgression: 154
Compute pairwise expression correlations between outside DEGs and DEGs inside Inv4m or in the flanking region. This tests whether genes in one region of the introgression are more tightly co-regulated with the rest of the genome than genes in the other region.
out_inv4m <- cor(t(xp[inv4_outside, ]), t(xp[inv4_inv4, ]))
out_flanking <- cor(t(xp[inv4_outside, ]), t(xp[inv4_flanking, ]))
out_introgression <- cor(t(xp[inv4_outside, ]), t(xp[inv4_introgression, ]))
cat("Correlation matrix dimensions:\n")
## Correlation matrix dimensions:
cat(" Outside x Inv4m:", paste(dim(out_inv4m), collapse = " x "), "\n")
## Outside x Inv4m: 36 x 63
cat(" Outside x Flanking:", paste(dim(out_flanking), collapse = " x "), "\n")
## Outside x Flanking: 36 x 91
cat(" Outside x Introgression:", paste(dim(out_introgression), collapse = " x "), "\n")
## Outside x Introgression: 36 x 154
to_plot <- rbind(
data.frame(region = "inv4m", cor = as.numeric(out_inv4m)),
data.frame(region = "flanking", cor = as.numeric(out_flanking))
)
to_plot$R2 <- to_plot$cor^2
to_plot %>%
ggplot(aes(x = region, y = R2)) +
geom_boxplot() +
ylab(expression(R^2 ~ "(squared correlation with outside DEGs)")) +
ggpubr::theme_classic2(base_size = 16)
cat("=== R2 ~ region (all correlations) ===\n")
## === R2 ~ region (all correlations) ===
lm(R2 ~ region, data = to_plot) %>% summary() %>% print()
##
## Call:
## lm(formula = R2 ~ region, data = to_plot)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57945 -0.12469 0.01374 0.14120 0.41028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.579728 0.003350 173.076 <2e-16 ***
## regioninv4m -0.001084 0.005237 -0.207 0.836
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1917 on 5542 degrees of freedom
## Multiple R-squared: 7.725e-06, Adjusted R-squared: -0.0001727
## F-statistic: 0.04281 on 1 and 5542 DF, p-value: 0.8361
cat("\n=== R2 ~ region (negative correlations only) ===\n")
##
## === R2 ~ region (negative correlations only) ===
lm(R2 ~ region, data = to_plot[to_plot$cor < 0, ]) %>% summary() %>% print()
##
## Call:
## lm(formula = R2 ~ region, data = to_plot[to_plot$cor < 0, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5902 -0.1351 0.0143 0.1582 0.3742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.585673 0.004742 123.510 <2e-16 ***
## regioninv4m 0.007842 0.007311 1.073 0.284
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2008 on 3093 degrees of freedom
## Multiple R-squared: 0.0003719, Adjusted R-squared: 4.867e-05
## F-statistic: 1.151 on 1 and 3093 DF, p-value: 0.2835
cat("\n=== R2 ~ region (positive correlations only) ===\n")
##
## === R2 ~ region (positive correlations only) ===
lm(R2 ~ region, data = to_plot[to_plot$cor > 0, ]) %>% summary() %>% print()
##
## Call:
## lm(formula = R2 ~ region, data = to_plot[to_plot$cor > 0, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57048 -0.11243 0.01308 0.12417 0.41746
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.572542 0.004642 123.350 <2e-16 ***
## regioninv4m -0.013939 0.007390 -1.886 0.0594 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1787 on 2447 degrees of freedom
## Multiple R-squared: 0.001452, Adjusted R-squared: 0.001044
## F-statistic: 3.557 on 1 and 2447 DF, p-value: 0.0594
How tightly co-regulated are DEGs within each region? Compute pairwise correlations within inv4m, flanking, and outside DEGs separately.
inner_inv4m <- cor(t(xp[inv4_inv4, ]))
inner_flanking <- cor(t(xp[inv4_flanking, ]))
inner_outside <- cor(t(xp[inv4_outside, ]))
to_plot <- rbind(
data.frame(
comparison = "inv4m\noutside",
cor = out_inv4m[upper.tri(out_inv4m, diag = FALSE)]
),
data.frame(
comparison = "flanking\noutside",
cor = out_flanking[upper.tri(out_flanking, diag = FALSE)]
),
data.frame(
comparison = "inv4m\ninv4m",
cor = inner_inv4m[upper.tri(inner_inv4m, diag = FALSE)]
),
data.frame(
comparison = "flanking\nflanking",
cor = inner_flanking[upper.tri(inner_flanking, diag = FALSE)]
),
data.frame(
comparison = "outside\noutside",
cor = inner_outside[upper.tri(inner_outside, diag = FALSE)]
)
)
to_plot$R2 <- to_plot$cor^2
to_plot %>%
mutate(comparison = factor(comparison)) %>%
mutate(comparison = forcats::fct_reorder(comparison, -R2)) %>%
ggplot(aes(x = comparison, y = R2)) +
ggtitle("Correlation between Inv4m DEGs by region") +
ylab(expression(R^2 ~ "(squared correlation)")) +
geom_boxplot() +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none")
lm(R2 ~ comparison, data = to_plot) %>% summary()
##
## Call:
## lm(formula = R2 ~ comparison, data = to_plot)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58814 -0.12483 0.01714 0.14575 0.42694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.573064 0.003024 189.521 < 2e-16 ***
## comparisonflanking\noutside 0.023088 0.004846 4.764 1.92e-06 ***
## comparisoninv4m\ninv4m 0.004948 0.005321 0.930 0.35248
## comparisoninv4m\noutside 0.018207 0.005702 3.193 0.00141 **
## comparisonoutside\noutside 0.013185 0.008281 1.592 0.11137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1935 on 10885 degrees of freedom
## Multiple R-squared: 0.002498, Adjusted R-squared: 0.002131
## F-statistic: 6.814 on 4 and 10885 DF, p-value: 1.79e-05
For each 25-gene window along chromosome 4, compute the mean R² between genes in that window and all outside DEGs. This shows how the strength of trans-correlation varies along the introgression.
in_out_cor <- t(out_introgression)
in_out_R2 <- in_out_cor^2
rollme <- cbind(
data.frame(gene = unique(colnames(out_introgression))),
as.data.frame(in_out_cor)
)
get_midelement <- function(vec) vec[ceiling(length(vec) / 2)]
r2 <- rollapply(
rollme$gene,
width = 25,
function(x) mean(as.numeric(in_out_R2[x, ])),
by.column = FALSE
)
mid_gene <- rollapply(
rollme$gene,
width = 25,
function(x) get_midelement(x),
by.column = FALSE
)
by_window <- data.frame(r2_win = r2, gene = mid_gene)
# Derive boundaries from gene positions in the effects data
introgression_limit <- c(
min(effects_df$BP[effects_df$in_shared]),
max(effects_df$BP[effects_df$in_shared])
) / 1e6
inversion_limit <- c(
min(effects_df$BP[effects_df$in_inv4m]),
max(effects_df$BP[effects_df$in_inv4m])
) / 1e6
to_plot <- effects_df %>%
mutate(BP = BP / 1e6) %>%
inner_join(by_window, by = "gene")
to_plot %>%
ggplot(aes(x = BP, y = r2_win, col = region)) +
geom_vline(
xintercept = c(introgression_limit, inversion_limit),
linetype = "dashed"
) +
ylab(expression(R^2 ~ "with outside DEGs (25-gene window mean)")) +
xlab("Chromosome 4 Position [Mb]") +
geom_point() +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "top")
Separate sliding windows for upregulated and downregulated genes show how the magnitude and direction of the Inv4m effect vary along the physical map. This addresses whether expression changes are directionally biased (as in Guerrero et al. 2016, Solanum introgression lines) or symmetrically dampened.
up_genes <- effects_df %>%
filter(logFC > 0, CHR == 4) %>%
arrange(BP) %>%
distinct(gene, .keep_all = TRUE)
fc_up <- rollapply(up_genes$logFC, FUN = mean, width = 10)
mid_up <- rollapply(up_genes$gene, width = 10,
function(x) get_midelement(x), by.column = FALSE)
by_window_up <- data.frame(foldchange = fc_up, gene = mid_up)
to_plot_up <- effects_df %>%
filter(CHR == 4) %>%
arrange(BP) %>%
inner_join(by_window_up, by = "gene")
down_genes <- effects_df %>%
filter(logFC < 0, CHR == 4) %>%
arrange(BP) %>%
distinct(gene, .keep_all = TRUE)
fc_down <- rollapply(down_genes$logFC, FUN = mean, width = 10)
mid_down <- rollapply(down_genes$gene, width = 10,
function(x) get_midelement(x), by.column = FALSE)
by_window_down <- data.frame(foldchange = fc_down, gene = mid_down)
to_plot_down <- effects_df %>%
filter(CHR == 4) %>%
arrange(BP) %>%
inner_join(by_window_down, by = "gene")
pal <- c("purple4", "plum", "gold")
names(pal) <- c("inv4m", "flanking", "outside")
logFC_label <- expression(log[2]("Fold change"))
effects_df %>%
filter(CHR == 4, BP > 150e6, BP < 200e6) %>%
ggplot(aes(x = BP / 1e6, y = logFC, col = region)) +
annotate("rect", xmin = 150, xmax = 200, ymin = -2, ymax = 2,
alpha = 0.5, fill = "grey75") +
geom_point() +
scale_color_manual(values = pal) +
coord_cartesian(xlim = c(150, 200)) +
geom_line(
data = to_plot_up,
aes(x = BP / 1e6, y = foldchange), inherit.aes = FALSE,
col = "darkgreen", linewidth = 1
) +
geom_line(
data = to_plot_down,
aes(x = BP / 1e6, y = foldchange), inherit.aes = FALSE,
col = "darkred", linewidth = 1
) +
ylab(logFC_label) +
xlab("Chromosome 4 Position [Mb]") +
annotate("text", x = c(151, 151), y = c(6.5, -5),
label = c("Upregulated", "Downregulated"),
color = c("darkgreen", "darkred"),
fontface = "bold", size = 5, hjust = 0) +
geom_hline(yintercept = 0) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none")
regulation_order <- c("Downregulated", "Upregulated")
names(regulation_order) <- c("-1", "1")
to_plot <- effects_df %>%
filter(CHR == 4) %>%
mutate(
regulation = regulation_order[as.character(sign(logFC))],
zone = case_when(
in_flanking & BP < 180e6 ~ "left",
in_flanking & BP > 180e6 ~ "right",
in_inv4m ~ "inv4m",
.default = "outside"
),
region = factor(region, levels = c("inv4m", "flanking", "outside"))
)
to_plot$zone <- factor(to_plot$zone,
levels = c("left", "inv4m", "right", "outside"))
All expressed genes on chromosome 4, no up/down split.
wilcox_region <- to_plot %>%
rstatix::wilcox_test(logFC ~ region) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance()
wilcox_region
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 logFC inv4m flanking 253 355 43890 0.634 0.634 ns
## 2 logFC inv4m outside 253 1850 217764 0.073 0.109 ns
## 3 logFC flanking outside 355 1850 303548 0.024 0.072 ns
# Manual bracket positions:
# inv4m=1, flanking=2, outside=3
# Level 1: inv4m-flanking and flanking-outside at same height
# Level 2: inv4m-outside spanning above both
ymax <- max(to_plot$logFC, na.rm = TRUE)
level1 <- ymax * 1.05
level2 <- ymax * 1.20
wilcox_region$xmin <- c(1, 1, 2.1)
wilcox_region$xmax <- c(1.9, 3, 3)
wilcox_region$y.position <- c(level1, level2, level1)
p_logfc_region <- to_plot %>%
ggplot(aes(x = region, y = logFC, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_region, hide.ns = FALSE) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.20))) +
ggpubr::theme_classic2(base_size = 25) +
theme(legend.position = "none")
p_logfc_region
saveRDS(p_logfc_region, file.path(paths$intermediate, "panel_logfc_by_region.rds"))
wilcox_zone <- to_plot %>%
rstatix::wilcox_test(logFC ~ zone, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "zone", step.increase = 0.05)
wilcox_zone
## # A tibble: 3 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 logFC inv4m left 253 223 27437 0.606 0.814 ns 10.7
## 2 logFC inv4m right 253 132 16453 0.814 0.814 ns 11.1
## 3 logFC inv4m outside 253 1850 217764 0.073 0.219 ns 11.6
## # ℹ 3 more variables: groups <named list>, xmin <dbl>, xmax <dbl>
to_plot %>%
ggplot(aes(x = zone, y = logFC, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_zone, hide.ns = TRUE) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none")
wilcox_nlp_region <- to_plot %>%
rstatix::wilcox_test(neglogP ~ region, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "region", step.increase = 0.03)
wilcox_nlp_region
## # A tibble: 2 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 neglogP inv4m flanking 253 355 43944 6.52e- 1 6.52e- 1 ns
## 2 neglogP inv4m outside 253 1850 364895 2.64e-47 5.28e-47 ****
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## # xmax <dbl>
to_plot %>%
ggplot(aes(x = region, y = neglogP, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_nlp_region, hide.ns = TRUE) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none")
wilcox_nlp_zone <- to_plot %>%
rstatix::wilcox_test(neglogP ~ zone, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "zone", step.increase = 0.03)
wilcox_nlp_zone
## # A tibble: 3 × 13
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 neglogP inv4m left 253 223 27451 6.13e- 1 8.44e- 1 ns
## 2 neglogP inv4m right 253 132 16493 8.44e- 1 8.44e- 1 ns
## 3 neglogP inv4m outside 253 1850 364895 2.64e-47 7.92e-47 ****
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## # xmax <dbl>
to_plot %>%
ggplot(aes(x = zone, y = neglogP, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_nlp_zone, hide.ns = TRUE) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none")
Same panels but faceted by up/down.
wilcox_region_dir <- to_plot %>%
group_by(regulation) %>%
rstatix::wilcox_test(logFC ~ region, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "region", step.increase = 0.05)
wilcox_region_dir
## # A tibble: 4 × 14
## regulation .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 Downregulated logFC inv4m flanking 147 208 15225 9.48e- 1 9.48e- 1
## 2 Downregulated logFC inv4m outside 147 1249 39296 6.87e-30 2.75e-29
## 3 Upregulated logFC inv4m flanking 106 147 6617 4.1 e- 2 5.47e- 2
## 4 Upregulated logFC inv4m outside 106 601 46074 2.22e-13 4.44e-13
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## # groups <named list>, xmin <dbl>, xmax <dbl>
to_plot %>%
ggplot(aes(x = region, y = logFC, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_region_dir, hide.ns = TRUE) +
facet_wrap(. ~ regulation, scales = "free_y") +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none", strip.background = element_blank())
wilcox_zone_dir <- to_plot %>%
group_by(regulation) %>%
rstatix::wilcox_test(logFC ~ zone, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "zone", step.increase = 0.05)
wilcox_zone_dir
## # A tibble: 6 × 14
## regulation .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 Downregulated logFC inv4m left 147 134 9636 7.55e- 1 8.54e- 1
## 2 Downregulated logFC inv4m right 147 74 5589 7.39e- 1 8.54e- 1
## 3 Downregulated logFC inv4m outside 147 1249 39296 6.87e-30 4.12e-29
## 4 Upregulated logFC inv4m left 106 89 3597 4 e- 3 8 e- 3
## 5 Upregulated logFC inv4m right 106 58 3020 8.54e- 1 8.54e- 1
## 6 Upregulated logFC inv4m outside 106 601 46074 2.22e-13 6.66e-13
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## # groups <named list>, xmin <dbl>, xmax <dbl>
to_plot %>%
ggplot(aes(x = zone, y = logFC, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_zone_dir, hide.ns = TRUE) +
facet_wrap(. ~ regulation, scales = "free_y") +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none", strip.background = element_blank())
wilcox_nlp_region_dir <- to_plot %>%
group_by(regulation) %>%
rstatix::wilcox_test(neglogP ~ region, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "region", step.increase = 0.03)
wilcox_nlp_region_dir
## # A tibble: 4 × 14
## regulation .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 Downregulated neglogP inv4m flanking 147 208 15676 6.84e- 1 6.84e- 1
## 2 Downregulated neglogP inv4m outside 147 1249 144021 1.39e-29 5.56e-29
## 3 Upregulated neglogP inv4m flanking 106 147 7161 2.73e- 1 3.64e- 1
## 4 Upregulated neglogP inv4m outside 106 601 50482 7.35e-22 1.47e-21
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## # groups <named list>, xmin <dbl>, xmax <dbl>
to_plot %>%
ggplot(aes(x = region, y = neglogP, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_nlp_region_dir, hide.ns = TRUE) +
facet_wrap(. ~ regulation) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none", strip.background = element_blank())
wilcox_nlp_zone_dir <- to_plot %>%
group_by(regulation) %>%
rstatix::wilcox_test(neglogP ~ zone, ref.group = "inv4m") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "zone", step.increase = 0.03)
to_plot %>%
ggplot(aes(x = zone, y = neglogP, color = region)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(width = 0.25, fill = NA, color = "black", outliers = FALSE) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
ggpubr::stat_pvalue_manual(wilcox_nlp_zone_dir, hide.ns = TRUE) +
facet_wrap(. ~ regulation) +
ggpubr::theme_classic2(base_size = 16) +
theme(legend.position = "none", strip.background = element_blank())
with(effects_df %>% filter(is_significant), table(region))
## region
## flanking inv4m outside
## 158 114 193
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.3.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.3.2 zoo_1.8-15 ggplot2_4.0.1 dplyr_1.1.4 here_1.0.2
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.6 sass_0.4.10 generics_0.1.4 rstatix_0.7.3
## [5] lattice_0.22-7 digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
## [9] grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0 rprojroot_2.1.1
## [13] jsonlite_2.0.0 backports_1.5.0 Formula_1.2-5 purrr_1.2.0
## [17] scales_1.4.0 jquerylib_0.1.4 abind_1.4-8 cli_3.6.5
## [21] rlang_1.1.6 withr_3.0.2 cachem_1.1.0 yaml_2.3.12
## [25] otel_0.2.0 ggbeeswarm_0.7.3 tools_4.5.2 ggsignif_0.6.4
## [29] ggpubr_0.6.2 forcats_1.0.1 broom_1.0.11 vctrs_0.6.5
## [33] R6_2.6.1 lifecycle_1.0.4 car_3.1-3 vipor_0.4.7
## [37] beeswarm_0.4.0 pkgconfig_2.0.3 pillar_1.11.1 bslib_0.9.0
## [41] gtable_0.3.6 glue_1.8.0 xfun_0.55 tibble_3.3.0
## [45] tidyselect_1.2.1 knitr_1.51 dichromat_2.0-0.1 farver_2.1.2
## [49] htmltools_0.5.9 rmarkdown_2.30 carData_3.0-5 labeling_0.4.3
## [53] compiler_4.5.2 S7_0.2.1