Overview

This analysis examines internode length measurements from the NC2025 field experiment to characterize the effect of the Inv4m introgression on plant architecture.

Research Questions: 1. Does Inv4m affect internode length patterns along the stem? 2. Does Inv4m affect the total number of nodes? 3. How do internode profiles relate to overall plant height?

1. Load Libraries and Data

library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggtext)
library(ggbeeswarm)
library(broom)
library(patchwork)
# Load field metadata
field_data <- read_csv(file.path(paths$data, "CLY25_Inv4m.csv"),
                       na = c("", "NA", NA))

# Extract metadata columns
metadata <- field_data %>%
  select(plant:group, X_pos, Y_pos)

# Load internode measurements
phytomere <- read_csv(file.path(paths$data, "CLY25_Inv4m_Internode.csv"),
                      na = c("", "na", "NA", NA))

# Load dissection validation data
dissection <- read_csv(file.path(paths$data, "CLY25_Inv4m_dissection.csv"))

cat("Field data dimensions:", dim(field_data), "\n")
## Field data dimensions: 442 23
cat("Internode measurements:", nrow(phytomere), "records\n")
## Internode measurements: 9005 records
cat("Dissection records:", nrow(dissection), "records\n")
## Dissection records: 835 records

2. Data Processing

# Join metadata with internode measurements
phytomere_processed <- metadata %>%
  inner_join(phytomere, by = "plant") %>%
  rename(genotype = "inv4m_gt") %>%
  filter(!is.na(length)) %>%
  filter(!(grepl("no tassel", notes))) %>%
  group_by(plant) %>%
  arrange(-internode) %>%
  mutate(from_top = row_number()) %>%
  ungroup()

# Filter by donor (focus on MI21 and TMEX with ≤10 internodes from top)
phytomere_processed <- phytomere_processed %>%
  filter(donor == "MI21" | (donor == "TMEX" & from_top <= 10))

# Create genotype labels for plotting
phytomere_processed <- phytomere_processed %>%
  mutate(
    Genotype_label = case_when(
      genotype == "CTRL" ~ "CTRL",
      genotype == "INV4M" ~ "<i>Inv4m</i>",
      TRUE ~ genotype
    )
  )

cat("\nProcessed data dimensions:", dim(phytomere_processed), "\n")
## 
## Processed data dimensions: 4952 19
cat("Plants:", n_distinct(phytomere_processed$plant), "\n")
## Plants: 424
cat("Genotypes:", paste(unique(phytomere_processed$genotype), collapse = ", "), "\n")
## Genotypes: CTRL, INV4M
cat("Donors:", paste(unique(phytomere_processed$donor), collapse = ", "), "\n")
## Donors: MI21, TMEX

3. Summary Statistics

# Summary by genotype and donor
summary_by_genotype <- phytomere_processed %>%
  group_by(donor, genotype) %>%
  summarise(
    n_plants = n_distinct(plant),
    n_internodes = n(),
    mean_length = mean(length, na.rm = TRUE),
    sd_length = sd(length, na.rm = TRUE),
    .groups = "drop"
  )

cat("Summary by genotype and donor:\n")
## Summary by genotype and donor:
print(summary_by_genotype)
## # A tibble: 4 × 6
##   donor genotype n_plants n_internodes mean_length sd_length
##   <chr> <chr>       <int>        <int>       <dbl>     <dbl>
## 1 MI21  CTRL          112         1556        13.1      5.47
## 2 MI21  INV4M         116         1607        12.9      5.23
## 3 TMEX  CTRL          101          936        16.3      4.69
## 4 TMEX  INV4M          95          853        17.6      4.61
# Node count per plant
node_count <- phytomere_processed %>%
  group_by(donor, genotype, plant) %>%
  summarise(n_nodes = n(), .groups = "drop")

cat("\nNode count summary:\n")
## 
## Node count summary:
node_count %>%
  group_by(donor, genotype) %>%
  summarise(
    n_plants = n(),
    mean_nodes = mean(n_nodes),
    sd_nodes = sd(n_nodes),
    .groups = "drop"
  ) %>%
  print()
## # A tibble: 4 × 5
##   donor genotype n_plants mean_nodes sd_nodes
##   <chr> <chr>       <int>      <dbl>    <dbl>
## 1 MI21  CTRL          112      13.9     0.842
## 2 MI21  INV4M         116      13.9     0.857
## 3 TMEX  CTRL          101       9.27    0.999
## 4 TMEX  INV4M          95       8.98    0.668

4. Internode Length Profiles

4.1 Theme and Color Palette

# Color palette (gold for CTRL, purple for Inv4m)
pal <- c("CTRL" = "gold", "<i>Inv4m</i>" = "purple4", "INV4M" = "purple4")

# Phenotype theme (matching ionome style)
pheno_theme <- theme_classic(base_size = 16) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_markdown(),
    axis.text.x = element_text(face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 14),
    legend.position = "top",
    legend.title = element_blank()
  )

4.2 Line Plot by Position from Top

plot_from_top_lines <- phytomere_processed %>%
  mutate(Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))) %>%
  arrange(Genotype_label, length) %>%
  ggplot(aes(x = factor(from_top), y = length, color = Genotype_label, group = plant)) +
  geom_line(alpha = 0.6) +
  geom_point(data = phytomere_processed %>%
               filter(has_ear == 1) %>%
               mutate(Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))),
             aes(x = factor(from_top), y = length, fill = Genotype_label),
             size = 3,
             shape = 21,
             color = "white",
             stroke = 1) +
  xlab("Internode position from top") +
  ylab("Internode length (cm)") +
  scale_x_discrete(limits = rev) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) +
  facet_wrap(~ donor, nrow = 2) +
  labs(title = "**Internode Length Profiles by Position from Top**",
       subtitle = "Filled points indicate ear node position") +
  pheno_theme +
  theme(strip.background = element_blank())

print(plot_from_top_lines)

# Save figure
ggsave(file.path(paths$figures, "internode_profiles_from_top.pdf"),
       plot_from_top_lines, width = 12, height = 10)
ggsave(file.path(paths$figures, "internode_profiles_from_top.png"),
       plot_from_top_lines, width = 12, height = 10, dpi = 300)

4.3 Boxplot by Position from Top

plot_from_top_boxplot <- phytomere_processed %>%
  mutate(Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))) %>%
  ggplot(aes(x = factor(from_top), y = length, color = Genotype_label)) +
  geom_boxplot(width = 0.6, position = position_dodge(width = 0.7)) +
  xlab("Internode position from top") +
  ylab("Internode length (cm)") +
  scale_color_manual(values = pal) +
  scale_x_discrete(limits = rev) +
  facet_wrap(~ donor, nrow = 2) +
  labs(title = "**Internode Length Distribution by Position**") +
  pheno_theme +
  theme(strip.background = element_blank())

print(plot_from_top_boxplot)

ggsave(file.path(paths$figures, "internode_boxplot_by_position.pdf"),
       plot_from_top_boxplot, width = 12, height = 10)
ggsave(file.path(paths$figures, "internode_boxplot_by_position.png"),
       plot_from_top_boxplot, width = 12, height = 10, dpi = 300)

5. Node Count Analysis

# Statistical tests: comparing genotypes WITHIN each donor
stat_test <- node_count %>%
  group_by(donor) %>%
  t_test(n_nodes ~ genotype) %>%
  add_significance("p") %>%
  add_xy_position(x = "genotype")

cat("Statistical test results by donor:\n")
## Statistical test results by donor:
print(stat_test)
## # A tibble: 2 × 14
##   donor .y.     group1 group2    n1    n2 statistic    df     p p.signif
##   <chr> <chr>   <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
## 1 MI21  n_nodes CTRL   INV4M    112   116     0.350  226. 0.727 ns      
## 2 TMEX  n_nodes CTRL   INV4M    101    95     2.39   176. 0.018 *       
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
# Summary statistics for reporting
node_summary_stats <- node_count %>%
  group_by(donor, genotype) %>%
  get_summary_stats(n_nodes, type = "mean_sd")
cat("\nSummary statistics:\n")
## 
## Summary statistics:
print(node_summary_stats)
## # A tibble: 4 × 6
##   donor genotype variable     n  mean    sd
##   <chr> <chr>    <fct>    <dbl> <dbl> <dbl>
## 1 MI21  CTRL     n_nodes    112 13.9  0.842
## 2 MI21  INV4M    n_nodes    116 13.9  0.857
## 3 TMEX  CTRL     n_nodes    101  9.27 0.999
## 4 TMEX  INV4M    n_nodes     95  8.98 0.668
# Create plot with significance brackets
node_count_plot <- node_count %>%
  mutate(
    Genotype_label = case_when(
      genotype == "CTRL" ~ "CTRL",
      genotype == "INV4M" ~ "<i>Inv4m</i>",
      TRUE ~ genotype
    ),
    Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))
  ) %>%
  ggplot(aes(x = Genotype_label, y = n_nodes, color = Genotype_label)) +
  geom_boxplot(width = 0.4, linewidth = 1, outlier.shape = NA) +
  geom_beeswarm(size = 2.5, alpha = 0.7) +
  scale_color_manual(values = pal) +
  facet_wrap(~ donor, nrow = 1) +
  labs(
    title = "**Node Count by Genotype**",
    x = NULL,
    y = "Number of nodes"
  ) +
  pheno_theme +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_markdown()
  )

print(node_count_plot)

ggsave(file.path(paths$figures, "node_count_boxplot.pdf"),
       node_count_plot, width = 10, height = 8)
ggsave(file.path(paths$figures, "node_count_boxplot.png"),
       node_count_plot, width = 10, height = 8, dpi = 300)

6. Height Validation: Sum of Internodes vs Direct Measurement

# Calculate sum of internodes per plant
sum_internodes <- phytomere_processed %>%
  group_by(plant, donor, genotype) %>%
  summarise(
    sum_nodes = sum(length, na.rm = TRUE),
    n_nodes = n(),
    .groups = "drop"
  )

# Join with field data for direct height measurement
compare_height <- sum_internodes %>%
  inner_join(field_data %>% select(plant, PH, X_pos, Y_pos), by = "plant") %>%
  filter(!is.na(PH), !is.na(sum_nodes))

# Linear regression
height_model <- lm(sum_nodes ~ PH, data = compare_height)
r_squared <- summary(height_model)$r.squared
slope <- coef(height_model)[2]
intercept <- coef(height_model)[1]

cat("\nHeight comparison regression:\n")
## 
## Height comparison regression:
cat(sprintf("y = %.2fx + %.2f\n", slope, intercept))
## y = -0.10x + 188.45
cat(sprintf("R² = %.3f\n", r_squared))
## R² = 0.005
# Create comparison plot
equation_text <- sprintf("y = %.2fx + %.2f\nR² = %.3f", slope, intercept, r_squared)

height_comparison_plot <- compare_height %>%
  mutate(
    Genotype_label = case_when(
      genotype == "CTRL" ~ "CTRL",
      genotype == "INV4M" ~ "<i>Inv4m</i>",
      TRUE ~ genotype
    )
  ) %>%
  ggplot(aes(x = PH, y = sum_nodes, color = Genotype_label)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid", linewidth = 1) +
  annotate("text",
           x = min(compare_height$PH, na.rm = TRUE) +
             0.05 * diff(range(compare_height$PH, na.rm = TRUE)),
           y = max(compare_height$sum_nodes, na.rm = TRUE) -
             0.1 * diff(range(compare_height$sum_nodes, na.rm = TRUE)),
           label = equation_text,
           hjust = 0, vjust = 1, size = 5) +
  labs(
    title = "**Plant Height Comparison**",
    subtitle = "Red line = 1:1 reference; Dashed = regression fit",
    x = "Direct height measurement (cm)",
    y = "Sum of internode lengths (cm)"
  ) +
  scale_color_manual(values = pal) +
  pheno_theme

print(height_comparison_plot)

ggsave(file.path(paths$figures, "internode_height_validation.pdf"),
       height_comparison_plot, width = 10, height = 8)
ggsave(file.path(paths$figures, "internode_height_validation.png"),
       height_comparison_plot, width = 10, height = 8, dpi = 300)

7. Dissection Validation

# Join dissection data with processed internode data
# Note: dissection uses 'from_top' column, not 'internode'
compare_dissection <- dissection %>%
  left_join(phytomere_processed, by = c("plant", "from_top")) %>%
  filter(!is.na(length), !is.na(length_dissection))

cat("Dissection comparison records:", nrow(compare_dissection), "\n")
## Dissection comparison records: 497
# Linear regression
dissection_model <- lm(length_dissection ~ length, data = compare_dissection)
slope_d <- round(coef(dissection_model)[2], 3)
intercept_d <- round(coef(dissection_model)[1], 3)
r_squared_d <- round(summary(dissection_model)$r.squared, 3)

# Create equation text
if (intercept_d >= 0) {
  equation_d <- paste0("y = ", slope_d, "x + ", intercept_d)
} else {
  equation_d <- paste0("y = ", slope_d, "x - ", abs(intercept_d))
}

dissection_plot <- compare_dissection %>%
  ggplot(aes(x = length, y = length_dissection)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  annotate("text",
           x = 15, y = 30,
           label = paste(equation_d, "\nR² =", r_squared_d),
           hjust = 1.1, vjust = 1.1,
           size = 6,
           fontface = "bold") +
  labs(
    title = "**Dissection Validation**",
    subtitle = "Comparing field measurements vs dissection measurements",
    x = "Field measurement (cm)",
    y = "Dissection measurement (cm)"
  ) +
  pheno_theme

print(dissection_plot)

ggsave(file.path(paths$figures, "internode_dissection_validation.pdf"),
       dissection_plot, width = 10, height = 8)
ggsave(file.path(paths$figures, "internode_dissection_validation.png"),
       dissection_plot, width = 10, height = 8, dpi = 300)

8. Statistical Summary

# Genotype effects on internode length by position
internode_effects <- phytomere_processed %>%
  group_by(donor, from_top) %>%
  filter(n_distinct(genotype) == 2) %>%
  summarise(
    ctrl_mean = mean(length[genotype == "CTRL"], na.rm = TRUE),
    inv4m_mean = mean(length[genotype == "INV4M"], na.rm = TRUE),
    diff = inv4m_mean - ctrl_mean,
    p_value = tryCatch(
      t.test(length ~ genotype)$p.value,
      error = function(e) NA_real_
    ),
    .groups = "drop"
  ) %>%
  mutate(
    significance = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

cat("\nInternode length effects by position (Inv4m - CTRL):\n")
## 
## Internode length effects by position (Inv4m - CTRL):
print(internode_effects %>% filter(!is.na(p_value)))
## # A tibble: 26 × 7
##    donor from_top ctrl_mean inv4m_mean    diff  p_value significance
##    <chr>    <int>     <dbl>      <dbl>   <dbl>    <dbl> <chr>       
##  1 MI21         1      24.2       25.1  0.907  7.48e- 2 ns          
##  2 MI21         2      22.3       18.1 -4.12   2.29e-19 ***         
##  3 MI21         3      11.9       10.7 -1.12   5.66e- 7 ***         
##  4 MI21         4      12.4       11.3 -1.09   1.78e-12 ***         
##  5 MI21         5      13.0       12.4 -0.586  1.19e- 4 ***         
##  6 MI21         6      13.2       13.0 -0.152  1.99e- 1 ns          
##  7 MI21         7      13.1       13.2  0.0647 6.68e- 1 ns          
##  8 MI21         8      15.2       15.2 -0.0508 8.16e- 1 ns          
##  9 MI21         9      15.1       15.9  0.755  3.54e- 4 ***         
## 10 MI21        10      13.1       14.1  1.03   2.25e- 5 ***         
## # ℹ 16 more rows
# Export summary statistics
write_csv(summary_by_genotype, file.path(paths$intermediate, "internode_summary_by_genotype.csv"))
write_csv(node_count, file.path(paths$intermediate, "internode_node_count_by_plant.csv"))
write_csv(compare_height, file.path(paths$intermediate, "internode_height_comparison.csv"))
write_csv(internode_effects, file.path(paths$intermediate, "internode_position_effects.csv"))

cat("\nResults exported to:", paths$intermediate, "\n")
## 
## Results exported to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate

9. Internode-by-Position T-Tests with FDR Correction

# Hypothesis: The difference in plant height is mainly due to internode 2 (just below tassel)
# Test: t-test for each internode position, corrected for multiple testing

# Focus on MI21 donor for the main analysis
internode_ttest <- phytomere_processed %>%
  filter(donor == "MI21") %>%
  group_by(from_top) %>%
  filter(n_distinct(genotype) == 2, n() >= 6) %>%  # Ensure both genotypes present with sufficient n
  t_test(length ~ genotype, detailed = TRUE) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance("p.adj") %>%
  arrange(from_top)

cat("=== T-TESTS BY INTERNODE POSITION (MI21 donor) ===\n")
## === T-TESTS BY INTERNODE POSITION (MI21 donor) ===
cat("H₀: No difference in internode length between CTRL and Inv4m\n")
## H₀: No difference in internode length between CTRL and Inv4m
cat("FDR correction applied across all positions\n\n")
## FDR correction applied across all positions
print(internode_ttest %>%
        select(from_top, n1, n2, estimate, estimate1, estimate2, statistic, p, p.adj, p.adj.signif))
## # A tibble: 16 × 10
##    from_top    n1    n2 estimate estimate1 estimate2 statistic        p    p.adj
##       <int> <int> <int>    <dbl>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
##  1        1   112   116  -0.907      24.2      25.1     -1.79  7.48e- 2 1.50e- 1
##  2        2   112   116   4.12       22.3      18.1      9.90  2.29e-19 3.66e-18
##  3        3   112   116   1.12       11.9      10.7      5.15  5.66e- 7 3.02e- 6
##  4        4   112   116   1.09       12.4      11.3      7.48  1.78e-12 1.42e-11
##  5        5   112   116   0.586      13.0      12.4      3.92  1.19e- 4 3.81e- 4
##  6        6   112   116   0.152      13.2      13.0      1.29  1.99e- 1 2.45e- 1
##  7        7   112   116  -0.0647     13.1      13.2     -0.429 6.68e- 1 7.13e- 1
##  8        8   112   116   0.0508     15.2      15.2      0.234 8.16e- 1 8.16e- 1
##  9        9   112   116  -0.755      15.1      15.9     -3.63  3.54e- 4 9.44e- 4
## 10       10   112   116  -1.03       13.1      14.1     -4.33  2.25e- 5 9   e- 5
## 11       11   112   116  -0.752      10.7      11.4     -3.20  1.55e- 3 3.54e- 3
## 12       12   112   116  -0.384       8.62      9       -1.39  1.65e- 1 2.4 e- 1
## 13       13   107   110  -0.341       5.82      6.16    -1.49  1.37e- 1 2.38e- 1
## 14       14    79    80  -0.0821      4.43      4.51    -0.471 6.38e- 1 7.13e- 1
## 15       15    23    21   0.364       4.17      3.81     1.32  1.94e- 1 2.45e- 1
## 16       16     3     4   1.25        4         2.75     1.99  1.49e- 1 2.38e- 1
## # ℹ 1 more variable: p.adj.signif <chr>
# Calculate effect sizes (difference and percent change)
internode_effects_detailed <- phytomere_processed %>%
  filter(donor == "MI21") %>%
  group_by(from_top) %>%
  filter(n_distinct(genotype) == 2) %>%
  summarise(
    n_ctrl = sum(genotype == "CTRL"),
    n_inv4m = sum(genotype == "INV4M"),
    ctrl_mean = mean(length[genotype == "CTRL"], na.rm = TRUE),
    inv4m_mean = mean(length[genotype == "INV4M"], na.rm = TRUE),
    diff_cm = inv4m_mean - ctrl_mean,
    pct_change = 100 * (inv4m_mean - ctrl_mean) / ctrl_mean,
    .groups = "drop"
  ) %>%
  left_join(internode_ttest %>% select(from_top, p, p.adj, p.adj.signif), by = "from_top") %>%
  arrange(from_top)

cat("\n=== EFFECT SIZES BY INTERNODE POSITION ===\n")
## 
## === EFFECT SIZES BY INTERNODE POSITION ===
print(internode_effects_detailed)
## # A tibble: 16 × 10
##    from_top n_ctrl n_inv4m ctrl_mean inv4m_mean diff_cm pct_change        p
##       <int>  <int>   <int>     <dbl>      <dbl>   <dbl>      <dbl>    <dbl>
##  1        1    112     116     24.2       25.1   0.907       3.75  7.48e- 2
##  2        2    112     116     22.3       18.1  -4.12      -18.5   2.29e-19
##  3        3    112     116     11.9       10.7  -1.12       -9.48  5.66e- 7
##  4        4    112     116     12.4       11.3  -1.09       -8.81  1.78e-12
##  5        5    112     116     13.0       12.4  -0.586      -4.51  1.19e- 4
##  6        6    112     116     13.2       13.0  -0.152      -1.16  1.99e- 1
##  7        7    112     116     13.1       13.2   0.0647      0.493 6.68e- 1
##  8        8    112     116     15.2       15.2  -0.0508     -0.334 8.16e- 1
##  9        9    112     116     15.1       15.9   0.755       5.00  3.54e- 4
## 10       10    112     116     13.1       14.1   1.03        7.88  2.25e- 5
## 11       11    112     116     10.7       11.4   0.752       7.05  1.55e- 3
## 12       12    112     116      8.62       9     0.384       4.46  1.65e- 1
## 13       13    107     110      5.82       6.16  0.341       5.86  1.37e- 1
## 14       14     79      80      4.43       4.51  0.0821      1.85  6.38e- 1
## 15       15     23      21      4.17       3.81 -0.364      -8.73  1.94e- 1
## 16       16      3       4      4          2.75 -1.25      -31.2   1.49e- 1
## # ℹ 2 more variables: p.adj <dbl>, p.adj.signif <chr>
# Summarize contribution to total height difference
total_diff <- sum(internode_effects_detailed$diff_cm, na.rm = TRUE)
internode_contribution <- internode_effects_detailed %>%
  mutate(
    contribution_cm = diff_cm,
    contribution_pct = 100 * diff_cm / total_diff
  ) %>%
  select(from_top, diff_cm, contribution_pct, p.adj, p.adj.signif) %>%
  arrange(desc(abs(contribution_pct)))

cat("\n=== CONTRIBUTION TO TOTAL HEIGHT DIFFERENCE ===\n")
## 
## === CONTRIBUTION TO TOTAL HEIGHT DIFFERENCE ===
cat(sprintf("Total height difference (sum of internode diffs): %.2f cm\n\n", total_diff))
## Total height difference (sum of internode diffs): -4.42 cm
print(internode_contribution)
## # A tibble: 16 × 5
##    from_top diff_cm contribution_pct    p.adj p.adj.signif
##       <int>   <dbl>            <dbl>    <dbl> <chr>       
##  1        2 -4.12              93.2  3.66e-18 ****        
##  2       16 -1.25              28.3  2.38e- 1 ns          
##  3        3 -1.12              25.4  3.02e- 6 ****        
##  4        4 -1.09              24.7  1.42e-11 ****        
##  5       10  1.03             -23.3  9   e- 5 ****        
##  6        1  0.907            -20.5  1.50e- 1 ns          
##  7        9  0.755            -17.1  9.44e- 4 ***         
##  8       11  0.752            -17.0  3.54e- 3 **          
##  9        5 -0.586             13.2  3.81e- 4 ***         
## 10       12  0.384             -8.68 2.4 e- 1 ns          
## 11       15 -0.364              8.24 2.45e- 1 ns          
## 12       13  0.341             -7.72 2.38e- 1 ns          
## 13        6 -0.152              3.45 2.45e- 1 ns          
## 14       14  0.0821            -1.86 7.13e- 1 ns          
## 15        7  0.0647            -1.46 7.13e- 1 ns          
## 16        8 -0.0508             1.15 8.16e- 1 ns
# Test hypothesis: Is internode 2 the main contributor?
internode2_contrib <- internode_contribution %>% filter(from_top == 2)
if (nrow(internode2_contrib) > 0) {
  cat("\n=== HYPOTHESIS TEST: Internode 2 contribution ===\n")
  cat(sprintf("Internode 2 difference: %.2f cm (%.1f%% of total)\n",
              internode2_contrib$diff_cm, internode2_contrib$contribution_pct))
  cat(sprintf("FDR-adjusted p-value: %.4f (%s)\n",
              internode2_contrib$p.adj, internode2_contrib$p.adj.signif))
}
## 
## === HYPOTHESIS TEST: Internode 2 contribution ===
## Internode 2 difference: -4.12 cm (93.2% of total)
## FDR-adjusted p-value: 0.0000 (****)
# Export results
write_csv(internode_ttest, file.path(paths$intermediate, "internode_ttest_by_position.csv"))
write_csv(internode_effects_detailed, file.path(paths$intermediate, "internode_effects_detailed.csv"))
write_csv(internode_contribution, file.path(paths$intermediate, "internode_contribution_to_height.csv"))

cat("\nResults exported to:", paths$intermediate, "\n")
## 
## Results exported to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate

9.1 Visualization of Significant Differences

# Create a bar plot showing internode contribution with significance markers
contribution_plot <- internode_contribution %>%
  mutate(
    from_top = factor(from_top),
    significant = p.adj < 0.05
  ) %>%
  ggplot(aes(x = from_top, y = diff_cm, fill = significant)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_text(aes(label = p.adj.signif,
                y = diff_cm + sign(diff_cm) * 0.3),
            size = 6, fontface = "bold") +
  scale_fill_manual(values = c("TRUE" = "purple4", "FALSE" = "gray70"),
                    labels = c("TRUE" = "FDR < 0.05", "FALSE" = "ns"),
                    name = "Significance") +
  scale_x_discrete(limits = rev) +
  coord_flip() +
  labs(
    title = "**Internode Length Difference (*Inv4m* − CTRL)**",
    subtitle = "MI21 donor; significance after FDR correction",
    x = "Internode position (from top)",
    y = "Difference (cm)"
  ) +
  theme_classic(base_size = 18) +
  theme(
    plot.title = element_markdown(),
    plot.subtitle = element_text(size = 14),
    legend.position = "top"
  )

print(contribution_plot)

ggsave(file.path(paths$figures, "internode_contribution_barplot.pdf"),
       contribution_plot, width = 10, height = 8)
ggsave(file.path(paths$figures, "internode_contribution_barplot.png"),
       contribution_plot, width = 10, height = 8, dpi = 300)

11. Key Findings

cat("=== KEY FINDINGS ===\n\n")
## === KEY FINDINGS ===
# Node count differences
node_count_summary <- node_count %>%
  group_by(donor, genotype) %>%
  summarise(mean_nodes = mean(n_nodes), .groups = "drop") %>%
  pivot_wider(names_from = genotype, values_from = mean_nodes) %>%
  mutate(diff = INV4M - CTRL)

cat("1. Node Count Differences:\n")
## 1. Node Count Differences:
print(node_count_summary)
## # A tibble: 2 × 4
##   donor  CTRL INV4M    diff
##   <chr> <dbl> <dbl>   <dbl>
## 1 MI21  13.9  13.9  -0.0394
## 2 TMEX   9.27  8.98 -0.288
# Overall internode length effect
overall_effect <- phytomere_processed %>%
  group_by(genotype) %>%
  summarise(mean_length = mean(length, na.rm = TRUE), .groups = "drop")

cat("\n2. Overall Mean Internode Length:\n")
## 
## 2. Overall Mean Internode Length:
print(overall_effect)
## # A tibble: 2 × 2
##   genotype mean_length
##   <chr>          <dbl>
## 1 CTRL            14.3
## 2 INV4M           14.6
# Height validation
cat("\n3. Height Validation:\n")
## 
## 3. Height Validation:
cat(sprintf("   Sum of internodes explains %.1f%% of plant height variance\n", r_squared * 100))
##    Sum of internodes explains 0.5% of plant height variance

12. Session Information

sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.3.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.2  broom_1.0.11     ggbeeswarm_0.7.3 ggtext_0.1.2    
##  [5] ggpubr_0.6.2     rstatix_0.7.3    lubridate_1.9.4  forcats_1.0.1   
##  [9] stringr_1.6.0    dplyr_1.1.4      purrr_1.2.0      readr_2.1.6     
## [13] tidyr_1.3.2      tibble_3.3.0     ggplot2_4.0.1    tidyverse_2.0.0 
## [17] here_1.0.2      
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       beeswarm_0.4.0     xfun_0.55          bslib_0.9.0       
##  [5] lattice_0.22-7     tzdb_0.5.0         vctrs_0.6.5        tools_4.5.2       
##  [9] generics_0.1.4     parallel_4.5.2     pkgconfig_2.0.3    Matrix_1.7-4      
## [13] RColorBrewer_1.1-3 S7_0.2.1           lifecycle_1.0.4    compiler_4.5.2    
## [17] farver_2.1.2       textshaping_1.0.4  carData_3.0-5      litedown_0.9      
## [21] vipor_0.4.7        htmltools_0.5.9    sass_0.4.10        yaml_2.3.12       
## [25] Formula_1.2-5      pillar_1.11.1      car_3.1-3          crayon_1.5.3      
## [29] jquerylib_0.1.4    cachem_1.1.0       abind_1.4-8        nlme_3.1-168      
## [33] commonmark_2.0.0   tidyselect_1.2.1   digest_0.6.39      stringi_1.8.7     
## [37] splines_4.5.2      labeling_0.4.3     rprojroot_2.1.1    fastmap_1.2.0     
## [41] grid_4.5.2         cli_3.6.5          magrittr_2.0.4     utf8_1.2.6        
## [45] dichromat_2.0-0.1  withr_3.0.2        scales_1.4.0       backports_1.5.0   
## [49] bit64_4.6.0-1      timechange_0.3.0   rmarkdown_2.30     bit_4.6.0         
## [53] otel_0.2.0         ggsignif_0.6.4     ragg_1.5.0         hms_1.1.4         
## [57] evaluate_1.0.5     knitr_1.51         mgcv_1.9-4         markdown_2.0      
## [61] rlang_1.1.6        gridtext_0.1.5     Rcpp_1.1.0         glue_1.8.0        
## [65] xml2_1.5.1         vroom_1.6.7        jsonlite_2.0.0     R6_2.6.1          
## [69] systemfonts_1.3.1