library(tidyverse)
library(lubridate)
library(ggrepel)

# Display label mapping (input files retain original names)
site_labels <- c("PSU2022" = "PA2022", "PSU2025" = "PA2025", "CLY2025" = "NC2025")

Function Definitions

calculate_corn_gdd <- function(tmax, tmin) {
  capped_tmax <- pmin(tmax, 30)
  
  daily_gdd <- ((capped_tmax + tmin) / 2) - 10
  
  pmax(daily_gdd, 0)
}
read_nasa_temp <- function(filepath) {
  data <- read_csv(
    filepath,
    skip = 9,
    col_types = cols(
      YEAR = col_integer(),
      MO = col_integer(),
      DY = col_integer(),
      HR = col_integer(),
      T2M = col_double()
    )
  )
  
  data <- data %>%
    mutate(
      date = make_date(YEAR, MO, DY),
      datetime = make_datetime(YEAR, MO, DY, HR)
    ) %>%
    filter(T2M != -999)
  
  return(data)
}
calculate_daily_gdd_from_hourly <- function(hourly_data) {
  daily_temps <- hourly_data %>%
    group_by(date) %>%
    summarise(
      tmax = max(T2M, na.rm = TRUE),
      tmin = min(T2M, na.rm = TRUE),
      .groups = "drop"
    )
  
  daily_temps <- daily_temps %>%
    mutate(gdd = calculate_corn_gdd(tmax, tmin))
  
  return(daily_temps)
}

Define Environments

# NASA POWER temperature data files (need to be placed in data/)
environments <- tibble(
  environment = c("PSU2025", "CLY2025", "PSU2022"),
  planting_date = mdy(c("5/20/2025", "4/04/2025", "5/26/2022")),
  temp_file = c(
    file.path(paths$data, "POWER_PSU2025.csv"),
    file.path(paths$data, "POWER_CLY2025.csv"),
    file.path(paths$data, "POWER_PSU2022.csv")
  )
)

# Verify input files exist
missing_files <- environments$temp_file[!file.exists(environments$temp_file)]
if (length(missing_files) > 0) {
  warning("Missing temperature files:\n",
          paste(" -", missing_files, collapse = "\n"),
          "\nDownload from NASA POWER and place in: ", paths$data)
}

Process Temperature Data

gdd_data <- environments %>%
  mutate(
    temp_data = map(temp_file, read_nasa_temp),
    daily_gdd = map(temp_data, calculate_daily_gdd_from_hourly)
  ) %>%
  select(environment, planting_date, daily_gdd) %>%
  unnest(daily_gdd) %>%
  mutate(display_env = site_labels[environment])
hourly_temp_100_dap <- environments %>%
  mutate(temp_data = map(temp_file, read_nasa_temp)) %>%
  select(environment, planting_date, temp_data) %>%
  unnest(temp_data) %>%
  mutate(days_after_planting = as.integer(date - planting_date)) %>%
  filter(days_after_planting >= 0 & days_after_planting <= 100)

# Export to data/ (used by GxE temperature sensitivity analysis)
hourly_output <- file.path(paths$data, "hourly_temp_100_DAP.csv")
write_csv(hourly_temp_100_dap, hourly_output)

cat("Exported hourly_temp_100_DAP.csv to:", hourly_output, "\n")
## Exported hourly_temp_100_DAP.csv to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/data/hourly_temp_100_DAP.csv
cat("Rows:", nrow(hourly_temp_100_dap), "\n")
## Rows: 7272
gdd_data <- gdd_data %>%
  mutate(
    days_after_planting = as.integer(date - planting_date),
    julian_day = yday(date)
  ) %>%
  filter(days_after_planting >= 0 &  days_after_planting <= 100) %>%
  group_by(environment) %>%
  mutate(cumulative_gdd_from_planting = cumsum(gdd)) %>%
  ungroup()

Prepare GDD Lookup for Phenology Data

gdd_at_phenology <- gdd_data %>%
  select(environment, date, cumulative_gdd_from_planting)

cat("GDD lookup table prepared with", nrow(gdd_at_phenology), "rows\n")
## GDD lookup table prepared with 303 rows
cat("Environments:", paste(site_labels[unique(gdd_at_phenology$environment)], collapse = ", "), "\n")
## Environments: PA2025, NC2025, PA2022

Summary Statistics

gdd_summary <- gdd_data %>%
  group_by(environment) %>%
  summarise(
    start_date = min(date),
    end_date = max(date),
    total_gdd = max(cumulative_gdd_from_planting),
    mean_daily_gdd = mean(gdd),
    days_tracked = n(),
    .groups = "drop"
  )

print(gdd_summary)
## # A tibble: 3 × 6
##   environment start_date end_date   total_gdd mean_daily_gdd days_tracked
##   <chr>       <date>     <date>         <dbl>          <dbl>        <int>
## 1 CLY2025     2025-04-04 2025-07-13     1243.           12.3          101
## 2 PSU2022     2022-05-26 2022-09-03     1204.           11.9          101
## 3 PSU2025     2025-05-20 2025-08-28     1141.           11.3          101

Visualization

ggplot(gdd_data, aes(x = date, y = gdd, color = display_env)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days",
    x = "Date",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = days_after_planting, y = gdd, color = display_env)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days",
    x = "Days After Planting",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  xlim(0, 100) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = julian_day, y = gdd, color = display_env)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days by Julian Day",
    x = "Julian Day",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = days_after_planting, y = cumulative_gdd_from_planting,
                     color = display_env)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Growing Degree Day Accumulation Across Environments",
    x = "Days After Planting",
    y = "Cumulative GDD (°C)",
    color = "Environment"
  ) +
  ylim(0, 1250) +
  xlim(0, 100) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "top"
  )

Export GDD Data

# Export GDD data to shared_paper intermediate
gdd_all_output <- file.path(paths$intermediate, "gdd_all_environments.csv")
write_csv(gdd_data, gdd_all_output)

# Export GDD lookup to inversion_paper intermediate (used by spatial correction scripts)
gdd_lookup_output <- file.path(paths$intermediate, "gdd_lookup_table.csv")
write_csv(gdd_at_phenology, gdd_lookup_output)

cat("Exported files:\n")
## Exported files:
cat("- ", gdd_all_output, "\n")
## -  /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/gdd_all_environments.csv
cat("- ", gdd_lookup_output, "\n")
## -  /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/gdd_lookup_table.csv