CDCgov · dylanhmorris · Dec 2, 2024 · Nov 22, 2024 · Nov 22, 2024 · Nov 22, 2024
@@ -90,6 +90,29 @@ process_state_forecast <- function(model_run_dir, save = TRUE) {
       values_to = ".value"
     )
 
+  epiweekly_forecast_samples <- forecast_samples |>
+    dplyr::filter(disease != "prop_disease_ed_visits") |>
+    dplyr::group_by(disease) |>
+    dplyr::group_modify(~ forecasttools::daily_to_epiweekly(.x,
+      value_col = ".value", weekly_value_name = ".value",
+      strict = TRUE
+    )) |>
+    dplyr::ungroup() |>
+    tidyr::pivot_wider(
+      names_from = disease,
+      values_from = .value
+    ) |>
+    dplyr::mutate(prop_disease_ed_visits = Disease /
+      (Disease + Other)) |>
+    tidyr::pivot_longer(c(Disease, Other, prop_disease_ed_visits),
+      names_to = "disease",
+      values_to = ".value"
+    ) |>
+    dplyr::mutate(date = forecasttools::epiweek_to_date(
+      epiweek,
+      epiyear,
+      day_of_week = 7
+    ))
 
   forecast_ci <-
     forecast_samples |>
@@ -113,6 +136,12 @@ process_state_forecast <- function(model_run_dir, save = TRUE) {
         ext = "parquet"
       )
     )
+    arrow::write_parquet(
+      epiweekly_forecast_samples,
+      fs::path(model_run_dir, "epiweekly_forecast_samples",
+        ext = "parquet"
+      )
+    )
 
     arrow::write_parquet(
       forecast_ci,
@@ -124,6 +153,7 @@ process_state_forecast <- function(model_run_dir, save = TRUE) {
   return(list(
     combined_dat = combined_dat,
     forecast_samples = forecast_samples,
+    epiweekly_forecast_samples = epiweekly_forecast_samples,
     forecast_ci = forecast_ci
   ))
 }
@@ -5,21 +5,27 @@
 #' @param model_run_dir Path to a directory containing
 #' forecast draws to process, whose basename is the forecasted
 #' location.
-#' @param report_date Report date for which to generate epiweekly quantiles.
+#' @param report_date Report date for which to generate epiweekly
+#' quantiles.
 #' @param max_lookback_days How many days before the report date
 #' to look back when generating epiweekly quantiles (determines how
 #' many negative epiweekly forecast horizons (i.e. nowcast/backcast)
 #' quantiles will be generated.
+#' @param epiweekly_other Use an expressly epiweekly forecast
+#' for non-target ED visits instead of a daily forecast aggregated
+#' to epiweekly? Boolean, default `FALSE`.
 #' @return A [`tibble`][tibble::tibble()] of quantiles.
 #' @export
 to_epiweekly_quantiles <- function(model_run_dir,
                                    report_date,
-                                   max_lookback_days) {
+                                   max_lookback_days,
+                                   epiweekly_other = FALSE) {
   message(glue::glue("Processing {model_run_dir}..."))
   draws_path <- fs::path(model_run_dir,
     "forecast_samples",
     ext = "parquet"
   )
+
   location <- fs::path_file(model_run_dir)
 
   draws <- arrow::read_parquet(draws_path) |>
@@ -42,19 +48,34 @@ to_epiweekly_quantiles <- function(model_run_dir,
       strict = TRUE
     )
 
-  epiweekly_total_draws <- draws |>
-    dplyr::filter(.data$disease == "Other") |>
-    forecasttools::daily_to_epiweekly(
-      date_col = "date",
-      value_col = ".value",
-      id_cols = ".draw",
-      weekly_value_name = "epiweekly_total",
-      strict = TRUE
+  if (!epiweekly_other) {
+    epiweekly_other_draws <- draws |>
+      dplyr::filter(.data$disease == "Other") |>
+      forecasttools::daily_to_epiweekly(
+        date_col = "date",
+        value_col = ".value",
+        id_cols = ".draw",
+        weekly_value_name = "epiweekly_other",
+        strict = TRUE
+      )
+  } else {
+    denom_path <- fs::path(model_run_dir,
+      "epiweekly_other_ed_visits_forecast",
+      ext = "parquet"
     )
 
+    epiweekly_other_draws <- arrow::read_parquet(denom_path) |>
+      dplyr::filter(.data$date >= lubridate::ymd(!!report_date) -
+        lubridate::days(!!max_lookback_days)) |>
+      dplyr::rename("epiweekly_other" = "other_ed_visits") |>
+      dplyr::mutate(
+        epiweek = lubridate::epiweek(.data$date),
+        epiyear = lubridate::epiyear(.data$date)
+      )
+  }
   epiweekly_prop_draws <- dplyr::inner_join(
     epiweekly_disease_draws,
-    epiweekly_total_draws,
+    epiweekly_other_draws,
     by = c(
       "epiweek",
       "epiyear",
@@ -63,7 +84,8 @@ to_epiweekly_quantiles <- function(model_run_dir,
   ) |>
     dplyr::mutate(
       "epiweekly_proportion" =
-        .data$epiweekly_disease / .data$epiweekly_total
+        .data$epiweekly_disease / (.data$epiweekly_disease +
+          .data$epiweekly_other)
     )
 
 
@@ -90,11 +112,17 @@ to_epiweekly_quantiles <- function(model_run_dir,
 #' `{disease}_r_{reference_date}_f_{first_data_date}_t_{last_data_date}`.
 #' @param exclude Locations to exclude, if any, as a list of strings.
 #' Default `NULL` (exclude nothing).
-#'
+#' @param epiweekly_other Use an expressly epiweekly forecast
+#' for non-target ED visits instead of a daily forecast aggregated
+#' to epiweekly? Boolean, default `FALSE`.
+#' @return The complete hubverse-format [`tibble`][tibble::tibble()].
 #' @export
 to_epiweekly_quantile_table <- function(model_batch_dir,
-                                        exclude = NULL) {
-  locations_to_process <- fs::dir_ls(model_batch_dir,
+                                        exclude = NULL,
+                                        epiweekly_other = FALSE) {
+  model_runs_path <- fs::path(model_batch_dir, "model_runs")
+
+  locations_to_process <- fs::dir_ls(model_runs_path,
     type = "directory"
   )
 
@@ -129,12 +157,14 @@ to_epiweekly_quantile_table <- function(model_batch_dir,
       to_epiweekly_quantiles(
         x,
         report_date = report_date,
-        max_lookback_days = 8
+        max_lookback_days = 8,
+        epiweekly_other = epiweekly_other
       )
     }
-    ## ensures we get the full -1 horizon but do not
-    ## waste time quantilizing draws that will not be
-    ## included in the final table.
+    ## max_lookback_days = 8 ensures we get
+    ## the full -1 horizon but do not waste
+    ## time quantilizing draws that will not
+    ## be included in the final table.
   ) |>
     dplyr::bind_rows() |>
     forecasttools::get_hubverse_table(

@@ -8,6 +8,7 @@
 import argparse
 import datetime
 import itertools
+from pathlib import Path
 
 import polars as pl
 from azure.batch import models
@@ -21,6 +22,7 @@ def main(
     job_id: str,
     pool_id: str,
     diseases: str,
+    output_subdir: str | Path = "./",
     container_image_name: str = "pyrenew-hew",
     container_image_version: str = "latest",
     excluded_locations: list[str] = [
@@ -46,6 +48,11 @@ def main(
         as a whitespace-separated string. Supported
         values are 'COVID-19' and 'Influenza'.
 
+    output_subdir
+        Subdirectory of the output blob storage container
+        in which to save results.
+
+
     container_image_name:
         Name of the container to use for the job.
         This container should exist within the Azure
@@ -108,7 +115,7 @@ def main(
                 "target": "/pyrenew-hew/params",
             },
             {
-                "source": "pyrenew-hew-prod-output",
+                "source": "pyrenew-test-output",
                 "target": "/pyrenew-hew/output",
             },
             {
@@ -123,17 +130,17 @@ def main(
         "python pipelines/forecast_state.py "
         "--disease {disease} "
         "--state {state} "
-        "--n-training-days 365 "
+        "--n-training-days {n_training} "
         "--n-warmup 1000 "
         "--n-samples 500 "
         "--facility-level-nssp-data-dir nssp-etl/gold "
         "--state-level-nssp-data-dir "
         "nssp-archival-vintages/gold "
         "--param-data-dir params "
-        "--output-data-dir output "
+        "--output-dir {output_dir} "
         "--priors-path config/eval_priors.py "
         "--report-date {report_date:%Y-%m-%d} "
-        "--exclude-last-n-days 2 "
+        "--exclude-last-n-days {exclude_last_n} "
         "--score "
         "--eval-data-path "
         "nssp-archival-vintages/latest_comprehensive.parquet"
@@ -158,12 +165,17 @@ def main(
     for disease, report_date, loc in itertools.product(
         disease_list, report_dates, all_locations
     ):
+        n_training = 90
+        exclude_last_n = 3
         task = get_task_config(
             f"{job_id}-{loc}-{disease}-{report_date}",
             base_call=base_call.format(
                 state=loc,
                 disease=disease,
                 report_date=report_date,
+                n_training=n_training,
+                exclude_last_n=exclude_last_n,
+                output_dir=str(Path("output", output_subdir)),
             ),
             container_settings=container_settings,
         )
@@ -190,6 +202,17 @@ def main(
     ),
 )
 
+parser.add_argument(
+    "--output-subdir",
+    type=str,
+    help=(
+        "Subdirectory of the output blob storage container "
+        "in which to save results."
+    ),
+    default="./",
+)
+
+
 parser.add_argument(
     "--container-image-name",
     type=str,