cmu-delphi · dsweber2 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025
@@ -1,6 +1,6 @@
 Package: epipredict
 Title: Basic epidemiology forecasting methods
-Version: 0.1.6
+Version: 0.1.7
 Authors@R: c(
     person("Daniel J.", "McDonald", , "[email protected]", role = c("aut", "cre")),
     person("Ryan", "Tibshirani", , "[email protected]", role = "aut"),

@@ -35,6 +35,8 @@ R -e 'devtools::document()'
 R -e 'pkgdown::build_site()'
 ```
 
+Note that sometimes the caches from either `pkgdown` or `knitr` can cause difficulties. To clear those, run `make`, with either `clean_knitr`, `clean_site`, or `clean` (which does both).
+
 If you work without R Studio and want to iterate on documentation, you might
 find [this
 script](https://gist.github.com/gadenbuie/d22e149e65591b91419e41ea5b2e0621)

@@ -0,0 +1,14 @@
+##
+# epipredict docs build
+#
+
+# knitr doesn't actually clean it's own cache properly; this just deletes any of
+# the article knitr caches in vignettes or the base
+clean_knitr:
+	rm -r *_cache; rm -r vignettes/*_cache
+clean_site:
+	Rscript -e "pkgdown::clean_cache(); pkgdown::clean_site()"
+# this combines 
+clean: clean_knitr clean_site
+
+# end
@@ -16,6 +16,8 @@ ggplot2::autoplot
 #' @param object An `epi_workflow`
 #' @param predictions A data frame with predictions. If `NULL`, only the
 #'   original data is shown.
+#' @param plot_data An epi_df of the data to plot against. This is for the case
+#'   where you have the actual results to compare the forecast against.
 #' @param .levels A numeric vector of levels to plot for any prediction bands.
 #'   More than 3 levels begins to be difficult to see.
 #' @param ... Ignored
@@ -84,7 +86,9 @@ NULL
 #' @export
 #' @rdname autoplot-epipred
 autoplot.epi_workflow <- function(
-    object, predictions = NULL,
+    object,
+    predictions = NULL,
+    plot_data = NULL,
     .levels = c(.5, .8, .95), ...,
     .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
     .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
@@ -111,30 +115,32 @@ autoplot.epi_workflow <- function(
   }
   keys <- c("geo_value", "time_value", "key")
   mold_roles <- names(mold$extras$roles)
-  edf <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
-  if (starts_with_impl("ahead_", names(y))) {
-    old_name_y <- unlist(strsplit(names(y), "_"))
-    shift <- as.numeric(old_name_y[2])
-    new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
-    edf <- rename(edf, !!new_name_y := !!names(y))
-  } else if (starts_with_impl("lag_", names(y))) {
-    old_name_y <- unlist(strsplit(names(y), "_"))
-    shift <- -as.numeric(old_name_y[2])
-    new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
-    edf <- rename(edf, !!new_name_y := !!names(y))
-  }
-
-  if (!is.null(shift)) {
-    edf <- mutate(edf, time_value = time_value + shift)
+  # extract the relevant column names for plotting
+  old_name_y <- unlist(strsplit(names(y), "_"))
+  new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
+  if (is.null(plot_data)) {
+    # the outcome has shifted, so we need to shift it forward (or back)
+    # by the corresponding amount
+    plot_data <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
+    if (starts_with_impl("ahead_", names(y))) {
+      shift <- as.numeric(old_name_y[2])
+    } else if (starts_with_impl("lag_", names(y))) {
+      old_name_y <- unlist(strsplit(names(y), "_"))
+      shift <- -as.numeric(old_name_y[2])
+    }
+    plot_data <- rename(plot_data, !!new_name_y := !!names(y))
+    if (!is.null(shift)) {
+      plot_data <- mutate(plot_data, time_value = time_value + shift)
+    }
+    other_keys <- setdiff(key_colnames(object), c("geo_value", "time_value"))
+    plot_data <- as_epi_df(plot_data,
+      as_of = object$fit$meta$as_of,
+      other_keys = other_keys
+    )
   }
-  other_keys <- setdiff(key_colnames(object), c("geo_value", "time_value"))
-  edf <- as_epi_df(edf,
-    as_of = object$fit$meta$as_of,
-    other_keys = other_keys
-  )
   if (is.null(predictions)) {
     return(autoplot(
-      edf, new_name_y,
+      plot_data, new_name_y,
       .color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
       .max_facets = .max_facets
     ))
@@ -146,27 +152,27 @@ autoplot.epi_workflow <- function(
     }
     predictions <- rename(predictions, time_value = target_date)
   }
-  pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(edf))
+  pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(plot_data))
   if (!pred_cols_ok$ok) {
     cli_warn(c(
       "`predictions` is missing required variables: {.var {pred_cols_ok$missing_names}}.",
       i = "Plotting the original data."
     ))
     return(autoplot(
-      edf, !!new_name_y,
+      plot_data, !!new_name_y,
       .color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
       .max_facets = .max_facets
     ))
   }
 
   # First we plot the history, always faceted by everything
-  bp <- autoplot(edf, !!new_name_y,
+  bp <- autoplot(plot_data, !!new_name_y,
     .color_by = "none", .facet_by = "all_keys",
     .base_color = "black", .max_facets = .max_facets
   )
 
   # Now, prepare matching facets in the predictions
-  ek <- epi_keys_only(edf)
+  ek <- epi_keys_only(plot_data)
   predictions <- predictions %>%
     mutate(
       .facets = interaction(!!!rlang::syms(as.list(ek)), sep = "/"),
@@ -204,7 +210,7 @@ autoplot.epi_workflow <- function(
 #' @export
 #' @rdname autoplot-epipred
 autoplot.canned_epipred <- function(
-    object, ...,
+    object, plot_data = NULL, ...,
     .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
     .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
     .base_color = "dodgerblue4",
@@ -218,7 +224,7 @@ autoplot.canned_epipred <- function(
   predictions <- object$predictions %>%
     rename(time_value = target_date)
 
-  autoplot(ewf, predictions,
+  autoplot(ewf, predictions, plot_data, ...,
     .color_by = .color_by, .facet_by = .facet_by,
     .base_color = .base_color, .max_facets = .max_facets
   )

@@ -16,7 +16,7 @@
 #'
 #' @param epi_data An [epiprocess::epi_df][epiprocess::as_epi_df]
 #' @param outcome A scalar character for the column name we wish to predict.
-#' @param args_list A list of dditional arguments as created by the
+#' @param args_list A list of additional arguments as created by the
 #'   [flatline_args_list()] constructor function.
 #'
 #' @return A data frame of point (and optionally interval) forecasts at a single