From bdda1c5e362f54179583f2d395593b382cf1abc8 Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Thu, 15 Aug 2024 12:15:54 -0700 Subject: [PATCH 1/5] deprecate rolling_origin, issue #448 --- R/rolling_origin.R | 6 ++++++ vignettes/Common_Patterns.Rmd | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 1a49993b..96352293 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -59,6 +59,12 @@ #' @export rolling_origin <- function(data, initial = 5, assess = 1, cumulative = TRUE, skip = 0, lag = 0, ...) { + + lifecycle::signal_stage( + stage = "superseded", what = "rolling_origin()", + with = I("sliding_window(), sliding_index() and sliding_period()") + ) + check_dots_empty() n <- nrow(data) diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index 039cf0ac..a69a0c7d 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -223,11 +223,3 @@ sliding_period(Chicago, date, "year") %>% head(2) ``` -All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `rolling_origin()` function: - -```{r} -rolling_origin(Chicago) %>% - head(2) -``` - -Note that all of these time-based resampling functions are deterministic: unlike the rest of the package, running these functions repeatedly under different random seeds will always return the same results. 
From 73036620755892c496e78fc6bbd4b4a7011ce2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=BAria=20Mercad=C3=A9-Besora?= <61558739+nmercadeb@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:26:09 -0700 Subject: [PATCH 2/5] Update vignettes/Common_Patterns.Rmd Co-authored-by: Michael Mahoney --- vignettes/Common_Patterns.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index a69a0c7d..01516084 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -222,4 +222,13 @@ And if you want to set the size of windows based on units of time, for instance sliding_period(Chicago, date, "year") %>% head(2) ``` +All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `sliding_window()` function with `lookback = Inf`: +```{r} +sliding_window(Chicago, lookback = Inf) %>% + head(2) +``` + +This is commonly referred to as "evaluation on a rolling forecasting origin", or more colloquially, "rolling origin cross-validation". + +Note that all of these time-based resampling functions are deterministic: unlike the rest of the package, running these functions repeatedly under different random seeds will always return the same results. 
From bbb42f4ef9494f84b7666feda3a59df7911762da Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Fri, 16 Aug 2024 08:34:43 -0700 Subject: [PATCH 3/5] superseded --- R/rolling_origin.R | 2 ++ vignettes/Common_Patterns.Rmd | 1 + 2 files changed, 3 insertions(+) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 96352293..3325961a 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -1,5 +1,7 @@ #' Rolling Origin Forecast Resampling #' +#' `r lifecycle::badge("superseded")` +#' #' This resampling method is useful when the data set has a strong time #' component. The resamples are not random and contain data points that are #' consecutive values. The function assumes that the original data set are diff --git a/vignettes/Common_Patterns.Rmd b/vignettes/Common_Patterns.Rmd index 01516084..a7c51356 100644 --- a/vignettes/Common_Patterns.Rmd +++ b/vignettes/Common_Patterns.Rmd @@ -222,6 +222,7 @@ And if you want to set the size of windows based on units of time, for instance sliding_period(Chicago, date, "year") %>% head(2) ``` + All of these functions produce analysis sets of the same size, with the start and end of the analysis set "sliding" down your data frame. 
If you'd rather have your analysis set get progressively larger, so that you're predicting new data based upon a growing set of older observations, you can use the `sliding_window()` function with `lookback = Inf`: ```{r} From 64d2739f42975b18fabd73d6c863d90d9982b042 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 18:01:14 +0100 Subject: [PATCH 4/5] Add context in description --- R/rolling_origin.R | 12 ++++++++++-- man/rolling_origin.Rd | 7 +++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/rolling_origin.R b/R/rolling_origin.R index 3325961a..a9099d46 100644 --- a/R/rolling_origin.R +++ b/R/rolling_origin.R @@ -1,11 +1,18 @@ #' Rolling Origin Forecast Resampling #' +#' @description #' `r lifecycle::badge("superseded")` #' #' This resampling method is useful when the data set has a strong time #' component. The resamples are not random and contain data points that are #' consecutive values. The function assumes that the original data set are #' sorted in time order. +#' +#' This function is superseded by [sliding_window()], [sliding_index()], and +#' [sliding_period()] which provide more flexibility and control. Superseded +#' functions will not go away, but active development will be focused on the new +#' functions. +#' #' @details The main options, `initial` and `assess`, control the number of #' data points from the original data that are in the analysis and assessment #' set, respectively. When `cumulative = TRUE`, the analysis set will grow as @@ -63,8 +70,9 @@ rolling_origin <- function(data, initial = 5, assess = 1, cumulative = TRUE, skip = 0, lag = 0, ...) 
{ lifecycle::signal_stage( - stage = "superseded", what = "rolling_origin()", - with = I("sliding_window(), sliding_index() and sliding_period()") + stage = "superseded", + what = "rolling_origin()", + with = I("`sliding_window()`, `sliding_index()` and `sliding_period()`") ) check_dots_empty() diff --git a/man/rolling_origin.Rd b/man/rolling_origin.Rd index 27597678..48f4c39e 100644 --- a/man/rolling_origin.Rd +++ b/man/rolling_origin.Rd @@ -42,10 +42,17 @@ and a column called \code{id} that has a character string with the resample identifier. } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + This resampling method is useful when the data set has a strong time component. The resamples are not random and contain data points that are consecutive values. The function assumes that the original data set are sorted in time order. + +This function is superseded by \code{\link[=sliding_window]{sliding_window()}}, \code{\link[=sliding_index]{sliding_index()}}, and +\code{\link[=sliding_period]{sliding_period()}} which provide more flexibility and control. Superseded +functions will not go away, but active development will be focused on the new +functions. } \details{ The main options, \code{initial} and \code{assess}, control the number of From cde1d32782b4308db336070c99db4efcb0977e78 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 18:05:40 +0100 Subject: [PATCH 5/5] Add acknowledgment --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a6f6374c..93778ddc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Fixed example for `nested_cv()` (@seb09, #520). +* `rolling_origin()` is now superseded by `sliding_window()`, `sliding_index()`, and `sliding_period()` which provide more flexibility and control (@nmercadeb, #524). 
+ ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471).