Skip to content

Commit

Permalink
Trim trailing zeroes from various datasets
Browse files Browse the repository at this point in the history
- Add `max_date()` function for trimming to a specified max date
- Limit date of returned data for `get_phac_d()` weekly datasets
- ccodwg/CovidTimelineCanada#131
  • Loading branch information
jeanpaulrsoucy committed Jan 23, 2024
1 parent 1944396 commit 56bb421
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(get_phac_d)
export(get_pt)
export(load_datasets)
export(load_ds)
export(max_date)
export(plot_datasets)
export(pushover)
export(read_d)
Expand Down
34 changes: 34 additions & 0 deletions R/assemble_final_datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ assemble_final_datasets <- function() {
ab8 <- dplyr::bind_rows(ab6, ab7)
cases_ab <- append_daily_d(cases_ab, ab8)
rm(ab1, ab2, ab3, ab4, ab5, ab6, ab6_pt, ab7, ab8) # clean up
# trim to max date
cases_ab <- max_date(cases_ab, "2023-12-30")

## bc
cases_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |>
Expand Down Expand Up @@ -80,6 +82,8 @@ assemble_final_datasets <- function() {
cases_nb <- append_daily_d(cases_nb, nb1)
cases_nb <- append_daily_d(cases_nb, nb2)
rm(nb1, nb2) # cleanup
# trim to max date
cases_nb <- max_date(cases_nb, "2023-12-30")

## nl
cases_nl <- dplyr::bind_rows(
Expand All @@ -104,6 +108,8 @@ assemble_final_datasets <- function() {
add_hr_col("Unknown") |>
convert_hr_names()
)
# trim to max date
cases_nl <- max_date(cases_nl, "2023-12-30")

## ns
ns1 <- read_d("raw_data/static/ns/ns_cases_hr_ts_1.csv")
Expand Down Expand Up @@ -168,6 +174,8 @@ assemble_final_datasets <- function() {
add_hr_col("Prince Edward Island") %>%
dplyr::filter(.data$date >= as.Date("2022-06-11"))
)
# trim to max date
cases_pe <- max_date(cases_pe, "2023-12-30")

## qc
tryCatch(
Expand Down Expand Up @@ -203,6 +211,8 @@ assemble_final_datasets <- function() {
sub_region_1 = ifelse(.data$sub_region_1 == "Not Assigned", "Unknown", .data$sub_region_1))
cases_sk <- append_daily_d(cases_sk, sk3)
rm(sk1, sk2, sk3) # cleanup
# trim to max date
cases_sk <- max_date(cases_sk, "2023-12-30")
},
error = function(e) {
print(e)
Expand Down Expand Up @@ -255,6 +265,8 @@ assemble_final_datasets <- function() {
.data$name, .data$region, .data$sub_region_1, .data$date, value = .data$value + .data$value2)
deaths_ab <- dplyr::bind_rows(deaths_ab, ab4)
rm(ab1, ab2, ab2_max, ab3, ab_max, ab4) # clean up
# trim to max date
deaths_ab <- max_date(deaths_ab, "2023-12-30")

## bc
deaths_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |>
Expand Down Expand Up @@ -412,6 +424,8 @@ assemble_final_datasets <- function() {
convert_hr_names()
deaths_on <- append_daily_d(on1, on2)
rm(on1, on2) # clean up
# trim to max date
deaths_on <- max_date(deaths_on, "2023-12-30")

## pe
deaths_pe <- dplyr::bind_rows(
Expand All @@ -421,6 +435,8 @@ assemble_final_datasets <- function() {
add_hr_col("Prince Edward Island") %>%
dplyr::filter(.data$date >= as.Date("2022-06-11"))
)
# trim to max date
deaths_pe <- max_date(deaths_pe, "2023-12-30")

## qc
tryCatch(
Expand Down Expand Up @@ -470,6 +486,8 @@ assemble_final_datasets <- function() {
cat("Error in processing pipeline", fill = TRUE)
}
)
# trim to max date
deaths_sk <- max_date(deaths_sk, "2023-12-30")

## yt
deaths_yt <- dplyr::bind_rows(
Expand Down Expand Up @@ -509,6 +527,8 @@ assemble_final_datasets <- function() {
# remove seemingly erroneous data (unexplained spikes)
hospitalizations_bc <- hospitalizations_bc[
!hospitalizations_bc$date %in% as.Date(c("2021-04-01", "2021-05-05", "2022-04-28", "2022-08-18")), ]
# trim to max date
hospitalizations_bc <- max_date(hospitalizations_bc, "2023-12-21")

## mb
hospitalizations_mb <- dplyr::bind_rows(
Expand Down Expand Up @@ -613,6 +633,8 @@ assemble_final_datasets <- function() {
# remove seemingly erroneous data (unexplained spikes)
icu_bc <- icu_bc[
!icu_bc$date %in% as.Date(c("2021-04-01", "2021-05-05", "2022-04-28", "2022-08-18")), ]
# trim to max date
icu_bc <- max_date(icu_bc, "2023-12-21")

## mb
icu_mb <- dplyr::bind_rows(
Expand Down Expand Up @@ -711,6 +733,8 @@ assemble_final_datasets <- function() {
.data$region,
.data$date,
value = cumsum(.data$value_daily))
# trim to max date
hosp_admissions_ab <- max_date(hosp_admissions_ab, "2023-12-30")

## bc
hosp_admissions_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |>
Expand Down Expand Up @@ -772,6 +796,8 @@ assemble_final_datasets <- function() {
.data$region,
.data$date,
value = cumsum(.data$value_daily))
# trim to max date
hosp_admissions_on <- max_date(hosp_admissions_on, "2023-12-30")

## pe
hosp_admissions_pe <- dplyr::bind_rows(
Expand All @@ -797,6 +823,8 @@ assemble_final_datasets <- function() {
report_pluck("hosp_admissions", "new_hospitalizations", "value_daily", "pt") |>
report_recent()
)
# trim to max date
hosp_admissions_sk <- max_date(hosp_admissions_sk, "2023-12-30")

## qc
hosp_admissions_qc <- read_d("raw_data/static/qc/qc_hosp_admissions_pt_ts.csv") |>
Expand Down Expand Up @@ -830,6 +858,8 @@ assemble_final_datasets <- function() {
.data$region,
.data$date,
value = cumsum(.data$value_daily))
# trim to max date
icu_admissions_ab <- max_date(icu_admissions_ab, "2023-12-30")

## bc
icu_admissions_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |>
Expand Down Expand Up @@ -891,6 +921,8 @@ assemble_final_datasets <- function() {
report_pluck("icu_admissions", "new_icu", "value_daily", "pt") |>
report_recent()
)
# trim to max date
icu_admissions_sk <- max_date(icu_admissions_sk, "2023-12-30")

## collate and process final dataset
suppressWarnings(rm(icu_admissions_pt)) # if re-running manually
Expand Down Expand Up @@ -990,6 +1022,8 @@ assemble_final_datasets <- function() {
# add ON back to main dataset
tests_completed_pt <- dplyr::bind_rows(tests_completed_pt, on3)
rm(on1, on2, on3) # clean up
# trim to max date
tests_completed_pt <- max_date(tests_completed_pt, "2023-12-30")

## add PE data
tests_completed_pt <- dplyr::bind_rows(
Expand Down
21 changes: 21 additions & 0 deletions R/process_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' @param geo The geographic level of the data. One of "pt", "hr", "sub-hr".
#' @param d1 Dataset to append to. A cumulative value dataset.
#' @param d2 Dataset being appended. A daily value dataset.
#' @param max_date The maximum date to trim the output dataset to.
#'
#' @name process_funs
NULL
Expand Down Expand Up @@ -214,6 +215,24 @@ report_recent <- function(d) {
)
}

#' Filter a dataset to a maximum date
#'
#' @rdname process_funs
#'
#' @export
max_date <- function(d, max_date) {
  # Keep only the rows of `d` whose `date` is on or before `max_date`.
  # `max_date` is passed through `as.Date()`, so a "YYYY-MM-DD" string works.
  # On failure the condition is printed, a short message is emitted, and the
  # function returns NULL (the value of `cat()`), i.e. it never re-raises.
  tryCatch(
    {
      # Convert the cut-off once, still inside tryCatch so that a bad date
      # string is handled by the same error path as a filtering failure.
      cutoff <- as.Date(max_date)
      # Inject the value with `!!` so that a column of `d` that happens to be
      # called `max_date` (or `cutoff`) cannot mask the argument inside the
      # data-masked filter expression.
      dplyr::filter(d, .data$date <= !!cutoff)
    },
    error = function(e) {
      print(e)
      cat("Error in max_date", fill = TRUE)
    }
  )
}

#' Append a daily value dataset to a cumulative value dataset
#'
#' @rdname process_funs
Expand Down Expand Up @@ -400,6 +419,8 @@ agg2can <- function(d) {
agg2can_completeness <- function(d) {
tryCatch(
{
# limit to maximum date (2023-12-31)
d <- d[d$date <= as.Date("2023-12-31"), ]
# get region values for each date
out_pt <- split(d$region, d$date)
# count number of regions for each date
Expand Down
4 changes: 4 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ get_phac_d <- function(val, region, exclude_repatriated = TRUE, keep_up_to_date
warning("keep_up_to_date = TRUE is not supported with this value, ignoring...")
}
}
# filter weekly data to max date (2023-12-30)
if (val %in% c("cases", "deaths", "tests_completed", "tests_completed_rvdss")) {
d <- d[d$date <= as.Date("2023-12-30"), ]
}
# return data
d
},
Expand Down
5 changes: 5 additions & 0 deletions man/process_funs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 56bb421

Please sign in to comment.