Skip to content

Commit

Permalink
Create combined epiweekly data (#274)
Browse files Browse the repository at this point in the history
  • Loading branch information
damonbayer authored Jan 7, 2025
1 parent e07eab0 commit 1a5b8bd
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 6 deletions.
93 changes: 90 additions & 3 deletions pipelines/generate_epiweekly.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ script_packages <- c(
"dplyr",
"forecasttools",
"fs",
"readr"
"readr",
"lubridate"
)

## load in packages without messages
Expand Down Expand Up @@ -81,9 +82,95 @@ convert_daily_to_epiweekly <- function(
write_delim(epiweekly_data, output_file, delim = delim)
}

#' Convert a combined (ED visit + hospital admission) dataset to epiweekly
#'
#' Reads `<model_run_dir>/data/<dataname>`, aggregates the rows whose
#' `value_type` is `"ed_visits"` with `forecasttools::daily_to_epiweekly()`,
#' verifies that the rows whose `value_type` is `"hospital_admissions"` are
#' already dated epiweekly, and writes both sets of rows to
#' `<model_run_dir>/data/epiweekly_<dataname without extension>.tsv`.
#' Rows with any other `value_type` are not carried over to the output.
#'
#' @param model_run_dir Directory of the model run; the input is read from
#'   and the output written to its `data` subdirectory.
#' @param dataname File name of the tab-separated input. It must provide
#'   the columns `date`, `geo_value`, `disease`, `data_type`, `value_type`
#'   and `value`.
#' @param strict Forwarded to `forecasttools::daily_to_epiweekly()`.
#' @param day_of_week Forwarded to `epiweek_to_date()`; selects which day
#'   of an epiweek is used as that week's date, both when validating the
#'   hospital admission dates and when dating the aggregated ED visits.
#' @return The value returned by `write_tsv()`. Called for the side effect
#'   of writing the epiweekly file.
convert_comb_daily_to_ewkly <- function(
    model_run_dir, dataname = "combined_training_data.tsv",
    strict = FALSE, day_of_week = 7) {
  message(glue::glue("Generating epi-weekly data {model_run_dir}..."))

  data_basename <- path_ext_remove(dataname)
  data_path <- path(model_run_dir, "data", dataname)

  raw_data <- read_tsv(
    data_path,
    col_types = cols(
      date = col_date(),
      geo_value = col_character(),
      disease = col_character(),
      data_type = col_character(),
      value_type = col_character(),
      value = col_double()
    )
  )

  daily_ed_visit_data <- raw_data |>
    filter(value_type == "ed_visits")

  # Hospital admissions are expected to be epiweekly already; only the
  # epiweek / epiyear columns are added so they line up with the
  # aggregated ED visit rows when the two are bound together below.
  ewkly_hospital_admission_data <- raw_data |>
    filter(value_type == "hospital_admissions") |>
    mutate(
      epiweek = epiweek(date),
      epiyear = epiyear(date)
    )

  # Verify hospital admissions dates are epiweekly.
  # `filter()` drops rows whose condition is NA, so a bare
  # `date != implied_date` would let missing / unparseable dates slip
  # through the check; NA comparisons are therefore flagged explicitly.
  # `unique()` keeps the error readable when the same bad date occurs for
  # many disease / location combinations.
  invalid_dates <-
    ewkly_hospital_admission_data |>
    mutate(implied_date = epiweek_to_date(epiweek,
      epiyear,
      day_of_week = day_of_week
    )) |>
    filter(is.na(date) | is.na(implied_date) | date != implied_date) |>
    pull(date) |>
    unique()

  if (length(invalid_dates) > 0) {
    stop(glue::glue(
      "Invalid dates found in hospital admissions data: ",
      "{paste0(invalid_dates, collapse = ', ')}"
    ))
  }

  epiweekly_ed_visit_data <- daily_ed_visit_data |>
    forecasttools::daily_to_epiweekly(
      value_col = "value",
      weekly_value_name = "value",
      id_cols = c("disease", "geo_value", "data_type", "value_type"),
      strict = strict
    ) |>
    mutate(date = epiweek_to_date(epiweek,
      epiyear,
      day_of_week = day_of_week
    ))

  epiweekly_data <- bind_rows(
    epiweekly_ed_visit_data,
    ewkly_hospital_admission_data
  ) |>
    arrange(date, value_type, disease) |>
    select(date, everything())

  output_file <- path(
    model_run_dir, "data",
    glue::glue("epiweekly_{data_basename}"),
    ext = "tsv"
  )

  write_tsv(epiweekly_data, output_file)
}


#' Generate every epiweekly dataset for one model run
#'
#' Converts the per-source files (`data.tsv`, `eval_data.tsv`) with
#' `convert_daily_to_epiweekly()` and the combined files
#' (`combined_training_data.tsv`, `combined_eval_data.tsv`) with
#' `convert_comb_daily_to_ewkly()`. Each input is converted exactly once;
#' repeating a conversion would only rewrite the same output file.
#'
#' @param model_run_dir Directory of the model run whose `data`
#'   subdirectory holds the input files.
#' @return The value of the last conversion call. Called for its side
#'   effects.
main <- function(model_run_dir) {
  convert_daily_to_epiweekly(model_run_dir,
    dataname = "data.tsv"
  )
  convert_daily_to_epiweekly(model_run_dir,
    dataname = "eval_data.tsv"
  )
  convert_comb_daily_to_ewkly(model_run_dir,
    dataname = "combined_training_data.tsv"
  )
  convert_comb_daily_to_ewkly(model_run_dir,
    dataname = "combined_eval_data.tsv"
  )
}

# Create a parser
Expand Down
3 changes: 1 addition & 2 deletions pipelines/prep_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def combine_nssp_and_nhsn(
)
.with_columns(
pl.lit(disease).alias("disease"),
pl.lit("train").alias("data_type"),
)
)

Expand Down Expand Up @@ -379,7 +378,7 @@ def process_and_save_state(
end_date=last_training_date,
disease=disease,
state_abb=state_abb,
)
).with_columns(pl.lit("train").alias("data_type"))

nssp_training_dates = (
nssp_training_data.get_column("date").unique().to_list()
Expand Down
2 changes: 1 addition & 1 deletion pipelines/prep_eval_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def save_eval_data(
end_date=last_training_date,
disease=disease,
state_abb=state,
)
).with_columns(data_type=pl.lit("eval"))

combined_eval_dat = combine_nssp_and_nhsn(
nssp_data=nssp_data,
Expand Down

0 comments on commit 1a5b8bd

Please sign in to comment.