diff --git a/.Rbuildignore b/.Rbuildignore index d2ee6dad..ece1a957 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,13 +1,15 @@ +^.*\.Rproj$ +^CODE_OF_CONDUCT\.md$ +^LICENSE\.md$ ^README\.Rmd$ ^README_cache$ -^data-raw$ -^.*\.Rproj$ ^\.Rproj\.user$ -^codecov\.yml$ ^\.github$ +^\.pre-commit-config\.yaml$ ^_pkgdown\.yml$ +^codecov\.yml$ +^data-raw$ ^docs$ ^pkgdown$ -^CODE_OF_CONDUCT\.md$ -^LICENSE\.md$ ^touchstone$ +^paper$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 3f76b1b6..a3ac6182 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} @@ -29,7 +29,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,9 +41,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: | - any::rcmdcheck - any::XML + extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 59ae3087..087f0b05 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,33 +1,46 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] - tags: ['*'] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: name: pkgdown jobs: pkgdown: runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: pkgdown + extra-packages: any::pkgdown, local::. needs: website - - name: Deploy package - run: | - git config --local user.name "$GITHUB_ACTOR" - git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 3c0da1c9..2c5bb502 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: @@ -15,16 +15,36 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: covr + extra-packages: any::covr + needs: coverage - name: Test coverage - run: covr::codecov() + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index d18b0495..fad5789a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,13 @@ inst/doc .DS_Store docs -README_cache \ No newline at end of file +README_cache +.Rprofile +.vscode/launch.json +.pre-commit-config.yaml +tests/README.md +paper/*.html +paper/*.pdf +paper/paper_cache/ +paper/paper_files/ +chitra/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 4660279e..0efad24f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: conmat Title: Builds contact matrices using GAMs and population data -Version: 0.0.0.9004 +Version: 0.0.2.9000 Authors@R: c( person(given = "Nicholas", family = "Tierney", @@ -17,6 +17,11 @@ Authors@R: c( role = c("aut"), email = "aarathybabu907@gmail.com", comment = c(ORCID = "https://orcid.org/0000-0002-6982-5989")), + person(given = "Michael", + family = "Lydeamore", + role = "aut", + email = "michael.lydeamore@monash.edu", + comment = c(ORCID = "https://orcid.org/0000-0001-6515-827X")), person("Commonwealth of Australia", "AEC", role = c("cph")), person("Australian Bureau of Statistics", "ABS", role = c("cph")) ) @@ -25,35 +30,44 @@ Description: Builds contact matrices using GAMs and population data. This packag Electoral Commission and Australian Bureau of Statistics) 2020. License: MIT + file LICENSE Depends: - R (>= 2.10) + R (>= 4.1.0) Suggests: covr, knitr, + vdiffr, testthat (>= 3.0.0), rmarkdown, - future + future, + spelling, + deSolve VignetteBuilder: knitr Config/testthat/edition: 3 Encoding: UTF-8 -Language: en-GB +Language: en-US LazyData: true LazyDataCompression: xz Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.2 Imports: mgcv, - dplyr, + dplyr (>= 1.0.9), + tidyr (>= 1.2.0), + cli, stats, - tidyr, - socialmixr, + socialmixr (>= 0.2.0), ggplot2, - tibble, + tibble (>= 3.1.8), patchwork, magrittr, - stringr, - rlang, - glue, + rlang (>= 1.0.4), + glue (>= 1.6.2), readr, furrr, - purrr + purrr (>= 1.0.1), + tidyselect, + vctrs, + scales, + english, + waldo, + stringr diff --git a/NAMESPACE b/NAMESPACE index 75664264..1a7cc4fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,42 +1,124 @@ # Generated by roxygen2: do not edit by hand +S3method(age_breaks,array) +S3method(age_breaks,conmat_age_matrix) +S3method(age_breaks,conmat_setting_prediction_matrix) +S3method(age_breaks,default) +S3method(age_breaks,matrix) +S3method(age_breaks,ngm_setting_matrix) +S3method(age_breaks,numeric) +S3method(age_breaks,predicted_contacts) +S3method(age_breaks,setting_contact_model) +S3method(age_breaks,setting_data) +S3method(age_breaks,setting_vaccination_matrix) +S3method(age_breaks,transmission_probability_matrix) +S3method(age_label,conmat_population) +S3method(age_label,default) +S3method(as_conmat_population,"NULL") +S3method(as_conmat_population,data.frame) +S3method(as_conmat_population,default) +S3method(as_conmat_population,grouped_df) +S3method(as_conmat_population,list) +S3method(as_setting_prediction_matrix,conmat_setting_prediction_matrix) +S3method(as_setting_prediction_matrix,default) +S3method(as_setting_prediction_matrix,list) +S3method(autoplot,conmat_age_matrix) +S3method(autoplot,conmat_setting_prediction_matrix) +S3method(autoplot,ngm_setting_matrix) +S3method(autoplot,setting_vaccination_matrix) +S3method(autoplot,transmission_probability_matrix) +S3method(generate_ngm,conmat_population) +S3method(generate_ngm,conmat_setting_prediction_matrix) +S3method(get_age_population_function,conmat_population) +S3method(get_age_population_function,data.frame) +S3method(population_label,conmat_population) +S3method(population_label,default) +S3method(predictions_to_matrix,predicted_contacts) +S3method(print,conmat_age_matrix) +S3method(print,conmat_population) +S3method(print,conmat_setting_prediction_matrix) +S3method(print,ngm_setting_matrix) +S3method(print,setting_contact_model) +S3method(print,setting_data) +S3method(print,setting_vaccination_matrix) +S3method(print,transmission_probability_matrix) export("%>%") -export(abbreviate_states) +export(abs_abbreviate_states) +export(abs_age_education_lga) +export(abs_age_education_state) export(abs_age_lga) export(abs_age_state) +export(abs_age_work_lga) +export(abs_age_work_state) +export(abs_unabbreviate_states) +export(add_age_partial_sum) export(add_intergenerational) export(add_modelling_features) export(add_offset) export(add_population_age_to) export(add_school_work_participation) +export(add_symmetrical_features) +export(age) +export(age_breaks) +export(age_label) export(age_population) export(aggregate_predicted_contacts) export(apply_vaccination) -export(check_lga_name) +export(as_conmat_population) +export(as_setting_prediction_matrix) +export(autoplot) +export(clean_term_names) +export(conmat_population) +export(create_age_grid) export(estimate_setting_contacts) +export(extract_term_names) export(extrapolate_polymod) export(fit_setting_contacts) export(fit_single_contact_model) export(generate_ngm) +export(generate_ngm_oz) +export(get_abs_household_size_distribution) +export(get_abs_household_size_population) +export(get_abs_per_capita_household_size) +export(get_abs_per_capita_household_size_lga) +export(get_abs_per_capita_household_size_state) export(get_age_population_function) -export(get_data_abs_age_education) -export(get_data_abs_age_work) -export(get_household_size_distribution) -export(get_per_capita_household_size) export(get_polymod_contact_data) export(get_polymod_per_capita_household_size) export(get_polymod_population) export(get_polymod_setting_data) export(get_setting_transmission_matrices) +export(gg_age_partial_pred_long) +export(gg_age_partial_sum) +export(gg_age_terms_settings) export(matrix_to_predictions) +export(new_age_matrix) +export(new_ngm_setting_matrix) +export(new_setting_data) export(per_capita_household_size) -export(plot_matrix) -export(plot_setting_matrices) +export(pivot_longer_age_preds) export(polymod) +export(population) +export(population_label) export(predict_contacts) export(predict_contacts_1y) +export(predict_individual_terms) export(predict_setting_contacts) export(predictions_to_matrix) -export(unabbreviate_states) +export(prepare_population_for_modelling) +export(raw_eigenvalue) +export(scaling) +export(setting_prediction_matrix) +export(transmission_probability_matrix) +import(rlang) +importFrom(ggplot2,aes) +importFrom(ggplot2,autoplot) +importFrom(ggplot2,coord_fixed) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,geom_tile) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,scale_fill_viridis_c) +importFrom(ggplot2,theme_minimal) importFrom(magrittr,"%>%") importFrom(stats,predict) diff --git a/NEWS.md b/NEWS.md index 7ad78ba5..410a336b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,54 @@ +# conmat 0.0.2.9000 + +## Changes + +* Adds new functions: + * `age_breaks()` accessor function for `conmat_setting_prediction_matrix` + * `setting_prediction_matrix()` + * `as_setting_prediction_matrix()` for coercing lists into a `setting_prediction_matrix` + * `transmission_probability_matrix()` for creating new transmission probability + matrices + +* there is now a print method for age group information in setting matrices - [#139](https://github.com/njtierney/conmat/issues/139) + +* improved age break checking [#138](https://github.com/njtierney/conmat/issues/138) + +* extended `add_school_work_participation()`, `add_modelling_features()`, `fit_single_contact_model()`, `fit_setting_contacts()`, `estimate_setting_contacts()` to use different school and work demographics arguments. (#82 and #15, resolved by #153). + +* exports `new_ngm_setting_matrix()` and `new_setting_data()` +* Exports new `age_breaks()` method for class `setting_contact_model` (used in `fit_setting_contacts()`). + +## Breaking changes + +* change `get_per_capita_household_size` to `get_abs_per_capita_household_size` +* change `get_data_abs_age_work` to `abs_age_work` +* change `get_data_abs_age_education` to `abs_age_education` +* change `get_household_size_distribution` -> `get_abs_household_size_distribution` +* change `abs_household_size_population` -> `get_abs_household_size_population` +* change `abs_per_capita_household_size_lga` -> `get_abs_per_capita_household_size_lga` +* change `abs_per_capita_household_size_state` -> `get_abs_per_capita_household_size_state` +* `abbreviate_states` -> `abs_abbreviate_states` +* `unabbreviate_states` -> `abs_unabbreviate_states` + +# conmat 0.0.1.9000 + +## Changes + +* new `conmat_age_matrix` class, replaces `conmat_prediction_matrix`, knows about its age breaks +* accessor method, `age_breaks()`, which accesses age break information +* updated autoplot method for `conmat_age_matrix` +* accessor method, `raw_eigenvalue()` for getting the raw eigenvalue from a next generation matrix, "ngm_setting_matrix". +* accessor method, `scaling` for getting the value of R_target/raw eigenvalue. +* add `autoplot` methods for ngm, vaccination, and transmission probability + + +## Breaking Changes + +* `generate_ngm` no longer accepts LGA or state inputs, which now occurs in `generate_ngm_oz`. The `generate_ngm` function has had S3 methods created for it, + so it can take input from `conmat_population` (such as the output from + [abs_age_lga()]), or a `conmat_setting_prediction_matrix`, which is the + output from [extrapolate_polymod()] or [predict_setting_contacts()]. + # conmat 0.0.0.9004 * Added a `NEWS.md` file to track changes to the package. @@ -9,4 +60,7 @@ * Add transmission probability data from Eyre et al., closes [#39](https://github.com/njtierney/conmat/issues/39) * Add household adjustment - [#41](https://github.com/njtierney/conmat/issues/41) * Added setting weights, related to #44 (but no longer Eyre weights) -* Added `apply_vaccination` to take in vaccination rates of ages and apply to contact matrices [#40](https://github.com/njtierney/conmat/issues/40) \ No newline at end of file +* Added `apply_vaccination` to take in vaccination rates of ages and apply to contact matrices [#40](https://github.com/njtierney/conmat/issues/40) +* Data from `get_polymod_population` has been revised as a result of the [socialmixr package](https://github.com/epiforecasts/socialmixr/blob/main/NEWS.md) being updated to version 0.2.0, where the world population data has been updated to 2017 by switching from the wpp2015 to wpp2017 package. We have explored the new data and found it to be very similar and should not introduce any errant errors. See the exploration [here](https://gist.github.com/njtierney/4862fa73abab97093d779fa7f2904d11). +* Added new print methods for: `conmat_setting_prediction_matrix`, `conmat_prediction_matrix`, `ngm_setting_matrix`, `setting_contact_model`, `setting_vaccination_matrix`, and `setting_data`, see #116. Main change is that list objects don't return the entire list and output but a summary of the list contents and details on what that object contains. +* Added a new S3 method for `conmat_population` to avoid fragile use of the `lower.age.limit` variable name. This resolves #77. diff --git a/R/abbreviate_states.R b/R/abbreviate_states.R index 28357832..d5cb95a0 100644 --- a/R/abbreviate_states.R +++ b/R/abbreviate_states.R @@ -1,18 +1,18 @@ #' Abbreviate Australian State Names -#' -#' Given a full name (Title Case) of an Australian State or Territory, produces +#' +#' Given a full name (Title Case) of an Australian State or Territory, produces #' the abbreviated state name. #' #' @param state_names vector of state names in long form #' #' @return shortened state names #' -#' @seealso [unabbreviate_states()] +#' @seealso [abs_unabbreviate_states()] #' @examples -#' abbreviate_states("Victoria") -#' abbreviate_states(c("Victoria", "Queensland")) +#' abs_abbreviate_states("Victoria") +#' abs_abbreviate_states(c("Victoria", "Queensland")) #' @export -abbreviate_states <- function(state_names) { +abs_abbreviate_states <- function(state_names) { dplyr::case_when( state_names %in% c("Australian Capital Territory", "ACT") ~ "ACT", state_names %in% c("New South Wales", "NSW") ~ "NSW", @@ -23,4 +23,28 @@ abbreviate_states <- function(state_names) { state_names %in% c("Victoria", "VIC") ~ "VIC", state_names %in% c("Western Australia", "WA") ~ "WA" ) -} \ No newline at end of file +} + +#' Un-abbreviate Australian state names +#' +#' @param state_names vector of state names in short form +#' +#' @return Longer state names +#' @seealso [abs_abbreviate_states()] +#' +#' @examples +#' abs_unabbreviate_states("VIC") +#' abs_unabbreviate_states(c("VIC", "QLD")) +#' @export +abs_unabbreviate_states <- function(state_names) { + dplyr::case_when( + state_names %in% c("Australian Capital Territory", "ACT") ~ "Australian Capital Territory", + state_names %in% c("New South Wales", "NSW") ~ "New South Wales", + state_names %in% c("Northern Territory", "NT") ~ "Northern Territory", + state_names %in% c("Queensland", "QLD") ~ "Queensland", + state_names %in% c("South Australia", "SA") ~ "South Australia", + state_names %in% c("Tasmania", "TAS") ~ "Tasmania", + state_names %in% c("Victoria", "VIC") ~ "Victoria", + state_names %in% c("Western Australia", "WA") ~ "Western Australia" + ) +} diff --git a/R/abs-helpers.R b/R/abs-helpers.R index 139eb933..577ac2f2 100644 --- a/R/abs-helpers.R +++ b/R/abs-helpers.R @@ -1,18 +1,18 @@ #' @title Return Australian Bureau of Statistics (ABS) age population data for a #' given Local Government Area (LGA) or state -#' @param lga_name lga name - can be a partial match, e.g., although the official name might be "Albury (C)", "Albury" is fine. It must also match exactly one LGA. See [check_lga_name()] for more details. -#' @return dataset of: `lga` (or `state`), `lower.age.limit`, `year`, -#' and `population`. +#' @param lga_name lga name - can be a partial match, e.g., although the official name might be "Albury (C)", "Albury" is fine. +#' @return a `conmat_population` dataset containing: `lga` (or `state`), +#' `lower.age.limit`, `year`, and `population`. #' @name abs_age_data #' @export #' @examples -#' abs_age_lga("Albury (C)") -#' abs_age_state("NSW") +#' abs_age_lga(c("Albury (C)", "Fairfield (C)")) +#' abs_age_state(c("NSW", "VIC")) abs_age_lga <- function(lga_name) { - check_lga_name(lga_name) + check_lga_name(lga_name, multiple_lga = TRUE) - abs_pop_age_lga_2020 %>% - dplyr::filter(lga == lga_name) %>% + abs_population <- abs_pop_age_lga_2020 %>% + dplyr::filter(lga %in% lga_name) %>% dplyr::select( lga, age_group, @@ -21,21 +21,22 @@ abs_age_lga <- function(lga_name) { ) %>% dplyr::mutate(age_group = readr::parse_number(as.character(age_group))) %>% dplyr::rename(lower.age.limit = age_group) + + conmat_population( + data = abs_population, + age = lower.age.limit, + population = population + ) } #' @param state_name shortened state name #' @rdname abs_age_data #' @export abs_age_state <- function(state_name) { - check_state_name(state_name) + check_state_name(state_name, multiple_state = TRUE) - abs_pop_age_lga_2020 %>% - dplyr::filter( - stringr::str_detect( - string = state, - pattern = state_name - ) - ) %>% + abs_population <- abs_pop_age_lga_2020 %>% + dplyr::filter(state %in% state_name) %>% dplyr::select( state, age_group, @@ -46,4 +47,10 @@ abs_age_state <- function(state_name) { dplyr::rename(lower.age.limit = age_group) %>% dplyr::group_by(year, state, lower.age.limit) %>% dplyr::summarise(population = sum(population)) + + conmat_population( + data = abs_population, + age = lower.age.limit, + population = population + ) } diff --git a/R/abs_age_education.R b/R/abs_age_education.R new file mode 100644 index 00000000..517d5564 --- /dev/null +++ b/R/abs_age_education.R @@ -0,0 +1,45 @@ +#' @title Return data on educated population for a given age and state or +#' lga of Australia. +#' @param state target Australian state name or a vector with multiple state +#' names in its abbreviated form, such as "QLD", "NSW", or "TAS" +#' @param age a numeric or numeric vector denoting ages between 0 to 115. The +#' default is to return all ages. +#' @return dataset with information on the number of educated people belonging +#' to a particular age, its total population and the corresponding proportion. +#' @rdname abs-age-education +#' @export +#' @examples +#' abs_age_education_state(state = "VIC") +#' abs_age_education_state(state = "WA", age = 1:5) +#' abs_age_education_state(state = c("QLD", "TAS"), age = 5) +#' abs_age_education_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 10) +#' +abs_age_education_state <- function(state = NULL, age = NULL) { + check_state_name(state, multiple_state = TRUE) + data_subset <- data_abs_state_education %>% + dplyr::filter(state %in% {{ state }}) + + if (!is.null(age)) { + data_subset <- data_subset %>% dplyr::filter(age %in% {{ age }}) + } + + data_subset +} + +#' @param lga target Australian local government area (LGA) name, such as +#' "Fairfield (C)" or a vector with multiple lga names. See +#' [abs_lga_lookup()] for list of lga names. +#' @name abs-age-education +#' @export +abs_age_education_lga <- function(lga = NULL, age = NULL) { + check_lga_name(lga, multiple_lga = TRUE) + + data_subset <- data_abs_lga_education %>% + dplyr::filter(lga %in% {{ lga }}) + + if (!is.null(age)) { + data_subset <- data_subset %>% dplyr::filter(age %in% {{ age }}) + } + + data_subset +} diff --git a/R/abs_age_work.R b/R/abs_age_work.R new file mode 100644 index 00000000..088d1ad9 --- /dev/null +++ b/R/abs_age_work.R @@ -0,0 +1,42 @@ +#' @title Return data on employed population for a given age and state or +#' lga of Australia +#' @param lga target Australian local government area (LGA) name, such as +#' "Fairfield (C)" or a vector with multiple lga names. See +#' [abs_lga_lookup()] for list of lga names. +#' @param age a numeric or numeric vector denoting ages between 0 to 115. +#' The default is to return all ages. +#' @return data set with information on the number of employed people belonging +#' to a particular age, its total population and the corresponding proportion. +#' @rdname abs-age-work +#' @export +#' @examples +#' abs_age_work_state(state = "NSW") +#' abs_age_work_state(state = c("QLD", "TAS"), age = 5) +#' abs_age_work_lga(lga = "Albany (C)", age = 1:5) +#' abs_age_work_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 39) +#' +abs_age_work_lga <- function(lga = NULL, age = NULL) { + check_lga_name(lga, multiple_lga = TRUE) + data_subset <- data_abs_lga_work %>% dplyr::filter(lga %in% {{ lga }}) + + if (!is.null(age)) { + data_subset <- data_subset %>% dplyr::filter(age %in% {{ age }}) + } + + data_subset +} + +#' @param state target Australian state name or a vector with multiple state +#' names in its abbreviated form, such as "QLD", "NSW", or "TAS" +#' @name abs-age-work +#' @export +abs_age_work_state <- function(state = NULL, age = NULL) { + check_state_name(state, multiple_state = TRUE) + data_subset <- data_abs_state_work %>% dplyr::filter(state %in% {{ state }}) + + if (!is.null(age)) { + data_subset <- data_subset %>% dplyr::filter(age %in% {{ age }}) + } + + data_subset +} diff --git a/R/abs_household_size_population.R b/R/abs_household_size_population.R new file mode 100644 index 00000000..9ea42622 --- /dev/null +++ b/R/abs_household_size_population.R @@ -0,0 +1,41 @@ +#' @title Get population associated with each household size in an LGA or a state +#' @param state target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS" +#' @param lga target Australian local government area (LGA) name, such as "Fairfield (C)". See +#' [abs_lga_lookup()] for list of lga names +#' @return returns a data frame with household size and the population associated with it in each LGA or state. +#' @export +#' @examples +#' get_abs_household_size_population(state = "NSW") +get_abs_household_size_population <- function(state = NULL, lga = NULL) { + level <- dplyr::case_when( + is.null(state) & is.null(lga) ~ "national", + !is.null(state) & is.null(lga) ~ "state", + is.null(state) & !is.null(lga) ~ "lga", + TRUE ~ "erroneous" + ) + + # get state mean household sizes + household_data <- abs_household_lga %>% + dplyr::filter( + year == max(year), + n_persons_usually_resident != "total" + ) %>% + dplyr::mutate( + # household size as a number, assuming all people in households 8+ are + # exactly 8 + size = readr::parse_number(n_persons_usually_resident), + # number of *people* in a household of that size + n_people = n_households * size, + ) %>% + dplyr::select(-c(n_persons_usually_resident, n_households)) %>% + dplyr::rename(household_size = size) + + household_data <- switch(level, + national = household_data, + state = household_data %>% + dplyr::filter(state == !!state), + lga = household_data %>% + dplyr::filter(lga == !!lga) + ) + return(household_data) +} diff --git a/R/abs_per_capita_household_size_lga.R b/R/abs_per_capita_household_size_lga.R new file mode 100644 index 00000000..c420ec64 --- /dev/null +++ b/R/abs_per_capita_household_size_lga.R @@ -0,0 +1,31 @@ +#' @title Get household size distribution based on LGA name +#' @param lga target Australian local government area (LGA) name, such as "Fairfield (C)". See +#' [abs_lga_lookup()] for list of lga names +#' @return returns a numeric value depicting the per capita household size of the specified LGA +#' @export +#' @examples +#' get_abs_per_capita_household_size_lga(lga = "Fairfield (C)") +#' +get_abs_per_capita_household_size_lga <- function(lga = NULL) { + check_lga_name(lga, multiple_lga = FALSE) + + # given ABS data on household sizes for a *single location*, get average + # household sizes *per person* from ABS - assuming a max of 8 people per + # households. Note - I tried computing the mean size of the households larger + # than 7, by comparing with LGA populations, but they were improbably + # enormous, probably because some of the population lives in facilities, not + # households. + + lga <- rlang::enquo(lga) + + household_data <- get_abs_household_size_population(lga = lga) + # set up aggregation + household_data <- household_data %>% + dplyr::filter(lga == !!lga) %>% + dplyr::group_by(lga) + + + # aggregate and average household sizes + household_data %>% + per_capita_household_size() +} diff --git a/R/abs_per_capita_household_size_state.R b/R/abs_per_capita_household_size_state.R new file mode 100644 index 00000000..03fa5f4a --- /dev/null +++ b/R/abs_per_capita_household_size_state.R @@ -0,0 +1,31 @@ +#' @title Get household size distribution based on state name +#' @param state target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS" +#' @return returns a numeric value depicting the per capita household size of the specified state +#' @export +#' @examples +#' get_abs_per_capita_household_size_state(state = "NSW") +get_abs_per_capita_household_size_state <- function(state = NULL) { + check_state_name(state, multiple_state = FALSE) + + # given ABS data on household sizes for a *single location*, get average + # household sizes *per person* from ABS - assuming a max of 8 people per + # households. Note - I tried computing the mean size of the households larger + # than 7, by comparing with LGA populations, but they were improbably + # enormous, probably because some of the population lives in facilities, not + # households. + + state <- rlang::enquo(state) + # get state mean household sizes + household_data <- get_abs_household_size_population(state = state) + + + + # set up aggregation + household_data <- household_data %>% + dplyr::filter(state == !!state) %>% + dplyr::group_by(state) + + # aggregate and average household sizes + household_data %>% + per_capita_household_size() +} diff --git a/R/add_modelling_features.R b/R/add_modelling_features.R index 944eaae7..737daaa9 100644 --- a/R/add_modelling_features.R +++ b/R/add_modelling_features.R @@ -1,33 +1,46 @@ #' Add features required for modelling to the dataset -#' +#' #' This function adds three main groups of features to the data. It is used #' internally in [fit_single_contact_model()] and [predict_contacts_1y()]. -#' It requires columns named `age_to` and `age_from`. The three types of +#' It requires columns named `age_to` and `age_from`. The three types of #' features it adds are described below: -#' 1) Population distribution of contact ages from the function -#' [add_population_age_to()], which requires a column called "age_to" +#' 1) Population distribution of contact ages from the function +#' [add_population_age_to()], which requires a column called "age_to" #' representing the age of the person who had contact. It creates a column #' called `pop_age_to`. [add_population_age_to()] takes an extra argument #' for population, which defaults to [get_polymod_population()], but needs -#' to be a data frame with columns, `lower.age.limit`, and `population`. -#' 2) School work participation, which is from the function -#' [add_school_work_participation()]. This requires columns `age_to` and +#' to be a `conmat_population` object, which specifies the `age` and +#' `population` characteristics, or a data frame with columns, +#' `lower.age.limit`, and `population`. +#' 2) School work participation, which is from the function +#' [add_school_work_participation()]. This requires columns `age_to` and #' `age_from`, but will operate on any column starting with `age` and adds -#' columns: `school_probability`, `work_probability`, +#' columns: `school_probability`, `work_probability`, #' `school_year_probability`, and `school_weighted_pop_fraction`. -#' 3) Offset is added on to the data using [add_offset()]. This requires -#' variables `school_weighted_pop_fraction` (from -#' [add_school_work_participation()]) and `pop_age_to` (from -#' [add_school_work_participation()]). It adds two columns, +#' 3) Offset is added on to the data using [add_offset()]. This requires +#' variables `school_weighted_pop_fraction` (from +#' [add_school_work_participation()]) and `pop_age_to` (from +#' [add_school_work_participation()]). It adds two columns, #' `log_contactable_population_school`, and `log_contactable_population`. #' #' @param contact_data contact data with columns `age_to` and `age_from` -#' @param ... extra dots passed to `population` argument of -#' [add_population_age_to()] +#' @param population the `population` argument of [add_population_age_to()] +#' @param school_demographics (optional) defaults to census average proportion +#' at school. You can provide a dataset with columns, "age" (numeric), and +#' "school_fraction" (0-1), if you would like to specify these +#' details. See `abs_avg_school` for the default values. If you would like to +#' use the original school demographics used in conmat, these are provided in +#' the dataset, `conmat_original_school_demographics`. +#' @param work_demographics (optional) defaults to census average proportion +#' employed. You can provide a dataset with columns, "age" (numeric), and +#' "work_fraction", if you would like to specify these details. See +#' `abs_avg_work` for the default values. If you would like to +#' use the original work demographics used in conmat, these are provided in +#' the dataset, `conmat_original_work_demographics`. #' @return data frame with 11 extra columns - the contents of `contact_data`, -#' plus: pop_age_to, school_fraction_age_from, work_fraction_age_from, -#' school_fraction_age_to, work_fraction_age_to, school_probability, -#' work_probability, school_year_probability, school_weighted_pop_fraction, +#' plus: pop_age_to, school_fraction_age_from, work_fraction_age_from, +#' school_fraction_age_to, work_fraction_age_to, school_probability, +#' work_probability, school_year_probability, school_weighted_pop_fraction, #' log_contactable_population_school, and log_contactable_population. #' @examples #' age_min <- 10 @@ -35,34 +48,47 @@ #' all_ages <- age_min:age_max #' library(tidyr) #' example_df <- expand_grid( -#' age_from = all_ages, -#' age_to = all_ages, -#' ) -#' add_modelling_features(example_df) +#' age_from = all_ages, +#' age_to = all_ages, +#' ) +#' add_modelling_features(example_df) +#' add_modelling_features( +#' example_df, +#' school_demographics = conmat_original_school_demographics, +#' work_demographics = conmat_original_work_demographics +#' ) +#' #' @export -add_modelling_features <- function(contact_data, ...) { - - # use interpolated population of "age_to" (contact age) & +add_modelling_features <- function(contact_data, + school_demographics = NULL, + work_demographics = NULL, + population = get_polymod_population()) { + # use interpolated population of "age_to" (contact age) & # get the relative population grouped by "age_from" or participant age - # add new variables for: - # school & work going fraction for contact & participant ages - # probability that a person of the other age goes to the same work/school - # probability that a person of the other age would be in the same school year - # weighted combination of contact population & school year probability. + # add new variables for: + # school & work going fraction for contact & participant ages + # probability that a person of the other age goes to the same work/school + # probability that a person of the other age would be in the same school year + # weighted combination of contact population & school year probability. # [ for using outside of classroom?] - # offset for school setting & the rest. - contact_data %>% + # offset for school setting & the rest. + res <- contact_data %>% # Adds interpolated age population - specifically, `pop_age_to` - add_population_age_to(...) %>% + add_population_age_to(population = population) %>% # Adds school and work offset - add_school_work_participation() %>% - # adds columns + add_symmetrical_features() %>% + add_school_work_participation( + school_demographics = school_demographics, + work_demographics = work_demographics + ) %>% + # adds columns # `log_contactable_population_school`, and ` log_contactable_population` add_offset() + } #' Add column, "intergenerational" -#' +#' #' For modelling purposes it is useful to have a feature that is the absolute #' difference between `age_from` and `age_to` columns. #' @@ -71,16 +97,15 @@ add_modelling_features <- function(contact_data, ...) { #' @return data.frame with extra column, `intergenerational` #' #' @examples -#' +#' #' polymod_contact <- get_polymod_contact_data() -#' +#' #' polymod_contact %>% add_intergenerational() -#' -#' +#' #' @export -add_intergenerational <- function(data){ - data %>% +add_intergenerational <- function(data) { + data %>% dplyr::mutate( intergenerational = abs(age_from - age_to) - ) + ) } diff --git a/R/add_offset.R b/R/add_offset.R index 0ed8f066..379c9835 100644 --- a/R/add_offset.R +++ b/R/add_offset.R @@ -1,31 +1,31 @@ #' @title Adds offset variables -#' -#' @description Mostly used internally in `add_modelling_features()`. Adds two +#' +#' @description Mostly used internally in `add_modelling_features()`. Adds two #' offset variables to be used in [fit_single_contact_model()]: -#' 1) `log_contactable_population_school`, and +#' 1) `log_contactable_population_school`, and #' 2) `log_contactable_population`. -#' These two variables require variables `school_weighted_pop_fraction` (from +#' These two variables require variables `school_weighted_pop_fraction` (from #' [add_school_work_participation()]) and `pop_age_to` (from #' [add_school_work_participation()]). This provides separate offsets -#' for school setting when compared to the other settings such as home, -#' work and other. The offset for school captures cohorting of students -#' for schools and takes the logarithm of the weighted combination of -#' contact population age distribution & school year probability calculated +#' for school setting when compared to the other settings such as home, +#' work and other. The offset for school captures cohorting of students +#' for schools and takes the logarithm of the weighted combination of +#' contact population age distribution & school year probability calculated #' in [add_school_work_participation()]. See "details" for more information. -#' -#' @details why double offsets? There are two offsets specified, once in the -#' model formula, and once in the "offset" argument of `mgcv::bam`. The -#' offsets get added together when the model first fit. In addition, the -#' setting specific offset from `offset_variable`, which is included in the -#' GAM model as `... + offset(log_contactable_population)` is used in -#' prediction, whereas the other offset, included as an argument in the GAM -#' as `offset = log(participants)` is only included when the model is +#' +#' @details why double offsets? There are two offsets specified, once in the +#' model formula, and once in the "offset" argument of `mgcv::bam`. The +#' offsets get added together when the model first fit. In addition, the +#' setting specific offset from `offset_variable`, which is included in the +#' GAM model as `... + offset(log_contactable_population)` is used in +#' prediction, whereas the other offset, included as an argument in the GAM +#' as `offset = log(participants)` is only included when the model is #' initially created. See more detail in [fit_single_contact_model()]. -#' -#' @return data.frame of `contact_data` with two extra columns: +#' +#' @return data.frame of `contact_data` with two extra columns: #' `log_contactable_population_school` and `log_contactable_population` #' @param contact_data contact data - must contain columns `age_to`, `age_from`, -#' `pop_age_to` (from [add_population_age_to()], and +#' `pop_age_to` (from [add_population_age_to()], and #' `school_weighted_pop_fraction` (from [add_school_work_participation()])). #' @author Nick Golding #' @export @@ -35,15 +35,14 @@ #' all_ages <- age_min:age_max #' library(tidyr) #' example_df <- expand_grid( -#' age_from = all_ages, -#' age_to = all_ages, -#' ) -#' example_df %>% -#' add_population_age_to() %>% -#' add_school_work_participation() %>% -#' add_offset() +#' age_from = all_ages, +#' age_to = all_ages, +#' ) +#' example_df %>% +#' add_population_age_to() %>% +#' add_school_work_participation() %>% +#' add_offset() add_offset <- function(contact_data) { - # define the offset variable for the model: the log population for most # settings, one that captures cohorting of students for schools. Define both, # and set the choice of which one in the model formula @@ -52,5 +51,4 @@ add_offset <- function(contact_data) { log_contactable_population_school = log(school_weighted_pop_fraction), log_contactable_population = log(pop_age_to) ) - } diff --git a/R/add_population_age_to.R b/R/add_population_age_to.R index dd062b3f..bfc649df 100644 --- a/R/add_population_age_to.R +++ b/R/add_population_age_to.R @@ -1,18 +1,21 @@ #' Add the population distribution for contact ages. -#' -#' Adds the population distribution of contact ages. Requires a column called -#' "age_to", representing the contact age - the age of the person who had -#' contact. It creates a column, `pop_age_to`. The `population` argument -#' defaults to [get_polymod_population()], but can be any data frame with -#' columns, `lower.age.limit`, and `population`. If population is 'polymod' -#' then use the participant-weighted average of POLYMOD country/year -#' distributions. It adds the population via interpolation, using -#' [get_age_population_function()] to create a function that generates -#' population from ages. -#' +#' +#' Adds the population distribution of contact ages. Requires a column called +#' "age_to", representing the contact age - the age of the person who had +#' contact. It creates a column, `pop_age_to`. The `population` argument +#' defaults to [get_polymod_population()], which is a `conmat_population` +#' object, which has `age` and `population` specified. But this can also be +#' a data frame with columns, `lower.age.limit`, and `population`. If +#' population is 'polymod' then use the participant-weighted average of +#' POLYMOD country/year distributions. It adds the population via +#' interpolation, using [get_age_population_function()] to create a +#' function that generates population from ages. +#' #' @param contact_data contact data containing columns `age_to` and `age_from` -#' @param population Defaults to [get_polymod_population()], but can be any -#' data frame with columns, `lower.age.limit`, and `population`. +#' @param population Defaults to [get_polymod_population()], a +#' `conmat_population` object, which specifies the `age` and `population` +#' columns. But it can optionally be any data frame with columns, +#' `lower.age.limit`, and `population`. #' @return data frame #' @examples #' age_min <- 10 @@ -20,14 +23,13 @@ #' all_ages <- age_min:age_max #' library(tidyr) #' example_df <- expand_grid( -#' age_from = all_ages, -#' age_to = all_ages, -#' ) +#' age_from = all_ages, +#' age_to = all_ages, +#' ) #' add_population_age_to(example_df) #' @export -add_population_age_to <- function(contact_data, +add_population_age_to <- function(contact_data, population = get_polymod_population()) { - # get function to interpolate population age distributions to 1y bins age_population_function <- get_age_population_function( population @@ -37,14 +39,13 @@ add_population_age_to <- function(contact_data, contact_data %>% # add interpolated population to "age_to" dplyr::mutate( - pop_age_to = age_population_function(age_to) + pop_age_to = age_population_function(age_to) ) %>% dplyr::group_by(age_from) %>% # normalise to get a relative population dplyr::mutate( pop_age_to = pop_age_to / sum(pop_age_to) ) %>% - dplyr::ungroup() %>% + dplyr::ungroup() %>% add_intergenerational() - } diff --git a/R/add_school_work_participation.R b/R/add_school_work_participation.R index 061ce6e9..c5bcc2bb 100644 --- a/R/add_school_work_participation.R +++ b/R/add_school_work_participation.R @@ -1,84 +1,137 @@ -#' Add columns describing the fractions of the population in each age group +#' Add columns describing the fractions of the population in each age group #' that attend school/work (average FTE) #' #' Add fractions of the population in each age group that attend school/work #' (average FTE) to compute the probability that both participant and -#' contact attend school/work. Requires columns `age_to` and `age_from`. -#' Note that it will operate on any column starting with `age`. Adds columns: -#' `school_probability`, `work_probability`, `school_year_probability`, and -#' `school_weighted_pop_fraction`. The columns `school_probability` and -#' `work_probability` represent the probability a person of the other age -#' goes to the same work/school. `school_year_probability` represents the -#' probability that a person of the other age would be in the same school -#' year. `school_weighted_pop_fraction` represents the weighted combination -#' of contact population age distribution & school year probability, so that -#' if the contact is in the same school year, the weight is 1, and otherwise -#' it is the population age fraction. This can be used as an offset, so that -#' population age distribution can be used outside the classroom, but does -#' not affect classroom contacts (which due to cohorting and regularised +#' contact attend school/work. Requires columns `age_to` and `age_from`. +#' Note that it will operate on any column starting with `age`. Adds columns: +#' `school_probability`, `work_probability`, `school_year_probability`, and +#' `school_weighted_pop_fraction`. The columns `school_probability` and +#' `work_probability` represent the probability a person of the other age +#' goes to the same work/school. `school_year_probability` represents the +#' probability that a person of the other age would be in the same school +#' year. `school_weighted_pop_fraction` represents the weighted combination +#' of contact population age distribution & school year probability, so that +#' if the contact is in the same school year, the weight is 1, and otherwise +#' it is the population age fraction. This can be used as an offset, so that +#' population age distribution can be used outside the classroom, but does +#' not affect classroom contacts (which due to cohorting and regularised #' class sizes are unlikely to depend on the population age distribution). -#' -#' @param contact_data contact data containing columns: `age_to`, `age_from`, +#' +#' @param contact_data contact data containing columns: `age_to`, `age_from`, #' and `pop_age_to` (from [add_population_age_to()]) +#' @param school_demographics (optional) defaults to census average proportion +#' at school. You can provide a dataset with columns, "age" (numeric), and +#' "school_fraction" (0-1), if you would like to specify these +#' details. See `abs_avg_school` for the default values. If you would like to +#' use the original school demographics used in conmat, these are provided in +#' the dataset, `conmat_original_school_demographics`. +#' @param work_demographics (optional) defaults to census average proportion +#' employed. You can provide a dataset with columns, "age" (numeric), and +#' "work_fraction", if you would like to specify these details. See +#' `abs_avg_work` for the default values. If you would like to +#' use the original work demographics used in conmat, these are provided in +#' the dataset, `conmat_original_work_demographics`. #' @return dataset with 9 extra columns: school_fraction_age_from, #' work_fraction_age_from, school_fraction_age_to, work_fraction_age_to, #' school_probability, work_probability, school_year_probability, and #' school_weighted_pop_fraction. -#' @note this uses fake data that will get replaced with abs data input soon +#' @note To use previous approach input the arguments `school_demographics` and +#' `work_demographics` with `conmat_original_school_demographics` and `conmat_original_work_demographics`, respectively. #' @examples #' age_min <- 10 #' age_max <- 15 #' all_ages <- age_min:age_max #' library(tidyr) #' example_df <- expand_grid( -#' age_from = all_ages, -#' age_to = all_ages, -#' ) -#' -#' example_df %>% -#' add_population_age_to() %>% -#' add_school_work_participation() +#' age_from = all_ages, +#' age_to = all_ages, +#' ) +#' +#' example_df %>% +#' add_population_age_to() %>% +#' add_school_work_participation() +#' +#' example_df %>% +#' add_population_age_to() %>% +#' add_school_work_participation( +#' school_demographics = conmat_original_school_demographics, +#' work_demographics = conmat_original_work_demographics +#' ) #' @export -add_school_work_participation <- function(contact_data) { +add_school_work_participation <- function(contact_data, + school_demographics = NULL, + work_demographics = NULL) { + contact_data %>% + add_school_fraction(school_demographics) %>% + add_school_probability() %>% + add_work_fraction(work_demographics) %>% + add_work_probability() +} + +add_work_fraction <- function(contact_data, work_demographics = NULL) { + # user can provide their own work demographic data, however by default + # we will use averaged data from the ABS. + if (is.null(work_demographics)) { + work_demographics <- abs_avg_work + } + + # check is has the right kind of data structure + check_work_demographics(work_demographics) + + # add work fraction data to both age_from and age_to + contact_data %>% + dplyr::left_join( + work_demographics, + dplyr::join_by(age_from == age) + ) %>% + dplyr::rename( + work_fraction_age_from = work_fraction + ) %>% + dplyr::left_join( + work_demographics, + dplyr::join_by(age_to == age) + ) %>% + dplyr::rename( + work_fraction_age_to = work_fraction + ) +} + +add_school_fraction <- function(contact_data, school_demographics = NULL) { + # user can provide their own school demographic data, however by default + # we will use averaged data from the ABS. + if (is.null(school_demographics)) { + school_demographics <- abs_avg_school + } + + # check is has the right kind of data structure + check_school_demographics(school_demographics) + + # add work fraction data to both age_from and age_to + contact_data %>% + dplyr::left_join( + school_demographics, + dplyr::join_by(age_from == age) + ) %>% + dplyr::rename( + school_fraction_age_from = school_fraction + ) %>% + dplyr::left_join( + school_demographics, + dplyr::join_by(age_to == age) + ) %>% + dplyr::rename( + school_fraction_age_to = school_fraction + ) +} + +add_school_probability <- function(contact_data) { contact_data %>% dplyr::mutate( - dplyr::across( - dplyr::starts_with("age"), - .fns = list( - # made up example - replace with education statistics - school_fraction = ~ dplyr::case_when( - # preschool - .x %in% 2:4 ~ 0.5, - # compulsory education - .x %in% 5:16 ~ 1, - # voluntary education - .x %in% 17:18 ~ 0.5, - # university - .x %in% 19:25 ~ 0.1, - # other - TRUE ~ 0.05 - ), - # made up example - replace with labour force statistics - work_fraction = ~ dplyr::case_when( - # child labour - .x %in% 12:19 ~ 0.2, - # young adults (not at school) - .x %in% 20:24 ~ 0.7, - # main workforce - .x %in% 25:60 ~ 1, - # possibly retired - .x %in% 61:65 ~ 0.7, - # other - TRUE ~ 0.05 - ) - ), - .names = "{.fn}_{.col}" - ), # the probability that a person of the other age other party goes to the # same school/work. May not be the same place. But proportional to the # increase in contacts due to attendance. So this helps school_probability = school_fraction_age_from * school_fraction_age_to, - work_probability = work_fraction_age_from * work_fraction_age_to, # the probability that a person of the other age would be in the same # school year # So, if ages are the same, we get (2 - 0) / 4 = 0.5 @@ -101,3 +154,10 @@ add_school_work_participation <- function(contact_data) { school_weighted_pop_fraction = pop_age_to * (1 - school_year_probability) + 1 * school_year_probability ) } + +add_work_probability <- function(contact_data) { + contact_data %>% + dplyr::mutate( + work_probability = work_fraction_age_from * work_fraction_age_to + ) +} diff --git a/R/add_symmetrical_features.R b/R/add_symmetrical_features.R new file mode 100644 index 00000000..a063e7c6 --- /dev/null +++ b/R/add_symmetrical_features.R @@ -0,0 +1,42 @@ +#' @title Add symmetrical, age based features +#' @description This function adds 6 columns to assist with describing +#' various age based interactions for model fitting. Requires that the +#' age columns are called "age_from", and "age_to" +#' +#' @param data data.frame with columns, `age_from`, and `age_to` +#' @return data.frame with 6 more columns, `gam_age_offdiag`, `gam_age_offdiag_2`, `gam_age_diag_prod`, `gam_age_diag_sum`, `gam_age_pmax`, `gam_age_pmin`, +#' @examples +#' vec_age <- 0:2 +#' dat_age <- expand.grid( +#' age_from = vec_age, +#' age_to = vec_age +#' ) +#' +#' add_symmetrical_features(dat_age) +#' +#' @export +add_symmetrical_features <- function(data) { + # add terms back into the data frame + data %>% + dplyr::mutate( + gam_age_offdiag = abs(age_from - age_to), + gam_age_offdiag_2 = abs(age_from - age_to)^2, + gam_age_diag_prod = abs(age_from * age_to), + gam_age_diag_sum = abs(age_from + age_to), + gam_age_pmax = pmax(age_from, age_to), + gam_age_pmin = pmin(age_from, age_to) + ) +} + +# gam( +# response ~ +# s(I(abs(age_from - age_to))) + +# s(I(abs(age_from - age_to)^2)) + +# s(I(abs(age_from * age_to))) + +# s(I(abs(age_from + age_to))) + +# s(I(pmax(age_from, age_to))) + +# s(I(pmin(age_from, age_to))), +# family = poisson, +# offset = log(participants), +# data = data +# ) diff --git a/R/adjust_household_contact_matrix.R b/R/adjust_household_contact_matrix.R index 6027bea4..27bac388 100644 --- a/R/adjust_household_contact_matrix.R +++ b/R/adjust_household_contact_matrix.R @@ -1,19 +1,19 @@ #' @title Adjust Household Contact Matrix -#' -#' @description This function is used internally within -#' [predict_setting_contacts()]. See details below for why we use the per +#' +#' @description This function is used internally within +#' [predict_setting_contacts()]. See details below for why we use the per #' capita adjustment -#' +#' #' @details We use Per-capita household size instead of mean household size. -#' Per-capita household size is different to mean household size, as the -#' household size averaged over people in the **population**, not over -#' households, so larger households get upweighted. It is calculated by -#' taking a distribution of the number of households of each size in a -#' population, multiplying the size by the household by the household count -#' to get the number of people with that size of household, and computing -#' the population-weighted average of household sizes. We use per-capita -#' household size as it is a more accurate reflection of the average -#' number of household members a person in the population can have contact +#' Per-capita household size is different to mean household size, as the +#' household size averaged over people in the **population**, not over +#' households, so larger households get upweighted. It is calculated by +#' taking a distribution of the number of households of each size in a +#' population, multiplying the size by the household by the household count +#' to get the number of people with that size of household, and computing +#' the population-weighted average of household sizes. We use per-capita +#' household size as it is a more accurate reflection of the average +#' number of household members a person in the population can have contact #' with. #' #' @param setting_matrices setting matrix @@ -27,23 +27,21 @@ adjust_household_contact_matrix <- function(setting_matrices, per_capita_household_size, model_per_capita_household_size) { - # given a list of 4 setting-specific synthetic contact matrices (including # 'home'), and a mean household size, adjust the number of household contacts # to match the average household size in that LGA from ABS, accounting for the # fact that household contacts are proportional to (but not the same as) the # number of other household members - + # get ratio between expected number of other household members (household size # minus 1) for this place from ABS data, and the average from the data used to # train the model ratio <- (per_capita_household_size - 1) / (model_per_capita_household_size - 1) - + # adjust home matrix and recompute all matrix settings <- setdiff(names(setting_matrices), "all") setting_matrices$home <- setting_matrices$home * ratio - setting_matrices$all <- Reduce('+', setting_matrices[settings]) - + setting_matrices$all <- Reduce("+", setting_matrices[settings]) + setting_matrices - } diff --git a/R/age-population-year.R b/R/age-population.R similarity index 87% rename from R/age-population-year.R rename to R/age-population.R index c06cf56c..98a9c0f8 100644 --- a/R/age-population-year.R +++ b/R/age-population.R @@ -2,22 +2,22 @@ #' #' @description This function helps clean up datasets of population data, which #' might be similar to `socialmixr::wpp_age()` or a dataset with columns -#' representing: population, location, age, and year. If age is numeric, it -#' groups ages into age groups with 5 year bins (0-4, 5-9, etc). It then -#' separates age groups into two column of these lower and upper limits. -#' Finally, it filters data passed to the specified year and location. If no +#' representing: population, location, age, and year. If age is numeric, it +#' groups ages into age groups with 5 year bins (0-4, 5-9, etc). It then +#' separates age groups into two column of these lower and upper limits. +#' Finally, it filters data passed to the specified year and location. If no #' year or location is provided then all years or locations are used. #' @param data dataset containing information on population for a given age, #' country, and year #' @param age_col bare variable name for the column with age information -#' @param location_col bare variable name for the column with location +#' @param location_col bare variable name for the column with location #' information. If using, both `location_col` & `location` must be specified. -#' @param location character vector with location names. If using, both +#' @param location character vector with location names. If using, both #' `location_col` & `location` must be specified. -#' @param year_col bare variable name for the column with year information. If +#' @param year_col bare variable name for the column with year information. If #' using, both `year_col` & `year` must be specified. -#' @param year numeric vector representing the desired year(s). If using, both +#' @param year numeric vector representing the desired year(s). If using, both #' `year_col` & `year` must be specified. #' @return tidy dataset with information on population of different age bands #' @export @@ -41,7 +41,7 @@ #' location = "Afghanistan", #' age_col = lower.age.limit #' ) -#' +#' #' # Tidy data for a given location irrespective of location #' age_population( #' data = world_data, @@ -74,20 +74,17 @@ #' year = 2020 #' ) #' -age_population <- function( - data, - location_col = NULL, - location = NULL, - age_col, - year_col = NULL, - year = NULL -) { - +age_population <- function(data, + location_col = NULL, + location = NULL, + age_col, + year_col = NULL, + year = NULL) { # checks the data type of age col and puts age into buckets if its numeric # which gets separated later as lower and upper limits age_var <- dplyr::pull(data, {{ age_col }}) - + if (is.numeric(age_var)) { label <- c(paste( seq(0, max(age_var), by = 5), @@ -113,8 +110,6 @@ age_population <- function( year_col = {{ year_col }}, year = {{ year }} ) - - return(age_population_df) } else { age_population_df <- clean_age_population_year( data = data, @@ -124,7 +119,11 @@ age_population <- function( year_col = {{ year_col }}, year = {{ year }} ) - - return(age_population_df) } -} \ No newline at end of file + age_population_df <- conmat_population( + data = age_population_df, + age = lower.age.limit, + population = population + ) + return(age_population_df) +} diff --git a/R/aggregate_predicted_contacts.R b/R/aggregate_predicted_contacts.R index 1551686b..0ce88102 100644 --- a/R/aggregate_predicted_contacts.R +++ b/R/aggregate_predicted_contacts.R @@ -1,42 +1,45 @@ #' @title Aggregate predicted contacts to specified age breaks -#' -#' @description Aggregates contacts rate from, say, a 1 year level into +#' +#' @description Aggregates contacts rate from, say, a 1 year level into #' provided age breaks, weighting the contact rate by the specified age -#' population. For example, if you specify breaks as c(0, 5, 10, 15, Inf), -#' it will return age groups as 0-5, 5-10, 10-15, and 15+ (Inf). Used +#' population. For example, if you specify breaks as c(0, 5, 10, 15, Inf), +#' it will return age groups as 0-5, 5-10, 10-15, and 15+ (Inf). Used #' internally within [predict_contacts()], although can be used by users. -#' -#' @param predicted_contacts_1y contacts in 1 year breaks (could technically +#' +#' @param predicted_contacts_1y contacts in 1 year breaks (could technically #' by in other year breaks). Data must contain columns, `age_from`, `age_to`, -#' `contacts`, and `se_contacts`, which is the same output as +#' `contacts`, and `se_contacts`, which is the same output as #' [predict_contacts_1y()] - see examples below. -#' @param population population with columns `lower.age.limit`, and -#' `population`. See examples below. +#' @param population a `conmat_population` object, which has the `age` and +#' `population` columns specified, or a dataframe with columns +#' `lower.age.limit`, and `population`. See examples below. #' @param age_breaks vector of ages. Default: c(seq(0, 75, by = 5), Inf) #' @return data frame with columns, `age_group_from`, `age_group_to`, and #' `contacts`, which is the aggregated model. #' @examples -#' fairfield_abs_data <- abs_age_lga("Fairfield (C)") -#' -#' fairfield_abs_data -#' -#' # We can predict the contact rate for Fairfield from the existing contact -#' # data, say, between the age groups of 0-15 in 5 year bins for school: -#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' fairfield +#' +#' # We can predict the contact rate for Fairfield from the existing contact +#' # data, say, between the age groups of 0-15 in 5 year bins for school: +#' #' fairfield_contacts_1 <- predict_contacts_1y( #' model = polymod_setting_models$home, -#' population = fairfield_abs_data, +#' population = fairfield, #' age_min = 0, #' age_max = 15 #' ) -#' -#' fairfield_contacts_1 -#' +#' +#' fairfield_contacts_1 +#' #' aggregated_fairfield <- aggregate_predicted_contacts( #' predicted_contacts_1y = fairfield_contacts_1, -#' population = fairfield_abs_data, -#' age_breaks = c(0, 5, 10, 15,Inf) -#' ) +#' population = fairfield, +#' age_breaks = c(0, 5, 10, 15, Inf) +#' ) +#' +#' aggregated_fairfield #' @export aggregate_predicted_contacts <- function(predicted_contacts_1y, population, @@ -44,7 +47,6 @@ aggregate_predicted_contacts <- function(predicted_contacts_1y, seq(0, 75, by = 5), Inf )) { - # get function for 1y age populations in this country age_population_function <- get_age_population_function(population) diff --git a/R/apply_vaccination.R b/R/apply_vaccination.R index 2f09191e..fe6bdf16 100644 --- a/R/apply_vaccination.R +++ b/R/apply_vaccination.R @@ -5,7 +5,7 @@ #' transmission in each age group. #' #' @details Vaccination improves a person's immunity from a disease. When a -#' sizable section of the population receives vaccinations or when vaccine +#' sizeable section of the population receives vaccinations or when vaccine #' coverage is sufficient enough, the likelihood that the unvaccinated #' population will contract the disease is decreased. This helps to slow #' infectious disease spread as well as lessen its severity. For this reason, @@ -44,14 +44,44 @@ #' transmission matching the next generation matrices #' #' @examples -#' +#' # examples take 20 second to run so skipping +#' \dontrun{ #' # example data frame with vaccine coverage, acquisition and transmission #' # efficacy of different age groups #' vaccination_effect_example_data #' #' # Generate next generation matrices +#' +#' perth <- abs_age_lga("Perth (C)") +#' perth_hh <- get_abs_per_capita_household_size(lga = "Perth (C)") +#' +#' age_breaks_0_80 <- c(seq(0, 80, by = 5), Inf) +#' +#' # refit the model - note that the default if age_breaks isn't specified is +#' # 0 to 75 +#' perth_contact_0_80 <- extrapolate_polymod( +#' perth, +#' per_capita_household_size = perth_hh, +#' age_breaks = age_breaks_0_80 +#' ) +#' +#' perth_ngm_0_80 <- generate_ngm(perth_contact_0_80, +#' age_breaks = age_breaks_0_80, +#' per_capita_household_size = perth_hh, +#' R_target = 1.5 +#' ) +#' +#' # In the old way we used to be able to pass age_breaks_0_80 along +#' generate_ngm_oz( +#' lga_name = "Perth (C)", +#' age_breaks = age_breaks_0_80, +#' R_target = 1.5 +#' ) +#' +#' +#' # another way to do this using the previous method for generating NGMs #' # The number of age breaks must match the vaccination effect data -#' ngm_nsw <- generate_ngm( +#' ngm_nsw <- generate_ngm_oz( #' state_name = "NSW", #' age_breaks = c(seq(0, 80, by = 5), Inf), #' R_target = 1.5 @@ -65,15 +95,19 @@ #' acquisition_col = acquisition, #' transmission_col = transmission #' ) -#' +#' } #' @export -apply_vaccination <- function( - ngm, - data, - coverage_col, - acquisition_col, - transmission_col -) { +apply_vaccination <- function(ngm, + data, + coverage_col, + acquisition_col, + transmission_col) { + # NOTE + # `apply_vaccination` should accept an ngm class object otherwise + # give an error maybe? + # also should it be `vaccination_data` not `data`, so it is more descriptive? + check_dimensions(ngm, data) + transmission_reduction_matrix <- data %>% # compute percentage reduction in acquisition and transmission in each age group dplyr::mutate( @@ -102,5 +136,9 @@ apply_vaccination <- function( dplyr::pull(transmission_reduction_matrix) ngm_vaccinated <- Map("*", ngm, transmission_reduction_matrix) + ngm_vaccinated <- new_setting_vaccination_matrix( + ngm_vaccinated, + age_breaks = age_breaks(ngm) + ) return(ngm_vaccinated) } diff --git a/R/autoplot.R b/R/autoplot.R new file mode 100644 index 00000000..1f9ae847 --- /dev/null +++ b/R/autoplot.R @@ -0,0 +1,106 @@ +#' Plot setting matrices using ggplot2 +#' +#' @param object A matrix or a list of square matrices, with row and column names +#' indicating the age groups. +#' @param ... Other arguments passed on +#' @param title Title to give to plot setting matrices. Defaults are provided for certain objects +#' @return a ggplot visualisation of contact rates +#' @importFrom ggplot2 autoplot +#' @name autoplot-conmat +#' @examples +#' \dontrun{ +#' if (interactive()) { +#' polymod_contact_data <- get_polymod_setting_data() +#' polymod_survey_data <- get_polymod_population() +#' +#' setting_models <- fit_setting_contacts( +#' contact_data_list = polymod_contact_data, +#' population = polymod_survey_data +#' ) +#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' fairfield_hh_size <- +#' get_abs_per_capita_household_size(lga = "Fairfield (C)") +#' +#' synthetic_settings_5y_fairfield_hh <- predict_setting_contacts( +#' population = fairfield, +#' contact_model = setting_models, +#' age_breaks = c(seq(0, 85, by = 5), Inf), +#' per_capita_household_size = fairfield_hh_size +#' ) +#' +#' # Plotting synthetic contact matrices across all settings +#' +#' autoplot( +#' object = synthetic_settings_5y_fairfield_hh, +#' title = "Setting specific synthetic contact matrices" +#' ) +#' +#' # Work setting specific synthetic contact matrices +#' autoplot( +#' object = synthetic_settings_5y_fairfield_hh$work, +#' title = "Work" +#' ) +#' } +#' } +#' @export +autoplot.conmat_age_matrix <- function(object, + ..., + title = "Contact Matrices") { + plot_matrix(object) + + ggplot2::ggtitle(title) +} + +#' @rdname autoplot-conmat +#' @export +autoplot.conmat_setting_prediction_matrix <- function(object, + ..., + title = "Setting-specific synthetic contact matrices") { + plot_setting_matrices( + object, + title = title + ) +} + +#' @rdname autoplot-conmat +#' @export +autoplot.transmission_probability_matrix <- function(object, + ..., + title = "Setting-specific transmission probability matrices") { + plot_setting_matrices( + object, + title = title + ) + + ggplot2::labs( + subtitle = "Relative probability of individuals in an age group infecting an individual in another age group" + ) +} + +#' @rdname autoplot-conmat +#' @export +autoplot.ngm_setting_matrix <- function(object, + ..., + title = "Setting-specific NGM matrices") { + plot_setting_matrices( + object, + title = title + ) + + ggplot2::labs( + subtitle = "The number of newly infected individuals for a specified age group in each setting" + ) +} + +#' @rdname autoplot-conmat +#' @export +autoplot.setting_vaccination_matrix <- function(object, + ..., + title = "Setting-specific vaccination matrices") { + plot_setting_matrices( + object, + title = title + ) + + ggplot2::labs( + subtitle = "Number of newly infected individuals for age groups, adjusted based on proposed age group vaccination rates" + ) +} diff --git a/R/check_state_name.R b/R/check_state_name.R deleted file mode 100644 index 8faea32a..00000000 --- a/R/check_state_name.R +++ /dev/null @@ -1,23 +0,0 @@ -#' @title Check state name in Australia -#' @param state_name character of length 1 -#' @return errors if state name not in ABS data -#' @keywords internal -#' @noRd -check_state_name <- function(state_name) { - state_match <- stringr::str_detect( - string = abs_pop_age_lga_2020$state, - pattern = state_name - ) - - does_state_match <- !any(state_match) - - if (does_state_match) { - rlang::abort( - message = c( - "The state name provided does not match states in Australia", - x = glue::glue("The state name '{state_name}' did not match"), - i = "See `abs_lga_lookup` for a list of all states" - ) - ) - } -} diff --git a/R/check_work_school_demographics.R b/R/check_work_school_demographics.R new file mode 100644 index 00000000..6f5545de --- /dev/null +++ b/R/check_work_school_demographics.R @@ -0,0 +1,59 @@ +#' @title Check Work Demographics +#' @param work_demographics work data +#' @keywords internal +#' @author njtierney +check_work_demographics <- function(work_demographics) { + work_names <- c("age", "work_fraction") + names_correct <- all(work_names %in% names(work_demographics)) + if (!names_correct) { + cli::cli_abort( + c( + "work demographic data must be named {.var {work_names}}", + "we see: {.var {names(work_demographics)}}" + ) + ) + } + + vctrs::vec_assert(work_demographics$work_fraction, numeric()) + + is_proportion <- all(dplyr::between(work_demographics$work_fraction, 0, 1)) + + if (!is_proportion) { + cli::cli_abort( + c( + "{.var work_fraction} must be between 0 and 1, however the range is:", + "{range(work_demographics$work_fraction)}" + ) + ) + } +} + +#' @title Check School Demographics +#' @param school_demographics school data +#' @keywords internal +#' @author njtierney +check_school_demographics <- function(school_demographics) { + school_names <- c("age", "school_fraction") + names_correct <- all(school_names %in% names(school_demographics)) + if (!names_correct) { + cli::cli_abort( + c( + "school demographic data must be named {.var {school_names}}", + "we see: {.var {names(school_demographics)}}" + ) + ) + } + + vctrs::vec_assert(school_demographics$school_fraction, numeric()) + + is_proportion <- all(dplyr::between(school_demographics$school_fraction, 0, 1)) + + if (!is_proportion) { + cli::cli_abort( + c( + "{.var school_fraction} must be between 0 and 1, however the range is:", + "{range(school_demographics$school_fraction)}" + ) + ) + } +} diff --git a/R/checkers.R b/R/checkers.R index cc4ac432..4f92cb23 100644 --- a/R/checkers.R +++ b/R/checkers.R @@ -4,12 +4,14 @@ #' @param multiple_lga logical response that allows multiple lgas to be checked #' if set to `TRUE`. Default is FALSE. #' @return errors if LGA name not in ABS data, otherwise returns nothing -#' @examples +#' @examples #' # returns nothing #' check_lga_name("Fairfield (C)") #' # if you want to check multiple LGAs you must use the `multiple_lga` flag. -#' check_lga_name(lga_name = c("Fairfield (C)", "Sydney (C)"), -#' multiple_lga = TRUE) +#' check_lga_name( +#' lga_name = c("Fairfield (C)", "Sydney (C)"), +#' multiple_lga = TRUE +#' ) #' # this will error, so isn't run #' \dontrun{ #' # don't set the `multiple_lga` flag @@ -17,45 +19,44 @@ #' # not a fully specified LGA #' check_lga_name("Fairfield") #' } -#' @export -check_lga_name <- function( - lga_name, - multiple_lga = FALSE -) { +#' @keywords internal +#' @noRd +check_lga_name <- function(lga_name, + multiple_lga = FALSE) { lga_match <- dplyr::filter( abs_pop_age_lga_2020, lga %in% lga_name ) - + does_lga_match <- nrow(lga_match) > 1 if (!does_lga_match) { - rlang::abort( + cli::cli_abort( message = c( "The LGA name provided does not match LGAs in Australia", - x = glue::glue("The lga name '{lga_name}' did not match (it probably \\ - needs '{lga_name} (C)' or similar"), - i = "See `abs_lga_lookup` for a list of all LGAs" - ) + "x" = "The lga name '{lga_name}' did not match (it probably \\ + needs '{lga_name} (C)' or similar)", + "i" = "See `abs_lga_lookup` for a list of all LGAs" + ) ) } - + if (does_lga_match) { unique_lga_names <- abs_pop_age_lga_2020 %>% dplyr::filter(lga %in% lga_name) %>% dplyr::pull(lga) %>% unique() - + more_than_one_lga <- length(unique_lga_names) > 1 - + if (more_than_one_lga & !multiple_lga) { - rlang::abort( + cli::cli_abort( message = c( "The LGA name provided matches multiple LGAs", - i = "Specify the exact LGA name or set {.arg {multiple_lga}} = \\ + "i" = "Specify the exact LGA name or set {.arg {multiple_lga}} = \\ `TRUE`. See {.code {abs_lga_lookup}} for a list of all LGAs", - x = glue::glue("The lga name '{lga_name}' matched multiple LGAs:"), - glue::glue("{unique_lga_names}") + "x" = "The lga name '{lga_name}' matched multiple LGAs: \\ + {unique_lga_names}" ) ) } # end if there is more than one matching LGA @@ -67,20 +68,144 @@ check_lga_name <- function( #' @return errors if state name not in ABS data #' @keywords internal #' @noRd -check_state_name <- function(state_name) { - state_match <- stringr::str_detect( - string = abs_pop_age_lga_2020$state, - pattern = state_name - ) - - does_state_match <- !any(state_match) +check_state_name <- function(state_name, multiple_state = FALSE) { + state_that_matches <- abs_pop_age_lga_2020 %>% + dplyr::select(state) %>% + dplyr::distinct() %>% + dplyr::filter(state %in% state_name) %>% + dplyr::pull(state) + + state_match <- is.element(state_name, state_that_matches) + + all_match <- all(state_match) + state_that_doesnt_match <- setdiff(state_name, state_that_matches) - if (does_state_match) { - rlang::abort( + if (!all_match) { + cli::cli_abort( message = c( "The state name provided does not match states in Australia", - x = glue::glue("The state name '{state_name}' did not match"), - i = "See `abs_lga_lookup` for a list of all states" + "x" = "The state name '{state_that_doesnt_match}' did not match", + "i" = "See `abs_lga_lookup` for a list of all states" + ) + ) + } + more_than_one_state <- length(state_that_matches) > 1 + if (more_than_one_state & !multiple_state) { + cli::cli_abort( + message = c( + "The state name provided matches multiple states", + "i" = "Specify the exact state name or set {.arg {multiple_state}} = \\ + `TRUE`. See {.code {abs_lga_lookup}} for a list of all states", + "x" = "The state name '{state_name}' matched multiple states: \\ + { state_that_matches}" + ) + ) + } +} + + +#' @title Check dimensions +#' @description An internal function used within [apply_vaccination()] to warn users of incompatible dimensions of +#' data and the next generation matrices +#' +#' @param data data frame +#' @param ngm list with next generation matrices at different settings +#' @keywords internal +check_dimensions <- function(ngm, data) { + nrow_data <- nrow(data) + ngm_cols <- purrr::map_int(ngm, ncol) + dim_match <- all(nrow_data == ngm_cols) + + if (!dim_match) { + cli::cli_abort( + c( + "Non-conformable arrays present.", + "i" = "The number of columns in {.var ngm} must match the number of rows in {.var data}.", + "x" = "Number of columns in {.var ngm} for the settings: {names(ngm)} are {purrr::map_int(ngm, ncol)} respectively.", + "x" = "Number of rows in {.var data} is {nrow(data)}." + ) + ) + } +} + +check_if_var_numeric <- function(data, var, attribute) { + var_val <- data[[var]] + + if (!is.numeric(var_val)) { + cli::cli_abort( + c( + "{.var {attribute}} must be {.cls numeric}", + "{.var {var_lab}} has been entered to represent {.var {attribute}}", + "But {.var {var_lab}} is of class {.cls {class(var_val)}}" + ) + ) + } +} + +check_if_data_frame <- function(x, + arg = rlang::caller_arg(x), + call = rlang::caller_env()) { + if (!is.data.frame(x)) { + cli::cli_abort( + message = c( + "{.arg {arg}} must be a {.cls data.frame}", + "i" = "{.arg {arg}} is {.cls {class(x)}}" + ), + call = call + ) + } +} + +error_old_ngm_arg <- function(arg) { + cli::cli_abort( + c( + "{arg} is no longer used in {.code generate_ngm}", + "i" = "Please use {.code generate_ngm_oz} instead" + ) + ) +} + + +#' +#' @title Check if data is a list +#' @param contact_data data on the contacts between two ages at different settings +#' @keywords internal +check_if_list <- function(contact_data) { + if (!inherits(contact_data, "list")) { + cli::cli_abort( + c( + "i" = "Function expects {.var contact_data} to be of class {.cls list}", + "x" = "We see {.var contact_data} is of class {.cls {class(contact_data)}}." + ) + ) + } +} + +check_if_all_matrix <- function(x) { + if (!all_matrix(x)) { + cli::cli_abort( + c("Inputs must all be of class {.cls matrix}") + ) + } +} + +check_age_breaks <- function(x, + y, + x_arg = "old", + y_arg = "new") { + if (!identical(x, y)) { + compare_res <- waldo::compare( + x = x, + y = y, + x_arg = x_arg, + y_arg = y_arg + ) + + rlang::abort( + c( + "Age breaks must be the same, but they are different:", + compare_res, + "i" = "You can check the age breaks using `age_breaks()`" ) ) } diff --git a/R/conmat-package.R b/R/conmat-package.R index 372dd475..d5b88b60 100644 --- a/R/conmat-package.R +++ b/R/conmat-package.R @@ -2,6 +2,14 @@ #' @importFrom stats predict "_PACKAGE" +# generics to re-export + +#' @importFrom ggplot2 autoplot +#' @export +ggplot2::autoplot + +#' @import rlang + # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start @@ -13,6 +21,8 @@ globalVariables( c( "abs_household_lga", "abs_pop_age_lga_2020", + "abs_avg_school", + "abs_avg_work", "acquisition_multiplier", "across", "age", @@ -62,6 +72,7 @@ globalVariables( "missing_any_contact_setting", "model_per_capita_household_size", "modelled_pop", + "na.omit", "n_households", "n_people", "n_persons_usually_resident", @@ -78,6 +89,7 @@ globalVariables( "probability", "ratio", "required_pop", + "school_fraction", "school_fraction_age_from", "school_fraction_age_to", "school_probability", @@ -97,6 +109,7 @@ globalVariables( "weight", "weight_sum", "work_education", + "work_fraction", "work_fraction_age_from", "work_fraction_age_to", "year", diff --git a/R/conmat-population.R b/R/conmat-population.R new file mode 100644 index 00000000..0e40c59e --- /dev/null +++ b/R/conmat-population.R @@ -0,0 +1,198 @@ +#' Create a new `conmat_population` class object +#' +#' @param data data.frame +#' @param age bare column name of numeric data on age +#' @param population bare column name of numeric data on population +#' +#' @return object with class `conmat_population` +#' @keywords internal +new_conmat_population <- function(data, age, population) { + label_age <- as_name(age) + label_population <- as_name(population) + tibble::new_tibble( + data, + nrow = vctrs::vec_size(data), + "age" = label_age, + "population" = label_population, + class = "conmat_population" + ) +} + +validate_conmat_population <- function(x) { + check_if_data_frame(x) + check_if_var_numeric(x, age_label(x), "age") + check_if_var_numeric(x, population_label(x), "population") +} + +#' Define a conmat population +#' +#' A conmat population is a dataframe that stores which columns represent the +#' age and population information. This is useful as it means we can refer +#' to this information throughout other functions in the conmat package +#' without needing to specify or hard code which columns represent the age +#' and population information. +#' +#' @param data data.frame +#' @param age bare name representing the age column +#' @param population bare name representing the population column +#' +#' @return a data frame with age and population attributes +#' @export +#' +#' @examples +#' perth <- abs_age_lga("Perth (C)") +conmat_population <- function(data, age, population) { + population <- new_conmat_population( + data = data, + age = enquo(age), + population = enquo(population) + ) + validate_conmat_population(population) + population +} + +#' @title Convert to conmat population +#' @param data data.frame +#' @param ... extra arguments +#' @name as_conmat_population +#' +#' @export +as_conmat_population <- function(data, ...) { + UseMethod("as_conmat_population") +} + +#' @rdname as_conmat_population +#' @export +as_conmat_population.default <- function(data, ...) { + abort("Cannot currently convert object of class {.cls {class(data)}} into \\ + a {.cls conmat_population} object.") +} + +#' @param age age column - an unquoted variable of numeric integer ages +#' @param population population column - an unquoted variable, numeric value +#' @rdname as_conmat_population +#' @export +#' @examples +#' some_age_pop <- data.frame( +#' age = 1:10, +#' pop = 101:110 +#' ) +#' +#' some_age_pop +#' +#' as_conmat_population( +#' some_age_pop, +#' age = age, +#' population = pop +#' ) +as_conmat_population.data.frame <- function(data, age, population, ...) { + # strip any existing classes + data <- as.data.frame(data) + age <- enquo(age) + population <- enquo(population) + conmat_population( + data = data, + age = !!age, + population = !!population + ) +} + +#' @rdname as_conmat_population +#' @export +as_conmat_population.list <- as_conmat_population.data.frame + +#' @rdname as_conmat_population +#' @export +as_conmat_population.grouped_df <- as_conmat_population.data.frame + +#' @keywords internal +#' @export +as_conmat_population.NULL <- function(data, ...) { + abort("A {conmat_population} must not be NULL") +} + +#' Accessing conmat attributes +#' +#' @param x conmat_population data frame +#' +#' @return age or population symbol or label +#' +#' @rdname accessors +#' @examples +#' \dontrun{ +#' perth <- abs_age_lga("Perth (C)") +#' age(perth) +#' age_label(perth) +#' population(perth) +#' population_label(perth) +#' } +#' @export +age <- function(x) { + sym(age_label(x)) +} + +#' @rdname accessors +#' @export +age_label <- function(x) { + UseMethod("age_label") +} + +#' @rdname accessors +#' @export +age_label.default <- function(x) { + cli::cli_abort("Cannot access {.val age} information from class \\ + {.cls {class(x)}}") +} + +#' @rdname accessors +#' @export +age_label.conmat_population <- function(x) { + x %@% "age" +} + +#' @rdname accessors +#' @export +population_label <- function(x) { + UseMethod("population_label") +} + +#' @rdname accessors +#' @export +population_label.default <- function(x) { + cli::cli_abort("Cannot access {.val population} information from class \\ + {.cls {class(x)}") +} + +#' @rdname accessors +#' @export +population_label.conmat_population <- function(x) { + x %@% "population" +} + +#' @rdname accessors +#' @export +population <- function(x) { + sym(population_label(x)) +} + +#' @export +print.conmat_population <- function(x, ...) { + txt <- glue::glue("({class(x)[[1]]})") + out <- cli::col_red(txt) + age_txt <- glue::glue( + "- age: {cli::style_bold(age_label(x))}" + ) + population_txt <- glue::glue( + "- population: {cli::style_bold(population_label(x))}" + ) + age_out <- cli::col_grey(age_txt) + population_out <- cli::col_grey(population_txt) + # adds to the top of the tibble + msg <- sprintf( + "%s %s\n %s\n %s\n", format(x)[1], out, + age_out, + population_out + ) + cat(msg) + cli::cat_line(format(x)[-1]) +} diff --git a/R/constructors.R b/R/constructors.R new file mode 100644 index 00000000..f81232e2 --- /dev/null +++ b/R/constructors.R @@ -0,0 +1,255 @@ +add_new_class <- function(x, new_class) { + class(x) <- c(new_class, class(x)) + x +} + +#' Build new age matrix +#' +#' A matrix that knows about its age breaks - which are by default provided as +#' its rownames. Mostly intended for internal use. +#' +#' @param matrix numeric matrix +#' @param age_breaks character vector of age breaks, by default the rownames. +#' +#' @return matrix with age breaks attribute +#' +#' @examples +#' age_break_names <- c("[0,5)", "[5,10)", "[10, 15)") +#' age_mat <- matrix( +#' runif(9), +#' nrow = 3, +#' ncol = 3, +#' dimnames = list( +#' age_break_names, +#' age_break_names +#' ) +#' ) +#' +#' new_age_matrix( +#' age_mat, +#' age_breaks = age_break_names +#' ) +#' +#' @export +new_age_matrix <- function(matrix, age_breaks) { + structure( + matrix, + age_breaks = age_breaks, + class = c("conmat_age_matrix", class(matrix)) + ) +} + +#' Extract age break attribute information +#' +#' @param x an object containing age break information +#' +#' @return age breaks character vector +#' @examples +#' age_breaks <- c(0, 5, 19, 15) +#' age_break_names <- c("[0,5)", "[5,10)", "[10, 15)") +#' age_mat <- matrix( +#' runif(9), +#' nrow = 3, +#' ncol = 3, +#' dimnames = list( +#' age_break_names, +#' age_break_names +#' ) +#' ) +#' +#' age_mat <- new_age_matrix(age_mat, age_breaks) +#' +#' age_breaks(age_mat) +#' @export +age_breaks <- function(x) { + UseMethod("age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.conmat_age_matrix <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.conmat_setting_prediction_matrix <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.setting_data <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.ngm_setting_matrix <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.setting_vaccination_matrix <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.numeric <- function(x) { + x +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.matrix <- function(x) { + cli::cli_abort( + "no method for {.code age_breaks()} defined for object of class {.cls {class(x)}} yet" + ) +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.array <- function(x) { + cli::cli_abort( + "no method for {.code age_breaks()} defined for object of class {.cls {class(x)}} yet" + ) +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.predicted_contacts <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.transmission_probability_matrix <- function(x) { + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.setting_contact_model <- function(x){ + attr(x, "age_breaks") +} + +#' @describeIn age_breaks Get age break information +#' @export +age_breaks.default <- function(x) { + cli::cli_abort( + "no method for {.code age_breaks()} defined for object of class {.cls {class(x)}} yet" + ) +} + +#' Establish new setting data +#' +#' @param list_df list of data frames +#' +#' @return object with additional (primary) class "setting data" and an "age_breaks attribute. +#' @export +new_setting_data <- function(list_df) { + structure( + list_df, + age_breaks = unique(list_df$home$age_from), + class = c("setting_data", class(list_df)) + ) +} + +#' Establish new BGM setting data +#' +#' @param list_matrix list of matrices +#' @param raw_eigenvalue the raw eigenvalue +#' @param scaling scaling factor +#' @param age_breaks vector of age breaks +#' +#' @return object with additional (primary) class "ngm_setting_matrix", and attributes for "age_breaks", "scaling", and "raw_eigenvalue". +#' @export +new_ngm_setting_matrix <- function(list_matrix, + raw_eigenvalue, + scaling, + age_breaks) { + structure( + list_matrix, + raw_eigenvalue = raw_eigenvalue, + scaling = scaling, + age_breaks = age_breaks, + class = c("ngm_setting_matrix", class(list_matrix)) + ) +} + + +#' Get raw eigvenvalue from NGM matrix +#' +#' @param list_matrix object of class `ngm_setting_matrix` +#' +#' @return raw eigenvalue +#' +#' @examples +#' # examples not run as they take a long time +#' \dontrun{ +#' perth <- abs_age_lga("Perth (C)") +#' perth_contact <- extrapolate_polymod(perth) +#' perth_ngm <- generate_ngm( +#' perth_contact, +#' age_breaks = c(seq(0, 85, by = 5), Inf) +#' ) +#' raw_eigenvalue(perth_ngm) +#' } +#' @export +raw_eigenvalue <- function(list_matrix) { + attr(list_matrix, "raw_eigenvalue") +} + +#' Get the scaling from NGM matrix +#' +#' This value is `scaling <- R_target / R_raw`, where `R_target` is the target +#' R value provided to the NGM, and `R_raw` is the raw eigenvalue. +#' +#' @param list_matrix object of class `ngm_setting_matrix` +#' +#' @return scaling +#' +#' @examples +#' # examples not run as they take a long time +#' \dontrun{ +#' perth <- abs_age_lga("Perth (C)") +#' perth_contact <- extrapolate_polymod(perth) +#' perth_ngm <- generate_ngm( +#' perth_contact, +#' age_breaks = c(seq(0, 85, by = 5), Inf) +#' ) +#' raw_eigenvalue(perth_ngm) +#' scaling(perth_ngm) +#' } +#' @export +scaling <- function(list_matrix) { + attr(list_matrix, "scaling") +} + +new_setting_contact_model <- function(list_model, + age_breaks) { + structure( + list_model, + age_breaks = age_breaks, + class = c("setting_contact_model", class(list_model)) + ) +} + +new_setting_vaccination_matrix <- function(list_matrix, + age_breaks) { + structure( + list_matrix, + age_breaks = age_breaks, + class = c("setting_vaccination_matrix", class(list_matrix)) + ) +} + +new_predicted_contacts <- function(df, age_breaks) { + tibble::new_tibble( + x = df, + age_breaks = age_breaks, + class = "predicted_contacts" + ) +} diff --git a/R/data-abs-avg-work-school.R b/R/data-abs-avg-work-school.R new file mode 100644 index 00000000..7161600e --- /dev/null +++ b/R/data-abs-avg-work-school.R @@ -0,0 +1,31 @@ +#' ABS work data for 2016 +#' +#' An internal dataset containing Australian Bureau of Statistics work data for +#' each age in 2016. The data is averaged across each state to provide an +#' overall average, and is used to provide estimated work populations for +#' model fitting in [add_school_work_participation()], which is used in [fit_single_contact_model()]. The data is summarised from `data_abs_state_work`, +#' see `?data_abs_state_work` for more details. +#' +#' @format A data frame with 116 rows and 2 variables: +#' \describe{ +#' \item{age}{0 to 115} +#' \item{work_fraction}{fraction of population working.} +#' } +#' @source {Census of Population and Housing, 2016, TableBuilder} +"abs_avg_work" + +#' ABS education data for 2016 +#' +#' An internal dataset containing Australian Bureau of Statistics education data for +#' each age in 2016. The data is averaged across each state to provide an +#' overall average, and is used to provide estimated education populations for +#' model fitting in [add_school_work_participation()], which is used in [fit_single_contact_model()]. The data is summarised from `data_abs_state_education`, +#' see `?data_abs_state_education` for more details. +#' +#' @format A data frame with 116 rows and 2 variables: +#' \describe{ +#' \item{age}{0 to 115} +#' \item{school_fraction}{fraction of population at school} +#' } +#' @source {Census of Population and Housing, 2016, TableBuilder} +"abs_avg_school" diff --git a/R/data-davies-age-extended.R b/R/data-davies-age-extended.R index e8fb9afb..1db303a8 100644 --- a/R/data-davies-age-extended.R +++ b/R/data-davies-age-extended.R @@ -2,16 +2,15 @@ #' #' A dataset containing data from \url{https://www.nature.com/articles/s41591-020-0962-9#code-availability} #' When using this data, ensure that you cite the original authors at: -#' +#' #' "Davies, N.G., Klepac, P., Liu, Y. et al. Age-dependent effects in the transmission and control of COVID-19 epidemics. Nat Med 26, 1205–1211 (2020). https://doi.org/10.1038/s41591-020-0962-9" #' #' @format A data frame of the probability of transmission from a case to a contact. There are 101 rows and 4 variables. #' \describe{ #' \item{age}{from 0 to 100} -#' \item{clinical_fraction}{Estimate of fraction with clinical symptoms, or the age-specific proportion of infections resulting in clinical sympttoms inferred by applying a smoothing spline to the mean estimates from Davies et al. } -#' \item{davies_original}{Age specific parameters of the relative susceptibility to +#' \item{clinical_fraction}{Estimate of fraction with clinical symptoms, or the age-specific proportion of infections resulting in clinical symptoms inferred by applying a smoothing spline to the mean estimates from Davies et al. } +#' \item{davies_original}{Age specific parameters of the relative susceptibility to #' infection inferred from a smoothing-spline estimate of the mean relative susceptibility estimate from Davies et al.} #' \item{davies_updated}{Re-estimated parameter of the susceptibility profile for under-16s that is estimated in a similar way but to the age-distribution of infections in England from the UK ONS prevalence survey rather than case counts which may undercount children} #' } "davies_age_extended" - \ No newline at end of file diff --git a/R/data_abs_education_state_2020.R b/R/data_abs_education_state_2020.R index c461267f..90199dda 100644 --- a/R/data_abs_education_state_2020.R +++ b/R/data_abs_education_state_2020.R @@ -1,9 +1,9 @@ -#' @title 2020 ABS education population data, interpolated into 1 year bins, +#' @title 2020 ABS education population data, interpolated into 1 year bins, #' by state. #' -#' @description A dataset containing Australian Bureau of Statistics education -#' data by state for 2020. These were interpolated into 1 year age bins. -#' There are still some issued with the methods used, as the interpolated +#' @description A dataset containing Australian Bureau of Statistics education +#' data by state for 2020. These were interpolated into 1 year age bins. +#' There are still some issued with the methods used, as the interpolated #' values are sometimes higher than the population. #' #' @format A data frame with 808 rows and 6 variables: diff --git a/R/data_abs_employ_age_lga.R b/R/data_abs_employ_age_lga.R index 158a52df..8c6746b1 100644 --- a/R/data_abs_employ_age_lga.R +++ b/R/data_abs_employ_age_lga.R @@ -8,7 +8,7 @@ #' \item{year}{year - 2016} #' \item{state}{state - short state or territory name} #' \item{lga}{local government area name} -#' \item{age_group}{age groups are as follows: 15-19, 20-24, 25-34, 35-44, +#' \item{age_group}{age groups are as follows: 15-19, 20-24, 25-34, 35-44, #' 45-54, 55-64, 65-74, 75-84, 85+, total} #' \item{total_employed}{total number of people employed} #' \item{total_unemployed}{total number of people unemployed} diff --git a/R/data_abs_household_lga.R b/R/data_abs_household_lga.R index 55dc2770..75a7efe3 100644 --- a/R/data_abs_household_lga.R +++ b/R/data_abs_household_lga.R @@ -1,11 +1,11 @@ #' @title ABS household data for 2016 #' -#' @description A dataset containing Australian Bureau of Statistics household +#' @description A dataset containing Australian Bureau of Statistics household #' data for 2016. The data is filtered to "Total Households". Contains -#' information on the number of people typically in a residence in the region -#' and the number of households associated with those number of residents. -#' This data is typically used to obtain the household size distributions to -#' compute the per capita household size of a particular region. +#' information on the number of people typically in a residence in the region +#' and the number of households associated with those number of residents. +#' This data is typically used to obtain the household size distributions to +#' compute the per capita household size of a particular region. #' #' @format A data frame with 4986 rows and 6 variables: #' \describe{ diff --git a/R/data_abs_lga_education.R b/R/data_abs_lga_education.R index 9e54f3de..5c507537 100644 --- a/R/data_abs_lga_education.R +++ b/R/data_abs_lga_education.R @@ -2,26 +2,26 @@ #' #' A dataset containing Australian Bureau of Statistics education data by #' lga for 2016. The data sourced from 2016 Census - Employment, Income and -#' Education through TableBuilder have been randomly adjusted by the ABS to -#' avoid the release of confidential data. As a result of this, there are -#' some cases where the estimated number of people being educated is higher -#' than the population of those people. Such cases have been flagged under the +#' Education through TableBuilder have been randomly adjusted by the ABS to +#' avoid the release of confidential data. As a result of this, there are +#' some cases where the estimated number of people being educated is higher +#' than the population of those people. Such cases have been flagged under the #' `anomaly_flag` variable. #' @format A data frame with 64,264 rows and 8 variables: #' \describe{ #' \item{year}{2016, data is based on 2016 Census of Population and Housing.} -#' \item{state}{String denoting abbreviated name of state or territory, for +#' \item{state}{String denoting abbreviated name of state or territory, for #' example, 'NSW', 'VIC', and 'QLD'.} -#' \item{lga}{String denoting the official name of Local Government Area. +#' \item{lga}{String denoting the official name of Local Government Area. #' For example, 'Albury (C).'} #' \item{age}{Ages from 0 to 115.} -#' \item{population_educated}{Number of people educated including students -#' with full-time, part-time status, as well as the people who mentioned -#' just the type of educational institution they attend and not their +#' \item{population_educated}{Number of people educated including students +#' with full-time, part-time status, as well as the people who mentioned +#' just the type of educational institution they attend and not their #' student status.} -#' \item{total_population}{Number depicting the total population belonging +#' \item{total_population}{Number depicting the total population belonging #' to the age.} -#' \item{proportion}{Number denoting the measure of the ratio of +#' \item{proportion}{Number denoting the measure of the ratio of #' educated population and total population belonging to the age i.e, #' population_educated / total_population} #' \item{anomaly_flag}{Logical variable flagging abnormal observations. E.g., diff --git a/R/data_abs_lga_work.R b/R/data_abs_lga_work.R index 486c4407..5b301dc0 100644 --- a/R/data_abs_lga_work.R +++ b/R/data_abs_lga_work.R @@ -1,11 +1,11 @@ #' LGA wise ABS work population data on different ages for year 2016 #' -#' A dataset containing Australian Bureau of Statistics labour force population -#' data by lga for 2016. The data sourced from 2016 Census - Employment, -#' Income and Education through TableBuilder have been randomly adjusted by -#' the ABS to avoid the release of confidential data. As a result of this, -#' there are some cases where the estimated number of people being employed -#' is higher than the population of those people. Such cases have been +#' A dataset containing Australian Bureau of Statistics labour force population +#' data by lga for 2016. The data sourced from 2016 Census - Employment, +#' Income and Education through TableBuilder have been randomly adjusted by +#' the ABS to avoid the release of confidential data. As a result of this, +#' there are some cases where the estimated number of people being employed +#' is higher than the population of those people. Such cases have been #' flagged under the `anomaly_flag` variable. #' #' @format A data frame with 64,496 rows and 8 variables: @@ -13,16 +13,16 @@ #' \item{year}{2016, as data is from 2016 Census of Population and Housing.} #' \item{state}{String denoting the abbreviated name of state or territory #' name such as 'NSW', 'VIC', 'QLD' etc.} -#' \item{lga}{String denoting the official name of Local Government Area. +#' \item{lga}{String denoting the official name of Local Government Area. #' For example, 'Albury (C).'} #' \item{age}{Ages from 0 to 115.} -#' \item{employed_population}{Number of people employed including people +#' \item{employed_population}{Number of people employed including people #' with full-time, part-time employment status.} #' \item{total_population}{Total population of age in row.} #' \item{proportion}{The ratio of employed population and total population #' belonging to the age i.e, employed_population/ total_population.} -#' \item{anomaly_flag}{Logical variable flagging abnormal observations, such +#' \item{anomaly_flag}{Logical variable flagging abnormal observations, such #' as total population lesser than employed_population as TRUE.} #' } #' @source {Census of Population and Housing, 2016, TableBuilder} -"data_abs_lga_work" \ No newline at end of file +"data_abs_lga_work" diff --git a/R/data_abs_state_age.R b/R/data_abs_state_age.R index b9e7df2e..80754bfa 100644 --- a/R/data_abs_state_age.R +++ b/R/data_abs_state_age.R @@ -1,6 +1,6 @@ #' ABS state population data for 2020 #' -#' Dataset containing Australian Bureau of Statistics state level population +#' Dataset containing Australian Bureau of Statistics state level population #' data for 2020 #' #' @format A data frame with 168 rows and 3 variables: diff --git a/R/data_abs_state_education.R b/R/data_abs_state_education.R index b0f20b69..f207f027 100644 --- a/R/data_abs_state_education.R +++ b/R/data_abs_state_education.R @@ -1,18 +1,18 @@ #' State wise ABS education population data on different ages for year 2016 #' #' A dataset containing Australian Bureau of Statistics education data by state -#' for 2016. The data sourced from 2016 Census - Employment, Income and -#' Education through TableBuilder have been randomly adjusted by the ABS to +#' for 2016. The data sourced from 2016 Census - Employment, Income and +#' Education through TableBuilder have been randomly adjusted by the ABS to #' avoid the release of confidential data. -#' +#' #' @format A data frame with 1044 rows and 6 variables: #' \describe{ #' \item{year}{2016, as data is from 2016 Census of Population and Housing.} #' \item{state}{String of abbreviated name of state or territory names, e.g., #' 'NSW', 'VIC', 'QLD' and so on.} #' \item{age}{Ages from 0 to 115.} -#' \item{population_educated}{Number of people educated, including students -#' with full-time, part-time status, and people who mentioned only the type +#' \item{population_educated}{Number of people educated, including students +#' with full-time, part-time status, and people who mentioned only the type #' of educational institution they attend and not their student status.} #' \item{total_population}{Total population belonging to age in a row.} #' \item{proportion}{The ratio of educated population and total population diff --git a/R/data_abs_state_work.R b/R/data_abs_state_work.R index 95690052..0849515c 100644 --- a/R/data_abs_state_work.R +++ b/R/data_abs_state_work.R @@ -1,8 +1,8 @@ #' State wise ABS work population data on different ages for year 2016 #' -#' A dataset containing Australian Bureau of Statistics labour force population -#' data by state for 2016. The data sourced from 2016 Census - Employment, -#' Income and Education through TableBuilder have been randomly adjusted by +#' A dataset containing Australian Bureau of Statistics labour force population +#' data by state for 2016. The data sourced from 2016 Census - Employment, +#' Income and Education through TableBuilder have been randomly adjusted by #' the ABS to avoid the release of confidential data. #' #' @format A data frame with 1044 rows and 6 variables: @@ -11,11 +11,11 @@ #' \item{state}{String. Abbreviated name of state or territory, e.g., 'NSW', #' 'VIC', 'QLD' and so on.} #' \item{age}{Ages from 0 to 115.} -#' \item{employed_population}{Number of people employed including people +#' \item{employed_population}{Number of people employed including people #' with full-time, part-time employment status.} #' \item{total_population}{Total population belonging to the age.} #' \item{proportion}{The ratio of employed population and total population #' belonging to the age i.e, employed_population/ total_population} #' } #' @source {Census of Population and Housing, 2016, TableBuilder} -"data_abs_state_work" \ No newline at end of file +"data_abs_state_work" diff --git a/R/data_age_group_lookup.R b/R/data_age_group_lookup.R index 752e1fda..6e7df719 100644 --- a/R/data_age_group_lookup.R +++ b/R/data_age_group_lookup.R @@ -1,7 +1,7 @@ #' Lookup table of age groups in 5 year bins #' -#' A dataset containing age lower and upper levels with age group -#' +#' A dataset containing age lower and upper levels with age group +#' #' #' @format A data frame with 21 rows and 3 variables: #' \describe{ diff --git a/R/data_example_vaccination_effect.R b/R/data_example_vaccination_effect.R index 361313cd..2774fa1f 100644 --- a/R/data_example_vaccination_effect.R +++ b/R/data_example_vaccination_effect.R @@ -5,7 +5,7 @@ #' acquisition/susceptibility and efficacy of transmission/infectiousness #' for the ordered age groups from lowest to highest of the next generation #' matrix. -#' +#' #' @format A data frame with 17 rows and 4 variables #' \describe{ #' \item{age_band}{character. age bands: 0-4,5-11, 12-15, 16-19, 20-24, etc} @@ -14,4 +14,4 @@ #' \item{transmission}{example transmission coverage, between 0-1} #' } #' -"vaccination_effect_example_data" \ No newline at end of file +"vaccination_effect_example_data" diff --git a/R/data_eyre_transmission_probabilities.R b/R/data_eyre_transmission_probabilities.R index ce9da2ce..31674b65 100644 --- a/R/data_eyre_transmission_probabilities.R +++ b/R/data_eyre_transmission_probabilities.R @@ -1,11 +1,11 @@ #' @title Transmission probabilities of COVID19 from Eyre et al. #' -#' @description A dataset containing data digitised from "The impact of -#' SARS-CoV-2 vaccination on Alpha & Delta variant transmission", by David W +#' @description A dataset containing data digitised from "The impact of +#' SARS-CoV-2 vaccination on Alpha & Delta variant transmission", by David W #' Eyre, Donald Taylor, Mark Purver, David Chapman, Tom Fowler, Koen B Pouwels, -#' A Sarah Walker, Tim EA Peto +#' A Sarah Walker, Tim EA Peto #' (\doi{https://doi.org/10.1101/2021.09.28.21264260}. The figures were taken -#' from +#' from #' \url{https://www.medrxiv.org/content/10.1101/2021.09.28.21264260v1.full-text}, #' and the code to digitise these figures is in `data-raw` under #' "read_eyre_transmission_probabilities.R". When using this data, ensure that @@ -18,13 +18,13 @@ #' \item{case_age}{from 0 to 100} #' \item{contact_age}{from ages 0 to 100} #' \item{case_age_5y}{If case is between ages 0-4, in 5 year bins up to 100} -#' \item{contact_age_5y}{If contact is between ages 0-4, in 5 year bins up +#' \item{contact_age_5y}{If contact is between ages 0-4, in 5 year bins up #' to 100} #' \item{probability}{probability of transmission. Value is 0 - 1} #' } -#' @examples +#' @examples #' \dontrun{ -#' +#' #' # plot this #' library(ggplot2) #' library(stringr) @@ -49,10 +49,10 @@ #' across( #' ends_with("age"), #' ~ factor(.x, -#' levels = str_sort( -#' unique(.x), -#' numeric = TRUE -#' ) +#' levels = str_sort( +#' unique(.x), +#' numeric = TRUE +#' ) #' ) #' ) #' ) %>% @@ -71,6 +71,5 @@ #' theme( #' axis.text = element_text(angle = 45, hjust = 1) #' ) -#' #' } "eyre_transmission_probabilities" diff --git a/R/data_original_school_work.R b/R/data_original_school_work.R new file mode 100644 index 00000000..c3e9ea13 --- /dev/null +++ b/R/data_original_school_work.R @@ -0,0 +1,27 @@ +#' Original school demographics for conmat +#' +#' An internal dataset containing the original estimates of which fraction of +#' ages were attending school in Australia. These can be used inside of +#' [fit_single_contact_model()] and [fit_setting_contacts()]. +#' +#' @format A data frame with 121 rows and 2 variables: +#' \describe{ +#' \item{age}{0 to 120} +#' \item{school_fraction}{fraction of population at school} +#' } +#' @source {Census of Population and Housing, 2016, TableBuilder} +"conmat_original_school_demographics" + +#' Original work demographics for conmat +#' +#' An internal dataset containing the original estimates of which fraction of +#' ages were working in Australia. These can be used inside of +#' [fit_single_contact_model()] and [fit_setting_contacts()]. +#' +#' @format A data frame with 121 rows and 2 variables: +#' \describe{ +#' \item{age}{0 to 120} +#' \item{work_fraction}{fraction of population working.} +#' } +#' @source {Census of Population and Housing, 2016, TableBuilder} +"conmat_original_work_demographics" diff --git a/R/data_polymod_model.R b/R/data_polymod_model.R index 75d9d3ce..f8b08cb2 100644 --- a/R/data_polymod_model.R +++ b/R/data_polymod_model.R @@ -1,23 +1,23 @@ #' @title Polymod Settings models -#' -#' @description A data object containing a list of fitted gam models +#' +#' @description A data object containing a list of fitted gam models #' predicting the number of contacts in each of the four settings which are -#' "home","work","school" and "other". For more details on model fitting, -#' see [fit_setting_contacts()]. This object has been provided as data to +#' "home","work","school" and "other". For more details on model fitting, +#' see [fit_setting_contacts()]. This object has been provided as data to #' avoid recomputing a relatively common type of model for use with `conmat`. -#' +#' #' @seealso [fit_setting_contacts()] -#' @examples +#' @examples #' \dontrun{ #' # code used to produce this data #' library(conmat) -#' set.seed(2022-08-26) +#' set.seed(2022 - 08 - 26) #' polymod_contact_data <- get_polymod_setting_data() #' polymod_survey_data <- get_polymod_population() -#'polymod_setting_models <- fit_setting_contacts( -#' contact_data_list = polymod_contact_data, -# population = polymod_survey_data -#') +#' polymod_setting_models <- fit_setting_contacts( +#' contact_data_list = polymod_contact_data, +#' # population = polymod_survey_data +#' ) #' } -#' +#' "polymod_setting_models" diff --git a/R/data_prem_contact_matrices.R b/R/data_prem_contact_matrices.R new file mode 100644 index 00000000..b01032ef --- /dev/null +++ b/R/data_prem_contact_matrices.R @@ -0,0 +1,23 @@ +#' Contact matrices as calculated by Prem. et al. +#' +#' Contact matrices as calculated by Prem. et al. (2021) PLoS Computational Biology. Updated to use the latest corrected matrices from their 2021 publication. +#' DOI: 10.1371/journal.pcbi.1009098 +#' +#' @format A list with 5 elements: +#' \describe{ +#' \item{home}{A 16x16 matrix containing the number of home contacts, by 5 +#' year age group} +#' \item{work}{A 16x16 matrix containing the number of workplace contacts, by +#' 5 year age group} +#' \item{school}{A 16x16 matrix containing the number of school contacts, by 5 +#' year age group} +#' \item{other}{A 16x16 matrix containing the number of other contacts, by 5 +#' year age group} +#' \item{all}{A 16x16 matrix containing the number of all contacts, by 5 +#' year age group} +#' } +#' All age groups are 5 year age bands, from 0 to 80. +#' +#' @source \url{https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009098} +#' @source \url{https://github.com/kieshaprem/synthetic-contact-matrices} +"prem_germany_contact_matrices" diff --git a/R/estimate_setting_contacts.R b/R/estimate_setting_contacts.R index 8ddfd235..bd21cad7 100644 --- a/R/estimate_setting_contacts.R +++ b/R/estimate_setting_contacts.R @@ -1,64 +1,117 @@ #' @title Get predicted setting specific as well as combined contact matrices -#' -#' @description Given a named list of contact datasets (with names giving +#' +#' @description Given a named list of contact datasets (with names giving #' the setting, and assumed to together make up the full set of contacts for -#' individuals in the survey), a representative population distribution for -#' the survey, and a set of age breaks at which to aggregate contacts, return +#' individuals in the survey), a representative population distribution for +#' the survey, and a set of age breaks at which to aggregate contacts, return #' a set of predicted contact matrices for each setting, and for all combined. -#' -#' @param contact_data_list list of data sets with information on the contacts +#' Note that this function is parallelisable with `future`, and will be +#' impacted by any `future` plans provided. +#' +#' @param contact_data_list list of data sets with information on the contacts #' of individuals at different settings -#' -#' @param survey_population representative population distribution for the +#' +#' @param survey_population representative population distribution for the #' survey -#' -#' @param prediction_population population for prediction. The default value +#' +#' @param prediction_population population for prediction. The default value #' set is survey_population -#' -#' @param age_breaks vector depicting age values. For example, +#' +#' @param age_breaks vector depicting age values. For example, #' `c(seq(0, 75, by = 5), Inf)` -#' -#' @param per_capita_household_size Optional (defaults to NULL). When set, it -#' adjusts the household contact matrix by some per capita household size. +#' +#' @param per_capita_household_size Optional (defaults to NULL). When set, it +#' adjusts the household contact matrix by some per capita household size. #' To set it, provide a single number, the per capita household size. More -#' information is provided below in Details. See -#' [get_per_capita_household_size()] function for a helper for Australian +#' information is provided below in Details. See +#' [get_abs_per_capita_household_size()] function for a helper for Australian #' data with a workflow on how to get this number. -#' +#' +#' @param symmetrical whether to enforce symmetrical terms in the model. +#' Defaults to TRUE. See `details` of `fit_single_contact_model` for more +#' information. +#' +#' @param school_demographics (optional) defaults to census average proportion +#' at school. You can provide a dataset with columns, "age" (numeric), and +#' "school_fraction" (0-1), if you would like to specify these +#' details. See `abs_avg_school` for the default values. If you would like to +#' use the original school demographics used in conmat, these are provided in +#' the dataset, `conmat_original_school_demographics`. +#' @param work_demographics (optional) defaults to census average proportion +#' employed. You can provide a dataset with columns, "age" (numeric), and +#' "work_fraction", if you would like to specify these details. See +#' `abs_avg_work` for the default values. If you would like to +#' use the original work demographics used in conmat, these are provided in +#' the dataset, `conmat_original_work_demographics`. +#' #' @return predicted setting specific contact matrices, and for all combined -#' +#' #' @examples -#' #' \dontrun{ #' # takes a long time to run #' settings_estimated_contacts <- estimate_setting_contacts( #' contact_data_list = get_polymod_setting_data(), #' survey_population = get_polymod_population(), #' prediction_population = get_polymod_population(), -#' age_breaks = c(seq(0, 75, by = 5), Inf), +#' age_breaks = c(seq(0, 85, by = 5), Inf), #' per_capita_household_size = NULL #' ) -#' +#' +#' # or predict to fairfield +#' fairfield_hh <- get_abs_per_capita_household_size(lga = "Fairfield (C)") +#' contact_model_pred_est <- estimate_setting_contacts( +#' contact_data_list = get_polymod_setting_data(), +#' survey_population = get_polymod_population(), +#' prediction_population = abs_age_lga("Fairfield (C)"), +#' age_breaks = c(seq(0, 85, by = 5), Inf), +#' per_capita_household_size = fairfield_hh +#' ) +#' +#' # or use different populations in school or work demographics +#' fairfield_hh <- get_abs_per_capita_household_size(lga = "Fairfield (C)") +#' contact_model_pred_est <- estimate_setting_contacts( +#' contact_data_list = get_polymod_setting_data(), +#' survey_population = get_polymod_population(), +#' prediction_population = abs_age_lga("Fairfield (C)"), +#' age_breaks = c(seq(0, 85, by = 5), Inf), +#' per_capita_household_size = fairfield_hh, +#' school_demographics = conmat_original_school_demographics, +#' work_demographics = conmat_original_work_demographics +#' ) +#' +#' # or use non-symmetric model terms +#' contact_model_pred_est <- estimate_setting_contacts( +#' contact_data_list = get_polymod_setting_data(), +#' survey_population = get_polymod_population(), +#' prediction_population = abs_age_lga("Fairfield (C)"), +#' age_breaks = c(seq(0, 85, by = 5), Inf), +#' per_capita_household_size = fairfield_hh, +#' symmetrical = FALSE +#' ) #' } #' @export estimate_setting_contacts <- function(contact_data_list, survey_population, prediction_population = survey_population, age_breaks, - per_capita_household_size = NULL) { - + per_capita_household_size = NULL, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL) { setting_models <- fit_setting_contacts( contact_data_list = contact_data_list, - population = survey_population + population = survey_population, + symmetrical = symmetrical, + school_demographics = school_demographics, + work_demographics = work_demographics ) - + contact_model_pred <- predict_setting_contacts( - population = prediction_population, - contact_model = setting_models, - age_breaks = age_breaks, - per_capita_household_size = per_capita_household_size - ) - - contact_model_pred + population = prediction_population, + contact_model = setting_models, + age_breaks = age_breaks, + per_capita_household_size = per_capita_household_size + ) + contact_model_pred } diff --git a/R/extrapolate_polymod.R b/R/extrapolate_polymod.R index d0ee7d1d..a03bfeae 100644 --- a/R/extrapolate_polymod.R +++ b/R/extrapolate_polymod.R @@ -1,20 +1,29 @@ #' Fit all-of-polymod model and extrapolate to a given population an age breaks -#' +#' #' Uses [estimate_setting_contacts()] to fit a contact model on the data from -#' polymod and later extrapolate on to a desired population. +#' polymod and later extrapolate on to a desired population. Note that this +#' function is parallelisable with `future`, and will be impacted by any +#' `future` plans provided. +#' +#' Also note that since this model uses the already fit `polymod_setting_models` +#' data, which has been fit using symmetrical model terms, if you want to +#' fit a model with asymmetric model terms, you will need to go through +#' the full process of building new models. You can find this detail in last +#' section of the vignette "example pipeline". #' -#' @param population data set with information on the population of the desired -#' location - containing `lower.age.limit` and `population` columns. See -#' `get_polymod_population()` for an example of this data. -#' @param age_breaks vector depicting age values. Default value is +#' @param population a `conmat_population` object, specifying the `age` +#' and `population` characteristics. Or a data frame with `lower.age.limit` +#' and `population` columns. See `get_polymod_population()` for an example +#' of this data. +#' @param age_breaks vector depicting age values. Default value is #' `c(seq(0, 75, by = 5), Inf)` -#' @param per_capita_household_size Optional (defaults to NULL). When set, it -#' adjusts the household contact matrix by some per capita household size. +#' @param per_capita_household_size Optional (defaults to NULL). When set, it +#' adjusts the household contact matrix by some per capita household size. #' To set it, provide a single number, the per capita household size. More -#' information is provided below in Details. See -#' [get_per_capita_household_size()] function for a helper for Australian +#' information is provided below in Details. See +#' [get_abs_per_capita_household_size()] function for a helper for Australian #' data with a workflow on how to get this number. -#' @return Returns setting-specific and combined contact matrices for the +#' @return Returns setting-specific and combined contact matrices for the #' desired ages. #' @examples #' \dontrun{ @@ -27,22 +36,19 @@ #' population = abs_age_lga("Fairfield (C)") #' ) #' synthetic_settings_5y_fairfield -#' ) #' } #' @export extrapolate_polymod <- function(population, age_breaks = c(seq(0, 75, by = 5), Inf), per_capita_household_size = NULL) { - contact_model_pred <- predict_setting_contacts( population = population, - # using already fit polymod_setting_models object + # using already fit polymod_setting_models object # from `create-polymod-model.R` contact_model = polymod_setting_models, age_breaks = age_breaks, per_capita_household_size = per_capita_household_size ) - + contact_model_pred - } diff --git a/R/fit_setting_contacts.R b/R/fit_setting_contacts.R index 0078e579..082ddc07 100644 --- a/R/fit_setting_contacts.R +++ b/R/fit_setting_contacts.R @@ -1,47 +1,83 @@ -#' Fit a contact model to a survey poulation -#' +#' Fit a contact model to a survey population +#' #' fits a gam model for each setting on the survey population data & the -#' setting wise contact data. The underlying method is described in more +#' setting wise contact data. The underlying method is described in more #' detail in [fit_single_contact_model()]. The models can be fit in parallel, -#' see the examples. -#' -#' @param contact_data_list A list of dataframes, each containing informatio +#' see the examples. Note that this function is parallelisable with `future`, +#' and will be impacted by any `future` plans provided. +#' +#' @param contact_data_list A list of dataframes, each containing information #' on the setting (home, work, school, other), age_from, age_to, #' the number of contacts, and the number of participants. Example data #' can be retrieved with [get_polymod_setting_data()]. -#' @param population survey population data, containing columns +#' @param population `conmat_population` object or dataset with columns #' `lower.age.limit` and `population`. Example data can be retrieved with #' [get_polymod_population()]. +#' @param symmetrical whether to enforce symmetrical terms in the model. +#' Defaults to TRUE. See `details` of `fit_single_contact_model` for more +#' information. +#' @param school_demographics (optional) defaults to census average proportion +#' at school. You can provide a dataset with columns, "age" (numeric), and +#' "school_fraction" (0-1), if you would like to specify these +#' details. See `abs_avg_school` for the default values. If you would like to +#' use the original school demographics used in conmat, these are provided in +#' the dataset, `conmat_original_school_demographics`. +#' @param work_demographics (optional) defaults to census average proportion +#' employed. You can provide a dataset with columns, "age" (numeric), and +#' "work_fraction", if you would like to specify these details. See +#' `abs_avg_work` for the default values. If you would like to +#' use the original work demographics used in conmat, these are provided in +#' the dataset, `conmat_original_work_demographics`. #' @return list of fitted gam models - one for each setting provided #' @author Nicholas Tierney #' @export -#' @examples +#' @examples #' # These aren't being run as they take too long to fit #' \dontrun{ #' contact_model <- fit_setting_contacts( -#' contact_data_list = get_polymod_setting_data(), +#' contact_data_list = get_polymod_setting_data(), #' population = get_polymod_population() #' ) -#' +#' #' # can fit the model in parallel #' library(future) #' plan(multisession, workers = 4) -#' +#' #' polymod_setting_data <- get_polymod_setting_data() #' polymod_population <- get_polymod_population() -#' +#' #' contact_model <- fit_setting_contacts( #' contact_data_list = polymod_setting_data, #' population = polymod_population #' ) +#' +#' # you can specify your own population data for school and work demographics +#' contact_model_diff_data <- fit_setting_contacts( +#' contact_data_list = polymod_setting_data, +#' population = polymod_population, +#' school_demographics = conmat_original_school_demographics, +#' work_demographics = conmat_original_work_demographics +#' ) #' } -fit_setting_contacts <- function(contact_data_list, population) { +fit_setting_contacts <- function(contact_data_list, + population, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL) { + check_if_list(contact_data_list) - furrr::future_map( + fitted_setting_contacts <- furrr::future_map( .x = contact_data_list, .f = fit_single_contact_model, population = population, + symmetrical = symmetrical, + school_demographics = NULL, + work_demographics = NULL, .options = furrr::furrr_options(seed = TRUE) ) + new_setting_contact_model( + list_model = fitted_setting_contacts, + age_breaks = age_breaks(contact_data_list) + ) } diff --git a/R/fit_single_contact_model.R b/R/fit_single_contact_model.R index d1fc1fdb..ccde37aa 100644 --- a/R/fit_single_contact_model.R +++ b/R/fit_single_contact_model.R @@ -1,46 +1,84 @@ #' @title Fit a single GAM contact model to a dataset -#' +#' #' @description This is the workhorse of the `conmat` package, and is typically -#' used inside [fit_setting_contacts()]. It predicts the contact rate between -#' all age bands (the contact rate between ages 0 and 1, 0 and 2, 0 and 3, -#' and so on), for a specified setting, with specific terms being added for +#' used inside [fit_setting_contacts()]. It predicts the contact rate between +#' all age bands (the contact rate between ages 0 and 1, 0 and 2, 0 and 3, +#' and so on), for a specified setting, with specific terms being added for #' given settings. See "details" for further information. -#' -#' @details The model fit is a Generalised Additive Model (GAM). To help -#' account for assortativity with age, where people of similar ages have -#' more contact with each other, we include predictors `age_from`, and -#' `age_to`. To account for intergenerational contact patterns, where -#' parents and grandparents will interact with their children and grand -#' children, we include a term that is the absolute difference of age_from -#' and age_to. We also include the interaction of intergenerational contact -#' patterns with contacts from one age with the term that is the absolute -#' difference of age_from and age_to, and age_from. These terms are fit with -#' a smoothing function. Specifically, the relevant code looks like this: -#' +#' +#' @details The model fit is a Generalised Additive Model (GAM). We provide two +#' "modes" for model fitting. Either using "symmetric" or "non-symmetric" +#' model predictor terms with the logical variance "symmetrical", which is set +#' to TRUE by default. We recommend using the "symmetrical" terms as it +#' reflects the fact that contacts are symmetric - person A having contact +#' with person B means person B has had contact with person A. We've included +#' a variety of terms to account for assortativity with age, where people of +#' similar ages have more contact with each other. And included terms to +#' account for intergenerational contact patterns, where parents and +#' grandparents will interact with their children and grand children. +#' These terms are fit with a smoothing function. Specifically, the relevant +#' code looks like this: +#' #' ``` r -#' s(age_to) + -#' s(age_from) + -#' s(abs(age_from - age_to)) + -#' s(abs(age_from - age_to), age_from) +#' # abs(age_from - age_to) +#' s(gam_age_offdiag) + +#' # abs(age_from - age_to)^2 +#' s(gam_age_offdiag_2) + +#' # abs(age_from * age_to) +#' s(gam_age_diag_prod) + +#' # abs(age_from + age_to) +#' s(gam_age_diag_sum) + +#' # pmax(age_from, age_to) +#' s(gam_age_pmax) + +#' # pmin(age_from, age_to) +#' s(gam_age_pmin) #' ``` -#' -#' We also include predictors for the probability of attending school, and -#' attending work. These are computed as the probability that a person goes +#' +#' We also include predictors for the probability of attending school, and +#' attending work. These are computed as the probability that a person goes #' to the same school/work, proportional to the increase in contacts due to -#' attendance. These terms are calculated from estimated proportion of -#' people in age groups attending school and work. See +#' attendance. These terms are calculated from estimated proportion of +#' people in age groups attending school and work. See #' [add_modelling_features()] for more details. -#' -#' Finally, we include two offset terms so that we estimate the contact rate, +#' +#' Finally, we include two offset terms so that we estimate the contact rate, #' that is the contacts per capita, instead of the number of contacts. These -#' offset terms are `log(contactable_population)`, and -#' `log(contactable_population_school)` when the model is fit to a school -#' setting. The contactable population is estimated as the interpolated +#' offset terms are `log(contactable_population)`, and +#' `log(contactable_population_school)` when the model is fit to a school +#' setting. The contactable population is estimated as the interpolated #' 1 year ages from the data. For schools this is the contactable population #' weighted by the proportion of the population attending school. -#' +#' #' This leaves us with a model that looks like so: -#' +#' +#' ``` r +#' mgcv::bam( +#' formula = contacts ~ +#' # abs(age_from - age_to) +#' s(gam_age_offdiag) + +#' # abs(age_from - age_to)^2 +#' s(gam_age_offdiag_2) + +#' # abs(age_from * age_to) +#' s(gam_age_diag_prod) + +#' # abs(age_from + age_to) +#' s(gam_age_diag_sum) + +#' # pmax(age_from, age_to) +#' s(gam_age_pmax) + +#' # pmin(age_from, age_to) +#' s(gam_age_pmin) + +#' school_probability + +#' work_probability + +#' offset(log_contactable_population) + +#' # or for school settings +#' # offset(log_contactable_population_school) +#' family = stats::poisson, +#' offset = log(participants), +#' data = population_data +#' ) +#' ``` +#' +#' But if the term `symmetrical = FALSE` is used, you get: +#' #' ``` r #' mgcv::bam( #' formula = contacts ~ @@ -49,75 +87,122 @@ #' s(abs(age_from - age_to)) + #' s(abs(age_from - age_to), age_from) + #' school_probability + -#' work_probability + +#' work_probability + #' offset(log_contactable_population) + #' # or for school settings -#' # offset(log_contactable_population_school) +#' # offset(log_contactable_population_school) #' family = stats::poisson, #' offset = log(participants), #' data = population_data #' ) #' ``` +#' #' @param contact_data dataset with columns `age_to`, `age_from`, `setting`, #' `contacts`, and `participants`. See [get_polymod_contact_data()] for #' an example dataset - or the dataset in examples below. -#' @param population population data, with columns `lower.age.limit` and -#' `population`. See [get_polymod_population()] for an example. +#' @param population `conmat_population` object, or data frame with columns +#' `lower.age.limit` and `population`. See [get_polymod_population()] for +#' an example. +#' @param symmetrical whether to enforce symmetrical terms in the model. +#' Defaults to TRUE. See `details` for more information. +#' @param school_demographics (optional) defaults to census average proportion +#' at school. You can provide a dataset with columns, "age" (numeric), and +#' "school_fraction" (0-1), if you would like to specify these +#' details. See `abs_avg_school` for the default values. If you would like to +#' use the original school demographics used in conmat, these are provided in +#' the dataset, `conmat_original_school_demographics`. +#' @param work_demographics (optional) defaults to census average proportion +#' employed. You can provide a dataset with columns, "age" (numeric), and +#' "work_fraction", if you would like to specify these details. See +#' `abs_avg_work` for the default values. If you would like to +#' use the original work demographics used in conmat, these are provided in +#' the dataset, `conmat_original_work_demographics`. #' @return single model #' @examples #' example_contact <- get_polymod_contact_data(setting = "home") #' example_contact #' example_population <- get_polymod_population() -#' +#' #' library(dplyr) -#' -#' example_contact_20 <- example_contact %>% -#' filter(age_to <= 20, -#' age_from <= 20) -#' +#' +#' example_contact_20 <- example_contact %>% +#' filter( +#' age_to <= 20, +#' age_from <= 20 +#' ) +#' #' my_mod <- fit_single_contact_model( #' contact_data = example_contact_20, #' population = example_population #' ) +#' +#' # you can specify your own population data for school and work demographics +#' my_mod_diff_data <- fit_single_contact_model( +#' contact_data = example_contact_20, +#' population = example_population, +#' school_demographics = conmat_original_school_demographics, +#' work_demographics = conmat_original_work_demographics +#' ) #' @export -fit_single_contact_model <- function(contact_data, population) { +fit_single_contact_model <- function(contact_data, + population, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL) { # programatically add the offset term to the formula, so the model defines # information about the setting, without us having to pass it through to the # prediction data - formula_no_offset <- contacts ~ - # Prem method did a post-hoc smoothing - # deviation of contact age distribution from population age distribution - s(age_to) + - # number of contacts by age - s(age_from) + - # intergenerational contact patterns - enables the off-diagonals - # intergenerational is defined as: - # intergenerational = abs(age_from - age_to) - s(intergenerational) + - # interaction between intergenerational patterns and age_from, to remove - # ridge for some ages and settings - # - s(intergenerational, age_from) + - # probabilities of both attending (any) school/work - school_probability + - work_probability - + + if (symmetrical) { + formula_no_offset <- contacts ~ + # Prem method did a post-hoc smoothing + # abs(age_from - age_to) + s(gam_age_offdiag) + + # abs(age_from - age_to)^2 + s(gam_age_offdiag_2) + + # abs(age_from * age_to) + s(gam_age_diag_prod) + + # abs(age_from + age_to) + s(gam_age_diag_sum) + + # pmax(age_from, age_to) + s(gam_age_pmax) + + # pmin(age_from, age_to) + s(gam_age_pmin) + + school_probability + + work_probability + } else if (!symmetrical) { + formula_no_offset <- contacts ~ + # # deviation of contact age distribution from population age distribution + s(age_to) + + # # number of contacts by age + s(age_from) + + # # intergenerational contact patterns - enables the off-diagonals + # # intergenerational is defined as: + # # intergenerational = abs(age_from - age_to) + s(intergenerational) + + # # interaction between intergenerational patterns and age_from, to remove + # # ridge for some ages and settings + s(intergenerational, age_from) + + # probabilities of both attending (any) school/work + school_probability + + work_probability + } + # choose the offset variable based on the setting setting <- contact_data$setting[1] - offset_variable <- switch( - setting, + offset_variable <- switch(setting, school = "log_contactable_population_school", "log_contactable_population" ) - + # add multiplicative offset for population contactable, to enable # extrapolation to new demographies - # in mgcv, this part of the offset gets used in prediction, which + # in mgcv, this part of the offset gets used in prediction, which # is what we want. Those are the "contactable" parts, which we use # to extrapolate to new demographics. formula_offset <- sprintf("~. + offset(%s)", offset_variable) formula <- update(formula_no_offset, formula_offset) - + # contact model for all locations together contact_data %>% # NOTE @@ -127,10 +212,12 @@ fit_single_contact_model <- function(contact_data, population) { add_modelling_features( # NOTE # The modelling features added here are: - # the school and work offsets - # pop_age_to (interpolated population) - # `log_contactable_population_school`, and ` log_contactable_population` - population = population + # the school and work offsets + # pop_age_to (interpolated population) + # `log_contactable_population_school`, and ` log_contactable_population` + population = population, + school_demographics = school_demographics, + work_demographics = work_demographics ) %>% mgcv::bam( formula = formula, @@ -141,5 +228,4 @@ fit_single_contact_model <- function(contact_data, population) { offset = log(participants), data = . ) - } diff --git a/R/generate-ngm.R b/R/generate-ngm.R index 1aefff53..7b5ee1bf 100644 --- a/R/generate-ngm.R +++ b/R/generate-ngm.R @@ -1,103 +1,316 @@ #' @title Calculate next generation contact matrices -#' -#' @description Once infected, a person can transmit an infectious disease to -#' another, creating generations of infected individuals. We can define a -#' matrix describing the number of newly infected individuals in given -#' categories, such as age, for consecutive generations. This matrix is -#' called a "next generation matrix" (NGM). -#' -#' @details The NGM can be used to calculate the expected number of secondary -#' infections in a given age group. Given certain age breaks, we compute the -#' unscaled next generation matrices for that location across different -#' settings & age groups using the contact rates extrapolated from POLYMOD -#' survey data on the specified location, adjusted by the per capita -#' household size and the setting-specific relative per-contact transmission -#' probability matrices for the same age groups. These NGMs are then scaled -#' according to a target reproduction number (which is provided as an -#' argument) using the ratio of the desired R0 and the R0 of the NGM -#' for the combination of all settings. The R0 of the combination of all -#' settings is obtained by calculating the unique, positive eigen value of -#' the combination NGM. This ratio is then used to scale all the setting -#' specific NGMs. -#' -#' @param state_name target Australian state name in abbreviated form, such -#' as "QLD", "NSW", or "TAS" -#' @param lga_name target Australian local government area (LGA) name, such -#' as "Fairfield (C)". See [abs_lga_lookup()] for list of lga names. -#' @param age_breaks vector depicting age values with the highest age depicted +#' +#' @description Once infected, a person can transmit an infectious disease to +#' another, creating generations of infected individuals. We can define a +#' matrix describing the number of newly infected individuals in given +#' categories, such as age, for consecutive generations. This matrix is +#' called a "next generation matrix" (NGM). We can generate an NGM from two +#' sources - a `conmat_population` object (such as the output from +#' [abs_age_lga()]), or a `conmat_setting_prediction_matrix`, which is the +#' output from [extrapolate_polymod()] or [predict_setting_contacts()]. +#' +#' @details The NGM can be used to calculate the expected number of secondary +#' infections in a given age group. Given certain age breaks, we compute the +#' unscaled next generation matrices for that location across different +#' settings & age groups using the contact rates extrapolated from POLYMOD +#' survey data on the specified location, adjusted by the per capita +#' household size and the setting-specific relative per-contact transmission +#' probability matrices for the same age groups. These NGMs are then scaled +#' according to a target reproduction number (which is provided as an +#' argument) using the ratio of the desired R0 and the R0 of the NGM +#' for the combination of all settings. The R0 of the combination of all +#' settings is obtained by calculating the unique, positive eigen value of +#' the combination NGM. This ratio is then used to scale all the setting +#' specific NGMs. +#' +#' @note When using a setting prediction contact matrix (such as one generated +#' by `extrapolate_polymod`, with class `conmat_setting_prediction_matrix`), +#' the age breaks specified in `generate_ngm` must be the same as the age +#' breaks specified in the synthetic contact matrix, otherwise it will error +#' as it is trying to multiple incompatible matrices. +#' +#' @param x data input - could be a `conmat_population` (such as the output from +#' [abs_age_lga()]), or a `conmat_setting_prediction_matrix`, which is the +#' output from [extrapolate_polymod()] or [predict_setting_contacts()]. +#' @param age_breaks vector depicting age values with the highest age depicted #' as `Inf`. For example, c(seq(0, 85, by = 5), Inf) #' @param R_target target reproduction number +#' @param setting_transmission_matrix default is NULL, which calculates the transmission +#' matrix using `get_setting_transmission_matrices(age_breaks)`. You can +#' provide your own transmission matrix, but its rows and columns must match +#' the number of rows and columns, and must be a list of one matrix for each +#' setting. See the output for `get_setting_transmission_matrices(age_breaks)` +#' to get a sense of the structure. See [get_setting_transmission_matrices()] +#' for more detail. +#' @param ... extra arguments, currently not used +#' @name generate_ngm +#' @examples +#' \dontrun{ +#' perth <- abs_age_lga("Perth (C)") +#' perth_hh <- get_abs_per_capita_household_size(lga = "Perth (C)") +#' +#' age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) +#' +#' # you can also run this without `per_capita_household_size` +#' perth_ngm_lga <- generate_ngm( +#' perth, +#' age_breaks = age_breaks_0_80_plus, +#' per_capita_household_size = perth_hh, +#' R_target = 1.5 +#' ) +#' +#' perth_contact <- extrapolate_polymod( +#' perth, +#' per_capita_household_size = perth_hh +#' ) +#' +#' perth_ngm <- generate_ngm( +#' perth_contact, +#' age_breaks = age_breaks_0_80_plus, +#' R_target = 1.5 +#' ) +#' +#' # using our own transmission matrix +#' new_transmission_matrix <- get_setting_transmission_matrices( +#' age_breaks = age_breaks_0_80_plus, +#' # is normally 0.5 +#' asymptomatic_relative_infectiousness = 0.75 +#' ) +#' +#' new_transmission_matrix +#' +#' perth_ngm_0_80_new_tmat <- generate_ngm( +#' perth_contact, +#' age_breaks = age_breaks_0_80_plus, +#' R_target = 1.5, +#' setting_transmission_matrix = new_transmission_matrix +#' ) +#' } +#' @export +generate_ngm <- function(x, + age_breaks, + R_target, + setting_transmission_matrix, + ...) { + # detect if state_name or lga_name are used + # then give an informative error that the user should use + # `generate_ngm_oz` + # instead + # state_name + # lga_name + UseMethod("generate_ngm") +} + +#' @param lga_name now defunct, but capturing arguments for informative error +#' @param state_name now defunct, but capturing arguments for informative error +#' @examples +#' # examples not run as they take a long time +#' \dontrun{ +#' perth <- abs_age_lga("Perth (C)") +#' perth_contact <- extrapolate_polymod(perth) +#' generate_ngm(perth_contact, age_breaks = c(seq(0, 85, by = 5), Inf)) +#' } +#' @rdname generate_ngm +#' @export +generate_ngm.conmat_setting_prediction_matrix <- function(x, + age_breaks, + R_target, + setting_transmission_matrix = NULL, + per_capita_household_size = NULL, + ..., + lga_name, + state_name) { + if (!missing(state_name)) { + error_old_ngm_arg(state_name) + } + if (!missing(lga_name)) { + error_old_ngm_arg(lga_name) + } + + check_age_breaks( + x = age_breaks(x), + y = age_breaks, + x_arg = "x", + y_arg = "age_breaks" + ) + + setting_transmission_matrix <- check_transmission_probabilities( + setting_transmission_matrix, + age_breaks = age_breaks + ) + + calculate_ngm( + setting_prediction_matrix = x, + age_breaks, + R_target, + setting_transmission_matrix = setting_transmission_matrix + ) +} + +#' @param per_capita_household_size default is NULL - which defaults to [get_polymod_per_capita_household_size()], which gives 3.248971 +#' @rdname generate_ngm +#' @export +generate_ngm.conmat_population <- function(x, + age_breaks, + R_target, + setting_transmission_matrix = NULL, + per_capita_household_size = NULL, + ..., + lga_name, + state_name) { + setting_contact_rates <- extrapolate_polymod( + population = x, + age_breaks = age_breaks, + per_capita_household_size = per_capita_household_size + ) + + check_age_breaks( + x = age_breaks(setting_contact_rates), + y = age_breaks, + x_arg = "x", + y_arg = "age_breaks" + ) + + setting_transmission_matrix <- check_transmission_probabilities( + setting_transmission_matrix, + age_breaks = age_breaks + ) + + calculate_ngm( + setting_prediction_matrix = setting_contact_rates, + age_breaks = age_breaks, + R_target = R_target, + setting_transmission_matrix = setting_transmission_matrix + ) +} + +#' @title Calculate next generation contact matrices from ABS data +#' +#' @description This function calculates a next generation matrix (NGM) +#' based on state or LGA data from the Australian Bureau of Statistics (ABS). +#' For full details see [generate_ngm()]. +# +#' @param state_name target Australian state name in abbreviated form, such +#' as "QLD", "NSW", or "TAS" +#' @param lga_name target Australian local government area (LGA) name, such +#' as "Fairfield (C)". See [abs_lga_lookup()] for list of lga names. +#' @inheritParams generate_ngm #' #' @export #' @examples #' # don't run as both together takes a long time to run #' \dontrun{ -#' ngm_nsw <- generate_ngm( +#' ngm_nsw <- generate_ngm_oz( #' state_name = "NSW", #' age_breaks = c(seq(0, 85, by = 5), Inf), #' R_target = 1.5 #' ) -#' ngm_fairfield <- generate_ngm( +#' ngm_fairfield <- generate_ngm_oz( #' lga_name = "Fairfield (C)", #' age_breaks = c(seq(0, 85, by = 5), Inf), #' R_target = 1.5 #' ) #' } -generate_ngm <- function(state_name = NULL, - lga_name = NULL, - age_breaks, - R_target) { - - # pull out the age distribution of the target population & - # the per-capita (ie. averaged over people, not households) household - # size in this population - - if (!is.null(state_name)){ +generate_ngm_oz <- function(state_name = NULL, + lga_name = NULL, + age_breaks, + R_target, + setting_transmission_matrix = NULL) { + # pull out the age distribution of the target population & + # the per-capita (ie. averaged over people, not households) household + # size in this population + if (!is.null(state_name)) { population <- abs_age_state(state_name = {{ state_name }}) - household_size <- get_per_capita_household_size(state = {{ state_name }}) - } else{ + household_size <- get_abs_per_capita_household_size(state = {{ state_name }}) + } else { population <- abs_age_lga(lga_name = {{ lga_name }}) - household_size <- get_per_capita_household_size(lga = {{ lga_name }}) + household_size <- get_abs_per_capita_household_size(lga = {{ lga_name }}) } - # predict from the model to contact rates for a population with these characteristics, - # and for these age breaks - - setting_contact_rates <- extrapolate_polymod(population, - age_breaks = age_breaks, - per_capita_household_size = household_size) - - # get relative (ie. needing to be scaled to a given R) transmission + + # predict from the model to contact rates for a population with these + # characteristics, and for these age breaks + + setting_contact_rates <- extrapolate_polymod( + population, + age_breaks = age_breaks, + per_capita_household_size = household_size + ) + + setting_transmission_matrix <- check_transmission_probabilities( + setting_transmission_matrix, + age_breaks = age_breaks + ) + + calculate_ngm( + setting_prediction_matrix = setting_contact_rates, + age_breaks = age_breaks, + R_target = R_target, + setting_transmission_matrix = setting_transmission_matrix + ) +} + + +calculate_ngm <- function(setting_prediction_matrix, + age_breaks, + R_target, + setting_transmission_matrix) { + # get relative (ie. needing to be scaled to a given R) transmission # probabilities between pairs of ages in different settings - these incorporate # relative infectiousness by age (based on symptomatic fraction), relative # susceptibility by age, and setting-specific weights to account for different # transmission probabilities in different settings, calibrated to UK infection # survey data. - - setting_rel_transmission_probs <- get_setting_transmission_matrices(age_breaks = age_breaks) - + + # Need to double check that the ages match in each + # in previous versions this would work + # check_if_age_breaks_match(setting_transmission_matrix, + # setting_prediction_matrix) + # combine to get relative setting-specific NGMs - keeping the four settings in # the right order - - settings <- names(setting_rel_transmission_probs) - setting_rel_ngms <- mapply("*", - setting_contact_rates[settings], - setting_rel_transmission_probs[settings], - SIMPLIFY = FALSE) - + settings <- names(setting_transmission_matrix) + setting_rel_ngms <- mapply( + "*", + setting_prediction_matrix[settings], + setting_transmission_matrix[settings], + SIMPLIFY = FALSE + ) + # add an 'all locations' matrix, so we can scale the whole thing setting_rel_ngms$all <- Reduce("+", setting_rel_ngms) - + # scale to a required R_target # the eigenvalue is the R R_raw <- Re(eigen(setting_rel_ngms$all)$values[1]) scaling <- R_target / R_raw - + # could be lapply - setting_ngms <- mapply("*", - setting_rel_ngms, - scaling, - SIMPLIFY = FALSE) - - setting_ngms + setting_ngms <- mapply( + "*", + setting_rel_ngms, + scaling, + SIMPLIFY = FALSE + ) + + new_ngm_setting_matrix(setting_ngms, + raw_eigenvalue = R_raw, + scaling = scaling, + age_breaks = age_breaks + ) } +check_transmission_probabilities <- function(input_transmission_probs, age_breaks) { + if (is.null(input_transmission_probs)) { + input_transmission_probs <- get_setting_transmission_matrices( + age_breaks = age_breaks + ) + } + + if (!inherits(input_transmission_probs, "transmission_probability_matrix")) { + cli::cli_abort( + "Input {.var input_transmission_probs} must have class \\ + {.cls transmission_probability_matrix}" + ) + } + input_transmission_probs +} diff --git a/R/get-age-population-function-internals.R b/R/get-age-population-function-internals.R new file mode 100644 index 00000000..5e8e40ea --- /dev/null +++ b/R/get-age-population-function-internals.R @@ -0,0 +1,239 @@ +#' Prepare population data for generating an age population function +#' +#' Prepares objects for use in [get_age_population_function()]. +#' +#' @param data data.frame +#' @param ... extra arguments +#' @return list of objects, `max_bound` `pop_model_bounded` `bounded_pop` `unbounded_pop` for use in [get_age_population_function()] +#' @author njtierney +#' @keywords internal +#' @name prepare_population_for_modelling +#' @export +prepare_population_for_modelling <- function(data, ...) { + UseMethod("prepare_population_for_modelling") +} + +#' @rdname prepare_population_for_modelling +prepare_population_for_modelling.conmat_population <- function(data, ...) { + age_col <- age(data) + pop_col <- population(data) + pop_model <- data %>% + dplyr::arrange( + !!age_col + ) %>% + dplyr::mutate( + # model based on bin midpoint + bin_width = bin_widths(!!age_col), + midpoint = !!age_col + bin_width / 2, + # scaling down the population appropriately + log_pop = log(!!pop_col / bin_width) + ) + + # find the maximum of the bounded age groups, and the populations above and + # below + max_bound <- max(pop_model %>% + dplyr::pull(!!age_col)) + + # filter to just the bounded age groups for fitting + pop_model_bounded <- pop_model %>% + dplyr::filter( + !!age_col < max_bound + ) + + total_pop <- dplyr::pull(pop_model, !!pop_col) %>% sum() + bounded_pop <- dplyr::pull(pop_model_bounded, !!pop_col) %>% sum() + unbounded_pop <- total_pop - bounded_pop + + return( + tibble::lst( + max_bound, + pop_model_bounded, + bounded_pop, + unbounded_pop + ) + ) +} + +#' @name prepare_population_for_modelling +#' @param age_col column of ages +#' @param pop_col column of population, +#' @param ... extra arguments +prepare_population_for_modelling.data.frame <- function(data = data, + age_col = age_col, + pop_col = pop_col, + ...) { + pop_model <- data %>% + dplyr::arrange( + {{ age_col }} + ) %>% + dplyr::mutate( + # model based on bin midpoint + bin_width = bin_widths({{ age_col }}), + midpoint = {{ age_col }} + bin_width / 2, + # scaling down the population appropriately + log_pop = log({{ pop_col }} / bin_width) + ) + + # find the maximum of the bounded age groups, and the populations above and + # below + max_bound <- max(pop_model %>% + dplyr::pull({{ age_col }})) + + # filter to just the bounded age groups for fitting + pop_model_bounded <- pop_model %>% + dplyr::filter( + {{ age_col }} < max_bound + ) + + total_pop <- dplyr::pull(pop_model, {{ pop_col }}) %>% sum() + bounded_pop <- dplyr::pull(pop_model_bounded, {{ pop_col }}) %>% sum() + unbounded_pop <- total_pop - bounded_pop + + return( + tibble::lst( + max_bound, + pop_model_bounded, + bounded_pop, + unbounded_pop + ) + ) +} + +#' @title Return a function for determining population based on age, used in +#' [get_age_population_function()]. +#' @param pop_model population model data list object from +#' @return function with age input, returning population estimate +#' @author njtierney +#' @noRd +#' @keywords internal +return_age_population_function <- function(pop_model) { + # browser() + fit <- fit_bounded_age_groups(pop_model$pop_model_bounded) + + pred <- predict_to_long_age_ranges(pop_model, fit) + + # return a function to look up populations for integer ages + function(age) { + build_lookup_populations(age, pred) + } +} + +#' @title Predict log population based on age midpoints +#' @description Used within the internal function, +#' [return_age_population_function()], ultimately for the +#' [get_age_population_function()] function. +#' @param pop_model_bounded population data frame with columns of an age +#' `midpoint`, and log population (`log_pop`). +#' @return model with predictions for log population +#' @author njtierney +#' @keywords internal +#' @noRd +fit_bounded_age_groups <- function(pop_model_bounded) { + pop_model_bounded %>% + with( + smooth.spline( + x = midpoint, + y = log_pop, + df = pmin(10, nrow(pop_model_bounded)) + ) + ) +} + +#' @title Build prediction table +#' @description Internal function used in [return_age_population_function()], +#' ultimately for the [get_age_population_function()] function. +#' @param pop_model population model object from +#' [prepare_population_for_modelling()]. +#' @param fit model predictions +#' @return tibble with predicted population to various ages +#' @author njtierney +#' @noRd +#' @keywords internal +predict_to_long_age_ranges <- function(pop_model, fit) { + max_bound <- pop_model$max_bound + bounded_pop <- pop_model$bounded_pop + unbounded_pop <- pop_model$unbounded_pop + + # predict to a long range of ages, to deal with upper bound + pred <- tibble::tibble( + age = 0:200 + ) %>% + dplyr::mutate( + log_pred = predict(fit, age)$y, + pred = exp(log_pred) + ) %>% + # group into whether it is in the bounded or unbounded population + dplyr::mutate( + bounded = age < max_bound, + ) %>% + dplyr::group_by( + bounded + ) %>% + # adjust populations within bounded ages to match totals + dplyr::mutate( + required_pop = ifelse(bounded, bounded_pop, unbounded_pop), + modelled_pop = sum(pred), + ratio = required_pop / modelled_pop, + pred_adj = pred * ratio + ) %>% + dplyr::ungroup() %>% + # adjust the unbounded region to drop off smoothly, based on the weights + dplyr::mutate( + # this is a weird way of getting the population of the final age bin in + # the bounded group. Needs to happen after the previous grouped + # reweighting step, and needs to be ungrouped now to do it. + max_bound_pop = pred_adj[bounded][sum(bounded)], + ) %>% + dplyr::group_by( + bounded + ) %>% + dplyr::mutate( + # linearly extrapolate the final population group over years past the + # upper bound. Select the number of years past such that all the excess + # population is used up + max_years_over = 2 * required_pop / max_bound_pop, + years_over = pmax(0, age - max_bound), + weight = pmax(0, 1 - years_over / max_years_over), + weight_sum = sum(weight), + target_weight_sum = required_pop / max_bound_pop, + weight = weight * target_weight_sum / weight_sum, + population = ifelse(bounded, pred_adj, max_bound_pop * weight) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + age, + population + ) %>% + dplyr::filter( + population > 0 + ) + + pred +} + +#' @title Build a population table for use in lookup +#' @description this function is used internally in the also internal function, +#' [return_age_population_function()], ultimately for the +#' [get_age_population_function()] function. +#' @param pred model predictions +#' @param age vector of ages +#' @return tibble with population information for age ranges +#' @author njtierney +#' @noRd +#' @keywords internal +build_lookup_populations <- function(age, pred) { + # browser() + tibble::tibble( + age = age + ) %>% + dplyr::left_join( + pred, + by = "age" + ) %>% + dplyr::mutate( + population = tidyr::replace_na(population, 0) + ) %>% + dplyr::pull( + population + ) +} diff --git a/R/get_per_capita_household_size.R b/R/get_abs_per_capita_household_size.R similarity index 80% rename from R/get_per_capita_household_size.R rename to R/get_abs_per_capita_household_size.R index 89f1bd10..cf1b7c68 100644 --- a/R/get_per_capita_household_size.R +++ b/R/get_abs_per_capita_household_size.R @@ -5,45 +5,44 @@ #' or LGA. #' @author Nick Golding #' @export -#' @examples -#' get_per_capita_household_size(lga = "Fairfield (C)") -#' get_per_capita_household_size(state = "NSW") +#' @examples +#' get_abs_per_capita_household_size(lga = "Fairfield (C)") +#' get_abs_per_capita_household_size(state = "NSW") #' \dontrun{ #' # cannot specify both state and LGA -#' get_per_capita_household_size(state = "NSW", lga = "Fairfield (C)") +#' get_abs_per_capita_household_size(state = "NSW", lga = "Fairfield (C)") #' } -get_per_capita_household_size <- function(state = NULL, lga = NULL) { - +get_abs_per_capita_household_size <- function(state = NULL, lga = NULL) { level <- dplyr::case_when( is.null(state) & is.null(lga) ~ "national", !is.null(state) & is.null(lga) ~ "state", is.null(state) & !is.null(lga) ~ "lga", TRUE ~ "erroneous" ) - + if (level == "erroneous") { - stop ("only one of state and lga may be specified") + rlang::abort("only one of state and lga may be specified") } - + if (length(state) > 1 | length(lga) > 1) { - stop ("only one state or LGA at a time, please") + rlang::abort("only one state or LGA at a time, please") } - + if (!is.null(state)) { check_state_name(state) } - - if (!is.null(lga)){ + + if (!is.null(lga)) { check_lga_name(lga) } - + # given ABS data on household sizes for a *single location*, get average # household sizes *per person* from ABS - assuming a max of 8 people per # households. Note - I tried computing the mean size of the households larger # than 7, by comparing with LGA populations, but they were improbably # enormous, probably because some of the population lives in facilities, not # households. - + # get state mean household sizes household_data <- abs_household_lga %>% dplyr::filter( @@ -57,33 +56,27 @@ get_per_capita_household_size <- function(state = NULL, lga = NULL) { # number of *people* in a household of that size n_people = n_households * size, ) - + state <- rlang::enquo(state) lga <- rlang::enquo(lga) - + # set up aggregation - household_data <- switch( - - level, - + household_data <- switch(level, national = household_data, - state = household_data %>% dplyr::filter(state == !!state) %>% dplyr::group_by(state), - lga = household_data %>% dplyr::filter(lga == !!lga) %>% dplyr::group_by(lga) - ) - + if (nrow(household_data) == 0) { - stop( - glue::glue("{level} '{paste(get(level))[2]}' not found") - ) + cli::cli_abort( + "{level} '{paste(get(level))[2]}' not found" + ) } - + # aggregate and average household sizes household_data %>% dplyr::group_by( @@ -105,5 +98,4 @@ get_per_capita_household_size <- function(state = NULL, lga = NULL) { dplyr::pull( per_capita_household_size ) - } diff --git a/R/get_age_population_function.R b/R/get_age_population_function.R index c3acafa3..a496896a 100644 --- a/R/get_age_population_function.R +++ b/R/get_age_population_function.R @@ -4,180 +4,95 @@ #' populations in 1y age increments from chunkier distributions produced by #' `socialmixr::wpp_age()`. #' -#' @param data dataset containing information on population of a given +#' @param data dataset containing information on population of a given #' age/age group -#' @param age_col bare variable name for the column with age information -#' @param pop_col bare variable name for the column with population information -#' +#' @param ... extra arguments +#' #' @details The function first prepares the data to fit a smoothing spline to -#' the data for ages below the maximum age. It arranges the data by the lower -#' limit of the age group to obtain the bin width/differences of the lower -#' age limits. The mid point of the bin width is later added to the ages and -#' the population is scaled as per the bin widths. The maximum age is later +#' the data for ages below the maximum age. It arranges the data by the lower +#' limit of the age group to obtain the bin width/differences of the lower +#' age limits. The mid point of the bin width is later added to the ages and +#' the population is scaled as per the bin widths. The maximum age is later #' obtained and the populations for different above and below are filtered out -#' along with the sum of populations with and without maximum age. A cubic -#' smoothing spline is then fitted to the data for ages below the maximum with -#' predictor variable as the ages with the mid point of the bins added to it +#' along with the sum of populations with and without maximum age. A cubic +#' smoothing spline is then fitted to the data for ages below the maximum with +#' predictor variable as the ages with the mid point of the bins added to it #' where as the response variable is the log-scaled population. Using the -#' smoothing spline fit, the predicted population of ages 0 to 200 is obtained -#' and the predicted population is adjusted further using a ratio of the sum +#' smoothing spline fit, the predicted population of ages 0 to 200 is obtained +#' and the predicted population is adjusted further using a ratio of the sum #' of the population across all ages from the data and predicted population. -#' The ratio is based on whether the ages are under the maximum age as the -#' total population across all ages differs for ages above and below the +#' The ratio is based on whether the ages are under the maximum age as the +#' total population across all ages differs for ages above and below the #' maximum age. The maximum age population is adjusted further to drop off #' smoothly, based on the weights. The final population is then linearly #' extrapolated over years past the upper bound from the data. For ages above -#' the maximum age from data, the population is calculated as a weighted -#' population of the maximum age that depends on the years past the upper +#' the maximum age from data, the population is calculated as a weighted +#' population of the maximum age that depends on the years past the upper #' bound. Older ages would have lower weights, therefore lower population. -#' +#' #' @return An interpolating function to get populations in 1y age increments #' @examples #' polymod_pop <- get_polymod_population() -#' +#' #' polymod_pop -#' +#' #' # But these ages and populations are binned every 5 years. So we can now #' # provide a specified age and get the estimated population for that 1 year #' # age group. First we create the new function like so -#' -#' age_pop_function <- get_age_population_function(data=polymod_pop, -#' age_col = lower.age.limit, -#' pop_col= population) +#' +#' age_pop_function <- get_age_population_function( +#' data = polymod_pop +#' ) #' # Then we pass it a year to get the estimated population for a particular age #' age_pop_function(4) -#' +#' #' # Or a vector of years, to get the estimated population for a particular age #' # range #' age_pop_function(1:4) -#' -#' # Notice that we get a _pretty similar_ number of 0-4 if we sum it up, as +#' +#' # Notice that we get a _pretty similar_ number of 0-4 if we sum it up, as #' # the first row of the table: #' head(polymod_pop, 1) #' sum(age_pop_function(age = 0:4)) -#' -#' # Usage in dplyr +#' +#' # Usage in dplyr #' library(dplyr) #' example_df <- slice_head(abs_education_state, n = 5) #' example_df %>% -#' mutate(population_est = age_pop_function(age)) -#' +#' mutate(population_est = age_pop_function(age)) +#' +#' @export +#' @rdname get_age_population_function +get_age_population_function <- function(data, ...) { + UseMethod("get_age_population_function") +} + +#' @name get_age_population_function +#' @export +get_age_population_function.conmat_population <- function(data = population, + ...) { + # prepare population data for modelling + pop_model <- prepare_population_for_modelling( + data = data + ) + + return_age_population_function(pop_model) +} + +#' @param age_col bare variable name for the column with age information +#' @param pop_col bare variable name for the column with population information +#' @name get_age_population_function #' @export -get_age_population_function <- function(data = population, - age_col= lower.age.limit, - pop_col= population) { - - +get_age_population_function.data.frame <- function(data = population, + age_col = lower.age.limit, + pop_col = population, + ...) { # prepare population data for modelling - pop_model <- data %>% - dplyr::arrange( - {{ age_col }} - ) %>% - dplyr::mutate( - # model based on bin midpoint - bin_width = bin_widths( {{ age_col }} ), - midpoint = {{ age_col }} + bin_width / 2, - # scaling down the population appropriately - log_pop = log({{ pop_col }} / bin_width) - ) - - # find the maximum of the bounded age groups, and the populations above and - # below - max_bound <- max(pop_model%>% - dplyr::pull({{ age_col }}) + pop_model <- prepare_population_for_modelling( + data = data, + age_col = age_col, + pop_col = pop_col ) - - # filter to just the bounded age groups for fitting - pop_model_bounded <- pop_model %>% - dplyr::filter( - {{age_col}} < max_bound - ) - - total_pop <- dplyr::pull(pop_model, {{ pop_col }}) %>% sum() - bounded_pop <- dplyr::pull(pop_model_bounded, {{ pop_col }}) %>% sum() - unbounded_pop <- total_pop - bounded_pop - - # fit to bounded age groups - fit <- pop_model_bounded %>% - with( - smooth.spline( - x = midpoint, - y = log_pop, - df = pmin(10, nrow(pop_model_bounded)) - ) - ) - - # predict to a long range of ages, to deal with upper bound - pred <- tibble::tibble( - age = 0:200 - ) %>% - dplyr::mutate( - log_pred = predict(fit, age)$y, - pred = exp(log_pred) - ) %>% - # group into whether it is in the bounded or unbounded population - dplyr::mutate( - bounded = age < max_bound, - ) %>% - dplyr::group_by( - bounded - ) %>% - # adjust populations within bounded ages to match totals - dplyr::mutate( - required_pop = ifelse(bounded, bounded_pop, unbounded_pop), - modelled_pop = sum(pred), - ratio = required_pop / modelled_pop, - pred_adj = pred * ratio - ) %>% - dplyr::ungroup() %>% - # adjust the unbounded region to drop off smoothly, based on the weights - dplyr::mutate( - # this is a weird way of getting the population of the final age bin in - # the bounded group. Needs to happen after the previous grouped - # reweighting step, and needs to be ungrouped now to do it. - max_bound_pop = pred_adj[bounded][sum(bounded)], - ) %>% - dplyr::group_by( - bounded - ) %>% - dplyr::mutate( - # linearly extrapolate the final population group over years past the - # upper bound. Select the number of years past such that all the excess - # population is used up - max_years_over = 2 * required_pop / max_bound_pop, - years_over = pmax(0, age - max_bound), - weight = pmax(0, 1 - years_over / max_years_over), - weight_sum = sum(weight), - target_weight_sum = required_pop / max_bound_pop, - weight = weight * target_weight_sum / weight_sum, - population = ifelse(bounded, pred_adj, max_bound_pop * weight) - ) %>% - dplyr::ungroup() %>% - dplyr::select( - age, - population - ) %>% - dplyr::filter( - population > 0 - ) - - # return a function to look up populations for integer ages - function(age) { - - tibble::tibble( - age = age - ) %>% - dplyr::left_join( - pred, - by = "age" - ) %>% - dplyr::mutate( - population = tidyr::replace_na(population, 0) - ) %>% - dplyr::pull( - population - ) - - } - + + return_age_population_function(pop_model) } diff --git a/R/get_data_abs_age_education.R b/R/get_data_abs_age_education.R deleted file mode 100644 index 7611dbb2..00000000 --- a/R/get_data_abs_age_education.R +++ /dev/null @@ -1,75 +0,0 @@ -#' @title Return data on educated population for a given age and state or -#' lga of Australia. -#' @param state target Australian state name or a vector with multiple state -#' names in its abbreviated form, such as "QLD", "NSW", or "TAS" -#' @param lga target Australian local government area (LGA) name, such as -#' "Fairfield (C)" or a vector with multiple lga names. See -#' [abs_lga_lookup()] for list of lga names. -#' @param age a numeric or numeric vector denoting ages between 0 to 115. The -#' default is to return all ages. -#' @return dataset with information on the number of educated people belonging -#' to a particular age, its total population and the corresponding proportion. -#' @export -#' @examples -#' get_data_abs_age_education(state="VIC") -#' get_data_abs_age_education(state="WA",lga="Albany (C)",age=1:5) -#' get_data_abs_age_education(state=c("QLD","TAS"),age=5) -#' get_data_abs_age_education(lga=c("Albury (C)","Barcoo (S)"),age=10) - -get_data_abs_age_education <- function(age = NULL, state = NULL, lga = NULL) { - - # checks only for state - if (!is.null(state) & is.null(lga)) { - - check_state_name(state) - data_subset <- data_abs_state_education %>% - dplyr::filter(state %in% {{ state }}) - - } else{ - # checks for lga - if (!is.null(lga)) { - - check_lga_name(lga, multiple_lga = TRUE) - - # checks if there is a state argument as well - # if state argument present, it filters for it - if (!is.null(state)) - { - data_subset <- data_abs_lga_education %>% - dplyr::filter(lga %in% {{ lga }} , state %in% {{ state }}) - - # if empty tibble , error message - if (nrow(data_subset) == 0) - { - rlang::abort( - message = c( - "The LGA name provided does not belong to the state", - i = "Specify the exact LGA name and the corresponding state \\ - it belongs to. See `abs_lga_lookup` for a list of all LGAs and\\ - the state it belongs to", - x = glue::glue( - "The lga name '{lga}' does not belong to the state '{state}'" - ) - ) - ) - } else{ - # if tibble not empty, do nothing - data_subset - } - } else { - # if there is no state argument along with lga - data_subset <- data_abs_lga_education %>% - dplyr::filter(lga %in% {{ lga }}) - } - } - } # end check for state & lga - if (!is.null(age)) - { - data_subset %>% - dplyr::filter(age %in% {{ age }}) - - } else { - data_subset - } - - } \ No newline at end of file diff --git a/R/get_data_abs_age_work.R b/R/get_data_abs_age_work.R deleted file mode 100644 index 6e900ffc..00000000 --- a/R/get_data_abs_age_work.R +++ /dev/null @@ -1,74 +0,0 @@ -#' @title Return data on employed population for a given age and state or -#' lga of Australia -#' @param state target Australian state name or a vector with multiple state -#' names in its abbreviated form, such as "QLD", "NSW", or "TAS" -#' @param lga target Australian local government area (LGA) name, such as -#' "Fairfield (C)" or a vector with multiple lga names. See -#' [abs_lga_lookup()] for list of lga names. -#' @param age a numeric or numeric vector denoting ages between 0 to 115. -#' The default is to return all ages. -#' @return data set with information on the number of employed people belonging -#' to a particular age, its total population and the corresponding proportion. -#' @export -#' @examples -#' get_data_abs_age_work(state="NSW") -#' get_data_abs_age_work(state="WA",lga="Albany (C)",age=1:5) -#' get_data_abs_age_work(state=c("QLD","TAS"),age=5) -#' get_data_abs_age_work(lga=c("Albury (C)","Barcoo (S)"),age=39) - -get_data_abs_age_work <- function(age=NULL, state=NULL, lga=NULL) { - # checks only for state - if (!is.null(state) & is.null(lga)) { - - check_state_name(state) - data_subset <- data_abs_state_work %>% - dplyr::filter(state %in% {{ state }}) - - } else{ - # checks for lga - if (!is.null(lga)) { - - check_lga_name(lga, multiple_lga = TRUE) - - # checks if there is a state argument as well - # if state argument present, it filters for it - if (!is.null(state)) - { - data_subset <- data_abs_lga_work %>% - dplyr::filter(lga %in% {{ lga }} , state %in% {{ state }}) - - # if empty tibble , error message - if (nrow(data_subset) == 0) - { - rlang::abort( - message = c( - "The LGA name provided does not belong to the state", - i = "Specify the exact LGA name and the corresponding state \\ - it belongs to. See `abs_lga_lookup` for a list of all LGAs and \\ - the state it belongs to", - x = glue::glue( - "The lga name '{lga}' does not belong to the state '{state}'" - ) - ) - ) - } else{ - # if tibble not empty, do nothing - data_subset - } - } else { - # if there is no state argument along with lga - data_subset <- data_abs_lga_work %>% - dplyr::filter(lga %in% {{ lga }}) - } - } - } # end check for state & lga - if (!is.null(age)) - { - data_subset %>% - dplyr::filter(age %in% {{ age }}) - - } else { - data_subset - } - - } \ No newline at end of file diff --git a/R/get_household_size_distribution.R b/R/get_household_size_distribution.R index 09b88d07..22ac20c1 100644 --- a/R/get_household_size_distribution.R +++ b/R/get_household_size_distribution.R @@ -1,48 +1,47 @@ #' @title Get household size distribution based on state or LGA name #' @param state target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS" -#' @param lga target Australian local government area (LGA) name, such as "Fairfield (C)". See +#' @param lga target Australian local government area (LGA) name, such as "Fairfield (C)". See #' [abs_lga_lookup()] for list of lga names #' @return returns a data frame with household size distributions of a specific state or LGA #' @export -#' @examples -#' get_household_size_distribution(lga = "Fairfield (C)") -#' get_household_size_distribution(state = "NSW") +#' @examples +#' get_abs_household_size_distribution(lga = "Fairfield (C)") +#' get_abs_household_size_distribution(state = "NSW") #' \dontrun{ #' # cannot specify both state and LGA -#' get_household_size_distribution(state = "NSW", lga = "Fairfield (C)") +#' get_abs_household_size_distribution(state = "NSW", lga = "Fairfield (C)") #' } -get_household_size_distribution <- function(state = NULL, lga = NULL) { - +get_abs_household_size_distribution <- function(state = NULL, lga = NULL) { level <- dplyr::case_when( is.null(state) & is.null(lga) ~ "national", !is.null(state) & is.null(lga) ~ "state", is.null(state) & !is.null(lga) ~ "lga", TRUE ~ "erroneous" ) - + if (level == "erroneous") { - stop ("only one of state and lga may be specified") + abort("only one of state and lga may be specified") } - + if (length(state) > 1 | length(lga) > 1) { - stop ("only one state or LGA at a time, please") + abort("only one state or LGA at a time, please") } - + if (!is.null(state)) { check_state_name(state) } - - if (!is.null(lga)){ + + if (!is.null(lga)) { check_lga_name(lga) } - + # given ABS data on household sizes for a *single location*, get average # household sizes *per person* from ABS - assuming a max of 8 people per # households. Note - I tried computing the mean size of the households larger # than 7, by comparing with LGA populations, but they were improbably # enormous, probably because some of the population lives in facilities, not # households. - + # get state mean household sizes household_data <- abs_household_lga %>% dplyr::filter( @@ -55,36 +54,28 @@ get_household_size_distribution <- function(state = NULL, lga = NULL) { size = readr::parse_number(n_persons_usually_resident), # number of *people* in a household of that size n_people = n_households * size, - )%>% - dplyr::select(-c(n_persons_usually_resident,n_households))%>% - dplyr::rename(household_size=size) - + ) %>% + dplyr::select(-c(n_persons_usually_resident, n_households)) %>% + dplyr::rename(household_size = size) + state <- rlang::enquo(state) lga <- rlang::enquo(lga) - + # set up aggregation - household_data <- switch( - - level, - + household_data <- switch(level, national = household_data, - state = household_data %>% dplyr::filter(state == !!state) %>% dplyr::group_by(state), - lga = household_data %>% dplyr::filter(lga == !!lga) %>% dplyr::group_by(lga) - ) - + if (nrow(household_data) == 0) { - stop( - glue::glue("{level} '{paste(get(level))[2]}' not found") + cli::cli_abort( + "{level} '{paste(get(level))[2]}' not found" ) } household_data } - - diff --git a/R/get_polymod_contact_data.R b/R/get_polymod_contact_data.R index ff8bba14..d6f48920 100644 --- a/R/get_polymod_contact_data.R +++ b/R/get_polymod_contact_data.R @@ -1,30 +1,30 @@ #' @title Format POLYMOD data and filter contacts to certain settings -#' +#' #' @description Provides contact and participant POLYMOD data from selected -#' countries. It impute missing contact ages via one of three methods: +#' countries. It impute missing contact ages via one of three methods: #' 1) imputing contact ages from a random uniform distribution from the range -#' of ages. 2) using the average of the ages, 3) removal of those -#' participants. The contact settings are then classified as "home", +#' of ages. 2) using the average of the ages, 3) removal of those +#' participants. The contact settings are then classified as "home", #' "school", "work" and "others", where "others" include locations such as -#' leisure, transport or other places. The participants with missing contact -#' ages or settings are removed, and the number of contacts per participant +#' leisure, transport or other places. The participants with missing contact +#' ages or settings are removed, and the number of contacts per participant #' and contact age from ages 0-100 are obtained for various countries and #' settings. #' #' @param setting Which setting to extract data from. Default is all settings. #' Options are: "all", "home", "work", "school", and "other". -#' @param countries countries to extract data from. Default is all countries +#' @param countries countries to extract data from. Default is all countries #' from this list: "Belgium", "Finland", "Germany", "Italy", "Luxembourg", #' "Netherlands", "Poland", and "United Kingdom". #' @param ages Which ages to return. Default is ages 0 to 100. -#' @param contact_age_imputation How to handle age when it is missing. Choose -#' one of three methods: 1) "sample", which imputes contact ages from a +#' @param contact_age_imputation How to handle age when it is missing. Choose +#' one of three methods: 1) "sample", which imputes contact ages from a #' random uniform distribution from the range of ages. 2) "mean", use the -#' average of the ages, 3) "remove_participant" removal of those +#' average of the ages, 3) "remove_participant" removal of those #' participants. Default is "sample". #' @return A data.frame with columns: "setting" (all, work, home, etc. as -#' specified in "setting" argument); "age_from" - the age of the participant; -#' "age_to" - the age of the person the participant had contact with; +#' specified in "setting" argument); "age_from" - the age of the participant; +#' "age_to" - the age of the person the participant had contact with; #' "contacts" the number of contacts that person had; "participants" the #' number of participants in that row. #' @examples @@ -37,21 +37,19 @@ #' get_polymod_contact_data(contact_age_imputation = "mean") #' get_polymod_contact_data(contact_age_imputation = "remove_participant") #' @export -get_polymod_contact_data <- function( - setting = c("all", "home", "work", "school", "other"), - countries = c( - "Belgium", - "Finland", - "Germany", - "Italy", - "Luxembourg", - "Netherlands", - "Poland", - "United Kingdom" - ), - ages = 0:100, - contact_age_imputation = c("sample", "mean", "remove_participant") -) { +get_polymod_contact_data <- function(setting = c("all", "home", "work", "school", "other"), + countries = c( + "Belgium", + "Finland", + "Germany", + "Italy", + "Luxembourg", + "Netherlands", + "Poland", + "United Kingdom" + ), + ages = 0:100, + contact_age_imputation = c("sample", "mean", "remove_participant")) { setting <- match.arg(setting) contact_age_imputation <- match.arg(contact_age_imputation) diff --git a/R/get_polymod_per_capita_household_size.R b/R/get_polymod_per_capita_household_size.R index 51b6b0da..6a964850 100644 --- a/R/get_polymod_per_capita_household_size.R +++ b/R/get_polymod_per_capita_household_size.R @@ -1,11 +1,11 @@ -#' @title Get polymod per capita houshold size. -#' +#' @title Get polymod per capita household size. +#' #' @description Convenience function to help get the per capita household size. #' This is calculated as `mean(socialmixr::polymod$participants$hh_size)`. -#' +#' #' @return number, 3.248971 #' @author Nicholas Tierney #' @export get_polymod_per_capita_household_size <- function() { - mean(socialmixr::polymod$participants$hh_size) -} \ No newline at end of file + mean(socialmixr::polymod$participants$hh_size) +} diff --git a/R/get_polymod_population.R b/R/get_polymod_population.R index 96117314..f81163d8 100644 --- a/R/get_polymod_population.R +++ b/R/get_polymod_population.R @@ -5,35 +5,38 @@ #' participants). Note that we don't want to weight by survey age #' distributions for this, since the total number of *participants* #' represents the sampling. It uses the participant data from the polymod -#' survey as well as the age specific population data from `socialmixr` -#' R package to return the age specific average population of different, -#' countries weighted by the number of participants from those countries who +#' survey as well as the age specific population data from `socialmixr` +#' R package to return the age specific average population of different, +#' countries weighted by the number of participants from those countries who #' participated in the polymod survey. #' #' @param countries countries to extract data from. Default is to get: Belgium, -#' Finland, Germany, Italy, Luxembourg, Netherlands, Poland, and +#' Finland, Germany, Italy, Luxembourg, Netherlands, Poland, and #' United Kingdom. -#' @return data frame with two columns: `lower.age.limit` and `population` +#' @return A `conmat_population` data frame with two columns: `lower.age.limit` +#' and `population` #' @examples #' get_polymod_population() -#' get_polymod_population("Belgium") -#' get_polymod_population("United Kingdom") -#' get_polymod_population("Italy") +#' get_polymod_population("Belgium") +#' get_polymod_population("United Kingdom") +#' get_polymod_population("Italy") #' @export get_polymod_population <- function(countries = c( - "Belgium", - "Finland", - "Germany", - "Italy", - "Luxembourg", - "Netherlands", - "Poland", - "United Kingdom" - )) { + "Belgium", + "Finland", + "Germany", + "Italy", + "Luxembourg", + "Netherlands", + "Poland", + "United Kingdom" + )) { socialmixr::polymod$participants %>% dplyr::filter( !is.na(year), - country %in% countries + country %in% countries, + # there were two luxembourgs otherwise - one in 2005! + year == 2006 ) %>% dplyr::group_by( country, @@ -44,7 +47,7 @@ get_polymod_population <- function(countries = c( .groups = "drop" ) %>% dplyr::left_join( - socialmixr::wpp_age() %>% dplyr::filter(year == 2005), + socialmixr::wpp_age() %>% dplyr::filter(year == 2005) %>% dplyr::select(country, lower.age.limit, population), by = c("country") ) %>% dplyr::filter( @@ -55,5 +58,9 @@ get_polymod_population <- function(countries = c( ) %>% dplyr::summarise( population = stats::weighted.mean(population, participants) + ) %>% + conmat_population( + age = lower.age.limit, + population = population ) } diff --git a/R/get_polymod_setting_data.R b/R/get_polymod_setting_data.R index 28c71e71..6a5796b2 100644 --- a/R/get_polymod_setting_data.R +++ b/R/get_polymod_setting_data.R @@ -1,18 +1,20 @@ #' Get polymod setting data -#' -#' `get_polymod_setting_data()` acts as an extension of -#' `get_polymod_contact_data()`, and extracts the setting wise contact data +#' +#' `get_polymod_setting_data()` acts as an extension of +#' `get_polymod_contact_data()`, and extracts the setting wise contact data #' on the desired country, as a list. #' @param countries countries to extract data from -#' @return A list of data frames, of the polymod data. One list per setting: +#' @return A list of data frames, of the polymod data. One list per setting: #' "home", "work", "school", and "other". #' @examples #' get_polymod_setting_data() #' get_polymod_setting_data("Belgium") #' @export -get_polymod_setting_data <- function(countries = c("Belgium", "Finland", "Germany", "Italy", "Luxembourg", "Netherlands", - "Poland", "United Kingdom")) { +get_polymod_setting_data <- function(countries = c( + "Belgium", "Finland", "Germany", "Italy", "Luxembourg", "Netherlands", + "Poland", "United Kingdom" + )) { list( home = get_polymod_contact_data( setting = "home", @@ -30,5 +32,6 @@ get_polymod_setting_data <- function(countries = c("Belgium", "Finland", "German setting = "other", countries = countries ) - ) + ) %>% + new_setting_data() } diff --git a/R/get_setting_transmission_matrices.R b/R/get_setting_transmission_matrices.R index 193c715a..77381fbf 100644 --- a/R/get_setting_transmission_matrices.R +++ b/R/get_setting_transmission_matrices.R @@ -22,7 +22,7 @@ #' transmissibility in work and other settings due to hygiene behaviour; and #' estimates of the relative transmissibility in household vs non-household #' settings - scaled linearly for non-household transmission and binomially for -#' household transmission (so that onward onfections do not to exceed the number +#' household transmission (so that onward infections do not to exceed the number #' of other household members). #' #' When using this data, ensure that you cite this package, and the original @@ -32,15 +32,15 @@ #' transmission and control of COVID-19 epidemics. Nat Med 26, 1205–1211 (2020). #' https://doi.org/10.1038/s41591-020-0962-9 #' -#' @param age_breaks vector of age breaks, defaults to +#' @param age_breaks vector of age breaks, defaults to #' `c(seq(0, 80, by = 5), Inf)` #' @param asymptomatic_relative_infectiousness the assumed ratio of onward -#' infectiousness between asymptomatic and symptomatic cases. This represents -#' the infectiousness of asymptomatic relative to symptomatic. -#' Default value is 0.5, which means the asymptomatic cases are 50% less +#' infectiousness between asymptomatic and symptomatic cases. This represents +#' the infectiousness of asymptomatic relative to symptomatic. +#' Default value is 0.5, which means the asymptomatic cases are 50% less #' infectious than symptomatic cases. #' @param susceptibility_estimate Which estimate to use for susceptibility by -#' age. Either, the smoothed original Davies et al estimates, +#' age. Either, the smoothed original Davies et al estimates, #' "davies_original" or, the set updated to match UK under-16 infections #' (the default), "davies_updated". #' @@ -60,11 +60,11 @@ #' age_breaks <- c(seq(0, 80, by = 5), Inf) #' #' # define a new population age distribution to predict to -#' fairfield_age_pop <- abs_age_lga("Fairfield (C)") +#' fairfield <- abs_age_lga("Fairfield (C)") #' #' # predict setting-specific contact matrices to a new population #' contact_matrices <- predict_setting_contacts( -#' population = fairfield_age_pop, +#' population = fairfield, #' contact_model = setting_models, #' age_breaks = age_breaks #' ) @@ -89,21 +89,16 @@ #' # get the all-settings NGM #' ngm_overall <- Reduce("+", next_generation_matrices) #' } -get_setting_transmission_matrices <- function( - age_breaks = c(seq(0, 80, by = 5), Inf), - asymptomatic_relative_infectiousness = 0.5, - susceptibility_estimate = c("davies_updated", "davies_original") - ) { - - - if(!dplyr::last(is.infinite(age_breaks))) - { +get_setting_transmission_matrices <- function(age_breaks = c(seq(0, 80, by = 5), Inf), + asymptomatic_relative_infectiousness = 0.5, + susceptibility_estimate = c("davies_updated", "davies_original")) { + if (!dplyr::last(is.infinite(age_breaks))) { age_breaks <- c(age_breaks, Inf) } - - # which parameter estimates to use for susceptibility by age + + # which parameter estimates to use for susceptibility by age susceptibility_estimate <- rlang::arg_match(susceptibility_estimate) - + # format the setting transmission scalings (calibrated for these transmission # probabilities and conmat contact matrices against English infection data) # into a tibble to join to the transmission probabilities @@ -111,11 +106,11 @@ get_setting_transmission_matrices <- function( setting = names(setting_weights), weight = setting_weights ) - + # load the age-dependent susceptibility and clinical fraction parameters, and # convert into infectiousness and susceptibility - age_effects <- davies_age_extended%>% - dplyr::filter(age<=max(age_breaks))%>% + age_effects <- davies_age_extended %>% + dplyr::filter(age <= max(age_breaks)) %>% dplyr::mutate( infectiousness = clinical_fraction + (1 - clinical_fraction) * asymptomatic_relative_infectiousness @@ -126,7 +121,7 @@ get_setting_transmission_matrices <- function( infectiousness, susceptibility = !!susceptibility_estimate ) - + # expand out to all settings and age combinations data <- tidyr::expand_grid( setting = setting_weights_tibble$setting, @@ -138,7 +133,7 @@ get_setting_transmission_matrices <- function( age_effects, age_from = age, infectiousness - ), + ), by = "age_from" ) %>% dplyr::left_join( @@ -165,7 +160,7 @@ get_setting_transmission_matrices <- function( c(infectiousness, susceptibility), mean ), - .groups = "drop" + .groups = "drop" ) %>% # attach and apply the weights dplyr::left_join( @@ -175,7 +170,7 @@ get_setting_transmission_matrices <- function( dplyr::mutate( relative_probability = infectiousness * susceptibility, probability = dplyr::case_when( - setting == "home" ~ 1 - (1 - relative_probability) ^ weight, + setting == "home" ~ 1 - (1 - relative_probability)^weight, TRUE ~ relative_probability * weight ) ) %>% @@ -185,7 +180,7 @@ get_setting_transmission_matrices <- function( age_group_to, probability ) - + matrices <- data %>% # convert into matrices tidyr::nest( @@ -202,11 +197,15 @@ get_setting_transmission_matrices <- function( names_from = age_group_from, values_from = probability ), - matrix = lapply(matrix, - tibble::column_to_rownames, - "age_group_to"), - matrix = lapply(matrix, - as.matrix) + matrix = lapply( + matrix, + tibble::column_to_rownames, + "age_group_to" + ), + matrix = lapply( + matrix, + as.matrix + ) ) %>% # turn this into a list to return tidyr::pivot_wider( @@ -215,8 +214,8 @@ get_setting_transmission_matrices <- function( ) %>% as.list() %>% lapply(purrr::pluck, 1) - - matrices[c("home", "school", "work", "other")] - -} + new_transmission_probability_matrix( + matrices[c("home", "school", "work", "other")] + ) +} diff --git a/R/matrix_to_predictions.R b/R/matrix_to_predictions.R index 42457a11..2c3f5f5d 100644 --- a/R/matrix_to_predictions.R +++ b/R/matrix_to_predictions.R @@ -1,39 +1,40 @@ #' @title Convert a contact matrix as output into a long-form tibble -#' +#' #' @description This function is the opposite of [predictions_to_matrix()]. It -#' converts a wide matrix into a long data frame. It is mostly used within +#' converts a wide matrix into a long data frame. It is mostly used within #' plotting functions. #' #' @param contact_matrix square matrix with age group to and from information #' in the row and column names. -#' +#' #' @return data.frame with columns `age_group_to`, `age_group_from`, and #' `contacts`. -#' +#' #' @examples -#' fairfield_abs_data <- abs_age_lga("Fairfield (C)") -#' -#' # We can convert the predictions into a matrix -#' -#' fairfield_school_contacts <- predict_contacts( -#' model = polymod_setting_models$school, -#' population = fairfield_abs_data, -#' age_breaks = c(0, 5, 10, 15,Inf) -#' ) -#' -#' fairfield_school_contacts -#' -#' fairfield_school_mat <- predictions_to_matrix(fairfield_school_contacts) -#' -#' fairfield_school_mat -#' -#' matrix_to_predictions(fairfield_school_mat) +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' # We can convert the predictions into a matrix +#' +#' fairfield_school_contacts <- predict_contacts( +#' model = polymod_setting_models$school, +#' population = fairfield, +#' age_breaks = c(0, 5, 10, 15, Inf) +#' ) +#' +#' fairfield_school_contacts +#' +#' fairfield_school_mat <- predictions_to_matrix(fairfield_school_contacts) +#' +#' fairfield_school_mat +#' +#' matrix_to_predictions(fairfield_school_mat) #' @export matrix_to_predictions <- function(contact_matrix) { contact_matrix %>% tibble::as_tibble( rownames = "age_group_to" ) %>% + dplyr::mutate(across(!age_group_to, as.double)) %>% tidyr::pivot_longer( cols = -c(age_group_to), names_to = "age_group_from", diff --git a/R/model-tidiers.R b/R/model-tidiers.R new file mode 100644 index 00000000..a204a623 --- /dev/null +++ b/R/model-tidiers.R @@ -0,0 +1,77 @@ +#' Extract out formula terms +#' +#' @param model model object +#' +#' @name formula-terms +#' @keywords internal +#' @examples +#' \dontrun{ +#' formula_terms <- get_formulas_terms(sim_m) +#' formula_terms +#' } +#' +get_formulas_terms <- function(model) { + as.character(attr(stats::terms(model$formula), "variables"))[-c(1, 2)] +} + +# extract_term_name(formula_terms) +extract_term_name <- function(x) { + term <- as.character(stringr::str_extract_all(x, "(?<=\\().+?(?=\\))")) + glue::glue("fitted_{term}") +} + +# head(predict_gam_term(sim_m, sim_data, formula_terms[1])) +# tail(predict_gam_term(sim_m, sim_data, formula_terms[1])) +predict_gam_term <- function(model, data, terms) { + c( + predict(model, + data, + type = "terms", + terms = terms + ) + ) +} + +add_intercept <- function(data, model) { + dplyr::mutate( + .data = data, + fitted_intercept = model$coefficients[1] + ) +} + +tidy_predict_term <- function(data, + model, + term) { + term_name <- extract_term_name(term) + + dat_term <- tibble::tibble(x = predict_gam_term(model, data, term)) + + stats::setNames(dat_term, term_name) +} + +add_fitted_overall <- function(data) { + data %>% + dplyr::mutate( + fitted_overall = rowSums( + dplyr::across( + .cols = c(tidyselect::starts_with("fitted")) + ) + ) + ) +} + +add_gam_predictions <- function(data, model, term) { + terms <- get_formulas_terms(model) + data_modelling <- add_modelling_features(data) + predictions <- purrr::map_dfc( + .x = terms, + .f = tidy_predict_term, + data = data_modelling, + model = model + ) + + data %>% + add_intercept(model) %>% + dplyr::bind_cols(predictions) %>% + add_fitted_overall() +} diff --git a/R/partial-prediction-helpers.R b/R/partial-prediction-helpers.R new file mode 100644 index 00000000..da7cbcbf --- /dev/null +++ b/R/partial-prediction-helpers.R @@ -0,0 +1,277 @@ +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param ages +#' @return +#' @author njtierney +#' @export +create_age_grid <- function(ages) { + ## TODO + ## Wrap this up into a function that generates an age grid data frame + ## with all the terms needed to fit a conmat model + ## (from `fit_single_contact_model.R`) + age_grid <- expand.grid( + age_from = ages, + age_to = ages + ) |> + tibble::as_tibble() |> + # prepare the age data so it has all the right column names + # that are used inside of `fit_single_contact_model()` + # conmat::add_symmetrical_features() |> + add_symmetrical_features() |> + # this ^^^ does the same as the commented part below: + # dplyr::mutate( + # gam_age_offdiag = abs(age_from - age_to), + # gam_age_offdiag_2 = abs(age_from - age_to)^2, + # gam_age_diag_prod = abs(age_from * age_to), + # gam_age_diag_sum = abs(age_from + age_to), + # gam_age_pmax = pmax(age_from, age_to), + # gam_age_pmin = pmin(age_from, age_to) + # ) |> + # This is to add the school_probability and work_probability columns + # that are used inside fit_single_contact_model() when fitting the model. + # conmat::add_modelling_features() + add_modelling_features() + + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param fit_home +#' @return +#' @author njtierney +#' @export +extract_term_names <- function(fit_home) { + + coef_names <- names(fit_home$coefficients) |> + stringr::str_remove_all("\\.[^.]*$") |> + unique() |> + stringr::str_subset("^s\\(") + + coef_names + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param term_names +#' @return +#' @author njtierney +#' @export +clean_term_names <- function(term_names) { + + term_names |> + stringr::str_remove_all("^s\\(gam_age_") |> + stringr::str_remove_all("\\)") + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_grid +#' @param term_names +#' @param term_var_names +#' @return +#' @author njtierney +#' @export +predict_individual_terms <- function(age_grid, fit, term_names, term_var_names) { + + predicted_term <- function(age_grid, fit, term_name, term_var_name){ + predict(object = fit, + newdata = age_grid, + type = "terms", + terms = term_name) |> + tibble::as_tibble() |> + setNames(glue::glue("pred_{term_var_name}")) + } + + all_predicted_terms <- purrr::map2_dfc( + .x = term_names, + .y = term_var_names, + .f = function(.x, .y){ + predicted_term(age_grid = age_grid, + fit = fit, + term_name = .x, + term_var_name = .y) + } + ) + + dplyr::bind_cols(age_grid, all_predicted_terms) + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_predictions_all_settings +#' @return +#' @author njtierney +#' @importFrom ggplot2 ggplot aes geom_tile facet_grid coord_fixed scale_fill_viridis_c theme_minimal facet_wrap +#' @export +gg_age_terms_settings <- function(age_predictions_all_settings) { + + pred_all_setting_longer <- age_predictions_all_settings |> + tidyr::pivot_longer( + dplyr::starts_with("pred"), + names_to = "pred", + values_to = "value", + names_prefix = "pred_" + ) |> + dplyr::select(age_from, + age_to, + value, + pred, + setting) + + facet_age_plot <- function(data, place){ + data |> + dplyr::filter(setting == place) |> + ggplot(aes(x = age_from, + y = age_to, + fill = value)) + + geom_tile() + + facet_grid(setting~pred, + switch = "y") + + coord_fixed() + + } + + p_home <- facet_age_plot(pred_all_setting_longer, "home") + + scale_fill_viridis_c() + p_work <- facet_age_plot(pred_all_setting_longer, "work") + + scale_fill_viridis_c(option = "rocket") + p_school <- facet_age_plot(pred_all_setting_longer, "school") + + scale_fill_viridis_c(option = "plasma") + p_other <- facet_age_plot(pred_all_setting_longer, "other") + + scale_fill_viridis_c(option = "mako") + + patchwork::wrap_plots( + p_home, + p_work, + p_school, + p_other, + nrow = 4 + ) + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_predictions +#' @return +#' @author njtierney +#' @export +pivot_longer_age_preds <- function(age_predictions) { + age_predictions |> + tidyr::pivot_longer( + dplyr::starts_with("pred"), + names_to = "pred", + values_to = "value", + names_prefix = "pred_" + ) +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_predictions_long +#' @return +#' @author njtierney +#' @export +gg_age_partial_pred_long <- function(age_predictions_long) { + + facet_names <- data.frame( + pred = c("diag_prod", "diag_sum", "offdiag", "offdiag_2", "pmax", "pmin"), + math_name = c("i x j", "i + j", "|i - j|", "|i - j|²", "max(i, j)", "min(i, j)") + ) + + age_predictions_long %>% + dplyr::left_join(facet_names, by = dplyr::join_by("pred")) %>% + ggplot( + aes( + x = age_from, + y = age_to, + group = math_name, + fill = value + ) + ) + + facet_wrap(~math_name, ncol = 3) + + geom_tile() + + scale_fill_viridis_c( + name = "log\ncontacts" + ) + + theme_minimal() + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_predictions_long +#' @return +#' @author njtierney +#' @export +add_age_partial_sum <- function(age_predictions_long) { + + age_partial_sum <- age_predictions_long |> + dplyr::group_by(age_from, + age_to) |> + dplyr::summarise( + gam_total_term = exp(sum(value)), + .groups = "drop" + ) + + age_partial_sum + +} + +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title +#' @param age_predictions_long_sum +#' @return +#' @author njtierney +#' @export +gg_age_partial_sum <- function(age_predictions_long_sum) { + + ggplot( + data = age_predictions_long_sum, + aes( + x = age_from, + y = age_to, + fill = gam_total_term + ) + ) + + geom_tile() + + scale_fill_viridis_c( + name = "Num.\ncontacts", + option = "magma", + limits = c(0, 12) + ) + + theme_minimal() + +} + diff --git a/R/per_capita_household_size.R b/R/per_capita_household_size.R index 398e9049..c0ab22eb 100644 --- a/R/per_capita_household_size.R +++ b/R/per_capita_household_size.R @@ -1,33 +1,33 @@ #' @title Get per capita household size with household size distribution -#' -#' @description Returns the per capita household size for a location given -#' its household size distribution. See [get_household_size_distribution()] +#' +#' @description Returns the per capita household size for a location given +#' its household size distribution. See [get_abs_household_size_distribution()] #' function for retrieving household size distributions for a given place. -#' +#' #' @param household_data data set with information on the household size -#' distribution of specific state or LGA. +#' distribution of specific state or LGA. #' @param household_size_col bare variable name of the column depicting the #' household size. Default is 'household_size' from -#' [get_household_size_distribution()]. +#' [get_abs_per_capita_household_size_lga()]. #' @param n_people_col bare variable name of the column depicting the total #' number of people belonging to the respective household size. Default is -#' 'n_people' from [get_household_size_distribution()]. -#' @return Numeric of length 1 - the per capita household size for a given +#' 'n_people' from [get_abs_per_capita_household_size_lga()]. +#' @return Numeric of length 1 - the per capita household size for a given #' state or LGA. #' @author Nick Golding #' @export -#' @examples -#' demo_data <- get_household_size_distribution(lga = "Fairfield (C)") +#' @examples +#' demo_data <- get_abs_household_size_population(lga = "Fairfield (C)") #' demo_data -#' per_capita_household_size(household_data=demo_data, -#' household_size_col=household_size, -#' n_people_col=n_people) -#' -#' +#' per_capita_household_size( +#' household_data = demo_data, +#' household_size_col = household_size, +#' n_people_col = n_people +#' ) +#' per_capita_household_size <- function(household_data, - household_size_col=household_size, - n_people_col=n_people) { - + household_size_col = household_size, + n_people_col = n_people) { # aggregate and average household sizes household_data %>% dplyr::group_by( @@ -49,5 +49,4 @@ per_capita_household_size <- function(household_data, dplyr::pull( per_capita_household_size ) - } diff --git a/R/plot_matrix.R b/R/plot_matrix.R index af29f091..0b4c66d4 100644 --- a/R/plot_matrix.R +++ b/R/plot_matrix.R @@ -1,53 +1,54 @@ #' Visualise predicted contact matrix -#' -#' Visualising the predicted contact rates facilitates understanding the -#' underlying patterns and relationships between age groups in different +#' +#' Visualising the predicted contact rates facilitates understanding the +#' underlying patterns and relationships between age groups in different #' settings (workplace, home, school, other). The `plot_matrix()` function -#' takes a contact matrix and visualises it, with the x and y axes being +#' takes a contact matrix and visualises it, with the x and y axes being #' different age groups. -#' +#' #' @param matrix Square matrix with row and column names indicating the age #' groups #' @return a ggplot visualisation of contact rates #' @examples #' \dontrun{ -#' -#' set.seed(2021-09-24) +#' +#' set.seed(2021 - 09 - 24) #' polymod_contact_data <- get_polymod_setting_data() #' polymod_survey_data <- get_polymod_population() -#' +#' #' setting_models <- fit_setting_contacts( #' contact_data_list = polymod_contact_data, #' population = polymod_survey_data #' ) -#' -#' fairfield_age_pop <- abs_age_lga("Fairfield (C)") -#' -#' fairfield_age_pop -#' +#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' fairfield +#' #' synthetic_settings_5y_fairfield <- predict_setting_contacts( -#' population = fairfield_age_pop, +#' population = fairfield, #' contact_model = setting_models, #' age_breaks = c(seq(0, 85, by = 5), Inf) #' ) -#' -#' # Visualise the projected contact rates for "home" in the Fairfield LGA. The -#' # strong diagonal shows us that similar age groups typically have higher -#' # rates of home contact. We can also see parental links include middle-aged -#' # age groups having observable contact rates with younger age groups. -#' # For instance, ages between 25 and 45 have higher contact rates with -#' # newborns (0 to 5) and children (5 to 10), but older age groups, -#' # including those over 55, tend to have higher contact rates with age -#' # groups between 25 and 45, showing the interactions between parents and -#' # children. In addition to this, other patterns that indicates contact -#' # between children and their grandparents where higher contact rates between -#' # young children (years 0-5) and grandparents (age 55+) are present. -#' -#' +#' +#' # Visualise the projected contact rates for "home" in the Fairfield LGA. The +#' # strong diagonal shows us that similar age groups typically have higher +#' # rates of home contact. We can also see parental links include middle-aged +#' # age groups having observable contact rates with younger age groups. +#' # For instance, ages between 25 and 45 have higher contact rates with +#' # newborns (0 to 5) and children (5 to 10), but older age groups, +#' # including those over 55, tend to have higher contact rates with age +#' # groups between 25 and 45, showing the interactions between parents and +#' # children. In addition to this, other patterns that indicates contact +#' # between children and their grandparents where higher contact rates between +#' # young children (years 0-5) and grandparents (age 55+) are present. +#' +#' #' plot_matrix(synthetic_settings_5y_fairfield$home) #' } -#' -#' @export +#' +#' @noRd +#' @keywords internal plot_matrix <- function(matrix) { matrix %>% matrix_to_predictions() %>% diff --git a/R/plot_setting_matrices.R b/R/plot_setting_matrices.R index a63fa93d..8ef14c37 100644 --- a/R/plot_setting_matrices.R +++ b/R/plot_setting_matrices.R @@ -1,47 +1,47 @@ #' Visualise predicted contact matrix for each setting -#' +#' #' This is an extension of [plot_matrix()], which visualises the contact #' matrix for each setting. It uses `patchwork` to combine all the matrices #' together -#' -#' @param matrices A list of square matrices, with row and column names +#' +#' @param matrices A list of square matrices, with row and column names #' indicating the age groups. -#' @param title Title to give to plot setting matrices. Default value is: +#' @param title Title to give to plot setting matrices. Default value is: #' "Setting-specific synthetic contact matrices (all polymod data)'" #' @return ggplot visualisation of contact rates for each setting #' @examples #' \dontrun{ -#' -#' set.seed(2021-09-24) +#' +#' set.seed(2021 - 09 - 24) #' polymod_contact_data <- get_polymod_setting_data() #' polymod_survey_data <- get_polymod_population() -#' +#' #' setting_models <- fit_setting_contacts( #' contact_data_list = polymod_contact_data, #' population = polymod_survey_data #' ) -#' -#' fairfield_age_pop <- abs_age_lga("Fairfield (C)") -#' -#' fairfield_age_pop -#' +#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' fairfield +#' #' synthetic_settings_5y_fairfield <- predict_setting_contacts( -#' population = fairfield_age_pop, +#' population = fairfield, #' contact_model = setting_models, #' age_breaks = c(seq(0, 85, by = 5), Inf) #' ) -#' +#' #' plot_setting_matrix(synthetic_settings_5y_fairfield) #' } -#' @export +#' @noRd +#' @keywords internal plot_setting_matrices <- function(matrices, title = "Setting-specific synthetic contact matrices") { - plot_home <- plot_matrix(matrices$home) + ggplot2::ggtitle("home") - plot_school <- plot_matrix(matrices$school) + ggplot2::ggtitle("school") + plot_school <- plot_matrix(matrices$school) + ggplot2::ggtitle("school") plot_work <- plot_matrix(matrices$work) + ggplot2::ggtitle("work") plot_other <- plot_matrix(matrices$other) + ggplot2::ggtitle("other") - + patchwork::wrap_plots( plot_home, plot_school, diff --git a/R/polymod-data.R b/R/polymod-data.R index d751815d..d3d2240b 100644 --- a/R/polymod-data.R +++ b/R/polymod-data.R @@ -1,6 +1,30 @@ -#' Polymod data +#' Social contact data from 8 European countries (imported from `socialmixr`) +#' +#' A dataset containing social mixing diary data from 8 European countries: +#' Belgium, Germany, Finland, Great Britain, Italy, Luxembourg, +#' The Netherlands and Poland. +#' +#' This data has been sourced from the [socialmixr](https://CRAN.R-project.org/package=socialmixr) package. +#' +#' The Data are fully described in Mossong J, Hens N, Jit M, Beutels P, Auranen +#' K, Mikolajczyk R, et al. (2008) Social Contacts and Mixing Patterns Relevant +#' to the Spread of Infectious Diseases. PLoS Med 5(3): e74. +#' +#' @format A list of two data frames: +#' \describe{ +#' \item{participants}{the study participant, with age, country, year and day +#' of the week (starting with 1 = Monday)} +#' \item{contacts}{reported contacts of the study participants. The variable +#' phys_contact has two levels (1 denotes physical contact while 2 denotes +#' non-physical contact), duration_multi has five levels (1 is less than 5 +#' minutes while 5 is more than 4 hours, increasing in the order found in +#' Figure 1 in Mossong et al.), and frequency_multi has five levels (1 is +#' daily, 2 is weekly, 3 is monthly, 4 is less often, and 5 is first time)} +#' All other variables are described on the Zenodo repository of the data, +#' available at \doi{10.5281/zenodo.1043437} +#' } +#' @source \doi{10.1371/journal.pmed.0050074} #' -#' Imported from `socialmixr` #' @name polymod #' @export "polymod" diff --git a/R/predict_contacts.R b/R/predict_contacts.R index 0a4f538f..5f8a7122 100644 --- a/R/predict_contacts.R +++ b/R/predict_contacts.R @@ -1,74 +1,75 @@ #' @title Predict contact rate between two age populations, given some model. -#' -#' @description Predicts the expected contact rate over specified age breaks, -#' given some model of contact rate and population age structure. -#' This function is used internally in [predict_setting_contacts()], which -#' performs this prediction across all settings (home, work, school, other), -#' and optionally performs an adjustment for per capita household size. You -#' can use `predict_contacts()` by itself, just be aware you will need to -#' separately apply a per capita household size adjustment if required. See +#' +#' @description Predicts the expected contact rate over specified age breaks, +#' given some model of contact rate and population age structure. +#' This function is used internally in [predict_setting_contacts()], which +#' performs this prediction across all settings (home, work, school, other), +#' and optionally performs an adjustment for per capita household size. You +#' can use `predict_contacts()` by itself, just be aware you will need to +#' separately apply a per capita household size adjustment if required. See #' details below on `adjust_household_contact_matrix` for more information. -#' -#' @details The population data is used to determine age range to predict -#' contact rates, and removes ages with zero population, so we do not -#' make predictions for ages with zero populations. Contact rates are -#' predicted yearly between the age groups, using [predict_contacts_1y()], -#' then aggregates these predicted contacts using -#' [aggregate_predicted_contacts()], which aggregates the predictions back to -#' the same resolution as the data, appropriately weighting the contact rate +#' +#' @details The population data is used to determine age range to predict +#' contact rates, and removes ages with zero population, so we do not +#' make predictions for ages with zero populations. Contact rates are +#' predicted yearly between the age groups, using [predict_contacts_1y()], +#' then aggregates these predicted contacts using +#' [aggregate_predicted_contacts()], which aggregates the predictions back to +#' the same resolution as the data, appropriately weighting the contact rate #' by the population. -#' -#' Regarding the `adjust_household_contact_matrix` function, we use +#' +#' Regarding the `adjust_household_contact_matrix` function, we use #' Per-capita household size instead of mean household size. -#' Per-capita household size is different to mean household size, as the -#' household size averaged over people in the **population**, not over -#' households, so larger households get upweighted. It is calculated by -#' taking a distribution of the number of households of each size in a -#' population, multiplying the size by the household by the household count -#' to get the number of people with that size of household, and computing -#' the population-weighted average of household sizes. We use per-capita -#' household size as it is a more accurate reflection of the average -#' number of household members a person in the population can have contact +#' Per-capita household size is different to mean household size, as the +#' household size averaged over people in the **population**, not over +#' households, so larger households get upweighted. It is calculated by +#' taking a distribution of the number of households of each size in a +#' population, multiplying the size by the household by the household count +#' to get the number of people with that size of household, and computing +#' the population-weighted average of household sizes. We use per-capita +#' household size as it is a more accurate reflection of the average +#' number of household members a person in the population can have contact #' with. #' #' @param model A single fitted model of contact rate (e.g., -#' [fit_single_contact_model()]) -#' @param population a dataframe of age population information, with columns +#' [fit_single_contact_model()]) +#' @param population a dataframe of age population information, with columns #' indicating some lower age limit, and population, (e.g., [get_polymod_population()]) -#' @param age_breaks the ages to predict to. By default, the age breaks are +#' @param age_breaks the ages to predict to. By default, the age breaks are #' 0-75 in 5 year groups. -#' @return A dataframe with three columns: `age_group_from`, `age_group_to`, -#' and `contacts`. The age groups are factors, broken up into 5 year bins -#' `[0,5)`, `[5,10)`. The `contact` column is the predicted number of +#' @return A dataframe with three columns: `age_group_from`, `age_group_to`, +#' and `contacts`. The age groups are factors, broken up into 5 year bins +#' `[0,5)`, `[5,10)`. The `contact` column is the predicted number of #' contacts from the specified age group to the other one. #' @examples -#' # If we have a model of contact rate at home, and age population structure -#' # for an LGA, say, Fairfield, in NSW: -#' -#' polymod_setting_models$home -#' -#' fairfield_abs_data <- abs_age_lga("Fairfield (C)") -#' -#' fairfield_abs_data -#' -#' # We can predict the contact rate for Fairfield from the existing contact -#' # data, say, between the age groups of 0-15 in 5 year bins for school: -#' -#' fairfield_school_contacts <- predict_contacts( -#' model = polymod_setting_models$school, -#' population = fairfield_abs_data, -#' age_breaks = c(0, 5, 10, 15,Inf) -#' ) -#' -#' fairfield_school_contacts -#' +#' # If we have a model of contact rate at home, and age population structure +#' # for an LGA, say, Fairfield, in NSW: +#' +#' polymod_setting_models$home +#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' fairfield +#' +#' # We can predict the contact rate for Fairfield from the existing contact +#' # data, say, between the age groups of 0-15 in 5 year bins for school: +#' +#' fairfield_school_contacts <- predict_contacts( +#' model = polymod_setting_models$school, +#' population = fairfield, +#' age_breaks = c(0, 5, 10, 15, Inf) +#' ) +#' +#' fairfield_school_contacts +#' #' @export predict_contacts <- function(model, population, age_breaks = c(seq(0, 75, by = 5), Inf)) { - # NOTE - need to remove / upgrade fragile use of `lower.age.limit` + age <- age(population) + age_var <- age_label(population) population <- population %>% - dplyr::arrange(lower.age.limit) + dplyr::arrange(!!age) # get the age ranges plus the final age bin (which is the bin width) the_age_ranges <- age_ranges(population$lower.age.limit) @@ -82,7 +83,7 @@ predict_contacts <- function(model, valid <- pop_fun(ages) > 0 age_min_integration <- min(ages[valid]) age_max_integration <- max(ages[valid]) - + pred_1y <- predict_contacts_1y( model = model, population = population, @@ -98,5 +99,8 @@ predict_contacts <- function(model, age_breaks = age_breaks ) - pred_groups + new_predicted_contacts( + pred_groups, + age_breaks = age_breaks + ) } diff --git a/R/predict_contacts_1y.R b/R/predict_contacts_1y.R index 2692d18f..f71f6779 100644 --- a/R/predict_contacts_1y.R +++ b/R/predict_contacts_1y.R @@ -37,7 +37,7 @@ #' #' fairfield_contacts_1 <- predict_contacts_1y( #' model = polymod_setting_models$home, -#' population = fairfield_abs_data, +#' population = fairfield, #' age_min = 0, #' age_max = 2 #' ) diff --git a/R/predict_setting_contacts.R b/R/predict_setting_contacts.R index a45a9d99..bc54045b 100644 --- a/R/predict_setting_contacts.R +++ b/R/predict_setting_contacts.R @@ -1,128 +1,115 @@ -#' @title Predict contact rate between age groups across all settings -#' -#' @description Predicts the expected contact rate across all settings ("home", -#' "school", "work", and "other") over specified age breaks, -#' given some model of contact rate and population age structure. Optionally -#' performs an adjustment for per capita household size. See "details" for more -#' information. -#' -#' @details The population data is used to determine age range to predict -#' contact rates, and removes ages with zero population, so we do not -#' make predictions for ages with zero populations. Contact rates are -#' predicted yearly between the age groups, using [predict_contacts_1y()], -#' then aggregates these predicted contacts using -#' [aggregate_predicted_contacts()], which aggregates the predictions back to -#' the same resolution as the data, appropriately weighting the contact rate -#' by the population. Predictions are converted to matrix format using -#' [predictions_to_matrix()] to produce contact matrices for all age groups -#' combinations across different settings or location of contact. -#' -#' We use Per-capita household size instead of mean household size. -#' Per-capita household size is different to mean household size, as the -#' household size averaged over **people** in the population, not over -#' households, so larger households get upweighted. It is calculated by -#' taking a distribution of the number of households of each size in a -#' population, multiplying the size by the household by the household count -#' to get the number of people with that size of household, and computing -#' the population-weighted average of household sizes. We use per-capita -#' household size as it is a more accurate reflection of the average -#' number of household members a person in the population can have contact -#' with. -#' -#' @param population a dataframe of age population information, with columns -#' indicating some lower age limit, and population, (e.g., [get_polymod_population()]) -#' @param contact_model A list of GAM models for each setting. See example -#' output from `fit_setting_contact` below -#' @param age_breaks A vector of age breaks. -#' @param per_capita_household_size Optional (defaults to NULL). When set, it -#' adjusts the household contact matrix by some per capita household size. + +#' Predict setting contacts +#' +#' Predict contact rate for each setting. Note that this function is +#' parallelisable with `future`, and will be impacted by any `future` plans +#' provided. +#' +#' @param population population +#' @param contact_model contact_model +#' @param age_breaks age_breaks +#' @param per_capita_household_size Optional (defaults to NULL). When set, it +#' adjusts the household contact matrix by some per capita household size. #' To set it, provide a single number, the per capita household size. More -#' information is provided below in Details. See -#' [get_per_capita_household_size()] function for a helper for Australian +#' information is provided below in Details. See +#' [get_abs_per_capita_household_size()] function for a helper for Australian #' data with a workflow on how to get this number. #' @param model_per_capita_household_size modelled per capita household size. -#' Default values for this are from +#' Default values for this are from #' [get_polymod_per_capita_household_size()], which ends up being 3.248971 #' -#' -#' @return List of contact rate of matrices for each setting: ("home", "work", -#' "school", "other"). -#' +#' @details We use Per-capita household size instead of mean household size. +#' Per-capita household size is different to mean household size, as the +#' household size averaged over **people** in the population, not over +#' households, so larger households get upweighted. It is calculated by +#' taking a distribution of the number of households of each size in a +#' population, multiplying the size by the household by the household count +#' to get the number of people with that size of household, and computing +#' the population-weighted average of household sizes. We use per-capita +#' household size as it is a more accurate reflection of the average +#' number of household members a person in the population can have contact +#' with. +#' +#' @return List of setting matrices #' @author Nicholas Tierney -#' @examples +#' @export +#' @examples #' # don't run as it takes too long to fit #' \dontrun{ -#' fairfield_age_pop <- abs_age_lga("Fairfield (C)") -#' fairfield_age_pop -#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' fairfield +#' +#' age_break_0_85_plus <- c(seq(0, 85, by = 5), Inf) +#' #' polymod_contact_data <- get_polymod_setting_data() #' polymod_survey_data <- get_polymod_population() -#' +#' #' setting_models <- fit_setting_contacts( #' contact_data_list = polymod_contact_data, #' population = polymod_survey_data #' ) -#' +#' #' synthetic_settings_5y_fairfield <- predict_setting_contacts( -#' population = fairfield_age_pop, +#' population = fairfield, #' contact_model = setting_models, -#' age_breaks = c(seq(0, 85, by = 5), Inf) +#' age_breaks = age_break_0_85_plus #' ) -#' -#' fairfield_hh_size <- get_per_capita_household_size(lga = "Fairfield (C)") +#' +#' fairfield_hh_size <- get_abs_per_capita_household_size(lga = "Fairfield (C)") #' fairfield_hh_size -#' +#' #' synthetic_settings_5y_fairfield_hh <- predict_setting_contacts( -#' population = fairfield_age_pop, +#' population = fairfield, #' contact_model = setting_models, -#' age_breaks = c(seq(0, 85, by = 5), Inf), +#' age_breaks = age_break_0_85_plus, #' per_capita_household_size = fairfield_hh_size #' ) #' } #' @export -predict_setting_contacts <- function(population, - contact_model, +predict_setting_contacts <- function(population, + contact_model, age_breaks, per_capita_household_size = NULL, model_per_capita_household_size = get_polymod_per_capita_household_size()) { - - setting_predictions <- purrr::map( + setting_predictions <- furrr::future_map( .x = contact_model, .f = predict_contacts, population = population, age_breaks = age_breaks - # .options = furrr::furrr_options(seed = TRUE) + .options = furrr::furrr_options(seed = TRUE) ) - + setting_matrices <- furrr::future_map( .x = setting_predictions, .f = predictions_to_matrix, .options = furrr::furrr_options(seed = TRUE) ) - + combination <- Reduce("+", setting_matrices) setting_matrices$all <- combination - + setting_matrices$all <- new_age_matrix( + matrix = setting_matrices$all, + age_breaks = age_breaks + ) + # if we haven't set anything for the per capita household size, return this # adjusted matrix - if (is.null(per_capita_household_size)) { - return(setting_matrices) - - # otherwise we want to adjust the household contact matrix (which also - # updates the "all" setting matrix). - } else if (!is.null(per_capita_household_size)){ + # otherwise we want to adjust the household contact matrix (which also + # updates the "all" setting matrix). + if (!is.null(per_capita_household_size)) { setting_matrices <- adjust_household_contact_matrix( setting_matrices = setting_matrices, # how do we choose this household size? per_capita_household_size = per_capita_household_size, # extra arguments to adjust_household_contact_matrix model_per_capita_household_size = model_per_capita_household_size - ) - return( - setting_matrices ) - } + setting_matrices <- new_setting_prediction_matrix(setting_matrices, + age_breaks = age_breaks + ) + + return(setting_matrices) } diff --git a/R/predictions_to_matrix.R b/R/predictions_to_matrix.R index 1c62cc00..fdd29f45 100644 --- a/R/predictions_to_matrix.R +++ b/R/predictions_to_matrix.R @@ -1,34 +1,40 @@ #' @title Convert dataframe of predicted contacts into matrix -#' +#' #' @description Helper function to convert predictions of contact rates in data -#' frames to matrix format with the survey participant age groups as columns +#' frames to matrix format with the survey participant age groups as columns #' and contact age groups as rows. -#' -#' @param contact_predictions data frame with columns `age_group_from`, +#' +#' @param contact_predictions data frame with columns `age_group_from`, #' `age_group_to`, and `contacts`. -#' -#' @return Square matrix with the unique age groups from `age_group_from/to` +#' @param ... extra arguments +#' +#' @return Square matrix with the unique age groups from `age_group_from/to` #' in the rows and columns and `contacts` as the values. -#' +#' #' @examples -#' fairfield_abs_data <- abs_age_lga("Fairfield (C)") -#' -#' # We can convert the predictions into a matrix -#' -#' fairfield_school_contacts <- predict_contacts( -#' model = polymod_setting_models$school, -#' population = fairfield_abs_data, -#' age_breaks = c(0, 5, 10, 15,Inf) -#' ) -#' -#' fairfield_school_contacts -#' -#' # convert them back to a matrix -#' predictions_to_matrix(fairfield_school_contacts) -#' +#' fairfield <- abs_age_lga("Fairfield (C)") +#' +#' # We can convert the predictions into a matrix +#' +#' fairfield_school_contacts <- predict_contacts( +#' model = polymod_setting_models$school, +#' population = fairfield, +#' age_breaks = c(0, 5, 10, 15, Inf) +#' ) +#' +#' fairfield_school_contacts +#' +#' # convert them back to a matrix +#' predictions_to_matrix(fairfield_school_contacts) +#' #' @export -predictions_to_matrix <- function(contact_predictions) { - contact_predictions %>% +predictions_to_matrix <- function(contact_predictions, ...) { + UseMethod("predictions_to_matrix") +} + +#' @export +predictions_to_matrix.predicted_contacts <- function(contact_predictions, ...) { + prediction_matrix <- contact_predictions %>% tidyr::pivot_wider( names_from = age_group_from, values_from = contacts @@ -36,5 +42,8 @@ predictions_to_matrix <- function(contact_predictions) { tibble::column_to_rownames( "age_group_to" ) %>% - as.matrix() + as.matrix() %>% + new_age_matrix(age_breaks = age_breaks(contact_predictions)) + + prediction_matrix } diff --git a/R/setting-prediction-matrix.R b/R/setting-prediction-matrix.R new file mode 100644 index 00000000..80bb46c5 --- /dev/null +++ b/R/setting-prediction-matrix.R @@ -0,0 +1,139 @@ +new_setting_prediction_matrix <- function(list_matrix, + age_breaks = NULL) { + structure( + list_matrix, + age_breaks = age_breaks, + class = c("conmat_setting_prediction_matrix", class(list_matrix)) + ) +} + +#' Create a setting prediction matrix +#' +#' Helper function to create your own setting prediction matrix, which you +#' may want to use in `generate_ngm`, or `autoplot`. This class is the +#' output of functions like `extrapolate_polymod`, and +#' `predict_setting_contacts`. We recommend using this function is only for +#' advanced users, who are creating their own setting prediction matrix. +#' +#' @param ... list of matrices +#' @param age_breaks age breaks - numeric +#' +#' @return setting prediction matrix +#' +#' @examples +#' +#' age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +#' one_by_nine <- matrix(1, nrow = 9, ncol = 9) +#' +#' x_example <- setting_prediction_matrix( +#' home = one_by_nine, +#' work = one_by_nine, +#' age_breaks = age_breaks_0_80_plus +#' ) +#' +#' x_example <- setting_prediction_matrix( +#' one_by_nine, +#' one_by_nine, +#' age_breaks = age_breaks_0_80_plus +#' ) +#' +#' x_example +#' +#' @export +setting_prediction_matrix <- function(..., + age_breaks) { + list_matrix <- prepare_list_matrix(...) + + setting_pred_matrix <- set_age_breaks_matrices( + list_matrix, + age_breaks + ) + + # add add setting only if it doesn't exist + setting_pred_matrix <- add_all_setting(setting_pred_matrix) + + new_setting_prediction_matrix( + list_matrix = setting_pred_matrix, + age_breaks = age_breaks + ) +} + +#' Coerce object to a setting prediction matrix +#' +#' This will also calculate an `all` matrix, if `all` is not specified. This +#' is the sum of all other matrices. +#' +#' @param list_matrix list of matrices +#' @param age_breaks numeric vector of ages +#' @param ... extra arguments (currently not used) +#' +#' @return object of class setting prediction matrix +#' +#' @examples +#' +#' age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +#' one_by_nine <- matrix(1, nrow = 9, ncol = 9) +#' +#' mat_list <- list( +#' home = one_by_nine, +#' work = one_by_nine +#' ) +#' +#' mat_list +#' +#' mat_set <- as_setting_prediction_matrix( +#' mat_list, +#' age_breaks = age_breaks_0_80_plus +#' ) +#' +#' mat_set +#' +#' @export +as_setting_prediction_matrix <- function(list_matrix, + age_breaks, + ...) { + UseMethod("as_setting_prediction_matrix") +} + +#' @export +as_setting_prediction_matrix.default <- function(list_matrix, + age_breaks, + ...) { + cli::cli_abort( + "{.code as_setting_prediction_matrix} method not implemented for {.cls \\ + {class(list_matrix)}}" + ) +} + +#' @export +as_setting_prediction_matrix.conmat_setting_prediction_matrix <- function(list_matrix, + age_breaks, + ...) { + cli::cli_warn( + "{.code as_setting_prediction_matrix} not used as this object is alreadt of + a {.cls conmat_setting_prediction_matrix} method not implemented for \\ + {.cls {class(list_matrix)}}." + ) +} + +#' @export +as_setting_prediction_matrix.list <- function(list_matrix, + age_breaks, + ...) { + check_if_all_matrix(list_matrix) + # do something if list_matrix doesn't have any names + list_matrix <- repair_list_matrix_names(list_matrix) + + setting_pred_matrix <- set_age_breaks_matrices( + list_matrix, + age_breaks + ) + + # add add setting only if it doesn't exist + setting_pred_matrix <- add_all_setting(setting_pred_matrix) + + new_setting_prediction_matrix( + list_matrix = setting_pred_matrix, + age_breaks = age_breaks + ) +} diff --git a/R/setting-transmission-matrix.R b/R/setting-transmission-matrix.R new file mode 100644 index 00000000..4245c4c8 --- /dev/null +++ b/R/setting-transmission-matrix.R @@ -0,0 +1,56 @@ +new_transmission_probability_matrix <- function(list_matrix, + age_breaks = NULL) { + structure( + list_matrix, + age_breaks = age_breaks, + class = c("transmission_probability_matrix", class(list_matrix)) + ) +} + +#' Create a setting transmission matrix +#' +#' Helper function to create your own setting transmission matrix, which you +#' may want to use in ... or `autoplot`. This class is the +#' output of functions like `...`, and ... . We recommend using this +#' function is only for advanced users, who are creating their own +#' transmission probability matrix. +#' +#' @param ... list of matrices +#' @param age_breaks age breaks - numeric +#' +#' @return transmission probability matrix +#' +#' @examples +#' +#' age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +#' one_05 <- matrix(0.05, nrow = 9, ncol = 9) +#' +#' x_example <- transmission_probability_matrix( +#' home = one_05, +#' work = one_05, +#' age_breaks = age_breaks_0_80_plus +#' ) +#' +#' x_example <- transmission_probability_matrix( +#' one_05, +#' one_05, +#' age_breaks = age_breaks_0_80_plus +#' ) +#' +#' x_example +#' +#' @export +transmission_probability_matrix <- function(..., + age_breaks) { + list_matrix <- prepare_list_matrix(...) + + setting_transmission_mat <- set_age_breaks_matrices( + list_matrix, + age_breaks + ) + + new_transmission_probability_matrix( + list_matrix = setting_transmission_mat, + age_breaks = age_breaks + ) +} diff --git a/R/unabbreviate_states.R b/R/unabbreviate_states.R deleted file mode 100644 index d85ba39e..00000000 --- a/R/unabbreviate_states.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Un-abbreviate Australian state names -#' -#' @param state_names vector of state names in short form -#' -#' @return Longer state names -#' @seealso [abbreviate_states()] -#' -#' @examples -#' unabbreviate_states("VIC") -#' unabbreviate_states(c("VIC", "QLD")) -#' @export -unabbreviate_states <- function(state_names) { - dplyr::case_when( - state_names %in% c("Australian Capital Territory", "ACT") ~ "Australian Capital Territory", - state_names %in% c("New South Wales", "NSW") ~ "New South Wales", - state_names %in% c("Northern Territory", "NT") ~ "Northern Territory", - state_names %in% c("Queensland", "QLD") ~ "Queensland", - state_names %in% c("South Australia", "SA") ~ "South Australia", - state_names %in% c("Tasmania", "TAS") ~ "Tasmania", - state_names %in% c("Victoria", "VIC") ~ "Victoria", - state_names %in% c("Western Australia", "WA") ~ "Western Australia" - ) -} diff --git a/R/utils.R b/R/utils.R index b65f8542..4d5ddd96 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,7 +1,7 @@ #' Prepare age population data -#' -#' Internally used function within [age_population()] to separate age groups -#' into its lower & upper limit and then filter the data passed to the desired +#' +#' Internally used function within [age_population()] to separate age groups +#' into its lower & upper limit and then filter the data passed to the desired #' year and location. #' #' @param data data.frame @@ -10,55 +10,54 @@ #' @param age_col bare unquoted variable referring to age column #' @param year_col bare unquoted variable referring to year column #' @param year year to filter to. If not specified, gives all years. -#' +#' #' @keywords internal #' -#' @return data frame with lower.age.limit and upper.age.limit and optionally -#' filtered down to specific location or year. +#' @return data frame with `lower.age.limit` and `upper.age.limit` and +#' optionally filtered down to specific location or year. clean_age_population_year <- function(data, - location_col = NULL, - location = NULL, - age_col, - year_col = NULL, - year = NULL) { - - # separate age group into "lower.age.limit" & "upper.age.limit" + location_col = NULL, + location = NULL, + age_col, + year_col = NULL, + year = NULL) { + # separate age group into "lower.age.limit" & "upper.age.limit" age_population_year_df <- separate_age_group(data, {{ age_col }}) - + # filter year only - + if (!is.null(year)) { age_population_year_df <- age_population_year_df %>% dplyr::filter({{ year_col }} %in% {{ year }}) } - + # filter year & location if (!is.null(location)) { age_population_location_df <- age_population_year_df %>% dplyr::filter({{ location_col }} %in% {{ location }}) - + return(age_population_location_df) } else { - return(age_population_year_df) - # return the whole data frame or the df with only year filter + return(age_population_year_df) + # return the whole data frame or the df with only year filter # if year variable is present } } #' Separate age groups -#' +#' #' An internal function used within [clean_age_population_year()] to -#' separate age groups in a data set into two variables, `lower.age.limit`, +#' separate age groups in a data set into two variables, `lower.age.limit`, #' and `upper.age.limit` #' #' @param data data frame #' @param age_col bare unquoted column referring to age column #' @keywords internal -#' @return data frame with two extra columns, `lower.age.limit` and +#' @return data frame with two extra columns, `lower.age.limit` and #' `upper.age.limit` separate_age_group <- function(data, age_col) { result <- data %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% tidyr::separate( {{ age_col }}, c("lower.age.limit", "upper.age.limit"), @@ -76,7 +75,7 @@ separate_age_group <- function(data, age_col) { } #' @title Return the widths of bins denoted by a sequence of lower bounds -#' @description Return the widths of bins denoted by a sequence of lower +#' @description Return the widths of bins denoted by a sequence of lower #' bounds (assuming the final is the same as the `[penultimate]`). #' @param lower_bound lower bound value - a numeric vector #' @return vector @@ -84,10 +83,281 @@ separate_age_group <- function(data, age_col) { #' @keywords internal #' @noRd bin_widths <- function(lower_bound) { - # return the widths of bins denoted by a sequence of lower bounds (assumming # the final is the same as the [penultimate]) diffs <- diff(lower_bound) c(diffs, diffs[length(diffs)]) - +} + +print_list_dim <- function(x, object_class) { + dim_char <- purrr::map_chr( + x, + ~ paste(scales::comma(dim(.x)), collapse = "x") + ) + names_x <- glue::glue( + "{.strong [names(dim_char)]}: a [dim_char] {.cls [object_class]}", + .open = "[", + .close = "]" + ) + cli::cli_li(names_x) +} + +print_model_info <- function(x, object_class) { + dim_char <- purrr::map_chr( + x, + ~ scales::comma(summary(.x)$n) + ) + names_x <- glue::glue( + "{.strong [names(dim_char)]}: a {.cls [object_class]} model ([dim_char] obs)", + .open = "[", + .close = "]" + ) + cli::cli_li(names_x) +} + +print_setting_info <- function(x, + heading, + description = NULL, + list_print_fun = print_list_dim(x, object_class), + object_class) { + age_breaks <- age_breaks(x) + cli::cli_h1(heading) + cli::cat_line() + cli::cli_text(cli::style_italic(description)) + cli::cat_line() + print_age_breaks(age_breaks) + cli::cat_line() + + list_print_fun + + cli::cli_alert_info( + "Access each {.cls {object_class}} with {.code x$name}" + ) + cli::cli_alert_info("e.g., {.code x${names(x)[1]}}") + return(invisible(x)) +} + +#' @export +print.conmat_age_matrix <- function(x, ...) { + # remove class and attributes in printing as they + # appear at the end of the object when printing + x_copy <- x + attr(x_copy, "age_breaks") <- NULL + print(unclass(x_copy)) + return(invisible(x)) +} + +#' @export +print.conmat_setting_prediction_matrix <- function(x, ...) { + print_setting_info( + x = x, + heading = "Setting Prediction Matrices", + description = "A list of matrices containing the model predicted contact rate between ages in each setting.", + object_class = "matrix" + ) +} + +#' @export +print.ngm_setting_matrix <- function(x, ...) { + print_setting_info( + x = x, + heading = "NGM Setting Matrices", + description = "A list of matrices, each {.cls matrix} containing the number of newly infected individuals for a specified age group.", + object_class = "matrix" + ) +} + +#' @export +print.setting_vaccination_matrix <- function(x, ...) { + print_setting_info( + x = x, + heading = "Vaccination Setting Matrices", + description = "A list of matrices, each {.cls matrix} containing the {.strong adjusted} number of newly infected individuals for age groups. These numbers have been adjusted based on proposed vaccination rates in age groups", + object_class = "matrix" + ) +} + +#' @export +print.transmission_probability_matrix <- function(x, ...) { + print_setting_info( + x = x, + heading = "Transmission Probability Matrices", + description = "A list of matrices, each {.cls matrix} containing the {.strong relative} probability of individuals in a given age group infecting an individual in another age group, for that setting.", + object_class = "matrix" + ) +} + +#' @export +print.setting_contact_model <- function(x, ...) { + print_setting_info( + x = x, + heading = "Fitted Setting Contact Models", + description = "A list of fitted {.cls bam} models for each setting. Each {.cls bam} model predicts the contact rate between ages, for that setting.", + list_print_fun = print_model_info(x, "bam"), + object_class = "bam" + ) +} + +#' @export +print.setting_data <- function(x, ...) { + print_setting_info( + x = x, + heading = "Setting Data", + description = "A list of {.cls data.frame}s containing the number of contacts between ages in each setting.", + object_class = "data.frame" + ) +} + +group_age_breaks <- function(x) { + from <- stats::na.omit(dplyr::lag(x)) + to <- stats::na.omit(dplyr::lead(x)) + glue::glue("[{from},{to})") +} + +ungroup_age_breaks <- function(x) { + strsplit( + x = x, + # remove , and ) and [ + split = ",|\\)|\\[" + ) %>% + unlist() %>% + as.numeric() %>% + stats::na.omit() %>% + unique() %>% + sort() +} +# +# group_age_breaks(1:10, pad_inf = FALSE) +# group_age_breaks(1:10, pad_inf = FALSE) %>% ungroup_age_breaks() +# group_age_breaks(1:10, pad_inf = TRUE) +# group_age_breaks(1:10, pad_inf = TRUE) %>% ungroup_age_breaks() +# +# group_age_breaks(c(1:10, Inf), pad_inf = TRUE) +# group_age_breaks(c(1:10, Inf), pad_inf = TRUE) %>% ungroup_age_breaks() +# +# group_age_breaks(c(1:10, Inf), pad_inf = FALSE) +# group_age_breaks(c(1:10, Inf), pad_inf = FALSE) %>% ungroup_age_breaks() +# +# group_age_breaks(age_breaks_0_80_plus) +# group_age_breaks(1:10, pad_inf = FALSE) +# group_age_breaks(1:9, pad_inf = FALSE) +# group_age_breaks(1:8, pad_inf = FALSE) +# group_age_breaks(1:10) +# group_age_breaks(1:9) +# group_age_breaks(1:8) + +all_matrix <- function(x) { + all(vapply( + X = x, + inherits, + what = "matrix", + FUN.VALUE = logical(1) + )) +} + +name_list <- function(list) { + n_list <- length(list) + names(list) <- english::english(seq_len(n_list)) + list +} + + +repair_list_matrix_names <- function(list_matrix) { + if (is.null(names(list_matrix))) { + list_matrix <- name_list(list_matrix) + } + list_matrix +} + +prepare_list_matrix <- function(...) { + list_matrix <- list(...) + check_if_all_matrix(list_matrix) + + # do something if ... doesn't have any names + repair_list_matrix_names(list_matrix) +} + +add_all_setting <- function(list_matrix) { + if (!("all" %in% names(list_matrix))) { + list_matrix$all <- Reduce("+", list_matrix) + } + list_matrix +} + +set_age_breaks_matrix <- function(matrix, age_breaks) { + dimnames(matrix) <- list( + group_age_breaks(age_breaks), + group_age_breaks(age_breaks) + ) + matrix +} + +set_age_breaks_matrices <- function(list_matrix, age_breaks) { + lapply( + list_matrix, + set_age_breaks_matrix, + age_breaks + ) +} + +# age_breaks(perth_contact_0_75) + +remove_inf <- function(x) { + x_inf <- is.infinite(x) + if (!any(x_inf)) { + return(x) + } else if (any(x_inf)) { + inf_index <- which(x_inf) + return(x[-inf_index]) + } +} + +is_equally_spaced <- function(x) { + double_diff <- remove_inf(x) %>% + diff() %>% + diff() + all(double_diff == 0) +} + +age_interval <- function(x) { + if (is_equally_spaced(x)) { + age_int <- remove_inf(x) %>% + diff() %>% + unique() + } else if (!is_equally_spaced(x)) { + age_int <- remove_inf(x) %>% + diff() %>% + mean() %>% + round(2) + } + age_int +} + +print_age_breaks <- function(age_breaks) { + has_inf <- any(is.infinite(age_breaks)) + n_age_breaks <- length(age_breaks) - 1 + age_range <- range(age_breaks, finite = TRUE) + min_age <- age_range[1] + max_age <- age_range[2] + + equally_spaced <- is_equally_spaced(age_breaks) + year_gap <- age_interval(age_breaks) + + if (has_inf) { + age_info <- glue::glue("There are {n_age_breaks} age breaks, ranging {min_age}-{max_age}+ years, ") + } else if (!has_inf) { + age_info <- glue::glue("There are {n_age_breaks} age breaks, ranging {min_age}-{max_age} years, ") + } + if (equally_spaced) { + age_gap_info <- glue::glue("with a regular {year_gap} year interval") + } else if (!equally_spaced) { + age_gap_info <- glue::glue("with an irregular year interval, (on average, {year_gap} years)") + } + + cli::cli_text( + cli::style_italic( + age_info, + age_gap_info + ) + ) } diff --git a/README.Rmd b/README.Rmd index 5149dbf2..869c5f23 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,9 +16,8 @@ knitr::opts_chunk$set( # conmat -[![R-CMD-check](https://github.com/njtierney/conmat/workflows/R-CMD-check/badge.svg)](https://github.com/njtierney/conmat/actions) -[![Codecov test coverage](https://codecov.io/gh/njtierney/conmat/branch/master/graph/badge.svg)](https://codecov.io/gh/njtierney/conmat?branch=master) -[![R-CMD-check](https://github.com/njtierney/conmat/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/njtierney/conmat/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/idem-lab/conmat/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/idem-lab/conmat/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/idem-lab/conmat/branch/master/graph/badge.svg)](https://codecov.io/gh/idem-lab/conmat?branch=master) The goal of conmat is to make it easy to generate synthetic contact matrices for a given age population. @@ -42,10 +41,10 @@ cmat <- matrix( dimnames = list( name_vec, name_vec - ) + ) ) -diag(cmat) <- c(10,11,13) +diag(cmat) <- c(10, 11, 13) cmat[upper.tri(cmat)] <- 3:5 cmat[lower.tri(cmat)] <- 3:5 @@ -75,14 +74,14 @@ We need methods that allow for flexibly creating synthetic contact matrices for You can install the development version with: ```r -install.packages("conmat", repos = "https://njtierney.r-universe.dev") +install.packages("conmat", repos = "https://idem-lab.r-universe.dev") ``` Or alternatively you can use `remotes` ``` r # install.packages("remotes") -remotes::install_github("njtierney/conmat") +remotes::install_github("idem-lab/conmat") ``` ## Example @@ -108,11 +107,11 @@ polymod_survey_data We can create a model of the contact *rate* with the function `fit_single_contact_model` ```{r fit-polymod} -set.seed(2022-09-06) +set.seed(2022 - 09 - 06) contact_model <- fit_single_contact_model( contact_data = polymod_contact_data, population = polymod_survey_data - ) +) ``` This fits a generalised additive model (GAM), predicting the contact rate, based on a series of prediction terms that describe various features of the contact rates. @@ -126,17 +125,17 @@ We can use this contact model to then predict the contact rate in a new populati As a demonstration, let's take an age population from a given LGA in Australia (this was the initial motivation for the package, so there are some helper functions for Australian specific data). ```{r fairfield} -fairfield_age_pop <- abs_age_lga("Fairfield (C)") -fairfield_age_pop +fairfield <- abs_age_lga("Fairfield (C)") +fairfield ``` We can then pass the contact model through to `predict_contacts`, along with the fairfield age population data, and some age breaks that we want to predict to. ```{r predict-contacts} -set.seed(2022-09-06) +set.seed(2022 - 09 - 06) synthetic_contact_fairfield <- predict_contacts( model = contact_model, - population = fairfield_age_pop, + population = fairfield, age_breaks = c(seq(0, 85, by = 5), Inf) ) @@ -145,12 +144,12 @@ synthetic_contact_fairfield ## Plotting -Let's visualise the matrix to get a sense of the predictions with `plot_matrix`. First we need to transform the predictions to a matrix: +Let's visualise the matrix to get a sense of the predictions with `autoplot`. First we need to transform the predictions to a matrix: ```{r plot-matrix-differents} -synthetic_contact_fairfield %>% - predictions_to_matrix() %>% - plot_matrix() +synthetic_contact_fairfield %>% + predictions_to_matrix() %>% + autoplot() ``` ## Applying the model across all settings. @@ -187,5 +186,3 @@ The contact matrices created using this package are transposed when compared to ## Code of Conduct Please note that the conmat project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. - - diff --git a/README.md b/README.md index 48361f11..9e8d99d0 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ -[![R-CMD-check](https://github.com/njtierney/conmat/workflows/R-CMD-check/badge.svg)](https://github.com/njtierney/conmat/actions) +[![R-CMD-check](https://github.com/idem-lab/conmat/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/idem-lab/conmat/actions/workflows/R-CMD-check.yaml) [![Codecov test -coverage](https://codecov.io/gh/njtierney/conmat/branch/master/graph/badge.svg)](https://codecov.io/gh/njtierney/conmat?branch=master) +coverage](https://codecov.io/gh/idem-lab/conmat/branch/master/graph/badge.svg)](https://codecov.io/gh/idem-lab/conmat?branch=master) The goal of conmat is to make it easy to generate synthetic contact @@ -62,14 +62,14 @@ representation of community infection in many regions. You can install the development version with: ``` r -install.packages("conmat", repos = "https://njtierney.r-universe.dev") +install.packages("conmat", repos = "https://idem-lab.r-universe.dev") ``` Or alternatively you can use `remotes` ``` r # install.packages("remotes") -remotes::install_github("njtierney/conmat") +remotes::install_github("idem-lab/conmat") ``` ## Example @@ -98,21 +98,22 @@ the population in that age group. ``` r polymod_survey_data -#> # A tibble: 21 × 2 +#> # A tibble: 21 × 2 (conmat_population) +#> - age: lower.age.limit +#> - population: population #> lower.age.limit population #> -#> 1 0 1841420. -#> 2 5 1950666. -#> 3 10 2122856. -#> 4 15 2323822. -#> 5 20 2406141. -#> 6 25 2377541. -#> 7 30 2552587. -#> 8 35 2982293. -#> 9 40 3044427. -#> 10 45 2828202. -#> # … with 11 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> 1 0 1898966. +#> 2 5 2017632. +#> 3 10 2192410. +#> 4 15 2369985. +#> 5 20 2467873. +#> 6 25 2484327. +#> 7 30 2649826. +#> 8 35 3043704. +#> 9 40 3117812. +#> 10 45 2879510. +#> # ℹ 11 more rows ``` ## Predicting the contact rate @@ -121,13 +122,11 @@ We can create a model of the contact *rate* with the function `fit_single_contact_model` ``` r -set.seed(2022-09-06) +set.seed(2022 - 09 - 06) contact_model <- fit_single_contact_model( contact_data = polymod_contact_data, population = polymod_survey_data - ) -#> Warning in bgam.fit(G, mf, chunk.size, gp, scale, gamma, method = method, : -#> fitted rates numerically 0 occurred +) ``` This fits a generalised additive model (GAM), predicting the contact @@ -146,9 +145,9 @@ contact_model #> school_probability + work_probability + offset(log_contactable_population) #> #> Estimated degrees of freedom: -#> 1.00 4.26 5.40 6.32 7.90 7.38 total = 35.26 +#> 1.85 3.95 3.82 7.10 7.59 4.77 total = 32.09 #> -#> fREML score: 23815.8 rank: 55/57 +#> fREML score: 24429.55 rank: 55/57 ``` We can use this contact model to then predict the contact rate in a new @@ -159,9 +158,11 @@ Australia (this was the initial motivation for the package, so there are some helper functions for Australian specific data). ``` r -fairfield_age_pop <- abs_age_lga("Fairfield (C)") -fairfield_age_pop -#> # A tibble: 18 × 4 +fairfield <- abs_age_lga("Fairfield (C)") +fairfield +#> # A tibble: 18 × 4 (conmat_population) +#> - age: lower.age.limit +#> - population: population #> lga lower.age.limit year population #> #> 1 Fairfield (C) 0 2020 12261 @@ -189,10 +190,10 @@ with the fairfield age population data, and some age breaks that we want to predict to. ``` r -set.seed(2022-09-06) +set.seed(2022 - 09 - 06) synthetic_contact_fairfield <- predict_contacts( model = contact_model, - population = fairfield_age_pop, + population = fairfield, age_breaks = c(seq(0, 85, by = 5), Inf) ) @@ -200,29 +201,28 @@ synthetic_contact_fairfield #> # A tibble: 324 × 3 #> age_group_from age_group_to contacts #> -#> 1 [0,5) [0,5) 0.00213 -#> 2 [0,5) [5,10) 0.00361 -#> 3 [0,5) [10,15) 0.00292 -#> 4 [0,5) [15,20) 0.00419 -#> 5 [0,5) [20,25) 0.0106 -#> 6 [0,5) [25,30) 0.0216 -#> 7 [0,5) [30,35) 0.0316 -#> 8 [0,5) [35,40) 0.0341 -#> 9 [0,5) [40,45) 0.0334 -#> 10 [0,5) [45,50) 0.0324 -#> # … with 314 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> 1 [0,5) [0,5) 0.00281 +#> 2 [0,5) [5,10) 0.00318 +#> 3 [0,5) [10,15) 0.00345 +#> 4 [0,5) [15,20) 0.00571 +#> 5 [0,5) [20,25) 0.0133 +#> 6 [0,5) [25,30) 0.0261 +#> 7 [0,5) [30,35) 0.0356 +#> 8 [0,5) [35,40) 0.0372 +#> 9 [0,5) [40,45) 0.0349 +#> 10 [0,5) [45,50) 0.0317 +#> # ℹ 314 more rows ``` ## Plotting Let’s visualise the matrix to get a sense of the predictions with -`plot_matrix`. First we need to transform the predictions to a matrix: +`autoplot`. First we need to transform the predictions to a matrix: ``` r -synthetic_contact_fairfield %>% - predictions_to_matrix() %>% - plot_matrix() +synthetic_contact_fairfield %>% + predictions_to_matrix() %>% + autoplot() ``` @@ -254,7 +254,9 @@ so: ``` r abs_age_lga("Brisbane (C)") -#> # A tibble: 18 × 4 +#> # A tibble: 18 × 4 (conmat_population) +#> - age: lower.age.limit +#> - population: population #> lga lower.age.limit year population #> #> 1 Brisbane (C) 0 2020 72894 @@ -295,16 +297,16 @@ abs_lga_lookup #> 8 NSW 10600 Bellingen (A) #> 9 NSW 10650 Berrigan (A) #> 10 NSW 10750 Blacktown (C) -#> # … with 534 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 534 more rows ``` Or get the information for states like so: ``` r abs_age_state(state_name = "QLD") -#> # A tibble: 18 × 4 -#> # Groups: year, state [1] +#> # A tibble: 18 × 4 (conmat_population) +#> - age: lower.age.limit +#> - population: population #> year state lower.age.limit population #> #> 1 2020 QLD 0 314602 diff --git a/_pkgdown.yml b/_pkgdown.yml index 5782abfe..58e8027c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,89 +1,115 @@ -template: - params: - bootswatch: flatly home: - links: - - text: Learn more - href: https://github.com/njtierney/conmat - + title: An R Package For Creating Synthetic Contact Matrices + description: > + Builds synthetic contact matrices using Generalised Additive Models, + the POLYMOD contact survey data, you just provide the population data. + This package also incorporates data that is copyright Commonwealth of + Australia (Australian Electoral Commission and Australian Bureau of + Statistics) 2020. + +authors: + Nick Tierney: + href: https://njtierney.com/ + Nick Golding: + href: https://www.telethonkids.org.au/contact-us/our-people/g/nick-golding/ + Aarathy Babu: + href: https://aarathybabu.netlify.app/ + +template: + bootstrap: 5 + bootswatch: flatly + +development: + mode: auto +# +# navbar: +# left: +# - text: Getting Started +# href: articles/getting-started.html +# - text: Example Pipeline +# href: articles/example-pipeline.Rmd +# - text: Data Sources +# href: articles/data-sources.Rmd +# - text: Reference +# href: reference/index.html +# - text: News +# href: news/index.html + reference: + - title: "Core functions" + desc: > + These are the core functions that we intend users to interface with + contents: + - extrapolate_polymod + - apply_vaccination + - generate_ngm + - generate_ngm_oz + - autoplot + - autoplot-conmat + + - title: "ABS Data accessors and helpers" + desc: > + Functions and data objects to access data from the Australian Bureau of Statistics + contents: + - starts_with("abs_") + - starts_with("data_abs_") + - starts_with("get_abs_") + - get_abs_household_size_distribution + - get_abs_per_capita_household_size + - abs_abbreviate_states + - abs_unabbreviate_states - - title: "Data" + - title: "Data and model objects" desc: > - Data provided with the package to assist with model fitting + Functions and objects for accessing data, models, and assisting with model fitting. contents: - - starts_with("abs_") - - data_abs_lga_education - - data_abs_lga_work - - data_abs_state_education - - data_abs_state_work + - starts_with("get_polymod") + - age_population + - vaccination_effect_example_data + - polymod_setting_models - davies_age_extended - eyre_transmission_probabilities - polymod - setting_weights - - - title: "Model fitting and predictions" + - age_group_lookup + - get_setting_transmission_matrices + - prem_germany_contact_matrices + - conmat_original_school_demographics + - conmat_original_work_demographics + - abs_avg_school + - abs_avg_work + + - title: "Extra functions for model fitting and prediction" desc: > - Functions for model fitting and predictions + Functions for model fitting and predictions - for more advanced use contents: - fit_single_contact_model - fit_setting_contacts - - adjust_household_contact_matrix - aggregate_predicted_contacts - estimate_setting_contacts - - extrapolate_polymod - predict_contacts - predict_contacts_1y + - predict_setting_contacts - predictions_to_matrix - matrix_to_predictions + - get_age_population_function + - per_capita_household_size + - starts_with("add_") - - title: "Plotting" - desc: > - For plotting the matrix outputs - contents: - - plot_matrix - - plot_setting_matrices - - - title: "Helper functions" - desc: > - For dealing with ABS data and other features - contents: - - abbreviate_states - - age_population - - age_group_lookup - - bin_widths - - check_lga_name - - check_state_name - - unabbreviate_states - - - title: "Helpers for adding and joining data" + - title: "Creating and accessing information on `conmat_population` objects" desc: > - For joining data and adding other features + Store and use age and population information in a dataframe for use in + other functions in conmat. contents: - - add_modelling_features - - add_offset - - add_population_age_to - - add_school_work_participation - - apply_vaccination - - generate_ngm - - get_age_population_function - - get_data_abs_age_education - - get_data_abs_age_work - - get_household_size_distribution - - get_per_capita_household_size - - get_polymod_contact_data - - get_polymod_per_capita_household_size - - get_polymod_population - - get_polymod_setting_data - - get_setting_transmission_matrices - - per_capita_household_size - -navbar: - left: - - text: Getting Started - href: articles/getting-started.html - - text: Reference - href: reference/index.html - - text: News - href: news/index.html + - conmat_population + - as_conmat_population + - age + - population + - age_breaks + - raw_eigenvalue + - scaling + - new_age_matrix + - setting_prediction_matrix + - as_setting_prediction_matrix + - transmission_probability_matrix diff --git a/data-raw/abs-avg-work-school.R b/data-raw/abs-avg-work-school.R new file mode 100644 index 00000000..a65072c4 --- /dev/null +++ b/data-raw/abs-avg-work-school.R @@ -0,0 +1,21 @@ +## code to prepare `abs-avg-work-school` dataset goes here +library(tidyverse) +abs_avg_work <- data_abs_state_work %>% + group_by( + age + ) %>% + summarise( + work_fraction = mean(proportion) + ) + +usethis::use_data(abs_avg_work, overwrite = TRUE) + +abs_avg_school <- data_abs_state_education %>% + group_by( + age + ) %>% + summarise( + school_fraction = mean(proportion) + ) + +usethis::use_data(abs_avg_school, overwrite = TRUE) diff --git a/data-raw/clean-education-2020.R b/data-raw/clean-education-2020.R index e2334cb1..e723157d 100644 --- a/data-raw/clean-education-2020.R +++ b/data-raw/clean-education-2020.R @@ -13,29 +13,34 @@ abs_education_state_2020_raw <- abs_education_state %>% state, age = 0:100, fill = list(population = 0) - ) + ) -abs_education_state_2020_aggregated <- abs_education_state_2020_raw %>% +abs_education_state_2020_aggregated <- abs_education_state_2020_raw %>% left_join( age_group_lookup, - by = c("age" = "lower") - ) %>% - select(-upper) %>% - fill(age_group) %>% - group_by(year, state, age_group) %>% + by = c("age" = "lower"), + multiple = "all" + ) %>% + select(-upper) %>% + fill(age_group) %>% + group_by(year, state, age_group) %>% summarise(population_educated = sum(population_educated, na.rm = TRUE)) -abs_education_state_2020_aggregated %>% +abs_education_state_2020_aggregated %>% left_join( abs_state_age, - by = c("state", - "age_group")) %>% + by = c( + "state", + "age_group" + ), + multiple = "all" + ) %>% mutate(prop = population_educated / population) abs_state_age_lookup <- abs_state_age %>% mutate( lower.age.limit = parse_number(as.character(age_group)), - state = abbreviate_states(state) + state = abs_abbreviate_states(state) ) %>% select( state, @@ -53,21 +58,23 @@ abs_state_age_lookup <- abs_state_age %>% abs_education_state_2020_raw %>% left_join(abs_state_age_lookup, - by = c( - "state", - "age" - ) - ) %>% + by = c( + "state", + "age" + ) + ) %>% left_join( lookup, by = c("age" = "lower") - ) %>% - select(-upper) %>% - fill(age_group) %>% - group_by(year, state, age_group) %>% - summarise(population_educated = sum(population_educated, na.rm = TRUE), - population_interpolated = sum(population_interpolated, na.rm = TRUE)) %>% - mutate(prop = population_educated / population_interpolated) + ) %>% + select(-upper) %>% + fill(age_group) %>% + group_by(year, state, age_group) %>% + summarise( + population_educated = sum(population_educated, na.rm = TRUE), + population_interpolated = sum(population_interpolated, na.rm = TRUE) + ) %>% + mutate(prop = population_educated / population_interpolated) abs_education_state_2020 diff --git a/data-raw/clean-education.R b/data-raw/clean-education.R index f2162671..4cf9adab 100644 --- a/data-raw/clean-education.R +++ b/data-raw/clean-education.R @@ -63,8 +63,10 @@ abs_education_state <- abs_education_state_raw %>% # NOTE # we are collapsing 4 and under into 4 # and 21 and older into 21 - mutate(age = parse_number(age), - state = toupper(state)) %>% + mutate( + age = parse_number(age), + state = toupper(state) + ) %>% arrange(age) %>% ungroup() diff --git a/data-raw/clean-employment.R b/data-raw/clean-employment.R index 91b21b4f..6a113587 100644 --- a/data-raw/clean-employment.R +++ b/data-raw/clean-employment.R @@ -25,7 +25,7 @@ abs_employ_age_lga <- abs_employment_raw %>% ) %>% clean_names() %>% # remove "total" - filter(age != "Total") %>% + filter(age != "Total") %>% mutate( diff = persons - (males + females), age = case_when( @@ -64,26 +64,30 @@ abs_employ_age_lga <- abs_employment_raw %>% labour_force_status, age, everything() - ) %>% - filter(str_detect(labour_force_status, "Total")) %>% - select(-males, - -females, - -diff) %>% + ) %>% + filter(str_detect(labour_force_status, "Total")) %>% + select( + -males, + -females, + -diff + ) %>% pivot_wider( names_from = labour_force_status, values_from = persons - ) %>% - clean_names() %>% - mutate(state = abbreviate_states(state)) %>% + ) %>% + clean_names() %>% + mutate(state = abs_abbreviate_states(state)) %>% # drop "other territories" - drop_na() %>% - select(-lga_code) %>% - rename(age_group = age) %>% - select(year, - state, - lga, - age_group, - total_employed) + drop_na() %>% + select(-lga_code) %>% + rename(age_group = age) %>% + select( + year, + state, + lga, + age_group, + total_employed + ) abs_employ_age_lga %>% pull(age_group) @@ -93,37 +97,37 @@ abs_employ_age_lga use_data(abs_employ_age_lga, overwrite = TRUE) # interpolation stuff -abs_employ_age_lga %>% +abs_employ_age_lga %>% ungroup() %>% # take the lower? age group? - mutate(age = parse_number(as.character(age_group))) %>% + mutate(age = parse_number(as.character(age_group))) %>% complete( year, state, lga, age = 0:100, fill = list(total_employed = 0) - ) %>% - select(-age_group) %>% + ) %>% + select(-age_group) %>% left_join( age_group_lookup, by = c("age" = "lower") - ) %>% - select(-upper) %>% - fill(age_group) %>% + ) %>% + select(-upper) %>% + fill(age_group) %>% mutate( lower.age.limit = parse_number(as.character(age_group)), - ) %>% + ) %>% select( state, lga, lower.age.limit, total_employed - ) %>% + ) %>% group_by(state) %>% - nest() %>% + nest() %>% mutate(age_function = map(data, get_age_population_function)) - select(-data) %>% +select(-data) %>% summarise( population_interpolated = map_dbl(0:100, age_function), age = 0:100 @@ -145,5 +149,3 @@ abs_employ_age_lga %>% mutate(abs_pct = (abs(diff) / persons) * 100) %>% pull(abs_pct) %>% hist() - - diff --git a/data-raw/clean-household.R b/data-raw/clean-household.R index 5842d23e..917bfd61 100644 --- a/data-raw/clean-household.R +++ b/data-raw/clean-household.R @@ -41,10 +41,10 @@ abs_household_lga <- read_csv(file = here("data-raw/ABS_C16_T23_LGA_060920211607 n_persons_usually_resident == "Eight or more persons" ~ "8+" ) ) %>% - filter(household_composition == "Total Households") %>% - select( - household_composition) %>% - mutate(state = abbreviate_states(state)) %>% - drop_na() %>% + filter(household_composition == "Total Households") %>% + select(-household_composition) %>% + mutate(state = abs_abbreviate_states(state)) %>% + drop_na() %>% rename(lga = lga_name) %>% # renaming LGAs according to abs_lga_lookup & abs_pop_age_lga_2020 mutate( diff --git a/data-raw/clean-lga-lookup.R b/data-raw/clean-lga-lookup.R index 5717922e..d5b6594a 100644 --- a/data-raw/clean-lga-lookup.R +++ b/data-raw/clean-lga-lookup.R @@ -8,23 +8,29 @@ library(conmat) # data downloaded from https://www.abs.gov.au/statistics/people/population/regional-population-age-and-sex/2020/32350DS0003_2020.xls file_path <- here("data-raw/32350DS0003_2020.xls") names_1 <- file_path %>% - read_excel(sheet = "Table 3", - skip = 8, - n_max = 1) %>% + read_excel( + sheet = "Table 3", + skip = 8, + n_max = 1 + ) %>% names() names_2 <- file_path %>% - read_excel(sheet = "Table 3", - skip = 7, - n_max = 1) %>% + read_excel( + sheet = "Table 3", + skip = 7, + n_max = 1 + ) %>% names() names <- c(names_1[1:4], names_2[-(1:4)]) abs_pop_age_lga_2020_raw <- file_path %>% - read_excel(sheet = "Table 3", - skip = 10, - col_names = names) %>% - select(-`S/T code`,-`Total persons`) %>% + read_excel( + sheet = "Table 3", + skip = 10, + col_names = names + ) %>% + select(-`S/T code`, -`Total persons`) %>% rename( `85+` = `85 and over`, state = `S/T name`, @@ -40,14 +46,18 @@ abs_pop_age_lga_2020_raw <- file_path %>% mutate(age = str_replace(age, "–", "-")) %>% clean_names() %>% mutate(year = 2020, .before = state) %>% - rename(lga_code = lga_code19, - lga = lga_name19) %>% - mutate(state = abbreviate_states(state)) + rename( + lga_code = lga_code19, + lga = lga_name19 + ) %>% + mutate(state = abs_abbreviate_states(state)) abs_lga_lookup <- abs_pop_age_lga_2020_raw %>% - select(state, - lga_code, - lga) %>% + select( + state, + lga_code, + lga + ) %>% distinct() %>% mutate(state = case_when( lga == "Unincorp. Other Territories" ~ "OT", @@ -56,4 +66,3 @@ abs_lga_lookup <- abs_pop_age_lga_2020_raw %>% filter_all(any_vars(!is.na(.))) use_data(abs_lga_lookup, overwrite = TRUE) - diff --git a/data-raw/clean-population-by-age-2016.R b/data-raw/clean-population-by-age-2016.R index 033bbd71..143bcddc 100644 --- a/data-raw/clean-population-by-age-2016.R +++ b/data-raw/clean-population-by-age-2016.R @@ -36,7 +36,7 @@ abs_pop_age_lga_2016_raw <- file_path %>% state = `S/T name`, LGA_NAME19 = `LGA name`, LGA_CODE19 = `LGA code` - ) %>% + ) %>% pivot_longer( cols = -c(state, LGA_NAME19, LGA_CODE19), names_to = "age", @@ -53,25 +53,30 @@ abs_pop_age_lga_2016_raw <- file_path %>% lga_code = lga_code19, lga = lga_name19 ) %>% - mutate(state = abbreviate_states(state)) %>% - select(-lga_code) %>% - rename(age_group = age) %>% + mutate(state = abs_abbreviate_states(state)) %>% + select(-lga_code) %>% + rename(age_group = age) %>% # replace emdash mutate( - age_group = str_replace_all(age_group, - "–", - "-"), + age_group = str_replace_all( + age_group, + "–", + "-" + ), age_group = factor(age_group, - levels = str_sort(unique(age_group), numeric = TRUE)) - ) %>% - arrange(state, - age_group) - + levels = str_sort(unique(age_group), numeric = TRUE) + ) + ) %>% + arrange( + state, + age_group + ) + # there's about 1% missing... -abs_pop_age_lga_2016_raw %>% +abs_pop_age_lga_2016_raw %>% naniar::miss_var_summary() -abs_pop_age_lga_2016 <- abs_pop_age_lga_2016_raw %>% +abs_pop_age_lga_2016 <- abs_pop_age_lga_2016_raw %>% drop_na() use_data(abs_pop_age_lga_2016, overwrite = TRUE) diff --git a/data-raw/clean-population-by-age-2020.R b/data-raw/clean-population-by-age-2020.R index 2ed2e3e9..a8d1fcd0 100644 --- a/data-raw/clean-population-by-age-2020.R +++ b/data-raw/clean-population-by-age-2020.R @@ -49,29 +49,34 @@ abs_pop_age_lga_2020_raw <- file_path %>% rename( lga_code = lga_code19, lga = lga_name19 - ) %>% - mutate(state = abbreviate_states(state)) %>% - select(-lga_code) %>% - rename(age_group = age) %>% + ) %>% + mutate(state = abs_abbreviate_states(state)) %>% + select(-lga_code) %>% + rename(age_group = age) %>% # replace emdash mutate( - age_group = str_replace_all(age_group, - "–", - "-"), + age_group = str_replace_all( + age_group, + "–", + "-" + ), age_group = factor(age_group, - levels = str_sort(unique(age_group), numeric = TRUE)) - ) %>% - arrange(state, - age_group) + levels = str_sort(unique(age_group), numeric = TRUE) + ) + ) %>% + arrange( + state, + age_group + ) # about 1% missing data for state, lga, and population -abs_pop_age_lga_2020_raw %>% +abs_pop_age_lga_2020_raw %>% naniar::miss_var_summary() -abs_pop_age_lga_2020_raw %>% +abs_pop_age_lga_2020_raw %>% naniar::gg_miss_var(facet = state) -abs_pop_age_lga_2020 <- abs_pop_age_lga_2020_raw %>% +abs_pop_age_lga_2020 <- abs_pop_age_lga_2020_raw %>% drop_na() use_data(abs_pop_age_lga_2020, overwrite = TRUE) diff --git a/data-raw/clean-state-age-pop.R b/data-raw/clean-state-age-pop.R index b89bd6a9..66af862a 100644 --- a/data-raw/clean-state-age-pop.R +++ b/data-raw/clean-state-age-pop.R @@ -27,25 +27,28 @@ abs_state_age <- read_excel( filter( age_group != "All ages", state != "Australia" - ) %>% - mutate(state = abbreviate_states(state)) %>% + ) %>% + mutate(state = abs_abbreviate_states(state)) %>% relocate( - state, + state, age_group, population - ) %>% + ) %>% mutate( # replace emdash. - age_group = str_replace_all(age_group, - "–", - "-"), - age_group = case_when( - age_group == "100 and over" ~ "100+", - TRUE ~ age_group - ), + age_group = str_replace_all( + age_group, + "–", + "-" + ), + age_group = case_when( + age_group == "100 and over" ~ "100+", + TRUE ~ age_group + ), age_group = factor(age_group, - levels = str_sort(unique(age_group), numeric = TRUE)) + levels = str_sort(unique(age_group), numeric = TRUE) ) + ) abs_state_age diff --git a/data-raw/conmat-original-school-work-demographics.R b/data-raw/conmat-original-school-work-demographics.R new file mode 100644 index 00000000..a8343d00 --- /dev/null +++ b/data-raw/conmat-original-school-work-demographics.R @@ -0,0 +1,41 @@ +## code to prepare `conmat-original-school-work-demographics` dataset goes here +conmat_original_school_demographics <- tibble( + age = 0:120, + school_fraction = 0 +) %>% + mutate( + school_fraction = case_when( + # preschool + age %in% 2:4 ~ 0.5, + # compulsory education + age %in% 5:16 ~ 1, + # voluntary education + age %in% 17:18 ~ 0.5, + # university + age %in% 19:25 ~ 0.1, + # other + .default = 0.05 + ) + ) + +usethis::use_data(conmat_original_school_demographics, overwrite = TRUE) + +conmat_original_work_demographics <- tibble( + age = 0:120, + work_fraction = 0 +) %>% + mutate( + work_fraction = case_when( + # child labour + age %in% 12:19 ~ 0.2, + # young adults (not at school) + age %in% 20:24 ~ 0.7, + # main workforce + age %in% 25:60 ~ 1, + # possibly retired + age %in% 61:65 ~ 0.7, + .default = 0.05 + ) + ) + +usethis::use_data(conmat_original_work_demographics, overwrite = TRUE) diff --git a/data-raw/create-age-lookup.R b/data-raw/create-age-lookup.R index cfbeabf1..9747d662 100644 --- a/data-raw/create-age-lookup.R +++ b/data-raw/create-age-lookup.R @@ -5,17 +5,18 @@ agebreaks <- seq(5, 95, by = 5) age_group_lookup <- tibble( lower = c(0, agebreaks, 100), - upper = c(agebreaks-1, 99, Inf), + upper = c(agebreaks - 1, 99, Inf), age_group = as.character(glue::glue("{lower}-{upper}")) -) %>% +) %>% mutate( age_group = case_when( age_group == "100-Inf" ~ "100+", TRUE ~ age_group ), age_group = factor(age_group, - levels = str_sort(age_group, numeric = TRUE)) - ) %>% + levels = str_sort(age_group, numeric = TRUE) + ) + ) %>% arrange(age_group) use_data(age_group_lookup, compress = "xz", overwrite = TRUE) diff --git a/data-raw/create-polymod-model.R b/data-raw/create-polymod-model.R index bed1c4b8..a890c00b 100644 --- a/data-raw/create-polymod-model.R +++ b/data-raw/create-polymod-model.R @@ -1,5 +1,5 @@ library(conmat) -set.seed(2022 - 08 - 26) +set.seed(2022 - 12 - 19) polymod_contact_data <- get_polymod_setting_data() polymod_survey_data <- get_polymod_population() polymod_setting_models <- fit_setting_contacts( diff --git a/data-raw/data_abs_census_education_2016.R b/data-raw/data_abs_census_education_2016.R index 057320b2..9ad38ba2 100644 --- a/data-raw/data_abs_census_education_2016.R +++ b/data-raw/data_abs_census_education_2016.R @@ -1,6 +1,6 @@ -#Data Source: Census of Population and Housing, 2016, TableBuilder +# Data Source: Census of Population and Housing, 2016, TableBuilder library(readr) library(conmat) @@ -9,7 +9,7 @@ library(tidyverse) -#TYPP included +# TYPP included data_abs_state_education <- read_csv("data-raw/2016_census_education.csv", skip = 8) %>% @@ -22,16 +22,19 @@ data_abs_state_education <- "Cells in this table have been randomly adjusted to avoid the release of confidential data. No reliance should be placed on small cells." ) ) %>% - # Institution (TYPP) stated, full-time/part-time status (STUP) not stated -> considered - mutate(state = abbreviate_states(`STATE (UR)`), - state=replace_na(state,"OT")) %>% - rename(age = "AGEP Age", - student_type = "STUP Full-Time/Part-Time Student Status") %>% + mutate( + state = abs_abbreviate_states(`STATE (UR)`), + state = replace_na(state, "OT") + ) %>% + rename( + age = "AGEP Age", + student_type = "STUP Full-Time/Part-Time Student Status" + ) %>% select(state, age, student_type, Count) %>% filter( student_type %in% c( - "Full-time student" , + "Full-time student", "Part-time student", "Total", "Institution (TYPP) stated, full-time/part-time status (STUP) not stated" @@ -41,9 +44,11 @@ data_abs_state_education <- str_detect(student_type, "TYPP") ~ "TYPP", TRUE ~ as.character(student_type) )) %>% - pivot_wider(names_from = student_type, - values_from = Count, - values_fn = list) %>% + pivot_wider( + names_from = student_type, + values_from = Count, + values_fn = list + ) %>% unnest(cols = everything()) %>% mutate(total_population = as.numeric(Total)) %>% mutate( @@ -56,27 +61,30 @@ data_abs_state_education <- mutate(proportion = case_when( total_population == 0 & population_educated == 0 ~ 0, TRUE ~ as.numeric(proportion) - ))%>% - #filter(total_population != 0) %>% - select(year, - state, - age, - population_educated, - total_population, - proportion) -# + )) %>% + # filter(total_population != 0) %>% + select( + year, + state, + age, + population_educated, + total_population, + proportion + ) +# data_abs_state_education %>% ggplot(aes(x = age, y = proportion)) + - geom_point() + facet_wrap( ~ state) + geom_point() + + facet_wrap(~state) use_data(data_abs_state_education, compress = "xz", overwrite = TRUE) -# +# # data_census_education %>% # filter(state == "VIC", age > 90) %>% # ggplot(aes(x = age, y = proportion)) + # geom_point() -# +# # data_agg_data <- data_census_education %>% # mutate( # age_group = case_when( @@ -95,9 +103,9 @@ use_data(data_abs_state_education, compress = "xz", overwrite = TRUE) # mutate(age_group = factor(age_group, levels = c("2-4", "5-16", "17-18", "19-25", "Other"))) %>% # group_by(age_group) %>% # summarise(prop = sum(population_educated) / sum(total_population)) -# +# # # TYPP not considered -# +# # misc_census_education <- # read_csv("data-raw/2016_census_education.csv", skip = 8) %>% # row_to_names(row_number = 1) %>% @@ -113,7 +121,7 @@ use_data(data_abs_state_education, compress = "xz", overwrite = TRUE) # # Type of Educational Institution Attending (TYPP). Definition in below link # # https://www.abs.gov.au/ausstats/abs@.nsf/Lookup/by%20Subject/2900.0~2016~Main%20Features~TYPP%20Type%20of%20Educational%20Institution%20Attending~10086 # # Institution (TYPP) stated, full-time/part-time status (STUP) not stated -> Not considered -# mutate(state = abbreviate_states(`STATE (UR)`)) %>% +# mutate(state = abs_abbreviate_states(`STATE (UR)`)) %>% # rename(age = "AGEP Age", # student_type = "STUP Full-Time/Part-Time Student Status") %>% # select(state, age, student_type, Count) %>% @@ -137,20 +145,20 @@ use_data(data_abs_state_education, compress = "xz", overwrite = TRUE) # population_educated, # total_population, # proportion) -# +# # misc_census_education %>% # ggplot(aes(x = age, y = proportion)) + # geom_point() + facet_wrap( ~ state) -# +# # misc_census_education %>% # filter(state == "VIC", age > 90) %>% # ggplot(aes(x = age, y = proportion)) + # geom_point() -# +# # misc_census_education %>% # filter(state == "VIC", age > 90) %>% # arrange(-proportion) -# +# # misc_agg_data <- misc_census_education %>% # mutate( # age_group = case_when( @@ -169,17 +177,17 @@ use_data(data_abs_state_education, compress = "xz", overwrite = TRUE) # mutate(age_group = factor(age_group, levels = c("2-4", "5-16", "17-18", "19-25", "Other"))) %>% # group_by(age_group) %>% # summarise(prop_excluding_typp = sum(population_educated) / sum(total_population)) -# +# # misc_agg_data -# +# # conmat_prop_data <- # tibble( # age_group = c("2-4", "5-16", "17-18", "19-25", "Other"), # conmat_prop = c(0.5, 1, 0.5, 0.1, 0.05) # ) -# +# # inner_join(conmat_prop_data,misc_agg_data) -> misc_comparison_table -# +# # inner_join(conmat_prop_data,data_agg_data ) %>% # inner_join(misc_comparison_table) -> comparison_table # comparison_table diff --git a/data-raw/data_abs_census_education_lga_2016.R b/data-raw/data_abs_census_education_lga_2016.R index f6a6a196..785d1c0c 100644 --- a/data-raw/data_abs_census_education_lga_2016.R +++ b/data-raw/data_abs_census_education_lga_2016.R @@ -5,7 +5,7 @@ library(conmat) data_abs_lga_education <- list() for (i in 1:6) - + { abs_lga_education_df <- read_excel( @@ -14,12 +14,12 @@ for (i in 1:6) skip = 7, n_max = 566 ) - + df <- abs_lga_education_df %>% row_to_names(1) colnames(df)[2] <- "lga" df <- df[-1, ] - + data_abs_lga_education[[i]] <- df %>% select(-c(Total, `AGEP Age`)) %>% filter(lga != "Total") %>% @@ -53,12 +53,13 @@ data_abs_census_lga_education <- data_abs_lga_education %>% anomaly_flag = as.logical(anomaly_flag) ) %>% select(year, - lga, - age, - population_educated, - total_population = total, - proportion, - anomaly_flag)%>% + lga, + age, + population_educated, + total_population = total, + proportion, + anomaly_flag + ) %>% mutate( lga = case_when( lga == "Botany Bay (C)" ~ "Bayside (A)", @@ -83,10 +84,12 @@ summary(data_abs_census_lga_education$anomaly_flag) data_lga_state <- read_csv("data-raw/2011_lga_state.csv") %>% - select(state = STATE_NAME_2011, - lga_code = LGA_CODE_2011, - lga = LGA_NAME_2011) %>% - mutate(state = abbreviate_states(state)) %>% + select( + state = STATE_NAME_2011, + lga_code = LGA_CODE_2011, + lga = LGA_NAME_2011 + ) %>% + mutate(state = abs_abbreviate_states(state)) %>% distinct_all() %>% dplyr::mutate(state = replace_na(state, "Other Territories")) %>% mutate( @@ -165,32 +168,31 @@ lga_state <- lgas_in_education_census %>% data_abs_census_lga_education %>% left_join(lga_state, by = "lga") %>% - relocate(year, state, everything())%>% - filter(!str_detect(lga,"No usual address"))%>% + relocate(year, state, everything()) %>% + filter(!str_detect(lga, "No usual address")) %>% mutate(lga = case_when( (state == "VIC" & lga == "Kingston (C)") ~ "Kingston (C) (Vic.)", (state == "VIC" & lga == "Latrobe (C)") ~ "Latrobe (C) (Vic.)", (state == "QLD" & lga == "Central Highlands (R)") ~ "Central Highlands (R) (Qld)", (state == "QLD" & lga == "Flinders (S)") ~ "Flinders (S) (Qld)", (state == "SA" & lga == "Campbelltown (C)") ~ "Campbelltown (C) (SA)", - (state == "SA" & lga == "Kingston (DC)") ~ "Kingston (DC) (SA)", (state == "TAS" & lga == "Central Coast (M)") ~ "Central Coast (M) (Tas.)", (state == "TAS" & lga == "Flinders (M)") ~ "Flinders (M) (Tas.)", (state == "TAS" & lga == "Central Highlands (M)") ~ "Central Highlands (M) (Tas.)", (state == "TAS" & lga == "Latrobe (M)") ~ "Latrobe (M) (Tas.)", TRUE ~ as.character(lga) - ))-> data_abs_lga_education - # mutate(lga = case_when( - # str_detect(lga, "Migratory - Offshore - Shipping") ~ as.character(lga), - # TRUE ~ str_trim(str_remove_all(lga, pattern = patterns)) - # )) + )) -> data_abs_lga_education +# mutate(lga = case_when( +# str_detect(lga, "Migratory - Offshore - Shipping") ~ as.character(lga), +# TRUE ~ str_trim(str_remove_all(lga, pattern = patterns)) +# )) -data_abs_lga_education%>% - select(lga)%>% - distinct()%>% - left_join(conmat_abs_household_data)-> check_lga +data_abs_lga_education %>% + select(lga) %>% + distinct() %>% + left_join(conmat_abs_household_data) -> check_lga summary(data_abs_census_lga_education) skimr::skim(data_abs_lga_education) diff --git a/data-raw/data_abs_census_work_2016.R b/data-raw/data_abs_census_work_2016.R index 63b849f8..3faad9ec 100644 --- a/data-raw/data_abs_census_work_2016.R +++ b/data-raw/data_abs_census_work_2016.R @@ -2,39 +2,45 @@ library(tidyverse) library(janitor) -aus_states <- c("New South Wales","Victoria","Queensland", - "South Australia", - "Western Australia", - "Tasmania" , - "Northern Territory", - "Australian Capital Territory") - -abs_census_labour_status <- read_csv("data-raw/2016_abs_census_labour_status.csv", - skip = 8,n_max = 138) - -get_data <- function(i){ - abs_census_labour_status%>% - slice(i:c(i+11))%>% - slice(-1)%>% - row_to_names(1)%>% - rename(Status=`AGEP Age`)%>% - select(-`Total`)%>% - pivot_longer(-Status,names_to = "Age",values_to="population")%>% - filter(grepl('Employed|Total',Status))%>% - pivot_wider(names_from = Status,values_from = population)%>% - clean_names()%>% - #filter(total!= 0)%>% +aus_states <- c( + "New South Wales", "Victoria", "Queensland", + "South Australia", + "Western Australia", + "Tasmania", + "Northern Territory", + "Australian Capital Territory" +) + +abs_census_labour_status <- read_csv("data-raw/2016_abs_census_labour_status.csv", + skip = 8, n_max = 138 +) + +get_data <- function(i) { + abs_census_labour_status %>% + slice(i:c(i + 11)) %>% + slice(-1) %>% + row_to_names(1) %>% + rename(Status = `AGEP Age`) %>% + select(-`Total`) %>% + pivot_longer(-Status, names_to = "Age", values_to = "population") %>% + filter(grepl("Employed|Total", Status)) %>% + pivot_wider(names_from = Status, values_from = population) %>% + clean_names() %>% + # filter(total!= 0)%>% + mutate( + year = 2016, + age = as.numeric(age), + employed_population = as.numeric(employed_worked_full_time + + employed_worked_part_time + + employed_away_from_work), + proportion = employed_population / total, + state = abs_census_labour_status$...1[[i]] + ) %>% mutate( - year=2016, - age=as.numeric(age), - employed_population=as.numeric(employed_worked_full_time+ - employed_worked_part_time+ - employed_away_from_work), - proportion=employed_population/total, - state=abs_census_labour_status$...1[[i]])%>% - mutate(state=conmat::abbreviate_states(state), - state=replace_na(state,"OT"))%>% - select(year,state,age,employed_population,total_population=total,proportion)%>% + state = conmat::abs_abbreviate_states(state), + state = replace_na(state, "OT") + ) %>% + select(year, state, age, employed_population, total_population = total, proportion) %>% mutate(proportion = case_when( total_population == 0 & employed_population == 0 ~ 0, TRUE ~ as.numeric(proportion) @@ -42,24 +48,25 @@ get_data <- function(i){ } -data_abs_state_work <- map_dfr(seq(1,113,14),get_data) +data_abs_state_work <- map_dfr(seq(1, 113, 14), get_data) use_data(data_abs_state_work, compress = "xz", overwrite = TRUE) -data_abs_state_work_2016%>% +data_abs_state_work_2016 %>% ggplot(aes(x = age, y = proportion)) + - geom_point() + facet_wrap( ~ state) + geom_point() + + facet_wrap(~state) -# maybe mention in the documentation that data from abs have been randomly adjusted -# to avoid the release of confidential data. +# maybe mention in the documentation that data from abs have been randomly adjusted +# to avoid the release of confidential data. # No reliance should be placed on small cells. -data_abs_state_work_2016%>% - filter(state=="VIC")%>% - arrange(-proportion) +data_abs_state_work_2016 %>% + filter(state == "VIC") %>% + arrange(-proportion) -work_fraction = ~ dplyr::case_when( +work_fraction <- ~ dplyr::case_when( # child labour .x %in% 12:19 ~ 0.2, # young adults (not at school) @@ -72,8 +79,8 @@ work_fraction = ~ dplyr::case_when( TRUE ~ 0.05 ) -data_abs_state_work_2016%>% - mutate(age_group=case_when( +data_abs_state_work_2016 %>% + mutate(age_group = case_when( # child labour age %in% 12:19 ~ "12-19", # young adults (not at school) @@ -83,11 +90,10 @@ data_abs_state_work_2016%>% # possibly retired age %in% 61:65 ~ "61-64", # other - TRUE ~"Other" - ) - )%>% + TRUE ~ "Other" + )) %>% group_by(age_group) %>% - summarise(work_fraction = sum(employed_population) / sum(total_population))->work_fraction + summarise(work_fraction = sum(employed_population) / sum(total_population)) -> work_fraction conmat_work_prop_data <- @@ -96,5 +102,5 @@ conmat_work_prop_data <- conmat_work_prop = c(0.2, 0.7, 1, 0.7, 0.05) ) -inner_join(conmat_work_prop_data,work_fraction,by="age_group") -> work_fraction_comparison_table -work_fraction_comparison_table \ No newline at end of file +inner_join(conmat_work_prop_data, work_fraction, by = "age_group") -> work_fraction_comparison_table +work_fraction_comparison_table diff --git a/data-raw/data_abs_census_work_lga_2016.R b/data-raw/data_abs_census_work_lga_2016.R index 90c124ce..59092a62 100644 --- a/data-raw/data_abs_census_work_lga_2016.R +++ b/data-raw/data_abs_census_work_lga_2016.R @@ -4,10 +4,12 @@ library(janitor) library(conmat) data_lga_state <- read_csv("data-raw/2011_lga_state.csv") %>% - select(state = STATE_NAME_2011, - lga_code = LGA_CODE_2011, - lga = LGA_NAME_2011) %>% - mutate(state = abbreviate_states(state)) %>% + select( + state = STATE_NAME_2011, + lga_code = LGA_CODE_2011, + lga = LGA_NAME_2011 + ) %>% + mutate(state = abs_abbreviate_states(state)) %>% distinct_all() %>% dplyr::mutate(state = replace_na(state, "Other Territories")) %>% mutate( @@ -34,9 +36,13 @@ abs_census_lga_work <- col_types = cols(...1 = col_character()), skip = 8, n_max = 65873 - ) %>% row_to_names(1) %>% clean_names() %>% - rename(age = na, - lga = lfsp_labour_force_status) %>% + ) %>% + row_to_names(1) %>% + clean_names() %>% + rename( + age = na, + lga = lfsp_labour_force_status + ) %>% slice(-1) %>% mutate( year = 2016, @@ -81,12 +87,13 @@ data_abs_census_lga_work <- abs_census_lga_work %>% TRUE ~ as.numeric(proportion) )) %>% select(year, - lga, - age, - employed_population, - total_population = total, - proportion, - anomaly_flag)%>% + lga, + age, + employed_population, + total_population = total, + proportion, + anomaly_flag + ) %>% mutate( lga = case_when( lga == "Botany Bay (C)" ~ "Bayside (A)", @@ -111,7 +118,7 @@ data_abs_census_lga_work <- abs_census_lga_work %>% data_lga_state <- data_lga_state %>% mutate(lga = case_when( (state == "NSW" & - lga == "Campbelltown (C)") ~ "Campbelltown (C) (NSW)", + lga == "Campbelltown (C)") ~ "Campbelltown (C) (NSW)", TRUE ~ as.character(lga) )) @@ -120,7 +127,7 @@ conmat::abs_household_lga %>% lgas_in_work_census <- data_abs_census_lga_work %>% select(lga) %>% - distinct()%>% + distinct() %>% mutate( lga = case_when( lga == "Botany Bay (C)" ~ "Bayside (A)", @@ -186,25 +193,25 @@ data_abs_census_lga_work %>% lga == "Kalamunda (S)" ~ "Kalamunda (C)", lga == "Kalgoorlie/Boulder (C)" ~ "Kalgoorlie-Boulder (C)", TRUE ~ lga - ))%>% + ) + ) %>% mutate(lga = case_when( (state == "VIC" & lga == "Kingston (C)") ~ "Kingston (C) (Vic.)", (state == "VIC" & lga == "Latrobe (C)") ~ "Latrobe (C) (Vic.)", (state == "QLD" & lga == "Central Highlands (R)") ~ "Central Highlands (R) (Qld)", (state == "QLD" & lga == "Flinders (S)") ~ "Flinders (S) (Qld)", (state == "SA" & lga == "Campbelltown (C)") ~ "Campbelltown (C) (SA)", - (state == "SA" & lga == "Kingston (DC)") ~ "Kingston (DC) (SA)", (state == "TAS" & lga == "Central Coast (M)") ~ "Central Coast (M) (Tas.)", (state == "TAS" & lga == "Flinders (M)") ~ "Flinders (M) (Tas.)", (state == "TAS" & lga == "Central Highlands (M)") ~ "Central Highlands (M) (Tas.)", (state == "TAS" & lga == "Latrobe (M)") ~ "Latrobe (M) (Tas.)", TRUE ~ as.character(lga) - ))-> data_abs_lga_work - # mutate(lga = case_when( - # str_detect(lga, "Migratory - Offshore - Shipping") ~ as.character(lga), - # TRUE ~ str_trim(str_remove_all(lga, pattern = patterns)) - # )) + )) -> data_abs_lga_work +# mutate(lga = case_when( +# str_detect(lga, "Migratory - Offshore - Shipping") ~ as.character(lga), +# TRUE ~ str_trim(str_remove_all(lga, pattern = patterns)) +# )) summary(data_abs_lga_work) @@ -228,6 +235,6 @@ use_data(data_abs_lga_work, compress = "xz", overwrite = TRUE) visdat::vis_miss(data_abs_lga_work) -data_abs_lga_work%>% - filter(is.na(state))%>% - distinct(lga)->missing +data_abs_lga_work %>% + filter(is.na(state)) %>% + distinct(lga) -> missing diff --git a/data-raw/digitise_eyre_matrix.R b/data-raw/digitise_eyre_matrix.R index ce606b94..25bcface 100644 --- a/data-raw/digitise_eyre_matrix.R +++ b/data-raw/digitise_eyre_matrix.R @@ -8,28 +8,25 @@ #' @return #' @author Nick Golding #' @export -digitise_eyre_matrix <- function( - matrix_file, - legend_file = "data-raw/eyre_legend_raw.png", - matrix_age_range = c(4.5, 70.5), - legend_probability_range = c(0.16, 0.8), - age_breaks = seq(0, 100) -) { - +digitise_eyre_matrix <- function(matrix_file, + legend_file = "data-raw/eyre_legend_raw.png", + matrix_age_range = c(4.5, 70.5), + legend_probability_range = c(0.16, 0.8), + age_breaks = seq(0, 100)) { # load the rasters matrix <- png::readPNG(matrix_file)[, , 1:3] legend <- png::readPNG(legend_file)[, , 1:3] - + # bounds of the age matrix n_contact_pixels <- dim(matrix)[1] n_case_pixels <- dim(matrix)[2] min_age <- min(matrix_age_range) max_age <- max(matrix_age_range) - + # ages for intereger-year summary and aggregation age_breaks_1y <- 0:100 max_age_aggregate <- max(age_breaks) - + # convert legend into 3 channel lookup legend_vals <- legend[75, , ] %>% `colnames<-`(c("R", "G", "B")) %>% @@ -41,7 +38,7 @@ digitise_eyre_matrix <- function( length.out = n() ) ) - + # convert matrix into tibble with age labels case_ages <- seq(min_age, max_age, length.out = n_case_pixels) contact_ages <- seq(min_age, max_age, length.out = n_contact_pixels) @@ -54,18 +51,20 @@ digitise_eyre_matrix <- function( contact_age = rep(rev(contact_ages), n_case_pixels), .before = everything() ) - + # interpolate linearly from colours to probabilities, based on the legend mod <- lm(probability ~ R + G + B, - data = legend_vals) - + data = legend_vals + ) + matrix_vals_prob <- matrix_vals %>% mutate( probability = predict( object = mod, - newdata = .) + newdata = . + ) ) - + # aggregate to 1y resolution matrix_vals_prob_1y <- matrix_vals_prob %>% mutate( @@ -84,7 +83,7 @@ digitise_eyre_matrix <- function( ), .groups = "drop" ) - + # extrapolate to all ages, filling in the value for the nearest age pair matrix_vals_prob_1y_all <- expand_grid( case_age = age_breaks_1y, @@ -93,19 +92,16 @@ digitise_eyre_matrix <- function( euclidean_join( matrix_vals_prob_1y ) - + # optionally aggregate up to the specified age breaks if (identical(age_breaks, age_breaks_1y)) { - matrix_vals_prob_agg <- matrix_vals_prob_1y_all - } else { - age_group_lookup <- get_age_group_lookup( age_breaks, age_breaks_1y ) - + matrix_vals_prob_agg <- matrix_vals_prob_1y_all %>% left_join( age_group_lookup, @@ -115,7 +111,7 @@ digitise_eyre_matrix <- function( -case_age ) %>% rename( - case_age = age_group, + case_age = age_group, ) %>% left_join( age_group_lookup, @@ -135,10 +131,10 @@ digitise_eyre_matrix <- function( across( ends_with("age"), ~ factor(.x, - levels = str_sort( - unique(.x), - numeric = TRUE - ) + levels = str_sort( + unique(.x), + numeric = TRUE + ) ) ) ) %>% @@ -153,9 +149,7 @@ digitise_eyre_matrix <- function( ), .groups = "drop" ) - } - + matrix_vals_prob_agg - -} \ No newline at end of file +} diff --git a/data-raw/euclidean_join.R b/data-raw/euclidean_join.R index 887a60ea..fbac54bc 100644 --- a/data-raw/euclidean_join.R +++ b/data-raw/euclidean_join.R @@ -7,28 +7,27 @@ #' @return #' @author Nick Golding #' @export -euclidean_join <- function (.data, - legend, - by = intersect(names(.data), names(legend))) { +euclidean_join <- function(.data, + legend, + by = intersect(names(.data), names(legend))) { # Do Euclidean lookup from matrix pixels to legends - + # get Euclidean distance matrix between data and legend distances <- rdist( .data[, by], legend[, by] ) - + # find the index to the nearest value in the legend row_idx <- apply(distances, 1, which.min) - + # get the new columns from the legend by_col_idx <- match(by, names(legend)) new_cols <- legend[, -by_col_idx] - + # combine them bind_cols( .data, new_cols[row_idx, ] ) - } diff --git a/data-raw/example_vaccination_effect.R b/data-raw/example_vaccination_effect.R index a7915458..895d3196 100644 --- a/data-raw/example_vaccination_effect.R +++ b/data-raw/example_vaccination_effect.R @@ -1,5 +1,5 @@ -vaccination_effect_example_data<- readr::read_csv("data-raw/example_vaccine_coverage_effects.csv")%>% - select(age_band,coverage,acquisition,transmission)%>% +vaccination_effect_example_data <- readr::read_csv("data-raw/example_vaccine_coverage_effects.csv") %>% + select(age_band, coverage, acquisition, transmission) %>% tibble::as_tibble() -use_data(vaccination_effect_example_data, overwrite = TRUE,compress = "xz") \ No newline at end of file +use_data(vaccination_effect_example_data, overwrite = TRUE, compress = "xz") diff --git a/data-raw/get_age_group_lookup.R b/data-raw/get_age_group_lookup.R index ad72adf7..8ab6ab2d 100644 --- a/data-raw/get_age_group_lookup.R +++ b/data-raw/get_age_group_lookup.R @@ -8,12 +8,9 @@ #' @return #' @author Nick Golding #' @export -get_age_group_lookup <- function( - age_breaks, - age_breaks_1y = 0:100, - label_includes_upper = FALSE -) { - +get_age_group_lookup <- function(age_breaks, + age_breaks_1y = 0:100, + label_includes_upper = FALSE) { n_breaks <- length(age_breaks) age_group_lookup <- expand.grid( age = age_breaks_1y, diff --git a/data-raw/prem/contact_all.rdata b/data-raw/prem/contact_all.rdata new file mode 100644 index 00000000..8042dec0 Binary files /dev/null and b/data-raw/prem/contact_all.rdata differ diff --git a/data-raw/prem/contact_home.rdata b/data-raw/prem/contact_home.rdata new file mode 100644 index 00000000..843de1cc Binary files /dev/null and b/data-raw/prem/contact_home.rdata differ diff --git a/data-raw/prem/contact_others.rdata b/data-raw/prem/contact_others.rdata new file mode 100644 index 00000000..7dd77327 Binary files /dev/null and b/data-raw/prem/contact_others.rdata differ diff --git a/data-raw/prem/contact_school.rdata b/data-raw/prem/contact_school.rdata new file mode 100644 index 00000000..087f5209 Binary files /dev/null and b/data-raw/prem/contact_school.rdata differ diff --git a/data-raw/prem/contact_work.rdata b/data-raw/prem/contact_work.rdata new file mode 100644 index 00000000..1b920f2c Binary files /dev/null and b/data-raw/prem/contact_work.rdata differ diff --git a/data-raw/prem/rural/contact_all_rural.rdata b/data-raw/prem/rural/contact_all_rural.rdata new file mode 100644 index 00000000..9384b306 Binary files /dev/null and b/data-raw/prem/rural/contact_all_rural.rdata differ diff --git a/data-raw/prem/rural/contact_home_rural.rdata b/data-raw/prem/rural/contact_home_rural.rdata new file mode 100644 index 00000000..4f7dfa12 Binary files /dev/null and b/data-raw/prem/rural/contact_home_rural.rdata differ diff --git a/data-raw/prem/rural/contact_others_rural.rdata b/data-raw/prem/rural/contact_others_rural.rdata new file mode 100644 index 00000000..a3181666 Binary files /dev/null and b/data-raw/prem/rural/contact_others_rural.rdata differ diff --git a/data-raw/prem/rural/contact_school_rural.rdata b/data-raw/prem/rural/contact_school_rural.rdata new file mode 100644 index 00000000..33aaf4b6 Binary files /dev/null and b/data-raw/prem/rural/contact_school_rural.rdata differ diff --git a/data-raw/prem/rural/contact_work_rural.rdata b/data-raw/prem/rural/contact_work_rural.rdata new file mode 100644 index 00000000..e831faaf Binary files /dev/null and b/data-raw/prem/rural/contact_work_rural.rdata differ diff --git a/data-raw/prem/urban/contact_all_urban.rdata b/data-raw/prem/urban/contact_all_urban.rdata new file mode 100644 index 00000000..a345fb9f Binary files /dev/null and b/data-raw/prem/urban/contact_all_urban.rdata differ diff --git a/data-raw/prem/urban/contact_home_urban.rdata b/data-raw/prem/urban/contact_home_urban.rdata new file mode 100644 index 00000000..07bc8d08 Binary files /dev/null and b/data-raw/prem/urban/contact_home_urban.rdata differ diff --git a/data-raw/prem/urban/contact_others_urban.rdata b/data-raw/prem/urban/contact_others_urban.rdata new file mode 100644 index 00000000..82d29669 Binary files /dev/null and b/data-raw/prem/urban/contact_others_urban.rdata differ diff --git a/data-raw/prem/urban/contact_school_urban.rdata b/data-raw/prem/urban/contact_school_urban.rdata new file mode 100644 index 00000000..9da7e118 Binary files /dev/null and b/data-raw/prem/urban/contact_school_urban.rdata differ diff --git a/data-raw/prem/urban/contact_work_urban.rdata b/data-raw/prem/urban/contact_work_urban.rdata new file mode 100644 index 00000000..6de30db1 Binary files /dev/null and b/data-raw/prem/urban/contact_work_urban.rdata differ diff --git a/data-raw/prem_germany_contact_matrices.R b/data-raw/prem_germany_contact_matrices.R new file mode 100644 index 00000000..fd0b2a6f --- /dev/null +++ b/data-raw/prem_germany_contact_matrices.R @@ -0,0 +1,34 @@ +# Loads in the Prem matrices for Germany, taken from the 2021 paper. + +load("data-raw/prem/contact_home.rdata") +load("data-raw/prem/contact_work.rdata") +load("data-raw/prem/contact_school.rdata") +load("data-raw/prem/contact_others.rdata") +load("data-raw/prem/contact_all.rdata") + +# ISO3 code for Germany is DEU + +prem_home <- contact_home[["DEU"]] %>% + t() + +prem_work <- contact_work[["DEU"]] %>% + t() + +prem_school <- contact_school[["DEU"]] %>% + t() + +prem_other <- contact_others[["DEU"]] %>% + t() + +prem_all <- contact_all[["DEU"]] %>% + t() + +prem_germany_contact_matrices <- list( + "home" = prem_home, + "work" = prem_work, + "school" = prem_school, + "other" = prem_other, + "all" = prem_all +) + +usethis::use_data(prem_germany_contact_matrices, overwrite = TRUE) diff --git a/data-raw/read_eyre_transmission_probabilities.R b/data-raw/read_eyre_transmission_probabilities.R index fcf5357b..0d9838c4 100644 --- a/data-raw/read_eyre_transmission_probabilities.R +++ b/data-raw/read_eyre_transmission_probabilities.R @@ -55,7 +55,7 @@ eyre_transmission_probabilities <- eyre_transmission_probabilities_with_5y readr::write_csv( x = eyre_transmission_probabilities, file = "data-raw/eyre_transmission_probabilities.csv" - ) +) zip::zip( zipfile = "data-raw/eyre_transmission_probabilities.csv.gz", diff --git a/data-raw/read_setting_weights.R b/data-raw/read_setting_weights.R index ebaee79d..bd9db10f 100644 --- a/data-raw/read_setting_weights.R +++ b/data-raw/read_setting_weights.R @@ -1,6 +1,8 @@ -setting_weights <- c(home = 0.732154285228522, - school = 0.286882796193768, - work = 0.286882796193768, - other = 0.286882796193768) +setting_weights <- c( + home = 0.732154285228522, + school = 0.286882796193768, + work = 0.286882796193768, + other = 0.286882796193768 +) use_data(setting_weights, compress = "xz", overwrite = TRUE) diff --git a/data-raw/susceptibility_clinical_fraction.R b/data-raw/susceptibility_clinical_fraction.R index a92ee03f..ce6ced06 100644 --- a/data-raw/susceptibility_clinical_fraction.R +++ b/data-raw/susceptibility_clinical_fraction.R @@ -1,108 +1,108 @@ # define susceptibility and clinical fraction parameters davies_age_extended <- tibble::tribble( - ~age, ~clinical_fraction, ~davies_original, ~davies_updated, - 0L, 0.300839660771568, 0.405243125439588, 0.274004152218713, - 1L, 0.297586599464828, 0.402502562049369, 0.274004152218713, - 2L, 0.294208301590236, 0.399703435419883, 0.274004152218713, - 3L, 0.29051957414156, 0.396767294839426, 0.287971835280787, - 4L, 0.286293944054413, 0.393631526404334, 0.30202412593778, - 5L, 0.281297196807437, 0.39024837115024, 0.315969936850741, - 6L, 0.27532628857806, 0.386639120630588, 0.329807021459044, - 7L, 0.268313789505417, 0.382962806655906, 0.343252033229079, - 8L, 0.260278873472712, 0.379457962719388, 0.356057219950796, - 9L, 0.25151164285796, 0.376578047143387, 0.368287559034645, - 10L, 0.242623803184268, 0.374974292206878, 0.379898148409262, - 11L, 0.234390556914771, 0.37551811523472, 0.391068420742893, - 12L, 0.227534377060263, 0.37914484476869, 0.401701350527736, - 13L, 0.222433214901116, 0.386843956530518, 0.412174391845095, - 14L, 0.219262779263751, 0.399593187250161, 0.422423014051142, - 15L, 0.218132212839674, 0.418310718938954, 0.432745312484442, - 16L, 0.219007317855831, 0.443686311967971, 0.443686311967971, - 17L, 0.221784110265796, 0.47606573657563, 0.47606573657563, - 18L, 0.226235803861586, 0.514887514579814, 0.514887514579814, - 19L, 0.232049453113255, 0.558936696339759, 0.558936696339759, - 20L, 0.23869497040697, 0.605235588597757, 0.605235588597757, - 21L, 0.245609064320696, 0.650219777449733, 0.650219777449733, - 22L, 0.252316661836057, 0.690765197596418, 0.690765197596418, - 23L, 0.258684635130281, 0.725721651138198, 0.725721651138198, - 24L, 0.264737956231352, 0.754821349991641, 0.754821349991641, - 25L, 0.270630228996984, 0.778558441695052, 0.778558441695052, - 26L, 0.276553920048141, 0.797672844392098, 0.797672844392098, - 27L, 0.282712455604753, 0.813127844894791, 0.813127844894791, - 28L, 0.289209182915552, 0.825738269481566, 0.825738269481566, - 29L, 0.296059616143947, 0.836168846621602, 0.836168846621602, - 30L, 0.303031609510398, 0.844650726312517, 0.844650726312517, - 31L, 0.309808598259769, 0.851115001001754, 0.851115001001754, - 32L, 0.316137172162428, 0.855475701865309, 0.855475701865309, - 33L, 0.322049660503632, 0.857781939986349, 0.857781939986349, - 34L, 0.327702819226518, 0.858121211564892, 0.858121211564892, - 35L, 0.33333773119303, 0.85661508991501, 0.85661508991501, - 36L, 0.33921769995894, 0.853396603572318, 0.853396603572318, - 37L, 0.345583190055924, 0.848634916364279, 0.848634916364279, - 38L, 0.352532036118614, 0.842603877145873, 0.842603877145873, - 39L, 0.360069544028309, 0.83564492682245, 0.83564492682245, - 40L, 0.367920217958573, 0.828315328873318, 0.828315328873318, - 41L, 0.375610890052003, 0.821329042907506, 0.821329042907506, - 42L, 0.382952793862932, 0.81514623794299, 0.81514623794299, - 43L, 0.389900935960663, 0.810101263424909, 0.810101263424909, - 44L, 0.396655795866673, 0.80631065923119, 0.80631065923119, - 45L, 0.403522410648723, 0.803777247132055, 0.803777247132055, - 46L, 0.410814103582096, 0.802471571141377, 0.802471571141377, - 47L, 0.41882378960398, 0.80229950264122, 0.80229950264122, - 48L, 0.427745652062345, 0.803146988536476, 0.803146988536476, - 49L, 0.43750414030479, 0.80486784763572, 0.80486784763572, - 50L, 0.447844724898509, 0.807291215530529, 0.807291215530529, - 51L, 0.458184144145412, 0.810216233141427, 0.810216233141427, - 52L, 0.468325453638762, 0.813567249486111, 0.813567249486111, - 53L, 0.478270945409754, 0.817327682421881, 0.817327682421881, - 54L, 0.488330392142053, 0.821574603669556, 0.821574603669556, - 55L, 0.498928943990846, 0.826404494869757, 0.826404494869757, - 56L, 0.510485370318433, 0.831897495252623, 0.831897495252623, - 57L, 0.523338634781543, 0.838065466684734, 0.838065466684734, - 58L, 0.537642536835924, 0.844813797692746, 0.844813797692746, - 59L, 0.55307493784313, 0.851790755198216, 0.851790755198216, - 60L, 0.569009979460861, 0.858489878458342, 0.858489878458342, - 61L, 0.58425252052942, 0.864102749266343, 0.864102749266343, - 62L, 0.59815084659305, 0.86799674664621, 0.86799674664621, - 63L, 0.610346875128792, 0.869648201095943, 0.869648201095943, - 64L, 0.620937438784477, 0.86870170386519, 0.86870170386519, - 65L, 0.630196505980292, 0.864889588131732, 0.864889588131732, - 66L, 0.638485754269898, 0.858119979544064, 0.858119979544064, - 67L, 0.646180255022669, 0.848381547117261, 0.848381547117261, - 68L, 0.653575750264358, 0.83591416963295, 0.83591416963295, - 69L, 0.660734224129279, 0.821411784109917, 0.821411784109917, - 70L, 0.667578313494737, 0.805826995276759, 0.805826995276759, - 71L, 0.673746895348781, 0.790612084492718, 0.790612084492718, - 72L, 0.67901777760772, 0.776773260388288, 0.776773260388288, - 73L, 0.683372065476387, 0.764679929756006, 0.764679929756006, - 74L, 0.686855807518284, 0.754484635842462, 0.754484635842462, - 75L, 0.689617752595992, 0.745975990685674, 0.745975990685674, - 76L, 0.691835258172059, 0.73879303288655, 0.73879303288655, - 77L, 0.693686291239087, 0.732546839645335, 0.732546839645335, - 78L, 0.695335752518966, 0.726829395297702, 0.726829395297702, - 79L, 0.696898868800772, 0.721341371191107, 0.721341371191107, - 80L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 81L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 82L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 83L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 84L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 85L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 86L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 87L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 88L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 89L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 90L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 91L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 92L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 93L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 94L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 95L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 96L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 97L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 98L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 99L, 0.698453096414112, 0.71587836634111, 0.71587836634111, - 100L, 0.698453096414112, 0.71587836634111, 0.71587836634111 + ~age, ~clinical_fraction, ~davies_original, ~davies_updated, + 0L, 0.300839660771568, 0.405243125439588, 0.274004152218713, + 1L, 0.297586599464828, 0.402502562049369, 0.274004152218713, + 2L, 0.294208301590236, 0.399703435419883, 0.274004152218713, + 3L, 0.29051957414156, 0.396767294839426, 0.287971835280787, + 4L, 0.286293944054413, 0.393631526404334, 0.30202412593778, + 5L, 0.281297196807437, 0.39024837115024, 0.315969936850741, + 6L, 0.27532628857806, 0.386639120630588, 0.329807021459044, + 7L, 0.268313789505417, 0.382962806655906, 0.343252033229079, + 8L, 0.260278873472712, 0.379457962719388, 0.356057219950796, + 9L, 0.25151164285796, 0.376578047143387, 0.368287559034645, + 10L, 0.242623803184268, 0.374974292206878, 0.379898148409262, + 11L, 0.234390556914771, 0.37551811523472, 0.391068420742893, + 12L, 0.227534377060263, 0.37914484476869, 0.401701350527736, + 13L, 0.222433214901116, 0.386843956530518, 0.412174391845095, + 14L, 0.219262779263751, 0.399593187250161, 0.422423014051142, + 15L, 0.218132212839674, 0.418310718938954, 0.432745312484442, + 16L, 0.219007317855831, 0.443686311967971, 0.443686311967971, + 17L, 0.221784110265796, 0.47606573657563, 0.47606573657563, + 18L, 0.226235803861586, 0.514887514579814, 0.514887514579814, + 19L, 0.232049453113255, 0.558936696339759, 0.558936696339759, + 20L, 0.23869497040697, 0.605235588597757, 0.605235588597757, + 21L, 0.245609064320696, 0.650219777449733, 0.650219777449733, + 22L, 0.252316661836057, 0.690765197596418, 0.690765197596418, + 23L, 0.258684635130281, 0.725721651138198, 0.725721651138198, + 24L, 0.264737956231352, 0.754821349991641, 0.754821349991641, + 25L, 0.270630228996984, 0.778558441695052, 0.778558441695052, + 26L, 0.276553920048141, 0.797672844392098, 0.797672844392098, + 27L, 0.282712455604753, 0.813127844894791, 0.813127844894791, + 28L, 0.289209182915552, 0.825738269481566, 0.825738269481566, + 29L, 0.296059616143947, 0.836168846621602, 0.836168846621602, + 30L, 0.303031609510398, 0.844650726312517, 0.844650726312517, + 31L, 0.309808598259769, 0.851115001001754, 0.851115001001754, + 32L, 0.316137172162428, 0.855475701865309, 0.855475701865309, + 33L, 0.322049660503632, 0.857781939986349, 0.857781939986349, + 34L, 0.327702819226518, 0.858121211564892, 0.858121211564892, + 35L, 0.33333773119303, 0.85661508991501, 0.85661508991501, + 36L, 0.33921769995894, 0.853396603572318, 0.853396603572318, + 37L, 0.345583190055924, 0.848634916364279, 0.848634916364279, + 38L, 0.352532036118614, 0.842603877145873, 0.842603877145873, + 39L, 0.360069544028309, 0.83564492682245, 0.83564492682245, + 40L, 0.367920217958573, 0.828315328873318, 0.828315328873318, + 41L, 0.375610890052003, 0.821329042907506, 0.821329042907506, + 42L, 0.382952793862932, 0.81514623794299, 0.81514623794299, + 43L, 0.389900935960663, 0.810101263424909, 0.810101263424909, + 44L, 0.396655795866673, 0.80631065923119, 0.80631065923119, + 45L, 0.403522410648723, 0.803777247132055, 0.803777247132055, + 46L, 0.410814103582096, 0.802471571141377, 0.802471571141377, + 47L, 0.41882378960398, 0.80229950264122, 0.80229950264122, + 48L, 0.427745652062345, 0.803146988536476, 0.803146988536476, + 49L, 0.43750414030479, 0.80486784763572, 0.80486784763572, + 50L, 0.447844724898509, 0.807291215530529, 0.807291215530529, + 51L, 0.458184144145412, 0.810216233141427, 0.810216233141427, + 52L, 0.468325453638762, 0.813567249486111, 0.813567249486111, + 53L, 0.478270945409754, 0.817327682421881, 0.817327682421881, + 54L, 0.488330392142053, 0.821574603669556, 0.821574603669556, + 55L, 0.498928943990846, 0.826404494869757, 0.826404494869757, + 56L, 0.510485370318433, 0.831897495252623, 0.831897495252623, + 57L, 0.523338634781543, 0.838065466684734, 0.838065466684734, + 58L, 0.537642536835924, 0.844813797692746, 0.844813797692746, + 59L, 0.55307493784313, 0.851790755198216, 0.851790755198216, + 60L, 0.569009979460861, 0.858489878458342, 0.858489878458342, + 61L, 0.58425252052942, 0.864102749266343, 0.864102749266343, + 62L, 0.59815084659305, 0.86799674664621, 0.86799674664621, + 63L, 0.610346875128792, 0.869648201095943, 0.869648201095943, + 64L, 0.620937438784477, 0.86870170386519, 0.86870170386519, + 65L, 0.630196505980292, 0.864889588131732, 0.864889588131732, + 66L, 0.638485754269898, 0.858119979544064, 0.858119979544064, + 67L, 0.646180255022669, 0.848381547117261, 0.848381547117261, + 68L, 0.653575750264358, 0.83591416963295, 0.83591416963295, + 69L, 0.660734224129279, 0.821411784109917, 0.821411784109917, + 70L, 0.667578313494737, 0.805826995276759, 0.805826995276759, + 71L, 0.673746895348781, 0.790612084492718, 0.790612084492718, + 72L, 0.67901777760772, 0.776773260388288, 0.776773260388288, + 73L, 0.683372065476387, 0.764679929756006, 0.764679929756006, + 74L, 0.686855807518284, 0.754484635842462, 0.754484635842462, + 75L, 0.689617752595992, 0.745975990685674, 0.745975990685674, + 76L, 0.691835258172059, 0.73879303288655, 0.73879303288655, + 77L, 0.693686291239087, 0.732546839645335, 0.732546839645335, + 78L, 0.695335752518966, 0.726829395297702, 0.726829395297702, + 79L, 0.696898868800772, 0.721341371191107, 0.721341371191107, + 80L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 81L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 82L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 83L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 84L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 85L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 86L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 87L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 88L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 89L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 90L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 91L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 92L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 93L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 94L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 95L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 96L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 97L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 98L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 99L, 0.698453096414112, 0.71587836634111, 0.71587836634111, + 100L, 0.698453096414112, 0.71587836634111, 0.71587836634111 ) use_data(davies_age_extended, compress = "xz", overwrite = TRUE) diff --git a/data/abs_avg_school.rda b/data/abs_avg_school.rda new file mode 100644 index 00000000..c03053ec Binary files /dev/null and b/data/abs_avg_school.rda differ diff --git a/data/abs_avg_work.rda b/data/abs_avg_work.rda new file mode 100644 index 00000000..e441508c Binary files /dev/null and b/data/abs_avg_work.rda differ diff --git a/data/conmat_original_school_demographics.rda b/data/conmat_original_school_demographics.rda new file mode 100644 index 00000000..345c38ce Binary files /dev/null and b/data/conmat_original_school_demographics.rda differ diff --git a/data/conmat_original_work_demographics.rda b/data/conmat_original_work_demographics.rda new file mode 100644 index 00000000..81f9c3eb Binary files /dev/null and b/data/conmat_original_work_demographics.rda differ diff --git a/data/polymod_setting_models.rda b/data/polymod_setting_models.rda index 10d8f59a..b7b32b51 100644 Binary files a/data/polymod_setting_models.rda and b/data/polymod_setting_models.rda differ diff --git a/data/prem_germany_contact_matrices.rda b/data/prem_germany_contact_matrices.rda new file mode 100644 index 00000000..3bd58497 Binary files /dev/null and b/data/prem_germany_contact_matrices.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index e69de29b..e710f3b6 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -0,0 +1,111 @@ +Albury +Auranen +BGM +Beutels +CMD +COVID +CoV +Codecov +DOI +Diekmann +FTE +Fairfield +Generalised +Heesterbrook +Jit +Klepac +Koen +LABOUR +LGA +LGAs +Labour +Liu +Mikolajczyk +Mossong +NGM +NGMs +NSW +ONS +ORCID +PLoS +POLYMOD +Peto +Polymod +Pouwels +Prem +Preprint +Purver +QLD +TAS +TableBuilder +Un +Visualisation +Zenodo +al +assortativity +asymptomatics +autoplot +behaviour +centred +cohorting +davies +deSolve +digitise +digitised +doi +ecovered +eigen +eigvenvalue +embarrasingly +emoved +et +eyre +fairfield +finalise +focussed +focusses +gam +generalised +ggplot +helpfile +helpfiles +https +intergenerational +ish +labour +lga +magrittr +modelled +modellers +modelling +multisession +nfected +ngm +njtierney +parallelisable +parallelisation +parallelise +pcbi +polymod +pre +probabilists +regularised +sizeable +socialmixr +summarise +summarised +symptomatics +th +tibble +tibbles +transmissibility +undercount +unscaled +unvaccinated +upweighted +usceptible +visualisation +visualisations +visualise +visualising +wpp diff --git a/man/get_data_abs_age_education.Rd b/man/abs-age-education.Rd similarity index 64% rename from man/get_data_abs_age_education.Rd rename to man/abs-age-education.Rd index e1b7d28b..65cd1cd1 100644 --- a/man/get_data_abs_age_education.Rd +++ b/man/abs-age-education.Rd @@ -1,19 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_data_abs_age_education.R -\name{get_data_abs_age_education} -\alias{get_data_abs_age_education} +% Please edit documentation in R/abs_age_education.R +\name{abs_age_education_state} +\alias{abs_age_education_state} +\alias{abs-age-education} +\alias{abs_age_education_lga} \title{Return data on educated population for a given age and state or lga of Australia.} \usage{ -get_data_abs_age_education(age = NULL, state = NULL, lga = NULL) +abs_age_education_state(state = NULL, age = NULL) + +abs_age_education_lga(lga = NULL, age = NULL) } \arguments{ -\item{age}{a numeric or numeric vector denoting ages between 0 to 115. The -default is to return all ages.} - \item{state}{target Australian state name or a vector with multiple state names in its abbreviated form, such as "QLD", "NSW", or "TAS"} +\item{age}{a numeric or numeric vector denoting ages between 0 to 115. The +default is to return all ages.} + \item{lga}{target Australian local government area (LGA) name, such as "Fairfield (C)" or a vector with multiple lga names. See \code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names.} @@ -27,8 +31,9 @@ Return data on educated population for a given age and state or lga of Australia. } \examples{ -get_data_abs_age_education(state="VIC") -get_data_abs_age_education(state="WA",lga="Albany (C)",age=1:5) -get_data_abs_age_education(state=c("QLD","TAS"),age=5) -get_data_abs_age_education(lga=c("Albury (C)","Barcoo (S)"),age=10) +abs_age_education_state(state = "VIC") +abs_age_education_state(state = "WA", age = 1:5) +abs_age_education_state(state = c("QLD", "TAS"), age = 5) +abs_age_education_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 10) + } diff --git a/man/get_data_abs_age_work.Rd b/man/abs-age-work.Rd similarity index 66% rename from man/get_data_abs_age_work.Rd rename to man/abs-age-work.Rd index 8b13b076..9725f76b 100644 --- a/man/get_data_abs_age_work.Rd +++ b/man/abs-age-work.Rd @@ -1,22 +1,26 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_data_abs_age_work.R -\name{get_data_abs_age_work} -\alias{get_data_abs_age_work} +% Please edit documentation in R/abs_age_work.R +\name{abs_age_work_lga} +\alias{abs_age_work_lga} +\alias{abs-age-work} +\alias{abs_age_work_state} \title{Return data on employed population for a given age and state or lga of Australia} \usage{ -get_data_abs_age_work(age = NULL, state = NULL, lga = NULL) +abs_age_work_lga(lga = NULL, age = NULL) + +abs_age_work_state(state = NULL, age = NULL) } \arguments{ +\item{lga}{target Australian local government area (LGA) name, such as +"Fairfield (C)" or a vector with multiple lga names. See +\code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names.} + \item{age}{a numeric or numeric vector denoting ages between 0 to 115. The default is to return all ages.} \item{state}{target Australian state name or a vector with multiple state names in its abbreviated form, such as "QLD", "NSW", or "TAS"} - -\item{lga}{target Australian local government area (LGA) name, such as -"Fairfield (C)" or a vector with multiple lga names. See -\code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names.} } \value{ data set with information on the number of employed people belonging @@ -27,8 +31,9 @@ Return data on employed population for a given age and state or lga of Australia } \examples{ -get_data_abs_age_work(state="NSW") -get_data_abs_age_work(state="WA",lga="Albany (C)",age=1:5) -get_data_abs_age_work(state=c("QLD","TAS"),age=5) -get_data_abs_age_work(lga=c("Albury (C)","Barcoo (S)"),age=39) +abs_age_work_state(state = "NSW") +abs_age_work_state(state = c("QLD", "TAS"), age = 5) +abs_age_work_lga(lga = "Albany (C)", age = 1:5) +abs_age_work_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 39) + } diff --git a/man/abbreviate_states.Rd b/man/abs_abbreviate_states.Rd similarity index 61% rename from man/abbreviate_states.Rd rename to man/abs_abbreviate_states.Rd index a40d3e6c..6b17b2c5 100644 --- a/man/abbreviate_states.Rd +++ b/man/abs_abbreviate_states.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/abbreviate_states.R -\name{abbreviate_states} -\alias{abbreviate_states} +\name{abs_abbreviate_states} +\alias{abs_abbreviate_states} \title{Abbreviate Australian State Names} \usage{ -abbreviate_states(state_names) +abs_abbreviate_states(state_names) } \arguments{ \item{state_names}{vector of state names in long form} @@ -17,9 +17,9 @@ Given a full name (Title Case) of an Australian State or Territory, produces the abbreviated state name. } \examples{ -abbreviate_states("Victoria") -abbreviate_states(c("Victoria", "Queensland")) +abs_abbreviate_states("Victoria") +abs_abbreviate_states(c("Victoria", "Queensland")) } \seealso{ -\code{\link[=unabbreviate_states]{unabbreviate_states()}} +\code{\link[=abs_unabbreviate_states]{abs_unabbreviate_states()}} } diff --git a/man/abs_age_data.Rd b/man/abs_age_data.Rd index 033c1a41..6a3ef389 100644 --- a/man/abs_age_data.Rd +++ b/man/abs_age_data.Rd @@ -12,19 +12,19 @@ abs_age_lga(lga_name) abs_age_state(state_name) } \arguments{ -\item{lga_name}{lga name - can be a partial match, e.g., although the official name might be "Albury (C)", "Albury" is fine. It must also match exactly one LGA. See \code{\link[=check_lga_name]{check_lga_name()}} for more details.} +\item{lga_name}{lga name - can be a partial match, e.g., although the official name might be "Albury (C)", "Albury" is fine.} \item{state_name}{shortened state name} } \value{ -dataset of: \code{lga} (or \code{state}), \code{lower.age.limit}, \code{year}, -and \code{population}. +a \code{conmat_population} dataset containing: \code{lga} (or \code{state}), +\code{lower.age.limit}, \code{year}, and \code{population}. } \description{ Return Australian Bureau of Statistics (ABS) age population data for a given Local Government Area (LGA) or state } \examples{ -abs_age_lga("Albury (C)") -abs_age_state("NSW") +abs_age_lga(c("Albury (C)", "Fairfield (C)")) +abs_age_state(c("NSW", "VIC")) } diff --git a/man/abs_avg_school.Rd b/man/abs_avg_school.Rd new file mode 100644 index 00000000..9575806d --- /dev/null +++ b/man/abs_avg_school.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data-abs-avg-work-school.R +\docType{data} +\name{abs_avg_school} +\alias{abs_avg_school} +\title{ABS education data for 2016} +\format{ +A data frame with 116 rows and 2 variables: +\describe{ +\item{age}{0 to 115} +\item{school_fraction}{fraction of population at school} +} +} +\source{ +{Census of Population and Housing, 2016, TableBuilder} +} +\usage{ +abs_avg_school +} +\description{ +An internal dataset containing Australian Bureau of Statistics education data for +each age in 2016. The data is averaged across each state to provide an +overall average, and is used to provide estimated education populations for +model fitting in \code{\link[=add_school_work_participation]{add_school_work_participation()}}, which is used in \code{\link[=fit_single_contact_model]{fit_single_contact_model()}}. The data is summarised from \code{data_abs_state_education}, +see \code{?data_abs_state_education} for more details. +} +\keyword{datasets} diff --git a/man/abs_avg_work.Rd b/man/abs_avg_work.Rd new file mode 100644 index 00000000..9c80a9c1 --- /dev/null +++ b/man/abs_avg_work.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data-abs-avg-work-school.R +\docType{data} +\name{abs_avg_work} +\alias{abs_avg_work} +\title{ABS work data for 2016} +\format{ +A data frame with 116 rows and 2 variables: +\describe{ +\item{age}{0 to 115} +\item{work_fraction}{fraction of population working.} +} +} +\source{ +{Census of Population and Housing, 2016, TableBuilder} +} +\usage{ +abs_avg_work +} +\description{ +An internal dataset containing Australian Bureau of Statistics work data for +each age in 2016. The data is averaged across each state to provide an +overall average, and is used to provide estimated work populations for +model fitting in \code{\link[=add_school_work_participation]{add_school_work_participation()}}, which is used in \code{\link[=fit_single_contact_model]{fit_single_contact_model()}}. The data is summarised from \code{data_abs_state_work}, +see \code{?data_abs_state_work} for more details. +} +\keyword{datasets} diff --git a/man/abs_unabbreviate_states.Rd b/man/abs_unabbreviate_states.Rd new file mode 100644 index 00000000..4cfb8afa --- /dev/null +++ b/man/abs_unabbreviate_states.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/abbreviate_states.R +\name{abs_unabbreviate_states} +\alias{abs_unabbreviate_states} +\title{Un-abbreviate Australian state names} +\usage{ +abs_unabbreviate_states(state_names) +} +\arguments{ +\item{state_names}{vector of state names in short form} +} +\value{ +Longer state names +} +\description{ +Un-abbreviate Australian state names +} +\examples{ +abs_unabbreviate_states("VIC") +abs_unabbreviate_states(c("VIC", "QLD")) +} +\seealso{ +\code{\link[=abs_abbreviate_states]{abs_abbreviate_states()}} +} diff --git a/man/accessors.Rd b/man/accessors.Rd new file mode 100644 index 00000000..df4e7ef8 --- /dev/null +++ b/man/accessors.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conmat-population.R +\name{age} +\alias{age} +\alias{age_label} +\alias{age_label.default} +\alias{age_label.conmat_population} +\alias{population_label} +\alias{population_label.default} +\alias{population_label.conmat_population} +\alias{population} +\title{Accessing conmat attributes} +\usage{ +age(x) + +age_label(x) + +\method{age_label}{default}(x) + +\method{age_label}{conmat_population}(x) + +population_label(x) + +\method{population_label}{default}(x) + +\method{population_label}{conmat_population}(x) + +population(x) +} +\arguments{ +\item{x}{conmat_population data frame} +} +\value{ +age or population symbol or label +} +\description{ +Accessing conmat attributes +} +\examples{ +\dontrun{ +perth <- abs_age_lga("Perth (C)") +age(perth) +age_label(perth) +population(perth) +population_label(perth) +} +} diff --git a/man/add_intergenerational.Rd b/man/add_intergenerational.Rd index c0766e3a..ebdf0630 100644 --- a/man/add_intergenerational.Rd +++ b/man/add_intergenerational.Rd @@ -22,5 +22,4 @@ polymod_contact <- get_polymod_contact_data() polymod_contact \%>\% add_intergenerational() - } diff --git a/man/add_modelling_features.Rd b/man/add_modelling_features.Rd index cd1ed7cb..f5e60811 100644 --- a/man/add_modelling_features.Rd +++ b/man/add_modelling_features.Rd @@ -4,13 +4,31 @@ \alias{add_modelling_features} \title{Add features required for modelling to the dataset} \usage{ -add_modelling_features(contact_data, ...) +add_modelling_features( + contact_data, + school_demographics = NULL, + work_demographics = NULL, + population = get_polymod_population() +) } \arguments{ \item{contact_data}{contact data with columns \code{age_to} and \code{age_from}} -\item{...}{extra dots passed to \code{population} argument of -\code{\link[=add_population_age_to]{add_population_age_to()}}} +\item{school_demographics}{(optional) defaults to census average proportion +at school. You can provide a dataset with columns, "age" (numeric), and +"school_fraction" (0-1), if you would like to specify these +details. See \code{abs_avg_school} for the default values. If you would like to +use the original school demographics used in conmat, these are provided in +the dataset, \code{conmat_original_school_demographics}.} + +\item{work_demographics}{(optional) defaults to census average proportion +employed. You can provide a dataset with columns, "age" (numeric), and +"work_fraction", if you would like to specify these details. See +\code{abs_avg_work} for the default values. If you would like to +use the original work demographics used in conmat, these are provided in +the dataset, \code{conmat_original_work_demographics}.} + +\item{population}{the \code{population} argument of \code{\link[=add_population_age_to]{add_population_age_to()}}} } \value{ data frame with 11 extra columns - the contents of \code{contact_data}, @@ -30,7 +48,9 @@ features it adds are described below: representing the age of the person who had contact. It creates a column called \code{pop_age_to}. \code{\link[=add_population_age_to]{add_population_age_to()}} takes an extra argument for population, which defaults to \code{\link[=get_polymod_population]{get_polymod_population()}}, but needs -to be a data frame with columns, \code{lower.age.limit}, and \code{population}. +to be a \code{conmat_population} object, which specifies the \code{age} and +\code{population} characteristics, or a data frame with columns, +\code{lower.age.limit}, and \code{population}. \item School work participation, which is from the function \code{\link[=add_school_work_participation]{add_school_work_participation()}}. This requires columns \code{age_to} and \code{age_from}, but will operate on any column starting with \code{age} and adds @@ -49,8 +69,14 @@ age_max <- 15 all_ages <- age_min:age_max library(tidyr) example_df <- expand_grid( - age_from = all_ages, - age_to = all_ages, - ) -add_modelling_features(example_df) + age_from = all_ages, + age_to = all_ages, +) +add_modelling_features(example_df) +add_modelling_features( + example_df, + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics +) + } diff --git a/man/add_offset.Rd b/man/add_offset.Rd index fd2a4656..8f926d2c 100644 --- a/man/add_offset.Rd +++ b/man/add_offset.Rd @@ -47,13 +47,13 @@ age_max <- 15 all_ages <- age_min:age_max library(tidyr) example_df <- expand_grid( - age_from = all_ages, - age_to = all_ages, - ) -example_df \%>\% - add_population_age_to() \%>\% - add_school_work_participation() \%>\% - add_offset() + age_from = all_ages, + age_to = all_ages, +) +example_df \%>\% + add_population_age_to() \%>\% + add_school_work_participation() \%>\% + add_offset() } \author{ Nick Golding diff --git a/man/add_population_age_to.Rd b/man/add_population_age_to.Rd index 237ed0fa..20379d60 100644 --- a/man/add_population_age_to.Rd +++ b/man/add_population_age_to.Rd @@ -9,8 +9,10 @@ add_population_age_to(contact_data, population = get_polymod_population()) \arguments{ \item{contact_data}{contact data containing columns \code{age_to} and \code{age_from}} -\item{population}{Defaults to \code{\link[=get_polymod_population]{get_polymod_population()}}, but can be any -data frame with columns, \code{lower.age.limit}, and \code{population}.} +\item{population}{Defaults to \code{\link[=get_polymod_population]{get_polymod_population()}}, a +\code{conmat_population} object, which specifies the \code{age} and \code{population} +columns. But it can optionally be any data frame with columns, +\code{lower.age.limit}, and \code{population}.} } \value{ data frame @@ -19,12 +21,13 @@ data frame Adds the population distribution of contact ages. Requires a column called "age_to", representing the contact age - the age of the person who had contact. It creates a column, \code{pop_age_to}. The \code{population} argument -defaults to \code{\link[=get_polymod_population]{get_polymod_population()}}, but can be any data frame with -columns, \code{lower.age.limit}, and \code{population}. If population is 'polymod' -then use the participant-weighted average of POLYMOD country/year -distributions. It adds the population via interpolation, using -\code{\link[=get_age_population_function]{get_age_population_function()}} to create a function that generates -population from ages. +defaults to \code{\link[=get_polymod_population]{get_polymod_population()}}, which is a \code{conmat_population} +object, which has \code{age} and \code{population} specified. But this can also be +a data frame with columns, \code{lower.age.limit}, and \code{population}. If +population is 'polymod' then use the participant-weighted average of +POLYMOD country/year distributions. It adds the population via +interpolation, using \code{\link[=get_age_population_function]{get_age_population_function()}} to create a +function that generates population from ages. } \examples{ age_min <- 10 @@ -32,8 +35,8 @@ age_max <- 15 all_ages <- age_min:age_max library(tidyr) example_df <- expand_grid( - age_from = all_ages, - age_to = all_ages, - ) + age_from = all_ages, + age_to = all_ages, +) add_population_age_to(example_df) } diff --git a/man/add_school_work_participation.Rd b/man/add_school_work_participation.Rd index cb16d350..4d288e3a 100644 --- a/man/add_school_work_participation.Rd +++ b/man/add_school_work_participation.Rd @@ -5,11 +5,29 @@ \title{Add columns describing the fractions of the population in each age group that attend school/work (average FTE)} \usage{ -add_school_work_participation(contact_data) +add_school_work_participation( + contact_data, + school_demographics = NULL, + work_demographics = NULL +) } \arguments{ \item{contact_data}{contact data containing columns: \code{age_to}, \code{age_from}, and \code{pop_age_to} (from \code{\link[=add_population_age_to]{add_population_age_to()}})} + +\item{school_demographics}{(optional) defaults to census average proportion +at school. You can provide a dataset with columns, "age" (numeric), and +"school_fraction" (0-1), if you would like to specify these +details. See \code{abs_avg_school} for the default values. If you would like to +use the original school demographics used in conmat, these are provided in +the dataset, \code{conmat_original_school_demographics}.} + +\item{work_demographics}{(optional) defaults to census average proportion +employed. You can provide a dataset with columns, "age" (numeric), and +"work_fraction", if you would like to specify these details. See +\code{abs_avg_work} for the default values. If you would like to +use the original work demographics used in conmat, these are provided in +the dataset, \code{conmat_original_work_demographics}.} } \value{ dataset with 9 extra columns: school_fraction_age_from, @@ -36,7 +54,8 @@ not affect classroom contacts (which due to cohorting and regularised class sizes are unlikely to depend on the population age distribution). } \note{ -this uses fake data that will get replaced with abs data input soon +To use previous approach input the arguments \code{school_demographics} and +\code{work_demographics} with \code{conmat_original_school_demographics} and \code{conmat_original_work_demographics}, respectively. } \examples{ age_min <- 10 @@ -44,11 +63,18 @@ age_max <- 15 all_ages <- age_min:age_max library(tidyr) example_df <- expand_grid( - age_from = all_ages, - age_to = all_ages, - ) - -example_df \%>\% - add_population_age_to() \%>\% - add_school_work_participation() + age_from = all_ages, + age_to = all_ages, +) + +example_df \%>\% + add_population_age_to() \%>\% + add_school_work_participation() + +example_df \%>\% + add_population_age_to() \%>\% + add_school_work_participation( + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics + ) } diff --git a/man/add_symmetrical_features.Rd b/man/add_symmetrical_features.Rd new file mode 100644 index 00000000..8006527e --- /dev/null +++ b/man/add_symmetrical_features.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_symmetrical_features.R +\name{add_symmetrical_features} +\alias{add_symmetrical_features} +\title{Add symmetrical, age based features} +\usage{ +add_symmetrical_features(data) +} +\arguments{ +\item{data}{data.frame with columns, \code{age_from}, and \code{age_to}} +} +\value{ +data.frame with 6 more columns, \code{gam_age_offdiag}, \code{gam_age_offdiag_2}, \code{gam_age_diag_prod}, \code{gam_age_diag_sum}, \code{gam_age_pmax}, \code{gam_age_pmin}, +} +\description{ +This function adds 6 columns to assist with describing +various age based interactions for model fitting. Requires that the +age columns are called "age_from", and "age_to" +} +\examples{ +vec_age <- 0:2 +dat_age <- expand.grid( + age_from = vec_age, + age_to = vec_age +) + +add_symmetrical_features(dat_age) + +} diff --git a/man/age_breaks.Rd b/man/age_breaks.Rd new file mode 100644 index 00000000..b3ebde90 --- /dev/null +++ b/man/age_breaks.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{age_breaks} +\alias{age_breaks} +\alias{age_breaks.conmat_age_matrix} +\alias{age_breaks.conmat_setting_prediction_matrix} +\alias{age_breaks.setting_data} +\alias{age_breaks.ngm_setting_matrix} +\alias{age_breaks.setting_vaccination_matrix} +\alias{age_breaks.numeric} +\alias{age_breaks.matrix} +\alias{age_breaks.array} +\alias{age_breaks.predicted_contacts} +\alias{age_breaks.transmission_probability_matrix} +\alias{age_breaks.setting_contact_model} +\alias{age_breaks.default} +\title{Extract age break attribute information} +\usage{ +age_breaks(x) + +\method{age_breaks}{conmat_age_matrix}(x) + +\method{age_breaks}{conmat_setting_prediction_matrix}(x) + +\method{age_breaks}{setting_data}(x) + +\method{age_breaks}{ngm_setting_matrix}(x) + +\method{age_breaks}{setting_vaccination_matrix}(x) + +\method{age_breaks}{numeric}(x) + +\method{age_breaks}{matrix}(x) + +\method{age_breaks}{array}(x) + +\method{age_breaks}{predicted_contacts}(x) + +\method{age_breaks}{transmission_probability_matrix}(x) + +\method{age_breaks}{setting_contact_model}(x) + +\method{age_breaks}{default}(x) +} +\arguments{ +\item{x}{an object containing age break information} +} +\value{ +age breaks character vector +} +\description{ +Extract age break attribute information +} +\section{Methods (by class)}{ +\itemize{ +\item \code{age_breaks(conmat_age_matrix)}: Get age break information + +\item \code{age_breaks(conmat_setting_prediction_matrix)}: Get age break information + +\item \code{age_breaks(setting_data)}: Get age break information + +\item \code{age_breaks(ngm_setting_matrix)}: Get age break information + +\item \code{age_breaks(setting_vaccination_matrix)}: Get age break information + +\item \code{age_breaks(numeric)}: Get age break information + +\item \code{age_breaks(matrix)}: Get age break information + +\item \code{age_breaks(array)}: Get age break information + +\item \code{age_breaks(predicted_contacts)}: Get age break information + +\item \code{age_breaks(transmission_probability_matrix)}: Get age break information + +\item \code{age_breaks(setting_contact_model)}: Get age break information + +\item \code{age_breaks(default)}: Get age break information + +}} +\examples{ +age_breaks <- c(0, 5, 19, 15) +age_break_names <- c("[0,5)", "[5,10)", "[10, 15)") +age_mat <- matrix( + runif(9), + nrow = 3, + ncol = 3, + dimnames = list( + age_break_names, + age_break_names + ) +) + +age_mat <- new_age_matrix(age_mat, age_breaks) + +age_breaks(age_mat) +} diff --git a/man/age_population.Rd b/man/age_population.Rd index b89afbc9..c226bfa3 100644 --- a/man/age_population.Rd +++ b/man/age_population.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/age-population-year.R +% Please edit documentation in R/age-population.R \name{age_population} \alias{age_population} \title{Get cleaned population data with lower and upper limits of age.} diff --git a/man/aggregate_predicted_contacts.Rd b/man/aggregate_predicted_contacts.Rd index 726d0f0b..341a8d80 100644 --- a/man/aggregate_predicted_contacts.Rd +++ b/man/aggregate_predicted_contacts.Rd @@ -16,8 +16,9 @@ by in other year breaks). Data must contain columns, \code{age_from}, \code{age_ \code{contacts}, and \code{se_contacts}, which is the same output as \code{\link[=predict_contacts_1y]{predict_contacts_1y()}} - see examples below.} -\item{population}{population with columns \code{lower.age.limit}, and -\code{population}. See examples below.} +\item{population}{a \code{conmat_population} object, which has the \code{age} and +\code{population} columns specified, or a dataframe with columns +\code{lower.age.limit}, and \code{population}. See examples below.} \item{age_breaks}{vector of ages. Default: c(seq(0, 75, by = 5), Inf)} } @@ -33,25 +34,27 @@ it will return age groups as 0-5, 5-10, 10-15, and 15+ (Inf). Used internally within \code{\link[=predict_contacts]{predict_contacts()}}, although can be used by users. } \examples{ - fairfield_abs_data <- abs_age_lga("Fairfield (C)") - - fairfield_abs_data - - # We can predict the contact rate for Fairfield from the existing contact - # data, say, between the age groups of 0-15 in 5 year bins for school: - +fairfield <- abs_age_lga("Fairfield (C)") + +fairfield + +# We can predict the contact rate for Fairfield from the existing contact +# data, say, between the age groups of 0-15 in 5 year bins for school: + fairfield_contacts_1 <- predict_contacts_1y( model = polymod_setting_models$home, - population = fairfield_abs_data, + population = fairfield, age_min = 0, age_max = 15 ) - - fairfield_contacts_1 - + +fairfield_contacts_1 + aggregated_fairfield <- aggregate_predicted_contacts( predicted_contacts_1y = fairfield_contacts_1, - population = fairfield_abs_data, - age_breaks = c(0, 5, 10, 15,Inf) - ) + population = fairfield, + age_breaks = c(0, 5, 10, 15, Inf) +) + +aggregated_fairfield } diff --git a/man/apply_vaccination.Rd b/man/apply_vaccination.Rd index c1051b64..c15cddfe 100644 --- a/man/apply_vaccination.Rd +++ b/man/apply_vaccination.Rd @@ -36,7 +36,7 @@ transmission in each age group. } \details{ Vaccination improves a person's immunity from a disease. When a -sizable section of the population receives vaccinations or when vaccine +sizeable section of the population receives vaccinations or when vaccine coverage is sufficient enough, the likelihood that the unvaccinated population will contract the disease is decreased. This helps to slow infectious disease spread as well as lessen its severity. For this reason, @@ -56,14 +56,44 @@ transmission reduction matrix and the next generation matrices passed to the function as an argument. } \examples{ - +# examples take 20 second to run so skipping +\dontrun{ # example data frame with vaccine coverage, acquisition and transmission # efficacy of different age groups vaccination_effect_example_data # Generate next generation matrices + +perth <- abs_age_lga("Perth (C)") +perth_hh <- get_abs_per_capita_household_size(lga = "Perth (C)") + +age_breaks_0_80 <- c(seq(0, 80, by = 5), Inf) + +# refit the model - note that the default if age_breaks isn't specified is +# 0 to 75 +perth_contact_0_80 <- extrapolate_polymod( + perth, + per_capita_household_size = perth_hh, + age_breaks = age_breaks_0_80 +) + +perth_ngm_0_80 <- generate_ngm(perth_contact_0_80, + age_breaks = age_breaks_0_80, + per_capita_household_size = perth_hh, + R_target = 1.5 +) + +# In the old way we used to be able to pass age_breaks_0_80 along +generate_ngm_oz( + lga_name = "Perth (C)", + age_breaks = age_breaks_0_80, + R_target = 1.5 +) + + +# another way to do this using the previous method for generating NGMs # The number of age breaks must match the vaccination effect data -ngm_nsw <- generate_ngm( +ngm_nsw <- generate_ngm_oz( state_name = "NSW", age_breaks = c(seq(0, 80, by = 5), Inf), R_target = 1.5 @@ -77,5 +107,5 @@ ngm_nsw_vacc <- apply_vaccination( acquisition_col = acquisition, transmission_col = transmission ) - +} } diff --git a/man/as_conmat_population.Rd b/man/as_conmat_population.Rd new file mode 100644 index 00000000..31272b0b --- /dev/null +++ b/man/as_conmat_population.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conmat-population.R +\name{as_conmat_population} +\alias{as_conmat_population} +\alias{as_conmat_population.default} +\alias{as_conmat_population.data.frame} +\alias{as_conmat_population.list} +\alias{as_conmat_population.grouped_df} +\title{Convert to conmat population} +\usage{ +as_conmat_population(data, ...) + +\method{as_conmat_population}{default}(data, ...) + +\method{as_conmat_population}{data.frame}(data, age, population, ...) + +\method{as_conmat_population}{list}(data, age, population, ...) + +\method{as_conmat_population}{grouped_df}(data, age, population, ...) +} +\arguments{ +\item{data}{data.frame} + +\item{...}{extra arguments} + +\item{age}{age column - an unquoted variable of numeric integer ages} + +\item{population}{population column - an unquoted variable, numeric value} +} +\description{ +Convert to conmat population +} +\examples{ +some_age_pop <- data.frame( + age = 1:10, + pop = 101:110 +) + +some_age_pop + +as_conmat_population( + some_age_pop, + age = age, + population = pop +) +} diff --git a/man/as_setting_prediction_matrix.Rd b/man/as_setting_prediction_matrix.Rd new file mode 100644 index 00000000..c700fb19 --- /dev/null +++ b/man/as_setting_prediction_matrix.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setting-prediction-matrix.R +\name{as_setting_prediction_matrix} +\alias{as_setting_prediction_matrix} +\title{Coerce object to a setting prediction matrix} +\usage{ +as_setting_prediction_matrix(list_matrix, age_breaks, ...) +} +\arguments{ +\item{list_matrix}{list of matrices} + +\item{age_breaks}{numeric vector of ages} + +\item{...}{extra arguments (currently not used)} +} +\value{ +object of class setting prediction matrix +} +\description{ +This will also calculate an \code{all} matrix, if \code{all} is not specified. This +is the sum of all other matrices. +} +\examples{ + +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +one_by_nine <- matrix(1, nrow = 9, ncol = 9) + +mat_list <- list( + home = one_by_nine, + work = one_by_nine +) + +mat_list + +mat_set <- as_setting_prediction_matrix( + mat_list, + age_breaks = age_breaks_0_80_plus +) + +mat_set + +} diff --git a/man/autoplot-conmat.Rd b/man/autoplot-conmat.Rd new file mode 100644 index 00000000..89c528db --- /dev/null +++ b/man/autoplot-conmat.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/autoplot.R +\name{autoplot-conmat} +\alias{autoplot-conmat} +\alias{autoplot.conmat_age_matrix} +\alias{autoplot.conmat_setting_prediction_matrix} +\alias{autoplot.transmission_probability_matrix} +\alias{autoplot.ngm_setting_matrix} +\alias{autoplot.setting_vaccination_matrix} +\title{Plot setting matrices using ggplot2} +\usage{ +\method{autoplot}{conmat_age_matrix}(object, ..., title = "Contact Matrices") + +\method{autoplot}{conmat_setting_prediction_matrix}(object, ..., title = "Setting-specific synthetic contact matrices") + +\method{autoplot}{transmission_probability_matrix}( + object, + ..., + title = "Setting-specific transmission probability matrices" +) + +\method{autoplot}{ngm_setting_matrix}(object, ..., title = "Setting-specific NGM matrices") + +\method{autoplot}{setting_vaccination_matrix}(object, ..., title = "Setting-specific vaccination matrices") +} +\arguments{ +\item{object}{A matrix or a list of square matrices, with row and column names +indicating the age groups.} + +\item{...}{Other arguments passed on} + +\item{title}{Title to give to plot setting matrices. Defaults are provided for certain objects} +} +\value{ +a ggplot visualisation of contact rates +} +\description{ +Plot setting matrices using ggplot2 +} +\examples{ +\dontrun{ +if (interactive()) { + polymod_contact_data <- get_polymod_setting_data() + polymod_survey_data <- get_polymod_population() + + setting_models <- fit_setting_contacts( + contact_data_list = polymod_contact_data, + population = polymod_survey_data + ) + + fairfield <- abs_age_lga("Fairfield (C)") + + fairfield_hh_size <- + get_abs_per_capita_household_size(lga = "Fairfield (C)") + + synthetic_settings_5y_fairfield_hh <- predict_setting_contacts( + population = fairfield, + contact_model = setting_models, + age_breaks = c(seq(0, 85, by = 5), Inf), + per_capita_household_size = fairfield_hh_size + ) + + # Plotting synthetic contact matrices across all settings + + autoplot( + object = synthetic_settings_5y_fairfield_hh, + title = "Setting specific synthetic contact matrices" + ) + + # Work setting specific synthetic contact matrices + autoplot( + object = synthetic_settings_5y_fairfield_hh$work, + title = "Work" + ) +} +} +} diff --git a/man/check_dimensions.Rd b/man/check_dimensions.Rd new file mode 100644 index 00000000..8ef2d14b --- /dev/null +++ b/man/check_dimensions.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkers.R +\name{check_dimensions} +\alias{check_dimensions} +\title{Check dimensions} +\usage{ +check_dimensions(ngm, data) +} +\arguments{ +\item{ngm}{list with next generation matrices at different settings} + +\item{data}{data frame} +} +\description{ +An internal function used within \code{\link[=apply_vaccination]{apply_vaccination()}} to warn users of incompatible dimensions of +data and the next generation matrices +} +\keyword{internal} diff --git a/man/check_if_list.Rd b/man/check_if_list.Rd new file mode 100644 index 00000000..45e2b425 --- /dev/null +++ b/man/check_if_list.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checkers.R +\name{check_if_list} +\alias{check_if_list} +\title{Check if data is a list} +\usage{ +check_if_list(contact_data) +} +\arguments{ +\item{contact_data}{data on the contacts between two ages at different settings} +} +\description{ +Check if data is a list +} +\keyword{internal} diff --git a/man/check_lga_name.Rd b/man/check_lga_name.Rd deleted file mode 100644 index b84fd623..00000000 --- a/man/check_lga_name.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/checkers.R -\name{check_lga_name} -\alias{check_lga_name} -\title{Check LGA name in Australia} -\usage{ -check_lga_name(lga_name, multiple_lga = FALSE) -} -\arguments{ -\item{lga_name}{a string denoting the official name of Local Government Area. -For example, 'Albury (C).'} - -\item{multiple_lga}{logical response that allows multiple lgas to be checked -if set to \code{TRUE}. Default is FALSE.} -} -\value{ -errors if LGA name not in ABS data, otherwise returns nothing -} -\description{ -Check LGA name in Australia -} -\examples{ -# returns nothing -check_lga_name("Fairfield (C)") -# if you want to check multiple LGAs you must use the `multiple_lga` flag. -check_lga_name(lga_name = c("Fairfield (C)", "Sydney (C)"), - multiple_lga = TRUE) -# this will error, so isn't run -\dontrun{ -# don't set the `multiple_lga` flag -check_lga_name(lga_name = c("Fairfield (C)", "Sydney (C)")) -# not a fully specified LGA -check_lga_name("Fairfield") -} -} diff --git a/man/check_school_demographics.Rd b/man/check_school_demographics.Rd new file mode 100644 index 00000000..6a0f175c --- /dev/null +++ b/man/check_school_demographics.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_work_school_demographics.R +\name{check_school_demographics} +\alias{check_school_demographics} +\title{Check School Demographics} +\usage{ +check_school_demographics(school_demographics) +} +\arguments{ +\item{school_demographics}{school data} +} +\description{ +Check School Demographics +} +\author{ +njtierney +} +\keyword{internal} diff --git a/man/check_work_demographics.Rd b/man/check_work_demographics.Rd new file mode 100644 index 00000000..0724151b --- /dev/null +++ b/man/check_work_demographics.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_work_school_demographics.R +\name{check_work_demographics} +\alias{check_work_demographics} +\title{Check Work Demographics} +\usage{ +check_work_demographics(work_demographics) +} +\arguments{ +\item{work_demographics}{work data} +} +\description{ +Check Work Demographics +} +\author{ +njtierney +} +\keyword{internal} diff --git a/man/clean_age_population_year.Rd b/man/clean_age_population_year.Rd index 5888ff39..fbd7e4ea 100644 --- a/man/clean_age_population_year.Rd +++ b/man/clean_age_population_year.Rd @@ -27,8 +27,8 @@ clean_age_population_year( \item{year}{year to filter to. If not specified, gives all years.} } \value{ -data frame with lower.age.limit and upper.age.limit and optionally -filtered down to specific location or year. +data frame with \code{lower.age.limit} and \code{upper.age.limit} and +optionally filtered down to specific location or year. } \description{ Internally used function within \code{\link[=age_population]{age_population()}} to separate age groups diff --git a/man/conmat-package.Rd b/man/conmat-package.Rd index 0505c288..570456bf 100644 --- a/man/conmat-package.Rd +++ b/man/conmat-package.Rd @@ -15,6 +15,7 @@ Authors: \itemize{ \item Nick Golding \email{nick.golding.research@gmail.com} (\href{https://orcid.org/0000-0001-8916-5570}{ORCID}) \item Aarathy Babu \email{aarathybabu907@gmail.com} (\href{https://orcid.org/0000-0002-6982-5989}{ORCID}) + \item Michael Lydeamore \email{michael.lydeamore@monash.edu} (\href{https://orcid.org/0000-0001-6515-827X}{ORCID}) } Other contributors: diff --git a/man/conmat_original_school_demographics.Rd b/man/conmat_original_school_demographics.Rd new file mode 100644 index 00000000..5c87516c --- /dev/null +++ b/man/conmat_original_school_demographics.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_original_school_work.R +\docType{data} +\name{conmat_original_school_demographics} +\alias{conmat_original_school_demographics} +\title{Original school demographics for conmat} +\format{ +A data frame with 121 rows and 2 variables: +\describe{ +\item{age}{0 to 120} +\item{school_fraction}{fraction of population at school} +} +} +\source{ +{Census of Population and Housing, 2016, TableBuilder} +} +\usage{ +conmat_original_school_demographics +} +\description{ +An internal dataset containing the original estimates of which fraction of +ages were attending school in Australia. These can be used inside of +\code{\link[=fit_single_contact_model]{fit_single_contact_model()}} and \code{\link[=fit_setting_contacts]{fit_setting_contacts()}}. +} +\keyword{datasets} diff --git a/man/conmat_original_work_demographics.Rd b/man/conmat_original_work_demographics.Rd new file mode 100644 index 00000000..eec3bb92 --- /dev/null +++ b/man/conmat_original_work_demographics.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_original_school_work.R +\docType{data} +\name{conmat_original_work_demographics} +\alias{conmat_original_work_demographics} +\title{Original work demographics for conmat} +\format{ +A data frame with 121 rows and 2 variables: +\describe{ +\item{age}{0 to 120} +\item{work_fraction}{fraction of population working.} +} +} +\source{ +{Census of Population and Housing, 2016, TableBuilder} +} +\usage{ +conmat_original_work_demographics +} +\description{ +An internal dataset containing the original estimates of which fraction of +ages were working in Australia. These can be used inside of +\code{\link[=fit_single_contact_model]{fit_single_contact_model()}} and \code{\link[=fit_setting_contacts]{fit_setting_contacts()}}. +} +\keyword{datasets} diff --git a/man/conmat_population.Rd b/man/conmat_population.Rd new file mode 100644 index 00000000..15a5e8e8 --- /dev/null +++ b/man/conmat_population.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conmat-population.R +\name{conmat_population} +\alias{conmat_population} +\title{Define a conmat population} +\usage{ +conmat_population(data, age, population) +} +\arguments{ +\item{data}{data.frame} + +\item{age}{bare name representing the age column} + +\item{population}{bare name representing the population column} +} +\value{ +a data frame with age and population attributes +} +\description{ +A conmat population is a dataframe that stores which columns represent the +age and population information. This is useful as it means we can refer +to this information throughout other functions in the conmat package +without needing to specify or hard code which columns represent the age +and population information. +} +\examples{ +perth <- abs_age_lga("Perth (C)") +} diff --git a/man/davies_age_extended.Rd b/man/davies_age_extended.Rd index ef29f2b5..c42d4a1d 100644 --- a/man/davies_age_extended.Rd +++ b/man/davies_age_extended.Rd @@ -8,7 +8,7 @@ A data frame of the probability of transmission from a case to a contact. There are 101 rows and 4 variables. \describe{ \item{age}{from 0 to 100} -\item{clinical_fraction}{Estimate of fraction with clinical symptoms, or the age-specific proportion of infections resulting in clinical sympttoms inferred by applying a smoothing spline to the mean estimates from Davies et al. } +\item{clinical_fraction}{Estimate of fraction with clinical symptoms, or the age-specific proportion of infections resulting in clinical symptoms inferred by applying a smoothing spline to the mean estimates from Davies et al. } \item{davies_original}{Age specific parameters of the relative susceptibility to infection inferred from a smoothing-spline estimate of the mean relative susceptibility estimate from Davies et al.} \item{davies_updated}{Re-estimated parameter of the susceptibility profile for under-16s that is estimated in a similar way but to the age-distribution of infections in England from the UK ONS prevalence survey rather than case counts which may undercount children} diff --git a/man/estimate_setting_contacts.Rd b/man/estimate_setting_contacts.Rd index c9dd9fec..672a0278 100644 --- a/man/estimate_setting_contacts.Rd +++ b/man/estimate_setting_contacts.Rd @@ -9,7 +9,10 @@ estimate_setting_contacts( survey_population, prediction_population = survey_population, age_breaks, - per_capita_household_size = NULL + per_capita_household_size = NULL, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL ) } \arguments{ @@ -29,8 +32,26 @@ set is survey_population} adjusts the household contact matrix by some per capita household size. To set it, provide a single number, the per capita household size. More information is provided below in Details. See -\code{\link[=get_per_capita_household_size]{get_per_capita_household_size()}} function for a helper for Australian +\code{\link[=get_abs_per_capita_household_size]{get_abs_per_capita_household_size()}} function for a helper for Australian data with a workflow on how to get this number.} + +\item{symmetrical}{whether to enforce symmetrical terms in the model. +Defaults to TRUE. See \code{details} of \code{fit_single_contact_model} for more +information.} + +\item{school_demographics}{(optional) defaults to census average proportion +at school. You can provide a dataset with columns, "age" (numeric), and +"school_fraction" (0-1), if you would like to specify these +details. See \code{abs_avg_school} for the default values. If you would like to +use the original school demographics used in conmat, these are provided in +the dataset, \code{conmat_original_school_demographics}.} + +\item{work_demographics}{(optional) defaults to census average proportion +employed. You can provide a dataset with columns, "age" (numeric), and +"work_fraction", if you would like to specify these details. See +\code{abs_avg_work} for the default values. If you would like to +use the original work demographics used in conmat, these are provided in +the dataset, \code{conmat_original_work_demographics}.} } \value{ predicted setting specific contact matrices, and for all combined @@ -41,18 +62,50 @@ the setting, and assumed to together make up the full set of contacts for individuals in the survey), a representative population distribution for the survey, and a set of age breaks at which to aggregate contacts, return a set of predicted contact matrices for each setting, and for all combined. +Note that this function is parallelisable with \code{future}, and will be +impacted by any \code{future} plans provided. } \examples{ - \dontrun{ # takes a long time to run settings_estimated_contacts <- estimate_setting_contacts( contact_data_list = get_polymod_setting_data(), survey_population = get_polymod_population(), prediction_population = get_polymod_population(), - age_breaks = c(seq(0, 75, by = 5), Inf), + age_breaks = c(seq(0, 85, by = 5), Inf), per_capita_household_size = NULL ) +# or predict to fairfield +fairfield_hh <- get_abs_per_capita_household_size(lga = "Fairfield (C)") +contact_model_pred_est <- estimate_setting_contacts( + contact_data_list = get_polymod_setting_data(), + survey_population = get_polymod_population(), + prediction_population = abs_age_lga("Fairfield (C)"), + age_breaks = c(seq(0, 85, by = 5), Inf), + per_capita_household_size = fairfield_hh +) + +# or use different populations in school or work demographics +fairfield_hh <- get_abs_per_capita_household_size(lga = "Fairfield (C)") +contact_model_pred_est <- estimate_setting_contacts( + contact_data_list = get_polymod_setting_data(), + survey_population = get_polymod_population(), + prediction_population = abs_age_lga("Fairfield (C)"), + age_breaks = c(seq(0, 85, by = 5), Inf), + per_capita_household_size = fairfield_hh, + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics +) + +# or use non-symmetric model terms +contact_model_pred_est <- estimate_setting_contacts( + contact_data_list = get_polymod_setting_data(), + survey_population = get_polymod_population(), + prediction_population = abs_age_lga("Fairfield (C)"), + age_breaks = c(seq(0, 85, by = 5), Inf), + per_capita_household_size = fairfield_hh, + symmetrical = FALSE +) } } diff --git a/man/extrapolate_polymod.Rd b/man/extrapolate_polymod.Rd index fddb38ef..601b2e19 100644 --- a/man/extrapolate_polymod.Rd +++ b/man/extrapolate_polymod.Rd @@ -11,9 +11,10 @@ extrapolate_polymod( ) } \arguments{ -\item{population}{data set with information on the population of the desired -location - containing \code{lower.age.limit} and \code{population} columns. See -\code{get_polymod_population()} for an example of this data.} +\item{population}{a \code{conmat_population} object, specifying the \code{age} +and \code{population} characteristics. Or a data frame with \code{lower.age.limit} +and \code{population} columns. See \code{get_polymod_population()} for an example +of this data.} \item{age_breaks}{vector depicting age values. Default value is \code{c(seq(0, 75, by = 5), Inf)}} @@ -22,7 +23,7 @@ location - containing \code{lower.age.limit} and \code{population} columns. See adjusts the household contact matrix by some per capita household size. To set it, provide a single number, the per capita household size. More information is provided below in Details. See -\code{\link[=get_per_capita_household_size]{get_per_capita_household_size()}} function for a helper for Australian +\code{\link[=get_abs_per_capita_household_size]{get_abs_per_capita_household_size()}} function for a helper for Australian data with a workflow on how to get this number.} } \value{ @@ -31,7 +32,16 @@ desired ages. } \description{ Uses \code{\link[=estimate_setting_contacts]{estimate_setting_contacts()}} to fit a contact model on the data from -polymod and later extrapolate on to a desired population. +polymod and later extrapolate on to a desired population. Note that this +function is parallelisable with \code{future}, and will be impacted by any +\code{future} plans provided. +} +\details{ +Also note that since this model uses the already fit \code{polymod_setting_models} +data, which has been fit using symmetrical model terms, if you want to +fit a model with asymmetric model terms, you will need to go through +the full process of building new models. You can find this detail in last +section of the vignette "example pipeline". } \examples{ \dontrun{ @@ -44,6 +54,5 @@ synthetic_settings_5y_fairfield <- extrapolate_polymod( population = abs_age_lga("Fairfield (C)") ) synthetic_settings_5y_fairfield -) } } diff --git a/man/eyre_transmission_probabilities.Rd b/man/eyre_transmission_probabilities.Rd index 284339a3..5fc0927a 100644 --- a/man/eyre_transmission_probabilities.Rd +++ b/man/eyre_transmission_probabilities.Rd @@ -59,10 +59,10 @@ eyre_transmission_probabilities \%>\% across( ends_with("age"), ~ factor(.x, - levels = str_sort( - unique(.x), - numeric = TRUE - ) + levels = str_sort( + unique(.x), + numeric = TRUE + ) ) ) ) \%>\% @@ -81,7 +81,6 @@ eyre_transmission_probabilities \%>\% theme( axis.text = element_text(angle = 45, hjust = 1) ) - } } \keyword{datasets} diff --git a/man/figures/README-plot-matrix-differents-1.png b/man/figures/README-plot-matrix-differents-1.png index 909bc66f..15f87ad4 100644 Binary files a/man/figures/README-plot-matrix-differents-1.png and b/man/figures/README-plot-matrix-differents-1.png differ diff --git a/man/fit_setting_contacts.Rd b/man/fit_setting_contacts.Rd index d1fed150..d72d36c5 100644 --- a/man/fit_setting_contacts.Rd +++ b/man/fit_setting_contacts.Rd @@ -2,19 +2,43 @@ % Please edit documentation in R/fit_setting_contacts.R \name{fit_setting_contacts} \alias{fit_setting_contacts} -\title{Fit a contact model to a survey poulation} +\title{Fit a contact model to a survey population} \usage{ -fit_setting_contacts(contact_data_list, population) +fit_setting_contacts( + contact_data_list, + population, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL +) } \arguments{ -\item{contact_data_list}{A list of dataframes, each containing informatio +\item{contact_data_list}{A list of dataframes, each containing information on the setting (home, work, school, other), age_from, age_to, the number of contacts, and the number of participants. Example data can be retrieved with \code{\link[=get_polymod_setting_data]{get_polymod_setting_data()}}.} -\item{population}{survey population data, containing columns +\item{population}{\code{conmat_population} object or dataset with columns \code{lower.age.limit} and \code{population}. Example data can be retrieved with \code{\link[=get_polymod_population]{get_polymod_population()}}.} + +\item{symmetrical}{whether to enforce symmetrical terms in the model. +Defaults to TRUE. See \code{details} of \code{fit_single_contact_model} for more +information.} + +\item{school_demographics}{(optional) defaults to census average proportion +at school. You can provide a dataset with columns, "age" (numeric), and +"school_fraction" (0-1), if you would like to specify these +details. See \code{abs_avg_school} for the default values. If you would like to +use the original school demographics used in conmat, these are provided in +the dataset, \code{conmat_original_school_demographics}.} + +\item{work_demographics}{(optional) defaults to census average proportion +employed. You can provide a dataset with columns, "age" (numeric), and +"work_fraction", if you would like to specify these details. See +\code{abs_avg_work} for the default values. If you would like to +use the original work demographics used in conmat, these are provided in +the dataset, \code{conmat_original_work_demographics}.} } \value{ list of fitted gam models - one for each setting provided @@ -23,13 +47,14 @@ list of fitted gam models - one for each setting provided fits a gam model for each setting on the survey population data & the setting wise contact data. The underlying method is described in more detail in \code{\link[=fit_single_contact_model]{fit_single_contact_model()}}. The models can be fit in parallel, -see the examples. +see the examples. Note that this function is parallelisable with \code{future}, +and will be impacted by any \code{future} plans provided. } \examples{ # These aren't being run as they take too long to fit \dontrun{ contact_model <- fit_setting_contacts( - contact_data_list = get_polymod_setting_data(), + contact_data_list = get_polymod_setting_data(), population = get_polymod_population() ) @@ -44,6 +69,14 @@ contact_model <- fit_setting_contacts( contact_data_list = polymod_setting_data, population = polymod_population ) + +# you can specify your own population data for school and work demographics +contact_model_diff_data <- fit_setting_contacts( + contact_data_list = polymod_setting_data, + population = polymod_population, + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics +) } } \author{ diff --git a/man/fit_single_contact_model.Rd b/man/fit_single_contact_model.Rd index b33b47f9..b66e027a 100644 --- a/man/fit_single_contact_model.Rd +++ b/man/fit_single_contact_model.Rd @@ -4,15 +4,39 @@ \alias{fit_single_contact_model} \title{Fit a single GAM contact model to a dataset} \usage{ -fit_single_contact_model(contact_data, population) +fit_single_contact_model( + contact_data, + population, + symmetrical = TRUE, + school_demographics = NULL, + work_demographics = NULL +) } \arguments{ \item{contact_data}{dataset with columns \code{age_to}, \code{age_from}, \code{setting}, \code{contacts}, and \code{participants}. See \code{\link[=get_polymod_contact_data]{get_polymod_contact_data()}} for an example dataset - or the dataset in examples below.} -\item{population}{population data, with columns \code{lower.age.limit} and -\code{population}. See \code{\link[=get_polymod_population]{get_polymod_population()}} for an example.} +\item{population}{\code{conmat_population} object, or data frame with columns +\code{lower.age.limit} and \code{population}. See \code{\link[=get_polymod_population]{get_polymod_population()}} for +an example.} + +\item{symmetrical}{whether to enforce symmetrical terms in the model. +Defaults to TRUE. See \code{details} for more information.} + +\item{school_demographics}{(optional) defaults to census average proportion +at school. You can provide a dataset with columns, "age" (numeric), and +"school_fraction" (0-1), if you would like to specify these +details. See \code{abs_avg_school} for the default values. If you would like to +use the original school demographics used in conmat, these are provided in +the dataset, \code{conmat_original_school_demographics}.} + +\item{work_demographics}{(optional) defaults to census average proportion +employed. You can provide a dataset with columns, "age" (numeric), and +"work_fraction", if you would like to specify these details. See +\code{abs_avg_work} for the default values. If you would like to +use the original work demographics used in conmat, these are provided in +the dataset, \code{conmat_original_work_demographics}.} } \value{ single model @@ -25,21 +49,31 @@ and so on), for a specified setting, with specific terms being added for given settings. See "details" for further information. } \details{ -The model fit is a Generalised Additive Model (GAM). To help -account for assortativity with age, where people of similar ages have -more contact with each other, we include predictors \code{age_from}, and -\code{age_to}. To account for intergenerational contact patterns, where -parents and grandparents will interact with their children and grand -children, we include a term that is the absolute difference of age_from -and age_to. We also include the interaction of intergenerational contact -patterns with contacts from one age with the term that is the absolute -difference of age_from and age_to, and age_from. These terms are fit with -a smoothing function. Specifically, the relevant code looks like this: +The model fit is a Generalised Additive Model (GAM). We provide two +"modes" for model fitting. Either using "symmetric" or "non-symmetric" +model predictor terms with the logical variance "symmetrical", which is set +to TRUE by default. We recommend using the "symmetrical" terms as it +reflects the fact that contacts are symmetric - person A having contact +with person B means person B has had contact with person A. We've included +a variety of terms to account for assortativity with age, where people of +similar ages have more contact with each other. And included terms to +account for intergenerational contact patterns, where parents and +grandparents will interact with their children and grand children. +These terms are fit with a smoothing function. Specifically, the relevant +code looks like this: -\if{html}{\out{
}}\preformatted{s(age_to) + - s(age_from) + - s(abs(age_from - age_to)) + - s(abs(age_from - age_to), age_from) +\if{html}{\out{
}}\preformatted{# abs(age_from - age_to) +s(gam_age_offdiag) + +# abs(age_from - age_to)^2 +s(gam_age_offdiag_2) + +# abs(age_from * age_to) +s(gam_age_diag_prod) + +# abs(age_from + age_to) +s(gam_age_diag_sum) + +# pmax(age_from, age_to) +s(gam_age_pmax) + +# pmin(age_from, age_to) +s(gam_age_pmin) }\if{html}{\out{
}} We also include predictors for the probability of attending school, and @@ -59,6 +93,33 @@ weighted by the proportion of the population attending school. This leaves us with a model that looks like so: +\if{html}{\out{
}}\preformatted{mgcv::bam( + formula = contacts ~ + # abs(age_from - age_to) + s(gam_age_offdiag) + + # abs(age_from - age_to)^2 + s(gam_age_offdiag_2) + + # abs(age_from * age_to) + s(gam_age_diag_prod) + + # abs(age_from + age_to) + s(gam_age_diag_sum) + + # pmax(age_from, age_to) + s(gam_age_pmax) + + # pmin(age_from, age_to) + s(gam_age_pmin) + + school_probability + + work_probability + + offset(log_contactable_population) + + # or for school settings + # offset(log_contactable_population_school) + family = stats::poisson, + offset = log(participants), + data = population_data +) +}\if{html}{\out{
}} + +But if the term \code{symmetrical = FALSE} is used, you get: + \if{html}{\out{
}}\preformatted{mgcv::bam( formula = contacts ~ s(age_to) + @@ -66,10 +127,10 @@ This leaves us with a model that looks like so: s(abs(age_from - age_to)) + s(abs(age_from - age_to), age_from) + school_probability + - work_probability + + work_probability + offset(log_contactable_population) + # or for school settings - # offset(log_contactable_population_school) + # offset(log_contactable_population_school) family = stats::poisson, offset = log(participants), data = population_data @@ -83,12 +144,22 @@ example_population <- get_polymod_population() library(dplyr) -example_contact_20 <- example_contact \%>\% - filter(age_to <= 20, - age_from <= 20) - +example_contact_20 <- example_contact \%>\% + filter( + age_to <= 20, + age_from <= 20 + ) + my_mod <- fit_single_contact_model( contact_data = example_contact_20, population = example_population ) + +# you can specify your own population data for school and work demographics +my_mod_diff_data <- fit_single_contact_model( + contact_data = example_contact_20, + population = example_population, + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics +) } diff --git a/man/formula-terms.Rd b/man/formula-terms.Rd new file mode 100644 index 00000000..7370d5fc --- /dev/null +++ b/man/formula-terms.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model-tidiers.R +\name{formula-terms} +\alias{formula-terms} +\alias{get_formulas_terms} +\title{Extract out formula terms} +\usage{ +get_formulas_terms(model) +} +\arguments{ +\item{model}{model object} +} +\description{ +Extract out formula terms +} +\examples{ +\dontrun{ +formula_terms <- get_formulas_terms(sim_m) +formula_terms +} + +} +\keyword{internal} diff --git a/man/generate_ngm.Rd b/man/generate_ngm.Rd index fced84ae..d47b2b5d 100644 --- a/man/generate_ngm.Rd +++ b/man/generate_ngm.Rd @@ -2,28 +2,69 @@ % Please edit documentation in R/generate-ngm.R \name{generate_ngm} \alias{generate_ngm} +\alias{generate_ngm.conmat_setting_prediction_matrix} +\alias{generate_ngm.conmat_population} \title{Calculate next generation contact matrices} \usage{ -generate_ngm(state_name = NULL, lga_name = NULL, age_breaks, R_target) +generate_ngm(x, age_breaks, R_target, setting_transmission_matrix, ...) + +\method{generate_ngm}{conmat_setting_prediction_matrix}( + x, + age_breaks, + R_target, + setting_transmission_matrix = NULL, + per_capita_household_size = NULL, + ..., + lga_name, + state_name +) + +\method{generate_ngm}{conmat_population}( + x, + age_breaks, + R_target, + setting_transmission_matrix = NULL, + per_capita_household_size = NULL, + ..., + lga_name, + state_name +) } \arguments{ -\item{state_name}{target Australian state name in abbreviated form, such -as "QLD", "NSW", or "TAS"} - -\item{lga_name}{target Australian local government area (LGA) name, such -as "Fairfield (C)". See \code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names.} +\item{x}{data input - could be a \code{conmat_population} (such as the output from +\code{\link[=abs_age_lga]{abs_age_lga()}}), or a \code{conmat_setting_prediction_matrix}, which is the +output from \code{\link[=extrapolate_polymod]{extrapolate_polymod()}} or \code{\link[=predict_setting_contacts]{predict_setting_contacts()}}.} \item{age_breaks}{vector depicting age values with the highest age depicted as \code{Inf}. For example, c(seq(0, 85, by = 5), Inf)} \item{R_target}{target reproduction number} + +\item{setting_transmission_matrix}{default is NULL, which calculates the transmission +matrix using \code{get_setting_transmission_matrices(age_breaks)}. You can +provide your own transmission matrix, but its rows and columns must match +the number of rows and columns, and must be a list of one matrix for each +setting. See the output for \code{get_setting_transmission_matrices(age_breaks)} +to get a sense of the structure. See \code{\link[=get_setting_transmission_matrices]{get_setting_transmission_matrices()}} +for more detail.} + +\item{...}{extra arguments, currently not used} + +\item{per_capita_household_size}{default is NULL - which defaults to \code{\link[=get_polymod_per_capita_household_size]{get_polymod_per_capita_household_size()}}, which gives 3.248971} + +\item{lga_name}{now defunct, but capturing arguments for informative error} + +\item{state_name}{now defunct, but capturing arguments for informative error} } \description{ Once infected, a person can transmit an infectious disease to another, creating generations of infected individuals. We can define a matrix describing the number of newly infected individuals in given categories, such as age, for consecutive generations. This matrix is -called a "next generation matrix" (NGM). +called a "next generation matrix" (NGM). We can generate an NGM from two +sources - a \code{conmat_population} object (such as the output from +\code{\link[=abs_age_lga]{abs_age_lga()}}), or a \code{conmat_setting_prediction_matrix}, which is the +output from \code{\link[=extrapolate_polymod]{extrapolate_polymod()}} or \code{\link[=predict_setting_contacts]{predict_setting_contacts()}}. } \details{ The NGM can be used to calculate the expected number of secondary @@ -40,18 +81,59 @@ settings is obtained by calculating the unique, positive eigen value of the combination NGM. This ratio is then used to scale all the setting specific NGMs. } +\note{ +When using a setting prediction contact matrix (such as one generated +by \code{extrapolate_polymod}, with class \code{conmat_setting_prediction_matrix}), +the age breaks specified in \code{generate_ngm} must be the same as the age +breaks specified in the synthetic contact matrix, otherwise it will error +as it is trying to multiple incompatible matrices. +} \examples{ -# don't run as both together takes a long time to run \dontrun{ -ngm_nsw <- generate_ngm( - state_name = "NSW", - age_breaks = c(seq(0, 85, by = 5), Inf), +perth <- abs_age_lga("Perth (C)") +perth_hh <- get_abs_per_capita_household_size(lga = "Perth (C)") + +age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) + +# you can also run this without `per_capita_household_size` +perth_ngm_lga <- generate_ngm( + perth, + age_breaks = age_breaks_0_80_plus, + per_capita_household_size = perth_hh, R_target = 1.5 ) -ngm_fairfield <- generate_ngm( - lga_name = "Fairfield (C)", - age_breaks = c(seq(0, 85, by = 5), Inf), + +perth_contact <- extrapolate_polymod( + perth, + per_capita_household_size = perth_hh +) + +perth_ngm <- generate_ngm( + perth_contact, + age_breaks = age_breaks_0_80_plus, R_target = 1.5 ) + +# using our own transmission matrix +new_transmission_matrix <- get_setting_transmission_matrices( + age_breaks = age_breaks_0_80_plus, + # is normally 0.5 + asymptomatic_relative_infectiousness = 0.75 +) + +new_transmission_matrix + +perth_ngm_0_80_new_tmat <- generate_ngm( + perth_contact, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5, + setting_transmission_matrix = new_transmission_matrix +) +} +# examples not run as they take a long time +\dontrun{ +perth <- abs_age_lga("Perth (C)") +perth_contact <- extrapolate_polymod(perth) +generate_ngm(perth_contact, age_breaks = c(seq(0, 85, by = 5), Inf)) } } diff --git a/man/generate_ngm_oz.Rd b/man/generate_ngm_oz.Rd new file mode 100644 index 00000000..8217dbd9 --- /dev/null +++ b/man/generate_ngm_oz.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate-ngm.R +\name{generate_ngm_oz} +\alias{generate_ngm_oz} +\title{Calculate next generation contact matrices from ABS data} +\usage{ +generate_ngm_oz( + state_name = NULL, + lga_name = NULL, + age_breaks, + R_target, + setting_transmission_matrix = NULL +) +} +\arguments{ +\item{state_name}{target Australian state name in abbreviated form, such +as "QLD", "NSW", or "TAS"} + +\item{lga_name}{target Australian local government area (LGA) name, such +as "Fairfield (C)". See \code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names.} + +\item{age_breaks}{vector depicting age values with the highest age depicted +as \code{Inf}. For example, c(seq(0, 85, by = 5), Inf)} + +\item{R_target}{target reproduction number} + +\item{setting_transmission_matrix}{default is NULL, which calculates the transmission +matrix using \code{get_setting_transmission_matrices(age_breaks)}. You can +provide your own transmission matrix, but its rows and columns must match +the number of rows and columns, and must be a list of one matrix for each +setting. See the output for \code{get_setting_transmission_matrices(age_breaks)} +to get a sense of the structure. See \code{\link[=get_setting_transmission_matrices]{get_setting_transmission_matrices()}} +for more detail.} +} +\description{ +This function calculates a next generation matrix (NGM) +based on state or LGA data from the Australian Bureau of Statistics (ABS). +For full details see \code{\link[=generate_ngm]{generate_ngm()}}. +} +\examples{ +# don't run as both together takes a long time to run +\dontrun{ +ngm_nsw <- generate_ngm_oz( + state_name = "NSW", + age_breaks = c(seq(0, 85, by = 5), Inf), + R_target = 1.5 +) +ngm_fairfield <- generate_ngm_oz( + lga_name = "Fairfield (C)", + age_breaks = c(seq(0, 85, by = 5), Inf), + R_target = 1.5 +) +} +} diff --git a/man/get_household_size_distribution.Rd b/man/get_abs_household_size_distribution.Rd similarity index 67% rename from man/get_household_size_distribution.Rd rename to man/get_abs_household_size_distribution.Rd index c6fd2772..86eb2658 100644 --- a/man/get_household_size_distribution.Rd +++ b/man/get_abs_household_size_distribution.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_household_size_distribution.R -\name{get_household_size_distribution} -\alias{get_household_size_distribution} +\name{get_abs_household_size_distribution} +\alias{get_abs_household_size_distribution} \title{Get household size distribution based on state or LGA name} \usage{ -get_household_size_distribution(state = NULL, lga = NULL) +get_abs_household_size_distribution(state = NULL, lga = NULL) } \arguments{ \item{state}{target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS"} @@ -19,10 +19,10 @@ returns a data frame with household size distributions of a specific state or LG Get household size distribution based on state or LGA name } \examples{ -get_household_size_distribution(lga = "Fairfield (C)") -get_household_size_distribution(state = "NSW") +get_abs_household_size_distribution(lga = "Fairfield (C)") +get_abs_household_size_distribution(state = "NSW") \dontrun{ # cannot specify both state and LGA -get_household_size_distribution(state = "NSW", lga = "Fairfield (C)") +get_abs_household_size_distribution(state = "NSW", lga = "Fairfield (C)") } } diff --git a/man/get_abs_household_size_population.Rd b/man/get_abs_household_size_population.Rd new file mode 100644 index 00000000..2ef215e7 --- /dev/null +++ b/man/get_abs_household_size_population.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/abs_household_size_population.R +\name{get_abs_household_size_population} +\alias{get_abs_household_size_population} +\title{Get population associated with each household size in an LGA or a state} +\usage{ +get_abs_household_size_population(state = NULL, lga = NULL) +} +\arguments{ +\item{state}{target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS"} + +\item{lga}{target Australian local government area (LGA) name, such as "Fairfield (C)". See +\code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names} +} +\value{ +returns a data frame with household size and the population associated with it in each LGA or state. +} +\description{ +Get population associated with each household size in an LGA or a state +} +\examples{ +get_abs_household_size_population(state = "NSW") +} diff --git a/man/get_per_capita_household_size.Rd b/man/get_abs_per_capita_household_size.Rd similarity index 52% rename from man/get_per_capita_household_size.Rd rename to man/get_abs_per_capita_household_size.Rd index e032f95d..3d7447b7 100644 --- a/man/get_per_capita_household_size.Rd +++ b/man/get_abs_per_capita_household_size.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_per_capita_household_size.R -\name{get_per_capita_household_size} -\alias{get_per_capita_household_size} +% Please edit documentation in R/get_abs_per_capita_household_size.R +\name{get_abs_per_capita_household_size} +\alias{get_abs_per_capita_household_size} \title{Get per capita household size based on state or LGA name} \usage{ -get_per_capita_household_size(state = NULL, lga = NULL) +get_abs_per_capita_household_size(state = NULL, lga = NULL) } \arguments{ \item{state}{state name} @@ -19,11 +19,11 @@ or LGA. Get per capita household size based on state or LGA name } \examples{ -get_per_capita_household_size(lga = "Fairfield (C)") -get_per_capita_household_size(state = "NSW") +get_abs_per_capita_household_size(lga = "Fairfield (C)") +get_abs_per_capita_household_size(state = "NSW") \dontrun{ # cannot specify both state and LGA -get_per_capita_household_size(state = "NSW", lga = "Fairfield (C)") +get_abs_per_capita_household_size(state = "NSW", lga = "Fairfield (C)") } } \author{ diff --git a/man/get_abs_per_capita_household_size_lga.Rd b/man/get_abs_per_capita_household_size_lga.Rd new file mode 100644 index 00000000..a757d198 --- /dev/null +++ b/man/get_abs_per_capita_household_size_lga.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/abs_per_capita_household_size_lga.R +\name{get_abs_per_capita_household_size_lga} +\alias{get_abs_per_capita_household_size_lga} +\title{Get household size distribution based on LGA name} +\usage{ +get_abs_per_capita_household_size_lga(lga = NULL) +} +\arguments{ +\item{lga}{target Australian local government area (LGA) name, such as "Fairfield (C)". See +\code{\link[=abs_lga_lookup]{abs_lga_lookup()}} for list of lga names} +} +\value{ +returns a numeric value depicting the per capita household size of the specified LGA +} +\description{ +Get household size distribution based on LGA name +} +\examples{ +get_abs_per_capita_household_size_lga(lga = "Fairfield (C)") + +} diff --git a/man/get_abs_per_capita_household_size_state.Rd b/man/get_abs_per_capita_household_size_state.Rd new file mode 100644 index 00000000..6c79683a --- /dev/null +++ b/man/get_abs_per_capita_household_size_state.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/abs_per_capita_household_size_state.R +\name{get_abs_per_capita_household_size_state} +\alias{get_abs_per_capita_household_size_state} +\title{Get household size distribution based on state name} +\usage{ +get_abs_per_capita_household_size_state(state = NULL) +} +\arguments{ +\item{state}{target Australian state name in abbreviated form, such as "QLD", "NSW", or "TAS"} +} +\value{ +returns a numeric value depicting the per capita household size of the specified state +} +\description{ +Get household size distribution based on state name +} +\examples{ +get_abs_per_capita_household_size_state(state = "NSW") +} diff --git a/man/get_age_population_function.Rd b/man/get_age_population_function.Rd index e17177e8..c31f240f 100644 --- a/man/get_age_population_function.Rd +++ b/man/get_age_population_function.Rd @@ -2,18 +2,27 @@ % Please edit documentation in R/get_age_population_function.R \name{get_age_population_function} \alias{get_age_population_function} +\alias{get_age_population_function.conmat_population} +\alias{get_age_population_function.data.frame} \title{Return an interpolating function for populations in 1y age increments} \usage{ -get_age_population_function( +get_age_population_function(data, ...) + +\method{get_age_population_function}{conmat_population}(data = population, ...) + +\method{get_age_population_function}{data.frame}( data = population, age_col = lower.age.limit, - pop_col = population + pop_col = population, + ... ) } \arguments{ \item{data}{dataset containing information on population of a given age/age group} +\item{...}{extra arguments} + \item{age_col}{bare variable name for the column with age information} \item{pop_col}{bare variable name for the column with population information} @@ -58,9 +67,9 @@ polymod_pop # provide a specified age and get the estimated population for that 1 year # age group. First we create the new function like so -age_pop_function <- get_age_population_function(data=polymod_pop, - age_col = lower.age.limit, - pop_col= population) +age_pop_function <- get_age_population_function( + data = polymod_pop +) # Then we pass it a year to get the estimated population for a particular age age_pop_function(4) @@ -68,15 +77,15 @@ age_pop_function(4) # range age_pop_function(1:4) -# Notice that we get a _pretty similar_ number of 0-4 if we sum it up, as +# Notice that we get a _pretty similar_ number of 0-4 if we sum it up, as # the first row of the table: head(polymod_pop, 1) sum(age_pop_function(age = 0:4)) -# Usage in dplyr +# Usage in dplyr library(dplyr) example_df <- slice_head(abs_education_state, n = 5) example_df \%>\% -mutate(population_est = age_pop_function(age)) + mutate(population_est = age_pop_function(age)) } diff --git a/man/get_polymod_per_capita_household_size.Rd b/man/get_polymod_per_capita_household_size.Rd index 53d90282..24cc98e4 100644 --- a/man/get_polymod_per_capita_household_size.Rd +++ b/man/get_polymod_per_capita_household_size.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_polymod_per_capita_household_size.R \name{get_polymod_per_capita_household_size} \alias{get_polymod_per_capita_household_size} -\title{Get polymod per capita houshold size.} +\title{Get polymod per capita household size.} \usage{ get_polymod_per_capita_household_size() } diff --git a/man/get_polymod_population.Rd b/man/get_polymod_population.Rd index 209a2bbb..a94eeb96 100644 --- a/man/get_polymod_population.Rd +++ b/man/get_polymod_population.Rd @@ -15,7 +15,8 @@ Finland, Germany, Italy, Luxembourg, Netherlands, Poland, and United Kingdom.} } \value{ -data frame with two columns: \code{lower.age.limit} and \code{population} +A \code{conmat_population} data frame with two columns: \code{lower.age.limit} +and \code{population} } \description{ returns the polymod-average population age distribution in @@ -30,7 +31,7 @@ participated in the polymod survey. } \examples{ get_polymod_population() -get_polymod_population("Belgium") -get_polymod_population("United Kingdom") -get_polymod_population("Italy") +get_polymod_population("Belgium") +get_polymod_population("United Kingdom") +get_polymod_population("Italy") } diff --git a/man/get_setting_transmission_matrices.Rd b/man/get_setting_transmission_matrices.Rd index ede0fee0..520d1411 100644 --- a/man/get_setting_transmission_matrices.Rd +++ b/man/get_setting_transmission_matrices.Rd @@ -53,7 +53,7 @@ estimates, and accounting for vaccination, reduced mixing, and reduced transmissibility in work and other settings due to hygiene behaviour; and estimates of the relative transmissibility in household vs non-household settings - scaled linearly for non-household transmission and binomially for -household transmission (so that onward onfections do not to exceed the number +household transmission (so that onward infections do not to exceed the number of other household members). When using this data, ensure that you cite this package, and the original @@ -75,11 +75,11 @@ setting_models <- fit_setting_contacts( age_breaks <- c(seq(0, 80, by = 5), Inf) # define a new population age distribution to predict to -fairfield_age_pop <- abs_age_lga("Fairfield (C)") +fairfield <- abs_age_lga("Fairfield (C)") # predict setting-specific contact matrices to a new population contact_matrices <- predict_setting_contacts( - population = fairfield_age_pop, + population = fairfield, contact_model = setting_models, age_breaks = age_breaks ) diff --git a/man/matrix_to_predictions.Rd b/man/matrix_to_predictions.Rd index ae1c92e1..36fc7f14 100644 --- a/man/matrix_to_predictions.Rd +++ b/man/matrix_to_predictions.Rd @@ -20,21 +20,21 @@ converts a wide matrix into a long data frame. It is mostly used within plotting functions. } \examples{ - fairfield_abs_data <- abs_age_lga("Fairfield (C)") - - # We can convert the predictions into a matrix - - fairfield_school_contacts <- predict_contacts( - model = polymod_setting_models$school, - population = fairfield_abs_data, - age_breaks = c(0, 5, 10, 15,Inf) - ) - - fairfield_school_contacts - - fairfield_school_mat <- predictions_to_matrix(fairfield_school_contacts) - - fairfield_school_mat - - matrix_to_predictions(fairfield_school_mat) +fairfield <- abs_age_lga("Fairfield (C)") + +# We can convert the predictions into a matrix + +fairfield_school_contacts <- predict_contacts( + model = polymod_setting_models$school, + population = fairfield, + age_breaks = c(0, 5, 10, 15, Inf) +) + +fairfield_school_contacts + +fairfield_school_mat <- predictions_to_matrix(fairfield_school_contacts) + +fairfield_school_mat + +matrix_to_predictions(fairfield_school_mat) } diff --git a/man/new_age_matrix.Rd b/man/new_age_matrix.Rd new file mode 100644 index 00000000..e07ce64b --- /dev/null +++ b/man/new_age_matrix.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{new_age_matrix} +\alias{new_age_matrix} +\title{Build new age matrix} +\usage{ +new_age_matrix(matrix, age_breaks) +} +\arguments{ +\item{matrix}{numeric matrix} + +\item{age_breaks}{character vector of age breaks, by default the rownames.} +} +\value{ +matrix with age breaks attribute +} +\description{ +A matrix that knows about its age breaks - which are by default provided as +its rownames. Mostly intended for internal use. +} +\examples{ +age_break_names <- c("[0,5)", "[5,10)", "[10, 15)") +age_mat <- matrix( + runif(9), + nrow = 3, + ncol = 3, + dimnames = list( + age_break_names, + age_break_names + ) +) + +new_age_matrix( + age_mat, + age_breaks = age_break_names +) + +} diff --git a/man/new_conmat_population.Rd b/man/new_conmat_population.Rd new file mode 100644 index 00000000..dd7362ff --- /dev/null +++ b/man/new_conmat_population.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conmat-population.R +\name{new_conmat_population} +\alias{new_conmat_population} +\title{Create a new \code{conmat_population} class object} +\usage{ +new_conmat_population(data, age, population) +} +\arguments{ +\item{data}{data.frame} + +\item{age}{bare column name of numeric data on age} + +\item{population}{bare column name of numeric data on population} +} +\value{ +object with class \code{conmat_population} +} +\description{ +Create a new \code{conmat_population} class object +} +\keyword{internal} diff --git a/man/new_ngm_setting_matrix.Rd b/man/new_ngm_setting_matrix.Rd new file mode 100644 index 00000000..a1c2ee2f --- /dev/null +++ b/man/new_ngm_setting_matrix.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{new_ngm_setting_matrix} +\alias{new_ngm_setting_matrix} +\title{Establish new BGM setting data} +\usage{ +new_ngm_setting_matrix(list_matrix, raw_eigenvalue, scaling, age_breaks) +} +\arguments{ +\item{list_matrix}{list of matrices} + +\item{raw_eigenvalue}{the raw eigenvalue} + +\item{scaling}{scaling factor} + +\item{age_breaks}{vector of age breaks} +} +\value{ +object with additional (primary) class "ngm_setting_matrix", and attributes for "age_breaks", "scaling", and "raw_eigenvalue". +} +\description{ +Establish new BGM setting data +} diff --git a/man/new_setting_data.Rd b/man/new_setting_data.Rd new file mode 100644 index 00000000..ef952bbb --- /dev/null +++ b/man/new_setting_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{new_setting_data} +\alias{new_setting_data} +\title{Establish new setting data} +\usage{ +new_setting_data(list_df) +} +\arguments{ +\item{list_df}{list of data frames} +} +\value{ +object with additional (primary) class "setting data" and an "age_breaks attribute. +} +\description{ +Establish new setting data +} diff --git a/man/per_capita_household_size.Rd b/man/per_capita_household_size.Rd index ef028523..903f37a9 100644 --- a/man/per_capita_household_size.Rd +++ b/man/per_capita_household_size.Rd @@ -16,11 +16,11 @@ distribution of specific state or LGA.} \item{household_size_col}{bare variable name of the column depicting the household size. Default is 'household_size' from -\code{\link[=get_household_size_distribution]{get_household_size_distribution()}}.} +\code{\link[=get_abs_per_capita_household_size_lga]{get_abs_per_capita_household_size_lga()}}.} \item{n_people_col}{bare variable name of the column depicting the total number of people belonging to the respective household size. Default is -'n_people' from \code{\link[=get_household_size_distribution]{get_household_size_distribution()}}.} +'n_people' from \code{\link[=get_abs_per_capita_household_size_lga]{get_abs_per_capita_household_size_lga()}}.} } \value{ Numeric of length 1 - the per capita household size for a given @@ -28,16 +28,17 @@ state or LGA. } \description{ Returns the per capita household size for a location given -its household size distribution. See \code{\link[=get_household_size_distribution]{get_household_size_distribution()}} +its household size distribution. See \code{\link[=get_abs_household_size_distribution]{get_abs_household_size_distribution()}} function for retrieving household size distributions for a given place. } \examples{ -demo_data <- get_household_size_distribution(lga = "Fairfield (C)") +demo_data <- get_abs_household_size_population(lga = "Fairfield (C)") demo_data -per_capita_household_size(household_data=demo_data, - household_size_col=household_size, - n_people_col=n_people) - +per_capita_household_size( + household_data = demo_data, + household_size_col = household_size, + n_people_col = n_people +) } \author{ diff --git a/man/plot_matrix.Rd b/man/plot_matrix.Rd deleted file mode 100644 index 75f927f4..00000000 --- a/man/plot_matrix.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_matrix.R -\name{plot_matrix} -\alias{plot_matrix} -\title{Visualise predicted contact matrix} -\usage{ -plot_matrix(matrix) -} -\arguments{ -\item{matrix}{Square matrix with row and column names indicating the age -groups} -} -\value{ -a ggplot visualisation of contact rates -} -\description{ -Visualising the predicted contact rates facilitates understanding the -underlying patterns and relationships between age groups in different -settings (workplace, home, school, other). The \code{plot_matrix()} function -takes a contact matrix and visualises it, with the x and y axes being -different age groups. -} -\examples{ -\dontrun{ - -set.seed(2021-09-24) -polymod_contact_data <- get_polymod_setting_data() -polymod_survey_data <- get_polymod_population() - -setting_models <- fit_setting_contacts( - contact_data_list = polymod_contact_data, - population = polymod_survey_data -) - -fairfield_age_pop <- abs_age_lga("Fairfield (C)") - -fairfield_age_pop - -synthetic_settings_5y_fairfield <- predict_setting_contacts( - population = fairfield_age_pop, - contact_model = setting_models, - age_breaks = c(seq(0, 85, by = 5), Inf) -) - -# Visualise the projected contact rates for "home" in the Fairfield LGA. The -# strong diagonal shows us that similar age groups typically have higher -# rates of home contact. We can also see parental links include middle-aged -# age groups having observable contact rates with younger age groups. -# For instance, ages between 25 and 45 have higher contact rates with -# newborns (0 to 5) and children (5 to 10), but older age groups, -# including those over 55, tend to have higher contact rates with age -# groups between 25 and 45, showing the interactions between parents and -# children. In addition to this, other patterns that indicates contact -# between children and their grandparents where higher contact rates between -# young children (years 0-5) and grandparents (age 55+) are present. - - -plot_matrix(synthetic_settings_5y_fairfield$home) -} - -} diff --git a/man/plot_setting_matrices.Rd b/man/plot_setting_matrices.Rd deleted file mode 100644 index e072eab8..00000000 --- a/man/plot_setting_matrices.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_setting_matrices.R -\name{plot_setting_matrices} -\alias{plot_setting_matrices} -\title{Visualise predicted contact matrix for each setting} -\usage{ -plot_setting_matrices( - matrices, - title = "Setting-specific synthetic contact matrices" -) -} -\arguments{ -\item{matrices}{A list of square matrices, with row and column names -indicating the age groups.} - -\item{title}{Title to give to plot setting matrices. Default value is: -"Setting-specific synthetic contact matrices (all polymod data)'"} -} -\value{ -ggplot visualisation of contact rates for each setting -} -\description{ -This is an extension of \code{\link[=plot_matrix]{plot_matrix()}}, which visualises the contact -matrix for each setting. It uses \code{patchwork} to combine all the matrices -together -} -\examples{ -\dontrun{ - -set.seed(2021-09-24) -polymod_contact_data <- get_polymod_setting_data() -polymod_survey_data <- get_polymod_population() - -setting_models <- fit_setting_contacts( - contact_data_list = polymod_contact_data, - population = polymod_survey_data -) - -fairfield_age_pop <- abs_age_lga("Fairfield (C)") - -fairfield_age_pop - -synthetic_settings_5y_fairfield <- predict_setting_contacts( - population = fairfield_age_pop, - contact_model = setting_models, - age_breaks = c(seq(0, 85, by = 5), Inf) -) - -plot_setting_matrix(synthetic_settings_5y_fairfield) -} -} diff --git a/man/polymod.Rd b/man/polymod.Rd index 2673f878..0e8092be 100644 --- a/man/polymod.Rd +++ b/man/polymod.Rd @@ -3,14 +3,38 @@ \docType{data} \name{polymod} \alias{polymod} -\title{Polymod data} +\title{Social contact data from 8 European countries (imported from \code{socialmixr})} \format{ -An object of class \code{survey} of length 3. +A list of two data frames: +\describe{ +\item{participants}{the study participant, with age, country, year and day +of the week (starting with 1 = Monday)} +\item{contacts}{reported contacts of the study participants. The variable +phys_contact has two levels (1 denotes physical contact while 2 denotes +non-physical contact), duration_multi has five levels (1 is less than 5 +minutes while 5 is more than 4 hours, increasing in the order found in +Figure 1 in Mossong et al.), and frequency_multi has five levels (1 is +daily, 2 is weekly, 3 is monthly, 4 is less often, and 5 is first time)} +All other variables are described on the Zenodo repository of the data, +available at \doi{10.5281/zenodo.1043437} +} +} +\source{ +\doi{10.1371/journal.pmed.0050074} } \usage{ polymod } \description{ -Imported from \code{socialmixr} +A dataset containing social mixing diary data from 8 European countries: +Belgium, Germany, Finland, Great Britain, Italy, Luxembourg, +The Netherlands and Poland. +} +\details{ +This data has been sourced from the \href{https://CRAN.R-project.org/package=socialmixr}{socialmixr} package. + +The Data are fully described in Mossong J, Hens N, Jit M, Beutels P, Auranen +K, Mikolajczyk R, et al. (2008) Social Contacts and Mixing Patterns Relevant +to the Spread of Infectious Diseases. PLoS Med 5(3): e74. } \keyword{datasets} diff --git a/man/polymod_setting_models.Rd b/man/polymod_setting_models.Rd index ed344b35..2c7f653e 100644 --- a/man/polymod_setting_models.Rd +++ b/man/polymod_setting_models.Rd @@ -21,11 +21,12 @@ avoid recomputing a relatively common type of model for use with \code{conmat}. \dontrun{ # code used to produce this data library(conmat) -set.seed(2022-08-26) +set.seed(2022 - 08 - 26) polymod_contact_data <- get_polymod_setting_data() polymod_survey_data <- get_polymod_population() polymod_setting_models <- fit_setting_contacts( - contact_data_list = polymod_contact_data, + contact_data_list = polymod_contact_data, + # population = polymod_survey_data ) } diff --git a/man/predict_contacts.Rd b/man/predict_contacts.Rd index c5373783..d146eebd 100644 --- a/man/predict_contacts.Rd +++ b/man/predict_contacts.Rd @@ -56,24 +56,24 @@ number of household members a person in the population can have contact with. } \examples{ - # If we have a model of contact rate at home, and age population structure - # for an LGA, say, Fairfield, in NSW: - - polymod_setting_models$home - - fairfield_abs_data <- abs_age_lga("Fairfield (C)") - - fairfield_abs_data - - # We can predict the contact rate for Fairfield from the existing contact - # data, say, between the age groups of 0-15 in 5 year bins for school: - - fairfield_school_contacts <- predict_contacts( - model = polymod_setting_models$school, - population = fairfield_abs_data, - age_breaks = c(0, 5, 10, 15,Inf) - ) - - fairfield_school_contacts - +# If we have a model of contact rate at home, and age population structure +# for an LGA, say, Fairfield, in NSW: + +polymod_setting_models$home + +fairfield <- abs_age_lga("Fairfield (C)") + +fairfield + +# We can predict the contact rate for Fairfield from the existing contact +# data, say, between the age groups of 0-15 in 5 year bins for school: + +fairfield_school_contacts <- predict_contacts( + model = polymod_setting_models$school, + population = fairfield, + age_breaks = c(0, 5, 10, 15, Inf) +) + +fairfield_school_contacts + } diff --git a/man/predict_contacts_1y.Rd b/man/predict_contacts_1y.Rd index aa86726a..28a88871 100644 --- a/man/predict_contacts_1y.Rd +++ b/man/predict_contacts_1y.Rd @@ -44,15 +44,15 @@ participant & contact ages. } \examples{ -fairfield_abs_data <- abs_age_lga("Fairfield (C)") +fairfield <- abs_age_lga("Fairfield (C)") -fairfield_abs_data +fairfield # predict the contact rates in 1 year blocks to Fairfield data fairfield_contacts_1 <- predict_contacts_1y( model = polymod_setting_models$home, - population = fairfield_abs_data, + population = fairfield, age_min = 0, age_max = 2 ) diff --git a/man/predict_setting_contacts.Rd b/man/predict_setting_contacts.Rd index 0ac5fbae..ffdf2973 100644 --- a/man/predict_setting_contacts.Rd +++ b/man/predict_setting_contacts.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/predict_setting_contacts.R \name{predict_setting_contacts} \alias{predict_setting_contacts} -\title{Predict contact rate between age groups across all settings} +\title{Predict setting contacts} \usage{ predict_setting_contacts( population, @@ -13,19 +13,17 @@ predict_setting_contacts( ) } \arguments{ -\item{population}{a dataframe of age population information, with columns -indicating some lower age limit, and population, (e.g., \code{\link[=get_polymod_population]{get_polymod_population()}})} +\item{population}{population} -\item{contact_model}{A list of GAM models for each setting. See example -output from \code{fit_setting_contact} below} +\item{contact_model}{contact_model} -\item{age_breaks}{A vector of age breaks.} +\item{age_breaks}{age_breaks} \item{per_capita_household_size}{Optional (defaults to NULL). When set, it adjusts the household contact matrix by some per capita household size. To set it, provide a single number, the per capita household size. More information is provided below in Details. See -\code{\link[=get_per_capita_household_size]{get_per_capita_household_size()}} function for a helper for Australian +\code{\link[=get_abs_per_capita_household_size]{get_abs_per_capita_household_size()}} function for a helper for Australian data with a workflow on how to get this number.} \item{model_per_capita_household_size}{modelled per capita household size. @@ -33,28 +31,14 @@ Default values for this are from \code{\link[=get_polymod_per_capita_household_size]{get_polymod_per_capita_household_size()}}, which ends up being 3.248971} } \value{ -List of contact rate of matrices for each setting: ("home", "work", -"school", "other"). +List of setting matrices } \description{ -Predicts the expected contact rate across all settings ("home", -"school", "work", and "other") over specified age breaks, -given some model of contact rate and population age structure. Optionally -performs an adjustment for per capita household size. See "details" for more -information. +Predict contact rate for each setting. Note that this function is +parallelisable with \code{future}, and will be impacted by any \code{future} plans +provided. } \details{ -The population data is used to determine age range to predict -contact rates, and removes ages with zero population, so we do not -make predictions for ages with zero populations. Contact rates are -predicted yearly between the age groups, using \code{\link[=predict_contacts_1y]{predict_contacts_1y()}}, -then aggregates these predicted contacts using -\code{\link[=aggregate_predicted_contacts]{aggregate_predicted_contacts()}}, which aggregates the predictions back to -the same resolution as the data, appropriately weighting the contact rate -by the population. Predictions are converted to matrix format using -\code{\link[=predictions_to_matrix]{predictions_to_matrix()}} to produce contact matrices for all age groups -combinations across different settings or location of contact. - We use Per-capita household size instead of mean household size. Per-capita household size is different to mean household size, as the household size averaged over \strong{people} in the population, not over @@ -70,8 +54,10 @@ with. \examples{ # don't run as it takes too long to fit \dontrun{ -fairfield_age_pop <- abs_age_lga("Fairfield (C)") -fairfield_age_pop +fairfield <- abs_age_lga("Fairfield (C)") +fairfield + +age_break_0_85_plus <- c(seq(0, 85, by = 5), Inf) polymod_contact_data <- get_polymod_setting_data() polymod_survey_data <- get_polymod_population() @@ -82,18 +68,18 @@ setting_models <- fit_setting_contacts( ) synthetic_settings_5y_fairfield <- predict_setting_contacts( - population = fairfield_age_pop, + population = fairfield, contact_model = setting_models, - age_breaks = c(seq(0, 85, by = 5), Inf) + age_breaks = age_break_0_85_plus ) -fairfield_hh_size <- get_per_capita_household_size(lga = "Fairfield (C)") +fairfield_hh_size <- get_abs_per_capita_household_size(lga = "Fairfield (C)") fairfield_hh_size synthetic_settings_5y_fairfield_hh <- predict_setting_contacts( - population = fairfield_age_pop, + population = fairfield, contact_model = setting_models, - age_breaks = c(seq(0, 85, by = 5), Inf), + age_breaks = age_break_0_85_plus, per_capita_household_size = fairfield_hh_size ) } diff --git a/man/predictions_to_matrix.Rd b/man/predictions_to_matrix.Rd index 310a540a..633115c3 100644 --- a/man/predictions_to_matrix.Rd +++ b/man/predictions_to_matrix.Rd @@ -4,11 +4,13 @@ \alias{predictions_to_matrix} \title{Convert dataframe of predicted contacts into matrix} \usage{ -predictions_to_matrix(contact_predictions) +predictions_to_matrix(contact_predictions, ...) } \arguments{ \item{contact_predictions}{data frame with columns \code{age_group_from}, \code{age_group_to}, and \code{contacts}.} + +\item{...}{extra arguments} } \value{ Square matrix with the unique age groups from \code{age_group_from/to} @@ -20,19 +22,19 @@ frames to matrix format with the survey participant age groups as columns and contact age groups as rows. } \examples{ - fairfield_abs_data <- abs_age_lga("Fairfield (C)") - - # We can convert the predictions into a matrix - - fairfield_school_contacts <- predict_contacts( - model = polymod_setting_models$school, - population = fairfield_abs_data, - age_breaks = c(0, 5, 10, 15,Inf) - ) - - fairfield_school_contacts - - # convert them back to a matrix - predictions_to_matrix(fairfield_school_contacts) - +fairfield <- abs_age_lga("Fairfield (C)") + +# We can convert the predictions into a matrix + +fairfield_school_contacts <- predict_contacts( + model = polymod_setting_models$school, + population = fairfield, + age_breaks = c(0, 5, 10, 15, Inf) +) + +fairfield_school_contacts + +# convert them back to a matrix +predictions_to_matrix(fairfield_school_contacts) + } diff --git a/man/prem_germany_contact_matrices.Rd b/man/prem_germany_contact_matrices.Rd new file mode 100644 index 00000000..18a6114f --- /dev/null +++ b/man/prem_germany_contact_matrices.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_prem_contact_matrices.R +\docType{data} +\name{prem_germany_contact_matrices} +\alias{prem_germany_contact_matrices} +\title{Contact matrices as calculated by Prem. et al.} +\format{ +A list with 5 elements: +\describe{ +\item{home}{A 16x16 matrix containing the number of home contacts, by 5 +year age group} +\item{work}{A 16x16 matrix containing the number of workplace contacts, by +5 year age group} +\item{school}{A 16x16 matrix containing the number of school contacts, by 5 +year age group} +\item{other}{A 16x16 matrix containing the number of other contacts, by 5 +year age group} +} +All age groups are 5 year age bands, from 0 to 80. +} +\source{ +\url{https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005697} +} +\usage{ +prem_germany_contact_matrices +} +\description{ +Contact matrices as calculated by Prem. et al. PLoS Computational Biology. +DOI: 10.1371/journal.pcbi.1005697 +} +\keyword{datasets} diff --git a/man/prepare_population_for_modelling.Rd b/man/prepare_population_for_modelling.Rd new file mode 100644 index 00000000..7edc805a --- /dev/null +++ b/man/prepare_population_for_modelling.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-age-population-function-internals.R +\name{prepare_population_for_modelling} +\alias{prepare_population_for_modelling} +\alias{prepare_population_for_modelling.conmat_population} +\alias{prepare_population_for_modelling.data.frame} +\title{Prepare population data for generating an age population function} +\usage{ +prepare_population_for_modelling(data, ...) + +\method{prepare_population_for_modelling}{conmat_population}(data, ...) + +\method{prepare_population_for_modelling}{data.frame}( + data = data, + age_col = age_col, + pop_col = pop_col, + ... +) +} +\arguments{ +\item{data}{data.frame} + +\item{...}{extra arguments} + +\item{age_col}{column of ages} + +\item{pop_col}{column of population,} +} +\value{ +list of objects, \code{max_bound} \code{pop_model_bounded} \code{bounded_pop} \code{unbounded_pop} for use in \code{\link[=get_age_population_function]{get_age_population_function()}} +} +\description{ +Prepares objects for use in \code{\link[=get_age_population_function]{get_age_population_function()}}. +} +\author{ +njtierney +} +\keyword{internal} diff --git a/man/raw_eigenvalue.Rd b/man/raw_eigenvalue.Rd new file mode 100644 index 00000000..3e470ee9 --- /dev/null +++ b/man/raw_eigenvalue.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{raw_eigenvalue} +\alias{raw_eigenvalue} +\title{Get raw eigvenvalue from NGM matrix} +\usage{ +raw_eigenvalue(list_matrix) +} +\arguments{ +\item{list_matrix}{object of class \code{ngm_setting_matrix}} +} +\value{ +raw eigenvalue +} +\description{ +Get raw eigvenvalue from NGM matrix +} +\examples{ +# examples not run as they take a long time +\dontrun{ +perth <- abs_age_lga("Perth (C)") +perth_contact <- extrapolate_polymod(perth) +perth_ngm <- generate_ngm( + perth_contact, + age_breaks = c(seq(0, 85, by = 5), Inf) +) +raw_eigenvalue(perth_ngm) +} +} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 00000000..43048ff2 --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/conmat-package.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{autoplot} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. + +\describe{ + \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} +}} + diff --git a/man/scaling.Rd b/man/scaling.Rd new file mode 100644 index 00000000..d4453e4a --- /dev/null +++ b/man/scaling.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructors.R +\name{scaling} +\alias{scaling} +\title{Get the scaling from NGM matrix} +\usage{ +scaling(list_matrix) +} +\arguments{ +\item{list_matrix}{object of class \code{ngm_setting_matrix}} +} +\value{ +scaling +} +\description{ +This value is \code{scaling <- R_target / R_raw}, where \code{R_target} is the target +R value provided to the NGM, and \code{R_raw} is the raw eigenvalue. +} +\examples{ +# examples not run as they take a long time +\dontrun{ +perth <- abs_age_lga("Perth (C)") +perth_contact <- extrapolate_polymod(perth) +perth_ngm <- generate_ngm( + perth_contact, + age_breaks = c(seq(0, 85, by = 5), Inf) +) +raw_eigenvalue(perth_ngm) +scaling(perth_ngm) +} +} diff --git a/man/setting_prediction_matrix.Rd b/man/setting_prediction_matrix.Rd new file mode 100644 index 00000000..e5d88db4 --- /dev/null +++ b/man/setting_prediction_matrix.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setting-prediction-matrix.R +\name{setting_prediction_matrix} +\alias{setting_prediction_matrix} +\title{Create a setting prediction matrix} +\usage{ +setting_prediction_matrix(..., age_breaks) +} +\arguments{ +\item{...}{list of matrices} + +\item{age_breaks}{age breaks - numeric} +} +\value{ +setting prediction matrix +} +\description{ +Helper function to create your own setting prediction matrix, which you +may want to use in \code{generate_ngm}, or \code{autoplot}. This class is the +output of functions like \code{extrapolate_polymod}, and +\code{predict_setting_contacts}. We recommend using this function is only for +advanced users, who are creating their own setting prediction matrix. +} +\examples{ + +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +one_by_nine <- matrix(1, nrow = 9, ncol = 9) + +x_example <- setting_prediction_matrix( + home = one_by_nine, + work = one_by_nine, + age_breaks = age_breaks_0_80_plus +) + +x_example <- setting_prediction_matrix( + one_by_nine, + one_by_nine, + age_breaks = age_breaks_0_80_plus +) + +x_example + +} diff --git a/man/transmission_probability_matrix.Rd b/man/transmission_probability_matrix.Rd new file mode 100644 index 00000000..2c27efdd --- /dev/null +++ b/man/transmission_probability_matrix.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setting-transmission-matrix.R +\name{transmission_probability_matrix} +\alias{transmission_probability_matrix} +\title{Create a setting transmission matrix} +\usage{ +transmission_probability_matrix(..., age_breaks) +} +\arguments{ +\item{...}{list of matrices} + +\item{age_breaks}{age breaks - numeric} +} +\value{ +transmission probability matrix +} +\description{ +Helper function to create your own setting transmission matrix, which you +may want to use in ... or \code{autoplot}. This class is the +output of functions like \code{...}, and ... . We recommend using this +function is only for advanced users, who are creating their own +transmission probability matrix. +} +\examples{ + +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +one_05 <- matrix(0.05, nrow = 9, ncol = 9) + +x_example <- transmission_probability_matrix( + home = one_05, + work = one_05, + age_breaks = age_breaks_0_80_plus +) + +x_example <- transmission_probability_matrix( + one_05, + one_05, + age_breaks = age_breaks_0_80_plus +) + +x_example + +} diff --git a/man/unabbreviate_states.Rd b/man/unabbreviate_states.Rd deleted file mode 100644 index 8988f299..00000000 --- a/man/unabbreviate_states.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/unabbreviate_states.R -\name{unabbreviate_states} -\alias{unabbreviate_states} -\title{Un-abbreviate Australian state names} -\usage{ -unabbreviate_states(state_names) -} -\arguments{ -\item{state_names}{vector of state names in short form} -} -\value{ -Longer state names -} -\description{ -Un-abbreviate Australian state names -} -\examples{ -unabbreviate_states("VIC") -unabbreviate_states(c("VIC", "QLD")) -} -\seealso{ -\code{\link[=abbreviate_states]{abbreviate_states()}} -} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 00000000..f0a2b854 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,276 @@ +--- +title: 'conmat: generate synthetic contact matrices for a given age-stratified population' +authors: +- affiliation: 1 + name: Nicholas Tierney + orcid: 0000-0003-1460-8722 +- affiliation: 1,2 + name: Nick Golding + orcid: 0000-0001-8916-5570 +- affiliation: 1,3 + name: Aarathy Babu + orcid: +- affiliation: 4 + name: Michael Lydeamore + orcid: 0000-0001-6515-827X +- affiliation: 1,3 + name: Chitra Saraswati + orcid: 0000-0002-8159-0414 +date: "03 May 2024" +output: + html_document: + keep_md: yes + pdf_document: default +bibliography: references.bib +tags: +- epidemiology +- R +- infectious disease +affiliations: +- index: 1 + name: Telethon Kids Institute +- index: 2 + name: Curtin University +- index: 3 + name: +- index: 4 + name: Monash University +--- + + + +# Summary + +[ A summary describing the high-level functionality and purpose of the software for a diverse, non-specialist audience. ] + +This article introduces `conmat`, an R package which generates synthetic contact matrices. + +There are currently few options for a user to generate their own synthetic contact matrices. +Existing methods to generate synthetic contact matrices are not designed for replicability, do not have enough granularity, and does not cover enough administrative areas (in other words, some countries are not included). + +[ What's different and useful about conmat? ] +Users might have their own contact survey data that they would like to generate synthetic contact matrices from. +Perhaps the population demography is different, or the contact rates varying. +A higher level of granularity however is sometimes required to make public health decisions for a given population. + +`conmat` also provides flexibility, in that it allows users to specify the area in which they would like the contact matrices to be generated; it allows users to specify their own age groups and population structures; it allows users to upload their own contact surveys to fit the model on; and it allows users to generate the contact matrices at different settings. + +[ What else is covered in this paper? ] An example use-case for `conmat` is provided for a local government area (i.e. at the sub-national level) in Australia. +Also provided is an analysis pipeline to support conmat, [`syncomat`](https://github.com/idem-lab/syncomat), which generates synthetic contact matrices for 200 countries. + +# Statement of need + +[ A Statement of need section that clearly illustrates the research purpose of the software and places it in the context of related work. ] + +#TODO - A better first sentence that encapsulates conmat use? +Understanding the dynamics of infectious disease transmission is an important task (?) for epidemiologists and public policy makers. +Identifying vulnerable groups and predicting disease transmission (?)dynamics / how diseases spread are essential for informed public health decision-making. +Infectious diseases such as influenza and coronavirus spread through human-to-human interactions, or in other words, "social contact". Quantifying social contact and its patterns can provide critical insights into how these diseases spread. [ Is this circular? ] / and how best to mitigate the spread of these diseases. + +We can measure social contact through social contact surveys, where people describe the number and type of social contact they have. These surveys provide (?) a measure of contact rates: an empirical estimate of the number of social contacts from one age group to another and the setting of contact. For example, we might learn from a contact survey that homes have higher contact between 25-50 year olds and 0-15 year olds, whereas workplaces might have high contact within 25-60 year olds. + +These social contact surveys exist for a few countries. As an example, the "POLYMOD" study by @mossong2008 covered 8 European countries: Belgium, Germany, Finland, Great Britain, Italy, Luxembourg, The Netherlands, and Poland [@mossong2008]. However, what do we do when we want to estimate contact rates in other countries where this is not yet measured? We can use existing data--the contact rates obtained from contact surveys--to help us project / predict these estimates to countries or places that do not have them. These are called "synthetic contact matrices". A popular approach by @prem2017 projected contact rates from the POLYMOD study to 152 countries. This was later updated to include synthetic contact matrices for 177 countries at "urban" and "rural" levels for each country [@prem2021]. +[ #TODO is project or predict a better word? Does it matter? ] + +However, there were major limitations with the methods in @prem2021. First, not all countries were included in their analyses. Second, some of the synthetic contact matrices did not have enough granularity; in other words, they covered areas that are too large, such as the "urban" or "rural" parts of a country. This is disadvantageous as public health groups might need to make predictions for more fine-grained areas within a country, such as a district or municipality. Third, the methodology used by Prem et al. was challenging to reuse in other contexts. Prem et al. provided the code used for their analysis, but that code was not designed for replicability and easy modification with user-defined inputs. + +[REVISED PARAGRAPH BELOW] The `conmat` package was created to fill a specific need for creating synthetic contact matrices for specific local government areas for Australia, for work commissioned by the Australian government. We created methods and software to facilitate the following: + +The `conmat` package was developed to fill the specific need of creating synthetic contact matrices for local government areas in Australia. This package is used for [ #TODO what work, specifically? Health? Provide example. Or is *this* package commissioned by the Aus govt? ] work commissioned by the Australian government. +We developed methods and software to facilitate the following tasks. + +- Generate, as output, synthetic contact matrices from age-stratfied population data. +- Create next generation matrices (NGMs). +- Apply vaccination reduction to NGMs. +- Use NGMs in disease modelling. +- Provide tidied population data from the Australian Bureau of Statistics. + +# Example + +As an example, let us generate a contact matrix for a local government area within Australia, using a model fitted from the POLYMOD data. + +Suppose we want to generate a contact matrix for the City of Perth. We can get the age-stratified population data for Perth from the helper function `abs_age_lga`: + + +``` r +library(conmat) +perth <- abs_age_lga("Perth (C)") +perth +``` + +``` +#> # A tibble: 18 × 4 (conmat_population) +#> - age: lower.age.limit +#> - population: population +#> lga lower.age.limit year population +#> +#> 1 Perth (C) 0 2020 1331 +#> 2 Perth (C) 5 2020 834 +#> 3 Perth (C) 10 2020 529 +#> 4 Perth (C) 15 2020 794 +#> 5 Perth (C) 20 2020 3615 +#> 6 Perth (C) 25 2020 5324 +#> 7 Perth (C) 30 2020 4667 +#> 8 Perth (C) 35 2020 3110 +#> 9 Perth (C) 40 2020 1650 +#> 10 Perth (C) 45 2020 1445 +#> 11 Perth (C) 50 2020 1299 +#> 12 Perth (C) 55 2020 1344 +#> 13 Perth (C) 60 2020 1359 +#> 14 Perth (C) 65 2020 1145 +#> 15 Perth (C) 70 2020 1004 +#> 16 Perth (C) 75 2020 673 +#> 17 Perth (C) 80 2020 481 +#> 18 Perth (C) 85 2020 367 +``` + +We can then generate a contact matrix for `perth` using the `extrapolate_polymod` function, where the contact matrix is generated using a model fitted from the POLYMOD data. + + +``` r +perth_contact <- extrapolate_polymod(population = perth) +perth_contact +``` + +``` +#> +``` + +``` +#> ── Setting Prediction Matrices ───────────────────────────────────────────────── +``` + +``` +#> A list of matrices containing the model predicted contact rate between ages in +#> each setting. +``` + +``` +#> There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval +``` + +``` +#> • home: a 16x16 +``` + +``` +#> • work: a 16x16 +``` + +``` +#> • school: a 16x16 +``` + +``` +#> • other: a 16x16 +``` + +``` +#> • all: a 16x16 +``` + +``` +#> ℹ Access each with `x$name` +``` + +``` +#> ℹ e.g., `x$home` +``` + +We can plot the resulting contact matrix for Perth with `autoplot`: + + +``` r +autoplot(perth_contact) +``` + + + + +# Implementation + +`conmat` was built to predict at four settings: work, school, home, and other. +One model is fitted for each setting. +Each model fitted is a Poisson generalised additive model (GAM) which predicts the count of contacts, with an offset for the log of participants. +The model has six (?)covariates/terms to explain six key features of the relationship between ages, +and two optional terms for attendance at school or work. +The two optional terms are included depending on which setting the model is fitted for. + +Each cell in the resulting contact matrix, indexed *i*, *j*, is the predicted number of people in age group *j* that a single individual in age group *i* will have contact with per day. If you sum across all the *j* age groups for each *i* age group, you get the predicted total number of contacts per day for each individual of age group *i*. [ #TODO expected, predicted, or average? Does it matter? ] + +The six terms are +$|i-j|$, +${|i-j|}^{2}$, +$i + j$, +$i \times j$, +$\text{max}(i, j)$ and +$\text{min}(i, j)$. + +The six key features of the relationship between the age groups, represented by the six terms, are displayed in the figure below. +[ #TODO notes-to-self: the model structure wasn't generated through any particularly robust process, it was just coming up with structures that looked mildly appropriate for our use case. ] + + +``` r +# Show partial dep plot of the six main terms +``` + +Note that these partial dependency plots are on the log scale. +When the six terms are added up together for each setting (in other words, each model) and exponentiated, they show the following patterns: + + +``` r +# Show combined partial dep plot (i.e. sum of the partial dependencies for all six terms) in each setting: home, school, work and other +``` + +In other words, the six terms above provide patterns that are useful in modelling the different settings, +and correspond with real-life situations of how contact would look like. +In the home setting for example, [ #TODO describe how children interact with parents and elderly generation, grandparents ]. +When the terms for school and work are added, these terms also provide patterns that correspond with real-life situations. +https://idem-lab.github.io/conmat/dev/articles/visualising-conmat.html +In the school setting, children tend to contact other children in the same age groups as them. +In the work setting, there are no contacts with children under the age of ten and minimal contact with adults beyond retirement age. + +One of the issues with the contact matrices generated by @prem2017 is that some countries are missing. To remedy this we generated synthetic contact matrices for 200 countries, based on a list of country names by the UN, fitted on the POLYMOD contact surveys. +We also ensured that the analysis pipeline is reproducible and transparent by utilising a targets workflow, which allows ease of editing for users. +The resulting synthetic contact matrices, and a replicable / extensible (?) analysis pipeline, can be found in the syncomat analysis pipeline ([GitHub](https://github.com/idem-lab/syncomat), [Zenodo](https://zenodo.org/records/11365943)). + +## Model interfaces + +We provide functions for model fitting at various use cases. Further detail for each of the following functions are available at: https://idem-lab.github.io/conmat/dev/ + +* `fit_single_contact_model()` + * Fits a generalised additive model (GAM) using contact survey data and population size information. This function is recommended when you want to fit a model to only one setting, for which you might want to provide your own contact survey data. + +* `predict_contacts()` + + * This takes a fitted model from `fit_single_contact_model()` and predicts [ #TODO what is predicted? ] to a provided (?) population structure. + +* `fit_setting_contacts()` + * Fits the `fit_single_contact_model()` to each setting. This function is useful for when you have multiple settings to fit. Returns a list of fitted models. + +* `predict_setting_contacts()` + * Takes a list of fitted models from `fit_setting_contacts()` and predicts [ #TODO what is predicted? ] to a given population for each setting. + +* `estimate_setting_contacts()` + * A convenience function that fits multiple models, one for each setting. This means fitting `fit_setting_contacts()` and then `predict_setting_contacts()`. Recommended for when you have multiple settings to fit and want to predict to a given population as well. + +* `extrapolate_polymod()` + * Takes population information and projects pre-fit model from POLYMOD - used for speed when you know you want to take an already fit model from POLYMOD and just fit it to your provided population. + +[ #TODO for the above it's good to explain what exactly is predicted. Otherwise it's confusing for the user to understand what each of the model outputs? ] + +# Conclusions and future directions + +Our future direction for `conmat` includes adding the following functionalities: +* Create a contact matrix using a custom contact survey from another source, such as the `socialmixr` R package. +* Predict to any age brackets - such as monthly ages, for example, 1, 3, 6, month year old infants +* Add ability to fit multiple contact surveys at once, e.g., POLYMOD and another dataset +* Add ability to include known household age distributions as offsets in the 'home' setting model, in place of the whole population distribution. So compute household age matrices (like age-structured contact matrices, but for household members instead of contacts) from POLYMOD data. If we compute a different one for each household size, in the POLYMOD data (probably estimated with another GAM, to make best use of the limited data) we might be able to extrapolate household age matrices to new countries based on the distribution of household sizes. +* Add methods for including household size distributions +* Add uncertainty to estimates +* Move Australian centric data into its own package +* Add documentation on specifying your own GAM model and using this workflow + +[ #TODO Change concluding sentence. The following is copied ad verbatim from JSS bizicount because I like it ] For now, however, we feel that our base `bizicount` package is sufficiently general to assist in estimating the models most often encountered by applied researchers. + +# References diff --git a/paper/paper.pdf.md b/paper/paper.pdf.md new file mode 100644 index 00000000..be3b4a2d --- /dev/null +++ b/paper/paper.pdf.md @@ -0,0 +1,340 @@ +--- +title: '`conmat`: generate synthetic contact matrices for a given age-stratified population' +authors: +- affiliation: 1 + name: Nicholas Tierney + orcid: 0000-0003-1460-8722 +- affiliation: 1,2 + name: Nick Golding + orcid: 0000-0001-8916-5570 +- affiliation: 1,3 + name: Aarathy Babu + orcid: +- affiliation: 4 + name: Michael Lydeamore + orcid: 0000-0001-6515-827X +- affiliation: 1,3 + name: Chitra Saraswati + orcid: 0000-0002-8159-0414 +date: today +bibliography: references.bib +cite-method: biblatex +tags: +- epidemiology +- R +- infectious disease +affiliations: +- index: 1 + name: Telethon Kids Institute +- index: 2 + name: Curtin University +- index: 3 + name: +- index: 4 + name: Monash University +execute: + echo: true + cache: false +format: + pdf: + keep-md: true + fig-height: 4 + fig-align: center + fig-format: png + dpi: 300 + html: + keep-md: true + fig-height: 4 + fig-align: center + fig-format: png + dpi: 300 +--- + + +::: {.cell} + +::: + +::: {.cell} + +::: + + + + +# Summary + +Contact matrices describe the number of contacts between individuals. They are used to create models of infectious disease spread. `conmat` is an R package which generates synthetic contact matrices for arbitrary input demography, ready for use in infectious diseases modelling. + +There are currently few options for a user to access synthetic contact matrices [@socialmixr; @prem2017]. Existing code to generate synthetic contact matrices from @prem2017 are not designed for replicability, are restricted to select countries, and provide no sub-national demographic estimates. + +The `conmat` package exposes model fitting and prediction separately to the user. Users can fit a model based on a contact survey such as POLYMOD [@mossong2008], then predict from this model to their own demographic data. This means users can generate synthetic contact matrices for any region, with any contact survey. + +We demonstrate a use-case for `conmat` by creating contact matrices for sub-national level (in this case, a state) in Australia. + +For users who do not wish to run the entire `conmat` pipeline, we have pre-generated synthetic contact matrices for 200 countries, based on a list of countries from the United Nations, using a model fit to the POLYMOD contact survey. These resulting synthetic contact matrices, and the associated code, can be found in the syncomat analysis pipeline ([GitHub](https://github.com/idem-lab/syncomat), [Zenodo](https://zenodo.org/records/11365943)) [@syncomat]. + +# Statement of need + +Infectious diseases like influenza and COVID19 spread via social contact. If we can understand patterns of contact---which individuals are more likely be in contact with each other---then we will be able to create models of how disease spreads. Epidemiologists and public policy makers can use these models to make decisions to keep a population safe and healthy. + +Empirical estimates of social contact are provided by social contact surveys. These provide samples of the frequency and type of social contact across different settings (home, work, school, other). + +A prominent contact survey is the "POLYMOD" study by @mossong2008, which surveyed 8 European countries: Belgium, Germany, Finland, Great Britain, Italy, Luxembourg, The Netherlands, and Poland [@mossong2008]. + +These social contact surveys can be projected on to a given demographic structure to produce estimated daily contact rates between age groups. These are known as "contact matrices" or "synthetic contact matrices". A widely used approach by @prem2017 [@prem2021] produced contact matrices for 177 countries at "urban" and "rural" levels for each country. + +However, there were major limitations with the methods in @prem2021. First, not all countries were included in their analyses. Second, the contact matrices only covered broad scale areas. This presents challenges for decision makers who are often working at a sub-national geographical scale. Third, the code provided by Prem et al. was not designed for replicability and easy modification with user-defined inputs. + +The `conmat` package was developed to fill the specific need of creating contact matrices for arbitrary age categories and populations (as shown in the below example). We developed the method primarily to output synthetic contact matrices. We also provided methods to create next generation matrices. + +# Example + +We will generate a contact matrix for Tasmania, a state in Australia, using a model fitted from the POLYMOD contact survey. We can get the age-stratified population data for Tasmania from the Australian Bureau of Statistics (ABS) with the helper function, `abs_age_state()`: + + + + +::: {.cell} + +```{.r .cell-code} +tasmania <- abs_age_state("TAS") +head(tasmania) +``` + +::: {.cell-output .cell-output-stdout} + +``` +# A tibble: 6 x 4 (conmat_population) + - age: lower.age.limit + - population: population + year state lower.age.limit population + +1 2020 TAS 0 29267 +2 2020 TAS 5 31717 +3 2020 TAS 10 33318 +4 2020 TAS 15 31019 +5 2020 TAS 20 31641 +6 2020 TAS 25 34115 +``` + + +::: +::: + + + + +We can then generate a contact matrix for Tasmania, from the POLYMOD study with `extrapolate_polymod()`. + + + + +::: {.cell} + +```{.r .cell-code} +tasmania_contact <- extrapolate_polymod(population = tasmania) +tasmania_contact +``` + +::: {.cell-output .cell-output-stderr} + +``` + +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +-- Setting Prediction Matrices ------------------------------------------------- +``` + + +::: + + +::: {.cell-output .cell-output-stderr} + +``` +A list of matrices containing the model predicted contact rate between ages in +each setting. +``` + + +::: + + +::: {.cell-output .cell-output-stderr} + +``` +There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval +``` + + +::: + + +::: {.cell-output .cell-output-stderr} + +``` +* home: a 16x16 +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +* work: a 16x16 +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +* school: a 16x16 +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +* other: a 16x16 +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +* all: a 16x16 +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +i Access each with `x$name` +``` + + +::: + +::: {.cell-output .cell-output-stderr} + +``` +i e.g., `x$home` +``` + + +::: +::: + + + + +We can plot the resulting contact matrix for Tasmania with `autoplot`, shown in @fig-autoplot-contacts. + + + + +::: {.cell} + +```{.r .cell-code} +autoplot(tasmania_contact) +``` + +::: {.cell-output-display} +![Contact patterns between individuals for different age groups across four settings: home, work, school, and other. The x axis shows the age group from, and the y axis shows the age group to, coloured by the average number of contacts between individuals in two age groups. We see different contact patterns in different settings, for example, diagonal with wings for the home setting.](paper_files/figure-pdf/fig-autoplot-contacts-1.png){#fig-autoplot-contacts fig-pos='H'} +::: +::: + + + + +# Implementation + +The overall approach of `conmat` has two parts: + +1) fit a model to predict individual contact rate, using an existing contact survey; +2) predict a synthetic contact matrix using age population data. + +## Model fitting + +`conmat` was built to predict at four settings: work, school, home, and other. +One model is fitted for each setting. +Each model fitted is a Poisson generalised additive model (GAM) with a log link function, which predicts the count of contacts, with an offset for the log of participants. +The model has six covariates to explain six key features of the relationship between ages, +and two optional covariates for attendance at school or work. +The two optional covariates are included depending on which setting the model is fitted for. + +Each cell in the resulting contact matrix (after back-transformation of the link function), indexed ($i$, $j$), is the predicted number of people in age group $j$ that a single individual in age group $i$ will have contact with per day. The sum over all age groups $j$ for a particular age group $i$ is the predicted total number of contacts per day for each individual of age group $i$. + +The six covariates are: + +- $|i-j|$, +- $|i-j|^{2}$, +- $i + j$, +- $i \times j$, +- $\text{max}(i, j)$ and +- $\text{min}(i, j)$. + +These covariates capture typical features of inter-person contact, where individuals primarily interact with people of similar age (the diagonals of the matrix), and with grandparents and/or children (the so-called 'wings' of the matrix). The key features of the relationship between the age groups, represented by the six covariates, are displayed in @fig-show-partial-plots for the home setting. The $|i-j|$ term gives the strong diagonal, modelling people generally living with those of similar age, and the $\max(i,j)$ and $\min(i,j)$ terms give the intergenerational effect of parents and grandparents with children. + + + + +::: {.cell} + +::: + +::: {.cell} +::: {.cell-output-display} +![Partial predictive plot (A) and overall synthetic contact matrix (B) for the Poisson GAM fitted to the POLYMOD contact survey in the home setting. The strong diagonal elements, and parents/grandparents interacting with children result in the classic 'diagonal with wings' shape.](paper_files/figure-pdf/fig-show-partial-plots-1.png){#fig-show-partial-plots} +::: +::: + + + + +Visualising the partial predictive plots for other settings (school, work and other) show patterns that correspond with real-life situations. A full visualisation pipeline is available at https://idem-lab.github.io/conmat/dev/articles/visualising-conmat.html + +# Conclusions and future directions + +The `conmat` software provides a flexible interface to generating synthetic contact matrices using population data and contact surveys. These contact matrices can then be used in infectious disease modelling and surveillance. + +The main strength of `conmat` is its interface requiring only age population data to create a synthetic contact matrix. Current approaches provide only a selection of country level contact matrices. This software can predict to arbitrary demography, such as sub-national or simulated populations. + +We provide a trained model of contact rate that is fit to the POLYMOD survey for ease of use. The software also has an interface to use other contact surveys, such as @comix. This is important as POLYMOD represents contact patterns in 8 countries in Europe, and contact patterns are known to differ across nations and cultures. + +The covariates used by `conmat` were designed to represent the key features that are typically present in a contact matrix for different settings (work, school, home, other). Including other sources of information that may better describe these contact patterns, such as inter-generational mixing, or differences in school ages of a local demographic, may improve model performance. + +The interface to the model formula in `conmat` is fixed; users cannot change the covariates of the model. This means if there is an unusual structure in their contact data it might not be accurately captured by `conmat`. This fixed formula was a design decision made to focus on the key feature of `conmat`: using only age population data to predict a contact matrix. + +Public health decisions are often based on age specific information, which means the more accurate your age specific models are, the better those decisions are likely to be. This is the first piece of software that will provide appropriate contact matrices for a population, which means more accurate models of disease. + +This work was used as a key input into several models for COVID-19 transmission and control in Australia and contributed to decisions around vaccination policy [@DohertyModelling]. + +Some future directions for this software include: + +* demonstrate fitting a model to other contact surveys from sources such as the `socialmixr` R package [@socialmixr]; +* predict to any age brackets - such as monthly ages, for example, 1, 3, 6, month year old infants; +* fitting to multiple contact surveys simultaneously, e.g., POLYMOD and CoMix; +* add uncertainty to estimates; +* add methods for including household size distributions; +* include known household age distributions as offsets in the 'home' setting model, in place of the whole population distribution. + +Software is never finished, and the software in its current format has proven useful for infectious disease modelling. In time we hope it can become more widely used and be useful for more applications in epidemiology and public health. + +# References diff --git a/paper/paper.qmd b/paper/paper.qmd new file mode 100644 index 00000000..efc7510a --- /dev/null +++ b/paper/paper.qmd @@ -0,0 +1,262 @@ +--- +title: '`conmat`: generate synthetic contact matrices for a given age-stratified population' +authors: +- affiliation: 1 + name: Nicholas Tierney + orcid: 0000-0003-1460-8722 +- affiliation: 1,2 + name: Nick Golding + orcid: 0000-0001-8916-5570 +- affiliation: 1,3 + name: Aarathy Babu + orcid: +- affiliation: 4 + name: Michael Lydeamore + orcid: 0000-0001-6515-827X +- affiliation: 1,3 + name: Chitra Saraswati + orcid: 0000-0002-8159-0414 +date: today +bibliography: references.bib +cite-method: biblatex +tags: +- epidemiology +- R +- infectious disease +affiliations: +- index: 1 + name: Telethon Kids Institute +- index: 2 + name: Curtin University +- index: 3 + name: +- index: 4 + name: Monash University +execute: + echo: true + cache: false +format: + pdf: + keep-md: true + fig-height: 4 + fig-align: center + fig-format: png + dpi: 300 + html: + keep-md: true + fig-height: 4 + fig-align: center + fig-format: png + dpi: 300 +--- + +```{r} +#| label: setup +#| echo: false +#| message: false +#| warning: false +options(tinytex.clean = FALSE) +``` + +```{r} +#| label: libraries +#| echo: false +#| output: false + +library(purrr) +library(patchwork) +library(conmat) +library(ggplot2) +``` + +# Summary + +Contact matrices describe the number of contacts between individuals. They are used to create models of infectious disease spread. `conmat` is an R package which generates synthetic contact matrices for arbitrary input demography, ready for use in infectious diseases modelling. + +There are currently few options for a user to access synthetic contact matrices [@socialmixr; @prem2017]. Existing code to generate synthetic contact matrices from @prem2017 are not designed for replicability, are restricted to select countries, and provide no sub-national demographic estimates. + +The `conmat` package exposes model fitting and prediction separately to the user. Users can fit a model based on a contact survey such as POLYMOD [@mossong2008], then predict from this model to their own demographic data. This means users can generate synthetic contact matrices for any region, with any contact survey. + +We demonstrate a use-case for `conmat` by creating contact matrices for sub-national level (in this case, a state) in Australia. + +For users who do not wish to run the entire `conmat` pipeline, we have pre-generated synthetic contact matrices for 200 countries, based on a list of countries from the United Nations, using a model fit to the POLYMOD contact survey. These resulting synthetic contact matrices, and the associated code, can be found in the syncomat analysis pipeline ([GitHub](https://github.com/idem-lab/syncomat), [Zenodo](https://zenodo.org/records/11365943)) [@syncomat]. + +# Statement of need + +Infectious diseases like influenza and COVID19 spread via social contact. If we can understand patterns of contact---which individuals are more likely be in contact with each other---then we will be able to create models of how disease spreads. Epidemiologists and public policy makers can use these models to make decisions to keep a population safe and healthy. + +Empirical estimates of social contact are provided by social contact surveys. These provide samples of the frequency and type of social contact across different settings (home, work, school, other). + +A prominent contact survey is the "POLYMOD" study by @mossong2008, which surveyed 8 European countries: Belgium, Germany, Finland, Great Britain, Italy, Luxembourg, The Netherlands, and Poland [@mossong2008]. + +These social contact surveys can be projected on to a given demographic structure to produce estimated daily contact rates between age groups. These are known as "contact matrices" or "synthetic contact matrices". A widely used approach by @prem2017 [@prem2021] produced contact matrices for 177 countries at "urban" and "rural" levels for each country. + +However, there were major limitations with the methods in @prem2021. First, not all countries were included in their analyses. Second, the contact matrices only covered broad scale areas. This presents challenges for decision makers who are often working at a sub-national geographical scale. Third, the code provided by Prem et al. was not designed for replicability and easy modification with user-defined inputs. + +The `conmat` package was developed to fill the specific need of creating contact matrices for arbitrary age categories and populations (as shown in the below example). We developed the method primarily to output synthetic contact matrices. We also provided methods to create next generation matrices. + +# Example + +We will generate a contact matrix for Tasmania, a state in Australia, using a model fitted from the POLYMOD contact survey. We can get the age-stratified population data for Tasmania from the Australian Bureau of Statistics (ABS) with the helper function, `abs_age_state()`: + +```{r} +#| label: abs-age +tasmania <- abs_age_state("TAS") +head(tasmania) +``` + +We can then generate a contact matrix for Tasmania, from the POLYMOD study with `extrapolate_polymod()`. + +```{r} +#| label: extrapolate-polymod +tasmania_contact <- extrapolate_polymod(population = tasmania) +tasmania_contact +``` + +We can plot the resulting contact matrix for Tasmania with `autoplot`, shown in @fig-autoplot-contacts. + +```{r} +#| label: fig-autoplot-contacts +#| fig-cap: "Contact patterns between individuals for different age groups across four settings: home, work, school, and other. The x axis shows the age group from, and the y axis shows the age group to, coloured by the average number of contacts between individuals in two age groups. We see different contact patterns in different settings, for example, diagonal with wings for the home setting." +#| fig-width: 8 +#| fig-height: 8 +autoplot(tasmania_contact) +``` + +# Implementation + +The overall approach of `conmat` has two parts: + +1) fit a model to predict individual contact rate, using an existing contact survey; +2) predict a synthetic contact matrix using age population data. + +## Model fitting + +`conmat` was built to predict at four settings: work, school, home, and other. +One model is fitted for each setting. +Each model fitted is a Poisson generalised additive model (GAM) with a log link function, which predicts the count of contacts, with an offset for the log of participants. +The model has six covariates to explain six key features of the relationship between ages, +and two optional covariates for attendance at school or work. +The two optional covariates are included depending on which setting the model is fitted for. + +Each cell in the resulting contact matrix (after back-transformation of the link function), indexed ($i$, $j$), is the predicted number of people in age group $j$ that a single individual in age group $i$ will have contact with per day. The sum over all age groups $j$ for a particular age group $i$ is the predicted total number of contacts per day for each individual of age group $i$. + +The six covariates are: + +- $|i-j|$, +- $|i-j|^{2}$, +- $i + j$, +- $i \times j$, +- $\text{max}(i, j)$ and +- $\text{min}(i, j)$. + +These covariates capture typical features of inter-person contact, where individuals primarily interact with people of similar age (the diagonals of the matrix), and with grandparents and/or children (the so-called 'wings' of the matrix). The key features of the relationship between the age groups, represented by the six covariates, are displayed in @fig-show-partial-plots for the home setting. The $|i-j|$ term gives the strong diagonal, modelling people generally living with those of similar age, and the $\max(i,j)$ and $\min(i,j)$ terms give the intergenerational effect of parents and grandparents with children. + +```{r} +#| label: partial-plots-create +#| echo: false + +fit_home <- polymod_setting_models$home +age_grid <- create_age_grid(ages = 1:99) +term_names <- extract_term_names(fit_home) +term_var_names <- clean_term_names(term_names) +age_predictions <- predict_individual_terms( + age_grid = age_grid, + fit = fit_home, + term_names = term_names, + term_var_names = term_var_names +) + +age_predictions_all_settings <- map_dfr( + .x = polymod_setting_models, + .f = function(x) { + predict_individual_terms( + age_grid = age_grid, + fit = x, + term_names = term_names, + term_var_names = term_var_names + ) + }, + .id = "setting" +) + +plot_age_term_settings <- gg_age_terms_settings(age_predictions_all_settings) +age_predictions_long <- pivot_longer_age_preds(age_predictions) +plot_age_predictions_long <- gg_age_partial_pred_long(age_predictions_long) + + coord_equal() + + labs( + x = "Age from", + y = "Age to" + ) + + theme( + legend.position = "bottom", + axis.text = element_text(size = 6), + panel.spacing = unit(x = 1, units = "lines") + ) + + scale_x_continuous(expand = c(0,0)) + + scale_y_continuous(expand = c(0,0)) + + expand_limits(x = c(0, 100), y = c(0, 100)) + +age_predictions_long_sum <- add_age_partial_sum(age_predictions_long) +plot_age_predictions_sum <- gg_age_partial_sum(age_predictions_long_sum) + coord_equal() + + labs(x = "Age from", + y = "Age to") + + theme( + legend.position = "bottom" + ) + + scale_x_continuous(expand = c(0,0)) + + scale_y_continuous(expand = c(0,0)) + + expand_limits(x = c(0, 100), y = c(0, 100)) +``` + + +```{r} +#| label: fig-show-partial-plots +#| echo: false +#| fig-cap: "Partial predictive plot (A) and overall synthetic contact matrix (B) for the Poisson GAM fitted to the POLYMOD contact survey in the home setting. The strong diagonal elements, and parents/grandparents interacting with children result in the classic 'diagonal with wings' shape." +plot_all_terms_sum <- plot_age_predictions_long + + plot_age_predictions_sum + + plot_layout(design = " + AAAAABBBB + AAAAABBBB + AAAAABBBB + AAAAABBBB + ") + + plot_annotation( + tag_levels = "A" + ) + + +plot_all_terms_sum +``` + +Visualising the partial predictive plots for other settings (school, work and other) show patterns that correspond with real-life situations. A full visualisation pipeline is available at https://idem-lab.github.io/conmat/dev/articles/visualising-conmat.html + +# Conclusions and future directions + +The `conmat` software provides a flexible interface to generating synthetic contact matrices using population data and contact surveys. These contact matrices can then be used in infectious disease modelling and surveillance. + +The main strength of `conmat` is its interface requiring only age population data to create a synthetic contact matrix. Current approaches provide only a selection of country level contact matrices. This software can predict to arbitrary demography, such as sub-national or simulated populations. + +We provide a trained model of contact rate that is fit to the POLYMOD survey for ease of use. The software also has an interface to use other contact surveys, such as @comix. This is important as POLYMOD represents contact patterns in 8 countries in Europe, and contact patterns are known to differ across nations and cultures. + +The covariates used by `conmat` were designed to represent the key features that are typically present in a contact matrix for different settings (work, school, home, other). Including other sources of information that may better describe these contact patterns, such as inter-generational mixing, or differences in school ages of a local demographic, may improve model performance. + +The interface to the model formula in `conmat` is fixed; users cannot change the covariates of the model. This means if there is an unusual structure in their contact data it might not be accurately captured by `conmat`. This fixed formula was a design decision made to focus on the key feature of `conmat`: using only age population data to predict a contact matrix. + +Public health decisions are often based on age specific information, which means the more accurate your age specific models are, the better those decisions are likely to be. This is the first piece of software that will provide appropriate contact matrices for a population, which means more accurate models of disease. + +This work was used as a key input into several models for COVID-19 transmission and control in Australia and contributed to decisions around vaccination policy [@DohertyModelling]. + +Some future directions for this software include: + +* demonstrate fitting a model to other contact surveys from sources such as the `socialmixr` R package [@socialmixr]; +* predict to any age brackets - such as monthly ages, for example, 1, 3, 6, month year old infants; +* fitting to multiple contact surveys simultaneously, e.g., POLYMOD and CoMix; +* add uncertainty to estimates; +* add methods for including household size distributions; +* include known household age distributions as offsets in the 'home' setting model, in place of the whole population distribution. + +Software is never finished, and the software in its current format has proven useful for infectious disease modelling. In time we hope it can become more widely used and be useful for more applications in epidemiology and public health. + +# References diff --git a/paper/paper_files/figure-html/autoplot-contacts-1.png b/paper/paper_files/figure-html/autoplot-contacts-1.png new file mode 100644 index 00000000..dbc08719 Binary files /dev/null and b/paper/paper_files/figure-html/autoplot-contacts-1.png differ diff --git a/paper/references.bib b/paper/references.bib new file mode 100644 index 00000000..83d827a1 --- /dev/null +++ b/paper/references.bib @@ -0,0 +1,123 @@ +@article{prem2017, + title = {Projecting social contact matrices in 152 countries using contact surveys and demographic data}, + volume = {13}, + issn = {1553-7358}, + url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005697}, + doi = {10.1371/journal.pcbi.1005697}, + language = {en}, + number = {9}, + urldate = {2024-05-03}, + journal = {PLOS Computational Biology}, + author = {Prem, Kiesha and Cook, Alex R. and Jit, Mark}, + month = sep, + year = {2017}, + keywords = {Infectious disease epidemiology, Schools, South Africa, Age groups, Asia, Bolivia, Germany, Home education}, + pages = {e1005697}, +} + +@article{prem2021, + title = {Projecting contact matrices in 177 geographical regions: {An} update and comparison with empirical data for the {COVID}-19 era}, + volume = {17}, + issn = {1553-7358}, + shorttitle = {Projecting contact matrices in 177 geographical regions}, + url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009098}, + doi = {10.1371/journal.pcbi.1009098}, + language = {en}, + number = {7}, + urldate = {2024-05-03}, + journal = {PLOS Computational Biology}, + author = {Prem, Kiesha and Zandvoort, Kevin van and Klepac, Petra and Eggo, Rosalind M. and Davies, Nicholas G. and Group, Centre for the Mathematical Modelling of Infectious Diseases COVID-19 Working and Cook, Alex R. and Jit, Mark}, + month = jul, + year = {2021}, + keywords = {Infectious disease epidemiology, COVID 19, Geographical regions, Age groups, Pandemics, Schools, Urban geography, Surveys}, + pages = {e1009098}, +} + +@article{mossong2008, + title = {Social contacts and mixing patterns relevant to the spread of infectious diseases}, + volume = {5}, + issn = {1549-1676}, + url = {https://journals.plos.org/plosmedicine/article?id=10.1371/journal.pmed.0050074}, + doi = {10.1371/journal.pmed.0050074}, + language = {en}, + number = {3}, + urldate = {2024-05-03}, + journal = {PLOS Medicine}, + author = {Mossong, Joël and Hens, Niel and Jit, Mark and Beutels, Philippe and Auranen, Kari and Mikolajczyk, Rafael and Massari, Marco and Salmaso, Stefania and Tomba, Gianpaolo Scalia and Wallinga, Jacco and Heijne, Janneke and Sadkowska-Todys, Malgorzata and Rosinska, Magdalena and Edmunds, W. John}, + month = mar, + year = {2008}, + keywords = {Infectious disease epidemiology, Respiratory infections, Age groups, Epidemiology, Europe, Mathematical models, Surveys, Infectious diseases}, + pages = {e74}, +} + +@dataset{comix, + author = {Jarvis, Christopher and + Coletti, Pietro and + Backer, Jantien and + Munday, James and + Faes, Christel and + Beutels, Philippe and + Althaus, Christian and + Low, Nicola and + Wallinga, Jacco and + Hens, Niel and + Edmunds, John}, + title = {CoMix data (last round in BE, CH, NL and UK)}, + month = may, + year = 2024, + publisher = {Zenodo}, + doi = {10.5281/zenodo.11154066}, + url = {https://doi.org/10.5281/zenodo.11154066} +} + + +@misc{socialmixr, + title = {Socialmixr: social mixing matrices for infectious disease modelling}, + shorttitle = {Socialmixr}, + url = {https://CRAN.R-project.org/package=socialmixr}, + doi = {10.32614/CRAN.package.socialmixr}, + abstract = {Provides methods for sampling contact matrices from diary data for use in infectious disease modelling, as discussed in Mossong et al. (2008) {\textless}doi:10.1371/journal.pmed.0050074{\textgreater}.}, + language = {en}, + urldate = {2024-08-09}, + author = {Funk, Sebastian and Willem, Lander and Gruson, Hugo}, + month = jan, + year = {2018}, +} + + @misc{DohertyModelling, + title = {Doherty Institute - Modelling}, + author = {McVernon, Jodie and + McCaw, James and + Tierney, Nicholas and + Miller, Joel and + Lydeamore, Michael and + Golding, Nick and + Shearer, Freya and + Geard, Nic and + Zachreson Cameron and + Baker, Chris and + Walker, Camelia and + Ross, Joshua and + Wood, James and + Conway, Eamon and + Mueller, Ivo}, + urldate = {2024-08-19}, + month = aug, + url={https://www.doherty.edu.au/our-work/institute-themes/viral-infectious-diseases/covid-19/covid-19-modelling/modelling} +} + +@dataset{syncomat, + author = {Saraswati, Chitra M and + Lydeamore, Michael and + Golding, Nick and + Babu, Aarathy and + Tierney, Nicholas}, + title = {{syncomat: Synthetic Contact Matrices for 200 UN + Countries}}, + month = may, + year = 2024, + publisher = {Zenodo}, + version = {v1.0.0}, + doi = {10.5281/zenodo.11365943}, + url = {https://doi.org/10.5281/zenodo.11365943} +} \ No newline at end of file diff --git a/tests/spelling.R b/tests/spelling.R new file mode 100644 index 00000000..13f77d96 --- /dev/null +++ b/tests/spelling.R @@ -0,0 +1,6 @@ +if (requireNamespace("spelling", quietly = TRUE)) { + spelling::spell_check_test( + vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE + ) +} diff --git a/tests/testthat/_snaps/abbreviation.md b/tests/testthat/_snaps/abbreviation.md index de758e0a..109333ab 100644 --- a/tests/testthat/_snaps/abbreviation.md +++ b/tests/testthat/_snaps/abbreviation.md @@ -1,14 +1,14 @@ -# abbreviate_states() works +# abs_abbreviate_states() works Code - abbreviate_states("New South Wales") + abs_abbreviate_states("New South Wales") Output [1] "NSW" -# unabbreviate_states() works +# abs_unabbreviate_states() works Code - unabbreviate_states("NSW") + abs_unabbreviate_states("NSW") Output [1] "New South Wales" diff --git a/tests/testthat/_snaps/abs-age-education.md b/tests/testthat/_snaps/abs-age-education.md new file mode 100644 index 00000000..b970f446 --- /dev/null +++ b/tests/testthat/_snaps/abs-age-education.md @@ -0,0 +1,93 @@ +# abs_age_education_state works + + Code + abs_age_education_state(state = "VIC") + Output + # A tibble: 116 x 6 + year state age population_educated total_population proportion + + 1 2016 VIC 0 0 70920 0 + 2 2016 VIC 1 0 74723 0 + 3 2016 VIC 2 0 74475 0 + 4 2016 VIC 3 25354 76009 0.334 + 5 2016 VIC 4 49763 75084 0.663 + 6 2016 VIC 5 64489 73468 0.878 + 7 2016 VIC 6 70717 74780 0.946 + 8 2016 VIC 7 69543 73391 0.948 + 9 2016 VIC 8 69673 73373 0.950 + 10 2016 VIC 9 69894 73627 0.949 + # i 106 more rows + +--- + + Code + abs_age_education_state(state = "WA", age = 1:5) + Output + # A tibble: 5 x 6 + year state age population_educated total_population proportion + + 1 2016 WA 1 0 32754 0 + 2 2016 WA 2 0 32629 0 + 3 2016 WA 3 7633 32783 0.233 + 4 2016 WA 4 26355 32463 0.812 + 5 2016 WA 5 30743 32910 0.934 + +--- + + Code + abs_age_education_state(state = c("QLD", "TAS"), age = 5) + Output + # A tibble: 2 x 6 + year state age population_educated total_population proportion + + 1 2016 QLD 5 55416 61981 0.894 + 2 2016 TAS 5 5745 6132 0.937 + +--- + + Code + abs_age_education_lga(lga = "Albury (C)") + Output + # A tibble: 116 x 8 + year state lga age population_educated total_population proportion + + 1 2016 NSW Albury (C) 0 0 676 0 + 2 2016 NSW Albury (C) 1 0 664 0 + 3 2016 NSW Albury (C) 2 0 626 0 + 4 2016 NSW Albury (C) 3 250 706 0.354 + 5 2016 NSW Albury (C) 4 431 609 0.708 + 6 2016 NSW Albury (C) 5 533 608 0.877 + 7 2016 NSW Albury (C) 6 595 631 0.943 + 8 2016 NSW Albury (C) 7 608 644 0.944 + 9 2016 NSW Albury (C) 8 575 609 0.944 + 10 2016 NSW Albury (C) 9 630 680 0.926 + # i 106 more rows + # i 1 more variable: anomaly_flag + +--- + + Code + abs_age_education_lga(lga = "Albury (C)", age = 1:5) + Output + # A tibble: 5 x 8 + year state lga age population_educated total_population proportion + + 1 2016 NSW Albury (C) 1 0 664 0 + 2 2016 NSW Albury (C) 2 0 626 0 + 3 2016 NSW Albury (C) 3 250 706 0.354 + 4 2016 NSW Albury (C) 4 431 609 0.708 + 5 2016 NSW Albury (C) 5 533 608 0.877 + # i 1 more variable: anomaly_flag + +--- + + Code + abs_age_education_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 10) + Output + # A tibble: 2 x 8 + year state lga age population_educated total_population proportion + + 1 2016 NSW Albury (C) 10 615 654 0.940 + 2 2016 QLD Barcoo (S) 10 7 7 1 + # i 1 more variable: anomaly_flag + diff --git a/tests/testthat/_snaps/abs-age-lga.md b/tests/testthat/_snaps/abs-age-lga.md index 6f989613..d3e29333 100644 --- a/tests/testthat/_snaps/abs-age-lga.md +++ b/tests/testthat/_snaps/abs-age-lga.md @@ -3,7 +3,9 @@ Code abs_age_lga("Albury (C)") Output - # A tibble: 18 x 4 + # A tibble: 18 x 4 (conmat_population) + - age: lower.age.limit + - population: population lga lower.age.limit year population 1 Albury (C) 0 2020 3764 @@ -27,10 +29,7 @@ # abs_age_lga() returns the right shape errors - Code - abs_age_lga("Imaginary World") - Error - The LGA name provided does not match LGAs in Australia - x The lga name 'Imaginary World' did not match (it probably needs 'Imaginary World (C)' or similar - i See `abs_lga_lookup` for a list of all LGAs + The LGA name provided does not match LGAs in Australia + x The lga name 'Imaginary World' did not match (it probably needs 'Imaginary World (C)' or similar) + i See `abs_lga_lookup` for a list of all LGAs diff --git a/tests/testthat/_snaps/abs-age-work.md b/tests/testthat/_snaps/abs-age-work.md new file mode 100644 index 00000000..3722996b --- /dev/null +++ b/tests/testthat/_snaps/abs-age-work.md @@ -0,0 +1,58 @@ +# abs_age_work_state works + + Code + abs_age_work_state(state = "NSW") + Output + # A tibble: 116 x 6 + year state age employed_population total_population proportion + + 1 2016 NSW 0 0 87708 0 + 2 2016 NSW 1 0 92876 0 + 3 2016 NSW 2 0 93584 0 + 4 2016 NSW 3 0 95179 0 + 5 2016 NSW 4 0 95791 0 + 6 2016 NSW 5 0 95216 0 + 7 2016 NSW 6 0 96479 0 + 8 2016 NSW 7 0 95142 0 + 9 2016 NSW 8 0 95833 0 + 10 2016 NSW 9 0 95516 0 + # i 106 more rows + +--- + + Code + abs_age_work_state(state = c("QLD", "TAS"), age = 5) + Output + # A tibble: 2 x 6 + year state age employed_population total_population proportion + + 1 2016 QLD 5 0 61981 0 + 2 2016 TAS 5 0 6132 0 + +# abs_age_work_lga works + + Code + abs_age_work_lga(lga = "Albany (C)", age = 1:5) + Output + # A tibble: 5 x 8 + year state lga age employed_population total_population proportion + + 1 2016 WA Albany (C) 1 0 348 0 + 2 2016 WA Albany (C) 2 0 413 0 + 3 2016 WA Albany (C) 3 0 434 0 + 4 2016 WA Albany (C) 4 0 418 0 + 5 2016 WA Albany (C) 5 0 376 0 + # i 1 more variable: anomaly_flag + +--- + + Code + abs_age_work_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 39) + Output + # A tibble: 2 x 8 + year state lga age employed_population total_population proportion + + 1 2016 NSW Albury (C) 39 434 587 0.739 + 2 2016 QLD Barcoo (S) 39 5 4 1.25 + # i 1 more variable: anomaly_flag + diff --git a/tests/testthat/_snaps/age-population.md b/tests/testthat/_snaps/age-population.md new file mode 100644 index 00000000..ae505ad8 --- /dev/null +++ b/tests/testthat/_snaps/age-population.md @@ -0,0 +1,47 @@ +# age_population works + + Code + age_population(data = world_data, location_col = country, location = c("Asia", + "Afghanistan"), age_col = lower.age.limit, year_col = year, year = c(2010: + 2020)) + Output + # A tibble: 42 x 5 (conmat_population) + - age: lower.age.limit + - population: population + country year population lower.age.limit upper.age.limit + + 1 Afghanistan 2010 5199366 0 4 + 2 Afghanistan 2015 5239401 0 4 + 3 Afghanistan 2010 4662577 5 9 + 4 Afghanistan 2015 5141850 5 9 + 5 Afghanistan 2010 3905430 10 14 + 6 Afghanistan 2015 4642280 10 14 + 7 Afghanistan 2010 3025604 15 19 + 8 Afghanistan 2015 3944912 15 19 + 9 Afghanistan 2010 2450674 20 24 + 10 Afghanistan 2015 3117448 20 24 + # i 32 more rows + +--- + + Code + age_population(data = world_data, location_col = country, location = "Afghanistan", + age_col = lower.age.limit) + Output + # A tibble: 294 x 5 (conmat_population) + - age: lower.age.limit + - population: population + country year population lower.age.limit upper.age.limit + + 1 Afghanistan 1950 1291622 0 4 + 2 Afghanistan 1955 1355054 0 4 + 3 Afghanistan 1960 1539494 0 4 + 4 Afghanistan 1965 1762117 0 4 + 5 Afghanistan 1970 2025591 0 4 + 6 Afghanistan 1975 2326731 0 4 + 7 Afghanistan 1980 2484405 0 4 + 8 Afghanistan 1985 2276913 0 4 + 9 Afghanistan 1990 2377868 0 4 + 10 Afghanistan 1995 3325482 0 4 + # i 284 more rows + diff --git a/tests/testthat/_snaps/apply_vaccination.md b/tests/testthat/_snaps/apply_vaccination.md index 854d1f69..07423d87 100644 --- a/tests/testthat/_snaps/apply_vaccination.md +++ b/tests/testthat/_snaps/apply_vaccination.md @@ -1,11 +1,34 @@ # apply_vaccination() errors when there's an incorrect variable name + i In argument: `acquisition_multiplier = 1 - acquisition_column * coverage`. + Caused by error: + ! object 'acquisition_column' not found + +# apply_vaccination() produces expected output + Code - apply_vaccination(ngm = ngm_VIC, data = vaccination_effect_example_data, - coverage_col = coverage, acquisition_col = acquisition_column, - transmission_col = transmission) - Error - Problem while computing `acquisition_multiplier = 1 - acquisition_column * coverage`. - Caused by error: - ! object 'acquisition_column' not found + ngm_VIC_vacc + Message + + -- Vaccination Setting Matrices ------------------------------------------------ + Output + + Message + A list of matrices, each containing the adjusted number of newly + infected individuals for age groups. These numbers have been adjusted based on + proposed vaccination rates in age groups + Output + + Message + There are 17 age breaks, ranging 0-80+ years, with a regular 5 year interval + Output + + Message + * home: a 17x17 + * school: a 17x17 + * work: a 17x17 + * other: a 17x17 + * all: a 17x17 + i Access each with `x$name` + i e.g., `x$home` diff --git a/tests/testthat/_snaps/autoplot/autoplot-all-settinge.new.svg b/tests/testthat/_snaps/autoplot/autoplot-all-settinge.new.svg new file mode 100644 index 00000000..481d5bdc --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-all-settinge.new.svg @@ -0,0 +1,307 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +1 +2 +3 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +2 +4 +6 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +1 +2 +3 +4 +other +Setting-specific synthetic contact matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-all-settinge.svg b/tests/testthat/_snaps/autoplot/autoplot-all-settinge.svg new file mode 100644 index 00000000..481d5bdc --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-all-settinge.svg @@ -0,0 +1,307 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +1 +2 +3 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +2 +4 +6 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +1 +2 +3 +4 +other +Setting-specific synthetic contact matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-ngm.new.svg b/tests/testthat/_snaps/autoplot/autoplot-ngm.new.svg new file mode 100644 index 00000000..6da1398b --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-ngm.new.svg @@ -0,0 +1,302 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.2 +0.4 +0.6 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + +0.1 +0.2 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.05 +0.10 +0.15 +0.20 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.1 +0.2 +0.3 +0.4 +The number of newly infected individuals for a specified age group in each setting +other +Setting-specific NGM matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-ngm.svg b/tests/testthat/_snaps/autoplot/autoplot-ngm.svg new file mode 100644 index 00000000..6da1398b --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-ngm.svg @@ -0,0 +1,302 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.2 +0.4 +0.6 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + +0.1 +0.2 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.05 +0.10 +0.15 +0.20 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.1 +0.2 +0.3 +0.4 +The number of newly infected individuals for a specified age group in each setting +other +Setting-specific NGM matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-single-setting.new.svg b/tests/testthat/_snaps/autoplot/autoplot-single-setting.new.svg new file mode 100644 index 00000000..822c6956 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-single-setting.new.svg @@ -0,0 +1,92 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +age_group_to +contacts + + + + + + + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 +Work + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-single-setting.svg b/tests/testthat/_snaps/autoplot/autoplot-single-setting.svg new file mode 100644 index 00000000..822c6956 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-single-setting.svg @@ -0,0 +1,92 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +age_group_to +contacts + + + + + + + + + + + +0.5 +1.0 +1.5 +2.0 +2.5 +Work + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-vaccination.new.svg b/tests/testthat/_snaps/autoplot/autoplot-vaccination.new.svg new file mode 100644 index 00000000..985dfe32 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-vaccination.new.svg @@ -0,0 +1,305 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.025 +0.050 +0.075 +0.100 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.02 +0.04 +0.06 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.004 +0.008 +0.012 +0.016 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.01 +0.02 +0.03 +Number of newly infected individuals for age groups, adjusted based on proposed age group vaccination rates +other +Setting-specific vaccination matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot-vaccination.svg b/tests/testthat/_snaps/autoplot/autoplot-vaccination.svg new file mode 100644 index 00000000..985dfe32 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot-vaccination.svg @@ -0,0 +1,305 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.025 +0.050 +0.075 +0.100 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.02 +0.04 +0.06 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + +0.004 +0.008 +0.012 +0.016 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.01 +0.02 +0.03 +Number of newly infected individuals for age groups, adjusted based on proposed age group vaccination rates +other +Setting-specific vaccination matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot.new.svg b/tests/testthat/_snaps/autoplot/autoplot.new.svg new file mode 100644 index 00000000..e654bd04 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot.new.svg @@ -0,0 +1,317 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.2 +0.3 +0.4 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +Relative probability of individuals in an age group infecting an individual in another age group +other +Setting-specific transmission probability matrices + + diff --git a/tests/testthat/_snaps/autoplot/autoplot.svg b/tests/testthat/_snaps/autoplot/autoplot.svg new file mode 100644 index 00000000..e654bd04 --- /dev/null +++ b/tests/testthat/_snaps/autoplot/autoplot.svg @@ -0,0 +1,317 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + +0.2 +0.3 +0.4 +home + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +school + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +work + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_to +[0,5) +[5,10) +[10,15) +[15,Inf) +age_group_from +contacts + + + + + + + + + + + +0.050 +0.075 +0.100 +0.125 +0.150 +Relative probability of individuals in an age group infecting an individual in another age group +other +Setting-specific transmission probability matrices + + diff --git a/tests/testthat/_snaps/check-age-breaks.md b/tests/testthat/_snaps/check-age-breaks.md new file mode 100644 index 00000000..db36f95e --- /dev/null +++ b/tests/testthat/_snaps/check-age-breaks.md @@ -0,0 +1,16 @@ +# check_age_breaks works + + Code + check_age_breaks(age_one, age_one) + +--- + + Code + check_age_breaks(age_one, age_two) + Condition + Error in `check_age_breaks()`: + ! Age breaks must be the same, but they are different: + `old`: 1 2 3 Inf + `new`: 1 2 3 + i You can check the age breaks using `age_breaks()` + diff --git a/tests/testthat/_snaps/check-if-data-frame.md b/tests/testthat/_snaps/check-if-data-frame.md new file mode 100644 index 00000000..ee9a05f9 --- /dev/null +++ b/tests/testthat/_snaps/check-if-data-frame.md @@ -0,0 +1,14 @@ +# check_if_data_frame works + + Code + check_if_data_frame(mtcars) + +--- + + Code + check_if_data_frame(volcano) + Condition + Error: + ! `volcano` must be a + i `volcano` is + diff --git a/tests/testthat/_snaps/check-if-data-list.md b/tests/testthat/_snaps/check-if-data-list.md new file mode 100644 index 00000000..8788e9b9 --- /dev/null +++ b/tests/testthat/_snaps/check-if-data-list.md @@ -0,0 +1,5 @@ +# check_if_list() returns error when argument class is not a list + + i Function expects `contact_data` to be of class + x We see `contact_data` is of class . + diff --git a/tests/testthat/_snaps/check-lga-name.md b/tests/testthat/_snaps/check-lga-name.md index 143dadf7..d946aa1c 100644 --- a/tests/testthat/_snaps/check-lga-name.md +++ b/tests/testthat/_snaps/check-lga-name.md @@ -2,17 +2,19 @@ Code check_lga_name("Imaginary World") - Error - The LGA name provided does not match LGAs in Australia - x The lga name 'Imaginary World' did not match (it probably needs 'Imaginary World (C)' or similar + Condition + Error in `check_lga_name()`: + ! The LGA name provided does not match LGAs in Australia + x The lga name 'Imaginary World' did not match (it probably needs 'Imaginary World (C)' or similar) i See `abs_lga_lookup` for a list of all LGAs # check_lga_name() errors when the name is ambiguous Code check_lga_name("Sydney") - Error - The LGA name provided does not match LGAs in Australia - x The lga name 'Sydney' did not match (it probably needs 'Sydney (C)' or similar + Condition + Error in `check_lga_name()`: + ! The LGA name provided does not match LGAs in Australia + x The lga name 'Sydney' did not match (it probably needs 'Sydney (C)' or similar) i See `abs_lga_lookup` for a list of all LGAs diff --git a/tests/testthat/_snaps/check-state-name.md b/tests/testthat/_snaps/check-state-name.md index 44b9363d..ddbffda6 100644 --- a/tests/testthat/_snaps/check-state-name.md +++ b/tests/testthat/_snaps/check-state-name.md @@ -3,8 +3,9 @@ Code abs_age_state("NSW") Output - # A tibble: 18 x 4 - # Groups: year, state [1] + # A tibble: 18 x 4 (conmat_population) + - age: lower.age.limit + - population: population year state lower.age.limit population 1 2020 NSW 0 495091 @@ -30,8 +31,9 @@ Code abs_age_state("Imaginary World") - Error - The state name provided does not match states in Australia + Condition + Error in `check_state_name()`: + ! The state name provided does not match states in Australia x The state name 'Imaginary World' did not match i See `abs_lga_lookup` for a list of all states diff --git a/tests/testthat/_snaps/check_dimensions.md b/tests/testthat/_snaps/check_dimensions.md new file mode 100644 index 00000000..1af7fc64 --- /dev/null +++ b/tests/testthat/_snaps/check_dimensions.md @@ -0,0 +1,14 @@ +# check_dimensions() returns error + + Non-conformable arrays present. + i The number of columns in `ngm` must match the number of rows in `data`. + x Number of columns in `ngm` for the settings: matrix_a and matrix_b are 2 and 2 respectively. + x Number of rows in `data` is 6. + +# apply_vaccination gives error when incompatible dimensions present + + Non-conformable arrays present. + i The number of columns in `ngm` must match the number of rows in `data`. + x Number of columns in `ngm` for the settings: home, school, work, other, and all are 4, 4, 4, 4, and 4 respectively. + x Number of rows in `data` is 17. + diff --git a/tests/testthat/_snaps/conmat-population.md b/tests/testthat/_snaps/conmat-population.md new file mode 100644 index 00000000..6d7271a7 --- /dev/null +++ b/tests/testthat/_snaps/conmat-population.md @@ -0,0 +1,58 @@ +# conmat_population works + + Code + conmat_population(data = fairfield, age = lower.age.limit, population = population) + Output + # A tibble: 18 x 4 (conmat_population) + - age: lower.age.limit + - population: population + lga lower.age.limit year population + + 1 Fairfield (C) 0 2020 12261 + 2 Fairfield (C) 5 2020 13093 + 3 Fairfield (C) 10 2020 13602 + 4 Fairfield (C) 15 2020 14323 + 5 Fairfield (C) 20 2020 15932 + 6 Fairfield (C) 25 2020 16190 + 7 Fairfield (C) 30 2020 14134 + 8 Fairfield (C) 35 2020 13034 + 9 Fairfield (C) 40 2020 12217 + 10 Fairfield (C) 45 2020 13449 + 11 Fairfield (C) 50 2020 13419 + 12 Fairfield (C) 55 2020 13652 + 13 Fairfield (C) 60 2020 12907 + 14 Fairfield (C) 65 2020 10541 + 15 Fairfield (C) 70 2020 8227 + 16 Fairfield (C) 75 2020 5598 + 17 Fairfield (C) 80 2020 4006 + 18 Fairfield (C) 85 2020 4240 + +# as_conmat_population works + + Code + as_conmat_population(data = fairfield, age = lower.age.limit, population = population) + Output + # A tibble: 18 x 4 (conmat_population) + - age: lower.age.limit + - population: population + lga lower.age.limit year population + + 1 Fairfield (C) 0 2020 12261 + 2 Fairfield (C) 5 2020 13093 + 3 Fairfield (C) 10 2020 13602 + 4 Fairfield (C) 15 2020 14323 + 5 Fairfield (C) 20 2020 15932 + 6 Fairfield (C) 25 2020 16190 + 7 Fairfield (C) 30 2020 14134 + 8 Fairfield (C) 35 2020 13034 + 9 Fairfield (C) 40 2020 12217 + 10 Fairfield (C) 45 2020 13449 + 11 Fairfield (C) 50 2020 13419 + 12 Fairfield (C) 55 2020 13652 + 13 Fairfield (C) 60 2020 12907 + 14 Fairfield (C) 65 2020 10541 + 15 Fairfield (C) 70 2020 8227 + 16 Fairfield (C) 75 2020 5598 + 17 Fairfield (C) 80 2020 4006 + 18 Fairfield (C) 85 2020 4240 + diff --git a/tests/testthat/_snaps/estimate-setting-contacts.md b/tests/testthat/_snaps/estimate-setting-contacts.md new file mode 100644 index 00000000..9ba22bc7 --- /dev/null +++ b/tests/testthat/_snaps/estimate-setting-contacts.md @@ -0,0 +1,59 @@ +# estimate_setting_contacts works + + Code + estimate_setting_contacts(contact_data_list = contact_data_cut, + survey_population = get_polymod_population(), prediction_population = get_polymod_population(), + age_breaks = c(seq(0, 10, by = 5), Inf), per_capita_household_size = NULL) + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 3 age breaks, ranging 0-10+ years, with a regular 5 year interval + Output + + Message + * home: a 3x3 + * work: a 3x3 + * school: a 3x3 + * other: a 3x3 + * all: a 3x3 + i Access each with `x$name` + i e.g., `x$home` + +# estimate_setting_contacts works with different demographic data + + Code + estimate_setting_contacts(contact_data_list = contact_data_cut, + survey_population = get_polymod_population(), prediction_population = get_polymod_population(), + age_breaks = c(seq(0, 10, by = 5), Inf), school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics) + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 3 age breaks, ranging 0-10+ years, with a regular 5 year interval + Output + + Message + * home: a 3x3 + * work: a 3x3 + * school: a 3x3 + * other: a 3x3 + * all: a 3x3 + i Access each with `x$name` + i e.g., `x$home` + diff --git a/tests/testthat/_snaps/fit-single-contact-model.md b/tests/testthat/_snaps/fit-single-contact-model.md index 175ca74e..0ad3cee4 100644 --- a/tests/testthat/_snaps/fit-single-contact-model.md +++ b/tests/testthat/_snaps/fit-single-contact-model.md @@ -2,6 +2,31 @@ Code names(m_all$coefficients) + Output + [1] "(Intercept)" "school_probability" "work_probability" + [4] "s(gam_age_offdiag).1" "s(gam_age_offdiag).2" "s(gam_age_offdiag).3" + [7] "s(gam_age_offdiag).4" "s(gam_age_offdiag).5" "s(gam_age_offdiag).6" + [10] "s(gam_age_offdiag).7" "s(gam_age_offdiag).8" "s(gam_age_offdiag).9" + [13] "s(gam_age_offdiag_2).1" "s(gam_age_offdiag_2).2" "s(gam_age_offdiag_2).3" + [16] "s(gam_age_offdiag_2).4" "s(gam_age_offdiag_2).5" "s(gam_age_offdiag_2).6" + [19] "s(gam_age_offdiag_2).7" "s(gam_age_offdiag_2).8" "s(gam_age_offdiag_2).9" + [22] "s(gam_age_diag_prod).1" "s(gam_age_diag_prod).2" "s(gam_age_diag_prod).3" + [25] "s(gam_age_diag_prod).4" "s(gam_age_diag_prod).5" "s(gam_age_diag_prod).6" + [28] "s(gam_age_diag_prod).7" "s(gam_age_diag_prod).8" "s(gam_age_diag_prod).9" + [31] "s(gam_age_diag_sum).1" "s(gam_age_diag_sum).2" "s(gam_age_diag_sum).3" + [34] "s(gam_age_diag_sum).4" "s(gam_age_diag_sum).5" "s(gam_age_diag_sum).6" + [37] "s(gam_age_diag_sum).7" "s(gam_age_diag_sum).8" "s(gam_age_diag_sum).9" + [40] "s(gam_age_pmax).1" "s(gam_age_pmax).2" "s(gam_age_pmax).3" + [43] "s(gam_age_pmax).4" "s(gam_age_pmax).5" "s(gam_age_pmax).6" + [46] "s(gam_age_pmax).7" "s(gam_age_pmax).8" "s(gam_age_pmax).9" + [49] "s(gam_age_pmin).1" "s(gam_age_pmin).2" "s(gam_age_pmin).3" + [52] "s(gam_age_pmin).4" "s(gam_age_pmin).5" "s(gam_age_pmin).6" + [55] "s(gam_age_pmin).7" "s(gam_age_pmin).8" "s(gam_age_pmin).9" + +--- + + Code + names(m_all_not_sym$coefficients) Output [1] "(Intercept)" "school_probability" [3] "work_probability" "s(age_to).1" diff --git a/tests/testthat/_snaps/generate-ngm.md b/tests/testthat/_snaps/generate-ngm.md new file mode 100644 index 00000000..2b57596d --- /dev/null +++ b/tests/testthat/_snaps/generate-ngm.md @@ -0,0 +1,92 @@ +# NGMs from each generate_ngm type return the same object + + Code + perth_ngm_lga + Message + + -- NGM Setting Matrices -------------------------------------------------------- + Output + + Message + A list of matrices, each containing the number of newly infected + individuals for a specified age group. + Output + + Message + There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval + Output + + Message + * home: a 16x16 + * school: a 16x16 + * work: a 16x16 + * other: a 16x16 + * all: a 16x16 + i Access each with `x$name` + i e.g., `x$home` + +--- + + Code + perth_ngm + Message + + -- NGM Setting Matrices -------------------------------------------------------- + Output + + Message + A list of matrices, each containing the number of newly infected + individuals for a specified age group. + Output + + Message + There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval + Output + + Message + * home: a 16x16 + * school: a 16x16 + * work: a 16x16 + * other: a 16x16 + * all: a 16x16 + i Access each with `x$name` + i e.g., `x$home` + +--- + + Code + perth_ngm_oz + Message + + -- NGM Setting Matrices -------------------------------------------------------- + Output + + Message + A list of matrices, each containing the number of newly infected + individuals for a specified age group. + Output + + Message + There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval + Output + + Message + * home: a 16x16 + * school: a 16x16 + * work: a 16x16 + * other: a 16x16 + * all: a 16x16 + i Access each with `x$name` + i e.g., `x$home` + +# generate_ngm fails when given wrong age breaks + + Code + generate_ngm(perth_contact, age_breaks = age_breaks_0_85, R_target = 1.5) + Condition + Error in `check_age_breaks()`: + ! Age breaks must be the same, but they are different: + `x[14:17]`: 65 70 75 Inf + `age_breaks[14:19]`: 65 70 75 80 85 Inf + i You can check the age breaks using `age_breaks()` + diff --git a/tests/testthat/_snaps/get-abs-household-size-distribution.md b/tests/testthat/_snaps/get-abs-household-size-distribution.md new file mode 100644 index 00000000..e66d4ec0 --- /dev/null +++ b/tests/testthat/_snaps/get-abs-household-size-distribution.md @@ -0,0 +1,39 @@ +# get_abs_household_size_distribution works + + Code + get_abs_household_size_distribution(lga = "Fairfield (C)") + Output + # A tibble: 8 x 5 + # Groups: lga [1] + year state lga household_size n_people + + 1 2016 NSW Fairfield (C) 1 9002 + 2 2016 NSW Fairfield (C) 2 26776 + 3 2016 NSW Fairfield (C) 3 31599 + 4 2016 NSW Fairfield (C) 4 44676 + 5 2016 NSW Fairfield (C) 5 33890 + 6 2016 NSW Fairfield (C) 6 21216 + 7 2016 NSW Fairfield (C) 7 9800 + 8 2016 NSW Fairfield (C) 8 10976 + +--- + + Code + get_abs_household_size_distribution(state = "NSW") + Output + # A tibble: 1,048 x 5 + # Groups: state [1] + year state lga household_size n_people + + 1 2016 NSW Albury (C) 1 6020 + 2 2016 NSW Albury (C) 2 13476 + 3 2016 NSW Albury (C) 3 8220 + 4 2016 NSW Albury (C) 4 10164 + 5 2016 NSW Albury (C) 5 5205 + 6 2016 NSW Albury (C) 6 1866 + 7 2016 NSW Albury (C) 7 392 + 8 2016 NSW Albury (C) 8 336 + 9 2016 NSW Armidale Regional (A) 1 2983 + 10 2016 NSW Armidale Regional (A) 2 7326 + # i 1,038 more rows + diff --git a/tests/testthat/_snaps/get-polymod-population.md b/tests/testthat/_snaps/get-polymod-population.md index 223f05b9..ddfd6930 100644 --- a/tests/testthat/_snaps/get-polymod-population.md +++ b/tests/testthat/_snaps/get-polymod-population.md @@ -16,34 +16,62 @@ 8 all 0 7 12 92 9 all 0 8 7 92 10 all 0 9 8 92 - # ... with 8,777 more rows + # i 8,777 more rows # get_polymod_population() works Code get_polymod_population() Output - # A tibble: 21 x 2 + # A tibble: 21 x 2 (conmat_population) + - age: lower.age.limit + - population: population lower.age.limit population - 1 0 1841420. - 2 5 1950666. - 3 10 2122856. - 4 15 2323822. - 5 20 2406141. - 6 25 2377541. - 7 30 2552587. - 8 35 2982293. - 9 40 3044427. - 10 45 2828202. - # ... with 11 more rows + 1 0 1898966. + 2 5 2017632. + 3 10 2192410. + 4 15 2369985. + 5 20 2467873. + 6 25 2484327. + 7 30 2649826. + 8 35 3043704. + 9 40 3117812. + 10 45 2879510. + # i 11 more rows -# get_polymod_setting_data() works +# get_polymod_setting_data() and derivatives work Code - get_polymod_setting_data() + polymod_setting_data + Message + + -- Setting Data ---------------------------------------------------------------- + Output + + Message + A list of s containing the number of contacts between ages in each + setting. + Output + + Message + There are 86 age breaks, ranging 0-90 years, with an irregular year interval, + (on average, 1.05 years) + Output + + Message + * home: a 8,787x5 + * work: a 8,787x5 + * school: a 8,787x5 + * other: a 8,787x5 + i Access each with `x$name` + i e.g., `x$home` + +--- + + Code + polymod_setting_data$home Output - $home # A tibble: 8,787 x 5 setting age_from age_to contacts participants @@ -57,9 +85,13 @@ 8 home 0 7 9 92 9 home 0 8 6 92 10 home 0 9 6 92 - # ... with 8,777 more rows - - $work + # i 8,777 more rows + +--- + + Code + polymod_setting_data$work + Output # A tibble: 8,787 x 5 setting age_from age_to contacts participants @@ -73,9 +105,13 @@ 8 work 0 7 0 92 9 work 0 8 0 92 10 work 0 9 0 92 - # ... with 8,777 more rows - - $school + # i 8,777 more rows + +--- + + Code + polymod_setting_data$school + Output # A tibble: 8,787 x 5 setting age_from age_to contacts participants @@ -89,9 +125,13 @@ 8 school 0 7 0 92 9 school 0 8 0 92 10 school 0 9 0 92 - # ... with 8,777 more rows - - $other + # i 8,777 more rows + +--- + + Code + polymod_setting_data$other + Output # A tibble: 8,787 x 5 setting age_from age_to contacts participants @@ -105,6 +145,5 @@ 8 other 0 7 5 92 9 other 0 8 2 92 10 other 0 9 3 92 - # ... with 8,777 more rows - + # i 8,777 more rows diff --git a/tests/testthat/_snaps/lga_household_works.md b/tests/testthat/_snaps/lga_household_works.md index 1246d17e..dbeff6d9 100644 --- a/tests/testthat/_snaps/lga_household_works.md +++ b/tests/testthat/_snaps/lga_household_works.md @@ -1,12 +1,16 @@ -# get_per_capita_household_size errors for some lgas +# get_abs_per_capita_household_size errors for some lgas - The LGA name provided does not match LGAs in Australia - x The lga name 'Migratory - Offshore - Shipping (NSW)' did not match (it probably needs 'Migratory - Offshore - Shipping (NSW) (C)' or similar + i In index: 130. + Caused by error in `check_lga_name()`: + ! The LGA name provided does not match LGAs in Australia + x The lga name 'Migratory - Offshore - Shipping (NSW)' did not match (it probably needs 'Migratory - Offshore - Shipping (NSW) (C)' or similar) i See `abs_lga_lookup` for a list of all LGAs # check_lga_name errors for some lgas - The LGA name provided does not match LGAs in Australia - x The lga name 'Migratory - Offshore - Shipping (NSW)' did not match (it probably needs 'Migratory - Offshore - Shipping (NSW) (C)' or similar + i In index: 130. + Caused by error in `check_lga_name()`: + ! The LGA name provided does not match LGAs in Australia + x The lga name 'Migratory - Offshore - Shipping (NSW)' did not match (it probably needs 'Migratory - Offshore - Shipping (NSW) (C)' or similar) i See `abs_lga_lookup` for a list of all LGAs diff --git a/tests/testthat/_snaps/matrix-to-predictions.md b/tests/testthat/_snaps/matrix-to-predictions.md new file mode 100644 index 00000000..cc200c85 --- /dev/null +++ b/tests/testthat/_snaps/matrix-to-predictions.md @@ -0,0 +1,36 @@ +# matrix_to_predictions works + + Code + matrix_to_predictions(fairfield_school_mat) + Output + # A tibble: 16 x 3 + age_group_to age_group_from contacts + + 1 [0,5) [0,5) 0.918 + 2 [0,5) [5,10) 0.361 + 3 [0,5) [10,15) 0.0608 + 4 [0,5) [15,Inf) 0.0470 + 5 [5,10) [0,5) 0.380 + 6 [5,10) [5,10) 4.72 + 7 [5,10) [10,15) 0.422 + 8 [5,10) [15,Inf) 0.0955 + 9 [10,15) [0,5) 0.0677 + 10 [10,15) [5,10) 0.441 + 11 [10,15) [10,15) 7.20 + 12 [10,15) [15,Inf) 0.147 + 13 [15,Inf) [0,5) 0.660 + 14 [15,Inf) [5,10) 1.27 + 15 [15,Inf) [10,15) 1.85 + 16 [15,Inf) [15,Inf) 1.36 + +# predictions_to_matrix works + + Code + predictions_to_matrix(fairfield_school_contacts) + Output + [0,5) [5,10) [10,15) [15,Inf) + [0,5) 0.91761752 0.3613661 0.06083403 0.04696433 + [5,10) 0.38026461 4.7206582 0.42158012 0.09549837 + [10,15) 0.06772895 0.4409607 7.20182626 0.14732824 + [15,Inf) 0.65963688 1.2684675 1.85420868 1.35593915 + diff --git a/tests/testthat/_snaps/models-fit-with-furrr.md b/tests/testthat/_snaps/models-fit-with-furrr.md index 36814092..d1794700 100644 --- a/tests/testthat/_snaps/models-fit-with-furrr.md +++ b/tests/testthat/_snaps/models-fit-with-furrr.md @@ -1,3 +1,56 @@ +# predict_setting_contact model prints appropriately + + Code + contact_model_pred + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 5 age breaks, ranging 0-20+ years, with a regular 5 year interval + Output + + Message + * home: a 5x5 + * work: a 5x5 + * school: a 5x5 + * other: a 5x5 + * all: a 5x5 + i Access each with `x$name` + i e.g., `x$home` + +# fit_setting_contact model prints appropriately + + Code + contact_model + Message + + -- Fitted Setting Contact Models ----------------------------------------------- + Output + + Message + A list of fitted models for each setting. Each model predicts the + contact rate between ages, for that setting. + Output + + Message + There are 20 age breaks, ranging 0-20 years, with a regular 1 year interval + Output + + Message + * home: a model (441 obs) + * work: a model (441 obs) + * school: a model (441 obs) + * other: a model (441 obs) + i Access each with `x$name` + i e.g., `x$home` + # list names are kept Code @@ -17,140 +70,100 @@ Code names(contact_model[[1]]$coefficients) Output - [1] "(Intercept)" "school_probability" - [3] "work_probability" "s(age_to).1" - [5] "s(age_to).2" "s(age_to).3" - [7] "s(age_to).4" "s(age_to).5" - [9] "s(age_to).6" "s(age_to).7" - [11] "s(age_to).8" "s(age_to).9" - [13] "s(age_from).1" "s(age_from).2" - [15] "s(age_from).3" "s(age_from).4" - [17] "s(age_from).5" "s(age_from).6" - [19] "s(age_from).7" "s(age_from).8" - [21] "s(age_from).9" "s(intergenerational).1" - [23] "s(intergenerational).2" "s(intergenerational).3" - [25] "s(intergenerational).4" "s(intergenerational).5" - [27] "s(intergenerational).6" "s(intergenerational).7" - [29] "s(intergenerational).8" "s(intergenerational).9" - [31] "s(intergenerational,age_from).1" "s(intergenerational,age_from).2" - [33] "s(intergenerational,age_from).3" "s(intergenerational,age_from).4" - [35] "s(intergenerational,age_from).5" "s(intergenerational,age_from).6" - [37] "s(intergenerational,age_from).7" "s(intergenerational,age_from).8" - [39] "s(intergenerational,age_from).9" "s(intergenerational,age_from).10" - [41] "s(intergenerational,age_from).11" "s(intergenerational,age_from).12" - [43] "s(intergenerational,age_from).13" "s(intergenerational,age_from).14" - [45] "s(intergenerational,age_from).15" "s(intergenerational,age_from).16" - [47] "s(intergenerational,age_from).17" "s(intergenerational,age_from).18" - [49] "s(intergenerational,age_from).19" "s(intergenerational,age_from).20" - [51] "s(intergenerational,age_from).21" "s(intergenerational,age_from).22" - [53] "s(intergenerational,age_from).23" "s(intergenerational,age_from).24" - [55] "s(intergenerational,age_from).25" "s(intergenerational,age_from).26" - [57] "s(intergenerational,age_from).27" + [1] "(Intercept)" "school_probability" "work_probability" + [4] "s(gam_age_offdiag).1" "s(gam_age_offdiag).2" "s(gam_age_offdiag).3" + [7] "s(gam_age_offdiag).4" "s(gam_age_offdiag).5" "s(gam_age_offdiag).6" + [10] "s(gam_age_offdiag).7" "s(gam_age_offdiag).8" "s(gam_age_offdiag).9" + [13] "s(gam_age_offdiag_2).1" "s(gam_age_offdiag_2).2" "s(gam_age_offdiag_2).3" + [16] "s(gam_age_offdiag_2).4" "s(gam_age_offdiag_2).5" "s(gam_age_offdiag_2).6" + [19] "s(gam_age_offdiag_2).7" "s(gam_age_offdiag_2).8" "s(gam_age_offdiag_2).9" + [22] "s(gam_age_diag_prod).1" "s(gam_age_diag_prod).2" "s(gam_age_diag_prod).3" + [25] "s(gam_age_diag_prod).4" "s(gam_age_diag_prod).5" "s(gam_age_diag_prod).6" + [28] "s(gam_age_diag_prod).7" "s(gam_age_diag_prod).8" "s(gam_age_diag_prod).9" + [31] "s(gam_age_diag_sum).1" "s(gam_age_diag_sum).2" "s(gam_age_diag_sum).3" + [34] "s(gam_age_diag_sum).4" "s(gam_age_diag_sum).5" "s(gam_age_diag_sum).6" + [37] "s(gam_age_diag_sum).7" "s(gam_age_diag_sum).8" "s(gam_age_diag_sum).9" + [40] "s(gam_age_pmax).1" "s(gam_age_pmax).2" "s(gam_age_pmax).3" + [43] "s(gam_age_pmax).4" "s(gam_age_pmax).5" "s(gam_age_pmax).6" + [46] "s(gam_age_pmax).7" "s(gam_age_pmax).8" "s(gam_age_pmax).9" + [49] "s(gam_age_pmin).1" "s(gam_age_pmin).2" "s(gam_age_pmin).3" + [52] "s(gam_age_pmin).4" "s(gam_age_pmin).5" "s(gam_age_pmin).6" + [55] "s(gam_age_pmin).7" "s(gam_age_pmin).8" "s(gam_age_pmin).9" --- Code names(contact_model[[2]]$coefficients) Output - [1] "(Intercept)" "school_probability" - [3] "work_probability" "s(age_to).1" - [5] "s(age_to).2" "s(age_to).3" - [7] "s(age_to).4" "s(age_to).5" - [9] "s(age_to).6" "s(age_to).7" - [11] "s(age_to).8" "s(age_to).9" - [13] "s(age_from).1" "s(age_from).2" - [15] "s(age_from).3" "s(age_from).4" - [17] "s(age_from).5" "s(age_from).6" - [19] "s(age_from).7" "s(age_from).8" - [21] "s(age_from).9" "s(intergenerational).1" - [23] "s(intergenerational).2" "s(intergenerational).3" - [25] "s(intergenerational).4" "s(intergenerational).5" - [27] "s(intergenerational).6" "s(intergenerational).7" - [29] "s(intergenerational).8" "s(intergenerational).9" - [31] "s(intergenerational,age_from).1" "s(intergenerational,age_from).2" - [33] "s(intergenerational,age_from).3" "s(intergenerational,age_from).4" - [35] "s(intergenerational,age_from).5" "s(intergenerational,age_from).6" - [37] "s(intergenerational,age_from).7" "s(intergenerational,age_from).8" - [39] "s(intergenerational,age_from).9" "s(intergenerational,age_from).10" - [41] "s(intergenerational,age_from).11" "s(intergenerational,age_from).12" - [43] "s(intergenerational,age_from).13" "s(intergenerational,age_from).14" - [45] "s(intergenerational,age_from).15" "s(intergenerational,age_from).16" - [47] "s(intergenerational,age_from).17" "s(intergenerational,age_from).18" - [49] "s(intergenerational,age_from).19" "s(intergenerational,age_from).20" - [51] "s(intergenerational,age_from).21" "s(intergenerational,age_from).22" - [53] "s(intergenerational,age_from).23" "s(intergenerational,age_from).24" - [55] "s(intergenerational,age_from).25" "s(intergenerational,age_from).26" - [57] "s(intergenerational,age_from).27" + [1] "(Intercept)" "school_probability" "work_probability" + [4] "s(gam_age_offdiag).1" "s(gam_age_offdiag).2" "s(gam_age_offdiag).3" + [7] "s(gam_age_offdiag).4" "s(gam_age_offdiag).5" "s(gam_age_offdiag).6" + [10] "s(gam_age_offdiag).7" "s(gam_age_offdiag).8" "s(gam_age_offdiag).9" + [13] "s(gam_age_offdiag_2).1" "s(gam_age_offdiag_2).2" "s(gam_age_offdiag_2).3" + [16] "s(gam_age_offdiag_2).4" "s(gam_age_offdiag_2).5" "s(gam_age_offdiag_2).6" + [19] "s(gam_age_offdiag_2).7" "s(gam_age_offdiag_2).8" "s(gam_age_offdiag_2).9" + [22] "s(gam_age_diag_prod).1" "s(gam_age_diag_prod).2" "s(gam_age_diag_prod).3" + [25] "s(gam_age_diag_prod).4" "s(gam_age_diag_prod).5" "s(gam_age_diag_prod).6" + [28] "s(gam_age_diag_prod).7" "s(gam_age_diag_prod).8" "s(gam_age_diag_prod).9" + [31] "s(gam_age_diag_sum).1" "s(gam_age_diag_sum).2" "s(gam_age_diag_sum).3" + [34] "s(gam_age_diag_sum).4" "s(gam_age_diag_sum).5" "s(gam_age_diag_sum).6" + [37] "s(gam_age_diag_sum).7" "s(gam_age_diag_sum).8" "s(gam_age_diag_sum).9" + [40] "s(gam_age_pmax).1" "s(gam_age_pmax).2" "s(gam_age_pmax).3" + [43] "s(gam_age_pmax).4" "s(gam_age_pmax).5" "s(gam_age_pmax).6" + [46] "s(gam_age_pmax).7" "s(gam_age_pmax).8" "s(gam_age_pmax).9" + [49] "s(gam_age_pmin).1" "s(gam_age_pmin).2" "s(gam_age_pmin).3" + [52] "s(gam_age_pmin).4" "s(gam_age_pmin).5" "s(gam_age_pmin).6" + [55] "s(gam_age_pmin).7" "s(gam_age_pmin).8" "s(gam_age_pmin).9" --- Code names(contact_model[[3]]$coefficients) Output - [1] "(Intercept)" "school_probability" - [3] "work_probability" "s(age_to).1" - [5] "s(age_to).2" "s(age_to).3" - [7] "s(age_to).4" "s(age_to).5" - [9] "s(age_to).6" "s(age_to).7" - [11] "s(age_to).8" "s(age_to).9" - [13] "s(age_from).1" "s(age_from).2" - [15] "s(age_from).3" "s(age_from).4" - [17] "s(age_from).5" "s(age_from).6" - [19] "s(age_from).7" "s(age_from).8" - [21] "s(age_from).9" "s(intergenerational).1" - [23] "s(intergenerational).2" "s(intergenerational).3" - [25] "s(intergenerational).4" "s(intergenerational).5" - [27] "s(intergenerational).6" "s(intergenerational).7" - [29] "s(intergenerational).8" "s(intergenerational).9" - [31] "s(intergenerational,age_from).1" "s(intergenerational,age_from).2" - [33] "s(intergenerational,age_from).3" "s(intergenerational,age_from).4" - [35] "s(intergenerational,age_from).5" "s(intergenerational,age_from).6" - [37] "s(intergenerational,age_from).7" "s(intergenerational,age_from).8" - [39] "s(intergenerational,age_from).9" "s(intergenerational,age_from).10" - [41] "s(intergenerational,age_from).11" "s(intergenerational,age_from).12" - [43] "s(intergenerational,age_from).13" "s(intergenerational,age_from).14" - [45] "s(intergenerational,age_from).15" "s(intergenerational,age_from).16" - [47] "s(intergenerational,age_from).17" "s(intergenerational,age_from).18" - [49] "s(intergenerational,age_from).19" "s(intergenerational,age_from).20" - [51] "s(intergenerational,age_from).21" "s(intergenerational,age_from).22" - [53] "s(intergenerational,age_from).23" "s(intergenerational,age_from).24" - [55] "s(intergenerational,age_from).25" "s(intergenerational,age_from).26" - [57] "s(intergenerational,age_from).27" + [1] "(Intercept)" "school_probability" "work_probability" + [4] "s(gam_age_offdiag).1" "s(gam_age_offdiag).2" "s(gam_age_offdiag).3" + [7] "s(gam_age_offdiag).4" "s(gam_age_offdiag).5" "s(gam_age_offdiag).6" + [10] "s(gam_age_offdiag).7" "s(gam_age_offdiag).8" "s(gam_age_offdiag).9" + [13] "s(gam_age_offdiag_2).1" "s(gam_age_offdiag_2).2" "s(gam_age_offdiag_2).3" + [16] "s(gam_age_offdiag_2).4" "s(gam_age_offdiag_2).5" "s(gam_age_offdiag_2).6" + [19] "s(gam_age_offdiag_2).7" "s(gam_age_offdiag_2).8" "s(gam_age_offdiag_2).9" + [22] "s(gam_age_diag_prod).1" "s(gam_age_diag_prod).2" "s(gam_age_diag_prod).3" + [25] "s(gam_age_diag_prod).4" "s(gam_age_diag_prod).5" "s(gam_age_diag_prod).6" + [28] "s(gam_age_diag_prod).7" "s(gam_age_diag_prod).8" "s(gam_age_diag_prod).9" + [31] "s(gam_age_diag_sum).1" "s(gam_age_diag_sum).2" "s(gam_age_diag_sum).3" + [34] "s(gam_age_diag_sum).4" "s(gam_age_diag_sum).5" "s(gam_age_diag_sum).6" + [37] "s(gam_age_diag_sum).7" "s(gam_age_diag_sum).8" "s(gam_age_diag_sum).9" + [40] "s(gam_age_pmax).1" "s(gam_age_pmax).2" "s(gam_age_pmax).3" + [43] "s(gam_age_pmax).4" "s(gam_age_pmax).5" "s(gam_age_pmax).6" + [46] "s(gam_age_pmax).7" "s(gam_age_pmax).8" "s(gam_age_pmax).9" + [49] "s(gam_age_pmin).1" "s(gam_age_pmin).2" "s(gam_age_pmin).3" + [52] "s(gam_age_pmin).4" "s(gam_age_pmin).5" "s(gam_age_pmin).6" + [55] "s(gam_age_pmin).7" "s(gam_age_pmin).8" "s(gam_age_pmin).9" --- Code names(contact_model[[4]]$coefficients) Output - [1] "(Intercept)" "school_probability" - [3] "work_probability" "s(age_to).1" - [5] "s(age_to).2" "s(age_to).3" - [7] "s(age_to).4" "s(age_to).5" - [9] "s(age_to).6" "s(age_to).7" - [11] "s(age_to).8" "s(age_to).9" - [13] "s(age_from).1" "s(age_from).2" - [15] "s(age_from).3" "s(age_from).4" - [17] "s(age_from).5" "s(age_from).6" - [19] "s(age_from).7" "s(age_from).8" - [21] "s(age_from).9" "s(intergenerational).1" - [23] "s(intergenerational).2" "s(intergenerational).3" - [25] "s(intergenerational).4" "s(intergenerational).5" - [27] "s(intergenerational).6" "s(intergenerational).7" - [29] "s(intergenerational).8" "s(intergenerational).9" - [31] "s(intergenerational,age_from).1" "s(intergenerational,age_from).2" - [33] "s(intergenerational,age_from).3" "s(intergenerational,age_from).4" - [35] "s(intergenerational,age_from).5" "s(intergenerational,age_from).6" - [37] "s(intergenerational,age_from).7" "s(intergenerational,age_from).8" - [39] "s(intergenerational,age_from).9" "s(intergenerational,age_from).10" - [41] "s(intergenerational,age_from).11" "s(intergenerational,age_from).12" - [43] "s(intergenerational,age_from).13" "s(intergenerational,age_from).14" - [45] "s(intergenerational,age_from).15" "s(intergenerational,age_from).16" - [47] "s(intergenerational,age_from).17" "s(intergenerational,age_from).18" - [49] "s(intergenerational,age_from).19" "s(intergenerational,age_from).20" - [51] "s(intergenerational,age_from).21" "s(intergenerational,age_from).22" - [53] "s(intergenerational,age_from).23" "s(intergenerational,age_from).24" - [55] "s(intergenerational,age_from).25" "s(intergenerational,age_from).26" - [57] "s(intergenerational,age_from).27" + [1] "(Intercept)" "school_probability" "work_probability" + [4] "s(gam_age_offdiag).1" "s(gam_age_offdiag).2" "s(gam_age_offdiag).3" + [7] "s(gam_age_offdiag).4" "s(gam_age_offdiag).5" "s(gam_age_offdiag).6" + [10] "s(gam_age_offdiag).7" "s(gam_age_offdiag).8" "s(gam_age_offdiag).9" + [13] "s(gam_age_offdiag_2).1" "s(gam_age_offdiag_2).2" "s(gam_age_offdiag_2).3" + [16] "s(gam_age_offdiag_2).4" "s(gam_age_offdiag_2).5" "s(gam_age_offdiag_2).6" + [19] "s(gam_age_offdiag_2).7" "s(gam_age_offdiag_2).8" "s(gam_age_offdiag_2).9" + [22] "s(gam_age_diag_prod).1" "s(gam_age_diag_prod).2" "s(gam_age_diag_prod).3" + [25] "s(gam_age_diag_prod).4" "s(gam_age_diag_prod).5" "s(gam_age_diag_prod).6" + [28] "s(gam_age_diag_prod).7" "s(gam_age_diag_prod).8" "s(gam_age_diag_prod).9" + [31] "s(gam_age_diag_sum).1" "s(gam_age_diag_sum).2" "s(gam_age_diag_sum).3" + [34] "s(gam_age_diag_sum).4" "s(gam_age_diag_sum).5" "s(gam_age_diag_sum).6" + [37] "s(gam_age_diag_sum).7" "s(gam_age_diag_sum).8" "s(gam_age_diag_sum).9" + [40] "s(gam_age_pmax).1" "s(gam_age_pmax).2" "s(gam_age_pmax).3" + [43] "s(gam_age_pmax).4" "s(gam_age_pmax).5" "s(gam_age_pmax).6" + [46] "s(gam_age_pmax).7" "s(gam_age_pmax).8" "s(gam_age_pmax).9" + [49] "s(gam_age_pmin).1" "s(gam_age_pmin).2" "s(gam_age_pmin).3" + [52] "s(gam_age_pmin).4" "s(gam_age_pmin).5" "s(gam_age_pmin).6" + [55] "s(gam_age_pmin).7" "s(gam_age_pmin).8" "s(gam_age_pmin).9" # Matrix dims are kept diff --git a/tests/testthat/_snaps/print-conmat-matrix-method.md b/tests/testthat/_snaps/print-conmat-matrix-method.md new file mode 100644 index 00000000..c3dcefb4 --- /dev/null +++ b/tests/testthat/_snaps/print-conmat-matrix-method.md @@ -0,0 +1,84 @@ +# Print method for setting prediction matrices works + + Code + synthetic_settings_5y_perth + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 16 age breaks, ranging 0-75+ years, with a regular 5 year interval + Output + + Message + * home: a 16x16 + * work: a 16x16 + * school: a 16x16 + * other: a 16x16 + * all: a 16x16 + i Access each with `x$name` + i e.g., `x$home` + +--- + + Code + synthetic_settings_5y_perth$home + Output + [0,5) [5,10) [10,15) [15,20) [20,25) [25,30) + [0,5) 0.40076181 0.33477812 0.16555012 0.11608014 0.17617098 0.32079759 + [5,10) 0.19885124 0.32216319 0.20765643 0.07226656 0.05193349 0.09275907 + [10,15) 0.07470057 0.15774971 0.26654020 0.12883717 0.04419741 0.03535209 + [15,20) 0.08170674 0.08563793 0.20097718 0.34425132 0.17025978 0.06763197 + [20,25) 0.33292330 0.16522893 0.18510249 0.45711126 0.73008179 0.41491477 + [25,30) 1.11525912 0.54291253 0.27237380 0.33403867 0.76329736 1.15720681 + [30,35) 1.45860486 1.15187002 0.52666015 0.27078533 0.33556433 0.71610024 + [35,40) 0.74668485 1.02027190 0.74962874 0.32720560 0.17284734 0.20851823 + [40,45) 0.24541744 0.41797497 0.55192548 0.38836042 0.16874222 0.08987479 + [45,50) 0.12179342 0.15376654 0.25912643 0.33988507 0.23163274 0.10557474 + [50,55) 0.11563694 0.09409502 0.11753342 0.20020602 0.24542407 0.17519639 + [55,60) 0.13503961 0.09876405 0.07843632 0.09845190 0.15621575 0.19574806 + [60,65) 0.12361018 0.11014329 0.07653905 0.05845473 0.07100810 0.11555725 + [65,70) 0.08050583 0.09083407 0.07498176 0.04709679 0.03585191 0.04657126 + [70,75) 0.04571279 0.05636194 0.05763523 0.04176239 0.02672337 0.02309248 + [75,Inf) 0.04433925 0.05437000 0.05768114 0.05328648 0.04628235 0.04056598 + [30,35) [35,40) [40,45) [45,50) [50,55) [55,60) + [0,5) 0.41986733 0.32009689 0.17360957 0.11569004 0.12037866 0.13963653 + [5,10) 0.19694689 0.25979529 0.17562638 0.08675713 0.05818228 0.06066078 + [10,15) 0.06840680 0.14500559 0.17617452 0.11106526 0.05520885 0.03659734 + [15,20) 0.05486552 0.09873357 0.19337627 0.22725016 0.14669997 0.07165755 + [20,25) 0.18254084 0.14002857 0.22558064 0.41579752 0.48281403 0.30526183 + [25,30) 0.71662662 0.31076553 0.22102981 0.34863971 0.63404890 0.70368779 + [30,35) 1.09947651 0.66335662 0.25976689 0.18402632 0.30705923 0.55689293 + [35,40) 0.44542786 0.69615807 0.38905818 0.14785069 0.11414398 0.19791347 + [40,45) 0.10570365 0.23577102 0.38790919 0.21832411 0.08906823 0.07049240 + [45,50) 0.05576763 0.06672604 0.16259149 0.29222377 0.17596322 0.07176800 + [50,55) 0.08490703 0.04700502 0.06052547 0.16056139 0.29887960 0.17643556 + [55,60) 0.15502733 0.08205057 0.04822505 0.06592728 0.17762380 0.32453421 + [60,65) 0.16323144 0.14348475 0.07851056 0.04681736 0.06587880 0.18132674 + [65,70) 0.08575999 0.13412044 0.11910768 0.06317103 0.03886021 0.05882613 + [70,75) 0.03362083 0.06630028 0.10307451 0.08846877 0.04884175 0.03304499 + [75,Inf) 0.03722015 0.04251769 0.06915906 0.11440610 0.14081425 0.12619944 + [60,65) [65,70) [70,75) [75,Inf) + [0,5) 0.12872383 0.09279565 0.06409376 0.03956186 + [5,10) 0.06812931 0.06218996 0.04693915 0.02881503 + [10,15) 0.03596518 0.03899872 0.03646368 0.02322291 + [15,20) 0.04284739 0.03821128 0.04121576 0.03346612 + [20,25) 0.13974053 0.07809486 0.07080747 0.07803935 + [25,30) 0.41835658 0.18662205 0.11256237 0.12583317 + [30,35) 0.59051931 0.34340814 0.16376160 0.11536980 + [35,40) 0.34855102 0.36062108 0.21684493 0.08849404 + [40,45) 0.11557517 0.19407598 0.20429657 0.08723069 + [45,50) 0.05132625 0.07665599 0.13058578 0.10746461 + [50,55) 0.06590183 0.04302814 0.06578340 0.12069301 + [55,60) 0.18261175 0.06557415 0.04480699 0.10889502 + [60,65) 0.33689152 0.19066624 0.07151884 0.07706387 + [65,70) 0.17225754 0.32955054 0.18796283 0.06150905 + [70,75) 0.05311866 0.15452338 0.28435950 0.09695813 + [75,Inf) 0.08994295 0.07946041 0.15236100 0.24375399 + diff --git a/tests/testthat/_snaps/setting-prediction-matrix.md b/tests/testthat/_snaps/setting-prediction-matrix.md new file mode 100644 index 00000000..136a93f3 --- /dev/null +++ b/tests/testthat/_snaps/setting-prediction-matrix.md @@ -0,0 +1,66 @@ +# setting_prediction_matrix works + + Code + setting_prediction_matrix(home = one_by_nine, work = one_by_nine, age_breaks = age_breaks_0_80_plus) + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 9 age breaks, ranging 0-80+ years, with a regular 10 year interval + Output + + Message + * home: a 9x9 + * work: a 9x9 + * all: a 9x9 + i Access each with `x$name` + i e.g., `x$home` + +# as_setting_prediction_matrix works + + Code + as_setting_prediction_matrix(mat_list, age_breaks = age_breaks_0_80_plus) + Message + + -- Setting Prediction Matrices ------------------------------------------------- + Output + + Message + A list of matrices containing the model predicted contact rate between ages in + each setting. + Output + + Message + There are 9 age breaks, ranging 0-80+ years, with a regular 10 year interval + Output + + Message + * home: a 9x9 + * work: a 9x9 + * all: a 9x9 + i Access each with `x$name` + i e.g., `x$home` + +# as_setting_prediction_matrix warns when setting pred matrix given + + Code + as_setting_prediction_matrix(setting_mat, age_breaks = age_breaks_0_80_plus) + Condition + Warning: + `as_setting_prediction_matrix` not used as this object is alreadt of a method not implemented for . + +# as_setting_prediction_matrix fails when wrong object given + + Code + as_setting_prediction_matrix(iris, age_breaks = age_breaks_0_80_plus) + Condition + Error in `as_setting_prediction_matrix()`: + ! `as_setting_prediction_matrix` method not implemented for + diff --git a/tests/testthat/_snaps/setting-transmission-matrix.md b/tests/testthat/_snaps/setting-transmission-matrix.md new file mode 100644 index 00000000..5536dd2f --- /dev/null +++ b/tests/testthat/_snaps/setting-transmission-matrix.md @@ -0,0 +1,50 @@ +# transmission_probability_matrix works + + Code + transmission_probability_matrix(home = one_05, work = one_05, age_breaks = age_breaks_0_80_plus) + Message + + -- Transmission Probability Matrices ------------------------------------------- + Output + + Message + A list of matrices, each containing the relative probability of + individuals in a given age group infecting an individual in another age group, + for that setting. + Output + + Message + There are 9 age breaks, ranging 0-80+ years, with a regular 10 year interval + Output + + Message + * home: a 9x9 + * work: a 9x9 + i Access each with `x$name` + i e.g., `x$home` + +--- + + Code + transmission_probability_matrix(one_05, one_05, age_breaks = age_breaks_0_80_plus) + Message + + -- Transmission Probability Matrices ------------------------------------------- + Output + + Message + A list of matrices, each containing the relative probability of + individuals in a given age group infecting an individual in another age group, + for that setting. + Output + + Message + There are 9 age breaks, ranging 0-80+ years, with a regular 10 year interval + Output + + Message + * one: a 9x9 + * two: a 9x9 + i Access each with `x$name` + i e.g., `x$one` + diff --git a/tests/testthat/test-abbreviation.R b/tests/testthat/test-abbreviation.R index 13186175..1591ffea 100644 --- a/tests/testthat/test-abbreviation.R +++ b/tests/testthat/test-abbreviation.R @@ -1,7 +1,7 @@ -test_that("abbreviate_states() works", { - expect_snapshot(abbreviate_states("New South Wales")) +test_that("abs_abbreviate_states() works", { + expect_snapshot(abs_abbreviate_states("New South Wales")) }) -test_that("unabbreviate_states() works", { - expect_snapshot(unabbreviate_states("NSW")) +test_that("abs_unabbreviate_states() works", { + expect_snapshot(abs_unabbreviate_states("NSW")) }) diff --git a/tests/testthat/test-abs-age-education.R b/tests/testthat/test-abs-age-education.R new file mode 100644 index 00000000..c4243a2c --- /dev/null +++ b/tests/testthat/test-abs-age-education.R @@ -0,0 +1,23 @@ +test_that("abs_age_education_state works", { + expect_snapshot( + abs_age_education_state(state = "VIC") + ) + expect_snapshot( + abs_age_education_state(state = "WA", age = 1:5) + ) + expect_snapshot( + abs_age_education_state(state = c("QLD", "TAS"), age = 5) + ) +}) + +test_that("abs_age_education_state works", { + expect_snapshot( + abs_age_education_lga(lga = "Albury (C)") + ) + expect_snapshot( + abs_age_education_lga(lga = "Albury (C)", age = 1:5) + ) + expect_snapshot( + abs_age_education_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 10) + ) +}) diff --git a/tests/testthat/test-abs-age-lga.R b/tests/testthat/test-abs-age-lga.R index b898bc01..c7814d06 100644 --- a/tests/testthat/test-abs-age-lga.R +++ b/tests/testthat/test-abs-age-lga.R @@ -3,8 +3,7 @@ test_that("abs_age_lga() returns the right shape works", { }) test_that("abs_age_lga() returns the right shape errors", { - expect_snapshot( - error = TRUE, + expect_snapshot_error( abs_age_lga("Imaginary World") - ) + ) }) diff --git a/tests/testthat/test-abs-age-work.R b/tests/testthat/test-abs-age-work.R new file mode 100644 index 00000000..d580ffb6 --- /dev/null +++ b/tests/testthat/test-abs-age-work.R @@ -0,0 +1,17 @@ +test_that("abs_age_work_state works", { + expect_snapshot( + abs_age_work_state(state = "NSW") + ) + expect_snapshot( + abs_age_work_state(state = c("QLD", "TAS"), age = 5) + ) +}) + +test_that("abs_age_work_lga works", { + expect_snapshot( + abs_age_work_lga(lga = "Albany (C)", age = 1:5) + ) + expect_snapshot( + abs_age_work_lga(lga = c("Albury (C)", "Barcoo (S)"), age = 39) + ) +}) diff --git a/tests/testthat/test-age-population.R b/tests/testthat/test-age-population.R new file mode 100644 index 00000000..47af821d --- /dev/null +++ b/tests/testthat/test-age-population.R @@ -0,0 +1,25 @@ +world_data <- socialmixr::wpp_age() + +test_that("age_population works", { + expect_snapshot( + # Tidy data for multiple locations across different years + age_population( + data = world_data, + location_col = country, + location = c("Asia", "Afghanistan"), + age_col = lower.age.limit, + year_col = year, + year = c(2010:2020) + ) + ) + + expect_snapshot( + # Tidy data for a given location irrespective of year + age_population( + data = world_data, + location_col = country, + location = "Afghanistan", + age_col = lower.age.limit + ) + ) +}) diff --git a/tests/testthat/test-apply_vaccination.R b/tests/testthat/test-apply_vaccination.R index a52139a4..4d436c78 100644 --- a/tests/testthat/test-apply_vaccination.R +++ b/tests/testthat/test-apply_vaccination.R @@ -1,7 +1,7 @@ library(dplyr) set.seed(2022) -ngm_VIC <- generate_ngm( +ngm_VIC <- generate_ngm_oz( state_name = "VIC", age_breaks = c(seq(0, 80, by = 5), Inf), R_target = 1.5 @@ -28,14 +28,19 @@ test_that("apply_vaccination() returns expected matrices", { }) test_that("apply_vaccination() errors when there's an incorrect variable name", { - expect_snapshot( - error = TRUE, - apply_vaccination( - ngm = ngm_VIC, - data = vaccination_effect_example_data, - coverage_col = coverage, - acquisition_col = acquisition_column, - transmission_col = transmission - ) - ) - }) + expect_snapshot_error( + apply_vaccination( + ngm = ngm_VIC, + data = vaccination_effect_example_data, + coverage_col = coverage, + acquisition_col = acquisition_column, + transmission_col = transmission + ) + ) +}) + +test_that("apply_vaccination() produces expected output", { + expect_snapshot( + ngm_VIC_vacc + ) +}) diff --git a/tests/testthat/test-autoplot.R b/tests/testthat/test-autoplot.R new file mode 100644 index 00000000..2d4fb8c8 --- /dev/null +++ b/tests/testthat/test-autoplot.R @@ -0,0 +1,83 @@ +fairfield <- abs_age_lga("Fairfield (C)") + +age_break_0_15_plus <- c(seq(0, 15, by = 5), Inf) + +fairfield_contact <- extrapolate_polymod( + population = fairfield, + age_breaks = age_break_0_15_plus +) + +fairfield_ngm <- generate_ngm( + fairfield_contact, + age_breaks = age_break_0_15_plus, + R_target = 1.5 +) + +# example only +vaccination_effect_example_data_0_15 <- tibble::tribble( + ~age_band, ~coverage, ~acquisition, ~transmission, + "0-4", 0, 0, 0, + "5-11", 0.782088952510108, 0.583348020448795, 0.254242125175986, + "12-15", 0.997143279318327, 0.630736845626691, 0.29520141450591, + "15+", 0.998557451078776, 0.843558083086652, 0.555937989293065 +) + +fairfield_vaccination <- apply_vaccination( + ngm = fairfield_ngm, + data = vaccination_effect_example_data_0_15, + coverage_col = coverage, + acquisition_col = acquisition, + transmission_col = transmission +) + +transmission_matrices_0_15 <- get_setting_transmission_matrices( + age_breaks = age_break_0_15_plus +) + +# autoplot.conmat_age_matrix +test_that("autoplot works for age matrix", { + skip_on_cran() + skip_on_ci() + set.seed(2023 - 1 - 17) + autoplot_work <- autoplot( + object = fairfield_contact$work, + title = "Work" + ) + vdiffr::expect_doppelganger("autoplot-single-setting", autoplot_work) +}) + +# autoplot.conmat_setting_prediction_matrix +test_that("autoplot works for setting prediction matrix", { + skip_on_cran() + skip_on_ci() + set.seed(2023 - 1 - 17) + autoplot_all_settings <- autoplot(fairfield_contact) + vdiffr::expect_doppelganger("autoplot-all-settinge", autoplot_all_settings) +}) + +# autoplot.ngm_setting_matrix +test_that("autoplot works for NGMs", { + skip_on_cran() + skip_on_ci() + set.seed(2023 - 1 - 17) + autoplot_ngm <- autoplot(object = fairfield_ngm) + vdiffr::expect_doppelganger("autoplot-ngm", autoplot_ngm) +}) + +# autoplot.setting_vaccination_matrix +test_that("autoplot works for vaccination setting matrices", { + skip_on_cran() + skip_on_ci() + set.seed(2023 - 1 - 17) + autoplot_vaccination <- autoplot(object = fairfield_vaccination) + vdiffr::expect_doppelganger("autoplot-vaccination", autoplot_vaccination) +}) + +# autoplot.transmission_probability_matrix +test_that("autoplot works for transmission probability matrices", { + skip_on_cran() + skip_on_ci() + set.seed(2023 - 1 - 17) + autoplot_transmission <- autoplot(object = transmission_matrices_0_15) + vdiffr::expect_doppelganger("autoplot-", autoplot_transmission) +}) diff --git a/tests/testthat/test-check-age-breaks.R b/tests/testthat/test-check-age-breaks.R new file mode 100644 index 00000000..9cc8291d --- /dev/null +++ b/tests/testthat/test-check-age-breaks.R @@ -0,0 +1,13 @@ +age_one <- c(1, 2, 3, Inf) +age_two <- c(1, 2, 3) + +test_that("check_age_breaks works", { + expect_snapshot( + check_age_breaks(age_one, age_one) + ) + + expect_snapshot( + error = TRUE, + check_age_breaks(age_one, age_two) + ) +}) diff --git a/tests/testthat/test-check-if-data-frame.R b/tests/testthat/test-check-if-data-frame.R new file mode 100644 index 00000000..1f942bca --- /dev/null +++ b/tests/testthat/test-check-if-data-frame.R @@ -0,0 +1,9 @@ +test_that("check_if_data_frame works", { + expect_snapshot( + check_if_data_frame(mtcars) + ) + expect_snapshot( + error = TRUE, + check_if_data_frame(volcano) + ) +}) diff --git a/tests/testthat/test-check-if-data-list.R b/tests/testthat/test-check-if-data-list.R new file mode 100644 index 00000000..32bb0111 --- /dev/null +++ b/tests/testthat/test-check-if-data-list.R @@ -0,0 +1,11 @@ +test_that("check_if_list() returns error when argument class is not a list", { + set.seed(2021 - 10 - 4) + polymod_setting_data <- get_polymod_setting_data() + expect_snapshot_error(check_if_list(polymod_setting_data$home)) +}) + +test_that("check_if_list() returns nothing when argument class is a list", { + set.seed(2021 - 10 - 4) + polymod_setting_data <- get_polymod_setting_data() + expect_silent(check_if_list(polymod_setting_data)) +}) diff --git a/tests/testthat/test-check-lga-name.R b/tests/testthat/test-check-lga-name.R index 1a58c5a4..e4ad4986 100644 --- a/tests/testthat/test-check-lga-name.R +++ b/tests/testthat/test-check-lga-name.R @@ -14,4 +14,4 @@ test_that("check_lga_name() errors when the name is ambiguous", { error = TRUE, check_lga_name("Sydney") ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-check-state-name.R b/tests/testthat/test-check-state-name.R index 6e6b0715..e246eefb 100644 --- a/tests/testthat/test-check-state-name.R +++ b/tests/testthat/test-check-state-name.R @@ -7,4 +7,4 @@ test_that("abs_age_state() returns an error", { error = TRUE, abs_age_state("Imaginary World") ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-check_dimensions.R b/tests/testthat/test-check_dimensions.R new file mode 100644 index 00000000..dd8a302e --- /dev/null +++ b/tests/testthat/test-check_dimensions.R @@ -0,0 +1,63 @@ + + + + + +test_that("check_dimensions() returns nothing when compatible dimensions", { + list( + matrix(1:6, nrow = 3, ncol = 2), + matrix(1:6, nrow = 3, ncol = 2) + ) -> demo_matrix + + names(demo_matrix) <- c("matrix_a", "matrix_b") + + demo_data <- tibble::tibble(x = 1:2, y = 2 * x) + + expect_silent(check_dimensions( + demo_matrix, + demo_data + )) +}) + + + +test_that("check_dimensions() returns error", { + list( + matrix(1:6, nrow = 3, ncol = 2), + matrix(1:6, nrow = 3, ncol = 2) + ) -> demo_matrix + + names(demo_matrix) <- c("matrix_a", "matrix_b") + + demo_data <- tibble::tibble(x = 1:6, y = 2 * x) + expect_snapshot_error(check_dimensions( + demo_matrix, + demo_data + )) +}) + +test_that("apply_vaccination gives error when incompatible dimensions present", { + matrix(1:16, + nrow = 4, + ncol = 4, + dimnames = list( + c("[0,5)", "[5,10)", "[10,15)", "[15,Inf)"), + c("[0,5)", "[5,10)", "[10,15)", "[15,Inf)") + ) + ) -> demo_matrix + + demo_matrix <- replicate(5, demo_matrix, simplify = FALSE) + + names(demo_matrix) <- c("home", "school", "work", "other", "all") + + + expect_snapshot_error( + apply_vaccination( + ngm = demo_matrix, + data = vaccination_effect_example_data, + coverage_col = coverage, + acquisition_col = acquisition, + transmission_col = transmission + ) + ) +}) diff --git a/tests/testthat/test-conmat-population.R b/tests/testthat/test-conmat-population.R new file mode 100644 index 00000000..709e9bc0 --- /dev/null +++ b/tests/testthat/test-conmat-population.R @@ -0,0 +1,26 @@ +# need to remove the conmat_population class +fairfield <- as.data.frame(abs_age_lga("Fairfield (C)")) + +test_that("conmat_population works", { + expect_snapshot( + conmat_population( + data = fairfield, + age = lower.age.limit, + population = population + ) + ) +}) + +test_that("as_conmat_population works", { + expect_snapshot( + as_conmat_population( + data = fairfield, + age = lower.age.limit, + population = population + ) + ) +}) + +# TODO +# Need to add some defensive programming stuff +# Need to detect when you try and input a conmat population again diff --git a/tests/testthat/test-estimate-setting-contacts.R b/tests/testthat/test-estimate-setting-contacts.R new file mode 100644 index 00000000..2a01153c --- /dev/null +++ b/tests/testthat/test-estimate-setting-contacts.R @@ -0,0 +1,45 @@ +polymod_contacts <- get_polymod_setting_data() + +filter_age <- function(df, age) { + df %>% + dplyr::filter(age_from <= age, age_to <= age) +} + +filter_setting_age <- function(list_df, age) { + lapply( + list_df, + filter_age, + age + ) %>% new_setting_data() +} + +contact_data_cut <- filter_setting_age(polymod_contacts, 10) + +test_that("estimate_setting_contacts works", { + skip_on_ci() + skip_on_cran() + expect_snapshot( + estimate_setting_contacts( + contact_data_list = contact_data_cut, + survey_population = get_polymod_population(), + prediction_population = get_polymod_population(), + age_breaks = c(seq(0, 10, by = 5), Inf), + per_capita_household_size = NULL + ) + ) +}) + +test_that("estimate_setting_contacts works with different demographic data", { + skip_on_ci() + skip_on_cran() + expect_snapshot( + estimate_setting_contacts( + contact_data_list = contact_data_cut, + survey_population = get_polymod_population(), + prediction_population = get_polymod_population(), + age_breaks = c(seq(0, 10, by = 5), Inf), + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics + ) + ) +}) diff --git a/tests/testthat/test-fit-single-contact-model.R b/tests/testthat/test-fit-single-contact-model.R index 08727eaa..eee9452d 100644 --- a/tests/testthat/test-fit-single-contact-model.R +++ b/tests/testthat/test-fit-single-contact-model.R @@ -1,9 +1,11 @@ library(dplyr) -contact_data <- get_polymod_contact_data("all") %>% - filter(age_from <= 20, - age_to <= 20) +contact_data <- get_polymod_contact_data("all") %>% + filter( + age_from <= 20, + age_to <= 20 + ) -population <- get_polymod_population() %>% +population <- get_polymod_population() %>% filter(lower.age.limit <= 20) m_all <- fit_single_contact_model( @@ -11,11 +13,34 @@ m_all <- fit_single_contact_model( population = population ) +m_all_not_sym <- fit_single_contact_model( + contact_data = contact_data, + population = population, + symmetrical = FALSE +) -test_that("Model fits", { +test_that("Model returns bam model", { expect_s3_class(m_all, "bam") + expect_s3_class(m_all_not_sym, "bam") }) test_that("Model coefficients are the same", { expect_snapshot(names(m_all$coefficients)) + expect_snapshot(names(m_all_not_sym$coefficients)) +}) + +test_that("Model fits", { + expect_no_warning( + fit_single_contact_model( + contact_data = contact_data, + population = population, + symmetrical = FALSE + ) + ) + expect_no_warning( + fit_single_contact_model( + contact_data = contact_data, + population = population, + ) + ) }) diff --git a/tests/testthat/test-generate-ngm.R b/tests/testthat/test-generate-ngm.R new file mode 100644 index 00000000..f3b723c5 --- /dev/null +++ b/tests/testthat/test-generate-ngm.R @@ -0,0 +1,57 @@ +perth <- abs_age_lga("Perth (C)") +perth_hh <- get_abs_per_capita_household_size(lga = "Perth (C)") +age_breaks_0_75 <- c(seq(0, 75, by = 5), Inf) +age_breaks_0_85 <- c(seq(0, 85, by = 5), Inf) + +perth_contact <- extrapolate_polymod( + perth, + per_capita_household_size = perth_hh +) + +perth_ngm_lga <- generate_ngm( + perth, + age_breaks = age_breaks_0_75, + per_capita_household_size = perth_hh, + R_target = 1.5 +) + +perth_ngm <- generate_ngm( + perth_contact, + age_breaks = age_breaks_0_75, + R_target = 1.5 +) + +perth_ngm_oz <- generate_ngm_oz( + lga_name = "Perth (C)", + age_breaks = age_breaks_0_75, + R_target = 1.5 +) + +test_that("the three variants of the generate_ngm produce the same result", { + expect_true(all.equal(perth_ngm_lga, perth_ngm)) + expect_true(all.equal(perth_ngm_lga, perth_ngm_oz)) + expect_true(all.equal(perth_ngm_oz, perth_ngm)) +}) + +test_that("NGMs from each generate_ngm type return the same object", { + expect_snapshot( + perth_ngm_lga + ) + expect_snapshot( + perth_ngm + ) + expect_snapshot( + perth_ngm_oz + ) +}) + +test_that("generate_ngm fails when given wrong age breaks", { + expect_snapshot( + error = TRUE, + generate_ngm( + perth_contact, + age_breaks = age_breaks_0_85, + R_target = 1.5 + ) + ) +}) diff --git a/tests/testthat/test-get-abs-household-size-distribution.R b/tests/testthat/test-get-abs-household-size-distribution.R new file mode 100644 index 00000000..4e213b0f --- /dev/null +++ b/tests/testthat/test-get-abs-household-size-distribution.R @@ -0,0 +1,8 @@ +test_that("get_abs_household_size_distribution works", { + expect_snapshot( + get_abs_household_size_distribution(lga = "Fairfield (C)") + ) + expect_snapshot( + get_abs_household_size_distribution(state = "NSW") + ) +}) diff --git a/tests/testthat/test-get-polymod-population.R b/tests/testthat/test-get-polymod-population.R index cd65fb03..adf0d85c 100644 --- a/tests/testthat/test-get-polymod-population.R +++ b/tests/testthat/test-get-polymod-population.R @@ -1,14 +1,22 @@ test_that("get_polymod_contact_data() works", { - set.seed(2021-10-4) + skip_on_ci() + set.seed(2021 - 10 - 4) expect_snapshot(get_polymod_contact_data()) }) test_that("get_polymod_population() works", { - set.seed(2021-10-4) + skip_on_ci() + set.seed(2021 - 10 - 4) expect_snapshot(get_polymod_population()) }) -test_that("get_polymod_setting_data() works", { - set.seed(2021-10-4) - expect_snapshot(get_polymod_setting_data()) +test_that("get_polymod_setting_data() and derivatives work", { + skip_on_ci() + set.seed(2021 - 10 - 4) + polymod_setting_data <- get_polymod_setting_data() + expect_snapshot(polymod_setting_data) + expect_snapshot(polymod_setting_data$home) + expect_snapshot(polymod_setting_data$work) + expect_snapshot(polymod_setting_data$school) + expect_snapshot(polymod_setting_data$other) }) diff --git a/tests/testthat/test-lga_household_works.R b/tests/testthat/test-lga_household_works.R index 11e2ae59..c9c93903 100644 --- a/tests/testthat/test-lga_household_works.R +++ b/tests/testthat/test-lga_household_works.R @@ -1,37 +1,45 @@ library(purrr) library(conmat) -test_that("get_per_capita_household_size errors for some lgas", { +test_that("get_abs_per_capita_household_size errors for some lgas", { + skip_on_ci() expect_snapshot_error(map( .x = unique(abs_household_lga$lga), - .f = ~get_per_capita_household_size(lga = .x) + .f = ~ get_abs_per_capita_household_size(lga = .x) )) }) -safe_get_per_capita_household_size <- safely(get_per_capita_household_size) - -household_per_capita_runs <- map( - .x = unique(abs_household_lga$lga), - .f = ~safe_get_per_capita_household_size(lga = .x) -) - -# t_household_per_capita_runs <- transpose(household_per_capita_runs) -# compact(t_household_per_capita_runs$error) test_that("check_lga_name errors for some lgas", { + skip_on_ci() expect_snapshot_error(map( .x = unique(abs_household_lga$lga), - .f = ~check_lga_name(lga = .x) + .f = ~ check_lga_name(lga = .x) )) }) -safe_check_lga_name <- safely(check_lga_name) +# safe_get_abs_per_capita_household_size <- safely(get_abs_per_capita_household_size) +# +# household_per_capita_runs <- map( +# .x = unique(abs_household_lga$lga), +# .f = ~ safe_get_abs_per_capita_household_size(lga = .x) +# ) -check_lga_name_runs <- map( - .x = unique(abs_household_lga$lga), - .f = ~safe_check_lga_name(lga = .x) -) - -# t_check_lga_name_runs <- transpose(check_lga_name_runs) -# compact(t_check_lga_name_runs$error) +# t_household_per_capita_runs <- transpose(household_per_capita_runs) +# compact(t_household_per_capita_runs$error) +# thinking about which ones error? +# safe_check_lga_name <- safely(check_lga_name) +# +# check_lga_name_runs <- map( +# .x = unique(abs_household_lga$lga), +# .f = ~ safe_check_lga_name(lga = .x) +# ) +# +# transpose_errors <- transpose(check_lga_name_runs) +# +# which_errors <- which(map_lgl( +# transpose_errors$error, +# \(x) !is.null(x) +# ) +# ) diff --git a/tests/testthat/test-matrix-to-predictions.R b/tests/testthat/test-matrix-to-predictions.R new file mode 100644 index 00000000..c54fbab8 --- /dev/null +++ b/tests/testthat/test-matrix-to-predictions.R @@ -0,0 +1,21 @@ +fairfield <- abs_age_lga("Fairfield (C)") + +fairfield_school_contacts <- predict_contacts( + model = polymod_setting_models$school, + population = fairfield, + age_breaks = c(0, 5, 10, 15, Inf) +) + +fairfield_school_mat <- predictions_to_matrix(fairfield_school_contacts) + +test_that("matrix_to_predictions works", { + expect_snapshot( + matrix_to_predictions(fairfield_school_mat) + ) +}) + +test_that("predictions_to_matrix works", { + expect_snapshot( + predictions_to_matrix(fairfield_school_contacts) + ) +}) diff --git a/tests/testthat/test-models-fit-with-furrr.R b/tests/testthat/test-models-fit-with-furrr.R index 5cd98140..74dc47a5 100644 --- a/tests/testthat/test-models-fit-with-furrr.R +++ b/tests/testthat/test-models-fit-with-furrr.R @@ -10,7 +10,8 @@ polymod_setting_short <- map( .f = function(x) { x %>% filter(age_from <= 20, age_to <= 20) } -) +) |> + new_setting_data() polymod_population_short <- polymod_population %>% filter(lower.age.limit <= 20) @@ -18,12 +19,29 @@ contact_model <- fit_setting_contacts( contact_data_list = polymod_setting_short, population = polymod_population_short ) + contact_model_pred <- predict_setting_contacts( population = polymod_population_short, contact_model = contact_model, age_breaks = c(seq(0, 20, by = 5), Inf) ) +test_that("predict_setting_contact model prints appropriately", { + expect_snapshot(contact_model_pred) +}) + +# check that you can specify your own population data for school and work demographics +contact_model_diff_data <- fit_setting_contacts( + contact_data_list = polymod_setting_short, + population = polymod_population_short, + school_demographics = conmat_original_school_demographics, + work_demographics = conmat_original_work_demographics +) + +test_that("fit_setting_contact model prints appropriately",{ + expect_snapshot(contact_model) +}) + test_that("list names are kept", { expect_snapshot(names(contact_model)) expect_snapshot(names(contact_model_pred)) @@ -45,4 +63,4 @@ test_that("Model coefficients are the same", { test_that("Matrix dims are kept", { expect_snapshot(map(contact_model_pred, dim)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-per_capita_household_size.R b/tests/testthat/test-per_capita_household_size.R new file mode 100644 index 00000000..0b6998d9 --- /dev/null +++ b/tests/testthat/test-per_capita_household_size.R @@ -0,0 +1,22 @@ + + +test_that("refactored code works", { + expect_equal(get_abs_per_capita_household_size_state("NSW"), get_abs_per_capita_household_size("NSW")) +}) + +test_that("refactored code works with lga", { + expect_equal( + get_abs_per_capita_household_size_lga(unique(abs_lga_lookup$lga)[1]), + get_abs_per_capita_household_size(lga = unique(abs_lga_lookup$lga)[1]) + ) +}) + + +test_that("errors when given incorrect state", { + expect_error(get_abs_per_capita_household_size_state("NSA")) +}) + + +test_that("errors when given incorrect lga", { + expect_error(get_abs_per_capita_household_size_lga("Fairfield")) +}) diff --git a/tests/testthat/test-predict-contacts.R b/tests/testthat/test-predict-contacts.R index 331c0d62..93a852e9 100644 --- a/tests/testthat/test-predict-contacts.R +++ b/tests/testthat/test-predict-contacts.R @@ -1,9 +1,11 @@ library(dplyr) -contact_data <- get_polymod_contact_data("all") %>% - filter(age_from <= 20, - age_to <= 20) +contact_data <- get_polymod_contact_data("all") %>% + filter( + age_from <= 20, + age_to <= 20 + ) -population <- get_polymod_population() %>% +population <- get_polymod_population() %>% filter(lower.age.limit <= 20) m_all <- fit_single_contact_model( @@ -14,13 +16,11 @@ m_all <- fit_single_contact_model( age_breaks_5y <- c(seq(0, 10, by = 5), Inf) synthetic_pred <- predict_contacts( - model = m_all, + model = m_all, population = population, age_breaks = age_breaks_5y ) -synthetic_pred - test_that("predict_contacts() works", { expect_s3_class(synthetic_pred, "tbl_df") expect_snapshot(names(synthetic_pred)) diff --git a/tests/testthat/test-print-conmat-matrix-method.R b/tests/testthat/test-print-conmat-matrix-method.R new file mode 100644 index 00000000..1bb5e341 --- /dev/null +++ b/tests/testthat/test-print-conmat-matrix-method.R @@ -0,0 +1,10 @@ +perth_city <- abs_age_lga("Perth (C)") +set.seed(2022 - 12 - 14) +synthetic_settings_5y_perth <- extrapolate_polymod( + population = perth_city +) + +test_that("Print method for setting prediction matrices works", { + expect_snapshot(synthetic_settings_5y_perth) + expect_snapshot(synthetic_settings_5y_perth$home) +}) diff --git a/tests/testthat/test-setting-prediction-matrix.R b/tests/testthat/test-setting-prediction-matrix.R new file mode 100644 index 00000000..eeece177 --- /dev/null +++ b/tests/testthat/test-setting-prediction-matrix.R @@ -0,0 +1,51 @@ +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +one_by_nine <- matrix(1, nrow = 9, ncol = 9) + +test_that("setting_prediction_matrix works", { + expect_snapshot( + setting_prediction_matrix( + home = one_by_nine, + work = one_by_nine, + age_breaks = age_breaks_0_80_plus + ) + ) +}) + +mat_list <- list( + home = one_by_nine, + work = one_by_nine +) + +test_that("as_setting_prediction_matrix works", { + expect_snapshot( + as_setting_prediction_matrix( + mat_list, + age_breaks = age_breaks_0_80_plus + ) + ) +}) + +setting_mat <- setting_prediction_matrix( + home = one_by_nine, + work = one_by_nine, + age_breaks = age_breaks_0_80_plus +) + +test_that("as_setting_prediction_matrix warns when setting pred matrix given", { + expect_snapshot( + as_setting_prediction_matrix( + setting_mat, + age_breaks = age_breaks_0_80_plus + ) + ) +}) + +test_that("as_setting_prediction_matrix fails when wrong object given", { + expect_snapshot( + error = TRUE, + as_setting_prediction_matrix( + iris, + age_breaks = age_breaks_0_80_plus + ) + ) +}) diff --git a/tests/testthat/test-setting-transmission-matrix.R b/tests/testthat/test-setting-transmission-matrix.R new file mode 100644 index 00000000..48a2a01a --- /dev/null +++ b/tests/testthat/test-setting-transmission-matrix.R @@ -0,0 +1,20 @@ +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +one_05 <- matrix(0.05, nrow = 9, ncol = 9) + +test_that("transmission_probability_matrix works", { + expect_snapshot( + transmission_probability_matrix( + home = one_05, + work = one_05, + age_breaks = age_breaks_0_80_plus + ) + ) + + expect_snapshot( + transmission_probability_matrix( + one_05, + one_05, + age_breaks = age_breaks_0_80_plus + ) + ) +}) diff --git a/vignettes/conmat-population.Rmd b/vignettes/conmat-population.Rmd new file mode 100644 index 00000000..96a2a997 --- /dev/null +++ b/vignettes/conmat-population.Rmd @@ -0,0 +1,214 @@ +--- +title: "Conmat Population Data" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Conmat Population Data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(conmat) +``` + +The main goal of conmat is to estimate contact rates between age groups. This means we require data describing the age population distribution. Effectively this is data that has a column describing age, and a column describing population, like this: + +```{r} +library(tibble) + +dat_age <- tibble( + age = seq(0, 25, by = 5), + population = seq(1410, 1350, by = -12) +) + +dat_age +``` + +We use this kind of data frequently in conmat, and it means that your code might sometimes have lots of repetition like this: + +```{r} +#| eval: false +calculation( + data, + age_col = age, + population_col = population +) + +estimation( + data, + age_col = age, + population_col = population +) +``` + +The issue with repeating arguments is that it is unnecessary and sometimes leads to forgetting to include them, or including them erroneously. The code could instead look like this: + +```{r} +#| eval: false +calculation(data) +estimation(data) +``` + +We can achieve this by creating a special object that is a dataframe that knows which columns represent age, and population. This is a `conmat_population` object. + +We can create one with `as_conmat_population`: + +```{r} +dat_age_pop <- as_conmat_population( + data = dat_age, + age = age, + population = population +) + +dat_age_pop +``` + +You can see when we print this out to the console that this class is noted in parentheses (`conmat_population`), and the columns are noted. + +# Accessing age and population information + +If you want to access the age and population information, there are 2 main functions: + +- `age()` +- `population()` + +These return symbols, which can be used in programming. + +```{r} +age(dat_age_pop) +population(dat_age_pop) +``` + +alternatively there are functions that return character information: + +- `age_label()` +- `population_label()` + +```{r} +age_label(dat_age_pop) +population_label(dat_age_pop) +``` + +# Brief example of using accessor functions + +You could use this to extract out the values from the data and then summarise it, for example: + +```{r} +pop_var <- age_label(dat_age_pop) + +dat_age_pop[[pop_var]] +mean(dat_age_pop[[pop_var]]) +sd(dat_age_pop[[pop_var]]) + +age_var <- population_label(dat_age_pop) + +dat_age_pop[[age_var]] +mean(dat_age_pop[[age_var]]) +sd(dat_age_pop[[age_var]]) +``` + +You could then wrap this in a function if you like: + +```{r} +summary_pop <- function(data) { + dat_age_pop[[pop_var]] + mean_pop <- mean(dat_age_pop[[pop_var]]) + sd_pop <- sd(dat_age_pop[[pop_var]]) + + age_var <- population_label(dat_age_pop) + + dat_age_pop[[age_var]] + mean_age <- mean(dat_age_pop[[age_var]]) + sd_age <- sd(dat_age_pop[[age_var]]) + + return( + tibble( + mean_pop, + sd_pop, + mean_age, + sd_age + ) + ) +} + +summary_pop(dat_age_pop) +``` + + +However if you would like to program with these variables, for example write a function that uses functions like `mutate` and `arrange`, from `dplyr`, you would need to get the symbols and then evaluate them with `!!`, like so: + +```{r} +library(dplyr) +my_age_summary <- function(data) { + age_col <- age(data) + data %>% + summarise( + mean_age = mean(!!age_col) + ) +} + +my_age_summary(dat_age_pop) +``` + +And for a slightly more complex use case + +```{r} +my_age_pop_summary <- function(data) { + age_col <- age(data) + pop_col <- population(data) + + data %>% + summarise( + across(c(!!age_col, !!pop_col), + c(mean = mean, sd = sd), + .names = "{.fn}_{.col}" + ) + ) +} + +my_age_pop_summary(dat_age_pop) +``` + +# An example use from the package + +Internally within conmat we do some modelling work that requires us to know the midpoint of the ages, and a couple of other bits - here's an example of how we write that code now: + +```{r} +add_modelling_info <- function(data) { + age_col <- age(data) + age_var <- age_label(data) + pop_col <- population(data) + + diffs <- diff(data[[age_var]]) + bin_widths <- c(diffs, diffs[length(diffs)]) + + data %>% + dplyr::arrange( + !!age_col + ) %>% + dplyr::mutate( + # model based on bin midpoint + bin_width = bin_widths, + midpoint = !!age_col + bin_width / 2, + # scaling down the population appropriately + log_pop = log(!!pop_col / bin_width) + ) +} + +add_modelling_info(dat_age_pop) +``` + +# Using these as S3 methods in an R package + +If you want to use `conmat_population` within your R package, then please get in touch with the maintainer. We currently do not export the underlying internal functions, but this can easily be changed. + +# Conclusion + +That's how we can use the conmat population information! Please go ahead and use and enjoy! diff --git a/vignettes/data-sources.Rmd b/vignettes/data-sources.Rmd index 23b6f530..6c85fcee 100644 --- a/vignettes/data-sources.Rmd +++ b/vignettes/data-sources.Rmd @@ -14,43 +14,71 @@ knitr::opts_chunk$set( ) ``` -We provide access to a variety of different data sources in `conmat`. Most of these are centered around Australian data, as the package was initially created for disease modelling work in Australia. The aim of this vignette is to give a quick tour of the data sources available in `conmat`. +We provide access to a variety of different data sources in `conmat`. Most of these are centred around Australian data, as the package was initially created for disease modelling work in Australia. The aim of this vignette is to give a quick tour of the data sources available in `conmat`. ```{r setup} library(conmat) ``` +## World data + +We provide functions to clean up world population data from `socialmixr`. + +```{r} +world_data <- socialmixr::wpp_age() + +head(world_data) +``` + +We can tidy the data up, filtering down to a specified location and year with the `age_population` function: + +```{r} +nz_2015 <- age_population( + data = world_data, + location_col = country, + location = "New Zealand", + age_col = lower.age.limit, + year_col = year, + year = 2015 +) + +nz_2015 +``` + +This returns a `conmat_population` object, which is a data frame that knows which columns represent `age` and `population` information. This is useful for other modelling parts of the `conmat` package. + ## Australian Bureau of Statistics (ABS) data ### Accessing Functions -We provide two functions to access (Local Government Area) LGA and state level population age data, which are provided in 5 year age bins from 0, 5, up to 85+ +We provide two functions to access LGA (Local Government Area), and state level population age data, which are provided in 5 year age bins from 0, 5, up to 85+. These data are `conmat_population` tibbles, which means that they know which columns represent the `age` and `population` information. This means that functions inside of `conmat` can work a bit smoother as we refer to these columns frequently. #### `abs_age_lga()` ```{r abs-age-lga} -abs_age_lga(lga_name = "Fairfield (C)") -abs_age_lga(lga_name = "Brisbane (C)") +fairfield <- abs_age_lga(lga_name = "Fairfield (C)") +fairfield ``` -It requires you to know the exact name of the LGA, you can see them in the dataset, `abs_lga_lookup` +Note that this is a `conmat_population` object, which prints in red at the top of the data frame. This provides the information on the `age` and `population` columns, stating: `age: lower.age.limit`, and `population: population`, indicating which columns refer to the appropriate variables. + +Also note that `abs_age_lga` requires you to know the exact name of the LGA, you can see them in the dataset, `abs_lga_lookup` ```{r abs-lga-lookup} abs_lga_lookup ``` -And if you're not sure about a particular name of a place, you can use `agrep` and `filter` like so: +And if you're not sure about a particular name of a place, you can use `agrep` and `filter`, to match on similar-ish characters, like so: ```{r abs-lga-lookup-filter} library(dplyr) -abs_lga_lookup %>% +abs_lga_lookup %>% filter(agrepl("Sydney", lga)) ``` - #### `abs_age_state()` -This takes in the abbreviated state names +This takes in the abbreviated state names, and is also a `conmat_population` object. ```{r abs-age-state} abs_age_state(state_name = "NSW") @@ -132,47 +160,46 @@ library(ggplot2) library(stringr) library(dplyr) eyre_transmission_probabilities %>% - group_by( - setting, - case_age_5y, - contact_age_5y - ) %>% - summarise( - across( - probability, - mean - ), - .groups = "drop" - ) %>% - rename( - case_age = case_age_5y, - contact_age = contact_age_5y - ) %>% - mutate( - across( - ends_with("age"), - ~ factor(.x, - levels = str_sort( - unique(.x), - numeric = TRUE - ) - ) - ) - ) %>% - ggplot( - aes( - x = case_age, - y = contact_age, - fill = probability - ) - ) + - facet_wrap(~setting) + - geom_tile() + - scale_fill_viridis_c() + - coord_fixed() + - theme_minimal() + - theme( - axis.text = element_text(angle = 45, hjust = 1) - ) - + group_by( + setting, + case_age_5y, + contact_age_5y + ) %>% + summarise( + across( + probability, + mean + ), + .groups = "drop" + ) %>% + rename( + case_age = case_age_5y, + contact_age = contact_age_5y + ) %>% + mutate( + across( + ends_with("age"), + ~ factor(.x, + levels = str_sort( + unique(.x), + numeric = TRUE + ) + ) + ) + ) %>% + ggplot( + aes( + x = case_age, + y = contact_age, + fill = probability + ) + ) + + facet_wrap(~setting) + + geom_tile() + + scale_fill_viridis_c() + + coord_fixed() + + theme_minimal() + + theme( + axis.text = element_text(angle = 45, hjust = 1) + ) ``` diff --git a/vignettes/example-pipeline.Rmd b/vignettes/example-pipeline.Rmd index 0ef934d1..402dc243 100644 --- a/vignettes/example-pipeline.Rmd +++ b/vignettes/example-pipeline.Rmd @@ -30,18 +30,22 @@ This vignette outlines a basic workflow of: We can create a synthetic matrix from all POLYMOD data by using the `extrapolate_polymod` function. First, let's extract an age distribution from the ABS data. ```{r fairfield} -fairfield_population <- abs_age_lga("Fairfield (C)") -fairfield_population +fairfield <- abs_age_lga("Fairfield (C)") +fairfield ``` -The key features of this age distribution are the columns, `lower.age.limit`, and `population`. +Note that this is a `conmat_population` object, which is just a data frame that knows which columns represent the `age` and `population` information. -We then extrapolate this using the full POLYMOD data. +We then extrapolate this to home, work, school, other and all settings, using the full POLYMOD data. This gives us a setting prediction matrix. ```{r extrapolate-fairfield} +age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) synthetic_fairfield_5y <- extrapolate_polymod( - population = fairfield_population + population = fairfield, + age_breaks = age_breaks_0_80_plus ) +synthetic_fairfield_5y +synthetic_fairfield_5y$home ``` By full POLYMOD data, we mean these data: @@ -52,6 +56,7 @@ polymod_setting <- get_polymod_setting_data() polymod_population <- get_polymod_population() polymod_setting +polymod_setting$home polymod_population ``` @@ -62,7 +67,13 @@ The `extrapolate_polymod()` function does the following: It also has options to predict to specified age brackets, defaulting to 5 year age groups up to 75, then 75 and older. -This object, `synthetic_fairfield_5y`, contains a matrix of predictions for each of the settings, home, work, school, other, and all settings. +This object, `synthetic_fairfield_5y`, contains a matrix of predictions for each of the settings, home, work, school, other, and all settings, which is summarised when you print the object to the console: + +```{r} +synthetic_fairfield_5y +``` + +You can see more detail by using `str` if you like: ```{r} str(synthetic_fairfield_5y) @@ -72,12 +83,54 @@ str(synthetic_fairfield_5y) Once infected, a person can transmit an infectious disease to another, creating generations of infected individuals. We can define a matrix describing the number of newly infected individuals in given categories, such as age, for consecutive generations. This matrix is called a "next generation matrix" (NGM). -We can generate an NGM for Australian specific data like so (in a future release we will change this to work for any predicted population) +We can generate an NGM using the population data + +```{r} +fairfield_ngm_age_data <- generate_ngm( + fairfield, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5 +) +``` + +Or if you've already got the fitted settings contact matrices, then you can pass that to `generate_ngm` instead: + +```{r} +fairfield_ngm <- generate_ngm( + synthetic_fairfield_5y, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5 +) +``` + +However, note in these cases the age breaks specified in `generate_ngm` must be the same as the age breaks specified in the synthetic contact matrix, otherwise it will error as it is trying to multiple incompatible matrices. + +You can also specify your own transmission matrix, like so: + +```{r} +# using our own transmission matrix +new_transmission_matrix <- get_setting_transmission_matrices( + age_breaks = age_breaks_0_80_plus, + # is normally 0.5 + asymptomatic_relative_infectiousness = 0.75 +) + +new_transmission_matrix + +fairfield_ngm_0_80_new_tmat <- generate_ngm( + synthetic_fairfield_5y, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5, + setting_transmission_matrix = new_transmission_matrix +) +``` + +We can also generate an NGM for Australian specific data like so, which refits and extrapolates the data based on the Australian state or LGA provided. ```{r ngm-fairfield} -ngm_fairfield <- generate_ngm( +ngm_fairfield <- generate_ngm_oz( lga_name = "Fairfield (C)", - age_breaks = c(seq(0, 80, by = 5), Inf), + age_breaks = age_breaks_0_80_plus, R_target = 1.5 ) ``` @@ -89,6 +142,7 @@ ngm_fairfield$home str(ngm_fairfield) ``` + ## Applying Vaccination Rates It is important to understand the effect of vaccination on the next @@ -111,21 +165,101 @@ Each row contains information, for each age band: * Acquisition - probability of acquiring COVID * Transmission - the probability of transmission -Then you need to specify the columns in the vaccination effect data frame related to covarege, acquisition, and transmission. +Then you need to specify the columns in the vaccination effect data frame related to coverage, acquisition, and transmission. ```{r} # Apply vaccination effect to next generation matrices ngm_nsw_vacc <- apply_vaccination( - ngm = ngm_fairfield, - data = vaccination_effect_example_data, - coverage_col = coverage, - acquisition_col = acquisition, - transmission_col = transmission + ngm = ngm_fairfield, + data = vaccination_effect_example_data, + coverage_col = coverage, + acquisition_col = acquisition, + transmission_col = transmission ) ngm_nsw_vacc ``` +# Fitting a new model with asymmetric terms + +In the examples so far we have focussed on using `extrapolate_polymod` to fit the contact model - this is very useful because it doesn't involve many lines of code to fit: + +```{r} +#| eval: FALSE +fairfield <- abs_age_lga("Fairfield (C)") +age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) +synthetic_fairfield_5y <- extrapolate_polymod( + population = fairfield, + age_breaks = age_breaks_0_80_plus +) +``` + +It also fits quite quickly, since it uses a pre-computed model, `polymod_setting_models`, (See `?polymod_setting_models` for more details). + +Under the hood of `extrapolate_polymod`, this uses this already fit model for each setting (home, work, school, other), and then predicts using that model, and the provided data, to predict the new contact rates. + +So the process is: + +1. Create a model that predicts contact rate for each setting +2. Predict to a new population using that model + +Let's show each step and unpack them. + +First let's create a model that predicts contact rate for each setting: + +```{r} +polymod_setting_data <- get_polymod_setting_data() +polymod_population <- get_polymod_population() + +contact_setting_model_not_sym <- fit_setting_contacts( + contact_data_list = polymod_setting_data, + population = polymod_population, + symmetrical = FALSE +) +``` + +Here, we first get the polymod setting data (`polymod_setting_data`), and the polymod population (`polymod_population`), to create a model for each setting. These data look like this, if you are interested. + +```{r} +polymod_setting_data +polymod_population +``` + +We also specify the `symmetrical = FALSE` option - by default this is TRUE. Briefly, this changes some of the terms we use in creating the model, to use terms that aren't strictly symmetric. + +Now that we've got our model, we can predict to our fairfield data, like so: + +```{r} +fairfield_hh <- get_abs_per_capita_household_size(lga = "Fairfield (C)") +fairfield_hh +contact_model_pred <- predict_setting_contacts( + population = fairfield, + contact_model = contact_setting_model_not_sym, + age_breaks = age_breaks_0_80_plus, + per_capita_household_size = fairfield_hh +) +``` + +* `population` is our population to predict to +* `contact_model` is our contact rate model for each setting +* `age_breaks` are our age breaks to predict to +* `per_capita_household_size` is the household size for that population, in our case we have a helper function, `get_abs_per_capita_household_size` which works for each LGA in Australia. + +alternatively, you can use the `estimate_setting_contacts` function to do a similar task: + +```{r} +contact_model_pred_est <- estimate_setting_contacts( + contact_data_list = polymod_setting_data, + survey_population = polymod_population, + prediction_population = fairfield, + age_breaks = age_breaks_0_80_plus, + per_capita_household_size = fairfield_hh, + symmetrical = FALSE +) +``` + +This is a bit briefer than the two step process, and might be preferable to creating a separate model. + diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index ef25b2ec..ec7f9aa7 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -24,13 +24,11 @@ library(mgcv) library(patchwork) ``` -The goal of conmat is to make it easy to generate synthetic contact matrices for a given age population. +The goal of conmat is to simplify the process of generating synthetic contact matrices for a given age population. **What is a contact matrix?** -Contact matrices describe the degree of contact between individuals of given age groups. - -For example, this matrix describes the number of contacts between individuals +Contact matrices describe the degree of contact between individuals of given age groups. For example, this matrix describes the number of contacts between individuals. ```{r} #| label: show-contact @@ -45,35 +43,110 @@ cmat <- matrix( dimnames = list( name_vec, name_vec - ) + ) ) -diag(cmat) <- c(10,11,13) +diag(cmat) <- c(10, 11, 13) cmat[upper.tri(cmat)] <- 3:5 cmat[lower.tri(cmat)] <- 3:5 cmat ``` -The rows and columns represent the age groups of the people. On the main diagonal we see that we have a higher number of contacts - showing that people of similar ages tend to interact more with one another. - -We can use the information in these matrices to model how diseases such as COVID-19 spread in a population through social contact. +The rows and columns represent the age groups of the people. On the main diagonal we see that we have a higher number of contacts - showing that people of similar ages tend to interact more with one another. We can use the information in these matrices to model how diseases such as COVID-19 spread in a population through social contact. **Why do we need _synthetic_ contact matrices?** -Contact matrices are produced from empirical data resulting from a contact survey, which requires individuals to diary the amount and manner of contact a person has in a day. - -However, these surveys are highly time-consuming and expensive to run, meaning that only a handful of these empirical datasets exist globally. +Contact matrices are produced from empirical data resulting from a contact survey, which requires individuals to diary the amount and manner of contact a person has in a day. However, these surveys are highly time-consuming and expensive to run, meaning that only a handful of these empirical datasets exist globally. We can use statistical methods to create _synthetic contact matrices_, which are new contact matrices that have been generalised to new countries based on existing surveys. **Why do we need `conmat`?** -Existing methods only provide outputs of the contact matrices for each country, or at best, for urban and rural areas for a given country. +Existing methods only provide outputs of the contact matrices for each country, or at best, for urban and rural areas for a given country. We need methods that allow for flexibly creating synthetic contact matrices for a specified age population. This is because the age population distribution of many countries (e.g., Australia), are quite heterogeneous, and assuming it is homogeneous would result in inaccurate representation of community infection in many regions. + +## Quick example using Australian data + +Suppose we want to get a contact matrix for a given region in Australia, let's say the city of Perth. We can get that from a helper function, `abs_age_lga`. + +```{r} +perth <- abs_age_lga("Perth (C)") +perth +``` + +(You can learn more about the data sources we provide in `data-sources.Rmd`) + +We can get a contact matrix made for `perth` using the `extrapolate_polymod` function: + +```{r} +perth_contact <- extrapolate_polymod( + population = perth +) + +perth_contact +``` + +We can plot this with `autoplot` + +```{r} +autoplot(perth_contact) +``` + +And you can see each contact matrix in a setting by referring to its name - for example, the "home" setting contact matrix: -We need methods that allow for flexibly creating synthetic contact matrices for a specified age population, as the age population distribution of many countries (e.g., Australia), are quite heterogeneous, and assuming it is homogeneous would result in inaccurate representation of community infection in many regions. +```{r} +perth_contact$home +``` + +These contact matrices could then be used in subsequent modelling, such as the input in a SIR (Susceptible, Infected, Recovered) model. + +## Quick example using world data -## Example +Similarly to above, we can access some world data from another data source - we have some helpers to pull data from the world population: + +```{r} +world_data <- socialmixr::wpp_age() + +head(world_data) +``` + +We can tidy the data up, filtering down to a specified location and year with the `age_population` function: + +```{r} +nz_2015 <- age_population( + data = world_data, + location_col = country, + location = "New Zealand", + age_col = lower.age.limit, + year_col = year, + year = 2015 +) + +nz_2015 +``` + +Then we could create a contact matrix for NZ population data for 2015 like so: + +```{r} +nz_contact <- extrapolate_polymod( + population = nz_2015 +) +autoplot(nz_contact) +nz_contact$home +``` + +## What next? + +From here you might want to: + +- Create a next generation matrix (NGM) +- Apply vaccination to an NGM + +See the vignette, "example pipeline" for more details. + +# A More in depth example + +The above example showed how we might extract a contact matrix based on the polymod data - this example now shows all the steps that can be taken for full flexibility, and provides more detail on the initial datasets that could be used. First we want to fit the model to the POLYMOD data, which contains various survey and population data. @@ -97,7 +170,7 @@ We also provide control over the POLYMOD data retrieved from `get_polymod_contac polymod_contact_data_belgium_0_10 <- get_polymod_contact_data( setting = "work", countries = "Belgium", - ages = c(0,5,10) + ages = c(0, 5, 10) ) polymod_contact_data_belgium_0_10 @@ -117,18 +190,20 @@ You can see the available countries in the helpfile with `?get_polymod_populatio We can create a model of the contact *rate* with the function `fit_single_contact_model`. We're first going to use some smaller sets of the data, to save on computation time. ```{r fit-polymod} -set.seed(2022-10-04) -polymod_contact_data_home_small <- polymod_contact_data_home %>% - filter(age_from <= 30, - age_to <= 30) +set.seed(2022 - 10 - 04) +polymod_contact_data_home_small <- polymod_contact_data_home %>% + filter( + age_from <= 30, + age_to <= 30 + ) -polymod_survey_data_small <- polymod_survey_data %>% +polymod_survey_data_small <- polymod_survey_data %>% filter(lower.age.limit <= 30) contact_model <- fit_single_contact_model( contact_data = polymod_contact_data_home_small, population = polymod_survey_data_small - ) +) ``` This fits a generalised additive model (GAM), predicting the contact rate, based on a series of prediction terms that describe various features of the contact rates. @@ -142,17 +217,17 @@ We can use this contact model to then predict the contact rate in a new populati As a demonstration, let's take an age population from a given LGA in Australia (this was the initial motivation for the package, so there are some helper functions for Australian specific data). ```{r fairfield} -fairfield_age_pop <- abs_age_lga("Fairfield (C)") -fairfield_age_pop +fairfield <- abs_age_lga("Fairfield (C)") +fairfield ``` -We can then pass the contact model through to `predict_contacts`, along with the fairfield age population data, and some age breaks that we want to predict to. Note that these age breaks could be any size, we just ahve them set to 5 year age brackets in most of the examples, but these could be 1 year, 2 year, or even sub year. +We can then pass the contact model through to `predict_contacts`, along with the fairfield age population data, and some age breaks that we want to predict to. Note that these age breaks could be any size, we just have them set to 5 year age brackets in most of the examples, but these could be 1 year, 2 year, or even sub year. ```{r predict-contacts} -set.seed(2022-10-04) +set.seed(2022 - 10 - 04) synthetic_contact_fairfield <- predict_contacts( model = contact_model, - population = fairfield_age_pop, + population = fairfield, age_breaks = c(seq(0, 30, by = 5), Inf) ) @@ -161,12 +236,12 @@ synthetic_contact_fairfield ## Plotting -Let's visualise the matrix to get a sense of the predictions with `plot_matrix`. First we need to transform the predictions to a matrix: +Let's visualise the matrix to get a sense of the predictions with `autoplot`. First we need to transform the predictions to a matrix: ```{r plot-matrix-differents} -synthetic_contact_fairfield %>% - predictions_to_matrix() %>% - plot_matrix() +synthetic_contact_fairfield %>% + predictions_to_matrix() %>% + autoplot() ``` ## Note @@ -182,17 +257,18 @@ Our experience has been that we would be fitting the same models to each setting We can create a model for each of the settings with `fit_setting_contacts()`. ```{r fit-polymod-setting} -set.seed(2021-09-24) +set.seed(2021 - 09 - 24) polymod_setting_data <- get_polymod_setting_data() -polymod_setting_data_small <- polymod_setting_data %>% - lapply(FUN = function(x) x %>% filter(age_from <= 20, age_to <= 20)) +polymod_setting_data_small <- polymod_setting_data %>% + lapply(FUN = function(x) x %>% filter(age_from <= 20, age_to <= 20)) |> + new_setting_data() setting_models <- fit_setting_contacts( contact_data_list = polymod_setting_data_small, population = polymod_survey_data - ) +) ``` This contains a list of models, one for each setting. We can look at one, and get summary information out: @@ -209,12 +285,12 @@ So this gives us our baseline model of a contact model. We have fit this model t Then we take the model we had earlier, and extrapolate to the fairfield data with `predict_setting_contacts`, also providing some age breaks we want to predict to ```{r fairfield-synth-5} -set.seed(2021-10-04) +set.seed(2021 - 10 - 04) synthetic_settings_5y_fairfield <- predict_setting_contacts( - population = fairfield_age_pop, + population = fairfield, contact_model = setting_models, age_breaks = c(seq(0, 20, by = 5), Inf) - ) +) ``` This then returns a list of synthetic matrices, "home", "work", "school", "other", and "all", which is the sum of all matrices. @@ -225,155 +301,12 @@ synthetic_settings_5y_fairfield$home synthetic_settings_5y_fairfield$all ``` -We can use `plot_setting_matrix` to plot all at once +We can use `autoplot` to plot all at once ```{r fairfield-synth-5-plot} # this code is erroring for the moment - something to do with rendering a large plot I think. -plot_setting_matrices( +autoplot( synthetic_settings_5y_fairfield, title = "Setting-specific synthetic contact matrices (fairfield 2020 projected)" ) ``` - - -### Speeding up computation with `future` - -`conmat` supports parallelisation, which is useful in a couple of contexts with the model fitting, here is an example: - -```{r load-future} -library(future) -plan(multisession, workers = 4) -``` - -We set the future plan, saying "multisession", with 4 workers. Then we run the same code as above (note that you must specify the plan, otherwise it does not know how to parallelise. See the [future package documentation](https://future.futureverse.org/reference/plan.html) for more details): - -```{r show-off-furrr} -contact_model <- fit_setting_contacts( - contact_data_list = polymod_setting_data_small, - population = polymod_survey_data_small -) - -contact_model_pred <- predict_setting_contacts( - population = polymod_survey_data_small, - contact_model = contact_model, - age_breaks = c(seq(0, 20, by = 5), Inf) -) -``` - -Notably this is about 3 times faster than without using that plan. - -### Extrapolate from all POLYMOD data - -The above model fitting and prediction steps are all done on the full POLYMOD data. But you could modify the polymod data used in the model, to say be only for Finland, or Germany, or a specific age group. - -However, if you want to predict to all settings, using the full POLYMOD data, to a specific new age population, you can use `extrapolate_polymod`, which takes arguments for a new population, age breaks, and optionally household per capita size. By default it uses 5 year age breaks from 0-75, then 75 and above. - -We can demonstrate fitting a model using just Fairfield data, like so: - -```{r} -# compute setting-specific and combined age matrices for polymod -synthetic_settings_5y_fairfield <- extrapolate_polymod( - population = fairfield_age_pop -) - -plot_setting_matrices(synthetic_settings_5y_fairfield) -``` - -We can also do the same, for the polymod data itself: - -```{r} -# compute setting-specific and combined age matrices for polymod -polymod_population <- get_polymod_population() -synthetic_settings_5y_polymod <- extrapolate_polymod( - population = polymod_population -) - -plot_setting_matrices(synthetic_settings_5y_polymod) -``` - -And for fun we can compare some of the data together, say polymod data for home against fairfield home, using the `patchwork` package: - -```{r} -library(patchwork) -gg_polymod_home <- plot_matrix(synthetic_settings_5y_polymod$home) + - labs(title = "polymod home") - -gg_fairfield_home <- plot_matrix(synthetic_settings_5y_fairfield$home) + - labs(title = "Fairfield home") - -gg_polymod_home + gg_fairfield_home -``` - -This might not look that different, but we can compare this to Alice Springs - -```{r} -synthetic_settings_5y_alice <- extrapolate_polymod( - population = abs_age_lga("Alice Springs (T)") -) - -gg_alice_home <- plot_matrix(synthetic_settings_5y_alice$home) + - labs(title = "Alice Springs home") - -gg_polymod_home + gg_alice_home -gg_polymod_home + gg_fairfield_home -``` - -We can use some other functions from `socialmixr` to extract similar estimates for different populations in different countries. - -```{r} -library(socialmixr) - -italy_2005 <- wpp_age("Italy", "2005") - -italy_2005 -``` - -This italian population data could then be put into `extrapolate_polymod` as above. - -## For interest's sake: visualising the empirical contact rate data - -```{r} -# visualise empirical contact rate estimates -bind_rows( - home = get_polymod_contact_data("home"), - school = get_polymod_contact_data("school"), - work = get_polymod_contact_data("work"), - other = get_polymod_contact_data("other"), - .id = "setting" -) %>% - mutate( - rate = contacts / participants, - setting = factor( - setting, - levels = c( - "home", "school", "work", "other" - ) - ) - ) %>% - group_by( - setting - ) %>% - mutate( - `relative contact rate` = rate / max(rate) - ) %>% - ungroup() %>% - ggplot( - aes( - x = age_from, - y = age_to, - fill = `relative contact rate` - ) - ) + - facet_wrap( - ~ setting, - ncol = 2, - scales = "free" - ) + - geom_tile() + - scale_fill_distiller( - direction = 1, - trans = "sqrt" - ) + - theme_minimal() -``` - diff --git a/vignettes/other-data-sources.Rmd b/vignettes/other-data-sources.Rmd new file mode 100644 index 00000000..51b92503 --- /dev/null +++ b/vignettes/other-data-sources.Rmd @@ -0,0 +1,101 @@ +--- +title: "Using other data sources" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using other data sources} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(conmat) +``` + +The primary goal of the conmat package is to be able to get a contact matrix for a given age population. It was initially written for work done in Australia, and so the initial focus was on cleaning and extracting data from the Australian Bureau of Statistics. + +This vignette focusses on using other data sources with conmat. + +We can use some other functions from `socialmixr` to extract similar estimates for different populations in different countries. + +We could extract some data from Italy using the [`socialmixr`](https://epiforecasts.io/socialmixr/) R package + +```{r} +library(socialmixr) + +italy_2005 <- wpp_age("Italy", "2005") + +head(italy_2005) +``` + +We can then convert this data into a `conmat_population` object and use it in `extrapolate_polymod` + +```{r} +italy_2005_pop <- as_conmat_population( + data = italy_2005, + age = lower.age.limit, + population = population +) +``` + +```{r} +age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) +italy_contact <- extrapolate_polymod( + population = italy_2005_pop, + age_breaks = age_breaks_0_80_plus +) + +italy_contact +``` + +# Creating a next generation matrix (NGM) + +To create a next generation matrix, you can use either a conmat population +object, or setting prediction matrices, like so: + +```{r} +# using a conmat population object +italy_2005_ngm <- generate_ngm( + italy_2005_pop, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5 +) + +italy_2005_ngm + +# using a setting prediction matrix +italy_2005_ngm_polymod <- generate_ngm( + italy_contact, + age_breaks = age_breaks_0_80_plus, + R_target = 1.5 +) + +italy_2005_ngm_polymod + +# these are the same +identical(italy_2005_ngm, italy_2005_ngm_polymod) +``` + +# Applying vaccination to an NGM + +We can then take a next generation matrix and apply vaccination data, such as the provided `vaccination_effect_example_data` dataset. + +```{r} +vaccination_effect_example_data + +ngm_italy_vacc <- apply_vaccination( + ngm = italy_2005_ngm, + data = vaccination_effect_example_data, + coverage_col = coverage, + acquisition_col = acquisition, + transmission_col = transmission +) + +ngm_italy_vacc +``` diff --git a/vignettes/parallel-computing.Rmd b/vignettes/parallel-computing.Rmd new file mode 100644 index 00000000..eadf9013 --- /dev/null +++ b/vignettes/parallel-computing.Rmd @@ -0,0 +1,72 @@ +--- +title: "Parallel Computing" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Parallel Computing} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(conmat) +``` + +`conmat` supports parallelisation via the [`future`](https://future.futureverse.org/) and [`furrr`](https://furrr.futureverse.org/) R packages. The functions that will be impacted by this are: + +* extrapolate_polymod +* estimate_setting_contacts +* fit_setting_contacts (called in above functions) +* predict_setting_contacts (called in above functions) + +First we set the future plan, saying "multisession", with 4 workers. + +```{r load-future} +library(future) +plan(multisession, workers = 4) +``` + +Then we run our code as normal to get the parallelisation! (note that you must specify the plan, otherwise it does not know how to parallelise. See the [future package documentation](https://future.futureverse.org/reference/plan.html) for more details). + +Note that these functions will run about 3 times faster than normal, they might still take some time. They are able to run in parallel as we are fitting a model to each setting, which is a task that is embarrasingly parallel. + +```{r show-off-furrr-1} +perth <- abs_age_lga("Perth (C)") +perth_contacts <- extrapolate_polymod( + population = perth +) +``` + +```{r} +settings_estimated_contacts <- estimate_setting_contacts( + contact_data_list = get_polymod_setting_data(), + survey_population = get_polymod_population(), + prediction_population = get_polymod_population(), + age_breaks = c(seq(0, 75, by = 5), Inf), + per_capita_household_size = NULL +) +``` + +```{r} +polymod_setting_data <- get_polymod_setting_data() +polymod_population <- get_polymod_population() + +contact_model <- fit_setting_contacts( + contact_data_list = polymod_setting_data, + population = polymod_population +) +``` + +```{r} +synthetic_settings_5y_perth <- predict_setting_contacts( + population = perth, + contact_model = contact_model, + age_breaks = c(seq(0, 85, by = 5), Inf) +) +``` diff --git a/vignettes/sir-model.Rmd b/vignettes/sir-model.Rmd new file mode 100644 index 00000000..da0ef35c --- /dev/null +++ b/vignettes/sir-model.Rmd @@ -0,0 +1,566 @@ +--- +title: "SIR modelling with conmat" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SIR modelling with conmat} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(conmat) +library(deSolve) +library(tidyr) +library(ggplot2) +library(dplyr) +library(purrr) +``` + +# Introduction: What is an SIR model? + +SIR (Susceptible, Infected, Recovered) models, sometimes known as [compartmental models](https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology), are a mathematical modelling technique used to understand facets of an epidemic. They can help answer questions like: + +* What is the duration of an epidemic? +* What is the total number of infected people? +* How does the disease spread? +* What is the reproductive number? +* What is the impact of a public health intervention? + +The **SIR** refers to the number of: + +* S: *s*usceptible +* I: *i*nfected +* R: *r*ecovered / *r*emoved + +people at a given time point. We can model how these numbers change at each time step, based on initial population numbers, and other parameters like how infection spreads (is it more likely to infect younger or older people?), + +# An SIR Model with homogenous mixing + +We start with a complicated version of a relatively simple model: an age-stratified SIR Model, but with all age groups acting exactly the same. + +We will use 17 age groups, each in 5 year age bands, and turn these into a `conmat_population` object. This is an object that knows which columns represent age and population, which is used by other functions within `conmat`. + +```{r} +#| label: create-population +homogeneous_population <- data.frame( + age = seq(0, 80, by = 10), + population = rep(100, times = 9) +) |> + as_conmat_population( + age = age, + population = population + ) + +homogeneous_population +``` + +Then, we extrapolate these into a set of contact matrices, which we can construct using `setting_prediction_matrix`. We set these as matrices of 1 - the contact rate is homogenous and exactly the same. + +```{r} +#| label: homogenous-contact +age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) +mat_ones <- matrix(1, nrow = 9, ncol = 9) + +# Relative number of contacts between individuals in 2 age categories +# Think of as P(contact) +homogeneous_contact <- setting_prediction_matrix( + home = mat_ones, + work = mat_ones, + school = mat_ones, + other = mat_ones, + age_breaks = age_breaks_0_80_plus +) + +homogeneous_contact +``` + +Similarly, we construct a set of transmission matrices, which provide the probability of transmission for each age group, using `transmission_probability_matrix`. These all have the same transmission probability - 0.05 (1 in 20). + +```{r} +#| label: homogenous-transmission +mat_05 <- matrix(0.05, nrow = 9, ncol = 9) +transmission_matrix <- transmission_probability_matrix( + home = mat_05, + work = mat_05, + school = mat_05, + other = mat_05, + age_breaks = age_breaks_0_80_plus +) + +transmission_matrix +``` + +We also need to set up our population structures. We'll have all the S states, then I, then R. Since we're using `deSolve` to solve this system, we need to make sure this order stays the same throughout! + +```{r} +#| label: initial-condition +S0 <- rep(999, times = 9) +I0 <- rep(1, times = 9) +R0 <- rep(0, times = 9) +initial_condition <- c(S0, I0, R0) +names(initial_condition) <- paste( + rep(c("S0", "I0", "R0"), each = 9), + age_breaks_0_80_plus[1:9], + sep = "_" +) + +initial_condition +``` + +For an SIR model, we need to compute the *force of infection*, which is +$$\lambda(t) = \beta I(t).$$ The $\beta$ term is the product of the probability of infection given contact, and the probability of contact, for which we can use the matrices we have just defined: + +```{r} +#| label: parameters +parameters <- list( + "transmission_matrix" = transmission_matrix, + "homogeneous_contact" = homogeneous_contact, + "gamma" = 1, + "s_indexes" = 1:9, + "i_indexes" = 10:18, + "r_indexes" = 19:27 +) + +parameters +``` + +Now we construct a function for the age structured SIR model, to pass to deSolve. This calculates the force of infection for each setting. + +```{r} +age_structured_sir <- function(time, state, parameters) { + # Calculate the force of infection for each setting: + # unstructured SIR beta is age_group_n / pop_n + + N_by_age <- map_dbl( + .x = parameters$s_indexes, + .f = function(i) { + current_indexes_to_sum <- c( + parameters$s_indexes[i], + parameters$i_indexes[i], + parameters$r_indexes[i] + ) + sum(state[current_indexes_to_sum]) + } + ) + + # normalise by the age population + N_infected_by_age <- state[parameters$i_indexes] / N_by_age + + # functional method for takign the product of two matrices + product <- function(transmission, contact) { + map2(transmission, contact, `*`) + } + + age_normalise <- function(beta) { + # matrix multiply by infected and normalise by age population + map(beta, function(beta) { + beta %*% N_infected_by_age + }) + } + + lambdas <- tibble( + setting = names(parameters$transmission_matrix), + transmission_matrix = parameters$transmission_matrix, + homogeneous_contact = parameters$homogeneous_contact[1:4] + ) %>% + mutate( + beta = product(transmission_matrix, homogeneous_contact), + lambda = age_normalise(beta) + ) + + # Combine them all into one term for ease of computation + lambda_total <- Reduce("+", lambdas$lambda) + + # Don't forget to normalise your infection rate by the population! + dSdt <- -lambda_total * state[parameters$s_indexes] + + dIdt <- lambda_total * state[parameters$s_indexes] - + parameters$gamma * state[parameters$i_indexes] + + dRdt <- parameters$gamma * state[parameters$i_indexes] + + return( + list( + c( + dSdt, + dIdt, + dRdt + ) + ) + ) +} +``` + +Then we solve the ODE like so: + +```{r} +#| label: solve-ode-homogeneous +times <- seq(0, 20, by = 0.1) +homogeneous_soln <- ode( + y = initial_condition, + times = times, + func = age_structured_sir, + parms = parameters +) + +# Have to convert ode output to a data frame to do any plotting +homogeneous_soln <- as.data.frame(homogeneous_soln) %>% as_tibble() +``` + +Now, let's compare this to an SIR model with no stratification - as in, no age groups: + +```{r} +#| label: standard-sir +parameters_sir <- c("beta" = 1.8, "gamma" = 1) +initial_condition_sir <- c("S" = 8991, "I" = 9, "R" = 0) + +sir <- function(time, state, parameters) { + N <- sum(state) + lambda_total <- parameters["beta"] * state["I"] + dSdt <- -lambda_total / N * state["S"] + dIdt <- parameters["beta"] / N * state["S"] * state["I"] - parameters["gamma"] * state["I"] + dRdt <- parameters["gamma"] * state["I"] + + return(list(c(dSdt, dIdt, dRdt))) +} + +sir_soln <- ode( + y = initial_condition_sir, + times = times, + func = sir, + parms = parameters_sir +) + +sir_soln <- as_tibble(as.data.frame(sir_soln)) +``` + + + +```{r} +#| label: plot-standard-sir +ungrouped_structure <- sir_soln %>% + pivot_longer(cols = -time) + +# we are going to tidy up ODE output a few times, so wrap it into a function: +tidy_ode <- function(ode_soln) { + ode_soln %>% + pivot_longer(cols = -time) %>% + mutate(parent_state = substr(name, 1, 1)) %>% + group_by(time, parent_state) %>% + summarise(value = sum(value)) %>% + ungroup() %>% + rename(name = parent_state) +} + +# For the stratified model, we have to add up all the age categories together for a fair comparison. +grouped_structure <- tidy_ode(homogeneous_soln) + +combined_solutions <- bind_rows( + "non_structured" = ungrouped_structure, + "age_structured" = grouped_structure, + .id = "type" +) %>% + mutate( + name = factor(name, levels = c("S", "I", "R")) + ) + +combined_solutions +``` + +Now let's plot these two models approaches - the age structure and the non age structured: + +```{r} +#| label: plot-both-models +gg_combined_solutions <- ggplot( + combined_solutions, + aes(x = time, y = value, colour = name, linetype = type) +) + + geom_line() + + labs(x = "Time", y = "Value", colour = "State", linetype = "Model") + +gg_combined_solutions +``` + +Voila! These lines are the same! We can double check this by plotting them as facets: + +```{r} +#| label: facetted-sir +gg_combined_solutions + facet_wrap(~type) +``` + +So, we have successfully collapsed our stratified model down to the non-stratified model, which is a great sense check for every time you write out a complicated model. + +# Comparison to other age matrices + +Now that we've established an age-structured SIR model, we can repeat the process with `conmat` matrices. This process is the same as in the vignette, "Data Sources". + +```{r setup-aus-matrices} +world_data <- socialmixr::wpp_age() %>% + mutate( + new_lower_age = if_else(lower.age.limit >= 75, 75L, lower.age.limit) + ) %>% + group_by(new_lower_age, country, year) %>% + summarise( + population = sum(population) + ) +germany_2015 <- age_population( + data = world_data, + location_col = country, + location = "Germany", + age_col = new_lower_age, + year_col = year, + year = 2015 +) + +germany_2015 +``` + +Now let's construct a non-homogenous contact matrix, and transmission probability matrix from the data we have on Germany. + +```{r conmat-germany} +age_breaks_socialmixr <- c(seq(0, 75, by = 5), Inf) + +germany_contacts <- extrapolate_polymod( + population = germany_2015, + age_breaks = age_breaks_socialmixr +) + +n_finite_states <- length(age_breaks_socialmixr) - 1 +socialmixr_matrix <- matrix(0.1761765, + nrow = n_finite_states, + ncol = n_finite_states +) + +transmission_matrix <- transmission_probability_matrix( + home = socialmixr_matrix, + work = socialmixr_matrix, + school = socialmixr_matrix, + other = socialmixr_matrix, + age_breaks = age_breaks_socialmixr +) + +parameters <- list( + "transmission_matrix" = transmission_matrix, + "homogeneous_contact" = germany_contacts, + "gamma" = 1, + "s_indexes" = 1:n_finite_states, + "i_indexes" = (n_finite_states + 1):(2 * n_finite_states), + "r_indexes" = (2 * n_finite_states + 1):(3 * n_finite_states) +) + +S0 <- germany_2015$population +I0 <- rep(1, times = n_finite_states) +R0 <- rep(0, times = n_finite_states) +initial_condition <- c(S0, I0, R0) +names(initial_condition) <- paste( + rep(c("S0", "I0", "R0"), each = n_finite_states), + age_breaks_socialmixr[1:n_finite_states], + sep = "_" +) +``` + +Then, similar to above, we solve the ODE + +```{r solve-ode-germany} +times <- seq(0, 100, by = 0.1) +germany_soln <- ode( + y = initial_condition, + times = times, + func = age_structured_sir, + parms = parameters +) + +# Have to convert ode output to a data frame to do any plotting +germany_soln <- as_tibble(as.data.frame(germany_soln)) + +head(germany_soln) +tail(germany_soln) + +germany_soln_long <- germany_soln %>% + tidy_ode() %>% + mutate(type = "age_structured") + +germany_soln_long +``` + +```{r germany-sir-plot} +gg_germany_sir <- ggplot( + germany_soln_long, + aes(x = time, y = value / sum(initial_condition), colour = name) +) + + geom_line() + + labs(x = "Time", y = "Proportion") + +gg_germany_sir +``` + +Let's compare to the Prem matrices. Prem only has 16 age classes so we do need to re-do our population. + +```{r load-prem} +# NOTE - consider ways to present this data nicer +# str(prem_germany_contact_matrices) +as_setting_prediction_matrix( + prem_germany_contact_matrices, + age_breaks = seq(0, 80, by = 5) +) +``` + +So we go through a similar process, setting up parameters, and solving the ODE + +```{r setup-prem} +parameters_prem <- list( + "transmission_matrix" = transmission_matrix, + "homogeneous_contact" = prem_germany_contact_matrices, + "gamma" = 1, + "s_indexes" = 1:n_finite_states, + "i_indexes" = (n_finite_states + 1):(2 * n_finite_states), + "r_indexes" = (2 * n_finite_states + 1):(3 * n_finite_states) +) + +prem_soln <- ode( + y = initial_condition, + times = times, + func = age_structured_sir, + parms = parameters_prem +) + +# Have to convert ode output to a data frame to do any plotting +prem_soln <- as_tibble(as.data.frame(prem_soln)) + +tail(prem_soln) +``` + +```{r conmat-prem-plot} +germany_aggregated <- tidy_ode(germany_soln) + +# For the stratified model, we have to add up all the age categories together for a fair comparison. +prem_aggregated <- tidy_ode(prem_soln) + +conmat_prem_soln <- bind_rows( + conmat = germany_aggregated, + prem = prem_aggregated, + .id = "type" +) %>% + mutate(name = factor(name, levels = c("S", "I", "R"))) + +head(conmat_prem_soln) +tail(conmat_prem_soln) +``` + +```{r plot-conmat-prem} +ggplot(conmat_prem_soln, aes(x = time, y = value, colour = type)) + + geom_line() + + labs(x = "Time", y = "Value", colour = "Model") + + facet_wrap(~name, nrow = 1) +``` + +These are really different, but we have to be careful about why. The contact matrices might refer to the same quantity, but if we dive a little deeper, we find out that might not be the case... + +## Calculating reproductive number - R0 + +To fairly compare a dynamic disease model that differs _only_ by it's contact matrices, it's important to remember that the $(i,j)$th element of one of these matrices is the *relative* number of contacts between individuals of age $i$ and age $j$. But, what the number is relative to might be different, and this will lead to different basic reproduction numbers, which will give misleading model conclusions. + +At this point, it is important to point out the two definitions of a next generation matrix. + +1. The next generation of the offspring distribution assuming infinite lifetime, which probabilists will be used to, and +2. The number of newly infected individuals over the course of one generation of infections, which infectious diseases modellers will be used to. + +`conmat` calculates the first of these in it's functions (such as `generate_ngm`), hence why the arguments to these functions have no concept of an infectious period (which is analogous to 'death' in a branching process). + +Following the approach of Diekmann, Heesterbrook and Roberts (2009), one can think of the NGM generated by `conmat` as only the transmissions term of Equation 2.9. So, to ensure both models have the same value of $R_0$, we can multiply each matrix by a scaling factor to give a target $R_0$. + +To target $R_0=1.5$ for example, +```{r calculate-r0-prem-fun} +calculate_R0 <- function(multiplier, transmission_matrices, contact_matrices) { + total_matrix <- transmission_matrices$home * contact_matrices$home + + transmission_matrices$work * contact_matrices$work + + transmission_matrices$school * contact_matrices$school + + transmission_matrices$other * contact_matrices$other + + abs(Re(eigen(total_matrix * multiplier)$values[1]) - 1.5) +} + +scaling_factor <- function(contact_matrix) { + optimize( + f = calculate_R0, + interval = c(0.001, 5), + transmission_matrices = transmission_matrix, + contact_matrices = contact_matrix + ) +} + +scaling_factor_prem <- scaling_factor(prem_germany_contact_matrices) +scaling_factor_socialmixr <- scaling_factor(germany_contacts) + +scaling_factor_prem$minimum +scaling_factor_socialmixr$minimum +``` + +We can adjust our contact matrices with these factors, and then our R0s will be the same, meaning that the only difference between the two models should be differences in the contact matrices. + +```{r adjust-contact-matrices} +prem_germany_contact_matrices <- lapply(prem_germany_contact_matrices, `*`, scaling_factor_prem$minimum) +germany_contacts <- lapply(germany_contacts, `*`, scaling_factor_socialmixr$minimum) + +parameters$homogeneous_contact <- germany_contacts +germany_soln <- ode( + y = initial_condition, + times = times, + func = age_structured_sir, + parms = parameters +) + +parameters$homogeneous_contact <- prem_germany_contact_matrices +prem_soln <- ode( + y = initial_condition, + times = times, + func = age_structured_sir, + parms = parameters +) + +germany_aggregated <- tidy_ode(as_tibble(as.data.frame(germany_soln))) +prem_aggregated <- tidy_ode(as_tibble(as.data.frame(prem_soln))) + +conmat_prem_soln <- bind_rows( + conmat = germany_aggregated, + prem = prem_aggregated, + .id = "type" +) %>% + mutate(name = factor(name, levels = c("S", "I", "R"))) +``` + + +```{r} +library(scales) +conmat_prem_soln %>% + filter(time <= 50) %>% + ggplot(aes(x = time, y = value, colour = type)) + + geom_line() + + facet_wrap(~name, + ncol = 1, + scales = "free_y", + labeller = labeller( + name = c( + S = "Susceptible", + I = "Infected", + R = "Recovered" + ) + ) + ) + + scale_y_continuous( + labels = label_number(scale_cut = cut_si("")), + n.breaks = 3 + ) + + scale_colour_brewer(palette = "Dark2") + + labs(x = "Time", y = "Population", colour = "Model") + + theme_minimal() +``` + + +So now we have as fair of a comparison of the two matrices as we will get, and yet, there are significant differences in the dynamics of the two models. diff --git a/vignettes/visualising-conmat.Rmd b/vignettes/visualising-conmat.Rmd new file mode 100644 index 00000000..5c144efd --- /dev/null +++ b/vignettes/visualising-conmat.Rmd @@ -0,0 +1,85 @@ +--- +title: "Visualisation gallery" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Visualisation gallery} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 8, + fig.height = 8, + dev = "png" +) +``` + +```{r setup} +library(conmat) +``` + +This vignette will contain various visualisations that you can perform with `conmat`. For the most part we have tried to make `autoplot` work for most of the matrix type objects. As time goes on we will include other visualisations here. + +# extrapolate polymod + +```{r} +perth <- abs_age_lga("Perth (C)") + +perth_contact <- extrapolate_polymod( + perth +) + +autoplot(perth_contact) +``` + +## For interest's sake: visualising the empirical contact rate data + +```{r} +library(dplyr) +library(ggplot2) +# visualise empirical contact rate estimates +bind_rows( + home = get_polymod_contact_data("home"), + school = get_polymod_contact_data("school"), + work = get_polymod_contact_data("work"), + other = get_polymod_contact_data("other"), + .id = "setting" +) %>% + mutate( + rate = contacts / participants, + setting = factor( + setting, + levels = c( + "home", "school", "work", "other" + ) + ) + ) %>% + group_by( + setting + ) %>% + mutate( + `relative contact rate` = rate / max(rate) + ) %>% + ungroup() %>% + ggplot( + aes( + x = age_from, + y = age_to, + fill = `relative contact rate` + ) + ) + + facet_wrap( + ~setting, + ncol = 2, + scales = "free" + ) + + geom_tile() + + scale_fill_distiller( + direction = 1, + trans = "sqrt" + ) + + theme_minimal() +```