diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..44c3a9e --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,8 @@ +^CohortSurvival\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^LICENSE\.md$ +^\.github$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ed7650c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/r-cmd-check-ubuntu.yaml b/.github/workflows/r-cmd-check-ubuntu.yaml new file mode 100644 index 0000000..f30453c --- /dev/null +++ b/.github/workflows/r-cmd-check-ubuntu.yaml @@ -0,0 +1,29 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, develop] + pull_request: + branches: [main, develop] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a992cae --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.Rproj.user +inst/doc +docs diff --git a/CohortSurvival.Rproj b/CohortSurvival.Rproj new file mode 100644 index 0000000..69fafd4 --- /dev/null +++ b/CohortSurvival.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes +LineEndingConversion: Posix + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..9c8a03e --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,40 @@ +Package: CohortSurvival +Title: What the Package Does (One Line, Title Case) +Version: 0.0.0.9000 +Authors@R: + person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), + comment = c(ORCID = "YOUR-ORCID-ID")) +Description: What the package does (one paragraph). +License: Apache License (>= 2) +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 +Imports: + CDMConnector, + checkmate, + cli, + DBI, + dplyr, + dbplyr, + duckdb, + magrittr, + lubridate, + PatientProfiles, + rlang (>= 0.4.11), + survival, + tibble, + tidyr, + scales, + stringr, + methods +Suggests: + testthat (>= 3.0.0), + CodelistGenerator, + roxygen2, + knitr, + tictoc, + rmarkdown, + ggplot2 +Config/testthat/edition: 3 +VignetteBuilder: knitr +URL: https://darwin-eu.github.io/CohortSurvival/ diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..b62a9b5 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,194 @@ +Apache License +============== + +_Version 2.0, January 2004_ +_<>_ + +### Terms and Conditions for use, reproduction, and distribution + +#### 1. Definitions + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, “control” means **(i)** the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the +outstanding shares, or **(iii)** beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +“submitted” means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +#### 2. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +#### 3. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +#### 4. Redistribution + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +* **(a)** You must give any other recipients of the Work or Derivative Works a copy of +this License; and +* **(b)** You must cause any modified files to carry prominent notices stating that You +changed the files; and +* **(c)** You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +* **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +#### 5. Submission of Contributions + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +#### 6. Trademarks + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +#### 7. Disclaimer of Warranty + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +#### 8. Limitation of Liability + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +#### 9. Accepting Warranty or Additional Liability + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +_END OF TERMS AND CONDITIONS_ + +### APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets `[]` replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same “printed page” as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..2cbc17c --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,27 @@ +# Generated by roxygen2: do not edit by hand + +export("%>%") +export(":=") +export(.data) +export(addCohortSurvival) +export(as_label) +export(as_name) +export(benchmarkCohortSurvival) +export(enquo) +export(enquos) +export(estimateSingleEventSurvival) +export(generateDeathCohortSet) +export(mockMGUS2cdm) +export(plotCumulativeIncidence) +export(plotSurvival) +export(summariseCharacteristics) +export(survivalParticipants) +importFrom(PatientProfiles,summariseCharacteristics) +importFrom(magrittr,"%>%") +importFrom(rlang,":=") +importFrom(rlang,.data) +importFrom(rlang,.env) +importFrom(rlang,as_label) +importFrom(rlang,as_name) +importFrom(rlang,enquo) +importFrom(rlang,enquos) diff --git a/R/CohortSurvival-package.R b/R/CohortSurvival-package.R new file mode 100644 index 0000000..621f999 --- /dev/null +++ b/R/CohortSurvival-package.R @@ -0,0 +1,8 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom rlang .data +#' @importFrom rlang .env +## usethis namespace: end +NULL diff --git a/R/addCohortSurvival.R b/R/addCohortSurvival.R new file mode 100644 index 0000000..63b3d42 --- /dev/null +++ b/R/addCohortSurvival.R @@ -0,0 +1,254 @@ + + +#' Add survival information to a cohort table +#' @param x cohort table to add survival information +#' @param cdm CDM reference +#' @param outcomeCohortTable The outcome cohort table of interest. +#' @param outcomeCohortId ID of event cohorts to include. Only one outcome +#' (and so one ID) can be considered. +#' @param outcomeDateVariable Variable containing date of outcome event +#' @param censorOnCohortExit If TRUE, an individual's follow up will be +#' censored at their cohort exit +#' @param censorOnDate if not NULL, an individual's follow up will be censored +#' at the given date +#' @param followUpDays Number of days to follow up individuals (lower bound 1, +#' upper bound Inf) +#' +#' @return Two additional columns will be added to x. The "time" column will +#' contain number of days to censoring. The "status" column will indicate +#' whether the patient had the event (value: 1), or did not have the event +#' (value: 0) +#' @export +#' +#' @examples +#' cohort <- dplyr::tibble( +#' cohort_definition_id = c(1,1,1), +#' subject_id = c(1,2,3), +#' cohort_start_date = c(as.Date("2020-01-01"), +#' as.Date("2020-01-02"), +#' as.Date("2020-01-01")), +#' cohort_end_date = c(as.Date("2020-04-01"), +#' as.Date("2020-08-02"), +#' as.Date("2020-03-01")) +#' ) +#' cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort) +#' cdm$cohort1 <- cdm$cohort1 %>% +#' addCohortSurvival( +#' cdm = cdm, +#' outcomeCohortTable = "cohort2", +#' outcomeCohortId = 1 +#' ) +#' +addCohortSurvival <- function(x, + cdm, + outcomeCohortTable, + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf) { + + validateExtractSurvivalInputs( + cdm = cdm, + cohortTable = x, + outcomeCohortTable = outcomeCohortTable, + outcomeCohortId = outcomeCohortId, + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays + ) + + # drop columns if they already exist + x <- x %>% + dplyr::select(!dplyr::any_of(c("days_to_exit", + "time", + "status"))) + + # get time to end of observation period + x <- x %>% + PatientProfiles::addFutureObservation( + cdm = cdm, + indexDate = "cohort_start_date", + futureObservationName = "days_to_exit" + ) %>% CDMConnector::computeQuery() + + # get any events before or after index date + x <- x %>% + PatientProfiles::addCohortIntersectFlag( + cdm = cdm, + indexDate = "cohort_start_date", + targetCohortTable = outcomeCohortTable, + targetCohortId = outcomeCohortId, + window = c(-Inf,-1), + nameStyle = "event_in_washout" + ) %>% + PatientProfiles::addCohortIntersectDays( + cdm = cdm, + indexDate = "cohort_start_date", + targetCohortTable = outcomeCohortTable, + targetCohortId = outcomeCohortId, + targetDate = outcomeDateVariable, + window = c(0, Inf), + nameStyle = "days_to_event" + ) %>% CDMConnector::computeQuery() + + # whatever comes first + + # censor at first of + # 1) outcome, + # 2) end of observation period + # 3) cohort exit (if censorOnCohortExit is TRUE) + # 4) followUpDays (if followUpDays is not Inf) + + if (isTRUE(censorOnCohortExit)) { + x <- x %>% + dplyr::mutate(days_end_cohort = CDMConnector::datediff( + "cohort_start_date", "cohort_end_date")) %>% + dplyr::mutate(days_to_event = dplyr::if_else( + .data$days_to_event <= .data$days_end_cohort, + .data$days_to_event, as.numeric(NA) + )) %>% + dplyr::mutate(days_to_exit = dplyr::if_else( + .data$days_to_exit < .data$days_end_cohort, + .data$days_to_exit, .data$days_end_cohort + )) %>% + dplyr::select(-.data$days_end_cohort) %>% + CDMConnector::computeQuery() + } + + if (!is.null(censorOnDate)) { + x <- x %>% + dplyr::mutate(censor_date = .env$censorOnDate) %>% + dplyr::mutate(days_to_censor = CDMConnector::datediff( + "cohort_start_date", "censor_date" + )) %>% + dplyr::mutate(days_to_event = dplyr::if_else( + .data$days_to_event >= .data$days_to_censor, + as.numeric(NA), .data$days_to_event + )) %>% + dplyr::mutate(days_to_exit = dplyr::if_else( + .data$days_to_exit < .data$days_to_censor, + .data$days_to_exit, .data$days_to_censor + )) %>% + dplyr::select(- c(.data$days_to_censor, .data$censor_date)) %>% + CDMConnector::computeQuery() + } + + if (followUpDays != Inf) { + x <- x %>% + dplyr::mutate(days_to_event = dplyr::if_else( + .data$days_to_event <= .env$followUpDays, + .data$days_to_event, as.numeric(NA) + )) %>% + dplyr::mutate(days_to_exit = dplyr::if_else( + .data$days_to_exit < .env$followUpDays, + .data$days_to_exit, .env$followUpDays + )) %>% + CDMConnector::computeQuery() + } + + # now just using days_to_event and days_to_exit + # add status variable (1 if event, 0 if not) + # add time variable (days to event for those with event, + # days to exit if no event) + x <- x %>% + dplyr::mutate(status = dplyr::if_else( + !is.na(.data$days_to_event), 1, 0 + )) %>% + dplyr::mutate(time = dplyr::if_else(.data$status == 1, + .data$days_to_event, .data$days_to_exit + )) + + # for anyone with an outcome in the washout + # we keep them, but their time and event will be set to NA + # (ie they won't contribute to any analysis) + x <- x %>% + dplyr::mutate( + status = dplyr::if_else(.data$event_in_washout == 1, NA, + .data$status + ), + time = dplyr::if_else(.data$event_in_washout == 1, NA, + .data$time + ) + ) + # likewise if we censor on a date prior to their cohort start date + if(!is.null(censorOnDate)) { + x <- x %>% + dplyr::mutate( + status = dplyr::if_else(.data$cohort_start_date > .env$censorOnDate, NA, + .data$status + ), + time = dplyr::if_else(.data$cohort_start_date > .env$censorOnDate, NA, + .data$time + ) + ) + } + + # for anyone with outcome in washout, set to NA + x <- x %>% + dplyr::mutate( + status = dplyr::if_else(.data$event_in_washout == 1, + NA, .data$status + ), + time = dplyr::if_else(.data$event_in_washout == 1, + NA, .data$time + ) + ) + + x <- x %>% + dplyr::select(!c("event_in_washout", "days_to_event")) + + return(x) +} + + +validateExtractSurvivalInputs <- function(cdm, + cohortTable, + outcomeCohortTable, + outcomeCohortId, + censorOnCohortExit, + censorOnDate, + followUpDays) { + checkCdm(cdm, tables = c( + "person", "observation_period", + outcomeCohortTable + )) + +# checkIsCohort(cohortTable) # change when mock is changed + checkIsCohort_exp(cohortTable) + checkPatientRows(cohortTable) + checkExposureCohortId(cohortTable) + + checkIsCohort(cdm[[outcomeCohortTable]]) + + errorMessage <- checkmate::makeAssertCollection() + checkmate::assertIntegerish(outcomeCohortId, + len = 1, + add = errorMessage + ) + checkmate::assert_logical(censorOnCohortExit) + checkmate::assert_date(censorOnDate, null.ok = TRUE) + if (followUpDays != Inf) { + checkmate::assert_integerish(followUpDays, + len = 1, + lower = 1, + add = errorMessage + ) + } + checkmate::reportAssertions(collection = errorMessage) + + + + # check specified cohort is in cohort table + errorMessage <- checkmate::makeAssertCollection() + if (!is.null(outcomeCohortId)) { + checkmate::assertTRUE( + checkCohortId( + cohort = cdm[[outcomeCohortTable]], + cohortId = outcomeCohortId + ), + add = errorMessage + ) + } + return(checkmate::reportAssertions(collection = errorMessage)) +} diff --git a/R/benchmarkCohortSurvival.R b/R/benchmarkCohortSurvival.R new file mode 100644 index 0000000..7e675eb --- /dev/null +++ b/R/benchmarkCohortSurvival.R @@ -0,0 +1,496 @@ +#' Estimate performance of estimateSurvival function for benchmarking +#' +#' @param cdm CDM reference +#' @param targetSize number of people in the target cohort table +#' @param outcomeSize number of people in the outcome cohort table +#' @param outcomeDateVariable Variable containing date of outcome event +#' @param competingOutcomeSize number of people in the competing outcome cohort table +#' @param competingOutcomeDateVariable Variable containing date of +#' competing event +#' @param censorOnCohortExit If TRUE, an individual's follow up will be +#' censored at their cohort exit +#' @param censorOnDate if not NULL, an individual's follow up will be censored +#' at the given date +#' @param followUpDays Number of days to follow up individuals (lower bound 1, +#' upper bound Inf) +#' @param strata strata +#' @param timeGap Days between time points for which to report survival +#' estimates. First day will be day zero with risk estimates provided +#' for times up to the end of follow-up, with a gap in days equivalent +#' to timeGap. +#' @param times vector of time points at which to give survival estimates, +#' if NULL estimates at all times are calculated +#' @param minCellCount The minimum number of events to reported, below which +#' results will be obscured. If 0, all results will be reported. +#' @param returnParticipants Either TRUE or FALSE. If TRUE, references to +#' participants from the analysis will be returned allowing for further +#' analysis. +#' +#' @return tibble with performance of estimateSurvival function information, +#' according to the selected input parameters +#' @export +#' @importFrom tictoc +#' +#' @examples +#' cdm <- mockMGUS2cdm() +#' cdm$condition_occurrence <- cdm$death_cohort %>% +#' dplyr::rename("condition_start_date" = "cohort_start_date", +#' "condition_end_date" = "cohort_end_date") +#' surv_timings <- benchmarkCohortSurvival( +#' cdm, targetSize = 100, outcomeSize = 20) +#' +benchmarkCohortSurvival <- function(cdm, + targetSize, + outcomeSize, + outcomeDateVariable = "cohort_start_date", + competingOutcomeSize = NULL, + competingOutcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf, + strata = NULL, + timeGap = c(1, 7, 30, 365), + times = NULL, + minCellCount = 5, + returnParticipants = FALSE) { + + # check input + errorMessage <- checkmate::makeAssertCollection() + + checkCdm(cdm = cdm, tables = c( + "person", "observation_period" + )) + checkmate::assertIntegerish(targetSize, + len = 1, + lower = 1, + add = errorMessage) + checkmate::assertIntegerish(outcomeSize, + len = 1, + lower = 1, + add = errorMessage) + checkmate::assertIntegerish(competingOutcomeSize, + len = 1, + lower = 1, + null.ok = TRUE, + add = errorMessage) + checkmate::assertCharacter(outcomeDateVariable, + len = 1, + add = errorMessage) + checkmate::assertCharacter(competingOutcomeDateVariable, + len = 1, + add = errorMessage) + checkmate::assertLogical(censorOnCohortExit, + len = 1, + add = errorMessage) + if(!is.null(censorOnDate)) { + checkdate <- censorOnDate %>% inherits("Date") + if(!checkdate) { + cli::cli_abort("{censorOnDate} is neither NULL nor of type Date") + } + } + if(followUpDays != "Inf") { + checkmate::assertIntegerish(followUpDays, + len = 1, + lower = 0, + add = errorMessage + ) + } + checkmate::assertIntegerish(timeGap, + lower = 1, + add = errorMessage + ) + checkmate::assertIntegerish(times, + lower = 0, + null.ok = TRUE, + add = errorMessage + ) + checkmate::assertIntegerish(minCellCount, + len = 1, + lower = 0, + null.ok = FALSE, + add = errorMessage + ) + checkmate::assertLogical(returnParticipants, + len = 1, + add = errorMessage) + + checkmate::reportAssertions(collection = errorMessage) + + # create cohorts + timings <- list() + tictoc::tic() + + targetCohortTable <- "benchmark_target" + target_cohort <- cdm$person %>% + dplyr::slice_sample(n = targetSize) %>% + dplyr::inner_join(cdm$observation_period, by = "person_id") %>% + dplyr::mutate(cohort_definition_id = 1) %>% + dplyr::select( + "subject_id" = "person_id", + "cohort_definition_id", + "cohort_start_date" = "observation_period_start_date", + "cohort_end_date" = "observation_period_end_date" + ) %>% + PatientProfiles::addDemographics(cdm) %>% + dplyr::collect() + + checkStrata(strata, target_cohort) + targetCohortId <- 1 + + target_cohort <- addCohortCountAttr(target_cohort) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), + name = DBI::Id( + schema = attr(cdm, "write_schema"), + table = targetCohortTable), + target_cohort, + overwrite = TRUE + ) + }) + + t <- tictoc::toc(quiet = TRUE) + timings[["target_cohort"]] <- dplyr::tibble( + task = paste0("generating target cohort size ",targetSize), + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + + cdm <- CDMConnector::cdm_from_con(attr(cdm, "dbcon"), + attr(cdm, "cdm_schema"), + attr(cdm, "write_schema"), + cohort_tables = c(targetCohortTable), + cdm_name = "benchmark") + + outcomeCohortTable <- "benchmark_outcome" + min_obs <- cdm[[targetCohortTable]] %>% + dplyr::select("cohort_start_date") %>% + dplyr::pull() %>% min() + max_obs <- cdm[[targetCohortTable]] %>% + dplyr::select("cohort_end_date") %>% + dplyr::pull() %>% max() + outcome_cohort <- dplyr::tibble( + subject_id = cdm[[targetCohortTable]] %>% + dplyr::select("subject_id") %>% + dplyr::pull() %>% + sample(outcomeSize, replace = TRUE), + cohort_definition_id = 1, + cohort_start_date = sample(seq(as.Date(min_obs), as.Date(max_obs), by="day"), outcomeSize), + cohort_end_date = .data$cohort_start_date + ) + + columnCheck <- outcomeDateVariable %in% colnames(outcome_cohort) + if(!columnCheck) { + cli::cli_abort("{outcomeDateVariable} must be `cohort_start_date` or `cohort_end_date`") + } + outcomeCohortId <- 1 + + outcome_cohort <- addCohortCountAttr(outcome_cohort) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), + name = DBI::Id( + schema = attr(cdm, "write_schema"), + table = outcomeCohortTable), + outcome_cohort, + overwrite = TRUE + ) + }) + + t <- tictoc::toc(quiet = TRUE) + timings[["outcome_cohort"]] <- dplyr::tibble( + task = paste0("generating outcome cohort size ",outcomeSize), + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + + if(!is.null(competingOutcomeSize)) { + competingOutcomeCohortTable <- "benchmark_competing_outcome" + competing_outcome_cohort <- dplyr::tibble( + subject_id = cdm[[targetCohortTable]] %>% + dplyr::select("subject_id") %>% + dplyr::pull() %>% + sample(competingOutcomeSize, replace = TRUE), + cohort_definition_id = 1, + cohort_start_date = sample(seq(as.Date(min_obs), as.Date(max_obs), by="day"), competingOutcomeSize), + cohort_end_date = .data$cohort_start_date + ) + columnCheck2 <- competingOutcomeDateVariable %in% colnames(competing_outcome_cohort) + if(!columnCheck2) { + cli::cli_abort("{competingOutcomeDateVariable} must be `cohort_start_date` or `cohort_end_date`") + } + competingOutcomeCohortId <- 1 + competing_outcome_cohort <- addCohortCountAttr(competing_outcome_cohort) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), + name = DBI::Id( + schema = attr(cdm, "write_schema"), + table = competingOutcomeCohortTable), + competing_outcome_cohort, + overwrite = TRUE + ) + }) + + cdm <- CDMConnector::cdm_from_con(attr(cdm, "dbcon"), + attr(cdm, "cdm_schema"), + attr(cdm, "write_schema"), + cohort_tables = c(targetCohortTable, + outcomeCohortTable, + competingOutcomeCohortTable), + cdm_name = "benchmark") + # this could be a problem if they have other tables loaded (K) + + + t <- tictoc::toc(quiet = TRUE) + timings[["competing_outcome_cohort"]] <- dplyr::tibble( + task = paste0("generating competing outcome cohort size ",competingOutcomeSize), + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + } else { + competingOutcomeCohortTable <- NULL + competingOutcomeCohortId <- 1 + cdm <- CDMConnector::cdm_from_con(attr(cdm, "dbcon"), + attr(cdm, "cdm_schema"), + attr(cdm, "write_schema"), + cohort_tables = c(targetCohortTable, outcomeCohortTable), + cdm_name = "benchmark") + } + + workingExposureTable <- cdm[[targetCohortTable]] + + # addCohortSurvival for primary event of interest + workingExposureTable <- workingExposureTable %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = outcomeCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeDateVariable = outcomeDateVariable, + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays + ) %>% + dplyr::rename( + "outcome_time" = "time", + "outcome_status" = "status" + ) + + t <- tictoc::toc(quiet = TRUE) + timings[["addCohortSurvival_outcome"]] <- dplyr::tibble( + task = "addCohortSurvival info added for outcome", + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + + # competing risk (if there is one) + if (!is.null(competingOutcomeCohortTable)) { + workingExposureTable <- workingExposureTable %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = competingOutcomeCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeDateVariable = competingOutcomeDateVariable, + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays + ) %>% + dplyr::rename( + "competing_risk_time" = "time", + "competing_risk_status" = "status" + ) + + t <- tictoc::toc(quiet = TRUE) + timings[["addCohortSurvival_competing_outcome"]] <- dplyr::tibble( + task = "addCohortSurvival info added for competing outcome", + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + } + + # collect + survDataDb <- workingExposureTable %>% + dplyr::filter(!is.na(.data$outcome_time) && + !is.na(.data$outcome_status)) + + survData <- survDataDb %>% + dplyr::collect() + + if (!is.null(competingOutcomeCohortTable)) { + # - add competing risk variable + # 0: no event, 2: outcome event, 3: competing risk event + survData <- addCompetingRiskVars( + data = survData, + time1 = "outcome_time", + status1 = "outcome_status", + time2 = "competing_risk_time", + status2 = "competing_risk_status", + nameOutTime = "outcome_or_competing_time", + nameOutStatus = "outcome_or_competing_status" + ) + } + + # time points to extract survival estimates + if(!is.null(times)) { + timepoints <- times + } else { + timepoints <- seq(0, max(survData$outcome_time), by = 1) + } + + # fit survival, with strata + if (is.null(competingOutcomeCohortTable)) { + survivalEstimates <- singleEventSurvival( + survData = survData, + times = timepoints, + variables = strata, + timeGap = timeGap + ) + } else { + survivalEstimates <- competingRiskSurvival( + survData = survData, + times = timepoints, + variables = strata, + timeGap = timeGap + ) + } + + t <- tictoc::toc(quiet = TRUE) + timings[["estimateSurvival"]] <- dplyr::tibble( + task = paste0("estimateSurvival called for specified settings and strata: ",paste0(names(strata), collapse = ", ")), + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + + if(nrow(survivalEstimates)>0){ + survivalEstimates <- addCohortDetails( + x = survivalEstimates, + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable, + competingOutcomeCohortId = competingOutcomeCohortId, + competingOutcomeCohortTable = competingOutcomeCohortTable) + + + t <- tictoc::toc(quiet = TRUE) + timings[["counts_obscured"]] <- dplyr::tibble( + task = paste0("counts obscured < ",minCellCount), + time_taken_secs = as.numeric(t$toc - t$tic) + ) + tictoc::tic() + + # add attributes + if(isTRUE(returnParticipants)){ + participantsRef <- survDataDb %>% + dplyr::select("cohort_definition_id", + "subject_id", + "cohort_start_date", + "cohort_end_date") %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), "participants"), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + participantsSetRef <- participantsRef %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = paste0("survival_participants_", + .data$cohort_definition_id)) %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), "participants_set"), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + participantsCountRef <- participantsRef %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), "participants_count"), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + attr(survivalEstimates, "participants") <- CDMConnector::newGeneratedCohortSet( + cohortRef = participantsRef, + cohortSetRef = participantsSetRef, + cohortCountRef = participantsCountRef + ) + + } + + attr(survivalEstimates, "events") <- addCohortDetails( + x = attr(survivalEstimates, "events"), + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable) + + } + + # combine results + timings <- dplyr::bind_rows(timings) %>% + dplyr::mutate(time_taken_secs = round(.data$time_taken_secs, 2)) %>% + dplyr::mutate(time_taken_mins = round(.data$time_taken_secs / 60, 2)) %>% + dplyr::mutate(time_taken_hours = round(.data$time_taken_mins / 60, 2)) %>% + dplyr::mutate(dbms = CDMConnector::dbms(cdm)) %>% + dplyr::mutate(person_n = cdm$person %>% + dplyr::count() %>% + dplyr::pull()) %>% + dplyr::mutate(db_min_observation_start = cdm$observation_period %>% + dplyr::summarise( + db_min_obs_start = + min(.data$observation_period_start_date, + na.rm = TRUE + ) + ) %>% + dplyr::pull()) %>% + dplyr::mutate(max_observation_end = cdm$observation_period %>% + dplyr::summarise( + max_observation_end = + max(.data$observation_period_end_date, + na.rm = TRUE + ) + ) %>% + dplyr::pull()) + + if (isFALSE(returnParticipants)) { + timings <- timings %>% + dplyr::mutate(with_participants = "No") + } else { + timings <- timings %>% + dplyr::mutate(with_participants = "Yes") + } + + return(timings) +} + +insertTable <- function(x, + cdm, + name, + overwrite = TRUE) { + con <- attr(cdm, "dbcon") + writeSchema <- attr(cdm, "write_schema") + checkTableExist <- name %in% CDMConnector::listTables(con, writeSchema) + if (checkTableExist) { + if (overwrite) { + DBI::dbRemoveTable(con, CDMConnector::inSchema(writeSchema, name)) + } else { + stop(paste0("'", name, "' table already exists.")) + } + } + DBI::dbCreateTable(con, CDMConnector::inSchema(writeSchema, name), x) + DBI::dbAppendTable(con, CDMConnector::inSchema(writeSchema, name), x) + if (methods::is(con, "duckdb_connection")) { + ref <- dplyr::tbl(con, paste(c(writeSchema, name), collapse = ".")) + } else if (length(writeSchema) == 2) { + ref <- dplyr::tbl(con, + dbplyr::in_catalog(writeSchema[[1]], writeSchema[[2]], name)) + } else if (length(writeSchema) == 1) { + ref <- dplyr::tbl(con, dbplyr::in_schema(writeSchema, name)) + } else { + ref <- dplyr::tbl(con, name) + } + return(ref) +} diff --git a/R/estimateSurvival.R b/R/estimateSurvival.R new file mode 100644 index 0000000..823cb2d --- /dev/null +++ b/R/estimateSurvival.R @@ -0,0 +1,923 @@ +#' Estimate survival for a given event of interest using cohorts in the OMOP Common Data Model +#' +#' @param cdm CDM reference +#' @param targetCohortTable targetCohortTable +#' @param targetCohortId targetCohortId +#' @param outcomeCohortTable The outcome cohort table of interest. +#' @param outcomeCohortId ID of event cohorts to include. Only one outcome +#' (and so one ID) can be considered. +#' @param outcomeDateVariable Variable containing date of outcome event +#' @param censorOnCohortExit If TRUE, an individual's follow up will be +#' censored at their cohort exit +#' @param censorOnDate if not NULL, an individual's follow up will be censored +#' at the given date +#' @param followUpDays Number of days to follow up individuals (lower bound 1, +#' upper bound Inf) +#' @param strata strata +#' @param timeGap Days between time points for which to report survival +#' estimates. First day will be day zero with risk estimates provided +#' for times up to the end of follow-up, with a gap in days equivalent +#' to timeGap. +#' @param times vector of time points at which to give survival estimates, +#' if NULL estimates at all times are calculated +#' @param minCellCount The minimum number of events to reported, below which +#' results will be obscured. If 0, all results will be reported. +#' @param returnParticipants Either TRUE or FALSE. If TRUE, references to +#' participants from the analysis will be returned allowing for further +#' analysis. +#' +#' @return tibble with survival information for desired cohort, including: +#' time, people at risk, survival probability, cumulative incidence, +#' 95 CIs, strata and outcome. A tibble with the number of events is +#' outputted as an attribute of the output +#' @export +#' +#' @examples +#' cdm <- mockMGUS2cdm() +#' surv <- estimateSingleEventSurvival(cdm, +#' targetCohortTable = "mgus_diagnosis", +#' targetCohortId = 1, +#' outcomeCohortTable = "death_cohort", +#' outcomeCohortId = 1, +#' timeGap = 7 +#' ) +#' +estimateSingleEventSurvival <- function(cdm, + targetCohortTable, + targetCohortId = 1, + outcomeCohortTable, + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf, + strata = NULL, + timeGap = c(1, 7, 30, 365), + times = NULL, + minCellCount = 5, + returnParticipants = FALSE) { + + estimateSurvival(cdm = cdm, + targetCohortTable = targetCohortTable, + targetCohortId = targetCohortId, + outcomeCohortTable = outcomeCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeDateVariable = outcomeDateVariable, + competingOutcomeCohortTable = NULL, + competingOutcomeCohortId = 1, + competingOutcomeDateVariable = "cohort_start_date", + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays, + strata = strata, + timeGap = timeGap, + times = times, + minCellCount = minCellCount, + returnParticipants = returnParticipants + ) + +} + + +estimateSurvival <- function(cdm, + targetCohortTable, + targetCohortId = 1, + outcomeCohortTable, + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date", + competingOutcomeCohortTable = NULL, + competingOutcomeCohortId = 1, + competingOutcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf, + strata = NULL, + timeGap = c(1, 7, 30, 365), + times = NULL, + minCellCount = 5, + returnParticipants = FALSE) { + + # check input + errorMessage <- checkmate::makeAssertCollection() + + checkmate::assertCharacter(targetCohortTable, + len = 1, + add = errorMessage) + checkmate::assertCharacter(outcomeCohortTable, + len = 1, + add = errorMessage) + checkmate::assertCharacter(competingOutcomeCohortTable, + len = 1, + null.ok = TRUE, + add = errorMessage) + checkCdm(cdm, tables = c( + "person", "observation_period", + targetCohortTable, + outcomeCohortTable + )) + + checkIsCohort_exp(cdm[[targetCohortTable]]) + checkmate::assertIntegerish(targetCohortId, + len = 1, + lower = 1, + add = errorMessage) + checkStrata(strata, cdm[[targetCohortTable]]) + checkmate::assertIntegerish(outcomeCohortId, + len = 1, + lower = 1, + add = errorMessage) + checkmate::assertIntegerish(competingOutcomeCohortId, + len = 1, + lower = 1, + add = errorMessage) + + checkmate::assertCharacter(outcomeDateVariable, + len = 1, + add = errorMessage) + checkmate::assertCharacter(competingOutcomeDateVariable, + len = 1, + add = errorMessage) + checkmate::assertLogical(censorOnCohortExit, + len = 1, + add = errorMessage) + if(!is.null(censorOnDate)) { + checkdate <- censorOnDate %>% inherits("Date") + if(!checkdate) { + cli::cli_abort("{censorOnDate} is neither NULL nor of type Date") + } + } + if(followUpDays != "Inf") { + checkmate::assertIntegerish(followUpDays, + len = 1, + lower = 0, + add = errorMessage + ) + } + checkmate::assertIntegerish(timeGap, + lower = 1, + add = errorMessage + ) + checkmate::assertIntegerish(times, + lower = 0, + null.ok = TRUE, + add = errorMessage + ) + checkmate::assertIntegerish(minCellCount, + len = 1, + lower = 0, + null.ok = FALSE, + add = errorMessage + ) + checkmate::assertLogical(returnParticipants, + len = 1, + add = errorMessage) + + checkmate::reportAssertions(collection = errorMessage) + + workingExposureTable <- cdm[[targetCohortTable]] %>% + dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) + + attrition <- recordAttrition( + table = workingExposureTable, + id = "subject_id", + reasonId = 1, + reason = "Starting analysis population" + ) + + # addCohortSurvival for primary event of interest + workingExposureTable <- workingExposureTable %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = outcomeCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeDateVariable = outcomeDateVariable, + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays + ) %>% + dplyr::rename( + "outcome_time" = "time", + "outcome_status" = "status" + ) + + # competing risk (if there is one) + if (!is.null(competingOutcomeCohortTable)) { + workingExposureTable <- workingExposureTable %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = competingOutcomeCohortTable, + outcomeCohortId = competingOutcomeCohortId, + outcomeDateVariable = competingOutcomeDateVariable, + censorOnCohortExit = censorOnCohortExit, + censorOnDate = censorOnDate, + followUpDays = followUpDays + ) %>% + dplyr::rename( + "competing_risk_time" = "time", + "competing_risk_status" = "status" + ) + } + + # collect + survDataDb <- workingExposureTable %>% + dplyr::filter(!is.na(.data$outcome_time) && + !is.na(.data$outcome_status)) + + survData <- survDataDb %>% + dplyr::collect() + + attrition <- recordAttrition( + table = survData, + id = "subject_id", + reasonId = 2, + reason = "Outcome status not NA", + existingAttrition = attrition + ) + + if (!is.null(competingOutcomeCohortTable)) { + # - add competing risk variable + # 0: no event, 2: outcome event, 3: competing risk event + survData <- addCompetingRiskVars( + data = survData, + time1 = "outcome_time", + status1 = "outcome_status", + time2 = "competing_risk_time", + status2 = "competing_risk_status", + nameOutTime = "outcome_or_competing_time", + nameOutStatus = "outcome_or_competing_status" + ) + } + + # time points to extract survival estimates + if(!is.null(times)) { + timepoints <- times + } else { + timepoints <- seq(0, max(survData$outcome_time), by = 1) + } + + # fit survival, with strata + if (is.null(competingOutcomeCohortTable)) { + surv <- singleEventSurvival( + survData = survData, + times = timepoints, + variables = strata, + timeGap = timeGap + ) + } else { + surv <- competingRiskSurvival( + survData = survData, + times = timepoints, + variables = strata, + timeGap = timeGap + ) + } + + if(nrow(surv)>0){ + survivalEstimates <- addCohortDetails( + x = surv, + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable, + competingOutcomeCohortId = competingOutcomeCohortId, + competingOutcomeCohortTable = competingOutcomeCohortTable) + + survivalEstimates <- survivalEstimates %>% + tidyr::pivot_longer(cols = c("outcome_cohort_name", + "competing_outcome_cohort_name"), + names_to = "variable", + values_to = "variable_level") %>% + dplyr::filter(.data$variable_level != "No competing outcome") %>% + dplyr::mutate(variable = "Outcome") + # %>% + # dplyr::select(!"outcome") + + survivalEstimates <- survivalEstimates %>% + dplyr::select(!"variable_type") %>% + tidyr::pivot_longer( + cols = c( + "n_risk", + "estimate", + "estimate_95CI_lower", + "estimate_95CI_upper" + ), + names_to = "variable_type", + values_to = "estimate" + ) + + survivalEstimates <- var_order(survivalEstimates) %>% + dplyr::distinct() + + + # add attributes + if(isTRUE(returnParticipants)){ + participantsRef <- survDataDb %>% + dplyr::select("cohort_definition_id", + "subject_id", + "cohort_start_date", + "cohort_end_date") %>% + CDMConnector::computeQuery() + + participantsSetRef <- participantsRef %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = paste0("survival_participants_", + .data$cohort_definition_id)) %>% + dplyr::collect() + + participantsCountRef <- participantsRef %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) %>% + dplyr::collect() + + attr(survivalEstimates, "participants") <- CDMConnector::newGeneratedCohortSet( + cohortRef = participantsRef , + cohortSetRef = participantsSetRef, + cohortCountRef = participantsCountRef + ) + + } + + attr(survivalEstimates, "events") <- addCohortDetails( + x = attr(surv, "events"), + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable, + competingOutcomeCohortTable = competingOutcomeCohortTable, + competingOutcomeCohortId = competingOutcomeCohortId + ) %>% + dplyr::rename("estimate" = "n_events") %>% + dplyr::mutate( + variable = "Outcome", + variable_level = paste0("timeGap ", timeGap), + estimate_type = "Survival events", + variable_type = "n_events" + ) %>% + dplyr::select(-c("outcome_cohort_name", "competing_outcome_cohort_name", "timeGap")) %>% + var_order() %>% + dplyr::relocate("outcome", + .after = "variable_type") %>% + dplyr::relocate("time", + .after = "outcome") %>% + dplyr::relocate("analysis_type", + .after = "time") %>% + dplyr::relocate("estimate", + .after = "analysis_type") + + survivalEstimates <- dplyr::union_all( + survivalEstimates, + attr(survivalEstimates, "events") + ) + + attr(survivalEstimates, "events") <- NULL + + attr(survivalEstimates, "attrition") <- attrition + + if (is.null(competingOutcomeCohortTable)) { + attr(survivalEstimates, "summary") <- addCohortDetails( + x = attr(surv, "summary"), + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable) %>% + dplyr::mutate(analysis_type = "Single event") + + } else { + attr(survivalEstimates, "summary") <- addCohortDetails( + x = attr(surv, "summary"), + cdm = cdm, + targetCohortId = targetCohortId, + targetCohortTable = targetCohortTable, + outcomeCohortId = outcomeCohortId, + outcomeCohortTable = outcomeCohortTable, + competingOutcomeCohortTable = competingOutcomeCohortTable, + competingOutcomeCohortId = competingOutcomeCohortId) %>% + dplyr::mutate(analysis_type = "Competing risk") + } + + attr(survivalEstimates, "summary") <- attr(survivalEstimates, "summary") %>% + dplyr::mutate(result_type = "Survival estimate", + variable = "Outcome", + variable_level = CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == + .env$targetCohortId) %>% + dplyr::pull("cohort_name"), + estimate_type = "Survival summary") %>% + dplyr::select(-c("outcome_cohort_name", "competing_outcome_cohort_name", "variable_type")) %>% + tidyr::pivot_longer(cols = -c("cdm_name", + "result_type", + "group_name", + "group_level", + "strata_name", + "strata_level", + "variable", + "variable_level", + "estimate_type", + "outcome", + "analysis_type"), + names_to = "variable_type", + values_to = "estimate") %>% + var_order() %>% + dplyr::mutate(time = NA) %>% + dplyr::relocate("outcome", + .after = "variable_type") %>% + dplyr::relocate("time", + .after = "outcome") %>% + dplyr::relocate("analysis_type", + .after = "time") %>% + dplyr::relocate("estimate", + .after = "analysis_type") + + survivalEstimates <- dplyr::union_all( + survivalEstimates, + attr(survivalEstimates, "summary") + ) + + attr(survivalEstimates, "summary") <- NULL + + } + + # obscure counts below minCellCount + survivalEstimates <- suppressSurvivalCounts(survivalEstimates, minCellCount) + + return(survivalEstimates) +} + +addCompetingRiskVars <- function(data, time1, status1, + time2, status2, + nameOutTime, + nameOutStatus) { + # - add competing risk variables (time and status) + # 0: no event, 1: event 1, 2: event 2 + data <- data %>% + dplyr::mutate(!!nameOutTime := dplyr::if_else( + .data[[time2]] > .data[[time1]], + .data[[time1]], .data[[time2]] + )) %>% + dplyr::mutate(!!nameOutStatus := as.factor(dplyr::if_else( + .data[[time2]] < .data[[time1]], + 2 * .data[[status2]], .data[[status1]] + ))) + + return(data) +} + +singleEventSurvival <- function(survData, times, variables, timeGap) { + estimates <- list() + fitSummary <- list() + + + var_columns <- unlist(variables) %>% unique() + + cli::cli_progress_message("Getting overall estimates") + fit <- survival::survfit(survival::Surv(outcome_time, outcome_status) ~ 1, + data = survData + ) + + fitSummary[[1]] <- as.data.frame(t(summary(fit)$table)) %>% + dplyr::rename("n_max" = "n.max", + "n_start" = "n.start", + "restricted_mean"= "rmean", + "restricted_mean_std_error"= "se(rmean)", + "median_survival" = "median", + "median_survival_95CI_lower" = "0.95LCL", + "median_survival_95CI_higher" = "0.95UCL") %>% + dplyr::mutate(analysis_type = "single event") %>% + dplyr::mutate(strata_name = "Overall", + strata_level = "Overall", + outcome = "outcome") + + summ <- summary(fit, times = times, extend = TRUE) + estimates[[1]] <- dplyr::bind_rows( + dplyr::tibble( + outcome = "outcome", + time = summ$time, + n_event = summ$n.event, + n_risk = summ$n.risk, + estimate_type = "Survival probability", + estimate = summ$surv, + estimate_95CI_lower = summ$lower, + estimate_95CI_upper = summ$upper + ), + dplyr::tibble( + outcome = "outcome", + time = summ$time, + n_event = summ$n.event, + n_risk = summ$n.risk, + estimate_type = "Cumulative failure probability", + estimate = 1 - summ$surv, + estimate_95CI_lower = 1 - summ$upper, + estimate_95CI_upper = 1 - summ$lower + )) %>% + dplyr::mutate(analysis_type = "Single event", + strata_name = "Overall", + strata_level = "Overall") + + # Add strata estimates if required + if(!is.null(variables)) { + cli::cli_progress_bar( + total = length(variables), + format = " -- Getting estimates for {cli::pb_bar} {cli::pb_current} of {cli::pb_total} strata" + ) + for(i in seq_along(variables)) { + cli::cli_progress_update() + # Get formula for the model + name <- variables[[i]] + expr <- stats::as.formula(paste(c("survival::Surv(outcome_time, outcome_status) ~ 1", + name), collapse = " + ")) + fit <- survival::survfit(expr, data = survData) + + fitSummary[[i+1]] <- as.data.frame(summary(fit)$table) %>% + dplyr::rename("n_max" = "n.max", + "n_start" = "n.start", + "restricted_mean"= "rmean", + "restricted_mean_std_error"= "se(rmean)", + "median_survival" = "median", + "median_survival_95CI_lower" = "0.95LCL", + "median_survival_95CI_higher" = "0.95UCL") %>% + dplyr::mutate(analysis_type = "single event", + outcome = "outcome") + + summ <- summary(fit, times = times, extend = TRUE) + estimates[[i+1]] <- dplyr::bind_rows( + dplyr::tibble( + outcome = "outcome", + time = summ$time, + n_event = summ$n.event, + n_risk = summ$n.risk, + estimate_type = "Survival probability", + estimate = summ$surv, + estimate_95CI_lower = summ$lower, + estimate_95CI_upper = summ$upper + ), + dplyr::tibble( + outcome = "outcome", + time = summ$time, + n_event = summ$n.event, + n_risk = summ$n.risk, + estimate_type = "Cumulative failure probability", + estimate = 1 - summ$surv, + estimate_95CI_lower = 1 - summ$upper, + estimate_95CI_upper = 1 - summ$lower + )) %>% + dplyr::mutate(analysis_type = "Single event") + + # Add strata variable columns in a good format + for(j in seq_along(name)) { + name_w = name + estimates[[i+1]] <- estimates[[i+1]] %>% + dplyr::mutate( + strata_name = paste(name_w, collapse = " and "), + strata_level = rep(gsub(", "," and ",gsub(paste(paste0(name_w,"="), + collapse="|"),"", + summ$strata)),2) + ) + + fitSummary[[i+1]] <- fitSummary[[i+1]] %>% + dplyr::mutate( + strata_name = paste(name_w, collapse = " and "), + strata_level = gsub(", "," and ",gsub(paste(paste0(name_w,"="), + collapse="|"),"", + row.names(fitSummary[[i+1]]))) + ) + + } + } + cli::cli_progress_done() + } + + # Output as tibble + estimates <- dplyr::bind_rows(estimates) + + + + # Get number of events for all timeGaps + number_events <- estimates %>% + dplyr::filter(.data$estimate_type == "Survival probability") %>% + dplyr::group_by(.data$strata_name, .data$strata_level) %>% + dplyr::mutate(n_events = cumsum(.data$n_event)) %>% + dplyr::filter(.data$time %% timeGap[1] == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% + dplyr::ungroup() %>% + dplyr::mutate(timeGap = timeGap[1], + outcome = "outcome") %>% + dplyr::select("time", "n_events", "timeGap", "outcome", + "strata_name", "strata_level") + + for(t in timeGap[-1]) { + number_events <- dplyr::union_all( + number_events, + estimates %>% + dplyr::filter(.data$estimate_type == "Survival probability") %>% + dplyr::group_by(.data$strata_name, .data$strata_level) %>% + dplyr::mutate(n_events = cumsum(.data$n_event)) %>% + dplyr::filter(.data$time %% t == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% + dplyr::ungroup() %>% + dplyr::mutate(timeGap = t) %>% + dplyr::select("time", "n_events", "timeGap", "outcome", + "strata_name", "strata_level") + ) + } + + estimates <- estimates %>% + dplyr::select(- .data$n_event) + + attr(estimates, "events") <- number_events + attr(estimates, "summary") <- dplyr::bind_rows(fitSummary) + row.names(attr(estimates, "summary")) <- NULL + attr(estimates, "summary") <- dplyr::as_tibble(attr(estimates, "summary")) + + + return(estimates) +} + +competingRiskSurvival <- function(survData, times, variables, timeGap) { + + if(!length(unique(survData$outcome_or_competing_status)) == 3){ + + cli::cli_h1("No results for competing risk analysis") + cli::cli_text(c( + "Competing risk variable must have three levels.", + "Do you have at least 1 individual for:" + )) + cli::cli_li("1) censored without event,") + cli::cli_li("2) censored at outcome event of intest, and") + cli::cli_li("3) censored at outcome competing event?") + + return(empty_estimates()) + } + + estimates <- list() + fitSummary <- list() + + var_columns <- unlist(variables) %>% unique() + + cli::cli_progress_message("Getting overall estimates") + fit <- survival::survfit(formula = survival::Surv(outcome_or_competing_time, + outcome_or_competing_status) ~ 1, + data = survData) + summ <- summary(fit, times = times, extend = TRUE) + + fitSummary[[1]] <- as.data.frame(summary(fit)$table) %>% + dplyr::rename("n_start" = "n", + "restricted_mean"= "rmean", + "n_events"= "nevent") %>% + dplyr::mutate(analysis_type = "competing risk") %>% + dplyr::mutate(strata_name = "Overall", + strata_level = "Overall") %>% + tibble::rownames_to_column(var = "outcome") %>% + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == "(s0)", "none", + dplyr::if_else(.data$outcome == "1", + "outcome", "competing outcome"))) + + estimates[[1]] <- dplyr::bind_rows( + dplyr::bind_cols( + data.frame(outcome = 1L, + time = summ$time, + n_event = summ$n.event[,2], + n_risk = summ$n.risk[,1], + estimate_type = "Cumulative failure probability"), + as.data.frame(summ$pstate) %>% + dplyr::rename("estimate" = "V2") %>% + dplyr::select("estimate"), + as.data.frame(summ$lower) %>% + dplyr::rename("estimate_95CI_lower" = "V2") %>% + dplyr::select("estimate_95CI_lower"), + as.data.frame(summ$upper) %>% + dplyr::rename("estimate_95CI_upper" = "V2") %>% + dplyr::select("estimate_95CI_upper")), + dplyr::bind_cols( + data.frame(outcome = 2L, + time = summ$time, + n_event = summ$n.event[,3], + n_risk = summ$n.risk[,1], + estimate_type = "Cumulative failure probability"), + as.data.frame(summ$pstate) %>% + dplyr::rename("estimate" = "V3") %>% + dplyr::select("estimate"), + as.data.frame(summ$lower) %>% + dplyr::rename("estimate_95CI_lower" = "V3") %>% + dplyr::select("estimate_95CI_lower"), + as.data.frame(summ$upper) %>% + dplyr::rename("estimate_95CI_upper" = "V3") %>% + dplyr::select("estimate_95CI_upper"))) %>% + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, + "outcome", + "competing outcome" + )) %>% + dplyr::mutate(analysis_type = "Competing risk") %>% + dplyr::mutate(strata_name = "Overall", + strata_level = "Overall") + + # Add strata estimates if required + if(!is.null(variables)) { + cli::cli_progress_bar( + total = length(variables), + format = " -- Getting estimates for {cli::pb_bar} {cli::pb_current} of {cli::pb_total} strata" + ) + for(i in seq_along(variables)) { + cli::cli_progress_update() + # Get formula for the model + name <- variables[[i]] + expr <- stats::as.formula(paste(c("survival::Surv(outcome_or_competing_time, outcome_or_competing_status) ~ 1", + name), collapse = " + ")) + fit <- survival::survfit(formula = expr, + data = survData %>% + dplyr::filter(dplyr::if_any(.env$name, ~ !is.na(.)))) + summ <- summary(fit, times = times, extend = TRUE) + + fitSummary[[i+1]] <- as.data.frame(summary(fit)$table) %>% + dplyr::rename("n_start" = "n", + "restricted_mean"= "rmean", + "n_events"= "nevent") %>% + dplyr::mutate(analysis_type = "competing risk") %>% + dplyr::mutate(strata_name = "Overall", + strata_level = "Overall") %>% + tibble::rownames_to_column(var = "outcome") %>% + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == "(s0)", "none", + dplyr::if_else(.data$outcome == "1", + "outcome", "competing outcome"))) + + estimates[[i+1]] <- dplyr::bind_rows( + dplyr::bind_cols( + data.frame(outcome = 1L, + time = summ$time, + strata_level = summ$strata, + n_event = summ$n.event[,2], + n_risk = summ$n.risk[,1], + estimate_type = "Cumulative failure probability"), + as.data.frame(summ$pstate) %>% + dplyr::rename("estimate" = "V2") %>% + dplyr::select("estimate"), + as.data.frame(summ$lower) %>% + dplyr::rename("estimate_95CI_lower" = "V2") %>% + dplyr::select("estimate_95CI_lower"), + as.data.frame(summ$upper) %>% + dplyr::rename("estimate_95CI_upper" = "V2") %>% + dplyr::select("estimate_95CI_upper")), + dplyr::bind_cols( + data.frame(outcome = 2L, + strata_level = summ$strata, + time = summ$time, + n_event = summ$n.event[,3], + n_risk = summ$n.risk[,1], + estimate_type = "Cumulative failure probability"), + as.data.frame(summ$pstate) %>% + dplyr::rename("estimate" = "V3") %>% + dplyr::select("estimate"), + as.data.frame(summ$lower) %>% + dplyr::rename("estimate_95CI_lower" = "V3") %>% + dplyr::select("estimate_95CI_lower"), + as.data.frame(summ$upper) %>% + dplyr::rename("estimate_95CI_upper" = "V3") %>% + dplyr::select("estimate_95CI_upper"))) %>% + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, + "outcome", + "competing outcome" + )) %>% + dplyr::mutate(analysis_type = "Competing risk") %>% + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, + "outcome", + "competing outcome" + )) + + estimates[[i+1]] <- estimates[[i+1]] %>% + dplyr::mutate(strata_name = paste(name, collapse = " and ")) %>% + dplyr::relocate("strata_level",.after = "strata_name") + + for(j in seq_along(name)) { + estimates[[i+1]] <- estimates[[i+1]] %>% + dplyr::mutate(strata_level= stringr::str_replace(string = .data$strata_level, + pattern = paste0(name[j], "="), replacement = "")) %>% + dplyr::mutate(strata_level= stringr::str_replace(string = .data$strata_level, + pattern = ",", + replacement = " and")) + fitSummary[[i+1]] <- fitSummary[[i+1]] %>% + dplyr::mutate( + strata_name = paste(name, collapse = " and "), + strata_level = gsub(", "," and ",gsub(paste(paste0(name,"="), + collapse="|"),"", + row.names(fitSummary[[i+1]]))) + ) + } + } + cli::cli_progress_done() + } + + # Output as tibble + estimates <- dplyr::bind_rows(estimates) %>% dplyr::as_tibble() + + # Get number of events for all timeGaps + number_events <- estimates %>% + dplyr::group_by(.data$strata_name, .data$strata_level, .data$outcome) %>% + dplyr::mutate(n_events = cumsum(.data$n_event)) %>% + dplyr::filter(.data$time %% timeGap[1] == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% + dplyr::ungroup() %>% + dplyr::mutate(timeGap = timeGap[1]) %>% + dplyr::select(.data$time, .data$n_events, .data$timeGap, .data$outcome, .data$strata_name, .data$strata_level) + + for(t in timeGap[-1]) { + number_events <- dplyr::union_all( + number_events, + estimates %>% + dplyr::group_by(.data$strata_name, .data$strata_level, .data$outcome) %>% + dplyr::mutate(n_events = cumsum(.data$n_event)) %>% + dplyr::filter(.data$time %% t == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% + dplyr::ungroup() %>% + dplyr::mutate(timeGap = t) %>% + dplyr::select(.data$time, .data$n_events, .data$timeGap, .data$outcome, .data$strata_name, .data$strata_level) + ) + } + + estimates <- estimates %>% + dplyr::select(- .data$n_event) + + attr(estimates, "events") <- number_events + attr(estimates, "summary") <- dplyr::bind_rows(fitSummary) + row.names(attr(estimates, "summary")) <- NULL + attr(estimates, "summary") <- dplyr::as_tibble(attr(estimates, "summary")) + # add summary attribute + + return(estimates) + +} + +addCohortDetails <- function(x, + cdm, + targetCohortId, + targetCohortTable, + outcomeCohortId, + outcomeCohortTable, + competingOutcomeCohortId, + competingOutcomeCohortTable = NULL){ + + x <- x %>% + dplyr::mutate(cdm_name = attr(cdm, "cdm_name"), + result_type = "Survival estimate", + group_name = "Cohort", + group_level = + CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == + .env$targetCohortId) %>% + dplyr::pull("cohort_name"), + outcome_cohort_name = + CDMConnector::cohortSet(cdm[[outcomeCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == + .env$outcomeCohortId) %>% + dplyr::pull("cohort_name"), + variable_type = NA) + + if(!is.null(competingOutcomeCohortTable)){ + x <- x %>% + dplyr::mutate(competing_outcome_cohort_name = + CDMConnector::cohortSet(cdm[[competingOutcomeCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id == + .env$competingOutcomeCohortId) %>% + dplyr::pull("cohort_name"), + analysis_type = "Competing risk") + } else { + x <- x %>% + dplyr::mutate(competing_outcome_cohort_name = "No competing outcome", + analysis_type = "Single event") + } + + x <- x %>% + dplyr::relocate("competing_outcome_cohort_name", + .after = "outcome_cohort_name") + + + return(x) + +} + +empty_estimates <- function(){ +dplyr::tibble() + +} + +var_order <- function(estimates){ + estimates %>% + dplyr::relocate("cdm_name") %>% + dplyr::relocate("result_type", .after = "cdm_name") %>% + dplyr::relocate("group_name", .after = "result_type") %>% + dplyr::relocate("group_level", .after = "group_name") %>% + dplyr::relocate("strata_name", .after = "group_level") %>% + dplyr::relocate("strata_level", .after = "strata_name") %>% + dplyr::relocate("variable", .after = "strata_level") %>% + dplyr::relocate("variable_level", .after = "variable") %>% + dplyr::relocate("variable_type", .after = "variable_level") %>% + dplyr::relocate("estimate_type", .after = "variable_level") + +} + diff --git a/R/generateDeathCohort.R b/R/generateDeathCohort.R new file mode 100644 index 0000000..83ce7a0 --- /dev/null +++ b/R/generateDeathCohort.R @@ -0,0 +1,132 @@ + +#' To create a death cohort +#' +#' @param cdm CDM reference +#' +#' @param deathInObservation If TRUE, restricts deaths included to only those +#' observed during an ongoing observation period. +#' @param name name for the created death cohort table +#' @param cohortTable name of the cohort table to create a death cohort for +#' @param cohortId name of the cohort table to create a death cohort for +#' +#' @return A cohort table with a death cohort in cdm +#' @export +#' +#' @examples +generateDeathCohortSet <- function( + cdm, + deathInObservation = FALSE, + name = "death_cohort", + cohortTable = NULL, + cohortId = NULL){ + + # 0. validate inputs... + checkCdm(cdm, tables = c( + "death", "observation_period" + )) + + checkmate::assertNumeric(cohortId, any.missing = FALSE, null.ok = TRUE) + + # 1. deathInObservation + if (isTRUE(deathInObservation)){ + x <- cdm$death %>% + PatientProfiles::addInObservation(cdm, + indexDate = "death_date") %>% + dplyr::filter(.data$in_observation==1) %>% + dplyr::select("person_id", "death_date") + }else{ + x <- cdm$death + } + + x <- x %>% + dplyr::select("person_id", "death_date") %>% + dplyr::rename("subject_id" = "person_id") + + # 2. cohortTable and cohortId + if (!is.null(cohortTable)){ + checkCdm(cdm, tables = c(cohortTable)) + + if (!is.null(cohortId)){ + x <- x %>% + dplyr::inner_join(cdm[[cohortTable]] %>% + dplyr::filter(.data$cohort_definition_id %in% cohortId) %>% + dplyr::select("subject_id", "cohort_definition_id"), + by = c("subject_id")) %>% + dplyr::select("subject_id", "death_date") + + }else{ + x <- x %>% + dplyr::inner_join(cdm[[cohortTable]] %>% + dplyr::select("subject_id"), + by = c("subject_id")) %>% + dplyr::select("subject_id", "death_date") + } + } + + # 3. table ref + # tables to be deleted + firstTempTable <- getOption("dbplyr_table_name", 0) + 1 + + cohortRef <- x %>% + dplyr::group_by(.data$subject_id) %>% + dbplyr::window_order(.data$death_date) %>% + dplyr::filter(dplyr::row_number()==1) %>% + dplyr::rename("cohort_start_date" = "death_date") %>% + dplyr::mutate(cohort_definition_id = 1L , + cohort_end_date = .data$cohort_start_date) %>% + dplyr::select( + "cohort_definition_id", "subject_id", "cohort_start_date", + "cohort_end_date" + ) %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), name), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + cohortSetRef <- cohortRef %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = "death_cohort") %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), name, "_set"), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + cohortCountRef <- cohortRef %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) %>% + CDMConnector::computeQuery( + name = paste0(attr(cdm, "write_prefix"), name, "_count"), + FALSE, attr(cdm, "write_schema"), TRUE + ) + + + cdm[[name]] <- CDMConnector::newGeneratedCohortSet( + cohortRef = cohortRef, + cohortSetRef = cohortSetRef, + cohortCountRef = cohortCountRef + ) + + attr(cdm[[name]], "cohort_attrition") <- tibble::tibble( + "reason" = "Qualifying initial records", + "reason_id" = 1, + "excluded_records" = 0, + "excluded_subjects" = 0 + ) + + # drop intermediary tables that were created in the process + lastTempTable <- getOption("dbplyr_table_name", 0) + if (!is.null(attr(cdm, "write_prefix")) & firstTempTable <= lastTempTable) { + CDMConnector::dropTable( + cdm, sprintf("dbplyr_%03i", firstTempTable:lastTempTable) + ) + } + + return(cdm) +} + + diff --git a/R/inputValidation.R b/R/inputValidation.R new file mode 100644 index 0000000..fde9899 --- /dev/null +++ b/R/inputValidation.R @@ -0,0 +1,119 @@ + +checkCdm <- function(cdm, tables = NULL) { + if (!isTRUE(inherits(cdm, "cdm_reference"))) { + cli::cli_abort("cdm must be a CDMConnector CDM reference object") + } + if (!is.null(tables)) { + tables <- tables[!(tables %in% names(cdm))] + if (length(tables) > 0) { + ntables <- length(tables) + cli::cli_abort(paste0( + "{(ntables)} table{?s} {?is/are} not present in the cdm object: ", + paste0(tables, collapse = ", ") + )) + } + } + invisible(NULL) +} + +checkIsCdmTable <- function(cdmTable) { + isCdmTable <- all(c("person_id") %in% + colnames(cdmTable)) + + return(isCdmTable) +} + +checkIsCohort <- function(cohort) { + isCohort <- all(c( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date" + ) %in% + colnames(cohort)) & + !is.null(attr(cohort, "cohort_set")) + + if (isFALSE(isCohort)) { + return(cli::cli_abort(c( + "cohort must be a cohort table with cohort attributes" + ))) + } else { + return(invisible(isCohort)) + } +} + +checkCohortId <- function(cohort, cohortId) { + errorMessage <- checkmate::makeAssertCollection() + checkmate::assertIntegerish(cohortId, + add = errorMessage + ) + checkmate::reportAssertions(collection = errorMessage) + + cohortIdPresent <- list() + for (i in seq_along(cohortId)) { + workingId <- cohortId[[i]] + cohortIdPresent[[i]] <- (cohort %>% + dplyr::filter(.data$cohort_definition_id == .env$workingId) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) + } + cohortIdPresent <- all(unlist(cohortIdPresent)) + return(cohortIdPresent) +} + +checkPatientRows <- function(cohort) { + oneRowperPatient <- cohort %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::mutate(num = dplyr::row_number()) %>% + dplyr::ungroup() %>% + dplyr::filter(.data$num > 1) %>% + dplyr::tally() %>% + dplyr::pull() + + if (oneRowperPatient > 0) { + return(cli::cli_abort(c( + "the cohort table must only contain one row per patient" + ))) + } else { + return(invisible()) + } +} + +checkStrata <- function(strata, x) { + checkmate::assertList( + strata, + any.missing = FALSE, unique = TRUE, min.len = 1, null.ok = TRUE + ) + namesInColumns <- all(unlist(strata) %>% unique() %in% colnames(x)) + if(!isTRUE(namesInColumns)) { + return(cli::cli_abort(c( + "the cohort table must contain all variables in the strata list as columns" + ))) + } +} + +checkExposureCohortId <- function(cohort) { + isCohortIdUnique <- length(cohort %>% + dplyr::select("cohort_definition_id") %>% + dplyr::pull() %>% + unique()) == 1 + + if(isFALSE(isCohortIdUnique)) { + return(cli::cli_abort(c( + "the exposure cohort must only have one id in cohort_definition_id in addSurvival stage" + ))) + } +} + + +checkIsCohort_exp <- function(cohort) { + isCohort <- all(c( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date" + ) %in% colnames(cohort)) + if (isFALSE(isCohort)) { + return(cli::cli_abort(c( + "{cohort} must be a cohort table" # This gives a very ugly error right now + ))) + } else { + return(invisible(isCohort)) + } +} diff --git a/R/mockMGUS2cdm.R b/R/mockMGUS2cdm.R new file mode 100644 index 0000000..8d1e04a --- /dev/null +++ b/R/mockMGUS2cdm.R @@ -0,0 +1,192 @@ +#' Create mock CDM reference with survival::mgus2 dataset +#' +#' @return CDM reference containing data from the survival::mgus2 dataset +#' @export +#' +#' @examples +mockMGUS2cdm <- function() { + mgus2 <- survival::mgus2 %>% + dplyr::mutate( + cohort_start_date_diag = as.Date(paste0( + .data$dxyr, "-01-01" + )), + cohort_start_date_progression = .data$cohort_start_date_diag + + lubridate::days(.data$ptime), + cohort_start_date_death = .data$cohort_start_date_diag + + lubridate::days(.data$futime) + ) %>% + dplyr::rename("subject_id" = "id") %>% + dplyr::mutate( + observation_period_start_date = + .data$cohort_start_date_diag - + lubridate::years(.data$age) + ) + + mgus2Diag <- mgus2 %>% + dplyr::select( + "subject_id", "cohort_start_date_diag", + "age", "sex", "hgb", "creat", "mspike" + ) %>% + dplyr::rename("cohort_start_date" = "cohort_start_date_diag") %>% + dplyr::mutate( + cohort_end_date = .data$cohort_start_date, + cohort_definition_id = 1L + ) %>% + dplyr::relocate("cohort_definition_id") %>% + dplyr::relocate("cohort_end_date", .after = "cohort_start_date") %>% + dplyr::mutate(age_group = dplyr::if_else(.data$age < 70, "<70", ">=70")) + + mgus2Pr <- mgus2 %>% + dplyr::filter(.data$pstat == 1) %>% + dplyr::select("subject_id", "cohort_start_date_progression") %>% + dplyr::rename("cohort_start_date" = "cohort_start_date_progression") %>% + dplyr::mutate( + cohort_end_date = .data$cohort_start_date, + cohort_definition_id = 1L + ) %>% + dplyr::relocate("cohort_definition_id") + + mgus2Death <- mgus2 %>% + dplyr::filter(.data$death == 1) %>% + dplyr::select("subject_id", "cohort_start_date_death") %>% + dplyr::rename("cohort_start_date" = "cohort_start_date_death") %>% + dplyr::mutate( + cohort_end_date = .data$cohort_start_date, + cohort_definition_id = 1L + ) %>% + dplyr::relocate("cohort_definition_id") + + mgus2Person <- mgus2 %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::mutate( + gender_concept_id = dplyr::if_else( + .data$sex == "F", 8532, 8507 + ), + year_of_birth = lubridate::year(mgus2$observation_period_start_date), + month_of_birth = lubridate::month(mgus2$observation_period_start_date), + day_of_birth = lubridate::day(mgus2$observation_period_start_date) + ) %>% + dplyr::select( + "person_id", "gender_concept_id", + "year_of_birth", "month_of_birth", "day_of_birth" + ) + + mgus2OP <- mgus2 %>% + dplyr::rename("person_id" = "subject_id") %>% + dplyr::select( + "person_id", "observation_period_start_date", + "cohort_start_date_death" + ) %>% + dplyr::rename("observation_period_end_date" = "cohort_start_date_death") + + + # placeholder visit occurrence + visitOccurrence <- dplyr::tibble( + visit_occurrence_id = 1001, + person_id = 1, + visit_concept_id = 5, + visit_start_date = c("2020-01-01"), + visit_end_date = c("2020-01-01") + ) + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + # person + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "person", + mgus2Person, + overwrite = TRUE + ) + }) + + # obs + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "observation_period", + mgus2OP, + overwrite = TRUE + ) + }) + + # cohort diag + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "mgus_diagnosis", + mgus2Diag, + overwrite = TRUE + ) + }) + + # cohort progression + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "progression", + mgus2Pr, + overwrite = TRUE + ) + }) + + # cohort death + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "death_cohort", + mgus2Death, + overwrite = TRUE + ) + }) + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "visit_occurrence", + visitOccurrence, + overwrite = TRUE + ) + }) + + + cdm <- CDMConnector::cdm_from_con( + con = db, + cohort_tables = c( + "mgus_diagnosis", "progression", + "death_cohort" + ), + cdm_schema = "main", + write_schema = "main", + cdm_name = "mock" + ) + + cdm$mgus_diagnosis <- addCohortCountAttr(cdm$mgus_diagnosis, + name="mgus_diagnosis") + cdm$progression <- addCohortCountAttr(cdm$progression, + name="progression") + cdm$death_cohort <- addCohortCountAttr(cdm$death_cohort, + name="death_cohort") + + return(cdm) +} + + +#' Function to add attributes to cohort table +#' it adds cohort_count, cohort_set, cohort_count, cohort_attrition +#' +#' @noRd +#' +addCohortCountAttr <- function(cohort, name = "cohort") { + cohortCount <- cohort %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id) + ) %>% + dplyr::collect() + + attr(cohort, "cohort_count") <- cohortCount + attr(cohort, "cohort_set") <- cohortCount %>% + dplyr::select("cohort_definition_id") %>% + dplyr::mutate(cohort_name = .env$name) + + attr(cohort, "cohort_attrition") <- cohortCount %>% + dplyr::mutate( + "reason" = "Qualifying initial records", + "reason_id" = 1, + "excluded_records" = 0, + "excluded_subjects" = 0 + ) + + return(cohort) +} diff --git a/R/plotSurvival.R b/R/plotSurvival.R new file mode 100644 index 0000000..d4baa94 --- /dev/null +++ b/R/plotSurvival.R @@ -0,0 +1,210 @@ +#' Plot survival results +#' +#' @param result Survival results +#' @param x Variable to plot on x axis +#' @param ylim Limits for the Y axis +#' @param ribbon If TRUE, the plot will join points using a ribbon +#' @param facet Variables to use for facets +#' @param colour Variables to use for colours +#' @param colour_name Colour legend name +#' +#' @return A ggplot with the survival results plotted +#' @export +#' +#' @examples +plotSurvival <- function(result, + x = "time", + ylim = c(0,NA), + ribbon = TRUE, + facet = NULL, + colour = NULL, + colour_name = NULL){ + + + plotEstimates(result = result %>% + dplyr::filter(.data$estimate_type == + "Survival probability"), + x = x, + y = "estimate", + yLower = "estimate_95CI_lower", + yUpper = "estimate_95CI_upper", + ylim = ylim, + ytype = "count", + ribbon = ribbon, + facet = facet, + colour = colour, + colour_name = colour_name) + + ggplot2::ylab("Survival probability") + + ggplot2::xlab("Time in days") + + +} + +#' Plot cumulative incidence +#' +#' @param result Survival results +#' @param x Variable to plot on x axis +#' @param ylim Limits for the Y axis +#' @param ribbon If TRUE, the plot will join points using a ribbon +#' @param facet Variables to use for facets +#' @param colour Variables to use for colours +#' @param colour_name Colour legend name +#' +#' @return +#' @export +#' +#' @examples +plotCumulativeIncidence <- function(result, + x = "time", + ylim = c(0,NA), + ribbon = TRUE, + facet = NULL, + colour = NULL, + colour_name = NULL){ + + + plotEstimates(result = result %>% + dplyr::filter(.data$estimate_type == + "Cumulative failure probability"), + x = x, + y = "estimate", + yLower = "estimate_95CI_lower", + yUpper = "estimate_95CI_upper", + ylim = ylim, + ytype = "count", + ribbon = ribbon, + facet = facet, + colour = colour, + colour_name = colour_name) + + ggplot2::ylab("Cumulative failure probability") + + ggplot2::xlab("Time in days") + + +} + +# helper functions + +plotEstimates <- function(result, + x, + y, + yLower, + yUpper, + ylim, + ytype, + ribbon, + facet, + colour, + colour_name){ + + errorMessage <- checkmate::makeAssertCollection() + #checkmate::assertTRUE(inherits(result, "SurvivalResult")) + checkmate::assertTRUE(all(c(x) %in% colnames(result))) + checkmate::reportAssertions(collection = errorMessage) + + plot_data <- getPlotData(estimates = result, + facetVars = facet, + colourVars = colour) + + if(is.null(colour)){ + plot <- plot_data %>% + ggplot2::ggplot( + ggplot2::aes(x = !!rlang::sym(x), + y = !!rlang::sym(y))) + } else { + plot <- plot_data %>% + ggplot2::ggplot( + ggplot2::aes(x = !!rlang::sym(x) , + y = !!rlang::sym(y), + group = .data$colour_vars, + colour = .data$colour_vars, + fill = .data$colour_vars, + linetype = .data$colour_vars)) + + ggplot2::labs(colour = "legend", + linetype = "legend") + } + + plot <- plot + + ggplot2::geom_line(linewidth = 0.25) + if(is.null(ylim)){ + if(ytype == "count"){ + plot <- plot + + ggplot2::scale_y_continuous(labels = scales::comma) + } + if(ytype == "percentage"){ + plot <- plot + + ggplot2::scale_y_continuous(labels = + scales::percent_format(accuracy = 0.1)) + } + } else { + plot <- addYLimits(plot = plot, ylim = ylim, ytype = ytype) + } + + if(!is.null(facet)){ + plot <- plot + + ggplot2::facet_wrap(ggplot2::vars(.data$facet_var)) + + ggplot2::theme_bw() + } else { + plot <- plot + + ggplot2::theme_minimal() + } + + if(isTRUE(ribbon)){ + plot <- addRibbon(plot = plot, yLower = yLower, yUpper = yUpper) + } + + + + plot <- plot + + ggplot2::theme(legend.title = ggplot2::element_blank()) + + return(plot) + + + +} + + +getPlotData <- function(estimates, facetVars, colourVars){ + + plotData <- estimates %>% + tidyr::pivot_wider(names_from = "variable_type", + values_from = "estimate") + + if(!is.null(facetVars)){ + plotData <- plotData %>% + tidyr::unite("facet_var", + c(dplyr::all_of(.env$facetVars)), remove = FALSE, sep = "; ") + } + if(!is.null(colourVars)){ + plotData <-plotData %>% + tidyr::unite("colour_vars", + c(dplyr::all_of(.env$colourVars)), remove = FALSE, sep = "; ") + } + + return(plotData) + +} + +addYLimits <- function(plot, ylim, ytype){ + if(ytype == "count"){ + plot <- plot + + ggplot2::scale_y_continuous(labels = scales::comma, + limits = ylim) + } + if(ytype == "percentage"){ + plot <- plot + + ggplot2::scale_y_continuous(labels = + scales::percent_format(accuracy = 0.1), + limits = ylim) + } + return(plot) +} + +addRibbon <- function(plot, yLower, yUpper){ + plot <- plot + + ggplot2::geom_ribbon( + ggplot2::aes(ymin = !!rlang::sym(yLower), + ymax = !!rlang::sym(yUpper)), + alpha = .3, color = NA, show.legend = FALSE) + + ggplot2::geom_line(linewidth = 0.25) +} diff --git a/R/recordAttrition.R b/R/recordAttrition.R new file mode 100644 index 0000000..98f91e5 --- /dev/null +++ b/R/recordAttrition.R @@ -0,0 +1,65 @@ +# Copyright 2023 DARWIN EU® +# +# This file is part of IncidencePrevalence +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +recordAttrition <- function(table, + id = "person_id", + existingAttrition = NULL, + reasonId, + reason) { + errorMessage <- checkmate::makeAssertCollection() + checkmate::assertTRUE(any(class(table) %in% + c("tbl_dbi", "tbl", "data.frame", "tibble"))) + checkmate::assertCharacter(id, add = errorMessage) + checkmate::assertIntegerish(reasonId, add = errorMessage) + checkmate::assertCharacter(reason, null.ok = TRUE, add = errorMessage) + if (!is.null(existingAttrition)) { + checkmate::assertTRUE(any(class(existingAttrition) %in% + c("data.frame", "tbl"))) + } + checkmate::reportAssertions(collection = errorMessage) + + attrition <- dplyr::tibble( + number_records = table %>% + dplyr::tally() %>% + dplyr::pull(), + number_subjects = table %>% + dplyr::select(.env$id) %>% + dplyr::distinct() %>% + dplyr::tally() %>% + dplyr::pull(), + reason_id = .env$reasonId, + reason = .env$reason + ) + + if (!is.null(existingAttrition)) { + attrition <- dplyr::bind_rows(existingAttrition, attrition) %>% + dplyr::mutate( + excluded_records = + dplyr::lag(.data$number_records) - .data$number_records + ) %>% + dplyr::mutate( + excluded_subjects = + dplyr::lag(.data$number_subjects) - .data$number_subjects + ) + } else { + attrition <- attrition %>% + dplyr::mutate(excluded_records = NA) %>% + dplyr::mutate(excluded_subjects = NA) + } + + return(attrition) +} diff --git a/R/summariseSurvivalParticipants.R b/R/summariseSurvivalParticipants.R new file mode 100644 index 0000000..7acaf5e --- /dev/null +++ b/R/summariseSurvivalParticipants.R @@ -0,0 +1,4 @@ + +#' @importFrom PatientProfiles summariseCharacteristics +#' @export +PatientProfiles::summariseCharacteristics diff --git a/R/suppressSurvivalCounts.R b/R/suppressSurvivalCounts.R new file mode 100644 index 0000000..17facd0 --- /dev/null +++ b/R/suppressSurvivalCounts.R @@ -0,0 +1,45 @@ +suppressSurvivalCounts <- function(result, + minCellCount = 5) { + + checkmate::assertTRUE(all(c( + "variable", "estimate", "estimate_type", "group_name", "group_level", + "strata_name", "strata_level" + ) %in% + colnames(result))) + + checkmate::assertIntegerish(minCellCount, + len = 1, + lower = 0) + + if (minCellCount > 1) { + toObscure <- result %>% + dplyr::filter(.data$variable_type == "n_start") %>% + dplyr::mutate(estimate = as.integer(.data$estimate)) %>% + dplyr::filter(.data$estimate > 0 & .data$estimate < .env$minCellCount) %>% + dplyr::select("group_name", "group_level", "strata_name", "strata_level") + + +for(i in seq_along(toObscure$group_name)){ + result <- result %>% + dplyr::mutate(estimate = dplyr::if_else( + .data$group_name == toObscure$group_name[i] & + .data$group_level == toObscure$group_level[i] & + .data$strata_name == toObscure$strata_name[i] & + .data$strata_level == toObscure$strata_level[i] & + .data$variable_type == "n_start", paste0("<", minCellCount), + as.character(estimate))) %>% + dplyr::mutate(estimate = dplyr::if_else( + .data$group_name == toObscure$group_name[i] & + .data$group_level == toObscure$group_level[i] & + .data$strata_name == toObscure$strata_name[i] & + .data$strata_level == toObscure$strata_level[i] & + .data$variable_type != "n_start", + as.character(NA), + as.character(estimate))) +} + } + + + return(result) +} + diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000..fd0b1d1 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,14 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL diff --git a/R/utils-tidy-eval.R b/R/utils-tidy-eval.R new file mode 100644 index 0000000..09c3698 --- /dev/null +++ b/R/utils-tidy-eval.R @@ -0,0 +1,107 @@ +#' Tidy eval helpers +#' +#' @description +#' This page lists the tidy eval tools reexported in this package from +#' rlang. To learn about using tidy eval in scripts and packages at a +#' high level, see the [dplyr programming +#' vignette](https://dplyr.tidyverse.org/articles/programming.html) +#' and the [ggplot2 in packages +#' vignette](https://ggplot2.tidyverse.org/articles/ggplot2-in-packages.html). +#' The [Metaprogramming +#' section](https://adv-r.hadley.nz/metaprogramming.html) of [Advanced +#' R](https://adv-r.hadley.nz) may also be useful for a deeper dive. +#' +#' * The tidy eval operators `{{`, `!!`, and `!!!` are syntactic +#' constructs which are specially interpreted by tidy eval functions. +#' You will mostly need `{{`, as `!!` and `!!!` are more advanced +#' operators which you should not have to use in simple cases. +#' +#' The curly-curly operator `{{` allows you to tunnel data-variables +#' passed from function arguments inside other tidy eval functions. +#' `{{` is designed for individual arguments. To pass multiple +#' arguments contained in dots, use `...` in the normal way. +#' +#' ``` +#' my_function <- function(data, var, ...) { +#' data %>% +#' group_by(...) %>% +#' summarise(mean = mean({{ var }})) +#' } +#' ``` +#' +#' * [enquo()] and [enquos()] delay the execution of one or several +#' function arguments. The former returns a single expression, the +#' latter returns a list of expressions. Once defused, expressions +#' will no longer evaluate on their own. They must be injected back +#' into an evaluation context with `!!` (for a single expression) and +#' `!!!` (for a list of expressions). +#' +#' ``` +#' my_function <- function(data, var, ...) { +#' # Defuse +#' var <- enquo(var) +#' dots <- enquos(...) +#' +#' # Inject +#' data %>% +#' group_by(!!!dots) %>% +#' summarise(mean = mean(!!var)) +#' } +#' ``` +#' +#' In this simple case, the code is equivalent to the usage of `{{` +#' and `...` above. Defusing with `enquo()` or `enquos()` is only +#' needed in more complex cases, for instance if you need to inspect +#' or modify the expressions in some way. +#' +#' * The `.data` pronoun is an object that represents the current +#' slice of data. If you have a variable name in a string, use the +#' `.data` pronoun to subset that variable with `[[`. +#' +#' ``` +#' my_var <- "disp" +#' mtcars %>% summarise(mean = mean(.data[[my_var]])) +#' ``` +#' +#' * Another tidy eval operator is `:=`. It makes it possible to use +#' glue and curly-curly syntax on the LHS of `=`. For technical +#' reasons, the R language doesn't support complex expressions on +#' the left of `=`, so we use `:=` as a workaround. +#' +#' ``` +#' my_function <- function(data, var, suffix = "foo") { +#' # Use `{{` to tunnel function arguments and the usual glue +#' # operator `{` to interpolate plain strings. +#' data %>% +#' summarise("{{ var }}_mean_{suffix}" := mean({{ var }})) +#' } +#' ``` +#' +#' * Many tidy eval functions like `dplyr::mutate()` or +#' `dplyr::summarise()` give an automatic name to unnamed inputs. If +#' you need to create the same sort of automatic names by yourself, +#' use `as_label()`. For instance, the glue-tunnelling syntax above +#' can be reproduced manually with: +#' +#' ``` +#' my_function <- function(data, var, suffix = "foo") { +#' var <- enquo(var) +#' prefix <- as_label(var) +#' data %>% +#' summarise("{prefix}_mean_{suffix}" := mean(!!var)) +#' } +#' ``` +#' +#' Expressions defused with `enquo()` (or tunnelled with `{{`) need +#' not be simple column names, they can be arbitrarily complex. +#' `as_label()` handles those cases gracefully. If your code assumes +#' a simple column name, use `as_name()` instead. This is safer +#' because it throws an error if the input is not a name as expected. +#' +#' @md +#' @name tidyeval +#' @keywords internal +#' @importFrom rlang enquo enquos .data := as_name as_label +#' @aliases enquo enquos .data := as_name as_label +#' @export enquo enquos .data := as_name as_label +NULL diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..fc66bf1 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,14 @@ +#' Participants contributing to a survival analysis +#' +#' @param result Result object +#' +#' @return References to the study participants contributing to +#' a given analysis +#' @export +#' +#' @examples +survivalParticipants <- function(result) { + attr(result, "participants") +} + + diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..4f0bf6d --- /dev/null +++ b/README.Rmd @@ -0,0 +1,142 @@ +--- +output: github_document +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + warning = FALSE, message = FALSE, + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# CohortSurvival + +[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival) [![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://codecov.io/github/darwin-eu/CohortSurvival?branch=main) [![R-CMD-check](https://github.com/darwin-eu/CohortSurvival/workflows/R-CMD-check/badge.svg)](https://github.com/darwin-eu/CohortSurvival/actions) [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html) + +CohortSurvival contains functions for extracting and summarising survival data using the OMOP common data model. + +## Installation + +You can install the development version of CohortSurvival like so: + +```{r, eval=FALSE} +install.packages("remotes") +remotes::install_github("darwin-eu/CohortSurvival") +``` + +## Example usage + +### Create a reference to data in the OMOP CDM format + +The CohortSurvival package is designed to work with data in the OMOP CDM format, so our first step is to create a reference to the data using the CDMConnector package. + +```{r example} +library(CDMConnector) +library(CohortSurvival) +library(dplyr) +library(ggplot2) +``` + +Creating a connection to a Postgres database would for example look like: + +```{r, eval=FALSE} +con <- DBI::dbConnect(RPostgres::Postgres(), + dbname = Sys.getenv("CDM5_POSTGRESQL_DBNAME"), + host = Sys.getenv("CDM5_POSTGRESQL_HOST"), + user = Sys.getenv("CDM5_POSTGRESQL_USER"), + password = Sys.getenv("CDM5_POSTGRESQL_PASSWORD") +) + +cdm <- CDMConnector::cdm_from_con(con, + cdm_schema = Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA"), + write_schema = Sys.getenv("CDM5_POSTGRESQL_RESULT_SCHEMA") +) +``` + +### Example: MGUS + +For this example we´ll use a cdm reference containing the MGUS2 dataset from the survival package (which we transformed into a set of OMOP CDM style cohort tables). The mgus2 dataset contains survival data of 1341 sequential patients with monoclonal gammopathy of undetermined significance (MGUS). For more information see ´?survival::mgus2´ + +```{r} +cdm <- CohortSurvival::mockMGUS2cdm() +``` + +In this example cdm reference we have three cohort tables of interest: 1) MGUS diagnosis cohort + +```{r} +cdm$mgus_diagnosis %>% + glimpse() +``` + +2) MGUS progression cohort + +```{r} +cdm$progression %>% + glimpse() +``` + +3) Death cohort + +```{r} +cdm$death_cohort %>% + glimpse() +``` + +### MGUS diagnosis to death + +We can get survival estimates for death following diagnosis like so: + +```{r} +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1 +) + +plotSurvival(MGUS_death) +``` + +### Stratified results + +```{r} +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata = list(c("age_group"), + c("sex"), + c("age_group", "sex")) +) + +plotSurvival(MGUS_death, + colour = "strata_level", + facet= "strata_name") +``` + +### Summary statisitics on survival + +For single event analyses, we can extract restricted mean survival and median survival + +```{r} +MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>% + tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>% + dplyr::mutate("Restricted mean survival (se)" = paste0(round(restricted_mean), " (", round(restricted_mean_std_error, 2), ")"), + "Median survival (95% CI)" = paste0(median_survival, " (", median_survival_95CI_lower, " to ", median_survival_95CI_higher, ")") + ) %>% + dplyr::select(strata_name, strata_level, + "Restricted mean survival (se)", "Median survival (95% CI)") + +``` + +### Disconnect from the cdm database connection + +```{r} +cdm_disconnect(cdm) +``` diff --git a/README.md b/README.md new file mode 100644 index 0000000..cd0460b --- /dev/null +++ b/README.md @@ -0,0 +1,186 @@ + + + +# CohortSurvival + +[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival) +[![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://codecov.io/github/darwin-eu/CohortSurvival?branch=main) +[![R-CMD-check](https://github.com/darwin-eu/CohortSurvival/workflows/R-CMD-check/badge.svg)](https://github.com/darwin-eu/CohortSurvival/actions) +[![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html) + +CohortSurvival contains functions for extracting and summarising +survival data using the OMOP common data model. + +## Installation + +You can install the development version of CohortSurvival like so: + +``` r +install.packages("remotes") +remotes::install_github("darwin-eu/CohortSurvival") +``` + +## Example usage + +### Create a reference to data in the OMOP CDM format + +The CohortSurvival package is designed to work with data in the OMOP CDM +format, so our first step is to create a reference to the data using the +CDMConnector package. + +``` r +library(CDMConnector) +library(CohortSurvival) +library(dplyr) +library(ggplot2) +``` + +Creating a connection to a Postgres database would for example look +like: + +``` r +con <- DBI::dbConnect(RPostgres::Postgres(), + dbname = Sys.getenv("CDM5_POSTGRESQL_DBNAME"), + host = Sys.getenv("CDM5_POSTGRESQL_HOST"), + user = Sys.getenv("CDM5_POSTGRESQL_USER"), + password = Sys.getenv("CDM5_POSTGRESQL_PASSWORD") +) + +cdm <- CDMConnector::cdm_from_con(con, + cdm_schema = Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA"), + write_schema = Sys.getenv("CDM5_POSTGRESQL_RESULT_SCHEMA") +) +``` + +### Example: MGUS + +For this example we´ll use a cdm reference containing the MGUS2 dataset +from the survival package (which we transformed into a set of OMOP CDM +style cohort tables). The mgus2 dataset contains survival data of 1341 +sequential patients with monoclonal gammopathy of undetermined +significance (MGUS). For more information see ´?survival::mgus2´ + +``` r +cdm <- CohortSurvival::mockMGUS2cdm() +``` + +In this example cdm reference we have three cohort tables of +interest: 1) MGUS diagnosis cohort + +``` r +cdm$mgus_diagnosis %>% + glimpse() +#> Rows: ?? +#> Columns: 10 +#> Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… +#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15… +#> $ cohort_start_date 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, … +#> $ cohort_end_date 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, … +#> $ age 88, 78, 94, 68, 90, 90, 89, 87, 86, 79, 86, 89, 8… +#> $ sex F, F, M, M, F, M, F, F, F, F, M, F, M, F, M, F, F… +#> $ hgb 13.1, 11.5, 10.5, 15.2, 10.7, 12.9, 10.5, 12.3, 1… +#> $ creat 1.30, 1.20, 1.50, 1.20, 0.80, 1.00, 0.90, 1.20, 0… +#> $ mspike 0.5, 2.0, 2.6, 1.2, 1.0, 0.5, 1.3, 1.6, 2.4, 2.3,… +#> $ age_group ">=70", ">=70", ">=70", "<70", ">=70", ">=70", ">… +``` + +2) MGUS progression cohort + +``` r +cdm$progression %>% + glimpse() +#> Rows: ?? +#> Columns: 4 +#> Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… +#> $ subject_id 56, 81, 83, 111, 124, 127, 147, 163, 165, 167, 18… +#> $ cohort_start_date 1978-01-30, 1985-01-15, 1974-08-17, 1993-01-14, … +#> $ cohort_end_date 1978-01-30, 1985-01-15, 1974-08-17, 1993-01-14, … +``` + +3) Death cohort + +``` r +cdm$death_cohort %>% + glimpse() +#> Rows: ?? +#> Columns: 4 +#> Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… +#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 1… +#> $ cohort_start_date 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, … +#> $ cohort_end_date 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, … +``` + +### MGUS diagnosis to death + +We can get survival estimates for death following diagnosis like so: + +``` r +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1 +) + +plotSurvival(MGUS_death) +``` + + + +### Stratified results + +``` r +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata = list(c("age_group"), + c("sex"), + c("age_group", "sex")) +) + +plotSurvival(MGUS_death, + colour = "strata_level", + facet= "strata_name") +``` + + + +### Summary statisitics on survival + +For single event analyses, we can extract restricted mean survival and +median survival + +``` r +MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>% + tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>% + dplyr::mutate("Restricted mean survival (se)" = paste0(round(restricted_mean), " (", round(restricted_mean_std_error, 2), ")"), + "Median survival (95% CI)" = paste0(median_survival, " (", median_survival_95CI_lower, " to ", median_survival_95CI_higher, ")") + ) %>% + dplyr::select(strata_name, strata_level, + "Restricted mean survival (se)", "Median survival (95% CI)") +#> # A tibble: 9 × 4 +#> strata_name strata_level Restricted mean surviv…¹ Median survival (95%…² +#> +#> 1 Overall Overall 133 (4.34) 98 (92 to 103) +#> 2 age_group <70 197 (8.35) 180 (158 to 206) +#> 3 age_group >=70 86 (2.89) 71 (66 to 77) +#> 4 sex F 143 (6.4) 108 (100 to 121) +#> 5 sex M 125 (5.7) 88 (79 to 97) +#> 6 age_group and sex <70 and F 220 (12.97) 215 (179 to 260) +#> 7 age_group and sex <70 and M 183 (10.24) 158 (139 to 189) +#> 8 age_group and sex >=70 and F 96 (4.39) 82 (75 to 94) +#> 9 age_group and sex >=70 and M 80 (4.82) 61 (54 to 70) +#> # ℹ abbreviated names: ¹​`Restricted mean survival (se)`, +#> # ²​`Median survival (95% CI)` +``` + +### Disconnect from the cdm database connection + +``` r +cdm_disconnect(cdm) +``` diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..8e4f7f7 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,8 @@ +url: https://darwin-eu.github.io/CohortSurvival/ +template: + bootstrap: 5 + bootswatch: flatly +navbar: + structure: + right: [github] + diff --git a/man/CohortSurvival-package.Rd b/man/CohortSurvival-package.Rd new file mode 100644 index 0000000..4d24470 --- /dev/null +++ b/man/CohortSurvival-package.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CohortSurvival-package.R +\docType{package} +\name{CohortSurvival-package} +\alias{CohortSurvival} +\alias{CohortSurvival-package} +\title{CohortSurvival: What the Package Does (One Line, Title Case)} +\description{ +What the package does (one paragraph). +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://darwin-eu.github.io/CohortSurvival/} +} + +} +\author{ +\strong{Maintainer}: First Last \email{first.last@example.com} (\href{https://orcid.org/YOUR-ORCID-ID}{ORCID}) + +} +\keyword{internal} diff --git a/man/addCohortSurvival.Rd b/man/addCohortSurvival.Rd new file mode 100644 index 0000000..4f9c431 --- /dev/null +++ b/man/addCohortSurvival.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/addCohortSurvival.R +\name{addCohortSurvival} +\alias{addCohortSurvival} +\title{Add survival information to a cohort table} +\usage{ +addCohortSurvival( + x, + cdm, + outcomeCohortTable, + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf +) +} +\arguments{ +\item{x}{cohort table to add survival information} + +\item{cdm}{CDM reference} + +\item{outcomeCohortTable}{The outcome cohort table of interest.} + +\item{outcomeCohortId}{ID of event cohorts to include. Only one outcome +(and so one ID) can be considered.} + +\item{outcomeDateVariable}{Variable containing date of outcome event} + +\item{censorOnCohortExit}{If TRUE, an individual's follow up will be +censored at their cohort exit} + +\item{censorOnDate}{if not NULL, an individual's follow up will be censored +at the given date} + +\item{followUpDays}{Number of days to follow up individuals (lower bound 1, +upper bound Inf)} +} +\value{ +Two additional columns will be added to x. The "time" column will +contain number of days to censoring. The "status" column will indicate +whether the patient had the event (value: 1), or did not have the event +(value: 0) +} +\description{ +Add survival information to a cohort table +} +\examples{ +cohort <- dplyr::tibble( +cohort_definition_id = c(1,1,1), +subject_id = c(1,2,3), +cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), +cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2020-03-01")) +) +cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort) +cdm$cohort1 <- cdm$cohort1 \%>\% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1 + ) + +} diff --git a/man/benchmarkCohortSurvival.Rd b/man/benchmarkCohortSurvival.Rd new file mode 100644 index 0000000..169993d --- /dev/null +++ b/man/benchmarkCohortSurvival.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmarkCohortSurvival.R +\name{benchmarkCohortSurvival} +\alias{benchmarkCohortSurvival} +\title{Estimate performance of estimateSurvival function for benchmarking} +\usage{ +benchmarkCohortSurvival( + cdm, + targetSize, + outcomeSize, + outcomeDateVariable = "cohort_start_date", + competingOutcomeSize = NULL, + competingOutcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf, + strata = NULL, + timeGap = c(1, 7, 30, 365), + times = NULL, + minCellCount = 5, + returnParticipants = FALSE +) +} +\arguments{ +\item{cdm}{CDM reference} + +\item{targetSize}{number of people in the target cohort table} + +\item{outcomeSize}{number of people in the outcome cohort table} + +\item{outcomeDateVariable}{Variable containing date of outcome event} + +\item{competingOutcomeSize}{number of people in the competing outcome cohort table} + +\item{competingOutcomeDateVariable}{Variable containing date of +competing event} + +\item{censorOnCohortExit}{If TRUE, an individual's follow up will be +censored at their cohort exit} + +\item{censorOnDate}{if not NULL, an individual's follow up will be censored +at the given date} + +\item{followUpDays}{Number of days to follow up individuals (lower bound 1, +upper bound Inf)} + +\item{strata}{strata} + +\item{timeGap}{Days between time points for which to report survival +estimates. First day will be day zero with risk estimates provided +for times up to the end of follow-up, with a gap in days equivalent +to timeGap.} + +\item{times}{vector of time points at which to give survival estimates, +if NULL estimates at all times are calculated} + +\item{minCellCount}{The minimum number of events to reported, below which +results will be obscured. If 0, all results will be reported.} + +\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to +participants from the analysis will be returned allowing for further +analysis.} +} +\value{ +tibble with performance of estimateSurvival function information, +according to the selected input parameters +} +\description{ +Estimate performance of estimateSurvival function for benchmarking +} +\examples{ +cdm <- mockMGUS2cdm() +cdm$condition_occurrence <- cdm$death_cohort \%>\% +dplyr::rename("condition_start_date" = "cohort_start_date", + "condition_end_date" = "cohort_end_date") +surv_timings <- benchmarkCohortSurvival( +cdm, targetSize = 100, outcomeSize = 20) + +} diff --git a/man/estimateSingleEventSurvival.Rd b/man/estimateSingleEventSurvival.Rd new file mode 100644 index 0000000..17ecd13 --- /dev/null +++ b/man/estimateSingleEventSurvival.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimateSurvival.R +\name{estimateSingleEventSurvival} +\alias{estimateSingleEventSurvival} +\title{Estimate survival for a given event of interest using cohorts in the OMOP Common Data Model} +\usage{ +estimateSingleEventSurvival( + cdm, + targetCohortTable, + targetCohortId = 1, + outcomeCohortTable, + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date", + censorOnCohortExit = FALSE, + censorOnDate = NULL, + followUpDays = Inf, + strata = NULL, + timeGap = c(1, 7, 30, 365), + times = NULL, + minCellCount = 5, + returnParticipants = FALSE +) +} +\arguments{ +\item{cdm}{CDM reference} + +\item{targetCohortTable}{targetCohortTable} + +\item{targetCohortId}{targetCohortId} + +\item{outcomeCohortTable}{The outcome cohort table of interest.} + +\item{outcomeCohortId}{ID of event cohorts to include. Only one outcome +(and so one ID) can be considered.} + +\item{outcomeDateVariable}{Variable containing date of outcome event} + +\item{censorOnCohortExit}{If TRUE, an individual's follow up will be +censored at their cohort exit} + +\item{censorOnDate}{if not NULL, an individual's follow up will be censored +at the given date} + +\item{followUpDays}{Number of days to follow up individuals (lower bound 1, +upper bound Inf)} + +\item{strata}{strata} + +\item{timeGap}{Days between time points for which to report survival +estimates. First day will be day zero with risk estimates provided +for times up to the end of follow-up, with a gap in days equivalent +to timeGap.} + +\item{times}{vector of time points at which to give survival estimates, +if NULL estimates at all times are calculated} + +\item{minCellCount}{The minimum number of events to reported, below which +results will be obscured. If 0, all results will be reported.} + +\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to +participants from the analysis will be returned allowing for further +analysis.} +} +\value{ +tibble with survival information for desired cohort, including: +time, people at risk, survival probability, cumulative incidence, +95 CIs, strata and outcome. A tibble with the number of events is +outputted as an attribute of the output +} +\description{ +Estimate survival for a given event of interest using cohorts in the OMOP Common Data Model +} +\examples{ +cdm <- mockMGUS2cdm() +surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + timeGap = 7 +) + +} diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png new file mode 100644 index 0000000..c092055 Binary files /dev/null and b/man/figures/README-pressure-1.png differ diff --git a/man/figures/README-unnamed-chunk-10-1.png b/man/figures/README-unnamed-chunk-10-1.png new file mode 100644 index 0000000..861d862 Binary files /dev/null and b/man/figures/README-unnamed-chunk-10-1.png differ diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png new file mode 100644 index 0000000..849034a Binary files /dev/null and b/man/figures/README-unnamed-chunk-5-1.png differ diff --git a/man/figures/README-unnamed-chunk-8-1.png b/man/figures/README-unnamed-chunk-8-1.png new file mode 100644 index 0000000..07076c0 Binary files /dev/null and b/man/figures/README-unnamed-chunk-8-1.png differ diff --git a/man/figures/README-unnamed-chunk-9-1.png b/man/figures/README-unnamed-chunk-9-1.png new file mode 100644 index 0000000..861d862 Binary files /dev/null and b/man/figures/README-unnamed-chunk-9-1.png differ diff --git a/man/figures/hexsticker.png b/man/figures/hexsticker.png new file mode 100644 index 0000000..5d56b6b Binary files /dev/null and b/man/figures/hexsticker.png differ diff --git a/man/generateDeathCohortSet.Rd b/man/generateDeathCohortSet.Rd new file mode 100644 index 0000000..436e8ce --- /dev/null +++ b/man/generateDeathCohortSet.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateDeathCohort.R +\name{generateDeathCohortSet} +\alias{generateDeathCohortSet} +\title{To create a death cohort} +\usage{ +generateDeathCohortSet( + cdm, + deathInObservation = FALSE, + name = "death_cohort", + cohortTable = NULL, + cohortId = NULL +) +} +\arguments{ +\item{cdm}{CDM reference} + +\item{deathInObservation}{If TRUE, restricts deaths included to only those +observed during an ongoing observation period.} + +\item{name}{name for the created death cohort table} + +\item{cohortTable}{name of the cohort table to create a death cohort for} + +\item{cohortId}{name of the cohort table to create a death cohort for} +} +\value{ +A cohort table with a death cohort in cdm +} +\description{ +To create a death cohort +} diff --git a/man/mockMGUS2cdm.Rd b/man/mockMGUS2cdm.Rd new file mode 100644 index 0000000..21342a3 --- /dev/null +++ b/man/mockMGUS2cdm.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mockMGUS2cdm.R +\name{mockMGUS2cdm} +\alias{mockMGUS2cdm} +\title{Create mock CDM reference with survival::mgus2 dataset} +\usage{ +mockMGUS2cdm() +} +\value{ +CDM reference containing data from the survival::mgus2 dataset +} +\description{ +Create mock CDM reference with survival::mgus2 dataset +} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..a648c29 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling \code{rhs(lhs)}. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/plotCumulativeIncidence.Rd b/man/plotCumulativeIncidence.Rd new file mode 100644 index 0000000..9fd3c27 --- /dev/null +++ b/man/plotCumulativeIncidence.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotSurvival.R +\name{plotCumulativeIncidence} +\alias{plotCumulativeIncidence} +\title{Plot cumulative incidence} +\usage{ +plotCumulativeIncidence( + result, + x = "time", + ylim = c(0, NA), + ribbon = TRUE, + facet = NULL, + colour = NULL, + colour_name = NULL +) +} +\arguments{ +\item{result}{Survival results} + +\item{x}{Variable to plot on x axis} + +\item{ylim}{Limits for the Y axis} + +\item{ribbon}{If TRUE, the plot will join points using a ribbon} + +\item{facet}{Variables to use for facets} + +\item{colour}{Variables to use for colours} + +\item{colour_name}{Colour legend name} +} +\description{ +Plot cumulative incidence +} diff --git a/man/plotSurvival.Rd b/man/plotSurvival.Rd new file mode 100644 index 0000000..f2eac97 --- /dev/null +++ b/man/plotSurvival.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotSurvival.R +\name{plotSurvival} +\alias{plotSurvival} +\title{Plot survival results} +\usage{ +plotSurvival( + result, + x = "time", + ylim = c(0, NA), + ribbon = TRUE, + facet = NULL, + colour = NULL, + colour_name = NULL +) +} +\arguments{ +\item{result}{Survival results} + +\item{x}{Variable to plot on x axis} + +\item{ylim}{Limits for the Y axis} + +\item{ribbon}{If TRUE, the plot will join points using a ribbon} + +\item{facet}{Variables to use for facets} + +\item{colour}{Variables to use for colours} + +\item{colour_name}{Colour legend name} +} +\value{ +A ggplot with the survival results plotted +} +\description{ +Plot survival results +} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000..2042559 --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summariseSurvivalParticipants.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{summariseCharacteristics} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. + +\describe{ + \item{PatientProfiles}{\code{\link[PatientProfiles]{summariseCharacteristics}}} +}} + diff --git a/man/survivalParticipants.Rd b/man/survivalParticipants.Rd new file mode 100644 index 0000000..42595bd --- /dev/null +++ b/man/survivalParticipants.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{survivalParticipants} +\alias{survivalParticipants} +\title{Participants contributing to a survival analysis} +\usage{ +survivalParticipants(result) +} +\arguments{ +\item{result}{Result object} +} +\value{ +References to the study participants contributing to +a given analysis +} +\description{ +Participants contributing to a survival analysis +} diff --git a/man/tidyeval.Rd b/man/tidyeval.Rd new file mode 100644 index 0000000..f773abf --- /dev/null +++ b/man/tidyeval.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-tidy-eval.R +\name{tidyeval} +\alias{tidyeval} +\alias{enquo} +\alias{enquos} +\alias{.data} +\alias{:=} +\alias{as_name} +\alias{as_label} +\title{Tidy eval helpers} +\description{ +This page lists the tidy eval tools reexported in this package from +rlang. To learn about using tidy eval in scripts and packages at a +high level, see the \href{https://dplyr.tidyverse.org/articles/programming.html}{dplyr programming vignette} +and the \href{https://ggplot2.tidyverse.org/articles/ggplot2-in-packages.html}{ggplot2 in packages vignette}. +The \href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming section} of \href{https://adv-r.hadley.nz}{Advanced R} may also be useful for a deeper dive. +\itemize{ +\item The tidy eval operators \verb{\{\{}, \verb{!!}, and \verb{!!!} are syntactic +constructs which are specially interpreted by tidy eval functions. +You will mostly need \verb{\{\{}, as \verb{!!} and \verb{!!!} are more advanced +operators which you should not have to use in simple cases. + +The curly-curly operator \verb{\{\{} allows you to tunnel data-variables +passed from function arguments inside other tidy eval functions. +\verb{\{\{} is designed for individual arguments. To pass multiple +arguments contained in dots, use \code{...} in the normal way. + +\if{html}{\out{
}}\preformatted{my_function <- function(data, var, ...) \{ + data \%>\% + group_by(...) \%>\% + summarise(mean = mean(\{\{ var \}\})) +\} +}\if{html}{\out{
}} +\item \code{\link[=enquo]{enquo()}} and \code{\link[=enquos]{enquos()}} delay the execution of one or several +function arguments. The former returns a single expression, the +latter returns a list of expressions. Once defused, expressions +will no longer evaluate on their own. They must be injected back +into an evaluation context with \verb{!!} (for a single expression) and +\verb{!!!} (for a list of expressions). + +\if{html}{\out{
}}\preformatted{my_function <- function(data, var, ...) \{ + # Defuse + var <- enquo(var) + dots <- enquos(...) + + # Inject + data \%>\% + group_by(!!!dots) \%>\% + summarise(mean = mean(!!var)) +\} +}\if{html}{\out{
}} + +In this simple case, the code is equivalent to the usage of \verb{\{\{} +and \code{...} above. Defusing with \code{enquo()} or \code{enquos()} is only +needed in more complex cases, for instance if you need to inspect +or modify the expressions in some way. +\item The \code{.data} pronoun is an object that represents the current +slice of data. If you have a variable name in a string, use the +\code{.data} pronoun to subset that variable with \code{[[}. + +\if{html}{\out{
}}\preformatted{my_var <- "disp" +mtcars \%>\% summarise(mean = mean(.data[[my_var]])) +}\if{html}{\out{
}} +\item Another tidy eval operator is \verb{:=}. It makes it possible to use +glue and curly-curly syntax on the LHS of \code{=}. For technical +reasons, the R language doesn't support complex expressions on +the left of \code{=}, so we use \verb{:=} as a workaround. + +\if{html}{\out{
}}\preformatted{my_function <- function(data, var, suffix = "foo") \{ + # Use `\{\{` to tunnel function arguments and the usual glue + # operator `\{` to interpolate plain strings. + data \%>\% + summarise("\{\{ var \}\}_mean_\{suffix\}" := mean(\{\{ var \}\})) +\} +}\if{html}{\out{
}} +\item Many tidy eval functions like \code{dplyr::mutate()} or +\code{dplyr::summarise()} give an automatic name to unnamed inputs. If +you need to create the same sort of automatic names by yourself, +use \code{as_label()}. For instance, the glue-tunnelling syntax above +can be reproduced manually with: + +\if{html}{\out{
}}\preformatted{my_function <- function(data, var, suffix = "foo") \{ + var <- enquo(var) + prefix <- as_label(var) + data \%>\% + summarise("\{prefix\}_mean_\{suffix\}" := mean(!!var)) +\} +}\if{html}{\out{
}} + +Expressions defused with \code{enquo()} (or tunnelled with \verb{\{\{}) need +not be simple column names, they can be arbitrarily complex. +\code{as_label()} handles those cases gracefully. If your code assumes +a simple column name, use \code{as_name()} instead. This is safer +because it throws an error if the input is not a name as expected. +} +} +\keyword{internal} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..ebaddb7 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/tests.html +# * https://testthat.r-lib.org/reference/test_package.html#special-files + +library(testthat) +library(CohortSurvival) + +test_check("CohortSurvival") diff --git a/tests/testthat/test-addCohortSurvival.R b/tests/testthat/test-addCohortSurvival.R new file mode 100644 index 0000000..432a033 --- /dev/null +++ b/tests/testthat/test-addCohortSurvival.R @@ -0,0 +1,598 @@ +test_that("working example", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2020-03-01")) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + observation_period = observation_period + ) + cdm$cohort1 <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1 + ) + expect_true(all(colnames(cdm$cohort1) %in% + c( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date", + "days_to_exit", "status", "time" + ))) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("another working example", { + if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") { + Sys.setenv("EUNOMIA_DATA_FOLDER" = tempdir()) + } + skip_if_not(CDMConnector::eunomia_is_available()) + + con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomia_dir()) + cdm <- CDMConnector::cdm_from_con(con, cdm_schema = "main", write_schema = "main") + + celecoxibCodes <- CodelistGenerator::getDescendants(cdm, conceptId = 1118084) + cdm$celecoxib <- cdm$drug_era %>% + dplyr::inner_join( + celecoxibCodes %>% + dplyr::select(concept_id), + by = c("drug_concept_id" = "concept_id"), + copy = TRUE + ) %>% + PatientProfiles::addAge(cdm, indexDate = "drug_era_start_date") %>% + dplyr::rename( + "subject_id" = "person_id", + "cohort_start_date" = "drug_era_start_date", + "cohort_end_date" = "drug_era_end_date" + ) %>% + dplyr::mutate(cohort_definition_id = 1L) %>% + dplyr::select( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date" + ) %>% + CDMConnector::computeQuery(name = "celecoxib", temporary = FALSE, schema = attr(cdm, "write_schema")) + celecoxib_set <- cdm$celecoxib %>% + dplyr::select("cohort_definition_id") %>% + dplyr::mutate(cohort_name = "celecoxib") + celecoxib_count <- cdm$celecoxib %>% + dplyr::group_by(cohort_definition_id) %>% + dplyr::tally(name = "n_records") + celecoxib_count <- cdm$celecoxib %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) %>% + dplyr::collect() + + celecoxib_set <- cdm$celecoxib %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = "celecoxib") %>% + dplyr::collect() + + cdm$celecoxib <- CDMConnector::newGeneratedCohortSet(cohortRef = cdm$celecoxib, + cohortSetRef = celecoxib_set, + cohortCountRef = celecoxib_count) + + + GiBleedCodes <- CodelistGenerator::getDescendants(cdm, conceptId = 192671) + cdm$gi_bleed <- cdm$condition_occurrence %>% + dplyr::inner_join( + GiBleedCodes %>% + dplyr::select(concept_id), + by = c("condition_concept_id" = "concept_id"), + copy = TRUE + ) %>% + dplyr::rename( + "subject_id" = "person_id", + "cohort_start_date" = "condition_start_date" + ) %>% + dplyr::mutate(cohort_end_date = cohort_start_date) %>% + dplyr::mutate(cohort_definition_id = 1L) %>% + dplyr::select( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date" + ) %>% + dplyr::compute() + gi_bleed_count <- cdm$gi_bleed %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) %>% + dplyr::collect() + gi_bleed_set <- cdm$gi_bleed %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = "gi_bleed") %>% + dplyr::collect() + + cdm$gi_bleed <- CDMConnector::newGeneratedCohortSet(cohortRef = cdm$gi_bleed, + cohortSetRef = gi_bleed_set, + cohortCountRef = gi_bleed_count) + + cdm$celecoxib <- cdm$celecoxib %>% + PatientProfiles::addAge(cdm = cdm) %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "gi_bleed" + ) + + expect_true(all(c("time", "status") %in% colnames(cdm$celecoxib))) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("censorOnCohortExit", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2020-03-01")) + ) + events <- dplyr::tibble( + cohort_definition_id = c(1,1,2), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2019-01-01"), + as.Date("2020-01-05"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2019-01-01"), + as.Date("2020-01-05"), + as.Date("2020-01-01")), + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + cohort2 = events, + observation_period = observation_period + ) + cohortNoCensorExit <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1 + ) %>% + dplyr::arrange(subject_id) + cohortCensorExit <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + censorOnCohortExit = TRUE + ) %>% + dplyr::arrange(subject_id) + + compareNA <- function(v1, v2) { + same <- (v1 == v2) | (is.na(v1) & is.na(v2)) + same[is.na(same)] <- FALSE + return(same) + } + + expect_true(all(compareNA(cohortNoCensorExit %>% + dplyr::select(status) %>% + dplyr::pull(), + cohortCensorExit %>% + dplyr::select(status) %>% + dplyr::pull()))) + expect_true(all(compareNA(cohortNoCensorExit %>% + dplyr::select(time) %>% + dplyr::pull(), + c(NA, 3, 1155)))) + expect_true(all(compareNA(cohortNoCensorExit %>% + dplyr::select(days_to_exit) %>% + dplyr::pull(), + c(1186, 1216, 1155)))) + expect_true(all(compareNA(cohortCensorExit %>% + dplyr::select(time) %>% + dplyr::pull(), + c(NA, 3, 60)))) + expect_true(all(compareNA(cohortCensorExit %>% + dplyr::select(days_to_exit) %>% + dplyr::pull(), + c(91, 213, 60)))) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("censorOnDate", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2021-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2021-03-01")) + ) + events <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2019-01-01"), + as.Date("2020-01-05"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2019-01-01"), + as.Date("2020-01-05"), + as.Date("2020-01-01")), + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + cohort2 = events, + observation_period = observation_period + ) + cohortCensorDate <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + censorOnDate = as.Date("2020-01-04") + ) %>% + dplyr::arrange(subject_id) + + compareNA <- function(v1, v2) { + same <- (v1 == v2) | (is.na(v1) & is.na(v2)) + same[is.na(same)] <- FALSE + return(same) + } + + expect_true(all(compareNA(cohortCensorDate %>% + dplyr::select(status) %>% + dplyr::pull(), + c(NA, 0, NA)))) + expect_true(all(compareNA(cohortCensorDate %>% + dplyr::select(days_to_exit) %>% + dplyr::pull(), + c(3, 2, -363)))) + expect_true(all(compareNA(cohortCensorDate %>% + dplyr::select(time) %>% + dplyr::pull(), + c(NA, 2, NA)))) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("followUpDays", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2020-01-06")) + ) + events <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-30"), + as.Date("2020-01-05"), + as.Date("2020-01-11")), + cohort_end_date = c(as.Date("2020-01-30"), + as.Date("2020-01-05"), + as.Date("2020-01-11")), + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + cohort2 = events, + observation_period = observation_period + ) + cohortFollowUp <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + followUp = 20 + ) %>% + dplyr::arrange(subject_id) + cohortFUandCE <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + followUp = 20, + censorOnCohortExit = TRUE + ) %>% + dplyr::arrange(subject_id) + + expect_true(all(cohortFollowUp %>% + dplyr::select(days_to_exit) %>% + dplyr::pull() == + c(20,20,20))) + expect_true(all(cohortFollowUp %>% + dplyr::select(status) %>% + dplyr::pull() == + c(0,1,1))) + expect_true(all(cohortFollowUp %>% + dplyr::select(time) %>% + dplyr::pull() == + c(20,3,10))) + + expect_true(all(cohortFUandCE %>% + dplyr::select(days_to_exit) %>% + dplyr::pull() == + c(20,20,5))) + expect_true(all(cohortFUandCE %>% + dplyr::select(status) %>% + dplyr::pull() == + c(0,1,0))) + expect_true(all(cohortFUandCE %>% + dplyr::select(time) %>% + dplyr::pull() == + c(20,3,5))) + + CDMConnector::cdmDisconnect(cdm) + }) + +test_that("expected errors", { + cdm <- PatientProfiles::mockPatientProfiles() + cdm[["cohort1"]] <- cdm[["cohort1"]] %>% + dplyr::filter(cohort_start_date != "2020-01-01") + + # check outcome cohort + # id that is not in the table + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 10 + )) + # should only work for one cohort + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = c(1, 2) + )) + # user must provide a cohort id + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = NULL + )) + + # censorOnCohortExit must be logical + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + censorOnCohortExit = 1 + )) + + # followUpDays must be 1 or higher + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + followUpDays = -1 + )) + + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + followUpDays = 0 + )) + + # temporary must be logical + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + temporary = "maybe" + )) + + cdm <- PatientProfiles::mockPatientProfiles() + cdm[["cohort1"]] <- cdm[["cohort1"]] %>% + dplyr::group_by(subject_id) %>% + dplyr::filter(dplyr::row_number() == 1) + + # multiple cohort definition ids in exposure table + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + outcomeCohortId = 1 + )) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("expected errors2 - index cohort to have one row per person", { + cdm <- PatientProfiles::mockPatientProfiles() + expect_error(cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1 + )) + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("within cohort survival", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2021-03-01")) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + observation_period = observation_period + ) + + # default "cohort_start_date" + # if using the same cohort, status would be 1, time would be 0 for everyone + cdm$cohort1_start <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + outcomeCohortId = 1, + outcomeDateVariable = "cohort_start_date" + ) + expect_true(all(cdm$cohort1_start %>% + dplyr::pull("time") == 0)) + expect_true(all(cdm$cohort1_start %>% + dplyr::pull("status") == 1)) + + cdm$cohort1_end <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + outcomeCohortId = 1, + outcomeDateVariable = "cohort_end_date" + ) + + expect_true(all(cdm$cohort1_end %>% + dplyr::collect() %>% + dplyr::mutate(dtime = as.numeric(difftime(cohort_end_date, + cohort_start_date)), + equal = (dtime == time)) %>% + dplyr::pull("equal"))) + + # limit follow up + cdm$cohort1_b <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + outcomeCohortId = 1, + outcomeDateVariable = "cohort_end_date", + followUpDays = 100 + ) + expect_true(all(cdm$cohort1_b %>% + dplyr::pull("time") == c(91,100,100))) + expect_true(all(cdm$cohort1_b %>% + dplyr::pull("status") == c(1,0,0))) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("allow overwrite of time and status", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01")), + cohort_end_date = c(as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2021-03-01")) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1,1,1), + person_id = c(1,2,3), + observation_period_start_date = c(as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01")), + observation_period_end_date = c(as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01")) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + observation_period = observation_period + ) + + cdm$cohort1 <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + ) + + cohort1_count <- cdm$cohort1 %>% + dplyr::group_by(.data$cohort_definition_id) %>% + dplyr::summarise( + number_records = dplyr::n(), + number_subjects = dplyr::n_distinct(.data$subject_id), + .groups = "drop" + ) + cohort1_set <- cdm$cohort1 %>% + dplyr::select("cohort_definition_id") %>% + dplyr::distinct() %>% + dplyr::mutate(cohort_name = "cohort1") + + # currently need to add attribute back to rerun + attr(cdm$cohort1, "set") <- cohort1_set + attr(cdm$cohort1, "count") <- cohort1_count + + cdm$cohort1 <- cdm$cohort1 %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "cohort1", + ) + expect_true(!is.null(cdm$cohort1)) + + CDMConnector::cdmDisconnect(cdm) +}) diff --git a/tests/testthat/test-benchmarkCohortSurvival.R b/tests/testthat/test-benchmarkCohortSurvival.R new file mode 100644 index 0000000..b5514c0 --- /dev/null +++ b/tests/testthat/test-benchmarkCohortSurvival.R @@ -0,0 +1,81 @@ +test_that("mgus example: benchmark", { + cdm <- mockMGUS2cdm() + cdm$condition_occurrence <- cdm$death_cohort %>% + dplyr::rename("condition_start_date" = "cohort_start_date", + "condition_end_date" = "cohort_end_date") + cdm$drug_exposure <- cdm$progression %>% + dplyr::rename("drug_exposure_start_date" = "cohort_start_date", + "drug_exposure_end_date" = "cohort_end_date") + timings <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47) + timings_p <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, returnParticipants = TRUE) + timings_s <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 5, strata = list("sex" = c("sex"))) + timings2 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 8) + timings3 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23) + timings4 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23, outcomeDateVariable = "cohort_end_date") + timings5 <- benchmarkCohortSurvival(cdm, targetSize = 1000, competingOutcomeSize = 80, outcomeSize = 90, outcomeDateVariable = "cohort_end_date") + timings6 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 7, censorOnCohortExit = TRUE) + timings7 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 4, censorOnDate = as.Date("1920-01-01")) + timings8 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, followUpDays = 30) + timings9 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, minCellCount = 2) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_p))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings_p$task)) + expect_true(timings_p %>% dplyr::select(with_participants) %>% dplyr::distinct() %>% dplyr::pull() == "Yes") + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_s))) + expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 5") %in% timings_s$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings2))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 8") %in% timings2$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings3))) + expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings3$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings4))) + expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings4$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings5))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 90", "generating competing outcome cohort size 80") %in% timings5$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings6))) + expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 7") %in% timings6$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings7))) + expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 4") %in% timings7$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings8))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings8$task)) + + expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings9))) + expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings9$task)) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("expected errors benchmark", { + cdm <- mockMGUS2cdm() + cdm$condition_occurrence <- cdm$death_cohort %>% + dplyr::rename("condition_start_date" = "cohort_start_date", + "condition_end_date" = "cohort_end_date") + expect_error(benchmarkCohortSurvival("cdm")) + expect_error(benchmarkCohortSurvival(cdm)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = "size")) + expect_error(benchmarkCohortSurvival(targetSize = 30)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = "3")) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, competingOutcomeSize = "no")) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, outcomeDateVariable = FALSE)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnCohortExit = NULL)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnDate = TRUE)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, followUpDays = "Inf")) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, strata = c("age" = "age"))) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, timeGap = list(1,2))) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, times = list(1,2,3))) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, minCellCount = FALSE)) + expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, returnParticipants = "TRUE")) + + CDMConnector::cdmDisconnect(cdm) +}) diff --git a/tests/testthat/test-estimateSurvival.R b/tests/testthat/test-estimateSurvival.R new file mode 100644 index 0000000..322144c --- /dev/null +++ b/tests/testthat/test-estimateSurvival.R @@ -0,0 +1,812 @@ +test_that("mgus example: no Competing risk", { + cdm <- mockMGUS2cdm() + surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + timeGap = 7 + ) + expect_true(tibble::is_tibble(surv)) + expect_true(all(c( + "cdm_name","result_type", + "group_name","group_level", + "strata_name","strata_level", + "variable","variable_level", + "estimate_type", + "variable_type", + "time", + "analysis_type", + "estimate") %in% + colnames(surv))) + + expect_true(surv %>% dplyr::select(variable) %>% dplyr::pull() %>% unique() == "Outcome") +# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424))) + expect_true(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Single event") + +# expect_true(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() == 7) +# expect_true(all(attributes(surv)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(seq(0, 424, by = 7), 424))) + + # mgus example: Competing risk + survCR <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + competingOutcomeCohortTable = "progression", + competingOutcomeCohortId = 1 + ) + + expect_true(all(colnames(surv) == colnames(survCR))) + + expect_true(tibble::is_tibble(survCR)) + +# expect_true(all(colnames(attr(survCR, "events")) == +# colnames(attr(surv, "events")))) + + # note, we don't return summary for Competing risk + +# expect_true(all(survCR %>% +# dplyr::select(outcome) %>% +# dplyr::pull() %>% +# unique() %in% +# c("outcome", "competing outcome"))) + +# expect_true(all(survCR %>% +# dplyr::select(time) %>% +# dplyr::pull() %>% +# unique() == c(0:424))) + + expect_true(survCR %>% + dplyr::select(analysis_type) %>% + dplyr::pull() %>% + unique() == "Competing risk") + +# expect_true(all(attributes(survCR)$events %>% +# dplyr::select(timeGap) %>% +# dplyr::pull() %>% +# unique() %in% c(1, 7, 30, 365))) + +# expect_true(all(attributes(survCR)$events %>% +# dplyr::select(time) %>% +# dplyr::pull() %>% unique() == c(0:424))) + + expect_true(nrow(survCR %>% + dplyr::filter(.data$outcome == "outcome") %>% + dplyr::collect())>=1) + expect_true(nrow(survCR %>% + dplyr::filter(.data$outcome == "competing outcome") %>% + dplyr::collect())>=1) + + expect_true(all(c("death_cohort", "progression") %in% + (survCR %>% + dplyr::pull("variable_level") %>% + unique()))) + + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("mgus example: no Competing risk, strata", { + cdm <- mockMGUS2cdm() + cdm[["mgus_diagnosis"]] <- cdm[["mgus_diagnosis"]] %>% + dplyr::mutate(mspike_r = round(mspike, digits = 0)) + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + timeGap = c(1, 10, 100), + strata = list( + "age_gr" = c("age"), + "sex" = c("sex"), + "age and sex" = c("age", "sex"), + "mspike rounded" = c("mspike_r") + ) + ) + expect_true(tibble::is_tibble(surv)) + + expect_true(surv %>% dplyr::select(outcome) %>% dplyr::pull() %>% unique() == "outcome") +# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424))) + expect_true(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Single event") + expect_true(all(surv %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% + c("Overall", "sex", "age", "mspike_r", "age and sex"))) + expect_true(all(surv %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c( + "M", "F", 0, 1, 2, 3, c(24:96), "Overall", + paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ") + ))) + +# expect_true(all(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() %in% c(1, 10, 100))) +# expect_true(all(attributes(surv)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424))) +# expect_true(all(attributes(surv)$events %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% +# c("Overall", "sex", "age", "mspike_r", "age and sex"))) +# expect_true(all(attributes(surv)$events %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c( +# "M", "F", 0, 1, 2, 3, c(24:96), "Overall", +# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ") +# ))) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("mgus example: Competing risk, strata", { + cdm <- mockMGUS2cdm() + cdm[["mgus_diagnosis"]] <- cdm[["mgus_diagnosis"]] %>% + dplyr::mutate(mspike_r = round(mspike, digits = 0)) + survCR <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + competingOutcomeCohortTable = "progression", + competingOutcomeCohortId = 1, + strata = list( + "age" = c("age"), + "sex" = c("sex"), + "age and sex" = c("age", "sex"), + "mspike rounded" = c("mspike_r") + ) + ) + + # check strata_level (s0) thingy + + expect_true(tibble::is_tibble(survCR)) +# expect_true(all(survCR %>% dplyr::select(outcome) %>% dplyr::pull() %>% unique() %in% c("outcome", "competing outcome"))) +# expect_true(all(survCR %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424))) + expect_true(survCR %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Competing risk") + expect_true(all(survCR %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% c("Overall", "sex", "age", "mspike_r", "age and sex"))) +# expect_true(all(survCR %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c( +# "M", "F", 0, 1, 2, 3, c(24:96), "Overall", +# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ") +# ))) + +# expect_true(all(attributes(survCR)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() %in% c(1, 7, 30, 365))) +# expect_true(all(attributes(survCR)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424))) +# expect_true(all(attributes(survCR)$events %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% c("Overall", "sex", "age", "mspike_r", "age and sex"))) +# expect_true(all(attributes(survCR)$events %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c( +# "M", "F", 0, 1, 2, 3, c(24:96), "Overall", +# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ") +# ))) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("funcionality with created dataset", { + compareNA <- function(v1, v2) { + same <- (v1 == v2) | (is.na(v1) & is.na(v2)) + same[is.na(same)] <- FALSE + return(same) + } + + exposure_cohort <- dplyr::tibble( + subject_id = c(1, 2, 3), + cohort_definition_id = c(1, 1, 1), + cohort_start_date = c( + as.Date("2020-01-01"), + as.Date("2020-02-03"), + as.Date("2020-05-01") + ), + cohort_end_date = c( + as.Date("2020-01-31"), + as.Date("2022-02-03"), + as.Date("2021-06-28") + ), + age_group = c("20;29", "20;29", "60;69"), + sex = c("Female", "Male", "Female"), + blood_type = c("A", "B", "B") + ) + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1, 1, 1), + subject_id = c(1, 1, 2, 3, 3), + cohort_start_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1, 1, 1), + person_id = c(1, 2, 3), + observation_period_start_date = c( + as.Date("2007-03-21"), + as.Date("2006-09-09"), + as.Date("1980-07-20") + ), + observation_period_end_date = c( + as.Date("2022-09-08"), + as.Date("2023-01-03"), + as.Date("2023-05-20") + ) + ) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + # No competing events + surv <- estimateSurvival(cdm, + targetCohortTable = "exposure_cohort", + outcomeCohortTable = "cohort1", + minCellCount = 1 + ) + +# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>% +# unique() == c(0:31))) + expect_true(all(surv %>% + dplyr::filter(variable_type == "n_risk") %>% + dplyr::select(estimate) %>% + dplyr::pull() == + c(rep(3, 7), rep(2, 3), rep(1, 22)))) + expect_true(all(surv %>% + dplyr::filter(variable_type == "estimate") %>% + dplyr::filter(estimate_type == "Survival probability") %>% + dplyr::select(estimate) %>% + dplyr::pull() - c(rep(1, 7), rep(0.667, 3), rep(0.333, 21), 0) < c(0.01))) + expect_true(all(surv %>% + dplyr::filter(variable_type == "estimate") %>% + dplyr::filter(estimate_type == "Cumulative failure probability") %>% + dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 3), rep(0.667, 22), 1) < c(0.01))) + expect_true(all(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + +# expect_true(all(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) +# expect_true(all(attributes(surv)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% +# dplyr::pull() == c(3, 3, 3, 3))) + + CDMConnector::cdmDisconnect(cdm) + + # Compting events + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c(2, 3, 3), + cohort_start_date = c( + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + competing_risk_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 2), + subject_id = c(2, 3, 1), + cohort_start_date = c( + as.Date("2020-02-07"), + as.Date("2021-02-02"), + as.Date("2020-01-03") + ), + cohort_end_date = c( + as.Date("2020-02-07"), + as.Date("2021-02-02"), + as.Date("2020-01-03") + ) + ) + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + cohort2 = competing_risk_cohort, + observation_period = observation_period + ) + surv2 <- estimateSurvival(cdm, + targetCohortTable = "exposure_cohort", + outcomeCohortTable = "cohort1", + competingOutcomeCohortTable = "cohort2", + competingOutcomeCohortId = 1, + minCellCount = 1 + ) + +# expect_true(all(surv2 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(rep(c(0:981), 2)))) +# expect_true(all(surv2 %>% +# dplyr::filter(outcome == "outcome") %>% +# dplyr::filter(estimate_type == "Cumulative failure probability") %>% +# dplyr::filter(variable_type == "n_risk") %>% +# dplyr::select(estimate) %>% +# dplyr::pull() == +# c(rep(3, 5), rep(2, 27), rep(1, 1155)))) + expect_true(nrow(surv2 %>% + dplyr::filter(estimate_type == "Survival probability") %>% + dplyr::select(estimate)) == 0) +# expect_true(all(surv2 %>% dplyr::filter(variable_type == "n_risk") %>% dplyr::pull() == +# c(rep(3, 5), rep(2, 27), rep(1, 1155)))) + + expect_true(all(surv2 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Competing risk")) +# expect_true(all(surv2 %>% dplyr::select(outcome) %>% dplyr::pull() == c(rep("outcome", 4748), rep("competing outcome", 4748)))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1 & outcome == "outcome") %>% dplyr::select(time) == c(31))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1 & outcome == "Competing risk") %>% dplyr::select(time) == c(4))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 7 & outcome == "outcome") %>% dplyr::select(time) == c(35))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 7 & outcome == "Competing risk") %>% dplyr::select(time) == c(7))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 30 & outcome == "outcome") %>% dplyr::select(time) == c(60))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 30 & outcome == "Competing risk") %>% dplyr::select(time) == c(30))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 365 & outcome == "outcome") %>% dplyr::select(time) == c(365))) +# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 365 & outcome == "Competing risk") %>% dplyr::select(time) == c(365))) + +# expect_true(all(attributes(surv2)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) +# expect_true(all(attributes(surv2)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2))) + + CDMConnector::cdmDisconnect(cdm) + + # Censor at cohort end + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1, 1, 1), + subject_id = c(1, 1, 2, 3, 3), + cohort_start_date = c( + as.Date("2020-02-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2020-02-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + surv3 <- estimateSurvival(cdm, "exposure_cohort", + outcomeCohortTable = "cohort1", + censorOnCohortExit = TRUE, + minCellCount = 1 + ) + +# expect_true(all(surv3 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31))) + # expect_true(all(surv3 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 24), 1))) + # expect_true(all(surv3 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 24), 0) < c(0.01))) + # expect_true(all(surv3 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 25), 1) < c(0.01))) + # expect_true(all(surv3 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv3)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31))) + # expect_true(all(attributes(surv3)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv3)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% + # dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2))) + + CDMConnector::cdmDisconnect(cdm) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + # Censor by follow up days + surv4 <- estimateSurvival(cdm, "exposure_cohort", + outcomeCohortTable = "cohort1", + followUpDays = 10, + minCellCount = 1 + ) + +# expect_true(all(surv4 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:10))) + # expect_true(all(surv4 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 4)))) + # expect_true(all(surv4 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 4)) < c(0.01))) + # expect_true(all(surv4 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 5)) < c(0.01))) + # expect_true(all(surv4 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv4)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6))) + # expect_true(all(attributes(surv4)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv4)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1))) + + CDMConnector::cdmDisconnect(cdm) + + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1, 1, 1), + subject_id = c(1, 1, 2, 3, 3), + cohort_start_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-02"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + # Strata + surv5 <- estimateSurvival(cdm, "exposure_cohort", + outcomeCohortTable = "cohort1", + strata = list( + "Age group" = c("age_group"), + "Sex" = c("sex"), + "Age group and sex" = c("age_group", "sex"), + "Blood type" = c("blood_type") + ), + minCellCount = 1 + ) + +# expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 3), rep(1, 22)))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 3), rep(0.333, 21), 0) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 3), rep(0.667, 22), 1) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "Overall") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 9, 31))) + # + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(time) %>% dplyr::pull() == c(0:31))) + # expect_true(all(compareNA(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(n_risk) %>% dplyr::pull(), c(rep(c(rep(1, 10), rep(NA, 22)),2))))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 10), rep(0, 22)) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 9), rep(1, 23)) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(9))) + # + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(time) %>% dplyr::pull() == c(0:31))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 7), rep(1, 25)))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(2, 7), rep(0.667, 24), 0) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.5, 25), 1) < c(0.01))) + # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31))) + + CDMConnector::cdmDisconnect(cdm) + + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1, 1), + subject_id = c(1, 2, 3, 3), + cohort_start_date = c( + as.Date("2019-01-10"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2019-01-10"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + # Washout for outcome + surv6 <- estimateSurvival(cdm, + targetCohortTable = "exposure_cohort", + outcomeCohortTable = "cohort1", + minCellCount = 1 + ) + +# expect_true(all(surv6 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31))) + # expect_true(all(surv6 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 7), rep(1, 25)))) + # expect_true(all(surv6 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(2, 7), rep(0.667, 24), 0) < c(0.01))) + # expect_true(all(surv6 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.5, 25), 1) < c(0.01))) + # expect_true(all(surv6 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv6)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31))) + # expect_true(all(attributes(surv6)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv6)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2))) + + CDMConnector::cdmDisconnect(cdm) + + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1, 1), + subject_id = c(1, 2, 3, 3), + cohort_start_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ), + cohort_end_date = c( + as.Date("2020-01-10"), + as.Date("2020-02-09"), + as.Date("2020-06-01"), + as.Date("2020-06-03") + ) + ) + + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + + # Censor on date + surv7 <- estimateSurvival(cdm, + targetCohortTable = "exposure_cohort", + outcomeCohortTable = "cohort1", + minCellCount = 1, + censorOnDate = as.Date("2020-02-01") + ) + +# expect_true(all(surv7 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:9))) + # expect_true(all(surv7 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(1, 10)))) + # expect_true(all(surv7 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 9), 0) < c(0.01))) + # expect_true(all(surv7 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate)%>% dplyr::pull() - c(rep(0, 9), 1) < c(0.01))) + # expect_true(all(surv7 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv7)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(9))) + # expect_true(all(attributes(surv7)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv7)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1))) + + CDMConnector::cdmDisconnect(cdm) + + # try with selected time points + cdm <- PatientProfiles::mockPatientProfiles( + exposure_cohort = exposure_cohort, + cohort1 = outcome_cohort, + observation_period = observation_period + ) + surv8 <- estimateSurvival(cdm, + targetCohortTable = "exposure_cohort", + outcomeCohortTable = "cohort1", + minCellCount = 1, + times = seq(1,1000, by = 100) + ) + +# expect_true(all(surv8 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == rep(seq(1,901,by = 100),2))) + # expect_true(all(compareNA(surv8 %>% dplyr::select(n_risk) %>% dplyr::pull(), rep(c(3, rep(NA, 9)),2)))) + # expect_true(all(surv8 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% + # dplyr::pull() == c(1,0,0,0,0,0,0,0,0,0))) + # expect_true(all(surv8 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% + # dplyr::pull() == c(0,1,1,1,1,1,1,1,1,1))) + # expect_true(all(surv8 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv8)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == 101)) + # expect_true(all(attributes(surv8)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv8)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% + # dplyr::pull() == c(3, 3, 3, 3))) + + CDMConnector::cdmDisconnect(cdm) + +}) + +test_that("different exposure cohort ids", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 2), + subject_id = c(1, 2, 3), + cohort_start_date = c( + as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01") + ), + cohort_end_date = c( + as.Date("2020-01-11"), + as.Date("2020-01-12"), + as.Date("2020-01-11") + ) + ) + outcome_cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c(1, 2, 3), + cohort_start_date = c( + as.Date("2020-01-10"), + as.Date("2020-01-03"), + as.Date("2020-01-09") + ), + cohort_end_date = c( + as.Date("2020-01-10"), + as.Date("2020-01-03"), + as.Date("2020-01-09") + ) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1, 1, 1), + person_id = c(1, 2, 3), + observation_period_start_date = c( + as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01") + ), + observation_period_end_date = c( + as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01") + ) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + cohort2 = outcome_cohort, + observation_period = observation_period + ) + surv8 <- + estimateSurvival( + cdm = cdm, + targetCohortTable = "cohort1", + targetCohortId = 1, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + minCellCount = 1 + ) +# expect_true(all(surv8 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:9))) + # expect_true(all(surv8 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 2), rep(1, 8)))) + # expect_true(all(surv8 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(1, rep(0.5, 8), 0) < c(0.01))) + # expect_true(all(surv8 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(0, rep(0.5, 8), 1) < c(0.01))) + # expect_true(all(surv8 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv8)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(1, 9))) + # expect_true(all(attributes(surv8)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv8)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2))) + + surv9 <- + estimateSurvival( + cdm = cdm, + targetCohortTable = "cohort1", + targetCohortId = 2, + outcomeCohortTable = "cohort2", + outcomeCohortId = 1, + minCellCount = 1 + ) + # expect_true(all(surv9 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:8))) + # expect_true(all(surv9 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(1, 9)))) + # expect_true(all(surv9 %>% + # dplyr::filter(estimate_name == "Survival probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 8), 0) < c(0.01))) + # expect_true(all(surv9 %>% + # dplyr::filter(estimate_name == "Cumulative failure probability") %>% + # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 8), 1) < c(0.01))) + # expect_true(all(surv9 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event")) + # + # expect_true(all(attributes(surv9)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(8))) + # expect_true(all(attributes(surv9)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365))) + # expect_true(all(attributes(surv9)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1))) + + CDMConnector::cdm_disconnect(cdm) + }) + +test_that("expected errors", { + cdm <- mockMGUS2cdm() + + expect_error(estimateSurvival("cdm", targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression")) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosiss", outcomeCohortTable = "progression")) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "outcome")) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = c(1, 3))) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = -3)) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = "time")) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = NULL)) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, strata = "age")) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, strata = list("name" = "noname"))) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, censorOnDate = "2020-09-02")) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("return participants", { + cdm <- mockMGUS2cdm() + surv1 <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort", + returnParticipants = FALSE + ) + expect_true(is.null(attr(surv1, "participants"))) + surv2 <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + timeGap = 7, returnParticipants = TRUE + ) + expect_false(is.null(attr(surv2, "participants"))) + + expect_equal( + colnames(survivalParticipants(surv2) %>% + head(1) %>% + dplyr::collect()), + c( + "cohort_definition_id", + "subject_id", + "cohort_start_date", + "cohort_end_date" + ) + ) + + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("within cohort survival", { + cohort <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c(1, 2, 3), + cohort_start_date = c( + as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01") + ), + cohort_end_date = c( + as.Date("2020-04-01"), + as.Date("2020-08-02"), + as.Date("2021-03-01") + ) + ) + observation_period <- dplyr::tibble( + observation_period_id = c(1, 1, 1), + person_id = c(1, 2, 3), + observation_period_start_date = c( + as.Date("2000-01-01"), + as.Date("2000-01-02"), + as.Date("2000-01-01") + ), + observation_period_end_date = c( + as.Date("2023-04-01"), + as.Date("2023-05-02"), + as.Date("2023-03-01") + ) + ) + cdm <- PatientProfiles::mockPatientProfiles( + cohort1 = cohort, + observation_period = observation_period + ) + + surv <- estimateSurvival(cdm, + targetCohortTable = "cohort1", + targetCohortId = 1, + outcomeCohortTable = "cohort1", + outcomeCohortId = 1, + outcomeDateVariable = "cohort_end_date", + timeGap = 7, + minCellCount = 0 + ) +# expect_true(sum(attr(surv, "events")$n_events) == 3) + + CDMConnector::cdmDisconnect(cdm) +}) diff --git a/tests/testthat/test-generateDeathCohort.R b/tests/testthat/test-generateDeathCohort.R new file mode 100644 index 0000000..f492dc6 --- /dev/null +++ b/tests/testthat/test-generateDeathCohort.R @@ -0,0 +1,205 @@ +test_that("basic example", { + + cdm <- PatientProfiles::mockPatientProfiles() + deathTable <- dplyr::tibble( + person_id = c(1,2,3), + death_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01"))) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + + cdm <- generateDeathCohortSet(cdm=cdm) + + expect_true(all(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date") %in% + colnames(cdm$death_cohort))) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("first death record per person", { + # check that in the case of multiple death records per person + # only the first will be used + cdm <- PatientProfiles::mockPatientProfiles() + deathTable <- dplyr::tibble( + person_id = c(1,2,2), + death_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-31"))) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + cdm <- generateDeathCohortSet(cdm=cdm) + + expect_true(nrow(cdm$death_cohort %>% + dplyr::filter(subject_id == "2") %>% + dplyr::collect()) == 1) + + + expect_true(cdm$death_cohort %>% + dplyr::filter(subject_id == "2") %>% + dplyr::select(cohort_start_date) %>% + dplyr::pull() == as.Date("2020-01-02")) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("test death in observation criteria", { + + observation_period <- tibble::tibble( + observation_period_id = c(1, 2), + person_id = c(1,2), + observation_period_start_date = c( + as.Date("2000-01-01"), + as.Date("2010-01-01") + ), + observation_period_end_date = c( + as.Date("2005-01-01"), + as.Date("2021-01-01") + ) + ) + cdm <- PatientProfiles::mockPatientProfiles(observation_period = observation_period) + + deathTable <- dplyr::tibble( + person_id = c(1,2), + death_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"))) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + cdm <- generateDeathCohortSet(cdm=cdm, deathInObservation = TRUE) + + expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 1) + + expect_true(cdm$death_cohort %>% + dplyr::select(subject_id) %>% + dplyr::pull() == 2) + + expect_true(cdm$death_cohort %>% + dplyr::select(cohort_start_date) %>% + dplyr::pull() == as.Date("2020-01-02")) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("test different cohort table name", { + + cdm <- PatientProfiles::mockPatientProfiles() + + deathTable <- dplyr::tibble( + person_id = c(1,2,3), + death_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01"))) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + cdm <- generateDeathCohortSet(cdm=cdm, name = "my_cohort_death") + expect_error(CDMConnector::assertTables(cdm, tables=c("death_cohort"))) + + expect_no_error(CDMConnector::assertTables(cdm, tables=c("my_cohort_death"))) + + expect_true(all(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date") %in% + colnames(cdm$my_cohort_death))) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + +test_that("test subsetting death table by a cohort table", { + + cohort1 <- tibble::tibble( + cohort_definition_id = c(1,1,2), + subject_id = c(1,2,3), + cohort_start_date = as.Date(c("2012-02-01")), + cohort_end_date = as.Date(c("2013-02-01")) + ) + + cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1) + + deathTable <- dplyr::tibble( + person_id = seq(1,5), + death_date = c(as.Date("2020-01-01"))) + + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1") + + expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 3) + + expect_true(all(cdm$death_cohort %>% + dplyr::select(subject_id) %>% + dplyr::pull() %in% c(1,2,3) + )) +# with cohortId + cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1", cohortId = 1) + + expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 2) + + expect_true(all(cdm$death_cohort %>% + dplyr::select(subject_id) %>% + dplyr::pull() %in% c(1,2) + )) + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + +test_that("test expected errors", { + + cohort1 <- tibble::tibble( + cohort_definition_id = c(1,1,1), + subject_id = c(1,2,3), + cohort_start_date = as.Date(c("2012-02-01")), + cohort_end_date = as.Date(c("2013-02-01")) + ) + + cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1) + + # no death table in CDM + expect_error(cdm <- generateDeathCohortSet(cdm=cdm)) + + # cohortTable & cohortId + deathTable <- dplyr::tibble( + person_id = c(1,2,3), + death_date = c(as.Date("2020-01-01"), + as.Date("2020-01-02"), + as.Date("2020-01-01"))) + DBI::dbWithTransaction(attr(cdm, "dbcon"), { + DBI::dbWriteTable(attr(cdm, "dbcon"), "death", + deathTable, overwrite = TRUE) + }) + cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death") + + # cohortTable not exist + expect_error(cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "non_exist_cohort")) + + # wrong cohortId input + expect_error(cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1", cohortId = "1")) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) diff --git a/tests/testthat/test-mockMGUS2cdm.R b/tests/testthat/test-mockMGUS2cdm.R new file mode 100644 index 0000000..0e65cb0 --- /dev/null +++ b/tests/testthat/test-mockMGUS2cdm.R @@ -0,0 +1,12 @@ +test_that("mock mgus2 as a cdm reference", { + cdm <- mockMGUS2cdm() + + expect_true(cdm$person %>% + dplyr::tally() %>% + dplyr::pull("n") == + survival::mgus2 %>% + dplyr::tally() %>% + dplyr::pull("n")) + + CDMConnector::cdmDisconnect(cdm) +}) diff --git a/tests/testthat/test-plotSurvival.R b/tests/testthat/test-plotSurvival.R new file mode 100644 index 0000000..99f4bae --- /dev/null +++ b/tests/testthat/test-plotSurvival.R @@ -0,0 +1,116 @@ +test_that("basic Survival plot", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1 + ) + + plot <- plotSurvival(surv) + expect_true(ggplot2::is.ggplot(plot)) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + +test_that("plot facets", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata=list(c("sex", "age_group")) + ) + + plot <-plotSurvival(surv, + facet = "strata_level") + expect_true(ggplot2::is.ggplot(plot)) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + + +test_that("plot colour", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata=list(c("sex", "age_group")) + ) + + plot <- plotSurvival(surv, + facet = "strata_level", + colour = "strata_level") + + expect_true(ggplot2::is.ggplot(plot)) + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + +test_that("basic cumulative incidence plot", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1 + ) + + plot <- plotCumulativeIncidence(surv) + expect_true(ggplot2::is.ggplot(plot)) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + +test_that("plot facets for cumulative incidence plots", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata=list(c("sex", "age_group")) + ) + + plot <-plotCumulativeIncidence(surv, + facet = "strata_level") + expect_true(ggplot2::is.ggplot(plot)) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + + +test_that("plot colour for cumulative incidence plots", { + + cdm <- mockMGUS2cdm() + surv <- estimateSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata=list(c("sex", "age_group")) + ) + + plot <- plotCumulativeIncidence(surv, + facet = "strata_level", + colour = "strata_level") + + expect_true(ggplot2::is.ggplot(plot)) + + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + diff --git a/tests/testthat/test-summariseSurvivalParticipants.R b/tests/testthat/test-summariseSurvivalParticipants.R new file mode 100644 index 0000000..9a3de2a --- /dev/null +++ b/tests/testthat/test-summariseSurvivalParticipants.R @@ -0,0 +1,15 @@ +# test_that("basic table", { +# +# cdm <- mockMGUS2cdm() +# surv <- estimateSurvival(cdm, +# targetCohortTable = "mgus_diagnosis", +# outcomeCohortTable = "death_cohort", +# returnParticipants = TRUE +# ) +# tableOne <- summariseCharacteristics(cohort = survivalParticipants(surv) %>% +# CDMConnector::computeQuery() , +# cdm = cdm) +# +# expect_false(is.null(tableOne)) +# CDMConnector::cdm_disconnect(cdm) +# }) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/a01_Single_event_of_interest.Rmd b/vignettes/a01_Single_event_of_interest.Rmd new file mode 100644 index 0000000..3727d76 --- /dev/null +++ b/vignettes/a01_Single_event_of_interest.Rmd @@ -0,0 +1,122 @@ +--- +title: "Single outcome event of interest" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{a01_Single_event_of_interest} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, warning = FALSE, message = FALSE, + comment = "#>" +) +``` + + +## Set up +Let's first load the packages required. +```{r} +library(CDMConnector) +library(CohortSurvival) +library(dplyr) +library(ggplot2) +``` + +We'll create a cdm reference to use our example MGUS2 survival dataset. In practice you would use the CDMConnector package to connect to your data mapped to the OMOP CDM. +```{r} +cdm <- CohortSurvival::mockMGUS2cdm() +``` + +In this vignette we'll first estimate survival following a diagnosis of MGUS, with death our outcome of interest. + +We would typically need to define study cohorts ourselves, but in the case of our example data we already have these cohorts available. You can see for our diagnosis cohort we also have a number of additional features recorded for individuals which we'll use for stratification. + +```{r} +cdm$mgus_diagnosis %>% + glimpse() + +cdm$death_cohort %>% + glimpse() +``` + +## Overall survival +First, we can estimate survival for the cohort overall like so. +```{r} +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort" +) +MGUS_death %>% + glimpse() +``` + +As we can see above our results have been outputted in long format. We can plot these results like so. +```{r} +plotSurvival(MGUS_death) +``` + +Our returned results also have attributes containing information that summarises survival. +```{r} +MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>% + tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>% + dplyr::mutate( + "Restricted mean survival (se)" = paste0( + round(restricted_mean), + " (", round(restricted_mean_std_error, 2), ")" + ), + "Median survival (95% CI)" = paste0(median_survival, + " (", median_survival_95CI_lower, + " to ", median_survival_95CI_higher, ")") + ) %>% + dplyr::select("Restricted mean survival (se)", "Median survival (95% CI)") +``` + + +## With stratification +To estimate survival for particular strata of interest we need these features to have been added to the target cohort table. Once we have them defined, and as seen above we already have a number of example characteristics added to our diagnosis cohort, we can add stratifications like so. +```{r} +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort", + strata = list(c("age_group"), + c("sex"), + c("age_group", "sex")) +) +``` + +As we can see as well as results for each strata, we'll always also have overall results returned. +```{r, fig.height=6, fig.width=8} +plotSurvival(MGUS_death, + facet = "strata_name", + colour = "strata_level") +``` + +And we also now have summary statistics for each of the strata as well as overall. + +```{r} +MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>% + tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>% + dplyr::mutate("Restricted mean survival (se)" = paste0(round(restricted_mean), " (", round(restricted_mean_std_error, 2), ")"), + "Median survival (95% CI)" = paste0(median_survival, " (", median_survival_95CI_lower, " to ", median_survival_95CI_higher, ")") + ) %>% + dplyr::select(strata_name, strata_level, + "Restricted mean survival (se)", "Median survival (95% CI)") +``` + +## Summarising participants +If we set returnParticipants as TRUE then we will also be able to access the individuals that contributed to the analysis. +```{r} +MGUS_death <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort", + returnParticipants = TRUE +) +survivalParticipants(MGUS_death) +``` + +## Disconnect from the cdm database connection +```{r} +cdm_disconnect(cdm) +```