diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..deab654 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/r-cmd-check-ubuntu.yaml b/.github/workflows/r-cmd-check-ubuntu.yaml deleted file mode 100644 index bb3d18e..0000000 --- a/.github/workflows/r-cmd-check-ubuntu.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: [main, develop] - pull_request: - branches: [main, develop] - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ubuntu-latest - env: - R_KEEP_PKG_SOURCE: yes - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::rcmdcheck - needs: check - - - uses: r-lib/actions/check-r-package@v2 diff --git a/DESCRIPTION b/DESCRIPTION index 599941b..9f72d15 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,26 +1,30 @@ Package: CohortSurvival Title: Estimate Survival from Common Data Model Cohorts -Version: 0.5.1 +Version: 0.6.0 Authors@R: c( person("Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9286-1128")), - person("Kim", "Lopez", email = "kim.lopez@spc.ox.ac.uk", role = c("aut"), + person("Kim", "López-Güell", email = "kim.lopez@spc.ox.ac.uk", + role = c("aut"), comment = c(ORCID = "0000-0002-8462-8668")), person("Marti", "Catala", email = "marti.catalasabate@ndorms.ox.ac.uk", - role = c("ctb"), + role = c("aut"), comment = c(ORCID = "0000-0003-3308-9905")), person("Xintong", "Li", email = "xintong.li@ndorms.ox.ac.uk", - role = c("ctb"), - comment = c(ORCID = "0000-0003-3308-9905")), + role = c("aut"), + comment = c(ORCID = "0000-0002-6872-5804")), person("Danielle", "Newby", email = "danielle.newby@ndorms.ox.ac.uk", - role = c("ctb"), - comment = c(ORCID = "0000-0002-3001-1478"))) + role = c("aut"), + comment = c(ORCID = "0000-0002-3001-1478")), + person("Nuria", "Mercade-Besora", , "nuria.mercadebesora@ndorms.ox.ac.uk", + role = c("aut"), + comment = c(ORCID = "0009-0006-7948-3747"))) Description: Estimate survival using data mapped to the Observational Medical Outcomes Partnership common data model. Survival can be estimated based on user-defined study cohorts. License: Apache License (>= 2) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: CDMConnector (>= 1.3.0), omopgenerics (>= 0.2.0), @@ -35,11 +39,13 @@ Imports: PatientProfiles, visOmopResults (>= 0.3.0), rlang (>= 0.4.11), - survival, + survival (>= 3.7.0), scales, stringr, tibble, - tidyr + tidyr, + purrr, + lifecycle Suggests: testthat (>= 3.0.0), CodelistGenerator, diff --git a/NAMESPACE b/NAMESPACE index 07f71e9..4c3da9c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,18 +8,32 @@ export(addCompetingRiskCohortSurvival) export(asSurvivalResult) export(as_label) export(as_name) -export(benchmarkCohortSurvival) +export(attrition) +export(bind) +export(cohortCodelist) +export(cohortCount) export(enquo) export(enquos) export(estimateCompetingRiskSurvival) export(estimateSingleEventSurvival) +export(exportSummarisedResult) export(generateDeathCohortSet) +export(importSummarisedResult) export(mockMGUS2cdm) export(optionsTableSurvival) export(plotSurvival) -export(survivalParticipants) +export(settings) +export(suppress) export(tableSurvival) importFrom(magrittr,"%>%") +importFrom(omopgenerics,attrition) +importFrom(omopgenerics,bind) +importFrom(omopgenerics,cohortCodelist) +importFrom(omopgenerics,cohortCount) +importFrom(omopgenerics,exportSummarisedResult) +importFrom(omopgenerics,importSummarisedResult) +importFrom(omopgenerics,settings) +importFrom(omopgenerics,suppress) importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,.env) diff --git a/R/addCohortSurvival.R b/R/addCohortSurvival.R index 495bf86..c882a40 100644 --- a/R/addCohortSurvival.R +++ b/R/addCohortSurvival.R @@ -81,23 +81,37 @@ addCohortSurvival <- function(x, futureObservationName = "days_to_exit" ) %>% dplyr::compute() - # get any events before or after index date - x <- x %>% - PatientProfiles::addCohortIntersectFlag( - indexDate = "cohort_start_date", - targetCohortTable = outcomeCohortTable, - targetCohortId = outcomeCohortId, - window = c(-outcomeWashout,-1), - nameStyle = "event_in_washout" - ) %>% - PatientProfiles::addCohortIntersectDays( - indexDate = "cohort_start_date", - targetCohortTable = outcomeCohortTable, - targetCohortId = outcomeCohortId, - targetDate = outcomeDateVariable, - window = c(0, Inf), - nameStyle = "days_to_event" - ) %>% dplyr::compute() + if(outcomeWashout == 0) { + # get any events before or after index date + x <- x %>% + dplyr::mutate(event_in_washout = 0L) %>% + PatientProfiles::addCohortIntersectDays( + indexDate = "cohort_start_date", + targetCohortTable = outcomeCohortTable, + targetCohortId = outcomeCohortId, + targetDate = outcomeDateVariable, + window = c(0, Inf), + nameStyle = "days_to_event" + ) %>% dplyr::compute() + } else { + # get any events before or after index date + x <- x %>% + PatientProfiles::addCohortIntersectFlag( + indexDate = "cohort_start_date", + targetCohortTable = outcomeCohortTable, + targetCohortId = outcomeCohortId, + window = c(-outcomeWashout,-1), + nameStyle = "event_in_washout" + ) %>% + PatientProfiles::addCohortIntersectDays( + indexDate = "cohort_start_date", + targetCohortTable = outcomeCohortTable, + targetCohortId = outcomeCohortId, + targetDate = outcomeDateVariable, + window = c(0, Inf), + nameStyle = "days_to_event" + ) %>% dplyr::compute() + } # whatever comes first @@ -352,42 +366,15 @@ validateExtractSurvivalInputs <- function(cdm, censorOnCohortExit, censorOnDate, followUpDays) { - checkCdm(cdm, tables = c( - "person", "observation_period", - outcomeCohortTable - )) - - checkIsCohort_exp(cohortTable) + omopgenerics::validateCdmArgument(cdm) + omopgenerics::validateCohortArgument(cdm[[outcomeCohortTable]]) checkExposureCohortId(cohortTable) - - checkIsCohort(cdm[[outcomeCohortTable]]) - + omopgenerics::assertDate(censorOnDate, null = TRUE, ) checkCensorOnDate(cohortTable, censorOnDate) - - errorMessage <- checkmate::makeAssertCollection() - checkmate::assertIntegerish(outcomeCohortId, - len = 1, - add = errorMessage - ) - checkmate::assert_logical(censorOnCohortExit) - checkmate::assert_date(censorOnDate, null.ok = TRUE) - if (followUpDays != Inf) { - checkmate::assert_integerish(followUpDays, - len = 1, - lower = 1, - add = errorMessage - ) - } - if (outcomeWashout != "Inf") { - checkmate::assertIntegerish(outcomeWashout, - len = 1, - lower = 1, - add = errorMessage - ) - } - checkmate::reportAssertions(collection = errorMessage) - - + omopgenerics::assertNumeric(outcomeCohortId, length = 1, min = 1) + omopgenerics::assertLogical(censorOnCohortExit, length = 1) + omopgenerics::assertNumeric(followUpDays, length = 1, min = 1, integerish = TRUE) + omopgenerics::assertNumeric(outcomeWashout, length = 1, min = 0, integerish = TRUE) # check specified cohort is in cohort table errorMessage <- checkmate::makeAssertCollection() diff --git a/R/asSurvivalResult.R b/R/asSurvivalResult.R index 9c8cdb8..2c2853a 100644 --- a/R/asSurvivalResult.R +++ b/R/asSurvivalResult.R @@ -43,38 +43,54 @@ asSurvivalResult <- function(result) { cli::cli_abort("result is not a valid `summarised_result` object.") } result <- result %>% - visOmopResults::addSettings() %>% - # suppress(minCellCount = minCellCount) %>% - dplyr::select(-c("package_name", "package_version", "estimate_type")) %>% + visOmopResults::addSettings() %>% + dplyr::select(c("cdm_name", "group_name", "group_level", "strata_name", + "strata_level", "variable_name", "variable_level", + "estimate_name", "estimate_type", "estimate_value", + "additional_name", "additional_level", "result_type", + "outcome", "competing_outcome", + "eventgap")) %>% visOmopResults::splitAdditional() %>% visOmopResults::splitGroup() %>% dplyr::mutate(estimate_value = as.numeric(.data$estimate_value)) + estimates <- result %>% - dplyr::filter(.data$variable_name %in% + dplyr::filter(.data$result_type %in% c("survival_probability", "cumulative_failure_probability")) %>% - dplyr::select(-dplyr::any_of('eventgap')) %>% - dplyr::mutate(time = as.numeric(.data$time)) - if("competing_outcome" %in% colnames(estimates)) { - estimates <- estimates %>% - dplyr::relocate("outcome", .after = "cohort") %>% + dplyr::select(-dplyr::any_of(c("eventgap", "reason_id"))) %>% + dplyr::mutate(time = as.numeric(.data$time)) %>% + dplyr::relocate("outcome", .after = "target_cohort") %>% dplyr::relocate("competing_outcome", .after = "outcome") - } else { - estimates <- estimates %>% - dplyr::relocate("outcome", .after = "cohort") - } + summary <- result %>% - dplyr::filter(.data$variable_name == 'survival_summary') %>% - dplyr::select(-dplyr::any_of(c('variable_name', 'time', 'eventgap'))) + dplyr::filter(.data$result_type == "survival_summary") %>% + dplyr::select(-dplyr::any_of(c("variable_name", "time", "eventgap", "result_type", "reason_id"))) %>% + dplyr::mutate(estimate_name = dplyr::if_else( + grepl("count", .data$estimate_name), + gsub("_count","",.data$estimate_name), + .data$estimate_name + )) + events <- result %>% - dplyr::filter(.data$variable_name == 'survival_events') %>% - dplyr::select(-dplyr::any_of('variable_name')) %>% + dplyr::filter(.data$result_type == "survival_events") %>% + dplyr::select(-dplyr::any_of(c("reason_id", "result_type"))) %>% dplyr::distinct() %>% - dplyr::mutate(time = as.numeric(.data$time)) + dplyr::mutate(time = as.numeric(.data$time)) %>% + dplyr::mutate(estimate_name = dplyr::if_else( + grepl("count", .data$estimate_name), + gsub("_count","",.data$estimate_name), + .data$estimate_name + )) + + attrition <- result %>% + dplyr::filter(.data$result_type == "survival_attrition") %>% + dplyr::select(-c("result_type", "time", "eventgap")) result_final <- estimates - attr(result_final, 'events') <- events - attr(result_final, 'summary') <- summary + attr(result_final, "events") <- events + attr(result_final, "summary") <- summary + attr(result_final, "attrition") <- attrition return(result_final) } diff --git a/R/benchmarkCohortSurvival.R b/R/benchmarkCohortSurvival.R deleted file mode 100644 index 03c5eab..0000000 --- a/R/benchmarkCohortSurvival.R +++ /dev/null @@ -1,541 +0,0 @@ -# Copyright 2023 DARWIN EU® -# -# This file is part of CohortSurvival -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -#' Estimate performance of estimateSurvival function for benchmarking -#' -#' @param cdm CDM reference -#' @param targetSize number of people in the target cohort table -#' @param outcomeSize number of people in the outcome cohort table -#' @param outcomeDateVariable Variable containing date of outcome event -#' @param competingOutcomeSize number of people in the competing outcome cohort table -#' @param competingOutcomeDateVariable Variable containing date of -#' competing event -#' @param censorOnCohortExit If TRUE, an individual's follow up will be -#' censored at their cohort exit -#' @param censorOnDate if not NULL, an individual's follow up will be censored -#' at the given date -#' @param followUpDays Number of days to follow up individuals (lower bound 1, -#' upper bound Inf) -#' @param strata strata -#' @param eventGap Days between time points for which to report survival -#' estimates. First day will be day zero with risk estimates provided -#' for times up to the end of follow-up, with a gap in days equivalent -#' to eventGap. -#' @param estimateGap vector of time points at which to give survival estimates, -#' if NULL estimates at all times are calculated -#' @param minCellCount The minimum number of events to reported, below which -#' results will be obscured. If 0, all results will be reported. -#' @param returnParticipants Either TRUE or FALSE. If TRUE, references to -#' participants from the analysis will be returned allowing for further -#' analysis. -#' -#' @return tibble with performance of estimateSurvival function information, -#' according to the selected input parameters -#' @export -#' -#' @examples -#' \donttest{ -#' cdm <- mockMGUS2cdm() -#' cdm$condition_occurrence <- cdm$death_cohort %>% -#' dplyr::rename("condition_start_date" = "cohort_start_date", -#' "condition_end_date" = "cohort_end_date") %>% -#' dplyr::compute() -#' surv_timings <- benchmarkCohortSurvival( -#' cdm, targetSize = 100, outcomeSize = 20) -#'} -#' -benchmarkCohortSurvival <- function(cdm, - targetSize, - outcomeSize, - outcomeDateVariable = "cohort_start_date", - competingOutcomeSize = NULL, - competingOutcomeDateVariable = "cohort_start_date", - censorOnCohortExit = FALSE, - censorOnDate = NULL, - followUpDays = Inf, - strata = NULL, - eventGap = 30, - estimateGap = 1, - minCellCount = 5, - returnParticipants = FALSE) { - - # check input - errorMessage <- checkmate::makeAssertCollection() - - checkCdm(cdm = cdm, tables = c( - "person", "observation_period" - )) - checkmate::assertIntegerish(targetSize, - len = 1, - lower = 1, - add = errorMessage) - checkmate::assertIntegerish(outcomeSize, - len = 1, - lower = 1, - add = errorMessage) - checkmate::assertIntegerish(competingOutcomeSize, - len = 1, - lower = 1, - null.ok = TRUE, - add = errorMessage) - checkmate::assertCharacter(outcomeDateVariable, - len = 1, - add = errorMessage) - checkmate::assertCharacter(competingOutcomeDateVariable, - len = 1, - add = errorMessage) - checkmate::assertLogical(censorOnCohortExit, - len = 1, - add = errorMessage) - if(!is.null(censorOnDate)) { - checkdate <- censorOnDate %>% inherits("Date") - if(!checkdate) { - cli::cli_abort("{censorOnDate} is neither NULL nor of type Date") - } - } - if(followUpDays != "Inf") { - checkmate::assertIntegerish(followUpDays, - len = 1, - lower = 0, - add = errorMessage - ) - } - checkmate::assertIntegerish(eventGap, - lower = 1, - add = errorMessage - ) - checkmate::assertIntegerish(estimateGap, - lower = 1, - add = errorMessage - ) - checkmate::assertIntegerish(minCellCount, - len = 1, - lower = 0, - null.ok = FALSE, - add = errorMessage - ) - checkmate::assertLogical(returnParticipants, - len = 1, - add = errorMessage) - - checkmate::reportAssertions(collection = errorMessage) - - # create cohorts - timings <- list() - tictoc::tic() - - targetCohortTable <- "benchmark_target" - target_cohort <- cdm$person %>% - dplyr::slice_sample(n = targetSize) %>% - dplyr::inner_join(cdm$observation_period, by = "person_id") %>% - dplyr::mutate(cohort_definition_id = 1) %>% - dplyr::select( - "subject_id" = "person_id", - "cohort_definition_id", - "cohort_start_date" = "observation_period_start_date", - "cohort_end_date" = "observation_period_end_date" - ) %>% - PatientProfiles::addDemographics() %>% - dplyr::collect() %>% - dplyr::as_tibble() - - checkStrata(strata, target_cohort) - targetCohortId <- 1 - - if(!is.null(censorOnDate)) { - target_cohort <- target_cohort %>% - dplyr::filter( - .data$cohort_start_date < .env$censorOnDate - ) - } - - t <- tictoc::toc(quiet = TRUE) - timings[["target_cohort"]] <- dplyr::tibble( - task = paste0("generating target cohort size ",targetSize), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - outcomeCohortTable <- "benchmark_outcome" - outcome_cohort <- dplyr::tibble( - subject_id = target_cohort %>% - dplyr::select("subject_id") %>% - dplyr::pull() %>% - sample(outcomeSize, replace = TRUE), - cohort_definition_id = 1 - ) %>% - dplyr::left_join( - cdm$observation_period, - by = c("subject_id" = "person_id"), - copy = TRUE - ) - - start_dates <- outcome_cohort %>% dplyr::select("observation_period_start_date") %>% dplyr::pull() - end_dates <- outcome_cohort %>% dplyr::select("observation_period_end_date") %>% dplyr::pull() - - cohort_dates <- c() - for(i in 1:length(start_dates)) { - cohort_dates[i] <- as.character(sample(seq(start_dates[i], end_dates[i], by = "day"), 1)) - } - - outcome_cohort <- outcome_cohort %>% - dplyr::mutate( - cohort_start_date = as.Date(cohort_dates), - cohort_end_date = .data$cohort_start_date - ) %>% - dplyr::select( - "subject_id", - "cohort_definition_id", - "cohort_start_date", - "cohort_end_date" - ) - - columnCheck <- outcomeDateVariable %in% colnames(outcome_cohort) - if(!columnCheck) { - cli::cli_abort("{outcomeDateVariable} must be `cohort_start_date` or `cohort_end_date`") - } - outcomeCohortId <- 1 - - t <- tictoc::toc(quiet = TRUE) - timings[["outcome_cohort"]] <- dplyr::tibble( - task = paste0("generating outcome cohort size ",outcomeSize), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - if(!is.null(competingOutcomeSize)) { - competingOutcomeCohortTable <- "benchmark_competing_outcome" - competing_outcome_cohort <- dplyr::tibble( - subject_id = target_cohort %>% - dplyr::select("subject_id") %>% - dplyr::pull() %>% - sample(competingOutcomeSize, replace = TRUE), - cohort_definition_id = 1 - ) %>% - dplyr::left_join( - cdm$observation_period, - by = c("subject_id" = "person_id"), - copy = TRUE - ) - - start_dates <- competing_outcome_cohort %>% dplyr::select("observation_period_start_date") %>% dplyr::pull() - end_dates <- competing_outcome_cohort %>% dplyr::select("observation_period_end_date") %>% dplyr::pull() - - cohort_dates <- c() - for(i in 1:length(start_dates)) { - cohort_dates[i] <- as.character(sample(seq(start_dates[i], end_dates[i], by = "day"), 1)) - } - - competing_outcome_cohort <- competing_outcome_cohort %>% - dplyr::mutate( - cohort_start_date = as.Date(cohort_dates), - cohort_end_date = .data$cohort_start_date - ) %>% - dplyr::select( - "subject_id", - "cohort_definition_id", - "cohort_start_date", - "cohort_end_date" - ) - - columnCheck2 <- competingOutcomeDateVariable %in% colnames(competing_outcome_cohort) - if(!columnCheck2) { - cli::cli_abort("{competingOutcomeDateVariable} must be `cohort_start_date` or `cohort_end_date`") - } - competingOutcomeCohortId <- 1 - - person = cdm$person %>% - dplyr::collect() - - observation_period = cdm$observation_period %>% - dplyr::collect() - - cdm1 <- omopgenerics::cdmFromTables( - tables = list( - person = person, - observation_period = observation_period - ), - cohortTables = list( - benchmark_target = target_cohort, - benchmark_outcome = outcome_cohort, - benchmark_competing_outcome = competing_outcome_cohort - ), - cdmName = "benchmark" - ) - - db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, - cdm1, - schema = "main", - overwrite = TRUE) - - # Add schema information - attr(cdm2, "cdm_schema") <- "main" - attr(cdm2, "write_schema") <- "main" - - t <- tictoc::toc(quiet = TRUE) - timings[["competing_outcome_cohort"]] <- dplyr::tibble( - task = paste0("generating competing outcome cohort size ",competingOutcomeSize), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - } else { - competingOutcomeCohortTable <- NULL - competingOutcomeCohortId <- 1 - - person = cdm$person %>% - dplyr::collect() - - observation_period = cdm$observation_period %>% - dplyr::collect() - - cdm1 <- omopgenerics::cdmFromTables( - tables = list( - person = person, - observation_period = observation_period - ), - cohortTables = list( - benchmark_target = target_cohort, - benchmark_outcome = outcome_cohort - ), - cdmName = "benchmark" - ) - - db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, - cdm1, - schema = "main", - overwrite = TRUE) - - # Add schema information - attr(cdm2, "cdm_schema") <- "main" - attr(cdm2, "write_schema") <- "main" - } - - workingExposureTable <- cdm2[[targetCohortTable]] - - # addCohortSurvival for primary event of interest - workingExposureTable <- workingExposureTable %>% - addCohortSurvival( - cdm = cdm2, - outcomeCohortTable = outcomeCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeDateVariable = outcomeDateVariable, - censorOnCohortExit = censorOnCohortExit, - censorOnDate = censorOnDate, - followUpDays = followUpDays - ) %>% - dplyr::rename( - "outcome_time" = "time", - "outcome_status" = "status" - ) - - t <- tictoc::toc(quiet = TRUE) - timings[["addCohortSurvival_outcome"]] <- dplyr::tibble( - task = "addCohortSurvival info added for outcome", - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - # competing risk (if there is one) - if (!is.null(competingOutcomeCohortTable)) { - workingExposureTable <- workingExposureTable %>% - addCohortSurvival( - cdm = cdm2, - outcomeCohortTable = competingOutcomeCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeDateVariable = competingOutcomeDateVariable, - censorOnCohortExit = censorOnCohortExit, - censorOnDate = censorOnDate, - followUpDays = followUpDays - ) %>% - dplyr::rename( - "competing_risk_time" = "time", - "competing_risk_status" = "status" - ) - - t <- tictoc::toc(quiet = TRUE) - timings[["addCohortSurvival_competing_outcome"]] <- dplyr::tibble( - task = "addCohortSurvival info added for competing outcome", - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - } - - # collect - survDataDb <- workingExposureTable %>% - dplyr::filter(!is.na(.data$outcome_time) && - !is.na(.data$outcome_status)) - - survData <- survDataDb %>% - dplyr::collect() - - if (!is.null(competingOutcomeCohortTable)) { - # - add competing risk variable - # 0: no event, 2: outcome event, 3: competing risk event - survData <- addCompetingRiskVars( - data = survData, - time1 = "outcome_time", - status1 = "outcome_status", - time2 = "competing_risk_time", - status2 = "competing_risk_status", - nameOutTime = "outcome_or_competing_time", - nameOutStatus = "outcome_or_competing_status" - ) - } - - # time points to extract survival estimates - timepoints <- seq(0, max(survData$outcome_time), by = estimateGap) - - # fit survival, with strata - if (is.null(competingOutcomeCohortTable)) { - survivalEstimates <- singleEventSurvival( - survData = survData, - times = timepoints, - variables = strata, - eventGap = eventGap - ) - } else { - survivalEstimates <- competingRiskSurvival( - survData = survData, - times = timepoints, - variables = strata, - eventGap = eventGap - ) - } - - t <- tictoc::toc(quiet = TRUE) - timings[["estimateSurvival"]] <- dplyr::tibble( - task = paste0("estimateSurvival called for specified settings and strata: ",paste0(names(strata), collapse = ", ")), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - if(nrow(survivalEstimates)>0){ - survivalEstimates <- addCohortDetails( - x = survivalEstimates, - cdm = cdm2, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable, - competingOutcomeCohortId = competingOutcomeCohortId, - competingOutcomeCohortTable = competingOutcomeCohortTable) - - - t <- tictoc::toc(quiet = TRUE) - timings[["counts_obscured"]] <- dplyr::tibble( - task = paste0("counts obscured < ",minCellCount), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - t <- tictoc::toc(quiet = TRUE) - timings[["counts_obscured"]] <- dplyr::tibble( - task = paste0("counts obscured < ",minCellCount), - time_taken_secs = as.numeric(t$toc - t$tic) - ) - tictoc::tic() - - # add attributes - if(isTRUE(returnParticipants)){ - participantsRef <- survDataDb %>% - dplyr::select( - "cohort_definition_id", - "subject_id", - "cohort_start_date", - "cohort_end_date" - ) %>% - dplyr::compute() - - attr(participantsRef, "cohort_set") <- participantsRef %>% - dplyr::select("cohort_definition_id") %>% - dplyr::distinct() %>% - dplyr::mutate(cohort_name = paste0( - "survival_participants_", - as.integer(.data$cohort_definition_id) - )) %>% - dplyr::collect() - - attr(participantsRef, "cohort_attrition") <- participantsRef %>% - dplyr::group_by(.data$cohort_definition_id) %>% - dplyr::summarise( - number_records = dplyr::n(), - number_subjects = dplyr::n_distinct(.data$subject_id), - .groups = "drop" - ) %>% - dplyr::mutate( - "reason_id" = 1, - "reason" = "Initial qualifying events", - "excluded_records" = 0, - "excluded_subjects" = 0 - ) %>% - dplyr::collect() - - attr(participantsRef, "tbl_name") <- "survival_participants" - - attr(survivalEstimates, "participants") <- omopgenerics::newCohortTable( - participantsRef - ) - } - - attr(survivalEstimates, "events") <- addCohortDetails( - x = attr(survivalEstimates, "events"), - cdm = cdm2, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable) - - } - - # combine results - timings <- dplyr::bind_rows(timings) %>% - dplyr::mutate(time_taken_secs = round(.data$time_taken_secs, 2)) %>% - dplyr::mutate(time_taken_mins = round(.data$time_taken_secs / 60, 2)) %>% - dplyr::mutate(time_taken_hours = round(.data$time_taken_mins / 60, 2)) %>% - dplyr::mutate(dbms = "duckdb") %>% - dplyr::mutate(person_n = cdm2$person %>% - dplyr::count() %>% - dplyr::pull()) %>% - dplyr::mutate(db_min_observation_start = cdm2$observation_period %>% - dplyr::summarise( - db_min_obs_start = - min(.data$observation_period_start_date, - na.rm = TRUE - ) - ) %>% - dplyr::pull()) %>% - dplyr::mutate(max_observation_end = cdm2$observation_period %>% - dplyr::summarise( - max_observation_end = - max(.data$observation_period_end_date, - na.rm = TRUE - ) - ) %>% - dplyr::pull()) - - if (isFALSE(returnParticipants)) { - timings <- timings %>% - dplyr::mutate(with_participants = "No") - } else { - timings <- timings %>% - dplyr::mutate(with_participants = "Yes") - } - - return(timings) -} diff --git a/R/deathDiagnostics.R b/R/deathDiagnostics.R index c238fb4..14c1052 100644 --- a/R/deathDiagnostics.R +++ b/R/deathDiagnostics.R @@ -19,14 +19,12 @@ deathDiagnostics <- function(cdm, cohortId = 1){ # 0. validate inputs... - checkCdm(cdm, tables = c("death", "observation_period")) - checkmate::assert_character(cohortTable, len = 1, null.ok = TRUE) + omopgenerics::validateCdmArgument(cdm) + omopgenerics::assertTable(cdm[["death"]]) + omopgenerics::assertCharacter(cohortTable, length = 1, null = TRUE) if(!is.null(cohortTable)) { - checkIsCohort(cdm[[cohortTable]]) - cohortIdCheck = checkCohortId(cdm[[cohortTable]], cohortId) - if(!cohortIdCheck) { - cli::cli_abort(paste0("cohortId provided is not a valid id for the table ",cohortTable)) - } + omopgenerics::validateCohortArgument(cdm[[cohortTable]]) + omopgenerics::validateCohortIdArgument(cohortId, cdm[[cohortTable]]) } # 1. start diagnosis whole table diff --git a/R/estimateSurvival.R b/R/estimateSurvival.R index 68ea977..7c5c7a1 100644 --- a/R/estimateSurvival.R +++ b/R/estimateSurvival.R @@ -19,8 +19,8 @@ #' #' @param cdm CDM reference #' @param targetCohortTable targetCohortTable -#' @param targetCohortId targetCohortId #' @param outcomeCohortTable The outcome cohort table of interest. +#' @param targetCohortId targetCohortId #' @param outcomeCohortId ID of event cohorts to include. Only one outcome #' (and so one ID) can be considered. #' @param outcomeDateVariable Variable containing date of outcome event @@ -44,9 +44,6 @@ #' to have survived #' @param minCellCount The minimum number of events to reported, below which #' results will be obscured. If 0, all results will be reported. -#' @param returnParticipants Either TRUE or FALSE. If TRUE, references to -#' participants from the analysis will be returned allowing for further -#' analysis. #' #' @return tibble with survival information for desired cohort, including: #' time, people at risk, survival probability, cumulative incidence, @@ -69,8 +66,8 @@ #' estimateSingleEventSurvival <- function(cdm, targetCohortTable, - targetCohortId = NULL, outcomeCohortTable, + targetCohortId = NULL, outcomeCohortId = NULL, outcomeDateVariable = "cohort_start_date", outcomeWashout = Inf, @@ -82,8 +79,7 @@ estimateSingleEventSurvival <- function(cdm, estimateGap = 1, restrictedMeanFollowUp = NULL, minimumSurvivalDays = 1, - minCellCount = 5, - returnParticipants = FALSE) { + minCellCount = 5) { if (is.null(targetCohortId)) { CDMConnector::assertTables(cdm, targetCohortTable) targetCohortId <- CDMConnector::cohort_count(cdm[[targetCohortTable]]) %>% @@ -97,17 +93,17 @@ estimateSingleEventSurvival <- function(cdm, } # make sure attrition is up to date for our outcome cohort - cdm[[outcomeCohortTable]] <- cdm[[outcomeCohortTable]] |> + cdm[[outcomeCohortTable]] <- cdm[[outcomeCohortTable]] %>% omopgenerics::recordCohortAttrition("update attrition") - emptyOutcomes <- omopgenerics::settings(cdm[[outcomeCohortTable]]) |> - dplyr::filter(.data$cohort_definition_id %in% .env$outcomeCohortId) |> + emptyOutcomes <- omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id %in% .env$outcomeCohortId) %>% dplyr::left_join( omopgenerics::cohortCount(cdm[[outcomeCohortTable]]), - by = "cohort_definition_id") |> + by = "cohort_definition_id") %>% dplyr::filter(.data$number_records == 0) if(nrow(emptyOutcomes) > 0){ - emptyOutcomenames <- emptyOutcomes |> dplyr::pull("cohort_name") + emptyOutcomenames <- emptyOutcomes %>% dplyr::pull("cohort_name") cli::cli_warn("Outcome cohort{?s} {emptyOutcomenames} {?is/are} empty") } @@ -115,7 +111,6 @@ estimateSingleEventSurvival <- function(cdm, events <- list() attrition <- list() summary <- list() - participants <- list() for (i in seq_along(targetCohortId)) { working_target_id <- targetCohortId[i] working_target <- omopgenerics::settings(cdm[[targetCohortTable]]) %>% @@ -148,63 +143,168 @@ estimateSingleEventSurvival <- function(cdm, estimateGap = estimateGap, restrictedMeanFollowUp = restrictedMeanFollowUp, minimumSurvivalDays = minimumSurvivalDays, - minCellCount = minCellCount, - returnParticipants = returnParticipants + minCellCount = minCellCount ) - attrition[[paste0(i, "_", j)]] <- attr(surv[[paste0(i, "_", j)]], "attrition") %>% + attrition[[paste0(i, "_", j)]] <- attr(surv[[paste0(i, "_", j)]], "cohort_attrition") %>% dplyr::mutate( - exposure_id = i, - outcome_id = j - ) + target_cohort = working_target, + outcome = working_outcome + ) %>% + dplyr::collect() %>% + dplyr::filter(.data$cohort_definition_id == working_target_id) events[[paste0(i, "_", j)]] <- attr(surv[[paste0(i, "_", j)]], 'events') summary[[paste0(i, "_", j)]] <- attr(surv[[paste0(i, "_", j)]], 'summary') - if(returnParticipants) { - participants[[paste0(i, "_", j)]] <- attr(surv[[paste0(i, "_", j)]], "participants") %>% - dplyr::mutate( - exposure_id = i, - outcome_id = j - ) %>% - dplyr::collect() - } } } - surv_estimates <- dplyr::bind_rows(surv) - # add attributes - + estimates <- dplyr::bind_rows(surv) events <- dplyr::bind_rows(events) - attr(surv_estimates, "attrition") <- dplyr::bind_rows(attrition) + attrition <- dplyr::bind_rows(attrition) summary <- dplyr::bind_rows(summary) - surv_estimates <- dplyr::bind_rows( - surv_estimates, - events, - summary - ) %>% - dplyr::mutate(estimate_value = as.character(.data$estimate_value), - result_id = 1L) - - if(returnParticipants) { - attr(surv_estimates, 'participants') <- dplyr::bind_rows(participants) + # Put all outputs in the general omopgenerics format + attrition <- attrition %>% + dplyr::distinct() %>% + dplyr::mutate( + cdm_name = attr(cdm, "cdm_name"), + package_name = "CohortSurvival", + package_version = as.character(utils::packageVersion("CohortSurvival")), + result_type = "survival_attrition", + group_name = "target_cohort", + group_level = .data$target_cohort, + variable_level = .data$outcome, + analysis_type = "single_event", + estimate_name = "count" + ) %>% + tidyr::pivot_longer( + cols = c( + "number_records", "number_subjects", "excluded_records", + "excluded_subjects" + ), + names_to = "variable_name", + values_to = "estimate_value" + ) %>% + dplyr::mutate( + "estimate_value" = as.character(.data$estimate_value), + "estimate_type" = "integer", + competing_outcome = "none" + ) %>% + visOmopResults::uniteStrata("reason") %>% + visOmopResults::uniteAdditional("reason_id") + + if(attrition %>% dplyr::group_by("target_cohort") %>% dplyr::tally() %>% dplyr::pull("n") == + attrition %>% dplyr::group_by("target_cohort", "cohort_definition_id") %>% dplyr::tally() %>% dplyr::pull("n")) { + attrition <- attrition %>% + dplyr::mutate(target_cohort = paste0(.data$target_cohort,"_",.data$cohort_definition_id), + group_level = .data$target_cohort) } - attr(surv_estimates, 'events') <- NULL - attr(surv_estimates, 'summary') <- NULL + attrition <- attrition %>% + dplyr::select(-c("cohort_definition_id")) - - settings <- surv_estimates %>% + settings <- estimates %>% dplyr::select("result_type", "package_name", "package_version", - "analysis_type") %>% - dplyr::distinct() %>% - dplyr::mutate(result_id = 1L) + "analysis_type", + "outcome", + "competing_outcome") %>% + dplyr::distinct() + + settings <- settings %>% + dplyr::mutate(eventgap = NA) %>% + dplyr::union_all( + events %>% + dplyr::select("result_type", + "package_name", + "package_version", + "analysis_type", + "outcome", + "competing_outcome", + "eventgap") %>% + dplyr::distinct() + ) - surv_estimates <- surv_estimates %>% + settings <- settings %>% + dplyr::union_all( + summary %>% + dplyr::select("result_type", + "package_name", + "package_version", + "analysis_type", + "outcome", + "competing_outcome") %>% + dplyr::mutate(eventgap = NA) %>% + dplyr::distinct() + ) + + settings <- settings %>% + dplyr::union_all( + attrition %>% + dplyr::select("result_type", + "analysis_type", + "package_name", + "package_version", + "outcome") %>% + dplyr::mutate(eventgap = NA, + competing_outcome = "none") %>% + dplyr::distinct() + ) + + settings <- settings %>% + dplyr::mutate(result_id = c(1:nrow(settings))) %>% + dplyr::relocate(.data$result_id, .before = "result_type") + + estimates <- estimates %>% + dplyr::mutate(additional_name = "time", + time = as.character(.data$time)) %>% + dplyr::rename("additional_level" = .data$time) + + events <- events %>% + dplyr::mutate(additional_name = "time", + time = as.character(.data$time)) %>% + dplyr::rename("additional_level" = .data$time) + + complete_results <- estimates %>% + dplyr::bind_rows(events) %>% + dplyr::bind_rows(summary) %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::bind_rows(attrition) %>% + dplyr::left_join(settings, + by = c("analysis_type", "package_name", "package_version", "result_type", "outcome", "competing_outcome", "eventgap")) %>% + dplyr::mutate( + additional_name = dplyr::if_else(is.na(.data$additional_name), "overall", .data$additional_name), + additional_level = dplyr::if_else(is.na(.data$additional_level), "overall", .data$additional_level) + ) + + complete_results <- complete_results %>% dplyr::select(omopgenerics::resultColumns()) - surv_estimates <- omopgenerics::newSummarisedResult(surv_estimates, + settings <- settings %>% + dplyr::mutate( + outcome_date_variable = .env$outcomeDateVariable, + outcome_washout = .env$outcomeWashout, + censor_on_cohort_exit = .env$censorOnCohortExit, + censor_on_date = .env$censorOnDate, + follow_up_days = .env$followUpDays, + restricted_mean_follow_up = .env$restrictedMeanFollowUp, + minimum_survival_days = .env$minimumSurvivalDays, + min_cell_count = .env$minCellCount + ) + + surv_estimates <- omopgenerics::newSummarisedResult(complete_results, settings = settings) + + attr(surv_estimates, "cohort_attrition") <- NULL + + # Suppress results with omopgenerics + surv_estimates <- surv_estimates %>% + dplyr::mutate(estimate_name = dplyr::if_else( + .data$estimate_name %in% c("n_risk", "n_events", "n_censor", "number_records"), + paste0(.data$estimate_name,"_count"), .data$estimate_name + )) %>% + omopgenerics::suppress(minCellCount = minCellCount) + return(surv_estimates) } @@ -213,13 +313,13 @@ estimateSingleEventSurvival <- function(cdm, #' #' @param cdm CDM reference #' @param targetCohortTable targetCohortTable -#' @param targetCohortId targetCohortId #' @param outcomeCohortTable The outcome cohort table of interest. +#' @param competingOutcomeCohortTable The competing outcome cohort table of interest. +#' @param targetCohortId targetCohortId #' @param outcomeCohortId ID of event cohorts to include. Only one outcome #' (and so one ID) can be considered. #' @param outcomeDateVariable Variable containing date of outcome event #' @param outcomeWashout Washout time in days for the outcome -#' @param competingOutcomeCohortTable The competing outcome cohort table of interest. #' @param competingOutcomeCohortId ID of event cohorts to include. Only one competing outcome #' (and so one ID) can be considered. #' @param competingOutcomeDateVariable Variable containing date of competing outcome event @@ -243,9 +343,6 @@ estimateSingleEventSurvival <- function(cdm, #' to have survived #' @param minCellCount The minimum number of events to reported, below which #' results will be obscured. If 0, all results will be reported. -#' @param returnParticipants Either TRUE or FALSE. If TRUE, references to -#' participants from the analysis will be returned allowing for further -#' analysis. #' #' @return tibble with survival information for desired cohort, including: #' time, people at risk, survival probability, cumulative incidence, @@ -270,12 +367,12 @@ estimateSingleEventSurvival <- function(cdm, #' estimateCompetingRiskSurvival <- function(cdm, targetCohortTable, - targetCohortId = NULL, outcomeCohortTable, + competingOutcomeCohortTable, + targetCohortId = NULL, outcomeCohortId = NULL, outcomeDateVariable = "cohort_start_date", outcomeWashout = Inf, - competingOutcomeCohortTable, competingOutcomeCohortId = NULL, competingOutcomeDateVariable = "cohort_start_date", competingOutcomeWashout = Inf, @@ -287,8 +384,7 @@ estimateCompetingRiskSurvival <- function(cdm, estimateGap = 1, restrictedMeanFollowUp = NULL, minimumSurvivalDays = 1, - minCellCount = 5, - returnParticipants = FALSE) { + minCellCount = 5) { if (is.null(targetCohortId)) { CDMConnector::assertTables(cdm, targetCohortTable) targetCohortId <- CDMConnector::cohort_count(cdm[[targetCohortTable]]) %>% @@ -296,23 +392,43 @@ estimateCompetingRiskSurvival <- function(cdm, dplyr::pull("cohort_definition_id") } if (is.null(outcomeCohortId)) { - CDMConnector::assertTables(cdm, outcomeCohortTable) + CDMConnector::assertTables(cdm, outcomeCohortTable, empty.ok = TRUE) outcomeCohortId <- CDMConnector::cohort_count(cdm[[outcomeCohortTable]]) %>% dplyr::filter(.data$number_records >0) %>% dplyr::pull("cohort_definition_id") } if (is.null(competingOutcomeCohortId)) { - CDMConnector::assertTables(cdm, competingOutcomeCohortTable) + CDMConnector::assertTables(cdm, competingOutcomeCohortTable, empty.ok = TRUE) competingOutcomeCohortId <- CDMConnector::cohort_count(cdm[[competingOutcomeCohortTable]]) %>% dplyr::filter(.data$number_records >0) %>% dplyr::pull("cohort_definition_id") } + emptyOutcomes <- omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id %in% .env$outcomeCohortId) %>% + dplyr::left_join( + omopgenerics::cohortCount(cdm[[outcomeCohortTable]]), + by = "cohort_definition_id") %>% + dplyr::filter(.data$number_records == 0) + emptyCompetingOutcomes <- omopgenerics::settings(cdm[[competingOutcomeCohortTable]]) %>% + dplyr::filter(.data$cohort_definition_id %in% .env$competingOutcomeCohortId) %>% + dplyr::left_join( + omopgenerics::cohortCount(cdm[[competingOutcomeCohortTable]]), + by = "cohort_definition_id") %>% + dplyr::filter(.data$number_records == 0) + if(nrow(emptyOutcomes) > 0){ + emptyOutcomenames <- emptyOutcomes %>% dplyr::pull("cohort_name") + cli::cli_warn("Outcome cohort{?s} {emptyOutcomenames} {?is/are} empty") + } + if(nrow(emptyCompetingOutcomes) > 0){ + emptyCompetingOutcomenames <- emptyCompetingOutcomes %>% dplyr::pull("cohort_name") + cli::cli_warn("Competing outcome cohort{?s} {emptyCompetingOutcomenames} {?is/are} empty") + } + surv <- list() attrition <- list() events <- list() summary <- list() - participants <- list() for (i in seq_along(targetCohortId)) { working_target_id <- targetCohortId[i] working_target <- omopgenerics::settings(cdm[[targetCohortTable]]) %>% @@ -352,27 +468,19 @@ estimateCompetingRiskSurvival <- function(cdm, estimateGap = estimateGap, restrictedMeanFollowUp = restrictedMeanFollowUp, minimumSurvivalDays = minimumSurvivalDays, - minCellCount = minCellCount, - returnParticipants = returnParticipants + minCellCount = minCellCount ) if(length(surv[[paste0(i, "_", j, "_", k)]]) > 0) { - attrition[[paste0(i, "_", j, "_", k)]] <- attr(surv[[paste0(i, "_", j, "_", k)]], "attrition") %>% + attrition[[paste0(i, "_", j, "_", k)]] <- attr(surv[[paste0(i, "_", j, "_", k)]], "cohort_attrition") %>% dplyr::mutate( - exposure_id = i, - outcome_id = j, - competing_outcome_id = k - ) + target_cohort = working_target, + outcome = working_outcome, + competing_outcome = working_competing_outcome + ) %>% + dplyr::collect() %>% + dplyr::filter(.data$cohort_definition_id == working_target_id) events[[paste0(i, "_", j, "_", k)]] <- attr(surv[[paste0(i, "_", j, "_", k)]], 'events') summary[[paste0(i, "_", j, "_", k)]] <- attr(surv[[paste0(i, "_", j, "_", k)]], 'summary') - if(returnParticipants) { - participants[[paste0(i, "_", j, "_", k)]] <- attr(surv[[paste0(i, "_", j, "_", k)]], "participants") %>% - dplyr::mutate( - exposure_id = i, - outcome_id = j, - competing_outcome_id = k - ) %>% - dplyr::collect() - } } } } @@ -381,46 +489,159 @@ estimateCompetingRiskSurvival <- function(cdm, # Remove empty elements for analysis which have no output surv[lengths(surv) == 0] <- NULL - surv_estimates <- dplyr::bind_rows(surv) - # add attributes - + estimates <- dplyr::bind_rows(surv) events <- dplyr::bind_rows(events) - attr(surv_estimates, 'attrition') <- dplyr::bind_rows(attrition) + attrition <- dplyr::bind_rows(attrition) summary <- dplyr::bind_rows(summary) - if(returnParticipants) { - attr(surv_estimates, 'participants') <- dplyr::bind_rows(participants) + # Put all outputs in the general omopgenerics format + attrition <- attrition %>% + dplyr::distinct() %>% + dplyr::mutate( + cdm_name = attr(cdm, "cdm_name"), + package_name = "CohortSurvival", + package_version = as.character(utils::packageVersion("CohortSurvival")), + result_type = "survival_attrition", + group_name = "target_cohort", + group_level = .data$target_cohort, + variable_level = paste0(.data$outcome, " &&& ",.data$competing_outcome), + analysis_type = "competing_risk", + estimate_name = "count" + ) %>% + tidyr::pivot_longer( + cols = c( + "number_records", "number_subjects", "excluded_records", + "excluded_subjects" + ), + names_to = "variable_name", + values_to = "estimate_value" + ) %>% + dplyr::mutate( + "estimate_value" = as.character(.data$estimate_value), + "estimate_type" = "integer" + ) %>% + visOmopResults::uniteStrata("reason") %>% + visOmopResults::uniteAdditional("reason_id") + + if(attrition %>% dplyr::group_by("target_cohort") %>% dplyr::tally() %>% dplyr::pull("n") == + attrition %>% dplyr::group_by("target_cohort", "cohort_definition_id") %>% dplyr::tally() %>% dplyr::pull("n")) { + attrition <- attrition %>% + dplyr::mutate(target_cohort = paste0(.data$target_cohort,"_",.data$cohort_definition_id), + group_level = .data$target_cohort) } - surv_estimates <- dplyr::bind_rows( - surv_estimates, - events, - summary - ) %>% - dplyr::mutate(estimate_value = as.character(.data$estimate_value), - result_id = 1L) - - attr(surv_estimates, 'events') <- NULL - attr(surv_estimates, 'summary') <- NULL + attrition <- attrition %>% + dplyr::select(-c("cohort_definition_id")) - settings <- surv_estimates %>% + settings <- estimates %>% dplyr::select("result_type", "package_name", "package_version", - "analysis_type") %>% - dplyr::distinct() %>% - dplyr::mutate(result_id = 1L) + "analysis_type", + "outcome", + "competing_outcome") %>% + dplyr::distinct() + + settings <- settings %>% + dplyr::mutate(eventgap = NA) %>% + dplyr::union_all( + events %>% + dplyr::select("result_type", + "package_name", + "package_version", + "analysis_type", + "outcome", + "competing_outcome", + "eventgap") %>% + dplyr::distinct() + ) - surv_estimates <- surv_estimates %>% + settings <- settings %>% + dplyr::union_all( + summary %>% + dplyr::select("result_type", + "package_name", + "package_version", + "analysis_type", + "outcome", + "competing_outcome") %>% + dplyr::mutate(eventgap = NA) %>% + dplyr::distinct() + ) + + settings <- settings %>% + dplyr::union_all( + attrition %>% + dplyr::select("result_type", + "analysis_type", + "package_name", + "package_version", + "outcome", + "competing_outcome") %>% + dplyr::mutate(eventgap = NA) %>% + dplyr::distinct() + ) + + settings <- settings %>% + dplyr::mutate(result_id = c(1:nrow(settings))) %>% + dplyr::relocate(.data$result_id, .before = "result_type") + + estimates <- estimates %>% + dplyr::mutate(additional_name = "time", + time = as.character(.data$time)) %>% + dplyr::rename("additional_level" = .data$time) + + events <- events %>% + dplyr::mutate(additional_name = "time", + time = as.character(.data$time)) %>% + dplyr::rename("additional_level" = .data$time) + + complete_results <- estimates %>% + dplyr::bind_rows(events) %>% + dplyr::bind_rows(summary) %>% + dplyr::mutate(estimate_value = as.character(.data$estimate_value)) %>% + dplyr::bind_rows(attrition) %>% + dplyr::left_join(settings, + by = c("analysis_type", "package_name", "package_version", "result_type", "outcome", "competing_outcome", "eventgap")) %>% + dplyr::mutate( + additional_name = dplyr::if_else(is.na(.data$additional_name), "overall", .data$additional_name), + additional_level = dplyr::if_else(is.na(.data$additional_level), "overall", .data$additional_level) + ) + + complete_results <- complete_results %>% dplyr::select(omopgenerics::resultColumns()) - surv_estimates <- omopgenerics::newSummarisedResult(surv_estimates, + settings <- settings %>% + dplyr::mutate( + outcome_date_variable = .env$outcomeDateVariable, + outcome_washout = .env$outcomeWashout, + competing_outcome_date_variable = .env$competingOutcomeDateVariable, + competing_outcome_washout = .env$competingOutcomeWashout, + censor_on_cohort_exit = .env$censorOnCohortExit, + censor_on_date = .env$censorOnDate, + follow_up_days = .env$followUpDays, + restricted_mean_follow_up = .env$restrictedMeanFollowUp, + minimum_survival_days = .env$minimumSurvivalDays, + min_cell_count = .env$minCellCount + ) + + surv_estimates <- omopgenerics::newSummarisedResult(complete_results, settings = settings) + + attr(surv_estimates, "cohort_attrition") <- NULL + + # Suppress results with omopgenerics + surv_estimates <- surv_estimates %>% + dplyr::mutate(estimate_name = dplyr::if_else( + .data$estimate_name %in% c("n_risk", "n_events", "n_censor", "number_records"), + paste0(.data$estimate_name,"_count"), .data$estimate_name + )) %>% + omopgenerics::suppress(minCellCount = minCellCount) + return(surv_estimates) } - estimateSurvival <- function(cdm, targetCohortTable, targetCohortId = NULL, @@ -429,7 +650,7 @@ estimateSurvival <- function(cdm, outcomeDateVariable = "cohort_start_date", outcomeWashout = Inf, competingOutcomeCohortTable = NULL, - competingOutcomeCohortId = NULL, + competingOutcomeCohortId = 1, competingOutcomeDateVariable = "cohort_start_date", competingOutcomeWashout = Inf, censorOnCohortExit = FALSE, @@ -440,276 +661,89 @@ estimateSurvival <- function(cdm, estimateGap = 1, restrictedMeanFollowUp = NULL, minimumSurvivalDays = 1, - minCellCount = 5, - returnParticipants = FALSE) { - # check input - errorMessage <- checkmate::makeAssertCollection() - - checkmate::assertCharacter(targetCohortTable, - len = 1, - add = errorMessage - ) - checkmate::assertCharacter(outcomeCohortTable, - len = 1, - add = errorMessage - ) - checkmate::assertCharacter(competingOutcomeCohortTable, - len = 1, - null.ok = TRUE, - add = errorMessage - ) - checkCdm(cdm, tables = c( - "person", "observation_period", - targetCohortTable, - outcomeCohortTable - )) - - checkIsCohort_exp(cdm[[targetCohortTable]]) - checkmate::assertIntegerish(targetCohortId, - len = 1, - lower = 1, - add = errorMessage - ) - checkStrata(strata, cdm[[targetCohortTable]]) - checkmate::assertIntegerish(outcomeCohortId, - len = 1, - lower = 1, - add = errorMessage - ) - checkmate::assertIntegerish(competingOutcomeCohortId, - len = 1, - lower = 1, - add = errorMessage - ) - - checkmate::assertCharacter(outcomeDateVariable, - len = 1, - add = errorMessage - ) - checkmate::assertCharacter(competingOutcomeDateVariable, - len = 1, - add = errorMessage - ) - checkmate::assertLogical(censorOnCohortExit, - len = 1, - add = errorMessage - ) - if (!is.null(censorOnDate)) { - checkdate <- censorOnDate %>% inherits("Date") - if (!checkdate) { - cli::cli_abort("{censorOnDate} is neither NULL nor of type Date") - } - } - if (followUpDays != "Inf") { - checkmate::assertIntegerish(followUpDays, - len = 1, - lower = 0, - add = errorMessage - ) - } - if (outcomeWashout != "Inf") { - checkmate::assertIntegerish(outcomeWashout, - len = 1, - lower = 1, - add = errorMessage - ) - } - if (competingOutcomeWashout != "Inf") { - checkmate::assertIntegerish(competingOutcomeWashout, - len = 1, - lower = 0, - add = errorMessage - ) - } - checkmate::assertIntegerish(eventGap, - lower = 1, - add = errorMessage - ) - checkmate::assertIntegerish(estimateGap, - lower = 0, - add = errorMessage - ) - checkmate::assertIntegerish(restrictedMeanFollowUp, - lower = 1, - len = 1, - null.ok = TRUE, - add = errorMessage) - checkmate::assertIntegerish(minimumSurvivalDays, - len = 1, - lower = 0, - add = errorMessage - ) - checkmate::assertIntegerish(minCellCount, - len = 1, - lower = 0, - null.ok = FALSE, - add = errorMessage - ) - checkmate::assertLogical(returnParticipants, - len = 1, - add = errorMessage - ) - - checkmate::reportAssertions(collection = errorMessage) - + minCellCount = 5) { + + # check inputs + validateInputSurvival(cdm, targetCohortTable, targetCohortId, outcomeCohortTable, + outcomeCohortId, outcomeDateVariable, outcomeWashout, + competingOutcomeCohortTable, competingOutcomeCohortId, + competingOutcomeDateVariable, competingOutcomeWashout, + censorOnCohortExit, censorOnDate, followUpDays, + strata, eventGap, estimateGap, restrictedMeanFollowUp, + minimumSurvivalDays, minCellCount) + +# extract and prepare exposure data workingExposureTable <- cdm[[targetCohortTable]] %>% - dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) + dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) %>% + addCohortSurvival(cdm, outcomeCohortTable, outcomeCohortId, outcomeDateVariable, + outcomeWashout, censorOnCohortExit, censorOnDate, followUpDays) %>% + dplyr::rename( "outcome_time" = "time", "outcome_status" = "status") %>% + dplyr::compute(temporary = FALSE) - attrition <- recordAttrition( - table = workingExposureTable, - id = "subject_id", - reasonId = 1, - reason = "Starting analysis population" - ) - - # addCohortSurvival for primary event of interest - workingExposureTable <- workingExposureTable %>% - addCohortSurvival( - cdm = cdm, - outcomeCohortTable = outcomeCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeDateVariable = outcomeDateVariable, - outcomeWashout = outcomeWashout, - censorOnCohortExit = censorOnCohortExit, - censorOnDate = censorOnDate, - followUpDays = followUpDays - ) %>% - dplyr::rename( - "outcome_time" = "time", - "outcome_status" = "status" - ) - - # competing risk (if there is one) + # handle competing risks if (!is.null(competingOutcomeCohortTable)) { workingExposureTable <- workingExposureTable %>% - addCohortSurvival( - cdm = cdm, - outcomeCohortTable = competingOutcomeCohortTable, - outcomeCohortId = competingOutcomeCohortId, - outcomeDateVariable = competingOutcomeDateVariable, - outcomeWashout = competingOutcomeWashout, - censorOnCohortExit = censorOnCohortExit, - censorOnDate = censorOnDate, - followUpDays = followUpDays - ) %>% - dplyr::rename( - "competing_risk_time" = "time", - "competing_risk_status" = "status" - ) + addCohortSurvival(cdm, competingOutcomeCohortTable, competingOutcomeCohortId, + competingOutcomeDateVariable, competingOutcomeWashout, censorOnCohortExit, + censorOnDate, followUpDays) %>% + dplyr::rename("competing_risk_time" = "time", "competing_risk_status" = "status") %>% + dplyr::compute(temporary = FALSE) } # collect - survDataDb <- workingExposureTable %>% + workingExposureTable <- workingExposureTable %>% dplyr::filter(!is.na(.data$outcome_time) && - !is.na(.data$outcome_status)) - - survData <- survDataDb %>% - dplyr::collect() + !is.na(.data$outcome_status)) %>% + dplyr::compute(temporary = FALSE) %>% + omopgenerics::recordCohortAttrition(reason = "No outcome event in washout period") - attrition <- recordAttrition( - table = survData, - id = "subject_id", - reasonId = 2, - reason = "Outcome status not NA", - existingAttrition = attrition - ) - - survData <- survData %>% - dplyr::filter(.data$outcome_time >= .env$minimumSurvivalDays) - - attrition <- recordAttrition( - table = survData, - id = "subject_id", - reasonId = 3, - reason = paste0("Survival days for outcome less than ", minimumSurvivalDays), - existingAttrition = attrition - ) + workingExposureTable <- workingExposureTable %>% + dplyr::filter(.data$outcome_time >= .env$minimumSurvivalDays) %>% + omopgenerics::recordCohortAttrition(reason = paste0("Survival days for outcome less than ", minimumSurvivalDays)) if (!is.null(competingOutcomeCohortTable)) { - survData <- survData %>% - dplyr::filter(.data$competing_risk_time >= .env$minimumSurvivalDays) - - attrition <- recordAttrition( - table = survData, - id = "subject_id", - reasonId = 4, - reason = paste0("Survival days for competing outcome less than ", minimumSurvivalDays), - existingAttrition = attrition - ) + workingExposureTable <- workingExposureTable %>% + dplyr::filter(.data$competing_risk_time >= .env$minimumSurvivalDays) %>% + omopgenerics::recordCohortAttrition(reason = paste0("Survival days for competing outcome less than ", minimumSurvivalDays)) } + survData <- workingExposureTable %>% + dplyr::collect() + if (!is.null(competingOutcomeCohortTable)) { - # - add competing risk variable - # 0: no event, 2: outcome event, 3: competing risk event - survData <- addCompetingRiskVars( - data = survData, - time1 = "outcome_time", - status1 = "outcome_status", - time2 = "competing_risk_time", - status2 = "competing_risk_status", - nameOutTime = "outcome_or_competing_time", - nameOutStatus = "outcome_or_competing_status" - ) + survData <- addCompetingRiskVars(data = survData, time1 = "outcome_time", + status1 = "outcome_status", time2 = "competing_risk_time", + status2 = "competing_risk_status", nameOutTime = "outcome_or_competing_time", + nameOutStatus = "outcome_or_competing_status") } # time points to extract survival estimates - if(followUpDays == "Inf") { - timepoints <- seq(0, max(survData$outcome_time), by = estimateGap) + if(survData %>% dplyr::tally() %>% dplyr::pull() != 0) { + timepoints <- seq(0, if (followUpDays == "Inf") max(survData$outcome_time) else followUpDays, by = estimateGap) } else { - timepoints <- seq(0, max(survData$outcome_time, followUpDays), by = estimateGap) + timepoints <- c(0) } # fit survival, with strata if (is.null(competingOutcomeCohortTable)) { - surv <- singleEventSurvival( - survData = survData, - times = timepoints, - variables = strata, - eventGap = eventGap, - restrictedMeanFollowUp = restrictedMeanFollowUp - ) + surv <- singleEventSurvival(survData, timepoints, strata, eventGap, restrictedMeanFollowUp) } else { - surv <- competingRiskSurvival( - survData = survData, - times = timepoints, - variables = strata, - eventGap = eventGap, - restrictedMeanFollowUp = restrictedMeanFollowUp - ) + surv <- competingRiskSurvival(survData, timepoints, strata, eventGap, restrictedMeanFollowUp) } + # process and summarise results if (nrow(surv) > 0) { - survivalEstimates <- addCohortDetails( - x = surv, - cdm = cdm, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable, - competingOutcomeCohortId = competingOutcomeCohortId, - competingOutcomeCohortTable = competingOutcomeCohortTable - ) - - survivalEstimates <- survivalEstimates %>% + survivalEstimates <- addCohortDetails(surv, cdm, targetCohortId, targetCohortTable, + outcomeCohortId, outcomeCohortTable, competingOutcomeCohortId, competingOutcomeCohortTable, + "survival") %>% + tidyr::pivot_longer(cols = "outcome", names_to = "variable_name", + values_to = "variable_level") %>% + dplyr::mutate(result_type = dplyr::if_else( + .data$analysis_type == "competing_risk", "cumulative_failure_probability", + "survival_probability")) %>% + dplyr::select(!c("n_risk","variable_type", "n_censor")) %>% tidyr::pivot_longer( - cols = "outcome", - names_to = "variable_name", - values_to = "variable_level" - ) - survivalEstimates <- survivalEstimates %>% - dplyr::mutate(variable_name = dplyr::if_else( - .data$analysis_type == "competing_risk", - "cumulative_failure_probability", - "survival_probability")) - - survivalEstimates <- survivalEstimates %>% - dplyr::select(!c("n_risk","variable_type")) %>% - tidyr::pivot_longer( - cols = c( - "estimate", - "estimate_95CI_lower", - "estimate_95CI_upper" - ), + cols = c("estimate", "estimate_95CI_lower", "estimate_95CI_upper"), names_to = "estimate_name", values_to = "estimate_value" ) %>% @@ -722,88 +756,29 @@ estimateSurvival <- function(cdm, if(!is.null(competingOutcomeCohortTable)) { survivalEstimates <- survivalEstimates %>% dplyr::mutate(competing_outcome = omopgenerics::settings(cdm[[competingOutcomeCohortTable]]) %>% - dplyr::filter(.data$cohort_definition_id == .env$competingOutcomeCohortId) %>% - dplyr::pull("cohort_name")) %>% - visOmopResults::uniteAdditional(cols = c("time", "outcome", "competing_outcome")) + dplyr::filter(.data$cohort_definition_id == .env$competingOutcomeCohortId) %>% + dplyr::pull("cohort_name")) } else { survivalEstimates <- survivalEstimates %>% - visOmopResults::uniteAdditional(cols = c("time", "outcome")) + dplyr::mutate(competing_outcome = "none") } survivalEstimates <- dplyr::distinct(survivalEstimates) # add attributes - if (isTRUE(returnParticipants)) { - participantsRef <- survDataDb %>% - dplyr::select( - "cohort_definition_id", - "subject_id", - "cohort_start_date", - "cohort_end_date" - ) %>% - dplyr::compute() - - attr(participantsRef, "cohort_set") <- participantsRef %>% - dplyr::select("cohort_definition_id") %>% - dplyr::distinct() %>% - dplyr::mutate(cohort_name = paste0( - "survival_participants_", - as.integer(.data$cohort_definition_id) - )) %>% - dplyr::collect() - - attr(participantsRef, "cohort_attrition") <- participantsRef %>% - dplyr::group_by(.data$cohort_definition_id) %>% - dplyr::summarise( - number_records = dplyr::n(), - number_subjects = dplyr::n_distinct(.data$subject_id), - .groups = "drop" - ) %>% - dplyr::mutate( - "reason_id" = 1, - "reason" = "Initial qualifying events", - "excluded_records" = 0, - "excluded_subjects" = 0 - ) %>% - dplyr::collect() - - attr(participantsRef, "tbl_name") <- "survival_participants" - - attr(survivalEstimates, "participants") <- omopgenerics::newCohortTable( - participantsRef - ) - } + events <- attr(surv, "events") - events <- attr(surv, "events") %>% - dplyr::group_by(.data$eventGap, .data$strata_name, .data$strata_level, .data$outcome) %>% - dplyr::mutate(to_suppress = dplyr::if_else(.data$n_risk < .env$minCellCount, - 1, 0)) %>% - dplyr::mutate(to_suppress = cumsum(.data$to_suppress)) %>% - dplyr::ungroup() %>% - dplyr::filter(.data$to_suppress == 0) %>% - dplyr::select(!"to_suppress") - - events <- events %>% - dplyr::mutate(n_events = dplyr::if_else(.data$n_events < 5 & .data$n_events >0, - NA, .data$n_events)) - - attr(survivalEstimates, "events") <- addCohortDetails( - x = events, - cdm = cdm, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable, - competingOutcomeCohortTable = competingOutcomeCohortTable, - competingOutcomeCohortId = competingOutcomeCohortId) %>% + attr(survivalEstimates, "events") <- addCohortDetails(events, cdm, + targetCohortId, targetCohortTable, outcomeCohortId, + outcomeCohortTable, competingOutcomeCohortId, competingOutcomeCohortTable, + "survival_events") %>% dplyr::select(!"variable_type") %>% - tidyr::pivot_longer(cols = c("n_risk", "n_events"), + tidyr::pivot_longer(cols = c("n_risk", "n_events", "n_censor"), names_to = "estimate_name", values_to = "estimate_value") %>% dplyr::mutate( - variable_name = "survival_events", estimate_type = "numeric", - result_type = "survival" + variable_name = "outcome" ) %>% dplyr::rename(variable_level = "outcome") %>% dplyr::mutate(outcome = omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% @@ -815,127 +790,83 @@ estimateSurvival <- function(cdm, attr(survivalEstimates, "events") <- attr(survivalEstimates, "events") %>% dplyr::mutate(competing_outcome = omopgenerics::settings(cdm[[competingOutcomeCohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == .env$competingOutcomeCohortId) %>% - dplyr::pull("cohort_name")) %>% - visOmopResults::uniteAdditional(cols = c("time", "eventgap", 'outcome', 'competing_outcome')) + dplyr::pull("cohort_name")) } else { attr(survivalEstimates, "events") <- attr(survivalEstimates, "events") %>% - visOmopResults::uniteAdditional(cols = c("time", "eventgap", 'outcome')) + dplyr::mutate(competing_outcome = "none") } - attr(survivalEstimates, "attrition") <- attrition + attr(survivalEstimates, "cohort_attrition") <- attr(workingExposureTable, "cohort_attrition") if (is.null(competingOutcomeCohortTable)) { - attr(survivalEstimates, "summary") <- addCohortDetails( - x = attr(surv, "summary"), - cdm = cdm, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable, - summary = TRUE + summary <- addCohortDetails( + x = attr(surv, "summary"), cdm, targetCohortId, targetCohortTable, + outcomeCohortId, outcomeCohortTable, resultType = "survival_summary" ) %>% dplyr::mutate(analysis_type = "single_event") } else { attr(surv, "summary") <- attr(surv, "summary") %>% dplyr::filter(.data$outcome != "none") - attr(survivalEstimates, "summary") <- addCohortDetails( - x = attr(surv, "summary"), - cdm = cdm, - targetCohortId = targetCohortId, - targetCohortTable = targetCohortTable, - outcomeCohortId = outcomeCohortId, - outcomeCohortTable = outcomeCohortTable, - competingOutcomeCohortTable = competingOutcomeCohortTable, - competingOutcomeCohortId = competingOutcomeCohortId, - summary = TRUE + summary <- addCohortDetails( + x = attr(surv, "summary"), cdm, targetCohortId, targetCohortTable, + outcomeCohortId, outcomeCohortTable, competingOutcomeCohortId, + competingOutcomeCohortTable, "survival_summary" ) %>% dplyr::mutate(analysis_type = "competing_risk") } - attr(survivalEstimates, "summary") <- attr(survivalEstimates, "summary") %>% + summary <- summary %>% dplyr::filter(.data$outcome != "none") %>% - dplyr::filter(.data$number_records >= .env$minCellCount) %>% - dplyr::mutate(n_events = dplyr::if_else(.data$n_events < .env$minCellCount & .data$n_events > 0, - NA, .data$n_events)) %>% dplyr::mutate( - result_type = "survival", - variable_name = "survival_summary", + variable_name = "outcome", variable_level = .data$outcome, estimate_type = "numeric" ) %>% - dplyr::select(!c("variable_type", "outcome")) %>% + dplyr::select(-c("variable_type", "outcome")) %>% tidyr::pivot_longer( - cols = -c( - "cdm_name", - "package_name", - "package_version", - "result_type", - "group_name", - "group_level", - "strata_name", - "strata_level", - "variable_name", - "variable_level", - "estimate_type", - "analysis_type" - ), + cols = -c("cdm_name", "package_name", "package_version", "result_type", "group_name", "group_level", + "strata_name", "strata_level", "variable_name", "variable_level", "estimate_type", "analysis_type"), names_to = "estimate_name", values_to = "estimate_value" ) %>% - dplyr::mutate(estimate_value = round(.data$estimate_value)) %>% - dplyr::relocate("analysis_type", - .after = "estimate_name" - ) %>% - dplyr::relocate("estimate_value", - .after = "analysis_type" - ) %>% - dplyr::mutate(outcome = omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% + dplyr::mutate(estimate_value = round(.data$estimate_value), + outcome = omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == .env$outcomeCohortId) %>% dplyr::pull("cohort_name")) if(!is.null(competingOutcomeCohortTable)) { - attr(survivalEstimates, "summary") <- attr(survivalEstimates, "summary") %>% + summary <- summary %>% dplyr::mutate(competing_outcome = omopgenerics::settings(cdm[[competingOutcomeCohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == .env$competingOutcomeCohortId) %>% - dplyr::pull("cohort_name")) %>% - visOmopResults::uniteAdditional(cols = c('outcome', 'competing_outcome')) + dplyr::pull("cohort_name")) } else { - attr(survivalEstimates, "summary") <- attr(survivalEstimates, "summary") %>% - visOmopResults::uniteAdditional(cols = c('outcome')) + summary <- summary %>% + dplyr::mutate(competing_outcome = "none") } + attr(survivalEstimates, "summary") <- summary + # round estimates survivalEstimates <- survivalEstimates %>% dplyr::mutate(estimate_value = round(.data$estimate_value, 4)) - # obscure counts below minCellCount - survivalEstimates <- suppressSurvivalCounts(survivalEstimates, minCellCount) } else { survivalEstimates <- surv } - return(survivalEstimates) } -addCompetingRiskVars <- function(data, time1, status1, - time2, status2, - nameOutTime, - nameOutStatus) { +addCompetingRiskVars <- function(data, time1, status1, time2, status2, + nameOutTime, nameOutStatus) { # - add competing risk variables (time and status) # 0: no event, 1: event 1, 2: event 2 - data <- data %>% - dplyr::mutate(!!nameOutTime := dplyr::if_else( - .data[[time2]] > .data[[time1]], - .data[[time1]], .data[[time2]] - )) %>% - dplyr::mutate(!!nameOutStatus := as.factor(dplyr::if_else( - .data[[time2]] <= .data[[time1]], - 2 * .data[[status2]], .data[[status1]] - ))) - - return(data) + data %>% + dplyr::mutate( + !!nameOutTime := pmin(.data[[time1]], .data[[time2]]), + !!nameOutStatus := factor(dplyr::if_else(.data[[time2]] <= .data[[time1]], 2 * .data[[status2]], .data[[status1]])) + ) } singleEventSurvival <- function(survData, times, variables, eventGap, @@ -950,11 +881,13 @@ singleEventSurvival <- function(survData, times, variables, eventGap, data = survData ) + # Calculate quantiles q0 <- stats::quantile(fit, probs = 0) q25 <- stats::quantile(fit, probs = 0.25) q75 <- stats::quantile(fit, probs = 0.75) q100 <- stats::quantile(fit, probs = 1) + # Create summary with selected and renamed columns fitSummary[[1]] <- as.data.frame(t(summary(fit, rmean = restrictedMeanFollowUp)$table)) %>% dplyr::select(!dplyr::any_of(c("n.max", "n.start"))) %>% dplyr::rename( @@ -978,29 +911,24 @@ singleEventSurvival <- function(survData, times, variables, eventGap, q75_survival_95CI_higher = .env$q75$upper, q100_survival = .env$q100$quantile, q100_survival_95CI_lower = .env$q100$lower, - q100_survival_95CI_higher = .env$q100$upper - ) %>% - dplyr::mutate(analysis_type = "single event") %>% - dplyr::mutate( + q100_survival_95CI_higher = .env$q100$upper, + analysis_type = "single event", strata_name = "overall", strata_level = "overall", outcome = "outcome" ) summ <- summary(fit, times = times, extend = TRUE) - estimates[[1]] <- dplyr::bind_rows( - dplyr::tibble( + estimates[[1]] <- dplyr::tibble( outcome = "outcome", time = summ$time, n_event = summ$n.event, + n_censor = summ$n.censor, n_risk = summ$n.risk, estimate_type = "numeric", estimate = summ$surv, estimate_95CI_lower = summ$lower, - estimate_95CI_upper = summ$upper - ) - ) %>% - dplyr::mutate( + estimate_95CI_upper = summ$upper, analysis_type = "single_event", strata_name = "overall", strata_level = "overall" @@ -1092,29 +1020,25 @@ singleEventSurvival <- function(survData, times, variables, eventGap, dplyr::group_by(.data$strata) %>% dplyr::summarise(max_time = max(.data$time, na.rm = TRUE)), by = "strata" - ) - - fitSummary[[i + 1]] <- fitSummary[[i + 1]] %>% + ) %>% dplyr::mutate( analysis_type = "single event", outcome = "outcome" ) summ <- summary(fit, times = times, extend = TRUE) - estimates[[i + 1]] <- dplyr::bind_rows( - dplyr::tibble( + estimates[[i + 1]] <- dplyr::tibble( strata = summ$strata, outcome = "outcome", time = summ$time, n_event = summ$n.event, + n_censor = summ$n.censor, n_risk = summ$n.risk, estimate_type = "numeric", estimate = summ$surv, estimate_95CI_lower = summ$lower, - estimate_95CI_upper = summ$upper - ) - ) %>% - dplyr::mutate(analysis_type = "single_event") + estimate_95CI_upper = summ$upper, + analysis_type = "single_event") # Add strata variable columns in a good format for (j in seq_along(name)) { @@ -1160,59 +1084,44 @@ singleEventSurvival <- function(survData, times, variables, eventGap, cli::cli_progress_done() } - # Output as tibble estimates <- dplyr::bind_rows(estimates) - # Get number of events for all eventGaps - number_events <- estimates %>% - dplyr::filter(.data$estimate_type == "numeric") %>% - dplyr::group_by(.data$strata_name, .data$strata_level) %>% - dplyr::mutate(n_events = cumsum(.data$n_event)) %>% - dplyr::filter(.data$time %% eventGap[1] == 0 | .data$time == max(.data$time)) %>% - dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% - dplyr::ungroup() %>% - dplyr::mutate( - eventGap = eventGap[1], - outcome = "outcome" - ) %>% - dplyr::select( - "time", "n_risk", "n_events", "eventGap", "outcome", - "strata_name", "strata_level" - ) - - for (t in eventGap[-1]) { - number_events <- dplyr::union_all( - number_events, - estimates %>% - dplyr::filter(.data$estimate_type == "numeric") %>% - dplyr::group_by(.data$strata_name, .data$strata_level) %>% - dplyr::mutate(n_events = cumsum(.data$n_event)) %>% - dplyr::filter(.data$time %% t == 0 | .data$time == max(.data$time)) %>% - dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% - dplyr::ungroup() %>% - dplyr::mutate(eventGap = t) %>% - dplyr::select( - "time", "n_risk","n_events", "eventGap", "outcome", - "strata_name", "strata_level" - ) - ) + # Helper function to calculate events for a given eventGap + calculate_events <- function(data, gap) { + data %>% + dplyr::filter(.data$estimate_type == "numeric") %>% + dplyr::group_by(.data$strata_name, .data$strata_level) %>% + dplyr::mutate( + n_events = cumsum(.data$n_event), + n_censor = cumsum(.data$n_censor) + ) %>% + dplyr::filter(.data$time %% gap == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate( + n_events = c(.data$n_events[1], diff(.data$n_events)), + n_censor = c(.data$n_censor[1], diff(.data$n_censor)), + eventGap = gap, + outcome = "outcome" + ) %>% + dplyr::ungroup() %>% + dplyr::select("time", "n_risk", "n_events", "n_censor", "eventGap", "outcome", "strata_name", "strata_level") } + # Calculate number of events for all eventGaps + number_events <- purrr::map_dfr(eventGap, ~calculate_events(estimates, .x)) + estimates <- estimates %>% dplyr::select(!"n_event") attr(estimates, "events") <- number_events - attr(estimates, "summary") <- dplyr::bind_rows(fitSummary) - row.names(attr(estimates, "summary")) <- NULL - attr(estimates, "summary") <- dplyr::as_tibble(attr(estimates, "summary")) - + attr(estimates, "summary") <- dplyr::as_tibble(dplyr::bind_rows(fitSummary)) return(estimates) } competingRiskSurvival <- function(survData, times, variables, eventGap, restrictedMeanFollowUp = NULL) { - if (!length(unique(as.character(survData$outcome_or_competing_status))) == 3) { + if (!length(unique(as.character(survData %>% + dplyr::pull("outcome_or_competing_status")))) == 3) { cli::cli_h1("No results for competing risk analysis") cli::cli_text(c( "Competing risk variable must have three levels.", @@ -1238,9 +1147,9 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, ) ~ 1, data = survData ) - summ <- summary(fit, times = times, extend = TRUE) + summ <- summary(fit, times = times, extend = TRUE, data.frame = TRUE) - if(ncol(summ$n.event) < 3){ + if(summ %>% dplyr::select("state") %>% dplyr::distinct() %>% dplyr::pull() %>% length() < 3){ cli::cli_h1("No results for competing risk analysis") cli::cli_text(c( "Competing risk variable must have three levels.", @@ -1271,60 +1180,22 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, ) )) - estimates[[1]] <- dplyr::bind_rows( - dplyr::bind_cols( - data.frame( - outcome = 1L, - time = summ$time, - n_event = summ$n.event[, 2], - n_risk = summ$n.risk[, 1], - estimate_type = "numeric" - ), - as.data.frame(summ$pstate) %>% - # from survival version 3.6-4 summ$pstate became named - dplyr::rename(dplyr::any_of(c("estimate" = "V2", - "estimate" = "1"))) %>% - dplyr::select("estimate"), - as.data.frame(summ$lower) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_lower" = "V2", - "estimate_95CI_lower" = "1"))) |> - dplyr::select("estimate_95CI_lower"), - as.data.frame(summ$upper) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_upper" = "V2", - "estimate_95CI_upper" = "1"))) |> - dplyr::select("estimate_95CI_upper") - ), - dplyr::bind_cols( - data.frame( - outcome = 2L, - time = summ$time, - n_event = summ$n.event[, 3], - n_risk = summ$n.risk[, 1], - estimate_type = "numeric" - ), - as.data.frame(summ$pstate) %>% - dplyr::rename(dplyr::any_of(c("estimate" = "V3", - "estimate" = "2"))) %>% - dplyr::select("estimate"), - as.data.frame(summ$lower) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_lower" = "V3", - "estimate_95CI_lower" = "2"))) |> - dplyr::select("estimate_95CI_lower"), - as.data.frame(summ$upper) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_upper" = "V3", - "estimate_95CI_upper" = "2"))) |> - dplyr::select("estimate_95CI_upper") - ) - ) %>% + estimates[[1]] <- summ %>% + dplyr::rename( + "estimate" = "pstate", + "estimate_95CI_lower" = "lower", + "estimate_95CI_upper" = "upper", + "outcome" = "state", + "n_risk" = "n.risk", + "n_event" = "n.event", + "n_censor" = "n.censor" + ) %>% dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, - "outcome", - "competing_outcome" - ), - estimate = .data$estimate, - estimate_95CI_upper = .data$estimate_95CI_upper, - estimate_95CI_lower = .data$estimate_95CI_lower + "outcome", dplyr::if_else(.data$outcome == 2, + "competing_outcome", "none")) ) %>% - dplyr::mutate(analysis_type = "competing_risk") %>% + dplyr::mutate(analysis_type = "competing_risk", + estimate_type = "numeric") %>% dplyr::mutate( strata_name = "overall", strata_level = "overall" @@ -1359,13 +1230,12 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, data = survData %>% dplyr::filter(dplyr::if_any(.env$name, ~ !is.na(.x))) ) - summ <- summary(fit, times = times, extend = TRUE) + summ <- summary(fit, times = times, extend = TRUE, data.frame = TRUE) tidyFit <- broom::tidy(fit) maxTimes <- tidyFit %>% - dplyr::group_by(.data$strata, .data$state) %>% - dplyr::summarise(max_time = max(.data$time, na.rm = TRUE)) - + dplyr::group_by(.data$strata, .data$state) %>% + dplyr::summarise(max_time = max(.data$time, na.rm = TRUE)) fitSummary[[i + 1]] <- as.data.frame(summary(fit, rmean = restrictedMeanFollowUp)$table) %>% dplyr::rename( @@ -1378,10 +1248,10 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, tidyr::separate_wider_delim(.data$rowname, delim = ", ", names = c(variables[[i]], "state")) %>% - tidyr::unite(col = "strata", variables[[i]], sep = ", ") |> + tidyr::unite(col = "strata", variables[[i]], sep = ", ") %>% dplyr::left_join(maxTimes, - by = c("strata", "state")) |> - dplyr::rename("outcome" = "state") |> + by = c("strata", "state")) %>% + dplyr::rename("outcome" = "state") %>% dplyr::mutate( strata_name = paste(name, collapse = " &&& "), strata_level = gsub(", ", " &&& ", gsub( @@ -1399,73 +1269,28 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, dplyr::if_else(.data$outcome == "1", "outcome", "competing_outcome" ) - )) |> + )) %>% dplyr::mutate( strata = sub(",([^,]*)$", "", .data$strata) ) - estimates[[i + 1]] <- dplyr::bind_rows( - dplyr::bind_cols( - data.frame( - outcome = 1L, - time = summ$time, - strata_level = summ$strata, - n_event = summ$n.event[, 2], - n_risk = summ$n.risk[, 1], - estimate_type = "numeric" - ), - as.data.frame(summ$pstate) %>% - # from survival version 3.6-4 summ$pstate became named - dplyr::rename(dplyr::any_of(c("estimate" = "V2", - "estimate" = "1"))) %>% - dplyr::select("estimate"), - as.data.frame(summ$lower) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_lower" = "V2", - "estimate_95CI_lower" = "1"))) |> - dplyr::select("estimate_95CI_lower"), - as.data.frame(summ$upper) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_upper" = "V2", - "estimate_95CI_upper" = "1"))) |> - dplyr::select("estimate_95CI_upper") - ), - dplyr::bind_cols( - data.frame( - outcome = 2L, - strata_level = summ$strata, - time = summ$time, - n_event = summ$n.event[, 3], - n_risk = summ$n.risk[, 1], - estimate_type = "numeric" - ), - as.data.frame(summ$pstate) %>% - # from survival version 3.6-4 summ$pstate became named - dplyr::rename(dplyr::any_of(c("estimate" = "V3", - "estimate" = "2"))) %>% - dplyr::select("estimate"), - as.data.frame(summ$lower) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_lower" = "V3", - "estimate_95CI_lower" = "2"))) |> - dplyr::select("estimate_95CI_lower"), - as.data.frame(summ$upper) %>% - dplyr::rename(dplyr::any_of(c("estimate_95CI_upper" = "V3", - "estimate_95CI_upper" = "2"))) |> - dplyr::select("estimate_95CI_upper") - ) - ) %>% - dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, - "outcome", - "competing_outcome" - ), - estimate = .data$estimate, - estimate_95CI_upper = .data$estimate_95CI_upper, - estimate_95CI_lower = .data$estimate_95CI_lower + estimates[[i + 1]] <- summ %>% + dplyr::rename( + "estimate" = "pstate", + "estimate_95CI_lower" = "lower", + "estimate_95CI_upper" = "upper", + "outcome" = "state", + "n_risk" = "n.risk", + "n_event" = "n.event", + "n_censor" = "n.censor" ) %>% - dplyr::mutate(analysis_type = "competing_risk") - - estimates[[i + 1]] <- estimates[[i + 1]] %>% - dplyr::mutate(strata_name = paste(name, collapse = " &&& ")) %>% - dplyr::relocate("strata_level", .after = "strata_name") %>% - dplyr::mutate(strata = .data$strata_level) # to use in below join + dplyr::mutate(outcome = dplyr::if_else(.data$outcome == 1, + "outcome", dplyr::if_else(.data$outcome == 2, + "competing_outcome", "none")), + analysis_type = "competing_risk", + estimate_type = "numeric", + strata_name = paste(name, collapse = " &&& "), + strata_level = .data$strata) # to use in below join for (j in seq_along(name)) { estimates[[i + 1]] <- estimates[[i + 1]] %>% @@ -1498,46 +1323,45 @@ competingRiskSurvival <- function(survData, times, variables, eventGap, } # Output as tibble - estimates <- dplyr::bind_rows(estimates) %>% dplyr::as_tibble() - - # Get number of events for all eventGaps - number_events <- estimates %>% - dplyr::group_by(.data$strata_name, .data$strata_level, .data$outcome) %>% - dplyr::mutate(n_events = cumsum(.data$n_event)) %>% - dplyr::filter(.data$time %% eventGap[1] == 0 | .data$time == max(.data$time)) %>% - dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% - dplyr::ungroup() %>% - dplyr::mutate(eventGap = eventGap[1]) %>% - dplyr::select( - "time", "n_risk", "n_events", - "eventGap", "outcome", - "strata_name", "strata_level" - ) - - for (t in eventGap[-1]) { - number_events <- dplyr::union_all( - number_events, - estimates %>% - dplyr::group_by(.data$strata_name, .data$strata_level, .data$outcome) %>% - dplyr::mutate(n_events = cumsum(.data$n_event)) %>% - dplyr::filter(.data$time %% t == 0 | .data$time == max(.data$time)) %>% - dplyr::mutate(n_events = c(.data$n_events[1], diff(.data$n_events))) %>% - dplyr::ungroup() %>% - dplyr::mutate(eventGap = t) %>% - dplyr::select( - "time", "n_risk","n_events", "eventGap", "outcome", "strata_name", - "strata_level" - ) - ) + estimates <- dplyr::bind_rows(estimates) %>% + dplyr::as_tibble() %>% + dplyr::select(-"std.err") + + # Helper function to calculate events for a specific eventGap + calculate_number_events <- function(data, gap) { + data %>% + dplyr::select("time", "n_event", "outcome", "strata_name", "strata_level") %>% + dplyr::filter(.data$outcome != "none") %>% + dplyr::left_join( + data %>% + dplyr::filter(.data$outcome == "none") %>% + dplyr::select("time", "n_risk", "n_censor", "strata_name", "strata_level"), + by = c("time", "strata_name", "strata_level") + ) %>% + dplyr::mutate(eventGap = gap) %>% + dplyr::group_by(.data$strata_name, .data$strata_level, .data$outcome) %>% + dplyr::mutate( + n_events = cumsum(.data$n_event), + n_censor = cumsum(.data$n_censor) + ) %>% + dplyr::filter(.data$time %% gap == 0 | .data$time == max(.data$time)) %>% + dplyr::mutate( + n_events = c(.data$n_events[1], diff(.data$n_events)), + n_censor = c(.data$n_censor[1], diff(.data$n_censor)) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-"n_event") } + # Apply the function for all eventGaps and combine results + number_events <- purrr::map_dfr(eventGap, ~calculate_number_events(estimates, .x)) + estimates <- estimates %>% - dplyr::select(!"n_event") + dplyr::select(-c("n_event")) %>% + dplyr::filter(.data$outcome != "none") attr(estimates, "events") <- number_events - attr(estimates, "summary") <- dplyr::bind_rows(fitSummary) - row.names(attr(estimates, "summary")) <- NULL - attr(estimates, "summary") <- dplyr::as_tibble(attr(estimates, "summary")) + attr(estimates, "summary") <- dplyr::as_tibble(dplyr::bind_rows(fitSummary)) return(estimates) } @@ -1550,7 +1374,7 @@ addCohortDetails <- function(x, outcomeCohortTable, competingOutcomeCohortId, competingOutcomeCohortTable = NULL, - summary = FALSE) { + resultType) { outcomeCohortName <- omopgenerics::settings(cdm[[outcomeCohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == .env$outcomeCohortId) %>% @@ -1561,8 +1385,8 @@ addCohortDetails <- function(x, cdm_name = attr(cdm, "cdm_name"), package_name = "CohortSurvival", package_version = as.character(utils::packageVersion("CohortSurvival")), - result_type = "survival", - group_name = "cohort", + result_type = .env$resultType, + group_name = "target_cohort", group_level = omopgenerics::settings(cdm[[targetCohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == @@ -1601,3 +1425,50 @@ addCohortDetails <- function(x, empty_estimates <- function() { dplyr::tibble() } + +validateInputSurvival <- function(cdm, + targetCohortTable, + targetCohortId, + outcomeCohortTable, + outcomeCohortId, + outcomeDateVariable, + outcomeWashout, + competingOutcomeCohortTable, + competingOutcomeCohortId, + competingOutcomeDateVariable, + competingOutcomeWashout, + censorOnCohortExit, + censorOnDate, + followUpDays, + strata, + eventGap, + estimateGap, + restrictedMeanFollowUp, + minimumSurvivalDays, + minCellCount) { + + omopgenerics::assertCharacter(targetCohortTable, length = 1) + omopgenerics::assertCharacter(outcomeCohortTable, length = 1) + omopgenerics::assertCharacter(competingOutcomeCohortTable, length = 1, null = TRUE) + omopgenerics::validateCdmArgument(cdm) + omopgenerics::validateCohortArgument(cdm[[targetCohortTable]]) + omopgenerics::validateCohortArgument(cdm[[outcomeCohortTable]]) + omopgenerics::assertNumeric(targetCohortId, integerish = TRUE, length = 1, min = 1) + omopgenerics::assertList(strata, null = TRUE) + omopgenerics::assertChoice(unlist(strata), choices = colnames(cdm[[targetCohortTable]]), null = TRUE) + omopgenerics::assertNumeric(outcomeCohortId, length = 1, min = 1) + omopgenerics::assertNumeric(competingOutcomeCohortId, length = 1, min = 1) + omopgenerics::assertCharacter(outcomeDateVariable, length = 1) + omopgenerics::assertCharacter(competingOutcomeDateVariable, length = 1) + omopgenerics::assertLogical(censorOnCohortExit, length = 1) + omopgenerics::assertDate(censorOnDate, null = TRUE) + omopgenerics::assertNumeric(followUpDays, length = 1, min = 1, integerish = TRUE) + omopgenerics::assertNumeric(outcomeWashout, length = 1, min = 0, integerish = TRUE) + omopgenerics::assertNumeric(competingOutcomeWashout, length = 1, min = 0, integerish = TRUE) + omopgenerics::assertNumeric(eventGap, integerish = TRUE, min = 1) + omopgenerics::assertNumeric(estimateGap, integerish = TRUE, min = 1) + omopgenerics::assertNumeric(restrictedMeanFollowUp, integerish = TRUE, min = 1, length = 1, null = TRUE) + omopgenerics::assertNumeric(minimumSurvivalDays, integerish = TRUE, min = 0, length = 1) + omopgenerics::assertNumeric(minCellCount, integerish = TRUE, min = 0, length = 1) + +} diff --git a/R/generateDeathCohort.R b/R/generateDeathCohort.R index 40ff9ed..07a1957 100644 --- a/R/generateDeathCohort.R +++ b/R/generateDeathCohort.R @@ -16,6 +16,8 @@ #' To create a death cohort #' +#' `r lifecycle::badge("deprecated")` +#' #' @param cdm CDM reference #' #' @param name name for the created death cohort table @@ -83,10 +85,15 @@ generateDeathCohortSet <- function( cohortTable = NULL, cohortId = NULL){ + lifecycle::deprecate_soft( + "0.6.0", "generateDeathCohortSet()" + ) + # 0. validate inputs... - checkCdm(cdm, tables = c("death", "observation_period")) - checkmate::assertNumeric(cohortId, any.missing = FALSE, null.ok = TRUE, len = 1) - checkmate::assertCharacter(name, min.chars = 1, any.missing = FALSE, len = 1) + omopgenerics::validateCdmArgument(cdm) + omopgenerics::assertTable(cdm[["death"]]) + omopgenerics::assertNumeric(cohortId, null = TRUE, length = 1) + omopgenerics::assertCharacter(name, length = 1) x <- cdm$death %>% PatientProfiles::addInObservation(indexDate = "death_date") %>% @@ -96,7 +103,8 @@ generateDeathCohortSet <- function( # 1. cohortTable and cohortId if (!is.null(cohortTable)){ - checkCdm(cdm, tables = c(cohortTable)) + omopgenerics::validateCdmArgument(cdm) + omopgenerics::validateCohortArgument(cdm[[cohortTable]]) if (!is.null(cohortId)){ x <- x %>% diff --git a/R/inputValidation.R b/R/inputValidation.R index d9b2f13..52fbaa9 100644 --- a/R/inputValidation.R +++ b/R/inputValidation.R @@ -14,47 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -checkCdm <- function(cdm, tables = NULL) { - if (!isTRUE(inherits(cdm, "cdm_reference"))) { - cli::cli_abort("cdm must be a CDMConnector CDM reference object") - } - if (!is.null(tables)) { - tables <- tables[!(tables %in% names(cdm))] - if (length(tables) > 0) { - ntables <- length(tables) - cli::cli_abort(paste0( - "{(ntables)} table{?s} {?is/are} not present in the cdm object: ", - paste0(tables, collapse = ", ") - )) - } - } - invisible(NULL) -} - -checkIsCdmTable <- function(cdmTable) { - isCdmTable <- all(c("person_id") %in% - colnames(cdmTable)) - - return(isCdmTable) -} - -checkIsCohort <- function(cohort) { - isCohort <- all(c( - "cohort_definition_id", "subject_id", - "cohort_start_date", "cohort_end_date" - ) %in% - colnames(cohort)) & - !is.null(attr(cohort, "cohort_set")) - - if (isFALSE(isCohort)) { - return(cli::cli_abort(c( - "cohort must be a cohort table with cohort attributes" - ))) - } else { - return(invisible(isCohort)) - } -} - checkCohortId <- function(cohort, cohortId) { errorMessage <- checkmate::makeAssertCollection() checkmate::assertIntegerish(cohortId, @@ -66,19 +25,6 @@ checkCohortId <- function(cohort, cohortId) { dplyr::filter(.data$cohort_definition_id %in% .env$cohortId)) == length(cohortId) } -checkStrata <- function(strata, x) { - checkmate::assertList( - strata, - any.missing = FALSE, unique = TRUE, min.len = 1, null.ok = TRUE - ) - namesInColumns <- all(unlist(strata) %>% unique() %in% colnames(x)) - if(!isTRUE(namesInColumns)) { - return(cli::cli_abort(c( - "the cohort table must contain all variables in the strata list as columns" - ))) - } -} - checkExposureCohortId <- function(cohort) { isCohortIdUnique <- length(cohort %>% dplyr::select("cohort_definition_id") %>% @@ -92,21 +38,6 @@ checkExposureCohortId <- function(cohort) { } } - -checkIsCohort_exp <- function(cohort) { - isCohort <- all(c( - "cohort_definition_id", "subject_id", - "cohort_start_date", "cohort_end_date" - ) %in% colnames(cohort)) - if (isFALSE(isCohort)) { - return(cli::cli_abort(c( - "{cohort} must be a cohort table" # This gives a very ugly error right now - ))) - } else { - return(invisible(isCohort)) - } -} - checkCensorOnDate <- function(cohort, censorOnDate) { if(!is.null(censorOnDate)) { start_dates <- cohort %>% diff --git a/R/mockMGUS2cdm.R b/R/mockMGUS2cdm.R index f0841c8..de91b22 100644 --- a/R/mockMGUS2cdm.R +++ b/R/mockMGUS2cdm.R @@ -40,7 +40,8 @@ mockMGUS2cdm <- function() { observation_period_start_date = .data$cohort_start_date_diag - lubridate::years(.data$age) - ) + ) %>% + dplyr::mutate(subject_id = as.integer(.data$subject_id)) mgus2Diag <- mgus2 %>% @@ -60,7 +61,7 @@ mockMGUS2cdm <- function() { mgus2Diag <- dplyr::as_tibble(mgus2Diag) attr(mgus2Diag, "cohort_set") <- dplyr::tibble( - cohort_definition_id = 1, cohort_name = "mgus_diagnosis" + cohort_definition_id = 1L, cohort_name = "mgus_diagnosis" ) attr(mgus2Diag, "cohort_attrition") <- addAttrition(mgus2Diag, attr(mgus2Diag, "cohort_set")) @@ -78,7 +79,7 @@ mockMGUS2cdm <- function() { mgus2Pr <- dplyr::as_tibble(mgus2Pr) attr(mgus2Pr, "cohort_set") <- dplyr::tibble( - cohort_definition_id = 1, cohort_name = "progression" + cohort_definition_id = 1L, cohort_name = "progression" ) attr(mgus2Pr, "cohort_attrition") <- addAttrition(mgus2Pr, attr(mgus2Pr, "cohort_set")) @@ -94,12 +95,13 @@ mockMGUS2cdm <- function() { mgus2Pr2 %>% dplyr::mutate(cohort_definition_id = 2 + dplyr::row_number() %% 2) ) %>% - dplyr::relocate("cohort_definition_id") + dplyr::relocate("cohort_definition_id") %>% + dplyr::mutate(cohort_definition_id = as.integer(.data$cohort_definition_id)) mgus2Pr2 <- dplyr::as_tibble(mgus2Pr2) attr(mgus2Pr2, "cohort_set") <- dplyr::tibble( - cohort_definition_id = 1:3, + cohort_definition_id = c(1L,2L,3L), cohort_name = c("any_progression", "progression_type_1", "progression_type_2") ) attr(mgus2Pr2, "cohort_attrition") <- addAttrition(mgus2Pr2, attr(mgus2Pr2, "cohort_set")) @@ -117,7 +119,7 @@ mockMGUS2cdm <- function() { mgus2Death <- dplyr::as_tibble(mgus2Death) attr(mgus2Death, "cohort_set") <- dplyr::tibble( - cohort_definition_id = 1, cohort_name = "death_cohort" + cohort_definition_id = 1L, cohort_name = "death_cohort" ) attr(mgus2Death, "cohort_attrition") <- addAttrition(mgus2Death, attr(mgus2Death, "cohort_set")) @@ -125,10 +127,10 @@ mockMGUS2cdm <- function() { dplyr::rename("person_id" = "subject_id") %>% dplyr::mutate( gender_concept_id = dplyr::if_else( - .data$sex == "F", 8532, 8507 + .data$sex == "F", 8532L, 8507L ), - year_of_birth = lubridate::year(mgus2$observation_period_start_date), - month_of_birth = lubridate::month(mgus2$observation_period_start_date), + year_of_birth = as.integer(lubridate::year(mgus2$observation_period_start_date)), + month_of_birth = as.integer(lubridate::month(mgus2$observation_period_start_date)), day_of_birth = lubridate::day(mgus2$observation_period_start_date), race_concept_id = 0L, ethnicity_concept_id = 0L @@ -154,11 +156,11 @@ mockMGUS2cdm <- function() { # placeholder visit occurrence visitOccurrence <- dplyr::tibble( - visit_occurrence_id = 1001, - person_id = 1, - visit_concept_id = 5, - visit_start_date = c("2020-01-01"), - visit_end_date = c("2020-01-01"), + visit_occurrence_id = 1001L, + person_id = 1L, + visit_concept_id = 5L, + visit_start_date = as.Date("2020-01-01"), + visit_end_date = as.Date("2020-01-01"), visit_type_concept_id = 44818518L ) @@ -208,10 +210,16 @@ addAttrition <- function(cohort, set) { "number_subjects" = dplyr::if_else( is.na(.data$number_subjects), 0, .data$number_subjects ), - "reason_id" = 1, + "reason_id" = 1L, "reason" = "Initial qualifying events", "excluded_records" = 0, "excluded_subjects" = 0 ) %>% - dplyr::collect() + dplyr::collect() %>% + dplyr::mutate( + number_records = as.integer(.data$number_records), + number_subjects = as.integer(.data$number_subjects), + excluded_records = as.integer(.data$excluded_records), + excluded_subjects = as.integer(.data$excluded_subjects) + ) } diff --git a/R/plotSurvival.R b/R/plotSurvival.R index 1c25347..b9a6f98 100644 --- a/R/plotSurvival.R +++ b/R/plotSurvival.R @@ -20,12 +20,14 @@ #' @param x Variable to plot on x axis #' @param xscale X axis scale. Can be "days" or "years". #' @param ylim Limits for the Y axis +#' @param xlim Limits for the X axis #' @param cumulativeFailure whether to plot the cumulative failure probability #' instead of the survival probability #' @param ribbon If TRUE, the plot will join points using a ribbon #' @param facet Variables to use for facets #' @param colour Variables to use for colours -#' @param colourName Colour legend name +#' @param riskTable Whether to print risk table below the plot +#' @param riskInterval Interval of time to print risk table below the plot #' #' @return A plot of survival probabilities over time #' @export @@ -42,27 +44,32 @@ plotSurvival <- function(result, x = "time", xscale = "days", - ylim = c(0,NA), + ylim = c(0, NA), + xlim = NULL, cumulativeFailure = FALSE, ribbon = TRUE, facet = NULL, colour = NULL, - colourName = NULL){ + riskTable = FALSE, + riskInterval = 30) { + # Missing input checks + omopgenerics::assertNumeric(xlim, min = 1, length = 1, integerish = TRUE, null = TRUE) + if(is.null(xlim)) {xlim <- "Inf"} result <- result %>% - asSurvivalResult() + asSurvivalResult() %>% + dplyr::filter(.data$time <= xlim) %>% + dplyr::compute() - # cumulativeFailure must be true for competing risk analysis - if(isFALSE(cumulativeFailure) && - "cumulative_failure_probability" %in% unique(result$variable_name)){ + if (isFALSE(cumulativeFailure) && "cumulative_failure_probability" %in% unique(result$result_type)) { cli::cli_abort("cumulativeFailure must be TRUE if result comes from a competing risk analysis") } - if(cumulativeFailure) { + if (cumulativeFailure) { result <- result %>% dplyr::mutate( - estimate_value = dplyr::if_else(.data$variable_name == "cumulative_failure_probability", + estimate_value = dplyr::if_else(.data$result_type == "cumulative_failure_probability", .data$estimate_value, 1 - .data$estimate_value) ) @@ -71,31 +78,183 @@ plotSurvival <- function(result, plot_name <- "Survival probability" } - plot <- plotEstimates(result, - x = x, - xscale = xscale, - y = "estimate", - yLower = "estimate_95CI_lower", - yUpper = "estimate_95CI_upper", - ylim = ylim, - ytype = "count", - ribbon = ribbon, - facet = facet, - colour = colour, - colourName = colourName) + + plot <- plotEstimates(result, + x = x, + xscale = xscale, + y = "estimate", + yLower = "estimate_95CI_lower", + yUpper = "estimate_95CI_upper", + ylim = ylim, + ytype = "count", + ribbon = ribbon, + facet = facet, + colour = colour) + ggplot2::ylab(plot_name) - if(xscale == "years"){ - plot <- plot+ - ggplot2::xlab("Time in years")+ + if (xscale == "years") { + plot <- plot + + ggplot2::xlab("Time in years") + ggplot2::scale_x_continuous(breaks = function(x) unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))) } else { - plot <- plot+ + plot <- plot + ggplot2::xlab("Time in days") } - return(plot) + if (riskTable) { + max_t <- result %>% + dplyr::pull(.data$time) %>% + max() + + riskTimes <- seq(0, max_t, by = riskInterval) + + if (!is.null(facet)) { + result <- result %>% + dplyr::mutate(!!facet := gsub("&&&","and",result[[facet]])) + attr(result, "events") <- attr(result, "events") %>% + dplyr::mutate(!!facet := gsub("&&&","and",attr(result, "events")[[facet]])) + + facetLevels <- unique(result[[facet]]) + + plotList <- list() + + applyPlot <- function(plotList, level) { + subResult <- result %>% + dplyr::filter(!!rlang::sym(facet) == level) %>% + dplyr::compute() + attr(subResult, "events") <- attr(result, "events") %>% + dplyr::filter(!!rlang::sym(facet) == level) %>% + dplyr::compute() + + facetPlot <- plotEstimates(subResult, + x = x, + xscale = xscale, + y = "estimate", + yLower = "estimate_95CI_lower", + yUpper = "estimate_95CI_upper", + ylim = ylim, + ytype = "count", + ribbon = ribbon, + facet = NULL, + colour = colour) + + ggplot2::ggtitle(level) + + riskData <- generateRiskData(subResult, riskTimes, colour) + + if (nrow(riskData) == 0) { + cli::cli_abort("Check the riskInterval provided. It seems that interval + does not provide the times for which n_risk can be retrieved. + Check the `events` attribute from your asSurvivalResult() + object to know the times at which `n_risk` information is + available") + } + + names_risk <- riskData %>% + dplyr::select(-c(dplyr::starts_with("time"))) %>% + colnames() + + nameRisk <- paste0("p",as.character(level)) + assign(nameRisk, riskData %>% + tidyr::pivot_longer(c(names_risk, .data$time), names_to = "layer", values_to = "label") %>% + ggplot2::ggplot(ggplot2::aes(x = .data$timeb)) + + ggplot2::geom_text(ggplot2::aes(y = factor(.data$layer, c(names_risk, "time")), label = dplyr::if_else(is.na(.data$label), sprintf("NA"), .data$label))) + + ggplot2::labs(y = "", x = NULL) + + ggplot2::theme_minimal() + + ggplot2::theme(axis.line = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank(), axis.text.x = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), strip.text = ggplot2::element_blank()) + ) + + plotList[[as.character(level)]] <- facetPlot / patchwork::wrap_elements(get(nameRisk)) + patchwork::plot_layout(heights = c(8, 1)) + return(plotList) + } + + plotList <- purrr::reduce(facetLevels, applyPlot, .init = plotList) + + finalPlot <- patchwork::wrap_plots(plotList) + return(finalPlot) + } else { + riskData <- generateRiskData(result, riskTimes, colour) + + if (nrow(riskData) == 0) { + cli::cli_abort("Check the riskInterval provided. It seems that interval + does not provide the times for which n_risk can be retrieved. + Check the `events` attribute from your asSurvivalResult() + object to know the times at which `n_risk` information is + available") + } + + names_risk <- riskData %>% + dplyr::select(-c(dplyr::starts_with("time"))) %>% + colnames() + + p2 <- riskData %>% + tidyr::pivot_longer(c(names_risk, .data$time), names_to = "layer", values_to = "label") %>% + ggplot2::ggplot(ggplot2::aes(x = .data$timeb)) + + ggplot2::geom_text(ggplot2::aes(y = factor(.data$layer, c(names_risk, "time")), label = dplyr::if_else(is.na(.data$label), sprintf("NA"), .data$label))) + + ggplot2::labs(y = "", x = NULL) + + ggplot2::theme_minimal() + + ggplot2::theme(axis.line = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank(), axis.text.x = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), strip.text = ggplot2::element_blank()) + + plot <- plot / patchwork::wrap_elements(p2) + patchwork::plot_layout(heights = c(8, 1)) + } + } + return(plot) +} + +generateRiskData <- function(result, riskTimes, colour) { + if (is.null(colour)) { + riskdata <- attr(result, "events") %>% + dplyr::filter(.data$estimate_name == "n_risk", + .data$time %in% riskTimes) %>% + dplyr::mutate(n_risk = as.character(.data$estimate_value)) %>% + dplyr::select("time", "n_risk") %>% + dplyr::mutate(timeb = .data$time, + time = as.character(.data$time)) + + riskdataend <- dplyr::tibble( + time = as.character(riskTimes), + timeb = riskTimes + ) %>% + dplyr::filter(!(.data$time %in% (riskdata %>% dplyr::pull("time")))) + + for (i in colnames(riskdata %>% dplyr::select(dplyr::starts_with("n_risk")))) { + riskdataend <- riskdataend %>% + dplyr::mutate(!!i := as.character(NA)) + } + + riskdata <- dplyr::union_all(riskdata, riskdataend) %>% + dplyr::mutate(n_risk = dplyr::if_else(is.na(.data$n_risk), "", .data$n_risk)) + } else { + riskdata <- attr(result, "events") %>% + dplyr::filter(.data$estimate_name == "n_risk", + .data$time %in% riskTimes) %>% + dplyr::mutate(n_risk = as.character(.data$estimate_value)) %>% + dplyr::select("time", "n_risk", colour) %>% + dplyr::mutate(!!colour := stringr::str_replace_all(.data[[colour]], "&&&", "and")) %>% + dplyr::mutate(timeb = .data$time, + time = as.character(.data$time), + !!colour := paste0("n_risk_", .data[[colour]])) %>% + dplyr::distinct() %>% + tidyr::pivot_wider(names_from = colour, values_from = .data$n_risk) + + riskdataend <- dplyr::tibble( + time = as.character(riskTimes), + timeb = riskTimes + ) %>% + dplyr::filter(!(.data$time %in% (riskdata %>% dplyr::pull("time")))) + + for (i in colnames(riskdata %>% dplyr::select(dplyr::starts_with("n_risk")))) { + riskdataend <- riskdataend %>% + dplyr::mutate(!!i := as.character(NA)) + } + + riskdata <- dplyr::union_all(riskdata, riskdataend) %>% + dplyr::mutate(dplyr::across(dplyr::everything(), ~ tidyr::replace_na(.x, ""))) + + names(riskdata) <- gsub("n risk ", "", gsub("_", " ", names(riskdata))) + } + return(riskdata) } # helper functions @@ -109,15 +268,11 @@ plotEstimates <- function(result, ytype, ribbon, facet, - colour, - colourName){ + colour){ - errorMessage <- checkmate::makeAssertCollection() - checkmate::assert_character(xscale, len = 1) - checkmate::assertTRUE(xscale %in% c("days", "years")) - #checkmate::assertTRUE(inherits(result, "SurvivalResult")) - checkmate::assertTRUE(all(c(x) %in% colnames(result))) - checkmate::reportAssertions(collection = errorMessage) + omopgenerics::assertCharacter(xscale, length = 1) + omopgenerics::assertChoice(xscale, c("days", "years")) + omopgenerics::assertChoice(c(x), colnames(result)) plot_data <- getPlotData(estimates = result, facetVars = facet, @@ -128,8 +283,6 @@ plotEstimates <- function(result, dplyr::mutate(time = .data$time / 365.25) } - - if(is.null(colour)){ plot <- plot_data %>% ggplot2::ggplot( diff --git a/R/recordAttrition.R b/R/recordAttrition.R deleted file mode 100644 index a9477ec..0000000 --- a/R/recordAttrition.R +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2023 DARWIN EU® -# -# This file is part of CohortSurvival -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -recordAttrition <- function(table, - id = "person_id", - existingAttrition = NULL, - reasonId, - reason) { - errorMessage <- checkmate::makeAssertCollection() - checkmate::assertTRUE(any(class(table) %in% - c("tbl_dbi", "tbl", "data.frame", "tibble"))) - checkmate::assertCharacter(id, add = errorMessage) - checkmate::assertIntegerish(reasonId, add = errorMessage) - checkmate::assertCharacter(reason, null.ok = TRUE, add = errorMessage) - if (!is.null(existingAttrition)) { - checkmate::assertTRUE(any(class(existingAttrition) %in% - c("data.frame", "tbl"))) - } - checkmate::reportAssertions(collection = errorMessage) - - attrition <- dplyr::tibble( - number_records = table %>% - dplyr::tally() %>% - dplyr::pull(), - number_subjects = table %>% - dplyr::select(.env$id) %>% - dplyr::distinct() %>% - dplyr::tally() %>% - dplyr::pull(), - reason_id = .env$reasonId, - reason = .env$reason - ) - - if (!is.null(existingAttrition)) { - attrition <- dplyr::bind_rows(existingAttrition, attrition) %>% - dplyr::mutate( - excluded_records = - dplyr::lag(.data$number_records) - .data$number_records - ) %>% - dplyr::mutate( - excluded_subjects = - dplyr::lag(.data$number_subjects) - .data$number_subjects - ) - } else { - attrition <- attrition %>% - dplyr::mutate(excluded_records = NA) %>% - dplyr::mutate(excluded_subjects = NA) - } - - return(attrition) -} diff --git a/R/reexport-omopgenerics.R b/R/reexport-omopgenerics.R new file mode 100644 index 0000000..5ddb7f6 --- /dev/null +++ b/R/reexport-omopgenerics.R @@ -0,0 +1,47 @@ +# Copyright 2024 DARWIN EU® +# +# This file is part of IncidencePrevalence +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' @importFrom omopgenerics cohortCount +#' @export +omopgenerics::cohortCount + +#' @importFrom omopgenerics cohortCodelist +#' @export +omopgenerics::cohortCodelist + +#' @importFrom omopgenerics settings +#' @export +omopgenerics::settings + +#' @importFrom omopgenerics attrition +#' @export +omopgenerics::attrition + +#' @importFrom omopgenerics suppress +#' @export +omopgenerics::suppress + +#' @importFrom omopgenerics bind +#' @export +omopgenerics::bind + +#' @importFrom omopgenerics exportSummarisedResult +#' @export +omopgenerics::exportSummarisedResult + +#' @importFrom omopgenerics importSummarisedResult +#' @export +omopgenerics::importSummarisedResult diff --git a/R/suppressSurvivalCounts.R b/R/suppressSurvivalCounts.R deleted file mode 100644 index 0af8795..0000000 --- a/R/suppressSurvivalCounts.R +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2023 DARWIN EU® -# -# This file is part of CohortSurvival -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -suppressSurvivalCounts <- function(result, - minCellCount = 5) { - - checkmate::assertTRUE(all(c( - "variable_name", "estimate_value", "estimate_type", "group_name", "group_level", - "strata_name", "strata_level" - ) %in% - colnames(result))) - - checkmate::assertIntegerish(minCellCount, - len = 1, - lower = 0) - - if (minCellCount > 1) { - toObscure <- result %>% - dplyr::filter(.data$estimate_name == "n_start") %>% - dplyr::mutate(estimate_value = as.integer(.data$estimate_value)) %>% - dplyr::filter(.data$estimate_value > 0 & .data$estimate_value < .env$minCellCount) %>% - dplyr::select("group_name", "group_level", "strata_name", "strata_level") - - -for(i in seq_along(toObscure$group_name)){ - result <- result %>% - dplyr::mutate(estimate_value = dplyr::if_else( - .data$group_name == toObscure$group_name[i] & - .data$group_level == toObscure$group_level[i] & - .data$strata_name == toObscure$strata_name[i] & - .data$strata_level == toObscure$strata_level[i] & - .data$estimate_name == "n_start", paste0("<", minCellCount), - as.character(.data$estimate_value))) %>% - dplyr::mutate(estimate_value = dplyr::if_else( - .data$group_name == toObscure$group_name[i] & - .data$group_level == toObscure$group_level[i] & - .data$strata_name == toObscure$strata_name[i] & - .data$strata_level == toObscure$strata_level[i] & - .data$estimate_name != "n_start", - as.character(NA), - as.character(.data$estimate_value))) -} - } - - - return(result) -} - diff --git a/R/tableSurvival.R b/R/tableSurvival.R index e67ada3..da2aca1 100644 --- a/R/tableSurvival.R +++ b/R/tableSurvival.R @@ -53,15 +53,9 @@ tableSurvival <- function(x, .options = list()){ # initial checks - checkmate::assert_numeric(times, - lower = 0, - null.ok = TRUE) - checkmate::assert_character(timeScale, - len = 1) - if(!(timeScale %in% c("days", "years"))){ - cli::cli_abort(paste0("The input `timeScale` must be `days` or `years` - but it is `",timeScale,"`")) - } + omopgenerics::assertNumeric(times, min = 0, null = TRUE) + omopgenerics::assertCharacter(timeScale, length = 1) + omopgenerics::assertChoice(timeScale, c("days", "years")) # .options: userOptions <- .options @@ -71,36 +65,65 @@ tableSurvival <- function(x, } # check times in x - x_clean <- x %>% visOmopResults::splitAdditional() + x_clean <- x %>% visOmopResults::splitAdditional() %>% + dplyr::filter(.data$result_id %in% (omopgenerics::settings(x) %>% + dplyr::filter(grepl("probability", .data$result_type) | grepl("summary", .data$result_type)) %>% + dplyr::pull("result_id"))) if (!is.null(times)) { + times_final <- dplyr::tibble( + name = times, + value = c(NA) + ) if (timeScale == "years") { summary_times <- x_clean %>% dplyr::filter(.data$time != "overall") %>% - dplyr::mutate(time = round(as.numeric(.data$time)/364.25, digits = 3)) + dplyr::mutate(time = round(as.numeric(.data$time)/365.25, digits = 3)) } else { summary_times <- x_clean } for (t in times) { if (!(t %in% summary_times$time)) { if ((round(t + 0.001, digits = 3) %in% summary_times$time & timeScale == "years")) { - times[times == t] <- round(t + 0.001, digits = 3) + times_final <- times_final %>% + dplyr::mutate(value = dplyr::if_else(.data$name == t, + round(t + 0.001, digits = 3), + .data$value)) cli::cli_alert(paste0("Because of the conversion from days to years, - the requested time ",t," has now been changed to ",t + 0.001)) + the requested estimate for time ",t," will be given by ",t + 0.001)) } else if ((round(t - 0.001, digits = 3) %in% summary_times$time & timeScale == "years")) { - times[times == t] <- round(t - 0.001, digits = 3) + times_final <- times_final %>% + dplyr::mutate(value = dplyr::if_else(.data$name == t, + round(t - 0.001, digits = 3), + .data$value)) cli::cli_alert(paste0("Because of the conversion from days to years, - the requested time ",t," has now been changed to ",t - 0.001)) + the requested estimate for time ",t," will be given by ",t - 0.001)) } else { cli::cli_alert(paste0("Requested time ",t," is not in the list of times of the survival output provided, so no estimate will be included in the summary")) } } + if(t %in% summary_times$time) { + times_final <- times_final %>% + dplyr::mutate(value = dplyr::if_else(.data$name == t, + t, + .data$value)) + } } summary_times <- summary_times %>% - dplyr::filter(.data$time %in% .env$times) + dplyr::left_join(omopgenerics::settings(x_clean) %>% + dplyr::select(c("result_id", "result_type", "outcome", "competing_outcome")), + by = "result_id") %>% + dplyr::filter(.data$time %in% (times_final %>% + dplyr::pull("value")), + grepl("probability", .data$result_type)) + + if(typeof(summary_times$time) == "character") { + times_final <- times_final %>% + dplyr::mutate(value = as.character(.data$value)) + } if (nrow(summary_times) > 0) { summary_times <- summary_times %>% @@ -117,8 +140,9 @@ tableSurvival <- function(x, c(" survival estimate" = " (, )") ) %>% + dplyr::left_join(times_final, by = c("time" = "value")) %>% dplyr::mutate( - "estimate_name" = paste0(.data$time, " ", .env$timeScale, .data$estimate_name) + "estimate_name" = paste0(.data$name, " ", .env$timeScale, .data$estimate_name) ) } } @@ -126,16 +150,16 @@ tableSurvival <- function(x, summary_table <- x_clean %>% dplyr::filter( .data$estimate_name %in% - c("median_survival", "number_records", "n_events", + c("median_survival", "number_records_count", "n_events_count", "median_survival_95CI_lower", "median_survival_95CI_higher", "restricted_mean_survival", "restricted_mean_survival_se"), .data$time == "overall" ) %>% - dplyr::select(!"time") %>% + dplyr::select(!c("time")) %>% dplyr::mutate( "estimate_name" = dplyr::case_when( - .data$estimate_name == "n_events" ~ "Number events", - .data$estimate_name == "number_records" ~ "Number records", + .data$estimate_name == "n_events_count" ~ "Number events", + .data$estimate_name == "number_records_count" ~ "Number records", .default = .data$estimate_name ), "estimate_type" = dplyr::if_else( @@ -152,9 +176,25 @@ tableSurvival <- function(x, dplyr::arrange(.data$estimate_name) %>% dplyr::mutate("estimate_name" = as.character(.data$estimate_name)) + if(timeScale == "years") { + summary_table <- summary_table %>% + dplyr::mutate( + "estimate_value" = dplyr::if_else(grepl("mean", .data$estimate_name) | grepl("median", .data$estimate_name), + as.character(round(as.numeric(.data$estimate_value)/365.25,3)), + .data$estimate_value) + ) + } + + summary_table <- summary_table %>% + dplyr::left_join( + omopgenerics::settings(summary_table) %>% + dplyr::select("result_id", "result_type", "outcome", "competing_outcome"), + by = "result_id" + ) + if (!is.null(times)) { summary_table <- summary_table %>% - dplyr::bind_rows(summary_times %>% dplyr::select(!"time")) + dplyr::bind_rows(summary_times %>% dplyr::select(!c("name","time"))) } @@ -165,7 +205,7 @@ tableSurvival <- function(x, excludeCols <- c("result_id", "estimate_type") - if ("competing_outcome" %in% visOmopResults::additionalColumns(x)) { + if ((summary_table %>% dplyr::pull("competing_outcome") %>% unique()) != "none") { summary_table <- summary_table %>% dplyr::mutate( "variable_name" = dplyr::if_else( diff --git a/R/utils.R b/R/utils.R deleted file mode 100644 index 4a90bcd..0000000 --- a/R/utils.R +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2023 DARWIN EU® -# -# This file is part of CohortSurvival -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#' Participants contributing to a survival analysis -#' -#' @param result Result object -#' -#' @return References to the study participants contributing to -#' a given analysis -#' @export -#' -#' @examples -#' \donttest{ -#' cdm <- mockMGUS2cdm() -#' surv <- estimateSingleEventSurvival(cdm, -#' targetCohortTable = "mgus_diagnosis", -#' outcomeCohortTable = "death_cohort", -#' returnParticipants = TRUE) -#' survivalParticipants(surv) -#'} -survivalParticipants <- function(result) { - attr(result, "participants") -} - -getColumns <- function(result, col, overall) { - # initial checks - checkmate::assertTibble(result) - checkmate::assertCharacter(col, any.missing = FALSE, len = 1) - checkmate::assertTRUE(col %in% colnames(result)) - checkmate::assertLogical(overall, any.missing = FALSE, len = 1) - - # extract columns - x <- result %>% - dplyr::pull(dplyr::all_of(col)) %>% - unique() %>% - lapply(strsplit, split = " and ") %>% - unlist() %>% - unique() - - # eliminate overall - if (!overall) { - x <- x[x != "overall"] - } - - return(x) -} diff --git a/README.Rmd b/README.Rmd index 0a597e2..ea5af6d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,9 +14,12 @@ knitr::opts_chunk$set( ) ``` -# CohortSurvival +# CohortSurvival CohortSurvival website -[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival)[![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://app.codecov.io/github/darwin-eu/CohortSurvival?branch=main) [![R-CMD-check](https://github.com/darwin-eu/CohortSurvival/workflows/R-CMD-check/badge.svg)](https://github.com/darwin-eu/CohortSurvival/actions) [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html) +[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival) +[![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://app.codecov.io/github/darwin-eu/CohortSurvival?branch=main) +[![R-CMD-check](https://github.com/darwin-eu/CohortSurvival/workflows/R-CMD-check/badge.svg)](https://github.com/darwin-eu/CohortSurvival/actions) +[![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html) CohortSurvival contains functions for extracting and summarising survival data using the OMOP common data model. diff --git a/README.md b/README.md index 6d7b987..ddc6c70 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ -# CohortSurvival +# CohortSurvival CohortSurvival website -[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival)[![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://app.codecov.io/github/darwin-eu/CohortSurvival?branch=main) +[![CRANstatus](https://www.r-pkg.org/badges/version/CohortSurvival)](https://CRAN.R-project.org/package=CohortSurvival) +[![codecov.io](https://codecov.io/github/darwin-eu/CohortSurvival/coverage.svg?branch=main)](https://app.codecov.io/github/darwin-eu/CohortSurvival?branch=main) [![R-CMD-check](https://github.com/darwin-eu/CohortSurvival/workflows/R-CMD-check/badge.svg)](https://github.com/darwin-eu/CohortSurvival/actions) [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html) @@ -49,9 +50,9 @@ cdm$mgus_diagnosis %>% glimpse() #> Rows: ?? #> Columns: 10 -#> Database: DuckDB v0.10.3-dev163 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> Database: DuckDB v1.0.0 [eburn@Windows 10 x64:R 4.4.0/:memory:] #> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… -#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15… +#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15… #> $ cohort_start_date 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, … #> $ cohort_end_date 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, … #> $ age 88, 78, 94, 68, 90, 90, 89, 87, 86, 79, 86, 89, 8… @@ -69,9 +70,9 @@ cdm$progression %>% glimpse() #> Rows: ?? #> Columns: 4 -#> Database: DuckDB v0.10.3-dev163 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> Database: DuckDB v1.0.0 [eburn@Windows 10 x64:R 4.4.0/:memory:] #> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… -#> $ subject_id 56, 81, 83, 111, 124, 127, 147, 163, 165, 167, 18… +#> $ subject_id 56, 81, 83, 111, 124, 127, 147, 163, 165, 167, 18… #> $ cohort_start_date 1978-01-30, 1985-01-15, 1974-08-17, 1993-01-14, … #> $ cohort_end_date 1978-01-30, 1985-01-15, 1974-08-17, 1993-01-14, … ``` @@ -83,9 +84,9 @@ cdm$death_cohort %>% glimpse() #> Rows: ?? #> Columns: 4 -#> Database: DuckDB v0.10.3-dev163 [eburn@Windows 10 x64:R 4.2.1/:memory:] +#> Database: DuckDB v1.0.0 [eburn@Windows 10 x64:R 4.4.0/:memory:] #> $ cohort_definition_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1… -#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 1… +#> $ subject_id 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 1… #> $ cohort_start_date 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, … #> $ cohort_end_date 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, … ``` @@ -102,7 +103,7 @@ MGUS_death <- estimateSingleEventSurvival(cdm, ) MGUS_death |> glimpse() -#> Rows: 1,318 +#> Rows: 1,330 #> Columns: 13 #> $ result_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… #> $ cdm_name "mock", "mock", "mock", "mock", "mock", "mock", "mock… diff --git a/inst/WORDLIST b/inst/WORDLIST index b489f3b..3927559 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -20,7 +20,6 @@ gammopathy ggplot magrittr mgus -returnParticipants rlang stratifications summarise diff --git a/man/CohortSurvival-package.Rd b/man/CohortSurvival-package.Rd index f36eb44..92406f4 100644 --- a/man/CohortSurvival-package.Rd +++ b/man/CohortSurvival-package.Rd @@ -6,6 +6,8 @@ \alias{CohortSurvival-package} \title{CohortSurvival: Estimate Survival from Common Data Model Cohorts} \description{ +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + Estimate survival using data mapped to the Observational Medical Outcomes Partnership common data model. Survival can be estimated based on user-defined study cohorts. } \seealso{ @@ -20,14 +22,11 @@ Useful links: Authors: \itemize{ - \item Kim Lopez \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) -} - -Other contributors: -\itemize{ - \item Marti Catala \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) [contributor] - \item Xintong Li \email{xintong.li@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) [contributor] - \item Danielle Newby \email{danielle.newby@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-3001-1478}{ORCID}) [contributor] + \item Kim López-Güell \email{kim.lopez@spc.ox.ac.uk} (\href{https://orcid.org/0000-0002-8462-8668}{ORCID}) + \item Marti Catala \email{marti.catalasabate@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0003-3308-9905}{ORCID}) + \item Xintong Li \email{xintong.li@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-6872-5804}{ORCID}) + \item Danielle Newby \email{danielle.newby@ndorms.ox.ac.uk} (\href{https://orcid.org/0000-0002-3001-1478}{ORCID}) + \item Nuria Mercade-Besora \email{nuria.mercadebesora@ndorms.ox.ac.uk} (\href{https://orcid.org/0009-0006-7948-3747}{ORCID}) } } diff --git a/man/benchmarkCohortSurvival.Rd b/man/benchmarkCohortSurvival.Rd deleted file mode 100644 index c287d70..0000000 --- a/man/benchmarkCohortSurvival.Rd +++ /dev/null @@ -1,82 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/benchmarkCohortSurvival.R -\name{benchmarkCohortSurvival} -\alias{benchmarkCohortSurvival} -\title{Estimate performance of estimateSurvival function for benchmarking} -\usage{ -benchmarkCohortSurvival( - cdm, - targetSize, - outcomeSize, - outcomeDateVariable = "cohort_start_date", - competingOutcomeSize = NULL, - competingOutcomeDateVariable = "cohort_start_date", - censorOnCohortExit = FALSE, - censorOnDate = NULL, - followUpDays = Inf, - strata = NULL, - eventGap = 30, - estimateGap = 1, - minCellCount = 5, - returnParticipants = FALSE -) -} -\arguments{ -\item{cdm}{CDM reference} - -\item{targetSize}{number of people in the target cohort table} - -\item{outcomeSize}{number of people in the outcome cohort table} - -\item{outcomeDateVariable}{Variable containing date of outcome event} - -\item{competingOutcomeSize}{number of people in the competing outcome cohort table} - -\item{competingOutcomeDateVariable}{Variable containing date of -competing event} - -\item{censorOnCohortExit}{If TRUE, an individual's follow up will be -censored at their cohort exit} - -\item{censorOnDate}{if not NULL, an individual's follow up will be censored -at the given date} - -\item{followUpDays}{Number of days to follow up individuals (lower bound 1, -upper bound Inf)} - -\item{strata}{strata} - -\item{eventGap}{Days between time points for which to report survival -estimates. First day will be day zero with risk estimates provided -for times up to the end of follow-up, with a gap in days equivalent -to eventGap.} - -\item{estimateGap}{vector of time points at which to give survival estimates, -if NULL estimates at all times are calculated} - -\item{minCellCount}{The minimum number of events to reported, below which -results will be obscured. If 0, all results will be reported.} - -\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to -participants from the analysis will be returned allowing for further -analysis.} -} -\value{ -tibble with performance of estimateSurvival function information, -according to the selected input parameters -} -\description{ -Estimate performance of estimateSurvival function for benchmarking -} -\examples{ -\donttest{ -cdm <- mockMGUS2cdm() -cdm$condition_occurrence <- cdm$death_cohort \%>\% -dplyr::rename("condition_start_date" = "cohort_start_date", - "condition_end_date" = "cohort_end_date") \%>\% - dplyr::compute() -surv_timings <- benchmarkCohortSurvival( -cdm, targetSize = 100, outcomeSize = 20) -} - -} diff --git a/man/estimateCompetingRiskSurvival.Rd b/man/estimateCompetingRiskSurvival.Rd index 593ecef..09feaea 100644 --- a/man/estimateCompetingRiskSurvival.Rd +++ b/man/estimateCompetingRiskSurvival.Rd @@ -8,12 +8,12 @@ cohorts in the OMOP Common Data Model} estimateCompetingRiskSurvival( cdm, targetCohortTable, - targetCohortId = NULL, outcomeCohortTable, + competingOutcomeCohortTable, + targetCohortId = NULL, outcomeCohortId = NULL, outcomeDateVariable = "cohort_start_date", outcomeWashout = Inf, - competingOutcomeCohortTable, competingOutcomeCohortId = NULL, competingOutcomeDateVariable = "cohort_start_date", competingOutcomeWashout = Inf, @@ -25,8 +25,7 @@ estimateCompetingRiskSurvival( estimateGap = 1, restrictedMeanFollowUp = NULL, minimumSurvivalDays = 1, - minCellCount = 5, - returnParticipants = FALSE + minCellCount = 5 ) } \arguments{ @@ -34,10 +33,12 @@ estimateCompetingRiskSurvival( \item{targetCohortTable}{targetCohortTable} -\item{targetCohortId}{targetCohortId} - \item{outcomeCohortTable}{The outcome cohort table of interest.} +\item{competingOutcomeCohortTable}{The competing outcome cohort table of interest.} + +\item{targetCohortId}{targetCohortId} + \item{outcomeCohortId}{ID of event cohorts to include. Only one outcome (and so one ID) can be considered.} @@ -45,8 +46,6 @@ estimateCompetingRiskSurvival( \item{outcomeWashout}{Washout time in days for the outcome} -\item{competingOutcomeCohortTable}{The competing outcome cohort table of interest.} - \item{competingOutcomeCohortId}{ID of event cohorts to include. Only one competing outcome (and so one ID) can be considered.} @@ -81,10 +80,6 @@ to have survived} \item{minCellCount}{The minimum number of events to reported, below which results will be obscured. If 0, all results will be reported.} - -\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to -participants from the analysis will be returned allowing for further -analysis.} } \value{ tibble with survival information for desired cohort, including: diff --git a/man/estimateSingleEventSurvival.Rd b/man/estimateSingleEventSurvival.Rd index 58627b7..498ff78 100644 --- a/man/estimateSingleEventSurvival.Rd +++ b/man/estimateSingleEventSurvival.Rd @@ -7,8 +7,8 @@ estimateSingleEventSurvival( cdm, targetCohortTable, - targetCohortId = NULL, outcomeCohortTable, + targetCohortId = NULL, outcomeCohortId = NULL, outcomeDateVariable = "cohort_start_date", outcomeWashout = Inf, @@ -20,8 +20,7 @@ estimateSingleEventSurvival( estimateGap = 1, restrictedMeanFollowUp = NULL, minimumSurvivalDays = 1, - minCellCount = 5, - returnParticipants = FALSE + minCellCount = 5 ) } \arguments{ @@ -29,10 +28,10 @@ estimateSingleEventSurvival( \item{targetCohortTable}{targetCohortTable} -\item{targetCohortId}{targetCohortId} - \item{outcomeCohortTable}{The outcome cohort table of interest.} +\item{targetCohortId}{targetCohortId} + \item{outcomeCohortId}{ID of event cohorts to include. Only one outcome (and so one ID) can be considered.} @@ -67,10 +66,6 @@ to have survived} \item{minCellCount}{The minimum number of events to reported, below which results will be obscured. If 0, all results will be reported.} - -\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to -participants from the analysis will be returned allowing for further -analysis.} } \value{ tibble with survival information for desired cohort, including: diff --git a/man/figures/README-unnamed-chunk-10-1.png b/man/figures/README-unnamed-chunk-10-1.png index f9e0ca4..08b4d85 100644 Binary files a/man/figures/README-unnamed-chunk-10-1.png and b/man/figures/README-unnamed-chunk-10-1.png differ diff --git a/man/figures/README-unnamed-chunk-11-1.png b/man/figures/README-unnamed-chunk-11-1.png index f883043..ec201d5 100644 Binary files a/man/figures/README-unnamed-chunk-11-1.png and b/man/figures/README-unnamed-chunk-11-1.png differ diff --git a/man/figures/README-unnamed-chunk-9-1.png b/man/figures/README-unnamed-chunk-9-1.png index 5b129db..b2f8c4e 100644 Binary files a/man/figures/README-unnamed-chunk-9-1.png and b/man/figures/README-unnamed-chunk-9-1.png differ diff --git a/man/figures/hexsticker.png b/man/figures/hexsticker.png deleted file mode 100644 index 5d56b6b..0000000 Binary files a/man/figures/hexsticker.png and /dev/null differ diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 0000000..22c0115 Binary files /dev/null and b/man/figures/logo.png differ diff --git a/man/figures/logo_old.png b/man/figures/logo_old.png new file mode 100644 index 0000000..72b3eca Binary files /dev/null and b/man/figures/logo_old.png differ diff --git a/man/generateDeathCohortSet.Rd b/man/generateDeathCohortSet.Rd index 684a0a3..1743ba6 100644 --- a/man/generateDeathCohortSet.Rd +++ b/man/generateDeathCohortSet.Rd @@ -19,7 +19,7 @@ generateDeathCohortSet(cdm, name, cohortTable = NULL, cohortId = NULL) A cohort table with a death cohort in cdm } \description{ -To create a death cohort +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} } \examples{ \donttest{ diff --git a/man/plotSurvival.Rd b/man/plotSurvival.Rd index 90b3485..500f378 100644 --- a/man/plotSurvival.Rd +++ b/man/plotSurvival.Rd @@ -9,11 +9,13 @@ plotSurvival( x = "time", xscale = "days", ylim = c(0, NA), + xlim = NULL, cumulativeFailure = FALSE, ribbon = TRUE, facet = NULL, colour = NULL, - colourName = NULL + riskTable = FALSE, + riskInterval = 30 ) } \arguments{ @@ -25,6 +27,8 @@ plotSurvival( \item{ylim}{Limits for the Y axis} +\item{xlim}{Limits for the X axis} + \item{cumulativeFailure}{whether to plot the cumulative failure probability instead of the survival probability} @@ -34,7 +38,9 @@ instead of the survival probability} \item{colour}{Variables to use for colours} -\item{colourName}{Colour legend name} +\item{riskTable}{Whether to print risk table below the plot} + +\item{riskInterval}{Interval of time to print risk table below the plot} } \value{ A plot of survival probabilities over time diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000..b9d785a --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexport-omopgenerics.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{cohortCount} +\alias{cohortCodelist} +\alias{settings} +\alias{attrition} +\alias{suppress} +\alias{bind} +\alias{exportSummarisedResult} +\alias{importSummarisedResult} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. + +\describe{ + \item{omopgenerics}{\code{\link[omopgenerics]{attrition}}, \code{\link[omopgenerics]{bind}}, \code{\link[omopgenerics]{cohortCodelist}}, \code{\link[omopgenerics]{cohortCount}}, \code{\link[omopgenerics]{exportSummarisedResult}}, \code{\link[omopgenerics]{importSummarisedResult}}, \code{\link[omopgenerics]{settings}}, \code{\link[omopgenerics]{suppress}}} +}} + diff --git a/man/survivalParticipants.Rd b/man/survivalParticipants.Rd deleted file mode 100644 index 8ec2b01..0000000 --- a/man/survivalParticipants.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{survivalParticipants} -\alias{survivalParticipants} -\title{Participants contributing to a survival analysis} -\usage{ -survivalParticipants(result) -} -\arguments{ -\item{result}{Result object} -} -\value{ -References to the study participants contributing to -a given analysis -} -\description{ -Participants contributing to a survival analysis -} -\examples{ -\donttest{ -cdm <- mockMGUS2cdm() -surv <- estimateSingleEventSurvival(cdm, - targetCohortTable = "mgus_diagnosis", - outcomeCohortTable = "death_cohort", - returnParticipants = TRUE) -survivalParticipants(surv) -} -} diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png new file mode 100644 index 0000000..d013d1a Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-96x96.png b/pkgdown/favicon/favicon-96x96.png new file mode 100644 index 0000000..127afa8 Binary files /dev/null and b/pkgdown/favicon/favicon-96x96.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico new file mode 100644 index 0000000..38571a8 Binary files /dev/null and b/pkgdown/favicon/favicon.ico differ diff --git a/pkgdown/favicon/favicon.svg b/pkgdown/favicon/favicon.svg new file mode 100644 index 0000000..d90fd29 --- /dev/null +++ b/pkgdown/favicon/favicon.svg @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/pkgdown/favicon/site.webmanifest b/pkgdown/favicon/site.webmanifest new file mode 100644 index 0000000..4ebda26 --- /dev/null +++ b/pkgdown/favicon/site.webmanifest @@ -0,0 +1,21 @@ +{ + "name": "", + "short_name": "", + "icons": [ + { + "src": "/web-app-manifest-192x192.png", + "sizes": "192x192", + "type": "image/png", + "purpose": "maskable" + }, + { + "src": "/web-app-manifest-512x512.png", + "sizes": "512x512", + "type": "image/png", + "purpose": "maskable" + } + ], + "theme_color": "#ffffff", + "background_color": "#ffffff", + "display": "standalone" +} \ No newline at end of file diff --git a/pkgdown/favicon/web-app-manifest-192x192.png b/pkgdown/favicon/web-app-manifest-192x192.png new file mode 100644 index 0000000..3fccea4 Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-192x192.png differ diff --git a/pkgdown/favicon/web-app-manifest-512x512.png b/pkgdown/favicon/web-app-manifest-512x512.png new file mode 100644 index 0000000..28f53ef Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-512x512.png differ diff --git a/tests/testthat/test-addCohortSurvival.R b/tests/testthat/test-addCohortSurvival.R index f93738b..043e72d 100644 --- a/tests/testthat/test-addCohortSurvival.R +++ b/tests/testthat/test-addCohortSurvival.R @@ -547,6 +547,14 @@ test_that("expected errors", { temporary = "maybe" )) + expect_error(cdm$mgus_diagnosis %>% + addCohortSurvival( + cdm = cdm, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + outcomeWashout = c(0,1) + )) + # cdm <- PatientProfiles::mockPatientProfiles() # cdm[["cohort1"]] <- cdm[["cohort1"]] %>% # dplyr::group_by(subject_id) %>% @@ -738,6 +746,7 @@ test_that("allow overwrite of time and status", { # currently need to add attribute back to rerun attr(cdm2$cohort1, "set") <- cohort1_set attr(cdm2$cohort1, "count") <- cohort1_count + attr(cdm2$cohort1, "tbl_name") <- "cohort1" cdm2$cohort1 <- cdm2$cohort1 %>% addCohortSurvival( diff --git a/tests/testthat/test-benchmarkCohortSurvival.R b/tests/testthat/test-benchmarkCohortSurvival.R deleted file mode 100644 index 15a3000..0000000 --- a/tests/testthat/test-benchmarkCohortSurvival.R +++ /dev/null @@ -1,88 +0,0 @@ -test_that("mgus example: benchmark", { - skip_on_cran() - - cdm <- mockMGUS2cdm() - cdm$condition_occurrence <- cdm$death_cohort %>% - dplyr::rename("condition_start_date" = "cohort_start_date", - "condition_end_date" = "cohort_end_date") %>% - dplyr::compute() - cdm$drug_exposure <- cdm$progression %>% - dplyr::rename("drug_exposure_start_date" = "cohort_start_date", - "drug_exposure_end_date" = "cohort_end_date")%>% - dplyr::compute() - timings <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47) - timings_p <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, returnParticipants = TRUE) - timings_s <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 5, strata = list("sex" = c("sex"))) - timings2 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 8) - timings3 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23) - timings4 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23, outcomeDateVariable = "cohort_end_date") - timings5 <- benchmarkCohortSurvival(cdm, targetSize = 1000, competingOutcomeSize = 80, outcomeSize = 90, outcomeDateVariable = "cohort_end_date") - timings6 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 7, censorOnCohortExit = TRUE) - timings7 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 4, censorOnDate = as.Date("1920-01-01")) - timings8 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, followUpDays = 30) - timings9 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, minCellCount = 2) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_p))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings_p$task)) - expect_true(timings_p %>% dplyr::select(with_participants) %>% dplyr::distinct() %>% dplyr::pull() == "Yes") - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_s))) - expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 5") %in% timings_s$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings2))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 8") %in% timings2$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings3))) - expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings3$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings4))) - expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings4$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings5))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 90", "generating competing outcome cohort size 80") %in% timings5$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings6))) - expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 7") %in% timings6$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings7))) - expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 4") %in% timings7$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings8))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings8$task)) - - expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings9))) - expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings9$task)) - - CDMConnector::cdmDisconnect(cdm) -}) - -test_that("expected errors benchmark", { - skip_on_cran() - - cdm <- mockMGUS2cdm() - cdm$condition_occurrence <- cdm$death_cohort %>% - dplyr::rename("condition_start_date" = "cohort_start_date", - "condition_end_date" = "cohort_end_date") %>% - dplyr::compute() - expect_error(benchmarkCohortSurvival("cdm")) - expect_error(benchmarkCohortSurvival(cdm)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = "size")) - expect_error(benchmarkCohortSurvival(targetSize = 30)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = "3")) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, competingOutcomeSize = "no")) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, outcomeDateVariable = FALSE)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnCohortExit = NULL)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnDate = TRUE)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, followUpDays = "Inf")) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, strata = c("age" = "age"))) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, eventGap = list(1,2))) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, estimateGap = 1)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, minCellCount = FALSE)) - expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, returnParticipants = "TRUE")) - - CDMConnector::cdmDisconnect(cdm) -}) diff --git a/tests/testthat/test-estimateSurvival.R b/tests/testthat/test-estimateSurvival.R index c910c88..7f0e18a 100644 --- a/tests/testthat/test-estimateSurvival.R +++ b/tests/testthat/test-estimateSurvival.R @@ -15,15 +15,14 @@ test_that("mgus example: no Competing risk", { ) %>% asSurvivalResult() expect_true(tibble::is_tibble(surv)) expect_true(all(c( - "cdm_name","result_type", - "cohort", + "cdm_name", "result_type", + "target_cohort", "strata_name", "strata_level", "variable_name","variable_level", "estimate_name", "estimate_value", "time", - "outcome", - "analysis_type") %in% + "outcome") %in% colnames(surv))) expect_true(surv %>% dplyr::select(time) %>% dplyr::distinct() %>% dplyr::tally() == 425) @@ -235,7 +234,7 @@ test_that("multiple exposures, multiple outcomes: single event", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -245,24 +244,24 @@ test_that("multiple exposures, multiple outcomes: single event", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" # one target, one outcome - expect_no_error(surv <- estimateSingleEventSurvival(cdm2, + expect_warning(surv <- estimateSingleEventSurvival(cdm2, targetCohortTable = "exposure_cohort", targetCohortId = 1, outcomeCohortTable = "cohort1", outcomeCohortId = 2 ) %>% asSurvivalResult()) - expect_equal(unique(surv$cohort), + expect_equal(unique(surv$target_cohort), omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id == 1) %>% dplyr::pull("cohort_name")) @@ -278,7 +277,7 @@ test_that("multiple exposures, multiple outcomes: single event", { outcomeCohortTable = "cohort1", outcomeCohortId = 2 ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -295,7 +294,7 @@ test_that("multiple exposures, multiple outcomes: single event", { outcomeCohortId = c(2,3), minCellCount = 0 ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -304,7 +303,7 @@ test_that("multiple exposures, multiple outcomes: single event", { dplyr::filter(cohort_definition_id %in% c(2,3)) %>% dplyr::pull("cohort_name")) - expect_equal(sort(unique(attr(surv, "event")$cohort)), + expect_equal(sort(unique(attr(surv, "event")$target_cohort)), sort(omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -313,7 +312,7 @@ test_that("multiple exposures, multiple outcomes: single event", { dplyr::filter(cohort_definition_id %in% c(2,3)) %>% dplyr::pull("cohort_name")) - expect_equal(sort(unique(attr(surv, "summary")$cohort)), + expect_equal(sort(unique(attr(surv, "summary")$target_cohort)), sort(omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -327,7 +326,7 @@ test_that("multiple exposures, multiple outcomes: single event", { targetCohortTable = "exposure_cohort", outcomeCohortTable = "cohort1" ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -424,7 +423,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -435,13 +434,13 @@ test_that("multiple exposures, multiple outcomes: competing risk", { cohort2 = competing_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -455,7 +454,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { competingOutcomeCohortTable = "cohort2", competingOutcomeCohortId = 4 ) %>% asSurvivalResult()) - expect_equal(unique(surv$cohort), + expect_equal(unique(surv$target_cohort), omopgenerics::settings(cdm2$exposure_cohort) %>% dplyr::filter(cohort_definition_id == 1) %>% dplyr::pull("cohort_name")) @@ -476,7 +475,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { competingOutcomeCohortTable = "cohort2", competingOutcomeCohortId = 4 ) %>% asSurvivalResult()) - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm2$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -497,7 +496,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { competingOutcomeCohortTable = "cohort2", competingOutcomeCohortId = 4 ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm2$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -519,7 +518,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { competingOutcomeCohortId = c(4,5), minCellCount = 1 ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm2$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -537,7 +536,7 @@ test_that("multiple exposures, multiple outcomes: competing risk", { outcomeCohortTable = "cohort1", competingOutcomeCohortTable = "cohort2" ) %>% asSurvivalResult() - expect_equal(sort(unique(surv$cohort)), + expect_equal(sort(unique(surv$target_cohort)), sort(omopgenerics::settings(cdm2$exposure_cohort) %>% dplyr::filter(cohort_definition_id %in% c(1,2)) %>% dplyr::pull("cohort_name"))) @@ -632,7 +631,7 @@ test_that("required estimateGap", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -643,13 +642,13 @@ test_that("required estimateGap", { cohort2 = competing_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -673,7 +672,7 @@ test_that("required estimateGap", { surv2 <- surv %>% dplyr::filter(time %in% seq(0, 1235, by = 2)) - expect_true(all.equal(surv_pair, surv2)) + expect_true(all.equal(surv_pair, surv2, check.attributes = FALSE)) # two targets, two outcomes, competing risk event survCR <- estimateCompetingRiskSurvival(cdm2, @@ -698,7 +697,7 @@ test_that("required estimateGap", { survCR <- survCR %>% dplyr::filter(time %in% seq(0,1235,by = 2)) - expect_true(all.equal(survCR, survCR_time)) + expect_true(all.equal(survCR, survCR_time, check.attributes = FALSE)) CDMConnector::cdmDisconnect(cdm2) @@ -767,7 +766,7 @@ test_that("funcionality with created dataset", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -777,13 +776,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -859,7 +858,7 @@ test_that("funcionality with created dataset", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -870,13 +869,13 @@ test_that("funcionality with created dataset", { cohort2 = competing_risk_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -935,7 +934,7 @@ test_that("funcionality with created dataset", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -945,13 +944,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -973,7 +972,7 @@ test_that("funcionality with created dataset", { CDMConnector::cdmDisconnect(cdm2) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -983,13 +982,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1042,7 +1041,7 @@ test_that("funcionality with created dataset", { ) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1052,13 +1051,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1126,7 +1125,7 @@ test_that("funcionality with created dataset", { ) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1136,13 +1135,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1181,7 +1180,7 @@ test_that("funcionality with created dataset", { ) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1191,13 +1190,13 @@ test_that("funcionality with created dataset", { cohort1 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1277,7 +1276,7 @@ test_that("different exposure cohort ids", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1287,13 +1286,13 @@ test_that("different exposure cohort ids", { cohort2 = outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1363,47 +1362,12 @@ test_that("expected errors", { expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, censorOnDate = "2020-09-02")) expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, minimumSurvivalDays = -3)) expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, minimumSurvivalDays = c(0,3))) - expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, outcomeWashout = 0)) + expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, outcomeWashout = -1)) expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, competingOutcomeWashout = "1")) CDMConnector::cdmDisconnect(cdm) }) -test_that("return participants", { - skip_on_cran() - cdm <- mockMGUS2cdm() - surv1 <- estimateSingleEventSurvival(cdm, - targetCohortTable = "mgus_diagnosis", - outcomeCohortTable = "death_cohort", - returnParticipants = FALSE - ) %>% asSurvivalResult() - expect_true(is.null(attr(surv1, "participants"))) - surv2 <- estimateSingleEventSurvival(cdm, - targetCohortTable = "mgus_diagnosis", - targetCohortId = 1, - outcomeCohortTable = "death_cohort", - outcomeCohortId = 1, - eventGap = 7, returnParticipants = TRUE - ) %>% asSurvivalResult() - expect_false(is.null(attr(surv2, "participants"))) - - expect_equal( - colnames(survivalParticipants(surv2) %>% - head(1) %>% - dplyr::collect()), - c( - "cohort_definition_id", - "subject_id", - "cohort_start_date", - "cohort_end_date", - "exposure_id", - "outcome_id" - ) - ) - - CDMConnector::cdmDisconnect(cdm) -}) - test_that("within cohort survival", { skip_on_cran() cohort <- dplyr::tibble( @@ -1446,7 +1410,7 @@ test_that("within cohort survival", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1455,13 +1419,13 @@ test_that("within cohort survival", { cohort1 = cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1560,7 +1524,7 @@ test_that("strata specific survival", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1571,13 +1535,13 @@ test_that("strata specific survival", { cohort2 = other_outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1599,7 +1563,7 @@ test_that("strata specific survival", { # only males cdm2[["exposure_cohort_m"]] <- cdm2$exposure_cohort %>% dplyr::filter(sex =="Male") %>% - dplyr::compute() + dplyr::compute(temporary = FALSE, name = "exposure_cohort_m") surv_m <- estimateSingleEventSurvival(cdm2, targetCohortTable = "exposure_cohort_m", outcomeCohortTable = "cohort1", @@ -1746,7 +1710,7 @@ test_that("multiple rows per person - same observation period", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1757,13 +1721,13 @@ test_that("multiple rows per person - same observation period", { cohort2 = competing_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1771,7 +1735,6 @@ test_that("multiple rows per person - same observation period", { expect_no_error(surv <- estimateSingleEventSurvival(cdm2, targetCohortTable = "exposure_cohort", outcomeCohortTable = "cohort1", - returnParticipants = TRUE, minCellCount = 1 ) %>% asSurvivalResult()) @@ -1789,8 +1752,7 @@ test_that("multiple rows per person - same observation period", { expect_no_error(surv <- estimateCompetingRiskSurvival(cdm2, targetCohortTable = "exposure_cohort", outcomeCohortTable = "cohort1", - competingOutcomeCohortTable = "cohort2", - returnParticipants = TRUE + competingOutcomeCohortTable = "cohort2" ) %>% asSurvivalResult()) @@ -1962,7 +1924,7 @@ test_that("minimum survival days", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -1973,13 +1935,13 @@ test_that("minimum survival days", { cohort2 = other_outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db, cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -1992,8 +1954,9 @@ test_that("minimum survival days", { ) %>% asSurvivalResult() expect_true(attr(surv, "attrition") %>% - dplyr::filter(reason == "Survival days for outcome less than 1") %>% - dplyr::select(excluded_records) == 1) + dplyr::filter(strata_level == "Survival days for outcome less than 1", + variable_name == "excluded_records") %>% + dplyr::pull(estimate_value) == 1) surv_cr <- estimateCompetingRiskSurvival(cdm2, targetCohortTable = "exposure_cohort", @@ -2004,8 +1967,9 @@ test_that("minimum survival days", { ) %>% asSurvivalResult() expect_true(attr(surv_cr, "attrition") %>% - dplyr::filter(reason == "Survival days for competing outcome less than 1") %>% - dplyr::select(excluded_records) == 1) + dplyr::filter(strata_level == "Survival days for outcome less than 1", + variable_name == "excluded_records") %>% + dplyr::pull(estimate_value) == 1) CDMConnector::cdm_disconnect(cdm2) @@ -2093,7 +2057,7 @@ test_that("outcomeWashout", { race_concept_id = c(rep(0,5)) ) - cdm <- omopgenerics::cdmFromTables( + suppressWarnings(cdm <- omopgenerics::cdmFromTables( tables = list( person = person, observation_period = observation_period @@ -2104,13 +2068,12 @@ test_that("outcomeWashout", { cohort2 = other_outcome_cohort ), cdmName = "mock_es" - ) + )) db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - cdm2 = CDMConnector::copy_cdm_to(db, - cdm, + suppressWarnings(cdm2 <- CDMConnector::copy_cdm_to(db,cdm, schema = "main", - overwrite = TRUE) + overwrite = TRUE)) attr(cdm2, "cdm_schema") <- "main" attr(cdm2, "write_schema") <- "main" @@ -2164,10 +2127,10 @@ test_that("restrictedMeanFollowUp", { tsurv <- tableSurvival(survCR, type = "tibble", .options = list(includeHeaderKey = FALSE)) tsurvrmean <- tableSurvival(survCR_rmean, type = "tibble", .options = list(includeHeaderKey = FALSE)) - expect_true(all.equal(tsurv %>% dplyr::select(-"Restricted mean survival"), - tsurvrmean %>% dplyr::select(-"Restricted mean survival"))) - expect_true(all(tsurv %>% dplyr::pull("Restricted mean survival") == c("35.00", "260.00"))) - expect_true(all(tsurvrmean %>% dplyr::pull("Restricted mean survival") == c("3.00", "28.00"))) + expect_true(all.equal(tsurv %>% dplyr::select(- dplyr::contains("Restricted mean survival")), + tsurvrmean %>% dplyr::select(- dplyr::contains("Restricted mean survival")))) + expect_true(all(tsurv %>% dplyr::pull(dplyr::contains("Restricted mean survival")) == c("35.00", "260.00"))) + expect_true(all(tsurvrmean %>% dplyr::pull(dplyr::contains("Restricted mean survival")) == c("3.00", "28.00"))) CDMConnector::cdm_disconnect(cdm) }) @@ -2194,3 +2157,101 @@ test_that("no outcomes among cohort", { )) }) + +test_that("mgus example: empty outcome tables or cohorts", { + cdm <- mockMGUS2cdm() + cdm$death_c <- cdm$death_cohort %>% + dplyr::filter(cohort_definition_id == 2) %>% + dplyr::compute(name = "death_c") + attr(cdm$death_c, "cohort_set") <- dplyr::tibble( + cohort_definition_id = 1, + cohort_name = "death_c" + ) + attr(cdm$death_c, "cohort_attrition") <- omopgenerics:::defaultCohortAttrition(cdm$death_c, attr(cdm$death_c, "cohort_set")) + attr(cdm$death_c, "tbl_name") <- "death_c" + + # Whole empty table throws warning for outcome + expect_warning(estimateSingleEventSurvival(cdm, targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_c")) + + # and error for target + expect_error(estimateSingleEventSurvival(cdm, targetCohortTable = "death_c", + outcomeCohortTable = "mgus_diagnosis")) + + # Some empty cohortIds are just not calculated, for both primary and competing outcomes + attr(cdm$death_cohort, "cohort_set") <- dplyr::tibble( + cohort_definition_id = c(1,3), + cohort_name = c("death_cohort", "death_test_empty") + ) + attr(cdm$death_cohort, "cohort_attrition") <- omopgenerics:::defaultCohortAttrition(cdm$death_cohort, attr(cdm$death_cohort, "cohort_set")) + attr(cdm$progression, "cohort_set") <- dplyr::tibble( + cohort_definition_id = c(1,2), + cohort_name = c("progression", "progression_fake_empty") + ) + attr(cdm$progression, "cohort_attrition") <- omopgenerics:::defaultCohortAttrition(cdm$progression, attr(cdm$progression, "cohort_set")) + + expect_warning(emptyResultBis <- estimateSingleEventSurvival(cdm, targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort", + outcomeCohortId = c(1,3))) + expect_true(all(emptyResultBis %>% + dplyr::filter(variable_level == "death_test_empty" & variable_name == "survival_probability") %>% + dplyr::pull("estimate_value") == c(1))) + + expect_warning(emptyResultBisBis <- estimateCompetingRiskSurvival(cdm, targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "progression", + outcomeCohortId = c(1,2), + competingOutcomeCohortTable = "death_cohort", + competingOutcomeCohortId = c(1,3))) + + expect_true(emptyResultBisBis %>% + dplyr::select(variable_level) %>% + dplyr::distinct() %>% + dplyr::tally() %>% + dplyr::pull() == 3) + + PatientProfiles::mockDisconnect(cdm) + }) + +test_that("n_censor", { + skip_on_cran() + cdm <- mockMGUS2cdm() + + surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1 + ) + + eventstable <- attr(surv %>% asSurvivalResult(), "events") + + expect_true(all(eventstable %>% + dplyr::arrange(time) %>% + dplyr::filter(estimate_name == "n_censor_count") %>% + dplyr::pull(estimate_value) == c(0,3,27,79,74,54,54,58,33,18,10,6))) + + CDMConnector::cdm_disconnect(cdm) + }) + +test_that("no outcomes among cohort", { + + cdm <- mockMGUS2cdm() + cdm$death_cohort <- cdm$death_cohort |> + dplyr::filter(subject_id == 1) + cdm$mgus_diagnosis <- cdm$mgus_diagnosis |> + dplyr::filter(subject_id != 1) + + expect_no_error(surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort" + )) + + # empty death table + cdm$death_cohort <- cdm$death_cohort |> + dplyr::filter(subject_id == 2) + expect_warning(surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + outcomeCohortTable = "death_cohort" + )) + + }) diff --git a/tests/testthat/test-plotSurvival.R b/tests/testthat/test-plotSurvival.R index 505288c..105f401 100644 --- a/tests/testthat/test-plotSurvival.R +++ b/tests/testthat/test-plotSurvival.R @@ -167,3 +167,40 @@ test_that("plot colour for cumulative incidence plots", { CDMConnector::cdmDisconnect(cdm) }) + +test_that("plot risk tables", { + skip_on_cran() + cdm <- mockMGUS2cdm() + surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1, + strata=list(c("sex", "age_group")) + ) + plot <- plotSurvival(surv, + facet = "strata_name", + colour = "strata_level", + riskTable = TRUE) + + expect_true(ggplot2::is.ggplot(plot)) + CDMConnector::cdmDisconnect(cdm) +}) + +test_that("plot options", { + skip_on_cran() + cdm <- mockMGUS2cdm() + surv <- estimateSingleEventSurvival(cdm, + targetCohortTable = "mgus_diagnosis", + targetCohortId = 1, + outcomeCohortTable = "death_cohort", + outcomeCohortId = 1) + plot <- plotSurvival(surv, + xlim = 100, + ylim = c(0.25, 1), + riskTable = TRUE, + riskInterval = 10) + + expect_true(ggplot2::is.ggplot(plot)) + CDMConnector::cdmDisconnect(cdm) +}) diff --git a/tests/testthat/test-reexports-omopgenerics.R b/tests/testthat/test-reexports-omopgenerics.R new file mode 100644 index 0000000..0c4020d --- /dev/null +++ b/tests/testthat/test-reexports-omopgenerics.R @@ -0,0 +1,38 @@ + +test_that("omopgenerics reexports work", { + cdm <- mockMGUS2cdm() + surv <- estimateSingleEventSurvival(cdm, + "mgus_diagnosis", + "death_cohort") + + survCR <- estimateCompetingRiskSurvival(cdm, + "mgus_diagnosis", + "progression", + "death_cohort") + + # importing and exporting + result_path <- tempdir("result") + omopgenerics::exportSummarisedResult(surv, path = result_path) + surv_imported <- omopgenerics::importSummarisedResult(result_path) + expect_no_error(tableSurvival(surv_imported, type = "tibble")) + expect_no_error(dplyr::is.tbl(omopgenerics::settings(surv_imported))) + + # result type using bind + expect_no_error(omopgenerics::validateResultArgument(surv_imported)) + expect_no_error(omopgenerics::validateResultArgument(omopgenerics::bind(surv, survCR))) + + # suppresing results + surv_nosup <- estimateSingleEventSurvival(cdm, + "mgus_diagnosis", + "death_cohort", + minCellCount = 0) + + expect_false(isTRUE(all.equal(surv, surv_nosup, check.attributes = FALSE))) + + omopgenerics::exportSummarisedResult(surv_nosup, path = result_path) + surv_nosup_imported <- omopgenerics::importSummarisedResult(result_path) + expect_true(isTRUE(all.equal(surv, surv_nosup_imported, check.attributes = FALSE))) + + CDMConnector::cdmDisconnect(cdm) + +}) diff --git a/tests/testthat/test-tableSurvival.R b/tests/testthat/test-tableSurvival.R index 7b8b01d..8a12ce0 100644 --- a/tests/testthat/test-tableSurvival.R +++ b/tests/testthat/test-tableSurvival.R @@ -11,14 +11,16 @@ test_that("survival summary", { ) res <- tableSurvival(surv, times = c(100,200), type = "tibble") expect_true(res %>% - dplyr::tally() == 1) - expect_true(all( - colnames(res) == - c('CDM name', 'Cohort', 'Outcome name', '[header_level]Number records', - '[header_level]Number events', '[header_level]Median survival (95% CI)', - '[header_level]Restricted mean survival (SE)', - '[header_level]100 days survival estimate', - '[header_level]200 days survival estimate'))) + dplyr::tally() == 1) + expect_true(all( + colnames(res) == + c('CDM name', 'Target cohort', 'Outcome name', + '[header_name]Estimate name\n[header_level]Number records', + '[header_name]Estimate name\n[header_level]Number events', + '[header_name]Estimate name\n[header_level]Median survival (95% CI)', + '[header_name]Estimate name\n[header_level]Restricted mean survival (SE)', + '[header_name]Estimate name\n[header_level]100 days survival estimate', + '[header_name]Estimate name\n[header_level]200 days survival estimate'))) survCR <- estimateCompetingRiskSurvival(cdm, targetCohortTable = "mgus_diagnosis", @@ -31,21 +33,22 @@ test_that("survival summary", { gt1 <- tableSurvival(survCR, times = c(100,200)) expect_true(gt1$`_data` %>% dplyr::tally() == 2) - expect_true(all( - colnames(gt1$`_data`) == - c('CDM name', 'Cohort', 'Outcome type', 'Outcome name', - '[header_level]Number records', '[header_level]Number events', - '[header_level]Restricted mean survival', - '[header_level]100 days survival estimate', - '[header_level]200 days survival estimate'))) + expect_true(all( + colnames(gt1$`_data`) == + c('CDM name', 'Target cohort', 'Outcome type', 'Outcome name', + '[header_name]Estimate name\n[header_level]Number records', + '[header_name]Estimate name\n[header_level]Number events', + '[header_name]Estimate name\n[header_level]Restricted mean survival', + '[header_name]Estimate name\n[header_level]100 days survival estimate', + '[header_name]Estimate name\n[header_level]200 days survival estimate'))) fx1 <- tableSurvival(survCR, type = "flextable") expect_true(fx1$body$dataset %>% dplyr::tally() == 2) - expect_true(all( - colnames(fx1$body$dataset ) == - c('CDM name', 'Cohort', 'Outcome type', 'Outcome name', - 'Number records', 'Number events', - 'Restricted mean survival'))) + expect_true(all( + colnames(fx1$body$dataset ) == + c('CDM name', 'Target cohort', 'Outcome type', 'Outcome name', + 'Estimate name\nNumber records', 'Estimate name\nNumber events', + 'Estimate name\nRestricted mean survival'))) survsex <- estimateSingleEventSurvival(cdm, targetCohortTable = "mgus_diagnosis", @@ -57,22 +60,25 @@ test_that("survival summary", { ) gt2 <- tableSurvival(survsex) - expect_true(all( - colnames(gt2$`_data`) == - c('CDM name', 'Cohort', 'Sex', 'Outcome name', - '[header_level]Number records', '[header_level]Number events', - '[header_level]Median survival (95% CI)', - '[header_level]Restricted mean survival (SE)'))) + expect_true(all( + colnames(gt2$`_data`) == + c('CDM name', 'Target cohort', 'Sex', 'Outcome name', + '[header_name]Estimate name\n[header_level]Number records', + '[header_name]Estimate name\n[header_level]Number events', + '[header_name]Estimate name\n[header_level]Median survival (95% CI)', + '[header_name]Estimate name\n[header_level]Restricted mean survival (SE)'))) gt3 <- tableSurvival(survsex, header = c("cdm_name", "group"), splitStrata = FALSE) - expect_true(all( + expect_true(all( colnames(gt3$`_data`) == - c('Strata name', 'Strata level', 'Outcome name', 'Estimate name', - '[header]CDM name\n[header_level]mock\n[header]Cohort\n[header_level]Mgus diagnosis'))) + c('Sex', 'Outcome name', 'Estimate name', + '[header_name]CDM name\n[header_level]mock\n[header_name]Target cohort\n[header_level]mgus_diagnosis'))) # In years - expect_true(all(tableSurvival(surv, times = c(365,420), type = "tibble") == - tableSurvival(surv, times = c(1,1.15), timeScale = "years", type = "tibble"))) + expect_true(all(tableSurvival(surv, times = c(365,420), type = "tibble") %>% + dplyr::select(-dplyr::contains("mean"), -dplyr::contains("median")) == + tableSurvival(surv, times = c(1,1.15), timeScale = "years", type = "tibble") %>% + dplyr::select(-dplyr::contains("mean"), -dplyr::contains("median")) )) CDMConnector::cdmDisconnect(cdm) diff --git a/vignettes/a01_Single_event_of_interest.Rmd b/vignettes/a01_Single_event_of_interest.Rmd index 60c285b..2b7164f 100644 --- a/vignettes/a01_Single_event_of_interest.Rmd +++ b/vignettes/a01_Single_event_of_interest.Rmd @@ -2,7 +2,7 @@ title: "Single outcome event of interest" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{a01_Single_event_of_interest} + %\VignetteIndexEntry{Single outcome event of interest} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -96,17 +96,6 @@ And we also now have summary statistics for each of the strata as well as overal tableSurvival(MGUS_death) ``` -## Summarising participants -If we set returnParticipants as TRUE then we will also be able to access the individuals that contributed to the analysis. -```{r} -MGUS_death <- estimateSingleEventSurvival(cdm, - targetCohortTable = "mgus_diagnosis", - outcomeCohortTable = "death_cohort", - returnParticipants = TRUE -) -survivalParticipants(MGUS_death) -``` - ## Disconnect from the cdm database connection ```{r} cdm_disconnect(cdm) diff --git a/vignettes/a02_Competing_risk_survival.Rmd b/vignettes/a02_Competing_risk_survival.Rmd index 1c158e7..a18afaa 100644 --- a/vignettes/a02_Competing_risk_survival.Rmd +++ b/vignettes/a02_Competing_risk_survival.Rmd @@ -2,7 +2,7 @@ title: "Competing risk survival" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{a02_Competing_risk_survival} + %\VignetteIndexEntry{Competing risk survival} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -109,21 +109,6 @@ And we also now have summary statistics for each of the strata as well as overal tableSurvival(MGUS_death_prog) ``` -## Summarising participants - -If we set returnParticipants as TRUE then we will also be able to access the individuals that contributed to the analysis. - -```{r} -MGUS_death_prog <- estimateCompetingRiskSurvival(cdm, - targetCohortTable = "mgus_diagnosis", - outcomeCohortTable = "progression", - competingOutcomeCohortTable = "death_cohort", - returnParticipants = TRUE -) -survivalParticipants(MGUS_death_prog) -``` - - ## Disconnect from the cdm database connection ```{r} diff --git a/vignettes/a03_Further_survival_analyses.Rmd b/vignettes/a03_Further_survival_analyses.Rmd index c035e8c..881367c 100644 --- a/vignettes/a03_Further_survival_analyses.Rmd +++ b/vignettes/a03_Further_survival_analyses.Rmd @@ -2,7 +2,7 @@ title: "Further survival analyses" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{a03_Further_survival_analyses} + %\VignetteIndexEntry{Further survival analyses} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---