+#> 1 Overall Overall 133 (4.34) 98 (92 to 103)
+#> 2 age_group <70 197 (8.35) 180 (158 to 206)
+#> 3 age_group >=70 86 (2.89) 71 (66 to 77)
+#> 4 sex F 143 (6.4) 108 (100 to 121)
+#> 5 sex M 125 (5.7) 88 (79 to 97)
+#> 6 age_group and sex <70 and F 220 (12.97) 215 (179 to 260)
+#> 7 age_group and sex <70 and M 183 (10.24) 158 (139 to 189)
+#> 8 age_group and sex >=70 and F 96 (4.39) 82 (75 to 94)
+#> 9 age_group and sex >=70 and M 80 (4.82) 61 (54 to 70)
+#> # ℹ abbreviated names: ¹​`Restricted mean survival (se)`,
+#> # ²​`Median survival (95% CI)`
+```
+
+### Disconnect from the cdm database connection
+
+``` r
+cdm_disconnect(cdm)
+```
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 0000000..8e4f7f7
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,8 @@
+url: https://darwin-eu.github.io/CohortSurvival/
+template:
+ bootstrap: 5
+ bootswatch: flatly
+navbar:
+ structure:
+ right: [github]
+
diff --git a/man/CohortSurvival-package.Rd b/man/CohortSurvival-package.Rd
new file mode 100644
index 0000000..4d24470
--- /dev/null
+++ b/man/CohortSurvival-package.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CohortSurvival-package.R
+\docType{package}
+\name{CohortSurvival-package}
+\alias{CohortSurvival}
+\alias{CohortSurvival-package}
+\title{CohortSurvival: What the Package Does (One Line, Title Case)}
+\description{
+What the package does (one paragraph).
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://darwin-eu.github.io/CohortSurvival/}
+}
+
+}
+\author{
+\strong{Maintainer}: First Last \email{first.last@example.com} (\href{https://orcid.org/YOUR-ORCID-ID}{ORCID})
+
+}
+\keyword{internal}
diff --git a/man/addCohortSurvival.Rd b/man/addCohortSurvival.Rd
new file mode 100644
index 0000000..4f9c431
--- /dev/null
+++ b/man/addCohortSurvival.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/addCohortSurvival.R
+\name{addCohortSurvival}
+\alias{addCohortSurvival}
+\title{Add survival information to a cohort table}
+\usage{
+addCohortSurvival(
+ x,
+ cdm,
+ outcomeCohortTable,
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_start_date",
+ censorOnCohortExit = FALSE,
+ censorOnDate = NULL,
+ followUpDays = Inf
+)
+}
+\arguments{
+\item{x}{cohort table to add survival information}
+
+\item{cdm}{CDM reference}
+
+\item{outcomeCohortTable}{The outcome cohort table of interest.}
+
+\item{outcomeCohortId}{ID of event cohorts to include. Only one outcome
+(and so one ID) can be considered.}
+
+\item{outcomeDateVariable}{Variable containing date of outcome event}
+
+\item{censorOnCohortExit}{If TRUE, an individual's follow up will be
+censored at their cohort exit}
+
+\item{censorOnDate}{if not NULL, an individual's follow up will be censored
+at the given date}
+
+\item{followUpDays}{Number of days to follow up individuals (lower bound 1,
+upper bound Inf)}
+}
+\value{
+Two additional columns will be added to x. The "time" column will
+contain number of days to censoring. The "status" column will indicate
+whether the patient had the event (value: 1), or did not have the event
+(value: 0)
+}
+\description{
+Add survival information to a cohort table
+}
+\examples{
+cohort <- dplyr::tibble(
+cohort_definition_id = c(1,1,1),
+subject_id = c(1,2,3),
+cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2020-03-01"))
+)
+cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort)
+cdm$cohort1 <- cdm$cohort1 \%>\%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1
+ )
+
+}
diff --git a/man/benchmarkCohortSurvival.Rd b/man/benchmarkCohortSurvival.Rd
new file mode 100644
index 0000000..169993d
--- /dev/null
+++ b/man/benchmarkCohortSurvival.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/benchmarkCohortSurvival.R
+\name{benchmarkCohortSurvival}
+\alias{benchmarkCohortSurvival}
+\title{Estimate performance of estimateSurvival function for benchmarking}
+\usage{
+benchmarkCohortSurvival(
+ cdm,
+ targetSize,
+ outcomeSize,
+ outcomeDateVariable = "cohort_start_date",
+ competingOutcomeSize = NULL,
+ competingOutcomeDateVariable = "cohort_start_date",
+ censorOnCohortExit = FALSE,
+ censorOnDate = NULL,
+ followUpDays = Inf,
+ strata = NULL,
+ timeGap = c(1, 7, 30, 365),
+ times = NULL,
+ minCellCount = 5,
+ returnParticipants = FALSE
+)
+}
+\arguments{
+\item{cdm}{CDM reference}
+
+\item{targetSize}{number of people in the target cohort table}
+
+\item{outcomeSize}{number of people in the outcome cohort table}
+
+\item{outcomeDateVariable}{Variable containing date of outcome event}
+
+\item{competingOutcomeSize}{number of people in the competing outcome cohort table}
+
+\item{competingOutcomeDateVariable}{Variable containing date of
+competing event}
+
+\item{censorOnCohortExit}{If TRUE, an individual's follow up will be
+censored at their cohort exit}
+
+\item{censorOnDate}{if not NULL, an individual's follow up will be censored
+at the given date}
+
+\item{followUpDays}{Number of days to follow up individuals (lower bound 1,
+upper bound Inf)}
+
+\item{strata}{strata}
+
+\item{timeGap}{Days between time points for which to report survival
+estimates. First day will be day zero with risk estimates provided
+for times up to the end of follow-up, with a gap in days equivalent
+to timeGap.}
+
+\item{times}{vector of time points at which to give survival estimates,
+if NULL estimates at all times are calculated}
+
+\item{minCellCount}{The minimum number of events to reported, below which
+results will be obscured. If 0, all results will be reported.}
+
+\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to
+participants from the analysis will be returned allowing for further
+analysis.}
+}
+\value{
+tibble with performance of estimateSurvival function information,
+according to the selected input parameters
+}
+\description{
+Estimate performance of estimateSurvival function for benchmarking
+}
+\examples{
+cdm <- mockMGUS2cdm()
+cdm$condition_occurrence <- cdm$death_cohort \%>\%
+dplyr::rename("condition_start_date" = "cohort_start_date",
+ "condition_end_date" = "cohort_end_date")
+surv_timings <- benchmarkCohortSurvival(
+cdm, targetSize = 100, outcomeSize = 20)
+
+}
diff --git a/man/estimateSingleEventSurvival.Rd b/man/estimateSingleEventSurvival.Rd
new file mode 100644
index 0000000..17ecd13
--- /dev/null
+++ b/man/estimateSingleEventSurvival.Rd
@@ -0,0 +1,83 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/estimateSurvival.R
+\name{estimateSingleEventSurvival}
+\alias{estimateSingleEventSurvival}
+\title{Estimate survival for a given event of interest using cohorts in the OMOP Common Data Model}
+\usage{
+estimateSingleEventSurvival(
+ cdm,
+ targetCohortTable,
+ targetCohortId = 1,
+ outcomeCohortTable,
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_start_date",
+ censorOnCohortExit = FALSE,
+ censorOnDate = NULL,
+ followUpDays = Inf,
+ strata = NULL,
+ timeGap = c(1, 7, 30, 365),
+ times = NULL,
+ minCellCount = 5,
+ returnParticipants = FALSE
+)
+}
+\arguments{
+\item{cdm}{CDM reference}
+
+\item{targetCohortTable}{targetCohortTable}
+
+\item{targetCohortId}{targetCohortId}
+
+\item{outcomeCohortTable}{The outcome cohort table of interest.}
+
+\item{outcomeCohortId}{ID of event cohorts to include. Only one outcome
+(and so one ID) can be considered.}
+
+\item{outcomeDateVariable}{Variable containing date of outcome event}
+
+\item{censorOnCohortExit}{If TRUE, an individual's follow up will be
+censored at their cohort exit}
+
+\item{censorOnDate}{if not NULL, an individual's follow up will be censored
+at the given date}
+
+\item{followUpDays}{Number of days to follow up individuals (lower bound 1,
+upper bound Inf)}
+
+\item{strata}{strata}
+
+\item{timeGap}{Days between time points for which to report survival
+estimates. First day will be day zero with risk estimates provided
+for times up to the end of follow-up, with a gap in days equivalent
+to timeGap.}
+
+\item{times}{vector of time points at which to give survival estimates,
+if NULL estimates at all times are calculated}
+
+\item{minCellCount}{The minimum number of events to reported, below which
+results will be obscured. If 0, all results will be reported.}
+
+\item{returnParticipants}{Either TRUE or FALSE. If TRUE, references to
+participants from the analysis will be returned allowing for further
+analysis.}
+}
+\value{
+tibble with survival information for desired cohort, including:
+time, people at risk, survival probability, cumulative incidence,
+95 CIs, strata and outcome. A tibble with the number of events is
+outputted as an attribute of the output
+}
+\description{
+Estimate survival for a given event of interest using cohorts in the OMOP Common Data Model
+}
+\examples{
+cdm <- mockMGUS2cdm()
+surv <- estimateSingleEventSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ timeGap = 7
+)
+
+}
diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png
new file mode 100644
index 0000000..c092055
Binary files /dev/null and b/man/figures/README-pressure-1.png differ
diff --git a/man/figures/README-unnamed-chunk-10-1.png b/man/figures/README-unnamed-chunk-10-1.png
new file mode 100644
index 0000000..861d862
Binary files /dev/null and b/man/figures/README-unnamed-chunk-10-1.png differ
diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png
new file mode 100644
index 0000000..849034a
Binary files /dev/null and b/man/figures/README-unnamed-chunk-5-1.png differ
diff --git a/man/figures/README-unnamed-chunk-8-1.png b/man/figures/README-unnamed-chunk-8-1.png
new file mode 100644
index 0000000..07076c0
Binary files /dev/null and b/man/figures/README-unnamed-chunk-8-1.png differ
diff --git a/man/figures/README-unnamed-chunk-9-1.png b/man/figures/README-unnamed-chunk-9-1.png
new file mode 100644
index 0000000..861d862
Binary files /dev/null and b/man/figures/README-unnamed-chunk-9-1.png differ
diff --git a/man/figures/hexsticker.png b/man/figures/hexsticker.png
new file mode 100644
index 0000000..5d56b6b
Binary files /dev/null and b/man/figures/hexsticker.png differ
diff --git a/man/generateDeathCohortSet.Rd b/man/generateDeathCohortSet.Rd
new file mode 100644
index 0000000..436e8ce
--- /dev/null
+++ b/man/generateDeathCohortSet.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/generateDeathCohort.R
+\name{generateDeathCohortSet}
+\alias{generateDeathCohortSet}
+\title{To create a death cohort}
+\usage{
+generateDeathCohortSet(
+ cdm,
+ deathInObservation = FALSE,
+ name = "death_cohort",
+ cohortTable = NULL,
+ cohortId = NULL
+)
+}
+\arguments{
+\item{cdm}{CDM reference}
+
+\item{deathInObservation}{If TRUE, restricts deaths included to only those
+observed during an ongoing observation period.}
+
+\item{name}{name for the created death cohort table}
+
+\item{cohortTable}{name of the cohort table to create a death cohort for}
+
+\item{cohortId}{name of the cohort table to create a death cohort for}
+}
+\value{
+A cohort table with a death cohort in cdm
+}
+\description{
+To create a death cohort
+}
diff --git a/man/mockMGUS2cdm.Rd b/man/mockMGUS2cdm.Rd
new file mode 100644
index 0000000..21342a3
--- /dev/null
+++ b/man/mockMGUS2cdm.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mockMGUS2cdm.R
+\name{mockMGUS2cdm}
+\alias{mockMGUS2cdm}
+\title{Create mock CDM reference with survival::mgus2 dataset}
+\usage{
+mockMGUS2cdm()
+}
+\value{
+CDM reference containing data from the survival::mgus2 dataset
+}
+\description{
+Create mock CDM reference with survival::mgus2 dataset
+}
diff --git a/man/pipe.Rd b/man/pipe.Rd
new file mode 100644
index 0000000..a648c29
--- /dev/null
+++ b/man/pipe.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-pipe.R
+\name{\%>\%}
+\alias{\%>\%}
+\title{Pipe operator}
+\usage{
+lhs \%>\% rhs
+}
+\arguments{
+\item{lhs}{A value or the magrittr placeholder.}
+
+\item{rhs}{A function call using the magrittr semantics.}
+}
+\value{
+The result of calling \code{rhs(lhs)}.
+}
+\description{
+See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
+}
+\keyword{internal}
diff --git a/man/plotCumulativeIncidence.Rd b/man/plotCumulativeIncidence.Rd
new file mode 100644
index 0000000..9fd3c27
--- /dev/null
+++ b/man/plotCumulativeIncidence.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotSurvival.R
+\name{plotCumulativeIncidence}
+\alias{plotCumulativeIncidence}
+\title{Plot cumulative incidence}
+\usage{
+plotCumulativeIncidence(
+ result,
+ x = "time",
+ ylim = c(0, NA),
+ ribbon = TRUE,
+ facet = NULL,
+ colour = NULL,
+ colour_name = NULL
+)
+}
+\arguments{
+\item{result}{Survival results}
+
+\item{x}{Variable to plot on x axis}
+
+\item{ylim}{Limits for the Y axis}
+
+\item{ribbon}{If TRUE, the plot will join points using a ribbon}
+
+\item{facet}{Variables to use for facets}
+
+\item{colour}{Variables to use for colours}
+
+\item{colour_name}{Colour legend name}
+}
+\description{
+Plot cumulative incidence
+}
diff --git a/man/plotSurvival.Rd b/man/plotSurvival.Rd
new file mode 100644
index 0000000..f2eac97
--- /dev/null
+++ b/man/plotSurvival.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotSurvival.R
+\name{plotSurvival}
+\alias{plotSurvival}
+\title{Plot survival results}
+\usage{
+plotSurvival(
+ result,
+ x = "time",
+ ylim = c(0, NA),
+ ribbon = TRUE,
+ facet = NULL,
+ colour = NULL,
+ colour_name = NULL
+)
+}
+\arguments{
+\item{result}{Survival results}
+
+\item{x}{Variable to plot on x axis}
+
+\item{ylim}{Limits for the Y axis}
+
+\item{ribbon}{If TRUE, the plot will join points using a ribbon}
+
+\item{facet}{Variables to use for facets}
+
+\item{colour}{Variables to use for colours}
+
+\item{colour_name}{Colour legend name}
+}
+\value{
+A ggplot with the survival results plotted
+}
+\description{
+Plot survival results
+}
diff --git a/man/reexports.Rd b/man/reexports.Rd
new file mode 100644
index 0000000..2042559
--- /dev/null
+++ b/man/reexports.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/summariseSurvivalParticipants.R
+\docType{import}
+\name{reexports}
+\alias{reexports}
+\alias{summariseCharacteristics}
+\title{Objects exported from other packages}
+\keyword{internal}
+\description{
+These objects are imported from other packages. Follow the links
+below to see their documentation.
+
+\describe{
+ \item{PatientProfiles}{\code{\link[PatientProfiles]{summariseCharacteristics}}}
+}}
+
diff --git a/man/survivalParticipants.Rd b/man/survivalParticipants.Rd
new file mode 100644
index 0000000..42595bd
--- /dev/null
+++ b/man/survivalParticipants.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{survivalParticipants}
+\alias{survivalParticipants}
+\title{Participants contributing to a survival analysis}
+\usage{
+survivalParticipants(result)
+}
+\arguments{
+\item{result}{Result object}
+}
+\value{
+References to the study participants contributing to
+a given analysis
+}
+\description{
+Participants contributing to a survival analysis
+}
diff --git a/man/tidyeval.Rd b/man/tidyeval.Rd
new file mode 100644
index 0000000..f773abf
--- /dev/null
+++ b/man/tidyeval.Rd
@@ -0,0 +1,98 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-tidy-eval.R
+\name{tidyeval}
+\alias{tidyeval}
+\alias{enquo}
+\alias{enquos}
+\alias{.data}
+\alias{:=}
+\alias{as_name}
+\alias{as_label}
+\title{Tidy eval helpers}
+\description{
+This page lists the tidy eval tools reexported in this package from
+rlang. To learn about using tidy eval in scripts and packages at a
+high level, see the \href{https://dplyr.tidyverse.org/articles/programming.html}{dplyr programming vignette}
+and the \href{https://ggplot2.tidyverse.org/articles/ggplot2-in-packages.html}{ggplot2 in packages vignette}.
+The \href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming section} of \href{https://adv-r.hadley.nz}{Advanced R} may also be useful for a deeper dive.
+\itemize{
+\item The tidy eval operators \verb{\{\{}, \verb{!!}, and \verb{!!!} are syntactic
+constructs which are specially interpreted by tidy eval functions.
+You will mostly need \verb{\{\{}, as \verb{!!} and \verb{!!!} are more advanced
+operators which you should not have to use in simple cases.
+
+The curly-curly operator \verb{\{\{} allows you to tunnel data-variables
+passed from function arguments inside other tidy eval functions.
+\verb{\{\{} is designed for individual arguments. To pass multiple
+arguments contained in dots, use \code{...} in the normal way.
+
+\if{html}{\out{}}\preformatted{my_function <- function(data, var, ...) \{
+ data \%>\%
+ group_by(...) \%>\%
+ summarise(mean = mean(\{\{ var \}\}))
+\}
+}\if{html}{\out{
}}
+\item \code{\link[=enquo]{enquo()}} and \code{\link[=enquos]{enquos()}} delay the execution of one or several
+function arguments. The former returns a single expression, the
+latter returns a list of expressions. Once defused, expressions
+will no longer evaluate on their own. They must be injected back
+into an evaluation context with \verb{!!} (for a single expression) and
+\verb{!!!} (for a list of expressions).
+
+\if{html}{\out{}}\preformatted{my_function <- function(data, var, ...) \{
+ # Defuse
+ var <- enquo(var)
+ dots <- enquos(...)
+
+ # Inject
+ data \%>\%
+ group_by(!!!dots) \%>\%
+ summarise(mean = mean(!!var))
+\}
+}\if{html}{\out{
}}
+
+In this simple case, the code is equivalent to the usage of \verb{\{\{}
+and \code{...} above. Defusing with \code{enquo()} or \code{enquos()} is only
+needed in more complex cases, for instance if you need to inspect
+or modify the expressions in some way.
+\item The \code{.data} pronoun is an object that represents the current
+slice of data. If you have a variable name in a string, use the
+\code{.data} pronoun to subset that variable with \code{[[}.
+
+\if{html}{\out{}}\preformatted{my_var <- "disp"
+mtcars \%>\% summarise(mean = mean(.data[[my_var]]))
+}\if{html}{\out{
}}
+\item Another tidy eval operator is \verb{:=}. It makes it possible to use
+glue and curly-curly syntax on the LHS of \code{=}. For technical
+reasons, the R language doesn't support complex expressions on
+the left of \code{=}, so we use \verb{:=} as a workaround.
+
+\if{html}{\out{}}\preformatted{my_function <- function(data, var, suffix = "foo") \{
+ # Use `\{\{` to tunnel function arguments and the usual glue
+ # operator `\{` to interpolate plain strings.
+ data \%>\%
+ summarise("\{\{ var \}\}_mean_\{suffix\}" := mean(\{\{ var \}\}))
+\}
+}\if{html}{\out{
}}
+\item Many tidy eval functions like \code{dplyr::mutate()} or
+\code{dplyr::summarise()} give an automatic name to unnamed inputs. If
+you need to create the same sort of automatic names by yourself,
+use \code{as_label()}. For instance, the glue-tunnelling syntax above
+can be reproduced manually with:
+
+\if{html}{\out{}}\preformatted{my_function <- function(data, var, suffix = "foo") \{
+ var <- enquo(var)
+ prefix <- as_label(var)
+ data \%>\%
+ summarise("\{prefix\}_mean_\{suffix\}" := mean(!!var))
+\}
+}\if{html}{\out{
}}
+
+Expressions defused with \code{enquo()} (or tunnelled with \verb{\{\{}) need
+not be simple column names, they can be arbitrarily complex.
+\code{as_label()} handles those cases gracefully. If your code assumes
+a simple column name, use \code{as_name()} instead. This is safer
+because it throws an error if the input is not a name as expected.
+}
+}
+\keyword{internal}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..ebaddb7
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/tests.html
+# * https://testthat.r-lib.org/reference/test_package.html#special-files
+
+library(testthat)
+library(CohortSurvival)
+
+test_check("CohortSurvival")
diff --git a/tests/testthat/test-addCohortSurvival.R b/tests/testthat/test-addCohortSurvival.R
new file mode 100644
index 0000000..432a033
--- /dev/null
+++ b/tests/testthat/test-addCohortSurvival.R
@@ -0,0 +1,598 @@
+test_that("working example", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2020-03-01"))
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ observation_period = observation_period
+ )
+ cdm$cohort1 <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1
+ )
+ expect_true(all(colnames(cdm$cohort1) %in%
+ c(
+ "cohort_definition_id", "subject_id",
+ "cohort_start_date", "cohort_end_date",
+ "days_to_exit", "status", "time"
+ )))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("another working example", {
+ if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
+ Sys.setenv("EUNOMIA_DATA_FOLDER" = tempdir())
+ }
+ skip_if_not(CDMConnector::eunomia_is_available())
+
+ con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomia_dir())
+ cdm <- CDMConnector::cdm_from_con(con, cdm_schema = "main", write_schema = "main")
+
+ celecoxibCodes <- CodelistGenerator::getDescendants(cdm, conceptId = 1118084)
+ cdm$celecoxib <- cdm$drug_era %>%
+ dplyr::inner_join(
+ celecoxibCodes %>%
+ dplyr::select(concept_id),
+ by = c("drug_concept_id" = "concept_id"),
+ copy = TRUE
+ ) %>%
+ PatientProfiles::addAge(cdm, indexDate = "drug_era_start_date") %>%
+ dplyr::rename(
+ "subject_id" = "person_id",
+ "cohort_start_date" = "drug_era_start_date",
+ "cohort_end_date" = "drug_era_end_date"
+ ) %>%
+ dplyr::mutate(cohort_definition_id = 1L) %>%
+ dplyr::select(
+ "cohort_definition_id", "subject_id",
+ "cohort_start_date", "cohort_end_date"
+ ) %>%
+ CDMConnector::computeQuery(name = "celecoxib", temporary = FALSE, schema = attr(cdm, "write_schema"))
+ celecoxib_set <- cdm$celecoxib %>%
+ dplyr::select("cohort_definition_id") %>%
+ dplyr::mutate(cohort_name = "celecoxib")
+ celecoxib_count <- cdm$celecoxib %>%
+ dplyr::group_by(cohort_definition_id) %>%
+ dplyr::tally(name = "n_records")
+ celecoxib_count <- cdm$celecoxib %>%
+ dplyr::group_by(.data$cohort_definition_id) %>%
+ dplyr::summarise(
+ number_records = dplyr::n(),
+ number_subjects = dplyr::n_distinct(.data$subject_id),
+ .groups = "drop"
+ ) %>%
+ dplyr::collect()
+
+ celecoxib_set <- cdm$celecoxib %>%
+ dplyr::select("cohort_definition_id") %>%
+ dplyr::distinct() %>%
+ dplyr::mutate(cohort_name = "celecoxib") %>%
+ dplyr::collect()
+
+ cdm$celecoxib <- CDMConnector::newGeneratedCohortSet(cohortRef = cdm$celecoxib,
+ cohortSetRef = celecoxib_set,
+ cohortCountRef = celecoxib_count)
+
+
+ GiBleedCodes <- CodelistGenerator::getDescendants(cdm, conceptId = 192671)
+ cdm$gi_bleed <- cdm$condition_occurrence %>%
+ dplyr::inner_join(
+ GiBleedCodes %>%
+ dplyr::select(concept_id),
+ by = c("condition_concept_id" = "concept_id"),
+ copy = TRUE
+ ) %>%
+ dplyr::rename(
+ "subject_id" = "person_id",
+ "cohort_start_date" = "condition_start_date"
+ ) %>%
+ dplyr::mutate(cohort_end_date = cohort_start_date) %>%
+ dplyr::mutate(cohort_definition_id = 1L) %>%
+ dplyr::select(
+ "cohort_definition_id", "subject_id",
+ "cohort_start_date", "cohort_end_date"
+ ) %>%
+ dplyr::compute()
+ gi_bleed_count <- cdm$gi_bleed %>%
+ dplyr::group_by(.data$cohort_definition_id) %>%
+ dplyr::summarise(
+ number_records = dplyr::n(),
+ number_subjects = dplyr::n_distinct(.data$subject_id),
+ .groups = "drop"
+ ) %>%
+ dplyr::collect()
+ gi_bleed_set <- cdm$gi_bleed %>%
+ dplyr::select("cohort_definition_id") %>%
+ dplyr::distinct() %>%
+ dplyr::mutate(cohort_name = "gi_bleed") %>%
+ dplyr::collect()
+
+ cdm$gi_bleed <- CDMConnector::newGeneratedCohortSet(cohortRef = cdm$gi_bleed,
+ cohortSetRef = gi_bleed_set,
+ cohortCountRef = gi_bleed_count)
+
+ cdm$celecoxib <- cdm$celecoxib %>%
+ PatientProfiles::addAge(cdm = cdm) %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "gi_bleed"
+ )
+
+ expect_true(all(c("time", "status") %in% colnames(cdm$celecoxib)))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("censorOnCohortExit", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2020-03-01"))
+ )
+ events <- dplyr::tibble(
+ cohort_definition_id = c(1,1,2),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2019-01-01"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2019-01-01"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-01")),
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ cohort2 = events,
+ observation_period = observation_period
+ )
+ cohortNoCensorExit <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1
+ ) %>%
+ dplyr::arrange(subject_id)
+ cohortCensorExit <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ censorOnCohortExit = TRUE
+ ) %>%
+ dplyr::arrange(subject_id)
+
+ compareNA <- function(v1, v2) {
+ same <- (v1 == v2) | (is.na(v1) & is.na(v2))
+ same[is.na(same)] <- FALSE
+ return(same)
+ }
+
+ expect_true(all(compareNA(cohortNoCensorExit %>%
+ dplyr::select(status) %>%
+ dplyr::pull(),
+ cohortCensorExit %>%
+ dplyr::select(status) %>%
+ dplyr::pull())))
+ expect_true(all(compareNA(cohortNoCensorExit %>%
+ dplyr::select(time) %>%
+ dplyr::pull(),
+ c(NA, 3, 1155))))
+ expect_true(all(compareNA(cohortNoCensorExit %>%
+ dplyr::select(days_to_exit) %>%
+ dplyr::pull(),
+ c(1186, 1216, 1155))))
+ expect_true(all(compareNA(cohortCensorExit %>%
+ dplyr::select(time) %>%
+ dplyr::pull(),
+ c(NA, 3, 60))))
+ expect_true(all(compareNA(cohortCensorExit %>%
+ dplyr::select(days_to_exit) %>%
+ dplyr::pull(),
+ c(91, 213, 60))))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("censorOnDate", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2021-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2021-03-01"))
+ )
+ events <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2019-01-01"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2019-01-01"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-01")),
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ cohort2 = events,
+ observation_period = observation_period
+ )
+ cohortCensorDate <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ censorOnDate = as.Date("2020-01-04")
+ ) %>%
+ dplyr::arrange(subject_id)
+
+ compareNA <- function(v1, v2) {
+ same <- (v1 == v2) | (is.na(v1) & is.na(v2))
+ same[is.na(same)] <- FALSE
+ return(same)
+ }
+
+ expect_true(all(compareNA(cohortCensorDate %>%
+ dplyr::select(status) %>%
+ dplyr::pull(),
+ c(NA, 0, NA))))
+ expect_true(all(compareNA(cohortCensorDate %>%
+ dplyr::select(days_to_exit) %>%
+ dplyr::pull(),
+ c(3, 2, -363))))
+ expect_true(all(compareNA(cohortCensorDate %>%
+ dplyr::select(time) %>%
+ dplyr::pull(),
+ c(NA, 2, NA))))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("followUpDays", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2020-01-06"))
+ )
+ events <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-30"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-11")),
+ cohort_end_date = c(as.Date("2020-01-30"),
+ as.Date("2020-01-05"),
+ as.Date("2020-01-11")),
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ cohort2 = events,
+ observation_period = observation_period
+ )
+ cohortFollowUp <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ followUp = 20
+ ) %>%
+ dplyr::arrange(subject_id)
+ cohortFUandCE <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ followUp = 20,
+ censorOnCohortExit = TRUE
+ ) %>%
+ dplyr::arrange(subject_id)
+
+ expect_true(all(cohortFollowUp %>%
+ dplyr::select(days_to_exit) %>%
+ dplyr::pull() ==
+ c(20,20,20)))
+ expect_true(all(cohortFollowUp %>%
+ dplyr::select(status) %>%
+ dplyr::pull() ==
+ c(0,1,1)))
+ expect_true(all(cohortFollowUp %>%
+ dplyr::select(time) %>%
+ dplyr::pull() ==
+ c(20,3,10)))
+
+ expect_true(all(cohortFUandCE %>%
+ dplyr::select(days_to_exit) %>%
+ dplyr::pull() ==
+ c(20,20,5)))
+ expect_true(all(cohortFUandCE %>%
+ dplyr::select(status) %>%
+ dplyr::pull() ==
+ c(0,1,0)))
+ expect_true(all(cohortFUandCE %>%
+ dplyr::select(time) %>%
+ dplyr::pull() ==
+ c(20,3,5)))
+
+ CDMConnector::cdmDisconnect(cdm)
+ })
+
+test_that("expected errors", {
+ cdm <- PatientProfiles::mockPatientProfiles()
+ cdm[["cohort1"]] <- cdm[["cohort1"]] %>%
+ dplyr::filter(cohort_start_date != "2020-01-01")
+
+ # check outcome cohort
+ # id that is not in the table
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 10
+ ))
+ # should only work for one cohort
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = c(1, 2)
+ ))
+ # user must provide a cohort id
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = NULL
+ ))
+
+ # censorOnCohortExit must be logical
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ censorOnCohortExit = 1
+ ))
+
+ # followUpDays must be 1 or higher
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ followUpDays = -1
+ ))
+
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ followUpDays = 0
+ ))
+
+ # temporary must be logical
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ temporary = "maybe"
+ ))
+
+ cdm <- PatientProfiles::mockPatientProfiles()
+ cdm[["cohort1"]] <- cdm[["cohort1"]] %>%
+ dplyr::group_by(subject_id) %>%
+ dplyr::filter(dplyr::row_number() == 1)
+
+ # multiple cohort definition ids in exposure table
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ outcomeCohortId = 1
+ ))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("expected errors2 - index cohort to have one row per person", {
+ cdm <- PatientProfiles::mockPatientProfiles()
+ expect_error(cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1
+ ))
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("within cohort survival", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2021-03-01"))
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ observation_period = observation_period
+ )
+
+ # default "cohort_start_date"
+ # if using the same cohort, status would be 1, time would be 0 for everyone
+ cdm$cohort1_start <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_start_date"
+ )
+ expect_true(all(cdm$cohort1_start %>%
+ dplyr::pull("time") == 0))
+ expect_true(all(cdm$cohort1_start %>%
+ dplyr::pull("status") == 1))
+
+ cdm$cohort1_end <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_end_date"
+ )
+
+ expect_true(all(cdm$cohort1_end %>%
+ dplyr::collect() %>%
+ dplyr::mutate(dtime = as.numeric(difftime(cohort_end_date,
+ cohort_start_date)),
+ equal = (dtime == time)) %>%
+ dplyr::pull("equal")))
+
+ # limit follow up
+ cdm$cohort1_b <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_end_date",
+ followUpDays = 100
+ )
+ expect_true(all(cdm$cohort1_b %>%
+ dplyr::pull("time") == c(91,100,100)))
+ expect_true(all(cdm$cohort1_b %>%
+ dplyr::pull("status") == c(1,0,0)))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("allow overwrite of time and status", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")),
+ cohort_end_date = c(as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2021-03-01"))
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1,1,1),
+ person_id = c(1,2,3),
+ observation_period_start_date = c(as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")),
+ observation_period_end_date = c(as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01"))
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ observation_period = observation_period
+ )
+
+ cdm$cohort1 <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ )
+
+ cohort1_count <- cdm$cohort1 %>%
+ dplyr::group_by(.data$cohort_definition_id) %>%
+ dplyr::summarise(
+ number_records = dplyr::n(),
+ number_subjects = dplyr::n_distinct(.data$subject_id),
+ .groups = "drop"
+ )
+ cohort1_set <- cdm$cohort1 %>%
+ dplyr::select("cohort_definition_id") %>%
+ dplyr::distinct() %>%
+ dplyr::mutate(cohort_name = "cohort1")
+
+ # currently need to add attribute back to rerun
+ attr(cdm$cohort1, "set") <- cohort1_set
+ attr(cdm$cohort1, "count") <- cohort1_count
+
+ cdm$cohort1 <- cdm$cohort1 %>%
+ addCohortSurvival(
+ cdm = cdm,
+ outcomeCohortTable = "cohort1",
+ )
+ expect_true(!is.null(cdm$cohort1))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
diff --git a/tests/testthat/test-benchmarkCohortSurvival.R b/tests/testthat/test-benchmarkCohortSurvival.R
new file mode 100644
index 0000000..b5514c0
--- /dev/null
+++ b/tests/testthat/test-benchmarkCohortSurvival.R
@@ -0,0 +1,81 @@
+test_that("mgus example: benchmark", {
+ cdm <- mockMGUS2cdm()
+ cdm$condition_occurrence <- cdm$death_cohort %>%
+ dplyr::rename("condition_start_date" = "cohort_start_date",
+ "condition_end_date" = "cohort_end_date")
+ cdm$drug_exposure <- cdm$progression %>%
+ dplyr::rename("drug_exposure_start_date" = "cohort_start_date",
+ "drug_exposure_end_date" = "cohort_end_date")
+ timings <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47)
+ timings_p <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, returnParticipants = TRUE)
+ timings_s <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 5, strata = list("sex" = c("sex")))
+ timings2 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 8)
+ timings3 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23)
+ timings4 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 23, outcomeDateVariable = "cohort_end_date")
+ timings5 <- benchmarkCohortSurvival(cdm, targetSize = 1000, competingOutcomeSize = 80, outcomeSize = 90, outcomeDateVariable = "cohort_end_date")
+ timings6 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 7, censorOnCohortExit = TRUE)
+ timings7 <- benchmarkCohortSurvival(cdm, targetSize = 100, outcomeSize = 4, censorOnDate = as.Date("1920-01-01"))
+ timings8 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, followUpDays = 30)
+ timings9 <- benchmarkCohortSurvival(cdm, targetSize = 1000, outcomeSize = 47, minCellCount = 2)
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_p)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings_p$task))
+ expect_true(timings_p %>% dplyr::select(with_participants) %>% dplyr::distinct() %>% dplyr::pull() == "Yes")
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings_s)))
+ expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 5") %in% timings_s$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings2)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 8") %in% timings2$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings3)))
+ expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings3$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings4)))
+ expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 23") %in% timings4$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings5)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 90", "generating competing outcome cohort size 80") %in% timings5$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings6)))
+ expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 7") %in% timings6$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings7)))
+ expect_true(all(c("generating target cohort size 100", "generating outcome cohort size 4") %in% timings7$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings8)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings8$task))
+
+ expect_true(all(c("time_taken_secs", "time_taken_mins", "time_taken_hours", "dbms", "person_n", "db_min_observation_start", "max_observation_end","with_participants") %in% colnames(timings9)))
+ expect_true(all(c("generating target cohort size 1000", "generating outcome cohort size 47") %in% timings9$task))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("expected errors benchmark", {
+ cdm <- mockMGUS2cdm()
+ cdm$condition_occurrence <- cdm$death_cohort %>%
+ dplyr::rename("condition_start_date" = "cohort_start_date",
+ "condition_end_date" = "cohort_end_date")
+ expect_error(benchmarkCohortSurvival("cdm"))
+ expect_error(benchmarkCohortSurvival(cdm))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = "size"))
+ expect_error(benchmarkCohortSurvival(targetSize = 30))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = "3"))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, competingOutcomeSize = "no"))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, outcomeDateVariable = FALSE))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnCohortExit = NULL))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, censorOnDate = TRUE))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, followUpDays = "Inf"))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, strata = c("age" = "age")))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, timeGap = list(1,2)))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, times = list(1,2,3)))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, minCellCount = FALSE))
+ expect_error(benchmarkCohortSurvival(cdm, targetSize = 100, outcomSize = 40, returnParticipants = "TRUE"))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
diff --git a/tests/testthat/test-estimateSurvival.R b/tests/testthat/test-estimateSurvival.R
new file mode 100644
index 0000000..322144c
--- /dev/null
+++ b/tests/testthat/test-estimateSurvival.R
@@ -0,0 +1,812 @@
+test_that("mgus example: no Competing risk", {
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSingleEventSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ timeGap = 7
+ )
+ expect_true(tibble::is_tibble(surv))
+ expect_true(all(c(
+ "cdm_name","result_type",
+ "group_name","group_level",
+ "strata_name","strata_level",
+ "variable","variable_level",
+ "estimate_type",
+ "variable_type",
+ "time",
+ "analysis_type",
+ "estimate") %in%
+ colnames(surv)))
+
+ expect_true(surv %>% dplyr::select(variable) %>% dplyr::pull() %>% unique() == "Outcome")
+# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424)))
+ expect_true(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Single event")
+
+# expect_true(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() == 7)
+# expect_true(all(attributes(surv)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(seq(0, 424, by = 7), 424)))
+
+ # mgus example: Competing risk
+ survCR <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ competingOutcomeCohortTable = "progression",
+ competingOutcomeCohortId = 1
+ )
+
+ expect_true(all(colnames(surv) == colnames(survCR)))
+
+ expect_true(tibble::is_tibble(survCR))
+
+# expect_true(all(colnames(attr(survCR, "events")) ==
+# colnames(attr(surv, "events"))))
+
+ # note, we don't return summary for Competing risk
+
+# expect_true(all(survCR %>%
+# dplyr::select(outcome) %>%
+# dplyr::pull() %>%
+# unique() %in%
+# c("outcome", "competing outcome")))
+
+# expect_true(all(survCR %>%
+# dplyr::select(time) %>%
+# dplyr::pull() %>%
+# unique() == c(0:424)))
+
+ expect_true(survCR %>%
+ dplyr::select(analysis_type) %>%
+ dplyr::pull() %>%
+ unique() == "Competing risk")
+
+# expect_true(all(attributes(survCR)$events %>%
+# dplyr::select(timeGap) %>%
+# dplyr::pull() %>%
+# unique() %in% c(1, 7, 30, 365)))
+
+# expect_true(all(attributes(survCR)$events %>%
+# dplyr::select(time) %>%
+# dplyr::pull() %>% unique() == c(0:424)))
+
+ expect_true(nrow(survCR %>%
+ dplyr::filter(.data$outcome == "outcome") %>%
+ dplyr::collect())>=1)
+ expect_true(nrow(survCR %>%
+ dplyr::filter(.data$outcome == "competing outcome") %>%
+ dplyr::collect())>=1)
+
+ expect_true(all(c("death_cohort", "progression") %in%
+ (survCR %>%
+ dplyr::pull("variable_level") %>%
+ unique())))
+
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("mgus example: no Competing risk, strata", {
+ cdm <- mockMGUS2cdm()
+ cdm[["mgus_diagnosis"]] <- cdm[["mgus_diagnosis"]] %>%
+ dplyr::mutate(mspike_r = round(mspike, digits = 0))
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ timeGap = c(1, 10, 100),
+ strata = list(
+ "age_gr" = c("age"),
+ "sex" = c("sex"),
+ "age and sex" = c("age", "sex"),
+ "mspike rounded" = c("mspike_r")
+ )
+ )
+ expect_true(tibble::is_tibble(surv))
+
+ expect_true(surv %>% dplyr::select(outcome) %>% dplyr::pull() %>% unique() == "outcome")
+# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424)))
+ expect_true(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Single event")
+ expect_true(all(surv %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in%
+ c("Overall", "sex", "age", "mspike_r", "age and sex")))
+ expect_true(all(surv %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c(
+ "M", "F", 0, 1, 2, 3, c(24:96), "Overall",
+ paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ")
+ )))
+
+# expect_true(all(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() %in% c(1, 10, 100)))
+# expect_true(all(attributes(surv)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424)))
+# expect_true(all(attributes(surv)$events %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in%
+# c("Overall", "sex", "age", "mspike_r", "age and sex")))
+# expect_true(all(attributes(surv)$events %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c(
+# "M", "F", 0, 1, 2, 3, c(24:96), "Overall",
+# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ")
+# )))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("mgus example: Competing risk, strata", {
+ cdm <- mockMGUS2cdm()
+ cdm[["mgus_diagnosis"]] <- cdm[["mgus_diagnosis"]] %>%
+ dplyr::mutate(mspike_r = round(mspike, digits = 0))
+ survCR <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ competingOutcomeCohortTable = "progression",
+ competingOutcomeCohortId = 1,
+ strata = list(
+ "age" = c("age"),
+ "sex" = c("sex"),
+ "age and sex" = c("age", "sex"),
+ "mspike rounded" = c("mspike_r")
+ )
+ )
+
+ # check strata_level (s0) thingy
+
+ expect_true(tibble::is_tibble(survCR))
+# expect_true(all(survCR %>% dplyr::select(outcome) %>% dplyr::pull() %>% unique() %in% c("outcome", "competing outcome")))
+# expect_true(all(survCR %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424)))
+ expect_true(survCR %>% dplyr::select(analysis_type) %>% dplyr::pull() %>% unique() == "Competing risk")
+ expect_true(all(survCR %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% c("Overall", "sex", "age", "mspike_r", "age and sex")))
+# expect_true(all(survCR %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c(
+# "M", "F", 0, 1, 2, 3, c(24:96), "Overall",
+# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ")
+# )))
+
+# expect_true(all(attributes(survCR)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %>% unique() %in% c(1, 7, 30, 365)))
+# expect_true(all(attributes(survCR)$events %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:424)))
+# expect_true(all(attributes(survCR)$events %>% dplyr::select(strata_name) %>% dplyr::pull() %>% unique() %in% c("Overall", "sex", "age", "mspike_r", "age and sex")))
+# expect_true(all(attributes(survCR)$events %>% dplyr::select(strata_level) %>% dplyr::pull() %>% unique() %in% c(
+# "M", "F", 0, 1, 2, 3, c(24:96), "Overall",
+# paste(expand.grid(c(24:96), c("M", "F"))$Var1, expand.grid(c(24:96), c("M", "F"))$Var2, sep = " and ")
+# )))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("funcionality with created dataset", {
+ compareNA <- function(v1, v2) {
+ same <- (v1 == v2) | (is.na(v1) & is.na(v2))
+ same[is.na(same)] <- FALSE
+ return(same)
+ }
+
+ exposure_cohort <- dplyr::tibble(
+ subject_id = c(1, 2, 3),
+ cohort_definition_id = c(1, 1, 1),
+ cohort_start_date = c(
+ as.Date("2020-01-01"),
+ as.Date("2020-02-03"),
+ as.Date("2020-05-01")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-31"),
+ as.Date("2022-02-03"),
+ as.Date("2021-06-28")
+ ),
+ age_group = c("20;29", "20;29", "60;69"),
+ sex = c("Female", "Male", "Female"),
+ blood_type = c("A", "B", "B")
+ )
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1, 1, 1),
+ subject_id = c(1, 1, 2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1, 1, 1),
+ person_id = c(1, 2, 3),
+ observation_period_start_date = c(
+ as.Date("2007-03-21"),
+ as.Date("2006-09-09"),
+ as.Date("1980-07-20")
+ ),
+ observation_period_end_date = c(
+ as.Date("2022-09-08"),
+ as.Date("2023-01-03"),
+ as.Date("2023-05-20")
+ )
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ # No competing events
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ minCellCount = 1
+ )
+
+# expect_true(all(surv %>% dplyr::select(time) %>% dplyr::pull() %>%
+# unique() == c(0:31)))
+ expect_true(all(surv %>%
+ dplyr::filter(variable_type == "n_risk") %>%
+ dplyr::select(estimate) %>%
+ dplyr::pull() ==
+ c(rep(3, 7), rep(2, 3), rep(1, 22))))
+ expect_true(all(surv %>%
+ dplyr::filter(variable_type == "estimate") %>%
+ dplyr::filter(estimate_type == "Survival probability") %>%
+ dplyr::select(estimate) %>%
+ dplyr::pull() - c(rep(1, 7), rep(0.667, 3), rep(0.333, 21), 0) < c(0.01)))
+ expect_true(all(surv %>%
+ dplyr::filter(variable_type == "estimate") %>%
+ dplyr::filter(estimate_type == "Cumulative failure probability") %>%
+ dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 3), rep(0.667, 22), 1) < c(0.01)))
+ expect_true(all(surv %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+
+# expect_true(all(attributes(surv)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+# expect_true(all(attributes(surv)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>%
+# dplyr::pull() == c(3, 3, 3, 3)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ # Compting events
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1),
+ subject_id = c(2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+ competing_risk_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 2),
+ subject_id = c(2, 3, 1),
+ cohort_start_date = c(
+ as.Date("2020-02-07"),
+ as.Date("2021-02-02"),
+ as.Date("2020-01-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-02-07"),
+ as.Date("2021-02-02"),
+ as.Date("2020-01-03")
+ )
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ cohort2 = competing_risk_cohort,
+ observation_period = observation_period
+ )
+ surv2 <- estimateSurvival(cdm,
+ targetCohortTable = "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ competingOutcomeCohortTable = "cohort2",
+ competingOutcomeCohortId = 1,
+ minCellCount = 1
+ )
+
+# expect_true(all(surv2 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(rep(c(0:981), 2))))
+# expect_true(all(surv2 %>%
+# dplyr::filter(outcome == "outcome") %>%
+# dplyr::filter(estimate_type == "Cumulative failure probability") %>%
+# dplyr::filter(variable_type == "n_risk") %>%
+# dplyr::select(estimate) %>%
+# dplyr::pull() ==
+# c(rep(3, 5), rep(2, 27), rep(1, 1155))))
+ expect_true(nrow(surv2 %>%
+ dplyr::filter(estimate_type == "Survival probability") %>%
+ dplyr::select(estimate)) == 0)
+# expect_true(all(surv2 %>% dplyr::filter(variable_type == "n_risk") %>% dplyr::pull() ==
+# c(rep(3, 5), rep(2, 27), rep(1, 1155))))
+
+ expect_true(all(surv2 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Competing risk"))
+# expect_true(all(surv2 %>% dplyr::select(outcome) %>% dplyr::pull() == c(rep("outcome", 4748), rep("competing outcome", 4748))))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1 & outcome == "outcome") %>% dplyr::select(time) == c(31)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1 & outcome == "Competing risk") %>% dplyr::select(time) == c(4)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 7 & outcome == "outcome") %>% dplyr::select(time) == c(35)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 7 & outcome == "Competing risk") %>% dplyr::select(time) == c(7)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 30 & outcome == "outcome") %>% dplyr::select(time) == c(60)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 30 & outcome == "Competing risk") %>% dplyr::select(time) == c(30)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 365 & outcome == "outcome") %>% dplyr::select(time) == c(365)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 365 & outcome == "Competing risk") %>% dplyr::select(time) == c(365)))
+
+# expect_true(all(attributes(surv2)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+# expect_true(all(attributes(surv2)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ # Censor at cohort end
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1, 1, 1),
+ subject_id = c(1, 1, 2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2020-02-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-02-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ surv3 <- estimateSurvival(cdm, "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ censorOnCohortExit = TRUE,
+ minCellCount = 1
+ )
+
+# expect_true(all(surv3 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31)))
+ # expect_true(all(surv3 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 24), 1)))
+ # expect_true(all(surv3 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 24), 0) < c(0.01)))
+ # expect_true(all(surv3 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 25), 1) < c(0.01)))
+ # expect_true(all(surv3 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv3)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31)))
+ # expect_true(all(attributes(surv3)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv3)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>%
+ # dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ # Censor by follow up days
+ surv4 <- estimateSurvival(cdm, "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ followUpDays = 10,
+ minCellCount = 1
+ )
+
+# expect_true(all(surv4 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:10)))
+ # expect_true(all(surv4 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 4))))
+ # expect_true(all(surv4 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 4)) < c(0.01)))
+ # expect_true(all(surv4 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 5)) < c(0.01)))
+ # expect_true(all(surv4 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv4)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6)))
+ # expect_true(all(attributes(surv4)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv4)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1, 1, 1),
+ subject_id = c(1, 1, 2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-02"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ # Strata
+ surv5 <- estimateSurvival(cdm, "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ strata = list(
+ "Age group" = c("age_group"),
+ "Sex" = c("sex"),
+ "Age group and sex" = c("age_group", "sex"),
+ "Blood type" = c("blood_type")
+ ),
+ minCellCount = 1
+ )
+
+# expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(3, 7), rep(2, 3), rep(1, 22))))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 7), rep(0.667, 3), rep(0.333, 21), 0) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.333, 3), rep(0.667, 22), 1) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "Overall") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "Overall") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 9, 31)))
+ #
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(time) %>% dplyr::pull() == c(0:31)))
+ # expect_true(all(compareNA(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(n_risk) %>% dplyr::pull(), c(rep(c(rep(1, 10), rep(NA, 22)),2)))))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 10), rep(0, 22)) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 9), rep(1, 23)) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "age_group; sex" & strata_level == "20;29; Female") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(9)))
+ #
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(time) %>% dplyr::pull() == c(0:31)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 7), rep(1, 25))))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(2, 7), rep(0.667, 24), 0) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.5, 25), 1) < c(0.01)))
+ # expect_true(all(surv5 %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv5)$events %>% dplyr::filter(strata_name == "blood_type" & strata_level == "B") %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1, 1),
+ subject_id = c(1, 2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2019-01-10"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2019-01-10"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ # Washout for outcome
+ surv6 <- estimateSurvival(cdm,
+ targetCohortTable = "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ minCellCount = 1
+ )
+
+# expect_true(all(surv6 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:31)))
+ # expect_true(all(surv6 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 7), rep(1, 25))))
+ # expect_true(all(surv6 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(2, 7), rep(0.667, 24), 0) < c(0.01)))
+ # expect_true(all(surv6 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 6), rep(0.5, 25), 1) < c(0.01)))
+ # expect_true(all(surv6 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv6)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(6, 31)))
+ # expect_true(all(attributes(surv6)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv6)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1, 1),
+ subject_id = c(1, 2, 3, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-02-09"),
+ as.Date("2020-06-01"),
+ as.Date("2020-06-03")
+ )
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+
+ # Censor on date
+ surv7 <- estimateSurvival(cdm,
+ targetCohortTable = "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ minCellCount = 1,
+ censorOnDate = as.Date("2020-02-01")
+ )
+
+# expect_true(all(surv7 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:9)))
+ # expect_true(all(surv7 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(1, 10))))
+ # expect_true(all(surv7 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 9), 0) < c(0.01)))
+ # expect_true(all(surv7 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate)%>% dplyr::pull() - c(rep(0, 9), 1) < c(0.01)))
+ # expect_true(all(surv7 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv7)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(9)))
+ # expect_true(all(attributes(surv7)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv7)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+ # try with selected time points
+ cdm <- PatientProfiles::mockPatientProfiles(
+ exposure_cohort = exposure_cohort,
+ cohort1 = outcome_cohort,
+ observation_period = observation_period
+ )
+ surv8 <- estimateSurvival(cdm,
+ targetCohortTable = "exposure_cohort",
+ outcomeCohortTable = "cohort1",
+ minCellCount = 1,
+ times = seq(1,1000, by = 100)
+ )
+
+# expect_true(all(surv8 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == rep(seq(1,901,by = 100),2)))
+ # expect_true(all(compareNA(surv8 %>% dplyr::select(n_risk) %>% dplyr::pull(), rep(c(3, rep(NA, 9)),2))))
+ # expect_true(all(surv8 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>%
+ # dplyr::pull() == c(1,0,0,0,0,0,0,0,0,0)))
+ # expect_true(all(surv8 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>%
+ # dplyr::pull() == c(0,1,1,1,1,1,1,1,1,1)))
+ # expect_true(all(surv8 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv8)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == 101))
+ # expect_true(all(attributes(surv8)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv8)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>%
+ # dplyr::pull() == c(3, 3, 3, 3)))
+
+ CDMConnector::cdmDisconnect(cdm)
+
+})
+
+test_that("different exposure cohort ids", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 2),
+ subject_id = c(1, 2, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-11"),
+ as.Date("2020-01-12"),
+ as.Date("2020-01-11")
+ )
+ )
+ outcome_cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1),
+ subject_id = c(1, 2, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-01-03"),
+ as.Date("2020-01-09")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-01-10"),
+ as.Date("2020-01-03"),
+ as.Date("2020-01-09")
+ )
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1, 1, 1),
+ person_id = c(1, 2, 3),
+ observation_period_start_date = c(
+ as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")
+ ),
+ observation_period_end_date = c(
+ as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01")
+ )
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ cohort2 = outcome_cohort,
+ observation_period = observation_period
+ )
+ surv8 <-
+ estimateSurvival(
+ cdm = cdm,
+ targetCohortTable = "cohort1",
+ targetCohortId = 1,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ minCellCount = 1
+ )
+# expect_true(all(surv8 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:9)))
+ # expect_true(all(surv8 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(2, 2), rep(1, 8))))
+ # expect_true(all(surv8 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(1, rep(0.5, 8), 0) < c(0.01)))
+ # expect_true(all(surv8 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(0, rep(0.5, 8), 1) < c(0.01)))
+ # expect_true(all(surv8 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv8)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(1, 9)))
+ # expect_true(all(attributes(surv8)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv8)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(2, 2, 2, 2)))
+
+ surv9 <-
+ estimateSurvival(
+ cdm = cdm,
+ targetCohortTable = "cohort1",
+ targetCohortId = 2,
+ outcomeCohortTable = "cohort2",
+ outcomeCohortId = 1,
+ minCellCount = 1
+ )
+ # expect_true(all(surv9 %>% dplyr::select(time) %>% dplyr::pull() %>% unique() == c(0:8)))
+ # expect_true(all(surv9 %>% dplyr::select(n_risk) %>% dplyr::pull() == c(rep(1, 9))))
+ # expect_true(all(surv9 %>%
+ # dplyr::filter(estimate_name == "Survival probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(1, 8), 0) < c(0.01)))
+ # expect_true(all(surv9 %>%
+ # dplyr::filter(estimate_name == "Cumulative failure probability") %>%
+ # dplyr::select(estimate) %>% dplyr::pull() - c(rep(0, 8), 1) < c(0.01)))
+ # expect_true(all(surv9 %>% dplyr::select(analysis_type) %>% dplyr::pull() == "Single event"))
+ #
+ # expect_true(all(attributes(surv9)$events %>% dplyr::filter(!is.na(n_events) & timeGap == 1) %>% dplyr::select(time) == c(8)))
+ # expect_true(all(attributes(surv9)$events %>% dplyr::select(timeGap) %>% dplyr::pull() %in% c(1, 7, 30, 365)))
+ # expect_true(all(attributes(surv9)$events %>% dplyr::group_by(timeGap) %>% dplyr::summarise(n = sum(n_events, na.rm = T)) %>% dplyr::select(n) %>% dplyr::pull() == c(1, 1, 1, 1)))
+
+ CDMConnector::cdm_disconnect(cdm)
+ })
+
+test_that("expected errors", {
+ cdm <- mockMGUS2cdm()
+
+ expect_error(estimateSurvival("cdm", targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression"))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosiss", outcomeCohortTable = "progression"))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "outcome"))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = c(1, 3)))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = -3))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = "time"))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, timeGap = NULL))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, strata = "age"))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, strata = list("name" = "noname")))
+ expect_error(estimateSurvival(cdm, targetCohortTable = "mgus_diagnosis", outcomeCohortTable = "progression", outcomeCohortId = 1, censorOnDate = "2020-09-02"))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("return participants", {
+ cdm <- mockMGUS2cdm()
+ surv1 <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ outcomeCohortTable = "death_cohort",
+ returnParticipants = FALSE
+ )
+ expect_true(is.null(attr(surv1, "participants")))
+ surv2 <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ timeGap = 7, returnParticipants = TRUE
+ )
+ expect_false(is.null(attr(surv2, "participants")))
+
+ expect_equal(
+ colnames(survivalParticipants(surv2) %>%
+ head(1) %>%
+ dplyr::collect()),
+ c(
+ "cohort_definition_id",
+ "subject_id",
+ "cohort_start_date",
+ "cohort_end_date"
+ )
+ )
+
+ CDMConnector::cdmDisconnect(cdm)
+})
+
+test_that("within cohort survival", {
+ cohort <- dplyr::tibble(
+ cohort_definition_id = c(1, 1, 1),
+ subject_id = c(1, 2, 3),
+ cohort_start_date = c(
+ as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")
+ ),
+ cohort_end_date = c(
+ as.Date("2020-04-01"),
+ as.Date("2020-08-02"),
+ as.Date("2021-03-01")
+ )
+ )
+ observation_period <- dplyr::tibble(
+ observation_period_id = c(1, 1, 1),
+ person_id = c(1, 2, 3),
+ observation_period_start_date = c(
+ as.Date("2000-01-01"),
+ as.Date("2000-01-02"),
+ as.Date("2000-01-01")
+ ),
+ observation_period_end_date = c(
+ as.Date("2023-04-01"),
+ as.Date("2023-05-02"),
+ as.Date("2023-03-01")
+ )
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(
+ cohort1 = cohort,
+ observation_period = observation_period
+ )
+
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "cohort1",
+ targetCohortId = 1,
+ outcomeCohortTable = "cohort1",
+ outcomeCohortId = 1,
+ outcomeDateVariable = "cohort_end_date",
+ timeGap = 7,
+ minCellCount = 0
+ )
+# expect_true(sum(attr(surv, "events")$n_events) == 3)
+
+ CDMConnector::cdmDisconnect(cdm)
+})
diff --git a/tests/testthat/test-generateDeathCohort.R b/tests/testthat/test-generateDeathCohort.R
new file mode 100644
index 0000000..f492dc6
--- /dev/null
+++ b/tests/testthat/test-generateDeathCohort.R
@@ -0,0 +1,205 @@
+test_that("basic example", {
+
+ cdm <- PatientProfiles::mockPatientProfiles()
+ deathTable <- dplyr::tibble(
+ person_id = c(1,2,3),
+ death_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")))
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+
+ cdm <- generateDeathCohortSet(cdm=cdm)
+
+ expect_true(all(c("cohort_definition_id", "subject_id",
+ "cohort_start_date", "cohort_end_date") %in%
+ colnames(cdm$death_cohort)))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("first death record per person", {
+ # check that in the case of multiple death records per person
+ # only the first will be used
+ cdm <- PatientProfiles::mockPatientProfiles()
+ deathTable <- dplyr::tibble(
+ person_id = c(1,2,2),
+ death_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-31")))
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ cdm <- generateDeathCohortSet(cdm=cdm)
+
+ expect_true(nrow(cdm$death_cohort %>%
+ dplyr::filter(subject_id == "2") %>%
+ dplyr::collect()) == 1)
+
+
+ expect_true(cdm$death_cohort %>%
+ dplyr::filter(subject_id == "2") %>%
+ dplyr::select(cohort_start_date) %>%
+ dplyr::pull() == as.Date("2020-01-02"))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("test death in observation criteria", {
+
+ observation_period <- tibble::tibble(
+ observation_period_id = c(1, 2),
+ person_id = c(1,2),
+ observation_period_start_date = c(
+ as.Date("2000-01-01"),
+ as.Date("2010-01-01")
+ ),
+ observation_period_end_date = c(
+ as.Date("2005-01-01"),
+ as.Date("2021-01-01")
+ )
+ )
+ cdm <- PatientProfiles::mockPatientProfiles(observation_period = observation_period)
+
+ deathTable <- dplyr::tibble(
+ person_id = c(1,2),
+ death_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02")))
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ cdm <- generateDeathCohortSet(cdm=cdm, deathInObservation = TRUE)
+
+ expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 1)
+
+ expect_true(cdm$death_cohort %>%
+ dplyr::select(subject_id) %>%
+ dplyr::pull() == 2)
+
+ expect_true(cdm$death_cohort %>%
+ dplyr::select(cohort_start_date) %>%
+ dplyr::pull() == as.Date("2020-01-02"))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("test different cohort table name", {
+
+ cdm <- PatientProfiles::mockPatientProfiles()
+
+ deathTable <- dplyr::tibble(
+ person_id = c(1,2,3),
+ death_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")))
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ cdm <- generateDeathCohortSet(cdm=cdm, name = "my_cohort_death")
+ expect_error(CDMConnector::assertTables(cdm, tables=c("death_cohort")))
+
+ expect_no_error(CDMConnector::assertTables(cdm, tables=c("my_cohort_death")))
+
+ expect_true(all(c("cohort_definition_id", "subject_id",
+ "cohort_start_date", "cohort_end_date") %in%
+ colnames(cdm$my_cohort_death)))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
+
+test_that("test subsetting death table by a cohort table", {
+
+ cohort1 <- tibble::tibble(
+ cohort_definition_id = c(1,1,2),
+ subject_id = c(1,2,3),
+ cohort_start_date = as.Date(c("2012-02-01")),
+ cohort_end_date = as.Date(c("2013-02-01"))
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1)
+
+ deathTable <- dplyr::tibble(
+ person_id = seq(1,5),
+ death_date = c(as.Date("2020-01-01")))
+
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1")
+
+ expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 3)
+
+ expect_true(all(cdm$death_cohort %>%
+ dplyr::select(subject_id) %>%
+ dplyr::pull() %in% c(1,2,3)
+ ))
+# with cohortId
+ cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1)
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1", cohortId = 1)
+
+ expect_true(nrow(cdm$death_cohort %>% dplyr::collect()) == 2)
+
+ expect_true(all(cdm$death_cohort %>%
+ dplyr::select(subject_id) %>%
+ dplyr::pull() %in% c(1,2)
+ ))
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+test_that("test expected errors", {
+
+ cohort1 <- tibble::tibble(
+ cohort_definition_id = c(1,1,1),
+ subject_id = c(1,2,3),
+ cohort_start_date = as.Date(c("2012-02-01")),
+ cohort_end_date = as.Date(c("2013-02-01"))
+ )
+
+ cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohort1)
+
+ # no death table in CDM
+ expect_error(cdm <- generateDeathCohortSet(cdm=cdm))
+
+ # cohortTable & cohortId
+ deathTable <- dplyr::tibble(
+ person_id = c(1,2,3),
+ death_date = c(as.Date("2020-01-01"),
+ as.Date("2020-01-02"),
+ as.Date("2020-01-01")))
+ DBI::dbWithTransaction(attr(cdm, "dbcon"), {
+ DBI::dbWriteTable(attr(cdm, "dbcon"), "death",
+ deathTable, overwrite = TRUE)
+ })
+ cdm$death <- dplyr::tbl(attr(cdm, "dbcon"), "death")
+
+ # cohortTable not exist
+ expect_error(cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "non_exist_cohort"))
+
+ # wrong cohortId input
+ expect_error(cdm <- generateDeathCohortSet(cdm=cdm, cohortTable = "cohort1", cohortId = "1"))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+})
diff --git a/tests/testthat/test-mockMGUS2cdm.R b/tests/testthat/test-mockMGUS2cdm.R
new file mode 100644
index 0000000..0e65cb0
--- /dev/null
+++ b/tests/testthat/test-mockMGUS2cdm.R
@@ -0,0 +1,12 @@
+test_that("mock mgus2 as a cdm reference", {
+ cdm <- mockMGUS2cdm()
+
+ expect_true(cdm$person %>%
+ dplyr::tally() %>%
+ dplyr::pull("n") ==
+ survival::mgus2 %>%
+ dplyr::tally() %>%
+ dplyr::pull("n"))
+
+ CDMConnector::cdmDisconnect(cdm)
+})
diff --git a/tests/testthat/test-plotSurvival.R b/tests/testthat/test-plotSurvival.R
new file mode 100644
index 0000000..99f4bae
--- /dev/null
+++ b/tests/testthat/test-plotSurvival.R
@@ -0,0 +1,116 @@
+test_that("basic Survival plot", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1
+ )
+
+ plot <- plotSurvival(surv)
+ expect_true(ggplot2::is.ggplot(plot))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+test_that("plot facets", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ strata=list(c("sex", "age_group"))
+ )
+
+ plot <-plotSurvival(surv,
+ facet = "strata_level")
+ expect_true(ggplot2::is.ggplot(plot))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+
+test_that("plot colour", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ strata=list(c("sex", "age_group"))
+ )
+
+ plot <- plotSurvival(surv,
+ facet = "strata_level",
+ colour = "strata_level")
+
+ expect_true(ggplot2::is.ggplot(plot))
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+test_that("basic cumulative incidence plot", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1
+ )
+
+ plot <- plotCumulativeIncidence(surv)
+ expect_true(ggplot2::is.ggplot(plot))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+test_that("plot facets for cumulative incidence plots", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ strata=list(c("sex", "age_group"))
+ )
+
+ plot <-plotCumulativeIncidence(surv,
+ facet = "strata_level")
+ expect_true(ggplot2::is.ggplot(plot))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+
+test_that("plot colour for cumulative incidence plots", {
+
+ cdm <- mockMGUS2cdm()
+ surv <- estimateSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ targetCohortId = 1,
+ outcomeCohortTable = "death_cohort",
+ outcomeCohortId = 1,
+ strata=list(c("sex", "age_group"))
+ )
+
+ plot <- plotCumulativeIncidence(surv,
+ facet = "strata_level",
+ colour = "strata_level")
+
+ expect_true(ggplot2::is.ggplot(plot))
+
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
diff --git a/tests/testthat/test-summariseSurvivalParticipants.R b/tests/testthat/test-summariseSurvivalParticipants.R
new file mode 100644
index 0000000..9a3de2a
--- /dev/null
+++ b/tests/testthat/test-summariseSurvivalParticipants.R
@@ -0,0 +1,15 @@
+# test_that("basic table", {
+#
+# cdm <- mockMGUS2cdm()
+# surv <- estimateSurvival(cdm,
+# targetCohortTable = "mgus_diagnosis",
+# outcomeCohortTable = "death_cohort",
+# returnParticipants = TRUE
+# )
+# tableOne <- summariseCharacteristics(cohort = survivalParticipants(surv) %>%
+# CDMConnector::computeQuery() ,
+# cdm = cdm)
+#
+# expect_false(is.null(tableOne))
+# CDMConnector::cdm_disconnect(cdm)
+# })
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/a01_Single_event_of_interest.Rmd b/vignettes/a01_Single_event_of_interest.Rmd
new file mode 100644
index 0000000..3727d76
--- /dev/null
+++ b/vignettes/a01_Single_event_of_interest.Rmd
@@ -0,0 +1,122 @@
+---
+title: "Single outcome event of interest"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{a01_Single_event_of_interest}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE, warning = FALSE, message = FALSE,
+ comment = "#>"
+)
+```
+
+
+## Set up
+Let's first load the packages required.
+```{r}
+library(CDMConnector)
+library(CohortSurvival)
+library(dplyr)
+library(ggplot2)
+```
+
+We'll create a cdm reference to use our example MGUS2 survival dataset. In practice you would use the CDMConnector package to connect to your data mapped to the OMOP CDM.
+```{r}
+cdm <- CohortSurvival::mockMGUS2cdm()
+```
+
+In this vignette we'll first estimate survival following a diagnosis of MGUS, with death our outcome of interest.
+
+We would typically need to define study cohorts ourselves, but in the case of our example data we already have these cohorts available. You can see for our diagnosis cohort we also have a number of additional features recorded for individuals which we'll use for stratification.
+
+```{r}
+cdm$mgus_diagnosis %>%
+ glimpse()
+
+cdm$death_cohort %>%
+ glimpse()
+```
+
+## Overall survival
+First, we can estimate survival for the cohort overall like so.
+```{r}
+MGUS_death <- estimateSingleEventSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ outcomeCohortTable = "death_cohort"
+)
+MGUS_death %>%
+ glimpse()
+```
+
+As we can see above our results have been outputted in long format. We can plot these results like so.
+```{r}
+plotSurvival(MGUS_death)
+```
+
+Our returned results also have attributes containing information that summarises survival.
+```{r}
+MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>%
+ tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>%
+ dplyr::mutate(
+ "Restricted mean survival (se)" = paste0(
+ round(restricted_mean),
+ " (", round(restricted_mean_std_error, 2), ")"
+ ),
+ "Median survival (95% CI)" = paste0(median_survival,
+ " (", median_survival_95CI_lower,
+ " to ", median_survival_95CI_higher, ")")
+ ) %>%
+ dplyr::select("Restricted mean survival (se)", "Median survival (95% CI)")
+```
+
+
+## With stratification
+To estimate survival for particular strata of interest we need these features to have been added to the target cohort table. Once we have them defined, and as seen above we already have a number of example characteristics added to our diagnosis cohort, we can add stratifications like so.
+```{r}
+MGUS_death <- estimateSingleEventSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ outcomeCohortTable = "death_cohort",
+ strata = list(c("age_group"),
+ c("sex"),
+ c("age_group", "sex"))
+)
+```
+
+As we can see as well as results for each strata, we'll always also have overall results returned.
+```{r, fig.height=6, fig.width=8}
+plotSurvival(MGUS_death,
+ facet = "strata_name",
+ colour = "strata_level")
+```
+
+And we also now have summary statistics for each of the strata as well as overall.
+
+```{r}
+MGUS_death %>% dplyr::filter(estimate_type == "Survival summary") %>%
+ tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>%
+ dplyr::mutate("Restricted mean survival (se)" = paste0(round(restricted_mean), " (", round(restricted_mean_std_error, 2), ")"),
+ "Median survival (95% CI)" = paste0(median_survival, " (", median_survival_95CI_lower, " to ", median_survival_95CI_higher, ")")
+ ) %>%
+ dplyr::select(strata_name, strata_level,
+ "Restricted mean survival (se)", "Median survival (95% CI)")
+```
+
+## Summarising participants
+If we set returnParticipants as TRUE then we will also be able to access the individuals that contributed to the analysis.
+```{r}
+MGUS_death <- estimateSingleEventSurvival(cdm,
+ targetCohortTable = "mgus_diagnosis",
+ outcomeCohortTable = "death_cohort",
+ returnParticipants = TRUE
+)
+survivalParticipants(MGUS_death)
+```
+
+## Disconnect from the cdm database connection
+```{r}
+cdm_disconnect(cdm)
+```