Skip to content

Commit

Permalink
Merge pull request #194 from darwin-eu/phoebe
Browse files Browse the repository at this point in the history
use concept_recommended from phoebe when available
  • Loading branch information
edward-burn authored Jul 9, 2024
2 parents 5c6f5b4 + 1d1c3df commit e36e1e7
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 22 deletions.
6 changes: 3 additions & 3 deletions R/mockVocabRef.R
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,13 @@ mockVocabRef <- function(backend = "data_frame") {
stratum_5_name = NA,
is_default = NA,
category = NA )
achillesResults <- dplyr::tibble(analysis_id = c(401, 401),
stratum_1 = c(4, 5),
achillesResults <- dplyr::tibble(analysis_id = c(401, 401, 401),
stratum_1 = c(4, 5, 9),
stratum_2 = NA,
stratum_3 = NA,
stratum_4 = NA,
stratum_5 = NA,
count_value = c(400, 200))
count_value = c(400, 200, 100))
achillesResultsDist <- dplyr::tibble(analysis_id = 1,
stratum_1 = NA,
stratum_2 = NA,
Expand Down
67 changes: 49 additions & 18 deletions R/summariseOrphanCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ summariseOrphanCodes <- function(x,
if(isFALSE(inherits(cdm, "cdm_reference"))){
cli::cli_abort("cdm is not a cdm reference but is {class(cdm)}")
}
if(isFALSE(inherits(x, "codelist")) && isFALSE(is.list(x))){
cli::cli_abort("x is not a codelist but is {class(cdm)}")
}

x <- omopgenerics::newCodelist(x)

# will only return codes that are used in the database
Expand All @@ -56,7 +60,16 @@ summariseOrphanCodes <- function(x,
dplyr::inner_join(codesUsed,
by = c("concept_id_2"="concept_id"))


if("concept_recommended" %in% names(cdm)){
phoebe <- TRUE
phoebeUsed <- cdm$concept_recommended |>
dplyr::inner_join(codesUsed,
by = c("concept_id_2"="concept_id"))
} else {
phoebe <- FALSE
cli::cli_inform(c("PHOEBE results not available",
"i" = "The concept_recommened table is not present in the cdm."))
}

orphanCodes <- list()
tableCodelist <- paste0(omopgenerics::uniqueTableName(),
Expand All @@ -75,40 +88,58 @@ summariseOrphanCodes <- function(x,
dplyr::inner_join(descendantsUsed,
by = c("concept_id" = "ancestor_concept_id")) |>
dplyr::select("concept_id" = "descendant_concept_id") |>
dplyr::distinct()
dplyr::filter(!is.na(.data$concept_id)) |>
dplyr::distinct() |>
dplyr::pull("concept_id")

# get ancestors used in db
orphanAncestors <- cdm[[tableCodelist]] |>
dplyr::left_join(ancestorsUsed,
by = c("concept_id" = "descendant_concept_id")) |>
dplyr::select("concept_id" = "ancestor_concept_id") |>
dplyr::distinct()
dplyr::select("concept_id" = "ancestor_concept_id") |>
dplyr::filter(!is.na(.data$concept_id)) |>
dplyr::distinct() |>
dplyr::pull("concept_id")

# get relationship 1
orphanRelationship1 <- cdm[[tableCodelist]] |>
dplyr::left_join(relationshipUsed1,
by = c("concept_id" = "concept_id_2")) |>
dplyr::select("concept_id" = "concept_id_1",
"relationship_id") |>
dplyr::distinct()
dplyr::select("concept_id" = "concept_id_1") |>
dplyr::filter(!is.na(.data$concept_id)) |>
dplyr::distinct() |>
dplyr::pull("concept_id")

# get relationship 2
orphanRelationship2 <- cdm[[tableCodelist]] |>
dplyr::left_join(relationshipUsed1,
by = c("concept_id" = "concept_id_1")) |>
dplyr::select("concept_id" = "concept_id_2") |>
dplyr::distinct()

orphanCodes[[names(x)[i]]] <- dplyr::union_all(orphanDescendants,
orphanAncestors,
orphanRelationship1,
orphanRelationship2) |>
dplyr::distinct() |>
dplyr::anti_join(cdm[[tableCodelist]],
by = "concept_id") |>
dplyr::select("concept_id" = "concept_id_2") |>
dplyr::filter(!is.na(.data$concept_id)) |>
dplyr::distinct() |>
dplyr::pull("concept_id")

orphanCodes[[names(x)[i]]] <- orphanCodes[[names(x)[i]]][!is.na(orphanCodes[[names(x)[i]]])]
orphanCodes[[names(x)[i]]] <- c(orphanDescendants,
orphanAncestors,
orphanRelationship1,
orphanRelationship2)


if(isTRUE(phoebe)){
phoebeCodes <- cdm[[tableCodelist]] |>
dplyr::left_join(phoebeUsed,
by = c("concept_id" = "concept_id_1")) |>
dplyr::select("concept_id" = "concept_id_2") |>
dplyr::filter(!is.na(.data$concept_id)) |>
dplyr::distinct() |>
dplyr::pull("concept_id")

orphanCodes[[names(x)[i]]] <- c(orphanCodes[[names(x)[i]]],
phoebeCodes)
}

# make sure we don't have any of the original codes
orphanCodes[[names(x)[i]]] <- setdiff(orphanCodes[[names(x)[i]]], x[[i]])
}

orphanCodes <- orphanCodes |> vctrs::list_drop_empty()
Expand Down
8 changes: 7 additions & 1 deletion tests/testthat/test-dbms.R
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,22 @@ test_that("postgres", {
achilles_schema = Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA")
)

# check orphan code use performance
# check orphan code use
expect_no_error(summariseOrphanCodes(list("asthma"=317009), cdm))

asthma <- getCandidateCodes(cdm, "asthma", domains = c("condition",
"observation"))
asthma_orphan <- summariseOrphanCodes(list(asthma = asthma$concept_id),
cdm)

# limit orphan codes to a domain
# we won't have any orphan codes from drug
asthma_oc <- summariseOrphanCodes(list("asthma"= c(317009)),
cdm,
domain = "drug")
expect_true(nrow(asthma_oc) == 0)


codes <- getDrugIngredientCodes(cdm, "metformin")
codes[["asthma"]] <- 317009

Expand Down
25 changes: 25 additions & 0 deletions tests/testthat/test-summariseOrphanCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ test_that("tests with mock db", {
settings <- omopgenerics::settings(orphan_codes)
expect_true(all(settings$result_type == "orphan_code_use"))

# with phoebe present
cdm <- omopgenerics::insertTable(cdm,
name = "concept_recommended",
table = data.frame(concept_id_1 = 1,
concept_id_2 = 9,
relationship_id = "from phoebe"),
overwrite = TRUE,
temporary = FALSE)

orphan_codes <- summariseOrphanCodes(x = list("msk" = codes$concept_id),
cdm = cdm)
expect_true(9 %in% orphan_codes$variable_level)


cdm <- omopgenerics::insertTable(cdm,
name = "concept_recommended",
table = data.frame(concept_id_1 = 8,
concept_id_2 = 9,
relationship_id = "from phoebe"),
overwrite = TRUE,
temporary = FALSE)
orphan_codes <- summariseOrphanCodes(x = list("msk" = codes$concept_id),
cdm = cdm)
expect_true(!9 %in% orphan_codes$variable_level)

#expected error
expect_error(summariseOrphanCodes(x = "a", cdm = cdm))
expect_error(summariseOrphanCodes(x = list("msk" = codes$concept_id),
Expand Down

0 comments on commit e36e1e7

Please sign in to comment.