Skip to content

Commit

Permalink
Merge pull request #191 from darwin-eu/orphan_domains
Browse files Browse the repository at this point in the history
orphan_domains
  • Loading branch information
edward-burn authored Jul 9, 2024
2 parents 9135efe + 0f13d92 commit 5c6f5b4
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 155 deletions.
169 changes: 15 additions & 154 deletions R/summariseOrphanCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#'
#' @param x A codelist for which to find related codes used in the database
#' @param cdm cdm_reference via CDMConnector
#' @param domain The domains to restrict results too. Only concepts from these
#' domains will be returned.
#'
#' @return A summarised result containg the frequency of codes related
#' to (but not in) the codelist
Expand All @@ -22,7 +24,14 @@
#' CDMConnector::cdmDisconnect(cdm)
#' }
summariseOrphanCodes <- function(x,
cdm){
cdm,
domain = c("condition",
"device",
"drug",
"measurement",
"observation",
"procedure",
"visit")){

if(isFALSE(inherits(cdm, "cdm_reference"))){
cli::cli_abort("cdm is not a cdm reference but is {class(cdm)}")
Expand Down Expand Up @@ -119,161 +128,13 @@ summariseOrphanCodes <- function(x,
attr(orphanCodes, "settings")$result_type <- "orphan_code_use"
}

if(nrow(orphanCodes) >= 1){
orphanCodes <- orphanCodes |>
dplyr::filter(.data$strata_level %in% .env$domain)
}

orphanCodes

}


# summariseOrphanCodes <- function(x,
# cdm,
# domains = "Condition",
# standardConcept = "Standard",
# searchInSynonyms = TRUE,
# searchNonStandard = TRUE,
# includeDescendants = TRUE,
# includeAncestor = TRUE,
# minCellCount = lifecycle::deprecated()) {
#
# if (lifecycle::is_present(minCellCount)) {
# lifecycle::deprecate_warn("2.3.0", "summariseOrphanCodes()", with = "omopgenerics::suppress()")
# }
#
# errorMessage <- checkmate::makeAssertCollection()
# checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
# checkmate::assertVector(domains, add = errorMessage)
# checkmate::assertVector(standardConcept, add = errorMessage)
# standardConceptCheck <- all(tolower(standardConcept) %in%
# c(
# "standard",
# "classification",
# "non-standard"
# ))
# if (!isTRUE(standardConceptCheck)) {
# errorMessage$push(
# "- standardConcept must be from Standard, Non-standard, or Classification"
# )
# }
# checkmate::assertTRUE(standardConceptCheck, add = errorMessage)
# checkmate::assert_logical(searchInSynonyms, add = errorMessage)
# checkmate::assert_logical(searchNonStandard, add = errorMessage)
# checkmate::assert_logical(includeDescendants, add = errorMessage)
# checkmate::assert_logical(includeAncestor, add = errorMessage)
# checkmate::reportAssertions(collection = errorMessage)
#
# checkmate::assertList(x)
# if(length(names(x)) != length(x)){
# cli::cli_abort("Must be a named list")
# }
#
#
# x <- addDetails(cdm = cdm, conceptList = x)
#
# orphanConcepts <- list()
# # rerun search
# for (i in seq_along(x)) {
# cli::cli_inform("Searching for orphan codes for {names(x)[i]}")
#
# suppressMessages(
# candidateCodes <- getCandidateCodes(
# cdm = cdm,
# keywords = x[[i]]$concept_name,
# domains = domains,
# standardConcept = standardConcept,
# searchInSynonyms = searchInSynonyms,
# searchNonStandard = searchNonStandard,
# includeDescendants = includeDescendants,
# includeAncestor = includeAncestor))
#
# # Exclude codes that are in the original set of codes
# candidateCodes <- candidateCodes %>%
# dplyr::anti_join(
# x[[i]] %>% dplyr::select("concept_id"),
# by = "concept_id"
# )
#
# # Use achilles counts to summarise code use
# if ("achilles_results" %in% names(cdm)) {
# cli::cli_inform("Using achilles results to restict to codes that appear in the cdm reference")
# orphanConcepts[[i]] <- summariseAchillesCodeUse(
# x = list("cs" = candidateCodes$concept_id),
# cdm = cdm
# )
# if (nrow(orphanConcepts[[i]]) > 0) {
# # transform to orphan codes result format
# orphanConcepts[[i]] <- orphanConcepts[[i]] %>%
# dplyr::left_join(
# candidateCodes %>%
# dplyr::mutate("variable_level" = as.character(.data$concept_id)) %>%
# dplyr::select("variable_level", "found_from"),
# by = "variable_level"
# ) %>%
# dplyr::mutate(
# additional_name = paste0(.data$additional_name, " &&& relationship_id"),
# additional_level = paste0(.data$additional_level, " &&& ", .data$found_from)
# ) |>
# dplyr::select(!"found_from")
# }
# } else {
# cli::cli_inform("Achilles tables not found in cdm reference - querying cdm directly for code counts")
# orphanConcepts[[i]] <- summariseCodeUse(
# x = list("cs" = candidateCodes$concept_id),
# cdm = cdm,
# countBy = "record"
# )
# if (nrow(orphanConcepts[[i]]) > 0) {
# # transform to orphan codes result format
# orphanConcepts[[i]] <- orphanConcepts[[i]] %>%
# dplyr::filter(.data$variable_name != "overall") %>%
# visOmopResults::splitAdditional() %>%
# dplyr::select(!c("strata_name", "strata_level", "source_concept_name", "source_concept_id")) %>%
# dplyr::left_join(
# candidateCodes %>%
# dplyr::mutate("variable_level" = as.character(.data$concept_id)) %>%
# dplyr::select(
# "variable_level", "vocabulary_id", "standard_concept", "relationship_id" = "found_from"
# ) %>%
# dplyr::mutate(
# standard_concept = dplyr::case_when(
# standard_concept == "S" ~ "standard",
# standard_concept == "C" ~ "classification",
# is.na(standard_concept) ~ "non-standard"
# )
# ),
# by = "variable_level"
# ) %>%
# visOmopResults::uniteAdditional(cols = c("standard_concept", "vocabulary_id", "relationship_id")) %>%
# visOmopResults::uniteStrata(cols = "domain_id")
# }
# }
#
# if (nrow(orphanConcepts[[i]]) == 0) {
# cli::cli_inform("-- No orphan codes found for codelist {names(x)[i]}")
# orphanConcepts[[i]] <- omopgenerics::emptySummarisedResult()
# }
# }
#
# orphanConcepts <- dplyr::bind_rows(orphanConcepts) |>
# dplyr::as_tibble()
# attr(orphanConcepts, "settings") <- NULL
#
# orphanConcepts <- orphanConcepts %>%
# dplyr::mutate(result_id = 1L) %>%
# omopgenerics::newSummarisedResult(
# settings = dplyr::tibble(
# result_id = 1L,
# result_type = "orphan_codes",
# package_name = "CodelistGenerator",
# package_version = as.character(utils::packageVersion(
# pkg = "CodelistGenerator")),
# search_domains = paste0(domains, collapse = " &&& "), # "search" added not to mistake with code domain in summarised result
# search_standard_concept = paste0(standardConcept, collapse = " &&& "), # "search" added not to mistake with strata_concept in summarised result
# search_in_synonyms = searchInSynonyms,
# search_non_standard = searchNonStandard,
# include_descendants = includeDescendants,
# include_ancestor = includeAncestor
# )
# )
#
# return(orphanConcepts)
# }
#
10 changes: 9 additions & 1 deletion man/summariseOrphanCodes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/test-dbms.R
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ test_that("postgres", {
# check orphan code use performance
expect_no_error(summariseOrphanCodes(list("asthma"=317009), cdm))

# limit orphan codes to a domain
# we won't have any orphan codes from drug
asthma_oc <- summariseOrphanCodes(list("asthma"= c(317009)),
cdm,
domain = "drug")
expect_true(nrow(asthma_oc) == 0)

codes <- getDrugIngredientCodes(cdm, "metformin")
codes[["asthma"]] <- 317009

Expand Down

0 comments on commit 5c6f5b4

Please sign in to comment.