Skip to content

Commit

Permalink
Merge pull request #95 from darwin-eu/develop
Browse files Browse the repository at this point in the history
v1.7.0
  • Loading branch information
edward-burn authored Aug 16, 2023
2 parents 797bb0f + 9930f18 commit d25f8f6
Show file tree
Hide file tree
Showing 17 changed files with 849 additions and 28 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: CodelistGenerator
Title: Generate Code Lists for the OMOP Common Data Model
Version: 1.6.0
Version: 1.7.0
Authors@R: c(
person("Edward", "Burn", email = "[email protected]",
role = c("aut", "cre"),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# CodelistGenerator 1.7.0
* Added function codesFromCohort.

# CodelistGenerator 1.6.0
* Improved getICD10StandardCodes function.
* Added function codesFromConceptSet.
Expand Down
53 changes: 46 additions & 7 deletions R/codesFromConceptSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,18 @@ codesFromConceptSet <- function(path, cdm, withConceptDetails = FALSE) {

#' Get concept ids from a provided path to cohort json files
#'
#' @param path Path to a file or folder containing JSONs of concept sets
#' @param path Path to a file or folder containing JSONs of cohort definitions
#' @param cdm A cdm reference created with CDMConnector
#' @param withConceptDetails If FALSE a vector of concept IDs will be returned
#' for each concept set. If TRUE a tibble will be returned with additional
#' information on the identified concepts.
#'
#' @return Named list with concept_ids for each concept set
#' @export
#'
codesFromCohort <- function(path, cdm) {
codesFromCohort <- function(path, cdm, withConceptDetails = FALSE) {
# initial checks
#checkInput(path = path, cdm = cdm)
checkInputs(path = path, cdm = cdm)

# list jsons
files <- listJsonFromPath(path)
Expand All @@ -112,6 +115,11 @@ codesFromCohort <- function(path, cdm) {
# split into list
codelist <- tibbleToList(codelistTibble)

if(isTRUE(withConceptDetails)){
codelist <- addDetails(conceptList = codelist,
cdm = cdm)
}

# return
return(codelist)
}
Expand Down Expand Up @@ -146,7 +154,12 @@ extractCodes <- function(file, unknown) {
conceptId <- NULL
includeDescendants <- NULL
isExcluded <- NULL

for (j in seq_along(concepts)) {
if(!is.null(concepts[[j]][["includeMapped"]])){
cli::cli_abort(
glue::glue("Mapped as TRUE not supported (found in {name})"))
}
conceptId <- c(conceptId, concepts[[j]][["concept"]][["CONCEPT_ID"]])
exc <- concepts[[j]][["isExcluded"]]
isExcluded <- c(
Expand All @@ -161,7 +174,8 @@ extractCodes <- function(file, unknown) {
dplyr::union_all(dplyr::tibble(
codelist_name = name, concept_id = conceptId,
include_descendants = includeDescendants, is_excluded = isExcluded
))
) %>%
dplyr::mutate(filename = file))
}
return(codelistTibble)
}
Expand Down Expand Up @@ -198,15 +212,40 @@ excludeCodes <- function(codelistTibble) {
}

tibbleToList <- function(codelistTibble) {
nam <- unique(codelistTibble$codelist_name)

codelistTibble <- codelistTibble %>%
dplyr::mutate(nam = paste0(.data$codelist_name, "; ",
.data$filename))

nam <- unique(codelistTibble$nam)
codelist <- lapply(nam, function(x) {
codelistTibble %>%
dplyr::filter(.data$codelist_name == .env$x) %>%
dplyr::filter(.data$nam == .env$x) %>%
dplyr::pull("concept_id") %>%
unique()
})
names(codelist) <- nam
return(codelist)


# check if we have any concept sets with the same name but different definitions
# keep first for each name
cs_names <- stringr::str_extract(names(codelist), "^[^;]*")
cs_names_unique <- unique(cs_names)

codelist_dedup <- list()

for(i in seq_along(cs_names_unique)){
same_name_cs <- codelist[which(cs_names_unique[i] == cs_names)]
check_consistent <- all(sapply(same_name_cs, identical, same_name_cs[[1]]))

if(isFALSE(check_consistent)){
cli::cli_abort(message = "Different definitions for concept set {cs_names_unique[i]} found")
}
# keep first
codelist_dedup[[cs_names_unique[i]]] <- same_name_cs[[1]]
}

return(codelist_dedup)
}

addDetails <- function(conceptList, cdm){
Expand Down
19 changes: 10 additions & 9 deletions R/drugCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,14 @@ getATCCodes <- function(cdm,
checkmate::assertTRUE(atcCheck, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)

# to avoid potential memory problems will batch
if (nrow(atc_groups) > 0) {
atc_descendants <- fetchBatchedDescendants(cdm = cdm,
codes = atc_groups$concept_id,
batchSize = 500,
doseForm = doseForm)
}

atc_descendants <- getDescendants(
cdm = cdm,
conceptId = atc_groups$concept_id,
withAncestor = TRUE,
doseForm = doseForm
)
if (nrow(atc_descendants) > 0) {
atc_descendants <- atc_descendants %>%
dplyr::select("concept_id", "concept_name",
Expand Down Expand Up @@ -207,7 +208,7 @@ getDrugIngredientCodes <- function(cdm,

# to avoid potential memory problems will batch
if (nrow(ingredientConcepts) > 0) {
ingredientCodes <- fetchBatchedDrugIngredientCodes(cdm,
ingredientCodes <- fetchBatchedDescendants(cdm,
codes = ingredientConcepts$concept_id,
batchSize = 500,
doseForm = doseForm
Expand Down Expand Up @@ -267,7 +268,7 @@ getDrugIngredientCodes <- function(cdm,
return(ingredientCodes)
}

fetchBatchedDrugIngredientCodes <- function(cdm, codes, batchSize, doseForm) {
fetchBatchedDescendants <- function(cdm, codes, batchSize, doseForm) {
codeBatches <- split(
codes,
ceiling(seq_along(codes) / batchSize)
Expand All @@ -277,7 +278,7 @@ fetchBatchedDrugIngredientCodes <- function(cdm, codes, batchSize, doseForm) {

cli::cli_progress_bar(
total = length(descendants),
format = " -- getting descendants {cli::pb_bar} {cli::pb_current} of {cli::pb_total} ingredient groups"
format = " -- getting descendants {cli::pb_bar} {cli::pb_current} of {cli::pb_total} batched groups"
)
for (i in seq_along(descendants)) {
cli::cli_progress_update()
Expand Down
4 changes: 2 additions & 2 deletions R/mockVocabRef.R
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ mockVocabRef <- function(backend = "database") {
CDMConnector::stow(cdm, dOut)

if (backend == "arrow") {
if(utils::packageVersion("CDMConnector")<"1.10"){
if(utils::packageVersion("CDMConnector")<"1.1.0"){
cdmArrow <- CDMConnector::cdm_from_files(
path = dOut,
as_data_frame = FALSE
Expand All @@ -338,7 +338,7 @@ mockVocabRef <- function(backend = "database") {
}

if (backend == "data_frame") {
if(utils::packageVersion("CDMConnector")<"1.10"){
if(utils::packageVersion("CDMConnector")<"1.1.0"){
cdmDF <- CDMConnector::cdm_from_files(
path = dOut,
as_data_frame = TRUE
Expand Down
6 changes: 0 additions & 6 deletions R/summariseCodeUse.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,6 @@ summariseCodeUse <- function(x,
.data$estimate_suppressed == "TRUE",
NA, .data$estimate))

if(!"record" %in% countBy){
codeCounts <- codeCounts %>%
dplyr::mutate(concept_name= NA,
concept_id =NA)
}

codeCounts <- codeCounts %>%
dplyr::mutate(group_level = dplyr::if_else(.data$group_name == "By concept",
paste0(.data$concept_name, " (",
Expand Down
88 changes: 88 additions & 0 deletions inst/cohorts/cohorts.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"ConceptSets": [
{
"id": 0,
"name": "acetaminophen",
"expression": {
"items": [
{
"concept": {
"CONCEPT_CLASS_ID": "Ingredient",
"CONCEPT_CODE": "161",
"CONCEPT_ID": 1125315,
"CONCEPT_NAME": "acetaminophen",
"DOMAIN_ID": "Drug",
"INVALID_REASON": "V",
"INVALID_REASON_CAPTION": "Valid",
"STANDARD_CONCEPT": "S",
"STANDARD_CONCEPT_CAPTION": "Standard",
"VOCABULARY_ID": "RxNorm",
"VALID_START_DATE": "1970-01-01",
"VALID_END_DATE": "2099-12-31"
},
"includeDescendants": true
}
]
}
},
{
"id": 1,
"name": "influenza",
"expression": {
"items": [
{
"concept": {
"CONCEPT_CLASS_ID": "Clinical Finding",
"CONCEPT_CODE": "6142004",
"CONCEPT_ID": 4266367,
"CONCEPT_NAME": "Influenza",
"DOMAIN_ID": "Condition",
"INVALID_REASON": "V",
"INVALID_REASON_CAPTION": "Valid",
"STANDARD_CONCEPT": "S",
"STANDARD_CONCEPT_CAPTION": "Standard",
"VOCABULARY_ID": "SNOMED",
"VALID_START_DATE": "2002-01-31",
"VALID_END_DATE": "2099-12-31"
}
}
]
}
}
],
"PrimaryCriteria": {
"CriteriaList": [
{
"ConditionOccurrence": {
"CodesetId": 1
}
},
{
"DrugExposure": {
"CodesetId": 0
}
}
],
"ObservationWindow": {
"PriorDays": 0,
"PostDays": 0
},
"PrimaryCriteriaLimit": {
"Type": "First"
}
},
"QualifiedLimit": {
"Type": "First"
},
"ExpressionLimit": {
"Type": "First"
},
"InclusionRules": [],
"CensoringCriteria": [],
"CollapseSettings": {
"CollapseType": "ERA",
"EraPad": 0
},
"CensorWindow": {},
"cdmVersionRange": ">=5.0.0"
}
Empty file added inst/cohorts/extra_file.txt
Empty file.
89 changes: 89 additions & 0 deletions inst/cohorts_for_mock/oa_desc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"ConceptSets": [
{
"id": 0,
"name": "OA",
"expression": {
"items": [
{
"concept": {
"CONCEPT_CLASS_ID": "Clinical Finding",
"CONCEPT_CODE": "396275006",
"CONCEPT_ID": 3,
"CONCEPT_NAME": "Osteoarthritis",
"DOMAIN_ID": "Condition",
"INVALID_REASON": "V",
"INVALID_REASON_CAPTION": "Valid",
"STANDARD_CONCEPT": "S",
"STANDARD_CONCEPT_CAPTION": "Standard",
"VOCABULARY_ID": "SNOMED"
},
"includeDescendants": true
}
]
}
},
{
"id": 1,
"name": "Other",
"expression": {
"items": [
{
"concept": {
"CONCEPT_CLASS_ID": "Clinical Finding",
"CONCEPT_CODE": "422504002",
"CONCEPT_ID": 5,
"CONCEPT_NAME": "Ischemic stroke",
"DOMAIN_ID": "Condition",
"INVALID_REASON": "V",
"INVALID_REASON_CAPTION": "Valid",
"STANDARD_CONCEPT": "S",
"STANDARD_CONCEPT_CAPTION": "Standard",
"VOCABULARY_ID": "SNOMED"
}
}
]
}
}
],
"PrimaryCriteria": {
"CriteriaList": [
{
"ConditionOccurrence": {
"CodesetId": 0
}
},
{
"Observation": {
"CodesetId": 0
}
},
{
"ConditionOccurrence": {
"CodesetId": 1
}
}
],
"ObservationWindow": {
"PriorDays": 0,
"PostDays": 0
},
"PrimaryCriteriaLimit": {
"Type": "First"
}
},
"QualifiedLimit": {
"Type": "First"
},
"ExpressionLimit": {
"Type": "First"
},
"InclusionRules": [],
"CensoringCriteria": [],
"CollapseSettings": {
"CollapseType": "ERA",
"EraPad": 0
},
"CensorWindow": {},
"cdmVersionRange": ">=5.0.0"
}
Loading

0 comments on commit d25f8f6

Please sign in to comment.