Skip to content

Commit

Permalink
Merge pull request #8 from thehyve/small_updates
Browse files Browse the repository at this point in the history
Small updates
  • Loading branch information
guuswilmink authored Aug 9, 2024
2 parents 401cba4 + e72ea42 commit 706d0dc
Show file tree
Hide file tree
Showing 4 changed files with 316 additions and 45 deletions.
36 changes: 29 additions & 7 deletions R/countOccurrences.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,48 @@ countOccurrences <- function(v, tables, links, db_connection, cdm_schema, vocab_

# Combined SQL query for direct and descendant counts
combined_sql <- sprintf(
# WITH clause to define common table expressions (CTEs)
"WITH direct_counts AS (
-- Select the concept_id and count the distinct persons and total records for each concept_id
SELECT %s AS concept_id, COUNT(DISTINCT person_id) AS count_persons, COUNT(*) AS count_records
-- From the cdm schema and the specified table
FROM %s.%s
-- Where the concept_id is in the provided vector
WHERE %s IN (%s)
-- Group by the concept_id
GROUP BY %s
), descendant_counts AS (
SELECT b.ancestor_concept_id AS concept_id, COUNT(DISTINCT a.person_id) AS descendant_count_person, COUNT(*) AS descendant_count_record
-- Define the second expression for descendants
), desc_counts AS (
-- Select the ancestor_concept_id and count the distinct persons and total records for each concept_id
SELECT b.ancestor_concept_id AS concept_id, COUNT(DISTINCT a.person_id) AS desc_count_person, COUNT(*) AS desc_count_record
-- From the vocab schema and concept ancestor table (vocab schema must contain concept_ancestor table; this can be the same schema as the cdm)
FROM %s.%s a
-- Join the concept_ancestor table to get the ancestor_concept_id
JOIN %s.concept_ancestor b ON a.%s = b.descendant_concept_id
-- Where the descendant_concept_id is in the provided vector
WHERE b.ancestor_concept_id IN (%s)
-- Group by the ancestor_concept_id
GROUP BY b.ancestor_concept_id
)
SELECT coalesce(d.concept_id, dc.concept_id) AS concept_id, coalesce(count_persons, 0) AS count_persons, coalesce(count_records, 0) AS count_records, coalesce(descendant_count_person, 0) AS descendant_count_person, coalesce(descendant_count_record, 0) AS descendant_count_record
-- Combine the direct and descendant counts into one result set
SELECT coalesce(d.concept_id, dc.concept_id) AS concept_id, coalesce(count_persons, 0) AS count_persons, coalesce(count_records, 0) AS count_records, coalesce(desc_count_person, 0) AS desc_count_person, coalesce(desc_count_record, 0) AS desc_count_record
FROM direct_counts d
FULL OUTER JOIN descendant_counts dc ON d.concept_id = dc.concept_id",
FULL OUTER JOIN desc_counts dc ON d.concept_id = dc.concept_id",
concept_id_field, cdm_schema, table, concept_id_field, paste(v, collapse = ","), concept_id_field,
cdm_schema, table, vocab_schema, concept_id_field, paste(v, collapse = ",")
)

combined_res <- dbGetQuery(db_connection, combined_sql)

not_in_data <- v[!(v %in% combined_res$concept_id)]
combined_res <- combined_res |>
bind_rows(tibble(
concept_id = not_in_data,
count_persons = 0,
count_records = 0,
desc_count_person = 0,
desc_count_record = 0
))

# Append results
results[[table]] <- combined_res
Expand All @@ -74,12 +96,12 @@ countOccurrences <- function(v, tables, links, db_connection, cdm_schema, vocab_
summarise(
count_persons = sum(count_persons),
count_records = sum(count_records),
descendant_count_person = sum(descendant_count_person),
descendant_count_record = sum(descendant_count_record)
desc_count_person = sum(desc_count_person),
desc_count_record = sum(desc_count_record)
) %>%
ungroup() %>%
mutate(concept_name = names(v)[match(concept_id, v)]) %>%
arrange(desc(count_records + descendant_count_record))
arrange(desc(count_records + desc_count_record))

if (!is.null(save_path)) {
readr::write_csv(final_res, paste0(save_path, '/', 'count_occurrences.csv'))
Expand Down
56 changes: 25 additions & 31 deletions inst/templates/cohortCapr.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
## ----knitr, include=FALSE-----------------------------------------------------------------
## ----knitr, include=FALSE-----------------------------------------------------
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())

knitr::opts_chunk$set(echo = TRUE)

knitr::purl(
input = './inst/templates/cohortCapr_md.Rmd',
output = './inst/templates/cohortCapr.R'
)
# knitr::purl(
# input = './inst/templates/cohortCapr_md.Rmd',
# output = './inst/templates/cohortCapr.R'
# )


## ----Get project configurations-----------------------------------------------------------
## ----Get project configurations-----------------------------------------------
connectionConfig <- config::get(config = 'config', file = './inst/config/connection_config.yml')
config_oth <- config::get(config = 'config', file = './inst/config/config.yml')


## ----Load libraries-----------------------------------------------------------------------
## ----Load libraries-----------------------------------------------------------
library(RSQLite)
library(tibble)
library(DatabaseConnector)
Expand All @@ -23,7 +23,7 @@ library(CirceR)
library(Capr)


## ----connect to database, eval=TRUE, include=TRUE-----------------------------------------
## ----connect to database, eval=TRUE, include=TRUE-----------------------------
# Use connection details from configuration
connectionDetails <- createConnectionDetails(
dbms = connectionConfig$dbms,
Expand All @@ -36,7 +36,7 @@ connectionDetails <- createConnectionDetails(
)


## ----concept sets, echo=TRUE--------------------------------------------------------------
## ----concept sets, echo=TRUE--------------------------------------------------
## Concept sets
source("./R/conceptSets.R")

Expand All @@ -53,7 +53,7 @@ conceptSets$conceptSets <- conceptSets$conceptSets %>%
disconnect(con)


## ----count occurences---------------------------------------------------------------------
## ----count occurences---------------------------------------------------------
## Count occurrences of each concept in data

# Establish connection
Expand All @@ -65,21 +65,22 @@ source("./R/countOccurrences.R")
# Get links between tables and fields as input
source("./R/table_linked_to_concept_field.R")

# count occurrences of each concept 'x' and print results; cardiac complications as example
# cardiacComplicationsCounts <-
# countOccurrences(
# conceptSets$concepts$cardiacComplications, c("condition_occurrence", "procedure_occurrence"), links, con, connectionConfig$cdm_schema
# ) %>% print()
labTestsCounts <-
additionalVarsCounts <-
countOccurrences(
conceptSets$concepts$labTests, c("measurement"), links, con, connectionConfig$cdm_schema, connectionConfig$vocabulary_schema
conceptSets$concepts$additional,
c("condition_occurrence", "procedure_occurrence", "measurement", "observation"),
links,
con,
connectionConfig$cdm_schema,
connectionConfig$vocabulary_schema,
save_path = config_oth$save_path_counts
) %>% print()

# Disconnect
disconnect(con)


## ----Standard non-standard check----------------------------------------------------------
## ----Standard non-standard check----------------------------------------------
# Connect to DB
con <- connect(connectionDetails)

Expand All @@ -101,7 +102,7 @@ nonStandard



## ----Standard non-standard check concept set----------------------------------------------
## ----Standard non-standard check concept set----------------------------------
# connect to DB
con <- connect(connectionDetails)

Expand All @@ -124,7 +125,7 @@ disconnect(con)
nonStandardCS


## ----Cohort definition--------------------------------------------------------------------
## ----Cohort definition--------------------------------------------------------
## Cohort definition
# Create cohort definition
ch <- cohort(
Expand All @@ -148,13 +149,6 @@ ch <- cohort(
query = procedure(conceptSets$conceptSets$cardiacComplications)
)
),
withAny(
atLeast(
x = 1,
# include patients who have had cardiac surgery
query = procedure(conceptSets$conceptSets$cardiacSurgery)
)
),
withAny(
atLeast(
x = 1,
Expand All @@ -169,7 +163,7 @@ ch <- cohort(
)


## ----json and sql-------------------------------------------------------------------------
## ----json and sql-------------------------------------------------------------
## Cohort json and sql
# Generate json for cohort
chJson <- ch %>%
Expand All @@ -184,7 +178,7 @@ sql <- CirceR::buildCohortQuery(
)


## ----Save cohort and concept set json-----------------------------------------------------
## ----Save cohort and concept set json-----------------------------------------
write(chJson, paste0(config_oth$save_path_json, "/cohort.json"))
for (cs in names(conceptSets$conceptSets)) {
writeConceptSet(
Expand All @@ -194,7 +188,7 @@ for (cs in names(conceptSets$conceptSets)) {
}


## ----Create and generate cohorts----------------------------------------------------------
## ----Create and generate cohorts----------------------------------------------
# Establish connection
con <- connect(connectionDetails)

Expand Down Expand Up @@ -236,7 +230,7 @@ disconnect(con)
cohortCounts


## ----Number of people in DB---------------------------------------------------------------
## ----Number of people in DB---------------------------------------------------
# Establish connection
con <- connect(connectionDetails)

Expand Down
14 changes: 7 additions & 7 deletions inst/templates/cohortCapr_md.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ source("./R/countOccurrences.R")
# Get links between tables and fields as input
source("./R/table_linked_to_concept_field.R")
# count occurrences of each concept 'x' and print results; cardiac complications as example
# cardiacComplicationsCounts <-
# countOccurrences(
# conceptSets$concepts$cardiacComplications, c("condition_occurrence", "procedure_occurrence"), links, con, connectionConfig$cdm_schema
# ) %>% print()
labTestsCounts <-
additionalVarsCounts <-
countOccurrences(
conceptSets$concepts$labTests, c("measurement"), links, con, connectionConfig$cdm_schema, connectionConfig$vocabulary_schema,
conceptSets$concepts$additional,
c("condition_occurrence", "procedure_occurrence", "measurement", "observation"),
links,
con,
connectionConfig$cdm_schema,
connectionConfig$vocabulary_schema,
save_path = config_oth$save_path_counts
) %>% print()
Expand Down
Loading

0 comments on commit 706d0dc

Please sign in to comment.