-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Big update, icd category conversions, custom times to mets, displays …
…for tables, etc
- Loading branch information
1 parent
33703ce
commit 3a34005
Showing
8 changed files
with
336 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
library(here) | ||
library(readr) | ||
library(dplyr) | ||
library(tidyr) | ||
|
||
# Read the exploratory bladder ICD map, tidy it, and write it out as
# data-raw/manual/icd_map_custom.csv.
#   - missing is_local flags are set to FALSE
#   - rows are sorted by is_local, then map_custom, then icd_code
#   - is_local and map_custom are moved to the front of the table
icd_map <- here("analysis", "explore", "bpc_bladder_icd_map.csv") |>
  readr::read_csv() |>
  replace_na(list(is_local = FALSE)) |>
  arrange(is_local, map_custom, icd_code) |>
  select(is_local, map_custom, everything())

# Missing values are written as empty strings rather than "NA".
icd_map |>
  readr::write_csv(
    here("data-raw", "manual", "icd_map_custom.csv"),
    na = ""
  )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
|
||
library(purrr); library(fs); library(here) | ||
purrr::walk(.x = fs::dir_ls('R'), .f = source) | ||
|
||
# Inputs: the custom ICD map written to data-raw/manual, plus the `img` and
# `ca_ind` cohort tables stored as .rds files under data/cohort.
icd_custom <- here("data-raw", "manual", "icd_map_custom.csv") %>%
  readr::read_csv()

img <- here("data", "cohort", "img.rds") %>%
  readr::read_rds()
ca_ind <- here("data", "cohort", "ca_ind.rds") %>%
  readr::read_rds()
|
||
# Reshape the imaging table to one row per scan and filled-in cancer site.
# dx_scan_days is deliberately not carried along: it is measured from the
# first BPC project cancer, and there is no obvious scenario where that helps.
img_ca <- img %>%
  select(
    record_id, scan_number, image_scan_int, image_ca,
    matches("image_casite[0-9]{1,2}")
  ) %>%
  pivot_longer(
    cols = matches("image_casite[0-9]{1,2}"),
    names_to = "loc_num",
    values_to = "icd_str",
    # drop the site slots that were left empty
    values_drop_na = TRUE
  )
|
||
# icd_str is split at the first space into a code and a description; any
# further pieces are merged back into the description. The amount of
# whitespace varies, so both parts are trimmed after the split. The original
# icd_str column is kept.
img_ca <- img_ca %>%
  separate_wider_delim(
    cols = icd_str,
    delim = " ",
    names = c("icd_code", "icd_desc"),
    too_many = "merge",
    cols_remove = FALSE
  ) %>%
  mutate(
    across(.cols = c(icd_code, icd_desc), .fns = str_trim)
  )
|
||
# Attach the custom site category (map_custom) and the local flag (is_local)
# to each imaging row, matching on ICD code.
img_ca <- left_join(
  img_ca,
  select(icd_custom, icd_code, is_local, map_custom),
  by = "icd_code"
)

# filter() drops rows where the condition is NA as well as FALSE, so any row
# with no is_local value after the join (e.g. an ICD code that is absent from
# the custom map) would disappear silently. Report those rows first.
n_no_flag <- sum(is.na(img_ca$is_local))
if (n_no_flag > 0) {
  warning(
    n_no_flag, " imaging rows have no is_local value after joining the ",
    "custom ICD map and will be dropped.",
    call. = FALSE
  )
}

# For each person find the time to each met category.
# We can always make a category for local lymph if needed, but for now those
# will be removed.
img_ca <- img_ca %>%
  filter(!is_local)
|
||
# Earliest image_scan_int per person and met category.
# NOTE(review): if every image_scan_int in a group is NA, min() returns Inf
# with a warning - confirm that cannot happen in this data.
met_time_custom <- img_ca %>%
  group_by(record_id, map_custom) %>%
  summarize(
    dob_met_days = min(image_scan_int, na.rm = TRUE),
    .groups = "drop"
  )

# Across all categories: the earliest met time for each person who has one.
met_time_total <- met_time_custom %>%
  group_by(record_id) %>%
  summarize(
    dob_met_days = min(dob_met_days, na.rm = TRUE)
  )
|
||
# Record IDs returned by get_dmet_time(ca_ind), computed once and reused.
# NOTE(review): assumes get_dmet_time() is deterministic and side-effect free
# - confirm against its definition.
derived_met_ids <- get_dmet_time(ca_ind)$record_id

# IDs found by get_dmet_time() but not by the custom imaging map ...
diff_derived_custom <- setdiff(derived_met_ids, met_time_total$record_id)
# ... and IDs found by the custom imaging map but not by get_dmet_time().
diff_custom_derived <- setdiff(met_time_total$record_id, derived_met_ids)
|
||
# Exploratory checks on the people in diff_derived_custom (returned by
# get_dmet_time() but absent from met_time_total).

# How many of them fall in each stage_dx_iv group?
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  count(stage_dx_iv)

# Which dx_to_dmets_*_days columns are filled in for them?
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  select(record_id, matches('dx_to_dmets_.*_days')) %>%
  # turn every selected days column into a "has a value" flag
  mutate(
    across(
      .cols = -record_id,
      .fns = \(days) !is.na(days)
    )
  ) %>%
  pivot_longer(
    cols = -record_id,
    names_to = "name",
    values_to = "value"
  ) %>%
  # only those with a met in that location
  filter(value) %>%
  # number of people with mets at each spot
  count(name)
# Abdomen we know about - renal pelvis is the big one I know of.
# Pelvis is pretty vague and not interesting, so we can probably move on there.
|
||
|
||
# Turn the category labels into column names ("Lymph node (distant)" becomes
# "lymph_distant", everything else is lower-cased), then spread to one row per
# person with one dob_met_days column per category.
met_time_custom <- met_time_custom %>%
  mutate(
    map_custom = case_when(
      map_custom %in% "Lymph node (distant)" ~ "lymph_distant",
      TRUE ~ tolower(map_custom)
    )
  ) %>%
  pivot_wider(
    names_from = "map_custom",
    values_from = "dob_met_days"
  )

# Save the wide table next to the other cohort data.
readr::write_rds(
  x = met_time_custom,
  file = here("data", "cohort", "met_time_custom.rds")
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
|
||
library(purrr); library(fs); library(here) | ||
purrr::walk(.x = fs::dir_ls('R'), .f = source) | ||
|
||
# Inputs: the custom ICD map written to data-raw/manual, plus the `img` and
# `ca_ind` cohort tables stored as .rds files under data/cohort.
icd_custom <- here("data-raw", "manual", "icd_map_custom.csv") %>%
  readr::read_csv()

img <- here("data", "cohort", "img.rds") %>%
  readr::read_rds()
ca_ind <- here("data", "cohort", "ca_ind.rds") %>%
  readr::read_rds()
|
||
# Reshape the imaging table to one row per scan and filled-in cancer site.
# dx_scan_days is deliberately not carried along: it is measured from the
# first BPC project cancer, and there is no obvious scenario where that helps.
img_ca <- img %>%
  select(
    record_id, scan_number, image_scan_int, image_ca,
    matches("image_casite[0-9]{1,2}")
  ) %>%
  pivot_longer(
    cols = matches("image_casite[0-9]{1,2}"),
    names_to = "loc_num",
    values_to = "icd_str",
    # drop the site slots that were left empty
    values_drop_na = TRUE
  )
|
||
# icd_str is split at the first space into a code and a description; any
# further pieces are merged back into the description. The amount of
# whitespace varies, so both parts are trimmed after the split. The original
# icd_str column is kept.
img_ca <- img_ca %>%
  separate_wider_delim(
    cols = icd_str,
    delim = " ",
    names = c("icd_code", "icd_desc"),
    too_many = "merge",
    cols_remove = FALSE
  ) %>%
  mutate(
    across(.cols = c(icd_code, icd_desc), .fns = str_trim)
  )
|
||
# Attach the custom site category (map_custom) and the local flag (is_local)
# to each imaging row, matching on ICD code.
img_ca <- left_join(
  img_ca,
  select(icd_custom, icd_code, is_local, map_custom),
  by = "icd_code"
)

# filter() drops rows where the condition is NA as well as FALSE, so any row
# with no is_local value after the join (e.g. an ICD code that is absent from
# the custom map) would disappear silently. Report those rows first.
n_no_flag <- sum(is.na(img_ca$is_local))
if (n_no_flag > 0) {
  warning(
    n_no_flag, " imaging rows have no is_local value after joining the ",
    "custom ICD map and will be dropped.",
    call. = FALSE
  )
}

# For each person find the time to each met category.
# We can always make a category for local lymph if needed, but for now those
# will be removed.
img_ca <- img_ca %>%
  filter(!is_local)
|
||
# Earliest image_scan_int per person and met category.
# NOTE(review): if every image_scan_int in a group is NA, min() returns Inf
# with a warning - confirm that cannot happen in this data.
met_time_custom <- img_ca %>%
  group_by(record_id, map_custom) %>%
  summarize(
    dob_met_days = min(image_scan_int, na.rm = TRUE),
    .groups = "drop"
  )

# Across all categories: the earliest met time for each person who has one.
met_time_total <- met_time_custom %>%
  group_by(record_id) %>%
  summarize(
    dob_met_days = min(dob_met_days, na.rm = TRUE)
  )
|
||
# Record IDs returned by get_dmet_time(ca_ind), computed once and reused.
# NOTE(review): assumes get_dmet_time() is deterministic and side-effect free
# - confirm against its definition.
derived_met_ids <- get_dmet_time(ca_ind)$record_id

# IDs found by get_dmet_time() but not by the custom imaging map ...
diff_derived_custom <- setdiff(derived_met_ids, met_time_total$record_id)
# ... and IDs found by the custom imaging map but not by get_dmet_time().
diff_custom_derived <- setdiff(met_time_total$record_id, derived_met_ids)
|
||
# Exploratory checks on the people in diff_derived_custom (returned by
# get_dmet_time() but absent from met_time_total).

# How many of them fall in each stage_dx_iv group?
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  count(stage_dx_iv)

# Which dx_to_dmets_*_days columns are filled in for them?
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  select(record_id, matches('dx_to_dmets_.*_days')) %>%
  # turn every selected days column into a "has a value" flag
  mutate(
    across(
      .cols = -record_id,
      .fns = \(days) !is.na(days)
    )
  ) %>%
  pivot_longer(
    cols = -record_id,
    names_to = "name",
    values_to = "value"
  ) %>%
  # only those with a met in that location
  filter(value) %>%
  # number of people with mets at each spot
  count(name)
# Abdomen we know about - renal pelvis is the big one I know of.
# Pelvis is pretty vague and not interesting, so we can probably move on there.
|
||
|
||
# Turn the category labels into column names ("Lymph node (distant)" becomes
# "lymph_distant", everything else is lower-cased), then spread to one row per
# person with one dob_met_days column per category.
met_time_custom <- met_time_custom %>%
  mutate(
    map_custom = case_when(
      map_custom %in% "Lymph node (distant)" ~ "lymph_distant",
      TRUE ~ tolower(map_custom)
    )
  ) %>%
  pivot_wider(
    names_from = "map_custom",
    values_from = "dob_met_days"
  )

# Save the wide table next to the other cohort data.
readr::write_rds(
  x = met_time_custom,
  file = here("data", "cohort", "met_time_custom.rds")
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.