Skip to content

Commit

Permalink
Big update, icd category conversions, custom times to mets, displays …
Browse files Browse the repository at this point in the history
…for tables, etc
  • Loading branch information
alexpaynter committed Jan 24, 2025
1 parent 33703ce commit 3a34005
Show file tree
Hide file tree
Showing 8 changed files with 336 additions and 0 deletions.
19 changes: 19 additions & 0 deletions analysis/explore/icd_sort_store.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
library(here)
library(readr)
library(dplyr)
library(tidyr)

# Read the ICD map CSV that lives alongside the exploratory scripts.
icd_map <- readr::read_csv(
  file = here('analysis', 'explore', 'bpc_bladder_icd_map.csv')
)

# A missing is_local flag is stored as FALSE.
icd_map <- replace_na(icd_map, list(is_local = FALSE))
# Sort the rows, then move the two key columns to the front.
icd_map <- arrange(icd_map, is_local, map_custom, icd_code)
icd_map <- select(icd_map, is_local, map_custom, everything())

# Save the sorted map; NA values are written as empty cells.
readr::write_csv(
  x = icd_map,
  file = here('data-raw', 'manual', 'icd_map_custom.csv'),
  na = ""
)
114 changes: 114 additions & 0 deletions analysis/explore/new_time_to_met.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@

library(purrr); library(fs); library(here)
purrr::walk(.x = fs::dir_ls('R'), .f = source)

# Inputs: the custom ICD map plus the img and ca_ind cohort datasets.
icd_custom <- readr::read_csv(
  file = here('data-raw', 'manual', 'icd_map_custom.csv')
)
img <- readr::read_rds(file = here('data', 'cohort', 'img.rds'))
ca_ind <- readr::read_rds(file = here('data', 'cohort', 'ca_ind.rds'))

# Build one row per (scan, reported cancer site), with the site string split
# into an ICD code and a description.
img_ca <- img %>%
  # not using dx_scan_days because it's from the first bpc project cancer.
  # can't imagine a scenario where that's useful.
  select(
    record_id, scan_number, image_scan_int, image_ca,
    matches('image_casite[0-9]{1,2}')
  ) %>%
  # long format: one row per image_casite column, dropping the empty slots.
  pivot_longer(
    cols = matches('image_casite[0-9]{1,2}'),
    names_to = 'loc_num',
    values_to = 'icd_str'
  ) %>%
  filter(!is.na(icd_str)) %>%
  # varying amount of whitespace in here, so we split on the first delimiter
  # (everything after it is merged into icd_desc) and trim afterwards.
  # The original icd_str column is kept.
  separate_wider_delim(
    cols = icd_str,
    delim = " ",
    names = c('icd_code', 'icd_desc'),
    too_many = 'merge',
    cols_remove = FALSE
  ) %>%
  mutate(
    icd_code = str_trim(icd_code),
    icd_desc = str_trim(icd_desc)
  )

# Attach the custom category (map_custom) and the local flag (is_local) to
# every reported site, matching on ICD code.
img_ca <- left_join(
  img_ca,
  select(icd_custom, icd_code, is_local, map_custom),
  by = "icd_code"
)

# Rows whose code has no is_local value (e.g. the code is missing from the
# map) carry NA here, and filter() drops rows where the condition is NA.
# Warn so those rows do not disappear silently.
unmapped_codes <- unique(img_ca$icd_code[is.na(img_ca$is_local)])
if (length(unmapped_codes) > 0) {
  warning(
    "ICD codes with no is_local value in the custom map (rows dropped): ",
    paste(unmapped_codes, collapse = ", "),
    call. = FALSE
  )
}

# for each person find the time to each met category.
# we can always make a category for local lymph if needed, but for now those
# will be removed.
img_ca <- img_ca %>%
  filter(!is_local)

# Earliest image_scan_int per participant and custom category
# (one row per record_id x map_custom).
# NOTE(review): the dob_met_days name suggests days from date of birth;
# confirm against the definition of image_scan_int.
met_time_custom <- img_ca %>%
  group_by(record_id, map_custom) %>%
  summarize(
    dob_met_days = min(image_scan_int, na.rm = TRUE),
    .groups = 'drop'
  )

# across everything, find the people with mets.
# (earliest time over all of a participant's categories.)
met_time_total <- met_time_custom %>%
  group_by(record_id) %>%
  summarize(
    dob_met_days = min(dob_met_days, na.rm = TRUE)
  )

# Compare who is flagged by get_dmet_time() with who is flagged by the custom
# imaging-based approach.
# IDs returned by get_dmet_time() but absent from the custom table:
diff_derived_custom <- setdiff(
  x = get_dmet_time(ca_ind)$record_id,
  y = met_time_total$record_id
)
# IDs in the custom table but not returned by get_dmet_time():
diff_custom_derived <- setdiff(
  x = met_time_total$record_id,
  y = get_dmet_time(ca_ind)$record_id
)

# Counts of stage_dx_iv among the derived-only participants.
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  count(stage_dx_iv)

# For the same participants, count how many have a non-missing value in each
# dx_to_dmets_*_days column.
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  select(record_id, matches('dx_to_dmets_.*_days')) %>%
  mutate(
    across(
      .cols = matches('dx_to_dmets_.*_days'),
      .fns = function(x) !is.na(x)
    )
  ) %>%
  pivot_longer(cols = -record_id) %>%
  filter(value) %>% # only those with a met in that location
  count(name) # number of people with mets at each spot
# abdomen we know about - renal pelvis is the big one I know of.
# Pelvis is pretty vague and not interesting, so we can probably move on there.


# Reshape to one row per participant with one column per custom category.
# "Lymph node (distant)" gets a short name; every other label is only
# lower-cased, so any spaces or punctuation carry through into column names.
met_time_custom <- met_time_custom %>%
  mutate(
    map_custom = case_when(
      map_custom %in% "Lymph node (distant)" ~ "lymph_distant",
      TRUE ~ tolower(map_custom)
    )
  ) %>%
  pivot_wider(
    names_from = 'map_custom',
    values_from = 'dob_met_days'
  )

# Saved for downstream use (read back from this same path elsewhere).
readr::write_rds(
  x = met_time_custom,
  file = here('data', 'cohort', 'met_time_custom.rds')
)
80 changes: 80 additions & 0 deletions analysis/report/genie-bpc-bladder-clinical.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,22 @@ set_flextable_defaults(
```

```{r}
#' Render data as an interactive DT table with a configurable font size.
#'
#' @param x Data passed as the first argument to `DT::datatable()`.
#' @param font.size CSS font-size value applied to the table container
#'   (default '10pt').
#' @return The widget returned by `DT::datatable()`.
dt_wrapper <- function(x, font.size = '10pt') {
  # JS callback for DataTables' initComplete option: sets font-size on the
  # table's container element.
  init_js <- htmlwidgets::JS(
    "function(settings, json) {",
    paste0("$(this.api().table().container()).css({'font-size': '", font.size, "'});"),
    "}"
  )

  DT::datatable(
    x,
    style = "bootstrap4",
    fillContainer = FALSE,
    options = list(initComplete = init_js)
  )
}
```


```{r, warning = T}
read_wrap <- function(p) {
Expand Down Expand Up @@ -939,6 +955,43 @@ gg_met_loc_timing



```{r}
# Custom time-to-met table, one column per met category.
met_time_custom <- readr::read_rds(
  file = here('data', 'cohort', 'met_time_custom.rds')
)

# Keep every record_id in dft_pt and turn each met column into a flag:
# True for those with a met time listed, false otherwise (this includes
# record_ids absent from met_time_custom, which are NA after the left join).
met_yn_cohort <- dft_pt %>%
  select(record_id) %>%
  left_join(met_time_custom, by = 'record_id') %>%
  mutate(
    across(
      .cols = -c(record_id),
      .fns = function(x) !is.na(x)
    )
  )
```

### R1.1.6e: Reanalysis of met locations

The above classifications are all based on the derived variables provided with the BPC data. UC physicians on the team let us know that these were suboptimal; for example, "thorax" and "abdomen" are heterogeneous groups that don't mean a ton in light of the established conventions in the field. This section starts from the raw imaging data and maps the ICD codes into groups that should be more meaningful.

The table showing the mappings from ICD codes can be found in the appendices. The following summary table shows the number of participants (%) who had a metastasis at any time to an anatomic location:

```{r, include = TRUE}
# Summarise every met flag; record_id is dropped first so it is not tabulated.
met_yn_cohort %>%
  select(-record_id) %>%
  gtsummary::tbl_summary()
```

**Note:** Local mets are **not shown** at all in this table. See the appendix for image codes that I categorized as local (builds on the data guide appendix provided by the MSK stats team). And let me know if there are mistakes, of course.








Expand Down Expand Up @@ -1925,6 +1978,33 @@ Notes:
- Possible actions: (1) ignore them, default solution or (2) split regimens at metastasis so they are counted in LoT tables.


```{r}
# ICD-code-to-category map shown in the appendix table.
icd_map_custom <- readr::read_csv(
  file = here('data-raw', 'manual', 'icd_map_custom.csv')
)
```

#### Custom met locations

The table shown in R1.1.6e works from the imaging data and creates new categories for metastasis. This requires mapping ICD codes from the image that showed evidence of cancer to a category. This table shows those mapping choices.

Definitions of the columns shown here:

- `is_local`: Is this ICD code a local location to bladder cancer? The starting point for these choices was reviewing the data guide, created by the MSK stats team. If `is_local = TRUE` then this ICD code will not represent a distant metastasis in our re-analysis.
- `map_custom`: Our new categories.
- `icd_code`: The ICD code, pulled from {`image_casite1`, ..., `image_casite15`} in the BPC data.
- `n_times_reported`: The total number of times this `icd_code` pops up in our data (more than one per person possible).

```{r, include = TRUE}
# Drop Map and notes before display.
# confusing, Map is an intermediary map provided by Neil.
dt_wrapper(
  select(icd_map_custom, -c(Map, notes))
)
```





### Issues
Expand Down
8 changes: 8 additions & 0 deletions analysis/script/ddr_definition_comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ ddr_def <- bind_rows(
tibble(
source = 'GENIE int 2024',
gene = genie_intersect_panel,
),
# also adding in definition we used for the ASCO abstract:
tibble(
source = '2025 ASCO GU abstract',
gene = c('ERCC2', 'ERCC5',
'BRCA1', 'BRCA2', 'RECQL4', 'RAD51C', 'ATM',
'ATR', 'FANCC')
)
)

Expand Down Expand Up @@ -405,3 +412,4 @@ readr::write_rds(




114 changes: 114 additions & 0 deletions analysis/script/derive_custom_time_to_met.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@

library(purrr); library(fs); library(here)
purrr::walk(.x = fs::dir_ls('R'), .f = source)

# Inputs: the custom ICD map plus the img and ca_ind cohort datasets.
icd_custom <- readr::read_csv(
  file = here('data-raw', 'manual', 'icd_map_custom.csv')
)
img <- readr::read_rds(file = here('data', 'cohort', 'img.rds'))
ca_ind <- readr::read_rds(file = here('data', 'cohort', 'ca_ind.rds'))

# Build one row per (scan, reported cancer site), with the site string split
# into an ICD code and a description.
img_ca <- img %>%
  # not using dx_scan_days because it's from the first bpc project cancer.
  # can't imagine a scenario where that's useful.
  select(
    record_id, scan_number, image_scan_int, image_ca,
    matches('image_casite[0-9]{1,2}')
  ) %>%
  # long format: one row per image_casite column, dropping the empty slots.
  pivot_longer(
    cols = matches('image_casite[0-9]{1,2}'),
    names_to = 'loc_num',
    values_to = 'icd_str'
  ) %>%
  filter(!is.na(icd_str)) %>%
  # varying amount of whitespace in here, so we split on the first delimiter
  # (everything after it is merged into icd_desc) and trim afterwards.
  # The original icd_str column is kept.
  separate_wider_delim(
    cols = icd_str,
    delim = " ",
    names = c('icd_code', 'icd_desc'),
    too_many = 'merge',
    cols_remove = FALSE
  ) %>%
  mutate(
    icd_code = str_trim(icd_code),
    icd_desc = str_trim(icd_desc)
  )

# Attach the custom category (map_custom) and the local flag (is_local) to
# every reported site, matching on ICD code.
img_ca <- left_join(
  img_ca,
  select(icd_custom, icd_code, is_local, map_custom),
  by = "icd_code"
)

# Rows whose code has no is_local value (e.g. the code is missing from the
# map) carry NA here, and filter() drops rows where the condition is NA.
# Warn so those rows do not disappear silently.
unmapped_codes <- unique(img_ca$icd_code[is.na(img_ca$is_local)])
if (length(unmapped_codes) > 0) {
  warning(
    "ICD codes with no is_local value in the custom map (rows dropped): ",
    paste(unmapped_codes, collapse = ", "),
    call. = FALSE
  )
}

# for each person find the time to each met category.
# we can always make a category for local lymph if needed, but for now those
# will be removed.
img_ca <- img_ca %>%
  filter(!is_local)

# Earliest image_scan_int per participant and custom category
# (one row per record_id x map_custom).
# NOTE(review): the dob_met_days name suggests days from date of birth;
# confirm against the definition of image_scan_int.
met_time_custom <- img_ca %>%
  group_by(record_id, map_custom) %>%
  summarize(
    dob_met_days = min(image_scan_int, na.rm = TRUE),
    .groups = 'drop'
  )

# across everything, find the people with mets.
# (earliest time over all of a participant's categories.)
met_time_total <- met_time_custom %>%
  group_by(record_id) %>%
  summarize(
    dob_met_days = min(dob_met_days, na.rm = TRUE)
  )

# Compare who is flagged by get_dmet_time() with who is flagged by the custom
# imaging-based approach.
# IDs returned by get_dmet_time() but absent from the custom table:
diff_derived_custom <- setdiff(
  x = get_dmet_time(ca_ind)$record_id,
  y = met_time_total$record_id
)
# IDs in the custom table but not returned by get_dmet_time():
diff_custom_derived <- setdiff(
  x = met_time_total$record_id,
  y = get_dmet_time(ca_ind)$record_id
)

# Counts of stage_dx_iv among the derived-only participants.
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  count(stage_dx_iv)

# For the same participants, count how many have a non-missing value in each
# dx_to_dmets_*_days column.
ca_ind %>%
  filter(record_id %in% diff_derived_custom) %>%
  select(record_id, matches('dx_to_dmets_.*_days')) %>%
  mutate(
    across(
      .cols = matches('dx_to_dmets_.*_days'),
      .fns = function(x) !is.na(x)
    )
  ) %>%
  pivot_longer(cols = -record_id) %>%
  filter(value) %>% # only those with a met in that location
  count(name) # number of people with mets at each spot
# abdomen we know about - renal pelvis is the big one I know of.
# Pelvis is pretty vague and not interesting, so we can probably move on there.


# Reshape to one row per participant with one column per custom category.
# "Lymph node (distant)" gets a short name; every other label is only
# lower-cased, so any spaces or punctuation carry through into column names.
met_time_custom <- met_time_custom %>%
  mutate(
    map_custom = case_when(
      map_custom %in% "Lymph node (distant)" ~ "lymph_distant",
      TRUE ~ tolower(map_custom)
    )
  ) %>%
  pivot_wider(
    names_from = 'map_custom',
    values_from = 'dob_met_days'
  )

# Saved for downstream use (read back from this same path elsewhere).
readr::write_rds(
  x = met_time_custom,
  file = here('data', 'cohort', 'met_time_custom.rds')
)
1 change: 1 addition & 0 deletions main.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ source(here('analysis', 'script', 'create_dmet_data.R'))
source(here('analysis', 'script', 'estimate_reg_year.R'))
source(here('analysis', 'script', 'create_neoadj_dat.R'))
source(here('analysis', 'script', 'derive_lines_of_therapy.R'))
# The script was added as derive_custom_time_to_met.r (lower-case extension);
# the path must match exactly on case-sensitive filesystems.
source(here('analysis', 'script', 'derive_custom_time_to_met.r'))
rmarkdown::render(
input = here('analysis', 'report', 'genie-bpc-bladder-clinical.rmd'),
output_file = '01-genie-bpc-bladder-clinical.html',
Expand Down
Binary file modified output/aacr_ss24/tfl_obj/01_characteristics.rds
Binary file not shown.
Binary file modified output/aacr_ss24/tfl_obj/01_obs_count.rds
Binary file not shown.

0 comments on commit 3a34005

Please sign in to comment.