From 907919248c590c1282e5ba28e62da90c74b863dc Mon Sep 17 00:00:00 2001 From: rxu17 <26471741+rxu17@users.noreply.github.com> Date: Tue, 29 Oct 2024 01:44:40 -0700 Subject: [PATCH] add ability to add version comment --- main.nf | 4 ++-- modules/merge_and_uncode_rca_uploads.nf | 1 + .../uploads/merge_and_uncode_rca_uploads.R | 24 +++++++++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 9ce78336..3b78decc 100644 --- a/main.nf +++ b/main.nf @@ -96,12 +96,12 @@ workflow BPC_PIPELINE { update_potential_phi_fields_table(ch_comment, params.production) // validate_data.out.view() } else if (params.step == "merge_and_uncode_rca_uploads"){ - merge_and_uncode_rca_uploads("default", ch_cohort, params.production) + merge_and_uncode_rca_uploads("default", ch_cohort, ch_comment, params.production) } else if (params.step == "genie_bpc_pipeline"){ update_potential_phi_fields_table(ch_comment, params.production) run_quac_upload_report_error(update_potential_phi_fields_table.out, ch_cohort) run_quac_upload_report_warning(run_quac_upload_report_error.out, ch_cohort, params.production) - merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, params.production) + merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, ch_comment, params.production) // remove_patients_from_merged(merge_and_uncode_rca_uploads.out, ch_cohort, params.production) update_data_table(merge_and_uncode_rca_uploads.out, ch_comment, params.production) update_date_tracking_table(update_data_table.out, ch_cohort, ch_comment, params.production) diff --git a/modules/merge_and_uncode_rca_uploads.nf b/modules/merge_and_uncode_rca_uploads.nf index 4ad9a79f..88bead78 100644 --- a/modules/merge_and_uncode_rca_uploads.nf +++ b/modules/merge_and_uncode_rca_uploads.nf @@ -10,6 +10,7 @@ process merge_and_uncode_rca_uploads { input: val previous val cohort + val comment val production output: diff --git 
a/scripts/uploads/merge_and_uncode_rca_uploads.R b/scripts/uploads/merge_and_uncode_rca_uploads.R index fdc04fe1..85239a66 100644 --- a/scripts/uploads/merge_and_uncode_rca_uploads.R +++ b/scripts/uploads/merge_and_uncode_rca_uploads.R @@ -487,12 +487,22 @@ get_irr <- function(data) { return(irr) } -save_to_synapse <- function(path, parent_id, file_name = NA, prov_name = NA, prov_desc = NA, prov_used = NA, prov_exec = NA) { +#' Stores the file, version comment and provenance to Synapse +#' @param path (string) local path of the file to upload +#' @param parent_id (string) Synapse ID of the parent container to store the file in +#' @param comment (string) version comment to attach to the new version of the file +#' @param file_name (string) name for the file on Synapse; defaults to path when NA +#' @param prov_name (string) name of provenance +#' @param prov_desc (string) provenance description +#' @param prov_used (string) url/link to provenance used +#' @param prov_exec (string) url/link to what was used to execute provenance +save_to_synapse <- function(path, parent_id, comment, file_name = NA, prov_name = NA, prov_desc = NA, prov_used = NA, prov_exec = NA) { if (is.na(file_name)) { file_name = path } file <- File(path = path, parentId = parent_id, name = file_name) + file$properties$versionComment <- comment if (!is.na(prov_name) || !is.na(prov_desc) || !is.na(prov_used) || !is.na(prov_exec)) { act <- Activity(name = prov_name, @@ -635,7 +645,9 @@ get_output_folder_id <- function(config, environment){ #' Remove leading and trailing whitespace from a string. 
#' @param cohort (string) name of cohort #' @param environment (string) whether we are running in production env or staging env -save_output_synapse <- function(cohort, environment) { +#' @param comment (string) some sort of comment about the new version of the file +#' related to cohort run +save_output_synapse <- function(cohort, environment, comment) { parent_id <- get_output_folder_id(config, environment) file_output_pri <- get_pri_file_name(cohort) @@ -647,6 +659,7 @@ save_output_synapse <- function(cohort, environment) { save_to_synapse(path = file_output_pri, file_name = gsub(pattern = ".csv|.tsv", replacement = "", x = file_output_pri), parent_id = parent_id, + comment = comment, prov_name = "BPC non-IRR upload data", prov_desc = "Merged and uncoded BPC upload data from sites academic REDCap instances with IRR cases removed", prov_used = c(as.character(unlist(config$upload[[cohort]])), @@ -658,6 +671,7 @@ save_output_synapse <- function(cohort, environment) { save_to_synapse(path = file_output_irr, file_name = gsub(pattern = ".csv|.tsv", replacement = "", x = file_output_irr), parent_id = parent_id, + comment = comment, prov_name = "BPC IRR upload data", prov_desc = "Merged and uncoded BPC upload IRR case data from sites academic REDCap instances", prov_used = c(as.character(unlist(config$upload[[cohort]])), @@ -690,7 +704,9 @@ main <- function(){ make_option(c("--production"), action="store_true", default = FALSE, help="Whether to run in production mode or not (staging mode)."), make_option(c("-v", "--verbose"), action="store_true", default = FALSE, - help="Print out verbose output on script progress") + help="Print out verbose output on script progress"), + make_option(c("-c", "--comment"), type = "character", + help="Comment for new table snapshot version. 
This must be unique and is tied to the cohort run.") ) opt <- parse_args(OptionParser(option_list=option_list)) @@ -792,7 +808,7 @@ main <- function(){ print(glue("{now(timeOnly = T)}: Saving uncoded data to Synapse...")) } - save_output_synapse(cohort, environment = env) + save_output_synapse(cohort, environment = env, comment = opt$comment) } # clean up for memory