From 907919248c590c1282e5ba28e62da90c74b863dc Mon Sep 17 00:00:00 2001
From: rxu17 <26471741+rxu17@users.noreply.github.com>
Date: Tue, 29 Oct 2024 01:44:40 -0700
Subject: [PATCH] add ability to add version comment

---
 main.nf                                       |  4 ++--
 modules/merge_and_uncode_rca_uploads.nf       |  1 +
 .../uploads/merge_and_uncode_rca_uploads.R    | 24 +++++++++++++++----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/main.nf b/main.nf
index 9ce78336..3b78decc 100644
--- a/main.nf
+++ b/main.nf
@@ -96,12 +96,12 @@ workflow BPC_PIPELINE {
     update_potential_phi_fields_table(ch_comment, params.production)
     // validate_data.out.view()
    } else if (params.step == "merge_and_uncode_rca_uploads"){
-    merge_and_uncode_rca_uploads("default", ch_cohort, params.production)
+    merge_and_uncode_rca_uploads("default", ch_cohort, ch_comment, params.production)
    } else if (params.step == "genie_bpc_pipeline"){
     update_potential_phi_fields_table(ch_comment, params.production)
     run_quac_upload_report_error(update_potential_phi_fields_table.out, ch_cohort)
     run_quac_upload_report_warning(run_quac_upload_report_error.out, ch_cohort, params.production)
-    merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, params.production)
+    merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, ch_comment, params.production)
     // remove_patients_from_merged(merge_and_uncode_rca_uploads.out, ch_cohort, params.production)
     update_data_table(merge_and_uncode_rca_uploads.out, ch_comment, params.production)
     update_date_tracking_table(update_data_table.out, ch_cohort, ch_comment, params.production)
diff --git a/modules/merge_and_uncode_rca_uploads.nf b/modules/merge_and_uncode_rca_uploads.nf
index 4ad9a79f..88bead78 100644
--- a/modules/merge_and_uncode_rca_uploads.nf
+++ b/modules/merge_and_uncode_rca_uploads.nf
@@ -10,6 +10,7 @@ process merge_and_uncode_rca_uploads {
    input:
    val previous
    val cohort
+   val comment
    val production
 
    output:
diff --git a/scripts/uploads/merge_and_uncode_rca_uploads.R b/scripts/uploads/merge_and_uncode_rca_uploads.R
index fdc04fe1..85239a66 100644
--- a/scripts/uploads/merge_and_uncode_rca_uploads.R
+++ b/scripts/uploads/merge_and_uncode_rca_uploads.R
@@ -487,12 +487,22 @@ get_irr <- function(data) {
   return(irr)
 }
 
-save_to_synapse <- function(path, parent_id, file_name = NA, prov_name = NA, prov_desc = NA, prov_used = NA, prov_exec = NA) {
+#' Stores the file, version comment and provenance to Synapse
+#' @param path (string) local path of the file to store
+#' @param parent_id (string) Synapse id of the parent folder to store the file in
+#' @param comment (string) version comment to set on the new version of the file
+#' @param file_name (string) file name
+#' @param prov_name (string) name of provenance
+#' @param prov_desc (string) provenance description
+#' @param prov_used (string) url/link to provenance used
+#' @param prov_exec (string) url/link to what was used to execute provenance
+save_to_synapse <- function(path, parent_id, comment, file_name = NA, prov_name = NA, prov_desc = NA, prov_used = NA, prov_exec = NA) {
   
   if (is.na(file_name)) {
     file_name = path
   } 
   file <- File(path = path, parentId = parent_id, name = file_name)
+  file$properties$versionComment <- comment
   
   if (!is.na(prov_name) || !is.na(prov_desc) || !is.na(prov_used) || !is.na(prov_exec)) {
     act <- Activity(name = prov_name,
@@ -635,7 +645,9 @@ get_output_folder_id <- function(config, environment){
 #' Remove leading and trailing whitespace from a string.
 #' @param cohort (string) name of cohort
 #' @param environment (string) whether we are running in production env or staging env
-save_output_synapse <- function(cohort, environment) {
+#' @param comment (string) version comment to set on each file stored
+#'  for this cohort run
+save_output_synapse <- function(cohort, environment, comment) {
   
   parent_id <- get_output_folder_id(config, environment)
   file_output_pri <- get_pri_file_name(cohort)
@@ -647,6 +659,7 @@ save_output_synapse <- function(cohort, environment) {
   save_to_synapse(path = file_output_pri,
                   file_name = gsub(pattern = ".csv|.tsv", replacement = "", x = file_output_pri),
                   parent_id = parent_id,
+                  comment = comment,
                   prov_name = "BPC non-IRR upload data",
                   prov_desc = "Merged and uncoded BPC upload data from sites academic REDCap instances with IRR cases removed",
                   prov_used = c(as.character(unlist(config$upload[[cohort]])), 
@@ -658,6 +671,7 @@ save_output_synapse <- function(cohort, environment) {
     save_to_synapse(path = file_output_irr,
                     file_name = gsub(pattern = ".csv|.tsv", replacement = "", x = file_output_irr),
                     parent_id = parent_id,
+                    comment = comment,
                     prov_name = "BPC IRR upload data",
                     prov_desc = "Merged and uncoded BPC upload IRR case data from sites academic REDCap instances",
                     prov_used = c(as.character(unlist(config$upload[[cohort]])), 
@@ -690,7 +704,9 @@ main <- function(){
     make_option(c("--production"), action="store_true", default = FALSE, 
                 help="Whether to run in production mode or not (staging mode)."),
     make_option(c("-v", "--verbose"), action="store_true", default = FALSE, 
-                help="Print out verbose output on script progress")
+                help="Print out verbose output on script progress"),
+    make_option(c("-c", "--comment"), type = "character",
+              help="Comment for new table snapshot version. This must be unique and is tied to the cohort run.")
   )
   opt <- parse_args(OptionParser(option_list=option_list))
 
@@ -792,7 +808,7 @@ main <- function(){
         print(glue("{now(timeOnly = T)}: Saving uncoded data to Synapse..."))
       }
       
-      save_output_synapse(cohort, environment = env)
+      save_output_synapse(cohort, environment = env, comment = opt$comment)
     }
     
     # clean up for memory