From 30d4bf3ab57c6296a81d6f792911c87586ca896e Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Sat, 5 Oct 2024 12:29:37 +0100
Subject: [PATCH 01/19] Implement error handling in acc2lin.R functions

- Added validation checks for input parameters (accessions, ipg_file, assembly_path, lineagelookup_path).
- Included error messages for missing or invalid inputs and file existence checks.
- Wrapped main logic in tryCatch for graceful error handling during execution.
---
 R/acc2lin.R       | 267 ++++++++++++++++++++++++++++++++++------------
 man/acc2lin.Rd    |   3 +-
 man/efetch_ipg.Rd |   3 +-
 man/ipg2lin.Rd    |   3 +-
 man/sink.reset.Rd |   1 +
 5 files changed, 207 insertions(+), 70 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index f8d71949..dfb33da9 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -10,6 +10,8 @@
 #' Sink Reset
 #'
 #' @return No return, but run to close all outstanding `sink()`s
+#'         and handles any errors or warnings that occur during the process.
+#'
 #' @export
 #'
 #' @examples
@@ -17,9 +19,19 @@
 #' sink.reset()
 #' }
 sink.reset <- function() {
+  # Handle all errors and warnings
+  tryCatch({
     for (i in seq_len(sink.number())) {
-        sink(NULL)
+      sink(NULL)
     }
+    print("All sinks closed")
+  }, error = function(e) {
+    print(paste("Error: ", e$message))
+  }, warning = function(w) {
+    print(paste("Warning: ", w$message))
+  }, finally = {
+    print("resetSink function execution completed.")
+  })
 }
 
 
@@ -44,23 +56,61 @@ sink.reset <- function() {
 #' add_lins()
 #' }
 add_lins <- function(df, acc_col = "AccNum", assembly_path,
-    lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
-    s_acc_col <- sym(acc_col)
-    accessions <- df %>% pull(acc_col)
-    lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)
+                     lineagelookup_path, ipgout_path = NULL,
+                     plan = "sequential") {
+  # check for validate inputs
+  if (!is.data.frame(df)) {
+    stop("Input 'df' must be a data frame.")
+  }
+
+  if (!acc_col %in% colnames(df)) {
+    stop(paste("Column", acc_col, "not found in data frame."))
+  }
+
+  # Ensure paths are character strings
+  if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
+    stop("Both 'assembly_path' and 
+         'lineagelookup_path' must be character strings.")
+  }
+
+  # Ensure paths exist
+  if (!file.exists(assembly_path)) {
+    stop(paste("Assembly file not found at:", assembly_path))
+  }
 
-    # Drop a lot of the unimportant columns for now? will make merging much easier
-    lins <- lins[, c(
+  if (!file.exists(lineagelookup_path)) {
+    stop(paste("Lineage lookup file not found at:", lineagelookup_path))
+  }
+    tryCatch({
+      # Attempt to add lineages
+      acc_col <- sym(acc_col)
+      accessions <- df %>% pull(acc_col)
+      lins <- acc2lin(
+        accessions, assembly_path, lineagelookup_path, ipgout_path, plan
+      )
+
+      # Drop a lot of the unimportant columns for now? 
+      # will make merging much easier
+      lins <- lins[, c(
         "Strand", "Start", "Stop", "Nucleotide Accession", "Source",
         "Id", "Strain"
-    ) := NULL]
-    lins <- unique(lins)
+      ) := NULL]
+      lins <- unique(lins)
+
+      # dup <- lins %>% group_by(Protein) %>% 
+      # summarize(count = n()) %>% filter(count > 1) %>%
+      # pull(Protein)
 
-    # dup <- lins %>% group_by(Protein) %>% summarize(count = n()) %>% filter(count > 1) %>%
-    #   pull(Protein)
+      merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
+      return(merged)
+    }, error = function(e) {
+      print(paste("Error: ", e$message))
+    }, warning = function(w) {
+      print(paste("Warning: ", w$message))
+    }, finally = {
+      print("addLineages function execution completed.")
+    })
 
-    merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
-    return(merged)
 }
 
 
@@ -68,7 +118,8 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
+#' @description This function combines 'efetch_ipg()'
+#'              and 'ipg2lin()' to map a set
 #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 #'
 #' @param accessions Character vector of protein accessions
@@ -76,7 +127,8 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #' This file can be generated using the "DownloadAssemblySummary()" function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' @param ipgout_path Path to write the results of the efetch run of the accessions
+#' @param ipgout_path Path to write the results 
+#'                    of the efetch run of the accessions
 #' on the ipg database. If NULL, the file will not be written. Defaults to NULL
 #' @param plan
 #'
@@ -87,27 +139,43 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
 #' \dontrun{
 #' acc2lin()
 #' }
-acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
-    tmp_ipg <- F
-    if (is.null(ipgout_path)) {
-        tmp_ipg <- T
-        ipgout_path <- tempfile("ipg", fileext = ".txt")
-    }
+acc2lin <- function(accessions, assembly_path, 
+                    lineagelookup_path, ipgout_path = NULL, 
+                    plan = "sequential") {
+  tmp_ipg <- F
+  if (is.null(ipgout_path)) {
+    tmp_ipg <- T
+    ipgout_path <- tempfile("ipg", fileext = ".txt")
+  }
+
+  lins <- NULL
+  tryCatch({
+    # Attempt to fetch IPG
     efetch_ipg(accessions, out_path = ipgout_path, plan)
 
+    # Attempt to process IPG to lineages
     lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path)
+  }, error = function(e) {
+    print(paste("An error occurred: ", e$message))
+  }, warning = function(w) {
+    print(paste("Warning: ", w$message))
+  }, finally = {
+    print("acc2lin function execution completed.")
+  })
 
-    if (tmp_ipg) {
-        unlink(tempdir(), recursive = T)
-    }
-    return(lins)
+  if (tmp_ipg) {
+    unlink(tempdir(), recursive = T)
+  }
+  return(lins)
 }
 
+
 #' efetch_ipg
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @description Perform efetch on the ipg database and write the results to out_path
+#' @description Perform efetch on the ipg database
+#'              and write the results to out_path
 #'
 #' @param accnums Character vector containing the accession numbers to query on
 #' the ipg database
@@ -126,57 +194,84 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path =
 #' efetch_ipg()
 #' }
 efetch_ipg <- function(accnums, out_path, plan = "sequential") {
-    if (length(accnums) > 0) {
-        partition <- function(in_data, groups) {
-            # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal
-            # Partition data to limit number of queries per second for rentrez fetch:
-            # limit of 10/second w/ key
-            l <- length(in_data)
-
-            partitioned <- list()
-            for (i in 1:groups)
-            {
-                partitioned[[i]] <- in_data[seq.int(i, l, groups)]
-            }
-
-            return(partitioned)
-        }
+  # Argument validation
+  if (!is.character(accnums) || length(accnums) == 0) {
+    stop("Error: 'accnums' must be a non-empty character vector.")
+  }
+
+  if (!is.character(out_path) || nchar(out_path) == 0) {
+    stop("Error: 'out_path' must be a non-empty string.")
+  }
+
+  if (!is.function(plan)) {
+    stop("Error: 'plan' must be a valid plan function.")
+  }
+  if (length(accnums) > 0) {
+    partition <- function(in_data, groups) {
+      # \\TODO This function should be defined outside of efetch_ipg().
+      # It can be non-exported/internal
+      # Partition data to limit number of queries per second for rentrez fetch:
+      # limit of 10/second w/ key
+      l <- length(in_data)
 
-        plan(strategy = plan, .skip = T)
-
-
-        min_groups <- length(accnums) / 200
-        groups <- min(max(min_groups, 15), length(accnums))
-        partitioned_acc <- partition(accnums, groups)
-        sink(out_path)
-
-        a <- future_map(1:length(partitioned_acc), function(x) {
-            # Avoid hitting the rate API limit
-            if (x %% 9 == 0) {
-                Sys.sleep(1)
-            }
-            cat(
-                entrez_fetch(
-                    id = partitioned_acc[[x]],
-                    db = "ipg",
-                    rettype = "xml",
-                    api_key = "YOUR_KEY_HERE" ## Can this be included in public package?
-                )
-            )
-        })
-        sink(NULL)
+      partitioned <- list()
+      for (i in 1:groups){
+        partitioned[[i]] <- in_data[seq.int(i, l, groups)]
+      }
+
+      return(partitioned)
     }
+    tryCatch({
+      # Set the future plan strategy
+      plan(strategy = plan, .skip = T)
+
+
+      min_groups <- length(accnums) / 200
+      groups <- min(max(min_groups, 15), length(accnums))
+      partitioned_acc <- partition(accnums, groups)
+
+      # Open the sink to the output path
+      sink(out_path)
+
+      a <- future_map(1:length(partitioned_acc), function(x) {
+        # Avoid hitting the rate API limit
+        if (x %% 9 == 0) {
+          Sys.sleep(1)
+        }
+        cat(
+          entrez_fetch(
+            id = partitioned_acc[[x]],
+            db = "ipg",
+            rettype = "xml",
+            api_key = "YOUR_KEY_HERE" ## Can this be included in public package?
+          )
+        )
+      })
+      sink(NULL)
+    }, error = function(e) {
+      print(paste("An error occurred: ", e$message))
+    }, warning = function(w) {
+      print(paste("Warning: ", w$message))
+    }, finally = {
+      print("efetch_ipg function execution completed.")
+    })
+  }
 }
 
+
+
 #' ipg2lin
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @description Takes the resulting file of an efetch run on the ipg database and
+#' @description Takes the resulting file
+#'              of an efetch run on the ipg database and
 #'
 #' @param accessions Character vector of protein accessions
-#' @param ipg_file Filepath to the file containing results of an efetch run on the
-#' ipg database. The protein accession in 'accessions' should be contained in this
+#' @param ipg_file Filepath to the file
+#'                 containing results of an efetch run on the
+#' ipg database. The protein accession in
+#'               'accessions' should be contained in this
 #' file
 #' @param assembly_path String of the path to the assembly_summary path
 #' This file can be generated using the "DownloadAssemblySummary()" function
@@ -195,16 +290,54 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
 #' }
 #'
 ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) {
+  # Argument validation for accessions
+  if (!is.character(accessions) || length(accessions) == 0) {
+    stop("Input 'accessions' must be a non-empty character vector.")
+  }
+
+  # check for validate inputs
+  if (!is.character(ipg_file)) {
+    stop("Input 'ipg_file' must be a character string.")
+  }
+  # Ensure paths are character strings
+  if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
+    stop("Both 'assembly_path' and 
+         'lineagelookup_path' must be character strings.")
+  }
+
+  # Ensure paths exist
+  if (!file.exists(assembly_path)) {
+    stop(paste("Assembly file not found at:", assembly_path))
+  }
+
+  if (!file.exists(lineagelookup_path)) {
+    stop(paste("Lineage lookup file not found at:", lineagelookup_path))
+  }
+
+  try({
+    # Attempt to read the IPG file
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
 
+    # Filter the IPG data table to only include the accessions
     ipg_dt <- ipg_dt[Protein %in% accessions]
 
+    # Rename the 'Assembly' column to 'GCA_ID'
     ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID")
 
+    # Convert the IPG data table to a lineage data table
     lins <- GCA2Lins(prot_data = ipg_dt, assembly_path, lineagelookup_path)
+
+    # Filter out rows with missing lineage information
     lins <- lins[!is.na(Lineage)] %>% unique()
 
     return(lins)
+  }, error = function(e) {
+    print(paste("An error occurred: ", e$message))
+  }, warning = function(w) {
+    print(paste("Warning: ", w$message))
+  }, finally = {
+    print("ipg2lin function execution completed.")
+  })
 }
 
 
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
index 6255b290..d3f2468b 100644
--- a/man/acc2lin.Rd
+++ b/man/acc2lin.Rd
@@ -38,7 +38,8 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL}
 Describe return, in detail
 }
 \description{
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
+This function combines 'efetch_ipg()'
+and 'ipg2lin()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 
 Function to map protein accession numbers to lineage
diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd
index ec5b6bcb..1fbb9d92 100644
--- a/man/efetch_ipg.Rd
+++ b/man/efetch_ipg.Rd
@@ -23,7 +23,8 @@ the ipg database}
 Describe return, in detail
 }
 \description{
-Perform efetch on the ipg database and write the results to out_path
+Perform efetch on the ipg database
+and write the results to out_path
 
 Perform efetch on the ipg database and write the results to out_path
 }
diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd
index 3a14eada..453668b0 100644
--- a/man/ipg2lin.Rd
+++ b/man/ipg2lin.Rd
@@ -38,7 +38,8 @@ This file can be generated using the "DownloadAssemblySummary()" function}
 Describe return, in detail
 }
 \description{
-Takes the resulting file of an efetch run on the ipg database and
+Takes the resulting file
+of an efetch run on the ipg database and
 
 Takes the resulting file of an efetch run on the ipg database and
 append lineage, and taxid columns
diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd
index a31b841d..64087c49 100644
--- a/man/sink.reset.Rd
+++ b/man/sink.reset.Rd
@@ -8,6 +8,7 @@ sink.reset()
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
+and handles any errors or warnings that occur during the process.
 }
 \description{
 Sink Reset

From 4aeaa113927b6f94b21c9f0dd0956bb7e48004a5 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Mon, 7 Oct 2024 22:50:16 +0100
Subject: [PATCH 02/19] Add error handling to multiple functions

- Implement error handling for mapOption2Process, get_proc_medians, write_proc_medians_table, get_proc_weights, advanced_opts2est_walltime, assign_job_queue, and plot_estimated_walltimes.
- Validate input arguments for each function to ensure they meet expected criteria.
- Use tryCatch blocks to gracefully handle errors and warnings.
- Provide informative error messages and detailed logging where appropriate.
- Ensure functions fail gracefully and provide useful feedback.

Also renamed the following functions:
assign_job_queue -> assignJobQueue
make_opts2procs -> mapOption2Process
map_advanced_opts2procs -> mapAdvOption2Process
get_proc_medians -> calculateProcessRuntime
write_proc_medians_table -> writeProcessRuntime2TSV
write_proc_medians_yml -> writeProcessRuntimeToYML
get_proc_weights -> getProcessRuntimeWeights
advanced_opts2est_walltime -> calculateEstimatedWallTimeFromOptions
plot_estimated_walltimes -> plotEstimatedWallTimes
---
 NAMESPACE                                     |  26 +-
 R/assign_job_queue.R                          | 484 ++++++++++++------
 R/clean_clust_file.R                          |   4 +-
 R/combine_analysis.R                          |   4 +-
 R/combine_files.R                             |  10 +-
 R/create_lineage_lookup.R                     |   6 +-
 ...{assign_job_queue.Rd => assignJobQueue.Rd} |  13 +-
 ... calculateEstimatedWallTimeFromOptions.Rd} |  12 +-
 ..._medians.Rd => calculateProcessRuntime.Rd} |  10 +-
 ...lean_clust_file.Rd => cleanClusterFile.Rd} |   8 +-
 man/{combine_files.Rd => combineFiles.Rd}     |   6 +-
 ...combine_full.Rd => combineFullAnalysis.Rd} |   6 +-
 man/{combine_ipr.Rd => combineIPR.Rd}         |   6 +-
 ...neage_lookup.Rd => createLineageLookup.Rd} |   6 +-
 ...weights.Rd => getProcessRuntimeWeights.Rd} |   8 +-
 ..._opts2procs.Rd => mapAdvOption2Process.Rd} |   8 +-
 ...ake_opts2procs.Rd => mapOption2Process.Rd} |   8 +-
 ...walltimes.Rd => plotEstimatedWallTimes.Rd} |  11 +-
 ...ns_table.Rd => writeProcessRuntime2TSV.Rd} |   8 +-
 ...ans_yml.Rd => writeProcessRuntimeToYML.Rd} |  13 +-
 20 files changed, 416 insertions(+), 241 deletions(-)
 rename man/{assign_job_queue.Rd => assignJobQueue.Rd} (64%)
 rename man/{advanced_opts2est_walltime.Rd => calculateEstimatedWallTimeFromOptions.Rd} (68%)
 rename man/{get_proc_medians.Rd => calculateProcessRuntime.Rd} (76%)
 rename man/{clean_clust_file.Rd => cleanClusterFile.Rd} (82%)
 rename man/{combine_files.Rd => combineFiles.Rd} (92%)
 rename man/{combine_full.Rd => combineFullAnalysis.Rd} (69%)
 rename man/{combine_ipr.Rd => combineIPR.Rd} (74%)
 rename man/{create_lineage_lookup.Rd => createLineageLookup.Rd} (91%)
 rename man/{get_proc_weights.Rd => getProcessRuntimeWeights.Rd} (73%)
 rename man/{map_advanced_opts2procs.Rd => mapAdvOption2Process.Rd} (76%)
 rename man/{make_opts2procs.Rd => mapOption2Process.Rd} (75%)
 rename man/{plot_estimated_walltimes.Rd => plotEstimatedWallTimes.Rd} (55%)
 rename man/{write_proc_medians_table.Rd => writeProcessRuntime2TSV.Rd} (77%)
 rename man/{write_proc_medians_yml.Rd => writeProcessRuntimeToYML.Rd} (61%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..9c038631 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,26 +12,27 @@ export(add_leaves)
 export(add_lins)
 export(add_name)
 export(add_tax)
-export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
-export(assign_job_queue)
+export(assignJobQueue)
+export(calculateEstimatedWallTimeFromOptions)
+export(calculateProcessRuntime)
 export(cleanup_GeneDesc)
 export(cleanup_clust)
 export(cleanup_domarch)
 export(cleanup_gencontext)
 export(cleanup_lineage)
 export(cleanup_species)
-export(combine_files)
-export(combine_full)
-export(combine_ipr)
+export(combineFiles)
+export(combineFullAnalysis)
+export(combineIPR)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createLineageLookup)
 export(create_all_col_params)
-export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
@@ -45,10 +46,9 @@ export(generate_all_aln2fa)
 export(generate_fa2tre)
 export(generate_msa)
 export(generate_trees)
+export(getProcessRuntimeWeights)
 export(get_accnums_from_fasta_file)
 export(get_job_message)
-export(get_proc_medians)
-export(get_proc_weights)
 export(ipg2lin)
 export(ipr2viz)
 export(ipr2viz_web)
@@ -58,12 +58,12 @@ export(lineage.domain_repeats.plot)
 export(lineage.neighbors.plot)
 export(lineage_sunburst)
 export(make_job_results_url)
-export(make_opts2procs)
+export(mapAdvOption2Process)
+export(mapOption2Process)
 export(map_acc2name)
-export(map_advanced_opts2procs)
 export(msa_pdf)
 export(pick_longer_duplicate)
-export(plot_estimated_walltimes)
+export(plotEstimatedWallTimes)
 export(prot2tax)
 export(prot2tax_old)
 export(remove_astrk)
@@ -95,8 +95,8 @@ export(wordcloud2_element)
 export(wordcloud3)
 export(wordcloud_element)
 export(write.MsaAAMultipleAlignment)
-export(write_proc_medians_table)
-export(write_proc_medians_yml)
+export(writeProcessRuntime2TSV)
+export(writeProcessRuntimeToYML)
 importFrom(Biostrings,AAStringSet)
 importFrom(Biostrings,readAAStringSet)
 importFrom(Biostrings,toString)
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index bc5253d4..f1fcb6db 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -3,22 +3,32 @@
 # pipeline.
 # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R")
 # for example, the reference for this file would be:
-# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R")
+# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R")
 common_root <- Sys.getenv("COMMON_SRC_ROOT")
 
 #' Construct list where names (MolEvolvR advanced options) point to processes
 #'
 #' @return list where names (MolEvolvR advanced options) point to processes
 #'
-#' example: list_opts2procs <- make_opts2procs
+#' example: list_opts2procs <- mapOption2Process()
 #' @export
-make_opts2procs <- function() {
+mapOption2Process <- function() {
+  tryCatch({
     opts2processes <- list(
-        "homology_search" = c("dblast", "dblast_cleanup"),
-        "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"),
-        "always" = c("blast_clust", "clust2table") # processes always present agnostic of advanced options
+      "homology_search" = c("dblast", "dblast_cleanup"),
+      "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"),
+      # processes always present agnostic of advanced options
+      "always" = c("blast_clust", "clust2table")
     )
     return(opts2processes)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("mapOption2Process function execution completed.")
+  })
+
 }
 
 #' Use MolEvolvR advanced options to get associated processes
@@ -30,17 +40,29 @@ make_opts2procs <- function() {
 #'
 #' example:
 #' advanced_opts <- c("homology_search", "domain_architecture")
-#' procs <- map_advanced_opts2procs(advanced_opts)
+#' procs <- mapAdvOption2Process(advanced_opts)
 #' @export
-map_advanced_opts2procs <- function(advanced_opts) {
+mapAdvOption2Process <- function(advanced_opts) {
+  if (!is.character(advanced_opts)) {
+    stop("Argument must be a character vector!")
+  }
+  tryCatch({
     # append 'always' to add procs that always run
     advanced_opts <- c(advanced_opts, "always")
-    opts2proc <- make_opts2procs()
+    opts2proc <- mapOption2Process()
     # setup index for opts2proc based on advanced options
     idx <- which(names(opts2proc) %in% advanced_opts)
     # extract processes that will run
     procs <- opts2proc[idx] |> unlist()
     return(procs)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("mapOption2Process function execution completed.")
+  })
+
 }
 
 #' Scrape MolEvolvR logs and calculate median processes
@@ -58,47 +80,68 @@ map_advanced_opts2procs <- function(advanced_opts) {
 #'
 #' 1)
 #' dir_job_results <- "/data/scratch/janani/molevolvr_out"
-#' list_proc_medians <- get_proc_medians(dir_job_results)
+#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
 #'
 #' 2) from outside container environment
 #' common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-#' list_proc_medians <- get_proc_medians(dir_job_results)
+#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
 #' @export
-get_proc_medians <- function(dir_job_results) {
+calculateProcessRuntime <- function(dir_job_results) {
+  tryCatch({
+    # Check if dir_job_results is a character string
+    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
+      stop("Input 'dir_job_results' must be a single character string.")
+    }
+
+    # Check if dir_job_results exists
+    if (!dir.exists(dir_job_results)) {
+      stop(paste("The directory", dir_job_results, "does not exist."))
+    }
+
     source(file.path(common_root, "molevol_scripts", "R", "metrics.R"))
 
     # aggregate logs from
-    path_log_data <- file.path(common_root, "molevol_scripts", "log_data", "prod_logs.rda")
+    path_log_data <- file.path(common_root,
+                               "molevol_scripts", "log_data", "prod_logs.rda")
 
     # ensure the folder exists to the location
     if (!dir.exists(path_log_data)) {
-        dir.create(dirname(path_log_data), recursive = TRUE, showWarnings = FALSE)
+      dir.create(dirname(path_log_data),
+                 recursive = TRUE, showWarnings = FALSE)
     }
 
     # attempt to load pre-generated logdata
     if (!file.exists(path_log_data)) {
-        logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
-        save(logs, file = path_log_data)
+      logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
+      save(logs, file = path_log_data)
     } else {
-        load(path_log_data) # loads the logs object
+      load(path_log_data) # loads the logs object
     }
     df_log <- logs$df_log
     procs <- c(
-        "dblast", "dblast_cleanup", "iprscan",
-        "ipr2lineage", "ipr2da", "blast_clust",
-        "clust2table"
+      "dblast", "dblast_cleanup", "iprscan",
+      "ipr2lineage", "ipr2da", "blast_clust",
+      "clust2table"
     )
     list_proc_medians <- df_log |>
-        dplyr::select(dplyr::all_of(procs)) |>
-        dplyr::summarise(
-            dplyr::across(
-                dplyr::everything(),
-                \(x) median(x, na.rm = TRUE)
-            )
-        ) |>
-        as.list()
+      dplyr::select(dplyr::all_of(procs)) |>
+      dplyr::summarise(
+        dplyr::across(
+          dplyr::everything(),
+          \(x) median(x, na.rm = TRUE)
+        )
+      ) |>
+      as.list()
     return(list_proc_medians)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("calculateProcessRuntime function execution completed.")
+  })
+
 }
 
 #' Write a table of 2 columns: 1) process and 2) median seconds
@@ -113,51 +156,99 @@ get_proc_medians <- function(dir_job_results) {
 #'
 #' @return [tbl_df] 2 columns: 1) process and 2) median seconds
 #'
-#' example: write_proc_medians_table(
+#' example: writeProcessRuntime2TSV(
 #'   "/data/scratch/janani/molevolvr_out/",
 #'   "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 #' )
 #' @export
-write_proc_medians_table <- function(dir_job_results, filepath) {
-    df_proc_medians <- get_proc_medians(dir_job_results) |>
-        tibble::as_tibble() |>
-        tidyr::pivot_longer(
-            dplyr::everything(),
-            names_to = "process",
-            values_to = "median_seconds"
-        ) |>
-        dplyr::arrange(dplyr::desc(median_seconds))
+writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
+  tryCatch({
+    # Error handling for input arguments
+    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
+      stop("Input 'dir_job_results' must be a single character string.")
+    }
+
+    if (!dir.exists(dir_job_results)) {
+      stop(paste("The directory", dir_job_results, "does not exist."))
+    }
+
+    if (!is.character(filepath) || length(filepath) != 1) {
+      stop("Input 'filepath' must be a single character string.")
+    }
+    df_proc_medians <- calculateProcessRuntime(dir_job_results) |>
+      tibble::as_tibble() |>
+      tidyr::pivot_longer(
+        dplyr::everything(),
+        names_to = "process",
+        values_to = "median_seconds"
+      ) |>
+      dplyr::arrange(dplyr::desc(median_seconds))
+
+    # Write the resulting tibble to a TSV file
     readr::write_tsv(df_proc_medians, file = filepath)
     return(df_proc_medians)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("writeProcessRuntime2TSV function execution completed.")
+  })
+
 }
 
 #' Compute median process runtimes, then write a YAML list of the processes and
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
 #' The default value of filepath is the value of the env var
-#' MOLEVOLVR_PROC_WEIGHTS, which get_proc_weights() also uses as its default
+#' MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default
 #' read location.
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results directory
-#' @param filepath [chr] path to save YAML file; if NULL, uses ./molevol_scripts/log_data/job_proc_weights.yml
+#' @param filepath [chr] path to save YAML file; if NULL, 
+#'                 uses ./molevol_scripts/log_data/job_proc_weights.yml
 #'
 #' @importFrom yaml write_yaml
 #'
 #' @examples
 #' \dontrun{
-#' write_proc_medians_yml(
+#' writeProcessRuntimeToYML(
 #'     "/data/scratch/janani/molevolvr_out/",
 #'     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 #' )
 #' }
 #' @export
-write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
+writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) {
+  tryCatch({
+    # Error handling for dir_job_results arguments
+    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
+      stop("Input 'dir_job_results' must be a single character string.")
+    }
+
+    if (!dir.exists(dir_job_results)) {
+      stop(paste("The directory", dir_job_results, "does not exist."))
+    }
     if (is.null(filepath)) {
-        filepath <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml")
+      filepath <- file.path(common_root,
+                            "molevol_scripts",
+                            "log_data",
+                            "job_proc_weights.yml")
+    }
+    if (!is.character(filepath) || length(filepath) != 1) {
+      stop("Input 'filepath' must be a single character string.")
     }
 
-    medians <- get_proc_medians(dir_job_results)
+    medians <- calculateProcessRuntime(dir_job_results)
     yaml::write_yaml(medians, filepath)
+  }, error = function(e) {
+    message(paste("Encountered an error: "), e$message)
+  }, warning = function(w) {
+    message(paste("Warning: "), w$message)
+  }, finally = {
+    message("write_proc_medians_table function execution completed.")
+  }
+  )
+
 }
 
 #' Quickly get the runtime weights for MolEvolvR backend processes
@@ -170,50 +261,52 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
 #'
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
-#' example: get_proc_weights()
+#' example: getProcessRuntimeWeights()
 #' @export
-get_proc_weights <- function(medians_yml_path = NULL) {
-    if (is.null(medians_yml_path)) {
-        medians_yml_path <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml")
+getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
+  if (is.null(medians_yml_path)) {
+    medians_yml_path <- file.path(common_root,
+                                  "molevol_scripts",
+                                  "log_data",
+                                  "job_proc_weights.yml")
+  }
+
+  proc_weights <- tryCatch({
+    # attempt to read the weights from the YAML file produced by
+    # writeProcessRuntimeToYML()
+    if (stringr::str_trim(medians_yml_path) == "") {
+      stop(
+        stringr::str_glue("medians_yml_path is empty 
+                          ({medians_yml_path}), returning default weights")
+      )
     }
 
-    proc_weights <- tryCatch(
-        {
-            # attempt to read the weights from the YAML file produced by
-            # write_proc_medians_yml()
-            if (stringr::str_trim(medians_yml_path) == "") {
-                stop(
-                    stringr::str_glue("medians_yml_path is empty ({medians_yml_path}), returning default weights")
-                )
-            }
-
-            proc_weights <- yaml::read_yaml(medians_yml_path)
-        },
-        # to avoid fatal errors in reading the proc weights yaml,
-        # some median process runtimes have been hardcoded based on
-        # the result of get_proc_medians() from Jan 2024
-        error = function(cond) {
-            proc_weights <- list(
-                "dblast" = 2810,
-                "iprscan" = 1016,
-                "dblast_cleanup" = 79,
-                "ipr2lineage" = 18,
-                "ipr2da" = 12,
-                "blast_clust" = 2,
-                "clust2table" = 2
-            )
-            proc_weights
-        }
+    proc_weights <- yaml::read_yaml(medians_yml_path)
+  },
+  # to avoid fatal errors in reading the proc weights yaml,
+  # some median process runtimes have been hardcoded based on
+  # the result of calculateProcessRuntime() from Jan 2024
+  error = function(cond) {
+    proc_weights <- list(
+      "dblast" = 2810,
+      "iprscan" = 1016,
+      "dblast_cleanup" = 79,
+      "ipr2lineage" = 18,
+      "ipr2da" = 12,
+      "blast_clust" = 2,
+      "clust2table" = 2
     )
+    proc_weights
+  })
 
-    return(proc_weights)
+  return(proc_weights)
 }
 
 #' Given MolEvolvR advanced options and number of inputs,
 #' calculate the total estimated walltime for the job
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
-#' (see make_opts2procs for the options)
+#' (see mapOption2Process for the options)
 #' @param n_inputs total number of input proteins
 #'
 #' @importFrom dplyr if_else
@@ -221,68 +314,129 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 #'
 #' @return total estimated number of seconds a job will process (walltime)
 #'
-#' example: advanced_opts2est_walltime(c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L)
+#' example: calculateEstimatedWallTimeFromOptions(c("homology_search",
+#'                                       "domain_architecture"),
+#'                                       n_inputs = 3, n_hits = 50L)
 #' @export
-advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, n_hits = NULL, verbose = FALSE) {
+calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
+                                                  n_inputs = 1L,
+                                                  n_hits = NULL,
+                                                  verbose = FALSE) {
+
+  tryCatch({
     # to calculate est walltime for a homology search job, the number of hits
     # must be provided
     validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts
     stopifnot(!validation_fail)
 
-    proc_weights <- get_proc_weights()
+    # Validate advanced_opts
+    if (!is.character(advanced_opts)) {
+      stop("Argument 'advanced_opts' must be a character vector.")
+    }
+
+    # Validate n_inputs
+    if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) {
+      stop("Argument 'n_inputs' must be a single positive numeric value.")
+    }
+
+    # Validate n_hits if homology_search is in advanced_opts
+    if ("homology_search" %in% advanced_opts &&
+          (is.null(n_hits)|| !is.numeric(n_hits)
+           || length(n_hits) != 1 || n_hits < 0)) {
+      stop("Argument 'n_hits' must be a single non-negative numeric value when 
+           'homology_search' is in 'advanced_opts'.")
+    }
+
+    # Get process weights
+    proc_weights <- getProcessRuntimeWeights()
+    if (!is.list(proc_weights)) {
+      stop("Process weights could not be retrieved correctly.")
+    }
+
     # sort process weights by names and convert to vec
     proc_weights <- proc_weights[order(names(proc_weights))] |> unlist()
     all_procs <- names(proc_weights) |> sort()
     # get processes from advanced options and sort by names
-    procs_from_opts <- map_advanced_opts2procs(advanced_opts)
+    procs_from_opts <- mapAdvOption2Process(advanced_opts)
     procs_from_opts <- sort(procs_from_opts)
     # binary encode: yes proc will run (1); else 0
     binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L)
     # dot product of weights and procs to run; scaled by the number of inputs
     est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |>
-        as.numeric()
+      as.numeric()
     # calculate the additional processes to run for the homologous hits
     if ("homology_search" %in% advanced_opts) {
-        opts2procs <- make_opts2procs()
-        # exclude the homology search processes for the homologous hits
-        procs2exclude_for_homologs <- opts2procs[["homology_search"]]
-        procs_homologs <- procs_from_opts[!(procs_from_opts %in% procs2exclude_for_homologs)]
-        binary_proc_vec_homolog <- dplyr::if_else(all_procs %in% procs_homologs, 1L, 0L)
-        # add the estimated walltime for processes run on the homologous hits
-        est_walltime <- est_walltime +
-            (n_hits * (binary_proc_vec_homolog %*% proc_weights) |> as.numeric())
+      opts2procs <- mapOption2Process()
+      # exclude the homology search processes for the homologous hits
+      procs2exclude_for_homologs <- opts2procs[["homology_search"]]
+      procs_homologs <- procs_from_opts[!(procs_from_opts 
+                                          %in% procs2exclude_for_homologs)]
+      binary_proc_vec_homolog <- dplyr::if_else(all_procs 
+                                                %in% procs_homologs, 1L, 0L)
+      # add the estimated walltime for processes run on the homologous hits
+      est_walltime <- est_walltime +
+        (n_hits * (binary_proc_vec_homolog
+                   %*% proc_weights) |> as.numeric())
     }
     if (verbose) {
-        msg <- stringr::str_glue(
-            "warnings from advanced_opts2est_walltime():\n",
-            "\tn_inputs={n_inputs}\n",
-            "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
-            "\test_walltime={est_walltime}\n\n"
-        )
-        cat(file = stderr(), msg)
+      msg <- stringr::str_glue(
+        "warnings from calculateEstimatedWallTimeFromOptions():\n",
+        "\tn_inputs={n_inputs}\n",
+        "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
+        "\test_walltime={est_walltime}\n\n"
+      )
+      cat(file = stderr(), msg)
     }
     return(est_walltime)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("calculateEstimatedWallTimeFromOptions 
+            function execution completed.")
+  })
+
 }
 
+
 #' Decision function to assign job queue
 #'
 #' @param t_sec_estimate estimated number of seconds a job will process
-#' (from advanced_opts2est_walltime())
+#' (from calculateEstimatedWallTimeFromOptions())
 #' @param t_long threshold value that defines the lower bound for assigning a
 #' job to the "long queue"
 #'
 #' @return a string of "short" or "long"
 #'
 #' example:
-#' advanced_opts2est_walltime(c("homology_search", "domain_architecture"), 3) |>
-#'   assign_job_queue()
+#' calculateEstimatedWallTimeFromOptions(c("homology_search",
+#'                                         "domain_architecture"), 3) |>
+#'   assignJobQueue()
 #' @export
-assign_job_queue <- function(
-        t_sec_estimate,
-        t_cutoff = 21600 # 6 hours
-    ) {
+assignJobQueue <- function(
+  t_sec_estimate,
+  t_cutoff = 21600 # 6 hours
+) {
+  tryCatch({
+    if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) {
+      stop("Argument 't_sec_estimate' must be a single numeric value.")
+    }
+
+    if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) {
+      stop("Argument 't_cutoff' must be a single non-negative numeric value.")
+    }
+
     queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short")
     return(queue)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("assignJobQueue function execution completed.")
+  })
+
 }
 
 #' Plot the estimated runtimes for different advanced options and number
@@ -297,81 +451,97 @@ assign_job_queue <- function(
 #' @return line plot object
 #'
 #' example:
-#' p <- plot_estimated_walltimes()
-#' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
+#' p <- plotEstimatedWallTimes()
+#' ggplot2::ggsave(filename = paste0("/data/molevolvr_transfer/molevolvr_dev",
+#'                 "/molevol_scripts/docs/estimate_walltimes.png"), plot = p)
 #' @export
-plot_estimated_walltimes <- function() {
-    opts <- make_opts2procs() |> names()
+plotEstimatedWallTimes <- function() {
+  tryCatch({
+    opts <- mapOption2Process() |> names()
     # get all possible submission permutations (powerset)
     get_powerset <- function(vec) {
-        # generate powerset (do not include empty set)
-        n <- length(vec)
-        indices <- 1:n
-        powerset <- lapply(1:n, function(x) combn(indices, x, simplify = FALSE))
-        powerset <- unlist(powerset, recursive = FALSE)
-        powerset <- lapply(powerset, function(index) vec[index])
-        powerset
+      # generate powerset (do not include empty set)
+      n <- length(vec)
+      indices <- 1:n
+      powerset <- lapply(1:n, function(x) combn(indices, x, simplify = FALSE))
+      powerset <- unlist(powerset, recursive = FALSE)
+      powerset <- lapply(powerset, function(index) vec[index])
+      powerset
     }
     opts_power_set <- get_powerset(opts)
     est_walltimes <- list()
     for (i in 1:20) {
-        est_walltimes <- append(
-            x = est_walltimes,
-            values = sapply(
-                opts_power_set,
-                FUN = function(advanced_opts) {
-                    # for simplicity, assume the default number of homologus hits (100)
-                    n_hits <- if ("homology_search" %in% advanced_opts) {
-                        100
-                    } else {
-                        NULL
-                    }
-                    est_walltime <- advanced_opts2est_walltime(
-                        advanced_opts,
-                        n_inputs = i,
-                        n_hits = n_hits,
-                        verbose = TRUE
-                    )
-                    names(est_walltime) <- paste0(advanced_opts, collapse = "_")
-                    est_walltime
-                }
+      est_walltimes <- append(
+        x = est_walltimes,
+        values = sapply(
+          opts_power_set,
+          FUN = function(advanced_opts) {
+            # for simplicity, assume the default number of homologous hits (100)
+            n_hits <- if ("homology_search" %in% advanced_opts) {
+              100
+            } else {
+                NULL
+              }
+            est_walltime <- calculateEstimatedWallTimeFromOptions(
+              advanced_opts,
+              n_inputs = i,
+              n_hits = n_hits,
+              verbose = TRUE
             )
+            names(est_walltime) <- paste0(advanced_opts, collapse = "_")
+            est_walltime
+          }
         )
+      )
     }
     # concat all results to their unique names
     est_walltimes <- tapply(
-        unlist(
-            est_walltimes,
-            use.names = FALSE
-        ),
-        rep(
-            names(est_walltimes),
-            lengths(est_walltimes)
-        ),
-        FUN = c
+      unlist(
+        est_walltimes,
+        use.names = FALSE
+      ),
+      rep(
+        names(est_walltimes),
+        lengths(est_walltimes)
+      ),
+      FUN = c
     )
     df_walltimes <- est_walltimes |>
-        unlist() |>
-        matrix(nrow = length(est_walltimes[[1]]), ncol = length(names(est_walltimes)))
+      unlist() |>
+      matrix(nrow = length(est_walltimes[[1]]),
+             ncol = length(names(est_walltimes)))
     colnames(df_walltimes) <- names(est_walltimes)
     df_walltimes <- df_walltimes |> tibble::as_tibble()
     # rm always col or powerset outcome without the "always" processes
     col_idx_keep <- grep(pattern = "always$", x = names(df_walltimes))
     df_walltimes <- df_walltimes |>
-        dplyr::select(col_idx_keep)
+      dplyr::select(col_idx_keep)
     # bind n_inputs
     df_walltimes <- df_walltimes |>
-        dplyr::mutate(n_inputs = 1:20)
-    df_walltimes <- tidyr::gather(df_walltimes, key = "advanced_opts", value = "est_walltime", -n_inputs)
+      dplyr::mutate(n_inputs = 1:20)
+    df_walltimes <- tidyr::gather(df_walltimes,
+                                  key = "advanced_opts",
+                                  value = "est_walltime",
+                                  -n_inputs)
     # sec to hrs
     df_walltimes <- df_walltimes |>
-        dplyr::mutate(est_walltime = est_walltime / 3600)
-    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, y = est_walltime, color = advanced_opts)) +
-        ggplot2::geom_line() +
-        ggplot2::labs(
-            title = "MolEvolvR estimated runtimes",
-            x = "Number of inputs",
-            y = "Estimated walltime (hours)"
-        )
+      dplyr::mutate(est_walltime = est_walltime / 3600)
+    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, 
+                                                    y = est_walltime, 
+                                                    color = advanced_opts)) +
+      ggplot2::geom_line() +
+      ggplot2::labs(
+        title = "MolEvolvR estimated runtimes",
+        x = "Number of inputs",
+        y = "Estimated walltime (hours)"
+      )
     return(p)
+  }, error = function(e) {
+    message(paste("Encountered an error: ", e$message))
+  }, warning = function(w) {
+    message(paste("Warning: ", w$message))
+  }, finally = {
+    message("plotEstimatedWallTimes function execution completed.")
+  })
+
 }
diff --git a/R/clean_clust_file.R b/R/clean_clust_file.R
index d3f813e5..87dcde70 100755
--- a/R/clean_clust_file.R
+++ b/R/clean_clust_file.R
@@ -55,9 +55,9 @@
 #'
 #' @examples
 #' \dontrun{
-#' clean_clust_file("data/pspa.op_ins_cls", writepath = NULL, query = "pspa")
+#' cleanClusterFile("data/pspa.op_ins_cls", writepath = NULL, query = "pspa")
 #' }
-clean_clust_file <- function(path, writepath = NULL, query) {
+cleanClusterFile <- function(path, writepath = NULL, query) {
     # ?? does the following line need to be changed to read_lines()?
     prot <- read_tsv(path, col_names = F)
 
diff --git a/R/combine_analysis.R b/R/combine_analysis.R
index bb3b3ce2..58ce1f14 100755
--- a/R/combine_analysis.R
+++ b/R/combine_analysis.R
@@ -17,7 +17,7 @@
 #' @export
 #'
 #' @examples
-combine_full <- function(inpath, ret = FALSE) {
+combineFullAnalysis <- function(inpath, ret = FALSE) {
     ## Combining full_analysis files
     full_combnd <- combine_files(inpath,
         pattern = "*.full_analysis.tsv", skip = 0,
@@ -44,7 +44,7 @@ combine_full <- function(inpath, ret = FALSE) {
 #' @export
 #'
 #' @examples
-combine_ipr <- function(inpath, ret = FALSE) {
+combineIPR <- function(inpath, ret = FALSE) {
     ## Combining clean ipr files
     ipr_combnd <- combine_files(inpath,
         pattern = "*.iprscan_cln.tsv", skip = 0,
diff --git a/R/combine_files.R b/R/combine_files.R
index 76c5fa09..455ddd53 100755
--- a/R/combine_files.R
+++ b/R/combine_files.R
@@ -38,7 +38,7 @@
 #' @export
 #'
 #' @examples
-combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
+combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
     pattern = "*full_analysis.tsv",
     delim = "\t", skip = 0,
     col_names = T) {
@@ -67,7 +67,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 ## Sample Runs ##
 #################
 # ## Combining full_analysis files
-# full_combnd <- combine_files(inpath,
+# full_combnd <- combineFiles(inpath,
 #                             pattern="*full_analysis.txt", skip=0,
 #                             col_names=T)
 #
@@ -75,7 +75,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 #           path="../molevol_data/project_data/slps/full_combined.tsv")
 #
 # ## Combining clean files
-# cln_combnd <- combine_files(inpath,
+# cln_combnd <- combineFiles(inpath,
 #                             pattern="^.*cln.txt", skip=0,
 #                             col_names=T)
 #
@@ -86,14 +86,14 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 # ## Less helpful examples!
 # ## Combining BLAST files
 # ## Likely makes no sense since clustering is done per query
-# cl_blast_combnd <- combine_files(inpath,
+# cl_blast_combnd <- combineFiles(inpath,
 #                                  pattern="^.*refseq.1e-5.txt", skip=0,
 #                                  col_names=cl_blast_colnames) %>%
 #   select(-PcPositive, -ClusterID)
 #
 # ## Combining IPR files
 # ## Likely makes no sense since there may be repeated AccNum from indiv. files!
-# ipr_combnd <- combine_files(inpath,
+# ipr_combnd <- combineFiles(inpath,
 #                             pattern="*iprscan.lins*",  skip=0,
 #                             col_names=ipr_colnames)
 #
diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R
index e7374df3..d911934a 100644
--- a/R/create_lineage_lookup.R
+++ b/R/create_lineage_lookup.R
@@ -26,9 +26,9 @@
 #' @export
 #'
 #' @examples
-create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
+createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     outfile, taxonomic_rank = "phylum") {
-    shorten_NA <- function(Lineage) {
+    .shortenNA <- function(Lineage) {
         first_NA <- str_locate(Lineage, "NA")[1]
         if (is.na(first_NA)) {
             # No NAs
@@ -92,7 +92,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     # Takes a while (2million rows after all)
     rankedLinsCombined <- rankedLins %>%
         unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>%
-        mutate(Lineage = unlist(map(Lineage, shorten_NA)))
+        mutate(Lineage = unlist(map(Lineage, .shortenNA)))
 
 
 
diff --git a/man/assign_job_queue.Rd b/man/assignJobQueue.Rd
similarity index 64%
rename from man/assign_job_queue.Rd
rename to man/assignJobQueue.Rd
index ceb6fa77..27511b6a 100644
--- a/man/assign_job_queue.Rd
+++ b/man/assignJobQueue.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{assign_job_queue}
-\alias{assign_job_queue}
+\name{assignJobQueue}
+\alias{assignJobQueue}
 \title{Decision function to assign job queue}
 \usage{
-assign_job_queue(t_sec_estimate, t_cutoff = 21600)
+assignJobQueue(t_sec_estimate, t_cutoff = 21600)
 }
 \arguments{
 \item{t_sec_estimate}{estimated number of seconds a job will process
-(from advanced_opts2est_walltime())}
+(from calculateEstimatedWallTimeFromOptions())}
 
 \item{t_long}{threshold value that defines the lower bound for assigning a
 job to the "long queue"}
@@ -17,8 +17,9 @@ job to the "long queue"}
 a string of "short" or "long"
 
 example:
-advanced_opts2est_walltime(c("homology_search", "domain_architecture"), 3) |>
-assign_job_queue()
+calculateEstimatedWallTimeFromOptions(c("homology_search",
+"domain_architecture"), 3) |>
+assignJobQueue()
 }
 \description{
 Decision function to assign job queue
diff --git a/man/advanced_opts2est_walltime.Rd b/man/calculateEstimatedWallTimeFromOptions.Rd
similarity index 68%
rename from man/advanced_opts2est_walltime.Rd
rename to man/calculateEstimatedWallTimeFromOptions.Rd
index ea4b29e6..e4eec3fd 100644
--- a/man/advanced_opts2est_walltime.Rd
+++ b/man/calculateEstimatedWallTimeFromOptions.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{advanced_opts2est_walltime}
-\alias{advanced_opts2est_walltime}
+\name{calculateEstimatedWallTimeFromOptions}
+\alias{calculateEstimatedWallTimeFromOptions}
 \title{Given MolEvolvR advanced options and number of inputs,
 calculate the total estimated walltime for the job}
 \usage{
-advanced_opts2est_walltime(
+calculateEstimatedWallTimeFromOptions(
   advanced_opts,
   n_inputs = 1L,
   n_hits = NULL,
@@ -14,14 +14,16 @@ advanced_opts2est_walltime(
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options
-(see make_opts2procs for the options)}
+(see mapOption2Process for the options)}
 
 \item{n_inputs}{total number of input proteins}
 }
 \value{
 total estimated number of seconds a job will process (walltime)
 
-example: advanced_opts2est_walltime(c("homology_search", "domain_architecture"), n_inputs = 3, n_hits = 50L)
+example: calculateEstimatedWallTimeFromOptions(c("homology_search",
+"domain_architecture"),
+n_inputs = 3, n_hits = 50L)
 }
 \description{
 Given MolEvolvR advanced options and number of inputs,
diff --git a/man/get_proc_medians.Rd b/man/calculateProcessRuntime.Rd
similarity index 76%
rename from man/get_proc_medians.Rd
rename to man/calculateProcessRuntime.Rd
index b6db0b56..bb6dd1ed 100644
--- a/man/get_proc_medians.Rd
+++ b/man/calculateProcessRuntime.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{get_proc_medians}
-\alias{get_proc_medians}
+\name{calculateProcessRuntime}
+\alias{calculateProcessRuntime}
 \title{Scrape MolEvolvR logs and calculate median processes}
 \usage{
-get_proc_medians(dir_job_results)
+calculateProcessRuntime(dir_job_results)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -21,12 +21,12 @@ examples:
 }
 
 dir_job_results <- "/data/scratch/janani/molevolvr_out"
-list_proc_medians <- get_proc_medians(dir_job_results)
+list_proc_medians <- calculateProcessRuntime(dir_job_results)
 \enumerate{
 \item from outside container environment
 common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-list_proc_medians <- get_proc_medians(dir_job_results)
+list_proc_medians <- calculateProcessRuntime(dir_job_results)
 }
 }
 \description{
diff --git a/man/clean_clust_file.Rd b/man/cleanClusterFile.Rd
similarity index 82%
rename from man/clean_clust_file.Rd
rename to man/cleanClusterFile.Rd
index bba3072e..d2818662 100644
--- a/man/clean_clust_file.Rd
+++ b/man/cleanClusterFile.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/clean_clust_file.R
-\name{clean_clust_file}
-\alias{clean_clust_file}
+\name{cleanClusterFile}
+\alias{cleanClusterFile}
 \title{Clean Cluster File}
 \usage{
-clean_clust_file(path, writepath = NULL, query)
+cleanClusterFile(path, writepath = NULL, query)
 }
 \arguments{
 \item{path}{A character to the path of the cluster file to be cleaned}
@@ -24,6 +24,6 @@ This function reads a space-separated cluster file and converts it to a cleaned
 }
 \examples{
 \dontrun{
-clean_clust_file("data/pspa.op_ins_cls", writepath = NULL, query = "pspa")
+cleanClusterFile("data/pspa.op_ins_cls", writepath = NULL, query = "pspa")
 }
 }
diff --git a/man/combine_files.Rd b/man/combineFiles.Rd
similarity index 92%
rename from man/combine_files.Rd
rename to man/combineFiles.Rd
index 4126eb9e..3b56b923 100644
--- a/man/combine_files.Rd
+++ b/man/combineFiles.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_files.R
-\name{combine_files}
-\alias{combine_files}
+\name{combineFiles}
+\alias{combineFiles}
 \title{Download the combined assembly summaries of genbank and refseq}
 \usage{
-combine_files(
+combineFiles(
   inpath = c("../molevol_data/project_data/phage_defense/"),
   pattern = "*full_analysis.tsv",
   delim = "\\t",
diff --git a/man/combine_full.Rd b/man/combineFullAnalysis.Rd
similarity index 69%
rename from man/combine_full.Rd
rename to man/combineFullAnalysis.Rd
index f4e6597b..35925e86 100644
--- a/man/combine_full.Rd
+++ b/man/combineFullAnalysis.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combine_full}
-\alias{combine_full}
+\name{combineFullAnalysis}
+\alias{combineFullAnalysis}
 \title{Combining full_analysis files}
 \usage{
-combine_full(inpath, ret = FALSE)
+combineFullAnalysis(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/combine_ipr.Rd b/man/combineIPR.Rd
similarity index 74%
rename from man/combine_ipr.Rd
rename to man/combineIPR.Rd
index 52aa3057..035c4274 100644
--- a/man/combine_ipr.Rd
+++ b/man/combineIPR.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combine_ipr}
-\alias{combine_ipr}
+\name{combineIPR}
+\alias{combineIPR}
 \title{Combining clean ipr files}
 \usage{
-combine_ipr(inpath, ret = FALSE)
+combineIPR(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/create_lineage_lookup.Rd b/man/createLineageLookup.Rd
similarity index 91%
rename from man/create_lineage_lookup.Rd
rename to man/createLineageLookup.Rd
index 51670f35..5dbab978 100644
--- a/man/create_lineage_lookup.Rd
+++ b/man/createLineageLookup.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/create_lineage_lookup.R
-\name{create_lineage_lookup}
-\alias{create_lineage_lookup}
+\name{createLineageLookup}
+\alias{createLineageLookup}
 \title{Create a look up table that goes from TaxID, to Lineage}
 \usage{
-create_lineage_lookup(
+createLineageLookup(
   lineage_file = here("data/rankedlineage.dmp"),
   outfile,
   taxonomic_rank = "phylum"
diff --git a/man/get_proc_weights.Rd b/man/getProcessRuntimeWeights.Rd
similarity index 73%
rename from man/get_proc_weights.Rd
rename to man/getProcessRuntimeWeights.Rd
index 0f4beb57..8eff0347 100644
--- a/man/get_proc_weights.Rd
+++ b/man/getProcessRuntimeWeights.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{get_proc_weights}
-\alias{get_proc_weights}
+\name{getProcessRuntimeWeights}
+\alias{getProcessRuntimeWeights}
 \title{Quickly get the runtime weights for MolEvolvR backend processes}
 \usage{
-get_proc_weights(medians_yml_path = NULL)
+getProcessRuntimeWeights(medians_yml_path = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -13,7 +13,7 @@ directory}
 \value{
 \link{list} names: processes; values: median runtime (seconds)
 
-example: get_proc_weights()
+example: getProcessRuntimeWeights()
 }
 \description{
 Quickly get the runtime weights for MolEvolvR backend processes
diff --git a/man/map_advanced_opts2procs.Rd b/man/mapAdvOption2Process.Rd
similarity index 76%
rename from man/map_advanced_opts2procs.Rd
rename to man/mapAdvOption2Process.Rd
index 631708b4..5bd9ee65 100644
--- a/man/map_advanced_opts2procs.Rd
+++ b/man/mapAdvOption2Process.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{map_advanced_opts2procs}
-\alias{map_advanced_opts2procs}
+\name{mapAdvOption2Process}
+\alias{mapAdvOption2Process}
 \title{Use MolEvolvR advanced options to get associated processes}
 \usage{
-map_advanced_opts2procs(advanced_opts)
+mapAdvOption2Process(advanced_opts)
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options}
@@ -15,7 +15,7 @@ the advanced options
 
 example:
 advanced_opts <- c("homology_search", "domain_architecture")
-procs <- map_advanced_opts2procs(advanced_opts)
+procs <- mapAdvOption2Process(advanced_opts)
 }
 \description{
 Use MolEvolvR advanced options to get associated processes
diff --git a/man/make_opts2procs.Rd b/man/mapOption2Process.Rd
similarity index 75%
rename from man/make_opts2procs.Rd
rename to man/mapOption2Process.Rd
index 07e208b2..ff6905c5 100644
--- a/man/make_opts2procs.Rd
+++ b/man/mapOption2Process.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{make_opts2procs}
-\alias{make_opts2procs}
+\name{mapOption2Process}
+\alias{mapOption2Process}
 \title{Construct list where names (MolEvolvR advanced options) point to processes}
 \usage{
-make_opts2procs()
+mapOption2Process()
 }
 \value{
 list where names (MolEvolvR advanced options) point to processes
 
-example: list_opts2procs <- make_opts2procs
+example: list_opts2procs <- mapOption2Process
 }
 \description{
 Construct list where names (MolEvolvR advanced options) point to processes
diff --git a/man/plot_estimated_walltimes.Rd b/man/plotEstimatedWallTimes.Rd
similarity index 55%
rename from man/plot_estimated_walltimes.Rd
rename to man/plotEstimatedWallTimes.Rd
index 3669e0e0..0d53cb32 100644
--- a/man/plot_estimated_walltimes.Rd
+++ b/man/plotEstimatedWallTimes.Rd
@@ -1,18 +1,19 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{plot_estimated_walltimes}
-\alias{plot_estimated_walltimes}
+\name{plotEstimatedWallTimes}
+\alias{plotEstimatedWallTimes}
 \title{Plot the estimated runtimes for different advanced options and number
 of inputs}
 \usage{
-plot_estimated_walltimes()
+plotEstimatedWallTimes()
 }
 \value{
 line plot object
 
 example:
-p <- plot_estimated_walltimes()
-ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
+p <- plotEstimatedWallTimes()
+ggplot2::ggsave(filename = paste0("/data/molevolvr_transfer/molevolvr_dev",
+"/molevol_scripts/docs/estimate_walltimes.png"), plot = p)
 }
 \description{
 this function was just for fun; very, very messy code
diff --git a/man/write_proc_medians_table.Rd b/man/writeProcessRuntime2TSV.Rd
similarity index 77%
rename from man/write_proc_medians_table.Rd
rename to man/writeProcessRuntime2TSV.Rd
index 2ae7a97b..03cbbd68 100644
--- a/man/write_proc_medians_table.Rd
+++ b/man/writeProcessRuntime2TSV.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{write_proc_medians_table}
-\alias{write_proc_medians_table}
+\name{writeProcessRuntime2TSV}
+\alias{writeProcessRuntime2TSV}
 \title{Write a table of 2 columns: 1) process and 2) median seconds}
 \usage{
-write_proc_medians_table(dir_job_results, filepath)
+writeProcessRuntime2TSV(dir_job_results, filepath)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results}
@@ -14,7 +14,7 @@ write_proc_medians_table(dir_job_results, filepath)
 \value{
 \link{tbl_df} 2 columns: 1) process and 2) median seconds
 
-example: write_proc_medians_table(
+example: writeProcessRuntime2TSV(
 "/data/scratch/janani/molevolvr_out/",
 "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 )
diff --git a/man/write_proc_medians_yml.Rd b/man/writeProcessRuntimeToYML.Rd
similarity index 61%
rename from man/write_proc_medians_yml.Rd
rename to man/writeProcessRuntimeToYML.Rd
index a3d8ee5f..e4a5c8ad 100644
--- a/man/write_proc_medians_yml.Rd
+++ b/man/writeProcessRuntimeToYML.Rd
@@ -1,25 +1,26 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{write_proc_medians_yml}
-\alias{write_proc_medians_yml}
+\name{writeProcessRuntimeToYML}
+\alias{writeProcessRuntimeToYML}
 \title{Compute median process runtimes, then write a YAML list of the processes and
 their median runtimes in seconds to the path specified by 'filepath'.}
 \usage{
-write_proc_medians_yml(dir_job_results, filepath = NULL)
+writeProcessRuntimeToYML(dir_job_results, filepath = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory}
 
-\item{filepath}{\link{chr} path to save YAML file; if NULL, uses ./molevol_scripts/log_data/job_proc_weights.yml}
+\item{filepath}{\link{chr} path to save YAML file; if NULL,
+uses ./molevol_scripts/log_data/job_proc_weights.yml}
 }
 \description{
 The default value of filepath is the value of the env var
-MOLEVOLVR_PROC_WEIGHTS, which get_proc_weights() also uses as its default
+MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default
 read location.
 }
 \examples{
 \dontrun{
-write_proc_medians_yml(
+writeProcessRuntimeToYML(
     "/data/scratch/janani/molevolvr_out/",
     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 )

From 091d32ebb31b6f295268b4e0a38ef0fab1066358 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Tue, 8 Oct 2024 07:17:56 +0100
Subject: [PATCH 03/19] fixing merge issue in NAMESPACE

---
 NAMESPACE | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 739c76d7..d2ef5463 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -29,6 +29,9 @@ export(cleanSpecies)
 export(combineFiles)
 export(combineFullAnalysis)
 export(combineIPR)
+export(condenseRepeatedDomains)
+export(convert2TitleCase)
+export(convertAlignment2FA)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
@@ -63,13 +66,15 @@ export(lineage.domain_repeats.plot)
 export(lineage.neighbors.plot)
 export(lineage_sunburst)
 export(make_job_results_url)
+export(mapAcc2Name)
 export(mapAdvOption2Process)
 export(mapOption2Process)
-export(mapAcc2Name)
+export(map_acc2name)
 export(msa_pdf)
 export(pick_longer_duplicate)
 export(plotEstimatedWallTimes)
 export(prot2tax)
+export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)

From fc63187c4985d8a9fad15582691b4ee4f9c273e6 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Tue, 8 Oct 2024 08:18:42 +0100
Subject: [PATCH 04/19] Added updated function name to NAMESPACE and removed
 unused argument in readAAStringSet

---
 NAMESPACE |  3 +--
 R/msa.R   | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d2ef5463..cd135cc8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,9 +20,9 @@ export(assert_count_df)
 export(assignJobQueue)
 export(calculateEstimatedWallTimeFromOptions)
 export(calculateProcessRuntime)
-export(cleanGeneDescription)
 export(cleanClusters)
 export(cleanDomainArchitecture)
+export(cleanGeneDescription)
 export(cleanGenomicContext)
 export(cleanLineage)
 export(cleanSpecies)
@@ -71,7 +71,6 @@ export(mapAdvOption2Process)
 export(mapOption2Process)
 export(map_acc2name)
 export(msa_pdf)
-export(pick_longer_duplicate)
 export(plotEstimatedWallTimes)
 export(prot2tax)
 export(prot2tax_old)
diff --git a/R/msa.R b/R/msa.R
index e56cc32c..0b1b6e34 100644
--- a/R/msa.R
+++ b/R/msa.R
@@ -197,21 +197,21 @@ msa_pdf <- function(fasta_path, out_path = NULL,
 #'
 #' @examples
 generate_msa <- function(fa_file = "", outfile = "") {
-    prot_aa <- readAAStringSet(
-        path = fa_file,
-        format = "fasta"
-    )
-    prot_aa
+  prot_aa <- readAAStringSet(
+    fa_file,
+    format = "fasta"
+  )
+  prot_aa
 
-    ## Install kalign ?rMSA_INSTALL
-    ## Messed up! Reimplement from kalign.R
-    ## https://github.com/mhahsler/rMSA/blob/master/R/kalign.R
+  ## Install kalign ?rMSA_INSTALL
+  ## Messed up! Reimplement from kalign.R
+  ## https://github.com/mhahsler/rMSA/blob/master/R/kalign.R
 
-    # source("scripts/c2r.R")
+  # source("scripts/c2r.R")
 
-    ## align the sequences
-    al <- kalign(prot_aa) # !! won't work!
-    al
+  ## align the sequences
+  al <- kalign(prot_aa) # !! won't work!
+  al
 }
 
 ############################

From 38f3cb000ddf35028c1e7c940920dd051db1a2dc Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Wed, 9 Oct 2024 11:32:03 +0100
Subject: [PATCH 05/19] added error handling functionality for the
 run_deltablast and run_rpsblast functions. This includes arguments check
 before wrapping code logic  in a tryCatch block.

---
 R/blastWrappers.R | 109 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 84 insertions(+), 25 deletions(-)

diff --git a/R/blastWrappers.R b/R/blastWrappers.R
index 552b1ff6..15484a1b 100755
--- a/R/blastWrappers.R
+++ b/R/blastWrappers.R
@@ -18,25 +18,56 @@
 #'
 #' @examples
 run_deltablast <- function(deltablast_path, db_search_path,
-    db = "refseq", query, evalue = "1e-5",
-    out, num_alignments, num_threads = 1) {
-    start <- Sys.time()
+                           db = "refseq", query, evalue = "1e-5",
+                           out, num_alignments, num_threads = 1) {
 
+  # Validate inputs up front; evalue may be a numeric string (e.g. "1e-5").
+  if (!file.exists(deltablast_path)) {
+    stop("The DELTABLAST executable path is invalid: ", deltablast_path)
+  }
+  if (!dir.exists(db_search_path)) {
+    stop("The database search path is invalid: ", db_search_path)
+  }
+  if (!file.exists(query)) {
+    stop("The query file path is invalid: ", query)
+  }
+  if (!isTRUE(suppressWarnings(as.numeric(evalue)) > 0)) { # NA when unparsable
+    stop("The evalue must be a positive number: ", evalue)
+  }
+  if (!is.numeric(num_alignments) || num_alignments <= 0) {
+    stop("The number of alignments must be a positive integer: ",
+         num_alignments)
+  }
+  if (!is.numeric(num_threads) || num_threads <= 0) {
+    stop("The number of threads must be a positive integer: ", num_threads)
+  }
+
+  start <- Sys.time()
+
+  tryCatch({
     system(paste0("export BLASTDB=/", db_search_path))
 
     system2(
-        command = deltablast_path,
-        args = c(
-            "-db", db,
-            "-query", query,
-            "-evalue", evalue,
-            "-out", out,
-            "-num_threads", num_threads,
-            "-num_alignments", num_alignments
-            #   ,"-outfmt", outfmt
-        )
+      command = deltablast_path,
+      args = c(
+        "-db", db,
+        "-query", query,
+        "-evalue", evalue,
+        "-out", out,
+        "-num_threads", num_threads,
+        "-num_alignments", num_alignments
+        #   ,"-outfmt", outfmt
+      )
     )
     print(Sys.time() - start)
+  }, error = function(e) {
+    message("Error in run_deltablast: ", conditionMessage(e))
+  }, warning = function(w) {
+    message("Warning in run_deltablast: ", conditionMessage(w))
+  }, finally = {
+    message("run_deltablast completed")
+  })
+
 }
 
 
@@ -55,20 +86,48 @@ run_deltablast <- function(deltablast_path, db_search_path,
 #'
 #' @examples
 run_rpsblast <- function(rpsblast_path, db_search_path,
-    db = "refseq", query, evalue = "1e-5",
-    out, num_threads = 1) {
-    start <- Sys.time()
+                         db = "refseq", query, evalue = "1e-5",
+                         out, num_threads = 1) {
+  # Validate inputs up front; evalue may be a numeric string (e.g. "1e-5").
+  if (!file.exists(rpsblast_path)) {
+    stop("The RPSBLAST executable path is invalid: ", rpsblast_path)
+  }
+  if (!dir.exists(db_search_path)) {
+    stop("The database search path is invalid: ", db_search_path)
+  }
+  if (!file.exists(query)) {
+    stop("The query file path is invalid: ", query)
+  }
+  if (!isTRUE(suppressWarnings(as.numeric(evalue)) > 0)) { # NA when unparsable
+    stop("The evalue must be a positive number: ", evalue)
+  }
+  if (!is.numeric(num_threads) || num_threads <= 0) {
+    stop("The number of threads must be a positive integer: ", num_threads)
+  }
+
+  start <- Sys.time()
+
+  tryCatch({
+
     system(paste0("export BLASTDB=/", db_search_path))
+
     system2(
-        command = rpsblast_path,
-        args = c(
-            "-db", db,
-            "-query", query,
-            "-evalue", evalue,
-            "-out", out,
-            "-num_threads", num_threads
-            #                  , "-outfmt", outfmt
-        )
+      command = rpsblast_path,
+      args = c(
+        "-db", db,
+        "-query", query,
+        "-evalue", evalue,
+        "-out", out,
+        "-num_threads", num_threads
+      )
     )
     print(Sys.time() - start)
+  }, error = function(e) {
+    message("Error in run_rpsblast: ", conditionMessage(e))
+  }, warning = function(w) {
+    message("Warning in run_rpsblast: ", conditionMessage(w))
+  }, finally = {
+    message("run_rpsblast completed")
+  })
+
 }

From 4ff68fb06395842093879dea47e45aaae1967225 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Thu, 10 Oct 2024 08:27:02 +0100
Subject: [PATCH 06/19] Reverting to old function names for the following
 functions to create a separate pr for their updates and on a different
 branch: R/combine_analysis.R combine_full combine_ipr

R/combine_files.R
combine_files

R/create_lineage_lookup.R
create_lineage_lookup
shorten_NA
---
 R/combine_analysis.R      |  4 ++--
 R/combine_files.R         | 10 +++++-----
 R/create_lineage_lookup.R |  8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/R/combine_analysis.R b/R/combine_analysis.R
index 58ce1f14..bb3b3ce2 100755
--- a/R/combine_analysis.R
+++ b/R/combine_analysis.R
@@ -17,7 +17,7 @@
 #' @export
 #'
 #' @examples
-combineFullAnalysis <- function(inpath, ret = FALSE) {
+combine_full <- function(inpath, ret = FALSE) {
     ## Combining full_analysis files
     full_combnd <- combine_files(inpath,
         pattern = "*.full_analysis.tsv", skip = 0,
@@ -44,7 +44,7 @@ combineFullAnalysis <- function(inpath, ret = FALSE) {
 #' @export
 #'
 #' @examples
-combineIPR <- function(inpath, ret = FALSE) {
+combine_ipr <- function(inpath, ret = FALSE) {
     ## Combining clean ipr files
     ipr_combnd <- combine_files(inpath,
         pattern = "*.iprscan_cln.tsv", skip = 0,
diff --git a/R/combine_files.R b/R/combine_files.R
index 455ddd53..76c5fa09 100755
--- a/R/combine_files.R
+++ b/R/combine_files.R
@@ -38,7 +38,7 @@
 #' @export
 #'
 #' @examples
-combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
+combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
     pattern = "*full_analysis.tsv",
     delim = "\t", skip = 0,
     col_names = T) {
@@ -67,7 +67,7 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/
 ## Sample Runs ##
 #################
 # ## Combining full_analysis files
-# full_combnd <- combineFiles(inpath,
+# full_combnd <- combine_files(inpath,
 #                             pattern="*full_analysis.txt", skip=0,
 #                             col_names=T)
 #
@@ -75,7 +75,7 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/
 #           path="../molevol_data/project_data/slps/full_combined.tsv")
 #
 # ## Combining clean files
-# cln_combnd <- combineFiles(inpath,
+# cln_combnd <- combine_files(inpath,
 #                             pattern="^.*cln.txt", skip=0,
 #                             col_names=T)
 #
@@ -86,14 +86,14 @@ combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/
 # ## Less helpful examples!
 # ## Combining BLAST files
 # ## Likely makes no sense since clustering is done per query
-# cl_blast_combnd <- combineFiles(inpath,
+# cl_blast_combnd <- combine_files(inpath,
 #                                  pattern="^.*refseq.1e-5.txt", skip=0,
 #                                  col_names=cl_blast_colnames) %>%
 #   select(-PcPositive, -ClusterID)
 #
 # ## Combining IPR files
 # ## Likely makes no sense since there may be repeated AccNum from indiv. files!
-# ipr_combnd <- combineFiles(inpath,
+# ipr_combnd <- combine_files(inpath,
 #                             pattern="*iprscan.lins*",  skip=0,
 #                             col_names=ipr_colnames)
 #
diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R
index d911934a..8e365cbb 100644
--- a/R/create_lineage_lookup.R
+++ b/R/create_lineage_lookup.R
@@ -26,9 +26,9 @@
 #' @export
 #'
 #' @examples
-createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"),
+create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     outfile, taxonomic_rank = "phylum") {
-    .shortenNA <- function(Lineage) {
+    shorten_NA <- function(Lineage) {
         first_NA <- str_locate(Lineage, "NA")[1]
         if (is.na(first_NA)) {
             # No NAs
@@ -92,7 +92,7 @@ createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     # Takes a while (2million rows after all)
     rankedLinsCombined <- rankedLins %>%
         unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>%
-        mutate(Lineage = unlist(map(Lineage, .shortenNA)))
+        mutate(Lineage = unlist(map(Lineage, shorten_NA)))
 
 
 
@@ -101,7 +101,7 @@ createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"),
 
 
 
-#' CreateLineageLookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv")
+#' create_lineage_lookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv")
 #' {
 #'   #' Create a look up table that goes from GCA_ID, to TaxID, to Lineage
 #'   #' @author Samuel Chen

From 035c5e13b4cfe54b4ba7ff1d5c7618ade13720d1 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Thu, 10 Oct 2024 08:41:47 +0100
Subject: [PATCH 07/19] minor updates to namespace and Rd files after running
 devtools::check()

---
 NAMESPACE                                                | 8 ++++----
 man/{combineFiles.Rd => combine_files.Rd}                | 6 +++---
 man/{combineFullAnalysis.Rd => combine_full.Rd}          | 6 +++---
 man/{combineIPR.Rd => combine_ipr.Rd}                    | 6 +++---
 man/{createLineageLookup.Rd => create_lineage_lookup.Rd} | 6 +++---
 5 files changed, 16 insertions(+), 16 deletions(-)
 rename man/{combineFiles.Rd => combine_files.Rd} (92%)
 rename man/{combineFullAnalysis.Rd => combine_full.Rd} (69%)
 rename man/{combineIPR.Rd => combine_ipr.Rd} (74%)
 rename man/{createLineageLookup.Rd => create_lineage_lookup.Rd} (91%)

diff --git a/NAMESPACE b/NAMESPACE
index cd135cc8..f49975b4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,9 +26,9 @@ export(cleanGeneDescription)
 export(cleanGenomicContext)
 export(cleanLineage)
 export(cleanSpecies)
-export(combineFiles)
-export(combineFullAnalysis)
-export(combineIPR)
+export(combine_files)
+export(combine_full)
+export(combine_ipr)
 export(condenseRepeatedDomains)
 export(convert2TitleCase)
 export(convertAlignment2FA)
@@ -37,8 +37,8 @@ export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
-export(createLineageLookup)
 export(create_all_col_params)
+export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
diff --git a/man/combineFiles.Rd b/man/combine_files.Rd
similarity index 92%
rename from man/combineFiles.Rd
rename to man/combine_files.Rd
index 3b56b923..4126eb9e 100644
--- a/man/combineFiles.Rd
+++ b/man/combine_files.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_files.R
-\name{combineFiles}
-\alias{combineFiles}
+\name{combine_files}
+\alias{combine_files}
 \title{Download the combined assembly summaries of genbank and refseq}
 \usage{
-combineFiles(
+combine_files(
   inpath = c("../molevol_data/project_data/phage_defense/"),
   pattern = "*full_analysis.tsv",
   delim = "\\t",
diff --git a/man/combineFullAnalysis.Rd b/man/combine_full.Rd
similarity index 69%
rename from man/combineFullAnalysis.Rd
rename to man/combine_full.Rd
index 35925e86..f4e6597b 100644
--- a/man/combineFullAnalysis.Rd
+++ b/man/combine_full.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combineFullAnalysis}
-\alias{combineFullAnalysis}
+\name{combine_full}
+\alias{combine_full}
 \title{Combining full_analysis files}
 \usage{
-combineFullAnalysis(inpath, ret = FALSE)
+combine_full(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/combineIPR.Rd b/man/combine_ipr.Rd
similarity index 74%
rename from man/combineIPR.Rd
rename to man/combine_ipr.Rd
index 035c4274..52aa3057 100644
--- a/man/combineIPR.Rd
+++ b/man/combine_ipr.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combineIPR}
-\alias{combineIPR}
+\name{combine_ipr}
+\alias{combine_ipr}
 \title{Combining clean ipr files}
 \usage{
-combineIPR(inpath, ret = FALSE)
+combine_ipr(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/createLineageLookup.Rd b/man/create_lineage_lookup.Rd
similarity index 91%
rename from man/createLineageLookup.Rd
rename to man/create_lineage_lookup.Rd
index 5dbab978..51670f35 100644
--- a/man/createLineageLookup.Rd
+++ b/man/create_lineage_lookup.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/create_lineage_lookup.R
-\name{createLineageLookup}
-\alias{createLineageLookup}
+\name{create_lineage_lookup}
+\alias{create_lineage_lookup}
 \title{Create a look up table that goes from TaxID, to Lineage}
 \usage{
-createLineageLookup(
+create_lineage_lookup(
   lineage_file = here("data/rankedlineage.dmp"),
   outfile,
   taxonomic_rank = "phylum"

From fb5ac23f8a3e8e5709498aa24308a950802d1c29 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Thu, 10 Oct 2024 09:20:22 +0100
Subject: [PATCH 08/19] Renamed the following function; R/combine_analysis.R
 combine_full combine_ipr

R/combine_files.R
combine_files

R/create_lineage_lookup.R
create_lineage_lookup
shorten_NA with approved names from #44
---
 NAMESPACE                                              |  8 ++++----
 R/acc2lin.R                                            |  2 +-
 R/combine_analysis.R                                   |  8 ++++----
 R/combine_files.R                                      | 10 +++++-----
 R/create_lineage_lookup.R                              |  8 ++++----
 R/lineage.R                                            |  4 ++--
 man/GCA2lin.Rd                                         |  2 +-
 man/{combine_files.Rd => combineFiles.Rd}              |  6 +++---
 man/{combine_full.Rd => combineFullAnalysis.Rd}        |  6 +++---
 man/{combine_ipr.Rd => combineIPR.Rd}                  |  6 +++---
 ...create_lineage_lookup.Rd => createLineageLookup.Rd} |  6 +++---
 man/ipg2lin.Rd                                         |  2 +-
 12 files changed, 34 insertions(+), 34 deletions(-)
 rename man/{combine_files.Rd => combineFiles.Rd} (92%)
 rename man/{combine_full.Rd => combineFullAnalysis.Rd} (69%)
 rename man/{combine_ipr.Rd => combineIPR.Rd} (74%)
 rename man/{create_lineage_lookup.Rd => createLineageLookup.Rd} (91%)

diff --git a/NAMESPACE b/NAMESPACE
index f49975b4..cd135cc8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,9 +26,9 @@ export(cleanGeneDescription)
 export(cleanGenomicContext)
 export(cleanLineage)
 export(cleanSpecies)
-export(combine_files)
-export(combine_full)
-export(combine_ipr)
+export(combineFiles)
+export(combineFullAnalysis)
+export(combineIPR)
 export(condenseRepeatedDomains)
 export(convert2TitleCase)
 export(convertAlignment2FA)
@@ -37,8 +37,8 @@ export(convert_fa2tre)
 export(count_bycol)
 export(count_to_sunburst)
 export(count_to_treemap)
+export(createLineageLookup)
 export(create_all_col_params)
-export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
diff --git a/R/acc2lin.R b/R/acc2lin.R
index dfb33da9..a6551247 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -277,7 +277,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
 #' This file can be generated using the "DownloadAssemblySummary()" function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' "create_lineage_lookup()" function
+#' "createLineageLookup()" function
 #'
 #' @importFrom data.table fread
 #'
diff --git a/R/combine_analysis.R b/R/combine_analysis.R
index bb3b3ce2..55e36925 100755
--- a/R/combine_analysis.R
+++ b/R/combine_analysis.R
@@ -17,9 +17,9 @@
 #' @export
 #'
 #' @examples
-combine_full <- function(inpath, ret = FALSE) {
+combineFullAnalysis <- function(inpath, ret = FALSE) {
     ## Combining full_analysis files
-    full_combnd <- combine_files(inpath,
+    full_combnd <- combineFiles(inpath,
         pattern = "*.full_analysis.tsv", skip = 0,
         col_names = T
     )
@@ -44,9 +44,9 @@ combine_full <- function(inpath, ret = FALSE) {
 #' @export
 #'
 #' @examples
-combine_ipr <- function(inpath, ret = FALSE) {
+combineIPR <- function(inpath, ret = FALSE) {
     ## Combining clean ipr files
-    ipr_combnd <- combine_files(inpath,
+    ipr_combnd <- combineFiles(inpath,
         pattern = "*.iprscan_cln.tsv", skip = 0,
         col_names = T
     )
diff --git a/R/combine_files.R b/R/combine_files.R
index 76c5fa09..455ddd53 100755
--- a/R/combine_files.R
+++ b/R/combine_files.R
@@ -38,7 +38,7 @@
 #' @export
 #'
 #' @examples
-combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
+combineFiles <- function(inpath = c("../molevol_data/project_data/phage_defense/"),
     pattern = "*full_analysis.tsv",
     delim = "\t", skip = 0,
     col_names = T) {
@@ -67,7 +67,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 ## Sample Runs ##
 #################
 # ## Combining full_analysis files
-# full_combnd <- combine_files(inpath,
+# full_combnd <- combineFiles(inpath,
 #                             pattern="*full_analysis.txt", skip=0,
 #                             col_names=T)
 #
@@ -75,7 +75,7 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 #           path="../molevol_data/project_data/slps/full_combined.tsv")
 #
 # ## Combining clean files
-# cln_combnd <- combine_files(inpath,
+# cln_combnd <- combineFiles(inpath,
 #                             pattern="^.*cln.txt", skip=0,
 #                             col_names=T)
 #
@@ -86,14 +86,14 @@ combine_files <- function(inpath = c("../molevol_data/project_data/phage_defense
 # ## Less helpful examples!
 # ## Combining BLAST files
 # ## Likely makes no sense since clustering is done per query
-# cl_blast_combnd <- combine_files(inpath,
+# cl_blast_combnd <- combineFiles(inpath,
 #                                  pattern="^.*refseq.1e-5.txt", skip=0,
 #                                  col_names=cl_blast_colnames) %>%
 #   select(-PcPositive, -ClusterID)
 #
 # ## Combining IPR files
 # ## Likely makes no sense since there may be repeated AccNum from indiv. files!
-# ipr_combnd <- combine_files(inpath,
+# ipr_combnd <- combineFiles(inpath,
 #                             pattern="*iprscan.lins*",  skip=0,
 #                             col_names=ipr_colnames)
 #
diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R
index 8e365cbb..78e79048 100644
--- a/R/create_lineage_lookup.R
+++ b/R/create_lineage_lookup.R
@@ -26,9 +26,9 @@
 #' @export
 #'
 #' @examples
-create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
+createLineageLookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     outfile, taxonomic_rank = "phylum") {
-    shorten_NA <- function(Lineage) {
+    .shortenNA <- function(Lineage) {
         first_NA <- str_locate(Lineage, "NA")[1]
         if (is.na(first_NA)) {
             # No NAs
@@ -92,7 +92,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
     # Takes a while (2million rows after all)
     rankedLinsCombined <- rankedLins %>%
         unite(col = "Lineage", all_of(combined_taxonomy), sep = ">") %>%
-        mutate(Lineage = unlist(map(Lineage, shorten_NA)))
+        mutate(Lineage = unlist(map(Lineage, .shortenNA)))
 
 
 
@@ -101,7 +101,7 @@ create_lineage_lookup <- function(lineage_file = here("data/rankedlineage.dmp"),
 
 
 
-#' create_lineage_lookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv")
+#' createLineageLookup <- function(assembly_path, updateAssembly = FALSE, file_type = "tsv")
 #' {
 #'   #' Create a look up table that goes from GCA_ID, to TaxID, to Lineage
 #'   #' @author Samuel Chen
diff --git a/R/lineage.R b/R/lineage.R
index 20acec04..7ceed847 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -77,7 +77,7 @@ DownloadAssemblySummary <- function(outpath,
 #' This file can be generated using the "DownloadAssemblySummary()" function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' "create_lineage_lookup()" function
+#' "createLineageLookup()" function
 #' @param acc_col
 #'
 #' @importFrom dplyr pull
@@ -309,7 +309,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") {
 #' @param genbank_assembly_path
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' "create_lineage_lookup()" function
+#' "createLineageLookup()" function
 #'
 #' @importFrom data.table fread setnames
 #'
diff --git a/man/GCA2lin.Rd b/man/GCA2lin.Rd
index ad83ca39..47acc3d7 100644
--- a/man/GCA2lin.Rd
+++ b/man/GCA2lin.Rd
@@ -19,7 +19,7 @@ This file can be generated using the "DownloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
-"create_lineage_lookup()" function}
+"createLineageLookup()" function}
 
 \item{acc_col}{}
 }
diff --git a/man/combine_files.Rd b/man/combineFiles.Rd
similarity index 92%
rename from man/combine_files.Rd
rename to man/combineFiles.Rd
index 4126eb9e..3b56b923 100644
--- a/man/combine_files.Rd
+++ b/man/combineFiles.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_files.R
-\name{combine_files}
-\alias{combine_files}
+\name{combineFiles}
+\alias{combineFiles}
 \title{Download the combined assembly summaries of genbank and refseq}
 \usage{
-combine_files(
+combineFiles(
   inpath = c("../molevol_data/project_data/phage_defense/"),
   pattern = "*full_analysis.tsv",
   delim = "\\t",
diff --git a/man/combine_full.Rd b/man/combineFullAnalysis.Rd
similarity index 69%
rename from man/combine_full.Rd
rename to man/combineFullAnalysis.Rd
index f4e6597b..35925e86 100644
--- a/man/combine_full.Rd
+++ b/man/combineFullAnalysis.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combine_full}
-\alias{combine_full}
+\name{combineFullAnalysis}
+\alias{combineFullAnalysis}
 \title{Combining full_analysis files}
 \usage{
-combine_full(inpath, ret = FALSE)
+combineFullAnalysis(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/combine_ipr.Rd b/man/combineIPR.Rd
similarity index 74%
rename from man/combine_ipr.Rd
rename to man/combineIPR.Rd
index 52aa3057..035c4274 100644
--- a/man/combine_ipr.Rd
+++ b/man/combineIPR.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/combine_analysis.R
-\name{combine_ipr}
-\alias{combine_ipr}
+\name{combineIPR}
+\alias{combineIPR}
 \title{Combining clean ipr files}
 \usage{
-combine_ipr(inpath, ret = FALSE)
+combineIPR(inpath, ret = FALSE)
 }
 \arguments{
 \item{ret}{}
diff --git a/man/create_lineage_lookup.Rd b/man/createLineageLookup.Rd
similarity index 91%
rename from man/create_lineage_lookup.Rd
rename to man/createLineageLookup.Rd
index 51670f35..5dbab978 100644
--- a/man/create_lineage_lookup.Rd
+++ b/man/createLineageLookup.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/create_lineage_lookup.R
-\name{create_lineage_lookup}
-\alias{create_lineage_lookup}
+\name{createLineageLookup}
+\alias{createLineageLookup}
 \title{Create a look up table that goes from TaxID, to Lineage}
 \usage{
-create_lineage_lookup(
+createLineageLookup(
   lineage_file = here("data/rankedlineage.dmp"),
   outfile,
   taxonomic_rank = "phylum"
diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd
index 453668b0..5850e86c 100644
--- a/man/ipg2lin.Rd
+++ b/man/ipg2lin.Rd
@@ -29,7 +29,7 @@ file}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
-"create_lineage_lookup()" function}
+"createLineageLookup()" function}
 
 \item{assembly_path}{String of the path to the assembly_summary path
 This file can be generated using the "DownloadAssemblySummary()" function}

From 106eb14b4e2eace66737a07cf5840011e490d116 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Thu, 10 Oct 2024 10:24:49 +0100
Subject: [PATCH 09/19] reverting to old function names; make_opts2procs,
 map_advanced_opts2procs, get_proc_medians, write_proc_medians_table,
 write_proc_medians_yml, get_proc_weights, advanced_opts2est_walltime in
 R/assign_job_queue.R to be updated in a separate pull request

---
 NAMESPACE                                     | 18 ++--
 R/assign_job_queue.R                          | 84 +++++++++----------
 ...tions.Rd => advanced_opts2est_walltime.Rd} | 10 +--
 ...{assignJobQueue.Rd => assign_job_queue.Rd} | 12 +--
 ...eProcessRuntime.Rd => get_proc_medians.Rd} | 10 +--
 ...sRuntimeWeights.Rd => get_proc_weights.Rd} |  8 +-
 ...apOption2Process.Rd => make_opts2procs.Rd} |  8 +-
 ...2Process.Rd => map_advanced_opts2procs.Rd} |  8 +-
 ...llTimes.Rd => plot_estimated_walltimes.Rd} |  8 +-
 ...ime2TSV.Rd => write_proc_medians_table.Rd} |  8 +-
 ...timeToYML.Rd => write_proc_medians_yml.Rd} | 10 +--
 11 files changed, 92 insertions(+), 92 deletions(-)
 rename man/{calculateEstimatedWallTimeFromOptions.Rd => advanced_opts2est_walltime.Rd} (73%)
 rename man/{assignJobQueue.Rd => assign_job_queue.Rd} (68%)
 rename man/{calculateProcessRuntime.Rd => get_proc_medians.Rd} (76%)
 rename man/{getProcessRuntimeWeights.Rd => get_proc_weights.Rd} (73%)
 rename man/{mapOption2Process.Rd => make_opts2procs.Rd} (75%)
 rename man/{mapAdvOption2Process.Rd => map_advanced_opts2procs.Rd} (76%)
 rename man/{plotEstimatedWallTimes.Rd => plot_estimated_walltimes.Rd} (77%)
 rename man/{writeProcessRuntime2TSV.Rd => write_proc_medians_table.Rd} (77%)
 rename man/{writeProcessRuntimeToYML.Rd => write_proc_medians_yml.Rd} (74%)

diff --git a/NAMESPACE b/NAMESPACE
index f49975b4..b4be51ec 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,11 +15,10 @@ export(add_leaves)
 export(add_lins)
 export(add_name)
 export(add_tax)
+export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
-export(assignJobQueue)
-export(calculateEstimatedWallTimeFromOptions)
-export(calculateProcessRuntime)
+export(assign_job_queue)
 export(cleanClusters)
 export(cleanDomainArchitecture)
 export(cleanGeneDescription)
@@ -54,9 +53,10 @@ export(generate_all_aln2fa)
 export(generate_fa2tre)
 export(generate_msa)
 export(generate_trees)
-export(getProcessRuntimeWeights)
 export(get_accnums_from_fasta_file)
 export(get_job_message)
+export(get_proc_medians)
+export(get_proc_weights)
 export(ipg2lin)
 export(ipr2viz)
 export(ipr2viz_web)
@@ -66,12 +66,12 @@ export(lineage.domain_repeats.plot)
 export(lineage.neighbors.plot)
 export(lineage_sunburst)
 export(make_job_results_url)
+export(make_opts2procs)
 export(mapAcc2Name)
-export(mapAdvOption2Process)
-export(mapOption2Process)
 export(map_acc2name)
+export(map_advanced_opts2procs)
 export(msa_pdf)
-export(plotEstimatedWallTimes)
+export(plot_estimated_walltimes)
 export(prot2tax)
 export(prot2tax_old)
 export(removeAsterisks)
@@ -103,8 +103,8 @@ export(wordcloud2_element)
 export(wordcloud3)
 export(wordcloud_element)
 export(write.MsaAAMultipleAlignment)
-export(writeProcessRuntime2TSV)
-export(writeProcessRuntimeToYML)
+export(write_proc_medians_table)
+export(write_proc_medians_yml)
 importFrom(Biostrings,AAStringSet)
 importFrom(Biostrings,readAAStringSet)
 importFrom(Biostrings,toString)
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index f1fcb6db..c531fb09 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -3,16 +3,16 @@
 # pipeline.
 # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R")
 # for example, the reference for this file would be:
-# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R")
+# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R")
 common_root <- Sys.getenv("COMMON_SRC_ROOT")
 
 #' Construct list where names (MolEvolvR advanced options) point to processes
 #'
 #' @return list where names (MolEvolvR advanced options) point to processes
 #'
-#' example: list_opts2procs <- mapOption2Process
+#' example: list_opts2procs <- make_opts2procs
 #' @export
-mapOption2Process <- function() {
+make_opts2procs <- function() {
   tryCatch({
     opts2processes <- list(
       "homology_search" = c("dblast", "dblast_cleanup"),
@@ -26,7 +26,7 @@ mapOption2Process <- function() {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("mapOption2Process function execution completed.")
+    message("make_opts2procs function execution completed.")
   })
 
 }
@@ -40,16 +40,16 @@ mapOption2Process <- function() {
 #'
 #' example:
 #' advanced_opts <- c("homology_search", "domain_architecture")
-#' procs <- mapAdvOption2Process(advanced_opts)
+#' procs <- map_advanced_opts2procs(advanced_opts)
 #' @export
-mapAdvOption2Process <- function(advanced_opts) {
+map_advanced_opts2procs <- function(advanced_opts) {
   if (!is.character(advanced_opts)) {
     stop("Argument must be a character vector!")
   }
   tryCatch({
     # append 'always' to add procs that always run
     advanced_opts <- c(advanced_opts, "always")
-    opts2proc <- mapOption2Process()
+    opts2proc <- make_opts2procs()
     # setup index for opts2proc based on advanced options
     idx <- which(names(opts2proc) %in% advanced_opts)
     # extract processes that will run
@@ -60,7 +60,7 @@ mapAdvOption2Process <- function(advanced_opts) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("mapOption2Process function execution completed.")
+    message("make_opts2procs function execution completed.")
   })
 
 }
@@ -80,14 +80,14 @@ mapAdvOption2Process <- function(advanced_opts) {
 #'
 #' 1)
 #' dir_job_results <- "/data/scratch/janani/molevolvr_out"
-#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
+#' list_proc_medians <- get_proc_medians(dir_job_results)
 #'
 #' 2) from outside container environment
 #' common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
+#' list_proc_medians <- get_proc_medians(dir_job_results)
 #' @export
-calculateProcessRuntime <- function(dir_job_results) {
+get_proc_medians <- function(dir_job_results) {
   tryCatch({
     # Check if dir_job_results is a character string
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -139,7 +139,7 @@ calculateProcessRuntime <- function(dir_job_results) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("calculateProcessRuntime function execution completed.")
+    message("get_proc_medians function execution completed.")
   })
 
 }
@@ -156,12 +156,12 @@ calculateProcessRuntime <- function(dir_job_results) {
 #'
 #' @return [tbl_df] 2 columns: 1) process and 2) median seconds
 #'
-#' example: writeProcessRuntime2TSV(
+#' example: write_proc_medians_table(
 #'   "/data/scratch/janani/molevolvr_out/",
 #'   "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 #' )
 #' @export
-writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
+write_proc_medians_table <- function(dir_job_results, filepath) {
   tryCatch({
     # Error handling for input arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -175,7 +175,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
     if (!is.character(filepath) || length(filepath) != 1) {
       stop("Input 'filepath' must be a single character string.")
     }
-    df_proc_medians <- calculateProcessRuntime(dir_job_results) |>
+    df_proc_medians <- get_proc_medians(dir_job_results) |>
       tibble::as_tibble() |>
       tidyr::pivot_longer(
         dplyr::everything(),
@@ -192,7 +192,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("writeProcessRuntime2TSV function execution completed.")
+    message("write_proc_medians_table function execution completed.")
   })
 
 }
@@ -201,7 +201,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
 #' The default value of filepath is the value of the env var
-#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default
+#' MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default
 #' read location.
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results directory
@@ -212,13 +212,13 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 #'
 #' @examples
 #' \dontrun{
-#' writeProcessRuntimeToYML(
+#' write_proc_medians_yml(
 #'     "/data/scratch/janani/molevolvr_out/",
 #'     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 #' )
 #' }
 #' @export
-writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) {
+write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
   tryCatch({
     # Error handling for dir_job_results arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -238,7 +238,7 @@ writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) {
       stop("Input 'filepath' must be a single character string.")
     }
 
-    medians <- calculateProcessRuntime(dir_job_results)
+    medians <- get_proc_medians(dir_job_results)
     yaml::write_yaml(medians, filepath)
   }, error = function(e) {
     message(paste("Encountered an error: "), e$message)
@@ -261,9 +261,9 @@ writeProcessRuntimeToYML <- function(dir_job_results, filepath = NULL) {
 #'
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
-#' example: writeProcessRuntimeToYML()
+#' example: write_proc_medians_yml()
 #' @export
-getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
+get_proc_weights <- function(medians_yml_path = NULL) {
   if (is.null(medians_yml_path)) {
     medians_yml_path <- file.path(common_root,
                                   "molevol_scripts",
@@ -273,7 +273,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
 
   proc_weights <- tryCatch({
     # attempt to read the weights from the YAML file produced by
-    # writeProcessRuntimeToYML()
+    # write_proc_medians_yml()
     if (stringr::str_trim(medians_yml_path) == "") {
       stop(
         stringr::str_glue("medians_yml_path is empty 
@@ -285,7 +285,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
   },
   # to avoid fatal errors in reading the proc weights yaml,
   # some median process runtimes have been hardcoded based on
-  # the result of calculateProcessRuntime() from Jan 2024
+  # the result of get_proc_medians() from Jan 2024
   error = function(cond) {
     proc_weights <- list(
       "dblast" = 2810,
@@ -306,7 +306,7 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
 #' calculate the total estimated walltime for the job
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
-#' (see mapOption2Process for the options)
+#' (see make_opts2procs for the options)
 #' @param n_inputs total number of input proteins
 #'
 #' @importFrom dplyr if_else
@@ -314,11 +314,11 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
 #'
 #' @return total estimated number of seconds a job will process (walltime)
 #'
-#' example: calculateEstimatedWallTimeFromOptions(c("homology_search",
+#' example: advanced_opts2est_walltime	(c("homology_search",
 #'                                       "domain_architecture"),
 #'                                       n_inputs = 3, n_hits = 50L)
 #' @export
-calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
+advanced_opts2est_walltime	 <- function(advanced_opts,
                                                   n_inputs = 1L,
                                                   n_hits = NULL,
                                                   verbose = FALSE) {
@@ -348,7 +348,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
     }
 
     # Get process weights
-    proc_weights <- writeProcessRuntimeToYML()
+    proc_weights <- write_proc_medians_yml()
     if (!is.list(proc_weights)) {
       stop("Process weights could not be retrieved correctly.")
     }
@@ -357,7 +357,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
     proc_weights <- proc_weights[order(names(proc_weights))] |> unlist()
     all_procs <- names(proc_weights) |> sort()
     # get processes from advanced options and sort by names
-    procs_from_opts <- mapAdvOption2Process(advanced_opts)
+    procs_from_opts <- map_advanced_opts2procs(advanced_opts)
     procs_from_opts <- sort(procs_from_opts)
     # binary encode: yes proc will run (1); else 0
     binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L)
@@ -366,7 +366,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
       as.numeric()
     # calculate the additional processes to run for the homologous hits
     if ("homology_search" %in% advanced_opts) {
-      opts2procs <- mapOption2Process()
+      opts2procs <- make_opts2procs()
       # exclude the homology search processes for the homologous hits
       procs2exclude_for_homologs <- opts2procs[["homology_search"]]
       procs_homologs <- procs_from_opts[!(procs_from_opts 
@@ -380,7 +380,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
     }
     if (verbose) {
       msg <- stringr::str_glue(
-        "warnings from calculateEstimatedWallTimeFromOptions():\n",
+        "warnings from advanced_opts2est_walltime	():\n",
         "\tn_inputs={n_inputs}\n",
         "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
         "\test_walltime={est_walltime}\n\n"
@@ -393,7 +393,7 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("calculateEstimatedWallTimeFromOptions 
+    message("advanced_opts2est_walltime	 
             function execution completed.")
   })
 
@@ -403,18 +403,18 @@ calculateEstimatedWallTimeFromOptions <- function(advanced_opts,
 #' Decision function to assign job queue
 #'
 #' @param t_sec_estimate estimated number of seconds a job will process
-#' (from calculateEstimatedWallTimeFromOptions())
+#' (from advanced_opts2est_walltime	())
 #' @param t_long threshold value that defines the lower bound for assigning a
 #' job to the "long queue"
 #'
 #' @return a string of "short" or "long"
 #'
 #' example:
-#' calculateEstimatedWallTimeFromOptions(c("homology_search",
+#' advanced_opts2est_walltime	(c("homology_search",
 #'                                         "domain_architecture"), 3) |>
-#'   assignJobQueue()
+#'   assign_job_queue()
 #' @export
-assignJobQueue <- function(
+assign_job_queue <- function(
   t_sec_estimate,
   t_cutoff = 21600 # 6 hours
 ) {
@@ -434,7 +434,7 @@ assignJobQueue <- function(
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("assignJobQueue function execution completed.")
+    message("assign_job_queue function execution completed.")
   })
 
 }
@@ -451,13 +451,13 @@ assignJobQueue <- function(
 #' @return line plot object
 #'
 #' example:
-#' p <- plotEstimatedWallTimes()
+#' p <- plot_estimated_walltimes()
 #' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_
 #'                 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 #' @export
-plotEstimatedWallTimes <- function() {
+plot_estimated_walltimes <- function() {
   tryCatch({
-    opts <- mapOption2Process() |> names()
+    opts <- make_opts2procs() |> names()
     # get all possible submission permutations (powerset)
     get_powerset <- function(vec) {
       # generate powerset (do not include empty set)
@@ -482,7 +482,7 @@ plotEstimatedWallTimes <- function() {
             } else {
                 NULL
               }
-            est_walltime <- calculateEstimatedWallTimeFromOptions(
+            est_walltime <- advanced_opts2est_walltime	(
               advanced_opts,
               n_inputs = i,
               n_hits = n_hits,
@@ -541,7 +541,7 @@ plotEstimatedWallTimes <- function() {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("plotEstimatedWallTimes function execution completed.")
+    message("plot_estimated_walltimes function execution completed.")
   })
 
 }
diff --git a/man/calculateEstimatedWallTimeFromOptions.Rd b/man/advanced_opts2est_walltime.Rd
similarity index 73%
rename from man/calculateEstimatedWallTimeFromOptions.Rd
rename to man/advanced_opts2est_walltime.Rd
index e4eec3fd..02ae9621 100644
--- a/man/calculateEstimatedWallTimeFromOptions.Rd
+++ b/man/advanced_opts2est_walltime.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{calculateEstimatedWallTimeFromOptions}
-\alias{calculateEstimatedWallTimeFromOptions}
+\name{advanced_opts2est_walltime}
+\alias{advanced_opts2est_walltime}
 \title{Given MolEvolvR advanced options and number of inputs,
 calculate the total estimated walltime for the job}
 \usage{
-calculateEstimatedWallTimeFromOptions(
+advanced_opts2est_walltime(
   advanced_opts,
   n_inputs = 1L,
   n_hits = NULL,
@@ -14,14 +14,14 @@ calculateEstimatedWallTimeFromOptions(
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options
-(see mapOption2Process for the options)}
+(see make_opts2procs for the options)}
 
 \item{n_inputs}{total number of input proteins}
 }
 \value{
 total estimated number of seconds a job will process (walltime)
 
-example: calculateEstimatedWallTimeFromOptions(c("homology_search",
+example: advanced_opts2est_walltime	(c("homology_search",
 "domain_architecture"),
 n_inputs = 3, n_hits = 50L)
 }
diff --git a/man/assignJobQueue.Rd b/man/assign_job_queue.Rd
similarity index 68%
rename from man/assignJobQueue.Rd
rename to man/assign_job_queue.Rd
index 27511b6a..d2650fed 100644
--- a/man/assignJobQueue.Rd
+++ b/man/assign_job_queue.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{assignJobQueue}
-\alias{assignJobQueue}
+\name{assign_job_queue}
+\alias{assign_job_queue}
 \title{Decision function to assign job queue}
 \usage{
-assignJobQueue(t_sec_estimate, t_cutoff = 21600)
+assign_job_queue(t_sec_estimate, t_cutoff = 21600)
 }
 \arguments{
 \item{t_sec_estimate}{estimated number of seconds a job will process
-(from calculateEstimatedWallTimeFromOptions())}
+(from advanced_opts2est_walltime    ())}
 
 \item{t_long}{threshold value that defines the lower bound for assigning a
 job to the "long queue"}
@@ -17,9 +17,9 @@ job to the "long queue"}
 a string of "short" or "long"
 
 example:
-calculateEstimatedWallTimeFromOptions(c("homology_search",
+advanced_opts2est_walltime	(c("homology_search",
 "domain_architecture"), 3) |>
-assignJobQueue()
+assign_job_queue()
 }
 \description{
 Decision function to assign job queue
diff --git a/man/calculateProcessRuntime.Rd b/man/get_proc_medians.Rd
similarity index 76%
rename from man/calculateProcessRuntime.Rd
rename to man/get_proc_medians.Rd
index bb6dd1ed..b6db0b56 100644
--- a/man/calculateProcessRuntime.Rd
+++ b/man/get_proc_medians.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{calculateProcessRuntime}
-\alias{calculateProcessRuntime}
+\name{get_proc_medians}
+\alias{get_proc_medians}
 \title{Scrape MolEvolvR logs and calculate median processes}
 \usage{
-calculateProcessRuntime(dir_job_results)
+get_proc_medians(dir_job_results)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -21,12 +21,12 @@ examples:
 }
 
 dir_job_results <- "/data/scratch/janani/molevolvr_out"
-list_proc_medians <- calculateProcessRuntime(dir_job_results)
+list_proc_medians <- get_proc_medians(dir_job_results)
 \enumerate{
 \item from outside container environment
 common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-list_proc_medians <- calculateProcessRuntime(dir_job_results)
+list_proc_medians <- get_proc_medians(dir_job_results)
 }
 }
 \description{
diff --git a/man/getProcessRuntimeWeights.Rd b/man/get_proc_weights.Rd
similarity index 73%
rename from man/getProcessRuntimeWeights.Rd
rename to man/get_proc_weights.Rd
index 8eff0347..f48585cc 100644
--- a/man/getProcessRuntimeWeights.Rd
+++ b/man/get_proc_weights.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{getProcessRuntimeWeights}
-\alias{getProcessRuntimeWeights}
+\name{get_proc_weights}
+\alias{get_proc_weights}
 \title{Quickly get the runtime weights for MolEvolvR backend processes}
 \usage{
-getProcessRuntimeWeights(medians_yml_path = NULL)
+get_proc_weights(medians_yml_path = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -13,7 +13,7 @@ directory}
 \value{
 \link{list} names: processes; values: median runtime (seconds)
 
-example: writeProcessRuntimeToYML()
+example: write_proc_medians_yml()
 }
 \description{
 Quickly get the runtime weights for MolEvolvR backend processes
diff --git a/man/mapOption2Process.Rd b/man/make_opts2procs.Rd
similarity index 75%
rename from man/mapOption2Process.Rd
rename to man/make_opts2procs.Rd
index ff6905c5..07e208b2 100644
--- a/man/mapOption2Process.Rd
+++ b/man/make_opts2procs.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{mapOption2Process}
-\alias{mapOption2Process}
+\name{make_opts2procs}
+\alias{make_opts2procs}
 \title{Construct list where names (MolEvolvR advanced options) point to processes}
 \usage{
-mapOption2Process()
+make_opts2procs()
 }
 \value{
 list where names (MolEvolvR advanced options) point to processes
 
-example: list_opts2procs <- mapOption2Process
+example: list_opts2procs <- make_opts2procs
 }
 \description{
 Construct list where names (MolEvolvR advanced options) point to processes
diff --git a/man/mapAdvOption2Process.Rd b/man/map_advanced_opts2procs.Rd
similarity index 76%
rename from man/mapAdvOption2Process.Rd
rename to man/map_advanced_opts2procs.Rd
index 5bd9ee65..631708b4 100644
--- a/man/mapAdvOption2Process.Rd
+++ b/man/map_advanced_opts2procs.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{mapAdvOption2Process}
-\alias{mapAdvOption2Process}
+\name{map_advanced_opts2procs}
+\alias{map_advanced_opts2procs}
 \title{Use MolEvolvR advanced options to get associated processes}
 \usage{
-mapAdvOption2Process(advanced_opts)
+map_advanced_opts2procs(advanced_opts)
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options}
@@ -15,7 +15,7 @@ the advanced options
 
 example:
 advanced_opts <- c("homology_search", "domain_architecture")
-procs <- mapAdvOption2Process(advanced_opts)
+procs <- map_advanced_opts2procs(advanced_opts)
 }
 \description{
 Use MolEvolvR advanced options to get associated processes
diff --git a/man/plotEstimatedWallTimes.Rd b/man/plot_estimated_walltimes.Rd
similarity index 77%
rename from man/plotEstimatedWallTimes.Rd
rename to man/plot_estimated_walltimes.Rd
index 0d53cb32..884fed50 100644
--- a/man/plotEstimatedWallTimes.Rd
+++ b/man/plot_estimated_walltimes.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{plotEstimatedWallTimes}
-\alias{plotEstimatedWallTimes}
+\name{plot_estimated_walltimes}
+\alias{plot_estimated_walltimes}
 \title{Plot the estimated runtimes for different advanced options and number
 of inputs}
 \usage{
-plotEstimatedWallTimes()
+plot_estimated_walltimes()
 }
 \value{
 line plot object
 
 example:
-p <- plotEstimatedWallTimes()
+p <- plot_estimated_walltimes()
 ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_
 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 }
diff --git a/man/writeProcessRuntime2TSV.Rd b/man/write_proc_medians_table.Rd
similarity index 77%
rename from man/writeProcessRuntime2TSV.Rd
rename to man/write_proc_medians_table.Rd
index 03cbbd68..2ae7a97b 100644
--- a/man/writeProcessRuntime2TSV.Rd
+++ b/man/write_proc_medians_table.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{writeProcessRuntime2TSV}
-\alias{writeProcessRuntime2TSV}
+\name{write_proc_medians_table}
+\alias{write_proc_medians_table}
 \title{Write a table of 2 columns: 1) process and 2) median seconds}
 \usage{
-writeProcessRuntime2TSV(dir_job_results, filepath)
+write_proc_medians_table(dir_job_results, filepath)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results}
@@ -14,7 +14,7 @@ writeProcessRuntime2TSV(dir_job_results, filepath)
 \value{
 \link{tbl_df} 2 columns: 1) process and 2) median seconds
 
-example: writeProcessRuntime2TSV(
+example: write_proc_medians_table(
 "/data/scratch/janani/molevolvr_out/",
 "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 )
diff --git a/man/writeProcessRuntimeToYML.Rd b/man/write_proc_medians_yml.Rd
similarity index 74%
rename from man/writeProcessRuntimeToYML.Rd
rename to man/write_proc_medians_yml.Rd
index e4a5c8ad..74757f1f 100644
--- a/man/writeProcessRuntimeToYML.Rd
+++ b/man/write_proc_medians_yml.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{writeProcessRuntimeToYML}
-\alias{writeProcessRuntimeToYML}
+\name{write_proc_medians_yml}
+\alias{write_proc_medians_yml}
 \title{Compute median process runtimes, then write a YAML list of the processes and
 their median runtimes in seconds to the path specified by 'filepath'.}
 \usage{
-writeProcessRuntimeToYML(dir_job_results, filepath = NULL)
+write_proc_medians_yml(dir_job_results, filepath = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory}
@@ -15,12 +15,12 @@ uses ./molevol_scripts/log_data/job_proc_weights.yml}
 }
 \description{
 The default value of filepath is the value of the env var
-MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntimeToYML() also uses as its default
+MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default
 read location.
 }
 \examples{
 \dontrun{
-writeProcessRuntimeToYML(
+write_proc_medians_yml(
     "/data/scratch/janani/molevolvr_out/",
     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 )

From a543898c8579065cbe3125f40b8cdf66200fc06f Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Thu, 10 Oct 2024 11:00:41 +0100
Subject: [PATCH 10/19] Renamed the following functions in
 R/assign_job_queue.R;
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

| Original                        | Modified                         | User Facing                      |
|---------------------------------|----------------------------------|----------------------------------|
| assign_job_queue                | assignJobQueue                   | ✔️                               |
| make_opts2procs                 | mapOption2Process                | ✔️                               |
| map_advanced_opts2procs         | mapAdvOption2Process             | ✔️                               |
| get_proc_medians                | calculateProcessRuntime          | ✔️                               |
| write_proc_medians_table        | writeProcessRuntime2TSV          | ✔️                               |
| write_proc_medians_yml          | writeProcessRuntime2YML          | ✔️                               |
| get_proc_weights                | getProcessRuntimeWeights         | ✔️                               |
| advanced_opts2est_walltime      | calculateEstimatedWallTimeFromOpts| ✔️                               |
| plot_estimated_walltimes        | plotEstimatedWallTimes           | ✔️                               |
---
 NAMESPACE                                     | 18 ++--
 R/assign_job_queue.R                          | 86 +++++++++----------
 ...{assign_job_queue.Rd => assignJobQueue.Rd} | 12 +--
 ... => calculateEstimatedWallTimeFromOpts.Rd} | 10 +--
 ..._medians.Rd => calculateProcessRuntime.Rd} | 10 +--
 ...weights.Rd => getProcessRuntimeWeights.Rd} |  8 +-
 ..._opts2procs.Rd => mapAdvOption2Process.Rd} |  8 +-
 ...ake_opts2procs.Rd => mapOption2Process.Rd} |  8 +-
 ...walltimes.Rd => plotEstimatedWallTimes.Rd} |  8 +-
 ...ns_table.Rd => writeProcessRuntime2TSV.Rd} |  8 +-
 ...ians_yml.Rd => writeProcessRuntime2YML.Rd} | 10 +--
 11 files changed, 93 insertions(+), 93 deletions(-)
 rename man/{assign_job_queue.Rd => assignJobQueue.Rd} (68%)
 rename man/{advanced_opts2est_walltime.Rd => calculateEstimatedWallTimeFromOpts.Rd} (74%)
 rename man/{get_proc_medians.Rd => calculateProcessRuntime.Rd} (76%)
 rename man/{get_proc_weights.Rd => getProcessRuntimeWeights.Rd} (73%)
 rename man/{map_advanced_opts2procs.Rd => mapAdvOption2Process.Rd} (76%)
 rename man/{make_opts2procs.Rd => mapOption2Process.Rd} (75%)
 rename man/{plot_estimated_walltimes.Rd => plotEstimatedWallTimes.Rd} (77%)
 rename man/{write_proc_medians_table.Rd => writeProcessRuntime2TSV.Rd} (77%)
 rename man/{write_proc_medians_yml.Rd => writeProcessRuntime2YML.Rd} (74%)

diff --git a/NAMESPACE b/NAMESPACE
index c811bac3..65cc791e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,10 +15,11 @@ export(add_leaves)
 export(add_lins)
 export(add_name)
 export(add_tax)
-export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
-export(assign_job_queue)
+export(assignJobQueue)
+export(calculateEstimatedWallTimeFromOpts)
+export(calculateProcessRuntime)
 export(cleanClusters)
 export(cleanDomainArchitecture)
 export(cleanGeneDescription)
@@ -53,10 +54,9 @@ export(generate_all_aln2fa)
 export(generate_fa2tre)
 export(generate_msa)
 export(generate_trees)
+export(getProcessRuntimeWeights)
 export(get_accnums_from_fasta_file)
 export(get_job_message)
-export(get_proc_medians)
-export(get_proc_weights)
 export(ipg2lin)
 export(ipr2viz)
 export(ipr2viz_web)
@@ -66,12 +66,12 @@ export(lineage.domain_repeats.plot)
 export(lineage.neighbors.plot)
 export(lineage_sunburst)
 export(make_job_results_url)
-export(make_opts2procs)
 export(mapAcc2Name)
+export(mapAdvOption2Process)
+export(mapOption2Process)
 export(map_acc2name)
-export(map_advanced_opts2procs)
 export(msa_pdf)
-export(plot_estimated_walltimes)
+export(plotEstimatedWallTimes)
 export(prot2tax)
 export(prot2tax_old)
 export(removeAsterisks)
@@ -103,8 +103,8 @@ export(wordcloud2_element)
 export(wordcloud3)
 export(wordcloud_element)
 export(write.MsaAAMultipleAlignment)
-export(write_proc_medians_table)
-export(write_proc_medians_yml)
+export(writeProcessRuntime2TSV)
+export(writeProcessRuntime2YML)
 importFrom(Biostrings,AAStringSet)
 importFrom(Biostrings,readAAStringSet)
 importFrom(Biostrings,toString)
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index c531fb09..10df1e3a 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -3,16 +3,16 @@
 # pipeline.
 # to use this, construct paths like so: file.path(common_root, "path", "to", "file.R")
 # for example, the reference for this file would be:
-# file.path(common_root, "molevol_scripts", "R", "assign_job_queue.R")
+# file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R")
 common_root <- Sys.getenv("COMMON_SRC_ROOT")
 
 #' Construct list where names (MolEvolvR advanced options) point to processes
 #'
 #' @return list where names (MolEvolvR advanced options) point to processes
 #'
-#' example: list_opts2procs <- make_opts2procs
+#' example: list_opts2procs <- mapOption2Process
 #' @export
-make_opts2procs <- function() {
+mapOption2Process <- function() {
   tryCatch({
     opts2processes <- list(
       "homology_search" = c("dblast", "dblast_cleanup"),
@@ -26,7 +26,7 @@ make_opts2procs <- function() {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("make_opts2procs function execution completed.")
+    message("mapOption2Process function execution completed.")
   })
 
 }
@@ -40,16 +40,16 @@ make_opts2procs <- function() {
 #'
 #' example:
 #' advanced_opts <- c("homology_search", "domain_architecture")
-#' procs <- map_advanced_opts2procs(advanced_opts)
+#' procs <- mapAdvOption2Process(advanced_opts)
 #' @export
-map_advanced_opts2procs <- function(advanced_opts) {
+mapAdvOption2Process <- function(advanced_opts) {
   if (!is.character(advanced_opts)) {
     stop("Argument must be a character vector!")
   }
   tryCatch({
     # append 'always' to add procs that always run
     advanced_opts <- c(advanced_opts, "always")
-    opts2proc <- make_opts2procs()
+    opts2proc <- mapOption2Process()
     # setup index for opts2proc based on advanced options
     idx <- which(names(opts2proc) %in% advanced_opts)
     # extract processes that will run
@@ -60,7 +60,7 @@ map_advanced_opts2procs <- function(advanced_opts) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("make_opts2procs function execution completed.")
+    message("mapOption2Process function execution completed.")
   })
 
 }
@@ -80,14 +80,14 @@ map_advanced_opts2procs <- function(advanced_opts) {
 #'
 #' 1)
 #' dir_job_results <- "/data/scratch/janani/molevolvr_out"
-#' list_proc_medians <- get_proc_medians(dir_job_results)
+#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
 #'
 #' 2) from outside container environment
 #' common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 #' dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-#' list_proc_medians <- get_proc_medians(dir_job_results)
+#' list_proc_medians <- calculateProcessRuntime(dir_job_results)
 #' @export
-get_proc_medians <- function(dir_job_results) {
+calculateProcessRuntime <- function(dir_job_results) {
   tryCatch({
     # Check if dir_job_results is a character string
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -139,7 +139,7 @@ get_proc_medians <- function(dir_job_results) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("get_proc_medians function execution completed.")
+    message("calculateProcessRuntime function execution completed.")
   })
 
 }
@@ -156,12 +156,12 @@ get_proc_medians <- function(dir_job_results) {
 #'
 #' @return [tbl_df] 2 columns: 1) process and 2) median seconds
 #'
-#' example: write_proc_medians_table(
+#' example: writeProcessRuntime2TSV(
 #'   "/data/scratch/janani/molevolvr_out/",
 #'   "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 #' )
 #' @export
-write_proc_medians_table <- function(dir_job_results, filepath) {
+writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
   tryCatch({
     # Error handling for input arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -175,7 +175,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
     if (!is.character(filepath) || length(filepath) != 1) {
       stop("Input 'filepath' must be a single character string.")
     }
-    df_proc_medians <- get_proc_medians(dir_job_results) |>
+    df_proc_medians <- calculateProcessRuntime(dir_job_results) |>
       tibble::as_tibble() |>
       tidyr::pivot_longer(
         dplyr::everything(),
@@ -192,7 +192,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("write_proc_medians_table function execution completed.")
+    message("writeProcessRuntime2TSV function execution completed.")
   })
 
 }
@@ -201,7 +201,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
 #' The default value of filepath is the value of the env var
-#' MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default
+#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default
 #' read location.
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results directory
@@ -212,13 +212,13 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
 #'
 #' @examples
 #' \dontrun{
-#' write_proc_medians_yml(
+#' writeProcessRuntime2YML(
 #'     "/data/scratch/janani/molevolvr_out/",
 #'     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 #' )
 #' }
 #' @export
-write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
+writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) {
   tryCatch({
     # Error handling for dir_job_results arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
@@ -238,14 +238,14 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
       stop("Input 'filepath' must be a single character string.")
     }
 
-    medians <- get_proc_medians(dir_job_results)
+    medians <- calculateProcessRuntime(dir_job_results)
     yaml::write_yaml(medians, filepath)
   }, error = function(e) {
     message(paste("Encountered an error: "), e$message)
   }, warning = function(w) {
     message(paste("Warning: "), w$message)
   }, finally = {
-    message("write_proc_medians_table function execution completed.")
+    message("writeProcessRuntime2TSV function execution completed.")
   }
   )
 
@@ -261,9 +261,9 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
 #'
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
-#' example: write_proc_medians_yml()
+#' example: writeProcessRuntime2YML()
 #' @export
-get_proc_weights <- function(medians_yml_path = NULL) {
+getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
   if (is.null(medians_yml_path)) {
     medians_yml_path <- file.path(common_root,
                                   "molevol_scripts",
@@ -273,7 +273,7 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 
   proc_weights <- tryCatch({
     # attempt to read the weights from the YAML file produced by
-    # write_proc_medians_yml()
+    # writeProcessRuntime2YML()
     if (stringr::str_trim(medians_yml_path) == "") {
       stop(
         stringr::str_glue("medians_yml_path is empty 
@@ -285,7 +285,7 @@ get_proc_weights <- function(medians_yml_path = NULL) {
   },
   # to avoid fatal errors in reading the proc weights yaml,
   # some median process runtimes have been hardcoded based on
-  # the result of get_proc_medians() from Jan 2024
+  # the result of calculateProcessRuntime() from Jan 2024
   error = function(cond) {
     proc_weights <- list(
       "dblast" = 2810,
@@ -306,7 +306,7 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 #' calculate the total estimated walltime for the job
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
-#' (see make_opts2procs for the options)
+#' (see mapOption2Process for the options)
 #' @param n_inputs total number of input proteins
 #'
 #' @importFrom dplyr if_else
@@ -314,11 +314,11 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 #'
 #' @return total estimated number of seconds a job will process (walltime)
 #'
-#' example: advanced_opts2est_walltime	(c("homology_search",
+#' example: calculateEstimatedWallTimeFromOpts	(c("homology_search",
 #'                                       "domain_architecture"),
 #'                                       n_inputs = 3, n_hits = 50L)
 #' @export
-advanced_opts2est_walltime	 <- function(advanced_opts,
+calculateEstimatedWallTimeFromOpts	 <- function(advanced_opts,
                                                   n_inputs = 1L,
                                                   n_hits = NULL,
                                                   verbose = FALSE) {
@@ -348,7 +348,7 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
     }
 
     # Get process weights
-    proc_weights <- write_proc_medians_yml()
+    proc_weights <- writeProcessRuntime2YML()
     if (!is.list(proc_weights)) {
       stop("Process weights could not be retrieved correctly.")
     }
@@ -357,7 +357,7 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
     proc_weights <- proc_weights[order(names(proc_weights))] |> unlist()
     all_procs <- names(proc_weights) |> sort()
     # get processes from advanced options and sort by names
-    procs_from_opts <- map_advanced_opts2procs(advanced_opts)
+    procs_from_opts <- mapAdvOption2Process(advanced_opts)
     procs_from_opts <- sort(procs_from_opts)
     # binary encode: yes proc will run (1); else 0
     binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L)
@@ -366,7 +366,7 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
       as.numeric()
     # calculate the additional processes to run for the homologous hits
     if ("homology_search" %in% advanced_opts) {
-      opts2procs <- make_opts2procs()
+      opts2procs <- mapOption2Process()
       # exclude the homology search processes for the homologous hits
       procs2exclude_for_homologs <- opts2procs[["homology_search"]]
       procs_homologs <- procs_from_opts[!(procs_from_opts 
@@ -380,7 +380,7 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
     }
     if (verbose) {
       msg <- stringr::str_glue(
-        "warnings from advanced_opts2est_walltime	():\n",
+        "warnings from calculateEstimatedWallTimeFromOpts	():\n",
         "\tn_inputs={n_inputs}\n",
         "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
         "\test_walltime={est_walltime}\n\n"
@@ -393,7 +393,7 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("advanced_opts2est_walltime	 
+    message("calculateEstimatedWallTimeFromOpts	 
             function execution completed.")
   })
 
@@ -403,18 +403,18 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
 #' Decision function to assign job queue
 #'
 #' @param t_sec_estimate estimated number of seconds a job will process
-#' (from advanced_opts2est_walltime	())
+#' (from calculateEstimatedWallTimeFromOpts	())
 #' @param t_long threshold value that defines the lower bound for assigning a
 #' job to the "long queue"
 #'
 #' @return a string of "short" or "long"
 #'
 #' example:
-#' advanced_opts2est_walltime	(c("homology_search",
+#' calculateEstimatedWallTimeFromOpts	(c("homology_search",
 #'                                         "domain_architecture"), 3) |>
-#'   assign_job_queue()
+#'   assignJobQueue()
 #' @export
-assign_job_queue <- function(
+assignJobQueue <- function(
   t_sec_estimate,
   t_cutoff = 21600 # 6 hours
 ) {
@@ -434,7 +434,7 @@ assign_job_queue <- function(
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("assign_job_queue function execution completed.")
+    message("assignJobQueue function execution completed.")
   })
 
 }
@@ -451,13 +451,13 @@ assign_job_queue <- function(
 #' @return line plot object
 #'
 #' example:
-#' p <- plot_estimated_walltimes()
+#' p <- plotEstimatedWallTimes()
 #' ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_
 #'                 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 #' @export
-plot_estimated_walltimes <- function() {
+plotEstimatedWallTimes <- function() {
   tryCatch({
-    opts <- make_opts2procs() |> names()
+    opts <- mapOption2Process() |> names()
     # get all possible submission permutations (powerset)
     get_powerset <- function(vec) {
       # generate powerset (do not include empty set)
@@ -482,7 +482,7 @@ plot_estimated_walltimes <- function() {
             } else {
                 NULL
               }
-            est_walltime <- advanced_opts2est_walltime	(
+            est_walltime <- calculateEstimatedWallTimeFromOpts	(
               advanced_opts,
               n_inputs = i,
               n_hits = n_hits,
@@ -541,7 +541,7 @@ plot_estimated_walltimes <- function() {
   }, warning = function(w) {
     message(paste("Warning: ", w$message))
   }, finally = {
-    message("plot_estimated_walltimes function execution completed.")
+    message("plotEstimatedWallTimes function execution completed.")
   })
 
 }
diff --git a/man/assign_job_queue.Rd b/man/assignJobQueue.Rd
similarity index 68%
rename from man/assign_job_queue.Rd
rename to man/assignJobQueue.Rd
index d2650fed..3663ce56 100644
--- a/man/assign_job_queue.Rd
+++ b/man/assignJobQueue.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{assign_job_queue}
-\alias{assign_job_queue}
+\name{assignJobQueue}
+\alias{assignJobQueue}
 \title{Decision function to assign job queue}
 \usage{
-assign_job_queue(t_sec_estimate, t_cutoff = 21600)
+assignJobQueue(t_sec_estimate, t_cutoff = 21600)
 }
 \arguments{
 \item{t_sec_estimate}{estimated number of seconds a job will process
-(from advanced_opts2est_walltime    ())}
+(from calculateEstimatedWallTimeFromOpts    ())}
 
 \item{t_long}{threshold value that defines the lower bound for assigning a
 job to the "long queue"}
@@ -17,9 +17,9 @@ job to the "long queue"}
 a string of "short" or "long"
 
 example:
-advanced_opts2est_walltime	(c("homology_search",
+calculateEstimatedWallTimeFromOpts	(c("homology_search",
 "domain_architecture"), 3) |>
-assign_job_queue()
+assignJobQueue()
 }
 \description{
 Decision function to assign job queue
diff --git a/man/advanced_opts2est_walltime.Rd b/man/calculateEstimatedWallTimeFromOpts.Rd
similarity index 74%
rename from man/advanced_opts2est_walltime.Rd
rename to man/calculateEstimatedWallTimeFromOpts.Rd
index 02ae9621..c09cf6a6 100644
--- a/man/advanced_opts2est_walltime.Rd
+++ b/man/calculateEstimatedWallTimeFromOpts.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{advanced_opts2est_walltime}
-\alias{advanced_opts2est_walltime}
+\name{calculateEstimatedWallTimeFromOpts}
+\alias{calculateEstimatedWallTimeFromOpts}
 \title{Given MolEvolvR advanced options and number of inputs,
 calculate the total estimated walltime for the job}
 \usage{
-advanced_opts2est_walltime(
+calculateEstimatedWallTimeFromOpts(
   advanced_opts,
   n_inputs = 1L,
   n_hits = NULL,
@@ -14,14 +14,14 @@ advanced_opts2est_walltime(
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options
-(see make_opts2procs for the options)}
+(see mapOption2Process for the options)}
 
 \item{n_inputs}{total number of input proteins}
 }
 \value{
 total estimated number of seconds a job will process (walltime)
 
-example: advanced_opts2est_walltime	(c("homology_search",
+example: calculateEstimatedWallTimeFromOpts	(c("homology_search",
 "domain_architecture"),
 n_inputs = 3, n_hits = 50L)
 }
diff --git a/man/get_proc_medians.Rd b/man/calculateProcessRuntime.Rd
similarity index 76%
rename from man/get_proc_medians.Rd
rename to man/calculateProcessRuntime.Rd
index b6db0b56..bb6dd1ed 100644
--- a/man/get_proc_medians.Rd
+++ b/man/calculateProcessRuntime.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{get_proc_medians}
-\alias{get_proc_medians}
+\name{calculateProcessRuntime}
+\alias{calculateProcessRuntime}
 \title{Scrape MolEvolvR logs and calculate median processes}
 \usage{
-get_proc_medians(dir_job_results)
+calculateProcessRuntime(dir_job_results)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -21,12 +21,12 @@ examples:
 }
 
 dir_job_results <- "/data/scratch/janani/molevolvr_out"
-list_proc_medians <- get_proc_medians(dir_job_results)
+list_proc_medians <- calculateProcessRuntime(dir_job_results)
 \enumerate{
 \item from outside container environment
 common_root <- "/data/molevolvr_transfer/molevolvr_dev"
 dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results"
-list_proc_medians <- get_proc_medians(dir_job_results)
+list_proc_medians <- calculateProcessRuntime(dir_job_results)
 }
 }
 \description{
diff --git a/man/get_proc_weights.Rd b/man/getProcessRuntimeWeights.Rd
similarity index 73%
rename from man/get_proc_weights.Rd
rename to man/getProcessRuntimeWeights.Rd
index f48585cc..ff3c8e5d 100644
--- a/man/get_proc_weights.Rd
+++ b/man/getProcessRuntimeWeights.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{get_proc_weights}
-\alias{get_proc_weights}
+\name{getProcessRuntimeWeights}
+\alias{getProcessRuntimeWeights}
 \title{Quickly get the runtime weights for MolEvolvR backend processes}
 \usage{
-get_proc_weights(medians_yml_path = NULL)
+getProcessRuntimeWeights(medians_yml_path = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results
@@ -13,7 +13,7 @@ directory}
 \value{
 \link{list} names: processes; values: median runtime (seconds)
 
-example: write_proc_medians_yml()
+example: writeProcessRuntime2YML()
 }
 \description{
 Quickly get the runtime weights for MolEvolvR backend processes
diff --git a/man/map_advanced_opts2procs.Rd b/man/mapAdvOption2Process.Rd
similarity index 76%
rename from man/map_advanced_opts2procs.Rd
rename to man/mapAdvOption2Process.Rd
index 631708b4..5bd9ee65 100644
--- a/man/map_advanced_opts2procs.Rd
+++ b/man/mapAdvOption2Process.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{map_advanced_opts2procs}
-\alias{map_advanced_opts2procs}
+\name{mapAdvOption2Process}
+\alias{mapAdvOption2Process}
 \title{Use MolEvolvR advanced options to get associated processes}
 \usage{
-map_advanced_opts2procs(advanced_opts)
+mapAdvOption2Process(advanced_opts)
 }
 \arguments{
 \item{advanced_opts}{character vector of MolEvolvR advanced options}
@@ -15,7 +15,7 @@ the advanced options
 
 example:
 advanced_opts <- c("homology_search", "domain_architecture")
-procs <- map_advanced_opts2procs(advanced_opts)
+procs <- mapAdvOption2Process(advanced_opts)
 }
 \description{
 Use MolEvolvR advanced options to get associated processes
diff --git a/man/make_opts2procs.Rd b/man/mapOption2Process.Rd
similarity index 75%
rename from man/make_opts2procs.Rd
rename to man/mapOption2Process.Rd
index 07e208b2..ff6905c5 100644
--- a/man/make_opts2procs.Rd
+++ b/man/mapOption2Process.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{make_opts2procs}
-\alias{make_opts2procs}
+\name{mapOption2Process}
+\alias{mapOption2Process}
 \title{Construct list where names (MolEvolvR advanced options) point to processes}
 \usage{
-make_opts2procs()
+mapOption2Process()
 }
 \value{
 list where names (MolEvolvR advanced options) point to processes
 
-example: list_opts2procs <- make_opts2procs
+example: list_opts2procs <- mapOption2Process
 }
 \description{
 Construct list where names (MolEvolvR advanced options) point to processes
diff --git a/man/plot_estimated_walltimes.Rd b/man/plotEstimatedWallTimes.Rd
similarity index 77%
rename from man/plot_estimated_walltimes.Rd
rename to man/plotEstimatedWallTimes.Rd
index 884fed50..0d53cb32 100644
--- a/man/plot_estimated_walltimes.Rd
+++ b/man/plotEstimatedWallTimes.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{plot_estimated_walltimes}
-\alias{plot_estimated_walltimes}
+\name{plotEstimatedWallTimes}
+\alias{plotEstimatedWallTimes}
 \title{Plot the estimated runtimes for different advanced options and number
 of inputs}
 \usage{
-plot_estimated_walltimes()
+plotEstimatedWallTimes()
 }
 \value{
 line plot object
 
 example:
-p <- plot_estimated_walltimes()
+p <- plotEstimatedWallTimes()
 ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_
 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 }
diff --git a/man/write_proc_medians_table.Rd b/man/writeProcessRuntime2TSV.Rd
similarity index 77%
rename from man/write_proc_medians_table.Rd
rename to man/writeProcessRuntime2TSV.Rd
index 2ae7a97b..03cbbd68 100644
--- a/man/write_proc_medians_table.Rd
+++ b/man/writeProcessRuntime2TSV.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{write_proc_medians_table}
-\alias{write_proc_medians_table}
+\name{writeProcessRuntime2TSV}
+\alias{writeProcessRuntime2TSV}
 \title{Write a table of 2 columns: 1) process and 2) median seconds}
 \usage{
-write_proc_medians_table(dir_job_results, filepath)
+writeProcessRuntime2TSV(dir_job_results, filepath)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results}
@@ -14,7 +14,7 @@ write_proc_medians_table(dir_job_results, filepath)
 \value{
 \link{tbl_df} 2 columns: 1) process and 2) median seconds
 
-example: write_proc_medians_table(
+example: writeProcessRuntime2TSV(
 "/data/scratch/janani/molevolvr_out/",
 "/data/scratch/janani/molevolvr_out/log_tbl.tsv"
 )
diff --git a/man/write_proc_medians_yml.Rd b/man/writeProcessRuntime2YML.Rd
similarity index 74%
rename from man/write_proc_medians_yml.Rd
rename to man/writeProcessRuntime2YML.Rd
index 74757f1f..b43f39ee 100644
--- a/man/write_proc_medians_yml.Rd
+++ b/man/writeProcessRuntime2YML.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/assign_job_queue.R
-\name{write_proc_medians_yml}
-\alias{write_proc_medians_yml}
+\name{writeProcessRuntime2YML}
+\alias{writeProcessRuntime2YML}
 \title{Compute median process runtimes, then write a YAML list of the processes and
 their median runtimes in seconds to the path specified by 'filepath'.}
 \usage{
-write_proc_medians_yml(dir_job_results, filepath = NULL)
+writeProcessRuntime2YML(dir_job_results, filepath = NULL)
 }
 \arguments{
 \item{dir_job_results}{\link{chr} path to MolEvolvR job_results directory}
@@ -15,12 +15,12 @@ uses ./molevol_scripts/log_data/job_proc_weights.yml}
 }
 \description{
 The default value of filepath is the value of the env var
-MOLEVOLVR_PROC_WEIGHTS, which write_proc_medians_yml() also uses as its default
+MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default
 read location.
 }
 \examples{
 \dontrun{
-write_proc_medians_yml(
+writeProcessRuntime2YML(
     "/data/scratch/janani/molevolvr_out/",
     "/data/scratch/janani/molevolvr_out/log_tbl.yml"
 )

From e9460610fb054c1c3109cf728561efe2e6619104 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sat, 12 Oct 2024 14:09:40 -0600
Subject: [PATCH 11/19] remove outdated .Rd

---
 man/GCA2lin.Rd    |  0
 man/acc2lin.Rd    | 57 -----------------------------------------------
 man/efetch_ipg.Rd |  0
 man/ipg2lin.Rd    |  0
 man/sink.reset.Rd |  0
 5 files changed, 57 deletions(-)
 delete mode 100644 man/GCA2lin.Rd
 delete mode 100644 man/acc2lin.Rd
 delete mode 100644 man/efetch_ipg.Rd
 delete mode 100644 man/ipg2lin.Rd
 delete mode 100644 man/sink.reset.Rd

diff --git a/man/GCA2lin.Rd b/man/GCA2lin.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
deleted file mode 100644
index d3f2468b..00000000
--- a/man/acc2lin.Rd
+++ /dev/null
@@ -1,57 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/acc2lin.R, R/lineage.R
-\name{acc2lin}
-\alias{acc2lin}
-\title{acc2lin}
-\usage{
-acc2lin(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-
-acc2lin(
-  accessions,
-  assembly_path,
-  lineagelookup_path,
-  ipgout_path = NULL,
-  plan = "multicore"
-)
-}
-\arguments{
-\item{accessions}{Character vector of protein accessions}
-
-\item{assembly_path}{String of the path to the assembly_summary path
-This file can be generated using the "DownloadAssemblySummary()" function}
-
-\item{lineagelookup_path}{String of the path to the lineage lookup file
-(taxid to lineage mapping). This file can be generated using the}
-
-\item{ipgout_path}{Path to write the results of the efetch run of the accessions
-on the ipg database. If NULL, the file will not be written. Defaults to NULL}
-
-\item{plan}{}
-}
-\value{
-Describe return, in detail
-}
-\description{
-This function combines 'efetch_ipg()'
-and 'ipg2lin()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
-
-Function to map protein accession numbers to lineage
-
-This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
-of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
-}
-\examples{
-\dontrun{
-acc2lin()
-}
-}
-\author{
-Samuel Chen, Janani Ravi
-}
diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd
deleted file mode 100644
index e69de29b..00000000

From 9571333c44ac879d9b2b6bc1a38d454fdda69a39 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sat, 12 Oct 2024 14:10:10 -0600
Subject: [PATCH 12/19] let R sort NAMESPACE

---
 NAMESPACE | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 60bec5b1..c448ff13 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,9 +11,7 @@ export(addLeaves2Alignment)
 export(addLineage)
 export(addName)
 export(addTaxID)
-export(advanced_opts2est_walltime)
 export(alignFasta)
-export(assert_count_df)
 export(assignJobQueue)
 export(calculateEstimatedWallTimeFromOpts)
 export(calculateProcessRuntime)
@@ -35,9 +33,9 @@ export(countByColumn)
 export(createFA2Tree)
 export(createJobResultsURL)
 export(createJobStatusEmailMessage)
+export(createLineageLookup)
 export(createRepresentativeAccNum)
 export(createWordCloud2Element)
-export(createLineageLookup)
 export(createWordCloudElement)
 export(domain_network)
 export(downloadAssemblySummary)
@@ -50,14 +48,14 @@ export(formatJobArgumentsHTML)
 export(gc_undirected_network)
 export(generateAllAlignments2FA)
 export(generate_msa)
-export(getProcessRuntimeWeights)
 export(getAccNumFromFA)
+export(getProcessRuntimeWeights)
 export(getTopAccByLinDomArch)
 export(mapAcc2Name)
 export(mapAdvOption2Process)
 export(mapOption2Process)
-export(map_acc2name)
 export(msa_pdf)
+export(plotEstimatedWallTimes)
 export(plotIPR2Viz)
 export(plotIPR2VizWeb)
 export(plotLineageDA)
@@ -70,12 +68,10 @@ export(plotStackedLineage)
 export(plotSunburst)
 export(plotTreemap)
 export(plotUpSet)
-export(plotEstimatedWallTimes)
 export(prepareColumnParams)
 export(prepareSingleColumnParams)
 export(proteinAcc2TaxID)
 export(proteinAcc2TaxID_old)
-export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)

From 8c573693b92f2aa216b269e24244d2d63fe0d3a9 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sat, 12 Oct 2024 14:10:26 -0600
Subject: [PATCH 13/19] regen new .Rd

---
 man/GCA2Lineage.Rd | 2 +-
 man/IPG2Lineage.Rd | 5 +++--
 man/efetchIPG.Rd   | 3 ++-
 man/sinkReset.Rd   | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/man/GCA2Lineage.Rd b/man/GCA2Lineage.Rd
index 9ec0ce56..9a2a7a30 100644
--- a/man/GCA2Lineage.Rd
+++ b/man/GCA2Lineage.Rd
@@ -19,7 +19,7 @@ This file can be generated using the "downloadAssemblySummary()" function}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
-"create_lineage_lookup()" function}
+"createLineageLookup()" function}
 
 \item{acc_col}{}
 }
diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd
index 282d5cbf..118812ab 100644
--- a/man/IPG2Lineage.Rd
+++ b/man/IPG2Lineage.Rd
@@ -29,7 +29,7 @@ file}
 
 \item{lineagelookup_path}{String of the path to the lineage lookup file
 (taxid to lineage mapping). This file can be generated using the
-"create_lineage_lookup()" function}
+"createLineageLookup()" function}
 
 \item{assembly_path}{String of the path to the assembly_summary path
 This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function}
@@ -39,7 +39,8 @@ A \code{data.table} with the lineage information for the provided protein
 accessions.
 }
 \description{
-Takes the resulting file of an efetch run on the ipg database and
+Takes the resulting file
+of an efetch run on the ipg database and
 
 Takes the resulting file of an efetch run on the ipg database and
 append lineage, and taxid columns
diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd
index 047e2652..db63024f 100644
--- a/man/efetchIPG.Rd
+++ b/man/efetchIPG.Rd
@@ -23,7 +23,8 @@ the ipg database}
 No return value. The function writes the fetched results to \code{out_path}.
 }
 \description{
-Perform efetch on the ipg database and write the results to out_path
+Perform efetch on the ipg database
+and write the results to out_path
 
 Perform efetch on the ipg database and write the results to out_path
 }
diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd
index 0285c0b2..e3fc7ce4 100644
--- a/man/sinkReset.Rd
+++ b/man/sinkReset.Rd
@@ -8,6 +8,7 @@ sinkReset()
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
+and handles any errors or warnings that occur during the process.
 }
 \description{
 Sink Reset

From 2061d7a24b7a699bfeac72270817ae7225365ffa Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sat, 12 Oct 2024 14:10:48 -0600
Subject: [PATCH 14/19] remove old tryCatch code (for now)

---
 R/acc2lin.R | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 42315ece..a0a95033 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -72,14 +72,6 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 
       merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
       return(merged)
-    }, error = function(e) {
-      print(paste("Error: ", e$message))
-    }, warning = function(w) {
-      print(paste("Warning: ", w$message))
-    }, finally = {
-      print("addLineages function execution completed.")
-    })
-
 }
 
 
@@ -247,13 +239,6 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path,
     lins <- lins[!is.na(Lineage)] %>% unique()
 
     return(lins)
-  }, error = function(e) {
-    print(paste("An error occurred: ", e$message))
-  }, warning = function(w) {
-    print(paste("Warning: ", w$message))
-  }, finally = {
-    print("ipg2lin function execution completed.")
-  })
 }
 
 

From 70f0de8c57d610eaad122e59d4bf1e96fc455963 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sun, 13 Oct 2024 19:21:41 -0600
Subject: [PATCH 15/19] remove code not relevant to PR

---
 R/acc2lin.R          |  50 +++---
 R/assign_job_queue.R | 359 +++++++++++++------------------------------
 R/blastWrappers.R    | 105 +++----------
 3 files changed, 153 insertions(+), 361 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index a0a95033..61aae87c 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -157,40 +157,34 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 
       return(partitioned)
     }
-    tryCatch({
-      # Set the future plan strategy
-      plan(strategy = plan, .skip = T)
 
+    # Set the future plan strategy
+    plan(strategy = plan, .skip = T)
 
-      min_groups <- length(accnums) / 200
-      groups <- min(max(min_groups, 15), length(accnums))
-      partitioned_acc <- partition(accnums, groups)
 
-      # Open the sink to the output path
-      sink(out_path)
+    min_groups <- length(accnums) / 200
+    groups <- min(max(min_groups, 15), length(accnums))
+    partitioned_acc <- partition(accnums, groups)
 
-      a <- future_map(1:length(partitioned_acc), function(x) {
-        # Avoid hitting the rate API limit
-        if (x %% 9 == 0) {
-          Sys.sleep(1)
-        }
-        cat(
-          entrez_fetch(
-            id = partitioned_acc[[x]],
-            db = "ipg",
-            rettype = "xml",
-            api_key = "YOUR_KEY_HERE" ## Can this be included in public package?
-          )
+    # Open the sink to the output path
+    sink(out_path)
+
+    a <- future_map(1:length(partitioned_acc), function(x) {
+      # Avoid hitting the rate API limit
+      if (x %% 9 == 0) {
+        Sys.sleep(1)
+      }
+      cat(
+        entrez_fetch(
+          id = partitioned_acc[[x]],
+          db = "ipg",
+          rettype = "xml",
+          api_key = "YOUR_KEY_HERE" ## Can this be included in public package?
         )
-      })
-      sink(NULL)
-    }, error = function(e) {
-      print(paste("An error occurred: ", e$message))
-    }, warning = function(w) {
-      print(paste("Warning: ", w$message))
-    }, finally = {
-      print("efetch_ipg function execution completed.")
+      )
     })
+    sink(NULL)
+
   }
 }
 
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index 10df1e3a..4791b4a1 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -13,22 +13,13 @@ common_root <- Sys.getenv("COMMON_SRC_ROOT")
 #' example: list_opts2procs <- mapOption2Process
 #' @export
 mapOption2Process <- function() {
-  tryCatch({
-    opts2processes <- list(
-      "homology_search" = c("dblast", "dblast_cleanup"),
-      "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"),
-      # processes always present agnostic of advanced options
-      "always" = c("blast_clust", "clust2table")
-    )
-    return(opts2processes)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("mapOption2Process function execution completed.")
-  })
-
+  opts2processes <- list(
+    "homology_search" = c("dblast", "dblast_cleanup"),
+    "domain_architecture" = c("iprscan", "ipr2lineage", "ipr2da"),
+    # processes always present agnostic of advanced options
+    "always" = c("blast_clust", "clust2table")
+  )
+  return(opts2processes)
 }
 
 #' Use MolEvolvR advanced options to get associated processes
@@ -43,26 +34,14 @@ mapOption2Process <- function() {
 #' procs <- mapAdvOption2Process(advanced_opts)
 #' @export
 mapAdvOption2Process <- function(advanced_opts) {
-  if (!is.character(advanced_opts)) {
-    stop("Argument must be a character vector!")
-  }
-  tryCatch({
-    # append 'always' to add procs that always run
-    advanced_opts <- c(advanced_opts, "always")
-    opts2proc <- mapOption2Process()
-    # setup index for opts2proc based on advanced options
-    idx <- which(names(opts2proc) %in% advanced_opts)
-    # extract processes that will run
-    procs <- opts2proc[idx] |> unlist()
-    return(procs)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("mapOption2Process function execution completed.")
-  })
-
+  # append 'always' to add procs that always run
+  advanced_opts <- c(advanced_opts, "always")
+  opts2proc <- mapOption2Process()
+  # setup index for opts2proc based on advanced options
+  idx <- which(names(opts2proc) %in% advanced_opts)
+  # extract processes that will run
+  procs <- opts2proc[idx] |> unlist()
+  return(procs)
 }
 
 #' Scrape MolEvolvR logs and calculate median processes
@@ -88,60 +67,41 @@ mapAdvOption2Process <- function(advanced_opts) {
 #' list_proc_medians <- calculateProcessRuntime(dir_job_results)
 #' @export
 calculateProcessRuntime <- function(dir_job_results) {
-  tryCatch({
-    # Check if dir_job_results is a character string
-    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
-    }
+  source(file.path(common_root, "molevol_scripts", "R", "metrics.R"))
 
-    # Check if dir_job_results exists
-    if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
-    }
+  # aggregate logs from
+  path_log_data <- file.path(common_root,
+                              "molevol_scripts", "log_data", "prod_logs.rda")
 
-    source(file.path(common_root, "molevol_scripts", "R", "metrics.R"))
-
-    # aggregate logs from
-    path_log_data <- file.path(common_root,
-                               "molevol_scripts", "log_data", "prod_logs.rda")
-
-    # ensure the folder exists to the location
-    if (!dir.exists(path_log_data)) {
-      dir.create(dirname(path_log_data),
-                 recursive = TRUE, showWarnings = FALSE)
-    }
-
-    # attempt to load pre-generated logdata
-    if (!file.exists(path_log_data)) {
-      logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
-      save(logs, file = path_log_data)
-    } else {
-      load(path_log_data) # loads the logs object
-    }
-    df_log <- logs$df_log
-    procs <- c(
-      "dblast", "dblast_cleanup", "iprscan",
-      "ipr2lineage", "ipr2da", "blast_clust",
-      "clust2table"
-    )
-    list_proc_medians <- df_log |>
-      dplyr::select(dplyr::all_of(procs)) |>
-      dplyr::summarise(
-        dplyr::across(
-          dplyr::everything(),
-          \(x) median(x, na.rm = TRUE)
-        )
-      ) |>
-      as.list()
-    return(list_proc_medians)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("calculateProcessRuntime function execution completed.")
-  })
+  # ensure the parent folder of the log data file exists
+  if (!dir.exists(dirname(path_log_data))) {
+    dir.create(dirname(path_log_data),
+                recursive = TRUE, showWarnings = FALSE)
+  }
 
+  # attempt to load pre-generated logdata
+  if (!file.exists(path_log_data)) {
+    logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
+    save(logs, file = path_log_data)
+  } else {
+    load(path_log_data) # loads the logs object
+  }
+  df_log <- logs$df_log
+  procs <- c(
+    "dblast", "dblast_cleanup", "iprscan",
+    "ipr2lineage", "ipr2da", "blast_clust",
+    "clust2table"
+  )
+  list_proc_medians <- df_log |>
+    dplyr::select(dplyr::all_of(procs)) |>
+    dplyr::summarise(
+      dplyr::across(
+        dplyr::everything(),
+        \(x) median(x, na.rm = TRUE)
+      )
+    ) |>
+    as.list()
+  return(list_proc_medians)
 }
 
 #' Write a table of 2 columns: 1) process and 2) median seconds
@@ -162,39 +122,18 @@ calculateProcessRuntime <- function(dir_job_results) {
 #' )
 #' @export
 writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
-  tryCatch({
-    # Error handling for input arguments
-    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
-    }
-
-    if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
-    }
-
-    if (!is.character(filepath) || length(filepath) != 1) {
-      stop("Input 'filepath' must be a single character string.")
-    }
-    df_proc_medians <- calculateProcessRuntime(dir_job_results) |>
-      tibble::as_tibble() |>
-      tidyr::pivot_longer(
-        dplyr::everything(),
-        names_to = "process",
-        values_to = "median_seconds"
-      ) |>
-      dplyr::arrange(dplyr::desc(median_seconds))
-
-    # Write the resulting tibble to a TSV file
-    readr::write_tsv(df_proc_medians, file = filepath)
-    return(df_proc_medians)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("writeProcessRuntime2TSV function execution completed.")
-  })
-
+  df_proc_medians <- calculateProcessRuntime(dir_job_results) |>
+    tibble::as_tibble() |>
+    tidyr::pivot_longer(
+      dplyr::everything(),
+      names_to = "process",
+      values_to = "median_seconds"
+    ) |>
+    dplyr::arrange(dplyr::desc(median_seconds))
+
+  # Write the resulting tibble to a TSV file
+  readr::write_tsv(df_proc_medians, file = filepath)
+  return(df_proc_medians)
 }
 
 #' Compute median process runtimes, then write a YAML list of the processes and
@@ -219,36 +158,8 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 #' }
 #' @export
 writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) {
-  tryCatch({
-    # Error handling for dir_job_results arguments
-    if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
-    }
-
-    if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
-    }
-    if (is.null(filepath)) {
-      filepath <- file.path(common_root,
-                            "molevol_scripts",
-                            "log_data",
-                            "job_proc_weights.yml")
-    }
-    if (!is.character(filepath) || length(filepath) != 1) {
-      stop("Input 'filepath' must be a single character string.")
-    }
-
-    medians <- calculateProcessRuntime(dir_job_results)
-    yaml::write_yaml(medians, filepath)
-  }, error = function(e) {
-    message(paste("Encountered an error: "), e$message)
-  }, warning = function(w) {
-    message(paste("Warning: "), w$message)
-  }, finally = {
-    message("writeProcessRuntime2TSV function execution completed.")
-  }
-  )
-
+  medians <- calculateProcessRuntime(dir_job_results)
+  yaml::write_yaml(medians, filepath)
 }
 
 #' Quickly get the runtime weights for MolEvolvR backend processes
@@ -322,81 +233,49 @@ calculateEstimatedWallTimeFromOpts	 <- function(advanced_opts,
                                                   n_inputs = 1L,
                                                   n_hits = NULL,
                                                   verbose = FALSE) {
-
-  tryCatch({
-    # to calculate est walltime for a homology search job, the number of hits
-    # must be provided
-    validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts
-    stopifnot(!validation_fail)
-
-    # Validate advanced_opts
-    if (!is.character(advanced_opts)) {
-      stop("Argument 'advanced_opts' must be a character vector.")
-    }
-
-    # Validate n_inputs
-    if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) {
-      stop("Argument 'n_inputs' must be a single positive numeric value.")
-    }
-
-    # Validate n_hits if homology_search is in advanced_opts
-    if ("homology_search" %in% advanced_opts &&
-          (is.null(n_hits)|| !is.numeric(n_hits)
-           || length(n_hits) != 1 || n_hits < 0)) {
-      stop("Argument 'n_hits' must be a single non-negative numeric value when 
-           'homology_search' is in 'advanced_opts'.")
-    }
-
-    # Get process weights
-    proc_weights <- writeProcessRuntime2YML()
-    if (!is.list(proc_weights)) {
-      stop("Process weights could not be retrieved correctly.")
-    }
-
-    # sort process weights by names and convert to vec
-    proc_weights <- proc_weights[order(names(proc_weights))] |> unlist()
-    all_procs <- names(proc_weights) |> sort()
-    # get processes from advanced options and sort by names
-    procs_from_opts <- mapAdvOption2Process(advanced_opts)
-    procs_from_opts <- sort(procs_from_opts)
-    # binary encode: yes proc will run (1); else 0
-    binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L)
-    # dot product of weights and procs to run; scaled by the number of inputs
-    est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |>
-      as.numeric()
-    # calculate the additional processes to run for the homologous hits
-    if ("homology_search" %in% advanced_opts) {
-      opts2procs <- mapOption2Process()
-      # exclude the homology search processes for the homologous hits
-      procs2exclude_for_homologs <- opts2procs[["homology_search"]]
-      procs_homologs <- procs_from_opts[!(procs_from_opts 
-                                          %in% procs2exclude_for_homologs)]
-      binary_proc_vec_homolog <- dplyr::if_else(all_procs 
-                                                %in% procs_homologs, 1L, 0L)
-      # add the estimated walltime for processes run on the homologous hits
-      est_walltime <- est_walltime +
-        (n_hits * (binary_proc_vec_homolog
-                   %*% proc_weights) |> as.numeric())
-    }
-    if (verbose) {
-      msg <- stringr::str_glue(
-        "warnings from calculateEstimatedWallTimeFromOpts	():\n",
-        "\tn_inputs={n_inputs}\n",
-        "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
-        "\test_walltime={est_walltime}\n\n"
-      )
-      cat(file = stderr(), msg)
-    }
-    return(est_walltime)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("calculateEstimatedWallTimeFromOpts	 
-            function execution completed.")
-  })
-
+  # to calculate est walltime for a homology search job, the number of hits
+  # must be provided
+  validation_fail <- is.null(n_hits) && "homology_search" %in% advanced_opts
+  stopifnot(!validation_fail)
+
+  # Read the stored process weights (do not call the YAML writer here)
+  proc_weights <- getProcessRuntimeWeights()
+
+  # sort process weights by names and convert to vec
+  proc_weights <- proc_weights[order(names(proc_weights))] |> unlist()
+  all_procs <- names(proc_weights) |> sort()
+  # get processes from advanced options and sort by names
+  procs_from_opts <- mapAdvOption2Process(advanced_opts)
+  procs_from_opts <- sort(procs_from_opts)
+  # binary encode: yes proc will run (1); else 0
+  binary_proc_vec <- dplyr::if_else(all_procs %in% procs_from_opts, 1L, 0L)
+  # dot product of weights and procs to run; scaled by the number of inputs
+  est_walltime <- (n_inputs * (binary_proc_vec %*% proc_weights)) |>
+    as.numeric()
+  # calculate the additional processes to run for the homologous hits
+  if ("homology_search" %in% advanced_opts) {
+    opts2procs <- mapOption2Process()
+    # exclude the homology search processes for the homologous hits
+    procs2exclude_for_homologs <- opts2procs[["homology_search"]]
+    procs_homologs <- procs_from_opts[!(procs_from_opts 
+                                        %in% procs2exclude_for_homologs)]
+    binary_proc_vec_homolog <- dplyr::if_else(all_procs 
+                                              %in% procs_homologs, 1L, 0L)
+    # add the estimated walltime for processes run on the homologous hits
+    est_walltime <- est_walltime +
+      (n_hits * (binary_proc_vec_homolog
+                  %*% proc_weights) |> as.numeric())
+  }
+  if (verbose) {
+    msg <- stringr::str_glue(
+      "warnings from calculateEstimatedWallTimeFromOpts():\n",
+      "\tn_inputs={n_inputs}\n",
+      "\tn_hits={ifelse(is.null(n_hits), 'null', n_hits)}\n",
+      "\test_walltime={est_walltime}\n\n"
+    )
+    cat(file = stderr(), msg)
+  }
+  return(est_walltime)
 }
 
 
@@ -418,25 +297,8 @@ assignJobQueue <- function(
   t_sec_estimate,
   t_cutoff = 21600 # 6 hours
 ) {
-  tryCatch({
-    if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) {
-      stop("Argument 't_sec_estimate' must be a single numeric value.")
-    }
-
-    if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) {
-      stop("Argument 't_cutoff' must be a single non-negative numeric value.")
-    }
-
-    queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short")
-    return(queue)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("assignJobQueue function execution completed.")
-  })
-
+  queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short")
+  return(queue)
 }
 
 #' Plot the estimated runtimes for different advanced options and number
@@ -456,7 +318,6 @@ assignJobQueue <- function(
 #'                 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 #' @export
 plotEstimatedWallTimes <- function() {
-  tryCatch({
     opts <- mapOption2Process() |> names()
     # get all possible submission permutations (powerset)
     get_powerset <- function(vec) {
@@ -536,12 +397,4 @@ plotEstimatedWallTimes <- function() {
         y = "Estimated walltime (hours)"
       )
     return(p)
-  }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
-  }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("plotEstimatedWallTimes function execution completed.")
-  })
-
 }
diff --git a/R/blastWrappers.R b/R/blastWrappers.R
index 15484a1b..9b55f3ee 100755
--- a/R/blastWrappers.R
+++ b/R/blastWrappers.R
@@ -21,52 +21,24 @@ run_deltablast <- function(deltablast_path, db_search_path,
                            db = "refseq", query, evalue = "1e-5",
                            out, num_alignments, num_threads = 1) {
 
-  # Argument validation
-  if (!file.exists(deltablast_path)) {
-    stop("The DELTABLAST executable path is invalid: ", deltablast_path)
-  }
-  if (!dir.exists(db_search_path)) {
-    stop("The database search path is invalid: ", db_search_path)
-  }
-  if (!file.exists(query)) {
-    stop("The query file path is invalid: ", query)
-  }
-  if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) {
-    stop("The evalue must be a positive number: ", evalue)
-  }
-  if (!is.numeric(num_alignments) || num_alignments <= 0) {
-    stop("The number of alignments must be a 
-         positive integer: ", num_alignments)
-  }
-  if (!is.numeric(num_threads) || num_threads <= 0) {
-    stop("The number of threads must be a positive integer: ", num_threads)
-  }
-
   start <- Sys.time()
 
-  tryCatch({
-    system(paste0("export BLASTDB=/", db_search_path))
 
-    system2(
-      command = deltablast_path,
-      args = c(
-        "-db", db,
-        "-query", query,
-        "-evalue", evalue,
-        "-out", out,
-        "-num_threads", num_threads,
-        "-num_alignments", num_alignments
-        #   ,"-outfmt", outfmt
-      )
+  system(paste0("export BLASTDB=/", db_search_path))
+
+  system2(
+    command = deltablast_path,
+    args = c(
+      "-db", db,
+      "-query", query,
+      "-evalue", evalue,
+      "-out", out,
+      "-num_threads", num_threads,
+      "-num_alignments", num_alignments
+      #   ,"-outfmt", outfmt
     )
-    print(Sys.time() - start)
-  }, error = function(e) {
-    message(paste("Error in run_deltablast: ", e))
-  }, warning = function(w) {
-    message(paste("Warning in run_deltablast: ", w))
-  }, finally = {
-    message("run_deltablast completed")
-  })
+  )
+  print(Sys.time() - start)
 
 }
 
@@ -88,46 +60,19 @@ run_deltablast <- function(deltablast_path, db_search_path,
 run_rpsblast <- function(rpsblast_path, db_search_path,
                          db = "refseq", query, evalue = "1e-5",
                          out, num_threads = 1) {
-  # Argument validation
-  if (!file.exists(rpsblast_path)) {
-    stop("The RPSBLAST executable path is invalid: ", rpsblast_path)
-  }
-  if (!dir.exists(db_search_path)) {
-    stop("The database search path is invalid: ", db_search_path)
-  }
-  if (!file.exists(query)) {
-    stop("The query file path is invalid: ", query)
-  }
-  if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) {
-    stop("The evalue must be a positive number: ", evalue)
-  }
-  if (!is.numeric(num_threads) || num_threads <= 0) {
-    stop("The number of threads must be a positive integer: ", num_threads)
-  }
 
   start <- Sys.time()
+  system(paste0("export BLASTDB=/", db_search_path))
 
-  tryCatch({
-
-    system(paste0("export BLASTDB=/", db_search_path))
-
-    system2(
-      command = rpsblast_path,
-      args = c(
-        "-db", db,
-        "-query", query,
-        "-evalue", evalue,
-        "-out", out,
-        "-num_threads", num_threads
-      )
+  system2(
+    command = rpsblast_path,
+    args = c(
+      "-db", db,
+      "-query", query,
+      "-evalue", evalue,
+      "-out", out,
+      "-num_threads", num_threads
     )
-    print(Sys.time() - start)
-  }, error = function(e) {
-    message(paste("Error in run_rpsblast: ", e))
-  }, warning = function(w) {
-    message(paste("Warning in run_rpsblast: ", w))
-  }, finally = {
-    message("run_rpsblast completed")
-  })
-
+  )
+  print(Sys.time() - start)
 }

From 392775de92dfc33b198b41a5a2843f5313dd2e0d Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Sun, 13 Oct 2024 19:43:58 -0600
Subject: [PATCH 16/19] adjust .Rd title tags for renamed functions

---
 R/assign_job_queue.R                      | 27 +++++++++++++++++++++++
 R/create_lineage_lookup.R                 |  3 +++
 man/assignJobQueue.Rd                     |  2 +-
 man/calculateEstimatedWallTimeFromOpts.Rd |  3 +--
 man/calculateProcessRuntime.Rd            |  2 +-
 man/createLineageLookup.Rd                |  2 +-
 man/getProcessRuntimeWeights.Rd           |  2 +-
 man/mapAdvOption2Process.Rd               |  2 +-
 man/mapOption2Process.Rd                  |  2 +-
 man/plotEstimatedWallTimes.Rd             |  6 +++--
 man/writeProcessRuntime2TSV.Rd            |  2 +-
 man/writeProcessRuntime2YML.Rd            |  6 +++--
 12 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index 4791b4a1..20ba841f 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -6,6 +6,9 @@
 # file.path(common_root, "molevol_scripts", "R", "assignJobQueue.R")
 common_root <- Sys.getenv("COMMON_SRC_ROOT")
 
+#' mapOption2Process
+#' 
+#' @description
 #' Construct list where names (MolEvolvR advanced options) point to processes
 #'
 #' @return list where names (MolEvolvR advanced options) point to processes
@@ -22,6 +25,9 @@ mapOption2Process <- function() {
   return(opts2processes)
 }
 
+#' mapAdvOption2Process
+#' 
+#' @description
 #' Use MolEvolvR advanced options to get associated processes
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
@@ -44,6 +50,9 @@ mapAdvOption2Process <- function(advanced_opts) {
   return(procs)
 }
 
+#' calculateProcessRuntime
+#' 
+#' @description
 #' Scrape MolEvolvR logs and calculate median processes
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results
@@ -104,6 +113,9 @@ calculateProcessRuntime <- function(dir_job_results) {
   return(list_proc_medians)
 }
 
+#' writeProcessRuntime2TSV
+#' 
+#' @description
 #' Write a table of 2 columns: 1) process and 2) median seconds
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results
@@ -136,6 +148,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
   return(df_proc_medians)
 }
 
+#' writeProcessRuntime2YML
+#' 
+#' @description
 #' Compute median process runtimes, then write a YAML list of the processes and
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
@@ -162,6 +177,9 @@ writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) {
   yaml::write_yaml(medians, filepath)
 }
 
+#' getProcessRuntimeWeights
+#' 
+#' @description
 #' Quickly get the runtime weights for MolEvolvR backend processes
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results
@@ -213,6 +231,9 @@ getProcessRuntimeWeights <- function(medians_yml_path = NULL) {
   return(proc_weights)
 }
 
+#' calculateEstimatedWallTimeFromOpts
+#' 
+#' @description
 #' Given MolEvolvR advanced options and number of inputs,
 #' calculate the total estimated walltime for the job
 #'
@@ -279,6 +300,9 @@ calculateEstimatedWallTimeFromOpts	 <- function(advanced_opts,
 }
 
 
+#' assignJobQueue
+#' 
+#' @description
 #' Decision function to assign job queue
 #'
 #' @param t_sec_estimate estimated number of seconds a job will process
@@ -301,6 +325,9 @@ assignJobQueue <- function(
   return(queue)
 }
 
+#' plotEstimatedWallTimes
+#' 
+#' @description
 #' Plot the estimated runtimes for different advanced options and number
 #' of inputs
 #'
diff --git a/R/create_lineage_lookup.R b/R/create_lineage_lookup.R
index 78e79048..2408c5e6 100644
--- a/R/create_lineage_lookup.R
+++ b/R/create_lineage_lookup.R
@@ -3,6 +3,9 @@
 # library(biomartr)
 
 
+#' createLineageLookup
+#' 
+#' @description
 #' Create a look up table that goes from TaxID, to Lineage
 #'
 #' @author Samuel Chen
diff --git a/man/assignJobQueue.Rd b/man/assignJobQueue.Rd
index 3663ce56..de646a82 100644
--- a/man/assignJobQueue.Rd
+++ b/man/assignJobQueue.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{assignJobQueue}
 \alias{assignJobQueue}
-\title{Decision function to assign job queue}
+\title{assignJobQueue}
 \usage{
 assignJobQueue(t_sec_estimate, t_cutoff = 21600)
 }
diff --git a/man/calculateEstimatedWallTimeFromOpts.Rd b/man/calculateEstimatedWallTimeFromOpts.Rd
index c09cf6a6..d5361001 100644
--- a/man/calculateEstimatedWallTimeFromOpts.Rd
+++ b/man/calculateEstimatedWallTimeFromOpts.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{calculateEstimatedWallTimeFromOpts}
 \alias{calculateEstimatedWallTimeFromOpts}
-\title{Given MolEvolvR advanced options and number of inputs,
-calculate the total estimated walltime for the job}
+\title{calculateEstimatedWallTimeFromOpts}
 \usage{
 calculateEstimatedWallTimeFromOpts(
   advanced_opts,
diff --git a/man/calculateProcessRuntime.Rd b/man/calculateProcessRuntime.Rd
index bb6dd1ed..579ea2b6 100644
--- a/man/calculateProcessRuntime.Rd
+++ b/man/calculateProcessRuntime.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{calculateProcessRuntime}
 \alias{calculateProcessRuntime}
-\title{Scrape MolEvolvR logs and calculate median processes}
+\title{calculateProcessRuntime}
 \usage{
 calculateProcessRuntime(dir_job_results)
 }
diff --git a/man/createLineageLookup.Rd b/man/createLineageLookup.Rd
index 5dbab978..132019ce 100644
--- a/man/createLineageLookup.Rd
+++ b/man/createLineageLookup.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/create_lineage_lookup.R
 \name{createLineageLookup}
 \alias{createLineageLookup}
-\title{Create a look up table that goes from TaxID, to Lineage}
+\title{createLineageLookup}
 \usage{
 createLineageLookup(
   lineage_file = here("data/rankedlineage.dmp"),
diff --git a/man/getProcessRuntimeWeights.Rd b/man/getProcessRuntimeWeights.Rd
index ff3c8e5d..de0e2ea6 100644
--- a/man/getProcessRuntimeWeights.Rd
+++ b/man/getProcessRuntimeWeights.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{getProcessRuntimeWeights}
 \alias{getProcessRuntimeWeights}
-\title{Quickly get the runtime weights for MolEvolvR backend processes}
+\title{getProcessRuntimeWeights}
 \usage{
 getProcessRuntimeWeights(medians_yml_path = NULL)
 }
diff --git a/man/mapAdvOption2Process.Rd b/man/mapAdvOption2Process.Rd
index 5bd9ee65..6a210a20 100644
--- a/man/mapAdvOption2Process.Rd
+++ b/man/mapAdvOption2Process.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{mapAdvOption2Process}
 \alias{mapAdvOption2Process}
-\title{Use MolEvolvR advanced options to get associated processes}
+\title{mapAdvOption2Process}
 \usage{
 mapAdvOption2Process(advanced_opts)
 }
diff --git a/man/mapOption2Process.Rd b/man/mapOption2Process.Rd
index ff6905c5..9645617b 100644
--- a/man/mapOption2Process.Rd
+++ b/man/mapOption2Process.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{mapOption2Process}
 \alias{mapOption2Process}
-\title{Construct list where names (MolEvolvR advanced options) point to processes}
+\title{mapOption2Process}
 \usage{
 mapOption2Process()
 }
diff --git a/man/plotEstimatedWallTimes.Rd b/man/plotEstimatedWallTimes.Rd
index 0d53cb32..36b0ecd5 100644
--- a/man/plotEstimatedWallTimes.Rd
+++ b/man/plotEstimatedWallTimes.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{plotEstimatedWallTimes}
 \alias{plotEstimatedWallTimes}
-\title{Plot the estimated runtimes for different advanced options and number
-of inputs}
+\title{plotEstimatedWallTimes}
 \usage{
 plotEstimatedWallTimes()
 }
@@ -16,5 +15,8 @@ ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_
 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 }
 \description{
+Plot the estimated runtimes for different advanced options and number
+of inputs
+
 this function was just for fun; very, very messy code
 }
diff --git a/man/writeProcessRuntime2TSV.Rd b/man/writeProcessRuntime2TSV.Rd
index 03cbbd68..0e045a5c 100644
--- a/man/writeProcessRuntime2TSV.Rd
+++ b/man/writeProcessRuntime2TSV.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{writeProcessRuntime2TSV}
 \alias{writeProcessRuntime2TSV}
-\title{Write a table of 2 columns: 1) process and 2) median seconds}
+\title{writeProcessRuntime2TSV}
 \usage{
 writeProcessRuntime2TSV(dir_job_results, filepath)
 }
diff --git a/man/writeProcessRuntime2YML.Rd b/man/writeProcessRuntime2YML.Rd
index b43f39ee..865f23f7 100644
--- a/man/writeProcessRuntime2YML.Rd
+++ b/man/writeProcessRuntime2YML.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/assign_job_queue.R
 \name{writeProcessRuntime2YML}
 \alias{writeProcessRuntime2YML}
-\title{Compute median process runtimes, then write a YAML list of the processes and
-their median runtimes in seconds to the path specified by 'filepath'.}
+\title{writeProcessRuntime2YML}
 \usage{
 writeProcessRuntime2YML(dir_job_results, filepath = NULL)
 }
@@ -14,6 +13,9 @@ writeProcessRuntime2YML(dir_job_results, filepath = NULL)
 uses ./molevol_scripts/log_data/job_proc_weights.yml}
 }
 \description{
+Compute median process runtimes, then write a YAML list of the processes and
+their median runtimes in seconds to the path specified by 'filepath'.
+
 The default value of filepath is the value of the env var
 MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default
 read location.

From df602dfd63cbab0d84dbcc8229e3da9c7646b9d5 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 22 Oct 2024 13:52:56 -0600
Subject: [PATCH 17/19] 
 https://github.com/JRaviLab/MolEvolvR/pull/95/files#r1805272251 -
 re-implement dropped check - fix .Rd

---
 R/assign_job_queue.R           | 5 ++++-
 man/writeProcessRuntime2YML.Rd | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index 20ba841f..69609417 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -155,7 +155,7 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
 #' The default value of filepath is the value of the env var
-#' MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default
+#' MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default
 #' read location.
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results directory
@@ -173,6 +173,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 #' }
 #' @export
 writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) {
+  if (is.null(filepath)) {
+    filepath <- file.path(common_root, "molevol_scripts", "log_data", "job_proc_weights.yml")
+  }
   medians <- calculateProcessRuntime(dir_job_results)
   yaml::write_yaml(medians, filepath)
 }
diff --git a/man/writeProcessRuntime2YML.Rd b/man/writeProcessRuntime2YML.Rd
index 865f23f7..5e0a05a4 100644
--- a/man/writeProcessRuntime2YML.Rd
+++ b/man/writeProcessRuntime2YML.Rd
@@ -17,7 +17,7 @@ Compute median process runtimes, then write a YAML list of the processes and
 their median runtimes in seconds to the path specified by 'filepath'.
 
 The default value of filepath is the value of the env var
-MOLEVOLVR_PROC_WEIGHTS, which writeProcessRuntime2YML() also uses as its default
+MOLEVOLVR_PROC_WEIGHTS, which getProcessRuntimeWeights() also uses as its default
 read location.
 }
 \examples{

From 1a0b66358eac637736a18868ae27e4049aa22628 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 22 Oct 2024 14:43:47 -0600
Subject: [PATCH 18/19] 
 https://github.com/JRaviLab/MolEvolvR/pull/95#discussion_r1805166466 - adjust
 roxygen skeleton readability

---
 R/acc2lin.R | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 61aae87c..7b6f570c 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -198,10 +198,8 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #'              of an efetch run on the ipg database and
 #'
 #' @param accessions Character vector of protein accessions
-#' @param ipg_file Filepath to the file
-#'                 containing results of an efetch run on the
-#' ipg database. The protein accession in
-#'               'accessions' should be contained in this
+#' @param ipg_file Filepath to the file containing results of an efetch run on the
+#' ipg database. The protein accession in 'accessions' should be contained in this
 #' file
 #' @param assembly_path String of the path to the assembly_summary path
 #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function

From 13e70c75a197c02c395cbef2d7b3c5b991ea7649 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 22 Oct 2024 15:02:39 -0600
Subject: [PATCH 19/19] formatting

---
 R/acc2lin.R      | 8 ++------
 man/efetchIPG.Rd | 3 +--
 man/sinkReset.Rd | 1 -
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 7b6f570c..5f25afe2 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -10,7 +10,6 @@
 #' Sink Reset
 #'
 #' @return No return, but run to close all outstanding `sink()`s
-#'         and handles any errors or warnings that occur during the process.
 #'
 #' @export
 #'
@@ -87,8 +86,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' @param ipgout_path Path to write the results 
-#'                    of the efetch run of the accessions
+#' @param ipgout_path Path to write the results of the efetch run of the accessions
 #' on the ipg database. If NULL, the file will not be written. Defaults to NULL
 #' @param plan A string specifying the parallelization strategy for the future
 #' package, such as `"sequential"` or `"multisession"`.
@@ -122,9 +120,7 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
 #'
 #' @author Samuel Chen, Janani Ravi
 #'
-#' @description Perform efetch on the ipg database
-#'              and write the results to out_path
-#'
+#' @description Perform efetch on the ipg database and write the results to out_path
 #' @param accnums Character vector containing the accession numbers to query on
 #' the ipg database
 #' @param out_path Path to write the efetch results to
diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd
index db63024f..047e2652 100644
--- a/man/efetchIPG.Rd
+++ b/man/efetchIPG.Rd
@@ -23,8 +23,7 @@ the ipg database}
 No return value. The function writes the fetched results to \code{out_path}.
 }
 \description{
-Perform efetch on the ipg database
-and write the results to out_path
+Perform efetch on the ipg database and write the results to out_path
 
 Perform efetch on the ipg database and write the results to out_path
 }
diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd
index e3fc7ce4..0285c0b2 100644
--- a/man/sinkReset.Rd
+++ b/man/sinkReset.Rd
@@ -8,7 +8,6 @@ sinkReset()
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
-and handles any errors or warnings that occur during the process.
 }
 \description{
 Sink Reset