diff --git a/pkgdown.yml b/pkgdown.yml index 91009a3c..410e43d2 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 3.1.11 pkgdown: 2.1.1 pkgdown_sha: ~ articles: {} -last_built: 2024-10-12T01:01Z +last_built: 2024-10-22T21:09Z urls: reference: https://jravilab.github.io/MolEvolvR/reference article: https://jravilab.github.io/MolEvolvR/articles diff --git a/reference/GCA2Lineage.html b/reference/GCA2Lineage.html index a3fe8deb..bdc60267 100644 --- a/reference/GCA2Lineage.html +++ b/reference/GCA2Lineage.html @@ -62,7 +62,7 @@

Argumentslineagelookup_path

String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function

+"createLineageLookup()" function

acc_col
diff --git a/reference/IPG2Lineage.html b/reference/IPG2Lineage.html index 81681d20..7603798a 100644 --- a/reference/IPG2Lineage.html +++ b/reference/IPG2Lineage.html @@ -1,7 +1,9 @@ -IPG2Lineage — IPG2Lineage • MolEvolvRIPG2Lineage — IPG2Lineage • MolEvolvR Skip to contents @@ -37,7 +39,8 @@

IPG2Lineage

-

Takes the resulting file of an efetch run on the ipg database and

+

Takes the resulting file +of an efetch run on the ipg database and

Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns

@@ -78,7 +81,7 @@

Argumentslineagelookup_path

String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function

+"createLineageLookup()" function

assembly_path
diff --git a/reference/assignJobQueue.html b/reference/assignJobQueue.html new file mode 100644 index 00000000..e9b10c11 --- /dev/null +++ b/reference/assignJobQueue.html @@ -0,0 +1,86 @@ + +assignJobQueue — assignJobQueue • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Decision function to assign job queue

+
+ +
+

Usage

+
assignJobQueue(t_sec_estimate, t_cutoff = 21600)
+
+ +
+

Arguments

+ + +
t_sec_estimate
+

estimated number of seconds a job will process +(from calculateEstimatedWallTimeFromOpts())

+ + +
t_cutoff
+

threshold value that defines the lower bound for assigning a +job to the "long queue"

+ +
+
+

Value

+

a string of "short" or "long"

+

example: +calculateEstimatedWallTimeFromOpts(c("homology_search", +"domain_architecture"), 3) |> +assignJobQueue()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/calculateEstimatedWallTimeFromOpts.html b/reference/calculateEstimatedWallTimeFromOpts.html new file mode 100644 index 00000000..6f7835aa --- /dev/null +++ b/reference/calculateEstimatedWallTimeFromOpts.html @@ -0,0 +1,92 @@ + +calculateEstimatedWallTimeFromOpts — calculateEstimatedWallTimeFromOpts • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Given MolEvolvR advanced options and number of inputs, +calculate the total estimated walltime for the job

+
+ +
+

Usage

+
calculateEstimatedWallTimeFromOpts(
+  advanced_opts,
+  n_inputs = 1L,
+  n_hits = NULL,
+  verbose = FALSE
+)
+
+ +
+

Arguments

+ + +
advanced_opts
+

character vector of MolEvolvR advanced options +(see mapOption2Process for the options)

+ + +
n_inputs
+

total number of input proteins

+ +
+
+

Value

+

total estimated number of seconds a job will process (walltime)

+

example: calculateEstimatedWallTimeFromOpts(c("homology_search", +"domain_architecture"), +n_inputs = 3, n_hits = 50L)

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/calculateProcessRuntime.html b/reference/calculateProcessRuntime.html new file mode 100644 index 00000000..7fb16126 --- /dev/null +++ b/reference/calculateProcessRuntime.html @@ -0,0 +1,84 @@ + +calculateProcessRuntime — calculateProcessRuntime • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Scrape MolEvolvR logs and calculate median process runtimes

+
+ +
+

Usage

+
calculateProcessRuntime(dir_job_results)
+
+ +
+

Arguments

+ + +
dir_job_results
+

chr path to MolEvolvR job_results +directory

+ +
+
+

Value

+

list names: processes; values: median runtime (seconds)

+

see molevol_scripts/R/metrics.R for info on functions called here

+

examples:

  1. +

dir_job_results <- "/data/scratch/janani/molevolvr_out" +list_proc_medians <- calculateProcessRuntime(dir_job_results)

  1. from outside container environment +common_root <- "/data/molevolvr_transfer/molevolvr_dev" +dir_job_results <- "/data/molevolvr_transfer/molevolvr_dev/job_results" +list_proc_medians <- calculateProcessRuntime(dir_job_results)

  2. +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/cleanClusterFile.html b/reference/cleanClusterFile.html new file mode 100644 index 00000000..5675ab59 --- /dev/null +++ b/reference/cleanClusterFile.html @@ -0,0 +1,95 @@ + +Clean Cluster File — cleanClusterFile • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Reads and cleans a cluster file

+

This function reads a space-separated cluster file and converts it to a cleaned up data frame.

+
+ +
+

Usage

+
cleanClusterFile(path, writepath = NULL, query)
+
+ +
+

Arguments

+ + +
path
+

A character string giving the path of the cluster file to be cleaned

+ + +
writepath
+

A character designating where the tsv file of the cleaned cluster file will be written to. If value is NULL no +file is written. Default NULL

+ + +
query
+

A character identifying the query of the file.

+ +
+
+

Value

+

The cleaned up cluster data frame is returned and a tsv file is written if the "writepath" parameter is used.

+
+ +
+

Examples

+
if (FALSE) { # \dontrun{
+cleanClusterFile("data/pspa.op_ins_cls", writepath = NULL, query = "pspa")
+} # }
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/reference/combineFiles.html b/reference/combineFiles.html new file mode 100644 index 00000000..18004e13 --- /dev/null +++ b/reference/combineFiles.html @@ -0,0 +1,91 @@ + +Download the combined assembly summaries of genbank and refseq — combineFiles • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Download the combined assembly summaries of genbank and refseq

+
+ +
+

Usage

+
combineFiles(
+  inpath = c("../molevol_data/project_data/phage_defense/"),
+  pattern = "*full_analysis.tsv",
+  delim = "\t",
+  skip = 0,
+  col_names = T
+)
+
+ +
+

Arguments

+ + +
inpath
+

String of 'master' path where the files reside (recursive=T)

+ + +
pattern
+

Character vector containing search pattern for files

+ + +
col_names
+

Takes logical T/F arguments OR column names vector; +usage similar to col_names parameter in readr::read_delim

+ +
+
+

Author

+

Janani Ravi

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/combineFullAnalysis.html b/reference/combineFullAnalysis.html new file mode 100644 index 00000000..dbf1f748 --- /dev/null +++ b/reference/combineFullAnalysis.html @@ -0,0 +1,72 @@ + +Combining full_analysis files — combineFullAnalysis • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Combining full_analysis files

+
+ +
+

Usage

+
combineFullAnalysis(inpath, ret = FALSE)
+
+ +
+

Arguments

+ + +
ret
+
+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/combineIPR.html b/reference/combineIPR.html new file mode 100644 index 00000000..9383b989 --- /dev/null +++ b/reference/combineIPR.html @@ -0,0 +1,72 @@ + +Combining clean ipr files — combineIPR • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Combining clean ipr files

+
+ +
+

Usage

+
combineIPR(inpath, ret = FALSE)
+
+ +
+

Arguments

+ + +
ret
+
+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/createLineageLookup.html b/reference/createLineageLookup.html new file mode 100644 index 00000000..8a60121c --- /dev/null +++ b/reference/createLineageLookup.html @@ -0,0 +1,93 @@ + +createLineageLookup — createLineageLookup • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Create a lookup table that maps TaxID to Lineage

+
+ +
+

Usage

+
createLineageLookup(
+  lineage_file = here("data/rankedlineage.dmp"),
+  outfile,
+  taxonomic_rank = "phylum"
+)
+
+ +
+

Arguments

+ + +
lineage_file
+

Path to the rankedlineage.dmp file containing taxids and their +corresponding taxonomic rank. rankedlineage.dmp can be downloaded at +https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/

+ + +
outfile
+

File the resulting lineage lookup table should be written to

+ + +
taxonomic_rank
+

The upper bound of taxonomic rank that the lineage includes. The lineage will +include superkingdom>...>taxonomic_rank. +Choices include: "superkingdom", "phylum", "class", "order", "family", +"genus", and "species"

+ +
+
+

Author

+

Samuel Chen

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/getProcessRuntimeWeights.html b/reference/getProcessRuntimeWeights.html new file mode 100644 index 00000000..d8b50bf8 --- /dev/null +++ b/reference/getProcessRuntimeWeights.html @@ -0,0 +1,78 @@ + +getProcessRuntimeWeights — getProcessRuntimeWeights • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Quickly get the runtime weights for MolEvolvR backend processes

+
+ +
+

Usage

+
getProcessRuntimeWeights(medians_yml_path = NULL)
+
+ +
+

Arguments

+ + +
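medians_yml_path
+

chr path to a YAML file of median process runtimes, presumably one written by +writeProcessRuntime2YML(); defaults to NULL (TODO: confirm what is used when NULL)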

+ +
+
+

Value

+

list names: processes; values: median runtime (seconds)

+

example: writeProcessRuntime2YML()

+
+ +
+ + +
+ + + + + + + diff --git a/reference/index.html b/reference/index.html index 49b326ac..5e35767b 100644 --- a/reference/index.html +++ b/reference/index.html @@ -109,22 +109,34 @@

All functionsadvanced_opts2est_walltime() + alignFasta() -
Given MolEvolvR advanced options and number of inputs, calculate the total estimated walltime for the job
+
alignFasta
- alignFasta() + assignJobQueue()
-
alignFasta
+
assignJobQueue
- assign_job_queue() + calculateEstimatedWallTimeFromOpts()
-
Decision function to assign job queue
+
calculateEstimatedWallTimeFromOpts
+
+ + calculateProcessRuntime() + +
+
calculateProcessRuntime
+
+ + cleanClusterFile() + +
+
Clean Cluster File
cleanClusters() @@ -175,25 +187,19 @@

All functionsclean_clust_file() - -

-
Clean Cluster File
-
- - combine_files() + combineFiles()
Download the combined assembly summaries of genbank and refseq
- combine_full() + combineFullAnalysis()
Combining full_analysis files
- combine_ipr() + combineIPR()
Combining clean ipr files
@@ -265,6 +271,12 @@

All functionscreateLineageLookup() + + +
createLineageLookup
+

+ createRepresentativeAccNum()
@@ -283,12 +295,6 @@

All functionscreate_lineage_lookup() - - -
Create a look up table that goes from TaxID, to Lineage
-

- domain_network()
@@ -391,46 +397,46 @@

All functionsgetTopAccByLinDomArch() + getProcessRuntimeWeights() -
getTopAccByLinDomArch
+
getProcessRuntimeWeights

- get_proc_medians() + getTopAccByLinDomArch()
-
Scrape MolEvolvR logs and calculate median processes
+
getTopAccByLinDomArch
- get_proc_weights() + mapAcc2Name()
-
Quickly get the runtime weights for MolEvolvR backend processes
+
mapAcc2Name
- make_opts2procs() + mapAdvOption2Process()
-
Construct list where names (MolEvolvR advanced options) point to processes
+
mapAdvOption2Process
- mapAcc2Name() + mapOption2Process()
-
mapAcc2Name
+
mapOption2Process
- map_advanced_opts2procs() + msa_pdf()
-
Use MolEvolvR advanced options to get associated processes
+
Multiple Sequence Alignment
- msa_pdf() + plotEstimatedWallTimes()
-
Multiple Sequence Alignment
+
plotEstimatedWallTimes
plotIPR2Viz() @@ -499,12 +505,6 @@

All functionsplot_estimated_walltimes() - -

-
Plot the estimated runtimes for different advanced options and number of inputs
-
- prepareColumnParams()
@@ -655,16 +655,16 @@

All functionswrite_proc_medians_table() + writeProcessRuntime2TSV() -
Write a table of 2 columns: 1) process and 2) median seconds
+
writeProcessRuntime2TSV

- write_proc_medians_yml() + writeProcessRuntime2YML()
-
Compute median process runtimes, then write a YAML list of the processes and their median runtimes in seconds to the path specified by 'filepath'.
+
writeProcessRuntime2YML
diff --git a/reference/mapAdvOption2Process.html b/reference/mapAdvOption2Process.html new file mode 100644 index 00000000..c44f949e --- /dev/null +++ b/reference/mapAdvOption2Process.html @@ -0,0 +1,80 @@ + +mapAdvOption2Process — mapAdvOption2Process • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Use MolEvolvR advanced options to get associated processes

+
+ +
+

Usage

+
mapAdvOption2Process(advanced_opts)
+
+ +
+

Arguments

+ + +
advanced_opts
+

character vector of MolEvolvR advanced options

+ +
+
+

Value

+

character vector of process names that will execute given +the advanced options

+

example: +advanced_opts <- c("homology_search", "domain_architecture") +procs <- mapAdvOption2Process(advanced_opts)

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/mapOption2Process.html b/reference/mapOption2Process.html new file mode 100644 index 00000000..eb15f59e --- /dev/null +++ b/reference/mapOption2Process.html @@ -0,0 +1,69 @@ + +mapOption2Process — mapOption2Process • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Construct list where names (MolEvolvR advanced options) point to processes

+
+ +
+

Usage

+
mapOption2Process()
+
+ +
+

Value

+

list where names (MolEvolvR advanced options) point to processes

+

example: list_opts2procs <- mapOption2Process()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/plotEstimatedWallTimes.html b/reference/plotEstimatedWallTimes.html new file mode 100644 index 00000000..a773bc50 --- /dev/null +++ b/reference/plotEstimatedWallTimes.html @@ -0,0 +1,78 @@ + +plotEstimatedWallTimes — plotEstimatedWallTimes • MolEvolvR + Skip to contents + + +
+
+
+ +
+

Plot the estimated runtimes for different advanced options and number +of inputs

+

this function was just for fun; very, very messy code

+
+ +
+

Usage

+
plotEstimatedWallTimes()
+
+ +
+

Value

+

line plot object

+

example: +p <- plotEstimatedWallTimes() +ggplot2::ggsave(filename = "/data/molevolvr_transfer/molevolvr_ +dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)

+
+ +
+ + +
+ + + + + + + diff --git a/reference/plotSunburst.html b/reference/plotSunburst.html index e75688ed..efcc1ec9 100644 --- a/reference/plotSunburst.html +++ b/reference/plotSunburst.html @@ -85,21 +85,21 @@

Examples# sunburst plot plotSunburst(starwars_count)
- + # fill by group size plotSunburst(starwars_count, fill_by_n = TRUE)
- + # treemap plot, ordered by group size plotTreemap(starwars_count, sort_by_n = TRUE)
- + # display all characters by homeworld starwars
%>% count(homeworld, name) %>% plotTreemap(sort_by_n = TRUE)
- +