From 346311e927e2aef48252553e34404b9b03db9780 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 10 Dec 2024 14:14:02 -0500 Subject: [PATCH 1/9] Limit iridanext output to fastmatch files --- conf/iridanext.config | 5 ----- 1 file changed, 5 deletions(-) diff --git a/conf/iridanext.config b/conf/iridanext.config index d5d6156..3299e55 100644 --- a/conf/iridanext.config +++ b/conf/iridanext.config @@ -7,11 +7,6 @@ iridanext { files { idkey = "irida_id" global = [ - "**/ArborView/arborview.clustered_data_arborview.html", - "**/clusters/gas.mcluster.clusters.text", - "**/clusters/gas.mcluster.run.json", - "**/clusters/gas.mcluster.thresholds.json", - "**/clusters/gas.mcluster.tree.nwk", "**/distances/profile_dists.allele_map.json", "**/distances/profile_dists.query_profile.text", "**/distances/profile_dists.ref_profile.text", From 1efe47cf3e1e3224f0e0aa3605a4f8239a4586c4 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 10 Dec 2024 14:54:03 -0500 Subject: [PATCH 2/9] First attempt to modify IRIDA UI --- nextflow.config | 9 +++---- nextflow_schema.json | 59 ++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/nextflow.config b/nextflow.config index 45cf222..af4f488 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,10 @@ params { validationShowHiddenParams = false validate_params = true + // FastMatch + fastmatch_category = null + threshold = 1 + // Profile dists args pd_outfmt = "matrix" pd_distm = "hamming" @@ -54,11 +58,6 @@ params { pd_columns = null pd_count_missing = false - // GAS Cluster - gm_thresholds = "10,5,0" - gm_method = "average" - gm_delimiter = "." 
- // Metadata metadata_1_header = "metadata_1" metadata_2_header = "metadata_2" diff --git a/nextflow_schema.json b/nextflow_schema.json index 10bf5d5..251cffa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -45,6 +45,13 @@ "description": "The column header names of the metadata columns.", "default": "", "properties": { + "fastmatch_category": { + "type": "string", + "errorMessage": "Has to be either query or reference", + "description": "Identify whether a sample is query or reference", + "fa_icon": "far fa-sticky-note", + "enum": ["query", "reference"] + }, "metadata_1_header": { "type": "string", "default": "metadata_1", @@ -102,7 +109,21 @@ "pattern": "^[^\\n\\t\"]+$" } }, - "fa_icon": "far fa-clipboard" + "fa_icon": "far fa-clipboard", + "required": ["fastmatch_category"] + }, + "fastmatch": { + "title": "FastMatch", + "type": "object", + "description": "Parameters for FastMatch", + "default": "", + "properties": { + "threshold": { + "type": "integer", + "description": "The output format for distances", + "default": 1 + } + } }, "profile_dists": { "title": "Profile Dists", @@ -128,14 +149,14 @@ "description": "The maximum proportion of missing data per locus for a locus to be kept in the analysis", "minimum": 0, "maximum": 1, - "default": 1 + "default": 1.0 }, "pd_sample_quality_threshold": { "type": "number", "description": "The maximum proportion of missing data per sample for a sample to be kept in the analysis", "minimum": 0, "maximum": 1, - "default": 1 + "default": 1.0 }, "pd_file_type": { "type": "string", @@ -168,32 +189,6 @@ } } }, - "gas_cluster": { - "title": "GAS Cluster", - "type": "object", - "description": "", - "default": "Parameters for GAS mcluster", - "properties": { - "gm_thresholds": { - "type": "string", - "default": "10,5,0", - "description": "Thresholds delimited by ','. 
Values should match units from '--pd_distm' (either 'hamming' or 'scaled').", - "pattern": "^(\\d+(\\.\\d+)?,)*\\d+(\\.\\d+)?$" - }, - "gm_method": { - "type": "string", - "default": "average", - "description": "Clustering linkage method.", - "enum": ["single", "average", "complete"] - }, - "gm_delimiter": { - "type": "string", - "default": ".", - "description": "Delimiter desired for nomenclature code. Must be alphanumeric or one of [._-].", - "pattern": "^[A-Fa-f0-9\\._-]+$" - } - } - }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -353,13 +348,13 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/metadata" + "$ref": "#/definitions/fastmatch" }, { - "$ref": "#/definitions/profile_dists" + "$ref": "#/definitions/metadata" }, { - "$ref": "#/definitions/gas_cluster" + "$ref": "#/definitions/profile_dists" }, { "$ref": "#/definitions/institutional_config_options" From aa4e0bd080061ddb829f4a15cda6b1fb5edb12f8 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 10 Dec 2024 16:15:23 -0500 Subject: [PATCH 3/9] Modified the UI to include query reference --- assets/samplesheet.csv | 8 +- assets/schema_input.json | 10 +- nextflow.config | 1 - nextflow_schema.json | 12 +-- .../samplesheet-addsamplename.csv | 8 +- .../data/samplesheets/samplesheet-hamming.csv | 8 +- .../samplesheets/samplesheet-hash-missing.csv | 8 +- .../samplesheet-hash-more-missing.csv | 8 +- .../samplesheet-little-metadata.csv | 8 +- .../samplesheet-mismatched-ids.csv | 8 +- .../samplesheets/samplesheet-no-metadata.csv | 8 +- .../samplesheet-partial-mismatched-ids.csv | 8 +- tests/data/samplesheets/samplesheet-tabs.csv | 8 +- tests/data/samplesheets/samplesheet1.csv | 8 +- tests/pipelines/main.nf.test | 20 ---- tests/pipelines/main_gm_thresholds.nf.test | 94 ------------------- workflows/fastmatchirida.nf | 17 +--- 17 files changed, 56 insertions(+), 186 deletions(-) diff --git a/assets/samplesheet.csv 
b/assets/samplesheet.csv index 82842ce..410a602 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json +sample,fastmatch_category,mlst_alleles +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json diff --git a/assets/schema_input.json b/assets/schema_input.json index 1c141a0..3872a6d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -25,6 +25,14 @@ "pattern": "^\\S+\\.mlst(\\.subtyping)?\\.json(\\.gz)?$", "errorMessage": "MLST JSON file from locidex report, cannot contain spaces and must have the extension: '.mlst.json', '.mlst.json.gz', '.mlst.subtyping.json', or 'mlst.subtyping.json.gz'" }, + "fastmatch_category": { + "type": "string", + "meta": ["ref_query"], + "errorMessage": "Has to be either query or reference", + "description": "Identify whether a sample is query or reference", + "fa_icon": "far fa-sticky-note", + "enum": ["query", "reference"] + }, "metadata_1": { "type": "string", "meta": ["metadata_1"], @@ -82,6 +90,6 @@ "pattern": "^[^\\n\\t\"]+$" } }, - "required": ["sample", "mlst_alleles"] + "required": ["sample","fastmatch_category", "mlst_alleles"] } } diff --git a/nextflow.config b/nextflow.config index af4f488..bf7ba44 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,6 @@ params { validate_params = true // FastMatch - fastmatch_category = null threshold = 1 // 
Profile dists args diff --git a/nextflow_schema.json b/nextflow_schema.json index 251cffa..a7131b8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -45,13 +45,6 @@ "description": "The column header names of the metadata columns.", "default": "", "properties": { - "fastmatch_category": { - "type": "string", - "errorMessage": "Has to be either query or reference", - "description": "Identify whether a sample is query or reference", - "fa_icon": "far fa-sticky-note", - "enum": ["query", "reference"] - }, "metadata_1_header": { "type": "string", "default": "metadata_1", @@ -109,8 +102,7 @@ "pattern": "^[^\\n\\t\"]+$" } }, - "fa_icon": "far fa-clipboard", - "required": ["fastmatch_category"] + "fa_icon": "far fa-clipboard" }, "fastmatch": { "title": "FastMatch", @@ -120,7 +112,7 @@ "properties": { "threshold": { "type": "integer", - "description": "The output format for distances", + "description": "Comparison score threshold value", "default": 1 } } diff --git a/tests/data/samplesheets/samplesheet-addsamplename.csv b/tests/data/samplesheets/samplesheet-addsamplename.csv index a1b785a..48cc046 100644 --- a/tests/data/samplesheets/samplesheet-addsamplename.csv +++ b/tests/data/samplesheets/samplesheet-addsamplename.csv @@ -1,4 +1,4 @@ -sample,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,S 1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 -sample2,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sample3,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 +sample,fastmatch_category,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,S 
1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,query,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,reference,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/tests/data/samplesheets/samplesheet-hamming.csv b/tests/data/samplesheets/samplesheet-hamming.csv index d18a69c..0e03c71 100644 --- a/tests/data/samplesheets/samplesheet-hamming.csv +++ b/tests/data/samplesheets/samplesheet-hamming.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,, -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,, +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,, +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,, diff --git a/tests/data/samplesheets/samplesheet-hash-missing.csv b/tests/data/samplesheets/samplesheet-hash-missing.csv index 9355c3d..d06d53a 100644 --- a/tests/data/samplesheets/samplesheet-hash-missing.csv +++ 
b/tests/data/samplesheets/samplesheet-hash-missing.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,, +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,, diff --git a/tests/data/samplesheets/samplesheet-hash-more-missing.csv b/tests/data/samplesheets/samplesheet-hash-more-missing.csv index 4ee53c9..5c4a4b4 100644 --- a/tests/data/samplesheets/samplesheet-hash-more-missing.csv +++ b/tests/data/samplesheets/samplesheet-hash-more-missing.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, 
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,, +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,, diff --git a/tests/data/samplesheets/samplesheet-little-metadata.csv b/tests/data/samplesheets/samplesheet-little-metadata.csv index 3e721de..138469e 100644 --- a/tests/data/samplesheets/samplesheet-little-metadata.csv +++ b/tests/data/samplesheets/samplesheet-little-metadata.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8 +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, 
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8 diff --git a/tests/data/samplesheets/samplesheet-mismatched-ids.csv b/tests/data/samplesheets/samplesheet-mismatched-ids.csv index 632768d..ffef4ca 100644 --- a/tests/data/samplesheets/samplesheet-mismatched-ids.csv +++ b/tests/data/samplesheets/samplesheet-mismatched-ids.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 -sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sampleC,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sampleC,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/tests/data/samplesheets/samplesheet-no-metadata.csv b/tests/data/samplesheets/samplesheet-no-metadata.csv index 9d67864..f752374 100644 --- a/tests/data/samplesheets/samplesheet-no-metadata.csv +++ b/tests/data/samplesheets/samplesheet-no-metadata.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,, +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,, diff --git a/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv b/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv index d5d42f0..ab5abab 100644 --- a/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv +++ b/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 -sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 
+sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/tests/data/samplesheets/samplesheet-tabs.csv b/tests/data/samplesheets/samplesheet-tabs.csv index 56b4243..b863db5 100644 --- a/tests/data/samplesheets/samplesheet-tabs.csv +++ b/tests/data/samplesheets/samplesheet-tabs.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a b,,,,,,, -sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a b,,,, -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a b +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a b,,,,,,, +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a b,,,, +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a b diff --git a/tests/data/samplesheets/samplesheet1.csv b/tests/data/samplesheets/samplesheet1.csv index 3200344..55cbc2d 100644 --- a/tests/data/samplesheets/samplesheet1.csv +++ b/tests/data/samplesheets/samplesheet1.csv @@ -1,4 +1,4 @@ -sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 +sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index 14665d5..f960a2b 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -12,7 +12,6 @@ nextflow_pipeline { outdir = "results" pd_distm = "scaled" - gm_thresholds = "50,20,0" metadata_1_header = "myheader_1" metadata_2_header = "myheader_2" @@ -59,7 +58,6 @@ nextflow_pipeline { input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" outdir = "results" - gm_thresholds = "2,1,0" } } @@ -188,24 +186,6 @@ nextflow_pipeline { } } - test("Test fail pipeline if invalid delimiter set") { - tag "pipeline_failure_invalid_delimiter" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_delimiter = ';' - } - } - - then { - assert workflow.failed - assert workflow.stderr.contains('* --gm_delimiter: string [;] does not match pattern ^[A-Fa-f0-9\\._-]+$ (;)') - } - } - test("Full pipeline with no metadata") { tag "pipeline_no_metadata" diff --git a/tests/pipelines/main_gm_thresholds.nf.test b/tests/pipelines/main_gm_thresholds.nf.test index f72b365..aa90ecf 100644 --- 
a/tests/pipelines/main_gm_thresholds.nf.test +++ b/tests/pipelines/main_gm_thresholds.nf.test @@ -3,100 +3,6 @@ nextflow_pipeline { name "Integration Tests of adjusting gm_thresholds parameter for clustering" script "main.nf" - test("Test fail pipeline if null threshold set") { - tag "pipeline_failure_null_threshold" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_thresholds = null - } - } - - then { - assert workflow.failed - assert workflow.stdout.contains("ERROR ~ --gm_thresholds null: Cannot pass null or empty string") - } - } - - test("Test fail pipeline if empty threshold set") { - tag "pipeline_failure_no_threshold" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_thresholds = "" - } - } - - then { - assert workflow.failed - assert workflow.stdout.contains("ERROR ~ --gm_thresholds : Cannot pass null or empty string") - } - } - - test("Test fail pipeline if negative threshold set") { - tag "pipeline_failure_negative_threshold" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_thresholds = "-1" - } - } - - then { - assert workflow.failed - assert workflow.stderr.contains('* --gm_thresholds: string [-1] does not match pattern ^(\\d+(\\.\\d+)?,)*\\d+(\\.\\d+)?$ (-1)') - } - } - - test("Test fail pipeline if mismatch between thresholds and scaled distm") { - tag "pipeline_failure_threshold_scaled" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_thresholds = "200,50" - pd_distm = "scaled" - } - } - - then { - assert workflow.failed - assert workflow.stdout.contains("ERROR ~ '--pd_distm scaled' is set, but '--gm_thresholds 200,50' contains thresholds outside of range [0, 100]." 
- + " Please either set '--pd_distm hamming' or adjust the threshold values.") - } - } - - test("Test fail pipeline if mismatch between thresholds and hamming distm") { - tag "pipeline_failure_threshold_hamming" - - when { - params { - input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv" - outdir = "results" - - gm_thresholds = "2,0.5" - pd_distm = "hamming" - } - } - - then { - assert workflow.failed - assert workflow.stdout.contains("ERROR ~ '--pd_distm hamming' is set, but '--gm_thresholds 2,0.5' contains fractions." - + " Please either set '--pd_distm scaled' or remove fractions from distance thresholds.") - } - } - test("Test pipeline with single threshold set to 0") { tag "pipeline_thresh_0" diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index 82c3a14..c9e7bff 100644 --- a/workflows/fastmatchirida.nf +++ b/workflows/fastmatchirida.nf @@ -127,22 +127,7 @@ workflow FASTMATCH { exit 1, "--pd_columns ${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now." } - if(params.gm_thresholds == null || params.gm_thresholds == ""){ - exit 1, "--gm_thresholds ${params.gm_thresholds}: Cannot pass null or empty string" - } - - gm_thresholds_list = params.gm_thresholds.toString().split(',') - if (params.pd_distm == 'hamming') { - if (gm_thresholds_list.any { it != null && it.contains('.') }) { - exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--gm_thresholds ${params.gm_thresholds}' contains fractions." - + " Please either set '--pd_distm scaled' or remove fractions from distance thresholds.") - } - } else if (params.pd_distm == 'scaled') { - if (gm_thresholds_list.any { it != null && (it as Float < 0.0 || it as Float > 100.0) }) { - exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--gm_thresholds ${params.gm_thresholds}' contains thresholds outside of range [0, 100]." 
- + " Please either set '--pd_distm hamming' or adjust the threshold values.") - } - } else { + if ((params.pd_distm != 'hamming') & (params.pd_distm != 'scaled')) { exit 1, "'--pd_distm ${params.pd_distm}' is an invalid value. Please set to either 'hamming' or 'scaled'." } From dc91ff45a7963ec6fc93d2bf2d506c3a6affcb0d Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Tue, 10 Dec 2024 16:21:34 -0500 Subject: [PATCH 4/9] Forgot to check prettier --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 3872a6d..92d1399 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -90,6 +90,6 @@ "pattern": "^[^\\n\\t\"]+$" } }, - "required": ["sample","fastmatch_category", "mlst_alleles"] + "required": ["sample", "fastmatch_category", "mlst_alleles"] } } From 6448399acc998b09b34d2f6de4d0a1564ef89729 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 11 Dec 2024 10:51:51 -0500 Subject: [PATCH 5/9] Modification to UI --- assets/schema_input.json | 3 ++- nextflow.config | 2 +- nextflow_schema.json | 8 ++++---- workflows/fastmatchirida.nf | 1 - 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 92d1399..c318bdb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,7 +31,8 @@ "errorMessage": "Has to be either query or reference", "description": "Identify whether a sample is query or reference", "fa_icon": "far fa-sticky-note", - "enum": ["query", "reference"] + "enum": ["query", "reference"], + "default": true }, "metadata_1": { "type": "string", diff --git a/nextflow.config b/nextflow.config index bf7ba44..bfda17f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,7 @@ params { validate_params = true // FastMatch - threshold = 1 + threshold = 1.0 // Profile dists args pd_outfmt = "matrix" diff --git a/nextflow_schema.json b/nextflow_schema.json index 
a7131b8..2a07979 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,9 +111,9 @@ "default": "", "properties": { "threshold": { - "type": "integer", + "type": "number", "description": "Comparison score threshold value", - "default": 1 + "default": 1.0 } } }, @@ -141,14 +141,14 @@ "description": "The maximum proportion of missing data per locus for a locus to be kept in the analysis", "minimum": 0, "maximum": 1, - "default": 1.0 + "default": 1 }, "pd_sample_quality_threshold": { "type": "number", "description": "The maximum proportion of missing data per sample for a sample to be kept in the analysis", "minimum": 0, "maximum": 1, - "default": 1.0 + "default": 1 }, "pd_file_type": { "type": "string", diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index c9e7bff..b601bc0 100644 --- a/workflows/fastmatchirida.nf +++ b/workflows/fastmatchirida.nf @@ -66,7 +66,6 @@ def prepareFilePath(String filep){ workflow FASTMATCH { SAMPLE_HEADER = "sample" ch_versions = Channel.empty() - // Track processed IDs def processedIDs = [] as Set From b12fb5c244379fa4613bc26b4db4cf27d0e327bb Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 11 Dec 2024 13:36:37 -0500 Subject: [PATCH 6/9] Convert blank column entries of fastmatch_category to reference --- assets/schema_input.json | 5 ++--- workflows/fastmatchirida.nf | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index c318bdb..25cdb85 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,8 +31,7 @@ "errorMessage": "Has to be either query or reference", "description": "Identify whether a sample is query or reference", "fa_icon": "far fa-sticky-note", - "enum": ["query", "reference"], - "default": true + "enum": ["query", "reference"] }, "metadata_1": { "type": "string", @@ -91,6 +90,6 @@ "pattern": "^[^\\n\\t\"]+$" } }, - "required": ["sample", "fastmatch_category", "mlst_alleles"] + 
"required": ["sample", "mlst_alleles"] } } diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index b601bc0..2a0a8a1 100644 --- a/workflows/fastmatchirida.nf +++ b/workflows/fastmatchirida.nf @@ -71,7 +71,7 @@ workflow FASTMATCH { // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema - input = Channel.fromSamplesheet("input") + input = Channel.fromSamplesheet("input").view() // and remove non-alphanumeric characters in sample_names (meta.id), whilst also correcting for duplicate sample_names (meta.id) .map { meta, mlst_file -> if (!meta.id) { @@ -86,8 +86,11 @@ workflow FASTMATCH { } // Add the ID to the set of processed IDs processedIDs << meta.id - - tuple(meta, mlst_file)} + // If the fastmatch_category is blank make the default "reference" + if (!meta.ref_query) { + meta.ref_query = "reference" + } + tuple(meta, mlst_file)}.view() // Make sure the ID in samplesheet / meta.id is the same ID // as the corresponding MLST JSON file: input_assure = INPUT_ASSURE(input) From fa831f7e745dca4d81adb85670ad57cbfc02154b Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Wed, 11 Dec 2024 13:59:07 -0500 Subject: [PATCH 7/9] Make reference the default if left blank and fix drop down menu in IRIDA --- assets/schema_input.json | 1 - workflows/fastmatchirida.nf | 10 ++++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 25cdb85..651d179 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -27,7 +27,6 @@ }, "fastmatch_category": { "type": "string", - "meta": ["ref_query"], "errorMessage": "Has to be either query or reference", "description": "Identify whether a sample is query or reference", "fa_icon": "far fa-sticky-note", diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index 2a0a8a1..05c2698 100644 --- a/workflows/fastmatchirida.nf +++ 
b/workflows/fastmatchirida.nf @@ -71,9 +71,9 @@ workflow FASTMATCH { // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema - input = Channel.fromSamplesheet("input").view() + input = Channel.fromSamplesheet("input") // and remove non-alphanumeric characters in sample_names (meta.id), whilst also correcting for duplicate sample_names (meta.id) - .map { meta, mlst_file -> + .map { meta, mlst_file, ref_query -> if (!meta.id) { meta.id = meta.irida_id } else { @@ -87,10 +87,12 @@ workflow FASTMATCH { // Add the ID to the set of processed IDs processedIDs << meta.id // If the fastmatch_category is blank make the default "reference" - if (!meta.ref_query) { + if (!ref_query) { meta.ref_query = "reference" + } else { + meta.ref_query = ref_query } - tuple(meta, mlst_file)}.view() + tuple(meta, mlst_file)} // Make sure the ID in samplesheet / meta.id is the same ID // as the corresponding MLST JSON file: input_assure = INPUT_ASSURE(input) From 8de39dd016eb43d0050383d5744a711539e1b5b4 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 12 Dec 2024 09:13:03 -0500 Subject: [PATCH 8/9] Set minimum for threshold to 0 --- nextflow_schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2a07979..432c31c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -113,7 +113,8 @@ "threshold": { "type": "number", "description": "Comparison score threshold value", - "default": 1.0 + "default": 1.0, + "minimum": 0 } } }, From a70d03039ae8400c205711b74878cd8fb327fb09 Mon Sep 17 00:00:00 2001 From: Steven Sutcliffe Date: Thu, 12 Dec 2024 11:08:01 -0500 Subject: [PATCH 9/9] Check scaled values in range between 0-100 --- workflows/fastmatchirida.nf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf index 05c2698..122a4e2 100644 --- 
a/workflows/fastmatchirida.nf +++ b/workflows/fastmatchirida.nf @@ -131,10 +131,18 @@ workflow FASTMATCH { exit 1, "--pd_columns ${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now." } + // Check that only 'hamming' or 'scaled' are provided to pd_distm if ((params.pd_distm != 'hamming') & (params.pd_distm != 'scaled')) { exit 1, "'--pd_distm ${params.pd_distm}' is an invalid value. Please set to either 'hamming' or 'scaled'." } + // Check that when using scaled the threshold exists between 0-100 + if (params.pd_distm == 'scaled') { + if ((params.threshold < 0.0) || (params.threshold > 100.0)) { + exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--threshold ${params.threshold}' contains thresholds outside of range [0, 100]." + + " Please either set '--threshold' or adjust the threshold values.") + } + } // Options related to profile dists mapping_format = Channel.value(params.pd_outfmt)