From 346311e927e2aef48252553e34404b9b03db9780 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Tue, 10 Dec 2024 14:14:02 -0500
Subject: [PATCH 1/9] Limit iridanext output to fastmatch files

---
 conf/iridanext.config | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/conf/iridanext.config b/conf/iridanext.config
index d5d6156..3299e55 100644
--- a/conf/iridanext.config
+++ b/conf/iridanext.config
@@ -7,11 +7,6 @@ iridanext {
         files {
             idkey = "irida_id"
             global = [
-                "**/ArborView/arborview.clustered_data_arborview.html",
-                "**/clusters/gas.mcluster.clusters.text",
-                "**/clusters/gas.mcluster.run.json",
-                "**/clusters/gas.mcluster.thresholds.json",
-                "**/clusters/gas.mcluster.tree.nwk",
                 "**/distances/profile_dists.allele_map.json",
                 "**/distances/profile_dists.query_profile.text",
                 "**/distances/profile_dists.ref_profile.text",

From 1efe47cf3e1e3224f0e0aa3605a4f8239a4586c4 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Tue, 10 Dec 2024 14:54:03 -0500
Subject: [PATCH 2/9] Add FastMatch params and remove GAS Cluster params

---
 nextflow.config      |  9 +++----
 nextflow_schema.json | 59 ++++++++++++++++++++------------------------
 2 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 45cf222..af4f488 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,6 +43,10 @@ params {
     validationShowHiddenParams       = false
     validate_params                  = true
 
+    // FastMatch
+    fastmatch_category = null
+    threshold = 1
+
     // Profile dists args
     pd_outfmt = "matrix"
     pd_distm = "hamming"
@@ -54,11 +58,6 @@ params {
     pd_columns = null
     pd_count_missing = false
 
-    // GAS Cluster
-    gm_thresholds = "10,5,0"
-    gm_method = "average"
-    gm_delimiter = "."
-
     // Metadata
     metadata_1_header = "metadata_1"
     metadata_2_header = "metadata_2"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 10bf5d5..251cffa 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -45,6 +45,13 @@
             "description": "The column header names of the metadata columns.",
             "default": "",
             "properties": {
+                "fastmatch_category": {
+                    "type": "string",
+                    "errorMessage": "Has to be either query or reference",
+                    "description": "Identify whether a sample is query or reference",
+                    "fa_icon": "far fa-sticky-note",
+                    "enum": ["query", "reference"]
+                },
                 "metadata_1_header": {
                     "type": "string",
                     "default": "metadata_1",
@@ -102,7 +109,21 @@
                     "pattern": "^[^\\n\\t\"]+$"
                 }
             },
-            "fa_icon": "far fa-clipboard"
+            "fa_icon": "far fa-clipboard",
+            "required": ["fastmatch_category"]
+        },
+        "fastmatch": {
+            "title": "FastMatch",
+            "type": "object",
+            "description": "Parameters for FastMatch",
+            "default": "",
+            "properties": {
+                "threshold": {
+                    "type": "integer",
+                    "description": "The output format for distances",
+                    "default": 1
+                }
+            }
         },
         "profile_dists": {
             "title": "Profile Dists",
@@ -128,14 +149,14 @@
                     "description": "The maximum proportion of missing data per locus for a locus to be kept in the analysis",
                     "minimum": 0,
                     "maximum": 1,
-                    "default": 1
+                    "default": 1.0
                 },
                 "pd_sample_quality_threshold": {
                     "type": "number",
                     "description": "The maximum proportion of missing data per sample for a sample to be kept in the analysis",
                     "minimum": 0,
                     "maximum": 1,
-                    "default": 1
+                    "default": 1.0
                 },
                 "pd_file_type": {
                     "type": "string",
@@ -168,32 +189,6 @@
                 }
             }
         },
-        "gas_cluster": {
-            "title": "GAS Cluster",
-            "type": "object",
-            "description": "",
-            "default": "Parameters for GAS mcluster",
-            "properties": {
-                "gm_thresholds": {
-                    "type": "string",
-                    "default": "10,5,0",
-                    "description": "Thresholds delimited by ','. Values should match units from '--pd_distm' (either 'hamming' or 'scaled').",
-                    "pattern": "^(\\d+(\\.\\d+)?,)*\\d+(\\.\\d+)?$"
-                },
-                "gm_method": {
-                    "type": "string",
-                    "default": "average",
-                    "description": "Clustering linkage method.",
-                    "enum": ["single", "average", "complete"]
-                },
-                "gm_delimiter": {
-                    "type": "string",
-                    "default": ".",
-                    "description": "Delimiter desired for nomenclature code. Must be alphanumeric or one of [._-].",
-                    "pattern": "^[A-Fa-f0-9\\._-]+$"
-                }
-            }
-        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -353,13 +348,13 @@
             "$ref": "#/definitions/input_output_options"
         },
         {
-            "$ref": "#/definitions/metadata"
+            "$ref": "#/definitions/fastmatch"
         },
         {
-            "$ref": "#/definitions/profile_dists"
+            "$ref": "#/definitions/metadata"
         },
         {
-            "$ref": "#/definitions/gas_cluster"
+            "$ref": "#/definitions/profile_dists"
         },
         {
             "$ref": "#/definitions/institutional_config_options"

From aa4e0bd080061ddb829f4a15cda6b1fb5edb12f8 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Tue, 10 Dec 2024 16:15:23 -0500
Subject: [PATCH 3/9] Move fastmatch_category to samplesheet schema; drop gm_thresholds checks

---
 assets/samplesheet.csv                        |  8 +-
 assets/schema_input.json                      | 10 +-
 nextflow.config                               |  1 -
 nextflow_schema.json                          | 12 +--
 .../samplesheet-addsamplename.csv             |  8 +-
 .../data/samplesheets/samplesheet-hamming.csv |  8 +-
 .../samplesheets/samplesheet-hash-missing.csv |  8 +-
 .../samplesheet-hash-more-missing.csv         |  8 +-
 .../samplesheet-little-metadata.csv           |  8 +-
 .../samplesheet-mismatched-ids.csv            |  8 +-
 .../samplesheets/samplesheet-no-metadata.csv  |  8 +-
 .../samplesheet-partial-mismatched-ids.csv    |  8 +-
 tests/data/samplesheets/samplesheet-tabs.csv  |  8 +-
 tests/data/samplesheets/samplesheet1.csv      |  8 +-
 tests/pipelines/main.nf.test                  | 20 ----
 tests/pipelines/main_gm_thresholds.nf.test    | 94 -------------------
 workflows/fastmatchirida.nf                   | 17 +---
 17 files changed, 56 insertions(+), 186 deletions(-)

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 82842ce..410a602 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json
+sample,fastmatch_category,mlst_alleles
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 1c141a0..3872a6d 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -25,6 +25,14 @@
                 "pattern": "^\\S+\\.mlst(\\.subtyping)?\\.json(\\.gz)?$",
                 "errorMessage": "MLST JSON file from locidex report, cannot contain spaces and must have the extension: '.mlst.json', '.mlst.json.gz', '.mlst.subtyping.json', or 'mlst.subtyping.json.gz'"
             },
+            "fastmatch_category": {
+                "type": "string",
+                "meta": ["ref_query"],
+                "errorMessage": "Has to be either query or reference",
+                "description": "Identify whether a sample is query or reference",
+                "fa_icon": "far fa-sticky-note",
+                "enum": ["query", "reference"]
+            },
             "metadata_1": {
                 "type": "string",
                 "meta": ["metadata_1"],
@@ -82,6 +90,6 @@
                 "pattern": "^[^\\n\\t\"]+$"
             }
         },
-        "required": ["sample", "mlst_alleles"]
+        "required": ["sample","fastmatch_category", "mlst_alleles"]
     }
 }
diff --git a/nextflow.config b/nextflow.config
index af4f488..bf7ba44 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -44,7 +44,6 @@ params {
     validate_params                  = true
 
     // FastMatch
-    fastmatch_category = null
     threshold = 1
 
     // Profile dists args
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 251cffa..a7131b8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -45,13 +45,6 @@
             "description": "The column header names of the metadata columns.",
             "default": "",
             "properties": {
-                "fastmatch_category": {
-                    "type": "string",
-                    "errorMessage": "Has to be either query or reference",
-                    "description": "Identify whether a sample is query or reference",
-                    "fa_icon": "far fa-sticky-note",
-                    "enum": ["query", "reference"]
-                },
                 "metadata_1_header": {
                     "type": "string",
                     "default": "metadata_1",
@@ -109,8 +102,7 @@
                     "pattern": "^[^\\n\\t\"]+$"
                 }
             },
-            "fa_icon": "far fa-clipboard",
-            "required": ["fastmatch_category"]
+            "fa_icon": "far fa-clipboard"
         },
         "fastmatch": {
             "title": "FastMatch",
@@ -120,7 +112,7 @@
             "properties": {
                 "threshold": {
                     "type": "integer",
-                    "description": "The output format for distances",
+                    "description": "Comparison score threshold value",
                     "default": 1
                 }
             }
diff --git a/tests/data/samplesheets/samplesheet-addsamplename.csv b/tests/data/samplesheets/samplesheet-addsamplename.csv
index a1b785a..48cc046 100644
--- a/tests/data/samplesheets/samplesheet-addsamplename.csv
+++ b/tests/data/samplesheets/samplesheet-addsamplename.csv
@@ -1,4 +1,4 @@
-sample,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,S 1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
-sample2,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
-sample3,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
+sample,fastmatch_category,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,S 1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
+sample2,query,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
+sample3,reference,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
diff --git a/tests/data/samplesheets/samplesheet-hamming.csv b/tests/data/samplesheets/samplesheet-hamming.csv
index d18a69c..0e03c71 100644
--- a/tests/data/samplesheets/samplesheet-hamming.csv
+++ b/tests/data/samplesheets/samplesheet-hamming.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,,
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,,
diff --git a/tests/data/samplesheets/samplesheet-hash-missing.csv b/tests/data/samplesheets/samplesheet-hash-missing.csv
index 9355c3d..d06d53a 100644
--- a/tests/data/samplesheets/samplesheet-hash-missing.csv
+++ b/tests/data/samplesheets/samplesheet-hash-missing.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,,
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,,
diff --git a/tests/data/samplesheets/samplesheet-hash-more-missing.csv b/tests/data/samplesheets/samplesheet-hash-more-missing.csv
index 4ee53c9..5c4a4b4 100644
--- a/tests/data/samplesheets/samplesheet-hash-more-missing.csv
+++ b/tests/data/samplesheets/samplesheet-hash-more-missing.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,,
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,,
diff --git a/tests/data/samplesheets/samplesheet-little-metadata.csv b/tests/data/samplesheets/samplesheet-little-metadata.csv
index 3e721de..138469e 100644
--- a/tests/data/samplesheets/samplesheet-little-metadata.csv
+++ b/tests/data/samplesheets/samplesheet-little-metadata.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8
diff --git a/tests/data/samplesheets/samplesheet-mismatched-ids.csv b/tests/data/samplesheets/samplesheet-mismatched-ids.csv
index 632768d..ffef4ca 100644
--- a/tests/data/samplesheets/samplesheet-mismatched-ids.csv
+++ b/tests/data/samplesheets/samplesheet-mismatched-ids.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
-sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
-sampleC,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
+sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
+sampleC,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
diff --git a/tests/data/samplesheets/samplesheet-no-metadata.csv b/tests/data/samplesheets/samplesheet-no-metadata.csv
index 9d67864..f752374 100644
--- a/tests/data/samplesheets/samplesheet-no-metadata.csv
+++ b/tests/data/samplesheets/samplesheet-no-metadata.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,
diff --git a/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv b/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv
index d5d42f0..ab5abab 100644
--- a/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv
+++ b/tests/data/samplesheets/samplesheet-partial-mismatched-ids.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
-sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
+sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
diff --git a/tests/data/samplesheets/samplesheet-tabs.csv b/tests/data/samplesheets/samplesheet-tabs.csv
index 56b4243..b863db5 100644
--- a/tests/data/samplesheets/samplesheet-tabs.csv
+++ b/tests/data/samplesheets/samplesheet-tabs.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a	b,,,,,,,
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a	b,,,,
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a	b
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a	b,,,,,,,
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a	b,,,,
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a	b
diff --git a/tests/data/samplesheets/samplesheet1.csv b/tests/data/samplesheets/samplesheet1.csv
index 3200344..55cbc2d 100644
--- a/tests/data/samplesheets/samplesheet1.csv
+++ b/tests/data/samplesheets/samplesheet1.csv
@@ -1,4 +1,4 @@
-sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
-sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
-sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
-sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
+sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
+sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8
+sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8
+sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8
diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test
index 14665d5..f960a2b 100644
--- a/tests/pipelines/main.nf.test
+++ b/tests/pipelines/main.nf.test
@@ -12,7 +12,6 @@ nextflow_pipeline {
                 outdir = "results"
 
                 pd_distm = "scaled"
-                gm_thresholds = "50,20,0"
 
                 metadata_1_header = "myheader_1"
                 metadata_2_header = "myheader_2"
@@ -59,7 +58,6 @@ nextflow_pipeline {
                 input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
                 outdir = "results"
 
-                gm_thresholds = "2,1,0"
             }
         }
 
@@ -188,24 +186,6 @@ nextflow_pipeline {
         }
     }
 
-    test("Test fail pipeline if invalid delimiter set") {
-        tag "pipeline_failure_invalid_delimiter"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_delimiter = ';'
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stderr.contains('* --gm_delimiter: string [;] does not match pattern ^[A-Fa-f0-9\\._-]+$ (;)')
-        }
-    }
-
     test("Full pipeline with no metadata") {
         tag "pipeline_no_metadata"
 
diff --git a/tests/pipelines/main_gm_thresholds.nf.test b/tests/pipelines/main_gm_thresholds.nf.test
index f72b365..aa90ecf 100644
--- a/tests/pipelines/main_gm_thresholds.nf.test
+++ b/tests/pipelines/main_gm_thresholds.nf.test
@@ -3,100 +3,6 @@ nextflow_pipeline {
     name "Integration Tests of adjusting gm_thresholds parameter for clustering"
     script "main.nf"
 
-    test("Test fail pipeline if null threshold set") {
-        tag "pipeline_failure_null_threshold"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_thresholds = null
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stdout.contains("ERROR ~ --gm_thresholds null: Cannot pass null or empty string")
-        }
-    }
-
-    test("Test fail pipeline if empty threshold set") {
-        tag "pipeline_failure_no_threshold"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_thresholds = ""
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stdout.contains("ERROR ~ --gm_thresholds : Cannot pass null or empty string")
-        }
-    }
-
-    test("Test fail pipeline if negative threshold set") {
-        tag "pipeline_failure_negative_threshold"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_thresholds = "-1"
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stderr.contains('* --gm_thresholds: string [-1] does not match pattern ^(\\d+(\\.\\d+)?,)*\\d+(\\.\\d+)?$ (-1)')
-        }
-    }
-
-    test("Test fail pipeline if mismatch between thresholds and scaled distm") {
-        tag "pipeline_failure_threshold_scaled"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_thresholds = "200,50"
-                pd_distm = "scaled"
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stdout.contains("ERROR ~ '--pd_distm scaled' is set, but '--gm_thresholds 200,50' contains thresholds outside of range [0, 100]."
-                                            + " Please either set '--pd_distm hamming' or adjust the threshold values.")
-        }
-    }
-
-    test("Test fail pipeline if mismatch between thresholds and hamming distm") {
-        tag "pipeline_failure_threshold_hamming"
-
-        when {
-            params {
-                input = "$baseDir/tests/data/samplesheets/samplesheet-hamming.csv"
-                outdir = "results"
-
-                gm_thresholds = "2,0.5"
-                pd_distm = "hamming"
-            }
-        }
-
-        then {
-            assert workflow.failed
-            assert workflow.stdout.contains("ERROR ~ '--pd_distm hamming' is set, but '--gm_thresholds 2,0.5' contains fractions."
-                                            + " Please either set '--pd_distm scaled' or remove fractions from distance thresholds.")
-        }
-    }
-
     test("Test pipeline with single threshold set to 0") {
         tag "pipeline_thresh_0"
 
diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf
index 82c3a14..c9e7bff 100644
--- a/workflows/fastmatchirida.nf
+++ b/workflows/fastmatchirida.nf
@@ -127,22 +127,7 @@ workflow FASTMATCH {
         exit 1, "--pd_columns ${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now."
     }
 
-    if(params.gm_thresholds == null || params.gm_thresholds == ""){
-        exit 1, "--gm_thresholds ${params.gm_thresholds}: Cannot pass null or empty string"
-    }
-
-    gm_thresholds_list = params.gm_thresholds.toString().split(',')
-    if (params.pd_distm == 'hamming') {
-        if (gm_thresholds_list.any { it != null && it.contains('.') }) {
-            exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--gm_thresholds ${params.gm_thresholds}' contains fractions."
-                    + " Please either set '--pd_distm scaled' or remove fractions from distance thresholds.")
-        }
-    } else if (params.pd_distm == 'scaled') {
-        if (gm_thresholds_list.any { it != null && (it as Float < 0.0 || it as Float > 100.0) }) {
-            exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--gm_thresholds ${params.gm_thresholds}' contains thresholds outside of range [0, 100]."
-                    + " Please either set '--pd_distm hamming' or adjust the threshold values.")
-        }
-    } else {
+    if ((params.pd_distm != 'hamming') & (params.pd_distm != 'scaled')) {
         exit 1, "'--pd_distm ${params.pd_distm}' is an invalid value. Please set to either 'hamming' or 'scaled'."
     }
 

From dc91ff45a7963ec6fc93d2bf2d506c3a6affcb0d Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Tue, 10 Dec 2024 16:21:34 -0500
Subject: [PATCH 4/9] Apply prettier formatting to schema_input.json

---
 assets/schema_input.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 3872a6d..92d1399 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -90,6 +90,6 @@
                 "pattern": "^[^\\n\\t\"]+$"
             }
         },
-        "required": ["sample","fastmatch_category", "mlst_alleles"]
+        "required": ["sample", "fastmatch_category", "mlst_alleles"]
     }
 }

From 6448399acc998b09b34d2f6de4d0a1564ef89729 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 11 Dec 2024 10:51:51 -0500
Subject: [PATCH 5/9] Modification to UI

---
 assets/schema_input.json    | 3 ++-
 nextflow.config             | 2 +-
 nextflow_schema.json        | 8 ++++----
 workflows/fastmatchirida.nf | 1 -
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 92d1399..c318bdb 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -31,7 +31,8 @@
                 "errorMessage": "Has to be either query or reference",
                 "description": "Identify whether a sample is query or reference",
                 "fa_icon": "far fa-sticky-note",
-                "enum": ["query", "reference"]
+                "enum": ["query", "reference"],
+                "default": true
             },
             "metadata_1": {
                 "type": "string",
diff --git a/nextflow.config b/nextflow.config
index bf7ba44..bfda17f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -44,7 +44,7 @@ params {
     validate_params                  = true
 
     // FastMatch
-    threshold = 1
+    threshold = 1.0
 
     // Profile dists args
     pd_outfmt = "matrix"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a7131b8..2a07979 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -111,9 +111,9 @@
             "default": "",
             "properties": {
                 "threshold": {
-                    "type": "integer",
+                    "type": "number",
                     "description": "Comparison score threshold value",
-                    "default": 1
+                    "default": 1.0
                 }
             }
         },
@@ -141,14 +141,14 @@
                     "description": "The maximum proportion of missing data per locus for a locus to be kept in the analysis",
                     "minimum": 0,
                     "maximum": 1,
-                    "default": 1.0
+                    "default": 1
                 },
                 "pd_sample_quality_threshold": {
                     "type": "number",
                     "description": "The maximum proportion of missing data per sample for a sample to be kept in the analysis",
                     "minimum": 0,
                     "maximum": 1,
-                    "default": 1.0
+                    "default": 1
                 },
                 "pd_file_type": {
                     "type": "string",
diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf
index c9e7bff..b601bc0 100644
--- a/workflows/fastmatchirida.nf
+++ b/workflows/fastmatchirida.nf
@@ -66,7 +66,6 @@ def prepareFilePath(String filep){
 workflow FASTMATCH {
     SAMPLE_HEADER = "sample"
     ch_versions = Channel.empty()
-
     // Track processed IDs
     def processedIDs = [] as Set
 

From b12fb5c244379fa4613bc26b4db4cf27d0e327bb Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 11 Dec 2024 13:36:37 -0500
Subject: [PATCH 6/9] Convert blank column entries of fastmatch_category to
 reference

---
 assets/schema_input.json    | 5 ++---
 workflows/fastmatchirida.nf | 9 ++++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index c318bdb..25cdb85 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -31,8 +31,7 @@
                 "errorMessage": "Has to be either query or reference",
                 "description": "Identify whether a sample is query or reference",
                 "fa_icon": "far fa-sticky-note",
-                "enum": ["query", "reference"],
-                "default": true
+                "enum": ["query", "reference"]
             },
             "metadata_1": {
                 "type": "string",
@@ -91,6 +90,6 @@
                 "pattern": "^[^\\n\\t\"]+$"
             }
         },
-        "required": ["sample", "fastmatch_category", "mlst_alleles"]
+        "required": ["sample", "mlst_alleles"]
     }
 }
diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf
index b601bc0..2a0a8a1 100644
--- a/workflows/fastmatchirida.nf
+++ b/workflows/fastmatchirida.nf
@@ -71,7 +71,7 @@ workflow FASTMATCH {
 
     // Create a new channel of metadata from a sample sheet
     // NB: `input` corresponds to `params.input` and associated sample sheet schema
-    input = Channel.fromSamplesheet("input")
+    input = Channel.fromSamplesheet("input").view()
     // and remove non-alphanumeric characters in sample_names (meta.id), whilst also correcting for duplicate sample_names (meta.id)
     .map { meta, mlst_file ->
             if (!meta.id) {
@@ -86,8 +86,11 @@ workflow FASTMATCH {
             }
             // Add the ID to the set of processed IDs
             processedIDs << meta.id
-
-            tuple(meta, mlst_file)}
+            // If the fastmatch_category is blank make the default "reference"
+            if (!meta.ref_query) {
+                meta.ref_query = "reference"
+            }
+            tuple(meta, mlst_file)}.view()
     // Make sure the ID in samplesheet / meta.id is the same ID
     // as the corresponding MLST JSON file:
     input_assure = INPUT_ASSURE(input)

From fa831f7e745dca4d81adb85670ad57cbfc02154b Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Wed, 11 Dec 2024 13:59:07 -0500
Subject: [PATCH 7/9] Make reference the default if left blank and fix drop
 down menu in IRIDA

---
 assets/schema_input.json    |  1 -
 workflows/fastmatchirida.nf | 10 ++++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 25cdb85..651d179 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -27,7 +27,6 @@
             },
             "fastmatch_category": {
                 "type": "string",
-                "meta": ["ref_query"],
                 "errorMessage": "Has to be either query or reference",
                 "description": "Identify whether a sample is query or reference",
                 "fa_icon": "far fa-sticky-note",
diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf
index 2a0a8a1..05c2698 100644
--- a/workflows/fastmatchirida.nf
+++ b/workflows/fastmatchirida.nf
@@ -71,9 +71,9 @@ workflow FASTMATCH {
 
     // Create a new channel of metadata from a sample sheet
     // NB: `input` corresponds to `params.input` and associated sample sheet schema
-    input = Channel.fromSamplesheet("input").view()
+    input = Channel.fromSamplesheet("input")
     // and remove non-alphanumeric characters in sample_names (meta.id), whilst also correcting for duplicate sample_names (meta.id)
-    .map { meta, mlst_file ->
+    .map { meta, mlst_file, ref_query ->
             if (!meta.id) {
                 meta.id = meta.irida_id
             } else {
@@ -87,10 +87,12 @@ workflow FASTMATCH {
             // Add the ID to the set of processed IDs
             processedIDs << meta.id
             // If the fastmatch_category is blank make the default "reference"
-            if (!meta.ref_query) {
+            if (!ref_query) {
                 meta.ref_query = "reference"
+            } else {
+                meta.ref_query = ref_query
             }
-            tuple(meta, mlst_file)}.view()
+            tuple(meta, mlst_file)}
     // Make sure the ID in samplesheet / meta.id is the same ID
     // as the corresponding MLST JSON file:
     input_assure = INPUT_ASSURE(input)

From 8de39dd016eb43d0050383d5744a711539e1b5b4 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Thu, 12 Dec 2024 09:13:03 -0500
Subject: [PATCH 8/9] Set minimum for threshold to 0

---
 nextflow_schema.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2a07979..432c31c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -113,7 +113,8 @@
                 "threshold": {
                     "type": "number",
                     "description": "Comparison score threshold value",
-                    "default": 1.0
+                    "default": 1.0,
+                    "minimum": 0
                 }
             }
         },

From a70d03039ae8400c205711b74878cd8fb327fb09 Mon Sep 17 00:00:00 2001
From: Steven Sutcliffe <sgsutcliffe@gmail.com>
Date: Thu, 12 Dec 2024 11:08:01 -0500
Subject: [PATCH 9/9] Check scaled values in range between 0-100

---
 workflows/fastmatchirida.nf | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/workflows/fastmatchirida.nf b/workflows/fastmatchirida.nf
index 05c2698..122a4e2 100644
--- a/workflows/fastmatchirida.nf
+++ b/workflows/fastmatchirida.nf
@@ -131,10 +131,18 @@ workflow FASTMATCH {
         exit 1, "--pd_columns ${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now."
     }
 
+    // Check that only 'hamming' or 'scaled' are provided to pd_distm
     if ((params.pd_distm != 'hamming') & (params.pd_distm != 'scaled')) {
         exit 1, "'--pd_distm ${params.pd_distm}' is an invalid value. Please set to either 'hamming' or 'scaled'."
     }
 
+    // When distances are 'scaled' they are percentages, so the threshold must lie within [0, 100]
+    if (params.pd_distm == 'scaled') {
+        if ((params.threshold < 0.0) || (params.threshold > 100.0)) {
+            exit 1, ("'--pd_distm ${params.pd_distm}' is set, but '--threshold ${params.threshold}' contains thresholds outside of range [0, 100]."
+                    + " Please either set '--pd_distm hamming' or adjust the threshold values.")
+        }
+    }
     // Options related to profile dists
     mapping_format = Channel.value(params.pd_outfmt)