diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6d9784..4a35a28 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [v1.0.1]
+### Fixed
+- Workflow failure caused by commas in reference sequence names.
+
+### Changed
+- Samples, reference files, and reference sequence names in the summary section at the beginning of the report are now separated by spaces instead of commas.
+
## [v1.0.0]
### Added
- Memory requirements for each process.
diff --git a/bin/workflow_glue/report_utils/sections.py b/bin/workflow_glue/report_utils/sections.py
index 2f26657..b329b2a 100755
--- a/bin/workflow_glue/report_utils/sections.py
+++ b/bin/workflow_glue/report_utils/sections.py
@@ -169,13 +169,15 @@ def summary(report, sample_names, ref_files, ref_seqs, stats_df, flagstat_df):
quickly jump to an individual section with the links in the header bar.
"""
)
- ref_seqs_str = ", ".join(ref_seqs[:7]) + (", ..." if len(ref_seqs) > 7 else "")
+ ref_seqs_str = " ".join(ref_seqs[:7]) + (
+ " ..." if len(ref_seqs) > 7 else ""
+ )
dom_util.raw(
f"""
{len(sample_names)} {plural_s("sample", len(sample_names))}:
- {', '.join(sample_names)}
+ {' '.join(sample_names)}
{len(ref_files)} {plural_s("reference file", len(ref_files))}:
- {', '.join(ref_files)}
+ {' '.join(ref_files)}
{len(ref_seqs)} {plural_s("reference sequence", len(ref_seqs))}:
{ref_seqs_str}
"""
diff --git a/main.nf b/main.nf
index bd8e6d7..357a6a2 100644
--- a/main.nf
+++ b/main.nf
@@ -80,15 +80,15 @@ process addStepsColumn {
label "wfalignment"
cpus 1
memory "2 GB"
- input: path "lengths.csv"
- output: path "lengths_with_steps.csv"
+ input: path "lengths.tsv"
+ output: path "lengths_with_steps.tsv"
"""
#!/usr/bin/env python
import pandas as pd
- all = pd.read_csv('lengths.csv')
+ all = pd.read_csv('lengths.tsv', sep='\\t')
all["step"] = all["lengths"]//200
all = all.replace(0, 1)
- all.to_csv('lengths_with_steps.csv', index=False, header=False)
+ all.to_csv('lengths_with_steps.tsv', index=False, header=False, sep='\\t')
"""
}
@@ -106,7 +106,7 @@ process readDepthPerRef {
def sample_name = meta["alias"]
outfname = "${sample_name}.all_regions.bed.gz"
"""
- while IFS=, read -r name lengths steps; do
+ while IFS=\$'\\t' read -r name lengths steps; do
mosdepth -n --fast-mode --by "\$steps" --chrom "\$name" -t $task.cpus \
${sample_name}."\$name".temp $alignment \
|| echo "No alignments for "\$name""
diff --git a/nextflow.config b/nextflow.config
index a756c68..66278f4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -51,7 +51,7 @@ manifest {
description = 'Align Nanopore reads and visualize mapping statistics.'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
- version = 'v1.0.0'
+ version = 'v1.0.1'
}
epi2melabs {
diff --git a/subworkflows/process_references.nf b/subworkflows/process_references.nf
index b76dad2..c6d9914 100644
--- a/subworkflows/process_references.nf
+++ b/subworkflows/process_references.nf
@@ -27,13 +27,13 @@ process fx2tab {
path reference
output:
path "*.names.txt", emit: names
- path "*.lengths.csv", emit: lengths
+ path "*.lengths.tsv", emit: lengths
script:
"""
seqkit fx2tab --length --name --only-id $reference > fx2tab.out
cut -f1 fx2tab.out > ${reference}.names.txt
- echo 'name,lengths' > ${reference}.lengths.csv
- tr -s '[:blank:]' ',' < fx2tab.out >> ${reference}.lengths.csv
+ echo -e 'name\\tlengths' > ${reference}.lengths.tsv
+ cat fx2tab.out >> ${reference}.lengths.tsv
"""
}
@@ -73,7 +73,7 @@ workflow process_references {
names_per_ref_file = fx2tab.out.names
lengths_per_ref_file = fx2tab.out.lengths
lengths_combined = fx2tab.out.lengths.collectFile(
- name: "combined_lengths.csv", keepHeader: true
+ name: "combined_lengths.tsv", keepHeader: true
// we need to call `.first()` to get a value channel (`.collectFile()`
// always returns a queue channel, even when it only produces a single file)
).first()