diff --git a/CHANGELOG.md b/CHANGELOG.md index d6d9784..4a35a28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v1.0.1] +### Fixed +- The workflow failing due to commas in reference sequence names. + +### Changed +- How samples, reference files, and reference sequence names are listed in the summary section at the beginning of the report. + ## [v1.0.0] ### Added - Memory requirements for each process. diff --git a/bin/workflow_glue/report_utils/sections.py b/bin/workflow_glue/report_utils/sections.py index 2f26657..b329b2a 100755 --- a/bin/workflow_glue/report_utils/sections.py +++ b/bin/workflow_glue/report_utils/sections.py @@ -169,13 +169,15 @@ def summary(report, sample_names, ref_files, ref_seqs, stats_df, flagstat_df): quickly jump to an individual section with the links in the header bar. """ ) - ref_seqs_str = ", ".join(ref_seqs[:7]) + (", ..." if len(ref_seqs) > 7 else "") + ref_seqs_str = " ".join(ref_seqs[:7]) + ( + " ..." if len(ref_seqs) > 7 else "" + ) dom_util.raw( f""" {len(sample_names)} {plural_s("sample", len(sample_names))}:
- {', '.join(sample_names)}

+ {' '.join(sample_names)}

{len(ref_files)} {plural_s("reference file", len(ref_files))}:
- {', '.join(ref_files)}

+ {' '.join(ref_files)}

{len(ref_seqs)} {plural_s("reference sequence", len(ref_seqs))}:
{ref_seqs_str}

""" diff --git a/main.nf b/main.nf index bd8e6d7..357a6a2 100644 --- a/main.nf +++ b/main.nf @@ -80,15 +80,15 @@ process addStepsColumn { label "wfalignment" cpus 1 memory "2 GB" - input: path "lengths.csv" - output: path "lengths_with_steps.csv" + input: path "lengths.tsv" + output: path "lengths_with_steps.tsv" """ #!/usr/bin/env python import pandas as pd - all = pd.read_csv('lengths.csv') + all = pd.read_csv('lengths.tsv', sep='\\t') all["step"] = all["lengths"]//200 all = all.replace(0, 1) - all.to_csv('lengths_with_steps.csv', index=False, header=False) + all.to_csv('lengths_with_steps.tsv', index=False, header=False, sep='\\t') """ } @@ -106,7 +106,7 @@ process readDepthPerRef { def sample_name = meta["alias"] outfname = "${sample_name}.all_regions.bed.gz" """ - while IFS=, read -r name lengths steps; do + while IFS=\$'\\t' read -r name lengths steps; do mosdepth -n --fast-mode --by "\$steps" --chrom "\$name" -t $task.cpus \ ${sample_name}."\$name".temp $alignment \ || echo "No alignments for "\$name"" diff --git a/nextflow.config b/nextflow.config index a756c68..66278f4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,7 +51,7 @@ manifest { description = 'Align Nanopore reads and visualize mapping statistics.' mainScript = 'main.nf' nextflowVersion = '>=23.04.2' - version = 'v1.0.0' + version = 'v1.0.1' } epi2melabs { diff --git a/subworkflows/process_references.nf b/subworkflows/process_references.nf index b76dad2..c6d9914 100644 --- a/subworkflows/process_references.nf +++ b/subworkflows/process_references.nf @@ -27,13 +27,13 @@ process fx2tab { path reference output: path "*.names.txt", emit: names - path "*.lengths.csv", emit: lengths + path "*.lengths.tsv", emit: lengths script: """ seqkit fx2tab --length --name --only-id $reference > fx2tab.out cut -f1 fx2tab.out > ${reference}.names.txt - echo 'name,lengths' > ${reference}.lengths.csv - tr -s '[:blank:]' ',' < fx2tab.out >> ${reference}.lengths.csv + echo -e 'name\\tlengths' > ${reference}.lengths.tsv + cat fx2tab.out >> ${reference}.lengths.tsv """ } @@ -73,7 +73,7 @@ workflow process_references { names_per_ref_file = fx2tab.out.names lengths_per_ref_file = fx2tab.out.lengths lengths_combined = fx2tab.out.lengths.collectFile( - name: "combined_lengths.csv", keepHeader: true + name: "combined_lengths.tsv", keepHeader: true // we need to call `.first()` to get a value channel (`.collectFile()` // always returns a queue channel, even when it only produces a single file) ).first()