Skip to content

Commit

Permalink
Merge branch 'CW-3173' into 'dev'
Browse files Browse the repository at this point in the history
Fix workflow failure when a reference sequence name contains a comma [CW-3173]

Closes CW-3173

See merge request epi2melabs/workflows/wf-alignment!116
  • Loading branch information
julibeg committed Jan 3, 2024
2 parents 0c42cd3 + cc64275 commit 6a9da93
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 13 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v1.0.1]
### Fixed
- Workflow failure when reference sequence names contain commas.

### Changed
- How samples, reference files, and reference sequence names are listed in the summary section at the beginning of the report.

## [v1.0.0]
### Added
- Memory requirements for each process.
Expand Down
8 changes: 5 additions & 3 deletions bin/workflow_glue/report_utils/sections.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,15 @@ def summary(report, sample_names, ref_files, ref_seqs, stats_df, flagstat_df):
quickly jump to an individual section with the links in the header bar.
"""
)
ref_seqs_str = ", ".join(ref_seqs[:7]) + (", ..." if len(ref_seqs) > 7 else "")
ref_seqs_str = " ".join(ref_seqs[:7]) + (
" ..." if len(ref_seqs) > 7 else ""
)
dom_util.raw(
f"""
<b>{len(sample_names)} {plural_s("sample", len(sample_names))}:</b><br>
{', '.join(sample_names)}<br><br>
{'&emsp;'.join(sample_names)}<br><br>
<b>{len(ref_files)} {plural_s("reference file", len(ref_files))}:</b><br>
{', '.join(ref_files)}<br><br>
{'&emsp;'.join(ref_files)}<br><br>
<b>{len(ref_seqs)} {plural_s("reference sequence", len(ref_seqs))}:</b><br>
{ref_seqs_str}<br><br>
"""
Expand Down
10 changes: 5 additions & 5 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ process addStepsColumn {
label "wfalignment"
cpus 1
memory "2 GB"
input: path "lengths.csv"
output: path "lengths_with_steps.csv"
input: path "lengths.tsv"
output: path "lengths_with_steps.tsv"
"""
#!/usr/bin/env python
import pandas as pd
all = pd.read_csv('lengths.csv')
all = pd.read_csv('lengths.tsv', sep='\\t')
all["step"] = all["lengths"]//200
all = all.replace(0, 1)
all.to_csv('lengths_with_steps.csv', index=False, header=False)
all.to_csv('lengths_with_steps.tsv', index=False, header=False, sep='\\t')
"""
}

Expand All @@ -106,7 +106,7 @@ process readDepthPerRef {
def sample_name = meta["alias"]
outfname = "${sample_name}.all_regions.bed.gz"
"""
while IFS=, read -r name lengths steps; do
while IFS=\$'\\t' read -r name lengths steps; do
mosdepth -n --fast-mode --by "\$steps" --chrom "\$name" -t $task.cpus \
${sample_name}."\$name".temp $alignment \
|| echo "No alignments for "\$name""
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ manifest {
description = 'Align Nanopore reads and visualize mapping statistics.'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
version = 'v1.0.0'
version = 'v1.0.1'
}

epi2melabs {
Expand Down
8 changes: 4 additions & 4 deletions subworkflows/process_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ process fx2tab {
path reference
output:
path "*.names.txt", emit: names
path "*.lengths.csv", emit: lengths
path "*.lengths.tsv", emit: lengths
script:
"""
seqkit fx2tab --length --name --only-id $reference > fx2tab.out
cut -f1 fx2tab.out > ${reference}.names.txt
echo 'name,lengths' > ${reference}.lengths.csv
tr -s '[:blank:]' ',' < fx2tab.out >> ${reference}.lengths.csv
echo -e 'name\\tlengths' > ${reference}.lengths.tsv
cat fx2tab.out >> ${reference}.lengths.tsv
"""
}

Expand Down Expand Up @@ -73,7 +73,7 @@ workflow process_references {
names_per_ref_file = fx2tab.out.names
lengths_per_ref_file = fx2tab.out.lengths
lengths_combined = fx2tab.out.lengths.collectFile(
name: "combined_lengths.csv", keepHeader: true
name: "combined_lengths.tsv", keepHeader: true
// we need to call `.first()` to get a value channel (`.collectFile()`
// always returns a queue channel, even when it only produces a single file)
).first()
Expand Down

0 comments on commit 6a9da93

Please sign in to comment.