diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b6dc26..2241b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,16 +3,66 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.0.0dev - [unreleased] +## [v2.0.0](https://github.com/nf-core/pairgenomealign/releases/tag/2.0.0) "Naga imo" - [January 31st, 2025] + +### `Breaking changes` + +- The LAST software was updated and it has new defaults for some of its + parameters. The alignments run with this pipeline will not be identical to + the ones from older versions. + +### `Added` + +- The `alignment/lastdb` directory is not output anymore. It consumed space, + is not usually needed for downstream analysis, and can be re-computed + identically if needed. +- The _many-to-one_ alignment file is not output anymore by default, to save + space. To keep this file, you can run the pipeline in `many-to-many` mode + with the `--m2m` parameter. +- The `--seed` parameter allows for all the existing values in the `lastdb` + program. +- Errors caused by absence of alignments at training or plotting steps + are now ignored. +- New parameter `--export_aln_to` that creates additional files containing + the alignments in a different format such as Axt, Chain, GFF or SAM. + +### `Fixed` + +- Incorrect detection of regions with 10 or more `N`s was corrected ([#18](https://github.com/nf-core/pairgenomealign/issues/18)). +- The `--lastal_params` parameter now works as intended instead of being ignored ([#22](https://github.com/nf-core/pairgenomealign/issues/22)). +- The _workflow summary_ is now properly sorted at the end of the MultiQC report ([#32](https://github.com/nf-core/pairgenomealign/issues/32)). 
+ +### `Parameters` + +| Old parameter | New parameter | +| ------------- | ----------------- | +| | `--export_aln_to` | + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `LAST` | 1542 | 1608 | +| `MultiQC` | 1.25.1 | 1.27 | ## [v1.1.1](https://github.com/nf-core/pairgenomealign/releases/tag/1.1.1) "Kani nabe" - [December 17th, 2024] -This release brings the pipeline to the standards of Nextflow 24.10.1 and -nf-core 3.1.0. No changes were made to the alignment process. +### `Fixed` + +- This release brings the pipeline to the standards of Nextflow 24.10.1 and + nf-core 3.1.0. ## [v1.1.0](https://github.com/nf-core/pairgenomealign/releases/tag/1.1.0) "Nattou maki" - [September 27th, 2024] -Added a new `softmask` parameter, to optionally keep original softmasking. +### `Added` + +- Added a new `softmask` parameter, to optionally keep original softmasking. + +### `Parameters` + +| Old parameter | New parameter | +| ------------- | ------------- | +| | `--softmask` | ## [v1.0.0](https://github.com/nf-core/pairgenomealign/releases/tag/1.0.0) "Sweet potato" - [August 27th, 2024] diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f17fa34..ce20901 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ report_comment: > - This report has been generated by the nf-core/pairgenomealign - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/pairgenomealign analysis pipeline. For information about + how to interpret these results, please see the documentation. 
report_section_order: "nf-core-pairgenomealign-methods-description": order: -1000 diff --git a/conf/modules.config b/conf/modules.config index 883a68e..bdf2ffb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -19,11 +19,11 @@ process { ] withName: CUTN_TARGET { - ext.args = { "-n 10" } + ext.args = { "-n 10 -p 100000" } } withName: CUTN_QUERY { - ext.args = { "-n 10" } + ext.args = { "-n 10 -p 100000" } } withName: 'ALIGNMENT_LASTDB' { @@ -33,6 +33,9 @@ process { // -c: soft-mask lowercase letters // -S2: index both strands ext.args = { "${params.softmask=="tantan" ? '-R01' : '-R11'} -c -u${params.seed} -S2" } + publishDir = [ + enabled: false + ] } withName: 'ALIGNMENT_SPLIT_O2M' { @@ -40,11 +43,6 @@ process { ext.args = { "--reverse -m${params.last_split_mismap}" } } - withName: 'ALIGNMENT_DOTPLOT_O2M' { - ext.prefix = { "${meta.id}.o2m_plt" } - ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } - } - withName: 'ALIGNMENT_SPLIT_M2O' { ext.prefix = { "${meta.id}.m2o_aln" } ext.args = { "-m${params.last_split_mismap}" } @@ -56,12 +54,19 @@ process { } withName: 'ALIGNMENT_TRAIN' { + // If the training step fails, it is likely that there are no + // alignments to be found with the selected seed. Ignore and therefore + // skip alignments instead of crashing the pipeline. + errorStrategy = { task.exitStatus in (130..145) ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' } ext.args = { "--revsym ${params.lastal_args}" } } withName: 'ALIGNMENT_LASTAL_M2O' { ext.prefix = { "${meta.id}.m2o_aln" } ext.args = { "--split-f=MAF+ ${params.lastal_args} ${params.lastal_extr_args}" } + publishDir = [ + enabled: false + ] } withName: 'ALIGNMENT_LASTAL_M2M' { @@ -69,19 +74,26 @@ process { ext.args = { "${params.lastal_args} ${params.lastal_extr_args}" } } + withName: 'LAST_DOTPLOT' { + // The number one cause of failure is absence of alignments. + errorStrategy = { task.exitStatus in (130..145) ? 'retry' : task.exitStatus == 1 ? 
'ignore' : 'finish' } + ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } + } + withName: 'ALIGNMENT_DOTPLOT_O2O' { ext.prefix = { "${meta.id}.o2o_plt" } - ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } withName: 'ALIGNMENT_DOTPLOT_M2O' { ext.prefix = { "${meta.id}.m2o_plt" } - ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } + } + + withName: 'ALIGNMENT_DOTPLOT_O2M' { + ext.prefix = { "${meta.id}.o2m_plt" } } withName: 'ALIGNMENT_DOTPLOT_M2M' { ext.prefix = { "${meta.id}.m2m_plt" } - ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } withName: 'MULTIQC' { diff --git a/docs/images/pairgenomealign-tubemap.png b/docs/images/pairgenomealign-tubemap.png index 6c12352..4933de2 100644 Binary files a/docs/images/pairgenomealign-tubemap.png and b/docs/images/pairgenomealign-tubemap.png differ diff --git a/docs/images/pairgenomealign-tubemap.svg b/docs/images/pairgenomealign-tubemap.svg index d467777..5a6b65f 100644 --- a/docs/images/pairgenomealign-tubemap.svg +++ b/docs/images/pairgenomealign-tubemap.svg @@ -2,14 +2,41 @@ + + id="layer1" + transform="translate(-3.6434059,-26.765467)"> + + d="M 315.534,100.25485 398.91387,89.980543" + id="path7" + sodipodi:nodetypes="cc" /> @@ -199,100 +233,100 @@ Indexseeds seqtk cutN + x="108.16129" + y="49.953014">seqtk cutN assemblyscan + x="57.712788" + y="49.718147">assembly-scan Target Target genome TrainParameters Alignquery(ies)on target - - - - - - - - Postprocessing Inputs QC Dotplots @@ -559,13 +559,13 @@ id="path66" /> Alignment @@ -671,7 +671,7 @@ font-size="209.87px" y="-15.552752" x="-517.08197" - class="st0 st1 st2">v1.0 + class="st0 st1 st2">v2.0 core/ + id="tspan73-3">core / Many-to-many + - + id="g122" + transform="translate(40.745847,-4.5199465)"> - - - - maf - - - - - maf - - - - - - - - One-tomany - - - png - - - + id="g9-8" + transform="matrix(0.12947131,0,0,-0.11466093,165.88324,338.78951)" + 
style="stroke-width:4.06863"> + + maf + + + + + maf + + + + + One-tomany + id="g97" + transform="matrix(0.12947131,0,0,-0.11466093,202.95513,400.95529)" + style="stroke-width:4.06862"> + + png + id="path97" + d="m 524.39681,1938.4637 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="fill:#ffffff;stroke-width:4.06862" /> + + + - - - - maf - - - - - maf - - - - - - - - Many-tomany - - - png - - - + id="g9-5" + transform="matrix(0.12947131,0,0,-0.11466093,137.30822,338.78951)" + style="stroke-width:4.0583"> + + maf + + + + + maf + + + + + Many-tomany + + + png + + + + id="g90" + transform="matrix(0.12947131,0,0,-0.11466093,124.25538,336.00412)" + style="stroke-width:4.0583"> + id="path89" + d="m 481.30947,1753.4644 h -2.51304 v 12.1255 c 0,0.076 -0.0119,0.1518 -0.0221,0.2284 -0.004,0.4815 -0.15713,0.9544 -0.48679,1.3291 l -20.17826,23.0502 c -0.006,0.01 -0.0118,0.01 -0.0161,0.014 -0.12027,0.1343 -0.2605,0.246 -0.4087,0.3427 -0.0439,0.029 -0.0882,0.054 -0.13415,0.08 -0.12844,0.07 -0.26468,0.1282 -0.40491,0.1702 -0.038,0.011 -0.0722,0.026 -0.1102,0.036 -0.15238,0.036 -0.31066,0.059 -0.47103,0.059 h -49.59068 c -2.26452,0 -4.10417,-1.8417 -4.10417,-4.1044 v -33.3298 h -2.51247 c -3.23944,0 -5.86611,-2.6255 -5.86611,-5.8661 v -30.5048 c 0,-3.2383 2.62667,-5.8655 5.86611,-5.8655 h 2.51266 v -20.8815 c 0,-2.2624 1.83965,-4.1043 4.10417,-4.1043 h 67.71875 c 2.26242,0 4.10417,1.8417 4.10417,4.1043 v 20.8815 h 2.51304 c 3.23849,0 5.86573,2.6272 5.86573,5.8655 v 30.5039 c -1.9e-4,3.2404 -2.62743,5.8661 -5.86592,5.8661 z m -74.33596,33.33 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="stroke-width:4.0583" /> + maf + + + + + + Many-toone + + id="g8-0-7" + transform="matrix(0.12947131,0,0,-0.11466093,123.60467,399.6544)" + 
style="stroke-width:4.06862"> + id="path5862-1-8" + d="m 598.73267,1905.0888 h -2.51304 v 12.1255 c 0,0.076 -0.0119,0.1518 -0.0221,0.2284 -0.004,0.4815 -0.15713,0.9544 -0.48679,1.3291 l -20.17826,23.0502 c -0.006,0.01 -0.0118,0.01 -0.0161,0.014 -0.12027,0.1343 -0.2605,0.246 -0.4087,0.3427 -0.0439,0.029 -0.0882,0.054 -0.13415,0.08 -0.12844,0.07 -0.26468,0.1282 -0.40491,0.1702 -0.038,0.011 -0.0722,0.026 -0.1102,0.036 -0.15238,0.036 -0.31066,0.059 -0.47103,0.059 h -49.59068 c -2.26452,0 -4.10417,-1.8417 -4.10417,-4.1044 v -33.3298 h -2.51247 c -3.23944,0 -5.86611,-2.6255 -5.86611,-5.8661 v -30.5048 c 0,-3.2383 2.62667,-5.8655 5.86611,-5.8655 h 2.51266 v -20.8815 c 0,-2.2624 1.83965,-4.1043 4.10417,-4.1043 h 67.71875 c 2.26242,0 4.10417,1.8417 4.10417,4.1043 v 20.8815 h 2.51304 c 3.23849,0 5.86573,2.6272 5.86573,5.8655 v 30.5039 c -1.9e-4,3.2404 -2.62743,5.8661 -5.86592,5.8661 z m -74.33596,33.33 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="stroke-width:4.06862" /> maf + x="519.43506" + y="-1878.6698" + id="tspan2-4-7" + style="font-size:27.538px;stroke-width:4.06862px"> png + id="path6276-4-0-5" + d="m 524.39681,1938.4637 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="fill:#ffffff;stroke-width:4.06862" /> - - - - Many-toone - - - png - - - - - + + png - - + id="tspan102" + style="font-size:27.538px;stroke-width:4.06862px"> png + - - - - - - + d="m 149.94309,103.01241 -0.0449,-74.237059" + id="path108" + sodipodi:nodetypes="cc" /> + + + + + + + + + - - + id="g9" + transform="matrix(0.12947131,0,0,-0.11466093,83.5977,335.96673)" + style="stroke-width:4.0583"> + id="path5862-69" + d="m 481.30947,1753.4644 h -2.51304 v 12.1255 c 0,0.076 -0.0119,0.1518 -0.0221,0.2284 -0.004,0.4815 -0.15713,0.9544 -0.48679,1.3291 l 
-20.17826,23.0502 c -0.006,0.01 -0.0118,0.01 -0.0161,0.014 -0.12027,0.1343 -0.2605,0.246 -0.4087,0.3427 -0.0439,0.029 -0.0882,0.054 -0.13415,0.08 -0.12844,0.07 -0.26468,0.1282 -0.40491,0.1702 -0.038,0.011 -0.0722,0.026 -0.1102,0.036 -0.15238,0.036 -0.31066,0.059 -0.47103,0.059 h -49.59068 c -2.26452,0 -4.10417,-1.8417 -4.10417,-4.1044 v -33.3298 h -2.51247 c -3.23944,0 -5.86611,-2.6255 -5.86611,-5.8661 v -30.5048 c 0,-3.2383 2.62667,-5.8655 5.86611,-5.8655 h 2.51266 v -20.8815 c 0,-2.2624 1.83965,-4.1043 4.10417,-4.1043 h 67.71875 c 2.26242,0 4.10417,1.8417 4.10417,4.1043 v 20.8815 h 2.51304 c 3.23849,0 5.86573,2.6272 5.86573,5.8655 v 30.5039 c -1.9e-4,3.2404 -2.62743,5.8661 -5.86592,5.8661 z m -74.33596,33.33 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="stroke-width:4.0583" /> + maf + id="path6276-4-9" + d="m 406.97361,1786.8393 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="fill:#ffffff;stroke-width:4.0583" /> + + + One-toone + - + id="g143" + transform="translate(-40.91903,0.88142314)"> + id="g140" + transform="matrix(0.12947131,0,0,-0.11466093,123.60467,399.6544)" + style="stroke-width:4.06862"> + id="path139" + d="m 598.73267,1905.0888 h -2.51304 v 12.1255 c 0,0.076 -0.0119,0.1518 -0.0221,0.2284 -0.004,0.4815 -0.15713,0.9544 -0.48679,1.3291 l -20.17826,23.0502 c -0.006,0.01 -0.0118,0.01 -0.0161,0.014 -0.12027,0.1343 -0.2605,0.246 -0.4087,0.3427 -0.0439,0.029 -0.0882,0.054 -0.13415,0.08 -0.12844,0.07 -0.26468,0.1282 -0.40491,0.1702 -0.038,0.011 -0.0722,0.026 -0.1102,0.036 -0.15238,0.036 -0.31066,0.059 -0.47103,0.059 h -49.59068 c -2.26452,0 -4.10417,-1.8417 -4.10417,-4.1044 v -33.3298 h -2.51247 c -3.23944,0 -5.86611,-2.6255 -5.86611,-5.8661 v -30.5048 c 0,-3.2383 2.62667,-5.8655 5.86611,-5.8655 h 2.51266 
v -20.8815 c 0,-2.2624 1.83965,-4.1043 4.10417,-4.1043 h 67.71875 c 2.26242,0 4.10417,1.8417 4.10417,4.1043 v 20.8815 h 2.51304 c 3.23849,0 5.86573,2.6272 5.86573,5.8655 v 30.5039 c -1.9e-4,3.2404 -2.62743,5.8661 -5.86592,5.8661 z m -74.33596,33.33 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="stroke-width:4.06862" /> maf + x="519.43506" + y="-1878.6698" + id="tspan139" + style="font-size:27.538px;stroke-width:4.06862px"> png + id="path140" + d="m 524.39681,1938.4637 h 47.5386 v -20.9983 c 0,-1.1338 0.91983,-2.0521 2.05208,-2.0521 h 18.12807 v -10.2809 h -67.71875 z m 67.71894,-95.336 h -67.71894 v 19.7705 h 67.71875 v -19.7705 z" + style="fill:#ffffff;stroke-width:4.06862" /> - - - One-toone - - - - png - - - - - + + png - - + id="tspan141" + style="font-size:27.538px;stroke-width:4.06862px"> png + diff --git a/docs/output.md b/docs/output.md index 5135c00..2663b4e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -38,9 +38,10 @@ Basic statistics on nucleotide content and contig length are collected for align - `alignment/` - `*.train` is the alignment parameters computed by `last-train` (optional) - `*.m2m_aln.maf.gz` is the _**many-to-many**_ alignment between _target_ and _query_ genomes. (optional through the `--m2m` option) - - `*.m2o_aln.maf.gz` is the _**many-to-one**_ alignment regions of the _target_ genome are matched at most once by the _query_ genome. + - `*.m2o_aln.maf.gz` is the _**many-to-one**_ alignment, in which regions of the _target_ genome are matched at most once by the _query_ genome. (optional through the `--m2m` option) + - `*.o2m_aln.maf.gz` is the _**one-to-many**_ alignment between the _target_ and _query_ genomes. (optional through the `--m2m` option) - `*.o2o_aln.maf.gz` is the _**one-to-one**_ alignment between the _target_ and _query_ genomes. 
- - `*.o2m_aln.maf.gz` is the _**one-to-many**_ alignment between the _target_ and _query_ genomes (optional). + - For each MAF file there will be an additional file in a format such as Axt, Chain, GFF or SAM if you use the `--export_aln_to` parameter. These files are always compressed. diff --git a/docs/usage.md b/docs/usage.md index 9ad779a..aac08e4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -42,6 +42,7 @@ The parameters are described in details in the [online documentation](https://nf - `--m2m` enables the computation of the _many-to-many_ alignment, which is the only one to be useful in the case of self-alignments, but which on the other hand can exhaust computing resources in the case of very large genomes. - Likewise, when comparing very similar and repetitive genomes (like two vertebrate genomes from the same species), any dotplot other than for the _one-to-one_ alignment will be heavy to compute and useless anyway, because the whole page will be filled with dots. The `--skip_dotplot_*` options are there to solve that problem. +- Users who need a format other than MAF can use the `--export_aln_to` parameter to generate additional files in that format. 
## Fixed arguments (taken from the [LAST cookbook][] and the [LAST tuning][] manual) diff --git a/modules.json b/modules.json index 86dbd4a..2cd5a3b 100644 --- a/modules.json +++ b/modules.json @@ -7,42 +7,47 @@ "nf-core": { "assemblyscan": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gfastats": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "last/dotplot": { "branch": "master", - "git_sha": "23a928df77b20861eac09ca998029ad47a7155cb", + "git_sha": "b651d72024027c922b5014aec80add091df63831", "installed_by": ["modules"] }, "last/lastal": { "branch": "master", - "git_sha": "882e20c8a18270f0d391a931cef4b80d1a0eeea5", + "git_sha": "b651d72024027c922b5014aec80add091df63831", "installed_by": ["modules"] }, "last/lastdb": { "branch": "master", - "git_sha": "3fa9017b55b9c26e1c327ca189d3942b55f4d496", + "git_sha": "b651d72024027c922b5014aec80add091df63831", + "installed_by": ["modules"] + }, + "last/mafconvert": { + "branch": "master", + "git_sha": "b651d72024027c922b5014aec80add091df63831", "installed_by": ["modules"] }, "last/mafswap": { "branch": "master", - "git_sha": "3fa9017b55b9c26e1c327ca189d3942b55f4d496", + "git_sha": "b651d72024027c922b5014aec80add091df63831", "installed_by": ["modules"] }, "last/split": { "branch": "master", - "git_sha": "882e20c8a18270f0d391a931cef4b80d1a0eeea5", + "git_sha": "b651d72024027c922b5014aec80add091df63831", "installed_by": ["modules"] }, "last/train": { "branch": "master", - "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", + "git_sha": "29a99284f30fa0c5fd0f5dca192c94b80de0be30", "installed_by": ["modules"] }, "multiqc": { @@ -52,7 +57,7 @@ }, "seqtk/cutn": { "branch": "master", - "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", 
"installed_by": ["modules"] } } diff --git a/modules/nf-core/assemblyscan/environment.yml b/modules/nf-core/assemblyscan/environment.yml index 34a02bc..3751ff0 100644 --- a/modules/nf-core/assemblyscan/environment.yml +++ b/modules/nf-core/assemblyscan/environment.yml @@ -1,7 +1,5 @@ -name: assemblyscan channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::assembly-scan=0.4.1 diff --git a/modules/nf-core/assemblyscan/meta.yml b/modules/nf-core/assemblyscan/meta.yml index 9ff7e3f..d8bcf06 100644 --- a/modules/nf-core/assemblyscan/meta.yml +++ b/modules/nf-core/assemblyscan/meta.yml @@ -3,6 +3,8 @@ description: Assembly summary statistics in JSON format keywords: - assembly - statistics + - summary + - json tools: - assemblyscan: description: Assembly summary statistics in JSON format @@ -10,30 +12,33 @@ tools: documentation: https://github.com/rpetit3/assembly-scan tool_dev_url: https://github.com/rpetit3/assembly-scan licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - assembly: - type: file - description: FASTA file for a given assembly - pattern: "*.fasta" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: FASTA file for a given assembly + pattern: "*.fasta" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - json: - type: file - description: Assembly statistics in JSON format - pattern: "*.json" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.json": + type: file + description: Assembly statistics in JSON format + pattern: "*.json" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@sateeshperi" - "@mjcipriano" diff --git a/modules/nf-core/assemblyscan/tests/main.nf.test b/modules/nf-core/assemblyscan/tests/main.nf.test index 7e5a7bb..7ca4444 100644 --- a/modules/nf-core/assemblyscan/tests/main.nf.test +++ b/modules/nf-core/assemblyscan/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } diff --git a/modules/nf-core/gfastats/environment.yml b/modules/nf-core/gfastats/environment.yml index 1c875ce..b47bbdb 100644 --- a/modules/nf-core/gfastats/environment.yml +++ b/modules/nf-core/gfastats/environment.yml @@ -1,7 +1,5 @@ -name: gfastats channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gfastats=1.3.6 diff --git a/modules/nf-core/gfastats/meta.yml b/modules/nf-core/gfastats/meta.yml index d0e97a8..a621343 100644 --- a/modules/nf-core/gfastats/meta.yml +++ b/modules/nf-core/gfastats/meta.yml @@ -16,56 +16,67 @@ tools: documentation: "https://github.com/vgl-hub/gfastats/tree/main/instructions" tool_dev_url: "https://github.com/vgl-hub/gfastats" doi: "10.1093/bioinformatics/btac460" - licence: "['MIT']" + licence: ["MIT"] + identifier: biotools:gfastats input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - assembly: - type: file - description: Draft assembly file - pattern: "*.{fasta,fastq,gfa}(.gz)?" 
- - out_fmt: - type: string - description: Output format (fasta, fastq, gfa) - - genome_size: - type: integer - description: estimated genome size (bp) for NG* statistics (optional). - - target: - type: string - description: target specific sequence by header, optionally with coordinates (optional). - - agpfile: - type: file - description: converts input agp to path and replaces existing paths. - - include_bed: - type: file - description: generates output on a subset list of headers or coordinates in 0-based bed format. - - exclude_bed: - type: file - description: opposite of --include-bed. They can be combined (no coordinates). - - instructions: - type: file - description: set of instructions provided as an ordered list. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: Draft assembly file + pattern: "*.{fasta,fastq,gfa}(.gz)?" + - - out_fmt: + type: string + description: Output format (fasta, fastq, gfa) + - - genome_size: + type: integer + description: estimated genome size (bp) for NG* statistics (optional). + - - target: + type: string + description: target specific sequence by header, optionally with coordinates + (optional). + - - agpfile: + type: file + description: converts input agp to path and replaces existing paths. + - - include_bed: + type: file + description: generates output on a subset list of headers or coordinates in + 0-based bed format. + - - exclude_bed: + type: file + description: opposite of --include-bed. They can be combined (no coordinates). + - - instructions: + type: file + description: set of instructions provided as an ordered list. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - assembly_summary: - type: file - description: Assembly summary statistics file - pattern: "*.assembly_summary" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.assembly_summary": + type: file + description: Assembly summary statistics file + pattern: "*.assembly_summary" - assembly: - type: file - description: The assembly as modified by gfastats - pattern: "*.{fasta,fastq,gfa}.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${out_fmt}.gz": + type: file + description: The assembly as modified by gfastats + pattern: "*.{fasta,fastq,gfa}.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/last/dotplot/environment.yml b/modules/nf-core/last/dotplot/environment.yml index b013d4d..7db722d 100644 --- a/modules/nf-core/last/dotplot/environment.yml +++ b/modules/nf-core/last/dotplot/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_dotplot" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/dotplot/main.nf b/modules/nf-core/last/dotplot/main.nf index 93f311d..963965b 100644 --- a/modules/nf-core/last/dotplot/main.nf +++ b/modules/nf-core/last/dotplot/main.nf @@ -3,9 +3,9 @@ process LAST_DOTPLOT { label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(maf), path(annot_b) diff --git a/modules/nf-core/last/dotplot/meta.yml b/modules/nf-core/last/dotplot/meta.yml index 5243743..58579b9 100644 --- a/modules/nf-core/last/dotplot/meta.yml +++ b/modules/nf-core/last/dotplot/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_dotplot" description: Makes a dotplot (Oxford Grid) of pair-wise sequence alignments @@ -15,45 +14,61 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - maf: - type: file - description: Multiple Aligment Format (MAF) file, compressed with gzip - pattern: "*.{maf.gz}" - - format: - type: string - description: Output format (PNG or GIF). 
- - annot_a: - type: file - description: Annotation file in BED, Repeamasker, genePred or AGP format for the first (horizontal) sequence - pattern: "*.{bed,bed.gz,out,out.gz,rmsk.txt,rmsk.txt.gz,genePred,genePred.gz,gff,gff.gz,gtf,gtf.gz,gap.txt,gap.txt.gz}" - - annot_b: - type: file - description: Annotation file in BED, Repeamasker, genePred or AGP format for the second (vertical) sequence - pattern: "*.{bed,bed.gz,out,out.gz,rmsk.txt,rmsk.txt.gz,genePred,genePred.gz,gff,gff.gz,gtf,gtf.gz,gap.txt,gap.txt.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - maf: + type: file + description: Multiple Alignment Format (MAF) file, compressed with gzip + pattern: "*.{maf.gz}" + - annot_b: + type: file + description: Annotation file in BED, Repeamasker, genePred or AGP format for + the second (vertical) sequence + pattern: "*.{bed,bed.gz,out,out.gz,rmsk.txt,rmsk.txt.gz,genePred,genePred.gz,gff,gff.gz,gtf,gtf.gz,gap.txt,gap.txt.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample2', single_end:false ]` + - annot_a: + type: file + description: Annotation file in BED, Repeamasker, genePred or AGP format for + the first (horizontal) sequence + pattern: "*.{bed,bed.gz,out,out.gz,rmsk.txt,rmsk.txt.gz,genePred,genePred.gz,gff,gff.gz,gtf,gtf.gz,gap.txt,gap.txt.gz}" + - - format: + type: string + description: Output format (PNG or GIF). output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - png: - type: file - description: Pairwise alignment dot plot image, in PNG format. - pattern: "*.png" - gif: - type: file - description: Pairwise alignment dot plot image, in GIF format. - pattern: "*.gif" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.gif": + type: file + description: Pairwise alignment dot plot image, in GIF format. + pattern: "*.gif" + - png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.png": + type: file + description: Pairwise alignment dot plot image, in PNG format. + pattern: "*.png" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@charles-plessy" maintainers: diff --git a/modules/nf-core/last/dotplot/tests/main.nf.test b/modules/nf-core/last/dotplot/tests/main.nf.test index fa5f767..3ea4038 100644 --- a/modules/nf-core/last/dotplot/tests/main.nf.test +++ b/modules/nf-core/last/dotplot/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { . collectFile(name: 'dummy_annot_b.bed', newLine: true) . map { [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true), it ] } input[1] = channel.of('MT192765.1\t1000\t1010') @@ -46,7 +46,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true), [] ] input[1] = [ [id: 'test'], [] ] @@ -73,7 +73,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true), [] ] input[1] = [ [id: 'test'], [] ] diff --git 
a/modules/nf-core/last/dotplot/tests/main.nf.test.snap b/modules/nf-core/last/dotplot/tests/main.nf.test.snap index cff5b56..3676f0f 100644 --- a/modules/nf-core/last/dotplot/tests/main.nf.test.snap +++ b/modules/nf-core/last/dotplot/tests/main.nf.test.snap @@ -2,37 +2,37 @@ "sarscov2 - contigs - genome - gif": { "content": [ [ - "versions.yml:md5,8a86fe4a0227c77ecfcc0aa21a3ece07" + "versions.yml:md5,143ef48514afd0c39da64d8f11fd18f7" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-27T09:19:30.116358" + "timestamp": "2025-01-21T09:18:32.629314" }, "sarscov2 - contigs - genome - png - stub": { "content": [ [ - "versions.yml:md5,8a86fe4a0227c77ecfcc0aa21a3ece07" + "versions.yml:md5,143ef48514afd0c39da64d8f11fd18f7" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-27T09:19:46.588825" + "timestamp": "2025-01-21T09:18:48.084683" }, "sarscov2 - contigs - genome - png": { "content": [ [ - "versions.yml:md5,8a86fe4a0227c77ecfcc0aa21a3ece07" + "versions.yml:md5,143ef48514afd0c39da64d8f11fd18f7" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-27T09:19:08.959252" + "timestamp": "2025-01-21T09:18:17.971205" } } \ No newline at end of file diff --git a/modules/nf-core/last/lastal/environment.yml b/modules/nf-core/last/lastal/environment.yml index c3a87c6..7db722d 100644 --- a/modules/nf-core/last/lastal/environment.yml +++ b/modules/nf-core/last/lastal/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_lastal" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/lastal/main.nf b/modules/nf-core/last/lastal/main.nf 
index 560ada1..9cd11d4 100644 --- a/modules/nf-core/last/lastal/main.nf +++ b/modules/nf-core/last/lastal/main.nf @@ -3,9 +3,9 @@ process LAST_LASTAL { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(fastx), path (param_file) diff --git a/modules/nf-core/last/lastal/meta.yml b/modules/nf-core/last/lastal/meta.yml index c14fa27..ed6610f 100644 --- a/modules/nf-core/last/lastal/meta.yml +++ b/modules/nf-core/last/lastal/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_lastal" description: Aligns query sequences to target sequences indexed with lastdb @@ -14,42 +13,51 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/last-train.rst" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - fastx: - type: file - description: FASTA/FASTQ file - pattern: "*.{fasta,fastq}" - - param_file: - type: file - description: Trained parameter file - pattern: "*.train" - - index: - type: directory - description: Directory containing the files of the LAST index - pattern: "lastdb/" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - fastx: + type: file + description: FASTA/FASTQ file + pattern: "*.{fasta,fastq}" + - param_file: + type: file + description: Trained parameter file + pattern: "*.train" + - - index: + type: directory + description: Directory containing the files of the LAST index + pattern: "lastdb/" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - maf: - type: file - description: Gzipped MAF (Multiple Alignment Format) file - pattern: "*.{maf.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.maf.gz": + type: file + description: Gzipped MAF (Multiple Alignment Format) file + pattern: "*.{maf.gz}" - multiqc: - type: file - description: Alignment summary for MultiQC - pattern: "*.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: Alignment summary for MultiQC + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@charles-plessy" maintainers: diff --git a/modules/nf-core/last/lastal/tests/main.nf.test b/modules/nf-core/last/lastal/tests/main.nf.test index b5c0730..c98f07f 100644 --- a/modules/nf-core/last/lastal/tests/main.nf.test +++ b/modules/nf-core/last/lastal/tests/main.nf.test @@ -20,7 +20,7 @@ nextflow_process { """ input[0] = [ [id:'genome'], // meta map - file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/lastdb.tar.gz', checkIfExists: true) ] """ } @@ -32,7 +32,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), [] ] input[1] = UNTAR.out.untar.map{ it[1] } @@ -58,7 +58,7 @@ nextflow_process { """ input[0] = [ [id:'genome'], // meta map - file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/lastdb.tar.gz', checkIfExists: true) ] """ } @@ -70,8 +70,8 @@ nextflow_process { """ input[0] = [ [ id:'contigs', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_par'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.par', checkIfExists: true) ] 
input[1] = UNTAR.out.untar.map{ it[1] } """ @@ -98,7 +98,7 @@ nextflow_process { """ input[0] = [ [id:'genome'], // meta map - file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/lastdb.tar.gz', checkIfExists: true) ] """ } @@ -110,7 +110,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), [] ] input[1] = UNTAR.out.untar.map{ it[1] } diff --git a/modules/nf-core/last/lastal/tests/main.nf.test.snap b/modules/nf-core/last/lastal/tests/main.nf.test.snap index 9245a96..50cc114 100644 --- a/modules/nf-core/last/lastal/tests/main.nf.test.snap +++ b/modules/nf-core/last/lastal/tests/main.nf.test.snap @@ -8,7 +8,7 @@ "id": "contigs", "single_end": false }, - "contigs.maf.gz:md5,902274b72657f62d270d284dc211aa7f" + "contigs.maf.gz:md5,3729bf099fecb196b8b5b49fc3487b56" ] ], "1": [ @@ -17,11 +17,11 @@ "id": "contigs", "single_end": false }, - "contigs.tsv:md5,f028e69bd64e54080b9a03fd809cba74" + "contigs.tsv:md5,38d678f139ea70765ce29966c743487b" ] ], "2": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ], "maf": [ [ @@ -29,7 +29,7 @@ "id": "contigs", "single_end": false }, - "contigs.maf.gz:md5,902274b72657f62d270d284dc211aa7f" + "contigs.maf.gz:md5,3729bf099fecb196b8b5b49fc3487b56" ] ], "multiqc": [ @@ -38,19 +38,19 @@ "id": "contigs", "single_end": false }, - "contigs.tsv:md5,f028e69bd64e54080b9a03fd809cba74" + "contigs.tsv:md5,38d678f139ea70765ce29966c743487b" ] ], "versions": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": 
"0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-02T17:57:48.589408" + "timestamp": "2025-01-22T10:13:12.144354" }, "sarscov2 - contigs - genome - stub": { "content": [ @@ -74,7 +74,7 @@ ] ], "2": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ], "maf": [ [ @@ -95,15 +95,15 @@ ] ], "versions": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-02T17:58:30.521811" + "timestamp": "2025-01-22T10:15:14.385573" }, "sarscov2 - contigs - genome - withparams": { "content": [ @@ -114,7 +114,7 @@ "id": "contigs", "single_end": false }, - "contigs.maf.gz:md5,8cb97b6daa34dbf9c723a2c4a984992d" + "contigs.maf.gz:md5,b2ea256c960899555dbc3fd1acbc43b4" ] ], "1": [ @@ -127,7 +127,7 @@ ] ], "2": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ], "maf": [ [ @@ -135,7 +135,7 @@ "id": "contigs", "single_end": false }, - "contigs.maf.gz:md5,8cb97b6daa34dbf9c723a2c4a984992d" + "contigs.maf.gz:md5,b2ea256c960899555dbc3fd1acbc43b4" ] ], "multiqc": [ @@ -148,14 +148,14 @@ ] ], "versions": [ - "versions.yml:md5,e0a425d7cbca674252a1e4328b247ca2" + "versions.yml:md5,23bb5d428e31caed85e991482fa9f967" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-02T17:58:09.677672" + "timestamp": "2025-01-22T10:14:13.280646" } } \ No newline at end of file diff --git a/modules/nf-core/last/lastdb/environment.yml b/modules/nf-core/last/lastdb/environment.yml index 9e98a10..7db722d 100644 --- a/modules/nf-core/last/lastdb/environment.yml +++ b/modules/nf-core/last/lastdb/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_lastdb" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/lastdb/main.nf b/modules/nf-core/last/lastdb/main.nf index 856b364..c69eeb6 100644 --- a/modules/nf-core/last/lastdb/main.nf +++ b/modules/nf-core/last/lastdb/main.nf @@ -3,9 +3,9 @@ process LAST_LASTDB { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(fastx) diff --git a/modules/nf-core/last/lastdb/meta.yml b/modules/nf-core/last/lastdb/meta.yml index c3b499e..5c7be37 100644 --- a/modules/nf-core/last/lastdb/meta.yml +++ b/modules/nf-core/last/lastdb/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_lastdb" description: Prepare sequences for subsequent alignment with lastal. @@ -14,31 +13,34 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - fastx: - type: file - description: > - Sequence file in FASTA or FASTQ format. May be compressed with gzip. 
- pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fastx: + type: file + description: > + Sequence file in FASTA or FASTQ format. May be compressed with gzip. + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - index: - type: directory - description: directory containing the files of the LAST index - pattern: "lastdb/" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - lastdb: + type: directory + description: directory containing the files of the LAST index + pattern: "lastdb/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@charles-plessy" maintainers: diff --git a/modules/nf-core/last/lastdb/tests/main.nf.test b/modules/nf-core/last/lastdb/tests/main.nf.test index 2e3aa2b..1d5d403 100644 --- a/modules/nf-core/last/lastdb/tests/main.nf.test +++ b/modules/nf-core/last/lastdb/tests/main.nf.test @@ -17,7 +17,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -39,7 +39,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] """ } @@ -62,7 +62,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } diff --git a/modules/nf-core/last/lastdb/tests/main.nf.test.snap b/modules/nf-core/last/lastdb/tests/main.nf.test.snap index ee2a113..41f3116 100644 --- a/modules/nf-core/last/lastdb/tests/main.nf.test.snap +++ b/modules/nf-core/last/lastdb/tests/main.nf.test.snap @@ -19,7 +19,7 @@ ] ], "1": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + "versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ], "index": [ [ @@ -38,15 +38,15 @@ ] ], "versions": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + "versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-06T16:42:08.735561" + "timestamp": "2025-01-22T10:16:49.233187" }, "sarscov2 - fastq gzipped": { "content": [ @@ -59,7 +59,7 @@ [ "test.bck:md5,05b14d8ac418b3193d9cc921086cea05", "test.des:md5,26ab49015cc572172b9efa50fc5190bc", - "test.prj:md5,b50003077b7c7357fb8bacdf6f87653d", + "test.prj:md5,a9e1fa5d50afd39e57aaab489ca2b0a2", "test.sds:md5,d3deb4c985081c9f5ad6684d405bd20b", "test.ssp:md5,5c17139a9022b0cb97f007146fa1c6da", "test.suf:md5,9ac359afa86a8964d81a87a1d4f05ef0", @@ -68,7 +68,7 @@ ] ], "1": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + "versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ], "index": [ [ @@ -78,7 +78,7 @@ [ "test.bck:md5,05b14d8ac418b3193d9cc921086cea05", "test.des:md5,26ab49015cc572172b9efa50fc5190bc", - "test.prj:md5,b50003077b7c7357fb8bacdf6f87653d", + "test.prj:md5,a9e1fa5d50afd39e57aaab489ca2b0a2", "test.sds:md5,d3deb4c985081c9f5ad6684d405bd20b", "test.ssp:md5,5c17139a9022b0cb97f007146fa1c6da", "test.suf:md5,9ac359afa86a8964d81a87a1d4f05ef0", @@ -87,15 +87,15 @@ ] ], "versions": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + 
"versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-06T16:41:07.4512" + "timestamp": "2025-01-22T10:16:13.506203" }, "sarscov2 - fasta": { "content": [ @@ -108,7 +108,7 @@ [ "test.bck:md5,157526d333b88523cb15ac4efe00738f", "test.des:md5,3a9ea6d336e113a74d7fdca5e7b623fc", - "test.prj:md5,b937b1565cb4c983c8fcd3780d3e151e", + "test.prj:md5,10b52b9f5adad3b9afd3d73897e588f9", "test.sds:md5,e7729db27ac7a5a109c9d48cfcdc9015", "test.ssp:md5,53524efdea3d8989201419a29e81ec1f", "test.suf:md5,ef7482260705bb8146acbbbdce6c0068", @@ -117,7 +117,7 @@ ] ], "1": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + "versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ], "index": [ [ @@ -127,7 +127,7 @@ [ "test.bck:md5,157526d333b88523cb15ac4efe00738f", "test.des:md5,3a9ea6d336e113a74d7fdca5e7b623fc", - "test.prj:md5,b937b1565cb4c983c8fcd3780d3e151e", + "test.prj:md5,10b52b9f5adad3b9afd3d73897e588f9", "test.sds:md5,e7729db27ac7a5a109c9d48cfcdc9015", "test.ssp:md5,53524efdea3d8989201419a29e81ec1f", "test.suf:md5,ef7482260705bb8146acbbbdce6c0068", @@ -136,14 +136,14 @@ ] ], "versions": [ - "versions.yml:md5,c7fa6cfa252a61c600a73d6341d47557" + "versions.yml:md5,8b5e6ad969c610f963ab9ec6bddea60e" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-06T16:40:05.874954" + "timestamp": "2025-01-22T10:15:48.482722" } } \ No newline at end of file diff --git a/modules/nf-core/last/mafconvert/environment.yml b/modules/nf-core/last/mafconvert/environment.yml new file mode 100644 index 0000000..7db722d --- /dev/null +++ b/modules/nf-core/last/mafconvert/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::last=1608 diff --git a/modules/nf-core/last/mafconvert/main.nf b/modules/nf-core/last/mafconvert/main.nf new file mode 100644 
index 0000000..3f02ed7 --- /dev/null +++ b/modules/nf-core/last/mafconvert/main.nf @@ -0,0 +1,55 @@ +process LAST_MAFCONVERT { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" + + input: + tuple val(meta), path(maf) + val(format) + + output: + tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz + tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz + tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz + tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz + tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz + tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz + tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz + tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz + tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + set -o pipefail + maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz + + # maf-convert has no --version option but lastdb (part of the same package) has. + cat <<-END_VERSIONS > versions.yml + "${task.process}": + last: \$(lastdb --version 2>&1 | sed 's/lastdb //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub | gzip --no-name > ${prefix}.${format}.gz + + # maf-convert has no --version option but lastdb (part of the same package) has. 
+ cat <<-END_VERSIONS > versions.yml + "${task.process}": + last: \$(lastdb --version 2>&1 | sed 's/lastdb //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/last/mafconvert/meta.yml b/modules/nf-core/last/mafconvert/meta.yml new file mode 100644 index 0000000..4b9d2cf --- /dev/null +++ b/modules/nf-core/last/mafconvert/meta.yml @@ -0,0 +1,132 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "last_mafconvert" +description: Converts MAF alignments in another format. +keywords: + - LAST + - convert + - alignment + - MAF +tools: + - "last": + description: "LAST finds & aligns related regions of sequences." + homepage: "https://gitlab.com/mcfrith/last" + documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/" + tool_dev_url: "https://gitlab.com/mcfrith/last" + licence: ["GPL v3-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - maf: + type: file + description: Multiple Alignment Format (MAF) file, optionally compressed with + gzip + pattern: "*.{maf.gz,maf}" + - - format: + type: string + description: Output format (one of axt, blast, blasttab, chain, gff, html, psl, + sam, or tab) +output: + - axt_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.axt.gz": + type: file + description: Gzipped pairwise alignment in Axt (Blastz) format (optional) + pattern: "*.axt.gz" + - blast_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.blast.gz": + type: file + description: Gzipped pairwise alignment in blast format (optional) + pattern: "*.blast.gz" + - blasttab_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.blasttab.gz": + type: file + description: Gzipped pairwise alignment in blasttab format (optional) + pattern: "*.blasttab.gz" + - chain_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.chain.gz": + type: file + description: Gzipped pairwise alignment in UCSC chain format (optional) + pattern: "*.chain.gz" + - gff_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.gff.gz": + type: file + description: Gzipped pairwise alignment in GFF format (optional) + pattern: "*.gff.gz" + - html_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.html.gz": + type: file + description: Gzipped pairwise alignment in HTML format (optional) + pattern: "*.html.gz" + - psl_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.psl.gz": + type: file + description: Gzipped pairwise alignment in PSL (BLAT) format (optional) + pattern: "*.psl.gz" + - sam_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.sam.gz": + type: file + description: Gzipped pairwise alignment in SAM format (optional) + pattern: "*.sam.gz" + - tab_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.tab.gz": + type: file + description: Gzipped pairwise alignment in TAB format (optional) + pattern: "*.tab.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@aleksandrabliznina" + - "@charles-plessy" +maintainers: + - "@charles-plessy" diff --git a/modules/nf-core/last/mafconvert/tests/main.nf.test b/modules/nf-core/last/mafconvert/tests/main.nf.test new file mode 100644 index 0000000..ea21d1f --- /dev/null +++ b/modules/nf-core/last/mafconvert/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process LAST_MAFCONVERT" + script "../main.nf" + process "LAST_MAFCONVERT" + + tag "modules" + tag "modules_nfcore" + tag "last" + tag "last/mafconvert" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [ + [ id:'contigs.genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + input[1] = 'psl' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'contigs.genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + input[1] = 'psl' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/last/mafconvert/tests/main.nf.test.snap b/modules/nf-core/last/mafconvert/tests/main.nf.test.snap new file mode 100644 index 0000000..8525a41 --- /dev/null +++ b/modules/nf-core/last/mafconvert/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + 
"4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "contigs.genome" + }, + "contigs.genome.psl.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + "versions.yml:md5,7c66667735aa9f79367b8b8fc8df4497" + ], + "axt_gz": [ + + ], + "blast_gz": [ + + ], + "blasttab_gz": [ + + ], + "chain_gz": [ + + ], + "gff_gz": [ + + ], + "html_gz": [ + + ], + "psl_gz": [ + [ + { + "id": "contigs.genome" + }, + "contigs.genome.psl.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "sam_gz": [ + + ], + "tab_gz": [ + + ], + "versions": [ + "versions.yml:md5,7c66667735aa9f79367b8b8fc8df4497" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-22T10:17:48.535961" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "contigs.genome" + }, + "contigs.genome.psl.gz:md5,515d3cff55d159309bedd38f47dd034b" + ] + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + "versions.yml:md5,7c66667735aa9f79367b8b8fc8df4497" + ], + "axt_gz": [ + + ], + "blast_gz": [ + + ], + "blasttab_gz": [ + + ], + "chain_gz": [ + + ], + "gff_gz": [ + + ], + "html_gz": [ + + ], + "psl_gz": [ + [ + { + "id": "contigs.genome" + }, + "contigs.genome.psl.gz:md5,515d3cff55d159309bedd38f47dd034b" + ] + ], + "sam_gz": [ + + ], + "tab_gz": [ + + ], + "versions": [ + "versions.yml:md5,7c66667735aa9f79367b8b8fc8df4497" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-22T10:17:13.689118" + } +} \ No newline at end of file diff --git a/modules/nf-core/last/mafconvert/tests/tags.yml b/modules/nf-core/last/mafconvert/tests/tags.yml new file mode 100644 index 0000000..4affbd2 --- /dev/null +++ b/modules/nf-core/last/mafconvert/tests/tags.yml @@ -0,0 +1,2 @@ +last/mafconvert: + - "modules/nf-core/last/mafconvert/**" diff --git a/modules/nf-core/last/mafswap/environment.yml 
b/modules/nf-core/last/mafswap/environment.yml index cc112af..7db722d 100644 --- a/modules/nf-core/last/mafswap/environment.yml +++ b/modules/nf-core/last/mafswap/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_mafswap" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/mafswap/main.nf b/modules/nf-core/last/mafswap/main.nf index 875a6af..811e4a9 100644 --- a/modules/nf-core/last/mafswap/main.nf +++ b/modules/nf-core/last/mafswap/main.nf @@ -3,9 +3,9 @@ process LAST_MAFSWAP { label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(maf) diff --git a/modules/nf-core/last/mafswap/meta.yml b/modules/nf-core/last/mafswap/meta.yml index aa0fc09..40e5947 100644 --- a/modules/nf-core/last/mafswap/meta.yml +++ b/modules/nf-core/last/mafswap/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_mafswap" description: Reorder alignments in a MAF file @@ -14,30 +13,34 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - maf: - type: file - description: Multiple Aligment Format (MAF) file, optionally compressed with gzip - pattern: "*.{maf.gz,maf}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - maf: + type: file + description: Multiple Alignment Format (MAF) file, optionally compressed with + gzip + pattern: "*.{maf.gz,maf}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - maf: - type: file - description: Multiple Aligment Format (MAF) file, compressed with gzip - pattern: "*.{maf.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.maf.gz": + type: file + description: Multiple Alignment Format (MAF) file, compressed with gzip + pattern: "*.{maf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@charles-plessy" maintainers: diff --git a/modules/nf-core/last/mafswap/tests/main.nf.test b/modules/nf-core/last/mafswap/tests/main.nf.test index 7aa7704..deacc7c 100644 --- a/modules/nf-core/last/mafswap/tests/main.nf.test +++ b/modules/nf-core/last/mafswap/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs.genome' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] """ } @@ -39,7 +39,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs.genome' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] """ } diff --git a/modules/nf-core/last/mafswap/tests/main.nf.test.snap b/modules/nf-core/last/mafswap/tests/main.nf.test.snap index cc085df..8c5b39a 100644 --- a/modules/nf-core/last/mafswap/tests/main.nf.test.snap +++ b/modules/nf-core/last/mafswap/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,9819f8873c15cc665cbee998cefb72dd" + "versions.yml:md5,71e18c05330e522695f775a9fb1375af" ], "maf": [ [ @@ -22,15 +22,15 @@ ] ], "versions": [ - "versions.yml:md5,9819f8873c15cc665cbee998cefb72dd" + "versions.yml:md5,71e18c05330e522695f775a9fb1375af" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-04T17:59:43.47396" + "timestamp": "2025-01-22T10:19:10.413649" }, "sarscov2 - 
contigs - genome - stub": { "content": [ @@ -44,7 +44,7 @@ ] ], "1": [ - "versions.yml:md5,9819f8873c15cc665cbee998cefb72dd" + "versions.yml:md5,71e18c05330e522695f775a9fb1375af" ], "maf": [ [ @@ -55,14 +55,14 @@ ] ], "versions": [ - "versions.yml:md5,9819f8873c15cc665cbee998cefb72dd" + "versions.yml:md5,71e18c05330e522695f775a9fb1375af" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-06-06T19:41:29.891962" + "timestamp": "2025-01-22T10:19:50.695101" } } \ No newline at end of file diff --git a/modules/nf-core/last/split/environment.yml b/modules/nf-core/last/split/environment.yml index 7d76b55..7db722d 100644 --- a/modules/nf-core/last/split/environment.yml +++ b/modules/nf-core/last/split/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_split" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/split/main.nf b/modules/nf-core/last/split/main.nf index 410d16f..b45879b 100644 --- a/modules/nf-core/last/split/main.nf +++ b/modules/nf-core/last/split/main.nf @@ -3,9 +3,9 @@ process LAST_SPLIT { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(maf) diff --git a/modules/nf-core/last/split/meta.yml b/modules/nf-core/last/split/meta.yml index 2e23f8b..e654b88 100644 --- a/modules/nf-core/last/split/meta.yml +++ b/modules/nf-core/last/split/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_split" description: Find split or spliced alignments in a MAF file @@ -15,35 +14,43 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - maf: - type: file - description: Multiple Aligment Format (MAF) file, compressed with gzip - pattern: "*.{maf.gz}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - maf: + type: file + description: Multiple Alignment Format (MAF) file, compressed with gzip + pattern: "*.{maf.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - maf: - type: file - description: Multiple Aligment Format (MAF) file, compressed with gzip - pattern: "*.{maf.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.maf.gz": + type: file + description: Multiple Alignment Format (MAF) file, compressed with gzip + pattern: "*.{maf.gz}" - multiqc: - type: file - description: Alignment summary for MultiQC - pattern: "*.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: Alignment summary for MultiQC + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@aleksandrabliznina" - "@charles-plessy" diff --git a/modules/nf-core/last/split/tests/main.nf.test b/modules/nf-core/last/split/tests/main.nf.test index 4460d69..0e09941 100644 --- a/modules/nf-core/last/split/tests/main.nf.test +++ b/modules/nf-core/last/split/tests/main.nf.test @@ -17,7 +17,7 @@ nextflow_process { """ input[0] = [ [ id:'sarscov.contigs.genome' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] """ } @@ -40,7 +40,7 @@ nextflow_process { """ input[0] = [ [ id:'sarscov.contigs.genome' ], // meta map - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] """ } diff --git a/modules/nf-core/last/split/tests/main.nf.test.snap b/modules/nf-core/last/split/tests/main.nf.test.snap index 953a654..9a65380 100644 --- a/modules/nf-core/last/split/tests/main.nf.test.snap +++ b/modules/nf-core/last/split/tests/main.nf.test.snap @@ -19,7 +19,7 @@ ] ], "2": [ - "versions.yml:md5,9e429d0800988ae0bbe5000827d34ad1" + "versions.yml:md5,a6fba12ee7bdf891a38befef38775071" ], "maf": [ [ @@ -38,15 +38,15 @@ ] ], "versions": [ - 
"versions.yml:md5,9e429d0800988ae0bbe5000827d34ad1" + "versions.yml:md5,a6fba12ee7bdf891a38befef38775071" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-02T11:45:00.535348" + "timestamp": "2025-01-22T10:21:13.045466" }, "sarscov2 - contigs_genome - stub": { "content": [ @@ -68,7 +68,7 @@ ] ], "2": [ - "versions.yml:md5,9e429d0800988ae0bbe5000827d34ad1" + "versions.yml:md5,a6fba12ee7bdf891a38befef38775071" ], "maf": [ [ @@ -87,14 +87,14 @@ ] ], "versions": [ - "versions.yml:md5,9e429d0800988ae0bbe5000827d34ad1" + "versions.yml:md5,a6fba12ee7bdf891a38befef38775071" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-02T11:45:21.243325" + "timestamp": "2025-01-22T10:21:48.04855" } } \ No newline at end of file diff --git a/modules/nf-core/last/train/environment.yml b/modules/nf-core/last/train/environment.yml index 5edaf64..7db722d 100644 --- a/modules/nf-core/last/train/environment.yml +++ b/modules/nf-core/last/train/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "last_train" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::last=1542" + - bioconda::last=1608 diff --git a/modules/nf-core/last/train/main.nf b/modules/nf-core/last/train/main.nf index 9f592b5..dad722e 100644 --- a/modules/nf-core/last/train/main.nf +++ b/modules/nf-core/last/train/main.nf @@ -3,9 +3,9 @@ process LAST_TRAIN { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1' : - 'biocontainers/last:1542--h43eeafb_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data' + : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}" input: tuple val(meta), path(fastx) @@ -30,17 +30,17 @@ process LAST_TRAIN { -P $task.cpus \\ ${index}/\$INDEX_NAME \\ $fastx \\ - > ${prefix}.\$INDEX_NAME.train + > ${prefix}.train - echo "id\tsubstitution_percent_identity\tlast -t\tlast -a\tlast -A\tlast -b\tlast -B\tlast -S" > ${prefix}.train.tsv - printf "\$(basename ${prefix}.\$INDEX_NAME.train .target.train)\t" >> ${prefix}.train.tsv - grep 'substitution percent identity' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$5}' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -t' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$2}' | sed -e 's/-t//' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -a' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$3}' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -A' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$3}' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -b' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$3}' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -B' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$3}' | tr '\n' '\t' >> ${prefix}.train.tsv - grep 'last -S' ${prefix}.\$INDEX_NAME.train | tail -n 1 | awk '{print \$3}' >> ${prefix}.train.tsv + echo "id\tsubstitution_percent_identity\tlast -t\tlast -a\tlast -A\tlast -b\tlast -B\tlast -S" > ${prefix}.train.tsv + printf "\$(basename ${prefix}.train .target.train)\t" >> ${prefix}.train.tsv + grep 'substitution percent identity' ${prefix}.train | tail -n 1 | awk '{print \$5}' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 
'last -t' ${prefix}.train | tail -n 1 | awk '{print \$2}' | sed -e 's/-t//' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 'last -a' ${prefix}.train | tail -n 1 | awk '{print \$3}' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 'last -A' ${prefix}.train | tail -n 1 | awk '{print \$3}' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 'last -b' ${prefix}.train | tail -n 1 | awk '{print \$3}' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 'last -B' ${prefix}.train | tail -n 1 | awk '{print \$3}' | tr '\\n' '\\t' >> ${prefix}.train.tsv + grep 'last -S' ${prefix}.train | tail -n 1 | awk '{print \$3}' >> ${prefix}.train.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -53,7 +53,7 @@ process LAST_TRAIN { def prefix = task.ext.prefix ?: "${meta.id}" """ INDEX_NAME=\$(basename \$(ls $index/*.des) .des) - touch ${prefix}.\$INDEX_NAME.train + touch ${prefix}.train touch ${prefix}.train.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/last/train/meta.yml b/modules/nf-core/last/train/meta.yml index d55e827..d91ceca 100644 --- a/modules/nf-core/last/train/meta.yml +++ b/modules/nf-core/last/train/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "last_train" description: Find suitable score parameters for sequence alignment @@ -14,38 +13,47 @@ tools: documentation: "https://gitlab.com/mcfrith/last/-/blob/main/doc/last-train.rst" tool_dev_url: "https://gitlab.com/mcfrith/last" licence: ["GPL v3-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - fastx: - type: file - description: FASTA/FASTQ file - pattern: "*.{fasta,fastq}" - - index: - type: directory - description: Directory containing the files of the LAST index - pattern: "lastdb/" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - fastx: + type: file + description: FASTA/FASTQ file + pattern: "*.{fasta,fastq}" + - - index: + type: directory + description: Directory containing the files of the LAST index + pattern: "lastdb/" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - param_file: - type: file - description: Trained parameter file - pattern: "*.train" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.train": + type: file + description: Trained parameter file + pattern: "*.train" - multiqc: - type: file - description: Alignment parameter summary for MultiQC - pattern: "*.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: Alignment parameter summary for MultiQC + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@aleksandrabliznina" - "@charles-plessy" diff --git a/modules/nf-core/last/train/tests/main.nf.test b/modules/nf-core/last/train/tests/main.nf.test index a4168f2..8325673 100644 --- a/modules/nf-core/last/train/tests/main.nf.test +++ b/modules/nf-core/last/train/tests/main.nf.test @@ -19,7 +19,7 @@ nextflow_process { """ input[0] = [ [id:'genome'], // meta map - file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/lastdb.tar.gz', checkIfExists: true) ] """ } @@ -31,7 +31,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] input[1] = UNTAR.out.untar.map{ it[1] } """ @@ -56,7 +56,7 @@ nextflow_process { """ input[0] = [ [id:'genome'], // meta map - file(params.test_data['sarscov2']['genome']['lastdb_tar_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/lastdb.tar.gz', checkIfExists: true) ] """ } @@ -68,7 +68,7 @@ nextflow_process { """ input[0] = [ [ id:'contigs', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] input[1] = UNTAR.out.untar.map{ it[1] } """ diff --git a/modules/nf-core/last/train/tests/main.nf.test.snap b/modules/nf-core/last/train/tests/main.nf.test.snap index a3a0cc8..537efdb 100644 --- a/modules/nf-core/last/train/tests/main.nf.test.snap +++ b/modules/nf-core/last/train/tests/main.nf.test.snap @@ -8,7 +8,7 @@ "id": "contigs", "single_end": false }, - "contigs..des.train:md5,d41d8cd98f00b204e9800998ecf8427e" + "contigs.train:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -21,7 +21,7 @@ ] ], "2": [ - "versions.yml:md5,c5578547acf9e77e1e8f6bf796e32ac2" + "versions.yml:md5,b2d4a4fce93a910c90768053127969b3" ], "multiqc": [ [ @@ -38,19 +38,19 @@ "id": "contigs", "single_end": false }, - "contigs..des.train:md5,d41d8cd98f00b204e9800998ecf8427e" + "contigs.train:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,c5578547acf9e77e1e8f6bf796e32ac2" + "versions.yml:md5,b2d4a4fce93a910c90768053127969b3" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-07-10T14:50:11.528587" + "timestamp": "2025-01-22T10:23:11.655537" }, "sarscov2 - genome - contigs": { "content": [ @@ -61,7 +61,7 @@ "id": "contigs", 
"single_end": false }, - "contigs.genome.train:md5,3d7e1c630705d83c6a11b6f28d5aefcb" + "contigs.train:md5,4b965e8de2fa644a196547cb487abc4e" ] ], "1": [ @@ -70,11 +70,11 @@ "id": "contigs", "single_end": false }, - "contigs.train.tsv:md5,f09bcd1a111241a3439258a43c2a1a4e" + "contigs.train.tsv:md5,a2fca4aabda82f0aa481085ce5258886" ] ], "2": [ - "versions.yml:md5,38234cf053c708e57cc080990f777411" + "versions.yml:md5,b2d4a4fce93a910c90768053127969b3" ], "multiqc": [ [ @@ -82,7 +82,7 @@ "id": "contigs", "single_end": false }, - "contigs.train.tsv:md5,f09bcd1a111241a3439258a43c2a1a4e" + "contigs.train.tsv:md5,a2fca4aabda82f0aa481085ce5258886" ] ], "param_file": [ @@ -91,18 +91,18 @@ "id": "contigs", "single_end": false }, - "contigs.genome.train:md5,3d7e1c630705d83c6a11b6f28d5aefcb" + "contigs.train:md5,4b965e8de2fa644a196547cb487abc4e" ] ], "versions": [ - "versions.yml:md5,38234cf053c708e57cc080990f777411" + "versions.yml:md5,b2d4a4fce93a910c90768053127969b3" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-07-03T18:04:34.914789" + "timestamp": "2025-01-30T18:37:11.305733" } } \ No newline at end of file diff --git a/modules/nf-core/seqtk/cutn/environment.yml b/modules/nf-core/seqtk/cutn/environment.yml index a57afbb..693aa5c 100644 --- a/modules/nf-core/seqtk/cutn/environment.yml +++ b/modules/nf-core/seqtk/cutn/environment.yml @@ -1,7 +1,5 @@ -name: seqtk_cutn channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/cutn/meta.yml b/modules/nf-core/seqtk/cutn/meta.yml index 1082867..2e39a47 100644 --- a/modules/nf-core/seqtk/cutn/meta.yml +++ b/modules/nf-core/seqtk/cutn/meta.yml @@ -6,35 +6,40 @@ keywords: - seqtk tools: - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. Seqtk mergepe command merges pair-end reads into one interleaved file. 
+ description: Seqtk is a fast and lightweight tool for processing sequences in + the FASTA or FASTQ format. Seqtk mergepe command merges pair-end reads into + one interleaved file. homepage: https://github.com/lh3/seqtk documentation: https://docs.csc.fi/apps/seqtk/ tool_dev_url: https://github.com/lh3/seqtk licence: ["MIT"] + identifier: biotools:seqtk input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: A single fasta file to be split. - pattern: "*.{fasta}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: A single fasta file to be split. + pattern: "*.{fasta}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bed: - type: file - description: The output bed which summarised locations of cuts - pattern: "*.{bed}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bed": + type: file + description: The output bed which summarised locations of cuts + pattern: "*.{bed}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/seqtk/cutn/tests/main.nf.test b/modules/nf-core/seqtk/cutn/tests/main.nf.test index a38ed41..46dfff1 100644 --- a/modules/nf-core/seqtk/cutn/tests/main.nf.test +++ b/modules/nf-core/seqtk/cutn/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] """ } @@ -41,7 +41,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] """ } diff --git a/nextflow.config b/nextflow.config index 808be67..a5a23d5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { lastal_extr_args = '' last_split_mismap = '1e-05' lastal_params = null + export_aln_to = 'no_export' // Schema validation default options validate_params = true @@ -269,7 +270,7 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24.10.1' - version = '2.0.0~dev' + version = '2.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 5d44c93..d085e1b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,8 +65,8 @@ "properties": { "seed": { "type": "string", - "enum": ["YASS", "NEAR", "MAM8", "RY128"], - "help_text": "LAST creates a 
database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. See for details.", + "enum": ["YASS", "NEAR", "MAM4", "MAM8", "RY4", "RY8", "RY16", "RY32", "RY64", "RY128"], + "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY4` to `RY128` that reduce run time and memory use at the expense of sensitivity, by only seeking seeds at ~1/4 to ~1/128 of positions in each sequence. See [https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst) for details.", "description": "Select the LAST seed to index the _target_ genome.", "default": "YASS", "fa_icon": "fas fa-seedling" @@ -74,7 +74,7 @@ "softmask": { "type": "string", "enum": ["tantan", "original"], - "help_text": "In this pipeline, letters soft-masked in lowercase are excluded from indexing (`lastdb -c`). By default, the original mask is removed and a new one is made with an internal version of the “tantan” tool. Set this option to `original` to keep the original soft-masking. 
See for details.", + "help_text": "In this pipeline, letters soft-masked in lowercase are excluded from indexing (`lastdb -c`). By default, the original mask is removed and a new one is made with an internal version of the \u201ctantan\u201d tool. Set this option to `original` to keep the original soft-masking. See [https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst) for details.", "description": "Customise the way to mask the _target_ genome.", "default": "tantan", "fa_icon": "fas fa-theater-masks" @@ -87,6 +87,27 @@ "description": "Arguments for the lastdb, last-train, lastal and last-split programs.", "default": "", "properties": { + "export_aln_to": { + "type": "string", + "default": "no_export", + "description": "Convert output to a different format than MAF.", + "enum": [ + "no_export", + "axt", + "bed", + "blast", + "blasttab", + "blasttab+", + "chain", + "gff", + "html", + "psl", + "sam", + "tab" + ], + "fa_icon": "fas fa-file-export", + "help_text": "Output extra files for the final _one-to-one_ alignment results in AXT, GFF or SAM format. This is useful for downstream tools that do not parse MAF. The files are always compressed with `gzip`." + }, "m2m": { "type": "boolean", "description": "Make a many to many alignment", @@ -353,9 +374,6 @@ { "$ref": "#/$defs/reference_genome_options" }, - { - "$ref": "#/$defs/reference_genome_options" - }, { "$ref": "#/$defs/institutional_config_options" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index c619f2e..c4f032f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,8 +21,8 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "InProgress", - "datePublished": "2025-01-24T04:43:05+00:00", + "creativeWorkStatus": "Stable", + "datePublished": "2025-01-28T00:45:16+00:00", "description": "

\n \n \n \"nf-core/pairgenomealign\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/pairgenomealign/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/pairgenomealign/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/pairgenomealign/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/pairgenomealign/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/pairgenomealign/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13910535-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13910535)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.10.1-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/pairgenomealign)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23pairgenomealign-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/pairgenomealign)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/pairgenomealign** is a bioinformatics pipeline that aligns one or more _query_ genomes to a _target_ genome, and plots pairwise representations.\n\n![Tubemap workflow summary](docs/images/pairgenomealign-tubemap.png \"Tubemap workflow summary\")\n\nThe main steps of the pipeline are:\n\n1. Genome QC ([`assembly-scan`](https://github.com/rpetit3/assembly-scan)).\n2. Genome indexing ([`lastdb`](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst)).\n3. Genome pairwise alignments ([`lastal`](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastal.rst)).\n4. Alignment plotting ([`last-dotplot`](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst)).\n\nThe pipeline can generate four kinds of outputs, called _many-to-many_, _many-to-one_, _one-to-many_ and _one-to-one_, depending on whether sequences of one genome are allowed match the other genome multiple times or not.\n\nThese alignments are output in [MAF](https://genome.ucsc.edu/FAQ/FAQformat.html#format5) format, and optional line plot representations are output in PNG format.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nquery_1,path-to-query-genome-file-one.fasta\nquery_2,path-to-query-genome-file-two.fasta\n```\n\nEach row represents a fasta file, this can also contain multiple rows to accomodate multiple query genomes in fasta format.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/pairgenomealign \\\n -profile \\\n --target sequencefile.fa \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/pairgenomealign/usage) and the [parameter documentation](https://nf-co.re/pairgenomealign/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/pairgenomealign/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/pairgenomealign/output).\n\n## Credits\n\n`nf-core/pairgenomealign` was originally written by [charles-plessy](https://github.com/charles-plessy); the original versions are available at .\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Mahdi Mohammed](https://github.com/U13bs1125) ported the original pipeline to _nf-core_ template 2.14.x.\n- [Martin Frith](https://github.com/mcfrith/), 
the author of LAST, gave us extensive feedback and advices.\n- [Michael Mansfield](https://github.com/mjmansfi) tested the pipeline and provided critical comments.\n- [Aleksandra Bliznina](https://github.com/aleksandrabliznina) contributed to the creation of the initial `last/*` modules.\n- [Jiashun Miao](https://github.com/miaojiashun) and [Huyen Pham](https://github.com/ngochuyenpham) tested the pipeline on vertebrate genomes.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#pairgenomealign` channel](https://nfcore.slack.com/channels/pairgenomealign) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use this pipeline, please cite:\n\n> **Extreme genome scrambling in marine planktonic Oikopleura dioica cryptic species.**\n> Charles Plessy, Michael J. Mansfield, Aleksandra Bliznina, Aki Masunaga, Charlotte West, Yongkai Tan, Andrew W. Liu, Jan Gra\u0161i\u010d, Mar\u00eda Sara del R\u00edo Pisula, Gaspar S\u00e1nchez-Serna, Marc Fabrega-Torrus, Alfonso Ferr\u00e1ndez-Rold\u00e1n, Vittoria Roncalli, Pavla Navratilova, Eric M. Thompson, Takeshi Onuma, Hiroki Nishida, Cristian Ca\u00f1estro, Nicholas M. Luscombe.\n> _Genome Res._ 2024. 34: 426-440; doi: [10.1101/2023.05.09.539028](https://doi.org/10.1101/gr.278295.123). 
PubMed ID: [38621828](https://pubmed.ncbi.nlm.nih.gov/38621828/)\n\n[OIST research news article](https://www.oist.jp/news-center/news/2024/4/25/oikopleura-who-species-identity-crisis-genome-community)\n\nAnd also please cite the [LAST papers](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-papers.rst).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { @@ -102,7 +102,7 @@ }, "mentions": [ { - "@id": "#017f6d41-4da1-4eaf-8758-6bb2d27bcdb1" + "@id": "#a2a70c41-44c5-4493-bc0b-7aff121fd330" } ], "name": "nf-core/pairgenomealign" @@ -138,7 +138,7 @@ } ], "dateCreated": "", - "dateModified": "2025-01-24T13:43:05Z", + "dateModified": "2025-01-28T09:45:16Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -173,10 +173,10 @@ }, "url": [ "https://github.com/nf-core/pairgenomealign", - "https://nf-co.re/pairgenomealign/dev/" + "https://nf-co.re/pairgenomealign/2.0.0/" ], "version": [ - "2.0.0~dev" + "2.0.0" ] }, { @@ -192,11 +192,11 @@ "version": "!>=24.10.1" }, { - "@id": "#017f6d41-4da1-4eaf-8758-6bb2d27bcdb1", + "@id": "#a2a70c41-44c5-4493-bc0b-7aff121fd330", "@type": "TestSuite", "instance": [ { - "@id": "#fc180ed9-2bf4-4640-b53e-a0e3e48a7e24" + "@id": "#c266dbbd-0802-41ae-9f2b-0a56b01c5491" } ], "mainEntity": { @@ -205,7 +205,7 @@ "name": "Test suite for nf-core/pairgenomealign" }, { - "@id": "#fc180ed9-2bf4-4640-b53e-a0e3e48a7e24", + "@id": "#c266dbbd-0802-41ae-9f2b-0a56b01c5491", "@type": "TestInstance", 
"name": "GitHub Actions workflow for testing nf-core/pairgenomealign", "resource": "repos/nf-core/pairgenomealign/actions/workflows/ci.yml", diff --git a/subworkflows/local/pairalign_m2m/main.nf b/subworkflows/local/pairalign_m2m/main.nf index 1bbe385..520a348 100644 --- a/subworkflows/local/pairalign_m2m/main.nf +++ b/subworkflows/local/pairalign_m2m/main.nf @@ -37,17 +37,24 @@ workflow PAIRALIGN_M2M { ch_target ) - // Train alignment parameters + // Train alignment parameters if not provided // - ALIGNMENT_TRAIN ( - ch_queries, - ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map - ) + if (params.lastal_params) { + ch_queries_with_params = ch_queries.map { row -> [ row[0], row[1], file(params.lastal_params, checkIfExists: true) ] } + training_results_for_multiqc = channel.empty() + } else { + ALIGNMENT_TRAIN ( + ch_queries, + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ) + ch_queries_with_params = ch_queries.join(ALIGNMENT_TRAIN.out.param_file) + training_results_for_multiqc = ALIGNMENT_TRAIN.out.multiqc.collect{ it[1] } + } // Align queries to target. 
This is a many-to-many alignment // ALIGNMENT_LASTAL_M2M ( - ch_queries.join(ALIGNMENT_TRAIN.out.param_file), + ch_queries_with_params, ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) @@ -103,13 +110,18 @@ workflow PAIRALIGN_M2M { emit: multiqc = Channel.empty() - .mix( ALIGNMENT_TRAIN.out.multiqc.collect{ it[1]} ) + .mix(training_results_for_multiqc) .mix(ALIGNMENT_SPLIT_O2O.out.multiqc.collect{ it[1]} ) m2m = ALIGNMENT_LASTAL_M2M.out.maf m2o = ALIGNMENT_SPLIT_M2O.out.maf o2m = ALIGNMENT_SPLIT_O2M.out.maf o2o = ALIGNMENT_SPLIT_O2O.out.maf - versions = ALIGNMENT_LASTDB.out.versions + versions = Channel.empty() + .mix( ALIGNMENT_LASTDB.out.versions) + .mix( ALIGNMENT_TRAIN.out.versions) + .mix( ALIGNMENT_LASTAL_M2M.out.versions) + .mix( ALIGNMENT_SPLIT_O2O.out.versions) + .mix(ALIGNMENT_DOTPLOT_O2O.out.versions) } /* diff --git a/subworkflows/local/pairalign_m2o/main.nf b/subworkflows/local/pairalign_m2o/main.nf index 8d902d2..383daaf 100644 --- a/subworkflows/local/pairalign_m2o/main.nf +++ b/subworkflows/local/pairalign_m2o/main.nf @@ -33,18 +33,25 @@ workflow PAIRALIGN_M2O { ch_target ) - // Train alignment parameters + // Train alignment parameters if not provided // - ALIGNMENT_TRAIN ( - ch_queries, - ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map - ) + if (params.lastal_params) { + ch_queries_with_params = ch_queries.map { row -> [ row[0], row[1], file(params.lastal_params, checkIfExists: true) ] } + training_results_for_multiqc = channel.empty() + } else { + ALIGNMENT_TRAIN ( + ch_queries, + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ) + ch_queries_with_params = ch_queries.join(ALIGNMENT_TRAIN.out.param_file) + training_results_for_multiqc = ALIGNMENT_TRAIN.out.multiqc.collect{ it[1] } + } // Align queries to target. 
// This directly computes a many-to-one alignment because of parameter modules // ALIGNMENT_LASTAL_M2O ( - ch_queries.join(ALIGNMENT_TRAIN.out.param_file), + ch_queries_with_params, ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) @@ -74,11 +81,16 @@ workflow PAIRALIGN_M2O { emit: multiqc = Channel.empty() - .mix( ALIGNMENT_TRAIN.out.multiqc.collect{ it[1]} ) + .mix(training_results_for_multiqc) .mix(ALIGNMENT_SPLIT_O2O.out.multiqc.collect{ it[1]} ) m2o = ALIGNMENT_LASTAL_M2O.out.maf o2o = ALIGNMENT_SPLIT_O2O.out.maf - versions = ALIGNMENT_LASTDB.out.versions + versions = Channel.empty() + .mix( ALIGNMENT_LASTDB.out.versions) + .mix( ALIGNMENT_TRAIN.out.versions) + .mix( ALIGNMENT_LASTAL_M2O.out.versions) + .mix( ALIGNMENT_SPLIT_O2O.out.versions) + .mix(ALIGNMENT_DOTPLOT_O2O.out.versions) } /* diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index 361380a..8c844aa 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -5,6 +5,7 @@ */ include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main' +include { LAST_MAFCONVERT as ALIGNMENT_EXP } from '../modules/nf-core/last/mafconvert/main' include { MULTIQC_ASSEMBLYSCAN_PLOT_DATA } from '../modules/local/multiqc_assemblyscan_plot_data/main' include { PAIRALIGN_M2M } from '../subworkflows/local/pairalign_m2m/main' include { SEQTK_CUTN as CUTN_TARGET } from '../modules/nf-core/seqtk/cutn/main' @@ -80,6 +81,10 @@ workflow PAIRGENOMEALIGN { pairalign_out = PAIRALIGN_M2M.out } + if (!(params.export_aln_to == "no_export")) { + ALIGNMENT_EXP(pairalign_out.o2o, params.export_aln_to) + } + // Collate and save software versions //