diff --git a/CHANGELOG.md b/CHANGELOG.md index 47ec307e..965ba463 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608) +- An option to restrict analysis to specific contigs [#644](https://github.com/nf-core/raredisease/pull/644) ### `Changed` @@ -28,6 +29,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters +| Old parameter | New parameter | +| ------------- | ------------------- | +| | extract_alignments | +| | restrict_to_contigs | + ### Tool updates | Tool | Old version | New version | diff --git a/conf/modules/align_bwa_bwamem2_bwameme.config b/conf/modules/align_bwa_bwamem2_bwameme.config index f9037d3d..ce28d438 100644 --- a/conf/modules/align_bwa_bwamem2_bwameme.config +++ b/conf/modules/align_bwa_bwamem2_bwameme.config @@ -50,6 +50,11 @@ process { ext.prefix = { "${meta.id}_sorted_merged" } } + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:EXTRACT_ALIGNMENTS' { + ext.prefix = { "${meta.id}_sorted_merged_extracted" } + ext.args2 = { params.restrict_to_contigs } + } + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:MARKDUPLICATES' { ext.args = "--TMP_DIR ." ext.prefix = { "${meta.id}_sorted_md" } diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index 9ae4aeb7..07074ec1 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -30,6 +30,11 @@ process { ext.prefix = { "${meta.id}_merged.bam" } } + withName: '.*ALIGN:ALIGN_SENTIEON:EXTRACT_ALIGNMENTS' { + ext.prefix = { "${meta.id}_merged_extracted" } + ext.args2 = { params.restrict_to_contigs } + } + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' { ext.args4 = { $params.rmdup ? 
"--rmdup" : '' } ext.prefix = { "${meta.id}_dedup.bam" } diff --git a/docs/usage.md b/docs/usage.md index bb2d3b99..028a064d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -168,15 +168,17 @@ The mandatory and optional parameters for each category are tabulated below. ##### 1. Alignment -| Mandatory | Optional | -| ------------------------------ | ------------------------------ | -| aligner1 | fasta_fai4 | -| fasta2 | bwamem24 | -| platform | bwa4 | -| mito_name/mt_fasta3 | bwameme4 | -| | known_dbsnp5 | -| | known_dbsnp_tbi5 | -| | min_trimmed_length6 | +| Mandatory | Optional | +| ------------------------------ | ------------------------------- | +| aligner1 | fasta_fai4 | +| fasta2 | bwamem24 | +| platform | bwa4 | +| mito_name/mt_fasta3 | bwameme4 | +| | known_dbsnp5 | +| | known_dbsnp_tbi5 | +| | min_trimmed_length6 | +| | extract_alignments | +| | restrict_to_contigs7 | 1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
@@ -184,6 +186,7 @@ The mandatory and optional parameters for each category are tabulated below. 4fasta_fai, bwa, bwamem2 and bwameme, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.
+7Used to limit your analysis to specific contigs. Can be used to remove alignments to unplaced contigs to minimize potential errors. This parameter should be used in conjunction with the `extract_alignments` parameter.
##### 2. QC stats from the alignment files diff --git a/nextflow.config b/nextflow.config index 72717041..f121d191 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,6 +24,8 @@ params { analysis_type = 'wgs' bwa_as_fallback = false bait_padding = 100 + extract_alignments = false + restrict_to_contigs = null run_mt_for_wes = false run_rtgvcfeval = false save_mapped_as_cram = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 74d58d05..7191f3e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -503,6 +503,13 @@ "help_text": "errorStrategy needs to be set to ignore for the bwamem2 process for the fallback to work. Turned off by default.", "fa_icon": "fas fa-toggle-on" }, + "extract_alignments": { + "type": "boolean", + "default": false, + "description": "After aligning the reads to a reference, extract alignments from specific regions/contigs and restrict the analysis to those regions/contigs.", + "help_text": "Set this to true, and specify the contig(s) using `restrict_to_contigs` parameter", + "fa_icon": "fas fa-toggle-on" + }, "platform": { "type": "string", "default": "illumina", @@ -516,6 +523,11 @@ "fa_icon": "fas fa-align-center", "enum": ["xy", "hetx", "sry"] }, + "restrict_to_contigs": { + "type": "string", + "description": "Can be specified as RNAME[:STARTPOS[-ENDPOS]]. 
Multiple regions should be separated by spaces", + "fa_icon": "fas fa-align-center" + }, "run_mt_for_wes": { "type": "boolean", "description": "Specifies whether to run mitochondrial analysis for wes samples", diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 14d545ea..496648f4 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -70,7 +70,7 @@ workflow ALIGN { ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2_BWAMEME.out.versions) } else if (params.aligner.equals("sentieon")) { - ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon + ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon ch_reads, ch_genome_fasta, ch_genome_fai, diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf index 15d3db9a..90af20f2 100644 --- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf @@ -7,9 +7,11 @@ include { BWA_MEM as BWAMEM_FALLBACK } from '../../../modules/nf-c include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MARKDUP } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main' include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' @@ -82,6 +84,14 @@ 
workflow ALIGN_BWA_BWAMEM2_BWAMEME { SAMTOOLS_MERGE ( bams.multiple, ch_genome_fasta, ch_genome_fai ) prepared_bam = bams.single.mix(SAMTOOLS_MERGE.out.bam) + // GET ALIGNMENT FROM SELECTED CONTIGS + if (params.extract_alignments) { + SAMTOOLS_INDEX_EXTRACT ( prepared_bam ) + extract_bam_sorted_indexed = prepared_bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) + EXTRACT_ALIGNMENTS( extract_bam_sorted_indexed, ch_genome_fasta, []) + prepared_bam = EXTRACT_ALIGNMENTS.out.bam + } + // Marking duplicates MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai ) SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam ) diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index dc5daa60..5b0cb708 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -2,10 +2,13 @@ // A subworkflow to annotate structural variants. // -include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' -include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main' -include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' -include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main' +include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' +include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main' +include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' +include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main' +include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main' + workflow ALIGN_SENTIEON { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] @@ -36,6 +39,14 @@ 
workflow ALIGN_SENTIEON { SENTIEON_READWRITER ( merge_bams_in.multiple, ch_genome_fasta, ch_genome_fai ) ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.output_index) + // GET ALIGNMENT FROM SELECTED CONTIGS + if (params.extract_alignments) { + EXTRACT_ALIGNMENTS( ch_bam_bai, ch_genome_fasta, []) + ch_bam_bai = EXTRACT_ALIGNMENTS.out.bam + SAMTOOLS_INDEX_EXTRACT ( EXTRACT_ALIGNMENTS.out.bam ) + ch_bam_bai = EXTRACT_ALIGNMENTS.out.bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) + } + SENTIEON_DATAMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, false ) SENTIEON_DEDUP ( ch_bam_bai, ch_genome_fasta, ch_genome_fai )