diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47ec307e..965ba463 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
- A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608)
+- An option to restrict analysis to specific contigs [#644](https://github.com/nf-core/raredisease/pull/644)
### `Changed`
@@ -28,6 +29,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Parameters
+| Old parameter | New parameter |
+| ------------- | ------------------- |
+| | extract_alignments |
+| | restrict_to_contigs |
+
### Tool updates
| Tool | Old version | New version |
diff --git a/conf/modules/align_bwa_bwamem2_bwameme.config b/conf/modules/align_bwa_bwamem2_bwameme.config
index f9037d3d..ce28d438 100644
--- a/conf/modules/align_bwa_bwamem2_bwameme.config
+++ b/conf/modules/align_bwa_bwamem2_bwameme.config
@@ -50,6 +50,11 @@ process {
ext.prefix = { "${meta.id}_sorted_merged" }
}
+ withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:EXTRACT_ALIGNMENTS' {
+ ext.prefix = { "${meta.id}_sorted_merged_extracted" }
+ ext.args2 = { params.restrict_to_contigs }
+ }
+
withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:MARKDUPLICATES' {
ext.args = "--TMP_DIR ."
ext.prefix = { "${meta.id}_sorted_md" }
diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config
index 9ae4aeb7..07074ec1 100644
--- a/conf/modules/align_sentieon.config
+++ b/conf/modules/align_sentieon.config
@@ -30,6 +30,11 @@ process {
ext.prefix = { "${meta.id}_merged.bam" }
}
+ withName: '.*ALIGN:ALIGN_SENTIEON:EXTRACT_ALIGNMENTS' {
+ ext.prefix = { "${meta.id}_merged_extracted" }
+ ext.args2 = { params.restrict_to_contigs }
+ }
+
withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' {
ext.args4 = { $params.rmdup ? "--rmdup" : '' }
ext.prefix = { "${meta.id}_dedup.bam" }
diff --git a/docs/usage.md b/docs/usage.md
index bb2d3b99..028a064d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -168,15 +168,17 @@ The mandatory and optional parameters for each category are tabulated below.
##### 1. Alignment
-| Mandatory | Optional |
-| ------------------------------ | ------------------------------ |
-| aligner1 | fasta_fai4 |
-| fasta2 | bwamem24 |
-| platform | bwa4 |
-| mito_name/mt_fasta3 | bwameme4 |
-| | known_dbsnp5 |
-| | known_dbsnp_tbi5 |
-| | min_trimmed_length6 |
+| Mandatory | Optional |
+| ------------------------------ | ------------------------------- |
+| aligner1 | fasta_fai4 |
+| fasta2 | bwamem24 |
+| platform | bwa4 |
+| mito_name/mt_fasta3 | bwameme4 |
+| | known_dbsnp5 |
+| | known_dbsnp_tbi5 |
+| | min_trimmed_length6 |
+| | extract_alignments |
+| | restrict_to_contigs7 |
1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
@@ -184,6 +186,7 @@ The mandatory and optional parameters for each category are tabulated below.
4fasta_fai, bwa, bwamem2 and bwameme, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.
+7Used to limit your analysis to specific contigs. Can be used to remove alignments to unplaced contigs to minimize potential errors. This parameter should be used in conjunction with the `extract_alignments` parameter.
##### 2. QC stats from the alignment files
diff --git a/nextflow.config b/nextflow.config
index 72717041..f121d191 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -24,6 +24,8 @@ params {
analysis_type = 'wgs'
bwa_as_fallback = false
bait_padding = 100
+ extract_alignments = false
+ restrict_to_contigs = null
run_mt_for_wes = false
run_rtgvcfeval = false
save_mapped_as_cram = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 74d58d05..7191f3e7 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -503,6 +503,13 @@
"help_text": "errorStrategy needs to be set to ignore for the bwamem2 process for the fallback to work. Turned off by default.",
"fa_icon": "fas fa-toggle-on"
},
+                "extract_alignments": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "After aligning the reads to a reference, extract alignments from specific regions/contigs and restrict the analysis to those regions/contigs.",
+                    "help_text": "Set this to true, and specify the contig(s) using `restrict_to_contigs` parameter",
+                    "fa_icon": "fas fa-toggle-on"
+                },
"platform": {
"type": "string",
"default": "illumina",
@@ -516,6 +523,11 @@
"fa_icon": "fas fa-align-center",
"enum": ["xy", "hetx", "sry"]
},
+ "restrict_to_contigs": {
+ "type": "string",
+                    "description": "Can be specified as RNAME[:STARTPOS[-ENDPOS]]. Multiple regions should be separated by a space.",
+ "fa_icon": "fas fa-align-center"
+ },
"run_mt_for_wes": {
"type": "boolean",
"description": "Specifies whether to run mitochondrial analysis for wes samples",
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index 14d545ea..496648f4 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -70,7 +70,7 @@ workflow ALIGN {
ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai
ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2_BWAMEME.out.versions)
} else if (params.aligner.equals("sentieon")) {
- ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon
+ ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon
ch_reads,
ch_genome_fasta,
ch_genome_fai,
diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
index 15d3db9a..90af20f2 100644
--- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
+++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
@@ -7,9 +7,11 @@ include { BWA_MEM as BWAMEM_FALLBACK } from '../../../modules/nf-c
include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main'
include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MARKDUP } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main'
include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main'
+include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main'
include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
@@ -82,6 +84,14 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME {
SAMTOOLS_MERGE ( bams.multiple, ch_genome_fasta, ch_genome_fai )
prepared_bam = bams.single.mix(SAMTOOLS_MERGE.out.bam)
+ // GET ALIGNMENT FROM SELECTED CONTIGS
+ if (params.extract_alignments) {
+ SAMTOOLS_INDEX_EXTRACT ( prepared_bam )
+ extract_bam_sorted_indexed = prepared_bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true)
+ EXTRACT_ALIGNMENTS( extract_bam_sorted_indexed, ch_genome_fasta, [])
+ prepared_bam = EXTRACT_ALIGNMENTS.out.bam
+ }
+
// Marking duplicates
MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai )
SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam )
diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf
index dc5daa60..5b0cb708 100644
--- a/subworkflows/local/alignment/align_sentieon.nf
+++ b/subworkflows/local/alignment/align_sentieon.nf
@@ -2,10 +2,13 @@
// A subworkflow to annotate structural variants.
//
-include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main'
-include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main'
-include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main'
-include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main'
+include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main'
+include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main'
+include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main'
+include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main'
+include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main'
+
workflow ALIGN_SENTIEON {
take:
ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ]
@@ -36,6 +39,14 @@ workflow ALIGN_SENTIEON {
SENTIEON_READWRITER ( merge_bams_in.multiple, ch_genome_fasta, ch_genome_fai )
ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.output_index)
+        // GET ALIGNMENT FROM SELECTED CONTIGS (regions are passed via ext.args2 = params.restrict_to_contigs)
+        if (params.extract_alignments) {
+            EXTRACT_ALIGNMENTS( ch_bam_bai, ch_genome_fasta, [])
+            ch_extracted_bam = EXTRACT_ALIGNMENTS.out.bam // extracted BAM only; its index is built next
+            SAMTOOLS_INDEX_EXTRACT ( ch_extracted_bam )
+            ch_bam_bai = ch_extracted_bam.join(SAMTOOLS_INDEX_EXTRACT.out.bai, failOnMismatch:true, failOnDuplicate:true) // re-pair BAM with its new index
+        }
+
SENTIEON_DATAMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, false )
SENTIEON_DEDUP ( ch_bam_bai, ch_genome_fasta, ch_genome_fai )