Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename bwa-mem2 index variables #34

Merged
merged 3 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions conf/hmf_genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@
params {
genomes {
'GRCh37_hmf' {
fasta = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/Homo_sapiens.GRCh37.GATK.illumina.fasta"
fai = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.fai"
dict = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.dict"
bwa_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/bwa_index/2.2.1.tar.gz"
gridss_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/gridss_index/2.13.2.tar.gz"
star_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/star_index/gencode_19/2.7.3a.tar.gz"
fasta = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/Homo_sapiens.GRCh37.GATK.illumina.fasta"
fai = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.fai"
dict = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.dict"
bwamem2_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz"
gridss_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/gridss_index/2.13.2.tar.gz"
star_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/star_index/gencode_19/2.7.3a.tar.gz"
}
'GRCh38_hmf' {
fasta = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
fai = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai"
dict = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict"
bwa_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/bwa_index/2.2.1.tar.gz"
gridss_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/gridss_index/2.13.2.tar.gz"
star_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/star_index/gencode_38/2.7.3a.tar.gz"
fasta = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
fai = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai"
dict = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict"
bwamem2_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz"
gridss_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/gridss_index/2.13.2.tar.gz"
star_index = "https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/star_index/gencode_38/2.7.3a.tar.gz"
}
}
}
12 changes: 6 additions & 6 deletions conf/test_stub.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ params {
genomes {

'GRCh38_hmf' {
fasta = "temp/GRCh38.fasta"
fai = "temp/GRCh38.fai"
dict = "temp/GRCh38.dict"
bwa_index = "temp/GRCh38_bwa-mem2_index/"
gridss_index = "temp/GRCh38_gridss_index/"
star_index = "temp/GRCh38_star_index/"
fasta = "temp/GRCh38.fasta"
fai = "temp/GRCh38.fai"
dict = "temp/GRCh38.dict"
bwamem2_index = "temp/GRCh38_bwa-mem2_index/"
gridss_index = "temp/GRCh38_gridss_index/"
star_index = "temp/GRCh38_star_index/"
}

}
Expand Down
6 changes: 3 additions & 3 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ params {
fasta = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
fai = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai"
dict = "/path/to/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict"
bwa_index = "/path/to/bwa-mem2_index/"
bwamem2_index = "/path/to/bwa-mem2_index/"
gridss_index = "/path/to/gridss_index/"
star_index = "/path/to/star_index/"
}
Expand All @@ -357,7 +357,7 @@ _GRCh37 genome (Hartwig) [`GRCh37_hmf`]_
| FASTA | [Homo_sapiens.GRCh37.GATK.illumina.fasta](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/Homo_sapiens.GRCh37.GATK.illumina.fasta) |
| FASTA index | [Homo_sapiens.GRCh37.GATK.illumina.fasta.fai](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.fai) |
| FASTA seq dictionary | [Homo_sapiens.GRCh37.GATK.illumina.fasta.dict](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/samtools_index/1.16/Homo_sapiens.GRCh37.GATK.illumina.fasta.dict) |
| bwa-mem2 index | [bwa_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/bwa_index/2.2.1.tar.gz) |
| bwa-mem2 index | [bwa-mem2_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz) |
| GRIDSS index | [gridss_index/2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.1/gridss_index/2.13.2.tar.gz) |
| STAR index | [star_index/gencode_19/2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/24.0/star_index/gencode_19/2.7.3a.tar.gz) |

Expand All @@ -368,7 +368,7 @@ _GRCh38 genome (Hartwig) [`GRCh38_hmf`]_
| FASTA | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna) |
| FASTA index | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai) |
| FASTA seq dictionary | [GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/samtools_index/1.16/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.dict) |
| bwa-mem2 index | [bwa_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/bwa_index/2.2.1.tar.gz) |
| bwa-mem2 index | [bwa-mem2_index/2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/bwa-mem2_index/2.2.1.tar.gz) |
| GRIDSS index | [gridss_index/2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.1/gridss_index/2.13.2.tar.gz) |
| STAR index | [star_index/gencode_38/2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/24.0/star_index/gencode_38/2.7.3a.tar.gz) |

Expand Down
5 changes: 3 additions & 2 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,13 @@ class Utils {

def fps = [
params.ref_data_genome_alt,
params.ref_data_genome_bwa_index,
params.ref_data_genome_bwamem2_index,
params.ref_data_genome_dict,
params.ref_data_genome_fai,
params.ref_data_genome_fasta,
params.ref_data_genome_gridss_index,
params.ref_data_genome_gtf,
params.ref_data_genome_star_index,
params.ref_data_virusbreakenddb_path,
]

Expand Down Expand Up @@ -323,7 +324,7 @@ class Utils {
def has_alt_contigs = params.genome_type == 'alt'

// Ensure that custom genomes with ALT contigs that need indexes built have the required .alt file
def has_bwa_indexes = (params.ref_data_genome_bwa_index && params.ref_data_genome_gridss_index)
def has_bwa_indexes = (params.ref_data_genome_bwamem2_index && params.ref_data_genome_gridss_index)
def has_alt_file = params.containsKey('ref_data_genome_alt') && params.ref_data_genome_alt
def run_bwa_or_gridss_index = run_config.stages.alignment && run_config.has_dna_fastq && !has_bwa_indexes

Expand Down
12 changes: 6 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_oncoanaly
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

params.ref_data_genome_fasta = getGenomeAttribute('fasta')
params.ref_data_genome_fai = getGenomeAttribute('fai')
params.ref_data_genome_dict = getGenomeAttribute('dict')
params.ref_data_genome_bwa_index = getGenomeAttribute('bwa_index')
params.ref_data_genome_gridss_index = getGenomeAttribute('gridss_index')
params.ref_data_genome_star_index = getGenomeAttribute('star_index')
params.ref_data_genome_fasta = getGenomeAttribute('fasta')
params.ref_data_genome_fai = getGenomeAttribute('fai')
params.ref_data_genome_dict = getGenomeAttribute('dict')
params.ref_data_genome_bwamem2_index = getGenomeAttribute('bwamem2_index')
params.ref_data_genome_gridss_index = getGenomeAttribute('gridss_index')
params.ref_data_genome_star_index = getGenomeAttribute('star_index')

WorkflowMain.setParamsDefaults(params, log)
WorkflowMain.validateParams(params, log)
Expand Down
4 changes: 2 additions & 2 deletions modules/local/bwa-mem2/mem/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process BWAMEM2_ALIGN {
input:
tuple val(meta), path(reads_fwd), path(reads_rev)
path genome_fasta
path genome_bwa_index
path genome_bwamem2_index

output:
tuple val(meta), path('*.bam'), path('*.bai'), emit: bam
Expand All @@ -28,7 +28,7 @@ process BWAMEM2_ALIGN {
def output_fn = meta.split ? "${meta.split}.${meta.sample_id}.${meta.read_group}.bam" : "${meta.sample_id}.${meta.read_group}.bam"

"""
ln -fs \$(find -L ${genome_bwa_index} -type f) ./
ln -fs \$(find -L ${genome_bwamem2_index} -type f) ./

bwa-mem2 mem \\
${args} \\
Expand Down
4 changes: 2 additions & 2 deletions modules/local/bwa-mem2/mem/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ input:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
- genome_bwa_index:
- genome_bwamem2_index:
type: directory
description: bwa-mem1 index directory
description: bwa-mem2 index directory
output:
- meta:
type: map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process CUSTOM_EXTRACTCONTIG {

output:
path "*extracted.fa" , emit: contig
path "*extracted.fa.*", emit: bwa_index
path "*extracted.fa.*", emit: bwamem2_index
path 'versions.yml' , emit: versions

when:
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ params {
'panel_data_paths',
'ref_data',
'ref_data_genome_alt',
'ref_data_genome_bwa_index',
'ref_data_genome_bwamem2_index',
'ref_data_genome_dict',
'ref_data_genome_fai',
'ref_data_genome_fasta',
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/lilac_calling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ workflow LILAC_CALLING {
REALIGNREADS(
SLICEBAM.out.bam,
EXTRACTCONTIG.out.contig,
EXTRACTCONTIG.out.bwa_index,
EXTRACTCONTIG.out.bwamem2_index,
)

ch_versions = ch_versions.mix(REALIGNREADS.out.versions)
Expand Down
44 changes: 22 additions & 22 deletions subworkflows/local/prepare_reference/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,28 +55,28 @@ workflow PREPARE_REFERENCE {
//
// Set bwa-mem2 index, unpack or create if required
//
ch_genome_bwa_index = Channel.empty()
ch_genome_bwamem2_index = Channel.empty()
if (run_config.has_dna && run_config.stages.alignment) {
if (!params.ref_data_genome_bwa_index) {
if (!params.ref_data_genome_bwamem2_index) {

BWAMEM2_INDEX(
ch_genome_fasta,
params.ref_data_genome_alt ? file(params.ref_data_genome_alt) : [],
)
ch_genome_bwa_index = BWAMEM2_INDEX.out.index
ch_genome_bwamem2_index = BWAMEM2_INDEX.out.index
ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions)

} else if (params.ref_data_genome_bwa_index.endsWith('.tar.gz')) {
} else if (params.ref_data_genome_bwamem2_index.endsWith('.tar.gz')) {

ch_genome_bwa_index_inputs = Channel.fromPath(params.ref_data_genome_bwa_index)
ch_genome_bwamem2_index_inputs = Channel.fromPath(params.ref_data_genome_bwamem2_index)
.map { [[id: "bwa-mem2_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] }

DECOMP_BWAMEM2_INDEX(ch_genome_bwa_index_inputs)
ch_genome_bwa_index = DECOMP_BWAMEM2_INDEX.out.dir
DECOMP_BWAMEM2_INDEX(ch_genome_bwamem2_index_inputs)
ch_genome_bwamem2_index = DECOMP_BWAMEM2_INDEX.out.dir

} else {

ch_genome_bwa_index = getRefFileChannel('ref_data_genome_bwa_index')
ch_genome_bwamem2_index = getRefFileChannel('ref_data_genome_bwamem2_index')

}
}
Expand Down Expand Up @@ -235,7 +235,7 @@ workflow PREPARE_REFERENCE {
ch_genome_fasta,
ch_genome_fai,
ch_genome_dict,
ch_genome_bwa_index,
ch_genome_bwamem2_index,
ch_genome_gridss_index,
ch_genome_star_index,
ch_virusbreakenddb,
Expand All @@ -258,19 +258,19 @@ workflow PREPARE_REFERENCE {
}

emit:
genome_fasta = ch_genome_fasta.first() // path: genome_fasta
genome_fai = ch_genome_fai.first() // path: genome_fai
genome_dict = ch_genome_dict.first() // path: genome_dict
genome_bwa_index = ch_genome_bwa_index.first() // path: genome_bwa_index
genome_gridss_index = ch_genome_gridss_index.first() // path: genome_gridss_index
genome_star_index = ch_genome_star_index.first() // path: genome_star_index
genome_version = ch_genome_version // val: genome_version

virusbreakenddb = ch_virusbreakenddb.first() // path: VIRUSBreakend database
hmf_data = ch_hmf_data // map: HMF data paths
panel_data = ch_panel_data // map: Panel data paths

versions = ch_versions // channel: [ versions.yml ]
genome_fasta = ch_genome_fasta.first() // path: genome_fasta
genome_fai = ch_genome_fai.first() // path: genome_fai
genome_dict = ch_genome_dict.first() // path: genome_dict
genome_bwamem2_index = ch_genome_bwamem2_index.first() // path: genome_bwamem2_index
genome_gridss_index = ch_genome_gridss_index.first() // path: genome_gridss_index
genome_star_index = ch_genome_star_index.first() // path: genome_star_index
genome_version = ch_genome_version // val: genome_version

virusbreakenddb = ch_virusbreakenddb.first() // path: VIRUSBreakend database
hmf_data = ch_hmf_data // map: HMF data paths
panel_data = ch_panel_data // map: Panel data paths

versions = ch_versions // channel: [ versions.yml ]
}

def getRefFileChannel(key) {
Expand Down
28 changes: 14 additions & 14 deletions subworkflows/local/read_alignment_dna/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ workflow READ_ALIGNMENT_DNA {

// Reference data
genome_fasta // channel: [mandatory] /path/to/genome_fasta
genome_bwa_index // channel: [mandatory] /path/to/genome_bwa_index_dir/
genome_bwamem2_index // channel: [mandatory] /path/to/genome_bwa-mem2_index_dir/

// Params
max_fastq_records // numeric: [mandatory] max number of FASTQ records per split
Expand Down Expand Up @@ -128,11 +128,11 @@ workflow READ_ALIGNMENT_DNA {
// MODULE: BWA-MEM2
//
// Create process input channel
// channel: [ meta_bwa, fastq_fwd, fastq_rev ]
ch_bwa_inputs = ch_fastqs_ready
// channel: [ meta_bwamem2, fastq_fwd, fastq_rev ]
ch_bwamem2_inputs = ch_fastqs_ready
.map { meta_fastq_ready, fastq_fwd, fastq_rev ->

def meta_bwa = [
def meta_bwamem2 = [
*:meta_fastq_ready,


Expand All @@ -142,40 +142,40 @@ workflow READ_ALIGNMENT_DNA {

]

return [meta_bwa, fastq_fwd, fastq_rev]
return [meta_bwamem2, fastq_fwd, fastq_rev]
}

// Run process
BWAMEM2_ALIGN(
ch_bwa_inputs,
ch_bwamem2_inputs,
genome_fasta,
genome_bwa_index,
genome_bwamem2_index,
)

ch_versions = ch_versions.mix(BWAMEM2_ALIGN.out.versions)

// Reunite BAMs
// First, count expected BAMs per sample for non-blocking groupTuple op
// channel: [ meta_count, group_size ]
ch_sample_fastq_counts = ch_bwa_inputs
.map { meta_bwa, reads_fwd, reads_rev ->
ch_sample_fastq_counts = ch_bwamem2_inputs
.map { meta_bwamem2, reads_fwd, reads_rev ->

def meta_count = [
key: meta_bwa.key,
sample_type: meta_bwa.sample_type,
key: meta_bwamem2.key,
sample_type: meta_bwamem2.sample_type,
]

return [meta_count, meta_bwa]
return [meta_count, meta_bwamem2]
}
.groupTuple()
.map { meta_count, meta_bwas -> return [meta_count, meta_bwas.size()] }
.map { meta_count, metas_bwamem2 -> return [meta_count, metas_bwamem2.size()] }

// Now, group with expected size then sort into tumor and normal channels
// channel: [ meta_group, [bam, ...], [bai, ...] ]
ch_bams_united = ch_sample_fastq_counts
.cross(
// First element to match meta_count above for `cross`
BWAMEM2_ALIGN.out.bam.map { meta_bwa, bam, bai -> [[key: meta_bwa.key, sample_type: meta_bwa.sample_type], bam, bai] }
BWAMEM2_ALIGN.out.bam.map { meta_bwamem2, bam, bai -> [[key: meta_bwamem2.key, sample_type: meta_bwamem2.sample_type], bam, bai] }
)
.map { count_tuple, bam_tuple ->

Expand Down
2 changes: 1 addition & 1 deletion workflows/targeted.nf
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ workflow TARGETED {
READ_ALIGNMENT_DNA(
ch_inputs,
ref_data.genome_fasta,
ref_data.genome_bwa_index,
ref_data.genome_bwamem2_index,
params.max_fastq_records,
)

Expand Down
2 changes: 1 addition & 1 deletion workflows/wgts.nf
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ workflow WGTS {
READ_ALIGNMENT_DNA(
ch_inputs,
ref_data.genome_fasta,
ref_data.genome_bwa_index,
ref_data.genome_bwamem2_index,
params.max_fastq_records,
)

Expand Down