Skip to content

Commit

Permalink
Merge pull request #8 from nf-core/neo-subworkflow
Browse files Browse the repository at this point in the history
Implement Neo subworkflow
  • Loading branch information
scwatts authored May 30, 2024
2 parents f86b1d9 + f1a365f commit a34b7b4
Show file tree
Hide file tree
Showing 18 changed files with 686 additions and 5 deletions.
8 changes: 8 additions & 0 deletions conf/hmf_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ params {
isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv'
// LILAC
lilac_resources = 'dna_pipeline/immune/'
// Neo
neo_resources = 'neo/binding/'
// ORANGE
cohort_mapping = 'orange/cohort_mapping.tsv'
// Hartwig cohort RNA data
cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.37.csv'
cohort_percentiles = 'orange/cohort_percentiles.tsv'
alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv'
gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv'
Expand Down Expand Up @@ -72,8 +76,12 @@ params {
isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv'
// LILAC
lilac_resources = 'dna_pipeline/immune/'
// Neo
neo_resources = 'neo/binding/'
// ORANGE
cohort_mapping = 'orange/cohort_mapping.tsv'
// Hartwig cohort RNA data
cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.38.csv'
cohort_percentiles = 'orange/cohort_percentiles.tsv'
alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv'
gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv'
Expand Down
28 changes: 28 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,34 @@ process {
]
}

// Shared setting for the processes matched by this selector (NEO_SCORER and NEO_FINDER):
// path of the Neo jar inside the container. The process script reads it as `task.ext.jarPath`,
// and it matches the location the Neo Dockerfile downloads the jar to.
withName: 'NEO_(?:SCORER|FINDER)' {
ext.jarPath = '/opt/neo/neo.jar'
}

// Publishing rules for NEO_SCORER.
// - `versions.yml` is never published (saveAs returns null for it).
// - Every other output is mapped to the fixed path `<meta.key>/neo/scorer/`; the original
//   filename is not appended.
// NOTE(review): presumably the process emits a single output directory that is renamed on
// publish - confirm against the NEO_SCORER process definition.
withName: 'NEO_SCORER' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" },
]
}

// Publishing rules for ANNOTATE_FUSIONS when it runs inside a NEO_PREDICTION scope
// (the selector is a regex on the fully qualified process name).
// - `versions.yml` is never published (saveAs returns null for it).
// - Other outputs keep their filename and go to `<meta.key>/neo/annotated_fusions/`.
withName: '.*:NEO_PREDICTION:ANNOTATE_FUSIONS' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" },
]
}

// Publishing rules for NEO_FINDER.
// - `versions.yml` is never published (saveAs returns null for it).
// - Every other output is mapped to the fixed path `<meta.key>/neo/finder/`; the original
//   filename is not appended, so the process's `neo_finder/` output directory is published
//   under that name.
withName: 'NEO_FINDER' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" },
]
}

withName: 'CUPPA' {
publishDir = [
path: { "${params.outdir}" },
Expand Down
1 change: 1 addition & 0 deletions lib/Constants.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class Constants {
LILAC,
LINX,
MARKDUPS,
NEO,
ORANGE,
PAVE,
PURPLE,
Expand Down
12 changes: 11 additions & 1 deletion lib/Processes.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@ import Utils
class Processes {

public static getRunStages(include, exclude, manual_select, log) {
def processes = manual_select ? [] : Constants.Process.values().toList()

// Get default processes
// NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code
def processes
if (manual_select) {
processes = []
} else {
processes = Constants.Process.values().toList()
processes.remove(Constants.Process.NEO)
}

def include_list = this.getProcessList(include, log)
def exclude_list = this.getProcessList(exclude, log)
this.checkIncludeExcludeList(include_list, exclude_list, log)
Expand Down
2 changes: 1 addition & 1 deletion modules/local/markdups/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN \
conda clean -yaf

RUN \
wget -P /tmp/ https://github.com/hartwigmedical/hmftools/releases/download/mark-dups-v1.1.6_beta/mark-dups_v1.1.6_beta.jar
wget -P /tmp/ https://github.com/hartwigmedical/hmftools/releases/download/mark-dups-v1.1.6_beta/mark-dups_v1.1.6_beta.jar

# Move Conda environment into standard BioContainers base image
FROM quay.io/bioconda/base-glibc-busybox-bash:2.1.0
Expand Down
22 changes: 22 additions & 0 deletions modules/local/neo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image for the Neo tool: micromamba base image plus the Neo jar and a Java runtime.
FROM mambaorg/micromamba:0.24.0

# System packages and /opt are set up as root.
USER root

# Install procps and wget, then clear the apt caches to keep the layer small.
# NOTE(review): procps is presumably here for the `ps` command used by the workflow
# engine for task metrics - confirm.
RUN \
apt-get update && \
apt-get install -y procps wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Download the Neo v1.2_beta release jar to /opt/neo/neo.jar.
RUN \
mkdir -p /opt/neo/ && \
wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar'

# Switch to the micromamba user for the Conda package installation.
USER mambauser

# Install a Java runtime (OpenJDK 8 or newer) into the base environment and clean caches.
RUN \
micromamba install -y -n base -c bioconda -c conda-forge \
'openjdk >=8' && \
micromamba clean --all --yes

# Put the Conda binary directories first on PATH.
ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}"
7 changes: 7 additions & 0 deletions modules/local/neo/annotate_fusions/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: isofox
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::hmftools-isofox=1.7.1
56 changes: 56 additions & 0 deletions modules/local/neo/annotate_fusions/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Run Isofox with the NEO_EPITOPES function to annotate Neo Finder output using the
// sample's BAM.
//
// Inputs:
//   meta                   - sample map; `meta.id` is used as the task tag and
//                            `meta.sample_id` as the Isofox sample name
//   neo_finder_dir         - Neo Finder output directory, passed as `-neo_dir`
//   bam, bai               - BAM passed as `-bam_file`; the index is only staged, not
//                            referenced on the command line
//   read_length            - passed as `-read_length`
//   genome_fasta           - passed as `-ref_genome`
//   genome_ver             - passed as `-ref_genome_version`
//   genome_fai             - only staged, not referenced on the command line
//   ensembl_data_resources - passed as `-ensembl_data_dir`
//
// Outputs:
//   annotated_fusions - files matching `*isf.neoepitope.tsv` in the task directory
//   versions          - `versions.yml` recording the Isofox version
process ANNOTATE_FUSIONS {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' :
        'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(neo_finder_dir), path(bam), path(bai)
    val read_length
    path genome_fasta
    val genome_ver
    path genome_fai
    path ensembl_data_resources

    output:
    tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions
    path 'versions.yml'                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line arguments supplied through the process configuration
    def args = task.ext.args ?: ''

    // JVM max heap is set to 95% of the task memory (in bytes).
    // Isofox writes directly into the task directory (`-output_dir ./`), which is where the
    // output glob above looks, so no separate output directory is created.
    // The version is taken as the text after the last space of `isofox -version`.
    """
    isofox \\
        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
        ${args} \\
        -sample ${meta.sample_id} \\
        -bam_file ${bam} \\
        -functions NEO_EPITOPES \\
        -neo_dir ${neo_finder_dir} \\
        -read_length ${read_length} \\
        -ref_genome ${genome_fasta} \\
        -ref_genome_version ${genome_ver} \\
        -ensembl_data_dir ${ensembl_data_resources} \\
        -threads ${task.cpus} \\
        -output_dir ./

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        isofox: \$(isofox -version | sed 's/^.* //')
    END_VERSIONS
    """

    stub:
    // Create empty placeholder outputs without running Isofox
    """
    touch ${meta.sample_id}.isf.neoepitope.tsv
    echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
    """
}
63 changes: 63 additions & 0 deletions modules/local/neo/annotate_fusions/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
name: annotate_fusions
description: Annotate neoepitopes with RNA fusion data
keywords:
- neoepitopes
- rna
- rnaseq
tools:
- isofox:
description: Characterises and counts gene, transcript features
homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox
documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- neo_finder_dir:
type: directory
description: Neo Finder directory
- bam:
type: file
description: BAM file
pattern: "*.{bam}"
- bai:
type: file
description: BAI file
pattern: "*.{bai}"
- read_length:
type: integer
description: Read length
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
- genome_ver:
type: string
description: Reference genome version
- genome_fai:
type: file
description: Reference genome assembly fai file
pattern: "*.{fai}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- annotated_fusions:
type: file
description: Annotated neoepitopes file
pattern: "*.{tsv}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@scwatts"
- "@charlesshale"
52 changes: 52 additions & 0 deletions modules/local/neo/finder/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Run the Neo jar on PURPLE and LINX outputs and collect its results in `neo_finder/`.
//
// Inputs:
//   meta                   - sample map; `meta.id` is used as the task tag and
//                            `meta.sample_id` as the sample name and VCF filename prefix
//   purple_dir             - directory expected to contain `<sample_id>.purple.somatic.vcf.gz`
//   linx_annotation_dir    - passed as `-linx_dir`
//   genome_fasta           - passed as `-ref_genome`
//   genome_ver             - passed as `-ref_genome_version`
//   genome_fai             - only staged, not referenced on the command line
//   ensembl_data_resources - passed as `-ensembl_data_dir`
//
// Outputs:
//   neo_finder_dir - the `neo_finder/` directory created by the script
//   versions       - `versions.yml` recording the Neo version
process NEO_FINDER {
tag "${meta.id}"
label 'process_low'

container 'docker.io/scwatts/neo:1.2_beta--1'

input:
tuple val(meta), path(purple_dir), path(linx_annotation_dir)
path genome_fasta
val genome_ver
path genome_fai
path ensembl_data_resources

output:
tuple val(meta), path('neo_finder/'), emit: neo_finder_dir
path 'versions.yml' , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Extra command line arguments supplied through the process configuration
def args = task.ext.args ?: ''

// The jar location comes from `task.ext.jarPath` (set in the pipeline configuration) and
// the JVM max heap is set to 95% of the task memory (in bytes).
// The version is taken as the text after the last space of the jar's `-version` output.
// NOTE(review): `-log_debug` is always passed, regardless of `args` - confirm that debug
// logging is intended for every run.
"""
mkdir -p neo_finder/
java \\
-Xmx${Math.round(task.memory.bytes * 0.95)} \\
-jar ${task.ext.jarPath} \\
${args} \\
-sample ${meta.sample_id} \\
-linx_dir ${linx_annotation_dir} \\
-somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\
-ref_genome ${genome_fasta} \\
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-log_debug \\
-output_dir neo_finder/
cat <<-END_VERSIONS > versions.yml
"${task.process}":
neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
END_VERSIONS
"""

stub:
// Create an empty output directory and a placeholder versions file without running Neo
"""
mkdir -p neo_finder/
echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
"""
}

52 changes: 52 additions & 0 deletions modules/local/neo/finder/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: neo_finder
description: Identify candidate neoepitopes
keywords:
- neoepitopes
tools:
- neo:
description: Predict and score neoepitopes
homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo
documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- purple_dir:
type: directory
description: PURPLE output directory
- linx_annotation_dir:
type: directory
description: LINX somatic annotation output directory
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
- genome_ver:
type: string
description: Reference genome version
- genome_fai:
type: file
description: Reference genome assembly fai file
pattern: "*.{fai}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- neo_finder_dir:
type: directory
description: Neo Finder output directory
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@scwatts"
- "@charlesshale"
Loading

0 comments on commit a34b7b4

Please sign in to comment.