Skip to content

Commit

Permalink
finish code exomedepth count per sample
Browse files Browse the repository at this point in the history
  • Loading branch information
ToonRosseel committed Mar 20, 2024
1 parent 90000d7 commit eba5f5b
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 97 deletions.
77 changes: 28 additions & 49 deletions bin/CNV_ExomeDepth_counting.R → bin/ExomeDepth_count.R
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,45 +1,35 @@
#!/usr/bin/env Rscript

############################################################################################
### ExomeDepth - counting step ###
############################################################################################

### load libs ###
library("optparse")
# library("optparse")
library("methods") # to load the function "as"
library("ExomeDepth")
ExomeDepthVersion <- packageDescription("ExomeDepth")$Version
print(paste0("...loaded ExomeDepth version: ", ExomeDepthVersion))

## pass options ###
option_list = list(
make_option(c("--bamfile"), type="character", default=NULL,
help="full paths of BAM file", metavar="character"),
make_option(c("--exon_target"), type="character", default=NULL,
help="path to bed file of exon target", metavar="character"),
make_option(c("--prefix"), type="character", default=NULL,
help="prefix for output name, chrX or autosomal", metavar="character")
);
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

if (is.null(opt$bamfile)){
print_help(opt_parser)
stop("Not all the input arguments are provided", call.=FALSE)
}
if (is.null(opt$exon_target)){
print_help(opt_parser)
stop("Not all the input arguments are provided", call.=FALSE)
## pass options ###
args = commandArgs(trailingOnly = TRUE)
print_usage <- function() {
cat("Usage: ExomeDepth_count.R <samplename> <bamfile> <baifile> <exon_target> <prefix>\n")
}
if (is.null(opt$prefix)){
print_help(opt_parser)
stop("Not all the input arguments are provided", call.=FALSE)
if (length(commandArgs(trailingOnly = TRUE)) != 5) {
print_usage()
q("no", status = 1) # Terminate the script with an error status
}

cat("bam file: ", opt$bamfile, "\n")
cat("exon target file: ", opt$exon_target, "\n")
cat("prefix: ", opt$prefix, "\n")
cat("samplename: ", args[1], "\n")
cat("bam file: ", args[2], "\n")
cat("bam index file: ", args[3], "\n")
cat("exon target file: ", args[4], "\n")
cat("prefix: ", args[5], "\n")

### load exon data ###
exons <- read.table(file=opt$exon_target,sep="\t",header = F)
exons <- read.table(file=args[4],sep="\t",header = F)
colnames(exons) <- c("chromosome","start","end","name")

### load all exon info ####
Expand All @@ -49,30 +39,14 @@ exons.GRanges <- GenomicRanges::GRanges(seqnames = exons$chromosome,
)

### Getting the input data from BAM file ###
bam <- unlist(opt$bamfile )

sampleName <- unlist(strsplit(unlist(strsplit(bam, ".bam")),split=".//"))
bam <- unlist(args[2])

cat("Sample names:", "\n")
sampleName2<-gsub("^.*/","",sampleName)
sampleName2
sampleName <- unlist(args[1])
sampleName

### ExomeDepth sometimes spectacularly fails to find the BAM index, for no clear reason ###
## so help it out by testing a few likely filenames
bamindex = bam

stardotbai = gsub(".bam$",".bai",bam)
stardotbamdotbai = gsub(".bam$",".bam.bai",bam)
if (file.exists(stardotbai)) {
bamindexe = stardotbai
} else if (file.exists(stardotbamdotbai)) {
bamindexe = stardotbamdotbai
}
else {
cat(paste("Cannot find a .bai index for BAM: ",bam,"\n",sep=""),file=stderr())
cat("stopping execution....",file=stderr())
stop()
}

### read.count ~exomeCopy package ###
## ouput = GenomicRages object that stores the read count data form the BAM file ###
Expand All @@ -84,22 +58,27 @@ ExomeCount <- getBamCounts(bed.frame = exons,
#min.mapq =20 #default minimum mapping quality to include a read
#read.width =300 #default maximum distance between the side of the target region and the middle of the paired reads to include the paired read into that region
)
cat("\nCounting done...\n")
colnames(ExomeCount)
## convert GRanges class (S4 object) into a data.frame, which is the input format for ExomeDepth
ExomeCount.dafr <- as(ExomeCount[, colnames(ExomeCount)], 'data.frame')
cat("\nConversion done...\n")
print(head(ExomeCount.dafr))
## remove the annoying chr letters
ExomeCount.dafr$space <- gsub(as.character(ExomeCount.dafr$space),
ExomeCount.dafr$chromosome <- gsub(as.character(ExomeCount.dafr$chromosome),
pattern = 'chr',
replacement = '')
cat("\nChr's removed...\n")
## rename sample names in colnames
header_first_part<-c("space", "start", "end", "width", "names")
colnames(ExomeCount.dafr)<-c(header_first_part, sampleName2)
header_first_part<-c("chromosome", "start", "end", "exon")
colnames(ExomeCount.dafr)<-c(header_first_part, sampleName)
colnames(ExomeCount.dafr)
print(head(ExomeCount.dafr))
cat("Successfully calculated counts.\n")

### save counts as a text file and rda object ###
cat("Saving the counts \n")
countspath = paste("counts_",sampleName2,"_",opt$prefix,".txt",sep="")
countspath = paste("counts_",sampleName,"_",args[5],".txt",sep="")
write.table(ExomeCount.dafr,countspath,sep="\t",col.names=TRUE,row.names=FALSE,quote=FALSE)

cat("\n\n---Finished---\n")
Expand Down
2 changes: 2 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ params {
fai = 'https://github.com/nf-cmgg/test-datasets/tree/main/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai'

// Parameters
roi_auto = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr21_protein_coding_basic_sorted_merged_autosomal.bed"
roi_chrx = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"

}
4 changes: 3 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ workflow NFCMGG_EXOMECNV {
params.monochrome_logs,
args,
params.outdir,
params.input
params.input,
params.roi_auto,
params.roi_chrx
)

//
Expand Down
10 changes: 10 additions & 0 deletions modules/local/exomedepth/count/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: exomedepth_count
channels:
- conda-forge
- bioconda
- defaults

dependencies:
- r=3.6=r36_1003
- r-optparse=1.6.4=r36h6115d3f_0
- r-exomedepth=1.1.12=r36h6786f55_0
27 changes: 17 additions & 10 deletions modules/local/exomedepth/count/main.nf
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
process EXOMEDEPTH_COUNT {
tag "$meta.id $prefix"
process COUNT {
tag "$meta.id $prefix.id"
label 'process_low'

conda "conda-forge::r=3.6"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-exomedepth:1.1.12--r36h6786f55_0' :
'biocontainers/r-exomedepth:1.1.12--r36h6786f55_0' }"
'https://depot.galaxyproject.org/singularity/r-exomedepth:1.1.16--r43hfb3cda0_3' :
'biocontainers/r-exomedepth:1.1.16--r43hfb3cda0_3' }"

publishDir "$params.outdir/exomedepth/counts", mode: 'copy'

input:
tuple val(meta), path(bam), path(bai)
tuple val(prefix), path(exon_target)

output:
path '*.txt' , emit: counts
tuple val(meta), path("*.txt"), emit: counts
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script: // This script is bundled with the pipeline, in nf-cmgg/exomecnv/bin/
def VERSION = '1.1.12'

def VERSION = '1.1.16'

"""
Rscript CNV_ExomeDepth_counting.R
ExomeDepth_count.R \\
$meta.id \\
$bam \\
$bai \\
$exon_target \\
$prefix.id
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 6 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ params {
validationShowHiddenParams = false
validate_params = true

// ExomeDepth Parameters
roi_auto = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr_protein_coding_basic_sorted_merged_autosomal.bed"
roi_chrx = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"


}

// Load base.config by default for all pipelines
Expand Down Expand Up @@ -230,7 +235,7 @@ dag {

manifest {
name = 'nf-cmgg/exomecnv'
author = """nvnieuwk"""
author = """ToonRosseel"""
homePage = 'https://github.com/nf-cmgg/exomecnv'
description = """A nextflow pipeline for calling exome CNVs"""
mainScript = 'main.nf'
Expand Down
37 changes: 34 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
"default": "GRCh38"
},
"fasta": {
"type": "string",
Expand All @@ -79,15 +80,17 @@
"description": "Do not load the iGenomes reference config.",
"fa_icon": "fas fa-ban",
"hidden": true,
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.",
"default": true
},
"cmgg_config_base": {
"type": "string",
"default": "/conf/",
"description": "The config base path for the cmgg configs",
"format": "directory-path"
}
}
},
"required": ["fasta", "fai"]
},
"institutional_config_options": {
"title": "Institutional config options",
Expand Down Expand Up @@ -282,6 +285,31 @@
"help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
}
}
},
"exomedepth_parameters": {
"title": "ExomeDepth parameters",
"type": "object",
"description": "Options specific for the ExomeDepth execution",
"default": "",
"properties": {
"roi_auto": {
"type": "string",
"description": "Path to the default ROI (regions of interest) BED file of autosomal (merged) regions to be used for CNV analysis",
"format": "file-path",
"pattern": "^\\S+\\.bed$",
"mimetype": "text/plain",
"default": "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr_protein_coding_basic_sorted_merged_autosomal.bed"
},
"roi_chrx": {
"type": "string",
"description": "Path to the default ROI (regions of interest) BED file of chrX (merged) regions to be used for CNV analysis",
"format": "file-path",
"pattern": "^\\S+\\.bed$",
"mimetype": "text/plain",
"default": "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"
}
},
"required": ["roi_auto", "roi_chrx"]
}
},
"allOf": [
Expand All @@ -299,6 +327,9 @@
},
{
"$ref": "#/definitions/generic_options"
},
{
"$ref": "#/definitions/exomedepth_parameters"
}
]
}
3 changes: 2 additions & 1 deletion subworkflows/local/cram_prepare/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ workflow CRAM_PREPARE {
ch_versions = ch_versions.mix(CRAM_TO_BAM.out.versions)

emit:
bam = CRAM_TO_BAM.out.bam // channel: [ val(meta), [ bam ], [bai] ]
bam = CRAM_TO_BAM.out.bam // channel: [ val(meta), [ bam ] ]
bai = CRAM_TO_BAM.out.bai // channel: [ val(meta), [ bai ] ]

versions = ch_versions // channel: [ versions.yml ]
}
Expand Down
26 changes: 26 additions & 0 deletions subworkflows/local/exomedepth_count/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

// Convert CRAM files
include { COUNT } from '../../../modules/local/exomedepth/count/main'

workflow EXOMEDEPTH_COUNT {

take:
ch_bam // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
exon_target // channel: [mandatory] [ val(prefix), path(bed) ]

main:

ch_versions = Channel.empty()

//
// convert CRAM files to BAM files
//
COUNT(ch_bam, exon_target)
ch_versions = ch_versions.mix(COUNT.out.versions)

emit:
count = COUNT.out.counts // channel: [ val(meta), [ txt ] ]

versions = ch_versions // channel: [ versions.yml ]
}

25 changes: 0 additions & 25 deletions subworkflows/local/exomedepth_counting/main.nf

This file was deleted.

2 changes: 2 additions & 0 deletions subworkflows/local/utils_nfcore_exomecnv_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ workflow PIPELINE_INITIALISATION {
nextflow_cli_args // array: List of positional nextflow CLI args
outdir // string: The output directory where the results will be saved
input // string: Path to input samplesheet
roi_auto // string: Path to ExomeDepth BED file of (merged) autosomal regions
roi_chrx // string: Path to ExomeDepth BED file of (merged) chrX regions

main:

Expand Down
Loading

0 comments on commit eba5f5b

Please sign in to comment.