finish code exomedepth count per sample

nf-cmgg · Mar 20, 2024 · eba5f5b · eba5f5b
1 parent 90000d7
commit eba5f5b
Show file tree

Hide file tree

Showing 12 changed files with 153 additions and 97 deletions.
diff --git a/bin/CNV_ExomeDepth_counting.R → bin/ExomeDepth_count.R b/bin/CNV_ExomeDepth_counting.R → bin/ExomeDepth_count.R
@@ -1,45 +1,35 @@
+#!/usr/bin/env Rscript
+
 ############################################################################################
 ### ExomeDepth - counting step                                                           ###
 ############################################################################################
 
 ### load libs ###
-library("optparse") 
+# library("optparse") 
 library("methods") # to load the function "as"
 library("ExomeDepth")
 ExomeDepthVersion <- packageDescription("ExomeDepth")$Version
 print(paste0("...loaded ExomeDepth version: ", ExomeDepthVersion))
 
-## pass options ###
-option_list = list(
-    make_option(c("--bamfile"), type="character", default=NULL, 
-              help="full paths of BAM file", metavar="character"),
-    make_option(c("--exon_target"), type="character", default=NULL, 
-              help="path to bed file of exon target", metavar="character"),
-    make_option(c("--prefix"), type="character", default=NULL, 
-              help="prefix for output name, chrX or autosomal", metavar="character")
-); 
-opt_parser = OptionParser(option_list=option_list);
-opt = parse_args(opt_parser);
 
-if (is.null(opt$bamfile)){
-  print_help(opt_parser)
-  stop("Not all the input arguments are provided", call.=FALSE)
-}
-if (is.null(opt$exon_target)){
-  print_help(opt_parser)
-  stop("Not all the input arguments are provided", call.=FALSE)
+## pass options ###
+args = commandArgs(trailingOnly = TRUE)
+print_usage <- function() {
+  cat("Usage: ExomeDepth_count.R <samplename> <bamfile> <baifile> <exon_target> <prefix>\n")
 }
-if (is.null(opt$prefix)){
-  print_help(opt_parser)
-  stop("Not all the input arguments are provided", call.=FALSE)
+if (length(commandArgs(trailingOnly = TRUE)) != 5) {
+  print_usage()
+  q("no", status = 1) # Terminate the script with an error status
 }
 
-cat("bam file: ", opt$bamfile, "\n")
-cat("exon target file: ", opt$exon_target, "\n")
-cat("prefix: ", opt$prefix, "\n")
+cat("samplename: ", args[1], "\n")
+cat("bam file: ", args[2], "\n")
+cat("bam index file: ", args[3], "\n")
+cat("exon target file: ", args[4], "\n")
+cat("prefix: ", args[5], "\n")
 
 ### load exon data ###
-exons <- read.table(file=opt$exon_target,sep="\t",header = F)
+exons <- read.table(file=args[4],sep="\t",header = F)
 colnames(exons) <- c("chromosome","start","end","name")
 
 ### load all exon info ####
@@ -49,30 +39,14 @@ exons.GRanges <- GenomicRanges::GRanges(seqnames = exons$chromosome,
         )
 
 ### Getting the input data from BAM file ###
-bam <- unlist(opt$bamfile )
-
-sampleName <- unlist(strsplit(unlist(strsplit(bam, ".bam")),split=".//"))
+bam <- unlist(args[2])
 
 cat("Sample names:", "\n")
-sampleName2<-gsub("^.*/","",sampleName)
-sampleName2
+sampleName <- unlist(args[1])
+sampleName
 
 ### ExomeDepth sometimes spectacularly fails to find the BAM index, for no clear reason ###
 ## so help it out by testing a few likely filenames
-bamindex = bam
-
-stardotbai = gsub(".bam$",".bai",bam)
-stardotbamdotbai = gsub(".bam$",".bam.bai",bam)
-if (file.exists(stardotbai)) {
-  bamindexe = stardotbai
-} else if (file.exists(stardotbamdotbai)) {
-  bamindexe = stardotbamdotbai
-}
-else {
-  cat(paste("Cannot find a .bai index for BAM: ",bam,"\n",sep=""),file=stderr())
-  cat("stopping execution....",file=stderr())
-  stop()
-}
 
 ### read.count ~exomeCopy package ###
 ## ouput = GenomicRages object that stores the read count data form the BAM file ###
@@ -84,22 +58,27 @@ ExomeCount <- getBamCounts(bed.frame = exons,
                             #min.mapq =20 #default minimum mapping quality to include a read
                             #read.width =300 #default maximum distance between the side of the target region and the middle of the paired reads to include the paired read into that region
                             )
+cat("\nCounting done...\n")
+colnames(ExomeCount)
 ## convert GRanges class (S4 object) into a data.frame, which is the input format for ExomeDepth
 ExomeCount.dafr <- as(ExomeCount[,  colnames(ExomeCount)], 'data.frame') 
+cat("\nConversion done...\n")
+print(head(ExomeCount.dafr))
 ## remove the annoying chr letters
-ExomeCount.dafr$space <- gsub(as.character(ExomeCount.dafr$space),
+ExomeCount.dafr$chromosome <- gsub(as.character(ExomeCount.dafr$chromosome),
                                         pattern = 'chr', 
                                         replacement = '')
+cat("\nChr's removed...\n")
 ## rename sample names in colnames
-header_first_part<-c("space", "start", "end", "width", "names")
-colnames(ExomeCount.dafr)<-c(header_first_part, sampleName2)
+header_first_part<-c("chromosome", "start", "end", "exon")
+colnames(ExomeCount.dafr)<-c(header_first_part, sampleName)
 colnames(ExomeCount.dafr)
 print(head(ExomeCount.dafr))
 cat("Successfully calculated counts.\n")
 
 ###  save counts as a text file and rda object ###
 cat("Saving the counts \n")
-countspath = paste("counts_",sampleName2,"_",opt$prefix,".txt",sep="")
+countspath = paste("counts_",sampleName,"_",args[5],".txt",sep="")
 write.table(ExomeCount.dafr,countspath,sep="\t",col.names=TRUE,row.names=FALSE,quote=FALSE)  
 
 cat("\n\n---Finished---\n")

diff --git a/conf/test.config b/conf/test.config
@@ -29,5 +29,7 @@ params {
     fai = 'https://github.com/nf-cmgg/test-datasets/tree/main/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai'
 
     // Parameters
+    roi_auto = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr21_protein_coding_basic_sorted_merged_autosomal.bed"
+    roi_chrx = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"
 
 }
diff --git a/main.nf b/main.nf
@@ -53,7 +53,9 @@ workflow NFCMGG_EXOMECNV {
         params.monochrome_logs,
         args,
         params.outdir,
-        params.input
+        params.input,
+        params.roi_auto,
+        params.roi_chrx
     )
 
     //

diff --git a/modules/local/exomedepth/count/environment.yml b/modules/local/exomedepth/count/environment.yml
@@ -0,0 +1,10 @@
+name: exomedepth_count
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - r=3.6=r36_1003
+  - r-optparse=1.6.4=r36h6115d3f_0
+  - r-exomedepth=1.1.12=r36h6786f55_0
diff --git a/modules/local/exomedepth/count/main.nf b/modules/local/exomedepth/count/main.nf
@@ -1,29 +1,36 @@
-process EXOMEDEPTH_COUNT {
-    tag "$meta.id $prefix"
+process COUNT {
+    tag "$meta.id $prefix.id"
     label 'process_low'
 
-    conda "conda-forge::r=3.6"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/r-exomedepth:1.1.12--r36h6786f55_0' :
-        'biocontainers/r-exomedepth:1.1.12--r36h6786f55_0' }"
+        'https://depot.galaxyproject.org/singularity/r-exomedepth:1.1.16--r43hfb3cda0_3' :
+        'biocontainers/r-exomedepth:1.1.16--r43hfb3cda0_3' }"
+
+    publishDir "$params.outdir/exomedepth/counts", mode: 'copy'
 
     input:
     tuple val(meta), path(bam), path(bai)
     tuple val(prefix), path(exon_target)
 
     output:
-    path '*.txt'       , emit: counts
+    tuple val(meta), path("*.txt"), emit: counts
     path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script: // This script is bundled with the pipeline, in nf-cmgg/exomecnv/bin/
-    
-    def VERSION = '1.1.12'
-    
+
+    def VERSION = '1.1.16'
+
     """
-    Rscript CNV_ExomeDepth_counting.R
+    ExomeDepth_count.R \\
+        $meta.id \\
+        $bam \\
+        $bai \\
+        $exon_target \\
+        $prefix.id
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/nextflow.config b/nextflow.config
@@ -59,6 +59,11 @@ params {
     validationShowHiddenParams       = false
     validate_params                  = true
 
+    // ExomeDepth Parameters
+    roi_auto = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr_protein_coding_basic_sorted_merged_autosomal.bed"
+    roi_chrx = "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"
+
+
 }
 
 // Load base.config by default for all pipelines
@@ -230,7 +235,7 @@ dag {
 
 manifest {
     name            = 'nf-cmgg/exomecnv'
-    author          = """nvnieuwk"""
+    author          = """ToonRosseel"""
     homePage        = 'https://github.com/nf-cmgg/exomecnv'
     description     = """A nextflow pipeline for calling exome CNVs"""
     mainScript      = 'main.nf'

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -53,7 +53,8 @@
                     "type": "string",
                     "description": "Name of iGenomes reference.",
                     "fa_icon": "fas fa-book",
-                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
+                    "default": "GRCh38"
                 },
                 "fasta": {
                     "type": "string",
@@ -79,15 +80,17 @@
                     "description": "Do not load the iGenomes reference config.",
                     "fa_icon": "fas fa-ban",
                     "hidden": true,
-                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.",
+                    "default": true
                 },
                 "cmgg_config_base": {
                     "type": "string",
                     "default": "/conf/",
                     "description": "The config base path for the cmgg configs",
                     "format": "directory-path"
                 }
-            }
+            },
+            "required": ["fasta", "fai"]
         },
         "institutional_config_options": {
             "title": "Institutional config options",
@@ -282,6 +285,31 @@
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
                 }
             }
+        },
+        "exomedepth_parameters": {
+            "title": "ExomeDepth parameters",
+            "type": "object",
+            "description": "Options specific for the ExomeDepth execution",
+            "default": "",
+            "properties": {
+                "roi_auto": {
+                    "type": "string",
+                    "description": "Path to the default ROI (regions of interest) BED file of autosomal (merged) regions to be used for CNV analysis",
+                    "format": "file-path",
+                    "pattern": "^\\S+\\.bed$",
+                    "mimetype": "text/plain",
+                    "default": "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chr_protein_coding_basic_sorted_merged_autosomal.bed"
+                },
+                "roi_chrx": {
+                    "type": "string",
+                    "description": "Path to the default ROI (regions of interest) BED file of chrX (merged) regions to be used for CNV analysis",
+                    "format": "file-path",
+                    "pattern": "^\\S+\\.bed$",
+                    "mimetype": "text/plain",
+                    "default": "/home/torossee/projects/nf-cmgg-exomecnv_references/Homo_sapiens.GRCh38.105.chrX_protein_coding_basic_sorted_merged.bed"
+                }
+            },
+            "required": ["roi_auto", "roi_chrx"]
         }
     },
     "allOf": [
@@ -299,6 +327,9 @@
         },
         {
             "$ref": "#/definitions/generic_options"
+        },
+        {
+            "$ref": "#/definitions/exomedepth_parameters"
         }
     ]
 }
diff --git a/subworkflows/local/cram_prepare/main.nf b/subworkflows/local/cram_prepare/main.nf
@@ -20,7 +20,8 @@ workflow CRAM_PREPARE {
     ch_versions = ch_versions.mix(CRAM_TO_BAM.out.versions)
 
     emit:
-    bam      = CRAM_TO_BAM.out.bam           // channel: [ val(meta), [ bam ], [bai] ]
+    bam      = CRAM_TO_BAM.out.bam           // channel: [ val(meta), [ bam ] ]
+    bai      = CRAM_TO_BAM.out.bai           // channel: [ val(meta), [ bai ] ]
 
     versions = ch_versions                     // channel: [ versions.yml ]
 }

diff --git a/subworkflows/local/exomedepth_count/main.nf b/subworkflows/local/exomedepth_count/main.nf
@@ -0,0 +1,26 @@
+
+// Convert CRAM files
+include { COUNT  } from '../../../modules/local/exomedepth/count/main'
+
+workflow EXOMEDEPTH_COUNT {
+
+    take:
+    ch_bam // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+    exon_target   // channel: [mandatory] [ val(prefix), path(bed) ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // convert CRAM files to BAM files
+    //
+    COUNT(ch_bam, exon_target)
+    ch_versions = ch_versions.mix(COUNT.out.versions)
+
+    emit:
+    count      = COUNT.out.counts           // channel: [ val(meta), [ txt ] ]
+
+    versions = ch_versions                     // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/exomedepth_counting/main.nf b/subworkflows/local/exomedepth_counting/main.nf
diff --git a/subworkflows/local/utils_nfcore_exomecnv_pipeline/main.nf b/subworkflows/local/utils_nfcore_exomecnv_pipeline/main.nf
@@ -36,6 +36,8 @@ workflow PIPELINE_INITIALISATION {
     nextflow_cli_args //   array: List of positional nextflow CLI args
     outdir            //  string: The output directory where the results will be saved
     input             //  string: Path to input samplesheet
+    roi_auto        //  string: Path to ExomeDepth BED file of (merged) autosomal regions
+    roi_chrx        //  string: Path to ExomeDepth BED file of (merged) chrX regions
 
     main: