diff --git a/snakePipes/shared/rscripts/DESeq2.R b/snakePipes/shared/rscripts/DESeq2.R index 4f76297b1..9d1255162 100755 --- a/snakePipes/shared/rscripts/DESeq2.R +++ b/snakePipes/shared/rscripts/DESeq2.R @@ -9,28 +9,47 @@ # args 5 : path to DE_functions # args 6 : T/F whether or not the workflow is allele-sepecific # args 7 : tx2gene file for salmon --> DESeq mode +# args 9: model formula .libPaths(R.home("library")) -args = commandArgs(TRUE) +#args = commandArgs(TRUE) ## re invented RNaseq workflow -sampleInfoFilePath <- args[1] -countFilePath <- args[2] -fdr <- as.numeric(args[3]) -geneNamesFilePath <- args[4] -importfunc <- args[5] -allelic_info <- as.logical(args[6]) +#sampleInfoFilePath <- args[1] +#countFilePath <- args[2] +#fdr <- as.numeric(args[3]) +#geneNamesFilePath <- args[4] +#importfunc <- args[5] +#allelic_info <- as.logical(args[6]) ## if output is from salmon then tx2gene file should be present -tx2gene_file <- args[7] +#tx2gene_file <- args[7] + +sampleInfoFilePath <- snakemake@params[["sampleSheet"]] +countFilePath <- snakemake@params[["counts_table"]] +fdr <- as.numeric(snakemake@params[["fdr"]]) +geneNamesFilePath <- snakemake@input[["symbol_file"]] +importfunc <- snakemake@params[["importfunc"]] +allelic_info <- as.logical(snakemake@params[["allele_info"]]) +tx2gene_file <- snakemake@params[["tx2gene_file"]] +rmdTemplate <- snakemake@params[["rmdTemplate"]] +formulaInput <- snakemake@params[["formula"]] +wdir <- snakemake@params[["outdir"]] + +setwd(wdir) + + if(file.exists(tx2gene_file)) { tximport <- TRUE } else { tximport <- FALSE } -rmdTemplate <- args[8] +#rmdTemplate <- args[8] + +#formulaInput <- args[9] + topN <- 50 ## include functions suppressPackageStartupMessages(library(ggplot2)) @@ -49,6 +68,7 @@ cat(paste("Working dir:", getwd(), "\n")) cat(paste("Sample info CSV:", sampleInfoFilePath, "\n")) cat(paste("Count file:", countFilePath, "\n")) cat(paste("FDR:", fdr, "\n")) +cat(paste("Custom formula:",formulaInput,"\n")) cat(paste("Gene names:", geneNamesFilePath, "\n")) cat(paste("Number of top N genes:", topN, "\n")) cat(paste("Salmon --> DESeq2 : ", tximport, "\n")) @@ -96,7 +116,7 @@ if(length(unique(sampleInfo$condition))>1){ if(tximport & allelic_info){ message("Detected allelic Salmon counts. Skipping DESeq_basic.") }else{ - seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport) + seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport, customFormula = formulaInput) DESeq_writeOutput(DEseqout = seqout, fdr = fdr, outprefix = "DEseq_basic", diff --git a/snakePipes/shared/rscripts/DE_functions.R b/snakePipes/shared/rscripts/DE_functions.R index de55cab74..0395465d1 100644 --- a/snakePipes/shared/rscripts/DE_functions.R +++ b/snakePipes/shared/rscripts/DE_functions.R @@ -92,9 +92,15 @@ checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE, #' #' -DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_salmon = FALSE, size_factors=NA) { +DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_salmon = FALSE, size_factors=NA, customFormula=NA) { cnames.sub<-unique(colnames(coldata)[2:which(colnames(coldata) %in% "condition")]) - d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+")))) + + if(is.na(customFormula)){ + d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+")))) + } else { + + d<-as.formula(paste0("~",customFormula)) + } # Normal DESeq @@ -123,9 +129,9 @@ DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_sa } dds <- DESeq2::DESeq(dds) ddr <- DESeq2::results(dds, alpha = fdr) - c1<-unique(coldata$condition)[1] - c2<-unique(coldata$condition)[2] - ddr_shrunk <- DESeq2::lfcShrink(dds,coef=paste0("condition_",c2,"_vs_",c1),type="apeglm",res=ddr) + a <- DESeq2::resultsNames(dds) + auto_coef <- a[length(a)] + ddr_shrunk <- DESeq2::lfcShrink(dds,coef=auto_coef,type="apeglm",res=ddr) output <- list(dds = dds, ddr = ddr, ddr_shrunk = ddr_shrunk) return(output) diff --git a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile index f60a60182..68b13562c 100644 --- a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile @@ -34,24 +34,14 @@ rule DESeq2: importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE', tx2gene_file = 'NA', - rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), + formula = config["formula"], + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table) log: out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")), err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) conda: CONDA_RNASEQ_ENV - shell: - "cd {params.outdir} && " - "Rscript {params.script} " - "{params.sampleSheet} " # 1 - "../{input.counts_table} " # 2 - "{params.fdr} " # 3 - "../{input.symbol_file} " # 4 - "{params.importfunc} " # 5 - "{params.allele_info} " # 6 - "{params.tx2gene_file} " # 7 - "{params.rmdTemplate} " # 8 - " > ../{log.out} 2> ../{log.err}" - + script: "{params.script}" ## DESeq2 (on Salmon) rule DESeq2_Salmon_basic: @@ -75,7 +65,8 @@ rule DESeq2_Salmon_basic: importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = 'FALSE', tx2gene_file = "Annotation/genes.filtered.t2g", - rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), + formula = config["formula"] conda: CONDA_RNASEQ_ENV shell: "cd {params.outdir} && " @@ -88,4 +79,5 @@ rule DESeq2_Salmon_basic: "{params.allele_info} " # 6 "../{input.tx2gene_file} " # 7 "{params.rmdTemplate} " # 8 + "{params.formula} " " > ../{log.out} 2> ../{log.err}" diff --git a/snakePipes/shared/rules/DESeq2.singleComp.snakefile b/snakePipes/shared/rules/DESeq2.singleComp.snakefile index c6822e41b..3464a52be 100644 --- a/snakePipes/shared/rules/DESeq2.singleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.singleComp.snakefile @@ -21,7 +21,8 @@ rule DESeq2: importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE', tx2gene_file = 'NA', - rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), + formula = config["formula"] log: out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",sampleSheet)), err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",sampleSheet)) @@ -37,6 +38,7 @@ rule DESeq2: "{params.allele_info} " # 6 "{params.tx2gene_file} " # 7 "{params.rmdTemplate} " # 8 + "{params.formula} " #9 " > ../{log.out} 2> ../{log.err}" @@ -61,7 +63,8 @@ rule DESeq2_Salmon_basic: importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = 'FALSE', tx2gene_file = "Annotation/genes.filtered.t2g", - rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), + formula = config["formula"] conda: CONDA_RNASEQ_ENV shell: "cd {params.outdir} && " @@ -74,6 +77,7 @@ rule DESeq2_Salmon_basic: "{params.allele_info} " # 6 "../{input.tx2gene_file} " # 7 "{params.rmdTemplate} " # 8 + "{params.formula} " # 9 " > ../{log.out} 2> ../{log.err}" rule DESeq2_Salmon_allelic: diff --git a/snakePipes/workflows/mRNA-seq/defaults.yaml b/snakePipes/workflows/mRNA-seq/defaults.yaml index 849ffd722..f3142c8bf 100644 --- a/snakePipes/workflows/mRNA-seq/defaults.yaml +++ b/snakePipes/workflows/mRNA-seq/defaults.yaml @@ -64,6 +64,7 @@ clusterPAS: ## further options mode: alignment,deepTools_qc sampleSheet: +formula: rMats: False bwBinSize: 25 fastqc: False diff --git a/snakePipes/workflows/mRNA-seq/internals.snakefile b/snakePipes/workflows/mRNA-seq/internals.snakefile index 835802a80..f9e3c6ba8 100644 --- a/snakePipes/workflows/mRNA-seq/internals.snakefile +++ b/snakePipes/workflows/mRNA-seq/internals.snakefile @@ -39,6 +39,10 @@ else: infiles = sorted(glob.glob(os.path.join(str(indir or ''), '*' + bamExt))) samples = cf.get_sample_names_bam(infiles, bamExt) +if formula and not sampleSheet: + print("In order to apply custom formula, please provide a sample sheet!") + exit(1) + if sampleSheet: cf.check_sample_info_header(sampleSheet) isMultipleComparison = cf.isMultipleComparison(sampleSheet) diff --git a/snakePipes/workflows/mRNA-seq/mRNA-seq b/snakePipes/workflows/mRNA-seq/mRNA-seq index d06eb9388..80f369abd 100755 --- a/snakePipes/workflows/mRNA-seq/mRNA-seq +++ b/snakePipes/workflows/mRNA-seq/mRNA-seq @@ -26,6 +26,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, "alignerOptions": None, "featureCountsOptions": "-C -Q 10 --primary", "filterGTF": None, "sampleSheet": None, + "formula": None, "reads": ["_R1", "_R2"], "ext": ".fastq.gz", "bwBinSize": 25, "dnaContam": False, "plotFormat": "png", "fromBAM": False, "bamExt": ".bam", "pairedEnd": True, @@ -91,6 +92,12 @@ def parse_args(defaults={"verbose": False, "configFile": None, " for that! (default: '%(default)s')", default=defaults["sampleSheet"]) + optional.add_argument("--formula", + dest="formula", + help="Design formula to use in linear model fit (default: '%(default)s')", + default=defaults["formula"]) + + optional.add_argument("--dnaContam", action="store_true", help="Returns a plot which presents the proportion of the intergenic reads (default: '%(default)s')",