diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh index afdf07b32..dce808625 100755 --- a/.ci_stuff/test_dag.sh +++ b/.ci_stuff/test_dag.sh @@ -101,6 +101,30 @@ touch allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam \ allelic_BAM_input/allelic_bams/sample5.genome2.sorted.bam.bai \ allelic_BAM_input/allelic_bams/sample6.genome1.sorted.bam.bai \ allelic_BAM_input/allelic_bams/sample6.genome2.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample1.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample1.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample2.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample2.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample3.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample3.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample4.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample4.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample5.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample5.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample6.allele_flagged.sorted.bam \ + allelic_BAM_input/allelic_bams/sample6.unassigned.sorted.bam \ + allelic_BAM_input/allelic_bams/sample1.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample1.unassigned.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample2.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample2.unassigned.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample3.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample3.unassigned.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample4.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample4.unassigned.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample5.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample5.unassigned.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample6.allele_flagged.sorted.bam.bai \ + allelic_BAM_input/allelic_bams/sample6.unassigned.sorted.bam.bai \ allelic_BAM_input/deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv \ allelic_BAM_input/filtered_bam/sample1.filtered.bam \ allelic_BAM_input/filtered_bam/sample2.filtered.bam \ @@ -314,6 +338,10 @@ WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2539 ]; then exit 1 ; fi WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3294 ]; then exit 1 ; fi +WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi + + WC=`noncoding-RNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi diff --git a/docs/content/News.rst b/docs/content/News.rst index be3c759eb..5adf8224b 100644 --- a/docs/content/News.rst +++ b/docs/content/News.rst @@ -6,6 +6,7 @@ ________________ * added SEACR peaks qc * added external bed functionality for differential binding in ChIP-seq and ATAC-seq workflows +* added "allelic-counting" mode to mRNA-seq, allowing to count reads and run DGE from allelic bam files split e.g. by whatshap * fixed copyfile command for sampleSheet * removed deprecated --force argument from mamba commands * fixes #998 diff --git a/docs/content/workflows/mRNA-seq.rst b/docs/content/workflows/mRNA-seq.rst index e26d52564..a0e6a5cf0 100644 --- a/docs/content/workflows/mRNA-seq.rst +++ b/docs/content/workflows/mRNA-seq.rst @@ -194,6 +194,11 @@ Allele-specific, gene-level differential expression analysis is then performed u .. note:: **allelic-mapping** mode is mutually exclusive with **mapping** mode +"allelic-counting" +~~~~~~~~~~~~~~~~~~ + +**allelic-counting** mode requires the user to input, per sample, 4 bam files, corresponding to haplotype1, haplotype2, unassigned and haplotagged , e.g. as generated by whatshap. The respective suffixes ".genome1", ".genome2", ".unassigned", ".alelle_flagged" are required to be followed by the bam extention ".sorted.bam". This mode is mutually exclusive with "deepTools_qc". Only the allelic version of deepTools qc will be run, by default. Allelic version of featureCounts will be run by default. If sample sheet is provided, allelic DESeq2- or allelic Salmon-based differential gene expression analysis will be run. + "alignment-free" ~~~~~~~~~~~~~~~~ diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index 61d4d56cf..abe36b324 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -173,6 +173,20 @@ def get_sample_names_bam(infiles, bamExt): return sorted(list(set(s))) +def get_sample_names_suffix_bam(infiles, bamExt): + """ + Get sample names without file extensions + """ + bamSuff = [x + bamExt for x in [".genome1", ".genome2", ".unassigned", ".allele_flagged"]] + s = [] + for x in infiles: + for y in bamSuff: + if y in os.path.basename(x): + x = os.path.basename(x).replace(y, "") + s.append(x) + return sorted(list(set(s))) + + def is_paired(infiles, ext, reads): """ Check for paired-end input files diff --git a/snakePipes/shared/rscripts/DESeq2.R b/snakePipes/shared/rscripts/DESeq2.R index 9d1255162..d762edd34 100755 --- a/snakePipes/shared/rscripts/DESeq2.R +++ b/snakePipes/shared/rscripts/DESeq2.R @@ -29,7 +29,7 @@ sampleInfoFilePath <- snakemake@params[["sampleSheet"]] countFilePath <- snakemake@params[["counts_table"]] fdr <- as.numeric(snakemake@params[["fdr"]]) -geneNamesFilePath <- snakemake@input[["symbol_file"]] +geneNamesFilePath <- snakemake@params[["symbol_file"]] importfunc <- snakemake@params[["importfunc"]] allelic_info <- as.logical(snakemake@params[["allele_info"]]) tx2gene_file <- snakemake@params[["tx2gene_file"]] diff --git a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile index 68b13562c..afb418e9e 100644 --- a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile @@ -36,7 +36,8 @@ rule DESeq2: tx2gene_file = 'NA', rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), formula = config["formula"], - counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table) + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), + symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) log: out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")), err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) @@ -64,20 +65,10 @@ rule DESeq2_Salmon_basic: fdr = fdr, importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = 'FALSE', - tx2gene_file = "Annotation/genes.filtered.t2g", + tx2gene_file = os.path.join(outdir,"Annotation/genes.filtered.t2g"), rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), - formula = config["formula"] + formula = config["formula"], + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), + symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) conda: CONDA_RNASEQ_ENV - shell: - "cd {params.outdir} && " - "Rscript {params.script} " - "{params.sampleSheet} " # 1 - "../{input.counts_table} " # 2 - "{params.fdr} " # 3 - "../{input.symbol_file} " # 4 - "{params.importfunc} " # 5 - "{params.allele_info} " # 6 - "../{input.tx2gene_file} " # 7 - "{params.rmdTemplate} " # 8 - "{params.formula} " - " > ../{log.out} 2> ../{log.err}" + script: "{params.script}" diff --git a/snakePipes/shared/rules/DESeq2.singleComp.snakefile b/snakePipes/shared/rules/DESeq2.singleComp.snakefile index 3464a52be..2e506fd5b 100644 --- a/snakePipes/shared/rules/DESeq2.singleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.singleComp.snakefile @@ -7,7 +7,7 @@ def get_outdir(folder_name,sampleSheet): ## DESeq2 (on featureCounts) rule DESeq2: input: - counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode else "featureCounts/counts.tsv", + counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode or "allelic-counting" in mode else "featureCounts/counts.tsv", sampleSheet = sampleSheet, symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file output: @@ -15,32 +15,22 @@ rule DESeq2: benchmark: "{}/.benchmark/DESeq2.featureCounts.benchmark".format(get_outdir("DESeq2",sampleSheet)) params: - script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"), + script = os.path.join(maindir, "shared", "rscripts", "DESeq2.R"), + sampleSheet = lambda wildcards,input: input.sampleSheet, outdir = get_outdir("DESeq2",sampleSheet), fdr = fdr, importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), - allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE', + allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode or "allelic-counting" in mode else 'FALSE', tx2gene_file = 'NA', rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), - formula = config["formula"] + formula = config["formula"], + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), + symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) log: out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",sampleSheet)), err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",sampleSheet)) conda: CONDA_RNASEQ_ENV - shell: - "cd {params.outdir} && " - "Rscript {params.script} " - "{input.sampleSheet} " # 1 - "../{input.counts_table} " # 2 - "{params.fdr} " # 3 - "../{input.symbol_file} " # 4 - "{params.importfunc} " # 5 - "{params.allele_info} " # 6 - "{params.tx2gene_file} " # 7 - "{params.rmdTemplate} " # 8 - "{params.formula} " #9 - " > ../{log.out} 2> ../{log.err}" - + script: "{params.script}" ## DESeq2 (on Salmon) rule DESeq2_Salmon_basic: @@ -62,23 +52,14 @@ rule DESeq2_Salmon_basic: fdr = fdr, importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = 'FALSE', - tx2gene_file = "Annotation/genes.filtered.t2g", + tx2gene_file = os.path.join(outdir,"Annotation/genes.filtered.t2g"), rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), - formula = config["formula"] + formula = config["formula"], + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), + symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) conda: CONDA_RNASEQ_ENV - shell: - "cd {params.outdir} && " - "Rscript {params.script} " - "{input.sampleSheet} " # 1 - "../{input.counts_table} " # 2 - "{params.fdr} " # 3 - "../{input.symbol_file} " # 4 - "{params.importfunc} " # 5 - "{params.allele_info} " # 6 - "../{input.tx2gene_file} " # 7 - "{params.rmdTemplate} " # 8 - "{params.formula} " # 9 - " > ../{log.out} 2> ../{log.err}" + script: "{params.script}" + rule DESeq2_Salmon_allelic: input: @@ -99,18 +80,10 @@ rule DESeq2_Salmon_allelic: fdr = fdr, importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), allele_info = 'TRUE', - tx2gene_file = "Annotation/genes.filtered.t2g", - rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + tx2gene_file = os.path.join(outdir,"Annotation/genes.filtered.t2g"), + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"), + formula = config["formula"], + counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), + symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) conda: CONDA_RNASEQ_ENV - shell: - "cd {params.outdir} && " - "Rscript {params.script} " - "{input.sampleSheet} " # 1 - "../{input.counts_table} " # 2 - "{params.fdr} " # 3 - "../{input.symbol_file} " # 4 - "{params.importfunc} " # 5 - "{params.allele_info} " # 6 - "../{input.tx2gene_file} " # 7 - "{params.rmdTemplate} " # 8 - " > ../{log.out} 2> ../{log.err}" + script: "{params.script}" diff --git a/snakePipes/shared/rules/LinkBam.snakefile b/snakePipes/shared/rules/LinkBam.snakefile index 0514049b6..ea498b0f4 100644 --- a/snakePipes/shared/rules/LinkBam.snakefile +++ b/snakePipes/shared/rules/LinkBam.snakefile @@ -1,49 +1,74 @@ import os -rule link_bam: - input: - indir + "/{sample}" + bamExt - output: - aligner + "/{sample}.unsorted.bam" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam" - params: - input_bai = indir + "/{sample}" + bamExt + ".bai", - output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam.bai" - run: - if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)): - os.symlink(params.input_bai,os.path.join(outdir,params.output_bai)) - if not os.path.exists(os.path.join(outdir,output[0])): - os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0])) - -if not pipeline=="noncoding-rna-seq": +if pipeline=="rna-seq" and "allelic-counting" in mode: + rule link_bam: + input: + indir + "/{sample}.{suffix}" + bamExt + output: + "allelic_bams/{sample}.{suffix}" + bamExt + params: + input_bai = indir + "/{sample}.{suffix}" + bamExt + ".bai", + output_bai = "allelic_bams/{sample}.{suffix}" + bamExt + ".bai" + run: + if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)): + os.symlink(params.input_bai,os.path.join(outdir,params.output_bai)) + if not os.path.exists(os.path.join(outdir,output[0])): + os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0])) + rule samtools_index_external: input: - aligner + "/{sample}.bam" + "allelic_bams/{sample}.{suffix}" + bamExt output: - aligner + "/{sample}.bam.bai" + "allelic_bams/{sample}.{suffix}" + bamExt + ".bai" conda: CONDA_SHARED_ENV shell: "if [[ ! -f {output[0]} ]]; then samtools index {input[0]}; fi" - if not pipeline=="WGBS" or pipeline=="WGBS" and skipBamQC: - rule link_bam_bai_external: + +else: + rule link_bam: + input: + indir + "/{sample}" + bamExt + output: + aligner + "/{sample}.unsorted.bam" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam" + params: + input_bai = indir + "/{sample}" + bamExt + ".bai", + output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam.bai" + run: + if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)): + os.symlink(params.input_bai,os.path.join(outdir,params.output_bai)) + if not os.path.exists(os.path.join(outdir,output[0])): + os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0])) + + if not pipeline=="noncoding-rna-seq": + rule samtools_index_external: input: - bam = aligner + "/{sample}.bam", - bai = aligner + "/{sample}.bam.bai" + aligner + "/{sample}.bam" output: - bam_out = "filtered_bam/{sample}.filtered.bam", - bai_out = "filtered_bam/{sample}.filtered.bam.bai", - shell: """ - ln -s ../{input.bam} {output.bam_out}; - ln -s ../{input.bai} {output.bai_out} - """ - - - rule sambamba_flagstat: - input: - aligner + "/{sample}.bam" - output: - "Sambamba/{sample}.markdup.txt" - log: "Sambamba/logs/{sample}.flagstat.log" - conda: CONDA_SAMBAMBA_ENV - shell: """ - sambamba flagstat -p {input} > {output} 2> {log} - """ + aligner + "/{sample}.bam.bai" + conda: CONDA_SHARED_ENV + shell: "if [[ ! -f {output[0]} ]]; then samtools index {input[0]}; fi" + + if not pipeline=="WGBS" or pipeline=="WGBS" and skipBamQC: + rule link_bam_bai_external: + input: + bam = aligner + "/{sample}.bam", + bai = aligner + "/{sample}.bam.bai" + output: + bam_out = "filtered_bam/{sample}.filtered.bam", + bai_out = "filtered_bam/{sample}.filtered.bam.bai", + shell: """ + ln -s ../{input.bam} {output.bam_out}; + ln -s ../{input.bai} {output.bai_out} + """ + + + rule sambamba_flagstat: + input: + aligner + "/{sample}.bam" + output: + "Sambamba/{sample}.markdup.txt" + log: "Sambamba/logs/{sample}.flagstat.log" + conda: CONDA_SAMBAMBA_ENV + shell: """ + sambamba flagstat -p {input} > {output} 2> {log} + """ diff --git a/snakePipes/shared/rules/multiQC.snakefile b/snakePipes/shared/rules/multiQC.snakefile index 308cbc060..efc2df5b0 100755 --- a/snakePipes/shared/rules/multiQC.snakefile +++ b/snakePipes/shared/rules/multiQC.snakefile @@ -66,7 +66,7 @@ def multiqc_input_check(return_value): indir += "allelic_bams" elif pipeline=="rna-seq": # must be RNA-mapping, add files as per the mode - if ( "alignment" in mode or "deepTools_qc" in mode or "three-prime-seq" in mode ) and not "allelic-mapping" in mode: + if ( "alignment" in mode or "deepTools_qc" in mode or "three-prime-seq" in mode ) and not "allelic-mapping" in mode and not "allelic-counting" in mode: infiles.append( expand(aligner+"/{sample}.bam", sample = samples) + expand("Sambamba/{sample}.markdup.txt", sample = samples) + expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples)+ @@ -74,9 +74,10 @@ def multiqc_input_check(return_value): indir += aligner + " featureCounts " indir += " Sambamba " indir += " deepTools_qc " - if "allelic-mapping" in mode: + if "allelic-mapping" in mode or "allelic-counting" in mode: infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) ) indir += aligner + " featureCounts " + if "allelic-mapping" in mode: infiles.append( expand("allelic_bams/{sample}.SNPsplit_report.yaml", sample = samples) ) infiles.append( expand("allelic_bams/{sample}.SNPsplit_sort.yaml", sample = samples) ) indir += "allelic_bams" diff --git a/snakePipes/workflows/mRNA-seq/Snakefile b/snakePipes/workflows/mRNA-seq/Snakefile index 9d2c968f9..c4c97a8a9 100755 --- a/snakePipes/workflows/mRNA-seq/Snakefile +++ b/snakePipes/workflows/mRNA-seq/Snakefile @@ -27,8 +27,9 @@ include: os.path.join(maindir, "shared", "tools" , "deeptools_cmds.snakefile") ## filtered annotation (GTF) include: os.path.join(maindir, "shared", "rules", "filterGTF.snakefile") -## bamCoverage_RPKM -include: os.path.join(maindir, "shared", "rules", "deepTools_RNA.snakefile") +if not "allelic-counting" in mode: + ## bamCoverage_RPKM + include: os.path.join(maindir, "shared", "rules", "deepTools_RNA.snakefile") if not fromBAM: @@ -100,11 +101,14 @@ else: include: os.path.join(maindir, "shared", "rules", "SNPsplit.snakefile") # deepTools QC include: os.path.join(maindir, "shared", "rules", "deepTools_RNA_allelic.snakefile") + elif "allelic-counting" in mode: + allele_mode = "from_split_bam" + include: os.path.join(maindir, "shared", "rules", "deepTools_RNA_allelic.snakefile") include: os.path.join(maindir, "shared", "rules", "LinkBam.snakefile") -if "allelic-mapping" in mode: +if "allelic-mapping" in mode or "allelic-counting" in mode: ## featureCounts_allelic include: os.path.join(maindir, "shared", "rules", "featureCounts_allelic.snakefile") else: @@ -123,7 +127,7 @@ include:os.path.join(maindir, "shared", "rules", "RNA-seq_qc_report.snakefile") ## DESeq2 if sampleSheet and not "three-prime-seq" in mode: - if isMultipleComparison and not "allelic-mapping" in mode: + if isMultipleComparison and not "allelic-mapping" in mode and not "allelic-counting" in mode: include: os.path.join(maindir, "shared", "rules", "DESeq2.multipleComp.snakefile") if rMats: include: os.path.join(maindir, "shared", "rules", "rMats.multipleComp.snakefile") @@ -239,7 +243,7 @@ def make_nmasked_genome(): return([]) def run_allelesp_mapping(): - if "allelic-mapping" in mode: + if "allelic-mapping" in mode or "allelic-counting" in mode: allele_suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned'] file_list = [ expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples, diff --git a/snakePipes/workflows/mRNA-seq/internals.snakefile b/snakePipes/workflows/mRNA-seq/internals.snakefile index f9e3c6ba8..93470fd77 100644 --- a/snakePipes/workflows/mRNA-seq/internals.snakefile +++ b/snakePipes/workflows/mRNA-seq/internals.snakefile @@ -37,7 +37,10 @@ if not fromBAM: pairedEnd = cf.is_paired(infiles, ext, reads) else: infiles = sorted(glob.glob(os.path.join(str(indir or ''), '*' + bamExt))) - samples = cf.get_sample_names_bam(infiles, bamExt) + if "allelic-counting" in mode: + samples = cf.get_sample_names_suffix_bam(infiles, bamExt) + else: + samples = cf.get_sample_names_bam(infiles, bamExt) if formula and not sampleSheet: print("In order to apply custom formula, please provide a sample sheet!") diff --git a/snakePipes/workflows/mRNA-seq/mRNA-seq b/snakePipes/workflows/mRNA-seq/mRNA-seq index 80f369abd..a02f4d012 100755 --- a/snakePipes/workflows/mRNA-seq/mRNA-seq +++ b/snakePipes/workflows/mRNA-seq/mRNA-seq @@ -14,6 +14,7 @@ import sys import textwrap import snakePipes.common_functions as cf import snakePipes.parserCommon as parserCommon +import warnings def parse_args(defaults={"verbose": False, "configFile": None, @@ -51,7 +52,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, # Workflow options optional = parser.add_argument_group('Options') optional.add_argument("-m", "--mode", - help="workflow running modes (available: 'alignment-free, alignment, allelic-mapping, deepTools_qc, three-prime-seq')" + help="workflow running modes (available: 'alignment-free, alignment, allelic-mapping, allelic-counting, deepTools_qc, three-prime-seq')" " (default: '%(default)s')", default=defaults["mode"]) @@ -159,16 +160,24 @@ def main(): args.SNPfile = os.path.abspath(args.SNPfile) args.NMaskedIndex = os.path.abspath(args.NMaskedIndex) modeTemp = args.mode.split(",") - validModes = set(["alignment", "alignment-free", "deepTools_qc", "allelic-mapping","three-prime-seq"]) + validModes = set(["alignment", "alignment-free", "deepTools_qc", "allelic-mapping", "allelic-counting", "three-prime-seq"]) for mode in modeTemp: if mode not in validModes: sys.exit("{} is not a valid mode!\n".format(mode)) if "alignment" not in modeTemp and args.UMIDedup: sys.exit("UMIDedup is only valid for \"alignment\" mode!\n") + if "allelic-counting" in modeTemp and "deepTools_qc" in modeTemp: + sys.exit("Mode deepTools_qc is not compatible with mode allelic-counting.") if args.fromBAM and ("alignment-free" in modeTemp ): - sys.exit("\n--fromBAM can only be used with modes \'alignment\' , \'allelic-mapping\' or \'deepTools_qc\' - use one of these modes or provide fastq files!\n") + sys.exit("\n--fromBAM can only be used with modes \'alignment\' , \'allelic-mapping\' , \'allelic-counting\' or \'deepTools_qc\' - use one of these modes or provide fastq files!\n") if args.fromBAM: args.aligner = "EXTERNAL_BAM" + if "allelic-counting" in modeTemp and not args.fromBAM: + warnings.warn("--fromBAM is required with allelic-counting mode. Setting to True.") + args.fromBAM = True + if "allelic-counting" in modeTemp: + args.bamExt = ".sorted.bam" + args.aligner = "allelic_bams" if args.rMats and not args.sampleSheet: sys.exit("--rMats flag requires a sampleSheet (specified with --sampleSheet).\n") if "three_prime_seq" in mode: