From f5180fe10af4b59ac2d8e94de9902fadf46db03e Mon Sep 17 00:00:00 2001 From: Katarzyna Sikora Date: Mon, 20 Jan 2025 09:34:02 +0100 Subject: [PATCH] Issue fixes (#1088) * spikein ext * picard rrbs metrics * loompy combine * keep going * loompy dep * version in News.rst * python cap * python cap * deprecate loompy merge * test dag * samtools -M * quotes * fix wgbs * require tbi * fix wgbs output files * test dag * test dag * test dag * test dag * multiqc * test dag * allelic suffix * allele suffix * fixes multiqc * allelic-counting * allelic-counting * allelic-counting * test dag * test dag * pytest * pytest * pytest * docs * fix predict chip dict * docs --------- Co-authored-by: katarzyna.otylia.sikora@gmail.com --- .ci_stuff/test_dag.sh | 26 +++---- docs/content/News.rst | 11 ++- docs/content/workflows/mRNAseq.rst | 4 +- snakePipes/common_functions.py | 9 +-- snakePipes/shared/defaults.yaml | 2 +- snakePipes/shared/rules/WGBS.snakefile | 71 ++++--------------- .../shared/rules/createIndices.snakefile | 2 +- snakePipes/shared/rules/envs/picard.yaml | 6 ++ snakePipes/shared/rules/multiQC.snakefile | 12 +++- .../shared/rules/scRNAseq_STARsolo.snakefile | 40 +++++------ snakePipes/shared/rules/whatshap.snakefile | 2 +- snakePipes/workflows/WGBS/Snakefile | 2 +- .../workflows/mRNAseq/internals.snakefile | 4 ++ snakePipes/workflows/scRNAseq/Snakefile | 4 +- tests/test_jobcounts.py | 15 ++-- 15 files changed, 91 insertions(+), 119 deletions(-) create mode 100755 snakePipes/shared/rules/envs/picard.yaml diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh index de1283fce..54896841a 100755 --- a/.ci_stuff/test_dag.sh +++ b/.ci_stuff/test_dag.sh @@ -149,7 +149,7 @@ mkdir -p output touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf /tmp/genome.2bit mkdir -p allelic_input mkdir -p allelic_input/Ngenome -touch allelic_input/file.vcf.gz allelic_input/snpfile.txt +touch allelic_input/file.vcf.gz allelic_input/file.vcf.gz.tbi allelic_input/snpfile.txt cp .ci_stuff/genome.fa .ci_stuff/genome.fa.fai /tmp/ mkdir -p /tmp/SalmonIndex /tmp/annotation touch /tmp/SalmonIndex/decoys.txt @@ -338,12 +338,12 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2408 ]; then exit 1 ; fi WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3105 ]; then exit 1 ; fi WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi #allelic+multicomp WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 668 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 667 ]; then exit 1 ; fi WC=`mRNAseq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1315 ]; then exit 1 ; fi WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` @@ -375,12 +375,8 @@ WC=`ncRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_mult if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1317 ]; then exit 1 ; fi # scRNAseq -#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1038 ]; then exit 1 ; fi -#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1015 ]; then exit 1 ; fi WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1642 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1633 ]; then exit 1 ; fi WC=`scRNAseq -i PE_input -o output --mode STARsolo --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1467 ]; then exit 1 ; fi WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` @@ -390,17 +386,17 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 604 ]; then exit 1 ; fi # WGBS WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1327 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1244 ]; then exit 1 ; fi WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1380 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1297 ]; then exit 1 ; fi WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --fastqc --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --skipBamQC --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l ` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 530 ]; then exit 1 ; fi diff --git a/docs/content/News.rst b/docs/content/News.rst index fa9245549..4cecb05cd 100644 --- a/docs/content/News.rst +++ b/docs/content/News.rst @@ -1,10 +1,17 @@ snakePipes News =============== -snakePipes 3.1.1 +snakePipes 3.2.0 ________________ -* added whatshap-allelic mode to mRNA seq +* added allelic-whatshap mode to mRNA seq +* fixes #1085 +* fixes #1083 +* fixes #1082 +* fixes #1063 +* fixes #1058 +* fixes #1024 +* fixes minor issues with mRNAseq allelic-whatshap mode snakePipes 3.1.0 diff --git a/docs/content/workflows/mRNAseq.rst b/docs/content/workflows/mRNAseq.rst index f10728fc2..66dac0888 100644 --- a/docs/content/workflows/mRNAseq.rst +++ b/docs/content/workflows/mRNAseq.rst @@ -9,7 +9,7 @@ What it does The snakePipes mRNAseq workflow allows users to process their single or paired-end mRNAseq fastq files upto the point of gene/transcript-counts and differential expression. It also allows full allele-specific mRNAseq analysis (up to allele-specific -differential expression) using the *allelic-mapping* mode. +differential expression) using the *allelic-mapping* or *allelic-whatshap* mode. .. image:: ../images/RNAseq_pipeline.png @@ -209,7 +209,7 @@ Allele-specific, gene-level differential expression analysis is then performed u ~~~~~~~~~~~~~~~~~~ **allelic-whatshap** mode applies a standard alignment to a nonmasked genome with STAR, followed by allele-specific splitting -of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). Gene-level quantification is performed for each allele using **featureCounts**. +of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). The file must be bzip-compressed and tabix-indexed as well. Gene-level quantification is performed for each allele using **featureCounts**. Allele-specific, gene-level differential expression analysis is then performed using **DESeq2**. diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index 2c2aa277e..a9f008867 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -44,7 +44,8 @@ def set_env_yamls(): 'CONDA_pysam_ENV': 'envs/pysam.yaml', 'CONDA_SEACR_ENV': 'envs/chip_seacr.yaml', 'CONDA_FQLINT_ENV': 'envs/fqlint.yaml', - 'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml' + 'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml', + 'CONDA_PICARD_ENV': 'envs/picard.yaml' } @@ -855,11 +856,11 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): print("No control sample found!") chip_dict_pred["chip_dict"][i] = {} - chip_dict_pred["chip_dict"][i]['control'] = tmp + chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE): - chip_dict_pred["chip_dict"][i]['broad'] = True + chip_dict_pred["chip_dict"][i]['Broad'] = True else: - chip_dict_pred["chip_dict"][i]['broad'] = False + chip_dict_pred["chip_dict"][i]['Broad'] = False outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml") write_configfile(outfile, chip_dict_pred) diff --git a/snakePipes/shared/defaults.yaml b/snakePipes/shared/defaults.yaml index f5de896e0..cb32d1bdc 100755 --- a/snakePipes/shared/defaults.yaml +++ b/snakePipes/shared/defaults.yaml @@ -3,7 +3,7 @@ # Note that due to limitations in yaml.dump, only very basic structures are # permitted here. ################################################################################ -snakemakeOptions: '' +snakemakeOptions: ' --keep-going ' organismsDir: 'shared/organisms' snakemakeProfile: 'shared/profiles/local' tempDir: /scratch/local diff --git a/snakePipes/shared/rules/WGBS.snakefile b/snakePipes/shared/rules/WGBS.snakefile index d0135c8ba..9f923dba3 100755 --- a/snakePipes/shared/rules/WGBS.snakefile +++ b/snakePipes/shared/rules/WGBS.snakefile @@ -5,15 +5,22 @@ import tempfile ###bam symlinking is taken care of by LinkBam -# TODO: Make optional rule conversionRate: input: - "QC_metrics/{sample}.CHH.Mbias.txt" + bam = "filtered_bam/{sample}.filtered.bam", + bai = "filtered_bam/{sample}.filtered.bam.bai", + ref = genome_fasta output: - "QC_metrics/{sample}.conv.rate.txt" + "QC_metrics/{sample}.rrbs_summary_metrics" + params: + prefix = "QC_metrics/{sample}" + conda: CONDA_PICARD_ENV threads: 1 shell: """ - awk '{{if(NR>1) {{M+=$4; UM+=$5}}}}END{{printf("{wildcards.sample}\\t%f\\n", 100*(1.0-M/(M+UM)))}}' {input} > {output} + java -jar picard.jar CollectRrbsMetrics \ + R={input.ref} \ + I={input.bam} \ + M={params.prefix} """ @@ -57,60 +64,6 @@ elif not pairedEnd and not fromBAM: rm -rf "$MYTEMP" """ -#if not fromBAM: -# rule index_bam: -# input: -# aligner+"/{sample}.sorted.bam" -# output: -# temp(aligner+"/{sample}.sorted.bam.bai") -# conda: CONDA_SHARED_ENV -# shell: """ -# samtools index "{input}" -# """ - -#if not skipBamQC: -# rule markDupes: -# input: -# aligner+"/{sample}.sorted.bam", -# aligner+"/{sample}.sorted.bam.bai" -# output: -# "Sambamba/{sample}.markdup.bam" -# threads: lambda wildcards: 10 if 10/ s/$/{spikeinExt}/' {input} > {output} + sed '/\s+/$/{spikeinExt} /' {input} > {output} """ rule createGenomeFasta: diff --git a/snakePipes/shared/rules/envs/picard.yaml b/snakePipes/shared/rules/envs/picard.yaml new file mode 100755 index 000000000..1b8eceb25 --- /dev/null +++ b/snakePipes/shared/rules/envs/picard.yaml @@ -0,0 +1,6 @@ +name: snakepipes_picard_environment_1.0 +channels: + - conda-forge + - bioconda +dependencies: + - picard = 3.3.0 diff --git a/snakePipes/shared/rules/multiQC.snakefile b/snakePipes/shared/rules/multiQC.snakefile index 81650046e..13f6933e7 100755 --- a/snakePipes/shared/rules/multiQC.snakefile +++ b/snakePipes/shared/rules/multiQC.snakefile @@ -78,16 +78,24 @@ def multiqc_input_check(return_value): infiles.append( expand(aligner+"/{sample}.markdup.bam", sample = samples) + expand("Sambamba/{sample}.markdup.txt", sample = samples) + expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples)) + infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) ) indir += aligner indir += " Sambamba " indir += " deepTools_qc " - if "allelic-mapping" in mode or "allelic-counting" in mode or "allelic-whatshap" in mode: + indir += " allelic_bams " + if "allelic-whatshap" in mode and fromBAM: + infiles.append( expand("filtered_bam/{sample}.filtered.bam", sample = samples) ) + infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) ) + infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) ) + indir += " filtered_bam " + " featureCounts " + indir += " allelic_bams " + if "allelic-mapping" in mode or "allelic-counting" in mode in mode: infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) ) indir += aligner + " featureCounts " if "allelic-mapping" in mode: infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_report.yaml", sample = samples) ) infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_sort.yaml", sample = samples) ) - indir += "allelic_bams" + indir += " allelic_bams " if "alignment-free" in mode: if "allelic-mapping" in mode: infiles.append( expand("SalmonAllelic/{sample}.{allelic_suffix}/quant.sf", sample = samples,allelic_suffix=allelic_suffix) ) diff --git a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile index 69d950698..b30a22a07 100755 --- a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile +++ b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile @@ -225,25 +225,21 @@ if not skipVelocyto: rm -rf $MYTEMP """ - rule combine_loom: - input: expand("VelocytoCounts/{sample}",sample=samples) - output: "VelocytoCounts_merged/merged.loom" - conda: CONDA_loompy_ENV - params: - outfile = outdir+"/VelocytoCounts_merged/merged.loom", - script = maindir+"/shared/tools/loompy_merge.py", - input_fp = lambda wildcards,input: [ os.path.join(outdir,f) for f in input ] - shell: """ - python {params.script} -outf {params.outfile} {params.input_fp} - """ - - #rule velocity_to_seurat: - # input: - # indirs = expand("VelocytoCounts/{sample}",sample=samples) - # output: - # seurat = "Seurat/Velocyto/merged_samples.RDS" - # params: - # wdir = outdir + "/Seurat/Velocyto", - # samples = samples - # conda: CONDA_seurat3_ENV - # script: "../rscripts/scRNAseq_merge_loom.R" + +#deprecate loom combination by loompy - > Seurat4 should be handling it in R + +# def aggregate_input(wildcards): +# checkpoint_output = checkpoints.velocyto.get(sample=wildcards.sample).output["outdir"] +# return expand("VelocytoCounts/{sample}/{i}.loom", +# i=glob_wildcards(os.path.join(checkpoint_output, "{i}.loom")).i) + +# rule combine_loom: +# input: aggregate_input +# output: "VelocytoCounts_merged/merged.loom" +# conda: CONDA_loompy_ENV +# params: +# outfile = outdir+"/VelocytoCounts_merged/merged.loom" +# run: """ +# loompy.combine(files={input}, output_file={params.outfile}, key="Accession") +# """ + diff --git a/snakePipes/shared/rules/whatshap.snakefile b/snakePipes/shared/rules/whatshap.snakefile index f47683f37..7af0cd393 100644 --- a/snakePipes/shared/rules/whatshap.snakefile +++ b/snakePipes/shared/rules/whatshap.snakefile @@ -37,4 +37,4 @@ rule BAMindex_allelic: output: expand("allelic_bams/{sample}.{suffix}.sorted.bam.bai",sample=samples,suffix=['allele_flagged', 'genome1', 'genome2', 'unassigned']) conda: CONDA_SHARED_ENV - shell: "samtools index {input}" + shell: "samtools index -M {input}" diff --git a/snakePipes/workflows/WGBS/Snakefile b/snakePipes/workflows/WGBS/Snakefile index 815da6cb1..c2de341f7 100644 --- a/snakePipes/workflows/WGBS/Snakefile +++ b/snakePipes/workflows/WGBS/Snakefile @@ -109,7 +109,7 @@ def run_deeptools(GCbias): if skipBamQC: GCbias = False if GCbias: - return (expand("QC_metrics/GCbias.freq.txt", sample = samples, read = reads) + expand("QC_metrics/GCbias.png", sample = samples, read = reads)) + return ["QC_metrics/GCbias.freq.txt","QC_metrics/GCbias.png"] else: return ([]) diff --git a/snakePipes/workflows/mRNAseq/internals.snakefile b/snakePipes/workflows/mRNAseq/internals.snakefile index 7ecc4a1e1..2507700fb 100644 --- a/snakePipes/workflows/mRNAseq/internals.snakefile +++ b/snakePipes/workflows/mRNAseq/internals.snakefile @@ -53,6 +53,10 @@ if "allelic-whatshap" in mode: if not os.path.exists(pvcf): print("Phased vcf file " + str(pvcf) + " not found!") exit(1) + if pvcf and os.path.splitext(pvcf)[1] == ".gz": + if not os.path.exists(pvcf + ".tbi"): + print("A gzipped vcf file was provided but the index is missing. Please index the vcf.gz file with tabix.") + exit(1) if formula and not sampleSheet: print("In order to apply custom formula, please provide a sample sheet!") diff --git a/snakePipes/workflows/scRNAseq/Snakefile b/snakePipes/workflows/scRNAseq/Snakefile index ed773686e..e52518d52 100755 --- a/snakePipes/workflows/scRNAseq/Snakefile +++ b/snakePipes/workflows/scRNAseq/Snakefile @@ -67,8 +67,8 @@ def run_velocyto(skipVelocyto): if not skipVelocyto : if mode == "STARsolo": file_list = [ - expand("VelocytoCounts/{sample}.done.txt",sample=samples), - "VelocytoCounts_merged/merged.loom" + expand("VelocytoCounts/{sample}.done.txt",sample=samples)#, +# "VelocytoCounts_merged/merged.loom" ] elif mode == "Alevin": file_list = [expand("AlevinForVelocity/{sample}/alevin/quants_mat.gz",sample=samples), diff --git a/tests/test_jobcounts.py b/tests/test_jobcounts.py index ab8639056..3127cad7d 100644 --- a/tests/test_jobcounts.py +++ b/tests/test_jobcounts.py @@ -67,6 +67,7 @@ def createTestData(fp, samples=9) -> None: (fp / 'allelic_input'/ 'Ngenome').mkdir(parents=True) (fp / 'allelic_input'/ 'file.vcf.gz').touch() + (fp / 'allelic_input'/ 'file.vcf.gz.tbi').touch() (fp / 'allelic_input'/ 'snpfile.txt').touch() # samples @@ -1875,7 +1876,7 @@ def test_default(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 178 + assert parseSpOut(_p) == 177 def test_skipvelo(self, ifs): ci = [ "scRNAseq", @@ -1947,7 +1948,7 @@ def test_default(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 154 + assert parseSpOut(_p) == 145 def test_no_sampleSheet(self, ifs): ci = [ "WGBS", @@ -1962,7 +1963,7 @@ def test_no_sampleSheet(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 150 + assert parseSpOut(_p) == 141 def test_bwameth2(self, ifs): ci = [ "WGBS", @@ -1981,7 +1982,7 @@ def test_bwameth2(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 154 + assert parseSpOut(_p) == 145 def test_trimgcbias(self, ifs): ci = [ "WGBS", @@ -2000,7 +2001,7 @@ def test_trimgcbias(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 155 + assert parseSpOut(_p) == 146 def test_frombam(self, ifs): ci = [ "WGBS", @@ -2019,7 +2020,7 @@ def test_frombam(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 92 + assert parseSpOut(_p) == 83 def test_frombamfqc(self, ifs): ci = [ "WGBS", @@ -2039,7 +2040,7 @@ def test_frombamfqc(self, ifs): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 92 + assert parseSpOut(_p) == 83 def test_frombamskipqc(self, ifs): ci = [ "WGBS",