Issue fixes (#1088)

* spikein ext * picard rrbs metrics * loompy combine * keep going * loompy dep * version in News.rst * python cap * python cap * deprecate loompy merge * test dag * samtools -M * quotes * fix wgbs * require tbi * fix wgbs output files * test dag * test dag * test dag * test dag * multiqc * test dag * allelic suffix * allele suffix * fixes multiqc * allelic-counting * allelic-counting * allelic-counting * test dag * test dag * pytest * pytest * pytest * docs * fix predict chip dict * docs --------- Co-authored-by: [email protected] <[email protected]>
maxplanck-ie · Jan 20, 2025 · f5180fe · f5180fe
1 parent 8d1132a
commit f5180fe
Show file tree

Hide file tree

Showing 15 changed files with 91 additions and 119 deletions.
diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh
@@ -149,7 +149,7 @@ mkdir -p output
 touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf /tmp/genome.2bit
 mkdir -p allelic_input
 mkdir -p allelic_input/Ngenome
-touch allelic_input/file.vcf.gz allelic_input/snpfile.txt
+touch allelic_input/file.vcf.gz allelic_input/file.vcf.gz.tbi allelic_input/snpfile.txt
 cp .ci_stuff/genome.fa .ci_stuff/genome.fa.fai /tmp/
 mkdir -p /tmp/SalmonIndex /tmp/annotation
 touch /tmp/SalmonIndex/decoys.txt
@@ -338,12 +338,12 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2408 ]; then exit 1 ; fi
 WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3105 ]; then exit 1 ; fi
 WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp"  .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi
 WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp"  .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi
 #allelic+multicomp
 WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp"  .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 668 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 667 ]; then exit 1 ; fi
 WC=`mRNAseq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1315 ]; then exit 1 ; fi
 WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
@@ -375,12 +375,8 @@ WC=`ncRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_mult
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1317 ]; then exit 1 ; fi
 
 # scRNAseq
-#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1038 ]; then exit 1 ; fi
-#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1015 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1642 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1633 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode STARsolo --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1467 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
@@ -390,17 +386,17 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 604 ]; then exit 1 ; fi
 
 # WGBS
 WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1327 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1244 ]; then exit 1 ; fi
 WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi
 WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi
 WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1380 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1297 ]; then exit 1 ; fi
 WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi
 WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --fastqc --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi
 WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --skipBamQC --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 530 ]; then exit 1 ; fi
 

diff --git a/docs/content/News.rst b/docs/content/News.rst
@@ -1,10 +1,17 @@
 snakePipes News
 ===============
 
-snakePipes 3.1.1
+snakePipes 3.2.0
 ________________
 
-* added whatshap-allelic mode to mRNA seq
+* added allelic-whatshap mode to mRNA seq
+* fixes #1085
+* fixes #1083
+* fixes #1082
+* fixes #1063
+* fixes #1058
+* fixes #1024
+* fixes minor issues with mRNAseq allelic-whatshap mode
 
 
 snakePipes 3.1.0

diff --git a/docs/content/workflows/mRNAseq.rst b/docs/content/workflows/mRNAseq.rst
@@ -9,7 +9,7 @@ What it does
 The snakePipes mRNAseq workflow allows users to process their single or paired-end
 mRNAseq fastq files upto the point of gene/transcript-counts and differential expression.
 It also allows full allele-specific mRNAseq analysis (up to allele-specific
-differential expression) using the *allelic-mapping* mode.
+differential expression) using the *allelic-mapping* or *allelic-whatshap* mode.
 
 .. image:: ../images/RNAseq_pipeline.png
 
@@ -209,7 +209,7 @@ Allele-specific, gene-level differential expression analysis is then performed u
 ~~~~~~~~~~~~~~~~~~
 
 **allelic-whatshap** mode applies a standard alignment to a nonmasked genome with STAR, followed by allele-specific splitting
-of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). Gene-level quantification is performed for each allele using **featureCounts**.
+of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). The file must be bzip-compressed and tabix-indexed as well. Gene-level quantification is performed for each allele using **featureCounts**.
 Allele-specific, gene-level differential expression analysis is then performed using **DESeq2**.
 
 

diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py
@@ -44,7 +44,8 @@ def set_env_yamls():
             'CONDA_pysam_ENV': 'envs/pysam.yaml',
             'CONDA_SEACR_ENV': 'envs/chip_seacr.yaml',
             'CONDA_FQLINT_ENV': 'envs/fqlint.yaml',
-            'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml'
+            'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml',
+            'CONDA_PICARD_ENV': 'envs/picard.yaml'
             }
 
 
@@ -855,11 +856,11 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None):
             print("No control sample found!")
 
         chip_dict_pred["chip_dict"][i] = {}
-        chip_dict_pred["chip_dict"][i]['control'] = tmp
+        chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None
         if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE):
-            chip_dict_pred["chip_dict"][i]['broad'] = True
+            chip_dict_pred["chip_dict"][i]['Broad'] = True
         else:
-            chip_dict_pred["chip_dict"][i]['broad'] = False
+            chip_dict_pred["chip_dict"][i]['Broad'] = False
 
     outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml")
     write_configfile(outfile, chip_dict_pred)

diff --git a/snakePipes/shared/defaults.yaml b/snakePipes/shared/defaults.yaml
@@ -3,7 +3,7 @@
 # Note that due to limitations in yaml.dump, only very basic structures are
 # permitted here.
 ################################################################################
-snakemakeOptions: ''
+snakemakeOptions: ' --keep-going '
 organismsDir: 'shared/organisms'
 snakemakeProfile: 'shared/profiles/local'
 tempDir: /scratch/local

diff --git a/snakePipes/shared/rules/WGBS.snakefile b/snakePipes/shared/rules/WGBS.snakefile
@@ -5,15 +5,22 @@ import tempfile
 
 ###bam symlinking is taken care of by LinkBam
 
-# TODO: Make optional
 rule conversionRate:
     input:
-        "QC_metrics/{sample}.CHH.Mbias.txt"
+        bam = "filtered_bam/{sample}.filtered.bam",
+        bai = "filtered_bam/{sample}.filtered.bam.bai",
+        ref = genome_fasta
     output:
-        "QC_metrics/{sample}.conv.rate.txt"
+        "QC_metrics/{sample}.rrbs_summary_metrics"
+    params:
+        prefix = "QC_metrics/{sample}"
+    conda: CONDA_PICARD_ENV
     threads: 1
     shell: """
-        awk '{{if(NR>1) {{M+=$4; UM+=$5}}}}END{{printf("{wildcards.sample}\\t%f\\n", 100*(1.0-M/(M+UM)))}}' {input} > {output}
+        java -jar picard.jar CollectRrbsMetrics \
+        R={input.ref} \
+        I={input.bam} \
+        M={params.prefix}
         """
 
 
@@ -57,60 +64,6 @@ elif not pairedEnd and not fromBAM:
             rm -rf "$MYTEMP"
             """
 
-#if not fromBAM:
-#    rule index_bam:
-#        input:
-#            aligner+"/{sample}.sorted.bam"
-#        output:
-#            temp(aligner+"/{sample}.sorted.bam.bai")
-#        conda: CONDA_SHARED_ENV
-#        shell: """
-#            samtools index "{input}"
-#            """
-
-#if not skipBamQC:
-#    rule markDupes:
-#        input:
-#            aligner+"/{sample}.sorted.bam",
-#            aligner+"/{sample}.sorted.bam.bai"
-#        output:
-#            "Sambamba/{sample}.markdup.bam"
-#        threads: lambda wildcards: 10 if 10<max_thread else max_thread
-#        params:
-#            tempDir = tempDir
-#        conda: CONDA_SAMBAMBA_ENV
-#        shell: """
-#            TMPDIR={params.tempDir}
-#            MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
-#            sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
-#            rm -rf "$MYTEMP"
-#            """
-
-
-#    rule indexMarkDupes:
-#        input:
-#            "Sambamba/{sample}.markdup.bam"
-#        output:
-#            "Sambamba/{sample}.markdup.bam.bai"
-#        params:
-#        threads: 1
-#        conda: CONDA_SHARED_ENV
-#        shell: """
-#            samtools index "{input}"
-#            """
-
-#    rule link_deduped_bam:
-#        input:
-#            bam="Sambamba/{sample}.markdup.bam",
-#            bai="Sambamba/{sample}.markdup.bam.bai"
-#        output:
-#            bam = "filtered_bam/{sample}.filtered.bam",
-#            bai = "filtered_bam/{sample}.filtered.bam.bai"
-#        shell: """
-#            ln -s ../{input.bam} {output.bam}
-#            ln -s ../{input.bai} {output.bai}
-#        """
-
 
 rule getRandomCpGs:
     output:
@@ -262,7 +215,7 @@ rule produceReport:
     input:
         bedGraphs=expand("MethylDackel/{sample}_CpG.bedGraph", sample=samples),
         Coverage=calc_doc(skipDOC),
-        ConversionRate=expand("QC_metrics/{sample}.conv.rate.txt", sample=samples),
+        ConversionRate=expand("QC_metrics/{sample}.rrbs_summary_metrics", sample=samples),
         mbiasTXT=expand("QC_metrics/{sample}.Mbias.txt", sample=samples),
         fstat=expand("QC_metrics/{sample}.flagstat", sample=samples)
     output:

diff --git a/snakePipes/shared/rules/createIndices.snakefile b/snakePipes/shared/rules/createIndices.snakefile
@@ -54,7 +54,7 @@ else:
         params:
             spikeinExt = spikeinExt
         shell: """
-            sed '/^>/ s/$/{spikeinExt}/' {input} > {output}
+            sed '/\s+/$/{spikeinExt} /' {input} > {output}
         """
 
     rule createGenomeFasta:

diff --git a/snakePipes/shared/rules/envs/picard.yaml b/snakePipes/shared/rules/envs/picard.yaml
@@ -0,0 +1,6 @@
+name: snakepipes_picard_environment_1.0
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - picard = 3.3.0
diff --git a/snakePipes/shared/rules/multiQC.snakefile b/snakePipes/shared/rules/multiQC.snakefile
@@ -78,16 +78,24 @@ def multiqc_input_check(return_value):
             infiles.append( expand(aligner+"/{sample}.markdup.bam", sample = samples) +
                     expand("Sambamba/{sample}.markdup.txt", sample = samples) +
                     expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples))
+            infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) )
             indir += aligner
             indir += " Sambamba "
             indir += " deepTools_qc "
-        if "allelic-mapping" in mode or "allelic-counting" in mode or "allelic-whatshap" in mode:
+            indir += " allelic_bams "
+        if "allelic-whatshap" in mode and fromBAM:
+            infiles.append( expand("filtered_bam/{sample}.filtered.bam", sample = samples) )
+            infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) )
+            infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) )
+            indir += " filtered_bam " + " featureCounts "
+            indir += " allelic_bams "
+        if "allelic-mapping" in mode or "allelic-counting" in mode in mode:
             infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) )
             indir += aligner + " featureCounts "
         if "allelic-mapping" in mode:
             infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_report.yaml", sample = samples) )
             infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_sort.yaml", sample = samples) )
-            indir += "allelic_bams"
+            indir += " allelic_bams "
         if "alignment-free" in mode:
             if "allelic-mapping" in mode:
                 infiles.append( expand("SalmonAllelic/{sample}.{allelic_suffix}/quant.sf", sample = samples,allelic_suffix=allelic_suffix) )

diff --git a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile
@@ -225,25 +225,21 @@ if not skipVelocyto:
                 rm -rf $MYTEMP
         """
 
-    rule combine_loom:
-        input: expand("VelocytoCounts/{sample}",sample=samples)
-        output: "VelocytoCounts_merged/merged.loom"
-        conda: CONDA_loompy_ENV
-        params:
-            outfile = outdir+"/VelocytoCounts_merged/merged.loom",
-            script = maindir+"/shared/tools/loompy_merge.py",
-            input_fp = lambda wildcards,input: [ os.path.join(outdir,f) for f in input ]
-        shell: """
-            python {params.script} -outf {params.outfile} {params.input_fp}
-              """
-
-    #rule velocity_to_seurat:
-    #    input:
-    #        indirs = expand("VelocytoCounts/{sample}",sample=samples)
-    #    output:
-    #        seurat = "Seurat/Velocyto/merged_samples.RDS"
-    #    params:
-    #        wdir = outdir + "/Seurat/Velocyto",
-    #        samples = samples
-    #    conda: CONDA_seurat3_ENV
-    #    script: "../rscripts/scRNAseq_merge_loom.R"
+
+#deprecate loom combination by loompy - > Seurat4 should be handling it in R
+
+#    def aggregate_input(wildcards):
+#        checkpoint_output = checkpoints.velocyto.get(sample=wildcards.sample).output["outdir"]
+#        return expand("VelocytoCounts/{sample}/{i}.loom",
+#                  i=glob_wildcards(os.path.join(checkpoint_output, "{i}.loom")).i)
+
+#    rule combine_loom:
+#        input: aggregate_input
+#        output: "VelocytoCounts_merged/merged.loom"
+#        conda: CONDA_loompy_ENV
+#        params:
+#            outfile = outdir+"/VelocytoCounts_merged/merged.loom"
+#        run: """
+#            loompy.combine(files={input}, output_file={params.outfile}, key="Accession")
+#              """
+
diff --git a/snakePipes/shared/rules/whatshap.snakefile b/snakePipes/shared/rules/whatshap.snakefile
@@ -37,4 +37,4 @@ rule BAMindex_allelic:
     output:
         expand("allelic_bams/{sample}.{suffix}.sorted.bam.bai",sample=samples,suffix=['allele_flagged', 'genome1', 'genome2', 'unassigned'])
     conda: CONDA_SHARED_ENV
-    shell: "samtools index {input}"
+    shell: "samtools index -M {input}"
diff --git a/snakePipes/workflows/WGBS/Snakefile b/snakePipes/workflows/WGBS/Snakefile
@@ -109,7 +109,7 @@ def run_deeptools(GCbias):
     if skipBamQC:
         GCbias = False
     if GCbias:
-        return (expand("QC_metrics/GCbias.freq.txt", sample = samples, read = reads) + expand("QC_metrics/GCbias.png", sample = samples, read = reads))
+        return ["QC_metrics/GCbias.freq.txt","QC_metrics/GCbias.png"]
     else:
         return ([])
 

diff --git a/snakePipes/workflows/mRNAseq/internals.snakefile b/snakePipes/workflows/mRNAseq/internals.snakefile
@@ -53,6 +53,10 @@ if "allelic-whatshap" in mode:
     if not os.path.exists(pvcf):
         print("Phased vcf file " + str(pvcf) + " not found!")
         exit(1)
+    if pvcf and os.path.splitext(pvcf)[1] == ".gz":
+        if not os.path.exists(pvcf + ".tbi"):
+            print("A gzipped vcf file was provided but the index is missing. Please index the vcf.gz file with tabix.")
+            exit(1)
 
 if formula and not sampleSheet:
     print("In order to apply custom formula, please provide a sample sheet!")

diff --git a/snakePipes/workflows/scRNAseq/Snakefile b/snakePipes/workflows/scRNAseq/Snakefile
@@ -67,8 +67,8 @@ def run_velocyto(skipVelocyto):
     if not skipVelocyto :
         if mode == "STARsolo":
             file_list = [
-            expand("VelocytoCounts/{sample}.done.txt",sample=samples),
-            "VelocytoCounts_merged/merged.loom"
+            expand("VelocytoCounts/{sample}.done.txt",sample=samples)#,
+#            "VelocytoCounts_merged/merged.loom"
             ]
         elif mode == "Alevin":
             file_list = [expand("AlevinForVelocity/{sample}/alevin/quants_mat.gz",sample=samples),