Skip to content

Commit

Permalink
Issue fixes (#1088)
Browse files Browse the repository at this point in the history
* spikein ext

* picard rrbs metrics

* loompy combine

* keep going

* loompy dep

* version in News.rst

* python cap

* python cap

* deprecate loompy merge

* test dag

* samtools -M

* quotes

* fix wgbs

* require tbi

* fix wgbs output files

* test dag

* test dag

* test dag

* test dag

* multiqc

* test dag

* allelic suffix

* allele suffix

* fixes multiqc

* allelic-counting

* allelic-counting

* allelic-counting

* test dag

* test dag

* pytest

* pytest

* pytest

* docs

* fix predict chip dict

* docs

---------

Co-authored-by: [email protected] <[email protected]>
  • Loading branch information
katsikora and [email protected] authored Jan 20, 2025
1 parent 8d1132a commit f5180fe
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 119 deletions.
26 changes: 11 additions & 15 deletions .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ mkdir -p output
touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf /tmp/genome.2bit
mkdir -p allelic_input
mkdir -p allelic_input/Ngenome
touch allelic_input/file.vcf.gz allelic_input/snpfile.txt
touch allelic_input/file.vcf.gz allelic_input/file.vcf.gz.tbi allelic_input/snpfile.txt
cp .ci_stuff/genome.fa .ci_stuff/genome.fa.fai /tmp/
mkdir -p /tmp/SalmonIndex /tmp/annotation
touch /tmp/SalmonIndex/decoys.txt
Expand Down Expand Up @@ -338,12 +338,12 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2408 ]; then exit 1 ; fi
WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3105 ]; then exit 1 ; fi
WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi
WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi
#allelic+multicomp
WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 668 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 667 ]; then exit 1 ; fi
WC=`mRNAseq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1315 ]; then exit 1 ; fi
WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
Expand Down Expand Up @@ -375,12 +375,8 @@ WC=`ncRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_mult
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1317 ]; then exit 1 ; fi

# scRNAseq
#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1038 ]; then exit 1 ; fi
#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
#if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1015 ]; then exit 1 ; fi
WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1642 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1633 ]; then exit 1 ; fi
WC=`scRNAseq -i PE_input -o output --mode STARsolo --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1467 ]; then exit 1 ; fi
WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
Expand All @@ -390,17 +386,17 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 604 ]; then exit 1 ; fi

# WGBS
WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1327 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1244 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1287 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1380 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1297 ]; then exit 1 ; fi
WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi
WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --fastqc --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 817 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 734 ]; then exit 1 ; fi
WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --skipBamQC --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | sed '/^host:/d' | wc -l `
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 530 ]; then exit 1 ; fi

Expand Down
11 changes: 9 additions & 2 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
snakePipes News
===============

snakePipes 3.1.1
snakePipes 3.2.0
________________

* added whatshap-allelic mode to mRNA seq
* added allelic-whatshap mode to mRNA seq
* fixes #1085
* fixes #1083
* fixes #1082
* fixes #1063
* fixes #1058
* fixes #1024
* fixes minor issues with mRNAseq allelic-whatshap mode


snakePipes 3.1.0
Expand Down
4 changes: 2 additions & 2 deletions docs/content/workflows/mRNAseq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ What it does
The snakePipes mRNAseq workflow allows users to process their single or paired-end
mRNAseq fastq files upto the point of gene/transcript-counts and differential expression.
It also allows full allele-specific mRNAseq analysis (up to allele-specific
differential expression) using the *allelic-mapping* mode.
differential expression) using the *allelic-mapping* or *allelic-whatshap* mode.

.. image:: ../images/RNAseq_pipeline.png

Expand Down Expand Up @@ -209,7 +209,7 @@ Allele-specific, gene-level differential expression analysis is then performed u
~~~~~~~~~~~~~~~~~~

**allelic-whatshap** mode applies a standard alignment to a nonmasked genome with STAR, followed by allele-specific splitting
of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). Gene-level quantification is performed for each allele using **featureCounts**.
of mapped files with whatshap, requiring a phased vcf file as input ( ``--phased-vcf`` ). The file must be bzip-compressed and tabix-indexed as well. Gene-level quantification is performed for each allele using **featureCounts**.
Allele-specific, gene-level differential expression analysis is then performed using **DESeq2**.


Expand Down
9 changes: 5 additions & 4 deletions snakePipes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def set_env_yamls():
'CONDA_pysam_ENV': 'envs/pysam.yaml',
'CONDA_SEACR_ENV': 'envs/chip_seacr.yaml',
'CONDA_FQLINT_ENV': 'envs/fqlint.yaml',
'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml'
'CONDA_WHATSHAP_ENV': 'envs/whatshap.yaml',
'CONDA_PICARD_ENV': 'envs/picard.yaml'
}


Expand Down Expand Up @@ -855,11 +856,11 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None):
print("No control sample found!")

chip_dict_pred["chip_dict"][i] = {}
chip_dict_pred["chip_dict"][i]['control'] = tmp
chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None
if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE):
chip_dict_pred["chip_dict"][i]['broad'] = True
chip_dict_pred["chip_dict"][i]['Broad'] = True
else:
chip_dict_pred["chip_dict"][i]['broad'] = False
chip_dict_pred["chip_dict"][i]['Broad'] = False

outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml")
write_configfile(outfile, chip_dict_pred)
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Note that due to limitations in yaml.dump, only very basic structures are
# permitted here.
################################################################################
snakemakeOptions: ''
snakemakeOptions: ' --keep-going '
organismsDir: 'shared/organisms'
snakemakeProfile: 'shared/profiles/local'
tempDir: /scratch/local
Expand Down
71 changes: 12 additions & 59 deletions snakePipes/shared/rules/WGBS.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,22 @@ import tempfile

###bam symlinking is taken care of by LinkBam

# TODO: Make optional
rule conversionRate:
input:
"QC_metrics/{sample}.CHH.Mbias.txt"
bam = "filtered_bam/{sample}.filtered.bam",
bai = "filtered_bam/{sample}.filtered.bam.bai",
ref = genome_fasta
output:
"QC_metrics/{sample}.conv.rate.txt"
"QC_metrics/{sample}.rrbs_summary_metrics"
params:
prefix = "QC_metrics/{sample}"
conda: CONDA_PICARD_ENV
threads: 1
shell: """
awk '{{if(NR>1) {{M+=$4; UM+=$5}}}}END{{printf("{wildcards.sample}\\t%f\\n", 100*(1.0-M/(M+UM)))}}' {input} > {output}
java -jar picard.jar CollectRrbsMetrics \
R={input.ref} \
I={input.bam} \
M={params.prefix}
"""


Expand Down Expand Up @@ -57,60 +64,6 @@ elif not pairedEnd and not fromBAM:
rm -rf "$MYTEMP"
"""

#if not fromBAM:
# rule index_bam:
# input:
# aligner+"/{sample}.sorted.bam"
# output:
# temp(aligner+"/{sample}.sorted.bam.bai")
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

#if not skipBamQC:
# rule markDupes:
# input:
# aligner+"/{sample}.sorted.bam",
# aligner+"/{sample}.sorted.bam.bai"
# output:
# "Sambamba/{sample}.markdup.bam"
# threads: lambda wildcards: 10 if 10<max_thread else max_thread
# params:
# tempDir = tempDir
# conda: CONDA_SAMBAMBA_ENV
# shell: """
# TMPDIR={params.tempDir}
# MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
# sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
# rm -rf "$MYTEMP"
# """


# rule indexMarkDupes:
# input:
# "Sambamba/{sample}.markdup.bam"
# output:
# "Sambamba/{sample}.markdup.bam.bai"
# params:
# threads: 1
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

# rule link_deduped_bam:
# input:
# bam="Sambamba/{sample}.markdup.bam",
# bai="Sambamba/{sample}.markdup.bam.bai"
# output:
# bam = "filtered_bam/{sample}.filtered.bam",
# bai = "filtered_bam/{sample}.filtered.bam.bai"
# shell: """
# ln -s ../{input.bam} {output.bam}
# ln -s ../{input.bai} {output.bai}
# """


rule getRandomCpGs:
output:
Expand Down Expand Up @@ -262,7 +215,7 @@ rule produceReport:
input:
bedGraphs=expand("MethylDackel/{sample}_CpG.bedGraph", sample=samples),
Coverage=calc_doc(skipDOC),
ConversionRate=expand("QC_metrics/{sample}.conv.rate.txt", sample=samples),
ConversionRate=expand("QC_metrics/{sample}.rrbs_summary_metrics", sample=samples),
mbiasTXT=expand("QC_metrics/{sample}.Mbias.txt", sample=samples),
fstat=expand("QC_metrics/{sample}.flagstat", sample=samples)
output:
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/createIndices.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ else:
params:
spikeinExt = spikeinExt
shell: """
sed '/^>/ s/$/{spikeinExt}/' {input} > {output}
sed '/\s+/$/{spikeinExt} /' {input} > {output}
"""

rule createGenomeFasta:
Expand Down
6 changes: 6 additions & 0 deletions snakePipes/shared/rules/envs/picard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: snakepipes_picard_environment_1.0
channels:
- conda-forge
- bioconda
dependencies:
- picard = 3.3.0
12 changes: 10 additions & 2 deletions snakePipes/shared/rules/multiQC.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,24 @@ def multiqc_input_check(return_value):
infiles.append( expand(aligner+"/{sample}.markdup.bam", sample = samples) +
expand("Sambamba/{sample}.markdup.txt", sample = samples) +
expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples))
infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) )
indir += aligner
indir += " Sambamba "
indir += " deepTools_qc "
if "allelic-mapping" in mode or "allelic-counting" in mode or "allelic-whatshap" in mode:
indir += " allelic_bams "
if "allelic-whatshap" in mode and fromBAM:
infiles.append( expand("filtered_bam/{sample}.filtered.bam", sample = samples) )
infiles.append( expand("allelic_bams/{sample}.{suffix}.sorted.bam", sample = samples,suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']) )
infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) )
indir += " filtered_bam " + " featureCounts "
indir += " allelic_bams "
if "allelic-mapping" in mode or "allelic-counting" in mode in mode:
infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) )
indir += aligner + " featureCounts "
if "allelic-mapping" in mode:
infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_report.yaml", sample = samples) )
infiles.append( expand("allelic_bams/{sample}.markdup.SNPsplit_sort.yaml", sample = samples) )
indir += "allelic_bams"
indir += " allelic_bams "
if "alignment-free" in mode:
if "allelic-mapping" in mode:
infiles.append( expand("SalmonAllelic/{sample}.{allelic_suffix}/quant.sf", sample = samples,allelic_suffix=allelic_suffix) )
Expand Down
40 changes: 18 additions & 22 deletions snakePipes/shared/rules/scRNAseq_STARsolo.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -225,25 +225,21 @@ if not skipVelocyto:
rm -rf $MYTEMP
"""

rule combine_loom:
input: expand("VelocytoCounts/{sample}",sample=samples)
output: "VelocytoCounts_merged/merged.loom"
conda: CONDA_loompy_ENV
params:
outfile = outdir+"/VelocytoCounts_merged/merged.loom",
script = maindir+"/shared/tools/loompy_merge.py",
input_fp = lambda wildcards,input: [ os.path.join(outdir,f) for f in input ]
shell: """
python {params.script} -outf {params.outfile} {params.input_fp}
"""

#rule velocity_to_seurat:
# input:
# indirs = expand("VelocytoCounts/{sample}",sample=samples)
# output:
# seurat = "Seurat/Velocyto/merged_samples.RDS"
# params:
# wdir = outdir + "/Seurat/Velocyto",
# samples = samples
# conda: CONDA_seurat3_ENV
# script: "../rscripts/scRNAseq_merge_loom.R"

#deprecate loom combination by loompy - > Seurat4 should be handling it in R

# def aggregate_input(wildcards):
# checkpoint_output = checkpoints.velocyto.get(sample=wildcards.sample).output["outdir"]
# return expand("VelocytoCounts/{sample}/{i}.loom",
# i=glob_wildcards(os.path.join(checkpoint_output, "{i}.loom")).i)

# rule combine_loom:
# input: aggregate_input
# output: "VelocytoCounts_merged/merged.loom"
# conda: CONDA_loompy_ENV
# params:
# outfile = outdir+"/VelocytoCounts_merged/merged.loom"
# run: """
# loompy.combine(files={input}, output_file={params.outfile}, key="Accession")
# """

2 changes: 1 addition & 1 deletion snakePipes/shared/rules/whatshap.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ rule BAMindex_allelic:
output:
expand("allelic_bams/{sample}.{suffix}.sorted.bam.bai",sample=samples,suffix=['allele_flagged', 'genome1', 'genome2', 'unassigned'])
conda: CONDA_SHARED_ENV
shell: "samtools index {input}"
shell: "samtools index -M {input}"
2 changes: 1 addition & 1 deletion snakePipes/workflows/WGBS/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def run_deeptools(GCbias):
if skipBamQC:
GCbias = False
if GCbias:
return (expand("QC_metrics/GCbias.freq.txt", sample = samples, read = reads) + expand("QC_metrics/GCbias.png", sample = samples, read = reads))
return ["QC_metrics/GCbias.freq.txt","QC_metrics/GCbias.png"]
else:
return ([])

Expand Down
4 changes: 4 additions & 0 deletions snakePipes/workflows/mRNAseq/internals.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ if "allelic-whatshap" in mode:
if not os.path.exists(pvcf):
print("Phased vcf file " + str(pvcf) + " not found!")
exit(1)
if pvcf and os.path.splitext(pvcf)[1] == ".gz":
if not os.path.exists(pvcf + ".tbi"):
print("A gzipped vcf file was provided but the index is missing. Please index the vcf.gz file with tabix.")
exit(1)

if formula and not sampleSheet:
print("In order to apply custom formula, please provide a sample sheet!")
Expand Down
4 changes: 2 additions & 2 deletions snakePipes/workflows/scRNAseq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def run_velocyto(skipVelocyto):
if not skipVelocyto :
if mode == "STARsolo":
file_list = [
expand("VelocytoCounts/{sample}.done.txt",sample=samples),
"VelocytoCounts_merged/merged.loom"
expand("VelocytoCounts/{sample}.done.txt",sample=samples)#,
# "VelocytoCounts_merged/merged.loom"
]
elif mode == "Alevin":
file_list = [expand("AlevinForVelocity/{sample}/alevin/quants_mat.gz",sample=samples),
Expand Down
Loading

0 comments on commit f5180fe

Please sign in to comment.