bowtie2 build fix (#1080)
* bowtie2 build fix

* CSAW changes

* fix

* fix

* fix

* fix

* fix

* fix

* bam filtering mrnaseq

* fix

* fix

* fix linkbam

* mq string

* wgbs

* fix multiqc

* fix

* pytest

* fix

* fix

* fix

* fix

* multiqc

* fix

* fix

* wgbs

* wgbs

* wgbs

* fix

* fix wgbs

* fix

* fix

* fix

* fix

* fix

* fix

* does this work

* fix salmon

* fix

* pytest

* test checkpoint

* pytest

* sleuth

* fixes sleuth

* sleuth fix

* fix deseq2 salmon

* fix

* fix

* test dag (repeated 30 times)

---------

Co-authored-by: [email protected] <[email protected]>
katsikora and [email protected] authored Dec 20, 2024
1 parent 3608667 commit 4add952
Showing 27 changed files with 258 additions and 190 deletions.
60 changes: 30 additions & 30 deletions .ci_stuff/test_dag.sh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion snakePipes/shared/profiles/local/config.yaml
@@ -205,4 +205,4 @@ set-resources:
velo_to_sce:
mem: 30G
velocyto:
mem: 20G
mem: 20G
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/DB_functions.R
@@ -343,6 +343,8 @@ writeOutput_chip <- function(chipResultObject, outfile_prefix, fdrcutoff,lfccutoff
full_res[,2]<-full_res[,2]-1
full_res[,2]<-format(full_res[,2], scientific = FALSE,trim=TRUE)
full_res[,3]<-format(full_res[,3], scientific = FALSE,trim=TRUE)
#write full results
write.table(full_res,file="Full.results.bed",row.names=FALSE,col.names=FALSE,sep="\t",quote=FALSE)
##filter full result for FDR and LFC and write to output
full_res.filt<-subset(full_res,(FDR<=fdrcutoff)&(abs(best.logFC)>=lfccutoff))
if(nrow(full_res.filt)>0){
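For readers following the R change above: a minimal, self-contained sketch (not part of the commit) of the filtering step that now runs after the new unfiltered Full.results.bed write. The column names match those used in writeOutput_chip(); the toy values and cutoffs are illustrative assumptions.

```r
# Illustrative only: toy table with the column names used in writeOutput_chip().
full_res <- data.frame(
  chrom      = c("chr1", "chr1", "chr2"),
  start      = c(100L, 500L, 900L),
  end        = c(200L, 600L, 1000L),
  best.logFC = c(1.8, -0.2, -2.5),
  FDR        = c(0.01, 0.40, 0.02)
)
fdrcutoff <- 0.05   # assumed example cutoffs
lfccutoff <- 1

# As in the hunk above: the full table is written out unfiltered ...
write.table(full_res, file = "Full.results.bed", row.names = FALSE,
            col.names = FALSE, sep = "\t", quote = FALSE)

# ... and only the FDR/LFC-filtered subset feeds the Filtered.results.*.bed outputs.
full_res.filt <- subset(full_res, (FDR <= fdrcutoff) & (abs(best.logFC) >= lfccutoff))
nrow(full_res.filt)  # 2 of the 3 toy regions pass both cutoffs
```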
34 changes: 30 additions & 4 deletions snakePipes/shared/rscripts/sleuth.R
@@ -24,17 +24,43 @@ sample_info = read.table(sample_info_file, header=T)#[,1:2]
cnames.sub<-unique(colnames(sample_info)[2:which(colnames(sample_info) %in% "condition")])
d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+"))))
colnames(sample_info)[colnames(sample_info) %in% "name"] ="sample"

#check if sample names are pure numbers
numbers_only <- function(x) !grepl("\\D", x)
if(any(numbers_only(sample_info$sample))){sample_info$sample[numbers_only(sample_info$sample)]<-paste0("X",sample_info$sample[numbers_only(sample_info$sample)])}
rownames(sample_info)<-sample_info$sample

print(sample_info)
sample_info$sample

sample_id = list.dirs(file.path(indir), recursive=F, full.names=F)
sample_id = sort(sample_id[grep('[^benchmark][^SalmonIndex]', sample_id, invert=F)])

###MODIFY THIS PART
#sample_id = list.dirs(file.path(indir), recursive=F, full.names=F)
#sample_id = sort(sample_id[grep('[^benchmark][^SalmonIndex]', sample_id, invert=F)])
#if(any(numbers_only(sample_id))){sample_id[numbers_only(sample_id)]<-paste0("X",sample_id[numbers_only(sample_id)])}
#sample_id = intersect(sample_info$sample, sample_id) # get only those sample that are defined in the sampleInfo!
sample_id<-sample_id[match(sample_info$sample,sample_id)]
#sample_id<-sample_id[match(sample_info$sample,sample_id)]
#print(sample_id)
#if(any(is.na(sample_id))){stop("Sample names from sample sheet and from Salmon output are not matching each other.")}

#salmon_dirs = sapply(sample_id, function(id) file.path(indir, id))
#print(salmon_dirs)
######
sample_id<- dir(file.path(indir),recursive=FALSE,full.names=TRUE)
sample_id<-sample_id[grep("*quant.sf",sample_id)]
sample_id<-sub(".quant.sf","",sample_id)
names(sample_id)<-basename(sample_id)
print(sample_id)

if(any(numbers_only(names(sample_id)))){names(sample_id)[numbers_only(names(sample_id))]<-paste0("X",names(sample_id)[numbers_only(names(sample_id))])}

sample_id<-sample_id[match(sample_info$sample,names(sample_id))]
print(sample_id)
if(any(is.na(names(sample_id)))){stop("Sample names from sample sheet and from Salmon output are not matching each other.")}

salmon_dirs = sapply(sample_id, function(id) file.path(indir, id))
salmon_dirs = sample_id
print(salmon_dirs)
##########

s2c = mutate(sample_info, path=salmon_dirs)
## reorder conditions (for Wald test later on: order of comparison important for fold change)
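A compact, hypothetical walk-through (not part of the commit) of the reworked sample discovery in sleuth.R above: quant.sf files are located directly, purely numeric sample names get an "X" prefix, and the resulting paths are reordered to the sample-sheet order. The directory layout and sample names below are made up.

```r
# Same helper as in sleuth.R: TRUE if a name contains digits only.
numbers_only <- function(x) !grepl("\\D", x)

# Mock sample sheet (as read from sample_info_file); "101" is digits-only.
sample_info <- data.frame(sample = c("101", "wt_1"), condition = c("KO", "WT"))
idx <- numbers_only(sample_info$sample)
sample_info$sample[idx] <- paste0("X", sample_info$sample[idx])

# Mock Salmon outputs: one "<sample>.quant.sf" per sample under indir.
sample_id <- c("Salmon/101.quant.sf", "Salmon/wt_1.quant.sf")
sample_id <- sub(".quant.sf", "", sample_id)
names(sample_id) <- basename(sample_id)
names(sample_id)[numbers_only(names(sample_id))] <-
  paste0("X", names(sample_id)[numbers_only(names(sample_id))])

# Reorder to sample-sheet order; an NA here means sheet and Salmon output disagree.
sample_id <- sample_id[match(sample_info$sample, names(sample_id))]
stopifnot(!any(is.na(names(sample_id))))
print(sample_id)  # named paths, in sample-sheet order, used as the sleuth path column
```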
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/CSAW.multiComp.snakefile
@@ -87,7 +87,8 @@ rule CSAW:
output:
"{}/CSAW.session_info.txt".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
"{}/DiffBinding_analysis.Rdata".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
expand("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{{compGroup}}.tsv")) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction)
expand("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{{compGroup}}.tsv")) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction),
"{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/Full.results.bed"
benchmark:
"{}/.benchmark/CSAW.benchmark".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
params:
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/CSAW.singleComp.snakefile
@@ -71,7 +71,8 @@ rule CSAW:
output:
"CSAW_{}_{}/CSAW.session_info.txt".format(peakCaller, sample_name),
"CSAW_{}_{}/DiffBinding_analysis.Rdata".format(peakCaller, sample_name),
expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction)
expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction),
"CSAW_{}_{}".format(peakCaller, sample_name) + "/Full.results.bed"
benchmark:
"CSAW_{}_{}/.benchmark/CSAW.benchmark".format(peakCaller, sample_name)
params:
2 changes: 2 additions & 0 deletions snakePipes/shared/rules/DESeq2.singleComp.snakefile
@@ -42,6 +42,7 @@ rule DESeq2_Salmon_basic:
"{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",sampleSheet))
params:
script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
sampleSheet = lambda wildcards,input: input.sampleSheet,
outdir = get_outdir("DESeq2_Salmon",sampleSheet),
fdr = fdr,
importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
@@ -67,6 +68,7 @@ rule DESeq2_Salmon_allelic:
"{}/.benchmark/DESeq2.SalmonAllelic.benchmark".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet))
params:
script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
sampleSheet = lambda wildcards,input: input.sampleSheet,
outdir = get_outdir("DESeq2_SalmonAllelic",sampleSheet),
fdr = fdr,
importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
33 changes: 17 additions & 16 deletions snakePipes/shared/rules/LinkBam.snakefile
@@ -29,42 +29,43 @@ else:
input:
indir + "/{sample}" + bamExt
output:
aligner + "/{sample}.unsorted.bam" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam"
aligner + "/{sample}.unsorted.bam" if pipeline=="ncRNAseq" else aligner + "/{sample}.markdup.bam"
params:
input_bai = indir + "/{sample}" + bamExt + ".bai",
output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam.bai"
output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="ncRNAseq" else aligner + "/{sample}.markdup.bam.bai"
run:
if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)):
os.symlink(params.input_bai,os.path.join(outdir,params.output_bai))
if not os.path.exists(os.path.join(outdir,output[0])):
os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0]))


if not pipeline=="ncRNAseq":
rule samtools_index_external:
input:
aligner + "/{sample}.bam"
aligner + "/{sample}.markdup.bam"
output:
aligner + "/{sample}.bam.bai"
aligner + "/{sample}.markdup.bam.bai"
conda: CONDA_SHARED_ENV
shell: "if [[ ! -f {output[0]} ]]; then samtools index {input[0]}; fi"

if not pipeline=="WGBS" or pipeline=="WGBS" and skipBamQC:
rule link_bam_bai_external:
input:
bam = aligner + "/{sample}.bam",
bai = aligner + "/{sample}.bam.bai"
output:
bam_out = "filtered_bam/{sample}.filtered.bam",
bai_out = "filtered_bam/{sample}.filtered.bam.bai",
shell: """
ln -s ../{input.bam} {output.bam_out};
ln -s ../{input.bai} {output.bai_out}

rule link_bam_bai_external:
input:
bam = aligner + "/{sample}.markdup.bam",
bai = aligner + "/{sample}.markdup.bam.bai"
output:
bam_out = "filtered_bam/{sample}.filtered.bam",
bai_out = "filtered_bam/{sample}.filtered.bam.bai",
shell: """
ln -s ../{input.bam} {output.bam_out};
ln -s ../{input.bai} {output.bai_out}
"""


rule sambamba_flagstat:
input:
aligner + "/{sample}.bam"
aligner + "/{sample}.markdup.bam"
output:
"Sambamba/{sample}.markdup.txt"
conda: CONDA_SAMBAMBA_ENV
10 changes: 5 additions & 5 deletions snakePipes/shared/rules/SNPsplit.snakefile
@@ -22,12 +22,12 @@ elif aligner == "STAR" or aligner == "EXTERNAL_BAM":
rule snp_split:
input:
snp = SNPFile,
bam = aligner+"/{sample}.bam"
bam = aligner+"/{sample}.markdup.bam"
output:
targetbam = expand("allelic_bams/{{sample}}.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
targetbam = expand("allelic_bams/{{sample}}.markdup.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
#tempbam = temp(aligner+"/{sample}.sortedByName.bam"),
rep1 = "allelic_bams/{sample}.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.SNPsplit_sort.yaml"
rep1 = "allelic_bams/{sample}.markdup.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.markdup.SNPsplit_sort.yaml"
params:
pairedEnd = '--paired' if pairedEnd else '',
outdir = "allelic_bams"
@@ -62,7 +62,7 @@ elif aligner == "STAR" or aligner == "EXTERNAL_BAM":

# sort them
rule BAMsort_allelic:
input: "allelic_bams/{sample}.filtered.{suffix}.bam" if aligner == "Bowtie2" else "allelic_bams/{sample}.{suffix}.bam"
input: "allelic_bams/{sample}.filtered.{suffix}.bam" if aligner == "Bowtie2" else "allelic_bams/{sample}.markdup.{suffix}.bam"
output:
"allelic_bams/{sample}.{suffix}.sorted.bam"
threads:
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/Salmon.snakefile
@@ -57,7 +57,7 @@ if pairedEnd:
gtf = genes_gtf,
lib_type = getSalmon_libtype(pairedEnd, libraryType)
threads: 8
conda: CONDA_RNASEQ_ENV
conda: CONDA_SALMON_ENV
shell: """
salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir}
"""
114 changes: 57 additions & 57 deletions snakePipes/shared/rules/WGBS.snakefile
@@ -24,7 +24,7 @@ if pairedEnd and not fromBAM:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
r2=fastq_dir + "/{sample}" + reads[1] + ".fastq.gz"
output:
sbam=temp(aligner+"/{sample}.bam")
sbam=temp(aligner+"/{sample}.sorted.bam")
params:
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
@@ -43,7 +43,7 @@ elif not pairedEnd and not fromBAM:
input:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
output:
sbam=temp(aligner+"/{sample}.bam")
sbam=temp(aligner+"/{sample}.sorted.bam")
params:
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
@@ -57,59 +57,59 @@ elif not pairedEnd and not fromBAM:
rm -rf "$MYTEMP"
"""

if not fromBAM:
rule index_bam:
input:
aligner+"/{sample}.bam"
output:
temp(aligner+"/{sample}.bam.bai")
conda: CONDA_SHARED_ENV
shell: """
samtools index "{input}"
"""

if not skipBamQC:
rule markDupes:
input:
aligner+"/{sample}.bam",
aligner+"/{sample}.bam.bai"
output:
"Sambamba/{sample}.markdup.bam"
threads: lambda wildcards: 10 if 10<max_thread else max_thread
params:
tempDir = tempDir
conda: CONDA_SAMBAMBA_ENV
shell: """
TMPDIR={params.tempDir}
MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
rm -rf "$MYTEMP"
"""


rule indexMarkDupes:
input:
"Sambamba/{sample}.markdup.bam"
output:
"Sambamba/{sample}.markdup.bam.bai"
params:
threads: 1
conda: CONDA_SHARED_ENV
shell: """
samtools index "{input}"
"""

rule link_deduped_bam:
input:
bam="Sambamba/{sample}.markdup.bam",
bai="Sambamba/{sample}.markdup.bam.bai"
output:
bam = "filtered_bam/{sample}.filtered.bam",
bai = "filtered_bam/{sample}.filtered.bam.bai"
shell: """
ln -s ../{input.bam} {output.bam}
ln -s ../{input.bai} {output.bai}
"""
#if not fromBAM:
# rule index_bam:
# input:
# aligner+"/{sample}.sorted.bam"
# output:
# temp(aligner+"/{sample}.sorted.bam.bai")
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

#if not skipBamQC:
# rule markDupes:
# input:
# aligner+"/{sample}.sorted.bam",
# aligner+"/{sample}.sorted.bam.bai"
# output:
# "Sambamba/{sample}.markdup.bam"
# threads: lambda wildcards: 10 if 10<max_thread else max_thread
# params:
# tempDir = tempDir
# conda: CONDA_SAMBAMBA_ENV
# shell: """
# TMPDIR={params.tempDir}
# MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
# sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
# rm -rf "$MYTEMP"
# """


# rule indexMarkDupes:
# input:
# "Sambamba/{sample}.markdup.bam"
# output:
# "Sambamba/{sample}.markdup.bam.bai"
# params:
# threads: 1
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

# rule link_deduped_bam:
# input:
# bam="Sambamba/{sample}.markdup.bam",
# bai="Sambamba/{sample}.markdup.bam.bai"
# output:
# bam = "filtered_bam/{sample}.filtered.bam",
# bai = "filtered_bam/{sample}.filtered.bam.bai"
# shell: """
# ln -s ../{input.bam} {output.bam}
# ln -s ../{input.bai} {output.bai}
# """


rule getRandomCpGs:
@@ -174,7 +174,7 @@ rule calc_Mbias:
threads: lambda wildcards: 10 if 10<max_thread else max_thread
conda: CONDA_WGBS_ENV
shell: """
MethylDackel mbias -@ {threads} {params.genome} {input[0]} QC_metrics/{wildcards.sample}
MethylDackel mbias -@ {threads} {params.genome} {input[0]} QC_metrics/{wildcards.sample} 2> {output}
"""


@@ -189,7 +189,7 @@ rule calcCHHbias:
threads: lambda wildcards: 10 if 10<max_thread else max_thread
conda: CONDA_WGBS_ENV
shell: """
MethylDackel mbias -@ {threads} --CHH --noCpG --noSVG {params.genome} {input[0]} QC_metrics/{wildcards.sample}
MethylDackel mbias -@ {threads} --CHH --noCpG --noSVG {params.genome} {input[0]} QC_metrics/{wildcards.sample} 2> {output}
"""


2 changes: 1 addition & 1 deletion snakePipes/shared/rules/bam_filtering.snakefile
@@ -7,7 +7,7 @@ bam_filter_string = "{} {} {}".format("-F 1024" if dedup else "", "-f 2" if prop

rule samtools_filter:
input:
aligner+"/{sample}.bam"
aligner+"/{sample}.markdup.bam"
output:
bam = temp("filtered_bam/{sample}.filtered.tmp.bam")
params:
8 changes: 4 additions & 4 deletions snakePipes/shared/rules/deepTools_qc.snakefile
@@ -3,8 +3,8 @@

rule bamCoverage:
input:
bam = aligner+"/{sample}.bam",
bai = aligner+"/{sample}.bam.bai"
bam = aligner+"/{sample}.markdup.bam",
bai = aligner+"/{sample}.markdup.bam.bai"
output:
"bamCoverage/{sample}.seq_depth_norm.bw"
params:
@@ -143,8 +143,8 @@ rule plotPCA:

rule estimate_read_filtering:
input:
bam = aligner+"/{sample}.bam",
bai = aligner+"/{sample}.bam.bai"
bam = aligner+"/{sample}.markdup.bam",
bai = aligner+"/{sample}.markdup.bam.bai"
output:
"deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt"
conda: CONDA_SHARED_ENV
1 change: 1 addition & 0 deletions snakePipes/shared/rules/envs/sleuth.yaml
@@ -6,3 +6,4 @@ dependencies:
- r-base = 4.2.0
- r-sleuth = 0.30.1
- r-dplyr
- r-gtools
