Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bowtie2 build fix #1080

Merged
merged 78 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
78 commits
Select commit Hold shift + click to select a range
3014b75
bowtie2 build fix
Sep 12, 2024
cd44eb8
Merge branch 'develop' into KS
katsikora Nov 15, 2024
4b2944e
CSAW changes
Nov 15, 2024
12f29d0
fix
Nov 15, 2024
83a31ce
fix
Nov 15, 2024
70925ac
fix
Nov 15, 2024
bcb4294
fix
Nov 15, 2024
b21de8d
fix
Nov 15, 2024
8388496
fix
Nov 15, 2024
d779c64
bam filtering mrnaseq
Nov 18, 2024
5951040
fix
Nov 18, 2024
4f82236
fix
Nov 18, 2024
d8f8e44
fix linkbam
Nov 18, 2024
3e6c010
mq string
Nov 18, 2024
92081f4
wgbs
Nov 18, 2024
a04bc52
fix multiqc
Nov 18, 2024
1fb1870
fix
Nov 19, 2024
5849acd
pytest
Nov 19, 2024
f230897
fix
Nov 19, 2024
b3cb968
fix
Nov 21, 2024
8ae73a8
fix
Nov 21, 2024
7b2b6e2
fix
Nov 22, 2024
f00b7ea
multiqc
Nov 22, 2024
8151385
fix
Nov 22, 2024
9cd07d9
fix
Nov 22, 2024
67da5b0
wgbs
Nov 22, 2024
61a81fb
wgbs
Nov 22, 2024
90d1a5e
wgbs
Nov 22, 2024
e7ff819
fix
Nov 28, 2024
eb89ab3
fix wgbs
Nov 28, 2024
6d66747
fix
Nov 28, 2024
c49aa33
fix
Nov 28, 2024
8bae9f4
fix
Nov 29, 2024
1303c00
fix
Nov 29, 2024
3f3fd4a
fix
Nov 29, 2024
7fdd87b
fix
Nov 29, 2024
06b0fca
does this work
Nov 29, 2024
8b26004
fix salmon
Dec 2, 2024
3c4b764
fix
Dec 2, 2024
32e77bb
pytest
Dec 2, 2024
629ee69
test checkpoint
Dec 2, 2024
6c039ab
pytest
Dec 2, 2024
6fcf1c2
sleuth
Dec 2, 2024
cf96962
fixes sleuth
Dec 3, 2024
ac13752
sleuth fix
Dec 3, 2024
e3fc37c
fix deseq2 salmon
Dec 3, 2024
4db093e
fix
Dec 16, 2024
479ff0c
fix
Dec 19, 2024
7797ddd
test dag
Dec 19, 2024
bb95e35
test dag
Dec 19, 2024
effd074
test dag
Dec 19, 2024
7ab2402
test dag
Dec 19, 2024
d714596
test dag
Dec 19, 2024
05773f3
test dag
Dec 19, 2024
aac3712
test dag
Dec 19, 2024
53e2183
test dag
Dec 19, 2024
fe2b063
test dag
Dec 19, 2024
fde2a06
test dag
Dec 19, 2024
7d8662c
test dag
Dec 19, 2024
3cf3373
test dag
Dec 19, 2024
9e57d91
test dag
Dec 19, 2024
18b332c
test dag
Dec 19, 2024
123bb64
test dag
Dec 19, 2024
6f35785
test dag
Dec 19, 2024
11d92b1
test dag
Dec 19, 2024
57ba11f
test dag
Dec 20, 2024
7212ad1
test dag
Dec 20, 2024
3d007ca
test dag
Dec 20, 2024
29f5096
test dag
Dec 20, 2024
fa7de3f
test dag
Dec 20, 2024
1897402
test dag
Dec 20, 2024
620df6e
test dag
Dec 20, 2024
4d58489
test dag
Dec 20, 2024
29b7365
test dag
Dec 20, 2024
92e4627
test dag
Dec 20, 2024
1ff8aff
test dag
Dec 20, 2024
4b3bbd7
test dag
Dec 20, 2024
9f42aec
test dag
Dec 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 30 additions & 30 deletions .ci_stuff/test_dag.sh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion snakePipes/shared/profiles/local/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,4 @@ set-resources:
velo_to_sce:
mem: 30G
velocyto:
mem: 20G
mem: 20G
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/DB_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ writeOutput_chip <- function(chipResultObject, outfile_prefix, fdrcutoff,lfccuto
full_res[,2]<-full_res[,2]-1
full_res[,2]<-format(full_res[,2], scientific = FALSE,trim=TRUE)
full_res[,3]<-format(full_res[,3], scientific = FALSE,trim=TRUE)
#write full results
write.table(full_res,file="Full.results.bed",row.names=FALSE,col.names=FALSE,sep="\t",quote=FALSE)
##filter full result for FDR and LFC and write to output
full_res.filt<-subset(full_res,(FDR<=fdrcutoff)&(abs(best.logFC)>=lfccutoff))
if(nrow(full_res.filt)>0){
Expand Down
34 changes: 30 additions & 4 deletions snakePipes/shared/rscripts/sleuth.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,43 @@ sample_info = read.table(sample_info_file, header=T)#[,1:2]
cnames.sub<-unique(colnames(sample_info)[2:which(colnames(sample_info) %in% "condition")])
d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+"))))
colnames(sample_info)[colnames(sample_info) %in% "name"] ="sample"

#check if sample names are pure numbers
numbers_only <- function(x) !grepl("\\D", x)
if(any(numbers_only(sample_info$sample))){sample_info$sample[numbers_only(sample_info$sample)]<-paste0("X",sample_info$sample[numbers_only(sample_info$sample)])}
rownames(sample_info)<-sample_info$sample

print(sample_info)
sample_info$sample

sample_id = list.dirs(file.path(indir), recursive=F, full.names=F)
sample_id = sort(sample_id[grep('[^benchmark][^SalmonIndex]', sample_id, invert=F)])

###MODIFY THIS PART
#sample_id = list.dirs(file.path(indir), recursive=F, full.names=F)
#sample_id = sort(sample_id[grep('[^benchmark][^SalmonIndex]', sample_id, invert=F)])
#if(any(numbers_only(sample_id))){sample_id[numbers_only(sample_id)]<-paste0("X",sample_id[numbers_only(sample_id)])}
#sample_id = intersect(sample_info$sample, sample_id) # get only those sample that are defined in the sampleInfo!
sample_id<-sample_id[match(sample_info$sample,sample_id)]
#sample_id<-sample_id[match(sample_info$sample,sample_id)]
#print(sample_id)
#if(any(is.na(sample_id))){stop("Sample names from sample sheet and from Salmon output are not matching each other.")}

#salmon_dirs = sapply(sample_id, function(id) file.path(indir, id))
#print(salmon_dirs)
######
sample_id<- dir(file.path(indir),recursive=FALSE,full.names=TRUE)
sample_id<-sample_id[grep("*quant.sf",sample_id)]
sample_id<-sub(".quant.sf","",sample_id)
names(sample_id)<-basename(sample_id)
print(sample_id)

if(any(numbers_only(names(sample_id)))){names(sample_id)[numbers_only(names(sample_id))]<-paste0("X",names(sample_id)[numbers_only(names(sample_id))])}

sample_id<-sample_id[match(sample_info$sample,names(sample_id))]
print(sample_id)
if(any(is.na(names(sample_id)))){stop("Sample names from sample sheet and from Salmon output are not matching each other.")}

salmon_dirs = sapply(sample_id, function(id) file.path(indir, id))
salmon_dirs = sample_id
print(salmon_dirs)
##########

s2c = mutate(sample_info, path=salmon_dirs)
## reorder conditions (for Wald test later on: order of comparison important for fold change)
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/CSAW.multiComp.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ rule CSAW:
output:
"{}/CSAW.session_info.txt".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
"{}/DiffBinding_analysis.Rdata".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
expand("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{{compGroup}}.tsv")) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction)
expand("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{{compGroup}}.tsv")) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction),
"{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/Full.results.bed"
benchmark:
"{}/.benchmark/CSAW.benchmark".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
params:
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/CSAW.singleComp.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ rule CSAW:
output:
"CSAW_{}_{}/CSAW.session_info.txt".format(peakCaller, sample_name),
"CSAW_{}_{}/DiffBinding_analysis.Rdata".format(peakCaller, sample_name),
expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction)
expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/Filtered.results.{change_dir}.bed", change_dir=change_direction),
"CSAW_{}_{}".format(peakCaller, sample_name) + "/Full.results.bed"
benchmark:
"CSAW_{}_{}/.benchmark/CSAW.benchmark".format(peakCaller, sample_name)
params:
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rules/DESeq2.singleComp.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ rule DESeq2_Salmon_basic:
"{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",sampleSheet))
params:
script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
sampleSheet = lambda wildcards,input: input.sampleSheet,
outdir = get_outdir("DESeq2_Salmon",sampleSheet),
fdr = fdr,
importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
Expand All @@ -67,6 +68,7 @@ rule DESeq2_Salmon_allelic:
"{}/.benchmark/DESeq2.SalmonAllelic.benchmark".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet))
params:
script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
sampleSheet = lambda wildcards,input: input.sampleSheet,
outdir = get_outdir("DESeq2_SalmonAllelic",sampleSheet),
fdr = fdr,
importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
Expand Down
33 changes: 17 additions & 16 deletions snakePipes/shared/rules/LinkBam.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,42 +29,43 @@ else:
input:
indir + "/{sample}" + bamExt
output:
aligner + "/{sample}.unsorted.bam" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam"
aligner + "/{sample}.unsorted.bam" if pipeline=="ncRNAseq" else aligner + "/{sample}.markdup.bam"
params:
input_bai = indir + "/{sample}" + bamExt + ".bai",
output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam.bai"
output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="ncRNAseq" else aligner + "/{sample}.markdup.bam.bai"
run:
if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)):
os.symlink(params.input_bai,os.path.join(outdir,params.output_bai))
if not os.path.exists(os.path.join(outdir,output[0])):
os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0]))


if not pipeline=="ncRNAseq":
rule samtools_index_external:
input:
aligner + "/{sample}.bam"
aligner + "/{sample}.markdup.bam"
output:
aligner + "/{sample}.bam.bai"
aligner + "/{sample}.markdup.bam.bai"
conda: CONDA_SHARED_ENV
shell: "if [[ ! -f {output[0]} ]]; then samtools index {input[0]}; fi"

if not pipeline=="WGBS" or pipeline=="WGBS" and skipBamQC:
rule link_bam_bai_external:
input:
bam = aligner + "/{sample}.bam",
bai = aligner + "/{sample}.bam.bai"
output:
bam_out = "filtered_bam/{sample}.filtered.bam",
bai_out = "filtered_bam/{sample}.filtered.bam.bai",
shell: """
ln -s ../{input.bam} {output.bam_out};
ln -s ../{input.bai} {output.bai_out}

rule link_bam_bai_external:
input:
bam = aligner + "/{sample}.markdup.bam",
bai = aligner + "/{sample}.markdup.bam.bai"
output:
bam_out = "filtered_bam/{sample}.filtered.bam",
bai_out = "filtered_bam/{sample}.filtered.bam.bai",
shell: """
ln -s ../{input.bam} {output.bam_out};
ln -s ../{input.bai} {output.bai_out}
"""


rule sambamba_flagstat:
input:
aligner + "/{sample}.bam"
aligner + "/{sample}.markdup.bam"
output:
"Sambamba/{sample}.markdup.txt"
conda: CONDA_SAMBAMBA_ENV
Expand Down
10 changes: 5 additions & 5 deletions snakePipes/shared/rules/SNPsplit.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ elif aligner == "STAR" or aligner == "EXTERNAL_BAM":
rule snp_split:
input:
snp = SNPFile,
bam = aligner+"/{sample}.bam"
bam = aligner+"/{sample}.markdup.bam"
output:
targetbam = expand("allelic_bams/{{sample}}.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
targetbam = expand("allelic_bams/{{sample}}.markdup.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
#tempbam = temp(aligner+"/{sample}.sortedByName.bam"),
rep1 = "allelic_bams/{sample}.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.SNPsplit_sort.yaml"
rep1 = "allelic_bams/{sample}.markdup.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.markdup.SNPsplit_sort.yaml"
params:
pairedEnd = '--paired' if pairedEnd else '',
outdir = "allelic_bams"
Expand Down Expand Up @@ -62,7 +62,7 @@ elif aligner == "STAR" or aligner == "EXTERNAL_BAM":

# sort them
rule BAMsort_allelic:
input: "allelic_bams/{sample}.filtered.{suffix}.bam" if aligner == "Bowtie2" else "allelic_bams/{sample}.{suffix}.bam"
input: "allelic_bams/{sample}.filtered.{suffix}.bam" if aligner == "Bowtie2" else "allelic_bams/{sample}.markdup.{suffix}.bam"
output:
"allelic_bams/{sample}.{suffix}.sorted.bam"
threads:
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/Salmon.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ if pairedEnd:
gtf = genes_gtf,
lib_type = getSalmon_libtype(pairedEnd, libraryType)
threads: 8
conda: CONDA_RNASEQ_ENV
conda: CONDA_SALMON_ENV
shell: """
salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir}
"""
Expand Down
114 changes: 57 additions & 57 deletions snakePipes/shared/rules/WGBS.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ if pairedEnd and not fromBAM:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
r2=fastq_dir + "/{sample}" + reads[1] + ".fastq.gz"
output:
sbam=temp(aligner+"/{sample}.bam")
sbam=temp(aligner+"/{sample}.sorted.bam")
params:
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
Expand All @@ -43,7 +43,7 @@ elif not pairedEnd and not fromBAM:
input:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
output:
sbam=temp(aligner+"/{sample}.bam")
sbam=temp(aligner+"/{sample}.sorted.bam")
params:
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
Expand All @@ -57,59 +57,59 @@ elif not pairedEnd and not fromBAM:
rm -rf "$MYTEMP"
"""

if not fromBAM:
rule index_bam:
input:
aligner+"/{sample}.bam"
output:
temp(aligner+"/{sample}.bam.bai")
conda: CONDA_SHARED_ENV
shell: """
samtools index "{input}"
"""

if not skipBamQC:
rule markDupes:
input:
aligner+"/{sample}.bam",
aligner+"/{sample}.bam.bai"
output:
"Sambamba/{sample}.markdup.bam"
threads: lambda wildcards: 10 if 10<max_thread else max_thread
params:
tempDir = tempDir
conda: CONDA_SAMBAMBA_ENV
shell: """
TMPDIR={params.tempDir}
MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
rm -rf "$MYTEMP"
"""


rule indexMarkDupes:
input:
"Sambamba/{sample}.markdup.bam"
output:
"Sambamba/{sample}.markdup.bam.bai"
params:
threads: 1
conda: CONDA_SHARED_ENV
shell: """
samtools index "{input}"
"""

rule link_deduped_bam:
input:
bam="Sambamba/{sample}.markdup.bam",
bai="Sambamba/{sample}.markdup.bam.bai"
output:
bam = "filtered_bam/{sample}.filtered.bam",
bai = "filtered_bam/{sample}.filtered.bam.bai"
shell: """
ln -s ../{input.bam} {output.bam}
ln -s ../{input.bai} {output.bai}
"""
#if not fromBAM:
# rule index_bam:
# input:
# aligner+"/{sample}.sorted.bam"
# output:
# temp(aligner+"/{sample}.sorted.bam.bai")
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

#if not skipBamQC:
# rule markDupes:
# input:
# aligner+"/{sample}.sorted.bam",
# aligner+"/{sample}.sorted.bam.bai"
# output:
# "Sambamba/{sample}.markdup.bam"
# threads: lambda wildcards: 10 if 10<max_thread else max_thread
# params:
# tempDir = tempDir
# conda: CONDA_SAMBAMBA_ENV
# shell: """
# TMPDIR={params.tempDir}
# MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX)
# sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}"
# rm -rf "$MYTEMP"
# """


# rule indexMarkDupes:
# input:
# "Sambamba/{sample}.markdup.bam"
# output:
# "Sambamba/{sample}.markdup.bam.bai"
# params:
# threads: 1
# conda: CONDA_SHARED_ENV
# shell: """
# samtools index "{input}"
# """

# rule link_deduped_bam:
# input:
# bam="Sambamba/{sample}.markdup.bam",
# bai="Sambamba/{sample}.markdup.bam.bai"
# output:
# bam = "filtered_bam/{sample}.filtered.bam",
# bai = "filtered_bam/{sample}.filtered.bam.bai"
# shell: """
# ln -s ../{input.bam} {output.bam}
# ln -s ../{input.bai} {output.bai}
# """


rule getRandomCpGs:
Expand Down Expand Up @@ -174,7 +174,7 @@ rule calc_Mbias:
threads: lambda wildcards: 10 if 10<max_thread else max_thread
conda: CONDA_WGBS_ENV
shell: """
MethylDackel mbias -@ {threads} {params.genome} {input[0]} QC_metrics/{wildcards.sample}
MethylDackel mbias -@ {threads} {params.genome} {input[0]} QC_metrics/{wildcards.sample} 2> {output}
"""


Expand All @@ -189,7 +189,7 @@ rule calcCHHbias:
threads: lambda wildcards: 10 if 10<max_thread else max_thread
conda: CONDA_WGBS_ENV
shell: """
MethylDackel mbias -@ {threads} --CHH --noCpG --noSVG {params.genome} {input[0]} QC_metrics/{wildcards.sample}
MethylDackel mbias -@ {threads} --CHH --noCpG --noSVG {params.genome} {input[0]} QC_metrics/{wildcards.sample} 2> {output}
"""


Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/bam_filtering.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ bam_filter_string = "{} {} {}".format("-F 1024" if dedup else "", "-f 2" if prop

rule samtools_filter:
input:
aligner+"/{sample}.bam"
aligner+"/{sample}.markdup.bam"
output:
bam = temp("filtered_bam/{sample}.filtered.tmp.bam")
params:
Expand Down
8 changes: 4 additions & 4 deletions snakePipes/shared/rules/deepTools_qc.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

rule bamCoverage:
input:
bam = aligner+"/{sample}.bam",
bai = aligner+"/{sample}.bam.bai"
bam = aligner+"/{sample}.markdup.bam",
bai = aligner+"/{sample}.markdup.bam.bai"
output:
"bamCoverage/{sample}.seq_depth_norm.bw"
params:
Expand Down Expand Up @@ -143,8 +143,8 @@ rule plotPCA:

rule estimate_read_filtering:
input:
bam = aligner+"/{sample}.bam",
bai = aligner+"/{sample}.bam.bai"
bam = aligner+"/{sample}.markdup.bam",
bai = aligner+"/{sample}.markdup.bam.bai"
output:
"deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt"
conda: CONDA_SHARED_ENV
Expand Down
1 change: 1 addition & 0 deletions snakePipes/shared/rules/envs/sleuth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ dependencies:
- r-base = 4.2.0
- r-sleuth = 0.30.1
- r-dplyr
- r-gtools
Loading
Loading