Skip to content

Commit

Permalink
seacr lenient
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] committed Apr 19, 2024
1 parent 7d94a6f commit da715d2
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 11 deletions.
57 changes: 54 additions & 3 deletions snakePipes/shared/rules/ChIP_peak_calling.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ rule prep_bedgraph:
bedtools genomecov -bg -i filtered_bedgraph/{params.sample}.fragments.bed -g {params.genome} > {output}
"""

rule SEACR_peaks:
rule SEACR_peaks_stringent:
input:
chip = "filtered_bedgraph/{chip_sample}.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+".fragments.bedgraph" if get_control(wildcards.chip_sample)
Expand All @@ -265,7 +265,24 @@ rule SEACR_peaks:
bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "stringent" {params.prefix} 2>{log}
"""

rule SEACR_peak_qc:
rule SEACR_peaks_lenient:
input:
chip = "filtered_bedgraph/{chip_sample}.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+".fragments.bedgraph" if get_control(wildcards.chip_sample)
else []
output:
"SEACR/{chip_sample}.filtered.lenient.bed"
log: "SEACR/logs/{chip_sample}.log"
params:
fdr = lambda wildcards,input: fdr if not input.control else "",
prefix = os.path.join(outdir,"SEACR/{chip_sample}.filtered"),
script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh")
conda: CONDA_SEACR_ENV
shell: """
bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "lenient" {params.prefix} 2>{log}
"""

rule SEACR_peak_stringent_qc:
input:
bam = "filtered_bam/{sample}.filtered.bam",
peaks = "SEACR/{sample}.filtered.stringent.bed"
Expand All @@ -274,7 +291,41 @@ rule SEACR_peak_qc:
params:
genome_index = genome_index
benchmark:
"SEACR/.benchmark/SEACR_peak_qc.{sample}.filtered.benchmark"
"SEACR/.benchmark/SEACR_peak_stringent_qc.{sample}.filtered.benchmark"
conda: CONDA_SHARED_ENV
shell: """
# get the number of peaks
peak_count=`wc -l < {input.peaks}`
# get the number of mapped reads
mapped_reads=`samtools view -c -F 4 {input.bam}`
#calculate the number of alignments overlapping the peaks
# exclude reads flagged as unmapped (unmapped reads will be reported when using -L)
reads_in_peaks=`samtools view -c -F 4 -L {input.peaks} {input.bam}`
# calculate Fraction of Reads In Peaks
frip=`bc -l <<< "$reads_in_peaks/$mapped_reads"`
# compute peak genome coverage
peak_len=`awk '{{total+=$3-$2}}END{{print total}}' {input.peaks}`
genome_size=`awk '{{total+=$3-$2}}END{{print total}}' {params.genome_index}`
genomecov=`bc -l <<< "$peak_len/$genome_size"`
# write peak-based QC metrics to output file
printf "peak_count\tFRiP\tpeak_genome_coverage\n%d\t%5.3f\t%6.4f\n" $peak_count $frip $genomecov > {output.qc}
"""


rule SEACR_peak_lenient_qc:
input:
bam = "filtered_bam/{sample}.filtered.bam",
peaks = "SEACR/{sample}.filtered.lenient.bed"
output:
qc = "SEACR/{sample}.filtered.lenient_peaks.qc.txt"
params:
genome_index = genome_index
benchmark:
"SEACR/.benchmark/SEACR_peak_lenient_qc.{sample}.filtered.benchmark"
conda: CONDA_SHARED_ENV
shell: """
# get the number of peaks
Expand Down
56 changes: 52 additions & 4 deletions snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ rule prep_bedgraph:
bigWigToBedGraph {input} {output}
"""

rule SEACR_peaks:
rule SEACR_peaks_stringent:
input:
chip = "filtered_bedgraph/{chip_sample}_host.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+"_host.fragments.bedgraph" if get_control(wildcards.chip_sample)
Expand All @@ -265,8 +265,25 @@ rule SEACR_peaks:
bash {params.script} {input.chip} {input.control} {params.fdr} "non" "stringent" {params.prefix} 2>{log}
"""

rule SEACR_peaks_lenient:
input:
chip = "filtered_bedgraph/{chip_sample}_host.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+"_host.fragments.bedgraph" if get_control(wildcards.chip_sample)
else []
output:
"SEACR/{chip_sample}_host.lenient.bed"
log: "SEACR/logs/{chip_sample}.log"
params:
fdr = lambda wildcards,input: fdr if not input.control else "",
prefix = os.path.join(outdir,"SEACR/{chip_sample}_host"),
script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh")
conda: CONDA_SEACR_ENV
shell: """
bash {params.script} {input.chip} {input.control} {params.fdr} "non" "lenient" {params.prefix} 2>{log}
"""


rule SEACR_peak_qc:
rule SEACR_peak_stringent_qc:
input:
bam = "split_bam/{sample}_host.bam",
peaks = "SEACR/{sample}_host.stringent.bed"
Expand All @@ -275,7 +292,7 @@ rule SEACR_peak_qc:
params:
genome_index = genome_index
benchmark:
"SEACR/.benchmark/SEACR_peak_qc.{sample}_host.benchmark"
"SEACR/.benchmark/SEACR_peak_qc.{sample}_host_stringend.benchmark"
conda: CONDA_SHARED_ENV
shell: """
# get the number of peaks
Expand All @@ -299,4 +316,35 @@ rule SEACR_peak_qc:
printf "peak_count\tFRiP\tpeak_genome_coverage\n%d\t%5.3f\t%6.4f\n" $peak_count $frip $genomecov > {output.qc}
"""


rule SEACR_peak_lenient_qc:
input:
bam = "split_bam/{sample}_host.bam",
peaks = "SEACR/{sample}_host.lenient.bed"
output:
qc = "SEACR/{sample}_host.lenient_peaks.qc.txt"
params:
genome_index = genome_index
benchmark:
"SEACR/.benchmark/SEACR_peak_qc.{sample}_host_lenient.benchmark"
conda: CONDA_SHARED_ENV
shell: """
# get the number of peaks
peak_count=`wc -l < {input.peaks}`
#get the number of mapped reads
mapped_reads=`samtools view -c -F 4 {input.bam}`
#calculate the number of alignments overlapping the peaks
# exclude reads flagged as unmapped (unmapped reads will be reported when using -L)
reads_in_peaks=`samtools view -c -F 4 -L {input.peaks} {input.bam}`
# calculate Fraction of Reads In Peaks
frip=`bc -l <<< "$reads_in_peaks/$mapped_reads"`
# compute peak genome coverage
peak_len=`awk '{{total+=$3-$2}}END{{print total}}' {input.peaks}`
genome_size=`awk '{{total+=$3-$2}}END{{print total}}' {params.genome_index}`
genomecov=`bc -l <<< "$peak_len/$genome_size"`
# write peak-based QC metrics to output file
printf "peak_count\tFRiP\tpeak_genome_coverage\n%d\t%5.3f\t%6.4f\n" $peak_count $frip $genomecov > {output.qc}
"""
8 changes: 4 additions & 4 deletions snakePipes/workflows/ChIP-seq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ def run_macs2():
def run_seacr():
if (peakCaller == "SEACR"):
if useSpikeInForNorm:
file_list = expand("SEACR/{chip_sample}_host.stringent.bed", chip_sample = chip_samples)
file_list.append(expand("SEACR/{chip_sample}_host.stringent_peaks.qc.txt",chip_sample=chip_samples))
file_list = expand("SEACR/{chip_sample}_host.{mode}.bed", chip_sample = chip_samples,mode=["stringent","lenient"])
file_list.append(expand("SEACR/{chip_sample}_host.{mode}_peaks.qc.txt",chip_sample=chip_samples,mode=["stringent","lenient"]))
else:
file_list = expand("SEACR/{chip_sample}.filtered.stringent.bed", chip_sample = chip_samples)
file_list.append(expand("SEACR/{chip_sample}.filtered.stringent_peaks.qc.txt",chip_sample=chip_samples))
file_list = expand("SEACR/{chip_sample}.filtered.{mode}.bed", chip_sample = chip_samples,mode=["stringent","lenient"])
file_list.append(expand("SEACR/{chip_sample}.{mode}.stringent_peaks.qc.txt",chip_sample=chip_samples,mode=["stringent","lenient"]))
return (file_list)
return ([])

Expand Down

0 comments on commit da715d2

Please sign in to comment.