Skip to content

Commit

Permalink
feat: remove max af 1 from all workflows (#1338)
Browse files Browse the repository at this point in the history
Removed:
- bcftools command that set a soft-filter on variants with the maximum allele frequency (AF of 1), which eventually led to a hard removal of these variants, since only PASS variants are kept afterwards.
  • Loading branch information
mathiasbio authored Jan 4, 2024
1 parent f318b1e commit 7397351
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 12 deletions.
2 changes: 0 additions & 2 deletions BALSAMIC/constants/variant_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"field": "INFO",
},
"MQ": {"tag_value": 40, "filter_name": "balsamic_low_mq", "field": "INFO"},
"AF_max": {"tag_value": 1, "filter_name": "balsamic_af_one", "field": "INFO"},
"AF_min": {"tag_value": 0.007, "filter_name": "balsamic_low_af", "field": "INFO"},
"pop_freq": {
"tag_value": 0.005,
Expand Down Expand Up @@ -50,7 +49,6 @@
"filter_name": "balsamic_low_tumor_dp",
"field": "FORMAT",
},
"AF_max": {"tag_value": 1, "filter_name": "balsamic_af_one", "field": "FORMAT"},
"AF_min": {"tag_value": 0.05, "filter_name": "balsamic_low_af", "field": "FORMAT"},
"pop_freq": {
"tag_value": 0.001,
Expand Down
2 changes: 0 additions & 2 deletions BALSAMIC/models/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ class VarCallerFilter(BaseModel):
Attributes:
AD: VCFAttributes (required); minimum allelic depth
AF_min: VCFAttributes (optional); minimum allelic fraction
AF_max: VCFAttributes (optional); maximum allelic fraction
MQ: VCFAttributes (optional); minimum mapping quality
DP: VCFAttributes (optional); minimum read depth
pop_freq: VCFAttributes (optional); maximum gnomad allele frequency
Expand All @@ -214,7 +213,6 @@ class VarCallerFilter(BaseModel):

AD: Optional[VCFAttributes] = None
AF_min: Optional[VCFAttributes] = None
AF_max: Optional[VCFAttributes] = None
MQ: Optional[VCFAttributes] = None
DP: Optional[VCFAttributes] = None
pop_freq: Optional[VCFAttributes] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ if config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["analys
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name],
AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name],
AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name],
strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name],
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name],
sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name],
Expand All @@ -36,7 +35,6 @@ bcftools view -f PASS,triallelic_site --threads {threads} --regions-file {input.
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF > {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \
| bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \
Expand All @@ -60,7 +58,6 @@ elif config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["anal
AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name],
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name],
AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name],
case_name = config["analysis"]["case_id"],
threads:
get_threads(cluster_config, 'bcftools_quality_filter_tnscope_tumor_normal')
Expand All @@ -72,7 +69,6 @@ bcftools view -f PASS,triallelic_site {input.vcf_snv} \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF[0] < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \
| bcftools view -f PASS,triallelic_site -O z -o {output.vcf_snv_research};

tabix -p vcf -f {output.vcf_snv_research};
Expand All @@ -94,7 +90,6 @@ if config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"]["a
AD=[VARDICT.AD.tag_value, VARDICT.AD.filter_name],
DP=[VARDICT.DP.tag_value, VARDICT.DP.filter_name],
AF_min=[VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name],
AF_max=[VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name],
case_name = config["analysis"]["case_id"],
threads:
get_threads(cluster_config,'bcftools_quality_filter_vardict_tumor_only')
Expand All @@ -107,7 +102,6 @@ bcftools filter --include 'INFO/MQ >= {params.MQ[0]}' --soft-filter '{params.MQ[
bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' | \
bcftools filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' | \
bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' | \
bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' | \
bcftools view -f PASS -o {output.vcf_filtered} -O z;

tabix -p vcf -f {output.vcf_filtered};
Expand Down Expand Up @@ -154,7 +148,6 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][
AD=[VARDICT.AD.tag_value, VARDICT.AD.filter_name],
DP=[VARDICT.DP.tag_value, VARDICT.DP.filter_name],
AF_min=[VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name],
AF_max=[VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name],
possible_germline="balsamic_possible_germline",
case_name = config["analysis"]["case_id"],
threads:
Expand All @@ -168,7 +161,6 @@ bcftools filter --include 'SMPL_MIN(FMT/MQ) >= {params.MQ[0]}' --soft-filter '{p
bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' | \
bcftools filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' | \
bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' | \
bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' | \
bcftools filter --exclude 'INFO/STATUS ~ "germline/i"' --soft-filter '{params.possible_germline}' --mode '+' | \
bcftools view -f PASS -o {output.vcf_filtered} -O z;

Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ Removed:
* Realignment step for TGA workflow https://github.com/Clinical-Genomics/BALSAMIC/pull/1272
* Archived/outdated workflows and scripts https://github.com/Clinical-Genomics/BALSAMIC/pull/1296
* Sed command to convert CNVpytor integer to float, deprecated by updated CNVpytor version https://github.com/Clinical-Genomics/BALSAMIC/pull/1310
* Maximum allele frequency (max AF 1) filter from bcftools https://github.com/Clinical-Genomics/BALSAMIC/pull/1338
* Extra samtools sort command from WGS cases https://github.com/Clinical-Genomics/BALSAMIC/pull/1334

[12.0.2]
Expand Down

0 comments on commit 7397351

Please sign in to comment.