From 317fa2c2ab039345f951f8fc8f2773a5a241a3d0 Mon Sep 17 00:00:00 2001
From: Cyriac Kandoth
Date: Mon, 1 Feb 2021 11:37:59 -0800
Subject: [PATCH] maf2vcf: Sanity checks for invalid alleles

---
 maf2vcf.pl | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/maf2vcf.pl b/maf2vcf.pl
index a3e1a6e..de55fb5 100644
--- a/maf2vcf.pl
+++ b/maf2vcf.pl
@@ -196,10 +196,6 @@
     $qual = "." if( !defined $qual or $qual eq "" );
     $filter = "." if( !defined $filter or $filter eq "" );
 
-    # If normal alleles are unset in the MAF (quite common), assume homozygous reference
-    $n_al1 = $ref if( $n_al1 eq "" );
-    $n_al2 = $ref if( $n_al2 eq "" );
-
     # Make sure we have at least one variant allele. If not, die with an error
     if( $al1 eq "" and $al2 eq "" ) {
         die "ERROR: MAF line $line_count has no variant allele specified at $chr:$pos!\n";
@@ -219,6 +215,15 @@
     # Blank out the dashes (or other weird chars) used with indels
     ( $ref, $al1, $al2, $n_al1, $n_al2 ) = map{s/^(\?|-|0)+$//; $_} ( $ref, $al1, $al2, $n_al1, $n_al2 );
 
+    # If normal alleles are unset in the MAF (quite common), assume homozygous reference
+    $n_al1 = $ref if( $n_al1 eq "" );
+    $n_al2 = $ref if( $n_al2 eq "" );
+
+    # Do a sanity check on all the alleles
+    unless( $al1=~m/^[ACGT-]*$/ and $al2=~m/^[ACGT-]*$/ and $n_al1=~m/^[ACGT-]*$/ and $n_al2=~m/^[ACGT-]*$/ ) {
+        die "ERROR: MAF line $line_count (at $chr:$pos) contains invalid alleles in Tumor_Seq_Allele or Match_Norm_Seq_Allele columns!\n";
+    }
+
     # To simplify code coming up below, ensure that $al2 is always non-REF
     ( $al1, $al2 ) = ( $al2, $al1 ) if( $al2 eq $ref );
     # Do the same for the normal alleles, though it makes no difference if both are REF