-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgenotype2vcf.pl
69 lines (61 loc) · 1.69 KB
/
genotype2vcf.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!perl
use Getopt::Std;
getopts "i:o:";
if ((!defined $opt_i)|| (!defined $opt_o) ) {
die "************************************************************************
Usage: perl genotype2vcf.pl -i genotype.file -o snp.vcf
-h : help and usage
-i : genotype input
-o : vcf output
************************************************************************\n";
}else{
print "************************************************************************\n";
print "Version 1.0\n";
print "Copyright to Tanger: tanger.zhang\@gmail.com\n";
print "RUNNING ...\n";
print "************************************************************************\n";
}
my %iupac = (
"A" => "a a",
"C" => "c c",
"G" => "g g",
"T" => "t t",
"M" => "a c",
"K" => "g t",
"Y" => "c t",
"R" => "a g",
"W" => "a t",
"S" => "c g",
"-" => "- -",
"N" => "N N"
);
open(OUT, ">$opt_o") or die"";
print OUT "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n";
$id=0;
open(IN, $opt_i) or die"";
while(<IN>){
chomp;
my %basedb = ();
my $i; my @data; my $chrn; my $posi; my $refN; my $nucl; my $alt;
@data = split(/\s+/,$_);
$chrn = $data[0];
$posi = $data[1];
$refN = $data[2];
foreach $i(3..$#data){
($a,$b) = split(/\s+/,$iupac{$data[$i]});
$a = uc $a; $b = uc $b;
next if($a eq $refN); next if($b eq $refN);
$basedb{$a} += 1;
$basedb{$b} += 1;
}
$num_NA = $basedb{'-'};
foreach $nucl(sort {$basedb{$b}<=>$basedb{$a}} keys %basedb){
$alt = $nucl;
last;
}
$id++;
$id_name = "SNP".$id;
print OUT "$chrn $posi $id_name $refN $alt qual PASS LitChi67individuals .\n" if($basedb{$alt}>$num_NA);
}
close IN;
close OUT;