-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathConvertFastatoPhylip
103 lines (91 loc) · 2.62 KB
/
ConvertFastatoPhylip
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#! /usr/bin/perl -w
######################################################################################
# This script takes an aligned-sequence FASTA file and converts it to a PHYLIP file
# Date: 2007-01-29
# Usage: perl Fasta2Phylip.pl inputFastaFile outputPhilipFile
######################################################################################
use strict;
# File::Temp is a core module; it supplies a collision-free scratch file name.
use File::Temp qw(tempfile);

# Main driver: normalise the input's line endings into a scratch file, convert
# that scratch file to PHYLIP, then remove the scratch file.
my $usage = "Usage: perl Fasta2Phylip.pl inputFastaFile outputPhilipFile\n";

# Test for defined/non-empty rather than truth, so a file literally named "0"
# is still accepted as an argument.
my $infile = shift;     # input FASTA file
die($usage) unless defined $infile && length $infile;
my $outFile = shift;    # output PHYLIP file
die($usage) unless defined $outFile && length $outFile;

# Stage the newline-normalised copy in a uniquely named temporary file instead
# of "$infile.unix": that fixed name would overwrite (and later delete) any
# existing file of the same name and requires the input directory to be
# writable. UNLINK => 1 also removes the file at exit if a later step dies.
my ($tmp_fh, $unixFile) = tempfile(
    "fasta2phylip_XXXXXX",
    SUFFIX => ".unix",
    TMPDIR => 1,
    UNLINK => 1,
);
close $tmp_fh;          # only the path is needed; ConvertToUnix reopens it by name

ConvertToUnix($infile, $unixFile);
ChangetoPhylip($unixFile, $outFile);
unlink($unixFile);
print "All done!\n";
exit 0;
######################################################################################
# ConvertToUnix($infile, $unixFile)
#
# Copies $infile to $unixFile with line endings normalised to "\n":
#   * if the text contains at least one "\r\n" pair, every "\r" is removed;
#   * otherwise, if it contains any "\r", each "\r" is replaced by "\n";
#   * otherwise the text is copied unchanged.
#
# Parameters:
#   $infile   - path of the file to read
#   $unixFile - path of the file to create/overwrite
#
# Returns 1 on success. Dies if either file cannot be opened, or if writing
# to or closing the output file fails.
sub ConvertToUnix {
    my ($infile, $unixFile) = @_;

    # Three-argument open with lexical handles: the file names are taken
    # literally (no mode characters or whitespace trimming), and the handles
    # cannot collide with bareword handles used elsewhere in the script.
    open(my $in_fh, '<', $infile) or die "Couldn't open $infile: $!\n";
    open(my $out_fh, '>', $unixFile) or die "Couldn't open $unixFile: $!\n";

    # Slurp the whole file at once; the line-ending decision below needs to
    # look at the complete text.
    my $text = do { local $/; <$in_fh> };
    close $in_fh;
    $text = "" unless defined $text;

    if ($text =~ /\r\n/) {
        $text =~ tr/\r//d;          # DOS/Windows: drop the carriage returns
    }
    elsif ($text =~ /\r/) {
        $text =~ tr/\r/\n/;         # bare CR endings: turn each CR into LF
    }

    print {$out_fh} $text or die "Couldn't write to $unixFile: $!\n";
    # Buffered write errors only surface at close, so check it.
    close $out_fh or die "Couldn't close $unixFile: $!\n";
    return 1;
}
######################################################################################
# ChangetoPhylip($unixFile, $phylipFile)
#
# Reads FASTA records from $unixFile (expected to use "\n" line endings) and
# writes them to $phylipFile as:
#
#   <number of sequences> <sequence length>
#   <name><TAB><sequence>          (one line per record, in input order)
#
# A record starts at a line beginning with ">"; its name is the first run of
# non-whitespace characters after the ">" (whitespace directly after ">" is
# tolerated). All following non-blank lines up to the next header are
# concatenated to form the sequence. Blank lines are ignored.
#
# Parameters:
#   $unixFile   - path of the FASTA file to read
#   $phylipFile - path of the PHYLIP file to create/overwrite
#
# Returns 1 on success.
#
# Dies if a file cannot be opened or the output cannot be closed. Also dies,
# after deleting both $unixFile and $phylipFile, when the input is not a
# usable alignment: no records, sequence data before the first header, a
# header without a name, or a sequence whose length differs from the first.
sub ChangetoPhylip {
    my ($unixFile, $phylipFile) = @_;

    # Validation failures remove both files before dying, so no stale or
    # partial output is left behind for the caller to mistake for a result.
    my $fail = sub {
        my ($message) = @_;
        unlink $unixFile;
        unlink $phylipFile;
        die $message;
    };

    open(my $in_fh, '<', $unixFile) or die "Couldn't open $unixFile: $!\n";

    my @records;        # [ name, sequence ] pairs in input order
    my $problem;        # first structural error found while parsing, if any
    while (my $line = <$in_fh>) {
        chomp $line;
        next if $line =~ /^\s*$/;

        # Every line starting with ">" is a header. Using a single rule here
        # keeps the sequence count and the record parsing in agreement, even
        # for headers written as "> name".
        if ($line =~ /^>\s*(\S*)/) {
            my $name = $1;
            if ($name eq "") {
                $problem = "Error: the header at line $. has no sequence name.\n";
                last;
            }
            push @records, [ $name, "" ];
        }
        elsif (@records) {
            $records[-1][1] .= $line;
        }
        else {
            $problem = "Error: sequence data found before the first header line.\n";
            last;
        }
    }
    close $in_fh;

    $fail->($problem) if defined $problem;
    $fail->("Error: no sequences found in the input file.\n") unless @records;

    # The first sequence defines the alignment length every other one must match.
    my $seqLen = length $records[0][1];
    foreach my $record (@records) {
        my ($seqName, $seq) = @{$record};
        next if length($seq) == $seqLen;
        $fail->("Error: the sequence length of $seqName is not same as others.\n");
    }

    open(my $out_fh, '>', $phylipFile) or die "Cant open $phylipFile: $!\n";
    print {$out_fh} scalar(@records), " ", $seqLen, "\n";
    foreach my $record (@records) {
        print {$out_fh} "$record->[0]\t$record->[1]\n";
    }
    # Buffered write errors only surface at close, so check it.
    close $out_fh or die "Couldn't close $phylipFile: $!\n";
    return 1;
}