-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplitGtf.pl
99 lines (70 loc) · 2.12 KB
/
splitGtf.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env perl
=head2 NAME
Split a gtf file when it's too large to process.
=head2 SYNOPSIS
perl splitGtf.pl <file.gtf> <output_prefix>
=head2 AUTHOR
Zhigang Li lzg0063(at)126.com 2014-11-23
=cut
################################################################################
# Options
################################################################################
BEGIN { use FindBin qw($Bin); use lib "$Bin"; }
use 5.010;
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use lib::basic qw(logMsg logError logWarn logStd);
#use Data::Dumper;
#parameters
# Maximum number of genes written to each output file.
# Defaults to 10000; may be overridden with --genes-per-file / -n.
my $geneNumPerFile = 10000;
#help
# Called with no arguments at all: print the full documentation and stop.
# pod2usage() exits by itself, so no explicit exit() is needed after it.
# The status is 1, which is what pod2usage already used for -verbose => 2.
if ( !@ARGV ) {
    pod2usage( -noperldoc => 1, -verbose => 2, -exitval => 1 );
}

# Parse command-line options.
#   --help, -h               print the documentation and exit with status 0
#   --genes-per-file, -n N   number of genes per output file (positive integer)
# GetOptions() returns false when it meets an unknown or malformed option;
# in that case show the documentation and exit with status 2 instead of
# silently continuing.
Getopt::Long::GetOptions(
    "help|h" => sub { pod2usage( -noperldoc => 1, -verbose => 2, -exitval => 0 ); },
    "genes-per-file|n=i" => \$geneNumPerFile,
) or pod2usage( -noperldoc => 1, -verbose => 2, -exitval => 2 );

# A chunk size below 1 cannot produce a meaningful split.
if ( $geneNumPerFile < 1 ) {
    pod2usage(
        -message   => "--genes-per-file must be a positive integer",
        -noperldoc => 1,
        -verbose   => 2,
        -exitval   => 2,
    );
}
################################################################################
# Main
################################################################################
# Main pass:
#   1. read the GTF file named by the first positional argument,
#   2. collect every exon/CDS record under its gene_id,
#   3. write the genes out to "<output_prefix>.<N>.gtf" files (N = 1, 2, ...),
#      at most $geneNumPerFile genes per file, keeping all records of one
#      gene in the same file.
# Dies with a message if an argument is missing or a file cannot be
# opened, written or closed.
&logMsg("Starting...");

# Positional arguments: input GTF file and prefix of the output files.
my ( $gtfFile, $outPrefix ) = @ARGV;
die "Usage: perl splitGtf.pl <file.gtf> <output_prefix>\n"
    unless defined $gtfFile && defined $outPrefix;

my %fileHash;     # gene_id => all exon/CDS lines of that gene, newline-terminated
my @geneOrder;    # gene_ids in order of first appearance, for reproducible output

# Three-argument open with "or die": with "|| die" the test would bind to the
# file name string rather than to open(), so a failed open went unnoticed.
open( my $in, '<', $gtfFile )
    or die "Cannot open '$gtfFile' for reading: $!\n";
while ( my $line = <$in> ) {
    next if $line =~ /^#/;     # comment / header line
    next if $line !~ /\S/;     # blank line
    $line =~ s/\R\z//;         # strip the line ending (LF or CRLF)

    my @field = split /\t/, $line;

    # Only exon and CDS records are kept (same four spellings as before);
    # a line too short to have a feature column is skipped as well.
    my $feature = $field[2] // '';
    next
        unless $feature eq 'exon'
        || $feature eq 'EXON'
        || $feature eq 'CDS'
        || $feature eq 'cds';

    # Take the gene_id from the attribute column. Capturing in list context
    # leaves $id undefined when nothing matches, instead of reusing the $1
    # of an earlier record.
    my ($id) = ( $field[8] // '' ) =~ /gene_id "([^"]+)"/;
    if ( !defined $id ) {
        warn "$gtfFile line $.: $feature record without gene_id, skipped\n";
        next;
    }

    push @geneOrder, $id unless exists $fileHash{$id};
    $fileHash{$id} .= $line . "\n";
}
close($in);

my $num    = 0;    # genes already written to the current output file
my $fileId = 1;    # numeric suffix of the current output file
my $oFile  = $outPrefix . '.' . $fileId . ".gtf";

# The first output file is always created, even when no gene was found.
open( my $out, '>', $oFile )
    or die "Cannot open '$oFile' for writing: $!\n";
foreach my $id (@geneOrder) {

    # Start a new file once the current one is full. The test runs before the
    # gene is written and the counter is bumped after it, so every file holds
    # exactly $geneNumPerFile genes (except possibly the last one).
    if ( $num >= $geneNumPerFile ) {
        close($out) or die "Cannot close '$oFile': $!\n";
        $fileId++;
        $num   = 0;
        $oFile = $outPrefix . '.' . $fileId . ".gtf";
        open( $out, '>', $oFile )
            or die "Cannot open '$oFile' for writing: $!\n";
    }
    print {$out} $fileHash{$id}
        or die "Cannot write to '$oFile': $!\n";
    $num++;
}

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close '$oFile': $!\n";
&logMsg("Finishing...");
################################################################################
# Subroutines
################################################################################