forked from lstein/modENCODE-GBrowse-Cloud
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_gbrowse_binary_filenames.pl
executable file
·64 lines (53 loc) · 1.67 KB
/
extract_gbrowse_binary_filenames.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl
# step 1 of the pipeline
# get a list of all the files that need to be transferred from xfer-cloud:/browser_data
# and copied to AWS in order to run the GBrowse instance there...
use strict;
use File::Find;
# Directories on the /browser_data volume that this script scans.
use constant {
    MYSQL_ROOT => '/browser_data/mysql_dumps_new',    # globbed for *.gz dumps
    CONF_ROOT  => '/browser_data/conf',               # searched for *.conf / *.conf.gz
};

# Every candidate path found so far: key is the path, value counts how
# many times it was seen.
my %files;
# Pass 1: collect every /browser_data path mentioned in the configuration
# files (*.conf and *.conf.gz) found under CONF_ROOT, and record each one
# as a key of %files.
warn "extracting binary filenames from conf files...\n";
-d CONF_ROOT or die CONF_ROOT," does not seem to be mounted. Are you running this on modencode.oicr.on.ca?";
my @files = ();
find({wanted => sub {push @files,$File::Find::name if /\.conf(\.gz)?$/},
      follow => 1},    # follow symbolic links while searching
     CONF_ROOT);
for my $f (@files) {
    print STDERR "Extracting $f...";

    my $in;
    if ($f =~ /\.gz$/) {
        # List-form pipe open: the file name reaches zcat as one argument
        # and never passes through a shell, so spaces or metacharacters in
        # the name are harmless.
        open $in,'-|','zcat',$f or die "zcat $f: $!";
    }
    else {
        # Uncompressed .conf files are read directly; piping them through
        # zcat fails with "not in gzip format" and yields no matches.
        open $in,'<',$f or die "$f: $!";
    }

    my @matches;
    while (my $line = <$in>) {
        chomp $line;
        # Capture each /browser_data... path. Surrounding single quotes are
        # optional; the path runs up to the next single quote or end of line.
        push @matches, $line =~ m!'?(/browser_data[^']+)'?!g;
    }
    print STDERR scalar @matches," candidate files\n";

    # For a pipe, close() is where a non-zero zcat exit status shows up.
    # Report it so a truncated or corrupt file does not go unnoticed.
    close $in
        or warn "problem reading $f: ",($! ? $! : 'exit status '.($? >> 8)),"\n";

    $files{$_}++ foreach @matches;
}
# Pass 2: collect every single-quoted string that starts with "/" from the
# gzipped dump files directly under MYSQL_ROOT, and record each one as a
# key of %files.
warn "extracting binary filenames from mysql dumps...\n";
-d MYSQL_ROOT or die MYSQL_ROOT," does not seem to be mounted. Are you running this on modencode.oicr.on.ca?";
@files = glob(MYSQL_ROOT .'/*.gz');
for my $f (@files) {
    print STDERR "Extracting $f...";

    # List-form pipe open: the file name reaches zcat as one argument and
    # never passes through a shell, so spaces or metacharacters in the name
    # are harmless.
    open my $in,'-|','zcat',$f or die "zcat $f: $!";

    my @matches;
    while (my $line = <$in>) {
        chomp $line;
        # Capture the contents of each '...' literal that begins with a slash.
        push @matches, $line =~ m!'(/[^']+)'!g;
    }
    print STDERR scalar @matches," candidate files\n";

    # For a pipe, close() is where a non-zero zcat exit status shows up.
    # Report it so a truncated or corrupt dump does not go unnoticed.
    close $in
        or warn "problem reading $f: ",($! ? $! : 'exit status '.($? >> 8)),"\n";

    $files{$_}++ foreach @matches;
}
# Add the companion index file for every FASTA (.fa/.fasta -> .fai) and
# BAM (.bam -> .bai) path collected so far. The key list is snapshotted
# first so that the index entries added here are not themselves examined.
my @collected = keys %files;
foreach my $path (@collected) {
    my $index_suffix;
    $index_suffix = '.bai' if $path =~ /\.bam$/i;
    $index_suffix = '.fai' if $path =~ /\.(fa|fasta)$/i;
    next unless defined $index_suffix;
    $files{$path . $index_suffix}++;
}

# Emit the sorted, de-duplicated list of paths, one per line, on STDOUT.
my $listing = join("\n", sort keys %files);
print $listing, "\n";